#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <memory.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/signal.h>
#include <sys/wait.h>

#include <fcntl.h>

#include <assert.h>

/* Bookkeeping record the parent keeps for one forked copy process. */
typedef struct _worker_data
{
	pid_t pid;		/* value returned by fork() for this worker; 0 while the slot is unused */
	off_t offset;		/* offset of the block handed to the worker */
	size_t block_size;	/* number of bytes the worker was asked to copy */
}worker_data;


/* Number of slots in the worker table; the retry table holds twice as many. */
#define MAX_PROCS 8
/* Number of bytes handed to copyblock() for one regular block. */
#define BLOCK_SIZE (4096 * MAX_PROCS * 2)

/*
 * Copy one block from the source mapping to the destination mapping and ask
 * the kernel to flush it.
 *
 * dest, src   - start of the destination / source buffers
 * i           - offset of the block inside both buffers
 * block_size  - requested number of bytes
 * len         - total number of valid bytes in the buffers
 *
 * The block is clamped so that it never reaches past len.  An offset outside
 * [0, len) copies nothing.
 */
void copyblock(unsigned char * dest, unsigned char * src, off_t i, size_t block_size, off_t len)
{
	size_t block = block_size;

	/* nothing to copy at or past the end of the data */
	if(i < 0 || i >= len)
	{
		return;
	}

	/* clamp the final block; len - i is strictly positive here */
	if((unsigned long long)(len - i) < (unsigned long long)block_size)
	{
		block = (size_t)(len - i);
	}

	memcpy(&dest[i], &src[i], block);

	/*
	 * Best effort only: msync() requires a page-aligned start address, so it
	 * reports an error (ignored here) when the offset is not a multiple of
	 * the page size.
	 */
	(void)msync(&dest[i], block, MS_SYNC);
}

int main(int argc, char * argv[])
{
	int fd,fd2;
	struct stat stat_buf;
	off_t len;
	unsigned char * src;
	unsigned char * dest;
	int status;

	size_t lost = 0;

	worker_data procs[MAX_PROCS];
	worker_data retries[MAX_PROCS * 2];

	off_t i = 0;
	int k;
	
	if(argc < 3)
	{
		fprintf(stderr, "Usage: %s infile outfile\n", argv[0]);
		return 1;
	}
	
	if(access(argv[1], R_OK) != 0)
	{
		fprintf(stderr, "Cannot read file: %s\n", argv[1]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);

	if(!fd)
	{
		fprintf(stderr, "Could not open %s\n", argv[1]);
	}

	assert(fstat(fd, &stat_buf) == 0);

	len = stat_buf.st_size;

	src = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);

	if(access(argv[2], R_OK | W_OK) == 0)
	{
		assert(unlink(argv[2]) == 0);
	}

	fd2 = open(argv[2], O_CREAT | O_WRONLY, S_IRWXU);

	lseek(fd2, len - 1, SEEK_SET);
	write(fd2, "", 1);

	fsync(fd2);

	close(fd2);
	fd2 = open(argv[2], O_RDWR);

	dest = mmap(NULL, len, PROT_WRITE, MAP_SHARED, fd2, 0);

	if( i < len) 
	{
		copyblock(dest,src,i,BLOCK_SIZE, len);
		i += BLOCK_SIZE;
	}

	/* 	so I have a number of worker processes each doing one block each 
		wasteful, I know but works for sure 
	*/
#define FORK_OFF( k) \
	do { \
	if(i < len) { \
		if((procs[k].pid = fork())) { \
			/* fprintf(stderr, "forked off %d for [%d:%d] \n", procs[k], i, i + BLOCK_SIZE); */\
			/* parent */\
			procs[k].offset = i; \
			procs[k].block_size = BLOCK_SIZE;\
			i += BLOCK_SIZE;\
		} else { \
			/* forked child */\
			copyblock(dest, src, i, BLOCK_SIZE, len); \
			exit(0); \
		} \
	} } while(0)

#define RETRY(offset, block_size, k) \
	do {\
		if((retries[k].pid = fork())) { \
			/* parent */\
			retries[k].offset = offset; \
			retries[k].block_size = block_size;\
		} else { \
			/* forked child */\
			copyblock(dest, src, offset, block_size, len); \
			exit(0); \
		} \
	} while(0)

	memset(procs, 0, sizeof(procs));
	
	for(k = 0; k < MAX_PROCS; k++)
	{
		FORK_OFF(k);
	}

	while(i < len)
	{
		int percentage = (i * 100) / len;
		pid_t deadproc = 0;

		k = percentage/2;
		while(k--) putchar('=');
		k = 50 - (percentage/2);
		while(k--) putchar(' ');
		printf(" %02d %%\r", percentage);
		fflush(stdout);
		deadproc = wait(&status);
		if(WIFSIGNALED(status))
		{
			fprintf(stderr, "<%d> exited due to <%d>\n", deadproc, WTERMSIG(status));
		}
		if(WIFSIGNALED(status) && WTERMSIG(status) == SIGBUS)
		{
			worker_data * retry = NULL;
			for(k = 0; k < MAX_PROCS; k++)
			{
				if(procs[k].pid == deadproc)
				{
					retry = &procs[k];
				}
			}
			if(retry)
			{
				fprintf(stderr, "retrying %ld,+%ld again\n", (long)retry->offset, (long)retry->block_size);
				for(k = 0; k < 2 * MAX_PROCS; k++)
				{
					if(retries[k].pid) 
					{
						waitpid(retries[k].pid, &status, 0);
					}
				}
				
				memset(retries, 0, sizeof(retries));
				for(k = 0; k < 2 * MAX_PROCS; k++)
				{
					size_t block_size = retry->block_size / (2*MAX_PROCS);
					off_t offset = retry->offset + (k * block_size);
					fprintf(stderr, "spawn retry  #%d %ld,+%ld again\n", k, (long)offset, (long)block_size);
					RETRY(offset, block_size, k);
				}
			}
			else
			{
				fprintf(stderr, "%d failed on retry \n", deadproc);
				for(k = 0; k < 2 * MAX_PROCS; k++)
				{
					if(retries[k].pid == deadproc) 
					{
						fprintf(stderr, "lost %ld bytes at %ld\n", (long)retries[k].offset, (long)retries[k].block_size);
						lost += retries[k].block_size;
						break;
					}
				}
			}
		}
		for(k = 0; k < MAX_PROCS; k++)
		{
			if(procs[k].pid == deadproc)
			{
				FORK_OFF(k);
			}
		}
	}

	munmap(dest, len);
	munmap(src, len);
	close(fd);
	close(fd2);
	for(k = 0; k < MAX_PROCS; k++)
	{
		if(procs[k].pid) 
		{
			waitpid(procs[k].pid, &status, 0);
		}
	}
	printf("\nDone (lost %02.00f%%/0x%lx bytes)\n", (double)((double)(lost * 100)/(double)len), (long)lost);
	return 0;
}
