#include <sys/stat.h>
#include <limits.h>
#include "mDNA.h"
#include <argtable2.h>

#define FILE_SIZE  (1)
#define SEQ_LENGTH (2)

/* Slack added to strlen(prefix) when sizing output file names: covers the
 * longest numeric suffix either mode can produce plus ".fa"/"bp.fa" and NUL. */
#define NAME_SLACK (64)

/* Print the usage line and the option glossary to stdout. */
static void print_usage(void **argtable) {
	fprintf(stdout, "Usage: splitFasta");
	arg_print_syntax(stdout, argtable, "\n");
	arg_print_glossary(stdout, argtable, "  %-25s %s\n");
}

/* Open `name` for writing; report through mDie() if that fails. */
static FILE *open_output(const char *name) {
	FILE *out = fopen(name, "w");
	if (out == NULL) {
		mDie("Cannot open output file %s for writing", name);
	}
	return out;
}

/* Close an output stream. fclose() flushes buffered data, so a failure
 * here means the file on disk is incomplete and must not go unnoticed. */
static void close_output(FILE *out) {
	if (fclose(out) != 0) {
		mDie("Error while closing output file");
	}
}

/* Number of decimal digits needed to print a non-negative value (at least 1). */
static int count_digits(off_t value) {
	int digits = 1;
	while (value >= 10) {
		value /= 10;
		digits++;
	}
	return digits;
}

/* SEQ_LENGTH mode: each sequence goes to "<prefix>.<length>bp.fa"; every
 * sequence longer than `max` is written to the file for `max`.
 * Output files are opened lazily, the first time a length is seen. */
static void split_by_length(FILE *stream, mDNA *dna, const char *prefix, int max) {
	FILE  **out;
	char   *name;
	size_t  name_size;
	size_t  slots;
	int     status;
	int     i;

	if (max < 0) {
		mDie("--max must not be negative");
	}

	/* one slot per length 0..max; size_t arithmetic avoids int overflow on max+1 */
	slots     = (size_t) max + 1;
	out       = (FILE**) mCalloc(slots, sizeof(FILE*));
	for (i = 0; i <= max && (size_t) i < slots; i++) {
		out[i] = NULL;
		if (i == max) break; /* avoids i++ overflow when max == INT_MAX */
	}

	name_size = strlen(prefix) + NAME_SLACK;
	name      = (char*) mCalloc(name_size, sizeof(char));

	while ((status = mReadDNALite(stream, dna))) {
		int length = dna->length;
		if (length > max) length = max;
		if (out[length] == NULL) {
			snprintf(name, name_size, "%s.%dbp.fa", prefix, length);
			out[length] = open_output(name);
		}
		mWriteDNA(out[length], dna);
		mFreeDNA(dna);
		if (status == END_OF_FASTA) break;
	}

	for (i = 0; ; i++) {
		if (out[i] != NULL) {
			close_output(out[i]);
		}
		if (i == max) break;
	}

	mFree(name);
	mFree(out);
}

/* FILE_SIZE mode: sequences are appended to "<prefix><counter>.fa"; a new
 * file is started whenever adding a sequence pushes the running base count
 * above `size`. The counter is zero-padded to the number of digits of
 * (input file size / size), an estimate of how many files will be written. */
static void split_by_size(FILE *stream, mDNA *dna, const char *prefix,
                          const char *infile, off_t size) {
	struct stat statbuf;
	off_t   current_size = 0;
	int     counter      = 0;
	int     padding;
	int     status;
	char   *name;
	size_t  name_size;
	FILE   *out;

	if (size <= 0) {
		mDie("--size must be a positive number");
	}
	if (stat(infile, &statbuf) != 0) {
		mDie("Cannot stat fasta file %s", infile);
	}

	/* Integer digit count instead of 1+(int)log10(ratio): the latter turns
	 * negative for inputs much smaller than `size`, and a negative '*' width
	 * left-justifies, producing file names with trailing blanks. */
	padding   = count_digits(statbuf.st_size / size);

	name_size = strlen(prefix) + NAME_SLACK;
	name      = (char*) mCalloc(name_size, sizeof(char));

	snprintf(name, name_size, "%s%0*d.fa", prefix, padding, counter);
	out = open_output(name);

	while ((status = mReadDNALite(stream, dna))) {
		int length = dna->length;
		current_size += length;
		if (current_size > size) {
			/* this sequence does not fit: roll over and let it start the next file */
			close_output(out);
			counter++;
			snprintf(name, name_size, "%s%0*d.fa", prefix, padding, counter);
			out = open_output(name);
			current_size = length;
		}
		mWriteDNA(out, dna);
		mFreeDNA(dna);
		if (status == END_OF_FASTA) break;
	}

	close_output(out);
	mFree(name);
}

/*
 * splitFasta: split a fasta file into several fasta files.
 *
 * Options (parsed with argtable2):
 *   -i/--input  <file>    input fasta file (required)
 *   -p/--prefix <string>  prefix for the output file names (required)
 *   -m/--mode   <string>  "filesize" or "seqlength" (required)
 *   --max       <num>     seqlength mode: longer sequences share one file (default 100)
 *   -s/--size   <num>     filesize mode: approx. basepairs per file (default 1000000)
 *   -h/--help             print usage and exit
 *
 * Returns 0 on success; errors are reported through mDie()/mQuit().
 */
int main(int argc, char* argv[]) {
	mDNA        *dna;
	FILE        *stream;
	int          mode;
	int          nerrors;
	const char  *prefix;
	const char  *infile;

	struct arg_str  *arg_infile    = arg_str1("i", "input",  "<file>",   "input fasta file");
	struct arg_str  *arg_prefix    = arg_str1("p", "prefix", "<string>", "prefix for output fasta files");
	struct arg_str  *arg_mode      = arg_str1("m", "mode",   "<string>", "mode to split fasta files (filesize, seqlength)");
	struct arg_int  *arg_maxlength = arg_int0(NULL,"max",    "<num>",    "sequences longer than this will be lumped together (default: 100)");
	struct arg_int  *arg_size      = arg_int0("s", "size",   "<num>",    "number of basepairs (approx.) that will be in each file (default: 1000000)");
	struct arg_lit  *help          = arg_lit0("h", "help",               "print this help and exit");
	struct arg_end  *end           = arg_end(8); /* this needs to be even, otherwise each element in end->parent[] crosses an 8-byte boundary */

	/* The table lives on the stack and is sized by its initializer, so it
	 * always has room for every entry including the terminating arg_end. */
	void *argtable[] = {
		arg_infile, arg_prefix, arg_mode, arg_maxlength, arg_size, help, end
	};

	/* verify the entries before touching any of them */

	if (arg_nullcheck(argtable) != 0) {
		mDie("insufficient memory");
	}

	/* defaults: must be set after the null check and before parsing */

	arg_maxlength->ival[0] = 100;
	arg_size->ival[0]      = 1000000;

	/* parse command line */

	nerrors = arg_parse(argc, argv, argtable);

	if (help->count > 0) {
		print_usage(argtable);
		mQuit("");
	}

	if (nerrors > 0) {
		arg_print_errors(stderr, end, "splitFasta");
		fprintf(stderr, "try using -h\n");
		mQuit("");
	}

	/* figure out which mode we run under */

	if (strcmp(arg_mode->sval[0], "filesize") == 0) {
		mode = FILE_SIZE;
	} else if (strcmp(arg_mode->sval[0], "seqlength") == 0) {
		mode = SEQ_LENGTH;
	} else {
		mode = -1;
		fprintf(stderr, "Unknown mode: %s\n", arg_mode->sval[0]);
		print_usage(argtable);
		mQuit("");
	}

	/* The parsed strings stay valid for the life of the process, so they are
	 * used in place rather than copied into fixed-size buffers. */

	prefix = arg_prefix->sval[0];
	infile = arg_infile->sval[0];

	/* open input stream */

	if ((stream = fopen(infile, "r")) == NULL) {
		mDie("Cannot open fasta file %s for reading", infile);
	}

	dna = (mDNA*) mCalloc(1, sizeof(mDNA));

	if (mode == SEQ_LENGTH) {
		split_by_length(stream, dna, prefix, arg_maxlength->ival[0]);
	} else if (mode == FILE_SIZE) {
		split_by_size(stream, dna, prefix, infile, (off_t) arg_size->ival[0]);
	}

	/* close input stream */

	fclose(stream);

	/* Free memory etc */

	mFree(dna);

	return 0;
}
