#include <argtable2.h>
#include "mQual.h"

/*
 * Processing-mode identifiers.
 * generic() switches on REVCOMP, BREAK and CLIP_454; main() passes exactly
 * those three to it. SEPARATE and GC are defined here but are not referenced
 * by any code in this file.
 */
#define REVCOMP 0
#define BREAK 1
#define SEPARATE 2
#define GC 3
#define CLIP_454 4

/*
 * Split the records of every input file into two output streams by length.
 *
 *   count - number of entries in files
 *   files - input paths; "-" selects standard input, as in generic() and
 *           length_dist()
 *   size  - threshold: records with length > size are written to stdout,
 *           all other records are written to stderr
 *
 * Returns 0. An input that cannot be opened is reported through mDie().
 */
int separate(int count, const char* files[], int size) {
	mQual *qual = (mQual*) mMalloc(sizeof(mQual));
	FILE *stream;
	FILE *longer = stdout;
	FILE *shorter = stderr;
	int i;
	int status;

	for (i=0; i<count; i++) {
		if (strcmp(files[i], "-") == 0) {
			stream = stdin;
		} else if ((stream = fopen(files[i], "r")) == NULL) {
			mDie("Cannot open %s for reading", files[i]);
		}
		while ((status = mReadQual(stream, qual))) {
			mWriteQual((qual->length > size) ? longer : shorter, qual);
			mFreeQual(qual);
			if (status == END_OF_QUAL) { /* Last entry */
				break;
			}
		}
		/* Leave stdin open so that it is never closed behind the caller's back. */
		if (stream != stdin) {
			fclose(stream);
		}
	}
	/* Release the record holder itself, the same way generic() does. */
	mFree(qual);
	return 0;
}

/* The length histogram has one slot for each length in 0 .. LIMIT-1. */
#define LIMIT 5000
/*
 * Print the length distribution of all records in the given inputs.
 *
 *   file_count - number of entries in files
 *   files      - input paths; "-" selects standard input
 *
 * For every length that occurred at least once, one line
 * "<length>\t<occurrences>\t<fraction of all records>" is written to stdout.
 * Records whose length does not fit the histogram (negative or >= LIMIT) are
 * still counted in the total, so the printed fractions stay relative to all
 * records read; their number is reported once on stderr.
 *
 * Returns 0. An input that cannot be opened is reported through mDie().
 */
int length_dist(int file_count, const char* files[]) {
	mQual *qual = (mQual*) mMalloc(sizeof(mQual));
	FILE *stream;
	int i;
	int lengths[LIMIT];
	int count = 0;
	int out_of_range = 0;
	int status;

	for (i=0; i<LIMIT; i++) lengths[i] = 0;

	for (i=0; i<file_count; i++) {
		if (strcmp(files[i], "-") == 0) {
			stream = stdin;
		} else if ((stream = fopen(files[i], "r")) == NULL) {
			mDie("Cannot open %s for reading", files[i]);
		}
		while ((status = mReadQual(stream, qual))) {
			/* Never index outside the histogram. */
			if (qual->length >= 0 && qual->length < LIMIT) {
				lengths[qual->length]++;
			} else {
				out_of_range++;
			}
			count++;
			/* Release the record before leaving the loop so the last one is not leaked. */
			mFreeQual(qual);
			if (status == END_OF_QUAL) { /* Last entry */
				break;
			}
		}
		if (stream != stdin) {
			fclose(stream);
		}
	}

	for (i=0; i<LIMIT; i++) {
		if (lengths[i] > 0) printf("%d\t%d\t%.6f\n", i, lengths[i], 1.0*lengths[i]/count);
	}
	if (out_of_range > 0) {
		fprintf(stderr, "Warning: %d sequence(s) with length outside 0..%d are not listed\n", out_of_range, LIMIT-1);
	}
	mFree(qual);
	return 0;
}

/*
 * Apply one per-record transformation to every record of every input and
 * write the result to stdout.
 *
 *   mode  - REVCOMP:  reverse-complement the record, then write it
 *           BREAK:    hand the record to mBreakAndWriteQual() with size
 *           CLIP_454: write mSubQual(record, 4, length - 4)
 *           any other value: records are read and discarded
 *   count - number of entries in files
 *   files - input paths; "-" selects standard input
 *   size  - only used in BREAK mode
 *
 * Returns 0. An input that cannot be opened is reported through mDie().
 */
int generic(int mode, int count, const char* files[], int size) {
	mQual *qual = (mQual*) mMalloc(sizeof(mQual));
	mQual *sub;
	FILE *stream;
	int i;
	int status;

	for (i=0; i<count; i++) {
		if (strcmp(files[i], "-") == 0) {
			stream = stdin;
		} else if ((stream = fopen(files[i], "r")) == NULL) {
			mDie("Cannot open %s for reading", files[i]);
		}
		while ((status = mReadQual(stream, qual))) {
			switch (mode) {
				case REVCOMP:
					mReverseComplementQual(qual);
					mWriteQual(stdout, qual);
					break;
				case BREAK:
					mBreakAndWriteQual(stdout, qual, size);
					break;
				case CLIP_454:
					sub = mSubQual(qual, 4, qual->length - 4);
					mWriteQual(stdout, sub);
					/*
					 * Release the clipped copy right away instead of
					 * overwriting the pointer on the next record.
					 * NOTE(review): if mSubQual() also allocates buffers
					 * inside the returned record, an mFreeQual(sub) is
					 * needed before this call - confirm against mQual.
					 */
					mFree(sub);
					break;
				default:
					break;
			}
			mFreeQual(qual);
			if (status == END_OF_QUAL) { /* Last entry */
				break;
			}
		}
		if (stream != stdin) {
			fclose(stream);
		}
	}
	mFree(qual);
	return 0;
}

/*
 * Write the usage text to stdout: the argtable syntax line, the argtable
 * glossary, and a table of the processing modes. Finishes by calling
 * mQuit(""); returns 0 if that call comes back.
 */
int print_help(void **argtable) {
	/* One {mode, description} pair per output line, in display order. */
	static const char *const rows[][2] = {
		{ "",         "" },
		{ "revcomp",  "reverse complement each sequence" },
		{ "break",    "break sequences into fragments of size <size>" },
		{ "separate", "separate sequences into two files" },
		{ " ",        "  sequences longer than <size> go to STDOUT" },
		{ " ",        "  sequences shorter than or equal to <size> go to STDERR" },
		{ "clip454",  "clip first 4 bases from the first cycle in every read" },
		{ "dist",     "report length distribution of sequences in all files" }
	};
	size_t row;

	fprintf(stdout, "Usage:\nprocessFasta");
	arg_print_syntax(stdout, argtable, "\n");
	arg_print_glossary(stdout, argtable, "  %-25s %s\n");
	fprintf(stdout, "\nProcessing modes:\n");
	for (row = 0; row < sizeof(rows)/sizeof(rows[0]); row++) {
		fprintf(stdout, "%10s    %s\n", rows[row][0], rows[row][1]);
	}
	mQuit("");
	return 0;
}

/*
 * Entry point: parse the command line with argtable2 and dispatch to the
 * handler for the requested --mode.
 *
 * "break" and "separate" require --size; when it is missing the help text
 * is printed and the process exits with status -1. A parse error or an
 * unrecognised mode is reported on stderr followed by the help text.
 */
int main(int argc, char* argv[]) {
	struct arg_str  *mode;
	struct arg_file *files;
	struct arg_int  *size;
	struct arg_end  *end;
	int              nerrors;
	int              threshold;
	const char      *selected;
	void            *argtable[4];

	mode        = arg_str1(NULL, "mode", "MODE",       "revcomp|break|separate|clip454|dist");
	files       = arg_filen(NULL, NULL,  "FILE", 0, argc+2, NULL);
	size        = arg_int0("s",  "size", "SIZE", "size of output fragments for \"break\" (or) threshold for separating in \"separate\"");
	end         = arg_end(6); /* this needs to be even, otherwise each element in end->parent[] crosses an 8-byte boundary */
	argtable[0] = mode;
	argtable[1] = size;
	argtable[2] = files;
	argtable[3] = end;

	/* Check the allocations before any entry is dereferenced. */
	if (arg_nullcheck(argtable) != 0) {
		mDie("insufficient memory");
	}

	/* -1 marks "no --size given"; arg_parse() overwrites it when the option is present. */
	size->ival[0] = -1;

	nerrors = arg_parse(argc, argv, argtable);
	if (nerrors > 0) {
		arg_print_errors(stderr, end, "processFasta");
		print_help(argtable);
		/* Fallback in case print_help() returns: mode->sval[0] must not be used after a failed parse. */
		exit(-1);
	}

	threshold = size->ival[0];
	selected  = mode->sval[0];

	if (strcmp(selected, "revcomp") == 0) {
		generic(REVCOMP, files->count, files->filename, 0);
	} else if (strcmp(selected, "clip454") == 0) {
		generic(CLIP_454, files->count, files->filename, 0);
	} else if (strcmp(selected, "break") == 0) {
		if (threshold == -1) {
			print_help(argtable);
			exit(-1);
		}
		generic(BREAK, files->count, files->filename, threshold);
	} else if (strcmp(selected, "separate") == 0) {
		if (threshold == -1) {
			print_help(argtable);
			exit(-1);
		}
		separate(files->count, files->filename, threshold);
	} else if (strcmp(selected, "dist") == 0) {
		length_dist(files->count, files->filename);
	} else {
		/* Do not silently succeed on a mode that has no handler. */
		fprintf(stderr, "Unknown mode: %s\n", selected);
		print_help(argtable);
		exit(-1);
	}

	/* Free every entry, including the arg_end terminator. */
	arg_freetable(argtable, sizeof(argtable)/sizeof(argtable[0]));
	return 0;
}
