#include <limits.h>
#include "mDNA.h"
#include "zoeTools.h"

/*
Format:
~~~~~~~
name	mode	num1	num2

Modes:
~~~~~~
  1 : clip
-10 : split

Clip Mode:
~~~~~~~~~~
Coordinates are 0-based here.

name	1	start	length

means:
keep sequence <name> from <start> with length <length>

Special cases:
length=-1 --> no clipping: <start> is ignored and the whole sequence is kept


Split Mode:
~~~~~~~~~~~
Coordinates are 1-based here.

name	-10	end_first	beg_second

means:
split sequence <name> into two pieces:
piece1: [1, <end_first>]
piece2: [<beg_second>, end-of-sequence]


*/
int main(int argc, char* argv[]) {
	mDNA *seq = (mDNA*) mMalloc(sizeof(mDNA));
	mDNA *sub;
	FILE *stream, *trim_stream;
	char line[LINE_MAX];
	int status;
	int min_length = 1;
	zoeHash def2code = zoeNewHash();
	zoeHash def2start = zoeNewHash();
	zoeHash def2len = zoeNewHash();
	if (argc < 3) {
		mDie("Usage:%s <fasta_file> <trim_info>", argv[0]);
	}
	if ((stream = fopen(argv[1], "r")) == NULL) {
		mDie("Cannot open %s for reading", argv[1]);
	}
	if ((trim_stream = fopen(argv[2], "r")) == NULL) {
		mDie("Cannot open %s for reading", argv[2]);
	}
	while (fgets(line, LINE_MAX, trim_stream) != NULL) {
		long *start  = (long*) mMalloc(sizeof(long));
		long *length = (long*) mMalloc(sizeof(long));
		int *code    = (int*) mMalloc(sizeof(int));
		char def[LINE_MAX];
		if (sscanf(line, "%s\t%d\t%ld\t%ld", def, code, start, length) != 4) {
			mDie("TRIM LINE ERROR");
		}
		zoeSetHash(def2code, def, code);
		zoeSetHash(def2start, def, start);
		zoeSetHash(def2len, def, length);
	}

	while ((status=mReadDNA(stream, seq))) {
		/* defaults will lead to clipping of the whole sequence */
		/* meaning, if something is not in the list, it wont show up! */
		long  length = 0;
		long  start  = 0;
		int   code   = 1;
		char *key    = mGetFirstWord(seq->def);
		long *ptr    = (long*)zoeGetHash(def2len, key);

		if (ptr != NULL) { 
			length = *ptr;
			start  = *((long*)zoeGetHash(def2start, key));
			code   = *((int*)zoeGetHash(def2code, key));
			if (length == -1) { /* No clipping */
				start = 0;
				length = seq->length;
			}
		}
		if (code == -10) {
			char *split_def = (char*) mMalloc((16+strlen(seq->def))*sizeof(char));
			sub = mSubDNA(seq, 0, start);
			sprintf(split_def, "p1_%s", seq->def);
			sub->def = split_def;
			if (sub->length >= min_length) 
				mWriteDNA(stdout, sub);
			mFree(sub);
			sub = mSubDNA(seq, length-1, seq->length - length + 1);
			sprintf(split_def, "p2_%s", seq->def);
			sub->def = split_def;
			if (sub->length >= min_length) 
				mWriteDNA(stdout, sub);
			mFree(sub);
		} else {
			sub = mSubDNA(seq, start, length);
			if (sub->length >= min_length) 
				mWriteDNA(stdout, sub);
			mFree(sub);
		}
/*
printf("%s %ld %ld %d\n", seq->def, start, length, code);
*/
		mFreeDNA(seq);
		mFree(key);
		if (status==END_OF_FASTA) break;
	}
	fclose(trim_stream);
	fclose(stream);
	exit(0);
}
