#include <limits.h>
#include "mDNA.h"

#define PROCESS_FASTA_HEADER

/*
triminfo format:

name	code	start	end	score

Meaning:
--------
name    - fasta header (first word)
code    - trim code (1: trim ends and keep an internal fragment, -10: remove internal fragment and split sequence)
start   - first base of fragment
end     - last base of fragment
score   - some score

start and end coordinates are 1-based, inclusive.
E.g., 

test 1 25 66 100

keep 42 bases (bases 25 to 66, inclusive) from test

test -10 25 66 100

split 'test' into two pieces as follows:

p1_test: bases 1 to 24
p2_test: bases 67 to <end_of_sequence>

Special cases:
--------------

1. code=0
	No clipping. Leave the entire sequence

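2. triminfo line contains just the name
	No clipping. Keep the entire sequence. The fasta file is advanced
	until the entry with this name is found; entries skipped on the way
	are not written.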

*/

/*
 * Returns non-zero when the fasta definition of `dna` starts with `name`.
 * NOTE(review): this is a prefix comparison (as in the original code), so
 * "seq1" also matches "seq10" -- confirm that this is intended.
 */
static int def_matches(const char *name, const mDNA *dna) {
	return strncmp(name, dna->def, strlen(name)) == 0;
}

/*
 * Dies unless the 0-based fragment [offset, offset+length) lies completely
 * inside `dna`. The coordinates come straight from the triminfo file, so
 * they are checked before being handed to mSubDNA.
 */
static void check_fragment(const mDNA *dna, long offset, long length) {
	if (offset < 0 || length > (long) dna->length - offset) {
		mDie("Fragment (offset %ld, length %ld) lies outside sequence %s", offset, length, dna->def);
	}
}

/*
 * Extracts `length` bases starting at 0-based `offset` from `dna` and
 * writes them to stdout. When `def` is not NULL it replaces the definition
 * of the written fragment. Fragments shorter than `min_length` are
 * silently omitted.
 */
static void write_fragment(mDNA *dna, long offset, long length, char *def, int min_length) {
	mDNA *sub;
	if (length < min_length) {
		return;
	}
	check_fragment(dna, offset, length);
	sub = mSubDNA(dna, (int) offset, (int) length);
	if (def != NULL) {
		sub->def = def;
	}
	mWriteDNA(stdout, sub);
	/* NOTE(review): released with mFree, not mFreeDNA, exactly as before;
	 * presumably mSubDNA shares storage with its parent -- confirm. */
	mFree(sub);
}

/*
 * Code -10: removes bases start..end (1-based, inclusive) and writes what
 * is left as "p1_<def>" (bases 1..start-1) and "p2_<def>" (bases
 * end+1..end of sequence).
 */
static void write_split_pieces(mDNA *dna, long start, long end, int min_length) {
	/* 16 extra bytes leave room for the "pN_" prefix and the terminator */
	char *split_def = (char*) mMalloc((16+strlen(dna->def))*sizeof(char));

	sprintf(split_def, "p1_%s", dna->def);
	write_fragment(dna, 0, start-1, split_def, min_length);

	/* 1-based `end` is the 0-based index of the first base after the gap */
	sprintf(split_def, "p2_%s", dna->def);
	write_fragment(dna, end, (long) dna->length-end, split_def, min_length);

	mFree(split_def);
}

/*
 * Usage: <prog> <fasta_file> <trim_info>
 *
 * Reads the triminfo file line by line; for every line one entry is read
 * from the fasta file and written to stdout clipped, split or unchanged,
 * depending on the trim code. Processing stops when either file is
 * exhausted.
 */
int main(int argc, char* argv[]) {
	mDNA *dna = (mDNA*) mMalloc(sizeof(mDNA));
	FILE *stream, *trim_stream;
	char line[LINE_MAX];
	const int min_length = 1; /* shorter fragments are not written */

	if (argc < 3) {
		mDie("Usage:%s <fasta_file> <trim_info>", argv[0]);
	}
	if ((stream = fopen(argv[1], "r")) == NULL) {
		mDie("Cannot open fasta %s for reading", argv[1]);
	}
	if ((trim_stream = fopen(argv[2], "r")) == NULL) {
		mDie("Cannot open triminfo %s for reading", argv[2]);
	}

	for (;;) {
		int status;
		char def[LINE_MAX]; /* cannot overflow: line is no longer than def */
		int   code;
		long  start, end;
		float score;

		if (fgets(line, LINE_MAX, trim_stream) == NULL) break;
		status = mReadDNA(stream, dna);
#ifdef PROCESS_FASTA_HEADER
		mProcessDNADef(dna);
#endif
		if (sscanf(line, "%s\t%d\t%ld\t%ld\t%f", def, &code, &start, &end, &score) == 5) {
			/* full records must be in the same order as the fasta entries */
			if (!def_matches(def, dna)) mDie("Unmatched definitions found");
			if (code == -10) {
				write_split_pieces(dna, start, end, min_length);
			} else if (code == 0) {
				/* no clipping */
				if (dna->length >= min_length) {
					mWriteDNA(stdout, dna);
				}
			} else if (code == 1) {
				/* keep bases start..end; start-1 converts to a 0-based offset */
				write_fragment(dna, start-1, end-start+1, NULL, min_length);
			} else {
				mDie("Unknown code: %d\n", code);
			}
		} else if (sscanf(line, "%s", def) == 1) {
			/* move through the fasta file until the corresponding entry
			 * shows up, then write the complete sequence */
			while (!def_matches(def, dna)) {
				/* the entry just compared was the last one: reading on
				 * would never terminate */
				if (status == END_OF_FASTA) {
					mDie("Entry %s not found in fasta file", def);
				}
				mFreeDNA(dna);
				status = mReadDNA(stream, dna);
#ifdef PROCESS_FASTA_HEADER
				mProcessDNADef(dna);
#endif
			}
			mWriteDNA(stdout, dna);
		} else {
			mDie("TRIM LINE ERROR");
		}
		mFreeDNA(dna);
		if (status == END_OF_FASTA) break; /* last entry */
	}

	fclose(trim_stream);
	fclose(stream);
	mFree(dna);
	return 0;
}
