#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#include <time.h>
#include <assert.h>
#include <argtable2.h>
#include "mDNA.h"

/* Forward declaration; the definition follows main() in this file. */
void generateReads(const char *in_fasta, int length, int count, char strand);

/* coverage per read, 454 quality values, Sanger reads, Sanger quality values */
/*
 * Entry point of the read sampler.
 *
 * Command line (parsed with argtable2):
 *   -i/--input  <file>   fasta file with contigs from genome (required)
 *   -l/--length <n>      length of the read to be simulated   (required)
 *   -c/--count  <n>      number of reads to be simulated      (required)
 *   -m/--minus           generate reads from the minus strand
 *   -h/--help            print usage and quit
 *
 * After parsing, rand() is seeded from the wall clock and the parsed values
 * are handed to generateReads(). Returns 0 on the normal path. The help and
 * parse-error paths end in mQuit(""), which presumably terminates the
 * process -- confirm against mDNA.h.
 */
int main(int argc, char* argv[]) {

	/* Command line parsing for argtable */

	struct arg_file *in_fasta;
	struct arg_int  *read_length;
	struct arg_int  *read_count;
	struct arg_lit  *minus;
	struct arg_lit  *help;
	struct arg_end  *end;
	int              nerrors;
	void           **argtable;
	int              nentries;
	char             strand;

	in_fasta     = arg_file1( "i", "input",  "<file>", "fasta file with contigs from genome");
	read_length  = arg_int1 ( "l", "length", "<n>",    "length of the read to be simulated");
	read_count   = arg_int1 ( "c", "count",  "<n>",    "number of reads to be simulated");
	minus        = arg_lit0 ( "m", "minus",            "generate reads from the minus strand");
	help         = arg_lit0 ( "h", "help",             "print this help and exit");
	end          = arg_end(6); /* this needs to be even, otherwise each element in end->parent[] crosses an 8-byte boundary */
									/* Check arg_end() in arg_end.c */
	argtable     = (void**) mMalloc(8*sizeof(void*));

	/* Fill the table; nentries ends up as the number of slots actually used. */
	nentries = 0;
	argtable[nentries++] = in_fasta;
	argtable[nentries++] = read_length;
	argtable[nentries++] = read_count;
	argtable[nentries++] = minus;
	argtable[nentries++] = help;
	argtable[nentries++] = end;

	if (arg_nullcheck(argtable) != 0) {
		mDie("insufficient memory");
	}

	nerrors = arg_parse(argc, argv, argtable);

	/* Help is checked before the error count so that "-h" alone, which
	 * necessarily lacks the required options, still prints the usage. */
	if (help->count > 0) {
		fprintf(stdout, "Read Sampler\n");
		fprintf(stdout, "Usage: %s", argv[0]);
		arg_print_syntax(stdout, argtable, "\n");
		arg_print_glossary(stdout, argtable, "  %-25s %s\n");
		mQuit("");
	}

	if (nerrors > 0) {
		arg_print_errors(stderr, end, argv[0]);
		fprintf(stderr, "try using -h\n");
		mQuit("");
	}

	/* Different seed on every run; reads are not reproducible across runs. */
	srand((unsigned int) time(NULL));

	if (minus->count > 0) {
		strand = '-';
	} else {
		strand = '+';
	}

	generateReads(in_fasta->filename[0], read_length->ival[0], read_count->ival[0], strand);

	/* Free up the memory. Every stored entry is released, including the
	 * arg_end structure in the last slot. */

	arg_freetable(argtable, nentries);
	mFree(argtable);
	return 0;
}


/*************
 */

/*
 * Sample `count` reads of `length` bases from the sequences stored in the
 * fasta file `in_fasta` and write each one to stdout via mWriteDNA().
 *
 *   in_fasta  path of the fasta file to sample from
 *   length    number of bases per read; must be positive
 *   count     number of reads to emit; nothing is emitted when count <= 0
 *   strand    '-' reverse-complements every read, any other value leaves
 *             the read as extracted; the character is also written into
 *             the read's definition line
 *
 * The file is scanned once to record, for every record, its starting file
 * offset and the cumulative sequence length before it. Each read is then
 * drawn by picking a random position in the concatenated length, mapping
 * it back to a record, and retrying when the read would not fit inside
 * that record. The definition of a read is "<source def>__<start>:<end>:<strand>"
 * with 1-based inclusive coordinates.
 *
 * Failures (bad length, unopenable file, file positioning errors, no
 * sequence long enough to hold a read) are reported through mDie().
 */
void generateReads(const char *in_fasta, int length, int count, char strand) {
	enum { NO_BIN = -100 };     /* marker: no record has been loaded into dna yet */
	FILE  *ifasta;
	mDNA  *dna;
	mIVector *size_lbounds;
	mLVector *seq_ptr;
	int    i;
	int    total_length;
	int    longest;
	int    prev_bin;

	if (length <= 0) {
		mDie("Read length must be positive: %d", length);
	}

	/* Open the fasta file */

	if ((ifasta = fopen(in_fasta, "r")) == NULL) {
		mDie("Cannot open input fasta file: %s", in_fasta);
	}

	/* Read file, calculate lengths and store pointers to each sequence */

	dna  = (mDNA*)  mMalloc(sizeof(mDNA));
	size_lbounds = (mIVector*) mMalloc(sizeof(mIVector));
	mInitIVector(size_lbounds, 1);
	seq_ptr = (mLVector*) mMalloc(sizeof(mLVector));
	mInitLVector(seq_ptr, 1);

	total_length = 0;
	longest      = 0;
	for (;;) {
		long current = ftell(ifasta);
		int  dstatus;

		if (current < 0) {
			mDie("Cannot determine position in input fasta file: %s", in_fasta);
		}
		dstatus = mReadDNALite(ifasta, dna);
		mPushIVector(size_lbounds, total_length);   /* lower bound of this record */
		mPushLVector(seq_ptr, current);             /* where this record starts in the file */
		/* NOTE(review): total_length is an int, so very large inputs could overflow it. */
		total_length += dna->length;
		if (dna->length > longest) {
			longest = dna->length;
		}
		mFreeDNA(dna);
		if (dstatus == END_OF_FASTA) {
			break;
		}
	}
	/* Sentinel: upper bound of the last record, so elem[bin+1] is always valid. */
	mPushIVector(size_lbounds, total_length);

	/* The sampling loop below only accepts a position when the read fits
	 * strictly inside its record; if no record is long enough it would
	 * retry forever, so refuse up front. */
	if (count > 0 && longest <= length) {
		mDie("No sequence in %s is longer than the read length %d", in_fasta, length);
	}

	/* Generate reads */

	prev_bin = NO_BIN;
	for (i=0; i<count; i++) {
		for (;;) {
			mDNA  *sub;
			char  *def;
			size_t def_size;
			/* NOTE(review): resolution is limited by RAND_MAX, which can be
			 * as small as 32767 on some platforms. */
			int number = (int) (total_length * (rand() / (RAND_MAX + 1.0)));
			int bin    = mBinarySearchIVector(size_lbounds, number);
			int pos    = number - size_lbounds->elem[bin];
			int room   = size_lbounds->elem[bin+1] - number;   /* bases from pos to the end of the record */

			/* Same test as number+length >= upper bound, written so the
			 * addition cannot overflow.
			 * NOTE(review): ">=" also rejects a read that ends exactly on
			 * the record's last base; that looks like an off-by-one but
			 * is kept until mSubDNA()'s contract is confirmed. */
			if (length >= room) {
				continue;
			}

			/* Reload the sequence only when the record changes. */
			if (bin != prev_bin) {
				if (prev_bin != NO_BIN) mFreeDNA(dna);
				if (fseek(ifasta, seq_ptr->elem[bin], SEEK_SET) != 0) { /* go to the right entry */
					mDie("Cannot seek in input fasta file: %s", in_fasta);
				}
				mReadDNALite(ifasta, dna);
				mProcessDNADef(dna);
			}
			sub = mSubDNA(dna, pos, length);
			if (strand == '-') mReverseComplementDNA(sub);

			/* 64 spare bytes cover "__", two ints, two ':' and the strand. */
			def_size = strlen(dna->def) + 64;
			def = (char*) mMalloc(sizeof(char)*def_size);
			snprintf(def, def_size, "%s__%d:%d:%c", dna->def, pos+1, pos+length, strand);
			sub->def = def;
			mWriteDNA(stdout, sub);
			mFree(def);
			/* NOTE(review): only the container is released here; whether
			 * mSubDNA() allocates buffers that also need freeing cannot be
			 * told from this file. */
			mFree(sub);
			prev_bin = bin;
			break;
		}
	}

	/* Release the contents of the last loaded record before its container. */
	if (prev_bin != NO_BIN) {
		mFreeDNA(dna);
	}
	mFreeIVector(size_lbounds);
	mFree(size_lbounds);
	mFreeLVector(seq_ptr);
	mFree(seq_ptr);
	mFree(dna);
	fclose(ifasta);
}
