#include <limits.h>
#include "mDNA.h"
#include "zoeTools.h"
#include <argtable2.h>

/*
 * filterFasta: copy a subset of the records of a fasta file to another file.
 *
 * Command line (parsed with argtable2):
 *   -i, --input   <file>  input fasta file                      (required)
 *   -o, --output  <file>  output fasta file                     (required)
 *   -l, --list    <file>  file containing fasta identifiers     (required)
 *   -v, --exclude         write the records NOT in the list instead
 *   -h, --help            print usage and exit
 *
 * The first whitespace-delimited token of every line of the list file is
 * stored in a hash. Each record read from the input is looked up using the
 * key returned by mGetFirstWord(dna->def) and is written to the output when
 * its presence in the hash differs from the --exclude flag.
 *
 * Fatal conditions (unopenable files, a list line without any token, a
 * failing close of the output) are reported through mDie().
 */
int main(int argc, char* argv[]) {
	mDNA   *dna;
	FILE   *stream, *list_stream, *out;
	char    line[LINE_MAX];
	int     status;
	int     i;
	int     exclude;
	zoeHash keep;
	zoeTVec keys;

	int              argcount = 0;
	int              nerrors;
	void           **argtable;

	struct arg_str  *in_file;
	struct arg_str  *out_file;
	struct arg_str  *list_file;
	struct arg_lit  *arg_exclude;
	struct arg_lit  *help;
	struct arg_end  *end;

	in_file             = arg_str1("i", "input",  "<file>", "input fasta file");
	out_file            = arg_str1("o", "output", "<file>", "output fasta file");
	list_file           = arg_str1("l", "list",   "<file>", "file containing list of fasta identifiers");
	arg_exclude         = arg_lit0("v", "exclude",          "exclude sequences in this list (default is false)");
	help                = arg_lit0("h", "help",                           "print this help and exit");
	end                 = arg_end(8); /* this needs to be even, otherwise each element in end->parent[] crosses an 8-byte boundary */

	/* One slot per option above plus the terminating arg_end entry. */
	argtable          = (void**) mMalloc(6*sizeof(void*));
	argtable[argcount++] = in_file;
	argtable[argcount++] = out_file;
	argtable[argcount++] = list_file;
	argtable[argcount++] = arg_exclude;
	argtable[argcount++] = help;
	argtable[argcount++] = end;

	if (arg_nullcheck(argtable) != 0) {
		mDie("insufficient memory");
	}
	nerrors = arg_parse(argc, argv, argtable);

	/*
	 * Help is tested before the error count so that "-h" on its own prints
	 * the usage instead of complaining about the missing required options.
	 */
	if (help->count > 0) {
		fprintf(stdout, "Usage: filterFasta");
		arg_print_syntax(stdout, argtable, "\n");
		arg_print_glossary(stdout, argtable, "  %-25s %s\n");
		mQuit("");
	}

	if (nerrors > 0) {
		arg_print_errors(stderr, end, "filterFasta");
		fprintf(stderr, "try using -h\n");
		mQuit("");
	}

	exclude = (arg_exclude->count)?1:0;

	if ((stream = fopen(in_file->sval[0], "r")) == NULL) {
		mDie("Cannot open fasta file %s for reading", in_file->sval[0]);
	}
	if ((list_stream = fopen(list_file->sval[0], "r")) == NULL) {
		mDie("Cannot open list file %s for reading", list_file->sval[0]);
	}
	if ((out = fopen(out_file->sval[0], "w")) == NULL) {
		mDie("Cannot open output file %s for writing", out_file->sval[0]);
	}

	/* Load the identifiers; only the first token of each line is used. */
	keep = zoeNewHash();
	while (fgets(line, LINE_MAX, list_stream) != NULL) {
		char def[LINE_MAX];
		int *code;
		int *seen;

		/* def is as large as line, so the unbounded %s cannot overflow it. */
		if (sscanf(line, "%s", def) != 1) {
			mDie("LIST LINE ERROR");
		}

		/*
		 * Skip identifiers that are already stored. The cleanup loop below
		 * frees values by key lookup, so storing a second value for the same
		 * identifier would either leak the first one or free one value twice,
		 * depending on how zoeSetHash treats an existing key.
		 */
		seen = (int*) zoeGetHash(keep, def);
		if (seen != NULL) {
			continue;
		}

		code = (int*) mMalloc(sizeof(int));
		*code = 1;
		zoeSetHash(keep, def, code);
	}

	/* Stream the records, writing those selected by the list/exclude pair. */
	dna = (mDNA*) mMalloc(sizeof(mDNA));
	while ((status=mReadDNALite(stream, dna))) {
		char *key    = mGetFirstWord(dna->def);
		int  *ptr    = (int*)zoeGetHash(keep, key);
		/* listed && !exclude, or !listed && exclude */
		if ((ptr != NULL) != exclude) {
			mWriteDNA(out, dna);
		}
		mFreeDNA(dna);
		mFree(key);
		if (status==END_OF_FASTA) break;
	}
	fclose(list_stream);
	fclose(stream);
	/* Buffered output is flushed here, so a failing close means lost data. */
	if (fclose(out) != 0) {
		mDie("Error closing output file %s", out_file->sval[0]);
	}

	/* Free memory etc */
	keys = zoeKeysOfHash(keep);
	for (i=0; i<keys->size; i++) {
		mFree(zoeGetHash(keep, keys->elem[i]));
	}
	zoeDeleteTVec(keys);
	zoeDeleteHash(keep);
	mFree(dna);

	exit(0);
}
