#! /usr/bin/env perl

use strict;
use warnings;
use Pod::Usage;
use Smash::Core;
use Smash::Global qw($SMASH_SCRIPT_NAME);
use Smash::CommandLineParser qw(parse_options check_required_options);

##############
# Set up command line parsing
##############

# Option specifications accepted on the command line, and the subset of
# option names that must be present for the script to run.
my @allowed  = ("collection=s", "upto=s", "help");
my @required = ("collection");

##############
# Parse command line options
##############

my ($status, $missing, %options);

# parse_options hands back a status flag followed by the option key/value pairs.
($status, %options) = parse_options(\@allowed);

# --help takes priority over everything else: print the full manual, exit 0.
pod2usage(-exitstatus => 0, -verbose => 2) if $options{help};

# Any status other than 1 is treated as a usage error.
unless ($status == 1) {
	pod2usage(-message => "", -exitstatus => 2, -verbose => 1);
}

# check_required_options hands back a status flag and an option name; the
# name is reported as the missing argument when the status is not 1.
($status, $missing) = check_required_options(\@required, %options);
unless ($status == 1) {
	pod2usage(
		-message    => "$SMASH_SCRIPT_NAME: Missing argument --$missing\n",
		-exitstatus => 2,
		-verbose    => 1,
	);
}

##############
# Run
##############

# Open the requested collection and fetch the handle and configuration
# values that the rest of the script works with.
my $collection = $options{collection};
# Direct method call instead of the indirect-object form "new Smash::Core(...)",
# which Perl can mis-parse when a sub named "new" is in scope.
my $smash = Smash::Core->new(COLLECTION => $collection);
$smash->init();
my $dbh     = $smash->get_db_handle;
my $engine  = $smash->get_conf_value("SmashDB", "database_engine");
my $SmashDB = $smash->get_conf_value("SmashDB", "database_name");
my $RefOrganismDB = $smash->get_conf_value("RefOrganismDB", "database_name");

if ($engine eq "sqlite3") {
	$dbh->{AutoCommit} = 1;
	# Attach the SmashDB file under the name $SmashDB so that the
	# "${SmashDB}.table" references in the statements below resolve.
	# The path goes through the driver's quote() so that a file name
	# containing a single quote cannot break the ATTACH statement.
	my ($smashdb_file) = $smash->get_smashdb_sqlite_file();
	my $quoted_file    = $dbh->quote($smashdb_file);
	$dbh->do("ATTACH DATABASE $quoted_file AS $SmashDB");
}

# Ordered, colon-delimited list of stages. A stage's character offset in
# this string doubles as its rank: a later stage has a larger offset.
my $stages = ":reads:assembly:genes:read_phylomap:gene_phylomap:og_ref_map:ko_ref_map:";

# Stage requested with --upto; an absent (or empty) value means "reads".
my $upto_stage = $options{upto} || "reads";

# Accept exact stage names only. A bare index() lookup turns an unknown name
# into -1, in which case no stage is selected and the script still reports
# success; it also accepts fragments such as "gene" or "map".
if ($upto_stage =~ /:/ || index($stages, ":$upto_stage:") < 0) {
	my @known_stages = grep { length } split(/:/, $stages);
	pod2usage(-message => "$SMASH_SCRIPT_NAME: Unknown stage '$upto_stage' for --upto (expected one of: @known_stages)\n", -exitstatus => 2, -verbose => 1);
}

my $upto  = index($stages, $upto_stage);
my ($reads, $assembly, $genes, $gene_ref_map, $og_ref_map, $ko_ref_map) = (0) x 6;

# A stage is switched on when the requested stage is that stage or a later one.
$reads        = 1 if ($upto >= index($stages, "reads"));
$assembly     = 1 if ($upto >= index($stages, "assembly"));
$genes        = 1 if ($upto >= index($stages, "genes"));

# "read_phylomap" has no switch of its own: it sits after "genes" in the
# ordering, so requesting it selects exactly what "genes" selects.

$gene_ref_map = 1 if ($upto >= index($stages, "gene_phylomap"));
$og_ref_map   = 1 if ($upto >= index($stages, "og_ref_map"));
$ko_ref_map   = 1 if ($upto >= index($stages, "ko_ref_map"));

=begin DEBUG

# debug
($reads, $assembly, $genes, $gene_ref_map, $og_ref_map, $ko_ref_map) = (0) x 6;
$genes = 1;
# end debug

=end DEBUG

=cut

# SQL statements queued for execution. The PART 1 sections below append to
# this list; it is executed once, after the genes section.
my @statements = ();

########################
#       PART 1
# independent tables, so just
# push things together into one
# list of statements
########################

############
# reads
############

# When the reads stage is selected, queue the statements that rebuild
# summary_read_stats (one row per metagenome_id).
if ($reads == 1) {
	push(@statements,

		# Per-metagenome number of reads and summed read length.
		"DROP TABLE IF EXISTS tmp_read_stats",
		"CREATE TABLE tmp_read_stats AS \
			SELECT metagenome_id, COUNT(*) AS read_count, sum(length) AS total_read_length \
			FROM readinfo r INNER JOIN library USING (library_id) \
			                INNER JOIN sample  USING (sample_id) \
			GROUP BY metagenome_id ORDER BY metagenome_id",

		# Per-metagenome number of distinct template_id values.
		"DROP TABLE IF EXISTS tmp_template_stats",
		"CREATE TABLE tmp_template_stats AS \
			SELECT metagenome_id, COUNT(DISTINCT template_id) AS template_count \
			FROM readinfo r INNER JOIN library USING (library_id) \
			                INNER JOIN sample  USING (sample_id) \
			GROUP BY metagenome_id ORDER BY metagenome_id",

		# Recreate the summary table and fill it by joining the two
		# temporary tables on metagenome_id.
		"DROP TABLE IF EXISTS summary_read_stats",
		"CREATE TABLE summary_read_stats(metagenome_id VARCHAR(255) PRIMARY KEY, read_count INTEGER NOT NULL DEFAULT '0', \
			template_count INTEGER NOT NULL, total_read_length INTEGER NOT NULL)",
		"INSERT INTO  summary_read_stats(metagenome_id, read_count, template_count, total_read_length) \
			SELECT metagenome_id, read_count, template_count, total_read_length \
			FROM tmp_read_stats r INNER JOIN tmp_template_stats t USING (metagenome_id)",

		# The temporary tables are no longer needed.
		"DROP TABLE tmp_read_stats",
		"DROP TABLE tmp_template_stats");
}

############
# assembly
############

# When the assembly stage is selected, queue the statements that rebuild
# summary_contig_stats (one row per assembly external_id).
if ($assembly == 1) {
	push(@statements,

		# Reads (count and summed length) that sit in contigs built from
		# more than one read.
		"DROP TABLE IF EXISTS tmp_contig_reads",
		"CREATE TABLE tmp_contig_reads AS \
			SELECT a.external_id as assembly_id, COUNT(read_id) AS 'contig_reads', SUM(readinfo.length) AS 'contig_read_size' \
			FROM readinfo INNER JOIN contig2read USING (read_id) \
			              INNER JOIN contig USING (contig_id) \
			              INNER JOIN ${SmashDB}.assembly a USING (assembly_id) \
			WHERE read_count>1 \
			GROUP BY a.external_id ORDER BY a.external_id",

		# Contigs with read_count=1: summed read_count and summed length.
		"DROP TABLE IF EXISTS tmp_singleton_reads",
		"CREATE TABLE tmp_singleton_reads AS \
			SELECT a.external_id as assembly_id, SUM(read_count) AS 'singleton_reads', SUM(length) AS 'singleton_read_size' \
			FROM contig INNER JOIN ${SmashDB}.assembly a USING (assembly_id) \
			WHERE read_count=1 \
			GROUP BY a.external_id ORDER BY a.external_id",

		# All contigs: count and summed length.
		"DROP TABLE IF EXISTS tmp_contig_stats",
		"CREATE TABLE tmp_contig_stats AS \
			SELECT a.external_id as assembly_id, COUNT(*) AS contig_count, SUM(length) AS 'total_contig_length' \
			FROM contig INNER JOIN ${SmashDB}.assembly a USING (assembly_id) \
			GROUP BY a.external_id ORDER BY a.external_id",

		# Recreate the summary table and fill it from the temporary tables.
		# The LEFT JOINs produce NULL for an assembly that has no multi-read
		# contigs or no singletons. A column DEFAULT is not applied to an
		# explicitly inserted NULL, so such a row would violate the NOT NULL
		# constraints; COALESCE turns those NULLs into 0.
		"DROP TABLE IF EXISTS summary_contig_stats",
		"CREATE TABLE summary_contig_stats(assembly_id VARCHAR(32) PRIMARY KEY, contig_count INTEGER NOT NULL DEFAULT 0, total_contig_length INTEGER NOT NULL DEFAULT 0, \
			contig_reads INTEGER NOT NULL DEFAULT 0, contig_read_size INTEGER NOT NULL DEFAULT 0, singleton_reads INTEGER NOT NULL DEFAULT 0, singleton_read_size INTEGER NOT NULL DEFAULT 0)",
		"INSERT INTO summary_contig_stats(assembly_id, contig_count, total_contig_length, contig_reads, contig_read_size, singleton_reads, singleton_read_size) \
			SELECT tcs.assembly_id, contig_count, total_contig_length, COALESCE(contig_reads, 0), COALESCE(contig_read_size, 0), COALESCE(singleton_reads, 0), COALESCE(singleton_read_size, 0) \
			FROM tmp_contig_stats tcs LEFT JOIN tmp_contig_reads tcr USING(assembly_id) \
			                LEFT JOIN tmp_singleton_reads tsr USING (assembly_id)",

		# The temporary tables are no longer needed.
		"DROP TABLE tmp_singleton_reads",
		"DROP TABLE tmp_contig_reads",
		"DROP TABLE tmp_contig_stats"
	);
}

############
# genes
############

# When the genes stage is selected, queue the statements that rebuild
# gene2read, gene_details and summary_gene_stats.
if ($genes == 1) {
	push(@statements, 
		# gene2read: one row per (gene, read) pair.
		"DROP TABLE IF EXISTS gene2read",
		"CREATE TABLE gene2read(gene_name VARCHAR(255), read_id VARCHAR(255), start INTEGER NOT NULL, end INTEGER NOT NULL, gene_prediction_id INTEGER, \
			PRIMARY KEY(gene_name, read_id))",
		);
	# Fill gene2read with every gene/read pair on the same contig whose
	# ranges overlap; start is the larger of the two starts and end the
	# smaller of the two ends. The two branches differ only in how that
	# max/min is spelled: IF() for mysql, CASE WHEN for any other engine.
	if ($engine eq "mysql") {
		push(@statements, 
			"INSERT INTO gene2read \
				SELECT external_id, read_id, IF(g.start > c.start, g.start, c.start), IF(g.end < c.end, g.end, c.end), gene_prediction_id \
				FROM contig2read c INNER JOIN gene g USING (contig_id) \
				WHERE (c.end >= g.start) AND (g.end >= c.start)",
			);
	} else {
		push(@statements, 
			"INSERT INTO gene2read \
				SELECT external_id, read_id, CASE WHEN g.start > c.start THEN g.start ELSE c.start END, CASE WHEN g.end < c.end THEN g.end ELSE c.end END, gene_prediction_id \
				FROM contig2read c INNER JOIN gene g USING (contig_id) \
				WHERE (c.end >= g.start) AND (g.end >= c.start)",
			);
	}
	push(@statements, 
		"CREATE INDEX \
			g2r_read_id \
			ON gene2read(read_id)",
		# gene_details: per gene, the number of gene2read rows and the summed
		# length of the overlapping ranges; coverage is filled in afterwards
		# as total_read_length divided by the gene length.
		"DROP TABLE IF EXISTS gene_details",
		"CREATE TABLE gene_details(gene_prediction_id INTEGER, gene_name VARCHAR(255) PRIMARY KEY, length INTEGER, read_count INTEGER, \
			total_read_length INTEGER, coverage FLOAT(16,4))",
		"INSERT INTO gene_details(gene_prediction_id, gene_name, length, read_count, total_read_length) \
			SELECT gene.gene_prediction_id, gene_name, length, COUNT(*) AS read_count, SUM(r.end-r.start+1) AS total_read_length \
			FROM gene INNER JOIN gene2read r ON gene_name=external_id \
			GROUP BY gene_name",
		"UPDATE gene_details SET coverage=1.0*total_read_length/length",
		"CREATE INDEX \
			gd_gp_id \
			ON gene_details(gene_prediction_id)",
		"CREATE INDEX \
			gd_gp_id_gene_name \
			ON gene_details(gene_prediction_id, gene_name)",
		# Gene counts per gene prediction, split by start_codon+stop_codon:
		# 2 -> complete, 1 -> partial, 0 -> incomplete.
		"DROP TABLE IF EXISTS tmp_complete_genes",
		"CREATE TABLE tmp_complete_genes AS \
			SELECT gp.external_id AS gene_prediction_id, COUNT(*) AS complete_genes \
			FROM gene g INNER JOIN ${SmashDB}.gene_prediction gp USING (gene_prediction_id) \
			WHERE (start_codon+stop_codon)=2 \
			GROUP BY gp.external_id ORDER BY gp.external_id",
		"DROP TABLE IF EXISTS tmp_partial_genes",
		"CREATE TABLE tmp_partial_genes AS \
			SELECT gp.external_id AS gene_prediction_id, COUNT(*) AS partial_genes \
			FROM gene g INNER JOIN ${SmashDB}.gene_prediction gp USING (gene_prediction_id) \
			WHERE (start_codon+stop_codon)=1 \
			GROUP BY gp.external_id ORDER BY gp.external_id",
		"DROP TABLE IF EXISTS tmp_incomplete_genes",
		"CREATE TABLE tmp_incomplete_genes AS \
			SELECT gp.external_id AS gene_prediction_id, COUNT(*) AS incomplete_genes \
			FROM gene g INNER JOIN ${SmashDB}.gene_prediction gp USING (gene_prediction_id) \
			WHERE (start_codon+stop_codon)=0 \
			GROUP BY gp.external_id ORDER BY gp.external_id",
		# Total gene count and summed gene length per gene prediction.
		"DROP TABLE IF EXISTS tmp_gene_stats",
		"CREATE TABLE tmp_gene_stats AS \
			SELECT gp.external_id as gene_prediction_id, COUNT(*) AS gene_count, SUM(length) AS total_gene_length \
			FROM gene INNER JOIN ${SmashDB}.gene_prediction gp USING (gene_prediction_id) \
			GROUP BY gp.external_id ORDER BY gp.external_id",
		# Summed total_read_length and summed coverage per gene prediction.
		"DROP TABLE IF EXISTS tmp_gene_units",
		"CREATE TABLE tmp_gene_units AS \
			SELECT gp.external_id as gene_prediction_id, SUM(total_read_length) AS total_base_support, SUM(coverage) AS gene_unit_count \
			FROM gene_details INNER JOIN ${SmashDB}.gene_prediction gp USING (gene_prediction_id) \
			GROUP BY gp.external_id ORDER BY gp.external_id",
		# Recreate the summary table and fill it from the five temporary tables.
		# NOTE(review): all joins are INNER JOINs, so a gene prediction missing
		# from any one temporary table (e.g. one with no partial genes) gets no
		# summary row - confirm that this is intended.
		"DROP TABLE IF EXISTS summary_gene_stats",
		"CREATE TABLE summary_gene_stats(gene_prediction_id VARCHAR(32) PRIMARY KEY, gene_count INTEGER NOT NULL, \
			total_gene_length INTEGER NOT NULL, gene_unit_count INTEGER NOT NULL, total_base_support INTEGER NOT NULL, \
			complete_genes INTEGER NOT NULL, incomplete_genes INTEGER NOT NULL, partial_genes INTEGER NOT NULL)",
		"INSERT INTO summary_gene_stats(gene_prediction_id, gene_count, total_gene_length, gene_unit_count, total_base_support, \
				complete_genes, incomplete_genes, partial_genes) \
			SELECT c.gene_prediction_id, gene_count, total_gene_length, gene_unit_count, total_base_support, complete_genes, incomplete_genes, partial_genes \
			FROM tmp_gene_stats INNER JOIN tmp_complete_genes c USING (gene_prediction_id) \
			                    INNER JOIN tmp_partial_genes    USING (gene_prediction_id) \
			                    INNER JOIN tmp_incomplete_genes USING (gene_prediction_id) \
			                    INNER JOIN tmp_gene_units       USING (gene_prediction_id) \
			GROUP BY c.gene_prediction_id ORDER BY c.gene_prediction_id",

		# The temporary tables are no longer needed.
		"DROP TABLE tmp_complete_genes",
		"DROP TABLE tmp_partial_genes",
		"DROP TABLE tmp_incomplete_genes",
		"DROP TABLE tmp_gene_stats",
		"DROP TABLE tmp_gene_units",

	);
}

# Execute everything queued by the reads, assembly and genes sections.
$smash->execute_statements($dbh, $engine, @statements);

##############################################################
# from now on, each conditional will init statements to ()
# and then repopulate before executing all statements at the
# end of the conditional
##############################################################

################
# gene_phylomap
################

# gene_phylomap stage: rebuild gene_refgenome_map, which holds one
# probability per (gene_name, ref_taxonomy_id) pair, and then add a row with
# ref_taxonomy_id -1 for the unassigned share of each gene.
if ($gene_ref_map == 1) {
	@statements = ();
	push(@statements,
		"DROP TABLE IF EXISTS gene_refgenome_map",
		"CREATE TABLE gene_refgenome_map(gene_prediction_id INTEGER, gene_name VARCHAR(255), ref_taxonomy_id BIGINT(20), probability FLOAT(6, 4), PRIMARY KEY(gene_name, ref_taxonomy_id))",
		# probability = sum over the gene's reads of confidence * overlap
		# length, divided by the gene's total_read_length.
		"INSERT INTO gene_refgenome_map \
			SELECT g.gene_prediction_id, gene_name, ref_taxonomy_id, SUM(confidence*(g.end-g.start+1))/total_read_length AS probability \
			FROM gene_details INNER JOIN gene2read g USING (gene_name) \
			                  INNER JOIN read_refgenome_map USING (read_id) \
			GROUP BY gene_name, ref_taxonomy_id",
		"CREATE INDEX \
			grm_gp_id \
			ON gene_refgenome_map(gene_prediction_id)",
		"CREATE INDEX \
			grm_gene_name \
			ON gene_refgenome_map(gene_name)",
		"CREATE INDEX \
			grm_ref_taxonomy_id \
			ON gene_refgenome_map(ref_taxonomy_id)",
	);

	$smash->execute_statements($dbh, $engine, @statements);

	# assign probabilities for unassigned portions of genes
	#
	# Every gene has to appear in this result, including genes without any
	# row in gene_refgenome_map (their SUM is NULL). The query is written as
	# "gene LEFT JOIN gene_refgenome_map", which yields the same rows as
	# "gene_refgenome_map RIGHT JOIN gene" but is also accepted by SQLite
	# releases older than 3.39, which have no RIGHT JOIN.

	my $sth = $dbh->prepare("SELECT gene.gene_prediction_id, external_id, SUM(probability) AS total FROM gene LEFT JOIN gene_refgenome_map ON gene_name=external_id GROUP BY external_id ORDER BY total");
	my $insert = $dbh->prepare("INSERT INTO gene_refgenome_map(gene_prediction_id, gene_name, ref_taxonomy_id, probability) VALUES(?,?,?,?)");
	my ($gp_id, $gene, $sum);
	$sth->execute();
	$sth->bind_columns(\$gp_id, \$gene, \$sum);
	while ($sth->fetch()) {
		# A gene with no assignment at all comes back with a NULL total.
		if (!defined($sum)) {$sum = 0;}
		if ($sum < 0.9990) {  # the script generating the probabilities uses %.4f to print the probs. To avoid rounding errors not summing up to 1, we use this.
			# ref_taxonomy_id -1 receives whatever probability is left over.
			$insert->execute($gp_id, $gene, -1, 1.0-$sum);
		}
	}
}

# og_ref_map stage: rebuild species_og_map (summed probability*coverage per
# gene prediction, OG and reference taxonomy id) and og_species_count (number
# of distinct reference taxonomy ids per gene prediction and OG).
if ($og_ref_map == 1) {
	my @abundance_sql = (
		"DROP TABLE IF EXISTS species_og_map",
		"CREATE TABLE species_og_map(gene_prediction_id INTEGER, og VARCHAR(31), ref_taxonomy_id BIGINT(20), abundance FLOAT(20,6), PRIMARY KEY(gene_prediction_id, og, ref_taxonomy_id))",
		"INSERT INTO species_og_map(gene_prediction_id, og, ref_taxonomy_id, abundance) \
			SELECT gene_prediction_id, og, ref_taxonomy_id, SUM(probability*coverage) \
			FROM gene2og INNER JOIN gene_refgenome_map USING (gene_name) \
			             INNER JOIN gene_details USING (gene_prediction_id, gene_name) \
			GROUP BY gene_prediction_id, ref_taxonomy_id, og ORDER BY gene_prediction_id, ref_taxonomy_id, og",
	);

	my @species_count_sql = (
		"DROP TABLE IF EXISTS og_species_count",
		"CREATE TABLE og_species_count(gene_prediction_id INTEGER, og VARCHAR(31), species_count INTEGER, PRIMARY KEY (gene_prediction_id, og))",
		"INSERT INTO og_species_count \
			SELECT gene_prediction_id, og, COUNT(DISTINCT ref_taxonomy_id) \
			FROM gene2og INNER JOIN gene_refgenome_map USING (gene_name) \
			GROUP BY gene_prediction_id, og ORDER BY og",
	);

	# Replace the shared statement list with this stage's statements and run them.
	@statements = (@abundance_sql, @species_count_sql);
	$smash->execute_statements($dbh, $engine, @statements);
}

# ko_ref_map stage: rebuild species_ko_map (summed
# probability*transferred_abundance per gene prediction, KO and reference
# taxonomy id) and ko_species_count (number of distinct reference taxonomy
# ids per gene prediction and KO).
if ($ko_ref_map == 1) {
	my @abundance_sql = (
		"DROP TABLE IF EXISTS species_ko_map",
		"CREATE TABLE species_ko_map(gene_prediction_id INTEGER, ko VARCHAR(31), ref_taxonomy_id BIGINT(20), abundance FLOAT(20,6), PRIMARY KEY(gene_prediction_id, ko, ref_taxonomy_id))",
		"INSERT INTO species_ko_map(gene_prediction_id, ko, ref_taxonomy_id, abundance) \
			SELECT gene_prediction_id, ko, ref_taxonomy_id, SUM(probability*transferred_abundance) \
			FROM gene2kegg INNER JOIN gene_refgenome_map USING (gene_name) \
			               INNER JOIN ${RefOrganismDB}.protein2ko USING (protein) \
			GROUP BY gene_prediction_id, ko, ref_taxonomy_id ORDER BY gene_prediction_id, ko, ref_taxonomy_id",
	);

	my @species_count_sql = (
		"DROP TABLE IF EXISTS ko_species_count",
		"CREATE TABLE ko_species_count(gene_prediction_id INTEGER, ko VARCHAR(31), species_count INTEGER, PRIMARY KEY (gene_prediction_id, ko))",
		"INSERT INTO ko_species_count \
			SELECT gene_prediction_id, ko, COUNT(DISTINCT ref_taxonomy_id) \
			FROM gene2ko INNER JOIN gene_refgenome_map USING (gene_name) \
			GROUP BY gene_prediction_id, ko ORDER BY ko",
	);

	# Replace the shared statement list with this stage's statements and run them.
	@statements = (@abundance_sql, @species_count_sql);
	$smash->execute_statements($dbh, $engine, @statements);
}

########################
#       PART 2
#        hacks
########################


# Hard-coded switches for the optional steps in PART 2 and PART 3. Each one
# is initialised to 0 here; a step below runs only when its switch equals 1.
my $gene_top_hits   = 0;

# Suffix used in the table names built by the "_sim" steps
# (e.g. gene_refgenome_map_$similarity).
my $similarity      = "83";
my $gene_ref_map_sim = 0;

my $ko_ref_map_sim   = 0;
my $kegg_ref_map    = 0;
my $kegg_ref_abund  = 0;
my $og_ref_map_sim   = 0;
my $og_ref_map_best = 0;


############
# top KEGG/COG hits
############

# Optional step: rebuild gene2og_best, keeping for each gene the gene2cog
# rows whose bitscore equals that gene's maximum bitscore.
if ($gene_top_hits == 1) {
	# Start from an empty list and execute before leaving the conditional.
	# Appending to the shared list without doing so would mix these
	# statements with ones that were already executed, and they would then be
	# discarded by the next "@statements = ()" without ever being run.
	@statements = ();
	push(@statements,
		"DROP TABLE IF EXISTS gene2og_best",
		"CREATE TABLE gene2og_best(gene_name VARCHAR(255), string_protein VARCHAR(255), string_version FLOAT(5,2), og VARCHAR(63), \
			placement_start INTEGER, placement_end INTEGER, bitscore FLOAT(16,2), \
			PRIMARY KEY(gene_name, string_version, og, placement_start, placement_end))",
		"INSERT INTO gene2og_best(gene_name, string_version, og, placement_start, placement_end, bitscore) \
			SELECT g.gene_name, g.string_version, g.og, g.placement_start, g.placement_end, g.bitscore \
			FROM gene2cog g INNER JOIN \
				(SELECT gene_name, MAX(bitscore) AS bitscore FROM gene2cog g2 GROUP BY gene_name) AS copy \
				USING (gene_name, bitscore);");

	$smash->execute_statements($dbh, $engine, @statements);
}

# Optional step: same computation as the gene_phylomap stage, but reading
# read_refgenome_map_$similarity and writing gene_refgenome_map_$similarity.
if ($gene_ref_map_sim == 1) {
	@statements = ();
	push(@statements,
		"DROP TABLE IF EXISTS gene_refgenome_map_$similarity",
		# No inline "KEY ..." clause in the table definition: it is
		# MySQL-only syntax, and the gene_prediction_id index is created by
		# the separate CREATE INDEX statement below.
		"CREATE TABLE gene_refgenome_map_$similarity(gene_prediction_id INTEGER, gene_name VARCHAR(255), ref_taxonomy_id BIGINT(20), probability FLOAT(6, 4), PRIMARY KEY(gene_name, ref_taxonomy_id))",
		# probability = sum over the gene's reads of confidence * overlap
		# length, divided by the gene's total_read_length.
		"INSERT INTO gene_refgenome_map_$similarity \
			SELECT g.gene_prediction_id, gene_name, ref_taxonomy_id, SUM(confidence*(g.end-g.start+1))/total_read_length AS probability \
			FROM gene_details INNER JOIN gene2read g USING (gene_name) \
			                  INNER JOIN read_refgenome_map_$similarity USING (read_id) \
			GROUP BY gene_name, ref_taxonomy_id",
		"CREATE INDEX \
			grm${similarity}_gp_id \
			ON gene_refgenome_map_$similarity(gene_prediction_id)",
		"CREATE INDEX \
			grm${similarity}_ref_taxonomy_id \
			ON gene_refgenome_map_$similarity(ref_taxonomy_id)",
	);

	$smash->execute_statements($dbh, $engine, @statements);

	# assign probabilities for unassigned portions of genes
	#
	# Every gene has to appear in this result, including genes without any
	# row in the map table (their SUM is NULL). The query is written as
	# "gene LEFT JOIN <map>", which yields the same rows as
	# "<map> RIGHT JOIN gene" but is also accepted by SQLite releases older
	# than 3.39, which have no RIGHT JOIN.

	my $sth = $dbh->prepare("SELECT gene.gene_prediction_id, external_id, SUM(probability) AS total FROM gene LEFT JOIN gene_refgenome_map_$similarity ON gene_name=external_id GROUP BY external_id ORDER BY total");
	my $insert = $dbh->prepare("INSERT INTO gene_refgenome_map_$similarity(gene_prediction_id, gene_name, ref_taxonomy_id, probability) VALUES(?,?,?,?)");
	my ($gp_id, $gene, $sum);
	$sth->execute();
	$sth->bind_columns(\$gp_id, \$gene, \$sum);
	while ($sth->fetch()) {
		# A gene with no assignment at all comes back with a NULL total.
		if (!defined($sum)) {$sum = 0;}
		if ($sum < 0.9990) {  # the script generating the probabilities uses %.4f to print the probs. To avoid rounding errors not summing up to 1, we use this.
			# ref_taxonomy_id -1 receives whatever probability is left over.
			$insert->execute($gp_id, $gene, -1, 1.0-$sum);
		}
	}
}

# finish the rest

########################
#       PART 3
########################

# Optional step: rebuild kegg2read, kegg_details and kegg_refgenome_map, then
# add a row with ref_taxonomy_id -1 for the unassigned share of each
# (gene_prediction_id, protein) pair.
# NOTE(review): the IF() calls and the "UPDATE ... INNER JOIN ... SET" form
# look MySQL-specific and there is no engine check here - confirm before
# enabling this step on another engine.
if ($kegg_ref_map == 1) {
	@statements = ();
	push(@statements,
		# kegg2read: overlap of each contig2kegg range with each read on the
		# same contig; start is the larger start, end the smaller end.
		"DROP TABLE IF EXISTS kegg2read",
		"CREATE TABLE kegg2read(protein VARCHAR(255), read_id VARCHAR(255), start INTEGER NOT NULL, end INTEGER NOT NULL, gene_prediction_id INTEGER, \
			PRIMARY KEY(protein, read_id, start, end))",
		"INSERT INTO kegg2read \
			SELECT protein, read_id, IF(k.start > c.start, k.start, c.start), IF(k.end < c.end, k.end, c.end), gene_prediction_id \
			FROM contig2read c INNER JOIN contig2kegg k USING (contig_id) \
			WHERE (c.end >= k.start) AND (k.end >= c.start)",

		# kegg_details: summed overlap length per (gene_prediction_id,
		# protein); coverage is then set to total_read_length/(3*length),
		# with length taken from ${RefOrganismDB}.kegg_details.
		"DROP TABLE IF EXISTS kegg_details",
		"CREATE TABLE kegg_details(gene_prediction_id INTEGER, protein VARCHAR(255), \
			total_read_length INTEGER, coverage FLOAT(16,4), PRIMARY KEY(gene_prediction_id, protein))",
		"INSERT INTO kegg_details(gene_prediction_id, protein, total_read_length) \
			SELECT gene_prediction_id, protein, SUM(end-start+1) AS total_read_length \
			FROM kegg2read \
			GROUP BY gene_prediction_id, protein",
		"UPDATE kegg_details INNER JOIN ${RefOrganismDB}.kegg_details USING (protein) \
			SET coverage=total_read_length/(3*length)",

		# kegg_refgenome_map: one probability per (gene_prediction_id,
		# protein, ref_taxonomy_id).
		"DROP TABLE IF EXISTS kegg_refgenome_map",
		"CREATE TABLE kegg_refgenome_map(gene_prediction_id INTEGER, protein VARCHAR(255), ref_taxonomy_id BIGINT(20), probability FLOAT(6, 4), PRIMARY KEY(gene_prediction_id, protein, ref_taxonomy_id))",
		"INSERT INTO kegg_refgenome_map \
			SELECT k.gene_prediction_id, protein, ref_taxonomy_id, SUM(confidence*(k.end-k.start+1)/total_read_length) AS probability \
			FROM kegg_details INNER JOIN kegg2read k USING (gene_prediction_id, protein) \
			                  INNER JOIN read_refgenome_map USING (read_id) \
			GROUP BY k.gene_prediction_id, protein, ref_taxonomy_id");

	$smash->execute_statements($dbh, $engine, @statements);

	# assign probabilities for unassigned portions of genes

	# The RIGHT JOIN keeps every kegg_details row, including those without
	# any kegg_refgenome_map row (their SUM is NULL).
	my $sth = $dbh->prepare("SELECT gene_prediction_id, protein, SUM(probability) AS total FROM kegg_refgenome_map RIGHT JOIN kegg_details USING (gene_prediction_id, protein) GROUP BY gene_prediction_id, protein");
	my $insert = $dbh->prepare("INSERT INTO kegg_refgenome_map(gene_prediction_id, protein, ref_taxonomy_id, probability) VALUES(?,?,?,?)");
	# Note: $gene receives the "protein" column of the query above.
	my ($gp_id, $gene, $sum);
	$sth->execute();
	$sth->bind_columns(\$gp_id, \$gene, \$sum);
	while ($sth->fetch()) {
		# A NULL total means nothing was assigned; treat it as 0.
		if (!defined($sum)) {$sum = 0;}
		if ($sum < 0.9990) {  # the script generating the probabilities uses %.4f to print the probs. To avoid rounding errors not summing up to 1, we use this.
			# ref_taxonomy_id -1 receives whatever probability is left over.
			$insert->execute($gp_id, $gene, -1, 1.0-$sum);
		}
	}
}

# Optional step: rebuild kegg_abundance, where each row's coverage is the
# kegg_details coverage multiplied by the kegg_refgenome_map probability.
if ($kegg_ref_abund == 1) {
	my $drop_sql = "DROP TABLE IF EXISTS kegg_abundance";

	my $create_sql = "CREATE TABLE kegg_abundance(gene_prediction_id INTEGER, protein VARCHAR(255), ref_taxonomy_id INTEGER NOT NULL, coverage FLOAT(16,4), \
			PRIMARY KEY(gene_prediction_id, protein, ref_taxonomy_id))";

	my $fill_sql = "INSERT INTO kegg_abundance(gene_prediction_id, protein, ref_taxonomy_id, coverage) \
			SELECT gene_prediction_id, protein, ref_taxonomy_id, coverage*probability \
			FROM kegg_details INNER JOIN kegg_refgenome_map USING (gene_prediction_id, protein)";

	# Replace the shared statement list with this step's statements and run them.
	@statements = ($drop_sql, $create_sql, $fill_sql);
	$smash->execute_statements($dbh, $engine, @statements);
}

# cumulative statement execution starts here

# Start a fresh list. The three conditionals that follow only append to it;
# the list is executed once, after the last of them.
@statements = ();

# Optional step: same tables as the ko_ref_map stage, but built from
# gene_refgenome_map_$similarity and written to tables carrying the
# _$similarity suffix.
if ($ko_ref_map_sim == 1) {
	push(@statements,
		# Summed probability*transferred_abundance per gene prediction, KO
		# and reference taxonomy id.
		"DROP TABLE IF EXISTS species_ko_map_$similarity",
		"CREATE TABLE species_ko_map_$similarity(gene_prediction_id INTEGER, ko VARCHAR(31), ref_taxonomy_id BIGINT(20), abundance FLOAT(20,6), PRIMARY KEY(gene_prediction_id, ko, ref_taxonomy_id))",
		"INSERT INTO species_ko_map_$similarity(gene_prediction_id, ko, ref_taxonomy_id, abundance) \
			SELECT gene_prediction_id, ko, ref_taxonomy_id, SUM(probability*transferred_abundance) \
			FROM gene2kegg INNER JOIN gene_refgenome_map_${similarity} USING (gene_name) \
			               INNER JOIN ${RefOrganismDB}.protein2ko USING (protein) \
			GROUP BY gene_prediction_id, ko, ref_taxonomy_id ORDER BY gene_prediction_id, ko, ref_taxonomy_id",

		# Number of distinct reference taxonomy ids per gene prediction and KO.
		"DROP TABLE IF EXISTS ko_species_count_$similarity",
		"CREATE TABLE ko_species_count_$similarity(gene_prediction_id INTEGER, ko VARCHAR(31), species_count INTEGER, PRIMARY KEY (gene_prediction_id, ko))",
		"INSERT INTO ko_species_count_$similarity \
			SELECT gene_prediction_id, ko, COUNT(DISTINCT ref_taxonomy_id) \
			FROM gene2ko INNER JOIN gene_refgenome_map_$similarity USING (gene_name) \
			GROUP BY gene_prediction_id, ko ORDER BY ko");
}

# Optional step: same tables as the og_ref_map stage, but built from
# gene_refgenome_map_$similarity and written to tables carrying the
# _$similarity suffix. The statements are only queued here, not executed.
if ($og_ref_map_sim == 1) {
	push(@statements,
		# Summed probability*coverage per gene prediction, OG and reference
		# taxonomy id.
		"DROP TABLE IF EXISTS species_og_map_$similarity",
		"CREATE TABLE species_og_map_$similarity(gene_prediction_id INTEGER, og VARCHAR(31), ref_taxonomy_id BIGINT(20), abundance FLOAT(20,6), PRIMARY KEY(gene_prediction_id, og, ref_taxonomy_id))",
		"INSERT INTO species_og_map_$similarity(gene_prediction_id, og, ref_taxonomy_id, abundance) \
			SELECT gene_prediction_id, og, ref_taxonomy_id, SUM(probability*coverage) \
			FROM gene2og INNER JOIN gene_refgenome_map_$similarity USING (gene_name) \
			             INNER JOIN gene_details USING (gene_prediction_id, gene_name) \
			GROUP BY gene_prediction_id, ref_taxonomy_id, og ORDER BY gene_prediction_id, ref_taxonomy_id, og",

		# Number of distinct reference taxonomy ids per gene prediction and OG.
		"DROP TABLE IF EXISTS og_species_count_$similarity",
		"CREATE TABLE og_species_count_$similarity(gene_prediction_id INTEGER, og VARCHAR(31), species_count INTEGER, PRIMARY KEY (gene_prediction_id, og))",
		"INSERT INTO og_species_count_$similarity \
			SELECT gene_prediction_id, og, COUNT(DISTINCT ref_taxonomy_id) \
			FROM gene2og INNER JOIN gene_refgenome_map_$similarity USING (gene_name) \
			GROUP BY gene_prediction_id, og ORDER BY og");

}

# Optional step: same tables as the og_ref_map stage, but reading
# gene2og_best instead of gene2og and writing to the *_best tables.
if ($og_ref_map_best == 1) {
	push(@statements,
		# Summed probability*coverage per gene prediction, OG and reference
		# taxonomy id.
		"DROP TABLE IF EXISTS species_og_map_best",
		"CREATE TABLE species_og_map_best(gene_prediction_id INTEGER, og VARCHAR(31), ref_taxonomy_id BIGINT(20), abundance FLOAT(20,6), PRIMARY KEY(gene_prediction_id, og, ref_taxonomy_id))",
		"INSERT INTO species_og_map_best(gene_prediction_id, og, ref_taxonomy_id, abundance) \
			SELECT gene_prediction_id, og, ref_taxonomy_id, SUM(probability*coverage) \
			FROM gene2og_best INNER JOIN gene_refgenome_map USING (gene_name) \
			                  INNER JOIN gene_details USING (gene_prediction_id, gene_name) \
			GROUP BY gene_prediction_id, ref_taxonomy_id, og ORDER BY gene_prediction_id, ref_taxonomy_id, og",

		# Number of distinct reference taxonomy ids per gene prediction and OG.
		"DROP TABLE IF EXISTS og_species_count_best",
		"CREATE TABLE og_species_count_best(gene_prediction_id INTEGER, og VARCHAR(31), species_count INTEGER, PRIMARY KEY (gene_prediction_id, og))",
		"INSERT INTO og_species_count_best \
			SELECT gene_prediction_id, og, COUNT(DISTINCT ref_taxonomy_id) \
			FROM gene2og_best INNER JOIN gene_refgenome_map USING (gene_name) \
			GROUP BY gene_prediction_id, og ORDER BY og");
}

# Execute whatever has been queued on @statements up to this point.
$smash->execute_statements($dbh, $engine, @statements);

# NOTE(review): AutoCommit is switched off here for every engine, although it
# is only switched on earlier for sqlite3 - confirm that this is intended.
$dbh->{AutoCommit} = 0;
$smash->finish();
# Marker line printed on STDOUT once all steps have run.
print "<output>success</output>\n";

exit(0);

# Placeholder: accepts the SmashDB SQLite file name and performs no action.
# As with the original one-line body, the call evaluates to its first
# argument. The script does the actual ATTACH inline, not through this sub.
sub sqlite_attach_smashdb {
	my $smashdb_file = $_[0];
	return $smashdb_file;
}

# Run SQL statements, in order, against the file-level $dbh handle.
#
# Each statement is echoed to STDOUT before it is run. Processing stops at
# the first statement that cannot be prepared or executed, and the failure is
# reported on STDERR instead of being dropped silently.
#
# Arguments: the SQL statement strings.
# Returns:   1 when every statement ran, 0 when one of them failed.
#
# Note: the script body calls $smash->execute_statements(...), not this sub.
sub execute_statements {
	my @statements = @_;
	STATEMENT:foreach my $statement (@statements) {
		print "Executing: '$statement'\n";
		# prepare() can fail and return undef; calling execute() on that
		# would die with an unrelated "can't call method" error.
		my $sth = $dbh->prepare($statement);
		my $status = $sth ? $sth->execute() : undef;
		if (!$status) {
			my $reason = $dbh->errstr;
			$reason = "unknown error" unless defined($reason);
			warn "Failed to execute '$statement': $reason\n";
			return 0;
		}
	}
	return 1;
}

=head1 Name

updateStats.pl - Update static summary statistics of a metagenome collection

=head1 Synopsis

	updateStats.pl [options]

=head1 Options

=over 4

=item B<C<--collection>>

Name of collection to update in Smash database.

=item B<C<--upto>>

Stage up to which the summary statistics should be updated. Should be one of
C<reads>, C<assembly>, C<genes>, C<read_phylomap>, C<gene_phylomap>,
C<og_ref_map>, C<ko_ref_map>, in that order. Later stages update earlier
ones automatically. For example, C<--upto=genes> will also update C<reads> and
C<assembly>. C<read_phylomap> currently updates the same tables as C<genes>.
Default: C<reads>.

=item B<C<--help>>

Prints this manual.

=back

=head1 Description

B<updateStats.pl> is a script to update the static summary statistics tables in the
Smash database. The following tables are created/updated: 

=over 4

=item B<read statistics>

Read statistics for each metagenome in a collection are stored in a table called
C<summary_read_stats> in the collection database. A sample table is given below.
The column names are self-explanatory.

	mysql> select * from summary_read_stats;
	+---------------+------------+----------------+-------------------+
	| metagenome_id | read_count | template_count | total_read_length |
	+---------------+------------+----------------+-------------------+
	| MC20.MG1      |      65042 |          33169 |          58657769 | 
	| MC20.MG10     |      80627 |          41852 |          60409959 | 
	| MC20.MG11     |      81346 |          42340 |          61015247 | 
	| MC20.MG12     |      80796 |          41722 |          59634778 | 
	| MC20.MG13     |      79972 |          40532 |          66427274 | 
	| MC20.MG14     |      79787 |          40770 |          62206431 | 
	| MC20.MG15     |      87324 |          44866 |          62863895 | 
	| MC20.MG16     |     116244 |          60102 |          76645221 | 
	| MC20.MG17     |     115636 |          60309 |          79210010 | 
	| MC20.MG18     |     116746 |          60009 |          78977847 | 
	| MC20.MG19     |     116891 |          60153 |          80275046 | 
	| MC20.MG2      |      74452 |          38364 |          68388715 | 
	| MC20.MG20     |     118227 |          61191 |          80798206 | 
	| MC20.MG21     |     116085 |          59956 |          80394241 | 
	| MC20.MG22     |     125260 |          64159 |          85902410 | 
	| MC20.MG23     |     113507 |          61009 |          73834052 | 
	| MC20.MG24     |     115862 |          59860 |          75063260 | 
	| MC20.MG25     |     120268 |          61913 |          79597214 | 
	| MC20.MG26     |     118423 |          62288 |          75841638 | 
	| MC20.MG27     |     129745 |          67255 |          85840438 | 
	| MC20.MG28     |     118172 |          62169 |          76435193 | 
	| MC20.MG29     |     237710 |         123637 |         156959758 | 
	| MC20.MG3      |      78123 |          40604 |          59268660 | 
	| MC20.MG30     |     224711 |         117975 |         146773573 | 
	| MC20.MG31     |     231024 |         118691 |         154694769 | 
	| MC20.MG32     |     227411 |         120393 |         150166441 | 
	| MC20.MG33     |     223746 |         118931 |         144874669 | 
	| MC20.MG34     |     230738 |         120234 |         151914677 | 
	| MC20.MG35     |     236855 |         121637 |         147485313 | 
	| MC20.MG36     |     229783 |         120094 |         144348952 | 
	| MC20.MG37     |     112592 |          60952 |          71978190 | 
	| MC20.MG4      |      80477 |          41276 |          59936562 | 
	| MC20.MG5      |      79846 |          40926 |          60796073 | 
	| MC20.MG6      |      78670 |          40262 |          60504572 | 
	| MC20.MG7      |      79773 |          40680 |          61236847 | 
	| MC20.MG8      |      79357 |          40423 |          61429904 | 
	| MC20.MG9      |      75532 |          38819 |          53289052 | 
	+---------------+------------+----------------+-------------------+
	37 rows in set (0.05 sec)
	
	mysql>

=item B<assembly statistics>

Summary information for each assembly of a metagenome is stored in 
C<summary_contig_stats>. A sample table is given below.

	mysql> select * from summary_contig_stats;
	+---------------+--------------+---------------------+--------------+------------------+-----------------+---------------------+
	| assembly_id   | contig_count | total_contig_length | contig_reads | contig_read_size | singleton_reads | singleton_read_size |
	+---------------+--------------+---------------------+--------------+------------------+-----------------+---------------------+
	| MC20.MG1.AS1  |        41831 |            46136049 |        30315 |         27669970 |           34718 |            30971466 | 
	| MC20.MG10.AS1 |        38642 |            39888261 |        51803 |         40327319 |           28252 |            20032738 | 
	| MC20.MG11.AS1 |        34389 |            38225044 |        62706 |         48122757 |           17969 |            12839000 | 
	| MC20.MG12.AS1 |        17588 |            22815676 |        68911 |         52092733 |           11452 |             7467123 | 
	| MC20.MG13.AS1 |         6791 |            10687920 |        74744 |         62518847 |            5120 |             3937452 | 
	| MC20.MG14.AS1 |        15971 |            19473697 |        69046 |         54963879 |           10324 |             7256690 | 
	| MC20.MG15.AS1 |        17802 |            23882918 |        75784 |         57127375 |           11137 |             5692700 | 
	| MC20.MG16.AS1 |        57133 |            49201030 |        72564 |         48692516 |           43644 |            27978044 | 
	| MC20.MG17.AS1 |        59564 |            53016574 |        68471 |         47681720 |           47103 |            31497994 | 
	| MC20.MG18.AS1 |        73824 |            61489938 |        58940 |         40412242 |           57795 |            38568864 | 
	| MC20.MG19.AS1 |        38297 |            36284293 |        85367 |         59245208 |           31691 |            21159488 | 
	| MC20.MG2.AS1  |        37448 |            46003405 |        46524 |         43047188 |           27947 |            25348993 | 
	| MC20.MG20.AS1 |        77082 |            63135227 |        55353 |         38598136 |           62846 |            42193539 | 
	| MC20.MG21.AS1 |        75435 |            62138255 |        54413 |         38426803 |           61669 |            41978095 | 
	| MC20.MG22.AS1 |        81876 |            67366915 |        58776 |         40971844 |           66486 |            44935604 | 
	| MC20.MG23.AS1 |        73590 |            57125037 |        52288 |         34801939 |           61151 |            39027809 | 
	| MC20.MG24.AS1 |        70331 |            57189273 |        58453 |         39260797 |           55637 |            35726906 | 
	| MC20.MG25.AS1 |        87546 |            67187941 |        46763 |         31786826 |           72738 |            47774093 | 
	| MC20.MG26.AS1 |        78155 |            59651934 |        54363 |         35523805 |           64043 |            40326207 | 
	| MC20.MG27.AS1 |        83931 |            66168175 |        59090 |         39785070 |           70637 |            46062444 | 
	| MC20.MG28.AS1 |        71160 |            57081404 |        61998 |         40928035 |           56166 |            35513832 | 
	| MC20.MG29.AS1 |       105516 |            86753488 |       152128 |        101682028 |           85700 |            55390875 | 
	| MC20.MG3.AS1  |        31096 |            35575967 |        61330 |         47762410 |           16561 |            11479788 | 
	| MC20.MG30.AS1 |        99166 |            83354756 |       144363 |         96337650 |           80256 |            50460220 | 
	| MC20.MG31.AS1 |       110201 |            93505669 |       142393 |         97416721 |           88736 |            57398279 | 
	| MC20.MG32.AS1 |       113540 |            96152661 |       136076 |         91627418 |           91405 |            58634356 | 
	| MC20.MG33.AS1 |        65088 |            63576076 |       173706 |        113678923 |           50190 |            31349836 | 
	| MC20.MG34.AS1 |        85009 |            70601731 |       161091 |        107522262 |           69752 |            44498769 | 
	| MC20.MG35.AS1 |        98656 |            79886227 |       158594 |        100554835 |           78396 |            47049152 | 
	| MC20.MG36.AS1 |       115558 |            94544463 |       139150 |         88497485 |           90695 |            55914314 | 
	| MC20.MG37.AS1 |        77225 |            57168299 |        47597 |         31189632 |           64959 |            40785693 | 
	| MC20.MG4.AS1  |        37749 |            39020190 |        57372 |         43884229 |           22788 |            16025831 | 
	| MC20.MG5.AS1  |        37793 |            41313262 |        59034 |         46549302 |           20442 |            14208551 | 
	| MC20.MG6.AS1  |        31171 |            36156855 |        60763 |         47827434 |           17634 |            12648774 | 
	| MC20.MG7.AS1  |        31685 |            34467680 |        60151 |         47280116 |           19383 |            13930014 | 
	| MC20.MG8.AS1  |        36803 |            41718743 |        57359 |         45739197 |           21669 |            15654650 | 
	| MC20.MG9.AS1  |        21092 |            24491884 |        59509 |         43125470 |           15765 |            10145504 | 
	+---------------+--------------+---------------------+--------------+------------------+-----------------+---------------------+
	37 rows in set (0.02 sec)

	mysql> 

=item B<gene statistics>

Summary information of each gene prediction performed on an assembly is 
stored in C<summary_gene_stats>. A sample table is given below.

	mysql> select * from summary_gene_stats;
	+--------------------+------------+-------------------+-----------------+--------------------+----------------+------------------+---------------+
	| gene_prediction_id | gene_count | total_gene_length | gene_unit_count | total_base_support | complete_genes | incomplete_genes | partial_genes |
	+--------------------+------------+-------------------+-----------------+--------------------+----------------+------------------+---------------+
	| MC20.MG1.AS1.GP1   |      72772 |          40379157 |           85822 |           51237217 |          18097 |             8805 |         45870 | 
	| MC20.MG10.AS1.GP1  |      64333 |          34231029 |           84941 |           52101083 |          18391 |             8634 |         37308 | 
	| MC20.MG11.AS1.GP1  |      59820 |          32520732 |           85887 |           52253651 |          19141 |             5801 |         34878 | 
	| MC20.MG12.AS1.GP1  |      33993 |          19098090 |           70069 |           51330848 |          13965 |             3057 |         16971 | 
	| MC20.MG13.AS1.GP1  |      14334 |           9139266 |           65805 |           57522650 |           6037 |             1367 |          6930 | 
	| MC20.MG14.AS1.GP1  |      29305 |          16569663 |           70063 |           53887919 |          10271 |             3161 |         15873 | 
	| MC20.MG15.AS1.GP1  |      34732 |          20621208 |           73627 |           54671325 |          14704 |             4760 |         15268 | 
	| MC20.MG16.AS1.GP1  |      84781 |          43911969 |          113732 |           69051475 |          12595 |            21376 |         50810 | 
	| MC20.MG17.AS1.GP1  |      90859 |          47536527 |          118788 |           71404211 |          14355 |            21748 |         54756 | 
	| MC20.MG18.AS1.GP1  |     107924 |          54545199 |          128435 |           70303051 |          14207 |            26730 |         66987 | 
	| MC20.MG19.AS1.GP1  |      58967 |          31472319 |           97951 |           70400490 |          11723 |            13487 |         33757 | 
	| MC20.MG2.AS1.GP1   |      69574 |          40497036 |           93109 |           60282733 |          20780 |             7231 |         41563 | 
	| MC20.MG20.AS1.GP1  |     111891 |          56129958 |          132062 |           72182848 |          14297 |            28354 |         69240 | 
	| MC20.MG21.AS1.GP1  |     108567 |          55039923 |          128680 |           71367352 |          13290 |            27720 |         67557 | 
	| MC20.MG22.AS1.GP1  |     118183 |          59534490 |          139232 |           75981247 |          14463 |            29881 |         73839 | 
	| MC20.MG23.AS1.GP1  |     103732 |          50480028 |          122942 |           65426705 |          12965 |            28693 |         62074 | 
	| MC20.MG24.AS1.GP1  |     100309 |          50493168 |          121130 |           66200186 |          13461 |            26585 |         60263 | 
	| MC20.MG25.AS1.GP1  |     122497 |          59143602 |          137798 |           70156846 |          14184 |            32850 |         75463 | 
	| MC20.MG26.AS1.GP1  |     109207 |          52212930 |          129118 |           66513549 |          13552 |            29827 |         65828 | 
	| MC20.MG27.AS1.GP1  |     119784 |          58628184 |          141178 |           76180769 |          15018 |            32439 |         72327 | 
	| MC20.MG28.AS1.GP1  |     101769 |          50444643 |          124474 |           67698782 |          13583 |            27144 |         61042 | 
	| MC20.MG29.AS1.GP1  |     152959 |          76290459 |          222362 |          138267126 |          22320 |            39573 |         91066 | 
	| MC20.MG3.AS1.GP1   |      54856 |          30431319 |           80302 |           51165305 |          17886 |             5376 |         31594 | 
	| MC20.MG30.AS1.GP1  |     147519 |          73464228 |          221961 |          130024265 |          24430 |            37737 |         85352 | 
	| MC20.MG31.AS1.GP1  |     162534 |          84337911 |          222526 |          140506645 |          22480 |            43652 |         96402 | 
	| MC20.MG32.AS1.GP1  |     167530 |          84801552 |          224612 |          133333765 |          26637 |            42768 |         98125 | 
	| MC20.MG33.AS1.GP1  |     102806 |          55998363 |          180428 |          129004863 |          21670 |            23729 |         57407 | 
	| MC20.MG34.AS1.GP1  |     122628 |          61739364 |          197248 |          133288167 |          18341 |            32302 |         71985 | 
	| MC20.MG35.AS1.GP1  |     140465 |          70256883 |          207824 |          130157523 |          20545 |            39720 |         80200 | 
	| MC20.MG36.AS1.GP1  |     166469 |          83798667 |          221893 |          128752749 |          24113 |            45463 |         96893 | 
	| MC20.MG37.AS1.GP1  |     106497 |          50418960 |          124724 |           63678384 |          12140 |            30527 |         63830 | 
	| MC20.MG4.AS1.GP1   |      63230 |          33948888 |           86245 |           52625882 |          15872 |             8558 |         38800 | 
	| MC20.MG5.AS1.GP1   |      64201 |          35787978 |           85113 |           53050553 |          17171 |             7478 |         39552 | 
	| MC20.MG6.AS1.GP1   |      55693 |          31167972 |           80945 |           52701142 |          17700 |             5596 |         32397 | 
	| MC20.MG7.AS1.GP1   |      54699 |          29863635 |           81763 |           53530516 |          15564 |             6858 |         32277 | 
	| MC20.MG8.AS1.GP1   |      63735 |          36102186 |           84619 |           53451893 |          19116 |             7021 |         37598 | 
	| MC20.MG9.AS1.GP1   |      37212 |          21090201 |           66093 |           46560906 |          13106 |             5294 |         18812 | 
	+--------------------+------------+-------------------+-----------------+--------------------+----------------+------------------+---------------+
	37 rows in set (0.03 sec)

	mysql> 

Gene-to-read overlap information is stored in the C<gene2read> table.
A sample table is given below (C<start> and C<end> are the start and end
of the overlap in contig coordinates, since that is the common reference
for a gene and a read).

	mysql> select * from gene2read limit 10;
	+---------------------------------+-----------------------------------+-------+-----+--------------------+
	| gene_name                       | read_id                           | start | end | gene_prediction_id |
	+---------------------------------+-----------------------------------+-------+-----+--------------------+
	| MC20.MG31.AS1.GP1.I33.R82001.G1 | MC20.MG31.ADI0ACAA316YD16AHM2.SCF |     3 | 407 |                311 | 
	| MC20.MG31.AS1.GP1.I33.R82000.G1 | MC20.MG31.ADI0ACAA308YN06FM1.SCF  |   149 | 754 |                311 | 
	| MC20.MG31.AS1.GP1.I33.R8200.G2  | MC20.MG31.ADI0ACAA50YG12FM1.SCF   |   403 | 642 |                311 | 
	| MC20.MG31.AS1.GP1.I33.R8200.G1  | MC20.MG31.ADI0ACAA50YG12FM1.SCF   |     1 | 315 |                311 | 
	| MC20.MG31.AS1.GP1.I33.R820.G1   | MC20.MG31.ADI0ACAA10YC18FM1.SCF   |     2 | 256 |                311 | 
	| MC20.MG31.AS1.GP1.I33.R82.G1    | MC20.MG31.ADI0ACAA8YP11FM1.SCF    |     1 | 807 |                311 | 
	| MC20.MG31.AS1.GP1.I33.R81999.G1 | MC20.MG31.ADI0ACAA330YA01FM1.SCF  |     2 | 469 |                311 | 
	| MC20.MG31.AS1.GP1.I33.R81998.G1 | MC20.MG31.ADI0ACAA317YP24AHM1.SCF |     2 | 304 |                311 | 
	| MC20.MG31.AS1.GP1.I33.R81997.G3 | MC20.MG31.ADI0ACAA334YB22AHM1.SCF |   560 | 622 |                311 | 
	| MC20.MG31.AS1.GP1.I33.R81997.G2 | MC20.MG31.ADI0ACAA334YB22AHM1.SCF |   258 | 563 |                311 | 
	+---------------------------------+-----------------------------------+-------+-----+--------------------+
	10 rows in set (1.03 sec)

	mysql> 

Coverage information for genes is stored in C<gene_details>. This is important
for measuring the quantitative abundance of genes, using the gene-to-read
overlap information in C<gene2read>. A sample table is given below.

	mysql> select * from gene_details limit 10;
	+--------------------+-----------------------------+--------+------------+-------------------+----------+
	| gene_prediction_id | gene_name                   | length | read_count | total_read_length | coverage |
	+--------------------+-----------------------------+--------+------------+-------------------+----------+
	|                281 | MC20.MG1.AS1.GP1.I1.C1.G1   |   1179 |          3 |              2038 |   1.7286 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C1.G2   |   1416 |          5 |              3056 |   2.1582 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C1.G3   |   1140 |          7 |              4127 |   3.6202 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C1.G4   |   1014 |          9 |              3734 |   3.6824 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C1.G5   |    567 |          5 |              1394 |   2.4586 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C1.G6   |    870 |          3 |              1687 |   1.9391 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C1.G7   |   4272 |         14 |             11621 |   2.7203 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C1.G8   |   4059 |         17 |             13325 |   3.2828 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C10.G1  |    528 |          3 |               910 |   1.7235 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C10.G10 |   1251 |          7 |              4922 |   3.9345 | 
	+--------------------+-----------------------------+--------+------------+-------------------+----------+
	10 rows in set (0.02 sec)

	mysql> 

=item B<gene_phylomap>

Phylogenetic mapping of reads from C<read_refgenome_map> will be transferred to
genes and stored in C<gene_refgenome_map>. A sample table is given below.

	mysql> select * from gene_refgenome_map limit 10;
	+--------------------+-----------------------------+-----------------+-------------+
	| gene_prediction_id | gene_name                   | ref_taxonomy_id | probability |
	+--------------------+-----------------------------+-----------------+-------------+
	|                281 | MC20.MG1.AS1.GP1.I1.C1.G1   |            2172 |      1.0000 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C1.G2   |            2172 |      1.0000 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C1.G3   |            2172 |      1.0000 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C1.G4   |            2172 |      1.0000 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C1.G5   |            2172 |      1.0000 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C1.G6   |            2172 |      1.0000 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C1.G7   |            2172 |      1.0000 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C1.G8   |            2172 |      1.0000 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C10.G1  |            2172 |      1.0000 | 
	|                281 | MC20.MG1.AS1.GP1.I1.C10.G10 |            2172 |      1.0000 | 
	+--------------------+-----------------------------+-----------------+-------------+
	10 rows in set (0.35 sec)

	mysql> 

=item B<og_ref_map>

Phylogenetic mapping of genes from C<gene_refgenome_map> will be transferred to
orthologous groups and stored in C<og_ref_map>. A sample table is given below.

	mysql> select * from species_og_map order by og limit 10;
	+--------------------+---------+-----------------+-----------+
	| gene_prediction_id | og      | ref_taxonomy_id | abundance |
	+--------------------+---------+-----------------+-----------+
	|                281 | COG0001 |              -1 |  7.895100 | 
	|                281 | COG0001 |            1263 |  1.000000 | 
	|                289 | COG0001 |              -1 |  2.000000 | 
	|                282 | COG0001 |              -1 |  6.080613 | 
	|                282 | COG0001 |            1485 |  1.074100 | 
	|                283 | COG0001 |              -1 |  3.002000 | 
	|                281 | COG0001 |            1730 |  1.621500 | 
	|                285 | COG0001 |             561 |  1.324300 | 
	|                282 | COG0001 |            1730 |  2.000000 | 
	|                287 | COG0001 |            1263 |  1.000000 | 
	+--------------------+---------+-----------------+-----------+
	10 rows in set (1.16 sec)

	mysql> 

=back

=cut
