#!/usr/bin/env perl

use strict;
use warnings;
use Getopt::Long;
use POSIX;
use Math::Round;

use Smash::Utils::Plot qw(:all);
use Smash::Utils::SVG qw(:all);
use Smash::Utils::Taxonomy qw(:all);
use Smash::Utils::MatrixIO qw(:all);
use Smash::Core;

# Alignment filter thresholds. Each default can be overridden on the command
# line and is interpolated into the usage text below.
my $opt_bits     = 60;
my $opt_exp      = 0.1;
my $opt_salnlen  = 100;
my $opt_sim      = 70;
my $opt_colors;
my $opt_flavor;
my $tax_id;
my $collection;
my $opt_help;

my $usage = "
$0 - Parse tabular BLAST report of metagenomic read vs reference-genome blastn and 
generate fragment recruitment plot. Alignments can be filtered by bit score, E-value, 
aligned subject length or percent similarity. 

Usage: $0 [options] <blast-file> 

Program Options:
 --flavor=<blast>      BLAST flavor used (WU or NCBI, required)
 --taxid=<num>         tax id of reference genome to draw the plot for (required)
 --collection=<name>   metagenome collection to map to the reference genome (required)
 --bits=<num>          bit threshold for results to be included (default: $opt_bits)
 --exp=<num>           E-value threshold for results to be included (default: $opt_exp)
 --sim=<num>           percent similarity threshold for hsp to be included in frag recruitment plot (default: $opt_sim)
 --minslen=<num>       minimum number of subject bases for hsp to be included (default: $opt_salnlen)
 --colors=<file>       tab-delimited file containing color definitions to differentiate reads from diff. samples
 --help                print this message and exit
";

# GetOptions returns false when it meets an unknown option or a value of the
# wrong type. Stop with the usage text instead of silently continuing with
# the defaults.
GetOptions(
	"flavor=s"   => \$opt_flavor,
	"colors=s"   => \$opt_colors,
	"bits=n"     => \$opt_bits,
	"exp=f"      => \$opt_exp,
	"minslen=n"  => \$opt_salnlen,
	"sim=n"      => \$opt_sim,
	"taxid=n"    => \$tax_id,
	"collection=s"=> \$collection,
	"help"       => \$opt_help) or die $usage;

# The BLAST report is the only positional argument.
my ($blast_file) = @ARGV;

# --taxid, --collection, --flavor and the BLAST file are all mandatory.
if ($opt_help || !$tax_id ||!$collection || !$opt_flavor || !$blast_file) {
	die $usage;
}

# Progress messages go to STDERR, flushed immediately.

my $PROGRESS = \*STDERR;
$PROGRESS->autoflush(1);

# Direct method call instead of indirect object syntax ("new Smash::Core ...").
my $smash = Smash::Core->new(COLLECTION => $collection);
$smash->init();

# Get all contigs for that reference genome.
# The three arrays are parallel: index i describes the same sequence in each.
# Rows arrive longest sequence first (ORDER BY length DESC).

my @seqnames;
my @sequences;
my @lengths;
my $organism;
my $dbh = $smash->get_refgenomedb_handle();
{
	my $sth = $dbh->prepare("SELECT organism, sequence_id, length, definition FROM sequence INNER JOIN taxonomy USING (taxonomy_id) WHERE taxonomy_id=? ORDER BY length DESC");
	$sth->execute($tax_id);
	while (my ($x, $seq_id, $length, $seqname) = $sth->fetchrow_array()) {
		$organism = $x;
		push(@sequences, $seq_id);
		push(@lengths, $length);
		push(@seqnames, $seqname);
	}
	$sth->finish();
}
$dbh->disconnect();

# Without any sequence there is nothing to plot. Fail loudly rather than
# writing an SVG that contains no recruitment plot.
if (!@sequences) {
	die sprintf "No sequences found for tax id %d", $tax_id;
}

# Cannot handle multiple sequences yet
if (@sequences > 1) {
	die sprintf "Multiple sequences for %s(%d)", $organism, $tax_id;
}

# set up figure scales

my $genome_scale_down = 300;   # passed to the plot as X_SCALE_DOWN
my $contig_scale_down = 300;   # not referenced anywhere else in this script
my $percent_scale_down = 0.15; # passed to the plot as Y_SCALE_DOWN
my $zoom_start = undef; #1480000;
my $zoom_end   = undef; #1520000;

# A color file that was requested but is not a regular file used to be
# ignored silently. Say so before the progress line is started.
if ($opt_colors && !-f $opt_colors) {
	warn "Color definitions file '$opt_colors' is not a regular file; ignoring --colors\n";
}

# draw fragment recruitment

print $PROGRESS "Drawing fragment recruitement ...";

# The plot is written to <taxid>.svg in the current directory. Three-argument
# open keeps the mode separate from the file name.
my $FH;
open($FH, '>', "$tax_id.svg") or die "Cannot open $tax_id.svg: $!";
open_svg($FH);

# read in color definitions and draw color key
# $Gradient maps each metagenome to the color used for it. It stays empty
# when no usable color file was given.

my $Gradient = {};
if ($opt_colors && -f $opt_colors) {
	# Number of metagenomes that have already been given each base color.
	my $ColorCount = {};
	my $ColorDefs = read_two_column_hash($opt_colors);
	print $FH "<g id=\"fragment_recruitment_key\" transform=\"translate(-200,-200)\">\n";
	my @metagenomes = $smash->get_metagenomes_for_collection($collection);
	my $count = 0;
	foreach my $metagenome (@metagenomes) {
		my $label = $smash->get_metagenome_label($metagenome);
		my $color = get_color_for_label($label, $ColorDefs);
		# Metagenomes sharing a base color are told apart by shifting the
		# color a little further for every earlier use of it.
		my $idx  = $ColorCount->{$color} || 0;
		$ColorCount->{$color}++;
		$color = blur_rgb_color($color, $idx);
		$Gradient->{$metagenome} = $color;
		# One key row per metagenome: a 20x20 swatch followed by its label.
		draw_rect($FH, 0, 20*$count, 20, 20, "stroke:$color; fill:$color;");
		draw_text($FH, 20, 20*($count+1)-3, $label, ('font-size'=>15));
		$count++;
	}
	print $FH "</g>\n";
}

# Draw one recruitment plot per reference sequence, each wrapped in its own
# SVG group.
for my $i (0 .. $#sequences) {
	my %params = (OUT_FILE_HANDLE => $FH,
			BLAST_FILE => $blast_file,
			BLAST_FLAVOR => $opt_flavor,
			SIMILARITY => $opt_sim,
			BITS => $opt_bits,
			# Honor --exp. This was hard-coded to 0.1, so the option
			# had no effect.
			E => $opt_exp,
			GENOME_NAME => $sequences[$i],
			GENOME_SIZE => $lengths[$i],
			GENOME_LABEL => $seqnames[$i],
			START_X => $zoom_start,
			END_X => $zoom_end,
			MINALIGNSLEN => $opt_salnlen,
			DISPLAY_LABEL => 1,
			DISPLAY_TICS => 0,
			COLORS => $Gradient,
			X_SCALE_DOWN => $genome_scale_down,
			Y_SCALE_DOWN => $percent_scale_down);
	print $FH "<g id=\"fragment_recruitment\" transform=\"translate(0,0)\">\n";
	draw_fragment_recruitment(%params);
	print $FH "</g>\n";
}
print $PROGRESS " done\n";

# NOTE(review): the result of close() is not checked. Confirm that
# close_svg() leaves the handle open before adding "or die" here.
close_svg($FH);
close($FH);

$smash->finish();
exit(0);

# Return the numerically larger of the two arguments.
sub max {
	my ($first, $second) = @_;
	return $first <= $second ? $second : $first;
}

# Return the numerically smaller of the two arguments.
sub min {
	my ($first, $second) = @_;
	return $first <= $second ? $first : $second;
}

# Shift every channel of a "#RRGGBB" color by 16 * $distance, clamping each
# channel to the range 0..255.
#
#   $color    - color string; a leading '#' is optional
#   $distance - number of 16-unit steps to shift by (may be negative)
#
# Returns the shifted color as an upper-case "#RRGGBB" string. A color that
# does not start with six hex digits (a named color such as "red", or the
# short "#RGB" form) cannot be shifted channel by channel and is returned
# unchanged instead of being turned into an invalid color string.
sub blur_rgb_color {
	my $color    = shift;
	my $distance = shift;

	return $color unless defined $color;
	$distance = 0 unless defined $distance;

	# Only real hex digits qualify; \w would also accept letters like "r".
	my @channels = $color =~ /\A\s*[#]?([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})/;
	return $color unless @channels == 3;

	my $offset  = 16 * $distance;
	my $shifted = '';
	foreach my $channel (@channels) {
		my $value = hex($channel) + $offset;
		$value = 0   if $value < 0;
		$value = 255 if $value > 255;
		$shifted .= sprintf("%02X", $value);
	}
	return "#$shifted";
}

####
# Look up the color for a graph label.
# A color definition applies when its key is a prefix of the label, e.g. the
# label AM-AD-1 is matched by the keys A, AM, AM-, AM-A, AM-AD, AM-AD- and
# AM-AD-1. If several keys match, the longest one wins.
# If no key matches, the default is black (#000000).
#
#   $label  - label to find a color for
#   $colors - hash ref mapping label prefixes to color strings
####

sub get_color_for_label {
	my ($label, $colors) = @_;

	# Single pass over the keys, remembering the longest matching prefix.
	my $longest_match;
	foreach my $prefix (keys %$colors) {
		my $prefix_length = length($prefix);
		next if defined($longest_match) && $prefix_length <= length($longest_match);
		next if $prefix ne substr($label, 0, $prefix_length);
		$longest_match = $prefix;
	}

	return "#000000" unless defined $longest_match;
	return $colors->{$longest_match};
}
