#! /usr/bin/env perl

use strict;
use warnings;
use Pod::Usage;
use Smash::Core;
use Smash::Global qw($SMASH_SCRIPT_NAME $SMASH_SCRIPT_LOCATION);
use Smash::CommandLineParser qw(parse_options check_required_options);
use File::Temp;

##############
# Command line specification
##############

my @option_specs = qw(genepred=s string=s help); # every option handed to parse_options
my @mandatory    = qw(genepred string);          # options that must be present

##############
# Read the command line and validate it
##############

# parse_options returns a status flag followed by the option name/value pairs.
my ($parse_status, %options) = parse_options(\@option_specs);

# --help is honoured before any error checking: print the full manual, exit 0.
pod2usage(-exitstatus => 0, -verbose => 2) if $options{help};

# Any parser status other than 1 is reported as a usage error.
unless ($parse_status == 1) {
	pod2usage(-message => "", -exitstatus => 2, -verbose => 1);
}

# check_required_options returns a status flag and the name of a missing option.
my ($required_status, $missing) = check_required_options(\@mandatory, %options);
unless ($required_status == 1) {
	pod2usage(-message => "$SMASH_SCRIPT_NAME: Missing argument --$missing\n", -exitstatus => 2, -verbose => 1);
}

# Name of the gene prediction whose mappings are loaded.
my $genepred     = $options{genepred};

# --string must be the literal word "string" followed by a non-empty version
# (e.g. "string8"); matching is case-insensitive because the value is
# lower-cased first.  The pattern is anchored so that a bare "string" (empty
# version) or a value such as "mystring8" is rejected rather than silently
# yielding a bogus version.  From here on $string holds only the version part.
my $string       = lc($options{string});
if ($string =~ /\Astring(\S+)\z/) {
	$string = $1;
}
else {
	# pod2usage exits with status 2 after printing the message and usage.
	pod2usage(-message => "$SMASH_SCRIPT_NAME: invalid argument to --string.\n", -exitstatus => 2, -verbose => 1);
}

##############
# do the mapping
##############

# Set up the Smash session for this gene prediction and get its DB handle.
my $smash        = Smash::Core->new(GENEPRED => $genepred);
   $smash->init();
my $dbh          = $smash->get_db_handle;

# One prepared statement, reused for every row of the mapping file.
my $insert_sth   = $dbh->prepare(q{
	INSERT INTO gene2og(gene_name, string_protein, string_version, og, placement_start, placement_end, bitscore)
	VALUES (?, ?, ?, ?, ?, ?, ?)
});

# The mapping file lives in the gene prediction directory and is named
# <genepred>.eggnogmapping.txt.
my $genepred_dir = $smash->genepred_dir($genepred);
my $og_map_file  = "$genepred_dir/$genepred.eggnogmapping.txt";

# Three-argument open with a lexical handle: the path is never interpreted
# as part of the open mode, and the handle cannot clash with a global name.
open(my $og_map_fh, '<', $og_map_file) or die "Cannot open file $og_map_file: $!";
LINE: while (my $line = <$og_map_fh>) {
	chomp($line);

	# Skip comment lines and blank lines.
	next LINE if $line =~ m/^\s*#/ or $line =~ m/^\s*$/;

	# Tab-separated columns; only the first six are stored.  Any further
	# column on the line is ignored.
	my ($gene_name, $string_protein, $og, $placement_start, $placement_end, $bitscore) = split(/\t/, $line);

	# NOTE(review): the result of execute() is not checked here; failures are
	# only fatal if the handle from get_db_handle raises errors -- confirm.
	$insert_sth->execute($gene_name, $string_protein, $string, $og, $placement_start, $placement_end, $bitscore);
}
close($og_map_fh);

$insert_sth->finish();
$dbh->commit();
$smash->finish();

exit(0);

=head1 loadOrthologMapping.pl

Script to parse OG mappings and load them into Smash database.

=head1 Synopsis

	loadOrthologMapping.pl [options]

=head1 Options

=over 4

=item B<C<--genepred>>

Name of the gene prediction whose mappings should be loaded.

=item B<C<--string>>

Version of the STRING database used in the mapping, given as the word
C<string> followed by the version (e.g. C<string8>).

=item B<C<--help>>

Prints this manual.

=back

=head1 Description

B<loadOrthologMapping.pl> is a wrapper script that loads the
OG mappings of predicted proteins into the Smash database.  It is run as:

	loadOrthologMapping.pl --genepred=MC20.MG10.AS2.GP1 --string=string8

=head1 Required files

B<loadOrthologMapping.pl> requires the following file:

=over 4

=item ogmapping file from L<doOrthologMapping.pl|doOrthologMapping>

File containing results from OG mapping of the predicted proteins against 
STRING orthologous groups from the given version.

This file is expected to be in the gene prediction directory. To see the
location of this directory for a given gene prediction, run:

	perl showLocations.pl --item=MC10.MG23.AS1.GP2

The file F<MC10.MG23.AS1.GP2.eggnogmapping.txt> should exist in that location.

=back

=cut
