package Bio::EnsEMBL::Compara::Production::EPOanchors::HMMer::HMMsearch;

use strict;
use Data::Dumper;
use Bio::EnsEMBL::Registry;
use Bio::EnsEMBL::Utils::Exception qw(throw);
use File::Basename;

use base ('Bio::EnsEMBL::Compara::RunnableDB::BaseRunnable');

# Input-fetching hook. Nothing is prepared here: the parameters this module
# uses (gab_id, target_genome, md5sum, hmmbuild, nhmmer, mlssid_of_alignments)
# are read directly inside run().
sub fetch_input {
	my ($self) = @_;
}

# Build a profile HMM from one genomic align block and search a target genome
# sequence file with it.
#
# Parameters read through $self->param():
#   gab_id               - dbID of the GenomicAlignBlock to build the HMM from
#   target_genome        - hashref; the keys "name" and "genome_seq" are used
#   md5sum               - optional; when set, genome_seq is copied to /tmp and
#                          the copy is only used if its checksum matches
#   hmmbuild, nhmmer     - command prefixes of the two external programs
#   mlssid_of_alignments - stored as the first field of every hit record
#
# Result: when at least one hit maps to a known dnafrag, the param
# 'mapping_hits' is set to an arrayref of records
#   [ mlss_id, gab_id, dnafrag_id, start, end, strand, score,
#     hmm_from, hmm_to, evalue, hmm_length ]
#
# Throws if the align block cannot be fetched, if a temporary file cannot be
# written or read, if rsync fails, or if nhmmer exits with an error.
sub run {
	my ($self) = @_;

	my $compara_dba       = $self->compara_dba;
	my $dnafrag_adaptor   = $compara_dba->get_adaptor("DnaFrag");
	my $gab_adaptor       = $compara_dba->get_adaptor("GenomicAlignBlock");
	my $genome_db_adaptor = $compara_dba->get_adaptor("GenomeDB");

	my ($gab_id)      = $self->param('gab_id');
	my $target_genome = $self->param('target_genome');

	my $gab = $gab_adaptor->fetch_by_dbID($gab_id)
		or throw("can not fetch genomic_align_block " . (defined $gab_id ? $gab_id : 'undef'));

	# The process id keeps concurrent jobs for the same block on one host
	# from overwriting or deleting each other's temporary files.
	my $tmp_prefix = "/tmp/" . $ENV{USER} . "_${gab_id}_$$";
	my $stk_file   = "$tmp_prefix.stk";
	my $hmm_file   = "$tmp_prefix.hmm";

	# The external programs below can run for a long time; do not hold a
	# database connection open while they do.
	$compara_dba->dbc->disconnect_when_inactive(1);

	# Write the alignment in Stockholm format, one row per genomic align.
	open(my $stk_fh, '>', $stk_file)
		or throw("can not open stockholm file $stk_file for writing: $!");
	print $stk_fh "# STOCKHOLM 1.0\n";
	foreach my $genomic_align ( @{ $gab->get_all_GenomicAligns } ) {
		my $aligned_seq = $genomic_align->aligned_sequence;
		# Skip rows made only of N and gap characters (two or more of them).
		next if ($aligned_seq =~ /^[N-]+[N-]$/);
		$aligned_seq =~ s/\./-/g;
		print $stk_fh $gab_id, "/", $genomic_align->dnafrag_start, ":", $genomic_align->dnafrag_end,
			"/", $genomic_align->dnafrag->genome_db->name, "\t",
			$aligned_seq, "\n";
	}
	print $stk_fh "//";
	# Buffered write errors only surface at close, so check it.
	close($stk_fh) or throw("can not close stockholm file $stk_file: $!");

	my $genome_seq_file = $target_genome->{"genome_seq"};
	# Copy genome_seq to local disk only if the md5sum parameter is set.
	my $expected_md5 = $self->param('md5sum');
	if ($expected_md5) {
		my $tmp_file = "/tmp/" . $ENV{USER} . "_" . $target_genome->{name} . "_" . basename($genome_seq_file);

		if (-e $tmp_file) {
			# An earlier job already copied (and checked) the file.
			print "$tmp_file already exists\n";
			$genome_seq_file = $tmp_file;
		} else {
			my $start_time = time;
			print "Need to copy file\n";
			print "rsync $genome_seq_file $tmp_file\n";

			# List form: the paths are passed as-is, without going through a shell.
			system('rsync', $genome_seq_file, $tmp_file) == 0
				or throw("system rsync $genome_seq_file $tmp_file failed:$?");

			print "Time to rsync " . (time - $start_time) . "\n";
			my $rsync_time = time;

			# md5sum prints "<hex digest>  <file name>"; only the digest is
			# compared, and as a string. A numeric comparison would only look
			# at the leading digits of the hex string.
			my $computed_md5;
			if (open(my $md5_fh, '-|', 'md5sum', $tmp_file)) {
				my $md5_output = <$md5_fh>;
				close($md5_fh);
				($computed_md5) = split(' ', $md5_output) if defined $md5_output;
			}
			my ($wanted_md5) = split(' ', $expected_md5);

			if (defined $computed_md5 && defined $wanted_md5 && lc($computed_md5) eq lc($wanted_md5)) {
				$genome_seq_file = $tmp_file;
			} else {
				print "md5sum failed. Use $genome_seq_file\n";
				# Remove the bad copy, otherwise later jobs would pick it up
				# through the "already exists" shortcut above.
				unlink($tmp_file);
			}
			print "Time to md5sum " . (time - $rsync_time) . "\n";
			print "Total time " . (time - $start_time) . "\n";
		}
	}

	my $hmm_build_command = $self->param('hmmbuild') . " $hmm_file $stk_file";
	print $hmm_build_command, " **\n";
	# The exit status is deliberately not checked: a failed build is detected
	# below by the missing HMM file.
	system($hmm_build_command);

	unlink($stk_file);

	unless (-e $hmm_file) {
		# The sequences in the alignment are probably too short.
		# Restore the connection behaviour before leaving.
		$compara_dba->dbc->disconnect_when_inactive(0);
		return;
	}

	# Model length, taken from the "LENG" line of the HMM file.
	my $hmm_len = '';
	open(my $hmm_fh, '<', $hmm_file) or throw("can not open hmm file $hmm_file for reading: $!");
	while (my $line = <$hmm_fh>) {
		if ($line =~ /^LENG  (.*)$/) {
			$hmm_len = $1;
			last;
		}
	}
	close($hmm_fh);

	my $nhmmer_command = $self->param('nhmmer') . " --cpu 1 --noali" . " $hmm_file $genome_seq_file";
	print $nhmmer_command, " **\n";
	open(my $nhmm_fh, '-|', $nhmmer_command) or throw("Error opening nhmmer command: $? $!");
	my @hit_records;
	{
		# Read the output in ">>"-delimited records; only records containing
		# a "!" are kept.
		local $/ = ">>";
		while (my $mapping = <$nhmm_fh>) {
			next unless $mapping =~ /!/;
			push(@hit_records, $mapping);
		}
	}
	# Closing a pipe waits for the command and reports a non-zero exit.
	my $nhmmer_ok     = close($nhmm_fh);
	my $nhmmer_status = $?;

	unlink($hmm_file);
	$compara_dba->dbc->disconnect_when_inactive(0);

	throw("nhmmer command failed (exit status $nhmmer_status): $nhmmer_command") unless $nhmmer_ok;

	my @anchor_align_records;
	if (@hit_records) {
		# These two values are the same for every hit: look them up once.
		my $target_genome_db = $genome_db_adaptor->fetch_by_registry_name($target_genome->{"name"});
		my $mlss_id          = $self->param('mlssid_of_alignments');

		foreach my $mapping (@hit_records) {
			# Line 0 of a record names the target sequence, line 3 holds the hit values.
			my ($target_info, $mapping_info) = (split("\n", $mapping))[0,3];
			next unless defined $target_info && defined $mapping_info;

			# The target name is colon-separated; the third field is the seq_region name.
			my $seq_region_name = (split(":", $target_info))[2];
			next unless defined $seq_region_name;

			my ($score, $evalue, $hmm_from, $hmm_to, $alifrom, $alito) = (split(/ +/, $mapping_info))[2,4,5,6,8,9];
			next unless defined $alifrom && defined $alito;

			# Reverse-strand hits are reported with start > end; store them
			# with start <= end and strand -1.
			my $strand = $alifrom > $alito ? "-1" : "1";
			($alifrom, $alito) = ($alito, $alifrom) if $strand eq "-1";

			my $dnafrag = $dnafrag_adaptor->fetch_by_GenomeDB_and_name($target_genome_db, $seq_region_name);
			next unless ($dnafrag);

			push( @anchor_align_records, [ $mlss_id, $gab_id, $dnafrag->dbID, $alifrom, $alito,
							$strand, $score, $hmm_from, $hmm_to, $evalue, $hmm_len ] );
		}
	}

	$self->param('mapping_hits', \@anchor_align_records) if scalar( @anchor_align_records );
	return;
}

# Store the hit records that run() left in the 'mapping_hits' param, if any,
# through the AnchorAlign adaptor.
sub write_output {
	my $self = shift;

	my $anchor_align_adaptor = $self->compara_dba->get_adaptor("AnchorAlign");

	if ( $self->param('mapping_hits') ) {
		$anchor_align_adaptor->store_mapping_hits( $self->param('mapping_hits') );
	}
}

1;

