diff variant_effect_predictor/Bio/Tools/BPlite/Iteration.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Tools/BPlite/Iteration.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,394 @@
+# $Id: Iteration.pm,v 1.15 2002/06/19 00:27:49 jason Exp $
+# Bioperl module Bio::Tools::BPlite::Iteration
+#	based closely on the Bio::Tools::BPlite modules
+#	Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf), 
+#	Lorenz Pollak (lorenz@ist.org, bioperl port)
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# October 20, 2000
+# POD documentation - main docs before the code
+#
+# Added to get a simple_align object for a psiblast run with the -m 6 flag /AE
+# 
+
+=head1 NAME
+
+Bio::Tools::BPlite::Iteration - object for parsing single iteration
+of a PSIBLAST report
+
+=head1 SYNOPSIS
+
+   use Bio::Tools:: BPpsilite;
+
+   open FH, "t/psiblastreport.out";
+   $report = Bio::Tools::BPpsilite->new(-fh=>\*FH);
+
+   # determine number of iterations executed by psiblast
+   $total_iterations = $report->number_of_iterations;
+   $last_iteration = $report->round($total_iterations);
+
+   # Process only hits found in last iteration ...
+   $oldhitarray_ref = $last_iteration->oldhits;
+   HIT: while($sbjct = $last_iteration->nextSbjct) {
+       $id = $sbjct->name;
+       $is_old =  grep  /\Q$id\E/, @$oldhitarray_ref;
+       if ($is_old ){next HIT;}
+   #  do something with new hit...
+   }
+
+=head2 ALIGNMENTS
+
+  # This assumed that you have $db pointing to a database, $out to an output file
+  # $slxdir to a directory and $psiout    
+  # note the alignments can only be obtained if the flag "-m 6" is run.
+  # It might also be necessary to use the flag -v to get all alignments
+  # 
+    my @psiparams = ('database' => $db , 'output' => $out, 'j' => 3, 'm' => 6,
+		     'h' => 1.e-3 , 'F' => 'T' , 'Q' => $psiout ); 
+    my $factory = Bio::Tools::Run::StandAloneBlast->new(@psiparams);
+    my $report = $factory->blastpgp($seq);
+    my $total_iterations = $report->number_of_iterations();
+    my $last_iteration = $report->round($total_iterations);
+    my $align=$last_iteration->Align;
+    my $slxfile=$slxdir.$id.".slx";
+    my $slx = Bio::AlignIO->new('-format' => 'selex','-file' => ">".$slxfile );
+    $slx->write_aln($align);
+
+=head1 DESCRIPTION
+
+See the documentation for BPpsilite.pm for a description of the
+Iteration.pm module.
+
+=head1 AUTHORS - Peter Schattner
+
+Email: schattner@alum.mit.edu
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason@cgt.mc.duke.edu
+
+=head1 ACKNOWLEDGEMENTS
+
+Based on work of:
+Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf), 
+Lorenz Pollak (lorenz@ist.org, bioperl port)
+
+=head1 COPYRIGHT
+
+BPlite.pm is copyright (C) 1999 by Ian Korf. 
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+package Bio::Tools::BPlite::Iteration;
+
+use strict;
+use vars qw(@ISA);
+use Bio::Root::Root; # root object to inherit from
+use Bio::Tools::BPlite; #
+use Bio::Tools::BPlite::Sbjct;
+
+@ISA = qw(Bio::Root::Root);
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    ($self->{'PARENT'},$self->{'ROUND'}) =
+	$self->_rearrange([qw(PARENT
+			      ROUND
+			      )],@args);
+    
+    $self->{'QUERY'} = $self->{'PARENT'}->{'QUERY'};
+    $self->{'LENGTH'} = $self->{'PARENT'}->{'LENGTH'};
+
+    if($self->_parseHeader) {$self->{'REPORT_DONE'} = 0} # there are alignments
+    else                    {$self->{'REPORT_DONE'} = 1} # empty report
+  
+    return $self; # success - we hope!
+}
+
+=head2 query
+
+ Title    : query
+ Usage    : $query = $obj->query();
+ Function : returns the query object
+ Example  :
+ Returns  : query object
+ Args     :
+
+=cut
+
+sub query    {shift->{'QUERY'}}
+
+=head2 qlength
+
+ Title    : qlength
+ Usage    : $len = $obj->qlength();
+ Returns  : length of query
+ Args     : none
+
+=cut
+
+sub qlength  {shift->{'LENGTH'}}
+
+=head2 newhits
+
+ Title    :  newhits
+ Usage    : $newhits = $obj->newhits();
+ Returns  : reference to an array listing all the hits 
+            from the current iteration which were not identified 
+            in the previous iteration
+ Args     : none
+
+=cut
+
+sub  newhits  {shift->{'NEWHITS'}}
+
+=head2 oldhits
+
+ Title    :  oldhits
+ Usage    : $oldhits = $obj->oldhits();
+ Returns  : reference to an array listing all the hits from 
+            the current iteration which were identified and 
+            above threshold in the previous iteration
+ Args     : none
+
+=cut
+
+sub  oldhits  {shift->{'OLDHITS'}}
+
+
+=head2 nextSbjct
+
+ Title    : nextSbjct
+ Usage    : $sbjct = $obj->nextSbjct();
+ Function : Method of iterating through all the Sbjct retrieved
+            from parsing the report 
+#Example  : while ( my $sbjct = $obj->nextSbjct ) {}
+ Returns  : next Sbjct object or undef if finished
+ Args     :
+
+=cut
+
+sub nextSbjct {
+  my ($self) = @_;
+  $self->_fastForward or return undef;
+ 
+  #######################
+  # get all sbjct lines #
+  #######################
+  my $def = $self->_readline();
+
+  while(defined ($_ = $self->_readline) )  {
+    if    ($_ !~ /\w/)            {next}
+    elsif ($_ =~ /Strand HSP/)    {next} # WU-BLAST non-data
+    elsif ($_ =~ /^\s{0,2}Score/) {$self->_pushback( $_); last}
+    elsif ($_ =~ /^(\d+) .* \d+$/) {   # This is not correct at all
+       $self->_pushback($_);           # 1: HSP does not work for -m 6 flag
+       $def = $1;                      # 2: length/name are incorrect     
+       my $length = undef;	       # 3: Names are repeated many times.
+       my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def,
+						 '-length'=>$length,
+						 '-parent'=>$self);
+       return $sbjct;
+   } # m-6 
+    elsif ($_ =~ /^Parameters|^\s+Database:|^\s+Posted date:/) {
+	$self->_pushback( $_); 
+	last;
+    } else {$def .= $_}
+}
+  $def =~ s/\s+/ /g;
+  $def =~ s/\s+$//g;
+  $def =~ s/Length = ([\d,]+)$//g;
+  my $length = $1;
+  return 0 unless $def =~ /^>/;
+  $def =~ s/^>//;
+  
+  ####################
+  # the Sbjct object #
+  ####################
+  my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def,
+					    '-length'=>$length,
+					    '-parent'=>$self);
+  return $sbjct;
+}
+
+
+# This is added by /AE
+
+=head2 Align
+
+ Title    : Align
+ Usage    : $SimpleAlign = $obj->Align();
+ Function : Method to obtain a simpleAlign object from psiblast
+ Example  : $SimpleAlign = $obj->Align();
+ Returns  : SimpleAlign object or undef if not found.
+ BUG      : Only works if psiblast has been run with m 6 flag
+ Args     :
+
+=cut
+
+sub Align {
+    use Bio::SimpleAlign;
+    my ($self) = @_;
+    $self->_fastForward or return undef;
+    my $lastline = $self->_readline();
+    return undef unless $lastline =~ /^QUERY/; # If psiblast not run correctly
+    my (%sequence,%first,%last,$num);
+
+    if ( $lastline =~ /^QUERY\s+(\d*)\s*([-\w]+)\s*(\d*)\s*$/){
+	my $name='QUERY';
+	my $start=$1; 
+	my $seq=$2; 
+	my $stop=$3; 
+	$seq =~ s/-/\./g; 
+	$start =~ s/ //g; 
+	$stop =~ s/ //g; 
+	$sequence{$name} .= $seq; 
+	if ($first{$name} eq ''){$first{$name}=$start;} 
+	if ($stop ne ''){$last{$name}=$stop;} 
+#     print "FOUND:\t$seq\t$start\t$stop\n"; 
+	$num=0;
+    } 
+    while(defined($_ = $self->_readline()) ){
+	chomp($_);
+	if ( $_ =~ /^QUERY\s+(\d+)\s*([\-A-Z]+)\s*(\+)\s*$/){
+	    my $name='QUERY';
+	    my $start=$1; 
+	    my $seq=$2; 
+	    my $stop=$3; 
+	    $seq   =~ s/-/\./g; 
+	    $start =~ s/ //g; 
+	    $stop  =~ s/ //g; 
+	    $sequence{$name} .= $seq; 
+	    if ($first{$name} eq '') { $first{$name} = $start;} 
+	    if ($stop ne '') { $last{$name}=$stop;} 
+	    $num=0;
+	} elsif ( $_ =~ /^(\d+)\s+(\d+)\s*([\-A-Z]+)\s*(\d+)\s*$/ ){
+	    my $name=$1.".".$num;
+	    my $start=$2;
+	    my $seq=$3;
+	    my $stop=$4;
+	    $seq =~ s/-/\./g;
+	    $start =~ s/ //g;
+	    $stop =~ s/ //g;
+	    $sequence{$name} .= $seq;
+	    if ($first{$name} eq ''){$first{$name}=$start;}
+	    if ($stop ne ''){$last{$name}=$stop;}
+	    $num++;
+	} 
+    } 
+    my $align = new Bio::SimpleAlign();
+    my @keys=sort keys(%sequence);
+    foreach my $name (@keys){
+	my $nse = $name."/".$first{$name}."-".$last{$name};
+	my $seqobj = Bio::LocatableSeq->new( -seq => $sequence{$name},
+					     -id  => $name,
+					     -name  => $nse,
+					     -start  => $first{$name},
+					     -end  => $last{$name}
+					     );
+
+	$align->add_seq($seqobj);
+    }
+    return $align;
+}
+
+# Start of internal subroutines.
+
+sub _parseHeader {
+  my ($self) = @_;
+  my (@old_hits, @new_hits);
+
+  my $newhits_true = ($self->{'ROUND'} < 2) ? 1  : 0 ;
+  while(defined($_ = $self->_readline()) ) {
+    if ($_ =~ /(\w\w|.*|\w+.*)\s\s+(\d+)\s+([-\.e\d]+)$/)    {
+	my $id = $1;
+	my $score= $2;	#not used currently
+	my $evalue= $3; 	#not used currently
+    	if ($newhits_true) { push ( @new_hits, $id);}
+    	else { push (@old_hits, $id);}
+    }
+    elsif ($_ =~ /^Sequences not found previously/)  {$newhits_true = 1 ;}
+# This is changed for "-m 6" option /AE
+    elsif ($_ =~ /^>/ || $_ =~ /^QUERY/)
+    {
+	$self->_pushback($_);
+	$self->{'OLDHITS'} = \@old_hits;
+	$self->{'NEWHITS'} = \@new_hits;
+	return 1;
+    }
+    elsif ($_ =~ /^Parameters|^\s+Database:|^\s*Results from round\s+(d+)/) {
+      	$self->_pushback($_);
+      	return 0; #  no sequences found in this iteration
+    }
+  }
+  return 0; # no sequences found in this iteration
+}
+
+sub _fastForward {
+  my ($self) = @_;
+  return 0 if $self->{'REPORT_DONE'}; # empty report
+
+  while(defined($_ = $self->_readline()) ) {
+      if( $_ =~ /^>/ ||
+	  $_ =~ /^QUERY|^\d+ .* \d+$/ ) { # Changed to also handle "-m 6" /AE
+	  $self->_pushback($_);
+	  return 1;
+      }
+#    print "FASTFORWARD",$_,"\n";
+      if ($_ =~ /^>|^Parameters|^\s+Database:/) {
+	  $self->_pushback($_);
+	  return 1;
+      }
+  }
+  $self->warn("Possible error (2) while parsing BLAST report!");
+}
+
+
+=head2 _readline
+
+ Title   : _readline
+ Usage   : $obj->_readline
+ Function: Reads a line of input.
+
+           Note that this method implicitely uses the value of $/ that is
+           in effect when called.
+
+           Note also that the current implementation does not handle pushed
+           back input correctly unless the pushed back input ends with the
+           value of $/.
+ Example :
+ Returns : 
+
+=cut
+
+sub _readline{
+   my ($self) = @_;
+   return $self->{'PARENT'}->_readline();
+}
+
+=head2 _pushback
+
+ Title   : _pushback
+ Usage   : $obj->_pushback($newvalue)
+ Function: puts a line previously read with _readline back into a buffer
+ Example :
+ Returns :
+ Args    : newvalue
+
+=cut
+
+sub _pushback {
+   my ($self, $arg) = @_;   
+   return $self->{'PARENT'}->_pushback($arg);    
+}
+1;
+__END__