diff variant_effect_predictor/Bio/Tools/BPlite/Sbjct.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Tools/BPlite/Sbjct.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,361 @@
+# $Id: Sbjct.pm,v 1.23.2.1 2003/02/20 00:39:03 jason Exp $
+###############################################################################
+# Bio::Tools::BPlite::Sbjct
+###############################################################################
+#
+# The original BPlite.pm module has been written by Ian Korf !
+# see http://sapiens.wustl.edu/~ikorf
+#
+# You may distribute this module under the same terms as perl itself
+#
+# BioPerl module for Bio::Tools::BPlite::Sbjct
+#
+# Cared for by Peter Schattner <schattner@alum.mit.edu>
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::BPlite::Sbjct - A Blast Subject (database search Hit)
+
+=head1 SYNOPSIS
+
+ use Bio::Tools::BPlite
+ my $report = new Bio::Tools::BPlite(-fh=>\*STDIN);
+ while(my $sbjct = $report->nextSbjct) {
+     $sbjct->name;    # access to the hit name
+     "$sbjct";        # overloaded to return name
+     $sbjct->nextHSP; # gets the next HSP from the sbjct
+     while(my $hsp = $sbjct->nextHSP) {
+	 # canonical form is again a while loop
+     }
+
+=head1 DESCRIPTION
+
+See L<Bio::Tools::BPlite> for a more detailed information about the
+BPlite BLAST parsing objects.
+
+The original BPlite.pm module has been written by Ian Korf!
+See http://sapiens.wustl.edu/~ikorf
+
+The Sbjct object encapsulates a Hit in a Blast database
+search.  The Subjects are the "Hits" for a particular query.  A
+Subject may be made up of multiple High Scoring Pairs (HSP) which are
+accessed through the nextHSP method.
+
+If you are searching for the P-value or percent identity that is
+specific to each HSP and you will need to use the nextHSP method to
+get access to that data.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org            - General discussion
+http://bioperl.org/MailList.shtml  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  bioperl-bugs@bioperl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Peter Schattner
+
+Email: schattner@alum.mit.edu
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason@bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Tools::BPlite::Sbjct;
+
+use strict;
+
+use Bio::Root::Root;        # root object to inherit from
+use Bio::Tools::BPlite::HSP; # we want to use HSP
+#use overload '""' => 'name';
+use vars qw(@ISA);
+
+@ISA = qw(Bio::Root::Root);
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    
+    ($self->{'NAME'},$self->{'LENGTH'},
+     $self->{'PARENT'}) =
+	 $self->_rearrange([qw(NAME
+			       LENGTH
+			       PARENT
+			       )],@args);
+    $self->report_type($self->{'PARENT'}->{'BLAST_TYPE'} || 'UNKNOWN');
+    $self->{'HSP_ALL_PARSED'} = 0;
+    
+  return $self;
+}
+
+=head2 name
+
+ Title    : name
+ Usage    : $name = $obj->name();
+ Function : returns the name of the Sbjct 
+ Example  : 
+ Returns  : name of the Sbjct 
+ Args     :
+
+=cut
+
+sub name {shift->{'NAME'}}
+
+=head2 report_type
+
+ Title    : report_type
+ Usage    : $type = $sbjct->report_type()
+ Function : Returns the type of report from which this hit was obtained.
+            This usually pertains only to BLAST and friends reports, for which
+            the report type denotes what type of sequence was aligned against
+            what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated dna-prt, 
+            TBLASTN prt-translated dna, TBLASTX translated dna-translated dna).
+ Example  : 
+ Returns  : A string (BLASTN, BLASTP, BLASTX, TBLASTN, TBLASTX, UNKNOWN)
+ Args     : a string on set (you should know what you are doing)
+
+=cut
+
+sub report_type {
+    my ($self, $rpt) = @_;
+    if($rpt) {
+	$self->{'_report_type'} = $rpt;
+    }
+    return $self->{'_report_type'};
+}
+
+=head2 nextFeaturePair
+
+ Title    : nextFeaturePair
+ Usage    : $name = $obj->nextFeaturePair();
+ Function : same as the nextHSP function 
+ Example  : 
+ Returns  : next FeaturePair 
+ Args     :
+
+=cut
+
+sub nextFeaturePair {shift->nextHSP}; # just another name
+
+=head2 nextHSP
+
+ Title    : nextHSP
+ Usage    : $hsp = $obj->nextHSP();
+ Function : returns the next available High Scoring Pair
+ Example  : 
+ Returns  : Bio::Tools::HSP  or null if finished
+ Args     :
+
+=cut
+
+sub nextHSP {
+  my ($self) = @_;  
+  return undef if $self->{'HSP_ALL_PARSED'};
+  
+  ############################
+  # get and parse scorelines #
+  ############################
+  my ($qframe, $sframe);
+  my $scoreline = $self->_readline();
+  my $nextline = $self->_readline();
+  return undef if not defined $nextline;
+  $scoreline .= $nextline;
+  my ($score, $bits);
+  if ($scoreline =~ /\d bits\)/) {
+    ($score, $bits) = $scoreline =~
+      /Score = (\d+) \((\S+) bits\)/; # WU-BLAST
+  }
+  else {
+    ($bits, $score) = $scoreline =~
+      /Score =\s+(\S+) bits \((\d+)/; # NCBI-BLAST
+  }
+  
+  my ($match, $hsplength) = ($scoreline =~ /Identities = (\d+)\/(\d+)/);
+  my ($positive) = ($scoreline =~ /Positives = (\d+)/);
+  my ($gaps) = ($scoreline =~ /Gaps = (\d+)/);
+  if($self->report_type() eq 'TBLASTX') {
+      ($qframe, $sframe) = $scoreline =~ /Frame =\s+([+-]\d)\s+\/\s+([+-]\d)/;
+  } elsif ($self->report_type() eq 'TBLASTN')  {
+      ($sframe) = $scoreline =~ /Frame =\s+([+-]\d)/;
+  } else {
+      ($qframe) = $scoreline =~ /Frame =\s+([+-]\d)/;
+  }
+  $positive = $match if not defined $positive;
+  $gaps = '0' if not defined $gaps;
+  my ($p)        = ($scoreline =~ /[Sum ]*P[\(\d+\)]* = (\S+)/);
+  unless (defined $p) {(undef, $p) = $scoreline =~ /Expect(\(\d+\))? =\s+(\S+)/}
+  my ($exp) = ($scoreline =~ /Expect(?:\(\d+\))? =\s+([^\s,]+)/);
+  $exp = -1 unless( defined $exp );
+
+  $self->throw("Unable to parse '$scoreline'") unless defined $score;
+  
+  #######################
+  # get alignment lines #
+  #######################
+  my (@hspline);
+  while( defined($_ = $self->_readline()) ) {
+      if ($_ =~ /^WARNING:|^NOTE:/) {
+	  while(defined($_ = $self->_readline())) {last if $_ !~ /\S/}
+      }
+      elsif ($_ !~ /\S/)            {next}
+      elsif ($_ =~ /Strand HSP/)    {next} # WU-BLAST non-data
+      elsif ($_ =~ /^\s*Strand/)    {next} # NCBI-BLAST non-data
+      elsif ($_ =~ /^\s*Score/)     {$self->_pushback($_); last}
+
+      elsif ($_ =~ /^>|^Histogram|^Searching|^Parameters|^\s+Database:|^CPU\stime|^\s*Lambda/)   
+      {    
+	  #ps 5/28/01	
+	  # elsif ($_ =~ /^>|^Parameters|^\s+Database:|^CPU\stime/)   {
+	  $self->_pushback($_);
+
+	  $self->{'HSP_ALL_PARSED'} = 1;
+	  last;
+      }
+      elsif( $_ =~ /^\s*Frame/ ) {
+	  if ($self->report_type() eq 'TBLASTX') {
+	      ($qframe, $sframe) = $_ =~ /Frame = ([\+-]\d)\s+\/\s+([\+-]\d)/;
+	  } elsif ($self->report_type() eq 'TBLASTN') {
+	      ($sframe) = $_ =~ /Frame = ([\+-]\d)/;
+	  } else {
+	      ($qframe) = $_ =~ /Frame = ([\+-]\d)/;
+	  }
+      }
+      else {
+	  push @hspline, $_;	#      store the query line
+	  $nextline = $self->_readline();
+	  # Skip "pattern" line when parsing PHIBLAST reports, otherwise store the alignment line
+	  my $l1 = ($nextline =~ /^\s*pattern/) ? $self->_readline() : $nextline;
+	  push @hspline, $l1;	# store the alignment line
+	  my $l2 = $self->_readline(); push @hspline, $l2; # grab/store the sbjct line
+      }
+  }
+  
+  #########################
+  # parse alignment lines #
+  #########################
+  my ($ql, $sl, $as) = ("", "", "");
+  my ($qb, $qe, $sb, $se) = (0,0,0,0);
+  my (@QL, @SL, @AS); # for better memory management
+  
+  for(my $i=0;$i<@hspline;$i+=3) {
+    # warn $hspline[$i], $hspline[$i+2];
+    $hspline[$i]   =~ /^(?:Query|Trans):\s+(\d+)\s*([\D\S]+)\s+(\d+)/;
+    $ql = $2; $qb = $1 unless $qb; $qe = $3;
+    
+    my $offset = index($hspline[$i], $ql);
+    $as = substr($hspline[$i+1], $offset, CORE::length($ql));
+    
+    $hspline[$i+2] =~ /^Sbjct:\s+(\d+)\s*([\D\S]+)\s+(\d+)/;
+    $sl = $2; $sb = $1 unless $sb; $se = $3;
+
+    push @QL, $ql; push @SL, $sl; push @AS, $as;
+  }
+
+  ##################
+  # the HSP object #
+  ##################
+  $ql = join("", @QL);
+  $sl = join("", @SL);
+  $as = join("", @AS);
+# Query name and length are not in the report for a bl2seq report so {'PARENT'}->query and
+# {'PARENT'}->qlength will not be available.
+  my ($qname, $qlength) = ('unknown','unknown');
+  if ($self->{'PARENT'}->can('query')) {
+	$qname = $self->{'PARENT'}->query;
+	$qlength = $self->{'PARENT'}->qlength;
+  }	
+  
+  my $hsp = new Bio::Tools::BPlite::HSP
+      ('-score'      => $score, 
+       '-bits'       => $bits, 
+       '-match'      => $match,
+       '-positive'   => $positive, 
+       '-gaps'       => $gaps,
+       '-hsplength'  => $hsplength,
+       '-p'          => $p,
+       '-exp'        => $exp,
+       '-queryBegin' => $qb, 
+       '-queryEnd'   => $qe, 
+       '-sbjctBegin' => $sb,
+       '-sbjctEnd'   => $se, 
+       '-querySeq'   => $ql, 
+       '-sbjctSeq'   => $sl,
+       '-homologySeq'=> $as, 
+       '-queryName'  => $qname,
+#					'-queryName'=>$self->{'PARENT'}->query,
+       '-sbjctName'  => $self->{'NAME'},
+       '-queryLength'=> $qlength,
+#					'-queryLength'=>$self->{'PARENT'}->qlength,
+       '-sbjctLength'=> $self->{'LENGTH'},
+       '-queryFrame' => $qframe,
+       '-sbjctFrame' => $sframe,
+       '-blastType'  => $self->report_type());
+  return $hsp;
+}
+
+=head2 _readline
+
+ Title   : _readline
+ Usage   : $obj->_readline
+ Function: Reads a line of input.
+
+           Note that this method implicitely uses the value of $/ that is
+           in effect when called.
+
+           Note also that the current implementation does not handle pushed
+           back input correctly unless the pushed back input ends with the
+           value of $/.
+ Example :
+ Returns : 
+
+=cut
+
+sub _readline{
+   my ($self) = @_;
+   return $self->{'PARENT'}->_readline();
+}
+
+=head2 _pushback
+
+ Title   : _pushback
+ Usage   : $obj->_pushback($newvalue)
+ Function: puts a line previously read with _readline back into a buffer
+ Example :
+ Returns :
+ Args    : newvalue
+
+=cut
+
+sub _pushback {
+   my ($self, $arg) = @_;   
+   return $self->{'PARENT'}->_pushback($arg);    
+}
+
+1;