diff variant_effect_predictor/Bio/Search/Result/BlastResult.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Search/Result/BlastResult.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,716 @@
+#-----------------------------------------------------------------
+# $Id: BlastResult.pm,v 1.13 2002/12/24 15:48:41 jason Exp $
+#
+# BioPerl module Bio::Search::Result::BlastResult
+#
+# Cared for by Steve Chervitz <sac@bioperl.org>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Result::BlastResult - A top-level BLAST Report object
+
+=head1 SYNOPSIS
+
+The construction of BlastResult objects is performed
+by the B<Bio::SearchIO::psiblast> parser.
+Therefore, you do not need to
+use B<Bio::Search::Result::BlastResult> directly. If you need to construct
+BlastHits directly, see the new() function for details.
+
+For B<Bio::SearchIO> BLAST parsing usage examples, see the
+B<examples/search-blast> directory of the Bioperl distribution.
+
+=head1 DESCRIPTION
+
+This module supports BLAST versions 1.x and 2.x, gapped and ungapped,
+and PSI-BLAST.
+
+=head1 DEPENDENCIES
+
+Bio::Search::Result::BlastResult.pm is a concrete class that inherits from B<Bio::Root::Root> and B<Bio::Search::Result::ResultI>. It  relies on two other modules:
+
+=over 4
+
+=item B<Bio::Search::Hit::BlastHit> 
+
+Encapsulates a single BLAST hit.
+
+=item B<Bio::Search::GenericDatabase>
+
+Provides an interface to a blast database metadata.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists 
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules.  Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+    bioperl-l@bioperl.org              - General discussion
+    http://bio.perl.org/MailList.html  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution. Bug reports can be submitted via email
+or the web:
+
+    bioperl-bugs@bio.perl.org                   
+    http://bugzilla.bioperl.org/           
+
+=head1 AUTHOR 
+
+Steve Chervitz E<lt>sac@bioperl.orgE<gt>
+
+See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.
+
+=head1 ACKNOWLEDGEMENTS
+
+This software was originally developed in the Department of Genetics
+at Stanford University. I would also like to acknowledge my
+colleagues at Affymetrix for useful feedback.
+
+=head1 COPYRIGHT
+
+Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
+
+=cut
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Search::Result::BlastResult;
+
+use strict;
+
+use Bio::Search::Result::ResultI;
+use Bio::Root::Root;
+
+use overload 
+    '""' => \&to_string;
+
+# Package globals: @ISA holds the inheritance chain, $Revision the CVS id string.
+use vars qw(@ISA $Revision );
+
+$Revision = '$Id: BlastResult.pm,v 1.13 2002/12/24 15:48:41 jason Exp $';  #'
+# Inherit from Bio::Root::Root and the Bio::Search::Result::ResultI interface.
+@ISA = qw( Bio::Root::Root Bio::Search::Result::ResultI);
+
+#----------------
+sub new {
+#----------------
+    # Constructor: all argument handling is delegated to the parent class.
+    my $class  = shift;
+    my $object = $class->SUPER::new(@_);
+    return $object;
+}
+
+#sub DESTROY {
+#    my $self = shift;
+#    print STDERR "->DESTROYING $self\n";
+#}
+
+
+#=================================================
+# Begin Bio::Search::Result::ResultI implementation
+#=================================================
+
+=head2 next_hit
+
+See L<Bio::Search::Result::ResultI::next_hit()|Bio::Search::Result::ResultI> for documentation
+
+=cut
+
+#----------------
+sub next_hit {
+#----------------
+    my $self = shift;
+
+    # Snapshot the hit list into a private queue the first time we iterate.
+    $self->{'_hit_queue'} = [ $self->hits() ]
+        if not defined $self->{'_hit_queue'};
+
+    # Hand back the front of the queue; yields undef once it is empty.
+    return shift @{ $self->{'_hit_queue'} };
+}
+
+=head2 query_name
+
+See L<Bio::Search::Result::ResultI::query_name()|Bio::Search::Result::ResultI> for documentation
+
+=cut
+
+#----------------
+sub query_name {
+#----------------
+    # Combined getter/setter for the query sequence name.
+    my ($self, @new_value) = @_;
+    if (scalar @new_value) {
+        my $cleaned = $new_value[0];
+        # Strip leading whitespace and trailing whitespace or a trailing comma.
+        $cleaned =~ s/^\s+|(\s+|,)$//g;
+        $self->{'_query_name'} = $cleaned;
+    }
+    return $self->{'_query_name'};
+}
+
+=head2 query_length
+
+See L<Bio::Search::Result::ResultI::query_length()|Bio::Search::Result::ResultI> for documentation
+
+=cut
+
+#----------------
+sub query_length {
+#----------------
+    # Combined getter/setter for the query sequence length.
+    my ($self, @new_length) = @_;
+    $self->{'_query_length'} = $new_length[0] if @new_length;
+    return $self->{'_query_length'};
+}
+
+=head2 query_description
+
+See L<Bio::Search::Result::ResultI::query_description()|Bio::Search::Result::ResultI> for documentation
+
+=cut
+
+#----------------
+sub query_description {
+#----------------
+    # Combined getter/setter for the query description.
+    # When setting, surrounding whitespace is trimmed and a copy of the
+    # query name at the start of the description is removed.
+    # A false or undefined description is stored as the empty string.
+    my $self = shift;
+    if(@_) {
+        my $desc = shift;
+        if( defined $desc ) {
+            $desc =~ s/(^\s+|\s+$)//g;
+            # Remove duplicated ID at beginning of description string.
+            # The name is quoted with \Q..\E so that metacharacters in it
+            # (e.g. the '|' in "gi|123") are matched literally, and the
+            # pattern is rebuilt on every call (no /o) so that it always
+            # reflects the current object's query name.
+            my $name = $self->{'_query_name'};
+            if( defined $name and length $name ) {
+                $desc =~ s/^\Q$name\E//;
+            }
+        }
+        $self->{'_query_query_desc'} = $desc || '';
+    }
+    return $self->{'_query_query_desc'};
+}
+
+
+=head2 analysis_method
+
+See L<Bio::AnalysisResultI::analysis_method()|Bio::AnalysisResultI> for documentation
+
+This implementation ensures that the name matches /blast/i.
+
+=cut
+
+#----------------
+sub analysis_method {
+#----------------
+    # Getter/setter for the analysis program name.
+    # Only names matching /blast/i are accepted; anything else is reported
+    # through $self->throw() and is not stored.
+    my $self   = shift;
+    my $method = shift;
+    if ($method) {
+        my $is_blast = ($method =~ /blast/i);
+        $self->throw("method $method not supported in " . ref($self))
+            if not $is_blast;
+        $self->{'_analysis_prog'} = $method if $is_blast;
+    }
+    return $self->{'_analysis_prog'};
+}
+
+=head2 analysis_method_version
+
+See L<Bio::AnalysisResultI::analysis_method_version()|Bio::AnalysisResultI> for documentation
+
+=cut
+
+#----------------
+sub analysis_method_version {
+#----------------
+    # Getter/setter for the analysis program version.
+    # A false argument (undef, 0, '') is treated as "no new value".
+    my $self    = shift;
+    my $version = shift;
+    $self->{'_analysis_progVersion'} = $version if $version;
+    return $self->{'_analysis_progVersion'};
+}
+
+
+=head2 analysis_query
+
+See L<Bio::AnalysisResultI::analysis_query()|Bio::AnalysisResultI> for documentation
+
+=cut
+
+#----------------
+sub analysis_query {
+#----------------
+    # Returns a Bio::PrimarySeq describing the query, building and caching
+    # it on first use from the query name, description and length.
+    my $self = shift;
+
+    return $self->{'_analysis_query'}
+        if defined $self->{'_analysis_query'};
+
+    require Bio::PrimarySeq;
+
+    # Program names matching blastp/tblastn are given a protein query;
+    # every other program name is treated as DNA.
+    my $moltype = 'dna';
+    $moltype = 'protein' if $self->analysis_method =~ /blastp|tblastn/i;
+
+    my $query = Bio::PrimarySeq->new( -display_id => $self->query_name,
+                                      -desc       => $self->query_description,
+                                      -moltype    => $moltype
+                                    );
+    $self->{'_analysis_query'} = $query;
+    $query->length( $self->query_length );
+
+    return $query;
+}
+
+=head2 analysis_subject
+
+ Usage     : $blastdb = $result->analysis_subject();
+ Purpose   : Get a Bio::Search::DatabaseI object containing
+             information about the database used in the BLAST analysis.
+ Returns   : Bio::Search::DatabaseI object.
+ Argument  : n/a
+
+=cut
+
+#---------------
+sub analysis_subject { 
+#---------------
+    # Getter/setter for the database object of this BLAST analysis.
+    # Argument : (optional) an object that isa Bio::Search::DatabaseI.
+    # Returns  : the stored database object, or undef if none was set.
+    # Throws   : Bio::Root::BadParameter when given a true value that is
+    #            not a Bio::Search::DatabaseI object.
+    my ($self, $blastdb) = @_; 
+    if($blastdb) {
+        require Scalar::Util;
+        # blessed() guards the isa() call: an unblessed reference (e.g. a
+        # plain hash ref) would otherwise die with a raw Perl error
+        # instead of the intended BadParameter exception.
+        if( Scalar::Util::blessed($blastdb) and $blastdb->isa('Bio::Search::DatabaseI')) {
+            $self->{'_analysis_sbjct'} = $blastdb;
+        }
+        else {
+            $self->throw(-class =>'Bio::Root::BadParameter',
+                         -text => "Can't set BlastDB: not a Bio::Search::DatabaseI $blastdb"
+                         );
+        }
+    }
+    return $self->{'_analysis_sbjct'};
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while( my $feat = $blast_result->next_feature ) { # do something }
+ Function: Returns the next feature available in the analysis result, or
+           undef if there are no more features.
+ Example :
+ Returns : A Bio::SeqFeatureI compliant object, in this case, 
+           each Bio::Search::HSP::BlastHSP object within each BlastHit.
+ Args    : None
+
+=cut
+
+#---------------
+sub next_feature{
+#---------------
+   # Walks every HSP of every hit in order: drains the current hit's HSPs,
+   # then moves on to the next hit, until no hits remain.
+   my $self = shift;
+
+   while (1) {
+       my $hit = $self->{'_current_hit'};
+       if( not defined $hit ) {
+           # No hit in progress: fetch the next one, or stop if none is left.
+           $hit = $self->{'_current_hit'} = $self->next_hit;
+           return undef if not defined $hit;
+       }
+
+       my $hsp = $hit->next_hsp;
+       return ($hsp || undef) if defined $hsp;
+
+       # Current hit has no more HSPs; forget it and try the following hit.
+       $self->{'_current_hit'} = undef;
+   }
+}
+
+
+# Alias for analysis_method(); all arguments are passed straight through.
+sub algorithm {
+    my $self = shift;
+    return $self->analysis_method(@_);
+}
+# Alias for analysis_method_version(); all arguments are passed straight through.
+sub algorithm_version {
+    my $self = shift;
+    return $self->analysis_method_version(@_);
+}
+
+=head2 available_parameters
+
+ Title   : available_parameters
+ Usage   : my @params = $report->available_parameters
+ Function: Returns the names of the available parameters
+ Returns : Return list of available parameters used for this report
+ Args    : none
+
+=cut
+
+sub available_parameters{
+    # Stub: this class does not track run parameters, so nothing is returned
+    # (empty list in list context, undef in scalar context).
+    return;
+}
+
+
+=head2 get_parameter
+
+ Title   : get_parameter
+ Usage   : my $gap_ext = $report->get_parameter('gapext')
+ Function: Returns the value for a specific parameter used
+           when running this report
+ Returns : string
+ Args    : name of parameter (string)
+
+=cut
+
+sub get_parameter{
+    # Stub: no parameters are stored, so every lookup yields an empty string.
+    my $no_value = '';
+    return $no_value;
+}
+
+=head2 get_statistic
+
+ Title   : get_statistic
+ Usage   : my $gap_ext = $report->get_statistic('kappa')
+ Function: Returns the value for a specific statistic available 
+           from this report
+ Returns : string
+ Args    : name of statistic (string)
+
+=cut
+
+sub get_statistic{
+    # Stub: no statistics are stored, so every lookup yields an empty string.
+    my $no_value = '';
+    return $no_value;
+}
+
+=head2 available_statistics
+
+ Title   : available_statistics
+ Usage   : my @statnames = $report->available_statistics
+ Function: Returns the names of the available statistics
+ Returns : Return list of available statistics used for this report
+ Args    : none
+
+=cut
+
+sub available_statistics{
+    # Stub: this class does not track statistics, so nothing is returned
+    # (empty list in list context, undef in scalar context).
+    return;
+}
+
+#=================================================
+# End Bio::Search::Result::ResultI implementation
+#=================================================
+
+
+=head2 to_string
+
+ Title   : to_string
+ Usage   : print $blast->to_string;
+ Function: Returns a string representation for the Blast result. 
+           Primarily intended for debugging purposes.
+ Example : see usage
+ Returns : A string of the form:
+           [BlastResult] <analysis_method> query=<name> <description>, db=<database>
+           e.g.:
+           [BlastResult] BLASTP query=YEL060C vacuolar protease B, db=PDBUNIQ 
+ Args    : None
+
+=cut
+
+#---------------
+sub to_string {
+#---------------
+    # Builds the one-line summary used for stringification (see 'use overload').
+    my $self = shift;
+
+    # Fetch each piece in scalar context, in the order they appear in the output.
+    my $method      = $self->analysis_method;
+    my $name        = $self->query_name;
+    my $description = $self->query_description;
+    my $database    = $self->database_name;
+
+    return "[BlastResult] " . $method . " query=" . $name . " " . $description . ", db=" . $database;
+}
+
+#---------------
+sub database_name {
+#---------------
+    # Name of the searched database, taken from the analysis_subject object;
+    # an empty string when no such object has been set.
+    my $self = shift;
+    return '' unless ref $self->analysis_subject;
+    my $dbname = $self->analysis_subject->name;
+    return $dbname;
+}
+
+=head2 database_entries
+
+ Title   : database_entries
+ Usage   : $num_entries = $result->database_entries()
+ Function: Used to obtain the number of entries contained in the database.
+ Returns : a scalar integer representing the number of entries in the database
+           or an empty string if no database object has been set on this result.
+ Args    : none (this implementation is read-only)
+
+
+=cut
+
+#---------------
+sub database_entries {
+#---------------
+    # Number of entries in the searched database, taken from the
+    # analysis_subject object; an empty string when no such object is set.
+    my $self = shift;
+    return '' unless ref $self->analysis_subject;
+    my $dbentries = $self->analysis_subject->entries;
+    return $dbentries;
+}
+
+
+=head2 database_letters
+
+ Title   : database_letters
+ Usage   : $size = $result->database_letters()
+ Function: Used to obtain the size of database that was searched against.
+ Returns : a scalar integer (units specific to algorithm, but probably the
+           total number of residues in the database, if available) or an
+           empty string if no database object has been set on this result.
+ Args    : none (this implementation is read-only)
+
+
+=cut
+
+#---------------
+sub database_letters {
+#---------------
+    # Size of the searched database in letters, taken from the
+    # analysis_subject object; an empty string when no such object is set.
+    my $self = shift;
+    return '' unless ref $self->analysis_subject;
+    my $dbletters = $self->analysis_subject->letters;
+    return $dbletters;
+}
+
+#---------------
+sub hits {
+#---------------
+    # Returns a copy of the stored hit list (its size in scalar context);
+    # empty when no hits have been added.
+    my $self   = shift;
+    my $stored = $self->{'_hits'};
+    my @copy   = ref $stored ? @{$stored} : ();
+    return @copy;
+}
+
+=head2 add_hit
+
+ Usage     : $blast->add_hit( $hit );
+ Purpose   : Adds a hit object to the collection of hits in this BLAST result.
+ Returns   : n/a
+ Argument  : A Bio::Search::Hit::HitI object
+ Comments  : For PSI-BLAST, hits from all iterations are lumped together.
+             For any given hit, you can determine the iteration in which it was
+             found by checking $hit->iteration().
+
+=cut
+
+#---------------
+sub add_hit {
+#---------------
+    # Appends a hit object to this result and records its name.
+    # Argument : an object that isa Bio::Search::Hit::HitI.
+    # Throws   : Bio::Root::BadParameter for anything else.
+    my ($self, $hit) = @_;
+
+    require Scalar::Util;
+    # blessed() guards the isa() call: an unblessed reference would otherwise
+    # die with a raw Perl error instead of the intended BadParameter exception.
+    unless( Scalar::Util::blessed($hit) and $hit->isa('Bio::Search::Hit::HitI')) {
+        $self->throw(-class =>'Bio::Root::BadParameter',
+                     -text => "Can't add hit: not a Bio::Search::Hit::HitI: $hit"
+                    );
+        # Only reached if throw() returns; never store an invalid hit.
+        return;
+    }
+
+    # Avoid adding duplicate hits if we're doing multiple iterations (PSI-BLAST)
+    # (disabled: hits from all iterations are currently lumped together)
+#    if( $self->iterations > 1 ) {
+#        my $hit_name = $hit->name;
+#        if( grep $hit_name eq $_, @{$self->{'_hit_names'}}) {
+#            return;
+#        }
+#    }
+
+    push @{$self->{'_hits'}}, $hit;
+    push @{$self->{'_hit_names'}}, $hit->name;
+}
+
+
+=head2 is_signif
+
+ Usage     : $blast->is_signif();
+ Purpose   : Determine if the BLAST report contains significant hits.
+ Returns   : Boolean
+ Argument  : n/a
+ Comments  : BLAST reports without significant hits but with defined
+           : significance criteria will throw exceptions during construction.
+           : This obviates the need to check significant() for
+           : such objects.
+
+=cut
+
+#------------
+sub is_signif {
+    # Read-only accessor for the stored '_is_significant' flag.
+    my ($self) = @_;
+    return $self->{'_is_significant'};
+}
+#------------
+
+
+=head2 matrix
+
+ Usage     : $blast_object->matrix();
+ Purpose   : Get the name of the scoring matrix used.
+           : This is extracted from the report.
+ Argument  : n/a
+ Returns   : string or undef if not defined
+ Comments  : TODO: Deprecate this and implement get_parameter('matrix').
+
+=cut
+
+#------------
+sub matrix {
+#------------
+    # Getter/setter for the name of the scoring matrix.
+    my ($self, @new_matrix) = @_;
+    $self->{'_matrix'} = $new_matrix[0] if @new_matrix;
+    return $self->{'_matrix'};
+}
+
+
+=head2 raw_statistics
+
+ Usage     : @stats = $blast_result->raw_statistics();
+ Purpose   : Get the raw, unparsed statistical parameter section of the Blast report.
+             This is the section at the end after the last HSP alignment.
+ Argument  : n/a
+ Returns   : Array of strings
+
+=cut
+
+#------------
+sub raw_statistics {
+#------------
+    # Getter/setter for the raw statistics lines of the report.
+    # Setting requires an ARRAY reference; anything else is reported through
+    # $self->throw(). Returns the stored strings as a list.
+    my ($self, @args) = @_;
+
+    if (@args) {
+        my $params = $args[0];
+        if (ref($params) ne 'ARRAY') {
+            $self->throw(-class =>'Bio::Root::BadParameter',
+                         -text => "Can't set statistical params: not an ARRAY ref: $params"
+                         );
+        }
+        else {
+            $self->{'_raw_statistics'} = $params;
+        }
+    }
+
+    # Make sure there is always an array to dereference below.
+    $self->{'_raw_statistics'} = []
+        unless defined $self->{'_raw_statistics'};
+
+    return @{ $self->{'_raw_statistics'} };
+}
+
+
+
+=head2 no_hits_found
+
+ Usage     : $nohits = $blast->no_hits_found( [iteration_number] ); 
+ Purpose   : Get boolean indicator indicating whether or not any hits
+             were present in the report.
+
+             This is NOT the same as determining the number of hits via
+             the hits() method, which will return zero hits if there were no
+             hits in the report or if all hits were filtered out during the parse.
+
+             Thus, this method can be used to distinguish these possibilities
+             for hitless reports generated when filtering.
+
+ Returns   : Boolean
+ Argument  : (optional) integer indicating the iteration number (PSI-BLAST)
+             If iteration number is not specified and this is a PSI-BLAST result,
+             then this method will return true only if all iterations had
+             no hits found.
+
+=cut
+
+#-----------
+sub no_hits_found {
+#-----------
+    # Reports whether the "no hits found" flag was set (see set_no_hits_found).
+    # Argument : (optional) iteration number.
+    # Returns  : with an iteration number, that iteration's flag (1 or undef);
+    #            without one, 1/0 for multi-iteration results (1 only if every
+    #            iteration was flagged), else the flag of iteration 1.
+    # This is a pure query: it never creates per-iteration entries in the
+    # object, and an unset iteration count is treated as a single iteration.
+    my ($self, $round) = @_;
+
+    # Watch the double negative! 
+    # false means "yes hits were found"
+    # true  means "no hits were found" (for the indicated iteration or all iterations)
+
+    # Look up one iteration's flag without autovivifying its hash.
+    my $flag_for = sub {
+        my $iteration = $self->{'_iteration_' . $_[0]};
+        return ref($iteration) ? $iteration->{'_no_hits_found'} : undef;
+    };
+
+    my $result = 0;   # final return value of this method.
+
+    if( defined $round ) {
+        $result = $flag_for->($round);
+    }
+    else {
+        my $total = $self->{'_iterations'} || 0;
+        if( $total > 1 ) {
+            # If an iteration was not specified and there were multiple
+            # iterations, return true only if all iterations had no hits found.
+            $result = 1;
+            foreach my $i ( 1..$total ) {
+                if( not defined $flag_for->($i) ) {
+                    $result = 0;
+                    last;
+                }
+            }
+        }
+        else {
+            $result = $flag_for->(1);
+        }
+    }
+
+    return $result;
+}
+
+
+=head2 set_no_hits_found
+
+ Usage     : $blast->set_no_hits_found( [iteration_number] ); 
+ Purpose   : Set boolean indicator indicating whether or not any hits
+             were present in the report.
+ Returns   : n/a
+ Argument  : (optional) integer indicating the iteration number (PSI-BLAST)
+
+=cut
+
+#-----------
+sub set_no_hits_found {
+#-----------
+    # Flags an iteration as having produced no hits.
+    # A missing (or false) iteration number means iteration 1.
+    my $self  = shift;
+    my $round = shift;
+    $round = 1 unless $round;
+    $self->{'_iteration_' . $round}->{'_no_hits_found'} = 1;
+}
+
+
+=head2 iterations
+
+ Usage     : $num_iterations = $blast->iterations;  (get)
+             $blast->iterations($num_iterations);   (set)
+ Purpose   : Set/get the number of iterations in the Blast Report (PSI-BLAST).
+ Returns   : Total number of iterations in the report
+ Argument  : integer  (when setting)
+
+=cut
+
+#----------------
+sub iterations {
+#----------------
+    # Getter/setter for the number of iterations; any defined value
+    # (including 0) is stored.
+    my $self = shift;
+    my $num  = shift;
+    $self->{'_iterations'} = $num if defined $num;
+    return $self->{'_iterations'};
+}
+
+
+=head2 psiblast
+
+ Usage     : if( $blast->psiblast ) { ... }
+ Purpose   : Set/get a boolean indicator whether or not the report 
+             is a PSI-BLAST report.
+ Returns   : 1 if PSI-BLAST, undef if not.
+ Argument  : 1 (when setting)
+
+=cut
+
+#----------------
+sub psiblast {
+#----------------
+    # Getter/setter for the PSI-BLAST flag. Any true argument stores 1;
+    # a false argument leaves the flag untouched (it cannot be cleared here).
+    my $self = shift;
+    my $flag = shift;
+    $self->{'_psiblast'} = 1 if $flag;
+    return $self->{'_psiblast'};
+}
+
+
+1;
+__END__