diff variant_effect_predictor/Bio/Search/Result/GenericResult.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Search/Result/GenericResult.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,761 @@
+# $Id: GenericResult.pm,v 1.15 2002/12/05 13:46:34 heikki Exp $
+#
+# BioPerl module for Bio::Search::Result::GenericResult
+#
+# Cared for by Jason Stajich <jason@bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Result::GenericResult - Generic Implementation of Bio::Search::Result::ResultI interface applicable to most search results.
+
+=head1 SYNOPSIS
+
+
+    # typically one gets Results from a SearchIO stream
+    use Bio::SearchIO;
+    my $io = new Bio::SearchIO(-format => 'blast',
+ 			       -file   => 't/data/HUMBETGLOA.tblastx');
+    while( my $result = $io->next_result) {
+        # process all search results within the input stream
+        while( my $hit = $result->next_hits()) {  
+        # insert code here for hit processing
+        }
+    }
+
+    use Bio::Search::Result::GenericResult;
+    my @hits = (); # would be a list of Bio::Search::Hit::HitI objects
+    # typically these are created from a Bio::SearchIO stream
+    my $result = new Bio::Search::Result::GenericResult
+	( -query_name        => 'HUMBETGLOA',
+	  -query_accession   => ''
+	  -query_description => 'Human haplotype C4 beta-globin gene, complete cds.'
+	  -query_length      => 3002
+	  -database_name     => 'ecoli.aa'
+	  -database_letters  => 4662239,
+	  -database_entries  => 400,
+	  -parameters        => { 'e' => '0.001' },
+	  -statistics        => { 'kappa' => 0.731 },
+	  -algorithm         => 'blastp',
+          -algorithm_version => '2.1.2',
+	  );
+
+    my $id = $result->query_name();
+
+    my $desc = $result->query_description();
+
+    my $name = $result->database_name();
+
+    my $size = $result->database_letters();
+
+    my $num_entries = $result->database_entries();
+
+    my $gap_ext = $result->get_parameter('e');
+
+    my @params = $result->available_parameters;
+
+    my $kappa = $result->get_statistic('kappa');
+
+    my @statnames = $result->available_statistics;
+
+
+
+=head1 DESCRIPTION
+
+This object is an implementation of the Bio::Search::Result::ResultI
+interface and provides a generic place to store results from a
+sequence database search.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org              - General discussion
+  http://bioperl.org/MailList.shtml  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  bioperl-bugs@bioperl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Jason Stajich and Steve Chervitz
+
+Email jason@bioperl.org
+Email sac@bioperl.org
+
+=head1 CONTRIBUTORS
+
+Additional contributors names and emails here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::Result::GenericResult;
+use vars qw(@ISA);
+use strict;
+
+use Bio::Root::Root;
+use Bio::Search::Result::ResultI;
+
+use overload 
+    '""' => \&to_string;
+
+@ISA = qw(Bio::Root::Root Bio::Search::Result::ResultI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::Result::GenericResult();
+ Function: Builds a new Bio::Search::Result::GenericResult object 
+ Returns : Bio::Search::Result::GenericResult
+ Args    : -query_name        => Name of query Sequence
+           -query_accession   => Query accession number (if available)
+           -query_description => Description of query sequence
+           -query_length      => Length of query sequence
+           -database_name     => Name of database
+           -database_letters  => Number of residues in database
+           -database_entries  => Number of entries in database
+           -parameters        => hash ref of search parameters (key => value)
+           -statistics        => hash ref of search statistics (key => value)
+           -algorithm         => program name (blastx)
+           -algorithm_version   => version of the algorithm (2.1.2)
+           -algorithm_reference => literature reference string for this algorithm
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+
+  $self->{'_hits'} = [];
+  $self->{'_hitindex'} = 0;
+  $self->{'_statistics'} = {};
+  $self->{'_parameters'} = {};
+
+  my ($qname,$qacc,$qdesc,$qlen,
+      $dbname,$dblet,$dbent,$params,   
+      $stats, $hits, $algo, $algo_v,
+      $prog_ref, $algo_r) = $self->_rearrange([qw(QUERY_NAME
+                                                  QUERY_ACCESSION
+                                                  QUERY_DESCRIPTION
+                                                  QUERY_LENGTH
+                                                  DATABASE_NAME
+                                                  DATABASE_LETTERS
+                                                  DATABASE_ENTRIES
+                                                  PARAMETERS
+                                                  STATISTICS
+                                                  HITS
+                                                  ALGORITHM
+                                                  ALGORITHM_VERSION
+                                                  PROGRAM_REFERENCE
+                                                  ALGORITHM_REFERENCE
+                                                 )],@args);
+
+  $algo_r ||= $prog_ref;         
+  defined $algo   && $self->algorithm($algo);
+  defined $algo_v && $self->algorithm_version($algo_v);
+  defined $algo_r && $self->algorithm_reference($algo_r);
+
+  defined $qname && $self->query_name($qname);
+  defined $qacc  && $self->query_accession($qacc);
+  defined $qdesc && $self->query_description($qdesc);
+  defined $qlen  && $self->query_length($qlen);
+  defined $dbname && $self->database_name($dbname);
+  defined $dblet  && $self->database_letters($dblet);
+  defined $dbent  && $self->database_entries($dbent);
+
+  if( defined $params ) {
+      if( ref($params) !~ /hash/i ) {
+	  $self->throw("Must specify a hash reference with the the parameter '-parameters");
+      }
+      while( my ($key,$value) = each %{$params} ) {
+	  $self->add_parameter($key,$value);
+      }
+  }
+  if( defined $stats ) {
+      if( ref($stats) !~ /hash/i ) {
+	  $self->throw("Must specify a hash reference with the the parameter '-statistics");
+      }
+      while( my ($key,$value) = each %{$stats} ) {
+	  $self->add_statistic($key,$value);
+      }
+  }
+
+  if( defined $hits  ) { 
+      $self->throw("Must define arrayref of Hits when initializing a $class\n") unless ref($hits) =~ /array/i;
+
+      foreach my $s ( @$hits ) {
+	  $self->add_hit($s);
+      }
+  }
+  return $self;
+}
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : my $r_type = $hsp->algorithm
+ Function: Obtain the name of the algorithm used to obtain the Result
+ Returns : string (e.g., BLASTP)
+ Args    : [optional] scalar string to set value
+
+=cut
+
+sub algorithm{
+    my ($self,$value) = @_;
+    my $previous = $self->{'_algorithm'};
+    if( defined $value || ! defined $previous ) { 
+	$value = $previous = '' unless defined $value;
+	$self->{'_algorithm'} = $value;
+    } 
+    return $previous;   
+}
+
+=head2 algorithm_version
+
+ Title   : algorithm_version
+ Usage   : my $r_version = $hsp->algorithm_version
+ Function: Obtain the version of the algorithm used to obtain the Result
+ Returns : string (e.g., 2.1.2)
+ Args    : [optional] scalar string to set algorithm version value
+
+=cut
+
+sub algorithm_version{
+    my ($self,$value) = @_;
+    my $previous = $self->{'_algorithm_version'};
+    if( defined $value || ! defined $previous ) { 
+	$value = $previous = '' unless defined $value;
+	$self->{'_algorithm_version'} = $value;
+    } 
+
+    return $previous;   
+}
+
+=head2 Bio::Search::Result::ResultI interface methods
+
+Bio::Search::Result::ResultI implementation
+
+=head2 next_hit
+
+ Title   : next_hit
+ Usage   : while( $hit = $result->next_hit()) { ... }
+ Function: Returns the next available Hit object, representing potential
+           matches between the query and various entities from the database.
+ Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
+ Args    : none
+
+
+=cut
+
+sub next_hit {
+    my ($self,@args) = @_;
+    my $index = $self->_nexthitindex;
+    return undef if $index > scalar @{$self->{'_hits'}};
+    return $self->{'_hits'}->[$index];    
+}
+
+=head2 query_name
+
+ Title   : query_name
+ Usage   : $id = $result->query_name();
+ Function: Get the string identifier of the query used by the
+           algorithm that performed the search.
+ Returns : a string.
+ Args    : [optional] new string value for query name
+
+=cut
+
+sub query_name {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_queryname'};
+    if( defined $value || ! defined $previous ) {
+	$value = $previous = '' unless defined $value;
+	$self->{'_queryname'} = $value;
+    } 
+    return $previous;
+}
+
+=head2 query_accession
+
+ Title   : query_accession
+ Usage   : $id = $result->query_accession();
+ Function: Get the accession (if available) for the query sequence
+ Returns : a string
+ Args    : [optional] new string value for accession
+
+=cut
+
+sub query_accession {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_queryacc'};
+    if( defined $value || ! defined $previous ) {
+	$value = $previous = '' unless defined $value;
+	$self->{'_queryacc'} = $value;
+    } 
+    return $previous;
+}
+
+=head2 query_length
+
+ Title   : query_length
+ Usage   : $id = $result->query_length();
+ Function: Get the length of the query sequence
+           used in the search.
+ Returns : a number
+ Args    :  [optional] new integer value for query length
+
+=cut
+
+sub query_length {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_querylength'};
+    if( defined $value || ! defined $previous ) {
+	$value = $previous = 0 unless defined $value;
+	$self->{'_querylength'} = $value;
+    } 
+    return $previous;
+}
+
+=head2 query_description
+
+ Title   : query_description
+ Usage   : $id = $result->query_description();
+ Function: Get the description of the query sequence
+           used in the search.
+ Returns : a string
+ Args    : [optional] new string for the query description
+
+=cut
+
+sub query_description {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_querydesc'};
+    if( defined $value || ! defined $previous ) {
+	$value = $previous = '' unless defined $value;
+	$self->{'_querydesc'} = $value;
+    } 
+    return $previous;
+}
+
+
+=head2 database_name
+
+ Title   : database_name
+ Usage   : $name = $result->database_name()
+ Function: Used to obtain the name of the database that the query was searched
+           against by the algorithm.
+ Returns : a scalar string
+ Args    : [optional] new string for the db name
+
+=cut
+
+sub database_name {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_dbname'};
+    if( defined $value || ! defined $previous ) {
+	$value = $previous = '' unless defined $value;
+	$self->{'_dbname'} = $value;
+    } 
+    return $previous;
+}
+
+=head2 database_letters
+
+ Title   : database_letters
+ Usage   : $size = $result->database_letters()
+ Function: Used to obtain the size of database that was searched against.
+ Returns : a scalar integer (units specific to algorithm, but probably the
+           total number of residues in the database, if available) or undef if
+           the information was not available to the Processor object.
+ Args    : [optional] new scalar integer for number of letters in db 
+
+
+=cut
+
+sub database_letters {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_dbletters'};
+    if( defined $value || ! defined $previous ) {
+	$value = $previous = '' unless defined $value;
+	$self->{'_dbletters'} = $value;
+    } 
+    return $previous;
+}
+
+=head2 database_entries
+
+ Title   : database_entries
+ Usage   : $num_entries = $result->database_entries()
+ Function: Used to obtain the number of entries contained in the database.
+ Returns : a scalar integer representing the number of entities in the database
+           or undef if the information was not available.
+ Args    : [optional] new integer for the number of sequence entries in the db
+
+
+=cut
+
+sub database_entries {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_dbentries'};
+    if( defined $value || ! defined $previous ) {
+	$value = $previous = '' unless defined $value;
+	$self->{'_dbentries'} = $value;
+    } 
+    return $previous;
+}
+
+=head2 get_parameter
+
+ Title   : get_parameter
+ Usage   : my $gap_ext = $report->get_parameter('gapext')
+ Function: Returns the value for a specific parameter used
+           when running this report
+ Returns : string
+ Args    : name of parameter (string)
+
+=cut
+
+sub get_parameter{
+   my ($self,$name) = @_;
+   return $self->{'_parameters'}->{$name};
+}
+
+=head2 available_parameters
+
+ Title   : available_parameters
+ Usage   : my @params = $report->available_paramters
+ Function: Returns the names of the available parameters
+ Returns : Return list of available parameters used for this report
+ Args    : none
+
+=cut
+
+sub available_parameters{
+   my ($self) = @_;
+   return keys %{$self->{'_parameters'}};
+}
+
+
+=head2 get_statistic
+
+ Title   : get_statistic
+ Usage   : my $gap_ext = $report->get_statistic('kappa')
+ Function: Returns the value for a specific statistic available 
+           from this report
+ Returns : string
+ Args    : name of statistic (string)
+
+=cut
+
+sub get_statistic{
+   my ($self,$key) = @_;
+   return $self->{'_statistics'}->{$key};
+}
+
+=head2 available_statistics
+
+ Title   : available_statistics
+ Usage   : my @statnames = $report->available_statistics
+ Function: Returns the names of the available statistics
+ Returns : Return list of available statistics used for this report
+ Args    : none
+
+=cut
+
+sub available_statistics{
+   my ($self) = @_;
+   return keys %{$self->{'_statistics'}};
+}
+
+=head2 Bio::Search::Report 
+
+Bio::Search::Result::GenericResult specific methods
+
+=head2 add_hit
+
+ Title   : add_hit
+ Usage   : $report->add_hit($hit)
+ Function: Adds a HitI to the stored list of hits
+ Returns : Number of HitI currently stored
+ Args    : Bio::Search::Hit::HitI
+
+=cut
+
+sub add_hit {
+    my ($self,$s) = @_;
+    if( $s->isa('Bio::Search::Hit::HitI') ) { 
+	push @{$self->{'_hits'}}, $s;
+    } else { 
+	$self->warn("Passed in " .ref($s). 
+		    " as a Hit which is not a Bio::Search::HitI... skipping");
+    }
+    return scalar @{$self->{'_hits'}};
+}
+
+
+=head2 rewind
+
+ Title   : rewind
+ Usage   : $result->rewind;
+ Function: Allow one to reset the Hit iteration to the beginning
+           Since this is an in-memory implementation
+ Returns : none
+ Args    : none
+
+=cut
+
+sub rewind{
+   my ($self) = @_;
+   $self->{'_hitindex'} = 0;
+}
+
+
+=head2 _nexthitindex
+
+ Title   : _nexthitindex
+ Usage   : private
+
+=cut
+
+sub _nexthitindex{
+   my ($self,@args) = @_;
+   return $self->{'_hitindex'}++;
+}
+
+
+
+=head2 add_parameter
+
+ Title   : add_parameter
+ Usage   : $report->add_parameter('gapext', 11);
+ Function: Adds a parameter
+ Returns : none
+ Args    : key  - key value name for this parama
+           value - value for this parameter
+
+=cut
+
+sub add_parameter{
+   my ($self,$key,$value) = @_;
+   $self->{'_parameters'}->{$key} = $value;
+}
+
+
+=head2 add_statistic
+
+ Title   : add_statistic
+ Usage   : $report->add_statistic('lambda', 2.3);
+ Function: Adds a parameter
+ Returns : none
+ Args    : key  - key value name for this parama
+           value - value for this parameter
+
+=cut
+
+sub add_statistic {
+   my ($self,$key,$value) = @_;
+   $self->{'_statistics'}->{$key} = $value;
+   return;
+}
+
+
+=head2 num_hits
+
+ Title   : num_hits
+ Usage   : my $hitcount= $result->num_hits
+ Function: returns the number of hits for this query result
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub num_hits{
+   my ($self) = shift;
+   if (not defined $self->{'_hits'}) {
+       $self->throw("Can't get Hits: data not collected.");
+    }
+    return scalar(@{$self->{'_hits'}});
+}
+
+
+=head2 hits
+
+ Title   : hits
+ Usage   : my @hits = $result->hits
+ Function: Returns the available hits for this Result
+ Returns : Array of L<Bio::Search::Hit::HitI> objects
+ Args    : none
+
+
+=cut
+
+sub hits{
+   my ($self) = shift;
+   my @hits = ();
+   if( ref $self->{'_hits'}) {
+       @hits = @{$self->{'_hits'}};
+   }
+    return @hits;   
+}
+
+=head2 algorithm_reference
+
+ Title   : algorithm_reference
+ Usage   : $obj->algorithm_reference($newval)
+ Function: 
+ Returns : string containing literature reference for the algorithm
+ Args    : newvalue string (optional)
+ Comments: Formerly named program_reference(), which is still supported
+           for backwards compatibility.
+
+=cut
+
+sub algorithm_reference{
+   my ($self,$value) = @_;
+   if( defined $value) {
+      $self->{'algorithm_reference'} = $value;
+    }
+    return $self->{'algorithm_reference'};
+}
+
+
+sub program_reference { shift->algorithm_reference(@_); }
+
+
+=head2 no_hits_found
+
+See documentation in L<Bio::Search::Result::ResultI::no_hits_found()|Bio::Search::Result::ResultI>
+
+=cut
+
+#-----------
+sub no_hits_found {
+#-----------
+    my ($self, $round) = @_;
+
+    my $result = 0;   # final return value of this method.
+    # Watch the double negative! 
+    # result = 0 means "yes hits were found"
+    # result = 1 means "no hits were found" (for the indicated iteration or all iterations)
+
+    # If a iteration was not specified and there were multiple iterations,
+    # this method should return true only if all iterations had no hits found.
+    if( not defined $round ) {
+        if( $self->{'_iterations'} > 1) {
+            $result = 1;
+            foreach my $i( 1..$self->{'_iterations'} ) {
+                if( not defined $self->{"_iteration_$i"}->{'_no_hits_found'} ) {
+                    $result = 0;
+                    last;
+                }
+            }
+        }
+        else {
+            $result = $self->{"_iteration_1"}->{'_no_hits_found'};
+        }
+    }
+    else {
+        $result = $self->{"_iteration_$round"}->{'_no_hits_found'};
+    }
+
+    return $result;
+}
+
+
+=head2 set_no_hits_found
+
+See documentation in L<Bio::Search::Result::ResultI::set_no_hits_found()|Bio::Search::Result::ResultI>
+
+=cut
+
+#-----------
+sub set_no_hits_found {
+#-----------
+    my ($self, $round) = @_;
+    $round ||= 1;
+    $self->{"_iteration_$round"}->{'_no_hits_found'} = 1;
+}
+
+
+=head2 iterations
+
+See documentation in L<Bio::Search::Result::ResultI::iterations()|Bio::Search::Result::ResultI>
+
+=cut
+
+#----------------
+sub iterations {
+#----------------
+    my ($self, $num ) = @_;
+    if( defined $num ) {
+        $self->{'_iterations'} = $num;
+    }
+    return $self->{'_iterations'};
+}
+
+
+=head2 psiblast
+
+See documentation in L<Bio::Search::Result::ResultI::psiblast()|Bio::Search::Result::ResultI>
+
+=cut
+
+#----------------
+sub psiblast {
+#----------------
+    my ($self, $val ) = @_;
+    if( $val ) {
+        $self->{'_psiblast'} = 1;
+    }
+    return $self->{'_psiblast'};
+}
+
+
+=head2 to_string
+
+ Title   : to_string
+ Usage   : print $blast->to_string;
+ Function: Returns a string representation for the Blast result. 
+           Primarily intended for debugging purposes.
+ Example : see usage
+ Returns : A string of the form:
+           [GenericResult] <analysis_method> query=<name> <description> db=<database
+           e.g.:
+           [GenericResult] BLASTP query=YEL060C vacuolar protease B, db=PDBUNIQ 
+ Args    : None
+
+=cut
+
+#---------------
+sub to_string {
+#---------------
+    my $self = shift;
+    my $str = "[GenericResult] " . $self->algorithm . " query=" . $self->query_name . " " . $self->query_description .", db=" . $self->database_name;
+    return $str;
+}
+
+1;