view variant_effect_predictor/Bio/Search/Result/BlastResult.pm @ 2:a5976b2dce6f

changing defualt values for ensembl database
author mahtabm
date Thu, 11 Apr 2013 17:15:42 +1000
parents 1f6dce3d34e0
children
line wrap: on
line source

#-----------------------------------------------------------------
# $Id: BlastResult.pm,v 1.13 2002/12/24 15:48:41 jason Exp $
#
# BioPerl module Bio::Search::Result::BlastResult
#
# Cared for by Steve Chervitz <sac@bioperl.org>
#
# You may distribute this module under the same terms as perl itself
#-----------------------------------------------------------------

# POD documentation - main docs before the code

=head1 NAME

Bio::Search::Result::BlastResult - A top-level BLAST Report object

=head1 SYNOPSIS

The construction of BlastResult objects is performed
by the B<Bio::SearchIO::psiblast> parser.
Therefore, you do not need to
use B<Bio::Search::Result::BlastResult> directly. If you need to construct
BlastResult objects directly, see the new() function for details.

For B<Bio::SearchIO> BLAST parsing usage examples, see the
B<examples/search-blast> directory of the Bioperl distribution.

=head1 DESCRIPTION

This module supports BLAST versions 1.x and 2.x, gapped and ungapped,
and PSI-BLAST.

=head1 DEPENDENCIES

Bio::Search::Result::BlastResult.pm is a concrete class that inherits from B<Bio::Root::Root> and B<Bio::Search::Result::ResultI>. It  relies on two other modules:

=over 4

=item B<Bio::Search::Hit::BlastHit> 

Encapsulates a single BLAST hit.

=item B<Bio::Search::GenericDatabase>

Provides an interface to BLAST database metadata.

=back

=head1 FEEDBACK

=head2 Mailing Lists 

User feedback is an integral part of the evolution of this and other
Bioperl modules.  Send your comments and suggestions preferably to one
of the Bioperl mailing lists.  Your participation is much appreciated.

    bioperl-l@bioperl.org              - General discussion
    http://bio.perl.org/MailList.html  - About the mailing lists

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via email
or the web:

    bioperl-bugs@bio.perl.org                   
    http://bugzilla.bioperl.org/           

=head1 AUTHOR 

Steve Chervitz E<lt>sac@bioperl.orgE<gt>

See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.

=head1 ACKNOWLEDGEMENTS

This software was originally developed in the Department of Genetics
at Stanford University. I would also like to acknowledge my
colleagues at Affymetrix for useful feedback.

=head1 COPYRIGHT

Copyright (c) 2001 Steve Chervitz. All Rights Reserved.

=cut

=head1 DISCLAIMER

This software is provided "as is" without warranty of any kind.

=head1 APPENDIX

The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _

=cut


# Let the code begin...

package Bio::Search::Result::BlastResult;

use strict;

use Bio::Search::Result::ResultI;
use Bio::Root::Root;

# Stringification of a result object ("$result") is routed to to_string().
# The handler is bound as a direct code reference to this package's
# to_string sub rather than being looked up by method name.
use overload 
    '""' => \&to_string;

# Package globals: @ISA drives inheritance, $Revision holds the revision
# control id string for this file.
use vars qw(@ISA $Revision );

$Revision = '$Id: BlastResult.pm,v 1.13 2002/12/24 15:48:41 jason Exp $';  #'
# Inherit from Bio::Root::Root and Bio::Search::Result::ResultI.
# NOTE(review): throw(), used by several methods below, is not defined in
# this file -- presumably it is inherited from Bio::Root::Root; confirm.
@ISA = qw( Bio::Root::Root Bio::Search::Result::ResultI);

#----------------
sub new {
#----------------
    # Constructor. All construction work is delegated to the parent class
    # constructor, which receives a copy of the caller's arguments.
    my $class     = shift;
    my @ctor_args = @_;
    my $self      = $class->SUPER::new(@ctor_args);
    return $self;
}

#sub DESTROY {
#    my $self = shift;
#    print STDERR "->DESTROYING $self\n";
#}


#=================================================
# Begin Bio::Search::Result::ResultI implementation
#=================================================

=head2 next_hit

See L<Bio::Search::Result::ResultI::next_hit()|Bio::Search::Result::ResultI> for documentation

=cut

#----------------
sub next_hit {
#----------------
    # Returns the next hit from an internal queue, or undef once the queue
    # is exhausted. The queue is filled from hits() on the first call only.
    my $self = shift;

    if (not defined $self->{'_hit_queue'}) {
        my @all_hits = $self->hits();
        $self->{'_hit_queue'} = [@all_hits];
    }

    my $queue = $self->{'_hit_queue'};
    return shift @$queue;
}

=head2 query_name

See L<Bio::Search::Result::ResultI::query_name()|Bio::Search::Result::ResultI> for documentation

=cut

#----------------
sub query_name {
#----------------
    # Get/set the query name. When setting, leading whitespace and a
    # trailing run of whitespace or a trailing comma are removed first.
    my ($self, @args) = @_;

    if (scalar @args) {
        (my $cleaned = $args[0]) =~ s/^\s+|(\s+|,)$//g;
        $self->{'_query_name'} = $cleaned;
    }

    return $self->{'_query_name'};
}

=head2 query_length

See L<Bio::Search::Result::ResultI::query_length()|Bio::Search::Result::ResultI> for documentation

=cut

#----------------
sub query_length {
#----------------
    # Get/set the query length. Any supplied value (including undef or 0)
    # is stored as-is.
    my ($self, @args) = @_;
    $self->{'_query_length'} = $args[0] if @args;
    return $self->{'_query_length'};
}

=head2 query_description

See L<Bio::Search::Result::ResultI::query_description()|Bio::Search::Result::ResultI> for documentation

=cut

#----------------
sub query_description {
#----------------
    # Get/set the query description.
    #
    # When setting:
    #   * leading and trailing whitespace is stripped;
    #   * a copy of the query name at the very start of the description is
    #     removed (text following it, including whitespace, is kept as-is);
    #   * an undefined description is stored as the empty string.
    #
    # Returns the stored description (undef if it has never been set).
    my $self = shift;
    if (@_) {
        my $desc = shift;
        if (defined $desc) {
            $desc =~ s/(^\s+|\s+$)//g;

            # Remove duplicated ID at beginning of description string.
            # The name is matched literally (\Q..\E) because sequence IDs
            # routinely contain regex metacharacters such as '|' and '.'.
            # No /o modifier: the pattern must follow each object's own
            # query name rather than being frozen at first use.
            my $name = $self->{'_query_name'};
            if (defined $name and length $name) {
                $desc =~ s/^\Q$name\E//;
            }
        }
        else {
            $desc = '';
        }
        # Store the string itself so a literal "0" description survives.
        $self->{'_query_query_desc'} = $desc;
    }
    return $self->{'_query_query_desc'};
}


=head2 analysis_method

See L<Bio::AnalysisResultI::analysis_method()|Bio::AnalysisResultI> for documentation

This implementation ensures that the name matches /blast/i.

=cut

#----------------
sub analysis_method { 
#----------------
    # Get/set the analysis program name. A true argument must match
    # /blast/i or an exception is thrown; false arguments are ignored.
    my $self   = shift;
    my $method = shift;

    return $self->{'_analysis_prog'} unless $method;

    if ($method !~ /blast/i) {
        $self->throw("method $method not supported in " . ref($self));
    }
    else {
        $self->{'_analysis_prog'} = $method;
    }

    return $self->{'_analysis_prog'};
}

=head2 analysis_method_version

See L<Bio::AnalysisResultI::analysis_method_version()|Bio::AnalysisResultI> for documentation

=cut

#----------------
sub analysis_method_version {
#----------------
    # Get/set the analysis program version. Only a true argument updates
    # the stored value; false arguments (undef, 0, '') are ignored.
    my $self    = shift;
    my $version = shift;
    $self->{'_analysis_progVersion'} = $version if $version;
    return $self->{'_analysis_progVersion'};
}


=head2 analysis_query

See L<Bio::AnalysisResultI::analysis_query()|Bio::AnalysisResultI> for documentation

=cut

#----------------
sub analysis_query {
#----------------
    # Returns a Bio::PrimarySeq object describing the query, building and
    # caching it on first use from query_name, query_description and
    # query_length. The molecule type is 'protein' when the analysis
    # method matches blastp or tblastn, and 'dna' otherwise.
    my $self = shift;

    return $self->{'_analysis_query'} if defined $self->{'_analysis_query'};

    require Bio::PrimarySeq;

    my $moltype = 'dna';
    $moltype = 'protein' if $self->analysis_method =~ /blastp|tblastn/i;

    my @seq_args = (
        -display_id => $self->query_name,
        -desc       => $self->query_description,
        -moltype    => $moltype,
    );
    $self->{'_analysis_query'} = Bio::PrimarySeq->new(@seq_args);
    $self->{'_analysis_query'}->length( $self->query_length );

    return $self->{'_analysis_query'};
}

=head2 analysis_subject

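 Usage     : $blastdb = $result->analysis_subject();
             $result->analysis_subject($blastdb);
 Purpose   : Get (or set) the Bio::Search::DatabaseI object containing
             information about the database used in the BLAST analysis.
 Returns   : Bio::Search::DatabaseI object, or undef if none has been set.
 Argument  : (optional) a Bio::Search::DatabaseI object to store.
             Other true values are rejected with an exception.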

=cut

#---------------
sub analysis_subject { 
#---------------
    # Get/set the database object for this result.
    #
    # Argument : (optional) an object that isa Bio::Search::DatabaseI.
    #            False values are ignored (pure getter call).
    # Returns  : the stored database object, or undef if none was set.
    # Throws   : Bio::Root::BadParameter if a true argument is supplied
    #            that is not a Bio::Search::DatabaseI object.
    my ($self, $blastdb) = @_; 
    if($blastdb) {
        # blessed() rather than ref(): calling ->isa on an unblessed
        # reference (e.g. a plain hash ref) is a fatal Perl error, which
        # would bypass the BadParameter exception below.
        require Scalar::Util;
        if( Scalar::Util::blessed($blastdb)
            and $blastdb->isa('Bio::Search::DatabaseI')) {
            $self->{'_analysis_sbjct'} = $blastdb;
        }
        else {
            $self->throw(-class =>'Bio::Root::BadParameter',
                         -text => "Can't set BlastDB: not a Bio::Search::DatabaseI $blastdb"
                         );
        }
    }
    return $self->{'_analysis_sbjct'};
}

=head2 next_feature

 Title   : next_feature
 Usage   : while( my $feat = $blast_result->next_feature ) { # do something }
 Function: Returns the next feature available in the analysis result, or
           undef if there are no more features.
 Example :
 Returns : A Bio::SeqFeatureI compliant object, in this case, 
           each Bio::Search::HSP::BlastHSP object within each BlastHit.
 Args    : None

=cut

#---------------
sub next_feature{
#---------------
   # Walks the HSPs of each hit in turn. The hit currently being drained
   # is kept in $self->{'_current_hit'}; when it has no more HSPs the next
   # hit is fetched via next_hit(). Returns undef when no hits remain.
   my $self = shift;

   while (1) {
       my $hit = $self->{'_current_hit'};
       if (not defined $hit) {
           $hit = $self->{'_current_hit'} = $self->next_hit;
           return undef unless defined $hit;
       }

       my $hsp = $hit->next_hsp;
       return ($hsp || undef) if defined $hsp;

       # Current hit is exhausted: forget it and move on to the next one.
       $self->{'_current_hit'} = undef;
   }
}


# Alias for analysis_method(): forwards any arguments and returns its result.
sub algorithm {
    my $self = shift;
    return $self->analysis_method(@_);
}
# Alias for analysis_method_version(): forwards any arguments and returns
# its result.
sub algorithm_version {
    my $self = shift;
    return $self->analysis_method_version(@_);
}

=head2 available_parameters

 Title   : available_parameters
 Usage   : my @params = $report->available_parameters
 Function: Returns the names of the available parameters
 Returns : Return list of available parameters used for this report
 Args    : none

=cut

sub available_parameters{
    # Stub implementation: no parameter names are exposed, so the result
    # is always the empty list (undef in scalar context).
    return;
}


=head2 get_parameter

 Title   : get_parameter
 Usage   : my $gap_ext = $report->get_parameter('gapext')
 Function: Returns the value for a specific parameter used
           when running this report
 Returns : string
 Args    : name of parameter (string)

=cut

sub get_parameter{
    # Stub implementation: the requested parameter name is ignored and an
    # empty string is always returned.
    my $no_value = '';
    return $no_value;
}

=head2 get_statistic

 Title   : get_statistic
 Usage   : my $gap_ext = $report->get_statistic('kappa')
 Function: Returns the value for a specific statistic available 
           from this report
 Returns : string
 Args    : name of statistic (string)

=cut

sub get_statistic{
    # Stub implementation: the requested statistic name is ignored and an
    # empty string is always returned.
    my $no_value = '';
    return $no_value;
}

=head2 available_statistics

 Title   : available_statistics
 Usage   : my @statnames = $report->available_statistics
 Function: Returns the names of the available statistics
 Returns : Return list of available statistics used for this report
 Args    : none

=cut

sub available_statistics{
    # Stub implementation: no statistic names are exposed, so the result
    # is always the empty list (undef in scalar context).
    return;
}

#=================================================
# End Bio::Search::Result::ResultI implementation
#=================================================


=head2 to_string

 Title   : to_string
 Usage   : print $blast->to_string;
 Function: Returns a string representation for the Blast result. 
           Primarily intended for debugging purposes.
 Example : see usage
 Returns : A string of the form:
           [BlastResult] <analysis_method> query=<name> <description>, db=<database>
           e.g.:
           [BlastResult] BLASTP query=YEL060C vacuolar protease B, db=PDBUNIQ 
 Args    : None

=cut

#---------------
sub to_string {
#---------------
    # Builds the one-line summary used for stringification:
    #   [BlastResult] <method> query=<name> <description>, db=<database>
    # Each piece is fetched in scalar context, left to right.
    my ($self) = @_;

    my $summary = "[BlastResult] ";
    $summary .= $self->analysis_method;
    $summary .= " query=";
    $summary .= $self->query_name;
    $summary .= " ";
    $summary .= $self->query_description;
    $summary .= ", db=";
    $summary .= $self->database_name;

    return $summary;
}

#---------------
sub database_name {
#---------------
    # Returns the name reported by the database object from
    # analysis_subject(), or an empty string when no such object is set.
    my ($self) = @_;

    return '' unless ref $self->analysis_subject;

    my $dbname = $self->analysis_subject->name;
    return $dbname;
}

=head2 database_entries

 Title   : database_entries
 Usage   : $num_entries = $result->database_entries()
 Function: Used to obtain the number of entries contained in the database.
 Returns : a scalar integer representing the number of entries in the database,
           or an empty string if no database object has been set.
 Args    : none (read-only here; the value is taken from the database object
           returned by analysis_subject())


=cut

#---------------
sub database_entries {
#---------------
    # Returns the entry count reported by the database object from
    # analysis_subject(), or an empty string when no such object is set.
    my ($self) = @_;

    return '' unless ref $self->analysis_subject;

    my $dbentries = $self->analysis_subject->entries;
    return $dbentries;
}


=head2 database_letters

 Title   : database_letters
 Usage   : $size = $result->database_letters()
 Function: Used to obtain the size of database that was searched against.
 Returns : a scalar integer (units specific to algorithm, but probably the
           total number of residues in the database, if available) or an
           empty string if no database object has been set.
 Args    : none (read-only here; the value is taken from the database object
           returned by analysis_subject())


=cut

#---------------
sub database_letters {
#---------------
    # Returns the letter count reported by the database object from
    # analysis_subject(), or an empty string when no such object is set.
    my ($self) = @_;

    return '' unless ref $self->analysis_subject;

    my $dbletters = $self->analysis_subject->letters;
    return $dbletters;
}

#---------------
sub hits {
#---------------
    # Returns a copy of the list of hits added so far (empty if none).
    # In scalar context this yields the number of hits.
    my ($self) = @_;

    my @found;
    my $stored = $self->{'_hits'};
    push @found, @$stored if ref $stored;

    return @found;
}

=head2 add_hit

 Usage     : $blast->add_hit( $hit );
 Purpose   : Adds a hit object to the collection of hits in this BLAST result.
 Returns   : n/a
 Argument  : A Bio::Search::Hit::HitI object
 Comments  : For PSI-BLAST, hits from all iterations are lumped together.
             For any given hit, you can determine the iteration in which it was
             found by checking $hit->iteration().

=cut

#---------------
sub add_hit {
#---------------
    # Appends a hit object to this result and records its name.
    #
    # Argument : an object that isa Bio::Search::Hit::HitI.
    # Throws   : Bio::Root::BadParameter if the argument is not such an
    #            object (including undef, plain scalars and unblessed
    #            references).
    my ($self, $hit) = @_;
    my $add_it = 1;

    # blessed() rather than ref(): calling ->isa on an unblessed reference
    # (e.g. an array ref) is a fatal Perl error, which would bypass the
    # BadParameter exception below.
    require Scalar::Util;
    unless( Scalar::Util::blessed($hit)
            and $hit->isa('Bio::Search::Hit::HitI')) {
        $add_it = 0;
        $self->throw(-class =>'Bio::Root::BadParameter',
                     -text => "Can't add hit: not a Bio::Search::Hit::HitI: $hit"
                    );
    }

    # Avoid adding duplicate hits if we're doing multiple iterations (PSI-BLAST)
    # (currently disabled; the hit names needed for it are still recorded below)
#    if( $self->iterations > 1 ) {
#        my $hit_name = $hit->name;
#        if( grep $hit_name eq $_, @{$self->{'_hit_names'}}) {
#            $add_it = 0;
#        }
#    }

    if( $add_it ) {
        push @{$self->{'_hits'}}, $hit;
        push @{$self->{'_hit_names'}}, $hit->name;
    }
}


=head2 is_signif

 Usage     : $blast->is_signif();
 Purpose   : Determine if the BLAST report contains significant hits.
 Returns   : Boolean
 Argument  : n/a
 Comments  : BLAST reports without significant hits but with defined
           : significance criteria will throw exceptions during construction.
           : This obviates the need to check significant() for
           : such objects.

=cut

#------------
sub is_signif {
    # Read-only accessor for the stored significance flag.
    my ($self) = @_;
    return $self->{'_is_significant'};
}
#------------


=head2 matrix

 Usage     : $blast_object->matrix();
 Purpose   : Get the name of the scoring matrix used.
           : This is extracted from the report.
 Argument  : n/a
 Returns   : string or undef if not defined
 Comments  : TODO: Deprecate this and implement get_parameter('matrix').

=cut

#------------
sub matrix { 
#------------
    # Get/set the scoring matrix name. Any supplied value, including
    # undef, replaces the stored one.
    my ($self, @args) = @_;
    $self->{'_matrix'} = $args[0] if @args;
    return $self->{'_matrix'};
}


=head2 raw_statistics

 Usage     : @stats = $blast_result->raw_statistics();
 Purpose   : Get the raw, unparsed statistical parameter section of the Blast report.
             This is the section at the end after the last HSP alignment.
 Argument  : n/a
 Returns   : Array of strings

=cut

#------------
sub raw_statistics { 
#------------
    # Get/set the raw statistics lines. The setter accepts only an ARRAY
    # reference (stored by reference, not copied); anything else throws
    # Bio::Root::BadParameter. Returns the stored lines as a list, and
    # initialises the store to an empty array if it is not yet defined.
    my ($self, @args) = @_;

    if (@args) {
        my $params = $args[0];
        if (ref($params) ne 'ARRAY') {
            $self->throw(-class =>'Bio::Root::BadParameter',
                         -text => "Can't set statistical params: not an ARRAY ref: $params"
                         );
        }
        else {
            $self->{'_raw_statistics'} = $params;
        }
    }

    $self->{'_raw_statistics'} = []
        unless defined $self->{'_raw_statistics'};

    return @{ $self->{'_raw_statistics'} };
}



=head2 no_hits_found

 Usage     : $nohits = $blast->no_hits_found( [iteration_number] ); 
 Purpose   : Get boolean indicator indicating whether or not any hits
             were present in the report.

             This is NOT the same as determining the number of hits via
             the hits() method, which will return zero hits if there were no
             hits in the report or if all hits were filtered out during the parse.

             Thus, this method can be used to distinguish these possibilities
             for hitless reports generated when filtering.

 Returns   : Boolean
 Argument  : (optional) integer indicating the iteration number (PSI-BLAST)
             If iteration number is not specified and this is a PSI-BLAST result,
             then this method will return true only if all iterations had
             no hits found.

=cut

#-----------
sub no_hits_found {
#-----------
    # Reports whether "no hits found" was flagged for an iteration.
    #
    # Argument : (optional) iteration number.
    # Returns  : with an iteration number, that iteration's flag
    #            (1 if set, undef otherwise);
    #            without one and with more than one iteration recorded,
    #            1 if every iteration is flagged, else 0;
    #            otherwise the flag of iteration 1 (1 or undef).
    #
    # Watch the double negative!
    #   false result = "yes, hits were found"
    #   true result  = "no hits were found"
    #
    # This is a pure query: it does not create per-iteration records on
    # the object as a side effect of looking them up.
    my ($self, $round) = @_;

    # Fetch one iteration's flag without autovivifying its record.
    my $flag_for = sub {
        my $iteration = shift;
        my $record    = $self->{"_iteration_$iteration"};
        return ref($record) ? $record->{'_no_hits_found'} : undef;
    };

    # An iteration count that was never set is treated as zero.
    my $iterations = $self->{'_iterations'} || 0;

    my $result;
    if( defined $round ) {
        $result = $flag_for->($round);
    }
    elsif( $iterations > 1 ) {
        # True only if all iterations had no hits found.
        $result = 1;
        foreach my $i ( 1..$iterations ) {
            if( not defined $flag_for->($i) ) {
                $result = 0;
                last;
            }
        }
    }
    else {
        $result = $flag_for->(1);
    }

    return $result;
}


=head2 set_no_hits_found

 Usage     : $blast->set_no_hits_found( [iteration_number] ); 
 Purpose   : Set boolean indicator indicating whether or not any hits
             were present in the report.
 Returns   : n/a
 Argument  : (optional) integer indicating the iteration number (PSI-BLAST)

=cut

#-----------
sub set_no_hits_found {
#-----------
    # Flags an iteration as having no hits. A missing or false iteration
    # number is treated as iteration 1.
    my $self  = shift;
    my $round = shift || 1;
    my $key   = "_iteration_$round";
    $self->{$key}->{'_no_hits_found'} = 1;
}


=head2 iterations

 Usage     : $num_iterations = $blast->iterations;  (get)
             $blast->iterations($num_iterations);   (set)
 Purpose   : Set/get the number of iterations in the Blast Report (PSI-BLAST).
 Returns   : Total number of iterations in the report
 Argument  : integer  (when setting)

=cut

#----------------
sub iterations {
#----------------
    # Get/set the number of iterations. Any defined argument (including
    # 0) replaces the stored value; undef leaves it unchanged.
    my $self = shift;
    my $num  = shift;
    $self->{'_iterations'} = $num if defined $num;
    return $self->{'_iterations'};
}


=head2 psiblast

 Usage     : if( $blast->psiblast ) { ... }
 Purpose   : Set/get a boolean indicator whether or not the report 
             is a PSI-BLAST report.
 Returns   : 1 if PSI-BLAST, undef if not.
 Argument  : 1 (when setting)

=cut

#----------------
sub psiblast {
#----------------
    # Get/set the PSI-BLAST indicator. Any true argument sets the flag
    # to 1; false arguments leave it unchanged (it cannot be cleared
    # through this method).
    my $self = shift;
    my $val  = shift;
    $self->{'_psiblast'} = 1 if $val;
    return $self->{'_psiblast'};
}


1;
__END__