view variant_effect_predictor/Bio/Tools/BPlite/HSP.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line source

###############################################################################
# Bio::Tools::BPlite::HSP
###############################################################################
# HSP = High Scoring Pair (to all non-experts as I am)
#
# The original BPlite.pm module has been written by Ian Korf !
# see http://sapiens.wustl.edu/~ikorf
#
# You may distribute this module under the same terms as perl itself


#
# BioPerl module for Bio::Tools::BPlite::HSP
#
# Cared for by Peter Schattner <schattner@alum.mit.edu>
#
# Copyright Peter Schattner
#
# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

Bio::Tools::BPlite::HSP - Blast report High Scoring Pair (HSP)

=head1 SYNOPSIS

 use Bio::Tools::BPlite;
 my $report = new Bio::Tools::BPlite(-fh=>\*STDIN);
 {
    while(my $sbjct = $report->nextSbjct) {
	while (my $hsp = $sbjct->nextHSP) {
	    $hsp->score;
	    $hsp->bits;
	    $hsp->percent;
	    $hsp->P;
	    $hsp->match;
	    $hsp->positive;
	    $hsp->length;
	    $hsp->querySeq;
	    $hsp->sbjctSeq;
	    $hsp->homologySeq;
	    $hsp->query->start;
	    $hsp->query->end;
	    $hsp->hit->start;
	    $hsp->hit->end;
	    $hsp->hit->seq_id;
	    $hsp->hit->overlaps($exon);
	}
    }

    # the following line takes you to the next report in the stream/file
    # it will return 0 if that report is empty,
    # but that is valid for an empty blast report.
    # Returns -1 for EOF.

    last if ($report->_parseHeader == -1));

 redo
 }

=head1 DESCRIPTION

This object handles the High Scoring Pair data for a Blast report.
This is where the percent identity, query and hit sequence length,
P value, etc are stored and where most of the necessary information is located when building logic around parsing a Blast report.

See L<Bio::Tools::BPlite> for more detailed information on the entire
BPlite Blast parsing system.

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list.  Your participation is much appreciated.

  bioperl-l@bioperl.org            - General discussion
http://bioperl.org/MailList.shtml  - About the mailing lists

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
email or the web:

  bioperl-bugs@bioperl.org
  http://bugzilla.bioperl.org/

=head1 AUTHOR - Peter Schattner

Email: schattner@alum.mit.edu

=head1 CONTRIBUTORS

Jason Stajich, jason@bioperl.org

=head1 APPENDIX

The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _

=cut

# Let the code begin...

package Bio::Tools::BPlite::HSP;

use vars qw(@ISA);
use strict;

# to disable overloading comment this out:
#use overload '""' => '_overload';

# Object preamble - inheriets from Bio::SeqFeature::SimilarityPair

use Bio::SeqFeature::SimilarityPair;
use Bio::SeqFeature::Similarity;

@ISA = qw(Bio::SeqFeature::SimilarityPair);

sub new {
    my ($class, @args) = @_;

    # workaround to make sure frame is not set before strand is
    # interpreted from query/hit info 
    # this workaround removes the key from the hash
    # so the superclass does not try and work with it
    # we'll take care of setting it in this module later on

    my %newargs = @args;
    foreach ( keys %newargs ) {
	if( /frame$/i ) {
	    delete $newargs{$_};
	} 
    }
    # done with workaround

    my $self = $class->SUPER::new(%newargs);
    
    my ($score,$bits,$match,$hsplength,$positive,$gaps,$p,$exp,$qb,$qe,$sb,
	$se,$qs,$ss,$hs,$qname,$sname,$qlength,$slength,$qframe,$sframe,
	$blasttype) = 
	    $self->_rearrange([qw(SCORE
				  BITS
				  MATCH
				  HSPLENGTH
				  POSITIVE
				  GAPS				  
				  P
				  EXP
				  QUERYBEGIN
				  QUERYEND
				  SBJCTBEGIN
				  SBJCTEND
				  QUERYSEQ
				  SBJCTSEQ
				  HOMOLOGYSEQ
				  QUERYNAME
				  SBJCTNAME
				  QUERYLENGTH
				  SBJCTLENGTH
				  QUERYFRAME
				  SBJCTFRAME
				  BLASTTYPE
				  )],@args);
    
    $blasttype = 'UNKNOWN' unless $blasttype;
    $self->report_type($blasttype);
    # Determine strand meanings
    my ($queryfactor, $sbjctfactor) = (1,0); # default
    if ($blasttype eq 'BLASTP' || $blasttype eq 'TBLASTN' ) {
	$queryfactor = 0;
    }
    if ($blasttype eq 'TBLASTN' || $blasttype eq 'TBLASTX' || 
	$blasttype eq 'BLASTN' )  {
	$sbjctfactor = 1;
    }
    
    # Set BLAST type
    $self->{'BLAST_TYPE'} = $blasttype;
	
    # Store the aligned query as sequence feature
    my $strand;
    if ($qe > $qb) {		# normal query: start < end
		if ($queryfactor) { $strand = 1; } else { $strand = undef; }
		$self->query( Bio::SeqFeature::Similarity->new
		      (-start=>$qb, -end=>$qe, -strand=>$strand, 
		       -source=>"BLAST" ) ) }
    else {			# reverse query (i dont know if this is possible, but feel free to correct)	
		if ($queryfactor) { $strand = -1; } else { $strand = undef; }
		$self->query( Bio::SeqFeature::Similarity->new
		      (-start=>$qe, -end=>$qb, -strand=>$strand,
		       -source=>"BLAST" ) ) }

    # store the aligned hit as sequence feature
    if ($se > $sb) {		# normal hit
	if ($sbjctfactor) { $strand = 1; } else { $strand = undef; }
	$self->hit( Bio::SeqFeature::Similarity->new
			(-start=>$sb, -end=>$se, -strand=>$strand,
			 -source=>"BLAST" ) ) }
    else { # reverse hit: start bigger than end
	if ($sbjctfactor) { $strand = -1; } else { $strand = undef; }
	$self->hit( Bio::SeqFeature::Similarity->new
			(-start=>$se, -end=>$sb, -strand=>$strand,
			 -source=>"BLAST" ) ) }
    
    # name the sequences
    $self->query->seq_id($qname); # query name
    $self->hit->seq_id($sname);   # hit name

    # set lengths
    $self->query->seqlength($qlength); # query length
    $self->hit->seqlength($slength);   # hit length

    # set object vars
    $self->score($score);
    $self->bits($bits);

    $self->significance($p);
    $self->{'EXP'} = $exp;
    
    $self->query->frac_identical($match);
    $self->hit->frac_identical($match);
    $self->{'HSPLENGTH'} = $hsplength;
    $self->{'PERCENT'} = int((1000 * $match)/$hsplength)/10;
    $self->{'POSITIVE'} = $positive;
    $self->{'GAPS'} = $gaps;
    $self->{'QS'} = $qs;
    $self->{'SS'} = $ss;
    $self->{'HS'} = $hs;
    
    $self->frame($qframe, $sframe);
    return $self;		# success - we hope!
}

# to disable overloading comment this out:
sub _overload {
	my $self = shift;
	return $self->start."..".$self->end." ".$self->bits;
}

=head2 report_type

 Title    : report_type
 Usage    : $type = $sbjct->report_type()
 Function : Returns the type of report from which this hit was obtained.
            This usually pertains only to BLAST and friends reports, for which
            the report type denotes what type of sequence was aligned against
            what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated dna-prt, 
            TBLASTN prt-translated dna, TBLASTX translated dna-translated dna).
 Example  : 
 Returns  : A string (BLASTN, BLASTP, BLASTX, TBLASTN, TBLASTX, UNKNOWN)
 Args     : a string on set (you should know what you are doing)

=cut

sub report_type {
    my ($self, $rpt) = @_;
    if($rpt) {
	$self->{'_report_type'} = $rpt;
    }
    return $self->{'_report_type'};
}

=head2 EXP

 Title   : EXP
 Usage   : my $exp = $hsp->EXP;
 Function: returns the EXP value for the HSP
 Returns : string value
 Args    : none
 Note    : Patch provided by Sami Ashour for BTK parsing


=cut

sub EXP{
    return $_[0]->{'EXP'};
}


=head2 P

 Title    : P
 Usage    : $hsp->P();
 Function : returns the P (significance) value for a HSP 
 Returns  : (double) significance value
 Args     :

=cut

sub P {
	my ($self, @args) = @_;
	my $float = $self->significance(@args);
	my $match = '([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?'; # Perl Cookbook 2.1
	if ($float =~ /^$match$/) {
	    # Is a C float
	    return $float;
	} elsif ("1$float" =~ /^$match$/) {
	    # Almost C float, Jitterbug 974
	    return "1$float";
	} else {
		$self->warn("[HSP::P()] '$float' is not a known number format. Returning zero (0) instead.");
		return 0;
	}
}

=head2 percent

 Title    : percent
 Usage    : $hsp->percent();
 Function : returns the percent matching 
 Returns  : (double) percent matching
 Args     : none

=cut

sub percent         {shift->{'PERCENT'}}


=head2 match

 Title    : match
 Usage    : $hsp->match();
 Function : returns the match
 Example  : 
 Returns  : (double) frac_identical 
 Args     :

=cut

sub match           {shift->query->frac_identical(@_)}

=head2 hsplength

 Title    : hsplength
 Usage    : $hsp->hsplength();
 Function : returns the HSP length (including gaps)
 Returns  : (integer) HSP length
 Args     : none

=cut

sub hsplength              {shift->{'HSPLENGTH'}}

=head2 positive

 Title    : positive
 Usage    : $hsp->positive();
 Function : returns the number of positive matches (symbols in the alignment
            with a positive score)
 Returns  : (int) number of positive matches in the alignment
 Args     : none

=cut

sub positive        {shift->{'POSITIVE'}}

=head2 gaps

 Title    : gaps
 Usage    : $hsp->gaps();
 Function : returns the number of gaps or 0 if none
 Returns  : (int) number of gaps or 0 if none
 Args     : none

=cut

sub gaps        {shift->{'GAPS'}}

=head2 querySeq

 Title    : querySeq
 Usage    : $hsp->querySeq();
 Function : returns the query sequence
 Returns  : (string) the Query Sequence 
 Args     : none

=cut

sub querySeq        {shift->{'QS'}}

=head2 sbjctSeq

 Title    : sbjctSeq
 Usage    : $hsp->sbjctSeq();
 Function : returns the Sbjct sequence 
 Returns  : (string) the Sbjct Sequence 
 Args     : none

=cut

sub sbjctSeq        {shift->{'SS'}}

=head2 homologySeq

 Title    : homologySeq
 Usage    : $hsp->homologySeq();
 Function : returns the homologous sequence 
 Returns  : (string) homologous sequence 
 Args     : none

=cut

sub homologySeq     {shift->{'HS'}}

=head2 qs

 Title    : qs
 Usage    : $hsp->qs();
 Function : returns the Query Sequence (same as querySeq)
 Returns  : (string) query Sequence 
 Args     : none

=cut

sub qs              {shift->{'QS'}}

=head2 ss

 Title    : ss
 Usage    : $hsp->ss();
 Function : returns the subject sequence ( same as sbjctSeq) 
 Returns  : (string) Sbjct Sequence
 Args     : none

=cut

sub ss              {shift->{'SS'}}

=head2 hs

 Title    : hs
 Usage    : $hsp->hs();
 Function : returns the Homologous Sequence (same as homologySeq ) 
 Returns  : (string) Homologous Sequence
 Args     : none

=cut

sub hs              {shift->{'HS'}}

sub frame {
    my ($self, $qframe, $sframe) = @_;
    if( defined $qframe ) {
	if( $qframe == 0 ) {
	    $qframe = undef;
	} elsif( $qframe !~ /^([+-])?([1-3])/ ) {	    
	    $self->warn("Specifying an invalid query frame ($qframe)");
	    $qframe = undef;
	} else { 
	    if( ($1 eq '-' && $self->query->strand >= 0) || 
		($1 eq '+' && $self->query->strand <= 0) ) {
		$self->warn("Query frame ($qframe) did not match strand of query (". $self->query->strand() . ")");
	    }
	    # Set frame to GFF [0-2]
	    $qframe = $2 - 1;
	}
	$self->{'QFRAME'} = $qframe;
    }
    if( defined $sframe ) {
	  if( $sframe == 0 ) {
	    $sframe = undef;
	  } elsif( $sframe !~ /^([+-])?([1-3])/ ) {	    
	    $self->warn("Specifying an invalid hit frame ($sframe)");
	    $sframe = undef;
	  } else { 
	      if( ($1 eq '-' && $self->hit->strand >= 0) || 
		  ($1 eq '+' && $self->hit->strand <= 0) ) 
	      {
		  $self->warn("Hit frame ($sframe) did not match strand of hit (". $self->hit->strand() . ")");
	      }
	      
	      # Set frame to GFF [0-2]
	      $sframe = $2 - 1;
	  }
	  $self->{'SFRAME'} = $sframe;
      }

    (defined $qframe && $self->SUPER::frame($qframe) && 
     ($self->{'FRAME'} = $qframe)) || 
    (defined $sframe && $self->SUPER::frame($sframe) && 
     ($self->{'FRAME'} = $sframe));

    if (wantarray() && 
	$self->{'BLAST_TYPE'} eq 'TBLASTX') 
    { 
	return ($self->{'QFRAME'}, $self->{'SFRAME'}); 
    } elsif (wantarray())  { 
	(defined $self->{'QFRAME'} && 
	 return ($self->{'QFRAME'}, undef)) || 
	     (defined $self->{'SFRAME'} && 
	      return (undef, $self->{'SFRAME'})); 
    } else { 
	(defined $self->{'QFRAME'} && 
	 return $self->{'QFRAME'}) || 
	(defined $self->{'SFRAME'} && 
	 return $self->{'SFRAME'}); 
    }
}

1;