Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Tools/BPlite/Sbjct.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Tools/BPlite/Sbjct.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,361 @@ +# $Id: Sbjct.pm,v 1.23.2.1 2003/02/20 00:39:03 jason Exp $ +############################################################################### +# Bio::Tools::BPlite::Sbjct +############################################################################### +# +# The original BPlite.pm module has been written by Ian Korf ! +# see http://sapiens.wustl.edu/~ikorf +# +# You may distribute this module under the same terms as perl itself +# +# BioPerl module for Bio::Tools::BPlite::Sbjct +# +# Cared for by Peter Schattner <schattner@alum.mit.edu> +# +# Copyright Peter Schattner +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Tools::BPlite::Sbjct - A Blast Subject (database search Hit) + +=head1 SYNOPSIS + + use Bio::Tools::BPlite + my $report = new Bio::Tools::BPlite(-fh=>\*STDIN); + while(my $sbjct = $report->nextSbjct) { + $sbjct->name; # access to the hit name + "$sbjct"; # overloaded to return name + $sbjct->nextHSP; # gets the next HSP from the sbjct + while(my $hsp = $sbjct->nextHSP) { + # canonical form is again a while loop + } + +=head1 DESCRIPTION + +See L<Bio::Tools::BPlite> for a more detailed information about the +BPlite BLAST parsing objects. + +The original BPlite.pm module has been written by Ian Korf! +See http://sapiens.wustl.edu/~ikorf + +The Sbjct object encapsulates a Hit in a Blast database +search. The Subjects are the "Hits" for a particular query. A +Subject may be made up of multiple High Scoring Pairs (HSP) which are +accessed through the nextHSP method. + +If you are searching for the P-value or percent identity that is +specific to each HSP and you will need to use the nextHSP method to +get access to that data. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to +the Bioperl mailing list. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion +http://bioperl.org/MailList.shtml - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +of the bugs and their resolution. Bug reports can be submitted via +email or the web: + + bioperl-bugs@bioperl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Peter Schattner + +Email: schattner@alum.mit.edu + +=head1 CONTRIBUTORS + +Jason Stajich, jason@bioperl.org + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. +Internal methods are usually preceded with a _ + +=cut + +# Let the code begin... + +package Bio::Tools::BPlite::Sbjct; + +use strict; + +use Bio::Root::Root; # root object to inherit from +use Bio::Tools::BPlite::HSP; # we want to use HSP +#use overload '""' => 'name'; +use vars qw(@ISA); + +@ISA = qw(Bio::Root::Root); + +sub new { + my ($class, @args) = @_; + my $self = $class->SUPER::new(@args); + + ($self->{'NAME'},$self->{'LENGTH'}, + $self->{'PARENT'}) = + $self->_rearrange([qw(NAME + LENGTH + PARENT + )],@args); + $self->report_type($self->{'PARENT'}->{'BLAST_TYPE'} || 'UNKNOWN'); + $self->{'HSP_ALL_PARSED'} = 0; + + return $self; +} + +=head2 name + + Title : name + Usage : $name = $obj->name(); + Function : returns the name of the Sbjct + Example : + Returns : name of the Sbjct + Args : + +=cut + +sub name {shift->{'NAME'}} + +=head2 report_type + + Title : report_type + Usage : $type = $sbjct->report_type() + Function : Returns the type of report from which this hit was obtained. + This usually pertains only to BLAST and friends reports, for which + the report type denotes what type of sequence was aligned against + what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated dna-prt, + TBLASTN prt-translated dna, TBLASTX translated dna-translated dna). + Example : + Returns : A string (BLASTN, BLASTP, BLASTX, TBLASTN, TBLASTX, UNKNOWN) + Args : a string on set (you should know what you are doing) + +=cut + +sub report_type { + my ($self, $rpt) = @_; + if($rpt) { + $self->{'_report_type'} = $rpt; + } + return $self->{'_report_type'}; +} + +=head2 nextFeaturePair + + Title : nextFeaturePair + Usage : $name = $obj->nextFeaturePair(); + Function : same as the nextHSP function + Example : + Returns : next FeaturePair + Args : + +=cut + +sub nextFeaturePair {shift->nextHSP}; # just another name + +=head2 nextHSP + + Title : nextHSP + Usage : $hsp = $obj->nextHSP(); + Function : returns the next available High Scoring Pair + Example : + Returns : Bio::Tools::HSP or null if finished + Args : + +=cut + +sub nextHSP { + my ($self) = @_; + return undef if $self->{'HSP_ALL_PARSED'}; + + ############################ + # get and parse scorelines # + ############################ + my ($qframe, $sframe); + my $scoreline = $self->_readline(); + my $nextline = $self->_readline(); + return undef if not defined $nextline; + $scoreline .= $nextline; + my ($score, $bits); + if ($scoreline =~ /\d bits\)/) { + ($score, $bits) = $scoreline =~ + /Score = (\d+) \((\S+) bits\)/; # WU-BLAST + } + else { + ($bits, $score) = $scoreline =~ + /Score =\s+(\S+) bits \((\d+)/; # NCBI-BLAST + } + + my ($match, $hsplength) = ($scoreline =~ /Identities = (\d+)\/(\d+)/); + my ($positive) = ($scoreline =~ /Positives = (\d+)/); + my ($gaps) = ($scoreline =~ /Gaps = (\d+)/); + if($self->report_type() eq 'TBLASTX') { + ($qframe, $sframe) = $scoreline =~ /Frame =\s+([+-]\d)\s+\/\s+([+-]\d)/; + } elsif ($self->report_type() eq 'TBLASTN') { + ($sframe) = $scoreline =~ /Frame =\s+([+-]\d)/; + } else { + ($qframe) = $scoreline =~ /Frame =\s+([+-]\d)/; + } + $positive = $match if not defined $positive; + $gaps = '0' if not defined $gaps; + my ($p) = ($scoreline =~ /[Sum ]*P[\(\d+\)]* = (\S+)/); + unless (defined $p) {(undef, $p) = $scoreline =~ /Expect(\(\d+\))? =\s+(\S+)/} + my ($exp) = ($scoreline =~ /Expect(?:\(\d+\))? =\s+([^\s,]+)/); + $exp = -1 unless( defined $exp ); + + $self->throw("Unable to parse '$scoreline'") unless defined $score; + + ####################### + # get alignment lines # + ####################### + my (@hspline); + while( defined($_ = $self->_readline()) ) { + if ($_ =~ /^WARNING:|^NOTE:/) { + while(defined($_ = $self->_readline())) {last if $_ !~ /\S/} + } + elsif ($_ !~ /\S/) {next} + elsif ($_ =~ /Strand HSP/) {next} # WU-BLAST non-data + elsif ($_ =~ /^\s*Strand/) {next} # NCBI-BLAST non-data + elsif ($_ =~ /^\s*Score/) {$self->_pushback($_); last} + + elsif ($_ =~ /^>|^Histogram|^Searching|^Parameters|^\s+Database:|^CPU\stime|^\s*Lambda/) + { + #ps 5/28/01 + # elsif ($_ =~ /^>|^Parameters|^\s+Database:|^CPU\stime/) { + $self->_pushback($_); + + $self->{'HSP_ALL_PARSED'} = 1; + last; + } + elsif( $_ =~ /^\s*Frame/ ) { + if ($self->report_type() eq 'TBLASTX') { + ($qframe, $sframe) = $_ =~ /Frame = ([\+-]\d)\s+\/\s+([\+-]\d)/; + } elsif ($self->report_type() eq 'TBLASTN') { + ($sframe) = $_ =~ /Frame = ([\+-]\d)/; + } else { + ($qframe) = $_ =~ /Frame = ([\+-]\d)/; + } + } + else { + push @hspline, $_; # store the query line + $nextline = $self->_readline(); + # Skip "pattern" line when parsing PHIBLAST reports, otherwise store the alignment line + my $l1 = ($nextline =~ /^\s*pattern/) ? $self->_readline() : $nextline; + push @hspline, $l1; # store the alignment line + my $l2 = $self->_readline(); push @hspline, $l2; # grab/store the sbjct line + } + } + + ######################### + # parse alignment lines # + ######################### + my ($ql, $sl, $as) = ("", "", ""); + my ($qb, $qe, $sb, $se) = (0,0,0,0); + my (@QL, @SL, @AS); # for better memory management + + for(my $i=0;$i<@hspline;$i+=3) { + # warn $hspline[$i], $hspline[$i+2]; + $hspline[$i] =~ /^(?:Query|Trans):\s+(\d+)\s*([\D\S]+)\s+(\d+)/; + $ql = $2; $qb = $1 unless $qb; $qe = $3; + + my $offset = index($hspline[$i], $ql); + $as = substr($hspline[$i+1], $offset, CORE::length($ql)); + + $hspline[$i+2] =~ /^Sbjct:\s+(\d+)\s*([\D\S]+)\s+(\d+)/; + $sl = $2; $sb = $1 unless $sb; $se = $3; + + push @QL, $ql; push @SL, $sl; push @AS, $as; + } + + ################## + # the HSP object # + ################## + $ql = join("", @QL); + $sl = join("", @SL); + $as = join("", @AS); +# Query name and length are not in the report for a bl2seq report so {'PARENT'}->query and +# {'PARENT'}->qlength will not be available. + my ($qname, $qlength) = ('unknown','unknown'); + if ($self->{'PARENT'}->can('query')) { + $qname = $self->{'PARENT'}->query; + $qlength = $self->{'PARENT'}->qlength; + } + + my $hsp = new Bio::Tools::BPlite::HSP + ('-score' => $score, + '-bits' => $bits, + '-match' => $match, + '-positive' => $positive, + '-gaps' => $gaps, + '-hsplength' => $hsplength, + '-p' => $p, + '-exp' => $exp, + '-queryBegin' => $qb, + '-queryEnd' => $qe, + '-sbjctBegin' => $sb, + '-sbjctEnd' => $se, + '-querySeq' => $ql, + '-sbjctSeq' => $sl, + '-homologySeq'=> $as, + '-queryName' => $qname, +# '-queryName'=>$self->{'PARENT'}->query, + '-sbjctName' => $self->{'NAME'}, + '-queryLength'=> $qlength, +# '-queryLength'=>$self->{'PARENT'}->qlength, + '-sbjctLength'=> $self->{'LENGTH'}, + '-queryFrame' => $qframe, + '-sbjctFrame' => $sframe, + '-blastType' => $self->report_type()); + return $hsp; +} + +=head2 _readline + + Title : _readline + Usage : $obj->_readline + Function: Reads a line of input. + + Note that this method implicitely uses the value of $/ that is + in effect when called. + + Note also that the current implementation does not handle pushed + back input correctly unless the pushed back input ends with the + value of $/. + Example : + Returns : + +=cut + +sub _readline{ + my ($self) = @_; + return $self->{'PARENT'}->_readline(); +} + +=head2 _pushback + + Title : _pushback + Usage : $obj->_pushback($newvalue) + Function: puts a line previously read with _readline back into a buffer + Example : + Returns : + Args : newvalue + +=cut + +sub _pushback { + my ($self, $arg) = @_; + return $self->{'PARENT'}->_pushback($arg); +} + +1;