Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Tools/BPbl2seq.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Tools/BPbl2seq.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,447 @@ +# $Id: BPbl2seq.pm,v 1.21.2.2 2003/06/03 14:38:18 jason Exp $ +# +# Bioperl module Bio::Tools::BPbl2seq +# based closely on the Bio::Tools::BPlite modules +# Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf), +# Lorenz Pollak (lorenz@ist.org, bioperl port) +# +# +# Copyright Peter Schattner +# +# You may distribute this module under the same terms as perl itself +# _history +# October 20, 2000 +# May 29, 2001 +# Fixed bug which prevented reading of more than one HSP / hit. +# This fix required changing calling syntax as described below. (PS) +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Tools::BPbl2seq - Lightweight BLAST parser for pair-wise sequence +alignment using the BLAST algorithm. + +=head1 SYNOPSIS + + use Bio::Tools::BPbl2seq; + my $report = Bio::Tools::BPbl2seq->new(-file => 't/bl2seq.out'); + $report->sbjctName; + $report->sbjctLength; + while(my $hsp = $report->next_feature) { + $hsp->score; + $hsp->bits; + $hsp->percent; + $hsp->P; + $hsp->match; + $hsp->positive; + $hsp->length; + $hsp->querySeq; + $hsp->sbjctSeq; + $hsp->homologySeq; + $hsp->query->start; + $hsp->query->end; + $hsp->sbjct->start; + $hsp->sbjct->end; + $hsp->sbjct->seq_id; + $hsp->sbjct->overlaps($exon); + } + +=head1 DESCRIPTION + +BPbl2seq is a package for parsing BLAST bl2seq reports. BLAST bl2seq is a +program for comparing and aligning two sequences using BLAST. Although +the report format is similar to that of a conventional BLAST, there are a +few differences so that BPlite is unable to read bl2seq reports directly. + +From the user's perspective, one difference between bl2seq and +other blast reports is that the bl2seq report does not print out the +name of the first of the two aligned sequences. (The second sequence +name is given in the report as the name of the "hit"). Consequently, +BPbl2seq has no way of identifying the name of the initial sequence +unless it is passed to constructor as a second argument as in: + + my $report = Bio::Tools::BPbl2seq->new(\*FH, "ALEU_HORVU"); + +If the name of the first sequence (the "query") is not passed to +BPbl2seq.pm in this manner, the name of the first sequence will be +left as "unknown". (Note that to preserve a common interface with the +other BLAST programs the two sequences being compared are referred to +in bl2seq as "query" and "subject" although this is perhaps a bit +misleading when simply comparing 2 sequences as opposed to querying a +database.) + +In addition, since there will only be (at most) one "subject" (hit) in +a bl2seq report, one should use the method $report-E<gt>next_feature, +rather than $report-E<gt>nextSbjct-E<gt>nextHSP to obtain the next +high scoring pair. + +One should note that the previous (0.7) version of BPbl2seq used +slightly different syntax. That version had a bug and consequently the +old syntax has been eliminated. Attempts to use the old syntax will +return error messages explaining the (minor) recoding required to use +the current syntax. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to one +of the Bioperl mailing lists. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bio.perl.org/MailList.html - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track + the bugs and their resolution. Bug reports can be submitted via + email or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Peter Schattner + +Email: schattner@alum.mit.edu + +=head1 ACKNOWLEDGEMENTS + +Based on work of: +Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf), +Lorenz Pollak (lorenz@ist.org, bioperl port) + +=head1 CONTRIBUTORS + +Jason Stajich, jason@cgt.mc.duke.edu + +=cut + +#' +package Bio::Tools::BPbl2seq; + +use strict; +use vars qw(@ISA); +use Bio::Tools::BPlite; +use Bio::Root::Root; +use Bio::Root::IO; +use Bio::Tools::BPlite::Sbjct; # we want to use Sbjct +use Bio::SeqAnalysisParserI; +use Symbol; + +@ISA = qw(Bio::Root::Root Bio::SeqAnalysisParserI Bio::Root::IO); + +#@ISA = qw(Bio::Tools::BPlite); + +=head2 new + + Title : new + Function: Create a new Bio::Tools::BPbl2seq object + Returns : Bio::Tools::BPbl2seq + Args : -file input file (alternative to -fh) + -fh input stream (alternative to -file) + -queryname name of query sequence + +=cut + +sub new { + my ($class, @args) = @_; + my $self = $class->SUPER::new(@args); + # initialize IO + $self->_initialize_io(@args); + + my ($queryname,$rt) = $self->_rearrange([qw(QUERYNAME + REPORT_TYPE)], @args); + $queryname = 'unknown' if( ! defined $queryname ); + if( $rt && $rt =~ /BLAST/i ) { + $self->{'BLAST_TYPE'} = uc($rt); + } else { + $self->warn("Must provide which type of BLAST was run (blastp,blastn, tblastn, tblastx, blastx) if you want strand information to get set properly for DNA query or subjects"); + } + my $sbjct = $self->getSbjct(); + $self->{'_current_sbjct'} = $sbjct; + + $self->{'_query'}->{'NAME'} = $queryname; + return $self; +} + + +=head2 getSbjct + + Title : + Usage : $sbjct = $obj->getSbjct(); + Function : Method of obtaining single "subject" of a bl2seq report + Example : my $sbjct = $obj->getSbjct ) {} + Returns : Sbjct object or null if finished + Args : + +=cut + +sub getSbjct { + my ($self) = @_; +# $self->_fastForward or return undef; + + ####################### + # get bl2seq "sbjct" name and length # + ####################### + my $length; + my $def; + READLOOP: while(defined ($_ = $self->_readline) ) { + if ($_ =~ /^>(.+)$/) { + $def = $1; + next READLOOP; + } + elsif ($_ =~ /^\s*Length\s.+\D(\d+)/i) { + $length = $1; + next READLOOP; + } + elsif ($_ =~ /^\s{0,2}Score/) { + $self->_pushback($_); + last READLOOP; + } + } + return undef if ! defined $def; + $def =~ s/\s+/ /g; + $def =~ s/\s+$//g; + + + #################### + # the Sbjct object # + #################### + my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def, + '-length'=>$length, + '-parent'=>$self); + return $sbjct; +} + + + + +=head2 next_feature + + Title : next_feature + Usage : while( my $feat = $res->next_feature ) { # do something } + Function: calls next_feature function from BPlite. + Example : + Returns : A Bio::SeqFeatureI compliant object, in this case a + Bio::Tools::BPlite::HSP object, and FALSE if there are no more + HSPs. + Args : None + +=cut + +sub next_feature{ + my ($self) = @_; + my ($sbjct, $hsp); + $sbjct = $self->{'_current_sbjct'}; + unless( defined $sbjct ) { + $self->debug(" No hit object found for bl2seq report \n ") ; + return undef; + } + $hsp = $sbjct->nextHSP; + return $hsp || undef; +} + +=head2 queryName + + Title : + Usage : $name = $report->queryName(); + Function : get /set the name of the query + Example : + Returns : name of the query + Args : + +=cut + +sub queryName { + my ($self, $queryname) = @_; + if( $queryname ) { + $self->{'_query'}->{'NAME'} = $queryname; + } + $self->{'_query'}->{'NAME'}; +} + +=head2 sbjctName + + Title : + Usage : $name = $report->sbjctName(); + Function : returns the name of the Sbjct + Example : + Returns : name of the Sbjct + Args : + +=cut + +sub sbjctName { + my $self = shift; +# unless( defined $self->{'_current_sbjct'} ) { +# my $sbjct = $self->{'_current_sbjct'} = $self->nextSbjct; +# return undef unless defined $sbjct; +# } + $self->{'_current_sbjct'}->{'NAME'} || ''; +} + +=head2 sbjctLength + + Title : sbjctLength + Usage : $length = $report->sbjctLength(); + Function : returns the length of the Sbjct + Example : + Returns : name of the Sbjct + Args : + +=cut + +sub sbjctLength { + my $self = shift; +# unless( defined $self->{'_current_sbjct'} ) { +# my $sbjct = $self->{'_current_sbjct'} = $self->nextSbjct; +# return undef unless defined $sbjct; +# } + $self->{'_current_sbjct'}->{'LENGTH'}; +} + +=head2 P + + Title : P + Usage : + Function : Syntax no longer supported, error message only + +=cut + +sub P { + my $self = shift; + $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n "); +} + +=head2 percent + + Title : percent + Usage : $hsp->percent(); + Function : Syntax no longer supported, error message only + +=cut + +sub percent { + my $self = shift; + $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n "); +} + +=head2 match + + Title : match + Usage : $hsp->match(); + Function : Syntax no longer supported, error message only + +=cut + +sub match { + my $self = shift; + $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n "); +} + +=head2 positive + + Title : positive + Usage : $hsp->positive(); + Function : Syntax no longer supported, error message only + +=cut + +sub positive { + my $self = shift; + $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; +} + +=head2 querySeq + + Title : querySeq + Usage : $hsp->querySeq(); + Function : Syntax no longer supported, error message only + +=cut + +sub querySeq { + my $self = shift; + $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; +} + +=head2 sbjctSeq + + Title : sbjctSeq + Usage : $hsp->sbjctSeq(); + Function : Syntax no longer supported, error message only + +=cut + +sub sbjctSeq { + my $self = shift; + $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; +} + +=head2 homologySeq + + Title : homologySeq + Usage : $hsp->homologySeq(); + Function : Syntax no longer supported, error message only + +=cut + +sub homologySeq { + my $self = shift; + $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; +} + +=head2 qs + + Title : qs + Usage : $hsp->qs(); + Function : Syntax no longer supported, error message only + +=cut + +sub qs { + my $self = shift; + $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; +} + +=head2 ss + + Title : ss + Usage : $hsp->ss(); + Function : Syntax no longer supported, error message only + +=cut + +sub ss { + my $self = shift; + $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; +} + +=head2 hs + + Title : hs + Usage : $hsp->hs(); + Function : Syntax no longer supported, error message only + +=cut + +sub hs { + my $self = shift; + $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; +} + +sub _fastForward { + my ($self) = @_; + return 0 if $self->{'REPORT_DONE'}; # empty report + while(defined( $_ = $self->_readline() ) ) { + if ($_ =~ /^>|^Parameters|^\s+Database:|^\s+Posted date:|^\s*Lambda/) { + $self->_pushback($_); + return 1; + } + } + $self->warn("Possible error (1) while parsing BLAST report!"); +} + +1; +__END__