comparison variant_effect_predictor/Bio/Tools/BPbl2seq.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: BPbl2seq.pm,v 1.21.2.2 2003/06/03 14:38:18 jason Exp $
2 #
3 # Bioperl module Bio::Tools::BPbl2seq
4 # based closely on the Bio::Tools::BPlite modules
5 # Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
6 # Lorenz Pollak (lorenz@ist.org, bioperl port)
7 #
8 #
9 # Copyright Peter Schattner
10 #
11 # You may distribute this module under the same terms as perl itself
12 # _history
13 # October 20, 2000
14 # May 29, 2001
15 # Fixed bug which prevented reading of more than one HSP / hit.
16 # This fix required changing calling syntax as described below. (PS)
17 # POD documentation - main docs before the code
18
19 =head1 NAME
20
21 Bio::Tools::BPbl2seq - Lightweight BLAST parser for pair-wise sequence
22 alignment using the BLAST algorithm.
23
24 =head1 SYNOPSIS
25
26 use Bio::Tools::BPbl2seq;
27 my $report = Bio::Tools::BPbl2seq->new(-file => 't/bl2seq.out');
28 $report->sbjctName;
29 $report->sbjctLength;
30 while(my $hsp = $report->next_feature) {
31 $hsp->score;
32 $hsp->bits;
33 $hsp->percent;
34 $hsp->P;
35 $hsp->match;
36 $hsp->positive;
37 $hsp->length;
38 $hsp->querySeq;
39 $hsp->sbjctSeq;
40 $hsp->homologySeq;
41 $hsp->query->start;
42 $hsp->query->end;
43 $hsp->sbjct->start;
44 $hsp->sbjct->end;
45 $hsp->sbjct->seq_id;
46 $hsp->sbjct->overlaps($exon);
47 }
48
49 =head1 DESCRIPTION
50
51 BPbl2seq is a package for parsing BLAST bl2seq reports. BLAST bl2seq is a
52 program for comparing and aligning two sequences using BLAST. Although
53 the report format is similar to that of a conventional BLAST, there are a
54 few differences so that BPlite is unable to read bl2seq reports directly.
55
56 From the user's perspective, one difference between bl2seq and
57 other blast reports is that the bl2seq report does not print out the
58 name of the first of the two aligned sequences. (The second sequence
59 name is given in the report as the name of the "hit"). Consequently,
60 BPbl2seq has no way of identifying the name of the initial sequence
61 unless it is passed to constructor as a second argument as in:
62
63 my $report = Bio::Tools::BPbl2seq->new(\*FH, "ALEU_HORVU");
64
65 If the name of the first sequence (the "query") is not passed to
66 BPbl2seq.pm in this manner, the name of the first sequence will be
67 left as "unknown". (Note that to preserve a common interface with the
68 other BLAST programs the two sequences being compared are referred to
69 in bl2seq as "query" and "subject" although this is perhaps a bit
70 misleading when simply comparing 2 sequences as opposed to querying a
71 database.)
72
73 In addition, since there will only be (at most) one "subject" (hit) in
74 a bl2seq report, one should use the method $report-E<gt>next_feature,
75 rather than $report-E<gt>nextSbjct-E<gt>nextHSP to obtain the next
76 high scoring pair.
77
78 One should note that the previous (0.7) version of BPbl2seq used
79 slightly different syntax. That version had a bug and consequently the
80 old syntax has been eliminated. Attempts to use the old syntax will
81 return error messages explaining the (minor) recoding required to use
82 the current syntax.
83
84 =head1 FEEDBACK
85
86 =head2 Mailing Lists
87
88 User feedback is an integral part of the evolution of this and other
89 Bioperl modules. Send your comments and suggestions preferably to one
90 of the Bioperl mailing lists. Your participation is much appreciated.
91
92 bioperl-l@bioperl.org - General discussion
93 http://bio.perl.org/MailList.html - About the mailing lists
94
95 =head2 Reporting Bugs
96
97 Report bugs to the Bioperl bug tracking system to help us keep track
98 the bugs and their resolution. Bug reports can be submitted via
99 email or the web:
100
101 bioperl-bugs@bio.perl.org
102 http://bugzilla.bioperl.org/
103
104 =head1 AUTHOR - Peter Schattner
105
106 Email: schattner@alum.mit.edu
107
108 =head1 ACKNOWLEDGEMENTS
109
110 Based on work of:
111 Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
112 Lorenz Pollak (lorenz@ist.org, bioperl port)
113
114 =head1 CONTRIBUTORS
115
116 Jason Stajich, jason@cgt.mc.duke.edu
117
118 =cut
119
120 #'
121 package Bio::Tools::BPbl2seq;
122
123 use strict;
124 use vars qw(@ISA);
125 use Bio::Tools::BPlite;
126 use Bio::Root::Root;
127 use Bio::Root::IO;
128 use Bio::Tools::BPlite::Sbjct; # we want to use Sbjct
129 use Bio::SeqAnalysisParserI;
130 use Symbol;
131
132 @ISA = qw(Bio::Root::Root Bio::SeqAnalysisParserI Bio::Root::IO);
133
134 #@ISA = qw(Bio::Tools::BPlite);
135
136 =head2 new
137
138 Title : new
139 Function: Create a new Bio::Tools::BPbl2seq object
140 Returns : Bio::Tools::BPbl2seq
141 Args : -file input file (alternative to -fh)
142 -fh input stream (alternative to -file)
143 -queryname name of query sequence
144
145 =cut
146
147 sub new {
148 my ($class, @args) = @_;
149 my $self = $class->SUPER::new(@args);
150 # initialize IO
151 $self->_initialize_io(@args);
152
153 my ($queryname,$rt) = $self->_rearrange([qw(QUERYNAME
154 REPORT_TYPE)], @args);
155 $queryname = 'unknown' if( ! defined $queryname );
156 if( $rt && $rt =~ /BLAST/i ) {
157 $self->{'BLAST_TYPE'} = uc($rt);
158 } else {
159 $self->warn("Must provide which type of BLAST was run (blastp,blastn, tblastn, tblastx, blastx) if you want strand information to get set properly for DNA query or subjects");
160 }
161 my $sbjct = $self->getSbjct();
162 $self->{'_current_sbjct'} = $sbjct;
163
164 $self->{'_query'}->{'NAME'} = $queryname;
165 return $self;
166 }
167
168
169 =head2 getSbjct
170
171 Title :
172 Usage : $sbjct = $obj->getSbjct();
173 Function : Method of obtaining single "subject" of a bl2seq report
174 Example : my $sbjct = $obj->getSbjct ) {}
175 Returns : Sbjct object or null if finished
176 Args :
177
178 =cut
179
180 sub getSbjct {
181 my ($self) = @_;
182 # $self->_fastForward or return undef;
183
184 #######################
185 # get bl2seq "sbjct" name and length #
186 #######################
187 my $length;
188 my $def;
189 READLOOP: while(defined ($_ = $self->_readline) ) {
190 if ($_ =~ /^>(.+)$/) {
191 $def = $1;
192 next READLOOP;
193 }
194 elsif ($_ =~ /^\s*Length\s.+\D(\d+)/i) {
195 $length = $1;
196 next READLOOP;
197 }
198 elsif ($_ =~ /^\s{0,2}Score/) {
199 $self->_pushback($_);
200 last READLOOP;
201 }
202 }
203 return undef if ! defined $def;
204 $def =~ s/\s+/ /g;
205 $def =~ s/\s+$//g;
206
207
208 ####################
209 # the Sbjct object #
210 ####################
211 my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def,
212 '-length'=>$length,
213 '-parent'=>$self);
214 return $sbjct;
215 }
216
217
218
219
220 =head2 next_feature
221
222 Title : next_feature
223 Usage : while( my $feat = $res->next_feature ) { # do something }
224 Function: calls next_feature function from BPlite.
225 Example :
226 Returns : A Bio::SeqFeatureI compliant object, in this case a
227 Bio::Tools::BPlite::HSP object, and FALSE if there are no more
228 HSPs.
229 Args : None
230
231 =cut
232
233 sub next_feature{
234 my ($self) = @_;
235 my ($sbjct, $hsp);
236 $sbjct = $self->{'_current_sbjct'};
237 unless( defined $sbjct ) {
238 $self->debug(" No hit object found for bl2seq report \n ") ;
239 return undef;
240 }
241 $hsp = $sbjct->nextHSP;
242 return $hsp || undef;
243 }
244
245 =head2 queryName
246
247 Title :
248 Usage : $name = $report->queryName();
249 Function : get /set the name of the query
250 Example :
251 Returns : name of the query
252 Args :
253
254 =cut
255
256 sub queryName {
257 my ($self, $queryname) = @_;
258 if( $queryname ) {
259 $self->{'_query'}->{'NAME'} = $queryname;
260 }
261 $self->{'_query'}->{'NAME'};
262 }
263
264 =head2 sbjctName
265
266 Title :
267 Usage : $name = $report->sbjctName();
268 Function : returns the name of the Sbjct
269 Example :
270 Returns : name of the Sbjct
271 Args :
272
273 =cut
274
275 sub sbjctName {
276 my $self = shift;
277 # unless( defined $self->{'_current_sbjct'} ) {
278 # my $sbjct = $self->{'_current_sbjct'} = $self->nextSbjct;
279 # return undef unless defined $sbjct;
280 # }
281 $self->{'_current_sbjct'}->{'NAME'} || '';
282 }
283
284 =head2 sbjctLength
285
286 Title : sbjctLength
287 Usage : $length = $report->sbjctLength();
288 Function : returns the length of the Sbjct
289 Example :
290 Returns : name of the Sbjct
291 Args :
292
293 =cut
294
295 sub sbjctLength {
296 my $self = shift;
297 # unless( defined $self->{'_current_sbjct'} ) {
298 # my $sbjct = $self->{'_current_sbjct'} = $self->nextSbjct;
299 # return undef unless defined $sbjct;
300 # }
301 $self->{'_current_sbjct'}->{'LENGTH'};
302 }
303
304 =head2 P
305
306 Title : P
307 Usage :
308 Function : Syntax no longer supported, error message only
309
310 =cut
311
312 sub P {
313 my $self = shift;
314 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ");
315 }
316
317 =head2 percent
318
319 Title : percent
320 Usage : $hsp->percent();
321 Function : Syntax no longer supported, error message only
322
323 =cut
324
325 sub percent {
326 my $self = shift;
327 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ");
328 }
329
330 =head2 match
331
332 Title : match
333 Usage : $hsp->match();
334 Function : Syntax no longer supported, error message only
335
336 =cut
337
338 sub match {
339 my $self = shift;
340 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ");
341 }
342
343 =head2 positive
344
345 Title : positive
346 Usage : $hsp->positive();
347 Function : Syntax no longer supported, error message only
348
349 =cut
350
351 sub positive {
352 my $self = shift;
353 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
354 }
355
356 =head2 querySeq
357
358 Title : querySeq
359 Usage : $hsp->querySeq();
360 Function : Syntax no longer supported, error message only
361
362 =cut
363
364 sub querySeq {
365 my $self = shift;
366 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
367 }
368
369 =head2 sbjctSeq
370
371 Title : sbjctSeq
372 Usage : $hsp->sbjctSeq();
373 Function : Syntax no longer supported, error message only
374
375 =cut
376
377 sub sbjctSeq {
378 my $self = shift;
379 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
380 }
381
382 =head2 homologySeq
383
384 Title : homologySeq
385 Usage : $hsp->homologySeq();
386 Function : Syntax no longer supported, error message only
387
388 =cut
389
390 sub homologySeq {
391 my $self = shift;
392 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
393 }
394
395 =head2 qs
396
397 Title : qs
398 Usage : $hsp->qs();
399 Function : Syntax no longer supported, error message only
400
401 =cut
402
403 sub qs {
404 my $self = shift;
405 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
406 }
407
408 =head2 ss
409
410 Title : ss
411 Usage : $hsp->ss();
412 Function : Syntax no longer supported, error message only
413
414 =cut
415
416 sub ss {
417 my $self = shift;
418 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
419 }
420
421 =head2 hs
422
423 Title : hs
424 Usage : $hsp->hs();
425 Function : Syntax no longer supported, error message only
426
427 =cut
428
429 sub hs {
430 my $self = shift;
431 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
432 }
433
434 sub _fastForward {
435 my ($self) = @_;
436 return 0 if $self->{'REPORT_DONE'}; # empty report
437 while(defined( $_ = $self->_readline() ) ) {
438 if ($_ =~ /^>|^Parameters|^\s+Database:|^\s+Posted date:|^\s*Lambda/) {
439 $self->_pushback($_);
440 return 1;
441 }
442 }
443 $self->warn("Possible error (1) while parsing BLAST report!");
444 }
445
446 1;
447 __END__