0
|
1 # $Id: BPbl2seq.pm,v 1.21.2.2 2003/06/03 14:38:18 jason Exp $
|
|
2 #
|
|
3 # Bioperl module Bio::Tools::BPbl2seq
|
|
4 # based closely on the Bio::Tools::BPlite modules
|
|
5 # Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
|
|
6 # Lorenz Pollak (lorenz@ist.org, bioperl port)
|
|
7 #
|
|
8 #
|
|
9 # Copyright Peter Schattner
|
|
10 #
|
|
11 # You may distribute this module under the same terms as perl itself
|
|
12 # _history
|
|
13 # October 20, 2000
|
|
14 # May 29, 2001
|
|
15 # Fixed bug which prevented reading of more than one HSP / hit.
|
|
16 # This fix required changing calling syntax as described below. (PS)
|
|
17 # POD documentation - main docs before the code
|
|
18
|
|
19 =head1 NAME
|
|
20
|
|
21 Bio::Tools::BPbl2seq - Lightweight BLAST parser for pair-wise sequence
|
|
22 alignment using the BLAST algorithm.
|
|
23
|
|
24 =head1 SYNOPSIS
|
|
25
|
|
26 use Bio::Tools::BPbl2seq;
|
|
27 my $report = Bio::Tools::BPbl2seq->new(-file => 't/bl2seq.out');
|
|
28 $report->sbjctName;
|
|
29 $report->sbjctLength;
|
|
30 while(my $hsp = $report->next_feature) {
|
|
31 $hsp->score;
|
|
32 $hsp->bits;
|
|
33 $hsp->percent;
|
|
34 $hsp->P;
|
|
35 $hsp->match;
|
|
36 $hsp->positive;
|
|
37 $hsp->length;
|
|
38 $hsp->querySeq;
|
|
39 $hsp->sbjctSeq;
|
|
40 $hsp->homologySeq;
|
|
41 $hsp->query->start;
|
|
42 $hsp->query->end;
|
|
43 $hsp->sbjct->start;
|
|
44 $hsp->sbjct->end;
|
|
45 $hsp->sbjct->seq_id;
|
|
46 $hsp->sbjct->overlaps($exon);
|
|
47 }
|
|
48
|
|
49 =head1 DESCRIPTION
|
|
50
|
|
51 BPbl2seq is a package for parsing BLAST bl2seq reports. BLAST bl2seq is a
|
|
52 program for comparing and aligning two sequences using BLAST. Although
|
|
53 the report format is similar to that of a conventional BLAST, there are a
|
|
54 few differences so that BPlite is unable to read bl2seq reports directly.
|
|
55
|
|
56 From the user's perspective, one difference between bl2seq and
|
|
57 other blast reports is that the bl2seq report does not print out the
|
|
58 name of the first of the two aligned sequences. (The second sequence
|
|
59 name is given in the report as the name of the "hit"). Consequently,
|
|
60 BPbl2seq has no way of identifying the name of the initial sequence
|
|
61 unless it is passed to constructor as a second argument as in:
|
|
62
|
|
63 my $report = Bio::Tools::BPbl2seq->new(\*FH, "ALEU_HORVU");
|
|
64
|
|
65 If the name of the first sequence (the "query") is not passed to
|
|
66 BPbl2seq.pm in this manner, the name of the first sequence will be
|
|
67 left as "unknown". (Note that to preserve a common interface with the
|
|
68 other BLAST programs the two sequences being compared are referred to
|
|
69 in bl2seq as "query" and "subject" although this is perhaps a bit
|
|
70 misleading when simply comparing 2 sequences as opposed to querying a
|
|
71 database.)
|
|
72
|
|
73 In addition, since there will only be (at most) one "subject" (hit) in
|
|
74 a bl2seq report, one should use the method $report-E<gt>next_feature,
|
|
75 rather than $report-E<gt>nextSbjct-E<gt>nextHSP to obtain the next
|
|
76 high scoring pair.
|
|
77
|
|
78 One should note that the previous (0.7) version of BPbl2seq used
|
|
79 slightly different syntax. That version had a bug and consequently the
|
|
80 old syntax has been eliminated. Attempts to use the old syntax will
|
|
81 return error messages explaining the (minor) recoding required to use
|
|
82 the current syntax.
|
|
83
|
|
84 =head1 FEEDBACK
|
|
85
|
|
86 =head2 Mailing Lists
|
|
87
|
|
88 User feedback is an integral part of the evolution of this and other
|
|
89 Bioperl modules. Send your comments and suggestions preferably to one
|
|
90 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
91
|
|
92 bioperl-l@bioperl.org - General discussion
|
|
93 http://bio.perl.org/MailList.html - About the mailing lists
|
|
94
|
|
95 =head2 Reporting Bugs
|
|
96
|
|
97 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
98 the bugs and their resolution. Bug reports can be submitted via
|
|
99 email or the web:
|
|
100
|
|
101 bioperl-bugs@bio.perl.org
|
|
102 http://bugzilla.bioperl.org/
|
|
103
|
|
104 =head1 AUTHOR - Peter Schattner
|
|
105
|
|
106 Email: schattner@alum.mit.edu
|
|
107
|
|
108 =head1 ACKNOWLEDGEMENTS
|
|
109
|
|
110 Based on work of:
|
|
111 Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
|
|
112 Lorenz Pollak (lorenz@ist.org, bioperl port)
|
|
113
|
|
114 =head1 CONTRIBUTORS
|
|
115
|
|
116 Jason Stajich, jason@cgt.mc.duke.edu
|
|
117
|
|
118 =cut
|
|
119
|
|
120 #'
|
|
121 package Bio::Tools::BPbl2seq;
|
|
122
|
|
123 use strict;
|
|
124 use vars qw(@ISA);
|
|
125 use Bio::Tools::BPlite;
|
|
126 use Bio::Root::Root;
|
|
127 use Bio::Root::IO;
|
|
128 use Bio::Tools::BPlite::Sbjct; # we want to use Sbjct
|
|
129 use Bio::SeqAnalysisParserI;
|
|
130 use Symbol;
|
|
131
|
|
132 @ISA = qw(Bio::Root::Root Bio::SeqAnalysisParserI Bio::Root::IO);
|
|
133
|
|
134 #@ISA = qw(Bio::Tools::BPlite);
|
|
135
|
|
136 =head2 new
|
|
137
|
|
138 Title : new
|
|
139 Function: Create a new Bio::Tools::BPbl2seq object
|
|
140 Returns : Bio::Tools::BPbl2seq
|
|
141 Args : -file input file (alternative to -fh)
|
|
142 -fh input stream (alternative to -file)
|
|
143 -queryname name of query sequence
|
|
144
|
|
145 =cut
|
|
146
|
|
147 sub new {
|
|
148 my ($class, @args) = @_;
|
|
149 my $self = $class->SUPER::new(@args);
|
|
150 # initialize IO
|
|
151 $self->_initialize_io(@args);
|
|
152
|
|
153 my ($queryname,$rt) = $self->_rearrange([qw(QUERYNAME
|
|
154 REPORT_TYPE)], @args);
|
|
155 $queryname = 'unknown' if( ! defined $queryname );
|
|
156 if( $rt && $rt =~ /BLAST/i ) {
|
|
157 $self->{'BLAST_TYPE'} = uc($rt);
|
|
158 } else {
|
|
159 $self->warn("Must provide which type of BLAST was run (blastp,blastn, tblastn, tblastx, blastx) if you want strand information to get set properly for DNA query or subjects");
|
|
160 }
|
|
161 my $sbjct = $self->getSbjct();
|
|
162 $self->{'_current_sbjct'} = $sbjct;
|
|
163
|
|
164 $self->{'_query'}->{'NAME'} = $queryname;
|
|
165 return $self;
|
|
166 }
|
|
167
|
|
168
|
|
169 =head2 getSbjct
|
|
170
|
|
171 Title :
|
|
172 Usage : $sbjct = $obj->getSbjct();
|
|
173 Function : Method of obtaining single "subject" of a bl2seq report
|
|
174 Example : my $sbjct = $obj->getSbjct ) {}
|
|
175 Returns : Sbjct object or null if finished
|
|
176 Args :
|
|
177
|
|
178 =cut
|
|
179
|
|
180 sub getSbjct {
|
|
181 my ($self) = @_;
|
|
182 # $self->_fastForward or return undef;
|
|
183
|
|
184 #######################
|
|
185 # get bl2seq "sbjct" name and length #
|
|
186 #######################
|
|
187 my $length;
|
|
188 my $def;
|
|
189 READLOOP: while(defined ($_ = $self->_readline) ) {
|
|
190 if ($_ =~ /^>(.+)$/) {
|
|
191 $def = $1;
|
|
192 next READLOOP;
|
|
193 }
|
|
194 elsif ($_ =~ /^\s*Length\s.+\D(\d+)/i) {
|
|
195 $length = $1;
|
|
196 next READLOOP;
|
|
197 }
|
|
198 elsif ($_ =~ /^\s{0,2}Score/) {
|
|
199 $self->_pushback($_);
|
|
200 last READLOOP;
|
|
201 }
|
|
202 }
|
|
203 return undef if ! defined $def;
|
|
204 $def =~ s/\s+/ /g;
|
|
205 $def =~ s/\s+$//g;
|
|
206
|
|
207
|
|
208 ####################
|
|
209 # the Sbjct object #
|
|
210 ####################
|
|
211 my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def,
|
|
212 '-length'=>$length,
|
|
213 '-parent'=>$self);
|
|
214 return $sbjct;
|
|
215 }
|
|
216
|
|
217
|
|
218
|
|
219
|
|
220 =head2 next_feature
|
|
221
|
|
222 Title : next_feature
|
|
223 Usage : while( my $feat = $res->next_feature ) { # do something }
|
|
224 Function: calls next_feature function from BPlite.
|
|
225 Example :
|
|
226 Returns : A Bio::SeqFeatureI compliant object, in this case a
|
|
227 Bio::Tools::BPlite::HSP object, and FALSE if there are no more
|
|
228 HSPs.
|
|
229 Args : None
|
|
230
|
|
231 =cut
|
|
232
|
|
233 sub next_feature{
|
|
234 my ($self) = @_;
|
|
235 my ($sbjct, $hsp);
|
|
236 $sbjct = $self->{'_current_sbjct'};
|
|
237 unless( defined $sbjct ) {
|
|
238 $self->debug(" No hit object found for bl2seq report \n ") ;
|
|
239 return undef;
|
|
240 }
|
|
241 $hsp = $sbjct->nextHSP;
|
|
242 return $hsp || undef;
|
|
243 }
|
|
244
|
|
245 =head2 queryName
|
|
246
|
|
247 Title :
|
|
248 Usage : $name = $report->queryName();
|
|
249 Function : get /set the name of the query
|
|
250 Example :
|
|
251 Returns : name of the query
|
|
252 Args :
|
|
253
|
|
254 =cut
|
|
255
|
|
256 sub queryName {
|
|
257 my ($self, $queryname) = @_;
|
|
258 if( $queryname ) {
|
|
259 $self->{'_query'}->{'NAME'} = $queryname;
|
|
260 }
|
|
261 $self->{'_query'}->{'NAME'};
|
|
262 }
|
|
263
|
|
264 =head2 sbjctName
|
|
265
|
|
266 Title :
|
|
267 Usage : $name = $report->sbjctName();
|
|
268 Function : returns the name of the Sbjct
|
|
269 Example :
|
|
270 Returns : name of the Sbjct
|
|
271 Args :
|
|
272
|
|
273 =cut
|
|
274
|
|
275 sub sbjctName {
|
|
276 my $self = shift;
|
|
277 # unless( defined $self->{'_current_sbjct'} ) {
|
|
278 # my $sbjct = $self->{'_current_sbjct'} = $self->nextSbjct;
|
|
279 # return undef unless defined $sbjct;
|
|
280 # }
|
|
281 $self->{'_current_sbjct'}->{'NAME'} || '';
|
|
282 }
|
|
283
|
|
284 =head2 sbjctLength
|
|
285
|
|
286 Title : sbjctLength
|
|
287 Usage : $length = $report->sbjctLength();
|
|
288 Function : returns the length of the Sbjct
|
|
289 Example :
|
|
290 Returns : name of the Sbjct
|
|
291 Args :
|
|
292
|
|
293 =cut
|
|
294
|
|
295 sub sbjctLength {
|
|
296 my $self = shift;
|
|
297 # unless( defined $self->{'_current_sbjct'} ) {
|
|
298 # my $sbjct = $self->{'_current_sbjct'} = $self->nextSbjct;
|
|
299 # return undef unless defined $sbjct;
|
|
300 # }
|
|
301 $self->{'_current_sbjct'}->{'LENGTH'};
|
|
302 }
|
|
303
|
|
304 =head2 P
|
|
305
|
|
306 Title : P
|
|
307 Usage :
|
|
308 Function : Syntax no longer supported, error message only
|
|
309
|
|
310 =cut
|
|
311
|
|
312 sub P {
|
|
313 my $self = shift;
|
|
314 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ");
|
|
315 }
|
|
316
|
|
317 =head2 percent
|
|
318
|
|
319 Title : percent
|
|
320 Usage : $hsp->percent();
|
|
321 Function : Syntax no longer supported, error message only
|
|
322
|
|
323 =cut
|
|
324
|
|
325 sub percent {
|
|
326 my $self = shift;
|
|
327 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ");
|
|
328 }
|
|
329
|
|
330 =head2 match
|
|
331
|
|
332 Title : match
|
|
333 Usage : $hsp->match();
|
|
334 Function : Syntax no longer supported, error message only
|
|
335
|
|
336 =cut
|
|
337
|
|
338 sub match {
|
|
339 my $self = shift;
|
|
340 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ");
|
|
341 }
|
|
342
|
|
343 =head2 positive
|
|
344
|
|
345 Title : positive
|
|
346 Usage : $hsp->positive();
|
|
347 Function : Syntax no longer supported, error message only
|
|
348
|
|
349 =cut
|
|
350
|
|
351 sub positive {
|
|
352 my $self = shift;
|
|
353 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
|
|
354 }
|
|
355
|
|
356 =head2 querySeq
|
|
357
|
|
358 Title : querySeq
|
|
359 Usage : $hsp->querySeq();
|
|
360 Function : Syntax no longer supported, error message only
|
|
361
|
|
362 =cut
|
|
363
|
|
364 sub querySeq {
|
|
365 my $self = shift;
|
|
366 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
|
|
367 }
|
|
368
|
|
369 =head2 sbjctSeq
|
|
370
|
|
371 Title : sbjctSeq
|
|
372 Usage : $hsp->sbjctSeq();
|
|
373 Function : Syntax no longer supported, error message only
|
|
374
|
|
375 =cut
|
|
376
|
|
377 sub sbjctSeq {
|
|
378 my $self = shift;
|
|
379 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
|
|
380 }
|
|
381
|
|
382 =head2 homologySeq
|
|
383
|
|
384 Title : homologySeq
|
|
385 Usage : $hsp->homologySeq();
|
|
386 Function : Syntax no longer supported, error message only
|
|
387
|
|
388 =cut
|
|
389
|
|
390 sub homologySeq {
|
|
391 my $self = shift;
|
|
392 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
|
|
393 }
|
|
394
|
|
395 =head2 qs
|
|
396
|
|
397 Title : qs
|
|
398 Usage : $hsp->qs();
|
|
399 Function : Syntax no longer supported, error message only
|
|
400
|
|
401 =cut
|
|
402
|
|
403 sub qs {
|
|
404 my $self = shift;
|
|
405 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
|
|
406 }
|
|
407
|
|
408 =head2 ss
|
|
409
|
|
410 Title : ss
|
|
411 Usage : $hsp->ss();
|
|
412 Function : Syntax no longer supported, error message only
|
|
413
|
|
414 =cut
|
|
415
|
|
416 sub ss {
|
|
417 my $self = shift;
|
|
418 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
|
|
419 }
|
|
420
|
|
421 =head2 hs
|
|
422
|
|
423 Title : hs
|
|
424 Usage : $hsp->hs();
|
|
425 Function : Syntax no longer supported, error message only
|
|
426
|
|
427 =cut
|
|
428
|
|
429 sub hs {
|
|
430 my $self = shift;
|
|
431 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
|
|
432 }
|
|
433
|
|
434 sub _fastForward {
|
|
435 my ($self) = @_;
|
|
436 return 0 if $self->{'REPORT_DONE'}; # empty report
|
|
437 while(defined( $_ = $self->_readline() ) ) {
|
|
438 if ($_ =~ /^>|^Parameters|^\s+Database:|^\s+Posted date:|^\s*Lambda/) {
|
|
439 $self->_pushback($_);
|
|
440 return 1;
|
|
441 }
|
|
442 }
|
|
443 $self->warn("Possible error (1) while parsing BLAST report!");
|
|
444 }
|
|
445
|
|
446 1;
|
|
447 __END__
|