Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/BPbl2seq.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: BPbl2seq.pm,v 1.21.2.2 2003/06/03 14:38:18 jason Exp $ | |
2 # | |
3 # Bioperl module Bio::Tools::BPbl2seq | |
4 # based closely on the Bio::Tools::BPlite modules | |
5 # Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf), | |
6 # Lorenz Pollak (lorenz@ist.org, bioperl port) | |
7 # | |
8 # | |
9 # Copyright Peter Schattner | |
10 # | |
11 # You may distribute this module under the same terms as perl itself | |
12 # _history | |
13 # October 20, 2000 | |
14 # May 29, 2001 | |
15 # Fixed bug which prevented reading of more than one HSP / hit. | |
16 # This fix required changing calling syntax as described below. (PS) | |
17 # POD documentation - main docs before the code | |
18 | |
19 =head1 NAME | |
20 | |
21 Bio::Tools::BPbl2seq - Lightweight BLAST parser for pair-wise sequence | |
22 alignment using the BLAST algorithm. | |
23 | |
24 =head1 SYNOPSIS | |
25 | |
26 use Bio::Tools::BPbl2seq; | |
27 my $report = Bio::Tools::BPbl2seq->new(-file => 't/bl2seq.out'); | |
28 $report->sbjctName; | |
29 $report->sbjctLength; | |
30 while(my $hsp = $report->next_feature) { | |
31 $hsp->score; | |
32 $hsp->bits; | |
33 $hsp->percent; | |
34 $hsp->P; | |
35 $hsp->match; | |
36 $hsp->positive; | |
37 $hsp->length; | |
38 $hsp->querySeq; | |
39 $hsp->sbjctSeq; | |
40 $hsp->homologySeq; | |
41 $hsp->query->start; | |
42 $hsp->query->end; | |
43 $hsp->sbjct->start; | |
44 $hsp->sbjct->end; | |
45 $hsp->sbjct->seq_id; | |
46 $hsp->sbjct->overlaps($exon); | |
47 } | |
48 | |
49 =head1 DESCRIPTION | |
50 | |
51 BPbl2seq is a package for parsing BLAST bl2seq reports. BLAST bl2seq is a | |
52 program for comparing and aligning two sequences using BLAST. Although | |
53 the report format is similar to that of a conventional BLAST, there are a | |
54 few differences so that BPlite is unable to read bl2seq reports directly. | |
55 | |
56 From the user's perspective, one difference between bl2seq and | |
57 other blast reports is that the bl2seq report does not print out the | |
58 name of the first of the two aligned sequences. (The second sequence | |
59 name is given in the report as the name of the "hit"). Consequently, | |
60 BPbl2seq has no way of identifying the name of the initial sequence | |
61 unless it is passed to constructor as a second argument as in: | |
62 | |
63 my $report = Bio::Tools::BPbl2seq->new(\*FH, "ALEU_HORVU"); | |
64 | |
65 If the name of the first sequence (the "query") is not passed to | |
66 BPbl2seq.pm in this manner, the name of the first sequence will be | |
67 left as "unknown". (Note that to preserve a common interface with the | |
68 other BLAST programs the two sequences being compared are referred to | |
69 in bl2seq as "query" and "subject" although this is perhaps a bit | |
70 misleading when simply comparing 2 sequences as opposed to querying a | |
71 database.) | |
72 | |
73 In addition, since there will only be (at most) one "subject" (hit) in | |
74 a bl2seq report, one should use the method $report-E<gt>next_feature, | |
75 rather than $report-E<gt>nextSbjct-E<gt>nextHSP to obtain the next | |
76 high scoring pair. | |
77 | |
78 One should note that the previous (0.7) version of BPbl2seq used | |
79 slightly different syntax. That version had a bug and consequently the | |
80 old syntax has been eliminated. Attempts to use the old syntax will | |
81 return error messages explaining the (minor) recoding required to use | |
82 the current syntax. | |
83 | |
84 =head1 FEEDBACK | |
85 | |
86 =head2 Mailing Lists | |
87 | |
88 User feedback is an integral part of the evolution of this and other | |
89 Bioperl modules. Send your comments and suggestions preferably to one | |
90 of the Bioperl mailing lists. Your participation is much appreciated. | |
91 | |
92 bioperl-l@bioperl.org - General discussion | |
93 http://bio.perl.org/MailList.html - About the mailing lists | |
94 | |
95 =head2 Reporting Bugs | |
96 | |
97 Report bugs to the Bioperl bug tracking system to help us keep track | |
98 the bugs and their resolution. Bug reports can be submitted via | |
99 email or the web: | |
100 | |
101 bioperl-bugs@bio.perl.org | |
102 http://bugzilla.bioperl.org/ | |
103 | |
104 =head1 AUTHOR - Peter Schattner | |
105 | |
106 Email: schattner@alum.mit.edu | |
107 | |
108 =head1 ACKNOWLEDGEMENTS | |
109 | |
110 Based on work of: | |
111 Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf), | |
112 Lorenz Pollak (lorenz@ist.org, bioperl port) | |
113 | |
114 =head1 CONTRIBUTORS | |
115 | |
116 Jason Stajich, jason@cgt.mc.duke.edu | |
117 | |
118 =cut | |
119 | |
120 #' | |
121 package Bio::Tools::BPbl2seq; | |
122 | |
123 use strict; | |
124 use vars qw(@ISA); | |
125 use Bio::Tools::BPlite; | |
126 use Bio::Root::Root; | |
127 use Bio::Root::IO; | |
128 use Bio::Tools::BPlite::Sbjct; # we want to use Sbjct | |
129 use Bio::SeqAnalysisParserI; | |
130 use Symbol; | |
131 | |
132 @ISA = qw(Bio::Root::Root Bio::SeqAnalysisParserI Bio::Root::IO); | |
133 | |
134 #@ISA = qw(Bio::Tools::BPlite); | |
135 | |
136 =head2 new | |
137 | |
138 Title : new | |
139 Function: Create a new Bio::Tools::BPbl2seq object | |
140 Returns : Bio::Tools::BPbl2seq | |
141 Args : -file input file (alternative to -fh) | |
142 -fh input stream (alternative to -file) | |
143 -queryname name of query sequence | |
144 | |
145 =cut | |
146 | |
147 sub new { | |
148 my ($class, @args) = @_; | |
149 my $self = $class->SUPER::new(@args); | |
150 # initialize IO | |
151 $self->_initialize_io(@args); | |
152 | |
153 my ($queryname,$rt) = $self->_rearrange([qw(QUERYNAME | |
154 REPORT_TYPE)], @args); | |
155 $queryname = 'unknown' if( ! defined $queryname ); | |
156 if( $rt && $rt =~ /BLAST/i ) { | |
157 $self->{'BLAST_TYPE'} = uc($rt); | |
158 } else { | |
159 $self->warn("Must provide which type of BLAST was run (blastp,blastn, tblastn, tblastx, blastx) if you want strand information to get set properly for DNA query or subjects"); | |
160 } | |
161 my $sbjct = $self->getSbjct(); | |
162 $self->{'_current_sbjct'} = $sbjct; | |
163 | |
164 $self->{'_query'}->{'NAME'} = $queryname; | |
165 return $self; | |
166 } | |
167 | |
168 | |
169 =head2 getSbjct | |
170 | |
171 Title : | |
172 Usage : $sbjct = $obj->getSbjct(); | |
173 Function : Method of obtaining single "subject" of a bl2seq report | |
174 Example : my $sbjct = $obj->getSbjct ) {} | |
175 Returns : Sbjct object or null if finished | |
176 Args : | |
177 | |
178 =cut | |
179 | |
180 sub getSbjct { | |
181 my ($self) = @_; | |
182 # $self->_fastForward or return undef; | |
183 | |
184 ####################### | |
185 # get bl2seq "sbjct" name and length # | |
186 ####################### | |
187 my $length; | |
188 my $def; | |
189 READLOOP: while(defined ($_ = $self->_readline) ) { | |
190 if ($_ =~ /^>(.+)$/) { | |
191 $def = $1; | |
192 next READLOOP; | |
193 } | |
194 elsif ($_ =~ /^\s*Length\s.+\D(\d+)/i) { | |
195 $length = $1; | |
196 next READLOOP; | |
197 } | |
198 elsif ($_ =~ /^\s{0,2}Score/) { | |
199 $self->_pushback($_); | |
200 last READLOOP; | |
201 } | |
202 } | |
203 return undef if ! defined $def; | |
204 $def =~ s/\s+/ /g; | |
205 $def =~ s/\s+$//g; | |
206 | |
207 | |
208 #################### | |
209 # the Sbjct object # | |
210 #################### | |
211 my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def, | |
212 '-length'=>$length, | |
213 '-parent'=>$self); | |
214 return $sbjct; | |
215 } | |
216 | |
217 | |
218 | |
219 | |
220 =head2 next_feature | |
221 | |
222 Title : next_feature | |
223 Usage : while( my $feat = $res->next_feature ) { # do something } | |
224 Function: calls next_feature function from BPlite. | |
225 Example : | |
226 Returns : A Bio::SeqFeatureI compliant object, in this case a | |
227 Bio::Tools::BPlite::HSP object, and FALSE if there are no more | |
228 HSPs. | |
229 Args : None | |
230 | |
231 =cut | |
232 | |
233 sub next_feature{ | |
234 my ($self) = @_; | |
235 my ($sbjct, $hsp); | |
236 $sbjct = $self->{'_current_sbjct'}; | |
237 unless( defined $sbjct ) { | |
238 $self->debug(" No hit object found for bl2seq report \n ") ; | |
239 return undef; | |
240 } | |
241 $hsp = $sbjct->nextHSP; | |
242 return $hsp || undef; | |
243 } | |
244 | |
245 =head2 queryName | |
246 | |
247 Title : | |
248 Usage : $name = $report->queryName(); | |
249 Function : get /set the name of the query | |
250 Example : | |
251 Returns : name of the query | |
252 Args : | |
253 | |
254 =cut | |
255 | |
256 sub queryName { | |
257 my ($self, $queryname) = @_; | |
258 if( $queryname ) { | |
259 $self->{'_query'}->{'NAME'} = $queryname; | |
260 } | |
261 $self->{'_query'}->{'NAME'}; | |
262 } | |
263 | |
264 =head2 sbjctName | |
265 | |
266 Title : | |
267 Usage : $name = $report->sbjctName(); | |
268 Function : returns the name of the Sbjct | |
269 Example : | |
270 Returns : name of the Sbjct | |
271 Args : | |
272 | |
273 =cut | |
274 | |
275 sub sbjctName { | |
276 my $self = shift; | |
277 # unless( defined $self->{'_current_sbjct'} ) { | |
278 # my $sbjct = $self->{'_current_sbjct'} = $self->nextSbjct; | |
279 # return undef unless defined $sbjct; | |
280 # } | |
281 $self->{'_current_sbjct'}->{'NAME'} || ''; | |
282 } | |
283 | |
284 =head2 sbjctLength | |
285 | |
286 Title : sbjctLength | |
287 Usage : $length = $report->sbjctLength(); | |
288 Function : returns the length of the Sbjct | |
289 Example : | |
290 Returns : name of the Sbjct | |
291 Args : | |
292 | |
293 =cut | |
294 | |
295 sub sbjctLength { | |
296 my $self = shift; | |
297 # unless( defined $self->{'_current_sbjct'} ) { | |
298 # my $sbjct = $self->{'_current_sbjct'} = $self->nextSbjct; | |
299 # return undef unless defined $sbjct; | |
300 # } | |
301 $self->{'_current_sbjct'}->{'LENGTH'}; | |
302 } | |
303 | |
304 =head2 P | |
305 | |
306 Title : P | |
307 Usage : | |
308 Function : Syntax no longer supported, error message only | |
309 | |
310 =cut | |
311 | |
312 sub P { | |
313 my $self = shift; | |
314 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n "); | |
315 } | |
316 | |
317 =head2 percent | |
318 | |
319 Title : percent | |
320 Usage : $hsp->percent(); | |
321 Function : Syntax no longer supported, error message only | |
322 | |
323 =cut | |
324 | |
325 sub percent { | |
326 my $self = shift; | |
327 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n "); | |
328 } | |
329 | |
330 =head2 match | |
331 | |
332 Title : match | |
333 Usage : $hsp->match(); | |
334 Function : Syntax no longer supported, error message only | |
335 | |
336 =cut | |
337 | |
338 sub match { | |
339 my $self = shift; | |
340 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n "); | |
341 } | |
342 | |
343 =head2 positive | |
344 | |
345 Title : positive | |
346 Usage : $hsp->positive(); | |
347 Function : Syntax no longer supported, error message only | |
348 | |
349 =cut | |
350 | |
351 sub positive { | |
352 my $self = shift; | |
353 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; | |
354 } | |
355 | |
356 =head2 querySeq | |
357 | |
358 Title : querySeq | |
359 Usage : $hsp->querySeq(); | |
360 Function : Syntax no longer supported, error message only | |
361 | |
362 =cut | |
363 | |
364 sub querySeq { | |
365 my $self = shift; | |
366 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; | |
367 } | |
368 | |
369 =head2 sbjctSeq | |
370 | |
371 Title : sbjctSeq | |
372 Usage : $hsp->sbjctSeq(); | |
373 Function : Syntax no longer supported, error message only | |
374 | |
375 =cut | |
376 | |
377 sub sbjctSeq { | |
378 my $self = shift; | |
379 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; | |
380 } | |
381 | |
382 =head2 homologySeq | |
383 | |
384 Title : homologySeq | |
385 Usage : $hsp->homologySeq(); | |
386 Function : Syntax no longer supported, error message only | |
387 | |
388 =cut | |
389 | |
390 sub homologySeq { | |
391 my $self = shift; | |
392 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; | |
393 } | |
394 | |
395 =head2 qs | |
396 | |
397 Title : qs | |
398 Usage : $hsp->qs(); | |
399 Function : Syntax no longer supported, error message only | |
400 | |
401 =cut | |
402 | |
403 sub qs { | |
404 my $self = shift; | |
405 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; | |
406 } | |
407 | |
408 =head2 ss | |
409 | |
410 Title : ss | |
411 Usage : $hsp->ss(); | |
412 Function : Syntax no longer supported, error message only | |
413 | |
414 =cut | |
415 | |
416 sub ss { | |
417 my $self = shift; | |
418 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; | |
419 } | |
420 | |
421 =head2 hs | |
422 | |
423 Title : hs | |
424 Usage : $hsp->hs(); | |
425 Function : Syntax no longer supported, error message only | |
426 | |
427 =cut | |
428 | |
429 sub hs { | |
430 my $self = shift; | |
431 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; | |
432 } | |
433 | |
434 sub _fastForward { | |
435 my ($self) = @_; | |
436 return 0 if $self->{'REPORT_DONE'}; # empty report | |
437 while(defined( $_ = $self->_readline() ) ) { | |
438 if ($_ =~ /^>|^Parameters|^\s+Database:|^\s+Posted date:|^\s*Lambda/) { | |
439 $self->_pushback($_); | |
440 return 1; | |
441 } | |
442 } | |
443 $self->warn("Possible error (1) while parsing BLAST report!"); | |
444 } | |
445 | |
446 1; | |
447 __END__ |