Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/BPlite/HSP.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 ############################################################################### | |
2 # Bio::Tools::BPlite::HSP | |
3 ############################################################################### | |
4 # HSP = High Scoring Pair (to all non-experts as I am) | |
5 # | |
6 # The original BPlite.pm module has been written by Ian Korf ! | |
7 # see http://sapiens.wustl.edu/~ikorf | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 | |
12 # | |
13 # BioPerl module for Bio::Tools::BPlite::HSP | |
14 # | |
15 # Cared for by Peter Schattner <schattner@alum.mit.edu> | |
16 # | |
17 # Copyright Peter Schattner | |
18 # | |
19 # You may distribute this module under the same terms as perl itself | |
20 | |
21 # POD documentation - main docs before the code | |
22 | |
23 =head1 NAME | |
24 | |
25 Bio::Tools::BPlite::HSP - Blast report High Scoring Pair (HSP) | |
26 | |
27 =head1 SYNOPSIS | |
28 | |
29 use Bio::Tools::BPlite; | |
30 my $report = new Bio::Tools::BPlite(-fh=>\*STDIN); | |
31 { | |
32 while(my $sbjct = $report->nextSbjct) { | |
33 while (my $hsp = $sbjct->nextHSP) { | |
34 $hsp->score; | |
35 $hsp->bits; | |
36 $hsp->percent; | |
37 $hsp->P; | |
38 $hsp->match; | |
39 $hsp->positive; | |
40 $hsp->length; | |
41 $hsp->querySeq; | |
42 $hsp->sbjctSeq; | |
43 $hsp->homologySeq; | |
44 $hsp->query->start; | |
45 $hsp->query->end; | |
46 $hsp->hit->start; | |
47 $hsp->hit->end; | |
48 $hsp->hit->seq_id; | |
49 $hsp->hit->overlaps($exon); | |
50 } | |
51 } | |
52 | |
53 # the following line takes you to the next report in the stream/file | |
54 # it will return 0 if that report is empty, | |
55 # but that is valid for an empty blast report. | |
56 # Returns -1 for EOF. | |
57 | |
58 last if ($report->_parseHeader == -1)); | |
59 | |
60 redo | |
61 } | |
62 | |
63 =head1 DESCRIPTION | |
64 | |
65 This object handles the High Scoring Pair data for a Blast report. | |
66 This is where the percent identity, query and hit sequence length, | |
67 P value, etc are stored and where most of the necessary information is located when building logic around parsing a Blast report. | |
68 | |
69 See L<Bio::Tools::BPlite> for more detailed information on the entire | |
70 BPlite Blast parsing system. | |
71 | |
72 =head1 FEEDBACK | |
73 | |
74 =head2 Mailing Lists | |
75 | |
76 User feedback is an integral part of the evolution of this and other | |
77 Bioperl modules. Send your comments and suggestions preferably to | |
78 the Bioperl mailing list. Your participation is much appreciated. | |
79 | |
80 bioperl-l@bioperl.org - General discussion | |
81 http://bioperl.org/MailList.shtml - About the mailing lists | |
82 | |
83 =head2 Reporting Bugs | |
84 | |
85 Report bugs to the Bioperl bug tracking system to help us keep track | |
86 of the bugs and their resolution. Bug reports can be submitted via | |
87 email or the web: | |
88 | |
89 bioperl-bugs@bioperl.org | |
90 http://bugzilla.bioperl.org/ | |
91 | |
92 =head1 AUTHOR - Peter Schattner | |
93 | |
94 Email: schattner@alum.mit.edu | |
95 | |
96 =head1 CONTRIBUTORS | |
97 | |
98 Jason Stajich, jason@bioperl.org | |
99 | |
100 =head1 APPENDIX | |
101 | |
102 The rest of the documentation details each of the object methods. | |
103 Internal methods are usually preceded with a _ | |
104 | |
105 =cut | |
106 | |
107 # Let the code begin... | |
108 | |
109 package Bio::Tools::BPlite::HSP; | |
110 | |
111 use vars qw(@ISA); | |
112 use strict; | |
113 | |
114 # to disable overloading comment this out: | |
115 #use overload '""' => '_overload'; | |
116 | |
117 # Object preamble - inheriets from Bio::SeqFeature::SimilarityPair | |
118 | |
119 use Bio::SeqFeature::SimilarityPair; | |
120 use Bio::SeqFeature::Similarity; | |
121 | |
122 @ISA = qw(Bio::SeqFeature::SimilarityPair); | |
123 | |
124 sub new { | |
125 my ($class, @args) = @_; | |
126 | |
127 # workaround to make sure frame is not set before strand is | |
128 # interpreted from query/hit info | |
129 # this workaround removes the key from the hash | |
130 # so the superclass does not try and work with it | |
131 # we'll take care of setting it in this module later on | |
132 | |
133 my %newargs = @args; | |
134 foreach ( keys %newargs ) { | |
135 if( /frame$/i ) { | |
136 delete $newargs{$_}; | |
137 } | |
138 } | |
139 # done with workaround | |
140 | |
141 my $self = $class->SUPER::new(%newargs); | |
142 | |
143 my ($score,$bits,$match,$hsplength,$positive,$gaps,$p,$exp,$qb,$qe,$sb, | |
144 $se,$qs,$ss,$hs,$qname,$sname,$qlength,$slength,$qframe,$sframe, | |
145 $blasttype) = | |
146 $self->_rearrange([qw(SCORE | |
147 BITS | |
148 MATCH | |
149 HSPLENGTH | |
150 POSITIVE | |
151 GAPS | |
152 P | |
153 EXP | |
154 QUERYBEGIN | |
155 QUERYEND | |
156 SBJCTBEGIN | |
157 SBJCTEND | |
158 QUERYSEQ | |
159 SBJCTSEQ | |
160 HOMOLOGYSEQ | |
161 QUERYNAME | |
162 SBJCTNAME | |
163 QUERYLENGTH | |
164 SBJCTLENGTH | |
165 QUERYFRAME | |
166 SBJCTFRAME | |
167 BLASTTYPE | |
168 )],@args); | |
169 | |
170 $blasttype = 'UNKNOWN' unless $blasttype; | |
171 $self->report_type($blasttype); | |
172 # Determine strand meanings | |
173 my ($queryfactor, $sbjctfactor) = (1,0); # default | |
174 if ($blasttype eq 'BLASTP' || $blasttype eq 'TBLASTN' ) { | |
175 $queryfactor = 0; | |
176 } | |
177 if ($blasttype eq 'TBLASTN' || $blasttype eq 'TBLASTX' || | |
178 $blasttype eq 'BLASTN' ) { | |
179 $sbjctfactor = 1; | |
180 } | |
181 | |
182 # Set BLAST type | |
183 $self->{'BLAST_TYPE'} = $blasttype; | |
184 | |
185 # Store the aligned query as sequence feature | |
186 my $strand; | |
187 if ($qe > $qb) { # normal query: start < end | |
188 if ($queryfactor) { $strand = 1; } else { $strand = undef; } | |
189 $self->query( Bio::SeqFeature::Similarity->new | |
190 (-start=>$qb, -end=>$qe, -strand=>$strand, | |
191 -source=>"BLAST" ) ) } | |
192 else { # reverse query (i dont know if this is possible, but feel free to correct) | |
193 if ($queryfactor) { $strand = -1; } else { $strand = undef; } | |
194 $self->query( Bio::SeqFeature::Similarity->new | |
195 (-start=>$qe, -end=>$qb, -strand=>$strand, | |
196 -source=>"BLAST" ) ) } | |
197 | |
198 # store the aligned hit as sequence feature | |
199 if ($se > $sb) { # normal hit | |
200 if ($sbjctfactor) { $strand = 1; } else { $strand = undef; } | |
201 $self->hit( Bio::SeqFeature::Similarity->new | |
202 (-start=>$sb, -end=>$se, -strand=>$strand, | |
203 -source=>"BLAST" ) ) } | |
204 else { # reverse hit: start bigger than end | |
205 if ($sbjctfactor) { $strand = -1; } else { $strand = undef; } | |
206 $self->hit( Bio::SeqFeature::Similarity->new | |
207 (-start=>$se, -end=>$sb, -strand=>$strand, | |
208 -source=>"BLAST" ) ) } | |
209 | |
210 # name the sequences | |
211 $self->query->seq_id($qname); # query name | |
212 $self->hit->seq_id($sname); # hit name | |
213 | |
214 # set lengths | |
215 $self->query->seqlength($qlength); # query length | |
216 $self->hit->seqlength($slength); # hit length | |
217 | |
218 # set object vars | |
219 $self->score($score); | |
220 $self->bits($bits); | |
221 | |
222 $self->significance($p); | |
223 $self->{'EXP'} = $exp; | |
224 | |
225 $self->query->frac_identical($match); | |
226 $self->hit->frac_identical($match); | |
227 $self->{'HSPLENGTH'} = $hsplength; | |
228 $self->{'PERCENT'} = int((1000 * $match)/$hsplength)/10; | |
229 $self->{'POSITIVE'} = $positive; | |
230 $self->{'GAPS'} = $gaps; | |
231 $self->{'QS'} = $qs; | |
232 $self->{'SS'} = $ss; | |
233 $self->{'HS'} = $hs; | |
234 | |
235 $self->frame($qframe, $sframe); | |
236 return $self; # success - we hope! | |
237 } | |
238 | |
239 # to disable overloading comment this out: | |
240 sub _overload { | |
241 my $self = shift; | |
242 return $self->start."..".$self->end." ".$self->bits; | |
243 } | |
244 | |
245 =head2 report_type | |
246 | |
247 Title : report_type | |
248 Usage : $type = $sbjct->report_type() | |
249 Function : Returns the type of report from which this hit was obtained. | |
250 This usually pertains only to BLAST and friends reports, for which | |
251 the report type denotes what type of sequence was aligned against | |
252 what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated dna-prt, | |
253 TBLASTN prt-translated dna, TBLASTX translated dna-translated dna). | |
254 Example : | |
255 Returns : A string (BLASTN, BLASTP, BLASTX, TBLASTN, TBLASTX, UNKNOWN) | |
256 Args : a string on set (you should know what you are doing) | |
257 | |
258 =cut | |
259 | |
260 sub report_type { | |
261 my ($self, $rpt) = @_; | |
262 if($rpt) { | |
263 $self->{'_report_type'} = $rpt; | |
264 } | |
265 return $self->{'_report_type'}; | |
266 } | |
267 | |
268 =head2 EXP | |
269 | |
270 Title : EXP | |
271 Usage : my $exp = $hsp->EXP; | |
272 Function: returns the EXP value for the HSP | |
273 Returns : string value | |
274 Args : none | |
275 Note : Patch provided by Sami Ashour for BTK parsing | |
276 | |
277 | |
278 =cut | |
279 | |
280 sub EXP{ | |
281 return $_[0]->{'EXP'}; | |
282 } | |
283 | |
284 | |
285 =head2 P | |
286 | |
287 Title : P | |
288 Usage : $hsp->P(); | |
289 Function : returns the P (significance) value for a HSP | |
290 Returns : (double) significance value | |
291 Args : | |
292 | |
293 =cut | |
294 | |
295 sub P { | |
296 my ($self, @args) = @_; | |
297 my $float = $self->significance(@args); | |
298 my $match = '([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?'; # Perl Cookbook 2.1 | |
299 if ($float =~ /^$match$/) { | |
300 # Is a C float | |
301 return $float; | |
302 } elsif ("1$float" =~ /^$match$/) { | |
303 # Almost C float, Jitterbug 974 | |
304 return "1$float"; | |
305 } else { | |
306 $self->warn("[HSP::P()] '$float' is not a known number format. Returning zero (0) instead."); | |
307 return 0; | |
308 } | |
309 } | |
310 | |
311 =head2 percent | |
312 | |
313 Title : percent | |
314 Usage : $hsp->percent(); | |
315 Function : returns the percent matching | |
316 Returns : (double) percent matching | |
317 Args : none | |
318 | |
319 =cut | |
320 | |
321 sub percent {shift->{'PERCENT'}} | |
322 | |
323 | |
324 =head2 match | |
325 | |
326 Title : match | |
327 Usage : $hsp->match(); | |
328 Function : returns the match | |
329 Example : | |
330 Returns : (double) frac_identical | |
331 Args : | |
332 | |
333 =cut | |
334 | |
335 sub match {shift->query->frac_identical(@_)} | |
336 | |
337 =head2 hsplength | |
338 | |
339 Title : hsplength | |
340 Usage : $hsp->hsplength(); | |
341 Function : returns the HSP length (including gaps) | |
342 Returns : (integer) HSP length | |
343 Args : none | |
344 | |
345 =cut | |
346 | |
347 sub hsplength {shift->{'HSPLENGTH'}} | |
348 | |
349 =head2 positive | |
350 | |
351 Title : positive | |
352 Usage : $hsp->positive(); | |
353 Function : returns the number of positive matches (symbols in the alignment | |
354 with a positive score) | |
355 Returns : (int) number of positive matches in the alignment | |
356 Args : none | |
357 | |
358 =cut | |
359 | |
360 sub positive {shift->{'POSITIVE'}} | |
361 | |
362 =head2 gaps | |
363 | |
364 Title : gaps | |
365 Usage : $hsp->gaps(); | |
366 Function : returns the number of gaps or 0 if none | |
367 Returns : (int) number of gaps or 0 if none | |
368 Args : none | |
369 | |
370 =cut | |
371 | |
372 sub gaps {shift->{'GAPS'}} | |
373 | |
374 =head2 querySeq | |
375 | |
376 Title : querySeq | |
377 Usage : $hsp->querySeq(); | |
378 Function : returns the query sequence | |
379 Returns : (string) the Query Sequence | |
380 Args : none | |
381 | |
382 =cut | |
383 | |
384 sub querySeq {shift->{'QS'}} | |
385 | |
386 =head2 sbjctSeq | |
387 | |
388 Title : sbjctSeq | |
389 Usage : $hsp->sbjctSeq(); | |
390 Function : returns the Sbjct sequence | |
391 Returns : (string) the Sbjct Sequence | |
392 Args : none | |
393 | |
394 =cut | |
395 | |
396 sub sbjctSeq {shift->{'SS'}} | |
397 | |
398 =head2 homologySeq | |
399 | |
400 Title : homologySeq | |
401 Usage : $hsp->homologySeq(); | |
402 Function : returns the homologous sequence | |
403 Returns : (string) homologous sequence | |
404 Args : none | |
405 | |
406 =cut | |
407 | |
408 sub homologySeq {shift->{'HS'}} | |
409 | |
410 =head2 qs | |
411 | |
412 Title : qs | |
413 Usage : $hsp->qs(); | |
414 Function : returns the Query Sequence (same as querySeq) | |
415 Returns : (string) query Sequence | |
416 Args : none | |
417 | |
418 =cut | |
419 | |
420 sub qs {shift->{'QS'}} | |
421 | |
422 =head2 ss | |
423 | |
424 Title : ss | |
425 Usage : $hsp->ss(); | |
426 Function : returns the subject sequence ( same as sbjctSeq) | |
427 Returns : (string) Sbjct Sequence | |
428 Args : none | |
429 | |
430 =cut | |
431 | |
432 sub ss {shift->{'SS'}} | |
433 | |
434 =head2 hs | |
435 | |
436 Title : hs | |
437 Usage : $hsp->hs(); | |
438 Function : returns the Homologous Sequence (same as homologySeq ) | |
439 Returns : (string) Homologous Sequence | |
440 Args : none | |
441 | |
442 =cut | |
443 | |
444 sub hs {shift->{'HS'}} | |
445 | |
446 sub frame { | |
447 my ($self, $qframe, $sframe) = @_; | |
448 if( defined $qframe ) { | |
449 if( $qframe == 0 ) { | |
450 $qframe = undef; | |
451 } elsif( $qframe !~ /^([+-])?([1-3])/ ) { | |
452 $self->warn("Specifying an invalid query frame ($qframe)"); | |
453 $qframe = undef; | |
454 } else { | |
455 if( ($1 eq '-' && $self->query->strand >= 0) || | |
456 ($1 eq '+' && $self->query->strand <= 0) ) { | |
457 $self->warn("Query frame ($qframe) did not match strand of query (". $self->query->strand() . ")"); | |
458 } | |
459 # Set frame to GFF [0-2] | |
460 $qframe = $2 - 1; | |
461 } | |
462 $self->{'QFRAME'} = $qframe; | |
463 } | |
464 if( defined $sframe ) { | |
465 if( $sframe == 0 ) { | |
466 $sframe = undef; | |
467 } elsif( $sframe !~ /^([+-])?([1-3])/ ) { | |
468 $self->warn("Specifying an invalid hit frame ($sframe)"); | |
469 $sframe = undef; | |
470 } else { | |
471 if( ($1 eq '-' && $self->hit->strand >= 0) || | |
472 ($1 eq '+' && $self->hit->strand <= 0) ) | |
473 { | |
474 $self->warn("Hit frame ($sframe) did not match strand of hit (". $self->hit->strand() . ")"); | |
475 } | |
476 | |
477 # Set frame to GFF [0-2] | |
478 $sframe = $2 - 1; | |
479 } | |
480 $self->{'SFRAME'} = $sframe; | |
481 } | |
482 | |
483 (defined $qframe && $self->SUPER::frame($qframe) && | |
484 ($self->{'FRAME'} = $qframe)) || | |
485 (defined $sframe && $self->SUPER::frame($sframe) && | |
486 ($self->{'FRAME'} = $sframe)); | |
487 | |
488 if (wantarray() && | |
489 $self->{'BLAST_TYPE'} eq 'TBLASTX') | |
490 { | |
491 return ($self->{'QFRAME'}, $self->{'SFRAME'}); | |
492 } elsif (wantarray()) { | |
493 (defined $self->{'QFRAME'} && | |
494 return ($self->{'QFRAME'}, undef)) || | |
495 (defined $self->{'SFRAME'} && | |
496 return (undef, $self->{'SFRAME'})); | |
497 } else { | |
498 (defined $self->{'QFRAME'} && | |
499 return $self->{'QFRAME'}) || | |
500 (defined $self->{'SFRAME'} && | |
501 return $self->{'SFRAME'}); | |
502 } | |
503 } | |
504 | |
505 1; |