comparison variant_effect_predictor/Bio/Tools/BPlite/HSP.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 ###############################################################################
2 # Bio::Tools::BPlite::HSP
3 ###############################################################################
4 # HSP = High Scoring Pair (to all non-experts as I am)
5 #
6 # The original BPlite.pm module has been written by Ian Korf !
7 # see http://sapiens.wustl.edu/~ikorf
8 #
9 # You may distribute this module under the same terms as perl itself
10
11
12 #
13 # BioPerl module for Bio::Tools::BPlite::HSP
14 #
15 # Cared for by Peter Schattner <schattner@alum.mit.edu>
16 #
17 # Copyright Peter Schattner
18 #
19 # You may distribute this module under the same terms as perl itself
20
21 # POD documentation - main docs before the code
22
23 =head1 NAME
24
25 Bio::Tools::BPlite::HSP - Blast report High Scoring Pair (HSP)
26
27 =head1 SYNOPSIS
28
29 use Bio::Tools::BPlite;
30 my $report = new Bio::Tools::BPlite(-fh=>\*STDIN);
31 {
32 while(my $sbjct = $report->nextSbjct) {
33 while (my $hsp = $sbjct->nextHSP) {
34 $hsp->score;
35 $hsp->bits;
36 $hsp->percent;
37 $hsp->P;
38 $hsp->match;
39 $hsp->positive;
40 $hsp->length;
41 $hsp->querySeq;
42 $hsp->sbjctSeq;
43 $hsp->homologySeq;
44 $hsp->query->start;
45 $hsp->query->end;
46 $hsp->hit->start;
47 $hsp->hit->end;
48 $hsp->hit->seq_id;
49 $hsp->hit->overlaps($exon);
50 }
51 }
52
53 # the following line takes you to the next report in the stream/file
54 # it will return 0 if that report is empty,
55 # but that is valid for an empty blast report.
56 # Returns -1 for EOF.
57
58 last if ($report->_parseHeader == -1));
59
60 redo
61 }
62
63 =head1 DESCRIPTION
64
65 This object handles the High Scoring Pair data for a Blast report.
66 This is where the percent identity, query and hit sequence length,
67 P value, etc are stored and where most of the necessary information is located when building logic around parsing a Blast report.
68
69 See L<Bio::Tools::BPlite> for more detailed information on the entire
70 BPlite Blast parsing system.
71
72 =head1 FEEDBACK
73
74 =head2 Mailing Lists
75
76 User feedback is an integral part of the evolution of this and other
77 Bioperl modules. Send your comments and suggestions preferably to
78 the Bioperl mailing list. Your participation is much appreciated.
79
80 bioperl-l@bioperl.org - General discussion
81 http://bioperl.org/MailList.shtml - About the mailing lists
82
83 =head2 Reporting Bugs
84
85 Report bugs to the Bioperl bug tracking system to help us keep track
86 of the bugs and their resolution. Bug reports can be submitted via
87 email or the web:
88
89 bioperl-bugs@bioperl.org
90 http://bugzilla.bioperl.org/
91
92 =head1 AUTHOR - Peter Schattner
93
94 Email: schattner@alum.mit.edu
95
96 =head1 CONTRIBUTORS
97
98 Jason Stajich, jason@bioperl.org
99
100 =head1 APPENDIX
101
102 The rest of the documentation details each of the object methods.
103 Internal methods are usually preceded with a _
104
105 =cut
106
107 # Let the code begin...
108
109 package Bio::Tools::BPlite::HSP;
110
111 use vars qw(@ISA);
112 use strict;
113
114 # to disable overloading comment this out:
115 #use overload '""' => '_overload';
116
117 # Object preamble - inheriets from Bio::SeqFeature::SimilarityPair
118
119 use Bio::SeqFeature::SimilarityPair;
120 use Bio::SeqFeature::Similarity;
121
122 @ISA = qw(Bio::SeqFeature::SimilarityPair);
123
124 sub new {
125 my ($class, @args) = @_;
126
127 # workaround to make sure frame is not set before strand is
128 # interpreted from query/hit info
129 # this workaround removes the key from the hash
130 # so the superclass does not try and work with it
131 # we'll take care of setting it in this module later on
132
133 my %newargs = @args;
134 foreach ( keys %newargs ) {
135 if( /frame$/i ) {
136 delete $newargs{$_};
137 }
138 }
139 # done with workaround
140
141 my $self = $class->SUPER::new(%newargs);
142
143 my ($score,$bits,$match,$hsplength,$positive,$gaps,$p,$exp,$qb,$qe,$sb,
144 $se,$qs,$ss,$hs,$qname,$sname,$qlength,$slength,$qframe,$sframe,
145 $blasttype) =
146 $self->_rearrange([qw(SCORE
147 BITS
148 MATCH
149 HSPLENGTH
150 POSITIVE
151 GAPS
152 P
153 EXP
154 QUERYBEGIN
155 QUERYEND
156 SBJCTBEGIN
157 SBJCTEND
158 QUERYSEQ
159 SBJCTSEQ
160 HOMOLOGYSEQ
161 QUERYNAME
162 SBJCTNAME
163 QUERYLENGTH
164 SBJCTLENGTH
165 QUERYFRAME
166 SBJCTFRAME
167 BLASTTYPE
168 )],@args);
169
170 $blasttype = 'UNKNOWN' unless $blasttype;
171 $self->report_type($blasttype);
172 # Determine strand meanings
173 my ($queryfactor, $sbjctfactor) = (1,0); # default
174 if ($blasttype eq 'BLASTP' || $blasttype eq 'TBLASTN' ) {
175 $queryfactor = 0;
176 }
177 if ($blasttype eq 'TBLASTN' || $blasttype eq 'TBLASTX' ||
178 $blasttype eq 'BLASTN' ) {
179 $sbjctfactor = 1;
180 }
181
182 # Set BLAST type
183 $self->{'BLAST_TYPE'} = $blasttype;
184
185 # Store the aligned query as sequence feature
186 my $strand;
187 if ($qe > $qb) { # normal query: start < end
188 if ($queryfactor) { $strand = 1; } else { $strand = undef; }
189 $self->query( Bio::SeqFeature::Similarity->new
190 (-start=>$qb, -end=>$qe, -strand=>$strand,
191 -source=>"BLAST" ) ) }
192 else { # reverse query (i dont know if this is possible, but feel free to correct)
193 if ($queryfactor) { $strand = -1; } else { $strand = undef; }
194 $self->query( Bio::SeqFeature::Similarity->new
195 (-start=>$qe, -end=>$qb, -strand=>$strand,
196 -source=>"BLAST" ) ) }
197
198 # store the aligned hit as sequence feature
199 if ($se > $sb) { # normal hit
200 if ($sbjctfactor) { $strand = 1; } else { $strand = undef; }
201 $self->hit( Bio::SeqFeature::Similarity->new
202 (-start=>$sb, -end=>$se, -strand=>$strand,
203 -source=>"BLAST" ) ) }
204 else { # reverse hit: start bigger than end
205 if ($sbjctfactor) { $strand = -1; } else { $strand = undef; }
206 $self->hit( Bio::SeqFeature::Similarity->new
207 (-start=>$se, -end=>$sb, -strand=>$strand,
208 -source=>"BLAST" ) ) }
209
210 # name the sequences
211 $self->query->seq_id($qname); # query name
212 $self->hit->seq_id($sname); # hit name
213
214 # set lengths
215 $self->query->seqlength($qlength); # query length
216 $self->hit->seqlength($slength); # hit length
217
218 # set object vars
219 $self->score($score);
220 $self->bits($bits);
221
222 $self->significance($p);
223 $self->{'EXP'} = $exp;
224
225 $self->query->frac_identical($match);
226 $self->hit->frac_identical($match);
227 $self->{'HSPLENGTH'} = $hsplength;
228 $self->{'PERCENT'} = int((1000 * $match)/$hsplength)/10;
229 $self->{'POSITIVE'} = $positive;
230 $self->{'GAPS'} = $gaps;
231 $self->{'QS'} = $qs;
232 $self->{'SS'} = $ss;
233 $self->{'HS'} = $hs;
234
235 $self->frame($qframe, $sframe);
236 return $self; # success - we hope!
237 }
238
239 # to disable overloading comment this out:
240 sub _overload {
241 my $self = shift;
242 return $self->start."..".$self->end." ".$self->bits;
243 }
244
245 =head2 report_type
246
247 Title : report_type
248 Usage : $type = $sbjct->report_type()
249 Function : Returns the type of report from which this hit was obtained.
250 This usually pertains only to BLAST and friends reports, for which
251 the report type denotes what type of sequence was aligned against
252 what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated dna-prt,
253 TBLASTN prt-translated dna, TBLASTX translated dna-translated dna).
254 Example :
255 Returns : A string (BLASTN, BLASTP, BLASTX, TBLASTN, TBLASTX, UNKNOWN)
256 Args : a string on set (you should know what you are doing)
257
258 =cut
259
260 sub report_type {
261 my ($self, $rpt) = @_;
262 if($rpt) {
263 $self->{'_report_type'} = $rpt;
264 }
265 return $self->{'_report_type'};
266 }
267
268 =head2 EXP
269
270 Title : EXP
271 Usage : my $exp = $hsp->EXP;
272 Function: returns the EXP value for the HSP
273 Returns : string value
274 Args : none
275 Note : Patch provided by Sami Ashour for BTK parsing
276
277
278 =cut
279
280 sub EXP{
281 return $_[0]->{'EXP'};
282 }
283
284
285 =head2 P
286
287 Title : P
288 Usage : $hsp->P();
289 Function : returns the P (significance) value for a HSP
290 Returns : (double) significance value
291 Args :
292
293 =cut
294
295 sub P {
296 my ($self, @args) = @_;
297 my $float = $self->significance(@args);
298 my $match = '([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?'; # Perl Cookbook 2.1
299 if ($float =~ /^$match$/) {
300 # Is a C float
301 return $float;
302 } elsif ("1$float" =~ /^$match$/) {
303 # Almost C float, Jitterbug 974
304 return "1$float";
305 } else {
306 $self->warn("[HSP::P()] '$float' is not a known number format. Returning zero (0) instead.");
307 return 0;
308 }
309 }
310
311 =head2 percent
312
313 Title : percent
314 Usage : $hsp->percent();
315 Function : returns the percent matching
316 Returns : (double) percent matching
317 Args : none
318
319 =cut
320
321 sub percent {shift->{'PERCENT'}}
322
323
324 =head2 match
325
326 Title : match
327 Usage : $hsp->match();
328 Function : returns the match
329 Example :
330 Returns : (double) frac_identical
331 Args :
332
333 =cut
334
335 sub match {shift->query->frac_identical(@_)}
336
337 =head2 hsplength
338
339 Title : hsplength
340 Usage : $hsp->hsplength();
341 Function : returns the HSP length (including gaps)
342 Returns : (integer) HSP length
343 Args : none
344
345 =cut
346
347 sub hsplength {shift->{'HSPLENGTH'}}
348
349 =head2 positive
350
351 Title : positive
352 Usage : $hsp->positive();
353 Function : returns the number of positive matches (symbols in the alignment
354 with a positive score)
355 Returns : (int) number of positive matches in the alignment
356 Args : none
357
358 =cut
359
360 sub positive {shift->{'POSITIVE'}}
361
362 =head2 gaps
363
364 Title : gaps
365 Usage : $hsp->gaps();
366 Function : returns the number of gaps or 0 if none
367 Returns : (int) number of gaps or 0 if none
368 Args : none
369
370 =cut
371
372 sub gaps {shift->{'GAPS'}}
373
374 =head2 querySeq
375
376 Title : querySeq
377 Usage : $hsp->querySeq();
378 Function : returns the query sequence
379 Returns : (string) the Query Sequence
380 Args : none
381
382 =cut
383
384 sub querySeq {shift->{'QS'}}
385
386 =head2 sbjctSeq
387
388 Title : sbjctSeq
389 Usage : $hsp->sbjctSeq();
390 Function : returns the Sbjct sequence
391 Returns : (string) the Sbjct Sequence
392 Args : none
393
394 =cut
395
396 sub sbjctSeq {shift->{'SS'}}
397
398 =head2 homologySeq
399
400 Title : homologySeq
401 Usage : $hsp->homologySeq();
402 Function : returns the homologous sequence
403 Returns : (string) homologous sequence
404 Args : none
405
406 =cut
407
408 sub homologySeq {shift->{'HS'}}
409
410 =head2 qs
411
412 Title : qs
413 Usage : $hsp->qs();
414 Function : returns the Query Sequence (same as querySeq)
415 Returns : (string) query Sequence
416 Args : none
417
418 =cut
419
420 sub qs {shift->{'QS'}}
421
422 =head2 ss
423
424 Title : ss
425 Usage : $hsp->ss();
426 Function : returns the subject sequence ( same as sbjctSeq)
427 Returns : (string) Sbjct Sequence
428 Args : none
429
430 =cut
431
432 sub ss {shift->{'SS'}}
433
434 =head2 hs
435
436 Title : hs
437 Usage : $hsp->hs();
438 Function : returns the Homologous Sequence (same as homologySeq )
439 Returns : (string) Homologous Sequence
440 Args : none
441
442 =cut
443
444 sub hs {shift->{'HS'}}
445
446 sub frame {
447 my ($self, $qframe, $sframe) = @_;
448 if( defined $qframe ) {
449 if( $qframe == 0 ) {
450 $qframe = undef;
451 } elsif( $qframe !~ /^([+-])?([1-3])/ ) {
452 $self->warn("Specifying an invalid query frame ($qframe)");
453 $qframe = undef;
454 } else {
455 if( ($1 eq '-' && $self->query->strand >= 0) ||
456 ($1 eq '+' && $self->query->strand <= 0) ) {
457 $self->warn("Query frame ($qframe) did not match strand of query (". $self->query->strand() . ")");
458 }
459 # Set frame to GFF [0-2]
460 $qframe = $2 - 1;
461 }
462 $self->{'QFRAME'} = $qframe;
463 }
464 if( defined $sframe ) {
465 if( $sframe == 0 ) {
466 $sframe = undef;
467 } elsif( $sframe !~ /^([+-])?([1-3])/ ) {
468 $self->warn("Specifying an invalid hit frame ($sframe)");
469 $sframe = undef;
470 } else {
471 if( ($1 eq '-' && $self->hit->strand >= 0) ||
472 ($1 eq '+' && $self->hit->strand <= 0) )
473 {
474 $self->warn("Hit frame ($sframe) did not match strand of hit (". $self->hit->strand() . ")");
475 }
476
477 # Set frame to GFF [0-2]
478 $sframe = $2 - 1;
479 }
480 $self->{'SFRAME'} = $sframe;
481 }
482
483 (defined $qframe && $self->SUPER::frame($qframe) &&
484 ($self->{'FRAME'} = $qframe)) ||
485 (defined $sframe && $self->SUPER::frame($sframe) &&
486 ($self->{'FRAME'} = $sframe));
487
488 if (wantarray() &&
489 $self->{'BLAST_TYPE'} eq 'TBLASTX')
490 {
491 return ($self->{'QFRAME'}, $self->{'SFRAME'});
492 } elsif (wantarray()) {
493 (defined $self->{'QFRAME'} &&
494 return ($self->{'QFRAME'}, undef)) ||
495 (defined $self->{'SFRAME'} &&
496 return (undef, $self->{'SFRAME'}));
497 } else {
498 (defined $self->{'QFRAME'} &&
499 return $self->{'QFRAME'}) ||
500 (defined $self->{'SFRAME'} &&
501 return $self->{'SFRAME'});
502 }
503 }
504
505 1;