Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Search/Result/BlastResult.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 #----------------------------------------------------------------- | |
2 # $Id: BlastResult.pm,v 1.13 2002/12/24 15:48:41 jason Exp $ | |
3 # | |
4 # BioPerl module Bio::Search::Result::BlastResult | |
5 # | |
6 # Cared for by Steve Chervitz <sac@bioperl.org> | |
7 # | |
8 # You may distribute this module under the same terms as perl itself | |
9 #----------------------------------------------------------------- | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Search::Result::BlastResult - A top-level BLAST Report object | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 The construction of BlastResult objects is performed by | |
20 by the B<Bio::SearchIO::psiblast> parser. | |
21 Therefore, you do not need to | |
22 use B<Bio::Search::Result::BlastResult>) directly. If you need to construct | |
23 BlastHits directly, see the new() function for details. | |
24 | |
25 For B<Bio::SearchIO> BLAST parsing usage examples, see the | |
26 B<examples/search-blast> directory of the Bioperl distribution. | |
27 | |
28 =head1 DESCRIPTION | |
29 | |
30 This module supports BLAST versions 1.x and 2.x, gapped and ungapped, | |
31 and PSI-BLAST. | |
32 | |
33 =head1 DEPENDENCIES | |
34 | |
35 Bio::Search::Result::BlastResult.pm is a concrete class that inherits from B<Bio::Root::Root> and B<Bio::Search::Result::ResultI>. It relies on two other modules: | |
36 | |
37 =over 4 | |
38 | |
39 =item B<Bio::Search::Hit::BlastHit> | |
40 | |
41 Encapsulates a single a single BLAST hit. | |
42 | |
43 =item B<Bio::Search::GenericDatabase> | |
44 | |
45 Provides an interface to a blast database metadata. | |
46 | |
47 =back | |
48 | |
49 =head1 FEEDBACK | |
50 | |
51 =head2 Mailing Lists | |
52 | |
53 User feedback is an integral part of the evolution of this and other | |
54 Bioperl modules. Send your comments and suggestions preferably to one | |
55 of the Bioperl mailing lists. Your participation is much appreciated. | |
56 | |
57 bioperl-l@bioperl.org - General discussion | |
58 http://bio.perl.org/MailList.html - About the mailing lists | |
59 | |
60 =head2 Reporting Bugs | |
61 | |
62 Report bugs to the Bioperl bug tracking system to help us keep track | |
63 the bugs and their resolution. Bug reports can be submitted via email | |
64 or the web: | |
65 | |
66 bioperl-bugs@bio.perl.org | |
67 http://bugzilla.bioperl.org/ | |
68 | |
69 =head1 AUTHOR | |
70 | |
71 Steve Chervitz E<lt>sac@bioperl.orgE<gt> | |
72 | |
73 See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments. | |
74 | |
75 =head1 ACKNOWLEDGEMENTS | |
76 | |
77 This software was originally developed in the Department of Genetics | |
78 at Stanford University. I would also like to acknowledge my | |
79 colleagues at Affymetrix for useful feedback. | |
80 | |
81 =head1 COPYRIGHT | |
82 | |
83 Copyright (c) 2001 Steve Chervitz. All Rights Reserved. | |
84 | |
85 =cut | |
86 | |
87 =head1 DISCLAIMER | |
88 | |
89 This software is provided "as is" without warranty of any kind. | |
90 | |
91 =head1 APPENDIX | |
92 | |
93 The rest of the documentation details each of the object methods. | |
94 Internal methods are usually preceded with a _ | |
95 | |
96 =cut | |
97 | |
98 | |
99 # Let the code begin... | |
100 | |
101 package Bio::Search::Result::BlastResult; | |
102 | |
103 use strict; | |
104 | |
105 use Bio::Search::Result::ResultI; | |
106 use Bio::Root::Root; | |
107 | |
108 use overload | |
109 '""' => \&to_string; | |
110 | |
111 use vars qw(@ISA $Revision ); | |
112 | |
113 $Revision = '$Id: BlastResult.pm,v 1.13 2002/12/24 15:48:41 jason Exp $'; #' | |
114 @ISA = qw( Bio::Root::Root Bio::Search::Result::ResultI); | |
115 | |
116 #---------------- | |
117 sub new { | |
118 #---------------- | |
119 my ($class, @args) = @_; | |
120 my $self = $class->SUPER::new(@args); | |
121 return $self; | |
122 } | |
123 | |
124 #sub DESTROY { | |
125 # my $self = shift; | |
126 # print STDERR "->DESTROYING $self\n"; | |
127 #} | |
128 | |
129 | |
130 #================================================= | |
131 # Begin Bio::Search::Result::ResultI implementation | |
132 #================================================= | |
133 | |
134 =head2 next_hit | |
135 | |
136 See L<Bio::Search::Result::ResultI::next_hit()|Bio::Search::Result::ResultI> for documentation | |
137 | |
138 =cut | |
139 | |
140 #---------------- | |
141 sub next_hit { | |
142 #---------------- | |
143 my ($self) = @_; | |
144 | |
145 unless(defined $self->{'_hit_queue'}) { | |
146 $self->{'_hit_queue'} = [$self->hits()]; | |
147 } | |
148 | |
149 shift @{$self->{'_hit_queue'}}; | |
150 } | |
151 | |
152 =head2 query_name | |
153 | |
154 See L<Bio::Search::Result::ResultI::query_name()|Bio::Search::Result::ResultI> for documentation | |
155 | |
156 =cut | |
157 | |
158 #---------------- | |
159 sub query_name { | |
160 #---------------- | |
161 my $self = shift; | |
162 if (@_) { | |
163 my $name = shift; | |
164 $name =~ s/^\s+|(\s+|,)$//g; | |
165 $self->{'_query_name'} = $name; | |
166 } | |
167 return $self->{'_query_name'}; | |
168 } | |
169 | |
170 =head2 query_length | |
171 | |
172 See L<Bio::Search::Result::ResultI::query_length()|Bio::Search::Result::ResultI> for documentation | |
173 | |
174 =cut | |
175 | |
176 #---------------- | |
177 sub query_length { | |
178 #---------------- | |
179 my $self = shift; | |
180 if(@_) { $self->{'_query_length'} = shift; } | |
181 return $self->{'_query_length'}; | |
182 } | |
183 | |
184 =head2 query_description | |
185 | |
186 See L<Bio::Search::Result::ResultI::query_description()|Bio::Search::Result::ResultI> for documentation | |
187 | |
188 =cut | |
189 | |
190 #---------------- | |
191 sub query_description { | |
192 #---------------- | |
193 my $self = shift; | |
194 if(@_) { | |
195 my $desc = shift; | |
196 defined $desc && $desc =~ s/(^\s+|\s+$)//g; | |
197 # Remove duplicated ID at beginning of description string | |
198 defined $desc && $desc =~ s/^$self->{'_query_name'}//o; | |
199 $self->{'_query_query_desc'} = $desc || ''; | |
200 } | |
201 return $self->{'_query_query_desc'}; | |
202 } | |
203 | |
204 | |
205 =head2 analysis_method | |
206 | |
207 See L<Bio::AnalysisResultI::analysis_method()|Bio::AnalysisResultI> for documentation | |
208 | |
209 This implementation ensures that the name matches /blast/i. | |
210 | |
211 =cut | |
212 | |
213 #---------------- | |
214 sub analysis_method { | |
215 #---------------- | |
216 my ($self, $method) = @_; | |
217 if($method ) { | |
218 if( $method =~ /blast/i) { | |
219 $self->{'_analysis_prog'} = $method; | |
220 } else { | |
221 $self->throw("method $method not supported in " . ref($self)); | |
222 } | |
223 } | |
224 return $self->{'_analysis_prog'}; | |
225 } | |
226 | |
227 =head2 analysis_method_version | |
228 | |
229 See L<Bio::AnalysisResultI::analysis_method_version()|Bio::AnalysisResultI> for documentation | |
230 | |
231 =cut | |
232 | |
233 #---------------- | |
234 sub analysis_method_version { | |
235 #---------------- | |
236 my ($self, $version) = @_; | |
237 if($version) { | |
238 $self->{'_analysis_progVersion'} = $version; | |
239 } | |
240 return $self->{'_analysis_progVersion'}; | |
241 } | |
242 | |
243 | |
244 =head2 analysis_query | |
245 | |
246 See L<Bio::AnalysisResultI::analysis_query()|Bio::AnalysisResultI> for documentation | |
247 | |
248 =cut | |
249 | |
250 #---------------- | |
251 sub analysis_query { | |
252 #---------------- | |
253 | |
254 my ($self) = @_; | |
255 if(not defined $self->{'_analysis_query'}) { | |
256 require Bio::PrimarySeq; | |
257 my $moltype = $self->analysis_method =~ /blastp|tblastn/i ? 'protein' : 'dna'; | |
258 $self->{'_analysis_query'} = Bio::PrimarySeq->new( -display_id => $self->query_name, | |
259 -desc => $self->query_description, | |
260 -moltype => $moltype | |
261 ); | |
262 $self->{'_analysis_query'}->length( $self->query_length ); | |
263 } | |
264 return $self->{'_analysis_query'}; | |
265 } | |
266 | |
267 =head2 analysis_subject | |
268 | |
269 Usage : $blastdb = $result->analyis_subject(); | |
270 Purpose : Get a Bio::Search::DatabaseI object containing | |
271 information about the database used in the BLAST analysis. | |
272 Returns : Bio::Search::DatabaseI object. | |
273 Argument : n/a | |
274 | |
275 =cut | |
276 | |
277 #--------------- | |
278 sub analysis_subject { | |
279 #--------------- | |
280 my ($self, $blastdb) = @_; | |
281 if($blastdb) { | |
282 if( ref $blastdb and $blastdb->isa('Bio::Search::DatabaseI')) { | |
283 $self->{'_analysis_sbjct'} = $blastdb; | |
284 } | |
285 else { | |
286 $self->throw(-class =>'Bio::Root::BadParameter', | |
287 -text => "Can't set BlastDB: not a Bio::Search::DatabaseI $blastdb" | |
288 ); | |
289 } | |
290 } | |
291 return $self->{'_analysis_sbjct'}; | |
292 } | |
293 | |
294 =head2 next_feature | |
295 | |
296 Title : next_feature | |
297 Usage : while( my $feat = $blast_result->next_feature ) { # do something } | |
298 Function: Returns the next feature available in the analysis result, or | |
299 undef if there are no more features. | |
300 Example : | |
301 Returns : A Bio::SeqFeatureI compliant object, in this case, | |
302 each Bio::Search::HSP::BlastHSP object within each BlastHit. | |
303 Args : None | |
304 | |
305 =cut | |
306 | |
307 #--------------- | |
308 sub next_feature{ | |
309 #--------------- | |
310 my ($self) = @_; | |
311 my ($hit, $hsp); | |
312 $hit = $self->{'_current_hit'}; | |
313 unless( defined $hit ) { | |
314 $hit = $self->{'_current_hit'} = $self->next_hit; | |
315 return undef unless defined $hit; | |
316 } | |
317 $hsp = $hit->next_hsp; | |
318 unless( defined $hsp ) { | |
319 $self->{'_current_hit'} = undef; | |
320 return $self->next_feature; | |
321 } | |
322 return $hsp || undef; | |
323 } | |
324 | |
325 | |
326 sub algorithm { shift->analysis_method( @_ ); } | |
327 sub algorithm_version { shift->analysis_method_version( @_ ); } | |
328 | |
329 =head2 available_parameters | |
330 | |
331 Title : available_parameters | |
332 Usage : my @params = $report->available_paramters | |
333 Function: Returns the names of the available parameters | |
334 Returns : Return list of available parameters used for this report | |
335 Args : none | |
336 | |
337 =cut | |
338 | |
339 sub available_parameters{ | |
340 return (); | |
341 } | |
342 | |
343 | |
344 =head2 get_parameter | |
345 | |
346 Title : get_parameter | |
347 Usage : my $gap_ext = $report->get_parameter('gapext') | |
348 Function: Returns the value for a specific parameter used | |
349 when running this report | |
350 Returns : string | |
351 Args : name of parameter (string) | |
352 | |
353 =cut | |
354 | |
355 sub get_parameter{ | |
356 return ''; | |
357 } | |
358 | |
359 =head2 get_statistic | |
360 | |
361 Title : get_statistic | |
362 Usage : my $gap_ext = $report->get_statistic('kappa') | |
363 Function: Returns the value for a specific statistic available | |
364 from this report | |
365 Returns : string | |
366 Args : name of statistic (string) | |
367 | |
368 =cut | |
369 | |
370 sub get_statistic{ | |
371 return ''; | |
372 } | |
373 | |
374 =head2 available_statistics | |
375 | |
376 Title : available_statistics | |
377 Usage : my @statnames = $report->available_statistics | |
378 Function: Returns the names of the available statistics | |
379 Returns : Return list of available statistics used for this report | |
380 Args : none | |
381 | |
382 =cut | |
383 | |
384 sub available_statistics{ | |
385 return (); | |
386 } | |
387 | |
388 #================================================= | |
389 # End Bio::Search::Result::ResultI implementation | |
390 #================================================= | |
391 | |
392 | |
393 =head2 to_string | |
394 | |
395 Title : to_string | |
396 Usage : print $blast->to_string; | |
397 Function: Returns a string representation for the Blast result. | |
398 Primarily intended for debugging purposes. | |
399 Example : see usage | |
400 Returns : A string of the form: | |
401 [BlastResult] <analysis_method> query=<name> <description> db=<database | |
402 e.g.: | |
403 [BlastResult] BLASTP query=YEL060C vacuolar protease B, db=PDBUNIQ | |
404 Args : None | |
405 | |
406 =cut | |
407 | |
408 #--------------- | |
409 sub to_string { | |
410 #--------------- | |
411 my $self = shift; | |
412 my $str = "[BlastResult] " . $self->analysis_method . " query=" . $self->query_name . " " . $self->query_description .", db=" . $self->database_name; | |
413 return $str; | |
414 } | |
415 | |
416 #--------------- | |
417 sub database_name { | |
418 #--------------- | |
419 my $self = shift; | |
420 my $dbname = ''; | |
421 if( ref $self->analysis_subject) { | |
422 $dbname = $self->analysis_subject->name; | |
423 } | |
424 return $dbname; | |
425 } | |
426 | |
427 =head2 database_entries | |
428 | |
429 Title : database_entries | |
430 Usage : $num_entries = $result->database_entries() | |
431 Function: Used to obtain the number of entries contained in the database. | |
432 Returns : a scalar integer representing the number of entities in the database | |
433 or undef if the information was not available. | |
434 Args : [optional] new integer for the number of sequence entries in the db | |
435 | |
436 | |
437 =cut | |
438 | |
439 #--------------- | |
440 sub database_entries { | |
441 #--------------- | |
442 my $self = shift; | |
443 my $dbentries = ''; | |
444 if( ref $self->analysis_subject) { | |
445 $dbentries = $self->analysis_subject->entries; | |
446 } | |
447 return $dbentries; | |
448 } | |
449 | |
450 | |
451 =head2 database_letters | |
452 | |
453 Title : database_letters | |
454 Usage : $size = $result->database_letters() | |
455 Function: Used to obtain the size of database that was searched against. | |
456 Returns : a scalar integer (units specific to algorithm, but probably the | |
457 total number of residues in the database, if available) or undef if | |
458 the information was not available to the Processor object. | |
459 Args : [optional] new scalar integer for number of letters in db | |
460 | |
461 | |
462 =cut | |
463 | |
464 #--------------- | |
465 sub database_letters { | |
466 #--------------- | |
467 my $self = shift; | |
468 my $dbletters = ''; | |
469 if( ref $self->analysis_subject) { | |
470 $dbletters = $self->analysis_subject->letters; | |
471 } | |
472 return $dbletters; | |
473 } | |
474 | |
475 #--------------- | |
476 sub hits { | |
477 #--------------- | |
478 my $self = shift; | |
479 my @hits = (); | |
480 if( ref $self->{'_hits'}) { | |
481 @hits = @{$self->{'_hits'}}; | |
482 } | |
483 return @hits; | |
484 } | |
485 | |
486 =head2 add_hit | |
487 | |
488 Usage : $blast->add_hit( $hit ); | |
489 Purpose : Adds a hit object to the collection of hits in this BLAST result. | |
490 Returns : n/a | |
491 Argument : A Bio::Search::Hit::HitI object | |
492 Comments : For PSI-BLAST, hits from all iterations are lumped together. | |
493 For any given hit, you can determine the iteration in which it was | |
494 found by checking $hit->iteration(). | |
495 | |
496 =cut | |
497 | |
498 #--------------- | |
499 sub add_hit { | |
500 #--------------- | |
501 my ($self, $hit) = @_; | |
502 my $add_it = 1; | |
503 unless( ref $hit and $hit->isa('Bio::Search::Hit::HitI')) { | |
504 $add_it = 0; | |
505 $self->throw(-class =>'Bio::Root::BadParameter', | |
506 -text => "Can't add hit: not a Bio::Search::Hit::HitI: $hit" | |
507 ); | |
508 } | |
509 | |
510 # Avoid adding duplicate hits if we're doing multiple iterations (PSI-BLAST) | |
511 # if( $self->iterations > 1 ) { | |
512 # my $hit_name = $hit->name; | |
513 # if( grep $hit_name eq $_, @{$self->{'_hit_names'}}) { | |
514 # $add_it = 0; | |
515 # } | |
516 # } | |
517 | |
518 if( $add_it ) { | |
519 push @{$self->{'_hits'}}, $hit; | |
520 push @{$self->{'_hit_names'}}, $hit->name; | |
521 } | |
522 } | |
523 | |
524 | |
525 =head2 is_signif | |
526 | |
527 Usage : $blast->is_signif(); | |
528 Purpose : Determine if the BLAST report contains significant hits. | |
529 Returns : Boolean | |
530 Argument : n/a | |
531 Comments : BLAST reports without significant hits but with defined | |
532 : significance criteria will throw exceptions during construction. | |
533 : This obviates the need to check significant() for | |
534 : such objects. | |
535 | |
536 =cut | |
537 | |
538 #------------ | |
539 sub is_signif { my $self = shift; return $self->{'_is_significant'}; } | |
540 #------------ | |
541 | |
542 | |
543 =head2 matrix | |
544 | |
545 Usage : $blast_object->matrix(); | |
546 Purpose : Get the name of the scoring matrix used. | |
547 : This is extracted from the report. | |
548 Argument : n/a | |
549 Returns : string or undef if not defined | |
550 Comments : TODO: Deprecate this and implement get_parameter('matrix'). | |
551 | |
552 =cut | |
553 | |
554 #------------ | |
555 sub matrix { | |
556 #------------ | |
557 my $self = shift; | |
558 if(@_) { | |
559 $self->{'_matrix'} = shift; | |
560 } | |
561 $self->{'_matrix'}; | |
562 } | |
563 | |
564 | |
565 =head2 raw_statistics | |
566 | |
567 Usage : @stats = $blast_result->raw_statistics(); | |
568 Purpose : Get the raw, unparsed statistical parameter section of the Blast report. | |
569 This is the section at the end after the last HSP alignment. | |
570 Argument : n/a | |
571 Returns : Array of strings | |
572 | |
573 =cut | |
574 | |
575 #------------ | |
576 sub raw_statistics { | |
577 #------------ | |
578 my $self = shift; | |
579 if(@_) { | |
580 my $params = shift; | |
581 if( ref $params eq 'ARRAY') { | |
582 $self->{'_raw_statistics'} = $params; | |
583 } | |
584 else { | |
585 $self->throw(-class =>'Bio::Root::BadParameter', | |
586 -text => "Can't set statistical params: not an ARRAY ref: $params" | |
587 ); | |
588 } | |
589 } | |
590 if(not defined $self->{'_raw_statistics'}) { | |
591 $self->{'_raw_statistics'} = []; | |
592 } | |
593 | |
594 @{$self->{'_raw_statistics'}}; | |
595 } | |
596 | |
597 | |
598 | |
599 =head2 no_hits_found | |
600 | |
601 Usage : $nohits = $blast->no_hits_found( [iteration_number] ); | |
602 Purpose : Get boolean indicator indicating whether or not any hits | |
603 were present in the report. | |
604 | |
605 This is NOT the same as determining the number of hits via | |
606 the hits() method, which will return zero hits if there were no | |
607 hits in the report or if all hits were filtered out during the parse. | |
608 | |
609 Thus, this method can be used to distinguish these possibilities | |
610 for hitless reports generated when filtering. | |
611 | |
612 Returns : Boolean | |
613 Argument : (optional) integer indicating the iteration number (PSI-BLAST) | |
614 If iteration number is not specified and this is a PSI-BLAST result, | |
615 then this method will return true only if all iterations had | |
616 no hits found. | |
617 | |
618 =cut | |
619 | |
620 #----------- | |
621 sub no_hits_found { | |
622 #----------- | |
623 my ($self, $round) = @_; | |
624 | |
625 my $result = 0; # final return value of this method. | |
626 # Watch the double negative! | |
627 # result = 0 means "yes hits were found" | |
628 # result = 1 means "no hits were found" (for the indicated iteration or all iterations) | |
629 | |
630 # If a iteration was not specified and there were multiple iterations, | |
631 # this method should return true only if all iterations had no hits found. | |
632 if( not defined $round ) { | |
633 if( $self->{'_iterations'} > 1) { | |
634 $result = 1; | |
635 foreach my $i( 1..$self->{'_iterations'} ) { | |
636 if( not defined $self->{"_iteration_$i"}->{'_no_hits_found'} ) { | |
637 $result = 0; | |
638 last; | |
639 } | |
640 } | |
641 } | |
642 else { | |
643 $result = $self->{"_iteration_1"}->{'_no_hits_found'}; | |
644 } | |
645 } | |
646 else { | |
647 $result = $self->{"_iteration_$round"}->{'_no_hits_found'}; | |
648 } | |
649 | |
650 return $result; | |
651 } | |
652 | |
653 | |
654 =head2 set_no_hits_found | |
655 | |
656 Usage : $blast->set_no_hits_found( [iteration_number] ); | |
657 Purpose : Set boolean indicator indicating whether or not any hits | |
658 were present in the report. | |
659 Returns : n/a | |
660 Argument : (optional) integer indicating the iteration number (PSI-BLAST) | |
661 | |
662 =cut | |
663 | |
664 #----------- | |
665 sub set_no_hits_found { | |
666 #----------- | |
667 my ($self, $round) = @_; | |
668 $round ||= 1; | |
669 $self->{"_iteration_$round"}->{'_no_hits_found'} = 1; | |
670 } | |
671 | |
672 | |
673 =head2 iterations | |
674 | |
675 Usage : $num_iterations = $blast->iterations; (get) | |
676 $blast->iterations($num_iterations); (set) | |
677 Purpose : Set/get the number of iterations in the Blast Report (PSI-BLAST). | |
678 Returns : Total number of iterations in the report | |
679 Argument : integer (when setting) | |
680 | |
681 =cut | |
682 | |
683 #---------------- | |
684 sub iterations { | |
685 #---------------- | |
686 my ($self, $num ) = @_; | |
687 if( defined $num ) { | |
688 $self->{'_iterations'} = $num; | |
689 } | |
690 return $self->{'_iterations'}; | |
691 } | |
692 | |
693 | |
694 =head2 psiblast | |
695 | |
696 Usage : if( $blast->psiblast ) { ... } | |
697 Purpose : Set/get a boolean indicator whether or not the report | |
698 is a PSI-BLAST report. | |
699 Returns : 1 if PSI-BLAST, undef if not. | |
700 Argument : 1 (when setting) | |
701 | |
702 =cut | |
703 | |
704 #---------------- | |
705 sub psiblast { | |
706 #---------------- | |
707 my ($self, $val ) = @_; | |
708 if( $val ) { | |
709 $self->{'_psiblast'} = 1; | |
710 } | |
711 return $self->{'_psiblast'}; | |
712 } | |
713 | |
714 | |
715 1; | |
716 __END__ |