comparison variant_effect_predictor/Bio/Search/Result/BlastResult.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #-----------------------------------------------------------------
2 # $Id: BlastResult.pm,v 1.13 2002/12/24 15:48:41 jason Exp $
3 #
4 # BioPerl module Bio::Search::Result::BlastResult
5 #
6 # Cared for by Steve Chervitz <sac@bioperl.org>
7 #
8 # You may distribute this module under the same terms as perl itself
9 #-----------------------------------------------------------------
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Search::Result::BlastResult - A top-level BLAST Report object
16
17 =head1 SYNOPSIS
18
19 The construction of BlastResult objects is performed by
20 by the B<Bio::SearchIO::psiblast> parser.
21 Therefore, you do not need to
22 use B<Bio::Search::Result::BlastResult>) directly. If you need to construct
23 BlastHits directly, see the new() function for details.
24
25 For B<Bio::SearchIO> BLAST parsing usage examples, see the
26 B<examples/search-blast> directory of the Bioperl distribution.
27
28 =head1 DESCRIPTION
29
30 This module supports BLAST versions 1.x and 2.x, gapped and ungapped,
31 and PSI-BLAST.
32
33 =head1 DEPENDENCIES
34
35 Bio::Search::Result::BlastResult.pm is a concrete class that inherits from B<Bio::Root::Root> and B<Bio::Search::Result::ResultI>. It relies on two other modules:
36
37 =over 4
38
39 =item B<Bio::Search::Hit::BlastHit>
40
41 Encapsulates a single a single BLAST hit.
42
43 =item B<Bio::Search::GenericDatabase>
44
45 Provides an interface to a blast database metadata.
46
47 =back
48
49 =head1 FEEDBACK
50
51 =head2 Mailing Lists
52
53 User feedback is an integral part of the evolution of this and other
54 Bioperl modules. Send your comments and suggestions preferably to one
55 of the Bioperl mailing lists. Your participation is much appreciated.
56
57 bioperl-l@bioperl.org - General discussion
58 http://bio.perl.org/MailList.html - About the mailing lists
59
60 =head2 Reporting Bugs
61
62 Report bugs to the Bioperl bug tracking system to help us keep track
63 the bugs and their resolution. Bug reports can be submitted via email
64 or the web:
65
66 bioperl-bugs@bio.perl.org
67 http://bugzilla.bioperl.org/
68
69 =head1 AUTHOR
70
71 Steve Chervitz E<lt>sac@bioperl.orgE<gt>
72
73 See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.
74
75 =head1 ACKNOWLEDGEMENTS
76
77 This software was originally developed in the Department of Genetics
78 at Stanford University. I would also like to acknowledge my
79 colleagues at Affymetrix for useful feedback.
80
81 =head1 COPYRIGHT
82
83 Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
84
85 =cut
86
87 =head1 DISCLAIMER
88
89 This software is provided "as is" without warranty of any kind.
90
91 =head1 APPENDIX
92
93 The rest of the documentation details each of the object methods.
94 Internal methods are usually preceded with a _
95
96 =cut
97
98
99 # Let the code begin...
100
101 package Bio::Search::Result::BlastResult;
102
103 use strict;
104
105 use Bio::Search::Result::ResultI;
106 use Bio::Root::Root;
107
108 use overload
109 '""' => \&to_string;
110
111 use vars qw(@ISA $Revision );
112
113 $Revision = '$Id: BlastResult.pm,v 1.13 2002/12/24 15:48:41 jason Exp $'; #'
114 @ISA = qw( Bio::Root::Root Bio::Search::Result::ResultI);
115
116 #----------------
117 sub new {
118 #----------------
119 my ($class, @args) = @_;
120 my $self = $class->SUPER::new(@args);
121 return $self;
122 }
123
124 #sub DESTROY {
125 # my $self = shift;
126 # print STDERR "->DESTROYING $self\n";
127 #}
128
129
130 #=================================================
131 # Begin Bio::Search::Result::ResultI implementation
132 #=================================================
133
134 =head2 next_hit
135
136 See L<Bio::Search::Result::ResultI::next_hit()|Bio::Search::Result::ResultI> for documentation
137
138 =cut
139
140 #----------------
141 sub next_hit {
142 #----------------
143 my ($self) = @_;
144
145 unless(defined $self->{'_hit_queue'}) {
146 $self->{'_hit_queue'} = [$self->hits()];
147 }
148
149 shift @{$self->{'_hit_queue'}};
150 }
151
152 =head2 query_name
153
154 See L<Bio::Search::Result::ResultI::query_name()|Bio::Search::Result::ResultI> for documentation
155
156 =cut
157
158 #----------------
159 sub query_name {
160 #----------------
161 my $self = shift;
162 if (@_) {
163 my $name = shift;
164 $name =~ s/^\s+|(\s+|,)$//g;
165 $self->{'_query_name'} = $name;
166 }
167 return $self->{'_query_name'};
168 }
169
170 =head2 query_length
171
172 See L<Bio::Search::Result::ResultI::query_length()|Bio::Search::Result::ResultI> for documentation
173
174 =cut
175
176 #----------------
177 sub query_length {
178 #----------------
179 my $self = shift;
180 if(@_) { $self->{'_query_length'} = shift; }
181 return $self->{'_query_length'};
182 }
183
184 =head2 query_description
185
186 See L<Bio::Search::Result::ResultI::query_description()|Bio::Search::Result::ResultI> for documentation
187
188 =cut
189
190 #----------------
191 sub query_description {
192 #----------------
193 my $self = shift;
194 if(@_) {
195 my $desc = shift;
196 defined $desc && $desc =~ s/(^\s+|\s+$)//g;
197 # Remove duplicated ID at beginning of description string
198 defined $desc && $desc =~ s/^$self->{'_query_name'}//o;
199 $self->{'_query_query_desc'} = $desc || '';
200 }
201 return $self->{'_query_query_desc'};
202 }
203
204
205 =head2 analysis_method
206
207 See L<Bio::AnalysisResultI::analysis_method()|Bio::AnalysisResultI> for documentation
208
209 This implementation ensures that the name matches /blast/i.
210
211 =cut
212
213 #----------------
214 sub analysis_method {
215 #----------------
216 my ($self, $method) = @_;
217 if($method ) {
218 if( $method =~ /blast/i) {
219 $self->{'_analysis_prog'} = $method;
220 } else {
221 $self->throw("method $method not supported in " . ref($self));
222 }
223 }
224 return $self->{'_analysis_prog'};
225 }
226
227 =head2 analysis_method_version
228
229 See L<Bio::AnalysisResultI::analysis_method_version()|Bio::AnalysisResultI> for documentation
230
231 =cut
232
233 #----------------
234 sub analysis_method_version {
235 #----------------
236 my ($self, $version) = @_;
237 if($version) {
238 $self->{'_analysis_progVersion'} = $version;
239 }
240 return $self->{'_analysis_progVersion'};
241 }
242
243
244 =head2 analysis_query
245
246 See L<Bio::AnalysisResultI::analysis_query()|Bio::AnalysisResultI> for documentation
247
248 =cut
249
250 #----------------
251 sub analysis_query {
252 #----------------
253
254 my ($self) = @_;
255 if(not defined $self->{'_analysis_query'}) {
256 require Bio::PrimarySeq;
257 my $moltype = $self->analysis_method =~ /blastp|tblastn/i ? 'protein' : 'dna';
258 $self->{'_analysis_query'} = Bio::PrimarySeq->new( -display_id => $self->query_name,
259 -desc => $self->query_description,
260 -moltype => $moltype
261 );
262 $self->{'_analysis_query'}->length( $self->query_length );
263 }
264 return $self->{'_analysis_query'};
265 }
266
267 =head2 analysis_subject
268
269 Usage : $blastdb = $result->analyis_subject();
270 Purpose : Get a Bio::Search::DatabaseI object containing
271 information about the database used in the BLAST analysis.
272 Returns : Bio::Search::DatabaseI object.
273 Argument : n/a
274
275 =cut
276
277 #---------------
278 sub analysis_subject {
279 #---------------
280 my ($self, $blastdb) = @_;
281 if($blastdb) {
282 if( ref $blastdb and $blastdb->isa('Bio::Search::DatabaseI')) {
283 $self->{'_analysis_sbjct'} = $blastdb;
284 }
285 else {
286 $self->throw(-class =>'Bio::Root::BadParameter',
287 -text => "Can't set BlastDB: not a Bio::Search::DatabaseI $blastdb"
288 );
289 }
290 }
291 return $self->{'_analysis_sbjct'};
292 }
293
294 =head2 next_feature
295
296 Title : next_feature
297 Usage : while( my $feat = $blast_result->next_feature ) { # do something }
298 Function: Returns the next feature available in the analysis result, or
299 undef if there are no more features.
300 Example :
301 Returns : A Bio::SeqFeatureI compliant object, in this case,
302 each Bio::Search::HSP::BlastHSP object within each BlastHit.
303 Args : None
304
305 =cut
306
307 #---------------
308 sub next_feature{
309 #---------------
310 my ($self) = @_;
311 my ($hit, $hsp);
312 $hit = $self->{'_current_hit'};
313 unless( defined $hit ) {
314 $hit = $self->{'_current_hit'} = $self->next_hit;
315 return undef unless defined $hit;
316 }
317 $hsp = $hit->next_hsp;
318 unless( defined $hsp ) {
319 $self->{'_current_hit'} = undef;
320 return $self->next_feature;
321 }
322 return $hsp || undef;
323 }
324
325
326 sub algorithm { shift->analysis_method( @_ ); }
327 sub algorithm_version { shift->analysis_method_version( @_ ); }
328
329 =head2 available_parameters
330
331 Title : available_parameters
332 Usage : my @params = $report->available_paramters
333 Function: Returns the names of the available parameters
334 Returns : Return list of available parameters used for this report
335 Args : none
336
337 =cut
338
339 sub available_parameters{
340 return ();
341 }
342
343
344 =head2 get_parameter
345
346 Title : get_parameter
347 Usage : my $gap_ext = $report->get_parameter('gapext')
348 Function: Returns the value for a specific parameter used
349 when running this report
350 Returns : string
351 Args : name of parameter (string)
352
353 =cut
354
355 sub get_parameter{
356 return '';
357 }
358
359 =head2 get_statistic
360
361 Title : get_statistic
362 Usage : my $gap_ext = $report->get_statistic('kappa')
363 Function: Returns the value for a specific statistic available
364 from this report
365 Returns : string
366 Args : name of statistic (string)
367
368 =cut
369
370 sub get_statistic{
371 return '';
372 }
373
374 =head2 available_statistics
375
376 Title : available_statistics
377 Usage : my @statnames = $report->available_statistics
378 Function: Returns the names of the available statistics
379 Returns : Return list of available statistics used for this report
380 Args : none
381
382 =cut
383
384 sub available_statistics{
385 return ();
386 }
387
388 #=================================================
389 # End Bio::Search::Result::ResultI implementation
390 #=================================================
391
392
393 =head2 to_string
394
395 Title : to_string
396 Usage : print $blast->to_string;
397 Function: Returns a string representation for the Blast result.
398 Primarily intended for debugging purposes.
399 Example : see usage
400 Returns : A string of the form:
401 [BlastResult] <analysis_method> query=<name> <description> db=<database
402 e.g.:
403 [BlastResult] BLASTP query=YEL060C vacuolar protease B, db=PDBUNIQ
404 Args : None
405
406 =cut
407
408 #---------------
409 sub to_string {
410 #---------------
411 my $self = shift;
412 my $str = "[BlastResult] " . $self->analysis_method . " query=" . $self->query_name . " " . $self->query_description .", db=" . $self->database_name;
413 return $str;
414 }
415
416 #---------------
417 sub database_name {
418 #---------------
419 my $self = shift;
420 my $dbname = '';
421 if( ref $self->analysis_subject) {
422 $dbname = $self->analysis_subject->name;
423 }
424 return $dbname;
425 }
426
427 =head2 database_entries
428
429 Title : database_entries
430 Usage : $num_entries = $result->database_entries()
431 Function: Used to obtain the number of entries contained in the database.
432 Returns : a scalar integer representing the number of entities in the database
433 or undef if the information was not available.
434 Args : [optional] new integer for the number of sequence entries in the db
435
436
437 =cut
438
439 #---------------
440 sub database_entries {
441 #---------------
442 my $self = shift;
443 my $dbentries = '';
444 if( ref $self->analysis_subject) {
445 $dbentries = $self->analysis_subject->entries;
446 }
447 return $dbentries;
448 }
449
450
451 =head2 database_letters
452
453 Title : database_letters
454 Usage : $size = $result->database_letters()
455 Function: Used to obtain the size of database that was searched against.
456 Returns : a scalar integer (units specific to algorithm, but probably the
457 total number of residues in the database, if available) or undef if
458 the information was not available to the Processor object.
459 Args : [optional] new scalar integer for number of letters in db
460
461
462 =cut
463
464 #---------------
465 sub database_letters {
466 #---------------
467 my $self = shift;
468 my $dbletters = '';
469 if( ref $self->analysis_subject) {
470 $dbletters = $self->analysis_subject->letters;
471 }
472 return $dbletters;
473 }
474
475 #---------------
476 sub hits {
477 #---------------
478 my $self = shift;
479 my @hits = ();
480 if( ref $self->{'_hits'}) {
481 @hits = @{$self->{'_hits'}};
482 }
483 return @hits;
484 }
485
486 =head2 add_hit
487
488 Usage : $blast->add_hit( $hit );
489 Purpose : Adds a hit object to the collection of hits in this BLAST result.
490 Returns : n/a
491 Argument : A Bio::Search::Hit::HitI object
492 Comments : For PSI-BLAST, hits from all iterations are lumped together.
493 For any given hit, you can determine the iteration in which it was
494 found by checking $hit->iteration().
495
496 =cut
497
498 #---------------
499 sub add_hit {
500 #---------------
501 my ($self, $hit) = @_;
502 my $add_it = 1;
503 unless( ref $hit and $hit->isa('Bio::Search::Hit::HitI')) {
504 $add_it = 0;
505 $self->throw(-class =>'Bio::Root::BadParameter',
506 -text => "Can't add hit: not a Bio::Search::Hit::HitI: $hit"
507 );
508 }
509
510 # Avoid adding duplicate hits if we're doing multiple iterations (PSI-BLAST)
511 # if( $self->iterations > 1 ) {
512 # my $hit_name = $hit->name;
513 # if( grep $hit_name eq $_, @{$self->{'_hit_names'}}) {
514 # $add_it = 0;
515 # }
516 # }
517
518 if( $add_it ) {
519 push @{$self->{'_hits'}}, $hit;
520 push @{$self->{'_hit_names'}}, $hit->name;
521 }
522 }
523
524
525 =head2 is_signif
526
527 Usage : $blast->is_signif();
528 Purpose : Determine if the BLAST report contains significant hits.
529 Returns : Boolean
530 Argument : n/a
531 Comments : BLAST reports without significant hits but with defined
532 : significance criteria will throw exceptions during construction.
533 : This obviates the need to check significant() for
534 : such objects.
535
536 =cut
537
538 #------------
539 sub is_signif { my $self = shift; return $self->{'_is_significant'}; }
540 #------------
541
542
543 =head2 matrix
544
545 Usage : $blast_object->matrix();
546 Purpose : Get the name of the scoring matrix used.
547 : This is extracted from the report.
548 Argument : n/a
549 Returns : string or undef if not defined
550 Comments : TODO: Deprecate this and implement get_parameter('matrix').
551
552 =cut
553
554 #------------
555 sub matrix {
556 #------------
557 my $self = shift;
558 if(@_) {
559 $self->{'_matrix'} = shift;
560 }
561 $self->{'_matrix'};
562 }
563
564
565 =head2 raw_statistics
566
567 Usage : @stats = $blast_result->raw_statistics();
568 Purpose : Get the raw, unparsed statistical parameter section of the Blast report.
569 This is the section at the end after the last HSP alignment.
570 Argument : n/a
571 Returns : Array of strings
572
573 =cut
574
575 #------------
576 sub raw_statistics {
577 #------------
578 my $self = shift;
579 if(@_) {
580 my $params = shift;
581 if( ref $params eq 'ARRAY') {
582 $self->{'_raw_statistics'} = $params;
583 }
584 else {
585 $self->throw(-class =>'Bio::Root::BadParameter',
586 -text => "Can't set statistical params: not an ARRAY ref: $params"
587 );
588 }
589 }
590 if(not defined $self->{'_raw_statistics'}) {
591 $self->{'_raw_statistics'} = [];
592 }
593
594 @{$self->{'_raw_statistics'}};
595 }
596
597
598
599 =head2 no_hits_found
600
601 Usage : $nohits = $blast->no_hits_found( [iteration_number] );
602 Purpose : Get boolean indicator indicating whether or not any hits
603 were present in the report.
604
605 This is NOT the same as determining the number of hits via
606 the hits() method, which will return zero hits if there were no
607 hits in the report or if all hits were filtered out during the parse.
608
609 Thus, this method can be used to distinguish these possibilities
610 for hitless reports generated when filtering.
611
612 Returns : Boolean
613 Argument : (optional) integer indicating the iteration number (PSI-BLAST)
614 If iteration number is not specified and this is a PSI-BLAST result,
615 then this method will return true only if all iterations had
616 no hits found.
617
618 =cut
619
620 #-----------
621 sub no_hits_found {
622 #-----------
623 my ($self, $round) = @_;
624
625 my $result = 0; # final return value of this method.
626 # Watch the double negative!
627 # result = 0 means "yes hits were found"
628 # result = 1 means "no hits were found" (for the indicated iteration or all iterations)
629
630 # If a iteration was not specified and there were multiple iterations,
631 # this method should return true only if all iterations had no hits found.
632 if( not defined $round ) {
633 if( $self->{'_iterations'} > 1) {
634 $result = 1;
635 foreach my $i( 1..$self->{'_iterations'} ) {
636 if( not defined $self->{"_iteration_$i"}->{'_no_hits_found'} ) {
637 $result = 0;
638 last;
639 }
640 }
641 }
642 else {
643 $result = $self->{"_iteration_1"}->{'_no_hits_found'};
644 }
645 }
646 else {
647 $result = $self->{"_iteration_$round"}->{'_no_hits_found'};
648 }
649
650 return $result;
651 }
652
653
654 =head2 set_no_hits_found
655
656 Usage : $blast->set_no_hits_found( [iteration_number] );
657 Purpose : Set boolean indicator indicating whether or not any hits
658 were present in the report.
659 Returns : n/a
660 Argument : (optional) integer indicating the iteration number (PSI-BLAST)
661
662 =cut
663
664 #-----------
665 sub set_no_hits_found {
666 #-----------
667 my ($self, $round) = @_;
668 $round ||= 1;
669 $self->{"_iteration_$round"}->{'_no_hits_found'} = 1;
670 }
671
672
673 =head2 iterations
674
675 Usage : $num_iterations = $blast->iterations; (get)
676 $blast->iterations($num_iterations); (set)
677 Purpose : Set/get the number of iterations in the Blast Report (PSI-BLAST).
678 Returns : Total number of iterations in the report
679 Argument : integer (when setting)
680
681 =cut
682
683 #----------------
684 sub iterations {
685 #----------------
686 my ($self, $num ) = @_;
687 if( defined $num ) {
688 $self->{'_iterations'} = $num;
689 }
690 return $self->{'_iterations'};
691 }
692
693
694 =head2 psiblast
695
696 Usage : if( $blast->psiblast ) { ... }
697 Purpose : Set/get a boolean indicator whether or not the report
698 is a PSI-BLAST report.
699 Returns : 1 if PSI-BLAST, undef if not.
700 Argument : 1 (when setting)
701
702 =cut
703
704 #----------------
705 sub psiblast {
706 #----------------
707 my ($self, $val ) = @_;
708 if( $val ) {
709 $self->{'_psiblast'} = 1;
710 }
711 return $self->{'_psiblast'};
712 }
713
714
715 1;
716 __END__