comparison variant_effect_predictor/Bio/Search/Result/GenericResult.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 # $Id: GenericResult.pm,v 1.15 2002/12/05 13:46:34 heikki Exp $
2 #
3 # BioPerl module for Bio::Search::Result::GenericResult
4 #
5 # Cared for by Jason Stajich <jason@bioperl.org>
6 #
7 # Copyright Jason Stajich
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Search::Result::GenericResult - Generic Implementation of Bio::Search::Result::ResultI interface applicable to most search results.
16
17 =head1 SYNOPSIS
18
19
20 # typically one gets Results from a SearchIO stream
21 use Bio::SearchIO;
22 my $io = new Bio::SearchIO(-format => 'blast',
23 -file => 't/data/HUMBETGLOA.tblastx');
24 while( my $result = $io->next_result) {
25 # process all search results within the input stream
26 while( my $hit = $result->next_hit()) {
27 # insert code here for hit processing
28 }
29 }
30
31 use Bio::Search::Result::GenericResult;
32 my @hits = (); # would be a list of Bio::Search::Hit::HitI objects
33 # typically these are created from a Bio::SearchIO stream
34 my $result = new Bio::Search::Result::GenericResult
35 ( -query_name => 'HUMBETGLOA',
36 -query_accession => '',
37 -query_description => 'Human haplotype C4 beta-globin gene, complete cds.',
38 -query_length => 3002,
39 -database_name => 'ecoli.aa',
40 -database_letters => 4662239,
41 -database_entries => 400,
42 -parameters => { 'e' => '0.001' },
43 -statistics => { 'kappa' => 0.731 },
44 -algorithm => 'blastp',
45 -algorithm_version => '2.1.2',
46 );
47
48 my $id = $result->query_name();
49
50 my $desc = $result->query_description();
51
52 my $name = $result->database_name();
53
54 my $size = $result->database_letters();
55
56 my $num_entries = $result->database_entries();
57
58 my $gap_ext = $result->get_parameter('e');
59
60 my @params = $result->available_parameters;
61
62 my $kappa = $result->get_statistic('kappa');
63
64 my @statnames = $result->available_statistics;
65
66
67
68 =head1 DESCRIPTION
69
70 This object is an implementation of the Bio::Search::Result::ResultI
71 interface and provides a generic place to store results from a
72 sequence database search.
73
74 =head1 FEEDBACK
75
76 =head2 Mailing Lists
77
78 User feedback is an integral part of the evolution of this and other
79 Bioperl modules. Send your comments and suggestions preferably to
80 the Bioperl mailing list. Your participation is much appreciated.
81
82 bioperl-l@bioperl.org - General discussion
83 http://bioperl.org/MailList.shtml - About the mailing lists
84
85 =head2 Reporting Bugs
86
87 Report bugs to the Bioperl bug tracking system to help us keep track
88 of the bugs and their resolution. Bug reports can be submitted via
89 email or the web:
90
91 bioperl-bugs@bioperl.org
92 http://bugzilla.bioperl.org/
93
94 =head1 AUTHOR - Jason Stajich and Steve Chervitz
95
96 Email jason@bioperl.org
97 Email sac@bioperl.org
98
99 =head1 CONTRIBUTORS
100
101 Additional contributors names and emails here
102
103 =head1 APPENDIX
104
105 The rest of the documentation details each of the object methods.
106 Internal methods are usually preceded with a _
107
108 =cut
109
110
111 # Let the code begin...
112
113
114 package Bio::Search::Result::GenericResult;
115 use vars qw(@ISA);
116 use strict;
117
118 use Bio::Root::Root;
119 use Bio::Search::Result::ResultI;
120
121 use overload
122 '""' => \&to_string;
123
124 @ISA = qw(Bio::Root::Root Bio::Search::Result::ResultI);
125
126 =head2 new
127
128 Title : new
129 Usage : my $obj = new Bio::Search::Result::GenericResult();
130 Function: Builds a new Bio::Search::Result::GenericResult object
131 Returns : Bio::Search::Result::GenericResult
132 Args : -query_name => Name of query Sequence
133 -query_accession => Query accession number (if available)
134 -query_description => Description of query sequence
135 -query_length => Length of query sequence
136 -database_name => Name of database
137 -database_letters => Number of residues in database
138 -database_entries => Number of entries in database
139 -parameters => hash ref of search parameters (key => value)
140 -statistics => hash ref of search statistics (key => value)
141 -algorithm => program name (blastx)
142 -algorithm_version => version of the algorithm (2.1.2)
143 -algorithm_reference => literature reference string for this algorithm
144
145 =cut
146
sub new {
    # Build a result object and populate it from the named constructor
    # arguments.  Throws (via $self->throw) when -parameters / -statistics
    # are not hash references or -hits is not an array reference.
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);

    # Every result starts with no hits, a rewound hit iterator and empty
    # parameter/statistic tables.
    $self->{'_hits'}       = [];
    $self->{'_hitindex'}   = 0;
    $self->{'_statistics'} = {};
    $self->{'_parameters'} = {};

    my ($qname, $qacc, $qdesc, $qlen,
        $dbname, $dblet, $dbent, $params,
        $stats, $hits, $algo, $algo_v,
        $prog_ref, $algo_r) = $self->_rearrange([qw(QUERY_NAME
                                                    QUERY_ACCESSION
                                                    QUERY_DESCRIPTION
                                                    QUERY_LENGTH
                                                    DATABASE_NAME
                                                    DATABASE_LETTERS
                                                    DATABASE_ENTRIES
                                                    PARAMETERS
                                                    STATISTICS
                                                    HITS
                                                    ALGORITHM
                                                    ALGORITHM_VERSION
                                                    PROGRAM_REFERENCE
                                                    ALGORITHM_REFERENCE
                                                    )], @args);

    # -program_reference is only used when -algorithm_reference is absent
    # (or false).
    $algo_r ||= $prog_ref;
    $self->algorithm($algo)             if defined $algo;
    $self->algorithm_version($algo_v)   if defined $algo_v;
    $self->algorithm_reference($algo_r) if defined $algo_r;

    $self->query_name($qname)           if defined $qname;
    $self->query_accession($qacc)       if defined $qacc;
    $self->query_description($qdesc)    if defined $qdesc;
    $self->query_length($qlen)          if defined $qlen;
    $self->database_name($dbname)       if defined $dbname;
    $self->database_letters($dblet)     if defined $dblet;
    $self->database_entries($dbent)     if defined $dbent;

    if (defined $params) {
        if (ref($params) !~ /hash/i) {
            $self->throw("Must specify a hash reference with the parameter '-parameters'");
        }
        # keys() restarts the hash iterator; each() would resume wherever
        # the caller's own iteration of this hash had stopped and could
        # silently skip entries.
        foreach my $key (keys %{$params}) {
            $self->add_parameter($key, $params->{$key});
        }
    }
    if (defined $stats) {
        if (ref($stats) !~ /hash/i) {
            $self->throw("Must specify a hash reference with the parameter '-statistics'");
        }
        foreach my $key (keys %{$stats}) {
            $self->add_statistic($key, $stats->{$key});
        }
    }

    if (defined $hits) {
        $self->throw("Must define arrayref of Hits when initializing a $class\n") unless ref($hits) =~ /array/i;

        foreach my $s (@$hits) {
            $self->add_hit($s);
        }
    }
    return $self;
}
215
216 =head2 algorithm
217
218 Title : algorithm
219 Usage : my $r_type = $result->algorithm
220 Function: Obtain the name of the algorithm used to obtain the Result
221 Returns : string (e.g., BLASTP)
222 Args : [optional] scalar string to set value
223
224 =cut
225
sub algorithm {
    # Combined getter/setter for the algorithm name.  The return value is
    # the name held *before* this call.
    my $self = shift;
    my $new  = shift;
    my $old  = $self->{'_algorithm'};

    if (defined $new) {
        $self->{'_algorithm'} = $new;
        return $old;
    }
    return $old if defined $old;

    # First read of a field that was never set: initialise it to ''.
    return $self->{'_algorithm'} = '';
}
235
236 =head2 algorithm_version
237
238 Title : algorithm_version
239 Usage : my $r_version = $result->algorithm_version
240 Function: Obtain the version of the algorithm used to obtain the Result
241 Returns : string (e.g., 2.1.2)
242 Args : [optional] scalar string to set algorithm version value
243
244 =cut
245
sub algorithm_version {
    # Combined getter/setter for the algorithm version string.  The return
    # value is the version held *before* this call.
    my $self = shift;
    my $new  = shift;
    my $old  = $self->{'_algorithm_version'};

    if (defined $new) {
        $self->{'_algorithm_version'} = $new;
        return $old;
    }
    return $old if defined $old;

    # First read of a field that was never set: initialise it to ''.
    return $self->{'_algorithm_version'} = '';
}
256
257 =head2 Bio::Search::Result::ResultI interface methods
258
259 Bio::Search::Result::ResultI implementation
260
261 =head2 next_hit
262
263 Title : next_hit
264 Usage : while( $hit = $result->next_hit()) { ... }
265 Function: Returns the next available Hit object, representing potential
266 matches between the query and various entities from the database.
267 Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
268 Args : none
269
270
271 =cut
272
sub next_hit {
    # Return the next stored hit, or undef once the list is exhausted.
    # The iterator position is advanced on every call via _nexthitindex;
    # use rewind() to start over.
    my ($self) = @_;
    my $index = $self->_nexthitindex;

    # Valid indices are 0 .. count-1, so the list is exhausted as soon as
    # the index reaches the count (the old '>' test let index == count fall
    # through to an out-of-range array read).
    return undef if $index >= scalar @{ $self->{'_hits'} };
    return $self->{'_hits'}->[$index];
}
279
280 =head2 query_name
281
282 Title : query_name
283 Usage : $id = $result->query_name();
284 Function: Get the string identifier of the query used by the
285 algorithm that performed the search.
286 Returns : a string.
287 Args : [optional] new string value for query name
288
289 =cut
290
sub query_name {
    # Combined getter/setter for the query identifier.  The return value is
    # the name held *before* this call.
    my $self = shift;
    my $new  = shift;
    my $old  = $self->{'_queryname'};

    if (defined $new) {
        $self->{'_queryname'} = $new;
        return $old;
    }
    return $old if defined $old;

    # First read of a field that was never set: initialise it to ''.
    return $self->{'_queryname'} = '';
}
300
301 =head2 query_accession
302
303 Title : query_accession
304 Usage : $id = $result->query_accession();
305 Function: Get the accession (if available) for the query sequence
306 Returns : a string
307 Args : [optional] new string value for accession
308
309 =cut
310
sub query_accession {
    # Combined getter/setter for the query accession.  The return value is
    # the accession held *before* this call.
    my $self = shift;
    my $new  = shift;
    my $old  = $self->{'_queryacc'};

    if (defined $new) {
        $self->{'_queryacc'} = $new;
        return $old;
    }
    return $old if defined $old;

    # First read of a field that was never set: initialise it to ''.
    return $self->{'_queryacc'} = '';
}
320
321 =head2 query_length
322
323 Title : query_length
324 Usage : $id = $result->query_length();
325 Function: Get the length of the query sequence
326 used in the search.
327 Returns : a number
328 Args : [optional] new integer value for query length
329
330 =cut
331
sub query_length {
    # Combined getter/setter for the query length.  The return value is the
    # length held *before* this call.
    my $self = shift;
    my $new  = shift;
    my $old  = $self->{'_querylength'};

    if (defined $new) {
        $self->{'_querylength'} = $new;
        return $old;
    }
    return $old if defined $old;

    # First read of a field that was never set: initialise it to 0.
    return $self->{'_querylength'} = 0;
}
341
342 =head2 query_description
343
344 Title : query_description
345 Usage : $id = $result->query_description();
346 Function: Get the description of the query sequence
347 used in the search.
348 Returns : a string
349 Args : [optional] new string for the query description
350
351 =cut
352
sub query_description {
    # Combined getter/setter for the query description.  The return value
    # is the description held *before* this call.
    my $self = shift;
    my $new  = shift;
    my $old  = $self->{'_querydesc'};

    if (defined $new) {
        $self->{'_querydesc'} = $new;
        return $old;
    }
    return $old if defined $old;

    # First read of a field that was never set: initialise it to ''.
    return $self->{'_querydesc'} = '';
}
362
363
364 =head2 database_name
365
366 Title : database_name
367 Usage : $name = $result->database_name()
368 Function: Used to obtain the name of the database that the query was searched
369 against by the algorithm.
370 Returns : a scalar string
371 Args : [optional] new string for the db name
372
373 =cut
374
sub database_name {
    # Combined getter/setter for the searched database's name.  The return
    # value is the name held *before* this call.
    my $self = shift;
    my $new  = shift;
    my $old  = $self->{'_dbname'};

    if (defined $new) {
        $self->{'_dbname'} = $new;
        return $old;
    }
    return $old if defined $old;

    # First read of a field that was never set: initialise it to ''.
    return $self->{'_dbname'} = '';
}
384
385 =head2 database_letters
386
387 Title : database_letters
388 Usage : $size = $result->database_letters()
389 Function: Used to obtain the size of database that was searched against.
390 Returns : a scalar integer (units specific to algorithm, but probably the
391 total number of residues in the database, if available) or undef if
392 the information was not available to the Processor object.
393 Args : [optional] new scalar integer for number of letters in db
394
395
396 =cut
397
sub database_letters {
    # Combined getter/setter for the database size in letters.  The return
    # value is the size held *before* this call.
    my $self = shift;
    my $new  = shift;
    my $old  = $self->{'_dbletters'};

    if (defined $new) {
        $self->{'_dbletters'} = $new;
        return $old;
    }
    return $old if defined $old;

    # First read of a field that was never set: initialise it to ''.
    return $self->{'_dbletters'} = '';
}
407
408 =head2 database_entries
409
410 Title : database_entries
411 Usage : $num_entries = $result->database_entries()
412 Function: Used to obtain the number of entries contained in the database.
413 Returns : a scalar integer representing the number of entities in the database
414 or undef if the information was not available.
415 Args : [optional] new integer for the number of sequence entries in the db
416
417
418 =cut
419
sub database_entries {
    # Combined getter/setter for the number of database entries.  The
    # return value is the count held *before* this call.
    my $self = shift;
    my $new  = shift;
    my $old  = $self->{'_dbentries'};

    if (defined $new) {
        $self->{'_dbentries'} = $new;
        return $old;
    }
    return $old if defined $old;

    # First read of a field that was never set: initialise it to ''.
    return $self->{'_dbentries'} = '';
}
429
430 =head2 get_parameter
431
432 Title : get_parameter
433 Usage : my $gap_ext = $report->get_parameter('gapext')
434 Function: Returns the value for a specific parameter used
435 when running this report
436 Returns : string
437 Args : name of parameter (string)
438
439 =cut
440
sub get_parameter {
    # Look up a single search parameter by name; a name that was never
    # added yields undef.
    my $self   = shift;
    my ($name) = @_;
    return $self->{'_parameters'}{$name};
}
445
446 =head2 available_parameters
447
448 Title : available_parameters
449 Usage : my @params = $report->available_parameters
450 Function: Returns the names of the available parameters
451 Returns : Return list of available parameters used for this report
452 Args : none
453
454 =cut
455
sub available_parameters {
    # List the names of all stored parameters (in hash order).
    my $self = shift;
    return keys %{ $self->{'_parameters'} };
}
460
461
462 =head2 get_statistic
463
464 Title : get_statistic
465 Usage : my $kappa = $report->get_statistic('kappa')
466 Function: Returns the value for a specific statistic available
467 from this report
468 Returns : string
469 Args : name of statistic (string)
470
471 =cut
472
sub get_statistic {
    # Look up a single search statistic by name; a name that was never
    # added yields undef.
    my $self  = shift;
    my ($key) = @_;
    return $self->{'_statistics'}{$key};
}
477
478 =head2 available_statistics
479
480 Title : available_statistics
481 Usage : my @statnames = $report->available_statistics
482 Function: Returns the names of the available statistics
483 Returns : Return list of available statistics used for this report
484 Args : none
485
486 =cut
487
sub available_statistics {
    # List the names of all stored statistics (in hash order).
    my $self = shift;
    return keys %{ $self->{'_statistics'} };
}
492
493 =head2 Bio::Search::Report
494
495 Bio::Search::Result::GenericResult specific methods
496
497 =head2 add_hit
498
499 Title : add_hit
500 Usage : $report->add_hit($hit)
501 Function: Adds a HitI to the stored list of hits
502 Returns : Number of HitI currently stored
503 Args : Bio::Search::Hit::HitI
504
505 =cut
506
sub add_hit {
    # Append a hit to the stored list.
    #   Args    : an object implementing Bio::Search::Hit::HitI
    #   Returns : the number of hits stored after the call
    # Anything that is not such an object is skipped with a warning.
    my ($self, $s) = @_;

    # blessed() must be checked first: calling ->isa on undef or on an
    # unblessed reference is a fatal error, which would bypass the
    # warn-and-skip branch below.
    require Scalar::Util;
    if (Scalar::Util::blessed($s) && $s->isa('Bio::Search::Hit::HitI')) {
        push @{ $self->{'_hits'} }, $s;
    }
    else {
        my $got = !defined $s ? 'undef'
                : ref($s)     ? ref($s)
                :               'a non-reference value';
        $self->warn("Passed in " . $got .
                    " as a Hit which is not a Bio::Search::Hit::HitI... skipping");
    }
    return scalar @{ $self->{'_hits'} };
}
517
518
519 =head2 rewind
520
521 Title : rewind
522 Usage : $result->rewind;
523 Function: Allow one to reset the Hit iteration to the beginning
524 Since this is an in-memory implementation
525 Returns : none
526 Args : none
527
528 =cut
529
sub rewind {
    # Move the hit iterator back to the first stored hit.
    my $self = shift;
    return $self->{'_hitindex'} = 0;
}
534
535
536 =head2 _nexthitindex
537
538 Title : _nexthitindex
539 Usage : private
540
541 =cut
542
sub _nexthitindex {
    # Private: hand out the current iterator position, then advance it.
    my $self    = shift;
    my $current = $self->{'_hitindex'}++;
    return $current;
}
547
548
549
550 =head2 add_parameter
551
552 Title : add_parameter
553 Usage : $report->add_parameter('gapext', 11);
554 Function: Adds a parameter
555 Returns : none
556 Args : key - name of this parameter
557 value - value for this parameter
558
559 =cut
560
sub add_parameter {
    # Store (or overwrite) one search parameter under the given name.
    my $self = shift;
    my ($key, $value) = @_;
    return $self->{'_parameters'}{$key} = $value;
}
565
566
567 =head2 add_statistic
568
569 Title : add_statistic
570 Usage : $report->add_statistic('lambda', 2.3);
571 Function: Adds a statistic
572 Returns : none
573 Args : key - name of this statistic
574 value - value for this statistic
575
576 =cut
577
sub add_statistic {
    # Store (or overwrite) one search statistic under the given name.
    # Returns nothing.
    my $self = shift;
    my ($key, $value) = @_;
    $self->{'_statistics'}{$key} = $value;
    return;
}
583
584
585 =head2 num_hits
586
587 Title : num_hits
588 Usage : my $hitcount= $result->num_hits
589 Function: returns the number of hits for this query result
590 Returns : integer
591 Args : none
592
593
594 =cut
595
sub num_hits {
    # Count the stored hits.  Throws (via $self->throw) when the hit list
    # itself is missing.
    my $self   = shift;
    my $stored = $self->{'_hits'};
    $self->throw("Can't get Hits: data not collected.") unless defined $stored;
    return scalar @{$stored};
}
603
604
605 =head2 hits
606
607 Title : hits
608 Usage : my @hits = $result->hits
609 Function: Returns the available hits for this Result
610 Returns : Array of L<Bio::Search::Hit::HitI> objects
611 Args : none
612
613
614 =cut
615
sub hits {
    # Return a copy of the stored hit list; an empty list when no list
    # reference is present.
    my $self   = shift;
    my $stored = $self->{'_hits'};
    my @found  = ref($stored) ? @{$stored} : ();
    return @found;
}
624
625 =head2 algorithm_reference
626
627 Title : algorithm_reference
628 Usage : $obj->algorithm_reference($newval)
629 Function:
630 Returns : string containing literature reference for the algorithm
631 Args : newvalue string (optional)
632 Comments: Formerly named program_reference(), which is still supported
633 for backwards compatibility.
634
635 =cut
636
sub algorithm_reference {
    # Getter/setter for the literature reference string; returns the value
    # in effect after the call.
    my $self    = shift;
    my ($value) = @_;
    $self->{'algorithm_reference'} = $value if defined $value;
    return $self->{'algorithm_reference'};
}
644
645
sub program_reference {
    # Backwards-compatible alias: forwards all arguments to
    # algorithm_reference().
    my ($self, @args) = @_;
    return $self->algorithm_reference(@args);
}
647
648
649 =head2 no_hits_found
650
651 See documentation in L<Bio::Search::Result::ResultI::no_hits_found()|Bio::Search::Result::ResultI>
652
653 =cut
654
655 #-----------
sub no_hits_found {
#-----------
    # Report whether "no hits found" was recorded.
    #   Args    : [optional] iteration number
    #   Returns : true when no hits were found, 0 otherwise.
    # Watch the double negative: a false result means hits WERE found.
    # Without an iteration number and with more than one iteration, the
    # result is true only if every iteration recorded "no hits found".
    my ($self, $round) = @_;

    # Read an iteration's flag without creating the "_iteration_N" entry
    # as a side effect of merely asking.
    my $flag_for = sub {
        my ($n) = @_;
        my $iteration = $self->{"_iteration_$n"};
        return defined $iteration ? $iteration->{'_no_hits_found'} : undef;
    };

    my $result;
    if (defined $round) {
        $result = $flag_for->($round);
    }
    else {
        # An unset iteration count is treated as 0 (single-iteration path).
        my $iterations = $self->{'_iterations'} || 0;
        if ($iterations > 1) {
            $result = 1;
            foreach my $i (1 .. $iterations) {
                if (not defined $flag_for->($i)) {
                    $result = 0;
                    last;
                }
            }
        }
        else {
            $result = $flag_for->(1);
        }
    }

    # Unrecorded flags are reported as 0 rather than undef.
    return defined $result ? $result : 0;
}
687
688
689 =head2 set_no_hits_found
690
691 See documentation in L<Bio::Search::Result::ResultI::set_no_hits_found()|Bio::Search::Result::ResultI>
692
693 =cut
694
695 #-----------
sub set_no_hits_found {
#-----------
    # Record that the given iteration produced no hits; a missing or false
    # iteration number means iteration 1.
    my $self  = shift;
    my $round = shift || 1;
    return $self->{"_iteration_$round"}{'_no_hits_found'} = 1;
}
702
703
704 =head2 iterations
705
706 See documentation in L<Bio::Search::Result::ResultI::iterations()|Bio::Search::Result::ResultI>
707
708 =cut
709
710 #----------------
sub iterations {
#----------------
    # Getter/setter for the iteration count; returns the value in effect
    # after the call.
    my $self  = shift;
    my ($num) = @_;
    $self->{'_iterations'} = $num if defined $num;
    return $self->{'_iterations'};
}
719
720
721 =head2 psiblast
722
723 See documentation in L<Bio::Search::Result::ResultI::psiblast()|Bio::Search::Result::ResultI>
724
725 =cut
726
727 #----------------
sub psiblast {
#----------------
    # Flag accessor: any true argument switches the flag on (stored as 1);
    # a false argument leaves it untouched.  Returns the stored flag.
    my $self = shift;
    my $val  = shift;
    $self->{'_psiblast'} = 1 if $val;
    return $self->{'_psiblast'};
}
736
737
738 =head2 to_string
739
740 Title : to_string
741 Usage : print $blast->to_string;
742 Function: Returns a string representation for the Blast result.
743 Primarily intended for debugging purposes.
744 Example : see usage
745 Returns : A string of the form:
746 [GenericResult] <analysis_method> query=<name> <description>, db=<database>
747 e.g.:
748 [GenericResult] BLASTP query=YEL060C vacuolar protease B, db=PDBUNIQ
749 Args : None
750
751 =cut
752
753 #---------------
sub to_string {
#---------------
    # One-line summary of the result, built from the algorithm, query name,
    # query description and database name accessors.
    my ($self) = @_;
    my $algorithm   = $self->algorithm;
    my $query       = $self->query_name;
    my $description = $self->query_description;
    my $database    = $self->database_name;
    return "[GenericResult] $algorithm query=$query $description, db=$database";
}
760
761 1;