comparison variant_effect_predictor/Bio/Search/Hit/HitI.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #-----------------------------------------------------------------
2 # $Id: HitI.pm,v 1.17 2002/11/13 11:16:37 sac Exp $
3 #
4 # BioPerl module Bio::Search::Hit::HitI
5 #
6 # Cared for by Steve Chervitz <sac@bioperl.org>
7 #
8 # Originally created by Aaron Mackey <amackey@virginia.edu>
9 #
10 # You may distribute this module under the same terms as perl itself
11 #-----------------------------------------------------------------
12
13 # POD documentation - main docs before the code
14
15 =head1 NAME
16
17 Bio::Search::Hit::HitI - Interface for a hit in a similarity search result
18
19 =head1 SYNOPSIS
20
21 Bio::Search::Hit::HitI objects should not be instantiated since this
22 module defines a pure interface.
23
24 Given an object that implements the Bio::Search::Hit::HitI interface,
25 you can do the following things with it:
26
27 $hit_name = $hit->name();
28
29 $desc = $hit->description();
30
31 $len = $hit->length();
32
33 $alg = $hit->algorithm();
34
35 $score = $hit->raw_score();
36
37 $significance = $hit->significance();
38
39 $rank = $hit->rank(); # the Nth hit for a specific query
40
41 while( $hsp = $obj->next_hsp()) { ... } # process in iterator fashion
42
43 for my $hsp ( $obj->hsps()) { ... } # process in list fashion
44
45 =head1 DESCRIPTION
46
47 Bio::Search::Hit::* objects are data structures that contain information
48 about specific hits obtained during a library search. Some information will
49 be algorithm-specific, but others will be generally defined.
50
51 =head1 FEEDBACK
52
53 =head2 Mailing Lists
54
55 User feedback is an integral part of the evolution of this and other
56 Bioperl modules. Send your comments and suggestions preferably to one
57 of the Bioperl mailing lists. Your participation is much appreciated.
58
59 bioperl-l@bioperl.org - General discussion
60 http://bio.perl.org/MailList.html - About the mailing lists
61
62 =head2 Reporting Bugs
63
64 Report bugs to the Bioperl bug tracking system to help us keep track
65 of the bugs and their resolution. Bug reports can be submitted via email
66 or the web:
67
68 bioperl-bugs@bio.perl.org
69 http://bugzilla.bioperl.org/
70
71 =head1 AUTHOR - Aaron Mackey, Steve Chervitz
72
73 Email amackey@virginia.edu (original author)
74 Email sac@bioperl.org
75
76 =head1 COPYRIGHT
77
78 Copyright (c) 1999-2001 Aaron Mackey, Steve Chervitz. All Rights Reserved.
79
80 =head1 DISCLAIMER
81
82 This software is provided "as is" without warranty of any kind.
83
84 =head1 APPENDIX
85
86 The rest of the documentation details each of the object
87 methods. Internal methods are usually preceded with a _
88
89 =cut
90
91 # Let the code begin...
92
93 package Bio::Search::Hit::HitI;
94
95 use Bio::Root::RootI;
96
97 use vars qw(@ISA);
98 use strict;
99
100 @ISA = qw( Bio::Root::RootI );
101
102
103 =head2 name
104
105 Title : name
106 Usage : $hit_name = $hit->name();
107 Function: returns the name of the Hit sequence
108 Returns : a scalar string
109 Args : none
110
111 =cut
112
# Interface stub for the hit name accessor.  It has no behaviour of its
# own: it only invokes throw_not_implemented() on the invocant, so concrete
# hit classes are expected to override it.
sub name {
    my $self = $_[0];
    return $self->throw_not_implemented();
}
117
118 =head2 description
119
120 Title : description
121 Usage : $desc = $hit->description();
122 Function: Retrieve the description for the hit
123 Returns : a scalar string
124 Args : none
125
126 =cut
127
# Interface stub for the hit description accessor.  The only thing it does
# is call throw_not_implemented() on the invocant; implementing classes
# supply the real accessor.
sub description {
    my $self = $_[0];
    return $self->throw_not_implemented();
}
132
133 =head2 accession
134
135 Title : accession
136 Usage : $acc = $hit->accession();
137 Function: Retrieve the accession (if available) for the hit
138 Returns : a scalar string (empty string if not set)
139 Args : none
140
141 =cut
142
# Interface stub for the accession accessor: calls throw_not_implemented()
# on the invocant and nothing else.  Meant to be overridden.
sub accession {
    my $self = $_[0];
    return $self->throw_not_implemented();
}
147
148 =head2 locus
149
150 Title : locus
151 Usage : $acc = $hit->locus();
152 Function: Retrieve the locus(if available) for the hit
153 Returns : a scalar string (empty string if not set)
154 Args : none
155
156 =cut
157
# Interface stub for the locus accessor: calls throw_not_implemented() on
# the invocant and nothing else.  Meant to be overridden.
sub locus {
    my $self = $_[0];
    return $self->throw_not_implemented();
}
162
163 =head2 length
164
165 Title : length
166 Usage : my $len = $hit->length
167 Function: Returns the length of the hit
168 Returns : integer
169 Args : none
170
171 =cut
172
# Interface stub for the hit length accessor.  It simply invokes
# throw_not_implemented() on the invocant; concrete hit classes are
# expected to override it.
sub length {
    my $self = $_[0];
    return $self->throw_not_implemented();
}
177
178
179 =head2 algorithm
180
181 Title : algorithm
182 Usage : $alg = $hit->algorithm();
183 Function: Gets the algorithm specification that was used to obtain the hit
184 For BLAST, the algorithm denotes what type of sequence was aligned
185 against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated
186 dna-prt, TBLASTN prt-translated dna, TBLASTX translated
187 dna-translated dna).
188 Returns : a scalar string
189 Args : none
190
191 =cut
192
# Interface stub for the algorithm accessor: it only calls
# throw_not_implemented() on the invocant.  Implementing classes provide
# the real value.
sub algorithm {
    my $self = $_[0];
    return $self->throw_not_implemented();
}
197
198 =head2 raw_score
199
200 Title : raw_score
201 Usage : $score = $hit->raw_score();
202 Function: Gets the "raw score" generated by the algorithm. What
203 this score is exactly will vary from algorithm to algorithm,
204 returning undef if unavailable.
205 Returns : a scalar value
206 Args : none
207
208 =cut
209
# Interface stub for the raw score accessor: invokes
# throw_not_implemented() on the invocant and does nothing else.
sub raw_score {
    my ($self) = @_;
    return $self->throw_not_implemented();
}
213
214 =head2 significance
215
216 Title : significance
217 Usage : $significance = $hit->significance();
218 Function: Used to obtain the E or P value of a hit, i.e. the probability that
219 this particular hit was obtained purely by random chance. If
220 information is not available (nor calculable from other
221 information sources), return undef.
222 Returns : a scalar value or undef if unavailable
223 Args : none
224
225 =cut
226
# Interface stub for the significance (E/P value) accessor: invokes
# throw_not_implemented() on the invocant and does nothing else.
sub significance {
    my ($self) = @_;
    return $self->throw_not_implemented();
}
230
231 =head2 bits
232
233 Usage : $hit_object->bits();
234 Purpose : Gets the bit score of the best HSP for the current hit.
235 Example : $bits = $hit_object->bits();
236 Returns : Integer or double for FASTA reports
237 Argument : n/a
238 Comments : For BLAST1, the non-bit score is listed in the summary line.
239
240 See Also : L<raw_score()|raw_score>
241
242 =cut
243
# Interface stub for the bit score accessor: invokes
# throw_not_implemented() on the invocant and does nothing else.
sub bits {
    my ($self) = @_;
    return $self->throw_not_implemented();
}
249
250 =head2 next_hsp
251
252 Title : next_hsp
253 Usage : while( $hsp = $obj->next_hsp()) { ... }
254 Function : Returns the next available High Scoring Pair
255 Example :
256 Returns : Bio::Search::HSP::HSPI object or null if finished
257 Args : none
258
259 =cut
260
# Interface stub for the HSP iterator: it only calls
# throw_not_implemented() on the invocant.  The iteration itself is left
# to implementing classes.
sub next_hsp {
    my $self = $_[0];
    return $self->throw_not_implemented();
}
265
266
267 =head2 hsps
268
269 Usage : $hit_object->hsps();
270 Purpose : Get a list containing all HSP objects.
271 : Get the number of HSPs for the current hit.
272 Example : @hsps = $hit_object->hsps();
273 : $num = $hit_object->hsps(); # alternatively, use num_hsps()
274 Returns : Array context : list of Bio::Search::HSP::BlastHSP.pm objects.
275 : Scalar context: integer (number of HSPs).
276 : (Equivalent to num_hsps()).
277 Argument : n/a. Relies on wantarray
278 Throws : Exception if the HSPs have not been collected.
279
280 See Also : L<hsp()|hsp>, L<num_hsps()|num_hsps>
281
282 =cut
283
# Interface stub for the HSP list accessor.  It takes the invocant off the
# argument list and calls throw_not_implemented() on it.  The list/count
# behaviour is left to implementing classes.
sub hsps {
    return shift->throw_not_implemented();
}
291
292
293
294 =head2 num_hsps
295
296 Usage : $hit_object->num_hsps();
297 Purpose : Get the number of HSPs for the present Blast hit.
298 Example : $nhsps = $hit_object->num_hsps();
299 Returns : Integer
300 Argument : n/a
301 Throws : Exception if the HSPs have not been collected.
302
303 See Also : L<hsps()|hsps>
304
305 =cut
306
# Interface stub for the HSP counter: calls throw_not_implemented() on the
# invocant and nothing else.  Meant to be overridden.
sub num_hsps {
    my $self = shift;
    return $self->throw_not_implemented();
}
312
313
314 =head2 seq_inds
315
316 Usage : $hit->seq_inds( seq_type, class, collapse );
317 Purpose : Get a list of residue positions (indices) across all HSPs
318 : for identical or conserved residues in the query or sbjct sequence.
319 Example : @s_ind = $hit->seq_inds('query', 'identical');
320 : @h_ind = $hit->seq_inds('hit', 'conserved');
321 : @h_ind = $hit->seq_inds('hit', 'conserved', 1);
322 Returns : Array of integers
323 : May include ranges if collapse is non-zero.
324 Argument : [0] seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
325 : ('sbjct' is synonymous with 'hit')
326 : [1] class = 'identical' or 'conserved' (default = 'identical')
327 : (can be shortened to 'id' or 'cons')
328 : (actually, anything not 'id' will evaluate to 'conserved').
329 : [2] collapse = boolean, if non-zero, consecutive positions are merged
330 : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
331 : collapses to "1-5 7 9-11". This is useful for
332 : consolidating long lists. Default = no collapse.
333 Throws : n/a.
334
335 See Also : L<Bio::Search::HSP::BlastHSP::seq_inds()|Bio::Search::HSP::BlastHSP>
336
337 =cut
338
# seq_inds: merge the residue positions reported by every HSP of this hit.
#
# Args    : $seqType  - 'query' (default), 'hit' or 'sbjct'; 'hit' is
#                       rewritten to 'sbjct' before it is handed to the HSPs.
#           $class    - passed through unchanged to each HSP's seq_inds()
#                       (default 'identical').
#           $collapse - when true, the merged positions are returned via
#                       Bio::Search::BlastUtils::collapse_nums().
# Returns : the de-duplicated positions in ascending numeric order, or
#           whatever collapse_nums() returns for them when $collapse is true.
sub seq_inds {
    my ($self, $seqType, $class, $collapse) = @_;

    $seqType  ||= 'query';
    $class    ||= 'identical';
    $collapse ||= 0;

    $seqType = 'sbjct' if $seqType eq 'hit';

    # Pool the positions from all HSPs; each HSP is always asked for the
    # uncollapsed list so the values can be sorted numerically below.
    my @inds;
    foreach my $hsp ($self->hsps) {
        push @inds, $hsp->seq_inds($seqType, $class);
    }

    # Different HSPs can report the same position: keep one copy of each
    # and put the result into ascending order.
    if (@inds) {
        my %seen;
        @seen{@inds} = ();
        @inds = sort { $a <=> $b } keys %seen;
    }

    return @inds unless $collapse;

    # This file never loads Bio::Search::BlastUtils itself, so pull it in on
    # demand unless collapse_nums() has already been made available.
    require Bio::Search::BlastUtils
        unless defined &Bio::Search::BlastUtils::collapse_nums;

    return Bio::Search::BlastUtils::collapse_nums(@inds);
}
354 }
355
356 # Need to remove duplicates and sort the merged positions.
357 if(@inds) {
358 my %tmp = map { $_, 1 } @inds;
359 @inds = sort {$a <=> $b} keys %tmp;
360 }
361
362 $collapse ? &Bio::Search::BlastUtils::collapse_nums(@inds) : @inds;
363 }
364
365 =head2 rewind
366
367 Title : rewind
368 Usage : $hit->rewind;
369 Function: Allow one to reset the HSP iteration to the beginning
370 if possible
371 Returns : none
372 Args : none
373
374 =cut
375
# Interface stub for resetting HSP iteration: it only calls
# throw_not_implemented() on the invocant.  Meant to be overridden.
sub rewind {
    return $_[0]->throw_not_implemented();
}
380
381
382 =head2 iteration
383
384 Usage : $hit->iteration( );
385 Purpose : Gets the iteration number in which the Hit was found.
386 Example : $iteration_num = $sbjct->iteration();
387 Returns : Integer greater than or equal to 1
388 Non-PSI-BLAST reports will report iteration as 1, but this number
389 is only meaningful for PSI-BLAST reports.
390 Argument : none
391 Throws : none
392
393 See Also : L<found_again()|found_again>
394
395 =cut
396
# Interface stub for the iteration number accessor: calls
# throw_not_implemented() on the invocant and nothing else.
sub iteration {
    my $self = shift;
    return $self->throw_not_implemented();
}
400
401 =head2 found_again
402
403 Usage : $hit->found_again;
404 Purpose : Gets a boolean indicator whether or not the hit has
405 been found in a previous iteration.
406 This is only applicable to PSI-BLAST reports.
407
408 This method indicates if the hit was reported in the
409 "Sequences used in model and found again" section of the
410 PSI-BLAST report or if it was reported in the
411 "Sequences not found previously or not previously below threshold"
412 section of the PSI-BLAST report. Only for hits in iteration > 1.
413
414 Example : if( $sbjct->found_again()) { ... };
415 Returns : Boolean (1 or 0) for PSI-BLAST report iterations greater than 1.
416 Returns undef for PSI-BLAST report iteration 1 and non PSI_BLAST
417 reports.
418 Argument : none
419 Throws : none
420
421 See Also : L<iteration()|iteration>
422
423 =cut
424
# Interface stub for the found-again indicator: calls
# throw_not_implemented() on the invocant and nothing else.
sub found_again {
    my $self = shift;
    return $self->throw_not_implemented();
}
428
429
430 =head2 overlap
431
432 Usage : $hit_object->overlap( [integer] );
433 Purpose : Gets/Sets the allowable amount of overlap between different HSP sequences.
434 Example : $hit_object->overlap(5);
435 : $overlap = $hit_object->overlap;
436 Returns : Integer.
437 Argument : integer.
438 Throws : n/a
439 Status : Experimental
440 Comments : Any two HSPs whose sequences overlap by less than or equal
441 : to the overlap() number of residues will be considered separate HSPs
442 : and will not get tiled by Bio::Search::BlastUtils::_adjust_contigs().
443
444 See Also : L<Bio::Search::BlastUtils::_adjust_contigs()|Bio::Search::BlastUtils>, L<BUGS | BUGS>
445
446 =cut
447
# Interface stub for the overlap get/set accessor: calls
# throw_not_implemented() on the invocant and nothing else.
sub overlap {
    my $self = shift;
    return $self->throw_not_implemented();
}
450
451
452 =head2 n
453
454 Usage : $hit_object->n();
455 Purpose : Gets the N number for the current Blast hit.
456 : This is the number of HSPs in the set which was ascribed
457 : the lowest P-value (listed on the description line).
458 : This number is not the same as the total number of HSPs.
459 : To get the total number of HSPs, use num_hsps().
460 Example : $n = $hit_object->n();
461 Returns : Integer
462 Argument : n/a
463 Throws : Exception if HSPs have not been set (BLAST2 reports).
464 Comments : Note that the N parameter is not reported in gapped BLAST2.
465 : Calling n() on such reports will result in a call to num_hsps().
466 : The num_hsps() method will count the actual number of
467 : HSPs in the alignment listing, which may exceed N in
468 : some cases.
469
470 See Also : L<num_hsps()|num_hsps>
471
472 =cut
473
# Interface stub for the N-number accessor: calls throw_not_implemented()
# on the invocant and nothing else.
sub n {
    my $self = shift;
    return $self->throw_not_implemented();
}
476
477 =head2 p
478
479 Usage : $hit_object->p( [format] );
480 Purpose : Get the P-value for the best HSP of the given BLAST hit.
481 : (Note that P-values are not provided with NCBI Blast2 reports).
482 Example : $p = $sbjct->p;
483 : $p = $sbjct->p('exp'); # get exponent only.
484 : ($num, $exp) = $sbjct->p('parts'); # split sci notation into parts
485 Returns : Float or scientific notation number (the raw P-value, DEFAULT).
486 : Integer if format == 'exp' (the magnitude of the base 10 exponent).
487 : 2-element list (float, int) if format == 'parts' and P-value
488 : is in scientific notation (See Comments).
489 Argument : format: string of 'raw' | 'exp' | 'parts'
490 : 'raw' returns value given in report. Default. (1.2e-34)
491 : 'exp' returns exponent value only (34)
492 : 'parts' returns the decimal and exponent as a
493 : 2-element list (1.2, -34) (See Comments).
494 Throws : Warns if no P-value is defined. Uses expect instead.
495 Comments : Using the 'parts' argument is not recommended since it will not
496 : work as expected if the P-value is not in scientific notation.
497 : That is, floats are not converted into sci notation before
498 : splitting into parts.
499
500 See Also : L<expect()|expect>, L<signif()|signif>, L<Bio::Search::BlastUtils::get_exponent()|Bio::Search::BlastUtils>
501
502 =cut
503
# Interface stub for the P-value accessor: calls throw_not_implemented()
# on the invocant and nothing else.
sub p {
    my $self = shift;
    return $self->throw_not_implemented();
}
506
507 =head2 hsp
508
509 Usage : $hit_object->hsp( [string] );
510 Purpose : Get a single HSPI object for the present HitI object.
511 Example : $hspObj = $hit_object->hsp; # same as 'best'
512 : $hspObj = $hit_object->hsp('best');
513 : $hspObj = $hit_object->hsp('worst');
514 Returns : Object reference for a Bio::Search::HSP::BlastHSP.pm object.
515 Argument : String (or no argument).
516 : No argument (default) = highest scoring HSP (same as 'best').
517 : 'best' or 'first' = highest scoring HSP.
518 : 'worst' or 'last' = lowest scoring HSP.
519 Throws : Exception if the HSPs have not been collected.
520 : Exception if an unrecognized argument is used.
521
522 See Also : L<hsps()|hsps>, L<num_hsps()|num_hsps>
523
524 =cut
525
# Interface stub for the single-HSP accessor: calls
# throw_not_implemented() on the invocant and nothing else.
sub hsp {
    my $self = shift;
    return $self->throw_not_implemented();
}
528
529 =head2 logical_length
530
531 Usage : $hit_object->logical_length( [seq_type] );
532 : (mostly intended for internal use).
533 Purpose : Get the logical length of the hit sequence.
534 : If the Blast is a TBLASTN or TBLASTX, the returned length
535 : is the length of the would-be amino acid sequence (length/3).
536 : For all other BLAST flavors, this function is the same as length().
537 Example : $len = $hit_object->logical_length();
538 Returns : Integer
539 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
540 ('sbjct' is synonymous with 'hit')
541 Throws : n/a
542 Comments : This is important for functions like frac_aligned_query()
543 : which need to operate in amino acid coordinate space when dealing
544 : with [T]BLAST[NX] type reports.
545
546 See Also : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>
547
548 =cut
549
# Interface stub for the logical length accessor: calls
# throw_not_implemented() on the invocant and nothing else.
sub logical_length {
    my $self = shift;
    return $self->throw_not_implemented();
}
552
553
554 =head2 rank
555
556 Title : rank
557 Usage : $obj->rank($newval)
558 Function: Get/Set the rank of this Hit in the Query search list
559 i.e. this is the Nth hit for a specific query
560 Returns : value of rank
561 Args : newvalue (optional)
562
563
564 =cut
565
# Interface stub for the rank get/set accessor.  Any value passed in is
# ignored here: the stub only calls throw_not_implemented() on the invocant.
sub rank {
    my $self = $_[0];
    return $self->throw_not_implemented();
}
570
571 =head2 each_accession_number
572
573 Title : each_accession_number
574 Usage : $obj->each_accession_number
575 Function: Get each accession number listed in the description of the hit.
576 If there are no alternatives, then only the primary accession will
577 be given
578 Returns : list of all accession numbers in the description
579 Args : none
580
581
582 =cut
583
# Interface stub for listing accession numbers: it only calls
# throw_not_implemented() on the invocant.  Meant to be overridden.
sub each_accession_number {
    my $self = $_[0];
    return $self->throw_not_implemented();
}
588
589
590 =head2 tiled_hsps
591
592 Usage : $hit_object->tiled_hsps( [integer] );
593 Purpose : Gets/Sets an indicator for whether or not the HSPs in this Hit
594 : have been tiled.
595 : Methods that rely on HSPs being tiled should check this
596 : and then call SearchUtils::tile_hsps() if not.
597 Example : $hit_object->tiled_hsps(1);
598 : if( $hit_object->tiled_hsps ) { # do something }
599 Returns : Boolean (1 or 0)
600 Argument : integer (optional)
601 Throws : n/a
602
603 =cut
604
# Interface stub for the tiled-HSPs flag: calls throw_not_implemented() on
# the invocant and nothing else.
sub tiled_hsps {
    my $self = shift;
    return $self->throw_not_implemented();
}
606
607
608 =head2 strand
609
610 Usage : $sbjct->strand( [seq_type] );
611 Purpose : Gets the strand(s) for the query, sbjct, or both sequences
612 : in the best HSP of the BlastHit object after HSP tiling.
613 : Only valid for BLASTN, TBLASTX, BLASTX-query, TBLASTN-hit.
614 Example : $qstrand = $sbjct->strand('query');
615 : $sstrand = $sbjct->strand('hit');
616 : ($qstrand, $sstrand) = $sbjct->strand();
617 Returns : scalar context: integer '1', '-1', or '0'
618 : array context without args: list of two strings (queryStrand, sbjctStrand)
619 : Array context can be "induced" by providing an argument of 'list' or 'array'.
620 Argument : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
621 ('sbjct' is synonymous with 'hit')
622 Throws : n/a
623 Comments : This method requires that all HSPs be tiled. If they have not
624 : already been tiled, they will be tiled first automatically.
625 : If you don't want the tiled data, iterate through each HSP
626 : calling strand() on each (use hsps() to get all HSPs).
627 :
628 : Formerly (prior to 10/21/02), this method would return the
629 : string "-1/1" for hits with HSPs on both strands.
630 : However, now that strand and frame is properly being accounted
631 : for during HSP tiling, it makes more sense for strand()
632 : to return the strand data for the best HSP after tiling.
633 :
634 : If you really want to know about hits on opposite strands,
635 : you should be iterating through the HSPs using methods on the
636 : HSP objects.
637 :
638 : A possible use case where knowing whether a hit has HSPs
639 : on both strands would be when filtering via SearchIO for hits with
640 : this property. However, in this case it would be better to have a
641 : dedicated method such as $hit->hsps_on_both_strands(). Similarly
642 : for frame. This could be provided if there is interest.
643
644 See Also : B<Bio::Search::HSP::BlastHSP::strand>()
645
646 =cut
647
# Interface stub for the strand accessor: calls throw_not_implemented() on
# the invocant and nothing else.
sub strand {
    my $self = shift;
    return $self->throw_not_implemented();
}
650
651
652 =head2 frame
653
654 Usage : $hit_object->frame();
655 Purpose : Gets the reading frame for the best HSP after HSP tiling.
656 : This is only valid for BLASTX and TBLASTN/X type reports.
657 Example : $frame = $hit_object->frame();
658 Returns : Integer (-2 .. +2)
659 Argument : n/a
660 Throws : Exception if HSPs have not been set.
661 Comments : This method requires that all HSPs be tiled. If they have not
662 : already been tiled, they will be tiled first automatically.
663 : If you don't want the tiled data, iterate through each HSP
664 : calling frame() on each (use hsps() to get all HSPs).
665
666 See Also : L<hsps()|hsps>
667
668 =cut
669
# Interface stub for the reading frame accessor: calls
# throw_not_implemented() on the invocant and nothing else.
sub frame {
    my $self = shift;
    return $self->throw_not_implemented();
}
672
673
674 =head2 matches
675
676 Usage : $hit_object->matches( [class] );
677 Purpose : Get the total number of identical or conserved matches
678 : (or both) across all HSPs.
679 : (Note: 'conservative' matches are indicated as 'positives'
680 : in BLAST reports.)
681 Example : ($id,$cons) = $hit_object->matches(); # no argument
682 : $id = $hit_object->matches('id');
683 : $cons = $hit_object->matches('cons');
684 Returns : Integer or a 2-element array of integers
685 Argument : class = 'id' | 'cons' OR none.
686 : If no argument is provided, both identical and conservative
687 : numbers are returned in a two element list.
688 : (Other terms can be used to refer to the conservative
689 : matches, e.g., 'positive'. All that is checked is whether or
690 : not the supplied string starts with 'id'. If not, the
691 : conservative matches are returned.)
692 Throws : Exception if the requested data cannot be obtained.
693 Comments : This method requires that all HSPs be tiled. If there is more than one
694 : HSP and they have not already been tiled, they will be tiled first automatically.
695 :
696 : If you need data for each HSP, use hsps() and then iterate
697 : through the HSP objects.
698 : Does not rely on wantarray to return a list. Only checks for
699 : the presence of an argument (no arg = return list).
700
701 See Also : L<Bio::Search::HSP::GenericHSP::matches()|Bio::Search::HSP::GenericHSP>, L<hsps()|hsps>
702
703 =cut
704
# Interface stub for the match counter: calls throw_not_implemented() on
# the invocant and nothing else.  Meant to be overridden.
sub matches {
    my $self = shift;
    return $self->throw_not_implemented();
}
706
707 1;
708
709
710
711