comparison variant_effect_predictor/Bio/Search/Hit/GenericHit.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: GenericHit.pm,v 1.20.2.1 2003/02/28 09:27:56 jason Exp $
2 #
3 # BioPerl module for Bio::Search::Hit::GenericHit
4 #
5 # Cared for by Jason Stajich <jason@bioperl.org>
6 #
7 # Copyright Jason Stajich
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Search::Hit::GenericHit - A generic implementation of the Bio::Search::Hit::HitI interface
16
17 =head1 SYNOPSIS
18
19 use Bio::Search::Hit::GenericHit;
20 my $hit = new Bio::Search::Hit::GenericHit(-algorithm => 'blastp');
21
22 # more likely
23 use Bio::SearchIO;
24 my $parser = new Bio::SearchIO(-format => 'blast', -file => 'result.bls');
25
26 my $result = $parser->next_result;
27 my $hit = $result->next_hit;
28
29
30 =head1 DESCRIPTION
31
32 This object handles the hit data from a Database Sequence Search such
33 as FASTA or BLAST.
34
35 =head1 FEEDBACK
36
37 =head2 Mailing Lists
38
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to
41 the Bioperl mailing list. Your participation is much appreciated.
42
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/MailList.shtml - About the mailing lists
45
46 =head2 Reporting Bugs
47
48 Report bugs to the Bioperl bug tracking system to help us keep track
49 of the bugs and their resolution. Bug reports can be submitted via
50 email or the web:
51
52 bioperl-bugs@bioperl.org
53 http://bugzilla.bioperl.org/
54
55 =head1 AUTHOR - Jason Stajich and Steve Chervitz
56
57 Email jason@bioperl.org
58 Email sac@bioperl.org
59
60 =head1 CONTRIBUTORS
61
62 Additional contributors names and emails here
63
64 =head1 APPENDIX
65
66 The rest of the documentation details each of the object methods.
67 Internal methods are usually preceded with a _
68
69 =cut
70
71
72 # Let the code begin...
73
74
75 package Bio::Search::Hit::GenericHit;
76 use vars qw(@ISA);
77 use strict;
78
79 use Bio::Root::Root;
80 use Bio::Search::Hit::HitI;
81 require Bio::Search::SearchUtils;
82
83 @ISA = qw(Bio::Root::Root Bio::Search::Hit::HitI );
84
85 =head2 new
86
87 Title : new
88 Usage : my $obj = new Bio::Search::Hit::GenericHit();
89 Function: Builds a new Bio::Search::Hit::GenericHit object
90 Returns : Bio::Search::Hit::GenericHit
91 Args : -name => Name of Hit (required)
92 -description => Description (optional)
93 -accession => Accession number (optional)
94 -length => Length of the Hit (optional)
95 -score => Raw Score for the Hit (optional)
96 -significance => Significance value for the Hit (optional)
97 -algorithm => Algorithm used (BLASTP, FASTX, etc...)
98 -hsps => Array ref of HSPs for this Hit.
99 -iteration => integer for the PSI-Blast iteration number
100
101 =cut
102
103 sub new {
104 my($class,@args) = @_;
105
106 my $self = $class->SUPER::new(@args);
107 my ($hsps, $name,$query_len,$desc, $acc, $locus, $length,
108 $score,$algo,$signif,$bits,
109 $iter,$rank) = $self->_rearrange([qw(HSPS
110 NAME
111 QUERY_LEN
112 DESCRIPTION
113 ACCESSION
114 LOCUS
115 LENGTH SCORE ALGORITHM
116 SIGNIFICANCE BITS ITERATION
117 RANK )], @args);
118
119 $self->{'_query_length'} = $query_len;
120
121 if( ! defined $name ) {
122 $self->throw("Must have defined a valid name for Hit");
123 } else {
124 $self->name($name);
125 }
126
127 defined $acc && $self->accession($acc);
128 defined $locus && $self->locus($locus);
129 defined $desc && $self->description($desc);
130 defined $length && $self->length($length);
131 defined $algo && $self->algorithm($algo);
132 defined $signif && $self->significance($signif);
133 defined $score && $self->raw_score($score);
134 defined $bits && $self->bits($bits);
135 defined $iter && $self->iteration($iter);
136 defined $rank && $self->rank($rank);
137
138 $self->{'_iterator'} = 0;
139 $self->{'_hsps'} = [];
140 if( defined $hsps ) {
141 if( ref($hsps) !~ /array/i ) {
142 $self->warn("Did not specify a valid array ref for the param HSPS ($hsps)");
143 } else {
144 while( @$hsps ) {
145 $self->add_hsp(shift @$hsps );
146 }
147 }
148 }
149 return $self;
150 }
151
152 =head2 add_hsp
153
154 Title : add_hsp
155 Usage : $hit->add_hsp($hsp)
156 Function: Add a HSP to the collection of HSPs for a Hit
157 Returns : number of HSPs in the Hit
158 Args : Bio::Search::HSP::HSPI object
159
160
161 =cut
162
163 sub add_hsp {
164 my ($self,$hsp) = @_;
165 if( !defined $hsp || ! $hsp->isa('Bio::Search::HSP::HSPI') ) {
166 $self->warn("Must provide a valid Bio::Search::HSP::HSPI object to object: $self method: add_hsp value: $hsp");
167 return undef;
168 }
169 push @{$self->{'_hsps'}}, $hsp;
170 return scalar @{$self->{'_hsps'}};
171 }
172
173
174
175 =head2 Bio::Search::Hit::HitI methods
176
177 Implementation of Bio::Search::Hit::HitI methods
178
179 =head2 name
180
181 Title : name
182 Usage : $hit_name = $hit->name();
183 Function: returns the name of the Hit sequence
184 Returns : a scalar string
185 Args : [optional] scalar string to set the name
186
187 =cut
188
189 sub name {
190 my ($self,$value) = @_;
191 my $previous = $self->{'_name'};
192 if( defined $value || ! defined $previous ) {
193 $value = $previous = '' unless defined $value;
194 $self->{'_name'} = $value;
195 }
196 return $previous;
197 }
198
199 =head2 accession
200
201 Title : accession
202 Usage : $acc = $hit->accession();
203 Function: Retrieve the accession (if available) for the hit
204 Returns : a scalar string (empty string if not set)
205 Args : none
206
207 =cut
208
209 sub accession {
210 my ($self,$value) = @_;
211 my $previous = $self->{'_accession'};
212 if( defined $value || ! defined $previous ) {
213 $value = $previous = '' unless defined $value;
214 $self->{'_accession'} = $value;
215 }
216 return $previous;
217 }
218
219 =head2 description
220
221 Title : description
222 Usage : $desc = $hit->description();
223 Function: Retrieve the description for the hit
224 Returns : a scalar string
225 Args : [optional] scalar string to set the descrition
226
227 =cut
228
229 sub description {
230 my ($self,$value) = @_;
231 my $previous = $self->{'_description'};
232 if( defined $value || ! defined $previous ) {
233 $value = $previous = '' unless defined $value;
234 $self->{'_description'} = $value;
235 }
236 return $previous;
237 }
238
239 =head2 length
240
241 Title : length
242 Usage : my $len = $hit->length
243 Function: Returns the length of the hit
244 Returns : integer
245 Args : [optional] integer to set the length
246
247 =cut
248
249 sub length {
250 my ($self,$value) = @_;
251 my $previous = $self->{'_length'};
252 if( defined $value || ! defined $previous ) {
253 $value = $previous = 0 unless defined $value;
254 $self->{'_length'} = $value;
255 }
256 return $previous;
257 }
258
259
260 =head2 algorithm
261
262 Title : algorithm
263 Usage : $alg = $hit->algorithm();
264 Function: Gets the algorithm specification that was used to obtain the hit
265 For BLAST, the algorithm denotes what type of sequence was aligned
266 against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated
267 dna-prt, TBLASTN prt-translated dna, TBLASTX translated
268 dna-translated dna).
269 Returns : a scalar string
270 Args : [optional] scalar string to set the algorithm
271
272 =cut
273
274 sub algorithm {
275 my ($self,$value) = @_;
276 my $previous = $self->{'_algorithm'};
277 if( defined $value || ! defined $previous ) {
278 $value = $previous = '' unless defined $value;
279 $self->{'_algorithm'} = $value;
280 }
281 return $previous;
282 }
283
284 =head2 raw_score
285
286 Title : raw_score
287 Usage : $score = $hit->raw_score();
288 Function: Gets the "raw score" generated by the algorithm. What
289 this score is exactly will vary from algorithm to algorithm,
290 returning undef if unavailable.
291 Returns : a scalar value
292 Args : [optional] scalar value to set the raw score
293
294 =cut
295
296 sub raw_score {
297 my ($self,$value) = @_;
298 my $previous = $self->{'_score'};
299 if( defined $value || ! defined $previous ) {
300 $value = $previous = '' unless defined $value;
301 $self->{'_score'} = $value;
302 }
303 return $previous;
304 }
305
306 =head2 significance
307
308 Title : significance
309 Usage : $significance = $hit->significance();
310 Function: Used to obtain the E or P value of a hit, i.e. the probability that
311 this particular hit was obtained purely by random chance. If
312 information is not available (nor calculatable from other
313 information sources), return undef.
314 Returns : a scalar value or undef if unavailable
315 Args : [optional] scalar value to set the significance
316
317 =cut
318
319 sub significance {
320 my ($self,$value) = @_;
321 my $previous = $self->{'_significance'};
322 if( defined $value || ! defined $previous ) {
323 $value = $previous = '' unless defined $value;
324 $self->{'_significance'} = $value;
325 }
326 return $previous;
327 }
328
329 =head2 bits
330
331 Usage : $hit_object->bits();
332 Purpose : Gets the bit score of the best HSP for the current hit.
333 Example : $bits = $hit_object->bits();
334 Returns : Integer or undef if bit score is not set
335 Argument : n/a
336 Comments : For BLAST1, the non-bit score is listed in the summary line.
337
338 See Also : L<score()|score>
339
340 =cut
341
342 #---------
343 sub bits {
344 #---------
345 my ($self) = @_;
346
347 my $bits = $self->{'_bits'};
348 if( ! defined $bits ) {
349 $bits = $self->{'_hsps'}->[0]->bits();
350 $self->{'_bits'} = $bits;
351 }
352 return $bits;
353 }
354
355 =head2 next_hsp
356
357 Title : next_hsp
358 Usage : while( $hsp = $obj->next_hsp()) { ... }
359 Function : Returns the next available High Scoring Pair
360 Example :
361 Returns : Bio::Search::HSP::HSPI object or null if finished
362 Args : none
363
364 =cut
365
366 sub next_hsp {
367 my ($self) = @_;
368 $self->{'_iterator'} = 0 unless defined $self->{'_iterator'};
369 return undef if $self->{'_iterator'} > scalar @{$self->{'_hsps'}};
370 return $self->{'_hsps'}->[$self->{'_iterator'}++];
371 }
372
373
374 =head2 hsps
375
376 Usage : $hit_object->hsps();
377 Purpose : Get a list containing all HSP objects.
378 : Get the numbers of HSPs for the current hit.
379 Example : @hsps = $hit_object->hsps();
380 : $num = $hit_object->hsps(); # alternatively, use num_hsps()
381 Returns : Array context : list of Bio::Search::HSP::BlastHSP.pm objects.
382 : Scalar context: integer (number of HSPs).
383 : (Equivalent to num_hsps()).
384 Argument : n/a. Relies on wantarray
385 Throws : Exception if the HSPs have not been collected.
386
387 See Also : L<hsp()|hsp>, L<num_hsps()|num_hsps>
388
389 =cut
390
391 #---------
392 sub hsps {
393 #---------
394 my $self = shift;
395
396 if (not ref $self->{'_hsps'}) {
397 $self->throw("Can't get HSPs: data not collected.");
398 }
399
400 return wantarray
401 # returning list containing all HSPs.
402 ? @{$self->{'_hsps'}}
403 # returning number of HSPs.
404 : scalar(@{$self->{'_hsps'}});
405 }
406
407 =head2 num_hsps
408
409 Usage : $hit_object->num_hsps();
410 Purpose : Get the number of HSPs for the present Blast hit.
411 Example : $nhsps = $hit_object->num_hsps();
412 Returns : Integer
413 Argument : n/a
414 Throws : Exception if the HSPs have not been collected.
415
416 See Also : L<hsps()|hsps>
417
418 =cut
419
420 #-------------
421 sub num_hsps {
422 my $self = shift;
423
424 if (not defined $self->{'_hsps'}) {
425 $self->throw("Can't get HSPs: data not collected.");
426 }
427
428 return scalar(@{$self->{'_hsps'}});
429 }
430
431 =head2 rewind
432
433 Title : rewind
434 Usage : $hit->rewind;
435 Function: Allow one to reset the HSP iteration to the beginning
436 Since this is an in-memory implementation
437 Returns : none
438 Args : none
439
440 =cut
441
442 sub rewind{
443 my ($self) = @_;
444 $self->{'_iterator'} = 0;
445 }
446
447 =head2 iteration
448
449 Title : iteration
450 Usage : $obj->iteration($newval)
451 Function: PSI-BLAST iteration
452 Returns : value of iteration
453 Args : newvalue (optional)
454
455
456 =cut
457
458 sub iteration{
459 my ($self,$value) = @_;
460 if( defined $value) {
461 $self->{'_psiblast_iteration'} = $value;
462 }
463 return $self->{'_psiblast_iteration'};
464
465 }
466
467
468 =head2 ambiguous_aln
469
470 Usage : $ambig_code = $hit_object->ambiguous_aln();
471 Purpose : Sets/Gets ambiguity code data member.
472 Example : (see usage)
473 Returns : String = 'q', 's', 'qs', '-'
474 : 'q' = query sequence contains overlapping sub-sequences
475 : while sbjct does not.
476 : 's' = sbjct sequence contains overlapping sub-sequences
477 : while query does not.
478 : 'qs' = query and sbjct sequence contains overlapping sub-sequences
479 : relative to each other.
480 : '-' = query and sbjct sequence do not contains multiple domains
481 : relative to each other OR both contain the same distribution
482 : of similar domains.
483 Argument : n/a
484 Throws : n/a
485 Status : Experimental
486
487 =cut
488
489 #--------------------
490 sub ambiguous_aln {
491 #--------------------
492 my $self = shift;
493 if(@_) { $self->{'_ambiguous_aln'} = shift; }
494 $self->{'_ambiguous_aln'} || '-';
495 }
496
497 =head2 overlap
498
499 See documentation in L<Bio::Search::SearchUtils::overlap()|Bio::Search::SearchUtils>
500
501 =cut
502
503 #-------------
504 sub overlap {
505 #-------------
506 my $self = shift;
507 if(@_) { $self->{'_overlap'} = shift; }
508 defined $self->{'_overlap'} ? $self->{'_overlap'} : 0;
509 }
510
511
512 =head2 n
513
514 Usage : $hit_object->n();
515 Purpose : Gets the N number for the current hit.
516 : This is the number of HSPs in the set which was ascribed
517 : the lowest P-value (listed on the description line).
518 : This number is not the same as the total number of HSPs.
519 : To get the total number of HSPs, use num_hsps().
520 Example : $n = $hit_object->n();
521 Returns : Integer
522 Argument : n/a
523 Throws : Exception if HSPs have not been set (BLAST2 reports).
524 Comments : Note that the N parameter is not reported in gapped BLAST2.
525 : Calling n() on such reports will result in a call to num_hsps().
526 : The num_hsps() method will count the actual number of
527 : HSPs in the alignment listing, which may exceed N in
528 : some cases.
529
530 See Also : L<num_hsps()|num_hsps>
531
532 =cut
533
534 #-----
535 sub n {
536 #-----
537 my $self = shift;
538
539 # The check for $self->{'_n'} is a remnant from the 'query' mode days
540 # in which the sbjct object would collect data from the description
541 # line only.
542
543 my ($n);
544 if(not defined($self->{'_n'})) {
545 $n = $self->hsp->n;
546 } else {
547 $n = $self->{'_n'};
548 }
549 $n ||= $self->num_hsps;
550
551 return $n;
552 }
553
554 =head2 p
555
556 Usage : $hit_object->p( [format] );
557 Purpose : Get the P-value for the best HSP of the given BLAST hit.
558 : (Note that P-values are not provided with NCBI Blast2 reports).
559 Example : $p = $sbjct->p;
560 : $p = $sbjct->p('exp'); # get exponent only.
561 : ($num, $exp) = $sbjct->p('parts'); # split sci notation into parts
562 Returns : Float or scientific notation number (the raw P-value, DEFAULT).
563 : Integer if format == 'exp' (the magnitude of the base 10 exponent).
564 : 2-element list (float, int) if format == 'parts' and P-value
565 : is in scientific notation (See Comments).
566 Argument : format: string of 'raw' | 'exp' | 'parts'
567 : 'raw' returns value given in report. Default. (1.2e-34)
568 : 'exp' returns exponent value only (34)
569 : 'parts' returns the decimal and exponent as a
570 : 2-element list (1.2, -34) (See Comments).
571 Throws : Warns if no P-value is defined. Uses expect instead.
572 Comments : Using the 'parts' argument is not recommended since it will not
573 : work as expected if the P-value is not in scientific notation.
574 : That is, floats are not converted into sci notation before
575 : splitting into parts.
576
577 See Also : L<expect()|expect>, L<signif()|signif>, L<Bio::Search::SearchUtils::get_exponent()|Bio::Search::SearchUtils>
578
579 =cut
580
581 #--------
582 sub p {
583 #--------
584 # Some duplication of logic for p(), expect() and signif() for the sake of performance.
585 my ($self, $fmt) = @_;
586
587 my $val = $self->{'_p'};
588
589 # $val can be zero.
590 if(not defined $val) {
591 # P-value not defined, must be a NCBI Blast2 report.
592 # Use expect instead.
593 $self->warn( "P-value not defined. Using expect() instead.");
594 $val = $self->{'_expect'};
595 }
596
597 return $val if not $fmt or $fmt =~ /^raw/i;
598 ## Special formats: exponent-only or as list.
599 return &Bio::Search::SearchUtils::get_exponent($val) if $fmt =~ /^exp/i;
600 return (split (/eE/, $val)) if $fmt =~ /^parts/i;
601
602 ## Default: return the raw P-value.
603 return $val;
604 }
605
606 =head2 hsp
607
608 Usage : $hit_object->hsp( [string] );
609 Purpose : Get a single HSPI object for the present HitI object.
610 Example : $hspObj = $hit_object->hsp; # same as 'best'
611 : $hspObj = $hit_object->hsp('best');
612 : $hspObj = $hit_object->hsp('worst');
613 Returns : Object reference for a Bio::Search::HSP::BlastHSP.pm object.
614 Argument : String (or no argument).
615 : No argument (default) = highest scoring HSP (same as 'best').
616 : 'best' or 'first' = highest scoring HSP.
617 : 'worst' or 'last' = lowest scoring HSP.
618 Throws : Exception if the HSPs have not been collected.
619 : Exception if an unrecognized argument is used.
620
621 See Also : L<hsps()|hsps>, L<num_hsps>()
622
623 =cut
624
625 #----------
626 sub hsp {
627 #----------
628 my( $self, $option ) = @_;
629 $option ||= 'best';
630
631 if (not ref $self->{'_hsps'}) {
632 $self->throw("Can't get HSPs: data not collected.");
633 }
634
635 my @hsps = @{$self->{'_hsps'}};
636
637 return $hsps[0] if $option =~ /best|first|1/i;
638 return $hsps[$#hsps] if $option =~ /worst|last/i;
639
640 $self->throw("Can't get HSP for: $option\n" .
641 "Valid arguments: 'best', 'worst'");
642 }
643
644 =head2 logical_length
645
646 Usage : $hit_object->logical_length( [seq_type] );
647 : (mostly intended for internal use).
648 Purpose : Get the logical length of the hit sequence.
649 : If the Blast is a TBLASTN or TBLASTX, the returned length
650 : is the length of the would-be amino acid sequence (length/3).
651 : For all other BLAST flavors, this function is the same as length().
652 Example : $len = $hit_object->logical_length();
653 Returns : Integer
654 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
655 ('sbjct' is synonymous with 'hit')
656 Throws : n/a
657 Comments : This is important for functions like frac_aligned_query()
658 : which need to operate in amino acid coordinate space when dealing
659 : with [T]BLAST[NX] type reports.
660
661 See Also : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>
662
663 =cut
664
665 #--------------------
666 sub logical_length {
667 #--------------------
668 my $self = shift;
669 my $seqType = shift || 'query';
670 $seqType = 'sbjct' if $seqType eq 'hit';
671
672 my $length;
673
674 # For the sbjct, return logical sbjct length
675 if( $seqType eq 'sbjct' ) {
676 $length = $self->{'_logical_length'} || $self->{'_length'};
677 }
678 else {
679 # Otherwise, return logical query length
680 $length = $self->{'_query_length'};
681 $self->throw("Must have defined query_len") unless ( $length );
682
683 # Adjust length based on BLAST flavor.
684 if($self->algorithm =~ /T?BLASTX/ ) {
685 $length /= 3;
686 }
687 }
688 return $length;
689 }
690
691 =head2 length_aln
692
693 Usage : $hit_object->length_aln( [seq_type] );
694 Purpose : Get the total length of the aligned region for query or sbjct seq.
695 : This number will include all HSPs
696 Example : $len = $hit_object->length_aln(); # default = query
697 : $lenAln = $hit_object->length_aln('query');
698 Returns : Integer
699 Argument : seq_Type = 'query' or 'hit' or 'sbjct' (Default = 'query')
700 ('sbjct' is synonymous with 'hit')
701 Throws : Exception if the argument is not recognized.
702 Comments : This method will report the logical length of the alignment,
703 : meaning that for TBLAST[NX] reports, the length is reported
704 : using amino acid coordinate space (i.e., nucleotides / 3).
705 :
706 : This method requires that all HSPs be tiled. If they have not
707 : already been tiled, they will be tiled first automatically..
708 : If you don't want the tiled data, iterate through each HSP
709 : calling length() on each (use hsps() to get all HSPs).
710
711 See Also : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>, L<gaps()|gaps>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>, L<Bio::Search::HSP::BlastHSP::length()|Bio::Search::HSP::BlastHSP>
712
713 =cut
714
715 #---------------'
716 sub length_aln {
717 #---------------
718 my( $self, $seqType, $num ) = @_;
719
720 $seqType ||= 'query';
721 $seqType = 'sbjct' if $seqType eq 'hit';
722
723 Bio::Search::SearchUtils::tile_hsps($self) if not $self->{'_tiled_hsps'};
724
725 if( defined $num) {
726 return $self->{'_length_aln_'.$seqType} = $num;
727 }
728
729 my $data = $self->{'_length_aln_'.$seqType};
730
731 ## If we don't have data, figure out what went wrong.
732 if(!$data) {
733 $self->throw("Can't get length aln for sequence type \"$seqType\". " .
734 "Valid types are 'query', 'hit', 'sbjct' ('sbjct' = 'hit')");
735 }
736 return $data;
737 }
738
739 =head2 gaps
740
741 Usage : $hit_object->gaps( [seq_type] );
742 Purpose : Get the number of gaps in the aligned query, hit, or both sequences.
743 : Data is summed across all HSPs.
744 Example : $qgaps = $hit_object->gaps('query');
745 : $hgaps = $hit_object->gaps('hit');
746 : $tgaps = $hit_object->gaps(); # default = total (query + hit)
747 Returns : scalar context: integer
748 : array context without args: two-element list of integers
749 : (queryGaps, hitGaps)
750 : Array context can be forced by providing an argument of 'list' or 'array'.
751 :
752 : CAUTION: Calling this method within printf or sprintf is arrray context.
753 : So this function may not give you what you expect. For example:
754 : printf "Total gaps: %d", $hit->gaps();
755 : Actually returns a two-element array, so what gets printed
756 : is the number of gaps in the query, not the total
757 :
758 Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total' | 'list' (default = 'total')
759 ('sbjct' is synonymous with 'hit')
760 Throws : n/a
761 Comments : If you need data for each HSP, use hsps() and then interate
762 : through each HSP object.
763 : This method requires that all HSPs be tiled. If they have not
764 : already been tiled, they will be tiled first automatically..
765 : Not relying on wantarray since that will fail in situations
766 : such as printf "%d", $hit->gaps() in which you might expect to
767 : be printing the total gaps, but evaluates to array context.
768
769 See Also : L<length_aln()|length_aln>
770
771 =cut
772
773 #----------
774 sub gaps {
775 #----------
776 my( $self, $seqType, $num ) = @_;
777
778 $seqType ||= (wantarray ? 'list' : 'total');
779 $seqType = 'sbjct' if $seqType eq 'hit';
780
781 Bio::Search::SearchUtils::tile_hsps($self) if not $self->{'_tiled_hsps'};
782
783 $seqType = lc($seqType);
784
785 if( defined $num ) {
786 $self->throw("Can't set gaps for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
787
788 return $self->{'_gaps_'.$seqType} = $num;
789 }
790 elsif($seqType =~ /list|array/i) {
791 return ($self->{'_gaps_query'}, $self->{'_gaps_sbjct'});
792 }
793 elsif($seqType eq 'total') {
794 return ($self->{'_gaps_query'} + $self->{'_gaps_sbjct'}) || 0;
795 } else {
796 return $self->{'_gaps_'.$seqType} || 0;
797 }
798 }
799
800
801 =head2 matches
802
803 See documentation in L<Bio::Search::Hit::HitI::matches()|Bio::Search::Hit::HitI>
804
805 =cut
806
807 #---------------
808 sub matches {
809 #---------------
810 my( $self, $arg1, $arg2) = @_;
811 my(@data,$data);
812
813 Bio::Search::SearchUtils::tile_hsps($self) if not $self->{'_tiled_hsps'};
814
815 if(!$arg1) {
816 @data = ($self->{'_totalIdentical'}, $self->{'_totalConserved'});
817
818 return @data if @data;
819
820 } else {
821
822 if( defined $arg2 ) {
823 $self->{'_totalIdentical'} = $arg1;
824 $self->{'_totalConserved'} = $arg2;
825 return ( $arg1, $arg2 );
826 }
827 elsif($arg1 =~ /^id/i) {
828 $data = $self->{'_totalIdentical'};
829 } else {
830 $data = $self->{'_totalConserved'};
831 }
832 return $data if $data;
833 }
834
835 ## Something went wrong if we make it to here.
836 $self->throw("Can't get identical or conserved data: no data.");
837 }
838
839
840 =head2 start
841
842 Usage : $sbjct->start( [seq_type] );
843 Purpose : Gets the start coordinate for the query, sbjct, or both sequences
844 : in the BlastHit object. If there is more than one HSP, the lowest start
845 : value of all HSPs is returned.
846 Example : $qbeg = $sbjct->start('query');
847 : $sbeg = $sbjct->start('hit');
848 : ($qbeg, $sbeg) = $sbjct->start();
849 Returns : scalar context: integer
850 : array context without args: list of two integers (queryStart, sbjctStart)
851 : Array context can be "induced" by providing an argument of 'list' or 'array'.
852 Argument : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
853 ('sbjct' is synonymous with 'hit')
854 Throws : n/a
855 Comments : This method requires that all HSPs be tiled. If there is more than one
856 : HSP and they have not already been tiled, they will be tiled first automatically..
857 : Remember that the start and end coordinates of all HSPs are
858 : normalized so that start < end. Strand information can be
859 : obtained by calling $hit->strand().
860
861 See Also : L<end()|end>, L<range()|range>, L<strand()|strand>,
862 L<Bio::Search::HSP::BlastHSP::start|Bio::Search::HSP::BlastHSP>
863
864 =cut
865
866 #----------
867 sub start {
868 #----------
869 my ($self, $seqType, $num) = @_;
870
871 $seqType ||= (wantarray ? 'list' : 'query');
872 $seqType = 'sbjct' if $seqType eq 'hit';
873
874 if( defined $num ) {
875 $seqType = "_\L$seqType\E";
876 return $self->{$seqType.'Start'} = $num;
877 }
878
879 # If there is only one HSP, defer this call to the solitary HSP.
880 if($self->num_hsps == 1) {
881 return $self->hsp->start($seqType);
882 } else {
883 Bio::Search::SearchUtils::tile_hsps($self) if not $self->{'_tiled_hsps'};
884 if($seqType =~ /list|array/i) {
885 return ($self->{'_queryStart'}, $self->{'_sbjctStart'});
886 } else {
887 ## Sensitive to member name changes.
888 $seqType = "_\L$seqType\E";
889 return $self->{$seqType.'Start'};
890 }
891 }
892 }
893
894
895 =head2 end
896
897 Usage : $sbjct->end( [seq_type] );
898 Purpose : Gets the end coordinate for the query, sbjct, or both sequences
899 : in the BlastHit object. If there is more than one HSP, the largest end
900 : value of all HSPs is returned.
901 Example : $qend = $sbjct->end('query');
902 : $send = $sbjct->end('hit');
903 : ($qend, $send) = $sbjct->end();
904 Returns : scalar context: integer
905 : array context without args: list of two integers (queryEnd, sbjctEnd)
906 : Array context can be "induced" by providing an argument of 'list' or 'array'.
907 Argument : In scalar context: seq_type = 'query' or 'sbjct'
908 : (case insensitive). If not supplied, 'query' is used.
909 Throws : n/a
910 Comments : This method requires that all HSPs be tiled. If there is more than one
911 : HSP and they have not already been tiled, they will be tiled first automatically..
912 : Remember that the start and end coordinates of all HSPs are
913 : normalized so that start < end. Strand information can be
914 : obtained by calling $hit->strand().
915
916 See Also : L<start()|start>, L<range()|range>, L<strand()|strand>
917
918 =cut
919
920 #----------
921 sub end {
922 #----------
923 my ($self, $seqType, $num) = @_;
924
925 $seqType ||= (wantarray ? 'list' : 'query');
926 $seqType = 'sbjct' if $seqType eq 'hit';
927
928
929 if( defined $num ) {
930 $seqType = "_\L$seqType\E";
931 return $self->{$seqType.'Stop'} = $num;
932 }
933
934 # If there is only one HSP, defer this call to the solitary HSP.
935 if($self->num_hsps == 1) {
936 return $self->hsp->end($seqType);
937 } else {
938 Bio::Search::SearchUtils::tile_hsps($self) if not $self->{'_tiled_hsps'};
939 if($seqType =~ /list|array/i) {
940 return ($self->{'_queryStop'}, $self->{'_sbjctStop'});
941 } else {
942 ## Sensitive to member name changes.
943 $seqType = "_\L$seqType\E";
944 return $self->{$seqType.'Stop'};
945 }
946 }
947 }
948
949 =head2 range
950
951 Usage : $sbjct->range( [seq_type] );
952 Purpose : Gets the (start, end) coordinates for the query or sbjct sequence
953 : in the HSP alignment.
954 Example : ($qbeg, $qend) = $sbjct->range('query');
955 : ($sbeg, $send) = $sbjct->range('hit');
956 Returns : Two-element array of integers
957 Argument : seq_type = string, 'query' or 'hit' or 'sbjct' (default = 'query')
958 ('sbjct' is synonymous with 'hit')
959 Throws : n/a
960
961 See Also : L<start()|start>, L<end()|end>
962
963 =cut
964
965 #----------
966 sub range {
967 #----------
968 my ($self, $seqType) = @_;
969 $seqType ||= 'query';
970 $seqType = 'sbjct' if $seqType eq 'hit';
971 return ($self->start($seqType), $self->end($seqType));
972 }
973
974
975 =head2 frac_identical
976
977 Usage : $hit_object->frac_identical( [seq_type] );
978 Purpose : Get the overall fraction of identical positions across all HSPs.
979 : The number refers to only the aligned regions and does not
980 : account for unaligned regions in between the HSPs, if any.
981 Example : $frac_iden = $hit_object->frac_identical('query');
982 Returns : Float (2-decimal precision, e.g., 0.75).
983 Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
984 : default = 'query' (but see comments below).
985 : ('sbjct' is synonymous with 'hit')
986 Throws : n/a
987 Comments : Different versions of Blast report different values for the total
988 : length of the alignment. This is the number reported in the
989 : denominators in the stats section:
990 : "Identical = 34/120 Positives = 67/120".
991 : NCBI BLAST uses the total length of the alignment (with gaps)
992 : WU-BLAST uses the length of the query sequence (without gaps).
993 :
994 : Therefore, when called with an argument of 'total',
995 : this method will report different values depending on the
996 : version of BLAST used. Total does NOT take into account HSP
997 : tiling, so it should not be used.
998 :
999 : To get the fraction identical among only the aligned residues,
1000 : ignoring the gaps, call this method without an argument or
1001 : with an argument of 'query' or 'hit'.
1002 :
1003 : If you need data for each HSP, use hsps() and then iterate
1004 : through the HSP objects.
1005 : This method requires that all HSPs be tiled. If they have not
1006 : already been tiled, they will be tiled first automatically.
1007
1008 See Also : L<frac_conserved()|frac_conserved>, L<frac_aligned_query()|frac_aligned_query>, L<matches()|matches>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1009
1010 =cut
1011
1012 #------------------
1013 sub frac_identical {
1014 #------------------
1015 my ($self, $seqType) = @_;
1016 $seqType ||= 'query';
1017 $seqType = 'sbjct' if $seqType eq 'hit';
1018
1019 ## Sensitive to member name format.
1020 $seqType = lc($seqType);
1021
1022 Bio::Search::SearchUtils::tile_hsps($self) if not $self->{'_tiled_hsps'};
1023
1024 my $ident = $self->{'_totalIdentical'};
1025 my $total = $self->{'_length_aln_'.$seqType};
1026 my $ratio = $ident / $total;
1027 my $ratio_rounded = sprintf( "%.3f", $ratio);
1028
1029 # Round down iff normal rounding yields 1 (just like blast)
1030 $ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
1031 return $ratio_rounded;
1032 }
1033
1034
1035
1036 =head2 frac_conserved
1037
1038 Usage : $hit_object->frac_conserved( [seq_type] );
1039 Purpose : Get the overall fraction of conserved positions across all HSPs.
1040 : The number refers to only the aligned regions and does not
1041 : account for unaligned regions in between the HSPs, if any.
1042 Example : $frac_cons = $hit_object->frac_conserved('hit');
1043 Returns : Float (2-decimal precision, e.g., 0.75).
1044 Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
1045 : default = 'query' (but see comments below).
1046 : ('sbjct' is synonymous with 'hit')
1047 Throws : n/a
1048 Comments : Different versions of Blast report different values for the total
1049 : length of the alignment. This is the number reported in the
1050 : denominators in the stats section:
1051 : "Positives = 34/120 Positives = 67/120".
1052 : NCBI BLAST uses the total length of the alignment (with gaps)
1053 : WU-BLAST uses the length of the query sequence (without gaps).
1054 :
1055 : Therefore, when called with an argument of 'total',
1056 : this method will report different values depending on the
1057 : version of BLAST used. Total does NOT take into account HSP
1058 : tiling, so it should not be used.
1059 :
1060 : To get the fraction conserved among only the aligned residues,
1061 : ignoring the gaps, call this method without an argument or
1062 : with an argument of 'query' or 'hit'.
1063 :
1064 : If you need data for each HSP, use hsps() and then interate
1065 : through the HSP objects.
1066 : This method requires that all HSPs be tiled. If they have not
1067 : already been tiled, they will be tiled first automatically.
1068
1069 See Also : L<frac_identical()|frac_identical>, L<matches()|matches>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1070
1071 =cut
1072
1073 #--------------------
1074 sub frac_conserved {
1075 #--------------------
1076 my ($self, $seqType) = @_;
1077 $seqType ||= 'query';
1078 $seqType = 'sbjct' if $seqType eq 'hit';
1079
1080 ## Sensitive to member name format.
1081 $seqType = lc($seqType);
1082
1083 Bio::Search::SearchUtils::tile_hsps($self) if not $self->{'_tiled_hsps'};
1084
1085 my $consv = $self->{'_totalConserved'};
1086 my $total = $self->{'_length_aln_'.$seqType};
1087 my $ratio = $consv / $total;
1088 my $ratio_rounded = sprintf( "%.3f", $ratio);
1089
1090 # Round down iff normal rounding yields 1 (just like blast)
1091 $ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
1092 return $ratio_rounded;
1093 }
1094
1095
1096
1097
1098 =head2 frac_aligned_query
1099
1100 Usage : $hit_object->frac_aligned_query();
1101 Purpose : Get the fraction of the query sequence which has been aligned
1102 : across all HSPs (not including intervals between non-overlapping
1103 : HSPs).
1104 Example : $frac_alnq = $hit_object->frac_aligned_query();
1105 Returns : Float (2-decimal precision, e.g., 0.75).
1106 Argument : n/a
1107 Throws : n/a
1108 Comments : If you need data for each HSP, use hsps() and then interate
1109 : through the HSP objects.
1110 : To compute the fraction aligned, the logical length of the query
1111 : sequence is used, meaning that for [T]BLASTX reports, the
1112 : full length of the query sequence is converted into amino acids
1113 : by dividing by 3. This is necessary because of the way
1114 : the lengths of aligned sequences are computed.
1115 : This method requires that all HSPs be tiled. If they have not
1116 : already been tiled, they will be tiled first automatically.
1117
1118 See Also : L<frac_aligned_hit()|frac_aligned_hit>, L<logical_length()|logical_length>, L<length_aln()|length_aln>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1119
1120 =cut
1121
1122 #----------------------
1123 sub frac_aligned_query {
1124 #----------------------
1125 my $self = shift;
1126
1127 Bio::Search::SearchUtils::tile_hsps($self) if not $self->{'_tiled_hsps'};
1128
1129 sprintf( "%.2f", $self->{'_length_aln_query'}/
1130 $self->logical_length('query'));
1131 }
1132
1133
1134
1135 =head2 frac_aligned_hit
1136
1137 Usage : $hit_object->frac_aligned_hit();
1138 Purpose : Get the fraction of the hit (sbjct) sequence which has been aligned
1139 : across all HSPs (not including intervals between non-overlapping
1140 : HSPs).
1141 Example : $frac_alnq = $hit_object->frac_aligned_hit();
1142 Returns : Float (2-decimal precision, e.g., 0.75).
1143 Argument : n/a
1144 Throws : n/a
1145 Comments : If you need data for each HSP, use hsps() and then interate
1146 : through the HSP objects.
1147 : To compute the fraction aligned, the logical length of the sbjct
1148 : sequence is used, meaning that for TBLAST[NX] reports, the
1149 : full length of the sbjct sequence is converted into amino acids
1150 : by dividing by 3. This is necessary because of the way
1151 : the lengths of aligned sequences are computed.
1152 : This method requires that all HSPs be tiled. If they have not
1153 : already been tiled, they will be tiled first automatically.
1154
1155 See Also : L<frac_aligned_query()|frac_aligned_query>, L<matches()|matches>, , L<logical_length()|logical_length>, L<length_aln()|length_aln>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1156
1157 =cut
1158
1159 #--------------------
1160 sub frac_aligned_hit {
1161 #--------------------
1162 my $self = shift;
1163
1164 Bio::Search::SearchUtils::tile_hsps($self) if not $self->{'_tiled_hsps'};
1165
1166 sprintf( "%.2f", $self->{'_length_aln_sbjct'}/$self->logical_length('sbjct'));
1167 }
1168
1169
1170 ## These methods are being maintained for backward compatibility.
1171
1172 =head2 frac_aligned_sbjct
1173
1174 Same as L<frac_aligned_hit()|frac_aligned_hit>
1175
1176 =cut
1177
1178 #----------------
1179 sub frac_aligned_sbjct { my $self=shift; $self->frac_aligned_hit(@_); }
1180 #----------------
1181
1182 =head2 num_unaligned_sbjct
1183
1184 Same as L<num_unaligned_hit()|num_unaligned_hit>
1185
1186 =cut
1187
1188 #----------------
1189 sub num_unaligned_sbjct { my $self=shift; $self->num_unaligned_hit(@_); }
1190 #----------------
1191
1192
1193
1194 =head2 num_unaligned_hit
1195
1196 Usage : $hit_object->num_unaligned_hit();
1197 Purpose : Get the number of the unaligned residues in the hit sequence.
1198 : Sums across all all HSPs.
1199 Example : $num_unaln = $hit_object->num_unaligned_hit();
1200 Returns : Integer
1201 Argument : n/a
1202 Throws : n/a
1203 Comments : See notes regarding logical lengths in the comments for frac_aligned_hit().
1204 : They apply here as well.
1205 : If you need data for each HSP, use hsps() and then interate
1206 : through the HSP objects.
1207 : This method requires that all HSPs be tiled. If they have not
1208 : already been tiled, they will be tiled first automatically..
1209
1210 See Also : L<num_unaligned_query()|num_unaligned_query>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>, L<frac_aligned_hit()|frac_aligned_hit>
1211
1212 =cut
1213
1214 #---------------------
1215 sub num_unaligned_hit {
1216 #---------------------
1217 my $self = shift;
1218
1219 Bio::Search::SearchUtils::tile_hsps($self) if not $self->{'_tiled_hsps'};
1220
1221 my $num = $self->logical_length('sbjct') - $self->{'_length_aln_sbjct'};
1222 ($num < 0 ? 0 : $num );
1223 }
1224
1225
1226 =head2 num_unaligned_query
1227
1228 Usage : $hit_object->num_unaligned_query();
1229 Purpose : Get the number of the unaligned residues in the query sequence.
1230 : Sums across all all HSPs.
1231 Example : $num_unaln = $hit_object->num_unaligned_query();
1232 Returns : Integer
1233 Argument : n/a
1234 Throws : n/a
1235 Comments : See notes regarding logical lengths in the comments for frac_aligned_query().
1236 : They apply here as well.
1237 : If you need data for each HSP, use hsps() and then interate
1238 : through the HSP objects.
1239 : This method requires that all HSPs be tiled. If they have not
1240 : already been tiled, they will be tiled first automatically..
1241
1242 See Also : L<num_unaligned_hit()|num_unaligned_hit>, L<frac_aligned_query()|frac_aligned_query>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1243
1244 =cut
1245
1246 #-----------------------
1247 sub num_unaligned_query {
1248 #-----------------------
1249 my $self = shift;
1250
1251 Bio::Search::SearchUtils::tile_hsps($self) if not $self->{'_tiled_hsps'};
1252
1253 my $num = $self->logical_length('query') - $self->{'_length_aln_query'};
1254 ($num < 0 ? 0 : $num );
1255 }
1256
1257
1258
1259 =head2 seq_inds
1260
1261 Usage : $hit->seq_inds( seq_type, class, collapse );
1262 Purpose : Get a list of residue positions (indices) across all HSPs
1263 : for identical or conserved residues in the query or sbjct sequence.
1264 Example : @s_ind = $hit->seq_inds('query', 'identical');
1265 : @h_ind = $hit->seq_inds('hit', 'conserved');
1266 : @h_ind = $hit->seq_inds('hit', 'conserved', 1);
1267 Returns : Array of integers
1268 : May include ranges if collapse is non-zero.
1269 Argument : [0] seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
1270 : ('sbjct' is synonymous with 'hit')
1271 : [1] class = 'identical' or 'conserved' (default = 'identical')
1272 : (can be shortened to 'id' or 'cons')
1273 : (actually, anything not 'id' will evaluate to 'conserved').
1274 : [2] collapse = boolean, if non-zero, consecutive positions are merged
1275 : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
1276 : collapses to "1-5 7 9-11". This is useful for
1277 : consolidating long lists. Default = no collapse.
1278 Throws : n/a.
1279
1280 See Also : L<Bio::Search::HSP::BlastHSP::seq_inds()|Bio::Search::HSP::BlastHSP>
1281
1282 =cut
1283
1284 #-------------
1285 sub seq_inds {
1286 #-------------
1287 my ($self, $seqType, $class, $collapse) = @_;
1288
1289 $seqType ||= 'query';
1290 $class ||= 'identical';
1291 $collapse ||= 0;
1292
1293 $seqType = 'sbjct' if $seqType eq 'hit';
1294
1295 my (@inds, $hsp);
1296 foreach $hsp ($self->hsps) {
1297 # This will merge data for all HSPs together.
1298 push @inds, $hsp->seq_inds($seqType, $class);
1299 }
1300
1301 # Need to remove duplicates and sort the merged positions.
1302 if(@inds) {
1303 my %tmp = map { $_, 1 } @inds;
1304 @inds = sort {$a <=> $b} keys %tmp;
1305 }
1306
1307 $collapse ? &Bio::Search::SearchUtils::collapse_nums(@inds) : @inds;
1308 }
1309
1310
1311 =head2 strand
1312
1313 See documentation in L<Bio::Search::SearchUtils::strand()|Bio::Search::SearchUtils>
1314
1315 =cut
1316
1317 #----------'
1318 sub strand {
1319 #----------
1320 my ($self, $seqType, $strnd) = @_;
1321
1322 Bio::Search::SearchUtils::tile_hsps($self) if not $self->{'_tiled_hsps'};
1323
1324 $seqType ||= (wantarray ? 'list' : 'query');
1325 $seqType = 'sbjct' if $seqType eq 'hit';
1326
1327 $seqType = lc($seqType);
1328
1329 if( defined $strnd ) {
1330 $self->throw("Can't set strand for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
1331
1332 return $self->{'_strand_'.$seqType} = $strnd;
1333 }
1334
1335 my ($qstr, $hstr);
1336 # If there is only one HSP, defer this call to the solitary HSP.
1337 if($self->num_hsps == 1) {
1338 return $self->hsp->strand($seqType);
1339 }
1340 elsif( defined $self->{'_strand_query'}) {
1341 # Get the data computed during hsp tiling.
1342 $qstr = $self->{'_strand_query'};
1343 $hstr = $self->{'_strand_sbjct'}
1344 }
1345 else {
1346 # otherwise, iterate through all HSPs collecting strand info.
1347 # This will return the string "-1/1" if there are HSPs on different strands.
1348 # NOTE: This was the pre-10/21/02 procedure which will no longer be used,
1349 # (unless the above elsif{} is commented out).
1350 my (%qstr, %hstr);
1351 foreach my $hsp( $self->hsps ) {
1352 my ( $q, $h ) = $hsp->strand();
1353 $qstr{ $q }++;
1354 $hstr{ $h }++;
1355 }
1356 $qstr = join( '/', sort keys %qstr);
1357 $hstr = join( '/', sort keys %hstr);
1358 }
1359
1360 if($seqType =~ /list|array/i) {
1361 return ($qstr, $hstr);
1362 } elsif( $seqType eq 'query' ) {
1363 return $qstr;
1364 } else {
1365 return $hstr;
1366 }
1367 }
1368
1369 =head2 frame
1370
1371 See documentation in L<Bio::Search::SearchUtils::frame()|Bio::Search::SearchUtils>
1372
1373 =cut
1374
1375 #----------'
1376 sub frame {
1377 #----------
1378 my( $self, $frm ) = @_;
1379
1380 Bio::Search::SearchUtils::tile_hsps($self) if not $self->{'_tiled_hsps'};
1381
1382 if( defined $frm ) {
1383 return $self->{'_frame'} = $frm;
1384 }
1385
1386 # The check for $self->{'_frame'} is a remnant from the 'query' mode days
1387 # in which the sbjct object would collect data from the description line only.
1388
1389 my ($frame);
1390 if(not defined($self->{'_frame'})) {
1391 $frame = $self->hsp->frame;
1392 } else {
1393 $frame = $self->{'_frame'};
1394 }
1395 return $frame;
1396 }
1397
1398 =head2 rank
1399
1400 Title : rank
1401 Usage : $obj->rank($newval)
1402 Function: Get/Set the rank of this Hit in the Query search list
1403 i.e. this is the Nth hit for a specific query
1404 Returns : value of rank
1405 Args : newvalue (optional)
1406
1407
1408 =cut
1409
1410 sub rank{
1411 my $self = shift;
1412 return $self->{'_rank'} = shift if @_;
1413 return $self->{'_rank'} || 1;
1414 }
1415
1416 =head2 locus
1417
1418 Title : locus
1419 Usage : $locus = $hit->locus();
1420 Function: Retrieve the locus (if available) for the hit
1421 Returns : a scalar string (empty string if not set)
1422 Args : none
1423
1424 =cut
1425
1426 sub locus {
1427 my ($self,$value) = @_;
1428 my $previous = $self->{'_locus'};
1429 if( defined $value || ! defined $previous ) {
1430 unless (defined $value) {
1431 if ($self->{'_name'} =~/(gb|emb|dbj|ref)\|(.*)\|(.*)/) {
1432 $value = $previous = $3;
1433 } else {
1434 $value = $previous = '';
1435 }
1436 }
1437 $self->{'_locus'} = $value;
1438 }
1439 return $previous;
1440 }
1441
1442 =head2 each_accession_number
1443
1444 Title : each_accession_number
1445 Usage : @each_accession_number = $hit->each_accession_number();
1446 Function: Get each accession number listed in the description of the hit.
1447 If there are no alternatives, then only the primary accession will
1448 be given
1449 Returns : list of all accession numbers in the description
1450 Args : none
1451
1452 =cut
1453
1454 sub each_accession_number {
1455 my ($self,$value) = @_;
1456 my $desc = $self->{'_description'};
1457 #put primary accnum on the list
1458 my @accnums;
1459 push (@accnums,$self->{'_accession'});
1460 if( defined $desc ) {
1461 while ($desc =~ /(\b\S+\|\S*\|\S*\s?)/g) {
1462 my $id = $1;
1463 my ($acc, $version);
1464 if ($id =~ /(gb|emb|dbj|sp|pdb|bbs|ref|lcl)\|(.*)\|(.*)/) {
1465 ($acc, $version) = split /\./, $2;
1466 } elsif ($id =~ /(pir|prf|pat|gnl)\|(.*)\|(.*)/) {
1467 ($acc, $version) = split /\./, $3;
1468 } else {
1469 #punt, not matching the db's at ftp://ftp.ncbi.nih.gov/blast/db/README
1470 #Database Name Identifier Syntax
1471 #============================ ========================
1472 #GenBank gb|accession|locus
1473 #EMBL Data Library emb|accession|locus
1474 #DDBJ, DNA Database of Japan dbj|accession|locus
1475 #NBRF PIR pir||entry
1476 #Protein Research Foundation prf||name
1477 #SWISS-PROT sp|accession|entry name
1478 #Brookhaven Protein Data Bank pdb|entry|chain
1479 #Patents pat|country|number
1480 #GenInfo Backbone Id bbs|number
1481 #General database identifier gnl|database|identifier
1482 #NCBI Reference Sequence ref|accession|locus
1483 #Local Sequence identifier lcl|identifier
1484 $acc=$id;
1485 }
1486 push(@accnums, $acc);
1487 }
1488 }
1489 return @accnums;
1490 }
1491
1492 =head2 tiled_hsps
1493
1494 See documentation in L<Bio::Search::SearchUtils::tiled_hsps()|Bio::Search::SearchUtils>
1495
1496 =cut
1497
1498 sub tiled_hsps {
1499 my $self = shift;
1500 return $self->{'_tiled_hsps'} = shift if @_;
1501 return $self->{'_tiled_hsps'};
1502 }
1503
1504 1;