comparison variant_effect_predictor/Bio/Search/HSP/HSPI.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #-----------------------------------------------------------------
2 # $Id: HSPI.pm,v 1.21.2.1 2003/01/22 22:51:00 jason Exp $
3 #
4 # BioPerl module for Bio::Search::HSP::HSPI
5 #
6 # Cared for by Steve Chervitz <sac@bioperl.org>
7 # and Jason Stajich <jason@bioperl.org>
8 #
9 # You may distribute this module under the same terms as perl itself
10 #-----------------------------------------------------------------
11
12 # POD documentation - main docs before the code
13
14 =head1 NAME
15
16 Bio::Search::HSP::HSPI - Interface for a High Scoring Pair in a similarity search result
17
18 =head1 SYNOPSIS
19
20 $r_type = $hsp->algorithm
21
22 $pvalue = $hsp->pvalue();
23
24 $evalue = $hsp->evalue();
25
26 $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
27
28 $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
29
30 $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
31
32 $qseq = $hsp->query_string;
33
34 $hseq = $hsp->hit_string;
35
36 $homology_string = $hsp->homology_string;
37
38 $len = $hsp->length( ['query'|'hit'|'total'] );
39
40 $rank = $hsp->rank;
41
42 =head1 DESCRIPTION
43
44 Bio::Search::HSP::HSPI objects cannot be instantiated since this
45 module defines a pure interface.
46
47 Given an object that implements the Bio::Search::HSP::HSPI interface,
48 you can do the following things with it:
49
50 =head1 SEE ALSO
51
52 This interface inherits methods from these other modules:
53
54 L<Bio::SeqFeatureI>,
55 L<Bio::SeqFeature::FeaturePair>
56 L<Bio::SeqFeature::SimilarityPair>
57
58 Please refer to these modules for documentation of the
59 many additional inherited methods.
60
61 =head1 FEEDBACK
62
63 =head2 Mailing Lists
64
65 User feedback is an integral part of the evolution of this and other
66 Bioperl modules. Send your comments and suggestions preferably to
67 the Bioperl mailing list. Your participation is much appreciated.
68
69 bioperl-l@bioperl.org - General discussion
70 http://bioperl.org/MailList.shtml - About the mailing lists
71
72 =head2 Reporting Bugs
73
74 Report bugs to the Bioperl bug tracking system to help us keep track
75 of the bugs and their resolution. Bug reports can be submitted via
76 email or the web:
77
78 bioperl-bugs@bioperl.org
79 http://bugzilla.bioperl.org/
80
81 =head1 AUTHOR - Steve Chervitz, Jason Stajich
82
83 Email sac@bioperl.org
84 Email jason@bioperl.org
85
86 =head1 COPYRIGHT
87
88 Copyright (c) 2001 Steve Chervitz, Jason Stajich. All Rights Reserved.
89
90 =head1 DISCLAIMER
91
92 This software is provided "as is" without warranty of any kind.
93
94 =head1 APPENDIX
95
96 The rest of the documentation details each of the object methods.
97 Internal methods are usually preceded with a _
98
99 =cut
100
101
102 # Let the code begin...
103
104
105 package Bio::Search::HSP::HSPI;
106 use vars qw(@ISA);
107
108 use Bio::Root::RootI;
109 use Bio::SeqFeature::SimilarityPair;
110
111 use strict;
112 use Carp;
113
114 @ISA = qw(Bio::SeqFeature::SimilarityPair Bio::Root::RootI);
115
116
117 =head2 algorithm
118
119 Title : algorithm
120 Usage : my $r_type = $hsp->algorithm
121 Function: Obtain the name of the algorithm used to obtain the HSP
122 Returns : string (e.g., BLASTP)
123 Args : none
124
125 =cut
126
127 sub algorithm{
128 my ($self,@args) = @_;
129 $self->throw_not_implemented;
130 }
131
132 =head2 pvalue
133
134 Title : pvalue
135 Usage : my $pvalue = $hsp->pvalue();
136 Function: Returns the P-value for this HSP or undef
137 Returns : float or exponential (2e-10)
138 P-value is not defined with NCBI Blast2 reports.
139 Args : none
140
141 =cut
142
143 sub pvalue {
144 my ($self) = @_;
145 $self->throw_not_implemented;
146 }
147
148 =head2 evalue
149
150 Title : evalue
151 Usage : my $evalue = $hsp->evalue();
152 Function: Returns the e-value for this HSP
153 Returns : float or exponential (2e-10)
154 Args : none
155
156 =cut
157
158 sub evalue {
159 my ($self) = @_;
160 $self->throw_not_implemented;
161 }
162
163 =head2 frac_identical
164
165 Title : frac_identical
166 Usage : my $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
167 Function: Returns the fraction of identitical positions for this HSP
168 Returns : Float in range 0.0 -> 1.0
169 Args : 'query' = num identical / length of query seq (without gaps)
170 'hit' = num identical / length of hit seq (without gaps)
171 'total' = num identical / length of alignment (with gaps)
172 default = 'total'
173
174 =cut
175
176 sub frac_identical {
177 my ($self, $type) = @_;
178 $self->throw_not_implemented;
179 }
180
181 =head2 frac_conserved
182
183 Title : frac_conserved
184 Usage : my $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
185 Function : Returns the fraction of conserved positions for this HSP.
186 This is the fraction of symbols in the alignment with a
187 positive score.
188 Returns : Float in range 0.0 -> 1.0
189 Args : 'query' = num conserved / length of query seq (without gaps)
190 'hit' = num conserved / length of hit seq (without gaps)
191 'total' = num conserved / length of alignment (with gaps)
192 default = 'total'
193
194 =cut
195
196 sub frac_conserved {
197 my ($self, $type) = @_;
198 $self->throw_not_implemented;
199 }
200
201 =head2 num_identical
202
203 Title : num_identical
204 Usage : $obj->num_identical($newval)
205 Function: returns the number of identical residues in the alignment
206 Returns : integer
207 Args : integer (optional)
208
209
210 =cut
211
212 sub num_identical{
213 shift->throw_not_implemented;
214 }
215
216 =head2 num_conserved
217
218 Title : num_conserved
219 Usage : $obj->num_conserved($newval)
220 Function: returns the number of conserved residues in the alignment
221 Returns : inetger
222 Args : integer (optional)
223
224
225 =cut
226
227 sub num_conserved{
228 shift->throw_not_implemented();
229 }
230
231 =head2 gaps
232
233 Title : gaps
234 Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
235 Function : Get the number of gaps in the query, hit, or total alignment.
236 Returns : Integer, number of gaps or 0 if none
237 Args : 'query' = num conserved / length of query seq (without gaps)
238 'hit' = num conserved / length of hit seq (without gaps)
239 'total' = num conserved / length of alignment (with gaps)
240 default = 'total'
241
242 =cut
243
244 sub gaps {
245 my ($self, $type) = @_;
246 $self->throw_not_implemented;
247 }
248
249 =head2 query_string
250
251 Title : query_string
252 Usage : my $qseq = $hsp->query_string;
253 Function: Retrieves the query sequence of this HSP as a string
254 Returns : string
255 Args : none
256
257
258 =cut
259
260 sub query_string{
261 my ($self) = @_;
262 $self->throw_not_implemented;
263 }
264
265 =head2 hit_string
266
267 Title : hit_string
268 Usage : my $hseq = $hsp->hit_string;
269 Function: Retrieves the hit sequence of this HSP as a string
270 Returns : string
271 Args : none
272
273
274 =cut
275
276 sub hit_string{
277 my ($self) = @_;
278 $self->throw_not_implemented;
279 }
280
281 =head2 homology_string
282
283 Title : homology_string
284 Usage : my $homo_string = $hsp->homology_string;
285 Function: Retrieves the homology sequence for this HSP as a string.
286 : The homology sequence is the string of symbols in between the
287 : query and hit sequences in the alignment indicating the degree
288 : of conservation (e.g., identical, similar, not similar).
289 Returns : string
290 Args : none
291
292 =cut
293
294 sub homology_string{
295 my ($self) = @_;
296 $self->throw_not_implemented;
297 }
298
299 =head2 length
300
301 Title : length
302 Usage : my $len = $hsp->length( ['query'|'hit'|'total'] );
303 Function : Returns the length of the query or hit in the alignment (without gaps)
304 or the aggregate length of the HSP (including gaps;
305 this may be greater than either hit or query )
306 Returns : integer
307 Args : 'query' = length of query seq (without gaps)
308 'hit' = length of hit seq (without gaps)
309 'total' = length of alignment (with gaps)
310 default = 'total'
311 Args : none
312
313 =cut
314
315 sub length{
316 shift->throw_not_implemented();
317 }
318
319 =head2 percent_identity
320
321 Title : percent_identity
322 Usage : my $percentid = $hsp->percent_identity()
323 Function: Returns the calculated percent identity for an HSP
324 Returns : floating point between 0 and 100
325 Args : none
326
327
328 =cut
329
330 sub percent_identity{
331 my ($self) = @_;
332 return $self->frac_identical('hsp') * 100;
333 }
334
335 =head2 get_aln
336
337 Title : get_aln
338 Usage : my $aln = $hsp->gel_aln
339 Function: Returns a Bio::SimpleAlign representing the HSP alignment
340 Returns : Bio::SimpleAlign
341 Args : none
342
343 =cut
344
345 sub get_aln {
346 my ($self) = @_;
347 $self->throw_not_implemented;
348 }
349
350
351 =head2 seq_inds
352
353 Title : seq_inds
354 Purpose : Get a list of residue positions (indices) for all identical
355 : or conserved residues in the query or sbjct sequence.
356 Example : @s_ind = $hsp->seq_inds('query', 'identical');
357 : @h_ind = $hsp->seq_inds('hit', 'conserved');
358 : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
359 Returns : List of integers
360 : May include ranges if collapse is true.
361 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = query)
362 ('sbjct' is synonymous with 'hit')
363 class = 'identical' or 'conserved' or 'nomatch' or 'gap'
364 (default = identical)
365 (can be shortened to 'id' or 'cons')
366
367 collapse = boolean, if true, consecutive positions are merged
368 using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
369 collapses to "1-5 7 9-11". This is useful for
370 consolidating long lists. Default = no collapse.
371 Throws : n/a.
372 Comments :
373
374 See Also : L<Bio::Search::BlastUtils::collapse_nums()|Bio::Search::BlastUtils>, L<Bio::Search::Hit::HitI::seq_inds()|Bio::Search::Hit::HitI>
375
376 =cut
377
378 sub seq_inds {
379 shift->throw_not_implemented();
380 }
381
382 =head2 Inherited from Bio::SeqFeature::SimilarityPair
383
384 These methods come from Bio::SeqFeature::SimilarityPair
385
386 =head2 query
387
388 Title : query
389 Usage : my $query = $hsp->query
390 Function: Returns a SeqFeature representing the query in the HSP
391 Returns : Bio::SeqFeature::Similarity
392 Args : [optional] new value to set
393
394
395 =head2 hit
396
397 Title : hit
398 Usage : my $hit = $hsp->hit
399 Function: Returns a SeqFeature representing the hit in the HSP
400 Returns : Bio::SeqFeature::Similarity
401 Args : [optional] new value to set
402
403
404 =head2 significance
405
406 Title : significance
407 Usage : $evalue = $obj->significance();
408 $obj->significance($evalue);
409 Function: Get/Set the significance value (see Bio::SeqFeature::SimilarityPair)
410 Returns : significance value (scientific notation string)
411 Args : significance value (sci notation string)
412
413
414 =head2 score
415
416 Title : score
417 Usage : my $score = $hsp->score();
418 Function: Returns the score for this HSP or undef
419 Returns : numeric
420 Args : [optional] numeric to set value
421
422 =head2 bits
423
424 Title : bits
425 Usage : my $bits = $hsp->bits();
426 Function: Returns the bit value for this HSP or undef
427 Returns : numeric
428 Args : none
429
430 =cut
431
432 # override
433
434 =head2 strand
435
436 Title : strand
437 Usage : $hsp->strand('query')
438 Function: Retrieves the strand for the HSP component requested
439 Returns : +1 or -1 (0 if unknown)
440 Args : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject
441 'query' to retrieve the query strand (default)
442 'list' or 'array' to retreive both query and hit together
443
444 =cut
445
446 sub strand {
447 my $self = shift;
448 my $val = shift;
449 $val = 'query' unless defined $val;
450 $val =~ s/^\s+//;
451
452 if( $val =~ /^q/i ) {
453 return $self->query->strand(shift);
454 } elsif( $val =~ /^(hi|s)/i ) {
455 return $self->hit->strand(shift);
456 } elsif ( $val =~ m/^(list|array)/) {
457 return ($self->query->strand(shift), $self->hit->strand(shift));
458 } else {
459 $self->warn("unrecognized component $val requested\n");
460 }
461 return 0;
462 }
463
464 =head2 start
465
466 Title : start
467 Usage : $hsp->start('query')
468 Function: Retrieves the start for the HSP component requested
469 Returns : integer
470 Args : 'hit' or 'subject' or 'sbjct' to retrieve the start of the subject
471 'query' to retrieve the query start (default)
472
473 =cut
474
475 sub start {
476 my $self = shift;
477 my $val = shift;
478 $val = 'query' unless defined $val;
479 $val =~ s/^\s+//;
480
481 if( $val =~ /^q/i ) {
482 return $self->query->start(shift);
483 } elsif( $val =~ /^(hi|s)/i ) {
484 return $self->hit->start(shift);
485 } else {
486 $self->warn("unrecognized component $val requested\n");
487 }
488 return 0;
489 }
490
491 =head2 end
492
493 Title : end
494 Usage : $hsp->end('query')
495 Function: Retrieves the end for the HSP component requested
496 Returns : integer
497 Args : 'hit' or 'subject' or 'sbjct' to retrieve the end of the subject
498 'query' to retrieve the query end (default)
499
500 =cut
501
502 sub end {
503 my $self = shift;
504 my $val = shift;
505 $val = 'query' unless defined $val;
506 $val =~ s/^\s+//;
507
508 if( $val =~ /^q/i ) {
509 return $self->query->end(shift);
510 } elsif( $val =~ /^(hi|s)/i ) {
511 return $self->hit->end(shift);
512 } else {
513 $self->warn("unrecognized component $val requested\n");
514 }
515 return 0;
516 }
517
518 =head2 seq_str
519
520 Usage : $hsp->seq_str( seq_type );
521 Purpose : Get the full query, sbjct, or 'match' sequence as a string.
522 : The 'match' sequence is the string of symbols in between the
523 : query and sbjct sequences.
524 Example : $str = $hsp->seq_str('query');
525 Returns : String
526 Argument : seq_Type = 'query' or 'hit' or 'sbjct' or 'match'
527 : ('sbjct' is synonymous with 'hit')
528 : default is 'query'
529 Throws : Exception if the argument does not match an accepted seq_type.
530 Comments :
531
532 See Also : L<seq()|seq>, L<seq_inds()|seq_inds>, B<_set_match_seq()>
533
534 =cut
535
536 sub seq_str {
537 my $self = shift;
538 my $type = shift;
539 if( $type =~ /^q/i ) { return $self->query_string(shift) }
540 elsif( $type =~ /^(s|hi)/i ) { return $self->hit_string(shift)}
541 elsif ( $type =~ /^(ho|ma)/i ) { return $self->hit_string(shift) }
542 else {
543 $self->warn("unknown sequence type $type");
544 }
545 return '';
546 }
547
548
549 =head2 rank
550
551 Usage : $hsp->rank( [string] );
552 Purpose : Get the rank of the HSP within a given Blast hit.
553 Example : $rank = $hsp->rank;
554 Returns : Integer (1..n) corresponding to the order in which the HSP
555 appears in the BLAST report.
556
557 =cut
558
559 sub rank { shift->throw_not_implemented }
560
561 =head2 matches
562
563 Usage : $hsp->matches([seq_type], [start], [stop]);
564 Purpose : Get the total number of identical and conservative matches
565 : in the query or sbjct sequence for the given HSP. Optionally can
566 : report data within a defined interval along the seq.
567 : (Note: 'conservative' matches are called 'positives' in the
568 : Blast report.)
569 Example : ($id,$cons) = $hsp_object->matches('hit');
570 : ($id,$cons) = $hsp_object->matches('query',300,400);
571 Returns : 2-element array of integers
572 Argument : (1) seq_type = 'query' or 'hit' or 'sbjct' (default = query)
573 : ('sbjct' is synonymous with 'hit')
574 : (2) start = Starting coordinate (optional)
575 : (3) stop = Ending coordinate (optional)
576 Throws : Exception if the supplied coordinates are out of range.
577 Comments : Relies on seq_str('match') to get the string of alignment symbols
578 : between the query and sbjct lines which are used for determining
579 : the number of identical and conservative matches.
580
581 See Also : L<length()|length>, L<gaps()|gaps>, L<seq_str()|seq_str>, L<Bio::Search::Hit::BlastHit::_adjust_contigs()|Bio::Search::Hit::BlastHit>
582
583 =cut
584
585 #-----------
586 sub matches {
587 #-----------
588 my( $self, %param ) = @_;
589 my(@data);
590 my($seqType, $beg, $end) = ($param{-SEQ}, $param{-START}, $param{-STOP});
591 $seqType ||= 'query';
592 $seqType = 'sbjct' if $seqType eq 'hit';
593
594 if(!defined $beg && !defined $end) {
595 ## Get data for the whole alignment.
596 push @data, ($self->num_identical, $self->num_conserved);
597 } else {
598 ## Get the substring representing the desired sub-section of aln.
599 $beg ||= 0;
600 $end ||= 0;
601 my($start,$stop) = $self->range($seqType);
602 if($beg == 0) { $beg = $start; $end = $beg+$end; }
603 elsif($end == 0) { $end = $stop; $beg = $end-$beg; }
604
605 if($end >= $stop) { $end = $stop; } ##ML changed from if (end >stop)
606 else { $end += 1;} ##ML moved from commented position below, makes
607 ##more sense here
608 # if($end > $stop) { $end = $stop; }
609 if($beg < $start) { $beg = $start; }
610 # else { $end += 1;}
611
612 # my $seq = substr($self->seq_str('match'), $beg-$start, ($end-$beg));
613
614 ## ML: START fix for substr out of range error ------------------
615 my $seq = "";
616 if (($self->algorithm eq 'TBLASTN') and ($seqType eq 'sbjct'))
617 {
618 $seq = substr($self->seq_str('match'),
619 int(($beg-$start)/3), int(($end-$beg+1)/3));
620
621 } elsif (($self->algorithm eq 'BLASTX') and ($seqType eq 'query'))
622 {
623 $seq = substr($self->seq_str('match'),
624 int(($beg-$start)/3), int(($end-$beg+1)/3));
625 } else {
626 $seq = substr($self->seq_str('match'),
627 $beg-$start, ($end-$beg));
628 }
629 ## ML: End of fix for substr out of range error -----------------
630
631
632 ## ML: debugging code
633 ## This is where we get our exception. Try printing out the values going
634 ## into this:
635 ##
636 # print STDERR
637 # qq(*------------MY EXCEPTION --------------------\nSeq: ") ,
638 # $self->seq_str("$seqType"), qq("\n),$self->rank,",( index:";
639 # print STDERR $beg-$start, ", len: ", $end-$beg," ), (HSPRealLen:",
640 # CORE::length $self->seq_str("$seqType");
641 # print STDERR ", HSPCalcLen: ", $stop - $start +1 ," ),
642 # ( beg: $beg, end: $end ), ( start: $start, stop: stop )\n";
643 ## ML: END DEBUGGING CODE----------
644
645 if(!CORE::length $seq) {
646 $self->throw("Undefined sub-sequence ($beg,$end). Valid range = $start - $stop");
647 }
648 ## Get data for a substring.
649 # printf "Collecting HSP subsection data: beg,end = %d,%d; start,stop = %d,%d\n%s<---\n", $beg, $end, $start, $stop, $seq;
650 # printf "Original match seq:\n%s\n",$self->seq_str('match');
651 $seq =~ s/ //g; # remove space (no info).
652 my $len_cons = CORE::length $seq;
653 $seq =~ s/\+//g; # remove '+' characters (conservative substitutions)
654 my $len_id = CORE::length $seq;
655 push @data, ($len_id, $len_cons);
656 # printf " HSP = %s\n id = %d; cons = %d\n", $self->rank, $len_id, $len_cons; <STDIN>;
657 }
658 @data;
659 }
660
661 =head2 n
662
663 Usage : $hsp_obj->n()
664 Purpose : Get the N value (num HSPs on which P/Expect is based).
665 : This value is not defined with NCBI Blast2 with gapping.
666 Returns : Integer or null string if not defined.
667 Argument : n/a
668 Throws : n/a
669 Comments : The 'N' value is listed in parenthesis with P/Expect value:
670 : e.g., P(3) = 1.2e-30 ---> (N = 3).
671 : Not defined in NCBI Blast2 with gaps.
672 : This typically is equal to the number of HSPs but not always.
673 : To obtain the number of HSPs, use Bio::Search::Hit::HitI::num_hsps().
674
675 See Also : L<Bio::SeqFeature::SimilarityPair::score()|Bio::SeqFeature::SimilarityPair>
676
677 =cut
678
679 sub n { shift->throw_not_implemented }
680
681 =head2 range
682
683 Usage : $hsp->range( [seq_type] );
684 Purpose : Gets the (start, end) coordinates for the query or sbjct sequence
685 : in the HSP alignment.
686 Example : ($query_beg, $query_end) = $hsp->range('query');
687 : ($hit_beg, $hit_end) = $hsp->range('hit');
688 Returns : Two-element array of integers
689 Argument : seq_type = string, 'query' or 'hit' or 'sbjct' (default = 'query')
690 : ('sbjct' is synonymous with 'hit')
691 Throws : n/a
692 Comments : This is a convenience method for constructions such as
693 ($hsp->query->start, $hsp->query->end)
694
695 =cut
696
697 sub range { shift->throw_not_implemented }
698
699
700 1;
701