0
|
1 #-----------------------------------------------------------------
|
|
2 # $Id: HSPI.pm,v 1.21.2.1 2003/01/22 22:51:00 jason Exp $
|
|
3 #
|
|
4 # BioPerl module for Bio::Search::HSP::HSPI
|
|
5 #
|
|
6 # Cared for by Steve Chervitz <sac@bioperl.org>
|
|
7 # and Jason Stajich <jason@bioperl.org>
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10 #-----------------------------------------------------------------
|
|
11
|
|
12 # POD documentation - main docs before the code
|
|
13
|
|
14 =head1 NAME
|
|
15
|
|
16 Bio::Search::HSP::HSPI - Interface for a High Scoring Pair in a similarity search result
|
|
17
|
|
18 =head1 SYNOPSIS
|
|
19
|
|
20 $r_type = $hsp->algorithm
|
|
21
|
|
22 $pvalue = $hsp->pvalue();
|
|
23
|
|
24 $evalue = $hsp->evalue();
|
|
25
|
|
26 $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
|
|
27
|
|
28 $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
|
|
29
|
|
30 $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
|
|
31
|
|
32 $qseq = $hsp->query_string;
|
|
33
|
|
34 $hseq = $hsp->hit_string;
|
|
35
|
|
36 $homology_string = $hsp->homology_string;
|
|
37
|
|
38 $len = $hsp->length( ['query'|'hit'|'total'] );
|
|
39
|
|
40 $rank = $hsp->rank;
|
|
41
|
|
42 =head1 DESCRIPTION
|
|
43
|
|
44 Bio::Search::HSP::HSPI objects cannot be instantiated since this
|
|
45 module defines a pure interface.
|
|
46
|
|
47 Given an object that implements the Bio::Search::HSP::HSPI interface,
|
|
48 you can do the following things with it:
|
|
49
|
|
50 =head1 SEE ALSO
|
|
51
|
|
52 This interface inherits methods from these other modules:
|
|
53
|
|
54 L<Bio::SeqFeatureI>,
|
|
55 L<Bio::SeqFeature::FeaturePair>
|
|
56 L<Bio::SeqFeature::SimilarityPair>
|
|
57
|
|
58 Please refer to these modules for documentation of the
|
|
59 many additional inherited methods.
|
|
60
|
|
61 =head1 FEEDBACK
|
|
62
|
|
63 =head2 Mailing Lists
|
|
64
|
|
65 User feedback is an integral part of the evolution of this and other
|
|
66 Bioperl modules. Send your comments and suggestions preferably to
|
|
67 the Bioperl mailing list. Your participation is much appreciated.
|
|
68
|
|
69 bioperl-l@bioperl.org - General discussion
|
|
70 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
71
|
|
72 =head2 Reporting Bugs
|
|
73
|
|
74 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
75 of the bugs and their resolution. Bug reports can be submitted via
|
|
76 email or the web:
|
|
77
|
|
78 bioperl-bugs@bioperl.org
|
|
79 http://bugzilla.bioperl.org/
|
|
80
|
|
81 =head1 AUTHOR - Steve Chervitz, Jason Stajich
|
|
82
|
|
83 Email sac@bioperl.org
|
|
84 Email jason@bioperl.org
|
|
85
|
|
86 =head1 COPYRIGHT
|
|
87
|
|
88 Copyright (c) 2001 Steve Chervitz, Jason Stajich. All Rights Reserved.
|
|
89
|
|
90 =head1 DISCLAIMER
|
|
91
|
|
92 This software is provided "as is" without warranty of any kind.
|
|
93
|
|
94 =head1 APPENDIX
|
|
95
|
|
96 The rest of the documentation details each of the object methods.
|
|
97 Internal methods are usually preceded with a _
|
|
98
|
|
99 =cut
|
|
100
|
|
101
|
|
102 # Let the code begin...
|
|
103
|
|
104
|
|
105 package Bio::Search::HSP::HSPI;
|
|
106 use vars qw(@ISA);
|
|
107
|
|
108 use Bio::Root::RootI;
|
|
109 use Bio::SeqFeature::SimilarityPair;
|
|
110
|
|
111 use strict;
|
|
112 use Carp;
|
|
113
|
|
114 @ISA = qw(Bio::SeqFeature::SimilarityPair Bio::Root::RootI);
|
|
115
|
|
116
|
|
117 =head2 algorithm
|
|
118
|
|
119 Title : algorithm
|
|
120 Usage : my $r_type = $hsp->algorithm
|
|
121 Function: Obtain the name of the algorithm used to obtain the HSP
|
|
122 Returns : string (e.g., BLASTP)
|
|
123 Args : none
|
|
124
|
|
125 =cut
|
|
126
|
|
127 sub algorithm{
|
|
128 my ($self,@args) = @_;
|
|
129 $self->throw_not_implemented;
|
|
130 }
|
|
131
|
|
132 =head2 pvalue
|
|
133
|
|
134 Title : pvalue
|
|
135 Usage : my $pvalue = $hsp->pvalue();
|
|
136 Function: Returns the P-value for this HSP or undef
|
|
137 Returns : float or exponential (2e-10)
|
|
138 P-value is not defined with NCBI Blast2 reports.
|
|
139 Args : none
|
|
140
|
|
141 =cut
|
|
142
|
|
143 sub pvalue {
|
|
144 my ($self) = @_;
|
|
145 $self->throw_not_implemented;
|
|
146 }
|
|
147
|
|
148 =head2 evalue
|
|
149
|
|
150 Title : evalue
|
|
151 Usage : my $evalue = $hsp->evalue();
|
|
152 Function: Returns the e-value for this HSP
|
|
153 Returns : float or exponential (2e-10)
|
|
154 Args : none
|
|
155
|
|
156 =cut
|
|
157
|
|
158 sub evalue {
|
|
159 my ($self) = @_;
|
|
160 $self->throw_not_implemented;
|
|
161 }
|
|
162
|
|
163 =head2 frac_identical
|
|
164
|
|
165 Title : frac_identical
|
|
166 Usage : my $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
|
|
167 Function: Returns the fraction of identitical positions for this HSP
|
|
168 Returns : Float in range 0.0 -> 1.0
|
|
169 Args : 'query' = num identical / length of query seq (without gaps)
|
|
170 'hit' = num identical / length of hit seq (without gaps)
|
|
171 'total' = num identical / length of alignment (with gaps)
|
|
172 default = 'total'
|
|
173
|
|
174 =cut
|
|
175
|
|
176 sub frac_identical {
|
|
177 my ($self, $type) = @_;
|
|
178 $self->throw_not_implemented;
|
|
179 }
|
|
180
|
|
181 =head2 frac_conserved
|
|
182
|
|
183 Title : frac_conserved
|
|
184 Usage : my $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
|
|
185 Function : Returns the fraction of conserved positions for this HSP.
|
|
186 This is the fraction of symbols in the alignment with a
|
|
187 positive score.
|
|
188 Returns : Float in range 0.0 -> 1.0
|
|
189 Args : 'query' = num conserved / length of query seq (without gaps)
|
|
190 'hit' = num conserved / length of hit seq (without gaps)
|
|
191 'total' = num conserved / length of alignment (with gaps)
|
|
192 default = 'total'
|
|
193
|
|
194 =cut
|
|
195
|
|
196 sub frac_conserved {
|
|
197 my ($self, $type) = @_;
|
|
198 $self->throw_not_implemented;
|
|
199 }
|
|
200
|
|
201 =head2 num_identical
|
|
202
|
|
203 Title : num_identical
|
|
204 Usage : $obj->num_identical($newval)
|
|
205 Function: returns the number of identical residues in the alignment
|
|
206 Returns : integer
|
|
207 Args : integer (optional)
|
|
208
|
|
209
|
|
210 =cut
|
|
211
|
|
212 sub num_identical{
|
|
213 shift->throw_not_implemented;
|
|
214 }
|
|
215
|
|
216 =head2 num_conserved
|
|
217
|
|
218 Title : num_conserved
|
|
219 Usage : $obj->num_conserved($newval)
|
|
220 Function: returns the number of conserved residues in the alignment
|
|
221 Returns : inetger
|
|
222 Args : integer (optional)
|
|
223
|
|
224
|
|
225 =cut
|
|
226
|
|
227 sub num_conserved{
|
|
228 shift->throw_not_implemented();
|
|
229 }
|
|
230
|
|
231 =head2 gaps
|
|
232
|
|
233 Title : gaps
|
|
234 Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
|
|
235 Function : Get the number of gaps in the query, hit, or total alignment.
|
|
236 Returns : Integer, number of gaps or 0 if none
|
|
237 Args : 'query' = num conserved / length of query seq (without gaps)
|
|
238 'hit' = num conserved / length of hit seq (without gaps)
|
|
239 'total' = num conserved / length of alignment (with gaps)
|
|
240 default = 'total'
|
|
241
|
|
242 =cut
|
|
243
|
|
244 sub gaps {
|
|
245 my ($self, $type) = @_;
|
|
246 $self->throw_not_implemented;
|
|
247 }
|
|
248
|
|
249 =head2 query_string
|
|
250
|
|
251 Title : query_string
|
|
252 Usage : my $qseq = $hsp->query_string;
|
|
253 Function: Retrieves the query sequence of this HSP as a string
|
|
254 Returns : string
|
|
255 Args : none
|
|
256
|
|
257
|
|
258 =cut
|
|
259
|
|
260 sub query_string{
|
|
261 my ($self) = @_;
|
|
262 $self->throw_not_implemented;
|
|
263 }
|
|
264
|
|
265 =head2 hit_string
|
|
266
|
|
267 Title : hit_string
|
|
268 Usage : my $hseq = $hsp->hit_string;
|
|
269 Function: Retrieves the hit sequence of this HSP as a string
|
|
270 Returns : string
|
|
271 Args : none
|
|
272
|
|
273
|
|
274 =cut
|
|
275
|
|
276 sub hit_string{
|
|
277 my ($self) = @_;
|
|
278 $self->throw_not_implemented;
|
|
279 }
|
|
280
|
|
281 =head2 homology_string
|
|
282
|
|
283 Title : homology_string
|
|
284 Usage : my $homo_string = $hsp->homology_string;
|
|
285 Function: Retrieves the homology sequence for this HSP as a string.
|
|
286 : The homology sequence is the string of symbols in between the
|
|
287 : query and hit sequences in the alignment indicating the degree
|
|
288 : of conservation (e.g., identical, similar, not similar).
|
|
289 Returns : string
|
|
290 Args : none
|
|
291
|
|
292 =cut
|
|
293
|
|
294 sub homology_string{
|
|
295 my ($self) = @_;
|
|
296 $self->throw_not_implemented;
|
|
297 }
|
|
298
|
|
299 =head2 length
|
|
300
|
|
301 Title : length
|
|
302 Usage : my $len = $hsp->length( ['query'|'hit'|'total'] );
|
|
303 Function : Returns the length of the query or hit in the alignment (without gaps)
|
|
304 or the aggregate length of the HSP (including gaps;
|
|
305 this may be greater than either hit or query )
|
|
306 Returns : integer
|
|
307 Args : 'query' = length of query seq (without gaps)
|
|
308 'hit' = length of hit seq (without gaps)
|
|
309 'total' = length of alignment (with gaps)
|
|
310 default = 'total'
|
|
311 Args : none
|
|
312
|
|
313 =cut
|
|
314
|
|
315 sub length{
|
|
316 shift->throw_not_implemented();
|
|
317 }
|
|
318
|
|
319 =head2 percent_identity
|
|
320
|
|
321 Title : percent_identity
|
|
322 Usage : my $percentid = $hsp->percent_identity()
|
|
323 Function: Returns the calculated percent identity for an HSP
|
|
324 Returns : floating point between 0 and 100
|
|
325 Args : none
|
|
326
|
|
327
|
|
328 =cut
|
|
329
|
|
330 sub percent_identity{
|
|
331 my ($self) = @_;
|
|
332 return $self->frac_identical('hsp') * 100;
|
|
333 }
|
|
334
|
|
335 =head2 get_aln
|
|
336
|
|
337 Title : get_aln
|
|
338 Usage : my $aln = $hsp->gel_aln
|
|
339 Function: Returns a Bio::SimpleAlign representing the HSP alignment
|
|
340 Returns : Bio::SimpleAlign
|
|
341 Args : none
|
|
342
|
|
343 =cut
|
|
344
|
|
345 sub get_aln {
|
|
346 my ($self) = @_;
|
|
347 $self->throw_not_implemented;
|
|
348 }
|
|
349
|
|
350
|
|
351 =head2 seq_inds
|
|
352
|
|
353 Title : seq_inds
|
|
354 Purpose : Get a list of residue positions (indices) for all identical
|
|
355 : or conserved residues in the query or sbjct sequence.
|
|
356 Example : @s_ind = $hsp->seq_inds('query', 'identical');
|
|
357 : @h_ind = $hsp->seq_inds('hit', 'conserved');
|
|
358 : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
|
|
359 Returns : List of integers
|
|
360 : May include ranges if collapse is true.
|
|
361 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = query)
|
|
362 ('sbjct' is synonymous with 'hit')
|
|
363 class = 'identical' or 'conserved' or 'nomatch' or 'gap'
|
|
364 (default = identical)
|
|
365 (can be shortened to 'id' or 'cons')
|
|
366
|
|
367 collapse = boolean, if true, consecutive positions are merged
|
|
368 using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
|
|
369 collapses to "1-5 7 9-11". This is useful for
|
|
370 consolidating long lists. Default = no collapse.
|
|
371 Throws : n/a.
|
|
372 Comments :
|
|
373
|
|
374 See Also : L<Bio::Search::BlastUtils::collapse_nums()|Bio::Search::BlastUtils>, L<Bio::Search::Hit::HitI::seq_inds()|Bio::Search::Hit::HitI>
|
|
375
|
|
376 =cut
|
|
377
|
|
378 sub seq_inds {
|
|
379 shift->throw_not_implemented();
|
|
380 }
|
|
381
|
|
382 =head2 Inherited from Bio::SeqFeature::SimilarityPair
|
|
383
|
|
384 These methods come from Bio::SeqFeature::SimilarityPair
|
|
385
|
|
386 =head2 query
|
|
387
|
|
388 Title : query
|
|
389 Usage : my $query = $hsp->query
|
|
390 Function: Returns a SeqFeature representing the query in the HSP
|
|
391 Returns : Bio::SeqFeature::Similarity
|
|
392 Args : [optional] new value to set
|
|
393
|
|
394
|
|
395 =head2 hit
|
|
396
|
|
397 Title : hit
|
|
398 Usage : my $hit = $hsp->hit
|
|
399 Function: Returns a SeqFeature representing the hit in the HSP
|
|
400 Returns : Bio::SeqFeature::Similarity
|
|
401 Args : [optional] new value to set
|
|
402
|
|
403
|
|
404 =head2 significance
|
|
405
|
|
406 Title : significance
|
|
407 Usage : $evalue = $obj->significance();
|
|
408 $obj->significance($evalue);
|
|
409 Function: Get/Set the significance value (see Bio::SeqFeature::SimilarityPair)
|
|
410 Returns : significance value (scientific notation string)
|
|
411 Args : significance value (sci notation string)
|
|
412
|
|
413
|
|
414 =head2 score
|
|
415
|
|
416 Title : score
|
|
417 Usage : my $score = $hsp->score();
|
|
418 Function: Returns the score for this HSP or undef
|
|
419 Returns : numeric
|
|
420 Args : [optional] numeric to set value
|
|
421
|
|
422 =head2 bits
|
|
423
|
|
424 Title : bits
|
|
425 Usage : my $bits = $hsp->bits();
|
|
426 Function: Returns the bit value for this HSP or undef
|
|
427 Returns : numeric
|
|
428 Args : none
|
|
429
|
|
430 =cut
|
|
431
|
|
432 # override
|
|
433
|
|
434 =head2 strand
|
|
435
|
|
436 Title : strand
|
|
437 Usage : $hsp->strand('query')
|
|
438 Function: Retrieves the strand for the HSP component requested
|
|
439 Returns : +1 or -1 (0 if unknown)
|
|
440 Args : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject
|
|
441 'query' to retrieve the query strand (default)
|
|
442 'list' or 'array' to retreive both query and hit together
|
|
443
|
|
444 =cut
|
|
445
|
|
446 sub strand {
|
|
447 my $self = shift;
|
|
448 my $val = shift;
|
|
449 $val = 'query' unless defined $val;
|
|
450 $val =~ s/^\s+//;
|
|
451
|
|
452 if( $val =~ /^q/i ) {
|
|
453 return $self->query->strand(shift);
|
|
454 } elsif( $val =~ /^(hi|s)/i ) {
|
|
455 return $self->hit->strand(shift);
|
|
456 } elsif ( $val =~ m/^(list|array)/) {
|
|
457 return ($self->query->strand(shift), $self->hit->strand(shift));
|
|
458 } else {
|
|
459 $self->warn("unrecognized component $val requested\n");
|
|
460 }
|
|
461 return 0;
|
|
462 }
|
|
463
|
|
464 =head2 start
|
|
465
|
|
466 Title : start
|
|
467 Usage : $hsp->start('query')
|
|
468 Function: Retrieves the start for the HSP component requested
|
|
469 Returns : integer
|
|
470 Args : 'hit' or 'subject' or 'sbjct' to retrieve the start of the subject
|
|
471 'query' to retrieve the query start (default)
|
|
472
|
|
473 =cut
|
|
474
|
|
475 sub start {
|
|
476 my $self = shift;
|
|
477 my $val = shift;
|
|
478 $val = 'query' unless defined $val;
|
|
479 $val =~ s/^\s+//;
|
|
480
|
|
481 if( $val =~ /^q/i ) {
|
|
482 return $self->query->start(shift);
|
|
483 } elsif( $val =~ /^(hi|s)/i ) {
|
|
484 return $self->hit->start(shift);
|
|
485 } else {
|
|
486 $self->warn("unrecognized component $val requested\n");
|
|
487 }
|
|
488 return 0;
|
|
489 }
|
|
490
|
|
491 =head2 end
|
|
492
|
|
493 Title : end
|
|
494 Usage : $hsp->end('query')
|
|
495 Function: Retrieves the end for the HSP component requested
|
|
496 Returns : integer
|
|
497 Args : 'hit' or 'subject' or 'sbjct' to retrieve the end of the subject
|
|
498 'query' to retrieve the query end (default)
|
|
499
|
|
500 =cut
|
|
501
|
|
502 sub end {
|
|
503 my $self = shift;
|
|
504 my $val = shift;
|
|
505 $val = 'query' unless defined $val;
|
|
506 $val =~ s/^\s+//;
|
|
507
|
|
508 if( $val =~ /^q/i ) {
|
|
509 return $self->query->end(shift);
|
|
510 } elsif( $val =~ /^(hi|s)/i ) {
|
|
511 return $self->hit->end(shift);
|
|
512 } else {
|
|
513 $self->warn("unrecognized component $val requested\n");
|
|
514 }
|
|
515 return 0;
|
|
516 }
|
|
517
|
|
518 =head2 seq_str
|
|
519
|
|
520 Usage : $hsp->seq_str( seq_type );
|
|
521 Purpose : Get the full query, sbjct, or 'match' sequence as a string.
|
|
522 : The 'match' sequence is the string of symbols in between the
|
|
523 : query and sbjct sequences.
|
|
524 Example : $str = $hsp->seq_str('query');
|
|
525 Returns : String
|
|
526 Argument : seq_Type = 'query' or 'hit' or 'sbjct' or 'match'
|
|
527 : ('sbjct' is synonymous with 'hit')
|
|
528 : default is 'query'
|
|
529 Throws : Exception if the argument does not match an accepted seq_type.
|
|
530 Comments :
|
|
531
|
|
532 See Also : L<seq()|seq>, L<seq_inds()|seq_inds>, B<_set_match_seq()>
|
|
533
|
|
534 =cut
|
|
535
|
|
536 sub seq_str {
|
|
537 my $self = shift;
|
|
538 my $type = shift;
|
|
539 if( $type =~ /^q/i ) { return $self->query_string(shift) }
|
|
540 elsif( $type =~ /^(s|hi)/i ) { return $self->hit_string(shift)}
|
|
541 elsif ( $type =~ /^(ho|ma)/i ) { return $self->hit_string(shift) }
|
|
542 else {
|
|
543 $self->warn("unknown sequence type $type");
|
|
544 }
|
|
545 return '';
|
|
546 }
|
|
547
|
|
548
|
|
549 =head2 rank
|
|
550
|
|
551 Usage : $hsp->rank( [string] );
|
|
552 Purpose : Get the rank of the HSP within a given Blast hit.
|
|
553 Example : $rank = $hsp->rank;
|
|
554 Returns : Integer (1..n) corresponding to the order in which the HSP
|
|
555 appears in the BLAST report.
|
|
556
|
|
557 =cut
|
|
558
|
|
559 sub rank { shift->throw_not_implemented }
|
|
560
|
|
561 =head2 matches
|
|
562
|
|
563 Usage : $hsp->matches([seq_type], [start], [stop]);
|
|
564 Purpose : Get the total number of identical and conservative matches
|
|
565 : in the query or sbjct sequence for the given HSP. Optionally can
|
|
566 : report data within a defined interval along the seq.
|
|
567 : (Note: 'conservative' matches are called 'positives' in the
|
|
568 : Blast report.)
|
|
569 Example : ($id,$cons) = $hsp_object->matches('hit');
|
|
570 : ($id,$cons) = $hsp_object->matches('query',300,400);
|
|
571 Returns : 2-element array of integers
|
|
572 Argument : (1) seq_type = 'query' or 'hit' or 'sbjct' (default = query)
|
|
573 : ('sbjct' is synonymous with 'hit')
|
|
574 : (2) start = Starting coordinate (optional)
|
|
575 : (3) stop = Ending coordinate (optional)
|
|
576 Throws : Exception if the supplied coordinates are out of range.
|
|
577 Comments : Relies on seq_str('match') to get the string of alignment symbols
|
|
578 : between the query and sbjct lines which are used for determining
|
|
579 : the number of identical and conservative matches.
|
|
580
|
|
581 See Also : L<length()|length>, L<gaps()|gaps>, L<seq_str()|seq_str>, L<Bio::Search::Hit::BlastHit::_adjust_contigs()|Bio::Search::Hit::BlastHit>
|
|
582
|
|
583 =cut
|
|
584
|
|
585 #-----------
|
|
586 sub matches {
|
|
587 #-----------
|
|
588 my( $self, %param ) = @_;
|
|
589 my(@data);
|
|
590 my($seqType, $beg, $end) = ($param{-SEQ}, $param{-START}, $param{-STOP});
|
|
591 $seqType ||= 'query';
|
|
592 $seqType = 'sbjct' if $seqType eq 'hit';
|
|
593
|
|
594 if(!defined $beg && !defined $end) {
|
|
595 ## Get data for the whole alignment.
|
|
596 push @data, ($self->num_identical, $self->num_conserved);
|
|
597 } else {
|
|
598 ## Get the substring representing the desired sub-section of aln.
|
|
599 $beg ||= 0;
|
|
600 $end ||= 0;
|
|
601 my($start,$stop) = $self->range($seqType);
|
|
602 if($beg == 0) { $beg = $start; $end = $beg+$end; }
|
|
603 elsif($end == 0) { $end = $stop; $beg = $end-$beg; }
|
|
604
|
|
605 if($end >= $stop) { $end = $stop; } ##ML changed from if (end >stop)
|
|
606 else { $end += 1;} ##ML moved from commented position below, makes
|
|
607 ##more sense here
|
|
608 # if($end > $stop) { $end = $stop; }
|
|
609 if($beg < $start) { $beg = $start; }
|
|
610 # else { $end += 1;}
|
|
611
|
|
612 # my $seq = substr($self->seq_str('match'), $beg-$start, ($end-$beg));
|
|
613
|
|
614 ## ML: START fix for substr out of range error ------------------
|
|
615 my $seq = "";
|
|
616 if (($self->algorithm eq 'TBLASTN') and ($seqType eq 'sbjct'))
|
|
617 {
|
|
618 $seq = substr($self->seq_str('match'),
|
|
619 int(($beg-$start)/3), int(($end-$beg+1)/3));
|
|
620
|
|
621 } elsif (($self->algorithm eq 'BLASTX') and ($seqType eq 'query'))
|
|
622 {
|
|
623 $seq = substr($self->seq_str('match'),
|
|
624 int(($beg-$start)/3), int(($end-$beg+1)/3));
|
|
625 } else {
|
|
626 $seq = substr($self->seq_str('match'),
|
|
627 $beg-$start, ($end-$beg));
|
|
628 }
|
|
629 ## ML: End of fix for substr out of range error -----------------
|
|
630
|
|
631
|
|
632 ## ML: debugging code
|
|
633 ## This is where we get our exception. Try printing out the values going
|
|
634 ## into this:
|
|
635 ##
|
|
636 # print STDERR
|
|
637 # qq(*------------MY EXCEPTION --------------------\nSeq: ") ,
|
|
638 # $self->seq_str("$seqType"), qq("\n),$self->rank,",( index:";
|
|
639 # print STDERR $beg-$start, ", len: ", $end-$beg," ), (HSPRealLen:",
|
|
640 # CORE::length $self->seq_str("$seqType");
|
|
641 # print STDERR ", HSPCalcLen: ", $stop - $start +1 ," ),
|
|
642 # ( beg: $beg, end: $end ), ( start: $start, stop: stop )\n";
|
|
643 ## ML: END DEBUGGING CODE----------
|
|
644
|
|
645 if(!CORE::length $seq) {
|
|
646 $self->throw("Undefined sub-sequence ($beg,$end). Valid range = $start - $stop");
|
|
647 }
|
|
648 ## Get data for a substring.
|
|
649 # printf "Collecting HSP subsection data: beg,end = %d,%d; start,stop = %d,%d\n%s<---\n", $beg, $end, $start, $stop, $seq;
|
|
650 # printf "Original match seq:\n%s\n",$self->seq_str('match');
|
|
651 $seq =~ s/ //g; # remove space (no info).
|
|
652 my $len_cons = CORE::length $seq;
|
|
653 $seq =~ s/\+//g; # remove '+' characters (conservative substitutions)
|
|
654 my $len_id = CORE::length $seq;
|
|
655 push @data, ($len_id, $len_cons);
|
|
656 # printf " HSP = %s\n id = %d; cons = %d\n", $self->rank, $len_id, $len_cons; <STDIN>;
|
|
657 }
|
|
658 @data;
|
|
659 }
|
|
660
|
|
661 =head2 n
|
|
662
|
|
663 Usage : $hsp_obj->n()
|
|
664 Purpose : Get the N value (num HSPs on which P/Expect is based).
|
|
665 : This value is not defined with NCBI Blast2 with gapping.
|
|
666 Returns : Integer or null string if not defined.
|
|
667 Argument : n/a
|
|
668 Throws : n/a
|
|
669 Comments : The 'N' value is listed in parenthesis with P/Expect value:
|
|
670 : e.g., P(3) = 1.2e-30 ---> (N = 3).
|
|
671 : Not defined in NCBI Blast2 with gaps.
|
|
672 : This typically is equal to the number of HSPs but not always.
|
|
673 : To obtain the number of HSPs, use Bio::Search::Hit::HitI::num_hsps().
|
|
674
|
|
675 See Also : L<Bio::SeqFeature::SimilarityPair::score()|Bio::SeqFeature::SimilarityPair>
|
|
676
|
|
677 =cut
|
|
678
|
|
679 sub n { shift->throw_not_implemented }
|
|
680
|
|
681 =head2 range
|
|
682
|
|
683 Usage : $hsp->range( [seq_type] );
|
|
684 Purpose : Gets the (start, end) coordinates for the query or sbjct sequence
|
|
685 : in the HSP alignment.
|
|
686 Example : ($query_beg, $query_end) = $hsp->range('query');
|
|
687 : ($hit_beg, $hit_end) = $hsp->range('hit');
|
|
688 Returns : Two-element array of integers
|
|
689 Argument : seq_type = string, 'query' or 'hit' or 'sbjct' (default = 'query')
|
|
690 : ('sbjct' is synonymous with 'hit')
|
|
691 Throws : n/a
|
|
692 Comments : This is a convenience method for constructions such as
|
|
693 ($hsp->query->start, $hsp->query->end)
|
|
694
|
|
695 =cut
|
|
696
|
|
697 sub range { shift->throw_not_implemented }
|
|
698
|
|
699
|
|
700 1;
|
|
701
|