comparison variant_effect_predictor/Bio/Search/HSP/GenericHSP.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: GenericHSP.pm,v 1.40.2.3 2003/03/24 20:44:45 jason Exp $
2 #
3 # BioPerl module for Bio::Search::HSP::GenericHSP
4 #
5 # Cared for by Jason Stajich <jason@bioperl.org>
6 #
7 # Copyright Jason Stajich
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Search::HSP::GenericHSP - A "Generic" implementation of a High Scoring Pair
16
17 =head1 SYNOPSIS
18
19 my $hsp = new Bio::Search::HSP::GenericHSP( -algorithm => 'blastp',
20 -evalue => '1e-30',
21 );
22
23 $r_type = $hsp->algorithm
24
25 $pvalue = $hsp->p();
26
27 $evalue = $hsp->evalue();
28
29 $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
30
31 $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
32
33 $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
34
35 $qseq = $hsp->query_string;
36
37 $hseq = $hsp->hit_string;
38
39 $homo_string = $hsp->homology_string;
40
41 $len = $hsp->length( ['query'|'hit'|'total'] );
42
43 $len = $hsp->length( ['query'|'hit'|'total'] );
44
45 $rank = $hsp->rank;
46
47
48 =head1 DESCRIPTION
49
50 This implementation is "Generic", meaning it is is suitable for
51 holding information about High Scoring pairs from most Search reports
52 such as BLAST and FastA. Specialized objects can be derived from
53 this.
54
55 =head1 FEEDBACK
56
57 =head2 Mailing Lists
58
59 User feedback is an integral part of the evolution of this and other
60 Bioperl modules. Send your comments and suggestions preferably to
61 the Bioperl mailing list. Your participation is much appreciated.
62
63 bioperl-l@bioperl.org - General discussion
64 http://bioperl.org/MailList.shtml - About the mailing lists
65
66 =head2 Reporting Bugs
67
68 Report bugs to the Bioperl bug tracking system to help us keep track
69 of the bugs and their resolution. Bug reports can be submitted via
70 email or the web:
71
72 bioperl-bugs@bioperl.org
73 http://bugzilla.bioperl.org/
74
75 =head1 AUTHOR - Jason Stajich and Steve Chervitz
76
77 Email jason@bioperl.org
78 Email sac@bioperl.org
79
80 Describe contact details here
81
82 =head1 CONTRIBUTORS
83
84 Additional contributors names and emails here
85
86 =head1 APPENDIX
87
88 The rest of the documentation details each of the object methods.
89 Internal methods are usually preceded with a _
90
91 =cut
92
93
94 # Let the code begin...
95
96
97 package Bio::Search::HSP::GenericHSP;
98 use vars qw(@ISA $GAP_SYMBOL);
99 use strict;
100
101 use Bio::Root::Root;
102 use Bio::SeqFeature::Similarity;
103 use Bio::Search::HSP::HSPI;
104
105 @ISA = qw(Bio::Search::HSP::HSPI Bio::Root::Root );
106
107 BEGIN {
108 $GAP_SYMBOL = '-';
109 }
110 =head2 new
111
112 Title : new
113 Usage : my $obj = new Bio::Search::HSP::GenericHSP();
114 Function: Builds a new Bio::Search::HSP::GenericHSP object
115 Returns : Bio::Search::HSP::GenericHSP
116 Args : -algorithm => algorithm used (BLASTP, TBLASTX, FASTX, etc)
117 -evalue => evalue
118 -pvalue => pvalue
119 -bits => bit value for HSP
120 -score => score value for HSP (typically z-score but depends on
121 analysis)
122 -hsp_length=> Length of the HSP (including gaps)
123 -identical => # of residues that that matched identically
124 -conserved => # of residues that matched conservatively
125 (only protein comparisions;
126 conserved == identical in nucleotide comparisons)
127 -hsp_gaps => # of gaps in the HSP
128 -query_gaps => # of gaps in the query in the alignment
129 -hit_gaps => # of gaps in the subject in the alignment
130 -query_name => HSP Query sequence name (if available)
131 -query_start => HSP Query start (in original query sequence coords)
132 -query_end => HSP Query end (in original query sequence coords)
133 -hit_name => HSP Hit sequence name (if available)
134 -hit_start => HSP Hit start (in original hit sequence coords)
135 -hit_end => HSP Hit end (in original hit sequence coords)
136 -hit_length => total length of the hit sequence
137 -query_length=> total length of the query sequence
138 -query_seq => query sequence portion of the HSP
139 -hit_seq => hit sequence portion of the HSP
140 -homology_seq=> homology sequence for the HSP
141 -hit_frame => hit frame (only if hit is translated protein)
142 -query_frame => query frame (only if query is translated protein)
143 -rank => HSP rank
144
145
146 =cut
147
148 sub new {
149 my($class,@args) = @_;
150
151 my $self = $class->SUPER::new(@args);
152 my ($algo, $evalue, $pvalue, $identical, $conserved,
153 $gaps, $query_gaps, $hit_gaps,
154 $hit_seq, $query_seq, $homology_seq,
155 $hsp_len, $query_len,$hit_len,
156 $hit_name,$query_name,$bits,$score,
157 $hs,$he,$qs,$qe,
158 $qframe,$hframe,
159 $rank) = $self->_rearrange([qw(ALGORITHM
160 EVALUE
161 PVALUE
162 IDENTICAL
163 CONSERVED
164 HSP_GAPS
165 QUERY_GAPS
166 HIT_GAPS
167 HIT_SEQ
168 QUERY_SEQ
169 HOMOLOGY_SEQ
170 HSP_LENGTH
171 QUERY_LENGTH
172 HIT_LENGTH
173 HIT_NAME
174 QUERY_NAME
175 BITS
176 SCORE
177 HIT_START
178 HIT_END
179 QUERY_START
180 QUERY_END
181 QUERY_FRAME
182 HIT_FRAME
183 RANK
184 )], @args);
185
186 $algo = 'GENERIC' unless defined $algo;
187 $self->algorithm($algo);
188
189 # defined $evalue && $self->evalue($evalue)
190 # $hsp->significance is initialized by the
191 # the SimilarityPair object - let's only keep one
192 # value, don't need 2 slots.
193
194 defined $pvalue && $self->pvalue($pvalue);
195 defined $bits && $self->bits($bits);
196 defined $score && $self->score($score);
197 my ($queryfactor, $hitfactor) = (0,0);
198
199 if( $algo =~ /^(PSI)?T(BLAST|FAST)[NY]/oi ) {
200 $hitfactor = 1;
201 } elsif ($algo =~ /^(FAST|BLAST)(X|Y|XY)/oi ) {
202 $queryfactor = 1;
203 } elsif ($algo =~ /^T(BLAST|FAST)(X|Y|XY)/oi ||
204 $algo =~ /^(BLAST|FAST)N/oi ||
205 $algo eq 'WABA' ||
206 $algo eq 'EXONERATE' || $algo eq 'MEGABLAST' ||
207 $algo eq 'SMITH-WATERMAN' ){
208 $hitfactor = 1;
209 $queryfactor = 1;
210 } elsif( $algo eq 'RPSBLAST' ) {
211 $queryfactor = $hitfactor = 0;
212 $qframe = $hframe = 0;
213 }
214 # Store the aligned query as sequence feature
215 my $strand;
216 unless( defined $qe && defined $qs ) { $self->throw("Did not specify a Query End or Query Begin @args ($qs,$qe)"); }
217 unless( defined $he && defined $hs ) { $self->throw("Did not specify a Hit End or Hit Begin"); }
218 if ($qe > $qs) { # normal query: start < end
219 if ($queryfactor) { $strand = 1; } else { $strand = undef; }
220 } else { # reverse query (i dont know if this is possible,
221 # but feel free to correct)
222 if ($queryfactor) { $strand = -1; } else { $strand = undef; }
223 ($qs,$qe) = ($qe,$qs);
224
225 }
226 $self->query( new Bio::SeqFeature::Similarity
227 ('-primary' => $self->primary_tag,
228 '-start' => $qs,
229 '-expect' => $evalue,
230 '-bits' => $bits,
231 '-end' => $qe,
232 '-strand' => $strand,
233 '-seq_id' => $query_name,
234 '-seqlength'=> $query_len,
235 '-source' => $algo,
236 ) );
237
238 # to determine frame from something like FASTXY which doesn't
239 # report the frame
240 if( defined $strand && ! defined $qframe && $queryfactor ) {
241 $qframe = ( $self->query->start % 3 ) * $strand;
242 } elsif( ! defined $strand ) {
243 $qframe = 0;
244 }
245 # store the aligned subject as sequence feature
246 if ($he > $hs) { # normal subject
247 if ($hitfactor) { $strand = 1; } else { $strand = undef; }
248 } else {
249 if ($hitfactor) { $strand = -1; } else { $strand = undef; }
250 ($hs,$he) = ( $he,$hs); # reverse subject: start bigger than end
251 }
252
253 $self->hit( Bio::SeqFeature::Similarity->new
254 ('-start' => $hs,
255 '-end' => $he,
256 '-strand' => $strand,
257 '-expect' => $evalue,
258 '-bits' => $bits,
259 '-source' => $algo,
260 '-seq_id' => $hit_name,
261 '-seqlength' => $hit_len,
262 '-primary' => $self->primary_tag ));
263
264 if( defined $strand && ! defined $hframe && $hitfactor ) {
265 $hframe = ( $hs % 3 ) * $strand;
266 } elsif( ! defined $strand ) {
267 $hframe = 0;
268 }
269
270 $self->frame($qframe,$hframe);
271
272 if( ! defined $query_len || ! defined $hit_len ) {
273 $self->throw("Must defined hit and query length");
274 }
275
276 if( ! defined $identical ) {
277 $self->warn("Did not defined the number of identical matches in the HSP assuming 0");
278 $identical = 0;
279 }
280 if( ! defined $conserved ) {
281 $self->warn("Did not defined the number of conserved matches in the HSP assuming conserved == identical ($identical)")
282 if( $algo !~ /^((FAST|BLAST)N)|Exonerate/oi);
283 $conserved = $identical;
284 }
285 # protect for divide by zero if user does not specify
286 # hsp_len, query_len, or hit_len
287
288 $self->num_identical($identical);
289 $self->num_conserved($conserved);
290
291 if( $hsp_len ) {
292 $self->length('total', $hsp_len);
293 $self->frac_identical( 'total', $identical / $self->length('total'));
294 $self->frac_conserved( 'total', $conserved / $self->length('total'));
295 }
296 if( $hit_len ) {
297 # $self->length('hit', $self->hit->length);
298 $self->frac_identical( 'hit', $identical / $self->length('hit'));
299 $self->frac_conserved( 'hit', $conserved / $self->length('hit'));
300 }
301 if( $query_len ) {
302 # $self->length('query', $self->query->length);
303 $self->frac_identical( 'query', $identical / $self->length('query')) ;
304 $self->frac_conserved( 'query', $conserved / $self->length('query'));
305 }
306 $self->query_string($query_seq);
307 $self->hit_string($hit_seq);
308 $self->homology_string($homology_seq);
309
310 if( defined $query_gaps ) {
311 $self->gaps('query', $query_gaps);
312 } elsif( defined $query_seq ) {
313 $self->gaps('query', scalar ( $query_seq =~ tr/\-//));
314 }
315 if( defined $hit_gaps ) {
316 $self->gaps('hit', $hit_gaps);
317 } elsif( defined $hit_seq ) {
318 $self->gaps('hit', scalar ( $hit_seq =~ tr/\-//));
319 }
320 if( ! defined $gaps ) {
321 $gaps = $self->gaps("query") + $self->gaps("hit");
322 }
323 $self->gaps('total', $gaps);
324 $self->percent_identity($identical / $hsp_len ) if( $hsp_len > 0 );
325
326 $rank && $self->rank($rank);
327 return $self;
328 }
329
330
331
332 =head2 Bio::Search::HSP::HSPI methods
333
334 Implementation of Bio::Search::HSP::HSPI methods follow
335
336 =head2 algorithm
337
338 Title : algorithm
339 Usage : my $r_type = $hsp->algorithm
340 Function: Obtain the name of the algorithm used to obtain the HSP
341 Returns : string (e.g., BLASTP)
342 Args : [optional] scalar string to set value
343
344 =cut
345
346 sub algorithm{
347 my ($self,$value) = @_;
348 my $previous = $self->{'_algorithm'};
349 if( defined $value || ! defined $previous ) {
350 $value = $previous = '' unless defined $value;
351 $self->{'_algorithm'} = $value;
352 }
353
354 return $previous;
355 }
356
357 =head2 pvalue
358
359 Title : pvalue
360 Usage : my $pvalue = $hsp->pvalue();
361 Function: Returns the P-value for this HSP or undef
362 Returns : float or exponential (2e-10)
363 P-value is not defined with NCBI Blast2 reports.
364 Args : [optional] numeric to set value
365
366 =cut
367
368 sub pvalue {
369 my ($self,$value) = @_;
370 my $previous = $self->{'_pvalue'};
371 if( defined $value ) {
372 $self->{'_pvalue'} = $value;
373 }
374 return $previous;
375 }
376
377 =head2 evalue
378
379 Title : evalue
380 Usage : my $evalue = $hsp->evalue();
381 Function: Returns the e-value for this HSP
382 Returns : float or exponential (2e-10)
383 Args : [optional] numeric to set value
384
385 =cut
386
387 sub evalue { shift->significance(@_) }
388
389 =head2 frac_identical
390
391 Title : frac_identical
392 Usage : my $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
393 Function: Returns the fraction of identitical positions for this HSP
394 Returns : Float in range 0.0 -> 1.0
395 Args : arg 1: 'query' = num identical / length of query seq (without gaps)
396 'hit' = num identical / length of hit seq (without gaps)
397 'total' = num identical / length of alignment (with gaps)
398 default = 'total'
399 arg 2: [optional] frac identical value to set for the type requested
400
401 =cut
402
403 sub frac_identical {
404 my ($self, $type,$value) = @_;
405
406 $type = lc $type if defined $type;
407 $type = 'total' if( ! defined $type ||
408 $type !~ /query|hit|total/);
409 my $previous = $self->{'_frac_identical'}->{$type};
410 if( defined $value || ! defined $previous ) {
411 $value = $previous = '' unless defined $value;
412 if( $type eq 'hit' || $type eq 'query' ) {
413 $self->$type()->frac_identical( $value);
414 }
415 $self->{'_frac_identical'}->{$type} = $value;
416 }
417 return $previous;
418
419 }
420
421 =head2 frac_conserved
422
423 Title : frac_conserved
424 Usage : my $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
425 Function : Returns the fraction of conserved positions for this HSP.
426 This is the fraction of symbols in the alignment with a
427 positive score.
428 Returns : Float in range 0.0 -> 1.0
429 Args : arg 1: 'query' = num conserved / length of query seq (without gaps)
430 'hit' = num conserved / length of hit seq (without gaps)
431 'total' = num conserved / length of alignment (with gaps)
432 default = 'total'
433 arg 2: [optional] frac conserved value to set for the type requested
434
435 =cut
436
437 sub frac_conserved {
438 my ($self, $type,$value) = @_;
439 $type = lc $type if defined $type;
440 $type = 'total' if( ! defined $type ||
441 $type !~ /query|hit|total/);
442 my $previous = $self->{'_frac_conserved'}->{$type};
443 if( defined $value || ! defined $previous ) {
444 $value = $previous = '' unless defined $value;
445 $self->{'_frac_conserved'}->{$type} = $value;
446 }
447 return $previous;
448 }
449
450 =head2 gaps
451
452 Title : gaps
453 Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
454 Function : Get the number of gaps in the query, hit, or total alignment.
455 Returns : Integer, number of gaps or 0 if none
456 Args : arg 1: 'query' = num gaps in query seq
457 'hit' = num gaps in hit seq
458 'total' = num gaps in whole alignment
459 default = 'total'
460 arg 2: [optional] integer gap value to set for the type requested
461
462 =cut
463
464 sub gaps {
465 my ($self, $type,$value) = @_;
466 $type = lc $type if defined $type;
467 $type = 'total' if( ! defined $type ||
468 $type !~ /query|hit|subject|sbjct|total/);
469 $type = 'hit' if $type =~ /sbjct|subject/;
470 my $previous = $self->{'_gaps'}->{$type};
471 if( defined $value || ! defined $previous ) {
472 $value = $previous = '' unless defined $value;
473 $self->{'_gaps'}->{$type} = $value;
474 }
475 return $previous || 0;
476 }
477
478 =head2 query_string
479
480 Title : query_string
481 Usage : my $qseq = $hsp->query_string;
482 Function: Retrieves the query sequence of this HSP as a string
483 Returns : string
484 Args : [optional] string to set for query sequence
485
486
487 =cut
488
489 sub query_string{
490 my ($self,$value) = @_;
491 my $previous = $self->{'_query_string'};
492 if( defined $value || ! defined $previous ) {
493 $value = $previous = '' unless defined $value;
494 $self->{'_query_string'} = $value;
495 # do some housekeeping so we know when to
496 # re-run _calculate_seq_positions
497 $self->{'_sequenceschanged'} = 1;
498 }
499 return $previous;
500 }
501
502 =head2 hit_string
503
504 Title : hit_string
505 Usage : my $hseq = $hsp->hit_string;
506 Function: Retrieves the hit sequence of this HSP as a string
507 Returns : string
508 Args : [optional] string to set for hit sequence
509
510
511 =cut
512
513 sub hit_string{
514 my ($self,$value) = @_;
515 my $previous = $self->{'_hit_string'};
516 if( defined $value || ! defined $previous ) {
517 $value = $previous = '' unless defined $value;
518 $self->{'_hit_string'} = $value;
519 # do some housekeeping so we know when to
520 # re-run _calculate_seq_positions
521 $self->{'_sequenceschanged'} = 1;
522 }
523 return $previous;
524 }
525
526 =head2 homology_string
527
528 Title : homology_string
529 Usage : my $homo_string = $hsp->homology_string;
530 Function: Retrieves the homology sequence for this HSP as a string.
531 : The homology sequence is the string of symbols in between the
532 : query and hit sequences in the alignment indicating the degree
533 : of conservation (e.g., identical, similar, not similar).
534 Returns : string
535 Args : [optional] string to set for homology sequence
536
537 =cut
538
539 sub homology_string{
540 my ($self,$value) = @_;
541 my $previous = $self->{'_homology_string'};
542 if( defined $value || ! defined $previous ) {
543 $value = $previous = '' unless defined $value;
544 $self->{'_homology_string'} = $value;
545 # do some housekeeping so we know when to
546 # re-run _calculate_seq_positions
547 $self->{'_sequenceschanged'} = 1;
548 }
549 return $previous;
550 }
551
552 =head2 length
553
554 Title : length
555 Usage : my $len = $hsp->length( ['query'|'hit'|'total'] );
556 Function : Returns the length of the query or hit in the alignment
557 (without gaps)
558 or the aggregate length of the HSP (including gaps;
559 this may be greater than either hit or query )
560 Returns : integer
561 Args : arg 1: 'query' = length of query seq (without gaps)
562 'hit' = length of hit seq (without gaps)
563 'total' = length of alignment (with gaps)
564 default = 'total'
565 arg 2: [optional] integer length value to set for specific type
566
567 =cut
568
569 sub length {
570
571 my $self = shift;
572 my $type = shift;
573
574 $type = 'total' unless defined $type;
575 $type = lc $type;
576
577 if( $type =~ /^q/i ) {
578 return $self->query()->length(shift);
579 } elsif( $type =~ /^(hit|subject|sbjct)/ ) {
580 return $self->hit()->length(shift);
581 } else {
582 my $v = shift;
583 if( defined $v ) {
584 $self->{'_hsplength'} = $v;
585 }
586 return $self->{'_hsplength'};
587 }
588 return 0; # should never get here
589 }
590
591 =head2 hsp_length
592
593 Title : hsp_length
594 Usage : my $len = $hsp->hsp_length()
595 Function: shortcut length('hsp')
596 Returns : floating point between 0 and 100
597 Args : none
598
599 =cut
600
601 sub hsp_length { return shift->length('hsp', shift); }
602
603 =head2 percent_identity
604
605 Title : percent_identity
606 Usage : my $percentid = $hsp->percent_identity()
607 Function: Returns the calculated percent identity for an HSP
608 Returns : floating point between 0 and 100
609 Args : none
610
611
612 =cut
613
614
615 =head2 frame
616
617 Title : frame
618 Usage : $hsp->frame($queryframe,$subjectframe)
619 Function: Set the Frame for both query and subject and insure that
620 they agree.
621 This overrides the frame() method implementation in
622 FeaturePair.
623 Returns : array of query and subjects if return type wants an array
624 or query frame if defined or subject frame
625 Args : none
626 Note : Frames are stored in the GFF way (0-2) not 1-3
627 as they are in BLAST (negative frames are deduced by checking
628 the strand of the query or hit)
629
630 =cut
631
632 sub frame {
633 my ($self, $qframe, $sframe) = @_;
634 if( defined $qframe ) {
635 if( $qframe == 0 ) {
636 $qframe = 0;
637 } elsif( $qframe !~ /^([+-])?([1-3])/ ) {
638 $self->warn("Specifying an invalid query frame ($qframe)");
639 $qframe = undef;
640 } else {
641 my $dir = $1;
642 $dir = '+' unless defined $dir;
643 if( ($dir eq '-' && $self->query->strand >= 0) ||
644 ($dir eq '+' && $self->query->strand <= 0) ) {
645 $self->warn("Query frame ($qframe) did not match strand of query (". $self->query->strand() . ")");
646 }
647 # Set frame to GFF [0-2] -
648 # what if someone tries to put in a GFF frame!
649 $qframe = $2 - 1;
650 }
651 $self->query->frame($qframe);
652 }
653 if( defined $sframe ) {
654 if( $sframe == 0 ) {
655 $sframe = 0;
656 } elsif( $sframe !~ /^([+-])?([1-3])/ ) {
657 $self->warn("Specifying an invalid subject frame ($sframe)");
658 $sframe = undef;
659 } else {
660 my $dir = $1;
661 $dir = '+' unless defined $dir;
662 if( ($dir eq '-' && $self->hit->strand >= 0) ||
663 ($dir eq '+' && $self->hit->strand <= 0) )
664 {
665 $self->warn("Subject frame ($sframe) did not match strand of subject (". $self->hit->strand() . ")");
666 }
667
668 # Set frame to GFF [0-2]
669 $sframe = $2 - 1;
670 }
671 $self->hit->frame($sframe);
672 }
673 if (wantarray() && $self->algorithm =~ /^T(BLAST|FAST)(X|Y|XY)/oi)
674 {
675 return ($self->query->frame(), $self->hit->frame());
676 } elsif (wantarray()) {
677 ($self->query->frame() &&
678 return ($self->query->frame(), undef)) ||
679 ($self->hit->frame() &&
680 return (undef, $self->hit->frame()));
681 } else {
682 ($self->query->frame() &&
683 return $self->query->frame()) ||
684 ($self->hit->frame() &&
685 return $self->hit->frame());
686 }
687 }
688
689
690 =head2 get_aln
691
692 Title : get_aln
693 Usage : my $aln = $hsp->gel_aln
694 Function: Returns a Bio::SimpleAlign representing the HSP alignment
695 Returns : Bio::SimpleAlign
696 Args : none
697
698 =cut
699
700 sub get_aln {
701 my ($self) = @_;
702 require Bio::LocatableSeq;
703 require Bio::SimpleAlign;
704 my $aln = new Bio::SimpleAlign;
705 my $hs = $self->hit_string();
706 my $qs = $self->query_string();
707 # FASTA specific stuff moved to the FastaHSP object
708 my $seqonly = $qs;
709 $seqonly =~ s/[\-\s]//g;
710 my ($q_nm,$s_nm) = ($self->query->seq_id(),
711 $self->hit->seq_id());
712 unless( defined $q_nm && CORE::length ($q_nm) ) {
713 $q_nm = 'query';
714 }
715 unless( defined $s_nm && CORE::length ($s_nm) ) {
716 $s_nm = 'hit';
717 }
718 my $query = new Bio::LocatableSeq('-seq' => $qs,
719 '-id' => $q_nm,
720 '-start' => 1,
721 '-end' => CORE::length($seqonly),
722 );
723 $seqonly = $hs;
724 $seqonly =~ s/[\-\s]//g;
725 my $hit = new Bio::LocatableSeq('-seq' => $hs,
726 '-id' => $s_nm,
727 '-start' => 1,
728 '-end' => CORE::length($seqonly),
729 );
730 $aln->add_seq($query);
731 $aln->add_seq($hit);
732 return $aln;
733 }
734
735 =head2 num_conserved
736
737 Title : num_conserved
738 Usage : $obj->num_conserved($newval)
739 Function: returns the number of conserved residues in the alignment
740 Returns : inetger
741 Args : integer (optional)
742
743
744 =cut
745
746 sub num_conserved{
747 my ($self,$value) = @_;
748 if( defined $value) {
749 $self->{'num_conserved'} = $value;
750 }
751 return $self->{'num_conserved'};
752 }
753
754 =head2 num_identical
755
756 Title : num_identical
757 Usage : $obj->num_identical($newval)
758 Function: returns the number of identical residues in the alignment
759 Returns : integer
760 Args : integer (optional)
761
762
763 =cut
764
765 sub num_identical{
766 my ($self,$value) = @_;
767 if( defined $value) {
768 $self->{'_num_identical'} = $value;
769 }
770 return $self->{'_num_identical'};
771 }
772
773 =head2 rank
774
775 Usage : $hsp->rank( [string] );
776 Purpose : Get the rank of the HSP within a given Blast hit.
777 Example : $rank = $hsp->rank;
778 Returns : Integer (1..n) corresponding to the order in which the HSP
779 appears in the BLAST report.
780
781 =cut
782
783 sub rank {
784 my ($self,$value) = @_;
785 if( defined $value) {
786 $self->{'_rank'} = $value;
787 }
788 return $self->{'_rank'};
789 }
790
791
792 =head2 seq_inds
793
794 Title : seq_inds
795 Purpose : Get a list of residue positions (indices) for all identical
796 : or conserved residues in the query or sbjct sequence.
797 Example : @s_ind = $hsp->seq_inds('query', 'identical');
798 : @h_ind = $hsp->seq_inds('hit', 'conserved');
799 @h_ind = $hsp->seq_inds('hit', 'conserved-not-identical');
800 : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
801 Returns : List of integers
802 : May include ranges if collapse is true.
803 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = query)
804 : ('sbjct' is synonymous with 'hit')
805 : class = 'identical' or 'conserved' or 'nomatch' or 'gap'
806 : (default = identical)
807 : (can be shortened to 'id' or 'cons')
808 : or 'conserved-not-identical'
809 : collapse = boolean, if true, consecutive positions are merged
810 : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
811 : collapses to "1-5 7 9-11". This is useful for
812 : consolidating long lists. Default = no collapse.
813 Throws : n/a.
814 Comments :
815
816 See Also : L<Bio::Search::SearchUtils::collapse_nums()|Bio::Search::SearchUtils>,
817 L<Bio::Search::Hit::HitI::seq_inds()|Bio::Search::Hit::HitI>
818
819 =cut
820
821 sub seq_inds{
822 my ($self, $seqType, $class, $collapse) = @_;
823
824 # prepare the internal structures - this is cached so
825 # if the strings have not changed we're okay
826 $self->_calculate_seq_positions();
827
828 $seqType ||= 'query';
829 $class ||= 'identical';
830 $collapse ||= 0;
831 $seqType = 'sbjct' if $seqType eq 'hit';
832 my $t = lc(substr($seqType,0,1));
833 if( $t eq 'q' ) {
834 $seqType = 'query';
835 } elsif ( $t eq 's' || $t eq 'h' ) {
836 $seqType = 'sbjct';
837 } else {
838 $self->warn("unknown seqtype $seqType using 'query'");
839 $seqType = 'query';
840 }
841 $t = lc(substr($class,0,1));
842
843 if( $t eq 'c' ) {
844 if( $class =~ /conserved\-not\-identical/ ) {
845 $class = 'conserved';
846 } else {
847 $class = 'conservedall';
848 }
849 } elsif( $t eq 'i' ) {
850 $class = 'identical';
851 } elsif( $t eq 'n' ) {
852 $class = 'nomatch';
853 } elsif( $t eq 'g' ) {
854 $class = 'gap';
855 } else {
856 $self->warn("unknown sequence class $class using 'identical'");
857 $class = 'identical';
858 }
859
860 ## Sensitive to member name changes.
861 $seqType = "_\L$seqType\E";
862 $class = "_\L$class\E";
863 my @ary;
864
865 if( $class eq '_gap' ) {
866 # this means that we are remapping the gap length that is stored
867 # in the hash (for example $self->{'_gapRes_query'} )
868 # so we'll return an array which has the values of the position of the
869 # of the gap (the key in the hash) + the gap length (value in the
870 # hash for this key - 1.
871
872 @ary = map { $_ > 1 ?
873 $_..($_ + $self->{"${class}Res$seqType"}->{$_} - 1) :
874 $_ }
875 sort { $a <=> $b } keys %{ $self->{"${class}Res$seqType"}};
876 } elsif( $class eq '_conservedall' ) {
877 @ary = sort { $a <=> $b }
878 keys %{ $self->{"_conservedRes$seqType"}},
879 keys %{ $self->{"_identicalRes$seqType"}},
880 } else {
881 @ary = sort { $a <=> $b } keys %{ $self->{"${class}Res$seqType"}};
882 }
883 require Bio::Search::BlastUtils if $collapse;
884
885 return $collapse ? &Bio::Search::SearchUtils::collapse_nums(@ary) : @ary;
886 }
887
888
889 =head2 Inherited from Bio::SeqFeature::SimilarityPair
890
891 These methods come from Bio::SeqFeature::SimilarityPair
892
893 =head2 query
894
895 Title : query
896 Usage : my $query = $hsp->query
897 Function: Returns a SeqFeature representing the query in the HSP
898 Returns : Bio::SeqFeature::Similarity
899 Args : [optional] new value to set
900
901
902 =head2 hit
903
904 Title : hit
905 Usage : my $hit = $hsp->hit
906 Function: Returns a SeqFeature representing the hit in the HSP
907 Returns : Bio::SeqFeature::Similarity
908 Args : [optional] new value to set
909
910
911 =head2 significance
912
913 Title : significance
914 Usage : $evalue = $obj->significance();
915 $obj->significance($evalue);
916 Function: Get/Set the significance value
917 Returns : numeric
918 Args : [optional] new value to set
919
920
921 =head2 score
922
923 Title : score
924 Usage : my $score = $hsp->score();
925 Function: Returns the score for this HSP or undef
926 Returns : numeric
927 Args : [optional] numeric to set value
928
929 =cut
930
931 # overriding
932
933 sub score {
934 my ($self,$value) = @_;
935 my $previous = $self->{'_score'};
936 if( defined $value ) {
937 $self->{'_score'} = $value;
938 }
939 return $previous;
940 }
941
942 =head2 bits
943
944 Title : bits
945 Usage : my $bits = $hsp->bits();
946 Function: Returns the bit value for this HSP or undef
947 Returns : numeric
948 Args : none
949
950 =cut
951
952 # overriding
953
954 sub bits {
955 my ($self,$value) = @_;
956 my $previous = $self->{'_bits'};
957 if( defined $value ) {
958 $self->{'_bits'} = $value;
959 }
960 return $previous;
961 }
962
963
964 =head2 strand
965
966 Title : strand
967 Usage : $hsp->strand('quer')
968 Function: Retrieves the strand for the HSP component requested
969 Returns : +1 or -1 (0 if unknown)
970 Args : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject
971 'query' to retrieve the query strand (default)
972
973 =cut
974
975 =head1 Private methods
976
977 =cut
978
979 =head2 _calculate_seq_positions
980
981 Title : _calculate_seq_positions
982 Usage : $self->_calculate_seq_positions
983 Function: Internal function
984 Returns :
985 Args :
986
987
988 =cut
989
990 sub _calculate_seq_positions {
991 my ($self,@args) = @_;
992 return unless ( $self->{'_sequenceschanged'} );
993 $self->{'_sequenceschanged'} = 0;
994 my ($mchar, $schar, $qchar);
995 my ($seqString, $qseq,$sseq) = ( $self->homology_string(),
996 $self->query_string(),
997 $self->hit_string() );
998
999 # Using hashes to avoid saving duplicate residue numbers.
1000 my %identicalList_query = ();
1001 my %identicalList_sbjct = ();
1002 my %conservedList_query = ();
1003 my %conservedList_sbjct = ();
1004
1005 my %gapList_query = ();
1006 my %gapList_sbjct = ();
1007 my %nomatchList_query = ();
1008 my %nomatchList_sbjct = ();
1009
1010 my $qdir = $self->query->strand || 1;
1011 my $sdir = $self->hit->strand || 1;
1012 my $resCount_query = ($qdir >=0) ? $self->query->end : $self->query->start;
1013 my $resCount_sbjct = ($sdir >=0) ? $self->hit->end : $self->hit->start;
1014
1015 my $prog = $self->algorithm;
1016 if( $prog =~ /FAST/i ) {
1017 # fasta reports some extra 'regional' sequence information
1018 # we need to clear out first
1019 # this seemed a bit insane to me at first, but it appears to
1020 # work --jason
1021
1022 # we infer the end of the regional sequence where the first
1023 # non space is in the homology string
1024 # then we use the HSP->length to tell us how far to read
1025 # to cut off the end of the sequence
1026
1027 # one possible problem is the sequence which
1028
1029 my ($start) = (0);
1030 if( $seqString =~ /^(\s+)/ ) {
1031 $start = CORE::length($1);
1032 }
1033
1034 $seqString = substr($seqString, $start,$self->length('total'));
1035 $qseq = substr($qseq, $start,$self->length('total'));
1036 $sseq = substr($sseq, $start,$self->length('total'));
1037
1038 $qseq =~ s![\\\/]!!g;
1039 $sseq =~ s![\\\/]!!g;
1040 }
1041
1042 if($prog =~ /^(PSI)?T(BLAST|FAST)N/oi ) {
1043 $resCount_sbjct = int($resCount_sbjct / 3);
1044 } elsif($prog =~ /^(BLAST|FAST)(X|Y|XY)/oi ) {
1045 $resCount_query = int($resCount_query / 3);
1046 } elsif($prog =~ /^T(BLAST|FAST)(X|Y|XY)/oi ) {
1047 $resCount_query = int($resCount_query / 3);
1048 $resCount_sbjct = int($resCount_sbjct / 3);
1049 }
1050 while( $mchar = chop($seqString) ) {
1051 ($qchar, $schar) = (chop($qseq), chop($sseq));
1052 if( $mchar eq '+' || $mchar eq '.' || $mchar eq ':' ) {
1053 $conservedList_query{ $resCount_query } = 1;
1054 $conservedList_sbjct{ $resCount_sbjct } = 1;
1055 } elsif( $mchar ne ' ' ) {
1056 $identicalList_query{ $resCount_query } = 1;
1057 $identicalList_sbjct{ $resCount_sbjct } = 1;
1058 } elsif( $mchar eq ' ') {
1059 $nomatchList_query{ $resCount_query } = 1;
1060 $nomatchList_sbjct{ $resCount_sbjct } = 1;
1061 }
1062 if( $qchar eq $GAP_SYMBOL ) {
1063 $gapList_query{ $resCount_query } ++;
1064 } else {
1065 $resCount_query -= $qdir;
1066 }
1067 if( $schar eq $GAP_SYMBOL ) {
1068 $gapList_sbjct{ $resCount_query } ++;
1069 } else {
1070 $resCount_sbjct -=$sdir;
1071 }
1072 }
1073 $self->{'_identicalRes_query'} = \%identicalList_query;
1074 $self->{'_conservedRes_query'} = \%conservedList_query;
1075 $self->{'_nomatchRes_query'} = \%nomatchList_query;
1076 $self->{'_gapRes_query'} = \%gapList_query;
1077
1078 $self->{'_identicalRes_sbjct'} = \%identicalList_sbjct;
1079 $self->{'_conservedRes_sbjct'} = \%conservedList_sbjct;
1080 $self->{'_nomatchRes_sbjct'} = \%nomatchList_sbjct;
1081 $self->{'_gapRes_sbjct'} = \%gapList_sbjct;
1082 return 1;
1083 }
1084
1085 =head2 n
1086
1087 See documentation in L<Bio::Search::HSP::HSPI::n()|Bio::Search::HSP::HSPI>
1088
1089 =cut
1090
1091 #-----
1092 sub n {
1093 my $self = shift;
1094 if(@_) { $self->{'_n'} = shift; }
1095 defined $self->{'_n'} ? $self->{'_n'} : '';
1096 }
1097
1098 =head2 range
1099
1100 See documentation in L<Bio::Search::HSP::HSPI::range()|Bio::Search::HSP::HSPI>
1101
1102 =cut
1103
1104 #----------
1105 sub range {
1106 #----------
1107 my ($self, $seqType) = @_;
1108
1109 $seqType ||= 'query';
1110 $seqType = 'sbjct' if $seqType eq 'hit';
1111
1112 my ($start, $end);
1113 if( $seqType eq 'query' ) {
1114 $start = $self->query->start;
1115 $end = $self->query->end;
1116 }
1117 else {
1118 $start = $self->hit->start;
1119 $end = $self->hit->end;
1120 }
1121 return ($start, $end);
1122 }
1123
1124
1125 1;