annotate variant_effect_predictor/Bio/Search/HSP/GenericHSP.pm @ 1:d6778b5d8382 draft default tip

Deleted selected files
author willmclaren
date Fri, 03 Aug 2012 10:05:43 -0400
parents 21066c0abaf5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 # $Id: GenericHSP.pm,v 1.40.2.3 2003/03/24 20:44:45 jason Exp $
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 # BioPerl module for Bio::Search::HSP::GenericHSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5 # Cared for by Jason Stajich <jason@bioperl.org>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 # Copyright Jason Stajich
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 # POD documentation - main docs before the code
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15 Bio::Search::HSP::GenericHSP - A "Generic" implementation of a High Scoring Pair
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 my $hsp = new Bio::Search::HSP::GenericHSP( -algorithm => 'blastp',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20 -evalue => '1e-30',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 $r_type = $hsp->algorithm;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25 $pvalue = $hsp->p();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 $evalue = $hsp->evalue();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29 $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31 $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33 $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35 $qseq = $hsp->query_string;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37 $hseq = $hsp->hit_string;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39 $homo_string = $hsp->homology_string;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41 $len = $hsp->length( ['query'|'hit'|'total'] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 $len = $hsp->length( ['query'|'hit'|'total'] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45 $rank = $hsp->rank;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50 This implementation is "Generic", meaning it is suitable for
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51 holding information about High Scoring pairs from most Search reports
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52 such as BLAST and FastA. Specialized objects can be derived from
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53 this.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55 =head1 FEEDBACK
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57 =head2 Mailing Lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59 User feedback is an integral part of the evolution of this and other
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60 Bioperl modules. Send your comments and suggestions preferably to
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61 the Bioperl mailing list. Your participation is much appreciated.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
63 bioperl-l@bioperl.org - General discussion
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
64 http://bioperl.org/MailList.shtml - About the mailing lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
65
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
66 =head2 Reporting Bugs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68 Report bugs to the Bioperl bug tracking system to help us keep track
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69 of the bugs and their resolution. Bug reports can be submitted via
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70 email or the web:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72 bioperl-bugs@bioperl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73 http://bugzilla.bioperl.org/
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75 =head1 AUTHOR - Jason Stajich and Steve Chervitz
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77 Email jason@bioperl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78 Email sac@bioperl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80 Describe contact details here
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82 =head1 CONTRIBUTORS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84 Additional contributors names and emails here
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86 =head1 APPENDIX
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88 The rest of the documentation details each of the object methods.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89 Internal methods are usually preceded with a _
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
94 # Let the code begin...
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
95
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
96
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
97 package Bio::Search::HSP::GenericHSP;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
98 use vars qw(@ISA $GAP_SYMBOL);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
99 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
100
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
101 use Bio::Root::Root;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
102 use Bio::SeqFeature::Similarity;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
103 use Bio::Search::HSP::HSPI;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
104
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
105 @ISA = qw(Bio::Search::HSP::HSPI Bio::Root::Root );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
106
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Compile-time initialisation: assign the package variable $GAP_SYMBOL
# (declared via "use vars" above) the single-character string '-'.
107 BEGIN {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
108 $GAP_SYMBOL = '-';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
109 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
110 =head2 new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
111
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
112 Title : new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
113 Usage : my $obj = new Bio::Search::HSP::GenericHSP();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
114 Function: Builds a new Bio::Search::HSP::GenericHSP object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
115 Returns : Bio::Search::HSP::GenericHSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
116 Args : -algorithm => algorithm used (BLASTP, TBLASTX, FASTX, etc)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
117 -evalue => evalue
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
118 -pvalue => pvalue
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
119 -bits => bit value for HSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
120 -score => score value for HSP (typically z-score but depends on
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
121 analysis)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
122 -hsp_length=> Length of the HSP (including gaps)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
123 -identical => # of residues that matched identically
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
124 -conserved => # of residues that matched conservatively
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
125 (only protein comparisons;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
126 conserved == identical in nucleotide comparisons)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
127 -hsp_gaps => # of gaps in the HSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
128 -query_gaps => # of gaps in the query in the alignment
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
129 -hit_gaps => # of gaps in the subject in the alignment
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
130 -query_name => HSP Query sequence name (if available)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
131 -query_start => HSP Query start (in original query sequence coords)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
132 -query_end => HSP Query end (in original query sequence coords)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
133 -hit_name => HSP Hit sequence name (if available)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
134 -hit_start => HSP Hit start (in original hit sequence coords)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
135 -hit_end => HSP Hit end (in original hit sequence coords)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
136 -hit_length => total length of the hit sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
137 -query_length=> total length of the query sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
138 -query_seq => query sequence portion of the HSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
139 -hit_seq => hit sequence portion of the HSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
140 -homology_seq=> homology sequence for the HSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
141 -hit_frame => hit frame (only if hit is translated protein)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
142 -query_frame => query frame (only if query is translated protein)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
143 -rank => HSP rank
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
144
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
145
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
146 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
147
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# new - constructor.
#
# Parses the named arguments documented in the POD above, records the
# scores, builds one Bio::SeqFeature::Similarity feature for the query and
# one for the hit, and then stores frames, match counts, match fractions,
# the alignment strings and the gap counts on the object.
#
# Throws : if the query start/end or the hit start/end is missing, or if
#          the query or hit length is missing.
# Warns  : if -identical is missing (0 is assumed), and, for algorithms not
#          matched by the nucleotide/Exonerate pattern below, if -conserved
#          is missing (the identical count is used instead).
# Returns: the newly built object.
sub new {
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);

    # The order of the variables must match the order of the keys exactly.
    my ($algo, $evalue, $pvalue, $identical, $conserved,
        $gaps, $query_gaps, $hit_gaps,
        $hit_seq, $query_seq, $homology_seq,
        $hsp_len, $query_len, $hit_len,
        $hit_name, $query_name, $bits, $score,
        $hs, $he, $qs, $qe,
        $qframe, $hframe,
        $rank) = $self->_rearrange([qw(ALGORITHM
                                       EVALUE
                                       PVALUE
                                       IDENTICAL
                                       CONSERVED
                                       HSP_GAPS
                                       QUERY_GAPS
                                       HIT_GAPS
                                       HIT_SEQ
                                       QUERY_SEQ
                                       HOMOLOGY_SEQ
                                       HSP_LENGTH
                                       QUERY_LENGTH
                                       HIT_LENGTH
                                       HIT_NAME
                                       QUERY_NAME
                                       BITS
                                       SCORE
                                       HIT_START
                                       HIT_END
                                       QUERY_START
                                       QUERY_END
                                       QUERY_FRAME
                                       HIT_FRAME
                                       RANK
                                       )], @args);

    $algo = 'GENERIC' unless defined $algo;
    $self->algorithm($algo);

    # The e-value is not stored in a slot of its own here; it is handed to
    # the query and hit Similarity features below as '-expect'.
    $self->pvalue($pvalue) if defined $pvalue;
    $self->bits($bits)     if defined $bits;
    $self->score($score)   if defined $score;

    # These two flags decide, per side, whether a strand (+1/-1) is assigned
    # to the feature built below or whether the strand is left undefined.
    my ($queryfactor, $hitfactor) = (0, 0);

    if ( $algo =~ /^(PSI)?T(BLAST|FAST)[NY]/i ) {
        $hitfactor = 1;
    }
    elsif ( $algo =~ /^(FAST|BLAST)(X|Y|XY)/i ) {
        $queryfactor = 1;
    }
    elsif ( $algo =~ /^T(BLAST|FAST)(X|Y|XY)/i
            || $algo =~ /^(BLAST|FAST)N/i
            || $algo eq 'WABA'
            || $algo eq 'EXONERATE'
            || $algo eq 'MEGABLAST'
            || $algo eq 'SMITH-WATERMAN' ) {
        $hitfactor   = 1;
        $queryfactor = 1;
    }
    elsif ( $algo eq 'RPSBLAST' ) {
        $queryfactor = $hitfactor = 0;
        $qframe      = $hframe    = 0;
    }

    # Both coordinate pairs are mandatory.  The undefined values are rendered
    # as the text 'undef' so that building the error message does not itself
    # trigger "uninitialized value" warnings.
    unless ( defined $qe && defined $qs ) {
        my ($qs_txt, $qe_txt) = map { defined $_ ? $_ : 'undef' } ($qs, $qe);
        my @shown_args        = map { defined $_ ? $_ : 'undef' } @args;
        $self->throw("Did not specify a Query End or Query Begin @shown_args ($qs_txt,$qe_txt)");
    }
    unless ( defined $he && defined $hs ) {
        $self->throw("Did not specify a Hit End or Hit Begin");
    }

    # Store the aligned query as a sequence feature.
    my $strand;
    if ( $qe > $qs ) {    # start < end
        $strand = $queryfactor ? 1 : undef;
    }
    else {
        # Start is not smaller than end: use the reverse strand (when a
        # strand applies at all) and swap so the feature gets start <= end.
        $strand = $queryfactor ? -1 : undef;
        ($qs, $qe) = ($qe, $qs);
    }

    $self->query( Bio::SeqFeature::Similarity->new
                  ('-primary'   => $self->primary_tag,
                   '-start'     => $qs,
                   '-expect'    => $evalue,
                   '-bits'      => $bits,
                   '-end'       => $qe,
                   '-strand'    => $strand,
                   '-seq_id'    => $query_name,
                   '-seqlength' => $query_len,
                   '-source'    => $algo,
                   ) );

    # Derive a frame from the start coordinate when none was supplied
    # (the original comment cites FASTXY as a report without frame info).
    # Without a strand the query frame is forced to 0.
    if ( defined $strand && !defined $qframe && $queryfactor ) {
        $qframe = ( $self->query->start % 3 ) * $strand;
    }
    elsif ( !defined $strand ) {
        $qframe = 0;
    }

    # Store the aligned subject (hit) as a sequence feature.
    if ( $he > $hs ) {    # start < end
        $strand = $hitfactor ? 1 : undef;
    }
    else {
        # Start is not smaller than end: same treatment as for the query.
        $strand = $hitfactor ? -1 : undef;
        ($hs, $he) = ($he, $hs);
    }

    $self->hit( Bio::SeqFeature::Similarity->new
                ('-start'     => $hs,
                 '-end'       => $he,
                 '-strand'    => $strand,
                 '-expect'    => $evalue,
                 '-bits'      => $bits,
                 '-source'    => $algo,
                 '-seq_id'    => $hit_name,
                 '-seqlength' => $hit_len,
                 '-primary'   => $self->primary_tag ) );

    if ( defined $strand && !defined $hframe && $hitfactor ) {
        $hframe = ( $hs % 3 ) * $strand;
    }
    elsif ( !defined $strand ) {
        $hframe = 0;
    }

    $self->frame($qframe, $hframe);

    if ( !defined $query_len || !defined $hit_len ) {
        $self->throw("Must defined hit and query length");
    }

    if ( !defined $identical ) {
        $self->warn("Did not defined the number of identical matches in the HSP assuming 0");
        $identical = 0;
    }
    if ( !defined $conserved ) {
        $self->warn("Did not defined the number of conserved matches in the HSP assuming conserved == identical ($identical)")
            if ( $algo !~ /^((FAST|BLAST)N)|Exonerate/i );
        $conserved = $identical;
    }

    $self->num_identical($identical);
    $self->num_conserved($conserved);

    # Each fraction is only computed when the corresponding length argument
    # is true (defined and non-zero), to protect against division by zero.
    if ( $hsp_len ) {
        $self->length('total', $hsp_len);
        $self->frac_identical( 'total', $identical / $self->length('total') );
        $self->frac_conserved( 'total', $conserved / $self->length('total') );
    }
    if ( $hit_len ) {
        # NOTE(review): the divisor is length('hit'), not $hit_len -- confirm
        # that length('hit') cannot be zero here.
        $self->frac_identical( 'hit', $identical / $self->length('hit') );
        $self->frac_conserved( 'hit', $conserved / $self->length('hit') );
    }
    if ( $query_len ) {
        # NOTE(review): same remark as for the hit side.
        $self->frac_identical( 'query', $identical / $self->length('query') );
        $self->frac_conserved( 'query', $conserved / $self->length('query') );
    }

    $self->query_string($query_seq);
    $self->hit_string($hit_seq);
    $self->homology_string($homology_seq);

    # Gap counts: an explicit count wins; otherwise count the '-' characters
    # in the aligned string when that string was supplied.
    if ( defined $query_gaps ) {
        $self->gaps('query', $query_gaps);
    }
    elsif ( defined $query_seq ) {
        $self->gaps('query', scalar( $query_seq =~ tr/\-// ));
    }
    if ( defined $hit_gaps ) {
        $self->gaps('hit', $hit_gaps);
    }
    elsif ( defined $hit_seq ) {
        $self->gaps('hit', scalar( $hit_seq =~ tr/\-// ));
    }
    if ( !defined $gaps ) {
        $gaps = $self->gaps("query") + $self->gaps("hit");
    }
    $self->gaps('total', $gaps);

    # -hsp_length is optional, so check definedness before comparing it.
    $self->percent_identity( $identical / $hsp_len )
        if ( defined $hsp_len && $hsp_len > 0 );

    $self->rank($rank) if $rank;
    return $self;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
329
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
330
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
331
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
332 =head2 Bio::Search::HSP::HSPI methods
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
333
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
334 Implementation of Bio::Search::HSP::HSPI methods follow
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
335
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
336 =head2 algorithm
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
337
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
338 Title : algorithm
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
339 Usage : my $r_type = $hsp->algorithm
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
340 Function: Obtain the name of the algorithm used to obtain the HSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
341 Returns : string (e.g., BLASTP)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
342 Args : [optional] scalar string to set value
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
343
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
344 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
345
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Get or set the algorithm name kept in the '_algorithm' slot.
#
# Args   : [optional] string to store.
# Returns: the value held in the slot BEFORE this call.  When called without
#          a value on an object whose slot is still undefined, the slot is
#          initialised to '' and '' is returned.
sub algorithm {
    my ($self, $value) = @_;
    my $previous = $self->{'_algorithm'};

    # Plain read of an already populated slot: nothing to store.
    return $previous if !defined $value && defined $previous;

    # No value given and nothing stored yet: fall back to the empty string
    # for both the stored and the returned value.
    if ( !defined $value ) {
        $value    = '';
        $previous = '';
    }

    $self->{'_algorithm'} = $value;
    return $previous;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
356
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
357 =head2 pvalue
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
358
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
359 Title : pvalue
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
360 Usage : my $pvalue = $hsp->pvalue();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
361 Function: Returns the P-value for this HSP or undef
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
362 Returns : float or exponential (2e-10)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
363 P-value is not defined with NCBI Blast2 reports.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
364 Args : [optional] numeric to set value
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
365
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
366 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
367
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub pvalue {
    my $self      = shift;
    my $new_value = shift;

    # Remember what was stored before any update; that is what gets returned.
    my $old_value = $self->{'_pvalue'};

    $self->{'_pvalue'} = $new_value if defined $new_value;

    return $old_value;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
376
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
377 =head2 evalue
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
378
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
379 Title : evalue
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
380 Usage : my $evalue = $hsp->evalue();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
381 Function: Returns the e-value for this HSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
382 Returns : float or exponential (2e-10)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
383 Args : [optional] numeric to set value
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
384
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
385 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
386
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub evalue {
    # The e-value is kept in the significance slot; hand every remaining
    # argument straight through to significance().
    my $self = shift;
    return $self->significance(@_);
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
388
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
389 =head2 frac_identical
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
390
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
391 Title : frac_identical
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
392 Usage : my $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
393 Function: Returns the fraction of identical positions for this HSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
394 Returns : Float in range 0.0 -> 1.0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
395 Args : arg 1: 'query' = num identical / length of query seq (without gaps)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
396 'hit' = num identical / length of hit seq (without gaps)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
397 'total' = num identical / length of alignment (with gaps)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
398 default = 'total'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
399 arg 2: [optional] frac identical value to set for the type requested
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
400
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
401 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
402
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub frac_identical {
    my ($self, $type, $value) = @_;

    # Normalise the requested type.  'subject' and 'sbjct' are treated as
    # aliases for 'hit', the same way gaps() and length() treat them;
    # previously they silently fell through to 'total'.  A missing or
    # unrecognised type means 'total'.
    $type = lc $type if defined $type;
    $type = 'hit' if defined $type && $type =~ /subject|sbjct/;
    $type = 'total' if( ! defined $type ||
                        $type !~ /query|hit|total/ );

    my $previous = $self->{'_frac_identical'}->{$type};
    if( defined $value || ! defined $previous ) {
        # A bare read of an empty slot initialises it to ''.
        $value = $previous = '' unless defined $value;

        # Keep the per-sequence feature object in step with the HSP.
        if( $type eq 'hit' || $type eq 'query' ) {
            $self->$type()->frac_identical($value);
        }
        $self->{'_frac_identical'}->{$type} = $value;
    }

    # On a set, the value returned is the one stored before the call.
    return $previous;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
420
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
421 =head2 frac_conserved
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
422
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
423 Title : frac_conserved
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
424 Usage : my $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
425 Function : Returns the fraction of conserved positions for this HSP.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
426 This is the fraction of symbols in the alignment with a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
427 positive score.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
428 Returns : Float in range 0.0 -> 1.0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
429 Args : arg 1: 'query' = num conserved / length of query seq (without gaps)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
430 'hit' = num conserved / length of hit seq (without gaps)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
431 'total' = num conserved / length of alignment (with gaps)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
432 default = 'total'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
433 arg 2: [optional] frac conserved value to set for the type requested
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
434
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
435 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
436
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub frac_conserved {
    my ($self, $type, $value) = @_;

    # Normalise the requested type.  'subject' and 'sbjct' are treated as
    # aliases for 'hit', the same way gaps() and length() treat them;
    # previously they silently fell through to 'total'.  A missing or
    # unrecognised type means 'total'.
    $type = lc $type if defined $type;
    $type = 'hit' if defined $type && $type =~ /subject|sbjct/;
    $type = 'total' if( ! defined $type ||
                        $type !~ /query|hit|total/ );

    my $previous = $self->{'_frac_conserved'}->{$type};
    if( defined $value || ! defined $previous ) {
        # A bare read of an empty slot initialises it to ''.
        $value = $previous = '' unless defined $value;
        $self->{'_frac_conserved'}->{$type} = $value;
    }

    # On a set, the value returned is the one stored before the call.
    return $previous;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
449
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
450 =head2 gaps
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
451
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
452 Title : gaps
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
453 Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
454 Function : Get the number of gaps in the query, hit, or total alignment.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
455 Returns : Integer, number of gaps or 0 if none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
456 Args : arg 1: 'query' = num gaps in query seq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
457 'hit' = num gaps in hit seq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
458 'total' = num gaps in whole alignment
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
459 default = 'total'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
460 arg 2: [optional] integer gap value to set for the type requested
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
461
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
462 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
463
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub gaps {
    my $self = shift;
    my ($type, $value) = @_;

    # Work out which counter is meant: no type or an unrecognised type
    # means 'total'; 'subject'/'sbjct' are other spellings of 'hit'.
    if (defined $type) {
        $type = lc $type;
        $type = 'total' if $type !~ /query|hit|subject|sbjct|total/;
    }
    else {
        $type = 'total';
    }
    $type = 'hit' if $type =~ /sbjct|subject/;

    my $old = $self->{'_gaps'}->{$type};

    if (defined $value) {
        $self->{'_gaps'}->{$type} = $value;
    }
    elsif (!defined $old) {
        # A bare read of an empty slot initialises it to ''.
        $old = '';
        $self->{'_gaps'}->{$type} = '';
    }

    # Anything false ('' / undef / 0) is reported as 0; on a set this is
    # the value held before the call.
    return $old ? $old : 0;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
477
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
478 =head2 query_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
479
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
480 Title : query_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
481 Usage : my $qseq = $hsp->query_string;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
482 Function: Retrieves the query sequence of this HSP as a string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
483 Returns : string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
484 Args : [optional] string to set for query sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
485
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
486
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
487 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
488
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub query_string {
    my ($self, $value) = @_;
    my $old = $self->{'_query_string'};

    # Nothing to do for a plain read of an already-populated slot.
    return $old if !defined $value && defined $old;

    # First read of an empty slot: initialise it to the empty string.
    ($value, $old) = ('', '') unless defined $value;

    $self->{'_query_string'} = $value;

    # Flag the change so that _calculate_seq_positions is re-run when needed.
    $self->{'_sequenceschanged'} = 1;

    # On a set, the value returned is the one stored before the call.
    return $old;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
501
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
502 =head2 hit_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
503
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
504 Title : hit_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
505 Usage : my $hseq = $hsp->hit_string;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
506 Function: Retrieves the hit sequence of this HSP as a string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
507 Returns : string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
508 Args : [optional] string to set for hit sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
509
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
510
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
511 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
512
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub hit_string {
    my ($self, $value) = @_;
    my $old = $self->{'_hit_string'};

    # Nothing to do for a plain read of an already-populated slot.
    return $old if !defined $value && defined $old;

    # First read of an empty slot: initialise it to the empty string.
    ($value, $old) = ('', '') unless defined $value;

    $self->{'_hit_string'} = $value;

    # Flag the change so that _calculate_seq_positions is re-run when needed.
    $self->{'_sequenceschanged'} = 1;

    # On a set, the value returned is the one stored before the call.
    return $old;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
525
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
526 =head2 homology_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
527
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
528 Title : homology_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
529 Usage : my $homo_string = $hsp->homology_string;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
530 Function: Retrieves the homology sequence for this HSP as a string.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
531 : The homology sequence is the string of symbols in between the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
532 : query and hit sequences in the alignment indicating the degree
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
533 : of conservation (e.g., identical, similar, not similar).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
534 Returns : string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
535 Args : [optional] string to set for homology sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
536
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
537 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
538
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub homology_string {
    my ($self, $value) = @_;
    my $old = $self->{'_homology_string'};

    # Nothing to do for a plain read of an already-populated slot.
    return $old if !defined $value && defined $old;

    # First read of an empty slot: initialise it to the empty string.
    ($value, $old) = ('', '') unless defined $value;

    $self->{'_homology_string'} = $value;

    # Flag the change so that _calculate_seq_positions is re-run when needed.
    $self->{'_sequenceschanged'} = 1;

    # On a set, the value returned is the one stored before the call.
    return $old;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
551
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
552 =head2 length
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
553
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
554 Title : length
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
555 Usage : my $len = $hsp->length( ['query'|'hit'|'total'] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
556 Function : Returns the length of the query or hit in the alignment
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
557 (without gaps)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
558 or the aggregate length of the HSP (including gaps;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
559 this may be greater than either hit or query )
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
560 Returns : integer
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
561 Args : arg 1: 'query' = length of query seq (without gaps)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
562 'hit' = length of hit seq (without gaps)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
563 'total' = length of alignment (with gaps)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
564 default = 'total'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
565 arg 2: [optional] integer length value to set for specific type
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
566
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
567 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
568
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub length {
    my ($self, $type, $value) = @_;

    # No type given means the whole alignment.
    $type = lc( defined $type ? $type : 'total' );

    # Query / hit lengths live on the respective feature objects; the
    # optional value (undef when absent) is handed on to them unchanged.
    return $self->query()->length($value) if $type =~ /^q/i;
    return $self->hit()->length($value)   if $type =~ /^(hit|subject|sbjct)/;

    # Every other type selects the length stored on the HSP itself.
    $self->{'_hsplength'} = $value if defined $value;
    return $self->{'_hsplength'};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
590
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
591 =head2 hsp_length
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
592
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
593 Title : hsp_length
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
594 Usage : my $len = $hsp->hsp_length()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
595 Function: shortcut length('hsp')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
596 Returns : integer
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
597 Args : [optional] integer length value to set
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
598
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
599 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
600
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub hsp_length {
    # Shortcut for length('hsp', ...); the optional value is passed on as-is.
    my $self  = shift;
    my $value = shift;
    return $self->length('hsp', $value);
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
602
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
603 =head2 percent_identity
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
604
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
605 Title : percent_identity
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
606 Usage : my $percentid = $hsp->percent_identity()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
607 Function: Returns the calculated percent identity for an HSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
608 Returns : floating point between 0 and 100
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
609 Args : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
610
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
611
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
612 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
613
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
614
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
615 =head2 frame
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
616
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
617 Title : frame
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
618 Usage : $hsp->frame($queryframe,$subjectframe)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
619 Function: Set the Frame for both query and subject and ensure that
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
620 they agree.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
621 This overrides the frame() method implementation in
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
622 FeaturePair.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
623 Returns : array of query and subjects if return type wants an array
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
624 or query frame if defined or subject frame
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
625 Args : [optional] query frame, [optional] subject frame (0, or 1-3 with an optional +/- sign)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
626 Note : Frames are stored in the GFF way (0-2) not 1-3
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
627 as they are in BLAST (negative frames are deduced by checking
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
628 the strand of the query or hit)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
629
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
630 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
631
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub frame {
    my ($self, $qframe, $sframe) = @_;

    # ---- query frame ----------------------------------------------------
    if (defined $qframe) {
        if ($qframe == 0) {
            $qframe = 0;
        }
        elsif ($qframe =~ /^([+-])?([1-3])/) {
            # Signed frame 1..3: an absent sign counts as '+'.
            my $sign  = defined $1 ? $1 : '+';
            my $digit = $2;
            if (   ($sign eq '-' && $self->query->strand >= 0)
                || ($sign eq '+' && $self->query->strand <= 0) ) {
                $self->warn("Query frame ($qframe) did not match strand of query (". $self->query->strand() . ")");
            }
            # Store the frame the GFF way (0-2).
            $qframe = $digit - 1;
        }
        else {
            $self->warn("Specifying an invalid query frame ($qframe)");
            $qframe = undef;
        }
        $self->query->frame($qframe);
    }

    # ---- subject (hit) frame --------------------------------------------
    if (defined $sframe) {
        if ($sframe == 0) {
            $sframe = 0;
        }
        elsif ($sframe =~ /^([+-])?([1-3])/) {
            my $sign  = defined $1 ? $1 : '+';
            my $digit = $2;
            if (   ($sign eq '-' && $self->hit->strand >= 0)
                || ($sign eq '+' && $self->hit->strand <= 0) ) {
                $self->warn("Subject frame ($sframe) did not match strand of subject (". $self->hit->strand() . ")");
            }
            # Store the frame the GFF way (0-2).
            $sframe = $digit - 1;
        }
        else {
            $self->warn("Specifying an invalid subject frame ($sframe)");
            $sframe = undef;
        }
        $self->hit->frame($sframe);
    }

    # ---- return value ---------------------------------------------------
    # The choice between query and hit frame is made on TRUTH, not on
    # definedness: a query frame of 0 yields to the hit frame.  When
    # neither frame is true, the (false) hit frame value itself is returned.
    if (wantarray()) {
        # Algorithms matching this pattern get both frames back.
        if ($self->algorithm =~ /^T(BLAST|FAST)(X|Y|XY)/oi) {
            return ($self->query->frame(), $self->hit->frame());
        }

        my $query_frame = $self->query->frame();
        return ($query_frame, undef) if $query_frame;

        my $hit_frame = $self->hit->frame();
        return (undef, $hit_frame) if $hit_frame;

        return $hit_frame;    # one-element list holding the false value
    }

    my $query_frame = $self->query->frame();
    return $query_frame if $query_frame;

    return $self->hit->frame();
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
688
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
689
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
690 =head2 get_aln
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
691
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
692 Title : get_aln
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
693 Usage : my $aln = $hsp->get_aln
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
694 Function: Returns a Bio::SimpleAlign representing the HSP alignment
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
695 Returns : Bio::SimpleAlign
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
696 Args : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
697
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
698 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
699
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub get_aln {
    my ($self) = @_;
    require Bio::LocatableSeq;
    require Bio::SimpleAlign;

    # Direct Class->new() calls replace the ambiguous indirect-object
    # "new Class" syntax.
    my $aln = Bio::SimpleAlign->new();
    my $hs  = $self->hit_string();
    my $qs  = $self->query_string();
    # FASTA specific stuff moved to the FastaHSP object

    # Fetch the two ids in separate scalar assignments: in a single list
    # assignment an empty-list return from the query's seq_id() would
    # shift the hit's id into the query slot.
    my $q_nm = $self->query->seq_id();
    my $s_nm = $self->hit->seq_id();
    $q_nm = 'query' unless defined $q_nm && CORE::length($q_nm);
    $s_nm = 'hit'   unless defined $s_nm && CORE::length($s_nm);

    # Each sequence is numbered 1 .. (number of residues), i.e. the
    # length of the aligned string once gaps and whitespace are removed.
    my $seqonly = $qs;
    $seqonly =~ s/[\-\s]//g;
    my $query = Bio::LocatableSeq->new('-seq'   => $qs,
                                       '-id'    => $q_nm,
                                       '-start' => 1,
                                       '-end'   => CORE::length($seqonly),
                                       );

    $seqonly = $hs;
    $seqonly =~ s/[\-\s]//g;
    my $hit = Bio::LocatableSeq->new('-seq'   => $hs,
                                     '-id'    => $s_nm,
                                     '-start' => 1,
                                     '-end'   => CORE::length($seqonly),
                                     );

    $aln->add_seq($query);
    $aln->add_seq($hit);
    return $aln;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
734
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
735 =head2 num_conserved
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
736
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
737 Title : num_conserved
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
738 Usage : $obj->num_conserved($newval)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
739 Function: returns the number of conserved residues in the alignment
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
740 Returns : integer
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
741 Args : integer (optional)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
742
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
743
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
744 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
745
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
746 sub num_conserved{
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
747 my ($self,$value) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
748 if( defined $value) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
749 $self->{'num_conserved'} = $value;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
750 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
751 return $self->{'num_conserved'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
752 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
753
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
754 =head2 num_identical
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
755
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
756 Title : num_identical
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
757 Usage : $obj->num_identical($newval)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
758 Function: returns the number of identical residues in the alignment
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
759 Returns : integer
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
760 Args : integer (optional)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
761
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
762
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
763 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
764
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
765 sub num_identical{
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
766 my ($self,$value) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
767 if( defined $value) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
768 $self->{'_num_identical'} = $value;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
769 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
770 return $self->{'_num_identical'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
771 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
772
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
773 =head2 rank
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
774
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
775 Usage : $hsp->rank( [string] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
776 Purpose : Get the rank of the HSP within a given Blast hit.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
777 Example : $rank = $hsp->rank;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
778 Returns : Integer (1..n) corresponding to the order in which the HSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
779 appears in the BLAST report.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
780
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
781 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
782
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
783 sub rank {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
784 my ($self,$value) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
785 if( defined $value) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
786 $self->{'_rank'} = $value;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
787 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
788 return $self->{'_rank'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
789 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
790
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
791
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
792 =head2 seq_inds
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
793
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
794 Title : seq_inds
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
795 Purpose : Get a list of residue positions (indices) for all identical
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
796 : or conserved residues in the query or sbjct sequence.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
797 Example : @s_ind = $hsp->seq_inds('query', 'identical');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
798 : @h_ind = $hsp->seq_inds('hit', 'conserved');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
799 : @h_ind = $hsp->seq_inds('hit', 'conserved-not-identical');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
800 : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
801 Returns : List of integers
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
802 : May include ranges if collapse is true.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
803 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = query)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
804 : ('sbjct' is synonymous with 'hit')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
805 : class = 'identical' or 'conserved' or 'nomatch' or 'gap'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
806 : (default = identical)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
807 : (can be shortened to 'id' or 'cons')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
808 : or 'conserved-not-identical'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
809 : collapse = boolean, if true, consecutive positions are merged
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
810 : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
811 : collapses to "1-5 7 9-11". This is useful for
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
812 : consolidating long lists. Default = no collapse.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
813 Throws : n/a.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
814 Comments :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
815
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
816 See Also : L<Bio::Search::SearchUtils::collapse_nums()|Bio::Search::SearchUtils>,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
817 L<Bio::Search::Hit::HitI::seq_inds()|Bio::Search::Hit::HitI>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
818
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
819 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
820
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
821 sub seq_inds{
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
822 my ($self, $seqType, $class, $collapse) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
823
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
824 # prepare the internal structures - this is cached so
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
825 # if the strings have not changed we're okay
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
826 $self->_calculate_seq_positions();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
827
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
828 $seqType ||= 'query';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
829 $class ||= 'identical';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
830 $collapse ||= 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
831 $seqType = 'sbjct' if $seqType eq 'hit';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
832 my $t = lc(substr($seqType,0,1));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
833 if( $t eq 'q' ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
834 $seqType = 'query';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
835 } elsif ( $t eq 's' || $t eq 'h' ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
836 $seqType = 'sbjct';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
837 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
838 $self->warn("unknown seqtype $seqType using 'query'");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
839 $seqType = 'query';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
840 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
841 $t = lc(substr($class,0,1));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
842
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
843 if( $t eq 'c' ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
844 if( $class =~ /conserved\-not\-identical/ ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
845 $class = 'conserved';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
846 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
847 $class = 'conservedall';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
848 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
849 } elsif( $t eq 'i' ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
850 $class = 'identical';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
851 } elsif( $t eq 'n' ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
852 $class = 'nomatch';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
853 } elsif( $t eq 'g' ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
854 $class = 'gap';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
855 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
856 $self->warn("unknown sequence class $class using 'identical'");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
857 $class = 'identical';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
858 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
859
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
860 ## Sensitive to member name changes.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
861 $seqType = "_\L$seqType\E";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
862 $class = "_\L$class\E";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
863 my @ary;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
864
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
865 if( $class eq '_gap' ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
866 # this means that we are remapping the gap length that is stored
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
867 # in the hash (for example $self->{'_gapRes_query'} )
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
868 # so we'll return an array which has the values of the position of the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
869 # gap (the key in the hash) + the gap length (value in the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
870 # hash for this key) - 1.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
871
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
872 @ary = map { $_ > 1 ?
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
873 $_..($_ + $self->{"${class}Res$seqType"}->{$_} - 1) :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
874 $_ }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
875 sort { $a <=> $b } keys %{ $self->{"${class}Res$seqType"}};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
876 } elsif( $class eq '_conservedall' ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
877 @ary = sort { $a <=> $b }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
878 keys %{ $self->{"_conservedRes$seqType"}},
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
879 keys %{ $self->{"_identicalRes$seqType"}},
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
880 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
881 @ary = sort { $a <=> $b } keys %{ $self->{"${class}Res$seqType"}};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
882 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
883 require Bio::Search::BlastUtils if $collapse;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
884
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
885 return $collapse ? &Bio::Search::SearchUtils::collapse_nums(@ary) : @ary;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
886 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
887
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
888
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
889 =head2 Inherited from Bio::SeqFeature::SimilarityPair
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
890
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
891 These methods come from Bio::SeqFeature::SimilarityPair
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
892
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
893 =head2 query
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
894
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
895 Title : query
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
896 Usage : my $query = $hsp->query
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
897 Function: Returns a SeqFeature representing the query in the HSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
898 Returns : Bio::SeqFeature::Similarity
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
899 Args : [optional] new value to set
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
900
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
901
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
902 =head2 hit
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
903
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
904 Title : hit
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
905 Usage : my $hit = $hsp->hit
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
906 Function: Returns a SeqFeature representing the hit in the HSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
907 Returns : Bio::SeqFeature::Similarity
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
908 Args : [optional] new value to set
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
909
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
910
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
911 =head2 significance
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
912
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
913 Title : significance
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
914 Usage : $evalue = $obj->significance();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
915 $obj->significance($evalue);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
916 Function: Get/Set the significance value
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
917 Returns : numeric
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
918 Args : [optional] new value to set
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
919
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
920
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
921 =head2 score
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
922
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
923 Title : score
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
924 Usage : my $score = $hsp->score();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
925 Function: Returns the score for this HSP or undef
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
926 Returns : numeric (when a new value is passed, the previous value is returned)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
927 Args : [optional] numeric to set value
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
928
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
929 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
930
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
931 # overriding
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
932
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
933 sub score {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
934 my ($self,$value) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
935 my $previous = $self->{'_score'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
936 if( defined $value ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
937 $self->{'_score'} = $value;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
938 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
939 return $previous;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
940 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
941
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
942 =head2 bits
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
943
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
944 Title : bits
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
945 Usage : my $bits = $hsp->bits();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
946 Function: Returns the bit value for this HSP or undef
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
947 Returns : numeric (when a new value is passed, the previous value is returned)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
948 Args : [optional] numeric to set value
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
949
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
950 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
951
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
952 # overriding
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
953
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
954 sub bits {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
955 my ($self,$value) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
956 my $previous = $self->{'_bits'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
957 if( defined $value ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
958 $self->{'_bits'} = $value;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
959 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
960 return $previous;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
961 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
962
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
963
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
964 =head2 strand
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
965
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
966 Title : strand
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
967 Usage : $hsp->strand('query')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
968 Function: Retrieves the strand for the HSP component requested
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
969 Returns : +1 or -1 (0 if unknown)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
970 Args : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
971 'query' to retrieve the query strand (default)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
972
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
973 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
974
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
975 =head1 Private methods
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
976
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
977 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
978
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
979 =head2 _calculate_seq_positions
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
980
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
981 Title : _calculate_seq_positions
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
982 Usage : $self->_calculate_seq_positions
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
983 Function: Internal function
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
984 Returns :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
985 Args :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
986
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
987
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
988 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
989
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
990 sub _calculate_seq_positions {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
991 my ($self,@args) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
992 return unless ( $self->{'_sequenceschanged'} );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
993 $self->{'_sequenceschanged'} = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
994 my ($mchar, $schar, $qchar);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
995 my ($seqString, $qseq,$sseq) = ( $self->homology_string(),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
996 $self->query_string(),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
997 $self->hit_string() );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
998
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
999 # Using hashes to avoid saving duplicate residue numbers.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1000 my %identicalList_query = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1001 my %identicalList_sbjct = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1002 my %conservedList_query = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1003 my %conservedList_sbjct = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1004
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1005 my %gapList_query = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1006 my %gapList_sbjct = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1007 my %nomatchList_query = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1008 my %nomatchList_sbjct = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1009
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1010 my $qdir = $self->query->strand || 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1011 my $sdir = $self->hit->strand || 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1012 my $resCount_query = ($qdir >=0) ? $self->query->end : $self->query->start;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1013 my $resCount_sbjct = ($sdir >=0) ? $self->hit->end : $self->hit->start;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1014
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1015 my $prog = $self->algorithm;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1016 if( $prog =~ /FAST/i ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1017 # fasta reports some extra 'regional' sequence information
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1018 # we need to clear out first
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1019 # this seemed a bit insane to me at first, but it appears to
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1020 # work --jason
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1021
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1022 # we infer the end of the regional sequence where the first
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1023 # non space is in the homology string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1024 # then we use the HSP->length to tell us how far to read
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1025 # to cut off the end of the sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1026
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1027 # one possible problem is the sequence which
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1028
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1029 my ($start) = (0);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1030 if( $seqString =~ /^(\s+)/ ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1031 $start = CORE::length($1);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1032 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1033
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1034 $seqString = substr($seqString, $start,$self->length('total'));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1035 $qseq = substr($qseq, $start,$self->length('total'));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1036 $sseq = substr($sseq, $start,$self->length('total'));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1037
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1038 $qseq =~ s![\\\/]!!g;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1039 $sseq =~ s![\\\/]!!g;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1040 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1041
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1042 if($prog =~ /^(PSI)?T(BLAST|FAST)N/oi ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1043 $resCount_sbjct = int($resCount_sbjct / 3);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1044 } elsif($prog =~ /^(BLAST|FAST)(X|Y|XY)/oi ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1045 $resCount_query = int($resCount_query / 3);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1046 } elsif($prog =~ /^T(BLAST|FAST)(X|Y|XY)/oi ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1047 $resCount_query = int($resCount_query / 3);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1048 $resCount_sbjct = int($resCount_sbjct / 3);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1049 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1050 while( $mchar = chop($seqString) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1051 ($qchar, $schar) = (chop($qseq), chop($sseq));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1052 if( $mchar eq '+' || $mchar eq '.' || $mchar eq ':' ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1053 $conservedList_query{ $resCount_query } = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1054 $conservedList_sbjct{ $resCount_sbjct } = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1055 } elsif( $mchar ne ' ' ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1056 $identicalList_query{ $resCount_query } = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1057 $identicalList_sbjct{ $resCount_sbjct } = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1058 } elsif( $mchar eq ' ') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1059 $nomatchList_query{ $resCount_query } = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1060 $nomatchList_sbjct{ $resCount_sbjct } = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1061 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1062 if( $qchar eq $GAP_SYMBOL ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1063 $gapList_query{ $resCount_query } ++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1064 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1065 $resCount_query -= $qdir;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1066 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1067 if( $schar eq $GAP_SYMBOL ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1068 $gapList_sbjct{ $resCount_query } ++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1069 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1070 $resCount_sbjct -=$sdir;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1071 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1072 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1073 $self->{'_identicalRes_query'} = \%identicalList_query;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1074 $self->{'_conservedRes_query'} = \%conservedList_query;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1075 $self->{'_nomatchRes_query'} = \%nomatchList_query;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1076 $self->{'_gapRes_query'} = \%gapList_query;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1077
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1078 $self->{'_identicalRes_sbjct'} = \%identicalList_sbjct;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1079 $self->{'_conservedRes_sbjct'} = \%conservedList_sbjct;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1080 $self->{'_nomatchRes_sbjct'} = \%nomatchList_sbjct;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1081 $self->{'_gapRes_sbjct'} = \%gapList_sbjct;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1082 return 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1083 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1084
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1085 =head2 n
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1086
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1087 See documentation in L<Bio::Search::HSP::HSPI::n()|Bio::Search::HSP::HSPI>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1088
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1089 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1090
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1091 #-----
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1092 sub n {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1093 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1094 if(@_) { $self->{'_n'} = shift; }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1095 defined $self->{'_n'} ? $self->{'_n'} : '';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1096 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1097
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1098 =head2 range
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1099
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1100 See documentation in L<Bio::Search::HSP::HSPI::range()|Bio::Search::HSP::HSPI>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1101
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1102 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1103
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1104 #----------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub range {
    # Return the (start, end) pair of either the query or the hit feature.
    #
    # Args   : optional sequence type; a false value defaults to 'query'.
    #          'hit' is treated as a synonym for 'sbjct'. Any value other
    #          than 'query' selects the hit feature.
    # Returns: two-element list ($start, $end).
    my $self    = shift;
    my $seqType = shift;

    $seqType = 'query' unless $seqType;
    $seqType = 'sbjct' if $seqType eq 'hit';

    # Choose which accessor to go through; the accessor is invoked once
    # per coordinate, and each coordinate is fetched in scalar context.
    my $accessor = $seqType eq 'query' ? 'query' : 'hit';

    my $start = $self->$accessor->start;
    my $end   = $self->$accessor->end;

    return ($start, $end);
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1123
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1124
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1125 1;