0
|
1 #-----------------------------------------------------------------------------
|
|
2 # PACKAGE : Bio::Tools::Sigcleave
|
|
3 # AUTHOR : Chris Dagdigian, dag@sonsorol.org
|
|
4 # CREATED : Jan 28 1999
|
|
5 # REVISION: $Id: Sigcleave.pm,v 1.17 2002/10/22 07:45:22 lapp Exp $
|
|
6 #
|
|
7 # Copyright (c) 1997-9 bioperl, Chris Dagdigian and others. All Rights Reserved.
|
|
8 # This module is free software; you can redistribute it and/or
|
|
9 # modify it under the same terms as Perl itself.
|
|
10 #
|
|
11 # _History_
|
|
12 #
|
|
13 # Object framework ripped from Steve Chervits's SeqPattern.pm
|
|
14 #
|
|
15 # Core EGCG Sigcleave emulation from perl code developed by
|
|
16 # Danh Nguyen & Kamalakar Gulukota which itself was based
|
|
17 # loosely on Colgrove's signal.c program.
|
|
18 #
|
|
19 # The overall idea is to replicate the output of the sigcleave
|
|
20 # program which was distributed with the EGCG extension to the GCG sequence
|
|
21 # analysis package. There is also an accessor method for just getting at
|
|
22 # the raw results.
|
|
23 #
|
|
24 #-----------------------------------------------------------------------------
|
|
25
|
|
26 =head1 NAME
|
|
27
|
|
28 Bio::Tools::Sigcleave - Bioperl object for sigcleave analysis
|
|
29
|
|
30 =head1 SYNOPSIS
|
|
31
|
|
32 =head2 Object Creation
|
|
33
|
|
34 use Bio::Tools::Sigcleave ();
|
|
35
|
|
36 # to keep the module backwar compatible, you can pass it a sequence string, but
|
|
37 # there recommended say is to pass it a Seq object
|
|
38
|
|
39 # this works
|
|
40 $seq = "MVLLLILSVLLLKEDVRGSAQSSERRVVAHMPGDIIIGALFSVHHQPTVDKVHERKCGAVREQYGI";
|
|
41 $sig = new Bio::Tools::Sigcleave(-seq => $seq,
|
|
42 -type => 'protein',
|
|
43 -threshold=>'3.5',
|
|
44 );
|
|
45 # but you do:
|
|
46 $seqobj = Bio::PrimarySeq->new(-seq => $seq);
|
|
47
|
|
48 $sig = new Bio::Tools::Sigcleave(-seq => $seqobj,
|
|
49 -threshold=>'3.5',
|
|
50 );
|
|
51
|
|
52
|
|
53 # now you can detect procaryotic signal sequences as well as eucaryotic
|
|
54 $sig->matrix('eucaryotic'); # or 'procaryotic'
|
|
55
|
|
56
|
|
57 =head2 Object Methods & Accessors
|
|
58
|
|
59 # you can use this method to fine tune the threshod before printing out the results
|
|
60 $sig->result_count:
|
|
61
|
|
62 %raw_results = $sig->signals;
|
|
63 $formatted_output = $sig->pretty_print;
|
|
64
|
|
65 =head1 DESCRIPTION
|
|
66
|
|
67 "Sigcleave" was a program distributed as part of the free EGCG add-on
|
|
68 to earlier versions of the GCG Sequence Analysis package. A new
|
|
69 implementation of the algorithm is now part of EMBOSS package.
|
|
70
|
|
71 From the EGCG documentation:
|
|
72
|
|
73 SigCleave uses the von Heijne method to locate signal sequences, and
|
|
74 to identify the cleavage site. The method is 95% accurate in
|
|
75 resolving signal sequences from non-signal sequences with a cutoff
|
|
76 score of 3.5, and 75-80% accurate in identifying the cleavage
|
|
77 site. The program reports all hits above a minimum value.
|
|
78
|
|
79 The EGCG Sigcleave program was written by Peter Rice (E-mail:
|
|
80 pmr@sanger.ac.uk Post: Informatics Division, The Sanger Centre,
|
|
81 Wellcome Trust Genome Campus, Hinxton, Cambs, CB10 1SA, UK).
|
|
82
|
|
83 Since EGCG is no longer distributed for the latest versions of GCG,
|
|
84 this code was developed to emulate the output of the original program
|
|
85 as much as possible for those who lost access to sigcleave when
|
|
86 upgrading to newer versions of GCG.
|
|
87
|
|
88 There are 2 accessor methods for this object. "signals" will return a
|
|
89 perl associative array containing the sigcleave scores keyed by amino
|
|
90 acid position. "pretty_print" returns a formatted string similar to
|
|
91 the output of the original sigcleave utility.
|
|
92
|
|
93 In both cases, the "threshold" setting controls the score reporting
|
|
94 level. If no value for threshold is passed in by the user, the code
|
|
95 defaults to a reporting value of 3.5.
|
|
96
|
|
97 In this implemntation the accessor will never return any
|
|
98 score/position pair which does not meet the threshold limit. This is
|
|
99 the slightly different from the behaviour of the 8.1 EGCG sigcleave
|
|
100 program which will report the highest of the under-threshold results
|
|
101 if nothing else is found.
|
|
102
|
|
103
|
|
104 Example of pretty_print output:
|
|
105
|
|
106 SIGCLEAVE of sigtest from: 1 to 146
|
|
107
|
|
108 Report scores over 3.5
|
|
109 Maximum score 4.9 at residue 131
|
|
110
|
|
111 Sequence: FVILAAMSIQGSA-NLQTQWKSTASLALET
|
|
112 | (signal) | (mature peptide)
|
|
113 118 131
|
|
114
|
|
115 Other entries above 3.5
|
|
116
|
|
117 Maximum score 3.7 at residue 112
|
|
118
|
|
119 Sequence: CSRQLFGWLFCKV-HPGAIVFVILAAMSIQGSANLQTQWKSTASLALET
|
|
120 | (signal) | (mature peptide)
|
|
121 99 112
|
|
122
|
|
123
|
|
124 =head1 FEEDBACK
|
|
125
|
|
126 When updating and maintaining a module, it helps to know that people
|
|
127 are actually using it. Let us know if you find a bug, think this code
|
|
128 is useful or have any improvements/features to suggest.
|
|
129
|
|
130 =head2 Reporting Bugs
|
|
131
|
|
132 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
133 the bugs and their resolution. Bug reports can be submitted via email
|
|
134 or the web:
|
|
135
|
|
136 bioperl-bugs@bio.perl.org
|
|
137 http://bugzilla.bioperl.org/
|
|
138
|
|
139 =head1 AUTHOR
|
|
140
|
|
141 Chris Dagdigian, dag@sonsorol.org & others
|
|
142
|
|
143 =head1 CONTRIBUTORS
|
|
144
|
|
145 Heikki Lehvaslaiho, heikki@ebi.ac.uk
|
|
146
|
|
147 =head1 VERSION
|
|
148
|
|
149 Bio::Tools::Sigcleave.pm, $Id: Sigcleave.pm,v 1.17 2002/10/22 07:45:22 lapp Exp $
|
|
150
|
|
151 =head1 COPYRIGHT
|
|
152
|
|
153 Copyright (c) 1999 Chris Dagdigian & others. All Rights Reserved.
|
|
154 This module is free software; you can redistribute it and/or modify it
|
|
155 under the same terms as Perl itself.
|
|
156
|
|
157 =head1 REFERENCES / SEE ALSO
|
|
158
|
|
159 von Heijne G. (1986) "A new method for predicting signal sequences
|
|
160 cleavage sites." Nucleic Acids Res. 14, 4683-4690.
|
|
161
|
|
162 von Heijne G. (1987) in "Sequence Analysis in Molecular Biology:
|
|
163 Treasure Trove or Trivial Pursuit" (Acad. Press, (1987), 113-117).
|
|
164
|
|
165
|
|
166 =head1 APPENDIX
|
|
167
|
|
168 The following documentation describes the various functions
|
|
169 contained in this module. Some functions are for internal
|
|
170 use and are not meant to be called by the user; they are
|
|
171 preceded by an underscore ("_").
|
|
172
|
|
173
|
|
174 =cut
|
|
175
|
|
176 #
|
|
177 ##
|
|
178 ###
|
|
179 #### END of main POD documentation.
|
|
180 ###
|
|
181 ##
|
|
182 #
|
|
183
|
|
184
|
|
185 package Bio::Tools::Sigcleave;
|
|
186
|
|
187 use Bio::Root::Root;
|
|
188 use Bio::PrimarySeq;
|
|
189
|
|
190 @ISA = qw(Bio::Root::Root);
|
|
191 use strict;
|
|
192 use vars qw ($ID $VERSION %WeightTable_euc %WeightTable_pro );
|
|
193 $ID = 'Bio::Tools::Sigcleave';
|
|
194 $VERSION = 0.02;
|
|
195
|
|
196
|
|
197 %WeightTable_euc = (
|
|
198 #Sample: 161 aligned sequences
|
|
199 # R -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 +1 +2 Expect
|
|
200 'A' => [16, 13, 14, 15, 20, 18, 18, 17, 25, 15, 47, 6, 80, 18, 6, 14.5],
|
|
201 'C' => [ 3, 6, 9, 7, 9, 14, 6, 8, 5, 6, 19, 3, 9, 8, 3, 4.5],
|
|
202 'D' => [ 0, 0, 0, 0, 0, 0, 0, 0, 5, 3, 0, 5, 0, 10, 11, 8.9],
|
|
203 'E' => [ 0, 0, 0, 1, 0, 0, 0, 0, 3, 7, 0, 7, 0, 13, 14, 10.0],
|
|
204 'F' => [13, 9, 11, 11, 6, 7, 18, 13, 4, 5, 0, 13, 0, 6, 4, 5.6],
|
|
205 'G' => [ 4, 4, 3, 6, 3, 13, 3, 2, 19, 34, 5, 7, 39, 10, 7, 12.1],
|
|
206 'H' => [ 0, 0, 0, 0, 0, 1, 1, 0, 5, 0, 0, 6, 0, 4, 2, 3.4],
|
|
207 'I' => [15, 15, 8, 6, 11, 5, 4, 8, 5, 1, 10, 5, 0, 8, 7, 7.4],
|
|
208 'K' => [ 0, 0, 0, 1, 0, 0, 1, 0, 0, 4, 0, 2, 0, 11, 9, 11.3],
|
|
209 'L' => [71, 68, 72, 79, 78, 45, 64, 49, 10, 23, 8, 20, 1, 8, 4, 12.1],
|
|
210 'M' => [ 0, 3, 7, 4, 1, 6, 2, 2, 0, 0, 0, 1, 0, 1, 2, 2.7],
|
|
211 'N' => [ 0, 1, 0, 1, 1, 0, 0, 0, 3, 3, 0, 10, 0, 4, 7, 7.1],
|
|
212 'P' => [ 2, 0, 2, 0, 0, 4, 1, 8, 20, 14, 0, 1, 3, 0, 22, 7.4],
|
|
213 'Q' => [ 0, 0, 0, 1, 0, 6, 1, 0, 10, 8, 0, 18, 3, 19, 10, 6.3],
|
|
214 'R' => [ 2, 0, 0, 0, 0, 1, 0, 0, 7, 4, 0, 15, 0, 12, 9, 7.6],
|
|
215 'S' => [ 9, 3, 8, 6, 13, 10, 15, 16, 26, 11, 23, 17, 20, 15, 10, 11.4],
|
|
216 'T' => [ 2, 10, 5, 4, 5, 13, 7, 7, 12, 6, 17, 8, 6, 3, 10, 9.7],
|
|
217 'V' => [20, 25, 15, 18, 13, 15, 11, 27, 0, 12, 32, 3, 0, 8, 17, 11.1],
|
|
218 'W' => [ 4, 3, 3, 1, 1, 2, 6, 3, 1, 3, 0, 9, 0, 2, 0, 1.8],
|
|
219 'Y' => [ 0, 1, 4, 0, 0, 1, 3, 1, 1, 2, 0, 5, 0, 1, 7, 5.6]
|
|
220 );
|
|
221
|
|
222 %WeightTable_pro = (
|
|
223 #Sample: 36 aligned sequences
|
|
224 # R -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 +1 +2 Expect
|
|
225 'A' => [0, 8, 8, 9, 6, 7, 5, 6, 7, 7, 24, 2, 31, 18, 4, 3.2],
|
|
226 'C' => [1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1.0],
|
|
227 'D' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 8, 2.0],
|
|
228 'E' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 8, 2.2],
|
|
229 'F' => [2, 4, 3, 4, 1, 1, 8, 0, 4, 1, 0, 7, 0, 1, 0, 1.3],
|
|
230 'G' => [4, 2, 2, 2, 3, 5, 2, 4, 2, 2, 0, 2, 2, 1, 0, 2.7],
|
|
231 'H' => [0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 7, 0, 1, 0, 0.8],
|
|
232 'I' => [3, 1, 5, 1, 5, 0, 1, 3, 0, 0, 0, 0, 0, 0, 2, 1.7],
|
|
233 'K' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 3, 0, 2.5],
|
|
234 'L' => [8, 11, 9, 8, 9, 13, 1, 0, 2, 2, 1, 2, 0, 0, 1, 2.7],
|
|
235 'M' => [0, 2, 1, 1, 3, 2, 3, 0, 1, 2, 0, 4, 0, 0, 1, 0.6],
|
|
236 'N' => [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 3, 0, 1, 4, 1.6],
|
|
237 'P' => [0, 1, 1, 1, 1, 1, 2, 3, 5, 2, 0, 0, 0, 0, 5, 1.7],
|
|
238 'Q' => [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 1, 1.4],
|
|
239 'R' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1.7],
|
|
240 'S' => [1, 0, 1, 4, 4, 1, 5, 15, 5, 8, 5, 2, 2, 0, 0, 2.6],
|
|
241 'T' => [2, 0, 4, 2, 2, 2, 2, 2, 5, 1, 3, 0, 1, 1, 2, 2.2],
|
|
242 'V' => [5, 7, 1, 3, 1, 4, 7, 0, 0, 4, 3, 0, 0, 2, 0, 2.5],
|
|
243 'W' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0.4],
|
|
244 'Y' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 1.3]
|
|
245 );
|
|
246
|
|
247
|
|
248 ##
|
|
249 ## Now we calculate the _real_ values for the weight tables
|
|
250 ##
|
|
251 ##
|
|
252 ## yeah yeah yeah there is lots of math here that gets repeated
|
|
253 ## every single time a sigcleave object gets created. This is
|
|
254 ## a quick hack to make sure that we get the scores as accurate as
|
|
255 ## possible. Need all those significant digits....
|
|
256 ##
|
|
257 ## suggestions for speedup aproaches welcome
|
|
258 ##
|
|
259
|
|
260
|
|
261 foreach my $i (keys %WeightTable_euc) {
|
|
262 my $expected = $WeightTable_euc{$i}[15];
|
|
263 if ($expected > 0) {
|
|
264 for (my $j=0; $j<16; $j++) {
|
|
265 if ($WeightTable_euc{$i}[$j] == 0) {
|
|
266 $WeightTable_euc{$i}[$j] = 1;
|
|
267 if ($j == 10 || $j == 12) {
|
|
268 $WeightTable_euc{$i}[$j] = 1.e-10;
|
|
269 }
|
|
270 }
|
|
271 $WeightTable_euc{$i}[$j] = log($WeightTable_euc{$i}[$j]/$expected);
|
|
272 }
|
|
273 }
|
|
274 }
|
|
275
|
|
276
|
|
277 foreach my $i (keys %WeightTable_pro) {
|
|
278 my $expected = $WeightTable_pro{$i}[15];
|
|
279 if ($expected > 0) {
|
|
280 for (my $j=0; $j<16; $j++) {
|
|
281 if ($WeightTable_pro{$i}[$j] == 0) {
|
|
282 $WeightTable_pro{$i}[$j] = 1;
|
|
283 if ($j == 10 || $j == 12) {
|
|
284 $WeightTable_pro{$i}[$j] = 1.e-10;
|
|
285 }
|
|
286 }
|
|
287 $WeightTable_pro{$i}[$j] = log($WeightTable_pro{$i}[$j]/$expected);
|
|
288 }
|
|
289 }
|
|
290 }
|
|
291
|
|
292
|
|
293
|
|
294 #####################################################################################
|
|
295 ## CONSTRUCTOR ##
|
|
296 #####################################################################################
|
|
297
|
|
298
|
|
299 sub new {
|
|
300 my ($class, @args) = @_;
|
|
301
|
|
302 my $self = $class->SUPER::new(@args);
|
|
303 #my $self = Bio::Seq->new(@args);
|
|
304
|
|
305 my ($seq, $threshold, $matrix) = $self->_rearrange([qw(SEQ THRESHOLD MATRIX)],@args);
|
|
306
|
|
307 defined $threshold && $self->threshold($threshold);
|
|
308 $matrix && $self->matrix($matrix);
|
|
309 $seq && $self->seq($seq);
|
|
310
|
|
311 return $self;
|
|
312 }
|
|
313
|
|
314
|
|
315
|
|
316 =head1 threshold
|
|
317
|
|
318 Title : threshold
|
|
319 Usage : $value = $self->threshold
|
|
320 Purpose : Read/write method sigcleave score reporting threshold.
|
|
321 Returns : float.
|
|
322 Argument : new value, float
|
|
323 Throws : on non-number argument
|
|
324 Comments : defaults to 3.5
|
|
325 See Also : n/a
|
|
326
|
|
327 =cut
|
|
328
|
|
329 #----------------
|
|
330 sub threshold {
|
|
331 #----------------
|
|
332 my ($self, $value) = @_;
|
|
333 if( defined $value) {
|
|
334 $self->throw("I need a number, not [$value]")
|
|
335 if $value !~ /^[+-]?[\d\.]+$/;
|
|
336 $self->{'_threshold'} = $value;
|
|
337 }
|
|
338 return $self->{'_threshold'} || 3.5 ;
|
|
339 }
|
|
340
|
|
341 =head1 matrix
|
|
342
|
|
343 Title : matrix
|
|
344 Usage : $value = $self->matrix('procaryotic')
|
|
345 Purpose : Read/write method sigcleave matrix.
|
|
346 Returns : float.
|
|
347 Argument : new value: 'eucaryotic' or 'procaryotic'
|
|
348 Throws : on non-number argument
|
|
349 Comments : defaults to 3.5
|
|
350 See Also : n/a
|
|
351
|
|
352 =cut
|
|
353
|
|
354 #----------------
|
|
355 sub matrix {
|
|
356 #----------------
|
|
357 my ($self, $value) = @_;
|
|
358 if( defined $value) {
|
|
359 $self->throw("I need 'eucaryotic' or 'procaryotic', not [$value]")
|
|
360 unless $value eq 'eucaryotic' or $value eq 'procaryotic';
|
|
361 $self->{'_matrix'} = $value;
|
|
362 }
|
|
363 return $self->{'_matrix'} || 'eucaryotic' ;
|
|
364
|
|
365 }
|
|
366
|
|
367 =head1 seq
|
|
368
|
|
369 Title : seq
|
|
370 Usage : $value = $self->seq('procaryotic')
|
|
371 Purpose : Read/write method sigcleave seq.
|
|
372 Returns : float.
|
|
373 Argument : new value: 'eucaryotic' or 'procaryotic'
|
|
374 Throws : on non-number argument
|
|
375 Comments : defaults to 3.5
|
|
376 See Also : n/a
|
|
377
|
|
378 =cut
|
|
379
|
|
380 #----------------
|
|
381 sub seq {
|
|
382 #----------------
|
|
383 my ($self, $value) = @_;
|
|
384 if( defined $value) {
|
|
385 if ($value->isa('Bio::PrimarySeqI')) {
|
|
386 $self->{'_seq'} = $value;
|
|
387 } else {
|
|
388 $self->{'_seq'} = Bio::PrimarySeq->new(-seq=>$value,
|
|
389 -alphabet=>'protein');
|
|
390 }
|
|
391 }
|
|
392 return $self->{'_seq'};
|
|
393 }
|
|
394
|
|
395
|
|
396
|
|
397 =head1 _Analyze
|
|
398
|
|
399 Title : _Analyze
|
|
400 Usage : N/A This is an internal method. Not meant to be called from outside
|
|
401 : the package
|
|
402 :
|
|
403 Purpose : calculates sigcleave score and amino acid position for the
|
|
404 : given protein sequence. The score reporting threshold can
|
|
405 : be adjusted by passing in the "threshold" parameter during
|
|
406 : object construction. If no threshold is passed in, the code
|
|
407 : defaults to reporting any scores equal to or above 3.5
|
|
408 :
|
|
409 Returns : nothing. results are added to the object
|
|
410 Argument : none.
|
|
411 Throws : nothing.
|
|
412 Comments : nothing.
|
|
413 See Also : n/a
|
|
414
|
|
415 =cut
|
|
416
|
|
417 #----------------
|
|
418 sub _Analyze {
|
|
419 #----------------
|
|
420 my($self) = @_;
|
|
421
|
|
422 my %signals;
|
|
423 my @hitWeight = ();
|
|
424 my @hitsort = ();
|
|
425 my @hitpos = ();
|
|
426 my $maxSite = "";
|
|
427 my $seqPos = "";
|
|
428 my $istart = "";
|
|
429 my $iend = "";
|
|
430 my $icol = "";
|
|
431 my $i = "";
|
|
432 my $weight = "";
|
|
433 my $k = 0;
|
|
434 my $c = 0;
|
|
435 my $seqBegin = 0;
|
|
436 my $pVal = -13;
|
|
437 my $nVal = 2;
|
|
438 my $nHits = 0;
|
|
439 my $seqEnd = $self->seq->length;
|
|
440 my $pep = $self->seq->seq;
|
|
441 my $minWeight = $self->threshold;
|
|
442 my $matrix = $self->matrix;
|
|
443
|
|
444 ## The weight table is keyed by UPPERCASE letters so we uppercase
|
|
445 ## the pep string because we don't want to alter the actual object
|
|
446 ## sequence.
|
|
447
|
|
448 $pep =~ tr/a-z/A-Z/;
|
|
449
|
|
450 for ($seqPos = $seqBegin; $seqPos < $seqEnd; $seqPos++) {
|
|
451 $istart = (0 > $seqPos + $pVal)? 0 : $seqPos + $pVal;
|
|
452 $iend = ($seqPos + $nVal - 1 < $seqEnd)? $seqPos + $nVal - 1 : $seqEnd;
|
|
453 $icol= $iend - $istart + 1;
|
|
454 $weight = 0.00;
|
|
455 for ($k=0; $k<$icol; $k++) {
|
|
456 $c = substr($pep, $istart + $k, 1);
|
|
457
|
|
458 ## CD: The if(defined) stuff was put in here because Sigcleave.pm
|
|
459 ## CD: kept getting warnings about undefined vals during 'make test' ...
|
|
460 if ($matrix eq 'eucaryotic') {
|
|
461 $weight += $WeightTable_euc{$c}[$k] if defined $WeightTable_euc{$c}[$k];
|
|
462 } else {
|
|
463 $weight += $WeightTable_pro{$c}[$k] if defined $WeightTable_pro{$c}[$k];
|
|
464 }
|
|
465 }
|
|
466 $signals{$seqPos+1} = sprintf ("%.1f", $weight) if $weight >= $minWeight;
|
|
467 }
|
|
468
|
|
469 $self->{"_signal_scores"} = { %signals };
|
|
470 }
|
|
471
|
|
472
|
|
473 =head1 signals
|
|
474
|
|
475 Title : signals
|
|
476 Usage : %sigcleave_results = $sig->signals;
|
|
477 :
|
|
478 Purpose : Accessor method for sigcleave results
|
|
479 :
|
|
480 Returns : Associative array. The key value represents the amino acid position
|
|
481 : and the value represents the score. Only scores that
|
|
482 : are greater than or equal to the THRESHOLD value are reported.
|
|
483 :
|
|
484 Argument : none.
|
|
485 Throws : none.
|
|
486 Comments : none.
|
|
487 See Also : THRESHOLD
|
|
488
|
|
489 =cut
|
|
490
|
|
491 #----------------
|
|
492 sub signals {
|
|
493 #----------------
|
|
494 my $self = shift;
|
|
495 my %results;
|
|
496 my $position;
|
|
497
|
|
498 # do the calculations
|
|
499 $self->_Analyze;
|
|
500
|
|
501 foreach $position ( sort keys %{ $self->{'_signal_scores'} } ) {
|
|
502 $results{$position} = $self->{'_signal_scores'}{$position};
|
|
503 }
|
|
504 return %results;
|
|
505 }
|
|
506
|
|
507
|
|
508 =head1 result_count
|
|
509
|
|
510 Title : result_count
|
|
511 Usage : $count = $sig->result_count;
|
|
512 :
|
|
513 Purpose : Accessor method for sigcleave results
|
|
514 :
|
|
515 Returns : Integer, number of results above the threshold
|
|
516 :
|
|
517 Argument : none.
|
|
518 Throws : none.
|
|
519 Comments : none.
|
|
520
|
|
521 See Also : THRESHOLD
|
|
522
|
|
523 =cut
|
|
524
|
|
525 #----------------
|
|
526 sub result_count {
|
|
527 #----------------
|
|
528 my $self = shift;
|
|
529 $self->_Analyze;
|
|
530 return keys %{ $self->{'_signal_scores'} };
|
|
531 }
|
|
532
|
|
533
|
|
534 =head1 pretty_print
|
|
535
|
|
536 Title : pretty_print
|
|
537 Usage : $output = $sig->pretty_print;
|
|
538 : print $sig->pretty_print;
|
|
539 :
|
|
540 Purpose : Emulates the output of the EGCG Sigcleave
|
|
541 : utility.
|
|
542 :
|
|
543 Returns : A formatted string.
|
|
544 Argument : none.
|
|
545 Throws : none.
|
|
546 Comments : none.
|
|
547 See Also : n/a
|
|
548
|
|
549 =cut
|
|
550
|
|
551 #----------------
|
|
552 sub pretty_print {
|
|
553 #----------------
|
|
554 my $self = shift;
|
|
555 my $pos;
|
|
556 my $output;
|
|
557 my $cnt = 1;
|
|
558 my %results = $self->signals;
|
|
559 my @hits = keys %results;
|
|
560 my $hitcount = $#hits; $hitcount++;
|
|
561 my $thresh = $self->threshold;
|
|
562 my $seqlen = $self->seq->length || 0;
|
|
563 my $name = $self->seq->id || 'NONAME';
|
|
564 my $pep = $self->seq->seq;
|
|
565 $pep =~ tr/a-z/A-Z/;
|
|
566
|
|
567 $output = "SIGCLEAVE of $name from: 1 to $seqlen\n\n";
|
|
568
|
|
569 if ($hitcount > 0) {
|
|
570 $output .= "Report scores over $thresh\n";
|
|
571 foreach $pos ((sort { $results{$b} cmp $results{$a} } keys %results)) {
|
|
572 my $start = $pos - 15;
|
|
573 $start = 1 if $start < 1;
|
|
574 my $sig = substr($pep,$start -1,$pos-$start );
|
|
575
|
|
576 $output .= sprintf ("Maximum score %1.1f at residue %3d\n",$results{$pos},$pos);
|
|
577 $output .= "\n";
|
|
578 $output .= " Sequence: ";
|
|
579 $output .= $sig;
|
|
580 $output .= "-" x (15- length($sig));
|
|
581 $output .= "-";
|
|
582 $output .= substr($pep,$pos-1,50);
|
|
583 $output .= "\n";
|
|
584 $output .= " " x 12;
|
|
585 $output .= "| \(signal\) | \(mature peptide\)\n";
|
|
586 $output .= sprintf(" %3d %3d\n\n",$start,$pos);
|
|
587
|
|
588 if (($hitcount > 1) && ($cnt == 1)) {
|
|
589 $output .= " Other entries above $thresh\n\n";
|
|
590 }
|
|
591 $cnt++;
|
|
592 }
|
|
593 }
|
|
594 $output;
|
|
595 }
|
|
596
|
|
597
|
|
598 1;
|
|
599 __END__
|
|
600
|
|
601
|
|
602 #########################################################################
|
|
603 # End of class
|
|
604 #########################################################################
|