Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/Tools/Sigcleave.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 #----------------------------------------------------------------------------- | |
2 # PACKAGE : Bio::Tools::Sigcleave | |
3 # AUTHOR : Chris Dagdigian, dag@sonsorol.org | |
4 # CREATED : Jan 28 1999 | |
5 # REVISION: $Id: Sigcleave.pm,v 1.17 2002/10/22 07:45:22 lapp Exp $ | |
6 # | |
7 # Copyright (c) 1997-9 bioperl, Chris Dagdigian and others. All Rights Reserved. | |
8 # This module is free software; you can redistribute it and/or | |
9 # modify it under the same terms as Perl itself. | |
10 # | |
11 # _History_ | |
12 # | |
13 # Object framework ripped from Steve Chervits's SeqPattern.pm | |
14 # | |
15 # Core EGCG Sigcleave emulation from perl code developed by | |
16 # Danh Nguyen & Kamalakar Gulukota which itself was based | |
17 # loosely on Colgrove's signal.c program. | |
18 # | |
19 # The overall idea is to replicate the output of the sigcleave | |
20 # program which was distributed with the EGCG extension to the GCG sequence | |
21 # analysis package. There is also an accessor method for just getting at | |
22 # the raw results. | |
23 # | |
24 #----------------------------------------------------------------------------- | |
25 | |
26 =head1 NAME | |
27 | |
28 Bio::Tools::Sigcleave - Bioperl object for sigcleave analysis | |
29 | |
30 =head1 SYNOPSIS | |
31 | |
32 =head2 Object Creation | |
33 | |
34 use Bio::Tools::Sigcleave (); | |
35 | |
36 # to keep the module backwar compatible, you can pass it a sequence string, but | |
37 # there recommended say is to pass it a Seq object | |
38 | |
39 # this works | |
40 $seq = "MVLLLILSVLLLKEDVRGSAQSSERRVVAHMPGDIIIGALFSVHHQPTVDKVHERKCGAVREQYGI"; | |
41 $sig = new Bio::Tools::Sigcleave(-seq => $seq, | |
42 -type => 'protein', | |
43 -threshold=>'3.5', | |
44 ); | |
45 # but you do: | |
46 $seqobj = Bio::PrimarySeq->new(-seq => $seq); | |
47 | |
48 $sig = new Bio::Tools::Sigcleave(-seq => $seqobj, | |
49 -threshold=>'3.5', | |
50 ); | |
51 | |
52 | |
53 # now you can detect procaryotic signal sequences as well as eucaryotic | |
54 $sig->matrix('eucaryotic'); # or 'procaryotic' | |
55 | |
56 | |
57 =head2 Object Methods & Accessors | |
58 | |
59 # you can use this method to fine tune the threshod before printing out the results | |
60 $sig->result_count: | |
61 | |
62 %raw_results = $sig->signals; | |
63 $formatted_output = $sig->pretty_print; | |
64 | |
65 =head1 DESCRIPTION | |
66 | |
67 "Sigcleave" was a program distributed as part of the free EGCG add-on | |
68 to earlier versions of the GCG Sequence Analysis package. A new | |
69 implementation of the algorithm is now part of EMBOSS package. | |
70 | |
71 From the EGCG documentation: | |
72 | |
73 SigCleave uses the von Heijne method to locate signal sequences, and | |
74 to identify the cleavage site. The method is 95% accurate in | |
75 resolving signal sequences from non-signal sequences with a cutoff | |
76 score of 3.5, and 75-80% accurate in identifying the cleavage | |
77 site. The program reports all hits above a minimum value. | |
78 | |
79 The EGCG Sigcleave program was written by Peter Rice (E-mail: | |
80 pmr@sanger.ac.uk Post: Informatics Division, The Sanger Centre, | |
81 Wellcome Trust Genome Campus, Hinxton, Cambs, CB10 1SA, UK). | |
82 | |
83 Since EGCG is no longer distributed for the latest versions of GCG, | |
84 this code was developed to emulate the output of the original program | |
85 as much as possible for those who lost access to sigcleave when | |
86 upgrading to newer versions of GCG. | |
87 | |
88 There are 2 accessor methods for this object. "signals" will return a | |
89 perl associative array containing the sigcleave scores keyed by amino | |
90 acid position. "pretty_print" returns a formatted string similar to | |
91 the output of the original sigcleave utility. | |
92 | |
93 In both cases, the "threshold" setting controls the score reporting | |
94 level. If no value for threshold is passed in by the user, the code | |
95 defaults to a reporting value of 3.5. | |
96 | |
97 In this implemntation the accessor will never return any | |
98 score/position pair which does not meet the threshold limit. This is | |
99 the slightly different from the behaviour of the 8.1 EGCG sigcleave | |
100 program which will report the highest of the under-threshold results | |
101 if nothing else is found. | |
102 | |
103 | |
104 Example of pretty_print output: | |
105 | |
106 SIGCLEAVE of sigtest from: 1 to 146 | |
107 | |
108 Report scores over 3.5 | |
109 Maximum score 4.9 at residue 131 | |
110 | |
111 Sequence: FVILAAMSIQGSA-NLQTQWKSTASLALET | |
112 | (signal) | (mature peptide) | |
113 118 131 | |
114 | |
115 Other entries above 3.5 | |
116 | |
117 Maximum score 3.7 at residue 112 | |
118 | |
119 Sequence: CSRQLFGWLFCKV-HPGAIVFVILAAMSIQGSANLQTQWKSTASLALET | |
120 | (signal) | (mature peptide) | |
121 99 112 | |
122 | |
123 | |
124 =head1 FEEDBACK | |
125 | |
126 When updating and maintaining a module, it helps to know that people | |
127 are actually using it. Let us know if you find a bug, think this code | |
128 is useful or have any improvements/features to suggest. | |
129 | |
130 =head2 Reporting Bugs | |
131 | |
132 Report bugs to the Bioperl bug tracking system to help us keep track | |
133 the bugs and their resolution. Bug reports can be submitted via email | |
134 or the web: | |
135 | |
136 bioperl-bugs@bio.perl.org | |
137 http://bugzilla.bioperl.org/ | |
138 | |
139 =head1 AUTHOR | |
140 | |
141 Chris Dagdigian, dag@sonsorol.org & others | |
142 | |
143 =head1 CONTRIBUTORS | |
144 | |
145 Heikki Lehvaslaiho, heikki@ebi.ac.uk | |
146 | |
147 =head1 VERSION | |
148 | |
149 Bio::Tools::Sigcleave.pm, $Id: Sigcleave.pm,v 1.17 2002/10/22 07:45:22 lapp Exp $ | |
150 | |
151 =head1 COPYRIGHT | |
152 | |
153 Copyright (c) 1999 Chris Dagdigian & others. All Rights Reserved. | |
154 This module is free software; you can redistribute it and/or modify it | |
155 under the same terms as Perl itself. | |
156 | |
157 =head1 REFERENCES / SEE ALSO | |
158 | |
159 von Heijne G. (1986) "A new method for predicting signal sequences | |
160 cleavage sites." Nucleic Acids Res. 14, 4683-4690. | |
161 | |
162 von Heijne G. (1987) in "Sequence Analysis in Molecular Biology: | |
163 Treasure Trove or Trivial Pursuit" (Acad. Press, (1987), 113-117). | |
164 | |
165 | |
166 =head1 APPENDIX | |
167 | |
168 The following documentation describes the various functions | |
169 contained in this module. Some functions are for internal | |
170 use and are not meant to be called by the user; they are | |
171 preceded by an underscore ("_"). | |
172 | |
173 | |
174 =cut | |
175 | |
176 # | |
177 ## | |
178 ### | |
179 #### END of main POD documentation. | |
180 ### | |
181 ## | |
182 # | |
183 | |
184 | |
185 package Bio::Tools::Sigcleave; | |
186 | |
187 use Bio::Root::Root; | |
188 use Bio::PrimarySeq; | |
189 | |
190 @ISA = qw(Bio::Root::Root); | |
191 use strict; | |
192 use vars qw ($ID $VERSION %WeightTable_euc %WeightTable_pro ); | |
193 $ID = 'Bio::Tools::Sigcleave'; | |
194 $VERSION = 0.02; | |
195 | |
196 | |
197 %WeightTable_euc = ( | |
198 #Sample: 161 aligned sequences | |
199 # R -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 +1 +2 Expect | |
200 'A' => [16, 13, 14, 15, 20, 18, 18, 17, 25, 15, 47, 6, 80, 18, 6, 14.5], | |
201 'C' => [ 3, 6, 9, 7, 9, 14, 6, 8, 5, 6, 19, 3, 9, 8, 3, 4.5], | |
202 'D' => [ 0, 0, 0, 0, 0, 0, 0, 0, 5, 3, 0, 5, 0, 10, 11, 8.9], | |
203 'E' => [ 0, 0, 0, 1, 0, 0, 0, 0, 3, 7, 0, 7, 0, 13, 14, 10.0], | |
204 'F' => [13, 9, 11, 11, 6, 7, 18, 13, 4, 5, 0, 13, 0, 6, 4, 5.6], | |
205 'G' => [ 4, 4, 3, 6, 3, 13, 3, 2, 19, 34, 5, 7, 39, 10, 7, 12.1], | |
206 'H' => [ 0, 0, 0, 0, 0, 1, 1, 0, 5, 0, 0, 6, 0, 4, 2, 3.4], | |
207 'I' => [15, 15, 8, 6, 11, 5, 4, 8, 5, 1, 10, 5, 0, 8, 7, 7.4], | |
208 'K' => [ 0, 0, 0, 1, 0, 0, 1, 0, 0, 4, 0, 2, 0, 11, 9, 11.3], | |
209 'L' => [71, 68, 72, 79, 78, 45, 64, 49, 10, 23, 8, 20, 1, 8, 4, 12.1], | |
210 'M' => [ 0, 3, 7, 4, 1, 6, 2, 2, 0, 0, 0, 1, 0, 1, 2, 2.7], | |
211 'N' => [ 0, 1, 0, 1, 1, 0, 0, 0, 3, 3, 0, 10, 0, 4, 7, 7.1], | |
212 'P' => [ 2, 0, 2, 0, 0, 4, 1, 8, 20, 14, 0, 1, 3, 0, 22, 7.4], | |
213 'Q' => [ 0, 0, 0, 1, 0, 6, 1, 0, 10, 8, 0, 18, 3, 19, 10, 6.3], | |
214 'R' => [ 2, 0, 0, 0, 0, 1, 0, 0, 7, 4, 0, 15, 0, 12, 9, 7.6], | |
215 'S' => [ 9, 3, 8, 6, 13, 10, 15, 16, 26, 11, 23, 17, 20, 15, 10, 11.4], | |
216 'T' => [ 2, 10, 5, 4, 5, 13, 7, 7, 12, 6, 17, 8, 6, 3, 10, 9.7], | |
217 'V' => [20, 25, 15, 18, 13, 15, 11, 27, 0, 12, 32, 3, 0, 8, 17, 11.1], | |
218 'W' => [ 4, 3, 3, 1, 1, 2, 6, 3, 1, 3, 0, 9, 0, 2, 0, 1.8], | |
219 'Y' => [ 0, 1, 4, 0, 0, 1, 3, 1, 1, 2, 0, 5, 0, 1, 7, 5.6] | |
220 ); | |
221 | |
222 %WeightTable_pro = ( | |
223 #Sample: 36 aligned sequences | |
224 # R -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 +1 +2 Expect | |
225 'A' => [0, 8, 8, 9, 6, 7, 5, 6, 7, 7, 24, 2, 31, 18, 4, 3.2], | |
226 'C' => [1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1.0], | |
227 'D' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 8, 2.0], | |
228 'E' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 8, 2.2], | |
229 'F' => [2, 4, 3, 4, 1, 1, 8, 0, 4, 1, 0, 7, 0, 1, 0, 1.3], | |
230 'G' => [4, 2, 2, 2, 3, 5, 2, 4, 2, 2, 0, 2, 2, 1, 0, 2.7], | |
231 'H' => [0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 7, 0, 1, 0, 0.8], | |
232 'I' => [3, 1, 5, 1, 5, 0, 1, 3, 0, 0, 0, 0, 0, 0, 2, 1.7], | |
233 'K' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 3, 0, 2.5], | |
234 'L' => [8, 11, 9, 8, 9, 13, 1, 0, 2, 2, 1, 2, 0, 0, 1, 2.7], | |
235 'M' => [0, 2, 1, 1, 3, 2, 3, 0, 1, 2, 0, 4, 0, 0, 1, 0.6], | |
236 'N' => [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 3, 0, 1, 4, 1.6], | |
237 'P' => [0, 1, 1, 1, 1, 1, 2, 3, 5, 2, 0, 0, 0, 0, 5, 1.7], | |
238 'Q' => [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 1, 1.4], | |
239 'R' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1.7], | |
240 'S' => [1, 0, 1, 4, 4, 1, 5, 15, 5, 8, 5, 2, 2, 0, 0, 2.6], | |
241 'T' => [2, 0, 4, 2, 2, 2, 2, 2, 5, 1, 3, 0, 1, 1, 2, 2.2], | |
242 'V' => [5, 7, 1, 3, 1, 4, 7, 0, 0, 4, 3, 0, 0, 2, 0, 2.5], | |
243 'W' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0.4], | |
244 'Y' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 1.3] | |
245 ); | |
246 | |
247 | |
248 ## | |
249 ## Now we calculate the _real_ values for the weight tables | |
250 ## | |
251 ## | |
252 ## yeah yeah yeah there is lots of math here that gets repeated | |
253 ## every single time a sigcleave object gets created. This is | |
254 ## a quick hack to make sure that we get the scores as accurate as | |
255 ## possible. Need all those significant digits.... | |
256 ## | |
257 ## suggestions for speedup aproaches welcome | |
258 ## | |
259 | |
260 | |
261 foreach my $i (keys %WeightTable_euc) { | |
262 my $expected = $WeightTable_euc{$i}[15]; | |
263 if ($expected > 0) { | |
264 for (my $j=0; $j<16; $j++) { | |
265 if ($WeightTable_euc{$i}[$j] == 0) { | |
266 $WeightTable_euc{$i}[$j] = 1; | |
267 if ($j == 10 || $j == 12) { | |
268 $WeightTable_euc{$i}[$j] = 1.e-10; | |
269 } | |
270 } | |
271 $WeightTable_euc{$i}[$j] = log($WeightTable_euc{$i}[$j]/$expected); | |
272 } | |
273 } | |
274 } | |
275 | |
276 | |
277 foreach my $i (keys %WeightTable_pro) { | |
278 my $expected = $WeightTable_pro{$i}[15]; | |
279 if ($expected > 0) { | |
280 for (my $j=0; $j<16; $j++) { | |
281 if ($WeightTable_pro{$i}[$j] == 0) { | |
282 $WeightTable_pro{$i}[$j] = 1; | |
283 if ($j == 10 || $j == 12) { | |
284 $WeightTable_pro{$i}[$j] = 1.e-10; | |
285 } | |
286 } | |
287 $WeightTable_pro{$i}[$j] = log($WeightTable_pro{$i}[$j]/$expected); | |
288 } | |
289 } | |
290 } | |
291 | |
292 | |
293 | |
294 ##################################################################################### | |
295 ## CONSTRUCTOR ## | |
296 ##################################################################################### | |
297 | |
298 | |
299 sub new { | |
300 my ($class, @args) = @_; | |
301 | |
302 my $self = $class->SUPER::new(@args); | |
303 #my $self = Bio::Seq->new(@args); | |
304 | |
305 my ($seq, $threshold, $matrix) = $self->_rearrange([qw(SEQ THRESHOLD MATRIX)],@args); | |
306 | |
307 defined $threshold && $self->threshold($threshold); | |
308 $matrix && $self->matrix($matrix); | |
309 $seq && $self->seq($seq); | |
310 | |
311 return $self; | |
312 } | |
313 | |
314 | |
315 | |
316 =head1 threshold | |
317 | |
318 Title : threshold | |
319 Usage : $value = $self->threshold | |
320 Purpose : Read/write method sigcleave score reporting threshold. | |
321 Returns : float. | |
322 Argument : new value, float | |
323 Throws : on non-number argument | |
324 Comments : defaults to 3.5 | |
325 See Also : n/a | |
326 | |
327 =cut | |
328 | |
329 #---------------- | |
330 sub threshold { | |
331 #---------------- | |
332 my ($self, $value) = @_; | |
333 if( defined $value) { | |
334 $self->throw("I need a number, not [$value]") | |
335 if $value !~ /^[+-]?[\d\.]+$/; | |
336 $self->{'_threshold'} = $value; | |
337 } | |
338 return $self->{'_threshold'} || 3.5 ; | |
339 } | |
340 | |
341 =head1 matrix | |
342 | |
343 Title : matrix | |
344 Usage : $value = $self->matrix('procaryotic') | |
345 Purpose : Read/write method sigcleave matrix. | |
346 Returns : float. | |
347 Argument : new value: 'eucaryotic' or 'procaryotic' | |
348 Throws : on non-number argument | |
349 Comments : defaults to 3.5 | |
350 See Also : n/a | |
351 | |
352 =cut | |
353 | |
354 #---------------- | |
355 sub matrix { | |
356 #---------------- | |
357 my ($self, $value) = @_; | |
358 if( defined $value) { | |
359 $self->throw("I need 'eucaryotic' or 'procaryotic', not [$value]") | |
360 unless $value eq 'eucaryotic' or $value eq 'procaryotic'; | |
361 $self->{'_matrix'} = $value; | |
362 } | |
363 return $self->{'_matrix'} || 'eucaryotic' ; | |
364 | |
365 } | |
366 | |
367 =head1 seq | |
368 | |
369 Title : seq | |
370 Usage : $value = $self->seq('procaryotic') | |
371 Purpose : Read/write method sigcleave seq. | |
372 Returns : float. | |
373 Argument : new value: 'eucaryotic' or 'procaryotic' | |
374 Throws : on non-number argument | |
375 Comments : defaults to 3.5 | |
376 See Also : n/a | |
377 | |
378 =cut | |
379 | |
380 #---------------- | |
381 sub seq { | |
382 #---------------- | |
383 my ($self, $value) = @_; | |
384 if( defined $value) { | |
385 if ($value->isa('Bio::PrimarySeqI')) { | |
386 $self->{'_seq'} = $value; | |
387 } else { | |
388 $self->{'_seq'} = Bio::PrimarySeq->new(-seq=>$value, | |
389 -alphabet=>'protein'); | |
390 } | |
391 } | |
392 return $self->{'_seq'}; | |
393 } | |
394 | |
395 | |
396 | |
397 =head1 _Analyze | |
398 | |
399 Title : _Analyze | |
400 Usage : N/A This is an internal method. Not meant to be called from outside | |
401 : the package | |
402 : | |
403 Purpose : calculates sigcleave score and amino acid position for the | |
404 : given protein sequence. The score reporting threshold can | |
405 : be adjusted by passing in the "threshold" parameter during | |
406 : object construction. If no threshold is passed in, the code | |
407 : defaults to reporting any scores equal to or above 3.5 | |
408 : | |
409 Returns : nothing. results are added to the object | |
410 Argument : none. | |
411 Throws : nothing. | |
412 Comments : nothing. | |
413 See Also : n/a | |
414 | |
415 =cut | |
416 | |
417 #---------------- | |
418 sub _Analyze { | |
419 #---------------- | |
420 my($self) = @_; | |
421 | |
422 my %signals; | |
423 my @hitWeight = (); | |
424 my @hitsort = (); | |
425 my @hitpos = (); | |
426 my $maxSite = ""; | |
427 my $seqPos = ""; | |
428 my $istart = ""; | |
429 my $iend = ""; | |
430 my $icol = ""; | |
431 my $i = ""; | |
432 my $weight = ""; | |
433 my $k = 0; | |
434 my $c = 0; | |
435 my $seqBegin = 0; | |
436 my $pVal = -13; | |
437 my $nVal = 2; | |
438 my $nHits = 0; | |
439 my $seqEnd = $self->seq->length; | |
440 my $pep = $self->seq->seq; | |
441 my $minWeight = $self->threshold; | |
442 my $matrix = $self->matrix; | |
443 | |
444 ## The weight table is keyed by UPPERCASE letters so we uppercase | |
445 ## the pep string because we don't want to alter the actual object | |
446 ## sequence. | |
447 | |
448 $pep =~ tr/a-z/A-Z/; | |
449 | |
450 for ($seqPos = $seqBegin; $seqPos < $seqEnd; $seqPos++) { | |
451 $istart = (0 > $seqPos + $pVal)? 0 : $seqPos + $pVal; | |
452 $iend = ($seqPos + $nVal - 1 < $seqEnd)? $seqPos + $nVal - 1 : $seqEnd; | |
453 $icol= $iend - $istart + 1; | |
454 $weight = 0.00; | |
455 for ($k=0; $k<$icol; $k++) { | |
456 $c = substr($pep, $istart + $k, 1); | |
457 | |
458 ## CD: The if(defined) stuff was put in here because Sigcleave.pm | |
459 ## CD: kept getting warnings about undefined vals during 'make test' ... | |
460 if ($matrix eq 'eucaryotic') { | |
461 $weight += $WeightTable_euc{$c}[$k] if defined $WeightTable_euc{$c}[$k]; | |
462 } else { | |
463 $weight += $WeightTable_pro{$c}[$k] if defined $WeightTable_pro{$c}[$k]; | |
464 } | |
465 } | |
466 $signals{$seqPos+1} = sprintf ("%.1f", $weight) if $weight >= $minWeight; | |
467 } | |
468 | |
469 $self->{"_signal_scores"} = { %signals }; | |
470 } | |
471 | |
472 | |
473 =head1 signals | |
474 | |
475 Title : signals | |
476 Usage : %sigcleave_results = $sig->signals; | |
477 : | |
478 Purpose : Accessor method for sigcleave results | |
479 : | |
480 Returns : Associative array. The key value represents the amino acid position | |
481 : and the value represents the score. Only scores that | |
482 : are greater than or equal to the THRESHOLD value are reported. | |
483 : | |
484 Argument : none. | |
485 Throws : none. | |
486 Comments : none. | |
487 See Also : THRESHOLD | |
488 | |
489 =cut | |
490 | |
491 #---------------- | |
492 sub signals { | |
493 #---------------- | |
494 my $self = shift; | |
495 my %results; | |
496 my $position; | |
497 | |
498 # do the calculations | |
499 $self->_Analyze; | |
500 | |
501 foreach $position ( sort keys %{ $self->{'_signal_scores'} } ) { | |
502 $results{$position} = $self->{'_signal_scores'}{$position}; | |
503 } | |
504 return %results; | |
505 } | |
506 | |
507 | |
508 =head1 result_count | |
509 | |
510 Title : result_count | |
511 Usage : $count = $sig->result_count; | |
512 : | |
513 Purpose : Accessor method for sigcleave results | |
514 : | |
515 Returns : Integer, number of results above the threshold | |
516 : | |
517 Argument : none. | |
518 Throws : none. | |
519 Comments : none. | |
520 | |
521 See Also : THRESHOLD | |
522 | |
523 =cut | |
524 | |
525 #---------------- | |
526 sub result_count { | |
527 #---------------- | |
528 my $self = shift; | |
529 $self->_Analyze; | |
530 return keys %{ $self->{'_signal_scores'} }; | |
531 } | |
532 | |
533 | |
534 =head1 pretty_print | |
535 | |
536 Title : pretty_print | |
537 Usage : $output = $sig->pretty_print; | |
538 : print $sig->pretty_print; | |
539 : | |
540 Purpose : Emulates the output of the EGCG Sigcleave | |
541 : utility. | |
542 : | |
543 Returns : A formatted string. | |
544 Argument : none. | |
545 Throws : none. | |
546 Comments : none. | |
547 See Also : n/a | |
548 | |
549 =cut | |
550 | |
551 #---------------- | |
552 sub pretty_print { | |
553 #---------------- | |
554 my $self = shift; | |
555 my $pos; | |
556 my $output; | |
557 my $cnt = 1; | |
558 my %results = $self->signals; | |
559 my @hits = keys %results; | |
560 my $hitcount = $#hits; $hitcount++; | |
561 my $thresh = $self->threshold; | |
562 my $seqlen = $self->seq->length || 0; | |
563 my $name = $self->seq->id || 'NONAME'; | |
564 my $pep = $self->seq->seq; | |
565 $pep =~ tr/a-z/A-Z/; | |
566 | |
567 $output = "SIGCLEAVE of $name from: 1 to $seqlen\n\n"; | |
568 | |
569 if ($hitcount > 0) { | |
570 $output .= "Report scores over $thresh\n"; | |
571 foreach $pos ((sort { $results{$b} cmp $results{$a} } keys %results)) { | |
572 my $start = $pos - 15; | |
573 $start = 1 if $start < 1; | |
574 my $sig = substr($pep,$start -1,$pos-$start ); | |
575 | |
576 $output .= sprintf ("Maximum score %1.1f at residue %3d\n",$results{$pos},$pos); | |
577 $output .= "\n"; | |
578 $output .= " Sequence: "; | |
579 $output .= $sig; | |
580 $output .= "-" x (15- length($sig)); | |
581 $output .= "-"; | |
582 $output .= substr($pep,$pos-1,50); | |
583 $output .= "\n"; | |
584 $output .= " " x 12; | |
585 $output .= "| \(signal\) | \(mature peptide\)\n"; | |
586 $output .= sprintf(" %3d %3d\n\n",$start,$pos); | |
587 | |
588 if (($hitcount > 1) && ($cnt == 1)) { | |
589 $output .= " Other entries above $thresh\n\n"; | |
590 } | |
591 $cnt++; | |
592 } | |
593 } | |
594 $output; | |
595 } | |
596 | |
597 | |
598 1; | |
599 __END__ | |
600 | |
601 | |
602 ######################################################################### | |
603 # End of class | |
604 ######################################################################### |