annotate variant_effect_predictor/Bio/Tools/Sigcleave.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 #-----------------------------------------------------------------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2 # PACKAGE : Bio::Tools::Sigcleave
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 # AUTHOR : Chris Dagdigian, dag@sonsorol.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 # CREATED : Jan 28 1999
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5 # REVISION: $Id: Sigcleave.pm,v 1.17 2002/10/22 07:45:22 lapp Exp $
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 # Copyright (c) 1997-9 bioperl, Chris Dagdigian and others. All Rights Reserved.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8 # This module is free software; you can redistribute it and/or
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 # modify it under the same terms as Perl itself.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 # _History_
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 # Object framework ripped from Steve Chervits's SeqPattern.pm
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15 # Core EGCG Sigcleave emulation from perl code developed by
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16 # Danh Nguyen & Kamalakar Gulukota which itself was based
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 # loosely on Colgrove's signal.c program.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 # The overall idea is to replicate the output of the sigcleave
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20 # program which was distributed with the EGCG extension to the GCG sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 # analysis package. There is also an accessor method for just getting at
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22 # the raw results.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24 #-----------------------------------------------------------------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28 Bio::Tools::Sigcleave - Bioperl object for sigcleave analysis
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32 =head2 Object Creation
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34 use Bio::Tools::Sigcleave ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36 # to keep the module backward compatible, you can pass it a sequence string, but
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37 # the recommended way is to pass it a Seq object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39 # this works
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40 $seq = "MVLLLILSVLLLKEDVRGSAQSSERRVVAHMPGDIIIGALFSVHHQPTVDKVHERKCGAVREQYGI";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41 $sig = new Bio::Tools::Sigcleave(-seq => $seq,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42 -type => 'protein',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 -threshold=>'3.5',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45 # but you should do:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46 $seqobj = Bio::PrimarySeq->new(-seq => $seq);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48 $sig = new Bio::Tools::Sigcleave(-seq => $seqobj,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49 -threshold=>'3.5',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53 # now you can detect procaryotic signal sequences as well as eucaryotic
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54 $sig->matrix('eucaryotic'); # or 'procaryotic'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57 =head2 Object Methods & Accessors
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59 # you can use this method to fine tune the threshold before printing out the results
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60 $sig->result_count;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62 %raw_results = $sig->signals;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
63 $formatted_output = $sig->pretty_print;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
64
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
65 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
66
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67 "Sigcleave" was a program distributed as part of the free EGCG add-on
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68 to earlier versions of the GCG Sequence Analysis package. A new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69 implementation of the algorithm is now part of the EMBOSS package.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71 From the EGCG documentation:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73 SigCleave uses the von Heijne method to locate signal sequences, and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74 to identify the cleavage site. The method is 95% accurate in
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75 resolving signal sequences from non-signal sequences with a cutoff
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76 score of 3.5, and 75-80% accurate in identifying the cleavage
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77 site. The program reports all hits above a minimum value.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79 The EGCG Sigcleave program was written by Peter Rice (E-mail:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80 pmr@sanger.ac.uk Post: Informatics Division, The Sanger Centre,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81 Wellcome Trust Genome Campus, Hinxton, Cambs, CB10 1SA, UK).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83 Since EGCG is no longer distributed for the latest versions of GCG,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84 this code was developed to emulate the output of the original program
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85 as much as possible for those who lost access to sigcleave when
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86 upgrading to newer versions of GCG.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88 There are 2 accessor methods for this object. "signals" will return a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89 perl associative array containing the sigcleave scores keyed by amino
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90 acid position. "pretty_print" returns a formatted string similar to
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91 the output of the original sigcleave utility.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93 In both cases, the "threshold" setting controls the score reporting
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
94 level. If no value for threshold is passed in by the user, the code
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
95 defaults to a reporting value of 3.5.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
96
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
97 In this implementation the accessor will never return any
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
98 score/position pair which does not meet the threshold limit. This is
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
99 slightly different from the behaviour of the 8.1 EGCG sigcleave
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
100 program which will report the highest of the under-threshold results
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
101 if nothing else is found.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
102
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
103
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
104 Example of pretty_print output:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
105
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
106 SIGCLEAVE of sigtest from: 1 to 146
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
107
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
108 Report scores over 3.5
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
109 Maximum score 4.9 at residue 131
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
110
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
111 Sequence: FVILAAMSIQGSA-NLQTQWKSTASLALET
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
112 | (signal) | (mature peptide)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
113 118 131
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
114
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
115 Other entries above 3.5
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
116
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
117 Maximum score 3.7 at residue 112
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
118
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
119 Sequence: CSRQLFGWLFCKV-HPGAIVFVILAAMSIQGSANLQTQWKSTASLALET
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
120 | (signal) | (mature peptide)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
121 99 112
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
122
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
123
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
124 =head1 FEEDBACK
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
125
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
126 When updating and maintaining a module, it helps to know that people
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
127 are actually using it. Let us know if you find a bug, think this code
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
128 is useful or have any improvements/features to suggest.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
129
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
130 =head2 Reporting Bugs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
131
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
132 Report bugs to the Bioperl bug tracking system to help us keep track
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
133 of the bugs and their resolution. Bug reports can be submitted via email
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
134 or the web:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
135
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
136 bioperl-bugs@bio.perl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
137 http://bugzilla.bioperl.org/
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
138
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
139 =head1 AUTHOR
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
140
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
141 Chris Dagdigian, dag@sonsorol.org & others
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
142
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
143 =head1 CONTRIBUTORS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
144
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
145 Heikki Lehvaslaiho, heikki@ebi.ac.uk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
146
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
147 =head1 VERSION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
148
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
149 Bio::Tools::Sigcleave.pm, $Id: Sigcleave.pm,v 1.17 2002/10/22 07:45:22 lapp Exp $
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
150
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
151 =head1 COPYRIGHT
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
152
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
153 Copyright (c) 1999 Chris Dagdigian & others. All Rights Reserved.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
154 This module is free software; you can redistribute it and/or modify it
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
155 under the same terms as Perl itself.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
156
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
157 =head1 REFERENCES / SEE ALSO
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
158
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
159 von Heijne G. (1986) "A new method for predicting signal sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
160 cleavage sites." Nucleic Acids Res. 14, 4683-4690.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
161
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
162 von Heijne G. (1987) in "Sequence Analysis in Molecular Biology:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
163 Treasure Trove or Trivial Pursuit" (Acad. Press, (1987), 113-117).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
164
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
165
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
166 =head1 APPENDIX
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
167
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
168 The following documentation describes the various functions
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
169 contained in this module. Some functions are for internal
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
170 use and are not meant to be called by the user; they are
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
171 preceded by an underscore ("_").
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
172
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
173
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
174 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
175
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
176 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
177 ##
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
178 ###
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
179 #### END of main POD documentation.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
180 ###
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
181 ##
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
182 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
183
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
184
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
185 package Bio::Tools::Sigcleave;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
186
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
187 use Bio::Root::Root;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
188 use Bio::PrimarySeq;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
189
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
190 @ISA = qw(Bio::Root::Root);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
191 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
192 use vars qw ($ID $VERSION %WeightTable_euc %WeightTable_pro );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
193 $ID = 'Bio::Tools::Sigcleave';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
194 $VERSION = 0.02;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
195
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
196
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
197 %WeightTable_euc = (
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
198 #Sample: 161 aligned sequences
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
199 # R -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 +1 +2 Expect
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
200 'A' => [16, 13, 14, 15, 20, 18, 18, 17, 25, 15, 47, 6, 80, 18, 6, 14.5],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
201 'C' => [ 3, 6, 9, 7, 9, 14, 6, 8, 5, 6, 19, 3, 9, 8, 3, 4.5],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
202 'D' => [ 0, 0, 0, 0, 0, 0, 0, 0, 5, 3, 0, 5, 0, 10, 11, 8.9],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
203 'E' => [ 0, 0, 0, 1, 0, 0, 0, 0, 3, 7, 0, 7, 0, 13, 14, 10.0],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
204 'F' => [13, 9, 11, 11, 6, 7, 18, 13, 4, 5, 0, 13, 0, 6, 4, 5.6],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
205 'G' => [ 4, 4, 3, 6, 3, 13, 3, 2, 19, 34, 5, 7, 39, 10, 7, 12.1],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
206 'H' => [ 0, 0, 0, 0, 0, 1, 1, 0, 5, 0, 0, 6, 0, 4, 2, 3.4],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
207 'I' => [15, 15, 8, 6, 11, 5, 4, 8, 5, 1, 10, 5, 0, 8, 7, 7.4],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
208 'K' => [ 0, 0, 0, 1, 0, 0, 1, 0, 0, 4, 0, 2, 0, 11, 9, 11.3],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
209 'L' => [71, 68, 72, 79, 78, 45, 64, 49, 10, 23, 8, 20, 1, 8, 4, 12.1],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
210 'M' => [ 0, 3, 7, 4, 1, 6, 2, 2, 0, 0, 0, 1, 0, 1, 2, 2.7],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
211 'N' => [ 0, 1, 0, 1, 1, 0, 0, 0, 3, 3, 0, 10, 0, 4, 7, 7.1],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
212 'P' => [ 2, 0, 2, 0, 0, 4, 1, 8, 20, 14, 0, 1, 3, 0, 22, 7.4],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
213 'Q' => [ 0, 0, 0, 1, 0, 6, 1, 0, 10, 8, 0, 18, 3, 19, 10, 6.3],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
214 'R' => [ 2, 0, 0, 0, 0, 1, 0, 0, 7, 4, 0, 15, 0, 12, 9, 7.6],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
215 'S' => [ 9, 3, 8, 6, 13, 10, 15, 16, 26, 11, 23, 17, 20, 15, 10, 11.4],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
216 'T' => [ 2, 10, 5, 4, 5, 13, 7, 7, 12, 6, 17, 8, 6, 3, 10, 9.7],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
217 'V' => [20, 25, 15, 18, 13, 15, 11, 27, 0, 12, 32, 3, 0, 8, 17, 11.1],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
218 'W' => [ 4, 3, 3, 1, 1, 2, 6, 3, 1, 3, 0, 9, 0, 2, 0, 1.8],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
219 'Y' => [ 0, 1, 4, 0, 0, 1, 3, 1, 1, 2, 0, 5, 0, 1, 7, 5.6]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
220 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
221
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
222 %WeightTable_pro = (
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
223 #Sample: 36 aligned sequences
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
224 # R -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 +1 +2 Expect
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
225 'A' => [0, 8, 8, 9, 6, 7, 5, 6, 7, 7, 24, 2, 31, 18, 4, 3.2],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
226 'C' => [1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1.0],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
227 'D' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 8, 2.0],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
228 'E' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 8, 2.2],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
229 'F' => [2, 4, 3, 4, 1, 1, 8, 0, 4, 1, 0, 7, 0, 1, 0, 1.3],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
230 'G' => [4, 2, 2, 2, 3, 5, 2, 4, 2, 2, 0, 2, 2, 1, 0, 2.7],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
231 'H' => [0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 7, 0, 1, 0, 0.8],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
232 'I' => [3, 1, 5, 1, 5, 0, 1, 3, 0, 0, 0, 0, 0, 0, 2, 1.7],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
233 'K' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 3, 0, 2.5],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
234 'L' => [8, 11, 9, 8, 9, 13, 1, 0, 2, 2, 1, 2, 0, 0, 1, 2.7],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
235 'M' => [0, 2, 1, 1, 3, 2, 3, 0, 1, 2, 0, 4, 0, 0, 1, 0.6],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
236 'N' => [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 3, 0, 1, 4, 1.6],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
237 'P' => [0, 1, 1, 1, 1, 1, 2, 3, 5, 2, 0, 0, 0, 0, 5, 1.7],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
238 'Q' => [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 1, 1.4],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
239 'R' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1.7],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
240 'S' => [1, 0, 1, 4, 4, 1, 5, 15, 5, 8, 5, 2, 2, 0, 0, 2.6],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
241 'T' => [2, 0, 4, 2, 2, 2, 2, 2, 5, 1, 3, 0, 1, 1, 2, 2.2],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
242 'V' => [5, 7, 1, 3, 1, 4, 7, 0, 0, 4, 3, 0, 0, 2, 0, 2.5],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
243 'W' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0.4],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
244 'Y' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 1.3]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
245 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
246
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
247
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
248 ##
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
249 ## Now we calculate the _real_ values for the weight tables
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
250 ##
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
251 ##
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
252 ## yeah yeah yeah there is lots of math here that gets repeated
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
253 ## every single time a sigcleave object gets created. This is
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
254 ## a quick hack to make sure that we get the scores as accurate as
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
255 ## possible. Need all those significant digits....
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
256 ##
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
257 ## suggestions for speedup approaches welcome
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
258 ##
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
259
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
260
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Convert the raw counts in both weight tables, in place, into log-odds
# weights: each cell becomes log(count / expected), where "expected" is the
# last entry (index 15, the "Expect" column) of the residue's row.
# Rows whose expected value is not positive are left untouched.
foreach my $table (\%WeightTable_euc, \%WeightTable_pro) {
    foreach my $residue (keys %{$table}) {
        my $row      = $table->{$residue};
        my $expected = $row->[15];
        next unless $expected > 0;

        # Index 15 is deliberately included, as in the original loops: the
        # "Expect" slot itself ends up as log(expected/expected) = 0.
        foreach my $col (0 .. 15) {
            if ($row->[$col] == 0) {
                # log(0) is undefined, so zero counts are replaced first.
                # Columns 10 and 12 (headed -3 and -1 in the table comments)
                # get a tiny value, i.e. a very large negative weight; every
                # other column gets a pseudo-count of 1.
                $row->[$col] = ($col == 10 || $col == 12) ? 1.e-10 : 1;
            }
            $row->[$col] = log($row->[$col] / $expected);
        }
    }
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
291
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
292
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
293
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
294 #####################################################################################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
295 ## CONSTRUCTOR ##
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
296 #####################################################################################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
297
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
298
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
299 sub new {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
300 my ($class, @args) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
301
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
302 my $self = $class->SUPER::new(@args);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
303 #my $self = Bio::Seq->new(@args);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
304
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
305 my ($seq, $threshold, $matrix) = $self->_rearrange([qw(SEQ THRESHOLD MATRIX)],@args);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
306
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
307 defined $threshold && $self->threshold($threshold);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
308 $matrix && $self->matrix($matrix);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
309 $seq && $self->seq($seq);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
310
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
311 return $self;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
312 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
313
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
314
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
315
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
316 =head1 threshold
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
317
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
318 Title : threshold
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
319 Usage : $value = $self->threshold
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
320 Purpose : Read/write method for the sigcleave score reporting threshold.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
321 Returns : float.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
322 Argument : new value, float
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
323 Throws : on non-number argument
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
324 Comments : defaults to 3.5
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
325 See Also : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
326
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
327 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
328
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
329 #----------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub threshold {
#----------------
    # Get or set the score reporting threshold.
    #   $value (optional) - new threshold: a plain decimal number with an
    #                       optional sign, at least one digit and at most
    #                       one decimal point (e.g. 3.5, -1, .5, 4.).
    # Returns the stored threshold, or 3.5 if none has been set.
    # Throws (via $self->throw) if $value is defined but is not such a number.
    my ($self, $value) = @_;

    if (defined $value) {
        # Require at least one digit and at most one decimal point, so that
        # strings such as '.', '..' or '1.2.3' are rejected.  '$' (rather
        # than '\z') is kept so that a value carrying a trailing newline is
        # still accepted, as it was before.
        $self->throw("I need a number, not [$value]")
            if $value !~ /^[+-]?(?:\d+\.?\d*|\.\d+)$/;
        $self->{'_threshold'} = $value;
    }

    # Test definedness, not truth: an explicit threshold of 0 is a valid
    # setting and must not be silently replaced by the default.
    return defined($self->{'_threshold'}) ? $self->{'_threshold'} : 3.5;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
340
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
341 =head1 matrix
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
342
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
343 Title : matrix
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
344 Usage : $value = $self->matrix('procaryotic')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
345 Purpose : Read/write method for the sigcleave matrix.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
346 Returns : string, either 'eucaryotic' or 'procaryotic'.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
347 Argument : new value: 'eucaryotic' or 'procaryotic'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
348 Throws : on any argument other than 'eucaryotic' or 'procaryotic'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
349 Comments : defaults to 'eucaryotic'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
350 See Also : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
351
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
352 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
353
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
354 #----------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub matrix {
#----------------
    # Read/write accessor for the name of the weight matrix to use.
    #
    # Argument : optional new value, 'eucaryotic' or 'procaryotic'.
    # Returns  : the stored matrix name, or 'eucaryotic' when none is set.
    # Throws   : via $self->throw for any other defined argument.
    my ($self, $value) = @_;

    if (defined $value) {
        my %is_known_matrix = map { $_ => 1 } qw(eucaryotic procaryotic);
        $self->throw("I need 'eucaryotic' or 'procaryotic', not [$value]")
            if !$is_known_matrix{$value};
        $self->{'_matrix'} = $value;
    }

    return $self->{'_matrix'} ? $self->{'_matrix'} : 'eucaryotic';
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
366
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
367 =head1 seq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
368
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
369 Title : seq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
370 Usage : $value = $self->seq($seq)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
371 Purpose : Read/write method for the sequence to be analyzed.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
372 Returns : the stored sequence object (undef if none has been set).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
373 Argument : new value: a Bio::PrimarySeqI object or a protein sequence string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
374 Throws : nothing itself; a string argument is handed to Bio::PrimarySeq->new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
375 Comments : no default value
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
376 See Also : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
377
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
378 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
379
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
380 #----------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub seq {
#----------------
    # Read/write accessor for the sequence to be analyzed.
    #
    # Argument : optional new value; either an object that isa
    #            Bio::PrimarySeqI (stored as-is) or anything else, which is
    #            handed to Bio::PrimarySeq->new as a protein sequence.
    # Returns  : whatever is currently stored under '_seq'.
    my ($self, $value) = @_;

    if (defined $value) {
        # Only references can be sequence objects.  Calling ->isa as a
        # method on a plain string dies for some strings (e.g. the empty
        # string), so test references with the function form instead.
        if (ref($value) && UNIVERSAL::isa($value, 'Bio::PrimarySeqI')) {
            $self->{'_seq'} = $value;
        }
        else {
            $self->{'_seq'} = Bio::PrimarySeq->new(-seq      => $value,
                                                   -alphabet => 'protein');
        }
    }

    return $self->{'_seq'};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
394
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
395
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
396
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
397 =head1 _Analyze
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
398
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
399 Title : _Analyze
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
400 Usage : N/A This is an internal method. Not meant to be called from outside
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
401 : the package
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
402 :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
403 Purpose : calculates sigcleave score and amino acid position for the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
404 : given protein sequence. The score reporting threshold can
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
405 : be adjusted by passing in the "threshold" parameter during
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
406 : object construction. If no threshold is passed in, the code
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
407 : defaults to reporting any scores equal to or above 3.5
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
408 :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
409 Returns : nothing. results are added to the object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
410 Argument : none.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
411 Throws : nothing.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
412 Comments : nothing.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
413 See Also : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
414
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
415 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
416
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
417 #----------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub _Analyze {
#----------------
    # Internal: scores every position of the object's sequence against
    # the selected weight table and stores the hits on the object.
    #
    # Reads   : $self->seq (length and residues), $self->threshold,
    #           $self->matrix, and the file-level %WeightTable_euc /
    #           %WeightTable_pro tables.
    # Writes  : $self->{'_signal_scores'}, a hash ref mapping 1-based
    #           position => score formatted with "%.1f", for every
    #           position whose score is >= the threshold.
    # Returns : that same hash ref (callers in this file ignore it).
    my ($self) = @_;

    my $pVal = -13;    # window start offset relative to the current position
    my $nVal = 2;      # window end offset (the window ends at pos + $nVal - 1)

    my $seqEnd    = $self->seq->length;
    my $pep       = $self->seq->seq;
    my $minWeight = $self->threshold;
    my $matrix    = $self->matrix;

    # Choose the weight table once instead of on every residue.
    my $table = $matrix eq 'eucaryotic' ? \%WeightTable_euc
                                        : \%WeightTable_pro;

    ## The weight table is keyed by UPPERCASE letters so we uppercase
    ## the local copy of the peptide string; the sequence object itself
    ## is left untouched.
    $pep =~ tr/a-z/A-Z/;

    my %signals;
    for (my $seqPos = 0; $seqPos < $seqEnd; $seqPos++) {
        my $istart = (0 > $seqPos + $pVal) ? 0 : $seqPos + $pVal;
        my $iend   = ($seqPos + $nVal - 1 < $seqEnd) ? $seqPos + $nVal - 1
                                                     : $seqEnd;
        my $icol   = $iend - $istart + 1;

        my $weight = 0;
        for (my $k = 0; $k < $icol; $k++) {
            my $residue = substr($pep, $istart + $k, 1);

            # Fetch the residue's row first and only then index into it.
            # Testing $table->{$residue}[$k] directly would autovivify
            # an empty row in the shared table for every residue that
            # has no weights (including the empty string read at the
            # end of the sequence).
            my $row = $table->{$residue};
            next unless $row && defined $row->[$k];
            $weight += $row->[$k];
        }

        $signals{$seqPos + 1} = sprintf("%.1f", $weight)
            if $weight >= $minWeight;
    }

    $self->{'_signal_scores'} = \%signals;
    return $self->{'_signal_scores'};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
471
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
472
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
473 =head1 signals
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
474
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
475 Title : signals
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
476 Usage : %sigcleave_results = $sig->signals;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
477 :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
478 Purpose : Accessor method for sigcleave results
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
479 :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
480 Returns : Associative array. The key value represents the amino acid position
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
481 : and the value represents the score. Only scores that
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
482 : are greater than or equal to the THRESHOLD value are reported.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
483 :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
484 Argument : none.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
485 Throws : none.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
486 Comments : none.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
487 See Also : THRESHOLD
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
488
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
489 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
490
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
491 #----------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub signals {
#----------------
    # Accessor for the sigcleave results: runs the analysis and returns
    # a copy of the position => score pairs stored on the object.
    my ($self) = @_;

    # do the calculations
    $self->_Analyze;

    # Copy into a fresh hash so the caller gets its own data, not the
    # object's internal hash.
    my %results = map { ($_ => $self->{'_signal_scores'}{$_}) }
                  keys %{ $self->{'_signal_scores'} };

    return %results;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
506
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
507
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
508 =head1 result_count
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
509
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
510 Title : result_count
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
511 Usage : $count = $sig->result_count;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
512 :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
513 Purpose : Accessor method for sigcleave results
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
514 :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
515 Returns : Integer, number of results at or above the threshold
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
516 :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
517 Argument : none.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
518 Throws : none.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
519 Comments : none.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
520
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
521 See Also : THRESHOLD
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
522
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
523 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
524
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
525 #----------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub result_count {
#----------------
    # Runs the analysis and returns the number of stored results.
    my $self = shift;

    $self->_Analyze;

    # Force scalar context: a bare "return keys ..." hands back the list
    # of positions, not their count, when the caller is in list context
    # (for example inside print or an argument list).
    return scalar keys %{ $self->{'_signal_scores'} };
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
532
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
533
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
534 =head1 pretty_print
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
535
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
536 Title : pretty_print
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
537 Usage : $output = $sig->pretty_print;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
538 : print $sig->pretty_print;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
539 :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
540 Purpose : Emulates the output of the EGCG Sigcleave
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
541 : utility.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
542 :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
543 Returns : A formatted string.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
544 Argument : none.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
545 Throws : none.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
546 Comments : none.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
547 See Also : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
548
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
549 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
550
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
551 #----------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub pretty_print {
#----------------
    # Builds a text report of the sigcleave results in the style of the
    # EGCG Sigcleave utility and returns it as a single string.
    #
    # Reads   : $self->signals (position => score), $self->threshold and
    #           the id, length and residues of $self->seq.
    # Returns : the report; only the header line when there are no hits.
    my $self = shift;

    my %results  = $self->signals;
    my $hitcount = scalar keys %results;
    my $thresh   = $self->threshold;
    my $seqlen   = $self->seq->length || 0;
    my $name     = $self->seq->id || 'NONAME';
    my $pep      = $self->seq->seq;
    $pep =~ tr/a-z/A-Z/;

    my $output = "SIGCLEAVE of $name from: 1 to $seqlen\n\n";
    return $output unless $hitcount > 0;

    $output .= "Report scores over $thresh\n";

    # Scores are numbers, so rank them numerically: a string comparison
    # would place "9.5" above "10.2".  Ties are broken by position so the
    # report does not depend on hash ordering.
    my @ranked = sort { $results{$b} <=> $results{$a} || $a <=> $b }
                 keys %results;

    my $cnt = 1;
    foreach my $pos (@ranked) {
        # Show up to 15 residues of signal peptide before the position.
        my $start = $pos - 15;
        $start = 1 if $start < 1;
        my $sig = substr($pep,$start -1,$pos-$start );

        $output .= sprintf ("Maximum score %1.1f at residue %3d\n",$results{$pos},$pos);
        $output .= "\n";
        $output .= " Sequence: ";
        $output .= $sig;
        $output .= "-" x (15- length($sig));   # pad short signals to 15
        $output .= "-";
        $output .= substr($pep,$pos-1,50);
        $output .= "\n";
        $output .= " " x 12;
        $output .= "| \(signal\) | \(mature peptide\)\n";
        $output .= sprintf(" %3d %3d\n\n",$start,$pos);

        # After the best hit, introduce the remaining ones.
        if (($hitcount > 1) && ($cnt == 1)) {
            $output .= " Other entries above $thresh\n\n";
        }
        $cnt++;
    }

    return $output;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
596
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
597
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
598 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
599 __END__
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
600
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
601
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
602 #########################################################################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
603 # End of class
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
604 #########################################################################