annotate variant_effect_predictor/Bio/Tools/Sigcleave.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1 #-----------------------------------------------------------------------------
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2 # PACKAGE : Bio::Tools::Sigcleave
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3 # AUTHOR : Chris Dagdigian, dag@sonsorol.org
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
4 # CREATED : Jan 28 1999
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
5 # REVISION: $Id: Sigcleave.pm,v 1.17 2002/10/22 07:45:22 lapp Exp $
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
6 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
7 # Copyright (c) 1997-9 bioperl, Chris Dagdigian and others. All Rights Reserved.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
8 # This module is free software; you can redistribute it and/or
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
9 # modify it under the same terms as Perl itself.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
10 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
11 # _History_
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
12 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
13 # Object framework ripped from Steve Chervits's SeqPattern.pm
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
14 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
15 # Core EGCG Sigcleave emulation from perl code developed by
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
16 # Danh Nguyen & Kamalakar Gulukota which itself was based
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
17 # loosely on Colgrove's signal.c program.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
18 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
19 # The overall idea is to replicate the output of the sigcleave
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
20 # program which was distributed with the EGCG extension to the GCG sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
21 # analysis package. There is also an accessor method for just getting at
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
22 # the raw results.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
23 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
24 #-----------------------------------------------------------------------------
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
25
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
26 =head1 NAME
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
27
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
28 Bio::Tools::Sigcleave - Bioperl object for sigcleave analysis
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
29
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
30 =head1 SYNOPSIS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
31
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
32 =head2 Object Creation
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
33
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
34 use Bio::Tools::Sigcleave ();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
35
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
36 # to keep the module backward compatible, you can pass it a sequence string, but
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
37 # the recommended way is to pass it a Seq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
38
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
39 # this works
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
40 $seq = "MVLLLILSVLLLKEDVRGSAQSSERRVVAHMPGDIIIGALFSVHHQPTVDKVHERKCGAVREQYGI";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
41 $sig = new Bio::Tools::Sigcleave(-seq => $seq,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
42 -type => 'protein',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
43 -threshold=>'3.5',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
44 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
45 # but you should do this:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
46 $seqobj = Bio::PrimarySeq->new(-seq => $seq);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
47
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
48 $sig = new Bio::Tools::Sigcleave(-seq => $seqobj,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
49 -threshold=>'3.5',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
50 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
51
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
52
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
53 # now you can detect procaryotic signal sequences as well as eucaryotic
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
54 $sig->matrix('eucaryotic'); # or 'procaryotic'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
55
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
56
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
57 =head2 Object Methods & Accessors
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
58
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
59 # you can use this method to fine tune the threshold before printing out the results
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
60 $sig->result_count;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
61
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
62 %raw_results = $sig->signals;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
63 $formatted_output = $sig->pretty_print;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
64
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
65 =head1 DESCRIPTION
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
66
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
67 "Sigcleave" was a program distributed as part of the free EGCG add-on
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
68 to earlier versions of the GCG Sequence Analysis package. A new
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
69 implementation of the algorithm is now part of EMBOSS package.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
70
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
71 From the EGCG documentation:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
72
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
73 SigCleave uses the von Heijne method to locate signal sequences, and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
74 to identify the cleavage site. The method is 95% accurate in
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
75 resolving signal sequences from non-signal sequences with a cutoff
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
76 score of 3.5, and 75-80% accurate in identifying the cleavage
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
77 site. The program reports all hits above a minimum value.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
78
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
79 The EGCG Sigcleave program was written by Peter Rice (E-mail:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
80 pmr@sanger.ac.uk Post: Informatics Division, The Sanger Centre,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
81 Wellcome Trust Genome Campus, Hinxton, Cambs, CB10 1SA, UK).
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
82
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
83 Since EGCG is no longer distributed for the latest versions of GCG,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
84 this code was developed to emulate the output of the original program
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
85 as much as possible for those who lost access to sigcleave when
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
86 upgrading to newer versions of GCG.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
87
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
88 There are 2 accessor methods for this object. "signals" will return a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
89 perl associative array containing the sigcleave scores keyed by amino
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
90 acid position. "pretty_print" returns a formatted string similar to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
91 the output of the original sigcleave utility.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
92
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
93 In both cases, the "threshold" setting controls the score reporting
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
94 level. If no value for threshold is passed in by the user, the code
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
95 defaults to a reporting value of 3.5.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
96
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
97 In this implementation the accessor will never return any
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
98 score/position pair which does not meet the threshold limit. This is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
99 slightly different from the behaviour of the 8.1 EGCG sigcleave
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
100 program which will report the highest of the under-threshold results
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
101 if nothing else is found.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
102
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
103
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
104 Example of pretty_print output:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
105
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
106 SIGCLEAVE of sigtest from: 1 to 146
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
107
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
108 Report scores over 3.5
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
109 Maximum score 4.9 at residue 131
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
110
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
111 Sequence: FVILAAMSIQGSA-NLQTQWKSTASLALET
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
112 | (signal) | (mature peptide)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
113 118 131
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
114
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
115 Other entries above 3.5
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
116
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
117 Maximum score 3.7 at residue 112
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
118
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
119 Sequence: CSRQLFGWLFCKV-HPGAIVFVILAAMSIQGSANLQTQWKSTASLALET
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
120 | (signal) | (mature peptide)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
121 99 112
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
122
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
123
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
124 =head1 FEEDBACK
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
125
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
126 When updating and maintaining a module, it helps to know that people
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
127 are actually using it. Let us know if you find a bug, think this code
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
128 is useful or have any improvements/features to suggest.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
129
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
130 =head2 Reporting Bugs
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
131
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
132 Report bugs to the Bioperl bug tracking system to help us keep track
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
133 of the bugs and their resolution. Bug reports can be submitted via email
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
134 or the web:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
135
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
136 bioperl-bugs@bio.perl.org
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
137 http://bugzilla.bioperl.org/
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
138
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
139 =head1 AUTHOR
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
140
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
141 Chris Dagdigian, dag@sonsorol.org & others
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
142
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
143 =head1 CONTRIBUTORS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
144
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
145 Heikki Lehvaslaiho, heikki@ebi.ac.uk
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
146
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
147 =head1 VERSION
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
148
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
149 Bio::Tools::Sigcleave.pm, $Id: Sigcleave.pm,v 1.17 2002/10/22 07:45:22 lapp Exp $
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
150
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
151 =head1 COPYRIGHT
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
152
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
153 Copyright (c) 1999 Chris Dagdigian & others. All Rights Reserved.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
154 This module is free software; you can redistribute it and/or modify it
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
155 under the same terms as Perl itself.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
156
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
157 =head1 REFERENCES / SEE ALSO
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
158
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
159 von Heijne G. (1986) "A new method for predicting signal sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
160 cleavage sites." Nucleic Acids Res. 14, 4683-4690.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
161
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
162 von Heijne G. (1987) in "Sequence Analysis in Molecular Biology:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
163 Treasure Trove or Trivial Pursuit" (Acad. Press, (1987), 113-117).
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
164
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
165
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
166 =head1 APPENDIX
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
167
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
168 The following documentation describes the various functions
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
169 contained in this module. Some functions are for internal
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
170 use and are not meant to be called by the user; they are
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
171 preceded by an underscore ("_").
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
172
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
173
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
174 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
175
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
176 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
177 ##
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
178 ###
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
179 #### END of main POD documentation.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
180 ###
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
181 ##
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
182 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
183
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
184
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
185 package Bio::Tools::Sigcleave;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
186
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
187 use Bio::Root::Root;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
188 use Bio::PrimarySeq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
189
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
190 @ISA = qw(Bio::Root::Root);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
191 use strict;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
192 use vars qw ($ID $VERSION %WeightTable_euc %WeightTable_pro );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
193 $ID = 'Bio::Tools::Sigcleave';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
194 $VERSION = 0.02;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
195
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
196
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Raw residue counts per alignment position for eucaryotic signal peptides.
# Columns 0..14 are positions -13..+2 around the cleavage site; column 15
# is the expected count of that residue.
%WeightTable_euc = (
    #Sample: 161 aligned sequences
    # R     -13 -12 -11 -10  -9  -8  -7  -6  -5  -4  -3  -2  -1  +1  +2 Expect
    'A' => [16, 13, 14, 15, 20, 18, 18, 17, 25, 15, 47,  6, 80, 18,  6, 14.5],
    'C' => [ 3,  6,  9,  7,  9, 14,  6,  8,  5,  6, 19,  3,  9,  8,  3,  4.5],
    'D' => [ 0,  0,  0,  0,  0,  0,  0,  0,  5,  3,  0,  5,  0, 10, 11,  8.9],
    'E' => [ 0,  0,  0,  1,  0,  0,  0,  0,  3,  7,  0,  7,  0, 13, 14, 10.0],
    'F' => [13,  9, 11, 11,  6,  7, 18, 13,  4,  5,  0, 13,  0,  6,  4,  5.6],
    'G' => [ 4,  4,  3,  6,  3, 13,  3,  2, 19, 34,  5,  7, 39, 10,  7, 12.1],
    'H' => [ 0,  0,  0,  0,  0,  1,  1,  0,  5,  0,  0,  6,  0,  4,  2,  3.4],
    'I' => [15, 15,  8,  6, 11,  5,  4,  8,  5,  1, 10,  5,  0,  8,  7,  7.4],
    'K' => [ 0,  0,  0,  1,  0,  0,  1,  0,  0,  4,  0,  2,  0, 11,  9, 11.3],
    'L' => [71, 68, 72, 79, 78, 45, 64, 49, 10, 23,  8, 20,  1,  8,  4, 12.1],
    'M' => [ 0,  3,  7,  4,  1,  6,  2,  2,  0,  0,  0,  1,  0,  1,  2,  2.7],
    'N' => [ 0,  1,  0,  1,  1,  0,  0,  0,  3,  3,  0, 10,  0,  4,  7,  7.1],
    'P' => [ 2,  0,  2,  0,  0,  4,  1,  8, 20, 14,  0,  1,  3,  0, 22,  7.4],
    'Q' => [ 0,  0,  0,  1,  0,  6,  1,  0, 10,  8,  0, 18,  3, 19, 10,  6.3],
    'R' => [ 2,  0,  0,  0,  0,  1,  0,  0,  7,  4,  0, 15,  0, 12,  9,  7.6],
    'S' => [ 9,  3,  8,  6, 13, 10, 15, 16, 26, 11, 23, 17, 20, 15, 10, 11.4],
    'T' => [ 2, 10,  5,  4,  5, 13,  7,  7, 12,  6, 17,  8,  6,  3, 10,  9.7],
    'V' => [20, 25, 15, 18, 13, 15, 11, 27,  0, 12, 32,  3,  0,  8, 17, 11.1],
    'W' => [ 4,  3,  3,  1,  1,  2,  6,  3,  1,  3,  0,  9,  0,  2,  0,  1.8],
    'Y' => [ 0,  1,  4,  0,  0,  1,  3,  1,  1,  2,  0,  5,  0,  1,  7,  5.6]
);

# Same layout as above, for procaryotic signal peptides.
%WeightTable_pro = (
    #Sample: 36 aligned sequences
    # R    -13 -12 -11 -10  -9  -8  -7  -6  -5  -4  -3  -2  -1  +1  +2 Expect
    'A' => [0,  8,  8,  9,  6,  7,  5,  6,  7,  7, 24,  2, 31, 18,  4,  3.2],
    'C' => [1,  0,  0,  1,  1,  0,  0,  1,  1,  0,  0,  0,  0,  0,  0,  1.0],
    'D' => [0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  8,  2.0],
    'E' => [0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  4,  8,  2.2],
    'F' => [2,  4,  3,  4,  1,  1,  8,  0,  4,  1,  0,  7,  0,  1,  0,  1.3],
    'G' => [4,  2,  2,  2,  3,  5,  2,  4,  2,  2,  0,  2,  2,  1,  0,  2.7],
    'H' => [0,  0,  1,  0,  0,  0,  0,  1,  1,  0,  0,  7,  0,  1,  0,  0.8],
    'I' => [3,  1,  5,  1,  5,  0,  1,  3,  0,  0,  0,  0,  0,  0,  2,  1.7],
    'K' => [0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  2,  0,  3,  0,  2.5],
    'L' => [8, 11,  9,  8,  9, 13,  1,  0,  2,  2,  1,  2,  0,  0,  1,  2.7],
    'M' => [0,  2,  1,  1,  3,  2,  3,  0,  1,  2,  0,  4,  0,  0,  1,  0.6],
    'N' => [0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  0,  3,  0,  1,  4,  1.6],
    'P' => [0,  1,  1,  1,  1,  1,  2,  3,  5,  2,  0,  0,  0,  0,  5,  1.7],
    'Q' => [0,  0,  0,  0,  0,  0,  0,  0,  2,  2,  0,  3,  0,  0,  1,  1.4],
    'R' => [0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  1.7],
    'S' => [1,  0,  1,  4,  4,  1,  5, 15,  5,  8,  5,  2,  2,  0,  0,  2.6],
    'T' => [2,  0,  4,  2,  2,  2,  2,  2,  5,  1,  3,  0,  1,  1,  2,  2.2],
    'V' => [5,  7,  1,  3,  1,  4,  7,  0,  0,  4,  3,  0,  0,  2,  0,  2.5],
    'W' => [0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0.4],
    'Y' => [0,  0,  0,  0,  0,  0,  0,  0,  0,  3,  0,  1,  0,  0,  0,  1.3]
);

##
## Convert the raw counts above into the real weights: every cell becomes
## log(count / expected) for its residue.  The values are computed here
## rather than hard-coded to keep all the significant digits.
##
## These are top-level statements, so the conversion runs once, when the
## module is loaded.
##
## A zero count is replaced by a pseudo-count before taking the log:
## 1 in general, but 1.e-10 in columns 10 and 12 (positions -3 and -1),
## which yields a very large negative weight there.  Column 15 (Expect)
## is transformed as well and ends up as log(1) = 0.
##
for my $table (\%WeightTable_euc, \%WeightTable_pro) {
    for my $row (values %$table) {
        # capture the expected count first: the loop below overwrites it
        my $expected = $row->[15];
        next unless $expected > 0;

        for my $pos (0 .. 15) {
            my $count = $row->[$pos];
            if ($count == 0) {
                $count = ($pos == 10 || $pos == 12) ? 1.e-10 : 1;
            }
            $row->[$pos] = log($count / $expected);
        }
    }
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
291
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
292
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
293
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
294 #####################################################################################
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
295 ## CONSTRUCTOR ##
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
296 #####################################################################################
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
297
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
298
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub new {
    # Constructor.  Builds the object through the parent class and then
    # applies the optional -seq, -threshold and -matrix named arguments
    # through the accessors of the same names.
    my $class = shift;
    my @args  = @_;

    my $self = $class->SUPER::new(@args);

    my ($seq, $threshold, $matrix) =
        $self->_rearrange([qw(SEQ THRESHOLD MATRIX)], @args);

    # threshold is tested for definedness so that 0 reaches the accessor;
    # matrix and seq are only applied when they hold a true value
    $self->threshold($threshold) if defined $threshold;
    $self->matrix($matrix)       if $matrix;
    $self->seq($seq)             if $seq;

    return $self;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
313
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
314
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
315
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
316 =head1 threshold
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
317
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
318 Title : threshold
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
319 Usage : $value = $self->threshold
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
320 Purpose : Read/write method sigcleave score reporting threshold.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
321 Returns : float.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
322 Argument : new value, float
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
323 Throws : on non-number argument
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
324 Comments : defaults to 3.5
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
325 See Also : n/a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
326
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
327 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
328
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
329 #----------------
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub threshold {
    # Read/write accessor for the sigcleave score reporting threshold.
    #
    # Argument : optional new threshold; an optionally signed decimal
    #            number such as 3.5, -1, .5 or 4.
    # Returns  : the stored threshold, or 3.5 when none has been set
    # Throws   : through $self->throw when the argument is not a number
    my ($self, $value) = @_;

    if (defined $value) {
        # Require at least one digit and at most one decimal point.  A bare
        # character class ([\d\.]+) would also let "." and "1.2.3" through.
        $self->throw("I need a number, not [$value]")
            if $value !~ /^[+-]?(?:\d+\.?\d*|\.\d+)$/;
        $self->{'_threshold'} = $value;
    }

    # Test definedness rather than truth, so that an explicitly set
    # threshold of 0 is returned instead of falling back to the default.
    return defined $self->{'_threshold'} ? $self->{'_threshold'} : 3.5;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
340
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
341 =head1 matrix
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
342
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
343 Title : matrix
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
344 Usage : $value = $self->matrix('procaryotic')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
345 Purpose : Read/write method sigcleave matrix.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
346 Returns : string, 'eucaryotic' or 'procaryotic'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
347 Argument : new value: 'eucaryotic' or 'procaryotic'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
348 Throws : on any argument other than 'eucaryotic' or 'procaryotic'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
349 Comments : defaults to 'eucaryotic'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
350 See Also : n/a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
351
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
352 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
353
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
354 #----------------
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub matrix {
    # Read/write accessor for the name of the weight matrix.
    #
    # Argument : optional new value, 'eucaryotic' or 'procaryotic'
    # Returns  : the stored matrix name, or 'eucaryotic' when none is set
    # Throws   : through $self->throw on any other defined argument
    my $self  = shift;
    my $value = shift;

    if (defined $value) {
        my %is_known = map { $_ => 1 } qw(eucaryotic procaryotic);
        $self->throw("I need 'eucaryotic' or 'procaryotic', not [$value]")
            unless $is_known{$value};
        $self->{'_matrix'} = $value;
    }

    return $self->{'_matrix'} ? $self->{'_matrix'} : 'eucaryotic';
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
366
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
367 =head1 seq
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
368
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
369 Title : seq
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
370 Usage : $value = $self->seq($seqobj)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
371 Purpose : Read/write method for the sequence to be analyzed.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
372 Returns : the stored sequence object.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
373 Argument : new value: a Bio::PrimarySeqI object or a protein sequence string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
374 Throws : nothing itself; building the Bio::PrimarySeq from a string may fail
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
375 Comments : a string argument is wrapped in a Bio::PrimarySeq with alphabet 'protein'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
376 See Also : n/a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
377
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
378 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
379
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
380 #----------------
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Combined getter/setter for the sequence to be analyzed.
#   $value (optional) - either an object that isa Bio::PrimarySeqI, which is
#                       stored as-is, or any other defined value, which is
#                       handed to Bio::PrimarySeq->new as a protein sequence.
# Returns whatever is currently stored in the '_seq' slot (undef if unset).
sub seq {
    my ($self, $value) = @_;

    if (defined $value) {
        # Only call ->isa on real objects: invoking a method on an empty
        # string or an unblessed reference dies before the string branch
        # below could ever be reached.
        require Scalar::Util;
        if (Scalar::Util::blessed($value) && $value->isa('Bio::PrimarySeqI')) {
            $self->{'_seq'} = $value;
        }
        else {
            $self->{'_seq'} = Bio::PrimarySeq->new(-seq      => $value,
                                                   -alphabet => 'protein');
        }
    }

    return $self->{'_seq'};
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
394
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
395
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
396
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
397 =head1 _Analyze
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
398
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
399 Title : _Analyze
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
400 Usage : N/A This is an internal method. Not meant to be called from outside
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
401 : the package
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
402 :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
403 Purpose : calculates sigcleave score and amino acid position for the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
404 : given protein sequence. The score reporting threshold can
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
405 : be adjusted by passing in the "threshold" parameter during
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
406 : object construction. If no threshold is passed in, the code
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
407 : defaults to reporting any scores equal to or above 3.5
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
408 :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
409 Returns : nothing. results are added to the object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
410 Argument : none.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
411 Throws : nothing.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
412 Comments : nothing.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
413 See Also : n/a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
414
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
415 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
416
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
417 #----------------
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Internal: score every position of the stored sequence against the selected
# weight matrix and record the hits in $self->{'_signal_scores'}.
#
# For each 0-based position a window running from 13 residues before the
# position to 1 residue after it (clipped to the sequence) is scored
# by summing, for window column k, the weight-table entry for the residue
# found at that column. Positions whose sum is >= $self->threshold are stored
# under the 1-based position, with the score formatted to one decimal place.
#
# Takes no arguments besides $self. The value of the final assignment (the
# hash reference stored in '_signal_scores') is what the sub evaluates to.
sub _Analyze {
    my ($self) = @_;

    my $pVal   = -13;    # offset of the window start relative to the position
    my $nVal   = 2;      # the window ends at position + nVal - 1
    my $seqEnd = $self->seq->length;

    # The weight tables are keyed by UPPERCASE letters; work on an
    # uppercased copy so the sequence object itself is left untouched.
    my $pep = $self->seq->seq;
    $pep =~ tr/a-z/A-Z/;

    my $minWeight = $self->threshold;

    # The matrix choice cannot change while scoring, so pick the table once.
    my $table = $self->matrix eq 'eucaryotic'
              ? \%WeightTable_euc
              : \%WeightTable_pro;

    my %signals;
    for my $seqPos (0 .. $seqEnd - 1) {
        my $istart = $seqPos + $pVal;
        $istart = 0 if $istart < 0;

        my $iend = $seqPos + $nVal - 1;
        $iend = $seqEnd if $iend >= $seqEnd;

        # NOTE(review): when the window is clipped on the left, columns are
        # still counted from 0 - confirm this matches the reference program.
        my $weight = 0.00;
        for my $k (0 .. $iend - $istart) {
            # Fetch the residue's column list first: testing
            # $table{$c}[$k] directly would create an empty entry in the
            # shared weight table for every unknown residue.
            my $column = $table->{ substr($pep, $istart + $k, 1) };
            next unless $column && defined $column->[$k];
            $weight += $column->[$k];
        }

        $signals{ $seqPos + 1 } = sprintf("%.1f", $weight)
            if $weight >= $minWeight;
    }

    $self->{"_signal_scores"} = \%signals;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
471
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
472
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
473 =head1 signals
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
474
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
475 Title : signals
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
476 Usage : %sigcleave_results = $sig->signals;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
477 :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
478 Purpose : Accessor method for sigcleave results
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
479 :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
480 Returns : Associative array. The key value represents the amino acid position
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
481 : and the value represents the score. Only scores that
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
482 : are greater than or equal to the THRESHOLD value are reported.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
483 :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
484 Argument : none.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
485 Throws : none.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
486 Comments : none.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
487 See Also : THRESHOLD
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
488
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
489 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
490
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
491 #----------------
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Run the analysis and hand back the hits as a flat position => score list.
# The returned hash is a copy, so callers cannot modify the stored results.
sub signals {
    my ($self) = @_;

    # do the calculations
    $self->_Analyze;

    my %results = map { ( $_, $self->{'_signal_scores'}{$_} ) }
                  sort keys %{ $self->{'_signal_scores'} };

    return %results;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
506
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
507
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
508 =head1 result_count
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
509
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
510 Title : result_count
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
511 Usage : $count = $sig->result_count;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
512 :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
513 Purpose : Accessor method for sigcleave results
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
514 :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
515 Returns : Integer, number of results at or above the threshold
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
516 :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
517 Argument : none.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
518 Throws : none.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
519 Comments : none.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
520
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
521 See Also : THRESHOLD
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
522
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
523 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
524
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
525 #----------------
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Run the analysis and return the number of recorded hits.
# The count is forced with scalar() so that list-context callers (for
# example "print $sig->result_count") also receive the integer rather
# than the list of hit positions.
sub result_count {
    my ($self) = @_;

    $self->_Analyze;

    return scalar keys %{ $self->{'_signal_scores'} };
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
532
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
533
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
534 =head1 pretty_print
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
535
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
536 Title : pretty_print
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
537 Usage : $output = $sig->pretty_print;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
538 : print $sig->pretty_print;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
539 :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
540 Purpose : Emulates the output of the EGCG Sigcleave
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
541 : utility.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
542 :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
543 Returns : A formatted string.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
544 Argument : none.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
545 Throws : none.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
546 Comments : none.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
547 See Also : n/a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
548
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
549 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
550
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
551 #----------------
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Build a text report of the hits in the style of the EGCG Sigcleave utility.
#
# Takes no arguments besides $self. Reads the hits from $self->signals, the
# cut-off from $self->threshold and the sequence (length, id, residues) from
# $self->seq. Returns the report as one string; when there are no hits only
# the header line is returned.
#
# Hits are listed by descending score. Scores are compared numerically
# (a string comparison would rank "9.8" above "10.2"), and equal scores are
# ordered by ascending position so the output is deterministic.
sub pretty_print {
    my ($self) = @_;

    my %results  = $self->signals;
    my $hitcount = scalar keys %results;
    my $thresh   = $self->threshold;
    my $seqlen   = $self->seq->length || 0;
    my $name     = $self->seq->id     || 'NONAME';
    my $pep      = $self->seq->seq;
    $pep =~ tr/a-z/A-Z/;

    my $output = "SIGCLEAVE of $name from: 1 to $seqlen\n\n";
    return $output if $hitcount == 0;

    $output .= "Report scores over $thresh\n";

    my @ranked = sort { $results{$b} <=> $results{$a} or $a <=> $b }
                 keys %results;

    my $cnt = 1;
    for my $pos (@ranked) {
        # Show at most the 15 residues preceding the reported position.
        my $start = $pos - 15;
        $start = 1 if $start < 1;
        my $sig = substr($pep, $start - 1, $pos - $start);

        $output .= sprintf("Maximum score %1.1f at residue %3d\n", $results{$pos}, $pos);
        $output .= "\n";
        $output .= " Sequence: ";
        $output .= $sig;
        $output .= "-" x (15 - length($sig));    # pad short signal segments
        $output .= "-";
        $output .= substr($pep, $pos - 1, 50);
        $output .= "\n";
        # NOTE(review): the marker lines below only line up with the sequence
        # line if the spacing inside these literals is right - confirm.
        $output .= " " x 12;
        $output .= "| \(signal\) | \(mature peptide\)\n";
        $output .= sprintf(" %3d %3d\n\n", $start, $pos);

        # After the best hit, introduce the remaining ones.
        if (($hitcount > 1) && ($cnt == 1)) {
            $output .= " Other entries above $thresh\n\n";
        }
        $cnt++;
    }

    return $output;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
596
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
597
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
598 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
599 __END__
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
600
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
601
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
602 #########################################################################
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
603 # End of class
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
604 #########################################################################