annotate variant_effect_predictor/Bio/Tools/Sigcleave.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 #-----------------------------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 # PACKAGE : Bio::Tools::Sigcleave
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 # AUTHOR : Chris Dagdigian, dag@sonsorol.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 # CREATED : Jan 28 1999
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # REVISION: $Id: Sigcleave.pm,v 1.17 2002/10/22 07:45:22 lapp Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 # Copyright (c) 1997-9 bioperl, Chris Dagdigian and others. All Rights Reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 # This module is free software; you can redistribute it and/or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 # modify it under the same terms as Perl itself.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 # _History_
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 # Object framework ripped from Steve Chervits's SeqPattern.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 # Core EGCG Sigcleave emulation from perl code developed by
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 # Danh Nguyen & Kamalakar Gulukota which itself was based
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 # loosely on Colgrove's signal.c program.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 # The overall idea is to replicate the output of the sigcleave
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 # program which was distributed with the EGCG extension to the GCG sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 # analysis package. There is also an accessor method for just getting at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 # the raw results.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 #-----------------------------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 Bio::Tools::Sigcleave - Bioperl object for sigcleave analysis
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 =head2 Object Creation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 use Bio::Tools::Sigcleave ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 # to keep the module backward compatible, you can pass it a sequence string, but
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 # the recommended way is to pass it a Seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 # this works
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 $seq = "MVLLLILSVLLLKEDVRGSAQSSERRVVAHMPGDIIIGALFSVHHQPTVDKVHERKCGAVREQYGI";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 $sig = new Bio::Tools::Sigcleave(-seq => $seq,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 -type => 'protein',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 -threshold=>'3.5',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 # but you do:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 $seqobj = Bio::PrimarySeq->new(-seq => $seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 $sig = new Bio::Tools::Sigcleave(-seq => $seqobj,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 -threshold=>'3.5',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 # now you can detect procaryotic signal sequences as well as eucaryotic
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 $sig->matrix('eucaryotic'); # or 'procaryotic'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 =head2 Object Methods & Accessors
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 # you can use this method to fine tune the threshold before printing out the results
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 $sig->result_count;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 %raw_results = $sig->signals;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 $formatted_output = $sig->pretty_print;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 "Sigcleave" was a program distributed as part of the free EGCG add-on
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 to earlier versions of the GCG Sequence Analysis package. A new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 implementation of the algorithm is now part of the EMBOSS package.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 From the EGCG documentation:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 SigCleave uses the von Heijne method to locate signal sequences, and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 to identify the cleavage site. The method is 95% accurate in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 resolving signal sequences from non-signal sequences with a cutoff
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 score of 3.5, and 75-80% accurate in identifying the cleavage
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 site. The program reports all hits above a minimum value.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 The EGCG Sigcleave program was written by Peter Rice (E-mail:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 pmr@sanger.ac.uk Post: Informatics Division, The Sanger Centre,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 Wellcome Trust Genome Campus, Hinxton, Cambs, CB10 1SA, UK).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 Since EGCG is no longer distributed for the latest versions of GCG,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 this code was developed to emulate the output of the original program
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 as much as possible for those who lost access to sigcleave when
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 upgrading to newer versions of GCG.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 There are 2 accessor methods for this object. "signals" will return a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 perl associative array containing the sigcleave scores keyed by amino
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 acid position. "pretty_print" returns a formatted string similar to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 the output of the original sigcleave utility.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 In both cases, the "threshold" setting controls the score reporting
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 level. If no value for threshold is passed in by the user, the code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 defaults to a reporting value of 3.5.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 In this implementation the accessor will never return any
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 score/position pair which does not meet the threshold limit. This is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 slightly different from the behaviour of the 8.1 EGCG sigcleave
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 program which will report the highest of the under-threshold results
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 if nothing else is found.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 Example of pretty_print output:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 SIGCLEAVE of sigtest from: 1 to 146
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 Report scores over 3.5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 Maximum score 4.9 at residue 131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 Sequence: FVILAAMSIQGSA-NLQTQWKSTASLALET
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 | (signal) | (mature peptide)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 118 131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 Other entries above 3.5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 Maximum score 3.7 at residue 112
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 Sequence: CSRQLFGWLFCKV-HPGAIVFVILAAMSIQGSANLQTQWKSTASLALET
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 | (signal) | (mature peptide)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 99 112
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 When updating and maintaining a module, it helps to know that people
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 are actually using it. Let us know if you find a bug, think this code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 is useful or have any improvements/features to suggest.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 Report bugs to the Bioperl bug tracking system to help us keep track
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 of the bugs and their resolution. Bug reports can be submitted via email
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 bioperl-bugs@bio.perl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 =head1 AUTHOR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 Chris Dagdigian, dag@sonsorol.org & others
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 =head1 CONTRIBUTORS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 Heikki Lehvaslaiho, heikki@ebi.ac.uk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 =head1 VERSION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 Bio::Tools::Sigcleave.pm, $Id: Sigcleave.pm,v 1.17 2002/10/22 07:45:22 lapp Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 =head1 COPYRIGHT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 Copyright (c) 1999 Chris Dagdigian & others. All Rights Reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 This module is free software; you can redistribute it and/or modify it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 under the same terms as Perl itself.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 =head1 REFERENCES / SEE ALSO
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 von Heijne G. (1986) "A new method for predicting signal sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 cleavage sites." Nucleic Acids Res. 14, 4683-4690.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 von Heijne G. (1987) in "Sequence Analysis in Molecular Biology:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 Treasure Trove or Trivial Pursuit" (Acad. Press, (1987), 113-117).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 =head1 APPENDIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 The following documentation describes the various functions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 contained in this module. Some functions are for internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 use and are not meant to be called by the user; they are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 preceded by an underscore ("_").
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 ###
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 #### END of main POD documentation.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 ###
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 package Bio::Tools::Sigcleave;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 use Bio::Root::Root;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 use Bio::PrimarySeq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 @ISA = qw(Bio::Root::Root);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 use vars qw ($ID $VERSION %WeightTable_euc %WeightTable_pro );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 $ID = 'Bio::Tools::Sigcleave';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 $VERSION = 0.02;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 %WeightTable_euc = (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 #Sample: 161 aligned sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 # R -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 +1 +2 Expect
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 'A' => [16, 13, 14, 15, 20, 18, 18, 17, 25, 15, 47, 6, 80, 18, 6, 14.5],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 'C' => [ 3, 6, 9, 7, 9, 14, 6, 8, 5, 6, 19, 3, 9, 8, 3, 4.5],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 'D' => [ 0, 0, 0, 0, 0, 0, 0, 0, 5, 3, 0, 5, 0, 10, 11, 8.9],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 'E' => [ 0, 0, 0, 1, 0, 0, 0, 0, 3, 7, 0, 7, 0, 13, 14, 10.0],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 'F' => [13, 9, 11, 11, 6, 7, 18, 13, 4, 5, 0, 13, 0, 6, 4, 5.6],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 'G' => [ 4, 4, 3, 6, 3, 13, 3, 2, 19, 34, 5, 7, 39, 10, 7, 12.1],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 'H' => [ 0, 0, 0, 0, 0, 1, 1, 0, 5, 0, 0, 6, 0, 4, 2, 3.4],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 'I' => [15, 15, 8, 6, 11, 5, 4, 8, 5, 1, 10, 5, 0, 8, 7, 7.4],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 'K' => [ 0, 0, 0, 1, 0, 0, 1, 0, 0, 4, 0, 2, 0, 11, 9, 11.3],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 'L' => [71, 68, 72, 79, 78, 45, 64, 49, 10, 23, 8, 20, 1, 8, 4, 12.1],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 'M' => [ 0, 3, 7, 4, 1, 6, 2, 2, 0, 0, 0, 1, 0, 1, 2, 2.7],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 'N' => [ 0, 1, 0, 1, 1, 0, 0, 0, 3, 3, 0, 10, 0, 4, 7, 7.1],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 'P' => [ 2, 0, 2, 0, 0, 4, 1, 8, 20, 14, 0, 1, 3, 0, 22, 7.4],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 'Q' => [ 0, 0, 0, 1, 0, 6, 1, 0, 10, 8, 0, 18, 3, 19, 10, 6.3],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 'R' => [ 2, 0, 0, 0, 0, 1, 0, 0, 7, 4, 0, 15, 0, 12, 9, 7.6],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 'S' => [ 9, 3, 8, 6, 13, 10, 15, 16, 26, 11, 23, 17, 20, 15, 10, 11.4],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 'T' => [ 2, 10, 5, 4, 5, 13, 7, 7, 12, 6, 17, 8, 6, 3, 10, 9.7],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 'V' => [20, 25, 15, 18, 13, 15, 11, 27, 0, 12, 32, 3, 0, 8, 17, 11.1],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 'W' => [ 4, 3, 3, 1, 1, 2, 6, 3, 1, 3, 0, 9, 0, 2, 0, 1.8],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 'Y' => [ 0, 1, 4, 0, 0, 1, 3, 1, 1, 2, 0, 5, 0, 1, 7, 5.6]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 %WeightTable_pro = (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 #Sample: 36 aligned sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 # R -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 +1 +2 Expect
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 'A' => [0, 8, 8, 9, 6, 7, 5, 6, 7, 7, 24, 2, 31, 18, 4, 3.2],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 'C' => [1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1.0],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 'D' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 8, 2.0],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 'E' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 8, 2.2],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 'F' => [2, 4, 3, 4, 1, 1, 8, 0, 4, 1, 0, 7, 0, 1, 0, 1.3],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 'G' => [4, 2, 2, 2, 3, 5, 2, 4, 2, 2, 0, 2, 2, 1, 0, 2.7],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 'H' => [0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 7, 0, 1, 0, 0.8],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 'I' => [3, 1, 5, 1, 5, 0, 1, 3, 0, 0, 0, 0, 0, 0, 2, 1.7],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 'K' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 3, 0, 2.5],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 'L' => [8, 11, 9, 8, 9, 13, 1, 0, 2, 2, 1, 2, 0, 0, 1, 2.7],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 'M' => [0, 2, 1, 1, 3, 2, 3, 0, 1, 2, 0, 4, 0, 0, 1, 0.6],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 'N' => [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 3, 0, 1, 4, 1.6],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 'P' => [0, 1, 1, 1, 1, 1, 2, 3, 5, 2, 0, 0, 0, 0, 5, 1.7],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 'Q' => [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 1, 1.4],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 'R' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1.7],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 'S' => [1, 0, 1, 4, 4, 1, 5, 15, 5, 8, 5, 2, 2, 0, 0, 2.6],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 'T' => [2, 0, 4, 2, 2, 2, 2, 2, 5, 1, 3, 0, 1, 1, 2, 2.2],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 'V' => [5, 7, 1, 3, 1, 4, 7, 0, 0, 4, 3, 0, 0, 2, 0, 2.5],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 'W' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0.4],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 'Y' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 1.3]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 ## Now we calculate the _real_ values for the weight tables
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 ## yeah yeah yeah there is lots of math here that gets repeated
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 ## every time this module is loaded. This is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 ## a quick hack to make sure that we get the scores as accurate as
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 ## possible. Need all those significant digits....
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 ## suggestions for speedup approaches welcome
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Convert the raw residue counts of both weight tables into log-odds
# scores, in place.  These statements sit at package level, so they run
# when the module is loaded.
#
# Each row holds 15 position counts (-13 .. +2) followed by the expected
# count in slot 15.  A zero count is replaced by 1 before taking the log,
# except in columns 10 and 12 (positions -3 and -1), where it becomes
# 1.e-10 so that residues never observed there are penalised very heavily.
# Slot 15 is transformed as well and therefore ends up as log(1) = 0.
foreach my $table (\%WeightTable_euc, \%WeightTable_pro) {
    foreach my $residue (keys %$table) {
        my $row      = $table->{$residue};
        my $expected = $row->[15];

        # Rows without a positive expected count are left untouched.
        next unless $expected > 0;

        foreach my $col (0 .. 15) {
            if ($row->[$col] == 0) {
                $row->[$col] = ($col == 10 || $col == 12) ? 1.e-10 : 1;
            }
            $row->[$col] = log($row->[$col] / $expected);
        }
    }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 ## CONSTRUCTOR ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub new {
    # Constructor.  Builds the object through the parent class, then
    # applies the optional named arguments -seq, -threshold and -matrix
    # via the accessors of the same name.  Any other named argument is
    # not used here.
    my ($caller, @params) = @_;

    my $self = $caller->SUPER::new(@params);

    my ($seq, $threshold, $matrix) =
        $self->_rearrange([qw(SEQ THRESHOLD MATRIX)], @params);

    # The threshold is applied whenever it is defined; matrix and seq
    # are applied only when they hold a true value.
    $self->threshold($threshold) if defined $threshold;
    $self->matrix($matrix)       if $matrix;
    $self->seq($seq)             if $seq;

    return $self;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 =head1 threshold
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 Title : threshold
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 Usage : $value = $self->threshold
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 Purpose : Read/write method for the sigcleave score reporting threshold.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 Returns : float.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 Argument : new value, float
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 Throws : on non-number argument
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 Comments : defaults to 3.5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 See Also : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub threshold {
    # Read/write accessor for the score reporting threshold.
    #
    # Argument : optional new value; must be a plain decimal number
    #            (optional sign, digits, at most one decimal point).
    # Returns  : the stored threshold, or 3.5 if none has been set.
    # Throws   : via $self->throw when the argument is not a number.
    my ($self, $value) = @_;

    if (defined $value) {
        # Require at least one digit and at most one decimal point, so
        # strings such as "1.2.3" or "." are rejected rather than stored.
        $self->throw("I need a number, not [$value]")
            if $value !~ /^[+-]?(?:\d+\.?\d*|\.\d+)$/;
        $self->{'_threshold'} = $value;
    }

    # Test definedness, not truth: a threshold of 0 is a legitimate
    # setting and must not be silently replaced by the default.
    return defined $self->{'_threshold'} ? $self->{'_threshold'} : 3.5;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 =head1 matrix
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 Title : matrix
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 Usage : $value = $self->matrix('procaryotic')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 Purpose : Read/write method sigcleave matrix.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 Returns : string, 'eucaryotic' or 'procaryotic'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 Argument : new value: 'eucaryotic' or 'procaryotic'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 Throws : on any argument other than 'eucaryotic' or 'procaryotic'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 Comments : defaults to 'eucaryotic'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 See Also : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub matrix {
    # Read/write accessor for the name of the weight matrix to use.
    #
    # Argument : optional new value, 'eucaryotic' or 'procaryotic'.
    # Returns  : the stored matrix name, or 'eucaryotic' if none is set.
    # Throws   : via $self->throw on any other defined argument.
    my ($self, $value) = @_;

    if (defined $value) {
        if ($value ne 'eucaryotic' and $value ne 'procaryotic') {
            $self->throw("I need 'eucaryotic' or 'procaryotic', not [$value]");
        }
        $self->{'_matrix'} = $value;
    }

    my $current = $self->{'_matrix'};
    return $current ? $current : 'eucaryotic';
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 =head1 seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 Title : seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 Usage : $seqobj = $self->seq($seq_object_or_string)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 Purpose : Read/write method for the protein sequence to be analysed.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 Returns : the stored sequence object, or undef if none has been set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 Argument : new value: a Bio::PrimarySeqI object or a plain sequence string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 Throws : nothing directly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 Comments : a plain string is wrapped in a Bio::PrimarySeq with alphabet 'protein'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376 See Also : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub seq {
    # Read/write accessor for the sequence to analyse.
    #
    # Argument : optional new value, either an object that isa
    #            Bio::PrimarySeqI (stored as is) or anything else, which is
    #            handed to Bio::PrimarySeq->new as a protein sequence.
    # Returns  : whatever is stored in $self->{'_seq'} (undef if never set).
    my ($self, $value) = @_;

    if (defined $value) {
        # Only references can be sequence objects.  Calling ->isa directly
        # on a plain string treats the string as a class name, which dies
        # for '' and for unblessed references; the eval keeps any such
        # failure from escaping and simply yields "not a sequence object".
        my $is_seq_object = ref($value)
            && do { local $@; eval { $value->isa('Bio::PrimarySeqI') } };

        if ($is_seq_object) {
            $self->{'_seq'} = $value;
        }
        else {
            $self->{'_seq'} = Bio::PrimarySeq->new(-seq      => $value,
                                                   -alphabet => 'protein');
        }
    }
    return $self->{'_seq'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 =head1 _Analyze
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 Title : _Analyze
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 Usage : N/A This is an internal method. Not meant to be called from outside
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 : the package
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403 Purpose : calculates sigcleave score and amino acid position for the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 : given protein sequence. The score reporting threshold can
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 : be adjusted by passing in the "threshold" parameter during
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 : object construction. If no threshold is passed in, the code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 : defaults to reporting any scores equal to or above 3.5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 Returns : nothing. results are added to the object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 Argument : none.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 Throws : nothing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 Comments : nothing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 See Also : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _Analyze {
    # Internal: scores every residue position of the stored sequence against
    # the selected weight table and records the hits.
    #
    # Argument    : none beyond the invocant.
    # Side effect : replaces $self->{'_signal_scores'} with a hash reference
    #               mapping 1-based position => score formatted "%.1f", for
    #               every position whose raw score is >= $self->threshold.
    my ($self) = @_;

    my %signals;

    # For position $seqPos the scored window runs from $seqPos + $pVal to
    # $seqPos + $nVal - 1 (0-based), clamped to the sequence.
    my $pVal = -13;
    my $nVal = 2;

    my $seqEnd    = $self->seq->length;
    my $pep       = $self->seq->seq;
    my $minWeight = $self->threshold;
    my $matrix    = $self->matrix;

    # Choose the table once; any matrix name other than 'eucaryotic' selects
    # the procaryotic table.
    my $table = $matrix eq 'eucaryotic' ? \%WeightTable_euc
                                        : \%WeightTable_pro;

    # The weight tables are keyed by UPPERCASE letters, so uppercase the
    # local copy; the sequence held by the object is left untouched.
    $pep =~ tr/a-z/A-Z/;

    for my $seqPos (0 .. $seqEnd - 1) {
        my $istart = $seqPos + $pVal < 0 ? 0 : $seqPos + $pVal;
        my $iend   = $seqPos + $nVal - 1 < $seqEnd ? $seqPos + $nVal - 1
                                                   : $seqEnd;
        my $weight = 0;

        for my $k (0 .. $iend - $istart) {
            # Fetch the residue's row first: a single-level lookup does not
            # autovivify, so unknown residues (and the empty string read one
            # past the end of the sequence) no longer add keys to the shared
            # weight table.
            my $row = $table->{ substr($pep, $istart + $k, 1) } or next;
            my $w   = $row->[$k];
            $weight += $w if defined $w;
        }

        # The threshold is applied to the unrounded score.
        $signals{$seqPos + 1} = sprintf("%.1f", $weight)
            if $weight >= $minWeight;
    }

    $self->{'_signal_scores'} = \%signals;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473 =head1 signals
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475 Title : signals
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476 Usage : %sigcleave_results = $sig->signals;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478 Purpose : Accessor method for sigcleave results
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480 Returns : Associative array. The key value represents the amino acid position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 : and the value represents the score. Only scores that
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 : are greater than or equal to the THRESHOLD value are reported.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 Argument : none.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 Throws : none.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 Comments : none.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 See Also : THRESHOLD
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub signals {
    # Runs the analysis and returns the hits.
    #
    # Argument : none.
    # Returns  : a hash (flattened key/value list) mapping position => score,
    #            copied from $self->{'_signal_scores'} so that callers cannot
    #            modify the stored results.
    my $self = shift;

    # do the calculations
    $self->_Analyze;

    # A plain hash copy; sorting the keys first would be wasted work because
    # a hash does not retain insertion order.
    my %results = %{ $self->{'_signal_scores'} };
    return %results;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 =head1 result_count
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510 Title : result_count
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511 Usage : $count = $sig->result_count;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 Purpose : Accessor method for sigcleave results
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 Returns : Integer, number of results at or above the threshold
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 Argument : none.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518 Throws : none.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519 Comments : none.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521 See Also : THRESHOLD
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub result_count {
    # Runs the analysis and returns the number of hits.
    #
    # Argument : none.
    # Returns  : integer, the number of entries in $self->{'_signal_scores'}.
    my $self = shift;
    $self->_Analyze;

    # Force scalar context: a bare "keys" would hand back the list of
    # positions, not the count, when the caller is in list context
    # (e.g. print $sig->result_count).
    return scalar keys %{ $self->{'_signal_scores'} };
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534 =head1 pretty_print
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536 Title : pretty_print
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537 Usage : $output = $sig->pretty_print;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538 : print $sig->pretty_print;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540 Purpose : Emulates the output of the EGCG Sigcleave
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541 : utility.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543 Returns : A formatted string.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544 Argument : none.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545 Throws : none.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546 Comments : none.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547 See Also : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub pretty_print {
    # Builds a text report of the hits, in the style of the EGCG Sigcleave
    # utility.
    #
    # Argument : none.
    # Returns  : the report as one string: a header line, then one entry per
    #            hit, highest score first.
    my $self = shift;

    my %results  = $self->signals;
    my $hitcount = scalar keys %results;
    my $thresh   = $self->threshold;
    my $seqlen   = $self->seq->length || 0;
    my $name     = $self->seq->id || 'NONAME';
    my $pep      = $self->seq->seq;
    $pep =~ tr/a-z/A-Z/;

    my $output = "SIGCLEAVE of $name from: 1 to $seqlen\n\n";
    return $output unless $hitcount > 0;

    $output .= "Report scores over $thresh\n";

    # Scores are numbers formatted as strings, so they must be compared
    # numerically: with "cmp" a score of 10.2 would rank below 9.5.  Equal
    # scores are ordered by position to keep the report deterministic.
    my @ranked = sort { $results{$b} <=> $results{$a} or $a <=> $b }
                 keys %results;

    my $first = 1;
    foreach my $pos (@ranked) {
        # Show up to 15 residues of signal peptide ahead of the hit.
        my $start = $pos - 15;
        $start = 1 if $start < 1;
        my $sig = substr($pep, $start - 1, $pos - $start);

        $output .= sprintf("Maximum score %1.1f at residue %3d\n",
                           $results{$pos}, $pos);
        $output .= "\n";
        $output .= " Sequence: ";
        $output .= $sig;
        $output .= "-" x (15- length($sig));    # pad short signal segments
        $output .= "-";
        $output .= substr($pep, $pos - 1, 50);
        $output .= "\n";
        $output .= " " x 12;
        $output .= "| \(signal\) | \(mature peptide\)\n";
        $output .= sprintf(" %3d %3d\n\n", $start, $pos);

        # After the best hit, announce the remaining ones (if any).
        if ($first && $hitcount > 1) {
            $output .= " Other entries above $thresh\n\n";
        }
        $first = 0;
    }
    return $output;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
596
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
597
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
598 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
599 __END__
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
600
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
601
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
602 #########################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
603 # End of class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
604 #########################################################################