annotate variant_effect_predictor/Bio/Tools/OddCodes.pm @ 1:d6778b5d8382 draft default tip

Deleted selected files
author willmclaren
date Fri, 03 Aug 2012 10:05:43 -0400
parents 21066c0abaf5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 #$Id: OddCodes.pm,v 1.10.2.1 2003/04/07 04:27:42 heikki Exp $
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2 #-----------------------------------------------------------------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 # PACKAGE : OddCodes.pm
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 # PURPOSE : To write amino acid sequences in alternative alphabets
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5 # AUTHOR : Derek Gatherer (D.Gatherer@organon.nhe.akzonobel.nl)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 # SOURCE :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 # CREATED : 8th July 2000
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8 # MODIFIED :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 # DISCLAIMER : I am employed in the pharmaceutical industry but my
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10 # : employers do not endorse or sponsor this module
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 # : in any way whatsoever. The above email address is
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12 # : given purely for the purpose of easy communication
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 # : with the author, and does not imply any connection
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14 # : between my employers and anything written below.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15 # LICENCE : You may distribute this module under the same terms
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16 # : as the rest of BioPerl.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 #----------------------------------------------------------------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 Bio::Tools::OddCodes - Object holding alternative alphabet coding for
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22 one protein sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26 Takes a sequence object from, e.g., an input stream, and creates an object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 for the purposes of rewriting that sequence in another alphabet.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28 These are abbreviated amino acid sequence alphabets, designed to
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29 simplify the statistical aspects of analysing protein sequences, by
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30 reducing the combinatorial explosion of the 20-letter alphabet. These
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31 abbreviated alphabets range in size from 2 to 8.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33 Creating the OddCodes object, eg:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35 my $inputstream = Bio::SeqIO->new( '-file' => "seqfile",
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36 '-format' => 'Fasta');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37 my $seqobj = $inputstream->next_seq();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38 my $oddcode_obj = Bio::Tools::OddCodes->new(-seq => $seqobj);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40 or:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42 my $seqobj = Bio::PrimarySeq->new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 (-seq=>'[cut and paste a sequence here]',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44 -alphabet => 'protein',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45 -id => 'test');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46 my $oddcode_obj = Bio::Tools::OddCodes->new(-seq => $seqobj);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48 do the alternative coding, returning the answer as a reference to a string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50 my $output = $oddcode_obj->structural();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51 my $output = $oddcode_obj->chemical();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52 my $output = $oddcode_obj->functional();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53 my $output = $oddcode_obj->charge();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54 my $output = $oddcode_obj->hydrophobic();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55 my $output = $oddcode_obj->Dayhoff();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56 my $output = $oddcode_obj->Sneath();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57 my $output = $oddcode_obj->Stanfel();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60 display sequence in new form, eg:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62 my $new_coding = $$output;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
63 print "\n$new_coding";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
64
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
65 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
66
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67 Bio::Tools::OddCodes is a welterweight object for rewriting a protein
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68 sequence in an alternative alphabet. 8 of these are provided, ranging
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69 from the 2-letter hydrophobic alphabet to the 8-letter chemical
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70 alphabet. These are useful for the statistical analysis of protein
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71 sequences since they can partially avoid the combinatorial explosion
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72 produced by the full 20-letter alphabet (eg. 400 dimers, 8000 trimers
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73 etc.)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75 The objects will print out a warning if the input sequence is not a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76 protein. If you know what you are doing, you can silence the warning
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77 by setting verbose() to a negative value.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79 See Synopsis above for object creation code.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81 =head1 FEEDBACK
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83 =head2 Mailing Lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85 User feedback is an integral part of the evolution of this
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86 and other Bioperl modules. Send your comments and suggestions preferably
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87 to one of the Bioperl mailing lists.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88 Your participation is much appreciated.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90 bioperl-l@bioperl.org - General discussion
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91 http://www.bioperl.org/MailList.html - About the mailing lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93 =head2 Reporting Bugs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
94
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
95 Report bugs to the Bioperl bug tracking system to help us keep track of
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
96 the bugs and their resolution. Bug reports can be submitted via email
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
97 or the web:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
98
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
99 bioperl-bugs@bioperl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
100 http://www.bugzilla.bioperl.org/
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
101
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
102 =head1 AUTHOR
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
103
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
104 Derek Gatherer
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
105
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
106 =head1 APPENDIX
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
107
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
108 The rest of the documentation details each of the object methods.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
109 Internal methods are usually preceded with a _
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
110
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
111 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
112
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
113 #'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
114
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
115 package Bio::Tools::OddCodes;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
116 use vars qw(@ISA);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
117 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
118
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
119 use Bio::Root::Root;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
120
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
121 @ISA = qw(Bio::Root::Root);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
122
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
123
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
124 sub new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
125 {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
126 my($class,@args) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
127
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
128 my $self = $class->SUPER::new(@args);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
129
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
130 my ($seqobj) = $self->_rearrange([qw(SEQ)],@args);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
131 if((! defined($seqobj)) && @args && ref($args[0])) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
132 # parameter not passed as named parameter?
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
133 $seqobj = $args[0];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
134 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
135 unless ($seqobj->isa("Bio::PrimarySeqI"))
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
136 {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
137 die("die in _init, OddCodes works only on PrimarySeqI
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
138 objects\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
139 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
140
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
141 $self->{'_seqref'} = $seqobj;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
142
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
143 return $self;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
144 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
145
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
146 =head2 structural
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
147
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
148 Title : structural
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
149 Usage : $output = $oddcode_obj->structural();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
150 Function: turns amino acid sequence into 3-letter structural alphabet
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
151 : A (ambivalent), E (external), I (internal)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
152 Example : a sequence ACDEFGH will become AAEEIAE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
153 Returns : Reference to the new sequence string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
154 Args : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
155
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
156 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
157
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
158 sub structural()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
159 {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
160 my $self = $_[0];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
161 my $seqstring = &_pullseq($self); # see _pullseq() below
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
162
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
163 # now the real business
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
164
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
165 $seqstring =~ tr/[ACGPSTWY]/1/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
166 $seqstring =~ tr/[RNDQEHK]/2/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
167 $seqstring =~ tr/[ILMFV]/3/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
168 $seqstring =~ tr/1/A/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
169 $seqstring =~ tr/2/E/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
170 $seqstring =~ tr/3/I/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
171
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
172 return \$seqstring;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
173
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
174 # and that's that one
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
175 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
176
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
177 =head2 functional
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
178
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
179 Title : functional
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
180 Usage : $output = $oddcode_obj->functional();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
181 Function: turns amino acid sequence into 4-letter functional alphabet
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
182 : A (acidic), C (basic), H (hydrophobic), P (polar)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
183 Example : a sequence ACDEFGH will become HPAAHPC
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
184 Returns : Reference to the new sequence string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
185 Args : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
186
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
187 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
188
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
189 sub functional()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
190 {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
191 my $self = $_[0];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
192 my $seqstring = &_pullseq($self);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
193
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
194 # now the real business
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
195
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
196 $seqstring =~ tr/[DE]/1/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
197 $seqstring =~ tr/[HKR]/2/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
198 $seqstring =~ tr/[AFILMPVW]/3/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
199 $seqstring =~ tr/[CGNQSTY]/4/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
200 $seqstring =~ tr/1/A/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
201 $seqstring =~ tr/2/C/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
202 $seqstring =~ tr/3/H/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
203 $seqstring =~ tr/4/P/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
204
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
205 return \$seqstring;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
206
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
207 # and that's that one
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
208 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
209
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
210 =head2 hydrophobic
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
211
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
212 Title : hydrophobic
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
213 Usage : $output = $oddcode_obj->hydrophobic();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
214 Function: turns amino acid sequence into 2-letter hydrophobicity alphabet
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
215 : I (hydrophobic), O (hydrophilic)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
216 Example : a sequence ACDEFGH will become IOOOIOO
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
217 Returns : Reference to the new sequence string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
218 Args : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
219
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
220 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
221
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
222 sub hydrophobic()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
223 {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
224 my $self = $_[0];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
225 my $seqstring = &_pullseq($self);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
226
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
227 # now the real business
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
228
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
229 $seqstring =~ tr/[AFILMPVW]/1/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
230 $seqstring =~ tr/[CDEGHKNQRSTY]/2/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
231 $seqstring =~ tr/1/I/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
232 $seqstring =~ tr/2/O/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
233
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
234 return \$seqstring;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
235
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
236 # and that's that one
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
237 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
238
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
239 =head2 Dayhoff
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
240
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
241 Title : Dayhoff
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
242 Usage : $output = $oddcode_obj->Dayhoff();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
243 Function: turns amino acid sequence into 6-letter Dayhoff alphabet
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
244 Example : a sequence ACDEFGH will become CADDGCE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
245 Returns : Reference to the new sequence string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
246 Args : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
247
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
248 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
249
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
250 sub Dayhoff()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
251 {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
252 my $self = $_[0];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
253 my $seqstring = &_pullseq($self);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
254
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
255 # now the real business
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
256
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
257 $seqstring =~ tr/[C]/1/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
258 $seqstring =~ tr/[AGPST]/2/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
259 $seqstring =~ tr/[DENQ]/3/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
260 $seqstring =~ tr/[HKR]/4/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
261 $seqstring =~ tr/[ILMV]/5/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
262 $seqstring =~ tr/[FWY]/6/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
263 $seqstring =~ tr/1/A/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
264 $seqstring =~ tr/2/C/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
265 $seqstring =~ tr/3/D/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
266 $seqstring =~ tr/4/E/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
267 $seqstring =~ tr/5/F/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
268 $seqstring =~ tr/6/G/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
269
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
270 return \$seqstring;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
271
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
272 # and that's that one
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
273 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
274
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
275 =head2 Sneath
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
276
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
277 Title : Sneath
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
278 Usage : $output = $oddcode_obj->Sneath();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
279 Function: turns amino acid sequence into 7-letter Sneath alphabet
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
280 Example : a sequence ACDEFGH will become CEFFHCH
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
281 Returns : Reference to the new sequence string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
282 Args : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
283
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
284 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
285
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
286 sub Sneath()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
287 {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
288 my $self = $_[0];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
289 my $seqstring = &_pullseq($self);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
290
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
291 # now the real business
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
292
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
293 $seqstring =~ tr/[ILV]/1/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
294 $seqstring =~ tr/[AGP]/2/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
295 $seqstring =~ tr/[MNQ]/3/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
296 $seqstring =~ tr/[CST]/4/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
297 $seqstring =~ tr/[DE]/5/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
298 $seqstring =~ tr/[KR]/6/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
299 $seqstring =~ tr/[FHWY]/7/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
300 $seqstring =~ tr/1/A/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
301 $seqstring =~ tr/2/C/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
302 $seqstring =~ tr/3/D/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
303 $seqstring =~ tr/4/E/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
304 $seqstring =~ tr/5/F/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
305 $seqstring =~ tr/6/G/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
306 $seqstring =~ tr/7/H/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
307
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
308 return \$seqstring;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
309
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
310 # and that's that one
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
311 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
312
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
313 =head2 Stanfel
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
314
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
315 Title : Stanfel
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
316 Usage : $output = $oddcode_obj->Stanfel();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
317 Function: turns amino acid sequence into 4-letter Stanfel alphabet
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
318 Example : a sequence ACDEFGH will become AACCDAE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
319 Returns : Reference to the new sequence string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
320 Args : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
321
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
322 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
323
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
324 sub Stanfel()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
325 {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
326 my $self = $_[0];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
327 my $seqstring = &_pullseq($self);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
328
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
329 # now the real business
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
330
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
331 $seqstring =~ tr/[ACGILMPSTV]/1/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
332 $seqstring =~ tr/[DENQ]/2/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
333 $seqstring =~ tr/[FWY]/3/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
334 $seqstring =~ tr/[HKR]/4/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
335 $seqstring =~ tr/1/A/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
336 $seqstring =~ tr/2/C/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
337 $seqstring =~ tr/3/D/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
338 $seqstring =~ tr/4/E/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
339
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
340 return \$seqstring;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
341
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
342 # and that's that one
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
343 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
344
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
345 =head2 chemical()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
346
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
347 Title : chemical
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
348 Usage : $output = $oddcode_obj->chemical();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
349 Function: turns amino acid sequence into 8-letter chemical alphabet
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
350 : A (acidic), L (aliphatic), M (amide), R (aromatic)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
351 : C (basic), H (hydroxyl), I (imino), S (sulphur)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
352 Example : a sequence ACDEFGH will become LSAARLC
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
353 Returns : Reference to the new sequence string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
354 Args : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
355
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
356 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
357
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub chemical
{
    # Rewrite the sequence returned by _pullseq() in the 8-letter chemical
    # alphabet and return a REFERENCE to the new string.
    #
    #   A  acidic     (D E)          C  basic     (R H K)
    #   L  aliphatic  (A G I L V)    H  hydroxyl  (S T)
    #   M  amide      (N Q)          I  imino     (P)
    #   R  aromatic   (F W Y)        S  sulphur   (C M)
    #
    # Characters outside the 20 standard residues are left untouched.
    #
    # The sub deliberately has no prototype: prototypes are ignored for
    # method calls, and an empty one would reject chemical($obj).
    my ($self) = @_;
    my $seqstring = _pullseq($self);

    # A single tr/// maps each residue exactly once, so no numeric
    # placeholders are needed. Note that tr/// takes a plain character
    # list, not a regex class: writing tr/[DE]/.../ would also translate
    # literal '[' and ']' characters.
    $seqstring =~ tr/DEAGILVNQFWYRHKSTPCM/AALLLLLMMRRRCCCHHISS/;

    return \$seqstring;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
386
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
387 =head2 charge
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
388
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
389 Title : charge
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
390 Usage : $output = $oddcode_obj->charge();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
391 Function: turns amino acid sequence into 3-letter charge alphabet
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
392 Example : a sequence ACDEFGH will become NNAANNC
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
393 Returns : Reference to the new sequence string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
394 Args : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
395
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
396 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
397
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub charge
{
    # Rewrite the sequence returned by _pullseq() in the 3-letter charge
    # alphabet and return a REFERENCE to the new string.
    #
    #   A  D E
    #   C  H K R
    #   N  A C F G I L M N P Q S T V W Y
    #
    # Characters outside the 20 standard residues are left untouched.
    #
    # The sub deliberately has no prototype: prototypes are ignored for
    # method calls, and an empty one would reject charge($obj).
    my ($self) = @_;
    my $seqstring = _pullseq($self);

    # A single tr/// maps each residue exactly once, so no numeric
    # placeholders are needed. Note that tr/// takes a plain character
    # list, not a regex class: writing tr/[DE]/.../ would also translate
    # literal '[' and ']' characters.
    $seqstring =~ tr/DEHKRACFGILMNPQSTVWY/AACCCNNNNNNNNNNNNNNN/;

    return \$seqstring;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
416
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
417 # _pullseq is called within each of the subroutines
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
418 # it just checks a few things and returns the sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
419
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub _pullseq
{
    # Fetch the sequence object stored in $self->{'_seqref'}, validate it,
    # and return its sequence string in upper case.
    #
    # Dies if the stored value is not a Bio::PrimarySeqI object or if the
    # sequence is empty. Warns through $self->warn when the alphabet is
    # not 'protein', unless $self->verbose is negative.
    my ($self) = @_;

    my $seqobj = $self->{'_seqref'};

    # blessed() guards the isa() call: an undefined or unblessed value
    # would otherwise abort with Perl's own "Can't call method" error
    # instead of the message below.
    require Scalar::Util;
    unless (Scalar::Util::blessed($seqobj)
            and $seqobj->isa("Bio::PrimarySeqI"))
    {
        die("die, OddCodes works only on PrimarySeqI objects\n");
    }

    if ($seqobj->alphabet ne 'protein' and $self->verbose >= 0)
    {
        $self->warn("\tAll OddCode alphabets need a protein sequence,\n".
                    "\tbut BioPerl thinks this is not: [". $seqobj->id. "]");
    }

    # Treat an undefined sequence like an empty one so that uc() does not
    # emit an "uninitialized value" warning before the length check.
    my $seqstring = $seqobj->seq();
    $seqstring = '' unless defined $seqstring;
    $seqstring = uc $seqstring;

    if (length($seqstring) < 1)
    {
        # Identify the offending sequence by id; the sequence string
        # itself is necessarily empty here.
        my $id = $seqobj->id;
        $id = '' unless defined $id;
        die("$id: die, sequence has zero length\n");
    }
    return $seqstring;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
442
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
443 1;