Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/OddCodes.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 #$Id: OddCodes.pm,v 1.10.2.1 2003/04/07 04:27:42 heikki Exp $ | |
2 #----------------------------------------------------------------------------- | |
3 # PACKAGE : OddCodes.pm | |
4 # PURPOSE : To write amino acid sequences in alternative alphabets | |
5 # AUTHOR : Derek Gatherer (D.Gatherer@organon.nhe.akzonobel.nl) | |
6 # SOURCE : | |
7 # CREATED : 8th July 2000 | |
8 # MODIFIED : | |
9 # DISCLAIMER : I am employed in the pharmaceutical industry but my | |
10 # : employers do not endorse or sponsor this module | |
11 # : in any way whatsoever. The above email address is | |
12 # : given purely for the purpose of easy communication | |
13 # : with the author, and does not imply any connection | |
14 # : between my employers and anything written below. | |
15 # LICENCE : You may distribute this module under the same terms | |
16 # : as the rest of BioPerl. | |
17 #---------------------------------------------------------------------------- | |
18 | |
19 =head1 NAME | |
20 | |
21 Bio::Tools::OddCodes - Object holding alternative alphabet coding for | |
22 one protein sequence | |
23 | |
24 =head1 SYNOPSIS | |
25 | |
26 Takes a sequence object from, e.g., an input stream, and creates an object | |
27 for the purposes of rewriting that sequence in another alphabet. | |
28 These are abbreviated amino acid sequence alphabets, designed to | |
29 simplify the statistical aspects of analysing protein sequences, by | |
30 reducing the combinatorial explosion of the 20-letter alphabet. These | |
31 abbreviated alphabets range in size from 2 to 8. | |
32 | |
33 Creating the OddCodes object, eg: | |
34 | |
35 my $inputstream = Bio::SeqIO->new( '-file' => "seqfile", | |
36 '-format' => 'Fasta'); | |
37 my $seqobj = $inputstream->next_seq(); | |
38 my $oddcode_obj = Bio::Tools::OddCodes->new(-seq => $seqobj); | |
39 | |
40 or: | |
41 | |
42 my $seqobj = Bio::PrimarySeq->new | |
43 (-seq=>'[cut and paste a sequence here]', | |
44 -alphabet => 'protein', | |
45 -id => 'test'); | |
46 my $oddcode_obj = Bio::Tools::OddCodes->new(-seq => $seqobj); | |
47 | |
48 do the alternative coding, returning the answer as a reference to a string | |
49 | |
50 my $output = $oddcode_obj->structural(); | |
51 my $output = $oddcode_obj->chemical(); | |
52 my $output = $oddcode_obj->functional(); | |
53 my $output = $oddcode_obj->charge(); | |
54 my $output = $oddcode_obj->hydrophobic(); | |
55 my $output = $oddcode_obj->Dayhoff(); | |
56 my $output = $oddcode_obj->Sneath(); | |
57 my $output = $oddcode_obj->Stanfel(); | |
58 | |
59 | |
60 display sequence in new form, eg: | |
61 | |
62 my $new_coding = $$output; | |
63 print "\n$new_coding"; | |
64 | |
65 =head1 DESCRIPTION | |
66 | |
67 Bio::Tools::OddCodes is a welterweight object for rewriting a protein | |
68 sequence in an alternative alphabet. 8 of these are provided, ranging | |
69 from the 2-letter hydrophobic alphabet, to the 8-letter chemical | |
70 alphabet. These are useful for the statistical analysis of protein | |
71 sequences since they can partially avoid the combinatorial explosion | |
72 produced by the full 20-letter alphabet (eg. 400 dimers, 8000 trimers | |
73 etc.) | |
74 | |
75 The objects will print out a warning if the input sequence is not a | |
76 protein. If you know what you are doing, you can silence the warning | |
77 by setting verbose() to a negative value. | |
78 | |
79 See Synopsis above for object creation code. | |
80 | |
81 =head1 FEEDBACK | |
82 | |
83 =head2 Mailing Lists | |
84 | |
85 User feedback is an integral part of the evolution of this | |
86 and other Bioperl modules. Send your comments and suggestions preferably | |
87 to one of the Bioperl mailing lists. | |
88 Your participation is much appreciated. | |
89 | |
90 bioperl-l@bioperl.org - General discussion | |
91 http://www.bioperl.org/MailList.html - About the mailing lists | |
92 | |
93 =head2 Reporting Bugs | |
94 | |
95 Report bugs to the Bioperl bug tracking system to help us keep track | |
96 of the bugs and their resolution. Bug reports can be submitted via email | |
97 or the web: | |
98 | |
99 bioperl-bugs@bioperl.org | |
100 http://www.bugzilla.bioperl.org/ | |
101 | |
102 =head1 AUTHOR | |
103 | |
104 Derek Gatherer | |
105 | |
106 =head1 APPENDIX | |
107 | |
108 The rest of the documentation details each of the object methods. | |
109 Internal methods are usually preceded with a _ | |
110 | |
111 =cut | |
112 | |
113 #' | |
114 | |
115 package Bio::Tools::OddCodes; | |
116 use vars qw(@ISA); | |
117 use strict; | |
118 | |
119 use Bio::Root::Root; | |
120 | |
121 @ISA = qw(Bio::Root::Root); | |
122 | |
123 | |
=head2 new

 Title   : new
 Usage   : $oddcode_obj = Bio::Tools::OddCodes->new(-seq => $seqobj);
 Function: creates an OddCodes object holding one sequence object
 Returns : the new Bio::Tools::OddCodes object
 Args    : -seq => a Bio::PrimarySeqI object; the object may also be
           passed as the first (unnamed) argument
 Throws  : dies if no Bio::PrimarySeqI object was supplied

=cut

sub new {
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);

    my ($seqobj) = $self->_rearrange([qw(SEQ)], @args);
    if (!defined($seqobj) && @args && ref($args[0])) {
        # parameter not passed as named parameter?
        $seqobj = $args[0];
    }

    # Guard the ->isa call: on an undefined value or an unblessed reference
    # it would abort with a generic "Can't call method" error instead of
    # the message below.  local $@ keeps the caller's $@ untouched.
    my $is_primary_seq = defined($seqobj)
        && do { local $@; eval { $seqobj->isa("Bio::PrimarySeqI") } };

    unless ($is_primary_seq) {
        die("die in _init, OddCodes works only on PrimarySeqI\nobjects\n");
    }

    $self->{'_seqref'} = $seqobj;

    return $self;
}
145 | |
=head2 structural

 Title   : structural
 Usage   : $output = $oddcode_obj->structural();
 Function: turns amino acid sequence into 3-letter structural alphabet
         : A (ambivalent), E (external), I (internal)
 Example : a sequence ACDEFGH will become AAEEIAE
 Returns : Reference to the new sequence string
 Args    : none

=cut

sub structural {
    my ($self) = @_;
    my $seqstring = _pullseq($self);    # see _pullseq() below

    # tr/// takes literal character lists, not regex classes, so the sets
    # are written without [] (brackets would themselves be recoded).
    # A single simultaneous transliteration needs no intermediate digits,
    # so characters outside the 20 amino acid letters pass through as-is.
    #
    #   ACGPSTWY -> A (ambivalent)
    #   RNDQEHK  -> E (external)
    #   ILMFV    -> I (internal)
    $seqstring =~ tr/ACGPSTWYRNDQEHKILMFV/AAAAAAAAEEEEEEEIIIII/;

    return \$seqstring;
}
176 | |
=head2 functional

 Title   : functional
 Usage   : $output = $oddcode_obj->functional();
 Function: turns amino acid sequence into 4-letter functional alphabet
         : A (acidic), C (basic), H (hydrophobic), P (polar)
 Example : a sequence ACDEFGH will become HPAAHPC
 Returns : Reference to the new sequence string
 Args    : none

=cut

sub functional {
    my ($self) = @_;
    my $seqstring = _pullseq($self);    # see _pullseq() below

    # tr/// takes literal character lists, not regex classes, so the sets
    # are written without [] (brackets would themselves be recoded).
    # A single simultaneous transliteration needs no intermediate digits,
    # so characters outside the 20 amino acid letters pass through as-is.
    #
    #   DE       -> A (acidic)
    #   HKR      -> C (basic)
    #   AFILMPVW -> H (hydrophobic)
    #   CGNQSTY  -> P (polar)
    $seqstring =~ tr/DEHKRAFILMPVWCGNQSTY/AACCCHHHHHHHHPPPPPPP/;

    return \$seqstring;
}
209 | |
=head2 hydrophobic

 Title   : hydrophobic
 Usage   : $output = $oddcode_obj->hydrophobic();
 Function: turns amino acid sequence into 2-letter hydrophobicity alphabet
         : I (hydrophobic: AFILMPVW), O (hydrophilic: CDEGHKNQRSTY)
 Example : a sequence ACDEFGH will become IOOOIOO
 Returns : Reference to the new sequence string
 Args    : none

=cut

sub hydrophobic {
    my ($self) = @_;
    my $seqstring = _pullseq($self);    # see _pullseq() below

    # tr/// takes literal character lists, not regex classes, so the sets
    # are written without [] (brackets would themselves be recoded).
    # A single simultaneous transliteration needs no intermediate digits,
    # so characters outside the 20 amino acid letters pass through as-is.
    #
    #   AFILMPVW     -> I
    #   CDEGHKNQRSTY -> O
    $seqstring =~ tr/AFILMPVWCDEGHKNQRSTY/IIIIIIIIOOOOOOOOOOOO/;

    return \$seqstring;
}
238 | |
=head2 Dayhoff

 Title   : Dayhoff
 Usage   : $output = $oddcode_obj->Dayhoff();
 Function: turns amino acid sequence into 6-letter Dayhoff alphabet
         : A (C), C (AGPST), D (DENQ), E (HKR), F (ILMV), G (FWY)
 Example : a sequence ACDEFGH will become CADDGCE
 Returns : Reference to the new sequence string
 Args    : none

=cut

sub Dayhoff {
    my ($self) = @_;
    my $seqstring = _pullseq($self);    # see _pullseq() below

    # tr/// takes literal character lists, not regex classes, so the sets
    # are written without [] (brackets would themselves be recoded).
    # The transliteration is simultaneous, so output letters that are also
    # input letters (e.g. C) cannot be recoded twice and no intermediate
    # digits are needed.
    #
    #   C     -> A
    #   AGPST -> C
    #   DENQ  -> D
    #   HKR   -> E
    #   ILMV  -> F
    #   FWY   -> G
    $seqstring =~ tr/CAGPSTDENQHKRILMVFWY/ACCCCCDDDDEEEFFFFGGG/;

    return \$seqstring;
}
274 | |
=head2 Sneath

 Title   : Sneath
 Usage   : $output = $oddcode_obj->Sneath();
 Function: turns amino acid sequence into 7-letter Sneath alphabet
         : A (ILV), C (AGP), D (MNQ), E (CST), F (DE), G (KR), H (FHWY)
 Example : a sequence ACDEFGH will become CEFFHCH
 Returns : Reference to the new sequence string
 Args    : none

=cut

sub Sneath {
    my ($self) = @_;
    my $seqstring = _pullseq($self);    # see _pullseq() below

    # tr/// takes literal character lists, not regex classes, so the sets
    # are written without [] (brackets would themselves be recoded).
    # The transliteration is simultaneous, so output letters that are also
    # input letters cannot be recoded twice and no intermediate digits are
    # needed.
    #
    #   ILV  -> A
    #   AGP  -> C
    #   MNQ  -> D
    #   CST  -> E
    #   DE   -> F
    #   KR   -> G
    #   FHWY -> H
    $seqstring =~ tr/ILVAGPMNQCSTDEKRFHWY/AAACCCDDDEEEFFGGHHHH/;

    return \$seqstring;
}
312 | |
=head2 Stanfel

 Title   : Stanfel
 Usage   : $output = $oddcode_obj->Stanfel();
 Function: turns amino acid sequence into 4-letter Stanfel alphabet
         : A (ACGILMPSTV), C (DENQ), D (FWY), E (HKR)
 Example : a sequence ACDEFGH will become AACCDAE
 Returns : Reference to the new sequence string
 Args    : none

=cut

sub Stanfel {
    my ($self) = @_;
    my $seqstring = _pullseq($self);    # see _pullseq() below

    # tr/// takes literal character lists, not regex classes, so the sets
    # are written without [] (brackets would themselves be recoded).
    # The transliteration is simultaneous, so output letters that are also
    # input letters cannot be recoded twice and no intermediate digits are
    # needed.
    #
    #   ACGILMPSTV -> A
    #   DENQ       -> C
    #   FWY        -> D
    #   HKR        -> E
    $seqstring =~ tr/ACGILMPSTVDENQFWYHKR/AAAAAAAAAACCCCDDDEEE/;

    return \$seqstring;
}
344 | |
=head2 chemical

 Title   : chemical
 Usage   : $output = $oddcode_obj->chemical();
 Function: turns amino acid sequence into 8-letter chemical alphabet
         : A (acidic), L (aliphatic), M (amide), R (aromatic)
         : C (basic), H (hydroxyl), I (imino), S (sulphur)
 Example : a sequence ACDEFGH will become LSAARLC
 Returns : Reference to the new sequence string
 Args    : none

=cut

sub chemical {
    my ($self) = @_;
    my $seqstring = _pullseq($self);    # see _pullseq() below

    # tr/// takes literal character lists, not regex classes, so the sets
    # are written without [] (brackets would themselves be recoded).
    # The transliteration is simultaneous, so output letters that are also
    # input letters cannot be recoded twice and no intermediate digits are
    # needed.
    #
    #   DE    -> A (acidic)
    #   AGILV -> L (aliphatic)
    #   NQ    -> M (amide)
    #   FWY   -> R (aromatic)
    #   RHK   -> C (basic)
    #   ST    -> H (hydroxyl)
    #   P     -> I (imino)
    #   CM    -> S (sulphur)
    $seqstring =~ tr/DEAGILVNQFWYRHKSTPCM/AALLLLLMMRRRCCCHHISS/;

    return \$seqstring;
}
386 | |
=head2 charge

 Title   : charge
 Usage   : $output = $oddcode_obj->charge();
 Function: turns amino acid sequence into 3-letter charge alphabet
         : A (DE), C (HKR), N (all other amino acids)
 Example : a sequence ACDEFGH will become NNAANNC
 Returns : Reference to the new sequence string
 Args    : none

=cut

sub charge {
    my ($self) = @_;
    my $seqstring = _pullseq($self);    # see _pullseq() below

    # tr/// takes literal character lists, not regex classes, so the sets
    # are written without [] (brackets would themselves be recoded).
    # The transliteration is simultaneous, so output letters that are also
    # input letters cannot be recoded twice and no intermediate digits are
    # needed.
    #
    #   DE              -> A
    #   HKR             -> C
    #   ACFGILMNPQSTVWY -> N
    $seqstring =~ tr/DEHKRACFGILMNPQSTVWY/AACCCNNNNNNNNNNNNNNN/;

    return \$seqstring;
}
416 | |
# _pullseq is the shared first step of every recoding method: it fetches
# the stored sequence object, sanity-checks it, and hands back the
# sequence as an upper-cased string.
#
# Dies if the stored object is not a Bio::PrimarySeqI or if the sequence
# is empty.  Warns when the alphabet is not 'protein', unless verbose()
# is negative.

sub _pullseq {
    my ($self) = @_;
    my $stored = $self->{'_seqref'};

    $stored->isa("Bio::PrimarySeqI")
        or die("die, OddCodes works only on PrimarySeqI objects\n");

    # verbose() is consulted only for non-protein input
    if ($stored->alphabet ne 'protein' && !($self->verbose < 0)) {
        $self->warn("\tAll OddCode alphabets need a protein sequence,\n".
                    "\tbut BioPerl thinks this is not: [". $stored->id. "]");
    }

    my $seqstring = uc $stored->seq();
    die("$seqstring: die, sequence has zero length\n")
        if length($seqstring) < 1;

    return $seqstring;
}
442 | |
443 1; |