comparison variant_effect_predictor/Bio/Tools/OddCodes.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #$Id: OddCodes.pm,v 1.10.2.1 2003/04/07 04:27:42 heikki Exp $
2 #-----------------------------------------------------------------------------
3 # PACKAGE : OddCodes.pm
4 # PURPOSE : To write amino acid sequences in alternative alphabets
5 # AUTHOR : Derek Gatherer (D.Gatherer@organon.nhe.akzonobel.nl)
6 # SOURCE :
7 # CREATED : 8th July 2000
8 # MODIFIED :
9 # DISCLAIMER : I am employed in the pharmaceutical industry but my
10 # : employers do not endorse or sponsor this module
11 # : in any way whatsoever. The above email address is
12 # : given purely for the purpose of easy communication
13 # : with the author, and does not imply any connection
14 # : between my employers and anything written below.
15 # LICENCE : You may distribute this module under the same terms
16 # : as the rest of BioPerl.
17 #----------------------------------------------------------------------------
18
19 =head1 NAME
20
21 Bio::Tools::OddCodes - Object holding alternative alphabet coding for
22 one protein sequence
23
24 =head1 SYNOPSIS
25
26 Takes a sequence object (e.g. from an input stream) and creates an object
27 for the purpose of rewriting that sequence in another alphabet.
28 These are abbreviated amino acid sequence alphabets, designed to
29 simplify the statistical aspects of analysing protein sequences, by
30 reducing the combinatorial explosion of the 20-letter alphabet. These
31 abbreviated alphabets range in size from 2 to 8.
32
33 Creating the OddCodes object, eg:
34
35 my $inputstream = Bio::SeqIO->new( '-file' => "seqfile",
36 '-format' => 'Fasta');
37 my $seqobj = $inputstream->next_seq();
38 my $oddcode_obj = Bio::Tools::OddCodes->new(-seq => $seqobj);
39
40 or:
41
42 my $seqobj = Bio::PrimarySeq->new
43 (-seq=>'[cut and paste a sequence here]',
44 -alphabet => 'protein',
45 -id => 'test');
46 my $oddcode_obj = Bio::Tools::OddCodes->new(-seq => $seqobj);
47
48 do the alternative coding, returning the answer as a reference to a string
49
50 my $output = $oddcode_obj->structural();
51 my $output = $oddcode_obj->chemical();
52 my $output = $oddcode_obj->functional();
53 my $output = $oddcode_obj->charge();
54 my $output = $oddcode_obj->hydrophobic();
55 my $output = $oddcode_obj->Dayhoff();
56 my $output = $oddcode_obj->Sneath();
57 my $output = $oddcode_obj->Stanfel();
58
59
60 display sequence in new form, eg:
61
62 my $new_coding = $$output;
63 print "\n$new_coding";
64
65 =head1 DESCRIPTION
66
67 Bio::Tools::OddCodes is a welterweight object for rewriting a protein
68 sequence in an alternative alphabet. 8 of these are provided, ranging
69 from the 2-letter hydrophobic alphabet, to the 8-letter chemical
70 alphabet. These are useful for the statistical analysis of protein
71 sequences since they can partially avoid the combinatorial explosion
72 produced by the full 20-letter alphabet (eg. 400 dimers, 8000 trimers
73 etc.)
74
75 The objects will print out a warning if the input sequence is not a
76 protein. If you know what you are doing, you can silence the warning
77 by setting verbose() to a negative value.
78
79 See Synopsis above for object creation code.
80
81 =head1 FEEDBACK
82
83 =head2 Mailing Lists
84
85 User feedback is an integral part of the evolution of this
86 and other Bioperl modules. Send your comments and suggestions preferably
87 to one of the Bioperl mailing lists.
88 Your participation is much appreciated.
89
90 bioperl-l@bioperl.org - General discussion
91 http://www.bioperl.org/MailList.html - About the mailing lists
92
93 =head2 Reporting Bugs
94
95 Report bugs to the Bioperl bug tracking system to help us keep track
96 of the bugs and their resolution. Bug reports can be submitted via email
97 or the web:
98
99 bioperl-bugs@bioperl.org
100 http://www.bugzilla.bioperl.org/
101
102 =head1 AUTHOR
103
104 Derek Gatherer
105
106 =head1 APPENDIX
107
108 The rest of the documentation details each of the object methods.
109 Internal methods are usually preceded with a _
110
111 =cut
112
113 #'
114
115 package Bio::Tools::OddCodes;
116 use vars qw(@ISA);
117 use strict;
118
119 use Bio::Root::Root;
120
121 @ISA = qw(Bio::Root::Root);
122
123
# Constructor.
#
# Args    : -seq => $seqobj, or the sequence object as the first positional
#           argument. $seqobj must be an object that isa Bio::PrimarySeqI.
# Returns : a new Bio::Tools::OddCodes object holding the sequence object
#           under the '_seqref' key.
# Dies    : if no Bio::PrimarySeqI object was supplied.
sub new {
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);

    my ($seqobj) = $self->_rearrange([qw(SEQ)], @args);
    if (!defined($seqobj) && @args && ref($args[0])) {
        # Sequence was passed positionally rather than as a named parameter.
        $seqobj = $args[0];
    }

    # Check blessed() before ->isa: calling a method on undef or on a plain
    # (unblessed) reference aborts with an unrelated Perl error instead of
    # the intended message below.
    require Scalar::Util;
    unless (Scalar::Util::blessed($seqobj)
        && $seqobj->isa("Bio::PrimarySeqI"))
    {
        die("die in _init, OddCodes works only on PrimarySeqI objects\n");
    }

    $self->{'_seqref'} = $seqobj;

    return $self;
}
145
146 =head2 structural
147
148 Title : structural
149 Usage : $output = $oddcode_obj->structural();
150 Function: turns amino acid sequence into 3-letter structural alphabet
151 : A (ambivalent), E (external), I (internal)
152 Example : a sequence ACDEFGH will become AAEEIAE
153 Returns : Reference to the new sequence string
154 Args : none
155
156 =cut
157
# Recode the stored sequence into the 3-letter structural alphabet:
#   ACGPSTWY -> A (ambivalent)
#   RNDQEHK  -> E (external)
#   ILMFV    -> I (internal)
# Characters outside these classes are passed through unchanged.
#
# Returns : reference to the recoded string.
# Dies    : via _pullseq(), e.g. when the sequence is empty.
sub structural {
    my ($self) = @_;
    my $seqstring = $self->_pullseq();    # upper-cased sequence string

    # Single-pass translation. tr/// takes literal character lists, not
    # regex classes, so no [] here (they would be translated themselves),
    # and no intermediate digit placeholders, so digits already present in
    # the input are left alone.
    $seqstring =~ tr{ACGPSTWYRNDQEHKILMFV}
                    {AAAAAAAAEEEEEEEIIIII};

    return \$seqstring;
}
176
177 =head2 functional
178
179 Title : functional
180 Usage : $output = $oddcode_obj->functional();
181 Function: turns amino acid sequence into 4-letter functional alphabet
182 : A (acidic), C (basic), H (hydrophobic), P (polar)
183 Example : a sequence ACDEFGH will become HPAAHPC
184 Returns : Reference to the new sequence string
185 Args : none
186
187 =cut
188
# Recode the stored sequence into the 4-letter functional alphabet:
#   DE       -> A (acidic)
#   HKR      -> C (basic)
#   AFILMPVW -> H (hydrophobic)
#   CGNQSTY  -> P (polar)
# Characters outside these classes are passed through unchanged.
#
# Returns : reference to the recoded string.
# Dies    : via _pullseq(), e.g. when the sequence is empty.
sub functional {
    my ($self) = @_;
    my $seqstring = $self->_pullseq();    # upper-cased sequence string

    # Single-pass translation: tr/// maps all characters simultaneously, so
    # output letters that are also input letters (A, C, H, P) cannot be
    # re-translated. No [] brackets (literal in tr///) and no digit
    # placeholders, so brackets and digits in the input are left alone.
    $seqstring =~ tr{DEHKRAFILMPVWCGNQSTY}
                    {AACCCHHHHHHHHPPPPPPP};

    return \$seqstring;
}
209
210 =head2 hydrophobic
211
212 Title : hydrophobic
213 Usage : $output = $oddcode_obj->hydrophobic();
214 Function: turns amino acid sequence into 2-letter hydrophobicity alphabet
215 : I (hydrophobic), O (hydrophilic)
216 Example : a sequence ACDEFGH will become IOOOIOO
217 Returns : Reference to the new sequence string
218 Args : none
219
220 =cut
221
# Recode the stored sequence into the 2-letter hydrophobicity alphabet:
#   AFILMPVW     -> I
#   CDEGHKNQRSTY -> O
# Characters outside these classes are passed through unchanged.
#
# Returns : reference to the recoded string.
# Dies    : via _pullseq(), e.g. when the sequence is empty.
sub hydrophobic {
    my ($self) = @_;
    my $seqstring = $self->_pullseq();    # upper-cased sequence string

    # Single-pass translation. tr/// takes literal character lists, not
    # regex classes, so no [] here (they would be translated themselves),
    # and no intermediate digit placeholders, so digits already present in
    # the input are left alone.
    $seqstring =~ tr{AFILMPVWCDEGHKNQRSTY}
                    {IIIIIIIIOOOOOOOOOOOO};

    return \$seqstring;
}
238
239 =head2 Dayhoff
240
241 Title : Dayhoff
242 Usage : $output = $oddcode_obj->Dayhoff();
243 Function: turns amino acid sequence into 6-letter Dayhoff alphabet
244 Example : a sequence ACDEFGH will become CADDGCE
245 Returns : Reference to the new sequence string
246 Args : none
247
248 =cut
249
# Recode the stored sequence into the 6-letter Dayhoff alphabet:
#   C     -> A
#   AGPST -> C
#   DENQ  -> D
#   HKR   -> E
#   ILMV  -> F
#   FWY   -> G
# Characters outside these classes are passed through unchanged.
#
# Returns : reference to the recoded string.
# Dies    : via _pullseq(), e.g. when the sequence is empty.
sub Dayhoff {
    my ($self) = @_;
    my $seqstring = $self->_pullseq();    # upper-cased sequence string

    # Single-pass translation: tr/// maps all characters simultaneously, so
    # output letters that are also input letters cannot be re-translated.
    # No [] brackets (literal in tr///) and no digit placeholders, so
    # brackets and digits in the input are left alone.
    $seqstring =~ tr{CAGPSTDENQHKRILMVFWY}
                    {ACCCCCDDDDEEEFFFFGGG};

    return \$seqstring;
}
274
275 =head2 Sneath
276
277 Title : Sneath
278 Usage : $output = $oddcode_obj->Sneath();
279 Function: turns amino acid sequence into 7-letter Sneath alphabet
280 Example : a sequence ACDEFGH will become CEFFHCH
281 Returns : Reference to the new sequence string
282 Args : none
283
284 =cut
285
# Recode the stored sequence into the 7-letter Sneath alphabet:
#   ILV  -> A
#   AGP  -> C
#   MNQ  -> D
#   CST  -> E
#   DE   -> F
#   KR   -> G
#   FHWY -> H
# Characters outside these classes are passed through unchanged.
#
# Returns : reference to the recoded string.
# Dies    : via _pullseq(), e.g. when the sequence is empty.
sub Sneath {
    my ($self) = @_;
    my $seqstring = $self->_pullseq();    # upper-cased sequence string

    # Single-pass translation: tr/// maps all characters simultaneously, so
    # output letters that are also input letters cannot be re-translated.
    # No [] brackets (literal in tr///) and no digit placeholders, so
    # brackets and digits in the input are left alone.
    $seqstring =~ tr{ILVAGPMNQCSTDEKRFHWY}
                    {AAACCCDDDEEEFFGGHHHH};

    return \$seqstring;
}
312
313 =head2 Stanfel
314
315 Title : Stanfel
316 Usage : $output = $oddcode_obj->Stanfel();
317 Function: turns amino acid sequence into 4-letter Stanfel alphabet
318 Example : a sequence ACDEFGH will become AACCDAE
319 Returns : Reference to the new sequence string
320 Args : none
321
322 =cut
323
# Recode the stored sequence into the 4-letter Stanfel alphabet:
#   ACGILMPSTV -> A
#   DENQ       -> C
#   FWY        -> D
#   HKR        -> E
# Characters outside these classes are passed through unchanged.
#
# Returns : reference to the recoded string.
# Dies    : via _pullseq(), e.g. when the sequence is empty.
sub Stanfel {
    my ($self) = @_;
    my $seqstring = $self->_pullseq();    # upper-cased sequence string

    # Single-pass translation: tr/// maps all characters simultaneously, so
    # output letters that are also input letters cannot be re-translated.
    # No [] brackets (literal in tr///) and no digit placeholders, so
    # brackets and digits in the input are left alone.
    $seqstring =~ tr{ACGILMPSTVDENQFWYHKR}
                    {AAAAAAAAAACCCCDDDEEE};

    return \$seqstring;
}
344
345 =head2 chemical()
346
347 Title : chemical
348 Usage : $output = $oddcode_obj->chemical();
349 Function: turns amino acid sequence into 8-letter chemical alphabet
350 : A (acidic), L (aliphatic), M (amide), R (aromatic)
351 : C (basic), H (hydroxyl), I (imino), S (sulphur)
352 Example : a sequence ACDEFGH will become LSAARLC
353 Returns : Reference to the new sequence string
354 Args : none
355
356 =cut
357
# Recode the stored sequence into the 8-letter chemical alphabet:
#   DE    -> A (acidic)
#   AGILV -> L (aliphatic)
#   NQ    -> M (amide)
#   FWY   -> R (aromatic)
#   RHK   -> C (basic)
#   ST    -> H (hydroxyl)
#   P     -> I (imino)
#   CM    -> S (sulphur)
# Characters outside these classes are passed through unchanged.
#
# Returns : reference to the recoded string.
# Dies    : via _pullseq(), e.g. when the sequence is empty.
sub chemical {
    my ($self) = @_;
    my $seqstring = $self->_pullseq();    # upper-cased sequence string

    # Single-pass translation: tr/// maps all characters simultaneously, so
    # output letters that are also input letters cannot be re-translated.
    # No [] brackets (literal in tr///) and no digit placeholders, so
    # brackets and digits in the input are left alone.
    $seqstring =~ tr{DEAGILVNQFWYRHKSTPCM}
                    {AALLLLLMMRRRCCCHHISS};

    return \$seqstring;
}
386
387 =head2 charge
388
389 Title : charge
390 Usage : $output = $oddcode_obj->charge();
391 Function: turns amino acid sequence into 3-letter charge alphabet
392 Example : a sequence ACDEFGH will become NNAANNC
393 Returns : Reference to the new sequence string
394 Args : none
395
396 =cut
397
# Recode the stored sequence into the 3-letter charge alphabet:
#   DE              -> A
#   HKR             -> C
#   ACFGILMNPQSTVWY -> N
# Characters outside these classes are passed through unchanged.
#
# Returns : reference to the recoded string.
# Dies    : via _pullseq(), e.g. when the sequence is empty.
sub charge {
    my ($self) = @_;
    my $seqstring = $self->_pullseq();    # upper-cased sequence string

    # Single-pass translation: tr/// maps all characters simultaneously, so
    # output letters that are also input letters (A, C, N) cannot be
    # re-translated. No [] brackets (literal in tr///) and no digit
    # placeholders, so brackets and digits in the input are left alone.
    $seqstring =~ tr{DEHKRACFGILMNPQSTVWY}
                    {AACCCNNNNNNNNNNNNNNN};

    return \$seqstring;
}
416
417 # _pullseq is called within each of the subroutines
418 # it just checks a few things and returns the sequence
419
# Internal helper used by every alphabet method: validates the stored
# sequence object and returns its sequence as an upper-cased string.
#
# Returns : the upper-cased sequence string (never empty).
# Warns   : through $self->warn when the sequence's alphabet is not
#           'protein', unless $self->verbose is negative.
# Dies    : if '_seqref' is not a Bio::PrimarySeqI object, or if the
#           sequence is empty or undefined.
sub _pullseq {
    my ($self) = @_;

    my $seqobj = $self->{'_seqref'};

    # Check blessed() before ->isa so that an undefined or unblessed
    # '_seqref' yields the intended message rather than a Perl runtime error.
    require Scalar::Util;
    unless (Scalar::Util::blessed($seqobj)
        && $seqobj->isa("Bio::PrimarySeqI"))
    {
        die("die, OddCodes works only on PrimarySeqI objects\n");
    }

    unless ($seqobj->alphabet eq 'protein' or $self->verbose < 0) {
        $self->warn("\tAll OddCode alphabets need a protein sequence,\n"
              . "\tbut BioPerl thinks this is not: [" . $seqobj->id . "]");
    }

    # Treat an undefined sequence like an empty one instead of feeding
    # undef to uc().
    my $rawseq    = $seqobj->seq();
    my $seqstring = defined $rawseq ? uc $rawseq : '';

    if (length($seqstring) < 1) {
        # The original message interpolated the (necessarily empty) sequence
        # in front of the colon; the resulting text is kept byte-identical.
        die(": die, sequence has zero length\n");
    }
    return $seqstring;
}
442
443 1;