Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/CodonTable.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: CodonTable.pm,v 1.23 2002/10/22 07:38:45 lapp Exp $ | |
2 # | |
3 # bioperl module for Bio::Tools::CodonTable | |
4 # | |
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
6 # | |
7 # Copyright Heikki Lehvaslaiho | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Tools::CodonTable - Bioperl codon table object | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 This is a read-only class for all known codon tables. The IDs are | |
20 the ones used by nucleotide sequence databases. All common IUPAC | |
21 ambiguity codes for DNA, RNA and animo acids are recognized. | |
22 | |
23 # to use | |
24 use Bio::Tools::CodonTable; | |
25 | |
26 # defaults to ID 1 "Standard" | |
27 $myCodonTable = Bio::Tools::CodonTable->new(); | |
28 $myCodonTable2 = Bio::Tools::CodonTable -> new ( -id => 3 ); | |
29 | |
30 # change codon table | |
31 $myCodonTable->id(5); | |
32 | |
33 # examine codon table | |
34 print join (' ', "The name of the codon table no.", $myCodonTable->id(4), | |
35 "is:", $myCodonTable->name(), "\n"); | |
36 | |
37 # translate a codon | |
38 $aa = $myCodonTable->translate('ACU'); | |
39 $aa = $myCodonTable->translate('act'); | |
40 $aa = $myCodonTable->translate('ytr'); | |
41 | |
42 # reverse translate an amino acid | |
43 @codons = $myCodonTable->revtranslate('A'); | |
44 @codons = $myCodonTable->revtranslate('Ser'); | |
45 @codons = $myCodonTable->revtranslate('Glx'); | |
46 @codons = $myCodonTable->revtranslate('cYS', 'rna'); | |
47 | |
48 #boolean tests | |
49 print "Is a start\n" if $myCodonTable->is_start_codon('ATG'); | |
50 print "Is a termianator\n" if $myCodonTable->is_ter_codon('tar'); | |
51 print "Is a unknown\n" if $myCodonTable->is_unknown_codon('JTG'); | |
52 | |
53 =head1 DESCRIPTION | |
54 | |
55 Codon tables are also called translation tables or genetics codes | |
56 since that is what they try to represent. A bit more complete picture | |
57 of the full complexity of codon usage in various taxonomic groups | |
58 presented at the NCBI Genetic Codes Home page. | |
59 | |
60 CodonTable is a BioPerl class that knows all current translation | |
61 tables that are used by primary nucleotide sequence databases | |
62 (GenBank, EMBL and DDBJ). It provides methods to output information | |
63 about tables and relationships between codons and amino acids. | |
64 | |
65 This class and its methods recognized all common IUPAC ambiguity codes | |
66 for DNA, RNA and animo acids. The translation method follows the | |
67 conventions in EMBL and TREMBL databases. | |
68 | |
69 It is a nuisance to separate RNA and cDNA representations of nucleic | |
70 acid transcripts. The CodonTable object accepts codons of both type as | |
71 input and allows the user to set the mode for output when reverse | |
72 translating. Its default for output is DNA. | |
73 | |
74 Note: This class deals primarily with individual codons and amino | |
75 acids. However in the interest of speed you can L<translate> | |
76 longer sequence, too. The full complexity of protein translation | |
77 is tackled by L<Bio::PrimarySeqI::translate>. | |
78 | |
79 | |
80 The amino acid codes are IUPAC recommendations for common amino acids: | |
81 | |
82 A Ala Alanine | |
83 R Arg Arginine | |
84 N Asn Asparagine | |
85 D Asp Aspartic acid | |
86 C Cys Cysteine | |
87 Q Gln Glutamine | |
88 E Glu Glutamic acid | |
89 G Gly Glycine | |
90 H His Histidine | |
91 I Ile Isoleucine | |
92 L Leu Leucine | |
93 K Lys Lysine | |
94 M Met Methionine | |
95 F Phe Phenylalanine | |
96 P Pro Proline | |
97 S Ser Serine | |
98 T Thr Threonine | |
99 W Trp Tryptophan | |
100 Y Tyr Tyrosine | |
101 V Val Valine | |
102 B Asx Aspartic acid or Asparagine | |
103 Z Glx Glutamine or Glutamic acid | |
104 X Xaa Any or unknown amino acid | |
105 | |
106 | |
107 It is worth noting that, "Bacterial" codon table no. 11 produces an | |
108 polypeptide that is, confusingly, identical to the standard one. The | |
109 only differences are in available initiator codons. | |
110 | |
111 | |
112 NCBI Genetic Codes home page: | |
113 http://www.ncbi.nlm.nih.gov/htbin-post/Taxonomy/wprintgc?mode=c | |
114 | |
115 EBI Translation Table Viewer: | |
116 http://www.ebi.ac.uk/cgi-bin/mutations/trtables.cgi | |
117 | |
118 Amended ASN.1 version with ids 16 and 21 is at: | |
119 ftp://ftp.ebi.ac.uk/pub/databases/geneticcode/ | |
120 | |
121 Thank your for Matteo diTomasso for the original Perl implementation | |
122 of these tables. | |
123 | |
124 =head1 FEEDBACK | |
125 | |
126 =head2 Mailing Lists | |
127 | |
128 User feedback is an integral part of the evolution of this and other | |
129 Bioperl modules. Send your comments and suggestions preferably to the | |
130 Bioperl mailing lists Your participation is much appreciated. | |
131 | |
132 bioperl-l@bioperl.org - General discussion | |
133 http://bio.perl.org/MailList.html - About the mailing lists | |
134 | |
135 =head2 Reporting Bugs | |
136 | |
137 report bugs to the Bioperl bug tracking system to help us keep track | |
138 the bugs and their resolution. Bug reports can be submitted via | |
139 email or the web: | |
140 | |
141 bioperl-bugs@bio.perl.org | |
142 http://bugzilla.bioperl.org/ | |
143 | |
144 =head1 AUTHOR - Heikki Lehvaslaiho | |
145 | |
146 Email: heikki@ebi.ac.uk | |
147 Address: | |
148 | |
149 EMBL Outstation, European Bioinformatics Institute | |
150 Wellcome Trust Genome Campus, Hinxton | |
151 Cambs. CB10 1SD, United Kingdom | |
152 | |
153 =head1 APPENDIX | |
154 | |
155 The rest of the documentation details each of the object | |
156 methods. Internal methods are usually preceded with a _ | |
157 | |
158 =cut | |
159 | |
160 | |
161 # Let the code begin... | |
162 | |
163 package Bio::Tools::CodonTable; | |
164 use vars qw(@ISA @NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA | |
165 %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR); | |
166 use strict; | |
167 | |
168 # Object preamble - inherits from Bio::Root::Root | |
169 use Bio::Root::Root; | |
170 use Bio::Tools::IUPAC; | |
171 use Bio::SeqUtils; | |
172 | |
173 @ISA = qw(Bio::Root::Root); | |
174 | |
175 # first set internal values for all translation tables | |
176 | |
177 BEGIN { | |
178 @NAMES = #id | |
179 ( | |
180 'Standard', #1 | |
181 'Vertebrate Mitochondrial',#2 | |
182 'Yeast Mitochondrial',# 3 | |
183 'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma',#4 | |
184 'Invertebrate Mitochondrial',#5 | |
185 'Ciliate, Dasycladacean and Hexamita Nuclear',# 6 | |
186 '', '', | |
187 'Echinoderm Mitochondrial',#9 | |
188 'Euplotid Nuclear',#10 | |
189 '"Bacterial"',# 11 | |
190 'Alternative Yeast Nuclear',# 12 | |
191 'Ascidian Mitochondrial',# 13 | |
192 'Flatworm Mitochondrial',# 14 | |
193 'Blepharisma Nuclear',# 15 | |
194 'Chlorophycean Mitochondrial',# 16 | |
195 '', '', '', '', | |
196 'Trematode Mitochondrial',# 21 | |
197 'Scenedesmus obliquus Mitochondrial', #22 | |
198 'Thraustochytrium Mitochondrial' #23 | |
199 ); | |
200 | |
201 @TABLES = | |
202 qw( | |
203 FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
204 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG | |
205 FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
206 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
207 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG | |
208 FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
209 '' '' | |
210 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG | |
211 FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
212 FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
213 FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
214 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG | |
215 FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG | |
216 FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
217 FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
218 '' '' '' '' | |
219 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG | |
220 FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
221 FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
222 ); | |
223 | |
224 | |
225 @STARTS = | |
226 qw( | |
227 ---M---------------M---------------M---------------------------- | |
228 --------------------------------MMMM---------------M------------ | |
229 ----------------------------------MM---------------------------- | |
230 --MM---------------M------------MMMM---------------M------------ | |
231 ---M----------------------------MMMM---------------M------------ | |
232 -----------------------------------M---------------------------- | |
233 '' '' | |
234 -----------------------------------M---------------------------- | |
235 -----------------------------------M---------------------------- | |
236 ---M---------------M------------MMMM---------------M------------ | |
237 -------------------M---------------M---------------------------- | |
238 -----------------------------------M---------------------------- | |
239 -----------------------------------M---------------------------- | |
240 -----------------------------------M---------------------------- | |
241 -----------------------------------M---------------------------- | |
242 '' '' '' '' | |
243 -----------------------------------M---------------M------------ | |
244 -----------------------------------M---------------------------- | |
245 --------------------------------M--M---------------M------------ | |
246 ); | |
247 | |
248 my @nucs = qw(t c a g); | |
249 my $x = 0; | |
250 ($CODONS, $TRCOL) = ({}, {}); | |
251 for my $i (@nucs) { | |
252 for my $j (@nucs) { | |
253 for my $k (@nucs) { | |
254 my $codon = "$i$j$k"; | |
255 $CODONS->{$codon} = $x; | |
256 $TRCOL->{$x} = $codon; | |
257 $x++; | |
258 } | |
259 } | |
260 } | |
261 %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub(); | |
262 %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup(); | |
263 %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2); | |
264 $VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']'; | |
265 $TERMINATOR = '*'; | |
266 } | |
267 | |
268 sub new { | |
269 my($class,@args) = @_; | |
270 my $self = $class->SUPER::new(@args); | |
271 | |
272 my($id) = | |
273 $self->_rearrange([qw(ID | |
274 )], | |
275 @args); | |
276 | |
277 $id = 1 if ( ! $id ); | |
278 $id && $self->id($id); | |
279 return $self; # success - we hope! | |
280 } | |
281 | |
282 =head2 id | |
283 | |
284 Title : id | |
285 Usage : $obj->id(3); $id_integer = $obj->id(); | |
286 Function: | |
287 | |
288 Sets or returns the id of the translation table. IDs are | |
289 integers from 1 to 15, excluding 7 and 8 which have been | |
290 removed as redundant. If an invalid ID is given the method | |
291 returns 0, false. | |
292 | |
293 | |
294 Example : | |
295 Returns : value of id, a scalar, 0 if not a valid | |
296 Args : newvalue (optional) | |
297 | |
298 =cut | |
299 | |
300 sub id{ | |
301 my ($self,$value) = @_; | |
302 if( defined $value) { | |
303 if ( !(defined $TABLES[$value-1]) or $TABLES[$value-1] eq '') { | |
304 $self->warn("Not a valid codon table ID [$value] "); | |
305 $value = 0; | |
306 } | |
307 $self->{'id'} = $value; | |
308 } | |
309 return $self->{'id'}; | |
310 } | |
311 | |
312 =head2 name | |
313 | |
314 Title : name | |
315 Usage : $obj->name() | |
316 Function: returns the descriptive name of the translation table | |
317 Example : | |
318 Returns : A string | |
319 Args : None | |
320 | |
321 | |
322 =cut | |
323 | |
324 sub name{ | |
325 my ($self) = @_; | |
326 | |
327 my ($id) = $self->{'id'}; | |
328 return $NAMES[$id-1]; | |
329 } | |
330 | |
331 =head2 translate | |
332 | |
333 Title : translate | |
334 Usage : $obj->translate('YTR') | |
335 Function: Returns a string of one letter amino acid codes from | |
336 nucleotide sequence input. The imput can be of any length. | |
337 | |
338 Returns 'X' for unknown codons and codons that code for | |
339 more than one amino acid. Returns an empty string if input | |
340 is not three characters long. Exceptions for these are: | |
341 | |
342 - IUPAC amino acid code B for Aspartic Acid and | |
343 Asparagine, is used. | |
344 - IUPAC amino acid code Z for Glutamic Acid, Glutamine is | |
345 used. | |
346 - if the codon is two nucleotides long and if by adding | |
347 an a third character 'N', it codes for a single amino | |
348 acid (with exceptions above), return that, otherwise | |
349 return empty string. | |
350 | |
351 Returns empty string for other input strings that are not | |
352 three characters long. | |
353 | |
354 Example : | |
355 Returns : a string of one letter ambiguous IUPAC amino acid codes | |
356 Args : ambiguous IUPAC nucleotide string | |
357 | |
358 | |
359 =cut | |
360 | |
361 sub translate { | |
362 my ($self, $seq) = @_; | |
363 $self->throw("Calling translate without a seq argument!") unless defined $seq; | |
364 return '' unless $seq; | |
365 | |
366 my $id = $self->id; | |
367 my ($partial) = 0; | |
368 $partial = 2 if length($seq) % 3 == 2; | |
369 | |
370 $seq = lc $seq; | |
371 $seq =~ tr/u/t/; | |
372 my $protein = ""; | |
373 if ($seq =~ /[^actg]/ ) { #ambiguous chars | |
374 for (my $i = 0; $i < (length($seq) - 2 ); $i+=3) { | |
375 my $triplet = substr($seq, $i, 3); | |
376 if (exists $CODONS->{$triplet}) { | |
377 $protein .= substr($TABLES[$id-1], | |
378 $CODONS->{$triplet},1); | |
379 } else { | |
380 $protein .= $self->_translate_ambiguous_codon($triplet); | |
381 } | |
382 } | |
383 } else { # simple, strict translation | |
384 for (my $i = 0; $i < (length($seq) - 2 ); $i+=3) { | |
385 my $triplet = substr($seq, $i, 3); | |
386 if (exists $CODONS->{$triplet}) { | |
387 $protein .= substr($TABLES[$id-1], $CODONS->{$triplet}, 1); | |
388 } else { | |
389 $protein .= 'X'; | |
390 } | |
391 } | |
392 } | |
393 if ($partial == 2) { # 2 overhanging nucleotides | |
394 my $triplet = substr($seq, ($partial -4)). "n"; | |
395 if (exists $CODONS->{$triplet}) { | |
396 my $aa = substr($TABLES[$id-1], $CODONS->{$triplet},1); | |
397 $protein .= $aa; | |
398 } else { | |
399 $protein .= $self->_translate_ambiguous_codon($triplet, $partial); | |
400 } | |
401 } | |
402 return $protein; | |
403 } | |
404 | |
405 sub _translate_ambiguous_codon { | |
406 my ($self, $triplet, $partial) = @_; | |
407 $partial ||= 0; | |
408 my $id = $self->id; | |
409 my $aa; | |
410 my @codons = _unambiquous_codons($triplet); | |
411 my %aas =(); | |
412 foreach my $codon (@codons) { | |
413 $aas{substr($TABLES[$id-1],$CODONS->{$codon},1)} = 1; | |
414 } | |
415 my $count = scalar keys %aas; | |
416 if ( $count == 1 ) { | |
417 $aa = (keys %aas)[0]; | |
418 } | |
419 elsif ( $count == 2 ) { | |
420 if ($aas{'D'} and $aas{'N'}) { | |
421 $aa = 'B'; | |
422 } | |
423 elsif ($aas{'E'} and $aas{'Q'}) { | |
424 $aa = 'Z'; | |
425 } else { | |
426 $partial ? ($aa = '') : ($aa = 'X'); | |
427 } | |
428 } else { | |
429 $partial ? ($aa = '') : ($aa = 'X'); | |
430 } | |
431 return $aa; | |
432 } | |
433 | |
434 =head2 translate_strict | |
435 | |
436 Title : translate_strict | |
437 Usage : $obj->translate_strict('ACT') | |
438 Function: returns one letter amino acid code for a codon input | |
439 | |
440 Fast and simple translation. User is responsible to resolve | |
441 ambiguous nucleotide codes before calling this | |
442 method. Returns 'X' for unknown codons and an empty string | |
443 for input strings that are not three characters long. | |
444 | |
445 It is not recommended to use this method in a production | |
446 environment. Use method translate, instead. | |
447 | |
448 Example : | |
449 Returns : A string | |
450 Args : a codon = a three nucleotide character string | |
451 | |
452 | |
453 =cut | |
454 | |
455 sub translate_strict{ | |
456 my ($self, $value) = @_; | |
457 my ($id) = $self->{'id'}; | |
458 | |
459 $value = lc $value; | |
460 $value =~ tr/u/t/; | |
461 | |
462 if (length $value != 3 ) { | |
463 return ''; | |
464 } | |
465 elsif (!(defined $CODONS->{$value})) { | |
466 return 'X'; | |
467 } | |
468 else { | |
469 return substr($TABLES[$id-1],$CODONS->{$value},1); | |
470 } | |
471 } | |
472 | |
473 =head2 revtranslate | |
474 | |
475 Title : revtranslate | |
476 Usage : $obj->revtranslate('G') | |
477 Function: returns codons for an amino acid | |
478 | |
479 Returns an empty string for unknown amino acid | |
480 codes. Ambiquous IUPAC codes Asx,B, (Asp,D; Asn,N) and | |
481 Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three | |
482 letter amino acid codes are accepted. '*' and 'Ter' are | |
483 used for terminator. | |
484 | |
485 By default, the output codons are shown in DNA. If the | |
486 output is needed in RNA (tr/t/u/), add a second argument | |
487 'RNA'. | |
488 | |
489 Example : $obj->revtranslate('Gly', 'RNA') | |
490 Returns : An array of three lower case letter strings i.e. codons | |
491 Args : amino acid, 'RNA' | |
492 | |
493 =cut | |
494 | |
495 sub revtranslate { | |
496 my ($self, $value, $coding) = @_; | |
497 my ($id) = $self->{'id'}; | |
498 my (@aas, $p); | |
499 my (@codons) = (); | |
500 | |
501 if (length($value) == 3 ) { | |
502 $value = lc $value; | |
503 $value = ucfirst $value; | |
504 $value = $THREELETTERSYMBOLS{$value}; | |
505 } | |
506 if ( defined $value and $value =~ /$VALID_PROTEIN/ | |
507 and length($value) == 1 ) { | |
508 $value = uc $value; | |
509 @aas = @{$IUPAC_AA{$value}}; | |
510 foreach my $aa (@aas) { | |
511 #print $aa, " -2\n"; | |
512 $aa = '\*' if $aa eq '*'; | |
513 while ($TABLES[$id-1] =~ m/$aa/g) { | |
514 $p = pos $TABLES[$id-1]; | |
515 push (@codons, $TRCOL->{--$p}); | |
516 } | |
517 } | |
518 } | |
519 | |
520 if ($coding and uc ($coding) eq 'RNA') { | |
521 for my $i (0..$#codons) { | |
522 $codons[$i] =~ tr/t/u/; | |
523 } | |
524 } | |
525 | |
526 return @codons; | |
527 } | |
528 | |
529 =head2 is_start_codon | |
530 | |
531 Title : is_start_codon | |
532 Usage : $obj->is_start_codon('ATG') | |
533 Function: returns true (1) for all codons that can be used as a | |
534 translation start, false (0) for others. | |
535 Example : $myCodonTable->is_start_codon('ATG') | |
536 Returns : boolean | |
537 Args : codon | |
538 | |
539 | |
540 =cut | |
541 | |
542 sub is_start_codon{ | |
543 my ($self, $value) = @_; | |
544 my ($id) = $self->{'id'}; | |
545 | |
546 $value = lc $value; | |
547 $value =~ tr/u/t/; | |
548 | |
549 if (length $value != 3 ) { | |
550 return 0; | |
551 } | |
552 else { | |
553 my $result = 1; | |
554 my @ms = map { substr($STARTS[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value); | |
555 foreach my $c (@ms) { | |
556 $result = 0 if $c ne 'M'; | |
557 } | |
558 return $result; | |
559 } | |
560 } | |
561 | |
562 | |
563 | |
564 =head2 is_ter_codon | |
565 | |
566 Title : is_ter_codon | |
567 Usage : $obj->is_ter_codon('GAA') | |
568 Function: returns true (1) for all codons that can be used as a | |
569 translation tarminator, false (0) for others. | |
570 Example : $myCodonTable->is_ter_codon('ATG') | |
571 Returns : boolean | |
572 Args : codon | |
573 | |
574 | |
575 =cut | |
576 | |
577 sub is_ter_codon{ | |
578 my ($self, $value) = @_; | |
579 my ($id) = $self->{'id'}; | |
580 | |
581 $value = lc $value; | |
582 $value =~ tr/u/t/; | |
583 | |
584 if (length $value != 3 ) { | |
585 return 0; | |
586 } | |
587 else { | |
588 my $result = 1; | |
589 my @ms = map { substr($TABLES[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value); | |
590 foreach my $c (@ms) { | |
591 $result = 0 if $c ne $TERMINATOR; | |
592 } | |
593 return $result; | |
594 } | |
595 } | |
596 | |
597 =head2 is_unknown_codon | |
598 | |
599 Title : is_unknown_codon | |
600 Usage : $obj->is_unknown_codon('GAJ') | |
601 Function: returns false (0) for all codons that are valid, | |
602 true (1) for others. | |
603 Example : $myCodonTable->is_unknown_codon('NTG') | |
604 Returns : boolean | |
605 Args : codon | |
606 | |
607 | |
608 =cut | |
609 | |
610 sub is_unknown_codon{ | |
611 my ($self, $value) = @_; | |
612 my ($id) = $self->{'id'}; | |
613 | |
614 $value = lc $value; | |
615 $value =~ tr/u/t/; | |
616 | |
617 if (length $value != 3 ) { | |
618 return 1; | |
619 } | |
620 else { | |
621 my $result = 0; | |
622 my @cs = map { substr($TABLES[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value); | |
623 $result = 1 if scalar @cs == 0; | |
624 return $result; | |
625 } | |
626 } | |
627 | |
628 =head2 _unambiquous_codons | |
629 | |
630 Title : _unambiquous_codons | |
631 Usage : @codons = _unambiquous_codons('ACN') | |
632 Function: | |
633 Example : | |
634 Returns : array of strings (one letter unambiguous amino acid codes) | |
635 Args : a codon = a three IUPAC nucleotide character string | |
636 | |
637 =cut | |
638 | |
639 sub _unambiquous_codons{ | |
640 my ($value) = @_; | |
641 my @nts = (); | |
642 my @codons = (); | |
643 my ($i, $j, $k); | |
644 @nts = map { $IUPAC_DNA{uc $_} } split(//, $value); | |
645 for my $i (@{$nts[0]}) { | |
646 for my $j (@{$nts[1]}) { | |
647 for my $k (@{$nts[2]}) { | |
648 push @codons, lc "$i$j$k"; | |
649 } | |
650 } | |
651 } | |
652 return @codons; | |
653 } | |
654 | |
655 1; |