0
|
1 # $Id: CodonTable.pm,v 1.23 2002/10/22 07:38:45 lapp Exp $
|
|
2 #
|
|
3 # bioperl module for Bio::Tools::CodonTable
|
|
4 #
|
|
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
|
|
6 #
|
|
7 # Copyright Heikki Lehvaslaiho
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Tools::CodonTable - Bioperl codon table object
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 This is a read-only class for all known codon tables. The IDs are
|
|
20 the ones used by nucleotide sequence databases. All common IUPAC
|
|
21 ambiguity codes for DNA, RNA and animo acids are recognized.
|
|
22
|
|
23 # to use
|
|
24 use Bio::Tools::CodonTable;
|
|
25
|
|
26 # defaults to ID 1 "Standard"
|
|
27 $myCodonTable = Bio::Tools::CodonTable->new();
|
|
28 $myCodonTable2 = Bio::Tools::CodonTable -> new ( -id => 3 );
|
|
29
|
|
30 # change codon table
|
|
31 $myCodonTable->id(5);
|
|
32
|
|
33 # examine codon table
|
|
34 print join (' ', "The name of the codon table no.", $myCodonTable->id(4),
|
|
35 "is:", $myCodonTable->name(), "\n");
|
|
36
|
|
37 # translate a codon
|
|
38 $aa = $myCodonTable->translate('ACU');
|
|
39 $aa = $myCodonTable->translate('act');
|
|
40 $aa = $myCodonTable->translate('ytr');
|
|
41
|
|
42 # reverse translate an amino acid
|
|
43 @codons = $myCodonTable->revtranslate('A');
|
|
44 @codons = $myCodonTable->revtranslate('Ser');
|
|
45 @codons = $myCodonTable->revtranslate('Glx');
|
|
46 @codons = $myCodonTable->revtranslate('cYS', 'rna');
|
|
47
|
|
48 #boolean tests
|
|
49 print "Is a start\n" if $myCodonTable->is_start_codon('ATG');
|
|
50 print "Is a termianator\n" if $myCodonTable->is_ter_codon('tar');
|
|
51 print "Is a unknown\n" if $myCodonTable->is_unknown_codon('JTG');
|
|
52
|
|
53 =head1 DESCRIPTION
|
|
54
|
|
55 Codon tables are also called translation tables or genetics codes
|
|
56 since that is what they try to represent. A bit more complete picture
|
|
57 of the full complexity of codon usage in various taxonomic groups
|
|
58 presented at the NCBI Genetic Codes Home page.
|
|
59
|
|
60 CodonTable is a BioPerl class that knows all current translation
|
|
61 tables that are used by primary nucleotide sequence databases
|
|
62 (GenBank, EMBL and DDBJ). It provides methods to output information
|
|
63 about tables and relationships between codons and amino acids.
|
|
64
|
|
65 This class and its methods recognized all common IUPAC ambiguity codes
|
|
66 for DNA, RNA and animo acids. The translation method follows the
|
|
67 conventions in EMBL and TREMBL databases.
|
|
68
|
|
69 It is a nuisance to separate RNA and cDNA representations of nucleic
|
|
70 acid transcripts. The CodonTable object accepts codons of both type as
|
|
71 input and allows the user to set the mode for output when reverse
|
|
72 translating. Its default for output is DNA.
|
|
73
|
|
74 Note: This class deals primarily with individual codons and amino
|
|
75 acids. However in the interest of speed you can L<translate>
|
|
76 longer sequence, too. The full complexity of protein translation
|
|
77 is tackled by L<Bio::PrimarySeqI::translate>.
|
|
78
|
|
79
|
|
80 The amino acid codes are IUPAC recommendations for common amino acids:
|
|
81
|
|
82 A Ala Alanine
|
|
83 R Arg Arginine
|
|
84 N Asn Asparagine
|
|
85 D Asp Aspartic acid
|
|
86 C Cys Cysteine
|
|
87 Q Gln Glutamine
|
|
88 E Glu Glutamic acid
|
|
89 G Gly Glycine
|
|
90 H His Histidine
|
|
91 I Ile Isoleucine
|
|
92 L Leu Leucine
|
|
93 K Lys Lysine
|
|
94 M Met Methionine
|
|
95 F Phe Phenylalanine
|
|
96 P Pro Proline
|
|
97 S Ser Serine
|
|
98 T Thr Threonine
|
|
99 W Trp Tryptophan
|
|
100 Y Tyr Tyrosine
|
|
101 V Val Valine
|
|
102 B Asx Aspartic acid or Asparagine
|
|
103 Z Glx Glutamine or Glutamic acid
|
|
104 X Xaa Any or unknown amino acid
|
|
105
|
|
106
|
|
107 It is worth noting that, "Bacterial" codon table no. 11 produces an
|
|
108 polypeptide that is, confusingly, identical to the standard one. The
|
|
109 only differences are in available initiator codons.
|
|
110
|
|
111
|
|
112 NCBI Genetic Codes home page:
|
|
113 http://www.ncbi.nlm.nih.gov/htbin-post/Taxonomy/wprintgc?mode=c
|
|
114
|
|
115 EBI Translation Table Viewer:
|
|
116 http://www.ebi.ac.uk/cgi-bin/mutations/trtables.cgi
|
|
117
|
|
118 Amended ASN.1 version with ids 16 and 21 is at:
|
|
119 ftp://ftp.ebi.ac.uk/pub/databases/geneticcode/
|
|
120
|
|
121 Thank your for Matteo diTomasso for the original Perl implementation
|
|
122 of these tables.
|
|
123
|
|
124 =head1 FEEDBACK
|
|
125
|
|
126 =head2 Mailing Lists
|
|
127
|
|
128 User feedback is an integral part of the evolution of this and other
|
|
129 Bioperl modules. Send your comments and suggestions preferably to the
|
|
130 Bioperl mailing lists Your participation is much appreciated.
|
|
131
|
|
132 bioperl-l@bioperl.org - General discussion
|
|
133 http://bio.perl.org/MailList.html - About the mailing lists
|
|
134
|
|
135 =head2 Reporting Bugs
|
|
136
|
|
137 report bugs to the Bioperl bug tracking system to help us keep track
|
|
138 the bugs and their resolution. Bug reports can be submitted via
|
|
139 email or the web:
|
|
140
|
|
141 bioperl-bugs@bio.perl.org
|
|
142 http://bugzilla.bioperl.org/
|
|
143
|
|
144 =head1 AUTHOR - Heikki Lehvaslaiho
|
|
145
|
|
146 Email: heikki@ebi.ac.uk
|
|
147 Address:
|
|
148
|
|
149 EMBL Outstation, European Bioinformatics Institute
|
|
150 Wellcome Trust Genome Campus, Hinxton
|
|
151 Cambs. CB10 1SD, United Kingdom
|
|
152
|
|
153 =head1 APPENDIX
|
|
154
|
|
155 The rest of the documentation details each of the object
|
|
156 methods. Internal methods are usually preceded with a _
|
|
157
|
|
158 =cut
|
|
159
|
|
160
|
|
161 # Let the code begin...
|
|
162
|
|
163 package Bio::Tools::CodonTable;
|
|
164 use vars qw(@ISA @NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA
|
|
165 %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR);
|
|
166 use strict;
|
|
167
|
|
168 # Object preamble - inherits from Bio::Root::Root
|
|
169 use Bio::Root::Root;
|
|
170 use Bio::Tools::IUPAC;
|
|
171 use Bio::SeqUtils;
|
|
172
|
|
173 @ISA = qw(Bio::Root::Root);
|
|
174
|
|
175 # first set internal values for all translation tables
|
|
176
|
|
177 BEGIN {
|
|
178 @NAMES = #id
|
|
179 (
|
|
180 'Standard', #1
|
|
181 'Vertebrate Mitochondrial',#2
|
|
182 'Yeast Mitochondrial',# 3
|
|
183 'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma',#4
|
|
184 'Invertebrate Mitochondrial',#5
|
|
185 'Ciliate, Dasycladacean and Hexamita Nuclear',# 6
|
|
186 '', '',
|
|
187 'Echinoderm Mitochondrial',#9
|
|
188 'Euplotid Nuclear',#10
|
|
189 '"Bacterial"',# 11
|
|
190 'Alternative Yeast Nuclear',# 12
|
|
191 'Ascidian Mitochondrial',# 13
|
|
192 'Flatworm Mitochondrial',# 14
|
|
193 'Blepharisma Nuclear',# 15
|
|
194 'Chlorophycean Mitochondrial',# 16
|
|
195 '', '', '', '',
|
|
196 'Trematode Mitochondrial',# 21
|
|
197 'Scenedesmus obliquus Mitochondrial', #22
|
|
198 'Thraustochytrium Mitochondrial' #23
|
|
199 );
|
|
200
|
|
201 @TABLES =
|
|
202 qw(
|
|
203 FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
|
|
204 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
|
|
205 FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
|
|
206 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
|
|
207 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
|
|
208 FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
|
|
209 '' ''
|
|
210 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
|
|
211 FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
|
|
212 FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
|
|
213 FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
|
|
214 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
|
|
215 FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
|
|
216 FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
|
|
217 FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
|
|
218 '' '' '' ''
|
|
219 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
|
|
220 FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
|
|
221 FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
|
|
222 );
|
|
223
|
|
224
|
|
225 @STARTS =
|
|
226 qw(
|
|
227 ---M---------------M---------------M----------------------------
|
|
228 --------------------------------MMMM---------------M------------
|
|
229 ----------------------------------MM----------------------------
|
|
230 --MM---------------M------------MMMM---------------M------------
|
|
231 ---M----------------------------MMMM---------------M------------
|
|
232 -----------------------------------M----------------------------
|
|
233 '' ''
|
|
234 -----------------------------------M----------------------------
|
|
235 -----------------------------------M----------------------------
|
|
236 ---M---------------M------------MMMM---------------M------------
|
|
237 -------------------M---------------M----------------------------
|
|
238 -----------------------------------M----------------------------
|
|
239 -----------------------------------M----------------------------
|
|
240 -----------------------------------M----------------------------
|
|
241 -----------------------------------M----------------------------
|
|
242 '' '' '' ''
|
|
243 -----------------------------------M---------------M------------
|
|
244 -----------------------------------M----------------------------
|
|
245 --------------------------------M--M---------------M------------
|
|
246 );
|
|
247
|
|
248 my @nucs = qw(t c a g);
|
|
249 my $x = 0;
|
|
250 ($CODONS, $TRCOL) = ({}, {});
|
|
251 for my $i (@nucs) {
|
|
252 for my $j (@nucs) {
|
|
253 for my $k (@nucs) {
|
|
254 my $codon = "$i$j$k";
|
|
255 $CODONS->{$codon} = $x;
|
|
256 $TRCOL->{$x} = $codon;
|
|
257 $x++;
|
|
258 }
|
|
259 }
|
|
260 }
|
|
261 %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub();
|
|
262 %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup();
|
|
263 %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2);
|
|
264 $VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']';
|
|
265 $TERMINATOR = '*';
|
|
266 }
|
|
267
|
|
268 sub new {
|
|
269 my($class,@args) = @_;
|
|
270 my $self = $class->SUPER::new(@args);
|
|
271
|
|
272 my($id) =
|
|
273 $self->_rearrange([qw(ID
|
|
274 )],
|
|
275 @args);
|
|
276
|
|
277 $id = 1 if ( ! $id );
|
|
278 $id && $self->id($id);
|
|
279 return $self; # success - we hope!
|
|
280 }
|
|
281
|
|
282 =head2 id
|
|
283
|
|
284 Title : id
|
|
285 Usage : $obj->id(3); $id_integer = $obj->id();
|
|
286 Function:
|
|
287
|
|
288 Sets or returns the id of the translation table. IDs are
|
|
289 integers from 1 to 15, excluding 7 and 8 which have been
|
|
290 removed as redundant. If an invalid ID is given the method
|
|
291 returns 0, false.
|
|
292
|
|
293
|
|
294 Example :
|
|
295 Returns : value of id, a scalar, 0 if not a valid
|
|
296 Args : newvalue (optional)
|
|
297
|
|
298 =cut
|
|
299
|
|
300 sub id{
|
|
301 my ($self,$value) = @_;
|
|
302 if( defined $value) {
|
|
303 if ( !(defined $TABLES[$value-1]) or $TABLES[$value-1] eq '') {
|
|
304 $self->warn("Not a valid codon table ID [$value] ");
|
|
305 $value = 0;
|
|
306 }
|
|
307 $self->{'id'} = $value;
|
|
308 }
|
|
309 return $self->{'id'};
|
|
310 }
|
|
311
|
|
312 =head2 name
|
|
313
|
|
314 Title : name
|
|
315 Usage : $obj->name()
|
|
316 Function: returns the descriptive name of the translation table
|
|
317 Example :
|
|
318 Returns : A string
|
|
319 Args : None
|
|
320
|
|
321
|
|
322 =cut
|
|
323
|
|
324 sub name{
|
|
325 my ($self) = @_;
|
|
326
|
|
327 my ($id) = $self->{'id'};
|
|
328 return $NAMES[$id-1];
|
|
329 }
|
|
330
|
|
331 =head2 translate
|
|
332
|
|
333 Title : translate
|
|
334 Usage : $obj->translate('YTR')
|
|
335 Function: Returns a string of one letter amino acid codes from
|
|
336 nucleotide sequence input. The imput can be of any length.
|
|
337
|
|
338 Returns 'X' for unknown codons and codons that code for
|
|
339 more than one amino acid. Returns an empty string if input
|
|
340 is not three characters long. Exceptions for these are:
|
|
341
|
|
342 - IUPAC amino acid code B for Aspartic Acid and
|
|
343 Asparagine, is used.
|
|
344 - IUPAC amino acid code Z for Glutamic Acid, Glutamine is
|
|
345 used.
|
|
346 - if the codon is two nucleotides long and if by adding
|
|
347 an a third character 'N', it codes for a single amino
|
|
348 acid (with exceptions above), return that, otherwise
|
|
349 return empty string.
|
|
350
|
|
351 Returns empty string for other input strings that are not
|
|
352 three characters long.
|
|
353
|
|
354 Example :
|
|
355 Returns : a string of one letter ambiguous IUPAC amino acid codes
|
|
356 Args : ambiguous IUPAC nucleotide string
|
|
357
|
|
358
|
|
359 =cut
|
|
360
|
|
361 sub translate {
|
|
362 my ($self, $seq) = @_;
|
|
363 $self->throw("Calling translate without a seq argument!") unless defined $seq;
|
|
364 return '' unless $seq;
|
|
365
|
|
366 my $id = $self->id;
|
|
367 my ($partial) = 0;
|
|
368 $partial = 2 if length($seq) % 3 == 2;
|
|
369
|
|
370 $seq = lc $seq;
|
|
371 $seq =~ tr/u/t/;
|
|
372 my $protein = "";
|
|
373 if ($seq =~ /[^actg]/ ) { #ambiguous chars
|
|
374 for (my $i = 0; $i < (length($seq) - 2 ); $i+=3) {
|
|
375 my $triplet = substr($seq, $i, 3);
|
|
376 if (exists $CODONS->{$triplet}) {
|
|
377 $protein .= substr($TABLES[$id-1],
|
|
378 $CODONS->{$triplet},1);
|
|
379 } else {
|
|
380 $protein .= $self->_translate_ambiguous_codon($triplet);
|
|
381 }
|
|
382 }
|
|
383 } else { # simple, strict translation
|
|
384 for (my $i = 0; $i < (length($seq) - 2 ); $i+=3) {
|
|
385 my $triplet = substr($seq, $i, 3);
|
|
386 if (exists $CODONS->{$triplet}) {
|
|
387 $protein .= substr($TABLES[$id-1], $CODONS->{$triplet}, 1);
|
|
388 } else {
|
|
389 $protein .= 'X';
|
|
390 }
|
|
391 }
|
|
392 }
|
|
393 if ($partial == 2) { # 2 overhanging nucleotides
|
|
394 my $triplet = substr($seq, ($partial -4)). "n";
|
|
395 if (exists $CODONS->{$triplet}) {
|
|
396 my $aa = substr($TABLES[$id-1], $CODONS->{$triplet},1);
|
|
397 $protein .= $aa;
|
|
398 } else {
|
|
399 $protein .= $self->_translate_ambiguous_codon($triplet, $partial);
|
|
400 }
|
|
401 }
|
|
402 return $protein;
|
|
403 }
|
|
404
|
|
405 sub _translate_ambiguous_codon {
|
|
406 my ($self, $triplet, $partial) = @_;
|
|
407 $partial ||= 0;
|
|
408 my $id = $self->id;
|
|
409 my $aa;
|
|
410 my @codons = _unambiquous_codons($triplet);
|
|
411 my %aas =();
|
|
412 foreach my $codon (@codons) {
|
|
413 $aas{substr($TABLES[$id-1],$CODONS->{$codon},1)} = 1;
|
|
414 }
|
|
415 my $count = scalar keys %aas;
|
|
416 if ( $count == 1 ) {
|
|
417 $aa = (keys %aas)[0];
|
|
418 }
|
|
419 elsif ( $count == 2 ) {
|
|
420 if ($aas{'D'} and $aas{'N'}) {
|
|
421 $aa = 'B';
|
|
422 }
|
|
423 elsif ($aas{'E'} and $aas{'Q'}) {
|
|
424 $aa = 'Z';
|
|
425 } else {
|
|
426 $partial ? ($aa = '') : ($aa = 'X');
|
|
427 }
|
|
428 } else {
|
|
429 $partial ? ($aa = '') : ($aa = 'X');
|
|
430 }
|
|
431 return $aa;
|
|
432 }
|
|
433
|
|
434 =head2 translate_strict
|
|
435
|
|
436 Title : translate_strict
|
|
437 Usage : $obj->translate_strict('ACT')
|
|
438 Function: returns one letter amino acid code for a codon input
|
|
439
|
|
440 Fast and simple translation. User is responsible to resolve
|
|
441 ambiguous nucleotide codes before calling this
|
|
442 method. Returns 'X' for unknown codons and an empty string
|
|
443 for input strings that are not three characters long.
|
|
444
|
|
445 It is not recommended to use this method in a production
|
|
446 environment. Use method translate, instead.
|
|
447
|
|
448 Example :
|
|
449 Returns : A string
|
|
450 Args : a codon = a three nucleotide character string
|
|
451
|
|
452
|
|
453 =cut
|
|
454
|
|
455 sub translate_strict{
|
|
456 my ($self, $value) = @_;
|
|
457 my ($id) = $self->{'id'};
|
|
458
|
|
459 $value = lc $value;
|
|
460 $value =~ tr/u/t/;
|
|
461
|
|
462 if (length $value != 3 ) {
|
|
463 return '';
|
|
464 }
|
|
465 elsif (!(defined $CODONS->{$value})) {
|
|
466 return 'X';
|
|
467 }
|
|
468 else {
|
|
469 return substr($TABLES[$id-1],$CODONS->{$value},1);
|
|
470 }
|
|
471 }
|
|
472
|
|
473 =head2 revtranslate
|
|
474
|
|
475 Title : revtranslate
|
|
476 Usage : $obj->revtranslate('G')
|
|
477 Function: returns codons for an amino acid
|
|
478
|
|
479 Returns an empty string for unknown amino acid
|
|
480 codes. Ambiquous IUPAC codes Asx,B, (Asp,D; Asn,N) and
|
|
481 Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three
|
|
482 letter amino acid codes are accepted. '*' and 'Ter' are
|
|
483 used for terminator.
|
|
484
|
|
485 By default, the output codons are shown in DNA. If the
|
|
486 output is needed in RNA (tr/t/u/), add a second argument
|
|
487 'RNA'.
|
|
488
|
|
489 Example : $obj->revtranslate('Gly', 'RNA')
|
|
490 Returns : An array of three lower case letter strings i.e. codons
|
|
491 Args : amino acid, 'RNA'
|
|
492
|
|
493 =cut
|
|
494
|
|
495 sub revtranslate {
|
|
496 my ($self, $value, $coding) = @_;
|
|
497 my ($id) = $self->{'id'};
|
|
498 my (@aas, $p);
|
|
499 my (@codons) = ();
|
|
500
|
|
501 if (length($value) == 3 ) {
|
|
502 $value = lc $value;
|
|
503 $value = ucfirst $value;
|
|
504 $value = $THREELETTERSYMBOLS{$value};
|
|
505 }
|
|
506 if ( defined $value and $value =~ /$VALID_PROTEIN/
|
|
507 and length($value) == 1 ) {
|
|
508 $value = uc $value;
|
|
509 @aas = @{$IUPAC_AA{$value}};
|
|
510 foreach my $aa (@aas) {
|
|
511 #print $aa, " -2\n";
|
|
512 $aa = '\*' if $aa eq '*';
|
|
513 while ($TABLES[$id-1] =~ m/$aa/g) {
|
|
514 $p = pos $TABLES[$id-1];
|
|
515 push (@codons, $TRCOL->{--$p});
|
|
516 }
|
|
517 }
|
|
518 }
|
|
519
|
|
520 if ($coding and uc ($coding) eq 'RNA') {
|
|
521 for my $i (0..$#codons) {
|
|
522 $codons[$i] =~ tr/t/u/;
|
|
523 }
|
|
524 }
|
|
525
|
|
526 return @codons;
|
|
527 }
|
|
528
|
|
529 =head2 is_start_codon
|
|
530
|
|
531 Title : is_start_codon
|
|
532 Usage : $obj->is_start_codon('ATG')
|
|
533 Function: returns true (1) for all codons that can be used as a
|
|
534 translation start, false (0) for others.
|
|
535 Example : $myCodonTable->is_start_codon('ATG')
|
|
536 Returns : boolean
|
|
537 Args : codon
|
|
538
|
|
539
|
|
540 =cut
|
|
541
|
|
542 sub is_start_codon{
|
|
543 my ($self, $value) = @_;
|
|
544 my ($id) = $self->{'id'};
|
|
545
|
|
546 $value = lc $value;
|
|
547 $value =~ tr/u/t/;
|
|
548
|
|
549 if (length $value != 3 ) {
|
|
550 return 0;
|
|
551 }
|
|
552 else {
|
|
553 my $result = 1;
|
|
554 my @ms = map { substr($STARTS[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value);
|
|
555 foreach my $c (@ms) {
|
|
556 $result = 0 if $c ne 'M';
|
|
557 }
|
|
558 return $result;
|
|
559 }
|
|
560 }
|
|
561
|
|
562
|
|
563
|
|
564 =head2 is_ter_codon
|
|
565
|
|
566 Title : is_ter_codon
|
|
567 Usage : $obj->is_ter_codon('GAA')
|
|
568 Function: returns true (1) for all codons that can be used as a
|
|
569 translation tarminator, false (0) for others.
|
|
570 Example : $myCodonTable->is_ter_codon('ATG')
|
|
571 Returns : boolean
|
|
572 Args : codon
|
|
573
|
|
574
|
|
575 =cut
|
|
576
|
|
577 sub is_ter_codon{
|
|
578 my ($self, $value) = @_;
|
|
579 my ($id) = $self->{'id'};
|
|
580
|
|
581 $value = lc $value;
|
|
582 $value =~ tr/u/t/;
|
|
583
|
|
584 if (length $value != 3 ) {
|
|
585 return 0;
|
|
586 }
|
|
587 else {
|
|
588 my $result = 1;
|
|
589 my @ms = map { substr($TABLES[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value);
|
|
590 foreach my $c (@ms) {
|
|
591 $result = 0 if $c ne $TERMINATOR;
|
|
592 }
|
|
593 return $result;
|
|
594 }
|
|
595 }
|
|
596
|
|
597 =head2 is_unknown_codon
|
|
598
|
|
599 Title : is_unknown_codon
|
|
600 Usage : $obj->is_unknown_codon('GAJ')
|
|
601 Function: returns false (0) for all codons that are valid,
|
|
602 true (1) for others.
|
|
603 Example : $myCodonTable->is_unknown_codon('NTG')
|
|
604 Returns : boolean
|
|
605 Args : codon
|
|
606
|
|
607
|
|
608 =cut
|
|
609
|
|
610 sub is_unknown_codon{
|
|
611 my ($self, $value) = @_;
|
|
612 my ($id) = $self->{'id'};
|
|
613
|
|
614 $value = lc $value;
|
|
615 $value =~ tr/u/t/;
|
|
616
|
|
617 if (length $value != 3 ) {
|
|
618 return 1;
|
|
619 }
|
|
620 else {
|
|
621 my $result = 0;
|
|
622 my @cs = map { substr($TABLES[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value);
|
|
623 $result = 1 if scalar @cs == 0;
|
|
624 return $result;
|
|
625 }
|
|
626 }
|
|
627
|
|
628 =head2 _unambiquous_codons
|
|
629
|
|
630 Title : _unambiquous_codons
|
|
631 Usage : @codons = _unambiquous_codons('ACN')
|
|
632 Function:
|
|
633 Example :
|
|
634 Returns : array of strings (one letter unambiguous amino acid codes)
|
|
635 Args : a codon = a three IUPAC nucleotide character string
|
|
636
|
|
637 =cut
|
|
638
|
|
639 sub _unambiquous_codons{
|
|
640 my ($value) = @_;
|
|
641 my @nts = ();
|
|
642 my @codons = ();
|
|
643 my ($i, $j, $k);
|
|
644 @nts = map { $IUPAC_DNA{uc $_} } split(//, $value);
|
|
645 for my $i (@{$nts[0]}) {
|
|
646 for my $j (@{$nts[1]}) {
|
|
647 for my $k (@{$nts[2]}) {
|
|
648 push @codons, lc "$i$j$k";
|
|
649 }
|
|
650 }
|
|
651 }
|
|
652 return @codons;
|
|
653 }
|
|
654
|
|
655 1;
|