Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/CodonTable.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: CodonTable.pm,v 1.23 2002/10/22 07:38:45 lapp Exp $ | |
| 2 # | |
| 3 # bioperl module for Bio::Tools::CodonTable | |
| 4 # | |
| 5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
| 6 # | |
| 7 # Copyright Heikki Lehvaslaiho | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::Tools::CodonTable - Bioperl codon table object | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 This is a read-only class for all known codon tables. The IDs are | |
| 20 the ones used by nucleotide sequence databases. All common IUPAC | |
| 21 ambiguity codes for DNA, RNA and animo acids are recognized. | |
| 22 | |
| 23 # to use | |
| 24 use Bio::Tools::CodonTable; | |
| 25 | |
| 26 # defaults to ID 1 "Standard" | |
| 27 $myCodonTable = Bio::Tools::CodonTable->new(); | |
| 28 $myCodonTable2 = Bio::Tools::CodonTable -> new ( -id => 3 ); | |
| 29 | |
| 30 # change codon table | |
| 31 $myCodonTable->id(5); | |
| 32 | |
| 33 # examine codon table | |
| 34 print join (' ', "The name of the codon table no.", $myCodonTable->id(4), | |
| 35 "is:", $myCodonTable->name(), "\n"); | |
| 36 | |
| 37 # translate a codon | |
| 38 $aa = $myCodonTable->translate('ACU'); | |
| 39 $aa = $myCodonTable->translate('act'); | |
| 40 $aa = $myCodonTable->translate('ytr'); | |
| 41 | |
| 42 # reverse translate an amino acid | |
| 43 @codons = $myCodonTable->revtranslate('A'); | |
| 44 @codons = $myCodonTable->revtranslate('Ser'); | |
| 45 @codons = $myCodonTable->revtranslate('Glx'); | |
| 46 @codons = $myCodonTable->revtranslate('cYS', 'rna'); | |
| 47 | |
| 48 #boolean tests | |
| 49 print "Is a start\n" if $myCodonTable->is_start_codon('ATG'); | |
| 50 print "Is a termianator\n" if $myCodonTable->is_ter_codon('tar'); | |
| 51 print "Is a unknown\n" if $myCodonTable->is_unknown_codon('JTG'); | |
| 52 | |
| 53 =head1 DESCRIPTION | |
| 54 | |
| 55 Codon tables are also called translation tables or genetics codes | |
| 56 since that is what they try to represent. A bit more complete picture | |
| 57 of the full complexity of codon usage in various taxonomic groups | |
| 58 presented at the NCBI Genetic Codes Home page. | |
| 59 | |
| 60 CodonTable is a BioPerl class that knows all current translation | |
| 61 tables that are used by primary nucleotide sequence databases | |
| 62 (GenBank, EMBL and DDBJ). It provides methods to output information | |
| 63 about tables and relationships between codons and amino acids. | |
| 64 | |
| 65 This class and its methods recognized all common IUPAC ambiguity codes | |
| 66 for DNA, RNA and animo acids. The translation method follows the | |
| 67 conventions in EMBL and TREMBL databases. | |
| 68 | |
| 69 It is a nuisance to separate RNA and cDNA representations of nucleic | |
| 70 acid transcripts. The CodonTable object accepts codons of both type as | |
| 71 input and allows the user to set the mode for output when reverse | |
| 72 translating. Its default for output is DNA. | |
| 73 | |
| 74 Note: This class deals primarily with individual codons and amino | |
| 75 acids. However in the interest of speed you can L<translate> | |
| 76 longer sequence, too. The full complexity of protein translation | |
| 77 is tackled by L<Bio::PrimarySeqI::translate>. | |
| 78 | |
| 79 | |
| 80 The amino acid codes are IUPAC recommendations for common amino acids: | |
| 81 | |
| 82 A Ala Alanine | |
| 83 R Arg Arginine | |
| 84 N Asn Asparagine | |
| 85 D Asp Aspartic acid | |
| 86 C Cys Cysteine | |
| 87 Q Gln Glutamine | |
| 88 E Glu Glutamic acid | |
| 89 G Gly Glycine | |
| 90 H His Histidine | |
| 91 I Ile Isoleucine | |
| 92 L Leu Leucine | |
| 93 K Lys Lysine | |
| 94 M Met Methionine | |
| 95 F Phe Phenylalanine | |
| 96 P Pro Proline | |
| 97 S Ser Serine | |
| 98 T Thr Threonine | |
| 99 W Trp Tryptophan | |
| 100 Y Tyr Tyrosine | |
| 101 V Val Valine | |
| 102 B Asx Aspartic acid or Asparagine | |
| 103 Z Glx Glutamine or Glutamic acid | |
| 104 X Xaa Any or unknown amino acid | |
| 105 | |
| 106 | |
| 107 It is worth noting that, "Bacterial" codon table no. 11 produces an | |
| 108 polypeptide that is, confusingly, identical to the standard one. The | |
| 109 only differences are in available initiator codons. | |
| 110 | |
| 111 | |
| 112 NCBI Genetic Codes home page: | |
| 113 http://www.ncbi.nlm.nih.gov/htbin-post/Taxonomy/wprintgc?mode=c | |
| 114 | |
| 115 EBI Translation Table Viewer: | |
| 116 http://www.ebi.ac.uk/cgi-bin/mutations/trtables.cgi | |
| 117 | |
| 118 Amended ASN.1 version with ids 16 and 21 is at: | |
| 119 ftp://ftp.ebi.ac.uk/pub/databases/geneticcode/ | |
| 120 | |
| 121 Thank your for Matteo diTomasso for the original Perl implementation | |
| 122 of these tables. | |
| 123 | |
| 124 =head1 FEEDBACK | |
| 125 | |
| 126 =head2 Mailing Lists | |
| 127 | |
| 128 User feedback is an integral part of the evolution of this and other | |
| 129 Bioperl modules. Send your comments and suggestions preferably to the | |
| 130 Bioperl mailing lists Your participation is much appreciated. | |
| 131 | |
| 132 bioperl-l@bioperl.org - General discussion | |
| 133 http://bio.perl.org/MailList.html - About the mailing lists | |
| 134 | |
| 135 =head2 Reporting Bugs | |
| 136 | |
| 137 report bugs to the Bioperl bug tracking system to help us keep track | |
| 138 the bugs and their resolution. Bug reports can be submitted via | |
| 139 email or the web: | |
| 140 | |
| 141 bioperl-bugs@bio.perl.org | |
| 142 http://bugzilla.bioperl.org/ | |
| 143 | |
| 144 =head1 AUTHOR - Heikki Lehvaslaiho | |
| 145 | |
| 146 Email: heikki@ebi.ac.uk | |
| 147 Address: | |
| 148 | |
| 149 EMBL Outstation, European Bioinformatics Institute | |
| 150 Wellcome Trust Genome Campus, Hinxton | |
| 151 Cambs. CB10 1SD, United Kingdom | |
| 152 | |
| 153 =head1 APPENDIX | |
| 154 | |
| 155 The rest of the documentation details each of the object | |
| 156 methods. Internal methods are usually preceded with a _ | |
| 157 | |
| 158 =cut | |
| 159 | |
| 160 | |
| 161 # Let the code begin... | |
| 162 | |
| 163 package Bio::Tools::CodonTable; | |
| 164 use vars qw(@ISA @NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA | |
| 165 %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR); | |
| 166 use strict; | |
| 167 | |
| 168 # Object preamble - inherits from Bio::Root::Root | |
| 169 use Bio::Root::Root; | |
| 170 use Bio::Tools::IUPAC; | |
| 171 use Bio::SeqUtils; | |
| 172 | |
| 173 @ISA = qw(Bio::Root::Root); | |
| 174 | |
| 175 # first set internal values for all translation tables | |
| 176 | |
| 177 BEGIN { | |
| 178 @NAMES = #id | |
| 179 ( | |
| 180 'Standard', #1 | |
| 181 'Vertebrate Mitochondrial',#2 | |
| 182 'Yeast Mitochondrial',# 3 | |
| 183 'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma',#4 | |
| 184 'Invertebrate Mitochondrial',#5 | |
| 185 'Ciliate, Dasycladacean and Hexamita Nuclear',# 6 | |
| 186 '', '', | |
| 187 'Echinoderm Mitochondrial',#9 | |
| 188 'Euplotid Nuclear',#10 | |
| 189 '"Bacterial"',# 11 | |
| 190 'Alternative Yeast Nuclear',# 12 | |
| 191 'Ascidian Mitochondrial',# 13 | |
| 192 'Flatworm Mitochondrial',# 14 | |
| 193 'Blepharisma Nuclear',# 15 | |
| 194 'Chlorophycean Mitochondrial',# 16 | |
| 195 '', '', '', '', | |
| 196 'Trematode Mitochondrial',# 21 | |
| 197 'Scenedesmus obliquus Mitochondrial', #22 | |
| 198 'Thraustochytrium Mitochondrial' #23 | |
| 199 ); | |
| 200 | |
| 201 @TABLES = | |
| 202 qw( | |
| 203 FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
| 204 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG | |
| 205 FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
| 206 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
| 207 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG | |
| 208 FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
| 209 '' '' | |
| 210 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG | |
| 211 FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
| 212 FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
| 213 FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
| 214 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG | |
| 215 FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG | |
| 216 FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
| 217 FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
| 218 '' '' '' '' | |
| 219 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG | |
| 220 FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
| 221 FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | |
| 222 ); | |
| 223 | |
| 224 | |
| 225 @STARTS = | |
| 226 qw( | |
| 227 ---M---------------M---------------M---------------------------- | |
| 228 --------------------------------MMMM---------------M------------ | |
| 229 ----------------------------------MM---------------------------- | |
| 230 --MM---------------M------------MMMM---------------M------------ | |
| 231 ---M----------------------------MMMM---------------M------------ | |
| 232 -----------------------------------M---------------------------- | |
| 233 '' '' | |
| 234 -----------------------------------M---------------------------- | |
| 235 -----------------------------------M---------------------------- | |
| 236 ---M---------------M------------MMMM---------------M------------ | |
| 237 -------------------M---------------M---------------------------- | |
| 238 -----------------------------------M---------------------------- | |
| 239 -----------------------------------M---------------------------- | |
| 240 -----------------------------------M---------------------------- | |
| 241 -----------------------------------M---------------------------- | |
| 242 '' '' '' '' | |
| 243 -----------------------------------M---------------M------------ | |
| 244 -----------------------------------M---------------------------- | |
| 245 --------------------------------M--M---------------M------------ | |
| 246 ); | |
| 247 | |
| 248 my @nucs = qw(t c a g); | |
| 249 my $x = 0; | |
| 250 ($CODONS, $TRCOL) = ({}, {}); | |
| 251 for my $i (@nucs) { | |
| 252 for my $j (@nucs) { | |
| 253 for my $k (@nucs) { | |
| 254 my $codon = "$i$j$k"; | |
| 255 $CODONS->{$codon} = $x; | |
| 256 $TRCOL->{$x} = $codon; | |
| 257 $x++; | |
| 258 } | |
| 259 } | |
| 260 } | |
| 261 %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub(); | |
| 262 %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup(); | |
| 263 %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2); | |
| 264 $VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']'; | |
| 265 $TERMINATOR = '*'; | |
| 266 } | |
| 267 | |
| 268 sub new { | |
| 269 my($class,@args) = @_; | |
| 270 my $self = $class->SUPER::new(@args); | |
| 271 | |
| 272 my($id) = | |
| 273 $self->_rearrange([qw(ID | |
| 274 )], | |
| 275 @args); | |
| 276 | |
| 277 $id = 1 if ( ! $id ); | |
| 278 $id && $self->id($id); | |
| 279 return $self; # success - we hope! | |
| 280 } | |
| 281 | |
| 282 =head2 id | |
| 283 | |
| 284 Title : id | |
| 285 Usage : $obj->id(3); $id_integer = $obj->id(); | |
| 286 Function: | |
| 287 | |
| 288 Sets or returns the id of the translation table. IDs are | |
| 289 integers from 1 to 15, excluding 7 and 8 which have been | |
| 290 removed as redundant. If an invalid ID is given the method | |
| 291 returns 0, false. | |
| 292 | |
| 293 | |
| 294 Example : | |
| 295 Returns : value of id, a scalar, 0 if not a valid | |
| 296 Args : newvalue (optional) | |
| 297 | |
| 298 =cut | |
| 299 | |
| 300 sub id{ | |
| 301 my ($self,$value) = @_; | |
| 302 if( defined $value) { | |
| 303 if ( !(defined $TABLES[$value-1]) or $TABLES[$value-1] eq '') { | |
| 304 $self->warn("Not a valid codon table ID [$value] "); | |
| 305 $value = 0; | |
| 306 } | |
| 307 $self->{'id'} = $value; | |
| 308 } | |
| 309 return $self->{'id'}; | |
| 310 } | |
| 311 | |
| 312 =head2 name | |
| 313 | |
| 314 Title : name | |
| 315 Usage : $obj->name() | |
| 316 Function: returns the descriptive name of the translation table | |
| 317 Example : | |
| 318 Returns : A string | |
| 319 Args : None | |
| 320 | |
| 321 | |
| 322 =cut | |
| 323 | |
| 324 sub name{ | |
| 325 my ($self) = @_; | |
| 326 | |
| 327 my ($id) = $self->{'id'}; | |
| 328 return $NAMES[$id-1]; | |
| 329 } | |
| 330 | |
| 331 =head2 translate | |
| 332 | |
| 333 Title : translate | |
| 334 Usage : $obj->translate('YTR') | |
| 335 Function: Returns a string of one letter amino acid codes from | |
| 336 nucleotide sequence input. The imput can be of any length. | |
| 337 | |
| 338 Returns 'X' for unknown codons and codons that code for | |
| 339 more than one amino acid. Returns an empty string if input | |
| 340 is not three characters long. Exceptions for these are: | |
| 341 | |
| 342 - IUPAC amino acid code B for Aspartic Acid and | |
| 343 Asparagine, is used. | |
| 344 - IUPAC amino acid code Z for Glutamic Acid, Glutamine is | |
| 345 used. | |
| 346 - if the codon is two nucleotides long and if by adding | |
| 347 an a third character 'N', it codes for a single amino | |
| 348 acid (with exceptions above), return that, otherwise | |
| 349 return empty string. | |
| 350 | |
| 351 Returns empty string for other input strings that are not | |
| 352 three characters long. | |
| 353 | |
| 354 Example : | |
| 355 Returns : a string of one letter ambiguous IUPAC amino acid codes | |
| 356 Args : ambiguous IUPAC nucleotide string | |
| 357 | |
| 358 | |
| 359 =cut | |
| 360 | |
| 361 sub translate { | |
| 362 my ($self, $seq) = @_; | |
| 363 $self->throw("Calling translate without a seq argument!") unless defined $seq; | |
| 364 return '' unless $seq; | |
| 365 | |
| 366 my $id = $self->id; | |
| 367 my ($partial) = 0; | |
| 368 $partial = 2 if length($seq) % 3 == 2; | |
| 369 | |
| 370 $seq = lc $seq; | |
| 371 $seq =~ tr/u/t/; | |
| 372 my $protein = ""; | |
| 373 if ($seq =~ /[^actg]/ ) { #ambiguous chars | |
| 374 for (my $i = 0; $i < (length($seq) - 2 ); $i+=3) { | |
| 375 my $triplet = substr($seq, $i, 3); | |
| 376 if (exists $CODONS->{$triplet}) { | |
| 377 $protein .= substr($TABLES[$id-1], | |
| 378 $CODONS->{$triplet},1); | |
| 379 } else { | |
| 380 $protein .= $self->_translate_ambiguous_codon($triplet); | |
| 381 } | |
| 382 } | |
| 383 } else { # simple, strict translation | |
| 384 for (my $i = 0; $i < (length($seq) - 2 ); $i+=3) { | |
| 385 my $triplet = substr($seq, $i, 3); | |
| 386 if (exists $CODONS->{$triplet}) { | |
| 387 $protein .= substr($TABLES[$id-1], $CODONS->{$triplet}, 1); | |
| 388 } else { | |
| 389 $protein .= 'X'; | |
| 390 } | |
| 391 } | |
| 392 } | |
| 393 if ($partial == 2) { # 2 overhanging nucleotides | |
| 394 my $triplet = substr($seq, ($partial -4)). "n"; | |
| 395 if (exists $CODONS->{$triplet}) { | |
| 396 my $aa = substr($TABLES[$id-1], $CODONS->{$triplet},1); | |
| 397 $protein .= $aa; | |
| 398 } else { | |
| 399 $protein .= $self->_translate_ambiguous_codon($triplet, $partial); | |
| 400 } | |
| 401 } | |
| 402 return $protein; | |
| 403 } | |
| 404 | |
| 405 sub _translate_ambiguous_codon { | |
| 406 my ($self, $triplet, $partial) = @_; | |
| 407 $partial ||= 0; | |
| 408 my $id = $self->id; | |
| 409 my $aa; | |
| 410 my @codons = _unambiquous_codons($triplet); | |
| 411 my %aas =(); | |
| 412 foreach my $codon (@codons) { | |
| 413 $aas{substr($TABLES[$id-1],$CODONS->{$codon},1)} = 1; | |
| 414 } | |
| 415 my $count = scalar keys %aas; | |
| 416 if ( $count == 1 ) { | |
| 417 $aa = (keys %aas)[0]; | |
| 418 } | |
| 419 elsif ( $count == 2 ) { | |
| 420 if ($aas{'D'} and $aas{'N'}) { | |
| 421 $aa = 'B'; | |
| 422 } | |
| 423 elsif ($aas{'E'} and $aas{'Q'}) { | |
| 424 $aa = 'Z'; | |
| 425 } else { | |
| 426 $partial ? ($aa = '') : ($aa = 'X'); | |
| 427 } | |
| 428 } else { | |
| 429 $partial ? ($aa = '') : ($aa = 'X'); | |
| 430 } | |
| 431 return $aa; | |
| 432 } | |
| 433 | |
| 434 =head2 translate_strict | |
| 435 | |
| 436 Title : translate_strict | |
| 437 Usage : $obj->translate_strict('ACT') | |
| 438 Function: returns one letter amino acid code for a codon input | |
| 439 | |
| 440 Fast and simple translation. User is responsible to resolve | |
| 441 ambiguous nucleotide codes before calling this | |
| 442 method. Returns 'X' for unknown codons and an empty string | |
| 443 for input strings that are not three characters long. | |
| 444 | |
| 445 It is not recommended to use this method in a production | |
| 446 environment. Use method translate, instead. | |
| 447 | |
| 448 Example : | |
| 449 Returns : A string | |
| 450 Args : a codon = a three nucleotide character string | |
| 451 | |
| 452 | |
| 453 =cut | |
| 454 | |
| 455 sub translate_strict{ | |
| 456 my ($self, $value) = @_; | |
| 457 my ($id) = $self->{'id'}; | |
| 458 | |
| 459 $value = lc $value; | |
| 460 $value =~ tr/u/t/; | |
| 461 | |
| 462 if (length $value != 3 ) { | |
| 463 return ''; | |
| 464 } | |
| 465 elsif (!(defined $CODONS->{$value})) { | |
| 466 return 'X'; | |
| 467 } | |
| 468 else { | |
| 469 return substr($TABLES[$id-1],$CODONS->{$value},1); | |
| 470 } | |
| 471 } | |
| 472 | |
| 473 =head2 revtranslate | |
| 474 | |
| 475 Title : revtranslate | |
| 476 Usage : $obj->revtranslate('G') | |
| 477 Function: returns codons for an amino acid | |
| 478 | |
| 479 Returns an empty string for unknown amino acid | |
| 480 codes. Ambiquous IUPAC codes Asx,B, (Asp,D; Asn,N) and | |
| 481 Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three | |
| 482 letter amino acid codes are accepted. '*' and 'Ter' are | |
| 483 used for terminator. | |
| 484 | |
| 485 By default, the output codons are shown in DNA. If the | |
| 486 output is needed in RNA (tr/t/u/), add a second argument | |
| 487 'RNA'. | |
| 488 | |
| 489 Example : $obj->revtranslate('Gly', 'RNA') | |
| 490 Returns : An array of three lower case letter strings i.e. codons | |
| 491 Args : amino acid, 'RNA' | |
| 492 | |
| 493 =cut | |
| 494 | |
| 495 sub revtranslate { | |
| 496 my ($self, $value, $coding) = @_; | |
| 497 my ($id) = $self->{'id'}; | |
| 498 my (@aas, $p); | |
| 499 my (@codons) = (); | |
| 500 | |
| 501 if (length($value) == 3 ) { | |
| 502 $value = lc $value; | |
| 503 $value = ucfirst $value; | |
| 504 $value = $THREELETTERSYMBOLS{$value}; | |
| 505 } | |
| 506 if ( defined $value and $value =~ /$VALID_PROTEIN/ | |
| 507 and length($value) == 1 ) { | |
| 508 $value = uc $value; | |
| 509 @aas = @{$IUPAC_AA{$value}}; | |
| 510 foreach my $aa (@aas) { | |
| 511 #print $aa, " -2\n"; | |
| 512 $aa = '\*' if $aa eq '*'; | |
| 513 while ($TABLES[$id-1] =~ m/$aa/g) { | |
| 514 $p = pos $TABLES[$id-1]; | |
| 515 push (@codons, $TRCOL->{--$p}); | |
| 516 } | |
| 517 } | |
| 518 } | |
| 519 | |
| 520 if ($coding and uc ($coding) eq 'RNA') { | |
| 521 for my $i (0..$#codons) { | |
| 522 $codons[$i] =~ tr/t/u/; | |
| 523 } | |
| 524 } | |
| 525 | |
| 526 return @codons; | |
| 527 } | |
| 528 | |
| 529 =head2 is_start_codon | |
| 530 | |
| 531 Title : is_start_codon | |
| 532 Usage : $obj->is_start_codon('ATG') | |
| 533 Function: returns true (1) for all codons that can be used as a | |
| 534 translation start, false (0) for others. | |
| 535 Example : $myCodonTable->is_start_codon('ATG') | |
| 536 Returns : boolean | |
| 537 Args : codon | |
| 538 | |
| 539 | |
| 540 =cut | |
| 541 | |
| 542 sub is_start_codon{ | |
| 543 my ($self, $value) = @_; | |
| 544 my ($id) = $self->{'id'}; | |
| 545 | |
| 546 $value = lc $value; | |
| 547 $value =~ tr/u/t/; | |
| 548 | |
| 549 if (length $value != 3 ) { | |
| 550 return 0; | |
| 551 } | |
| 552 else { | |
| 553 my $result = 1; | |
| 554 my @ms = map { substr($STARTS[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value); | |
| 555 foreach my $c (@ms) { | |
| 556 $result = 0 if $c ne 'M'; | |
| 557 } | |
| 558 return $result; | |
| 559 } | |
| 560 } | |
| 561 | |
| 562 | |
| 563 | |
| 564 =head2 is_ter_codon | |
| 565 | |
| 566 Title : is_ter_codon | |
| 567 Usage : $obj->is_ter_codon('GAA') | |
| 568 Function: returns true (1) for all codons that can be used as a | |
| 569 translation tarminator, false (0) for others. | |
| 570 Example : $myCodonTable->is_ter_codon('ATG') | |
| 571 Returns : boolean | |
| 572 Args : codon | |
| 573 | |
| 574 | |
| 575 =cut | |
| 576 | |
| 577 sub is_ter_codon{ | |
| 578 my ($self, $value) = @_; | |
| 579 my ($id) = $self->{'id'}; | |
| 580 | |
| 581 $value = lc $value; | |
| 582 $value =~ tr/u/t/; | |
| 583 | |
| 584 if (length $value != 3 ) { | |
| 585 return 0; | |
| 586 } | |
| 587 else { | |
| 588 my $result = 1; | |
| 589 my @ms = map { substr($TABLES[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value); | |
| 590 foreach my $c (@ms) { | |
| 591 $result = 0 if $c ne $TERMINATOR; | |
| 592 } | |
| 593 return $result; | |
| 594 } | |
| 595 } | |
| 596 | |
| 597 =head2 is_unknown_codon | |
| 598 | |
| 599 Title : is_unknown_codon | |
| 600 Usage : $obj->is_unknown_codon('GAJ') | |
| 601 Function: returns false (0) for all codons that are valid, | |
| 602 true (1) for others. | |
| 603 Example : $myCodonTable->is_unknown_codon('NTG') | |
| 604 Returns : boolean | |
| 605 Args : codon | |
| 606 | |
| 607 | |
| 608 =cut | |
| 609 | |
| 610 sub is_unknown_codon{ | |
| 611 my ($self, $value) = @_; | |
| 612 my ($id) = $self->{'id'}; | |
| 613 | |
| 614 $value = lc $value; | |
| 615 $value =~ tr/u/t/; | |
| 616 | |
| 617 if (length $value != 3 ) { | |
| 618 return 1; | |
| 619 } | |
| 620 else { | |
| 621 my $result = 0; | |
| 622 my @cs = map { substr($TABLES[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value); | |
| 623 $result = 1 if scalar @cs == 0; | |
| 624 return $result; | |
| 625 } | |
| 626 } | |
| 627 | |
| 628 =head2 _unambiquous_codons | |
| 629 | |
| 630 Title : _unambiquous_codons | |
| 631 Usage : @codons = _unambiquous_codons('ACN') | |
| 632 Function: | |
| 633 Example : | |
| 634 Returns : array of strings (one letter unambiguous amino acid codes) | |
| 635 Args : a codon = a three IUPAC nucleotide character string | |
| 636 | |
| 637 =cut | |
| 638 | |
| 639 sub _unambiquous_codons{ | |
| 640 my ($value) = @_; | |
| 641 my @nts = (); | |
| 642 my @codons = (); | |
| 643 my ($i, $j, $k); | |
| 644 @nts = map { $IUPAC_DNA{uc $_} } split(//, $value); | |
| 645 for my $i (@{$nts[0]}) { | |
| 646 for my $j (@{$nts[1]}) { | |
| 647 for my $k (@{$nts[2]}) { | |
| 648 push @codons, lc "$i$j$k"; | |
| 649 } | |
| 650 } | |
| 651 } | |
| 652 return @codons; | |
| 653 } | |
| 654 | |
| 655 1; |
