annotate variant_effect_predictor/Bio/Tools/CodonTable.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: CodonTable.pm,v 1.23 2002/10/22 07:38:45 lapp Exp $
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3 # bioperl module for Bio::Tools::CodonTable
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
4 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
6 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
7 # Copyright Heikki Lehvaslaiho
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
8 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
10
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
11 # POD documentation - main docs before the code
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
12
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
13 =head1 NAME
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
14
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
15 Bio::Tools::CodonTable - Bioperl codon table object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
16
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
17 =head1 SYNOPSIS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
18
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
19 This is a read-only class for all known codon tables. The IDs are
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
20 the ones used by nucleotide sequence databases. All common IUPAC
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
21 ambiguity codes for DNA, RNA and amino acids are recognized.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
22
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
23 # to use
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
24 use Bio::Tools::CodonTable;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
25
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
26 # defaults to ID 1 "Standard"
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
27 $myCodonTable = Bio::Tools::CodonTable->new();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
28 $myCodonTable2 = Bio::Tools::CodonTable -> new ( -id => 3 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
29
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
30 # change codon table
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
31 $myCodonTable->id(5);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
32
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
33 # examine codon table
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
34 print join (' ', "The name of the codon table no.", $myCodonTable->id(4),
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
35 "is:", $myCodonTable->name(), "\n");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
36
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
37 # translate a codon
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
38 $aa = $myCodonTable->translate('ACU');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
39 $aa = $myCodonTable->translate('act');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
40 $aa = $myCodonTable->translate('ytr');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
41
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
42 # reverse translate an amino acid
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
43 @codons = $myCodonTable->revtranslate('A');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
44 @codons = $myCodonTable->revtranslate('Ser');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
45 @codons = $myCodonTable->revtranslate('Glx');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
46 @codons = $myCodonTable->revtranslate('cYS', 'rna');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
47
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
48 #boolean tests
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
49 print "Is a start\n" if $myCodonTable->is_start_codon('ATG');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
50 print "Is a terminator\n" if $myCodonTable->is_ter_codon('tar');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
51 print "Is a unknown\n" if $myCodonTable->is_unknown_codon('JTG');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
52
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
53 =head1 DESCRIPTION
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
54
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
55 Codon tables are also called translation tables or genetic codes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
56 since that is what they try to represent. A bit more complete picture
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
57 of the full complexity of codon usage in various taxonomic groups
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
58 is presented at the NCBI Genetic Codes Home page.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
59
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
60 CodonTable is a BioPerl class that knows all current translation
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
61 tables that are used by primary nucleotide sequence databases
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
62 (GenBank, EMBL and DDBJ). It provides methods to output information
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
63 about tables and relationships between codons and amino acids.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
64
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
65 This class and its methods recognize all common IUPAC ambiguity codes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
66 for DNA, RNA and amino acids. The translation method follows the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
67 conventions in EMBL and TREMBL databases.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
68
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
69 It is a nuisance to separate RNA and cDNA representations of nucleic
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
70 acid transcripts. The CodonTable object accepts codons of both types as
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
71 input and allows the user to set the mode for output when reverse
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
72 translating. Its default for output is DNA.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
73
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
74 Note: This class deals primarily with individual codons and amino
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
75 acids. However in the interest of speed you can L<translate>
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
76 longer sequence, too. The full complexity of protein translation
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
77 is tackled by L<Bio::PrimarySeqI::translate>.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
78
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
79
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
80 The amino acid codes are IUPAC recommendations for common amino acids:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
81
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
82 A Ala Alanine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
83 R Arg Arginine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
84 N Asn Asparagine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
85 D Asp Aspartic acid
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
86 C Cys Cysteine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
87 Q Gln Glutamine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
88 E Glu Glutamic acid
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
89 G Gly Glycine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
90 H His Histidine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
91 I Ile Isoleucine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
92 L Leu Leucine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
93 K Lys Lysine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
94 M Met Methionine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
95 F Phe Phenylalanine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
96 P Pro Proline
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
97 S Ser Serine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
98 T Thr Threonine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
99 W Trp Tryptophan
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
100 Y Tyr Tyrosine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
101 V Val Valine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
102 B Asx Aspartic acid or Asparagine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
103 Z Glx Glutamine or Glutamic acid
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
104 X Xaa Any or unknown amino acid
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
105
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
106
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
107 It is worth noting that "Bacterial" codon table no. 11 produces a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
108 polypeptide that is, confusingly, identical to the standard one. The
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
109 only differences are in available initiator codons.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
110
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
111
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
112 NCBI Genetic Codes home page:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
113 http://www.ncbi.nlm.nih.gov/htbin-post/Taxonomy/wprintgc?mode=c
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
114
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
115 EBI Translation Table Viewer:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
116 http://www.ebi.ac.uk/cgi-bin/mutations/trtables.cgi
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
117
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
118 Amended ASN.1 version with ids 16 and 21 is at:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
119 ftp://ftp.ebi.ac.uk/pub/databases/geneticcode/
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
120
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
121 Thanks to Matteo diTomasso for the original Perl implementation
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
122 of these tables.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
123
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
124 =head1 FEEDBACK
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
125
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
126 =head2 Mailing Lists
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
127
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
128 User feedback is an integral part of the evolution of this and other
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
129 Bioperl modules. Send your comments and suggestions preferably to the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
130 Bioperl mailing lists. Your participation is much appreciated.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
131
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
132 bioperl-l@bioperl.org - General discussion
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
133 http://bio.perl.org/MailList.html - About the mailing lists
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
134
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
135 =head2 Reporting Bugs
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
136
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
137 Report bugs to the Bioperl bug tracking system to help us keep track of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
138 the bugs and their resolution. Bug reports can be submitted via
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
139 email or the web:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
140
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
141 bioperl-bugs@bio.perl.org
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
142 http://bugzilla.bioperl.org/
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
143
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
144 =head1 AUTHOR - Heikki Lehvaslaiho
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
145
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
146 Email: heikki@ebi.ac.uk
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
147 Address:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
148
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
149 EMBL Outstation, European Bioinformatics Institute
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
150 Wellcome Trust Genome Campus, Hinxton
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
151 Cambs. CB10 1SD, United Kingdom
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
152
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
153 =head1 APPENDIX
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
154
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
155 The rest of the documentation details each of the object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
156 methods. Internal methods are usually preceded with a _
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
157
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
158 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
159
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
160
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
161 # Let the code begin...
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
162
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
163 package Bio::Tools::CodonTable;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
164 use vars qw(@ISA @NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
165 %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
166 use strict;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
167
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
168 # Object preamble - inherits from Bio::Root::Root
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
169 use Bio::Root::Root;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
170 use Bio::Tools::IUPAC;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
171 use Bio::SeqUtils;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
172
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
173 @ISA = qw(Bio::Root::Root);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
174
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
175 # first set internal values for all translation tables
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
176
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Populate the package-level lookup data once, at compile time.
#
# @NAMES, @TABLES and @STARTS are parallel arrays indexed by
# (codon table ID - 1). Slots for IDs that are not in use (7, 8 and 17-20)
# hold a genuinely empty string.
#
# The unused slots used to be written as '' inside qw(). qw() does not
# interpret quotes, so those slots held the two-character string "''" and a
# comparison against the empty string never matched. They are now written
# as real empty strings outside the qw() lists.
BEGIN {
    @NAMES =                    #id
        (
         'Standard',            #1
         'Vertebrate Mitochondrial',#2
         'Yeast Mitochondrial', # 3
         'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma',#4
         'Invertebrate Mitochondrial',#5
         'Ciliate, Dasycladacean and Hexamita Nuclear',# 6
         '', '',                # 7, 8 (unused)
         'Echinoderm Mitochondrial',#9
         'Euplotid Nuclear',    #10
         '"Bacterial"',         # 11
         'Alternative Yeast Nuclear',# 12
         'Ascidian Mitochondrial',# 13
         'Flatworm Mitochondrial',# 14
         'Blepharisma Nuclear', # 15
         'Chlorophycean Mitochondrial',# 16
         '', '', '', '',        # 17-20 (unused)
         'Trematode Mitochondrial',# 21
         'Scenedesmus obliquus Mitochondrial', #22
         'Thraustochytrium Mitochondrial' #23
         );

    # One 64-character string per table: the one-letter amino acid (or '*'
    # for a terminator) for each codon, in the column order built below.
    @TABLES =
        (
         qw(
            FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
            FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
            FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
            FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
            FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
            FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
            ),
         '', '',                # 7, 8 (unused)
         qw(
            FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
            FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
            FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
            FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
            FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
            FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
            FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
            FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
            ),
         '', '', '', '',        # 17-20 (unused)
         qw(
            FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
            FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
            FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
            ),
         );

    # Same layout as @TABLES: 'M' marks a column whose codon can act as an
    # initiator in that table, '-' marks every other column.
    @STARTS =
        (
         qw(
            ---M---------------M---------------M----------------------------
            --------------------------------MMMM---------------M------------
            ----------------------------------MM----------------------------
            --MM---------------M------------MMMM---------------M------------
            ---M----------------------------MMMM---------------M------------
            -----------------------------------M----------------------------
            ),
         '', '',                # 7, 8 (unused)
         qw(
            -----------------------------------M----------------------------
            -----------------------------------M----------------------------
            ---M---------------M------------MMMM---------------M------------
            -------------------M---------------M----------------------------
            -----------------------------------M----------------------------
            -----------------------------------M----------------------------
            -----------------------------------M----------------------------
            -----------------------------------M----------------------------
            ),
         '', '', '', '',        # 17-20 (unused)
         qw(
            -----------------------------------M---------------M------------
            -----------------------------------M----------------------------
            --------------------------------M--M---------------M------------
            ),
         );

    # Enumerate all 64 codons in t, c, a, g order with the third position
    # varying fastest. $CODONS maps a lower-case codon to its column index
    # in the table strings; $TRCOL is the inverse mapping.
    my @nucs = qw(t c a g);
    my $x = 0;
    ($CODONS, $TRCOL) = ({}, {});
    for my $i (@nucs) {
        for my $j (@nucs) {
            for my $k (@nucs) {
                my $codon = "$i$j$k";
                $CODONS->{$codon} = $x;
                $TRCOL->{$x} = $codon;
                $x++;
            }
        }
    }

    # Symbol tables supplied by other BioPerl modules; their exact contents
    # are not visible here (presumably IUPAC nucleotide / amino acid
    # ambiguity maps and the valid amino acid symbols - confirm against
    # Bio::Tools::IUPAC and Bio::SeqUtils).
    %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub();
    %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup();
    %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2);
    $VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']';
    $TERMINATOR = '*';
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
267
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Constructor.
#
# Usage   : Bio::Tools::CodonTable->new();  Bio::Tools::CodonTable->new(-id => 3);
# Args    : -id => codon table ID (optional)
# Returns : the new object, with its table ID set through id()
#
# A missing or false -id selects table 1.
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    my ($table_id) = $self->_rearrange([qw(ID)], @args);

    # Fall back to table 1 when no usable ID was passed; id() validates
    # whatever value is handed to it.
    $self->id($table_id || 1);

    return $self;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
281
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
282 =head2 id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
283
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
284 Title : id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
285 Usage : $obj->id(3); $id_integer = $obj->id();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
286 Function:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
287
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
288 Sets or returns the id of the translation table. IDs are
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
289 integers from 1 to 23; 7, 8 and 17 to 20 are not in use (7 and 8 were
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
290 removed as redundant). If an invalid ID is given the method
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
291 returns 0, false.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
292
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
293
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
294 Example :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
295 Returns : value of id, a scalar, 0 if not a valid ID
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
296 Args : newvalue (optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
297
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
298 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
299
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Get or set the codon table ID.
#
# Args    : new ID (optional)
# Returns : the current ID; 0 after an attempt to set an invalid ID
#
# When a value is passed it is validated before being stored. A usable ID
# is a positive integer, no larger than the number of @TABLES slots, whose
# slot holds a 64-column table string. Anything else triggers a warning and
# stores 0.
sub id {
    my ($self, $value) = @_;

    if (defined $value) {
        # The explicit range check matters: Perl treats a negative array
        # subscript as counting from the end, so 0 or a negative ID would
        # otherwise land on one of the last tables and look valid.
        # Matching the shape of the table string (instead of comparing it
        # with '') also rejects placeholder entries for unused IDs however
        # they happen to be spelled in @TABLES.
        my $is_valid = $value =~ /\A[0-9]+\z/
            && $value >= 1
            && $value <= scalar @TABLES
            && $TABLES[$value - 1] =~ /\A[A-Z*]{64}\z/;

        unless ($is_valid) {
            $self->warn("Not a valid codon table ID [$value] ");
            $value = 0;
        }
        $self->{'id'} = $value;
    }
    return $self->{'id'};
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
311
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
312 =head2 name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
313
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
314 Title : name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
315 Usage : $obj->name()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
316 Function: returns the descriptive name of the translation table
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
317 Example :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
318 Returns : A string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
319 Args : None
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
320
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
321
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
322 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
323
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Return the descriptive name of the currently selected codon table.
#
# Args    : none
# Returns : the entry of @NAMES for the stored ID, or an empty string when
#           the stored ID does not designate a table
sub name {
    my ($self) = @_;

    my $id = $self->{'id'};

    # id() stores 0 after an invalid ID was supplied. Indexing @NAMES with
    # 0 - 1 would wrap around and return the name of the last table, so
    # anything that is not a positive integer is answered with ''.
    return '' unless defined $id && $id =~ /\A[0-9]+\z/ && $id >= 1;

    my $name = $NAMES[$id - 1];
    return defined $name ? $name : '';
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
330
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
331 =head2 translate
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
332
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
333 Title : translate
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
334 Usage : $obj->translate('YTR')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
335 Function: Returns a string of one letter amino acid codes from
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
336 nucleotide sequence input. The input can be of any length.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
337
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
338 Returns 'X' for unknown codons and codons that code for
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
339 more than one amino acid. Returns an empty string if input
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
340 is not three characters long. Exceptions for these are:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
341
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
342 - IUPAC amino acid code B for Aspartic Acid and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
343 Asparagine, is used.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
344 - IUPAC amino acid code Z for Glutamic Acid, Glutamine is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
345 used.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
346 - if the codon is two nucleotides long and if by adding
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
347 a third character 'N', it codes for a single amino
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
348 acid (with exceptions above), return that, otherwise
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
349 return empty string.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
350
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
351 Returns empty string for other input strings that are not
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
352 three characters long.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
353
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
354 Example :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
355 Returns : a string of one letter ambiguous IUPAC amino acid codes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
356 Args : ambiguous IUPAC nucleotide string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
357
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
358
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
359 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
360
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub translate {
    my ($self, $seq) = @_;

    # A missing argument is a caller error; an empty (false) sequence
    # simply translates to nothing.
    defined $seq
        or $self->throw("Calling translate without a seq argument!");
    return '' if !$seq;

    my $table_id = $self->id;

    # Two trailing nucleotides may still determine an amino acid; a single
    # trailing nucleotide is ignored.
    my $overhang = (length($seq) % 3 == 2) ? 2 : 0;

    # Normalise to lower-case DNA, the form used for keys of $CODONS.
    (my $dna = lc $seq) =~ tr/u/t/;

    # Anything other than a/c/t/g switches on ambiguity resolution for
    # triplets that have no direct entry in $CODONS.
    my $has_ambiguity = ($dna =~ /[^actg]/);

    my $complete = length($dna) - length($dna) % 3;   # end of last full codon
    my $protein  = '';

    for (my $offset = 0; $offset < $complete; $offset += 3) {
        my $codon = substr($dna, $offset, 3);

        if (exists $CODONS->{$codon}) {
            $protein .= substr($TABLES[$table_id - 1], $CODONS->{$codon}, 1);
        }
        elsif ($has_ambiguity) {
            $protein .= $self->_translate_ambiguous_codon($codon);
        }
        else {
            $protein .= 'X';
        }
    }

    if ($overhang) {
        # Pad the two leftover nucleotides with 'n' and see whether the
        # result still resolves.
        my $codon = substr($dna, -2) . "n";

        $protein .= exists $CODONS->{$codon}
            ? substr($TABLES[$table_id - 1], $CODONS->{$codon}, 1)
            : $self->_translate_ambiguous_codon($codon, $overhang);
    }

    return $protein;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
404
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Resolve one codon containing IUPAC ambiguity codes to a single residue.
#
# Args    : $triplet - three-character codon, may contain ambiguity codes
#           $partial - true when $triplet is a padded incomplete codon
# Returns : the amino acid if every expansion of the codon agrees,
#           'B' for D+N, 'Z' for E+Q, otherwise 'X' (or '' when $partial
#           is true).
sub _translate_ambiguous_codon {
    my ($self, $triplet, $partial) = @_;
    $partial ||= 0;

    my $table_id = $self->id;

    # Collect the distinct amino acids encoded by every unambiguous codon
    # the triplet expands to.
    my %seen;
    $seen{ substr($TABLES[$table_id - 1], $CODONS->{$_}, 1) } = 1
        for _unambiquous_codons($triplet);

    my @residues   = keys %seen;
    my $unresolved = $partial ? '' : 'X';

    return $residues[0] if @residues == 1;
    return $unresolved  if @residues != 2;

    # Exactly two candidates: only the IUPAC pairs have their own symbol.
    return 'B' if $seen{'D'} && $seen{'N'};
    return 'Z' if $seen{'E'} && $seen{'Q'};
    return $unresolved;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
433
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
434 =head2 translate_strict
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
435
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
436 Title : translate_strict
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
437 Usage : $obj->translate_strict('ACT')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
438 Function: returns one letter amino acid code for a codon input
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
439
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
440 Fast and simple translation. User is responsible to resolve
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
441 ambiguous nucleotide codes before calling this
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
442 method. Returns 'X' for unknown codons and an empty string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
443 for input strings that are not three characters long.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
444
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
445 It is not recommended to use this method in a production
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
446 environment. Use method translate, instead.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
447
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
448 Example :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
449 Returns : A string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
450 Args : a codon = a three nucleotide character string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
451
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
452
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
453 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
454
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub translate_strict {
    my ($self, $value) = @_;
    my $table_id = $self->{'id'};

    # Lower-case DNA is the key form used by $CODONS.
    (my $codon = lc $value) =~ tr/u/t/;

    return ''  unless length($codon) == 3;          # not a codon at all
    return 'X' unless defined $CODONS->{$codon};    # unknown / ambiguous

    return substr($TABLES[$table_id - 1], $CODONS->{$codon}, 1);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
472
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
473 =head2 revtranslate
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
474
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
475 Title : revtranslate
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
476 Usage : $obj->revtranslate('G')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
477 Function: returns codons for an amino acid
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
478
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
479 Returns an empty list for unknown amino acid
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
480 codes. Ambiguous IUPAC codes Asx,B, (Asp,D; Asn,N) and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
481 Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
482 letter amino acid codes are accepted. '*' and 'Ter' are
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
483 used for terminator.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
484
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
485 By default, the output codons are shown in DNA. If the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
486 output is needed in RNA (tr/t/u/), add a second argument
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
487 'RNA'.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
488
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
489 Example : $obj->revtranslate('Gly', 'RNA')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
490 Returns : An array of three lower case letter strings i.e. codons
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
491 Args : amino acid, 'RNA'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
492
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
493 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
494
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub revtranslate {
    my ($self, $value, $coding) = @_;
    my $id = $self->{'id'};
    my @codons;

    # No amino acid given: nothing to reverse-translate. Returning early
    # avoids "uninitialized value" warnings from the length tests below.
    return @codons unless defined $value;

    # Three-letter codes ('Gly', 'gly', 'GLY') are mapped to their
    # one-letter symbol; unknown ones leave $value undefined.
    if (length($value) == 3) {
        $value = $THREELETTERSYMBOLS{ ucfirst lc $value };
    }

    if (defined $value and $value =~ /$VALID_PROTEIN/
        and length($value) == 1) {

        # Ambiguous symbols expand to several amino acids. A symbol with
        # no %IUPAC_AA entry expands to nothing instead of dereferencing
        # an undefined value.
        my $residues = $IUPAC_AA{ uc $value } || [];

        foreach my $aa (@$residues) {
            # \Q..\E makes the symbol literal ('*' is a regex
            # metacharacter). Every hit in the translation table marks
            # one codon, identified by its column index.
            while ($TABLES[$id-1] =~ m/\Q$aa\E/g) {
                push @codons, $TRCOL->{ pos($TABLES[$id-1]) - 1 };
            }
        }
    }

    # Codons are stored as DNA; convert on request.
    if ($coding and uc($coding) eq 'RNA') {
        tr/t/u/ for @codons;
    }

    return @codons;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
528
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
529 =head2 is_start_codon
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
530
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
531 Title : is_start_codon
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
532 Usage : $obj->is_start_codon('ATG')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
533 Function: returns true (1) for all codons that can be used as a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
534 translation start, false (0) for others.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
535 Example : $myCodonTable->is_start_codon('ATG')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
536 Returns : boolean
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
537 Args : codon
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
538
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
539
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
540 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
541
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub is_start_codon {
    my ($self, $value) = @_;
    my $id = $self->{'id'};

    $value = lc $value;
    $value =~ tr/u/t/;

    return 0 if length($value) != 3;

    my @codons = _unambiquous_codons($value);

    # A codon containing characters that expand to nothing (e.g. 'GAJ')
    # is not a valid codon and therefore cannot be a start codon. Without
    # this check the loop below would be vacuously true.
    return 0 unless @codons;

    # An ambiguous codon qualifies only if every codon it stands for is
    # marked 'M' in the start table.
    foreach my $codon (@codons) {
        return 0 if substr($STARTS[$id-1], $CODONS->{$codon}, 1) ne 'M';
    }
    return 1;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
561
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
562
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
563
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
564 =head2 is_ter_codon
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
565
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
566 Title : is_ter_codon
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
567 Usage : $obj->is_ter_codon('GAA')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
568 Function: returns true (1) for all codons that can be used as a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
569 translation terminator, false (0) for others.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
570 Example : $myCodonTable->is_ter_codon('ATG')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
571 Returns : boolean
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
572 Args : codon
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
573
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
574
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
575 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
576
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub is_ter_codon {
    my ($self, $value) = @_;
    my $id = $self->{'id'};

    $value = lc $value;
    $value =~ tr/u/t/;

    return 0 if length($value) != 3;

    my @codons = _unambiquous_codons($value);

    # A codon containing characters that expand to nothing (e.g. 'GAJ')
    # is not a valid codon and therefore cannot be a terminator. Without
    # this check the loop below would be vacuously true.
    return 0 unless @codons;

    # An ambiguous codon qualifies only if every codon it stands for
    # translates to the terminator symbol.
    foreach my $codon (@codons) {
        return 0
            if substr($TABLES[$id-1], $CODONS->{$codon}, 1) ne $TERMINATOR;
    }
    return 1;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
596
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
597 =head2 is_unknown_codon
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
598
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
599 Title : is_unknown_codon
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
600 Usage : $obj->is_unknown_codon('GAJ')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
601 Function: returns false (0) for all codons that are valid,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
602 true (1) for others.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
603 Example : $myCodonTable->is_unknown_codon('NTG')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
604 Returns : boolean
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
605 Args : codon
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
606
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
607
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
608 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
609
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub is_unknown_codon {
    my ($self, $value) = @_;

    $value = lc $value;
    $value =~ tr/u/t/;

    # Anything that is not three characters long is not a codon.
    return 1 if length($value) != 3;

    # A codon is known when it expands to at least one unambiguous codon.
    # Only the number of expansions matters, so no translation table
    # lookup is needed.
    my @codons = _unambiquous_codons($value);
    return @codons ? 0 : 1;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
627
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
628 =head2 _unambiquous_codons
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
629
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
630 Title : _unambiquous_codons
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
631 Usage : @codons = _unambiquous_codons('ACN')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
632 Function: expands an IUPAC codon into every unambiguous codon it can stand for
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
633 Example :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
634 Returns : array of strings (lower case unambiguous codons)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
635 Args : a codon = a three IUPAC nucleotide character string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
636
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
637 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
638
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub _unambiquous_codons {
    my ($value) = @_;

    # One array ref of concrete nucleotides per codon position. Characters
    # with no %IUPAC_DNA entry, and missing positions, contribute an empty
    # list, which makes the whole expansion empty.
    my ($first, $second, $third) =
        map { $IUPAC_DNA{uc $_} || [] } split(//, $value);

    # Cartesian product of the three positions.
    my @expanded;
    foreach my $nt1 (@{ $first || [] }) {
        foreach my $nt2 (@{ $second || [] }) {
            foreach my $nt3 (@{ $third || [] }) {
                push @expanded, lc(join('', $nt1, $nt2, $nt3));
            }
        }
    }
    return @expanded;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
654
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
655 1;