annotate variant_effect_predictor/Bio/Tools/CodonTable.pm @ 1:d6778b5d8382 draft default tip

Deleted selected files
author willmclaren
date Fri, 03 Aug 2012 10:05:43 -0400
parents 21066c0abaf5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 # $Id: CodonTable.pm,v 1.23 2002/10/22 07:38:45 lapp Exp $
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 # bioperl module for Bio::Tools::CodonTable
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 # Copyright Heikki Lehvaslaiho
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 # POD documentation - main docs before the code
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15 Bio::Tools::CodonTable - Bioperl codon table object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 This is a read-only class for all known codon tables. The IDs are
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20 the ones used by nucleotide sequence databases. All common IUPAC
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 ambiguity codes for DNA, RNA and amino acids are recognized.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 # to use
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24 use Bio::Tools::CodonTable;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26 # defaults to ID 1 "Standard"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 $myCodonTable = Bio::Tools::CodonTable->new();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28 $myCodonTable2 = Bio::Tools::CodonTable -> new ( -id => 3 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30 # change codon table
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31 $myCodonTable->id(5);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33 # examine codon table
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34 print join (' ', "The name of the codon table no.", $myCodonTable->id(4),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35 "is:", $myCodonTable->name(), "\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37 # translate a codon
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38 $aa = $myCodonTable->translate('ACU');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39 $aa = $myCodonTable->translate('act');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40 $aa = $myCodonTable->translate('ytr');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42 # reverse translate an amino acid
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 @codons = $myCodonTable->revtranslate('A');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44 @codons = $myCodonTable->revtranslate('Ser');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45 @codons = $myCodonTable->revtranslate('Glx');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46 @codons = $myCodonTable->revtranslate('cYS', 'rna');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48 #boolean tests
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49 print "Is a start\n" if $myCodonTable->is_start_codon('ATG');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50 print "Is a terminator\n" if $myCodonTable->is_ter_codon('tar');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51 print "Is an unknown\n" if $myCodonTable->is_unknown_codon('JTG');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55 Codon tables are also called translation tables or genetic codes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56 since that is what they try to represent. A bit more complete picture
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57 of the full complexity of codon usage in various taxonomic groups
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58 is presented at the NCBI Genetic Codes Home page.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60 CodonTable is a BioPerl class that knows all current translation
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61 tables that are used by primary nucleotide sequence databases
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62 (GenBank, EMBL and DDBJ). It provides methods to output information
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
63 about tables and relationships between codons and amino acids.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
64
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
65 This class and its methods recognize all common IUPAC ambiguity codes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
66 for DNA, RNA and amino acids. The translation method follows the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67 conventions in EMBL and TREMBL databases.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69 It is a nuisance to separate RNA and cDNA representations of nucleic
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70 acid transcripts. The CodonTable object accepts codons of both types as
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71 input and allows the user to set the mode for output when reverse
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72 translating. Its default for output is DNA.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74 Note: This class deals primarily with individual codons and amino
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75 acids. However in the interest of speed you can L<translate>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76 longer sequences, too. The full complexity of protein translation
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77 is tackled by L<Bio::PrimarySeqI::translate>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80 The amino acid codes are IUPAC recommendations for common amino acids:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82 A Ala Alanine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83 R Arg Arginine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84 N Asn Asparagine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85 D Asp Aspartic acid
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86 C Cys Cysteine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87 Q Gln Glutamine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88 E Glu Glutamic acid
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89 G Gly Glycine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90 H His Histidine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91 I Ile Isoleucine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92 L Leu Leucine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93 K Lys Lysine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
94 M Met Methionine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
95 F Phe Phenylalanine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
96 P Pro Proline
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
97 S Ser Serine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
98 T Thr Threonine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
99 W Trp Tryptophan
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
100 Y Tyr Tyrosine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
101 V Val Valine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
102 B Asx Aspartic acid or Asparagine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
103 Z Glx Glutamine or Glutamic acid
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
104 X Xaa Any or unknown amino acid
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
105
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
106
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
107 It is worth noting that "Bacterial" codon table no. 11 produces a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
108 polypeptide that is, confusingly, identical to the standard one. The
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
109 only differences are in available initiator codons.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
110
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
111
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
112 NCBI Genetic Codes home page:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
113 http://www.ncbi.nlm.nih.gov/htbin-post/Taxonomy/wprintgc?mode=c
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
114
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
115 EBI Translation Table Viewer:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
116 http://www.ebi.ac.uk/cgi-bin/mutations/trtables.cgi
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
117
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
118 Amended ASN.1 version with ids 16 and 21 is at:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
119 ftp://ftp.ebi.ac.uk/pub/databases/geneticcode/
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
120
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
121 Thanks to Matteo diTomasso for the original Perl implementation
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
122 of these tables.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
123
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
124 =head1 FEEDBACK
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
125
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
126 =head2 Mailing Lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
127
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
128 User feedback is an integral part of the evolution of this and other
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
129 Bioperl modules. Send your comments and suggestions preferably to the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
130 Bioperl mailing lists. Your participation is much appreciated.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
131
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
132 bioperl-l@bioperl.org - General discussion
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
133 http://bio.perl.org/MailList.html - About the mailing lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
134
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
135 =head2 Reporting Bugs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
136
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
137 Report bugs to the Bioperl bug tracking system to help us keep track
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
138 of the bugs and their resolution. Bug reports can be submitted via
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
139 email or the web:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
140
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
141 bioperl-bugs@bio.perl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
142 http://bugzilla.bioperl.org/
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
143
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
144 =head1 AUTHOR - Heikki Lehvaslaiho
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
145
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
146 Email: heikki@ebi.ac.uk
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
147 Address:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
148
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
149 EMBL Outstation, European Bioinformatics Institute
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
150 Wellcome Trust Genome Campus, Hinxton
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
151 Cambs. CB10 1SD, United Kingdom
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
152
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
153 =head1 APPENDIX
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
154
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
155 The rest of the documentation details each of the object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
156 methods. Internal methods are usually preceded with a _
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
157
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
158 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
159
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
160
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
161 # Let the code begin...
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
162
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
163 package Bio::Tools::CodonTable;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
164 use vars qw(@ISA @NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
165 %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
166 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
167
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
168 # Object preamble - inherits from Bio::Root::Root
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
169 use Bio::Root::Root;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
170 use Bio::Tools::IUPAC;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
171 use Bio::SeqUtils;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
172
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
173 @ISA = qw(Bio::Root::Root);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
174
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
175 # first set internal values for all translation tables
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
176
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Compile-time initialisation of the package-level lookup data shared by
# every CodonTable object.  Arrays are indexed by (table id - 1); ids with
# no table (7, 8 and 17-20) hold an empty string.
BEGIN {
    # Descriptive name of each translation table.
    # NOTE(review): the name for id 4 lacks a space in
    # "CoelenterateMitochondrial"; left untouched because callers may
    # compare against the exact string returned by name().
    @NAMES =                    #id
        (
         'Standard',            #1
         'Vertebrate Mitochondrial',#2
         'Yeast Mitochondrial',# 3
         'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma',#4
         'Invertebrate Mitochondrial',#5
         'Ciliate, Dasycladacean and Hexamita Nuclear',# 6
         '', '',
         'Echinoderm Mitochondrial',#9
         'Euplotid Nuclear',#10
         '"Bacterial"',# 11
         'Alternative Yeast Nuclear',# 12
         'Ascidian Mitochondrial',# 13
         'Flatworm Mitochondrial',# 14
         'Blepharisma Nuclear',# 15
         'Chlorophycean Mitochondrial',# 16
         '', '',  '', '',
         'Trematode Mitochondrial',# 21
         'Scenedesmus obliquus Mitochondrial', #22
         'Thraustochytrium Mitochondrial' #23
         );

    # qw() performs no quote processing: a '' token inside it is the
    # literal two-character string "''", NOT the empty string.  The
    # placeholder rows below are therefore converted to real empty strings
    # so that "is there a table for this id?" checks (eq '') work.
    my $blank_placeholders = sub {
        return map { $_ eq q{''} ? '' : $_ } @_;
    };

    # One 64-character string per table: the amino acid (or '*' for a
    # terminator) encoded by each codon, in the t/c/a/g order generated
    # for $CODONS below.
    @TABLES = $blank_placeholders->(
        qw(
           FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
           FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
           FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
           FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
           FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
           FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
           '' ''
           FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
           FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
           FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
           FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
           FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
           FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
           FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
           FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
           '' '' '' ''
           FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
           FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
           FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
           )
    );

    # Parallel to @TABLES: 'M' marks a codon usable as an initiator in
    # that table, '-' marks every other codon.
    @STARTS = $blank_placeholders->(
        qw(
           ---M---------------M---------------M----------------------------
           --------------------------------MMMM---------------M------------
           ----------------------------------MM----------------------------
           --MM---------------M------------MMMM---------------M------------
           ---M----------------------------MMMM---------------M------------
           -----------------------------------M----------------------------
           '' ''
           -----------------------------------M----------------------------
           -----------------------------------M----------------------------
           ---M---------------M------------MMMM---------------M------------
           -------------------M---------------M----------------------------
           -----------------------------------M----------------------------
           -----------------------------------M----------------------------
           -----------------------------------M----------------------------
           -----------------------------------M----------------------------
           '' '' '' ''
           -----------------------------------M---------------M------------
           -----------------------------------M----------------------------
           --------------------------------M--M---------------M------------
           )
    );

    # Build the two-way mapping between lower-case DNA codons and their
    # column (0..63) in the @TABLES / @STARTS strings.
    my @nucs = qw(t c a g);
    my $x = 0;
    ($CODONS, $TRCOL) = ({}, {});
    for my $i (@nucs) {
        for my $j (@nucs) {
            for my $k (@nucs) {
                my $codon = "$i$j$k";
                $CODONS->{$codon} = $x;
                $TRCOL->{$x} = $codon;
                $x++;
            }
        }
    }

    # Ambiguity-code and amino-acid symbol data supplied by sibling
    # BioPerl modules; their exact shape is defined there, not here.
    %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub();
    %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup();
    %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2);
    $VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']';
    $TERMINATOR = '*';
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
267
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Constructor.
#   Usage   : Bio::Tools::CodonTable->new();            # table 1
#             Bio::Tools::CodonTable->new(-id => 3);
#   Returns : a new Bio::Tools::CodonTable object
#   Args    : -id => translation table id (optional).  A missing or false
#             value selects table 1.
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    # Pull the ID argument out of the caller's argument list.
    my ($table_id) = $self->_rearrange([qw(ID)], @args);

    # Fall back to table 1 when no (or a false) id was supplied; the
    # setter performs the validity check and warns on a bad id.
    $table_id ||= 1;
    $self->id($table_id);

    return $self;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
281
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
=head2 id

 Title   : id
 Usage   : $obj->id(3); $id_integer = $obj->id();
 Function:

           Sets or returns the id of the translation table.  Valid
           IDs are the positive integers that have an entry in the
           internal list of tables; ids 7 and 8 have been removed as
           redundant and ids 17 to 20 are not assigned.  If an invalid
           ID is given the method issues a warning, sets the id to 0
           and returns 0, false.


 Example :
 Returns : value of id, a scalar, 0 if not a valid ID
 Args    : newvalue (optional)

=cut

sub id {
    my ($self, $value) = @_;

    if (defined $value) {
        my $is_valid = 0;

        # Only a plain integer >= 1 can name a table.  Without this check
        # 0, negative numbers and non-numeric strings would index @TABLES
        # from its end and be accepted silently.
        if ($value =~ /\A\d+\z/ and $value >= 1) {
            my $table = $TABLES[$value - 1];

            # Unassigned ids are stored as an empty string; a literal ''
            # token left over from a qw() list also means "no table".
            $is_valid = 1
                if defined $table and $table ne '' and $table ne q{''};
        }

        unless ($is_valid) {
            $self->warn("Not a valid codon table ID [$value] ");
            $value = 0;
        }
        $self->{'id'} = $value;
    }
    return $self->{'id'};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
311
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
=head2 name

 Title   : name
 Usage   : $obj->name()
 Function: returns the descriptive name of the translation table
 Example :
 Returns : A string; the empty string when the object holds no valid
           table id
 Args    : None


=cut

sub name {
    my ($self) = @_;

    my $id = $self->{'id'};

    # id() stores 0 after an invalid id was requested.  Indexing with
    # $id - 1 would then read $NAMES[-1], i.e. the name of the LAST table,
    # so anything that is not an integer >= 1 yields an empty name instead.
    return '' unless defined $id and $id =~ /\A\d+\z/ and $id >= 1;

    return $NAMES[$id - 1];
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
330
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
331 =head2 translate
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
332
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
333 Title : translate
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
334 Usage : $obj->translate('YTR')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
335 Function: Returns a string of one letter amino acid codes from
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
336 nucleotide sequence input. The input can be of any length.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
337
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
338 Returns 'X' for unknown codons and codons that code for
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
339 more than one amino acid. Returns an empty string if input
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
340 is not three characters long. Exceptions for these are:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
341
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
342 - IUPAC amino acid code B for Aspartic Acid and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
343 Asparagine, is used.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
344 - IUPAC amino acid code Z for Glutamic Acid, Glutamine is
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
345 used.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
346 - if the codon is two nucleotides long and if by adding
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
347 a third character 'N', it codes for a single amino
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
348 acid (with exceptions above), return that, otherwise
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
349 return empty string.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
350
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
351 Returns empty string for other input strings that are not
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
352 three characters long.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
353
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
354 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
355 Returns : a string of one letter ambiguous IUPAC amino acid codes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
356 Args : ambiguous IUPAC nucleotide string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
357
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
358
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
359 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
360
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub translate {
    # Translate a nucleotide string into one-letter amino acid codes using
    # the codon table selected by $self->id.
    #
    # Args    : $seq - nucleotide string; case is ignored and 'u' is read
    #           as 't'
    # Returns : the protein string; '' when $seq is empty (false)
    # Throws  : when $seq is undefined
    my ($self, $seq) = @_;
    defined $seq
        or $self->throw("Calling translate without a seq argument!");
    return '' unless $seq;

    my $id = $self->id;

    # Two trailing nucleotides may still determine an amino acid once padded
    # with 'n'; a single trailing nucleotide is ignored.
    my $overhang = (length($seq) % 3 == 2) ? 2 : 0;

    $seq = lc $seq;
    $seq =~ tr/u/t/;

    # Any symbol outside a/c/g/t means triplets missing from $CODONS are
    # resolved through ambiguity expansion instead of becoming a plain 'X'.
    my $has_ambiguity = ($seq =~ /[^actg]/);

    my $protein  = "";
    my $n_codons = int(length($seq) / 3);
    for my $nth (0 .. $n_codons - 1) {
        my $codon = substr($seq, 3 * $nth, 3);
        if (exists $CODONS->{$codon}) {
            $protein .= substr($TABLES[$id-1], $CODONS->{$codon}, 1);
        }
        elsif ($has_ambiguity) {
            $protein .= $self->_translate_ambiguous_codon($codon);
        }
        else {
            $protein .= 'X';
        }
    }

    if ($overhang == 2) {
        # Pad the last two nucleotides with 'n' and translate only if the
        # result is unambiguous (the helper returns '' otherwise because the
        # partial flag is set).
        my $padded = substr($seq, -2) . "n";
        $protein .= exists $CODONS->{$padded}
            ? substr($TABLES[$id-1], $CODONS->{$padded}, 1)
            : $self->_translate_ambiguous_codon($padded, $overhang);
    }

    return $protein;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
404
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub _translate_ambiguous_codon {
    # Resolve a codon containing IUPAC ambiguity symbols to one amino acid
    # code.
    #
    # Args    : $triplet - three-symbol codon (lower case)
    #           $partial - true when the codon was padded from a two-base
    #                      overhang; unresolved codons then give '' not 'X'
    # Returns : the single amino acid all expansions agree on; 'B' when they
    #           code for exactly D and N; 'Z' for exactly E and Q; otherwise
    #           'X' (or '' when $partial is true)
    my ($self, $triplet, $partial) = @_;
    $partial ||= 0;
    my $id = $self->id;

    # Collect the distinct amino acids coded by every unambiguous expansion.
    my %seen;
    for my $codon (_unambiquous_codons($triplet)) {
        $seen{ substr($TABLES[$id-1], $CODONS->{$codon}, 1) } = 1;
    }

    my @residues = keys %seen;
    return $residues[0] if @residues == 1;

    if (@residues == 2) {
        return 'B' if $seen{'D'} and $seen{'N'};
        return 'Z' if $seen{'E'} and $seen{'Q'};
    }

    return $partial ? '' : 'X';
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
433
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
434 =head2 translate_strict
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
435
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
436 Title : translate_strict
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
437 Usage : $obj->translate_strict('ACT')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
438 Function: returns one letter amino acid code for a codon input
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
439
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
440 Fast and simple translation. User is responsible to resolve
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
441 ambiguous nucleotide codes before calling this
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
442 method. Returns 'X' for unknown codons and an empty string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
443 for input strings that are not three characters long.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
444
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
445 It is not recommended to use this method in a production
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
446 environment. Use method translate, instead.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
447
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
448 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
449 Returns : A string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
450 Args : a codon = a three nucleotide character string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
451
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
452
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
453 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
454
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub translate_strict {
    # Translate exactly one unambiguous codon with the current table.
    #
    # Args    : $value - codon; case is ignored and 'u' is read as 't'
    # Returns : '' when the input is not three characters long, 'X' when the
    #           codon has no entry in $CODONS, otherwise the amino acid code
    my ($self, $value) = @_;
    my $id = $self->{'id'};

    (my $codon = lc $value) =~ tr/u/t/;

    return ''  unless length($codon) == 3;
    return 'X' unless defined $CODONS->{$codon};
    return substr($TABLES[$id-1], $CODONS->{$codon}, 1);
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
472
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
473 =head2 revtranslate
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
474
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
475 Title : revtranslate
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
476 Usage : $obj->revtranslate('G')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
477 Function: returns codons for an amino acid
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
478
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
479 Returns an empty string for unknown amino acid
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
480 codes. Ambiguous IUPAC codes Asx,B, (Asp,D; Asn,N) and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
481 Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
482 letter amino acid codes are accepted. '*' and 'Ter' are
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
483 used for terminator.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
484
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
485 By default, the output codons are shown in DNA. If the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
486 output is needed in RNA (tr/t/u/), add a second argument
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
487 'RNA'.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
488
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
489 Example : $obj->revtranslate('Gly', 'RNA')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
490 Returns : An array of three lower case letter strings i.e. codons
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
491 Args : amino acid, 'RNA'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
492
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
493 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
494
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub revtranslate {
    # Return every codon of the current table that codes for an amino acid.
    #
    # Args    : $value  - one-letter or three-letter amino acid code
    #           $coding - optional; 'RNA' (any case) returns codons with
    #                     'u' in place of 't'
    # Returns : list of codon strings; empty list for unknown or undefined
    #           input
    my ($self, $value, $coding) = @_;
    my $id = $self->{'id'};
    my @codons;

    # Three-letter names are normalised to 'Xxx' and mapped to one letter;
    # an unknown name leaves $value undefined and yields no codons.
    if (defined $value and length($value) == 3) {
        $value = $THREELETTERSYMBOLS{ ucfirst lc $value };
    }

    if (    defined $value
        and $value =~ /$VALID_PROTEIN/
        and length($value) == 1 ) {
        $value = uc $value;

        # A symbol that passes validation but has no expansion entry gives
        # no codons instead of dereferencing undef.
        for my $aa (@{ $IUPAC_AA{$value} || [] }) {
            # \Q..\E matches the residue literally, so '*' (or any other
            # regex metacharacter) needs no special casing.
            while ($TABLES[$id-1] =~ m/\Q$aa\E/g) {
                # pos() is one past the matched column; $TRCOL is looked up
                # by the 0-based column of the match itself.
                push @codons, $TRCOL->{ pos($TABLES[$id-1]) - 1 };
            }
        }
    }

    if ($coding and uc($coding) eq 'RNA') {
        tr/t/u/ for @codons;
    }

    return @codons;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
528
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
529 =head2 is_start_codon
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
530
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
531 Title : is_start_codon
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
532 Usage : $obj->is_start_codon('ATG')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
533 Function: returns true (1) for all codons that can be used as a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
534 translation start, false (0) for others.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
535 Example : $myCodonTable->is_start_codon('ATG')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
536 Returns : boolean
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
537 Args : codon
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
538
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
539
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
540 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
541
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub is_start_codon {
    # Tell whether a codon is a translation start in the current table.
    #
    # Args    : $value - codon; case is ignored, 'u' is read as 't', and
    #           IUPAC ambiguity symbols are expanded
    # Returns : 1 when every unambiguous expansion is marked 'M' in the
    #           start table; 0 otherwise, including input that is not three
    #           characters long or that has no expansions at all
    my ($self, $value) = @_;
    my $id = $self->{'id'};

    $value = lc $value;
    $value =~ tr/u/t/;
    return 0 if length($value) != 3;

    # A symbol without an %IUPAC_DNA entry makes the expansion list empty.
    # Such a codon cannot be used as a start, so it must not fall through
    # to the "all expansions passed" result below.
    my @expansions = _unambiquous_codons($value);
    return 0 unless @expansions;

    for my $codon (@expansions) {
        return 0 if substr($STARTS[$id-1], $CODONS->{$codon}, 1) ne 'M';
    }
    return 1;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
561
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
562
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
563
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
564 =head2 is_ter_codon
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
565
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
566 Title : is_ter_codon
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
567 Usage : $obj->is_ter_codon('GAA')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
568 Function: returns true (1) for all codons that can be used as a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
569 translation terminator, false (0) for others.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
570 Example : $myCodonTable->is_ter_codon('ATG')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
571 Returns : boolean
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
572 Args : codon
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
573
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
574
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
575 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
576
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub is_ter_codon {
    # Tell whether a codon is a translation terminator in the current table.
    #
    # Args    : $value - codon; case is ignored, 'u' is read as 't', and
    #           IUPAC ambiguity symbols are expanded
    # Returns : 1 when every unambiguous expansion translates to
    #           $TERMINATOR; 0 otherwise, including input that is not three
    #           characters long or that has no expansions at all
    my ($self, $value) = @_;
    my $id = $self->{'id'};

    $value = lc $value;
    $value =~ tr/u/t/;
    return 0 if length($value) != 3;

    # A symbol without an %IUPAC_DNA entry makes the expansion list empty.
    # Such a codon is not a terminator, so it must not fall through to the
    # "all expansions passed" result below.
    my @expansions = _unambiquous_codons($value);
    return 0 unless @expansions;

    for my $codon (@expansions) {
        return 0
            if substr($TABLES[$id-1], $CODONS->{$codon}, 1) ne $TERMINATOR;
    }
    return 1;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
596
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
597 =head2 is_unknown_codon
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
598
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
599 Title : is_unknown_codon
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
600 Usage : $obj->is_unknown_codon('GAJ')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
601 Function: returns false (0) for all codons that are valid,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
602 true (1) for others.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
603 Example : $myCodonTable->is_unknown_codon('NTG')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
604 Returns : boolean
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
605 Args : codon
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
606
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
607
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
608 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
609
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub is_unknown_codon {
    # Tell whether a codon is invalid.
    #
    # Args    : $value - codon; case is ignored and 'u' is read as 't'
    # Returns : 1 when the input is not three characters long or expands to
    #           no unambiguous codon; 0 otherwise
    my ($self, $value) = @_;

    $value = lc $value;
    $value =~ tr/u/t/;
    return 1 if length($value) != 3;

    # Only the number of expansions matters here, so no table lookup is
    # needed for the individual codons.
    my @expansions = _unambiquous_codons($value);
    return @expansions ? 0 : 1;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
627
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
628 =head2 _unambiquous_codons
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
629
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
630 Title : _unambiquous_codons
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
631 Usage : @codons = _unambiquous_codons('ACN')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
632 Function:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
633 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
634 Returns : array of strings (unambiguous three letter codons, lower case)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
635 Args : a codon = a three IUPAC nucleotide character string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
636
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
637 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
638
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub _unambiquous_codons {
    # Expand a codon into every unambiguous codon it can stand for.
    #
    # Args    : $value - codon string; only its first three symbols are
    #           used, each looked up (upper-cased) in %IUPAC_DNA
    # Returns : list of lower-case codons, first position varying slowest;
    #           empty when any of the three positions has no expansion
    my ($value) = @_;

    my @pools = map { $IUPAC_DNA{uc $_} } split(//, $value);

    # Grow the codons one position at a time; a position without choices
    # empties the whole result.
    my @codons = ('');
    for my $position (0 .. 2) {
        my $choices = $pools[$position] || [];
        @codons = map {
            my $prefix = $_;
            map { $prefix . lc $_ } @{$choices};
        } @codons;
    }

    return @codons;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
654
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
655 1;