annotate variant_effect_predictor/Bio/SeqUtils.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: SeqUtils.pm,v 1.11.2.1 2003/08/11 20:11:17 jason Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 # BioPerl module for Bio::SeqUtils
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 # Copyright Heikki Lehvaslaiho
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 # POD documentation - main docs before the code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 Bio::SeqUtils - Additional methods for PrimarySeq objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 use Bio::SeqUtils;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 # get a Bio::PrimarySeqI compliant object, $seq, somehow
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 $util = new Bio::SeqUtils;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 $polypeptide_3char = $util->seq3($seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 # or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 $polypeptide_3char = Bio::SeqUtils->seq3($seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 # set the sequence string (stored in one char code in the object)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 Bio::SeqUtils->seq3in($seq, $polypeptide_3char);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 # translate a sequence in all six frames
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 @seqs = Bio::SeqUtils->translate_6frames($seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 This class is a holder of methods that work on Bio::PrimarySeqI-
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 compliant sequence objects, e.g. Bio::PrimarySeq and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 Bio::Seq. These methods are not part of the Bio::PrimarySeqI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 interface and should in general not be essential to the primary function
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 of sequence objects. If you are thinking of adding essential
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 functions, it might be better to create your own sequence class.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 See L<Bio::PrimarySeqI>, L<Bio::PrimarySeq>, and L<Bio::Seq> for more.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 The methods take as their first argument a sequence object. It is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 possible to use methods without first creating a SeqUtils object,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 i.e. to call them as class methods, e.g. Bio::SeqUtils->seq3($seq).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 The first two methods, seq3() and seq3in(), give out or read in protein
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 sequences coded in three letter IUPAC amino acid codes.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 The next two methods, translate_3frames() and translate_6frames(), wrap
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 around the standard translate method to give back an array of three
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 forward or all six frame translations.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 =head2 Mailing Lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 User feedback is an integral part of the evolution of this and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 Bioperl modules. Send your comments and suggestions preferably to one
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 of the Bioperl mailing lists. Your participation is much appreciated.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 bioperl-l@bioperl.org - General discussion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 http://bio.perl.org/MailList.html - About the mailing lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 Report bugs to the Bioperl bug tracking system to help us keep track of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 the bugs and their resolution. Bug reports can be submitted via email
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 bioperl-bugs@bio.perl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 =head1 AUTHOR - Heikki Lehvaslaiho
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 Email: heikki@ebi.ac.uk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 Address:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 EMBL Outstation, European Bioinformatics Institute
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 Wellcome Trust Genome Campus, Hinxton
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 Cambs. CB10 1SD, United Kingdom
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 =head1 APPENDIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 The rest of the documentation details each of the object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 methods. Internal methods are usually preceded with a _
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 # Let the code begin...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 package Bio::SeqUtils;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 use vars qw(@ISA %ONECODE %THREECODE);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 use Carp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 @ISA = qw(Bio::Root::Root);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 # new inherited from RootI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
BEGIN {

    # Three letter amino acid code => one letter code.  Besides the
    # twenty standard residues the table holds the ambiguity codes
    # Asx (B) and Glx (Z), the unknown residue Xaa (X), the
    # terminator Ter (*) and selenocysteine Sec (U).
    %ONECODE = qw(
        Ala A    Asx B    Cys C    Asp D
        Glu E    Phe F    Gly G    His H
        Ile I    Lys K    Leu L    Met M
        Asn N    Pro P    Gln Q    Arg R
        Ser S    Thr T    Val V    Trp W
        Xaa X    Tyr Y    Glx Z    Ter *
        Sec U
    );

    # One letter code => three letter code.  Every one letter code in
    # %ONECODE is unique, so inverting that table yields exactly the
    # reverse mapping.
    %THREECODE = reverse %ONECODE;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 seq3

 Title   : seq3
 Usage   : $string = Bio::SeqUtils->seq3($seq);
           $string = Bio::SeqUtils->seq3($seq, $stop, $sep);
 Function: Read only method that returns the amino acid sequence as a
           string of three letter codes. The alphabet of the sequence
           has to be 'protein'. The sequence is upper-cased before
           lookup. Output uses the codes in the module's code table
           plus 'Ter' for the terminator. Any character without an
           entry in the table, and the default unknown character 'X',
           is output as 'Xaa'. The noncoded amino acid selenocysteine
           is recognized (Sec, U).

           A custom stop character applies to this call only; the
           module-wide code table is left untouched.
 Returns : A scalar; the empty string for an empty sequence
 Args    : sequence object (Bio::PrimarySeqI, alphabet 'protein')
           character used for stop in the protein sequence,
             optional, defaults to '*'
           string used to separate the output amino acid codes,
             optional, defaults to ''
 Throws  : if the first argument is not a Bio::PrimarySeqI object,
           if its alphabet is not 'protein', or if the stop argument
           is not exactly one character long

=cut

sub seq3 {
    my ($self, $seq, $stop, $sep) = @_;

    # The eval keeps undef and unblessed values from dying with a raw
    # "Can't call method" error before we can report them properly.
    eval { $seq->isa('Bio::PrimarySeqI') }
        or $self->throw('Not a Bio::PrimarySeqI object but ['
                        . (defined $seq ? $seq : 'undef') . ']');
    $seq->alphabet eq 'protein'
        or $self->throw('Not a protein sequence');

    # Work on a private copy of the table so that a caller supplied
    # stop character does not leak into later calls.
    my %three_of = %THREECODE;
    if (defined $stop) {
        length $stop == 1
            or $self->throw("One character stop needed, not [$stop]");
        $three_of{$stop} = 'Ter';
    }
    $sep ||= '';

    my $residues = $seq->seq;
    $residues = '' unless defined $residues;

    # join() places the separator only between codes, so nothing has
    # to be trimmed afterwards and an empty sequence is handled too.
    my @codes;
    foreach my $aa (split //, uc $residues) {
        push @codes, $three_of{$aa} || 'Xaa';
    }
    return join $sep, @codes;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 seq3in

 Title   : seq3in
 Usage   : $seq = Bio::SeqUtils->seq3in($seq, 'MetGlyTer');
           $seq = Bio::SeqUtils->seq3in($seq, $string, $stop, $unknown);
 Function: Method for in-place changing of the sequence of a
           Bio::PrimarySeqI sequence object. The three letter amino
           acid input string is read three characters at a time and
           converted into one letter code. Lookup is case sensitive
           ('Met', not 'MET'). Any triplet without an entry in the
           code table, and the default 'Xaa', is converted into the
           unknown character. One or two characters left over at the
           end of the input string are ignored.

           Custom stop and unknown characters apply to this call
           only; the module-wide code table is left untouched.
 Returns : the sequence object that was passed in, with its sequence
           replaced
 Args    : sequence object (Bio::PrimarySeqI, alphabet 'protein')
           string of three letter amino acid codes
           character to be used for stop in the protein sequence,
             optional, defaults to '*'
           character to be used for unknown in the protein sequence,
             optional, defaults to 'X'
 Throws  : if the first argument is not a Bio::PrimarySeqI object,
           if its alphabet is not 'protein', or if the stop or
           unknown argument is not exactly one character long

=cut

sub seq3in {
    my ($self, $seq, $string, $stop, $unknown) = @_;

    # The eval keeps undef and unblessed values from dying with a raw
    # "Can't call method" error before we can report them properly.
    eval { $seq->isa('Bio::PrimarySeqI') }
        or $self->throw('Not a Bio::PrimarySeqI object but ['
                        . (defined $seq ? $seq : 'undef') . ']');
    $seq->alphabet eq 'protein'
        or $self->throw('Not a protein sequence');

    # Work on a private copy of the table so that caller supplied
    # characters do not leak into later calls.
    my %one_of = %ONECODE;
    if (defined $stop) {
        length $stop == 1
            or $self->throw("One character stop needed, not [$stop]");
        $one_of{'Ter'} = $stop;
    }
    if (defined $unknown) {
        length $unknown == 1
            or $self->throw("One character unknown needed, not [$unknown]");
        $one_of{'Xaa'} = $unknown;
    }
    my $unknown_char = defined $one_of{'Xaa'} ? $one_of{'Xaa'} : 'X';

    $string = '' unless defined $string;

    # Only complete triplets match; a trailing partial triplet is
    # dropped.  exists() is used instead of a truth test so that a
    # one letter code of '0' is still honoured.
    my $aas = '';
    foreach my $aa3 ($string =~ /(.{3})/gs) {
        $aas .= exists $one_of{$aa3} ? $one_of{$aa3} : $unknown_char;
    }

    $seq->seq($aas);
    return $seq;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 translate_3frames

 Title   : translate_3frames
 Usage   : @prots = Bio::SeqUtils->translate_3frames($seq)
 Function: Translate a nucleotide sequence in three forward frames.
           The IDs of the sequences are appended with '-0F', '-1F', '-2F'.
 Returns : An array of three seq objects, in frame order 0, 1, 2
 Args    : sequence object (anything that can 'translate')
           same arguments as to Bio::PrimarySeqI::translate, i.e.
           stop, unknown, frame, table id, fullCDS, throw. The frame
           argument is ignored because every frame is translated.
 Throws  : if the first argument has no translate() method

=cut

sub translate_3frames {
    my ($self, $seq, @args) = @_;

    # The eval keeps undef and unblessed values from dying with a raw
    # "Can't call method" error before we can report them properly.
    unless (eval { $seq->can('translate') }) {
        my $shown = defined $seq ? $seq : 'undef';
        $self->throw("Object [$shown] of class [" . ref($seq)
                     . '] can not be translated.');
    }

    # The third positional argument is the caller's frame; it is
    # replaced by each of the three frames in turn.
    my ($stop, $unknown, undef, $tableid, $fullCDS, $throw) = @args;

    my @seqs;
    foreach my $f (0 .. 2) {
        my $translation =
            $seq->translate($stop, $unknown, $f, $tableid, $fullCDS, $throw);
        $translation->id($seq->id . "-" . $f . "F");
        push @seqs, $translation;
    }

    return @seqs;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 translate_6frames

 Title   : translate_6frames
 Usage   : @prots = Bio::SeqUtils->translate_6frames($seq)
 Function: translate a nucleotide sequence in all six frames
           The IDs of the sequences are appended with '-0F', '-1F', '-2F',
           '-0R', '-1R', '-2R'.
           The sequence object passed in is not modified; the reverse
           frames are translated from the object returned by its
           revcom() method.
 Returns : An array of six seq objects, the three forward frames
           followed by the three reverse frames
 Args    : sequence object
           same arguments as to Bio::PrimarySeqI::translate

=cut

sub translate_6frames {
    my ($self, $seq, @args) = @_;

    my @forward = $self->translate_3frames($seq, @args);

    # Translate a separate reverse-complement object rather than
    # overwriting the caller's sequence with its reverse complement.
    my @reverse = $self->translate_3frames($seq->revcom, @args);

    # translate_3frames labels its results '-NF'; relabel the
    # reverse strand results as '-NR'.
    foreach my $translation (@reverse) {
        my $id = $translation->id;
        $id =~ s/F$/R/g;
        $translation->id($id);
    }

    return @forward, @reverse;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 valid_aa

 Title   : valid_aa
 Usage   : my @aa = Bio::SeqUtils->valid_aa
 Function: Retrieves a list of the valid amino acid codes.
           The codes are sorted, except that the ambiguity, unknown
           and terminator codes are moved to the end of the list in
           the order ['B', 'Z', 'X', '*'] (or their three letter
           equivalents 'Asx', 'Glx', 'Xaa', 'Ter').
 Returns : array of all the valid amino acid codes; for code 2 a hash
           mapping three letter codes to one letter codes and back;
           an empty list, after a warning, for any other code
 Args    : [optional] $code => [0 -> return list of 1 letter aa codes,
                                1 -> return list of 3 letter aa codes,
                                2 -> return associative array of both ]

=cut

sub valid_aa {
    my ($self, $code) = @_;

    # No code (or a false one): the one letter codes.
    unless ($code) {
        my @codes = grep { $_ !~ /[BZX\*]/ } sort values %ONECODE;
        push @codes, qw(B Z X *);    # special codes always come last
        return @codes;
    }

    # Code 1: the three letter codes.
    if ($code == 1) {
        my @codes = grep { $_ !~ /(Asx|Glx|Xaa|Ter)/ } sort keys %ONECODE;
        push @codes, ('Asx', 'Glx', 'Xaa', 'Ter' );
        return @codes;
    }

    # Code 2: a single hash that maps in both directions.
    if ($code == 2) {
        my %codes = %ONECODE;
        $codes{ $ONECODE{$_} } = $_ foreach keys %ONECODE;
        return %codes;
    }

    $self->warn("unrecognized code in ".ref($self)." method valid_aa()");
    return ();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 1;