0
|
1 # $Id: Translation.pm,v 1.12 2002/09/25 08:57:52 heikki Exp $
|
|
2 #
|
|
3 # bioperl module for Bio::LiveSeq::Translation
|
|
4 #
|
|
5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
|
|
6 #
|
|
7 # Copyright Joseph Insana
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10 #
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::LiveSeq::Translation - Translation class for LiveSeq
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 #documentation needed
|
|
20
|
|
21 =head1 DESCRIPTION
|
|
22
|
|
23 This stores informations about aminoacids translations of transcripts.
|
|
24 The implementation is that a Translation object is the translation of
|
|
25 a Transcript object, with different possibilities of manipulation,
|
|
26 different coordinate system and eventually its own ranges (protein domains).
|
|
27
|
|
28 =head1 AUTHOR - Joseph A.L. Insana
|
|
29
|
|
30 Email: Insana@ebi.ac.uk, jinsana@gmx.net
|
|
31
|
|
32 Address:
|
|
33
|
|
34 EMBL Outstation, European Bioinformatics Institute
|
|
35 Wellcome Trust Genome Campus, Hinxton
|
|
36 Cambs. CB10 1SD, United Kingdom
|
|
37
|
|
38 =head1 APPENDIX
|
|
39
|
|
40 The rest of the documentation details each of the object
|
|
41 methods. Internal methods are usually preceded with a _
|
|
42
|
|
43 =cut
|
|
44
|
|
45 # Let the code begin...
|
|
46
|
|
47 package Bio::LiveSeq::Translation;
|
|
48 $VERSION=1.8;
|
|
49
|
|
50 # Version history:
|
|
51 # Thu Mar 23 14:41:52 GMT 2000 v.1.0 begun
|
|
52 # Sat Mar 25 04:08:59 GMT 2000 v 1.2 valid(), label(), position()
|
|
53 # Tue Mar 28 03:37:17 BST 2000 v 1.3 added inheritance from Transcript, subseq relies on it!
|
|
54 # Fri Mar 31 16:53:53 BST 2000 v 1.4 new seq() function that checks for stop codons: it now returns only up to the stop but doesn't continue if stop not found
|
|
55 # Fri Mar 31 18:45:07 BST 2000 v 1.41 now it asks for Transcript->downstream_seq
|
|
56 # Fri Mar 31 19:20:04 BST 2000 v 1.49 seq() now works correctly
|
|
57 # Thu Apr 13 00:10:29 BST 2000 v 1.5 start and end now take the information from Transcript
|
|
58 # Thu Apr 27 16:18:55 BST 2000 v 1.6 translation_table info added
|
|
59 # Thu May 11 17:30:41 BST 2000 v 1.66 position method updated so to return a position also for labels not in frame (not at 1st triplet position)
|
|
60 # Mon May 22 14:59:14 BST 2000 v 1.7 labelsubseq added
|
|
61 # Mon May 22 15:22:12 BST 2000 v 1.71 labelsubseq tweaked for cases where startlabel==endlabel (no useless follow() query!)
|
|
62 # Mon May 22 15:28:49 BST 2000 v 1.74 modified seq() so that the "*" is printed
|
|
63 # Wed Jun 7 04:02:18 BST 2000 v 1.75 added offset()
|
|
64 # Thu Jun 29 15:10:22 BST 2000 v 1.76 bug corrected for elongation mutations, if stop codon is not found downstream
|
|
65 # Wed Mar 28 16:37:37 BST 2001 v 1.8 carp -> warn,throw (coded methods in SeqI)
|
|
66
|
|
67 use strict;
|
|
68 #use Carp qw(croak carp cluck);
|
|
69 use vars qw($VERSION @ISA);
|
|
70 use Bio::LiveSeq::SeqI 3.2; # uses SeqI, inherits from it
|
|
71 use Bio::PrimarySeq;
|
|
72 @ISA=qw(Bio::LiveSeq::Transcript );
|
|
73
|
|
74
|
|
75 =head2 new
|
|
76
|
|
77 Title : new
|
|
78 Usage : $protein = Bio::LiveSeq::Translation->new(-transcript => $transcr);
|
|
79
|
|
80 Function: generates a new Bio::LiveSeq::Translation
|
|
81 Returns : reference to a new object of class Translation
|
|
82 Errorcode -1
|
|
83 Args : reference to an object of class Transcript
|
|
84
|
|
85 =cut
|
|
86
|
|
87 sub new {
|
|
88 my ($thing, %args) = @_;
|
|
89 my $class = ref($thing) || $thing;
|
|
90 my ($obj,%translation);
|
|
91
|
|
92 my $transcript=$args{-transcript};
|
|
93
|
|
94 $obj = \%translation;
|
|
95 $obj = bless $obj, $class;
|
|
96
|
|
97 unless ($transcript) {
|
|
98 $obj->throw("$class not initialised because no -transcript given");
|
|
99 }
|
|
100 unless (ref($transcript) eq "Bio::LiveSeq::Transcript") {
|
|
101 $obj->throw("$class not initialised because no object of class Transcript given");
|
|
102 }
|
|
103
|
|
104 #my $startbase = $transcript->start;
|
|
105 #my $endbase = $transcript->end;
|
|
106 my $strand = $transcript->strand;
|
|
107 my $seq = $transcript->{'seq'};
|
|
108
|
|
109 $obj->{'strand'}=$strand;
|
|
110 $obj->{'seq'}=$seq;
|
|
111 $obj->{'transcript'}=$transcript;
|
|
112 $obj->{'alphabet'}="protein";
|
|
113
|
|
114 $transcript->{'translation'}=$obj;# set the Translation ref into its Transcript
|
|
115 return $obj;
|
|
116 }
|
|
117
|
|
118 =head2 get_Transcript
|
|
119
|
|
120 Title : valid
|
|
121 Usage : $transcript = $obj->get_Transcript()
|
|
122 Function: retrieves the reference to the object of class Transcript (if any)
|
|
123 attached to a LiveSeq object
|
|
124 Returns : object reference
|
|
125 Args : none
|
|
126
|
|
127 =cut
|
|
128
|
|
129 sub get_Transcript {
|
|
130 my $self=shift;
|
|
131 return ($self->{'transcript'});
|
|
132 }
|
|
133
|
|
134 # These get redefined here, overriding the SeqI ones
|
|
135
|
|
136 sub change {
|
|
137 my ($self)=@_;
|
|
138 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
|
|
139 return (-1);
|
|
140 }
|
|
141 sub positionchange {
|
|
142 my ($self)=@_;
|
|
143 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
|
|
144 return (-1);
|
|
145 }
|
|
146 sub labelchange {
|
|
147 my ($self)=@_;
|
|
148 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
|
|
149 return (-1);
|
|
150 }
|
|
151
|
|
152 # this just returns the translation of the transcript, without checking for
|
|
153 # stop codons
|
|
154 sub transl_seq {
|
|
155 my $self=shift;
|
|
156 my $transcript=$self->get_Transcript;
|
|
157 my $translation=$transcript->translate(undef, undef, undef,
|
|
158 $self->translation_table)->seq;
|
|
159 return $translation;
|
|
160 }
|
|
161
|
|
162 # version 1.74 -> now the "*" is printed
|
|
163 sub seq {
|
|
164 my $self=shift;
|
|
165 my $proteinseq;
|
|
166 my $transcript=$self->get_Transcript;
|
|
167 my $translation=$transcript->translate(undef, undef, undef,
|
|
168 $self->translation_table)->seq;
|
|
169 my $stop_pos=index($translation,"*");
|
|
170 if ($stop_pos == -1) { # no stop present, continue downstream
|
|
171 my $downstreamseq=$transcript->downstream_seq();
|
|
172 #carp "the downstream is: $downstreamseq"; # debug
|
|
173 my $cdnaseq=$transcript->seq();
|
|
174 my $extendedseq = new Bio::PrimarySeq(-seq => "$cdnaseq$downstreamseq",
|
|
175 -alphabet => 'dna'
|
|
176 );
|
|
177
|
|
178 $translation=$extendedseq->translate(undef, undef, undef,
|
|
179 $self->translation_table)->seq;
|
|
180 #carp "the new translation is: $translation"; # debug
|
|
181 $stop_pos=index($translation,"*");
|
|
182 if ($stop_pos == -1) { # still no stop present, return warning
|
|
183 $self->warn("Warning: no stop codon found in the retrieved sequence downstream of Transcript ",1);
|
|
184 undef $stop_pos;
|
|
185 $proteinseq=$translation;
|
|
186 } else {
|
|
187 $proteinseq=substr($translation,0,$stop_pos+1);
|
|
188 #carp "the new stopped translation is: $proteinseq, because the stop is at position $stop_pos"; # debug
|
|
189 }
|
|
190 } else {
|
|
191 $proteinseq=substr($translation,0,$stop_pos+1);
|
|
192 }
|
|
193 return $proteinseq;
|
|
194 }
|
|
195
|
|
196 sub length {
|
|
197 my $self=shift;
|
|
198 my $seq=$self->seq;
|
|
199 my $length=length($seq);
|
|
200 return $length;
|
|
201 }
|
|
202
|
|
203 sub all_labels {
|
|
204 my $self=shift;
|
|
205 return $self->get_Transcript->all_labels;
|
|
206 }
|
|
207
|
|
208 # counts in triplet. Only a label matching the beginning of a triplet coding
|
|
209 # for an aminoacid is considered valid when setting coordinate_start
|
|
210 # (i.e. only in frame!)
|
|
211 sub valid {
|
|
212 my ($self,$label)=@_;
|
|
213 my $i;
|
|
214 my @labels=$self->get_Transcript->all_labels;
|
|
215 my $length=$#labels;
|
|
216 while ($i <= $length) {
|
|
217 if ($label == $labels[$i]) {
|
|
218 return (1); # found
|
|
219 }
|
|
220 $i=$i+3;
|
|
221 }
|
|
222 return (0); # not found
|
|
223 }
|
|
224
|
|
225 # returns the label to the first nucleotide of the triplet coding for $position aminoacid
|
|
226 sub label {
|
|
227 my ($self,$position)=@_;
|
|
228 my $firstlabel=$self->coordinate_start; # this is in_frame checked
|
|
229 if ($position > 0) {
|
|
230 $position=$position*3-2;
|
|
231 } else { # if position = 0 this will be caught by Transcript, error thrown
|
|
232 $position=$position*3;
|
|
233 }
|
|
234 return $self->get_Transcript->label($position,$firstlabel);
|
|
235 # check for coord_start different
|
|
236 }
|
|
237
|
|
238 # returns position (aminoacids numbering) of a particular label
|
|
239 # used to return 0 for not in frame labels
|
|
240 # now returns the position anyway (after version 1.66)
|
|
241 sub position {
|
|
242 my ($self,$label)=@_;
|
|
243 my $firstlabel=$self->coordinate_start; # this is in_frame checked
|
|
244 my $position=$self->get_Transcript->position($label,$firstlabel);
|
|
245 use integer;
|
|
246 my $modulus=$position % 3;
|
|
247 if ($position == 0) {
|
|
248 return (0);
|
|
249 } elsif ($position > 0) {
|
|
250 if ($modulus != 1) {
|
|
251 $self->warn("Attention! Label $label is not in frame ".
|
|
252 "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable
|
|
253 if ($modulus == 2) {
|
|
254 return ($position / 3 + 1);
|
|
255 } else { # i.e. modulus == 0
|
|
256 return ($position / 3);
|
|
257 }
|
|
258 }
|
|
259 return ($position / 3 + 1);
|
|
260 } else { # pos < 0
|
|
261 if ($modulus != 0) {
|
|
262 $self->warn("Attention! Label $label is not in frame ".
|
|
263 "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable
|
|
264 return ($position / 3 - 1); # ok for both other positions
|
|
265 }
|
|
266 return ($position / 3);
|
|
267 }
|
|
268 $self->throw( "WEIRD: execution shouldn't have reached here");
|
|
269 return (0); # this should never happen, but just in case
|
|
270 }
|
|
271
|
|
272 # note: it inherits subseq and labelsubseq from Transcript!
|
|
273
|
|
274 sub start {
|
|
275 my $self=shift;
|
|
276 return ($self->{'transcript'}->start);
|
|
277 }
|
|
278
|
|
279 sub end {
|
|
280 my $self=shift;
|
|
281 return ($self->{'transcript'}->end);
|
|
282 }
|
|
283
|
|
284 =head2 aa_ranges
|
|
285
|
|
286 Title : aa_ranges
|
|
287 Usage : @proteinfeatures = $translation->aa_ranges()
|
|
288 Function: to retrieve all the LiveSeq AARange objects attached to a
|
|
289 Translation, usually created out of a SwissProt database entry
|
|
290 crossreferenced from an EMBL CDS feature.
|
|
291 Returns : an array
|
|
292 Args : none
|
|
293
|
|
294 =cut
|
|
295
|
|
296 # returns an array of obj_ref of AARange objects attached to the Translation
|
|
297 sub aa_ranges {
|
|
298 my $self=shift;
|
|
299 return ($self->{'aa_ranges'});
|
|
300 }
|
|
301
|
|
302 sub translation_table {
|
|
303 my $self=shift;
|
|
304 $self->get_Transcript->translation_table(@_);
|
|
305 }
|
|
306
|
|
307 # returns all aminoacids "affected" i.e. all aminoacids coded by any codon
|
|
308 # "touched" by the range selected between the labels, even if only partially.
|
|
309
|
|
310 # it's not optimized for performance but it's useful
|
|
311
|
|
312 sub labelsubseq {
|
|
313 my ($self,$start,$length,$end)=@_;
|
|
314 my ($pos1,$pos2);
|
|
315 my $transcript=$self->get_Transcript;
|
|
316 if ($start) {
|
|
317 unless ($transcript->valid($start)) {
|
|
318 $self->warn("Start label not valid"); return (-1);
|
|
319 }
|
|
320 $pos1=$self->position($start);
|
|
321 }
|
|
322 if ($end) {
|
|
323 if ($end == $start) {
|
|
324 $length=1;
|
|
325 } else {
|
|
326 unless ($transcript->valid($end)) {
|
|
327 $self->warn("End label not valid"); return (-1);
|
|
328 }
|
|
329 unless ($transcript->follows($start,$end) == 1) {
|
|
330 $self->warn("End label does not follow Start label!"); return (-1);
|
|
331 }
|
|
332 $pos2=$self->position($end);
|
|
333 $length=$pos2-$pos1+1;
|
|
334 }
|
|
335 }
|
|
336 my $sequence=$self->seq;
|
|
337 return (substr($sequence,$pos1-1,$length));
|
|
338 }
|
|
339
|
|
340 # return the offset in aminoacids from LiveSeq protein sequence and SwissProt
|
|
341 # sequence (usually as a result of an INIT_MET or a gap)
|
|
342 sub offset {
|
|
343 my $self=shift;
|
|
344 return ($self->{'offset'});
|
|
345 }
|
|
346
|
|
347 1;
|