comparison variant_effect_predictor/Bio/LiveSeq/Translation.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: Translation.pm,v 1.12 2002/09/25 08:57:52 heikki Exp $
2 #
3 # bioperl module for Bio::LiveSeq::Translation
4 #
5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
6 #
7 # Copyright Joseph Insana
8 #
9 # You may distribute this module under the same terms as perl itself
10 #
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::LiveSeq::Translation - Translation class for LiveSeq
16
17 =head1 SYNOPSIS
18
19 #documentation needed
20
21 =head1 DESCRIPTION
22
23 This stores informations about aminoacids translations of transcripts.
24 The implementation is that a Translation object is the translation of
25 a Transcript object, with different possibilities of manipulation,
26 different coordinate system and eventually its own ranges (protein domains).
27
28 =head1 AUTHOR - Joseph A.L. Insana
29
30 Email: Insana@ebi.ac.uk, jinsana@gmx.net
31
32 Address:
33
34 EMBL Outstation, European Bioinformatics Institute
35 Wellcome Trust Genome Campus, Hinxton
36 Cambs. CB10 1SD, United Kingdom
37
38 =head1 APPENDIX
39
40 The rest of the documentation details each of the object
41 methods. Internal methods are usually preceded with a _
42
43 =cut
44
45 # Let the code begin...
46
47 package Bio::LiveSeq::Translation;
48 $VERSION=1.8;
49
50 # Version history:
51 # Thu Mar 23 14:41:52 GMT 2000 v.1.0 begun
52 # Sat Mar 25 04:08:59 GMT 2000 v 1.2 valid(), label(), position()
53 # Tue Mar 28 03:37:17 BST 2000 v 1.3 added inheritance from Transcript, subseq relies on it!
54 # Fri Mar 31 16:53:53 BST 2000 v 1.4 new seq() function that checks for stop codons: it now returns only up to the stop but doesn't continue if stop not found
55 # Fri Mar 31 18:45:07 BST 2000 v 1.41 now it asks for Transcript->downstream_seq
56 # Fri Mar 31 19:20:04 BST 2000 v 1.49 seq() now works correctly
57 # Thu Apr 13 00:10:29 BST 2000 v 1.5 start and end now take the information from Transcript
58 # Thu Apr 27 16:18:55 BST 2000 v 1.6 translation_table info added
59 # Thu May 11 17:30:41 BST 2000 v 1.66 position method updated so to return a position also for labels not in frame (not at 1st triplet position)
60 # Mon May 22 14:59:14 BST 2000 v 1.7 labelsubseq added
61 # Mon May 22 15:22:12 BST 2000 v 1.71 labelsubseq tweaked for cases where startlabel==endlabel (no useless follow() query!)
62 # Mon May 22 15:28:49 BST 2000 v 1.74 modified seq() so that the "*" is printed
63 # Wed Jun 7 04:02:18 BST 2000 v 1.75 added offset()
64 # Thu Jun 29 15:10:22 BST 2000 v 1.76 bug corrected for elongation mutations, if stop codon is not found downstream
65 # Wed Mar 28 16:37:37 BST 2001 v 1.8 carp -> warn,throw (coded methods in SeqI)
66
67 use strict;
68 #use Carp qw(croak carp cluck);
69 use vars qw($VERSION @ISA);
70 use Bio::LiveSeq::SeqI 3.2; # uses SeqI, inherits from it
71 use Bio::PrimarySeq;
72 @ISA=qw(Bio::LiveSeq::Transcript );
73
74
75 =head2 new
76
77 Title : new
78 Usage : $protein = Bio::LiveSeq::Translation->new(-transcript => $transcr);
79
80 Function: generates a new Bio::LiveSeq::Translation
81 Returns : reference to a new object of class Translation
82 Errorcode -1
83 Args : reference to an object of class Transcript
84
85 =cut
86
87 sub new {
88 my ($thing, %args) = @_;
89 my $class = ref($thing) || $thing;
90 my ($obj,%translation);
91
92 my $transcript=$args{-transcript};
93
94 $obj = \%translation;
95 $obj = bless $obj, $class;
96
97 unless ($transcript) {
98 $obj->throw("$class not initialised because no -transcript given");
99 }
100 unless (ref($transcript) eq "Bio::LiveSeq::Transcript") {
101 $obj->throw("$class not initialised because no object of class Transcript given");
102 }
103
104 #my $startbase = $transcript->start;
105 #my $endbase = $transcript->end;
106 my $strand = $transcript->strand;
107 my $seq = $transcript->{'seq'};
108
109 $obj->{'strand'}=$strand;
110 $obj->{'seq'}=$seq;
111 $obj->{'transcript'}=$transcript;
112 $obj->{'alphabet'}="protein";
113
114 $transcript->{'translation'}=$obj;# set the Translation ref into its Transcript
115 return $obj;
116 }
117
118 =head2 get_Transcript
119
120 Title : valid
121 Usage : $transcript = $obj->get_Transcript()
122 Function: retrieves the reference to the object of class Transcript (if any)
123 attached to a LiveSeq object
124 Returns : object reference
125 Args : none
126
127 =cut
128
129 sub get_Transcript {
130 my $self=shift;
131 return ($self->{'transcript'});
132 }
133
134 # These get redefined here, overriding the SeqI ones
135
136 sub change {
137 my ($self)=@_;
138 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
139 return (-1);
140 }
141 sub positionchange {
142 my ($self)=@_;
143 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
144 return (-1);
145 }
146 sub labelchange {
147 my ($self)=@_;
148 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
149 return (-1);
150 }
151
152 # this just returns the translation of the transcript, without checking for
153 # stop codons
154 sub transl_seq {
155 my $self=shift;
156 my $transcript=$self->get_Transcript;
157 my $translation=$transcript->translate(undef, undef, undef,
158 $self->translation_table)->seq;
159 return $translation;
160 }
161
162 # version 1.74 -> now the "*" is printed
163 sub seq {
164 my $self=shift;
165 my $proteinseq;
166 my $transcript=$self->get_Transcript;
167 my $translation=$transcript->translate(undef, undef, undef,
168 $self->translation_table)->seq;
169 my $stop_pos=index($translation,"*");
170 if ($stop_pos == -1) { # no stop present, continue downstream
171 my $downstreamseq=$transcript->downstream_seq();
172 #carp "the downstream is: $downstreamseq"; # debug
173 my $cdnaseq=$transcript->seq();
174 my $extendedseq = new Bio::PrimarySeq(-seq => "$cdnaseq$downstreamseq",
175 -alphabet => 'dna'
176 );
177
178 $translation=$extendedseq->translate(undef, undef, undef,
179 $self->translation_table)->seq;
180 #carp "the new translation is: $translation"; # debug
181 $stop_pos=index($translation,"*");
182 if ($stop_pos == -1) { # still no stop present, return warning
183 $self->warn("Warning: no stop codon found in the retrieved sequence downstream of Transcript ",1);
184 undef $stop_pos;
185 $proteinseq=$translation;
186 } else {
187 $proteinseq=substr($translation,0,$stop_pos+1);
188 #carp "the new stopped translation is: $proteinseq, because the stop is at position $stop_pos"; # debug
189 }
190 } else {
191 $proteinseq=substr($translation,0,$stop_pos+1);
192 }
193 return $proteinseq;
194 }
195
196 sub length {
197 my $self=shift;
198 my $seq=$self->seq;
199 my $length=length($seq);
200 return $length;
201 }
202
203 sub all_labels {
204 my $self=shift;
205 return $self->get_Transcript->all_labels;
206 }
207
208 # counts in triplet. Only a label matching the beginning of a triplet coding
209 # for an aminoacid is considered valid when setting coordinate_start
210 # (i.e. only in frame!)
211 sub valid {
212 my ($self,$label)=@_;
213 my $i;
214 my @labels=$self->get_Transcript->all_labels;
215 my $length=$#labels;
216 while ($i <= $length) {
217 if ($label == $labels[$i]) {
218 return (1); # found
219 }
220 $i=$i+3;
221 }
222 return (0); # not found
223 }
224
225 # returns the label to the first nucleotide of the triplet coding for $position aminoacid
226 sub label {
227 my ($self,$position)=@_;
228 my $firstlabel=$self->coordinate_start; # this is in_frame checked
229 if ($position > 0) {
230 $position=$position*3-2;
231 } else { # if position = 0 this will be caught by Transcript, error thrown
232 $position=$position*3;
233 }
234 return $self->get_Transcript->label($position,$firstlabel);
235 # check for coord_start different
236 }
237
238 # returns position (aminoacids numbering) of a particular label
239 # used to return 0 for not in frame labels
240 # now returns the position anyway (after version 1.66)
241 sub position {
242 my ($self,$label)=@_;
243 my $firstlabel=$self->coordinate_start; # this is in_frame checked
244 my $position=$self->get_Transcript->position($label,$firstlabel);
245 use integer;
246 my $modulus=$position % 3;
247 if ($position == 0) {
248 return (0);
249 } elsif ($position > 0) {
250 if ($modulus != 1) {
251 $self->warn("Attention! Label $label is not in frame ".
252 "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable
253 if ($modulus == 2) {
254 return ($position / 3 + 1);
255 } else { # i.e. modulus == 0
256 return ($position / 3);
257 }
258 }
259 return ($position / 3 + 1);
260 } else { # pos < 0
261 if ($modulus != 0) {
262 $self->warn("Attention! Label $label is not in frame ".
263 "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable
264 return ($position / 3 - 1); # ok for both other positions
265 }
266 return ($position / 3);
267 }
268 $self->throw( "WEIRD: execution shouldn't have reached here");
269 return (0); # this should never happen, but just in case
270 }
271
272 # note: it inherits subseq and labelsubseq from Transcript!
273
274 sub start {
275 my $self=shift;
276 return ($self->{'transcript'}->start);
277 }
278
279 sub end {
280 my $self=shift;
281 return ($self->{'transcript'}->end);
282 }
283
284 =head2 aa_ranges
285
286 Title : aa_ranges
287 Usage : @proteinfeatures = $translation->aa_ranges()
288 Function: to retrieve all the LiveSeq AARange objects attached to a
289 Translation, usually created out of a SwissProt database entry
290 crossreferenced from an EMBL CDS feature.
291 Returns : an array
292 Args : none
293
294 =cut
295
296 # returns an array of obj_ref of AARange objects attached to the Translation
297 sub aa_ranges {
298 my $self=shift;
299 return ($self->{'aa_ranges'});
300 }
301
302 sub translation_table {
303 my $self=shift;
304 $self->get_Transcript->translation_table(@_);
305 }
306
307 # returns all aminoacids "affected" i.e. all aminoacids coded by any codon
308 # "touched" by the range selected between the labels, even if only partially.
309
310 # it's not optimized for performance but it's useful
311
312 sub labelsubseq {
313 my ($self,$start,$length,$end)=@_;
314 my ($pos1,$pos2);
315 my $transcript=$self->get_Transcript;
316 if ($start) {
317 unless ($transcript->valid($start)) {
318 $self->warn("Start label not valid"); return (-1);
319 }
320 $pos1=$self->position($start);
321 }
322 if ($end) {
323 if ($end == $start) {
324 $length=1;
325 } else {
326 unless ($transcript->valid($end)) {
327 $self->warn("End label not valid"); return (-1);
328 }
329 unless ($transcript->follows($start,$end) == 1) {
330 $self->warn("End label does not follow Start label!"); return (-1);
331 }
332 $pos2=$self->position($end);
333 $length=$pos2-$pos1+1;
334 }
335 }
336 my $sequence=$self->seq;
337 return (substr($sequence,$pos1-1,$length));
338 }
339
340 # return the offset in aminoacids from LiveSeq protein sequence and SwissProt
341 # sequence (usually as a result of an INIT_MET or a gap)
342 sub offset {
343 my $self=shift;
344 return ($self->{'offset'});
345 }
346
347 1;