Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/LiveSeq/Translation.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: Translation.pm,v 1.12 2002/09/25 08:57:52 heikki Exp $ | |
| 2 # | |
| 3 # bioperl module for Bio::LiveSeq::Translation | |
| 4 # | |
| 5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net> | |
| 6 # | |
| 7 # Copyright Joseph Insana | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 # | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::LiveSeq::Translation - Translation class for LiveSeq | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 #documentation needed | |
| 20 | |
| 21 =head1 DESCRIPTION | |
| 22 | |
| 23 This stores informations about aminoacids translations of transcripts. | |
| 24 The implementation is that a Translation object is the translation of | |
| 25 a Transcript object, with different possibilities of manipulation, | |
| 26 different coordinate system and eventually its own ranges (protein domains). | |
| 27 | |
| 28 =head1 AUTHOR - Joseph A.L. Insana | |
| 29 | |
| 30 Email: Insana@ebi.ac.uk, jinsana@gmx.net | |
| 31 | |
| 32 Address: | |
| 33 | |
| 34 EMBL Outstation, European Bioinformatics Institute | |
| 35 Wellcome Trust Genome Campus, Hinxton | |
| 36 Cambs. CB10 1SD, United Kingdom | |
| 37 | |
| 38 =head1 APPENDIX | |
| 39 | |
| 40 The rest of the documentation details each of the object | |
| 41 methods. Internal methods are usually preceded with a _ | |
| 42 | |
| 43 =cut | |
| 44 | |
| 45 # Let the code begin... | |
| 46 | |
| 47 package Bio::LiveSeq::Translation; | |
| 48 $VERSION=1.8; | |
| 49 | |
| 50 # Version history: | |
| 51 # Thu Mar 23 14:41:52 GMT 2000 v.1.0 begun | |
| 52 # Sat Mar 25 04:08:59 GMT 2000 v 1.2 valid(), label(), position() | |
| 53 # Tue Mar 28 03:37:17 BST 2000 v 1.3 added inheritance from Transcript, subseq relies on it! | |
| 54 # Fri Mar 31 16:53:53 BST 2000 v 1.4 new seq() function that checks for stop codons: it now returns only up to the stop but doesn't continue if stop not found | |
| 55 # Fri Mar 31 18:45:07 BST 2000 v 1.41 now it asks for Transcript->downstream_seq | |
| 56 # Fri Mar 31 19:20:04 BST 2000 v 1.49 seq() now works correctly | |
| 57 # Thu Apr 13 00:10:29 BST 2000 v 1.5 start and end now take the information from Transcript | |
| 58 # Thu Apr 27 16:18:55 BST 2000 v 1.6 translation_table info added | |
| 59 # Thu May 11 17:30:41 BST 2000 v 1.66 position method updated so to return a position also for labels not in frame (not at 1st triplet position) | |
| 60 # Mon May 22 14:59:14 BST 2000 v 1.7 labelsubseq added | |
| 61 # Mon May 22 15:22:12 BST 2000 v 1.71 labelsubseq tweaked for cases where startlabel==endlabel (no useless follow() query!) | |
| 62 # Mon May 22 15:28:49 BST 2000 v 1.74 modified seq() so that the "*" is printed | |
| 63 # Wed Jun 7 04:02:18 BST 2000 v 1.75 added offset() | |
| 64 # Thu Jun 29 15:10:22 BST 2000 v 1.76 bug corrected for elongation mutations, if stop codon is not found downstream | |
| 65 # Wed Mar 28 16:37:37 BST 2001 v 1.8 carp -> warn,throw (coded methods in SeqI) | |
| 66 | |
| 67 use strict; | |
| 68 #use Carp qw(croak carp cluck); | |
| 69 use vars qw($VERSION @ISA); | |
| 70 use Bio::LiveSeq::SeqI 3.2; # uses SeqI, inherits from it | |
| 71 use Bio::PrimarySeq; | |
| 72 @ISA=qw(Bio::LiveSeq::Transcript ); | |
| 73 | |
| 74 | |
| 75 =head2 new | |
| 76 | |
| 77 Title : new | |
| 78 Usage : $protein = Bio::LiveSeq::Translation->new(-transcript => $transcr); | |
| 79 | |
| 80 Function: generates a new Bio::LiveSeq::Translation | |
| 81 Returns : reference to a new object of class Translation | |
| 82 Errorcode -1 | |
| 83 Args : reference to an object of class Transcript | |
| 84 | |
| 85 =cut | |
| 86 | |
| 87 sub new { | |
| 88 my ($thing, %args) = @_; | |
| 89 my $class = ref($thing) || $thing; | |
| 90 my ($obj,%translation); | |
| 91 | |
| 92 my $transcript=$args{-transcript}; | |
| 93 | |
| 94 $obj = \%translation; | |
| 95 $obj = bless $obj, $class; | |
| 96 | |
| 97 unless ($transcript) { | |
| 98 $obj->throw("$class not initialised because no -transcript given"); | |
| 99 } | |
| 100 unless (ref($transcript) eq "Bio::LiveSeq::Transcript") { | |
| 101 $obj->throw("$class not initialised because no object of class Transcript given"); | |
| 102 } | |
| 103 | |
| 104 #my $startbase = $transcript->start; | |
| 105 #my $endbase = $transcript->end; | |
| 106 my $strand = $transcript->strand; | |
| 107 my $seq = $transcript->{'seq'}; | |
| 108 | |
| 109 $obj->{'strand'}=$strand; | |
| 110 $obj->{'seq'}=$seq; | |
| 111 $obj->{'transcript'}=$transcript; | |
| 112 $obj->{'alphabet'}="protein"; | |
| 113 | |
| 114 $transcript->{'translation'}=$obj;# set the Translation ref into its Transcript | |
| 115 return $obj; | |
| 116 } | |
| 117 | |
| 118 =head2 get_Transcript | |
| 119 | |
| 120 Title : valid | |
| 121 Usage : $transcript = $obj->get_Transcript() | |
| 122 Function: retrieves the reference to the object of class Transcript (if any) | |
| 123 attached to a LiveSeq object | |
| 124 Returns : object reference | |
| 125 Args : none | |
| 126 | |
| 127 =cut | |
| 128 | |
| 129 sub get_Transcript { | |
| 130 my $self=shift; | |
| 131 return ($self->{'transcript'}); | |
| 132 } | |
| 133 | |
| 134 # These get redefined here, overriding the SeqI ones | |
| 135 | |
| 136 sub change { | |
| 137 my ($self)=@_; | |
| 138 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!"); | |
| 139 return (-1); | |
| 140 } | |
| 141 sub positionchange { | |
| 142 my ($self)=@_; | |
| 143 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!"); | |
| 144 return (-1); | |
| 145 } | |
| 146 sub labelchange { | |
| 147 my ($self)=@_; | |
| 148 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!"); | |
| 149 return (-1); | |
| 150 } | |
| 151 | |
| 152 # this just returns the translation of the transcript, without checking for | |
| 153 # stop codons | |
| 154 sub transl_seq { | |
| 155 my $self=shift; | |
| 156 my $transcript=$self->get_Transcript; | |
| 157 my $translation=$transcript->translate(undef, undef, undef, | |
| 158 $self->translation_table)->seq; | |
| 159 return $translation; | |
| 160 } | |
| 161 | |
| 162 # version 1.74 -> now the "*" is printed | |
| 163 sub seq { | |
| 164 my $self=shift; | |
| 165 my $proteinseq; | |
| 166 my $transcript=$self->get_Transcript; | |
| 167 my $translation=$transcript->translate(undef, undef, undef, | |
| 168 $self->translation_table)->seq; | |
| 169 my $stop_pos=index($translation,"*"); | |
| 170 if ($stop_pos == -1) { # no stop present, continue downstream | |
| 171 my $downstreamseq=$transcript->downstream_seq(); | |
| 172 #carp "the downstream is: $downstreamseq"; # debug | |
| 173 my $cdnaseq=$transcript->seq(); | |
| 174 my $extendedseq = new Bio::PrimarySeq(-seq => "$cdnaseq$downstreamseq", | |
| 175 -alphabet => 'dna' | |
| 176 ); | |
| 177 | |
| 178 $translation=$extendedseq->translate(undef, undef, undef, | |
| 179 $self->translation_table)->seq; | |
| 180 #carp "the new translation is: $translation"; # debug | |
| 181 $stop_pos=index($translation,"*"); | |
| 182 if ($stop_pos == -1) { # still no stop present, return warning | |
| 183 $self->warn("Warning: no stop codon found in the retrieved sequence downstream of Transcript ",1); | |
| 184 undef $stop_pos; | |
| 185 $proteinseq=$translation; | |
| 186 } else { | |
| 187 $proteinseq=substr($translation,0,$stop_pos+1); | |
| 188 #carp "the new stopped translation is: $proteinseq, because the stop is at position $stop_pos"; # debug | |
| 189 } | |
| 190 } else { | |
| 191 $proteinseq=substr($translation,0,$stop_pos+1); | |
| 192 } | |
| 193 return $proteinseq; | |
| 194 } | |
| 195 | |
| 196 sub length { | |
| 197 my $self=shift; | |
| 198 my $seq=$self->seq; | |
| 199 my $length=length($seq); | |
| 200 return $length; | |
| 201 } | |
| 202 | |
| 203 sub all_labels { | |
| 204 my $self=shift; | |
| 205 return $self->get_Transcript->all_labels; | |
| 206 } | |
| 207 | |
| 208 # counts in triplet. Only a label matching the beginning of a triplet coding | |
| 209 # for an aminoacid is considered valid when setting coordinate_start | |
| 210 # (i.e. only in frame!) | |
| 211 sub valid { | |
| 212 my ($self,$label)=@_; | |
| 213 my $i; | |
| 214 my @labels=$self->get_Transcript->all_labels; | |
| 215 my $length=$#labels; | |
| 216 while ($i <= $length) { | |
| 217 if ($label == $labels[$i]) { | |
| 218 return (1); # found | |
| 219 } | |
| 220 $i=$i+3; | |
| 221 } | |
| 222 return (0); # not found | |
| 223 } | |
| 224 | |
| 225 # returns the label to the first nucleotide of the triplet coding for $position aminoacid | |
| 226 sub label { | |
| 227 my ($self,$position)=@_; | |
| 228 my $firstlabel=$self->coordinate_start; # this is in_frame checked | |
| 229 if ($position > 0) { | |
| 230 $position=$position*3-2; | |
| 231 } else { # if position = 0 this will be caught by Transcript, error thrown | |
| 232 $position=$position*3; | |
| 233 } | |
| 234 return $self->get_Transcript->label($position,$firstlabel); | |
| 235 # check for coord_start different | |
| 236 } | |
| 237 | |
| 238 # returns position (aminoacids numbering) of a particular label | |
| 239 # used to return 0 for not in frame labels | |
| 240 # now returns the position anyway (after version 1.66) | |
| 241 sub position { | |
| 242 my ($self,$label)=@_; | |
| 243 my $firstlabel=$self->coordinate_start; # this is in_frame checked | |
| 244 my $position=$self->get_Transcript->position($label,$firstlabel); | |
| 245 use integer; | |
| 246 my $modulus=$position % 3; | |
| 247 if ($position == 0) { | |
| 248 return (0); | |
| 249 } elsif ($position > 0) { | |
| 250 if ($modulus != 1) { | |
| 251 $self->warn("Attention! Label $label is not in frame ". | |
| 252 "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable | |
| 253 if ($modulus == 2) { | |
| 254 return ($position / 3 + 1); | |
| 255 } else { # i.e. modulus == 0 | |
| 256 return ($position / 3); | |
| 257 } | |
| 258 } | |
| 259 return ($position / 3 + 1); | |
| 260 } else { # pos < 0 | |
| 261 if ($modulus != 0) { | |
| 262 $self->warn("Attention! Label $label is not in frame ". | |
| 263 "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable | |
| 264 return ($position / 3 - 1); # ok for both other positions | |
| 265 } | |
| 266 return ($position / 3); | |
| 267 } | |
| 268 $self->throw( "WEIRD: execution shouldn't have reached here"); | |
| 269 return (0); # this should never happen, but just in case | |
| 270 } | |
| 271 | |
| 272 # note: it inherits subseq and labelsubseq from Transcript! | |
| 273 | |
| 274 sub start { | |
| 275 my $self=shift; | |
| 276 return ($self->{'transcript'}->start); | |
| 277 } | |
| 278 | |
| 279 sub end { | |
| 280 my $self=shift; | |
| 281 return ($self->{'transcript'}->end); | |
| 282 } | |
| 283 | |
| 284 =head2 aa_ranges | |
| 285 | |
| 286 Title : aa_ranges | |
| 287 Usage : @proteinfeatures = $translation->aa_ranges() | |
| 288 Function: to retrieve all the LiveSeq AARange objects attached to a | |
| 289 Translation, usually created out of a SwissProt database entry | |
| 290 crossreferenced from an EMBL CDS feature. | |
| 291 Returns : an array | |
| 292 Args : none | |
| 293 | |
| 294 =cut | |
| 295 | |
| 296 # returns an array of obj_ref of AARange objects attached to the Translation | |
| 297 sub aa_ranges { | |
| 298 my $self=shift; | |
| 299 return ($self->{'aa_ranges'}); | |
| 300 } | |
| 301 | |
| 302 sub translation_table { | |
| 303 my $self=shift; | |
| 304 $self->get_Transcript->translation_table(@_); | |
| 305 } | |
| 306 | |
| 307 # returns all aminoacids "affected" i.e. all aminoacids coded by any codon | |
| 308 # "touched" by the range selected between the labels, even if only partially. | |
| 309 | |
| 310 # it's not optimized for performance but it's useful | |
| 311 | |
| 312 sub labelsubseq { | |
| 313 my ($self,$start,$length,$end)=@_; | |
| 314 my ($pos1,$pos2); | |
| 315 my $transcript=$self->get_Transcript; | |
| 316 if ($start) { | |
| 317 unless ($transcript->valid($start)) { | |
| 318 $self->warn("Start label not valid"); return (-1); | |
| 319 } | |
| 320 $pos1=$self->position($start); | |
| 321 } | |
| 322 if ($end) { | |
| 323 if ($end == $start) { | |
| 324 $length=1; | |
| 325 } else { | |
| 326 unless ($transcript->valid($end)) { | |
| 327 $self->warn("End label not valid"); return (-1); | |
| 328 } | |
| 329 unless ($transcript->follows($start,$end) == 1) { | |
| 330 $self->warn("End label does not follow Start label!"); return (-1); | |
| 331 } | |
| 332 $pos2=$self->position($end); | |
| 333 $length=$pos2-$pos1+1; | |
| 334 } | |
| 335 } | |
| 336 my $sequence=$self->seq; | |
| 337 return (substr($sequence,$pos1-1,$length)); | |
| 338 } | |
| 339 | |
| 340 # return the offset in aminoacids from LiveSeq protein sequence and SwissProt | |
| 341 # sequence (usually as a result of an INIT_MET or a gap) | |
| 342 sub offset { | |
| 343 my $self=shift; | |
| 344 return ($self->{'offset'}); | |
| 345 } | |
| 346 | |
| 347 1; |
