Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/LiveSeq/Translation.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/LiveSeq/Translation.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,347 @@ +# $Id: Translation.pm,v 1.12 2002/09/25 08:57:52 heikki Exp $ +# +# bioperl module for Bio::LiveSeq::Translation +# +# Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net> +# +# Copyright Joseph Insana +# +# You may distribute this module under the same terms as perl itself +# +# POD documentation - main docs before the code + +=head1 NAME + +Bio::LiveSeq::Translation - Translation class for LiveSeq + +=head1 SYNOPSIS + + #documentation needed + +=head1 DESCRIPTION + +This stores informations about aminoacids translations of transcripts. +The implementation is that a Translation object is the translation of +a Transcript object, with different possibilities of manipulation, +different coordinate system and eventually its own ranges (protein domains). + +=head1 AUTHOR - Joseph A.L. Insana + +Email: Insana@ebi.ac.uk, jinsana@gmx.net + +Address: + + EMBL Outstation, European Bioinformatics Institute + Wellcome Trust Genome Campus, Hinxton + Cambs. CB10 1SD, United Kingdom + +=head1 APPENDIX + +The rest of the documentation details each of the object +methods. Internal methods are usually preceded with a _ + +=cut + +# Let the code begin... + +package Bio::LiveSeq::Translation; +$VERSION=1.8; + +# Version history: +# Thu Mar 23 14:41:52 GMT 2000 v.1.0 begun +# Sat Mar 25 04:08:59 GMT 2000 v 1.2 valid(), label(), position() +# Tue Mar 28 03:37:17 BST 2000 v 1.3 added inheritance from Transcript, subseq relies on it! +# Fri Mar 31 16:53:53 BST 2000 v 1.4 new seq() function that checks for stop codons: it now returns only up to the stop but doesn't continue if stop not found +# Fri Mar 31 18:45:07 BST 2000 v 1.41 now it asks for Transcript->downstream_seq +# Fri Mar 31 19:20:04 BST 2000 v 1.49 seq() now works correctly +# Thu Apr 13 00:10:29 BST 2000 v 1.5 start and end now take the information from Transcript +# Thu Apr 27 16:18:55 BST 2000 v 1.6 translation_table info added +# Thu May 11 17:30:41 BST 2000 v 1.66 position method updated so to return a position also for labels not in frame (not at 1st triplet position) +# Mon May 22 14:59:14 BST 2000 v 1.7 labelsubseq added +# Mon May 22 15:22:12 BST 2000 v 1.71 labelsubseq tweaked for cases where startlabel==endlabel (no useless follow() query!) +# Mon May 22 15:28:49 BST 2000 v 1.74 modified seq() so that the "*" is printed +# Wed Jun 7 04:02:18 BST 2000 v 1.75 added offset() +# Thu Jun 29 15:10:22 BST 2000 v 1.76 bug corrected for elongation mutations, if stop codon is not found downstream +# Wed Mar 28 16:37:37 BST 2001 v 1.8 carp -> warn,throw (coded methods in SeqI) + +use strict; +#use Carp qw(croak carp cluck); +use vars qw($VERSION @ISA); +use Bio::LiveSeq::SeqI 3.2; # uses SeqI, inherits from it +use Bio::PrimarySeq; +@ISA=qw(Bio::LiveSeq::Transcript ); + + +=head2 new + + Title : new + Usage : $protein = Bio::LiveSeq::Translation->new(-transcript => $transcr); + + Function: generates a new Bio::LiveSeq::Translation + Returns : reference to a new object of class Translation + Errorcode -1 + Args : reference to an object of class Transcript + +=cut + +sub new { + my ($thing, %args) = @_; + my $class = ref($thing) || $thing; + my ($obj,%translation); + + my $transcript=$args{-transcript}; + + $obj = \%translation; + $obj = bless $obj, $class; + + unless ($transcript) { + $obj->throw("$class not initialised because no -transcript given"); + } + unless (ref($transcript) eq "Bio::LiveSeq::Transcript") { + $obj->throw("$class not initialised because no object of class Transcript given"); + } + + #my $startbase = $transcript->start; + #my $endbase = $transcript->end; + my $strand = $transcript->strand; + my $seq = $transcript->{'seq'}; + + $obj->{'strand'}=$strand; + $obj->{'seq'}=$seq; + $obj->{'transcript'}=$transcript; + $obj->{'alphabet'}="protein"; + + $transcript->{'translation'}=$obj;# set the Translation ref into its Transcript + return $obj; +} + +=head2 get_Transcript + + Title : valid + Usage : $transcript = $obj->get_Transcript() + Function: retrieves the reference to the object of class Transcript (if any) + attached to a LiveSeq object + Returns : object reference + Args : none + +=cut + +sub get_Transcript { + my $self=shift; + return ($self->{'transcript'}); +} + +# These get redefined here, overriding the SeqI ones + +sub change { + my ($self)=@_; + $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!"); + return (-1); +} +sub positionchange { + my ($self)=@_; + $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!"); + return (-1); +} +sub labelchange { + my ($self)=@_; + $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!"); + return (-1); +} + +# this just returns the translation of the transcript, without checking for +# stop codons +sub transl_seq { + my $self=shift; + my $transcript=$self->get_Transcript; + my $translation=$transcript->translate(undef, undef, undef, + $self->translation_table)->seq; + return $translation; +} + +# version 1.74 -> now the "*" is printed +sub seq { + my $self=shift; + my $proteinseq; + my $transcript=$self->get_Transcript; + my $translation=$transcript->translate(undef, undef, undef, + $self->translation_table)->seq; + my $stop_pos=index($translation,"*"); + if ($stop_pos == -1) { # no stop present, continue downstream + my $downstreamseq=$transcript->downstream_seq(); + #carp "the downstream is: $downstreamseq"; # debug + my $cdnaseq=$transcript->seq(); + my $extendedseq = new Bio::PrimarySeq(-seq => "$cdnaseq$downstreamseq", + -alphabet => 'dna' + ); + + $translation=$extendedseq->translate(undef, undef, undef, + $self->translation_table)->seq; + #carp "the new translation is: $translation"; # debug + $stop_pos=index($translation,"*"); + if ($stop_pos == -1) { # still no stop present, return warning + $self->warn("Warning: no stop codon found in the retrieved sequence downstream of Transcript ",1); + undef $stop_pos; + $proteinseq=$translation; + } else { + $proteinseq=substr($translation,0,$stop_pos+1); + #carp "the new stopped translation is: $proteinseq, because the stop is at position $stop_pos"; # debug + } + } else { + $proteinseq=substr($translation,0,$stop_pos+1); + } + return $proteinseq; +} + +sub length { + my $self=shift; + my $seq=$self->seq; + my $length=length($seq); + return $length; +} + +sub all_labels { + my $self=shift; + return $self->get_Transcript->all_labels; +} + +# counts in triplet. Only a label matching the beginning of a triplet coding +# for an aminoacid is considered valid when setting coordinate_start +# (i.e. only in frame!) +sub valid { + my ($self,$label)=@_; + my $i; + my @labels=$self->get_Transcript->all_labels; + my $length=$#labels; + while ($i <= $length) { + if ($label == $labels[$i]) { + return (1); # found + } + $i=$i+3; + } + return (0); # not found +} + +# returns the label to the first nucleotide of the triplet coding for $position aminoacid +sub label { + my ($self,$position)=@_; + my $firstlabel=$self->coordinate_start; # this is in_frame checked + if ($position > 0) { + $position=$position*3-2; + } else { # if position = 0 this will be caught by Transcript, error thrown + $position=$position*3; + } + return $self->get_Transcript->label($position,$firstlabel); + # check for coord_start different +} + +# returns position (aminoacids numbering) of a particular label +# used to return 0 for not in frame labels +# now returns the position anyway (after version 1.66) +sub position { + my ($self,$label)=@_; + my $firstlabel=$self->coordinate_start; # this is in_frame checked + my $position=$self->get_Transcript->position($label,$firstlabel); + use integer; + my $modulus=$position % 3; + if ($position == 0) { + return (0); + } elsif ($position > 0) { + if ($modulus != 1) { + $self->warn("Attention! Label $label is not in frame ". + "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable + if ($modulus == 2) { + return ($position / 3 + 1); + } else { # i.e. modulus == 0 + return ($position / 3); + } + } + return ($position / 3 + 1); + } else { # pos < 0 + if ($modulus != 0) { + $self->warn("Attention! Label $label is not in frame ". + "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable + return ($position / 3 - 1); # ok for both other positions + } + return ($position / 3); + } + $self->throw( "WEIRD: execution shouldn't have reached here"); + return (0); # this should never happen, but just in case +} + +# note: it inherits subseq and labelsubseq from Transcript! + +sub start { + my $self=shift; + return ($self->{'transcript'}->start); +} + +sub end { + my $self=shift; + return ($self->{'transcript'}->end); +} + +=head2 aa_ranges + + Title : aa_ranges + Usage : @proteinfeatures = $translation->aa_ranges() + Function: to retrieve all the LiveSeq AARange objects attached to a + Translation, usually created out of a SwissProt database entry + crossreferenced from an EMBL CDS feature. + Returns : an array + Args : none + +=cut + +# returns an array of obj_ref of AARange objects attached to the Translation +sub aa_ranges { + my $self=shift; + return ($self->{'aa_ranges'}); +} + +sub translation_table { + my $self=shift; + $self->get_Transcript->translation_table(@_); +} + +# returns all aminoacids "affected" i.e. all aminoacids coded by any codon +# "touched" by the range selected between the labels, even if only partially. + +# it's not optimized for performance but it's useful + +sub labelsubseq { + my ($self,$start,$length,$end)=@_; + my ($pos1,$pos2); + my $transcript=$self->get_Transcript; + if ($start) { + unless ($transcript->valid($start)) { + $self->warn("Start label not valid"); return (-1); + } + $pos1=$self->position($start); + } + if ($end) { + if ($end == $start) { + $length=1; + } else { + unless ($transcript->valid($end)) { + $self->warn("End label not valid"); return (-1); + } + unless ($transcript->follows($start,$end) == 1) { + $self->warn("End label does not follow Start label!"); return (-1); + } + $pos2=$self->position($end); + $length=$pos2-$pos1+1; + } + } + my $sequence=$self->seq; + return (substr($sequence,$pos1-1,$length)); +} + +# return the offset in aminoacids from LiveSeq protein sequence and SwissProt +# sequence (usually as a result of an INIT_MET or a gap) +sub offset { + my $self=shift; + return ($self->{'offset'}); +} + +1;