Mercurial > repos > mahtabm > ensemb_rep_gvl
diff variant_effect_predictor/Bio/EnsEMBL/PredictionTranscript.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/PredictionTranscript.pm Thu Apr 11 06:29:17 2013 -0400 @@ -0,0 +1,571 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=head1 NAME + +PredictionTranscript + +=head1 SYNOPSIS + +=head1 DESCRIPTION + +Container for single transcript ab initio gene prediction such as +GenScan or SNAP. Is directly storable/retrievable in Ensembl using +PredictionTranscriptAdaptor. + +Creation: + + my $tran = new Bio::EnsEMBL::PredictionTranscript(); + $tran->add_Exon($pred_exon); + + my $tran = + new Bio::EnsEMBL::PredictionTranscript( -EXONS => @pred_exons ); + +Manipulation: + + # Returns an array of PredictionExon objects + my @pred_exons = @{ $tran->get_all_Exons }; + + # Returns the peptide translation as string + my $pep = $tran->translate()->seq(); + + # Get the exon cdna sequence. + my $cdna = $trans->spliced_seq(); + +=head1 METHODS + +=cut + +package Bio::EnsEMBL::PredictionTranscript; + +use vars qw(@ISA); +use strict; + +use Bio::Seq; +use Bio::EnsEMBL::Feature; +use Bio::EnsEMBL::Transcript; +use Bio::EnsEMBL::Translation; + +use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning ); +use Bio::EnsEMBL::Utils::Argument qw( rearrange ); + +@ISA = qw(Bio::EnsEMBL::Transcript); + + +=head2 new + + Arg [-DISPLAY_LABEL] + string - a displayable identifier for this prediction + Arg [...] : See Bio::EnsEMBL::Transcript superclass constructor + Example : $pt = Bio::EnsEMBL::PredictionTranscript->new + ( '-start' => $seq_region_start, + '-end' => $seq_region_end, + '-strand' => $seq_region_strand, + '-adaptor' => $self, + '-slice' => $slice, + '-analysis' => $analysis, + '-dbID' => $prediction_transcript_id, + '-display_label' => $display_label); + Description: Constructor. Creates a new Bio::EnsEMBL::PredictionTranscript + object + Returntype : Bio::EnsEMBL::PredictionTranscript + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub new { + my $class = shift; + + my $self = $class->SUPER::new(@_); + + my ($display_label) = rearrange(['DISPLAY_LABEL'], @_); + + $self->{'display_label'} = $display_label; + + return $self; +} + + +=head2 coding_region_start + + Arg [1] : none + Example : $coding_region_start = $pt->coding_region_start + Description: Retrieves the start of the coding region of this transcript in + slice coordinates. For prediction transcripts this + is always the start of the transcript (i.e. there is no UTR). + By convention, the coding_region_start is always lower than + the value returned by the coding_end method. + The value returned by this function is NOT the biological + coding start since on the reverse strand the biological coding + start would be the higher genomic value. + Returntype : int + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub coding_region_start { + my $self = shift; + return $self->start(); +} + + +=head2 coding_region_end + + Arg [1] : none + Example : $coding_region_end = $transcript->coding_region_end + Description: Retrieves the start of the coding region of this prediction + transcript. For prediction transcripts this is always the same + as the end since no UTRs are stored. + By convention, the coding_region_end is always higher than the + value returned by the coding_region_start method. + The value returned by this function is NOT the biological + coding start since on the reverse strand the biological coding + end would be the lower genomic value. + Returntype : int + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub coding_region_end { + my $self = shift; + return $self->end(); +} + + + +=head2 get_all_translateable_Exons + + Arg [1] : none + Example : $exons = $self->get_all_translateable_Exons + Description: Retrieves the translateable portion of all exons in this + transcript. For prediction transcripts this means all exons + since no UTRs are stored for them. + Returntype : listref of Bio::EnsEMBL::PredictionExons + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub get_all_translateable_Exons { + my $self = shift; + return $self->get_all_Exons(); +} + + +=head2 display_label + + Arg [1] : string $newval (optional) + The new value to set the display_label attribute to + Example : $display_label = $pt->display_label() + Description: Getter/Setter for a displayable identifier for this + prediction transcript. + Returntype : string + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub display_label{ + my $self = shift; + return $self->{'display_label'} = shift if(@_); + return $self->{'display_label'}; +} + + + +=head2 stable_id + + Arg [1] : none + Example : print $pt->stable_id(); + Description: Gets a 'stable' identifier for this prediction transcript. Note + that prediction transcripts do not have true *stable* + identifiers (i.e. identifiers maintained between releases). + This method chains to the display_label method and is intended + to provide polymorphism with the Transcript class. + Returntype : string + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub stable_id { return display_label(@_); } + +sub get_all_DBEntries { return []; } + +sub get_all_DBLinks { return []; } + +sub add_DBEntry {} + +sub external_db { return undef; } + +sub external_status { return undef; } + +sub external_name { return undef; } + +sub is_known { return 0;} + + +=head2 translation + + Arg [1] : none + Example : $translation = $pt->translation(); + Description: Retrieves a Bio::EnsEMBL::Translation object for this prediction + transcript. Note that this translation is generated on the fly + and is not stored in the database. The translation always + spans the entire transcript (no UTRs; all CDS) and does not + have an associated dbID, stable_id or adaptor. + Returntype : int + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub translation { + my $self = shift; + + #calculate translation on the fly + my $strand = $self->strand(); + + my $start_exon; + my $end_exon; + + my @exons = @{$self->get_all_Exons()}; + + return undef if(!@exons); + + $start_exon = $exons[0]; + $end_exon = $exons[-1]; + + my $pta; + + if($self->adaptor()) { + $pta = $self->adaptor()->db()->get_TranslationAdaptor(); + } else { + #warning("PredictionTranscript has no adaptor, may not be able to obtain " . + # "translation"); + } + + my $Xseq = $self->spliced_seq(); + my $start_phase = $start_exon->phase; + if( $start_phase > 0 ) { + $Xseq = "N"x$start_phase . $Xseq; + } + + my $tmpSeq = new Bio::Seq( -id => $self->display_id, + -seq => $Xseq, + -moltype => 'dna', + -alphabet => 'dna' ); + + return Bio::EnsEMBL::Translation->new + (-ADAPTOR => $pta, + -START_EXON => $start_exon, + -END_EXON => $end_exon, + -SEQ_START => 1, + -SEQ_END => $end_exon->length(), + -SEQ => $tmpSeq->translate()->seq()); +} + + + +=head2 translate + + Args : none + Function : Give a peptide translation of all exons currently in + the PT. Gives empty string when none is in. + Returntype: a Bio::Seq as in transcript->translate() + Exceptions: none + Caller : general + Status : Stable + +=cut + + +sub translate { + my ($self) = @_; + + my $dna = $self->translateable_seq(); + + my $codon_table_id; + if ( defined( $self->slice() ) ) { + my $attrib; + + ($attrib) = @{ $self->slice()->get_all_Attributes('codon_table') }; + if ( defined($attrib) ) { + $codon_table_id = $attrib->value(); + } + } + $codon_table_id ||= 1; #default will be vertebrates + + if( CORE::length( $dna ) % 3 == 0 ) { + # $dna =~ s/TAG$|TGA$|TAA$//i; + my $codon_table = Bio::Tools::CodonTable->new( -id => $codon_table_id ); + + if ( $codon_table->is_ter_codon( substr( $dna, -3, 3 ) ) ) { + substr( $dna, -3, 3, '' ); + } + } + # the above line will remove the final stop codon from the mrna + # sequence produced if it is present, this is so any peptide produced + # won't have a terminal stop codon + # if you want to have a terminal stop codon either comment this line out + # or call translatable seq directly and produce a translation from it + + my $bioseq = new Bio::Seq( -id => $self->display_id, + -seq => $dna, + -moltype => 'dna', + -alphabet => 'dna' ); + + my $translation = $bioseq->translate(undef,undef,undef,$codon_table_id); + + return $translation; +} + + +=head2 cdna_coding_start + + Arg [1] : none + Example : $relative_coding_start = $transcript->cdna_coding_start(); + Description: Retrieves the position of the coding start of this transcript + in cdna coordinates (relative to the start of the 5prime end of + the transcript, excluding introns, including utrs). This is + always 1 for prediction transcripts because they have no UTRs. + Returntype : int + Exceptions : none + Caller : five_prime_utr, get_all_snps, general + Status : Stable + +=cut + +sub cdna_coding_start { return 1 } + + + +=head2 cdna_coding_end + + Arg [1] : none + Example : $relative_coding_start = $transcript->cdna_coding_end(); + Description: Retrieves the position of the coding end of this transcript + in cdna coordinates (relative to the start of the 5prime end of + the transcript, excluding introns, including utrs). This is + always te length of the cdna for prediction transcripts because + they have no UTRs. + Returntype : int + Exceptions : none + Caller : five_prime_utr, get_all_snps, general + Status : Stable + +=cut + +sub cdna_coding_end { + my ($self) = @_; + return length( $self->spliced_seq() ); +} + + +=head2 transform + + Arg 1 : String $coordinate_system_name + Arg [2] : String $coordinate_system_version + Example : $ptrans = $ptrans->transform('chromosome', 'NCBI33'); + $ptrans = $ptrans->transform('clone'); + Description: Moves this PredictionTranscript to the given coordinate system. + If this Transcript has Exons attached, they move as well. + A new Transcript is returned or undefined if this PT is not + defined in the new coordinate system. + Returntype : Bio::EnsEMBL::PredictionTranscript + Exceptions : wrong parameters + Caller : general + Status : Stable + +=cut + +sub transform { + my $self = shift; + + # catch for old style transform calls + if( ref $_[0] && ($_[0]->isa( "Bio::EnsEMBL::Slice" ) or $_[0]->isa( "Bio::EnsEMBL::LRGSlice" ))) { + throw("transform needs coordinate systems details now," . + "please use transfer"); + } + + my $new_transcript = Bio::EnsEMBL::Feature::transform($self, @_ ); + return undef unless $new_transcript; + + #go through the _trans_exon_array so as not to prompt lazy-loading + if(exists($self->{'_trans_exon_array'})) { + my @new_exons; + foreach my $old_exon ( @{$self->{'_trans_exon_array'}} ) { + my $new_exon = $old_exon->transform(@_); + push(@new_exons, $new_exon); + } + $new_transcript->{'_trans_exon_array'} = \@new_exons; + } + + return $new_transcript; +} + + + +=head2 transfer + + Arg 1 : Bio::EnsEMBL::Slice $destination_slice + Example : $ptrans = $ptrans->transfer($slice); + Description: Moves this PredictionTranscript to the given slice. + If this Transcripts has Exons attached, they move as well. + If this transcript cannot be moved then undef is returned + instead. + Returntype : Bio::EnsEMBL::PredictionTranscript + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub transfer { + my $self = shift; + + my $new_transcript = $self->SUPER::transfer( @_ ); + return undef unless $new_transcript; + + if( exists $self->{'_trans_exon_array'} ) { + my @new_exons; + for my $old_exon ( @{$self->{'_trans_exon_array'}} ) { + my $new_exon = $old_exon->transfer( @_ ); + push( @new_exons, $new_exon ); + } + + $new_transcript->{'_trans_exon_array'} = \@new_exons; + } + + return $new_transcript; +} + +=head2 get_all_Exons + + Arg [1] : none + Example : my @exons = @{$transcript->get_all_Exons()}; + Description: Returns an listref of the exons in this transcipr in order. + i.e. the first exon in the listref is the 5prime most exon in + the transcript. + Returntype : a list reference to Bio::EnsEMBL::Exon objects + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub get_all_Exons { + my ($self) = @_; + if( ! defined $self->{'_trans_exon_array'} && defined $self->adaptor() ) { + $self->{'_trans_exon_array'} = $self->adaptor()->db()-> + get_PredictionExonAdaptor()->fetch_all_by_PredictionTranscript( $self ); + } + return $self->{'_trans_exon_array'}; +} + +=head2 display_id + + Arg [1] : none + Example : print $rf->display_id(); + Description: This method returns a string that is considered to be + the 'display' identifier. For prediction transcripts this is + (depending on availability and in this order) the stable Id, the + dbID or an empty string. + Returntype : string + Exceptions : none + Caller : web drawing code + Status : Stable + +=cut + +sub display_id { + my $self = shift; + return $self->stable_id || $self->dbID || ''; +} + +=head2 get_all_Attributes + + Arg [1] : none + Example : + Description: DOES NOTHING, Returns empty listref. Provided here to prevent + Transcript attributes being returned for PredictionTranscripts. + Returntype : EMPTY listref Bio::EnsEMBL::Attribute + Exceptions : none + Caller : general + Status : At risk + +=cut + +sub get_all_Attributes { + my $self = shift; + + return []; +} + + + +=head2 get_exon_count + + Description: DEPRECATED - use get_all_Exons instead + +=cut + +sub get_exon_count { + my $self = shift; + deprecate('Use scalar(@{$transcript->get_all_Exon()s}) instead'); + return scalar( @{$self->get_all_Exons} ); +} + + +=head2 set_exon_count + + Description: DEPRECATED - this method does nothing now + +=cut + +sub set_exon_count { + deprecate('This method no longer does anything.'); +} + + + +=head2 get_cdna + + Description : DEPRECATED - use spliced_seq() or translateable_seq instead + +=cut + +sub get_cdna { + my $self = shift; + deprecate('use spliced_seq instead'); + return $self->spliced_seq(); +} + +1;