Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/Exon.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Exon.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,1661 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=head1 NAME + +Bio::EnsEMBL::Exon - A class representing an Exon + +=head1 SYNOPSIS + + $ex = new Bio::EnsEMBL::Exon( + -START => 100, + -END => 200, + -STRAND => 1, + -SLICE => $slice, + -DBID => $dbID, + -ANALYSIS => $analysis, + -STABLE_ID => 'ENSE000000123', + -VERSION => 2 + ); + + # seq() returns a Bio::Seq + my $seq = $exon->seq->seq(); + + # Peptide only makes sense within transcript context + my $pep = $exon->peptide($transcript)->seq(); + + # Normal feature operations can be performed: + $exon = $exon->transform('clone'); + $exon->move( $new_start, $new_end, $new_strand ); + print $exon->slice->seq_region_name(); + +=head1 DESCRIPTION + +This is a class which represents an exon which is part of a transcript. +See Bio::EnsEMBL:Transcript + +=head1 METHODS + +=cut + +package Bio::EnsEMBL::Exon; + +use strict; + +use Bio::EnsEMBL::Feature; +use Bio::Seq; # exons have to have sequences... + +use Bio::EnsEMBL::Utils::Exception qw( warning throw deprecate ); +use Bio::EnsEMBL::Utils::Argument qw( rearrange ); +use Bio::EnsEMBL::Utils::Scalar qw( assert_ref ); +use Bio::EnsEMBL::DBSQL::SupportingFeatureAdaptor; + +use vars qw(@ISA); +@ISA = qw(Bio::EnsEMBL::Feature); + + +=head2 new + + Arg [-SLICE]: Bio::EnsEMBL::SLice - Represents the sequence that this + feature is on. The coordinates of the created feature are + relative to the start of the slice. + Arg [-START]: The start coordinate of this feature relative to the start + of the slice it is sitting on. Coordinates start at 1 and + are inclusive. + Arg [-END] : The end coordinate of this feature relative to the start of + the slice it is sitting on. Coordinates start at 1 and are + inclusive. + Arg [-STRAND]: The orientation of this feature. Valid values are 1,-1,0. + Arg [-SEQNAME] : (optional) A seqname to be used instead of the default name + of the of the slice. Useful for features that do not have an + attached slice such as protein features. + Arg [-dbID] : (optional) internal database id + Arg [-ADAPTOR]: (optional) Bio::EnsEMBL::DBSQL::BaseAdaptor + Arg [-PHASE] : the phase. + Arg [-END_PHASE]: the end phase + Arg [-STABLE_ID]: (optional) the stable id of the exon + Arg [-VERSION] : (optional) the version + Arg [-CREATED_DATE] : (optional) the created date + Arg [-MODIFIED_DATE]: (optional) the last midifeid date + + Example : none + Description: create an Exon object + Returntype : Bio::EnsEMBL::Exon + Exceptions : if phase is not valid (i.e. 0,1, 2 -1) + Caller : general + Status : Stable + +=cut + +sub new { + my $class = shift; + + $class = ref $class || $class; + + my $self = $class->SUPER::new( @_ ); + + my ( $phase, $end_phase, $stable_id, $version, $created_date, + $modified_date, $is_current, $is_constitutive ) + = rearrange( [ + "PHASE", "END_PHASE", + "STABLE_ID", "VERSION", + "CREATED_DATE", "MODIFIED_DATE", + "IS_CURRENT", "IS_CONSTITUTIVE" + ], + @_ + ); + + if ( defined($phase) ) { # make sure phase is valid. + $self->phase($phase); + } + + $self->{'end_phase'} = $end_phase; + $self->{'stable_id'} = $stable_id; + $self->{'version'} = $version; + $self->{'created_date'} = $created_date; + $self->{'modified_date'} = $modified_date; + + # Default is_current + if ( !defined($is_current) ) { $is_current = 1 } + $self->{'is_current'} = $is_current; + + # Default is_constitutive + if ( !defined($is_constitutive) ) { $is_constitutive = 0 } + $self->{'is_constitutive'} = $is_constitutive; + + return $self; +} + + +# =head2 new_fast + +# Arg [1] : Bio::EnsEMBL::Slice $slice +# Arg [2] : int $start +# Arg [3] : int $end +# Arg [4] : int $strand (1 or -1) +# Example : none +# Description: create an Exon object +# Returntype : Bio::EnsEMBL::Exon +# Exceptions : throws if end < start +# Caller : general +# Status : Stable + +# =cut + +# sub new_fast { +# my ($class, $slice, $start, $end, $strand) = @_; + +# my $self = bless {}, $class; + +# # Swap start and end if they're in the wrong order +# # We assume that the strand is correct and keep the input value. + +# if ($start > $end) { +# throw( "End smaller than start not allowed" ); +# } + +# $self->start ($start); +# $self->end ($end); +# $self->strand($strand); +# $self->slice($slice); + +# return $self; +# } + + +=head2 end_phase + + Arg [1] : (optional) int $end_phase + Example : $end_phase = $feat->end_phase; + Description: Gets/Sets the end phase of the exon. + end_phase = number of bases from the last incomplete codon of + this exon. + Usually, end_phase = (phase + exon_length)%3 + but end_phase could be -1 if the exon is half-coding and its 3 + prime end is UTR. + Returntype : int + Exceptions : warning if end_phase is called without an argument and the + value is not set. + Caller : general + Status : Stable + +=cut + +sub end_phase { + my $self = shift; + if (@_) { + $self->{'end_phase'} = shift; + } + else { + if ( !defined( $self->{'end_phase'} ) ) { + warning("No end phase set in Exon. You must set it explicitly."); + } + } + return $self->{'end_phase'}; +} + + +=head2 phase + + Arg [1] : (optional) int $phase + Example : my $phase = $exon->phase; + $exon->phase(2); + Description: Gets/Sets the phase of the exon. + Returntype : int + Exceptions : throws if phase is not (0, 1 2 or -1). + Caller : general + Status : Stable + + +Get or set the phase of the Exon, which tells the +translation machinery, which makes a peptide from +the DNA, where to start. + +The Ensembl phase convention can be thought of as +"the number of bases of the first codon which are +on the previous exon". It is therefore 0, 1 or 2 +(or -1 if the exon is non-coding). In ascii art, +with alternate codons represented by B<###> and +B<+++>: + + Previous Exon Intron This Exon + ...------------- -------------... + + 5' Phase 3' + ...#+++###+++### 0 +++###+++###+... + ...+++###+++###+ 1 ++###+++###++... + ...++###+++###++ 2 +###+++###+++... + +Here is another explanation from Ewan: + +Phase means the place where the intron lands +inside the codon - 0 between codons, 1 between +the 1st and second base, 2 between the second and +3rd base. Exons therefore have a start phase and +a end phase, but introns have just one phase. + +=cut + +sub phase { + my ($self,$value) = @_; + + if (defined($value)) { + # Value must be 0,1,2, or -1 for non-coding + if ($value =~ /^(-1|0|1|2)$/) { + #print STDERR "Setting phase to $value\n"; + $self->{'phase'} = $value; + } else { + throw("Bad value ($value) for exon phase. Should only be" . + " -1,0,1,2\n"); + } + } + return $self->{'phase'}; +} + + +=head2 frame + + Arg [1] : none + Example : $frame = $exon->frame + Description: Gets the frame of this exon + Returntype : int + Exceptions : thrown if an arg is passed + thrown if frame cannot be calculated due to a bad phase value + Caller : general + Status : Stable + +=cut + +sub frame { + my ($self,$value) = @_; + + if( defined $value ) { + throw("Cannot set frame. Deduced from seq_start and phase"); + } + + # frame is mod 3 of the translation point + + if( $self->phase == -1 ) { + return '.'; # gff convention for no frame info + } + if( $self->phase == 0 ) { + return $self->start%3; + } + + if( $self->phase == 1 ) { + return ($self->start+2)%3; + } + + if( $self->phase == 2 ) { + return ($self->start+1)%3; + } + + throw("bad phase in exon ".$self->phase); + +} + + +=head2 start + + Arg [1] : int $start (optional) + Example : $start = $exon->start(); + Description: Getter/Setter for the start of this exon. The superclass + implmentation is overridden to flush the internal sequence + cache if this value is altered + Returntype : int + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub start { + my $self = shift; + # if an arg was provided, flush the internal sequence cache + delete $self->{'_seq_cache'} if(@_); + return $self->SUPER::start(@_); +} + + +=head2 end + + Arg [1] : int $end (optional) + Example : $end = $exon->end(); + Description: Getter/Setter for the end of this exon. The superclass + implmentation is overridden to flush the internal sequence + cache if this value is altered + Returntype : int + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub end { + my $self = shift; + # if an arg was provided, flush the internal sequence cache + delete $self->{'_seq_cache'} if(@_); + return $self->SUPER::end(@_); +} + + +=head2 strand + + Arg [1] : int $strand (optional) + Example : $start = $exon->strand(); + Description: Getter/Setter for the strand of this exon. The superclass + implmentation is overridden to flush the internal sequence + cache if this value is altered + Returntype : int + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub strand { + my $self = shift; + # if an arg was provided, flush the internal sequence cache + delete $self->{'_seq_cache'} if(@_); + return $self->SUPER::strand(@_); +} + +=head2 cdna_start + + Arg [1] : Bio::EnsEMBL::Transcript $transcript + The transcript for which cDNA coordinates should be + relative to. + Example : $cdna_start = $exon->cdna_start($transcript); + Description : Returns the start position of the exon in cDNA + coordinates. + Since an exon may be part of one or more transcripts, + the relevant transcript must be given as argument to + this method. + Return type : Integer + Exceptions : Throws if the given argument is not a transcript. + Throws if the first part of the exon maps into a gap. + Throws if the exon can not be mapped at all. + Caller : General + Status : Stable + +=cut + +sub cdna_start { + my ($self, $transcript) = @_; + assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript'); + + my $id = $transcript->dbID(); + + if(defined $id && exists $self->{cdna_start}->{$id}) { + return $self->{cdna_start}->{$id}; + } + + my $cdna_start; + my @coords = $transcript->genomic2cdna($self->start(), $self->end(), $self->strand()); + if(@coords && !$coords[0]->isa('Bio::EnsEMBL::Mapper::Gap')) { + $cdna_start = $coords[0]->start(); + } + elsif(@coords) { + throw "First part of exon maps into gap"; + } + else { + throw "Can not map exon"; + } + + if(defined $id) { + $self->{cdna_start}->{$id} = $cdna_start; + } + + return $cdna_start; +} ## end sub cdna_start + +=head2 cdna_end + + Arg [1] : Bio::EnsEMBL::Transcript $transcript + The transcript for which cDNA coordinates should be + relative to. + Example : $cdna_end = $exon->cdna_end($transcript); + Description : Returns the end position of the exon in cDNA + coordinates. + Since an exon may be part of one or more transcripts, + the relevant transcript must be given as argument to + this method. + Return type : Integer + Exceptions : Throws if the given argument is not a transcript. + Throws if the last part of the exon maps into a gap. + Throws if the exon can not be mapped at all. + Caller : General + Status : Stable + +=cut + +sub cdna_end { + my ($self, $transcript) = @_; + assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript'); + + my $id = $transcript->dbID(); + + if(defined $id && exists $self->{cdna_end}->{$id}) { + return $self->{cdna_end}->{$id}; + } + + my $cdna_end; + my @coords = $transcript->genomic2cdna($self->start(), $self->end(), $self->strand()); + if(@coords && !$coords[-1]->isa('Bio::EnsEMBL::Mapper::Gap')) { + $cdna_end = $coords[-1]->end(); + } + elsif(@coords) { + throw "Last part of exon maps into gap"; + } + else { + throw "Can not map exon"; + } + + if(defined $id) { + $self->{cdna_end}->{$id} = $cdna_end; + } + + return $cdna_end; +} ## end sub cdna_end + +=head2 cdna_coding_start + + Arg [1] : Bio::EnsEMBL::Transcript $transcript + The transcript for which cDNA coordinates should be + relative to. + Example : $cdna_coding_start = $exon->cdna_coding_start($transcript); + Description : Returns the start position of the coding region of the + exon in cDNA coordinates. Returns undef if the whole + exon is non-coding. + Since an exon may be part of one or more transcripts, + the relevant transcript must be given as argument to + this method. + Return type : Integer or undef + Exceptions : Throws if the given argument is not a transcript. + Caller : General + Status : Stable + +=cut + +sub cdna_coding_start { + my ($self, $transcript) = @_; + assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript'); + + my $id = $transcript->dbID(); + + if(defined $id && exists $self->{cdna_coding_start}->{$id}) { + return $self->{cdna_coding_start}->{$id}; + } + + my $cdna_coding_start; + my $transcript_coding_start = $transcript->cdna_coding_start(); + if(defined $transcript_coding_start) { + my $cdna_start = $self->cdna_start($transcript); + + if ( $transcript_coding_start < $cdna_start ) { + # Coding region starts upstream of this exon... + + if ( $transcript->cdna_coding_end() < $cdna_start ) { + # ... and also ends upstream of this exon. + $cdna_coding_start = undef; + } + else { + # ... and does not end upstream of this exon. + $cdna_coding_start = $cdna_start; + } + } else { + # Coding region starts either within or downstream of this + # exon. + + if ( $transcript_coding_start <= $self->cdna_end($transcript) ) { + # Coding region starts within this exon. + $cdna_coding_start = $transcript_coding_start; + } + else { + # Coding region starts downstream of this exon. + $cdna_coding_start = undef; + } + } + } + else { + $cdna_coding_start = undef; + } + + if(defined $id) { + $self->{cdna_coding_start}->{$id} = $cdna_coding_start; + $self->{cdna_coding_end}->{$id} = undef if ! defined $cdna_coding_start; + } + + return $cdna_coding_start; +} ## end sub cdna_coding_start + +=head2 cdna_coding_end + + Arg [1] : Bio::EnsEMBL::Transcript $transcript + The transcript for which cDNA coordinates should be + relative to. + Example : $cdna_coding_end = $exon->cdna_coding_end($transcript); + Description : Returns the end position of the coding region of the + exon in cDNA coordinates. Returns undef if the whole + exon is non-coding. + Since an exon may be part of one or more transcripts, + the relevant transcript must be given as argument to + this method. + Return type : Integer or undef + Exceptions : Throws if the given argument is not a transcript. + Caller : General + Status : Stable + +=cut + +sub cdna_coding_end { + my ($self, $transcript) = @_; + assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript'); + + my $id = $transcript->dbID(); + + if(defined $id && exists $self->{cdna_coding_end}->{$id}) { + return $self->{cdna_coding_end}->{$id}; + } + + my $cdna_coding_end; + my $transcript_coding_end = $transcript->cdna_coding_end(); + if(defined $transcript_coding_end) { + my $cdna_end = $self->cdna_end($transcript); + + if ( $transcript_coding_end > $cdna_end ) { + + # Coding region ends downstream of this exon... + if ( $transcript->cdna_coding_start() > $cdna_end ) { + # ... and also starts downstream of this exon. + $cdna_coding_end = undef; + } + else { + # ... and does not start downstream of this exon. + $cdna_coding_end = $cdna_end; + } + } + else { + # Coding region ends either within or upstream of this + # exon. + + if ( $transcript_coding_end >= $self->cdna_start($transcript) ) { + # Coding region ends within this exon. + $cdna_coding_end = $transcript_coding_end; + } + else { + # Coding region ends upstream of this exon. + $cdna_coding_end = undef; + } + } + } + else { + $cdna_coding_end = undef; + } + + if(defined $id) { + $self->{cdna_coding_end}->{$id} = $cdna_coding_end; + $self->{cdna_coding_start}->{$id} = undef if ! defined $cdna_coding_end; + } + + return $cdna_coding_end; +} ## end sub cdna_coding_end + +=head2 coding_region_start + + Arg [1] : Bio::EnsEMBL::Transcript $transcript + Example : $coding_region_start = + $exon->coding_region_start($transcript); + Description : Returns the start position of the coding region + of the exon in slice-relative coordinates on the + forward strand. Returns undef if the whole exon is + non-coding. + Since an exon may be part of one or more transcripts, + the relevant transcript must be given as argument to + this method. + Return type : Integer or undef + Exceptions : Throws if the given argument is not a transcript. + Caller : General + Status : Stable + +=cut + +# The implementation of this method is analogous to the implementation +# of cdna_coding_start(). + +sub coding_region_start { + my ($self, $transcript) = @_; + assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript'); + + my $id = $transcript->dbID(); + + if(defined $id && exists $self->{coding_region_start}->{$id}) { + return $self->{coding_region_start}->{$id}; + } + + my $coding_region_start; + my $transcript_coding_start = $transcript->coding_region_start(); + if(defined $transcript_coding_start) { + my $start = $self->start(); + + if ( $transcript_coding_start < $start ) { + # Coding region starts upstream of this exon... + + if ( $transcript->coding_region_end() < $start ) { + # ... and also ends upstream of this exon. + $coding_region_start = undef; + } + else { + # ... and does not end upstream of this exon. + $coding_region_start = $start; + } + } + else { + # Coding region starts either within or downstream of this + # exon. + + if ( $transcript_coding_start <= $self->end() ) { + # Coding region starts within this exon. + $coding_region_start = $transcript_coding_start; + } + else { + # Coding region starts downstream of this exon. + $coding_region_start = undef; + } + } + } + else { + $coding_region_start = undef; + } + + if(defined $id) { + $self->{coding_region_start}->{$id} = $coding_region_start; + $self->{coding_region_end}->{$id} = undef if ! defined $coding_region_start; + } + + return $coding_region_start; +} ## end sub coding_region_start + +=head2 coding_region_end + + Arg [1] : Bio::EnsEMBL::Transcript $transcript + Example : $coding_region_end = + $exon->coding_region_end($transcript); + Description : Returns the end position of the coding region of + the exon in slice-relative coordinates on the + forward strand. Returns undef if the whole exon is + non-coding. + Since an exon may be part of one or more transcripts, + the relevant transcript must be given as argument to + this method. + Return type : Integer or undef + Exceptions : Throws if the given argument is not a transcript. + Caller : General + Status : Stable + +=cut + +# The implementation of this method is analogous to the implementation +# of cdna_coding_end(). + +sub coding_region_end { + my ($self, $transcript) = @_; + assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript'); + + my $id = $transcript->dbID(); + + if(defined $id && exists $self->{coding_region_end}->{$id}) { + return $self->{coding_region_end}->{$id}; + } + + my $coding_region_end; + my $transcript_coding_end = $transcript->coding_region_end(); + if(defined $transcript_coding_end) { + + my $end = $self->end(); + if($transcript_coding_end > $end) { + # Coding region ends downstream of this exon... + + if ( $transcript->coding_region_start() > $end ) { + # ... and also starts downstream of this exon. + $coding_region_end = undef; + } + else { + # ... and does not start downstream of this exon. + $coding_region_end = $end; + } + } + else { + # Coding region ends either within or upstream of this + # exon. + if ( $transcript_coding_end >= $self->start() ) { + $coding_region_end = $transcript_coding_end; + } + else { + $coding_region_end = undef; + } + } + } + else { + # This is a non-coding transcript. + $coding_region_end = undef; + } + + if(defined $id) { + $self->{coding_region_end}->{$id} = $coding_region_end; + $self->{coding_region_start}->{$id} = undef if ! defined $coding_region_end; + } + + return $coding_region_end; +} ## end sub coding_region_end + +=head2 slice + + Arg [1] : Bio::EnsEMBL::Slice + Example : $slice = $exon->slice(); + Description: Getter/Setter for the slice this exon is on. The superclass + implmentation is overridden to flush the internal sequence + cache if this value is altered + Returntype : Bio::EnsEMBL::Slice + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub slice { + my ( $self, $slice ) = @_; + + if ( defined($slice) ) { + # If a new slice was provided, flush the internal sequence cache and + # transfer all supporting evidence to the new slice. + + delete $self->{'_seq_cache'}; + + if ( exists( $self->{'_supporting_evidence'} ) ) { + my @new_features; + + for my $old_feature ( @{ $self->{'_supporting_evidence'} } ) { + + my $new_feature; + + if ( defined( $old_feature->slice() ) ) { + $new_feature = $old_feature->transfer($slice); + } else { + # If the old feature does not have a slice, assume transfer is + # not necessary. + $new_feature = $old_feature; + } + + push( @new_features, $new_feature ); + } + + $self->{'_supporting_evidence'} = \@new_features; + } + + return $self->SUPER::slice($slice); + } elsif ( @_ > 1 ) { + return $self->SUPER::slice(undef); + } else { + return $self->SUPER::slice(); + } +} ## end sub slice + +=head2 equals + + Arg [1] : Bio::EnsEMBL::Exon exon + Example : if ($exonA->equals($exonB)) { ... } + Description : Compares two exons for equality. + The test for eqality goes through the following list + and terminates at the first true match: + + 1. If Bio::EnsEMBL::Feature::equals() returns false, + then the exons are *not* equal. + 2. If both exons have stable IDs: if these are the + same, the exons are equal, otherwise not. + 3. If the exons have the same start, end, strand, and + phase, then they are equal, otherwise not. + + Return type : Boolean (0, 1) + + Exceptions : Thrown if a non-transcript is passed as the argument. + +=cut + +sub equals { + my ( $self, $exon ) = @_; + + if ( !defined($exon) ) { return 0 } + if ( $self eq $exon ) { return 1 } + + assert_ref( $exon, 'Bio::EnsEMBL::Exon' ); + + my $feature_equals = $self->SUPER::equals($exon); + if ( defined($feature_equals) && $feature_equals == 0 ) { + return 0; + } + + if ( defined( $self->stable_id() ) && defined( $exon->stable_id() ) ) + { + if ( $self->stable_id() eq $exon->stable_id() ) { + return 1; + } + else { + return 0; + } + } + + if ( $self->start() == $exon->start() && + $self->end() == $exon->end() && + $self->strand() == $exon->strand() && + $self->phase() == $exon->phase() && + $self->end_phase() == $exon->end_phase() ) + { + return 1; + } + + return 0; +} ## end sub equals + +=head2 move + + Arg [1] : int start + Arg [2] : int end + Arg [3] : (optional) int strand + Example : None + Description: Sets the start, end and strand in one call rather than in + 3 seperate calls to the start(), end() and strand() methods. + This is for convenience and for speed when this needs to be + done within a tight loop. This overrides the superclass + move() method so that the internal sequence cache can be + flushed if the exon if moved. + Returntype : none + Exceptions : Thrown is invalid arguments are provided + Caller : general + Status : Stable + +=cut + +sub move { + my $self = shift; + # flush the internal sequence cache + delete $self->{'_seq_cache'}; + return $self->SUPER::move(@_); +} + + +=head2 transform + + Arg 1 : String $coordinate_system_name + Arg [2] : String $coordinate_system_version + Description: moves this exon to the given coordinate system. If this exon has + attached supporting evidence, they move as well. + Returntype : Bio::EnsEMBL::Exon + Exceptions : wrong parameters + Caller : general + Status : Stable + +=cut + +sub transform { + my $self = shift; + + # catch for old style transform calls + if( !@_ || ( ref $_[0] && + ($_[0]->isa( "Bio::EnsEMBL::Slice" ) or $_[0]->isa( "Bio::EnsEMBL::LRGSlice" )) + )) { + deprecate('Calling transform without a coord system name is deprecated.'); + return $self->_deprecated_transform(@_); + } + + my $new_exon = $self->SUPER::transform( @_ ); + if (not defined $new_exon or + $new_exon->length != $self->length) { + return undef; + } + + if( exists $self->{'_supporting_evidence'} ) { + my @new_features; + for my $old_feature ( @{$self->{'_supporting_evidence'}} ) { + my $new_feature = $old_feature->transform( @_ ); + if (defined $new_feature) { + push( @new_features, $new_feature ); + } + } + $new_exon->{'_supporting_evidence'} = \@new_features; + } + + #dont want to share the same sequence cache + delete $new_exon->{'_seq_cache'}; + + return $new_exon; +} + + +=head2 transfer + + Arg [1] : Bio::EnsEMBL::Slice $destination_slice + Example : none + Description: Moves this Exon to given target slice coordinates. If Features + are attached they are moved as well. Returns a new exon. + Returntype : Bio::EnsEMBL::Gene + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub transfer { + my $self = shift; + + my $new_exon = $self->SUPER::transfer( @_ ); + return undef unless $new_exon; + + if( exists $self->{'_supporting_evidence'} ) { + my @new_features; + for my $old_feature ( @{$self->{'_supporting_evidence'}} ) { + my $new_feature = $old_feature->transfer( @_ ); + push( @new_features, $new_feature ); + } + $new_exon->{'_supporting_evidence'} = \@new_features; + } + + #dont want to share the same sequence cache + delete $new_exon->{'_seq_cache'}; + + return $new_exon; +} + + +=head2 add_supporting_features + + Arg [1] : Bio::EnsEMBL::Feature $feature + Example : $exon->add_supporting_features(@features); + Description: Adds a list of supporting features to this exon. + Duplicate features are not added. + If supporting features are added manually in this + way, prior to calling get_all_supporting_features then the + get_all_supporting_features call will not retrieve supporting + features from the database. + Returntype : none + Exceptions : throw if any of the features are not Feature + throw if any of the features are not in the same coordinate + system as the exon + Caller : general + Status : Stable + +=cut + +sub add_supporting_features { + my ($self,@features) = @_; + + return unless @features; + + $self->{_supporting_evidence} ||= []; + + # check whether this feature object has been added already + FEATURE: foreach my $feature (@features) { + unless($feature && $feature->isa("Bio::EnsEMBL::Feature")) { + throw("Supporting feat [$feature] not a " . + "Bio::EnsEMBL::Feature"); + } + + if ((defined $self->slice() && defined $feature->slice())&& + ( $self->slice()->name() ne $feature->slice()->name())){ + throw("Supporting feat not in same coord system as exon\n" . + "exon is attached to [".$self->slice()->name()."]\n" . + "feat is attached to [".$feature->slice()->name()."]"); + } + + foreach my $added_feature ( @{ $self->{_supporting_evidence} } ){ + # compare objects + if ( $feature == $added_feature ){ + # this feature has already been added + next FEATURE; + } + } + + # no duplicate was found, add the feature + push(@{$self->{_supporting_evidence}},$feature); + } +} + + +=head2 flush_supporting_features + + Example : $exon->flush_supporting_features; + Description : Removes all supporting evidence from the exon. + Return type : (Empty) listref + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub flush_supporting_features { + my $self = shift; + $self->{'_supporting_evidence'} = []; +} + + +=head2 get_all_supporting_features + + Arg [1] : none + Example : @evidence = @{$exon->get_all_supporting_features()}; + Description: Retreives any supporting features added manually by + calls to add_supporting_features. If no features have been + added manually and this exon is in a database (i.e. it h + Returntype : listreference of Bio::EnsEMBL::BaseAlignFeature objects + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub get_all_supporting_features { + my $self = shift; + + if( !exists $self->{_supporting_evidence} ) { + if($self->adaptor) { + my $sfa = $self->adaptor->db->get_SupportingFeatureAdaptor(); + $self->{_supporting_evidence} = $sfa->fetch_all_by_Exon($self); + } + } + + return $self->{_supporting_evidence} || []; +} + + +=head2 find_supporting_evidence + +# This method is only for genebuild backwards compatibility. +# Avoid using it if possible + + Arg [1] : Bio::EnsEMBL::Feature $features + The list of features to search for supporting (i.e. overlapping) + evidence. + Arg [2] : (optional) boolean $sorted + Used to speed up the calculation of overlapping features. + Should be set to true if the list of features is sorted in + ascending order on their start coordinates. + Example : $exon->find_supporting_evidence(\@features); + Description: Looks through all the similarity features and + stores as supporting features any feature + that overlaps with an exon. + Returntype : none + Exceptions : none + Caller : general + Status : Medium Risk + +=cut + +sub find_supporting_evidence { + my ($self,$features,$sorted) = @_; + + foreach my $f (@$features) { + # return if we have a sorted feature array + if ($sorted == 1 && $f->start > $self->end) { + return; + } + if ($f->sub_SeqFeature) { + my @subf = $f->sub_SeqFeature; + + $self->find_supporting_evidence(\@subf); + } + else { + if ($f->entire_seq()->name eq $self->slice()->name) { + if ($f->end >= $self->start && $f->start <= $self->end && $f->strand == $self->strand) { + $self->add_supporting_features($f); + } + } + } + } +} + + +=head2 stable_id + + Arg [1] : string $stable_id + Example : none + Description: get/set for attribute stable_id + Returntype : string + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub stable_id { + my $self = shift; + $self->{'stable_id'} = shift if( @_ ); + return $self->{'stable_id'}; +} + + +=head2 created_date + + Arg [1] : string $created_date + Example : none + Description: get/set for attribute created_date + Returntype : string + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub created_date { + my $self = shift; + $self->{'created_date'} = shift if ( @_ ); + return $self->{'created_date'}; +} + + +=head2 modified_date + + Arg [1] : string $modified_date + Example : none + Description: get/set for attribute modified_date + Returntype : string + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub modified_date { + my $self = shift; + $self->{'modified_date'} = shift if ( @_ ); + return $self->{'modified_date'}; +} + + +=head2 version + + Arg [1] : string $version + Example : none + Description: get/set for attribute version + Returntype : string + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub version { + my $self = shift; + $self->{'version'} = shift if( @_ ); + return $self->{'version'}; +} + + +=head2 is_current + + Arg [1] : Boolean $is_current + Example : $exon->is_current(1) + Description: Getter/setter for is_current state of this exon. + Returntype : Int + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub is_current { + my ( $self, $value ) = @_; + + if ( defined($value) ) { + $self->{'is_current'} = $value; + } + return $self->{'is_current'}; +} + +=head2 is_constitutive + + Arg [1] : Boolean $is_constitutive + Example : $exon->is_constitutive(0) + Description: Getter/setter for is_constitutive state of this exon. + Returntype : Int + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub is_constitutive { + my ( $self, $value ) = @_; + + if ( defined($value) ) { + $self->{'is_constitutive'} = $value; + } + return $self->{'is_constitutive'}; +} + + +=head2 adjust_start_end + + Arg 1 : int $start_adjustment + Arg 2 : int $end_adjustment + Example : none + Description: returns a new Exon with this much shifted coordinates + Returntype : Bio::EnsEMBL::Exon + Exceptions : none + Caller : Transcript->get_all_translateable_Exons() + Status : Stable + +=cut + +sub adjust_start_end { + my ( $self, $start_adjust, $end_adjust ) = @_; + + my $new_exon = Bio::EnsEMBL::Exon->new(); + %{$new_exon} = %{$self}; + + #invalidate the sequence cache + delete $new_exon->{'_seq_cache'}; + + if( $self->strand() == 1 ) { + $new_exon->start( $self->start() + $start_adjust ); + $new_exon->end( $self->end() + $end_adjust ) + } else { + $new_exon->start( $self->start() - $end_adjust ); + $new_exon->end( $self->end() - $start_adjust ) + } + + return $new_exon; +} + + +=head2 peptide + + Arg [1] : Bio::EnsEMBL::Transcript $tr + Example : my $pep_str = $exon->peptide($transcript)->seq; + Description: Retrieves the portion of the transcripts peptide + encoded by this exon. The transcript argument is necessary + because outside of the context of a transcript it is not + possible to correctly determine the translation. Note that + an entire amino acid will be present at the exon boundaries + even if only a partial codon is present. Therefore the + concatenation of all of the peptides of a transcripts exons + is not the same as a transcripts translation because the + summation may contain duplicated amino acids at splice sites. + In the case that this exon is entirely UTR, a Bio::Seq object + with an empty sequence string is returned. + Returntype : Bio::Seq + Exceptions : thrown if transcript argument is not provided + Caller : general + Status : Stable + +=cut + +sub peptide { + my $self = shift; + my $tr = shift; + + unless($tr && ref($tr) && $tr->isa('Bio::EnsEMBL::Transcript')) { + throw("transcript arg must be Bio::EnsEMBL:::Transcript not [$tr]"); + } + + #convert exons coordinates to peptide coordinates + my $tmp_exon = $self->transfer($tr->slice); + if (!$tmp_exon) { + throw("Couldn't transfer exon to transcript's slice"); + } + + my @coords = + $tr->genomic2pep($tmp_exon->start, $tmp_exon->end, $tmp_exon->strand); + + #filter out gaps + @coords = grep {$_->isa('Bio::EnsEMBL::Mapper::Coordinate')} @coords; + + #if this is UTR then the peptide will be empty string + my $pep_str = ''; + + + if(scalar(@coords) > 1) { + my $coord = $self->_merge_ajoining_coords(\@coords); + if($coord) { + @coords = ($coord); + } + else { + my ($e_id, $tr_id) = ($self->stable_id(), $tr->stable_id()); + throw("Error. Exon maps to multiple locations in peptide and those". + " locations are not continuous." . + " Is this exon [$e_id] a member of this transcript [$tr_id]?"); + } + } + elsif(scalar(@coords) == 1) { + my $c = $coords[0]; + my $pep = $tr->translate; + + #bioperl doesn't give back residues for incomplete codons + #make sure we don't subseq too far... + my ($start, $end); + $end = ($c->end > $pep->length) ? $pep->length : $c->end; + $start = ($c->start < $end) ? $c->start : $end; + $pep_str = $tr->translate->subseq($start, $end); + } + + return + Bio::Seq->new( -seq => $pep_str, + -moltype => 'protein', + -alphabet => 'protein', + -id => $self->display_id ); +} + +=head2 _merge_ajoining_coords + + Arg [1] : ArrayRef of Bio::EnsEMBL::Mapper::Coordinate objects + Example : + Description : Merges coords which are ajoining or overlapping + Returntype : Bio::EnsEMBL::Mapper::Coordinate or undef if it cannot happen + Exceptions : Exception if the cooords cannot be condensed into one location + Caller : internal + Status : Development + +=cut + +sub _merge_ajoining_coords { + my ($self, $coords) = @_; + + my $okay = 1; + my $coord = shift @{$coords}; + my $start = $coord->start(); + my $last_end = $coord->end(); + foreach my $other_coord (@{$coords}) { + if( ($last_end + 1) >= $other_coord->start() ) { + $last_end = $other_coord->end(); + } + else { + $okay = 0; + last; + } + } + + if(!$okay) { + return; + } + + my $new_coord = Bio::EnsEMBL::Mapper::Coordinate->new( + $coord->id(), $start, $last_end, $coord->strand(), $coord->rank()); + return $new_coord; +} + + + + +=head2 seq + + Arg [1] : none + Example : my $seq_str = $exon->seq->seq; + Description: Retrieves the dna sequence of this Exon. + Returned in a Bio::Seq object. Note that the sequence may + include UTRs (or even be entirely UTR). + Returntype : Bio::Seq or undef + Exceptions : warning if argument passed, + warning if exon does not have attatched slice + warning if exon strand is not defined (or 0) + Caller : general + Status : Stable + +=cut + +sub seq { + my ( $self, $arg ) = @_; + + if ( defined $arg ) { + warning("seq setting on Exon not supported currently"); + $self->{'_seq_cache'} = $arg->seq(); + } + + if ( !defined( $self->{'_seq_cache'} ) ) { + my $seq; + + if ( !defined $self->slice() ) { + warning("Cannot retrieve seq for exon without slice\n"); + return undef; + } + + if ( !$self->strand() ) { + warning("Cannot retrieve seq for unstranded exon\n"); + return undef; + } + + if ($self->slice->is_circular() ) { + if ( $self->slice->start > $self->slice->end) { +# Normally exons overlapping chromosome origin will have negative feature start, but slice will be from 1 .. length +# But in case you got an exon attached to a sub slice try this + my $mid_point = $self->slice()->seq_region_length() - $self->slice()->start() + 1; + my $seq1 = $self->slice()->subseq( $self->start(), $mid_point, $self->strand() ); + + my $seq2 = $self->slice()->subseq( $mid_point + 1, $self->end(), $self->strand() ); + + $seq = $self->strand() > 0 ? "$seq1$seq2" : "$seq2$seq1"; + } elsif ( $self->start < 0 || $self->start > $self->end) { +# Normally exons overlapping chromosome origin will be 0 based, and can have negative start +# But if you go via sub_Slice it gives you chromosome based coordinates, i.e it will have start greater then end + my $start_point = $self->slice->seq_region_length + $self->slice->start; + my $mid_point = $self->slice->seq_region_length; + my $seq1 = $self->slice->subseq( $self->start, $mid_point, $self->strand); + my $seq2 = $self->slice->subseq(1, $self->end, $self->strand ); + $seq = $self->strand > 0 ? "$seq1$seq2" : "$seq2$seq1"; + } else { +# End this is the case for genes not overlapping the origin + $seq = $self->slice()->subseq( $self->start(), $self->end(), $self->strand() ); + } + } else { + $seq = $self->slice()->subseq( $self->start(), $self->end(), $self->strand() ); + } + + $self->{'_seq_cache'} = $seq; + } ## end if ( !defined( $self->...)) + + return + Bio::Seq->new( -seq => $self->{'_seq_cache'}, + -id => $self->display_id, + -moltype => 'dna', + -alphabet => 'dna' ); +} ## end sub seq + + +=head2 hashkey + + Arg [1] : none + Example : if(exists $hash{$exon->hashkey}) { do_something(); } + Description: Returns a unique hashkey that can be used to uniquely identify + this exon. Exons are considered to be identical if they share + the same seq_region, start, end, strand, phase, end_phase. + Note that this will consider two exons on different slices + to be different, even if they actually are not. + Returntype : string formatted as slice_name-start-end-strand-phase-end_phase + Exceptions : thrown if not all the necessary attributes needed to generate + a unique hash value are set + set + Caller : general + Status : Stable + +=cut + +sub hashkey { + my $self = shift; + + my $slice = $self->{'slice'}; + my $slice_name = ($slice) ? $slice->name() : undef; + my $start = $self->{'start'}; + my $end = $self->{'end'}; + my $strand = $self->{'strand'}; + my $phase = $self->{'phase'}; + my $end_phase = $self->{'end_phase'}; + + if(!defined($slice_name)) { + throw('Slice must be set to generate correct hashkey.'); + } + + if(!defined($start)) { + warning("start attribute must be defined to generate correct hashkey."); + } + + if(!defined($end)) { + throw("end attribute must be defined to generate correct hashkey."); + } + + if(!defined($strand)) { + throw("strand attribute must be defined to generate correct hashkey."); + } + + if(!defined($phase)) { + throw("phase attribute must be defined to generate correct hashkey."); + } + + if(!defined($end_phase)) { + throw("end_phase attribute must be defined to generate correct hashkey."); + } + + return "$slice_name-$start-$end-$strand-$phase-$end_phase"; +} + + +=head2 display_id + + Arg [1] : none + Example : print $exons->display_id(); + Description: This method returns a string that is considered to be + the 'display' identifier. For exons this is (depending on + availability and in this order) the stable Id, the dbID or an + empty string. + Returntype : string + Exceptions : none + Caller : web drawing code + Status : Stable + +=cut + +sub display_id { + my $self = shift; + return $self->{'stable_id'} || $self->dbID || ''; +} + + +=head2 load + + Args : None + Example : $exon->load(); + Description : The Ensembl API makes extensive use of + lazy-loading. Under some circumstances (e.g., + when copying genes between databases), all data of + an object needs to be fully loaded. This method + loads the parts of the object that are usually + lazy-loaded. + Returns : Nothing. + +=cut + +sub load { + my ($self) = @_; + + $self->analysis(); + $self->stable_id(); + $self->get_all_supporting_features(); +} + +=head1 DEPRECATED METHODS + +=cut + + +=head2 _get_stable_entry_info + + Description: DEPRECATED. + +=cut + +sub _get_stable_entry_info { + my $self = shift; + deprecate( "This function shouldnt be called any more" ); + if( !defined $self->adaptor ) { + return undef; + } + $self->adaptor->get_stable_entry_info($self); +} + + +=head2 temporary_id + + Description: DEPRECATED. This should not be necessary. + +=cut + +sub temporary_id { + my $self = shift; + deprecate('It should not be necessary to use this method.'); + $self->{'tempID'} = shift if(@_); + return $self->{'tempID'}; +} + + +=head2 created + + Description: DEPRECATED. Do not use. + +=cut + +sub created { + my ($self,$value) = @_; + deprecate( "Created attribute not supported any more." ); + if(defined $value ) { + $self->{'_created'} = $value; + } + return $self->{'_created'}; +} + +=head2 modified + + Description: DEPRECATED. Do not use. + +=cut + + +sub modified { + my ($self,$value) = @_; + deprecate( "Modified attribute not supported any more." ); + if( defined $value ) { + $self->{'_modified'} = $value; + } + return $self->{'_modified'}; +} + + +=head2 type + + Description: DEPRECATED. Do not use. + +=cut + +sub type { + my ($self,$value) = @_; + deprecate("Type attribute not supported anymore."); + if (defined($value)) { + $self->{'type'} = $value; + } + return $self->{'type'}; +} + + +1;