diff variant_effect_predictor/Bio/EnsEMBL/Feature.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Feature.pm	Thu Apr 11 06:29:17 2013 -0400
@@ -0,0 +1,1582 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL::Feature - Ensembl specific sequence feature.
+
+=head1 SYNOPSIS
+
+    my $feat = new Bio::EnsEMBL::Feature(
+      -start  => 100,
+      -end    => 220,
+      -strand => -1,
+      -slice  => $slice -analysis => $analysis
+    );
+
+    my $start  = $feat->start();
+    my $end    = $feat->end();
+    my $strand = $feat->strand();
+
+    # Move the feature to the chromosomal coordinate system
+    $feature = $feature->transform('chromosome');
+
+    # Move the feature to a different slice (possibly on another coord
+    # system)
+    $feature = $feature->transfer($new_slice);
+
+    # Project the feature onto another coordinate system possibly across
+    # boundaries:
+    @projection = @{ $feature->project('contig') };
+
+    # Change the start, end, and strand of the feature in place
+    $feature->move( $new_start, $new_end, $new_strand );
+
+=head1 DESCRIPTION
+
+This is the Base feature class from which all Ensembl features inherit.
+It provides a bare minimum functionality that all features require.  It
+basically describes a location on a sequence in an arbitrary coordinate
+system.
+
+=head1 METHODS
+
+=cut
+
+
+package Bio::EnsEMBL::Feature;
+
+use strict;
+use warnings;
+
+use Bio::EnsEMBL::Storable;
+use Bio::EnsEMBL::Utils::Argument qw(rearrange);
+use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
+use Bio::EnsEMBL::Utils::Scalar qw(check_ref assert_ref);
+use Bio::EnsEMBL::Slice;
+use Bio::EnsEMBL::StrainSlice;
+use vars qw(@ISA);
+
+use Scalar::Util qw(weaken isweak);
+
+@ISA = qw(Bio::EnsEMBL::Storable);
+
+
+=head2 new
+
+  Arg [-SLICE]: Bio::EnsEMBL::SLice - Represents the sequence that this
+                feature is on. The coordinates of the created feature are
+                relative to the start of the slice.
+  Arg [-START]: The start coordinate of this feature relative to the start
+                of the slice it is sitting on.  Coordinates start at 1 and
+                are inclusive.
+  Arg [-END]  : The end coordinate of this feature relative to the start of
+                the slice it is sitting on.  Coordinates start at 1 and are
+                inclusive.
+  Arg [-STRAND]: The orientation of this feature.  Valid values are 1,-1,0.
+  Arg [-SEQNAME] : A seqname to be used instead of the default name of the 
+                of the slice.  Useful for features that do not have an 
+                attached slice such as protein features.
+  Arg [-dbID]   : (optional) internal database id
+  Arg [-ADAPTOR]: (optional) Bio::EnsEMBL::DBSQL::BaseAdaptor
+  Example    : $feature = Bio::EnsEMBL::Feature->new(-start    => 1, 
+                                                     -end      => 100,
+                                                     -strand   => 1,
+                                                     -slice    => $slice,
+                                                     -analysis => $analysis);
+  Description: Constructs a new Bio::EnsEMBL::Feature.  Generally subclasses
+               of this method are instantiated, rather than this class itself.
+  Returntype : Bio::EnsEMBL::Feature
+  Exceptions : Thrown on invalid -SLICE, -ANALYSIS, -STRAND ,-ADAPTOR arguments
+  Caller     : general, subclass constructors
+  Status     : Stable
+
+=cut
+
+
+sub new {
+  my $caller = shift;
+
+  my $class = ref($caller) || $caller;
+  my ( $start, $end, $strand, $slice, $analysis,$seqname, $dbID, $adaptor ) =
+      rearrange(['START','END','STRAND','SLICE','ANALYSIS', 'SEQNAME',
+		 'DBID', 'ADAPTOR'], @_);   
+  if($slice) {
+    if(!ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
+      throw('-SLICE argument must be a Bio::EnsEMBL::Slice not '.$slice);
+    }
+  }
+
+  if($analysis) {
+    if(!ref($analysis) || !$analysis->isa('Bio::EnsEMBL::Analysis')) {
+      throw('-ANALYSIS argument must be a Bio::EnsEMBL::Analysis not '.
+            $analysis);
+    }
+  }
+
+  if(defined($strand)) {
+    if(!($strand == 1) && !($strand == -1) && !($strand == 0)) {
+      throw('-STRAND argument must be 1, -1, or 0');
+    }
+  }
+
+  if(defined($start) && defined($end)) {
+      if (($start =~ /\d+/) && ($end =~ /\d+/)) {
+	  if($end+1 < $start) {
+	      throw(sprintf('Start (%d) must be less than or equal to end+1 (%d)', $start, ($end+1)));
+	  }
+      } else {
+	      throw('Start and end must be integers');
+      }
+  }
+
+  my $self =  bless({'start'    => $start,
+                'end'      => $end,
+                'strand'   => $strand,
+                'slice'    => $slice,
+                'analysis' => $analysis,
+                'seqname'  => $seqname,
+                'dbID'     => $dbID}, $class);
+
+  $self->adaptor($adaptor);
+  return $self;
+}
+
+
+=head2 new_fast
+
+  Arg [1]    : hashref to be blessed
+  Description: Construct a new Bio::EnsEMBL::Feature using the hashref.
+  Exceptions : none
+  Returntype : Bio::EnsEMBL::Feature
+  Caller     : general, subclass constructors
+  Status     : Stable
+
+=cut
+
+
+sub new_fast {
+  my $class = shift;
+  my $hashref = shift;
+  my $self = bless $hashref, $class;
+  weaken($self->{adaptor})  if ( ! isweak($self->{adaptor}) );
+  return $self;
+}
+
+=head2 start
+
+  Arg [1]    : (optional) int $start
+               The start of this feature relative to the start of the slice
+               that it is on.
+  Example    : $start = $feat->start()
+  Description: Getter/Setter for the start of this feature relative to the 
+               start of the slice it is on.  Note that negative values, or
+               values exceeding the length of the slice are permitted.
+               Start must be less than or equal to the end regardless of the 
+               strand. Coordinate values start at 1 and are inclusive.
+  Returntype : int
+  Exceptions : none
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub start {
+  my ( $self, $value ) = @_;
+
+  if ( defined($value) ) {
+    $self->{'start'} = $value;
+  }
+
+  return $self->{'start'};
+}
+
+
+
+=head2 end
+
+  Arg [1]    : (optional) int $end
+  Example    : $end = $feat->end();
+  Description: Getter/Setter for the end of this feature relative to the
+               start of the slice that it is on.  Note that negative values,
+               of values exceeding the length of the slice are permitted.  End
+               must be greater than or equal to start regardless of the strand.
+               Coordinate values start at 1 and are inclusive.
+  Returntype : int
+  Exceptions : none
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub end {
+  my ( $self, $value ) = @_;
+
+  if ( defined($value) ) {
+    $self->{'end'} = $value;
+  }
+
+  return $self->{'end'};
+}
+
+
+
+
+=head2 strand
+
+  Arg [1]    : (optional) int $strand
+  Example    : $feat->strand(-1);
+  Description: Getter/Setter for the strand of this feature relative to the
+               slice it is on.  0 is an unknown or non-applicable strand.  
+               -1 is the reverse (negative) strand and 1 is the forward 
+               (positive) strand.  No other values are permitted.
+  Returntype : int
+  Exceptions : thrown if an invalid strand argument is passed
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub strand {
+  my ( $self, $strand ) = @_;
+
+  if ( defined($strand) ) {
+    if ( $strand != 0 && $strand != 1 && $strand != -1 ) {
+      throw('strand argument must be 0, -1 or 1');
+    }
+
+    $self->{'strand'} = $strand;
+  }
+
+  return $self->{'strand'};
+}
+
+=head2 move
+
+  Arg [1]    : int start
+  Arg [2]    : int end
+  Arg [3]    : (optional) int strand
+  Example    : None
+  Description: Sets the start, end and strand in one call rather than in 
+               3 seperate calls to the start(), end() and strand() methods.
+               This is for convenience and for speed when this needs to be
+               done within a tight loop.
+  Returntype : none
+  Exceptions : Thrown is invalid arguments are provided
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub move {
+  my $self = shift;
+
+  throw('start and end arguments are required') if(@_ < 2);
+
+  my $start  = shift;
+  my $end    = shift;
+  my $strand = shift;
+
+  if(defined($start) && defined($end) && $end < $start) {
+    throw('start must be less than or equal to end');
+  }
+  if(defined($strand) && $strand != 0 && $strand != -1 && $strand != 1) {
+    throw('strand must be 0, -1 or 1');
+  }
+
+  $self->{'start'} = $start;
+  $self->{'end'} = $end;
+  $self->{'strand'} = $strand if(defined($strand));
+}
+
+
+
+=head2 length
+
+  Arg [1]    : none
+  Example    : $length = $feat->length();
+  Description: Returns the length of this feature
+  Returntype : Integer
+  Exceptions : Throws if end < start and the feature is not on a
+               circular slice
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub length {
+  my ($self) = @_;
+
+  if ( $self->{'end'} < $self->{'start'} ) {
+    # if circular, we can work out the length of an origin-spanning
+    # feature using the size of the underlying region.
+    if ( $self->slice() && $self->slice()->is_circular() ) {
+      my $len =
+        $self->slice()->seq_region_length() -
+        ( $self->{'start'} - $self->{'end'} ) + 1;
+      return $len;
+    } else {
+      throw(   "Cannot determine length of non-circular feature "
+             . "where start > end" );
+    }
+  }
+
+  return $self->{'end'} - $self->{'start'} + 1;
+}
+
+=head2 analysis
+
+  Arg [1]    : (optional) Bio::EnsEMBL::Analysis $analysis
+  Example    : $feature->analysis(new Bio::EnsEMBL::Analysis(...))
+  Description: Getter/Setter for the analysis that is associated with 
+               this feature.  The analysis describes how this feature 
+               was derived.
+  Returntype : Bio::EnsEMBL::Analysis
+  Exceptions : thrown if an invalid argument is passed
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub analysis {
+  my $self = shift;
+
+  if(@_) {
+    my $an = shift;
+    if(defined($an) && (!ref($an) || !$an->isa('Bio::EnsEMBL::Analysis'))) {
+      throw('analysis argument must be a Bio::EnsEMBL::Analysis');
+    }
+    $self->{'analysis'} = $an;
+  }
+
+  return $self->{'analysis'};
+}
+
+
+
+=head2 slice
+
+  Arg [1]    : (optional) Bio::EnsEMBL::Slice $slice
+  Example    : $seqname = $feature->slice()->name();
+  Description: Getter/Setter for the Slice that is associated with this 
+               feature.  The slice represents the underlying sequence that this
+               feature is on.  Note that this method call is analagous to the
+               old SeqFeature methods contig(), entire_seq(), attach_seq(),
+               etc.
+  Returntype : Bio::EnsEMBL::Slice
+  Exceptions : thrown if an invalid argument is passed
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub slice {
+  my ( $self, $slice ) = @_;
+
+  if ( defined($slice) ) {
+    if (    !check_ref( $slice, 'Bio::EnsEMBL::Slice' )
+         && !check_ref( $slice, 'Bio::EnsEMBL::LRGSlice' ) )
+    {
+      throw('slice argument must be a Bio::EnsEMBL::Slice');
+    }
+
+    $self->{'slice'} = $slice;
+  } elsif ( @_ > 1 ) {
+    delete($self->{'slice'});
+  }
+
+  return $self->{'slice'};
+}
+
+=head2 equals
+
+  Arg [1]       : Bio::EnsEMBL::Feature object
+  Example       : if ($featureA->equals($featureB)) { ... }
+  Description   : Compares two features using various criteria.  The
+                  test for eqality goes through the following list and
+                  terminates at the first true match:
+
+                  1. If the two features are the same object, they are
+                     equal.
+                  2. If they are of different types (e.g., transcript
+                     and gene), they are *not* equal.
+                  3. If they both have dbIDs: if these are the same,
+                     then they are equal, otherwise not.
+                  4. If they both have slices and analysis objects:
+                     if the analysis dbIDs are the same and the
+                     seq_region_id are the same, along with
+                     seq_region_start and seq_region_end, then they are
+                     equal, otherwise not.
+
+                  If none of the above is able to determine equality,
+                  undef is returned.
+
+    Return type : tri-Boolean (0, 1, undef = "unknown")
+
+    Exceptions  : Thrown if a non-feature is passed as the argument.
+
+=cut
+
+sub equals {
+  my ( $self, $feature ) = @_;
+
+  # If the features are the same object, they are equal.
+  if ( !defined($feature) ) { return 0 }
+  if ( $self eq $feature ) { return 1 }
+
+  assert_ref( $feature, 'Bio::EnsEMBL::Feature' );
+
+  # If the features have different types, they are *not* equal.
+  if ( ref($self) ne ref($feature) ) {
+    return 0;
+  }
+
+  # If the features has the same dbID, they are equal.
+  if ( defined( $self->dbID() ) && defined( $feature->dbID() ) ) {
+    if   ( $self->dbID() == $feature->dbID() ) { return 1 }
+    else                                       { return 0 }
+  }
+
+  # We now know that one of the features do not have a dbID.
+
+  # If the features have the same start, end, strand and seq_region_id,
+  # and analysis_id, they are equal.
+  if (
+     ( defined( $self->analysis() ) && defined( $feature->analysis() ) )
+     && ( defined( $self->slice() ) && defined( $feature->slice() ) ) )
+  {
+    if ( ( $self->start() == $feature->start() ) &&
+         ( $self->end() == $feature->end() ) &&
+         ( $self->strand() == $feature->strand() ) &&
+         ( $self->slice()->get_seq_region_id() ==
+           $feature->slice()->get_seq_region_id() ) &&
+         ( $self->analysis()->dbID() == $feature->analysis()->dbID() ) )
+    {
+      return 1;
+    }
+    else { return 0 }
+  }
+
+  # We now know that one of the features does not have either analysis
+  # or slice.
+
+  # We don't know if the features are equal.  This happens if they are
+  # not the same object but are of the same type, and one of them lacks
+  # dbID, and if there aren't slice and analysis objects attached to
+  # them both.
+  return undef;
+} ## end sub equals
+
+
+=head2 transform
+
+  Arg [1]    : string $coord_system
+               The coord system to transform this feature to.
+  Arg [2]    : string $version (optional)
+               The version of the coord system to transform this feature to.
+  Example    : $feature = $feature->transform('contig');
+               next if(!defined($feature));
+  Description: Returns a copy of this feature, but converted to a different
+               coordinate system. The converted feature will be placed on a
+               slice which spans an entire sequence region of the new
+               coordinate system. If the requested coordinate system is the
+               same coordinate system it is simply placed on a slice which
+               spans the entire seq_region (as opposed to the original slice
+               which may have only partially covered the seq_region).
+
+               If a feature spans a boundary in the new coordinate system,
+               undef is returned instead.
+
+               For example, transforming an exon in contig coordinates to one 
+               in chromosomal coodinates will place the exon on a slice of an 
+               entire chromosome.
+  Returntype : Bio::EnsEMBL::Feature (or undef)
+  Exceptions : thrown if an invalid coordinate system is provided
+               warning if Feature is not attached to a slice
+  Caller     : general, transfer()
+  Status     : Stable
+
+=cut
+
+sub transform {
+  my $self = shift;
+  my $cs_name = shift;
+  my $cs_version = shift;
+  my $to_slice = shift;
+
+  #
+  # For backwards compatibility check if the arguments are old style args
+  #
+  if(!$cs_name || ref($cs_name)) {
+    deprecate('Calling transform without a coord system name is deprecated.');
+    return $self->_deprecated_transform($cs_name);
+  }
+
+  my $slice = $self->{'slice'};
+
+  if(!$slice) {
+    warning("Feature cannot be transformed without attached slice.");
+    return undef;
+  }
+
+  if(!$slice->adaptor()) {
+    warning("Feature cannot be transformed without adaptor on" .
+            " attached slice.");
+    return undef;
+  }
+
+  #use db from slice since this feature may not yet be stored in a database
+  my $db = $slice->adaptor->db();
+  my $cs = $db->get_CoordSystemAdaptor->fetch_by_name($cs_name, $cs_version);
+  my $current_cs = $slice->coord_system();
+
+  if(!$current_cs) {
+    warning("Feature cannot be transformed without CoordSystem on " .
+            "attached slice.");
+    return undef;
+  }
+
+  if(!$cs) {
+    throw("Cannot transform to unknown coordinate system " .
+          "[$cs_name $cs_version]\n");
+  }
+
+  # if feature is already in the requested coordinate system, we can just
+  # return a copy
+  if( $cs->equals( $current_cs ) && $slice->start() == 1 &&
+      $slice->strand() == 1 ) {
+    my $new_feature;
+    %$new_feature = %$self;
+    bless $new_feature, ref $self;
+    return $new_feature;
+  }
+  my $projection;
+  if(defined($to_slice)){
+    $projection = $self->project_to_slice( $to_slice );  }
+  else{
+    $projection = $self->project( $cs_name, $cs_version );
+  }
+
+  if(@$projection == 0){
+    return undef;
+  }
+  if( @$projection != 1 and !defined($to_slice)) {
+#    warn "MORE than one projection and NO slice specified ";
+#    warn "from ".$self->slice->name." to $cs_name, $cs_version\n";
+    return undef;
+  }
+  my $index = 0;
+  if(defined($to_slice)){
+    my $found = 0;
+    my $i = 0;
+    foreach my $proj (@{$projection}) {
+      my $slice = $proj->[2];
+      if($to_slice->get_seq_region_id eq $slice->get_seq_region_id){
+	$found =1;
+	$index = $i;
+      }
+      $i++;
+    }
+    if(!$found){
+      if(@$projection != 1){
+	if(@$projection == 0){
+	  warn "number of mappings is ".@$projection."\n";
+	  warn "could not project feature ".ref($self)." from ".$self->slice->seq_region_name." to ".$to_slice->seq_region_name."\n";
+	  warn "In the region of ".$self->slice->start." <-> ".$self->slice->end."\n";
+	  warn "feat start=".($self->slice->start+$self->start)."\tend=".($self->slice->start+$self->end)."\n";
+	}
+	else{
+	  foreach my $proj (@{$projection}) {
+	    my $slice = $proj->[2];
+	    warn "available slice ".$slice->seq_regon_name."\n";
+	  }
+	  warn "MORE than one projection and none to slice specified (".$to_slice->seq_region_name.")\n";
+	}
+      }	
+      else {
+	foreach my $proj (@{$projection}) {
+	  warn "Mapping is to ".$proj->[2]->seq_region_name."\n";
+	}
+	warn "One projection but none to slice specified\n";
+      }
+      return undef;
+    }
+  }
+ 
+  my $p_slice = $projection->[$index]->[2];
+  my $slice_adaptor = $db->get_SliceAdaptor;
+  $slice = $slice_adaptor->fetch_by_region($p_slice->coord_system()->name(),
+					   $p_slice->seq_region_name(),
+					   undef, #start
+					   undef, #end
+					   1, #strand
+					   $p_slice->coord_system()->version);
+  
+  my $new_feature;
+  %$new_feature = %$self;
+  bless $new_feature, ref $self;
+  $new_feature->{'start'}  = $p_slice->start();
+  $new_feature->{'end'}    = $p_slice->end();
+    $new_feature->{'strand'} =
+      ($self->{'strand'} == 0) ? 0 : $p_slice->strand();
+  $new_feature->{'slice'}  = $slice;
+  return $new_feature;
+
+}
+
+
+
+=head2 transfer
+
+  Arg [1]    : Bio::EnsEMBL::Slice $slice
+               The slice to transfer this feature to
+  Example    : $feature = $feature->transfer($slice);
+               next if(!defined($feature));
+  Description: Returns a copy of this feature which has been shifted onto
+               another slice.
+
+               If the new slice is in a different coordinate system the
+               feature is transformed first and then placed on the slice.
+               If the feature would be split across a coordinate system
+               boundary or mapped to a gap undef is returned instead.
+
+               If the feature cannot be placed on the provided slice because
+               it maps to an entirely different location, undef is returned
+               instead.
+
+  Returntype : Bio::EnsEMBL::Feature (or undef)
+  Exceptions : throw on incorrect argument
+               throw if feature does not have attached slice
+  Caller     : general, transform()
+  Status     : Stable
+
+=cut
+
+
+sub transfer {
+  my $self = shift;
+  my $slice = shift;
+
+  if(!$slice || !ref($slice) || (!$slice->isa('Bio::EnsEMBL::Slice') && !$slice->isa('Bio::EnsEMBL::LRGSlice'))) {
+    throw('Slice argument is required');
+  }
+
+  #make a shallow copy of the feature to be transfered
+  my $feature;
+  %{$feature} = %{$self};
+  bless $feature, ref($self);
+  weaken $feature->{adaptor};
+
+  my $current_slice = $self->{'slice'};
+
+  if(!$current_slice) {
+    warning("Feature cannot be transfered without attached slice.");
+    return undef;
+  }
+
+  my $cur_cs = $current_slice->coord_system();
+  my $dest_cs = $slice->coord_system();
+
+  #if we are not in the same coord system a transformation step is needed first
+  if(!$dest_cs->equals($cur_cs)) {
+    $feature = $feature->transform($dest_cs->name, $dest_cs->version, $slice);
+    return undef if(!defined($feature));
+    $current_slice = $feature->{'slice'};
+  }
+
+  # feature went to entirely different seq_region
+  if($current_slice->seq_region_name() ne $slice->seq_region_name()) {
+    return undef;
+  }
+
+  #if the current feature positions are not relative to the start of the
+  #seq region, convert them so they are
+  my $cur_slice_start  = $current_slice->start();
+  my $cur_slice_strand = $current_slice->strand();
+  if($cur_slice_start != 1 || $cur_slice_strand != 1) {
+    my $fstart = $feature->{'start'};
+    my $fend   = $feature->{'end'};
+
+    if($cur_slice_strand == 1) {
+      $feature->{'start'} = $fstart + $cur_slice_start - 1;
+      $feature->{'end'}   = $fend   + $cur_slice_start - 1;
+    } else {
+      my $cur_slice_end = $current_slice->end();
+      $feature->{'start'}  = $cur_slice_end - $fend   + 1;
+      $feature->{'end'}    = $cur_slice_end - $fstart + 1;
+      $feature->{'strand'} *= -1;
+    }
+  }
+
+  my $fstart = $feature->{'start'};
+  my $fend   = $feature->{'end'};
+
+  #convert to destination slice coords
+  if($slice->strand == 1) {
+    $feature->{'start'} = $fstart - $slice->start() + 1;
+    $feature->{'end'}   = $fend   - $slice->start() + 1;
+  } else {
+    $feature->{'start'} = $slice->end() - $fend   + 1;
+    $feature->{'end'}   = $slice->end() - $fstart + 1;
+    $feature->{'strand'} *= -1;
+  }
+
+  $feature->{'slice'} = $slice;
+
+  return $feature;
+}
+
+=head2 project_to_slice
+
+  Arg [1]    : slice to project to
+
+
+  Example    :
+    my $clone_projection = $feature->project_to_slice($slice);
+
+    foreach my $seg (@$clone_projection) {
+      my $clone = $seg->to_Slice();
+      print "Features current coords ", $seg->from_start, '-',
+        $seg->from_end, " project onto clone coords " .
+        $clone->seq_region_name, ':', $clone->start, '-', $clone->end,
+        $clone->strand, "\n";
+    }
+  Description: Returns the results of 'projecting' this feature onto another
+               slice . This is useful to see where a feature
+               would lie in a coordinate system in which it
+               crosses a boundary.
+
+               This method returns a reference to a list of
+               Bio::EnsEMBL::ProjectionSegment objects.
+               ProjectionSegments are blessed arrays and can also be used as
+               triplets [from_start,from_end,to_Slice]. The from_start and
+               from_end are the coordinates relative to the feature start.
+               For example, if a feature is current 100-200bp on a slice
+               then the triplets returned might be:
+               [1,50,$slice1],
+               [51,101,$slice2]
+
+               The to_Slice is a slice spanning the region on the requested
+               coordinate system that this feature projected to.
+
+               If the feature projects entirely into a gap then a reference to
+               an empty list is returned.
+
+  Returntype : listref of Bio::EnsEMBL::ProjectionSegments
+               which can also be used as [$start,$end,$slice] triplets
+  Exceptions : slice does not have an adaptor
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+sub project_to_slice {
+  my $self = shift;
+  my $to_slice = shift;
+  my $slice = $self->{'slice'};
+
+  if(!$slice) {
+    warning("Feature cannot be projected without attached slice.");
+    return [];
+  }
+
+
+  #get an adaptor from the attached slice because this feature may not yet
+  #be stored and may not have its own adaptor
+  my $slice_adaptor = $slice->adaptor();
+
+  if(!$slice_adaptor) {
+    throw("Cannot project feature because associated slice does not have an " .
+          " adaptor");
+  }
+
+  my $strand = $self->strand() * $slice->strand();
+  #fetch by feature always gives back forward strand slice:
+  $slice = $slice_adaptor->fetch_by_Feature($self);
+  $slice = $slice->invert if($strand == -1);
+  return $slice->project_to_slice($to_slice);
+}
+
+
+=head2 project
+
+  Arg [1]    : string $name
+               The name of the coordinate system to project this feature onto
+  Arg [2]    : string $version (optional)
+               The version of the coordinate system (such as 'NCBI34') to
+               project this feature onto
+  Example    :
+    my $clone_projection = $feature->project('clone');
+
+    foreach my $seg (@$clone_projection) {
+      my $clone = $seg->to_Slice();
+      print "Features current coords ", $seg->from_start, '-',
+        $seg->from_end, " project onto clone coords " .
+        $clone->seq_region_name, ':', $clone->start, '-', $clone->end,
+        $clone->strand, "\n";
+    }
+  Description: Returns the results of 'projecting' this feature onto another
+               coordinate system.  This is useful to see where a feature
+               would lie in a coordinate system in which it
+               crosses a boundary.
+
+               This method returns a reference to a list of
+               Bio::EnsEMBL::ProjectionSegment objects.
+               ProjectionSegments are blessed arrays and can also be used as
+               triplets [from_start,from_end,to_Slice]. The from_start and
+               from_end are the coordinates relative to the feature start.
+               For example, if a feature is current 100-200bp on a slice
+               then the triplets returned might be:
+               [1,50,$slice1],
+               [51,101,$slice2]
+
+               The to_Slice is a slice spanning the region on the requested
+               coordinate system that this feature projected to.
+
+               If the feature projects entirely into a gap then a reference to
+               an empty list is returned.
+
+  Returntype : listref of Bio::EnsEMBL::ProjectionSegments
+               which can also be used as [$start,$end,$slice] triplets
+  Exceptions : slice does not have an adaptor
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub project {
+  my $self = shift;
+  my $cs_name = shift;
+  my $cs_version = shift;
+
+  my $slice = $self->{'slice'};
+
+  if(!$slice) {
+    warning("Feature cannot be projected without attached slice.");
+    return [];
+  }
+
+
+  #get an adaptor from the attached slice because this feature may not yet
+  #be stored and may not have its own adaptor
+  my $slice_adaptor = $slice->adaptor();
+
+  if(!$slice_adaptor) {
+    throw("Cannot project feature because associated slice does not have an " .
+          " adaptor");
+  }
+
+  my $strand = $self->strand() * $slice->strand();
+  #fetch by feature always gives back forward strand slice:
+  $slice = $slice_adaptor->fetch_by_Feature($self);
+  $slice = $slice->invert if($strand == -1);
+  return $slice->project($cs_name, $cs_version);
+}
+
+
+
+=head2 seqname
+
+  Arg [1]    : (optional) $seqname
+  Example    : $seqname = $feat->seqname();
+  Description: Getter/Setter for the name of the sequence that this feature
+               is on. Normally you can get away with not setting this value
+               and it will default to the name of the slice on which this
+               feature is on.  It is useful to set this value on features which
+               do not ordinarily sit on features such as ProteinFeatures which
+               sit on peptides.
+  Returntype : string
+  Exceptions : none
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub seqname {
+  my $self = shift;
+  
+  if(@_) {
+    $self->{'seqname'} = shift;
+  }
+
+  if(!$self->{'seqname'} && $self->slice()) {
+    return $self->slice->name();
+  }
+
+  return $self->{'seqname'};
+}
+
+
+
+
+=head2 display_id
+
+  Arg [1]    : none
+  Example    : print $f->display_id();
+  Description: This method returns a string that is considered to be
+               the 'display' identifier.  It is overridden by subclasses to
+               return an appropriate value for objects of that particular 
+               class.  If no appropriate display id is available an empty
+               string is returned instead.
+  Returntype : string
+  Exceptions : none
+  Caller     : web drawing code
+  Status     : Stable
+
+=cut
+
+sub display_id {
+  my $self = shift;
+  return '';
+}
+
+
+=head2 feature_Slice
+
+  Args       : none
+  Example    : $slice = $feature->feature_Slice()
+  Description: This is a convenience method to return a slice that covers the
+               Area of this feature. The feature start will be at 1 on it, and
+               it will have the length of this feature.
+  Returntype : Bio::EnsEMBL::Slice or undef if this feature has no attached
+               Slice.
+  Exceptions : warning if Feature does not have attached slice.
+  Caller     : web drawing code
+  Status     : Stable
+
+=cut
+
+sub feature_Slice {
+  my $self = shift;
+
+  my $slice = $self->slice();
+
+  if(!$slice) {
+    warning('Cannot obtain Feature_Slice for feature without attached slice');
+    return undef;
+  }
+
+  if($slice->isa("Bio::EnsEMBL::StrainSlice")){
+    return Bio::EnsEMBL::StrainSlice->new
+      (-seq_region_name   => $slice->seq_region_name,
+       -seq_region_length => $slice->seq_region_length,
+       -coord_system      => $slice->coord_system,
+       -start             => $self->seq_region_start(),
+       -end               => $self->seq_region_end(),
+       -strand            => $self->seq_region_strand(),
+       -adaptor           => $slice->adaptor(),
+       -strain_name       => $slice->strain_name());
+  }
+  else{
+    return Bio::EnsEMBL::Slice->new
+      (-seq_region_name   => $slice->seq_region_name,
+       -seq_region_length => $slice->seq_region_length,
+       -coord_system      => $slice->coord_system,
+       -start             => $self->seq_region_start(),
+       -end               => $self->seq_region_end(),
+       -strand            => $self->seq_region_strand(),
+       -adaptor           => $slice->adaptor());
+  }
+}
+
+
+=head2 seq_region_name
+
+  Arg [1]    : none
+  Example    : print $feature->seq_region_name();
+  Description: Gets the name of the seq_region which this feature is on.
+               Returns undef if this Feature is not on a slice.
+  Returntype : string or undef
+  Exceptions : none
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub seq_region_name {
+  my $self = shift;
+  my $slice = $self->{'slice'};
+
+  return ($slice) ? $slice->seq_region_name() : undef;
+}
+
+
+=head2 seq_region_length
+
+  Arg [1]    : none
+  Example    : print $feature->seq_region_length();
+  Description: Returns the length of the seq_region which this feature is on 
+               Returns undef if this Feature is not on a slice.
+  Returntype : int (unsigned) or undef
+  Exceptions : none
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+
+sub seq_region_length {
+  my $self = shift;
+  my $slice = $self->{'slice'};
+
+  return ($slice) ? $slice->seq_region_length() : undef;
+}
+
+
+=head2 seq_region_strand
+
+  Arg [1]    : none
+  Example    : print $feature->seq_region_strand();
+  Description: Returns the strand of the seq_region which this feature is on 
+               (i.e. feature_strand * slice_strand)
+               Returns undef if this Feature is not on a slice.
+  Returntype : 1,0,-1 or undef
+  Exceptions : none
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+
+sub seq_region_strand {
+  my $self = shift;
+  my $slice = $self->{'slice'};
+
+  return ($slice) ? $slice->strand() * $self->{'strand'} : undef;
+}
+
+
+=head2 seq_region_start
+
+  Arg [1]    : none
+  Example    : print $feature->seq_region_start();
+  Description: Convenience method which returns the absolute start of this
+               feature on the seq_region, as opposed to the relative (slice) 
+               position.
+
+               Returns undef if this feature is not on a slice.
+  Returntype : int or undef
+  Exceptions : none
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub seq_region_start {
+  my ($self) = @_;
+
+  my $slice = $self->slice();
+
+  if ( defined($slice) ) {
+    my $start;
+
+    if ( $slice->strand() == 1 ) {
+      if ( defined( $self->start() ) ) {
+	  if ($self->start < 0 && $slice->is_circular) {
+	      $start = $slice->seq_region_length + $self->start;
+	  } else {
+        $start = $slice->start() + $self->start() - 1;
+    }
+      }
+    } else {
+      if ( defined( $self->end() ) ) {
+        $start = $slice->end() - $self->end() + 1;
+      }
+    }
+
+    if (    defined($start)
+         && $slice->is_circular()
+         && $start > $slice->seq_region_length() )
+    {
+      $start -= $slice->seq_region_length();
+    }
+
+    return $start;
+  }
+
+  return undef;
+} ## end sub seq_region_start
+
+
+=head2 seq_region_end
+
+  Arg [1]    : none
+  Example    : print $feature->seq_region_end();
+  Description: Convenience method which returns the absolute end of this
+               feature on the seq_region, as opposed to the relative (slice)
+               position.
+
+               Returns undef if this feature is not on a slice.
+  Returntype : int or undef
+  Exceptions : none
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub seq_region_end {
+  my ($self) = @_;
+
+  my $slice = $self->slice();
+
+  if ( defined($slice) ) {
+    my $end;
+
+    if ( $slice->strand() == 1 ) {
+      if ( defined( $self->end() ) ) {
+        $end = $slice->start() + $self->end() - 1;
+      }
+    } else {
+      if ( defined( $self->start() ) ) {
+        $end = $slice->end() - $self->start() + 1;
+      }
+    }
+
+    if (    defined($end)
+         && $slice->is_circular()
+         && $end > $slice->seq_region_length() )
+    {
+      $end -= $slice->seq_region_length();
+    }
+
+    return $end;
+  }
+
+  return undef;
+} ## end sub seq_region_end
+
+
+=head2 coord_system_name
+
+  Arg [1]    : none
+  Example    : print $feature->coord_system_name()
+  Description: Gets the name of the coord_system which this feature is on.
+               Returns undef if this Feature is not on a slice.
+  Returntype : string or undef
+  Exceptions : none
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub coord_system_name {
+  my $self = shift;
+  my $slice = $self->{'slice'};
+  return ($slice) ? $slice->coord_system_name() : undef;
+}
+
+
+=head2 seq
+
+  Args       : none
+  Example    : my $dna_sequence = $simple_feature->seq();
+  Description: Returns the dna sequence from the attached slice and 
+               attached database that overlaps with this feature.
+               Returns undef if there is no slice or no database.
+               Returns undef if this feature is unstranded (i.e. strand=0).
+  Returntype : undef or string
+  Exceptions : warning if this feature is not stranded
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+
+sub seq {
+  my $self = shift;
+
+  if( ! defined $self->{'slice'} ) {
+    return undef;
+  }
+
+  if(!$self->strand()) {
+    warning("Cannot retrieve sequence for unstranded feature.");
+    return undef;
+  }
+
+  return $self->{'slice'}->subseq($self->start(), $self->end(),
+                                  $self->strand());
+
+}
+
+
+
+
+=head2 get_all_alt_locations
+
+  Arg [1]    : none
+  Example    : @features = @{$feature->get_all_alt_locations()};
+               foreach $f (@features) {
+                 print $f->slice->seq_region_name,' ',$f->start, $f->end,"\n";
+               }
+
+  Description: Retrieves shallow copies of this feature in its alternate
+               locations.  A feature can be considered to have multiple
+               locations when it sits on a alternative structural haplotype
+               or when it is on a pseudo autosomal region.  Most features will
+               just return a reference to an empty list though.
+               The features returned by this method will be on a slice which
+               covers the entire alternate region.
+
+               Currently this method does not take into account alternate
+               locations on the alternate locations (e.g. a reference
+               sequence may have multiple alternate haplotypes.  Asking
+               for alternate locations of a feature on one of the alternate
+               haplotypes will give you back the reference location, but not
+               locations on the other alternate haplotypes).
+
+  Returntype : listref of features of the same type of this feature.
+  Exceptions : none
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub get_all_alt_locations {
+  my $self = shift;
+  my $return_all = shift || 0;
+
+  my $slice = $self->{'slice'} or return [];
+  my $sa = $slice->adaptor() or return [];
+
+  # get slice of entire region
+  $slice = $sa->fetch_by_seq_region_id($slice->get_seq_region_id);
+
+  my $axfa = $sa->db->get_AssemblyExceptionFeatureAdaptor();
+  my $axfs = $axfa->fetch_all_by_Slice($slice);
+
+  my (@haps, @alt);
+
+  foreach my $axf (@$axfs) {
+    if(uc($axf->type()) eq 'HAP') {
+      push @haps, $axf;
+    } elsif(uc($axf->type()) =~ 'PAR') {
+      push @alt, $axf;
+    } elsif( $axf->type() eq "PATCH_FIX"){
+      push @haps, $axf;
+    } elsif( $axf->type() eq "PATCH_FIX REF"){
+      push @haps, $axf  if $return_all > 0 ;
+    } elsif( $axf->type() eq "HAP REF" ) {
+      push @haps, $axf if $return_all > 0 ;
+      # do nothing when you are on REF
+    } elsif( $axf->type() eq "PATCH_NOVEL"){
+      push @haps, $axf;
+    }elsif( $axf->type() eq "PATCH_NOVEL REF"){
+      push @haps, $axf  if $return_all > 0 ;
+    } else {
+      warning("Unknown exception feature type ". $axf->type()."- ignoring.");
+    }
+  }
+
+  # regions surrounding hap are those of interest, not hap itself
+  # convert hap alt. exc. features to regions around haps instead
+  foreach my $h (@haps) {
+    my $haslice = $h->alternate_slice();
+    my $hacs    = $haslice->coord_system();
+
+    if($h->start() > 1 && $haslice->start() > 1) {
+      my $aslice = $sa->fetch_by_region($hacs->name(),
+                                        $haslice->seq_region_name(),
+                                        1,
+                                        $haslice->start()-1,
+                                        $haslice->strand(),
+                                        $hacs->version());
+
+      push @alt, Bio::EnsEMBL::AssemblyExceptionFeature->new
+        (-start  => 1,
+         -end    => $h->start()-1,
+         -alternate_slice => $aslice);
+    }
+
+    if($h->end() < $slice->seq_region_length() &&
+       $haslice->end < $haslice->seq_region_length()) {
+      my $aslice = $sa->fetch_by_region($hacs->name(),
+                                        $haslice->seq_region_name(),
+                                        $haslice->end()+1,
+                                        $haslice->seq_region_length(),
+                                        $haslice->strand(),
+                                        $hacs->version());
+
+      push @alt, Bio::EnsEMBL::AssemblyExceptionFeature->new
+        (-start  => $h->end() + 1,
+         -end    => $slice->seq_region_length(),
+         -alternate_slice => $aslice);
+    }
+  }
+
+
+  # check if exception regions contain our feature
+
+  my @features;
+
+  foreach my $axf (@alt) {
+    # ignore other region if feature is not entirely on it
+    next if($self->seq_region_start() < $axf->start() ||
+            $self->seq_region_end()   > $axf->end());
+
+    # quick shallow copy of the feature
+    my $f;
+    %$f = %$self;
+    bless $f, ref($self);
+
+    my $aslice = $axf->alternate_slice();
+
+    # position feature on entire slice of other region
+    $f->{'start'}  = $f->seq_region_start() - $axf->start() + $aslice->start();
+    $f->{'end'}    = $f->seq_region_end()   - $axf->start() + $aslice->start();
+    $f->{'strand'} *= $aslice->strand();
+
+    $f->{'slice'} = $sa->fetch_by_seq_region_id($aslice->get_seq_region_id());
+
+    push @features, $f;
+  }
+
+  return \@features;
+}
+
+
+=head2 overlaps
+
+  Arg [1]    : Bio::EnsEMBL::Feature $f
+               The other feature you want to check overlap with this feature
+               for.
+  Description: This method does a range comparison of this features start and
+               end and compares it with another features start and end. It will
+               return true if these ranges overlap and the features are on the
+               same seq_region.
+  Returntype : TRUE if features overlap, FALSE if they don't
+  Exceptions : warning if features are on different seq_regions
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub overlaps {
+  my $self = shift;
+  my $f = shift;
+
+  my $sr1_name = $self->seq_region_name;
+  my $sr2_name = $f->seq_region_name;
+
+  if ($sr1_name and $sr2_name and ($sr1_name ne $sr2_name)) {
+    warning("Bio::EnsEMBL::Feature->overlaps(): features are on different seq regions.");
+    return undef;
+  }
+  
+  return ($self->seq_region_end >= $f->seq_region_start and $self->seq_region_start <= $f->seq_region_end);
+}
+
+
+=head2 get_overlapping_Genes
+
+  Description: Get all the genes that overlap this feature.
+  Returntype : list ref of Bio::EnsEMBL::Gene
+  Caller     : general
+  Status     : UnStable
+
+=cut
+
+sub get_overlapping_Genes{
+  my $self = shift;
+
+  my $slice = $self->feature_Slice;
+  return $slice->get_all_Genes();
+}
+
+# query for absolute nearest.
+# select x.display_label, g.gene_id, g.seq_region_start, ABS(cast((32921638 - g.seq_region_end) as signed))  as 'dist' from gene g, xref x where g.display_xref_id = x.xref_id and seq_region_id = 27513 order by ABS(cast((32921638 - g.seq_region_end) as signed)) limit 10;
+
+=head2 get_nearest_Gene
+
+  Description: Get all the nearest  gene to the feature
+  Returntype : Bio::EnsEMBL::Gene
+  Caller     : general
+  Status     : UnStable
+
+=cut
+
+sub get_nearest_Gene {
+  my $self = shift;
+  my $stranded = shift;
+  my $stream = shift;
+
+  my $ga = Bio::EnsEMBL::Registry->get_adaptor($self->adaptor->db->species,"core","Gene");
+
+  return $ga->fetch_nearest_Gene_by_Feature($self, $stranded, $stream);
+
+}
+
+=head2 summary_as_hash
+
+  Example       : $feature_summary = $feature->summary_as_hash();
+  Description   : Retrieves a textual summary of this Feature.
+	              Should be overidden by subclasses for specific tweaking
+  Returns       : hashref of arrays of descriptive strings
+  Status        : Intended for internal use
+=cut
+
+sub summary_as_hash {
+  my $self = shift;
+  my %summary;
+  $summary{'ID'} = $self->display_id;
+  $summary{'start'} = $self->seq_region_start;
+  $summary{'end'} = $self->seq_region_end;
+  $summary{'strand'} = $self->strand;
+  $summary{'seq_region_name'} = $self->seq_region_name;
+  return \%summary;
+}
+
+=head2 species
+
+  Example     : $feature->species();
+  Description : Shortcut to the feature's DBAdaptor and returns its species name 
+  Returntype  : String the species name
+  Exceptions  : Thrown if there is no attached adaptor
+  Caller      : Webcode
+
+=cut
+
+sub species {
+  my ($self) = @_;
+  throw "Can only call this method if you have attached an adaptor" if ! $self->adaptor();
+  return $self->adaptor()->db()->species();
+}
+
+
+##############################################
+# Methods included for backwards compatibility
+##############################################
+
+
+=head2 contig
+
+ This method is deprecated and included for backwards compatibility only.
+ Use slice() instead
+=cut
+sub contig {
+  deprecate('Use slice() instead');
+  slice(@_);
+}
+
+
+
+=head2 sub_SeqFeature
+
+ This method is deprecated and only for genebuild backwards compatibility.
+ Avoid using it if possible
+=cut
+sub sub_SeqFeature{
+  my ($self) = @_;
+  return @{$self->{'_gsf_sub_array'}} if($self->{'_gsf_sub_array'});
+}
+
+=head2 add_sub_SeqFeature
+
+ This method is deprecated and only for genebuild backwards compatibility.
+ Avoid using it if possible
+=cut
+sub add_sub_SeqFeature{
+  my ($self,$feat,$expand) = @_;
+  my ($p, $f, $l) = caller;
+  if( $expand eq 'EXPAND' ) {
+    # if this doesn't have start/end set - forget it!
+    if( ! $self->start && ! $self->end ) {
+      
+      $self->start($feat->start());
+      $self->end($feat->end());
+      $self->strand($feat->strand);
+    } else {
+      if( $feat->start < $self->start ) {
+        $self->start($feat->start);
+      }
+
+      if( $feat->end > $self->end ) {
+        $self->end($feat->end);
+      }
+    }
+   } else {
+     if($self->start > $feat->start || $self->end < $feat->end) {
+       throw("$feat is not contained within parent feature, " .
+             "and expansion is not valid");
+     }
+   }
+
+   push(@{$self->{'_gsf_sub_array'}},$feat);
+}
+
+=head2 flush_sub_SeqFeature
+
+ This method is deprecated and only for genebuild backwards compatibility.
+ Avoid using it isf possible
+=cut
+sub flush_sub_SeqFeature {
+  my ($self) = @_;
+  $self->{'_gsf_sub_array'} = [];
+}
+
+
+sub _deprecated_transform {
+  my $self = shift;
+  my $arg = shift;
+
+  if(!$arg) {
+    warning("Calling transform() with no arguments is deprecated.\n".
+          "A coordinate system name argument should be used instead.\n".
+          "You probably wanted transform('seqlevel') or transform('contig').");
+    return $self->transform('seqlevel');
+  }
+
+  if(ref($arg) eq 'Bio::EnsEMBL::Slice') {
+    if($arg->{'empty'}) {
+      warning("Calling transform with an empty slice is deprecated.\n" .
+                "A coordinate system name argument should be used instead.\n".
+                "You probably wanted transform('chromosome') or " .
+                "transform('toplevel')");
+      return $self->transform('toplevel');
+    }
+    warning("Calling transform with a slice is deprecated.\n" .
+              "Use the transfer method instead");
+    return $self->transfer($arg);
+  }
+
+  warning("Calling transform with a [".ref($arg)."] arg is no longer " .
+          "(or never was) supported.  Doing nothing instead.");
+
+  return $self;
+}
+
+
+=head2 id
+
+This method is deprecated and only included for backwards compatibility.
+Use display_id, hseqname, dbID or stable_id instead
+
+=cut
+
+sub id {
+  my $self = shift;
+  deprecate("id method is not used - use display_id instead");
+  return $self->{'stable_id'} if($self->{'stable_id'});
+  return $self->{'hseqname'} if($self->{'hseqname'});
+  return $self->{'seqname'}  if($self->{'seqname'});
+  return $self->{'dbID'};
+}
+
+1;