diff variant_effect_predictor/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,1338 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor - An Abstract Base class for all
+FeatureAdaptors
+
+=head1 SYNOPSIS
+
+Abstract class - should not be instantiated.  Implementation of
+abstract methods must be performed by subclasses.
+
+=head1 DESCRIPTION
+
+This is a base adaptor for feature adaptors. This base class is simply a way
+of eliminating code duplication through the implementation of methods
+common to all feature adaptors.
+
+=head1 METHODS
+
+=cut
+
+package Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
+use vars qw(@ISA @EXPORT);
+use strict;
+
+use Bio::EnsEMBL::DBSQL::BaseAdaptor;
+use Bio::EnsEMBL::Utils::Cache;
+use Bio::EnsEMBL::Utils::Exception qw(warning throw deprecate stack_trace_dump);
+use Bio::EnsEMBL::Utils::Argument qw(rearrange);
+use Bio::EnsEMBL::Utils::Iterator;
+
+@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
+
+@EXPORT = (@{$DBI::EXPORT_TAGS{'sql_types'}});
+
+our $SLICE_FEATURE_CACHE_SIZE    = 4;
+our $MAX_SPLIT_QUERY_SEQ_REGIONS = 3;
+our $SILENCE_CACHE_WARNINGS = 0;
+
+=head2 new
+
+  Arg [1]    : list of args @args
+               Superclass constructor arguments
+  Example    : none
+  Description: Constructor which warns if caching has been switched off
+  Returntype : Bio::EnsEMBL::BaseFeatureAdaptor
+  Exceptions : none
+  Caller     : implementing subclass constructors
+  Status     : Stable
+
+=cut
+
+sub new {
+  my ($class, @args) = @_;
+  my $self = $class->SUPER::new(@args);
+  if ( defined $self->db->no_cache() && $self->db->no_cache() && ! $SILENCE_CACHE_WARNINGS) {
+    warning(  "You are using the API without caching most recent features. "
+            . "Performance might be affected." );
+  }
+  return $self;
+}
+
+=head2 start_equals_end
+
+  Arg [1]    : (optional) boolean $newval
+  Example    : $bfa->start_equals_end(1);
+  Description: Getter/Setter for the start_equals_end flag.  If set
+               to true sub _slice_fetch will use a simplified sql to retrieve 1bp slices.
+  Returntype : boolean
+  Exceptions : none
+  Caller     : EnsemblGenomes variation DB build
+  Status     : Stable
+  
+=cut
+
+sub start_equals_end {
+  my ( $self, $value ) = @_;
+
+  if ( defined($value) ) {
+    $self->{'start_equals_end'} = $value;
+  }
+  return $self->{'start_equals_end'};
+}
+
+
+=head2 clear_cache
+
+  Args      : None
+  Example   : my $sa =
+                $registry->get_adaptor( 'Mus musculus', 'Core',
+                                        'Slice' );
+              my $ga =
+                $registry->get_adaptor( 'Mus musculus', 'Core',
+                                        'Gene' );
+
+              my $slice =
+                $sa->fetch_by_region( 'Chromosome', '1', 1e8,
+                                      1.05e8 );
+
+              my $genes = $ga->fetch_all_by_Slice($slice);
+
+              $ga->clear_cache();
+
+  Description   : Empties the feature cache associated with this
+                  feature adaptor.
+  Return type   : None
+  Exceptions    : None
+  Caller        : General
+  Status        : At risk (under development)
+
+=cut
+
+sub clear_cache {
+  my ($self) = @_;
+  %{$self->{_slice_feature_cache}} = ();
+  return;
+}
+
+=head2 _slice_feature_cache
+ 
+  Description	: Returns the feature cache if we are allowed to cache and
+                will build it if we need to. We will never return a reference
+                to the hash to avoid unintentional auto-vivfying caching
+  Returntype 	: Bio::EnsEMBL::Utils::Cache
+  Exceptions 	: None
+  Caller     	: Internal
+
+=cut
+
+sub _slice_feature_cache {
+  my ($self) = @_;
+  return if $self->db()->no_cache();
+  if(! exists $self->{_slice_feature_cache}) {
+    tie my %cache, 'Bio::EnsEMBL::Utils::Cache', $SLICE_FEATURE_CACHE_SIZE;
+    $self->{_slice_feature_cache} = \%cache;
+  }
+  return $self->{_slice_feature_cache};
+}
+
+=head2 fetch_all_by_Slice
+
+  Arg [1]    : Bio::EnsEMBL::Slice $slice
+               the slice from which to obtain features
+  Arg [2]    : (optional) string $logic_name
+               the logic name of the type of features to obtain
+  Example    : $fts = $a->fetch_all_by_Slice($slice, 'Swall');
+  Description: Returns a listref of features created from the database 
+               which are on the Slice defined by $slice. If $logic_name is 
+               defined only features with an analysis of type $logic_name 
+               will be returned. 
+               NOTE: only features that are entirely on the slice's seq_region
+               will be returned (i.e. if they hang off the start/end of a
+               seq_region they will be discarded). Features can extend over the
+               slice boundaries though (in cases where you have a slice that
+               doesn't span the whole seq_region).
+  Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
+  Exceptions : none
+  Caller     : Bio::EnsEMBL::Slice
+  Status     : Stable
+
+=cut
+
+sub fetch_all_by_Slice {
+  my ($self, $slice, $logic_name) = @_;
+  #fetch by constraint with empty constraint
+  return $self->fetch_all_by_Slice_constraint($slice, '', $logic_name);
+}
+
+
+
+=head2 fetch_Iterator_by_Slice_method
+
+  Arg [1]    : CODE ref of Slice fetch method
+  Arg [2]    : ARRAY ref of parameters for Slice fetch method
+  Arg [3]    : Optional int: Slice index in parameters array
+  Arg [4]    : Optional int: Slice chunk size. Default=500000
+  Example    : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice_method
+                               	      ($feature_adaptor->can('fetch_all_by_Slice_Arrays'),
+	                                   \@fetch_method_params,
+	                                   0,#Slice idx
+	                                  );
+
+               while(my $feature = $slice_iter->next && defined $feature){
+                 #Do something here
+               }
+
+  Description: Creates an Iterator which chunks the query Slice to facilitate
+               large Slice queries which would have previously run out of memory
+  Returntype : Bio::EnsEMBL::Utils::Iterator
+  Exceptions : Throws if mandatory params not valid
+  Caller     : general
+  Status     : at risk
+
+=cut
+
+#Does not support Collections. See Funcgen ResultFeatureAdaptor::fetch_collection_Iterator_by_Slice_method
+
+sub fetch_Iterator_by_Slice_method{
+  my ($self, $slice_method_ref, $params_ref, $slice_idx, $chunk_size) = @_;
+
+  if(! ( defined $slice_method_ref &&
+		 ref($slice_method_ref) eq 'CODE')
+	){
+	throw('Must pass a valid Slice fetch method CODE ref');
+  }
+
+  if (! ($params_ref && 
+		 ref($params_ref) eq 'ARRAY')) {
+	#Don't need to check size here so long as we have valid Slice
+	throw('You must pass a method params ARRAYREF');
+  }
+  
+  $slice_idx    = 0 if(! defined $slice_idx);
+  my $slice     = $params_ref->[$slice_idx];
+  $chunk_size ||= 1000000;
+		
+  my @feat_cache;
+  my $finished     = 0;
+  my $start        = 1;	#local coord for sub slice
+  my $end          = $slice->length;
+  my $num_overlaps = 0;
+  
+  my $coderef = 
+	sub {
+	  
+	  while (scalar(@feat_cache) == 0 &&
+			 ! $finished) {
+		
+		my $new_end = ($start + $chunk_size - 1);
+		
+		if ($new_end >= $end) {
+		  # this is our last chunk
+		  $new_end = $end;
+		  $finished = 1;  
+		}
+		
+		#Chunk by sub slicing
+		my $sub_slice             = $slice->sub_Slice($start, $new_end);
+		$params_ref->[$slice_idx] = $sub_slice;
+		@feat_cache = @{ $slice_method_ref->($self, @$params_ref)};
+		
+		#Remove & count overlapping features
+		splice(@feat_cache, 0, $num_overlaps) if($num_overlaps);
+		my $i;
+		
+		if (scalar(@feat_cache) > 0) {
+		  
+		  my $feat_end  = $feat_cache[$#feat_cache]->seq_region_end;
+		  my $slice_end = $sub_slice->end;
+		  $num_overlaps = 0;
+		  
+		  for ($i = $#feat_cache; $i >=0; $i--) {
+			
+			if ($feat_end > $slice_end) {
+			  $feat_end  = $feat_cache[$i]->end;
+			  $num_overlaps ++;
+			} else {
+			  last;
+			}
+			
+		  }
+		}
+		
+		# update the start coordinate
+		$start = $new_end + 1;
+	  }
+	  
+	  #this maybe returning from an undef cache
+	  #Need to sub this out even more?
+	  return shift @feat_cache;
+	};
+
+  return Bio::EnsEMBL::Utils::Iterator->new($coderef);
+}
+
+
+=head2 fetch_Iterator_by_Slice
+
+  Arg [1]    : Bio::EnsEMBL::Slice
+  Arg [2]    : Optional string: logic name of analysis
+  Arg [3]    : Optional int: Chunk size to iterate over. Default is 500000
+  Example    : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice($slice);
+
+               while(my $feature = $slice_iter->next && defined $feature){
+                 #Do something here
+               }
+
+  Description: Creates an Iterator which chunks the query Slice to facilitate
+               large Slice queries which would have previously run out of memory
+  Returntype : Bio::EnsEMBL::Utils::Iterator
+  Exceptions : None
+  Caller     : general
+  Status     : at risk
+
+=cut
+
+sub fetch_Iterator_by_Slice{
+  my ($self, $slice, $logic_name, $chunk_size) = @_;
+
+  my $method_ref = $self->can('fetch_all_by_Slice');
+
+  return $self->fetch_Iterator_by_Slice_method($method_ref, [$slice, $logic_name], 0, $chunk_size);
+}
+
+
+=head2 fetch_all_by_Slice_and_score
+
+  Arg [1]    : Bio::EnsEMBL::Slice $slice
+               the slice from which to obtain features
+  Arg [2]    : (optional) float $score
+               lower bound of the the score of the features retrieved
+  Arg [3]    : (optional) string $logic_name
+               the logic name of the type of features to obtain
+  Example    : $fts = $a->fetch_all_by_Slice_and_score($slice,90,'Swall');
+  Description: Returns a list of features created from the database which are 
+               are on the Slice defined by $slice and which have a score 
+               greater than $score. If $logic_name is defined, 
+               only features with an analysis of type $logic_name will be 
+               returned. 
+  Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
+  Exceptions : none
+  Caller     : Bio::EnsEMBL::Slice
+  Status     : Stable
+
+=cut
+
+sub fetch_all_by_Slice_and_score {
+  my ( $self, $slice, $score, $logic_name ) = @_;
+
+  my $constraint;
+  if ( defined($score) ) {
+    # Get the synonym of the primary_table
+    my @tabs = $self->_tables();
+    my $syn  = $tabs[0]->[1];
+
+    $constraint = sprintf( "%s.score > %s",
+                $syn,
+                $self->dbc()->db_handle()->quote( $score, SQL_FLOAT ) );
+  }
+
+  return
+    $self->fetch_all_by_Slice_constraint( $slice, $constraint,
+                                          $logic_name );
+}
+
+
+=head2 fetch_all_by_Slice_constraint
+
+  Arg [1]    : Bio::EnsEMBL::Slice $slice
+               the slice from which to obtain features
+  Arg [2]    : (optional) string $constraint
+               An SQL query constraint (i.e. part of the WHERE clause)
+  Arg [3]    : (optional) string $logic_name
+               the logic name of the type of features to obtain
+  Example    : $fs = $a->fetch_all_by_Slice_constraint($slc, 'perc_ident > 5');
+  Description: Returns a listref of features created from the database which 
+               are on the Slice defined by $slice and fulfill the SQL 
+               constraint defined by $constraint. If logic name is defined, 
+               only features with an analysis of type $logic_name will be 
+               returned. 
+  Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
+  Exceptions : thrown if $slice is not defined
+  Caller     : Bio::EnsEMBL::Slice
+  Status     : Stable
+
+=cut
+
+sub fetch_all_by_Slice_constraint {
+  my ( $self, $slice, $constraint, $logic_name ) = @_;
+
+
+  my @result = ();
+
+  if ( !ref($slice)
+       || !(    $slice->isa('Bio::EnsEMBL::Slice')
+             or $slice->isa('Bio::EnsEMBL::LRGSlice') ) )
+  {
+    throw("Bio::EnsEMBL::Slice argument expected.");
+  }
+
+  $constraint ||= '';
+  $constraint =
+    $self->_logic_name_to_constraint( $constraint, $logic_name );
+
+  # If the logic name was invalid, undef was returned
+  if ( !defined($constraint) ) { return [] }
+
+  my $key;
+  my $cache;
+
+  # Will only use feature_cache if hasn't been no_cache attribute set
+  if (
+    !( defined( $self->db()->no_cache() ) && $self->db()->no_cache() ) )
+  {
+
+    #strain test and add to constraint if so to stop caching.
+    if ( $slice->isa('Bio::EnsEMBL::StrainSlice') ) {
+      my $string =
+        $self->dbc()->db_handle()->quote( $slice->strain_name() );
+
+      if ( $constraint ne "" ) {
+        $constraint .= " AND $string = $string ";
+      } else {
+        $constraint .= " $string = $string ";
+      }
+    }
+
+    # Check the cache and return the cached results if we have already
+    # done this query.  The cache key is the made up from the slice
+    # name, the constraint, and the bound parameters (if there are any).
+    $key = uc( join( ':', $slice->name(), $constraint ) );
+
+    my $bind_params = $self->bind_param_generic_fetch();
+
+    if ( defined($bind_params) ) {
+      $key .= ':'
+        . join( ':', map { $_->[0] . '/' . $_->[1] } @{$bind_params} );
+    }
+
+    $cache = $self->_slice_feature_cache();
+    if ( exists( $cache->{$key} ) ) {
+      # Clear the bound parameters and return the cached data.
+      $self->{'_bind_param_generic_fetch'} = ();
+      return $cache->{$key};
+    }
+  } ## end if ( !( defined( $self...)))
+
+  my $sa = $slice->adaptor();
+
+  # Hap/PAR support: retrieve normalized 'non-symlinked' slices.
+  my @proj = @{ $sa->fetch_normalized_slice_projection($slice) };
+
+
+ 
+  if ( !@proj ) {
+    throw( 'Could not retrieve normalized Slices. '
+         . 'Database contains incorrect assembly_exception information.'
+    );
+  }
+
+  # Want to get features on the FULL original slice as well as any
+  # symlinked slices.
+
+  # Filter out partial slices from projection that are on same
+  # seq_region as original slice.
+
+  my $sr_id = $slice->get_seq_region_id();
+
+  @proj = grep { $_->to_Slice->get_seq_region_id() != $sr_id } @proj;
+
+  my $segment = bless( [ 1, $slice->length(), $slice ],
+                       'Bio::EnsEMBL::ProjectionSegment' );
+  push( @proj, $segment );
+
+  # construct list of Hap/PAR boundaries for entire seq region
+  my @bounds;
+
+  my $ent_slice = $sa->fetch_by_seq_region_id($sr_id);
+  if ( $slice->strand() == -1 ) {
+    $ent_slice = $ent_slice->invert();
+  }
+
+  my @ent_proj =
+    @{ $sa->fetch_normalized_slice_projection($ent_slice) };
+  shift(@ent_proj);    # skip first
+
+  @bounds = map { $_->from_start() - $slice->start() + 1 } @ent_proj;
+
+
+  # fetch features for the primary slice AND all symlinked slices
+  foreach my $seg (@proj) {
+
+
+    my $offset    = $seg->from_start();
+    my $seg_slice = $seg->to_Slice();
+    my $features =
+      $self->_slice_fetch( $seg_slice, $constraint );
+
+    # If this was a symlinked slice offset the feature coordinates as
+    # needed.
+    if ( $seg_slice->name() ne $slice->name() ) {
+
+    FEATURE:
+      foreach my $f ( @{$features} ) {
+        if ( $offset != 1 ) {
+          $f->{'start'} += $offset - 1;
+          $f->{'end'}   += $offset - 1;
+        }
+
+        # discard boundary crossing features from symlinked regions
+        foreach my $bound (@bounds) {
+          if ( $f->{'start'} < $bound && $f->{'end'} >= $bound ) {
+            next FEATURE;
+          }
+        }
+
+        $f->{'slice'} = $slice;
+        push( @result, $f );
+      }
+    } else {
+      push( @result, @{$features} );
+    }
+  } ## end foreach my $seg (@proj)
+
+  # Will only use feature_cache when set attribute no_cache in DBAdaptor
+  if ( defined($key) ) {
+    $cache->{$key} = \@result;
+  }
+
+  return \@result;
+} ## end sub fetch_all_by_Slice_constraint
+
+
+=head2 fetch_all_by_logic_name
+
+  Arg [3]    : string $logic_name
+               the logic name of the type of features to obtain
+  Example    : $fs = $a->fetch_all_by_logic_name('foobar');
+  Description: Returns a listref of features created from the database.
+               only features with an analysis of type $logic_name will
+               be returned.  If the logic name is invalid (not in the
+               analysis table), a reference to an empty list will be
+               returned.
+  Returntype : listref of Bio::EnsEMBL::SeqFeatures
+  Exceptions : thrown if no $logic_name
+  Caller     : General
+  Status     : Stable
+
+=cut
+
+sub fetch_all_by_logic_name {
+  my ( $self, $logic_name ) = @_;
+
+  if ( !defined($logic_name) ) {
+    throw("Need a logic_name");
+  }
+
+  my $constraint = $self->_logic_name_to_constraint( '', $logic_name );
+
+  if ( !defined($constraint) ) {
+    warning("Invalid logic name: $logic_name");
+    return [];
+  }
+
+  return $self->generic_fetch($constraint);
+}
+
+# Method that creates an object.  Called by the _objs_from_sth() method
+# in the sub-classes (the various feature adaptors).  Overridden by the
+# feature collection classes.
+
+sub _create_feature {
+  my ( $self, $feature_type, $args ) = @_;
+  return $feature_type->new( %{$args} );
+}
+
+# This is the same as the above, but calls the new_fast() constructor of
+# the feature type.
+
+sub _create_feature_fast {
+  my ( $self, $feature_type, $args ) = @_;
+  return $feature_type->new_fast($args);
+}
+
+#
+# helper function used by fetch_all_by_Slice_constraint method
+#
+sub _slice_fetch {
+  my ( $self, $slice, $orig_constraint ) = @_;
+
+  my $slice_start         = $slice->start();
+  my $slice_end           = $slice->end();
+  my $slice_strand        = $slice->strand();
+  my $slice_cs            = $slice->coord_system();
+  my $slice_seq_region    = $slice->seq_region_name();
+  my $slice_seq_region_id = $slice->get_seq_region_id();
+
+  #get the synonym and name of the primary_table
+  my @tabs = $self->_tables;
+  my ( $tab_name, $tab_syn ) = @{ $tabs[0] };
+
+  #find out what coordinate systems the features are in
+  my $mcc      = $self->db->get_MetaCoordContainer();
+  my @feat_css = ();
+
+  my $mca        = $self->db->get_MetaContainer();
+  my $value_list = $mca->list_value_by_key( $tab_name . "build.level" );
+  if ( @$value_list and $slice->is_toplevel() ) {
+    push @feat_css, $slice_cs;
+  } else {
+    @feat_css =
+      @{ $mcc->fetch_all_CoordSystems_by_feature_type($tab_name) };
+  }
+
+  my $asma = $self->db->get_AssemblyMapperAdaptor();
+  my @features;
+
+  # fetch the features from each coordinate system they are stored in
+COORD_SYSTEM: foreach my $feat_cs (@feat_css) {
+    my $mapper;
+    my @coords;
+    my @ids;
+
+    if ( $feat_cs->equals($slice_cs) ) {
+      # no mapping is required if this is the same coord system
+
+      my $max_len = $self->_max_feature_length()
+        || $mcc->fetch_max_length_by_CoordSystem_feature_type( $feat_cs,
+                                                            $tab_name );
+
+      my $constraint = $orig_constraint;
+
+      my $sr_id;
+      if ( $slice->adaptor() ) {
+        $sr_id = $slice->adaptor()->get_seq_region_id($slice);
+      } else {
+        $sr_id =
+          $self->db()->get_SliceAdaptor()->get_seq_region_id($slice);
+      }
+
+      # If there is mapping information, use the external_seq_region_id
+      # to get features.
+
+      my @sr_ids = ($sr_id);
+
+      while (1) {
+        my $ext_sr_id = $self->get_seq_region_id_external($sr_id);
+
+        if ( $ext_sr_id == $sr_id ) { last }
+
+        push( @sr_ids, $ext_sr_id );
+        $sr_id = $ext_sr_id;
+      }
+
+      $constraint .= " AND " if ($constraint);
+
+      
+      $constraint .= "${tab_syn}.seq_region_id IN ("
+          . join( ',', @sr_ids ) . ") AND";
+
+      #faster query for 1bp slices where SNP data is not compressed
+      if ( $self->start_equals_end && $slice_start == $slice_end ) {
+	  $constraint .=
+	      " AND ${tab_syn}.seq_region_start = $slice_end" .
+	      " AND ${tab_syn}.seq_region_end = $slice_start";
+
+      } else {
+
+	  if ( !$slice->is_circular() ) {
+	      # Deal with the default case of a non-circular chromosome.
+	      $constraint .= " ${tab_syn}.seq_region_start <= $slice_end AND "
+		  . "${tab_syn}.seq_region_end >= $slice_start";
+
+	      if ( $max_len ) {
+		  my $min_start = $slice_start - $max_len;
+		  $constraint .= " AND ${tab_syn}.seq_region_start >= $min_start";
+	      }
+
+	  } else {
+	      # Deal with the case of a circular chromosome.
+	      if ( $slice_start > $slice_end ) {
+		  $constraint .= " ( ${tab_syn}.seq_region_start >= $slice_start "
+		      . "OR ${tab_syn}.seq_region_start <= $slice_end "
+		      . "OR ${tab_syn}.seq_region_end >= $slice_start "
+		      . "OR ${tab_syn}.seq_region_end <= $slice_end "
+		      . "OR ${tab_syn}.seq_region_start > ${tab_syn}.seq_region_end)";
+
+	      } else {
+		  $constraint .= " ((${tab_syn}.seq_region_start <= $slice_end "
+		      . "AND ${tab_syn}.seq_region_end >= $slice_start) "
+		      . "OR (${tab_syn}.seq_region_start > ${tab_syn}.seq_region_end "
+		      . "AND (${tab_syn}.seq_region_start <= $slice_end "
+		      . "OR ${tab_syn}.seq_region_end >= $slice_start)))";
+	      }
+	  }
+
+      }
+
+      my $fs = $self->generic_fetch( $constraint, undef, $slice );
+
+    # features may still have to have coordinates made relative to slice
+    # start
+      $fs = $self->_remap( $fs, $mapper, $slice );
+
+      push @features, @$fs;
+    } else {
+      $mapper = $asma->fetch_by_CoordSystems( $slice_cs, $feat_cs );
+
+      next unless defined $mapper;
+
+      # Get list of coordinates and corresponding internal ids for
+      # regions the slice spans
+      @coords =
+        $mapper->map( $slice_seq_region, $slice_start, $slice_end,
+                      $slice_strand, $slice_cs );
+
+      @coords = grep { !$_->isa('Bio::EnsEMBL::Mapper::Gap') } @coords;
+
+      next COORD_SYSTEM if ( !@coords );
+
+      @ids = map { $_->id() } @coords;
+      #coords are now id rather than name
+      #      @ids = @{$asma->seq_regions_to_ids($feat_cs, \@ids)};
+
+      # When regions are large and only partially spanned
+      # by slice it is faster to to limit the query with
+      # start and end constraints.  Take simple approach:
+      # use regional constraints if there are less than a
+      # specific number of regions covered.
+
+      if ( @coords > $MAX_SPLIT_QUERY_SEQ_REGIONS ) {
+        my $constraint = $orig_constraint;
+        my $id_str = join( ',', @ids );
+        $constraint .= " AND " if ($constraint);
+        $constraint .= "${tab_syn}.seq_region_id IN ($id_str)";
+        my $fs = $self->generic_fetch( $constraint, $mapper, $slice );
+
+        $fs = $self->_remap( $fs, $mapper, $slice );
+
+        push @features, @$fs;
+
+      } else {
+        # do multiple split queries using start / end constraints
+
+        my $max_len = (
+                $self->_max_feature_length()
+                  || $mcc->fetch_max_length_by_CoordSystem_feature_type(
+                                                     $feat_cs, $tab_name
+                  ) );
+
+        my $len = @coords;
+        for ( my $i = 0; $i < $len; $i++ ) {
+          my $constraint = $orig_constraint;
+          $constraint .= " AND " if ($constraint);
+          $constraint .=
+              "${tab_syn}.seq_region_id = "
+            . $ids[$i] . " AND "
+            . "${tab_syn}.seq_region_start <= "
+            . $coords[$i]->end() . " AND "
+            . "${tab_syn}.seq_region_end >= "
+            . $coords[$i]->start();
+
+          if ($max_len) {
+            my $min_start = $coords[$i]->start() - $max_len;
+            $constraint .=
+              " AND ${tab_syn}.seq_region_start >= $min_start";
+          }
+          my $fs = $self->generic_fetch( $constraint, $mapper, $slice );
+
+          $fs = $self->_remap( $fs, $mapper, $slice );
+
+          push @features, @$fs;
+        }
+      } ## end else [ if ( @coords > $MAX_SPLIT_QUERY_SEQ_REGIONS)]
+    } ## end else [ if ( $feat_cs->equals(...))]
+  } ## end foreach my $feat_cs (@feat_css)
+
+  return \@features;
+} ## end sub _slice_fetch
+
+
+#for a given seq_region_id, gets the one used in an external database, if present, otherwise, returns the internal one
+sub get_seq_region_id_external {
+  my ( $self, $sr_id ) = @_;
+  my $cs_a = $self->db()->get_CoordSystemAdaptor();
+  return ( exists( $cs_a->{'_internal_seq_region_mapping'}->{$sr_id} )
+           ? $cs_a->{'_internal_seq_region_mapping'}->{$sr_id}
+           : $sr_id );
+}
+
+#for a given seq_region_id and coord_system, gets the one used in the internal (core) database
+sub get_seq_region_id_internal{
+  my ( $self, $sr_id ) = @_;
+  my $cs_a = $self->db()->get_CoordSystemAdaptor();
+  return (  exists $cs_a->{'_external_seq_region_mapping'}->{$sr_id} 
+            ? $cs_a->{'_external_seq_region_mapping'}->{$sr_id} 
+            : $sr_id);
+}
+
+#
+# Helper function containing some common feature storing functionality
+#
+# Given a Feature this will return a copy (or the same feature if no changes 
+# to the feature are needed) of the feature which is relative to the start
+# of the seq_region it is on. The seq_region_id of the seq_region it is on
+# is also returned.
+#
+# This method will also ensure that the database knows which coordinate
+# systems that this feature is stored in.
+#
+
+sub _pre_store {
+  my $self    = shift;
+  my $feature = shift;
+
+  if(!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
+    throw('Expected Feature argument.');
+  }
+  my $slice = $feature->slice();
+
+  $self->_check_start_end_strand($feature->start(),$feature->end(),
+                                 $feature->strand(), $slice);
+
+
+  my $db = $self->db();
+
+  my $slice_adaptor = $db->get_SliceAdaptor();
+
+  if(!ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice'))  ) {
+    throw('Feature must be attached to Slice to be stored.');
+  }
+
+  # make sure feature coords are relative to start of entire seq_region
+
+  if($slice->start != 1 || $slice->strand != 1) {
+    #move feature onto a slice of the entire seq_region
+    $slice = $slice_adaptor->fetch_by_region($slice->coord_system->name(),
+                                             $slice->seq_region_name(),
+                                             undef, #start
+                                             undef, #end
+                                             undef, #strand
+                                             $slice->coord_system->version());
+
+    $feature = $feature->transfer($slice);
+
+    if(!$feature) {
+      throw('Could not transfer Feature to slice of ' .
+            'entire seq_region prior to storing');
+    }
+  }
+
+  # Ensure this type of feature is known to be stored in this coord system.
+  my $cs = $slice->coord_system;
+
+  my ($tab) = $self->_tables();
+  my $tabname = $tab->[0];
+
+  my $mcc = $db->get_MetaCoordContainer();
+
+  $mcc->add_feature_type($cs, $tabname, $feature->length);
+
+  my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
+
+  if(!$seq_region_id) {
+    throw('Feature is associated with seq_region which is not in this DB.');
+  }
+
+  return ($feature, $seq_region_id);
+}
+
+
+# The same function as _pre_store
+# This one is used to store user uploaded features in XXX_userdata db
+
+sub _pre_store_userdata {
+  my $self    = shift;
+  my $feature = shift;
+
+  if(!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
+    throw('Expected Feature argument.');
+  }
+
+  my $slice = $feature->slice();
+  my $slice_adaptor = $slice->adaptor;
+  
+  $self->_check_start_end_strand($feature->start(),$feature->end(),
+                                 $feature->strand(), $slice);
+
+
+  if(!ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
+    throw('Feature must be attached to Slice to be stored.');
+  }
+
+  # make sure feature coords are relative to start of entire seq_region
+
+  if($slice->start != 1 || $slice->strand != 1) {
+    #move feature onto a slice of the entire seq_region
+    $slice = $slice_adaptor->fetch_by_region($slice->coord_system->name(),
+                                             $slice->seq_region_name(),
+                                             undef, #start
+                                             undef, #end
+                                             undef, #strand
+                                             $slice->coord_system->version());
+
+    $feature = $feature->transfer($slice);
+
+    if(!$feature) {
+      throw('Could not transfer Feature to slice of ' .
+            'entire seq_region prior to storing');
+    }
+  }
+
+  # Ensure this type of feature is known to be stored in this coord system.
+  my $cs = $slice->coord_system;
+
+  my ($tab) = $self->_tables();
+  my $tabname = $tab->[0];
+
+  my $db = $self->db;
+  my $mcc = $db->get_MetaCoordContainer();
+
+  $mcc->add_feature_type($cs, $tabname, $feature->length);
+
+  my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
+
+  if(!$seq_region_id) {
+    throw('Feature is associated with seq_region which is not in this DB.');
+  }
+
+  return ($feature, $seq_region_id);
+}
+
+
+#
+# helper function used to validate start/end/strand and 
+# hstart/hend/hstrand etc.
+#
+sub _check_start_end_strand {
+  my $self = shift;
+  my $start = shift;
+  my $end   = shift;
+  my $strand = shift;
+  my $slice = shift;
+
+  #
+  # Make sure that the start, end, strand are valid
+  #
+  if(int($start) != $start) {
+    throw("Invalid Feature start [$start].  Must be integer.");
+  }
+  if(int($end) != $end) {
+    throw("Invalid Feature end [$end]. Must be integer.");
+  }
+  if(int($strand) != $strand || $strand < -1 || $strand > 1) {
+    throw("Invalid Feature strand [$strand]. Must be -1, 0 or 1.");
+  }
+  if($end < $start && !$slice->is_circular()) {
+    throw("Invalid Feature start/end [$start/$end]. Start must be less " .
+          "than or equal to end.");
+  }
+
+  return 1;
+}
+
+
+#
+# Given a list of features checks if they are in the correct coord system
+# by looking at the first features slice.  If they are not then they are
+# converted and placed on the slice.
+#
+sub _remap {
+  my ( $self, $features, $mapper, $slice ) = @_;
+
+  #check if any remapping is actually needed
+  if(@$features && (!$features->[0]->isa('Bio::EnsEMBL::Feature') ||
+                    $features->[0]->slice == $slice)) {
+    return $features;
+  }
+
+  #remapping has not been done, we have to do our own conversion from
+  #to slice coords
+
+  my @out;
+
+  my $slice_start = $slice->start();
+  my $slice_end   = $slice->end();
+  my $slice_strand = $slice->strand();
+  my $slice_cs    = $slice->coord_system();
+
+  my ($seq_region, $start, $end, $strand);
+
+  my $slice_seq_region_id = $slice->get_seq_region_id();
+  my $slice_seq_region = $slice->seq_region_name();
+
+  foreach my $f (@$features) {
+    #since feats were obtained in contig coords, attached seq is a contig
+    my $fslice = $f->slice();
+    if(!$fslice) {
+      throw("Feature does not have attached slice.\n");
+    }
+    my $fseq_region = $fslice->seq_region_name();
+    my $fseq_region_id = $fslice->get_seq_region_id();
+    my $fcs = $fslice->coord_system();
+
+    if(!$slice_cs->equals($fcs)) {
+      #slice of feature in different coord system, mapping required
+
+      ($seq_region, $start, $end, $strand) =
+        $mapper->fastmap($fseq_region_id,$f->start(),$f->end(),$f->strand(),$fcs);
+
+      # undefined start means gap
+      next if(!defined $start);
+    } else {
+      $start      = $f->start();
+      $end        = $f->end();
+      $strand     = $f->strand();
+      $seq_region = $f->slice->seq_region_name();
+    }
+    
+    # maps to region outside desired area
+    next if ($start > $slice_end) || ($end < $slice_start) || 
+      ($slice_seq_region ne $seq_region);
+
+    #shift the feature start, end and strand in one call
+    if($slice_strand == -1) {
+      $f->move( $slice_end - $end + 1, $slice_end - $start + 1, $strand * -1 );
+    } else {
+      $f->move( $start - $slice_start + 1, $end - $slice_start + 1, $strand );
+    }
+
+    $f->slice($slice);
+
+    push @out,$f;
+  }
+
+  return \@out;
+}
+
+
+#
+# Given a logic name and an existing constraint this will
+# add an analysis table constraint to the feature.  Note that if no
+# analysis_id exists in the columns of the primary table then no
+# constraint is added at all
+#
+sub _logic_name_to_constraint {
+  my $self = shift;
+  my $constraint = shift;
+  my $logic_name = shift;
+
+  return $constraint if(!$logic_name);
+
+  #make sure that an analysis_id exists in the primary table
+  my ($prim_tab) = $self->_tables();
+  my $prim_synonym = $prim_tab->[1];
+
+  my $found_analysis=0;
+  foreach my $col ($self->_columns) {
+    my ($syn,$col_name) = split(/\./,$col);
+    next if($syn ne $prim_synonym);
+    if($col_name eq 'analysis_id') {
+      $found_analysis = 1;
+      last;
+    }
+  }
+
+  if(!$found_analysis) {
+    warning("This feature is not associated with an analysis.\n" .
+            "Ignoring logic_name argument = [$logic_name].\n");
+    return $constraint;
+  }
+
+  my $aa = $self->db->get_AnalysisAdaptor();
+  my $an = $aa->fetch_by_logic_name($logic_name);
+
+  if ( !defined($an) ) {
+    return undef;
+  }
+
+  my $an_id = $an->dbID();
+
+  $constraint .= ' AND' if($constraint);
+  $constraint .= " ${prim_synonym}.analysis_id = $an_id";
+  return $constraint;
+}
+
+
+=head2 store
+
+  Arg [1]    : list of Bio::EnsEMBL::SeqFeature
+  Example    : $adaptor->store(@feats);
+  Description: ABSTRACT  Subclasses are responsible for implementing this 
+               method.  It should take a list of features and store them in 
+               the database.
+  Returntype : none
+  Exceptions : thrown method is not implemented by subclass
+  Caller     : general
+  Status     : At Risk
+             : throws if called.
+
+=cut
+
+sub store{
+  my $self = @_;
+
+  throw("Abstract method store not defined by implementing subclass\n");
+}
+
+
+=head2 remove
+
+  Arg [1]    : A feature $feature 
+  Example    : $feature_adaptor->remove($feature);
+  Description: This removes a feature from the database.  The table the
+               feature is removed from is defined by the abstract method
+               _tablename, and the primary key of the table is assumed
+               to be _tablename() . '_id'.  The feature argument must 
+               be an object implementing the dbID method, and for the
+               feature to be removed from the database a dbID value must
+               be returned.
+  Returntype : none
+  Exceptions : thrown if $feature arg does not implement dbID(), or if
+               $feature->dbID is not a true value
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+
+sub remove {
+  my ($self, $feature) = @_;
+
+  if(!$feature || !ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
+    throw('Feature argument is required');
+  }
+
+  if(!$feature->is_stored($self->db)) {
+    throw("This feature is not stored in this database");
+  }
+
+  my @tabs = $self->_tables;
+  my ($table) = @{$tabs[0]};
+
+  my $sth = $self->prepare("DELETE FROM $table WHERE ${table}_id = ?");
+  $sth->bind_param(1,$feature->dbID,SQL_INTEGER);
+  $sth->execute();
+
+  #unset the feature dbID ad adaptor
+  $feature->dbID(undef);
+  $feature->adaptor(undef);
+
+  return;
+}
+
+
+=head2 remove_by_Slice
+
+  Arg [1]    : Bio::Ensembl::Slice $slice
+  Example    : $feature_adaptor->remove_by_Slice($slice);
+  Description: This removes features from the database which lie on a region
+               represented by the passed in slice.  Only features which are
+               fully contained by the slice are deleted; features which overlap
+               the edge of the slice are not removed.
+               The table the features are removed from is defined by
+               the abstract method_tablename.
+  Returntype : none
+  Exceptions : thrown if no slice is supplied
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub remove_by_Slice {
+  my ($self, $slice) = @_;
+
+  if(!$slice || !ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
+    throw("Slice argument is required");
+  }
+
+  my @tabs = $self->_tables;
+  my ($table_name) = @{$tabs[0]};
+
+  my $seq_region_id = $self->db->get_SliceAdaptor->get_seq_region_id($slice);
+  my $start = $slice->start();
+  my $end   = $slice->end();
+
+  #
+  # Delete only features fully on the slice, not overlapping ones
+  #
+  my $sth = $self->prepare("DELETE FROM $table_name " .
+                           "WHERE seq_region_id = ? " .
+                           "AND   seq_region_start >= ? " .
+                           "AND   seq_region_end <= ?");
+
+  $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
+  $sth->bind_param(2,$start,SQL_INTEGER);
+  $sth->bind_param(3,$end,SQL_INTEGER);
+  $sth->execute();
+  $sth->finish();
+}
+
+
+#
+# Internal function. Allows the max feature length which is normally
+# retrieved from the meta_coord table to be overridden.  This allows
+# for some significant optimizations to be put in when it is known
+# that requested features will not be over a certain size.
+#
+sub _max_feature_length {
+  my $self = shift;
+  return $self->{'_max_feature_length'} = shift if(@_);
+  return $self->{'_max_feature_length'};
+}
+
+
+#
+# Lists all seq_region_ids that a particular feature type is found on.
+# Useful e.g. for finding out which seq_regions have genes.
+# Returns a listref of seq_region_ids.
+#
+sub _list_seq_region_ids {
+  my ($self, $table) = @_;
+  
+  my @out;
+  
+  my $sql = qq(
+  SELECT DISTINCT
+            sr.seq_region_id
+  FROM      seq_region sr,
+            $table a,
+            coord_system cs
+  WHERE     sr.seq_region_id = a.seq_region_id
+    AND     sr.coord_system_id = cs.coord_system_id
+    AND     cs.species_id = ?);
+
+  my $sth = $self->prepare($sql);
+
+  $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
+
+  $sth->execute();
+
+  while (my ($id) = $sth->fetchrow) {
+    push(@out, $id);
+  }
+
+  $sth->finish;
+
+  return \@out;
+}
+
+
+=head1 DEPRECATED METHODS
+
+=cut
+
+
+=head2 fetch_all_by_RawContig_constraint
+
+  Description: DEPRECATED use fetch_all_by_RawContig_constraint instead
+
+=cut
+
+sub fetch_all_by_RawContig_constraint {
+  my $self = shift;
+  deprecate('Use fetch_all_by_Slice_constraint() instead.');
+  return $self->fetch_all_by_slice_constraint(@_);
+}
+
+=head2 fetch_all_by_RawContig
+
+  Description: DEPRECATED use fetch_all_by_Slice instead
+
+=cut
+
+sub fetch_all_by_RawContig {
+  my $self = shift;
+  deprecate('Use fetch_all_by_Slice() instead.');
+  return $self->fetch_all_by_Slice(@_);
+}
+
+=head2 fetch_all_by_RawContig_and_score
+
+  Description: DEPRECATED use fetch_all_by_Slice_and_score instead
+
+=cut
+
+sub fetch_all_by_RawContig_and_score{
+  my $self = shift;
+  deprecate('Use fetch_all_by_Slice_and_score() instead.');
+  return $self->fetch_all_by_Slice_and_score(@_);
+}
+
+=head2 remove_by_RawContig
+
+  Description: DEPRECATED use remove_by_Slice instead
+
+=cut
+
+sub remove_by_RawContig {
+  my $self = shift;
+  deprecate("Use remove_by_Slice instead");
+  return $self->remove_by_Slice(@_);
+}
+
+
+sub remove_by_analysis_id {
+  my ($self, $analysis_id) = @_;
+
+  $analysis_id or throw("Must call with analysis id");
+
+  my @tabs = $self->_tables;
+  my ($tablename) = @{$tabs[0]};
+
+  my $sql = "DELETE FROM $tablename WHERE analysis_id = $analysis_id";
+#  warn "SQL : $sql";
+      
+  my $sth = $self->prepare($sql);
+  $sth->execute();
+  $sth->finish();
+}
+
+sub remove_by_feature_id {
+  my ($self, $features_list) = @_;
+
+  my @feats = @$features_list or throw("Must call store with features");
+
+  my @tabs = $self->_tables;
+  my ($tablename) = @{$tabs[0]};
+
+  my $sql = sprintf "DELETE FROM $tablename WHERE ${tablename}_id IN (%s)", join ', ', @feats;
+#  warn "SQL : $sql";
+      
+  my $sth = $self->prepare($sql);
+  $sth->execute();
+  $sth->finish();
+}
+
+
+1;