view variant_effect_predictor/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line source

=head1 LICENSE

  Copyright (c) 1999-2012 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor - An Abstract Base class for all
FeatureAdaptors

=head1 SYNOPSIS

Abstract class - should not be instantiated.  Implementation of
abstract methods must be performed by subclasses.

=head1 DESCRIPTION

This is a base adaptor for feature adaptors. This base class is simply a way
of eliminating code duplication through the implementation of methods
common to all feature adaptors.

=head1 METHODS

=cut

package Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
use vars qw(@ISA @EXPORT);
use strict;

use Bio::EnsEMBL::DBSQL::BaseAdaptor;
use Bio::EnsEMBL::Utils::Cache;
use Bio::EnsEMBL::Utils::Exception qw(warning throw deprecate stack_trace_dump);
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Iterator;

@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);

@EXPORT = (@{$DBI::EXPORT_TAGS{'sql_types'}});

our $SLICE_FEATURE_CACHE_SIZE    = 4;
our $MAX_SPLIT_QUERY_SEQ_REGIONS = 3;
our $SILENCE_CACHE_WARNINGS = 0;

=head2 new

  Arg [1]    : list of args @args
               Superclass constructor arguments
  Example    : none
  Description: Constructor which warns if caching has been switched off
  Returntype : Bio::EnsEMBL::BaseFeatureAdaptor
  Exceptions : none
  Caller     : implementing subclass constructors
  Status     : Stable

=cut

sub new {
  my ($class, @args) = @_;
  my $self = $class->SUPER::new(@args);
  if ( defined $self->db->no_cache() && $self->db->no_cache() && ! $SILENCE_CACHE_WARNINGS) {
    warning(  "You are using the API without caching most recent features. "
            . "Performance might be affected." );
  }
  return $self;
}

=head2 start_equals_end

  Arg [1]    : (optional) boolean $newval
  Example    : $bfa->start_equals_end(1);
  Description: Getter/Setter for the start_equals_end flag.  If set
               to true sub _slice_fetch will use a simplified sql to retrieve 1bp slices.
  Returntype : boolean
  Exceptions : none
  Caller     : EnsemblGenomes variation DB build
  Status     : Stable
  
=cut

sub start_equals_end {
  my ( $self, $value ) = @_;

  if ( defined($value) ) {
    $self->{'start_equals_end'} = $value;
  }
  return $self->{'start_equals_end'};
}


=head2 clear_cache

  Args      : None
  Example   : my $sa =
                $registry->get_adaptor( 'Mus musculus', 'Core',
                                        'Slice' );
              my $ga =
                $registry->get_adaptor( 'Mus musculus', 'Core',
                                        'Gene' );

              my $slice =
                $sa->fetch_by_region( 'Chromosome', '1', 1e8,
                                      1.05e8 );

              my $genes = $ga->fetch_all_by_Slice($slice);

              $ga->clear_cache();

  Description   : Empties the feature cache associated with this
                  feature adaptor.
  Return type   : None
  Exceptions    : None
  Caller        : General
  Status        : At risk (under development)

=cut

sub clear_cache {
  my ($self) = @_;
  %{$self->{_slice_feature_cache}} = ();
  return;
}

=head2 _slice_feature_cache
 
  Description	: Returns the feature cache if we are allowed to cache and
                will build it if we need to. We will never return a reference
                to the hash to avoid unintentional auto-vivfying caching
  Returntype 	: Bio::EnsEMBL::Utils::Cache
  Exceptions 	: None
  Caller     	: Internal

=cut

sub _slice_feature_cache {
  my ($self) = @_;
  return if $self->db()->no_cache();
  if(! exists $self->{_slice_feature_cache}) {
    tie my %cache, 'Bio::EnsEMBL::Utils::Cache', $SLICE_FEATURE_CACHE_SIZE;
    $self->{_slice_feature_cache} = \%cache;
  }
  return $self->{_slice_feature_cache};
}

=head2 fetch_all_by_Slice

  Arg [1]    : Bio::EnsEMBL::Slice $slice
               the slice from which to obtain features
  Arg [2]    : (optional) string $logic_name
               the logic name of the type of features to obtain
  Example    : $fts = $a->fetch_all_by_Slice($slice, 'Swall');
  Description: Returns a listref of features created from the database 
               which are on the Slice defined by $slice. If $logic_name is 
               defined only features with an analysis of type $logic_name 
               will be returned. 
               NOTE: only features that are entirely on the slice's seq_region
               will be returned (i.e. if they hang off the start/end of a
               seq_region they will be discarded). Features can extend over the
               slice boundaries though (in cases where you have a slice that
               doesn't span the whole seq_region).
  Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
  Exceptions : none
  Caller     : Bio::EnsEMBL::Slice
  Status     : Stable

=cut

sub fetch_all_by_Slice {
  my ($self, $slice, $logic_name) = @_;
  #fetch by constraint with empty constraint
  return $self->fetch_all_by_Slice_constraint($slice, '', $logic_name);
}



=head2 fetch_Iterator_by_Slice_method

  Arg [1]    : CODE ref of Slice fetch method
  Arg [2]    : ARRAY ref of parameters for Slice fetch method
  Arg [3]    : Optional int: Slice index in parameters array
  Arg [4]    : Optional int: Slice chunk size. Default=500000
  Example    : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice_method
                               	      ($feature_adaptor->can('fetch_all_by_Slice_Arrays'),
	                                   \@fetch_method_params,
	                                   0,#Slice idx
	                                  );

               while(my $feature = $slice_iter->next && defined $feature){
                 #Do something here
               }

  Description: Creates an Iterator which chunks the query Slice to facilitate
               large Slice queries which would have previously run out of memory
  Returntype : Bio::EnsEMBL::Utils::Iterator
  Exceptions : Throws if mandatory params not valid
  Caller     : general
  Status     : at risk

=cut

#Does not support Collections. See Funcgen ResultFeatureAdaptor::fetch_collection_Iterator_by_Slice_method

sub fetch_Iterator_by_Slice_method{
  my ($self, $slice_method_ref, $params_ref, $slice_idx, $chunk_size) = @_;

  if(! ( defined $slice_method_ref &&
		 ref($slice_method_ref) eq 'CODE')
	){
	throw('Must pass a valid Slice fetch method CODE ref');
  }

  if (! ($params_ref && 
		 ref($params_ref) eq 'ARRAY')) {
	#Don't need to check size here so long as we have valid Slice
	throw('You must pass a method params ARRAYREF');
  }
  
  $slice_idx    = 0 if(! defined $slice_idx);
  my $slice     = $params_ref->[$slice_idx];
  $chunk_size ||= 1000000;
		
  my @feat_cache;
  my $finished     = 0;
  my $start        = 1;	#local coord for sub slice
  my $end          = $slice->length;
  my $num_overlaps = 0;
  
  my $coderef = 
	sub {
	  
	  while (scalar(@feat_cache) == 0 &&
			 ! $finished) {
		
		my $new_end = ($start + $chunk_size - 1);
		
		if ($new_end >= $end) {
		  # this is our last chunk
		  $new_end = $end;
		  $finished = 1;  
		}
		
		#Chunk by sub slicing
		my $sub_slice             = $slice->sub_Slice($start, $new_end);
		$params_ref->[$slice_idx] = $sub_slice;
		@feat_cache = @{ $slice_method_ref->($self, @$params_ref)};
		
		#Remove & count overlapping features
		splice(@feat_cache, 0, $num_overlaps) if($num_overlaps);
		my $i;
		
		if (scalar(@feat_cache) > 0) {
		  
		  my $feat_end  = $feat_cache[$#feat_cache]->seq_region_end;
		  my $slice_end = $sub_slice->end;
		  $num_overlaps = 0;
		  
		  for ($i = $#feat_cache; $i >=0; $i--) {
			
			if ($feat_end > $slice_end) {
			  $feat_end  = $feat_cache[$i]->end;
			  $num_overlaps ++;
			} else {
			  last;
			}
			
		  }
		}
		
		# update the start coordinate
		$start = $new_end + 1;
	  }
	  
	  #this maybe returning from an undef cache
	  #Need to sub this out even more?
	  return shift @feat_cache;
	};

  return Bio::EnsEMBL::Utils::Iterator->new($coderef);
}


=head2 fetch_Iterator_by_Slice

  Arg [1]    : Bio::EnsEMBL::Slice
  Arg [2]    : Optional string: logic name of analysis
  Arg [3]    : Optional int: Chunk size to iterate over. Default is 500000
  Example    : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice($slice);

               while(my $feature = $slice_iter->next && defined $feature){
                 #Do something here
               }

  Description: Creates an Iterator which chunks the query Slice to facilitate
               large Slice queries which would have previously run out of memory
  Returntype : Bio::EnsEMBL::Utils::Iterator
  Exceptions : None
  Caller     : general
  Status     : at risk

=cut

sub fetch_Iterator_by_Slice{
  my ($self, $slice, $logic_name, $chunk_size) = @_;

  my $method_ref = $self->can('fetch_all_by_Slice');

  return $self->fetch_Iterator_by_Slice_method($method_ref, [$slice, $logic_name], 0, $chunk_size);
}


=head2 fetch_all_by_Slice_and_score

  Arg [1]    : Bio::EnsEMBL::Slice $slice
               the slice from which to obtain features
  Arg [2]    : (optional) float $score
               lower bound of the the score of the features retrieved
  Arg [3]    : (optional) string $logic_name
               the logic name of the type of features to obtain
  Example    : $fts = $a->fetch_all_by_Slice_and_score($slice,90,'Swall');
  Description: Returns a list of features created from the database which are 
               are on the Slice defined by $slice and which have a score 
               greater than $score. If $logic_name is defined, 
               only features with an analysis of type $logic_name will be 
               returned. 
  Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
  Exceptions : none
  Caller     : Bio::EnsEMBL::Slice
  Status     : Stable

=cut

sub fetch_all_by_Slice_and_score {
  my ( $self, $slice, $score, $logic_name ) = @_;

  my $constraint;
  if ( defined($score) ) {
    # Get the synonym of the primary_table
    my @tabs = $self->_tables();
    my $syn  = $tabs[0]->[1];

    $constraint = sprintf( "%s.score > %s",
                $syn,
                $self->dbc()->db_handle()->quote( $score, SQL_FLOAT ) );
  }

  return
    $self->fetch_all_by_Slice_constraint( $slice, $constraint,
                                          $logic_name );
}


=head2 fetch_all_by_Slice_constraint

  Arg [1]    : Bio::EnsEMBL::Slice $slice
               the slice from which to obtain features
  Arg [2]    : (optional) string $constraint
               An SQL query constraint (i.e. part of the WHERE clause)
  Arg [3]    : (optional) string $logic_name
               the logic name of the type of features to obtain
  Example    : $fs = $a->fetch_all_by_Slice_constraint($slc, 'perc_ident > 5');
  Description: Returns a listref of features created from the database which 
               are on the Slice defined by $slice and fulfill the SQL 
               constraint defined by $constraint. If logic name is defined, 
               only features with an analysis of type $logic_name will be 
               returned. 
  Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
  Exceptions : thrown if $slice is not defined
  Caller     : Bio::EnsEMBL::Slice
  Status     : Stable

=cut

sub fetch_all_by_Slice_constraint {
  my ( $self, $slice, $constraint, $logic_name ) = @_;


  my @result = ();

  if ( !ref($slice)
       || !(    $slice->isa('Bio::EnsEMBL::Slice')
             or $slice->isa('Bio::EnsEMBL::LRGSlice') ) )
  {
    throw("Bio::EnsEMBL::Slice argument expected.");
  }

  $constraint ||= '';
  $constraint =
    $self->_logic_name_to_constraint( $constraint, $logic_name );

  # If the logic name was invalid, undef was returned
  if ( !defined($constraint) ) { return [] }

  my $key;
  my $cache;

  # Will only use feature_cache if hasn't been no_cache attribute set
  if (
    !( defined( $self->db()->no_cache() ) && $self->db()->no_cache() ) )
  {

    #strain test and add to constraint if so to stop caching.
    if ( $slice->isa('Bio::EnsEMBL::StrainSlice') ) {
      my $string =
        $self->dbc()->db_handle()->quote( $slice->strain_name() );

      if ( $constraint ne "" ) {
        $constraint .= " AND $string = $string ";
      } else {
        $constraint .= " $string = $string ";
      }
    }

    # Check the cache and return the cached results if we have already
    # done this query.  The cache key is the made up from the slice
    # name, the constraint, and the bound parameters (if there are any).
    $key = uc( join( ':', $slice->name(), $constraint ) );

    my $bind_params = $self->bind_param_generic_fetch();

    if ( defined($bind_params) ) {
      $key .= ':'
        . join( ':', map { $_->[0] . '/' . $_->[1] } @{$bind_params} );
    }

    $cache = $self->_slice_feature_cache();
    if ( exists( $cache->{$key} ) ) {
      # Clear the bound parameters and return the cached data.
      $self->{'_bind_param_generic_fetch'} = ();
      return $cache->{$key};
    }
  } ## end if ( !( defined( $self...)))

  my $sa = $slice->adaptor();

  # Hap/PAR support: retrieve normalized 'non-symlinked' slices.
  my @proj = @{ $sa->fetch_normalized_slice_projection($slice) };


 
  if ( !@proj ) {
    throw( 'Could not retrieve normalized Slices. '
         . 'Database contains incorrect assembly_exception information.'
    );
  }

  # Want to get features on the FULL original slice as well as any
  # symlinked slices.

  # Filter out partial slices from projection that are on same
  # seq_region as original slice.

  my $sr_id = $slice->get_seq_region_id();

  @proj = grep { $_->to_Slice->get_seq_region_id() != $sr_id } @proj;

  my $segment = bless( [ 1, $slice->length(), $slice ],
                       'Bio::EnsEMBL::ProjectionSegment' );
  push( @proj, $segment );

  # construct list of Hap/PAR boundaries for entire seq region
  my @bounds;

  my $ent_slice = $sa->fetch_by_seq_region_id($sr_id);
  if ( $slice->strand() == -1 ) {
    $ent_slice = $ent_slice->invert();
  }

  my @ent_proj =
    @{ $sa->fetch_normalized_slice_projection($ent_slice) };
  shift(@ent_proj);    # skip first

  @bounds = map { $_->from_start() - $slice->start() + 1 } @ent_proj;


  # fetch features for the primary slice AND all symlinked slices
  foreach my $seg (@proj) {


    my $offset    = $seg->from_start();
    my $seg_slice = $seg->to_Slice();
    my $features =
      $self->_slice_fetch( $seg_slice, $constraint );

    # If this was a symlinked slice offset the feature coordinates as
    # needed.
    if ( $seg_slice->name() ne $slice->name() ) {

    FEATURE:
      foreach my $f ( @{$features} ) {
        if ( $offset != 1 ) {
          $f->{'start'} += $offset - 1;
          $f->{'end'}   += $offset - 1;
        }

        # discard boundary crossing features from symlinked regions
        foreach my $bound (@bounds) {
          if ( $f->{'start'} < $bound && $f->{'end'} >= $bound ) {
            next FEATURE;
          }
        }

        $f->{'slice'} = $slice;
        push( @result, $f );
      }
    } else {
      push( @result, @{$features} );
    }
  } ## end foreach my $seg (@proj)

  # Will only use feature_cache when set attribute no_cache in DBAdaptor
  if ( defined($key) ) {
    $cache->{$key} = \@result;
  }

  return \@result;
} ## end sub fetch_all_by_Slice_constraint


=head2 fetch_all_by_logic_name

  Arg [3]    : string $logic_name
               the logic name of the type of features to obtain
  Example    : $fs = $a->fetch_all_by_logic_name('foobar');
  Description: Returns a listref of features created from the database.
               only features with an analysis of type $logic_name will
               be returned.  If the logic name is invalid (not in the
               analysis table), a reference to an empty list will be
               returned.
  Returntype : listref of Bio::EnsEMBL::SeqFeatures
  Exceptions : thrown if no $logic_name
  Caller     : General
  Status     : Stable

=cut

sub fetch_all_by_logic_name {
  my ( $self, $logic_name ) = @_;

  if ( !defined($logic_name) ) {
    throw("Need a logic_name");
  }

  my $constraint = $self->_logic_name_to_constraint( '', $logic_name );

  if ( !defined($constraint) ) {
    warning("Invalid logic name: $logic_name");
    return [];
  }

  return $self->generic_fetch($constraint);
}

# Method that creates an object.  Called by the _objs_from_sth() method
# in the sub-classes (the various feature adaptors).  Overridden by the
# feature collection classes.

sub _create_feature {
  my ( $self, $feature_type, $args ) = @_;
  return $feature_type->new( %{$args} );
}

# This is the same as the above, but calls the new_fast() constructor of
# the feature type.

sub _create_feature_fast {
  my ( $self, $feature_type, $args ) = @_;
  return $feature_type->new_fast($args);
}

#
# helper function used by fetch_all_by_Slice_constraint method
#
sub _slice_fetch {
  my ( $self, $slice, $orig_constraint ) = @_;

  my $slice_start         = $slice->start();
  my $slice_end           = $slice->end();
  my $slice_strand        = $slice->strand();
  my $slice_cs            = $slice->coord_system();
  my $slice_seq_region    = $slice->seq_region_name();
  my $slice_seq_region_id = $slice->get_seq_region_id();

  #get the synonym and name of the primary_table
  my @tabs = $self->_tables;
  my ( $tab_name, $tab_syn ) = @{ $tabs[0] };

  #find out what coordinate systems the features are in
  my $mcc      = $self->db->get_MetaCoordContainer();
  my @feat_css = ();

  my $mca        = $self->db->get_MetaContainer();
  my $value_list = $mca->list_value_by_key( $tab_name . "build.level" );
  if ( @$value_list and $slice->is_toplevel() ) {
    push @feat_css, $slice_cs;
  } else {
    @feat_css =
      @{ $mcc->fetch_all_CoordSystems_by_feature_type($tab_name) };
  }

  my $asma = $self->db->get_AssemblyMapperAdaptor();
  my @features;

  # fetch the features from each coordinate system they are stored in
COORD_SYSTEM: foreach my $feat_cs (@feat_css) {
    my $mapper;
    my @coords;
    my @ids;

    if ( $feat_cs->equals($slice_cs) ) {
      # no mapping is required if this is the same coord system

      my $max_len = $self->_max_feature_length()
        || $mcc->fetch_max_length_by_CoordSystem_feature_type( $feat_cs,
                                                            $tab_name );

      my $constraint = $orig_constraint;

      my $sr_id;
      if ( $slice->adaptor() ) {
        $sr_id = $slice->adaptor()->get_seq_region_id($slice);
      } else {
        $sr_id =
          $self->db()->get_SliceAdaptor()->get_seq_region_id($slice);
      }

      # If there is mapping information, use the external_seq_region_id
      # to get features.

      my @sr_ids = ($sr_id);

      while (1) {
        my $ext_sr_id = $self->get_seq_region_id_external($sr_id);

        if ( $ext_sr_id == $sr_id ) { last }

        push( @sr_ids, $ext_sr_id );
        $sr_id = $ext_sr_id;
      }

      $constraint .= " AND " if ($constraint);

      
      $constraint .= "${tab_syn}.seq_region_id IN ("
          . join( ',', @sr_ids ) . ") AND";

      #faster query for 1bp slices where SNP data is not compressed
      if ( $self->start_equals_end && $slice_start == $slice_end ) {
	  $constraint .=
	      " AND ${tab_syn}.seq_region_start = $slice_end" .
	      " AND ${tab_syn}.seq_region_end = $slice_start";

      } else {

	  if ( !$slice->is_circular() ) {
	      # Deal with the default case of a non-circular chromosome.
	      $constraint .= " ${tab_syn}.seq_region_start <= $slice_end AND "
		  . "${tab_syn}.seq_region_end >= $slice_start";

	      if ( $max_len ) {
		  my $min_start = $slice_start - $max_len;
		  $constraint .= " AND ${tab_syn}.seq_region_start >= $min_start";
	      }

	  } else {
	      # Deal with the case of a circular chromosome.
	      if ( $slice_start > $slice_end ) {
		  $constraint .= " ( ${tab_syn}.seq_region_start >= $slice_start "
		      . "OR ${tab_syn}.seq_region_start <= $slice_end "
		      . "OR ${tab_syn}.seq_region_end >= $slice_start "
		      . "OR ${tab_syn}.seq_region_end <= $slice_end "
		      . "OR ${tab_syn}.seq_region_start > ${tab_syn}.seq_region_end)";

	      } else {
		  $constraint .= " ((${tab_syn}.seq_region_start <= $slice_end "
		      . "AND ${tab_syn}.seq_region_end >= $slice_start) "
		      . "OR (${tab_syn}.seq_region_start > ${tab_syn}.seq_region_end "
		      . "AND (${tab_syn}.seq_region_start <= $slice_end "
		      . "OR ${tab_syn}.seq_region_end >= $slice_start)))";
	      }
	  }

      }

      my $fs = $self->generic_fetch( $constraint, undef, $slice );

    # features may still have to have coordinates made relative to slice
    # start
      $fs = $self->_remap( $fs, $mapper, $slice );

      push @features, @$fs;
    } else {
      $mapper = $asma->fetch_by_CoordSystems( $slice_cs, $feat_cs );

      next unless defined $mapper;

      # Get list of coordinates and corresponding internal ids for
      # regions the slice spans
      @coords =
        $mapper->map( $slice_seq_region, $slice_start, $slice_end,
                      $slice_strand, $slice_cs );

      @coords = grep { !$_->isa('Bio::EnsEMBL::Mapper::Gap') } @coords;

      next COORD_SYSTEM if ( !@coords );

      @ids = map { $_->id() } @coords;
      #coords are now id rather than name
      #      @ids = @{$asma->seq_regions_to_ids($feat_cs, \@ids)};

      # When regions are large and only partially spanned
      # by slice it is faster to to limit the query with
      # start and end constraints.  Take simple approach:
      # use regional constraints if there are less than a
      # specific number of regions covered.

      if ( @coords > $MAX_SPLIT_QUERY_SEQ_REGIONS ) {
        my $constraint = $orig_constraint;
        my $id_str = join( ',', @ids );
        $constraint .= " AND " if ($constraint);
        $constraint .= "${tab_syn}.seq_region_id IN ($id_str)";
        my $fs = $self->generic_fetch( $constraint, $mapper, $slice );

        $fs = $self->_remap( $fs, $mapper, $slice );

        push @features, @$fs;

      } else {
        # do multiple split queries using start / end constraints

        my $max_len = (
                $self->_max_feature_length()
                  || $mcc->fetch_max_length_by_CoordSystem_feature_type(
                                                     $feat_cs, $tab_name
                  ) );

        my $len = @coords;
        for ( my $i = 0; $i < $len; $i++ ) {
          my $constraint = $orig_constraint;
          $constraint .= " AND " if ($constraint);
          $constraint .=
              "${tab_syn}.seq_region_id = "
            . $ids[$i] . " AND "
            . "${tab_syn}.seq_region_start <= "
            . $coords[$i]->end() . " AND "
            . "${tab_syn}.seq_region_end >= "
            . $coords[$i]->start();

          if ($max_len) {
            my $min_start = $coords[$i]->start() - $max_len;
            $constraint .=
              " AND ${tab_syn}.seq_region_start >= $min_start";
          }
          my $fs = $self->generic_fetch( $constraint, $mapper, $slice );

          $fs = $self->_remap( $fs, $mapper, $slice );

          push @features, @$fs;
        }
      } ## end else [ if ( @coords > $MAX_SPLIT_QUERY_SEQ_REGIONS)]
    } ## end else [ if ( $feat_cs->equals(...))]
  } ## end foreach my $feat_cs (@feat_css)

  return \@features;
} ## end sub _slice_fetch


#for a given seq_region_id, gets the one used in an external database, if present, otherwise, returns the internal one
sub get_seq_region_id_external {
  my ( $self, $sr_id ) = @_;
  my $cs_a = $self->db()->get_CoordSystemAdaptor();
  return ( exists( $cs_a->{'_internal_seq_region_mapping'}->{$sr_id} )
           ? $cs_a->{'_internal_seq_region_mapping'}->{$sr_id}
           : $sr_id );
}

#for a given seq_region_id and coord_system, gets the one used in the internal (core) database
sub get_seq_region_id_internal{
  my ( $self, $sr_id ) = @_;
  my $cs_a = $self->db()->get_CoordSystemAdaptor();
  return (  exists $cs_a->{'_external_seq_region_mapping'}->{$sr_id} 
            ? $cs_a->{'_external_seq_region_mapping'}->{$sr_id} 
            : $sr_id);
}

#
# Helper function containing some common feature storing functionality
#
# Given a Feature this will return a copy (or the same feature if no changes 
# to the feature are needed) of the feature which is relative to the start
# of the seq_region it is on. The seq_region_id of the seq_region it is on
# is also returned.
#
# This method will also ensure that the database knows which coordinate
# systems that this feature is stored in.
#

sub _pre_store {
  my $self    = shift;
  my $feature = shift;

  if(!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
    throw('Expected Feature argument.');
  }
  my $slice = $feature->slice();

  $self->_check_start_end_strand($feature->start(),$feature->end(),
                                 $feature->strand(), $slice);


  my $db = $self->db();

  my $slice_adaptor = $db->get_SliceAdaptor();

  if(!ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice'))  ) {
    throw('Feature must be attached to Slice to be stored.');
  }

  # make sure feature coords are relative to start of entire seq_region

  if($slice->start != 1 || $slice->strand != 1) {
    #move feature onto a slice of the entire seq_region
    $slice = $slice_adaptor->fetch_by_region($slice->coord_system->name(),
                                             $slice->seq_region_name(),
                                             undef, #start
                                             undef, #end
                                             undef, #strand
                                             $slice->coord_system->version());

    $feature = $feature->transfer($slice);

    if(!$feature) {
      throw('Could not transfer Feature to slice of ' .
            'entire seq_region prior to storing');
    }
  }

  # Ensure this type of feature is known to be stored in this coord system.
  my $cs = $slice->coord_system;

  my ($tab) = $self->_tables();
  my $tabname = $tab->[0];

  my $mcc = $db->get_MetaCoordContainer();

  $mcc->add_feature_type($cs, $tabname, $feature->length);

  my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);

  if(!$seq_region_id) {
    throw('Feature is associated with seq_region which is not in this DB.');
  }

  return ($feature, $seq_region_id);
}


# The same function as _pre_store
# This one is used to store user uploaded features in XXX_userdata db

sub _pre_store_userdata {
  my $self    = shift;
  my $feature = shift;

  if(!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
    throw('Expected Feature argument.');
  }

  my $slice = $feature->slice();
  my $slice_adaptor = $slice->adaptor;
  
  $self->_check_start_end_strand($feature->start(),$feature->end(),
                                 $feature->strand(), $slice);


  if(!ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
    throw('Feature must be attached to Slice to be stored.');
  }

  # make sure feature coords are relative to start of entire seq_region

  if($slice->start != 1 || $slice->strand != 1) {
    #move feature onto a slice of the entire seq_region
    $slice = $slice_adaptor->fetch_by_region($slice->coord_system->name(),
                                             $slice->seq_region_name(),
                                             undef, #start
                                             undef, #end
                                             undef, #strand
                                             $slice->coord_system->version());

    $feature = $feature->transfer($slice);

    if(!$feature) {
      throw('Could not transfer Feature to slice of ' .
            'entire seq_region prior to storing');
    }
  }

  # Ensure this type of feature is known to be stored in this coord system.
  my $cs = $slice->coord_system;

  my ($tab) = $self->_tables();
  my $tabname = $tab->[0];

  my $db = $self->db;
  my $mcc = $db->get_MetaCoordContainer();

  $mcc->add_feature_type($cs, $tabname, $feature->length);

  my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);

  if(!$seq_region_id) {
    throw('Feature is associated with seq_region which is not in this DB.');
  }

  return ($feature, $seq_region_id);
}


#
# helper function used to validate start/end/strand and 
# hstart/hend/hstrand etc.
#
sub _check_start_end_strand {
  my $self = shift;
  my $start = shift;
  my $end   = shift;
  my $strand = shift;
  my $slice = shift;

  #
  # Make sure that the start, end, strand are valid
  #
  if(int($start) != $start) {
    throw("Invalid Feature start [$start].  Must be integer.");
  }
  if(int($end) != $end) {
    throw("Invalid Feature end [$end]. Must be integer.");
  }
  if(int($strand) != $strand || $strand < -1 || $strand > 1) {
    throw("Invalid Feature strand [$strand]. Must be -1, 0 or 1.");
  }
  if($end < $start && !$slice->is_circular()) {
    throw("Invalid Feature start/end [$start/$end]. Start must be less " .
          "than or equal to end.");
  }

  return 1;
}


#
# Given a list of features checks if they are in the correct coord system
# by looking at the first features slice.  If they are not then they are
# converted and placed on the slice.
#
sub _remap {
  my ( $self, $features, $mapper, $slice ) = @_;

  #check if any remapping is actually needed
  if(@$features && (!$features->[0]->isa('Bio::EnsEMBL::Feature') ||
                    $features->[0]->slice == $slice)) {
    return $features;
  }

  #remapping has not been done, we have to do our own conversion from
  #to slice coords

  my @out;

  my $slice_start = $slice->start();
  my $slice_end   = $slice->end();
  my $slice_strand = $slice->strand();
  my $slice_cs    = $slice->coord_system();

  my ($seq_region, $start, $end, $strand);

  my $slice_seq_region_id = $slice->get_seq_region_id();
  my $slice_seq_region = $slice->seq_region_name();

  foreach my $f (@$features) {
    #since feats were obtained in contig coords, attached seq is a contig
    my $fslice = $f->slice();
    if(!$fslice) {
      throw("Feature does not have attached slice.\n");
    }
    my $fseq_region = $fslice->seq_region_name();
    my $fseq_region_id = $fslice->get_seq_region_id();
    my $fcs = $fslice->coord_system();

    if(!$slice_cs->equals($fcs)) {
      #slice of feature in different coord system, mapping required

      ($seq_region, $start, $end, $strand) =
        $mapper->fastmap($fseq_region_id,$f->start(),$f->end(),$f->strand(),$fcs);

      # undefined start means gap
      next if(!defined $start);
    } else {
      $start      = $f->start();
      $end        = $f->end();
      $strand     = $f->strand();
      $seq_region = $f->slice->seq_region_name();
    }
    
    # maps to region outside desired area
    next if ($start > $slice_end) || ($end < $slice_start) || 
      ($slice_seq_region ne $seq_region);

    #shift the feature start, end and strand in one call
    if($slice_strand == -1) {
      $f->move( $slice_end - $end + 1, $slice_end - $start + 1, $strand * -1 );
    } else {
      $f->move( $start - $slice_start + 1, $end - $slice_start + 1, $strand );
    }

    $f->slice($slice);

    push @out,$f;
  }

  return \@out;
}


#
# Given a logic name and an existing constraint this will
# add an analysis table constraint to the feature.  Note that if no
# analysis_id exists in the columns of the primary table then no
# constraint is added at all
#
sub _logic_name_to_constraint {
  my $self = shift;
  my $constraint = shift;
  my $logic_name = shift;

  return $constraint if(!$logic_name);

  #make sure that an analysis_id exists in the primary table
  my ($prim_tab) = $self->_tables();
  my $prim_synonym = $prim_tab->[1];

  my $found_analysis=0;
  foreach my $col ($self->_columns) {
    my ($syn,$col_name) = split(/\./,$col);
    next if($syn ne $prim_synonym);
    if($col_name eq 'analysis_id') {
      $found_analysis = 1;
      last;
    }
  }

  if(!$found_analysis) {
    warning("This feature is not associated with an analysis.\n" .
            "Ignoring logic_name argument = [$logic_name].\n");
    return $constraint;
  }

  my $aa = $self->db->get_AnalysisAdaptor();
  my $an = $aa->fetch_by_logic_name($logic_name);

  if ( !defined($an) ) {
    return undef;
  }

  my $an_id = $an->dbID();

  $constraint .= ' AND' if($constraint);
  $constraint .= " ${prim_synonym}.analysis_id = $an_id";
  return $constraint;
}


=head2 store

  Arg [1]    : list of Bio::EnsEMBL::SeqFeature
  Example    : $adaptor->store(@feats);
  Description: ABSTRACT  Subclasses are responsible for implementing this 
               method.  It should take a list of features and store them in 
               the database.
  Returntype : none
  Exceptions : thrown method is not implemented by subclass
  Caller     : general
  Status     : At Risk
             : throws if called.

=cut

sub store{
  my $self = @_;

  throw("Abstract method store not defined by implementing subclass\n");
}


=head2 remove

  Arg [1]    : A feature $feature 
  Example    : $feature_adaptor->remove($feature);
  Description: This removes a feature from the database.  The table the
               feature is removed from is defined by the abstract method
               _tablename, and the primary key of the table is assumed
               to be _tablename() . '_id'.  The feature argument must 
               be an object implementing the dbID method, and for the
               feature to be removed from the database a dbID value must
               be returned.
  Returntype : none
  Exceptions : thrown if $feature arg does not implement dbID(), or if
               $feature->dbID is not a true value
  Caller     : general
  Status     : Stable

=cut


sub remove {
  my ($self, $feature) = @_;

  if(!$feature || !ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
    throw('Feature argument is required');
  }

  if(!$feature->is_stored($self->db)) {
    throw("This feature is not stored in this database");
  }

  my @tabs = $self->_tables;
  my ($table) = @{$tabs[0]};

  my $sth = $self->prepare("DELETE FROM $table WHERE ${table}_id = ?");
  $sth->bind_param(1,$feature->dbID,SQL_INTEGER);
  $sth->execute();

  #unset the feature dbID ad adaptor
  $feature->dbID(undef);
  $feature->adaptor(undef);

  return;
}


=head2 remove_by_Slice

  Arg [1]    : Bio::Ensembl::Slice $slice
  Example    : $feature_adaptor->remove_by_Slice($slice);
  Description: This removes features from the database which lie on a region
               represented by the passed in slice.  Only features which are
               fully contained by the slice are deleted; features which overlap
               the edge of the slice are not removed.
               The table the features are removed from is defined by
               the abstract method_tablename.
  Returntype : none
  Exceptions : thrown if no slice is supplied
  Caller     : general
  Status     : Stable

=cut

sub remove_by_Slice {
  my ($self, $slice) = @_;

  if(!$slice || !ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
    throw("Slice argument is required");
  }

  my @tabs = $self->_tables;
  my ($table_name) = @{$tabs[0]};

  my $seq_region_id = $self->db->get_SliceAdaptor->get_seq_region_id($slice);
  my $start = $slice->start();
  my $end   = $slice->end();

  #
  # Delete only features fully on the slice, not overlapping ones
  #
  my $sth = $self->prepare("DELETE FROM $table_name " .
                           "WHERE seq_region_id = ? " .
                           "AND   seq_region_start >= ? " .
                           "AND   seq_region_end <= ?");

  $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
  $sth->bind_param(2,$start,SQL_INTEGER);
  $sth->bind_param(3,$end,SQL_INTEGER);
  $sth->execute();
  $sth->finish();
}


#
# Internal function. Allows the max feature length which is normally
# retrieved from the meta_coord table to be overridden.  This allows
# for some significant optimizations to be put in when it is known
# that requested features will not be over a certain size.
#
sub _max_feature_length {
  my $self = shift;
  return $self->{'_max_feature_length'} = shift if(@_);
  return $self->{'_max_feature_length'};
}


#
# Lists all seq_region_ids that a particular feature type is found on.
# Useful e.g. for finding out which seq_regions have genes.
# Returns a listref of seq_region_ids.
#
sub _list_seq_region_ids {
  my ($self, $table) = @_;
  
  my @out;
  
  my $sql = qq(
  SELECT DISTINCT
            sr.seq_region_id
  FROM      seq_region sr,
            $table a,
            coord_system cs
  WHERE     sr.seq_region_id = a.seq_region_id
    AND     sr.coord_system_id = cs.coord_system_id
    AND     cs.species_id = ?);

  my $sth = $self->prepare($sql);

  $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );

  $sth->execute();

  while (my ($id) = $sth->fetchrow) {
    push(@out, $id);
  }

  $sth->finish;

  return \@out;
}


=head1 DEPRECATED METHODS

=cut


=head2 fetch_all_by_RawContig_constraint

  Description: DEPRECATED use fetch_all_by_RawContig_constraint instead

=cut

sub fetch_all_by_RawContig_constraint {
  my $self = shift;
  deprecate('Use fetch_all_by_Slice_constraint() instead.');
  return $self->fetch_all_by_slice_constraint(@_);
}

=head2 fetch_all_by_RawContig

  Description: DEPRECATED use fetch_all_by_Slice instead

=cut

sub fetch_all_by_RawContig {
  my $self = shift;
  deprecate('Use fetch_all_by_Slice() instead.');
  return $self->fetch_all_by_Slice(@_);
}

=head2 fetch_all_by_RawContig_and_score

  Description: DEPRECATED use fetch_all_by_Slice_and_score instead

=cut

sub fetch_all_by_RawContig_and_score{
  my $self = shift;
  deprecate('Use fetch_all_by_Slice_and_score() instead.');
  return $self->fetch_all_by_Slice_and_score(@_);
}

=head2 remove_by_RawContig

  Description: DEPRECATED use remove_by_Slice instead

=cut

sub remove_by_RawContig {
  my $self = shift;
  deprecate("Use remove_by_Slice instead");
  return $self->remove_by_Slice(@_);
}


sub remove_by_analysis_id {
  my ($self, $analysis_id) = @_;

  $analysis_id or throw("Must call with analysis id");

  my @tabs = $self->_tables;
  my ($tablename) = @{$tabs[0]};

  my $sql = "DELETE FROM $tablename WHERE analysis_id = $analysis_id";
#  warn "SQL : $sql";
      
  my $sth = $self->prepare($sql);
  $sth->execute();
  $sth->finish();
}

sub remove_by_feature_id {
  my ($self, $features_list) = @_;

  my @feats = @$features_list or throw("Must call store with features");

  my @tabs = $self->_tables;
  my ($tablename) = @{$tabs[0]};

  my $sql = sprintf "DELETE FROM $tablename WHERE ${tablename}_id IN (%s)", join ', ', @feats;
#  warn "SQL : $sql";
      
  my $sth = $self->prepare($sql);
  $sth->execute();
  $sth->finish();
}


1;