diff variant_effect_predictor/Bio/EnsEMBL/DBSQL/AssemblyMapperAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/DBSQL/AssemblyMapperAdaptor.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,1841 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor
+
+=head1 SYNOPSIS
+
+  use Bio::EnsEMBL::Registry;
+
+  Bio::EnsEMBL::Registry->load_registry_from_db(
+    -host => 'ensembldb.ensembl.org',
+    -user => 'anonymous'
+  );
+
+  $asma = Bio::EnsEMBL::Registry->get_adaptor( "human", "core",
+    "assemblymapper" );
+
+  $csa = Bio::EnsEMBL::Registry->get_adaptor( "human", "core",
+    "coordsystem" );
+
+  my $chr33_cs = $csa->fetch_by_name( 'chromosome', 'NCBI33' );
+  my $chr34_cs = $csa->fetch_by_name( 'chromosome', 'NCBI34' );
+  my $ctg_cs   = $csa->fetch_by_name('contig');
+  my $clone_cs = $csa->fetch_by_name('clone');
+
+  my $chr_ctg_mapper =
+    $asma->fetch_by_CoordSystems( $chr33_cs, $ctg_cs );
+
+  my $ncbi33_ncbi34_mapper =
+    $asm_adptr->fetch_by_CoordSystems( $chr33, $chr34 );
+
+  my $ctg_clone_mapper =
+    $asm_adptr->fetch_by_CoordSystems( $ctg_cs, $clone_cs );
+
+
+=head1 DESCRIPTION
+
+Adaptor for handling Assembly mappers.  This is a I<Singleton> class.
+ie: There is only one per database (C<DBAdaptor>).
+
+This is used to retrieve mappers between any two coordinate systems
+whose makeup is described by the assembly table.  Currently one step
+(explicit) and two step (implicit) pairwise mapping is supported.  In
+one-step mapping an explicit relationship between the coordinate systems
+is defined in the assembly table.  In two-step 'chained' mapping no
+explicit mapping is present but the coordinate systems must share a
+common mapping to an intermediate coordinate system.
+
+=head1 METHODS
+
+=cut
+
+package Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor;
+use vars qw(@ISA);
+use strict;
+
+use Bio::EnsEMBL::DBSQL::BaseAdaptor;
+use Bio::EnsEMBL::AssemblyMapper;
+use Bio::EnsEMBL::ChainedAssemblyMapper;
+use Bio::EnsEMBL::TopLevelAssemblyMapper;
+
+use Bio::EnsEMBL::Utils::Cache; #CPAN LRU cache
+use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning stack_trace_dump);
+#use Bio::EnsEMBL::Utils::Exception qw(deprecate throw);
+use Bio::EnsEMBL::Utils::SeqRegionCache;
+
+use integer; #do proper arithmetic bitshifts
+
+@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
+
+
+my $CHUNKFACTOR = 20;  # 2^20 = approx. 10^6
+
+=head2 new
+
+  Arg [1]    : Bio::EnsEMBL::DBAdaptor $dbadaptor the adaptor for
+               the database this assembly mapper is using.
+  Example    : my $asma = new Bio::EnsEMBL::AssemblyMapperAdaptor($dbadaptor);
+  Description: Creates a new AssemblyMapperAdaptor object
+  Returntype : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor
+  Exceptions : none
+  Caller     : Bio::EnsEMBL::DBSQL::DBAdaptor
+  Status     : Stable
+
+=cut
+
+sub new {
+  my($class, $dbadaptor) = @_;
+
+  my $self = $class->SUPER::new($dbadaptor);
+
+  $self->{'_asm_mapper_cache'} = {};
+
+  # use a shared cache (for this database) that contains info about
+  # seq regions
+  my $seq_region_cache = $self->db->get_SeqRegionCache();
+  $self->{'sr_name_cache'} = $seq_region_cache->{'name_cache'};
+  $self->{'sr_id_cache'}   = $seq_region_cache->{'id_cache'};
+
+  return $self;
+}
+
+
+
+=head2  cache_seq_ids_with_mult_assemblys
+
+  Example    : $self->adaptor->cache_seq_ids_with_mult_assemblys();
+  Description: Creates a hash of the component seq region ids that
+               map to more than one assembly from the assembly table.
+  Retruntype : none
+  Exceptions : none
+  Caller     : AssemblyMapper, ChainedAssemblyMapper
+  Status     : At Risk
+
+=cut
+
+sub cache_seq_ids_with_mult_assemblys{
+  my $self = shift;
+  my %multis;
+
+  return if (defined($self->{'multi_seq_ids'}));
+
+  $self->{'multi_seq_ids'} = {};
+
+  my $sql = qq(
+  SELECT    sra.seq_region_id
+  FROM      seq_region_attrib sra,
+            attrib_type at,
+            seq_region sr,
+            coord_system cs
+  WHERE     sra.attrib_type_id = at.attrib_type_id
+    AND     code = "MultAssem"
+    AND     sra.seq_region_id = sr.seq_region_id
+    AND     sr.coord_system_id = cs.coord_system_id
+    AND     cs.species_id = ?);
+
+  my $sth = $self->prepare($sql);
+
+  $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
+
+  $sth->execute();
+
+  my ($seq_region_id);
+
+  $sth->bind_columns(\$seq_region_id);
+
+  while($sth->fetch()) {
+    $self->{'multi_seq_ids'}->{$seq_region_id} = 1;
+  }
+  $sth->finish;
+}
+
+
+=head2 fetch_by_CoordSystems
+
+  Arg [1]    : Bio::EnsEMBL::CoordSystem $cs1
+               One of the coordinate systems to retrieve the mapper
+               between
+  Arg [2]    : Bio::EnsEMBL::CoordSystem $cs2
+               The other coordinate system to map between
+  Description: Retrieves an Assembly mapper for two coordinate
+               systems whose relationship is described in the
+               assembly table.
+
+               The ordering of the coodinate systems is arbitrary.
+               The following two statements are equivalent:
+               $mapper = $asma->fetch_by_CoordSystems($cs1,$cs2);
+               $mapper = $asma->fetch_by_CoordSystems($cs2,$cs1);
+  Returntype : Bio::EnsEMBL::AssemblyMapper
+  Exceptions : wrong argument types
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub fetch_by_CoordSystems {
+  my $self = shift;
+  my $cs1  = shift;
+  my $cs2  = shift;
+
+  if(!ref($cs1) || !$cs1->isa('Bio::EnsEMBL::CoordSystem')) {
+    throw("cs1 argument must be a Bio::EnsEMBL::CoordSystem.");
+  }
+  if(!ref($cs2) || !$cs2->isa('Bio::EnsEMBL::CoordSystem')) {
+    throw("cs2 argument must be a Bio::EnsEMBL::CoordSystem.");
+  }
+
+#  if($cs1->equals($cs2)) {
+#    throw("Cannot create mapper between same coord systems: " .
+#          $cs1->name . " " . $cs1->version);
+#  }
+
+  if($cs1->is_top_level()) {
+    return Bio::EnsEMBL::TopLevelAssemblyMapper->new($self, $cs1, $cs2);
+  }
+  if($cs2->is_top_level()) {
+    return Bio::EnsEMBL::TopLevelAssemblyMapper->new($self, $cs2, $cs1);
+  }
+
+  my $csa = $self->db->get_CoordSystemAdaptor();
+
+  #retrieve the shortest possible mapping path between these systems
+  my @mapping_path = @{$csa->get_mapping_path($cs1,$cs2)};
+
+  if(!@mapping_path) {
+
+    # It is perfectly fine not to have a mapping. No warning needed really
+    # Just check the return code!!
+
+#    warning(
+#      "There is no mapping defined between these coord systems:\n" .
+#      $cs1->name() . " " . $cs1->version() . " and " . $cs2->name() . " " .
+#      $cs2->version()
+#    );
+    return undef;
+  }
+
+  my $key = join(':', map({defined($_)?$_->dbID():"-"} @mapping_path));
+
+  my $asm_mapper = $self->{'_asm_mapper_cache'}->{$key};
+
+  return $asm_mapper if($asm_mapper);
+
+  if(@mapping_path == 1) {
+    throw("Incorrect mapping path defined in meta table. " .
+	  "0 step mapping encountered between:\n" .
+	  $cs1->name() . " " . $cs1->version() . " and " . $cs2->name . " " .
+	  $cs2->version());
+  }
+
+  if(@mapping_path == 2) {
+    #1 step regular mapping
+    $asm_mapper = Bio::EnsEMBL::AssemblyMapper->new($self, @mapping_path);
+
+#   If you want multiple pieces on two seqRegions to map to each other
+#   you need to make an assembly.mapping entry that is seperated with a #
+#   instead of an |.
+
+    $self->{'_asm_mapper_cache'}->{$key} = $asm_mapper;
+    return $asm_mapper;
+  }
+
+  if(@mapping_path == 3) {
+   #two step chained mapping
+    $asm_mapper = Bio::EnsEMBL::ChainedAssemblyMapper->new($self,@mapping_path);
+    #in multi-step mapping it is possible get requests with the
+    #coordinate system ordering reversed since both mappings directions
+    #cache on both orderings just in case
+    #e.g.   chr <-> contig <-> clone   and   clone <-> contig <-> chr
+
+    $self->{'_asm_mapper_cache'}->{$key} = $asm_mapper;
+    $key = join(':', map({defined($_)?$_->dbID():"-"} reverse(@mapping_path)));
+    $self->{'_asm_mapper_cache'}->{$key} = $asm_mapper;
+    return $asm_mapper;
+  }
+
+  throw("Only 1 and 2 step coordinate system mapping is currently\n" .
+	"supported.  Mapping between " .
+          $cs1->name() . " " . $cs1->version() . " and " .
+          $cs2->name() . " " . $cs2->version() .
+          " requires ". (scalar(@mapping_path)-1) . " steps.");
+}
+
+
+
+=head2 register_assembled
+
+  Arg [1]    : Bio::EnsEMBL::AssemblyMapper $asm_mapper
+               A valid AssemblyMapper object
+  Arg [2]    : integer $asm_seq_region
+               The dbID of the seq_region to be registered
+  Arg [3]    : int $asm_start
+               The start of the region to be registered
+  Arg [4]    : int $asm_end
+               The end of the region to be registered
+  Description: Declares an assembled region to the AssemblyMapper.
+               This extracts the relevant data from the assembly
+               table and stores it in Mapper internal to the $asm_mapper.
+               It therefore must be called before any mapping is
+               attempted on that region. Otherwise only gaps will
+               be returned.  Note that the AssemblyMapper automatically
+               calls this method when the need arises.
+  Returntype : none
+  Exceptions : throw if the seq_region to be registered does not exist
+               or if it associated with multiple assembled pieces (bad data
+               in assembly table)
+  Caller     : Bio::EnsEMBL::AssemblyMapper
+  Status     : Stable
+
+=cut
+
+
+sub register_assembled {
+  my $self = shift;
+  my $asm_mapper = shift;
+  my $asm_seq_region = shift;
+  my $asm_start      = shift;
+  my $asm_end        = shift;
+
+  if(!ref($asm_mapper) || !$asm_mapper->isa('Bio::EnsEMBL::AssemblyMapper')) {
+    throw("Bio::EnsEMBL::AssemblyMapper argument expected");
+  }
+
+  throw("asm_seq_region argument expected") if(!defined($asm_seq_region));
+  throw("asm_start argument expected") if(!defined($asm_start));
+  throw("asm_end argument expected") if(!defined($asm_end));
+
+  my $asm_cs_id = $asm_mapper->assembled_CoordSystem->dbID();
+  my $cmp_cs_id = $asm_mapper->component_CoordSystem->dbID();
+
+  #split up the region to be registered into fixed chunks
+  #this allows us to keep track of regions that have already been
+  #registered and also works under the assumption that if a small region
+  #is requested it is likely that other requests will be made in the
+  #vicinity (the minimum size registered the chunksize (2^chunkfactor)
+
+  my @chunk_regions;
+  #determine span of chunks
+  #bitwise shift right is fast and easy integer division
+
+  my($start_chunk, $end_chunk);
+
+  $start_chunk = $asm_start >> $CHUNKFACTOR;
+  $end_chunk   = $asm_end   >> $CHUNKFACTOR;
+
+  # inserts have start = end + 1, on boundary condition start_chunk
+  # could be less than end chunk
+  if($asm_start == $asm_end + 1) {
+    ($start_chunk, $end_chunk) = ($end_chunk, $start_chunk);
+  }
+
+  #find regions of continuous unregistered chunks
+  my $i;
+  my ($begin_chunk_region,$end_chunk_region);
+  for ($i = $start_chunk; $i <= $end_chunk; $i++) {
+    if($asm_mapper->have_registered_assembled($asm_seq_region, $i)) {
+      if(defined($begin_chunk_region)) {
+        #this is the end of an unregistered region.
+        my $region = [($begin_chunk_region   << $CHUNKFACTOR),
+                      (($end_chunk_region+1)     << $CHUNKFACTOR)-1];
+        push @chunk_regions, $region;
+        $begin_chunk_region = $end_chunk_region = undef;
+      }
+    } else {
+      $begin_chunk_region = $i if(!defined($begin_chunk_region));
+      $end_chunk_region   = $i+1;
+      $asm_mapper->register_assembled($asm_seq_region,$i);
+    }
+  }
+
+  #the last part may have been an unregistered region too
+  if(defined($begin_chunk_region)) {
+    my $region = [($begin_chunk_region << $CHUNKFACTOR),
+                  (($end_chunk_region+1)   << $CHUNKFACTOR) -1];
+    push @chunk_regions, $region;
+  }
+
+  return if(!@chunk_regions);
+
+  # keep the Mapper to a reasonable size
+  if( $asm_mapper->size() > $asm_mapper->max_pair_count() ) {
+    $asm_mapper->flush();
+    #we now have to go and register the entire requested region since we
+    #just flushed everything
+
+    @chunk_regions = ( [ ( $start_chunk << $CHUNKFACTOR)
+                         , (($end_chunk+1) << $CHUNKFACTOR)-1 ] );
+
+    for( my $i = $start_chunk; $i <= $end_chunk; $i++ ) {
+      $asm_mapper->register_assembled( $asm_seq_region, $i );
+    }
+  }
+
+#  my $asm_seq_region_id =
+#    $self->_seq_region_name_to_id($asm_seq_region,$asm_cs_id);
+
+  # Retrieve the description of how the assembled region is made from
+  # component regions for each of the continuous blocks of unregistered,
+  # chunked regions
+
+  my $q = qq{
+      SELECT
+         asm.cmp_start,
+         asm.cmp_end,
+         asm.cmp_seq_region_id,
+         sr.name,
+         sr.length,
+         asm.ori,
+         asm.asm_start,
+         asm.asm_end
+      FROM
+         assembly asm, seq_region sr
+      WHERE
+         asm.asm_seq_region_id = ? AND
+         ? <= asm.asm_end AND
+         ? >= asm.asm_start AND
+         asm.cmp_seq_region_id = sr.seq_region_id AND
+	 sr.coord_system_id = ?
+   };
+
+  my $sth = $self->prepare($q);
+
+  foreach my $region (@chunk_regions) {
+    my($region_start, $region_end) = @$region;
+    $sth->bind_param(1,$asm_seq_region,SQL_INTEGER);
+    $sth->bind_param(2,$region_start,SQL_INTEGER);
+    $sth->bind_param(3,$region_end,SQL_INTEGER);
+    $sth->bind_param(4,$cmp_cs_id,SQL_INTEGER);
+
+    $sth->execute();
+
+    my($cmp_start, $cmp_end, $cmp_seq_region_id, $cmp_seq_region, $ori,
+      $asm_start, $asm_end, $cmp_seq_region_length);
+
+    $sth->bind_columns(\$cmp_start, \$cmp_end, \$cmp_seq_region_id,
+                       \$cmp_seq_region, \$cmp_seq_region_length, \$ori,
+                       \$asm_start, \$asm_end);
+
+    #
+    # Load the unregistered regions of the mapper
+    #
+    while($sth->fetch()) {
+      next if($asm_mapper->have_registered_component($cmp_seq_region_id)
+               and !defined($self->{'multi_seq_ids'}->{$cmp_seq_region_id}));
+      $asm_mapper->register_component($cmp_seq_region_id);
+      $asm_mapper->mapper->add_map_coordinates(
+                 $asm_seq_region, $asm_start, $asm_end,
+                 $ori,
+                 $cmp_seq_region_id, $cmp_start, $cmp_end);
+
+      my $arr = [ $cmp_seq_region_id, $cmp_seq_region,
+                  $cmp_cs_id, $cmp_seq_region_length ];
+
+      $self->{'sr_name_cache'}->{"$cmp_seq_region:$cmp_cs_id"} = $arr;
+      $self->{'sr_id_cache'}->{"$cmp_seq_region_id"} = $arr;
+    }
+  }
+
+  $sth->finish();
+}
+
+
+
+sub _seq_region_name_to_id {
+  my $self    = shift;
+  my $sr_name = shift;
+  my $cs_id   = shift;
+
+  ($sr_name && $cs_id) || throw('seq_region_name and coord_system_id args ' .
+				'are required');
+
+  my $arr = $self->{'sr_name_cache'}->{"$sr_name:$cs_id"};
+  if( $arr ) {
+    return $arr->[0];
+  }
+
+  # Get the seq_region_id via the name.  This would be quicker if we just
+  # used internal ids instead but stored but then we lose the ability
+  # the transform accross databases with different internal ids
+
+  my $sth = $self->prepare("SELECT seq_region_id, length " .
+			   "FROM   seq_region " .
+			   "WHERE  name = ? AND coord_system_id = ?");
+
+  $sth->bind_param(1,$sr_name,SQL_VARCHAR);
+  $sth->bind_param(2,$cs_id,SQL_INTEGER);
+  $sth->execute();
+
+  if(!$sth->rows() == 1) {
+    throw("Ambiguous or non-existant seq_region [$sr_name] " .
+	  "in coord system $cs_id");
+  }
+
+  my ($sr_id, $sr_length) = $sth->fetchrow_array();
+  $sth->finish();
+
+  $arr = [ $sr_id, $sr_name, $cs_id, $sr_length ];
+
+  $self->{'sr_name_cache'}->{"$sr_name:$cs_id"} = $arr;
+  $self->{'sr_id_cache'}->{"$sr_id"} = $arr;
+
+  return $sr_id;
+}
+
+sub _seq_region_id_to_name {
+  my $self    = shift;
+  my $sr_id = shift;
+
+  ($sr_id) || throw('seq_region_is required');
+
+  my $arr = $self->{'sr_id_cache'}->{"$sr_id"};
+  if( $arr ) {
+    return $arr->[1];
+  }
+
+  # Get the seq_region name via the id.  This would be quicker if we just
+  # used internal ids instead but stored but then we lose the ability
+  # the transform accross databases with different internal ids
+
+  my $sth = $self->prepare("SELECT name, length ,coord_system_id " .
+			   "FROM   seq_region " .
+			   "WHERE  seq_region_id = ? ");
+
+  $sth->bind_param(1,$sr_id,SQL_INTEGER);
+  $sth->execute();
+
+  if(!$sth->rows() == 1) {
+    throw("non-existant seq_region [$sr_id]");
+  }
+
+  my ($sr_name, $sr_length, $cs_id) = $sth->fetchrow_array();
+  $sth->finish();
+
+  $arr = [ $sr_id, $sr_name, $cs_id, $sr_length ];
+
+  $self->{'sr_name_cache'}->{"$sr_name:$cs_id"} = $arr;
+  $self->{'sr_id_cache'}->{"$sr_id"} = $arr;
+
+  return $sr_name;
+}
+
+
+=head2 register_component
+
+  Arg [1]    : Bio::EnsEMBL::AssemblyMapper $asm_mapper
+               A valid AssemblyMapper object
+  Arg [2]    : integer $cmp_seq_region
+               The dbID of the seq_region to be registered
+  Description: Declares a component region to the AssemblyMapper.
+               This extracts the relevant data from the assembly
+               table and stores it in Mapper internal to the $asm_mapper.
+               It therefore must be called before any mapping is
+               attempted on that region. Otherwise only gaps will
+               be returned.  Note that the AssemblyMapper automatically
+               calls this method when the need arises.
+  Returntype : none
+  Exceptions : throw if the seq_region to be registered does not exist
+               or if it associated with multiple assembled pieces (bad data
+               in assembly table)
+  Caller     : Bio::EnsEMBL::AssemblyMapper
+  Status     : Stable
+
+=cut
+
+sub register_component {
+  my $self = shift;
+  my $asm_mapper = shift;
+  my $cmp_seq_region = shift;
+
+  if(!ref($asm_mapper) || !$asm_mapper->isa('Bio::EnsEMBL::AssemblyMapper')) {
+    throw("Bio::EnsEMBL::AssemblyMapper argument expected");
+  }
+
+  if(!defined($cmp_seq_region)) {
+    throw("cmp_seq_region argument expected");
+  }
+
+  my $cmp_cs_id = $asm_mapper->component_CoordSystem()->dbID();
+  my $asm_cs_id = $asm_mapper->assembled_CoordSystem()->dbID();
+
+  #do nothing if this region is already registered or special case
+  return if($asm_mapper->have_registered_component($cmp_seq_region)
+  and !defined($self->{'multi_seq_ids'}->{$cmp_seq_region}));
+
+#  my $cmp_seq_region_id =
+#    $self->_seq_region_name_to_id($cmp_seq_region, $cmp_cs_id);
+
+  # Determine what part of the assembled region this component region makes up
+
+  my $q = qq{
+      SELECT
+         asm.asm_start,
+         asm.asm_end,
+         asm.asm_seq_region_id,
+         sr.name,
+         sr.length
+      FROM
+         assembly asm, seq_region sr
+      WHERE
+         asm.cmp_seq_region_id = ? AND
+         asm.asm_seq_region_id = sr.seq_region_id AND
+         sr.coord_system_id = ?
+   };
+
+  my $sth = $self->prepare($q);
+  $sth->bind_param(1,$cmp_seq_region,SQL_INTEGER);
+  $sth->bind_param(2,$asm_cs_id,SQL_INTEGER);
+  $sth->execute();
+
+  if($sth->rows() == 0) {
+    #this component is not used in the assembled part i.e. gap
+    $asm_mapper->register_component($cmp_seq_region);
+    return;
+  }
+
+  #we do not currently support components mapping to multiple assembled
+  # make sure that you've got the correct mapping in the meta-table :
+  #   chromosome:EquCab2#contig ( use'#' for multiple mappings )
+  #   chromosome:EquCab2|contig ( use '|' delimiter for 1-1 mappings )
+  #
+  if($sth->rows() != 1) {
+    throw("Multiple assembled regions for single " .
+          "component region cmp_seq_region_id=[$cmp_seq_region]\n".
+          "Remember that multiple mappings use the \#-operaator".
+          " in the meta-table (i.e. chromosome:EquCab2\#contig\n");
+  }
+
+  my ($asm_start, $asm_end, $asm_seq_region_id,
+      $asm_seq_region, $asm_seq_region_length) = $sth->fetchrow_array();
+
+  my $arr = [ $asm_seq_region_id, $asm_seq_region,
+              $asm_cs_id, $asm_seq_region_length ];
+
+  $self->{'sr_name_cache'}->{"$asm_seq_region:$asm_cs_id"} = $arr;
+  $self->{'sr_id_cache'}->{"$asm_seq_region_id"} = $arr;
+
+  $sth->finish();
+
+  # Register the corresponding assembled region. This allows a us to
+  # register things in assembled chunks which allows us to:
+  # (1) Keep track of what assembled regions are registered
+  # (2) Use locality of reference (if they want something in same general
+  #     region it will already be registered).
+
+  $self->register_assembled($asm_mapper,$asm_seq_region_id,$asm_start,$asm_end);
+}
+
+
+
+=head2 register_chained
+
+  Arg [1]    : Bio::EnsEMBL::ChainedAssemblyMapper $casm_mapper
+               The chained assembly mapper to register regions on
+  Arg [2]    : string $from ('first' or 'last')
+               The direction we are registering from, and the name of the
+               internal mapper.
+  Arg [3]    : string $seq_region_name
+               The name of the seqregion we are registering on
+  Arg [4]    : listref $ranges
+               A list  of ranges to register (in [$start,$end] tuples).
+  Arg [5]    : (optional) $to_slice
+               Only register those on this Slice.
+  Description: Registers a set of ranges on a chained assembly mapper.
+               This function is at the heart of the chained mapping process.
+               It retrieves information from the assembly table and
+               dynamically constructs the mappings between two coordinate
+               systems which are 2 mapping steps apart. It does this by using
+               two internal mappers to load up a third mapper which is
+               actually used by the ChainedAssemblyMapper to perform the
+               mapping.
+
+               This method must be called before any mapping is
+               attempted on regions of interest, otherwise only gaps will
+               be returned.  Note that the ChainedAssemblyMapper automatically
+               calls this method when the need arises.
+  Returntype : none
+  Exceptions : throw if the seq_region to be registered does not exist
+               or if it associated with multiple assembled pieces (bad data
+               in assembly table)
+
+               throw if the mapping between the coordinate systems cannot
+               be performed in two steps, which means there is an internal
+               error in the data in the meta table or in the code that creates
+               the mapping paths.
+  Caller     : Bio::EnsEMBL::AssemblyMapper
+  Status     : Stable
+
+=cut
+
+sub register_chained {
+  my $self = shift;
+  my $casm_mapper = shift;
+  my $from = shift;
+  my $seq_region_id = shift;
+  my $ranges = shift;
+  my $to_slice = shift;
+
+  my $to_seq_region_id;
+  if(defined($to_slice)){
+   if($casm_mapper->first_CoordSystem()->equals($casm_mapper->last_CoordSystem())){
+      return $self->_register_chained_special($casm_mapper, $from, $seq_region_id, $ranges, $to_slice);
+    }
+    $to_seq_region_id = $to_slice->get_seq_region_id();
+    if(!defined($to_seq_region_id)){
+      die "Could not get seq_region_id for to_slice".$to_slice->seq_region_name."\n";
+    }
+  }
+
+  my ($start_name, $start_mid_mapper, $start_cs, $start_registry,
+      $end_name, $end_mid_mapper, $end_cs, $end_registry);
+
+  if($from eq 'first') {
+    $start_name       = 'first';
+    $start_mid_mapper = $casm_mapper->first_middle_mapper();
+    $start_cs         = $casm_mapper->first_CoordSystem();
+    $start_registry   = $casm_mapper->first_registry();
+    $end_mid_mapper   = $casm_mapper->last_middle_mapper();
+    $end_cs           = $casm_mapper->last_CoordSystem();
+    $end_registry     = $casm_mapper->last_registry();
+    $end_name         = 'last';
+  } elsif($from eq 'last') {
+    $start_name       = 'last';
+    $start_mid_mapper = $casm_mapper->last_middle_mapper();
+    $start_cs         = $casm_mapper->last_CoordSystem();
+    $start_registry   = $casm_mapper->last_registry();
+    $end_mid_mapper   = $casm_mapper->first_middle_mapper();
+    $end_cs           = $casm_mapper->first_CoordSystem();
+    $end_registry     = $casm_mapper->first_registry();
+    $end_name         = 'first';
+  } else {
+    throw("Invalid from argument: [$from], must be 'first' or 'last'");
+  }
+
+  my $combined_mapper = $casm_mapper->first_last_mapper();
+  my $mid_cs          = $casm_mapper->middle_CoordSystem();
+  my $mid_name        = 'middle';
+  my $csa             = $self->db->get_CoordSystemAdaptor();
+
+  # Check for the simple case where the ChainedMapper is short
+  if( ! defined $mid_cs ) {
+      $start_mid_mapper = $combined_mapper;
+  }
+
+
+  ##############
+  # obtain the first half of the mappings and load them into the start mapper
+  #
+
+  #ascertain which is component and which is actually assembled coord system
+  my @path;
+
+  # check for the simple case, where the ChainedMapper is short
+  if( defined $mid_cs ) {
+      @path = @{$csa->get_mapping_path($start_cs, $mid_cs)};
+  } else {
+      @path = @{$csa->get_mapping_path( $start_cs, $end_cs )};
+  }
+
+  if(@path != 2 && defined( $path[1] )) {
+    my $path = join(',', map({$_->name .' '. $_->version} @path));
+    my $len  = scalar(@path) - 1;
+    throw("Unexpected mapping path between start and intermediate " .
+	  "coord systems (". $start_cs->name . " " . $start_cs->version .
+	  " and " . $mid_cs->name . " " . $mid_cs->version . ")." .
+	  "\nExpected path length 1, got $len. " .
+	  "(path=$path)");
+  }
+
+  my $sth;
+  my ($asm_cs,$cmp_cs);
+  $asm_cs = $path[0];
+  $cmp_cs = $path[-1];
+
+  #the SQL varies depending on whether we are coming from assembled or
+  #component coordinate system
+
+my $asm2cmp = (<<ASMCMP);
+   SELECT
+         asm.cmp_start,
+         asm.cmp_end,
+         asm.cmp_seq_region_id,
+         sr.name,
+         sr.length,
+         asm.ori,
+         asm.asm_start,
+         asm.asm_end
+      FROM
+         assembly asm, seq_region sr
+      WHERE
+         asm.asm_seq_region_id = ? AND
+         ? <= asm.asm_end AND
+         ? >= asm.asm_start AND
+         asm.cmp_seq_region_id = sr.seq_region_id AND
+	 sr.coord_system_id = ?
+ASMCMP
+
+
+my $cmp2asm = (<<CMPASM);
+   SELECT
+         asm.asm_start,
+         asm.asm_end,
+         asm.asm_seq_region_id,
+         sr.name,
+         sr.length,
+         asm.ori,
+         asm.cmp_start,
+         asm.cmp_end
+      FROM
+         assembly asm, seq_region sr
+      WHERE
+         asm.cmp_seq_region_id = ? AND
+         ? <= asm.cmp_end AND
+         ? >= asm.cmp_start AND
+         asm.asm_seq_region_id = sr.seq_region_id AND
+	 sr.coord_system_id = ?
+CMPASM
+
+  my $asm2cmp_sth;
+  my $cmp2asm_sth;
+  if(defined($to_slice)){
+    my $to_cs = $to_slice->coord_system;
+    if($asm_cs->equals($to_cs)){
+      $asm2cmp_sth = $self->prepare($asm2cmp);
+      $cmp2asm_sth = $self->prepare($cmp2asm." AND asm.asm_seq_region_id = $to_seq_region_id");
+    }
+    elsif($cmp_cs->equals($to_cs)){
+      $asm2cmp_sth = $self->prepare($asm2cmp." AND asm.cmp_seq_region_id = $to_seq_region_id");
+      $cmp2asm_sth = $self->prepare($cmp2asm);
+    }
+    else{
+      $asm2cmp_sth = $self->prepare($asm2cmp);
+      $cmp2asm_sth = $self->prepare($cmp2asm);
+    }
+  }	
+  else{
+    $asm2cmp_sth = $self->prepare($asm2cmp);
+    $cmp2asm_sth = $self->prepare($cmp2asm);
+  }
+
+
+
+  $sth = ($asm_cs->equals($start_cs)) ? $asm2cmp_sth : $cmp2asm_sth;
+
+  my $mid_cs_id;
+
+  # check for the simple case where the ChainedMapper is short
+  if( defined $mid_cs ) {
+      $mid_cs_id = $mid_cs->dbID();
+  } else {
+      $mid_cs_id = $end_cs->dbID();
+  }
+
+  my @mid_ranges;
+  my @start_ranges;
+
+  #need to perform the query for each unregistered range
+  foreach my $range (@$ranges) {
+    my ($start, $end) = @$range;
+    $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
+    $sth->bind_param(2,$start,SQL_INTEGER);
+    $sth->bind_param(3,$end,SQL_INTEGER);
+    $sth->bind_param(4,$mid_cs_id,SQL_INTEGER);
+    $sth->execute();
+
+    #load the start <-> mid mapper with the results and record the mid cs
+    #ranges we just added to the mapper
+
+    my ($mid_start, $mid_end, $mid_seq_region_id, $mid_seq_region, $mid_length,
+        $ori, $start_start, $start_end);
+
+    $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id,
+		       \$mid_seq_region, \$mid_length, \$ori, \$start_start,
+		       \$start_end);
+
+    while($sth->fetch()) {
+
+      if( defined $mid_cs ) {
+        $start_mid_mapper->add_map_coordinates
+          (
+           $seq_region_id,$start_start, $start_end, $ori,
+           $mid_seq_region_id, $mid_start, $mid_end
+          );
+      } else {
+        if( $from eq "first" ) {
+          $combined_mapper->add_map_coordinates
+            (
+             $seq_region_id,$start_start, $start_end, $ori,
+             $mid_seq_region_id, $mid_start, $mid_end
+            );
+        } else {
+          $combined_mapper->add_map_coordinates
+            (
+             $mid_seq_region_id, $mid_start, $mid_end, $ori,
+             $seq_region_id,$start_start, $start_end
+            );
+        }
+      }
+
+      #update sr_name cache
+      my $arr = [ $mid_seq_region_id, $mid_seq_region,
+                  $mid_cs_id, $mid_length ];
+
+      $self->{'sr_name_cache'}->{"$mid_seq_region:$mid_cs_id"} = $arr;
+      $self->{'sr_id_cache'}->{"$mid_seq_region_id"} = $arr;
+
+      push @mid_ranges,[$mid_seq_region_id,$mid_seq_region,
+                        $mid_start,$mid_end];
+      push @start_ranges, [ $seq_region_id, $start_start, $start_end ];
+
+      #the region that we actually register may actually be larger or smaller
+      #than the region that we wanted to register.
+      #register the intersection of the region so we do not end up doing
+      #extra work later
+
+      if($start_start < $start || $start_end > $end) {
+        $start_registry->check_and_register($seq_region_id,$start_start,
+                                            $start_end);
+      }
+    }
+    $sth->finish();
+  }
+
+  # in the one step case, we load the mid ranges in the
+  # last_registry and we are done
+  if( ! defined $mid_cs ) {
+    for my $range ( @mid_ranges ) {
+      $end_registry->check_and_register( $range->[0], $range->[2],
+                                         $range->[3] );
+    }
+
+    # and thats it for the simple case ...
+    return;
+  }
+
+
+  ###########
+  # now the second half of the mapping
+  # perform another query and load the mid <-> end mapper using the mid cs
+  # ranges
+  #
+
+  #ascertain which is component and which is actually assembled coord system
+  @path = @{$csa->get_mapping_path($mid_cs, $end_cs)};
+  if(@path == 2 || ( @path == 3 && !defined $path[1])) {
+
+    $asm_cs = $path[0];
+    $cmp_cs = $path[-1];
+  } else {
+    my $path = join(',', map({$_->name .' '. $_->version} @path));
+    my $len = scalar(@path)-1;
+    throw("Unexpected mapping path between intermediate and last" .
+	  "coord systems (". $mid_cs->name . " " . $mid_cs->version .
+	  " and " . $end_cs->name . " " . $end_cs->version . ")." .
+	  "\nExpected path length 1, got $len. " .
+	  "(path=$path)");
+  }
+
+  if(defined($to_slice)){
+    my $to_cs = $to_slice->coord_system;
+    if($asm_cs->equals($to_cs)){
+      $asm2cmp_sth = $self->prepare($asm2cmp);
+      $cmp2asm_sth = $self->prepare($cmp2asm." AND asm.asm_seq_region_id = $to_seq_region_id");
+    }
+    elsif($cmp_cs->equals($to_cs)){
+      $asm2cmp_sth = $self->prepare($asm2cmp." AND asm.cmp_seq_region_id = $to_seq_region_id");
+      $cmp2asm_sth = $self->prepare($cmp2asm);
+    }
+    else{
+      $asm2cmp_sth = $self->prepare($asm2cmp);
+      $cmp2asm_sth = $self->prepare($cmp2asm);
+    }
+  }
+
+  $sth = ($asm_cs->equals($mid_cs)) ? $asm2cmp_sth : $cmp2asm_sth;
+
+  my $end_cs_id = $end_cs->dbID();
+  foreach my $mid_range (@mid_ranges) {
+    my ($mid_seq_region_id, $mid_seq_region,$start, $end) = @$mid_range;
+    $sth->bind_param(1,$mid_seq_region_id,SQL_INTEGER);
+    $sth->bind_param(2,$start,SQL_INTEGER);
+    $sth->bind_param(3,$end,SQL_INTEGER);
+    $sth->bind_param(4,$end_cs_id,SQL_INTEGER);
+    $sth->execute();
+
+    #load the end <-> mid mapper with the results and record the mid cs
+    #ranges we just added to the mapper
+
+    my ($end_start, $end_end, $end_seq_region_id, $end_seq_region, $end_length,
+        $ori, $mid_start, $mid_end);
+
+    $sth->bind_columns(\$end_start, \$end_end, \$end_seq_region_id,
+		       \$end_seq_region, \$end_length, \$ori, \$mid_start,
+		       \$mid_end);
+
+    while($sth->fetch()) {
+      $end_mid_mapper->add_map_coordinates
+        (
+         $end_seq_region_id, $end_start, $end_end, $ori,
+         $mid_seq_region_id, $mid_start, $mid_end
+        );
+
+      #update sr_name cache
+      my $arr = [ $end_seq_region_id,$end_seq_region,$end_cs_id,$end_length ];
+
+      $self->{'sr_name_cache'}->{"$end_seq_region:$end_cs_id"} = $arr;
+      $self->{'sr_id_cache'}->{"$end_seq_region_id"} = $arr;
+
+      #register this region on the end coord system
+      $end_registry->check_and_register($end_seq_region_id, $end_start, $end_end);
+    }
+    $sth->finish();
+  }
+
+  #########
+  # Now that both halves are loaded
+  # Do stepwise mapping using both of the loaded mappers to load
+  # the final start <-> end mapper
+  #
+
+  _build_combined_mapper(\@start_ranges, $start_mid_mapper, $end_mid_mapper,
+                         $combined_mapper, $start_name);
+  #all done!
+  return;
+}
+
+
+=head2 _register_chained_special
+
+  Arg [1]    : Bio::EnsEMBL::ChainedAssemblyMapper $casm_mapper
+               The chained assembly mapper to register regions on
+  Arg [2]    : string $from ('first' or 'last')
+               The direction we are registering from, and the name of the
+               internal mapper.
+  Arg [3]    : string $seq_region_name
+               The name of the seqregion we are registering on
+  Arg [4]    : listref $ranges
+               A list  of ranges to register (in [$start,$end] tuples).
+  Arg [5]    : (optional) $to_slice
+               Only register those on this Slice.
+  Description: Registers a set of ranges on a chained assembly mapper.
+               This function is at the heart of the chained mapping process.
+               It retrieves information from the assembly table and
+               dynamically constructs the mappings between two coordinate
+               systems which are 2 mapping steps apart. It does this by using
+               two internal mappers to load up a third mapper which is
+               actually used by the ChainedAssemblyMapper to perform the
+               mapping.
+
+               This method must be called before any mapping is
+               attempted on regions of interest, otherwise only gaps will
+               be returned.  Note that the ChainedAssemblyMapper automatically
+               calls this method when the need arises.
+  Returntype : none
+  Exceptions : throw if the seq_region to be registered does not exist
+               or if it associated with multiple assembled pieces (bad data
+               in assembly table)
+
+               throw if the mapping between the coordinate systems cannot
+               be performed in two steps, which means there is an internal
+               error in the data in the meta table or in the code that creates
+               the mapping paths.
+  Caller     : Bio::EnsEMBL::AssemblyMapper
+  Status     : Stable
+
+=cut
+
+sub _register_chained_special {
+  my $self = shift;
+  my $casm_mapper = shift;
+  my $from = shift;
+  my $seq_region_id = shift;
+  my $ranges = shift;
+  my $to_slice = shift;
+  my $found = 0;
+
+  my $sth = $self->prepare("SELECT
+		      asm.cmp_start,
+		      asm.cmp_end,
+		      asm.cmp_seq_region_id,
+		      sr.name,
+		      sr.length,
+		      asm.ori,
+		      asm.asm_start,
+		      asm.asm_end
+		    FROM
+		      assembly asm, seq_region sr
+		    WHERE
+		      asm.asm_seq_region_id = ? AND
+                      ? <= asm.asm_end AND
+		      ? >= asm.asm_start AND
+		      asm.cmp_seq_region_id = sr.seq_region_id AND
+		      sr.coord_system_id = ? AND
+		      asm.cmp_seq_region_id = ?");
+
+
+  my ($start_name, $start_mid_mapper, $start_cs, $start_registry,
+      $end_name, $end_mid_mapper, $end_cs, $end_registry);
+
+  if($from eq 'first') {
+    $start_name       = 'first';
+    $start_mid_mapper = $casm_mapper->first_middle_mapper();
+    $start_cs         = $casm_mapper->first_CoordSystem();
+    $start_registry   = $casm_mapper->first_registry();
+    $end_mid_mapper   = $casm_mapper->last_middle_mapper();
+    $end_cs           = $casm_mapper->last_CoordSystem();
+    $end_registry     = $casm_mapper->last_registry();
+    $end_name         = 'last';
+  } elsif($from eq 'last') {
+    $start_name       = 'last';
+    $start_mid_mapper = $casm_mapper->last_middle_mapper();
+    $start_cs         = $casm_mapper->last_CoordSystem();
+    $start_registry   = $casm_mapper->last_registry();
+    $end_mid_mapper   = $casm_mapper->first_middle_mapper();
+    $end_cs           = $casm_mapper->first_CoordSystem();
+    $end_registry     = $casm_mapper->first_registry();
+    $end_name         = 'first';
+  } else {
+    throw("Invalid from argument: [$from], must be 'first' or 'last'");
+  }
+
+  my $combined_mapper = $casm_mapper->first_last_mapper();
+  my $mid_cs          = $casm_mapper->middle_CoordSystem();
+  my $mid_name        = 'middle';
+  my $csa             = $self->db->get_CoordSystemAdaptor();
+
+  # Check for the simple case where the ChainedMapper is short
+  if( ! defined $mid_cs ) {
+      $start_mid_mapper = $combined_mapper;
+  }
+
+
+  my @path;
+  if( defined $mid_cs ) {
+      @path = @{$csa->get_mapping_path($start_cs, $mid_cs)};
+  } else {
+      @path = @{$csa->get_mapping_path( $start_cs, $end_cs )};
+  }
+  if( ! defined $mid_cs ) {
+      $start_mid_mapper = $combined_mapper;
+  }
+
+  if(@path != 2 && defined( $path[1] )) {
+    my $path = join(',', map({$_->name .' '. $_->version} @path));
+    my $len  = scalar(@path) - 1;
+    throw("Unexpected mapping path between start and intermediate " .
+	  "coord systems (". $start_cs->name . " " . $start_cs->version .
+	  " and " . $mid_cs->name . " " . $mid_cs->version . ")." .
+	  "\nExpected path length 1, got $len. " .
+	  "(path=$path)");
+  }
+
+  my ($asm_cs,$cmp_cs);
+  $asm_cs = $path[0];
+  $cmp_cs = $path[-1];
+
+  $combined_mapper = $casm_mapper->first_last_mapper();
+  $mid_cs          = $casm_mapper->middle_CoordSystem();
+  $mid_name        = 'middle';
+  $csa             = $self->db->get_CoordSystemAdaptor();
+
+  my $mid_cs_id;
+
+  # Check for the simple case where the ChainedMapper is short
+  if ( !defined $mid_cs ) {
+    $start_mid_mapper = $combined_mapper;
+  } else {
+    $mid_cs_id = $mid_cs->dbID();
+  }
+
+  my @mid_ranges;
+  my @start_ranges;
+
+  my $to_cs = $to_slice->coord_system;
+  foreach my $direction (1, 0){
+    my $id1;
+    my $id2;
+    if($direction){
+      $id1 = $seq_region_id;
+      $id2 = $to_slice->get_seq_region_id();
+    }
+    else{
+      $id2 = $seq_region_id;
+      $id1 = $to_slice->get_seq_region_id();
+    }
+
+    foreach my $range (@$ranges) {
+      my ($start, $end) = @$range;
+      $sth->bind_param(1,$id1,SQL_INTEGER);
+      $sth->bind_param(2,$start,SQL_INTEGER);
+      $sth->bind_param(3,$end,SQL_INTEGER);
+      $sth->bind_param(4,$to_cs->dbID,SQL_INTEGER);
+      $sth->bind_param(5,$id2,SQL_INTEGER);
+      $sth->execute();
+
+      my ($mid_start, $mid_end, $mid_seq_region_id, $mid_seq_region, $mid_length,
+	  $ori, $start_start, $start_end);
+
+      $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id,
+			 \$mid_seq_region, \$mid_length, \$ori, \$start_start,
+			 \$start_end);
+
+      while($sth->fetch()) {
+	$found = 1;
+
+	if( defined $mid_cs ) {
+	  $start_mid_mapper->add_map_coordinates
+	    (
+	     $id1,$start_start, $start_end, $ori,
+	     $mid_seq_region_id, $mid_start, $mid_end
+	    );
+	} else {
+	  if( $from eq "first") {
+	    if($direction){
+	      $combined_mapper->add_map_coordinates
+		(
+		 $id1,$start_start, $start_end, $ori,
+		 $mid_seq_region_id, $mid_start, $mid_end
+		);
+	    }
+	    else{
+	      $combined_mapper->add_map_coordinates
+		(
+		 $mid_seq_region_id, $mid_start, $mid_end, $ori,
+		 $id1,$start_start, $start_end
+		);
+	    }	
+	  } else {
+	    if($direction){
+	      $combined_mapper->add_map_coordinates
+		(
+		 $mid_seq_region_id, $mid_start, $mid_end, $ori,
+		 $id1,$start_start, $start_end
+		);
+	    }
+	    else{
+	      $combined_mapper->add_map_coordinates
+		(
+		 $id1,$start_start, $start_end, $ori,
+		 $mid_seq_region_id, $mid_start, $mid_end
+		);
+	    }
+	  }
+	}
+	
+	#update sr_name cache
+	my $arr = [ $mid_seq_region_id, $mid_seq_region,
+		    $mid_cs_id, $mid_length ];
+	
+	$self->{'sr_name_cache'}->{"$mid_seq_region:$mid_cs_id"} = $arr;
+	$self->{'sr_id_cache'}->{"$mid_seq_region_id"} = $arr;
+	
+	push @mid_ranges,[$mid_seq_region_id,$mid_seq_region,
+			  $mid_start,$mid_end];
+	push @start_ranges, [ $id1, $start_start, $start_end ];
+	
+	#the region that we actually register may actually be larger or smaller
+	#than the region that we wanted to register.
+	#register the intersection of the region so we do not end up doing
+	#extra work later
+	
+	if($start_start < $start || $start_end > $end) {
+	  $start_registry->check_and_register($id1,$start_start,
+					      $start_end);
+	}
+      }
+    $sth->finish();
+    }
+    if($found){
+      if( ! defined $mid_cs ) {
+	for my $range ( @mid_ranges ) {
+	  $end_registry->check_and_register( $range->[0], $range->[2],
+					     $range->[3] );
+	}
+	
+	# and thats it for the simple case ...
+	return;
+      }
+    }
+  }
+}
+
+
+=head2 register_all
+
+  Arg [1]    : Bio::EnsEMBL::AssemblyMapper $mapper
+  Example    : $mapper = $asm_mapper_adaptor->fetch_by_CoordSystems($cs1,$cs2);
+
+               # make cache large enough to hold all of the mappings
+               $mapper->max_pair_count(10e6);
+               $asm_mapper_adaptor->register_all($mapper);
+
+               # perform mappings as normal
+               $mapper->map($slice->seq_region_name(), $sr_start, $sr_end,
+                            $sr_strand, $cs1);
+               ...
+  Description: This function registers the entire set of mappings between
+               two coordinate systems in an assembly mapper.
+               This will use a lot of memory but will be much more efficient
+               when doing a lot of mapping which is spread over the entire
+               genome.
+  Returntype : none
+  Exceptions : none
+  Caller     : specialised prograhsm
+  Status     : Stable
+
+=cut
+
+sub register_all {
+  my $self = shift;
+  my $mapper = shift;
+
+  my $asm_cs_id = $mapper->assembled_CoordSystem()->dbID();
+  my $cmp_cs_id = $mapper->component_CoordSystem()->dbID();
+
+  # retrieve every relevant assembled/component pair from the assembly table
+
+  my $q = qq{
+      SELECT
+         asm.cmp_start,
+         asm.cmp_end,
+         asm.cmp_seq_region_id,
+         cmp_sr.name,
+         cmp_sr.length,
+         asm.ori,
+         asm.asm_start,
+         asm.asm_end,
+         asm.asm_seq_region_id,
+         asm_sr.name,
+         asm_sr.length
+      FROM
+         assembly asm, seq_region asm_sr, seq_region cmp_sr
+      WHERE
+         asm.cmp_seq_region_id = cmp_sr.seq_region_id AND
+         asm.asm_seq_region_id = asm_sr.seq_region_id AND
+         cmp_sr.coord_system_id = ? AND
+         asm_sr.coord_system_id = ?
+   };
+
+  my $sth = $self->prepare($q);
+
+  $sth->bind_param(1,$cmp_cs_id,SQL_INTEGER);
+  $sth->bind_param(2,$asm_cs_id,SQL_INTEGER);
+  $sth->execute();
+
+  # load the mapper with the assembly information
+
+  my ($cmp_start, $cmp_end, $cmp_seq_region_id, $cmp_seq_region, $cmp_length,
+      $ori,
+      $asm_start, $asm_end, $asm_seq_region_id, $asm_seq_region, $asm_length);
+
+  $sth->bind_columns(\$cmp_start, \$cmp_end, \$cmp_seq_region_id,
+                     \$cmp_seq_region, \$cmp_length, \$ori,
+                     \$asm_start, \$asm_end, \$asm_seq_region_id,
+                     \$asm_seq_region, \$asm_length);
+
+  my %asm_registered;
+
+  while($sth->fetch()) {
+    $mapper->register_component($cmp_seq_region_id);
+    $mapper->mapper->add_map_coordinates(
+                 $asm_seq_region_id, $asm_start, $asm_end,
+                 $ori,
+                 $cmp_seq_region_id, $cmp_start, $cmp_end);
+
+      my $arr = [$cmp_seq_region_id, $cmp_seq_region, $cmp_cs_id, $cmp_length];
+
+      $self->{'sr_name_cache'}->{"$cmp_seq_region:$cmp_cs_id"} = $arr;
+      $self->{'sr_id_cache'}->{"$cmp_seq_region_id"} = $arr;
+
+    # only register each asm seq_region once since it requires some work
+    if(!$asm_registered{$asm_seq_region_id}) {
+      $asm_registered{$asm_seq_region_id} = 1;
+
+      # register all chunks from start of seq region to end
+      my $end_chunk = $asm_length >> $CHUNKFACTOR;
+      for(my $i = 0; $i <= $end_chunk; $i++) {
+        $mapper->register_assembled($asm_seq_region_id, $i);
+      }
+
+      $arr = [$asm_seq_region_id, $asm_seq_region, $asm_cs_id, $asm_length];
+
+      $self->{'sr_name_cache'}->{"$asm_seq_region:$asm_cs_id"} = $arr;
+      $self->{'sr_id_cache'}->{"$asm_seq_region_id"} = $arr;
+    }
+  }
+
+  $sth->finish();
+
+  return;
+}
+
+
+
+=head2 register_all_chained
+
+  Arg [1]    : Bio::EnsEMBL::ChainedAssemblyMapper $casm_mapper
+  Example    : $mapper = $asm_mapper_adaptor->fetch_by_CoordSystems($cs1,$cs2);
+
+               # make the cache large enough to hold all of the mappings
+               $mapper->max_pair_count(10e6);
+               # load all of the mapping data
+               $asm_mapper_adaptor->register_all_chained($mapper);
+
+               # perform mappings as normal
+               $mapper->map($slice->seq_region_name(), $sr_start, $sr_end,
+                            $sr_strand, $cs1);
+               ...
+  Description: This function registers the entire set of mappings between
+               two coordinate systems in a chained mapper.  This will use a lot
+               of memory but will be much more efficient when doing a lot of
+               mapping which is spread over the entire genome.
+  Returntype : none
+  Exceptions : throw if mapper is between coord systems with unexpected
+               mapping paths
+  Caller     : specialised programs doing a lot of genome-wide mapping
+  Status     : Stable
+
+=cut
+
+sub register_all_chained {
+  my $self = shift;
+  my $casm_mapper = shift;
+
+  my $first_cs = $casm_mapper->first_CoordSystem();
+  my $mid_cs   = $casm_mapper->middle_CoordSystem();
+  my $last_cs  = $casm_mapper->last_CoordSystem();
+
+  my $start_mid_mapper = $casm_mapper->first_middle_mapper();
+  my $end_mid_mapper   = $casm_mapper->last_middle_mapper();
+  my $combined_mapper  = $casm_mapper->first_last_mapper();
+
+  my @ranges;
+
+  my $sth = $self->prepare(
+     'SELECT
+         asm.cmp_start,
+         asm.cmp_end,
+         asm.cmp_seq_region_id,
+         sr_cmp.name,
+         sr_cmp.length,
+         asm.ori,
+         asm.asm_start,
+         asm.asm_end,
+         asm.asm_seq_region_id,
+         sr_asm.name,
+         sr_asm.length
+      FROM
+         assembly asm, seq_region sr_asm, seq_region sr_cmp
+      WHERE
+         sr_asm.seq_region_id = asm.asm_seq_region_id AND
+         sr_cmp.seq_region_id = asm.cmp_seq_region_id AND
+         sr_asm.coord_system_id = ? AND
+         sr_cmp.coord_system_id = ?');
+
+  my $csa = $self->db()->get_CoordSystemAdaptor();
+
+  my @path;
+
+  if ( !defined $mid_cs ) {
+    @path = @{ $csa->get_mapping_path( $first_cs, $last_cs ) };
+    if ( !defined( $path[1] ) ) {
+      splice( @path, 1, 1 );
+    }
+  } else {
+    @path = @{ $csa->get_mapping_path( $first_cs, $mid_cs ) };
+    # fix for when we have something like supercontig#contig#chromosome
+    if ( !defined( $path[1] ) ) {
+      splice( @path, 1, 1 );
+    }
+  }
+
+  if ( @path != 2 ) {
+    my $path =
+      join( ',', map( { $_->name . ' ' . $_->version } @path ) );
+    my $len = scalar(@path) - 1;
+    throw(   "Unexpected mapping path between start and intermediate "
+           . "coord systems ("
+           . $first_cs->name . " "
+           . $first_cs->version . " and "
+           . $mid_cs->name . " "
+           . $mid_cs->version . ")."
+           . "\nExpected path length 1, got $len. "
+           . "(path=$path)" );
+  }
+
+  my ($asm_cs,$cmp_cs) = @path;
+
+  $sth->{mysql_use_result} = 1;
+  $sth->bind_param(1,$asm_cs->dbID,SQL_INTEGER);
+  $sth->bind_param(2,$cmp_cs->dbID,SQL_INTEGER);
+  $sth->execute();
+
+
+  my ($mid_start, $mid_end, $mid_seq_region_id, $mid_seq_region, $mid_length,
+      $ori, $start_start, $start_end, $start_seq_region_id, $start_seq_region,
+      $start_length);
+
+  if($asm_cs->equals($first_cs)) {
+    $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id,
+                       \$mid_seq_region, \$mid_length, \$ori, \$start_start,
+                       \$start_end, \$start_seq_region_id, \$start_seq_region,
+                       \$start_length);
+  } else {
+    $sth->bind_columns(\$start_start, \$start_end, \$start_seq_region_id,
+                       \$start_seq_region, \$start_length, \$ori,
+                       \$mid_start, \$mid_end, \$mid_seq_region_id,
+                       \$mid_seq_region, \$mid_length);
+
+  }
+
+  my ( $mid_cs_id, $start_cs_id, $reg, $mapper );
+  if ( !defined $mid_cs ) {
+    $mid_cs_id   = $last_cs->dbID();
+    $start_cs_id = $first_cs->dbID();
+    $mapper      = $combined_mapper;
+  } else {
+    $mid_cs_id   = $mid_cs->dbID();
+    $start_cs_id = $first_cs->dbID();
+    $mapper      = $start_mid_mapper;
+  }
+
+  $reg =  $casm_mapper->first_registry();
+
+  while($sth->fetch()) {
+    $mapper->add_map_coordinates
+      (
+       $start_seq_region_id, $start_start, $start_end, $ori,
+       $mid_seq_region_id, $mid_start, $mid_end
+      );
+    push( @ranges, [$start_seq_region_id, $start_start, $start_end ] );
+
+    $reg->check_and_register( $start_seq_region_id, 1, $start_length );
+    if( ! defined $mid_cs ) {
+      $casm_mapper->last_registry()->check_and_register
+	( $mid_seq_region_id, $mid_start, $mid_end );
+    }
+
+    my $arr = [ $mid_seq_region_id, $mid_seq_region,
+                $mid_cs_id, $mid_length ];
+
+    $self->{'sr_name_cache'}->{"$mid_seq_region:$mid_cs_id"} = $arr;
+    $self->{'sr_id_cache'}->{"$mid_seq_region_id"} = $arr;
+
+    $arr = [ $start_seq_region_id, $start_seq_region,
+             $start_cs_id, $start_length ];
+
+    $self->{'sr_name_cache'}->{"$start_seq_region:$start_cs_id"} = $arr;
+    $self->{'sr_id_cache'}->{"$start_seq_region_id"} = $arr;
+  }
+
+  if( ! defined $mid_cs ) {
+    # thats it for the simple case
+    return;
+  }
+
+
+  @path = @{ $csa->get_mapping_path( $last_cs, $mid_cs ) };
+  if ( defined($mid_cs) ) {
+    if ( !defined( $path[1] ) ) {
+      splice( @path, 1, 1 );
+    }
+  }
+
+  if ( @path != 2 ) {
+    my $path =
+      join( ',', map( { $_->name . ' ' . $_->version } @path ) );
+    my $len = scalar(@path) - 1;
+    throw(   "Unexpected mapping path between intermediate and last "
+           . "coord systems ("
+           . $last_cs->name . " "
+           . $last_cs->version . " and "
+           . $mid_cs->name . " "
+           . $mid_cs->version . ")."
+           . "\nExpected path length 1, got $len. "
+           . "(path=$path)" );
+  }
+
+  ($asm_cs,$cmp_cs) = @path;
+
+  $sth->bind_param(1,$asm_cs->dbID,SQL_INTEGER);
+  $sth->bind_param(2,$cmp_cs->dbID,SQL_INTEGER);
+  $sth->execute();
+
+
+  my ($end_start, $end_end, $end_seq_region_id, $end_seq_region,
+      $end_length);
+
+  if($asm_cs->equals($mid_cs)) {
+    $sth->bind_columns(\$end_start, \$end_end, \$end_seq_region_id,
+                       \$end_seq_region, \$end_length, \$ori,
+                       \$mid_start, \$mid_end, \$mid_seq_region_id,
+                       \$mid_seq_region, \$mid_length);
+  } else {
+    $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id,
+                       \$mid_seq_region, \$mid_length, \$ori, \$end_start,
+                       \$end_end, \$end_seq_region_id, \$end_seq_region,
+                       \$end_length);
+  }
+
+  my $end_cs_id = $last_cs->dbID();
+  $reg = $casm_mapper->last_registry();
+
+  while($sth->fetch()) {
+    $end_mid_mapper->add_map_coordinates
+      (
+       $end_seq_region_id, $end_start, $end_end, $ori,
+       $mid_seq_region_id, $mid_start, $mid_end
+      );
+
+    $reg->check_and_register( $end_seq_region_id, 1, $end_length );
+
+    my $arr = [ $end_seq_region_id, $end_seq_region,
+             $end_cs_id, $end_length ];
+    $self->{'sr_name_cache'}->{"$end_seq_region:$end_cs_id"} = $arr;
+    $self->{'sr_id_cache'}->{"$end_seq_region_id"} = $arr;
+  }
+
+  _build_combined_mapper( \@ranges, $start_mid_mapper, $end_mid_mapper,
+                          $combined_mapper, "first" );
+
+  return;
+}
+
+
+
+# after both halves of a chained mapper are loaded
+# this function maps all ranges in $ranges and loads the
+# results into the combined mapper
+sub _build_combined_mapper {
+  my $ranges = shift;
+  my $start_mid_mapper = shift;
+  my $end_mid_mapper = shift;
+  my $combined_mapper = shift;
+  my $start_name = shift;
+
+  my $mid_name = "middle";
+
+  foreach my $range (@$ranges) {
+    my ( $seq_region_id, $start, $end) = @$range;
+
+    my $sum = 0;
+
+    my @initial_coords = $start_mid_mapper->map_coordinates($seq_region_id,
+                                                            $start,$end,1,
+                                                            $start_name);
+
+    foreach my $icoord (@initial_coords) {
+      #skip gaps
+      if($icoord->isa('Bio::EnsEMBL::Mapper::Gap')) {
+        $sum += $icoord->length();
+        next;
+      }
+
+
+      #feed the results of the first mapping into the second mapper
+      my @final_coords =
+        $end_mid_mapper->map_coordinates($icoord->id, $icoord->start,
+                                         $icoord->end,
+                                         $icoord->strand, $mid_name);
+
+
+      foreach my $fcoord (@final_coords) {
+        #load up the final mapper
+
+        if($fcoord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
+          my $total_start = $start + $sum;
+          my $total_end   = $total_start + $fcoord->length - 1;
+          my $ori = $fcoord->strand();
+
+          if($start_name eq 'first') { # add coords in consistant order
+            $combined_mapper->add_map_coordinates(
+                             $seq_region_id, $total_start, $total_end, $ori,
+                             $fcoord->id(), $fcoord->start(), $fcoord->end());
+          } else {
+            $combined_mapper->add_map_coordinates(
+                        $fcoord->id(), $fcoord->start(), $fcoord->end(),$ori,
+                        $seq_region_id, $total_start, $total_end);
+          }
+
+        }
+        $sum += $fcoord->length();
+      }
+    }
+  }
+  #all done!
+}
+
+
+=head2 seq_regions_to_ids
+
+  Arg [1]    : Bio::EnsEMBL::CoordSystem $coord_system
+  Arg [2]    : listref of strings $seq_regions
+  Example    : my @ids = @{$asma->seq_regions_to_ids($coord_sys, \@seq_regs)};
+  Description: Converts a list of seq_region names to internal identifiers
+               using the internal cache that has accumulated while registering
+               regions for AssemblyMappers. If any requested regions are
+               not  found in the cache an attempt is made to retrieve them
+               from the database.
+  Returntype : listref of ints
+  Exceptions : throw if a non-existant seqregion is provided
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub seq_regions_to_ids {
+  my $self = shift;
+  my $coord_system = shift;
+  my $seq_regions = shift;
+
+  my $cs_id = $coord_system->dbID();
+
+  my @out;
+
+  foreach my $sr (@$seq_regions) {
+    my $arr = $self->{'sr_name_cache'}->{"$sr:$cs_id"};
+    if( $arr ) {
+      push( @out, $arr->[0] );
+    } else {
+      push @out, $self->_seq_region_name_to_id($sr,$cs_id);
+    }
+  }
+
+  return \@out;
+}
+
+
+=head2 seq_ids_to_regions
+
+  Arg [1]    : listref of   seq_region ids
+  Example    : my @ids = @{$asma->ids_to_seq_regions(\@seq_ids)};
+  Description: Converts a list of seq_region ids to seq region names
+               using the internal cache that has accumulated while registering
+               regions for AssemblyMappers. If any requested regions are
+               not  found in the cache an attempt is made to retrieve them
+               from the database.
+  Returntype : listref of strings
+  Exceptions : throw if a non-existant seq_region_id is provided
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub seq_ids_to_regions {
+  my $self = shift;
+  my $seq_region_ids = shift;
+
+  my @out;
+
+  foreach my $sr (@$seq_region_ids) {
+    my $arr = $self->{'sr_id_cache'}->{"$sr"};
+    if( $arr ) {
+      push( @out, $arr->[1] );
+    } else {
+      push @out, $self->_seq_region_id_to_name($sr);
+    }
+  }
+
+  return \@out;
+}
+
+=head2 delete_cache
+
+ Description: Delete all the caches for the mappings/seq_regions
+ Returntype : none
+ Exceptions : none
+ Caller     : General
+ Status     : At risk
+
+=cut
+
+sub delete_cache{
+  my ($self) = @_;
+
+  %{$self->{'sr_name_cache'}}     = ();
+  %{$self->{'sr_id_cache'}}       = ();
+
+  foreach my $key (keys %{$self->{'_asm_mapper_cache'}}){
+    $self->{'_asm_mapper_cache'}->{$key}->flush();
+  }
+  %{$self->{'_asm_mapper_cache'}} = ();
+  return;
+}
+
+
+=head2 register_region
+
+  Description: DEPRECATED use register_assembled instead
+
+=cut
+
+sub register_region{
+  my ($self, $assmapper, $type, $chr_name, $start, $end) = @_;
+
+  deprecate('Use register_assembled instead');
+
+  $self->register_assembled($assmapper, $chr_name, $start, $end);
+}
+
+
+=head2 register_contig
+
+  Description: DEPRECATED use register_component instead
+
+=cut
+
+sub register_contig {
+   my ($self, $assmapper, $type, $contig_id ) = @_;
+
+   deprecate('Use register_component instead');
+
+   #not sure if the use is passing in a seq_region_name or a
+   #seq_region_id...
+   register_component($assmapper, $contig_id);
+}
+
+
+=head2 fetch_by_type
+
+  Description: DEPRECATED use fetch_by_CoordSystems instead
+
+=cut
+
+sub fetch_by_type{
+  my ($self,$type) = @_;
+
+  deprecate('Use fetch_by_CoordSystems instead');
+
+  #assume that what the user wanted was a mapper between the sequence coord
+  #level and the top coord level
+
+  my $csa = $self->db()->get_CoordSystemAdaptor();
+
+  my $cs1 = $csa->fetch_top_level($type);
+  my $cs2 = $csa->fetch_sequence_level();
+
+  return $self->fetch_by_CoordSystems($cs1,$cs2);
+}
+
+
+
+1;