diff variant_effect_predictor/Bio/EnsEMBL/StableIdHistoryTree.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/StableIdHistoryTree.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,1202 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL::StableIdHistoryTree - object representing a stable ID history tree
+
+=head1 SYNOPSIS
+
+  my $reg = "Bio::EnsEMBL::Registry";
+  my $archiveStableIdAdaptor =
+    $reg->get_adaptor( 'human', 'core', 'ArchiveStableId' );
+
+  my $stable_id = 'ENSG00000068990';
+  my $history =
+    $archiveStableIdAdaptor->fetch_history_tree_by_stable_id('ENSG01');
+
+  print "Unique stable IDs in this tree:\n";
+  print join( ", ", @{ $history->get_unique_stable_ids } ), "\n";
+
+  print "\nReleases in this tree:\n";
+  print join( ", ", @{ $history->get_release_display_names } ), "\n";
+
+  print "\nCoordinates of nodes in the tree:\n\n";
+  foreach my $a ( @{ $history->get_all_ArchiveStableIds } ) {
+    print "  Stable ID: " . $a->stable_id . "." . $a->version . "\n";
+    print "  Release: "
+      . $a->release . " ("
+      . $a->assembly . ", "
+      . $a->db_name . ")\n";
+    print "  coords: "
+      . join( ', ', @{ $history->coords_by_ArchiveStableId($a) } )
+      . "\n\n";
+  }
+
+=head1 DESCRIPTION
+
+This object represents a stable ID history tree graph.
+
+The graph is implemented as a collection of nodes (ArchiveStableId
+objects) and links (StableIdEvent objects) which have positions
+on an (x,y) grid. The x axis is used for releases, the y axis for
+stable_ids. The idea is to create a plot similar to this (the numbers
+shown on the nodes are the stable ID versions):
+
+  ENSG001   1-------------- 2--
+                                \
+  ENSG003                         1-----1
+                                /
+  ENSG002   1-------2----------
+
+           38      39      40    41    42
+
+The grid coordinates of the ArchiveStableId objects in this example
+would be (note that coordinates are zero-based):
+
+  ENSG001.1               (0, 0)
+  ENSG001.2               (2, 0)
+  ENSG003.1 (release 41)  (3, 1) 
+  ENSG003.1 (release 42)  (4, 1) 
+  ENSG002.1               (0, 2)
+  ENSG002.2               (1, 2)
+
+The tree will only contain those nodes which had a change in the stable
+ID version. Therefore, in the above example, in release 39 ENSG001 was
+present and had version 1 (but will not be drawn there, to unclutter the
+output).
+
+The grid positions will be calculated by the API and will try to
+untangle the tree (i.e. try to avoid overlapping lines).
+
+=head1 METHODS
+
+  new
+  add_ArchiveStableIds
+  add_ArchiveStableIds_for_events
+  remove_ArchiveStableId
+  flush_ArchiveStableIds
+  add_StableIdEvents
+  remove_StableIdEvent
+  flush_StableIdEvents
+  get_all_ArchiveStableIds
+  get_all_StableIdEvents
+  get_latest_StableIdEvent
+  get_release_display_names
+  get_release_db_names
+  get_unique_stable_ids
+  optimise_tree
+  coords_by_ArchiveStableId
+  calculate_coords
+  consolidate_tree
+  reset_tree
+  current_dbname
+  current_release
+  current_assembly
+  is_incomplete
+
+=head1 RELATED MODULES
+
+  Bio::EnsEMBL::ArchiveStableId
+  Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor
+  Bio::EnsEMBL::StableIdEvent
+
+=cut
+
+package Bio::EnsEMBL::StableIdHistoryTree;
+
+use strict;
+use warnings;
+no warnings 'uninitialized';
+
+use Bio::EnsEMBL::Utils::Argument qw(rearrange);
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+
+
+=head2 new
+
+  Arg [CURRENT_DBNAME]   : (optional) name of current db
+  Arg [CURRENT_RELEASE]  : (optional) current release number
+  Arg [CURRENT_ASSEMBLY] : (optional) current assembly name
+  Example     : my $history = Bio::EnsEMBL::StableIdHistoryTree->new;
+  Description : object constructor
+  Return type : Bio::EnsEMBL::StableIdHistoryTree
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub new {
+  my $caller = shift;
+  my $class = ref($caller) || $caller;
+  
+  my $self = {};
+  bless $self, $class;
+
+  my ($current_dbname, $current_release, $current_assembly) =
+    rearrange([qw( CURRENT_DBNAME CURRENT_RELEASE CURRENT_ASSEMBLY )], @_ );
+
+  # initialise
+  $self->{'current_dbname'} = $current_dbname;
+  $self->{'current_release'} = $current_release;
+  $self->{'current_assembly'} = $current_assembly;
+  
+  return $self;
+}
+
+
+=head2 add_ArchiveStableIds
+
+  Arg[1..n]   : Bio::EnsEMBL::ArchiveStableId's @archive_ids
+                The ArchiveStableIds to add to the the history tree
+  Example     : my $archive_id = $archiveStableIdAdaptor->fetch_by_stable_id(
+                  'ENSG00024808');
+                $history->add_ArchiveStableId($archive_id);
+  Description : Adds ArchiveStableIds (nodes) to the history tree. No
+                calculation of grid coordinates is done at this point, you need
+                to initiate this manually with calculate_coords().
+                ArchiveStableIds are only added once for each release (to avoid
+                duplicates).
+  Return type : none
+  Exceptions  : thrown on invalid or missing argument
+  Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub add_ArchiveStableIds {
+  my ($self, @archive_ids) = @_;
+
+  throw("You must provide one or more Bio::EnsEMBL::ArchiveStableIds to add.")
+    unless (@archive_ids);
+
+  foreach my $archive_id (@archive_ids) {
+    throw("Bio::EnsEMBL::ArchiveStableId object expected.")
+      unless (ref($archive_id) &&
+              $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));
+
+    $self->{'nodes'}->{$self->_node_id($archive_id)} = $archive_id;
+  }
+}
+
+
+=head2 add_ArchiveStableIds_for_events 
+
+  Example     : my $history = Bio::EnsEMBL::StableIdHistoryTree->new;
+                $history->add_StableIdEvents($event1, $event2);
+                $history->add_ArchiveStableIds_for_events;
+  Description : Convenience method that adds all ArchiveStableIds for all
+                StableIdEvents attached to this object to the tree.
+  Return type : none
+  Exceptions  : none
+  Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub add_ArchiveStableIds_for_events {
+  my $self = shift;
+
+  foreach my $event (@{ $self->get_all_StableIdEvents }) {
+    if ($event->old_ArchiveStableId) {
+      $self->add_ArchiveStableIds($event->old_ArchiveStableId);
+    }
+    if ($event->new_ArchiveStableId) {
+      $self->add_ArchiveStableIds($event->new_ArchiveStableId);
+    }
+  }
+}
+
+
+=head2 remove_ArchiveStableId
+
+  Arg[1]      : Bio::EnsEMBL::ArchiveStableId $archive_id
+                the ArchiveStableId to remove from the tree
+  Example     : $history->remove_ArchiveStableId($archive_id);
+  Description : Removes an ArchiveStableId from the tree.
+  Return type : none
+  Exceptions  : thrown on missing or invalid argument
+  Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub remove_ArchiveStableId {
+  my ($self, $archive_id) = @_;
+    
+  throw("Bio::EnsEMBL::ArchiveStableId object expected.")
+    unless ($archive_id && ref($archive_id) &&
+            $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));
+
+  my %nodes = %{ $self->{'nodes'} };
+  delete $nodes{$self->_node_id($archive_id)};
+  $self->{'nodes'} = \%nodes;
+}
+
+
+=head2 flush_ArchiveStableIds
+
+  Example     : $history->flush_ArchiveStableIds;
+  Description : Remove all ArchiveStableIds from the tree.
+  Return type : none
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub flush_ArchiveStableIds {
+  my $self = shift;
+  $self->{'nodes'} = undef;
+}
+
+
+#
+# generate a unique node identifier
+# 
+sub _node_id {
+  my ($self, $archive_id) = @_;
+  return $archive_id->stable_id . ':' . $archive_id->db_name;
+}
+
+
+=head2 add_StableIdEvents 
+
+  Arg[1..n]   : Bio::EnsEMBL::StableIdEvent's @events
+                The StableIdEvents to add to the the history tree
+  Example     : $history->add_StableIdEvents($event);
+  Description : Adds StableIdEvents (links) to the history tree. Note that 
+                ArchiveStableIds attached to the StableIdEvent aren't added to
+                the tree automatically, you'll need to call
+                add_ArchiveStableIds_for_events later.
+  Return type : none
+  Exceptions  : thrown on invalid or missing argument
+  Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub add_StableIdEvents {
+  my ($self, @events) = @_;
+
+  throw("You must provide one or more Bio::EnsEMBL::StableIdsEvents to add.")
+    unless (@events);
+
+  foreach my $event (@events) {
+    throw("Bio::EnsEMBL::StableIdEvent object expected.")
+      unless ($event->isa('Bio::EnsEMBL::StableIdEvent'));
+
+    $self->{'links'}->{$self->_link_id($event)} = $event;
+  }
+}
+
+
+=head2 remove_StableIdEvent 
+
+  Arg[1]      : Bio::EnsEMBL::StableIdEvent $event
+                the StableIdEvent to remove from the tree
+  Example     : $history->remove_StableIdEvent($event);
+  Description : Removes a StableIdEvent from the tree.
+  Return type : none
+  Exceptions  : thrown on missing or invalid arguments
+  Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub remove_StableIdEvent {
+  my ($self, $event) = @_;
+    
+  throw("Bio::EnsEMBL::StableIdEvent object expected.") unless
+    ($event && ref($event) && $event->isa('Bio::EnsEMBL::StableIdEvent'));
+
+  my %links = %{ $self->{'links'} };
+  delete $links{$self->_link_id($event)};
+  $self->{'links'} = \%links;
+}
+
+
+=head2 flush_StableIdEvents 
+
+  Example     : $history->flush_StableIdEvents; 
+  Description : Removes all StableIdEvents from the tree.
+  Return type : none
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub flush_StableIdEvents {
+  my $self = shift;
+  $self->{'links'} = undef; 
+}
+
+
+#
+# generate a unique link identifier
+# 
+sub _link_id {
+  my ($self, $event) = @_;
+
+  my ($old_id, $old_db_name, $new_id, $new_db_name);
+  if ($event->old_ArchiveStableId) {
+    $old_id = $event->old_ArchiveStableId->stable_id;
+    $old_db_name = $event->old_ArchiveStableId->db_name;
+  }
+  if ($event->new_ArchiveStableId) {
+    $new_id = $event->new_ArchiveStableId->stable_id;
+    $new_db_name = $event->new_ArchiveStableId->db_name;
+  }
+
+  return join(':', $old_id, $old_db_name, $new_id, $new_db_name);
+}
+
+
+=head2 get_all_ArchiveStableIds 
+
+  Example     : foreach my $arch_id (@{ $history->get_all_ArchiveStableIds }) {
+                  print $arch_id->stable_id, '.', $arch_id->version, "\n";
+                }
+  Description : Gets all ArchiveStableIds (nodes) in this tree.
+  Return type : Arrayref of Bio::EnsEMBL::ArchiveStableId objects
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub get_all_ArchiveStableIds {
+  my $self = shift;
+  return [ values %{ $self->{'nodes'} } ]; 
+}
+
+
+=head2 get_all_current_ArchiveStableIds 
+
+  Example     : foreach my $arch_id (@{ $history->get_all_current_ArchiveStableIds }) {
+                  print $arch_id->stable_id, '.', $arch_id->version, "\n";
+                }
+  Description : Convenience method to get all current ArchiveStableIds in this
+                tree.
+                
+                Note that no lazy loading of "current" status is done at that
+                stage; as long as you retrieve your StableIdHistoryTree object
+                from ArchiveStableIdAdaptor, you'll get the right answer. In
+                other use cases, if you want to make sure you really get all
+                current stable IDs, loop over the result of
+                get_all_ArchiveStableIds() and call
+                ArchiveStableId->current_version() on all of them.
+  Return type : Arrayref of Bio::EnsEMBL::ArchiveStableId objects
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub get_all_current_ArchiveStableIds {
+  my $self = shift;
+
+  my @current = ();
+
+  foreach my $arch_id (@{ $self->get_all_ArchiveStableIds }) {
+    push @current, $arch_id if ($arch_id->is_current);
+  }
+
+  return \@current;
+}
+
+
+=head2 get_all_StableIdEvents 
+
+  Example     : foreach my $event (@{ $history->get_all_StableIdsEvents }) {
+                  print "Old stable ID: ", 
+                    ($event->get_attribute('old', 'stable_id') or 'none'), "\n";
+                  print "New stable ID: ", 
+                    ($event->get_attribute('new', 'stable_id') or 'none'), "\n";
+                  print "Mapping score: ", $event->score, "\n";
+                }
+  Description : Gets all StableIdsEvents (links) in this tree.
+  Return type : Arrayref of Bio::EnsEMBL::StableIdEvent objects
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub get_all_StableIdEvents {
+  my $self = shift;
+  return [ values %{ $self->{'links'} } ]; 
+}
+
+
+=head2 get_latest_StableIdEvent
+
+  Arg[1]      : Bio::EnsEMBL::ArchiveStableId $arch_id - the stable ID to get
+                the latest Event for
+  Example     : my $arch_id = Bio::EnsEMBL::ArchiveStableId->new(
+                  -stable_id => 'ENSG00001'
+                );
+                my $event = $history->get_latest_Event($arch_id);
+  Description : Returns the latest StableIdEvent found in the tree where a given
+                stable ID is the new stable ID. If more than one is found (e.g.
+                in a merge scenario in the latest mapping), preference is given
+                to self-events.
+  Return type : Bio::EnsEMBL::StableIdEvent
+  Exceptions  : thrown on missing or wrong argument
+  Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::add_all_current_to_history, general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub get_latest_StableIdEvent {
+  my $self = shift;
+  my $arch_id = shift;
+  
+  unless ($arch_id and $arch_id->isa('Bio::EnsEMBL::ArchiveStableId')) {
+    throw("Need a Bio::EnsEMBL::ArchiveStableId.");
+  }
+
+  my @all_events = @{ $self->get_all_StableIdEvents };
+  my @self_events = ();
+
+  while (my $event = shift(@all_events)) {
+    if ($event->new_ArchiveStableId and
+        $event->new_ArchiveStableId->stable_id eq $arch_id->stable_id) {
+      push @self_events, $event;
+    }
+  }
+
+  my @sorted = sort { $b->new_ArchiveStableId->release <=>
+                      $a->new_ArchiveStableId->release } @self_events;
+  
+  # give priority to self events
+  my $latest;
+  while ($latest = shift @sorted) {
+    last if (($latest->old_ArchiveStableId and
+              $latest->old_ArchiveStableId->stable_id eq $arch_id->stable_id)
+             or !$latest->old_ArchiveStableId);
+  }
+
+  return $latest;
+}
+
+
+=head2 get_release_display_names
+
+  Example     : print "Unique release display_names in this tree:\n"
+                foreach my $name (@{ $history->get_release_display_names }) {
+                  print "  $name\n";
+                }
+  Description : Returns a chronologically sorted list of unique release
+                display_names in this tree.
+
+                This method can be used to determine the number of columns when
+                plotting the history tree.
+  Return type : Arrayref of strings.
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub get_release_display_names {
+  my $self = shift;
+  
+  my @display_names = map { $_->[1] } @{ $self->_sort_releases };
+
+  return \@display_names;
+}
+
+
+=head2 get_release_db_names
+
+  Example     : print "Unique release db_names in this tree:\n"
+                foreach my $name (@{ $history->get_release_db_names }) {
+                  print "  $name\n";
+                }
+  Description : Returns a chronologically sorted list of unique release
+                db_names in this tree.
+  Return type : Arrayref of strings.
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub get_release_db_names {
+  my $self = shift;
+  
+  my @db_names = map { $_->[0] } @{ $self->_sort_releases };
+
+  return \@db_names;
+}
+
+
+#
+# Create a chronologically sorted list of releases.
+#
+# Return type : Arrayref of arrayrefs (db_name, release)
+#
+sub _sort_releases {
+  my $self = shift;
+
+  unless ($self->{'sorted_tree'}->{'releases'}) {
+
+    my %unique = ();
+
+    foreach my $archive_id (@{ $self->get_all_ArchiveStableIds }) {
+      $unique{join(':', $archive_id->db_name, $archive_id->release)} = 1;
+    }
+
+    # sort releases by release number, then db_name; this should get them into
+    # chronological order
+    my @releases = sort { $a->[1] <=> $b->[1] || $a->[0] cmp $b->[0] }
+      map { [ split(/:/, $_) ] } keys(%unique);
+
+    $self->{'sorted_tree'}->{'releases'} = \@releases;
+  
+  }
+
+  return $self->{'sorted_tree'}->{'releases'};
+}
+
+
+=head2 get_unique_stable_ids 
+
+  Example     : print "Unique stable IDs in this tree:\n"
+                foreach my $id (@{ $history->get_unique_stable_ids }) {
+                  print "  $id\n";
+                }
+  Description : Returns a list of unique stable IDs in this tree. Version is not
+                taken into account here. This method can be used to determine
+                the number of rows when plotting the history with each stable ID
+                occupying one line.
+
+                Sort algorithm will depend on what was chosen when the sorted
+                tree was generated. This ranges from a simple alphanumeric sort
+                to algorithms trying to untangle the history tree. If no
+                pre-sorted data is found, an alphanumerically sorted list will
+                be returned by default.
+  Return type : Arrayref of strings.
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub get_unique_stable_ids {
+  my $self = shift;
+  
+  unless ($self->{'sorted_tree'}->{'stable_ids'}) {
+    $self->{'sorted_tree'}->{'stable_ids'} = $self->_sort_stable_ids;
+  }
+  
+  return $self->{'sorted_tree'}->{'stable_ids'};
+}
+
+
+#
+# Returns a list of stable IDs in this history tree, sorted alphabetically.
+# This is the simplest sort function used and doesn't try to untangle the tree.
+#
+# Return type : Arrayref
+#
+sub _sort_stable_ids {
+  my $self = shift;
+  my %unique = map { $_->stable_id => 1 } @{ $self->get_all_ArchiveStableIds };
+  return [sort keys %unique];
+}
+
+
+=head2 optimise_tree
+
+  Example     : $history->optimise_tree;
+  Description : This method sorts the history tree so that the number of
+                overlapping branches is minimised (thus "untangling" the tree).
+                
+                It uses a clustering algorithm for this which iteratively moves
+                the nodes with the largest vertical distance next to each other
+                and looking for a mininum in total branch length. This might not
+                produce the overall optimum but usually converges on a local
+                optimum very quickly.
+  Return type : none
+  Exceptions  : none
+  Caller      : calculate_coords
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub optimise_tree {
+  my $self = shift;
+
+  # get all non-self events
+  my @links;
+  foreach my $event (@{ $self->get_all_StableIdEvents }) {
+    next unless ($event->old_ArchiveStableId and $event->new_ArchiveStableId);
+    my $old_id = $event->old_ArchiveStableId->stable_id;
+    my $new_id = $event->new_ArchiveStableId->stable_id;
+    push @links, [$old_id, $new_id] if ($old_id ne $new_id);
+  }
+
+  # get initial list of sorted unique stable IDs and put them into a position
+  # lookup hash
+  my $i = 0;
+  my %pos = map { $_ => $i++ } @{ $self->_sort_stable_ids };
+
+  my $opt_length;
+  my $successive_fails = 0;
+  my $k = 0;
+  my %seen;
+
+  # for debug purposes:
+  # find the number of permutations for the given number of stable IDs
+  my $fact = $self->_factorial(scalar(keys %pos));
+
+  OPT:
+  while ($successive_fails < 100) {
+
+    # sort links by vertical distance
+    #warn "sorting\n";
+    $self->_sort_links(\@links, \%pos);
+
+    # loop over sorted links
+    SORTED:
+    foreach my $link (@links) {
+      
+      #warn "  trying ".join('-', @$link)."\n";
+
+      $k++;
+      
+      # remember last sort order
+      my %last = %pos;
+      
+      #my $this_order = join(':', sort { $pos{$a} <=> $pos{$b} } keys %pos);
+      #warn "    before $this_order\n";
+
+      # try both to move bottom node next to top node's current position and
+      # top node next to bottom node's position - one of the methods might give
+      # you better results
+      DIRECT:
+      foreach my $direction (qw(up down)) {
+
+        # move the nodes next to each other
+        $self->_move_nodes($link, \%pos, $direction);
+
+        # next if we've seen this sort order before
+        my $new_order = join(':', sort { $pos{$a} <=> $pos{$b} } keys %pos);
+        #warn "    after ($direction) $new_order\n";
+        if ($seen{$new_order}) {
+          #warn "      seen\n";
+          %pos = %last;
+          next DIRECT;
+        }
+        $seen{$new_order} = 1;
+
+        # calculate total link length for this sort order
+        my $total_length = $self->_total_link_length(\@links, \%pos);
+
+        if (!$opt_length or $total_length < $opt_length) {
+          #warn "      better ($total_length/$opt_length)\n";
+          $opt_length = $total_length;
+          $successive_fails = 0;
+          next OPT;
+        } else {
+          #warn "      worse ($total_length/$opt_length)\n";
+          %pos = %last;
+          $successive_fails++;
+        }
+      }
+      
+    }
+
+    last OPT;
+    
+  }
+
+  #warn "Needed $k tries (of $fact) to find optimal tree.\n";
+
+  my @best = sort { $pos{$a} <=> $pos{$b} } keys %pos;
+  $self->{'sorted_tree'}->{'stable_ids'} = \@best;
+}
+
+
+#
+# find the number of permutations for a give array size.
+# used for debugging code (compare implemented algorithm to looping over all
+# possible permutations).
+#
+sub _factorial {
+  my ($self, $n) = @_;
+  my $s = 1;
+  $s *= $n-- while $n > 0;
+  return $s;
+}
+
+
+#
+# sort links by vertical distance
+#
+sub _sort_links {
+  my ($self, $links, $pos) = @_;
+
+  my @lookup;
+
+  foreach my $link (@$links) {
+    my $dist = $pos->{$link->[0]} - $pos->{$link->[1]};
+    $dist = -$dist if ($dist < 0);
+    push @lookup, [$dist, $link];
+    #warn " $dist ".join(' ', @$link)."\n";
+  }
+
+  @$links = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @lookup;
+}
+
+
+#
+# make two nodes adjacent by moving the second node next to the first node
+# all other node coordinates are adjusted accordingly
+#
+sub _move_nodes {
+  my ($self, $link, $pos, $direction) = @_;
+
+  my $first_pos = $pos->{$link->[0]};
+  my $second_pos = $pos->{$link->[1]};
+
+  # swap positions if necessary
+  if ($first_pos > $second_pos) {
+    my $tmp = $second_pos;
+    $second_pos = $first_pos;
+    $first_pos = $tmp;
+  }
+  #warn "      $first_pos:$second_pos\n";
+
+  foreach my $p (keys %$pos) {
+    
+    my $val = $pos->{$p};
+    
+    #warn "      $p $val\n";
+    if ($direction eq 'up') {
+      if ($val > $first_pos and $val < $second_pos) {
+        $val++;
+      } elsif ($val == $second_pos) {
+        $val = $first_pos + 1;
+      }
+    } else {
+      if ($val > $first_pos and $val < $second_pos) {
+        $val--;
+      } elsif ($val == $first_pos) {
+        $val = $second_pos - 1;
+      }
+    }
+    
+    #warn "      $p $val\n";
+    $pos->{$p} = $val;
+    #warn "\n";
+  }
+}
+
+
+#
+# calculate the total link (vertical distance) length based on this sort order
+#
+sub _total_link_length {
+  my ($self, $links, $pos) = @_;
+
+  my $total_length;
+
+  foreach my $link (@$links) {
+    my $length = $pos->{$link->[0]} - $pos->{$link->[1]};
+    $length = -$length if ($length < 0);
+    $total_length += $length;
+  }
+
+  return $total_length;
+}
+
+
+=head2 coords_by_ArchiveStableId 
+
+  Arg[1]      : Bio::EnsEMBL::ArchiveStableId $archive_id
+                The ArchiveStableId to get tree grid coordinates for
+  Example     : my ($x, $y) =
+                  @{ $history->coords_by_ArchiveStableId($archive_id) };
+                print $archive_id->stable_id, " coords: $x, $y\n";
+  Description : Returns the coordinates of an ArchiveStableId in the history
+                tree grid. If the ArchiveStableId isn't found in this tree, an
+                empty list is returned.
+                
+                Coordinates are zero-based (i.e. the top leftmost element in
+                the grid has coordinates [0, 0], not [1, 1]). This is to
+                facilitate using them to create a matrix as a two-dimensional
+                array of arrays.
+  Return type : Arrayref (x coordinate, y coordinate)
+  Exceptions  : thrown on wrong argument type
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub coords_by_ArchiveStableId {
+  my ($self, $archive_id) = @_;
+
+  throw("Bio::EnsEMBL::ArchiveStableId object expected.")
+    unless ($archive_id and ref($archive_id) and
+      $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));
+  
+  return $self->{'sorted_tree'}->{'coords'}->{$self->_node_id($archive_id)}
+    || [];
+}
+
+
+=head2 calculate_coords
+
+  Example     : $history->calculate_coords;
+  Description : Pre-calculates the grid coordinates of all nodes in the tree.
+  Return type : none
+  Exceptions  : none
+  Caller      : ArchiveStableIdAdaptor::fetch_history_by_stable_id
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub calculate_coords {
+  my $self = shift;
+
+  # reset any previous tree cordinate calculations
+  $self->reset_tree;
+
+  # the "master" information for the sorted tree is stored as the sorted lists
+  # of releases (x) and stable IDs (y). Sort them now.
+  my $db_names = $self->get_release_db_names;
+
+  # untangle tree by sorting stable IDs appropriately
+  $self->optimise_tree;
+  my $stable_ids = $self->get_unique_stable_ids;
+  
+  # for performance reasons, additionally store coordinates in a lookup hash
+  foreach my $archive_id (@{ $self->get_all_ArchiveStableIds }) {
+  
+    # coordinates are positions in the sorted lists
+    my $x = $self->_index_of($archive_id->db_name, $db_names);
+    my $y = $self->_index_of($archive_id->stable_id, $stable_ids);
+  
+    $self->{'sorted_tree'}->{'coords'}->{$self->_node_id($archive_id)} =
+      [ $x, $y ];
+  }
+}
+
+#
+# Description : Returns the index of an element in an array
+# Example     : my @array = (a, b, c);
+#               my $i = _index_of('b', \@array); # will return 1
+# Return type : Int (or undef if element is not found in array)
+#
+sub _index_of {
+  my ($self, $element, $arrayref) = @_;
+
+  throw("Expecting arrayref argument.") unless (ref($arrayref) eq 'ARRAY');
+
+  my @array = @$arrayref;
+
+  while (my $e = pop(@array)) {
+    return scalar(@array) if ($e eq $element);
+  }
+
+  return undef;
+}
+
+
+=head2 consolidate_tree
+
+  Example     : $history->consolidate_tree;
+  Description : Consolidate the history tree. This means removing nodes where
+                there wasn't a change and bridging gaps in the history. The end
+                result will be a sparse tree which only contains the necessary
+                information.
+  Return type : none
+  Exceptions  : none
+  Caller      : ArchiveStableIdAdaptor->fetch_history_tree_by_stable_id
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub consolidate_tree {
+  my $self = shift;
+
+  #
+  # get all self-events and creations/deletions and sort them (by stable ID and
+  # chronologically)
+  #
+  my @event_lookup;
+  
+  foreach my $event (@{ $self->get_all_StableIdEvents }) {
+
+    my $old_id = $event->old_ArchiveStableId;
+    my $new_id = $event->new_ArchiveStableId;
+
+    if (!$old_id or !$new_id or ($old_id->stable_id eq $new_id->stable_id)) {
+      if ($old_id) {
+        push @event_lookup, [$old_id->stable_id, $old_id->release, 
+          $old_id->db_name, $event];
+      } else {
+        push @event_lookup, [$new_id->stable_id, $new_id->release - 1,
+          $new_id->db_name, $event];
+      }
+    }
+  }
+
+  my @self_events = map { $_->[3] }
+    sort { $a->[0] cmp $b->[0] || $a->[1] <=> $b->[1] || $a->[2] cmp $b->[2] }
+      @event_lookup;
+
+  #
+  # consolidate tree
+  #
+  my $last = shift(@self_events);
+
+  while (my $event = shift(@self_events)) {
+
+    my $lo = $last->old_ArchiveStableId;
+    my $ln = $last->new_ArchiveStableId;
+    my $eo = $event->old_ArchiveStableId;
+    my $en = $event->new_ArchiveStableId;
+
+    if ($lo and $eo and $en and $lo->stable_id eq $eo->stable_id
+        and $lo->version eq $eo->version) {
+
+      # this removes redundant nodes and connects the remaining nodes:
+      #
+      # o--o--o  ->  o-----o
+      # 1  1  1      1     1
+
+      #warn 'A: '.$last->ident_string.' | '.$event->ident_string."\n";
+
+      $self->remove_StableIdEvent($last);
+      $self->remove_StableIdEvent($event);
+
+      $event->old_ArchiveStableId($lo);
+
+      $self->add_StableIdEvents($event);
+
+    } elsif ($ln and $eo and $ln->db_name ne $eo->db_name
+        and $ln->stable_id eq $eo->stable_id and $ln->version eq $eo->version) {
+        
+      # try to brigde gaps
+
+      if ($en) {
+        
+        # o--o  o--o  ->  o--o-----o
+        # 1  2  2  2      1  2     2
+        #
+        #    o  o--o  ->  o-----o
+        #    1  1  1      1     1
+        
+        #warn 'X: '.$last->ident_string.' | '.$event->ident_string."\n";
+
+        $self->remove_StableIdEvent($event);
+        $event->old_ArchiveStableId($ln);
+        $self->add_StableIdEvents($event);
+
+      } elsif ($lo) {
+        
+        # there's a deletion event, deal with it differently
+
+        if ($lo->version eq $ln->version) {
+        
+          # o--o  o  ->  o-----o
+          # 1  1  1      1     1
+          
+          #warn 'Y: '.$last->ident_string.' | '.$event->ident_string."\n";
+
+          $self->remove_StableIdEvent($last);
+          $last->new_ArchiveStableId($eo);
+          $self->add_StableIdEvents($last);
+
+        } else {
+
+          # o--o  o  ->  o--o--o
+          # 1  2  2      1  2  2
+          
+          #warn 'Z: '.$last->ident_string.' | '.$event->ident_string."\n";
+
+          $self->remove_StableIdEvent($event);
+          $event->old_ArchiveStableId($ln);
+          $event->new_ArchiveStableId($eo);
+          $self->add_StableIdEvents($event);
+
+        }
+
+      } else {
+
+        # creation followed by deletion in next mapping
+        #
+        # o  o  ->  o--o
+        # 1  1      1  1
+
+        #warn 'Q: '.$last->ident_string.' | '.$event->ident_string."\n";
+
+        $self->remove_StableIdEvent($last);
+        $self->remove_StableIdEvent($event);
+        $event->old_ArchiveStableId($ln);
+        $event->new_ArchiveStableId($eo);
+        $self->add_StableIdEvents($event);
+
+      }
+
+    } else {
+      #warn 'C: '.$last->ident_string.' | '.$event->ident_string."\n";
+    }
+  
+    $last = $event;
+  }
+  
+  # now add ArchiveStableIds of the remaining events to the tree
+  $self->add_ArchiveStableIds_for_events;
+}
+
+
+=head2 reset_tree
+
+  Example     : $history->reset_tree;
+  Description : Resets all pre-calculated tree grid data. Mostly used internally
+                by methods that modify the tree.
+  Return type : none
+  Exceptions  : none
+  Caller      : internal
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub reset_tree {
+  my $self = shift;
+  $self->{'sorted_tree'} = undef;
+}
+
+
+=head2 current_dbname
+
+  Arg[1]      : (optional) String $dbname - the dbname to set
+  Example     : my $dbname = $history->current_dbname;
+  Description : Getter/setter for current dbname.
+  Return type : String
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub current_dbname {
+  my $self = shift;
+  $self->{'current_dbname'} = shift if (@_);
+  return $self->{'current_dbname'};
+}
+
+
+=head2 current_release
+
+  Arg[1]      : (optional) Int $release - the release to set
+  Example     : my $release = $history->current_release;
+  Description : Getter/setter for current release.
+  Return type : Int
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub current_release {
+  my $self = shift;
+  $self->{'current_release'} = shift if (@_);
+  return $self->{'current_release'};
+}
+
+
+=head2 current_assembly
+
+  Arg[1]      : (optional) String $assembly - the assembly to set
+  Example     : my $assembly = $history->current_assembly;
+  Description : Getter/setter for current assembly.
+  Return type : String
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub current_assembly {
+  my $self = shift;
+  $self->{'current_assembly'} = shift if (@_);
+  return $self->{'current_assembly'};
+}
+
+
+=head2 is_incomplete
+
+  Arg[1]      : (optional) Boolean $incomplete 
+  Example     : if ($history->is_incomplete) {
+                  print "Returned tree is incomplete due to too many mappings
+                    in the database.\n";
+                }
+  Description : Getter/setter for incomplete flag. This is used by
+                ArchiveStableIdAdaptor to indicate that it finished building
+                the tree prematurely due to too many mappins in the db and can
+                be used by applications to print warning messages.
+  Return type : Boolean
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub is_incomplete {
+  my $self = shift;
+  $self->{'incomplete'} = shift if (@_);
+  return $self->{'incomplete'};
+}
+
+
+1;
+