view variant_effect_predictor/Bio/EnsEMBL/StableIdHistoryTree.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
line wrap: on
line source

=head1 LICENSE

  Copyright (c) 1999-2012 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::StableIdHistoryTree - object representing a stable ID history tree

=head1 SYNOPSIS

  my $reg = "Bio::EnsEMBL::Registry";
  my $archiveStableIdAdaptor =
    $reg->get_adaptor( 'human', 'core', 'ArchiveStableId' );

  my $stable_id = 'ENSG00000068990';
  my $history =
    $archiveStableIdAdaptor->fetch_history_tree_by_stable_id('ENSG01');

  print "Unique stable IDs in this tree:\n";
  print join( ", ", @{ $history->get_unique_stable_ids } ), "\n";

  print "\nReleases in this tree:\n";
  print join( ", ", @{ $history->get_release_display_names } ), "\n";

  print "\nCoordinates of nodes in the tree:\n\n";
  foreach my $a ( @{ $history->get_all_ArchiveStableIds } ) {
    print "  Stable ID: " . $a->stable_id . "." . $a->version . "\n";
    print "  Release: "
      . $a->release . " ("
      . $a->assembly . ", "
      . $a->db_name . ")\n";
    print "  coords: "
      . join( ', ', @{ $history->coords_by_ArchiveStableId($a) } )
      . "\n\n";
  }

=head1 DESCRIPTION

This object represents a stable ID history tree graph.

The graph is implemented as a collection of nodes (ArchiveStableId
objects) and links (StableIdEvent objects) which have positions
on an (x,y) grid. The x axis is used for releases, the y axis for
stable_ids. The idea is to create a plot similar to this (the numbers
shown on the nodes are the stable ID versions):

  ENSG001   1-------------- 2--
                                \
  ENSG003                         1-----1
                                /
  ENSG002   1-------2----------

           38      39      40    41    42

The grid coordinates of the ArchiveStableId objects in this example
would be (note that coordinates are zero-based):

  ENSG001.1               (0, 0)
  ENSG001.2               (2, 0)
  ENSG003.1 (release 41)  (3, 1) 
  ENSG003.1 (release 42)  (4, 1) 
  ENSG002.1               (0, 2)
  ENSG002.2               (1, 2)

The tree will only contain those nodes which had a change in the stable
ID version. Therefore, in the above example, in release 39 ENSG001 was
present and had version 1 (but will not be drawn there, to unclutter the
output).

The grid positions will be calculated by the API and will try to
untangle the tree (i.e. try to avoid overlapping lines).

=head1 METHODS

  new
  add_ArchiveStableIds
  add_ArchiveStableIds_for_events
  remove_ArchiveStableId
  flush_ArchiveStableIds
  add_StableIdEvents
  remove_StableIdEvent
  flush_StableIdEvents
  get_all_ArchiveStableIds
  get_all_StableIdEvents
  get_latest_StableIdEvent
  get_release_display_names
  get_release_db_names
  get_unique_stable_ids
  optimise_tree
  coords_by_ArchiveStableId
  calculate_coords
  consolidate_tree
  reset_tree
  current_dbname
  current_release
  current_assembly
  is_incomplete

=head1 RELATED MODULES

  Bio::EnsEMBL::ArchiveStableId
  Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor
  Bio::EnsEMBL::StableIdEvent

=cut

package Bio::EnsEMBL::StableIdHistoryTree;

use strict;
use warnings;
no warnings 'uninitialized';

use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);


=head2 new

  Arg [CURRENT_DBNAME]   : (optional) name of current db
  Arg [CURRENT_RELEASE]  : (optional) current release number
  Arg [CURRENT_ASSEMBLY] : (optional) current assembly name
  Example     : my $history = Bio::EnsEMBL::StableIdHistoryTree->new;
  Description : object constructor
  Return type : Bio::EnsEMBL::StableIdHistoryTree
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub new {
  my $caller = shift;
  my $class = ref($caller) || $caller;
  
  my $self = {};
  bless $self, $class;

  my ($current_dbname, $current_release, $current_assembly) =
    rearrange([qw( CURRENT_DBNAME CURRENT_RELEASE CURRENT_ASSEMBLY )], @_ );

  # initialise
  $self->{'current_dbname'} = $current_dbname;
  $self->{'current_release'} = $current_release;
  $self->{'current_assembly'} = $current_assembly;
  
  return $self;
}


=head2 add_ArchiveStableIds

  Arg[1..n]   : Bio::EnsEMBL::ArchiveStableId's @archive_ids
                The ArchiveStableIds to add to the the history tree
  Example     : my $archive_id = $archiveStableIdAdaptor->fetch_by_stable_id(
                  'ENSG00024808');
                $history->add_ArchiveStableId($archive_id);
  Description : Adds ArchiveStableIds (nodes) to the history tree. No
                calculation of grid coordinates is done at this point, you need
                to initiate this manually with calculate_coords().
                ArchiveStableIds are only added once for each release (to avoid
                duplicates).
  Return type : none
  Exceptions  : thrown on invalid or missing argument
  Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
  Status      : At Risk
              : under development

=cut

sub add_ArchiveStableIds {
  my ($self, @archive_ids) = @_;

  throw("You must provide one or more Bio::EnsEMBL::ArchiveStableIds to add.")
    unless (@archive_ids);

  foreach my $archive_id (@archive_ids) {
    throw("Bio::EnsEMBL::ArchiveStableId object expected.")
      unless (ref($archive_id) &&
              $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));

    $self->{'nodes'}->{$self->_node_id($archive_id)} = $archive_id;
  }
}


=head2 add_ArchiveStableIds_for_events 

  Example     : my $history = Bio::EnsEMBL::StableIdHistoryTree->new;
                $history->add_StableIdEvents($event1, $event2);
                $history->add_ArchiveStableIds_for_events;
  Description : Convenience method that adds all ArchiveStableIds for all
                StableIdEvents attached to this object to the tree.
  Return type : none
  Exceptions  : none
  Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
  Status      : At Risk
              : under development

=cut

sub add_ArchiveStableIds_for_events {
  my $self = shift;

  foreach my $event (@{ $self->get_all_StableIdEvents }) {
    if ($event->old_ArchiveStableId) {
      $self->add_ArchiveStableIds($event->old_ArchiveStableId);
    }
    if ($event->new_ArchiveStableId) {
      $self->add_ArchiveStableIds($event->new_ArchiveStableId);
    }
  }
}


=head2 remove_ArchiveStableId

  Arg[1]      : Bio::EnsEMBL::ArchiveStableId $archive_id
                the ArchiveStableId to remove from the tree
  Example     : $history->remove_ArchiveStableId($archive_id);
  Description : Removes an ArchiveStableId from the tree.
  Return type : none
  Exceptions  : thrown on missing or invalid argument
  Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
  Status      : At Risk
              : under development

=cut

sub remove_ArchiveStableId {
  my ($self, $archive_id) = @_;
    
  throw("Bio::EnsEMBL::ArchiveStableId object expected.")
    unless ($archive_id && ref($archive_id) &&
            $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));

  my %nodes = %{ $self->{'nodes'} };
  delete $nodes{$self->_node_id($archive_id)};
  $self->{'nodes'} = \%nodes;
}


=head2 flush_ArchiveStableIds

  Example     : $history->flush_ArchiveStableIds;
  Description : Remove all ArchiveStableIds from the tree.
  Return type : none
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub flush_ArchiveStableIds {
  my $self = shift;
  $self->{'nodes'} = undef;
}


#
# generate a unique node identifier
# 
sub _node_id {
  my ($self, $archive_id) = @_;
  return $archive_id->stable_id . ':' . $archive_id->db_name;
}


=head2 add_StableIdEvents 

  Arg[1..n]   : Bio::EnsEMBL::StableIdEvent's @events
                The StableIdEvents to add to the the history tree
  Example     : $history->add_StableIdEvents($event);
  Description : Adds StableIdEvents (links) to the history tree. Note that 
                ArchiveStableIds attached to the StableIdEvent aren't added to
                the tree automatically, you'll need to call
                add_ArchiveStableIds_for_events later.
  Return type : none
  Exceptions  : thrown on invalid or missing argument
  Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
  Status      : At Risk
              : under development

=cut

sub add_StableIdEvents {
  my ($self, @events) = @_;

  throw("You must provide one or more Bio::EnsEMBL::StableIdsEvents to add.")
    unless (@events);

  foreach my $event (@events) {
    throw("Bio::EnsEMBL::StableIdEvent object expected.")
      unless ($event->isa('Bio::EnsEMBL::StableIdEvent'));

    $self->{'links'}->{$self->_link_id($event)} = $event;
  }
}


=head2 remove_StableIdEvent 

  Arg[1]      : Bio::EnsEMBL::StableIdEvent $event
                the StableIdEvent to remove from the tree
  Example     : $history->remove_StableIdEvent($event);
  Description : Removes a StableIdEvent from the tree.
  Return type : none
  Exceptions  : thrown on missing or invalid arguments
  Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
  Status      : At Risk
              : under development

=cut

sub remove_StableIdEvent {
  my ($self, $event) = @_;
    
  throw("Bio::EnsEMBL::StableIdEvent object expected.") unless
    ($event && ref($event) && $event->isa('Bio::EnsEMBL::StableIdEvent'));

  my %links = %{ $self->{'links'} };
  delete $links{$self->_link_id($event)};
  $self->{'links'} = \%links;
}


=head2 flush_StableIdEvents 

  Example     : $history->flush_StableIdEvents; 
  Description : Removes all StableIdEvents from the tree.
  Return type : none
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub flush_StableIdEvents {
  my $self = shift;
  $self->{'links'} = undef; 
}


#
# generate a unique link identifier
# 
sub _link_id {
  my ($self, $event) = @_;

  my ($old_id, $old_db_name, $new_id, $new_db_name);
  if ($event->old_ArchiveStableId) {
    $old_id = $event->old_ArchiveStableId->stable_id;
    $old_db_name = $event->old_ArchiveStableId->db_name;
  }
  if ($event->new_ArchiveStableId) {
    $new_id = $event->new_ArchiveStableId->stable_id;
    $new_db_name = $event->new_ArchiveStableId->db_name;
  }

  return join(':', $old_id, $old_db_name, $new_id, $new_db_name);
}


=head2 get_all_ArchiveStableIds 

  Example     : foreach my $arch_id (@{ $history->get_all_ArchiveStableIds }) {
                  print $arch_id->stable_id, '.', $arch_id->version, "\n";
                }
  Description : Gets all ArchiveStableIds (nodes) in this tree.
  Return type : Arrayref of Bio::EnsEMBL::ArchiveStableId objects
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub get_all_ArchiveStableIds {
  my $self = shift;
  return [ values %{ $self->{'nodes'} } ]; 
}


=head2 get_all_current_ArchiveStableIds 

  Example     : foreach my $arch_id (@{ $history->get_all_current_ArchiveStableIds }) {
                  print $arch_id->stable_id, '.', $arch_id->version, "\n";
                }
  Description : Convenience method to get all current ArchiveStableIds in this
                tree.
                
                Note that no lazy loading of "current" status is done at that
                stage; as long as you retrieve your StableIdHistoryTree object
                from ArchiveStableIdAdaptor, you'll get the right answer. In
                other use cases, if you want to make sure you really get all
                current stable IDs, loop over the result of
                get_all_ArchiveStableIds() and call
                ArchiveStableId->current_version() on all of them.
  Return type : Arrayref of Bio::EnsEMBL::ArchiveStableId objects
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub get_all_current_ArchiveStableIds {
  my $self = shift;

  my @current = ();

  foreach my $arch_id (@{ $self->get_all_ArchiveStableIds }) {
    push @current, $arch_id if ($arch_id->is_current);
  }

  return \@current;
}


=head2 get_all_StableIdEvents 

  Example     : foreach my $event (@{ $history->get_all_StableIdsEvents }) {
                  print "Old stable ID: ", 
                    ($event->get_attribute('old', 'stable_id') or 'none'), "\n";
                  print "New stable ID: ", 
                    ($event->get_attribute('new', 'stable_id') or 'none'), "\n";
                  print "Mapping score: ", $event->score, "\n";
                }
  Description : Gets all StableIdsEvents (links) in this tree.
  Return type : Arrayref of Bio::EnsEMBL::StableIdEvent objects
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub get_all_StableIdEvents {
  my $self = shift;
  return [ values %{ $self->{'links'} } ]; 
}


=head2 get_latest_StableIdEvent

  Arg[1]      : Bio::EnsEMBL::ArchiveStableId $arch_id - the stable ID to get
                the latest Event for
  Example     : my $arch_id = Bio::EnsEMBL::ArchiveStableId->new(
                  -stable_id => 'ENSG00001'
                );
                my $event = $history->get_latest_Event($arch_id);
  Description : Returns the latest StableIdEvent found in the tree where a given
                stable ID is the new stable ID. If more than one is found (e.g.
                in a merge scenario in the latest mapping), preference is given
                to self-events.
  Return type : Bio::EnsEMBL::StableIdEvent
  Exceptions  : thrown on missing or wrong argument
  Caller      : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::add_all_current_to_history, general
  Status      : At Risk
              : under development

=cut

sub get_latest_StableIdEvent {
  my $self = shift;
  my $arch_id = shift;
  
  unless ($arch_id and $arch_id->isa('Bio::EnsEMBL::ArchiveStableId')) {
    throw("Need a Bio::EnsEMBL::ArchiveStableId.");
  }

  my @all_events = @{ $self->get_all_StableIdEvents };
  my @self_events = ();

  while (my $event = shift(@all_events)) {
    if ($event->new_ArchiveStableId and
        $event->new_ArchiveStableId->stable_id eq $arch_id->stable_id) {
      push @self_events, $event;
    }
  }

  my @sorted = sort { $b->new_ArchiveStableId->release <=>
                      $a->new_ArchiveStableId->release } @self_events;
  
  # give priority to self events
  my $latest;
  while ($latest = shift @sorted) {
    last if (($latest->old_ArchiveStableId and
              $latest->old_ArchiveStableId->stable_id eq $arch_id->stable_id)
             or !$latest->old_ArchiveStableId);
  }

  return $latest;
}


=head2 get_release_display_names

  Example     : print "Unique release display_names in this tree:\n"
                foreach my $name (@{ $history->get_release_display_names }) {
                  print "  $name\n";
                }
  Description : Returns a chronologically sorted list of unique release
                display_names in this tree.

                This method can be used to determine the number of columns when
                plotting the history tree.
  Return type : Arrayref of strings.
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub get_release_display_names {
  my $self = shift;
  
  my @display_names = map { $_->[1] } @{ $self->_sort_releases };

  return \@display_names;
}


=head2 get_release_db_names

  Example     : print "Unique release db_names in this tree:\n"
                foreach my $name (@{ $history->get_release_db_names }) {
                  print "  $name\n";
                }
  Description : Returns a chronologically sorted list of unique release
                db_names in this tree.
  Return type : Arrayref of strings.
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub get_release_db_names {
  my $self = shift;
  
  my @db_names = map { $_->[0] } @{ $self->_sort_releases };

  return \@db_names;
}


#
# Create a chronologically sorted list of releases.
#
# Return type : Arrayref of arrayrefs (db_name, release)
#
sub _sort_releases {
  my $self = shift;

  unless ($self->{'sorted_tree'}->{'releases'}) {

    my %unique = ();

    foreach my $archive_id (@{ $self->get_all_ArchiveStableIds }) {
      $unique{join(':', $archive_id->db_name, $archive_id->release)} = 1;
    }

    # sort releases by release number, then db_name; this should get them into
    # chronological order
    my @releases = sort { $a->[1] <=> $b->[1] || $a->[0] cmp $b->[0] }
      map { [ split(/:/, $_) ] } keys(%unique);

    $self->{'sorted_tree'}->{'releases'} = \@releases;
  
  }

  return $self->{'sorted_tree'}->{'releases'};
}


=head2 get_unique_stable_ids 

  Example     : print "Unique stable IDs in this tree:\n"
                foreach my $id (@{ $history->get_unique_stable_ids }) {
                  print "  $id\n";
                }
  Description : Returns a list of unique stable IDs in this tree. Version is not
                taken into account here. This method can be used to determine
                the number of rows when plotting the history with each stable ID
                occupying one line.

                Sort algorithm will depend on what was chosen when the sorted
                tree was generated. This ranges from a simple alphanumeric sort
                to algorithms trying to untangle the history tree. If no
                pre-sorted data is found, an alphanumerically sorted list will
                be returned by default.
  Return type : Arrayref of strings.
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub get_unique_stable_ids {
  my $self = shift;
  
  unless ($self->{'sorted_tree'}->{'stable_ids'}) {
    $self->{'sorted_tree'}->{'stable_ids'} = $self->_sort_stable_ids;
  }
  
  return $self->{'sorted_tree'}->{'stable_ids'};
}


#
# Returns a list of stable IDs in this history tree, sorted alphabetically.
# This is the simplest sort function used and doesn't try to untangle the tree.
#
# Return type : Arrayref
#
sub _sort_stable_ids {
  my $self = shift;
  my %unique = map { $_->stable_id => 1 } @{ $self->get_all_ArchiveStableIds };
  return [sort keys %unique];
}


=head2 optimise_tree

  Example     : $history->optimise_tree;
  Description : This method sorts the history tree so that the number of
                overlapping branches is minimised (thus "untangling" the tree).
                
                It uses a clustering algorithm for this which iteratively moves
                the nodes with the largest vertical distance next to each other
                and looking for a mininum in total branch length. This might not
                produce the overall optimum but usually converges on a local
                optimum very quickly.
  Return type : none
  Exceptions  : none
  Caller      : calculate_coords
  Status      : At Risk
              : under development

=cut

sub optimise_tree {
  my $self = shift;

  # get all non-self events
  my @links;
  foreach my $event (@{ $self->get_all_StableIdEvents }) {
    next unless ($event->old_ArchiveStableId and $event->new_ArchiveStableId);
    my $old_id = $event->old_ArchiveStableId->stable_id;
    my $new_id = $event->new_ArchiveStableId->stable_id;
    push @links, [$old_id, $new_id] if ($old_id ne $new_id);
  }

  # get initial list of sorted unique stable IDs and put them into a position
  # lookup hash
  my $i = 0;
  my %pos = map { $_ => $i++ } @{ $self->_sort_stable_ids };

  my $opt_length;
  my $successive_fails = 0;
  my $k = 0;
  my %seen;

  # for debug purposes:
  # find the number of permutations for the given number of stable IDs
  my $fact = $self->_factorial(scalar(keys %pos));

  OPT:
  while ($successive_fails < 100) {

    # sort links by vertical distance
    #warn "sorting\n";
    $self->_sort_links(\@links, \%pos);

    # loop over sorted links
    SORTED:
    foreach my $link (@links) {
      
      #warn "  trying ".join('-', @$link)."\n";

      $k++;
      
      # remember last sort order
      my %last = %pos;
      
      #my $this_order = join(':', sort { $pos{$a} <=> $pos{$b} } keys %pos);
      #warn "    before $this_order\n";

      # try both to move bottom node next to top node's current position and
      # top node next to bottom node's position - one of the methods might give
      # you better results
      DIRECT:
      foreach my $direction (qw(up down)) {

        # move the nodes next to each other
        $self->_move_nodes($link, \%pos, $direction);

        # next if we've seen this sort order before
        my $new_order = join(':', sort { $pos{$a} <=> $pos{$b} } keys %pos);
        #warn "    after ($direction) $new_order\n";
        if ($seen{$new_order}) {
          #warn "      seen\n";
          %pos = %last;
          next DIRECT;
        }
        $seen{$new_order} = 1;

        # calculate total link length for this sort order
        my $total_length = $self->_total_link_length(\@links, \%pos);

        if (!$opt_length or $total_length < $opt_length) {
          #warn "      better ($total_length/$opt_length)\n";
          $opt_length = $total_length;
          $successive_fails = 0;
          next OPT;
        } else {
          #warn "      worse ($total_length/$opt_length)\n";
          %pos = %last;
          $successive_fails++;
        }
      }
      
    }

    last OPT;
    
  }

  #warn "Needed $k tries (of $fact) to find optimal tree.\n";

  my @best = sort { $pos{$a} <=> $pos{$b} } keys %pos;
  $self->{'sorted_tree'}->{'stable_ids'} = \@best;
}


#
# find the number of permutations for a give array size.
# used for debugging code (compare implemented algorithm to looping over all
# possible permutations).
#
sub _factorial {
  my ($self, $n) = @_;
  my $s = 1;
  $s *= $n-- while $n > 0;
  return $s;
}


#
# sort links by vertical distance
#
sub _sort_links {
  my ($self, $links, $pos) = @_;

  my @lookup;

  foreach my $link (@$links) {
    my $dist = $pos->{$link->[0]} - $pos->{$link->[1]};
    $dist = -$dist if ($dist < 0);
    push @lookup, [$dist, $link];
    #warn " $dist ".join(' ', @$link)."\n";
  }

  @$links = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @lookup;
}


#
# make two nodes adjacent by moving the second node next to the first node
# all other node coordinates are adjusted accordingly
#
sub _move_nodes {
  my ($self, $link, $pos, $direction) = @_;

  my $first_pos = $pos->{$link->[0]};
  my $second_pos = $pos->{$link->[1]};

  # swap positions if necessary
  if ($first_pos > $second_pos) {
    my $tmp = $second_pos;
    $second_pos = $first_pos;
    $first_pos = $tmp;
  }
  #warn "      $first_pos:$second_pos\n";

  foreach my $p (keys %$pos) {
    
    my $val = $pos->{$p};
    
    #warn "      $p $val\n";
    if ($direction eq 'up') {
      if ($val > $first_pos and $val < $second_pos) {
        $val++;
      } elsif ($val == $second_pos) {
        $val = $first_pos + 1;
      }
    } else {
      if ($val > $first_pos and $val < $second_pos) {
        $val--;
      } elsif ($val == $first_pos) {
        $val = $second_pos - 1;
      }
    }
    
    #warn "      $p $val\n";
    $pos->{$p} = $val;
    #warn "\n";
  }
}


#
# calculate the total link (vertical distance) length based on this sort order
#
sub _total_link_length {
  my ($self, $links, $pos) = @_;

  my $total_length;

  foreach my $link (@$links) {
    my $length = $pos->{$link->[0]} - $pos->{$link->[1]};
    $length = -$length if ($length < 0);
    $total_length += $length;
  }

  return $total_length;
}


=head2 coords_by_ArchiveStableId 

  Arg[1]      : Bio::EnsEMBL::ArchiveStableId $archive_id
                The ArchiveStableId to get tree grid coordinates for
  Example     : my ($x, $y) =
                  @{ $history->coords_by_ArchiveStableId($archive_id) };
                print $archive_id->stable_id, " coords: $x, $y\n";
  Description : Returns the coordinates of an ArchiveStableId in the history
                tree grid. If the ArchiveStableId isn't found in this tree, an
                empty list is returned.
                
                Coordinates are zero-based (i.e. the top leftmost element in
                the grid has coordinates [0, 0], not [1, 1]). This is to
                facilitate using them to create a matrix as a two-dimensional
                array of arrays.
  Return type : Arrayref (x coordinate, y coordinate)
  Exceptions  : thrown on wrong argument type
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub coords_by_ArchiveStableId {
  my ($self, $archive_id) = @_;

  throw("Bio::EnsEMBL::ArchiveStableId object expected.")
    unless ($archive_id and ref($archive_id) and
      $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));
  
  return $self->{'sorted_tree'}->{'coords'}->{$self->_node_id($archive_id)}
    || [];
}


=head2 calculate_coords

  Example     : $history->calculate_coords;
  Description : Pre-calculates the grid coordinates of all nodes in the tree.
  Return type : none
  Exceptions  : none
  Caller      : ArchiveStableIdAdaptor::fetch_history_by_stable_id
  Status      : At Risk
              : under development

=cut

sub calculate_coords {
  my $self = shift;

  # reset any previous tree cordinate calculations
  $self->reset_tree;

  # the "master" information for the sorted tree is stored as the sorted lists
  # of releases (x) and stable IDs (y). Sort them now.
  my $db_names = $self->get_release_db_names;

  # untangle tree by sorting stable IDs appropriately
  $self->optimise_tree;
  my $stable_ids = $self->get_unique_stable_ids;
  
  # for performance reasons, additionally store coordinates in a lookup hash
  foreach my $archive_id (@{ $self->get_all_ArchiveStableIds }) {
  
    # coordinates are positions in the sorted lists
    my $x = $self->_index_of($archive_id->db_name, $db_names);
    my $y = $self->_index_of($archive_id->stable_id, $stable_ids);
  
    $self->{'sorted_tree'}->{'coords'}->{$self->_node_id($archive_id)} =
      [ $x, $y ];
  }
}

#
# Description : Returns the index of an element in an array
# Example     : my @array = (a, b, c);
#               my $i = _index_of('b', \@array); # will return 1
# Return type : Int (or undef if element is not found in array)
#
sub _index_of {
  my ($self, $element, $arrayref) = @_;

  throw("Expecting arrayref argument.") unless (ref($arrayref) eq 'ARRAY');

  my @array = @$arrayref;

  while (my $e = pop(@array)) {
    return scalar(@array) if ($e eq $element);
  }

  return undef;
}


=head2 consolidate_tree

  Example     : $history->consolidate_tree;
  Description : Consolidate the history tree. This means removing nodes where
                there wasn't a change and bridging gaps in the history. The end
                result will be a sparse tree which only contains the necessary
                information.
  Return type : none
  Exceptions  : none
  Caller      : ArchiveStableIdAdaptor->fetch_history_tree_by_stable_id
  Status      : At Risk
              : under development

=cut

sub consolidate_tree {
  my $self = shift;

  #
  # get all self-events and creations/deletions and sort them (by stable ID and
  # chronologically)
  #
  my @event_lookup;
  
  foreach my $event (@{ $self->get_all_StableIdEvents }) {

    my $old_id = $event->old_ArchiveStableId;
    my $new_id = $event->new_ArchiveStableId;

    if (!$old_id or !$new_id or ($old_id->stable_id eq $new_id->stable_id)) {
      if ($old_id) {
        push @event_lookup, [$old_id->stable_id, $old_id->release, 
          $old_id->db_name, $event];
      } else {
        push @event_lookup, [$new_id->stable_id, $new_id->release - 1,
          $new_id->db_name, $event];
      }
    }
  }

  my @self_events = map { $_->[3] }
    sort { $a->[0] cmp $b->[0] || $a->[1] <=> $b->[1] || $a->[2] cmp $b->[2] }
      @event_lookup;

  #
  # consolidate tree
  #
  my $last = shift(@self_events);

  while (my $event = shift(@self_events)) {

    my $lo = $last->old_ArchiveStableId;
    my $ln = $last->new_ArchiveStableId;
    my $eo = $event->old_ArchiveStableId;
    my $en = $event->new_ArchiveStableId;

    if ($lo and $eo and $en and $lo->stable_id eq $eo->stable_id
        and $lo->version eq $eo->version) {

      # this removes redundant nodes and connects the remaining nodes:
      #
      # o--o--o  ->  o-----o
      # 1  1  1      1     1

      #warn 'A: '.$last->ident_string.' | '.$event->ident_string."\n";

      $self->remove_StableIdEvent($last);
      $self->remove_StableIdEvent($event);

      $event->old_ArchiveStableId($lo);

      $self->add_StableIdEvents($event);

    } elsif ($ln and $eo and $ln->db_name ne $eo->db_name
        and $ln->stable_id eq $eo->stable_id and $ln->version eq $eo->version) {
        
      # try to brigde gaps

      if ($en) {
        
        # o--o  o--o  ->  o--o-----o
        # 1  2  2  2      1  2     2
        #
        #    o  o--o  ->  o-----o
        #    1  1  1      1     1
        
        #warn 'X: '.$last->ident_string.' | '.$event->ident_string."\n";

        $self->remove_StableIdEvent($event);
        $event->old_ArchiveStableId($ln);
        $self->add_StableIdEvents($event);

      } elsif ($lo) {
        
        # there's a deletion event, deal with it differently

        if ($lo->version eq $ln->version) {
        
          # o--o  o  ->  o-----o
          # 1  1  1      1     1
          
          #warn 'Y: '.$last->ident_string.' | '.$event->ident_string."\n";

          $self->remove_StableIdEvent($last);
          $last->new_ArchiveStableId($eo);
          $self->add_StableIdEvents($last);

        } else {

          # o--o  o  ->  o--o--o
          # 1  2  2      1  2  2
          
          #warn 'Z: '.$last->ident_string.' | '.$event->ident_string."\n";

          $self->remove_StableIdEvent($event);
          $event->old_ArchiveStableId($ln);
          $event->new_ArchiveStableId($eo);
          $self->add_StableIdEvents($event);

        }

      } else {

        # creation followed by deletion in next mapping
        #
        # o  o  ->  o--o
        # 1  1      1  1

        #warn 'Q: '.$last->ident_string.' | '.$event->ident_string."\n";

        $self->remove_StableIdEvent($last);
        $self->remove_StableIdEvent($event);
        $event->old_ArchiveStableId($ln);
        $event->new_ArchiveStableId($eo);
        $self->add_StableIdEvents($event);

      }

    } else {
      #warn 'C: '.$last->ident_string.' | '.$event->ident_string."\n";
    }
  
    $last = $event;
  }
  
  # now add ArchiveStableIds of the remaining events to the tree
  $self->add_ArchiveStableIds_for_events;
}


=head2 reset_tree

  Example     : $history->reset_tree;
  Description : Resets all pre-calculated tree grid data. Mostly used internally
                by methods that modify the tree.
  Return type : none
  Exceptions  : none
  Caller      : internal
  Status      : At Risk
              : under development

=cut

sub reset_tree {
  my $self = shift;
  $self->{'sorted_tree'} = undef;
}


=head2 current_dbname

  Arg[1]      : (optional) String $dbname - the dbname to set
  Example     : my $dbname = $history->current_dbname;
  Description : Getter/setter for current dbname.
  Return type : String
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub current_dbname {
  my $self = shift;
  $self->{'current_dbname'} = shift if (@_);
  return $self->{'current_dbname'};
}


=head2 current_release

  Arg[1]      : (optional) Int $release - the release to set
  Example     : my $release = $history->current_release;
  Description : Getter/setter for current release.
  Return type : Int
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub current_release {
  my $self = shift;
  $self->{'current_release'} = shift if (@_);
  return $self->{'current_release'};
}


=head2 current_assembly

  Arg[1]      : (optional) String $assembly - the assembly to set
  Example     : my $assembly = $history->current_assembly;
  Description : Getter/setter for current assembly.
  Return type : String
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub current_assembly {
  my $self = shift;
  $self->{'current_assembly'} = shift if (@_);
  return $self->{'current_assembly'};
}


=head2 is_incomplete

  Arg[1]      : (optional) Boolean $incomplete 
  Example     : if ($history->is_incomplete) {
                  print "Returned tree is incomplete due to too many mappings
                    in the database.\n";
                }
  Description : Getter/setter for incomplete flag. This is used by
                ArchiveStableIdAdaptor to indicate that it finished building
                the tree prematurely due to too many mappins in the db and can
                be used by applications to print warning messages.
  Return type : Boolean
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub is_incomplete {
  my $self = shift;
  $self->{'incomplete'} = shift if (@_);
  return $self->{'incomplete'};
}


1;