Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/StableIdHistoryTree.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/StableIdHistoryTree.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,1202 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=head1 NAME + +Bio::EnsEMBL::StableIdHistoryTree - object representing a stable ID history tree + +=head1 SYNOPSIS + + my $reg = "Bio::EnsEMBL::Registry"; + my $archiveStableIdAdaptor = + $reg->get_adaptor( 'human', 'core', 'ArchiveStableId' ); + + my $stable_id = 'ENSG00000068990'; + my $history = + $archiveStableIdAdaptor->fetch_history_tree_by_stable_id('ENSG01'); + + print "Unique stable IDs in this tree:\n"; + print join( ", ", @{ $history->get_unique_stable_ids } ), "\n"; + + print "\nReleases in this tree:\n"; + print join( ", ", @{ $history->get_release_display_names } ), "\n"; + + print "\nCoordinates of nodes in the tree:\n\n"; + foreach my $a ( @{ $history->get_all_ArchiveStableIds } ) { + print " Stable ID: " . $a->stable_id . "." . $a->version . "\n"; + print " Release: " + . $a->release . " (" + . $a->assembly . ", " + . $a->db_name . ")\n"; + print " coords: " + . join( ', ', @{ $history->coords_by_ArchiveStableId($a) } ) + . "\n\n"; + } + +=head1 DESCRIPTION + +This object represents a stable ID history tree graph. + +The graph is implemented as a collection of nodes (ArchiveStableId +objects) and links (StableIdEvent objects) which have positions +on an (x,y) grid. The x axis is used for releases, the y axis for +stable_ids. The idea is to create a plot similar to this (the numbers +shown on the nodes are the stable ID versions): + + ENSG001 1-------------- 2-- + \ + ENSG003 1-----1 + / + ENSG002 1-------2---------- + + 38 39 40 41 42 + +The grid coordinates of the ArchiveStableId objects in this example +would be (note that coordinates are zero-based): + + ENSG001.1 (0, 0) + ENSG001.2 (2, 0) + ENSG003.1 (release 41) (3, 1) + ENSG003.1 (release 42) (4, 1) + ENSG002.1 (0, 2) + ENSG002.2 (1, 2) + +The tree will only contain those nodes which had a change in the stable +ID version. Therefore, in the above example, in release 39 ENSG001 was +present and had version 1 (but will not be drawn there, to unclutter the +output). + +The grid positions will be calculated by the API and will try to +untangle the tree (i.e. try to avoid overlapping lines). + +=head1 METHODS + + new + add_ArchiveStableIds + add_ArchiveStableIds_for_events + remove_ArchiveStableId + flush_ArchiveStableIds + add_StableIdEvents + remove_StableIdEvent + flush_StableIdEvents + get_all_ArchiveStableIds + get_all_StableIdEvents + get_latest_StableIdEvent + get_release_display_names + get_release_db_names + get_unique_stable_ids + optimise_tree + coords_by_ArchiveStableId + calculate_coords + consolidate_tree + reset_tree + current_dbname + current_release + current_assembly + is_incomplete + +=head1 RELATED MODULES + + Bio::EnsEMBL::ArchiveStableId + Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor + Bio::EnsEMBL::StableIdEvent + +=cut + +package Bio::EnsEMBL::StableIdHistoryTree; + +use strict; +use warnings; +no warnings 'uninitialized'; + +use Bio::EnsEMBL::Utils::Argument qw(rearrange); +use Bio::EnsEMBL::Utils::Exception qw(throw warning); + + +=head2 new + + Arg [CURRENT_DBNAME] : (optional) name of current db + Arg [CURRENT_RELEASE] : (optional) current release number + Arg [CURRENT_ASSEMBLY] : (optional) current assembly name + Example : my $history = Bio::EnsEMBL::StableIdHistoryTree->new; + Description : object constructor + Return type : Bio::EnsEMBL::StableIdHistoryTree + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub new { + my $caller = shift; + my $class = ref($caller) || $caller; + + my $self = {}; + bless $self, $class; + + my ($current_dbname, $current_release, $current_assembly) = + rearrange([qw( CURRENT_DBNAME CURRENT_RELEASE CURRENT_ASSEMBLY )], @_ ); + + # initialise + $self->{'current_dbname'} = $current_dbname; + $self->{'current_release'} = $current_release; + $self->{'current_assembly'} = $current_assembly; + + return $self; +} + + +=head2 add_ArchiveStableIds + + Arg[1..n] : Bio::EnsEMBL::ArchiveStableId's @archive_ids + The ArchiveStableIds to add to the the history tree + Example : my $archive_id = $archiveStableIdAdaptor->fetch_by_stable_id( + 'ENSG00024808'); + $history->add_ArchiveStableId($archive_id); + Description : Adds ArchiveStableIds (nodes) to the history tree. No + calculation of grid coordinates is done at this point, you need + to initiate this manually with calculate_coords(). + ArchiveStableIds are only added once for each release (to avoid + duplicates). + Return type : none + Exceptions : thrown on invalid or missing argument + Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general + Status : At Risk + : under development + +=cut + +sub add_ArchiveStableIds { + my ($self, @archive_ids) = @_; + + throw("You must provide one or more Bio::EnsEMBL::ArchiveStableIds to add.") + unless (@archive_ids); + + foreach my $archive_id (@archive_ids) { + throw("Bio::EnsEMBL::ArchiveStableId object expected.") + unless (ref($archive_id) && + $archive_id->isa('Bio::EnsEMBL::ArchiveStableId')); + + $self->{'nodes'}->{$self->_node_id($archive_id)} = $archive_id; + } +} + + +=head2 add_ArchiveStableIds_for_events + + Example : my $history = Bio::EnsEMBL::StableIdHistoryTree->new; + $history->add_StableIdEvents($event1, $event2); + $history->add_ArchiveStableIds_for_events; + Description : Convenience method that adds all ArchiveStableIds for all + StableIdEvents attached to this object to the tree. + Return type : none + Exceptions : none + Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general + Status : At Risk + : under development + +=cut + +sub add_ArchiveStableIds_for_events { + my $self = shift; + + foreach my $event (@{ $self->get_all_StableIdEvents }) { + if ($event->old_ArchiveStableId) { + $self->add_ArchiveStableIds($event->old_ArchiveStableId); + } + if ($event->new_ArchiveStableId) { + $self->add_ArchiveStableIds($event->new_ArchiveStableId); + } + } +} + + +=head2 remove_ArchiveStableId + + Arg[1] : Bio::EnsEMBL::ArchiveStableId $archive_id + the ArchiveStableId to remove from the tree + Example : $history->remove_ArchiveStableId($archive_id); + Description : Removes an ArchiveStableId from the tree. + Return type : none + Exceptions : thrown on missing or invalid argument + Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general + Status : At Risk + : under development + +=cut + +sub remove_ArchiveStableId { + my ($self, $archive_id) = @_; + + throw("Bio::EnsEMBL::ArchiveStableId object expected.") + unless ($archive_id && ref($archive_id) && + $archive_id->isa('Bio::EnsEMBL::ArchiveStableId')); + + my %nodes = %{ $self->{'nodes'} }; + delete $nodes{$self->_node_id($archive_id)}; + $self->{'nodes'} = \%nodes; +} + + +=head2 flush_ArchiveStableIds + + Example : $history->flush_ArchiveStableIds; + Description : Remove all ArchiveStableIds from the tree. + Return type : none + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub flush_ArchiveStableIds { + my $self = shift; + $self->{'nodes'} = undef; +} + + +# +# generate a unique node identifier +# +sub _node_id { + my ($self, $archive_id) = @_; + return $archive_id->stable_id . ':' . $archive_id->db_name; +} + + +=head2 add_StableIdEvents + + Arg[1..n] : Bio::EnsEMBL::StableIdEvent's @events + The StableIdEvents to add to the the history tree + Example : $history->add_StableIdEvents($event); + Description : Adds StableIdEvents (links) to the history tree. Note that + ArchiveStableIds attached to the StableIdEvent aren't added to + the tree automatically, you'll need to call + add_ArchiveStableIds_for_events later. + Return type : none + Exceptions : thrown on invalid or missing argument + Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general + Status : At Risk + : under development + +=cut + +sub add_StableIdEvents { + my ($self, @events) = @_; + + throw("You must provide one or more Bio::EnsEMBL::StableIdsEvents to add.") + unless (@events); + + foreach my $event (@events) { + throw("Bio::EnsEMBL::StableIdEvent object expected.") + unless ($event->isa('Bio::EnsEMBL::StableIdEvent')); + + $self->{'links'}->{$self->_link_id($event)} = $event; + } +} + + +=head2 remove_StableIdEvent + + Arg[1] : Bio::EnsEMBL::StableIdEvent $event + the StableIdEvent to remove from the tree + Example : $history->remove_StableIdEvent($event); + Description : Removes a StableIdEvent from the tree. + Return type : none + Exceptions : thrown on missing or invalid arguments + Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general + Status : At Risk + : under development + +=cut + +sub remove_StableIdEvent { + my ($self, $event) = @_; + + throw("Bio::EnsEMBL::StableIdEvent object expected.") unless + ($event && ref($event) && $event->isa('Bio::EnsEMBL::StableIdEvent')); + + my %links = %{ $self->{'links'} }; + delete $links{$self->_link_id($event)}; + $self->{'links'} = \%links; +} + + +=head2 flush_StableIdEvents + + Example : $history->flush_StableIdEvents; + Description : Removes all StableIdEvents from the tree. + Return type : none + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub flush_StableIdEvents { + my $self = shift; + $self->{'links'} = undef; +} + + +# +# generate a unique link identifier +# +sub _link_id { + my ($self, $event) = @_; + + my ($old_id, $old_db_name, $new_id, $new_db_name); + if ($event->old_ArchiveStableId) { + $old_id = $event->old_ArchiveStableId->stable_id; + $old_db_name = $event->old_ArchiveStableId->db_name; + } + if ($event->new_ArchiveStableId) { + $new_id = $event->new_ArchiveStableId->stable_id; + $new_db_name = $event->new_ArchiveStableId->db_name; + } + + return join(':', $old_id, $old_db_name, $new_id, $new_db_name); +} + + +=head2 get_all_ArchiveStableIds + + Example : foreach my $arch_id (@{ $history->get_all_ArchiveStableIds }) { + print $arch_id->stable_id, '.', $arch_id->version, "\n"; + } + Description : Gets all ArchiveStableIds (nodes) in this tree. + Return type : Arrayref of Bio::EnsEMBL::ArchiveStableId objects + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub get_all_ArchiveStableIds { + my $self = shift; + return [ values %{ $self->{'nodes'} } ]; +} + + +=head2 get_all_current_ArchiveStableIds + + Example : foreach my $arch_id (@{ $history->get_all_current_ArchiveStableIds }) { + print $arch_id->stable_id, '.', $arch_id->version, "\n"; + } + Description : Convenience method to get all current ArchiveStableIds in this + tree. + + Note that no lazy loading of "current" status is done at that + stage; as long as you retrieve your StableIdHistoryTree object + from ArchiveStableIdAdaptor, you'll get the right answer. In + other use cases, if you want to make sure you really get all + current stable IDs, loop over the result of + get_all_ArchiveStableIds() and call + ArchiveStableId->current_version() on all of them. + Return type : Arrayref of Bio::EnsEMBL::ArchiveStableId objects + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub get_all_current_ArchiveStableIds { + my $self = shift; + + my @current = (); + + foreach my $arch_id (@{ $self->get_all_ArchiveStableIds }) { + push @current, $arch_id if ($arch_id->is_current); + } + + return \@current; +} + + +=head2 get_all_StableIdEvents + + Example : foreach my $event (@{ $history->get_all_StableIdsEvents }) { + print "Old stable ID: ", + ($event->get_attribute('old', 'stable_id') or 'none'), "\n"; + print "New stable ID: ", + ($event->get_attribute('new', 'stable_id') or 'none'), "\n"; + print "Mapping score: ", $event->score, "\n"; + } + Description : Gets all StableIdsEvents (links) in this tree. + Return type : Arrayref of Bio::EnsEMBL::StableIdEvent objects + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub get_all_StableIdEvents { + my $self = shift; + return [ values %{ $self->{'links'} } ]; +} + + +=head2 get_latest_StableIdEvent + + Arg[1] : Bio::EnsEMBL::ArchiveStableId $arch_id - the stable ID to get + the latest Event for + Example : my $arch_id = Bio::EnsEMBL::ArchiveStableId->new( + -stable_id => 'ENSG00001' + ); + my $event = $history->get_latest_Event($arch_id); + Description : Returns the latest StableIdEvent found in the tree where a given + stable ID is the new stable ID. If more than one is found (e.g. + in a merge scenario in the latest mapping), preference is given + to self-events. + Return type : Bio::EnsEMBL::StableIdEvent + Exceptions : thrown on missing or wrong argument + Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::add_all_current_to_history, general + Status : At Risk + : under development + +=cut + +sub get_latest_StableIdEvent { + my $self = shift; + my $arch_id = shift; + + unless ($arch_id and $arch_id->isa('Bio::EnsEMBL::ArchiveStableId')) { + throw("Need a Bio::EnsEMBL::ArchiveStableId."); + } + + my @all_events = @{ $self->get_all_StableIdEvents }; + my @self_events = (); + + while (my $event = shift(@all_events)) { + if ($event->new_ArchiveStableId and + $event->new_ArchiveStableId->stable_id eq $arch_id->stable_id) { + push @self_events, $event; + } + } + + my @sorted = sort { $b->new_ArchiveStableId->release <=> + $a->new_ArchiveStableId->release } @self_events; + + # give priority to self events + my $latest; + while ($latest = shift @sorted) { + last if (($latest->old_ArchiveStableId and + $latest->old_ArchiveStableId->stable_id eq $arch_id->stable_id) + or !$latest->old_ArchiveStableId); + } + + return $latest; +} + + +=head2 get_release_display_names + + Example : print "Unique release display_names in this tree:\n" + foreach my $name (@{ $history->get_release_display_names }) { + print " $name\n"; + } + Description : Returns a chronologically sorted list of unique release + display_names in this tree. + + This method can be used to determine the number of columns when + plotting the history tree. + Return type : Arrayref of strings. + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub get_release_display_names { + my $self = shift; + + my @display_names = map { $_->[1] } @{ $self->_sort_releases }; + + return \@display_names; +} + + +=head2 get_release_db_names + + Example : print "Unique release db_names in this tree:\n" + foreach my $name (@{ $history->get_release_db_names }) { + print " $name\n"; + } + Description : Returns a chronologically sorted list of unique release + db_names in this tree. + Return type : Arrayref of strings. + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub get_release_db_names { + my $self = shift; + + my @db_names = map { $_->[0] } @{ $self->_sort_releases }; + + return \@db_names; +} + + +# +# Create a chronologically sorted list of releases. +# +# Return type : Arrayref of arrayrefs (db_name, release) +# +sub _sort_releases { + my $self = shift; + + unless ($self->{'sorted_tree'}->{'releases'}) { + + my %unique = (); + + foreach my $archive_id (@{ $self->get_all_ArchiveStableIds }) { + $unique{join(':', $archive_id->db_name, $archive_id->release)} = 1; + } + + # sort releases by release number, then db_name; this should get them into + # chronological order + my @releases = sort { $a->[1] <=> $b->[1] || $a->[0] cmp $b->[0] } + map { [ split(/:/, $_) ] } keys(%unique); + + $self->{'sorted_tree'}->{'releases'} = \@releases; + + } + + return $self->{'sorted_tree'}->{'releases'}; +} + + +=head2 get_unique_stable_ids + + Example : print "Unique stable IDs in this tree:\n" + foreach my $id (@{ $history->get_unique_stable_ids }) { + print " $id\n"; + } + Description : Returns a list of unique stable IDs in this tree. Version is not + taken into account here. This method can be used to determine + the number of rows when plotting the history with each stable ID + occupying one line. + + Sort algorithm will depend on what was chosen when the sorted + tree was generated. This ranges from a simple alphanumeric sort + to algorithms trying to untangle the history tree. If no + pre-sorted data is found, an alphanumerically sorted list will + be returned by default. + Return type : Arrayref of strings. + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub get_unique_stable_ids { + my $self = shift; + + unless ($self->{'sorted_tree'}->{'stable_ids'}) { + $self->{'sorted_tree'}->{'stable_ids'} = $self->_sort_stable_ids; + } + + return $self->{'sorted_tree'}->{'stable_ids'}; +} + + +# +# Returns a list of stable IDs in this history tree, sorted alphabetically. +# This is the simplest sort function used and doesn't try to untangle the tree. +# +# Return type : Arrayref +# +sub _sort_stable_ids { + my $self = shift; + my %unique = map { $_->stable_id => 1 } @{ $self->get_all_ArchiveStableIds }; + return [sort keys %unique]; +} + + +=head2 optimise_tree + + Example : $history->optimise_tree; + Description : This method sorts the history tree so that the number of + overlapping branches is minimised (thus "untangling" the tree). + + It uses a clustering algorithm for this which iteratively moves + the nodes with the largest vertical distance next to each other + and looking for a mininum in total branch length. This might not + produce the overall optimum but usually converges on a local + optimum very quickly. + Return type : none + Exceptions : none + Caller : calculate_coords + Status : At Risk + : under development + +=cut + +sub optimise_tree { + my $self = shift; + + # get all non-self events + my @links; + foreach my $event (@{ $self->get_all_StableIdEvents }) { + next unless ($event->old_ArchiveStableId and $event->new_ArchiveStableId); + my $old_id = $event->old_ArchiveStableId->stable_id; + my $new_id = $event->new_ArchiveStableId->stable_id; + push @links, [$old_id, $new_id] if ($old_id ne $new_id); + } + + # get initial list of sorted unique stable IDs and put them into a position + # lookup hash + my $i = 0; + my %pos = map { $_ => $i++ } @{ $self->_sort_stable_ids }; + + my $opt_length; + my $successive_fails = 0; + my $k = 0; + my %seen; + + # for debug purposes: + # find the number of permutations for the given number of stable IDs + my $fact = $self->_factorial(scalar(keys %pos)); + + OPT: + while ($successive_fails < 100) { + + # sort links by vertical distance + #warn "sorting\n"; + $self->_sort_links(\@links, \%pos); + + # loop over sorted links + SORTED: + foreach my $link (@links) { + + #warn " trying ".join('-', @$link)."\n"; + + $k++; + + # remember last sort order + my %last = %pos; + + #my $this_order = join(':', sort { $pos{$a} <=> $pos{$b} } keys %pos); + #warn " before $this_order\n"; + + # try both to move bottom node next to top node's current position and + # top node next to bottom node's position - one of the methods might give + # you better results + DIRECT: + foreach my $direction (qw(up down)) { + + # move the nodes next to each other + $self->_move_nodes($link, \%pos, $direction); + + # next if we've seen this sort order before + my $new_order = join(':', sort { $pos{$a} <=> $pos{$b} } keys %pos); + #warn " after ($direction) $new_order\n"; + if ($seen{$new_order}) { + #warn " seen\n"; + %pos = %last; + next DIRECT; + } + $seen{$new_order} = 1; + + # calculate total link length for this sort order + my $total_length = $self->_total_link_length(\@links, \%pos); + + if (!$opt_length or $total_length < $opt_length) { + #warn " better ($total_length/$opt_length)\n"; + $opt_length = $total_length; + $successive_fails = 0; + next OPT; + } else { + #warn " worse ($total_length/$opt_length)\n"; + %pos = %last; + $successive_fails++; + } + } + + } + + last OPT; + + } + + #warn "Needed $k tries (of $fact) to find optimal tree.\n"; + + my @best = sort { $pos{$a} <=> $pos{$b} } keys %pos; + $self->{'sorted_tree'}->{'stable_ids'} = \@best; +} + + +# +# find the number of permutations for a give array size. +# used for debugging code (compare implemented algorithm to looping over all +# possible permutations). +# +sub _factorial { + my ($self, $n) = @_; + my $s = 1; + $s *= $n-- while $n > 0; + return $s; +} + + +# +# sort links by vertical distance +# +sub _sort_links { + my ($self, $links, $pos) = @_; + + my @lookup; + + foreach my $link (@$links) { + my $dist = $pos->{$link->[0]} - $pos->{$link->[1]}; + $dist = -$dist if ($dist < 0); + push @lookup, [$dist, $link]; + #warn " $dist ".join(' ', @$link)."\n"; + } + + @$links = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @lookup; +} + + +# +# make two nodes adjacent by moving the second node next to the first node +# all other node coordinates are adjusted accordingly +# +sub _move_nodes { + my ($self, $link, $pos, $direction) = @_; + + my $first_pos = $pos->{$link->[0]}; + my $second_pos = $pos->{$link->[1]}; + + # swap positions if necessary + if ($first_pos > $second_pos) { + my $tmp = $second_pos; + $second_pos = $first_pos; + $first_pos = $tmp; + } + #warn " $first_pos:$second_pos\n"; + + foreach my $p (keys %$pos) { + + my $val = $pos->{$p}; + + #warn " $p $val\n"; + if ($direction eq 'up') { + if ($val > $first_pos and $val < $second_pos) { + $val++; + } elsif ($val == $second_pos) { + $val = $first_pos + 1; + } + } else { + if ($val > $first_pos and $val < $second_pos) { + $val--; + } elsif ($val == $first_pos) { + $val = $second_pos - 1; + } + } + + #warn " $p $val\n"; + $pos->{$p} = $val; + #warn "\n"; + } +} + + +# +# calculate the total link (vertical distance) length based on this sort order +# +sub _total_link_length { + my ($self, $links, $pos) = @_; + + my $total_length; + + foreach my $link (@$links) { + my $length = $pos->{$link->[0]} - $pos->{$link->[1]}; + $length = -$length if ($length < 0); + $total_length += $length; + } + + return $total_length; +} + + +=head2 coords_by_ArchiveStableId + + Arg[1] : Bio::EnsEMBL::ArchiveStableId $archive_id + The ArchiveStableId to get tree grid coordinates for + Example : my ($x, $y) = + @{ $history->coords_by_ArchiveStableId($archive_id) }; + print $archive_id->stable_id, " coords: $x, $y\n"; + Description : Returns the coordinates of an ArchiveStableId in the history + tree grid. If the ArchiveStableId isn't found in this tree, an + empty list is returned. + + Coordinates are zero-based (i.e. the top leftmost element in + the grid has coordinates [0, 0], not [1, 1]). This is to + facilitate using them to create a matrix as a two-dimensional + array of arrays. + Return type : Arrayref (x coordinate, y coordinate) + Exceptions : thrown on wrong argument type + Caller : general + Status : At Risk + : under development + +=cut + +sub coords_by_ArchiveStableId { + my ($self, $archive_id) = @_; + + throw("Bio::EnsEMBL::ArchiveStableId object expected.") + unless ($archive_id and ref($archive_id) and + $archive_id->isa('Bio::EnsEMBL::ArchiveStableId')); + + return $self->{'sorted_tree'}->{'coords'}->{$self->_node_id($archive_id)} + || []; +} + + +=head2 calculate_coords + + Example : $history->calculate_coords; + Description : Pre-calculates the grid coordinates of all nodes in the tree. + Return type : none + Exceptions : none + Caller : ArchiveStableIdAdaptor::fetch_history_by_stable_id + Status : At Risk + : under development + +=cut + +sub calculate_coords { + my $self = shift; + + # reset any previous tree cordinate calculations + $self->reset_tree; + + # the "master" information for the sorted tree is stored as the sorted lists + # of releases (x) and stable IDs (y). Sort them now. + my $db_names = $self->get_release_db_names; + + # untangle tree by sorting stable IDs appropriately + $self->optimise_tree; + my $stable_ids = $self->get_unique_stable_ids; + + # for performance reasons, additionally store coordinates in a lookup hash + foreach my $archive_id (@{ $self->get_all_ArchiveStableIds }) { + + # coordinates are positions in the sorted lists + my $x = $self->_index_of($archive_id->db_name, $db_names); + my $y = $self->_index_of($archive_id->stable_id, $stable_ids); + + $self->{'sorted_tree'}->{'coords'}->{$self->_node_id($archive_id)} = + [ $x, $y ]; + } +} + +# +# Description : Returns the index of an element in an array +# Example : my @array = (a, b, c); +# my $i = _index_of('b', \@array); # will return 1 +# Return type : Int (or undef if element is not found in array) +# +sub _index_of { + my ($self, $element, $arrayref) = @_; + + throw("Expecting arrayref argument.") unless (ref($arrayref) eq 'ARRAY'); + + my @array = @$arrayref; + + while (my $e = pop(@array)) { + return scalar(@array) if ($e eq $element); + } + + return undef; +} + + +=head2 consolidate_tree + + Example : $history->consolidate_tree; + Description : Consolidate the history tree. This means removing nodes where + there wasn't a change and bridging gaps in the history. The end + result will be a sparse tree which only contains the necessary + information. + Return type : none + Exceptions : none + Caller : ArchiveStableIdAdaptor->fetch_history_tree_by_stable_id + Status : At Risk + : under development + +=cut + +sub consolidate_tree { + my $self = shift; + + # + # get all self-events and creations/deletions and sort them (by stable ID and + # chronologically) + # + my @event_lookup; + + foreach my $event (@{ $self->get_all_StableIdEvents }) { + + my $old_id = $event->old_ArchiveStableId; + my $new_id = $event->new_ArchiveStableId; + + if (!$old_id or !$new_id or ($old_id->stable_id eq $new_id->stable_id)) { + if ($old_id) { + push @event_lookup, [$old_id->stable_id, $old_id->release, + $old_id->db_name, $event]; + } else { + push @event_lookup, [$new_id->stable_id, $new_id->release - 1, + $new_id->db_name, $event]; + } + } + } + + my @self_events = map { $_->[3] } + sort { $a->[0] cmp $b->[0] || $a->[1] <=> $b->[1] || $a->[2] cmp $b->[2] } + @event_lookup; + + # + # consolidate tree + # + my $last = shift(@self_events); + + while (my $event = shift(@self_events)) { + + my $lo = $last->old_ArchiveStableId; + my $ln = $last->new_ArchiveStableId; + my $eo = $event->old_ArchiveStableId; + my $en = $event->new_ArchiveStableId; + + if ($lo and $eo and $en and $lo->stable_id eq $eo->stable_id + and $lo->version eq $eo->version) { + + # this removes redundant nodes and connects the remaining nodes: + # + # o--o--o -> o-----o + # 1 1 1 1 1 + + #warn 'A: '.$last->ident_string.' | '.$event->ident_string."\n"; + + $self->remove_StableIdEvent($last); + $self->remove_StableIdEvent($event); + + $event->old_ArchiveStableId($lo); + + $self->add_StableIdEvents($event); + + } elsif ($ln and $eo and $ln->db_name ne $eo->db_name + and $ln->stable_id eq $eo->stable_id and $ln->version eq $eo->version) { + + # try to brigde gaps + + if ($en) { + + # o--o o--o -> o--o-----o + # 1 2 2 2 1 2 2 + # + # o o--o -> o-----o + # 1 1 1 1 1 + + #warn 'X: '.$last->ident_string.' | '.$event->ident_string."\n"; + + $self->remove_StableIdEvent($event); + $event->old_ArchiveStableId($ln); + $self->add_StableIdEvents($event); + + } elsif ($lo) { + + # there's a deletion event, deal with it differently + + if ($lo->version eq $ln->version) { + + # o--o o -> o-----o + # 1 1 1 1 1 + + #warn 'Y: '.$last->ident_string.' | '.$event->ident_string."\n"; + + $self->remove_StableIdEvent($last); + $last->new_ArchiveStableId($eo); + $self->add_StableIdEvents($last); + + } else { + + # o--o o -> o--o--o + # 1 2 2 1 2 2 + + #warn 'Z: '.$last->ident_string.' | '.$event->ident_string."\n"; + + $self->remove_StableIdEvent($event); + $event->old_ArchiveStableId($ln); + $event->new_ArchiveStableId($eo); + $self->add_StableIdEvents($event); + + } + + } else { + + # creation followed by deletion in next mapping + # + # o o -> o--o + # 1 1 1 1 + + #warn 'Q: '.$last->ident_string.' | '.$event->ident_string."\n"; + + $self->remove_StableIdEvent($last); + $self->remove_StableIdEvent($event); + $event->old_ArchiveStableId($ln); + $event->new_ArchiveStableId($eo); + $self->add_StableIdEvents($event); + + } + + } else { + #warn 'C: '.$last->ident_string.' | '.$event->ident_string."\n"; + } + + $last = $event; + } + + # now add ArchiveStableIds of the remaining events to the tree + $self->add_ArchiveStableIds_for_events; +} + + +=head2 reset_tree + + Example : $history->reset_tree; + Description : Resets all pre-calculated tree grid data. Mostly used internally + by methods that modify the tree. + Return type : none + Exceptions : none + Caller : internal + Status : At Risk + : under development + +=cut + +sub reset_tree { + my $self = shift; + $self->{'sorted_tree'} = undef; +} + + +=head2 current_dbname + + Arg[1] : (optional) String $dbname - the dbname to set + Example : my $dbname = $history->current_dbname; + Description : Getter/setter for current dbname. + Return type : String + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub current_dbname { + my $self = shift; + $self->{'current_dbname'} = shift if (@_); + return $self->{'current_dbname'}; +} + + +=head2 current_release + + Arg[1] : (optional) Int $release - the release to set + Example : my $release = $history->current_release; + Description : Getter/setter for current release. + Return type : Int + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub current_release { + my $self = shift; + $self->{'current_release'} = shift if (@_); + return $self->{'current_release'}; +} + + +=head2 current_assembly + + Arg[1] : (optional) String $assembly - the assembly to set + Example : my $assembly = $history->current_assembly; + Description : Getter/setter for current assembly. + Return type : String + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub current_assembly { + my $self = shift; + $self->{'current_assembly'} = shift if (@_); + return $self->{'current_assembly'}; +} + + +=head2 is_incomplete + + Arg[1] : (optional) Boolean $incomplete + Example : if ($history->is_incomplete) { + print "Returned tree is incomplete due to too many mappings + in the database.\n"; + } + Description : Getter/setter for incomplete flag. This is used by + ArchiveStableIdAdaptor to indicate that it finished building + the tree prematurely due to too many mappins in the db and can + be used by applications to print warning messages. + Return type : Boolean + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub is_incomplete { + my $self = shift; + $self->{'incomplete'} = shift if (@_); + return $self->{'incomplete'}; +} + + +1; +