comparison variant_effect_predictor/Bio/EnsEMBL/StableIdHistoryTree.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::StableIdHistoryTree - object representing a stable ID history tree
24
25 =head1 SYNOPSIS
26
27 my $reg = "Bio::EnsEMBL::Registry";
28 my $archiveStableIdAdaptor =
29 $reg->get_adaptor( 'human', 'core', 'ArchiveStableId' );
30
31 my $stable_id = 'ENSG00000068990';
32 my $history =
33 $archiveStableIdAdaptor->fetch_history_tree_by_stable_id('ENSG01');
34
35 print "Unique stable IDs in this tree:\n";
36 print join( ", ", @{ $history->get_unique_stable_ids } ), "\n";
37
38 print "\nReleases in this tree:\n";
39 print join( ", ", @{ $history->get_release_display_names } ), "\n";
40
41 print "\nCoordinates of nodes in the tree:\n\n";
42 foreach my $a ( @{ $history->get_all_ArchiveStableIds } ) {
43 print " Stable ID: " . $a->stable_id . "." . $a->version . "\n";
44 print " Release: "
45 . $a->release . " ("
46 . $a->assembly . ", "
47 . $a->db_name . ")\n";
48 print " coords: "
49 . join( ', ', @{ $history->coords_by_ArchiveStableId($a) } )
50 . "\n\n";
51 }
52
53 =head1 DESCRIPTION
54
55 This object represents a stable ID history tree graph.
56
57 The graph is implemented as a collection of nodes (ArchiveStableId
58 objects) and links (StableIdEvent objects) which have positions
59 on an (x,y) grid. The x axis is used for releases, the y axis for
60 stable_ids. The idea is to create a plot similar to this (the numbers
61 shown on the nodes are the stable ID versions):
62
63 ENSG001 1-------------- 2--
64 \
65 ENSG003 1-----1
66 /
67 ENSG002 1-------2----------
68
69 38 39 40 41 42
70
71 The grid coordinates of the ArchiveStableId objects in this example
72 would be (note that coordinates are zero-based):
73
74 ENSG001.1 (0, 0)
75 ENSG001.2 (2, 0)
76 ENSG003.1 (release 41) (3, 1)
77 ENSG003.1 (release 42) (4, 1)
78 ENSG002.1 (0, 2)
79 ENSG002.2 (1, 2)
80
81 The tree will only contain those nodes which had a change in the stable
82 ID version. Therefore, in the above example, in release 39 ENSG001 was
83 present and had version 1 (but will not be drawn there, to unclutter the
84 output).
85
86 The grid positions will be calculated by the API and will try to
87 untangle the tree (i.e. try to avoid overlapping lines).
88
89 =head1 METHODS
90
91 new
92 add_ArchiveStableIds
93 add_ArchiveStableIds_for_events
94 remove_ArchiveStableId
95 flush_ArchiveStableIds
96 add_StableIdEvents
97 remove_StableIdEvent
98 flush_StableIdEvents
99 get_all_ArchiveStableIds
100 get_all_StableIdEvents
101 get_latest_StableIdEvent
102 get_release_display_names
103 get_release_db_names
104 get_unique_stable_ids
105 optimise_tree
106 coords_by_ArchiveStableId
107 calculate_coords
108 consolidate_tree
109 reset_tree
110 current_dbname
111 current_release
112 current_assembly
113 is_incomplete
114
115 =head1 RELATED MODULES
116
117 Bio::EnsEMBL::ArchiveStableId
118 Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor
119 Bio::EnsEMBL::StableIdEvent
120
121 =cut
122
123 package Bio::EnsEMBL::StableIdHistoryTree;
124
125 use strict;
126 use warnings;
127 no warnings 'uninitialized';
128
129 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
130 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
131
132
133 =head2 new
134
135 Arg [CURRENT_DBNAME] : (optional) name of current db
136 Arg [CURRENT_RELEASE] : (optional) current release number
137 Arg [CURRENT_ASSEMBLY] : (optional) current assembly name
138 Example : my $history = Bio::EnsEMBL::StableIdHistoryTree->new;
139 Description : object constructor
140 Return type : Bio::EnsEMBL::StableIdHistoryTree
141 Exceptions : none
142 Caller : general
143 Status : At Risk
144 : under development
145
146 =cut
147
148 sub new {
149 my $caller = shift;
150 my $class = ref($caller) || $caller;
151
152 my $self = {};
153 bless $self, $class;
154
155 my ($current_dbname, $current_release, $current_assembly) =
156 rearrange([qw( CURRENT_DBNAME CURRENT_RELEASE CURRENT_ASSEMBLY )], @_ );
157
158 # initialise
159 $self->{'current_dbname'} = $current_dbname;
160 $self->{'current_release'} = $current_release;
161 $self->{'current_assembly'} = $current_assembly;
162
163 return $self;
164 }
165
166
167 =head2 add_ArchiveStableIds
168
169 Arg[1..n] : Bio::EnsEMBL::ArchiveStableId's @archive_ids
170 The ArchiveStableIds to add to the the history tree
171 Example : my $archive_id = $archiveStableIdAdaptor->fetch_by_stable_id(
172 'ENSG00024808');
173 $history->add_ArchiveStableId($archive_id);
174 Description : Adds ArchiveStableIds (nodes) to the history tree. No
175 calculation of grid coordinates is done at this point, you need
176 to initiate this manually with calculate_coords().
177 ArchiveStableIds are only added once for each release (to avoid
178 duplicates).
179 Return type : none
180 Exceptions : thrown on invalid or missing argument
181 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
182 Status : At Risk
183 : under development
184
185 =cut
186
187 sub add_ArchiveStableIds {
188 my ($self, @archive_ids) = @_;
189
190 throw("You must provide one or more Bio::EnsEMBL::ArchiveStableIds to add.")
191 unless (@archive_ids);
192
193 foreach my $archive_id (@archive_ids) {
194 throw("Bio::EnsEMBL::ArchiveStableId object expected.")
195 unless (ref($archive_id) &&
196 $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));
197
198 $self->{'nodes'}->{$self->_node_id($archive_id)} = $archive_id;
199 }
200 }
201
202
203 =head2 add_ArchiveStableIds_for_events
204
205 Example : my $history = Bio::EnsEMBL::StableIdHistoryTree->new;
206 $history->add_StableIdEvents($event1, $event2);
207 $history->add_ArchiveStableIds_for_events;
208 Description : Convenience method that adds all ArchiveStableIds for all
209 StableIdEvents attached to this object to the tree.
210 Return type : none
211 Exceptions : none
212 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
213 Status : At Risk
214 : under development
215
216 =cut
217
218 sub add_ArchiveStableIds_for_events {
219 my $self = shift;
220
221 foreach my $event (@{ $self->get_all_StableIdEvents }) {
222 if ($event->old_ArchiveStableId) {
223 $self->add_ArchiveStableIds($event->old_ArchiveStableId);
224 }
225 if ($event->new_ArchiveStableId) {
226 $self->add_ArchiveStableIds($event->new_ArchiveStableId);
227 }
228 }
229 }
230
231
232 =head2 remove_ArchiveStableId
233
234 Arg[1] : Bio::EnsEMBL::ArchiveStableId $archive_id
235 the ArchiveStableId to remove from the tree
236 Example : $history->remove_ArchiveStableId($archive_id);
237 Description : Removes an ArchiveStableId from the tree.
238 Return type : none
239 Exceptions : thrown on missing or invalid argument
240 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
241 Status : At Risk
242 : under development
243
244 =cut
245
246 sub remove_ArchiveStableId {
247 my ($self, $archive_id) = @_;
248
249 throw("Bio::EnsEMBL::ArchiveStableId object expected.")
250 unless ($archive_id && ref($archive_id) &&
251 $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));
252
253 my %nodes = %{ $self->{'nodes'} };
254 delete $nodes{$self->_node_id($archive_id)};
255 $self->{'nodes'} = \%nodes;
256 }
257
258
259 =head2 flush_ArchiveStableIds
260
261 Example : $history->flush_ArchiveStableIds;
262 Description : Remove all ArchiveStableIds from the tree.
263 Return type : none
264 Exceptions : none
265 Caller : general
266 Status : At Risk
267 : under development
268
269 =cut
270
271 sub flush_ArchiveStableIds {
272 my $self = shift;
273 $self->{'nodes'} = undef;
274 }
275
276
277 #
278 # generate a unique node identifier
279 #
280 sub _node_id {
281 my ($self, $archive_id) = @_;
282 return $archive_id->stable_id . ':' . $archive_id->db_name;
283 }
284
285
286 =head2 add_StableIdEvents
287
288 Arg[1..n] : Bio::EnsEMBL::StableIdEvent's @events
289 The StableIdEvents to add to the the history tree
290 Example : $history->add_StableIdEvents($event);
291 Description : Adds StableIdEvents (links) to the history tree. Note that
292 ArchiveStableIds attached to the StableIdEvent aren't added to
293 the tree automatically, you'll need to call
294 add_ArchiveStableIds_for_events later.
295 Return type : none
296 Exceptions : thrown on invalid or missing argument
297 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
298 Status : At Risk
299 : under development
300
301 =cut
302
303 sub add_StableIdEvents {
304 my ($self, @events) = @_;
305
306 throw("You must provide one or more Bio::EnsEMBL::StableIdsEvents to add.")
307 unless (@events);
308
309 foreach my $event (@events) {
310 throw("Bio::EnsEMBL::StableIdEvent object expected.")
311 unless ($event->isa('Bio::EnsEMBL::StableIdEvent'));
312
313 $self->{'links'}->{$self->_link_id($event)} = $event;
314 }
315 }
316
317
318 =head2 remove_StableIdEvent
319
320 Arg[1] : Bio::EnsEMBL::StableIdEvent $event
321 the StableIdEvent to remove from the tree
322 Example : $history->remove_StableIdEvent($event);
323 Description : Removes a StableIdEvent from the tree.
324 Return type : none
325 Exceptions : thrown on missing or invalid arguments
326 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
327 Status : At Risk
328 : under development
329
330 =cut
331
332 sub remove_StableIdEvent {
333 my ($self, $event) = @_;
334
335 throw("Bio::EnsEMBL::StableIdEvent object expected.") unless
336 ($event && ref($event) && $event->isa('Bio::EnsEMBL::StableIdEvent'));
337
338 my %links = %{ $self->{'links'} };
339 delete $links{$self->_link_id($event)};
340 $self->{'links'} = \%links;
341 }
342
343
344 =head2 flush_StableIdEvents
345
346 Example : $history->flush_StableIdEvents;
347 Description : Removes all StableIdEvents from the tree.
348 Return type : none
349 Exceptions : none
350 Caller : general
351 Status : At Risk
352 : under development
353
354 =cut
355
356 sub flush_StableIdEvents {
357 my $self = shift;
358 $self->{'links'} = undef;
359 }
360
361
362 #
363 # generate a unique link identifier
364 #
365 sub _link_id {
366 my ($self, $event) = @_;
367
368 my ($old_id, $old_db_name, $new_id, $new_db_name);
369 if ($event->old_ArchiveStableId) {
370 $old_id = $event->old_ArchiveStableId->stable_id;
371 $old_db_name = $event->old_ArchiveStableId->db_name;
372 }
373 if ($event->new_ArchiveStableId) {
374 $new_id = $event->new_ArchiveStableId->stable_id;
375 $new_db_name = $event->new_ArchiveStableId->db_name;
376 }
377
378 return join(':', $old_id, $old_db_name, $new_id, $new_db_name);
379 }
380
381
382 =head2 get_all_ArchiveStableIds
383
384 Example : foreach my $arch_id (@{ $history->get_all_ArchiveStableIds }) {
385 print $arch_id->stable_id, '.', $arch_id->version, "\n";
386 }
387 Description : Gets all ArchiveStableIds (nodes) in this tree.
388 Return type : Arrayref of Bio::EnsEMBL::ArchiveStableId objects
389 Exceptions : none
390 Caller : general
391 Status : At Risk
392 : under development
393
394 =cut
395
396 sub get_all_ArchiveStableIds {
397 my $self = shift;
398 return [ values %{ $self->{'nodes'} } ];
399 }
400
401
402 =head2 get_all_current_ArchiveStableIds
403
404 Example : foreach my $arch_id (@{ $history->get_all_current_ArchiveStableIds }) {
405 print $arch_id->stable_id, '.', $arch_id->version, "\n";
406 }
407 Description : Convenience method to get all current ArchiveStableIds in this
408 tree.
409
410 Note that no lazy loading of "current" status is done at that
411 stage; as long as you retrieve your StableIdHistoryTree object
412 from ArchiveStableIdAdaptor, you'll get the right answer. In
413 other use cases, if you want to make sure you really get all
414 current stable IDs, loop over the result of
415 get_all_ArchiveStableIds() and call
416 ArchiveStableId->current_version() on all of them.
417 Return type : Arrayref of Bio::EnsEMBL::ArchiveStableId objects
418 Exceptions : none
419 Caller : general
420 Status : At Risk
421 : under development
422
423 =cut
424
425 sub get_all_current_ArchiveStableIds {
426 my $self = shift;
427
428 my @current = ();
429
430 foreach my $arch_id (@{ $self->get_all_ArchiveStableIds }) {
431 push @current, $arch_id if ($arch_id->is_current);
432 }
433
434 return \@current;
435 }
436
437
438 =head2 get_all_StableIdEvents
439
440 Example : foreach my $event (@{ $history->get_all_StableIdsEvents }) {
441 print "Old stable ID: ",
442 ($event->get_attribute('old', 'stable_id') or 'none'), "\n";
443 print "New stable ID: ",
444 ($event->get_attribute('new', 'stable_id') or 'none'), "\n";
445 print "Mapping score: ", $event->score, "\n";
446 }
447 Description : Gets all StableIdsEvents (links) in this tree.
448 Return type : Arrayref of Bio::EnsEMBL::StableIdEvent objects
449 Exceptions : none
450 Caller : general
451 Status : At Risk
452 : under development
453
454 =cut
455
456 sub get_all_StableIdEvents {
457 my $self = shift;
458 return [ values %{ $self->{'links'} } ];
459 }
460
461
462 =head2 get_latest_StableIdEvent
463
464 Arg[1] : Bio::EnsEMBL::ArchiveStableId $arch_id - the stable ID to get
465 the latest Event for
466 Example : my $arch_id = Bio::EnsEMBL::ArchiveStableId->new(
467 -stable_id => 'ENSG00001'
468 );
469 my $event = $history->get_latest_Event($arch_id);
470 Description : Returns the latest StableIdEvent found in the tree where a given
471 stable ID is the new stable ID. If more than one is found (e.g.
472 in a merge scenario in the latest mapping), preference is given
473 to self-events.
474 Return type : Bio::EnsEMBL::StableIdEvent
475 Exceptions : thrown on missing or wrong argument
476 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::add_all_current_to_history, general
477 Status : At Risk
478 : under development
479
480 =cut
481
482 sub get_latest_StableIdEvent {
483 my $self = shift;
484 my $arch_id = shift;
485
486 unless ($arch_id and $arch_id->isa('Bio::EnsEMBL::ArchiveStableId')) {
487 throw("Need a Bio::EnsEMBL::ArchiveStableId.");
488 }
489
490 my @all_events = @{ $self->get_all_StableIdEvents };
491 my @self_events = ();
492
493 while (my $event = shift(@all_events)) {
494 if ($event->new_ArchiveStableId and
495 $event->new_ArchiveStableId->stable_id eq $arch_id->stable_id) {
496 push @self_events, $event;
497 }
498 }
499
500 my @sorted = sort { $b->new_ArchiveStableId->release <=>
501 $a->new_ArchiveStableId->release } @self_events;
502
503 # give priority to self events
504 my $latest;
505 while ($latest = shift @sorted) {
506 last if (($latest->old_ArchiveStableId and
507 $latest->old_ArchiveStableId->stable_id eq $arch_id->stable_id)
508 or !$latest->old_ArchiveStableId);
509 }
510
511 return $latest;
512 }
513
514
515 =head2 get_release_display_names
516
517 Example : print "Unique release display_names in this tree:\n"
518 foreach my $name (@{ $history->get_release_display_names }) {
519 print " $name\n";
520 }
521 Description : Returns a chronologically sorted list of unique release
522 display_names in this tree.
523
524 This method can be used to determine the number of columns when
525 plotting the history tree.
526 Return type : Arrayref of strings.
527 Exceptions : none
528 Caller : general
529 Status : At Risk
530 : under development
531
532 =cut
533
534 sub get_release_display_names {
535 my $self = shift;
536
537 my @display_names = map { $_->[1] } @{ $self->_sort_releases };
538
539 return \@display_names;
540 }
541
542
543 =head2 get_release_db_names
544
545 Example : print "Unique release db_names in this tree:\n"
546 foreach my $name (@{ $history->get_release_db_names }) {
547 print " $name\n";
548 }
549 Description : Returns a chronologically sorted list of unique release
550 db_names in this tree.
551 Return type : Arrayref of strings.
552 Exceptions : none
553 Caller : general
554 Status : At Risk
555 : under development
556
557 =cut
558
559 sub get_release_db_names {
560 my $self = shift;
561
562 my @db_names = map { $_->[0] } @{ $self->_sort_releases };
563
564 return \@db_names;
565 }
566
567
568 #
569 # Create a chronologically sorted list of releases.
570 #
571 # Return type : Arrayref of arrayrefs (db_name, release)
572 #
573 sub _sort_releases {
574 my $self = shift;
575
576 unless ($self->{'sorted_tree'}->{'releases'}) {
577
578 my %unique = ();
579
580 foreach my $archive_id (@{ $self->get_all_ArchiveStableIds }) {
581 $unique{join(':', $archive_id->db_name, $archive_id->release)} = 1;
582 }
583
584 # sort releases by release number, then db_name; this should get them into
585 # chronological order
586 my @releases = sort { $a->[1] <=> $b->[1] || $a->[0] cmp $b->[0] }
587 map { [ split(/:/, $_) ] } keys(%unique);
588
589 $self->{'sorted_tree'}->{'releases'} = \@releases;
590
591 }
592
593 return $self->{'sorted_tree'}->{'releases'};
594 }
595
596
597 =head2 get_unique_stable_ids
598
599 Example : print "Unique stable IDs in this tree:\n"
600 foreach my $id (@{ $history->get_unique_stable_ids }) {
601 print " $id\n";
602 }
603 Description : Returns a list of unique stable IDs in this tree. Version is not
604 taken into account here. This method can be used to determine
605 the number of rows when plotting the history with each stable ID
606 occupying one line.
607
608 Sort algorithm will depend on what was chosen when the sorted
609 tree was generated. This ranges from a simple alphanumeric sort
610 to algorithms trying to untangle the history tree. If no
611 pre-sorted data is found, an alphanumerically sorted list will
612 be returned by default.
613 Return type : Arrayref of strings.
614 Exceptions : none
615 Caller : general
616 Status : At Risk
617 : under development
618
619 =cut
620
621 sub get_unique_stable_ids {
622 my $self = shift;
623
624 unless ($self->{'sorted_tree'}->{'stable_ids'}) {
625 $self->{'sorted_tree'}->{'stable_ids'} = $self->_sort_stable_ids;
626 }
627
628 return $self->{'sorted_tree'}->{'stable_ids'};
629 }
630
631
632 #
633 # Returns a list of stable IDs in this history tree, sorted alphabetically.
634 # This is the simplest sort function used and doesn't try to untangle the tree.
635 #
636 # Return type : Arrayref
637 #
638 sub _sort_stable_ids {
639 my $self = shift;
640 my %unique = map { $_->stable_id => 1 } @{ $self->get_all_ArchiveStableIds };
641 return [sort keys %unique];
642 }
643
644
645 =head2 optimise_tree
646
647 Example : $history->optimise_tree;
648 Description : This method sorts the history tree so that the number of
649 overlapping branches is minimised (thus "untangling" the tree).
650
651 It uses a clustering algorithm for this which iteratively moves
652 the nodes with the largest vertical distance next to each other
653 and looking for a mininum in total branch length. This might not
654 produce the overall optimum but usually converges on a local
655 optimum very quickly.
656 Return type : none
657 Exceptions : none
658 Caller : calculate_coords
659 Status : At Risk
660 : under development
661
662 =cut
663
664 sub optimise_tree {
665 my $self = shift;
666
667 # get all non-self events
668 my @links;
669 foreach my $event (@{ $self->get_all_StableIdEvents }) {
670 next unless ($event->old_ArchiveStableId and $event->new_ArchiveStableId);
671 my $old_id = $event->old_ArchiveStableId->stable_id;
672 my $new_id = $event->new_ArchiveStableId->stable_id;
673 push @links, [$old_id, $new_id] if ($old_id ne $new_id);
674 }
675
676 # get initial list of sorted unique stable IDs and put them into a position
677 # lookup hash
678 my $i = 0;
679 my %pos = map { $_ => $i++ } @{ $self->_sort_stable_ids };
680
681 my $opt_length;
682 my $successive_fails = 0;
683 my $k = 0;
684 my %seen;
685
686 # for debug purposes:
687 # find the number of permutations for the given number of stable IDs
688 my $fact = $self->_factorial(scalar(keys %pos));
689
690 OPT:
691 while ($successive_fails < 100) {
692
693 # sort links by vertical distance
694 #warn "sorting\n";
695 $self->_sort_links(\@links, \%pos);
696
697 # loop over sorted links
698 SORTED:
699 foreach my $link (@links) {
700
701 #warn " trying ".join('-', @$link)."\n";
702
703 $k++;
704
705 # remember last sort order
706 my %last = %pos;
707
708 #my $this_order = join(':', sort { $pos{$a} <=> $pos{$b} } keys %pos);
709 #warn " before $this_order\n";
710
711 # try both to move bottom node next to top node's current position and
712 # top node next to bottom node's position - one of the methods might give
713 # you better results
714 DIRECT:
715 foreach my $direction (qw(up down)) {
716
717 # move the nodes next to each other
718 $self->_move_nodes($link, \%pos, $direction);
719
720 # next if we've seen this sort order before
721 my $new_order = join(':', sort { $pos{$a} <=> $pos{$b} } keys %pos);
722 #warn " after ($direction) $new_order\n";
723 if ($seen{$new_order}) {
724 #warn " seen\n";
725 %pos = %last;
726 next DIRECT;
727 }
728 $seen{$new_order} = 1;
729
730 # calculate total link length for this sort order
731 my $total_length = $self->_total_link_length(\@links, \%pos);
732
733 if (!$opt_length or $total_length < $opt_length) {
734 #warn " better ($total_length/$opt_length)\n";
735 $opt_length = $total_length;
736 $successive_fails = 0;
737 next OPT;
738 } else {
739 #warn " worse ($total_length/$opt_length)\n";
740 %pos = %last;
741 $successive_fails++;
742 }
743 }
744
745 }
746
747 last OPT;
748
749 }
750
751 #warn "Needed $k tries (of $fact) to find optimal tree.\n";
752
753 my @best = sort { $pos{$a} <=> $pos{$b} } keys %pos;
754 $self->{'sorted_tree'}->{'stable_ids'} = \@best;
755 }
756
757
758 #
759 # find the number of permutations for a give array size.
760 # used for debugging code (compare implemented algorithm to looping over all
761 # possible permutations).
762 #
763 sub _factorial {
764 my ($self, $n) = @_;
765 my $s = 1;
766 $s *= $n-- while $n > 0;
767 return $s;
768 }
769
770
771 #
772 # sort links by vertical distance
773 #
774 sub _sort_links {
775 my ($self, $links, $pos) = @_;
776
777 my @lookup;
778
779 foreach my $link (@$links) {
780 my $dist = $pos->{$link->[0]} - $pos->{$link->[1]};
781 $dist = -$dist if ($dist < 0);
782 push @lookup, [$dist, $link];
783 #warn " $dist ".join(' ', @$link)."\n";
784 }
785
786 @$links = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @lookup;
787 }
788
789
790 #
791 # make two nodes adjacent by moving the second node next to the first node
792 # all other node coordinates are adjusted accordingly
793 #
794 sub _move_nodes {
795 my ($self, $link, $pos, $direction) = @_;
796
797 my $first_pos = $pos->{$link->[0]};
798 my $second_pos = $pos->{$link->[1]};
799
800 # swap positions if necessary
801 if ($first_pos > $second_pos) {
802 my $tmp = $second_pos;
803 $second_pos = $first_pos;
804 $first_pos = $tmp;
805 }
806 #warn " $first_pos:$second_pos\n";
807
808 foreach my $p (keys %$pos) {
809
810 my $val = $pos->{$p};
811
812 #warn " $p $val\n";
813 if ($direction eq 'up') {
814 if ($val > $first_pos and $val < $second_pos) {
815 $val++;
816 } elsif ($val == $second_pos) {
817 $val = $first_pos + 1;
818 }
819 } else {
820 if ($val > $first_pos and $val < $second_pos) {
821 $val--;
822 } elsif ($val == $first_pos) {
823 $val = $second_pos - 1;
824 }
825 }
826
827 #warn " $p $val\n";
828 $pos->{$p} = $val;
829 #warn "\n";
830 }
831 }
832
833
834 #
835 # calculate the total link (vertical distance) length based on this sort order
836 #
837 sub _total_link_length {
838 my ($self, $links, $pos) = @_;
839
840 my $total_length;
841
842 foreach my $link (@$links) {
843 my $length = $pos->{$link->[0]} - $pos->{$link->[1]};
844 $length = -$length if ($length < 0);
845 $total_length += $length;
846 }
847
848 return $total_length;
849 }
850
851
852 =head2 coords_by_ArchiveStableId
853
854 Arg[1] : Bio::EnsEMBL::ArchiveStableId $archive_id
855 The ArchiveStableId to get tree grid coordinates for
856 Example : my ($x, $y) =
857 @{ $history->coords_by_ArchiveStableId($archive_id) };
858 print $archive_id->stable_id, " coords: $x, $y\n";
859 Description : Returns the coordinates of an ArchiveStableId in the history
860 tree grid. If the ArchiveStableId isn't found in this tree, an
861 empty list is returned.
862
863 Coordinates are zero-based (i.e. the top leftmost element in
864 the grid has coordinates [0, 0], not [1, 1]). This is to
865 facilitate using them to create a matrix as a two-dimensional
866 array of arrays.
867 Return type : Arrayref (x coordinate, y coordinate)
868 Exceptions : thrown on wrong argument type
869 Caller : general
870 Status : At Risk
871 : under development
872
873 =cut
874
875 sub coords_by_ArchiveStableId {
876 my ($self, $archive_id) = @_;
877
878 throw("Bio::EnsEMBL::ArchiveStableId object expected.")
879 unless ($archive_id and ref($archive_id) and
880 $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));
881
882 return $self->{'sorted_tree'}->{'coords'}->{$self->_node_id($archive_id)}
883 || [];
884 }
885
886
887 =head2 calculate_coords
888
889 Example : $history->calculate_coords;
890 Description : Pre-calculates the grid coordinates of all nodes in the tree.
891 Return type : none
892 Exceptions : none
893 Caller : ArchiveStableIdAdaptor::fetch_history_by_stable_id
894 Status : At Risk
895 : under development
896
897 =cut
898
899 sub calculate_coords {
900 my $self = shift;
901
902 # reset any previous tree cordinate calculations
903 $self->reset_tree;
904
905 # the "master" information for the sorted tree is stored as the sorted lists
906 # of releases (x) and stable IDs (y). Sort them now.
907 my $db_names = $self->get_release_db_names;
908
909 # untangle tree by sorting stable IDs appropriately
910 $self->optimise_tree;
911 my $stable_ids = $self->get_unique_stable_ids;
912
913 # for performance reasons, additionally store coordinates in a lookup hash
914 foreach my $archive_id (@{ $self->get_all_ArchiveStableIds }) {
915
916 # coordinates are positions in the sorted lists
917 my $x = $self->_index_of($archive_id->db_name, $db_names);
918 my $y = $self->_index_of($archive_id->stable_id, $stable_ids);
919
920 $self->{'sorted_tree'}->{'coords'}->{$self->_node_id($archive_id)} =
921 [ $x, $y ];
922 }
923 }
924
925 #
926 # Description : Returns the index of an element in an array
927 # Example : my @array = (a, b, c);
928 # my $i = _index_of('b', \@array); # will return 1
929 # Return type : Int (or undef if element is not found in array)
930 #
931 sub _index_of {
932 my ($self, $element, $arrayref) = @_;
933
934 throw("Expecting arrayref argument.") unless (ref($arrayref) eq 'ARRAY');
935
936 my @array = @$arrayref;
937
938 while (my $e = pop(@array)) {
939 return scalar(@array) if ($e eq $element);
940 }
941
942 return undef;
943 }
944
945
946 =head2 consolidate_tree
947
948 Example : $history->consolidate_tree;
949 Description : Consolidate the history tree. This means removing nodes where
950 there wasn't a change and bridging gaps in the history. The end
951 result will be a sparse tree which only contains the necessary
952 information.
953 Return type : none
954 Exceptions : none
955 Caller : ArchiveStableIdAdaptor->fetch_history_tree_by_stable_id
956 Status : At Risk
957 : under development
958
959 =cut
960
961 sub consolidate_tree {
962 my $self = shift;
963
964 #
965 # get all self-events and creations/deletions and sort them (by stable ID and
966 # chronologically)
967 #
968 my @event_lookup;
969
970 foreach my $event (@{ $self->get_all_StableIdEvents }) {
971
972 my $old_id = $event->old_ArchiveStableId;
973 my $new_id = $event->new_ArchiveStableId;
974
975 if (!$old_id or !$new_id or ($old_id->stable_id eq $new_id->stable_id)) {
976 if ($old_id) {
977 push @event_lookup, [$old_id->stable_id, $old_id->release,
978 $old_id->db_name, $event];
979 } else {
980 push @event_lookup, [$new_id->stable_id, $new_id->release - 1,
981 $new_id->db_name, $event];
982 }
983 }
984 }
985
986 my @self_events = map { $_->[3] }
987 sort { $a->[0] cmp $b->[0] || $a->[1] <=> $b->[1] || $a->[2] cmp $b->[2] }
988 @event_lookup;
989
990 #
991 # consolidate tree
992 #
993 my $last = shift(@self_events);
994
995 while (my $event = shift(@self_events)) {
996
997 my $lo = $last->old_ArchiveStableId;
998 my $ln = $last->new_ArchiveStableId;
999 my $eo = $event->old_ArchiveStableId;
1000 my $en = $event->new_ArchiveStableId;
1001
1002 if ($lo and $eo and $en and $lo->stable_id eq $eo->stable_id
1003 and $lo->version eq $eo->version) {
1004
1005 # this removes redundant nodes and connects the remaining nodes:
1006 #
1007 # o--o--o -> o-----o
1008 # 1 1 1 1 1
1009
1010 #warn 'A: '.$last->ident_string.' | '.$event->ident_string."\n";
1011
1012 $self->remove_StableIdEvent($last);
1013 $self->remove_StableIdEvent($event);
1014
1015 $event->old_ArchiveStableId($lo);
1016
1017 $self->add_StableIdEvents($event);
1018
1019 } elsif ($ln and $eo and $ln->db_name ne $eo->db_name
1020 and $ln->stable_id eq $eo->stable_id and $ln->version eq $eo->version) {
1021
1022 # try to brigde gaps
1023
1024 if ($en) {
1025
1026 # o--o o--o -> o--o-----o
1027 # 1 2 2 2 1 2 2
1028 #
1029 # o o--o -> o-----o
1030 # 1 1 1 1 1
1031
1032 #warn 'X: '.$last->ident_string.' | '.$event->ident_string."\n";
1033
1034 $self->remove_StableIdEvent($event);
1035 $event->old_ArchiveStableId($ln);
1036 $self->add_StableIdEvents($event);
1037
1038 } elsif ($lo) {
1039
1040 # there's a deletion event, deal with it differently
1041
1042 if ($lo->version eq $ln->version) {
1043
1044 # o--o o -> o-----o
1045 # 1 1 1 1 1
1046
1047 #warn 'Y: '.$last->ident_string.' | '.$event->ident_string."\n";
1048
1049 $self->remove_StableIdEvent($last);
1050 $last->new_ArchiveStableId($eo);
1051 $self->add_StableIdEvents($last);
1052
1053 } else {
1054
1055 # o--o o -> o--o--o
1056 # 1 2 2 1 2 2
1057
1058 #warn 'Z: '.$last->ident_string.' | '.$event->ident_string."\n";
1059
1060 $self->remove_StableIdEvent($event);
1061 $event->old_ArchiveStableId($ln);
1062 $event->new_ArchiveStableId($eo);
1063 $self->add_StableIdEvents($event);
1064
1065 }
1066
1067 } else {
1068
1069 # creation followed by deletion in next mapping
1070 #
1071 # o o -> o--o
1072 # 1 1 1 1
1073
1074 #warn 'Q: '.$last->ident_string.' | '.$event->ident_string."\n";
1075
1076 $self->remove_StableIdEvent($last);
1077 $self->remove_StableIdEvent($event);
1078 $event->old_ArchiveStableId($ln);
1079 $event->new_ArchiveStableId($eo);
1080 $self->add_StableIdEvents($event);
1081
1082 }
1083
1084 } else {
1085 #warn 'C: '.$last->ident_string.' | '.$event->ident_string."\n";
1086 }
1087
1088 $last = $event;
1089 }
1090
1091 # now add ArchiveStableIds of the remaining events to the tree
1092 $self->add_ArchiveStableIds_for_events;
1093 }
1094
1095
1096 =head2 reset_tree
1097
1098 Example : $history->reset_tree;
1099 Description : Resets all pre-calculated tree grid data. Mostly used internally
1100 by methods that modify the tree.
1101 Return type : none
1102 Exceptions : none
1103 Caller : internal
1104 Status : At Risk
1105 : under development
1106
1107 =cut
1108
1109 sub reset_tree {
1110 my $self = shift;
1111 $self->{'sorted_tree'} = undef;
1112 }
1113
1114
1115 =head2 current_dbname
1116
1117 Arg[1] : (optional) String $dbname - the dbname to set
1118 Example : my $dbname = $history->current_dbname;
1119 Description : Getter/setter for current dbname.
1120 Return type : String
1121 Exceptions : none
1122 Caller : general
1123 Status : At Risk
1124 : under development
1125
1126 =cut
1127
1128 sub current_dbname {
1129 my $self = shift;
1130 $self->{'current_dbname'} = shift if (@_);
1131 return $self->{'current_dbname'};
1132 }
1133
1134
1135 =head2 current_release
1136
1137 Arg[1] : (optional) Int $release - the release to set
1138 Example : my $release = $history->current_release;
1139 Description : Getter/setter for current release.
1140 Return type : Int
1141 Exceptions : none
1142 Caller : general
1143 Status : At Risk
1144 : under development
1145
1146 =cut
1147
1148 sub current_release {
1149 my $self = shift;
1150 $self->{'current_release'} = shift if (@_);
1151 return $self->{'current_release'};
1152 }
1153
1154
1155 =head2 current_assembly
1156
1157 Arg[1] : (optional) String $assembly - the assembly to set
1158 Example : my $assembly = $history->current_assembly;
1159 Description : Getter/setter for current assembly.
1160 Return type : String
1161 Exceptions : none
1162 Caller : general
1163 Status : At Risk
1164 : under development
1165
1166 =cut
1167
1168 sub current_assembly {
1169 my $self = shift;
1170 $self->{'current_assembly'} = shift if (@_);
1171 return $self->{'current_assembly'};
1172 }
1173
1174
1175 =head2 is_incomplete
1176
1177 Arg[1] : (optional) Boolean $incomplete
1178 Example : if ($history->is_incomplete) {
1179 print "Returned tree is incomplete due to too many mappings
1180 in the database.\n";
1181 }
1182 Description : Getter/setter for incomplete flag. This is used by
1183 ArchiveStableIdAdaptor to indicate that it finished building
1184 the tree prematurely due to too many mappins in the db and can
1185 be used by applications to print warning messages.
1186 Return type : Boolean
1187 Exceptions : none
1188 Caller : general
1189 Status : At Risk
1190 : under development
1191
1192 =cut
1193
1194 sub is_incomplete {
1195 my $self = shift;
1196 $self->{'incomplete'} = shift if (@_);
1197 return $self->{'incomplete'};
1198 }
1199
1200
1201 1;
1202