Mercurial > repos > willmclaren > ensembl_vep
comparison variant_effect_predictor/Bio/EnsEMBL/StableIdHistoryTree.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21066c0abaf5 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::StableIdHistoryTree - object representing a stable ID history tree | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 my $reg = "Bio::EnsEMBL::Registry"; | |
28 my $archiveStableIdAdaptor = | |
29 $reg->get_adaptor( 'human', 'core', 'ArchiveStableId' ); | |
30 | |
31 my $stable_id = 'ENSG00000068990'; | |
32 my $history = | |
33 $archiveStableIdAdaptor->fetch_history_tree_by_stable_id('ENSG01'); | |
34 | |
35 print "Unique stable IDs in this tree:\n"; | |
36 print join( ", ", @{ $history->get_unique_stable_ids } ), "\n"; | |
37 | |
38 print "\nReleases in this tree:\n"; | |
39 print join( ", ", @{ $history->get_release_display_names } ), "\n"; | |
40 | |
41 print "\nCoordinates of nodes in the tree:\n\n"; | |
42 foreach my $a ( @{ $history->get_all_ArchiveStableIds } ) { | |
43 print " Stable ID: " . $a->stable_id . "." . $a->version . "\n"; | |
44 print " Release: " | |
45 . $a->release . " (" | |
46 . $a->assembly . ", " | |
47 . $a->db_name . ")\n"; | |
48 print " coords: " | |
49 . join( ', ', @{ $history->coords_by_ArchiveStableId($a) } ) | |
50 . "\n\n"; | |
51 } | |
52 | |
53 =head1 DESCRIPTION | |
54 | |
55 This object represents a stable ID history tree graph. | |
56 | |
57 The graph is implemented as a collection of nodes (ArchiveStableId | |
58 objects) and links (StableIdEvent objects) which have positions | |
59 on an (x,y) grid. The x axis is used for releases, the y axis for | |
60 stable_ids. The idea is to create a plot similar to this (the numbers | |
61 shown on the nodes are the stable ID versions): | |
62 | |
63 ENSG001 1-------------- 2-- | |
64 \ | |
65 ENSG003 1-----1 | |
66 / | |
67 ENSG002 1-------2---------- | |
68 | |
69 38 39 40 41 42 | |
70 | |
71 The grid coordinates of the ArchiveStableId objects in this example | |
72 would be (note that coordinates are zero-based): | |
73 | |
74 ENSG001.1 (0, 0) | |
75 ENSG001.2 (2, 0) | |
76 ENSG003.1 (release 41) (3, 1) | |
77 ENSG003.1 (release 42) (4, 1) | |
78 ENSG002.1 (0, 2) | |
79 ENSG002.2 (1, 2) | |
80 | |
81 The tree will only contain those nodes which had a change in the stable | |
82 ID version. Therefore, in the above example, in release 39 ENSG001 was | |
83 present and had version 1 (but will not be drawn there, to unclutter the | |
84 output). | |
85 | |
86 The grid positions will be calculated by the API and will try to | |
87 untangle the tree (i.e. try to avoid overlapping lines). | |
88 | |
89 =head1 METHODS | |
90 | |
91 new | |
92 add_ArchiveStableIds | |
93 add_ArchiveStableIds_for_events | |
94 remove_ArchiveStableId | |
95 flush_ArchiveStableIds | |
96 add_StableIdEvents | |
97 remove_StableIdEvent | |
98 flush_StableIdEvents | |
99 get_all_ArchiveStableIds | |
100 get_all_StableIdEvents | |
101 get_latest_StableIdEvent | |
102 get_release_display_names | |
103 get_release_db_names | |
104 get_unique_stable_ids | |
105 optimise_tree | |
106 coords_by_ArchiveStableId | |
107 calculate_coords | |
108 consolidate_tree | |
109 reset_tree | |
110 current_dbname | |
111 current_release | |
112 current_assembly | |
113 is_incomplete | |
114 | |
115 =head1 RELATED MODULES | |
116 | |
117 Bio::EnsEMBL::ArchiveStableId | |
118 Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor | |
119 Bio::EnsEMBL::StableIdEvent | |
120 | |
121 =cut | |
122 | |
123 package Bio::EnsEMBL::StableIdHistoryTree; | |
124 | |
125 use strict; | |
126 use warnings; | |
127 no warnings 'uninitialized'; | |
128 | |
129 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
130 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
131 | |
132 | |
133 =head2 new | |
134 | |
135 Arg [CURRENT_DBNAME] : (optional) name of current db | |
136 Arg [CURRENT_RELEASE] : (optional) current release number | |
137 Arg [CURRENT_ASSEMBLY] : (optional) current assembly name | |
138 Example : my $history = Bio::EnsEMBL::StableIdHistoryTree->new; | |
139 Description : object constructor | |
140 Return type : Bio::EnsEMBL::StableIdHistoryTree | |
141 Exceptions : none | |
142 Caller : general | |
143 Status : At Risk | |
144 : under development | |
145 | |
146 =cut | |
147 | |
148 sub new { | |
149 my $caller = shift; | |
150 my $class = ref($caller) || $caller; | |
151 | |
152 my $self = {}; | |
153 bless $self, $class; | |
154 | |
155 my ($current_dbname, $current_release, $current_assembly) = | |
156 rearrange([qw( CURRENT_DBNAME CURRENT_RELEASE CURRENT_ASSEMBLY )], @_ ); | |
157 | |
158 # initialise | |
159 $self->{'current_dbname'} = $current_dbname; | |
160 $self->{'current_release'} = $current_release; | |
161 $self->{'current_assembly'} = $current_assembly; | |
162 | |
163 return $self; | |
164 } | |
165 | |
166 | |
167 =head2 add_ArchiveStableIds | |
168 | |
169 Arg[1..n] : Bio::EnsEMBL::ArchiveStableId's @archive_ids | |
170 The ArchiveStableIds to add to the the history tree | |
171 Example : my $archive_id = $archiveStableIdAdaptor->fetch_by_stable_id( | |
172 'ENSG00024808'); | |
173 $history->add_ArchiveStableId($archive_id); | |
174 Description : Adds ArchiveStableIds (nodes) to the history tree. No | |
175 calculation of grid coordinates is done at this point, you need | |
176 to initiate this manually with calculate_coords(). | |
177 ArchiveStableIds are only added once for each release (to avoid | |
178 duplicates). | |
179 Return type : none | |
180 Exceptions : thrown on invalid or missing argument | |
181 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general | |
182 Status : At Risk | |
183 : under development | |
184 | |
185 =cut | |
186 | |
187 sub add_ArchiveStableIds { | |
188 my ($self, @archive_ids) = @_; | |
189 | |
190 throw("You must provide one or more Bio::EnsEMBL::ArchiveStableIds to add.") | |
191 unless (@archive_ids); | |
192 | |
193 foreach my $archive_id (@archive_ids) { | |
194 throw("Bio::EnsEMBL::ArchiveStableId object expected.") | |
195 unless (ref($archive_id) && | |
196 $archive_id->isa('Bio::EnsEMBL::ArchiveStableId')); | |
197 | |
198 $self->{'nodes'}->{$self->_node_id($archive_id)} = $archive_id; | |
199 } | |
200 } | |
201 | |
202 | |
203 =head2 add_ArchiveStableIds_for_events | |
204 | |
205 Example : my $history = Bio::EnsEMBL::StableIdHistoryTree->new; | |
206 $history->add_StableIdEvents($event1, $event2); | |
207 $history->add_ArchiveStableIds_for_events; | |
208 Description : Convenience method that adds all ArchiveStableIds for all | |
209 StableIdEvents attached to this object to the tree. | |
210 Return type : none | |
211 Exceptions : none | |
212 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general | |
213 Status : At Risk | |
214 : under development | |
215 | |
216 =cut | |
217 | |
218 sub add_ArchiveStableIds_for_events { | |
219 my $self = shift; | |
220 | |
221 foreach my $event (@{ $self->get_all_StableIdEvents }) { | |
222 if ($event->old_ArchiveStableId) { | |
223 $self->add_ArchiveStableIds($event->old_ArchiveStableId); | |
224 } | |
225 if ($event->new_ArchiveStableId) { | |
226 $self->add_ArchiveStableIds($event->new_ArchiveStableId); | |
227 } | |
228 } | |
229 } | |
230 | |
231 | |
232 =head2 remove_ArchiveStableId | |
233 | |
234 Arg[1] : Bio::EnsEMBL::ArchiveStableId $archive_id | |
235 the ArchiveStableId to remove from the tree | |
236 Example : $history->remove_ArchiveStableId($archive_id); | |
237 Description : Removes an ArchiveStableId from the tree. | |
238 Return type : none | |
239 Exceptions : thrown on missing or invalid argument | |
240 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general | |
241 Status : At Risk | |
242 : under development | |
243 | |
244 =cut | |
245 | |
246 sub remove_ArchiveStableId { | |
247 my ($self, $archive_id) = @_; | |
248 | |
249 throw("Bio::EnsEMBL::ArchiveStableId object expected.") | |
250 unless ($archive_id && ref($archive_id) && | |
251 $archive_id->isa('Bio::EnsEMBL::ArchiveStableId')); | |
252 | |
253 my %nodes = %{ $self->{'nodes'} }; | |
254 delete $nodes{$self->_node_id($archive_id)}; | |
255 $self->{'nodes'} = \%nodes; | |
256 } | |
257 | |
258 | |
259 =head2 flush_ArchiveStableIds | |
260 | |
261 Example : $history->flush_ArchiveStableIds; | |
262 Description : Remove all ArchiveStableIds from the tree. | |
263 Return type : none | |
264 Exceptions : none | |
265 Caller : general | |
266 Status : At Risk | |
267 : under development | |
268 | |
269 =cut | |
270 | |
271 sub flush_ArchiveStableIds { | |
272 my $self = shift; | |
273 $self->{'nodes'} = undef; | |
274 } | |
275 | |
276 | |
277 # | |
278 # generate a unique node identifier | |
279 # | |
280 sub _node_id { | |
281 my ($self, $archive_id) = @_; | |
282 return $archive_id->stable_id . ':' . $archive_id->db_name; | |
283 } | |
284 | |
285 | |
286 =head2 add_StableIdEvents | |
287 | |
288 Arg[1..n] : Bio::EnsEMBL::StableIdEvent's @events | |
289 The StableIdEvents to add to the the history tree | |
290 Example : $history->add_StableIdEvents($event); | |
291 Description : Adds StableIdEvents (links) to the history tree. Note that | |
292 ArchiveStableIds attached to the StableIdEvent aren't added to | |
293 the tree automatically, you'll need to call | |
294 add_ArchiveStableIds_for_events later. | |
295 Return type : none | |
296 Exceptions : thrown on invalid or missing argument | |
297 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general | |
298 Status : At Risk | |
299 : under development | |
300 | |
301 =cut | |
302 | |
303 sub add_StableIdEvents { | |
304 my ($self, @events) = @_; | |
305 | |
306 throw("You must provide one or more Bio::EnsEMBL::StableIdsEvents to add.") | |
307 unless (@events); | |
308 | |
309 foreach my $event (@events) { | |
310 throw("Bio::EnsEMBL::StableIdEvent object expected.") | |
311 unless ($event->isa('Bio::EnsEMBL::StableIdEvent')); | |
312 | |
313 $self->{'links'}->{$self->_link_id($event)} = $event; | |
314 } | |
315 } | |
316 | |
317 | |
318 =head2 remove_StableIdEvent | |
319 | |
320 Arg[1] : Bio::EnsEMBL::StableIdEvent $event | |
321 the StableIdEvent to remove from the tree | |
322 Example : $history->remove_StableIdEvent($event); | |
323 Description : Removes a StableIdEvent from the tree. | |
324 Return type : none | |
325 Exceptions : thrown on missing or invalid arguments | |
326 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general | |
327 Status : At Risk | |
328 : under development | |
329 | |
330 =cut | |
331 | |
332 sub remove_StableIdEvent { | |
333 my ($self, $event) = @_; | |
334 | |
335 throw("Bio::EnsEMBL::StableIdEvent object expected.") unless | |
336 ($event && ref($event) && $event->isa('Bio::EnsEMBL::StableIdEvent')); | |
337 | |
338 my %links = %{ $self->{'links'} }; | |
339 delete $links{$self->_link_id($event)}; | |
340 $self->{'links'} = \%links; | |
341 } | |
342 | |
343 | |
344 =head2 flush_StableIdEvents | |
345 | |
346 Example : $history->flush_StableIdEvents; | |
347 Description : Removes all StableIdEvents from the tree. | |
348 Return type : none | |
349 Exceptions : none | |
350 Caller : general | |
351 Status : At Risk | |
352 : under development | |
353 | |
354 =cut | |
355 | |
356 sub flush_StableIdEvents { | |
357 my $self = shift; | |
358 $self->{'links'} = undef; | |
359 } | |
360 | |
361 | |
362 # | |
363 # generate a unique link identifier | |
364 # | |
365 sub _link_id { | |
366 my ($self, $event) = @_; | |
367 | |
368 my ($old_id, $old_db_name, $new_id, $new_db_name); | |
369 if ($event->old_ArchiveStableId) { | |
370 $old_id = $event->old_ArchiveStableId->stable_id; | |
371 $old_db_name = $event->old_ArchiveStableId->db_name; | |
372 } | |
373 if ($event->new_ArchiveStableId) { | |
374 $new_id = $event->new_ArchiveStableId->stable_id; | |
375 $new_db_name = $event->new_ArchiveStableId->db_name; | |
376 } | |
377 | |
378 return join(':', $old_id, $old_db_name, $new_id, $new_db_name); | |
379 } | |
380 | |
381 | |
382 =head2 get_all_ArchiveStableIds | |
383 | |
384 Example : foreach my $arch_id (@{ $history->get_all_ArchiveStableIds }) { | |
385 print $arch_id->stable_id, '.', $arch_id->version, "\n"; | |
386 } | |
387 Description : Gets all ArchiveStableIds (nodes) in this tree. | |
388 Return type : Arrayref of Bio::EnsEMBL::ArchiveStableId objects | |
389 Exceptions : none | |
390 Caller : general | |
391 Status : At Risk | |
392 : under development | |
393 | |
394 =cut | |
395 | |
396 sub get_all_ArchiveStableIds { | |
397 my $self = shift; | |
398 return [ values %{ $self->{'nodes'} } ]; | |
399 } | |
400 | |
401 | |
402 =head2 get_all_current_ArchiveStableIds | |
403 | |
404 Example : foreach my $arch_id (@{ $history->get_all_current_ArchiveStableIds }) { | |
405 print $arch_id->stable_id, '.', $arch_id->version, "\n"; | |
406 } | |
407 Description : Convenience method to get all current ArchiveStableIds in this | |
408 tree. | |
409 | |
410 Note that no lazy loading of "current" status is done at that | |
411 stage; as long as you retrieve your StableIdHistoryTree object | |
412 from ArchiveStableIdAdaptor, you'll get the right answer. In | |
413 other use cases, if you want to make sure you really get all | |
414 current stable IDs, loop over the result of | |
415 get_all_ArchiveStableIds() and call | |
416 ArchiveStableId->current_version() on all of them. | |
417 Return type : Arrayref of Bio::EnsEMBL::ArchiveStableId objects | |
418 Exceptions : none | |
419 Caller : general | |
420 Status : At Risk | |
421 : under development | |
422 | |
423 =cut | |
424 | |
425 sub get_all_current_ArchiveStableIds { | |
426 my $self = shift; | |
427 | |
428 my @current = (); | |
429 | |
430 foreach my $arch_id (@{ $self->get_all_ArchiveStableIds }) { | |
431 push @current, $arch_id if ($arch_id->is_current); | |
432 } | |
433 | |
434 return \@current; | |
435 } | |
436 | |
437 | |
438 =head2 get_all_StableIdEvents | |
439 | |
440 Example : foreach my $event (@{ $history->get_all_StableIdsEvents }) { | |
441 print "Old stable ID: ", | |
442 ($event->get_attribute('old', 'stable_id') or 'none'), "\n"; | |
443 print "New stable ID: ", | |
444 ($event->get_attribute('new', 'stable_id') or 'none'), "\n"; | |
445 print "Mapping score: ", $event->score, "\n"; | |
446 } | |
447 Description : Gets all StableIdsEvents (links) in this tree. | |
448 Return type : Arrayref of Bio::EnsEMBL::StableIdEvent objects | |
449 Exceptions : none | |
450 Caller : general | |
451 Status : At Risk | |
452 : under development | |
453 | |
454 =cut | |
455 | |
456 sub get_all_StableIdEvents { | |
457 my $self = shift; | |
458 return [ values %{ $self->{'links'} } ]; | |
459 } | |
460 | |
461 | |
462 =head2 get_latest_StableIdEvent | |
463 | |
464 Arg[1] : Bio::EnsEMBL::ArchiveStableId $arch_id - the stable ID to get | |
465 the latest Event for | |
466 Example : my $arch_id = Bio::EnsEMBL::ArchiveStableId->new( | |
467 -stable_id => 'ENSG00001' | |
468 ); | |
469 my $event = $history->get_latest_Event($arch_id); | |
470 Description : Returns the latest StableIdEvent found in the tree where a given | |
471 stable ID is the new stable ID. If more than one is found (e.g. | |
472 in a merge scenario in the latest mapping), preference is given | |
473 to self-events. | |
474 Return type : Bio::EnsEMBL::StableIdEvent | |
475 Exceptions : thrown on missing or wrong argument | |
476 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::add_all_current_to_history, general | |
477 Status : At Risk | |
478 : under development | |
479 | |
480 =cut | |
481 | |
482 sub get_latest_StableIdEvent { | |
483 my $self = shift; | |
484 my $arch_id = shift; | |
485 | |
486 unless ($arch_id and $arch_id->isa('Bio::EnsEMBL::ArchiveStableId')) { | |
487 throw("Need a Bio::EnsEMBL::ArchiveStableId."); | |
488 } | |
489 | |
490 my @all_events = @{ $self->get_all_StableIdEvents }; | |
491 my @self_events = (); | |
492 | |
493 while (my $event = shift(@all_events)) { | |
494 if ($event->new_ArchiveStableId and | |
495 $event->new_ArchiveStableId->stable_id eq $arch_id->stable_id) { | |
496 push @self_events, $event; | |
497 } | |
498 } | |
499 | |
500 my @sorted = sort { $b->new_ArchiveStableId->release <=> | |
501 $a->new_ArchiveStableId->release } @self_events; | |
502 | |
503 # give priority to self events | |
504 my $latest; | |
505 while ($latest = shift @sorted) { | |
506 last if (($latest->old_ArchiveStableId and | |
507 $latest->old_ArchiveStableId->stable_id eq $arch_id->stable_id) | |
508 or !$latest->old_ArchiveStableId); | |
509 } | |
510 | |
511 return $latest; | |
512 } | |
513 | |
514 | |
515 =head2 get_release_display_names | |
516 | |
517 Example : print "Unique release display_names in this tree:\n" | |
518 foreach my $name (@{ $history->get_release_display_names }) { | |
519 print " $name\n"; | |
520 } | |
521 Description : Returns a chronologically sorted list of unique release | |
522 display_names in this tree. | |
523 | |
524 This method can be used to determine the number of columns when | |
525 plotting the history tree. | |
526 Return type : Arrayref of strings. | |
527 Exceptions : none | |
528 Caller : general | |
529 Status : At Risk | |
530 : under development | |
531 | |
532 =cut | |
533 | |
534 sub get_release_display_names { | |
535 my $self = shift; | |
536 | |
537 my @display_names = map { $_->[1] } @{ $self->_sort_releases }; | |
538 | |
539 return \@display_names; | |
540 } | |
541 | |
542 | |
543 =head2 get_release_db_names | |
544 | |
545 Example : print "Unique release db_names in this tree:\n" | |
546 foreach my $name (@{ $history->get_release_db_names }) { | |
547 print " $name\n"; | |
548 } | |
549 Description : Returns a chronologically sorted list of unique release | |
550 db_names in this tree. | |
551 Return type : Arrayref of strings. | |
552 Exceptions : none | |
553 Caller : general | |
554 Status : At Risk | |
555 : under development | |
556 | |
557 =cut | |
558 | |
559 sub get_release_db_names { | |
560 my $self = shift; | |
561 | |
562 my @db_names = map { $_->[0] } @{ $self->_sort_releases }; | |
563 | |
564 return \@db_names; | |
565 } | |
566 | |
567 | |
568 # | |
569 # Create a chronologically sorted list of releases. | |
570 # | |
571 # Return type : Arrayref of arrayrefs (db_name, release) | |
572 # | |
573 sub _sort_releases { | |
574 my $self = shift; | |
575 | |
576 unless ($self->{'sorted_tree'}->{'releases'}) { | |
577 | |
578 my %unique = (); | |
579 | |
580 foreach my $archive_id (@{ $self->get_all_ArchiveStableIds }) { | |
581 $unique{join(':', $archive_id->db_name, $archive_id->release)} = 1; | |
582 } | |
583 | |
584 # sort releases by release number, then db_name; this should get them into | |
585 # chronological order | |
586 my @releases = sort { $a->[1] <=> $b->[1] || $a->[0] cmp $b->[0] } | |
587 map { [ split(/:/, $_) ] } keys(%unique); | |
588 | |
589 $self->{'sorted_tree'}->{'releases'} = \@releases; | |
590 | |
591 } | |
592 | |
593 return $self->{'sorted_tree'}->{'releases'}; | |
594 } | |
595 | |
596 | |
597 =head2 get_unique_stable_ids | |
598 | |
599 Example : print "Unique stable IDs in this tree:\n" | |
600 foreach my $id (@{ $history->get_unique_stable_ids }) { | |
601 print " $id\n"; | |
602 } | |
603 Description : Returns a list of unique stable IDs in this tree. Version is not | |
604 taken into account here. This method can be used to determine | |
605 the number of rows when plotting the history with each stable ID | |
606 occupying one line. | |
607 | |
608 Sort algorithm will depend on what was chosen when the sorted | |
609 tree was generated. This ranges from a simple alphanumeric sort | |
610 to algorithms trying to untangle the history tree. If no | |
611 pre-sorted data is found, an alphanumerically sorted list will | |
612 be returned by default. | |
613 Return type : Arrayref of strings. | |
614 Exceptions : none | |
615 Caller : general | |
616 Status : At Risk | |
617 : under development | |
618 | |
619 =cut | |
620 | |
621 sub get_unique_stable_ids { | |
622 my $self = shift; | |
623 | |
624 unless ($self->{'sorted_tree'}->{'stable_ids'}) { | |
625 $self->{'sorted_tree'}->{'stable_ids'} = $self->_sort_stable_ids; | |
626 } | |
627 | |
628 return $self->{'sorted_tree'}->{'stable_ids'}; | |
629 } | |
630 | |
631 | |
632 # | |
633 # Returns a list of stable IDs in this history tree, sorted alphabetically. | |
634 # This is the simplest sort function used and doesn't try to untangle the tree. | |
635 # | |
636 # Return type : Arrayref | |
637 # | |
638 sub _sort_stable_ids { | |
639 my $self = shift; | |
640 my %unique = map { $_->stable_id => 1 } @{ $self->get_all_ArchiveStableIds }; | |
641 return [sort keys %unique]; | |
642 } | |
643 | |
644 | |
645 =head2 optimise_tree | |
646 | |
647 Example : $history->optimise_tree; | |
648 Description : This method sorts the history tree so that the number of | |
649 overlapping branches is minimised (thus "untangling" the tree). | |
650 | |
651 It uses a clustering algorithm for this which iteratively moves | |
652 the nodes with the largest vertical distance next to each other | |
653 and looking for a mininum in total branch length. This might not | |
654 produce the overall optimum but usually converges on a local | |
655 optimum very quickly. | |
656 Return type : none | |
657 Exceptions : none | |
658 Caller : calculate_coords | |
659 Status : At Risk | |
660 : under development | |
661 | |
662 =cut | |
663 | |
664 sub optimise_tree { | |
665 my $self = shift; | |
666 | |
667 # get all non-self events | |
668 my @links; | |
669 foreach my $event (@{ $self->get_all_StableIdEvents }) { | |
670 next unless ($event->old_ArchiveStableId and $event->new_ArchiveStableId); | |
671 my $old_id = $event->old_ArchiveStableId->stable_id; | |
672 my $new_id = $event->new_ArchiveStableId->stable_id; | |
673 push @links, [$old_id, $new_id] if ($old_id ne $new_id); | |
674 } | |
675 | |
676 # get initial list of sorted unique stable IDs and put them into a position | |
677 # lookup hash | |
678 my $i = 0; | |
679 my %pos = map { $_ => $i++ } @{ $self->_sort_stable_ids }; | |
680 | |
681 my $opt_length; | |
682 my $successive_fails = 0; | |
683 my $k = 0; | |
684 my %seen; | |
685 | |
686 # for debug purposes: | |
687 # find the number of permutations for the given number of stable IDs | |
688 my $fact = $self->_factorial(scalar(keys %pos)); | |
689 | |
690 OPT: | |
691 while ($successive_fails < 100) { | |
692 | |
693 # sort links by vertical distance | |
694 #warn "sorting\n"; | |
695 $self->_sort_links(\@links, \%pos); | |
696 | |
697 # loop over sorted links | |
698 SORTED: | |
699 foreach my $link (@links) { | |
700 | |
701 #warn " trying ".join('-', @$link)."\n"; | |
702 | |
703 $k++; | |
704 | |
705 # remember last sort order | |
706 my %last = %pos; | |
707 | |
708 #my $this_order = join(':', sort { $pos{$a} <=> $pos{$b} } keys %pos); | |
709 #warn " before $this_order\n"; | |
710 | |
711 # try both to move bottom node next to top node's current position and | |
712 # top node next to bottom node's position - one of the methods might give | |
713 # you better results | |
714 DIRECT: | |
715 foreach my $direction (qw(up down)) { | |
716 | |
717 # move the nodes next to each other | |
718 $self->_move_nodes($link, \%pos, $direction); | |
719 | |
720 # next if we've seen this sort order before | |
721 my $new_order = join(':', sort { $pos{$a} <=> $pos{$b} } keys %pos); | |
722 #warn " after ($direction) $new_order\n"; | |
723 if ($seen{$new_order}) { | |
724 #warn " seen\n"; | |
725 %pos = %last; | |
726 next DIRECT; | |
727 } | |
728 $seen{$new_order} = 1; | |
729 | |
730 # calculate total link length for this sort order | |
731 my $total_length = $self->_total_link_length(\@links, \%pos); | |
732 | |
733 if (!$opt_length or $total_length < $opt_length) { | |
734 #warn " better ($total_length/$opt_length)\n"; | |
735 $opt_length = $total_length; | |
736 $successive_fails = 0; | |
737 next OPT; | |
738 } else { | |
739 #warn " worse ($total_length/$opt_length)\n"; | |
740 %pos = %last; | |
741 $successive_fails++; | |
742 } | |
743 } | |
744 | |
745 } | |
746 | |
747 last OPT; | |
748 | |
749 } | |
750 | |
751 #warn "Needed $k tries (of $fact) to find optimal tree.\n"; | |
752 | |
753 my @best = sort { $pos{$a} <=> $pos{$b} } keys %pos; | |
754 $self->{'sorted_tree'}->{'stable_ids'} = \@best; | |
755 } | |
756 | |
757 | |
758 # | |
759 # find the number of permutations for a give array size. | |
760 # used for debugging code (compare implemented algorithm to looping over all | |
761 # possible permutations). | |
762 # | |
763 sub _factorial { | |
764 my ($self, $n) = @_; | |
765 my $s = 1; | |
766 $s *= $n-- while $n > 0; | |
767 return $s; | |
768 } | |
769 | |
770 | |
771 # | |
772 # sort links by vertical distance | |
773 # | |
774 sub _sort_links { | |
775 my ($self, $links, $pos) = @_; | |
776 | |
777 my @lookup; | |
778 | |
779 foreach my $link (@$links) { | |
780 my $dist = $pos->{$link->[0]} - $pos->{$link->[1]}; | |
781 $dist = -$dist if ($dist < 0); | |
782 push @lookup, [$dist, $link]; | |
783 #warn " $dist ".join(' ', @$link)."\n"; | |
784 } | |
785 | |
786 @$links = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @lookup; | |
787 } | |
788 | |
789 | |
790 # | |
791 # make two nodes adjacent by moving the second node next to the first node | |
792 # all other node coordinates are adjusted accordingly | |
793 # | |
794 sub _move_nodes { | |
795 my ($self, $link, $pos, $direction) = @_; | |
796 | |
797 my $first_pos = $pos->{$link->[0]}; | |
798 my $second_pos = $pos->{$link->[1]}; | |
799 | |
800 # swap positions if necessary | |
801 if ($first_pos > $second_pos) { | |
802 my $tmp = $second_pos; | |
803 $second_pos = $first_pos; | |
804 $first_pos = $tmp; | |
805 } | |
806 #warn " $first_pos:$second_pos\n"; | |
807 | |
808 foreach my $p (keys %$pos) { | |
809 | |
810 my $val = $pos->{$p}; | |
811 | |
812 #warn " $p $val\n"; | |
813 if ($direction eq 'up') { | |
814 if ($val > $first_pos and $val < $second_pos) { | |
815 $val++; | |
816 } elsif ($val == $second_pos) { | |
817 $val = $first_pos + 1; | |
818 } | |
819 } else { | |
820 if ($val > $first_pos and $val < $second_pos) { | |
821 $val--; | |
822 } elsif ($val == $first_pos) { | |
823 $val = $second_pos - 1; | |
824 } | |
825 } | |
826 | |
827 #warn " $p $val\n"; | |
828 $pos->{$p} = $val; | |
829 #warn "\n"; | |
830 } | |
831 } | |
832 | |
833 | |
834 # | |
835 # calculate the total link (vertical distance) length based on this sort order | |
836 # | |
837 sub _total_link_length { | |
838 my ($self, $links, $pos) = @_; | |
839 | |
840 my $total_length; | |
841 | |
842 foreach my $link (@$links) { | |
843 my $length = $pos->{$link->[0]} - $pos->{$link->[1]}; | |
844 $length = -$length if ($length < 0); | |
845 $total_length += $length; | |
846 } | |
847 | |
848 return $total_length; | |
849 } | |
850 | |
851 | |
852 =head2 coords_by_ArchiveStableId | |
853 | |
854 Arg[1] : Bio::EnsEMBL::ArchiveStableId $archive_id | |
855 The ArchiveStableId to get tree grid coordinates for | |
856 Example : my ($x, $y) = | |
857 @{ $history->coords_by_ArchiveStableId($archive_id) }; | |
858 print $archive_id->stable_id, " coords: $x, $y\n"; | |
859 Description : Returns the coordinates of an ArchiveStableId in the history | |
860 tree grid. If the ArchiveStableId isn't found in this tree, an | |
861 empty list is returned. | |
862 | |
863 Coordinates are zero-based (i.e. the top leftmost element in | |
864 the grid has coordinates [0, 0], not [1, 1]). This is to | |
865 facilitate using them to create a matrix as a two-dimensional | |
866 array of arrays. | |
867 Return type : Arrayref (x coordinate, y coordinate) | |
868 Exceptions : thrown on wrong argument type | |
869 Caller : general | |
870 Status : At Risk | |
871 : under development | |
872 | |
873 =cut | |
874 | |
875 sub coords_by_ArchiveStableId { | |
876 my ($self, $archive_id) = @_; | |
877 | |
878 throw("Bio::EnsEMBL::ArchiveStableId object expected.") | |
879 unless ($archive_id and ref($archive_id) and | |
880 $archive_id->isa('Bio::EnsEMBL::ArchiveStableId')); | |
881 | |
882 return $self->{'sorted_tree'}->{'coords'}->{$self->_node_id($archive_id)} | |
883 || []; | |
884 } | |
885 | |
886 | |
887 =head2 calculate_coords | |
888 | |
889 Example : $history->calculate_coords; | |
890 Description : Pre-calculates the grid coordinates of all nodes in the tree. | |
891 Return type : none | |
892 Exceptions : none | |
893 Caller : ArchiveStableIdAdaptor::fetch_history_by_stable_id | |
894 Status : At Risk | |
895 : under development | |
896 | |
897 =cut | |
898 | |
899 sub calculate_coords { | |
900 my $self = shift; | |
901 | |
902 # reset any previous tree cordinate calculations | |
903 $self->reset_tree; | |
904 | |
905 # the "master" information for the sorted tree is stored as the sorted lists | |
906 # of releases (x) and stable IDs (y). Sort them now. | |
907 my $db_names = $self->get_release_db_names; | |
908 | |
909 # untangle tree by sorting stable IDs appropriately | |
910 $self->optimise_tree; | |
911 my $stable_ids = $self->get_unique_stable_ids; | |
912 | |
913 # for performance reasons, additionally store coordinates in a lookup hash | |
914 foreach my $archive_id (@{ $self->get_all_ArchiveStableIds }) { | |
915 | |
916 # coordinates are positions in the sorted lists | |
917 my $x = $self->_index_of($archive_id->db_name, $db_names); | |
918 my $y = $self->_index_of($archive_id->stable_id, $stable_ids); | |
919 | |
920 $self->{'sorted_tree'}->{'coords'}->{$self->_node_id($archive_id)} = | |
921 [ $x, $y ]; | |
922 } | |
923 } | |
924 | |
925 # | |
926 # Description : Returns the index of an element in an array | |
927 # Example : my @array = (a, b, c); | |
928 # my $i = _index_of('b', \@array); # will return 1 | |
929 # Return type : Int (or undef if element is not found in array) | |
930 # | |
931 sub _index_of { | |
932 my ($self, $element, $arrayref) = @_; | |
933 | |
934 throw("Expecting arrayref argument.") unless (ref($arrayref) eq 'ARRAY'); | |
935 | |
936 my @array = @$arrayref; | |
937 | |
938 while (my $e = pop(@array)) { | |
939 return scalar(@array) if ($e eq $element); | |
940 } | |
941 | |
942 return undef; | |
943 } | |
944 | |
945 | |
946 =head2 consolidate_tree | |
947 | |
948 Example : $history->consolidate_tree; | |
949 Description : Consolidate the history tree. This means removing nodes where | |
950 there wasn't a change and bridging gaps in the history. The end | |
951 result will be a sparse tree which only contains the necessary | |
952 information. | |
953 Return type : none | |
954 Exceptions : none | |
955 Caller : ArchiveStableIdAdaptor->fetch_history_tree_by_stable_id | |
956 Status : At Risk | |
957 : under development | |
958 | |
959 =cut | |
960 | |
961 sub consolidate_tree { | |
962 my $self = shift; | |
963 | |
964 # | |
965 # get all self-events and creations/deletions and sort them (by stable ID and | |
966 # chronologically) | |
967 # | |
968 my @event_lookup; | |
969 | |
970 foreach my $event (@{ $self->get_all_StableIdEvents }) { | |
971 | |
972 my $old_id = $event->old_ArchiveStableId; | |
973 my $new_id = $event->new_ArchiveStableId; | |
974 | |
975 if (!$old_id or !$new_id or ($old_id->stable_id eq $new_id->stable_id)) { | |
976 if ($old_id) { | |
977 push @event_lookup, [$old_id->stable_id, $old_id->release, | |
978 $old_id->db_name, $event]; | |
979 } else { | |
980 push @event_lookup, [$new_id->stable_id, $new_id->release - 1, | |
981 $new_id->db_name, $event]; | |
982 } | |
983 } | |
984 } | |
985 | |
986 my @self_events = map { $_->[3] } | |
987 sort { $a->[0] cmp $b->[0] || $a->[1] <=> $b->[1] || $a->[2] cmp $b->[2] } | |
988 @event_lookup; | |
989 | |
990 # | |
991 # consolidate tree | |
992 # | |
993 my $last = shift(@self_events); | |
994 | |
995 while (my $event = shift(@self_events)) { | |
996 | |
997 my $lo = $last->old_ArchiveStableId; | |
998 my $ln = $last->new_ArchiveStableId; | |
999 my $eo = $event->old_ArchiveStableId; | |
1000 my $en = $event->new_ArchiveStableId; | |
1001 | |
1002 if ($lo and $eo and $en and $lo->stable_id eq $eo->stable_id | |
1003 and $lo->version eq $eo->version) { | |
1004 | |
1005 # this removes redundant nodes and connects the remaining nodes: | |
1006 # | |
1007 # o--o--o -> o-----o | |
1008 # 1 1 1 1 1 | |
1009 | |
1010 #warn 'A: '.$last->ident_string.' | '.$event->ident_string."\n"; | |
1011 | |
1012 $self->remove_StableIdEvent($last); | |
1013 $self->remove_StableIdEvent($event); | |
1014 | |
1015 $event->old_ArchiveStableId($lo); | |
1016 | |
1017 $self->add_StableIdEvents($event); | |
1018 | |
1019 } elsif ($ln and $eo and $ln->db_name ne $eo->db_name | |
1020 and $ln->stable_id eq $eo->stable_id and $ln->version eq $eo->version) { | |
1021 | |
1022 # try to brigde gaps | |
1023 | |
1024 if ($en) { | |
1025 | |
1026 # o--o o--o -> o--o-----o | |
1027 # 1 2 2 2 1 2 2 | |
1028 # | |
1029 # o o--o -> o-----o | |
1030 # 1 1 1 1 1 | |
1031 | |
1032 #warn 'X: '.$last->ident_string.' | '.$event->ident_string."\n"; | |
1033 | |
1034 $self->remove_StableIdEvent($event); | |
1035 $event->old_ArchiveStableId($ln); | |
1036 $self->add_StableIdEvents($event); | |
1037 | |
1038 } elsif ($lo) { | |
1039 | |
1040 # there's a deletion event, deal with it differently | |
1041 | |
1042 if ($lo->version eq $ln->version) { | |
1043 | |
1044 # o--o o -> o-----o | |
1045 # 1 1 1 1 1 | |
1046 | |
1047 #warn 'Y: '.$last->ident_string.' | '.$event->ident_string."\n"; | |
1048 | |
1049 $self->remove_StableIdEvent($last); | |
1050 $last->new_ArchiveStableId($eo); | |
1051 $self->add_StableIdEvents($last); | |
1052 | |
1053 } else { | |
1054 | |
1055 # o--o o -> o--o--o | |
1056 # 1 2 2 1 2 2 | |
1057 | |
1058 #warn 'Z: '.$last->ident_string.' | '.$event->ident_string."\n"; | |
1059 | |
1060 $self->remove_StableIdEvent($event); | |
1061 $event->old_ArchiveStableId($ln); | |
1062 $event->new_ArchiveStableId($eo); | |
1063 $self->add_StableIdEvents($event); | |
1064 | |
1065 } | |
1066 | |
1067 } else { | |
1068 | |
1069 # creation followed by deletion in next mapping | |
1070 # | |
1071 # o o -> o--o | |
1072 # 1 1 1 1 | |
1073 | |
1074 #warn 'Q: '.$last->ident_string.' | '.$event->ident_string."\n"; | |
1075 | |
1076 $self->remove_StableIdEvent($last); | |
1077 $self->remove_StableIdEvent($event); | |
1078 $event->old_ArchiveStableId($ln); | |
1079 $event->new_ArchiveStableId($eo); | |
1080 $self->add_StableIdEvents($event); | |
1081 | |
1082 } | |
1083 | |
1084 } else { | |
1085 #warn 'C: '.$last->ident_string.' | '.$event->ident_string."\n"; | |
1086 } | |
1087 | |
1088 $last = $event; | |
1089 } | |
1090 | |
1091 # now add ArchiveStableIds of the remaining events to the tree | |
1092 $self->add_ArchiveStableIds_for_events; | |
1093 } | |
1094 | |
1095 | |
1096 =head2 reset_tree | |
1097 | |
1098 Example : $history->reset_tree; | |
1099 Description : Resets all pre-calculated tree grid data. Mostly used internally | |
1100 by methods that modify the tree. | |
1101 Return type : none | |
1102 Exceptions : none | |
1103 Caller : internal | |
1104 Status : At Risk | |
1105 : under development | |
1106 | |
1107 =cut | |
1108 | |
1109 sub reset_tree { | |
1110 my $self = shift; | |
1111 $self->{'sorted_tree'} = undef; | |
1112 } | |
1113 | |
1114 | |
1115 =head2 current_dbname | |
1116 | |
1117 Arg[1] : (optional) String $dbname - the dbname to set | |
1118 Example : my $dbname = $history->current_dbname; | |
1119 Description : Getter/setter for current dbname. | |
1120 Return type : String | |
1121 Exceptions : none | |
1122 Caller : general | |
1123 Status : At Risk | |
1124 : under development | |
1125 | |
1126 =cut | |
1127 | |
1128 sub current_dbname { | |
1129 my $self = shift; | |
1130 $self->{'current_dbname'} = shift if (@_); | |
1131 return $self->{'current_dbname'}; | |
1132 } | |
1133 | |
1134 | |
1135 =head2 current_release | |
1136 | |
1137 Arg[1] : (optional) Int $release - the release to set | |
1138 Example : my $release = $history->current_release; | |
1139 Description : Getter/setter for current release. | |
1140 Return type : Int | |
1141 Exceptions : none | |
1142 Caller : general | |
1143 Status : At Risk | |
1144 : under development | |
1145 | |
1146 =cut | |
1147 | |
1148 sub current_release { | |
1149 my $self = shift; | |
1150 $self->{'current_release'} = shift if (@_); | |
1151 return $self->{'current_release'}; | |
1152 } | |
1153 | |
1154 | |
1155 =head2 current_assembly | |
1156 | |
1157 Arg[1] : (optional) String $assembly - the assembly to set | |
1158 Example : my $assembly = $history->current_assembly; | |
1159 Description : Getter/setter for current assembly. | |
1160 Return type : String | |
1161 Exceptions : none | |
1162 Caller : general | |
1163 Status : At Risk | |
1164 : under development | |
1165 | |
1166 =cut | |
1167 | |
1168 sub current_assembly { | |
1169 my $self = shift; | |
1170 $self->{'current_assembly'} = shift if (@_); | |
1171 return $self->{'current_assembly'}; | |
1172 } | |
1173 | |
1174 | |
1175 =head2 is_incomplete | |
1176 | |
1177 Arg[1] : (optional) Boolean $incomplete | |
1178 Example : if ($history->is_incomplete) { | |
1179 print "Returned tree is incomplete due to too many mappings | |
1180 in the database.\n"; | |
1181 } | |
1182 Description : Getter/setter for incomplete flag. This is used by | |
1183 ArchiveStableIdAdaptor to indicate that it finished building | |
1184 the tree prematurely due to too many mappins in the db and can | |
1185 be used by applications to print warning messages. | |
1186 Return type : Boolean | |
1187 Exceptions : none | |
1188 Caller : general | |
1189 Status : At Risk | |
1190 : under development | |
1191 | |
1192 =cut | |
1193 | |
1194 sub is_incomplete { | |
1195 my $self = shift; | |
1196 $self->{'incomplete'} = shift if (@_); | |
1197 return $self->{'incomplete'}; | |
1198 } | |
1199 | |
1200 | |
1201 1; | |
1202 |