0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::StableIdHistoryTree - object representing a stable ID history tree
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 my $reg = "Bio::EnsEMBL::Registry";
|
|
28 my $archiveStableIdAdaptor =
|
|
29 $reg->get_adaptor( 'human', 'core', 'ArchiveStableId' );
|
|
30
|
|
31 my $stable_id = 'ENSG00000068990';
|
|
32 my $history =
|
|
33 $archiveStableIdAdaptor->fetch_history_tree_by_stable_id('ENSG01');
|
|
34
|
|
35 print "Unique stable IDs in this tree:\n";
|
|
36 print join( ", ", @{ $history->get_unique_stable_ids } ), "\n";
|
|
37
|
|
38 print "\nReleases in this tree:\n";
|
|
39 print join( ", ", @{ $history->get_release_display_names } ), "\n";
|
|
40
|
|
41 print "\nCoordinates of nodes in the tree:\n\n";
|
|
42 foreach my $a ( @{ $history->get_all_ArchiveStableIds } ) {
|
|
43 print " Stable ID: " . $a->stable_id . "." . $a->version . "\n";
|
|
44 print " Release: "
|
|
45 . $a->release . " ("
|
|
46 . $a->assembly . ", "
|
|
47 . $a->db_name . ")\n";
|
|
48 print " coords: "
|
|
49 . join( ', ', @{ $history->coords_by_ArchiveStableId($a) } )
|
|
50 . "\n\n";
|
|
51 }
|
|
52
|
|
53 =head1 DESCRIPTION
|
|
54
|
|
55 This object represents a stable ID history tree graph.
|
|
56
|
|
57 The graph is implemented as a collection of nodes (ArchiveStableId
|
|
58 objects) and links (StableIdEvent objects) which have positions
|
|
59 on an (x,y) grid. The x axis is used for releases, the y axis for
|
|
60 stable_ids. The idea is to create a plot similar to this (the numbers
|
|
61 shown on the nodes are the stable ID versions):
|
|
62
|
|
63 ENSG001 1-------------- 2--
|
|
64 \
|
|
65 ENSG003 1-----1
|
|
66 /
|
|
67 ENSG002 1-------2----------
|
|
68
|
|
69 38 39 40 41 42
|
|
70
|
|
71 The grid coordinates of the ArchiveStableId objects in this example
|
|
72 would be (note that coordinates are zero-based):
|
|
73
|
|
74 ENSG001.1 (0, 0)
|
|
75 ENSG001.2 (2, 0)
|
|
76 ENSG003.1 (release 41) (3, 1)
|
|
77 ENSG003.1 (release 42) (4, 1)
|
|
78 ENSG002.1 (0, 2)
|
|
79 ENSG002.2 (1, 2)
|
|
80
|
|
81 The tree will only contain those nodes which had a change in the stable
|
|
82 ID version. Therefore, in the above example, in release 39 ENSG001 was
|
|
83 present and had version 1 (but will not be drawn there, to unclutter the
|
|
84 output).
|
|
85
|
|
86 The grid positions will be calculated by the API and will try to
|
|
87 untangle the tree (i.e. try to avoid overlapping lines).
|
|
88
|
|
89 =head1 METHODS
|
|
90
|
|
91 new
|
|
92 add_ArchiveStableIds
|
|
93 add_ArchiveStableIds_for_events
|
|
94 remove_ArchiveStableId
|
|
95 flush_ArchiveStableIds
|
|
96 add_StableIdEvents
|
|
97 remove_StableIdEvent
|
|
98 flush_StableIdEvents
|
|
99 get_all_ArchiveStableIds
|
|
100 get_all_StableIdEvents
|
|
101 get_latest_StableIdEvent
|
|
102 get_release_display_names
|
|
103 get_release_db_names
|
|
104 get_unique_stable_ids
|
|
105 optimise_tree
|
|
106 coords_by_ArchiveStableId
|
|
107 calculate_coords
|
|
108 consolidate_tree
|
|
109 reset_tree
|
|
110 current_dbname
|
|
111 current_release
|
|
112 current_assembly
|
|
113 is_incomplete
|
|
114
|
|
115 =head1 RELATED MODULES
|
|
116
|
|
117 Bio::EnsEMBL::ArchiveStableId
|
|
118 Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor
|
|
119 Bio::EnsEMBL::StableIdEvent
|
|
120
|
|
121 =cut
|
|
122
|
|
123 package Bio::EnsEMBL::StableIdHistoryTree;
|
|
124
|
|
125 use strict;
|
|
126 use warnings;
|
|
127 no warnings 'uninitialized';
|
|
128
|
|
129 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
130 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
|
|
131
|
|
132
|
|
133 =head2 new
|
|
134
|
|
135 Arg [CURRENT_DBNAME] : (optional) name of current db
|
|
136 Arg [CURRENT_RELEASE] : (optional) current release number
|
|
137 Arg [CURRENT_ASSEMBLY] : (optional) current assembly name
|
|
138 Example : my $history = Bio::EnsEMBL::StableIdHistoryTree->new;
|
|
139 Description : object constructor
|
|
140 Return type : Bio::EnsEMBL::StableIdHistoryTree
|
|
141 Exceptions : none
|
|
142 Caller : general
|
|
143 Status : At Risk
|
|
144 : under development
|
|
145
|
|
146 =cut
|
|
147
|
|
148 sub new {
|
|
149 my $caller = shift;
|
|
150 my $class = ref($caller) || $caller;
|
|
151
|
|
152 my $self = {};
|
|
153 bless $self, $class;
|
|
154
|
|
155 my ($current_dbname, $current_release, $current_assembly) =
|
|
156 rearrange([qw( CURRENT_DBNAME CURRENT_RELEASE CURRENT_ASSEMBLY )], @_ );
|
|
157
|
|
158 # initialise
|
|
159 $self->{'current_dbname'} = $current_dbname;
|
|
160 $self->{'current_release'} = $current_release;
|
|
161 $self->{'current_assembly'} = $current_assembly;
|
|
162
|
|
163 return $self;
|
|
164 }
|
|
165
|
|
166
|
|
167 =head2 add_ArchiveStableIds
|
|
168
|
|
169 Arg[1..n] : Bio::EnsEMBL::ArchiveStableId's @archive_ids
|
|
170 The ArchiveStableIds to add to the the history tree
|
|
171 Example : my $archive_id = $archiveStableIdAdaptor->fetch_by_stable_id(
|
|
172 'ENSG00024808');
|
|
173 $history->add_ArchiveStableId($archive_id);
|
|
174 Description : Adds ArchiveStableIds (nodes) to the history tree. No
|
|
175 calculation of grid coordinates is done at this point, you need
|
|
176 to initiate this manually with calculate_coords().
|
|
177 ArchiveStableIds are only added once for each release (to avoid
|
|
178 duplicates).
|
|
179 Return type : none
|
|
180 Exceptions : thrown on invalid or missing argument
|
|
181 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
|
|
182 Status : At Risk
|
|
183 : under development
|
|
184
|
|
185 =cut
|
|
186
|
|
187 sub add_ArchiveStableIds {
|
|
188 my ($self, @archive_ids) = @_;
|
|
189
|
|
190 throw("You must provide one or more Bio::EnsEMBL::ArchiveStableIds to add.")
|
|
191 unless (@archive_ids);
|
|
192
|
|
193 foreach my $archive_id (@archive_ids) {
|
|
194 throw("Bio::EnsEMBL::ArchiveStableId object expected.")
|
|
195 unless (ref($archive_id) &&
|
|
196 $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));
|
|
197
|
|
198 $self->{'nodes'}->{$self->_node_id($archive_id)} = $archive_id;
|
|
199 }
|
|
200 }
|
|
201
|
|
202
|
|
203 =head2 add_ArchiveStableIds_for_events
|
|
204
|
|
205 Example : my $history = Bio::EnsEMBL::StableIdHistoryTree->new;
|
|
206 $history->add_StableIdEvents($event1, $event2);
|
|
207 $history->add_ArchiveStableIds_for_events;
|
|
208 Description : Convenience method that adds all ArchiveStableIds for all
|
|
209 StableIdEvents attached to this object to the tree.
|
|
210 Return type : none
|
|
211 Exceptions : none
|
|
212 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
|
|
213 Status : At Risk
|
|
214 : under development
|
|
215
|
|
216 =cut
|
|
217
|
|
218 sub add_ArchiveStableIds_for_events {
|
|
219 my $self = shift;
|
|
220
|
|
221 foreach my $event (@{ $self->get_all_StableIdEvents }) {
|
|
222 if ($event->old_ArchiveStableId) {
|
|
223 $self->add_ArchiveStableIds($event->old_ArchiveStableId);
|
|
224 }
|
|
225 if ($event->new_ArchiveStableId) {
|
|
226 $self->add_ArchiveStableIds($event->new_ArchiveStableId);
|
|
227 }
|
|
228 }
|
|
229 }
|
|
230
|
|
231
|
|
232 =head2 remove_ArchiveStableId
|
|
233
|
|
234 Arg[1] : Bio::EnsEMBL::ArchiveStableId $archive_id
|
|
235 the ArchiveStableId to remove from the tree
|
|
236 Example : $history->remove_ArchiveStableId($archive_id);
|
|
237 Description : Removes an ArchiveStableId from the tree.
|
|
238 Return type : none
|
|
239 Exceptions : thrown on missing or invalid argument
|
|
240 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
|
|
241 Status : At Risk
|
|
242 : under development
|
|
243
|
|
244 =cut
|
|
245
|
|
246 sub remove_ArchiveStableId {
|
|
247 my ($self, $archive_id) = @_;
|
|
248
|
|
249 throw("Bio::EnsEMBL::ArchiveStableId object expected.")
|
|
250 unless ($archive_id && ref($archive_id) &&
|
|
251 $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));
|
|
252
|
|
253 my %nodes = %{ $self->{'nodes'} };
|
|
254 delete $nodes{$self->_node_id($archive_id)};
|
|
255 $self->{'nodes'} = \%nodes;
|
|
256 }
|
|
257
|
|
258
|
|
259 =head2 flush_ArchiveStableIds
|
|
260
|
|
261 Example : $history->flush_ArchiveStableIds;
|
|
262 Description : Remove all ArchiveStableIds from the tree.
|
|
263 Return type : none
|
|
264 Exceptions : none
|
|
265 Caller : general
|
|
266 Status : At Risk
|
|
267 : under development
|
|
268
|
|
269 =cut
|
|
270
|
|
271 sub flush_ArchiveStableIds {
|
|
272 my $self = shift;
|
|
273 $self->{'nodes'} = undef;
|
|
274 }
|
|
275
|
|
276
|
|
277 #
|
|
278 # generate a unique node identifier
|
|
279 #
|
|
280 sub _node_id {
|
|
281 my ($self, $archive_id) = @_;
|
|
282 return $archive_id->stable_id . ':' . $archive_id->db_name;
|
|
283 }
|
|
284
|
|
285
|
|
286 =head2 add_StableIdEvents
|
|
287
|
|
288 Arg[1..n] : Bio::EnsEMBL::StableIdEvent's @events
|
|
289 The StableIdEvents to add to the the history tree
|
|
290 Example : $history->add_StableIdEvents($event);
|
|
291 Description : Adds StableIdEvents (links) to the history tree. Note that
|
|
292 ArchiveStableIds attached to the StableIdEvent aren't added to
|
|
293 the tree automatically, you'll need to call
|
|
294 add_ArchiveStableIds_for_events later.
|
|
295 Return type : none
|
|
296 Exceptions : thrown on invalid or missing argument
|
|
297 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
|
|
298 Status : At Risk
|
|
299 : under development
|
|
300
|
|
301 =cut
|
|
302
|
|
303 sub add_StableIdEvents {
|
|
304 my ($self, @events) = @_;
|
|
305
|
|
306 throw("You must provide one or more Bio::EnsEMBL::StableIdsEvents to add.")
|
|
307 unless (@events);
|
|
308
|
|
309 foreach my $event (@events) {
|
|
310 throw("Bio::EnsEMBL::StableIdEvent object expected.")
|
|
311 unless ($event->isa('Bio::EnsEMBL::StableIdEvent'));
|
|
312
|
|
313 $self->{'links'}->{$self->_link_id($event)} = $event;
|
|
314 }
|
|
315 }
|
|
316
|
|
317
|
|
318 =head2 remove_StableIdEvent
|
|
319
|
|
320 Arg[1] : Bio::EnsEMBL::StableIdEvent $event
|
|
321 the StableIdEvent to remove from the tree
|
|
322 Example : $history->remove_StableIdEvent($event);
|
|
323 Description : Removes a StableIdEvent from the tree.
|
|
324 Return type : none
|
|
325 Exceptions : thrown on missing or invalid arguments
|
|
326 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
|
|
327 Status : At Risk
|
|
328 : under development
|
|
329
|
|
330 =cut
|
|
331
|
|
332 sub remove_StableIdEvent {
|
|
333 my ($self, $event) = @_;
|
|
334
|
|
335 throw("Bio::EnsEMBL::StableIdEvent object expected.") unless
|
|
336 ($event && ref($event) && $event->isa('Bio::EnsEMBL::StableIdEvent'));
|
|
337
|
|
338 my %links = %{ $self->{'links'} };
|
|
339 delete $links{$self->_link_id($event)};
|
|
340 $self->{'links'} = \%links;
|
|
341 }
|
|
342
|
|
343
|
|
344 =head2 flush_StableIdEvents
|
|
345
|
|
346 Example : $history->flush_StableIdEvents;
|
|
347 Description : Removes all StableIdEvents from the tree.
|
|
348 Return type : none
|
|
349 Exceptions : none
|
|
350 Caller : general
|
|
351 Status : At Risk
|
|
352 : under development
|
|
353
|
|
354 =cut
|
|
355
|
|
356 sub flush_StableIdEvents {
|
|
357 my $self = shift;
|
|
358 $self->{'links'} = undef;
|
|
359 }
|
|
360
|
|
361
|
|
362 #
|
|
363 # generate a unique link identifier
|
|
364 #
|
|
365 sub _link_id {
|
|
366 my ($self, $event) = @_;
|
|
367
|
|
368 my ($old_id, $old_db_name, $new_id, $new_db_name);
|
|
369 if ($event->old_ArchiveStableId) {
|
|
370 $old_id = $event->old_ArchiveStableId->stable_id;
|
|
371 $old_db_name = $event->old_ArchiveStableId->db_name;
|
|
372 }
|
|
373 if ($event->new_ArchiveStableId) {
|
|
374 $new_id = $event->new_ArchiveStableId->stable_id;
|
|
375 $new_db_name = $event->new_ArchiveStableId->db_name;
|
|
376 }
|
|
377
|
|
378 return join(':', $old_id, $old_db_name, $new_id, $new_db_name);
|
|
379 }
|
|
380
|
|
381
|
|
382 =head2 get_all_ArchiveStableIds
|
|
383
|
|
384 Example : foreach my $arch_id (@{ $history->get_all_ArchiveStableIds }) {
|
|
385 print $arch_id->stable_id, '.', $arch_id->version, "\n";
|
|
386 }
|
|
387 Description : Gets all ArchiveStableIds (nodes) in this tree.
|
|
388 Return type : Arrayref of Bio::EnsEMBL::ArchiveStableId objects
|
|
389 Exceptions : none
|
|
390 Caller : general
|
|
391 Status : At Risk
|
|
392 : under development
|
|
393
|
|
394 =cut
|
|
395
|
|
396 sub get_all_ArchiveStableIds {
|
|
397 my $self = shift;
|
|
398 return [ values %{ $self->{'nodes'} } ];
|
|
399 }
|
|
400
|
|
401
|
|
402 =head2 get_all_current_ArchiveStableIds
|
|
403
|
|
404 Example : foreach my $arch_id (@{ $history->get_all_current_ArchiveStableIds }) {
|
|
405 print $arch_id->stable_id, '.', $arch_id->version, "\n";
|
|
406 }
|
|
407 Description : Convenience method to get all current ArchiveStableIds in this
|
|
408 tree.
|
|
409
|
|
410 Note that no lazy loading of "current" status is done at that
|
|
411 stage; as long as you retrieve your StableIdHistoryTree object
|
|
412 from ArchiveStableIdAdaptor, you'll get the right answer. In
|
|
413 other use cases, if you want to make sure you really get all
|
|
414 current stable IDs, loop over the result of
|
|
415 get_all_ArchiveStableIds() and call
|
|
416 ArchiveStableId->current_version() on all of them.
|
|
417 Return type : Arrayref of Bio::EnsEMBL::ArchiveStableId objects
|
|
418 Exceptions : none
|
|
419 Caller : general
|
|
420 Status : At Risk
|
|
421 : under development
|
|
422
|
|
423 =cut
|
|
424
|
|
425 sub get_all_current_ArchiveStableIds {
|
|
426 my $self = shift;
|
|
427
|
|
428 my @current = ();
|
|
429
|
|
430 foreach my $arch_id (@{ $self->get_all_ArchiveStableIds }) {
|
|
431 push @current, $arch_id if ($arch_id->is_current);
|
|
432 }
|
|
433
|
|
434 return \@current;
|
|
435 }
|
|
436
|
|
437
|
|
438 =head2 get_all_StableIdEvents
|
|
439
|
|
440 Example : foreach my $event (@{ $history->get_all_StableIdsEvents }) {
|
|
441 print "Old stable ID: ",
|
|
442 ($event->get_attribute('old', 'stable_id') or 'none'), "\n";
|
|
443 print "New stable ID: ",
|
|
444 ($event->get_attribute('new', 'stable_id') or 'none'), "\n";
|
|
445 print "Mapping score: ", $event->score, "\n";
|
|
446 }
|
|
447 Description : Gets all StableIdsEvents (links) in this tree.
|
|
448 Return type : Arrayref of Bio::EnsEMBL::StableIdEvent objects
|
|
449 Exceptions : none
|
|
450 Caller : general
|
|
451 Status : At Risk
|
|
452 : under development
|
|
453
|
|
454 =cut
|
|
455
|
|
456 sub get_all_StableIdEvents {
|
|
457 my $self = shift;
|
|
458 return [ values %{ $self->{'links'} } ];
|
|
459 }
|
|
460
|
|
461
|
|
462 =head2 get_latest_StableIdEvent
|
|
463
|
|
464 Arg[1] : Bio::EnsEMBL::ArchiveStableId $arch_id - the stable ID to get
|
|
465 the latest Event for
|
|
466 Example : my $arch_id = Bio::EnsEMBL::ArchiveStableId->new(
|
|
467 -stable_id => 'ENSG00001'
|
|
468 );
|
|
469 my $event = $history->get_latest_Event($arch_id);
|
|
470 Description : Returns the latest StableIdEvent found in the tree where a given
|
|
471 stable ID is the new stable ID. If more than one is found (e.g.
|
|
472 in a merge scenario in the latest mapping), preference is given
|
|
473 to self-events.
|
|
474 Return type : Bio::EnsEMBL::StableIdEvent
|
|
475 Exceptions : thrown on missing or wrong argument
|
|
476 Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::add_all_current_to_history, general
|
|
477 Status : At Risk
|
|
478 : under development
|
|
479
|
|
480 =cut
|
|
481
|
|
482 sub get_latest_StableIdEvent {
|
|
483 my $self = shift;
|
|
484 my $arch_id = shift;
|
|
485
|
|
486 unless ($arch_id and $arch_id->isa('Bio::EnsEMBL::ArchiveStableId')) {
|
|
487 throw("Need a Bio::EnsEMBL::ArchiveStableId.");
|
|
488 }
|
|
489
|
|
490 my @all_events = @{ $self->get_all_StableIdEvents };
|
|
491 my @self_events = ();
|
|
492
|
|
493 while (my $event = shift(@all_events)) {
|
|
494 if ($event->new_ArchiveStableId and
|
|
495 $event->new_ArchiveStableId->stable_id eq $arch_id->stable_id) {
|
|
496 push @self_events, $event;
|
|
497 }
|
|
498 }
|
|
499
|
|
500 my @sorted = sort { $b->new_ArchiveStableId->release <=>
|
|
501 $a->new_ArchiveStableId->release } @self_events;
|
|
502
|
|
503 # give priority to self events
|
|
504 my $latest;
|
|
505 while ($latest = shift @sorted) {
|
|
506 last if (($latest->old_ArchiveStableId and
|
|
507 $latest->old_ArchiveStableId->stable_id eq $arch_id->stable_id)
|
|
508 or !$latest->old_ArchiveStableId);
|
|
509 }
|
|
510
|
|
511 return $latest;
|
|
512 }
|
|
513
|
|
514
|
|
515 =head2 get_release_display_names
|
|
516
|
|
517 Example : print "Unique release display_names in this tree:\n"
|
|
518 foreach my $name (@{ $history->get_release_display_names }) {
|
|
519 print " $name\n";
|
|
520 }
|
|
521 Description : Returns a chronologically sorted list of unique release
|
|
522 display_names in this tree.
|
|
523
|
|
524 This method can be used to determine the number of columns when
|
|
525 plotting the history tree.
|
|
526 Return type : Arrayref of strings.
|
|
527 Exceptions : none
|
|
528 Caller : general
|
|
529 Status : At Risk
|
|
530 : under development
|
|
531
|
|
532 =cut
|
|
533
|
|
534 sub get_release_display_names {
|
|
535 my $self = shift;
|
|
536
|
|
537 my @display_names = map { $_->[1] } @{ $self->_sort_releases };
|
|
538
|
|
539 return \@display_names;
|
|
540 }
|
|
541
|
|
542
|
|
543 =head2 get_release_db_names
|
|
544
|
|
545 Example : print "Unique release db_names in this tree:\n"
|
|
546 foreach my $name (@{ $history->get_release_db_names }) {
|
|
547 print " $name\n";
|
|
548 }
|
|
549 Description : Returns a chronologically sorted list of unique release
|
|
550 db_names in this tree.
|
|
551 Return type : Arrayref of strings.
|
|
552 Exceptions : none
|
|
553 Caller : general
|
|
554 Status : At Risk
|
|
555 : under development
|
|
556
|
|
557 =cut
|
|
558
|
|
559 sub get_release_db_names {
|
|
560 my $self = shift;
|
|
561
|
|
562 my @db_names = map { $_->[0] } @{ $self->_sort_releases };
|
|
563
|
|
564 return \@db_names;
|
|
565 }
|
|
566
|
|
567
|
|
568 #
|
|
569 # Create a chronologically sorted list of releases.
|
|
570 #
|
|
571 # Return type : Arrayref of arrayrefs (db_name, release)
|
|
572 #
|
|
573 sub _sort_releases {
|
|
574 my $self = shift;
|
|
575
|
|
576 unless ($self->{'sorted_tree'}->{'releases'}) {
|
|
577
|
|
578 my %unique = ();
|
|
579
|
|
580 foreach my $archive_id (@{ $self->get_all_ArchiveStableIds }) {
|
|
581 $unique{join(':', $archive_id->db_name, $archive_id->release)} = 1;
|
|
582 }
|
|
583
|
|
584 # sort releases by release number, then db_name; this should get them into
|
|
585 # chronological order
|
|
586 my @releases = sort { $a->[1] <=> $b->[1] || $a->[0] cmp $b->[0] }
|
|
587 map { [ split(/:/, $_) ] } keys(%unique);
|
|
588
|
|
589 $self->{'sorted_tree'}->{'releases'} = \@releases;
|
|
590
|
|
591 }
|
|
592
|
|
593 return $self->{'sorted_tree'}->{'releases'};
|
|
594 }
|
|
595
|
|
596
|
|
597 =head2 get_unique_stable_ids
|
|
598
|
|
599 Example : print "Unique stable IDs in this tree:\n"
|
|
600 foreach my $id (@{ $history->get_unique_stable_ids }) {
|
|
601 print " $id\n";
|
|
602 }
|
|
603 Description : Returns a list of unique stable IDs in this tree. Version is not
|
|
604 taken into account here. This method can be used to determine
|
|
605 the number of rows when plotting the history with each stable ID
|
|
606 occupying one line.
|
|
607
|
|
608 Sort algorithm will depend on what was chosen when the sorted
|
|
609 tree was generated. This ranges from a simple alphanumeric sort
|
|
610 to algorithms trying to untangle the history tree. If no
|
|
611 pre-sorted data is found, an alphanumerically sorted list will
|
|
612 be returned by default.
|
|
613 Return type : Arrayref of strings.
|
|
614 Exceptions : none
|
|
615 Caller : general
|
|
616 Status : At Risk
|
|
617 : under development
|
|
618
|
|
619 =cut
|
|
620
|
|
621 sub get_unique_stable_ids {
|
|
622 my $self = shift;
|
|
623
|
|
624 unless ($self->{'sorted_tree'}->{'stable_ids'}) {
|
|
625 $self->{'sorted_tree'}->{'stable_ids'} = $self->_sort_stable_ids;
|
|
626 }
|
|
627
|
|
628 return $self->{'sorted_tree'}->{'stable_ids'};
|
|
629 }
|
|
630
|
|
631
|
|
632 #
|
|
633 # Returns a list of stable IDs in this history tree, sorted alphabetically.
|
|
634 # This is the simplest sort function used and doesn't try to untangle the tree.
|
|
635 #
|
|
636 # Return type : Arrayref
|
|
637 #
|
|
638 sub _sort_stable_ids {
|
|
639 my $self = shift;
|
|
640 my %unique = map { $_->stable_id => 1 } @{ $self->get_all_ArchiveStableIds };
|
|
641 return [sort keys %unique];
|
|
642 }
|
|
643
|
|
644
|
|
645 =head2 optimise_tree
|
|
646
|
|
647 Example : $history->optimise_tree;
|
|
648 Description : This method sorts the history tree so that the number of
|
|
649 overlapping branches is minimised (thus "untangling" the tree).
|
|
650
|
|
651 It uses a clustering algorithm for this which iteratively moves
|
|
652 the nodes with the largest vertical distance next to each other
|
|
653 and looking for a mininum in total branch length. This might not
|
|
654 produce the overall optimum but usually converges on a local
|
|
655 optimum very quickly.
|
|
656 Return type : none
|
|
657 Exceptions : none
|
|
658 Caller : calculate_coords
|
|
659 Status : At Risk
|
|
660 : under development
|
|
661
|
|
662 =cut
|
|
663
|
|
664 sub optimise_tree {
|
|
665 my $self = shift;
|
|
666
|
|
667 # get all non-self events
|
|
668 my @links;
|
|
669 foreach my $event (@{ $self->get_all_StableIdEvents }) {
|
|
670 next unless ($event->old_ArchiveStableId and $event->new_ArchiveStableId);
|
|
671 my $old_id = $event->old_ArchiveStableId->stable_id;
|
|
672 my $new_id = $event->new_ArchiveStableId->stable_id;
|
|
673 push @links, [$old_id, $new_id] if ($old_id ne $new_id);
|
|
674 }
|
|
675
|
|
676 # get initial list of sorted unique stable IDs and put them into a position
|
|
677 # lookup hash
|
|
678 my $i = 0;
|
|
679 my %pos = map { $_ => $i++ } @{ $self->_sort_stable_ids };
|
|
680
|
|
681 my $opt_length;
|
|
682 my $successive_fails = 0;
|
|
683 my $k = 0;
|
|
684 my %seen;
|
|
685
|
|
686 # for debug purposes:
|
|
687 # find the number of permutations for the given number of stable IDs
|
|
688 my $fact = $self->_factorial(scalar(keys %pos));
|
|
689
|
|
690 OPT:
|
|
691 while ($successive_fails < 100) {
|
|
692
|
|
693 # sort links by vertical distance
|
|
694 #warn "sorting\n";
|
|
695 $self->_sort_links(\@links, \%pos);
|
|
696
|
|
697 # loop over sorted links
|
|
698 SORTED:
|
|
699 foreach my $link (@links) {
|
|
700
|
|
701 #warn " trying ".join('-', @$link)."\n";
|
|
702
|
|
703 $k++;
|
|
704
|
|
705 # remember last sort order
|
|
706 my %last = %pos;
|
|
707
|
|
708 #my $this_order = join(':', sort { $pos{$a} <=> $pos{$b} } keys %pos);
|
|
709 #warn " before $this_order\n";
|
|
710
|
|
711 # try both to move bottom node next to top node's current position and
|
|
712 # top node next to bottom node's position - one of the methods might give
|
|
713 # you better results
|
|
714 DIRECT:
|
|
715 foreach my $direction (qw(up down)) {
|
|
716
|
|
717 # move the nodes next to each other
|
|
718 $self->_move_nodes($link, \%pos, $direction);
|
|
719
|
|
720 # next if we've seen this sort order before
|
|
721 my $new_order = join(':', sort { $pos{$a} <=> $pos{$b} } keys %pos);
|
|
722 #warn " after ($direction) $new_order\n";
|
|
723 if ($seen{$new_order}) {
|
|
724 #warn " seen\n";
|
|
725 %pos = %last;
|
|
726 next DIRECT;
|
|
727 }
|
|
728 $seen{$new_order} = 1;
|
|
729
|
|
730 # calculate total link length for this sort order
|
|
731 my $total_length = $self->_total_link_length(\@links, \%pos);
|
|
732
|
|
733 if (!$opt_length or $total_length < $opt_length) {
|
|
734 #warn " better ($total_length/$opt_length)\n";
|
|
735 $opt_length = $total_length;
|
|
736 $successive_fails = 0;
|
|
737 next OPT;
|
|
738 } else {
|
|
739 #warn " worse ($total_length/$opt_length)\n";
|
|
740 %pos = %last;
|
|
741 $successive_fails++;
|
|
742 }
|
|
743 }
|
|
744
|
|
745 }
|
|
746
|
|
747 last OPT;
|
|
748
|
|
749 }
|
|
750
|
|
751 #warn "Needed $k tries (of $fact) to find optimal tree.\n";
|
|
752
|
|
753 my @best = sort { $pos{$a} <=> $pos{$b} } keys %pos;
|
|
754 $self->{'sorted_tree'}->{'stable_ids'} = \@best;
|
|
755 }
|
|
756
|
|
757
|
|
758 #
|
|
759 # find the number of permutations for a give array size.
|
|
760 # used for debugging code (compare implemented algorithm to looping over all
|
|
761 # possible permutations).
|
|
762 #
|
|
763 sub _factorial {
|
|
764 my ($self, $n) = @_;
|
|
765 my $s = 1;
|
|
766 $s *= $n-- while $n > 0;
|
|
767 return $s;
|
|
768 }
|
|
769
|
|
770
|
|
771 #
|
|
772 # sort links by vertical distance
|
|
773 #
|
|
774 sub _sort_links {
|
|
775 my ($self, $links, $pos) = @_;
|
|
776
|
|
777 my @lookup;
|
|
778
|
|
779 foreach my $link (@$links) {
|
|
780 my $dist = $pos->{$link->[0]} - $pos->{$link->[1]};
|
|
781 $dist = -$dist if ($dist < 0);
|
|
782 push @lookup, [$dist, $link];
|
|
783 #warn " $dist ".join(' ', @$link)."\n";
|
|
784 }
|
|
785
|
|
786 @$links = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @lookup;
|
|
787 }
|
|
788
|
|
789
|
|
790 #
|
|
791 # make two nodes adjacent by moving the second node next to the first node
|
|
792 # all other node coordinates are adjusted accordingly
|
|
793 #
|
|
794 sub _move_nodes {
|
|
795 my ($self, $link, $pos, $direction) = @_;
|
|
796
|
|
797 my $first_pos = $pos->{$link->[0]};
|
|
798 my $second_pos = $pos->{$link->[1]};
|
|
799
|
|
800 # swap positions if necessary
|
|
801 if ($first_pos > $second_pos) {
|
|
802 my $tmp = $second_pos;
|
|
803 $second_pos = $first_pos;
|
|
804 $first_pos = $tmp;
|
|
805 }
|
|
806 #warn " $first_pos:$second_pos\n";
|
|
807
|
|
808 foreach my $p (keys %$pos) {
|
|
809
|
|
810 my $val = $pos->{$p};
|
|
811
|
|
812 #warn " $p $val\n";
|
|
813 if ($direction eq 'up') {
|
|
814 if ($val > $first_pos and $val < $second_pos) {
|
|
815 $val++;
|
|
816 } elsif ($val == $second_pos) {
|
|
817 $val = $first_pos + 1;
|
|
818 }
|
|
819 } else {
|
|
820 if ($val > $first_pos and $val < $second_pos) {
|
|
821 $val--;
|
|
822 } elsif ($val == $first_pos) {
|
|
823 $val = $second_pos - 1;
|
|
824 }
|
|
825 }
|
|
826
|
|
827 #warn " $p $val\n";
|
|
828 $pos->{$p} = $val;
|
|
829 #warn "\n";
|
|
830 }
|
|
831 }
|
|
832
|
|
833
|
|
834 #
|
|
835 # calculate the total link (vertical distance) length based on this sort order
|
|
836 #
|
|
837 sub _total_link_length {
|
|
838 my ($self, $links, $pos) = @_;
|
|
839
|
|
840 my $total_length;
|
|
841
|
|
842 foreach my $link (@$links) {
|
|
843 my $length = $pos->{$link->[0]} - $pos->{$link->[1]};
|
|
844 $length = -$length if ($length < 0);
|
|
845 $total_length += $length;
|
|
846 }
|
|
847
|
|
848 return $total_length;
|
|
849 }
|
|
850
|
|
851
|
|
852 =head2 coords_by_ArchiveStableId
|
|
853
|
|
854 Arg[1] : Bio::EnsEMBL::ArchiveStableId $archive_id
|
|
855 The ArchiveStableId to get tree grid coordinates for
|
|
856 Example : my ($x, $y) =
|
|
857 @{ $history->coords_by_ArchiveStableId($archive_id) };
|
|
858 print $archive_id->stable_id, " coords: $x, $y\n";
|
|
859 Description : Returns the coordinates of an ArchiveStableId in the history
|
|
860 tree grid. If the ArchiveStableId isn't found in this tree, an
|
|
861 empty list is returned.
|
|
862
|
|
863 Coordinates are zero-based (i.e. the top leftmost element in
|
|
864 the grid has coordinates [0, 0], not [1, 1]). This is to
|
|
865 facilitate using them to create a matrix as a two-dimensional
|
|
866 array of arrays.
|
|
867 Return type : Arrayref (x coordinate, y coordinate)
|
|
868 Exceptions : thrown on wrong argument type
|
|
869 Caller : general
|
|
870 Status : At Risk
|
|
871 : under development
|
|
872
|
|
873 =cut
|
|
874
|
|
875 sub coords_by_ArchiveStableId {
|
|
876 my ($self, $archive_id) = @_;
|
|
877
|
|
878 throw("Bio::EnsEMBL::ArchiveStableId object expected.")
|
|
879 unless ($archive_id and ref($archive_id) and
|
|
880 $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));
|
|
881
|
|
882 return $self->{'sorted_tree'}->{'coords'}->{$self->_node_id($archive_id)}
|
|
883 || [];
|
|
884 }
|
|
885
|
|
886
|
|
887 =head2 calculate_coords
|
|
888
|
|
889 Example : $history->calculate_coords;
|
|
890 Description : Pre-calculates the grid coordinates of all nodes in the tree.
|
|
891 Return type : none
|
|
892 Exceptions : none
|
|
893 Caller : ArchiveStableIdAdaptor::fetch_history_by_stable_id
|
|
894 Status : At Risk
|
|
895 : under development
|
|
896
|
|
897 =cut
|
|
898
|
|
899 sub calculate_coords {
|
|
900 my $self = shift;
|
|
901
|
|
902 # reset any previous tree cordinate calculations
|
|
903 $self->reset_tree;
|
|
904
|
|
905 # the "master" information for the sorted tree is stored as the sorted lists
|
|
906 # of releases (x) and stable IDs (y). Sort them now.
|
|
907 my $db_names = $self->get_release_db_names;
|
|
908
|
|
909 # untangle tree by sorting stable IDs appropriately
|
|
910 $self->optimise_tree;
|
|
911 my $stable_ids = $self->get_unique_stable_ids;
|
|
912
|
|
913 # for performance reasons, additionally store coordinates in a lookup hash
|
|
914 foreach my $archive_id (@{ $self->get_all_ArchiveStableIds }) {
|
|
915
|
|
916 # coordinates are positions in the sorted lists
|
|
917 my $x = $self->_index_of($archive_id->db_name, $db_names);
|
|
918 my $y = $self->_index_of($archive_id->stable_id, $stable_ids);
|
|
919
|
|
920 $self->{'sorted_tree'}->{'coords'}->{$self->_node_id($archive_id)} =
|
|
921 [ $x, $y ];
|
|
922 }
|
|
923 }
|
|
924
|
|
925 #
|
|
926 # Description : Returns the index of an element in an array
|
|
927 # Example : my @array = (a, b, c);
|
|
928 # my $i = _index_of('b', \@array); # will return 1
|
|
929 # Return type : Int (or undef if element is not found in array)
|
|
930 #
|
|
931 sub _index_of {
|
|
932 my ($self, $element, $arrayref) = @_;
|
|
933
|
|
934 throw("Expecting arrayref argument.") unless (ref($arrayref) eq 'ARRAY');
|
|
935
|
|
936 my @array = @$arrayref;
|
|
937
|
|
938 while (my $e = pop(@array)) {
|
|
939 return scalar(@array) if ($e eq $element);
|
|
940 }
|
|
941
|
|
942 return undef;
|
|
943 }
|
|
944
|
|
945
|
|
946 =head2 consolidate_tree
|
|
947
|
|
948 Example : $history->consolidate_tree;
|
|
949 Description : Consolidate the history tree. This means removing nodes where
|
|
950 there wasn't a change and bridging gaps in the history. The end
|
|
951 result will be a sparse tree which only contains the necessary
|
|
952 information.
|
|
953 Return type : none
|
|
954 Exceptions : none
|
|
955 Caller : ArchiveStableIdAdaptor->fetch_history_tree_by_stable_id
|
|
956 Status : At Risk
|
|
957 : under development
|
|
958
|
|
959 =cut
|
|
960
|
|
961 sub consolidate_tree {
|
|
962 my $self = shift;
|
|
963
|
|
964 #
|
|
965 # get all self-events and creations/deletions and sort them (by stable ID and
|
|
966 # chronologically)
|
|
967 #
|
|
968 my @event_lookup;
|
|
969
|
|
970 foreach my $event (@{ $self->get_all_StableIdEvents }) {
|
|
971
|
|
972 my $old_id = $event->old_ArchiveStableId;
|
|
973 my $new_id = $event->new_ArchiveStableId;
|
|
974
|
|
975 if (!$old_id or !$new_id or ($old_id->stable_id eq $new_id->stable_id)) {
|
|
976 if ($old_id) {
|
|
977 push @event_lookup, [$old_id->stable_id, $old_id->release,
|
|
978 $old_id->db_name, $event];
|
|
979 } else {
|
|
980 push @event_lookup, [$new_id->stable_id, $new_id->release - 1,
|
|
981 $new_id->db_name, $event];
|
|
982 }
|
|
983 }
|
|
984 }
|
|
985
|
|
986 my @self_events = map { $_->[3] }
|
|
987 sort { $a->[0] cmp $b->[0] || $a->[1] <=> $b->[1] || $a->[2] cmp $b->[2] }
|
|
988 @event_lookup;
|
|
989
|
|
990 #
|
|
991 # consolidate tree
|
|
992 #
|
|
993 my $last = shift(@self_events);
|
|
994
|
|
995 while (my $event = shift(@self_events)) {
|
|
996
|
|
997 my $lo = $last->old_ArchiveStableId;
|
|
998 my $ln = $last->new_ArchiveStableId;
|
|
999 my $eo = $event->old_ArchiveStableId;
|
|
1000 my $en = $event->new_ArchiveStableId;
|
|
1001
|
|
1002 if ($lo and $eo and $en and $lo->stable_id eq $eo->stable_id
|
|
1003 and $lo->version eq $eo->version) {
|
|
1004
|
|
1005 # this removes redundant nodes and connects the remaining nodes:
|
|
1006 #
|
|
1007 # o--o--o -> o-----o
|
|
1008 # 1 1 1 1 1
|
|
1009
|
|
1010 #warn 'A: '.$last->ident_string.' | '.$event->ident_string."\n";
|
|
1011
|
|
1012 $self->remove_StableIdEvent($last);
|
|
1013 $self->remove_StableIdEvent($event);
|
|
1014
|
|
1015 $event->old_ArchiveStableId($lo);
|
|
1016
|
|
1017 $self->add_StableIdEvents($event);
|
|
1018
|
|
1019 } elsif ($ln and $eo and $ln->db_name ne $eo->db_name
|
|
1020 and $ln->stable_id eq $eo->stable_id and $ln->version eq $eo->version) {
|
|
1021
|
|
1022 # try to brigde gaps
|
|
1023
|
|
1024 if ($en) {
|
|
1025
|
|
1026 # o--o o--o -> o--o-----o
|
|
1027 # 1 2 2 2 1 2 2
|
|
1028 #
|
|
1029 # o o--o -> o-----o
|
|
1030 # 1 1 1 1 1
|
|
1031
|
|
1032 #warn 'X: '.$last->ident_string.' | '.$event->ident_string."\n";
|
|
1033
|
|
1034 $self->remove_StableIdEvent($event);
|
|
1035 $event->old_ArchiveStableId($ln);
|
|
1036 $self->add_StableIdEvents($event);
|
|
1037
|
|
1038 } elsif ($lo) {
|
|
1039
|
|
1040 # there's a deletion event, deal with it differently
|
|
1041
|
|
1042 if ($lo->version eq $ln->version) {
|
|
1043
|
|
1044 # o--o o -> o-----o
|
|
1045 # 1 1 1 1 1
|
|
1046
|
|
1047 #warn 'Y: '.$last->ident_string.' | '.$event->ident_string."\n";
|
|
1048
|
|
1049 $self->remove_StableIdEvent($last);
|
|
1050 $last->new_ArchiveStableId($eo);
|
|
1051 $self->add_StableIdEvents($last);
|
|
1052
|
|
1053 } else {
|
|
1054
|
|
1055 # o--o o -> o--o--o
|
|
1056 # 1 2 2 1 2 2
|
|
1057
|
|
1058 #warn 'Z: '.$last->ident_string.' | '.$event->ident_string."\n";
|
|
1059
|
|
1060 $self->remove_StableIdEvent($event);
|
|
1061 $event->old_ArchiveStableId($ln);
|
|
1062 $event->new_ArchiveStableId($eo);
|
|
1063 $self->add_StableIdEvents($event);
|
|
1064
|
|
1065 }
|
|
1066
|
|
1067 } else {
|
|
1068
|
|
1069 # creation followed by deletion in next mapping
|
|
1070 #
|
|
1071 # o o -> o--o
|
|
1072 # 1 1 1 1
|
|
1073
|
|
1074 #warn 'Q: '.$last->ident_string.' | '.$event->ident_string."\n";
|
|
1075
|
|
1076 $self->remove_StableIdEvent($last);
|
|
1077 $self->remove_StableIdEvent($event);
|
|
1078 $event->old_ArchiveStableId($ln);
|
|
1079 $event->new_ArchiveStableId($eo);
|
|
1080 $self->add_StableIdEvents($event);
|
|
1081
|
|
1082 }
|
|
1083
|
|
1084 } else {
|
|
1085 #warn 'C: '.$last->ident_string.' | '.$event->ident_string."\n";
|
|
1086 }
|
|
1087
|
|
1088 $last = $event;
|
|
1089 }
|
|
1090
|
|
1091 # now add ArchiveStableIds of the remaining events to the tree
|
|
1092 $self->add_ArchiveStableIds_for_events;
|
|
1093 }
|
|
1094
|
|
1095
|
|
1096 =head2 reset_tree
|
|
1097
|
|
1098 Example : $history->reset_tree;
|
|
1099 Description : Resets all pre-calculated tree grid data. Mostly used internally
|
|
1100 by methods that modify the tree.
|
|
1101 Return type : none
|
|
1102 Exceptions : none
|
|
1103 Caller : internal
|
|
1104 Status : At Risk
|
|
1105 : under development
|
|
1106
|
|
1107 =cut
|
|
1108
|
|
1109 sub reset_tree {
|
|
1110 my $self = shift;
|
|
1111 $self->{'sorted_tree'} = undef;
|
|
1112 }
|
|
1113
|
|
1114
|
|
1115 =head2 current_dbname
|
|
1116
|
|
1117 Arg[1] : (optional) String $dbname - the dbname to set
|
|
1118 Example : my $dbname = $history->current_dbname;
|
|
1119 Description : Getter/setter for current dbname.
|
|
1120 Return type : String
|
|
1121 Exceptions : none
|
|
1122 Caller : general
|
|
1123 Status : At Risk
|
|
1124 : under development
|
|
1125
|
|
1126 =cut
|
|
1127
|
|
1128 sub current_dbname {
|
|
1129 my $self = shift;
|
|
1130 $self->{'current_dbname'} = shift if (@_);
|
|
1131 return $self->{'current_dbname'};
|
|
1132 }
|
|
1133
|
|
1134
|
|
1135 =head2 current_release
|
|
1136
|
|
1137 Arg[1] : (optional) Int $release - the release to set
|
|
1138 Example : my $release = $history->current_release;
|
|
1139 Description : Getter/setter for current release.
|
|
1140 Return type : Int
|
|
1141 Exceptions : none
|
|
1142 Caller : general
|
|
1143 Status : At Risk
|
|
1144 : under development
|
|
1145
|
|
1146 =cut
|
|
1147
|
|
1148 sub current_release {
|
|
1149 my $self = shift;
|
|
1150 $self->{'current_release'} = shift if (@_);
|
|
1151 return $self->{'current_release'};
|
|
1152 }
|
|
1153
|
|
1154
|
|
1155 =head2 current_assembly
|
|
1156
|
|
1157 Arg[1] : (optional) String $assembly - the assembly to set
|
|
1158 Example : my $assembly = $history->current_assembly;
|
|
1159 Description : Getter/setter for current assembly.
|
|
1160 Return type : String
|
|
1161 Exceptions : none
|
|
1162 Caller : general
|
|
1163 Status : At Risk
|
|
1164 : under development
|
|
1165
|
|
1166 =cut
|
|
1167
|
|
1168 sub current_assembly {
|
|
1169 my $self = shift;
|
|
1170 $self->{'current_assembly'} = shift if (@_);
|
|
1171 return $self->{'current_assembly'};
|
|
1172 }
|
|
1173
|
|
1174
|
|
1175 =head2 is_incomplete
|
|
1176
|
|
1177 Arg[1] : (optional) Boolean $incomplete
|
|
1178 Example : if ($history->is_incomplete) {
|
|
1179 print "Returned tree is incomplete due to too many mappings
|
|
1180 in the database.\n";
|
|
1181 }
|
|
1182 Description : Getter/setter for incomplete flag. This is used by
|
|
1183 ArchiveStableIdAdaptor to indicate that it finished building
|
|
1184 the tree prematurely due to too many mappins in the db and can
|
|
1185 be used by applications to print warning messages.
|
|
1186 Return type : Boolean
|
|
1187 Exceptions : none
|
|
1188 Caller : general
|
|
1189 Status : At Risk
|
|
1190 : under development
|
|
1191
|
|
1192 =cut
|
|
1193
|
|
1194 sub is_incomplete {
|
|
1195 my $self = shift;
|
|
1196 $self->{'incomplete'} = shift if (@_);
|
|
1197 return $self->{'incomplete'};
|
|
1198 }
|
|
1199
|
|
1200
|
|
1201 1;
|
|
1202
|