0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =head1 NAME
|
|
20
|
|
21 Bio::EnsEMBL::Compara::GeneTree
|
|
22
|
|
23 =head1 DESCRIPTION
|
|
24
|
|
25 Class to represent a gene tree object. Contains a link to
|
|
26 the root of the tree, as long as general tree properties.
|
|
27 It implements the AlignedMemberSet interface (via the leaves).
|
|
28
|
|
29 =head1 INHERITANCE TREE
|
|
30
|
|
31 Bio::EnsEMBL::Compara::GeneTree
|
|
32 +- Bio::EnsEMBL::Compara::AlignedMemberSet
|
|
33 `- Bio::EnsEMBL::Compara::Taggable
|
|
34
|
|
35 =head1 AUTHORSHIP
|
|
36
|
|
37 Ensembl Team. Individual contributions can be found in the CVS log.
|
|
38
|
|
39 =head1 MAINTAINER
|
|
40
|
|
41 $Author: mm14 $
|
|
42
|
|
43 =head VERSION
|
|
44
|
|
45 $Revision: 1.23 $
|
|
46
|
|
47 =head1 APPENDIX
|
|
48
|
|
49 The rest of the documentation details each of the object methods.
|
|
50 Internal methods are usually preceded with an underscore (_)
|
|
51
|
|
52 =cut
|
|
53
|
|
54 package Bio::EnsEMBL::Compara::GeneTree;
|
|
55
|
|
56 use Bio::EnsEMBL::Utils::Argument;
|
|
57 use Bio::EnsEMBL::Utils::Scalar qw(:assert);
|
|
58
|
|
59 use Bio::EnsEMBL::Compara::GeneTreeNode;
|
|
60 use Bio::EnsEMBL::Compara::GeneTreeMember;
|
|
61
|
|
62 use strict;
|
|
63 no strict 'refs';
|
|
64
|
|
65 use base ('Bio::EnsEMBL::Compara::AlignedMemberSet', 'Bio::EnsEMBL::Compara::Taggable');
|
|
66
|
|
67
|
|
68 ##############################
|
|
69 # Constructors / Destructors #
|
|
70 ##############################
|
|
71
|
|
72 =head2 new
|
|
73
|
|
74 Arg [1] :
|
|
75 Example :
|
|
76 Description:
|
|
77 Returntype : Bio::EnsEMBL::Compara::GeneTree
|
|
78 Exceptions :
|
|
79 Caller :
|
|
80
|
|
81 =cut
|
|
82
|
|
83 sub new {
|
|
84 my($class,@args) = @_;
|
|
85
|
|
86 my $self = $class->SUPER::new(@args);
|
|
87
|
|
88 if (scalar @args) {
|
|
89 my ($root_id, $member_type, $tree_type, $clusterset_id) = rearrange([qw(ROOT_ID MEMBER_TYPE TREE_TYPE CLUSTERSET_ID)], @args);
|
|
90
|
|
91 $self->{'_root_id'} = $root_id if defined $root_id;
|
|
92 $member_type && $self->member_type($member_type);
|
|
93 $tree_type && $self->tree_type($tree_type);
|
|
94 $clusterset_id && $self->clusterset_id($clusterset_id);
|
|
95 }
|
|
96
|
|
97 return $self;
|
|
98 }
|
|
99
|
|
100
|
|
101 =head2 deep_copy
|
|
102
|
|
103 Description: Returns a copy of $self (as an AlignedMemberSet). All the
|
|
104 members are themselves copied, but the tree topology is lost.
|
|
105 Returntype : Bio::EnsEMBL::Compara::GeneTree
|
|
106 Caller : General
|
|
107
|
|
108 =cut
|
|
109
|
|
110 sub deep_copy {
|
|
111 my $self = shift;
|
|
112 my $copy = $self->SUPER::deep_copy();
|
|
113 foreach my $attr (qw(tree_type member_type clusterset_id)) {
|
|
114 $copy->$attr($self->$attr);
|
|
115 }
|
|
116 return $copy;
|
|
117 }
|
|
118
|
|
119
|
|
120 =head2 DESTROY
|
|
121
|
|
122 Description : Deletes the reference to the root node and breaks
|
|
123 the circular reference.
|
|
124 Returntype : None
|
|
125 Caller : System
|
|
126
|
|
127 =cut
|
|
128
|
|
129 sub DESTROY {
|
|
130 my $self = shift;
|
|
131 delete $self->{'_root'};
|
|
132 }
|
|
133
|
|
134
|
|
135 #####################
|
|
136 # Object attributes #
|
|
137 #####################
|
|
138
|
|
139 =head2 tree_type
|
|
140
|
|
141 Description : Getter/Setter for the tree_type field. This field can
|
|
142 currently be 'tree', 'supertree' or 'clusterset'
|
|
143 Returntype : String
|
|
144 Example : my $type = $tree->tree_type();
|
|
145 Caller : General
|
|
146
|
|
147 =cut
|
|
148
|
|
149 sub tree_type {
|
|
150 my $self = shift;
|
|
151 $self->{'_tree_type'} = shift if(@_);
|
|
152 return $self->{'_tree_type'};
|
|
153 }
|
|
154
|
|
155
|
|
156 =head2 member_type
|
|
157
|
|
158 Description : Getter/Setter for the member_type field. This field can
|
|
159 currently be 'ncrna' or 'protein'
|
|
160 Returntype : String
|
|
161 Example : my $type = $tree->member_type();
|
|
162 Caller : General
|
|
163
|
|
164 =cut
|
|
165
|
|
166 sub member_type {
|
|
167 my $self = shift;
|
|
168 $self->{'_member_type'} = shift if(@_);
|
|
169 return $self->{'_member_type'};
|
|
170 }
|
|
171
|
|
172
|
|
173 =head2 clusterset_id
|
|
174
|
|
175 Description : Getter/Setter for the clusterset_id field. This field can
|
|
176 be any string. Each dataset should contain a set of trees
|
|
177 with the "default" clusterset_id. Other clusterset_id are
|
|
178 used to store linked / additionnal data.
|
|
179 Returntype : String
|
|
180 Example : my $clusterset_id = $tree->clusterset_id();
|
|
181 Caller : General
|
|
182
|
|
183 =cut
|
|
184
|
|
185 sub clusterset_id {
|
|
186 my $self = shift;
|
|
187 $self->{'_clusterset_id'} = shift if(@_);
|
|
188 return $self->{'_clusterset_id'};
|
|
189 }
|
|
190
|
|
191
|
|
192 =head2 root_id
|
|
193
|
|
194 Description : Getter for the root_id of the root node of the tree.
|
|
195 Returntype : Integer
|
|
196 Example : my $root_node_id = $tree->root_id();
|
|
197 Caller : General
|
|
198
|
|
199 =cut
|
|
200
|
|
201 sub root_id {
|
|
202 my $self = shift;
|
|
203 return $self->{'_root_id'};
|
|
204 }
|
|
205
|
|
206
|
|
207 ################
|
|
208 # Tree loading #
|
|
209 ################
|
|
210
|
|
211 =head2 root
|
|
212
|
|
213 Description : Getter for the root node of the tree. This returns an
|
|
214 object fetch from the database if root_id is defined.
|
|
215 Otherwise, it will create a new GeneTreeNode object.
|
|
216 Returntype : Bio::EnsEMBL::Compara::GeneTreeNode
|
|
217 Example : my $root_node = $tree->root();
|
|
218 Caller : General
|
|
219
|
|
220 =cut
|
|
221
|
|
222 sub root {
|
|
223 my $self = shift;
|
|
224
|
|
225 if (not defined $self->{'_root'}) {
|
|
226 if (defined $self->{'_root_id'} and defined $self->adaptor) {
|
|
227 # Loads all the nodes in one go
|
|
228 my $gtn_adaptor = $self->adaptor->db->get_GeneTreeNodeAdaptor;
|
|
229 $gtn_adaptor->{'_ref_tree'} = $self;
|
|
230 $self->{'_root'} = $gtn_adaptor->fetch_node_by_node_id($self->{'_root_id'});
|
|
231 delete $gtn_adaptor->{'_ref_tree'};
|
|
232
|
|
233 } else {
|
|
234 # Creates a new GeneTreeNode object
|
|
235 $self->{'_root'} = new Bio::EnsEMBL::Compara::GeneTreeNode;
|
|
236 $self->{'_root'}->tree($self);
|
|
237 }
|
|
238 }
|
|
239 return $self->{'_root'};
|
|
240 }
|
|
241
|
|
242
|
|
243 =head2 preload
|
|
244
|
|
245 Description : Method to load all the tree data in one go. This currently
|
|
246 includes if not loaded yet, and all the gene Members
|
|
247 associated with the leaves.
|
|
248 In the future, it will include all the tags
|
|
249 Returntype : node
|
|
250 Example : $tree->preload();
|
|
251 Caller : General
|
|
252
|
|
253 =cut
|
|
254
|
|
255 sub preload {
|
|
256 my $self = shift;
|
|
257 return unless defined $self->adaptor;
|
|
258
|
|
259 if (not defined $self->{'_root'} and defined $self->{'_root_id'}) {
|
|
260 my $gtn_adaptor = $self->adaptor->db->get_GeneTreeNodeAdaptor;
|
|
261 $gtn_adaptor->{'_ref_tree'} = $self;
|
|
262 $self->{'_root'} = $gtn_adaptor->fetch_tree_by_root_id($self->{'_root_id'});
|
|
263 delete $gtn_adaptor->{'_ref_tree'};
|
|
264 }
|
|
265
|
|
266 # Loads all the gene members in one go
|
|
267 my %leaves;
|
|
268 foreach my $pm (@{$self->root->get_all_leaves}) {
|
|
269 $leaves{$pm->gene_member_id} = $pm if UNIVERSAL::isa($pm, 'Bio::EnsEMBL::Compara::GeneTreeMember');
|
|
270 }
|
|
271 my @m_ids = keys(%leaves);
|
|
272 my $all_gm = $self->adaptor->db->get_MemberAdaptor->fetch_all_by_dbID_list(\@m_ids);
|
|
273 foreach my $gm (@$all_gm) {
|
|
274 $leaves{$gm->dbID}->gene_member($gm);
|
|
275 }
|
|
276 }
|
|
277
|
|
278
|
|
279 =head2 attach_alignment
|
|
280
|
|
281 Arg [1] : String: clusterset_id
|
|
282 Description : Method to fetch the alternative tree with the given
|
|
283 clusterset_id and attach its multiple alignment to
|
|
284 the current tree. The alternative tree is returned.
|
|
285 Returntype : GeneTree
|
|
286 Example : $supertree->attach_alignment('super-align');
|
|
287 Caller : General
|
|
288
|
|
289 =cut
|
|
290
|
|
291 sub attach_alignment {
|
|
292 my $self = shift;
|
|
293 my $other_clusterset_id = shift;
|
|
294 return unless defined $self->adaptor;
|
|
295
|
|
296 # Gets the other tree
|
|
297 my $others = $self->adaptor->fetch_all_linked_trees($self);
|
|
298 my @good_others = grep {$_->clusterset_id eq $other_clusterset_id} @$others;
|
|
299 die "'$other_clusterset_id' tree not found\n" unless scalar(@good_others);
|
|
300
|
|
301 # Gets the alignment
|
|
302 my %cigars;
|
|
303 my $gtn_adaptor = $self->adaptor->db->get_GeneTreeNodeAdaptor;
|
|
304 foreach my $leaf (@{$gtn_adaptor->fetch_all_AlignedMember_by_root_id($good_others[0]->root_id)}) {
|
|
305 $cigars{$leaf->member_id} = $leaf->cigar_line;
|
|
306 }
|
|
307
|
|
308 # Assigns it
|
|
309 foreach my $leaf (@{$self->root->get_all_leaves}) {
|
|
310 $leaf->cigar_line($cigars{$leaf->member_id});
|
|
311 }
|
|
312
|
|
313 return $good_others[0];
|
|
314 }
|
|
315
|
|
316
|
|
317 =head2 expand_subtrees
|
|
318
|
|
319 Description : Method to fetch the subtrees of the current tree
|
|
320 and attach them to the tips of the current tree
|
|
321 Returntype : none
|
|
322 Example : $supertree->expand_subtrees();
|
|
323 Caller : General
|
|
324
|
|
325 =cut
|
|
326
|
|
327 sub expand_subtrees {
|
|
328 my $self = shift;
|
|
329 return unless defined $self->adaptor;
|
|
330
|
|
331 # Gets the subtrees
|
|
332 my %subtrees;
|
|
333 foreach my $subtree (@{$self->adaptor->fetch_subtrees($self)}) {
|
|
334 $subtree->preload;
|
|
335 $subtrees{$subtree->root->_parent_id} = $subtree->root;
|
|
336 }
|
|
337
|
|
338 # Attaches them
|
|
339 $self->preload;
|
|
340 foreach my $leaf (@{$self->root->get_all_leaves}) {
|
|
341 next unless exists $subtrees{$leaf->node_id};
|
|
342 $leaf->parent->add_child($subtrees{$leaf->node_id});
|
|
343 $leaf->disavow_parent;
|
|
344 }
|
|
345 }
|
|
346
|
|
347
|
|
348 ##############################
|
|
349 # AlignedMemberSet interface #
|
|
350 ##############################
|
|
351
|
|
352 =head2 member_class
|
|
353
|
|
354 Description: Returns the type of member used in the set
|
|
355 Returntype : String: Bio::EnsEMBL::Compara::GeneTreeMember
|
|
356 Caller : Bio::EnsEMBL::Compara::MemberSet
|
|
357
|
|
358 =cut
|
|
359
|
|
360 sub member_class {
|
|
361 return 'Bio::EnsEMBL::Compara::GeneTreeMember';
|
|
362 }
|
|
363
|
|
364
|
|
365 =head2 get_all_Members
|
|
366
|
|
367 Example :
|
|
368 Description: Returns the list of all the GeneTreeMember of the tree
|
|
369 Returntype : array reference of Bio::EnsEMBL::Compara::GeneTreeMember
|
|
370 Caller : General
|
|
371
|
|
372 =cut
|
|
373
|
|
374 sub get_all_Members {
|
|
375 my ($self) = @_;
|
|
376
|
|
377 unless (defined $self->{'_member_array'}) {
|
|
378
|
|
379 $self->{'_member_array'} = [];
|
|
380 $self->{'_members_by_source'} = {};
|
|
381 $self->{'_members_by_source_taxon'} = {};
|
|
382 $self->{'_members_by_source_genome_db'} = {};
|
|
383 $self->{'_members_by_genome_db'} = {};
|
|
384 foreach my $leaf (@{$self->root->get_all_leaves}) {
|
|
385 $self->SUPER::add_Member($leaf) if UNIVERSAL::isa($leaf, 'Bio::EnsEMBL::Compara::GeneTreeMember');
|
|
386 }
|
|
387 }
|
|
388 return $self->{'_member_array'};
|
|
389 }
|
|
390
|
|
391
|
|
392 =head2 add_Member
|
|
393
|
|
394 Arg [1] : GeneTreeMember
|
|
395 Example :
|
|
396 Description: Add a new GeneTreeMember to this set and to the tree as
|
|
397 a child of the root node
|
|
398 Returntype : none
|
|
399 Exceptions : Throws if input objects don't check
|
|
400 Caller : General
|
|
401
|
|
402 =cut
|
|
403
|
|
404 sub add_Member {
|
|
405 my ($self, $member) = @_;
|
|
406 assert_ref($member, 'Bio::EnsEMBL::Compara::GeneTreeMember');
|
|
407 $self->root->add_child($member);
|
|
408 $member->tree($self);
|
|
409 $self->SUPER::add_Member($member);
|
|
410 }
|
|
411
|
|
412
|
|
413 ########
|
|
414 # Misc #
|
|
415 ########
|
|
416
|
|
417 # Dynamic definition of functions to allow NestedSet methods work with GeneTrees
|
|
418 foreach my $func_name (qw(get_all_nodes get_all_leaves get_all_sorted_leaves
|
|
419 find_leaf_by_node_id find_leaf_by_name find_node_by_node_id
|
|
420 find_node_by_name remove_nodes build_leftright_indexing flatten_tree
|
|
421 newick_format nhx_format string_tree print_tree
|
|
422 release_tree
|
|
423 )) {
|
|
424 my $full_name = "Bio::EnsEMBL::Compara::GeneTree::$func_name";
|
|
425 *$full_name = sub {
|
|
426 my $self = shift;
|
|
427 my $ret = $self->root->$func_name(@_);
|
|
428 return $ret;
|
|
429 };
|
|
430 # print STDERR "REDEFINE $func_name\n";
|
|
431 }
|
|
432
|
|
433
|
|
434 1;
|
|
435
|