comparison variant_effect_predictor/Bio/EnsEMBL/Compara/GeneTree.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =head1 NAME
20
21 Bio::EnsEMBL::Compara::GeneTree
22
23 =head1 DESCRIPTION
24
25 Class to represent a gene tree object. Contains a link to
26 the root of the tree, as well as general tree properties.
27 It implements the AlignedMemberSet interface (via the leaves).
28
29 =head1 INHERITANCE TREE
30
31 Bio::EnsEMBL::Compara::GeneTree
32 +- Bio::EnsEMBL::Compara::AlignedMemberSet
33 `- Bio::EnsEMBL::Compara::Taggable
34
35 =head1 AUTHORSHIP
36
37 Ensembl Team. Individual contributions can be found in the CVS log.
38
39 =head1 MAINTAINER
40
41 $Author: mm14 $
42
43 =head1 VERSION
44
45 $Revision: 1.23 $
46
47 =head1 APPENDIX
48
49 The rest of the documentation details each of the object methods.
50 Internal methods are usually preceded with an underscore (_)
51
52 =cut
53
54 package Bio::EnsEMBL::Compara::GeneTree;
55
56 use Bio::EnsEMBL::Utils::Argument;
57 use Bio::EnsEMBL::Utils::Scalar qw(:assert);
58
59 use Bio::EnsEMBL::Compara::GeneTreeNode;
60 use Bio::EnsEMBL::Compara::GeneTreeMember;
61
62 use strict;
63 no strict 'refs';
64
65 use base ('Bio::EnsEMBL::Compara::AlignedMemberSet', 'Bio::EnsEMBL::Compara::Taggable');
66
67
68 ##############################
69 # Constructors / Destructors #
70 ##############################
71
=head2 new

  Arg [-ROOT_ID]       : (optional) value stored as the root_id of the tree
  Arg [-MEMBER_TYPE]   : (optional) String, passed on to member_type()
  Arg [-TREE_TYPE]     : (optional) String, passed on to tree_type()
  Arg [-CLUSTERSET_ID] : (optional) String, passed on to clusterset_id()
  Example     : my $tree = Bio::EnsEMBL::Compara::GeneTree->new(
                    -TREE_TYPE   => 'tree',
                    -MEMBER_TYPE => 'protein',
                );
  Description : Builds the object with the parent class constructor (which
                receives the complete argument list) and then records the
                GeneTree-specific named arguments. ROOT_ID is stored as soon
                as it is defined; the three other arguments are only applied
                when they hold a true value.
  Returntype  : Bio::EnsEMBL::Compara::GeneTree
  Exceptions  : none thrown directly by this method
  Caller      : General

=cut

sub new {
    my $class = shift;
    my @args  = @_;

    my $self = $class->SUPER::new(@args);

    # Nothing GeneTree-specific to record when called without arguments
    return $self unless scalar @args;

    my ($root_id, $member_type, $tree_type, $clusterset_id) =
        rearrange([qw(ROOT_ID MEMBER_TYPE TREE_TYPE CLUSTERSET_ID)], @args);

    # root_id has no setter: it is written straight into the object
    if (defined $root_id) {
        $self->{'_root_id'} = $root_id;
    }

    # The other fields go through their accessors, and only for true values
    $self->member_type($member_type)     if $member_type;
    $self->tree_type($tree_type)         if $tree_type;
    $self->clusterset_id($clusterset_id) if $clusterset_id;

    return $self;
}
99
100
=head2 deep_copy

  Description : Returns a copy of $self. The copy itself is produced by the
                parent class's deep_copy() (not visible in this file; the
                original documentation states that the members are copied
                but that the tree topology is lost). This method then
                transfers tree_type, member_type and clusterset_id onto
                the copy.
  Returntype  : Bio::EnsEMBL::Compara::GeneTree
  Caller      : General

=cut

sub deep_copy {
    my ($self) = @_;

    my $clone = $self->SUPER::deep_copy();

    # Carry over the GeneTree-level attributes through their accessors
    for my $accessor ('tree_type', 'member_type', 'clusterset_id') {
        $clone->$accessor($self->$accessor);
    }

    return $clone;
}
118
119
=head2 DESTROY

  Description : Removes the '_root' entry of the object, i.e. drops the
                tree's reference to its root node. This is meant to break
                the tree <-> root circular reference.
  Returntype  : None
  Caller      : System

=cut

sub DESTROY {
    my ($self) = @_;
    delete $self->{_root};
}
133
134
135 #####################
136 # Object attributes #
137 #####################
138
=head2 tree_type

  Arg [1]     : (optional) String: the new value
  Description : Getter/Setter for the tree_type field. The value is not
                validated here; the documented values are currently
                'tree', 'supertree' and 'clusterset'.
  Returntype  : String
  Example     : my $type = $tree->tree_type();
  Caller      : General

=cut

sub tree_type {
    my ($self, @new_value) = @_;

    # Any argument, even undef, overwrites the stored value
    if (@new_value) {
        $self->{'_tree_type'} = $new_value[0];
    }

    return $self->{'_tree_type'};
}
154
155
=head2 member_type

  Arg [1]     : (optional) String: the new value
  Description : Getter/Setter for the member_type field. The value is not
                validated here; the documented values are currently
                'ncrna' and 'protein'.
  Returntype  : String
  Example     : my $type = $tree->member_type();
  Caller      : General

=cut

sub member_type {
    my ($self, @new_value) = @_;

    # Any argument, even undef, overwrites the stored value
    if (@new_value) {
        $self->{'_member_type'} = $new_value[0];
    }

    return $self->{'_member_type'};
}
171
172
=head2 clusterset_id

  Arg [1]     : (optional) String: the new value
  Description : Getter/Setter for the clusterset_id field. This field can
                be any string. Each dataset should contain a set of trees
                with the "default" clusterset_id. Other clusterset_ids are
                used to store linked / additional data.
  Returntype  : String
  Example     : my $clusterset_id = $tree->clusterset_id();
  Caller      : General

=cut

sub clusterset_id {
    my ($self, @new_value) = @_;

    # Any argument, even undef, overwrites the stored value
    if (@new_value) {
        $self->{'_clusterset_id'} = $new_value[0];
    }

    return $self->{'_clusterset_id'};
}
190
191
=head2 root_id

  Description : Getter for the root_id of the tree, i.e. the node_id of its
                root node. Read-only: the value is only written by the
                constructor (ROOT_ID argument).
  Returntype  : Integer
  Example     : my $root_node_id = $tree->root_id();
  Caller      : General

=cut

sub root_id {
    my ($self) = @_;

    my $stored_root_id = $self->{'_root_id'};
    return $stored_root_id;
}
205
206
207 ################
208 # Tree loading #
209 ################
210
=head2 root

  Description : Getter for the root node of the tree. The node is built on
                first access and cached in the object. If the tree has a
                root_id and an adaptor, the node is fetched from the
                database with GeneTreeNodeAdaptor::fetch_node_by_node_id().
                Otherwise a new, empty GeneTreeNode is created and linked
                back to this tree.
  Returntype  : Bio::EnsEMBL::Compara::GeneTreeNode
  Exceptions  : Whatever the node adaptor throws while fetching. The
                temporary '_ref_tree' entry set on the adaptor is removed
                in that case too.
  Example     : my $root_node = $tree->root();
  Caller      : General

=cut

sub root {
    my $self = shift;

    # Already built (or fetched) by an earlier call
    return $self->{'_root'} if defined $self->{'_root'};

    if (defined $self->{'_root_id'} and defined $self->adaptor) {
        my $gtn_adaptor = $self->adaptor->db->get_GeneTreeNodeAdaptor;

        # The node adaptor is handed the current tree through '_ref_tree'
        # for the duration of the fetch (presumably so that the fetched
        # nodes point back to $self rather than to a new GeneTree object).
        # local() guarantees the entry is restored to its previous state
        # when this block is left, including when the fetch dies, so the
        # shared adaptor never keeps a stale reference to this tree.
        local $gtn_adaptor->{'_ref_tree'} = $self;
        $self->{'_root'} = $gtn_adaptor->fetch_node_by_node_id($self->{'_root_id'});
    }
    else {
        # No database copy to load: start from a new GeneTreeNode object
        $self->{'_root'} = Bio::EnsEMBL::Compara::GeneTreeNode->new;
        $self->{'_root'}->tree($self);
    }

    return $self->{'_root'};
}
241
242
=head2 preload

  Description : Method to load all the tree data in one go. This currently
                includes the tree itself (fetched with
                GeneTreeNodeAdaptor::fetch_tree_by_root_id() if the root is
                not loaded yet) and all the gene Members associated with
                the GeneTreeMember leaves, which are fetched with a single
                MemberAdaptor::fetch_all_by_dbID_list() call.
                Does nothing when the tree has no adaptor.
                In the future, it will include all the tags.
  Returntype  : none
  Exceptions  : Whatever the adaptors throw while fetching. The temporary
                '_ref_tree' entry set on the node adaptor is removed in
                that case too.
  Example     : $tree->preload();
  Caller      : General

=cut

sub preload {
    my $self = shift;
    return unless defined $self->adaptor;

    if (not defined $self->{'_root'} and defined $self->{'_root_id'}) {
        my $gtn_adaptor = $self->adaptor->db->get_GeneTreeNodeAdaptor;

        # The node adaptor is handed the current tree through '_ref_tree'
        # for the duration of the fetch. local() restores the entry when
        # this block is left, including when the fetch dies, so the shared
        # adaptor never keeps a stale reference to this tree.
        local $gtn_adaptor->{'_ref_tree'} = $self;
        $self->{'_root'} = $gtn_adaptor->fetch_tree_by_root_id($self->{'_root_id'});
    }

    # Index the GeneTreeMember leaves by the dbID of their gene member
    my %leaf_for_gene_member_id;
    foreach my $leaf (@{ $self->root->get_all_leaves }) {
        next unless UNIVERSAL::isa($leaf, 'Bio::EnsEMBL::Compara::GeneTreeMember');
        $leaf_for_gene_member_id{ $leaf->gene_member_id } = $leaf;
    }

    # Nothing to attach: do not query the database with an empty id list
    my @gene_member_ids = keys %leaf_for_gene_member_id;
    return unless @gene_member_ids;

    # Loads all the gene members in one go and hands each one to its leaf
    my $gene_members = $self->adaptor->db->get_MemberAdaptor->fetch_all_by_dbID_list(\@gene_member_ids);
    foreach my $gene_member (@$gene_members) {
        $leaf_for_gene_member_id{ $gene_member->dbID }->gene_member($gene_member);
    }

    return;
}
277
278
=head2 attach_alignment

  Arg [1]     : String: clusterset_id
  Description : Method to fetch the alternative tree with the given
                clusterset_id and attach its multiple alignment to the
                current tree: the cigar_line of every leaf of the current
                tree is set from the aligned member of the other tree that
                has the same member_id. The alternative tree is returned.
                Returns nothing when the tree has no adaptor.
  Returntype  : GeneTree
  Exceptions  : Dies if no linked tree has the requested clusterset_id
  Example     : $supertree->attach_alignment('super-align');
  Caller      : General

=cut

sub attach_alignment {
    my ($self, $other_clusterset_id) = @_;
    return unless defined $self->adaptor;

    # Among the linked trees, keep those stored under the requested clusterset_id
    my $linked_trees = $self->adaptor->fetch_all_linked_trees($self);
    my @matching     = grep { $_->clusterset_id eq $other_clusterset_id } @{$linked_trees};
    unless (scalar(@matching)) {
        die "'$other_clusterset_id' tree not found\n";
    }
    my $alignment_tree = $matching[0];

    # Collect the cigar lines of that tree, indexed by member_id
    my $gtn_adaptor     = $self->adaptor->db->get_GeneTreeNodeAdaptor;
    my $aligned_members = $gtn_adaptor->fetch_all_AlignedMember_by_root_id($alignment_tree->root_id);
    my %cigar_line_of;
    for my $aligned_member (@{$aligned_members}) {
        $cigar_line_of{ $aligned_member->member_id } = $aligned_member->cigar_line;
    }

    # Copy them onto the leaves of the current tree
    for my $leaf (@{ $self->root->get_all_leaves }) {
        my $cigar_line = $cigar_line_of{ $leaf->member_id };
        $leaf->cigar_line($cigar_line);
    }

    return $alignment_tree;
}
315
316
=head2 expand_subtrees

  Description : Method to fetch the subtrees of the current tree and attach
                them to the tips of the current tree: each leaf whose
                node_id matches the _parent_id of a subtree root is
                replaced, under its parent, by that subtree root.
                Both the subtrees and the current tree are preloaded.
                Does nothing when the tree has no adaptor.
  Returntype  : none
  Example     : $supertree->expand_subtrees();
  Caller      : General

=cut

sub expand_subtrees {
    my ($self) = @_;
    return unless defined $self->adaptor;

    # Index the root of every subtree by the node it hangs from
    my %subtree_root_for;
    for my $subtree (@{ $self->adaptor->fetch_subtrees($self) }) {
        $subtree->preload;
        my $subtree_root = $subtree->root;
        $subtree_root_for{ $subtree_root->_parent_id } = $subtree_root;
    }

    # Swap each matching leaf of the current tree for its subtree
    $self->preload;
    for my $leaf (@{ $self->root->get_all_leaves }) {
        my $node_id = $leaf->node_id;
        next unless exists $subtree_root_for{$node_id};
        $leaf->parent->add_child($subtree_root_for{$node_id});
        $leaf->disavow_parent;
    }
}
346
347
348 ##############################
349 # AlignedMemberSet interface #
350 ##############################
351
=head2 member_class

  Description : Returns the name of the class of the members held by this
                set. It is a constant string.
  Returntype  : String: Bio::EnsEMBL::Compara::GeneTreeMember
  Caller      : Bio::EnsEMBL::Compara::MemberSet

=cut

sub member_class {
    my $member_class_name = 'Bio::EnsEMBL::Compara::GeneTreeMember';
    return $member_class_name;
}
363
364
=head2 get_all_Members

  Example     : my $members = $tree->get_all_Members();
  Description : Returns the list of all the GeneTreeMember of the tree.
                The list is built on the first call: the member indexes of
                the object are reset and every leaf of the tree that is a
                GeneTreeMember is registered with the parent class's
                add_Member(). Later calls return the stored list.
  Returntype  : array reference of Bio::EnsEMBL::Compara::GeneTreeMember
  Caller      : General

=cut

sub get_all_Members {
    my $self = shift;

    # The list has already been built
    return $self->{'_member_array'} if defined $self->{'_member_array'};

    # Start from empty containers
    $self->{'_member_array'} = [];
    for my $index_name (qw(_members_by_source _members_by_source_taxon
                           _members_by_source_genome_db _members_by_genome_db)) {
        $self->{$index_name} = {};
    }

    # Register the leaves directly with the parent class: this class's own
    # add_Member() would also attach the member under the root node
    for my $leaf (@{ $self->root->get_all_leaves }) {
        next unless UNIVERSAL::isa($leaf, 'Bio::EnsEMBL::Compara::GeneTreeMember');
        $self->SUPER::add_Member($leaf);
    }

    return $self->{'_member_array'};
}
390
391
=head2 add_Member

  Arg [1]     : GeneTreeMember
  Example     : $tree->add_Member($gene_tree_member);
  Description : Add a new GeneTreeMember to this set and to the tree as
                a child of the root node. The member is also linked back
                to this tree.
  Returntype  : whatever the parent class's add_Member() returns
                (documented as none)
  Exceptions  : Throws if the argument is not a
                Bio::EnsEMBL::Compara::GeneTreeMember (assert_ref)
  Caller      : General

=cut

sub add_Member {
    my $self   = shift;
    my $member = shift;

    assert_ref($member, 'Bio::EnsEMBL::Compara::GeneTreeMember');

    # Tree side: hang the member under the root and point it back at the tree
    my $root_node = $self->root;
    $root_node->add_child($member);
    $member->tree($self);

    # Set side: let the parent class register the member
    return $self->SUPER::add_Member($member);
}
411
412
413 ########
414 # Misc #
415 ########
416
# Dynamic definition of functions to allow NestedSet methods to work with
# GeneTrees: each method listed below is installed in this package as a thin
# wrapper that forwards the call, with its arguments, to the root node.
# The wrapped method is always called in scalar context.
for my $delegated_method (qw(
    get_all_nodes get_all_leaves get_all_sorted_leaves
    find_leaf_by_node_id find_leaf_by_name find_node_by_node_id
    find_node_by_name remove_nodes build_leftright_indexing flatten_tree
    newick_format nhx_format string_tree print_tree
    release_tree
)) {
    # Assigning to a glob through its name needs symbolic references
    no strict 'refs';
    *{"Bio::EnsEMBL::Compara::GeneTree::$delegated_method"} = sub {
        my $self = shift;
        return scalar($self->root->$delegated_method(@_));
    };
}
432
433
434 1;
435