Mercurial > repos > willmclaren > ensembl_vep
comparison variant_effect_predictor/Bio/EnsEMBL/Compara/GeneTree.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21066c0abaf5 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =head1 NAME | |
20 | |
21 Bio::EnsEMBL::Compara::GeneTree | |
22 | |
23 =head1 DESCRIPTION | |
24 | |
25 Class to represent a gene tree object. Contains a link to | |
26 the root of the tree, as long as general tree properties. | |
27 It implements the AlignedMemberSet interface (via the leaves). | |
28 | |
29 =head1 INHERITANCE TREE | |
30 | |
31 Bio::EnsEMBL::Compara::GeneTree | |
32 +- Bio::EnsEMBL::Compara::AlignedMemberSet | |
33 `- Bio::EnsEMBL::Compara::Taggable | |
34 | |
35 =head1 AUTHORSHIP | |
36 | |
37 Ensembl Team. Individual contributions can be found in the CVS log. | |
38 | |
39 =head1 MAINTAINER | |
40 | |
41 $Author: mm14 $ | |
42 | |
43 =head VERSION | |
44 | |
45 $Revision: 1.23 $ | |
46 | |
47 =head1 APPENDIX | |
48 | |
49 The rest of the documentation details each of the object methods. | |
50 Internal methods are usually preceded with an underscore (_) | |
51 | |
52 =cut | |
53 | |
54 package Bio::EnsEMBL::Compara::GeneTree; | |
55 | |
56 use Bio::EnsEMBL::Utils::Argument; | |
57 use Bio::EnsEMBL::Utils::Scalar qw(:assert); | |
58 | |
59 use Bio::EnsEMBL::Compara::GeneTreeNode; | |
60 use Bio::EnsEMBL::Compara::GeneTreeMember; | |
61 | |
62 use strict; | |
63 no strict 'refs'; | |
64 | |
65 use base ('Bio::EnsEMBL::Compara::AlignedMemberSet', 'Bio::EnsEMBL::Compara::Taggable'); | |
66 | |
67 | |
68 ############################## | |
69 # Constructors / Destructors # | |
70 ############################## | |
71 | |
72 =head2 new | |
73 | |
74 Arg [1] : | |
75 Example : | |
76 Description: | |
77 Returntype : Bio::EnsEMBL::Compara::GeneTree | |
78 Exceptions : | |
79 Caller : | |
80 | |
81 =cut | |
82 | |
83 sub new { | |
84 my($class,@args) = @_; | |
85 | |
86 my $self = $class->SUPER::new(@args); | |
87 | |
88 if (scalar @args) { | |
89 my ($root_id, $member_type, $tree_type, $clusterset_id) = rearrange([qw(ROOT_ID MEMBER_TYPE TREE_TYPE CLUSTERSET_ID)], @args); | |
90 | |
91 $self->{'_root_id'} = $root_id if defined $root_id; | |
92 $member_type && $self->member_type($member_type); | |
93 $tree_type && $self->tree_type($tree_type); | |
94 $clusterset_id && $self->clusterset_id($clusterset_id); | |
95 } | |
96 | |
97 return $self; | |
98 } | |
99 | |
100 | |
101 =head2 deep_copy | |
102 | |
103 Description: Returns a copy of $self (as an AlignedMemberSet). All the | |
104 members are themselves copied, but the tree topology is lost. | |
105 Returntype : Bio::EnsEMBL::Compara::GeneTree | |
106 Caller : General | |
107 | |
108 =cut | |
109 | |
110 sub deep_copy { | |
111 my $self = shift; | |
112 my $copy = $self->SUPER::deep_copy(); | |
113 foreach my $attr (qw(tree_type member_type clusterset_id)) { | |
114 $copy->$attr($self->$attr); | |
115 } | |
116 return $copy; | |
117 } | |
118 | |
119 | |
120 =head2 DESTROY | |
121 | |
122 Description : Deletes the reference to the root node and breaks | |
123 the circular reference. | |
124 Returntype : None | |
125 Caller : System | |
126 | |
127 =cut | |
128 | |
129 sub DESTROY { | |
130 my $self = shift; | |
131 delete $self->{'_root'}; | |
132 } | |
133 | |
134 | |
135 ##################### | |
136 # Object attributes # | |
137 ##################### | |
138 | |
139 =head2 tree_type | |
140 | |
141 Description : Getter/Setter for the tree_type field. This field can | |
142 currently be 'tree', 'supertree' or 'clusterset' | |
143 Returntype : String | |
144 Example : my $type = $tree->tree_type(); | |
145 Caller : General | |
146 | |
147 =cut | |
148 | |
149 sub tree_type { | |
150 my $self = shift; | |
151 $self->{'_tree_type'} = shift if(@_); | |
152 return $self->{'_tree_type'}; | |
153 } | |
154 | |
155 | |
156 =head2 member_type | |
157 | |
158 Description : Getter/Setter for the member_type field. This field can | |
159 currently be 'ncrna' or 'protein' | |
160 Returntype : String | |
161 Example : my $type = $tree->member_type(); | |
162 Caller : General | |
163 | |
164 =cut | |
165 | |
166 sub member_type { | |
167 my $self = shift; | |
168 $self->{'_member_type'} = shift if(@_); | |
169 return $self->{'_member_type'}; | |
170 } | |
171 | |
172 | |
173 =head2 clusterset_id | |
174 | |
175 Description : Getter/Setter for the clusterset_id field. This field can | |
176 be any string. Each dataset should contain a set of trees | |
177 with the "default" clusterset_id. Other clusterset_id are | |
178 used to store linked / additionnal data. | |
179 Returntype : String | |
180 Example : my $clusterset_id = $tree->clusterset_id(); | |
181 Caller : General | |
182 | |
183 =cut | |
184 | |
185 sub clusterset_id { | |
186 my $self = shift; | |
187 $self->{'_clusterset_id'} = shift if(@_); | |
188 return $self->{'_clusterset_id'}; | |
189 } | |
190 | |
191 | |
192 =head2 root_id | |
193 | |
194 Description : Getter for the root_id of the root node of the tree. | |
195 Returntype : Integer | |
196 Example : my $root_node_id = $tree->root_id(); | |
197 Caller : General | |
198 | |
199 =cut | |
200 | |
201 sub root_id { | |
202 my $self = shift; | |
203 return $self->{'_root_id'}; | |
204 } | |
205 | |
206 | |
207 ################ | |
208 # Tree loading # | |
209 ################ | |
210 | |
211 =head2 root | |
212 | |
213 Description : Getter for the root node of the tree. This returns an | |
214 object fetch from the database if root_id is defined. | |
215 Otherwise, it will create a new GeneTreeNode object. | |
216 Returntype : Bio::EnsEMBL::Compara::GeneTreeNode | |
217 Example : my $root_node = $tree->root(); | |
218 Caller : General | |
219 | |
220 =cut | |
221 | |
222 sub root { | |
223 my $self = shift; | |
224 | |
225 if (not defined $self->{'_root'}) { | |
226 if (defined $self->{'_root_id'} and defined $self->adaptor) { | |
227 # Loads all the nodes in one go | |
228 my $gtn_adaptor = $self->adaptor->db->get_GeneTreeNodeAdaptor; | |
229 $gtn_adaptor->{'_ref_tree'} = $self; | |
230 $self->{'_root'} = $gtn_adaptor->fetch_node_by_node_id($self->{'_root_id'}); | |
231 delete $gtn_adaptor->{'_ref_tree'}; | |
232 | |
233 } else { | |
234 # Creates a new GeneTreeNode object | |
235 $self->{'_root'} = new Bio::EnsEMBL::Compara::GeneTreeNode; | |
236 $self->{'_root'}->tree($self); | |
237 } | |
238 } | |
239 return $self->{'_root'}; | |
240 } | |
241 | |
242 | |
243 =head2 preload | |
244 | |
245 Description : Method to load all the tree data in one go. This currently | |
246 includes if not loaded yet, and all the gene Members | |
247 associated with the leaves. | |
248 In the future, it will include all the tags | |
249 Returntype : node | |
250 Example : $tree->preload(); | |
251 Caller : General | |
252 | |
253 =cut | |
254 | |
255 sub preload { | |
256 my $self = shift; | |
257 return unless defined $self->adaptor; | |
258 | |
259 if (not defined $self->{'_root'} and defined $self->{'_root_id'}) { | |
260 my $gtn_adaptor = $self->adaptor->db->get_GeneTreeNodeAdaptor; | |
261 $gtn_adaptor->{'_ref_tree'} = $self; | |
262 $self->{'_root'} = $gtn_adaptor->fetch_tree_by_root_id($self->{'_root_id'}); | |
263 delete $gtn_adaptor->{'_ref_tree'}; | |
264 } | |
265 | |
266 # Loads all the gene members in one go | |
267 my %leaves; | |
268 foreach my $pm (@{$self->root->get_all_leaves}) { | |
269 $leaves{$pm->gene_member_id} = $pm if UNIVERSAL::isa($pm, 'Bio::EnsEMBL::Compara::GeneTreeMember'); | |
270 } | |
271 my @m_ids = keys(%leaves); | |
272 my $all_gm = $self->adaptor->db->get_MemberAdaptor->fetch_all_by_dbID_list(\@m_ids); | |
273 foreach my $gm (@$all_gm) { | |
274 $leaves{$gm->dbID}->gene_member($gm); | |
275 } | |
276 } | |
277 | |
278 | |
279 =head2 attach_alignment | |
280 | |
281 Arg [1] : String: clusterset_id | |
282 Description : Method to fetch the alternative tree with the given | |
283 clusterset_id and attach its multiple alignment to | |
284 the current tree. The alternative tree is returned. | |
285 Returntype : GeneTree | |
286 Example : $supertree->attach_alignment('super-align'); | |
287 Caller : General | |
288 | |
289 =cut | |
290 | |
291 sub attach_alignment { | |
292 my $self = shift; | |
293 my $other_clusterset_id = shift; | |
294 return unless defined $self->adaptor; | |
295 | |
296 # Gets the other tree | |
297 my $others = $self->adaptor->fetch_all_linked_trees($self); | |
298 my @good_others = grep {$_->clusterset_id eq $other_clusterset_id} @$others; | |
299 die "'$other_clusterset_id' tree not found\n" unless scalar(@good_others); | |
300 | |
301 # Gets the alignment | |
302 my %cigars; | |
303 my $gtn_adaptor = $self->adaptor->db->get_GeneTreeNodeAdaptor; | |
304 foreach my $leaf (@{$gtn_adaptor->fetch_all_AlignedMember_by_root_id($good_others[0]->root_id)}) { | |
305 $cigars{$leaf->member_id} = $leaf->cigar_line; | |
306 } | |
307 | |
308 # Assigns it | |
309 foreach my $leaf (@{$self->root->get_all_leaves}) { | |
310 $leaf->cigar_line($cigars{$leaf->member_id}); | |
311 } | |
312 | |
313 return $good_others[0]; | |
314 } | |
315 | |
316 | |
317 =head2 expand_subtrees | |
318 | |
319 Description : Method to fetch the subtrees of the current tree | |
320 and attach them to the tips of the current tree | |
321 Returntype : none | |
322 Example : $supertree->expand_subtrees(); | |
323 Caller : General | |
324 | |
325 =cut | |
326 | |
327 sub expand_subtrees { | |
328 my $self = shift; | |
329 return unless defined $self->adaptor; | |
330 | |
331 # Gets the subtrees | |
332 my %subtrees; | |
333 foreach my $subtree (@{$self->adaptor->fetch_subtrees($self)}) { | |
334 $subtree->preload; | |
335 $subtrees{$subtree->root->_parent_id} = $subtree->root; | |
336 } | |
337 | |
338 # Attaches them | |
339 $self->preload; | |
340 foreach my $leaf (@{$self->root->get_all_leaves}) { | |
341 next unless exists $subtrees{$leaf->node_id}; | |
342 $leaf->parent->add_child($subtrees{$leaf->node_id}); | |
343 $leaf->disavow_parent; | |
344 } | |
345 } | |
346 | |
347 | |
348 ############################## | |
349 # AlignedMemberSet interface # | |
350 ############################## | |
351 | |
352 =head2 member_class | |
353 | |
354 Description: Returns the type of member used in the set | |
355 Returntype : String: Bio::EnsEMBL::Compara::GeneTreeMember | |
356 Caller : Bio::EnsEMBL::Compara::MemberSet | |
357 | |
358 =cut | |
359 | |
360 sub member_class { | |
361 return 'Bio::EnsEMBL::Compara::GeneTreeMember'; | |
362 } | |
363 | |
364 | |
365 =head2 get_all_Members | |
366 | |
367 Example : | |
368 Description: Returns the list of all the GeneTreeMember of the tree | |
369 Returntype : array reference of Bio::EnsEMBL::Compara::GeneTreeMember | |
370 Caller : General | |
371 | |
372 =cut | |
373 | |
374 sub get_all_Members { | |
375 my ($self) = @_; | |
376 | |
377 unless (defined $self->{'_member_array'}) { | |
378 | |
379 $self->{'_member_array'} = []; | |
380 $self->{'_members_by_source'} = {}; | |
381 $self->{'_members_by_source_taxon'} = {}; | |
382 $self->{'_members_by_source_genome_db'} = {}; | |
383 $self->{'_members_by_genome_db'} = {}; | |
384 foreach my $leaf (@{$self->root->get_all_leaves}) { | |
385 $self->SUPER::add_Member($leaf) if UNIVERSAL::isa($leaf, 'Bio::EnsEMBL::Compara::GeneTreeMember'); | |
386 } | |
387 } | |
388 return $self->{'_member_array'}; | |
389 } | |
390 | |
391 | |
392 =head2 add_Member | |
393 | |
394 Arg [1] : GeneTreeMember | |
395 Example : | |
396 Description: Add a new GeneTreeMember to this set and to the tree as | |
397 a child of the root node | |
398 Returntype : none | |
399 Exceptions : Throws if input objects don't check | |
400 Caller : General | |
401 | |
402 =cut | |
403 | |
404 sub add_Member { | |
405 my ($self, $member) = @_; | |
406 assert_ref($member, 'Bio::EnsEMBL::Compara::GeneTreeMember'); | |
407 $self->root->add_child($member); | |
408 $member->tree($self); | |
409 $self->SUPER::add_Member($member); | |
410 } | |
411 | |
412 | |
413 ######## | |
414 # Misc # | |
415 ######## | |
416 | |
417 # Dynamic definition of functions to allow NestedSet methods work with GeneTrees | |
418 foreach my $func_name (qw(get_all_nodes get_all_leaves get_all_sorted_leaves | |
419 find_leaf_by_node_id find_leaf_by_name find_node_by_node_id | |
420 find_node_by_name remove_nodes build_leftright_indexing flatten_tree | |
421 newick_format nhx_format string_tree print_tree | |
422 release_tree | |
423 )) { | |
424 my $full_name = "Bio::EnsEMBL::Compara::GeneTree::$func_name"; | |
425 *$full_name = sub { | |
426 my $self = shift; | |
427 my $ret = $self->root->$func_name(@_); | |
428 return $ret; | |
429 }; | |
430 # print STDERR "REDEFINE $func_name\n"; | |
431 } | |
432 | |
433 | |
434 1; | |
435 |