comparison variant_effect_predictor/Bio/Taxonomy/Tree.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $Id: Tree.pm,v 1.1 2002/11/18 22:08:33 kortsch Exp $
2 #
3 # BioPerl module for Bio::Taxonomy::Tree
4 #
5 # Cared for by Dan Kortschak but pilfered extensively from Bio::Tree::Tree by Jason Stajich
6 #
7 # You may distribute this module under the same terms as perl itself
8
9 # POD documentation - main docs before the code
10
11 =head1 NAME
12
13 Bio::Taxonomy::Tree - An Organism Level Implementation of TreeI interface.
14
15 =head1 SYNOPSIS
16
17 # like from a TreeIO
18 my $treeio = new Bio::TreeIO(-format => 'newick', -file => 'treefile.dnd');
19 my $tree = $treeio->next_tree;
20 my @nodes = $tree->get_nodes;
21 my $root = $tree->get_root_node;
22 my @leaves = $tree->get_leaves;
23
24
25 =head1 DESCRIPTION
26
27 This object holds handles to Taxonomic Nodes which make up a tree.
28
29 =head1 EXAMPLES
30
31 use Bio::Species;
32 use Bio::Taxonomy::Tree;
33
34 my $human=new Bio::Species;
35 my $chimp=new Bio::Species;
36 my $bonobo=new Bio::Species;
37
38 $human->classification(qw( sapiens Homo Hominidae
39 Catarrhini Primates Eutheria
40 Mammalia Euteleostomi Vertebrata
41 Craniata Chordata
42 Metazoa Eukaryota ));
43 $chimp->classification(qw( troglodytes Pan Hominidae
44 Catarrhini Primates Eutheria
45 Mammalia Euteleostomi Vertebrata
46 Craniata Chordata
47 Metazoa Eukaryota ));
48 $bonobo->classification(qw( paniscus Pan Hominidae
49 Catarrhini Primates Eutheria
50 Mammalia Euteleostomi Vertebrata
51 Craniata Chordata
52 Metazoa Eukaryota ));
53
54 # ranks passed to $taxonomy match ranks of species
55 my @ranks = ('superkingdom','kingdom','phylum','subphylum',
56 'no rank 1','no rank 2','class','no rank 3','order',
57 'suborder','family','genus','species');
58
59 my $taxonomy=new Bio::Taxonomy(-ranks => \@ranks,
60 -method => 'trust',
61 -order => -1);
62
63 my @nodes;
64
65 my $tree1=new Bio::Taxonomy::Tree;
66 my $tree2=new Bio::Taxonomy::Tree;
67
68 push @nodes,$tree1->make_species_branch($human,$taxonomy);
69 push @nodes,$tree2->make_species_branch($chimp,$taxonomy);
70
71 my ($homo_sapiens)=$tree1->get_leaves;
72
73 $tree1->splice($tree2);
74
75 push @nodes,$tree1->add_species($bonobo,$taxonomy);
76
77 my @taxa;
78 foreach my $leaf ($tree1->get_leaves) {
79 push @taxa,$leaf->taxon;
80 }
81 print join(", ",@taxa)."\n";
82
83 @taxa=();
84 $tree1->remove_branch($homo_sapiens);
85 foreach my $leaf ($tree1->get_leaves) {
86 push @taxa,$leaf->taxon;
87 }
88 print join(", ",@taxa)."\n";
89
90 =head1 FEEDBACK
91
92 See AUTHOR
93
94 =head1 AUTHOR - Dan Kortschak
95
96 Email kortschak@rsbs.anu.edu.au
97
98 =head1 CONTRIBUTORS
99
100 Mainly Jason Stajich
101
102 =head1 APPENDIX
103
104 The rest of the documentation details each of the object methods.
105 Internal methods are usually preceded with a _
106
107 =cut
108
109
110 # Code begins...
111
112
113 package Bio::Taxonomy::Tree;
114 use vars qw(@ISA);
115 use strict;
116
117 # Object preamble - inherits from Bio::Root::Root
118
119 use Bio::Root::Root;
120 use Bio::Tree::TreeFunctionsI;
121 use Bio::Tree::TreeI;
122 use Bio::Taxonomy::Taxon;
123
124 # Import rank information from Bio::Taxonomy.pm
125 use vars qw(@RANK %RANK);
126
127 @ISA = qw(Bio::Root::Root Bio::Tree::TreeI Bio::Tree::TreeFunctionsI);
128
129 =head2 new
130
131 Title : new
132 Usage : my $obj = new Bio::Taxonomy::Tree();
133 Function: Builds a new Bio::Taxonomy::Tree object
134 Returns : Bio::Taxonomy::Tree
135 Args :
136
137
138 =cut
139
140 sub new {
141 my($class,@args) = @_;
142
143 my $self = $class->SUPER::new(@args);
144 $self->{'_rootnode'} = undef;
145 $self->{'_maxbranchlen'} = 0;
146
147 my ($root)= $self->_rearrange([qw(ROOT)], @args);
148 if( $root ) { $self->set_root_node($root); }
149 return $self;
150 }
151
152
153 =head2 get_nodes
154
155 Title : get_nodes
156 Usage : my @nodes = $tree->get_nodes()
157 Function: Return list of Bio::Taxonomy::Taxon objects
158 Returns : array of Bio::Taxonomy::Taxon objects
159 Args : (named values) hash with one value
160 order => 'b|breadth' first order or 'd|depth' first order
161
162 =cut
163
164 sub get_nodes{
165 my ($self, @args) = @_;
166
167 my ($order, $sortby) = $self->_rearrange([qw(ORDER SORTBY)],@args);
168 $order ||= 'depth';
169 $sortby ||= 'height';
170
171 if ($order =~ m/^b|(breadth)$/oi) {
172 my $node = $self->get_root_node;
173 my @children = ($node);
174 for (@children) {
175 push @children, $_->each_Descendent($sortby);
176 }
177 return @children;
178 }
179
180 if ($order =~ m/^d|(depth)$/oi) {
181 # this is depth-first search I believe
182 my $node = $self->get_root_node;
183 my @children = ($node,$node->get_Descendents($sortby));
184 return @children;
185 }
186 }
187
188 =head2 get_root_node
189
190 Title : get_root_node
191 Usage : my $node = $tree->get_root_node();
192 Function: Get the Top Node in the tree, in this implementation
193 Trees only have one top node.
194 Returns : Bio::Taxonomy::Taxon object
195 Args : none
196
197 =cut
198
199
200 sub get_root_node{
201 my ($self) = @_;
202 return $self->{'_rootnode'};
203 }
204
205 =head2 set_root_node
206
207 Title : set_root_node
208 Usage : $tree->set_root_node($node)
209 Function: Set the Root Node for the Tree
210 Returns : Bio::Taxonomy::Taxon
211 Args : Bio::Taxonomy::Taxon
212
213 =cut
214
215
216 sub set_root_node{
217 my ($self,$value) = @_;
218 if( defined $value ) {
219 if( ! $value->isa('Bio::Taxonomy::Taxon') ) {
220 $self->warn("Trying to set the root node to $value which is not a Bio::Taxonomy::Taxon");
221 return $self->get_root_node;
222 }
223 $self->{'_rootnode'} = $value;
224 }
225 return $self->get_root_node;
226 }
227
228
229 =head2 get_leaves
230
231 Title : get_leaves
232 Usage : my @nodes = $tree->get_leaves()
233 Function: Return list of Bio::Taxonomy::Taxon objects
234 Returns : array of Bio::Taxonomy::Taxon objects
235 Args :
236
237 =cut
238
239
240 sub get_leaves{
241 my ($self) = @_;
242
243 my $node = $self->get_root_node;
244 my @leaves;
245 my @children = ($node);
246 for (@children) {
247 push @children, $_->each_Descendent();
248 }
249 for (@children) {
250 push @leaves, $_ if $_->is_Leaf;
251 }
252 return @leaves;
253 }
254
255 =head2 make_species_branch
256
257 Title : make_species_branch
258 Usage : @nodes = $tree->make_species_branch($species,$taxonomy)
259 Function: Return list of Bio::Taxonomy::Taxon objects based on a Bio::Species object
260 Returns : array of Bio::Taxonomy::Taxon objects
261 Args : Bio::Species and Bio::Taxonomy objects
262
263 =cut
264
265 # I'm not happy that make_species_branch and make_branch are seperate routines
266 # should be able to just make_branch and have it sort things out
267
268 sub make_species_branch{
269 my ($self,$species,$taxonomy) = @_;
270
271 if (! $species->isa('Bio::Species') ) {
272 $self->throw("Trying to classify $species which is not a Bio::Species object");
273 }
274 if (! $taxonomy->isa('Bio::Taxonomy') ) {
275 $self->throw("Trying to classify with $taxonomy which is not a Bio::Taxonomy object");
276 }
277
278 # this is done to make sure we aren't duplicating a path (let God sort them out)
279 if (defined $self->get_root_node) {
280 $self->get_root_node->remove_all_Descendents;
281 }
282
283 my @nodes;
284
285 # nb taxa in [i][0] and ranks in [i][1]
286 my @taxa=$taxonomy->classify($species);
287
288 for (my $i = 0; $i < @taxa; $i++) {
289 $nodes[$i]=Bio::Taxonomy::Taxon->new(-taxon => $taxa[$i][0],
290 -rank => $taxa[$i][1]);
291 }
292
293 for (my $i = 0; $i < @taxa-1; $i++) {
294 $nodes[$i]->add_Descendent($nodes[$i+1]);
295 }
296
297 $self->set_root_node($nodes[0]);
298
299 return @nodes;
300 }
301
302
303 =head2 make_branch
304
305 Title : make_branch
306 Usage : $tree->make_branch($node)
307 Function: Make a linear Bio::Taxonomy::Tree object from a leafish node
308 Returns :
309 Args : Bio::Taxonomy::Taxon object
310
311 =cut
312
313
314 sub make_branch{
315 my ($self,$node) = @_;
316
317 # this is done to make sure we aren't duplicating a path (let God sort them out)
318 # note that if you are using a linked set of node which include node
319 # already in the tree, this will break
320 $self->get_root_node->remove_all_Descendents;
321
322 while (defined $node->ancestor) {
323 $self->set_root_node($node);
324 $node=$node->ancestor;
325 }
326 }
327
328
329 =head2 splice
330
331 Title : splice
332 Usage : @nodes = $tree->splice($tree)
333 Function: Return a of Bio::Taxonomy::Tree object that is a fusion of two
334 Returns : array of Bio::Taxonomy::Taxon added to tree
335 Args : Bio::Taxonomy::Tree object
336
337 =cut
338
339
340 sub splice{
341 my ($self,$tree) = @_;
342
343 my @nodes;
344
345 my @newleaves = $tree->get_leaves;
346 foreach my $leaf (@newleaves) {
347 push @nodes,$self->add_branch($leaf);
348 }
349
350 return @nodes;
351 }
352
353 =head2 add_species
354
355 Title : add_species
356 Usage : @nodes = $tree->add_species($species,$taxonomy)
357 Function: Return a of Bio::Taxonomy::Tree object with a new species added
358 Returns : array of Bio::Taxonomy::Taxon added to tree
359 Args : Bio::Species object
360
361 =cut
362
363
364 sub add_species{
365 my ($self,$species,$taxonomy) = @_;
366
367 my $branch=Bio::Taxonomy::Tree->new;
368 my @nodes=$branch->make_species_branch($species,$taxonomy);
369
370 my ($newleaf)=$branch->get_leaves;
371
372 return $self->add_branch($newleaf);
373 }
374
375 =head2 add_branch
376
377 Title : add_branch
378 Usage : $tree->add_branch($node,boolean)
379 Function: Return a of Bio::Taxonomy::Tree object with a new branch added
380 Returns : array of Bio::Taxonomy::Taxon objects of the resulting tree
381 Args : Bio::Taxonomy::Taxon object
382 boolean flag to force overwrite of descendent
383 (see Bio::Node->add_Descendent)
384
385 =cut
386
387
388 sub add_branch {
389 my ($self,$node,$force) = @_;
390
391 my $best_node_level=0;
392 my ($best_node,@nodes,$common);
393
394 my @leaves=$self->get_leaves;
395 foreach my $leaf (@leaves) {
396 $common=$node->recent_common_ancestor($leaf); # the root of the part to add
397 if (defined $common && ($common->distance_to_root > $best_node_level)) {
398 $best_node_level = $common->distance_to_root;
399 $best_node = $common;
400 }
401 }
402
403 return unless defined $best_node;
404
405 push @nodes,($self->get_root_node,$self->get_root_node->get_Descendents);
406 foreach my $node (@nodes) {
407 if ((defined $best_node->id && $best_node->id == $node->id) ||
408 ($best_node->rank eq $node->rank && $best_node->taxon eq $node->taxon) &&
409 ($best_node->rank ne 'no rank')) {
410 foreach my $descendent ($common->each_Descendent) {
411 $node->add_Descendent($descendent,$force);
412 }
413 }
414
415 $self->set_root_node($node) if $node->distance_to_root==0;
416 }
417
418 return ($common->get_Descendents);
419 }
420
421 =head2 remove_branch
422
423 Title : remove_branch
424 Usage : $tree->remove_branch($node)
425 Function: remove a branch up to the next multifurcation
426 Returns :
427 Args : Bio::Taxonomy::Taxon object
428
429 =cut
430
431
432 sub remove_branch{
433 my ($self,$node) = @_;
434
435 # we can define a branch at any point along it
436
437 while (defined $node->ancestor) {
438 last if $node->ancestor->each_Descendent > 1;
439 $node=$node->ancestor;
440 }
441 $node->remove_all_Descendents; # I'm not sure if this is necessary,
442 # but I don't see that remove_Descendent
443 # has the side effect of deleting
444 # descendent nodes of the deletee
445 $node->ancestor->remove_Descendent($node);
446 }
447
448
449
450 1;