0
|
1 # $Id: Tree.pm,v 1.1 2002/11/18 22:08:33 kortsch Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Taxonomy::Tree
|
|
4 #
|
|
5 # Cared for by Dan Kortschak but pilfered extensively from Bio::Tree::Tree by Jason Stajich
|
|
6 #
|
|
7 # You may distribute this module under the same terms as perl itself
|
|
8
|
|
9 # POD documentation - main docs before the code
|
|
10
|
|
11 =head1 NAME
|
|
12
|
|
13 Bio::Taxonomy::Tree - An Organism Level Implementation of TreeI interface.
|
|
14
|
|
15 =head1 SYNOPSIS
|
|
16
|
|
17 # like from a TreeIO
|
|
18 my $treeio = new Bio::TreeIO(-format => 'newick', -file => 'treefile.dnd');
|
|
19 my $tree = $treeio->next_tree;
|
|
20 my @nodes = $tree->get_nodes;
|
|
21 my $root = $tree->get_root_node;
|
|
22 my @leaves = $tree->get_leaves;
|
|
23
|
|
24
|
|
25 =head1 DESCRIPTION
|
|
26
|
|
27 This object holds handles to Taxonomic Nodes which make up a tree.
|
|
28
|
|
29 =head1 EXAMPLES
|
|
30
|
|
31 use Bio::Species;
|
|
32 use Bio::Taxonomy::Tree;
|
|
33
|
|
34 my $human=new Bio::Species;
|
|
35 my $chimp=new Bio::Species;
|
|
36 my $bonobo=new Bio::Species;
|
|
37
|
|
38 $human->classification(qw( sapiens Homo Hominidae
|
|
39 Catarrhini Primates Eutheria
|
|
40 Mammalia Euteleostomi Vertebrata
|
|
41 Craniata Chordata
|
|
42 Metazoa Eukaryota ));
|
|
43 $chimp->classification(qw( troglodytes Pan Hominidae
|
|
44 Catarrhini Primates Eutheria
|
|
45 Mammalia Euteleostomi Vertebrata
|
|
46 Craniata Chordata
|
|
47 Metazoa Eukaryota ));
|
|
48 $bonobo->classification(qw( paniscus Pan Hominidae
|
|
49 Catarrhini Primates Eutheria
|
|
50 Mammalia Euteleostomi Vertebrata
|
|
51 Craniata Chordata
|
|
52 Metazoa Eukaryota ));
|
|
53
|
|
54 # ranks passed to $taxonomy match ranks of species
|
|
55 my @ranks = ('superkingdom','kingdom','phylum','subphylum',
|
|
56 'no rank 1','no rank 2','class','no rank 3','order',
|
|
57 'suborder','family','genus','species');
|
|
58
|
|
59 my $taxonomy=new Bio::Taxonomy(-ranks => \@ranks,
|
|
60 -method => 'trust',
|
|
61 -order => -1);
|
|
62
|
|
63 my @nodes;
|
|
64
|
|
65 my $tree1=new Bio::Taxonomy::Tree;
|
|
66 my $tree2=new Bio::Taxonomy::Tree;
|
|
67
|
|
68 push @nodes,$tree1->make_species_branch($human,$taxonomy);
|
|
69 push @nodes,$tree2->make_species_branch($chimp,$taxonomy);
|
|
70
|
|
71 my ($homo_sapiens)=$tree1->get_leaves;
|
|
72
|
|
73 $tree1->splice($tree2);
|
|
74
|
|
75 push @nodes,$tree1->add_species($bonobo,$taxonomy);
|
|
76
|
|
77 my @taxa;
|
|
78 foreach my $leaf ($tree1->get_leaves) {
|
|
79 push @taxa,$leaf->taxon;
|
|
80 }
|
|
81 print join(", ",@taxa)."\n";
|
|
82
|
|
83 @taxa=();
|
|
84 $tree1->remove_branch($homo_sapiens);
|
|
85 foreach my $leaf ($tree1->get_leaves) {
|
|
86 push @taxa,$leaf->taxon;
|
|
87 }
|
|
88 print join(", ",@taxa)."\n";
|
|
89
|
|
90 =head1 FEEDBACK
|
|
91
|
|
92 See AUTHOR
|
|
93
|
|
94 =head1 AUTHOR - Dan Kortschak
|
|
95
|
|
96 Email kortschak@rsbs.anu.edu.au
|
|
97
|
|
98 =head1 CONTRIBUTORS
|
|
99
|
|
100 Mainly Jason Stajich
|
|
101
|
|
102 =head1 APPENDIX
|
|
103
|
|
104 The rest of the documentation details each of the object methods.
|
|
105 Internal methods are usually preceded with a _
|
|
106
|
|
107 =cut
|
|
108
|
|
109
|
|
110 # Code begins...
|
|
111
|
|
112
|
|
113 package Bio::Taxonomy::Tree;
|
|
114 use vars qw(@ISA);
|
|
115 use strict;
|
|
116
|
|
117 # Object preamble - inherits from Bio::Root::Root
|
|
118
|
|
119 use Bio::Root::Root;
|
|
120 use Bio::Tree::TreeFunctionsI;
|
|
121 use Bio::Tree::TreeI;
|
|
122 use Bio::Taxonomy::Taxon;
|
|
123
|
|
124 # Import rank information from Bio::Taxonomy.pm
|
|
125 use vars qw(@RANK %RANK);
|
|
126
|
|
127 @ISA = qw(Bio::Root::Root Bio::Tree::TreeI Bio::Tree::TreeFunctionsI);
|
|
128
|
|
129 =head2 new
|
|
130
|
|
131 Title : new
|
|
132 Usage : my $obj = new Bio::Taxonomy::Tree();
|
|
133 Function: Builds a new Bio::Taxonomy::Tree object
|
|
134 Returns : Bio::Taxonomy::Tree
|
|
135 Args :
|
|
136
|
|
137
|
|
138 =cut
|
|
139
|
|
140 sub new {
|
|
141 my($class,@args) = @_;
|
|
142
|
|
143 my $self = $class->SUPER::new(@args);
|
|
144 $self->{'_rootnode'} = undef;
|
|
145 $self->{'_maxbranchlen'} = 0;
|
|
146
|
|
147 my ($root)= $self->_rearrange([qw(ROOT)], @args);
|
|
148 if( $root ) { $self->set_root_node($root); }
|
|
149 return $self;
|
|
150 }
|
|
151
|
|
152
|
|
153 =head2 get_nodes
|
|
154
|
|
155 Title : get_nodes
|
|
156 Usage : my @nodes = $tree->get_nodes()
|
|
157 Function: Return list of Bio::Taxonomy::Taxon objects
|
|
158 Returns : array of Bio::Taxonomy::Taxon objects
|
|
159 Args : (named values) hash with one value
|
|
160 order => 'b|breadth' first order or 'd|depth' first order
|
|
161
|
|
162 =cut
|
|
163
|
|
164 sub get_nodes{
|
|
165 my ($self, @args) = @_;
|
|
166
|
|
167 my ($order, $sortby) = $self->_rearrange([qw(ORDER SORTBY)],@args);
|
|
168 $order ||= 'depth';
|
|
169 $sortby ||= 'height';
|
|
170
|
|
171 if ($order =~ m/^b|(breadth)$/oi) {
|
|
172 my $node = $self->get_root_node;
|
|
173 my @children = ($node);
|
|
174 for (@children) {
|
|
175 push @children, $_->each_Descendent($sortby);
|
|
176 }
|
|
177 return @children;
|
|
178 }
|
|
179
|
|
180 if ($order =~ m/^d|(depth)$/oi) {
|
|
181 # this is depth-first search I believe
|
|
182 my $node = $self->get_root_node;
|
|
183 my @children = ($node,$node->get_Descendents($sortby));
|
|
184 return @children;
|
|
185 }
|
|
186 }
|
|
187
|
|
188 =head2 get_root_node
|
|
189
|
|
190 Title : get_root_node
|
|
191 Usage : my $node = $tree->get_root_node();
|
|
192 Function: Get the Top Node in the tree, in this implementation
|
|
193 Trees only have one top node.
|
|
194 Returns : Bio::Taxonomy::Taxon object
|
|
195 Args : none
|
|
196
|
|
197 =cut
|
|
198
|
|
199
|
|
200 sub get_root_node{
|
|
201 my ($self) = @_;
|
|
202 return $self->{'_rootnode'};
|
|
203 }
|
|
204
|
|
205 =head2 set_root_node
|
|
206
|
|
207 Title : set_root_node
|
|
208 Usage : $tree->set_root_node($node)
|
|
209 Function: Set the Root Node for the Tree
|
|
210 Returns : Bio::Taxonomy::Taxon
|
|
211 Args : Bio::Taxonomy::Taxon
|
|
212
|
|
213 =cut
|
|
214
|
|
215
|
|
216 sub set_root_node{
|
|
217 my ($self,$value) = @_;
|
|
218 if( defined $value ) {
|
|
219 if( ! $value->isa('Bio::Taxonomy::Taxon') ) {
|
|
220 $self->warn("Trying to set the root node to $value which is not a Bio::Taxonomy::Taxon");
|
|
221 return $self->get_root_node;
|
|
222 }
|
|
223 $self->{'_rootnode'} = $value;
|
|
224 }
|
|
225 return $self->get_root_node;
|
|
226 }
|
|
227
|
|
228
|
|
229 =head2 get_leaves
|
|
230
|
|
231 Title : get_leaves
|
|
232 Usage : my @nodes = $tree->get_leaves()
|
|
233 Function: Return list of Bio::Taxonomy::Taxon objects
|
|
234 Returns : array of Bio::Taxonomy::Taxon objects
|
|
235 Args :
|
|
236
|
|
237 =cut
|
|
238
|
|
239
|
|
240 sub get_leaves{
|
|
241 my ($self) = @_;
|
|
242
|
|
243 my $node = $self->get_root_node;
|
|
244 my @leaves;
|
|
245 my @children = ($node);
|
|
246 for (@children) {
|
|
247 push @children, $_->each_Descendent();
|
|
248 }
|
|
249 for (@children) {
|
|
250 push @leaves, $_ if $_->is_Leaf;
|
|
251 }
|
|
252 return @leaves;
|
|
253 }
|
|
254
|
|
255 =head2 make_species_branch
|
|
256
|
|
257 Title : make_species_branch
|
|
258 Usage : @nodes = $tree->make_species_branch($species,$taxonomy)
|
|
259 Function: Return list of Bio::Taxonomy::Taxon objects based on a Bio::Species object
|
|
260 Returns : array of Bio::Taxonomy::Taxon objects
|
|
261 Args : Bio::Species and Bio::Taxonomy objects
|
|
262
|
|
263 =cut
|
|
264
|
|
265 # I'm not happy that make_species_branch and make_branch are seperate routines
|
|
266 # should be able to just make_branch and have it sort things out
|
|
267
|
|
268 sub make_species_branch{
|
|
269 my ($self,$species,$taxonomy) = @_;
|
|
270
|
|
271 if (! $species->isa('Bio::Species') ) {
|
|
272 $self->throw("Trying to classify $species which is not a Bio::Species object");
|
|
273 }
|
|
274 if (! $taxonomy->isa('Bio::Taxonomy') ) {
|
|
275 $self->throw("Trying to classify with $taxonomy which is not a Bio::Taxonomy object");
|
|
276 }
|
|
277
|
|
278 # this is done to make sure we aren't duplicating a path (let God sort them out)
|
|
279 if (defined $self->get_root_node) {
|
|
280 $self->get_root_node->remove_all_Descendents;
|
|
281 }
|
|
282
|
|
283 my @nodes;
|
|
284
|
|
285 # nb taxa in [i][0] and ranks in [i][1]
|
|
286 my @taxa=$taxonomy->classify($species);
|
|
287
|
|
288 for (my $i = 0; $i < @taxa; $i++) {
|
|
289 $nodes[$i]=Bio::Taxonomy::Taxon->new(-taxon => $taxa[$i][0],
|
|
290 -rank => $taxa[$i][1]);
|
|
291 }
|
|
292
|
|
293 for (my $i = 0; $i < @taxa-1; $i++) {
|
|
294 $nodes[$i]->add_Descendent($nodes[$i+1]);
|
|
295 }
|
|
296
|
|
297 $self->set_root_node($nodes[0]);
|
|
298
|
|
299 return @nodes;
|
|
300 }
|
|
301
|
|
302
|
|
303 =head2 make_branch
|
|
304
|
|
305 Title : make_branch
|
|
306 Usage : $tree->make_branch($node)
|
|
307 Function: Make a linear Bio::Taxonomy::Tree object from a leafish node
|
|
308 Returns :
|
|
309 Args : Bio::Taxonomy::Taxon object
|
|
310
|
|
311 =cut
|
|
312
|
|
313
|
|
314 sub make_branch{
|
|
315 my ($self,$node) = @_;
|
|
316
|
|
317 # this is done to make sure we aren't duplicating a path (let God sort them out)
|
|
318 # note that if you are using a linked set of node which include node
|
|
319 # already in the tree, this will break
|
|
320 $self->get_root_node->remove_all_Descendents;
|
|
321
|
|
322 while (defined $node->ancestor) {
|
|
323 $self->set_root_node($node);
|
|
324 $node=$node->ancestor;
|
|
325 }
|
|
326 }
|
|
327
|
|
328
|
|
329 =head2 splice
|
|
330
|
|
331 Title : splice
|
|
332 Usage : @nodes = $tree->splice($tree)
|
|
333 Function: Return a of Bio::Taxonomy::Tree object that is a fusion of two
|
|
334 Returns : array of Bio::Taxonomy::Taxon added to tree
|
|
335 Args : Bio::Taxonomy::Tree object
|
|
336
|
|
337 =cut
|
|
338
|
|
339
|
|
340 sub splice{
|
|
341 my ($self,$tree) = @_;
|
|
342
|
|
343 my @nodes;
|
|
344
|
|
345 my @newleaves = $tree->get_leaves;
|
|
346 foreach my $leaf (@newleaves) {
|
|
347 push @nodes,$self->add_branch($leaf);
|
|
348 }
|
|
349
|
|
350 return @nodes;
|
|
351 }
|
|
352
|
|
353 =head2 add_species
|
|
354
|
|
355 Title : add_species
|
|
356 Usage : @nodes = $tree->add_species($species,$taxonomy)
|
|
357 Function: Return a of Bio::Taxonomy::Tree object with a new species added
|
|
358 Returns : array of Bio::Taxonomy::Taxon added to tree
|
|
359 Args : Bio::Species object
|
|
360
|
|
361 =cut
|
|
362
|
|
363
|
|
364 sub add_species{
|
|
365 my ($self,$species,$taxonomy) = @_;
|
|
366
|
|
367 my $branch=Bio::Taxonomy::Tree->new;
|
|
368 my @nodes=$branch->make_species_branch($species,$taxonomy);
|
|
369
|
|
370 my ($newleaf)=$branch->get_leaves;
|
|
371
|
|
372 return $self->add_branch($newleaf);
|
|
373 }
|
|
374
|
|
375 =head2 add_branch
|
|
376
|
|
377 Title : add_branch
|
|
378 Usage : $tree->add_branch($node,boolean)
|
|
379 Function: Return a of Bio::Taxonomy::Tree object with a new branch added
|
|
380 Returns : array of Bio::Taxonomy::Taxon objects of the resulting tree
|
|
381 Args : Bio::Taxonomy::Taxon object
|
|
382 boolean flag to force overwrite of descendent
|
|
383 (see Bio::Node->add_Descendent)
|
|
384
|
|
385 =cut
|
|
386
|
|
387
|
|
388 sub add_branch {
|
|
389 my ($self,$node,$force) = @_;
|
|
390
|
|
391 my $best_node_level=0;
|
|
392 my ($best_node,@nodes,$common);
|
|
393
|
|
394 my @leaves=$self->get_leaves;
|
|
395 foreach my $leaf (@leaves) {
|
|
396 $common=$node->recent_common_ancestor($leaf); # the root of the part to add
|
|
397 if (defined $common && ($common->distance_to_root > $best_node_level)) {
|
|
398 $best_node_level = $common->distance_to_root;
|
|
399 $best_node = $common;
|
|
400 }
|
|
401 }
|
|
402
|
|
403 return unless defined $best_node;
|
|
404
|
|
405 push @nodes,($self->get_root_node,$self->get_root_node->get_Descendents);
|
|
406 foreach my $node (@nodes) {
|
|
407 if ((defined $best_node->id && $best_node->id == $node->id) ||
|
|
408 ($best_node->rank eq $node->rank && $best_node->taxon eq $node->taxon) &&
|
|
409 ($best_node->rank ne 'no rank')) {
|
|
410 foreach my $descendent ($common->each_Descendent) {
|
|
411 $node->add_Descendent($descendent,$force);
|
|
412 }
|
|
413 }
|
|
414
|
|
415 $self->set_root_node($node) if $node->distance_to_root==0;
|
|
416 }
|
|
417
|
|
418 return ($common->get_Descendents);
|
|
419 }
|
|
420
|
|
421 =head2 remove_branch
|
|
422
|
|
423 Title : remove_branch
|
|
424 Usage : $tree->remove_branch($node)
|
|
425 Function: remove a branch up to the next multifurcation
|
|
426 Returns :
|
|
427 Args : Bio::Taxonomy::Taxon object
|
|
428
|
|
429 =cut
|
|
430
|
|
431
|
|
432 sub remove_branch{
|
|
433 my ($self,$node) = @_;
|
|
434
|
|
435 # we can define a branch at any point along it
|
|
436
|
|
437 while (defined $node->ancestor) {
|
|
438 last if $node->ancestor->each_Descendent > 1;
|
|
439 $node=$node->ancestor;
|
|
440 }
|
|
441 $node->remove_all_Descendents; # I'm not sure if this is necessary,
|
|
442 # but I don't see that remove_Descendent
|
|
443 # has the side effect of deleting
|
|
444 # descendent nodes of the deletee
|
|
445 $node->ancestor->remove_Descendent($node);
|
|
446 }
|
|
447
|
|
448
|
|
449
|
|
450 1;
|