Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/Taxonomy/Tree.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 # $Id: Tree.pm,v 1.1 2002/11/18 22:08:33 kortsch Exp $ | |
2 # | |
3 # BioPerl module for Bio::Taxonomy::Tree | |
4 # | |
5 # Cared for by Dan Kortschak but pilfered extensively from Bio::Tree::Tree by Jason Stajich | |
6 # | |
7 # You may distribute this module under the same terms as perl itself | |
8 | |
9 # POD documentation - main docs before the code | |
10 | |
11 =head1 NAME | |
12 | |
13 Bio::Taxonomy::Tree - An Organism Level Implementation of TreeI interface. | |
14 | |
15 =head1 SYNOPSIS | |
16 | |
17 # like from a TreeIO | |
18 my $treeio = new Bio::TreeIO(-format => 'newick', -file => 'treefile.dnd'); | |
19 my $tree = $treeio->next_tree; | |
20 my @nodes = $tree->get_nodes; | |
21 my $root = $tree->get_root_node; | |
22 my @leaves = $tree->get_leaves; | |
23 | |
24 | |
25 =head1 DESCRIPTION | |
26 | |
27 This object holds handles to Taxonomic Nodes which make up a tree. | |
28 | |
29 =head1 EXAMPLES | |
30 | |
31 use Bio::Species; | |
32 use Bio::Taxonomy::Tree; | |
33 | |
34 my $human=new Bio::Species; | |
35 my $chimp=new Bio::Species; | |
36 my $bonobo=new Bio::Species; | |
37 | |
38 $human->classification(qw( sapiens Homo Hominidae | |
39 Catarrhini Primates Eutheria | |
40 Mammalia Euteleostomi Vertebrata | |
41 Craniata Chordata | |
42 Metazoa Eukaryota )); | |
43 $chimp->classification(qw( troglodytes Pan Hominidae | |
44 Catarrhini Primates Eutheria | |
45 Mammalia Euteleostomi Vertebrata | |
46 Craniata Chordata | |
47 Metazoa Eukaryota )); | |
48 $bonobo->classification(qw( paniscus Pan Hominidae | |
49 Catarrhini Primates Eutheria | |
50 Mammalia Euteleostomi Vertebrata | |
51 Craniata Chordata | |
52 Metazoa Eukaryota )); | |
53 | |
54 # ranks passed to $taxonomy match ranks of species | |
55 my @ranks = ('superkingdom','kingdom','phylum','subphylum', | |
56 'no rank 1','no rank 2','class','no rank 3','order', | |
57 'suborder','family','genus','species'); | |
58 | |
59 my $taxonomy=new Bio::Taxonomy(-ranks => \@ranks, | |
60 -method => 'trust', | |
61 -order => -1); | |
62 | |
63 my @nodes; | |
64 | |
65 my $tree1=new Bio::Taxonomy::Tree; | |
66 my $tree2=new Bio::Taxonomy::Tree; | |
67 | |
68 push @nodes,$tree1->make_species_branch($human,$taxonomy); | |
69 push @nodes,$tree2->make_species_branch($chimp,$taxonomy); | |
70 | |
71 my ($homo_sapiens)=$tree1->get_leaves; | |
72 | |
73 $tree1->splice($tree2); | |
74 | |
75 push @nodes,$tree1->add_species($bonobo,$taxonomy); | |
76 | |
77 my @taxa; | |
78 foreach my $leaf ($tree1->get_leaves) { | |
79 push @taxa,$leaf->taxon; | |
80 } | |
81 print join(", ",@taxa)."\n"; | |
82 | |
83 @taxa=(); | |
84 $tree1->remove_branch($homo_sapiens); | |
85 foreach my $leaf ($tree1->get_leaves) { | |
86 push @taxa,$leaf->taxon; | |
87 } | |
88 print join(", ",@taxa)."\n"; | |
89 | |
90 =head1 FEEDBACK | |
91 | |
92 See AUTHOR | |
93 | |
94 =head1 AUTHOR - Dan Kortschak | |
95 | |
96 Email kortschak@rsbs.anu.edu.au | |
97 | |
98 =head1 CONTRIBUTORS | |
99 | |
100 Mainly Jason Stajich | |
101 | |
102 =head1 APPENDIX | |
103 | |
104 The rest of the documentation details each of the object methods. | |
105 Internal methods are usually preceded with a _ | |
106 | |
107 =cut | |
108 | |
109 | |
110 # Code begins... | |
111 | |
112 | |
113 package Bio::Taxonomy::Tree; | |
114 use vars qw(@ISA); | |
115 use strict; | |
116 | |
117 # Object preamble - inherits from Bio::Root::Root | |
118 | |
119 use Bio::Root::Root; | |
120 use Bio::Tree::TreeFunctionsI; | |
121 use Bio::Tree::TreeI; | |
122 use Bio::Taxonomy::Taxon; | |
123 | |
124 # Import rank information from Bio::Taxonomy.pm | |
125 use vars qw(@RANK %RANK); | |
126 | |
127 @ISA = qw(Bio::Root::Root Bio::Tree::TreeI Bio::Tree::TreeFunctionsI); | |
128 | |
129 =head2 new | |
130 | |
131 Title : new | |
132 Usage : my $obj = new Bio::Taxonomy::Tree(); | |
133 Function: Builds a new Bio::Taxonomy::Tree object | |
134 Returns : Bio::Taxonomy::Tree | |
135 Args : | |
136 | |
137 | |
138 =cut | |
139 | |
140 sub new { | |
141 my($class,@args) = @_; | |
142 | |
143 my $self = $class->SUPER::new(@args); | |
144 $self->{'_rootnode'} = undef; | |
145 $self->{'_maxbranchlen'} = 0; | |
146 | |
147 my ($root)= $self->_rearrange([qw(ROOT)], @args); | |
148 if( $root ) { $self->set_root_node($root); } | |
149 return $self; | |
150 } | |
151 | |
152 | |
153 =head2 get_nodes | |
154 | |
155 Title : get_nodes | |
156 Usage : my @nodes = $tree->get_nodes() | |
157 Function: Return list of Bio::Taxonomy::Taxon objects | |
158 Returns : array of Bio::Taxonomy::Taxon objects | |
159 Args : (named values) hash with one value | |
160 order => 'b|breadth' first order or 'd|depth' first order | |
161 | |
162 =cut | |
163 | |
164 sub get_nodes{ | |
165 my ($self, @args) = @_; | |
166 | |
167 my ($order, $sortby) = $self->_rearrange([qw(ORDER SORTBY)],@args); | |
168 $order ||= 'depth'; | |
169 $sortby ||= 'height'; | |
170 | |
171 if ($order =~ m/^b|(breadth)$/oi) { | |
172 my $node = $self->get_root_node; | |
173 my @children = ($node); | |
174 for (@children) { | |
175 push @children, $_->each_Descendent($sortby); | |
176 } | |
177 return @children; | |
178 } | |
179 | |
180 if ($order =~ m/^d|(depth)$/oi) { | |
181 # this is depth-first search I believe | |
182 my $node = $self->get_root_node; | |
183 my @children = ($node,$node->get_Descendents($sortby)); | |
184 return @children; | |
185 } | |
186 } | |
187 | |
188 =head2 get_root_node | |
189 | |
190 Title : get_root_node | |
191 Usage : my $node = $tree->get_root_node(); | |
192 Function: Get the Top Node in the tree, in this implementation | |
193 Trees only have one top node. | |
194 Returns : Bio::Taxonomy::Taxon object | |
195 Args : none | |
196 | |
197 =cut | |
198 | |
199 | |
200 sub get_root_node{ | |
201 my ($self) = @_; | |
202 return $self->{'_rootnode'}; | |
203 } | |
204 | |
205 =head2 set_root_node | |
206 | |
207 Title : set_root_node | |
208 Usage : $tree->set_root_node($node) | |
209 Function: Set the Root Node for the Tree | |
210 Returns : Bio::Taxonomy::Taxon | |
211 Args : Bio::Taxonomy::Taxon | |
212 | |
213 =cut | |
214 | |
215 | |
216 sub set_root_node{ | |
217 my ($self,$value) = @_; | |
218 if( defined $value ) { | |
219 if( ! $value->isa('Bio::Taxonomy::Taxon') ) { | |
220 $self->warn("Trying to set the root node to $value which is not a Bio::Taxonomy::Taxon"); | |
221 return $self->get_root_node; | |
222 } | |
223 $self->{'_rootnode'} = $value; | |
224 } | |
225 return $self->get_root_node; | |
226 } | |
227 | |
228 | |
229 =head2 get_leaves | |
230 | |
231 Title : get_leaves | |
232 Usage : my @nodes = $tree->get_leaves() | |
233 Function: Return list of Bio::Taxonomy::Taxon objects | |
234 Returns : array of Bio::Taxonomy::Taxon objects | |
235 Args : | |
236 | |
237 =cut | |
238 | |
239 | |
240 sub get_leaves{ | |
241 my ($self) = @_; | |
242 | |
243 my $node = $self->get_root_node; | |
244 my @leaves; | |
245 my @children = ($node); | |
246 for (@children) { | |
247 push @children, $_->each_Descendent(); | |
248 } | |
249 for (@children) { | |
250 push @leaves, $_ if $_->is_Leaf; | |
251 } | |
252 return @leaves; | |
253 } | |
254 | |
255 =head2 make_species_branch | |
256 | |
257 Title : make_species_branch | |
258 Usage : @nodes = $tree->make_species_branch($species,$taxonomy) | |
259 Function: Return list of Bio::Taxonomy::Taxon objects based on a Bio::Species object | |
260 Returns : array of Bio::Taxonomy::Taxon objects | |
261 Args : Bio::Species and Bio::Taxonomy objects | |
262 | |
263 =cut | |
264 | |
265 # I'm not happy that make_species_branch and make_branch are seperate routines | |
266 # should be able to just make_branch and have it sort things out | |
267 | |
268 sub make_species_branch{ | |
269 my ($self,$species,$taxonomy) = @_; | |
270 | |
271 if (! $species->isa('Bio::Species') ) { | |
272 $self->throw("Trying to classify $species which is not a Bio::Species object"); | |
273 } | |
274 if (! $taxonomy->isa('Bio::Taxonomy') ) { | |
275 $self->throw("Trying to classify with $taxonomy which is not a Bio::Taxonomy object"); | |
276 } | |
277 | |
278 # this is done to make sure we aren't duplicating a path (let God sort them out) | |
279 if (defined $self->get_root_node) { | |
280 $self->get_root_node->remove_all_Descendents; | |
281 } | |
282 | |
283 my @nodes; | |
284 | |
285 # nb taxa in [i][0] and ranks in [i][1] | |
286 my @taxa=$taxonomy->classify($species); | |
287 | |
288 for (my $i = 0; $i < @taxa; $i++) { | |
289 $nodes[$i]=Bio::Taxonomy::Taxon->new(-taxon => $taxa[$i][0], | |
290 -rank => $taxa[$i][1]); | |
291 } | |
292 | |
293 for (my $i = 0; $i < @taxa-1; $i++) { | |
294 $nodes[$i]->add_Descendent($nodes[$i+1]); | |
295 } | |
296 | |
297 $self->set_root_node($nodes[0]); | |
298 | |
299 return @nodes; | |
300 } | |
301 | |
302 | |
303 =head2 make_branch | |
304 | |
305 Title : make_branch | |
306 Usage : $tree->make_branch($node) | |
307 Function: Make a linear Bio::Taxonomy::Tree object from a leafish node | |
308 Returns : | |
309 Args : Bio::Taxonomy::Taxon object | |
310 | |
311 =cut | |
312 | |
313 | |
314 sub make_branch{ | |
315 my ($self,$node) = @_; | |
316 | |
317 # this is done to make sure we aren't duplicating a path (let God sort them out) | |
318 # note that if you are using a linked set of node which include node | |
319 # already in the tree, this will break | |
320 $self->get_root_node->remove_all_Descendents; | |
321 | |
322 while (defined $node->ancestor) { | |
323 $self->set_root_node($node); | |
324 $node=$node->ancestor; | |
325 } | |
326 } | |
327 | |
328 | |
329 =head2 splice | |
330 | |
331 Title : splice | |
332 Usage : @nodes = $tree->splice($tree) | |
333 Function: Return a of Bio::Taxonomy::Tree object that is a fusion of two | |
334 Returns : array of Bio::Taxonomy::Taxon added to tree | |
335 Args : Bio::Taxonomy::Tree object | |
336 | |
337 =cut | |
338 | |
339 | |
340 sub splice{ | |
341 my ($self,$tree) = @_; | |
342 | |
343 my @nodes; | |
344 | |
345 my @newleaves = $tree->get_leaves; | |
346 foreach my $leaf (@newleaves) { | |
347 push @nodes,$self->add_branch($leaf); | |
348 } | |
349 | |
350 return @nodes; | |
351 } | |
352 | |
353 =head2 add_species | |
354 | |
355 Title : add_species | |
356 Usage : @nodes = $tree->add_species($species,$taxonomy) | |
357 Function: Return a of Bio::Taxonomy::Tree object with a new species added | |
358 Returns : array of Bio::Taxonomy::Taxon added to tree | |
359 Args : Bio::Species object | |
360 | |
361 =cut | |
362 | |
363 | |
364 sub add_species{ | |
365 my ($self,$species,$taxonomy) = @_; | |
366 | |
367 my $branch=Bio::Taxonomy::Tree->new; | |
368 my @nodes=$branch->make_species_branch($species,$taxonomy); | |
369 | |
370 my ($newleaf)=$branch->get_leaves; | |
371 | |
372 return $self->add_branch($newleaf); | |
373 } | |
374 | |
375 =head2 add_branch | |
376 | |
377 Title : add_branch | |
378 Usage : $tree->add_branch($node,boolean) | |
379 Function: Return a of Bio::Taxonomy::Tree object with a new branch added | |
380 Returns : array of Bio::Taxonomy::Taxon objects of the resulting tree | |
381 Args : Bio::Taxonomy::Taxon object | |
382 boolean flag to force overwrite of descendent | |
383 (see Bio::Node->add_Descendent) | |
384 | |
385 =cut | |
386 | |
387 | |
388 sub add_branch { | |
389 my ($self,$node,$force) = @_; | |
390 | |
391 my $best_node_level=0; | |
392 my ($best_node,@nodes,$common); | |
393 | |
394 my @leaves=$self->get_leaves; | |
395 foreach my $leaf (@leaves) { | |
396 $common=$node->recent_common_ancestor($leaf); # the root of the part to add | |
397 if (defined $common && ($common->distance_to_root > $best_node_level)) { | |
398 $best_node_level = $common->distance_to_root; | |
399 $best_node = $common; | |
400 } | |
401 } | |
402 | |
403 return unless defined $best_node; | |
404 | |
405 push @nodes,($self->get_root_node,$self->get_root_node->get_Descendents); | |
406 foreach my $node (@nodes) { | |
407 if ((defined $best_node->id && $best_node->id == $node->id) || | |
408 ($best_node->rank eq $node->rank && $best_node->taxon eq $node->taxon) && | |
409 ($best_node->rank ne 'no rank')) { | |
410 foreach my $descendent ($common->each_Descendent) { | |
411 $node->add_Descendent($descendent,$force); | |
412 } | |
413 } | |
414 | |
415 $self->set_root_node($node) if $node->distance_to_root==0; | |
416 } | |
417 | |
418 return ($common->get_Descendents); | |
419 } | |
420 | |
421 =head2 remove_branch | |
422 | |
423 Title : remove_branch | |
424 Usage : $tree->remove_branch($node) | |
425 Function: remove a branch up to the next multifurcation | |
426 Returns : | |
427 Args : Bio::Taxonomy::Taxon object | |
428 | |
429 =cut | |
430 | |
431 | |
432 sub remove_branch{ | |
433 my ($self,$node) = @_; | |
434 | |
435 # we can define a branch at any point along it | |
436 | |
437 while (defined $node->ancestor) { | |
438 last if $node->ancestor->each_Descendent > 1; | |
439 $node=$node->ancestor; | |
440 } | |
441 $node->remove_all_Descendents; # I'm not sure if this is necessary, | |
442 # but I don't see that remove_Descendent | |
443 # has the side effect of deleting | |
444 # descendent nodes of the deletee | |
445 $node->ancestor->remove_Descendent($node); | |
446 } | |
447 | |
448 | |
449 | |
450 1; |