Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Taxonomy/Tree.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: Tree.pm,v 1.1 2002/11/18 22:08:33 kortsch Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::Taxonomy::Tree | |
| 4 # | |
| 5 # Cared for by Dan Kortschak but pilfered extensively from Bio::Tree::Tree by Jason Stajich | |
| 6 # | |
| 7 # You may distribute this module under the same terms as perl itself | |
| 8 | |
| 9 # POD documentation - main docs before the code | |
| 10 | |
| 11 =head1 NAME | |
| 12 | |
| 13 Bio::Taxonomy::Tree - An Organism Level Implementation of TreeI interface. | |
| 14 | |
| 15 =head1 SYNOPSIS | |
| 16 | |
| 17 # like from a TreeIO | |
| 18 my $treeio = new Bio::TreeIO(-format => 'newick', -file => 'treefile.dnd'); | |
| 19 my $tree = $treeio->next_tree; | |
| 20 my @nodes = $tree->get_nodes; | |
| 21 my $root = $tree->get_root_node; | |
| 22 my @leaves = $tree->get_leaves; | |
| 23 | |
| 24 | |
| 25 =head1 DESCRIPTION | |
| 26 | |
| 27 This object holds handles to Taxonomic Nodes which make up a tree. | |
| 28 | |
| 29 =head1 EXAMPLES | |
| 30 | |
| 31 use Bio::Species; | |
| 32 use Bio::Taxonomy::Tree; | |
| 33 | |
| 34 my $human=new Bio::Species; | |
| 35 my $chimp=new Bio::Species; | |
| 36 my $bonobo=new Bio::Species; | |
| 37 | |
| 38 $human->classification(qw( sapiens Homo Hominidae | |
| 39 Catarrhini Primates Eutheria | |
| 40 Mammalia Euteleostomi Vertebrata | |
| 41 Craniata Chordata | |
| 42 Metazoa Eukaryota )); | |
| 43 $chimp->classification(qw( troglodytes Pan Hominidae | |
| 44 Catarrhini Primates Eutheria | |
| 45 Mammalia Euteleostomi Vertebrata | |
| 46 Craniata Chordata | |
| 47 Metazoa Eukaryota )); | |
| 48 $bonobo->classification(qw( paniscus Pan Hominidae | |
| 49 Catarrhini Primates Eutheria | |
| 50 Mammalia Euteleostomi Vertebrata | |
| 51 Craniata Chordata | |
| 52 Metazoa Eukaryota )); | |
| 53 | |
| 54 # ranks passed to $taxonomy match ranks of species | |
| 55 my @ranks = ('superkingdom','kingdom','phylum','subphylum', | |
| 56 'no rank 1','no rank 2','class','no rank 3','order', | |
| 57 'suborder','family','genus','species'); | |
| 58 | |
| 59 my $taxonomy=new Bio::Taxonomy(-ranks => \@ranks, | |
| 60 -method => 'trust', | |
| 61 -order => -1); | |
| 62 | |
| 63 my @nodes; | |
| 64 | |
| 65 my $tree1=new Bio::Taxonomy::Tree; | |
| 66 my $tree2=new Bio::Taxonomy::Tree; | |
| 67 | |
| 68 push @nodes,$tree1->make_species_branch($human,$taxonomy); | |
| 69 push @nodes,$tree2->make_species_branch($chimp,$taxonomy); | |
| 70 | |
| 71 my ($homo_sapiens)=$tree1->get_leaves; | |
| 72 | |
| 73 $tree1->splice($tree2); | |
| 74 | |
| 75 push @nodes,$tree1->add_species($bonobo,$taxonomy); | |
| 76 | |
| 77 my @taxa; | |
| 78 foreach my $leaf ($tree1->get_leaves) { | |
| 79 push @taxa,$leaf->taxon; | |
| 80 } | |
| 81 print join(", ",@taxa)."\n"; | |
| 82 | |
| 83 @taxa=(); | |
| 84 $tree1->remove_branch($homo_sapiens); | |
| 85 foreach my $leaf ($tree1->get_leaves) { | |
| 86 push @taxa,$leaf->taxon; | |
| 87 } | |
| 88 print join(", ",@taxa)."\n"; | |
| 89 | |
| 90 =head1 FEEDBACK | |
| 91 | |
| 92 See AUTHOR | |
| 93 | |
| 94 =head1 AUTHOR - Dan Kortschak | |
| 95 | |
| 96 Email kortschak@rsbs.anu.edu.au | |
| 97 | |
| 98 =head1 CONTRIBUTORS | |
| 99 | |
| 100 Mainly Jason Stajich | |
| 101 | |
| 102 =head1 APPENDIX | |
| 103 | |
| 104 The rest of the documentation details each of the object methods. | |
| 105 Internal methods are usually preceded with a _ | |
| 106 | |
| 107 =cut | |
| 108 | |
| 109 | |
| 110 # Code begins... | |
| 111 | |
| 112 | |
| 113 package Bio::Taxonomy::Tree; | |
| 114 use vars qw(@ISA); | |
| 115 use strict; | |
| 116 | |
| 117 # Object preamble - inherits from Bio::Root::Root | |
| 118 | |
| 119 use Bio::Root::Root; | |
| 120 use Bio::Tree::TreeFunctionsI; | |
| 121 use Bio::Tree::TreeI; | |
| 122 use Bio::Taxonomy::Taxon; | |
| 123 | |
| 124 # Import rank information from Bio::Taxonomy.pm | |
| 125 use vars qw(@RANK %RANK); | |
| 126 | |
| 127 @ISA = qw(Bio::Root::Root Bio::Tree::TreeI Bio::Tree::TreeFunctionsI); | |
| 128 | |
| 129 =head2 new | |
| 130 | |
| 131 Title : new | |
| 132 Usage : my $obj = new Bio::Taxonomy::Tree(); | |
| 133 Function: Builds a new Bio::Taxonomy::Tree object | |
| 134 Returns : Bio::Taxonomy::Tree | |
| 135 Args : | |
| 136 | |
| 137 | |
| 138 =cut | |
| 139 | |
| 140 sub new { | |
| 141 my($class,@args) = @_; | |
| 142 | |
| 143 my $self = $class->SUPER::new(@args); | |
| 144 $self->{'_rootnode'} = undef; | |
| 145 $self->{'_maxbranchlen'} = 0; | |
| 146 | |
| 147 my ($root)= $self->_rearrange([qw(ROOT)], @args); | |
| 148 if( $root ) { $self->set_root_node($root); } | |
| 149 return $self; | |
| 150 } | |
| 151 | |
| 152 | |
| 153 =head2 get_nodes | |
| 154 | |
| 155 Title : get_nodes | |
| 156 Usage : my @nodes = $tree->get_nodes() | |
| 157 Function: Return list of Bio::Taxonomy::Taxon objects | |
| 158 Returns : array of Bio::Taxonomy::Taxon objects | |
| 159 Args : (named values) hash with one value | |
| 160 order => 'b|breadth' first order or 'd|depth' first order | |
| 161 | |
| 162 =cut | |
| 163 | |
| 164 sub get_nodes{ | |
| 165 my ($self, @args) = @_; | |
| 166 | |
| 167 my ($order, $sortby) = $self->_rearrange([qw(ORDER SORTBY)],@args); | |
| 168 $order ||= 'depth'; | |
| 169 $sortby ||= 'height'; | |
| 170 | |
| 171 if ($order =~ m/^b|(breadth)$/oi) { | |
| 172 my $node = $self->get_root_node; | |
| 173 my @children = ($node); | |
| 174 for (@children) { | |
| 175 push @children, $_->each_Descendent($sortby); | |
| 176 } | |
| 177 return @children; | |
| 178 } | |
| 179 | |
| 180 if ($order =~ m/^d|(depth)$/oi) { | |
| 181 # this is depth-first search I believe | |
| 182 my $node = $self->get_root_node; | |
| 183 my @children = ($node,$node->get_Descendents($sortby)); | |
| 184 return @children; | |
| 185 } | |
| 186 } | |
| 187 | |
| 188 =head2 get_root_node | |
| 189 | |
| 190 Title : get_root_node | |
| 191 Usage : my $node = $tree->get_root_node(); | |
| 192 Function: Get the Top Node in the tree, in this implementation | |
| 193 Trees only have one top node. | |
| 194 Returns : Bio::Taxonomy::Taxon object | |
| 195 Args : none | |
| 196 | |
| 197 =cut | |
| 198 | |
| 199 | |
| 200 sub get_root_node{ | |
| 201 my ($self) = @_; | |
| 202 return $self->{'_rootnode'}; | |
| 203 } | |
| 204 | |
| 205 =head2 set_root_node | |
| 206 | |
| 207 Title : set_root_node | |
| 208 Usage : $tree->set_root_node($node) | |
| 209 Function: Set the Root Node for the Tree | |
| 210 Returns : Bio::Taxonomy::Taxon | |
| 211 Args : Bio::Taxonomy::Taxon | |
| 212 | |
| 213 =cut | |
| 214 | |
| 215 | |
| 216 sub set_root_node{ | |
| 217 my ($self,$value) = @_; | |
| 218 if( defined $value ) { | |
| 219 if( ! $value->isa('Bio::Taxonomy::Taxon') ) { | |
| 220 $self->warn("Trying to set the root node to $value which is not a Bio::Taxonomy::Taxon"); | |
| 221 return $self->get_root_node; | |
| 222 } | |
| 223 $self->{'_rootnode'} = $value; | |
| 224 } | |
| 225 return $self->get_root_node; | |
| 226 } | |
| 227 | |
| 228 | |
| 229 =head2 get_leaves | |
| 230 | |
| 231 Title : get_leaves | |
| 232 Usage : my @nodes = $tree->get_leaves() | |
| 233 Function: Return list of Bio::Taxonomy::Taxon objects | |
| 234 Returns : array of Bio::Taxonomy::Taxon objects | |
| 235 Args : | |
| 236 | |
| 237 =cut | |
| 238 | |
| 239 | |
| 240 sub get_leaves{ | |
| 241 my ($self) = @_; | |
| 242 | |
| 243 my $node = $self->get_root_node; | |
| 244 my @leaves; | |
| 245 my @children = ($node); | |
| 246 for (@children) { | |
| 247 push @children, $_->each_Descendent(); | |
| 248 } | |
| 249 for (@children) { | |
| 250 push @leaves, $_ if $_->is_Leaf; | |
| 251 } | |
| 252 return @leaves; | |
| 253 } | |
| 254 | |
| 255 =head2 make_species_branch | |
| 256 | |
| 257 Title : make_species_branch | |
| 258 Usage : @nodes = $tree->make_species_branch($species,$taxonomy) | |
| 259 Function: Return list of Bio::Taxonomy::Taxon objects based on a Bio::Species object | |
| 260 Returns : array of Bio::Taxonomy::Taxon objects | |
| 261 Args : Bio::Species and Bio::Taxonomy objects | |
| 262 | |
| 263 =cut | |
| 264 | |
| 265 # I'm not happy that make_species_branch and make_branch are seperate routines | |
| 266 # should be able to just make_branch and have it sort things out | |
| 267 | |
| 268 sub make_species_branch{ | |
| 269 my ($self,$species,$taxonomy) = @_; | |
| 270 | |
| 271 if (! $species->isa('Bio::Species') ) { | |
| 272 $self->throw("Trying to classify $species which is not a Bio::Species object"); | |
| 273 } | |
| 274 if (! $taxonomy->isa('Bio::Taxonomy') ) { | |
| 275 $self->throw("Trying to classify with $taxonomy which is not a Bio::Taxonomy object"); | |
| 276 } | |
| 277 | |
| 278 # this is done to make sure we aren't duplicating a path (let God sort them out) | |
| 279 if (defined $self->get_root_node) { | |
| 280 $self->get_root_node->remove_all_Descendents; | |
| 281 } | |
| 282 | |
| 283 my @nodes; | |
| 284 | |
| 285 # nb taxa in [i][0] and ranks in [i][1] | |
| 286 my @taxa=$taxonomy->classify($species); | |
| 287 | |
| 288 for (my $i = 0; $i < @taxa; $i++) { | |
| 289 $nodes[$i]=Bio::Taxonomy::Taxon->new(-taxon => $taxa[$i][0], | |
| 290 -rank => $taxa[$i][1]); | |
| 291 } | |
| 292 | |
| 293 for (my $i = 0; $i < @taxa-1; $i++) { | |
| 294 $nodes[$i]->add_Descendent($nodes[$i+1]); | |
| 295 } | |
| 296 | |
| 297 $self->set_root_node($nodes[0]); | |
| 298 | |
| 299 return @nodes; | |
| 300 } | |
| 301 | |
| 302 | |
| 303 =head2 make_branch | |
| 304 | |
| 305 Title : make_branch | |
| 306 Usage : $tree->make_branch($node) | |
| 307 Function: Make a linear Bio::Taxonomy::Tree object from a leafish node | |
| 308 Returns : | |
| 309 Args : Bio::Taxonomy::Taxon object | |
| 310 | |
| 311 =cut | |
| 312 | |
| 313 | |
| 314 sub make_branch{ | |
| 315 my ($self,$node) = @_; | |
| 316 | |
| 317 # this is done to make sure we aren't duplicating a path (let God sort them out) | |
| 318 # note that if you are using a linked set of node which include node | |
| 319 # already in the tree, this will break | |
| 320 $self->get_root_node->remove_all_Descendents; | |
| 321 | |
| 322 while (defined $node->ancestor) { | |
| 323 $self->set_root_node($node); | |
| 324 $node=$node->ancestor; | |
| 325 } | |
| 326 } | |
| 327 | |
| 328 | |
| 329 =head2 splice | |
| 330 | |
| 331 Title : splice | |
| 332 Usage : @nodes = $tree->splice($tree) | |
| 333 Function: Return a of Bio::Taxonomy::Tree object that is a fusion of two | |
| 334 Returns : array of Bio::Taxonomy::Taxon added to tree | |
| 335 Args : Bio::Taxonomy::Tree object | |
| 336 | |
| 337 =cut | |
| 338 | |
| 339 | |
| 340 sub splice{ | |
| 341 my ($self,$tree) = @_; | |
| 342 | |
| 343 my @nodes; | |
| 344 | |
| 345 my @newleaves = $tree->get_leaves; | |
| 346 foreach my $leaf (@newleaves) { | |
| 347 push @nodes,$self->add_branch($leaf); | |
| 348 } | |
| 349 | |
| 350 return @nodes; | |
| 351 } | |
| 352 | |
| 353 =head2 add_species | |
| 354 | |
| 355 Title : add_species | |
| 356 Usage : @nodes = $tree->add_species($species,$taxonomy) | |
| 357 Function: Return a of Bio::Taxonomy::Tree object with a new species added | |
| 358 Returns : array of Bio::Taxonomy::Taxon added to tree | |
| 359 Args : Bio::Species object | |
| 360 | |
| 361 =cut | |
| 362 | |
| 363 | |
| 364 sub add_species{ | |
| 365 my ($self,$species,$taxonomy) = @_; | |
| 366 | |
| 367 my $branch=Bio::Taxonomy::Tree->new; | |
| 368 my @nodes=$branch->make_species_branch($species,$taxonomy); | |
| 369 | |
| 370 my ($newleaf)=$branch->get_leaves; | |
| 371 | |
| 372 return $self->add_branch($newleaf); | |
| 373 } | |
| 374 | |
| 375 =head2 add_branch | |
| 376 | |
| 377 Title : add_branch | |
| 378 Usage : $tree->add_branch($node,boolean) | |
| 379 Function: Return a of Bio::Taxonomy::Tree object with a new branch added | |
| 380 Returns : array of Bio::Taxonomy::Taxon objects of the resulting tree | |
| 381 Args : Bio::Taxonomy::Taxon object | |
| 382 boolean flag to force overwrite of descendent | |
| 383 (see Bio::Node->add_Descendent) | |
| 384 | |
| 385 =cut | |
| 386 | |
| 387 | |
| 388 sub add_branch { | |
| 389 my ($self,$node,$force) = @_; | |
| 390 | |
| 391 my $best_node_level=0; | |
| 392 my ($best_node,@nodes,$common); | |
| 393 | |
| 394 my @leaves=$self->get_leaves; | |
| 395 foreach my $leaf (@leaves) { | |
| 396 $common=$node->recent_common_ancestor($leaf); # the root of the part to add | |
| 397 if (defined $common && ($common->distance_to_root > $best_node_level)) { | |
| 398 $best_node_level = $common->distance_to_root; | |
| 399 $best_node = $common; | |
| 400 } | |
| 401 } | |
| 402 | |
| 403 return unless defined $best_node; | |
| 404 | |
| 405 push @nodes,($self->get_root_node,$self->get_root_node->get_Descendents); | |
| 406 foreach my $node (@nodes) { | |
| 407 if ((defined $best_node->id && $best_node->id == $node->id) || | |
| 408 ($best_node->rank eq $node->rank && $best_node->taxon eq $node->taxon) && | |
| 409 ($best_node->rank ne 'no rank')) { | |
| 410 foreach my $descendent ($common->each_Descendent) { | |
| 411 $node->add_Descendent($descendent,$force); | |
| 412 } | |
| 413 } | |
| 414 | |
| 415 $self->set_root_node($node) if $node->distance_to_root==0; | |
| 416 } | |
| 417 | |
| 418 return ($common->get_Descendents); | |
| 419 } | |
| 420 | |
| 421 =head2 remove_branch | |
| 422 | |
| 423 Title : remove_branch | |
| 424 Usage : $tree->remove_branch($node) | |
| 425 Function: remove a branch up to the next multifurcation | |
| 426 Returns : | |
| 427 Args : Bio::Taxonomy::Taxon object | |
| 428 | |
| 429 =cut | |
| 430 | |
| 431 | |
| 432 sub remove_branch{ | |
| 433 my ($self,$node) = @_; | |
| 434 | |
| 435 # we can define a branch at any point along it | |
| 436 | |
| 437 while (defined $node->ancestor) { | |
| 438 last if $node->ancestor->each_Descendent > 1; | |
| 439 $node=$node->ancestor; | |
| 440 } | |
| 441 $node->remove_all_Descendents; # I'm not sure if this is necessary, | |
| 442 # but I don't see that remove_Descendent | |
| 443 # has the side effect of deleting | |
| 444 # descendent nodes of the deletee | |
| 445 $node->ancestor->remove_Descendent($node); | |
| 446 } | |
| 447 | |
| 448 | |
| 449 | |
| 450 1; |
