Mercurial > repos > willmclaren > ensembl_vep
diff variant_effect_predictor/Bio/EnsEMBL/Compara/GeneTree.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/variant_effect_predictor/Bio/EnsEMBL/Compara/GeneTree.pm Fri Aug 03 10:04:48 2012 -0400
# @@ -0,0 +1,435 @@

=head1 LICENSE

  Copyright (c) 1999-2012 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

   http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=head1 NAME

Bio::EnsEMBL::Compara::GeneTree

=head1 DESCRIPTION

Class to represent a gene tree object. Contains a link to
the root of the tree, as well as general tree properties.
It implements the AlignedMemberSet interface (via the leaves).

=head1 INHERITANCE TREE

  Bio::EnsEMBL::Compara::GeneTree
  +- Bio::EnsEMBL::Compara::AlignedMemberSet
  `- Bio::EnsEMBL::Compara::Taggable

=head1 AUTHORSHIP

Ensembl Team. Individual contributions can be found in the CVS log.

=head1 MAINTAINER

$Author: mm14 $

=head1 VERSION

$Revision: 1.23 $

=head1 APPENDIX

The rest of the documentation details each of the object methods.
Internal methods are usually preceded with an underscore (_)

=cut

package Bio::EnsEMBL::Compara::GeneTree;

use strict;
use warnings;

use Bio::EnsEMBL::Utils::Argument;
use Bio::EnsEMBL::Utils::Scalar qw(:assert);

use Bio::EnsEMBL::Compara::GeneTreeNode;
use Bio::EnsEMBL::Compara::GeneTreeMember;

use base ('Bio::EnsEMBL::Compara::AlignedMemberSet', 'Bio::EnsEMBL::Compara::Taggable');


##############################
# Constructors / Destructors #
##############################

=head2 new

  Arg [...]   : Named arguments, parsed with rearrange():
                  -ROOT_ID, -MEMBER_TYPE, -TREE_TYPE, -CLUSTERSET_ID
                The full argument list is also forwarded to the
                parent constructor.
  Example     : my $tree = Bio::EnsEMBL::Compara::GeneTree->new(
                    -TREE_TYPE   => 'tree',
                    -MEMBER_TYPE => 'protein',
                );
  Description : Creates a new GeneTree object. root_id is stored when it
                is defined; the other three fields are only set when a
                true value is given.
  Returntype  : Bio::EnsEMBL::Compara::GeneTree
  Exceptions  : none raised by this method itself
  Caller      : General

=cut

sub new {
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);

    if (scalar @args) {
        my ($root_id, $member_type, $tree_type, $clusterset_id) =
            rearrange([qw(ROOT_ID MEMBER_TYPE TREE_TYPE CLUSTERSET_ID)], @args);

        $self->{'_root_id'} = $root_id if defined $root_id;
        $self->member_type($member_type)     if $member_type;
        $self->tree_type($tree_type)         if $tree_type;
        $self->clusterset_id($clusterset_id) if $clusterset_id;
    }

    return $self;
}


=head2 deep_copy

  Description : Returns a copy of $self (as an AlignedMemberSet). All the
                members are themselves copied, but the tree topology is lost.
                The tree_type, member_type and clusterset_id fields are
                carried over to the copy.
  Returntype  : Bio::EnsEMBL::Compara::GeneTree
  Caller      : General

=cut

sub deep_copy {
    my $self = shift;

    my $copy = $self->SUPER::deep_copy();
    foreach my $attr (qw(tree_type member_type clusterset_id)) {
        $copy->$attr($self->$attr);
    }
    return $copy;
}


=head2 DESTROY

  Description : Deletes the reference to the root node and breaks
                the circular reference.
  Returntype  : None
  Caller      : System

=cut

sub DESTROY {
    my $self = shift;
    delete $self->{'_root'};
}


#####################
# Object attributes #
#####################

=head2 tree_type

  Arg [1]     : (optional) String: the new value
  Description : Getter/Setter for the tree_type field. This field can
                currently be 'tree', 'supertree' or 'clusterset'
  Returntype  : String
  Example     : my $type = $tree->tree_type();
  Caller      : General

=cut

sub tree_type {
    my $self = shift;
    $self->{'_tree_type'} = shift if (@_);
    return $self->{'_tree_type'};
}


=head2 member_type

  Arg [1]     : (optional) String: the new value
  Description : Getter/Setter for the member_type field. This field can
                currently be 'ncrna' or 'protein'
  Returntype  : String
  Example     : my $type = $tree->member_type();
  Caller      : General

=cut

sub member_type {
    my $self = shift;
    $self->{'_member_type'} = shift if (@_);
    return $self->{'_member_type'};
}


=head2 clusterset_id

  Arg [1]     : (optional) String: the new value
  Description : Getter/Setter for the clusterset_id field. This field can
                be any string. Each dataset should contain a set of trees
                with the "default" clusterset_id. Other clusterset_id are
                used to store linked / additional data.
  Returntype  : String
  Example     : my $clusterset_id = $tree->clusterset_id();
  Caller      : General

=cut

sub clusterset_id {
    my $self = shift;
    $self->{'_clusterset_id'} = shift if (@_);
    return $self->{'_clusterset_id'};
}


=head2 root_id

  Description : Getter for the root_id of the root node of the tree.
  Returntype  : Integer
  Example     : my $root_node_id = $tree->root_id();
  Caller      : General

=cut

sub root_id {
    my $self = shift;
    return $self->{'_root_id'};
}


################
# Tree loading #
################

=head2 root

  Description : Getter for the root node of the tree. This returns an
                object fetched from the database if root_id is defined
                and the tree has an adaptor. Otherwise, it will create
                a new GeneTreeNode object. The node is kept in the tree
                object, so later calls return the same node.
  Returntype  : Bio::EnsEMBL::Compara::GeneTreeNode
  Example     : my $root_node = $tree->root();
  Caller      : General

=cut

sub root {
    my $self = shift;

    if (not defined $self->{'_root'}) {
        if (defined $self->{'_root_id'} and defined $self->adaptor) {
            # Loads all the nodes in one go
            my $gtn_adaptor = $self->adaptor->db->get_GeneTreeNodeAdaptor;

            # local() puts the adaptor's '_ref_tree' entry back to its
            # previous state when this block is left, even if the fetch dies.
            local $gtn_adaptor->{'_ref_tree'} = $self;
            $self->{'_root'} = $gtn_adaptor->fetch_node_by_node_id($self->{'_root_id'});

        } else {
            # Creates a new GeneTreeNode object
            $self->{'_root'} = Bio::EnsEMBL::Compara::GeneTreeNode->new;
            $self->{'_root'}->tree($self);
        }
    }
    return $self->{'_root'};
}


=head2 preload

  Description : Method to load all the tree data in one go. This currently
                includes the tree nodes (if not loaded yet), and all the
                gene Members associated with the leaves.
                In the future, it will include all the tags.
                Does nothing when the tree has no adaptor.
  Returntype  : none
  Example     : $tree->preload();
  Caller      : General

=cut

sub preload {
    my $self = shift;
    return unless defined $self->adaptor;

    if (not defined $self->{'_root'} and defined $self->{'_root_id'}) {
        my $gtn_adaptor = $self->adaptor->db->get_GeneTreeNodeAdaptor;

        # local() puts the adaptor's '_ref_tree' entry back to its
        # previous state when this block is left, even if the fetch dies.
        local $gtn_adaptor->{'_ref_tree'} = $self;
        $self->{'_root'} = $gtn_adaptor->fetch_tree_by_root_id($self->{'_root_id'});
    }

    # Loads all the gene members in one go
    my %leaves;
    foreach my $pm (@{$self->root->get_all_leaves}) {
        $leaves{$pm->gene_member_id} = $pm if UNIVERSAL::isa($pm, 'Bio::EnsEMBL::Compara::GeneTreeMember');
    }
    my @m_ids = keys(%leaves);

    # No GeneTreeMember leaf: there is nothing to fetch
    return unless @m_ids;

    my $all_gm = $self->adaptor->db->get_MemberAdaptor->fetch_all_by_dbID_list(\@m_ids);
    foreach my $gm (@$all_gm) {
        $leaves{$gm->dbID}->gene_member($gm);
    }
    return;
}


=head2 attach_alignment

  Arg [1]     : String: clusterset_id
  Description : Method to fetch the alternative tree with the given
                clusterset_id and attach its multiple alignment to
                the current tree. The alternative tree is returned.
                Does nothing when the tree has no adaptor.
  Returntype  : GeneTree
  Exceptions  : Dies if no linked tree has the requested clusterset_id
  Example     : $supertree->attach_alignment('super-align');
  Caller      : General

=cut

sub attach_alignment {
    my $self = shift;
    my $other_clusterset_id = shift;
    return unless defined $self->adaptor;

    # Gets the other tree
    my $others = $self->adaptor->fetch_all_linked_trees($self);
    my @good_others = grep {$_->clusterset_id eq $other_clusterset_id} @$others;
    die "'$other_clusterset_id' tree not found\n" unless scalar(@good_others);

    # Gets the alignment
    my %cigars;
    my $gtn_adaptor = $self->adaptor->db->get_GeneTreeNodeAdaptor;
    foreach my $leaf (@{$gtn_adaptor->fetch_all_AlignedMember_by_root_id($good_others[0]->root_id)}) {
        $cigars{$leaf->member_id} = $leaf->cigar_line;
    }

    # Assigns it
    foreach my $leaf (@{$self->root->get_all_leaves}) {
        $leaf->cigar_line($cigars{$leaf->member_id});
    }

    return $good_others[0];
}


=head2 expand_subtrees

  Description : Method to fetch the subtrees of the current tree
                and attach them to the tips of the current tree.
                Does nothing when the tree has no adaptor.
  Returntype  : none
  Example     : $supertree->expand_subtrees();
  Caller      : General

=cut

sub expand_subtrees {
    my $self = shift;
    return unless defined $self->adaptor;

    # Gets the subtrees
    my %subtrees;
    foreach my $subtree (@{$self->adaptor->fetch_subtrees($self)}) {
        $subtree->preload;
        $subtrees{$subtree->root->_parent_id} = $subtree->root;
    }

    # Attaches them
    $self->preload;
    foreach my $leaf (@{$self->root->get_all_leaves}) {
        next unless exists $subtrees{$leaf->node_id};
        $leaf->parent->add_child($subtrees{$leaf->node_id});
        $leaf->disavow_parent;
    }
}


##############################
# AlignedMemberSet interface #
##############################

=head2 member_class

  Description : Returns the type of member used in the set
  Returntype  : String: Bio::EnsEMBL::Compara::GeneTreeMember
  Caller      : Bio::EnsEMBL::Compara::MemberSet

=cut

sub member_class {
    return 'Bio::EnsEMBL::Compara::GeneTreeMember';
}


=head2 get_all_Members

  Example     : my $members = $tree->get_all_Members();
  Description : Returns the list of all the GeneTreeMember of the tree.
                The list is built from the leaves of the tree on the
                first call and kept in the object afterwards.
  Returntype  : array reference of Bio::EnsEMBL::Compara::GeneTreeMember
  Caller      : General

=cut

sub get_all_Members {
    my ($self) = @_;

    unless (defined $self->{'_member_array'}) {

        $self->{'_member_array'} = [];
        $self->{'_members_by_source'} = {};
        $self->{'_members_by_source_taxon'} = {};
        $self->{'_members_by_source_genome_db'} = {};
        $self->{'_members_by_genome_db'} = {};
        foreach my $leaf (@{$self->root->get_all_leaves}) {
            $self->SUPER::add_Member($leaf) if UNIVERSAL::isa($leaf, 'Bio::EnsEMBL::Compara::GeneTreeMember');
        }
    }
    return $self->{'_member_array'};
}


=head2 add_Member

  Arg [1]     : GeneTreeMember
  Example     : $tree->add_Member($gene_tree_member);
  Description : Add a new GeneTreeMember to this set and to the tree as
                a child of the root node
  Returntype  : none
  Exceptions  : Throws (via assert_ref) if the argument is not a
                Bio::EnsEMBL::Compara::GeneTreeMember
  Caller      : General

=cut

sub add_Member {
    my ($self, $member) = @_;
    assert_ref($member, 'Bio::EnsEMBL::Compara::GeneTreeMember');
    $self->root->add_child($member);
    $member->tree($self);
    $self->SUPER::add_Member($member);
}


########
# Misc #
########

# Dynamic definition of functions to allow NestedSet methods work with GeneTrees.
# Each generated method forwards its arguments to the same-named method of the
# root node and returns the result in scalar context.
{
    # Installing a sub through a glob named by a string needs symbolic
    # references; 'strict refs' is relaxed for this block only.
    no strict 'refs';

    foreach my $func_name (qw(get_all_nodes get_all_leaves get_all_sorted_leaves
                              find_leaf_by_node_id find_leaf_by_name find_node_by_node_id
                              find_node_by_name remove_nodes build_leftright_indexing flatten_tree
                              newick_format nhx_format string_tree print_tree
                              release_tree
                          )) {
        my $full_name = "Bio::EnsEMBL::Compara::GeneTree::$func_name";
        *$full_name = sub {
            my $self = shift;
            my $ret = $self->root->$func_name(@_);
            return $ret;
        };
#        print STDERR "REDEFINE $func_name\n";
    }
}


1;