Mercurial > repos > willmclaren > ensembl_vep
view variant_effect_predictor/Bio/EnsEMBL/Compara/NCBITaxon.pm @ 1:d6778b5d8382 draft default tip
Deleted selected files
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:05:43 -0400 |
parents | 21066c0abaf5 |
children |
line wrap: on
line source
=head1 NAME NCBITaxon - DESCRIPTION of Object =head1 DESCRIPTION An object that hold a node within a taxonomic tree. Inherits from NestedSet. From Bio::Species classification common_name binomial Here are also the additional methods in Bio::Species that "might" be useful, but let us forget about these for now. genus species sub_species variant organelle division =head1 CONTACT Contact Jessica Severin on implemetation/design detail: jessica@ebi.ac.uk Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut package Bio::EnsEMBL::Compara::NCBITaxon; use strict; use Bio::Species; use Bio::EnsEMBL::Compara::NestedSet; use Bio::EnsEMBL::Utils::Exception; use Bio::EnsEMBL::Utils::Argument; our @ISA = qw(Bio::EnsEMBL::Compara::NestedSet); =head2 copy Arg [1] : int $member_id (optional) Example : Description: returns copy of object, calling superclass copy method Returntype : Exceptions : Caller : =cut sub copy { my $self = shift; my $mycopy = $self->SUPER::copy; bless $mycopy, "Bio::EnsEMBL::Compara::NCBITaxon"; $mycopy->ncbi_taxid($self->ncbi_taxid); $mycopy->rank($self->rank); $mycopy->genbank_hidden_flag($self->genbank_hidden_flag); return $mycopy; } sub ncbi_taxid { my $self = shift; my $value = shift; $self->node_id($value) if($value); return $self->node_id; } sub taxon_id { my $self = shift; my $value = shift; $self->node_id($value) if($value); return $self->node_id; } sub dbID { my $self = shift; my $value = shift; $self->node_id($value) if($value); return $self->node_id; } sub rank { my $self = shift; $self->{'_rank'} = shift if(@_); return $self->{'_rank'}; } sub genbank_hidden_flag { my $self = shift; $self->{'_genbank_hidden_flag'} = shift if(@_); return $self->{'_genbank_hidden_flag'}; } =head2 classification Arg[SEPARATOR] : String (optional); used to separate the classification by when returning as a string. If not specified then a single space will be used. Arg[FULL] : Boolean (optional); indicates we want all nodes including those which Genbank sets as hidden Arg[AS_ARRAY] : Boolean (optional); says the return type will be an ArrayRef of all nodes in the classification as instances of NCBITaxon. Example : my $classification_string = $node->classification(); Description : Returns the String representation of a taxon node's classification or the objects which constitute it ( including the current node). The String return when split is compatible with BioPerl's Species classification code and will return a data structure compatible with that found in core species MetaContainers. This code is a redevelopment of existing code which descended down the taxonomy which had disadvanatages when a classification was requested on nodes causing the taxonomy to bi/multi-furcate. Note the String representation does have some disadvantages when working with the poorer end of the taxonomy where species nodes are not well defined. For these situations you are better using the array representation and capturing the required information from the nodes. Also to maintain the original functionality of the method we filter any species, subspecies or subgenus nodes above the current node. For the true classification always call using the array structure. Recalling this subroutine with the same parameters for separators will return a cached representation. Calling for AS_ARRAY will cause the classificaiton to be recalculated each time. Returntype : String if not asking for an array otherwise the array Exceptions : - Caller : Public =cut sub classification { my ($self, @args) = @_; my ($separator, $full, $as_array) = rearrange([qw( SEPARATOR FULL AS_ARRAY )], @args); #setup defaults $separator = ' ' unless(defined $separator); $full = 0 unless (defined $full); if(!$as_array) { #Reset the separators & classifications if we already had one & it #differed from the input if(defined $self->{_separator} && $self->{_separator} ne $separator) { $self->{_separator} = undef; $self->{_classification} = undef; } if(defined $self->{_separator_full} && $self->{_separator_full} ne $separator) { $self->{_separator_full} = undef; $self->{_classification_full} = undef; } $self->{_separator} = $separator unless (defined $self->{_separator}); $self->{_separator_full} = $separator unless (defined $self->{_separator_full}); return $self->{_classification_full} if ($full && defined $self->{_classification_full}); return $self->{_classification} if (!$full && defined $self->{_classification}); } my $node = $self; my @classification; while( $node->name() ne 'root' ) { my $subgenus = $node->rank() eq 'subgenus'; if($full) { push(@classification, $node); } else { unless($node->genbank_hidden_flag() || $subgenus) { push(@classification, $node); } } $node = $node->parent(); } if($as_array) { return \@classification; } #Once we have a normal array we can do top-down as before to replicate #the original functionality my $text_classification = $self->_to_text_classification(\@classification); if ($full) { $self->{_classification_full} = join($separator, @{$text_classification}); $self->{_separator_full} = $separator; return $self->{_classification_full}; } else { $self->{_classification} = join($separator, @{$text_classification}); $self->{_separator} = $separator; return $self->{_classification}; } } =head2 _to_text_classification Arg[1] : ArrayRef of the classification array to be converted to the text classification Example : my $array = $node->_to_text_classification(\@classification); Description : Returns the Array representation of a taxon node's classification or the objects which constitute it ( including the current node) as the species names or split according to the rules for working with BioPerl. Returntype : ArrayRef of Strings Exceptions : - Caller : Private =cut sub _to_text_classification { my ($self, $classification) = @_; my @text_classification; my $first = 1; for my $node ( @{$classification}) { my $subgenus = $node->rank() eq 'subgenus'; my $species = $node->rank() eq 'species'; my $subspecies = $node->rank() eq 'subspecies'; if($first) { if($species || $subspecies) { my ($genus, $species, $subspecies) = split(q{ }, $node->binomial()); unshift @text_classification, $species; unshift @text_classification, $subspecies if (defined $subspecies); } $first = 0; next; } next if $subgenus || $species || $subspecies; push(@text_classification, $node->name()); } return \@text_classification; } =head2 subspecies Example : $ncbi->subspecies; Description: Returns the subspeceis name for this species Example : "verus" for Pan troglodytes verus Returntype : string Exceptions : Caller : general =cut sub subspecies { my $self = shift; unless (defined $self->{'_species'}) { my ($genus, $species, $subspecies) = split(" ", $self->binomial); $self->{'_species'} = $species; $self->{'_genus'} = $genus; $self->{'_subspecies'} = $subspecies; } return $self->{'_species'}; } =head2 species Example : $ncbi->species; Description: Returns the speceis name for this species Example : "sapiens" for Homo sapiens Returntype : string Exceptions : Caller : general =cut sub species { my $self = shift; unless (defined $self->{'_species'}) { my ($genus, $species, $subspecies) = split(" ", $self->binomial); $self->{'_species'} = $species; $self->{'_genus'} = $genus; $self->{'_subspecies'} = $subspecies; } return $self->{'_species'}; } =head2 genus Example : $ncbi->genus; Description: Returns the genus name for this species Returntype : string Example : "Homo" for Homo sapiens Exceptions : Caller : general =cut sub genus { my $self = shift; unless (defined $self->{'_genus'}) { my ($genus, $species, $subspecies) = split(" ", $self->binomial); $self->{'_species'} = $species; $self->{'_genus'} = $genus; $self->{'_subspecies'} = $subspecies; } return $self->{'_genus'}; } =head2 common_name Example : $ncbi->common_name; Description: The comon name as defined by Genbank Returntype : string Exceptions : returns undef if no genbank common name exists. Caller : general =cut sub common_name { my $self = shift; if ($self->has_tag('genbank common name') && $self->rank eq 'species') { return $self->get_tagvalue('genbank common name'); } else { return undef; } } =head2 ensembl_alias_name Example : $ncbi->ensembl_alias_name; Description: The comon name as defined by ensembl alias Returntype : string Exceptions : returns undef if no ensembl alias name exists. Caller : general =cut sub ensembl_alias_name { my $self = shift; #Not checking for rank as we do above, because we do not get dog since the #rank for dog is subspecies (ensembl-51). if ($self->has_tag('ensembl alias name')) { return $self->get_tagvalue('ensembl alias name'); } else { return undef; } } =head scientific_name Example : $ncbi->scientific_name; Description: The scientific name of this taxon Returntype : string Exceptions : Caller : general =cut sub scientific_name { my ($self) = @_; return $self->get_tagvalue('scientific name'); } =head2 binomial Example : $ncbi->binomial; Description: The binomial name (AKA the scientific name) of this genome Returntype : string Exceptions : warns when node is not a species or has no scientific name Caller : general =cut sub binomial { my $self = shift; if ($self->has_tag('scientific name') && ($self->rank eq 'species' || $self->rank eq 'subspecies')) { return $self->scientific_name; } else { warning("taxon_id=",$self->node_id," is not a species or subspecies. So binomial is undef (try the scientific_name method)\n"); return undef; } } =head2 ensembl_alias Example : $ncbi->ensembl_alias; Description: The ensembl_alias name (AKA the name in the ensembl website) of this genome Returntype : string Exceptions : warns when node is not a species or has no ensembl_alias Caller : general =cut sub ensembl_alias { my $self = shift; if ($self->has_tag('ensembl alias name')) { return $self->get_tagvalue('ensembl alias name'); } else { warning("taxon_id=",$self->node_id," is not a species or subspecies. So ensembl_alias is undef\n"); return undef; } } =head2 short_name Example : $ncbi->short_name; Description: The name of this genome in the Gspe ('G'enera 'spe'cies) format. Returntype : string Exceptions : none Caller : general =cut sub short_name { my $self = shift; my $name = $self->name; $name =~ s/(\S)\S+\s(\S{3})\S+/$1$2/; $name =~ s/\ //g; return $name; } sub get_short_name { my $self = shift; return $self->short_name; } sub RAP_species_format { my $self = shift; my $newick = ""; if($self->get_child_count() > 0) { $newick .= "("; my $first_child=1; foreach my $child (@{$self->sorted_children}) { $newick .= "," unless($first_child); $newick .= $child->newick_format; $first_child = 0; } $newick .= ")"; } $newick .= sprintf("\"%s\"", $self->name,); $newick .= sprintf(":%1.4f", $self->distance_to_parent) if($self->distance_to_parent > 0); if(!($self->has_parent)) { $newick .= ";"; } return $newick; } sub print_node { my $self = shift; printf("(%s", $self->node_id); printf(" %s", $self->rank) if($self->rank); print(")"); printf("%s", $self->name) if($self->name); print("\n"); } 1;