Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/TreeIO/nhx.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/TreeIO/nhx.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,296 @@ +# $Id: nhx.pm,v 1.4.2.2 2003/09/14 19:00:36 jason Exp $ +# +# BioPerl module for Bio::TreeIO::nhx +# +# Cared for by Aaron Mackey <amackey@virginia.edu> +# +# Copyright Aaron Mackey +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::TreeIO::nhx - TreeIO implementation for parsing + Newick/New Hampshire eXtendend (NHX) format. + +=head1 SYNOPSIS + + # do not use this module directly + use Bio::TreeIO; + my $treeio = new Bio::TreeIO(-format => 'nhx', -file => 'tree.dnd'); + my $tree = $treeio->next_tree; + +=head1 DESCRIPTION + +This module handles parsing and writing of Newick/New Hampshire eXtended (NHX) format. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to the +Bioperl mailing list. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bioperl.org/MailList.shtml - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +of the bugs and their resolution. Bug reports can be submitted via +email or the web: + + bioperl-bugs@bioperl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Aaron Mackey + +Email amackey@virginia.edu + +Describe contact details here + +=head1 CONTRIBUTORS + +Additional contributors names and emails here + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. +Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + + +package Bio::TreeIO::nhx; +use vars qw(@ISA); +use strict; + +# Object preamble - inherits from Bio::Root::Root + +use Bio::TreeIO; +use Bio::Tree::NodeNHX; +use Bio::Event::EventGeneratorI; +#use XML::Handler::Subs; + + +@ISA = qw(Bio::TreeIO ); + +sub _initialize { + my($self, %args) = @_; + $args{-nodetype} ||= 'Bio::Tree::NodeNHX'; + $self->SUPER::_initialize(%args); +} + +=head2 next_tree + + Title : next_tree + Usage : my $tree = $treeio->next_tree + Function: Gets the next tree in the stream + Returns : Bio::Tree::TreeI + Args : none + + +=cut + +sub next_tree{ + my ($self) = @_; + local $/ = ";\n"; + return unless $_ = $self->_readline; + s/\s+//g; + $self->debug("entry is $_\n"); + my $chars = ''; + $self->_eventHandler->start_document; + my ($prev_event,$lastevent) = ('',''); + my @ch = split(//, $_); + foreach my $ch (@ch) { + if( $ch eq ';' ) { + $self->_eventHandler->in_element('node') && + $self->_eventHandler->end_element( {'Name' => 'node'}); + return $self->_eventHandler->end_document; + } elsif ($ch eq '[') { + if ( length $chars ) { + if ( $lastevent eq ':' ) { + $self->_eventHandler->start_element( { Name => 'branch_length' } ); + $self->_eventHandler->characters($chars); + $self->_eventHandler->end_element( { Name => 'branch_length' }); + $lastevent = $prev_event; + } else { + $self->debug("id with no branchlength is $chars\n"); + $self->_eventHandler->start_element( { 'Name' => 'node' } ); + $self->_eventHandler->start_element( { 'Name' => 'id' } ); + $self->_eventHandler->characters($chars); + $self->_eventHandler->end_element( { 'Name' => 'id' } ); + } + } else { + $self->_eventHandler->start_element( { Name => 'node' } ); + } + my $leafstatus = ( $lastevent ne ')' ) ? 1 : 0; + $self->_eventHandler->start_element({'Name' => 'leaf'}); + $self->_eventHandler->characters($leafstatus); + $self->_eventHandler->end_element({'Name' => 'leaf'}); + $chars = ''; + + $self->_eventHandler->start_element( { Name => 'nhx_tag' }); + } elsif( $ch eq '(' ) { + $chars = ''; + $self->_eventHandler->start_element( {'Name' => 'tree'} ); + } elsif($ch eq ')' ) { + if( length $chars ) { + if( $lastevent eq ':') { + unless ($self->_eventHandler->within_element('nhx_tag')) { + $self->_eventHandler->start_element( { 'Name' => 'branch_length'}); + $self->_eventHandler->characters($chars); + $self->_eventHandler->end_element( {'Name' => 'branch_length'}); + } else { + $self->throw("malformed input; end of node ) before ] found"); + } + } else { + $self->debug("id with no branchlength is $chars\n"); + $self->_eventHandler->start_element( { 'Name' => 'node' } ); + $self->_eventHandler->start_element( { 'Name' => 'id' } ); + $self->_eventHandler->characters($chars); + $self->_eventHandler->end_element( { 'Name' => 'id' } ); + } + + } elsif ( $lastevent ne ']' ) { + $self->_eventHandler->start_element( {'Name' => 'node'} ); + } + my $leafstatus = ( $lastevent ne ')' ) ? 1 : 0; + $self->_eventHandler->start_element({'Name' => 'leaf'}); + $self->_eventHandler->characters($leafstatus); + $self->_eventHandler->end_element({'Name' => 'leaf'}); + + $self->_eventHandler->end_element( {'Name' => 'node'} ); + $self->_eventHandler->end_element( {'Name' => 'tree'} ); + $chars = ''; + } elsif ( $ch eq ',' ) { + if( length $chars ) { + if( $lastevent eq ':' ) { + $self->_eventHandler->start_element( { 'Name' => 'branch_length'}); + $self->_eventHandler->characters($chars); + $self->_eventHandler->end_element( {'Name' => 'branch_length'}); + $lastevent = $prev_event; + } else { + $self->debug("id with no branchlength is $chars\n"); + $self->_eventHandler->start_element( { 'Name' => 'node' } ); + $self->_eventHandler->start_element( { 'Name' => 'id' } ); + $self->_eventHandler->characters($chars); + $self->_eventHandler->end_element( { 'Name' => 'id' } ); + } + } elsif ( $lastevent ne ']' ) { + $self->_eventHandler->start_element( { 'Name' => 'node' } ); + } + $self->_eventHandler->end_element( {'Name' => 'node'} ); + $chars = ''; + } elsif( $ch eq ':' ) { + if ($self->_eventHandler->within_element('nhx_tag')) { + if ($lastevent eq '=') { + $self->_eventHandler->start_element( { Name => 'tag_value' } ); + $self->_eventHandler->characters($chars); + $self->_eventHandler->end_element( { Name => 'tag_value' } ); + $chars = ''; + } else { + if ($chars eq '&&NHX') { + $chars = ''; # get rid of &&NHX: + } else { + $self->throw("Unrecognized, non \&\&NHX string: >>$chars<<"); + } + } + } elsif ($lastevent ne ']') { + $self->debug("id with a branchlength coming is $chars\n"); + $self->_eventHandler->start_element( { 'Name' => 'node' } ); + $self->_eventHandler->start_element( { 'Name' => 'id' } ); + $self->_eventHandler->characters($chars); + $self->_eventHandler->end_element( { 'Name' => 'id' } ); + $chars = ''; + } + } elsif ( $ch eq '=' ) { + if ($self->_eventHandler->within_element('nhx_tag')) { + $self->_eventHandler->start_element( { Name => 'tag_name' } ); + $self->_eventHandler->characters($chars); + $self->_eventHandler->end_element( { Name => 'tag_name' } ); + $chars = ''; + } else { + $chars .= $ch; + } + } elsif ( $ch eq ']' ) { + if ($self->_eventHandler->within_element('nhx_tag') && $lastevent eq '=') { + $self->_eventHandler->start_element( { Name => 'tag_value' } ); + $self->_eventHandler->characters($chars); + $self->_eventHandler->end_element( { Name => 'tag_value' } ); + $chars = ''; + $self->_eventHandler->end_element( { Name => 'nhx_tag' } ); + } else { + $chars .= $ch; + next; + } + } else { + $chars .= $ch; + next; + } + $prev_event = $lastevent; + $lastevent = $ch; + } + return undef; +} + +=head2 write_tree + + Title : write_tree + Usage : $treeio->write_tree($tree); + Function: Write a tree out to data stream in nhx format + Returns : none + Args : Bio::Tree::TreeI object + +=cut + +sub write_tree{ + my ($self,@trees) = @_; + foreach my $tree ( @trees ) { + my @data = _write_tree_Helper($tree->get_root_node); + if($data[-1] !~ /\)$/ ) { + $data[0] = "(".$data[0]; + $data[-1] .= ")"; + } + $self->_print(join(',', @data), ";\n"); + } + $self->flush if $self->_flush_on_write && defined $self->_fh; + return; +} + +sub _write_tree_Helper { + my ($node) = @_; + return () unless defined $node; + + my @data; + + foreach my $n ( $node->each_Descendent() ) { + push @data, _write_tree_Helper($n); + } + + if( @data > 1 ) { + $data[0] = "(" . $data[0]; + $data[-1] .= ")"; + $data[-1] .= ":". $node->branch_length if $node->branch_length; + # this is to not print out an empty NHX for the root node which is + # a convience for how we get a handle to the whole tree + if( $node->ancestor || $node->id || defined $node->branch_length ) { + $data[-1] .= '[' . + join(":", "&&NHX", + map { "$_=" .join(',',$node->get_tag_values($_)) } + $node->get_all_tags() ) . ']'; + } + } else { + push @data, $node->to_string; # a leaf + } + return @data; +} + + +1;