0
|
1 # $Id: nhx.pm,v 1.4.2.2 2003/09/14 19:00:36 jason Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::TreeIO::nhx
|
|
4 #
|
|
5 # Cared for by Aaron Mackey <amackey@virginia.edu>
|
|
6 #
|
|
7 # Copyright Aaron Mackey
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::TreeIO::nhx - TreeIO implementation for parsing
|
|
16 Newick/New Hampshire eXtendend (NHX) format.
|
|
17
|
|
18 =head1 SYNOPSIS
|
|
19
|
|
20 # do not use this module directly
|
|
21 use Bio::TreeIO;
|
|
22 my $treeio = new Bio::TreeIO(-format => 'nhx', -file => 'tree.dnd');
|
|
23 my $tree = $treeio->next_tree;
|
|
24
|
|
25 =head1 DESCRIPTION
|
|
26
|
|
27 This module handles parsing and writing of Newick/New Hampshire eXtended (NHX) format.
|
|
28
|
|
29 =head1 FEEDBACK
|
|
30
|
|
31 =head2 Mailing Lists
|
|
32
|
|
33 User feedback is an integral part of the evolution of this and other
|
|
34 Bioperl modules. Send your comments and suggestions preferably to the
|
|
35 Bioperl mailing list. Your participation is much appreciated.
|
|
36
|
|
37 bioperl-l@bioperl.org - General discussion
|
|
38 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
39
|
|
40 =head2 Reporting Bugs
|
|
41
|
|
42 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
43 of the bugs and their resolution. Bug reports can be submitted via
|
|
44 email or the web:
|
|
45
|
|
46 bioperl-bugs@bioperl.org
|
|
47 http://bugzilla.bioperl.org/
|
|
48
|
|
49 =head1 AUTHOR - Aaron Mackey
|
|
50
|
|
51 Email amackey@virginia.edu
|
|
52
|
|
53 Describe contact details here
|
|
54
|
|
55 =head1 CONTRIBUTORS
|
|
56
|
|
57 Additional contributors names and emails here
|
|
58
|
|
59 =head1 APPENDIX
|
|
60
|
|
61 The rest of the documentation details each of the object methods.
|
|
62 Internal methods are usually preceded with a _
|
|
63
|
|
64 =cut
|
|
65
|
|
66
|
|
67 # Let the code begin...
|
|
68
|
|
69
|
|
70 package Bio::TreeIO::nhx;
|
|
71 use vars qw(@ISA);
|
|
72 use strict;
|
|
73
|
|
74 # Object preamble - inherits from Bio::Root::Root
|
|
75
|
|
76 use Bio::TreeIO;
|
|
77 use Bio::Tree::NodeNHX;
|
|
78 use Bio::Event::EventGeneratorI;
|
|
79 #use XML::Handler::Subs;
|
|
80
|
|
81
|
|
82 @ISA = qw(Bio::TreeIO );
|
|
83
|
|
84 sub _initialize {
|
|
85 my($self, %args) = @_;
|
|
86 $args{-nodetype} ||= 'Bio::Tree::NodeNHX';
|
|
87 $self->SUPER::_initialize(%args);
|
|
88 }
|
|
89
|
|
90 =head2 next_tree
|
|
91
|
|
92 Title : next_tree
|
|
93 Usage : my $tree = $treeio->next_tree
|
|
94 Function: Gets the next tree in the stream
|
|
95 Returns : Bio::Tree::TreeI
|
|
96 Args : none
|
|
97
|
|
98
|
|
99 =cut
|
|
100
|
|
101 sub next_tree{
|
|
102 my ($self) = @_;
|
|
103 local $/ = ";\n";
|
|
104 return unless $_ = $self->_readline;
|
|
105 s/\s+//g;
|
|
106 $self->debug("entry is $_\n");
|
|
107 my $chars = '';
|
|
108 $self->_eventHandler->start_document;
|
|
109 my ($prev_event,$lastevent) = ('','');
|
|
110 my @ch = split(//, $_);
|
|
111 foreach my $ch (@ch) {
|
|
112 if( $ch eq ';' ) {
|
|
113 $self->_eventHandler->in_element('node') &&
|
|
114 $self->_eventHandler->end_element( {'Name' => 'node'});
|
|
115 return $self->_eventHandler->end_document;
|
|
116 } elsif ($ch eq '[') {
|
|
117 if ( length $chars ) {
|
|
118 if ( $lastevent eq ':' ) {
|
|
119 $self->_eventHandler->start_element( { Name => 'branch_length' } );
|
|
120 $self->_eventHandler->characters($chars);
|
|
121 $self->_eventHandler->end_element( { Name => 'branch_length' });
|
|
122 $lastevent = $prev_event;
|
|
123 } else {
|
|
124 $self->debug("id with no branchlength is $chars\n");
|
|
125 $self->_eventHandler->start_element( { 'Name' => 'node' } );
|
|
126 $self->_eventHandler->start_element( { 'Name' => 'id' } );
|
|
127 $self->_eventHandler->characters($chars);
|
|
128 $self->_eventHandler->end_element( { 'Name' => 'id' } );
|
|
129 }
|
|
130 } else {
|
|
131 $self->_eventHandler->start_element( { Name => 'node' } );
|
|
132 }
|
|
133 my $leafstatus = ( $lastevent ne ')' ) ? 1 : 0;
|
|
134 $self->_eventHandler->start_element({'Name' => 'leaf'});
|
|
135 $self->_eventHandler->characters($leafstatus);
|
|
136 $self->_eventHandler->end_element({'Name' => 'leaf'});
|
|
137 $chars = '';
|
|
138
|
|
139 $self->_eventHandler->start_element( { Name => 'nhx_tag' });
|
|
140 } elsif( $ch eq '(' ) {
|
|
141 $chars = '';
|
|
142 $self->_eventHandler->start_element( {'Name' => 'tree'} );
|
|
143 } elsif($ch eq ')' ) {
|
|
144 if( length $chars ) {
|
|
145 if( $lastevent eq ':') {
|
|
146 unless ($self->_eventHandler->within_element('nhx_tag')) {
|
|
147 $self->_eventHandler->start_element( { 'Name' => 'branch_length'});
|
|
148 $self->_eventHandler->characters($chars);
|
|
149 $self->_eventHandler->end_element( {'Name' => 'branch_length'});
|
|
150 } else {
|
|
151 $self->throw("malformed input; end of node ) before ] found");
|
|
152 }
|
|
153 } else {
|
|
154 $self->debug("id with no branchlength is $chars\n");
|
|
155 $self->_eventHandler->start_element( { 'Name' => 'node' } );
|
|
156 $self->_eventHandler->start_element( { 'Name' => 'id' } );
|
|
157 $self->_eventHandler->characters($chars);
|
|
158 $self->_eventHandler->end_element( { 'Name' => 'id' } );
|
|
159 }
|
|
160
|
|
161 } elsif ( $lastevent ne ']' ) {
|
|
162 $self->_eventHandler->start_element( {'Name' => 'node'} );
|
|
163 }
|
|
164 my $leafstatus = ( $lastevent ne ')' ) ? 1 : 0;
|
|
165 $self->_eventHandler->start_element({'Name' => 'leaf'});
|
|
166 $self->_eventHandler->characters($leafstatus);
|
|
167 $self->_eventHandler->end_element({'Name' => 'leaf'});
|
|
168
|
|
169 $self->_eventHandler->end_element( {'Name' => 'node'} );
|
|
170 $self->_eventHandler->end_element( {'Name' => 'tree'} );
|
|
171 $chars = '';
|
|
172 } elsif ( $ch eq ',' ) {
|
|
173 if( length $chars ) {
|
|
174 if( $lastevent eq ':' ) {
|
|
175 $self->_eventHandler->start_element( { 'Name' => 'branch_length'});
|
|
176 $self->_eventHandler->characters($chars);
|
|
177 $self->_eventHandler->end_element( {'Name' => 'branch_length'});
|
|
178 $lastevent = $prev_event;
|
|
179 } else {
|
|
180 $self->debug("id with no branchlength is $chars\n");
|
|
181 $self->_eventHandler->start_element( { 'Name' => 'node' } );
|
|
182 $self->_eventHandler->start_element( { 'Name' => 'id' } );
|
|
183 $self->_eventHandler->characters($chars);
|
|
184 $self->_eventHandler->end_element( { 'Name' => 'id' } );
|
|
185 }
|
|
186 } elsif ( $lastevent ne ']' ) {
|
|
187 $self->_eventHandler->start_element( { 'Name' => 'node' } );
|
|
188 }
|
|
189 $self->_eventHandler->end_element( {'Name' => 'node'} );
|
|
190 $chars = '';
|
|
191 } elsif( $ch eq ':' ) {
|
|
192 if ($self->_eventHandler->within_element('nhx_tag')) {
|
|
193 if ($lastevent eq '=') {
|
|
194 $self->_eventHandler->start_element( { Name => 'tag_value' } );
|
|
195 $self->_eventHandler->characters($chars);
|
|
196 $self->_eventHandler->end_element( { Name => 'tag_value' } );
|
|
197 $chars = '';
|
|
198 } else {
|
|
199 if ($chars eq '&&NHX') {
|
|
200 $chars = ''; # get rid of &&NHX:
|
|
201 } else {
|
|
202 $self->throw("Unrecognized, non \&\&NHX string: >>$chars<<");
|
|
203 }
|
|
204 }
|
|
205 } elsif ($lastevent ne ']') {
|
|
206 $self->debug("id with a branchlength coming is $chars\n");
|
|
207 $self->_eventHandler->start_element( { 'Name' => 'node' } );
|
|
208 $self->_eventHandler->start_element( { 'Name' => 'id' } );
|
|
209 $self->_eventHandler->characters($chars);
|
|
210 $self->_eventHandler->end_element( { 'Name' => 'id' } );
|
|
211 $chars = '';
|
|
212 }
|
|
213 } elsif ( $ch eq '=' ) {
|
|
214 if ($self->_eventHandler->within_element('nhx_tag')) {
|
|
215 $self->_eventHandler->start_element( { Name => 'tag_name' } );
|
|
216 $self->_eventHandler->characters($chars);
|
|
217 $self->_eventHandler->end_element( { Name => 'tag_name' } );
|
|
218 $chars = '';
|
|
219 } else {
|
|
220 $chars .= $ch;
|
|
221 }
|
|
222 } elsif ( $ch eq ']' ) {
|
|
223 if ($self->_eventHandler->within_element('nhx_tag') && $lastevent eq '=') {
|
|
224 $self->_eventHandler->start_element( { Name => 'tag_value' } );
|
|
225 $self->_eventHandler->characters($chars);
|
|
226 $self->_eventHandler->end_element( { Name => 'tag_value' } );
|
|
227 $chars = '';
|
|
228 $self->_eventHandler->end_element( { Name => 'nhx_tag' } );
|
|
229 } else {
|
|
230 $chars .= $ch;
|
|
231 next;
|
|
232 }
|
|
233 } else {
|
|
234 $chars .= $ch;
|
|
235 next;
|
|
236 }
|
|
237 $prev_event = $lastevent;
|
|
238 $lastevent = $ch;
|
|
239 }
|
|
240 return undef;
|
|
241 }
|
|
242
|
|
243 =head2 write_tree
|
|
244
|
|
245 Title : write_tree
|
|
246 Usage : $treeio->write_tree($tree);
|
|
247 Function: Write a tree out to data stream in nhx format
|
|
248 Returns : none
|
|
249 Args : Bio::Tree::TreeI object
|
|
250
|
|
251 =cut
|
|
252
|
|
253 sub write_tree{
|
|
254 my ($self,@trees) = @_;
|
|
255 foreach my $tree ( @trees ) {
|
|
256 my @data = _write_tree_Helper($tree->get_root_node);
|
|
257 if($data[-1] !~ /\)$/ ) {
|
|
258 $data[0] = "(".$data[0];
|
|
259 $data[-1] .= ")";
|
|
260 }
|
|
261 $self->_print(join(',', @data), ";\n");
|
|
262 }
|
|
263 $self->flush if $self->_flush_on_write && defined $self->_fh;
|
|
264 return;
|
|
265 }
|
|
266
|
|
267 sub _write_tree_Helper {
|
|
268 my ($node) = @_;
|
|
269 return () unless defined $node;
|
|
270
|
|
271 my @data;
|
|
272
|
|
273 foreach my $n ( $node->each_Descendent() ) {
|
|
274 push @data, _write_tree_Helper($n);
|
|
275 }
|
|
276
|
|
277 if( @data > 1 ) {
|
|
278 $data[0] = "(" . $data[0];
|
|
279 $data[-1] .= ")";
|
|
280 $data[-1] .= ":". $node->branch_length if $node->branch_length;
|
|
281 # this is to not print out an empty NHX for the root node which is
|
|
282 # a convience for how we get a handle to the whole tree
|
|
283 if( $node->ancestor || $node->id || defined $node->branch_length ) {
|
|
284 $data[-1] .= '[' .
|
|
285 join(":", "&&NHX",
|
|
286 map { "$_=" .join(',',$node->get_tag_values($_)) }
|
|
287 $node->get_all_tags() ) . ']';
|
|
288 }
|
|
289 } else {
|
|
290 push @data, $node->to_string; # a leaf
|
|
291 }
|
|
292 return @data;
|
|
293 }
|
|
294
|
|
295
|
|
296 1;
|