Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/TreeIO/nhx.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: nhx.pm,v 1.4.2.2 2003/09/14 19:00:36 jason Exp $ | |
2 # | |
3 # BioPerl module for Bio::TreeIO::nhx | |
4 # | |
5 # Cared for by Aaron Mackey <amackey@virginia.edu> | |
6 # | |
7 # Copyright Aaron Mackey | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::TreeIO::nhx - TreeIO implementation for parsing | |
16 Newick/New Hampshire eXtendend (NHX) format. | |
17 | |
18 =head1 SYNOPSIS | |
19 | |
20 # do not use this module directly | |
21 use Bio::TreeIO; | |
22 my $treeio = new Bio::TreeIO(-format => 'nhx', -file => 'tree.dnd'); | |
23 my $tree = $treeio->next_tree; | |
24 | |
25 =head1 DESCRIPTION | |
26 | |
27 This module handles parsing and writing of Newick/New Hampshire eXtended (NHX) format. | |
28 | |
29 =head1 FEEDBACK | |
30 | |
31 =head2 Mailing Lists | |
32 | |
33 User feedback is an integral part of the evolution of this and other | |
34 Bioperl modules. Send your comments and suggestions preferably to the | |
35 Bioperl mailing list. Your participation is much appreciated. | |
36 | |
37 bioperl-l@bioperl.org - General discussion | |
38 http://bioperl.org/MailList.shtml - About the mailing lists | |
39 | |
40 =head2 Reporting Bugs | |
41 | |
42 Report bugs to the Bioperl bug tracking system to help us keep track | |
43 of the bugs and their resolution. Bug reports can be submitted via | |
44 email or the web: | |
45 | |
46 bioperl-bugs@bioperl.org | |
47 http://bugzilla.bioperl.org/ | |
48 | |
49 =head1 AUTHOR - Aaron Mackey | |
50 | |
51 Email amackey@virginia.edu | |
52 | |
53 Describe contact details here | |
54 | |
55 =head1 CONTRIBUTORS | |
56 | |
57 Additional contributors names and emails here | |
58 | |
59 =head1 APPENDIX | |
60 | |
61 The rest of the documentation details each of the object methods. | |
62 Internal methods are usually preceded with a _ | |
63 | |
64 =cut | |
65 | |
66 | |
67 # Let the code begin... | |
68 | |
69 | |
70 package Bio::TreeIO::nhx; | |
71 use vars qw(@ISA); | |
72 use strict; | |
73 | |
74 # Object preamble - inherits from Bio::Root::Root | |
75 | |
76 use Bio::TreeIO; | |
77 use Bio::Tree::NodeNHX; | |
78 use Bio::Event::EventGeneratorI; | |
79 #use XML::Handler::Subs; | |
80 | |
81 | |
82 @ISA = qw(Bio::TreeIO ); | |
83 | |
84 sub _initialize { | |
85 my($self, %args) = @_; | |
86 $args{-nodetype} ||= 'Bio::Tree::NodeNHX'; | |
87 $self->SUPER::_initialize(%args); | |
88 } | |
89 | |
90 =head2 next_tree | |
91 | |
92 Title : next_tree | |
93 Usage : my $tree = $treeio->next_tree | |
94 Function: Gets the next tree in the stream | |
95 Returns : Bio::Tree::TreeI | |
96 Args : none | |
97 | |
98 | |
99 =cut | |
100 | |
101 sub next_tree{ | |
102 my ($self) = @_; | |
103 local $/ = ";\n"; | |
104 return unless $_ = $self->_readline; | |
105 s/\s+//g; | |
106 $self->debug("entry is $_\n"); | |
107 my $chars = ''; | |
108 $self->_eventHandler->start_document; | |
109 my ($prev_event,$lastevent) = ('',''); | |
110 my @ch = split(//, $_); | |
111 foreach my $ch (@ch) { | |
112 if( $ch eq ';' ) { | |
113 $self->_eventHandler->in_element('node') && | |
114 $self->_eventHandler->end_element( {'Name' => 'node'}); | |
115 return $self->_eventHandler->end_document; | |
116 } elsif ($ch eq '[') { | |
117 if ( length $chars ) { | |
118 if ( $lastevent eq ':' ) { | |
119 $self->_eventHandler->start_element( { Name => 'branch_length' } ); | |
120 $self->_eventHandler->characters($chars); | |
121 $self->_eventHandler->end_element( { Name => 'branch_length' }); | |
122 $lastevent = $prev_event; | |
123 } else { | |
124 $self->debug("id with no branchlength is $chars\n"); | |
125 $self->_eventHandler->start_element( { 'Name' => 'node' } ); | |
126 $self->_eventHandler->start_element( { 'Name' => 'id' } ); | |
127 $self->_eventHandler->characters($chars); | |
128 $self->_eventHandler->end_element( { 'Name' => 'id' } ); | |
129 } | |
130 } else { | |
131 $self->_eventHandler->start_element( { Name => 'node' } ); | |
132 } | |
133 my $leafstatus = ( $lastevent ne ')' ) ? 1 : 0; | |
134 $self->_eventHandler->start_element({'Name' => 'leaf'}); | |
135 $self->_eventHandler->characters($leafstatus); | |
136 $self->_eventHandler->end_element({'Name' => 'leaf'}); | |
137 $chars = ''; | |
138 | |
139 $self->_eventHandler->start_element( { Name => 'nhx_tag' }); | |
140 } elsif( $ch eq '(' ) { | |
141 $chars = ''; | |
142 $self->_eventHandler->start_element( {'Name' => 'tree'} ); | |
143 } elsif($ch eq ')' ) { | |
144 if( length $chars ) { | |
145 if( $lastevent eq ':') { | |
146 unless ($self->_eventHandler->within_element('nhx_tag')) { | |
147 $self->_eventHandler->start_element( { 'Name' => 'branch_length'}); | |
148 $self->_eventHandler->characters($chars); | |
149 $self->_eventHandler->end_element( {'Name' => 'branch_length'}); | |
150 } else { | |
151 $self->throw("malformed input; end of node ) before ] found"); | |
152 } | |
153 } else { | |
154 $self->debug("id with no branchlength is $chars\n"); | |
155 $self->_eventHandler->start_element( { 'Name' => 'node' } ); | |
156 $self->_eventHandler->start_element( { 'Name' => 'id' } ); | |
157 $self->_eventHandler->characters($chars); | |
158 $self->_eventHandler->end_element( { 'Name' => 'id' } ); | |
159 } | |
160 | |
161 } elsif ( $lastevent ne ']' ) { | |
162 $self->_eventHandler->start_element( {'Name' => 'node'} ); | |
163 } | |
164 my $leafstatus = ( $lastevent ne ')' ) ? 1 : 0; | |
165 $self->_eventHandler->start_element({'Name' => 'leaf'}); | |
166 $self->_eventHandler->characters($leafstatus); | |
167 $self->_eventHandler->end_element({'Name' => 'leaf'}); | |
168 | |
169 $self->_eventHandler->end_element( {'Name' => 'node'} ); | |
170 $self->_eventHandler->end_element( {'Name' => 'tree'} ); | |
171 $chars = ''; | |
172 } elsif ( $ch eq ',' ) { | |
173 if( length $chars ) { | |
174 if( $lastevent eq ':' ) { | |
175 $self->_eventHandler->start_element( { 'Name' => 'branch_length'}); | |
176 $self->_eventHandler->characters($chars); | |
177 $self->_eventHandler->end_element( {'Name' => 'branch_length'}); | |
178 $lastevent = $prev_event; | |
179 } else { | |
180 $self->debug("id with no branchlength is $chars\n"); | |
181 $self->_eventHandler->start_element( { 'Name' => 'node' } ); | |
182 $self->_eventHandler->start_element( { 'Name' => 'id' } ); | |
183 $self->_eventHandler->characters($chars); | |
184 $self->_eventHandler->end_element( { 'Name' => 'id' } ); | |
185 } | |
186 } elsif ( $lastevent ne ']' ) { | |
187 $self->_eventHandler->start_element( { 'Name' => 'node' } ); | |
188 } | |
189 $self->_eventHandler->end_element( {'Name' => 'node'} ); | |
190 $chars = ''; | |
191 } elsif( $ch eq ':' ) { | |
192 if ($self->_eventHandler->within_element('nhx_tag')) { | |
193 if ($lastevent eq '=') { | |
194 $self->_eventHandler->start_element( { Name => 'tag_value' } ); | |
195 $self->_eventHandler->characters($chars); | |
196 $self->_eventHandler->end_element( { Name => 'tag_value' } ); | |
197 $chars = ''; | |
198 } else { | |
199 if ($chars eq '&&NHX') { | |
200 $chars = ''; # get rid of &&NHX: | |
201 } else { | |
202 $self->throw("Unrecognized, non \&\&NHX string: >>$chars<<"); | |
203 } | |
204 } | |
205 } elsif ($lastevent ne ']') { | |
206 $self->debug("id with a branchlength coming is $chars\n"); | |
207 $self->_eventHandler->start_element( { 'Name' => 'node' } ); | |
208 $self->_eventHandler->start_element( { 'Name' => 'id' } ); | |
209 $self->_eventHandler->characters($chars); | |
210 $self->_eventHandler->end_element( { 'Name' => 'id' } ); | |
211 $chars = ''; | |
212 } | |
213 } elsif ( $ch eq '=' ) { | |
214 if ($self->_eventHandler->within_element('nhx_tag')) { | |
215 $self->_eventHandler->start_element( { Name => 'tag_name' } ); | |
216 $self->_eventHandler->characters($chars); | |
217 $self->_eventHandler->end_element( { Name => 'tag_name' } ); | |
218 $chars = ''; | |
219 } else { | |
220 $chars .= $ch; | |
221 } | |
222 } elsif ( $ch eq ']' ) { | |
223 if ($self->_eventHandler->within_element('nhx_tag') && $lastevent eq '=') { | |
224 $self->_eventHandler->start_element( { Name => 'tag_value' } ); | |
225 $self->_eventHandler->characters($chars); | |
226 $self->_eventHandler->end_element( { Name => 'tag_value' } ); | |
227 $chars = ''; | |
228 $self->_eventHandler->end_element( { Name => 'nhx_tag' } ); | |
229 } else { | |
230 $chars .= $ch; | |
231 next; | |
232 } | |
233 } else { | |
234 $chars .= $ch; | |
235 next; | |
236 } | |
237 $prev_event = $lastevent; | |
238 $lastevent = $ch; | |
239 } | |
240 return undef; | |
241 } | |
242 | |
243 =head2 write_tree | |
244 | |
245 Title : write_tree | |
246 Usage : $treeio->write_tree($tree); | |
247 Function: Write a tree out to data stream in nhx format | |
248 Returns : none | |
249 Args : Bio::Tree::TreeI object | |
250 | |
251 =cut | |
252 | |
253 sub write_tree{ | |
254 my ($self,@trees) = @_; | |
255 foreach my $tree ( @trees ) { | |
256 my @data = _write_tree_Helper($tree->get_root_node); | |
257 if($data[-1] !~ /\)$/ ) { | |
258 $data[0] = "(".$data[0]; | |
259 $data[-1] .= ")"; | |
260 } | |
261 $self->_print(join(',', @data), ";\n"); | |
262 } | |
263 $self->flush if $self->_flush_on_write && defined $self->_fh; | |
264 return; | |
265 } | |
266 | |
267 sub _write_tree_Helper { | |
268 my ($node) = @_; | |
269 return () unless defined $node; | |
270 | |
271 my @data; | |
272 | |
273 foreach my $n ( $node->each_Descendent() ) { | |
274 push @data, _write_tree_Helper($n); | |
275 } | |
276 | |
277 if( @data > 1 ) { | |
278 $data[0] = "(" . $data[0]; | |
279 $data[-1] .= ")"; | |
280 $data[-1] .= ":". $node->branch_length if $node->branch_length; | |
281 # this is to not print out an empty NHX for the root node which is | |
282 # a convience for how we get a handle to the whole tree | |
283 if( $node->ancestor || $node->id || defined $node->branch_length ) { | |
284 $data[-1] .= '[' . | |
285 join(":", "&&NHX", | |
286 map { "$_=" .join(',',$node->get_tag_values($_)) } | |
287 $node->get_all_tags() ) . ']'; | |
288 } | |
289 } else { | |
290 push @data, $node->to_string; # a leaf | |
291 } | |
292 return @data; | |
293 } | |
294 | |
295 | |
296 1; |