Mercurial > repos > willmclaren > ensembl_vep
comparison variant_effect_predictor/Bio/Tools/Phylo/PAML.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21066c0abaf5 |
---|---|
1 # PAML.pm,v 1.3 2002/06/20 18:50:37 amackey Exp | |
2 # | |
3 # BioPerl module for Bio::Tools::Phylo::PAML | |
4 # | |
5 # Cared for by Jason Stajich <jason@bioperl.org> | |
6 # | |
7 # Copyright Jason Stajich, Aaron J Mackey | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Tools::Phylo::PAML - Parses output from the PAML programs codeml, | |
16 baseml, basemlg, codemlsites and yn00 | |
17 | |
18 =head1 SYNOPSIS | |
19 | |
20 #!/usr/bin/perl -Tw | |
21 use strict; | |
22 | |
23 use Bio::Tools::Phylo::PAML; | |
24 | |
25 # need to specify the output file name (or a fh) (defaults to | |
26 # -file => "codeml.mlc"); also, optionally, the directory in which | |
27 # the other result files (rst, 2ML.dS, etc) may be found (defaults | |
28 # to "./") | |
29 my $parser = new Bio::Tools::Phylo::PAML | |
30 (-file => "./results/mlc", -dir => "./results/"); | |
31 | |
32 # get the first/next result; a Bio::Tools::Phylo::PAML::Result object, | |
33 # which isa Bio::SeqAnalysisResultI object. | |
34 my $result = $parser->next_result(); | |
35 | |
36 # get the sequences used in the analysis; returns Bio::PrimarySeq | |
37 # objects (OTU = Operational Taxonomic Unit). | |
38 my @otus = $result->get_seqs(); | |
39 | |
40 # codon summary: codon usage of each sequence [ arrayref of { | |
41 # hashref of counts for each codon } for each sequence and the | |
42 # overall sum ], and positional nucleotide distribution [ arrayref | |
43 # of { hashref of frequencies for each nucleotide } for each | |
44 # sequence and overall frequencies ]: | |
45 my ($codonusage, $ntdist) = $result->get_codon_summary(); | |
46 | |
47 # example manipulations of $codonusage and $ntdist: | |
48 printf "There were %d '%s' codons in the first seq (%s)\n", | |
49 $codonusage->[0]->{AAA}, 'AAA', $otus[0]->id(); | |
50 printf "There were %d '%s' codons used in all the sequences\n", | |
51 $codonusage->[$#{$codonusage}]->{AAA}, 'AAA'; | |
52 printf "Nucleotide '%c' was present %g of the time in seq %s\n", | |
53 'A', $ntdist->[1]->{A}, $otus[1]->id(); | |
54 | |
55 # get Nei & Gojobori dN/dS matrix: | |
56 my $NGmatrix = $result->get_NGmatrix(); | |
57 | |
58 # get ML-estimated dN/dS matrix, if calculated; this corresponds to | |
59 # the runmode = -2, pairwise comparison usage of codeml | |
60 my $MLmatrix = $result->get_MLmatrix(); | |
61 | |
62 # These matrices are length(@otu) x length(@otu) "strict lower | |
63 # triangle" 2D-matrices, which means that the diagonal and | |
64 # everything above it is undefined. Each of the defined cells is a | |
65 # hashref of estimates for "dN", "dS", "omega" (dN/dS ratio), "t", | |
66 # "S" and "N". If a ML matrix, "lnL" will also be defined. | |
67 printf "The omega ratio for sequences %s vs %s was: %g\n", | |
68 $otus[0]->id, $otus[1]->id, $MLmatrix->[0]->[1]->{omega}; | |
69 | |
70 # with a little work, these matrices could also be passed to | |
71 # Bio::Tools::Run::Phylip::Neighbor, or other similar tree-building | |
72 # method that accepts a matrix of "distances" (using the LOWTRI | |
73 # option): | |
74 my $distmat = [ map { [ map { $$_{omega} } @$_ ] } @$MLmatrix ]; | |
75 | |
76 # for runmode's other than -2, get tree topology with estimated | |
77 # branch lengths; returns a Bio::Tree::TreeI-based tree object with | |
78 # added PAML parameters at each node | |
79 my $tree = $result->get_tree(); | |
80 for my $node ($tree->get_nodes()) { | |
81 # inspect the tree: the "t" (time) parameter is available via | |
82 # $node->branch_length(); all other branch-specific parameters | |
83 # ("omega", "dN", etc.) are available via $node->param('omega'); | |
84 } | |
85 | |
86 # get any general model parameters: kappa (the | |
87 # transition/transversion ratio), NSsites model parameters ("p0", | |
88 # "p1", "w0", "w1", etc.), etc. | |
89 my $params = $result->get_model_params(); | |
90 printf "M1 params: p0 = %g\tp1 = %g\n", $params->{p0}, $params->{p1}; | |
91 | |
92 # for NSsites models, obtain arrayrefs of posterior probabilities | |
93 # for membership in each class for every position; probabilities | |
94 # correspond to classes w0, w1, ... etc. | |
95 my @probs = $result->get_posteriors(); | |
96 | |
97 # find, say, positively selected sites! | |
98 if ($params->{w2} > 1) { | |
99 for (my $i = 0; $i < @probs ; $i++) { | |
100 if ($probs[$i]->[2] > 0.5) { | |
101 # assumes model M1: three w's, w0, w1 and w2 (positive selection) | |
102 printf "position %d: (%g prob, %g omega, %g mean w)\n", | |
103 $i, $probs[$i]->[2], $params->{w2}, $probs[$i]->[3]; | |
104 } | |
105 } | |
106 } else { print "No positive selection found!\n"; } | |
107 | |
108 =head1 DESCRIPTION | |
109 | |
110 This module is used to parse the output from the PAML programs codeml, | |
111 baseml, basemlg, codemlsites and yn00. You can use the | |
112 Bio::Tools::Run::Phylo::PAML::* modules to actually run some of the | |
113 PAML programs, but this module is only useful to parse the output. | |
114 | |
115 =head1 FEEDBACK | |
116 | |
117 =head2 Mailing Lists | |
118 | |
119 User feedback is an integral part of the evolution of this and other | |
120 Bioperl modules. Send your comments and suggestions preferably to | |
121 the Bioperl mailing list. Your participation is much appreciated. | |
122 | |
123 bioperl-l@bioperl.org - General discussion | |
124 http://bioperl.org/MailList.shtml - About the mailing lists | |
125 | |
126 =head2 Reporting Bugs | |
127 | |
128 Report bugs to the Bioperl bug tracking system to help us keep track | |
129 of the bugs and their resolution. Bug reports can be submitted via | |
130 email or the web: | |
131 | |
132 bioperl-bugs@bioperl.org | |
133 http://bugzilla.bioperl.org/ | |
134 | |
135 =head1 AUTHOR - Jason Stajich, Aaron Mackey | |
136 | |
137 Email jason@bioperl.org | |
138 Email amackey@virginia.edu | |
139 | |
140 =head1 TODO | |
141 | |
142 check output from pre 1.12 | |
143 | |
144 =head1 APPENDIX | |
145 | |
146 The rest of the documentation details each of the object methods. | |
147 Internal methods are usually preceded with a _ | |
148 | |
149 =cut | |
150 | |
151 | |
152 # Let the code begin... | |
153 | |
154 | |
155 package Bio::Tools::Phylo::PAML; | |
156 use vars qw(@ISA); | |
157 use strict; | |
158 | |
159 # Object preamble - inherits from Bio::Root::Root | |
160 | |
161 use Bio::Root::Root; | |
162 use Bio::AnalysisParserI; | |
163 use Bio::Root::IO; | |
164 | |
165 @ISA = qw(Bio::Root::Root Bio::Root::IO Bio::AnalysisParserI); | |
166 | |
167 # other objects used: | |
168 use IO::String; | |
169 use Bio::TreeIO; | |
170 use Bio::Tools::Phylo::PAML::Result; | |
171 use Bio::PrimarySeq; | |
172 | |
173 =head2 new | |
174 | |
175 Title : new | |
176 Usage : my $obj = new Bio::Tools::Phylo::PAML(%args); | |
177 Function: Builds a new Bio::Tools::Phylo::PAML object | |
178 Returns : Bio::Tools::Phylo::PAML | |
179 Args : Hash of options: -file, -fh, -dir | |
180 -file (or -fh) should contain the contents of the PAML | |
181 outfile; -dir is the (optional) name of the directory in | |
182 which the PAML program was run (and includes other | |
183 PAML-generated files from which we can try to gather data) | |
184 | |
185 =cut | |
186 | |
187 sub new { | |
188 | |
189 my ($class, @args) = @_; | |
190 | |
191 my $self = $class->SUPER::new(@args); | |
192 $self->_initialize_io(@args); | |
193 | |
194 my ($dir) = $self->_rearrange([qw(DIR)], @args); | |
195 $self->{_dir} = $dir if defined $dir; | |
196 | |
197 return $self; | |
198 } | |
199 | |
200 =head2 Implement Bio::AnalysisParserI interface | |
201 | |
202 =cut | |
203 | |
204 =head2 next_result | |
205 | |
206 Title : next_result | |
207 Usage : $result = $obj->next_result(); | |
208 Function: Returns the next result available from the input, or | |
209 undef if there are no more results. | |
210 Example : | |
211 Returns : a Bio::Tools::Phylo::PAML::Result object | |
212 Args : none | |
213 | |
214 =cut | |
215 | |
216 sub next_result { | |
217 | |
218 my ($self) = @_; | |
219 | |
220 my %data; | |
221 # get the various codon and other sequence summary data, if necessary: | |
222 $self->_parse_summary | |
223 unless ($self->{'_summary'} && !$self->{'_summary'}->{'multidata'}); | |
224 | |
225 # OK, depending on seqtype and runmode now, one of a few things can happen: | |
226 my $seqtype = $self->{'_summary'}->{'seqtype'}; | |
227 if ($seqtype eq 'CODONML' || $seqtype eq 'AAML') { | |
228 while ($_ = $self->_readline) { | |
229 if ($seqtype eq 'CODONML' && | |
230 m/^pairwise comparison, codon frequencies:/o) { | |
231 | |
232 # runmode = -2, CODONML | |
233 $self->_pushback($_); | |
234 %data = $self->_parse_PairwiseCodon; | |
235 last; | |
236 | |
237 } elsif ($seqtype eq 'AAML' && m/^ML distances of aa seqs\.$/o) { | |
238 | |
239 # runmode = -2, AAML | |
240 $self->throw( -class => 'Bio::Root::NotImplemented', | |
241 -text => "Pairwise AA not yet implemented!" | |
242 ); | |
243 | |
244 # $self->_pushback($_); | |
245 # %data = $self->_parse_PairwiseAA; | |
246 # last; | |
247 } elsif (m/^Model \d+: /o) { | |
248 | |
249 # NSSitesBatch | |
250 $self->throw( -class => 'Bio::Root::NotImplemented', | |
251 -text => "NSsitesBatch not yet implemented!" | |
252 ); | |
253 | |
254 # $self->_pushback($_); | |
255 # %data = $self->_parse_NSsitesBatch; | |
256 # last; | |
257 | |
258 } elsif (m/^TREE/) { | |
259 | |
260 # runmode = 0 | |
261 $self->_pushback($_); | |
262 %data = $self->_parse_Forestry; | |
263 last; | |
264 | |
265 } elsif (m/Heuristic tree search by stepwise addition$/o) { | |
266 | |
267 # runmode = 3 | |
268 $self->throw( -class => 'Bio::Root::NotImplemented', | |
269 -text => "StepwiseAddition not yet implemented!" | |
270 ); | |
271 | |
272 # $self->_pushback($_); | |
273 # %data = $self->_parse_StepwiseAddition; | |
274 # last; | |
275 | |
276 } elsif (m/Heuristic tree search by NNI perturbation$/o) { | |
277 | |
278 # runmode = 4 | |
279 $self->throw( -class => 'Bio::Root::NotImplemented', | |
280 -text => "NNI Perturbation not yet implemented!" | |
281 ); | |
282 | |
283 # $self->_pushback($_); | |
284 # %data = $self->_parse_Perturbation; | |
285 # last; | |
286 | |
287 } elsif (m/^stage 0:/o) { | |
288 | |
289 # runmode = (1 or 2) | |
290 $self->throw( -class => 'Bio::Root::NotImplemented', | |
291 -text => "StarDecomposition not yet implemented!" | |
292 ); | |
293 | |
294 # $self->_pushback($_); | |
295 # %data = $self->_parse_StarDecomposition; | |
296 # last; | |
297 | |
298 } | |
299 } | |
300 } elsif ($seqtype eq 'BASEML') { | |
301 } elsif ($seqtype eq 'YN00') { | |
302 while ($_ = $self->_readline) { | |
303 if( m/^Estimation by the method/ ) { | |
304 $self->_pushback($_); | |
305 %data = $self->_parse_YN_Pairwise; | |
306 last; | |
307 } | |
308 } | |
309 } | |
310 if (%data) { | |
311 $data{'-version'} = $self->{'_summary'}->{'version'}; | |
312 $data{'-seqs'} = $self->{'_summary'}->{'seqs'}; | |
313 $data{'-patterns'} = $self->{'_summary'}->{'patterns'}; | |
314 $data{'-ngmatrix'} = $self->{'_summary'}->{'ngmatrix'}; | |
315 $data{'-codonpos'} = $self->{'_summary'}->{'codonposition'}; | |
316 $data{'-codonfreq'} = $self->{'_summary'}->{'codonfreqs'}; | |
317 return new Bio::Tools::Phylo::PAML::Result %data; | |
318 } else { | |
319 return undef; | |
320 } | |
321 } | |
322 | |
323 | |
324 sub _parse_summary { | |
325 | |
326 my ($self) = @_; | |
327 | |
328 # Depending on whether verbose > 0 or not, and whether the result | |
329 # set comes from a multi-data run, the first few lines could be | |
330 # various things; we're going to throw away any sequence data | |
331 # here, since we'll get it later anyways | |
332 | |
333 # multidata ? : \n\nData set 1\n | |
334 # verbose ? : cleandata ? : \nBefore deleting alignment gaps. \d sites\n | |
335 # [ sequence printout ] | |
336 # \nAfter deleting gaps. \d sites\n" | |
337 # : [ sequence printout ] | |
338 # CODONML (in paml 3.12 February 2002) <<-- what we want to see! | |
339 | |
340 my $SEQTYPES = qr( (?: (?: CODON | AA | BASE | CODON2AA ) ML ) | YN00 )x; | |
341 while ($_ = $self->_readline) { | |
342 if ( m/^($SEQTYPES) \s+ # seqtype: CODONML, AAML, BASEML, CODON2AAML, YN00, etc | |
343 (?: \(in \s+ ([^\)]+?) \s* \) \s* )? # version: "paml 3.12 February 2002"; not present < 3.1 or YN00 | |
344 (\S+) \s* # tree filename | |
345 (?: (.+?) )? # model description (not there in YN00) | |
346 \s* $ # trim any trailing space | |
347 /ox | |
348 ) { | |
349 | |
350 @{$self->{_summary}}{qw(seqtype version treefile model)} = ($1, | |
351 $2, | |
352 $3, | |
353 $4); | |
354 last; | |
355 | |
356 } elsif (m/^Data set \d$/o) { | |
357 $self->{'_summary'} = {}; | |
358 $self->{'_summary'}->{'multidata'}++; | |
359 } | |
360 } | |
361 | |
362 unless (defined $self->{'_summary'}->{'seqtype'}) { | |
363 $self->throw( -class => 'Bio::Root::NotImplemented', | |
364 -text => 'Unknown format of PAML output'); | |
365 } | |
366 | |
367 | |
368 my $seqtype = $self->{'_summary'}->{'seqtype'}; | |
369 $self->debug( "seqtype is $seqtype\n"); | |
370 if ($seqtype eq "CODONML") { | |
371 | |
372 $self->_parse_inputparams(); # settings from the .ctl file that get printed | |
373 $self->_parse_patterns(); # codon patterns - not very interesting | |
374 $self->_parse_seqs(); # the sequences data used for analysis | |
375 $self->_parse_codoncts(); # counts and distributions of codon/nt usage | |
376 $self->_parse_codon_freqs(); # codon frequencies | |
377 $self->_parse_distmat(); # NG distance matrices | |
378 | |
379 | |
380 } elsif ($seqtype eq "AAML") { | |
381 $self->throw( -class => 'Bio::Root::NotImplemented', | |
382 -text => 'AAML parsing not yet implemented!'); | |
383 } elsif ($seqtype eq "CODON2AAML") { | |
384 $self->throw( -class => 'Bio::Root::NotImplemented', | |
385 -text => 'CODON2AAML parsing not yet implemented!'); | |
386 } elsif ($seqtype eq "BASEML") { | |
387 $self->throw( -class => 'Bio::Root::NotImplemented', | |
388 -text => 'BASEML parsing not yet implemented!'); | |
389 } elsif ($seqtype eq "YN00") { | |
390 $self->_parse_codon_freqs(); | |
391 $self->_parse_codoncts(); | |
392 $self->_parse_distmat(); # NG distance matrices | |
393 | |
394 } else { | |
395 $self->throw( -class => 'Bio::Root::NotImplemented', | |
396 -text => 'Unknown seqtype, not yet implemented!', | |
397 -value => $seqtype | |
398 ); | |
399 } | |
400 | |
401 } | |
402 | |
403 | |
404 sub _parse_inputparams { | |
405 my ($self) = @_; | |
406 | |
407 } | |
408 | |
409 sub _parse_codon_freqs { | |
410 my ($self) = @_; | |
411 my ($okay,$done) = (0,0); | |
412 while( defined($_ = $self->_readline ) ) { | |
413 if( /^Nei/ ) { $self->_pushback($_); last } | |
414 last if( $done); | |
415 next if ( /^\s+/); | |
416 next unless($okay || /^Codon position x base \(3x4\) table\, overall/ ); | |
417 $okay = 1; | |
418 if( s/^position\s+(\d+):\s+// ) { | |
419 my $pos = $1; | |
420 s/\s+$//; | |
421 my @bases = split; | |
422 foreach my $str ( @bases ) { | |
423 my ( $base,$freq) = split(/:/,$str,2); | |
424 $self->{'_summary'}->{'codonposition'}->[$pos-1]->{$base} = $freq; | |
425 } | |
426 $done = 1 if $pos == 3; | |
427 } | |
428 } | |
429 $done = 0; | |
430 while( defined( $_ = $self->_readline) ) { | |
431 if( /^Nei\s\&\sGojobori/ ) { $self->_pushback($_); last } | |
432 last if ( $done ); | |
433 if( /^Codon frequencies under model, for use in evolver:/ ){ | |
434 while( defined( $_ = $self->_readline) ) { | |
435 last if( /^\s+$/ ); | |
436 s/^\s+//; | |
437 s/\s+$//; | |
438 push @{$self->{'_summary'}->{'codonfreqs'}},[split]; | |
439 } | |
440 $done = 1; | |
441 } | |
442 } | |
443 } | |
444 | |
445 sub _parse_patterns { | |
446 my ($self) = @_; | |
447 my ($patternct,@patterns,$ns,$ls); | |
448 while( defined($_ = $self->_readline) ) { | |
449 if( $patternct ) { | |
450 # last unless ( @patterns == $patternct ); | |
451 last if( /^\s+$/ ); | |
452 s/^\s+//; | |
453 push @patterns, split; | |
454 } elsif( /^ns\s+\=\s*(\d+)\s+ls\s+\=\s*(\d+)/ ) { | |
455 ($ns,$ls) = ($1,$2); | |
456 } elsif( /^\# site patterns \=\s*(\d+)/ ) { | |
457 $patternct = $1; | |
458 } else { | |
459 # $self->debug("Unknown line: $_"); | |
460 } | |
461 } | |
462 $self->{'_summary'}->{'patterns'} = { -patterns => \@patterns, | |
463 -ns => $ns, | |
464 -ls => $ls}; | |
465 } | |
466 | |
467 sub _parse_seqs { | |
468 | |
469 # this should in fact be packed into a Bio::SimpleAlign object instead of | |
470 # an array but we'll stay with this for now | |
471 my ($self) = @_; | |
472 my (@firstseq,@seqs); | |
473 while( defined ($_ = $self->_readline) ) { | |
474 last if( /^\s+$/ && @seqs > 0 ); | |
475 next if ( /^\s+$/ ); | |
476 next if( /^\d+\s+$/ ); | |
477 | |
478 my ($name,$seqstr) = split(/\s+/,$_,2); | |
479 $seqstr =~ s/\s+//g; # remove whitespace | |
480 unless( @firstseq) { | |
481 @firstseq = split(//,$seqstr); | |
482 push @seqs, new Bio::PrimarySeq(-id => $name, | |
483 -seq => $seqstr); | |
484 } else { | |
485 | |
486 my $i = 0; | |
487 my $v; | |
488 while(($v = index($seqstr,'.',$i)) >= $i ) { | |
489 # replace the '.' with the correct seq from the | |
490 substr($seqstr,$v,1,$firstseq[$v]); | |
491 $i = $v; | |
492 } | |
493 $self->debug( "adding seq $seqstr\n"); | |
494 push @seqs, new Bio::PrimarySeq(-id => $name, | |
495 -seq => $seqstr); | |
496 } | |
497 } | |
498 $self->{'_summary'}->{'seqs'} = \@seqs; | |
499 } | |
500 | |
501 sub _parse_codoncts { } | |
502 | |
503 sub _parse_distmat { | |
504 my ($self) = @_; | |
505 my @results; | |
506 while( defined ($_ = $self->_readline) ) { | |
507 next if/^\s+$/; | |
508 last; | |
509 } | |
510 return unless (/^Nei\s*\&\s*Gojobori/); | |
511 # skip the next 3 lines | |
512 if( $self->{'_summary'}->{'seqtype'} eq 'CODONML' ) { | |
513 $self->_readline; | |
514 $self->_readline; | |
515 $self->_readline; | |
516 } | |
517 my $seqct = 0; | |
518 while( defined ($_ = $self->_readline ) ) { | |
519 last if( /^\s+$/ && exists $self->{'_summary'}->{'ngmatrix'} ); | |
520 next if( /^\s+$/ ); | |
521 chomp; | |
522 my ($seq,$rest) = split(/\s+/,$_,2); | |
523 my $j = 0; | |
524 while( $rest =~ | |
525 /(\-?\d+(\.\d+)?)\s*\(\-?(\d+(\.\d+)?)\s+(\-?\d+(\.\d+)?)\)/g ) { | |
526 $self->{'_summary'}->{'ngmatrix'}->[$j++]->[$seqct] = | |
527 { 'omega' => $1, | |
528 'dN' => $3, | |
529 'dS' => $5 }; | |
530 } | |
531 $seqct++; | |
532 } | |
533 } | |
534 | |
535 sub _parse_PairwiseCodon { | |
536 my ($self) = @_; | |
537 my @result; | |
538 my ($a,$b,$log,$model); | |
539 while( defined( $_ = $self->_readline) ) { | |
540 if( /^pairwise comparison, codon frequencies\:\s*(\S+)\./) { | |
541 $model = $1; | |
542 } elsif( /^(\d+)\s+\((\S+)\)\s+\.\.\.\s+(\d+)\s+\((\S+)\)/ ) { | |
543 ($a,$b) = ($1,$3); | |
544 } elsif( /^lnL\s+\=\s*(\-?\d+(\.\d+)?)/ ) { | |
545 $log = $1; | |
546 } elsif( m/^t\=\s*(\d+(\.\d+)?)\s+ | |
547 S\=\s*(\d+(\.\d+)?)\s+ | |
548 N\=\s*(\d+(\.\d+)?)\s+ | |
549 dN\/dS\=\s*(\d+(\.\d+)?)\s+ | |
550 dN\=\s*(\d+(\.\d+)?)\s+ | |
551 dS\=\s*(\d+(\.\d+)?)/ox ) { | |
552 $result[$b-1]->[$a-1] = { | |
553 'lnL' => $log, | |
554 't' => $1, | |
555 'S' => $3, | |
556 'N' => $5, | |
557 'omega' => $7, | |
558 'dN' => $9, | |
559 'dS' => $11 }; | |
560 } elsif( /^\s+$/ ) { | |
561 next; | |
562 } elsif( /^\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)/ ) { | |
563 } else { | |
564 $self->debug( "unknown line: $_"); | |
565 } | |
566 } | |
567 return ( -mlmatrix => \@result); | |
568 } | |
569 | |
570 sub _parse_YN_Pairwise { | |
571 my ($self) = @_; | |
572 my @result; | |
573 while( defined( $_ = $self->_readline) ) { | |
574 last if( /^seq\.\s+seq\./); | |
575 } | |
576 while( defined( $_ = $self->_readline) ) { | |
577 if( m/^\s+(\d+)\s+ # seq # | |
578 (\d+)\s+ # seq # | |
579 (\d+(\.\d+))\s+ # S | |
580 (\d+(\.\d+))\s+ # N | |
581 (\d+(\.\d+))\s+ # t | |
582 (\d+(\.\d+))\s+ # kappa | |
583 (\d+(\.\d+))\s+ # omega | |
584 (\d+(\.\d+))\s+ # dN | |
585 \+\-\s+ | |
586 (\d+(\.\d+))\s+ # dN SE | |
587 (\d+(\.\d+))\s+ # dS | |
588 \+\-\s+ | |
589 (\d+(\.\d+))\s+ # dS SE | |
590 /ox | |
591 ) { | |
592 | |
593 $result[$2-1]->[$1-1] = { | |
594 'S' => $3, | |
595 'N' => $5, | |
596 't' => $7, | |
597 'kappa' => $9, | |
598 'omega' => $11, | |
599 'dN' => $13, | |
600 'dN_SE' => $15, | |
601 'dS' => $17, | |
602 'dS_SE' => $19, | |
603 }; | |
604 } elsif( /^\s+$/ ) { | |
605 next; | |
606 } | |
607 } | |
608 return ( -mlmatrix => \@result); | |
609 } | |
610 | |
611 sub _parse_Forestry { | |
612 | |
613 my ($self) = @_; | |
614 my %data = (-trees => []); | |
615 | |
616 | |
617 return %data | |
618 }; | |
619 | |
620 # parse the mlc file | |
621 | |
622 sub _parse_mlc { | |
623 my ($self) = @_; | |
624 my %data; | |
625 while( defined( $_ = $self->_readline) ) { | |
626 $self->debug( "mlc parse: $_"); | |
627 # Aaron this is where the parsing should begin | |
628 | |
629 # I'll do the Tree objects if you like - I'd do it by building | |
630 # an IO::String for the the tree data or does it make more | |
631 # sense to parse this out of a collection of files? | |
632 if( /^TREE/ ) { | |
633 # ... | |
634 while( defined($_ = $self->_readline) ) { | |
635 if( /^\(/) { | |
636 my $treestr = new IO::String($_); | |
637 my $treeio = new Bio::TreeIO(-fh => $treestr, | |
638 -format => 'newick'); | |
639 # this is very tenative here!! | |
640 push @{$self->{'_trees'}}, $treeio->next_tree; | |
641 } | |
642 } | |
643 } | |
644 } | |
645 } | |
646 | |
647 1; |