0
|
1 # PAML.pm,v 1.3 2002/06/20 18:50:37 amackey Exp
|
|
2 #
|
|
3 # BioPerl module for Bio::Tools::Phylo::PAML
|
|
4 #
|
|
5 # Cared for by Jason Stajich <jason@bioperl.org>
|
|
6 #
|
|
7 # Copyright Jason Stajich, Aaron J Mackey
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Tools::Phylo::PAML - Parses output from the PAML programs codeml,
|
|
16 baseml, basemlg, codemlsites and yn00
|
|
17
|
|
18 =head1 SYNOPSIS
|
|
19
|
|
20 #!/usr/bin/perl -Tw
|
|
21 use strict;
|
|
22
|
|
23 use Bio::Tools::Phylo::PAML;
|
|
24
|
|
25 # need to specify the output file name (or a fh) (defaults to
|
|
26 # -file => "codeml.mlc"); also, optionally, the directory in which
|
|
27 # the other result files (rst, 2ML.dS, etc) may be found (defaults
|
|
28 # to "./")
|
|
29 my $parser = new Bio::Tools::Phylo::PAML
|
|
30 (-file => "./results/mlc", -dir => "./results/");
|
|
31
|
|
32 # get the first/next result; a Bio::Tools::Phylo::PAML::Result object,
|
|
33 # which isa Bio::SeqAnalysisResultI object.
|
|
34 my $result = $parser->next_result();
|
|
35
|
|
36 # get the sequences used in the analysis; returns Bio::PrimarySeq
|
|
37 # objects (OTU = Operational Taxonomic Unit).
|
|
38 my @otus = $result->get_seqs();
|
|
39
|
|
40 # codon summary: codon usage of each sequence [ arrayref of {
|
|
41 # hashref of counts for each codon } for each sequence and the
|
|
42 # overall sum ], and positional nucleotide distribution [ arrayref
|
|
43 # of { hashref of frequencies for each nucleotide } for each
|
|
44 # sequence and overall frequencies ]:
|
|
45 my ($codonusage, $ntdist) = $result->get_codon_summary();
|
|
46
|
|
47 # example manipulations of $codonusage and $ntdist:
|
|
48 printf "There were %d '%s' codons in the first seq (%s)\n",
|
|
49 $codonusage->[0]->{AAA}, 'AAA', $otus[0]->id();
|
|
50 printf "There were %d '%s' codons used in all the sequences\n",
|
|
51 $codonusage->[$#{$codonusage}]->{AAA}, 'AAA';
|
|
52 printf "Nucleotide '%c' was present %g of the time in seq %s\n",
|
|
53 'A', $ntdist->[1]->{A}, $otus[1]->id();
|
|
54
|
|
55 # get Nei & Gojobori dN/dS matrix:
|
|
56 my $NGmatrix = $result->get_NGmatrix();
|
|
57
|
|
58 # get ML-estimated dN/dS matrix, if calculated; this corresponds to
|
|
59 # the runmode = -2, pairwise comparison usage of codeml
|
|
60 my $MLmatrix = $result->get_MLmatrix();
|
|
61
|
|
62 # These matrices are length(@otu) x length(@otu) "strict lower
|
|
63 # triangle" 2D-matrices, which means that the diagonal and
|
|
64 # everything above it is undefined. Each of the defined cells is a
|
|
65 # hashref of estimates for "dN", "dS", "omega" (dN/dS ratio), "t",
|
|
66 # "S" and "N". If a ML matrix, "lnL" will also be defined.
|
|
67 printf "The omega ratio for sequences %s vs %s was: %g\n",
|
|
68 $otus[0]->id, $otus[1]->id, $MLmatrix->[0]->[1]->{omega};
|
|
69
|
|
70 # with a little work, these matrices could also be passed to
|
|
71 # Bio::Tools::Run::Phylip::Neighbor, or other similar tree-building
|
|
72 # method that accepts a matrix of "distances" (using the LOWTRI
|
|
73 # option):
|
|
74 my $distmat = [ map { [ map { $$_{omega} } @$_ ] } @$MLmatrix ];
|
|
75
|
|
76 # for runmode's other than -2, get tree topology with estimated
|
|
77 # branch lengths; returns a Bio::Tree::TreeI-based tree object with
|
|
78 # added PAML parameters at each node
|
|
79 my $tree = $result->get_tree();
|
|
80 for my $node ($tree->get_nodes()) {
|
|
81 # inspect the tree: the "t" (time) parameter is available via
|
|
82 # $node->branch_length(); all other branch-specific parameters
|
|
83 # ("omega", "dN", etc.) are available via $node->param('omega');
|
|
84 }
|
|
85
|
|
86 # get any general model parameters: kappa (the
|
|
87 # transition/transversion ratio), NSsites model parameters ("p0",
|
|
88 # "p1", "w0", "w1", etc.), etc.
|
|
89 my $params = $result->get_model_params();
|
|
90 printf "M1 params: p0 = %g\tp1 = %g\n", $params->{p0}, $params->{p1};
|
|
91
|
|
92 # for NSsites models, obtain arrayrefs of posterior probabilities
|
|
93 # for membership in each class for every position; probabilities
|
|
94 # correspond to classes w0, w1, ... etc.
|
|
95 my @probs = $result->get_posteriors();
|
|
96
|
|
97 # find, say, positively selected sites!
|
|
98 if ($params->{w2} > 1) {
|
|
99 for (my $i = 0; $i < @probs ; $i++) {
|
|
100 if ($probs[$i]->[2] > 0.5) {
|
|
101 # assumes model M1: three w's, w0, w1 and w2 (positive selection)
|
|
102 printf "position %d: (%g prob, %g omega, %g mean w)\n",
|
|
103 $i, $probs[$i]->[2], $params->{w2}, $probs[$i]->[3];
|
|
104 }
|
|
105 }
|
|
106 } else { print "No positive selection found!\n"; }
|
|
107
|
|
108 =head1 DESCRIPTION
|
|
109
|
|
110 This module is used to parse the output from the PAML programs codeml,
|
|
111 baseml, basemlg, codemlsites and yn00. You can use the
|
|
112 Bio::Tools::Run::Phylo::PAML::* modules to actually run some of the
|
|
113 PAML programs, but this module is only useful to parse the output.
|
|
114
|
|
115 =head1 FEEDBACK
|
|
116
|
|
117 =head2 Mailing Lists
|
|
118
|
|
119 User feedback is an integral part of the evolution of this and other
|
|
120 Bioperl modules. Send your comments and suggestions preferably to
|
|
121 the Bioperl mailing list. Your participation is much appreciated.
|
|
122
|
|
123 bioperl-l@bioperl.org - General discussion
|
|
124 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
125
|
|
126 =head2 Reporting Bugs
|
|
127
|
|
128 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
129 of the bugs and their resolution. Bug reports can be submitted via
|
|
130 email or the web:
|
|
131
|
|
132 bioperl-bugs@bioperl.org
|
|
133 http://bugzilla.bioperl.org/
|
|
134
|
|
135 =head1 AUTHOR - Jason Stajich, Aaron Mackey
|
|
136
|
|
137 Email jason@bioperl.org
|
|
138 Email amackey@virginia.edu
|
|
139
|
|
140 =head1 TODO
|
|
141
|
|
142 check output from pre 1.12
|
|
143
|
|
144 =head1 APPENDIX
|
|
145
|
|
146 The rest of the documentation details each of the object methods.
|
|
147 Internal methods are usually preceded with a _
|
|
148
|
|
149 =cut
|
|
150
|
|
151
|
|
152 # Let the code begin...
|
|
153
|
|
154
|
|
155 package Bio::Tools::Phylo::PAML;
|
|
156 use vars qw(@ISA);
|
|
157 use strict;
|
|
158
|
|
159 # Object preamble - inherits from Bio::Root::Root
|
|
160
|
|
161 use Bio::Root::Root;
|
|
162 use Bio::AnalysisParserI;
|
|
163 use Bio::Root::IO;
|
|
164
|
|
165 @ISA = qw(Bio::Root::Root Bio::Root::IO Bio::AnalysisParserI);
|
|
166
|
|
167 # other objects used:
|
|
168 use IO::String;
|
|
169 use Bio::TreeIO;
|
|
170 use Bio::Tools::Phylo::PAML::Result;
|
|
171 use Bio::PrimarySeq;
|
|
172
|
|
173 =head2 new
|
|
174
|
|
175 Title : new
|
|
176 Usage : my $obj = new Bio::Tools::Phylo::PAML(%args);
|
|
177 Function: Builds a new Bio::Tools::Phylo::PAML object
|
|
178 Returns : Bio::Tools::Phylo::PAML
|
|
179 Args : Hash of options: -file, -fh, -dir
|
|
180 -file (or -fh) should contain the contents of the PAML
|
|
181 outfile; -dir is the (optional) name of the directory in
|
|
182 which the PAML program was run (and includes other
|
|
183 PAML-generated files from which we can try to gather data)
|
|
184
|
|
185 =cut
|
|
186
|
|
187 sub new {
|
|
188
|
|
189 my ($class, @args) = @_;
|
|
190
|
|
191 my $self = $class->SUPER::new(@args);
|
|
192 $self->_initialize_io(@args);
|
|
193
|
|
194 my ($dir) = $self->_rearrange([qw(DIR)], @args);
|
|
195 $self->{_dir} = $dir if defined $dir;
|
|
196
|
|
197 return $self;
|
|
198 }
|
|
199
|
|
200 =head2 Implement Bio::AnalysisParserI interface
|
|
201
|
|
202 =cut
|
|
203
|
|
204 =head2 next_result
|
|
205
|
|
206 Title : next_result
|
|
207 Usage : $result = $obj->next_result();
|
|
208 Function: Returns the next result available from the input, or
|
|
209 undef if there are no more results.
|
|
210 Example :
|
|
211 Returns : a Bio::Tools::Phylo::PAML::Result object
|
|
212 Args : none
|
|
213
|
|
214 =cut
|
|
215
|
|
216 sub next_result {
|
|
217
|
|
218 my ($self) = @_;
|
|
219
|
|
220 my %data;
|
|
221 # get the various codon and other sequence summary data, if necessary:
|
|
222 $self->_parse_summary
|
|
223 unless ($self->{'_summary'} && !$self->{'_summary'}->{'multidata'});
|
|
224
|
|
225 # OK, depending on seqtype and runmode now, one of a few things can happen:
|
|
226 my $seqtype = $self->{'_summary'}->{'seqtype'};
|
|
227 if ($seqtype eq 'CODONML' || $seqtype eq 'AAML') {
|
|
228 while ($_ = $self->_readline) {
|
|
229 if ($seqtype eq 'CODONML' &&
|
|
230 m/^pairwise comparison, codon frequencies:/o) {
|
|
231
|
|
232 # runmode = -2, CODONML
|
|
233 $self->_pushback($_);
|
|
234 %data = $self->_parse_PairwiseCodon;
|
|
235 last;
|
|
236
|
|
237 } elsif ($seqtype eq 'AAML' && m/^ML distances of aa seqs\.$/o) {
|
|
238
|
|
239 # runmode = -2, AAML
|
|
240 $self->throw( -class => 'Bio::Root::NotImplemented',
|
|
241 -text => "Pairwise AA not yet implemented!"
|
|
242 );
|
|
243
|
|
244 # $self->_pushback($_);
|
|
245 # %data = $self->_parse_PairwiseAA;
|
|
246 # last;
|
|
247 } elsif (m/^Model \d+: /o) {
|
|
248
|
|
249 # NSSitesBatch
|
|
250 $self->throw( -class => 'Bio::Root::NotImplemented',
|
|
251 -text => "NSsitesBatch not yet implemented!"
|
|
252 );
|
|
253
|
|
254 # $self->_pushback($_);
|
|
255 # %data = $self->_parse_NSsitesBatch;
|
|
256 # last;
|
|
257
|
|
258 } elsif (m/^TREE/) {
|
|
259
|
|
260 # runmode = 0
|
|
261 $self->_pushback($_);
|
|
262 %data = $self->_parse_Forestry;
|
|
263 last;
|
|
264
|
|
265 } elsif (m/Heuristic tree search by stepwise addition$/o) {
|
|
266
|
|
267 # runmode = 3
|
|
268 $self->throw( -class => 'Bio::Root::NotImplemented',
|
|
269 -text => "StepwiseAddition not yet implemented!"
|
|
270 );
|
|
271
|
|
272 # $self->_pushback($_);
|
|
273 # %data = $self->_parse_StepwiseAddition;
|
|
274 # last;
|
|
275
|
|
276 } elsif (m/Heuristic tree search by NNI perturbation$/o) {
|
|
277
|
|
278 # runmode = 4
|
|
279 $self->throw( -class => 'Bio::Root::NotImplemented',
|
|
280 -text => "NNI Perturbation not yet implemented!"
|
|
281 );
|
|
282
|
|
283 # $self->_pushback($_);
|
|
284 # %data = $self->_parse_Perturbation;
|
|
285 # last;
|
|
286
|
|
287 } elsif (m/^stage 0:/o) {
|
|
288
|
|
289 # runmode = (1 or 2)
|
|
290 $self->throw( -class => 'Bio::Root::NotImplemented',
|
|
291 -text => "StarDecomposition not yet implemented!"
|
|
292 );
|
|
293
|
|
294 # $self->_pushback($_);
|
|
295 # %data = $self->_parse_StarDecomposition;
|
|
296 # last;
|
|
297
|
|
298 }
|
|
299 }
|
|
300 } elsif ($seqtype eq 'BASEML') {
|
|
301 } elsif ($seqtype eq 'YN00') {
|
|
302 while ($_ = $self->_readline) {
|
|
303 if( m/^Estimation by the method/ ) {
|
|
304 $self->_pushback($_);
|
|
305 %data = $self->_parse_YN_Pairwise;
|
|
306 last;
|
|
307 }
|
|
308 }
|
|
309 }
|
|
310 if (%data) {
|
|
311 $data{'-version'} = $self->{'_summary'}->{'version'};
|
|
312 $data{'-seqs'} = $self->{'_summary'}->{'seqs'};
|
|
313 $data{'-patterns'} = $self->{'_summary'}->{'patterns'};
|
|
314 $data{'-ngmatrix'} = $self->{'_summary'}->{'ngmatrix'};
|
|
315 $data{'-codonpos'} = $self->{'_summary'}->{'codonposition'};
|
|
316 $data{'-codonfreq'} = $self->{'_summary'}->{'codonfreqs'};
|
|
317 return new Bio::Tools::Phylo::PAML::Result %data;
|
|
318 } else {
|
|
319 return undef;
|
|
320 }
|
|
321 }
|
|
322
|
|
323
|
|
324 sub _parse_summary {
|
|
325
|
|
326 my ($self) = @_;
|
|
327
|
|
328 # Depending on whether verbose > 0 or not, and whether the result
|
|
329 # set comes from a multi-data run, the first few lines could be
|
|
330 # various things; we're going to throw away any sequence data
|
|
331 # here, since we'll get it later anyways
|
|
332
|
|
333 # multidata ? : \n\nData set 1\n
|
|
334 # verbose ? : cleandata ? : \nBefore deleting alignment gaps. \d sites\n
|
|
335 # [ sequence printout ]
|
|
336 # \nAfter deleting gaps. \d sites\n"
|
|
337 # : [ sequence printout ]
|
|
338 # CODONML (in paml 3.12 February 2002) <<-- what we want to see!
|
|
339
|
|
340 my $SEQTYPES = qr( (?: (?: CODON | AA | BASE | CODON2AA ) ML ) | YN00 )x;
|
|
341 while ($_ = $self->_readline) {
|
|
342 if ( m/^($SEQTYPES) \s+ # seqtype: CODONML, AAML, BASEML, CODON2AAML, YN00, etc
|
|
343 (?: \(in \s+ ([^\)]+?) \s* \) \s* )? # version: "paml 3.12 February 2002"; not present < 3.1 or YN00
|
|
344 (\S+) \s* # tree filename
|
|
345 (?: (.+?) )? # model description (not there in YN00)
|
|
346 \s* $ # trim any trailing space
|
|
347 /ox
|
|
348 ) {
|
|
349
|
|
350 @{$self->{_summary}}{qw(seqtype version treefile model)} = ($1,
|
|
351 $2,
|
|
352 $3,
|
|
353 $4);
|
|
354 last;
|
|
355
|
|
356 } elsif (m/^Data set \d$/o) {
|
|
357 $self->{'_summary'} = {};
|
|
358 $self->{'_summary'}->{'multidata'}++;
|
|
359 }
|
|
360 }
|
|
361
|
|
362 unless (defined $self->{'_summary'}->{'seqtype'}) {
|
|
363 $self->throw( -class => 'Bio::Root::NotImplemented',
|
|
364 -text => 'Unknown format of PAML output');
|
|
365 }
|
|
366
|
|
367
|
|
368 my $seqtype = $self->{'_summary'}->{'seqtype'};
|
|
369 $self->debug( "seqtype is $seqtype\n");
|
|
370 if ($seqtype eq "CODONML") {
|
|
371
|
|
372 $self->_parse_inputparams(); # settings from the .ctl file that get printed
|
|
373 $self->_parse_patterns(); # codon patterns - not very interesting
|
|
374 $self->_parse_seqs(); # the sequences data used for analysis
|
|
375 $self->_parse_codoncts(); # counts and distributions of codon/nt usage
|
|
376 $self->_parse_codon_freqs(); # codon frequencies
|
|
377 $self->_parse_distmat(); # NG distance matrices
|
|
378
|
|
379
|
|
380 } elsif ($seqtype eq "AAML") {
|
|
381 $self->throw( -class => 'Bio::Root::NotImplemented',
|
|
382 -text => 'AAML parsing not yet implemented!');
|
|
383 } elsif ($seqtype eq "CODON2AAML") {
|
|
384 $self->throw( -class => 'Bio::Root::NotImplemented',
|
|
385 -text => 'CODON2AAML parsing not yet implemented!');
|
|
386 } elsif ($seqtype eq "BASEML") {
|
|
387 $self->throw( -class => 'Bio::Root::NotImplemented',
|
|
388 -text => 'BASEML parsing not yet implemented!');
|
|
389 } elsif ($seqtype eq "YN00") {
|
|
390 $self->_parse_codon_freqs();
|
|
391 $self->_parse_codoncts();
|
|
392 $self->_parse_distmat(); # NG distance matrices
|
|
393
|
|
394 } else {
|
|
395 $self->throw( -class => 'Bio::Root::NotImplemented',
|
|
396 -text => 'Unknown seqtype, not yet implemented!',
|
|
397 -value => $seqtype
|
|
398 );
|
|
399 }
|
|
400
|
|
401 }
|
|
402
|
|
403
|
|
404 sub _parse_inputparams {
|
|
405 my ($self) = @_;
|
|
406
|
|
407 }
|
|
408
|
|
409 sub _parse_codon_freqs {
|
|
410 my ($self) = @_;
|
|
411 my ($okay,$done) = (0,0);
|
|
412 while( defined($_ = $self->_readline ) ) {
|
|
413 if( /^Nei/ ) { $self->_pushback($_); last }
|
|
414 last if( $done);
|
|
415 next if ( /^\s+/);
|
|
416 next unless($okay || /^Codon position x base \(3x4\) table\, overall/ );
|
|
417 $okay = 1;
|
|
418 if( s/^position\s+(\d+):\s+// ) {
|
|
419 my $pos = $1;
|
|
420 s/\s+$//;
|
|
421 my @bases = split;
|
|
422 foreach my $str ( @bases ) {
|
|
423 my ( $base,$freq) = split(/:/,$str,2);
|
|
424 $self->{'_summary'}->{'codonposition'}->[$pos-1]->{$base} = $freq;
|
|
425 }
|
|
426 $done = 1 if $pos == 3;
|
|
427 }
|
|
428 }
|
|
429 $done = 0;
|
|
430 while( defined( $_ = $self->_readline) ) {
|
|
431 if( /^Nei\s\&\sGojobori/ ) { $self->_pushback($_); last }
|
|
432 last if ( $done );
|
|
433 if( /^Codon frequencies under model, for use in evolver:/ ){
|
|
434 while( defined( $_ = $self->_readline) ) {
|
|
435 last if( /^\s+$/ );
|
|
436 s/^\s+//;
|
|
437 s/\s+$//;
|
|
438 push @{$self->{'_summary'}->{'codonfreqs'}},[split];
|
|
439 }
|
|
440 $done = 1;
|
|
441 }
|
|
442 }
|
|
443 }
|
|
444
|
|
445 sub _parse_patterns {
|
|
446 my ($self) = @_;
|
|
447 my ($patternct,@patterns,$ns,$ls);
|
|
448 while( defined($_ = $self->_readline) ) {
|
|
449 if( $patternct ) {
|
|
450 # last unless ( @patterns == $patternct );
|
|
451 last if( /^\s+$/ );
|
|
452 s/^\s+//;
|
|
453 push @patterns, split;
|
|
454 } elsif( /^ns\s+\=\s*(\d+)\s+ls\s+\=\s*(\d+)/ ) {
|
|
455 ($ns,$ls) = ($1,$2);
|
|
456 } elsif( /^\# site patterns \=\s*(\d+)/ ) {
|
|
457 $patternct = $1;
|
|
458 } else {
|
|
459 # $self->debug("Unknown line: $_");
|
|
460 }
|
|
461 }
|
|
462 $self->{'_summary'}->{'patterns'} = { -patterns => \@patterns,
|
|
463 -ns => $ns,
|
|
464 -ls => $ls};
|
|
465 }
|
|
466
|
|
467 sub _parse_seqs {
|
|
468
|
|
469 # this should in fact be packed into a Bio::SimpleAlign object instead of
|
|
470 # an array but we'll stay with this for now
|
|
471 my ($self) = @_;
|
|
472 my (@firstseq,@seqs);
|
|
473 while( defined ($_ = $self->_readline) ) {
|
|
474 last if( /^\s+$/ && @seqs > 0 );
|
|
475 next if ( /^\s+$/ );
|
|
476 next if( /^\d+\s+$/ );
|
|
477
|
|
478 my ($name,$seqstr) = split(/\s+/,$_,2);
|
|
479 $seqstr =~ s/\s+//g; # remove whitespace
|
|
480 unless( @firstseq) {
|
|
481 @firstseq = split(//,$seqstr);
|
|
482 push @seqs, new Bio::PrimarySeq(-id => $name,
|
|
483 -seq => $seqstr);
|
|
484 } else {
|
|
485
|
|
486 my $i = 0;
|
|
487 my $v;
|
|
488 while(($v = index($seqstr,'.',$i)) >= $i ) {
|
|
489 # replace the '.' with the correct seq from the
|
|
490 substr($seqstr,$v,1,$firstseq[$v]);
|
|
491 $i = $v;
|
|
492 }
|
|
493 $self->debug( "adding seq $seqstr\n");
|
|
494 push @seqs, new Bio::PrimarySeq(-id => $name,
|
|
495 -seq => $seqstr);
|
|
496 }
|
|
497 }
|
|
498 $self->{'_summary'}->{'seqs'} = \@seqs;
|
|
499 }
|
|
500
|
|
501 sub _parse_codoncts { }
|
|
502
|
|
503 sub _parse_distmat {
|
|
504 my ($self) = @_;
|
|
505 my @results;
|
|
506 while( defined ($_ = $self->_readline) ) {
|
|
507 next if/^\s+$/;
|
|
508 last;
|
|
509 }
|
|
510 return unless (/^Nei\s*\&\s*Gojobori/);
|
|
511 # skip the next 3 lines
|
|
512 if( $self->{'_summary'}->{'seqtype'} eq 'CODONML' ) {
|
|
513 $self->_readline;
|
|
514 $self->_readline;
|
|
515 $self->_readline;
|
|
516 }
|
|
517 my $seqct = 0;
|
|
518 while( defined ($_ = $self->_readline ) ) {
|
|
519 last if( /^\s+$/ && exists $self->{'_summary'}->{'ngmatrix'} );
|
|
520 next if( /^\s+$/ );
|
|
521 chomp;
|
|
522 my ($seq,$rest) = split(/\s+/,$_,2);
|
|
523 my $j = 0;
|
|
524 while( $rest =~
|
|
525 /(\-?\d+(\.\d+)?)\s*\(\-?(\d+(\.\d+)?)\s+(\-?\d+(\.\d+)?)\)/g ) {
|
|
526 $self->{'_summary'}->{'ngmatrix'}->[$j++]->[$seqct] =
|
|
527 { 'omega' => $1,
|
|
528 'dN' => $3,
|
|
529 'dS' => $5 };
|
|
530 }
|
|
531 $seqct++;
|
|
532 }
|
|
533 }
|
|
534
|
|
535 sub _parse_PairwiseCodon {
|
|
536 my ($self) = @_;
|
|
537 my @result;
|
|
538 my ($a,$b,$log,$model);
|
|
539 while( defined( $_ = $self->_readline) ) {
|
|
540 if( /^pairwise comparison, codon frequencies\:\s*(\S+)\./) {
|
|
541 $model = $1;
|
|
542 } elsif( /^(\d+)\s+\((\S+)\)\s+\.\.\.\s+(\d+)\s+\((\S+)\)/ ) {
|
|
543 ($a,$b) = ($1,$3);
|
|
544 } elsif( /^lnL\s+\=\s*(\-?\d+(\.\d+)?)/ ) {
|
|
545 $log = $1;
|
|
546 } elsif( m/^t\=\s*(\d+(\.\d+)?)\s+
|
|
547 S\=\s*(\d+(\.\d+)?)\s+
|
|
548 N\=\s*(\d+(\.\d+)?)\s+
|
|
549 dN\/dS\=\s*(\d+(\.\d+)?)\s+
|
|
550 dN\=\s*(\d+(\.\d+)?)\s+
|
|
551 dS\=\s*(\d+(\.\d+)?)/ox ) {
|
|
552 $result[$b-1]->[$a-1] = {
|
|
553 'lnL' => $log,
|
|
554 't' => $1,
|
|
555 'S' => $3,
|
|
556 'N' => $5,
|
|
557 'omega' => $7,
|
|
558 'dN' => $9,
|
|
559 'dS' => $11 };
|
|
560 } elsif( /^\s+$/ ) {
|
|
561 next;
|
|
562 } elsif( /^\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)/ ) {
|
|
563 } else {
|
|
564 $self->debug( "unknown line: $_");
|
|
565 }
|
|
566 }
|
|
567 return ( -mlmatrix => \@result);
|
|
568 }
|
|
569
|
|
570 sub _parse_YN_Pairwise {
|
|
571 my ($self) = @_;
|
|
572 my @result;
|
|
573 while( defined( $_ = $self->_readline) ) {
|
|
574 last if( /^seq\.\s+seq\./);
|
|
575 }
|
|
576 while( defined( $_ = $self->_readline) ) {
|
|
577 if( m/^\s+(\d+)\s+ # seq #
|
|
578 (\d+)\s+ # seq #
|
|
579 (\d+(\.\d+))\s+ # S
|
|
580 (\d+(\.\d+))\s+ # N
|
|
581 (\d+(\.\d+))\s+ # t
|
|
582 (\d+(\.\d+))\s+ # kappa
|
|
583 (\d+(\.\d+))\s+ # omega
|
|
584 (\d+(\.\d+))\s+ # dN
|
|
585 \+\-\s+
|
|
586 (\d+(\.\d+))\s+ # dN SE
|
|
587 (\d+(\.\d+))\s+ # dS
|
|
588 \+\-\s+
|
|
589 (\d+(\.\d+))\s+ # dS SE
|
|
590 /ox
|
|
591 ) {
|
|
592
|
|
593 $result[$2-1]->[$1-1] = {
|
|
594 'S' => $3,
|
|
595 'N' => $5,
|
|
596 't' => $7,
|
|
597 'kappa' => $9,
|
|
598 'omega' => $11,
|
|
599 'dN' => $13,
|
|
600 'dN_SE' => $15,
|
|
601 'dS' => $17,
|
|
602 'dS_SE' => $19,
|
|
603 };
|
|
604 } elsif( /^\s+$/ ) {
|
|
605 next;
|
|
606 }
|
|
607 }
|
|
608 return ( -mlmatrix => \@result);
|
|
609 }
|
|
610
|
|
611 sub _parse_Forestry {
|
|
612
|
|
613 my ($self) = @_;
|
|
614 my %data = (-trees => []);
|
|
615
|
|
616
|
|
617 return %data
|
|
618 };
|
|
619
|
|
620 # parse the mlc file
|
|
621
|
|
622 sub _parse_mlc {
|
|
623 my ($self) = @_;
|
|
624 my %data;
|
|
625 while( defined( $_ = $self->_readline) ) {
|
|
626 $self->debug( "mlc parse: $_");
|
|
627 # Aaron this is where the parsing should begin
|
|
628
|
|
629 # I'll do the Tree objects if you like - I'd do it by building
|
|
630 # an IO::String for the the tree data or does it make more
|
|
631 # sense to parse this out of a collection of files?
|
|
632 if( /^TREE/ ) {
|
|
633 # ...
|
|
634 while( defined($_ = $self->_readline) ) {
|
|
635 if( /^\(/) {
|
|
636 my $treestr = new IO::String($_);
|
|
637 my $treeio = new Bio::TreeIO(-fh => $treestr,
|
|
638 -format => 'newick');
|
|
639 # this is very tenative here!!
|
|
640 push @{$self->{'_trees'}}, $treeio->next_tree;
|
|
641 }
|
|
642 }
|
|
643 }
|
|
644 }
|
|
645 }
|
|
646
|
|
647 1;
|