annotate variant_effect_predictor/Bio/Tools/Phylo/PAML.pm @ 1:d6778b5d8382 draft default tip

Deleted selected files
author willmclaren
date Fri, 03 Aug 2012 10:05:43 -0400
parents 21066c0abaf5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 # PAML.pm,v 1.3 2002/06/20 18:50:37 amackey Exp
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 # BioPerl module for Bio::Tools::Phylo::PAML
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5 # Cared for by Jason Stajich <jason@bioperl.org>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 # Copyright Jason Stajich, Aaron J Mackey
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 # POD documentation - main docs before the code
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15 Bio::Tools::Phylo::PAML - Parses output from the PAML programs codeml,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16 baseml, basemlg, codemlsites and yn00
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20 #!/usr/bin/perl -Tw
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 use Bio::Tools::Phylo::PAML;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25 # need to specify the output file name (or a fh) (defaults to
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26 # -file => "codeml.mlc"); also, optionally, the directory in which
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 # the other result files (rst, 2ML.dS, etc) may be found (defaults
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28 # to "./")
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29 my $parser = new Bio::Tools::Phylo::PAML
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30 (-file => "./results/mlc", -dir => "./results/");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32 # get the first/next result; a Bio::Tools::Phylo::PAML::Result object,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33 # which isa Bio::SeqAnalysisResultI object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34 my $result = $parser->next_result();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36 # get the sequences used in the analysis; returns Bio::PrimarySeq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37 # objects (OTU = Operational Taxonomic Unit).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38 my @otus = $result->get_seqs();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40 # codon summary: codon usage of each sequence [ arrayref of {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41 # hashref of counts for each codon } for each sequence and the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42 # overall sum ], and positional nucleotide distribution [ arrayref
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 # of { hashref of frequencies for each nucleotide } for each
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44 # sequence and overall frequencies ]:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45 my ($codonusage, $ntdist) = $result->get_codon_summary();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47 # example manipulations of $codonusage and $ntdist:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48 printf "There were %d '%s' codons in the first seq (%s)\n",
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49 $codonusage->[0]->{AAA}, 'AAA', $otus[0]->id();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50 printf "There were %d '%s' codons used in all the sequences\n",
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51 $codonusage->[$#{$codonusage}]->{AAA}, 'AAA';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52 printf "Nucleotide '%c' was present %g of the time in seq %s\n",
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53 'A', $ntdist->[1]->{A}, $otus[1]->id();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55 # get Nei & Gojobori dN/dS matrix:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56 my $NGmatrix = $result->get_NGmatrix();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58 # get ML-estimated dN/dS matrix, if calculated; this corresponds to
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59 # the runmode = -2, pairwise comparison usage of codeml
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60 my $MLmatrix = $result->get_MLmatrix();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62 # These matrices are length(@otu) x length(@otu) "strict lower
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
63 # triangle" 2D-matrices, which means that the diagonal and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
64 # everything above it is undefined. Each of the defined cells is a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
65 # hashref of estimates for "dN", "dS", "omega" (dN/dS ratio), "t",
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
66 # "S" and "N". If a ML matrix, "lnL" will also be defined.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67 printf "The omega ratio for sequences %s vs %s was: %g\n",
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68 $otus[0]->id, $otus[1]->id, $MLmatrix->[0]->[1]->{omega};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70 # with a little work, these matrices could also be passed to
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71 # Bio::Tools::Run::Phylip::Neighbor, or other similar tree-building
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72 # method that accepts a matrix of "distances" (using the LOWTRI
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73 # option):
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74 my $distmat = [ map { [ map { $$_{omega} } @$_ ] } @$MLmatrix ];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76 # for runmode's other than -2, get tree topology with estimated
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77 # branch lengths; returns a Bio::Tree::TreeI-based tree object with
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78 # added PAML parameters at each node
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79 my $tree = $result->get_tree();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80 for my $node ($tree->get_nodes()) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81 # inspect the tree: the "t" (time) parameter is available via
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82 # $node->branch_length(); all other branch-specific parameters
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83 # ("omega", "dN", etc.) are available via $node->param('omega');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86 # get any general model parameters: kappa (the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87 # transition/transversion ratio), NSsites model parameters ("p0",
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88 # "p1", "w0", "w1", etc.), etc.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89 my $params = $result->get_model_params();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90 printf "M1 params: p0 = %g\tp1 = %g\n", $params->{p0}, $params->{p1};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92 # for NSsites models, obtain arrayrefs of posterior probabilities
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93 # for membership in each class for every position; probabilities
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
94 # correspond to classes w0, w1, ... etc.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
95 my @probs = $result->get_posteriors();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
96
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
97 # find, say, positively selected sites!
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
98 if ($params->{w2} > 1) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
99 for (my $i = 0; $i < @probs ; $i++) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
100 if ($probs[$i]->[2] > 0.5) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
101 # assumes model M1: three w's, w0, w1 and w2 (positive selection)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
102 printf "position %d: (%g prob, %g omega, %g mean w)\n",
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
103 $i, $probs[$i]->[2], $params->{w2}, $probs[$i]->[3];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
104 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
105 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
106 } else { print "No positive selection found!\n"; }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
107
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
108 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
109
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
110 This module is used to parse the output from the PAML programs codeml,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
111 baseml, basemlg, codemlsites and yn00. You can use the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
112 Bio::Tools::Run::Phylo::PAML::* modules to actually run some of the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
113 PAML programs, but this module is only useful to parse the output.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
114
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
115 =head1 FEEDBACK
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
116
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
117 =head2 Mailing Lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
118
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
119 User feedback is an integral part of the evolution of this and other
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
120 Bioperl modules. Send your comments and suggestions preferably to
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
121 the Bioperl mailing list. Your participation is much appreciated.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
122
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
123 bioperl-l@bioperl.org - General discussion
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
124 http://bioperl.org/MailList.shtml - About the mailing lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
125
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
126 =head2 Reporting Bugs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
127
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
128 Report bugs to the Bioperl bug tracking system to help us keep track
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
129 of the bugs and their resolution. Bug reports can be submitted via
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
130 email or the web:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
131
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
132 bioperl-bugs@bioperl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
133 http://bugzilla.bioperl.org/
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
134
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
135 =head1 AUTHOR - Jason Stajich, Aaron Mackey
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
136
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
137 Email jason@bioperl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
138 Email amackey@virginia.edu
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
139
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
140 =head1 TODO
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
141
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
142 check output from pre 1.12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
143
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
144 =head1 APPENDIX
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
145
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
146 The rest of the documentation details each of the object methods.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
147 Internal methods are usually preceded with a _
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
148
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
149 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
150
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
151
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
152 # Let the code begin...
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
153
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
154
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
155 package Bio::Tools::Phylo::PAML;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
156 use vars qw(@ISA);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
157 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
158
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
159 # Object preamble - inherits from Bio::Root::Root
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
160
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
161 use Bio::Root::Root;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
162 use Bio::AnalysisParserI;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
163 use Bio::Root::IO;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
164
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
165 @ISA = qw(Bio::Root::Root Bio::Root::IO Bio::AnalysisParserI);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
166
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
167 # other objects used:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
168 use IO::String;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
169 use Bio::TreeIO;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
170 use Bio::Tools::Phylo::PAML::Result;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
171 use Bio::PrimarySeq;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
172
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
173 =head2 new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
174
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
175 Title : new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
176 Usage : my $obj = new Bio::Tools::Phylo::PAML(%args);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
177 Function: Builds a new Bio::Tools::Phylo::PAML object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
178 Returns : Bio::Tools::Phylo::PAML
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
179 Args : Hash of options: -file, -fh, -dir
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
180 -file (or -fh) should contain the contents of the PAML
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
181 outfile; -dir is the (optional) name of the directory in
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
182 which the PAML program was run (and includes other
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
183 PAML-generated files from which we can try to gather data)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
184
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
185 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
186
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
187 sub new {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
188
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
189 my ($class, @args) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
190
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
191 my $self = $class->SUPER::new(@args);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
192 $self->_initialize_io(@args);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
193
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
194 my ($dir) = $self->_rearrange([qw(DIR)], @args);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
195 $self->{_dir} = $dir if defined $dir;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
196
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
197 return $self;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
198 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
199
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
200 =head2 Implement Bio::AnalysisParserI interface
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
201
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
202 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
203
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
204 =head2 next_result
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
205
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
206 Title : next_result
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
207 Usage : $result = $obj->next_result();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
208 Function: Returns the next result available from the input, or
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
209 undef if there are no more results.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
210 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
211 Returns : a Bio::Tools::Phylo::PAML::Result object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
212 Args : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
213
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
214 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
215
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
216 sub next_result {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
217
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
218 my ($self) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
219
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
220 my %data;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
221 # get the various codon and other sequence summary data, if necessary:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
222 $self->_parse_summary
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
223 unless ($self->{'_summary'} && !$self->{'_summary'}->{'multidata'});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
224
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
225 # OK, depending on seqtype and runmode now, one of a few things can happen:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
226 my $seqtype = $self->{'_summary'}->{'seqtype'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
227 if ($seqtype eq 'CODONML' || $seqtype eq 'AAML') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
228 while ($_ = $self->_readline) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
229 if ($seqtype eq 'CODONML' &&
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
230 m/^pairwise comparison, codon frequencies:/o) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
231
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
232 # runmode = -2, CODONML
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
233 $self->_pushback($_);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
234 %data = $self->_parse_PairwiseCodon;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
235 last;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
236
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
237 } elsif ($seqtype eq 'AAML' && m/^ML distances of aa seqs\.$/o) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
238
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
239 # runmode = -2, AAML
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
240 $self->throw( -class => 'Bio::Root::NotImplemented',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
241 -text => "Pairwise AA not yet implemented!"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
242 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
243
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
244 # $self->_pushback($_);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
245 # %data = $self->_parse_PairwiseAA;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
246 # last;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
247 } elsif (m/^Model \d+: /o) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
248
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
249 # NSSitesBatch
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
250 $self->throw( -class => 'Bio::Root::NotImplemented',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
251 -text => "NSsitesBatch not yet implemented!"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
252 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
253
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
254 # $self->_pushback($_);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
255 # %data = $self->_parse_NSsitesBatch;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
256 # last;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
257
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
258 } elsif (m/^TREE/) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
259
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
260 # runmode = 0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
261 $self->_pushback($_);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
262 %data = $self->_parse_Forestry;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
263 last;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
264
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
265 } elsif (m/Heuristic tree search by stepwise addition$/o) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
266
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
267 # runmode = 3
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
268 $self->throw( -class => 'Bio::Root::NotImplemented',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
269 -text => "StepwiseAddition not yet implemented!"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
270 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
271
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
272 # $self->_pushback($_);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
273 # %data = $self->_parse_StepwiseAddition;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
274 # last;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
275
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
276 } elsif (m/Heuristic tree search by NNI perturbation$/o) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
277
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
278 # runmode = 4
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
279 $self->throw( -class => 'Bio::Root::NotImplemented',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
280 -text => "NNI Perturbation not yet implemented!"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
281 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
282
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
283 # $self->_pushback($_);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
284 # %data = $self->_parse_Perturbation;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
285 # last;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
286
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
287 } elsif (m/^stage 0:/o) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
288
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
289 # runmode = (1 or 2)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
290 $self->throw( -class => 'Bio::Root::NotImplemented',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
291 -text => "StarDecomposition not yet implemented!"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
292 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
293
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
294 # $self->_pushback($_);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
295 # %data = $self->_parse_StarDecomposition;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
296 # last;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
297
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
298 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
299 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
300 } elsif ($seqtype eq 'BASEML') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
301 } elsif ($seqtype eq 'YN00') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
302 while ($_ = $self->_readline) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
303 if( m/^Estimation by the method/ ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
304 $self->_pushback($_);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
305 %data = $self->_parse_YN_Pairwise;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
306 last;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
307 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
308 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
309 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
310 if (%data) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
311 $data{'-version'} = $self->{'_summary'}->{'version'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
312 $data{'-seqs'} = $self->{'_summary'}->{'seqs'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
313 $data{'-patterns'} = $self->{'_summary'}->{'patterns'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
314 $data{'-ngmatrix'} = $self->{'_summary'}->{'ngmatrix'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
315 $data{'-codonpos'} = $self->{'_summary'}->{'codonposition'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
316 $data{'-codonfreq'} = $self->{'_summary'}->{'codonfreqs'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
317 return new Bio::Tools::Phylo::PAML::Result %data;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
318 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
319 return undef;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
320 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
321 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
322
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
323
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
324 sub _parse_summary {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
325
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
326 my ($self) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
327
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
328 # Depending on whether verbose > 0 or not, and whether the result
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
329 # set comes from a multi-data run, the first few lines could be
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
330 # various things; we're going to throw away any sequence data
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
331 # here, since we'll get it later anyways
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
332
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
333 # multidata ? : \n\nData set 1\n
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
334 # verbose ? : cleandata ? : \nBefore deleting alignment gaps. \d sites\n
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
335 # [ sequence printout ]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
336 # \nAfter deleting gaps. \d sites\n"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
337 # : [ sequence printout ]
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
338 # CODONML (in paml 3.12 February 2002) <<-- what we want to see!
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
339
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
340 my $SEQTYPES = qr( (?: (?: CODON | AA | BASE | CODON2AA ) ML ) | YN00 )x;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
341 while ($_ = $self->_readline) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
342 if ( m/^($SEQTYPES) \s+ # seqtype: CODONML, AAML, BASEML, CODON2AAML, YN00, etc
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
343 (?: \(in \s+ ([^\)]+?) \s* \) \s* )? # version: "paml 3.12 February 2002"; not present < 3.1 or YN00
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
344 (\S+) \s* # tree filename
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
345 (?: (.+?) )? # model description (not there in YN00)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
346 \s* $ # trim any trailing space
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
347 /ox
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
348 ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
349
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
350 @{$self->{_summary}}{qw(seqtype version treefile model)} = ($1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
351 $2,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
352 $3,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
353 $4);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
354 last;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
355
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
356 } elsif (m/^Data set \d$/o) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
357 $self->{'_summary'} = {};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
358 $self->{'_summary'}->{'multidata'}++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
359 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
360 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
361
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
362 unless (defined $self->{'_summary'}->{'seqtype'}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
363 $self->throw( -class => 'Bio::Root::NotImplemented',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
364 -text => 'Unknown format of PAML output');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
365 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
366
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
367
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
368 my $seqtype = $self->{'_summary'}->{'seqtype'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
369 $self->debug( "seqtype is $seqtype\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
370 if ($seqtype eq "CODONML") {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
371
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
372 $self->_parse_inputparams(); # settings from the .ctl file that get printed
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
373 $self->_parse_patterns(); # codon patterns - not very interesting
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
374 $self->_parse_seqs(); # the sequences data used for analysis
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
375 $self->_parse_codoncts(); # counts and distributions of codon/nt usage
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
376 $self->_parse_codon_freqs(); # codon frequencies
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
377 $self->_parse_distmat(); # NG distance matrices
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
378
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
379
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
380 } elsif ($seqtype eq "AAML") {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
381 $self->throw( -class => 'Bio::Root::NotImplemented',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
382 -text => 'AAML parsing not yet implemented!');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
383 } elsif ($seqtype eq "CODON2AAML") {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
384 $self->throw( -class => 'Bio::Root::NotImplemented',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
385 -text => 'CODON2AAML parsing not yet implemented!');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
386 } elsif ($seqtype eq "BASEML") {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
387 $self->throw( -class => 'Bio::Root::NotImplemented',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
388 -text => 'BASEML parsing not yet implemented!');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
389 } elsif ($seqtype eq "YN00") {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
390 $self->_parse_codon_freqs();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
391 $self->_parse_codoncts();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
392 $self->_parse_distmat(); # NG distance matrices
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
393
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
394 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
395 $self->throw( -class => 'Bio::Root::NotImplemented',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
396 -text => 'Unknown seqtype, not yet implemented!',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
397 -value => $seqtype
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
398 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
399 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
400
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
401 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
402
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
403
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
404 sub _parse_inputparams {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
405 my ($self) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
406
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
407 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
408
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
409 sub _parse_codon_freqs {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
410 my ($self) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
411 my ($okay,$done) = (0,0);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
412 while( defined($_ = $self->_readline ) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
413 if( /^Nei/ ) { $self->_pushback($_); last }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
414 last if( $done);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
415 next if ( /^\s+/);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
416 next unless($okay || /^Codon position x base \(3x4\) table\, overall/ );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
417 $okay = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
418 if( s/^position\s+(\d+):\s+// ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
419 my $pos = $1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
420 s/\s+$//;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
421 my @bases = split;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
422 foreach my $str ( @bases ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
423 my ( $base,$freq) = split(/:/,$str,2);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
424 $self->{'_summary'}->{'codonposition'}->[$pos-1]->{$base} = $freq;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
425 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
426 $done = 1 if $pos == 3;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
427 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
428 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
429 $done = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
430 while( defined( $_ = $self->_readline) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
431 if( /^Nei\s\&\sGojobori/ ) { $self->_pushback($_); last }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
432 last if ( $done );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
433 if( /^Codon frequencies under model, for use in evolver:/ ){
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
434 while( defined( $_ = $self->_readline) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
435 last if( /^\s+$/ );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
436 s/^\s+//;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
437 s/\s+$//;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
438 push @{$self->{'_summary'}->{'codonfreqs'}},[split];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
439 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
440 $done = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
441 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
442 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
443 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
444
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
445 sub _parse_patterns {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
446 my ($self) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
447 my ($patternct,@patterns,$ns,$ls);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
448 while( defined($_ = $self->_readline) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
449 if( $patternct ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
450 # last unless ( @patterns == $patternct );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
451 last if( /^\s+$/ );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
452 s/^\s+//;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
453 push @patterns, split;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
454 } elsif( /^ns\s+\=\s*(\d+)\s+ls\s+\=\s*(\d+)/ ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
455 ($ns,$ls) = ($1,$2);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
456 } elsif( /^\# site patterns \=\s*(\d+)/ ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
457 $patternct = $1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
458 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
459 # $self->debug("Unknown line: $_");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
460 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
461 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
462 $self->{'_summary'}->{'patterns'} = { -patterns => \@patterns,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
463 -ns => $ns,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
464 -ls => $ls};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
465 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
466
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
467 sub _parse_seqs {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
468
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
469 # this should in fact be packed into a Bio::SimpleAlign object instead of
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
470 # an array but we'll stay with this for now
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
471 my ($self) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
472 my (@firstseq,@seqs);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
473 while( defined ($_ = $self->_readline) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
474 last if( /^\s+$/ && @seqs > 0 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
475 next if ( /^\s+$/ );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
476 next if( /^\d+\s+$/ );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
477
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
478 my ($name,$seqstr) = split(/\s+/,$_,2);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
479 $seqstr =~ s/\s+//g; # remove whitespace
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
480 unless( @firstseq) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
481 @firstseq = split(//,$seqstr);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
482 push @seqs, new Bio::PrimarySeq(-id => $name,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
483 -seq => $seqstr);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
484 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
485
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
486 my $i = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
487 my $v;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
488 while(($v = index($seqstr,'.',$i)) >= $i ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
489 # replace the '.' with the correct seq from the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
490 substr($seqstr,$v,1,$firstseq[$v]);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
491 $i = $v;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
492 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
493 $self->debug( "adding seq $seqstr\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
494 push @seqs, new Bio::PrimarySeq(-id => $name,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
495 -seq => $seqstr);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
496 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
497 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
498 $self->{'_summary'}->{'seqs'} = \@seqs;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
499 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
500
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
501 sub _parse_codoncts { }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
502
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
503 sub _parse_distmat {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
504 my ($self) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
505 my @results;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
506 while( defined ($_ = $self->_readline) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
507 next if/^\s+$/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
508 last;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
509 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
510 return unless (/^Nei\s*\&\s*Gojobori/);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
511 # skip the next 3 lines
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
512 if( $self->{'_summary'}->{'seqtype'} eq 'CODONML' ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
513 $self->_readline;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
514 $self->_readline;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
515 $self->_readline;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
516 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
517 my $seqct = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
518 while( defined ($_ = $self->_readline ) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
519 last if( /^\s+$/ && exists $self->{'_summary'}->{'ngmatrix'} );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
520 next if( /^\s+$/ );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
521 chomp;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
522 my ($seq,$rest) = split(/\s+/,$_,2);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
523 my $j = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
524 while( $rest =~
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
525 /(\-?\d+(\.\d+)?)\s*\(\-?(\d+(\.\d+)?)\s+(\-?\d+(\.\d+)?)\)/g ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
526 $self->{'_summary'}->{'ngmatrix'}->[$j++]->[$seqct] =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
527 { 'omega' => $1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
528 'dN' => $3,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
529 'dS' => $5 };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
530 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
531 $seqct++;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
532 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
533 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
534
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
535 sub _parse_PairwiseCodon {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
536 my ($self) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
537 my @result;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
538 my ($a,$b,$log,$model);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
539 while( defined( $_ = $self->_readline) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
540 if( /^pairwise comparison, codon frequencies\:\s*(\S+)\./) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
541 $model = $1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
542 } elsif( /^(\d+)\s+\((\S+)\)\s+\.\.\.\s+(\d+)\s+\((\S+)\)/ ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
543 ($a,$b) = ($1,$3);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
544 } elsif( /^lnL\s+\=\s*(\-?\d+(\.\d+)?)/ ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
545 $log = $1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
546 } elsif( m/^t\=\s*(\d+(\.\d+)?)\s+
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
547 S\=\s*(\d+(\.\d+)?)\s+
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
548 N\=\s*(\d+(\.\d+)?)\s+
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
549 dN\/dS\=\s*(\d+(\.\d+)?)\s+
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
550 dN\=\s*(\d+(\.\d+)?)\s+
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
551 dS\=\s*(\d+(\.\d+)?)/ox ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
552 $result[$b-1]->[$a-1] = {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
553 'lnL' => $log,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
554 't' => $1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
555 'S' => $3,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
556 'N' => $5,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
557 'omega' => $7,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
558 'dN' => $9,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
559 'dS' => $11 };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
560 } elsif( /^\s+$/ ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
561 next;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
562 } elsif( /^\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)/ ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
563 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
564 $self->debug( "unknown line: $_");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
565 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
566 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
567 return ( -mlmatrix => \@result);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
568 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
569
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
570 sub _parse_YN_Pairwise {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
571 my ($self) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
572 my @result;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
573 while( defined( $_ = $self->_readline) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
574 last if( /^seq\.\s+seq\./);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
575 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
576 while( defined( $_ = $self->_readline) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
577 if( m/^\s+(\d+)\s+ # seq #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
578 (\d+)\s+ # seq #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
579 (\d+(\.\d+))\s+ # S
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
580 (\d+(\.\d+))\s+ # N
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
581 (\d+(\.\d+))\s+ # t
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
582 (\d+(\.\d+))\s+ # kappa
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
583 (\d+(\.\d+))\s+ # omega
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
584 (\d+(\.\d+))\s+ # dN
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
585 \+\-\s+
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
586 (\d+(\.\d+))\s+ # dN SE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
587 (\d+(\.\d+))\s+ # dS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
588 \+\-\s+
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
589 (\d+(\.\d+))\s+ # dS SE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
590 /ox
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
591 ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
592
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
593 $result[$2-1]->[$1-1] = {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
594 'S' => $3,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
595 'N' => $5,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
596 't' => $7,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
597 'kappa' => $9,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
598 'omega' => $11,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
599 'dN' => $13,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
600 'dN_SE' => $15,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
601 'dS' => $17,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
602 'dS_SE' => $19,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
603 };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
604 } elsif( /^\s+$/ ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
605 next;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
606 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
607 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
608 return ( -mlmatrix => \@result);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
609 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
610
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
611 sub _parse_Forestry {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
612
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
613 my ($self) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
614 my %data = (-trees => []);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
615
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
616
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
617 return %data
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
618 };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
619
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
620 # parse the mlc file
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
621
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
622 sub _parse_mlc {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
623 my ($self) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
624 my %data;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
625 while( defined( $_ = $self->_readline) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
626 $self->debug( "mlc parse: $_");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
627 # Aaron this is where the parsing should begin
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
628
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
629 # I'll do the Tree objects if you like - I'd do it by building
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
630 # an IO::String for the the tree data or does it make more
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
631 # sense to parse this out of a collection of files?
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
632 if( /^TREE/ ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
633 # ...
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
634 while( defined($_ = $self->_readline) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
635 if( /^\(/) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
636 my $treestr = new IO::String($_);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
637 my $treeio = new Bio::TreeIO(-fh => $treestr,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
638 -format => 'newick');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
639 # this is very tenative here!!
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
640 push @{$self->{'_trees'}}, $treeio->next_tree;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
641 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
642 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
643 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
644 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
645 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
646
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
647 1;