0
|
1 # $Id: Molphy.pm,v 1.3 2002/10/22 07:45:23 lapp Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Tools::Phylo::Molphy
|
|
4 #
|
|
5 # Cared for by Jason Stajich <jason@bioperl.org>
|
|
6 #
|
|
7 # Copyright Jason Stajich
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Tools::Phylo::Molphy - DESCRIPTION of Object
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 use Bio::Tools::Phylo::Molphy;
|
|
20 my $parser = new Bio::Tools::Phylo::Molphy(-file => 'output.protml');
|
|
21 while( my $result = $parser->next_result ) {
|
|
22
|
|
23 }
|
|
24
|
|
25 =head1 DESCRIPTION
|
|
26
|
|
27 A parser for Molphy output (protml,dnaml)
|
|
28
|
|
29 =head1 FEEDBACK
|
|
30
|
|
31 =head2 Mailing Lists
|
|
32
|
|
33 User feedback is an integral part of the evolution of this and other
|
|
34 Bioperl modules. Send your comments and suggestions preferably to
|
|
35 the Bioperl mailing list. Your participation is much appreciated.
|
|
36
|
|
37 bioperl-l@bioperl.org - General discussion
|
|
38 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
39
|
|
40 =head2 Reporting Bugs
|
|
41
|
|
42 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
43 of the bugs and their resolution. Bug reports can be submitted via
|
|
44 email or the web:
|
|
45
|
|
46 bioperl-bugs@bioperl.org
|
|
47 http://bugzilla.bioperl.org/
|
|
48
|
|
49 =head1 AUTHOR - Jason Stajich
|
|
50
|
|
51 Email jason@bioperl.org
|
|
52
|
|
53 Describe contact details here
|
|
54
|
|
55 =head1 CONTRIBUTORS
|
|
56
|
|
57 Additional contributors names and emails here
|
|
58
|
|
59 =head1 APPENDIX
|
|
60
|
|
61 The rest of the documentation details each of the object methods.
|
|
62 Internal methods are usually preceded with a _
|
|
63
|
|
64 =cut
|
|
65
|
|
66
|
|
67 # Let the code begin...
|
|
68
|
|
69
|
|
70 package Bio::Tools::Phylo::Molphy;
|
|
71 use vars qw(@ISA);
|
|
72 use strict;
|
|
73
|
|
74 use Bio::Tools::Phylo::Molphy::Result;
|
|
75 use Bio::Root::Root;
|
|
76 use Bio::Root::IO;
|
|
77 use Bio::TreeIO;
|
|
78 use IO::String;
|
|
79
|
|
80 @ISA = qw(Bio::Root::Root Bio::Root::IO );
|
|
81
|
|
82 =head2 new
|
|
83
|
|
84 Title : new
|
|
85 Usage : my $obj = new Bio::Tools::Phylo::Molphy();
|
|
86 Function: Builds a new Bio::Tools::Phylo::Molphy object
|
|
87 Returns : Bio::Tools::Phylo::Molphy
|
|
88 Args : -fh/-file => $val, # for initing input, see Bio::Root::IO
|
|
89
|
|
90
|
|
91 =cut
|
|
92
|
|
93 sub new {
|
|
94 my($class,@args) = @_;
|
|
95
|
|
96 my $self = $class->SUPER::new(@args);
|
|
97 $self->_initialize_io(@args);
|
|
98
|
|
99 return $self;
|
|
100 }
|
|
101
|
|
102 =head2 next_result
|
|
103
|
|
104 Title : next_result
|
|
105 Usage : my $r = $molphy->next_result
|
|
106 Function: Get the next result set from parser data
|
|
107 Returns : Bio::Tools::Phylo::Molphy::Result object
|
|
108 Args : none
|
|
109
|
|
110
|
|
111 =cut
|
|
112
|
|
113 sub next_result{
|
|
114 my ($self) = @_;
|
|
115
|
|
116 # A little statemachine for the parser here
|
|
117 my ($state,$transition_ct,
|
|
118 @transition_matrix, %transition_mat, @resloc,) = ( 0,0);
|
|
119 my ( %subst_matrix, @treelines, @treedata, %frequencies);
|
|
120 my ( $treenum,$possible_trees, $model);
|
|
121 while( defined ( $_ = $self->_readline()) ) {
|
|
122 if( /^Relative Substitution Rate Matrix/ ) {
|
|
123 if( %subst_matrix ) {
|
|
124 $self->_pushback($_);
|
|
125 last;
|
|
126 }
|
|
127 $state = 0;
|
|
128 my ( @tempdata);
|
|
129 @resloc = ();
|
|
130 while( defined ($_ = $self->_readline) ) {
|
|
131 last if (/^\s+$/);
|
|
132 # remove leading/trailing spaces
|
|
133 s/^\s+//;
|
|
134 s/\s+$//;
|
|
135 my @data = split;
|
|
136 my $i = 0;
|
|
137 for my $l ( @data ) {
|
|
138 if( $l =~ /\D+/ ) {
|
|
139 push @resloc, $l;
|
|
140 }
|
|
141 $i++;
|
|
142 }
|
|
143 push @tempdata, \@data;
|
|
144 }
|
|
145 my $i = 0;
|
|
146 for my $row ( @tempdata ) {
|
|
147 my $j = 0;
|
|
148 for my $col ( @$row ) {
|
|
149 if( $i == $j ) {
|
|
150 # empty string for diagonals
|
|
151 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = '';
|
|
152 } else {
|
|
153 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = $col;
|
|
154 }
|
|
155 $j++;
|
|
156 }
|
|
157 $i++;
|
|
158 }
|
|
159 } elsif( /^Transition Probability Matrix/ ) {
|
|
160 if( /1\.0e7/ ) {
|
|
161 $state = 1;
|
|
162 $transition_ct = 0;
|
|
163 } else {
|
|
164 $state = 0;
|
|
165 }
|
|
166 } elsif ( /Acid Frequencies/ ) {
|
|
167 $state = 0;
|
|
168 $self->_readline(); # skip the next line
|
|
169 while( defined( $_ = $self->_readline) ) {
|
|
170 unless( /^\s+/) {
|
|
171 $self->_pushback($_);
|
|
172 last;
|
|
173 }
|
|
174 s/^\s+//;
|
|
175 s/\s+$//;
|
|
176 my ($index,$res,$model,$data) = split;
|
|
177 $frequencies{$res} = [ $model,$data];
|
|
178 }
|
|
179 } elsif( /^(\d+)\s*\/\s*(\d+)\s+(.+)\s+model/ ) {
|
|
180 my @save = ($1,$2,$3);
|
|
181 # finish processing the transition_matrix
|
|
182 my $i =0;
|
|
183 foreach my $row ( @transition_matrix ) {
|
|
184 my $j = 0;
|
|
185 foreach my $col ( @$row ) {
|
|
186 $transition_mat{$resloc[$i]}->{$resloc[$j]} = $col;
|
|
187 $j++;
|
|
188 }
|
|
189 $i++;
|
|
190 }
|
|
191
|
|
192 if( defined $treenum ) {
|
|
193 $self->_pushback($_);
|
|
194 last;
|
|
195 }
|
|
196
|
|
197 $state = 2;
|
|
198 ($treenum,$possible_trees, $model) = @save;
|
|
199 $model =~ s/\s+/ /g;
|
|
200 } elsif( $state == 1 ) {
|
|
201 next if( /^\s+$/ );
|
|
202 s/^\s+//;
|
|
203 s/\s+$//;
|
|
204 # because the matrix is split up into 2-10 column sets
|
|
205 push @{$transition_matrix[$transition_ct++]}, split ;
|
|
206 $transition_ct = 0 if $transition_ct % 20 == 0;
|
|
207 } elsif( $state == 2 ) {
|
|
208 if( s/^(\d+)\s+(\-?\d+(\.\d+)?)\s+// ) {
|
|
209 push @treedata, [ $1,$2];
|
|
210 }
|
|
211 # save this for the end so that we can
|
|
212 # be efficient and only open one tree parser
|
|
213 push @treelines, $_;
|
|
214 }
|
|
215 }
|
|
216 # waiting till the end to do this, is it better
|
|
217 my @trees;
|
|
218 if( @treelines ) {
|
|
219 my $strdat = IO::String->new(join('',@treelines));
|
|
220 my $treeio = new Bio::TreeIO(-fh => $strdat,
|
|
221 -format => 'newick');
|
|
222 while( my $tree = $treeio->next_tree ) {
|
|
223 if( @treedata ) {
|
|
224 my $dat = shift @treedata;
|
|
225 # set the associated information
|
|
226 $tree->id($dat->[0]);
|
|
227 $tree->score($dat->[1]);
|
|
228 }
|
|
229 push @trees, $tree;
|
|
230 }
|
|
231 }
|
|
232
|
|
233 my $result = new Bio::Tools::Phylo::Molphy::Result
|
|
234 (-trees => \@trees,
|
|
235 -substitution_matrix => \%subst_matrix,
|
|
236 -transition_matrix => \%transition_mat,
|
|
237 -frequencies => \%frequencies,
|
|
238 -model => $model,
|
|
239 -search_space => $possible_trees,
|
|
240 );
|
|
241
|
|
242 }
|
|
243
|
|
244 1;
|