Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/Phylo/Molphy.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: Molphy.pm,v 1.3 2002/10/22 07:45:23 lapp Exp $ | |
2 # | |
3 # BioPerl module for Bio::Tools::Phylo::Molphy | |
4 # | |
5 # Cared for by Jason Stajich <jason@bioperl.org> | |
6 # | |
7 # Copyright Jason Stajich | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Tools::Phylo::Molphy - DESCRIPTION of Object | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 use Bio::Tools::Phylo::Molphy; | |
20 my $parser = new Bio::Tools::Phylo::Molphy(-file => 'output.protml'); | |
21 while( my $result = $parser->next_result ) { | |
22 | |
23 } | |
24 | |
25 =head1 DESCRIPTION | |
26 | |
27 A parser for Molphy output (protml,dnaml) | |
28 | |
29 =head1 FEEDBACK | |
30 | |
31 =head2 Mailing Lists | |
32 | |
33 User feedback is an integral part of the evolution of this and other | |
34 Bioperl modules. Send your comments and suggestions preferably to | |
35 the Bioperl mailing list. Your participation is much appreciated. | |
36 | |
37 bioperl-l@bioperl.org - General discussion | |
38 http://bioperl.org/MailList.shtml - About the mailing lists | |
39 | |
40 =head2 Reporting Bugs | |
41 | |
42 Report bugs to the Bioperl bug tracking system to help us keep track | |
43 of the bugs and their resolution. Bug reports can be submitted via | |
44 email or the web: | |
45 | |
46 bioperl-bugs@bioperl.org | |
47 http://bugzilla.bioperl.org/ | |
48 | |
49 =head1 AUTHOR - Jason Stajich | |
50 | |
51 Email jason@bioperl.org | |
52 | |
53 Describe contact details here | |
54 | |
55 =head1 CONTRIBUTORS | |
56 | |
57 Additional contributors names and emails here | |
58 | |
59 =head1 APPENDIX | |
60 | |
61 The rest of the documentation details each of the object methods. | |
62 Internal methods are usually preceded with a _ | |
63 | |
64 =cut | |
65 | |
66 | |
67 # Let the code begin... | |
68 | |
69 | |
70 package Bio::Tools::Phylo::Molphy; | |
71 use vars qw(@ISA); | |
72 use strict; | |
73 | |
74 use Bio::Tools::Phylo::Molphy::Result; | |
75 use Bio::Root::Root; | |
76 use Bio::Root::IO; | |
77 use Bio::TreeIO; | |
78 use IO::String; | |
79 | |
80 @ISA = qw(Bio::Root::Root Bio::Root::IO ); | |
81 | |
82 =head2 new | |
83 | |
84 Title : new | |
85 Usage : my $obj = new Bio::Tools::Phylo::Molphy(); | |
86 Function: Builds a new Bio::Tools::Phylo::Molphy object | |
87 Returns : Bio::Tools::Phylo::Molphy | |
88 Args : -fh/-file => $val, # for initing input, see Bio::Root::IO | |
89 | |
90 | |
91 =cut | |
92 | |
93 sub new { | |
94 my($class,@args) = @_; | |
95 | |
96 my $self = $class->SUPER::new(@args); | |
97 $self->_initialize_io(@args); | |
98 | |
99 return $self; | |
100 } | |
101 | |
102 =head2 next_result | |
103 | |
104 Title : next_result | |
105 Usage : my $r = $molphy->next_result | |
106 Function: Get the next result set from parser data | |
107 Returns : Bio::Tools::Phylo::Molphy::Result object | |
108 Args : none | |
109 | |
110 | |
111 =cut | |
112 | |
113 sub next_result{ | |
114 my ($self) = @_; | |
115 | |
116 # A little statemachine for the parser here | |
117 my ($state,$transition_ct, | |
118 @transition_matrix, %transition_mat, @resloc,) = ( 0,0); | |
119 my ( %subst_matrix, @treelines, @treedata, %frequencies); | |
120 my ( $treenum,$possible_trees, $model); | |
121 while( defined ( $_ = $self->_readline()) ) { | |
122 if( /^Relative Substitution Rate Matrix/ ) { | |
123 if( %subst_matrix ) { | |
124 $self->_pushback($_); | |
125 last; | |
126 } | |
127 $state = 0; | |
128 my ( @tempdata); | |
129 @resloc = (); | |
130 while( defined ($_ = $self->_readline) ) { | |
131 last if (/^\s+$/); | |
132 # remove leading/trailing spaces | |
133 s/^\s+//; | |
134 s/\s+$//; | |
135 my @data = split; | |
136 my $i = 0; | |
137 for my $l ( @data ) { | |
138 if( $l =~ /\D+/ ) { | |
139 push @resloc, $l; | |
140 } | |
141 $i++; | |
142 } | |
143 push @tempdata, \@data; | |
144 } | |
145 my $i = 0; | |
146 for my $row ( @tempdata ) { | |
147 my $j = 0; | |
148 for my $col ( @$row ) { | |
149 if( $i == $j ) { | |
150 # empty string for diagonals | |
151 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = ''; | |
152 } else { | |
153 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = $col; | |
154 } | |
155 $j++; | |
156 } | |
157 $i++; | |
158 } | |
159 } elsif( /^Transition Probability Matrix/ ) { | |
160 if( /1\.0e7/ ) { | |
161 $state = 1; | |
162 $transition_ct = 0; | |
163 } else { | |
164 $state = 0; | |
165 } | |
166 } elsif ( /Acid Frequencies/ ) { | |
167 $state = 0; | |
168 $self->_readline(); # skip the next line | |
169 while( defined( $_ = $self->_readline) ) { | |
170 unless( /^\s+/) { | |
171 $self->_pushback($_); | |
172 last; | |
173 } | |
174 s/^\s+//; | |
175 s/\s+$//; | |
176 my ($index,$res,$model,$data) = split; | |
177 $frequencies{$res} = [ $model,$data]; | |
178 } | |
179 } elsif( /^(\d+)\s*\/\s*(\d+)\s+(.+)\s+model/ ) { | |
180 my @save = ($1,$2,$3); | |
181 # finish processing the transition_matrix | |
182 my $i =0; | |
183 foreach my $row ( @transition_matrix ) { | |
184 my $j = 0; | |
185 foreach my $col ( @$row ) { | |
186 $transition_mat{$resloc[$i]}->{$resloc[$j]} = $col; | |
187 $j++; | |
188 } | |
189 $i++; | |
190 } | |
191 | |
192 if( defined $treenum ) { | |
193 $self->_pushback($_); | |
194 last; | |
195 } | |
196 | |
197 $state = 2; | |
198 ($treenum,$possible_trees, $model) = @save; | |
199 $model =~ s/\s+/ /g; | |
200 } elsif( $state == 1 ) { | |
201 next if( /^\s+$/ ); | |
202 s/^\s+//; | |
203 s/\s+$//; | |
204 # because the matrix is split up into 2-10 column sets | |
205 push @{$transition_matrix[$transition_ct++]}, split ; | |
206 $transition_ct = 0 if $transition_ct % 20 == 0; | |
207 } elsif( $state == 2 ) { | |
208 if( s/^(\d+)\s+(\-?\d+(\.\d+)?)\s+// ) { | |
209 push @treedata, [ $1,$2]; | |
210 } | |
211 # save this for the end so that we can | |
212 # be efficient and only open one tree parser | |
213 push @treelines, $_; | |
214 } | |
215 } | |
216 # waiting till the end to do this, is it better | |
217 my @trees; | |
218 if( @treelines ) { | |
219 my $strdat = IO::String->new(join('',@treelines)); | |
220 my $treeio = new Bio::TreeIO(-fh => $strdat, | |
221 -format => 'newick'); | |
222 while( my $tree = $treeio->next_tree ) { | |
223 if( @treedata ) { | |
224 my $dat = shift @treedata; | |
225 # set the associated information | |
226 $tree->id($dat->[0]); | |
227 $tree->score($dat->[1]); | |
228 } | |
229 push @trees, $tree; | |
230 } | |
231 } | |
232 | |
233 my $result = new Bio::Tools::Phylo::Molphy::Result | |
234 (-trees => \@trees, | |
235 -substitution_matrix => \%subst_matrix, | |
236 -transition_matrix => \%transition_mat, | |
237 -frequencies => \%frequencies, | |
238 -model => $model, | |
239 -search_space => $possible_trees, | |
240 ); | |
241 | |
242 } | |
243 | |
244 1; |