comparison variant_effect_predictor/Bio/Tools/Phylo/Molphy.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: Molphy.pm,v 1.3 2002/10/22 07:45:23 lapp Exp $
2 #
3 # BioPerl module for Bio::Tools::Phylo::Molphy
4 #
5 # Cared for by Jason Stajich <jason@bioperl.org>
6 #
7 # Copyright Jason Stajich
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Tools::Phylo::Molphy - DESCRIPTION of Object
16
17 =head1 SYNOPSIS
18
19 use Bio::Tools::Phylo::Molphy;
20 my $parser = new Bio::Tools::Phylo::Molphy(-file => 'output.protml');
21 while( my $result = $parser->next_result ) {
22
23 }
24
25 =head1 DESCRIPTION
26
27 A parser for Molphy output (protml,dnaml)
28
29 =head1 FEEDBACK
30
31 =head2 Mailing Lists
32
33 User feedback is an integral part of the evolution of this and other
34 Bioperl modules. Send your comments and suggestions preferably to
35 the Bioperl mailing list. Your participation is much appreciated.
36
37 bioperl-l@bioperl.org - General discussion
38 http://bioperl.org/MailList.shtml - About the mailing lists
39
40 =head2 Reporting Bugs
41
42 Report bugs to the Bioperl bug tracking system to help us keep track
43 of the bugs and their resolution. Bug reports can be submitted via
44 email or the web:
45
46 bioperl-bugs@bioperl.org
47 http://bugzilla.bioperl.org/
48
49 =head1 AUTHOR - Jason Stajich
50
51 Email jason@bioperl.org
52
53 Describe contact details here
54
55 =head1 CONTRIBUTORS
56
57 Additional contributors names and emails here
58
59 =head1 APPENDIX
60
61 The rest of the documentation details each of the object methods.
62 Internal methods are usually preceded with a _
63
64 =cut
65
66
67 # Let the code begin...
68
69
70 package Bio::Tools::Phylo::Molphy;
71 use vars qw(@ISA);
72 use strict;
73
74 use Bio::Tools::Phylo::Molphy::Result;
75 use Bio::Root::Root;
76 use Bio::Root::IO;
77 use Bio::TreeIO;
78 use IO::String;
79
80 @ISA = qw(Bio::Root::Root Bio::Root::IO );
81
82 =head2 new
83
84 Title : new
85 Usage : my $obj = new Bio::Tools::Phylo::Molphy();
86 Function: Builds a new Bio::Tools::Phylo::Molphy object
87 Returns : Bio::Tools::Phylo::Molphy
88 Args : -fh/-file => $val, # for initing input, see Bio::Root::IO
89
90
91 =cut
92
93 sub new {
94 my($class,@args) = @_;
95
96 my $self = $class->SUPER::new(@args);
97 $self->_initialize_io(@args);
98
99 return $self;
100 }
101
102 =head2 next_result
103
104 Title : next_result
105 Usage : my $r = $molphy->next_result
106 Function: Get the next result set from parser data
107 Returns : Bio::Tools::Phylo::Molphy::Result object
108 Args : none
109
110
111 =cut
112
113 sub next_result{
114 my ($self) = @_;
115
116 # A little statemachine for the parser here
117 my ($state,$transition_ct,
118 @transition_matrix, %transition_mat, @resloc,) = ( 0,0);
119 my ( %subst_matrix, @treelines, @treedata, %frequencies);
120 my ( $treenum,$possible_trees, $model);
121 while( defined ( $_ = $self->_readline()) ) {
122 if( /^Relative Substitution Rate Matrix/ ) {
123 if( %subst_matrix ) {
124 $self->_pushback($_);
125 last;
126 }
127 $state = 0;
128 my ( @tempdata);
129 @resloc = ();
130 while( defined ($_ = $self->_readline) ) {
131 last if (/^\s+$/);
132 # remove leading/trailing spaces
133 s/^\s+//;
134 s/\s+$//;
135 my @data = split;
136 my $i = 0;
137 for my $l ( @data ) {
138 if( $l =~ /\D+/ ) {
139 push @resloc, $l;
140 }
141 $i++;
142 }
143 push @tempdata, \@data;
144 }
145 my $i = 0;
146 for my $row ( @tempdata ) {
147 my $j = 0;
148 for my $col ( @$row ) {
149 if( $i == $j ) {
150 # empty string for diagonals
151 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = '';
152 } else {
153 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = $col;
154 }
155 $j++;
156 }
157 $i++;
158 }
159 } elsif( /^Transition Probability Matrix/ ) {
160 if( /1\.0e7/ ) {
161 $state = 1;
162 $transition_ct = 0;
163 } else {
164 $state = 0;
165 }
166 } elsif ( /Acid Frequencies/ ) {
167 $state = 0;
168 $self->_readline(); # skip the next line
169 while( defined( $_ = $self->_readline) ) {
170 unless( /^\s+/) {
171 $self->_pushback($_);
172 last;
173 }
174 s/^\s+//;
175 s/\s+$//;
176 my ($index,$res,$model,$data) = split;
177 $frequencies{$res} = [ $model,$data];
178 }
179 } elsif( /^(\d+)\s*\/\s*(\d+)\s+(.+)\s+model/ ) {
180 my @save = ($1,$2,$3);
181 # finish processing the transition_matrix
182 my $i =0;
183 foreach my $row ( @transition_matrix ) {
184 my $j = 0;
185 foreach my $col ( @$row ) {
186 $transition_mat{$resloc[$i]}->{$resloc[$j]} = $col;
187 $j++;
188 }
189 $i++;
190 }
191
192 if( defined $treenum ) {
193 $self->_pushback($_);
194 last;
195 }
196
197 $state = 2;
198 ($treenum,$possible_trees, $model) = @save;
199 $model =~ s/\s+/ /g;
200 } elsif( $state == 1 ) {
201 next if( /^\s+$/ );
202 s/^\s+//;
203 s/\s+$//;
204 # because the matrix is split up into 2-10 column sets
205 push @{$transition_matrix[$transition_ct++]}, split ;
206 $transition_ct = 0 if $transition_ct % 20 == 0;
207 } elsif( $state == 2 ) {
208 if( s/^(\d+)\s+(\-?\d+(\.\d+)?)\s+// ) {
209 push @treedata, [ $1,$2];
210 }
211 # save this for the end so that we can
212 # be efficient and only open one tree parser
213 push @treelines, $_;
214 }
215 }
216 # waiting till the end to do this, is it better
217 my @trees;
218 if( @treelines ) {
219 my $strdat = IO::String->new(join('',@treelines));
220 my $treeio = new Bio::TreeIO(-fh => $strdat,
221 -format => 'newick');
222 while( my $tree = $treeio->next_tree ) {
223 if( @treedata ) {
224 my $dat = shift @treedata;
225 # set the associated information
226 $tree->id($dat->[0]);
227 $tree->score($dat->[1]);
228 }
229 push @trees, $tree;
230 }
231 }
232
233 my $result = new Bio::Tools::Phylo::Molphy::Result
234 (-trees => \@trees,
235 -substitution_matrix => \%subst_matrix,
236 -transition_matrix => \%transition_mat,
237 -frequencies => \%frequencies,
238 -model => $model,
239 -search_space => $possible_trees,
240 );
241
242 }
243
244 1;