Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Tools/Phylo/Molphy.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Tools/Phylo/Molphy.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,244 @@ +# $Id: Molphy.pm,v 1.3 2002/10/22 07:45:23 lapp Exp $ +# +# BioPerl module for Bio::Tools::Phylo::Molphy +# +# Cared for by Jason Stajich <jason@bioperl.org> +# +# Copyright Jason Stajich +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Tools::Phylo::Molphy - DESCRIPTION of Object + +=head1 SYNOPSIS + + use Bio::Tools::Phylo::Molphy; + my $parser = new Bio::Tools::Phylo::Molphy(-file => 'output.protml'); + while( my $result = $parser->next_result ) { + + } + +=head1 DESCRIPTION + +A parser for Molphy output (protml,dnaml) + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to +the Bioperl mailing list. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bioperl.org/MailList.shtml - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +of the bugs and their resolution. Bug reports can be submitted via +email or the web: + + bioperl-bugs@bioperl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Jason Stajich + +Email jason@bioperl.org + +Describe contact details here + +=head1 CONTRIBUTORS + +Additional contributors names and emails here + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. +Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + + +package Bio::Tools::Phylo::Molphy; +use vars qw(@ISA); +use strict; + +use Bio::Tools::Phylo::Molphy::Result; +use Bio::Root::Root; +use Bio::Root::IO; +use Bio::TreeIO; +use IO::String; + +@ISA = qw(Bio::Root::Root Bio::Root::IO ); + +=head2 new + + Title : new + Usage : my $obj = new Bio::Tools::Phylo::Molphy(); + Function: Builds a new Bio::Tools::Phylo::Molphy object + Returns : Bio::Tools::Phylo::Molphy + Args : -fh/-file => $val, # for initing input, see Bio::Root::IO + + +=cut + +sub new { + my($class,@args) = @_; + + my $self = $class->SUPER::new(@args); + $self->_initialize_io(@args); + + return $self; +} + +=head2 next_result + + Title : next_result + Usage : my $r = $molphy->next_result + Function: Get the next result set from parser data + Returns : Bio::Tools::Phylo::Molphy::Result object + Args : none + + +=cut + +sub next_result{ + my ($self) = @_; + + # A little statemachine for the parser here + my ($state,$transition_ct, + @transition_matrix, %transition_mat, @resloc,) = ( 0,0); + my ( %subst_matrix, @treelines, @treedata, %frequencies); + my ( $treenum,$possible_trees, $model); + while( defined ( $_ = $self->_readline()) ) { + if( /^Relative Substitution Rate Matrix/ ) { + if( %subst_matrix ) { + $self->_pushback($_); + last; + } + $state = 0; + my ( @tempdata); + @resloc = (); + while( defined ($_ = $self->_readline) ) { + last if (/^\s+$/); + # remove leading/trailing spaces + s/^\s+//; + s/\s+$//; + my @data = split; + my $i = 0; + for my $l ( @data ) { + if( $l =~ /\D+/ ) { + push @resloc, $l; + } + $i++; + } + push @tempdata, \@data; + } + my $i = 0; + for my $row ( @tempdata ) { + my $j = 0; + for my $col ( @$row ) { + if( $i == $j ) { + # empty string for diagonals + $subst_matrix{$resloc[$i]}->{$resloc[$j]} = ''; + } else { + $subst_matrix{$resloc[$i]}->{$resloc[$j]} = $col; + } + $j++; + } + $i++; + } + } elsif( /^Transition Probability Matrix/ ) { + if( /1\.0e7/ ) { + $state = 1; + $transition_ct = 0; + } else { + $state = 0; + } + } elsif ( /Acid Frequencies/ ) { + $state = 0; + $self->_readline(); # skip the next line + while( defined( $_ = $self->_readline) ) { + unless( /^\s+/) { + $self->_pushback($_); + last; + } + s/^\s+//; + s/\s+$//; + my ($index,$res,$model,$data) = split; + $frequencies{$res} = [ $model,$data]; + } + } elsif( /^(\d+)\s*\/\s*(\d+)\s+(.+)\s+model/ ) { + my @save = ($1,$2,$3); + # finish processing the transition_matrix + my $i =0; + foreach my $row ( @transition_matrix ) { + my $j = 0; + foreach my $col ( @$row ) { + $transition_mat{$resloc[$i]}->{$resloc[$j]} = $col; + $j++; + } + $i++; + } + + if( defined $treenum ) { + $self->_pushback($_); + last; + } + + $state = 2; + ($treenum,$possible_trees, $model) = @save; + $model =~ s/\s+/ /g; + } elsif( $state == 1 ) { + next if( /^\s+$/ ); + s/^\s+//; + s/\s+$//; + # because the matrix is split up into 2-10 column sets + push @{$transition_matrix[$transition_ct++]}, split ; + $transition_ct = 0 if $transition_ct % 20 == 0; + } elsif( $state == 2 ) { + if( s/^(\d+)\s+(\-?\d+(\.\d+)?)\s+// ) { + push @treedata, [ $1,$2]; + } + # save this for the end so that we can + # be efficient and only open one tree parser + push @treelines, $_; + } + } + # waiting till the end to do this, is it better + my @trees; + if( @treelines ) { + my $strdat = IO::String->new(join('',@treelines)); + my $treeio = new Bio::TreeIO(-fh => $strdat, + -format => 'newick'); + while( my $tree = $treeio->next_tree ) { + if( @treedata ) { + my $dat = shift @treedata; + # set the associated information + $tree->id($dat->[0]); + $tree->score($dat->[1]); + } + push @trees, $tree; + } + } + + my $result = new Bio::Tools::Phylo::Molphy::Result + (-trees => \@trees, + -substitution_matrix => \%subst_matrix, + -transition_matrix => \%transition_mat, + -frequencies => \%frequencies, + -model => $model, + -search_space => $possible_trees, + ); + +} + +1;