Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Matrix/PhylipDist.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Matrix/PhylipDist.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,377 @@ +# BioPerl module for Bio::Matrix::PhylipDist +# +# Cared for by Shawn Hoon <shawnh@fugu-sg.org> +# +# Copyright Shawn Hoon +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Matrix::PhylipDist - A Phylip Distance Matrix object + +=head1 SYNOPSIS + + use Bio::Matrix::PhylipDist; + + my $dist = Bio::Matrix::PhylipDist->new(-file=>"protdist.out",-program=>"ProtDist"); + #or + my $dist = Bio::Matrix::PhylipDist->new(-fh=>$FH,-program=>"ProtDist"); + + #get specific entries + my $distance_value = $dist->get_entry('ALPHA','BETA'); + my @columns = $dist->get_column('ALPHA'); + my @rows = $dist->get_row('BETA'); + my @diagonal = $dist->get_diagonal(); + + #print the matrix in phylip numerical format + print $dist->print_matrix; + +=head1 DESCRIPTION + +Simple object for holding Distance Matrices generated by the following Phylip programs: + +1) dnadist +2) protdist +3) restdist + +It currently handles parsing of the matrix without the data output option. + + 5 +Alpha 0.00000 4.23419 3.63330 6.20865 3.45431 +Beta 4.23419 0.00000 3.49289 3.36540 4.29179 +Gamma 3.63330 3.49289 0.00000 3.68733 5.84929 +Delta 6.20865 3.36540 3.68733 0.00000 4.43345 +Epsilon 3.45431 4.29179 5.84929 4.43345 0.00000 + +=head1 FEEDBACK + + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to one +of the Bioperl mailing lists. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bio.perl.org/MailList.html - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +the bugs and their resolution. Bug reports can be submitted via email +or the web: + + bioperl-bugs@bioperl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Shawn Hoon + +Email shawnh@fugu-sg.org + + +=head1 APPENDIX + + +The rest of the documentation details each of the object +methods. Internal methods are usually preceded with a "_". + +=cut + +# Let the code begin... + +package Bio::Matrix::PhylipDist; +use strict; + +use vars qw(@ISA); + +use Bio::Root::Root; +use Bio::Root::IO; + +@ISA = qw(Bio::Root::Root Bio::Root::IO); + +=head2 new + + Title : new + Usage : my $family = Bio::Matrix::PhylipDist->new(-file=>"protdist.out", + -program=>"protdist"); + Function: Constructor for PhylipDist Object + Returns : L<Bio::Matrix::PhylipDist> + +=cut + +sub new { + my ($class,@args) = @_; + my $self = $class->SUPER::new(@args); + my ($matrix,$values, $names,$file, $fh,$program) = $self->_rearrange([qw(MATRIX VALUES NAMES FILE FH PROGRAM)],@args); + + ($matrix && $values && $names) || $file || $fh || $self->throw("Need a file or file handle!"); + + $program && $self->program($program); + $self->_initialize_io(@args); + + $self->_matrix($matrix) if $matrix; + $self->_values($values) if $values; + $self->names($names) if $names; + if(!$matrix && !$values && !$names){ + $self->_parse(); + } + + return $self; +} + +=head2 _parse + + Title : _parse + Usage : $matrix->_parse(); + Function: internal method that parses the distance matrix file. + Returns : + Arguments: + +=cut + +sub _parse { + my ($self) = @_; + my @names; + my @values; + while (my $entry = $self->_readline){ + next if ($entry=~/^\s+\d+$/); + my ($n,@line) = split( /\s+/,$entry); + push @names, $n; + push @values, [@line]; + } + + my %dist; + my $i=0; + foreach my $name(@names){ + my $j=0; + foreach my $n(@names) { + $dist{$name}{$n} = [$i,$j]; + $j++; + } + $i++; + } + + $self->_matrix(\%dist); + $self->names(\@names); + $self->_values(\@values); +} + +=head2 get_entry + + Title : get_entry + Usage : $matrix->get_entry(); + Function: returns a particular entry + Returns : a float + Arguments: string id1, string id2 + +=cut + +sub get_entry { + my ($self,$row,$column) = @_; + $row && $column || $self->throw("Need at least 2 ids"); + my %matrix = %{$self->_matrix}; + my @values = @{$self->_values}; + if(ref $matrix{$row}{$column}){ + my ($i,$j) = @{$matrix{$row}{$column}}; + return $values[$i][$j]; + } + return; + +} + +=head2 get_row + + Title : get_row + Usage : $matrix->get_row('ALPHA'); + Function: returns a particular row + Returns : an array of float + Arguments: string id1 + +=cut + +sub get_row { + my ($self,$row) = @_; + $row || $self->throw("Need at least a row id"); + + my %matrix = %{$self->_matrix}; + my @values = @{$self->_values}; + my @names = @{$self->names}; + $matrix{$row} || return; + my @row = %{$matrix{$row}}; + my $row_pointer = $row[1]->[0]; + my $index = scalar(@names)-1; + return @{$values[$row_pointer]}[0..$index]; +} + +=head2 get_column + + Title : get_column + Usage : $matrix->get_column('ALPHA'); + Function: returns a particular column + Returns : an array of floats + Arguments: string id1 + +=cut + +sub get_column { + my ($self,$column) = @_; + $column || $self->throw("Need at least a column id"); + + my %matrix = %{$self->_matrix}; + my @values = @{$self->_values}; + my @names = @{$self->names}; + $matrix{$column} || return; + my @column = %{$matrix{$column}}; + my $row_pointer = $column[1]->[0]; + my @return; + for(my $i=0; $i < scalar(@names); $i++){ + push @return, $values[$i][$row_pointer]; + } + return @return; +} + +=head2 get_diagonal + + Title : get_diagonal + Usage : $matrix->get_diagonal(); + Function: returns the diagonal of the matrix + Returns : an array of float + Arguments: string id1 + +=cut + +sub get_diagonal { + my ($self) = @_; + my %matrix = %{$self->_matrix}; + my @values = @{$self->_values}; + my @return; + foreach my $name (@{$self->names}){ + my ($i,$j) = @{$matrix{$name}{$name}}; + push @return,$values[$i][$j]; + } + return @return; +} + +=head2 print_matrix + + Title : print_matrix + Usage : $matrix->print_matrix(); + Function: returns a string of the matrix in phylip format + Returns : a string + Arguments: + +=cut + +sub print_matrix { + my ($self) = @_; + my @names = @{$self->names}; + my @values = @{$self->_values}; + my %matrix = %{$self->_matrix}; + my $str; + $str.= (" "x 4). scalar(@names)."\n"; + foreach my $name (@names){ + my $newname = $name. (" " x (15-length($name))); + $str.=$newname; + my $count = 0; + foreach my $n (@names){ + my ($i,$j) = @{$matrix{$name}{$n}}; + if($count < $#names){ + $str.= $values[$i][$j]. " "; + } + else { + $str.= $values[$i][$j]; + } + $count++; + } + $str.="\n"; + } + return $str; +} + +=head2 _matrix + + Title : _matrix + Usage : $matrix->_matrix(); + Function: get/set for hash reference of the pointers + to the value matrix + Returns : hash reference + Arguments: hash reference + +=cut + +sub _matrix { + my ($self,$val) = @_; + if($val){ + $self->{'_matrix'} = $val; + } + return $self->{'_matrix'}; +} + + +=head2 names + + Title : names + Usage : $matrix->names(); + Function: get/set for array ref of names of sequences + Returns : an array reference + Arguments: an array reference + +=cut + +sub names { + my ($self,$val) = @_; + if($val){ + $self->{"_names"} = $val; + } + return $self->{'_names'}; +} + +=head2 program + + Title : program + Usage : $matrix->program(); + Function: get/set for the program name generating this + matrix + Returns : string + Arguments: string + +=cut + +sub program { + my ($self,$val) = @_; + if($val){ + $self->{'_program'} = $val; + } + return $self->{'_program'}; +} + +=head2 _values + + Title : _values + Usage : $matrix->_values(); + Function: get/set for array ref of the matrix containing + distance values + Returns : an array reference + Arguments: an array reference + +=cut + +sub _values { + my ($self,$val) = @_; + if($val){ + $self->{'_values'} = $val; + } + return $self->{'_values'}; +} + +1; + + + + + + +