0
|
1 # BioPerl module for Bio::Matrix::PhylipDist
|
|
2 #
|
|
3 # Cared for by Shawn Hoon <shawnh@fugu-sg.org>
|
|
4 #
|
|
5 # Copyright Shawn Hoon
|
|
6 #
|
|
7 # You may distribute this module under the same terms as perl itself
|
|
8
|
|
9 # POD documentation - main docs before the code
|
|
10
|
|
11 =head1 NAME
|
|
12
|
|
13 Bio::Matrix::PhylipDist - A Phylip Distance Matrix object
|
|
14
|
|
15 =head1 SYNOPSIS
|
|
16
|
|
17 use Bio::Matrix::PhylipDist;
|
|
18
|
|
19 my $dist = Bio::Matrix::PhylipDist->new(-file=>"protdist.out",-program=>"ProtDist");
|
|
20 #or
|
|
21 my $dist = Bio::Matrix::PhylipDist->new(-fh=>$FH,-program=>"ProtDist");
|
|
22
|
|
23 #get specific entries
|
|
24 my $distance_value = $dist->get_entry('ALPHA','BETA');
|
|
25 my @columns = $dist->get_column('ALPHA');
|
|
26 my @rows = $dist->get_row('BETA');
|
|
27 my @diagonal = $dist->get_diagonal();
|
|
28
|
|
29 #print the matrix in phylip numerical format
|
|
30 print $dist->print_matrix;
|
|
31
|
|
32 =head1 DESCRIPTION
|
|
33
|
|
34 Simple object for holding Distance Matrices generated by the following Phylip programs:
|
|
35
|
|
36 1) dnadist
|
|
37 2) protdist
|
|
38 3) restdist
|
|
39
|
|
40 It currently handles parsing of the matrix without the data output option.
|
|
41
|
|
42 5
|
|
43 Alpha 0.00000 4.23419 3.63330 6.20865 3.45431
|
|
44 Beta 4.23419 0.00000 3.49289 3.36540 4.29179
|
|
45 Gamma 3.63330 3.49289 0.00000 3.68733 5.84929
|
|
46 Delta 6.20865 3.36540 3.68733 0.00000 4.43345
|
|
47 Epsilon 3.45431 4.29179 5.84929 4.43345 0.00000
|
|
48
|
|
49 =head1 FEEDBACK
|
|
50
|
|
51
|
|
52 =head2 Mailing Lists
|
|
53
|
|
54 User feedback is an integral part of the evolution of this and other
|
|
55 Bioperl modules. Send your comments and suggestions preferably to one
|
|
56 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
57
|
|
58 bioperl-l@bioperl.org - General discussion
|
|
59 http://bio.perl.org/MailList.html - About the mailing lists
|
|
60
|
|
61 =head2 Reporting Bugs
|
|
62
|
|
63 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
64 the bugs and their resolution. Bug reports can be submitted via email
|
|
65 or the web:
|
|
66
|
|
67 bioperl-bugs@bioperl.org
|
|
68 http://bugzilla.bioperl.org/
|
|
69
|
|
70 =head1 AUTHOR - Shawn Hoon
|
|
71
|
|
72 Email shawnh@fugu-sg.org
|
|
73
|
|
74
|
|
75 =head1 APPENDIX
|
|
76
|
|
77
|
|
78 The rest of the documentation details each of the object
|
|
79 methods. Internal methods are usually preceded with a "_".
|
|
80
|
|
81 =cut
|
|
82
|
|
83 # Let the code begin...
|
|
84
|
|
85 package Bio::Matrix::PhylipDist;
|
|
86 use strict;
|
|
87
|
|
88 use vars qw(@ISA);
|
|
89
|
|
90 use Bio::Root::Root;
|
|
91 use Bio::Root::IO;
|
|
92
|
|
93 @ISA = qw(Bio::Root::Root Bio::Root::IO);
|
|
94
|
|
95 =head2 new
|
|
96
|
|
97 Title : new
|
|
98 Usage : my $family = Bio::Matrix::PhylipDist->new(-file=>"protdist.out",
|
|
99 -program=>"protdist");
|
|
100 Function: Constructor for PhylipDist Object
|
|
101 Returns : L<Bio::Matrix::PhylipDist>
|
|
102
|
|
103 =cut
|
|
104
|
|
105 sub new {
|
|
106 my ($class,@args) = @_;
|
|
107 my $self = $class->SUPER::new(@args);
|
|
108 my ($matrix,$values, $names,$file, $fh,$program) = $self->_rearrange([qw(MATRIX VALUES NAMES FILE FH PROGRAM)],@args);
|
|
109
|
|
110 ($matrix && $values && $names) || $file || $fh || $self->throw("Need a file or file handle!");
|
|
111
|
|
112 $program && $self->program($program);
|
|
113 $self->_initialize_io(@args);
|
|
114
|
|
115 $self->_matrix($matrix) if $matrix;
|
|
116 $self->_values($values) if $values;
|
|
117 $self->names($names) if $names;
|
|
118 if(!$matrix && !$values && !$names){
|
|
119 $self->_parse();
|
|
120 }
|
|
121
|
|
122 return $self;
|
|
123 }
|
|
124
|
|
125 =head2 _parse
|
|
126
|
|
127 Title : _parse
|
|
128 Usage : $matrix->_parse();
|
|
129 Function: internal method that parses the distance matrix file.
|
|
130 Returns :
|
|
131 Arguments:
|
|
132
|
|
133 =cut
|
|
134
|
|
135 sub _parse {
|
|
136 my ($self) = @_;
|
|
137 my @names;
|
|
138 my @values;
|
|
139 while (my $entry = $self->_readline){
|
|
140 next if ($entry=~/^\s+\d+$/);
|
|
141 my ($n,@line) = split( /\s+/,$entry);
|
|
142 push @names, $n;
|
|
143 push @values, [@line];
|
|
144 }
|
|
145
|
|
146 my %dist;
|
|
147 my $i=0;
|
|
148 foreach my $name(@names){
|
|
149 my $j=0;
|
|
150 foreach my $n(@names) {
|
|
151 $dist{$name}{$n} = [$i,$j];
|
|
152 $j++;
|
|
153 }
|
|
154 $i++;
|
|
155 }
|
|
156
|
|
157 $self->_matrix(\%dist);
|
|
158 $self->names(\@names);
|
|
159 $self->_values(\@values);
|
|
160 }
|
|
161
|
|
162 =head2 get_entry
|
|
163
|
|
164 Title : get_entry
|
|
165 Usage : $matrix->get_entry();
|
|
166 Function: returns a particular entry
|
|
167 Returns : a float
|
|
168 Arguments: string id1, string id2
|
|
169
|
|
170 =cut
|
|
171
|
|
172 sub get_entry {
|
|
173 my ($self,$row,$column) = @_;
|
|
174 $row && $column || $self->throw("Need at least 2 ids");
|
|
175 my %matrix = %{$self->_matrix};
|
|
176 my @values = @{$self->_values};
|
|
177 if(ref $matrix{$row}{$column}){
|
|
178 my ($i,$j) = @{$matrix{$row}{$column}};
|
|
179 return $values[$i][$j];
|
|
180 }
|
|
181 return;
|
|
182
|
|
183 }
|
|
184
|
|
185 =head2 get_row
|
|
186
|
|
187 Title : get_row
|
|
188 Usage : $matrix->get_row('ALPHA');
|
|
189 Function: returns a particular row
|
|
190 Returns : an array of float
|
|
191 Arguments: string id1
|
|
192
|
|
193 =cut
|
|
194
|
|
195 sub get_row {
|
|
196 my ($self,$row) = @_;
|
|
197 $row || $self->throw("Need at least a row id");
|
|
198
|
|
199 my %matrix = %{$self->_matrix};
|
|
200 my @values = @{$self->_values};
|
|
201 my @names = @{$self->names};
|
|
202 $matrix{$row} || return;
|
|
203 my @row = %{$matrix{$row}};
|
|
204 my $row_pointer = $row[1]->[0];
|
|
205 my $index = scalar(@names)-1;
|
|
206 return @{$values[$row_pointer]}[0..$index];
|
|
207 }
|
|
208
|
|
209 =head2 get_column
|
|
210
|
|
211 Title : get_column
|
|
212 Usage : $matrix->get_column('ALPHA');
|
|
213 Function: returns a particular column
|
|
214 Returns : an array of floats
|
|
215 Arguments: string id1
|
|
216
|
|
217 =cut
|
|
218
|
|
219 sub get_column {
|
|
220 my ($self,$column) = @_;
|
|
221 $column || $self->throw("Need at least a column id");
|
|
222
|
|
223 my %matrix = %{$self->_matrix};
|
|
224 my @values = @{$self->_values};
|
|
225 my @names = @{$self->names};
|
|
226 $matrix{$column} || return;
|
|
227 my @column = %{$matrix{$column}};
|
|
228 my $row_pointer = $column[1]->[0];
|
|
229 my @return;
|
|
230 for(my $i=0; $i < scalar(@names); $i++){
|
|
231 push @return, $values[$i][$row_pointer];
|
|
232 }
|
|
233 return @return;
|
|
234 }
|
|
235
|
|
236 =head2 get_diagonal
|
|
237
|
|
238 Title : get_diagonal
|
|
239 Usage : $matrix->get_diagonal();
|
|
240 Function: returns the diagonal of the matrix
|
|
241 Returns : an array of float
|
|
242 Arguments: string id1
|
|
243
|
|
244 =cut
|
|
245
|
|
246 sub get_diagonal {
|
|
247 my ($self) = @_;
|
|
248 my %matrix = %{$self->_matrix};
|
|
249 my @values = @{$self->_values};
|
|
250 my @return;
|
|
251 foreach my $name (@{$self->names}){
|
|
252 my ($i,$j) = @{$matrix{$name}{$name}};
|
|
253 push @return,$values[$i][$j];
|
|
254 }
|
|
255 return @return;
|
|
256 }
|
|
257
|
|
258 =head2 print_matrix
|
|
259
|
|
260 Title : print_matrix
|
|
261 Usage : $matrix->print_matrix();
|
|
262 Function: returns a string of the matrix in phylip format
|
|
263 Returns : a string
|
|
264 Arguments:
|
|
265
|
|
266 =cut
|
|
267
|
|
268 sub print_matrix {
|
|
269 my ($self) = @_;
|
|
270 my @names = @{$self->names};
|
|
271 my @values = @{$self->_values};
|
|
272 my %matrix = %{$self->_matrix};
|
|
273 my $str;
|
|
274 $str.= (" "x 4). scalar(@names)."\n";
|
|
275 foreach my $name (@names){
|
|
276 my $newname = $name. (" " x (15-length($name)));
|
|
277 $str.=$newname;
|
|
278 my $count = 0;
|
|
279 foreach my $n (@names){
|
|
280 my ($i,$j) = @{$matrix{$name}{$n}};
|
|
281 if($count < $#names){
|
|
282 $str.= $values[$i][$j]. " ";
|
|
283 }
|
|
284 else {
|
|
285 $str.= $values[$i][$j];
|
|
286 }
|
|
287 $count++;
|
|
288 }
|
|
289 $str.="\n";
|
|
290 }
|
|
291 return $str;
|
|
292 }
|
|
293
|
|
294 =head2 _matrix
|
|
295
|
|
296 Title : _matrix
|
|
297 Usage : $matrix->_matrix();
|
|
298 Function: get/set for hash reference of the pointers
|
|
299 to the value matrix
|
|
300 Returns : hash reference
|
|
301 Arguments: hash reference
|
|
302
|
|
303 =cut
|
|
304
|
|
305 sub _matrix {
|
|
306 my ($self,$val) = @_;
|
|
307 if($val){
|
|
308 $self->{'_matrix'} = $val;
|
|
309 }
|
|
310 return $self->{'_matrix'};
|
|
311 }
|
|
312
|
|
313
|
|
314 =head2 names
|
|
315
|
|
316 Title : names
|
|
317 Usage : $matrix->names();
|
|
318 Function: get/set for array ref of names of sequences
|
|
319 Returns : an array reference
|
|
320 Arguments: an array reference
|
|
321
|
|
322 =cut
|
|
323
|
|
324 sub names {
|
|
325 my ($self,$val) = @_;
|
|
326 if($val){
|
|
327 $self->{"_names"} = $val;
|
|
328 }
|
|
329 return $self->{'_names'};
|
|
330 }
|
|
331
|
|
332 =head2 program
|
|
333
|
|
334 Title : program
|
|
335 Usage : $matrix->program();
|
|
336 Function: get/set for the program name generating this
|
|
337 matrix
|
|
338 Returns : string
|
|
339 Arguments: string
|
|
340
|
|
341 =cut
|
|
342
|
|
343 sub program {
|
|
344 my ($self,$val) = @_;
|
|
345 if($val){
|
|
346 $self->{'_program'} = $val;
|
|
347 }
|
|
348 return $self->{'_program'};
|
|
349 }
|
|
350
|
|
351 =head2 _values
|
|
352
|
|
353 Title : _values
|
|
354 Usage : $matrix->_values();
|
|
355 Function: get/set for array ref of the matrix containing
|
|
356 distance values
|
|
357 Returns : an array reference
|
|
358 Arguments: an array reference
|
|
359
|
|
360 =cut
|
|
361
|
|
362 sub _values {
|
|
363 my ($self,$val) = @_;
|
|
364 if($val){
|
|
365 $self->{'_values'} = $val;
|
|
366 }
|
|
367 return $self->{'_values'};
|
|
368 }
|
|
369
|
|
370 1;
|
|
371
|
|
372
|
|
373
|
|
374
|
|
375
|
|
376
|
|
377
|