0
|
1 # $Id: ResultTableWriter.pm,v 1.13 2002/12/05 13:46:35 heikki Exp $
|
|
2
|
|
3 =head1 NAME
|
|
4
|
|
5 Bio::SearchIO::Writer::ResultTableWriter - Outputs tab-delimited data for each Bio::Search::Result::ResultI object.
|
|
6
|
|
7 =head1 SYNOPSIS
|
|
8
|
|
9 =head2 Example 1: Using the default columns
|
|
10
|
|
11 use Bio::SearchIO;
|
|
12 use Bio::SearchIO::Writer::ResultTableWriter;
|
|
13
|
|
14 my $in = Bio::SearchIO->new();
|
|
15
|
|
16 my $writer = Bio::SearchIO::Writer::ResultTableWriter->new();
|
|
17
|
|
18 my $out = Bio::SearchIO->new( -writer => $writer );
|
|
19
|
|
20 while ( my $result = $in->next_result() ) {
|
|
21 $out->write_result($result, ($in->report_count - 1 ? 0 : 1) );
|
|
22 }
|
|
23
|
|
24 =head2 Example 2: Specifying a subset of columns
|
|
25
|
|
26 use Bio::SearchIO;
|
|
27 use Bio::SearchIO::Writer::ResultTableWriter;
|
|
28
|
|
29 my $in = Bio::SearchIO->new();
|
|
30
|
|
31 my $writer = Bio::SearchIO::Writer::ResultTableWriter->new(
|
|
32 -columns => [qw(
|
|
33 query_name
|
|
34 query_length
|
|
35 )] );
|
|
36
|
|
37 my $out = Bio::SearchIO->new( -writer => $writer,
|
|
38 -file => ">result.out" );
|
|
39
|
|
40 while ( my $result = $in->next_result() ) {
|
|
41 $out->write_result($result, ($in->report_count - 1 ? 0 : 1) );
|
|
42 }
|
|
43
|
|
44 =head2 Custom Labels
|
|
45
|
|
46 You can also specify different column labels if you don't want to use
|
|
47 the defaults. Do this by specifying a C<-labels> hash reference
|
|
48 parameter when creating the ResultTableWriter object. The keys of the
|
|
49 hash should be the column number (left-most column = 1) for the label(s)
|
|
50 you want to specify. Here's an example:
|
|
51
|
|
52 my $writer = Bio::SearchIO::Writer::ResultTableWriter->new(
|
|
53 -columns => [qw( query_name
|
|
54 query_length
|
|
55 query_description )],
|
|
56 -labels => { 1 => 'QUERY_GI',
|
|
57 2 => 'QUERY_LENGTH' } );
|
|
58
|
|
59
|
|
60 =head1 DESCRIPTION
|
|
61
|
|
62 Bio::SearchIO::Writer::ResultTableWriter outputs data in tab-delimited
|
|
63 format for each search result, one row per search result. This is a very
|
|
64 coarse-grain level of information since it only includes data
|
|
65 stored in the Bio::Search::Result::ResultI object itself and does not
|
|
66 include any information about hits or HSPs.
|
|
67
|
|
68 You most likely will never use this object but instead will use one of
|
|
69 its subclasses: Bio::SearchIO::Writer::HitTableWriter or
|
|
70 Bio::SearchIO::Writer::HSPTableWriter.
|
|
71
|
|
72 =head2 Available Columns
|
|
73
|
|
74 Here are the columns that can be specified in the C<-columns>
|
|
75 parameter when creating a ResultTableWriter object. If a C<-columns> parameter
|
|
76 is not specified, this list, in this order, will be used as the default.
|
|
77
|
|
78 query_name
|
|
79 query_length
|
|
80 query_description
|
|
81
|
|
82 For more details about these columns, see the documentation for the
|
|
83 corresponding method in L<Bio::Search::Result::ResultI|Bio::Search::Result::ResultI>.
|
|
84
|
|
85 =head1 FEEDBACK
|
|
86
|
|
87 =head2 Mailing Lists
|
|
88
|
|
89 User feedback is an integral part of the evolution of this and other
|
|
90 Bioperl modules. Send your comments and suggestions preferably to one
|
|
91 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
92
|
|
93 bioperl-l@bioperl.org - General discussion
|
|
94 http://bio.perl.org/MailList.html - About the mailing lists
|
|
95
|
|
96 =head2 Reporting Bugs
|
|
97
|
|
98 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
99 the bugs and their resolution. Bug reports can be submitted via email
|
|
100 or the web:
|
|
101
|
|
102 bioperl-bugs@bio.perl.org
|
|
103 http://bugzilla.bioperl.org/
|
|
104
|
|
105 =head1 AUTHOR
|
|
106
|
|
107 Steve Chervitz E<lt>sac@bioperl.orgE<gt>
|
|
108
|
|
109 See L<the FEEDBACK section | FEEDBACK> for where to send bug reports
|
|
110 and comments.
|
|
111
|
|
112 =head1 COPYRIGHT
|
|
113
|
|
114 Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
|
|
115
|
|
116 This library is free software; you can redistribute it and/or modify
|
|
117 it under the same terms as Perl itself.
|
|
118
|
|
119 =head1 DISCLAIMER
|
|
120
|
|
121 This software is provided "as is" without warranty of any kind.
|
|
122
|
|
123 =head1 SEE ALSO
|
|
124
|
|
125 L<Bio::SearchIO::Writer::HitTableWriter>,
|
|
126 L<Bio::SearchIO::Writer::HSPTableWriter>
|
|
127
|
|
128 =head1 METHODS
|
|
129
|
|
130 =cut
|
|
131
|
|
132
|
|
133 package Bio::SearchIO::Writer::ResultTableWriter;
|
|
134
|
|
135 use strict;
|
|
136 use Bio::Root::Root;
|
|
137 use Bio::SearchIO::SearchWriterI;
|
|
138
|
|
139 use vars qw( @ISA );
|
|
140 @ISA = qw( Bio::Root::Root Bio::SearchIO::SearchWriterI );
|
|
141
|
|
142 # Array fields: column, object, method[/argument], printf format, column label
|
|
143 # Methods are defined in Bio::Search::Result::ResultI.
|
|
144 # Tech note: If a bogus method is supplied, it will result in all values to be zero.
|
|
145 # Don't know why this is.
|
|
146 my %column_map = (
|
|
147 'query_name' => ['1', 'result', 'query_name', 's', 'QUERY' ],
|
|
148 'query_length' => ['2', 'result', 'query_length', 'd', 'LEN_Q'],
|
|
149 'query_description' => ['3', 'result', 'query_description', 's', 'DESC_Q'],
|
|
150 );
|
|
151
|
|
152 sub column_map { return %column_map }
|
|
153
|
|
154 sub new {
|
|
155 my ($class, @args) = @_;
|
|
156 my $self = $class->SUPER::new(@args);
|
|
157
|
|
158 my( $col_spec, $label_spec,
|
|
159 $filters ) = $self->_rearrange( [qw(COLUMNS
|
|
160 LABELS
|
|
161 FILTERS)], @args);
|
|
162
|
|
163 $self->_set_cols( $col_spec );
|
|
164 $self->_set_labels( $label_spec ) if $label_spec;
|
|
165 $self->_set_printf_fmt();
|
|
166 $self->_set_row_data_func();
|
|
167 $self->_set_column_labels();
|
|
168
|
|
169 if( defined $filters ) {
|
|
170 if( !ref($filters) =~ /HASH/i ) {
|
|
171 $self->warn("Did not provide a hashref for the FILTERS option, ignoring.");
|
|
172 } else {
|
|
173 while( my ($type,$code) = each %{$filters} ) {
|
|
174 $self->filter($type,$code);
|
|
175 }
|
|
176 }
|
|
177 }
|
|
178
|
|
179
|
|
180 return $self;
|
|
181 }
|
|
182
|
|
183
|
|
184 # Purpose : Stores the column spec internally. Also performs QC on the
|
|
185 # user-supplied column specification.
|
|
186 #
|
|
187 sub _set_cols {
|
|
188 my ($self, $col_spec_ref) = @_;
|
|
189 return if defined $self->{'_cols'}; # only set columns once
|
|
190
|
|
191 my %map = $self->column_map;
|
|
192
|
|
193 if( not defined $col_spec_ref) {
|
|
194 print STDERR "\nUsing default column map.\n";
|
|
195 $col_spec_ref = [ map { $_ } sort { $map{$a}->[0] <=> $map{$b}->[0] } keys %map ];
|
|
196 }
|
|
197
|
|
198 if( ref($col_spec_ref) eq 'ARRAY') {
|
|
199 # printf "%d columns to process\n", scalar(@$col_spec_ref);
|
|
200 my @col_spec = @{$col_spec_ref};
|
|
201 while( my $item = lc(shift @col_spec) ) {
|
|
202 if( not defined ($map{$item}) ) {
|
|
203 $self->throw(-class =>'Bio::Root::BadParameter',
|
|
204 -text => "Unknown column name: $item"
|
|
205 );
|
|
206 }
|
|
207 push @{$self->{'_cols'}}, $item;
|
|
208 #print "pushing on to col $col_num, $inner: $item\n";
|
|
209 }
|
|
210 }
|
|
211 else {
|
|
212 $self->throw(-class =>'Bio::Root::BadParameter',
|
|
213 -text => "Can't set columns: not a ARRAY ref",
|
|
214 -value => $col_spec_ref
|
|
215 );
|
|
216 }
|
|
217 }
|
|
218
|
|
219 sub _set_printf_fmt {
|
|
220 my ($self) = @_;
|
|
221
|
|
222 my @cols = $self->columns();
|
|
223 my %map = $self->column_map;
|
|
224
|
|
225 my $printf_fmt = '';
|
|
226
|
|
227 foreach my $col ( @cols ) {
|
|
228 $printf_fmt .= "\%$map{$col}->[3]\t";
|
|
229 }
|
|
230
|
|
231 $printf_fmt =~ s/\\t$//;
|
|
232
|
|
233 $self->{'_printf_fmt'} = $printf_fmt;
|
|
234 }
|
|
235
|
|
236 sub printf_fmt { shift->{'_printf_fmt'} }
|
|
237
|
|
238 # Sets the data to be used for the labels.
|
|
239 sub _set_labels {
|
|
240 my ($self, $label_spec) = @_;
|
|
241 if( ref($label_spec) eq 'HASH') {
|
|
242 foreach my $col ( sort { $a <=> $b } keys %$label_spec ) {
|
|
243 # print "LABEL: $col $label_spec->{$col}\n";
|
|
244 $self->{'_custom_labels'}->{$col} = $label_spec->{$col};
|
|
245 }
|
|
246 }
|
|
247 else {
|
|
248 $self->throw(-class =>'Bio::Root::BadParameter',
|
|
249 -text => "Can't set labels: not a HASH ref: $label_spec"
|
|
250 );
|
|
251 }
|
|
252 }
|
|
253
|
|
254 sub _set_column_labels {
|
|
255 my $self = shift;
|
|
256
|
|
257 my @cols = $self->columns;
|
|
258 my %map = $self->column_map;
|
|
259 my $printf_fmt = '';
|
|
260 my (@data, $label, @underbars);
|
|
261
|
|
262 my $i = 0;
|
|
263 foreach my $col( @cols ) {
|
|
264 $i++;
|
|
265 $printf_fmt .= "\%s\t";
|
|
266
|
|
267 if(defined $self->{'_custom_labels'}->{$i}) {
|
|
268 $label = $self->{'_custom_labels'}->{$i};
|
|
269 }
|
|
270 else {
|
|
271 $label = $map{$col}->[4];
|
|
272 }
|
|
273 push @data, $label;
|
|
274 push @underbars, '-' x length($label);
|
|
275
|
|
276 }
|
|
277 $printf_fmt =~ s/\\t$//;
|
|
278
|
|
279 my $str = sprintf "$printf_fmt\n", @data;
|
|
280
|
|
281 $str =~ s/\t\n/\n/;
|
|
282 $str .= sprintf "$printf_fmt\n", @underbars;
|
|
283
|
|
284 $str =~ s/\t\n/\n/gs;
|
|
285 $self->{'_column_labels'} = $str;
|
|
286 }
|
|
287
|
|
288 # Purpose : Generate a function that will call the appropriate
|
|
289 # methods on the result, hit, and hsp objects to retrieve the column data
|
|
290 # specified in the column spec.
|
|
291 #
|
|
292 # We should only have to go through the column spec once
|
|
293 # for a given ResultTableWriter. To permit this, we'll generate code
|
|
294 # for a method that returns an array of the data for a row of output
|
|
295 # given a result, hit, and hsp object as arguments.
|
|
296 #
|
|
297 sub _set_row_data_func {
|
|
298 my $self = shift;
|
|
299
|
|
300 # Now we need to generate a string that can be eval'd to get the data.
|
|
301 my @cols = $self->columns();
|
|
302 my %map = $self->column_map;
|
|
303 my @data;
|
|
304 while( my $col = shift @cols ) {
|
|
305 my $object = $map{$col}->[1];
|
|
306 my $method = $map{$col}->[2];
|
|
307 my $arg = '';
|
|
308 if( $method =~ m!(\w+)/(\w+)! ) {
|
|
309 $method = $1;
|
|
310 $arg = "\"$2\"";
|
|
311 }
|
|
312 push @data, "\$$object->$method($arg)";
|
|
313 }
|
|
314 my $code = join( ",", @data);
|
|
315
|
|
316 if( $self->verbose > 0 ) {
|
|
317 ## Begin Debugging
|
|
318 $self->debug( "Data to print:\n");
|
|
319 foreach( 0..$#data) { $self->debug( " [". ($_+ 1) . "] $data[$_]\n");}
|
|
320 $self->debug( "CODE:\n$code\n");
|
|
321 $self->debug("Printf format: ". $self->printf_fmt. "\n");
|
|
322 ## End Debugging
|
|
323 }
|
|
324
|
|
325 my $func = sub {
|
|
326 my ($result, $hit, $hsp) = @_;
|
|
327 my @r = eval $code;
|
|
328 # This should reduce the occurrence of those opaque "all zeros" bugs.
|
|
329 if( $@ ) { $self->throw("Trouble in ResultTableWriter::_set_row_data_func() eval: $@\n\n");
|
|
330 }
|
|
331 return @r;
|
|
332 };
|
|
333 $self->{'_row_data_func'} = $func;
|
|
334 }
|
|
335
|
|
336 sub row_data_func { shift->{'_row_data_func'} }
|
|
337
|
|
338
|
|
339 =head2 to_string()
|
|
340
|
|
341 Note: this method is not intended for direct use. The
|
|
342 SearchIO::write_result() method calls it automatically if the writer
|
|
343 is hooked up to a SearchIO object as illustrated in L<the SYNOPSIS section | SYNOPSIS>.
|
|
344
|
|
345 Title : to_string()
|
|
346 :
|
|
347 Usage : print $writer->to_string( $result_obj, [$include_labels] );
|
|
348 :
|
|
349 Argument : $result_obj = A Bio::Search::Result::ResultI object
|
|
350 : $include_labels = boolean, if true column labels are included (default: false)
|
|
351 :
|
|
352 Returns : String containing tab-delimited set of data for each hit
|
|
353 : in a ResultI object. Some data is summed across multiple HSPs.
|
|
354 :
|
|
355 Throws : n/a
|
|
356
|
|
357 =cut
|
|
358
|
|
359 #----------------
|
|
360 sub to_string {
|
|
361 #----------------
|
|
362 my ($self, $result, $include_labels) = @_;
|
|
363
|
|
364 my $str = $include_labels ? $self->column_labels() : '';
|
|
365 my $resultfilter = $self->filter('RESULT');
|
|
366 if( ! defined $resultfilter ||
|
|
367 &{$resultfilter}($result) ) {
|
|
368 my @row_data = &{$self->{'_row_data_func'}}( $result );
|
|
369 $str .= sprintf "$self->{'_printf_fmt'}\n", @row_data;
|
|
370 $str =~ s/\t\n/\n/gs;
|
|
371 }
|
|
372 return $str;
|
|
373 }
|
|
374
|
|
375
|
|
376
|
|
377 sub columns {
|
|
378 my $self = shift;
|
|
379 my @cols;
|
|
380 if( ref $self->{'_cols'} ) {
|
|
381 @cols = @{$self->{'_cols'}};
|
|
382 }
|
|
383 else {
|
|
384 my %map = $self->column_map;
|
|
385 @cols = sort { $map{$a}->[0] <=> $map{$b}->[0] } keys %map;
|
|
386 }
|
|
387 return @cols;
|
|
388 }
|
|
389
|
|
390
|
|
391 =head2 column_labels
|
|
392
|
|
393 Usage : print $result_obj->column_labels();
|
|
394 Purpose : Get column labels for to_string().
|
|
395 Returns : String containing column labels. Tab-delimited.
|
|
396 Argument : n/a
|
|
397 Throws : n/a
|
|
398
|
|
399 =cut
|
|
400
|
|
401 sub column_labels { shift->{'_column_labels'} }
|
|
402
|
|
403 =head2 end_report
|
|
404
|
|
405 Title : end_report
|
|
406 Usage : $self->end_report()
|
|
407 Function: The method to call when ending a report, this is
|
|
408 mostly for cleanup for formats which require you to
|
|
409 have something at the end of the document. Nothing for
|
|
410 a text message.
|
|
411 Returns : string
|
|
412 Args : none
|
|
413
|
|
414 =cut
|
|
415
|
|
416 sub end_report {
|
|
417 return '';
|
|
418 }
|
|
419
|
|
420 =head2 filter
|
|
421
|
|
422 Title : filter
|
|
423 Usage : $writer->filter('hsp', \&hsp_filter);
|
|
424 Function: Filter out either at HSP,Hit,or Result level
|
|
425 Returns : none
|
|
426 Args : string => data type,
|
|
427 CODE reference
|
|
428
|
|
429
|
|
430 =cut
|
|
431
|
|
432
|
|
433 # Is this really needed?
|
|
434 #=head2 signif_format
|
|
435 #
|
|
436 # Usage : $writer->signif_format( [FMT] );
|
|
437 # Purpose : Allows retrieval of the P/Expect exponent values only
|
|
438 # : or as a two-element list (mantissa, exponent).
|
|
439 # Usage : $writer->signif_format('exp');
|
|
440 # : $writer->signif_format('parts');
|
|
441 # Returns : String or '' if not set.
|
|
442 # Argument : String, FMT = 'exp' (return the exponent only)
|
|
443 # : = 'parts'(return exponent + mantissa in 2-elem list)
|
|
444 # : = undefined (return the raw value)
|
|
445 # Comments : P/Expect values are still stored internally as the full,
|
|
446 # : scientific notation value.
|
|
447 #
|
|
448 #=cut
|
|
449 #
|
|
450 ##-------------
|
|
451 #sub signif_format {
|
|
452 ##-------------
|
|
453 # my $self = shift;
|
|
454 # if(@_) { $self->{'_signif_format'} = shift; }
|
|
455 # return $self->{'_signif_format'};
|
|
456 #}
|
|
457
|
|
458 1;
|