Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SearchIO/Writer/ResultTableWriter.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: ResultTableWriter.pm,v 1.13 2002/12/05 13:46:35 heikki Exp $ | |
2 | |
3 =head1 NAME | |
4 | |
5 Bio::SearchIO::Writer::ResultTableWriter - Outputs tab-delimited data for each Bio::Search::Result::ResultI object. | |
6 | |
7 =head1 SYNOPSIS | |
8 | |
9 =head2 Example 1: Using the default columns | |
10 | |
11 use Bio::SearchIO; | |
12 use Bio::SearchIO::Writer::ResultTableWriter; | |
13 | |
14 my $in = Bio::SearchIO->new(); | |
15 | |
16 my $writer = Bio::SearchIO::Writer::ResultTableWriter->new(); | |
17 | |
18 my $out = Bio::SearchIO->new( -writer => $writer ); | |
19 | |
20 while ( my $result = $in->next_result() ) { | |
21 $out->write_result($result, ($in->report_count - 1 ? 0 : 1) ); | |
22 } | |
23 | |
24 =head2 Example 2: Specifying a subset of columns | |
25 | |
26 use Bio::SearchIO; | |
27 use Bio::SearchIO::Writer::ResultTableWriter; | |
28 | |
29 my $in = Bio::SearchIO->new(); | |
30 | |
31 my $writer = Bio::SearchIO::Writer::ResultTableWriter->new( | |
32 -columns => [qw( | |
33 query_name | |
34 query_length | |
35 )] ); | |
36 | |
37 my $out = Bio::SearchIO->new( -writer => $writer, | |
38 -file => ">result.out" ); | |
39 | |
40 while ( my $result = $in->next_result() ) { | |
41 $out->write_result($result, ($in->report_count - 1 ? 0 : 1) ); | |
42 } | |
43 | |
44 =head2 Custom Labels | |
45 | |
46 You can also specify different column labels if you don't want to use | |
47 the defaults. Do this by specifying a C<-labels> hash reference | |
48 parameter when creating the ResultTableWriter object. The keys of the | |
49 hash should be the column number (left-most column = 1) for the label(s) | |
50 you want to specify. Here's an example: | |
51 | |
52 my $writer = Bio::SearchIO::Writer::ResultTableWriter->new( | |
53 -columns => [qw( query_name | |
54 query_length | |
55 query_description )], | |
56 -labels => { 1 => 'QUERY_GI', | |
57 2 => 'QUERY_LENGTH' } ); | |
58 | |
59 | |
60 =head1 DESCRIPTION | |
61 | |
62 Bio::SearchIO::Writer::ResultTableWriter outputs data in tab-delimited | |
63 format for each search result, one row per search result. This is a very | |
64 coarse-grain level of information since it only includes data | |
65 stored in the Bio::Search::Result::ResultI object itself and does not | |
66 include any information about hits or HSPs. | |
67 | |
68 You most likely will never use this object but instead will use one of | |
69 its subclasses: Bio::SearchIO::Writer::HitTableWriter or | |
70 Bio::SearchIO::Writer::HSPTableWriter. | |
71 | |
72 =head2 Available Columns | |
73 | |
74 Here are the columns that can be specified in the C<-columns> | |
75 parameter when creating a ResultTableWriter object. If a C<-columns> parameter | |
76 is not specified, this list, in this order, will be used as the default. | |
77 | |
78 query_name | |
79 query_length | |
80 query_description | |
81 | |
82 For more details about these columns, see the documentation for the | |
83 corresponding method in L<Bio::Search::Result::ResultI|Bio::Search::Result::ResultI>. | |
84 | |
85 =head1 FEEDBACK | |
86 | |
87 =head2 Mailing Lists | |
88 | |
89 User feedback is an integral part of the evolution of this and other | |
90 Bioperl modules. Send your comments and suggestions preferably to one | |
91 of the Bioperl mailing lists. Your participation is much appreciated. | |
92 | |
93 bioperl-l@bioperl.org - General discussion | |
94 http://bio.perl.org/MailList.html - About the mailing lists | |
95 | |
96 =head2 Reporting Bugs | |
97 | |
98 Report bugs to the Bioperl bug tracking system to help us keep track | |
99 the bugs and their resolution. Bug reports can be submitted via email | |
100 or the web: | |
101 | |
102 bioperl-bugs@bio.perl.org | |
103 http://bugzilla.bioperl.org/ | |
104 | |
105 =head1 AUTHOR | |
106 | |
107 Steve Chervitz E<lt>sac@bioperl.orgE<gt> | |
108 | |
109 See L<the FEEDBACK section | FEEDBACK> for where to send bug reports | |
110 and comments. | |
111 | |
112 =head1 COPYRIGHT | |
113 | |
114 Copyright (c) 2001 Steve Chervitz. All Rights Reserved. | |
115 | |
116 This library is free software; you can redistribute it and/or modify | |
117 it under the same terms as Perl itself. | |
118 | |
119 =head1 DISCLAIMER | |
120 | |
121 This software is provided "as is" without warranty of any kind. | |
122 | |
123 =head1 SEE ALSO | |
124 | |
125 L<Bio::SearchIO::Writer::HitTableWriter>, | |
126 L<Bio::SearchIO::Writer::HSPTableWriter> | |
127 | |
128 =head1 METHODS | |
129 | |
130 =cut | |
131 | |
132 | |
133 package Bio::SearchIO::Writer::ResultTableWriter; | |
134 | |
135 use strict; | |
136 use Bio::Root::Root; | |
137 use Bio::SearchIO::SearchWriterI; | |
138 | |
139 use vars qw( @ISA ); | |
140 @ISA = qw( Bio::Root::Root Bio::SearchIO::SearchWriterI ); | |
141 | |
142 # Array fields: column, object, method[/argument], printf format, column label | |
143 # Methods are defined in Bio::Search::Result::ResultI. | |
144 # Tech note: If a bogus method is supplied, it will result in all values to be zero. | |
145 # Don't know why this is. | |
146 my %column_map = ( | |
147 'query_name' => ['1', 'result', 'query_name', 's', 'QUERY' ], | |
148 'query_length' => ['2', 'result', 'query_length', 'd', 'LEN_Q'], | |
149 'query_description' => ['3', 'result', 'query_description', 's', 'DESC_Q'], | |
150 ); | |
151 | |
152 sub column_map { return %column_map } | |
153 | |
154 sub new { | |
155 my ($class, @args) = @_; | |
156 my $self = $class->SUPER::new(@args); | |
157 | |
158 my( $col_spec, $label_spec, | |
159 $filters ) = $self->_rearrange( [qw(COLUMNS | |
160 LABELS | |
161 FILTERS)], @args); | |
162 | |
163 $self->_set_cols( $col_spec ); | |
164 $self->_set_labels( $label_spec ) if $label_spec; | |
165 $self->_set_printf_fmt(); | |
166 $self->_set_row_data_func(); | |
167 $self->_set_column_labels(); | |
168 | |
169 if( defined $filters ) { | |
170 if( !ref($filters) =~ /HASH/i ) { | |
171 $self->warn("Did not provide a hashref for the FILTERS option, ignoring."); | |
172 } else { | |
173 while( my ($type,$code) = each %{$filters} ) { | |
174 $self->filter($type,$code); | |
175 } | |
176 } | |
177 } | |
178 | |
179 | |
180 return $self; | |
181 } | |
182 | |
183 | |
184 # Purpose : Stores the column spec internally. Also performs QC on the | |
185 # user-supplied column specification. | |
186 # | |
187 sub _set_cols { | |
188 my ($self, $col_spec_ref) = @_; | |
189 return if defined $self->{'_cols'}; # only set columns once | |
190 | |
191 my %map = $self->column_map; | |
192 | |
193 if( not defined $col_spec_ref) { | |
194 print STDERR "\nUsing default column map.\n"; | |
195 $col_spec_ref = [ map { $_ } sort { $map{$a}->[0] <=> $map{$b}->[0] } keys %map ]; | |
196 } | |
197 | |
198 if( ref($col_spec_ref) eq 'ARRAY') { | |
199 # printf "%d columns to process\n", scalar(@$col_spec_ref); | |
200 my @col_spec = @{$col_spec_ref}; | |
201 while( my $item = lc(shift @col_spec) ) { | |
202 if( not defined ($map{$item}) ) { | |
203 $self->throw(-class =>'Bio::Root::BadParameter', | |
204 -text => "Unknown column name: $item" | |
205 ); | |
206 } | |
207 push @{$self->{'_cols'}}, $item; | |
208 #print "pushing on to col $col_num, $inner: $item\n"; | |
209 } | |
210 } | |
211 else { | |
212 $self->throw(-class =>'Bio::Root::BadParameter', | |
213 -text => "Can't set columns: not a ARRAY ref", | |
214 -value => $col_spec_ref | |
215 ); | |
216 } | |
217 } | |
218 | |
219 sub _set_printf_fmt { | |
220 my ($self) = @_; | |
221 | |
222 my @cols = $self->columns(); | |
223 my %map = $self->column_map; | |
224 | |
225 my $printf_fmt = ''; | |
226 | |
227 foreach my $col ( @cols ) { | |
228 $printf_fmt .= "\%$map{$col}->[3]\t"; | |
229 } | |
230 | |
231 $printf_fmt =~ s/\\t$//; | |
232 | |
233 $self->{'_printf_fmt'} = $printf_fmt; | |
234 } | |
235 | |
236 sub printf_fmt { shift->{'_printf_fmt'} } | |
237 | |
238 # Sets the data to be used for the labels. | |
239 sub _set_labels { | |
240 my ($self, $label_spec) = @_; | |
241 if( ref($label_spec) eq 'HASH') { | |
242 foreach my $col ( sort { $a <=> $b } keys %$label_spec ) { | |
243 # print "LABEL: $col $label_spec->{$col}\n"; | |
244 $self->{'_custom_labels'}->{$col} = $label_spec->{$col}; | |
245 } | |
246 } | |
247 else { | |
248 $self->throw(-class =>'Bio::Root::BadParameter', | |
249 -text => "Can't set labels: not a HASH ref: $label_spec" | |
250 ); | |
251 } | |
252 } | |
253 | |
254 sub _set_column_labels { | |
255 my $self = shift; | |
256 | |
257 my @cols = $self->columns; | |
258 my %map = $self->column_map; | |
259 my $printf_fmt = ''; | |
260 my (@data, $label, @underbars); | |
261 | |
262 my $i = 0; | |
263 foreach my $col( @cols ) { | |
264 $i++; | |
265 $printf_fmt .= "\%s\t"; | |
266 | |
267 if(defined $self->{'_custom_labels'}->{$i}) { | |
268 $label = $self->{'_custom_labels'}->{$i}; | |
269 } | |
270 else { | |
271 $label = $map{$col}->[4]; | |
272 } | |
273 push @data, $label; | |
274 push @underbars, '-' x length($label); | |
275 | |
276 } | |
277 $printf_fmt =~ s/\\t$//; | |
278 | |
279 my $str = sprintf "$printf_fmt\n", @data; | |
280 | |
281 $str =~ s/\t\n/\n/; | |
282 $str .= sprintf "$printf_fmt\n", @underbars; | |
283 | |
284 $str =~ s/\t\n/\n/gs; | |
285 $self->{'_column_labels'} = $str; | |
286 } | |
287 | |
288 # Purpose : Generate a function that will call the appropriate | |
289 # methods on the result, hit, and hsp objects to retrieve the column data | |
290 # specified in the column spec. | |
291 # | |
292 # We should only have to go through the column spec once | |
293 # for a given ResultTableWriter. To permit this, we'll generate code | |
294 # for a method that returns an array of the data for a row of output | |
295 # given a result, hit, and hsp object as arguments. | |
296 # | |
297 sub _set_row_data_func { | |
298 my $self = shift; | |
299 | |
300 # Now we need to generate a string that can be eval'd to get the data. | |
301 my @cols = $self->columns(); | |
302 my %map = $self->column_map; | |
303 my @data; | |
304 while( my $col = shift @cols ) { | |
305 my $object = $map{$col}->[1]; | |
306 my $method = $map{$col}->[2]; | |
307 my $arg = ''; | |
308 if( $method =~ m!(\w+)/(\w+)! ) { | |
309 $method = $1; | |
310 $arg = "\"$2\""; | |
311 } | |
312 push @data, "\$$object->$method($arg)"; | |
313 } | |
314 my $code = join( ",", @data); | |
315 | |
316 if( $self->verbose > 0 ) { | |
317 ## Begin Debugging | |
318 $self->debug( "Data to print:\n"); | |
319 foreach( 0..$#data) { $self->debug( " [". ($_+ 1) . "] $data[$_]\n");} | |
320 $self->debug( "CODE:\n$code\n"); | |
321 $self->debug("Printf format: ". $self->printf_fmt. "\n"); | |
322 ## End Debugging | |
323 } | |
324 | |
325 my $func = sub { | |
326 my ($result, $hit, $hsp) = @_; | |
327 my @r = eval $code; | |
328 # This should reduce the occurrence of those opaque "all zeros" bugs. | |
329 if( $@ ) { $self->throw("Trouble in ResultTableWriter::_set_row_data_func() eval: $@\n\n"); | |
330 } | |
331 return @r; | |
332 }; | |
333 $self->{'_row_data_func'} = $func; | |
334 } | |
335 | |
336 sub row_data_func { shift->{'_row_data_func'} } | |
337 | |
338 | |
339 =head2 to_string() | |
340 | |
341 Note: this method is not intended for direct use. The | |
342 SearchIO::write_result() method calls it automatically if the writer | |
343 is hooked up to a SearchIO object as illustrated in L<the SYNOPSIS section | SYNOPSIS>. | |
344 | |
345 Title : to_string() | |
346 : | |
347 Usage : print $writer->to_string( $result_obj, [$include_labels] ); | |
348 : | |
349 Argument : $result_obj = A Bio::Search::Result::ResultI object | |
350 : $include_labels = boolean, if true column labels are included (default: false) | |
351 : | |
352 Returns : String containing tab-delimited set of data for each hit | |
353 : in a ResultI object. Some data is summed across multiple HSPs. | |
354 : | |
355 Throws : n/a | |
356 | |
357 =cut | |
358 | |
359 #---------------- | |
360 sub to_string { | |
361 #---------------- | |
362 my ($self, $result, $include_labels) = @_; | |
363 | |
364 my $str = $include_labels ? $self->column_labels() : ''; | |
365 my $resultfilter = $self->filter('RESULT'); | |
366 if( ! defined $resultfilter || | |
367 &{$resultfilter}($result) ) { | |
368 my @row_data = &{$self->{'_row_data_func'}}( $result ); | |
369 $str .= sprintf "$self->{'_printf_fmt'}\n", @row_data; | |
370 $str =~ s/\t\n/\n/gs; | |
371 } | |
372 return $str; | |
373 } | |
374 | |
375 | |
376 | |
377 sub columns { | |
378 my $self = shift; | |
379 my @cols; | |
380 if( ref $self->{'_cols'} ) { | |
381 @cols = @{$self->{'_cols'}}; | |
382 } | |
383 else { | |
384 my %map = $self->column_map; | |
385 @cols = sort { $map{$a}->[0] <=> $map{$b}->[0] } keys %map; | |
386 } | |
387 return @cols; | |
388 } | |
389 | |
390 | |
391 =head2 column_labels | |
392 | |
393 Usage : print $result_obj->column_labels(); | |
394 Purpose : Get column labels for to_string(). | |
395 Returns : String containing column labels. Tab-delimited. | |
396 Argument : n/a | |
397 Throws : n/a | |
398 | |
399 =cut | |
400 | |
401 sub column_labels { shift->{'_column_labels'} } | |
402 | |
403 =head2 end_report | |
404 | |
405 Title : end_report | |
406 Usage : $self->end_report() | |
407 Function: The method to call when ending a report, this is | |
408 mostly for cleanup for formats which require you to | |
409 have something at the end of the document. Nothing for | |
410 a text message. | |
411 Returns : string | |
412 Args : none | |
413 | |
414 =cut | |
415 | |
416 sub end_report { | |
417 return ''; | |
418 } | |
419 | |
420 =head2 filter | |
421 | |
422 Title : filter | |
423 Usage : $writer->filter('hsp', \&hsp_filter); | |
424 Function: Filter out either at HSP,Hit,or Result level | |
425 Returns : none | |
426 Args : string => data type, | |
427 CODE reference | |
428 | |
429 | |
430 =cut | |
431 | |
432 | |
433 # Is this really needed? | |
434 #=head2 signif_format | |
435 # | |
436 # Usage : $writer->signif_format( [FMT] ); | |
437 # Purpose : Allows retrieval of the P/Expect exponent values only | |
438 # : or as a two-element list (mantissa, exponent). | |
439 # Usage : $writer->signif_format('exp'); | |
440 # : $writer->signif_format('parts'); | |
441 # Returns : String or '' if not set. | |
442 # Argument : String, FMT = 'exp' (return the exponent only) | |
443 # : = 'parts'(return exponent + mantissa in 2-elem list) | |
444 # : = undefined (return the raw value) | |
445 # Comments : P/Expect values are still stored internally as the full, | |
446 # : scientific notation value. | |
447 # | |
448 #=cut | |
449 # | |
450 ##------------- | |
451 #sub signif_format { | |
452 ##------------- | |
453 # my $self = shift; | |
454 # if(@_) { $self->{'_signif_format'} = shift; } | |
455 # return $self->{'_signif_format'}; | |
456 #} | |
457 | |
458 1; |