0
|
1 # $Id: HitTableWriter.pm,v 1.14 2002/12/24 15:46:47 jason Exp $
|
|
2
|
|
3 =head1 NAME
|
|
4
|
|
5 Bio::SearchIO::Writer::HitTableWriter - Tab-delimited data for Bio::Search::Hit::HitI objects
|
|
6
|
|
7 =head1 SYNOPSIS
|
|
8
|
|
9 =head2 Example 1: Using the default columns
|
|
10
|
|
11 use Bio::SearchIO;
|
|
12 use Bio::SearchIO::Writer::HitTableWriter;
|
|
13
|
|
14 my $in = Bio::SearchIO->new();
|
|
15
|
|
16 my $writer = Bio::SearchIO::Writer::HitTableWriter->new();
|
|
17
|
|
18 my $out = Bio::SearchIO->new( -writer => $writer );
|
|
19
|
|
20 while ( my $result = $in->next_result() ) {
|
|
21 $out->write_result($result, ($in->report_count - 1 ? 0 : 1) );
|
|
22 }
|
|
23
|
|
24 =head2 Example 2: Specifying a subset of columns
|
|
25
|
|
26 use Bio::SearchIO;
|
|
27 use Bio::SearchIO::Writer::HitTableWriter;
|
|
28
|
|
29 my $in = Bio::SearchIO->new();
|
|
30
|
|
31 my $writer = Bio::SearchIO::Writer::HitTableWriter->new(
|
|
32 -columns => [qw(
|
|
33 query_name
|
|
34 query_length
|
|
35 hit_name
|
|
36 hit_length
|
|
37 frac_identical_query
|
|
38 expect
|
|
39 )] );
|
|
40
|
|
41 my $out = Bio::SearchIO->new( -writer => $writer,
|
|
42 -file => ">searchio.out" );
|
|
43
|
|
44 while ( my $result = $in->next_result() ) {
|
|
45 $out->write_result($result, ($in->report_count - 1 ? 0 : 1) );
|
|
46 }
|
|
47
|
|
48 =head2 Custom Labels
|
|
49
|
|
50 You can also specify different column labels if you don't want to use
|
|
51 the defaults. Do this by specifying a C<-labels> hash reference
|
|
52 parameter when creating the HitTableWriter object. The keys of the
|
|
53 hash should be the column number (left-most column = 1) for the label(s)
|
|
54 you want to specify. Here's an example:
|
|
55
|
|
56 my $writer = Bio::SearchIO::Writer::HitTableWriter->new(
|
|
57 -columns => [qw( query_name
|
|
58 query_length
|
|
59 hit_name
|
|
60 hit_length )],
|
|
61 -labels => { 1 => 'QUERY_GI',
|
|
62 3 => 'HIT_IDENTIFIER' } );
|
|
63
|
|
64
|
|
65 =head1 DESCRIPTION
|
|
66
|
|
67 Bio::SearchIO::Writer::HitTableWriter outputs summary data
|
|
68 for each Hit within a search result. Output is in tab-delimited format,
|
|
69 one row per Hit.
|
|
70
|
|
71 The reason why this is considered summary data is that if a hit
|
|
72 contains multiple HSPs, the HSPs will be tiled and
|
|
73 the data represents a summary across all HSPs.
|
|
74 See below for which columns are affected.
|
|
75 See the docs in L<Bio::Search::Hit::BlastHit|Bio::Search::Hit::BlastHit>
|
|
76 for more details on HSP tiling.
|
|
77
|
|
78 =head2 Available Columns
|
|
79
|
|
80 Here are the columns that can be specified in the C<-columns>
|
|
81 parameter when creating a HitTableWriter object. If a C<-columns> parameter
|
|
82 is not specified, this list, in this order, will be used as the default.
|
|
83
|
|
84 query_name # Sequence identifier of the query.
|
|
85 query_length # Full length of the query sequence
|
|
86 hit_name # Sequence identifier of the hit
|
|
87 hit_length # Full length of the hit sequence
|
|
88 round # Round number for hit (PSI-BLAST)
|
|
89 expect # Expect value for the alignment
|
|
90 score # Score for the alignment (e.g., BLAST score)
|
|
91 bits # Bit score for the alignment
|
|
92 num_hsps # Number of HSPs (not the "N" value)
|
|
93 frac_identical_query* # fraction of identical substitutions in query
|
|
94 frac_identical_hit* # fraction of identical substitutions in hit
|
|
95 frac_conserved_query* # fraction of conserved substitutions in query
|
|
96 frac_conserved_hit* # fraction of conserved substitutions in hit
|
|
97 frac_aligned_query* # fraction of the query sequence that is aligned
|
|
98 frac_aligned_hit* # fraction of the hit sequence that is aligned
|
|
99 length_aln_query* # Length of the aligned portion of the query sequence
|
|
100 length_aln_hit* # Length of the aligned portion of the hit sequence
|
|
101 gaps_query* # Number of gaps in the aligned query sequence
|
|
102 gaps_hit* # Number of gaps in the aligned hit sequence
|
|
103 gaps_total* # Number of gaps in the aligned query and hit sequences
|
|
104 start_query* # Starting coordinate of the aligned portion of the query sequence
|
|
105 end_query* # Ending coordinate of the aligned portion of the query sequence
|
|
106 start_hit* # Starting coordinate of the aligned portion of the hit sequence
|
|
107 end_hit* # Ending coordinate of the aligned portion of the hit sequence
|
|
108 strand_query # Strand of the aligned query sequence
|
|
109 strand_hit # Strand of the aligned hit sequence
|
|
110 frame # Frame of the alignment (0,1,2)
|
|
111 ambiguous_aln # Ambiguous alignment indicator ('qs', 'q', 's')
|
|
112 hit_description # Full description of the hit sequence
|
|
113 query_description # Full description of the query sequence
|
|
114
|
|
115 Items marked with a C<*> report data summed across all HSPs
|
|
116 after tiling them to avoid counting data from overlapping regions
|
|
117 multiple times.
|
|
118
|
|
119 For more details about these columns, see the documentation for the
|
|
120 corresponding method in Bio::Search::Hit::BlastHit.
|
|
121
|
|
122 =head1 TODO
|
|
123
|
|
124 Figure out the best way to incorporate algorithm-specific score columns.
|
|
125 The best route is probably to have algorithm-specific subclasses
|
|
126 (e.g., BlastHitTableWriter, FastaHitTableWriter).
|
|
127
|
|
128 =head1 FEEDBACK
|
|
129
|
|
130 =head2 Mailing Lists
|
|
131
|
|
132 User feedback is an integral part of the evolution of this and other
|
|
133 Bioperl modules. Send your comments and suggestions preferably to one
|
|
134 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
135
|
|
136 bioperl-l@bioperl.org - General discussion
|
|
137 http://bioperl.org/MailList.html - About the mailing lists
|
|
138
|
|
139 =head2 Reporting Bugs
|
|
140
|
|
141 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
142 of the bugs and their resolution. Bug reports can be submitted via email
|
|
143 or the web:
|
|
144
|
|
145 bioperl-bugs@bio.perl.org
|
|
146 http://bugzilla.bioperl.org/
|
|
147
|
|
148 =head1 AUTHOR
|
|
149
|
|
150 Steve Chervitz E<lt>sac@bioperl.orgE<gt>
|
|
151
|
|
152 See L<the FEEDBACK section | FEEDBACK> for where to send bug reports
|
|
153 and comments.
|
|
154
|
|
155 =head1 COPYRIGHT
|
|
156
|
|
157 Copyright (c) 2001, 2002 Steve Chervitz. All Rights Reserved.
|
|
158
|
|
159 This library is free software; you can redistribute it and/or modify
|
|
160 it under the same terms as Perl itself.
|
|
161
|
|
162 =head1 DISCLAIMER
|
|
163
|
|
164 This software is provided "as is" without warranty of any kind.
|
|
165
|
|
166 =head1 SEE ALSO
|
|
167
|
|
168 L<Bio::SearchIO::Writer::HitTableWriter>,
|
|
169 L<Bio::SearchIO::Writer::ResultTableWriter>
|
|
170
|
|
171 =head1 METHODS
|
|
172
|
|
173 =cut
|
|
174
|
|
175 package Bio::SearchIO::Writer::HitTableWriter;
|
|
176
|
|
177 use strict;
|
|
178 use Bio::SearchIO::Writer::ResultTableWriter;
|
|
179
|
|
180 use vars qw( @ISA );
|
|
181 @ISA = qw( Bio::SearchIO::Writer::ResultTableWriter );
|
|
182
|
|
183
|
|
184 # Array fields: column, object, method[/argument], printf format,
|
|
185 # column label. Methods for result object are defined in
|
|
186 # Bio::Search::Result::ResultI. Methods for hit object are defined in
|
|
187 # Bio::Search::Hit::HitI. Tech note: If a bogus method is supplied,
|
|
188 # it will result in all values being zero. Don't know why this is.
|
|
189
|
|
190 # TODO (maybe): Allow specification of separate mantissa/exponent for
|
|
191 # significance data.
|
|
192
|
|
# Lookup table of every column this writer can emit, keyed by column name.
# Each value is an array reference holding, in order:
#   column number, source object ('result' or 'hit'), method[/argument],
#   printf format, column label.
# The table is assembled from an ordered list so that the column number
# (kept as a string) is simply the 1-based position of the entry below.
my %column_map;
{
    my @ordered_specs = (
        [ 'query_name',           'result', 'query_name',           's',   'QUERY'   ],
        [ 'query_length',         'result', 'query_length',         'd',   'LEN_Q'   ],
        [ 'hit_name',             'hit',    'name',                 's',   'HIT'     ],
        [ 'hit_length',           'hit',    'length',               'd',   'LEN_H'   ],
        [ 'round',                'hit',    'iteration',            'd',   'ROUND'   ],
        [ 'expect',               'hit',    'significance',         '.1e', 'EXPCT'   ],
        [ 'score',                'hit',    'raw_score',            'd',   'SCORE'   ],
        [ 'bits',                 'hit',    'bits',                 'd',   'BITS'    ],
        [ 'num_hsps',             'hit',    'num_hsps',             'd',   'HSPS'    ],
        [ 'frac_identical_query', 'hit',    'frac_identical/query', '.2f', 'FR_IDQ'  ],
        [ 'frac_identical_hit',   'hit',    'frac_identical/hit',   '.2f', 'FR_IDH'  ],
        [ 'frac_conserved_query', 'hit',    'frac_conserved/query', '.2f', 'FR_CNQ'  ],
        [ 'frac_conserved_hit',   'hit',    'frac_conserved/hit',   '.2f', 'FR_CNH'  ],
        [ 'frac_aligned_query',   'hit',    'frac_aligned_query',   '.2f', 'FR_ALQ'  ],
        [ 'frac_aligned_hit',     'hit',    'frac_aligned_hit',     '.2f', 'FR_ALH'  ],
        [ 'length_aln_query',     'hit',    'length_aln/query',     'd',   'LN_ALQ'  ],
        [ 'length_aln_hit',       'hit',    'length_aln/hit',       'd',   'LN_ALH'  ],
        [ 'gaps_query',           'hit',    'gaps/query',           'd',   'GAPS_Q'  ],
        [ 'gaps_hit',             'hit',    'gaps/hit',             'd',   'GAPS_H'  ],
        [ 'gaps_total',           'hit',    'gaps/total',           'd',   'GAPS_QH' ],
        [ 'start_query',          'hit',    'start/query',          'd',   'START_Q' ],
        [ 'end_query',            'hit',    'end/query',            'd',   'END_Q'   ],
        [ 'start_hit',            'hit',    'start/hit',            'd',   'START_H' ],
        [ 'end_hit',              'hit',    'end/hit',              'd',   'END_H'   ],
        [ 'strand_query',         'hit',    'strand/query',         's',   'STRND_Q' ],
        [ 'strand_hit',           'hit',    'strand/hit',           's',   'STRND_H' ],
        [ 'frame',                'hit',    'frame',                'd',   'FRAME'   ],
        [ 'ambiguous_aln',        'hit',    'ambiguous_aln',        's',   'AMBIG'   ],
        [ 'hit_description',      'hit',    'description',          's',   'DESC_H'  ],
        [ 'query_description',    'result', 'query_description',    's',   'DESC_Q'  ],
    );

    my $position = 0;
    foreach my $spec (@ordered_specs) {
        my ($column_name, @details) = @{$spec};
        $position++;
        $column_map{$column_name} = [ "$position", @details ];
    }
}

# Returns the column table as a flat key/value list (a hash in list context).
sub column_map {
    return %column_map;
}
|
|
227
|
|
228
|
|
229 =head2 to_string()
|
|
230
|
|
231 Note: this method is not intended for direct use. The
|
|
232 SearchIO::write_result() method calls it automatically if the writer
|
|
233 is hooked up to a SearchIO object as illustrated in
|
|
234 L<the SYNOPSIS section | SYNOPSIS>.
|
|
235
|
|
236 Title : to_string()
|
|
237 :
|
|
238 Usage : print $writer->to_string( $result_obj, [$include_labels] );
|
|
239 :
|
|
240 Argument : $result_obj = A Bio::Search::Result::BlastResult object
|
|
241 : $include_labels = boolean, if true column labels are included (default: false)
|
|
242 :
|
|
243 Returns : String containing tab-delimited set of data for each hit
|
|
244 : in a BlastResult object. Some data is summed across multiple HSPs.
|
|
245 :
|
|
246 Throws : n/a
|
|
247
|
|
248 =cut
|
|
249
|
|
250 #----------------
|
|
sub to_string {
    # Builds the tab-delimited table for one result object.
    #   $result         - object providing hits() (and optionally rewind())
    #   $include_labels - true to prepend the column label row
    # Returns the table as a single string, one line per hit that passes
    # the HIT filter; only the label row (or '') if the RESULT filter
    # rejects the result.
    my ($self, $result, $include_labels) = @_;

    my $table       = $include_labels ? $self->column_labels() : '';
    my $row_builder = $self->row_data_func;
    my $row_format  = $self->printf_fmt;

    # Fetch each filter in scalar context. Calling both inside a single
    # list assignment would let an empty-list return for the RESULT filter
    # shift the HIT filter into the RESULT slot.
    my $result_filter = $self->filter('RESULT');
    my $hit_filter    = $self->filter('HIT');

    if ( !defined $result_filter || $result_filter->($result) ) {

        # Ensure hit iteration starts at the beginning.
        $result->rewind() if $result->can('rewind');

        foreach my $hit ( $result->hits ) {
            next if defined $hit_filter && !$hit_filter->($hit);

            # Undefined values are emitted as 0 so sprintf never sees undef.
            my @row_data = map { defined $_ ? $_ : 0 } $row_builder->($result, $hit);
            $table .= sprintf "$row_format\n", @row_data;
        }
    }

    # Remove the tab left dangling in front of each line break.
    $table =~ s/\t\n/\n/gs;
    return $table;
}
|
|
274
|
|
275 =head2 end_report
|
|
276
|
|
277 Title : end_report
|
|
278 Usage : $self->end_report()
|
|
279 Function: The method to call when ending a report, this is
|
|
280 mostly for cleanup for formats which require you to
|
|
281 have something at the end of the document. Nothing for
|
|
282 a text message.
|
|
283 Returns : string
|
|
284 Args : none
|
|
285
|
|
286 =cut
|
|
287
|
|
sub end_report {
    # A tab-delimited table needs no closing text, so the report
    # terminator is always the empty string.
    my $terminator = '';
    return $terminator;
}
|
|
291
|
|
292
|
|
293 =head2 filter
|
|
294
|
|
295 Title : filter
|
|
296 Usage : $writer->filter('hsp', \&hsp_filter);
|
|
297 Function: Filter out either at HSP,Hit,or Result level
|
|
298 Returns : none
|
|
299 Args : string => data type,
|
|
300 CODE reference
|
|
301
|
|
302
|
|
303 =cut
|
|
304
|
|
305 1;
|