0
|
1 # $Id: HSPTableWriter.pm,v 1.12 2002/11/23 15:32:24 jason Exp $
|
|
2
|
|
3 =head1 NAME
|
|
4
|
|
5 Bio::SearchIO::Writer::HSPTableWriter - Tab-delimited data for Bio::Search::HSP::HSPI objects
|
|
6
|
|
7 =head1 SYNOPSIS
|
|
8
|
|
9 =head2 Example 1: Using the default columns
|
|
10
|
|
11 use Bio::SearchIO;
|
|
12 use Bio::SearchIO::Writer::HSPTableWriter;
|
|
13
|
|
14 my $in = Bio::SearchIO->new();
|
|
15
|
|
16 my $writer = Bio::SearchIO::Writer::HSPTableWriter->new();
|
|
17
|
|
18 my $out = Bio::SearchIO->new( -writer => $writer );
|
|
19
|
|
20 while ( my $result = $in->next_result() ) {
|
|
21 $out->write_result($result, ($in->report_count - 1 ? 0 : 1) );
|
|
22 }
|
|
23
|
|
24 =head2 Example 2: Specifying a subset of columns
|
|
25
|
|
26 use Bio::SearchIO;
|
|
27 use Bio::SearchIO::Writer::HSPTableWriter;
|
|
28
|
|
29 my $in = Bio::SearchIO->new();
|
|
30
|
|
31 my $writer = Bio::SearchIO::Writer::HSPTableWriter->new(
|
|
32 -columns => [qw(
|
|
33 query_name
|
|
34 query_length
|
|
35 hit_name
|
|
36 hit_length
|
|
37 rank
|
|
38 frac_identical_query
|
|
39 expect
|
|
40 )] );
|
|
41
|
|
42 my $out = Bio::SearchIO->new( -writer => $writer,
|
|
43 -file => ">searchio.out" );
|
|
44
|
|
45 while ( my $result = $in->next_result() ) {
|
|
46 $out->write_result($result, ($in->report_count - 1 ? 0 : 1) );
|
|
47 }
|
|
48
|
|
49 =head2 Custom Labels
|
|
50
|
|
51 You can also specify different column labels if you don't want to use
|
|
52 the defaults. Do this by specifying a C<-labels> hash reference
|
|
53 parameter when creating the HSPTableWriter object. The keys of the
|
|
54 hash should be the column number (left-most column = 1) for the label(s)
|
|
55 you want to specify. Here's an example:
|
|
56
|
|
57 my $writer = Bio::SearchIO::Writer::HSPTableWriter->new(
|
|
58 -columns => [qw( query_name
|
|
59 query_length
|
|
60 hit_name
|
|
61 hit_length )],
|
|
62 -labels => { 1 => 'QUERY_GI',
|
|
63 3 => 'HIT_IDENTIFIER' } );
|
|
64
|
|
65
|
|
66 =head1 DESCRIPTION
|
|
67
|
|
68 Bio::SearchIO::Writer::HSPTableWriter generates output at the finest
|
|
69 level of granularity for data within a search result. Data for each HSP
|
|
70 within each hit in a search result is output in tab-delimited format,
|
|
71 one row per HSP.
|
|
72
|
|
73 =head2 Available Columns
|
|
74
|
|
75 Here are the columns that can be specified in the C<-columns>
|
|
76 parameter when creating a HSPTableWriter object. If a C<-columns> parameter
|
|
77 is not specified, this list, in this order, will be used as the default.
|
|
78
|
|
    query_name             # Sequence identifier of the query.
    query_length           # Full length of the query sequence
    hit_name               # Sequence identifier of the hit
    hit_length             # Full length of the hit sequence
    round                  # Round number for hit (PSI-BLAST)
    rank                   # Rank of the HSP (value of the HSP object's rank method)
    expect                 # Expect value for the alignment
    score                  # Score for the alignment (e.g., BLAST score)
    bits                   # Bit score for the alignment
    frac_identical_query   # fraction of identical substitutions in query
    frac_identical_hit     # fraction of identical substitutions in hit
    frac_conserved_query   # fraction of conserved substitutions in query
    frac_conserved_hit     # fraction of conserved substitutions in hit
    length_aln_query       # Length of the aligned portion of the query sequence
    length_aln_hit         # Length of the aligned portion of the hit sequence
    gaps_query             # Number of gaps in the aligned query sequence
    gaps_hit               # Number of gaps in the aligned hit sequence
    gaps_total             # Number of gaps in the aligned query and hit sequences
    start_query            # Starting coordinate of the aligned portion of the query sequence
    end_query              # Ending coordinate of the aligned portion of the query sequence
    start_hit              # Starting coordinate of the aligned portion of the hit sequence
    end_hit                # Ending coordinate of the aligned portion of the hit sequence
    strand_query           # Strand of the aligned query sequence
    strand_hit             # Strand of the aligned hit sequence
    frame                  # Reading frame of the aligned query sequence
    hit_description        # Full description of the hit sequence
    query_description      # Full description of the query sequence
|
|
106
|
|
107 For more details about these columns, see the documentation for the
|
|
108 corresponding method in Bio::Search::HSP::HSPI.
|
|
109
|
|
110 =head1 TODO
|
|
111
|
|
Figure out the best way to incorporate algorithm-specific score columns.
The best route is probably to have algorithm-specific subclasses
(e.g., BlastHSPTableWriter, FastaHSPTableWriter).
|
|
115
|
|
116 =head1 FEEDBACK
|
|
117
|
|
118 =head2 Mailing Lists
|
|
119
|
|
120 User feedback is an integral part of the evolution of this and other
|
|
121 Bioperl modules. Send your comments and suggestions preferably to one
|
|
122 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
123
|
|
124 bioperl-l@bioperl.org - General discussion
|
|
125 http://bio.perl.org/MailList.html - About the mailing lists
|
|
126
|
|
127 =head2 Reporting Bugs
|
|
128
|
|
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via email
or the web:
|
|
132
|
|
133 bioperl-bugs@bio.perl.org
|
|
134 http://bugzilla.bioperl.org/
|
|
135
|
|
136 =head1 AUTHOR
|
|
137
|
|
138 Steve Chervitz E<lt>sac@bioperl.orgE<gt>
|
|
139
|
|
140 See L<the FEEDBACK section | FEEDBACK> for where to send bug reports
|
|
141 and comments.
|
|
142
|
|
143 =head1 COPYRIGHT
|
|
144
|
|
145 Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
|
|
146
|
|
147 This library is free software; you can redistribute it and/or modify
|
|
148 it under the same terms as Perl itself.
|
|
149
|
|
150 =head1 DISCLAIMER
|
|
151
|
|
152 This software is provided "as is" without warranty of any kind.
|
|
153
|
|
154 =head1 SEE ALSO
|
|
155
|
|
156 Bio::SearchIO::Writer::HitTableWriter
|
|
157 Bio::SearchIO::Writer::ResultTableWriter
|
|
158
|
|
159 =head1 METHODS
|
|
160
|
|
161 =cut
|
|
162
|
|
163 package Bio::SearchIO::Writer::HSPTableWriter;
|
|
164
|
|
165 use strict;
|
|
166 use Bio::SearchIO::Writer::ResultTableWriter;
|
|
167
|
|
168 use vars qw( @ISA );
|
|
169 @ISA = qw( Bio::SearchIO::Writer::ResultTableWriter );
|
|
170
|
|
171
|
|
# Column definitions, listed in default output order.
#
# Each row below reads: column key, object type, method[/argument],
# printf format, column label.  The value stored for a key is an array
# reference whose first element is the 1-based column position (kept as a
# string), followed by the remaining fields of the row.
#
# Object types and where their methods are defined:
#   result - Bio::Search::Result::ResultI
#   hit    - Bio::Search::Hit::HitI
#   hsp    - Bio::Search::HSP::HSPI
#
# Tech note (from the original author): supplying a bogus method reportedly
# makes every value come out as zero; the cause is unknown.
# TODO (maybe): Allow specification of signif_format (i.e., separate
# mantissa/exponent).
my %column_map;
{
    my $position = 0;
    for my $spec (
        [ query_name           => qw( result query_name           s   QUERY   ) ],
        [ query_length         => qw( result query_length         d   LEN_Q   ) ],
        [ hit_name             => qw( hit    name                 s   HIT     ) ],
        [ hit_length           => qw( hit    hit_length           d   LEN_H   ) ],
        # NOTE(review): the trailing 'hit' is a sixth field carried over
        # unchanged from the original table -- confirm whether any consumer
        # reads it before removing it.
        [ round                => qw( hit    iteration            d   ROUND   hit ) ],
        [ rank                 => qw( hsp    rank                 d   RANK    ) ],
        [ expect               => qw( hsp    expect               .1e EXPCT   ) ],
        [ score                => qw( hsp    score                d   SCORE   ) ],
        [ bits                 => qw( hsp    bits                 d   BITS    ) ],
        [ frac_identical_query => qw( hsp    frac_identical/query .2f FR_IDQ  ) ],
        [ frac_identical_hit   => qw( hsp    frac_identical/hit   .2f FR_IDH  ) ],
        [ frac_conserved_query => qw( hsp    frac_conserved/query .2f FR_CNQ  ) ],
        [ frac_conserved_hit   => qw( hsp    frac_conserved/hit   .2f FR_CNH  ) ],
        [ length_aln_query     => qw( hsp    length/query         d   LN_ALQ  ) ],
        [ length_aln_hit       => qw( hsp    length/hit           d   LN_ALH  ) ],
        [ gaps_query           => qw( hsp    gaps/query           d   GAPS_Q  ) ],
        [ gaps_hit             => qw( hsp    gaps/hit             d   GAPS_H  ) ],
        [ gaps_total           => qw( hsp    gaps/total           d   GAPS_QH ) ],
        [ start_query          => qw( hsp    start/query          d   START_Q ) ],
        [ end_query            => qw( hsp    end/query            d   END_Q   ) ],
        [ start_hit            => qw( hsp    start/hit            d   START_H ) ],
        [ end_hit              => qw( hsp    end/hit              d   END_H   ) ],
        [ strand_query         => qw( hsp    strand/query         d   STRND_Q ) ],
        [ strand_hit           => qw( hsp    strand/hit           d   STRND_H ) ],
        [ frame                => qw( hsp    frame                s   FRAME   ) ],
        [ hit_description      => qw( hit    hit_description      s   DESC_H  ) ],
        [ query_description    => qw( result query_description    s   DESC_Q  ) ],
      )
    {
        my ( $key, @fields ) = @{$spec};
        $position++;

        # Position is stored as a string, matching the quoted column
        # numbers used by the hand-written table this replaces.
        $column_map{$key} = [ "$position", @fields ];
    }
}

# Accessor for the column table.  Returns the hash itself (a flat key/value
# list in list context); the per-column array references are the same ones
# held in the table above, not copies.
sub column_map {
    return %column_map;
}
|
|
210
|
|
211
|
|
212 =head2 to_string()
|
|
213
|
|
214 Note: this method is not intended for direct use.
|
|
215 The SearchIO::write_result() method calls it automatically
|
|
216 if the writer is hooked up to a SearchIO object as illustrated in
|
|
217 L<the SYNOPSIS section | SYNOPSIS>.
|
|
218
|
|
219 Title : to_string()
|
|
220 :
|
|
221 Usage : print $writer->to_string( $result_obj, [$include_labels] );
|
|
222 :
|
|
223 Argument : $result_obj = A Bio::Search::Result::ResultI object
|
|
224 : $include_labels = boolean, if true column labels are included (default: false)
|
|
225 :
|
|
226 Returns : String containing tab-delimited set of data for each HSP
|
|
227 : in each Hit of the supplied ResultI object.
|
|
228 :
|
|
229 Throws : n/a
|
|
230
|
|
231 =cut
|
|
232
|
|
sub to_string {
    # Render one search result as tab-delimited text, one row per HSP.
    #
    # Arguments:
    #   $result         - result object; must provide next_hit, and its hits
    #                     must provide next_hsp.  rewind is called on either
    #                     when available.
    #   $include_labels - boolean; when true the string starts with the
    #                     writer's column_labels() output.
    # Returns: the assembled string (possibly empty).
    my ($self, $result, $include_labels) = @_;

    # Start with the header row only when the caller asked for it.
    my $str = $include_labels ? $self->column_labels() : '';

    # Fetch each filter in scalar context, one assignment per filter.
    # Collecting the three calls in a single list assignment would let an
    # accessor that returns an empty list for an unset filter shift the
    # remaining filters into the wrong slots (e.g. an HSP filter being
    # applied to the result object).
    my $resultfilter = $self->filter('RESULT');
    my $hitfilter    = $self->filter('HIT');
    my $hspfilter    = $self->filter('HSP');

    # A result rejected by its filter contributes no rows at all.
    if ( !defined $resultfilter || $resultfilter->($result) ) {
        my $func_ref   = $self->row_data_func;
        my $printf_fmt = $self->printf_fmt;

        # Ensure iteration starts from the first hit.
        $result->rewind() if $result->can('rewind');

        while ( my $hit = $result->next_hit ) {
            next if defined $hitfilter && !$hitfilter->($hit);

            # Ensure iteration starts from the first HSP of this hit.
            $hit->rewind if $hit->can('rewind');

            while ( my $hsp = $hit->next_hsp ) {
                next if defined $hspfilter && !$hspfilter->($hsp);

                my @row_data = $func_ref->($result, $hit, $hsp);
                $str .= sprintf "$printf_fmt\n", @row_data;
            }
        }
    }

    # Drop a tab left dangling immediately before any line end.
    $str =~ s/\t\n/\n/gs;
    return $str;
}
|
|
259
|
|
260 =head2 end_report
|
|
261
|
|
262 Title : end_report
|
|
263 Usage : $self->end_report()
|
|
264 Function: The method to call when ending a report, this is
|
|
265 mostly for cleanup for formats which require you to
|
|
266 have something at the end of the document. Nothing for
|
|
267 a text message.
|
|
268 Returns : string
|
|
269 Args : none
|
|
270
|
|
271 =cut
|
|
272
|
|
sub end_report {
    # This writer appends nothing when a report ends, so the closing
    # text is always the empty string.
    my $trailer = '';
    return $trailer;
}
|
|
276
|
|
277 =head2 filter
|
|
278
|
|
279 Title : filter
|
|
280 Usage : $writer->filter('hsp', \&hsp_filter);
|
|
281 Function: Filter out either at HSP,Hit,or Result level
|
|
282 Returns : none
|
|
283 Args : string => data type,
|
|
284 CODE reference
|
|
285
|
|
286
|
|
287 =cut
|
|
288
|
|
289
|
|
290 1;
|