Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SearchIO/Writer/HSPTableWriter.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: HSPTableWriter.pm,v 1.12 2002/11/23 15:32:24 jason Exp $ | |
2 | |
3 =head1 NAME | |
4 | |
5 Bio::SearchIO::Writer::HSPTableWriter - Tab-delimited data for Bio::Search::HSP::HSPI objects | |
6 | |
7 =head1 SYNOPSIS | |
8 | |
9 =head2 Example 1: Using the default columns | |
10 | |
11 use Bio::SearchIO; | |
12 use Bio::SearchIO::Writer::HSPTableWriter; | |
13 | |
14 my $in = Bio::SearchIO->new(); | |
15 | |
16 my $writer = Bio::SearchIO::Writer::HSPTableWriter->new(); | |
17 | |
18 my $out = Bio::SearchIO->new( -writer => $writer ); | |
19 | |
20 while ( my $result = $in->next_result() ) { | |
21 $out->write_result($result, ($in->report_count - 1 ? 0 : 1) ); | |
22 } | |
23 | |
24 =head2 Example 2: Specifying a subset of columns | |
25 | |
26 use Bio::SearchIO; | |
27 use Bio::SearchIO::Writer::HSPTableWriter; | |
28 | |
29 my $in = Bio::SearchIO->new(); | |
30 | |
31 my $writer = Bio::SearchIO::Writer::HSPTableWriter->new( | |
32 -columns => [qw( | |
33 query_name | |
34 query_length | |
35 hit_name | |
36 hit_length | |
37 rank | |
38 frac_identical_query | |
39 expect | |
40 )] ); | |
41 | |
42 my $out = Bio::SearchIO->new( -writer => $writer, | |
43 -file => ">searchio.out" ); | |
44 | |
45 while ( my $result = $in->next_result() ) { | |
46 $out->write_result($result, ($in->report_count - 1 ? 0 : 1) ); | |
47 } | |
48 | |
49 =head2 Custom Labels | |
50 | |
51 You can also specify different column labels if you don't want to use | |
52 the defaults. Do this by specifying a C<-labels> hash reference | |
53 parameter when creating the HSPTableWriter object. The keys of the | |
54 hash should be the column number (left-most column = 1) for the label(s) | |
55 you want to specify. Here's an example: | |
56 | |
57 my $writer = Bio::SearchIO::Writer::HSPTableWriter->new( | |
58 -columns => [qw( query_name | |
59 query_length | |
60 hit_name | |
61 hit_length )], | |
62 -labels => { 1 => 'QUERY_GI', | |
63 3 => 'HIT_IDENTIFIER' } ); | |
64 | |
65 | |
66 =head1 DESCRIPTION | |
67 | |
68 Bio::SearchIO::Writer::HSPTableWriter generates output at the finest | |
69 level of granularity for data within a search result. Data for each HSP | |
70 within each hit in a search result is output in tab-delimited format, | |
71 one row per HSP. | |
72 | |
73 =head2 Available Columns | |
74 | |
75 Here are the columns that can be specified in the C<-columns> | |
76 parameter when creating a HSPTableWriter object. If a C<-columns> parameter | |
77 is not specified, this list, in this order, will be used as the default. | |
78 | |
79 query_name # Sequence identifier of the query. | |
80 query_length # Full length of the query sequence | |
81 hit_name # Sequence identifier of the hit | |
82 hit_length # Full length of the hit sequence | |
83 round # Round number for hit (PSI-BLAST) | |
84 rank # Rank of the HSP | |
85 expect # Expect value for the alignment | |
86 score # Score for the alignment (e.g., BLAST score) | |
87 bits # Bit score for the alignment | |
88 frac_identical_query # fraction of identical substitutions in query | |
89 frac_identical_hit # fraction of identical substitutions in hit | |
90 frac_conserved_query # fraction of conserved substitutions in query | |
91 frac_conserved_hit # fraction of conserved substitutions in hit | |
92 length_aln_query # Length of the aligned portion of the query sequence | |
93 length_aln_hit # Length of the aligned portion of the hit sequence | |
94 gaps_query # Number of gaps in the aligned query sequence | |
95 gaps_hit # Number of gaps in the aligned hit sequence | |
96 gaps_total # Number of gaps in the aligned query and hit sequences | |
97 start_query # Starting coordinate of the aligned portion of the query sequence | |
98 end_query # Ending coordinate of the aligned portion of the query sequence | |
99 start_hit # Starting coordinate of the aligned portion of the hit sequence | |
100 end_hit # Ending coordinate of the aligned portion of the hit sequence | |
101 strand_query # Strand of the aligned query sequence | |
102 strand_hit # Strand of the aligned hit sequence | |
103 frame # Reading frame of the aligned query sequence | |
104 hit_description # Full description of the hit sequence | |
105 query_description # Full description of the query sequence | |
106 | |
107 For more details about these columns, see the documentation for the | |
108 corresponding method in Bio::Search::HSP::HSPI. | |
109 | |
110 =head1 TODO | |
111 | |
112 Figure out the best way to incorporate algorithm-specific score columns. | |
113 The best route is probably to have algorithm-specific subclasses | |
114 (e.g., BlastHSPTableWriter, FastaHSPTableWriter). | |
115 | |
116 =head1 FEEDBACK | |
117 | |
118 =head2 Mailing Lists | |
119 | |
120 User feedback is an integral part of the evolution of this and other | |
121 Bioperl modules. Send your comments and suggestions preferably to one | |
122 of the Bioperl mailing lists. Your participation is much appreciated. | |
123 | |
124 bioperl-l@bioperl.org - General discussion | |
125 http://bio.perl.org/MailList.html - About the mailing lists | |
126 | |
127 =head2 Reporting Bugs | |
128 | |
129 Report bugs to the Bioperl bug tracking system to help us keep track | |
130 of the bugs and their resolution. Bug reports can be submitted via email | |
131 or the web: | |
132 | |
133 bioperl-bugs@bio.perl.org | |
134 http://bugzilla.bioperl.org/ | |
135 | |
136 =head1 AUTHOR | |
137 | |
138 Steve Chervitz E<lt>sac@bioperl.orgE<gt> | |
139 | |
140 See L<the FEEDBACK section|/FEEDBACK> for where to send bug reports | |
141 and comments. | |
142 | |
143 =head1 COPYRIGHT | |
144 | |
145 Copyright (c) 2001 Steve Chervitz. All Rights Reserved. | |
146 | |
147 This library is free software; you can redistribute it and/or modify | |
148 it under the same terms as Perl itself. | |
149 | |
150 =head1 DISCLAIMER | |
151 | |
152 This software is provided "as is" without warranty of any kind. | |
153 | |
154 =head1 SEE ALSO | |
155 | |
156 Bio::SearchIO::Writer::HitTableWriter | |
157 Bio::SearchIO::Writer::ResultTableWriter | |
158 | |
159 =head1 METHODS | |
160 | |
161 =cut | |
162 | |
163 package Bio::SearchIO::Writer::HSPTableWriter; | |
164 | |
165 use strict; | |
166 use Bio::SearchIO::Writer::ResultTableWriter; | |
167 | |
168 use vars qw( @ISA ); | |
169 @ISA = qw( Bio::SearchIO::Writer::ResultTableWriter ); | |
170 | |
171 | |
# Column definitions, keyed by the column name accepted in -columns.
#
# Each value is an array reference laid out as:
#   [ column number, object, method[/argument], printf format, column label ]
#
# The "object" field says which object the method is invoked on:
#   result - methods defined in Bio::Search::Result::ResultI
#   hit    - methods defined in Bio::Search::Hit::HitI
#   hsp    - methods defined in Bio::Search::HSP::HSPI
#
# Tech note: supplying a bogus method name reportedly makes every value
# come out as zero; the cause is unknown.
# NOTE(review): the 'round' entry carries an extra sixth element ('hit')
# whose purpose is not evident from this file; it is kept as-is.
# TODO (maybe): Allow specification of signif_format (i.e., separate
# mantissa/exponent).
my %column_map = (
    query_name           => [ qw( 1  result query_name           s   QUERY   ) ],
    query_length         => [ qw( 2  result query_length         d   LEN_Q   ) ],
    hit_name             => [ qw( 3  hit    name                 s   HIT     ) ],
    hit_length           => [ qw( 4  hit    hit_length           d   LEN_H   ) ],
    round                => [ qw( 5  hit    iteration            d   ROUND   hit ) ],
    rank                 => [ qw( 6  hsp    rank                 d   RANK    ) ],
    expect               => [ qw( 7  hsp    expect               .1e EXPCT   ) ],
    score                => [ qw( 8  hsp    score                d   SCORE   ) ],
    bits                 => [ qw( 9  hsp    bits                 d   BITS    ) ],
    frac_identical_query => [ qw( 10 hsp    frac_identical/query .2f FR_IDQ  ) ],
    frac_identical_hit   => [ qw( 11 hsp    frac_identical/hit   .2f FR_IDH  ) ],
    frac_conserved_query => [ qw( 12 hsp    frac_conserved/query .2f FR_CNQ  ) ],
    frac_conserved_hit   => [ qw( 13 hsp    frac_conserved/hit   .2f FR_CNH  ) ],
    length_aln_query     => [ qw( 14 hsp    length/query         d   LN_ALQ  ) ],
    length_aln_hit       => [ qw( 15 hsp    length/hit           d   LN_ALH  ) ],
    gaps_query           => [ qw( 16 hsp    gaps/query           d   GAPS_Q  ) ],
    gaps_hit             => [ qw( 17 hsp    gaps/hit             d   GAPS_H  ) ],
    gaps_total           => [ qw( 18 hsp    gaps/total           d   GAPS_QH ) ],
    start_query          => [ qw( 19 hsp    start/query          d   START_Q ) ],
    end_query            => [ qw( 20 hsp    end/query            d   END_Q   ) ],
    start_hit            => [ qw( 21 hsp    start/hit            d   START_H ) ],
    end_hit              => [ qw( 22 hsp    end/hit              d   END_H   ) ],
    strand_query         => [ qw( 23 hsp    strand/query         d   STRND_Q ) ],
    strand_hit           => [ qw( 24 hsp    strand/hit           d   STRND_H ) ],
    frame                => [ qw( 25 hsp    frame                s   FRAME   ) ],
    hit_description      => [ qw( 26 hit    hit_description      s   DESC_H  ) ],
    query_description    => [ qw( 27 result query_description    s   DESC_Q  ) ],
);

# Returns the column definitions as a flat key/value list (a shallow copy
# of the hash: the per-column array references are shared, not cloned).
sub column_map {
    return %column_map;
}
210 | |
211 | |
212 =head2 to_string() | |
213 | |
214 Note: this method is not intended for direct use. | |
215 The SearchIO::write_result() method calls it automatically | |
216 if the writer is hooked up to a SearchIO object as illustrated in | |
217 L<the SYNOPSIS section|/SYNOPSIS>. | |
218 | |
219 Title : to_string() | |
220 : | |
221 Usage : print $writer->to_string( $result_obj, [$include_labels] ); | |
222 : | |
223 Argument : $result_obj = A Bio::Search::Result::ResultI object | |
224 : $include_labels = boolean, if true column labels are included (default: false) | |
225 : | |
226 Returns : String containing tab-delimited set of data for each HSP | |
227 : in each Hit of the supplied ResultI object. | |
228 : | |
229 Throws : n/a | |
230 | |
231 =cut | |
232 | |
# Render one search result as delimited text, one row per HSP.
#
# Arguments:
#   $result         - result object. It must provide next_hit(), and each
#                     hit it yields must provide next_hsp(). rewind() is
#                     called on the result and on each hit when available.
#   $include_labels - when true, the output starts with the string returned
#                     by $self->column_labels().
#
# Returns the accumulated string. A result, hit or HSP for which the
# corresponding filter ($self->filter('RESULT'|'HIT'|'HSP')) returns false
# is left out; an undefined filter accepts everything. Any tab directly
# before a newline is removed from the final string.
sub to_string {
    my ($self, $result, $include_labels) = @_;

    my $str = $include_labels ? $self->column_labels() : '';

    # Fetch every filter with its own scalar assignment. Collecting the
    # three calls in a single list assignment evaluates them in list
    # context, so an accessor that returns an empty list for an unset
    # filter would shift the remaining filters into the wrong variables.
    my $result_filter = $self->filter('RESULT');
    my $hit_filter    = $self->filter('HIT');
    my $hsp_filter    = $self->filter('HSP');

    if ( !defined $result_filter || $result_filter->($result) ) {
        my $row_data_for = $self->row_data_func;
        my $printf_fmt   = $self->printf_fmt;

        # Ensure we start from the first hit.
        $result->rewind() if $result->can('rewind');

        while ( my $hit = $result->next_hit ) {
            next if defined $hit_filter && !$hit_filter->($hit);

            # Ensure we start from the first HSP of this hit.
            $hit->rewind() if $hit->can('rewind');

            while ( my $hsp = $hit->next_hsp ) {
                next if defined $hsp_filter && !$hsp_filter->($hsp);

                my @row_data = $row_data_for->($result, $hit, $hsp);
                $str .= sprintf "$printf_fmt\n", @row_data;
            }
        }
    }

    # Drop the trailing field separator that precedes each line end.
    $str =~ s/\t\n/\n/g;

    return $str;
}
259 | |
260 =head2 end_report | |
261 | |
262 Title : end_report | |
263 Usage : $self->end_report() | |
264 Function: The method to call when ending a report, this is | |
265 mostly for cleanup for formats which require you to | |
266 have something at the end of the document. Nothing for | |
267 a text message. | |
268 Returns : string | |
269 Args : none | |
270 | |
271 =cut | |
272 | |
# Hook invoked when a report is finished. This writer emits nothing at the
# end of its output, so the result is always the empty string.
sub end_report {
    my $trailer = '';
    return $trailer;
}
276 | |
277 =head2 filter | |
278 | |
279 Title : filter | |
280 Usage : $writer->filter('hsp', \&hsp_filter); | |
281 Function: Filter out either at HSP,Hit,or Result level | |
282 Returns : none | |
283 Args : string => data type, | |
284 CODE reference | |
285 | |
286 | |
287 =cut | |
288 | |
289 | |
290 1; |