Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SearchIO/Writer/HitTableWriter.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: HitTableWriter.pm,v 1.14 2002/12/24 15:46:47 jason Exp $ | |
| 2 | |
| 3 =head1 NAME | |
| 4 | |
| 5 Bio::SearchIO::Writer::HitTableWriter - Tab-delimited data for Bio::Search::Hit::HitI objects | |
| 6 | |
| 7 =head1 SYNOPSIS | |
| 8 | |
| 9 =head2 Example 1: Using the default columns | |
| 10 | |
| 11 use Bio::SearchIO; | |
| 12 use Bio::SearchIO::Writer::HitTableWriter; | |
| 13 | |
| 14 my $in = Bio::SearchIO->new(); | |
| 15 | |
| 16 my $writer = Bio::SearchIO::Writer::HitTableWriter->new(); | |
| 17 | |
| 18 my $out = Bio::SearchIO->new( -writer => $writer ); | |
| 19 | |
| 20 while ( my $result = $in->next_result() ) { | |
| 21 $out->write_result($result, ($in->report_count - 1 ? 0 : 1) ); | |
| 22 } | |
| 23 | |
| 24 =head2 Example 2: Specifying a subset of columns | |
| 25 | |
| 26 use Bio::SearchIO; | |
| 27 use Bio::SearchIO::Writer::HitTableWriter; | |
| 28 | |
| 29 my $in = Bio::SearchIO->new(); | |
| 30 | |
| 31 my $writer = Bio::SearchIO::Writer::HitTableWriter->new( | |
| 32 -columns => [qw( | |
| 33 query_name | |
| 34 query_length | |
| 35 hit_name | |
| 36 hit_length | |
| 37 frac_identical_query | |
| 38 expect | |
| 39 )] ); | |
| 40 | |
| 41 my $out = Bio::SearchIO->new( -writer => $writer, | |
| 42 -file => ">searchio.out" ); | |
| 43 | |
| 44 while ( my $result = $in->next_result() ) { | |
| 45 $out->write_result($result, ($in->report_count - 1 ? 0 : 1) ); | |
| 46 } | |
| 47 | |
| 48 =head2 Custom Labels | |
| 49 | |
| 50 You can also specify different column labels if you don't want to use | |
| 51 the defaults. Do this by specifying a C<-labels> hash reference | |
| 52 parameter when creating the HitTableWriter object. The keys of the | |
| 53 hash should be the column number (left-most column = 1) for the label(s) | |
| 54 you want to specify. Here's an example: | |
| 55 | |
| 56 my $writer = Bio::SearchIO::Writer::HitTableWriter->new( | |
| 57 -columns => [qw( query_name | |
| 58 query_length | |
| 59 hit_name | |
| 60 hit_length )], | |
| 61 -labels => { 1 => 'QUERY_GI', | |
| 62 3 => 'HIT_IDENTIFIER' } ); | |
| 63 | |
| 64 | |
| 65 =head1 DESCRIPTION | |
| 66 | |
| 67 Bio::SearchIO::Writer::HitTableWriter outputs summary data | |
| 68 for each Hit within a search result. Output is in tab-delimited format, | |
| 69 one row per Hit. | |
| 70 | |
| 71 The reason why this is considered summary data is that if a hit | |
| 72 contains multiple HSPs, the HSPs will be tiled and | |
| 73 the data represents a summary across all HSPs. | |
| 74 See below for which columns are affected. | |
| 75 See the docs in L<Bio::Search::Hit::BlastHit|Bio::Search::Hit::BlastHit> | |
| 76 for more details on HSP tiling. | |
| 77 | |
| 78 =head2 Available Columns | |
| 79 | |
| 80 Here are the columns that can be specified in the C<-columns> | |
| 81 parameter when creating a HitTableWriter object. If a C<-columns> parameter | |
| 82 is not specified, this list, in this order, will be used as the default. | |
| 83 | |
| 84 query_name # Sequence identifier of the query. | |
| 85 query_length # Full length of the query sequence | |
| 86 hit_name # Sequence identifier of the hit | |
| 87 hit_length # Full length of the hit sequence | |
| 88 round # Round number for hit (PSI-BLAST) | |
| 89 expect # Expect value for the alignment | |
| 90 score # Score for the alignment (e.g., BLAST score) | |
| 91 bits # Bit score for the alignment | |
| 92 num_hsps # Number of HSPs (not the "N" value) | |
| 93 frac_identical_query* # fraction of identical substitutions in query | |
| 94 frac_identical_hit* # fraction of identical substitutions in hit | |
| 95 frac_conserved_query* # fraction of conserved substitutions in query | |
| 96 frac_conserved_hit* # fraction of conserved substitutions in hit | |
| 97 frac_aligned_query* # fraction of the query sequence that is aligned | |
| 98 frac_aligned_hit* # fraction of the hit sequence that is aligned | |
| 99 length_aln_query* # Length of the aligned portion of the query sequence | |
| 100 length_aln_hit* # Length of the aligned portion of the hit sequence | |
| 101 gaps_query* # Number of gaps in the aligned query sequence | |
| 102 gaps_hit* # Number of gaps in the aligned hit sequence | |
| 103 gaps_total* # Number of gaps in the aligned query and hit sequences | |
| 104 start_query* # Starting coordinate of the aligned portion of the query sequence | |
| 105 end_query* # Ending coordinate of the aligned portion of the query sequence | |
| 106 start_hit* # Starting coordinate of the aligned portion of the hit sequence | |
| 107 end_hit* # Ending coordinate of the aligned portion of the hit sequence | |
| 108 strand_query # Strand of the aligned query sequence | |
| 109 strand_hit # Strand of the aligned hit sequence | |
| 110 frame # Frame of the alignment (0,1,2) | |
| 111 ambiguous_aln # Ambiguous alignment indicator ('qs', 'q', 's') | |
| 112 hit_description # Full description of the hit sequence | |
| 113 query_description # Full description of the query sequence | |
| 114 | |
| 115 Items marked with a C<*> report data summed across all HSPs | |
| 116 after tiling them to avoid counting data from overlapping regions | |
| 117 multiple times. | |
| 118 | |
| 119 For more details about these columns, see the documentation for the | |
| 120 corresponding method in Bio::Search::Hit::BlastHit. | |
| 121 | |
| 122 =head1 TODO | |
| 123 | |
| 124 Figure out the best way to incorporate algorithm-specific score columns. | |
| 125 The best route is probably to have algorithm-specific subclasses | |
| 126 (e.g., BlastHitTableWriter, FastaHitTableWriter). | |
| 127 | |
| 128 =head1 FEEDBACK | |
| 129 | |
| 130 =head2 Mailing Lists | |
| 131 | |
| 132 User feedback is an integral part of the evolution of this and other | |
| 133 Bioperl modules. Send your comments and suggestions preferably to one | |
| 134 of the Bioperl mailing lists. Your participation is much appreciated. | |
| 135 | |
| 136 bioperl-l@bioperl.org - General discussion | |
| 137 http://bioperl.org/MailList.html - About the mailing lists | |
| 138 | |
| 139 =head2 Reporting Bugs | |
| 140 | |
| 141 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 142 of the bugs and their resolution. Bug reports can be submitted via email | |
| 143 or the web: | |
| 144 | |
| 145 bioperl-bugs@bio.perl.org | |
| 146 http://bugzilla.bioperl.org/ | |
| 147 | |
| 148 =head1 AUTHOR | |
| 149 | |
| 150 Steve Chervitz E<lt>sac@bioperl.orgE<gt> | |
| 151 | |
| 152 See L<the FEEDBACK section|/FEEDBACK> for where to send bug reports | |
| 153 and comments. | |
| 154 | |
| 155 =head1 COPYRIGHT | |
| 156 | |
| 157 Copyright (c) 2001, 2002 Steve Chervitz. All Rights Reserved. | |
| 158 | |
| 159 This library is free software; you can redistribute it and/or modify | |
| 160 it under the same terms as Perl itself. | |
| 161 | |
| 162 =head1 DISCLAIMER | |
| 163 | |
| 164 This software is provided "as is" without warranty of any kind. | |
| 165 | |
| 166 =head1 SEE ALSO | |
| 167 | |
| 168 L<Bio::SearchIO::Writer::HitTableWriter>, | |
| 169 L<Bio::SearchIO::Writer::ResultTableWriter> | |
| 170 | |
| 171 =head1 METHODS | |
| 172 | |
| 173 =cut | |
| 174 | |
| 175 package Bio::SearchIO::Writer::HitTableWriter; | |
| 176 | |
| 177 use strict; | |
| 178 use Bio::SearchIO::Writer::ResultTableWriter; | |
| 179 | |
| 180 use vars qw( @ISA ); | |
| 181 @ISA = qw( Bio::SearchIO::Writer::ResultTableWriter ); | |
| 182 | |
| 183 | |
| 184 # Array fields: column, object, method[/argument], printf format, | |
| 185 # column label. Methods for result object are defined in | |
| 186 # Bio::Search::Result::ResultI. Methods for hit object are defined in | |
| 187 # Bio::Search::Hit::HitI. Tech note: If a bogus method is supplied, | |
| 188 # all values will come out as zero. Don't know why this is. | |
| 189 | |
| 190 # TODO (maybe): Allow specification of separate mantissa/exponent for | |
| 191 # significance data. | |
| 192 | |
| 193 my %column_map = ( | |
| 194 'query_name' => ['1', 'result', 'query_name', 's', 'QUERY' ], | |
| 195 'query_length' => ['2', 'result', 'query_length', 'd', 'LEN_Q'], | |
| 196 'hit_name' => ['3', 'hit', 'name', 's', 'HIT'], | |
| 197 'hit_length' => ['4', 'hit', 'length', 'd', 'LEN_H'], | |
| 198 'round' => ['5', 'hit', 'iteration', 'd', 'ROUND'], | |
| 199 'expect' => ['6', 'hit', 'significance', '.1e', 'EXPCT'], | |
| 200 'score' => ['7', 'hit', 'raw_score', 'd', 'SCORE'], | |
| 201 'bits' => ['8', 'hit', 'bits', 'd', 'BITS'], | |
| 202 'num_hsps' => ['9', 'hit', 'num_hsps', 'd', 'HSPS'], | |
| 203 'frac_identical_query' => ['10', 'hit', 'frac_identical/query', '.2f', 'FR_IDQ'], | |
| 204 'frac_identical_hit' => ['11', 'hit', 'frac_identical/hit', '.2f', 'FR_IDH'], | |
| 205 'frac_conserved_query' => ['12', 'hit', 'frac_conserved/query', '.2f', 'FR_CNQ'], | |
| 206 'frac_conserved_hit' => ['13', 'hit', 'frac_conserved/hit', '.2f', 'FR_CNH'], | |
| 207 'frac_aligned_query' => ['14', 'hit', 'frac_aligned_query', '.2f', 'FR_ALQ'], | |
| 208 'frac_aligned_hit' => ['15', 'hit', 'frac_aligned_hit', '.2f', 'FR_ALH'], | |
| 209 'length_aln_query' => ['16', 'hit', 'length_aln/query', 'd', 'LN_ALQ'], | |
| 210 'length_aln_hit' => ['17', 'hit', 'length_aln/hit', 'd', 'LN_ALH'], | |
| 211 'gaps_query' => ['18', 'hit', 'gaps/query', 'd', 'GAPS_Q'], | |
| 212 'gaps_hit' => ['19', 'hit', 'gaps/hit', 'd', 'GAPS_H'], | |
| 213 'gaps_total' => ['20', 'hit', 'gaps/total', 'd', 'GAPS_QH'], | |
| 214 'start_query' => ['21', 'hit', 'start/query', 'd', 'START_Q'], | |
| 215 'end_query' => ['22', 'hit', 'end/query', 'd', 'END_Q'], | |
| 216 'start_hit' => ['23', 'hit', 'start/hit', 'd', 'START_H'], | |
| 217 'end_hit' => ['24', 'hit', 'end/hit', 'd', 'END_H'], | |
| 218 'strand_query' => ['25', 'hit', 'strand/query', 's', 'STRND_Q'], | |
| 219 'strand_hit' => ['26', 'hit', 'strand/hit', 's', 'STRND_H'], | |
| 220 'frame' => ['27', 'hit', 'frame', 'd', 'FRAME'], | |
| 221 'ambiguous_aln' => ['28', 'hit', 'ambiguous_aln', 's', 'AMBIG'], | |
| 222 'hit_description' => ['29', 'hit', 'description', 's', 'DESC_H'], | |
| 223 'query_description' => ['30', 'result', 'query_description', 's', 'DESC_Q'], | |
| 224 ); | |
| 225 | |
| 226 sub column_map { return %column_map } # returns the map as a flattened key/value list, so callers get a copy | |
| 227 | |
| 228 | |
| 229 =head2 to_string() | |
| 230 | |
| 231 Note: this method is not intended for direct use. The | |
| 232 SearchIO::write_result() method calls it automatically if the writer | |
| 233 is hooked up to a SearchIO object as illustrated in | |
| 234 L<the SYNOPSIS section|/SYNOPSIS>. | |
| 235 | |
| 236 Title : to_string() | |
| 237 : | |
| 238 Usage : print $writer->to_string( $result_obj, [$include_labels] ); | |
| 239 : | |
| 240 Argument : $result_obj = A Bio::Search::Result::BlastResult object | |
| 241 : $include_labels = boolean, if true column labels are included (default: false) | |
| 242 : | |
| 243 Returns : String containing tab-delimited set of data for each hit | |
| 244 : in a BlastResult object. Some data is summed across multiple HSPs. | |
| 245 : | |
| 246 Throws : n/a | |
| 247 | |
| 248 =cut | |
| 249 | |
| 250 #---------------- | |
| 251 sub to_string { | |
| 252 #---------------- | |
| 253 my ($self, $result, $include_labels) = @_; | |
| 254 | |
| 255 my $str = $include_labels ? $self->column_labels() : ''; # start with the label header only when requested | |
| 256 my $func_ref = $self->row_data_func; # code ref called below with ($result, $hit); defined outside this file - presumably in the parent class | |
| 257 my $printf_fmt = $self->printf_fmt; # sprintf format applied to each row; defined outside this file - presumably in the parent class | |
| 258 | |
| 259 my ($resultfilter,$hitfilter) = ( $self->filter('RESULT'), | |
| 260 $self->filter('HIT') ); # optional code refs; an undefined filter means nothing is filtered at that level | |
| 261 if( ! defined $resultfilter || | |
| 262 &{$resultfilter}($result) ) { # when the result filter returns false, no hit rows are emitted at all | |
| 263 $result->can('rewind') && | |
| 264 $result->rewind(); # ensure we're at the beginning | |
| 265 foreach my $hit($result->hits) { | |
| 266 next if( defined $hitfilter && ! &{$hitfilter}($hit)); # skip hits rejected by the hit filter | |
| 267 my @row_data = map { defined $_ ? $_ : 0 } &{$func_ref}($result, $hit); # undefined values are replaced with 0 | |
| 268 $str .= sprintf "$printf_fmt\n", @row_data; # one formatted line per hit | |
| 269 } | |
| 270 } | |
| 271 $str =~ s/\t\n/\n/gs; # drop a tab that directly precedes a newline | |
| 272 return $str; | |
| 273 } | |
| 274 | |
| 275 =head2 end_report | |
| 276 | |
| 277 Title : end_report | |
| 278 Usage : $self->end_report() | |
| 279 Function: The method to call when ending a report, this is | |
| 280 mostly for cleanup for formats which require you to | |
| 281 have something at the end of the document. Nothing for | |
| 282 a text message. | |
| 283 Returns : string | |
| 284 Args : none | |
| 285 | |
| 286 =cut | |
| 287 | |
| 288 sub end_report { | |
| 289 return ''; # always the empty string: this writer appends nothing at the end of a report | |
| 290 } | |
| 291 | |
| 292 | |
| 293 =head2 filter | |
| 294 | |
| 295 Title : filter | |
| 296 Usage : $writer->filter('hsp', \&hsp_filter); | |
| 297 Function: Filter out either at HSP,Hit,or Result level | |
| 298 Returns : none | |
| 299 Args : string => data type, | |
| 300 CODE reference | |
| 301 | |
| 302 | |
| 303 =cut | |
| 304 | |
| 305 1; |
