0
|
1 # $Id: TextResultWriter.pm,v 1.5.2.5 2003/09/15 16:19:24 jason Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::SearchIO::Writer::TextResultWriter
|
|
4 #
|
|
5 # Cared for by Jason Stajich <jason@bioperl.org>
|
|
6 #
|
|
7 # Copyright Jason Stajich
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::SearchIO::Writer::TextResultWriter - Object to implement writing a Bio::Search::ResultI in Text.
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 use Bio::SearchIO;
|
|
20 use Bio::SearchIO::Writer::TextResultWriter;
|
|
21
|
|
22 my $in = new Bio::SearchIO(-format => 'blast',
|
|
23 -file => shift @ARGV);
|
|
24
|
|
25 my $writer = new Bio::SearchIO::Writer::TextResultWriter();
|
|
26 my $out = new Bio::SearchIO(-writer => $writer);
|
|
27 $out->write_result($in->next_result);
|
|
28
|
|
29 =head1 DESCRIPTION
|
|
30
|
|
31 This object implements the SearchWriterI interface which will produce
|
|
32 a set of Text for a specific Bio::Search::Report::ReportI interface.
|
|
33
|
|
34 You can also provide the argument -filters => \%hash to filter the at
|
|
35 the hsp, hit, or result level. %hash is an associative array which
|
|
36 contains any or all of the keys (HSP, HIT, RESULT). The values
|
|
37 pointed to by these keys would be references to a subroutine which
|
|
38 expects to be passed an object - one of Bio::Search::HSP::HSPI,
|
|
39 Bio::Search::Hit::HitI, and Bio::Search::Result::ResultI respectively.
|
|
40 Each function needs to return a boolean value as to whether or not the
|
|
41 passed element should be included in the output report - true if it is to be included, false if it to be omitted.
|
|
42
|
|
43 For example to filter on sequences in the database which are too short
|
|
44 for your criteria you would do the following.
|
|
45
|
|
46 Define a hit filter method
|
|
47
|
|
48 sub hit_filter {
|
|
49 my $hit = shift;
|
|
50 return $hit->length E<gt> 100; # test if length of the hit sequence
|
|
51 # long enough
|
|
52 }
|
|
53 my $writer = new Bio::SearchIO::Writer::TextResultWriter(
|
|
54 -filters => { 'HIT' =E<gt> \&hit_filter }
|
|
55 );
|
|
56
|
|
57 Another example would be to filter HSPs on percent identity, let's
|
|
58 only include HSPs which are 75% identical or better.
|
|
59
|
|
60 sub hsp_filter {
|
|
61 my $hsp = shift;
|
|
62 return $hsp->percent_identity E<gt> 75;
|
|
63 }
|
|
64 my $writer = new Bio::SearchIO::Writer::TextResultWriter(
|
|
65 -filters => { 'HSP' =E<gt> \&hsp_filter }
|
|
66 );
|
|
67
|
|
68 See L<Bio::SearchIO::SearchWriterI> for more info on the filter method.
|
|
69
|
|
70
|
|
71 This module will use the module Text::Wrap if it is installed to wrap
|
|
72 the Query description line. If you do not have Text::Wrap installed
|
|
73 this module will work fine but you won't have the Query line wrapped.
|
|
74 You will see a warning about this when you first instantiate a
|
|
75 TextResultWriter - to avoid these warnings from showing up, simply set
|
|
76 the verbosity upon initialization to -1 like this: my $writer = new
|
|
77 Bio::SearchIO::Writer::TextResultWriter(-verbose => -1);
|
|
78
|
|
79 =head1 FEEDBACK
|
|
80
|
|
81 =head2 Mailing Lists
|
|
82
|
|
83 User feedback is an integral part of the evolution of this and other
|
|
84 Bioperl modules. Send your comments and suggestions preferably to
|
|
85 the Bioperl mailing list. Your participation is much appreciated.
|
|
86
|
|
87 bioperl-l@bioperl.org - General discussion
|
|
88 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
89
|
|
90 =head2 Reporting Bugs
|
|
91
|
|
92 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
93 of the bugs and their resolution. Bug reports can be submitted via
|
|
94 email or the web:
|
|
95
|
|
96 bioperl-bugs@bioperl.org
|
|
97 http://bugzilla.bioperl.org/
|
|
98
|
|
99 =head1 AUTHOR - Jason Stajich
|
|
100
|
|
101 Email jason@bioperl.org
|
|
102
|
|
103 =head1 APPENDIX
|
|
104
|
|
105 The rest of the documentation details each of the object methods.
|
|
106 Internal methods are usually preceded with a _
|
|
107
|
|
108 =cut
|
|
109
|
|
110
|
|
111 # Let the code begin...
|
|
112
|
|
113
|
|
114 package Bio::SearchIO::Writer::TextResultWriter;
|
|
115 use vars qw(@ISA $MaxNameLen $MaxDescLen $AlignmentLineWidth
|
|
116 $DescLineLen $TextWrapLoaded);
|
|
117 use strict;
|
|
118
|
|
119 # Object preamble - inherits from Bio::Root::RootI
|
|
120
|
|
121 BEGIN {
|
|
122 $MaxDescLen = 65;
|
|
123 $AlignmentLineWidth = 60;
|
|
124 eval { require Text::Wrap; $TextWrapLoaded = 1;};
|
|
125 if( $@ ) {
|
|
126 $TextWrapLoaded = 0;
|
|
127 }
|
|
128 }
|
|
129
|
|
130 use Bio::Root::Root;
|
|
131 use Bio::SearchIO::SearchWriterI;
|
|
132 use POSIX;
|
|
133
|
|
134 @ISA = qw(Bio::Root::Root Bio::SearchIO::SearchWriterI);
|
|
135
|
|
136 =head2 new
|
|
137
|
|
138 Title : new
|
|
139 Usage : my $obj = new Bio::SearchIO::Writer::TextResultWriter();
|
|
140 Function: Builds a new Bio::SearchIO::Writer::TextResultWriter object
|
|
141 Returns : Bio::SearchIO::Writer::TextResultWriter
|
|
142 Args : -filters => hashref with any or all of the keys (HSP HIT RESULT)
|
|
143 which have values pointing to a subroutine reference
|
|
144 which will expect to get a
|
|
145
|
|
146
|
|
147 =cut
|
|
148
|
|
149 sub new {
|
|
150 my($class,@args) = @_;
|
|
151
|
|
152 my $self = $class->SUPER::new(@args);
|
|
153 my ($filters) = $self->_rearrange([qw(FILTERS)],@args);
|
|
154 if( defined $filters ) {
|
|
155 if( !ref($filters) =~ /HASH/i ) {
|
|
156 $self->warn("Did not provide a hashref for the FILTERS option, ignoring.");
|
|
157 } else {
|
|
158 while( my ($type,$code) = each %{$filters} ) {
|
|
159 $self->filter($type,$code);
|
|
160 }
|
|
161 }
|
|
162 }
|
|
163 unless( $TextWrapLoaded ) {
|
|
164 $self->warn("Could not load Text::Wrap - the Query Description will not be line wrapped\n");
|
|
165 } else {
|
|
166 $Text::Wrap::columns = $MaxDescLen;
|
|
167 }
|
|
168 return $self;
|
|
169 }
|
|
170
|
|
171
|
|
172 =head2 to_string
|
|
173
|
|
174 Purpose : Produces data for each Search::Result::ResultI in a string.
|
|
175 : This is an abstract method. For some useful implementations,
|
|
176 : see ResultTableWriter.pm, HitTableWriter.pm,
|
|
177 : and HSPTableWriter.pm.
|
|
178 Usage : print $writer->to_string( $result_obj, @args );
|
|
179 Argument : $result_obj = A Bio::Search::Result::ResultI object
|
|
180 : @args = any additional arguments used by your implementation.
|
|
181 Returns : String containing data for each search Result or any of its
|
|
182 : sub-objects (Hits and HSPs).
|
|
183 Throws : n/a
|
|
184
|
|
185 =cut
|
|
186
|
|
187 sub to_string {
|
|
188 my ($self,$result,$num) = @_;
|
|
189 $num ||= 0;
|
|
190 return unless defined $result;
|
|
191 my ($resultfilter,$hitfilter, $hspfilter) = ( $self->filter('RESULT'),
|
|
192 $self->filter('HIT'),
|
|
193 $self->filter('HSP') );
|
|
194 return '' if( defined $resultfilter && ! &{$resultfilter}($result) );
|
|
195
|
|
196 my ($qtype,$dbtype,$dbseqtype,$type);
|
|
197 my $alg = $result->algorithm;
|
|
198
|
|
199 # This is actually wrong for the FASTAs I think
|
|
200 if( $alg =~ /T(FAST|BLAST)([XY])/i ) {
|
|
201 $qtype = $dbtype = 'translated';
|
|
202 $dbseqtype = $type = 'PROTEIN';
|
|
203 } elsif( $alg =~ /T(FAST|BLAST)N/i ) {
|
|
204 $qtype = '';
|
|
205 $dbtype = 'translated';
|
|
206 $type = 'PROTEIN';
|
|
207 $dbseqtype = 'NUCLEOTIDE';
|
|
208 } elsif( $alg =~ /(FAST|BLAST)N/i ||
|
|
209 $alg =~ /(WABA|EXONERATE)/i ) {
|
|
210 $qtype = $dbtype = '';
|
|
211 $type = $dbseqtype = 'NUCLEOTIDE';
|
|
212 } elsif( $alg =~ /(FAST|BLAST)P/ || $alg =~ /SSEARCH/i ) {
|
|
213 $qtype = $dbtype = '';
|
|
214 $type = $dbseqtype = 'PROTEIN';
|
|
215 } elsif( $alg =~ /(FAST|BLAST)[XY]/i ) {
|
|
216 $qtype = 'translated';
|
|
217 $dbtype = 'PROTEIN';
|
|
218 $dbseqtype = $type = 'PROTEIN';
|
|
219 } else {
|
|
220 print STDERR "algorithm was ", $result->algorithm, " couldn't match\n";
|
|
221 }
|
|
222
|
|
223
|
|
224 my %baselens = ( 'Sbjct:' => ( $dbtype eq 'translated' ) ? 3 : 1,
|
|
225 'Query:' => ( $qtype eq 'translated' ) ? 3 : 1);
|
|
226
|
|
227 my $str;
|
|
228 if( ! defined $num || $num <= 1 ) {
|
|
229 $str = &{$self->start_report}($result);
|
|
230 }
|
|
231
|
|
232 $str .= &{$self->title}($result);
|
|
233
|
|
234 $str .= $result->algorithm_reference || $self->algorithm_reference($result);
|
|
235 $str .= &{$self->introduction}($result);
|
|
236
|
|
237
|
|
238 $str .= qq{
|
|
239 Score E
|
|
240 Sequences producing significant alignments: (bits) value
|
|
241 };
|
|
242 my $hspstr = '';
|
|
243 if( $result->can('rewind')) {
|
|
244 $result->rewind(); # support stream based parsing routines
|
|
245 }
|
|
246 while( my $hit = $result->next_hit ) {
|
|
247 next if( defined $hitfilter && ! &{$hitfilter}($hit) );
|
|
248 my $nm = $hit->name();
|
|
249 $self->debug( "no $nm for name (".$hit->description(). "\n")
|
|
250 unless $nm;
|
|
251 my ($gi,$acc) = &{$self->id_parser}($nm);
|
|
252 my $p = "%-$MaxDescLen". "s";
|
|
253 my $descsub;
|
|
254 my $desc = sprintf("%s %s",$nm,$hit->description);
|
|
255 if( length($desc) - 3 > $MaxDescLen) {
|
|
256 $descsub = sprintf($p,
|
|
257 substr($desc,0,$MaxDescLen-3) .
|
|
258 "...");
|
|
259 } else {
|
|
260 $descsub = sprintf($p,$desc);
|
|
261 }
|
|
262
|
|
263 $str .= sprintf("%s %-4s %s\n",
|
|
264 $descsub,
|
|
265 defined $hit->raw_score ? $hit->raw_score : ' ',
|
|
266 defined $hit->significance ? $hit->significance : '?');
|
|
267 my @hsps = $hit->hsps;
|
|
268
|
|
269 $hspstr .= sprintf(">%s %s\n%9sLength = %d\n\n",
|
|
270 $hit->name,
|
|
271 defined $hit->description ? $hit->description : '',
|
|
272 '', # empty is for the %9s in the str formatting
|
|
273 $hit->length);
|
|
274
|
|
275 foreach my $hsp ( @hsps ) {
|
|
276 next if( defined $hspfilter && ! &{$hspfilter}($hsp) );
|
|
277 $hspstr .= sprintf(" Score = %4s bits (%s), Expect = %s",
|
|
278 $hsp->bits, $hsp->score, $hsp->evalue);
|
|
279 if( $hsp->pvalue ) {
|
|
280 $hspstr .= ", P = ".$hsp->pvalue;
|
|
281 }
|
|
282 $hspstr .= "\n";
|
|
283 $hspstr .= sprintf(" Identities = %d/%d (%d%%)",
|
|
284 ( $hsp->frac_identical('total') *
|
|
285 $hsp->length('total')),
|
|
286 $hsp->length('total'),
|
|
287 POSIX::floor($hsp->frac_identical('total')
|
|
288 * 100));
|
|
289
|
|
290 if( $type eq 'PROTEIN' ) {
|
|
291 $hspstr .= sprintf(", Positives = %d/%d (%d%%)",
|
|
292 ( $hsp->frac_conserved('total') *
|
|
293 $hsp->length('total')),
|
|
294 $hsp->length('total'),
|
|
295 POSIX::floor($hsp->frac_conserved('total') * 100));
|
|
296
|
|
297 }
|
|
298 if( $hsp->gaps ) {
|
|
299 $hspstr .= sprintf(", Gaps = %d/%d (%d%%)",
|
|
300 $hsp->gaps('total'),
|
|
301 $hsp->length('total'),
|
|
302 POSIX::floor(100 * $hsp->gaps('total') /
|
|
303 $hsp->length('total')));
|
|
304 }
|
|
305 $hspstr .= "\n";
|
|
306 my ($hframe,$qframe) = ( $hsp->hit->frame,
|
|
307 $hsp->query->frame);
|
|
308 my ($hstrand,$qstrand) = ($hsp->hit->strand,$hsp->query->strand);
|
|
309 # so TBLASTX will have Query/Hit frames
|
|
310 # BLASTX will have Query frame
|
|
311 # TBLASTN will have Hit frame
|
|
312 if( $hstrand || $qstrand ) {
|
|
313 $hspstr .= " Frame = ";
|
|
314 my ($signq, $signh);
|
|
315 unless( $hstrand ) {
|
|
316 $hframe = undef;
|
|
317 # if strand is null or 0 then it is protein
|
|
318 # and this no frame
|
|
319 } else {
|
|
320 $signh = $hstrand < 0 ? '-' : '+';
|
|
321 }
|
|
322 unless( $qstrand ) {
|
|
323 $qframe = undef;
|
|
324 # if strand is null or 0 then it is protein
|
|
325 } else {
|
|
326 $signq =$qstrand < 0 ? '-' : '+';
|
|
327 }
|
|
328 # remember bioperl stores frames as 0,1,2 (GFF way)
|
|
329 # BLAST reports reports as 1,2,3 so
|
|
330 # we have to add 1 to the frame values
|
|
331 if( defined $hframe && ! defined $qframe) {
|
|
332 $hspstr .= "$signh".($hframe+1);
|
|
333 } elsif( defined $qframe && ! defined $hframe) {
|
|
334 $hspstr .= "$signq".($qframe+1);
|
|
335 } else {
|
|
336 $hspstr .= sprintf(" %s%d / %s%d",
|
|
337 $signq,$qframe+1,
|
|
338 $signh, $hframe+1);
|
|
339 }
|
|
340 }
|
|
341 $hspstr .= "\n\n";
|
|
342
|
|
343 my @hspvals = ( {'name' => 'Query:',
|
|
344 'seq' => $hsp->query_string,
|
|
345 'start' => ( $hstrand >= 0 ?
|
|
346 $hsp->query->start :
|
|
347 $hsp->query->end),
|
|
348 'end' => ($qstrand >= 0 ?
|
|
349 $hsp->query->end :
|
|
350 $hsp->query->start),
|
|
351 'index' => 0,
|
|
352 'direction' => $qstrand || 1
|
|
353 },
|
|
354 { 'name' => ' 'x6, # this might need to adjust for long coordinates??
|
|
355 'seq' => $hsp->homology_string,
|
|
356 'start' => undef,
|
|
357 'end' => undef,
|
|
358 'index' => 0,
|
|
359 'direction' => 1
|
|
360 },
|
|
361 { 'name' => 'Sbjct:',
|
|
362 'seq' => $hsp->hit_string,
|
|
363 'start' => ($hstrand >= 0 ?
|
|
364 $hsp->hit->start : $hsp->hit->end),
|
|
365 'end' => ($hstrand >= 0 ?
|
|
366 $hsp->hit->end : $hsp->hit->start),
|
|
367 'index' => 0,
|
|
368 'direction' => $hstrand || 1
|
|
369 }
|
|
370 );
|
|
371
|
|
372
|
|
373 # let's set the expected length (in chars) of the starting number
|
|
374 # in an alignment block so we can have things line up
|
|
375 # Just going to try and set to the largest
|
|
376
|
|
377 my ($numwidth) = sort { $b <=> $a }(length($hspvals[0]->{'start'}),
|
|
378 length($hspvals[0]->{'end'}),
|
|
379 length($hspvals[2]->{'start'}),
|
|
380 length($hspvals[2]->{'end'}));
|
|
381 my $count = 0;
|
|
382 while ( $count <= $hsp->length('total') ) {
|
|
383 foreach my $v ( @hspvals ) {
|
|
384 my $piece = substr($v->{'seq'}, $v->{'index'} +$count,
|
|
385 $AlignmentLineWidth);
|
|
386 my $cp = $piece;
|
|
387 my $plen = scalar ( $cp =~ tr/\-//);
|
|
388 my ($start,$end) = ('','');
|
|
389 if( defined $v->{'start'} ) {
|
|
390 $start = $v->{'start'};
|
|
391 # since strand can be + or - use the direction
|
|
392 # to signify which whether to add or substract from end
|
|
393 my $d = $v->{'direction'} * ( $AlignmentLineWidth - $plen )*
|
|
394 $baselens{$v->{'name'}};
|
|
395 if( length($piece) < $AlignmentLineWidth ) {
|
|
396 $d = (length($piece) - $plen) * $v->{'direction'} *
|
|
397 $baselens{$v->{'name'}};
|
|
398 }
|
|
399 $end = $v->{'start'} + $d - $v->{'direction'};
|
|
400 $v->{'start'} += $d;
|
|
401 }
|
|
402 $hspstr .= sprintf("%s %-".$numwidth."s %s %s\n",
|
|
403 $v->{'name'},
|
|
404 $start,
|
|
405 $piece,
|
|
406 $end
|
|
407 );
|
|
408 }
|
|
409 $count += $AlignmentLineWidth;
|
|
410 $hspstr .= "\n";
|
|
411 }
|
|
412 }
|
|
413 $hspstr .= "\n";
|
|
414 }
|
|
415 $str .= "\n\n".$hspstr;
|
|
416
|
|
417 $str .= sprintf(qq{ Database: %s
|
|
418 Posted date: %s
|
|
419 Number of letters in database: %s
|
|
420 Number of sequences in database: %s
|
|
421
|
|
422 Matrix: %s
|
|
423 },
|
|
424 $result->database_name(),
|
|
425 $result->get_statistic('posted_date') ||
|
|
426 POSIX::strftime("%b %d, %Y %I:%M %p",localtime),
|
|
427 &_numwithcommas($result->database_entries()),
|
|
428 &_numwithcommas($result->database_letters()),
|
|
429 $result->get_parameter('matrix') || '');
|
|
430
|
|
431 if( defined (my $open = $result->get_parameter('gapopen')) ) {
|
|
432 $str .= sprintf("Gap Penalties Existence: %d, Extension: %d\n",
|
|
433 $open || 0, $result->get_parameter('gapext') || 0);
|
|
434 }
|
|
435
|
|
436 # skip those params we've already output
|
|
437 foreach my $param ( grep { ! /matrix|gapopen|gapext/i }
|
|
438 $result->available_parameters ) {
|
|
439 $str .= "$param: ". $result->get_parameter($param) ."\n";
|
|
440
|
|
441 }
|
|
442 $str .= "Search Statistics\n";
|
|
443 # skip posted date, we already output it
|
|
444 foreach my $stat ( sort grep { ! /posted_date/ }
|
|
445 $result->available_statistics ) {
|
|
446 my $expect = $result->get_parameter('expect');
|
|
447 my $v = $result->get_statistic($stat);
|
|
448 if( $v =~ /^\d+$/ ) {
|
|
449 $v = &_numwithcommas($v);
|
|
450 }
|
|
451 if( defined $expect &&
|
|
452 $stat eq 'seqs_better_than_cutoff' ) {
|
|
453 $str .= "seqs_better_than_$expect: $v\n";
|
|
454 } else {
|
|
455 my $v =
|
|
456 $str .= "$stat: $v\n";
|
|
457 }
|
|
458 }
|
|
459 $str .= "\n\n";
|
|
460 return $str;
|
|
461 }
|
|
462
|
|
463
|
|
464 =head2 start_report
|
|
465
|
|
466 Title : start_report
|
|
467 Usage : $index->start_report( CODE )
|
|
468 Function: Stores or returns the code to
|
|
469 write the start of the <HTML> block, the <TITLE> block
|
|
470 and the start of the <BODY> block of HTML. Useful
|
|
471 for (for instance) specifying alternative
|
|
472 HTML if you are embedding the output in
|
|
473 an HTML page which you have already started.
|
|
474 (For example a routine returning a null string).
|
|
475 Returns \&default_start_report (see below) if not
|
|
476 set.
|
|
477 Example : $index->start_report( \&my_start_report )
|
|
478 Returns : ref to CODE if called without arguments
|
|
479 Args : CODE
|
|
480
|
|
481 =cut
|
|
482
|
|
483 sub start_report {
|
|
484 my( $self, $code ) = @_;
|
|
485 if ($code) {
|
|
486 $self->{'_start_report'} = $code;
|
|
487 }
|
|
488 return $self->{'_start_report'} || \&default_start_report;
|
|
489 }
|
|
490
|
|
491 =head2 default_start_report
|
|
492
|
|
493 Title : default_start_report
|
|
494 Usage : $self->default_start_report($result)
|
|
495 Function: The default method to call when starting a report.
|
|
496 Returns : sting
|
|
497 Args : First argument is a Bio::Search::Result::ResultI
|
|
498
|
|
499 =cut
|
|
500
|
|
501 sub default_start_report {
|
|
502 my ($result) = @_;
|
|
503 return "";
|
|
504 }
|
|
505
|
|
506 =head2 title
|
|
507
|
|
508 Title : title
|
|
509 Usage : $self->title($CODE)
|
|
510
|
|
511 Function: Stores or returns the code to provide HTML for the given
|
|
512 BLAST report that will appear at the top of the BLAST report
|
|
513 HTML output. Useful for (for instance) specifying
|
|
514 alternative routines to write your own titles.
|
|
515 Returns \&default_title (see below) if not
|
|
516 set.
|
|
517 Example : $index->title( \&my_title )
|
|
518 Returns : ref to CODE if called without arguments
|
|
519 Args : CODE
|
|
520
|
|
521 =cut
|
|
522
|
|
523 sub title {
|
|
524 my( $self, $code ) = @_;
|
|
525 if ($code) {
|
|
526 $self->{'_title'} = $code;
|
|
527 }
|
|
528 return $self->{'_title'} || \&default_title;
|
|
529 }
|
|
530
|
|
531 =head2 default_title
|
|
532
|
|
533 Title : default_title
|
|
534 Usage : $self->default_title($result)
|
|
535 Function: Provides HTML for the given BLAST report that will appear
|
|
536 at the top of the BLAST report output.
|
|
537 Returns : empty for text implementation
|
|
538 Args : First argument is a Bio::Search::Result::ResultI
|
|
539
|
|
540 =cut
|
|
541
|
|
542 sub default_title {
|
|
543 my ($result) = @_;
|
|
544 return "";
|
|
545 # The HTML implementation
|
|
546 # return sprintf(
|
|
547 # qq{<CENTER><H1><a href="http://bioperl.org">Bioperl</a> Reformatted HTML of %s Search Report<br> for %s</H1></CENTER>},
|
|
548 # $result->algorithm,
|
|
549 # $result->query_name());
|
|
550 }
|
|
551
|
|
552
|
|
553 =head2 introduction
|
|
554
|
|
555 Title : introduction
|
|
556 Usage : $self->introduction($CODE)
|
|
557
|
|
558 Function: Stores or returns the code to provide HTML for the given
|
|
559 BLAST report detailing the query and the
|
|
560 database information.
|
|
561 Useful for (for instance) specifying
|
|
562 routines returning alternative introductions.
|
|
563 Returns \&default_introduction (see below) if not
|
|
564 set.
|
|
565 Example : $index->introduction( \&my_introduction )
|
|
566 Returns : ref to CODE if called without arguments
|
|
567 Args : CODE
|
|
568
|
|
569 =cut
|
|
570
|
|
571 sub introduction {
|
|
572 my( $self, $code ) = @_;
|
|
573 if ($code) {
|
|
574 $self->{'_introduction'} = $code;
|
|
575 }
|
|
576 return $self->{'_introduction'} || \&default_introduction;
|
|
577 }
|
|
578
|
|
579 =head2 default_introduction
|
|
580
|
|
581 Title : default_introduction
|
|
582 Usage : $self->default_introduction($result)
|
|
583 Function: Outputs HTML to provide the query
|
|
584 and the database information
|
|
585 Returns : string containing HTML
|
|
586 Args : First argument is a Bio::Search::Result::ResultI
|
|
587 Second argument is string holding literature citation
|
|
588
|
|
589 =cut
|
|
590
|
|
591 sub default_introduction {
|
|
592 my ($result) = @_;
|
|
593
|
|
594 return sprintf(
|
|
595 qq{
|
|
596 Query= %s
|
|
597 (%s letters)
|
|
598
|
|
599 Database: %s
|
|
600 %s sequences; %s total letters
|
|
601 },
|
|
602 &_linewrap($result->query_name . " " .
|
|
603 $result->query_description),
|
|
604 &_numwithcommas($result->query_length),
|
|
605 $result->database_name(),
|
|
606 &_numwithcommas($result->database_entries()),
|
|
607 &_numwithcommas($result->database_letters()),
|
|
608 );
|
|
609 }
|
|
610
|
|
611 =head2 end_report
|
|
612
|
|
613 Title : end_report
|
|
614 Usage : $self->end_report()
|
|
615 Function: The method to call when ending a report, this is
|
|
616 mostly for cleanup for formats which require you to
|
|
617 have something at the end of the document (</BODY></HTML>)
|
|
618 for HTML
|
|
619 Returns : string
|
|
620 Args : none
|
|
621
|
|
622 =cut
|
|
623
|
|
624 sub end_report {
|
|
625 return "";
|
|
626 }
|
|
627
|
|
628
|
|
629 # copied from Bio::Index::Fasta
|
|
630 # useful here as well
|
|
631
|
|
632 =head2 id_parser
|
|
633
|
|
634 Title : id_parser
|
|
635 Usage : $index->id_parser( CODE )
|
|
636 Function: Stores or returns the code used by record_id to
|
|
637 parse the ID for record from a string. Useful
|
|
638 for (for instance) specifying a different
|
|
639 parser for different flavours of FASTA file.
|
|
640 Returns \&default_id_parser (see below) if not
|
|
641 set. If you supply your own id_parser
|
|
642 subroutine, then it should expect a fasta
|
|
643 description line. An entry will be added to
|
|
644 the index for each string in the list returned.
|
|
645 Example : $index->id_parser( \&my_id_parser )
|
|
646 Returns : ref to CODE if called without arguments
|
|
647 Args : CODE
|
|
648
|
|
649 =cut
|
|
650
|
|
651 sub id_parser {
|
|
652 my( $self, $code ) = @_;
|
|
653
|
|
654 if ($code) {
|
|
655 $self->{'_id_parser'} = $code;
|
|
656 }
|
|
657 return $self->{'_id_parser'} || \&default_id_parser;
|
|
658 }
|
|
659
|
|
660
|
|
661
|
|
662 =head2 default_id_parser
|
|
663
|
|
664 Title : default_id_parser
|
|
665 Usage : $id = default_id_parser( $header )
|
|
666 Function: The default Fasta ID parser for Fasta.pm
|
|
667 Returns $1 from applying the regexp /^>\s*(\S+)/
|
|
668 to $header.
|
|
669 Returns : ID string
|
|
670 Args : a fasta header line string
|
|
671
|
|
672 =cut
|
|
673
|
|
674 sub default_id_parser {
|
|
675 my ($string) = @_;
|
|
676 my ($gi,$acc);
|
|
677 if( $string =~ s/gi\|(\d+)\|?// )
|
|
678 { $gi = $1; $acc = $1;}
|
|
679
|
|
680 if( $string =~ /(\w+)\|([A-Z\d\.\_]+)(\|[A-Z\d\_]+)?/ ) {
|
|
681 $acc = defined $2 ? $2 : $1;
|
|
682 } else {
|
|
683 $acc = $string;
|
|
684 $acc =~ s/^\s+(\S+)/$1/;
|
|
685 $acc =~ s/(\S+)\s+$/$1/;
|
|
686 }
|
|
687 return ($gi,$acc);
|
|
688 }
|
|
689
|
|
690 sub MIN { $a <=> $b ? $a : $b; }
|
|
691 sub MAX { $a <=> $b ? $b : $a; }
|
|
692
|
|
693
|
|
694 =head2 algorithm_reference
|
|
695
|
|
696 Title : algorithm_reference
|
|
697 Usage : my $reference = $writer->algorithm_reference($result);
|
|
698 Function: Returns the appropriate Bibliographic reference for the
|
|
699 algorithm format being produced
|
|
700 Returns : String
|
|
701 Args : L<Bio::Search::Result::ResultI> to reference
|
|
702
|
|
703
|
|
704 =cut
|
|
705
|
|
706 sub algorithm_reference{
|
|
707 my ($self,$result) = @_;
|
|
708 return '' if( ! defined $result || !ref($result) ||
|
|
709 ! $result->isa('Bio::Search::Result::ResultI')) ;
|
|
710 if( $result->algorithm =~ /BLAST/i ) {
|
|
711 my $res = $result->algorithm . ' '. $result->algorithm_version. "\n";
|
|
712 if( $result->algorithm_version =~ /WashU/i ) {
|
|
713 return $res .qq{
|
|
714 Copyright (C) 1996-2000 Washington University, Saint Louis, Missouri USA.
|
|
715 All Rights Reserved.
|
|
716
|
|
717 Reference: Gish, W. (1996-2000) http://blast.wustl.edu
|
|
718 };
|
|
719 } else {
|
|
720 return $res . qq{
|
|
721 Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
|
|
722 Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
|
|
723 "Gapped BLAST and PSI-BLAST: a new generation of protein database search
|
|
724 programs", Nucleic Acids Res. 25:3389-3402.
|
|
725 };
|
|
726 }
|
|
727 } elsif( $result->algorithm =~ /FAST/i ) {
|
|
728 return $result->algorithm. " ". $result->algorithm_version . "\n".
|
|
729 "\nReference: Pearson et al, Genomics (1997) 46:24-36\n";
|
|
730 } else {
|
|
731 return '';
|
|
732 }
|
|
733 }
|
|
734
|
|
735 # from Perl Cookbook 2.17
|
|
736 sub _numwithcommas {
|
|
737 my $num = reverse( $_[0] );
|
|
738 $num =~ s/(\d{3})(?=\d)(?!\d*\.)/$1,/g;
|
|
739 return scalar reverse $num;
|
|
740 }
|
|
741
|
|
742 sub _linewrap {
|
|
743 my ($str) = @_;
|
|
744 if($TextWrapLoaded) {
|
|
745 return Text::Wrap::wrap("","",$str); # use Text::Wrap
|
|
746 } else { return $str; } # cannot wrap
|
|
747 }
|
|
748 =head2 Methods Bio::SearchIO::SearchWriterI
|
|
749
|
|
750 L<Bio::SearchIO::SearchWriterI> inherited methods.
|
|
751
|
|
752 =head2 filter
|
|
753
|
|
754 Title : filter
|
|
755 Usage : $writer->filter('hsp', \&hsp_filter);
|
|
756 Function: Filter out either at HSP,Hit,or Result level
|
|
757 Returns : none
|
|
758 Args : string => data type,
|
|
759 CODE reference
|
|
760
|
|
761
|
|
762 =cut
|
|
763
|
|
764
|
|
765 1;
|