0
|
1 # $Id: HTMLResultWriter.pm,v 1.12.2.4 2003/09/15 16:08:55 jason Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::SearchIO::Writer::HTMLResultWriter
|
|
4 #
|
|
5 # Cared for by Jason Stajich <jason@bioperl.org>
|
|
6 #
|
|
7 # Copyright Jason Stajich
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # Changes 2003-07-31 (jason)
|
|
12 # Gary has cleaned up the code a lot to produce better looking
|
|
13 # HTML
|
|
14 # POD documentation - main docs before the code
|
|
15
|
|
16 =head1 NAME
|
|
17
|
|
18 Bio::SearchIO::Writer::HTMLResultWriter - Object to implement writing a Bio::Search::ResultI in HTML.
|
|
19
|
|
20 =head1 SYNOPSIS
|
|
21
|
|
22 use Bio::SearchIO;
|
|
23 use Bio::SearchIO::Writer::HTMLResultWriter;
|
|
24
|
|
25 my $in = new Bio::SearchIO(-format => 'blast',
|
|
26 -file => shift @ARGV);
|
|
27
|
|
28 my $writer = new Bio::SearchIO::Writer::HTMLResultWriter();
|
|
29 my $out = new Bio::SearchIO(-writer => $writer);
|
|
30 $out->write_result($in->next_result);
|
|
31
|
|
32
|
|
33 # to filter your output
|
|
34 my $MinLength = 100; # need a variable with scope outside the method
|
|
35 sub hsp_filter {
|
|
36 my $hsp = shift;
|
|
37 return 1 if $hsp->length('total') > $MinLength;
|
|
38 }
|
|
39 sub result_filter {
|
|
40 my $result = shift;
|
|
41 return $hsp->num_hits > 0;
|
|
42 }
|
|
43
|
|
44 my $writer = new Bio::SearchIO::Writer::HTMLResultWriter
|
|
45 (-filters => { 'HSP' => \&hsp_filter} );
|
|
46 my $out = new Bio::SearchIO(-writer => $writer);
|
|
47 $out->write_result($in->next_result);
|
|
48
|
|
49 # can also set the filter via the writer object
|
|
50 $writer->filter('RESULT', \&result_filter);
|
|
51
|
|
52 =head1 DESCRIPTION
|
|
53
|
|
54 This object implements the SearchWriterI interface which will produce
|
|
55 a set of HTML for a specific Bio::Search::Report::ReportI interface.
|
|
56
|
|
57
|
|
58 You can also provide the argument -filters => \%hash to filter the at
|
|
59 the hsp, hit, or result level. %hash is an associative array which
|
|
60 contains any or all of the keys (HSP, HIT, RESULT). The values
|
|
61 pointed to by these keys would be references to a subroutine which
|
|
62 expects to be passed an object - one of Bio::Search::HSP::HSPI,
|
|
63 Bio::Search::Hit::HitI, and Bio::Search::Result::ResultI respectively.
|
|
64 Each function needs to return a boolean value as to whether or not the
|
|
65 passed element should be included in the output report - true if it is to be included, false if it to be omitted.
|
|
66
|
|
67 For example to filter on sequences in the database which are too short
|
|
68 for your criteria you would do the following.
|
|
69
|
|
70 Define a hit filter method
|
|
71
|
|
72 sub hit_filter {
|
|
73 my $hit = shift;
|
|
74 return $hit->length E<gt> 100; # test if length of the hit sequence
|
|
75 # long enough
|
|
76 }
|
|
77 my $writer = new Bio::SearchIO::Writer::TextResultWriter(
|
|
78 -filters => { 'HIT' =E<gt> \&hit_filter }
|
|
79 );
|
|
80
|
|
81 Another example would be to filter HSPs on percent identity, let's
|
|
82 only include HSPs which are 75% identical or better.
|
|
83
|
|
84 sub hsp_filter {
|
|
85 my $hsp = shift;
|
|
86 return $hsp->percent_identity E<gt> 75;
|
|
87 }
|
|
88 my $writer = new Bio::SearchIO::Writer::TextResultWriter(
|
|
89 -filters => { 'HSP' =E<gt> \&hsp_filter }
|
|
90 );
|
|
91
|
|
92 See L<Bio::SearchIO::SearchWriterI> for more info on the filter method.
|
|
93
|
|
94
|
|
95 =head1 FEEDBACK
|
|
96
|
|
97 =head2 Mailing Lists
|
|
98
|
|
99 User feedback is an integral part of the evolution of this and other
|
|
100 Bioperl modules. Send your comments and suggestions preferably to
|
|
101 the Bioperl mailing list. Your participation is much appreciated.
|
|
102
|
|
103 bioperl-l@bioperl.org - General discussion
|
|
104 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
105
|
|
106 =head2 Reporting Bugs
|
|
107
|
|
108 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
109 of the bugs and their resolution. Bug reports can be submitted via
|
|
110 email or the web:
|
|
111
|
|
112 bioperl-bugs@bioperl.org
|
|
113 http://bugzilla.bioperl.org/
|
|
114
|
|
115 =head1 AUTHOR - Jason Stajich
|
|
116
|
|
117 Email jason-at-bioperl-dot-org
|
|
118
|
|
119 =head1 CONTRIBUTORS
|
|
120
|
|
121 Gary Williams G.Williams@hgmp.mrc.ac.uk
|
|
122
|
|
123 =head1 APPENDIX
|
|
124
|
|
125 The rest of the documentation details each of the object methods.
|
|
126 Internal methods are usually preceded with a _
|
|
127
|
|
128 =cut
|
|
129
|
|
130
|
|
131
|
|
132 package Bio::SearchIO::Writer::HTMLResultWriter;
|
|
133 use vars qw(@ISA %RemoteURLDefault
|
|
134 $MaxDescLen $DATE $AlignmentLineWidth $Revision);
|
|
135 use strict;
|
|
136 $Revision = '$Id: HTMLResultWriter.pm,v 1.12.2.4 2003/09/15 16:08:55 jason Exp $'; #'
|
|
137
|
|
138 # Object preamble - inherits from Bio::Root::RootI
|
|
139
|
|
140 BEGIN {
|
|
141 $DATE = localtime(time);
|
|
142 %RemoteURLDefault = ( 'PROTEIN' => 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=protein&cmd=search&term=%s',
|
|
143 'NUCLEOTIDE' => 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=nucleotide&cmd=search&term=%s'
|
|
144 );
|
|
145
|
|
146 $MaxDescLen = 60;
|
|
147 $AlignmentLineWidth = 60;
|
|
148 }
|
|
149
|
|
150 use Bio::Root::Root;
|
|
151 use Bio::SearchIO::SearchWriterI;
|
|
152
|
|
153 @ISA = qw(Bio::Root::Root Bio::SearchIO::SearchWriterI);
|
|
154
|
|
155 =head2 new
|
|
156
|
|
157 Title : new
|
|
158 Usage : my $obj = new Bio::SearchIO::Writer::HTMLResultWriter();
|
|
159 Function: Builds a new Bio::SearchIO::Writer::HTMLResultWriter object
|
|
160 Returns : Bio::SearchIO::Writer::HTMLResultWriter
|
|
161 Args : -filters => hashref with any or all of the keys (HSP HIT RESULT)
|
|
162 which have values pointing to a subroutine reference
|
|
163 which will expect to get a
|
|
164
|
|
165 =cut
|
|
166
|
|
167 sub new {
|
|
168 my($class,@args) = @_;
|
|
169
|
|
170 my $self = $class->SUPER::new(@args);
|
|
171 my ($p,$n,$filters) = $self->_rearrange([qw(PROTEIN_URL
|
|
172 NUCLEOTIDE_URL
|
|
173 FILTERS)],@args);
|
|
174 $self->remote_database_url('p',$p || $RemoteURLDefault{'PROTEIN'});
|
|
175 $self->remote_database_url('n',$n || $RemoteURLDefault{'NUCLEOTIDE'});
|
|
176
|
|
177 if( defined $filters ) {
|
|
178 if( !ref($filters) =~ /HASH/i ) {
|
|
179 $self->warn("Did not provide a hashref for the FILTERS option, ignoring.");
|
|
180 } else {
|
|
181 while( my ($type,$code) = each %{$filters} ) {
|
|
182 $self->filter($type,$code);
|
|
183 }
|
|
184 }
|
|
185 }
|
|
186
|
|
187 return $self;
|
|
188 }
|
|
189
|
|
190 =head2 remote_database_url
|
|
191
|
|
192 Title : remote_database_url
|
|
193 Usage : $obj->remote_database_url($type,$newval)
|
|
194 Function: This should return or set a string that contains a %s which can be
|
|
195 filled in with sprintf.
|
|
196 Returns : value of remote_database_url
|
|
197 Args : $type - 'PROTEIN' or 'P' for protein URLS
|
|
198 'NUCLEOTIDE' or 'N' for nucleotide URLS
|
|
199 $value - new value to set [optional]
|
|
200
|
|
201
|
|
202 =cut
|
|
203
|
|
204 sub remote_database_url{
|
|
205 my ($self,$type,$value) = @_;
|
|
206 if( ! defined $type || $type !~ /^(P|N)/i ) {
|
|
207 $self->warn("Must provide a type (PROTEIN or NUCLEOTIDE)");
|
|
208 return '';
|
|
209 }
|
|
210 $type = uc $1;
|
|
211 if( defined $value) {
|
|
212 $self->{'remote_database_url'}->{$type} = $value;
|
|
213 }
|
|
214 return $self->{'remote_database_url'}->{$type};
|
|
215 }
|
|
216
|
|
217 =head2 to_string
|
|
218
|
|
219 Purpose : Produces data for each Search::Result::ResultI in a string.
|
|
220 : This is an abstract method. For some useful implementations,
|
|
221 : see ResultTableWriter.pm, HitTableWriter.pm,
|
|
222 : and HSPTableWriter.pm.
|
|
223 Usage : print $writer->to_string( $result_obj, @args );
|
|
224 Argument : $result_obj = A Bio::Search::Result::ResultI object
|
|
225 : @args = any additional arguments used by your implementation.
|
|
226 Returns : String containing data for each search Result or any of its
|
|
227 : sub-objects (Hits and HSPs).
|
|
228 Throws : n/a
|
|
229
|
|
230 =cut
|
|
231
|
|
232 sub to_string {
|
|
233 my ($self,$result,$num) = @_;
|
|
234 $num ||= 0;
|
|
235 return unless defined $result;
|
|
236 my ($resultfilter,$hitfilter, $hspfilter) = ( $self->filter('RESULT'),
|
|
237 $self->filter('HIT'),
|
|
238 $self->filter('HSP') );
|
|
239 return '' if( defined $resultfilter && ! &{$resultfilter}($result) );
|
|
240
|
|
241 my ($qtype,$dbtype,$dbseqtype,$type);
|
|
242 my $alg = $result->algorithm;
|
|
243
|
|
244 # This is actually wrong for the FASTAs I think
|
|
245 if( $alg =~ /T(FAST|BLAST)([XY])/i ) {
|
|
246 $qtype = $dbtype = 'translated';
|
|
247 $dbseqtype = $type = 'PROTEIN';
|
|
248 } elsif( $alg =~ /T(FAST|BLAST)N/i ) {
|
|
249 $qtype = '';
|
|
250 $dbtype = 'translated';
|
|
251 $type = 'PROTEIN';
|
|
252 $dbseqtype = 'NUCLEOTIDE';
|
|
253 } elsif( $alg =~ /(FAST|BLAST)N/i ||
|
|
254 $alg =~ /(WABA|EXONERATE)/i ) {
|
|
255 $qtype = $dbtype = '';
|
|
256 $type = $dbseqtype = 'NUCLEOTIDE';
|
|
257 } elsif( $alg =~ /(FAST|BLAST)P/ || $alg =~ /SSEARCH/i ) {
|
|
258 $qtype = $dbtype = '';
|
|
259 $type = $dbseqtype = 'PROTEIN';
|
|
260 } elsif( $alg =~ /(FAST|BLAST)[XY]/i ) {
|
|
261 $qtype = 'translated';
|
|
262 $dbtype = 'PROTEIN';
|
|
263 $dbseqtype = $type = 'PROTEIN';
|
|
264 } else {
|
|
265 print STDERR "algorithm was ", $result->algorithm, " couldn't match\n";
|
|
266 }
|
|
267
|
|
268
|
|
269 my %baselens = ( 'Sbjct:' => ( $dbtype eq 'translated' ) ? 3 : 1,
|
|
270 'Query:' => ( $qtype eq 'translated' ) ? 3 : 1);
|
|
271
|
|
272 my $str;
|
|
273 if( ! defined $num || $num <= 1 ) {
|
|
274 $str = &{$self->start_report}($result);
|
|
275 }
|
|
276
|
|
277 $str .= &{$self->title}($result);
|
|
278
|
|
279 $str .= $result->algorithm_reference || $self->algorithm_reference($result);
|
|
280 $str .= &{$self->introduction}($result);
|
|
281
|
|
282 $str .= "<table border=0>
|
|
283 <tr><th>Sequences producing significant alignments:</th>
|
|
284 <th>Score<br>(bits)</th><th>E<br>value</th></tr>";
|
|
285
|
|
286 my $hspstr = '<p><p>';
|
|
287 if( $result->can('rewind')) {
|
|
288 $result->rewind(); # support stream based parsing routines
|
|
289 }
|
|
290
|
|
291 while( my $hit = $result->next_hit ) {
|
|
292 next if( $hitfilter && ! &{$hitfilter}($hit) );
|
|
293 my $nm = $hit->name();
|
|
294
|
|
295 $self->debug( "no $nm for name (".$hit->description(). "\n")
|
|
296 unless $nm;
|
|
297 my ($gi,$acc) = &{$self->id_parser}($nm);
|
|
298 my $p = "%-$MaxDescLen". "s";
|
|
299 my $descsub;
|
|
300 if( length($hit->description) > ($MaxDescLen - 3) ) {
|
|
301 $descsub = sprintf($p,
|
|
302 substr($hit->description,0,$MaxDescLen-3) . "...");
|
|
303 } else {
|
|
304 $descsub = sprintf($p,$hit->description);
|
|
305 }
|
|
306
|
|
307 my $url_desc = &{$self->hit_link_desc()}($self,$hit, $result);
|
|
308 my $url_align = &{$self->hit_link_align()}($self,$hit, $result);
|
|
309
|
|
310 my @hsps = $hit->hsps;
|
|
311
|
|
312 # failover to first HSP if the data does not contain a
|
|
313 # bitscore/significance value for the Hit (NCBI XML data for one)
|
|
314
|
|
315 $str .= sprintf('<tr><td>%s %s</td><td>%s</td><td><a href="#%s">%.2g</a></td></tr>'."\n",
|
|
316 $url_desc, $descsub,
|
|
317 ($hit->raw_score ? $hit->raw_score :
|
|
318 (defined $hsps[0] ? $hsps[0]->score : ' ')),
|
|
319 $acc,
|
|
320 ( $hit->significance ? $hit->significance :
|
|
321 (defined $hsps[0] ? $hsps[0]->evalue : ' '))
|
|
322 );
|
|
323
|
|
324 $hspstr .= "<a name=\"$acc\">\n".
|
|
325 sprintf("><b>%s</b> %s\n<dd>Length = %s</dd><p>\n\n", $url_align,
|
|
326 defined $hit->description ? $hit->description : '',
|
|
327 &_numwithcommas($hit->length));
|
|
328 my $ct = 0;
|
|
329 foreach my $hsp (@hsps ) {
|
|
330 next if( $hspfilter && ! &{$hspfilter}($hsp) );
|
|
331 $hspstr .= sprintf(" Score = %s bits (%s), Expect = %s",
|
|
332 $hsp->bits, $hsp->score, $hsp->evalue);
|
|
333 if( defined $hsp->pvalue ) {
|
|
334 $hspstr .= ", P = ".$hsp->pvalue;
|
|
335 }
|
|
336 $hspstr .= "<br>\n";
|
|
337 $hspstr .= sprintf(" Identities = %d/%d (%d%%)",
|
|
338 ( $hsp->frac_identical('total') *
|
|
339 $hsp->length('total')),
|
|
340 $hsp->length('total'),
|
|
341 $hsp->frac_identical('total') * 100);
|
|
342
|
|
343 if( $type eq 'PROTEIN' ) {
|
|
344 $hspstr .= sprintf(", Positives = %d/%d (%d%%)",
|
|
345 ( $hsp->frac_conserved('total') *
|
|
346 $hsp->length('total')),
|
|
347 $hsp->length('total'),
|
|
348 $hsp->frac_conserved('total') * 100);
|
|
349 }
|
|
350 if( $hsp->gaps ) {
|
|
351 $hspstr .= sprintf(", Gaps = %d/%d (%d%%)",
|
|
352 $hsp->gaps('total'),
|
|
353 $hsp->length('total'),
|
|
354 (100 * $hsp->gaps('total') /
|
|
355 $hsp->length('total')));
|
|
356 }
|
|
357
|
|
358 my ($hframe,$qframe) = ( $hsp->hit->frame, $hsp->query->frame);
|
|
359 my ($hstrand,$qstrand) = ($hsp->hit->strand,$hsp->query->strand);
|
|
360 # so TBLASTX will have Query/Hit frames
|
|
361 # BLASTX will have Query frame
|
|
362 # TBLASTN will have Hit frame
|
|
363 if( $hstrand || $qstrand ) {
|
|
364 $hspstr .= ", Frame = ";
|
|
365 my ($signq, $signh);
|
|
366 unless( $hstrand ) {
|
|
367 $hframe = undef;
|
|
368 # if strand is null or 0 then it is protein
|
|
369 # and this no frame
|
|
370 } else {
|
|
371 $signh = $hstrand < 0 ? '-' : '+';
|
|
372 }
|
|
373 unless( $qstrand ) {
|
|
374 $qframe = undef;
|
|
375 # if strand is null or 0 then it is protein
|
|
376 } else {
|
|
377 $signq =$qstrand < 0 ? '-' : '+';
|
|
378 }
|
|
379 # remember bioperl stores frames as 0,1,2 (GFF way)
|
|
380 # BLAST reports reports as 1,2,3 so
|
|
381 # we have to add 1 to the frame values
|
|
382 if( defined $hframe && ! defined $qframe) {
|
|
383 $hspstr .= "$signh".($hframe+1);
|
|
384 } elsif( defined $qframe && ! defined $hframe) {
|
|
385 $hspstr .= "$signq".($qframe+1);
|
|
386 } else {
|
|
387 $hspstr .= sprintf(" %s%d / %s%d",
|
|
388 $signq,$qframe+1,
|
|
389 $signh, $hframe+1);
|
|
390 }
|
|
391 }
|
|
392 # $hspstr .= "</pre></a><p>\n<pre>";
|
|
393 $hspstr .= "</a><p>\n<pre>";
|
|
394
|
|
395 my @hspvals = ( {'name' => 'Query:',
|
|
396 'seq' => $hsp->query_string,
|
|
397 'start' => ($qstrand >= 0 ?
|
|
398 $hsp->query->start :
|
|
399 $hsp->query->end),
|
|
400 'end' => ($qstrand >= 0 ?
|
|
401 $hsp->query->end :
|
|
402 $hsp->query->start),
|
|
403 'index' => 0,
|
|
404 'direction' => $qstrand || 1
|
|
405 },
|
|
406 { 'name' => ' 'x6,
|
|
407 'seq' => $hsp->homology_string,
|
|
408 'start' => undef,
|
|
409 'end' => undef,
|
|
410 'index' => 0,
|
|
411 'direction' => 1
|
|
412 },
|
|
413 { 'name' => 'Sbjct:',
|
|
414 'seq' => $hsp->hit_string,
|
|
415 'start' => ($hstrand >= 0 ?
|
|
416 $hsp->hit->start :
|
|
417 $hsp->hit->end),
|
|
418 'end' => ($hstrand >= 0 ?
|
|
419 $hsp->hit->end :
|
|
420 $hsp->hit->start),
|
|
421 'index' => 0,
|
|
422 'direction' => $hstrand || 1
|
|
423 }
|
|
424 );
|
|
425
|
|
426
|
|
427 # let's set the expected length (in chars) of the starting number
|
|
428 # in an alignment block so we can have things line up
|
|
429 # Just going to try and set to the largest
|
|
430
|
|
431 my ($numwidth) = sort { $b <=> $a }(length($hspvals[0]->{'start'}),
|
|
432 length($hspvals[0]->{'end'}),
|
|
433 length($hspvals[2]->{'start'}),
|
|
434 length($hspvals[2]->{'end'}));
|
|
435 my $count = 0;
|
|
436 while ( $count <= $hsp->length('total') ) {
|
|
437 foreach my $v ( @hspvals ) {
|
|
438 my $piece = substr($v->{'seq'}, $v->{'index'} + $count,
|
|
439 $AlignmentLineWidth);
|
|
440 my $cp = $piece;
|
|
441 my $plen = scalar ( $cp =~ tr/\-//);
|
|
442 my ($start,$end) = ('','');
|
|
443 if( defined $v->{'start'} ) {
|
|
444 $start = $v->{'start'};
|
|
445 # since strand can be + or - use the direction
|
|
446 # to signify which whether to add or substract from end
|
|
447 my $d = $v->{'direction'} * ( $AlignmentLineWidth - $plen )*
|
|
448 $baselens{$v->{'name'}};
|
|
449 if( length($piece) < $AlignmentLineWidth ) {
|
|
450 $d = (length($piece) - $plen) * $v->{'direction'} *
|
|
451 $baselens{$v->{'name'}};
|
|
452 }
|
|
453 $end = $v->{'start'} + $d - $v->{'direction'};
|
|
454 $v->{'start'} += $d;
|
|
455 }
|
|
456 $hspstr .= sprintf("%s %-".$numwidth."s %s %s\n",
|
|
457 $v->{'name'},
|
|
458 $start,
|
|
459 $piece,
|
|
460 $end
|
|
461 );
|
|
462 }
|
|
463 $count += $AlignmentLineWidth;
|
|
464 $hspstr .= "\n\n";
|
|
465 }
|
|
466 $hspstr .= "</pre>\n";
|
|
467 }
|
|
468 # $hspstr .= "</pre>\n";
|
|
469 }
|
|
470
|
|
471
|
|
472 # make table of search statistics and end the web page
|
|
473 $str .= "</table><p>\n".$hspstr."<p><p><hr><h2>Search Parameters</h2><table border=1><tr><th>Parameter</th><th>Value</th>\n";
|
|
474
|
|
475 foreach my $param ( $result->available_parameters ) {
|
|
476 $str .= "<tr><td>$param</td><td>". $result->get_parameter($param) ."</td></tr>\n";
|
|
477
|
|
478 }
|
|
479 $str .= "</table><p><h2>Search Statistics</h2><table border=1><tr><th>Statistic</th><th>Value</th></tr>\n";
|
|
480 foreach my $stat ( sort $result->available_statistics ) {
|
|
481 $str .= "<tr><td>$stat</td><td>". $result->get_statistic($stat). "</td></th>\n";
|
|
482 }
|
|
483 $str .= "</table><P>".$self->footer() . "<P>\n";
|
|
484 return $str;
|
|
485 }
|
|
486
|
|
487 =head2 hit_link_desc
|
|
488
|
|
489 Title : hit_link_desc
|
|
490 Usage : $self->hit_link_desc(\&link_function);
|
|
491 Function: Get/Set the function which provides an HTML
|
|
492 link(s) for the given hit to be used
|
|
493 within the description section at the top of the BLAST report.
|
|
494 This allows a person reading the report within
|
|
495 a web browser to go to one or more database entries for
|
|
496 the given hit from the description section.
|
|
497 Returns : Function reference
|
|
498 Args : Function reference
|
|
499 See Also: L<default_hit_link_desc()>
|
|
500
|
|
501 =cut
|
|
502
|
|
503 sub hit_link_desc{
|
|
504 my( $self, $code ) = @_;
|
|
505 if ($code) {
|
|
506 $self->{'_hit_link_desc'} = $code;
|
|
507 }
|
|
508 return $self->{'_hit_link_desc'} || \&default_hit_link_desc;
|
|
509 }
|
|
510
|
|
511 =head2 default_hit_link_desc
|
|
512
|
|
513 Title : defaulthit_link_desc
|
|
514 Usage : $self->default_hit_link_desc($hit, $result)
|
|
515 Function: Provides an HTML link(s) for the given hit to be used
|
|
516 within the description section at the top of the BLAST report.
|
|
517 This allows a person reading the report within
|
|
518 a web browser to go to one or more database entries for
|
|
519 the given hit from the description section.
|
|
520 Returns : string containing HTML markup "<a href...")
|
|
521
|
|
522 The default implementation returns an HTML link to the
|
|
523 URL supplied by the remote_database_url() method
|
|
524 and using the identifier supplied by the id_parser() method.
|
|
525 It will use the NCBI GI if present, and the accession if not.
|
|
526
|
|
527 Args : First argument is a Bio::Search::Hit::HitI
|
|
528 Second argument is a Bio::Search::Result::ResultI
|
|
529
|
|
530 See Also: L<hit_link_align>, L<remote_database>, L<id_parser>
|
|
531
|
|
532 =cut
|
|
533
|
|
534 sub default_hit_link_desc {
|
|
535 my($self, $hit, $result) = @_;
|
|
536 my $type = ( $result->algorithm =~ /(P|X|Y)$/i ) ? 'PROTEIN' : 'NUCLEOTIDE';
|
|
537 my ($gi,$acc) = &{$self->id_parser}($hit->name);
|
|
538
|
|
539 my $url = length($self->remote_database_url($type)) > 0 ?
|
|
540 sprintf('<a href="%s">%s</a>',
|
|
541 sprintf($self->remote_database_url($type),$gi || $acc),
|
|
542 $hit->name()) : $hit->name();
|
|
543
|
|
544 return $url;
|
|
545 }
|
|
546
|
|
547
|
|
548 =head2 hit_link_align
|
|
549
|
|
550 Title : hit_link_align
|
|
551 Usage : $self->hit_link_align(\&link_function);
|
|
552 Function: Get/Set the function which provides an HTML link(s)
|
|
553 for the given hit to be used
|
|
554 within the HSP alignment section of the BLAST report.
|
|
555 This allows a person reading the report within
|
|
556 a web browser to go to one or more database entries for
|
|
557 the given hit from the alignment section.
|
|
558 Returns : string containing HTML markup "<a href...")
|
|
559
|
|
560 The default implementation delegates to hit_link_desc().
|
|
561
|
|
562 Args : First argument is a Bio::Search::Hit::HitI
|
|
563 Second argument is a Bio::Search::Result::ResultI
|
|
564
|
|
565 See Also: L<hit_link_desc>, L<remote_database>, L<id_parser>
|
|
566
|
|
567 =cut
|
|
568
|
|
569 sub hit_link_align {
|
|
570 my ($self,$code) = @_;
|
|
571 if ($code) {
|
|
572 $self->{'_hit_link_align'} = $code;
|
|
573 }
|
|
574 return $self->{'_hit_link_align'} || \&default_hit_link_desc;
|
|
575 }
|
|
576
|
|
577 =head2 start_report
|
|
578
|
|
579 Title : start_report
|
|
580 Usage : $index->start_report( CODE )
|
|
581 Function: Stores or returns the code to
|
|
582 write the start of the <HTML> block, the <TITLE> block
|
|
583 and the start of the <BODY> block of HTML. Useful
|
|
584 for (for instance) specifying alternative
|
|
585 HTML if you are embedding the output in
|
|
586 an HTML page which you have already started.
|
|
587 (For example a routine returning a null string).
|
|
588 Returns \&default_start_report (see below) if not
|
|
589 set.
|
|
590 Example : $index->start_report( \&my_start_report )
|
|
591 Returns : ref to CODE if called without arguments
|
|
592 Args : CODE
|
|
593
|
|
594 =cut
|
|
595
|
|
596 sub start_report {
|
|
597 my( $self, $code ) = @_;
|
|
598 if ($code) {
|
|
599 $self->{'_start_report'} = $code;
|
|
600 }
|
|
601 return $self->{'_start_report'} || \&default_start_report;
|
|
602 }
|
|
603
|
|
604 =head2 default_start_report
|
|
605
|
|
606 Title : default_start_report
|
|
607 Usage : $self->default_start_report($result)
|
|
608 Function: The default method to call when starting a report.
|
|
609 Returns : sting
|
|
610 Args : First argument is a Bio::Search::Result::ResultI
|
|
611
|
|
612 =cut
|
|
613
|
|
614 sub default_start_report {
|
|
615 my ($result) = @_;
|
|
616 return sprintf(
|
|
617 qq{<HTML>
|
|
618 <HEAD> <CENTER><TITLE>Bioperl Reformatted HTML of %s output with Bioperl Bio::SearchIO system</TITLE></CENTER></HEAD>
|
|
619 <!------------------------------------------------------------------->
|
|
620 <!-- Generated by Bio::SearchIO::Writer::HTMLResultWriter -->
|
|
621 <!-- %s -->
|
|
622 <!-- http://bioperl.org -->
|
|
623 <!------------------------------------------------------------------->
|
|
624 <BODY BGCOLOR="WHITE">
|
|
625 },$result->algorithm,$Revision);
|
|
626
|
|
627 }
|
|
628
|
|
629 =head2 title
|
|
630
|
|
631 Title : title
|
|
632 Usage : $self->title($CODE)
|
|
633
|
|
634 Function: Stores or returns the code to provide HTML for the given
|
|
635 BLAST report that will appear at the top of the BLAST report
|
|
636 HTML output. Useful for (for instance) specifying
|
|
637 alternative routines to write your own titles.
|
|
638 Returns \&default_title (see below) if not
|
|
639 set.
|
|
640 Example : $index->title( \&my_title )
|
|
641 Returns : ref to CODE if called without arguments
|
|
642 Args : CODE
|
|
643
|
|
644 =cut
|
|
645
|
|
646 sub title {
|
|
647 my( $self, $code ) = @_;
|
|
648 if ($code) {
|
|
649 $self->{'_title'} = $code;
|
|
650 }
|
|
651 return $self->{'_title'} || \&default_title;
|
|
652 }
|
|
653
|
|
654 =head2 default_title
|
|
655
|
|
656 Title : default_title
|
|
657 Usage : $self->default_title($result)
|
|
658 Function: Provides HTML for the given BLAST report that will appear
|
|
659 at the top of the BLAST report HTML output.
|
|
660 Returns : string containing HTML markup
|
|
661 The default implementation returns <CENTER> <H1> HTML
|
|
662 containing text such as:
|
|
663 "Bioperl Reformatted HTML of BLASTP Search Report
|
|
664 for gi|1786183|gb|AAC73113.1|"
|
|
665 Args : First argument is a Bio::Search::Result::ResultI
|
|
666
|
|
667 =cut
|
|
668
|
|
669 sub default_title {
|
|
670 my ($result) = @_;
|
|
671
|
|
672 return sprintf(
|
|
673 qq{<CENTER><H1><a href="http://bioperl.org">Bioperl</a> Reformatted HTML of %s Search Report<br> for %s</H1></CENTER>},
|
|
674 $result->algorithm,
|
|
675 $result->query_name());
|
|
676 }
|
|
677
|
|
678
|
|
679 =head2 introduction
|
|
680
|
|
681 Title : introduction
|
|
682 Usage : $self->introduction($CODE)
|
|
683
|
|
684 Function: Stores or returns the code to provide HTML for the given
|
|
685 BLAST report detailing the query and the
|
|
686 database information.
|
|
687 Useful for (for instance) specifying
|
|
688 routines returning alternative introductions.
|
|
689 Returns \&default_introduction (see below) if not
|
|
690 set.
|
|
691 Example : $index->introduction( \&my_introduction )
|
|
692 Returns : ref to CODE if called without arguments
|
|
693 Args : CODE
|
|
694
|
|
695 =cut
|
|
696
|
|
697 sub introduction {
|
|
698 my( $self, $code ) = @_;
|
|
699 if ($code) {
|
|
700 $self->{'_introduction'} = $code;
|
|
701 }
|
|
702 return $self->{'_introduction'} || \&default_introduction;
|
|
703 }
|
|
704
|
|
705 =head2 default_introduction
|
|
706
|
|
707 Title : default_introduction
|
|
708 Usage : $self->default_introduction($result)
|
|
709 Function: Outputs HTML to provide the query
|
|
710 and the database information
|
|
711 Returns : string containing HTML
|
|
712 Args : First argument is a Bio::Search::Result::ResultI
|
|
713 Second argument is string holding literature citation
|
|
714
|
|
715 =cut
|
|
716
|
|
717 sub default_introduction {
|
|
718 my ($result) = @_;
|
|
719
|
|
720 return sprintf(
|
|
721 qq{
|
|
722 <b>Query=</b> %s %s<br><dd>(%s letters)</dd>
|
|
723 <p>
|
|
724 <b>Database:</b> %s<br><dd>%s sequences; %s total letters<p></dd>
|
|
725 <p>
|
|
726 },
|
|
727 $result->query_name,
|
|
728 $result->query_description,
|
|
729 &_numwithcommas($result->query_length),
|
|
730 $result->database_name(),
|
|
731 &_numwithcommas($result->database_entries()),
|
|
732 &_numwithcommas($result->database_letters()),
|
|
733 );
|
|
734 }
|
|
735
|
|
736 =head2 end_report
|
|
737
|
|
738 Title : end_report
|
|
739 Usage : $self->end_report()
|
|
740 Function: The method to call when ending a report, this is
|
|
741 mostly for cleanup for formats which require you to
|
|
742 have something at the end of the document (</BODY></HTML>)
|
|
743 for HTML
|
|
744 Returns : string
|
|
745 Args : none
|
|
746
|
|
747 =cut
|
|
748
|
|
749 sub end_report {
|
|
750 return "</BODY>\n</HTML>\n";
|
|
751 }
|
|
752
|
|
753 # copied from Bio::Index::Fasta
|
|
754 # useful here as well
|
|
755
|
|
756 =head2 id_parser
|
|
757
|
|
758 Title : id_parser
|
|
759 Usage : $index->id_parser( CODE )
|
|
760 Function: Stores or returns the code used by record_id to
|
|
761 parse the ID for record from a string. Useful
|
|
762 for (for instance) specifying a different
|
|
763 parser for different flavours of FASTA file.
|
|
764 Returns \&default_id_parser (see below) if not
|
|
765 set. If you supply your own id_parser
|
|
766 subroutine, then it should expect a fasta
|
|
767 description line. An entry will be added to
|
|
768 the index for each string in the list returned.
|
|
769 Example : $index->id_parser( \&my_id_parser )
|
|
770 Returns : ref to CODE if called without arguments
|
|
771 Args : CODE
|
|
772
|
|
773 =cut
|
|
774
|
|
775 sub id_parser {
|
|
776 my( $self, $code ) = @_;
|
|
777
|
|
778 if ($code) {
|
|
779 $self->{'_id_parser'} = $code;
|
|
780 }
|
|
781 return $self->{'_id_parser'} || \&default_id_parser;
|
|
782 }
|
|
783
|
|
784
|
|
785
|
|
786 =head2 default_id_parser
|
|
787
|
|
788 Title : default_id_parser
|
|
789 Usage : $id = default_id_parser( $header )
|
|
790 Function: The default Fasta ID parser for Fasta.pm
|
|
791 Returns $1 from applying the regexp /^>\s*(\S+)/
|
|
792 to $header.
|
|
793 Returns : ID string
|
|
794 The default implementation checks for NCBI-style
|
|
795 identifiers in the given string ('gi|12345|AA54321').
|
|
796 For these IDs, it extracts the GI and accession and
|
|
797 returns a two-element list of strings (GI, acc).
|
|
798 Args : a fasta header line string
|
|
799
|
|
800 =cut
|
|
801
|
|
802 sub default_id_parser {
|
|
803 my ($string) = @_;
|
|
804 my ($gi,$acc);
|
|
805 if( $string =~ s/gi\|(\d+)\|?// )
|
|
806 { $gi = $1; $acc = $1;}
|
|
807
|
|
808 if( $string =~ /(\w+)\|([A-Z\d\.\_]+)(\|[A-Z\d\_]+)?/ ) {
|
|
809 $acc = defined $2 ? $2 : $1;
|
|
810 } else {
|
|
811 $acc = $string;
|
|
812 $acc =~ s/^\s+(\S+)/$1/;
|
|
813 $acc =~ s/(\S+)\s+$/$1/;
|
|
814 }
|
|
815 return ($gi,$acc);
|
|
816 }
|
|
817
|
|
818 sub MIN { $a <=> $b ? $a : $b; }
|
|
819 sub MAX { $a <=> $b ? $b : $a; }
|
|
820
|
|
821 sub footer {
|
|
822 my ($self) = @_;
|
|
823 return "<hr><h5>Produced by Bioperl module ".ref($self)." on $DATE<br>Revision: $Revision</h5>\n"
|
|
824
|
|
825 }
|
|
826
|
|
827 =head2 algorithm_reference
|
|
828
|
|
829 Title : algorithm_reference
|
|
830 Usage : my $reference = $writer->algorithm_reference($result);
|
|
831 Function: Returns the appropriate Bibliographic reference for the
|
|
832 algorithm format being produced
|
|
833 Returns : String
|
|
834 Args : L<Bio::Search::Result::ResultI> to reference
|
|
835
|
|
836
|
|
837 =cut
|
|
838
|
|
839 sub algorithm_reference {
|
|
840 my ($self,$result) = @_;
|
|
841 return '' if( ! defined $result || !ref($result) ||
|
|
842 ! $result->isa('Bio::Search::Result::ResultI')) ;
|
|
843 if( $result->algorithm =~ /BLAST/i ) {
|
|
844 my $res = $result->algorithm . ' ' . $result->algorithm_version . "<p>";
|
|
845 if( $result->algorithm_version =~ /WashU/i ) {
|
|
846 return $res .
|
|
847 "Copyright (C) 1996-2000 Washington University, Saint Louis, Missouri USA.<br>
|
|
848 All Rights Reserved.<p>
|
|
849 <b>Reference:</b> Gish, W. (1996-2000) <a href=\"http://blast.wustl.edu\">http://blast.wustl.edu</a><p>";
|
|
850 } else {
|
|
851 return $res .
|
|
852 "<b>Reference:</b> Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,<br>
|
|
853 Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),<br>
|
|
854 \"Gapped BLAST and PSI-BLAST: a new generation of protein database search<br>
|
|
855 programs\", Nucleic Acids Res. 25:3389-3402.<p>";
|
|
856
|
|
857 }
|
|
858 } elsif( $result->algorithm =~ /FAST/i ) {
|
|
859 return $result->algorithm . " " . $result->algorithm_version . "<br>" .
|
|
860 "\n<b>Reference:</b> Pearson et al, Genomics (1997) 46:24-36<p>";
|
|
861 } else {
|
|
862 return '';
|
|
863 }
|
|
864 }
|
|
865
|
|
866 # from Perl Cookbook 2.17
|
|
867 sub _numwithcommas {
|
|
868 my $num = reverse( $_[0] );
|
|
869 $num =~ s/(\d{3})(?=\d)(?!\d*\.)/$1,/g;
|
|
870 return scalar reverse $num;
|
|
871 }
|
|
872
|
|
873 =head2 Methods Bio::SearchIO::SearchWriterI
|
|
874
|
|
875 L<Bio::SearchIO::SearchWriterI> inherited methods.
|
|
876
|
|
877 =head2 filter
|
|
878
|
|
879 Title : filter
|
|
880 Usage : $writer->filter('hsp', \&hsp_filter);
|
|
881 Function: Filter out either at HSP,Hit,or Result level
|
|
882 Returns : none
|
|
883 Args : string => data type,
|
|
884 CODE reference
|
|
885
|
|
886
|
|
887 =cut
|
|
888
|
|
889 1;
|