Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SearchIO/Writer/HTMLResultWriter.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: HTMLResultWriter.pm,v 1.12.2.4 2003/09/15 16:08:55 jason Exp $ | |
2 # | |
3 # BioPerl module for Bio::SearchIO::Writer::HTMLResultWriter | |
4 # | |
5 # Cared for by Jason Stajich <jason@bioperl.org> | |
6 # | |
7 # Copyright Jason Stajich | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # Changes 2003-07-31 (jason) | |
12 # Gary has cleaned up the code a lot to produce better looking | |
13 # HTML | |
14 # POD documentation - main docs before the code | |
15 | |
16 =head1 NAME | |
17 | |
18 Bio::SearchIO::Writer::HTMLResultWriter - Object to implement writing a Bio::Search::ResultI in HTML. | |
19 | |
20 =head1 SYNOPSIS | |
21 | |
22 use Bio::SearchIO; | |
23 use Bio::SearchIO::Writer::HTMLResultWriter; | |
24 | |
25 my $in = new Bio::SearchIO(-format => 'blast', | |
26 -file => shift @ARGV); | |
27 | |
28 my $writer = new Bio::SearchIO::Writer::HTMLResultWriter(); | |
29 my $out = new Bio::SearchIO(-writer => $writer); | |
30 $out->write_result($in->next_result); | |
31 | |
32 | |
33 # to filter your output | |
34 my $MinLength = 100; # need a variable with scope outside the method | |
35 sub hsp_filter { | |
36 my $hsp = shift; | |
37 return 1 if $hsp->length('total') > $MinLength; | |
38 } | |
39 sub result_filter { | |
40 my $result = shift; | |
41 return $hsp->num_hits > 0; | |
42 } | |
43 | |
44 my $writer = new Bio::SearchIO::Writer::HTMLResultWriter | |
45 (-filters => { 'HSP' => \&hsp_filter} ); | |
46 my $out = new Bio::SearchIO(-writer => $writer); | |
47 $out->write_result($in->next_result); | |
48 | |
49 # can also set the filter via the writer object | |
50 $writer->filter('RESULT', \&result_filter); | |
51 | |
52 =head1 DESCRIPTION | |
53 | |
54 This object implements the SearchWriterI interface which will produce | |
55 a set of HTML for a specific Bio::Search::Report::ReportI interface. | |
56 | |
57 | |
58 You can also provide the argument -filters => \%hash to filter the at | |
59 the hsp, hit, or result level. %hash is an associative array which | |
60 contains any or all of the keys (HSP, HIT, RESULT). The values | |
61 pointed to by these keys would be references to a subroutine which | |
62 expects to be passed an object - one of Bio::Search::HSP::HSPI, | |
63 Bio::Search::Hit::HitI, and Bio::Search::Result::ResultI respectively. | |
64 Each function needs to return a boolean value as to whether or not the | |
65 passed element should be included in the output report - true if it is to be included, false if it to be omitted. | |
66 | |
67 For example to filter on sequences in the database which are too short | |
68 for your criteria you would do the following. | |
69 | |
70 Define a hit filter method | |
71 | |
72 sub hit_filter { | |
73 my $hit = shift; | |
74 return $hit->length E<gt> 100; # test if length of the hit sequence | |
75 # long enough | |
76 } | |
77 my $writer = new Bio::SearchIO::Writer::TextResultWriter( | |
78 -filters => { 'HIT' =E<gt> \&hit_filter } | |
79 ); | |
80 | |
81 Another example would be to filter HSPs on percent identity, let's | |
82 only include HSPs which are 75% identical or better. | |
83 | |
84 sub hsp_filter { | |
85 my $hsp = shift; | |
86 return $hsp->percent_identity E<gt> 75; | |
87 } | |
88 my $writer = new Bio::SearchIO::Writer::TextResultWriter( | |
89 -filters => { 'HSP' =E<gt> \&hsp_filter } | |
90 ); | |
91 | |
92 See L<Bio::SearchIO::SearchWriterI> for more info on the filter method. | |
93 | |
94 | |
95 =head1 FEEDBACK | |
96 | |
97 =head2 Mailing Lists | |
98 | |
99 User feedback is an integral part of the evolution of this and other | |
100 Bioperl modules. Send your comments and suggestions preferably to | |
101 the Bioperl mailing list. Your participation is much appreciated. | |
102 | |
103 bioperl-l@bioperl.org - General discussion | |
104 http://bioperl.org/MailList.shtml - About the mailing lists | |
105 | |
106 =head2 Reporting Bugs | |
107 | |
108 Report bugs to the Bioperl bug tracking system to help us keep track | |
109 of the bugs and their resolution. Bug reports can be submitted via | |
110 email or the web: | |
111 | |
112 bioperl-bugs@bioperl.org | |
113 http://bugzilla.bioperl.org/ | |
114 | |
115 =head1 AUTHOR - Jason Stajich | |
116 | |
117 Email jason-at-bioperl-dot-org | |
118 | |
119 =head1 CONTRIBUTORS | |
120 | |
121 Gary Williams G.Williams@hgmp.mrc.ac.uk | |
122 | |
123 =head1 APPENDIX | |
124 | |
125 The rest of the documentation details each of the object methods. | |
126 Internal methods are usually preceded with a _ | |
127 | |
128 =cut | |
129 | |
130 | |
131 | |
132 package Bio::SearchIO::Writer::HTMLResultWriter; | |
133 use vars qw(@ISA %RemoteURLDefault | |
134 $MaxDescLen $DATE $AlignmentLineWidth $Revision); | |
135 use strict; | |
136 $Revision = '$Id: HTMLResultWriter.pm,v 1.12.2.4 2003/09/15 16:08:55 jason Exp $'; #' | |
137 | |
138 # Object preamble - inherits from Bio::Root::RootI | |
139 | |
140 BEGIN { | |
141 $DATE = localtime(time); | |
142 %RemoteURLDefault = ( 'PROTEIN' => 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=protein&cmd=search&term=%s', | |
143 'NUCLEOTIDE' => 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=nucleotide&cmd=search&term=%s' | |
144 ); | |
145 | |
146 $MaxDescLen = 60; | |
147 $AlignmentLineWidth = 60; | |
148 } | |
149 | |
150 use Bio::Root::Root; | |
151 use Bio::SearchIO::SearchWriterI; | |
152 | |
153 @ISA = qw(Bio::Root::Root Bio::SearchIO::SearchWriterI); | |
154 | |
155 =head2 new | |
156 | |
157 Title : new | |
158 Usage : my $obj = new Bio::SearchIO::Writer::HTMLResultWriter(); | |
159 Function: Builds a new Bio::SearchIO::Writer::HTMLResultWriter object | |
160 Returns : Bio::SearchIO::Writer::HTMLResultWriter | |
161 Args : -filters => hashref with any or all of the keys (HSP HIT RESULT) | |
162 which have values pointing to a subroutine reference | |
163 which will expect to get a | |
164 | |
165 =cut | |
166 | |
167 sub new { | |
168 my($class,@args) = @_; | |
169 | |
170 my $self = $class->SUPER::new(@args); | |
171 my ($p,$n,$filters) = $self->_rearrange([qw(PROTEIN_URL | |
172 NUCLEOTIDE_URL | |
173 FILTERS)],@args); | |
174 $self->remote_database_url('p',$p || $RemoteURLDefault{'PROTEIN'}); | |
175 $self->remote_database_url('n',$n || $RemoteURLDefault{'NUCLEOTIDE'}); | |
176 | |
177 if( defined $filters ) { | |
178 if( !ref($filters) =~ /HASH/i ) { | |
179 $self->warn("Did not provide a hashref for the FILTERS option, ignoring."); | |
180 } else { | |
181 while( my ($type,$code) = each %{$filters} ) { | |
182 $self->filter($type,$code); | |
183 } | |
184 } | |
185 } | |
186 | |
187 return $self; | |
188 } | |
189 | |
190 =head2 remote_database_url | |
191 | |
192 Title : remote_database_url | |
193 Usage : $obj->remote_database_url($type,$newval) | |
194 Function: This should return or set a string that contains a %s which can be | |
195 filled in with sprintf. | |
196 Returns : value of remote_database_url | |
197 Args : $type - 'PROTEIN' or 'P' for protein URLS | |
198 'NUCLEOTIDE' or 'N' for nucleotide URLS | |
199 $value - new value to set [optional] | |
200 | |
201 | |
202 =cut | |
203 | |
204 sub remote_database_url{ | |
205 my ($self,$type,$value) = @_; | |
206 if( ! defined $type || $type !~ /^(P|N)/i ) { | |
207 $self->warn("Must provide a type (PROTEIN or NUCLEOTIDE)"); | |
208 return ''; | |
209 } | |
210 $type = uc $1; | |
211 if( defined $value) { | |
212 $self->{'remote_database_url'}->{$type} = $value; | |
213 } | |
214 return $self->{'remote_database_url'}->{$type}; | |
215 } | |
216 | |
217 =head2 to_string | |
218 | |
219 Purpose : Produces data for each Search::Result::ResultI in a string. | |
220 : This is an abstract method. For some useful implementations, | |
221 : see ResultTableWriter.pm, HitTableWriter.pm, | |
222 : and HSPTableWriter.pm. | |
223 Usage : print $writer->to_string( $result_obj, @args ); | |
224 Argument : $result_obj = A Bio::Search::Result::ResultI object | |
225 : @args = any additional arguments used by your implementation. | |
226 Returns : String containing data for each search Result or any of its | |
227 : sub-objects (Hits and HSPs). | |
228 Throws : n/a | |
229 | |
230 =cut | |
231 | |
232 sub to_string { | |
233 my ($self,$result,$num) = @_; | |
234 $num ||= 0; | |
235 return unless defined $result; | |
236 my ($resultfilter,$hitfilter, $hspfilter) = ( $self->filter('RESULT'), | |
237 $self->filter('HIT'), | |
238 $self->filter('HSP') ); | |
239 return '' if( defined $resultfilter && ! &{$resultfilter}($result) ); | |
240 | |
241 my ($qtype,$dbtype,$dbseqtype,$type); | |
242 my $alg = $result->algorithm; | |
243 | |
244 # This is actually wrong for the FASTAs I think | |
245 if( $alg =~ /T(FAST|BLAST)([XY])/i ) { | |
246 $qtype = $dbtype = 'translated'; | |
247 $dbseqtype = $type = 'PROTEIN'; | |
248 } elsif( $alg =~ /T(FAST|BLAST)N/i ) { | |
249 $qtype = ''; | |
250 $dbtype = 'translated'; | |
251 $type = 'PROTEIN'; | |
252 $dbseqtype = 'NUCLEOTIDE'; | |
253 } elsif( $alg =~ /(FAST|BLAST)N/i || | |
254 $alg =~ /(WABA|EXONERATE)/i ) { | |
255 $qtype = $dbtype = ''; | |
256 $type = $dbseqtype = 'NUCLEOTIDE'; | |
257 } elsif( $alg =~ /(FAST|BLAST)P/ || $alg =~ /SSEARCH/i ) { | |
258 $qtype = $dbtype = ''; | |
259 $type = $dbseqtype = 'PROTEIN'; | |
260 } elsif( $alg =~ /(FAST|BLAST)[XY]/i ) { | |
261 $qtype = 'translated'; | |
262 $dbtype = 'PROTEIN'; | |
263 $dbseqtype = $type = 'PROTEIN'; | |
264 } else { | |
265 print STDERR "algorithm was ", $result->algorithm, " couldn't match\n"; | |
266 } | |
267 | |
268 | |
269 my %baselens = ( 'Sbjct:' => ( $dbtype eq 'translated' ) ? 3 : 1, | |
270 'Query:' => ( $qtype eq 'translated' ) ? 3 : 1); | |
271 | |
272 my $str; | |
273 if( ! defined $num || $num <= 1 ) { | |
274 $str = &{$self->start_report}($result); | |
275 } | |
276 | |
277 $str .= &{$self->title}($result); | |
278 | |
279 $str .= $result->algorithm_reference || $self->algorithm_reference($result); | |
280 $str .= &{$self->introduction}($result); | |
281 | |
282 $str .= "<table border=0> | |
283 <tr><th>Sequences producing significant alignments:</th> | |
284 <th>Score<br>(bits)</th><th>E<br>value</th></tr>"; | |
285 | |
286 my $hspstr = '<p><p>'; | |
287 if( $result->can('rewind')) { | |
288 $result->rewind(); # support stream based parsing routines | |
289 } | |
290 | |
291 while( my $hit = $result->next_hit ) { | |
292 next if( $hitfilter && ! &{$hitfilter}($hit) ); | |
293 my $nm = $hit->name(); | |
294 | |
295 $self->debug( "no $nm for name (".$hit->description(). "\n") | |
296 unless $nm; | |
297 my ($gi,$acc) = &{$self->id_parser}($nm); | |
298 my $p = "%-$MaxDescLen". "s"; | |
299 my $descsub; | |
300 if( length($hit->description) > ($MaxDescLen - 3) ) { | |
301 $descsub = sprintf($p, | |
302 substr($hit->description,0,$MaxDescLen-3) . "..."); | |
303 } else { | |
304 $descsub = sprintf($p,$hit->description); | |
305 } | |
306 | |
307 my $url_desc = &{$self->hit_link_desc()}($self,$hit, $result); | |
308 my $url_align = &{$self->hit_link_align()}($self,$hit, $result); | |
309 | |
310 my @hsps = $hit->hsps; | |
311 | |
312 # failover to first HSP if the data does not contain a | |
313 # bitscore/significance value for the Hit (NCBI XML data for one) | |
314 | |
315 $str .= sprintf('<tr><td>%s %s</td><td>%s</td><td><a href="#%s">%.2g</a></td></tr>'."\n", | |
316 $url_desc, $descsub, | |
317 ($hit->raw_score ? $hit->raw_score : | |
318 (defined $hsps[0] ? $hsps[0]->score : ' ')), | |
319 $acc, | |
320 ( $hit->significance ? $hit->significance : | |
321 (defined $hsps[0] ? $hsps[0]->evalue : ' ')) | |
322 ); | |
323 | |
324 $hspstr .= "<a name=\"$acc\">\n". | |
325 sprintf("><b>%s</b> %s\n<dd>Length = %s</dd><p>\n\n", $url_align, | |
326 defined $hit->description ? $hit->description : '', | |
327 &_numwithcommas($hit->length)); | |
328 my $ct = 0; | |
329 foreach my $hsp (@hsps ) { | |
330 next if( $hspfilter && ! &{$hspfilter}($hsp) ); | |
331 $hspstr .= sprintf(" Score = %s bits (%s), Expect = %s", | |
332 $hsp->bits, $hsp->score, $hsp->evalue); | |
333 if( defined $hsp->pvalue ) { | |
334 $hspstr .= ", P = ".$hsp->pvalue; | |
335 } | |
336 $hspstr .= "<br>\n"; | |
337 $hspstr .= sprintf(" Identities = %d/%d (%d%%)", | |
338 ( $hsp->frac_identical('total') * | |
339 $hsp->length('total')), | |
340 $hsp->length('total'), | |
341 $hsp->frac_identical('total') * 100); | |
342 | |
343 if( $type eq 'PROTEIN' ) { | |
344 $hspstr .= sprintf(", Positives = %d/%d (%d%%)", | |
345 ( $hsp->frac_conserved('total') * | |
346 $hsp->length('total')), | |
347 $hsp->length('total'), | |
348 $hsp->frac_conserved('total') * 100); | |
349 } | |
350 if( $hsp->gaps ) { | |
351 $hspstr .= sprintf(", Gaps = %d/%d (%d%%)", | |
352 $hsp->gaps('total'), | |
353 $hsp->length('total'), | |
354 (100 * $hsp->gaps('total') / | |
355 $hsp->length('total'))); | |
356 } | |
357 | |
358 my ($hframe,$qframe) = ( $hsp->hit->frame, $hsp->query->frame); | |
359 my ($hstrand,$qstrand) = ($hsp->hit->strand,$hsp->query->strand); | |
360 # so TBLASTX will have Query/Hit frames | |
361 # BLASTX will have Query frame | |
362 # TBLASTN will have Hit frame | |
363 if( $hstrand || $qstrand ) { | |
364 $hspstr .= ", Frame = "; | |
365 my ($signq, $signh); | |
366 unless( $hstrand ) { | |
367 $hframe = undef; | |
368 # if strand is null or 0 then it is protein | |
369 # and this no frame | |
370 } else { | |
371 $signh = $hstrand < 0 ? '-' : '+'; | |
372 } | |
373 unless( $qstrand ) { | |
374 $qframe = undef; | |
375 # if strand is null or 0 then it is protein | |
376 } else { | |
377 $signq =$qstrand < 0 ? '-' : '+'; | |
378 } | |
379 # remember bioperl stores frames as 0,1,2 (GFF way) | |
380 # BLAST reports reports as 1,2,3 so | |
381 # we have to add 1 to the frame values | |
382 if( defined $hframe && ! defined $qframe) { | |
383 $hspstr .= "$signh".($hframe+1); | |
384 } elsif( defined $qframe && ! defined $hframe) { | |
385 $hspstr .= "$signq".($qframe+1); | |
386 } else { | |
387 $hspstr .= sprintf(" %s%d / %s%d", | |
388 $signq,$qframe+1, | |
389 $signh, $hframe+1); | |
390 } | |
391 } | |
392 # $hspstr .= "</pre></a><p>\n<pre>"; | |
393 $hspstr .= "</a><p>\n<pre>"; | |
394 | |
395 my @hspvals = ( {'name' => 'Query:', | |
396 'seq' => $hsp->query_string, | |
397 'start' => ($qstrand >= 0 ? | |
398 $hsp->query->start : | |
399 $hsp->query->end), | |
400 'end' => ($qstrand >= 0 ? | |
401 $hsp->query->end : | |
402 $hsp->query->start), | |
403 'index' => 0, | |
404 'direction' => $qstrand || 1 | |
405 }, | |
406 { 'name' => ' 'x6, | |
407 'seq' => $hsp->homology_string, | |
408 'start' => undef, | |
409 'end' => undef, | |
410 'index' => 0, | |
411 'direction' => 1 | |
412 }, | |
413 { 'name' => 'Sbjct:', | |
414 'seq' => $hsp->hit_string, | |
415 'start' => ($hstrand >= 0 ? | |
416 $hsp->hit->start : | |
417 $hsp->hit->end), | |
418 'end' => ($hstrand >= 0 ? | |
419 $hsp->hit->end : | |
420 $hsp->hit->start), | |
421 'index' => 0, | |
422 'direction' => $hstrand || 1 | |
423 } | |
424 ); | |
425 | |
426 | |
427 # let's set the expected length (in chars) of the starting number | |
428 # in an alignment block so we can have things line up | |
429 # Just going to try and set to the largest | |
430 | |
431 my ($numwidth) = sort { $b <=> $a }(length($hspvals[0]->{'start'}), | |
432 length($hspvals[0]->{'end'}), | |
433 length($hspvals[2]->{'start'}), | |
434 length($hspvals[2]->{'end'})); | |
435 my $count = 0; | |
436 while ( $count <= $hsp->length('total') ) { | |
437 foreach my $v ( @hspvals ) { | |
438 my $piece = substr($v->{'seq'}, $v->{'index'} + $count, | |
439 $AlignmentLineWidth); | |
440 my $cp = $piece; | |
441 my $plen = scalar ( $cp =~ tr/\-//); | |
442 my ($start,$end) = ('',''); | |
443 if( defined $v->{'start'} ) { | |
444 $start = $v->{'start'}; | |
445 # since strand can be + or - use the direction | |
446 # to signify which whether to add or substract from end | |
447 my $d = $v->{'direction'} * ( $AlignmentLineWidth - $plen )* | |
448 $baselens{$v->{'name'}}; | |
449 if( length($piece) < $AlignmentLineWidth ) { | |
450 $d = (length($piece) - $plen) * $v->{'direction'} * | |
451 $baselens{$v->{'name'}}; | |
452 } | |
453 $end = $v->{'start'} + $d - $v->{'direction'}; | |
454 $v->{'start'} += $d; | |
455 } | |
456 $hspstr .= sprintf("%s %-".$numwidth."s %s %s\n", | |
457 $v->{'name'}, | |
458 $start, | |
459 $piece, | |
460 $end | |
461 ); | |
462 } | |
463 $count += $AlignmentLineWidth; | |
464 $hspstr .= "\n\n"; | |
465 } | |
466 $hspstr .= "</pre>\n"; | |
467 } | |
468 # $hspstr .= "</pre>\n"; | |
469 } | |
470 | |
471 | |
472 # make table of search statistics and end the web page | |
473 $str .= "</table><p>\n".$hspstr."<p><p><hr><h2>Search Parameters</h2><table border=1><tr><th>Parameter</th><th>Value</th>\n"; | |
474 | |
475 foreach my $param ( $result->available_parameters ) { | |
476 $str .= "<tr><td>$param</td><td>". $result->get_parameter($param) ."</td></tr>\n"; | |
477 | |
478 } | |
479 $str .= "</table><p><h2>Search Statistics</h2><table border=1><tr><th>Statistic</th><th>Value</th></tr>\n"; | |
480 foreach my $stat ( sort $result->available_statistics ) { | |
481 $str .= "<tr><td>$stat</td><td>". $result->get_statistic($stat). "</td></th>\n"; | |
482 } | |
483 $str .= "</table><P>".$self->footer() . "<P>\n"; | |
484 return $str; | |
485 } | |
486 | |
487 =head2 hit_link_desc | |
488 | |
489 Title : hit_link_desc | |
490 Usage : $self->hit_link_desc(\&link_function); | |
491 Function: Get/Set the function which provides an HTML | |
492 link(s) for the given hit to be used | |
493 within the description section at the top of the BLAST report. | |
494 This allows a person reading the report within | |
495 a web browser to go to one or more database entries for | |
496 the given hit from the description section. | |
497 Returns : Function reference | |
498 Args : Function reference | |
499 See Also: L<default_hit_link_desc()> | |
500 | |
501 =cut | |
502 | |
503 sub hit_link_desc{ | |
504 my( $self, $code ) = @_; | |
505 if ($code) { | |
506 $self->{'_hit_link_desc'} = $code; | |
507 } | |
508 return $self->{'_hit_link_desc'} || \&default_hit_link_desc; | |
509 } | |
510 | |
511 =head2 default_hit_link_desc | |
512 | |
513 Title : defaulthit_link_desc | |
514 Usage : $self->default_hit_link_desc($hit, $result) | |
515 Function: Provides an HTML link(s) for the given hit to be used | |
516 within the description section at the top of the BLAST report. | |
517 This allows a person reading the report within | |
518 a web browser to go to one or more database entries for | |
519 the given hit from the description section. | |
520 Returns : string containing HTML markup "<a href...") | |
521 | |
522 The default implementation returns an HTML link to the | |
523 URL supplied by the remote_database_url() method | |
524 and using the identifier supplied by the id_parser() method. | |
525 It will use the NCBI GI if present, and the accession if not. | |
526 | |
527 Args : First argument is a Bio::Search::Hit::HitI | |
528 Second argument is a Bio::Search::Result::ResultI | |
529 | |
530 See Also: L<hit_link_align>, L<remote_database>, L<id_parser> | |
531 | |
532 =cut | |
533 | |
534 sub default_hit_link_desc { | |
535 my($self, $hit, $result) = @_; | |
536 my $type = ( $result->algorithm =~ /(P|X|Y)$/i ) ? 'PROTEIN' : 'NUCLEOTIDE'; | |
537 my ($gi,$acc) = &{$self->id_parser}($hit->name); | |
538 | |
539 my $url = length($self->remote_database_url($type)) > 0 ? | |
540 sprintf('<a href="%s">%s</a>', | |
541 sprintf($self->remote_database_url($type),$gi || $acc), | |
542 $hit->name()) : $hit->name(); | |
543 | |
544 return $url; | |
545 } | |
546 | |
547 | |
548 =head2 hit_link_align | |
549 | |
550 Title : hit_link_align | |
551 Usage : $self->hit_link_align(\&link_function); | |
552 Function: Get/Set the function which provides an HTML link(s) | |
553 for the given hit to be used | |
554 within the HSP alignment section of the BLAST report. | |
555 This allows a person reading the report within | |
556 a web browser to go to one or more database entries for | |
557 the given hit from the alignment section. | |
558 Returns : string containing HTML markup "<a href...") | |
559 | |
560 The default implementation delegates to hit_link_desc(). | |
561 | |
562 Args : First argument is a Bio::Search::Hit::HitI | |
563 Second argument is a Bio::Search::Result::ResultI | |
564 | |
565 See Also: L<hit_link_desc>, L<remote_database>, L<id_parser> | |
566 | |
567 =cut | |
568 | |
569 sub hit_link_align { | |
570 my ($self,$code) = @_; | |
571 if ($code) { | |
572 $self->{'_hit_link_align'} = $code; | |
573 } | |
574 return $self->{'_hit_link_align'} || \&default_hit_link_desc; | |
575 } | |
576 | |
577 =head2 start_report | |
578 | |
579 Title : start_report | |
580 Usage : $index->start_report( CODE ) | |
581 Function: Stores or returns the code to | |
582 write the start of the <HTML> block, the <TITLE> block | |
583 and the start of the <BODY> block of HTML. Useful | |
584 for (for instance) specifying alternative | |
585 HTML if you are embedding the output in | |
586 an HTML page which you have already started. | |
587 (For example a routine returning a null string). | |
588 Returns \&default_start_report (see below) if not | |
589 set. | |
590 Example : $index->start_report( \&my_start_report ) | |
591 Returns : ref to CODE if called without arguments | |
592 Args : CODE | |
593 | |
594 =cut | |
595 | |
596 sub start_report { | |
597 my( $self, $code ) = @_; | |
598 if ($code) { | |
599 $self->{'_start_report'} = $code; | |
600 } | |
601 return $self->{'_start_report'} || \&default_start_report; | |
602 } | |
603 | |
604 =head2 default_start_report | |
605 | |
606 Title : default_start_report | |
607 Usage : $self->default_start_report($result) | |
608 Function: The default method to call when starting a report. | |
609 Returns : sting | |
610 Args : First argument is a Bio::Search::Result::ResultI | |
611 | |
612 =cut | |
613 | |
614 sub default_start_report { | |
615 my ($result) = @_; | |
616 return sprintf( | |
617 qq{<HTML> | |
618 <HEAD> <CENTER><TITLE>Bioperl Reformatted HTML of %s output with Bioperl Bio::SearchIO system</TITLE></CENTER></HEAD> | |
619 <!-------------------------------------------------------------------> | |
620 <!-- Generated by Bio::SearchIO::Writer::HTMLResultWriter --> | |
621 <!-- %s --> | |
622 <!-- http://bioperl.org --> | |
623 <!-------------------------------------------------------------------> | |
624 <BODY BGCOLOR="WHITE"> | |
625 },$result->algorithm,$Revision); | |
626 | |
627 } | |
628 | |
629 =head2 title | |
630 | |
631 Title : title | |
632 Usage : $self->title($CODE) | |
633 | |
634 Function: Stores or returns the code to provide HTML for the given | |
635 BLAST report that will appear at the top of the BLAST report | |
636 HTML output. Useful for (for instance) specifying | |
637 alternative routines to write your own titles. | |
638 Returns \&default_title (see below) if not | |
639 set. | |
640 Example : $index->title( \&my_title ) | |
641 Returns : ref to CODE if called without arguments | |
642 Args : CODE | |
643 | |
644 =cut | |
645 | |
646 sub title { | |
647 my( $self, $code ) = @_; | |
648 if ($code) { | |
649 $self->{'_title'} = $code; | |
650 } | |
651 return $self->{'_title'} || \&default_title; | |
652 } | |
653 | |
654 =head2 default_title | |
655 | |
656 Title : default_title | |
657 Usage : $self->default_title($result) | |
658 Function: Provides HTML for the given BLAST report that will appear | |
659 at the top of the BLAST report HTML output. | |
660 Returns : string containing HTML markup | |
661 The default implementation returns <CENTER> <H1> HTML | |
662 containing text such as: | |
663 "Bioperl Reformatted HTML of BLASTP Search Report | |
664 for gi|1786183|gb|AAC73113.1|" | |
665 Args : First argument is a Bio::Search::Result::ResultI | |
666 | |
667 =cut | |
668 | |
669 sub default_title { | |
670 my ($result) = @_; | |
671 | |
672 return sprintf( | |
673 qq{<CENTER><H1><a href="http://bioperl.org">Bioperl</a> Reformatted HTML of %s Search Report<br> for %s</H1></CENTER>}, | |
674 $result->algorithm, | |
675 $result->query_name()); | |
676 } | |
677 | |
678 | |
679 =head2 introduction | |
680 | |
681 Title : introduction | |
682 Usage : $self->introduction($CODE) | |
683 | |
684 Function: Stores or returns the code to provide HTML for the given | |
685 BLAST report detailing the query and the | |
686 database information. | |
687 Useful for (for instance) specifying | |
688 routines returning alternative introductions. | |
689 Returns \&default_introduction (see below) if not | |
690 set. | |
691 Example : $index->introduction( \&my_introduction ) | |
692 Returns : ref to CODE if called without arguments | |
693 Args : CODE | |
694 | |
695 =cut | |
696 | |
697 sub introduction { | |
698 my( $self, $code ) = @_; | |
699 if ($code) { | |
700 $self->{'_introduction'} = $code; | |
701 } | |
702 return $self->{'_introduction'} || \&default_introduction; | |
703 } | |
704 | |
705 =head2 default_introduction | |
706 | |
707 Title : default_introduction | |
708 Usage : $self->default_introduction($result) | |
709 Function: Outputs HTML to provide the query | |
710 and the database information | |
711 Returns : string containing HTML | |
712 Args : First argument is a Bio::Search::Result::ResultI | |
713 Second argument is string holding literature citation | |
714 | |
715 =cut | |
716 | |
717 sub default_introduction { | |
718 my ($result) = @_; | |
719 | |
720 return sprintf( | |
721 qq{ | |
722 <b>Query=</b> %s %s<br><dd>(%s letters)</dd> | |
723 <p> | |
724 <b>Database:</b> %s<br><dd>%s sequences; %s total letters<p></dd> | |
725 <p> | |
726 }, | |
727 $result->query_name, | |
728 $result->query_description, | |
729 &_numwithcommas($result->query_length), | |
730 $result->database_name(), | |
731 &_numwithcommas($result->database_entries()), | |
732 &_numwithcommas($result->database_letters()), | |
733 ); | |
734 } | |
735 | |
736 =head2 end_report | |
737 | |
738 Title : end_report | |
739 Usage : $self->end_report() | |
740 Function: The method to call when ending a report, this is | |
741 mostly for cleanup for formats which require you to | |
742 have something at the end of the document (</BODY></HTML>) | |
743 for HTML | |
744 Returns : string | |
745 Args : none | |
746 | |
747 =cut | |
748 | |
749 sub end_report { | |
750 return "</BODY>\n</HTML>\n"; | |
751 } | |
752 | |
753 # copied from Bio::Index::Fasta | |
754 # useful here as well | |
755 | |
756 =head2 id_parser | |
757 | |
758 Title : id_parser | |
759 Usage : $index->id_parser( CODE ) | |
760 Function: Stores or returns the code used by record_id to | |
761 parse the ID for record from a string. Useful | |
762 for (for instance) specifying a different | |
763 parser for different flavours of FASTA file. | |
764 Returns \&default_id_parser (see below) if not | |
765 set. If you supply your own id_parser | |
766 subroutine, then it should expect a fasta | |
767 description line. An entry will be added to | |
768 the index for each string in the list returned. | |
769 Example : $index->id_parser( \&my_id_parser ) | |
770 Returns : ref to CODE if called without arguments | |
771 Args : CODE | |
772 | |
773 =cut | |
774 | |
775 sub id_parser { | |
776 my( $self, $code ) = @_; | |
777 | |
778 if ($code) { | |
779 $self->{'_id_parser'} = $code; | |
780 } | |
781 return $self->{'_id_parser'} || \&default_id_parser; | |
782 } | |
783 | |
784 | |
785 | |
786 =head2 default_id_parser | |
787 | |
788 Title : default_id_parser | |
789 Usage : $id = default_id_parser( $header ) | |
790 Function: The default Fasta ID parser for Fasta.pm | |
791 Returns $1 from applying the regexp /^>\s*(\S+)/ | |
792 to $header. | |
793 Returns : ID string | |
794 The default implementation checks for NCBI-style | |
795 identifiers in the given string ('gi|12345|AA54321'). | |
796 For these IDs, it extracts the GI and accession and | |
797 returns a two-element list of strings (GI, acc). | |
798 Args : a fasta header line string | |
799 | |
800 =cut | |
801 | |
802 sub default_id_parser { | |
803 my ($string) = @_; | |
804 my ($gi,$acc); | |
805 if( $string =~ s/gi\|(\d+)\|?// ) | |
806 { $gi = $1; $acc = $1;} | |
807 | |
808 if( $string =~ /(\w+)\|([A-Z\d\.\_]+)(\|[A-Z\d\_]+)?/ ) { | |
809 $acc = defined $2 ? $2 : $1; | |
810 } else { | |
811 $acc = $string; | |
812 $acc =~ s/^\s+(\S+)/$1/; | |
813 $acc =~ s/(\S+)\s+$/$1/; | |
814 } | |
815 return ($gi,$acc); | |
816 } | |
817 | |
818 sub MIN { $a <=> $b ? $a : $b; } | |
819 sub MAX { $a <=> $b ? $b : $a; } | |
820 | |
821 sub footer { | |
822 my ($self) = @_; | |
823 return "<hr><h5>Produced by Bioperl module ".ref($self)." on $DATE<br>Revision: $Revision</h5>\n" | |
824 | |
825 } | |
826 | |
827 =head2 algorithm_reference | |
828 | |
829 Title : algorithm_reference | |
830 Usage : my $reference = $writer->algorithm_reference($result); | |
831 Function: Returns the appropriate Bibliographic reference for the | |
832 algorithm format being produced | |
833 Returns : String | |
834 Args : L<Bio::Search::Result::ResultI> to reference | |
835 | |
836 | |
837 =cut | |
838 | |
839 sub algorithm_reference { | |
840 my ($self,$result) = @_; | |
841 return '' if( ! defined $result || !ref($result) || | |
842 ! $result->isa('Bio::Search::Result::ResultI')) ; | |
843 if( $result->algorithm =~ /BLAST/i ) { | |
844 my $res = $result->algorithm . ' ' . $result->algorithm_version . "<p>"; | |
845 if( $result->algorithm_version =~ /WashU/i ) { | |
846 return $res . | |
847 "Copyright (C) 1996-2000 Washington University, Saint Louis, Missouri USA.<br> | |
848 All Rights Reserved.<p> | |
849 <b>Reference:</b> Gish, W. (1996-2000) <a href=\"http://blast.wustl.edu\">http://blast.wustl.edu</a><p>"; | |
850 } else { | |
851 return $res . | |
852 "<b>Reference:</b> Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,<br> | |
853 Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),<br> | |
854 \"Gapped BLAST and PSI-BLAST: a new generation of protein database search<br> | |
855 programs\", Nucleic Acids Res. 25:3389-3402.<p>"; | |
856 | |
857 } | |
858 } elsif( $result->algorithm =~ /FAST/i ) { | |
859 return $result->algorithm . " " . $result->algorithm_version . "<br>" . | |
860 "\n<b>Reference:</b> Pearson et al, Genomics (1997) 46:24-36<p>"; | |
861 } else { | |
862 return ''; | |
863 } | |
864 } | |
865 | |
866 # from Perl Cookbook 2.17 | |
867 sub _numwithcommas { | |
868 my $num = reverse( $_[0] ); | |
869 $num =~ s/(\d{3})(?=\d)(?!\d*\.)/$1,/g; | |
870 return scalar reverse $num; | |
871 } | |
872 | |
873 =head2 Methods Bio::SearchIO::SearchWriterI | |
874 | |
875 L<Bio::SearchIO::SearchWriterI> inherited methods. | |
876 | |
877 =head2 filter | |
878 | |
879 Title : filter | |
880 Usage : $writer->filter('hsp', \&hsp_filter); | |
881 Function: Filter out either at HSP,Hit,or Result level | |
882 Returns : none | |
883 Args : string => data type, | |
884 CODE reference | |
885 | |
886 | |
887 =cut | |
888 | |
889 1; |