Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SearchIO/Writer/TextResultWriter.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: TextResultWriter.pm,v 1.5.2.5 2003/09/15 16:19:24 jason Exp $ | |
2 # | |
3 # BioPerl module for Bio::SearchIO::Writer::TextResultWriter | |
4 # | |
5 # Cared for by Jason Stajich <jason@bioperl.org> | |
6 # | |
7 # Copyright Jason Stajich | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::SearchIO::Writer::TextResultWriter - Object to implement writing a Bio::Search::ResultI in Text. | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 use Bio::SearchIO; | |
20 use Bio::SearchIO::Writer::TextResultWriter; | |
21 | |
22 my $in = new Bio::SearchIO(-format => 'blast', | |
23 -file => shift @ARGV); | |
24 | |
25 my $writer = new Bio::SearchIO::Writer::TextResultWriter(); | |
26 my $out = new Bio::SearchIO(-writer => $writer); | |
27 $out->write_result($in->next_result); | |
28 | |
29 =head1 DESCRIPTION | |
30 | |
31 This object implements the SearchWriterI interface which will produce | |
32 a set of Text for a specific Bio::Search::Report::ReportI interface. | |
33 | |
34 You can also provide the argument -filters => \%hash to filter the at | |
35 the hsp, hit, or result level. %hash is an associative array which | |
36 contains any or all of the keys (HSP, HIT, RESULT). The values | |
37 pointed to by these keys would be references to a subroutine which | |
38 expects to be passed an object - one of Bio::Search::HSP::HSPI, | |
39 Bio::Search::Hit::HitI, and Bio::Search::Result::ResultI respectively. | |
40 Each function needs to return a boolean value as to whether or not the | |
41 passed element should be included in the output report - true if it is to be included, false if it to be omitted. | |
42 | |
43 For example to filter on sequences in the database which are too short | |
44 for your criteria you would do the following. | |
45 | |
46 Define a hit filter method | |
47 | |
48 sub hit_filter { | |
49 my $hit = shift; | |
50 return $hit->length E<gt> 100; # test if length of the hit sequence | |
51 # long enough | |
52 } | |
53 my $writer = new Bio::SearchIO::Writer::TextResultWriter( | |
54 -filters => { 'HIT' =E<gt> \&hit_filter } | |
55 ); | |
56 | |
57 Another example would be to filter HSPs on percent identity, let's | |
58 only include HSPs which are 75% identical or better. | |
59 | |
60 sub hsp_filter { | |
61 my $hsp = shift; | |
62 return $hsp->percent_identity E<gt> 75; | |
63 } | |
64 my $writer = new Bio::SearchIO::Writer::TextResultWriter( | |
65 -filters => { 'HSP' =E<gt> \&hsp_filter } | |
66 ); | |
67 | |
68 See L<Bio::SearchIO::SearchWriterI> for more info on the filter method. | |
69 | |
70 | |
71 This module will use the module Text::Wrap if it is installed to wrap | |
72 the Query description line. If you do not have Text::Wrap installed | |
73 this module will work fine but you won't have the Query line wrapped. | |
74 You will see a warning about this when you first instantiate a | |
75 TextResultWriter - to avoid these warnings from showing up, simply set | |
76 the verbosity upon initialization to -1 like this: my $writer = new | |
77 Bio::SearchIO::Writer::TextResultWriter(-verbose => -1); | |
78 | |
79 =head1 FEEDBACK | |
80 | |
81 =head2 Mailing Lists | |
82 | |
83 User feedback is an integral part of the evolution of this and other | |
84 Bioperl modules. Send your comments and suggestions preferably to | |
85 the Bioperl mailing list. Your participation is much appreciated. | |
86 | |
87 bioperl-l@bioperl.org - General discussion | |
88 http://bioperl.org/MailList.shtml - About the mailing lists | |
89 | |
90 =head2 Reporting Bugs | |
91 | |
92 Report bugs to the Bioperl bug tracking system to help us keep track | |
93 of the bugs and their resolution. Bug reports can be submitted via | |
94 email or the web: | |
95 | |
96 bioperl-bugs@bioperl.org | |
97 http://bugzilla.bioperl.org/ | |
98 | |
99 =head1 AUTHOR - Jason Stajich | |
100 | |
101 Email jason@bioperl.org | |
102 | |
103 =head1 APPENDIX | |
104 | |
105 The rest of the documentation details each of the object methods. | |
106 Internal methods are usually preceded with a _ | |
107 | |
108 =cut | |
109 | |
110 | |
111 # Let the code begin... | |
112 | |
113 | |
114 package Bio::SearchIO::Writer::TextResultWriter; | |
115 use vars qw(@ISA $MaxNameLen $MaxDescLen $AlignmentLineWidth | |
116 $DescLineLen $TextWrapLoaded); | |
117 use strict; | |
118 | |
119 # Object preamble - inherits from Bio::Root::RootI | |
120 | |
121 BEGIN { | |
122 $MaxDescLen = 65; | |
123 $AlignmentLineWidth = 60; | |
124 eval { require Text::Wrap; $TextWrapLoaded = 1;}; | |
125 if( $@ ) { | |
126 $TextWrapLoaded = 0; | |
127 } | |
128 } | |
129 | |
130 use Bio::Root::Root; | |
131 use Bio::SearchIO::SearchWriterI; | |
132 use POSIX; | |
133 | |
134 @ISA = qw(Bio::Root::Root Bio::SearchIO::SearchWriterI); | |
135 | |
136 =head2 new | |
137 | |
138 Title : new | |
139 Usage : my $obj = new Bio::SearchIO::Writer::TextResultWriter(); | |
140 Function: Builds a new Bio::SearchIO::Writer::TextResultWriter object | |
141 Returns : Bio::SearchIO::Writer::TextResultWriter | |
142 Args : -filters => hashref with any or all of the keys (HSP HIT RESULT) | |
143 which have values pointing to a subroutine reference | |
144 which will expect to get a | |
145 | |
146 | |
147 =cut | |
148 | |
149 sub new { | |
150 my($class,@args) = @_; | |
151 | |
152 my $self = $class->SUPER::new(@args); | |
153 my ($filters) = $self->_rearrange([qw(FILTERS)],@args); | |
154 if( defined $filters ) { | |
155 if( !ref($filters) =~ /HASH/i ) { | |
156 $self->warn("Did not provide a hashref for the FILTERS option, ignoring."); | |
157 } else { | |
158 while( my ($type,$code) = each %{$filters} ) { | |
159 $self->filter($type,$code); | |
160 } | |
161 } | |
162 } | |
163 unless( $TextWrapLoaded ) { | |
164 $self->warn("Could not load Text::Wrap - the Query Description will not be line wrapped\n"); | |
165 } else { | |
166 $Text::Wrap::columns = $MaxDescLen; | |
167 } | |
168 return $self; | |
169 } | |
170 | |
171 | |
172 =head2 to_string | |
173 | |
174 Purpose : Produces data for each Search::Result::ResultI in a string. | |
175 : This is an abstract method. For some useful implementations, | |
176 : see ResultTableWriter.pm, HitTableWriter.pm, | |
177 : and HSPTableWriter.pm. | |
178 Usage : print $writer->to_string( $result_obj, @args ); | |
179 Argument : $result_obj = A Bio::Search::Result::ResultI object | |
180 : @args = any additional arguments used by your implementation. | |
181 Returns : String containing data for each search Result or any of its | |
182 : sub-objects (Hits and HSPs). | |
183 Throws : n/a | |
184 | |
185 =cut | |
186 | |
187 sub to_string { | |
188 my ($self,$result,$num) = @_; | |
189 $num ||= 0; | |
190 return unless defined $result; | |
191 my ($resultfilter,$hitfilter, $hspfilter) = ( $self->filter('RESULT'), | |
192 $self->filter('HIT'), | |
193 $self->filter('HSP') ); | |
194 return '' if( defined $resultfilter && ! &{$resultfilter}($result) ); | |
195 | |
196 my ($qtype,$dbtype,$dbseqtype,$type); | |
197 my $alg = $result->algorithm; | |
198 | |
199 # This is actually wrong for the FASTAs I think | |
200 if( $alg =~ /T(FAST|BLAST)([XY])/i ) { | |
201 $qtype = $dbtype = 'translated'; | |
202 $dbseqtype = $type = 'PROTEIN'; | |
203 } elsif( $alg =~ /T(FAST|BLAST)N/i ) { | |
204 $qtype = ''; | |
205 $dbtype = 'translated'; | |
206 $type = 'PROTEIN'; | |
207 $dbseqtype = 'NUCLEOTIDE'; | |
208 } elsif( $alg =~ /(FAST|BLAST)N/i || | |
209 $alg =~ /(WABA|EXONERATE)/i ) { | |
210 $qtype = $dbtype = ''; | |
211 $type = $dbseqtype = 'NUCLEOTIDE'; | |
212 } elsif( $alg =~ /(FAST|BLAST)P/ || $alg =~ /SSEARCH/i ) { | |
213 $qtype = $dbtype = ''; | |
214 $type = $dbseqtype = 'PROTEIN'; | |
215 } elsif( $alg =~ /(FAST|BLAST)[XY]/i ) { | |
216 $qtype = 'translated'; | |
217 $dbtype = 'PROTEIN'; | |
218 $dbseqtype = $type = 'PROTEIN'; | |
219 } else { | |
220 print STDERR "algorithm was ", $result->algorithm, " couldn't match\n"; | |
221 } | |
222 | |
223 | |
224 my %baselens = ( 'Sbjct:' => ( $dbtype eq 'translated' ) ? 3 : 1, | |
225 'Query:' => ( $qtype eq 'translated' ) ? 3 : 1); | |
226 | |
227 my $str; | |
228 if( ! defined $num || $num <= 1 ) { | |
229 $str = &{$self->start_report}($result); | |
230 } | |
231 | |
232 $str .= &{$self->title}($result); | |
233 | |
234 $str .= $result->algorithm_reference || $self->algorithm_reference($result); | |
235 $str .= &{$self->introduction}($result); | |
236 | |
237 | |
238 $str .= qq{ | |
239 Score E | |
240 Sequences producing significant alignments: (bits) value | |
241 }; | |
242 my $hspstr = ''; | |
243 if( $result->can('rewind')) { | |
244 $result->rewind(); # support stream based parsing routines | |
245 } | |
246 while( my $hit = $result->next_hit ) { | |
247 next if( defined $hitfilter && ! &{$hitfilter}($hit) ); | |
248 my $nm = $hit->name(); | |
249 $self->debug( "no $nm for name (".$hit->description(). "\n") | |
250 unless $nm; | |
251 my ($gi,$acc) = &{$self->id_parser}($nm); | |
252 my $p = "%-$MaxDescLen". "s"; | |
253 my $descsub; | |
254 my $desc = sprintf("%s %s",$nm,$hit->description); | |
255 if( length($desc) - 3 > $MaxDescLen) { | |
256 $descsub = sprintf($p, | |
257 substr($desc,0,$MaxDescLen-3) . | |
258 "..."); | |
259 } else { | |
260 $descsub = sprintf($p,$desc); | |
261 } | |
262 | |
263 $str .= sprintf("%s %-4s %s\n", | |
264 $descsub, | |
265 defined $hit->raw_score ? $hit->raw_score : ' ', | |
266 defined $hit->significance ? $hit->significance : '?'); | |
267 my @hsps = $hit->hsps; | |
268 | |
269 $hspstr .= sprintf(">%s %s\n%9sLength = %d\n\n", | |
270 $hit->name, | |
271 defined $hit->description ? $hit->description : '', | |
272 '', # empty is for the %9s in the str formatting | |
273 $hit->length); | |
274 | |
275 foreach my $hsp ( @hsps ) { | |
276 next if( defined $hspfilter && ! &{$hspfilter}($hsp) ); | |
277 $hspstr .= sprintf(" Score = %4s bits (%s), Expect = %s", | |
278 $hsp->bits, $hsp->score, $hsp->evalue); | |
279 if( $hsp->pvalue ) { | |
280 $hspstr .= ", P = ".$hsp->pvalue; | |
281 } | |
282 $hspstr .= "\n"; | |
283 $hspstr .= sprintf(" Identities = %d/%d (%d%%)", | |
284 ( $hsp->frac_identical('total') * | |
285 $hsp->length('total')), | |
286 $hsp->length('total'), | |
287 POSIX::floor($hsp->frac_identical('total') | |
288 * 100)); | |
289 | |
290 if( $type eq 'PROTEIN' ) { | |
291 $hspstr .= sprintf(", Positives = %d/%d (%d%%)", | |
292 ( $hsp->frac_conserved('total') * | |
293 $hsp->length('total')), | |
294 $hsp->length('total'), | |
295 POSIX::floor($hsp->frac_conserved('total') * 100)); | |
296 | |
297 } | |
298 if( $hsp->gaps ) { | |
299 $hspstr .= sprintf(", Gaps = %d/%d (%d%%)", | |
300 $hsp->gaps('total'), | |
301 $hsp->length('total'), | |
302 POSIX::floor(100 * $hsp->gaps('total') / | |
303 $hsp->length('total'))); | |
304 } | |
305 $hspstr .= "\n"; | |
306 my ($hframe,$qframe) = ( $hsp->hit->frame, | |
307 $hsp->query->frame); | |
308 my ($hstrand,$qstrand) = ($hsp->hit->strand,$hsp->query->strand); | |
309 # so TBLASTX will have Query/Hit frames | |
310 # BLASTX will have Query frame | |
311 # TBLASTN will have Hit frame | |
312 if( $hstrand || $qstrand ) { | |
313 $hspstr .= " Frame = "; | |
314 my ($signq, $signh); | |
315 unless( $hstrand ) { | |
316 $hframe = undef; | |
317 # if strand is null or 0 then it is protein | |
318 # and this no frame | |
319 } else { | |
320 $signh = $hstrand < 0 ? '-' : '+'; | |
321 } | |
322 unless( $qstrand ) { | |
323 $qframe = undef; | |
324 # if strand is null or 0 then it is protein | |
325 } else { | |
326 $signq =$qstrand < 0 ? '-' : '+'; | |
327 } | |
328 # remember bioperl stores frames as 0,1,2 (GFF way) | |
329 # BLAST reports reports as 1,2,3 so | |
330 # we have to add 1 to the frame values | |
331 if( defined $hframe && ! defined $qframe) { | |
332 $hspstr .= "$signh".($hframe+1); | |
333 } elsif( defined $qframe && ! defined $hframe) { | |
334 $hspstr .= "$signq".($qframe+1); | |
335 } else { | |
336 $hspstr .= sprintf(" %s%d / %s%d", | |
337 $signq,$qframe+1, | |
338 $signh, $hframe+1); | |
339 } | |
340 } | |
341 $hspstr .= "\n\n"; | |
342 | |
343 my @hspvals = ( {'name' => 'Query:', | |
344 'seq' => $hsp->query_string, | |
345 'start' => ( $hstrand >= 0 ? | |
346 $hsp->query->start : | |
347 $hsp->query->end), | |
348 'end' => ($qstrand >= 0 ? | |
349 $hsp->query->end : | |
350 $hsp->query->start), | |
351 'index' => 0, | |
352 'direction' => $qstrand || 1 | |
353 }, | |
354 { 'name' => ' 'x6, # this might need to adjust for long coordinates?? | |
355 'seq' => $hsp->homology_string, | |
356 'start' => undef, | |
357 'end' => undef, | |
358 'index' => 0, | |
359 'direction' => 1 | |
360 }, | |
361 { 'name' => 'Sbjct:', | |
362 'seq' => $hsp->hit_string, | |
363 'start' => ($hstrand >= 0 ? | |
364 $hsp->hit->start : $hsp->hit->end), | |
365 'end' => ($hstrand >= 0 ? | |
366 $hsp->hit->end : $hsp->hit->start), | |
367 'index' => 0, | |
368 'direction' => $hstrand || 1 | |
369 } | |
370 ); | |
371 | |
372 | |
373 # let's set the expected length (in chars) of the starting number | |
374 # in an alignment block so we can have things line up | |
375 # Just going to try and set to the largest | |
376 | |
377 my ($numwidth) = sort { $b <=> $a }(length($hspvals[0]->{'start'}), | |
378 length($hspvals[0]->{'end'}), | |
379 length($hspvals[2]->{'start'}), | |
380 length($hspvals[2]->{'end'})); | |
381 my $count = 0; | |
382 while ( $count <= $hsp->length('total') ) { | |
383 foreach my $v ( @hspvals ) { | |
384 my $piece = substr($v->{'seq'}, $v->{'index'} +$count, | |
385 $AlignmentLineWidth); | |
386 my $cp = $piece; | |
387 my $plen = scalar ( $cp =~ tr/\-//); | |
388 my ($start,$end) = ('',''); | |
389 if( defined $v->{'start'} ) { | |
390 $start = $v->{'start'}; | |
391 # since strand can be + or - use the direction | |
392 # to signify which whether to add or substract from end | |
393 my $d = $v->{'direction'} * ( $AlignmentLineWidth - $plen )* | |
394 $baselens{$v->{'name'}}; | |
395 if( length($piece) < $AlignmentLineWidth ) { | |
396 $d = (length($piece) - $plen) * $v->{'direction'} * | |
397 $baselens{$v->{'name'}}; | |
398 } | |
399 $end = $v->{'start'} + $d - $v->{'direction'}; | |
400 $v->{'start'} += $d; | |
401 } | |
402 $hspstr .= sprintf("%s %-".$numwidth."s %s %s\n", | |
403 $v->{'name'}, | |
404 $start, | |
405 $piece, | |
406 $end | |
407 ); | |
408 } | |
409 $count += $AlignmentLineWidth; | |
410 $hspstr .= "\n"; | |
411 } | |
412 } | |
413 $hspstr .= "\n"; | |
414 } | |
415 $str .= "\n\n".$hspstr; | |
416 | |
417 $str .= sprintf(qq{ Database: %s | |
418 Posted date: %s | |
419 Number of letters in database: %s | |
420 Number of sequences in database: %s | |
421 | |
422 Matrix: %s | |
423 }, | |
424 $result->database_name(), | |
425 $result->get_statistic('posted_date') || | |
426 POSIX::strftime("%b %d, %Y %I:%M %p",localtime), | |
427 &_numwithcommas($result->database_entries()), | |
428 &_numwithcommas($result->database_letters()), | |
429 $result->get_parameter('matrix') || ''); | |
430 | |
431 if( defined (my $open = $result->get_parameter('gapopen')) ) { | |
432 $str .= sprintf("Gap Penalties Existence: %d, Extension: %d\n", | |
433 $open || 0, $result->get_parameter('gapext') || 0); | |
434 } | |
435 | |
436 # skip those params we've already output | |
437 foreach my $param ( grep { ! /matrix|gapopen|gapext/i } | |
438 $result->available_parameters ) { | |
439 $str .= "$param: ". $result->get_parameter($param) ."\n"; | |
440 | |
441 } | |
442 $str .= "Search Statistics\n"; | |
443 # skip posted date, we already output it | |
444 foreach my $stat ( sort grep { ! /posted_date/ } | |
445 $result->available_statistics ) { | |
446 my $expect = $result->get_parameter('expect'); | |
447 my $v = $result->get_statistic($stat); | |
448 if( $v =~ /^\d+$/ ) { | |
449 $v = &_numwithcommas($v); | |
450 } | |
451 if( defined $expect && | |
452 $stat eq 'seqs_better_than_cutoff' ) { | |
453 $str .= "seqs_better_than_$expect: $v\n"; | |
454 } else { | |
455 my $v = | |
456 $str .= "$stat: $v\n"; | |
457 } | |
458 } | |
459 $str .= "\n\n"; | |
460 return $str; | |
461 } | |
462 | |
463 | |
464 =head2 start_report | |
465 | |
466 Title : start_report | |
467 Usage : $index->start_report( CODE ) | |
468 Function: Stores or returns the code to | |
469 write the start of the <HTML> block, the <TITLE> block | |
470 and the start of the <BODY> block of HTML. Useful | |
471 for (for instance) specifying alternative | |
472 HTML if you are embedding the output in | |
473 an HTML page which you have already started. | |
474 (For example a routine returning a null string). | |
475 Returns \&default_start_report (see below) if not | |
476 set. | |
477 Example : $index->start_report( \&my_start_report ) | |
478 Returns : ref to CODE if called without arguments | |
479 Args : CODE | |
480 | |
481 =cut | |
482 | |
483 sub start_report { | |
484 my( $self, $code ) = @_; | |
485 if ($code) { | |
486 $self->{'_start_report'} = $code; | |
487 } | |
488 return $self->{'_start_report'} || \&default_start_report; | |
489 } | |
490 | |
491 =head2 default_start_report | |
492 | |
493 Title : default_start_report | |
494 Usage : $self->default_start_report($result) | |
495 Function: The default method to call when starting a report. | |
496 Returns : sting | |
497 Args : First argument is a Bio::Search::Result::ResultI | |
498 | |
499 =cut | |
500 | |
501 sub default_start_report { | |
502 my ($result) = @_; | |
503 return ""; | |
504 } | |
505 | |
506 =head2 title | |
507 | |
508 Title : title | |
509 Usage : $self->title($CODE) | |
510 | |
511 Function: Stores or returns the code to provide HTML for the given | |
512 BLAST report that will appear at the top of the BLAST report | |
513 HTML output. Useful for (for instance) specifying | |
514 alternative routines to write your own titles. | |
515 Returns \&default_title (see below) if not | |
516 set. | |
517 Example : $index->title( \&my_title ) | |
518 Returns : ref to CODE if called without arguments | |
519 Args : CODE | |
520 | |
521 =cut | |
522 | |
523 sub title { | |
524 my( $self, $code ) = @_; | |
525 if ($code) { | |
526 $self->{'_title'} = $code; | |
527 } | |
528 return $self->{'_title'} || \&default_title; | |
529 } | |
530 | |
531 =head2 default_title | |
532 | |
533 Title : default_title | |
534 Usage : $self->default_title($result) | |
535 Function: Provides HTML for the given BLAST report that will appear | |
536 at the top of the BLAST report output. | |
537 Returns : empty for text implementation | |
538 Args : First argument is a Bio::Search::Result::ResultI | |
539 | |
540 =cut | |
541 | |
542 sub default_title { | |
543 my ($result) = @_; | |
544 return ""; | |
545 # The HTML implementation | |
546 # return sprintf( | |
547 # qq{<CENTER><H1><a href="http://bioperl.org">Bioperl</a> Reformatted HTML of %s Search Report<br> for %s</H1></CENTER>}, | |
548 # $result->algorithm, | |
549 # $result->query_name()); | |
550 } | |
551 | |
552 | |
553 =head2 introduction | |
554 | |
555 Title : introduction | |
556 Usage : $self->introduction($CODE) | |
557 | |
558 Function: Stores or returns the code to provide HTML for the given | |
559 BLAST report detailing the query and the | |
560 database information. | |
561 Useful for (for instance) specifying | |
562 routines returning alternative introductions. | |
563 Returns \&default_introduction (see below) if not | |
564 set. | |
565 Example : $index->introduction( \&my_introduction ) | |
566 Returns : ref to CODE if called without arguments | |
567 Args : CODE | |
568 | |
569 =cut | |
570 | |
571 sub introduction { | |
572 my( $self, $code ) = @_; | |
573 if ($code) { | |
574 $self->{'_introduction'} = $code; | |
575 } | |
576 return $self->{'_introduction'} || \&default_introduction; | |
577 } | |
578 | |
579 =head2 default_introduction | |
580 | |
581 Title : default_introduction | |
582 Usage : $self->default_introduction($result) | |
583 Function: Outputs HTML to provide the query | |
584 and the database information | |
585 Returns : string containing HTML | |
586 Args : First argument is a Bio::Search::Result::ResultI | |
587 Second argument is string holding literature citation | |
588 | |
589 =cut | |
590 | |
591 sub default_introduction { | |
592 my ($result) = @_; | |
593 | |
594 return sprintf( | |
595 qq{ | |
596 Query= %s | |
597 (%s letters) | |
598 | |
599 Database: %s | |
600 %s sequences; %s total letters | |
601 }, | |
602 &_linewrap($result->query_name . " " . | |
603 $result->query_description), | |
604 &_numwithcommas($result->query_length), | |
605 $result->database_name(), | |
606 &_numwithcommas($result->database_entries()), | |
607 &_numwithcommas($result->database_letters()), | |
608 ); | |
609 } | |
610 | |
611 =head2 end_report | |
612 | |
613 Title : end_report | |
614 Usage : $self->end_report() | |
615 Function: The method to call when ending a report, this is | |
616 mostly for cleanup for formats which require you to | |
617 have something at the end of the document (</BODY></HTML>) | |
618 for HTML | |
619 Returns : string | |
620 Args : none | |
621 | |
622 =cut | |
623 | |
624 sub end_report { | |
625 return ""; | |
626 } | |
627 | |
628 | |
629 # copied from Bio::Index::Fasta | |
630 # useful here as well | |
631 | |
632 =head2 id_parser | |
633 | |
634 Title : id_parser | |
635 Usage : $index->id_parser( CODE ) | |
636 Function: Stores or returns the code used by record_id to | |
637 parse the ID for record from a string. Useful | |
638 for (for instance) specifying a different | |
639 parser for different flavours of FASTA file. | |
640 Returns \&default_id_parser (see below) if not | |
641 set. If you supply your own id_parser | |
642 subroutine, then it should expect a fasta | |
643 description line. An entry will be added to | |
644 the index for each string in the list returned. | |
645 Example : $index->id_parser( \&my_id_parser ) | |
646 Returns : ref to CODE if called without arguments | |
647 Args : CODE | |
648 | |
649 =cut | |
650 | |
651 sub id_parser { | |
652 my( $self, $code ) = @_; | |
653 | |
654 if ($code) { | |
655 $self->{'_id_parser'} = $code; | |
656 } | |
657 return $self->{'_id_parser'} || \&default_id_parser; | |
658 } | |
659 | |
660 | |
661 | |
662 =head2 default_id_parser | |
663 | |
664 Title : default_id_parser | |
665 Usage : $id = default_id_parser( $header ) | |
666 Function: The default Fasta ID parser for Fasta.pm | |
667 Returns $1 from applying the regexp /^>\s*(\S+)/ | |
668 to $header. | |
669 Returns : ID string | |
670 Args : a fasta header line string | |
671 | |
672 =cut | |
673 | |
674 sub default_id_parser { | |
675 my ($string) = @_; | |
676 my ($gi,$acc); | |
677 if( $string =~ s/gi\|(\d+)\|?// ) | |
678 { $gi = $1; $acc = $1;} | |
679 | |
680 if( $string =~ /(\w+)\|([A-Z\d\.\_]+)(\|[A-Z\d\_]+)?/ ) { | |
681 $acc = defined $2 ? $2 : $1; | |
682 } else { | |
683 $acc = $string; | |
684 $acc =~ s/^\s+(\S+)/$1/; | |
685 $acc =~ s/(\S+)\s+$/$1/; | |
686 } | |
687 return ($gi,$acc); | |
688 } | |
689 | |
690 sub MIN { $a <=> $b ? $a : $b; } | |
691 sub MAX { $a <=> $b ? $b : $a; } | |
692 | |
693 | |
694 =head2 algorithm_reference | |
695 | |
696 Title : algorithm_reference | |
697 Usage : my $reference = $writer->algorithm_reference($result); | |
698 Function: Returns the appropriate Bibliographic reference for the | |
699 algorithm format being produced | |
700 Returns : String | |
701 Args : L<Bio::Search::Result::ResultI> to reference | |
702 | |
703 | |
704 =cut | |
705 | |
706 sub algorithm_reference{ | |
707 my ($self,$result) = @_; | |
708 return '' if( ! defined $result || !ref($result) || | |
709 ! $result->isa('Bio::Search::Result::ResultI')) ; | |
710 if( $result->algorithm =~ /BLAST/i ) { | |
711 my $res = $result->algorithm . ' '. $result->algorithm_version. "\n"; | |
712 if( $result->algorithm_version =~ /WashU/i ) { | |
713 return $res .qq{ | |
714 Copyright (C) 1996-2000 Washington University, Saint Louis, Missouri USA. | |
715 All Rights Reserved. | |
716 | |
717 Reference: Gish, W. (1996-2000) http://blast.wustl.edu | |
718 }; | |
719 } else { | |
720 return $res . qq{ | |
721 Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, | |
722 Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), | |
723 "Gapped BLAST and PSI-BLAST: a new generation of protein database search | |
724 programs", Nucleic Acids Res. 25:3389-3402. | |
725 }; | |
726 } | |
727 } elsif( $result->algorithm =~ /FAST/i ) { | |
728 return $result->algorithm. " ". $result->algorithm_version . "\n". | |
729 "\nReference: Pearson et al, Genomics (1997) 46:24-36\n"; | |
730 } else { | |
731 return ''; | |
732 } | |
733 } | |
734 | |
735 # from Perl Cookbook 2.17 | |
736 sub _numwithcommas { | |
737 my $num = reverse( $_[0] ); | |
738 $num =~ s/(\d{3})(?=\d)(?!\d*\.)/$1,/g; | |
739 return scalar reverse $num; | |
740 } | |
741 | |
742 sub _linewrap { | |
743 my ($str) = @_; | |
744 if($TextWrapLoaded) { | |
745 return Text::Wrap::wrap("","",$str); # use Text::Wrap | |
746 } else { return $str; } # cannot wrap | |
747 } | |
748 =head2 Methods Bio::SearchIO::SearchWriterI | |
749 | |
750 L<Bio::SearchIO::SearchWriterI> inherited methods. | |
751 | |
752 =head2 filter | |
753 | |
754 Title : filter | |
755 Usage : $writer->filter('hsp', \&hsp_filter); | |
756 Function: Filter out either at HSP,Hit,or Result level | |
757 Returns : none | |
758 Args : string => data type, | |
759 CODE reference | |
760 | |
761 | |
762 =cut | |
763 | |
764 | |
765 1; |