Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SearchIO/Writer/TextResultWriter.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: TextResultWriter.pm,v 1.5.2.5 2003/09/15 16:19:24 jason Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::SearchIO::Writer::TextResultWriter | |
| 4 # | |
| 5 # Cared for by Jason Stajich <jason@bioperl.org> | |
| 6 # | |
| 7 # Copyright Jason Stajich | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::SearchIO::Writer::TextResultWriter - Object to implement writing a Bio::Search::ResultI in Text. | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 use Bio::SearchIO; | |
| 20 use Bio::SearchIO::Writer::TextResultWriter; | |
| 21 | |
| 22 my $in = new Bio::SearchIO(-format => 'blast', | |
| 23 -file => shift @ARGV); | |
| 24 | |
| 25 my $writer = new Bio::SearchIO::Writer::TextResultWriter(); | |
| 26 my $out = new Bio::SearchIO(-writer => $writer); | |
| 27 $out->write_result($in->next_result); | |
| 28 | |
| 29 =head1 DESCRIPTION | |
| 30 | |
| 31 This object implements the SearchWriterI interface which will produce | |
| 32 a set of Text for a specific Bio::Search::Report::ReportI interface. | |
| 33 | |
| 34 You can also provide the argument -filters => \%hash to filter the at | |
| 35 the hsp, hit, or result level. %hash is an associative array which | |
| 36 contains any or all of the keys (HSP, HIT, RESULT). The values | |
| 37 pointed to by these keys would be references to a subroutine which | |
| 38 expects to be passed an object - one of Bio::Search::HSP::HSPI, | |
| 39 Bio::Search::Hit::HitI, and Bio::Search::Result::ResultI respectively. | |
| 40 Each function needs to return a boolean value as to whether or not the | |
| 41 passed element should be included in the output report - true if it is to be included, false if it to be omitted. | |
| 42 | |
| 43 For example to filter on sequences in the database which are too short | |
| 44 for your criteria you would do the following. | |
| 45 | |
| 46 Define a hit filter method | |
| 47 | |
| 48 sub hit_filter { | |
| 49 my $hit = shift; | |
| 50 return $hit->length E<gt> 100; # test if length of the hit sequence | |
| 51 # long enough | |
| 52 } | |
| 53 my $writer = new Bio::SearchIO::Writer::TextResultWriter( | |
| 54 -filters => { 'HIT' =E<gt> \&hit_filter } | |
| 55 ); | |
| 56 | |
| 57 Another example would be to filter HSPs on percent identity, let's | |
| 58 only include HSPs which are 75% identical or better. | |
| 59 | |
| 60 sub hsp_filter { | |
| 61 my $hsp = shift; | |
| 62 return $hsp->percent_identity E<gt> 75; | |
| 63 } | |
| 64 my $writer = new Bio::SearchIO::Writer::TextResultWriter( | |
| 65 -filters => { 'HSP' =E<gt> \&hsp_filter } | |
| 66 ); | |
| 67 | |
| 68 See L<Bio::SearchIO::SearchWriterI> for more info on the filter method. | |
| 69 | |
| 70 | |
| 71 This module will use the module Text::Wrap if it is installed to wrap | |
| 72 the Query description line. If you do not have Text::Wrap installed | |
| 73 this module will work fine but you won't have the Query line wrapped. | |
| 74 You will see a warning about this when you first instantiate a | |
| 75 TextResultWriter - to avoid these warnings from showing up, simply set | |
| 76 the verbosity upon initialization to -1 like this: my $writer = new | |
| 77 Bio::SearchIO::Writer::TextResultWriter(-verbose => -1); | |
| 78 | |
| 79 =head1 FEEDBACK | |
| 80 | |
| 81 =head2 Mailing Lists | |
| 82 | |
| 83 User feedback is an integral part of the evolution of this and other | |
| 84 Bioperl modules. Send your comments and suggestions preferably to | |
| 85 the Bioperl mailing list. Your participation is much appreciated. | |
| 86 | |
| 87 bioperl-l@bioperl.org - General discussion | |
| 88 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 89 | |
| 90 =head2 Reporting Bugs | |
| 91 | |
| 92 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 93 of the bugs and their resolution. Bug reports can be submitted via | |
| 94 email or the web: | |
| 95 | |
| 96 bioperl-bugs@bioperl.org | |
| 97 http://bugzilla.bioperl.org/ | |
| 98 | |
| 99 =head1 AUTHOR - Jason Stajich | |
| 100 | |
| 101 Email jason@bioperl.org | |
| 102 | |
| 103 =head1 APPENDIX | |
| 104 | |
| 105 The rest of the documentation details each of the object methods. | |
| 106 Internal methods are usually preceded with a _ | |
| 107 | |
| 108 =cut | |
| 109 | |
| 110 | |
| 111 # Let the code begin... | |
| 112 | |
| 113 | |
| 114 package Bio::SearchIO::Writer::TextResultWriter; | |
| 115 use vars qw(@ISA $MaxNameLen $MaxDescLen $AlignmentLineWidth | |
| 116 $DescLineLen $TextWrapLoaded); | |
| 117 use strict; | |
| 118 | |
| 119 # Object preamble - inherits from Bio::Root::RootI | |
| 120 | |
| 121 BEGIN { | |
| 122 $MaxDescLen = 65; | |
| 123 $AlignmentLineWidth = 60; | |
| 124 eval { require Text::Wrap; $TextWrapLoaded = 1;}; | |
| 125 if( $@ ) { | |
| 126 $TextWrapLoaded = 0; | |
| 127 } | |
| 128 } | |
| 129 | |
| 130 use Bio::Root::Root; | |
| 131 use Bio::SearchIO::SearchWriterI; | |
| 132 use POSIX; | |
| 133 | |
| 134 @ISA = qw(Bio::Root::Root Bio::SearchIO::SearchWriterI); | |
| 135 | |
| 136 =head2 new | |
| 137 | |
| 138 Title : new | |
| 139 Usage : my $obj = new Bio::SearchIO::Writer::TextResultWriter(); | |
| 140 Function: Builds a new Bio::SearchIO::Writer::TextResultWriter object | |
| 141 Returns : Bio::SearchIO::Writer::TextResultWriter | |
| 142 Args : -filters => hashref with any or all of the keys (HSP HIT RESULT) | |
| 143 which have values pointing to a subroutine reference | |
| 144 which will expect to get a | |
| 145 | |
| 146 | |
| 147 =cut | |
| 148 | |
| 149 sub new { | |
| 150 my($class,@args) = @_; | |
| 151 | |
| 152 my $self = $class->SUPER::new(@args); | |
| 153 my ($filters) = $self->_rearrange([qw(FILTERS)],@args); | |
| 154 if( defined $filters ) { | |
| 155 if( !ref($filters) =~ /HASH/i ) { | |
| 156 $self->warn("Did not provide a hashref for the FILTERS option, ignoring."); | |
| 157 } else { | |
| 158 while( my ($type,$code) = each %{$filters} ) { | |
| 159 $self->filter($type,$code); | |
| 160 } | |
| 161 } | |
| 162 } | |
| 163 unless( $TextWrapLoaded ) { | |
| 164 $self->warn("Could not load Text::Wrap - the Query Description will not be line wrapped\n"); | |
| 165 } else { | |
| 166 $Text::Wrap::columns = $MaxDescLen; | |
| 167 } | |
| 168 return $self; | |
| 169 } | |
| 170 | |
| 171 | |
| 172 =head2 to_string | |
| 173 | |
| 174 Purpose : Produces data for each Search::Result::ResultI in a string. | |
| 175 : This is an abstract method. For some useful implementations, | |
| 176 : see ResultTableWriter.pm, HitTableWriter.pm, | |
| 177 : and HSPTableWriter.pm. | |
| 178 Usage : print $writer->to_string( $result_obj, @args ); | |
| 179 Argument : $result_obj = A Bio::Search::Result::ResultI object | |
| 180 : @args = any additional arguments used by your implementation. | |
| 181 Returns : String containing data for each search Result or any of its | |
| 182 : sub-objects (Hits and HSPs). | |
| 183 Throws : n/a | |
| 184 | |
| 185 =cut | |
| 186 | |
| 187 sub to_string { | |
| 188 my ($self,$result,$num) = @_; | |
| 189 $num ||= 0; | |
| 190 return unless defined $result; | |
| 191 my ($resultfilter,$hitfilter, $hspfilter) = ( $self->filter('RESULT'), | |
| 192 $self->filter('HIT'), | |
| 193 $self->filter('HSP') ); | |
| 194 return '' if( defined $resultfilter && ! &{$resultfilter}($result) ); | |
| 195 | |
| 196 my ($qtype,$dbtype,$dbseqtype,$type); | |
| 197 my $alg = $result->algorithm; | |
| 198 | |
| 199 # This is actually wrong for the FASTAs I think | |
| 200 if( $alg =~ /T(FAST|BLAST)([XY])/i ) { | |
| 201 $qtype = $dbtype = 'translated'; | |
| 202 $dbseqtype = $type = 'PROTEIN'; | |
| 203 } elsif( $alg =~ /T(FAST|BLAST)N/i ) { | |
| 204 $qtype = ''; | |
| 205 $dbtype = 'translated'; | |
| 206 $type = 'PROTEIN'; | |
| 207 $dbseqtype = 'NUCLEOTIDE'; | |
| 208 } elsif( $alg =~ /(FAST|BLAST)N/i || | |
| 209 $alg =~ /(WABA|EXONERATE)/i ) { | |
| 210 $qtype = $dbtype = ''; | |
| 211 $type = $dbseqtype = 'NUCLEOTIDE'; | |
| 212 } elsif( $alg =~ /(FAST|BLAST)P/ || $alg =~ /SSEARCH/i ) { | |
| 213 $qtype = $dbtype = ''; | |
| 214 $type = $dbseqtype = 'PROTEIN'; | |
| 215 } elsif( $alg =~ /(FAST|BLAST)[XY]/i ) { | |
| 216 $qtype = 'translated'; | |
| 217 $dbtype = 'PROTEIN'; | |
| 218 $dbseqtype = $type = 'PROTEIN'; | |
| 219 } else { | |
| 220 print STDERR "algorithm was ", $result->algorithm, " couldn't match\n"; | |
| 221 } | |
| 222 | |
| 223 | |
| 224 my %baselens = ( 'Sbjct:' => ( $dbtype eq 'translated' ) ? 3 : 1, | |
| 225 'Query:' => ( $qtype eq 'translated' ) ? 3 : 1); | |
| 226 | |
| 227 my $str; | |
| 228 if( ! defined $num || $num <= 1 ) { | |
| 229 $str = &{$self->start_report}($result); | |
| 230 } | |
| 231 | |
| 232 $str .= &{$self->title}($result); | |
| 233 | |
| 234 $str .= $result->algorithm_reference || $self->algorithm_reference($result); | |
| 235 $str .= &{$self->introduction}($result); | |
| 236 | |
| 237 | |
| 238 $str .= qq{ | |
| 239 Score E | |
| 240 Sequences producing significant alignments: (bits) value | |
| 241 }; | |
| 242 my $hspstr = ''; | |
| 243 if( $result->can('rewind')) { | |
| 244 $result->rewind(); # support stream based parsing routines | |
| 245 } | |
| 246 while( my $hit = $result->next_hit ) { | |
| 247 next if( defined $hitfilter && ! &{$hitfilter}($hit) ); | |
| 248 my $nm = $hit->name(); | |
| 249 $self->debug( "no $nm for name (".$hit->description(). "\n") | |
| 250 unless $nm; | |
| 251 my ($gi,$acc) = &{$self->id_parser}($nm); | |
| 252 my $p = "%-$MaxDescLen". "s"; | |
| 253 my $descsub; | |
| 254 my $desc = sprintf("%s %s",$nm,$hit->description); | |
| 255 if( length($desc) - 3 > $MaxDescLen) { | |
| 256 $descsub = sprintf($p, | |
| 257 substr($desc,0,$MaxDescLen-3) . | |
| 258 "..."); | |
| 259 } else { | |
| 260 $descsub = sprintf($p,$desc); | |
| 261 } | |
| 262 | |
| 263 $str .= sprintf("%s %-4s %s\n", | |
| 264 $descsub, | |
| 265 defined $hit->raw_score ? $hit->raw_score : ' ', | |
| 266 defined $hit->significance ? $hit->significance : '?'); | |
| 267 my @hsps = $hit->hsps; | |
| 268 | |
| 269 $hspstr .= sprintf(">%s %s\n%9sLength = %d\n\n", | |
| 270 $hit->name, | |
| 271 defined $hit->description ? $hit->description : '', | |
| 272 '', # empty is for the %9s in the str formatting | |
| 273 $hit->length); | |
| 274 | |
| 275 foreach my $hsp ( @hsps ) { | |
| 276 next if( defined $hspfilter && ! &{$hspfilter}($hsp) ); | |
| 277 $hspstr .= sprintf(" Score = %4s bits (%s), Expect = %s", | |
| 278 $hsp->bits, $hsp->score, $hsp->evalue); | |
| 279 if( $hsp->pvalue ) { | |
| 280 $hspstr .= ", P = ".$hsp->pvalue; | |
| 281 } | |
| 282 $hspstr .= "\n"; | |
| 283 $hspstr .= sprintf(" Identities = %d/%d (%d%%)", | |
| 284 ( $hsp->frac_identical('total') * | |
| 285 $hsp->length('total')), | |
| 286 $hsp->length('total'), | |
| 287 POSIX::floor($hsp->frac_identical('total') | |
| 288 * 100)); | |
| 289 | |
| 290 if( $type eq 'PROTEIN' ) { | |
| 291 $hspstr .= sprintf(", Positives = %d/%d (%d%%)", | |
| 292 ( $hsp->frac_conserved('total') * | |
| 293 $hsp->length('total')), | |
| 294 $hsp->length('total'), | |
| 295 POSIX::floor($hsp->frac_conserved('total') * 100)); | |
| 296 | |
| 297 } | |
| 298 if( $hsp->gaps ) { | |
| 299 $hspstr .= sprintf(", Gaps = %d/%d (%d%%)", | |
| 300 $hsp->gaps('total'), | |
| 301 $hsp->length('total'), | |
| 302 POSIX::floor(100 * $hsp->gaps('total') / | |
| 303 $hsp->length('total'))); | |
| 304 } | |
| 305 $hspstr .= "\n"; | |
| 306 my ($hframe,$qframe) = ( $hsp->hit->frame, | |
| 307 $hsp->query->frame); | |
| 308 my ($hstrand,$qstrand) = ($hsp->hit->strand,$hsp->query->strand); | |
| 309 # so TBLASTX will have Query/Hit frames | |
| 310 # BLASTX will have Query frame | |
| 311 # TBLASTN will have Hit frame | |
| 312 if( $hstrand || $qstrand ) { | |
| 313 $hspstr .= " Frame = "; | |
| 314 my ($signq, $signh); | |
| 315 unless( $hstrand ) { | |
| 316 $hframe = undef; | |
| 317 # if strand is null or 0 then it is protein | |
| 318 # and this no frame | |
| 319 } else { | |
| 320 $signh = $hstrand < 0 ? '-' : '+'; | |
| 321 } | |
| 322 unless( $qstrand ) { | |
| 323 $qframe = undef; | |
| 324 # if strand is null or 0 then it is protein | |
| 325 } else { | |
| 326 $signq =$qstrand < 0 ? '-' : '+'; | |
| 327 } | |
| 328 # remember bioperl stores frames as 0,1,2 (GFF way) | |
| 329 # BLAST reports reports as 1,2,3 so | |
| 330 # we have to add 1 to the frame values | |
| 331 if( defined $hframe && ! defined $qframe) { | |
| 332 $hspstr .= "$signh".($hframe+1); | |
| 333 } elsif( defined $qframe && ! defined $hframe) { | |
| 334 $hspstr .= "$signq".($qframe+1); | |
| 335 } else { | |
| 336 $hspstr .= sprintf(" %s%d / %s%d", | |
| 337 $signq,$qframe+1, | |
| 338 $signh, $hframe+1); | |
| 339 } | |
| 340 } | |
| 341 $hspstr .= "\n\n"; | |
| 342 | |
| 343 my @hspvals = ( {'name' => 'Query:', | |
| 344 'seq' => $hsp->query_string, | |
| 345 'start' => ( $hstrand >= 0 ? | |
| 346 $hsp->query->start : | |
| 347 $hsp->query->end), | |
| 348 'end' => ($qstrand >= 0 ? | |
| 349 $hsp->query->end : | |
| 350 $hsp->query->start), | |
| 351 'index' => 0, | |
| 352 'direction' => $qstrand || 1 | |
| 353 }, | |
| 354 { 'name' => ' 'x6, # this might need to adjust for long coordinates?? | |
| 355 'seq' => $hsp->homology_string, | |
| 356 'start' => undef, | |
| 357 'end' => undef, | |
| 358 'index' => 0, | |
| 359 'direction' => 1 | |
| 360 }, | |
| 361 { 'name' => 'Sbjct:', | |
| 362 'seq' => $hsp->hit_string, | |
| 363 'start' => ($hstrand >= 0 ? | |
| 364 $hsp->hit->start : $hsp->hit->end), | |
| 365 'end' => ($hstrand >= 0 ? | |
| 366 $hsp->hit->end : $hsp->hit->start), | |
| 367 'index' => 0, | |
| 368 'direction' => $hstrand || 1 | |
| 369 } | |
| 370 ); | |
| 371 | |
| 372 | |
| 373 # let's set the expected length (in chars) of the starting number | |
| 374 # in an alignment block so we can have things line up | |
| 375 # Just going to try and set to the largest | |
| 376 | |
| 377 my ($numwidth) = sort { $b <=> $a }(length($hspvals[0]->{'start'}), | |
| 378 length($hspvals[0]->{'end'}), | |
| 379 length($hspvals[2]->{'start'}), | |
| 380 length($hspvals[2]->{'end'})); | |
| 381 my $count = 0; | |
| 382 while ( $count <= $hsp->length('total') ) { | |
| 383 foreach my $v ( @hspvals ) { | |
| 384 my $piece = substr($v->{'seq'}, $v->{'index'} +$count, | |
| 385 $AlignmentLineWidth); | |
| 386 my $cp = $piece; | |
| 387 my $plen = scalar ( $cp =~ tr/\-//); | |
| 388 my ($start,$end) = ('',''); | |
| 389 if( defined $v->{'start'} ) { | |
| 390 $start = $v->{'start'}; | |
| 391 # since strand can be + or - use the direction | |
| 392 # to signify which whether to add or substract from end | |
| 393 my $d = $v->{'direction'} * ( $AlignmentLineWidth - $plen )* | |
| 394 $baselens{$v->{'name'}}; | |
| 395 if( length($piece) < $AlignmentLineWidth ) { | |
| 396 $d = (length($piece) - $plen) * $v->{'direction'} * | |
| 397 $baselens{$v->{'name'}}; | |
| 398 } | |
| 399 $end = $v->{'start'} + $d - $v->{'direction'}; | |
| 400 $v->{'start'} += $d; | |
| 401 } | |
| 402 $hspstr .= sprintf("%s %-".$numwidth."s %s %s\n", | |
| 403 $v->{'name'}, | |
| 404 $start, | |
| 405 $piece, | |
| 406 $end | |
| 407 ); | |
| 408 } | |
| 409 $count += $AlignmentLineWidth; | |
| 410 $hspstr .= "\n"; | |
| 411 } | |
| 412 } | |
| 413 $hspstr .= "\n"; | |
| 414 } | |
| 415 $str .= "\n\n".$hspstr; | |
| 416 | |
| 417 $str .= sprintf(qq{ Database: %s | |
| 418 Posted date: %s | |
| 419 Number of letters in database: %s | |
| 420 Number of sequences in database: %s | |
| 421 | |
| 422 Matrix: %s | |
| 423 }, | |
| 424 $result->database_name(), | |
| 425 $result->get_statistic('posted_date') || | |
| 426 POSIX::strftime("%b %d, %Y %I:%M %p",localtime), | |
| 427 &_numwithcommas($result->database_entries()), | |
| 428 &_numwithcommas($result->database_letters()), | |
| 429 $result->get_parameter('matrix') || ''); | |
| 430 | |
| 431 if( defined (my $open = $result->get_parameter('gapopen')) ) { | |
| 432 $str .= sprintf("Gap Penalties Existence: %d, Extension: %d\n", | |
| 433 $open || 0, $result->get_parameter('gapext') || 0); | |
| 434 } | |
| 435 | |
| 436 # skip those params we've already output | |
| 437 foreach my $param ( grep { ! /matrix|gapopen|gapext/i } | |
| 438 $result->available_parameters ) { | |
| 439 $str .= "$param: ". $result->get_parameter($param) ."\n"; | |
| 440 | |
| 441 } | |
| 442 $str .= "Search Statistics\n"; | |
| 443 # skip posted date, we already output it | |
| 444 foreach my $stat ( sort grep { ! /posted_date/ } | |
| 445 $result->available_statistics ) { | |
| 446 my $expect = $result->get_parameter('expect'); | |
| 447 my $v = $result->get_statistic($stat); | |
| 448 if( $v =~ /^\d+$/ ) { | |
| 449 $v = &_numwithcommas($v); | |
| 450 } | |
| 451 if( defined $expect && | |
| 452 $stat eq 'seqs_better_than_cutoff' ) { | |
| 453 $str .= "seqs_better_than_$expect: $v\n"; | |
| 454 } else { | |
| 455 my $v = | |
| 456 $str .= "$stat: $v\n"; | |
| 457 } | |
| 458 } | |
| 459 $str .= "\n\n"; | |
| 460 return $str; | |
| 461 } | |
| 462 | |
| 463 | |
| 464 =head2 start_report | |
| 465 | |
| 466 Title : start_report | |
| 467 Usage : $index->start_report( CODE ) | |
| 468 Function: Stores or returns the code to | |
| 469 write the start of the <HTML> block, the <TITLE> block | |
| 470 and the start of the <BODY> block of HTML. Useful | |
| 471 for (for instance) specifying alternative | |
| 472 HTML if you are embedding the output in | |
| 473 an HTML page which you have already started. | |
| 474 (For example a routine returning a null string). | |
| 475 Returns \&default_start_report (see below) if not | |
| 476 set. | |
| 477 Example : $index->start_report( \&my_start_report ) | |
| 478 Returns : ref to CODE if called without arguments | |
| 479 Args : CODE | |
| 480 | |
| 481 =cut | |
| 482 | |
| 483 sub start_report { | |
| 484 my( $self, $code ) = @_; | |
| 485 if ($code) { | |
| 486 $self->{'_start_report'} = $code; | |
| 487 } | |
| 488 return $self->{'_start_report'} || \&default_start_report; | |
| 489 } | |
| 490 | |
| 491 =head2 default_start_report | |
| 492 | |
| 493 Title : default_start_report | |
| 494 Usage : $self->default_start_report($result) | |
| 495 Function: The default method to call when starting a report. | |
| 496 Returns : sting | |
| 497 Args : First argument is a Bio::Search::Result::ResultI | |
| 498 | |
| 499 =cut | |
| 500 | |
| 501 sub default_start_report { | |
| 502 my ($result) = @_; | |
| 503 return ""; | |
| 504 } | |
| 505 | |
| 506 =head2 title | |
| 507 | |
| 508 Title : title | |
| 509 Usage : $self->title($CODE) | |
| 510 | |
| 511 Function: Stores or returns the code to provide HTML for the given | |
| 512 BLAST report that will appear at the top of the BLAST report | |
| 513 HTML output. Useful for (for instance) specifying | |
| 514 alternative routines to write your own titles. | |
| 515 Returns \&default_title (see below) if not | |
| 516 set. | |
| 517 Example : $index->title( \&my_title ) | |
| 518 Returns : ref to CODE if called without arguments | |
| 519 Args : CODE | |
| 520 | |
| 521 =cut | |
| 522 | |
| 523 sub title { | |
| 524 my( $self, $code ) = @_; | |
| 525 if ($code) { | |
| 526 $self->{'_title'} = $code; | |
| 527 } | |
| 528 return $self->{'_title'} || \&default_title; | |
| 529 } | |
| 530 | |
| 531 =head2 default_title | |
| 532 | |
| 533 Title : default_title | |
| 534 Usage : $self->default_title($result) | |
| 535 Function: Provides HTML for the given BLAST report that will appear | |
| 536 at the top of the BLAST report output. | |
| 537 Returns : empty for text implementation | |
| 538 Args : First argument is a Bio::Search::Result::ResultI | |
| 539 | |
| 540 =cut | |
| 541 | |
| 542 sub default_title { | |
| 543 my ($result) = @_; | |
| 544 return ""; | |
| 545 # The HTML implementation | |
| 546 # return sprintf( | |
| 547 # qq{<CENTER><H1><a href="http://bioperl.org">Bioperl</a> Reformatted HTML of %s Search Report<br> for %s</H1></CENTER>}, | |
| 548 # $result->algorithm, | |
| 549 # $result->query_name()); | |
| 550 } | |
| 551 | |
| 552 | |
| 553 =head2 introduction | |
| 554 | |
| 555 Title : introduction | |
| 556 Usage : $self->introduction($CODE) | |
| 557 | |
| 558 Function: Stores or returns the code to provide HTML for the given | |
| 559 BLAST report detailing the query and the | |
| 560 database information. | |
| 561 Useful for (for instance) specifying | |
| 562 routines returning alternative introductions. | |
| 563 Returns \&default_introduction (see below) if not | |
| 564 set. | |
| 565 Example : $index->introduction( \&my_introduction ) | |
| 566 Returns : ref to CODE if called without arguments | |
| 567 Args : CODE | |
| 568 | |
| 569 =cut | |
| 570 | |
| 571 sub introduction { | |
| 572 my( $self, $code ) = @_; | |
| 573 if ($code) { | |
| 574 $self->{'_introduction'} = $code; | |
| 575 } | |
| 576 return $self->{'_introduction'} || \&default_introduction; | |
| 577 } | |
| 578 | |
| 579 =head2 default_introduction | |
| 580 | |
| 581 Title : default_introduction | |
| 582 Usage : $self->default_introduction($result) | |
| 583 Function: Outputs HTML to provide the query | |
| 584 and the database information | |
| 585 Returns : string containing HTML | |
| 586 Args : First argument is a Bio::Search::Result::ResultI | |
| 587 Second argument is string holding literature citation | |
| 588 | |
| 589 =cut | |
| 590 | |
| 591 sub default_introduction { | |
| 592 my ($result) = @_; | |
| 593 | |
| 594 return sprintf( | |
| 595 qq{ | |
| 596 Query= %s | |
| 597 (%s letters) | |
| 598 | |
| 599 Database: %s | |
| 600 %s sequences; %s total letters | |
| 601 }, | |
| 602 &_linewrap($result->query_name . " " . | |
| 603 $result->query_description), | |
| 604 &_numwithcommas($result->query_length), | |
| 605 $result->database_name(), | |
| 606 &_numwithcommas($result->database_entries()), | |
| 607 &_numwithcommas($result->database_letters()), | |
| 608 ); | |
| 609 } | |
| 610 | |
| 611 =head2 end_report | |
| 612 | |
| 613 Title : end_report | |
| 614 Usage : $self->end_report() | |
| 615 Function: The method to call when ending a report, this is | |
| 616 mostly for cleanup for formats which require you to | |
| 617 have something at the end of the document (</BODY></HTML>) | |
| 618 for HTML | |
| 619 Returns : string | |
| 620 Args : none | |
| 621 | |
| 622 =cut | |
| 623 | |
| 624 sub end_report { | |
| 625 return ""; | |
| 626 } | |
| 627 | |
| 628 | |
| 629 # copied from Bio::Index::Fasta | |
| 630 # useful here as well | |
| 631 | |
| 632 =head2 id_parser | |
| 633 | |
| 634 Title : id_parser | |
| 635 Usage : $index->id_parser( CODE ) | |
| 636 Function: Stores or returns the code used by record_id to | |
| 637 parse the ID for record from a string. Useful | |
| 638 for (for instance) specifying a different | |
| 639 parser for different flavours of FASTA file. | |
| 640 Returns \&default_id_parser (see below) if not | |
| 641 set. If you supply your own id_parser | |
| 642 subroutine, then it should expect a fasta | |
| 643 description line. An entry will be added to | |
| 644 the index for each string in the list returned. | |
| 645 Example : $index->id_parser( \&my_id_parser ) | |
| 646 Returns : ref to CODE if called without arguments | |
| 647 Args : CODE | |
| 648 | |
| 649 =cut | |
| 650 | |
| 651 sub id_parser { | |
| 652 my( $self, $code ) = @_; | |
| 653 | |
| 654 if ($code) { | |
| 655 $self->{'_id_parser'} = $code; | |
| 656 } | |
| 657 return $self->{'_id_parser'} || \&default_id_parser; | |
| 658 } | |
| 659 | |
| 660 | |
| 661 | |
| 662 =head2 default_id_parser | |
| 663 | |
| 664 Title : default_id_parser | |
| 665 Usage : $id = default_id_parser( $header ) | |
| 666 Function: The default Fasta ID parser for Fasta.pm | |
| 667 Returns $1 from applying the regexp /^>\s*(\S+)/ | |
| 668 to $header. | |
| 669 Returns : ID string | |
| 670 Args : a fasta header line string | |
| 671 | |
| 672 =cut | |
| 673 | |
| 674 sub default_id_parser { | |
| 675 my ($string) = @_; | |
| 676 my ($gi,$acc); | |
| 677 if( $string =~ s/gi\|(\d+)\|?// ) | |
| 678 { $gi = $1; $acc = $1;} | |
| 679 | |
| 680 if( $string =~ /(\w+)\|([A-Z\d\.\_]+)(\|[A-Z\d\_]+)?/ ) { | |
| 681 $acc = defined $2 ? $2 : $1; | |
| 682 } else { | |
| 683 $acc = $string; | |
| 684 $acc =~ s/^\s+(\S+)/$1/; | |
| 685 $acc =~ s/(\S+)\s+$/$1/; | |
| 686 } | |
| 687 return ($gi,$acc); | |
| 688 } | |
| 689 | |
| 690 sub MIN { $a <=> $b ? $a : $b; } | |
| 691 sub MAX { $a <=> $b ? $b : $a; } | |
| 692 | |
| 693 | |
| 694 =head2 algorithm_reference | |
| 695 | |
| 696 Title : algorithm_reference | |
| 697 Usage : my $reference = $writer->algorithm_reference($result); | |
| 698 Function: Returns the appropriate Bibliographic reference for the | |
| 699 algorithm format being produced | |
| 700 Returns : String | |
| 701 Args : L<Bio::Search::Result::ResultI> to reference | |
| 702 | |
| 703 | |
| 704 =cut | |
| 705 | |
| 706 sub algorithm_reference{ | |
| 707 my ($self,$result) = @_; | |
| 708 return '' if( ! defined $result || !ref($result) || | |
| 709 ! $result->isa('Bio::Search::Result::ResultI')) ; | |
| 710 if( $result->algorithm =~ /BLAST/i ) { | |
| 711 my $res = $result->algorithm . ' '. $result->algorithm_version. "\n"; | |
| 712 if( $result->algorithm_version =~ /WashU/i ) { | |
| 713 return $res .qq{ | |
| 714 Copyright (C) 1996-2000 Washington University, Saint Louis, Missouri USA. | |
| 715 All Rights Reserved. | |
| 716 | |
| 717 Reference: Gish, W. (1996-2000) http://blast.wustl.edu | |
| 718 }; | |
| 719 } else { | |
| 720 return $res . qq{ | |
| 721 Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, | |
| 722 Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), | |
| 723 "Gapped BLAST and PSI-BLAST: a new generation of protein database search | |
| 724 programs", Nucleic Acids Res. 25:3389-3402. | |
| 725 }; | |
| 726 } | |
| 727 } elsif( $result->algorithm =~ /FAST/i ) { | |
| 728 return $result->algorithm. " ". $result->algorithm_version . "\n". | |
| 729 "\nReference: Pearson et al, Genomics (1997) 46:24-36\n"; | |
| 730 } else { | |
| 731 return ''; | |
| 732 } | |
| 733 } | |
| 734 | |
| 735 # from Perl Cookbook 2.17 | |
| 736 sub _numwithcommas { | |
| 737 my $num = reverse( $_[0] ); | |
| 738 $num =~ s/(\d{3})(?=\d)(?!\d*\.)/$1,/g; | |
| 739 return scalar reverse $num; | |
| 740 } | |
| 741 | |
| 742 sub _linewrap { | |
| 743 my ($str) = @_; | |
| 744 if($TextWrapLoaded) { | |
| 745 return Text::Wrap::wrap("","",$str); # use Text::Wrap | |
| 746 } else { return $str; } # cannot wrap | |
| 747 } | |
| 748 =head2 Methods Bio::SearchIO::SearchWriterI | |
| 749 | |
| 750 L<Bio::SearchIO::SearchWriterI> inherited methods. | |
| 751 | |
| 752 =head2 filter | |
| 753 | |
| 754 Title : filter | |
| 755 Usage : $writer->filter('hsp', \&hsp_filter); | |
| 756 Function: Filter out either at HSP,Hit,or Result level | |
| 757 Returns : none | |
| 758 Args : string => data type, | |
| 759 CODE reference | |
| 760 | |
| 761 | |
| 762 =cut | |
| 763 | |
| 764 | |
| 765 1; |
