Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SearchIO/psiblast.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 #------------------------------------------------------------------ | |
| 2 # $Id: psiblast.pm,v 1.17 2002/12/24 15:48:41 jason Exp $ | |
| 3 # | |
| 4 # BioPerl module Bio::SearchIO::psiblast | |
| 5 # | |
| 6 # Cared for by Steve Chervitz <sac@bioperl.org> | |
| 7 # | |
| 8 # You may distribute this module under the same terms as perl itself | |
| 9 #------------------------------------------------------------------ | |
| 10 | |
| 11 =head1 NAME | |
| 12 | |
| 13 Bio::SearchIO::psiblast - Parser for traditional BLAST and PSI-BLAST reports | |
| 14 | |
| 15 =head1 SYNOPSIS | |
| 16 | |
| 17 use Bio::SearchIO; | |
| 18 | |
| 19 my $in = Bio::SearchIO->new( -format => 'psiblast', | |
| 20 -file => 'report.blastp' ); | |
| 21 | |
| 22 while ( my $blast = $in->next_result() ) { | |
| 23 foreach my $hit ( $blast->hits ) { | |
| 24 print "Hit: $hit\n"; | |
| 25 } | |
| 26 } | |
| 27 | |
| 28 # BLAST hit filtering function. All hits of each BLAST report must satisfy | |
| 29 # this criteria to be retained. If a hit fails this test, it is ignored. | |
| 30 # If all hits of a report fail, the report will be considered hitless. | |
| 31 # But we can distinguish this from the case where there were no | |
| 32 # hits in the report by testing the function $blast->no_hits_found(). | |
| 33 | |
| 34 my $filt_func = sub{ my $hit=shift; | |
| 35 $hit->frac_identical('query') >= 0.5 | |
| 36 && $hit->frac_aligned_query >= 0.50 | |
| 37 }; | |
| 38 | |
| 39 # Not supplying a -file or -fh parameter means read from STDIN | |
| 40 | |
| 41 my $in2 = Bio::SearchIO->new( -format => 'psiblast', | |
| 42 -hit_filter => $filt_func | |
| 43 ); | |
| 44 | |
| 45 | |
| 46 =head1 DESCRIPTION | |
| 47 | |
| 48 This module parses BLAST and PSI-BLAST reports and acts as a factory for | |
| 49 objects that encapsulate BLAST results: | |
| 50 L<Bio::Search::Result::BlastResult>, L<Bio::Search::Hit::BlastHit>, | |
| 51 L<Bio::Search::HSP::BlastHSP>. | |
| 52 | |
| 53 This module does not parse XML-formatted BLAST reports. | |
| 54 See L<Bio::SearchIO::blastxml|Bio::SearchIO::blastxml> if you need to do that. | |
| 55 | |
| 56 To use this module, the only module you need to C<use> is | |
| 57 Bio::SearchIO.pm. SearchIO knows how to load this module when you | |
| 58 supply a C<-format =E<gt> 'psiblast'> parameters to its C<new>() | |
| 59 function. For more information about the SearchIO system, see | |
| 60 documentation in Bio::SearchIO.pm. | |
| 61 | |
| 62 =head2 PSI-BLAST Support | |
| 63 | |
| 64 In addition to BLAST1 and BLAST2 reports, this module can also handle | |
| 65 PSI-BLAST reports. When accessing the set of Hits in a result, hits | |
| 66 from different iterations are lumped together but can be distinguished by | |
| 67 interrogating L<Bio::Search::Hit::BlastHit::iteration> and | |
| 68 L<Bio::Search::Hit::BlastHit::found_again>. | |
| 69 | |
| 70 If you want to collect hits only from a certain iteration during parsing, | |
| 71 supply a function using the C<-HIT_FILTER> parameter. | |
| 72 | |
| 73 =head1 EXAMPLES | |
| 74 | |
| 75 To get a feel for how to use this, have look at scripts in the | |
| 76 B<examples/searchio> and B<examples/searchio/writer> directory of the Bioperl | |
| 77 distribution as well as the test script B<t/SearchIO.t>. | |
| 78 | |
| 79 =head1 SEE ALSO | |
| 80 | |
| 81 For more documentation about working with Blast result objects that are | |
| 82 produced by this parser, see L<Bio::Search::Result::BlastResult>, | |
| 83 L<Bio::Search::Hit::BlastHit>, L<Bio::Search::HSP::BlastHSP>. | |
| 84 | |
| 85 =head1 FEEDBACK | |
| 86 | |
| 87 =head2 Mailing Lists | |
| 88 | |
| 89 User feedback is an integral part of the evolution of this and other | |
| 90 Bioperl modules. Send your comments and suggestions preferably to | |
| 91 the Bioperl mailing list. Your participation is much appreciated. | |
| 92 | |
| 93 bioperl-l@bioperl.org - General discussion | |
| 94 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 95 | |
| 96 =head2 Reporting Bugs | |
| 97 | |
| 98 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 99 of the bugs and their resolution. Bug reports can be submitted via | |
| 100 email or the web: | |
| 101 | |
| 102 bioperl-bugs@bioperl.org | |
| 103 http://bugzilla.bioperl.org/ | |
| 104 | |
| 105 =head1 AUTHOR | |
| 106 | |
| 107 Steve Chervitz E<lt>sac@bioperl.orgE<gt> | |
| 108 | |
| 109 See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments. | |
| 110 | |
| 111 =head1 ACKNOWLEDGEMENTS | |
| 112 | |
| 113 I would like to acknowledge my colleagues at Affymetrix for useful | |
| 114 feedback. | |
| 115 | |
| 116 =head1 COPYRIGHT | |
| 117 | |
| 118 Copyright (c) 2001 Steve Chervitz. All Rights Reserved. | |
| 119 | |
| 120 This library is free software; you can redistribute it and/or modify | |
| 121 it under the same terms as Perl itself. | |
| 122 | |
| 123 =head1 DISCLAIMER | |
| 124 | |
| 125 This software is provided "as is" without warranty of any kind. | |
| 126 | |
| 127 =head1 APPENDIX | |
| 128 | |
| 129 The rest of the documentation details each of the object methods. | |
| 130 Internal methods are usually preceded with a _ | |
| 131 | |
| 132 =cut | |
| 133 | |
| 134 | |
| 135 # Let the code begin... | |
| 136 | |
| 137 | |
| 138 package Bio::SearchIO::psiblast; | |
| 139 | |
| 140 use strict; | |
| 141 use vars qw( @ISA | |
| 142 $MAX_HSP_OVERLAP | |
| 143 $DEFAULT_MATRIX | |
| 144 $DEFAULT_SIGNIF | |
| 145 $DEFAULT_SCORE | |
| 146 $DEFAULT_BLAST_WRITER_CLASS | |
| 147 $DEFAULT_HIT_FACTORY_CLASS | |
| 148 $DEFAULT_RESULT_FACTORY_CLASS | |
| 149 ); | |
| 150 | |
| 151 use Bio::SearchIO; | |
| 152 use Bio::Search::Result::BlastResult; | |
| 153 use Bio::Factory::BlastHitFactory; | |
| 154 use Bio::Factory::BlastResultFactory; | |
| 155 use Bio::Tools::StateMachine::IOStateMachine qw($INITIAL_STATE $FINAL_STATE); | |
| 156 | |
| 157 @ISA = qw( Bio::SearchIO | |
| 158 Bio::Tools::StateMachine::IOStateMachine ); | |
| 159 | |
| 160 $MAX_HSP_OVERLAP = 2; # Used when tiling multiple HSPs. | |
| 161 $DEFAULT_MATRIX = 'BLOSUM62'; | |
| 162 $DEFAULT_SIGNIF = 999;# Value used as significance cutoff if none supplied. | |
| 163 $DEFAULT_SCORE = 0; # Value used as score cutoff if none supplied. | |
| 164 $DEFAULT_BLAST_WRITER_CLASS = 'Bio::Search::Writer::HitTableWriter'; | |
| 165 $DEFAULT_HIT_FACTORY_CLASS = 'Bio::Factory::BlastHitFactory'; | |
| 166 $DEFAULT_RESULT_FACTORY_CLASS = 'Bio::Factory::BlastResultFactory'; | |
| 167 | |
| 168 my %state = ( | |
| 169 'header' => 'Header', | |
| 170 'descr' => 'Descriptions', | |
| 171 'align' => 'Alignment', | |
| 172 'footer' => 'Footer', | |
| 173 'iteration' => 'Iteration', # psiblast | |
| 174 'nohits' => 'No Hits' | |
| 175 ); | |
| 176 | |
| 177 # These are the expected transitions assuming a "normal" report (Blast2 or PSI-Blast). | |
| 178 my @state_transitions = ( [ $INITIAL_STATE, $state{'header'}], | |
| 179 [ $state{'header'}, $state{'descr'} ], | |
| 180 [ $state{'header'}, $state{'iteration'} ], | |
| 181 [ $state{'iteration'}, $state{'descr'} ], | |
| 182 [ $state{'iteration'}, $state{'nohits'} ], | |
| 183 [ $state{'descr'}, $state{'align'} ], | |
| 184 [ $state{'align'}, $state{'align'} ], | |
| 185 [ $state{'align'}, $state{'footer'} ], | |
| 186 [ $state{'align'}, $state{'iteration'} ], # psiblast | |
| 187 [ $state{'nohits'}, $state{'iteration'} ], # psiblast | |
| 188 [ $state{'nohits'}, $state{'footer'} ], | |
| 189 [ $state{'footer'}, $state{'header'} ], | |
| 190 [ $state{'footer'}, $FINAL_STATE] | |
| 191 ); | |
| 192 | |
| 193 my $current_iteration; # psiblast | |
| 194 | |
| 195 =head2 new | |
| 196 | |
| 197 Usage : Bio::SearchIO::psiblast->new( %named_parameters ) | |
| 198 Purpose : Parse traditional BLAST or PSI-BLAST data a file or input stream. | |
| 199 : Handles Blast1, Blast2, NCBI and WU Blast reports. | |
| 200 : Populates Bio::Search:: objects with data extracted from the report. | |
| 201 : (The exact type of Bio::Search objects depends on the type of | |
| 202 : Bio::Factory::ResultFactory and Bio::Factory::HitFactory you hook up | |
| 203 : to the SearchIO object.) | |
| 204 Returns : Bio::SearchIO::psiblast object. | |
| 205 Argument : Named parameters: (PARAMETER TAGS CAN BE UPPER OR LOWER CASE). | |
| 206 : These are in addition to those specified by Bio::SearchIO::new() (see). | |
| 207 : -SIGNIF => number (float or scientific notation number to be used | |
| 208 : as a P- or Expect value cutoff; default = 999.) | |
| 209 : -SCORE => number (integer or scientific notation number to be used | |
| 210 : as a score value cutoff; default = 0.) | |
| 211 : -HIT_FILTER => func_ref (reference to a function to be used for | |
| 212 : filtering out hits based on arbitrary criteria. | |
| 213 : This function should take a | |
| 214 : Bio::Search::Hit::BlastHit.pm object as its first | |
| 215 : argument and return a boolean value, | |
| 216 : true if the hit should be filtered out). | |
| 217 : Sample filter function: | |
| 218 : -HIT_FILTER => sub { $hit = shift; | |
| 219 : $hit->gaps == 0; }, | |
| 220 : Historical note: This parameter was formerly | |
| 221 called -FILT_FUNC in the older | |
| 222 Bio::Tools::Blast::parse method. Using | |
| 223 -FILT_FUNC will still work for backward | |
| 224 compatibility. | |
| 225 : -CHECK_ALL_HITS => boolean (check all hits for significance against | |
| 226 : significance criteria. Default = false. | |
| 227 : If false, stops processing hits after the first | |
| 228 : non-significant hit or the first hit that fails | |
| 229 : the hit_filter call. This speeds parsing, | |
| 230 : taking advantage of the fact that the hits | |
| 231 : are processed in the order they are ranked.) | |
| 232 : -MIN_LEN => integer (to be used as a minimum query sequence length | |
| 233 : sequences below this length will not be processed). | |
| 234 : default = no minimum length). | |
| 235 : -STATS => boolean (collect key statistical parameters for the report: | |
| 236 : matrix, filters, etc. default = false). | |
| 237 : This requires extra parsing | |
| 238 : so if you aren't interested in this info, don't | |
| 239 : set this parameter. Note that the unparsed | |
| 240 : parameter section of a Blast report is always | |
| 241 : accessible via $blast->raw_statistics(). | |
| 242 : -BEST => boolean (only process the best hit of each report; | |
| 243 : default = false). | |
| 244 : -OVERLAP => integer (the amount of overlap to permit between | |
| 245 : adjacent HSPs when tiling HSPs. A reasonable value is 2. | |
| 246 : Default = $Bio::SearchIO::psiblast::MAX_HSP_OVERLAP) | |
| 247 : -HOLD_RAW_DATA => boolean (store the raw alignment sections for each hit. | |
| 248 : used with the -SHALLOW_PARSE option). | |
| 249 : -SHALLOW_PARSE => boolean (only minimal parsing; does not parse HSPs. | |
| 250 : Hit data is limited to what can be obtained | |
| 251 : from the description line. | |
| 252 : Replaces the older NO_ALIGNS option.) | |
| 253 : | |
| 254 : | |
| 255 Comments : Do NOT remove the HTML from an HTML-formatted Blast report by using the | |
| 256 : "Save As" option of a web browser to save it as text. This renders the | |
| 257 : report unparsable. | |
| 258 Throws : An exception will be thrown if a BLAST report contains a FATAL: error. | |
| 259 | |
| 260 =cut | |
| 261 | |
| 262 sub new { | |
| 263 my ($class, %args) = @_; | |
| 264 | |
| 265 # TODO: Resolve this issue: | |
| 266 # Temporary hack to allow factory-based and non-factory based | |
| 267 # SearchIO objects co-exist. | |
| 268 # steve --- Sat Dec 22 04:41:20 2001 | |
| 269 $args{-USE_FACTORIES} = 1; | |
| 270 | |
| 271 my $self = $class->Bio::SearchIO::new(%args); | |
| 272 $self->_init_state_machine( %args, -transition_table => \@state_transitions); | |
| 273 | |
| 274 $self->_init_parse_params( %args ); | |
| 275 | |
| 276 $self->pause_between_reports( 1 ); | |
| 277 | |
| 278 $self->{'_result_count'} = 0; | |
| 279 | |
| 280 return $self; | |
| 281 } | |
| 282 | |
| 283 | |
| 284 sub default_result_factory_class { | |
| 285 my $self = shift; | |
| 286 return $DEFAULT_RESULT_FACTORY_CLASS; | |
| 287 } | |
| 288 | |
| 289 sub default_hit_factory_class { | |
| 290 my $self = shift; | |
| 291 return $DEFAULT_HIT_FACTORY_CLASS; | |
| 292 } | |
| 293 | |
| 294 sub check_for_new_state { | |
| 295 my ($self) = @_; | |
| 296 | |
| 297 # Ignore blank lines | |
| 298 my $chunk = $self->SUPER::check_for_new_state(1); | |
| 299 | |
| 300 my $newstate = undef; | |
| 301 | |
| 302 # End the run if there's no more input. | |
| 303 if( ! $chunk ) { | |
| 304 return $self->final_state; | |
| 305 } | |
| 306 $self->clear_state_change_cache; | |
| 307 my $curr_state = $self->current_state; | |
| 308 | |
| 309 if( $chunk =~ /^(<.*>)?T?BLAST[NPX]/ ) { | |
| 310 $newstate = $state{header}; | |
| 311 $self->state_change_cache( $chunk ); | |
| 312 } | |
| 313 | |
| 314 elsif ($chunk =~ /^Sequences producing/ ) { | |
| 315 $newstate = $state{descr}; | |
| 316 $self->state_change_cache( $chunk ); | |
| 317 } | |
| 318 | |
| 319 elsif ($chunk =~ /No hits found/i ) { | |
| 320 $newstate = $state{nohits}; | |
| 321 $self->state_change_cache( $chunk ); | |
| 322 } | |
| 323 | |
| 324 elsif ($chunk =~ /^\s*Searching/ ) { | |
| 325 $newstate = $state{iteration}; | |
| 326 } | |
| 327 | |
| 328 elsif ($chunk =~ /^>(.*)/ ) { | |
| 329 $newstate = $state{align}; | |
| 330 $self->state_change_cache( "$1\n" ); | |
| 331 } | |
| 332 | |
| 333 elsif ($chunk =~ /^(CPU time|Parameters):/ ) { | |
| 334 $newstate = $state{footer}; | |
| 335 $self->state_change_cache( $chunk ); | |
| 336 } | |
| 337 | |
| 338 # Necessary to distinguish " Database:" lines that start a footer section | |
| 339 # from those that are internal to a footer section. | |
| 340 elsif ($chunk =~ /^\s+Database:/ && $curr_state ne $state{'footer'}) { | |
| 341 $newstate = $state{footer}; | |
| 342 $self->state_change_cache( $chunk ); | |
| 343 } | |
| 344 | |
| 345 if( $curr_state ne $INITIAL_STATE and not $newstate ) { | |
| 346 # print "Appending input cache with ($curr_state): $chunk\n"; | |
| 347 $self->append_input_cache( $chunk ); | |
| 348 } | |
| 349 | |
| 350 return $newstate; | |
| 351 } | |
| 352 | |
| 353 | |
| 354 sub change_state { | |
| 355 my ($self, $state) = @_; | |
| 356 | |
| 357 my $from_state = $self->current_state; | |
| 358 my $verbose = $self->verbose; | |
| 359 $verbose and print STDERR ">>>>> Changing state from $from_state to $state\n"; | |
| 360 | |
| 361 if ( $self->validate_transition( $from_state, $state ) ) { | |
| 362 | |
| 363 # Now we know the current state is complete | |
| 364 # and all data from it is now in the input cache. | |
| 365 my @data = $self->get_input_cache(); | |
| 366 | |
| 367 # if($from_state eq $state{iteration} ) { | |
| 368 # do{ | |
| 369 # print STDERR "State change cache: ", $self->state_change_cache, "\n"; | |
| 370 # print STDERR "Input cache ($from_state):\n@data\n\n"; | |
| 371 # }; | |
| 372 # } | |
| 373 | |
| 374 # Now we need to process the input cache data. | |
| 375 # Remember, at this point, the current state is the "from" state | |
| 376 # of the state transition. The "to" state doesn't get set until | |
| 377 # the finalize_state_change() call at the end of this method. | |
| 378 | |
| 379 if($from_state eq $state{header} ) { | |
| 380 $self->_process_header( @data ); | |
| 381 } | |
| 382 elsif($from_state eq $state{descr} ) { | |
| 383 $self->_process_descriptions( @data ); | |
| 384 } | |
| 385 elsif($from_state eq $state{iteration} ) { | |
| 386 $self->_process_iteration( @data, $self->state_change_cache() ); | |
| 387 } | |
| 388 elsif($from_state eq $state{align} ) { | |
| 389 $self->_process_alignment( @data ); | |
| 390 } | |
| 391 elsif($from_state eq $state{footer} ) { | |
| 392 my $ok_to_pause = not $state eq $self->final_state; | |
| 393 $self->_process_footer( $ok_to_pause, @data ); | |
| 394 } | |
| 395 | |
| 396 $self->finalize_state_change( $state, 1 ); | |
| 397 } | |
| 398 } | |
| 399 | |
| 400 | |
| 401 sub _add_error { | |
| 402 my ($self, $err) = @_; | |
| 403 if( $err ) { | |
| 404 push @{$self->{'_blast_errs'}}, $err; | |
| 405 } | |
| 406 } | |
| 407 | |
| 408 sub _clear_errors { | |
| 409 my $self = shift; | |
| 410 $self->{'_blast_errs'} = undef; | |
| 411 } | |
| 412 | |
| 413 #--------- | |
| 414 sub errors { | |
| 415 #--------- | |
| 416 my $self = shift; | |
| 417 my @errs = (); | |
| 418 @errs = @{$self->{'_blast_errs'}} if ref $self->{'_blast_errs'}; | |
| 419 return @errs; | |
| 420 } | |
| 421 | |
| 422 | |
| 423 #---------------------- | |
| 424 sub _init_parse_params { | |
| 425 #---------------------- | |
| 426 #Initializes parameters used during parsing of Blast reports. | |
| 427 | |
| 428 my ( $self, @param ) = @_; | |
| 429 # -FILT_FUNC has been replaced by -HIT_FILTER. | |
| 430 # Leaving -FILT_FUNC in place for backward compatibility | |
| 431 my($signif, $filt_func, $hit_filter, $min_len, $check_all, | |
| 432 $gapped, $score, $overlap, $stats, $best, $shallow_parse, | |
| 433 $hold_raw) = | |
| 434 $self->_rearrange([qw(SIGNIF FILT_FUNC HIT_FILTER MIN_LEN | |
| 435 CHECK_ALL_HITS GAPPED SCORE | |
| 436 OVERLAP STATS BEST SHALLOW_PARSE HOLD_RAW_DATA)], @param); | |
| 437 | |
| 438 $self->{'_hit_filter'} = $hit_filter || $filt_func || 0; | |
| 439 $self->{'_check_all'} = $check_all || 0; | |
| 440 $self->{'_shallow_parse'} = $shallow_parse || 0; | |
| 441 $self->{'_hold_raw_data'} = $hold_raw || 0; | |
| 442 | |
| 443 $self->_set_signif($signif, $min_len, $self->{'_hit_filter'}, $score); | |
| 444 $self->best_hit_only($best) if $best; | |
| 445 $self->{'_blast_count'} = 0; | |
| 446 | |
| 447 $self->{'_collect_stats'} = defined($stats) ? $stats : 0; | |
| 448 | |
| 449 # TODO: Automatically determine whether gapping was used. | |
| 450 # e.g., based on version number. Otherwise, have to read params. | |
| 451 $self->{'_gapped'} = $gapped || 1; | |
| 452 | |
| 453 # Clear any errors from previous parse. | |
| 454 $self->_clear_errors; | |
| 455 undef $self->{'_hit_count'}; | |
| 456 undef $self->{'_num_hits_significant'}; | |
| 457 } | |
| 458 | |
| 459 #=head2 _set_signif | |
| 460 # | |
| 461 # Usage : n/a; called automatically by _init_parse_params() | |
| 462 # Purpose : Sets significance criteria for the BLAST object. | |
| 463 # Argument : Obligatory three arguments: | |
| 464 # : $signif = float or sci-notation number or undef | |
| 465 # : $min_len = integer or undef | |
| 466 # : $hit_filter = function reference or undef | |
| 467 # : | |
| 468 # : If $signif is undefined, a default value is set | |
| 469 # : (see $DEFAULT_SIGNIF; min_length = not set). | |
| 470 # Throws : Exception if significance value is defined but appears | |
| 471 # : out of range or invalid. | |
| 472 # : Exception if $hit_filter if defined and is not a func ref. | |
| 473 # Comments : The significance of a BLAST report can be based on | |
| 474 # : the P (or Expect) value and/or the length of the query sequence. | |
| 475 # : P (or Expect) values GREATER than '_max_significance' are not significant. | |
| 476 # : Query sequence lengths LESS than '_min_length' are not significant. | |
| 477 # : | |
| 478 # : Hits can also be screened using arbitrary significance criteria | |
| 479 # : as discussed in the parse() method. | |
| 480 # : | |
| 481 # : If no $signif is defined, the '_max_significance' level is set to | |
| 482 # : $DEFAULT_SIGNIF (999). | |
| 483 # | |
| 484 #See Also : L<signif>(), L<min_length>(), L<_init_parse_params>(), L<parse>() | |
| 485 # | |
| 486 #=cut | |
| 487 | |
| 488 #----------------- | |
| 489 sub _set_signif { | |
| 490 #----------------- | |
| 491 my( $self, $sig, $len, $func, $score ) = @_; | |
| 492 | |
| 493 if(defined $sig) { | |
| 494 $self->{'_confirm_significance'} = 1; | |
| 495 if( $sig =~ /[^\d.e-]/ or $sig <= 0) { | |
| 496 $self->throw(-class => 'Bio::Root::BadParameter', | |
| 497 -text => "Invalid significance value: $sig\n". | |
| 498 "Must be a number greater than zero."); | |
| 499 } | |
| 500 $self->{'_max_significance'} = $sig; | |
| 501 } else { | |
| 502 $self->{'_max_significance'} = $DEFAULT_SIGNIF; | |
| 503 } | |
| 504 | |
| 505 if(defined $score) { | |
| 506 $self->{'_confirm_significance'} = 1; | |
| 507 if( $score =~ /[^\de+]/ or $score <= 0) { | |
| 508 $self->throw(-class => 'Bio::Root::BadParameter', | |
| 509 -text => "Invalid score value: $score\n". | |
| 510 "Must be an integer greater than zero."); | |
| 511 } | |
| 512 $self->{'_min_score'} = $score; | |
| 513 } else { | |
| 514 $self->{'_min_score'} = $DEFAULT_SCORE; | |
| 515 } | |
| 516 | |
| 517 if(defined $len) { | |
| 518 if($len =~ /\D/ or $len <= 0) { | |
| 519 $self->warn("Invalid minimum length value: $len", | |
| 520 "Value must be an integer > 0. Value not set."); | |
| 521 } else { | |
| 522 $self->{'_min_length'} = $len; | |
| 523 } | |
| 524 } | |
| 525 | |
| 526 if(defined $func) { | |
| 527 $self->{'_check_all'} = 1; | |
| 528 $self->{'_confirm_significance'} = 1; | |
| 529 if($func and not ref $func eq 'CODE') { | |
| 530 $self->throw("Not a function reference: $func", | |
| 531 "The -hit_filter parameter must be function reference."); | |
| 532 } | |
| 533 } | |
| 534 } | |
| 535 | |
| 536 =head2 signif | |
| 537 | |
| 538 Synonym for L<max_significance()|max_significance> | |
| 539 | |
| 540 =cut | |
| 541 | |
| 542 #----------- | |
| 543 sub signif { shift->max_significance } | |
| 544 | |
| 545 | |
| 546 =head2 max_significance | |
| 547 | |
| 548 Usage : $obj->max_significance(); | |
| 549 Purpose : Gets the P or Expect value used as significance screening cutoff. | |
| 550 This is the value of the -signif parameter supplied to new(). | |
| 551 Hits with P or E-value above this are skipped. | |
| 552 Returns : Scientific notation number with this format: 1.0e-05. | |
| 553 Argument : n/a | |
| 554 Comments : Screening of significant hits uses the data provided on the | |
| 555 : description line. For NCBI BLAST1 and WU-BLAST, this data | |
| 556 : is P-value. for NCBI BLAST2 it is an Expect value. | |
| 557 | |
| 558 =cut | |
| 559 | |
| 560 #----------- | |
| 561 sub max_significance { | |
| 562 #----------- | |
| 563 my $self = shift; | |
| 564 my $sig = $self->{'_max_significance'}; | |
| 565 sprintf "%.1e", $sig; | |
| 566 } | |
| 567 | |
| 568 =head2 min_score | |
| 569 | |
| 570 Usage : $obj->min_score(); | |
| 571 Purpose : Gets the Blast score used as screening cutoff. | |
| 572 This is the value of the -score parameter supplied to new(). | |
| 573 Hits with scores below this are skipped. | |
| 574 Returns : Integer or scientific notation number. | |
| 575 Argument : n/a | |
| 576 Comments : Screening of significant hits uses the data provided on the | |
| 577 : description line. | |
| 578 | |
| 579 =cut | |
| 580 | |
| 581 #----------- | |
| 582 sub min_score { | |
| 583 #----------- | |
| 584 my $self = shift; | |
| 585 return $self->{'_min_score'}; | |
| 586 } | |
| 587 | |
| 588 =head2 min_length | |
| 589 | |
| 590 Usage : $obj->min_length(); | |
| 591 Purpose : Gets the query sequence length used as screening criteria. | |
| 592 This is the value of the -min_len parameter supplied to new(). | |
| 593 Hits with sequence length below this are skipped. | |
| 594 Returns : Integer | |
| 595 Argument : n/a | |
| 596 | |
| 597 See Also : L<signif()|signif> | |
| 598 | |
| 599 =cut | |
| 600 | |
| 601 #-------------- | |
| 602 sub min_length { | |
| 603 #-------------- | |
| 604 my $self = shift; | |
| 605 $self->{'_min_length'}; | |
| 606 } | |
| 607 | |
| 608 =head2 highest_signif | |
| 609 | |
| 610 Usage : $value = $obj->highest_signif(); | |
| 611 Purpose : Gets the largest significance (P- or E-value) observed in | |
| 612 the report. | |
| 613 : For NCBI BLAST1 and WU-BLAST, this is a P-value. | |
| 614 : For NCBI BLAST2 it is an Expect value. | |
| 615 Returns : Float or sci notation number | |
| 616 Argument : n/a | |
| 617 | |
| 618 =cut | |
| 619 | |
| 620 sub highest_signif { shift->{'_highestSignif'} } | |
| 621 | |
| 622 =head2 lowest_signif | |
| 623 | |
| 624 Usage : $value = $obj->lowest_signif(); | |
| 625 Purpose : Gets the largest significance (P- or E-value) observed in | |
| 626 the report. | |
| 627 : For NCBI BLAST1 and WU-BLAST, this is a P-value. | |
| 628 : For NCBI BLAST2 it is an Expect value. | |
| 629 Returns : Float or sci notation number | |
| 630 Argument : n/a | |
| 631 | |
| 632 =cut | |
| 633 | |
| 634 sub lowest_signif { shift->{'_lowestSignif'} } | |
| 635 | |
| 636 =head2 highest_score | |
| 637 | |
| 638 Usage : $value = $obj->highest_score(); | |
| 639 Purpose : Gets the largest BLAST score observed in the report. | |
| 640 Returns : Integer or sci notation number | |
| 641 Argument : n/a | |
| 642 | |
| 643 =cut | |
| 644 | |
| 645 sub highest_score { shift->{'_highestScore'} } | |
| 646 | |
| 647 =head2 lowest_score | |
| 648 | |
| 649 Usage : $value = $obj->lowest_score(); | |
| 650 Purpose : Gets the largest BLAST score observed in the report. | |
| 651 Returns : Integer or sci notation number | |
| 652 Argument : n/a | |
| 653 | |
| 654 =cut | |
| 655 | |
| 656 sub lowest_score { shift->{'_lowestScore'} } | |
| 657 | |
| 658 | |
| 659 # Throws : Exception if BLAST report contains a FATAL: error. | |
| 660 sub _process_header { | |
| 661 my ($self, @data) = @_; | |
| 662 | |
| 663 # print STDERR "Processing Header...\n"; | |
| 664 | |
| 665 $current_iteration = 0; | |
| 666 $self->{'_result_count'}++; | |
| 667 # Finish off the current Blast object, if any | |
| 668 my $blast = $self->{'_current_blast'} = $self->result_factory->create_result(); | |
| 669 | |
| 670 my ($set_method, $set_query, $set_db, $set_length); | |
| 671 my ($query_start, $query_desc); | |
| 672 | |
| 673 foreach my $line (@data) { | |
| 674 if( $line =~ /^(<.*>)?(T?BLAST[NPX])\s+(.*)$/ ) { | |
| 675 $blast->analysis_method( $2 ); | |
| 676 $blast->analysis_method_version( $3 ); | |
| 677 $set_method = 1; | |
| 678 } | |
| 679 elsif ($line =~ /^Query= *(.+)$/ ) { | |
| 680 $query_start = 1; | |
| 681 my $info = $1; | |
| 682 $info =~ s/TITLE //; | |
| 683 # Split the query line into two parts. | |
| 684 # Using \s instead of ' ' | |
| 685 $info =~ /(\S+?)\s+(.*)/; | |
| 686 # set name of Blast object and return. | |
| 687 $blast->query_name($1 || 'UNKNOWN'); | |
| 688 $query_desc = $2 || ''; | |
| 689 $set_query = 1; | |
| 690 } | |
| 691 elsif ($line =~ /^Database:\s+(.+)$/ ) { | |
| 692 require Bio::Search::GenericDatabase; | |
| 693 my $blastdb = Bio::Search::GenericDatabase->new( -name => $1 ); | |
| 694 $blast->analysis_subject( $blastdb ); | |
| 695 $set_db = 1; | |
| 696 } | |
| 697 elsif( $line =~ m/^\s+\(([\d|,]+) letters\)/ ) { | |
| 698 my $length = $1; | |
| 699 $length =~ s/,//g; | |
| 700 $self->_set_query_length( $length ); | |
| 701 $set_length = 1; | |
| 702 $blast->query_description( $query_desc ); | |
| 703 $query_start = 0; | |
| 704 } | |
| 705 elsif( $line =~ /WARNING: (.+?)/ ) { | |
| 706 $self->warn( $1 ); | |
| 707 } | |
| 708 elsif( $line =~ /FATAL: (.+?)/ ) { | |
| 709 $self->throw("FATAL BLAST Report Error: $1"); | |
| 710 } | |
| 711 # This needs to be the last elsif block. | |
| 712 elsif( $query_start ) { | |
| 713 # Handling multi-line query descriptions. | |
| 714 chomp( $line ); | |
| 715 $query_desc .= " $line"; | |
| 716 } | |
| 717 } | |
| 718 if (!$set_method) { | |
| 719 $self->throw("Can't determine type of BLAST program."); | |
| 720 } | |
| 721 if (!$set_query) { | |
| 722 $self->throw("Can't determine name of query sequence."); | |
| 723 } | |
| 724 if(!$set_db) { | |
| 725 $self->throw("Can't determine name of database."); | |
| 726 } | |
| 727 if(!$set_length) { | |
| 728 $self->throw("Can't determine sequence length from BLAST report."); | |
| 729 } | |
| 730 | |
| 731 } | |
| 732 | |
| 733 sub _process_descriptions { | |
| 734 my ($self, @data) = @_; | |
| 735 # print STDERR "Processing Descriptions...\n"; | |
| 736 | |
| 737 # Step through each line parsing out the P/Expect value | |
| 738 # All we really need to do is check the first one, if it doesn't | |
| 739 # meet the significance requirement, we can skip the report. | |
| 740 # BUT: we want to collect data for all hits anyway to get min/max signif. | |
| 741 | |
| 742 my $max_signif = $self->max_significance; | |
| 743 my $min_score = $self->min_score; | |
| 744 my $layout_set = $self->{'_layout'} || 0; | |
| 745 my ($layout, $sig, $hitid, $score, $is_p_value); | |
| 746 | |
| 747 if( $data[0] =~ /^\s*Sequences producing.*Score\s+P/i ) { | |
| 748 $is_p_value = 1; | |
| 749 } else { | |
| 750 $is_p_value = 0; | |
| 751 } | |
| 752 | |
| 753 my $hit_found_again; | |
| 754 | |
| 755 desc_loop: | |
| 756 foreach my $line (@data) { | |
| 757 last desc_loop if $line =~ / NONE |End of List|Results from round/; | |
| 758 next desc_loop if $line =~ /^\.\./; | |
| 759 | |
| 760 if($line =~ /^Sequences used in model/ ) { | |
| 761 #Sequences used in model and found again: | |
| 762 $hit_found_again = 1; | |
| 763 next; | |
| 764 } | |
| 765 elsif($line =~ /^Sequences not found previously/ ) { | |
| 766 #Sequences not found previously or not previously below threshold: | |
| 767 $hit_found_again = 0; | |
| 768 next; | |
| 769 } | |
| 770 | |
| 771 ## Checking the significance value (P- or Expect value) of the hit | |
| 772 ## in the description line. | |
| 773 | |
| 774 next desc_loop unless $line =~ /\d/; | |
| 775 | |
| 776 # TODO: These regexps need testing on a variety of reports. | |
| 777 if ( $line =~ /^(\S+)\s+.*\s+([\de.+-]+)\s{1,5}[\de.-]+\s*$/) { | |
| 778 $hitid = $1; | |
| 779 $score = $2; | |
| 780 $layout = 2; | |
| 781 } elsif( $line =~ /^(\S+)\s+.*\s+([\de.+-]+)\s{1,5}[\de.-]+\s{1,}\d+\s*$/) { | |
| 782 $hitid = $1; | |
| 783 $score = $2; | |
| 784 $layout = 1; | |
| 785 } else { | |
| 786 $self->warn("Can't parse description line\n $line"); | |
| 787 next desc_loop; | |
| 788 } | |
| 789 not $layout_set and ($self->_layout($layout), $layout_set = 1); | |
| 790 | |
| 791 $sig = &_parse_signif( $line, $layout, $self->{'_gapped'} ); | |
| 792 | |
| 793 # print STDERR " Parsed signif for $hitid: $sig (layout=$layout)\n"; | |
| 794 | |
| 795 $self->{'_hit_hash'}->{$hitid}->{'signif'} = $sig; | |
| 796 $self->{'_hit_hash'}->{$hitid}->{'score'} = $score; | |
| 797 $self->{'_hit_hash'}->{$hitid}->{'found_again'} = $hit_found_again; | |
| 798 $self->{'_hit_hash'}->{'is_pval'} = $is_p_value; | |
| 799 | |
| 800 last desc_loop if (not $self->{'_check_all'} and | |
| 801 ($sig > $max_signif or $score < $min_score)); | |
| 802 | |
| 803 $self->_process_significance($sig, $score); | |
| 804 } | |
| 805 | |
| 806 # printf "\n%d SIGNIFICANT HITS.\nDONE PARSING DESCRIPTIONS.\n", $self->{'_num_hits_significant'}; | |
| 807 } | |
| 808 | |
| 809 | |
| 810 #=head2 _set_query_length | |
| 811 # | |
| 812 # Usage : n/a; called automatically during Blast report parsing. | |
| 813 # Purpose : Sets the length of the query sequence (extracted from report). | |
| 814 # Returns : integer (length of the query sequence) | |
| 815 # Throws : Exception if cannot determine the query sequence length from | |
| 816 # : the BLAST report. | |
| 817 # : Exception if the length is below the min_length cutoff (if any). | |
| 818 # Comments : The logic here is a bit different from the other _set_XXX() | |
| 819 # : methods since the significance of the BLAST report is assessed | |
| 820 # : if MIN_LENGTH is set. | |
| 821 # | |
| 822 #=cut | |
| 823 | |
| 824 #--------------- | |
| 825 sub _set_query_length { | |
| 826 #--------------- | |
| 827 my ($self, $length) = @_; | |
| 828 | |
| 829 my($sig_len); | |
| 830 if(defined($self->{'_min_length'})) { | |
| 831 local $^W = 0; | |
| 832 if($length < $self->{'_min_len'}) { | |
| 833 $self->throw("Query sequence too short (Query= ${\$self->{'_current_blast'}->query_name}, length= $length)", | |
| 834 "Minimum length is $self->{'_min_len'}"); | |
| 835 } | |
| 836 } | |
| 837 | |
| 838 $self->{'_current_blast'}->query_length($length); | |
| 839 } | |
| 840 | |
| 841 | |
| 842 # Records the highest and lowest significance (P- or E-value) and | |
| 843 # score encountered in a given report. | |
| 844 sub _set_hi_low_signif_and_score { | |
| 845 my($self, $sig, $score) = @_; | |
| 846 | |
| 847 my $hiSig = $self->{'_highestSignif'} || 0; | |
| 848 my $lowSig = $self->{'_lowestSignif'} || $DEFAULT_SIGNIF; | |
| 849 my $hiScore = $self->{'_highestScore'} || 0; | |
| 850 my $lowScore = $self->{'_lowestScore'} || $DEFAULT_SIGNIF; | |
| 851 | |
| 852 $self->{'_highestSignif'} = ($sig > $hiSig) | |
| 853 ? $sig : $hiSig; | |
| 854 | |
| 855 $self->{'_lowestSignif'} = ($sig < $lowSig) | |
| 856 ? $sig : $lowSig; | |
| 857 | |
| 858 $self->{'_highestScore'} = ($score > $hiScore) | |
| 859 ? $score : $hiScore; | |
| 860 | |
| 861 $self->{'_lowestScore'} = ($score < $lowScore) | |
| 862 ? $score : $lowScore; | |
| 863 } | |
| 864 | |
| 865 | |
| 866 sub _process_significance { | |
| 867 my($self, $sig, $score) = @_; | |
| 868 | |
| 869 $self->_set_hi_low_signif_and_score($sig, $score); | |
| 870 | |
| 871 # Significance value assessment. | |
| 872 if($sig <= $self->{'_max_significance'} and $score >= $self->{'_min_score'}) { | |
| 873 $self->{'_num_hits_significant'}++; | |
| 874 } | |
| 875 $self->{'_num_hits'}++; | |
| 876 | |
| 877 $self->{'_is_significant'} = 1 if $self->{'_num_hits_significant'}; | |
| 878 } | |
| 879 | |
| 880 #=head2 _layout | |
| 881 # | |
| 882 # Usage : n/a; internal method. | |
| 883 # Purpose : Set/Get indicator for the layout of the report. | |
| 884 # Returns : Integer (1 | 2) | |
| 885 # : Defaults to 2 if not set. | |
| 886 # Comments : Blast1 and WashU-Blast2 have a layout = 1. | |
| 887 # : This is intended for internal use by this and closely | |
| 888 # : allied modules like BlastHit.pm and BlastHSP.pm. | |
| 889 # | |
| 890 #=cut | |
| 891 | |
| 892 #------------ | |
| 893 sub _layout { | |
| 894 #------------ | |
| 895 my $self = shift; | |
| 896 if(@_) { | |
| 897 $self->{'_layout'} = shift; | |
| 898 } | |
| 899 $self->{'_layout'} || 2; | |
| 900 } | |
| 901 | |
| 902 #=head2 _parse_signif | |
| 903 # | |
| 904 # Usage : $signif = _parse_signif(string, layout, gapped); | |
| 905 # : This is a class function. | |
| 906 # Purpose : Extracts the P- or Expect value from a single line of a BLAST description section. | |
| 907 # Example : _parse_signif("PDB_UNIQUEP:3HSC_ heat-shock cognate ... 799 4.0e-206 2", 1); | |
| 908 # : _parse_signif("gi|758803 (U23828) peritrophin-95 precurs 38 0.19", 2); | |
| 909 # Argument : string = line from BLAST description section | |
| 910 # : layout = integer (1 or 2) | |
| 911 # : gapped = boolean (true if gapped Blast). | |
| 912 # Returns : Float (0.001 or 1e-03) | |
| 913 # Status : Static | |
| 914 # | |
| 915 #=cut | |
| 916 | |
| 917 #------------------ | |
| 918 sub _parse_signif { | |
| 919 #------------------ | |
| 920 my ($line, $layout, $gapped) = @_; | |
| 921 | |
| 922 local $_ = $line; | |
| 923 my @linedat = split(); | |
| 924 | |
| 925 # When processing both Blast1 and Blast2 reports | |
| 926 # in the same run, offset needs to be configured each time. | |
| 927 # NOTE: We likely won't be supporting mixed report streams. Not needed. | |
| 928 | |
| 929 my $offset = 0; | |
| 930 $offset = 1 if $layout == 1 or not $gapped; | |
| 931 | |
| 932 my $signif = $linedat[ $#linedat - $offset ]; | |
| 933 | |
| 934 # fail-safe check | |
| 935 if(not $signif =~ /[.-]/) { | |
| 936 $offset = ($offset == 0 ? 1 : 0); | |
| 937 $signif = $linedat[ $#linedat - $offset ]; | |
| 938 } | |
| 939 | |
| 940 $signif = "1$signif" if $signif =~ /^e/i; | |
| 941 return $signif; | |
| 942 } | |
| 943 | |
| 944 | |
| 945 =head2 best_hit_only | |
| 946 | |
| 947 Usage : print "only getting best hit.\n" if $obj->best_hit_only(); | |
| 948 Purpose : Set/Get the indicator for whether or not to processing only | |
| 949 : the best BlastHit. | |
| 950 Returns : Boolean (1 | 0) | |
| 951 Argument : n/a | |
| 952 | |
| 953 =cut | |
| 954 | |
| 955 #---------- | |
| 956 sub best_hit_only { | |
| 957 #---------- | |
| 958 my $self = shift; | |
| 959 if(@_) { $self->{'_best'} = shift; } | |
| 960 $self->{'_best'}; | |
| 961 } | |
| 962 | |
| 963 sub _process_alignment { | |
| 964 my ($self, @data) = @_; | |
| 965 # print STDERR "Processing Alignment...\n"; | |
| 966 | |
| 967 # If all of the significant hits have been parsed, | |
| 968 # return if we're not checking all or if we don't need to get | |
| 969 # the Blast stats (parameters at footer of report). | |
| 970 if(defined $self->{'_hit_count'} and | |
| 971 defined $self->{'_num_hits_significant'}) { | |
| 972 return if $self->{'_hit_count'} >= $self->{'_num_hits_significant'} and | |
| 973 not ($self->{'_check_all'} or $self->{'_collect_stats'}); | |
| 974 } | |
| 975 | |
| 976 # Return if we're only interested in the best hit. | |
| 977 # This has to occur after checking for the parameters section | |
| 978 # in the footer (since we may still be interested in them). | |
| 979 return if $self->best_hit_only and ( defined $self->{'_hit_count'} and $self->{'_hit_count'} >=1); | |
| 980 | |
| 981 push @data, 'end'; | |
| 982 | |
| 983 # print STDERR "\nALIGNMENT DATA:\n@data\n"; | |
| 984 | |
| 985 my $max_signif = $self->max_significance; | |
| 986 my $min_score = $self->min_score; | |
| 987 | |
| 988 my ($hitid, $score, $signif, $is_pval, $found_again); | |
| 989 if( $data[0] =~ /^(\S+)\s+/ ) { | |
| 990 $hitid = $1; | |
| 991 return unless defined $self->{'_hit_hash'}->{$hitid}; | |
| 992 $score = $self->{'_hit_hash'}->{$hitid}->{'score'}; | |
| 993 $signif = $self->{'_hit_hash'}->{$hitid}->{'signif'}; | |
| 994 $found_again = $self->{'_hit_hash'}->{$hitid}->{'found_again'}; | |
| 995 $is_pval = $self->{'_hit_hash'}->{'is_pval'}; | |
| 996 # print STDERR " Got hitid: $hitid ($signif, $score, P?=$is_pval)\n"; | |
| 997 } | |
| 998 | |
| 999 # Now construct the BlastHit objects from the alignment section | |
| 1000 | |
| 1001 # debug(1); | |
| 1002 | |
| 1003 $self->{'_hit_count'}++; | |
| 1004 | |
| 1005 # If not confirming significance, _process_descriptions will not have been run, | |
| 1006 # so we need to count the total number of hits here. | |
| 1007 if( not $self->{'_confirm_significance'}) { | |
| 1008 $self->{'_num_hits'}++; | |
| 1009 } | |
| 1010 | |
| 1011 my %hit_params = ( -RESULT => $self->{'_current_blast'}, | |
| 1012 -RAW_DATA =>\@data, | |
| 1013 -SIGNIF => $signif, | |
| 1014 -IS_PVAL => $is_pval, | |
| 1015 -SCORE => $score, | |
| 1016 -RANK => $self->{'_hit_count'}, | |
| 1017 -RANK_BY => 'order', | |
| 1018 -OVERLAP => $self->{'_overlap'} || $MAX_HSP_OVERLAP, | |
| 1019 -FOUND_AGAIN => $found_again, | |
| 1020 -SHALLOW_PARSE => $self->{'_shallow_parse'}, | |
| 1021 -HOLD_RAW_DATA => $self->{'_hold_raw_data'}, | |
| 1022 ); | |
| 1023 | |
| 1024 my $hit; | |
| 1025 $hit = $self->hit_factory->create_hit( %hit_params ); | |
| 1026 | |
| 1027 # printf STDERR "NEW HIT: %s, SIGNIFICANCE = %g\n", $hit->name, $hit->expect; <STDIN>; | |
| 1028 # The BLAST report may have not had a description section. | |
| 1029 if(not $self->{'_has_descriptions'}) { | |
| 1030 $self->_process_significance($hit->signif, $score); | |
| 1031 } | |
| 1032 | |
| 1033 # Collect overall signif data if we don't already have it, | |
| 1034 # (as occurs if no -signif or -score parameter are supplied). | |
| 1035 my $hit_signif = $hit->signif; | |
| 1036 | |
| 1037 if (not $self->{'_confirm_significance'} ) { | |
| 1038 $self->_set_hi_low_signif_and_score($hit_signif, $score); | |
| 1039 } | |
| 1040 | |
| 1041 # Test significance using custom function (if supplied) | |
| 1042 my $add_hit = 0; | |
| 1043 | |
| 1044 my $hit_filter = $self->{'_hit_filter'} || 0; | |
| 1045 | |
| 1046 if($hit_filter) { | |
| 1047 if(&$hit_filter($hit)) { | |
| 1048 $add_hit = 1; | |
| 1049 } | |
| 1050 } elsif($hit_signif <= $max_signif and $score >= $min_score) { | |
| 1051 $add_hit = 1; | |
| 1052 } | |
| 1053 $add_hit && $self->{'_current_blast'}->add_hit( $hit ); | |
| 1054 } | |
| 1055 | |
| 1056 | |
| 1057 sub _process_footer { | |
| 1058 my ($self, $ok_to_pause, @data) = @_; | |
| 1059 # print STDERR "Processing Footer...\n"; | |
| 1060 | |
| 1061 $self->{'_current_blast'}->raw_statistics( [@data] ); | |
| 1062 | |
| 1063 if($self->{'_collect_stats'}) { | |
| 1064 foreach my $line (@data) { | |
| 1065 if( $line =~ /^\s*Matrix:\s*(\S+)/i ) { | |
| 1066 $self->{'_current_blast'}->matrix( $1 ); | |
| 1067 } | |
| 1068 elsif( $line =~ /^\s*Number of Sequences:\s*(\d+)/i ) { | |
| 1069 $self->{'_current_blast'}->analysis_subject->entries( $1 ); | |
| 1070 } | |
| 1071 elsif( $line =~ /^\s*length of database:\s*(\d+)/i ) { | |
| 1072 $self->{'_current_blast'}->analysis_subject->letters( $1 ); | |
| 1073 } | |
| 1074 elsif( $line =~ /^\s*Posted date:\s*(.+)$/i ) { | |
| 1075 $self->{'_current_blast'}->analysis_subject->date( $1 ); | |
| 1076 } | |
| 1077 } | |
| 1078 } | |
| 1079 | |
| 1080 if( $self->errors ) { | |
| 1081 my $num_err = scalar($self->errors); | |
| 1082 $self->warn( "$num_err Blast parsing errors occurred."); | |
| 1083 foreach( $self->errors ) { print STDERR "$_\n"; }; | |
| 1084 } | |
| 1085 | |
| 1086 if( $self->{'_pause_between_reports'} and $ok_to_pause ) { | |
| 1087 $self->pause; | |
| 1088 } | |
| 1089 | |
| 1090 } | |
| 1091 | |
| 1092 sub _process_nohits { | |
| 1093 my $self = shift; | |
| 1094 # print STDERR "Processing No Hits (iteration = $current_iteration)\n"; | |
| 1095 $self->{'_current_blast'}->set_no_hits_found( $current_iteration ); | |
| 1096 } | |
| 1097 | |
| 1098 | |
| 1099 sub _process_iteration { | |
| 1100 my ($self, @data) = @_; | |
| 1101 # print STDERR "Processing Iteration\n"; | |
| 1102 # print STDERR " Incrementing current iteration (was=$current_iteration)\n"; | |
| 1103 $current_iteration++; | |
| 1104 $self->{'_current_blast'}->iterations( $current_iteration ); | |
| 1105 | |
| 1106 foreach( @data ) { | |
| 1107 if( /Results from round \d+/i ) { | |
| 1108 $self->{'_current_blast'}->psiblast( 1 ); | |
| 1109 } | |
| 1110 elsif( /No hits found/i ) { | |
| 1111 $self->_process_nohits(); | |
| 1112 last; | |
| 1113 } | |
| 1114 elsif( /^\s*Sequences/i ) { | |
| 1115 $self->_process_descriptions( @data ); | |
| 1116 last; | |
| 1117 } | |
| 1118 } | |
| 1119 } | |
| 1120 | |
| 1121 sub pause_between_reports { | |
| 1122 my ($self, $setting) = @_; | |
| 1123 if( defined $setting ) { | |
| 1124 $self->{'_pause_between_reports'} = $setting; | |
| 1125 } | |
| 1126 $self->{'_pause_between_reports'}; | |
| 1127 } | |
| 1128 | |
| 1129 sub result_count { | |
| 1130 my $self = shift; | |
| 1131 return $self->{'_result_count'}; | |
| 1132 } | |
| 1133 | |
| 1134 # For backward compatibility: | |
| 1135 sub report_count { shift->result_count } | |
| 1136 | |
| 1137 sub next_result { | |
| 1138 my ($self) = @_; | |
| 1139 # print STDERR "next_result() called\n"; | |
| 1140 if( not $self->running ) { | |
| 1141 $self->run; | |
| 1142 } | |
| 1143 else { | |
| 1144 $self->resume; | |
| 1145 } | |
| 1146 my $blast = $self->{'_current_blast'}; | |
| 1147 $self->{'_current_blast'} = undef; | |
| 1148 return $blast; | |
| 1149 } | |
| 1150 | |
| 1151 =head2 write_result | |
| 1152 | |
| 1153 Title : write_result | |
| 1154 Usage : $stream->write_result($result_result, @other_args) | |
| 1155 Function: Writes data from the $result_result object into the stream. | |
| 1156 : Delegates to the to_string() method of the associated | |
| 1157 : WriterI object. | |
| 1158 Returns : 1 for success and 0 for error | |
| 1159 Args : Bio::Search:Result::ResultI object, | |
| 1160 : plus any other arguments for the Writer | |
| 1161 Throws : Bio::Root::Exception if a Writer has not been set. | |
| 1162 | |
| 1163 See L<Bio::Root::Exception> | |
| 1164 | |
| 1165 =cut | |
| 1166 | |
| 1167 sub write_result { | |
| 1168 my ($self, $blast, @args) = @_; | |
| 1169 | |
| 1170 if( not defined($self->writer) ) { | |
| 1171 $self->warn("Writer not defined. Using a $DEFAULT_BLAST_WRITER_CLASS"); | |
| 1172 $self->writer( $DEFAULT_BLAST_WRITER_CLASS->new() ); | |
| 1173 } | |
| 1174 $self->SUPER::write_result( $blast, @args ); | |
| 1175 } | |
| 1176 | |
| 1177 | |
| 1178 | |
| 1179 1; | |
| 1180 __END__ | |
| 1181 |
