Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SearchIO.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: SearchIO.pm,v 1.18 2002/12/13 13:54:03 jason Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::SearchIO | |
| 4 # | |
| 5 # Cared for by Jason Stajich <jason@bioperl.org> | |
| 6 # | |
| 7 # Copyright Jason Stajich | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::SearchIO - Driver for parsing Sequence Database Searches (Blast,FASTA,...) | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 use Bio::SearchIO; | |
| 20 # format can be e.g. 'blast', 'blastxml', 'fasta' or 'exonerate' | |
| 21 my $searchio = new Bio::SearchIO( -format => 'blastxml', | |
| 22 -file => 'blastout.xml' ); | |
| 23 while ( my $result = $searchio->next_result() ) { | |
| 24 while( my $hit = $result->next_hit ) { | |
| 25 # process the Bio::Search::Hit::HitI object | |
| 26 while( my $hsp = $hit->next_hsp ) { | |
| 27 # process the Bio::Search::HSP::HSPI object | |
| 28 } | |
| 29 } | |
| 30 | |
| 31 =head1 DESCRIPTION | |
| 32 | |
| 33 This is a driver for instantiating a parser for report files from | |
| 34 sequence database searches. This object serves as a wrapper for the | |
| 35 format parsers in Bio::SearchIO::* - you should not need to ever | |
| 36 use those format parsers directly. (For people used to the SeqIO | |
| 37 system, we are deliberately using the same pattern). | |
| 38 | |
| 39 Once you get a SearchIO object, calling next_result() gives you back | |
| 40 a L<Bio::Search::Result::ResultI> compliant object, which is an object that | |
| 41 represents one Blast/Fasta/HMMER whatever report. | |
| 42 | |
| 43 =head1 FEEDBACK | |
| 44 | |
| 45 =head2 Mailing Lists | |
| 46 | |
| 47 User feedback is an integral part of the evolution of this and other | |
| 48 Bioperl modules. Send your comments and suggestions preferably to | |
| 49 the Bioperl mailing list. Your participation is much appreciated. | |
| 50 | |
| 51 bioperl-l@bioperl.org - General discussion | |
| 52 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 53 | |
| 54 =head2 Reporting Bugs | |
| 55 | |
| 56 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 57 of the bugs and their resolution. Bug reports can be submitted via | |
| 58 email or the web: | |
| 59 | |
| 60 bioperl-bugs@bioperl.org | |
| 61 http://bugzilla.bioperl.org/ | |
| 62 | |
| 63 =head1 AUTHOR - Jason Stajich & Steve Chervitz | |
| 64 | |
| 65 Email jason@bioperl.org | |
| 66 Email sac@bioperl.org | |
| 67 | |
| 68 =head1 CONTRIBUTORS | |
| 69 | |
| 70 Additional contributors names and emails here | |
| 71 | |
| 72 =head1 APPENDIX | |
| 73 | |
| 74 The rest of the documentation details each of the object methods. | |
| 75 Internal methods are usually preceded with a _ | |
| 76 | |
| 77 =cut | |
| 78 | |
| 79 | |
| 80 # Let the code begin... | |
| 81 | |
| 82 | |
| 83 package Bio::SearchIO; | |
| 84 use strict; | |
| 85 use vars qw(@ISA); | |
| 86 | |
| 87 # Object preamble - inherits from Bio::Root::IO | |
| 88 | |
| 89 use Bio::Root::IO; | |
| 90 use Bio::Event::EventGeneratorI; | |
| 91 use Bio::SearchIO::SearchResultEventBuilder; | |
| 92 use Bio::AnalysisParserI; | |
| 93 use Symbol(); | |
| 94 | |
| 95 @ISA = qw( Bio::Root::IO Bio::Event::EventGeneratorI Bio::AnalysisParserI); | |
| 96 | |
| 97 =head2 new | |
| 98 | |
| 99 Title : new | |
| 100 Usage : my $obj = new Bio::SearchIO(); | |
| 101 Function: Builds a new Bio::SearchIO object | |
| 102 Returns : Bio::SearchIO initialized with the correct format | |
| 103 Args : -file => $filename | |
| 104 -format => format | |
| 105 -fh => filehandle to attach to | |
| 106 -result_factory => Object implementing Bio::Factory::ResultFactoryI | |
| 107 -hit_factory => Object implementing Bio::Factory::HitFactoryI | |
| 108 -writer => Object implementing Bio::SearchIO::SearchWriterI | |
| 109 -output_format => output format, which will dynamically load writer | |
| 110 | |
| 111 See L<Bio::Factory::ResultFactoryI>, L<Bio::Factory::HitFactoryI>, | |
| 112 L<Bio::SearchIO::SearchWriterI> | |
| 113 | |
| 114 =cut | |
| 115 | |
# Constructor / format-dispatching factory.
#
# Called on a concrete Bio::SearchIO::<format> class: builds the object via
# the parent constructor, runs _initialize(@args) and returns it.
#
# Called on Bio::SearchIO itself: determines the format (explicit -format,
# else guessed from -file or $ARGV[0], else 'blast'), optionally loads a
# writer named by -output_format, loads Bio::SearchIO::<format> and
# delegates to that class's new().
#
# Args    : -file, -format, -fh, -result_factory, -hit_factory, -writer,
#           -output_format (option names are matched case-insensitively when
#           the format and writer are selected)
# Returns : the parser object, or nothing (undef in scalar context, the
#           empty list in list context) if the format module cannot be loaded
# Throws  : if both -writer and -output_format are given
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # or do we want to call SUPER on an object if $caller is an
    # object?
    if ( $class =~ /Bio::SearchIO::(\S+)/ ) {
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }

    my %param = @args;
    @param{ map { lc $_ } keys %param } = values %param;    # lowercase keys

    my $format = $param{'-format'}
        || $class->_guess_format( $param{'-file'} || $ARGV[0] )
        || 'blast';

    my $output_format = $param{'-output_format'};
    if ( defined $output_format ) {
        if ( defined $param{'-writer'} ) {
            my $dummy = Bio::Root::Root->new();
            $dummy->throw("Both writer and output format specified - not good");
        }

        # 'blast' is an alias for the plain-text result writer.
        if ( $output_format =~ /^blast$/i ) {
            $output_format = 'TextResultWriter';
        }
        my $output_module = "Bio::SearchIO::Writer::" . $output_format;
        $class->_load_module($output_module);
        my $writer = $output_module->new();
        push @args, "-writer", $writer;
    }

    # normalize capitalization to lower case
    $format = "\L$format";

    # Bare return: a failed load must not produce a one-element (undef)
    # list when the constructor is called in list context.
    return unless $class->_load_format_module($format);
    return "Bio::SearchIO::${format}"->new(@args);
}
| 158 | |
| 159 =head2 newFh | |
| 160 | |
| 161 Title : newFh | |
| 162 Usage : $fh = Bio::SearchIO->newFh(-file=>$filename, | |
| 163 -format=>'Format') | |
| 164 Function: does a new() followed by an fh() | |
| 165 Example : $fh = Bio::SearchIO->newFh(-file=>$filename, | |
| 166 -format=>'Format') | |
| 167 $result = <$fh>; # read a ResultI object | |
| 168 print $fh $result; # write a ResultI object | |
| 169 Returns : filehandle tied to the Bio::SearchIO::Fh class | |
| 170 Args : | |
| 171 | |
| 172 =cut | |
| 173 | |
# Convenience constructor: builds a parser with new() using the remaining
# arguments and returns its tied filehandle.  Returns nothing when new()
# yields a false value.
sub newFh {
    my ($class, @ctor_args) = @_;
    my $parser = $class->new(@ctor_args);
    return unless $parser;
    return $parser->fh;
}
| 179 | |
| 180 =head2 fh | |
| 181 | |
| 182 Title : fh | |
| 183 Usage : $obj->fh | |
| 184 Function: | |
| 185 Example : $fh = $obj->fh; # make a tied filehandle | |
| 186 $result = <$fh>; # read a ResultI object | |
| 187 print $fh $result; # write a ResultI object | |
| 188 Returns : filehandle tied to the Bio::SearchIO::Fh class | |
| 189 Args : | |
| 190 | |
| 191 =cut | |
| 192 | |
| 193 | |
# Creates an anonymous glob and ties it to the invocant's class, passing the
# invocant itself as the tie argument (received by TIEHANDLE).  Returns the
# glob reference, usable as a filehandle.
sub fh {
    my ($self) = @_;
    my $tie_class = ref($self) || $self;
    my $glob      = Symbol::gensym();
    tie ${$glob}, $tie_class, $self;
    return $glob;
}
| 201 | |
| 202 =head2 attach_EventHandler | |
| 203 | |
| 204 Title : attach_EventHandler | |
| 205 Usage : $parser->attach_EventHandler($handler) | |
| 206 Function: Adds an event handler to listen for events | |
| 207 Returns : none | |
| 208 Args : Bio::SearchIO::EventHandlerI | |
| 209 | |
| 210 See L<Bio::SearchIO::EventHandlerI> | |
| 211 | |
| 212 =cut | |
| 213 | |
# Registers $handler as the object that receives parser events.
#
# Args    : an object implementing Bio::SearchIO::EventHandlerI
# Returns : nothing
# Notes   : a false $handler is silently ignored.  A handler that is not a
#           Bio::SearchIO::EventHandlerI produces a warning and is NOT
#           installed; the previously attached handler (if any) stays.
sub attach_EventHandler {
    my ($self, $handler) = @_;
    return if !$handler;

    # The eval keeps an unblessed reference (on which ->isa would die)
    # on the same "warn and ignore" path as any other non-handler.
    my $is_handler = do {
        local $@;
        eval { $handler->isa('Bio::SearchIO::EventHandlerI') };
    };
    if ( !$is_handler ) {
        $self->warn("Ignoring request to attatch handler ".ref($handler). ' because it is not a Bio::SearchIO::EventHandlerI');
        return;    # actually ignore the request, as the warning states
    }

    $self->{'_handler'} = $handler;
    return;
}
| 223 | |
| 224 =head2 _eventHandler | |
| 225 | |
| 226 Title : _eventHandler | |
| 227 Usage : private | |
| 228 Function: Get the EventHandler | |
| 229 Returns : Bio::SearchIO::EventHandlerI | |
| 230 Args : none | |
| 231 | |
| 232 See L<Bio::SearchIO::EventHandlerI> | |
| 233 | |
| 234 =cut | |
| 235 | |
# Private accessor: returns whatever is stored in the '_handler' slot
# (undef if nothing has been attached).
sub _eventHandler {
    my $self    = shift;
    my $handler = $self->{'_handler'};
    return $handler;
}
| 240 | |
# Per-object set-up, called from new() on a concrete format class.
#
# Resets the event handler slot, initializes the IO layer with @args,
# attaches a default SearchResultEventBuilder, clears '_reporttype' and
# then processes the -writer, -result_factory, -hit_factory and
# -use_factories options.
sub _initialize {
    my ($self, @args) = @_;
    $self->{'_handler'} = undef;
    # not really necessary unless we put more in RootI
    #$self->SUPER::_initialize(@args);

    # initialize the IO part
    $self->_initialize_io(@args);

    # Direct method call instead of indirect-object syntax: this package
    # defines its own new(), which makes "new Class()" ambiguous to parse.
    $self->attach_EventHandler( Bio::SearchIO::SearchResultEventBuilder->new() );
    $self->{'_reporttype'} = '';

    my ( $writer, $rfactory, $hfactory, $use_factories ) =
        $self->_rearrange( [ qw(WRITER
                                RESULT_FACTORY
                                HIT_FACTORY
                                USE_FACTORIES) ], @args );

    $self->writer($writer) if $writer;

    # TODO: Resolve this issue:
    # The $use_factories flag is a temporary hack to allow factory-based and
    # non-factory based SearchIO objects to co-exist.
    # steve --- Sat Dec 22 04:41:20 2001
    if ($use_factories) {
        # NOTE(review): default_result_factory_class is not defined in this
        # file; presumably concrete parsers provide it - confirm.
        if ( not defined $self->{'_result_factory'} ) {
            $self->result_factory( $rfactory || $self->default_result_factory_class->new );
        }
        if ( not defined $self->{'_hit_factory'} ) {
            $self->hit_factory( $hfactory || $self->default_hit_factory_class->new );
        }
    }
}
| 273 | |
| 274 =head2 next_result | |
| 275 | |
| 276 Title : next_result | |
| 277 Usage : $result = $stream->next_result | |
| 278 Function: Reads the next ResultI object from the stream and returns it. | |
| 279 | |
| 280 Certain driver modules may encounter entries in the stream that | |
| 281 are either misformatted or that use syntax not yet understood | |
| 282 by the driver. If such an incident is recoverable, e.g., by | |
| 283 dismissing a feature of a feature table or some other non-mandatory | |
| 284 part of an entry, the driver will issue a warning. In the case | |
| 285 of a non-recoverable situation an exception will be thrown. | |
| 286 Do not assume that you can resume parsing the same stream after | |
| 287 catching the exception. Note that you can always turn recoverable | |
| 288 errors into exceptions by calling $stream->verbose(2) (see | |
| 289 Bio::Root::RootI POD page). | |
| 290 Returns : A Bio::Search::Result::ResultI object | |
| 291 Args : n/a | |
| 292 | |
| 293 See L<Bio::Root::RootI> | |
| 294 | |
| 295 =cut | |
| 296 | |
# Abstract in this base class: simply delegates to throw_not_implemented.
# Format-specific parsers are expected to override it.
sub next_result {
    my $self = shift;
    return $self->throw_not_implemented;
}
| 301 | |
| 302 =head2 write_result | |
| 303 | |
| 304 Title : write_result | |
| 305 Usage : $stream->write_result($result_result, @other_args) | |
| 306 Function: Writes data from the $result_result object into the stream. | |
| 307 : Delegates to the to_string() method of the associated | |
| 308 : WriterI object. | |
| 309 Returns : 1 for success and 0 for error | |
| 310 Args : Bio::Search::Result::ResultI object, | |
| 311 : plus any other arguments for the Writer | |
| 312 Throws : Bio::Root::Exception if a Writer has not been set. | |
| 313 | |
| 314 See L<Bio::Root::Exception> | |
| 315 | |
| 316 =cut | |
| 317 | |
# Renders $result through the attached writer's to_string() (extra
# arguments are passed along) and prints the resulting text via _print().
# Throws "ResultWriter not defined." when no writer object is stored.
# Returns 1.
sub write_result {
    my $self = shift;
    my ($result, @writer_args) = @_;

    $self->throw("ResultWriter not defined.")
        unless ref( $self->{'_result_writer'} );

    my $text = $self->writer->to_string( $result, @writer_args );
    $self->_print("$text");

    return 1;
}
| 330 | |
| 331 | |
| 332 =head2 writer | |
| 333 | |
| 334 Title : writer | |
| 335 Usage : $writer = $stream->writer; | |
| 336 Function: Sets/Gets a SearchWriterI object to be used for this searchIO. | |
| 337 Returns : the Bio::SearchIO::SearchWriterI object currently set (undef if none) | |
| 338 Args : Bio::SearchIO::SearchWriterI object (when setting) | |
| 339 Throws : Bio::Root::Exception if a non-Bio::SearchIO::SearchWriterI object | |
| 340 is passed in. | |
| 341 | |
| 342 =cut | |
| 343 | |
# Get/set accessor for the result writer.
# A defined argument must be a reference that isa
# Bio::SearchIO::SearchWriterI, otherwise an exception is thrown.
# Always returns the currently stored writer.
sub writer {
    my $self      = shift;
    my $candidate = shift;

    if ( defined $candidate ) {
        if ( ref($candidate) && $candidate->isa('Bio::SearchIO::SearchWriterI') ) {
            $self->{'_result_writer'} = $candidate;
        }
        else {
            $self->throw("Can't set ResultWriter. Not a Bio::SearchIO::SearchWriterI: $candidate");
        }
    }

    return $self->{'_result_writer'};
}
| 354 | |
| 355 | |
| 356 =head2 hit_factory | |
| 357 | |
| 358 Title : hit_factory | |
| 359 Usage : $hit_factory = $stream->hit_factory; (get) | |
| 360 : $stream->hit_factory( $factory ); (set) | |
| 361 Function: Sets/Gets a factory object to create hit objects for this SearchIO | |
| 362 Returns : Bio::Factory::HitFactoryI object | |
| 363 Args : Bio::Factory::HitFactoryI object (when setting) | |
| 364 Throws : Bio::Root::Exception if a non-Bio::Factory::HitFactoryI object | |
| 365 is passed in. | |
| 366 Comments: A SearchIO implementation should provide a default hit factory. | |
| 367 | |
| 368 See L<Bio::Factory::HitFactoryI> | |
| 369 | |
| 370 =cut | |
| 371 | |
# Get/set accessor for the hit factory.
# A defined argument must be a reference that isa Bio::Factory::HitFactoryI,
# otherwise an exception is thrown.  Always returns the stored factory.
sub hit_factory {
    my $self    = shift;
    my $factory = shift;

    if ( defined $factory ) {
        if ( ref($factory) && $factory->isa('Bio::Factory::HitFactoryI') ) {
            $self->{'_hit_factory'} = $factory;
        }
        else {
            $self->throw("Can't set HitFactory. Not a Bio::Factory::HitFactoryI: $factory");
        }
    }

    return $self->{'_hit_factory'};
}
| 382 | |
| 383 =head2 result_factory | |
| 384 | |
| 385 Title : result_factory | |
| 386 Usage : $result_factory = $stream->result_factory; (get) | |
| 387 : $stream->result_factory( $factory ); (set) | |
| 388 Function: Sets/Gets a factory object to create result objects for this | |
| 389 SearchIO. | |
| 390 Returns : Bio::Factory::ResultFactoryI object | |
| 391 Args : Bio::Factory::ResultFactoryI object (when setting) | |
| 392 Throws : Bio::Root::Exception if a non-Bio::Factory::ResultFactoryI object | |
| 393 is passed in. | |
| 394 Comments: A SearchIO implementation should provide a default result factory. | |
| 395 | |
| 396 See L<Bio::Factory::ResultFactoryI> | |
| 397 | |
| 398 =cut | |
| 399 | |
# Get/set accessor for the result factory.
# A defined argument must be a reference that isa
# Bio::Factory::ResultFactoryI, otherwise an exception is thrown.
# Always returns the stored factory.
sub result_factory {
    my $self    = shift;
    my $factory = shift;

    if ( defined $factory ) {
        if ( ref($factory) && $factory->isa('Bio::Factory::ResultFactoryI') ) {
            $self->{'_result_factory'} = $factory;
        }
        else {
            $self->throw("Can't set ResultFactory. Not a Bio::Factory::ResultFactoryI: $factory");
        }
    }

    return $self->{'_result_factory'};
}
| 410 | |
| 411 | |
| 412 =head2 result_count | |
| 413 | |
| 414 Title : result_count | |
| 415 Usage : $num = $stream->result_count; | |
| 416 Function: Gets the number of Blast results that have been parsed. | |
| 417 Returns : integer | |
| 418 Args : none | |
| 419 Throws : none | |
| 420 | |
| 421 =cut | |
| 422 | |
# Abstract in this base class: delegates to throw_not_implemented.
sub result_count {
    my ($self) = @_;
    return $self->throw_not_implemented;
}
| 427 | |
| 428 | |
| 429 =head2 default_hit_factory_class | |
| 430 | |
| 431 Title : default_hit_factory_class | |
| 432 Usage : $res_factory = $obj->default_hit_factory_class()->new( @args ) | |
| 433 Function: Returns the name of the default class to be used for creating | |
| 434 Bio::Search::Hit::HitI objects. | |
| 435 Example : | |
| 436 Returns : A string containing the name of a class that implements | |
| 437 the Bio::Search::Hit::HitI interface. | |
| 438 Args : none | |
| 439 Comments: Bio::SearchIO does not implement this method. The | |
| 440 NotImplemented exception is currently commented out in the code. | |
| 441 | |
| 442 See L<Bio::Search::Hit::HitI> | |
| 443 | |
| 444 =cut | |
| 445 | |
# Placeholder: the not-implemented exception is still disabled (see TODO),
# so the call currently just evaluates to the invocant it was called on.
sub default_hit_factory_class {
    my ($self) = @_;
    # TODO: Uncomment this when Jason's SearchIO code conforms
    # $self->throw_not_implemented;
    return $self;
}
| 451 | |
| 452 =head2 _load_format_module | |
| 453 | |
| 454 Title : _load_format_module | |
| 455 Usage : *INTERNAL SearchIO stuff* | |
| 456 Function: Loads up (like use) a module at run time on demand | |
| 457 Example : | |
| 458 Returns : | |
| 459 Args : | |
| 460 | |
| 461 =cut | |
| 462 | |
# Loads Bio::SearchIO::<format> at run time via _load_module().
# On failure the exception is caught, a diagnostic is printed to STDERR and
# the (still undefined) result is returned; on success the value returned
# by _load_module() is passed through.
sub _load_format_module {
    my $self   = shift;
    my $format = shift;
    my $module = 'Bio::SearchIO::' . $format;
    my $loaded;

    eval { $loaded = $self->_load_module($module); };

    if ($@) {
        print STDERR $self . ": " . $format . " cannot be found\n"
            . "Exception " . $@ . "\n"
            . "For more information about the SearchIO system please see the SearchIO docs.\n"
            . "This includes ways of checking for formats at compile time, not run time\n";
    }

    return $loaded;
}
| 482 | |
| 483 | |
| 484 =head2 _guess_format | |
| 485 | |
| 486 Title : _guess_format | |
| 487 Usage : $obj->_guess_format($filename) | |
| 488 Function: | |
| 489 Example : | |
| 490 Returns : guessed format of filename (lower case) | |
| 491 Args : | |
| 492 | |
| 493 =cut | |
| 494 | |
| 495 | |
# Guesses the report format from a file name.
#
# Args    : file name (may be undef or empty)
# Returns : 'blastxml', 'blast', 'fasta' or 'exonerate'; nothing (undef in
#           scalar context) when no name is given or no pattern matches
# Notes   : the name is kept in a lexical so the caller's $_ is untouched.
sub _guess_format {
    my ($class, $file) = @_;
    return unless $file;

    # Must be tested before plain 'blast': every name matching this rule
    # also matches /blast/i, which previously made this branch unreachable.
    return 'blastxml'  if $file =~ /blast/i && $file =~ /\.xml$/i;
    return 'blast'     if $file =~ /blast/i || $file =~ /\.bl\w$/i;
    return 'fasta'     if $file =~ /fasta/i || $file =~ /\.fas$/i;
    return 'exonerate' if $file =~ /\.exonerate/i || $file =~ /\.exon/i;
    return;
}
| 504 | |
# If a writer is attached, prints its end_report() output first; then
# hands over to the parent class's close() with any remaining arguments.
sub close {
    my ($self, @rest) = @_;
    $self->_print( $self->writer->end_report() ) if $self->writer;
    return $self->SUPER::close(@rest);
}
| 512 | |
# Destructor: makes sure close() runs when the object goes away.
sub DESTROY {
    my ($self) = @_;
    return $self->close();
}
| 517 | |
# tie() hook: wraps the object passed to tie() in a small hash (under the
# 'processor' key) blessed into $class.
sub TIEHANDLE {
    my ($class, $processor) = @_;
    my %state = ( processor => $processor );
    return bless \%state, $class;
}
| 522 | |
# <$fh> hook for the tied handle.
# Scalar context: returns the next result from the wrapped processor.
# List context:   keeps calling next_result() until it returns a false
#                 value and returns everything collected.
sub READLINE {
    my ($self) = @_;
    my $parser = $self->{'processor'};

    return $parser->next_result() if !wantarray;

    my @results;
    while ( my $result = $parser->next_result() ) {
        push @results, $result;
    }
    return @results;
}
| 530 | |
# print {$fh} hook for the tied handle: forwards everything printed to the
# wrapped processor's write_result() and returns its result.
sub PRINT {
    my ($self, @items) = @_;
    return $self->{'processor'}->write_result(@items);
}
| 535 | |
| 536 1; | |
| 537 | |
| 538 __END__ |
