0
|
1 # $Id: SearchIO.pm,v 1.18 2002/12/13 13:54:03 jason Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::SearchIO
|
|
4 #
|
|
5 # Cared for by Jason Stajich <jason@bioperl.org>
|
|
6 #
|
|
7 # Copyright Jason Stajich
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::SearchIO - Driver for parsing Sequence Database Searches (Blast,FASTA,...)
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 use Bio::SearchIO;
|
|
20 # format can be 'fasta', 'blast'
|
|
21 my $searchio = new Bio::SearchIO( -format => 'blastxml',
|
|
22 -file => 'blastout.xml' );
|
|
23 while ( my $result = $searchio->next_result() ) {
|
|
24 while( my $hit = $result->next_hit ) {
|
|
25 # process the Bio::Search::Hit::HitI object
|
|
26 while( my $hsp = $hit->next_hsp ) {
|
|
27 # process the Bio::Search::HSP::HSPI object
|
|
28 }
|
|
29 }
}
|
|
30
|
|
31 =head1 DESCRIPTION
|
|
32
|
|
33 This is a driver for instantiating a parser for report files from
|
|
34 sequence database searches. This object serves as a wrapper for the
|
|
35 format parsers in Bio::SearchIO::* - you should not need to ever
|
|
36 use those format parsers directly. (For people used to the SeqIO
|
|
37 system, we are deliberately using the same pattern).
|
|
38
|
|
39 Once you get a SearchIO object, calling next_result() gives you back
|
|
40 a L<Bio::Search::Result::ResultI> compliant object, which is an object that
|
|
41 represents one Blast/Fasta/HMMER whatever report.
|
|
42
|
|
43 =head1 FEEDBACK
|
|
44
|
|
45 =head2 Mailing Lists
|
|
46
|
|
47 User feedback is an integral part of the evolution of this and other
|
|
48 Bioperl modules. Send your comments and suggestions preferably to
|
|
49 the Bioperl mailing list. Your participation is much appreciated.
|
|
50
|
|
51 bioperl-l@bioperl.org - General discussion
|
|
52 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
53
|
|
54 =head2 Reporting Bugs
|
|
55
|
|
56 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
57 of the bugs and their resolution. Bug reports can be submitted via
|
|
58 email or the web:
|
|
59
|
|
60 bioperl-bugs@bioperl.org
|
|
61 http://bugzilla.bioperl.org/
|
|
62
|
|
63 =head1 AUTHOR - Jason Stajich & Steve Chervitz
|
|
64
|
|
65 Email jason@bioperl.org
|
|
66 Email sac@bioperl.org
|
|
67
|
|
68 =head1 CONTRIBUTORS
|
|
69
|
|
70 Additional contributors names and emails here
|
|
71
|
|
72 =head1 APPENDIX
|
|
73
|
|
74 The rest of the documentation details each of the object methods.
|
|
75 Internal methods are usually preceded with a _
|
|
76
|
|
77 =cut
|
|
78
|
|
79
|
|
80 # Let the code begin...
|
|
81
|
|
82
|
|
83 package Bio::SearchIO;
|
|
84 use strict;
|
|
85 use vars qw(@ISA);
|
|
86
|
|
87 # Object preamble - inherits from Bio::Root::IO
|
|
88
|
|
89 use Bio::Root::IO;
|
|
90 use Bio::Event::EventGeneratorI;
|
|
91 use Bio::SearchIO::SearchResultEventBuilder;
|
|
92 use Bio::AnalysisParserI;
|
|
93 use Symbol();
|
|
94
|
|
95 @ISA = qw( Bio::Root::IO Bio::Event::EventGeneratorI Bio::AnalysisParserI);
|
|
96
|
|
97 =head2 new
|
|
98
|
|
99 Title : new
|
|
100 Usage : my $obj = new Bio::SearchIO();
|
|
101 Function: Builds a new Bio::SearchIO object
|
|
102 Returns : Bio::SearchIO initialized with the correct format
|
|
103 Args : -file => $filename
|
|
104 -format => format
|
|
105 -fh => filehandle to attach to
|
|
106 -result_factory => Object implementing Bio::Factory::ResultFactoryI
|
|
107 -hit_factory => Object implementing Bio::Factory::HitFactoryI
|
|
108 -writer => Object implementing Bio::SearchIO::SearchWriterI
|
|
109 -output_format => output format, which will dynamically load writer
|
|
110
|
|
111 See L<Bio::Factory::ResultFactoryI>, L<Bio::Factory::HitFactoryI>,
|
|
112 L<Bio::SearchIO::SearchWriterI>
|
|
113
|
|
114 =cut
|
|
115
|
|
# Constructor and format dispatcher.
#
# When invoked on a concrete parser class (anything matching
# Bio::SearchIO::<something>) the object is built by the parent constructor
# and then set up through _initialize().
#
# When invoked on the driver class itself, the report format is taken from
# -format, otherwise guessed from -file (or $ARGV[0]), otherwise 'blast'.
# An -output_format, if given, is turned into a freshly created
# Bio::SearchIO::Writer::<output_format> object that is appended to the
# arguments as -writer ('blast' is an alias for 'TextResultWriter');
# supplying both -writer and -output_format is an error.  Finally the
# matching Bio::SearchIO::<format> module is loaded and construction is
# delegated to it.
#
# Returns the new parser object, or undef if the format module could not
# be loaded.
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # or do we want to call SUPER on an object if $caller is an
    # object?
    if ( $class =~ /Bio::SearchIO::(\S+)/ ) {
        # Concrete format parser: build it for real.
        my ($parser) = $class->SUPER::new(@args);
        $parser->_initialize(@args);
        return $parser;
    }

    # Driver class: figure out which format parser to hand over to.
    my %opt = @args;
    @opt{ map { lc $_ } keys %opt } = values %opt;    # lowercase keys

    my $format = $opt{'-format'};
    $format ||= $class->_guess_format( $opt{'-file'} || $ARGV[0] );
    $format ||= 'blast';

    my $output_format = $opt{'-output_format'};
    if ( defined $output_format ) {
        if ( defined $opt{'-writer'} ) {
            # No object exists yet, so borrow a throwaway root object
            # just to raise the exception.
            Bio::Root::Root->new()->throw("Both writer and output format specified - not good");
        }

        $output_format = 'TextResultWriter' if $output_format =~ /^blast$/i;

        my $writer_class = "Bio::SearchIO::Writer::" . $output_format;
        $class->_load_module($writer_class);
        push @args, "-writer", $writer_class->new();
    }

    # normalize capitalization to lower case
    $format = lc $format;

    if ( !$class->_load_format_module($format) ) {
        return undef;
    }
    return "Bio::SearchIO::${format}"->new(@args);
}
|
|
158
|
|
159 =head2 newFh
|
|
160
|
|
161 Title : newFh
|
|
162 Usage : $fh = Bio::SearchIO->newFh(-file=>$filename,
|
|
163 -format=>'Format')
|
|
164 Function: does a new() followed by an fh()
|
|
165 Example : $fh = Bio::SearchIO->newFh(-file=>$filename,
|
|
166 -format=>'Format')
|
|
167 $result = <$fh>; # read a ResultI object
|
|
168 print $fh $result; # write a ResultI object
|
|
169 Returns : filehandle tied to the Bio::SearchIO::Fh class
|
|
170 Args :
|
|
171
|
|
172 =cut
|
|
173
|
|
# Convenience constructor: builds a SearchIO object with new() and returns
# the tied filehandle produced by its fh() method.  Returns nothing when
# new() does not yield a true value.
sub newFh {
    my ($class, @ctor_args) = @_;

    my $stream = $class->new(@ctor_args);
    return unless $stream;

    return $stream->fh;
}
|
|
179
|
|
180 =head2 fh
|
|
181
|
|
182 Title : fh
|
|
183 Usage : $obj->fh
|
|
184 Function:
|
|
185 Example : $fh = $obj->fh; # make a tied filehandle
|
|
186 $result = <$fh>; # read a ResultI object
|
|
187 print $fh $result; # write a ResultI object
|
|
188 Returns : filehandle tied to the Bio::SearchIO::Fh class
|
|
189 Args :
|
|
190
|
|
191 =cut
|
|
192
|
|
193
|
|
# Creates an anonymous glob and ties it to this object's class, handing the
# object itself to TIEHANDLE.  Returns the glob reference, which can then be
# used as a filehandle for reading and writing result objects.
sub fh {
    my ($self) = @_;

    my $tie_class = ref($self) || $self;
    my $glob      = Symbol::gensym();

    tie *{$glob}, $tie_class, $self;
    return $glob;
}
|
|
201
|
|
202 =head2 attach_EventHandler
|
|
203
|
|
204 Title : attach_EventHandler
|
|
205 Usage : $parser->attach_EventHandler($handler)
|
|
206 Function: Adds an event handler to listen for events
|
|
207 Returns : none
|
|
208 Args : Bio::SearchIO::EventHandlerI
|
|
209
|
|
210 See L<Bio::SearchIO::EventHandlerI>
|
|
211
|
|
212 =cut
|
|
213
|
|
# Registers the event handler that will receive parsing events.
#
# Args    : $handler - an object that isa Bio::SearchIO::EventHandlerI
# Returns : nothing
#
# A false $handler is silently ignored.  A handler of the wrong type
# triggers a warning and is NOT attached; the previously attached handler
# (if any) stays in place.
sub attach_EventHandler {
    my ($self, $handler) = @_;
    return if !$handler;

    if ( !$handler->isa('Bio::SearchIO::EventHandlerI') ) {
        $self->warn("Ignoring request to attatch handler ".ref($handler). ' because it is not a Bio::SearchIO::EventHandlerI');
        # Actually ignore it, as the warning promises; without this return
        # the rejected handler would be stored anyway.
        return;
    }

    $self->{'_handler'} = $handler;
    return;
}
|
|
223
|
|
224 =head2 _eventHandler
|
|
225
|
|
226 Title : _eventHandler
|
|
227 Usage : private
|
|
228 Function: Get the EventHandler
|
|
229 Returns : Bio::SearchIO::EventHandlerI
|
|
230 Args : none
|
|
231
|
|
232 See L<Bio::SearchIO::EventHandlerI>
|
|
233
|
|
234 =cut
|
|
235
|
|
# Private accessor: returns whatever is stored in the '_handler' slot
# (undef if no handler has been attached).
sub _eventHandler {
    my $self = shift;
    return $self->{'_handler'};
}
|
|
240
|
|
# Per-object setup, called from new() on a concrete parser class.
#
# Clears the handler slot, initialises the IO layer, attaches a fresh
# Bio::SearchIO::SearchResultEventBuilder as the event handler, resets the
# report type and processes the WRITER, RESULT_FACTORY, HIT_FACTORY and
# USE_FACTORIES arguments extracted from @args by _rearrange().
sub _initialize {
    my ($self, @args) = @_;

    $self->{'_handler'} = undef;
    # not really necessary unless we put more in RootI
    #$self->SUPER::_initialize(@args);

    # initialize the IO part
    $self->_initialize_io(@args);

    my $builder = Bio::SearchIO::SearchResultEventBuilder->new();
    $self->attach_EventHandler($builder);
    $self->{'_reporttype'} = '';

    my @wanted = qw(WRITER RESULT_FACTORY HIT_FACTORY USE_FACTORIES);
    my ($writer, $rfactory, $hfactory, $use_factories) =
        $self->_rearrange( \@wanted, @args );

    if ($writer) {
        $self->writer($writer);
    }

    # TODO: Resolve this issue:
    # The $use_factories flag is a temporary hack to allow factory-based and
    # non-factory based SearchIO objects to co-exist.
    # steve --- Sat Dec 22 04:41:20 2001
    if ($use_factories) {
        # Only fill in factories that have not been set already; fall back
        # to the class defaults when none was passed in.
        unless ( defined $self->{'_result_factory'} ) {
            $self->result_factory( $rfactory || $self->default_result_factory_class->new );
        }
        unless ( defined $self->{'_hit_factory'} ) {
            $self->hit_factory( $hfactory || $self->default_hit_factory_class->new );
        }
    }
}
|
|
273
|
|
274 =head2 next_result
|
|
275
|
|
276 Title : next_result
|
|
277 Usage : $result = stream->next_result
|
|
278 Function: Reads the next ResultI object from the stream and returns it.
|
|
279
|
|
280 Certain driver modules may encounter entries in the stream that
|
|
281 are either misformatted or that use syntax not yet understood
|
|
282 by the driver. If such an incident is recoverable, e.g., by
|
|
283 dismissing a feature of a feature table or some other non-mandatory
|
|
284 part of an entry, the driver will issue a warning. In the case
|
|
285 of a non-recoverable situation an exception will be thrown.
|
|
286 Do not assume that you can resume parsing the same stream after
|
|
287 catching the exception. Note that you can always turn recoverable
|
|
288 errors into exceptions by calling $stream->verbose(2) (see
|
|
289 Bio::Root::RootI POD page).
|
|
290 Returns : A Bio::Search::Result::ResultI object
|
|
291 Args : n/a
|
|
292
|
|
293 See L<Bio::Root::RootI>
|
|
294
|
|
295 =cut
|
|
296
|
|
# Abstract in this base class: always delegates to throw_not_implemented().
# Format-specific parsers are expected to provide the real implementation.
sub next_result {
    my $self = shift;
    return $self->throw_not_implemented;
}
|
|
301
|
|
302 =head2 write_result
|
|
303
|
|
304 Title : write_result
|
|
305 Usage : $stream->write_result($result_result, @other_args)
|
|
306 Function: Writes data from the $result_result object into the stream.
|
|
307 : Delegates to the to_string() method of the associated
|
|
308 : WriterI object.
|
|
309 Returns : 1 on success (failure is reported by throwing an exception)
|
|
310 Args : Bio::Search::Result::ResultI object,
|
|
311 : plus any other arguments for the Writer
|
|
312 Throws : Bio::Root::Exception if a Writer has not been set.
|
|
313
|
|
314 See L<Bio::Root::Exception>
|
|
315
|
|
316 =cut
|
|
317
|
|
# Serialises $result with the attached writer's to_string() method (any
# extra arguments are passed along) and prints the resulting text to the
# stream.  Throws if no writer object has been set.  Returns 1.
sub write_result {
    my ($self, $result, @args) = @_;

    $self->throw("ResultWriter not defined.")
        unless ref( $self->{'_result_writer'} );

    my $text = $self->writer->to_string( $result, @args );
    $self->_print("$text");

    return 1;
}
|
|
330
|
|
331
|
|
332 =head2 writer
|
|
333
|
|
334 Title : writer
|
|
335 Usage : $writer = $stream->writer;
|
|
336 Function: Sets/Gets a SearchWriterI object to be used for this searchIO.
|
|
337 Returns : The current Bio::SearchIO::SearchWriterI object (undef if none is set)
|
|
338 Args : Bio::SearchIO::SearchWriterI object (when setting)
|
|
339 Throws : Bio::Root::Exception if a non-Bio::SearchIO::SearchWriterI object
|
|
340 is passed in.
|
|
341
|
|
342 =cut
|
|
343
|
|
# Get/set accessor for the result writer.
#
# With a Bio::SearchIO::SearchWriterI object as argument, stores it; with
# any other defined argument, throws.  Always returns the writer currently
# stored (undef if none).
sub writer {
    my ($self, $writer) = @_;

    my $is_writer = ref($writer) && $writer->isa('Bio::SearchIO::SearchWriterI');

    if ($is_writer) {
        $self->{'_result_writer'} = $writer;
    }
    elsif ( defined $writer ) {
        $self->throw("Can't set ResultWriter. Not a Bio::SearchIO::SearchWriterI: $writer");
    }

    return $self->{'_result_writer'};
}
|
|
354
|
|
355
|
|
356 =head2 hit_factory
|
|
357
|
|
358 Title : hit_factory
|
|
359 Usage : $hit_factory = $stream->hit_factory; (get)
|
|
360 : $stream->hit_factory( $factory ); (set)
|
|
361 Function: Sets/Gets a factory object to create hit objects for this SearchIO
|
|
362 Returns : Bio::Factory::HitFactoryI object
|
|
363 Args : Bio::Factory::HitFactoryI object (when setting)
|
|
364 Throws : Bio::Root::Exception if a non-Bio::Factory::HitFactoryI object
|
|
365 is passed in.
|
|
366 Comments: A SearchIO implementation should provide a default hit factory.
|
|
367
|
|
368 See L<Bio::Factory::HitFactoryI>
|
|
369
|
|
370 =cut
|
|
371
|
|
# Get/set accessor for the factory used to create hit objects.
#
# With a Bio::Factory::HitFactoryI object as argument, stores it; with any
# other defined argument, throws.  Always returns the factory currently
# stored (undef if none).
sub hit_factory {
    my ($self, $fact) = @_;

    my $is_factory = ref($fact) && $fact->isa('Bio::Factory::HitFactoryI');

    if ($is_factory) {
        $self->{'_hit_factory'} = $fact;
    }
    elsif ( defined $fact ) {
        $self->throw("Can't set HitFactory. Not a Bio::Factory::HitFactoryI: $fact");
    }

    return $self->{'_hit_factory'};
}
|
|
382
|
|
383 =head2 result_factory
|
|
384
|
|
385 Title : result_factory
|
|
386 Usage : $result_factory = $stream->result_factory; (get)
|
|
387 : $stream->result_factory( $factory ); (set)
|
|
388 Function: Sets/Gets a factory object to create result objects for this
|
|
389 SearchIO.
|
|
390 Returns : Bio::Factory::ResultFactoryI object
|
|
391 Args : Bio::Factory::ResultFactoryI object (when setting)
|
|
392 Throws : Bio::Root::Exception if a non-Bio::Factory::ResultFactoryI object
|
|
393 is passed in.
|
|
394 Comments: A SearchIO implementation should provide a default result factory.
|
|
395
|
|
396 See L<Bio::Factory::ResultFactoryI>
|
|
397
|
|
398 =cut
|
|
399
|
|
# Get/set accessor for the factory used to create result objects.
#
# With a Bio::Factory::ResultFactoryI object as argument, stores it; with
# any other defined argument, throws.  Always returns the factory currently
# stored (undef if none).
sub result_factory {
    my ($self, $fact) = @_;

    my $is_factory = ref($fact) && $fact->isa('Bio::Factory::ResultFactoryI');

    if ($is_factory) {
        $self->{'_result_factory'} = $fact;
    }
    elsif ( defined $fact ) {
        $self->throw("Can't set ResultFactory. Not a Bio::Factory::ResultFactoryI: $fact");
    }

    return $self->{'_result_factory'};
}
|
|
410
|
|
411
|
|
412 =head2 result_count
|
|
413
|
|
414 Title : result_count
|
|
415 Usage : $num = $stream->result_count;
|
|
416 Function: Gets the number of Blast results that have been parsed.
|
|
417 Returns : integer
|
|
418 Args : none
|
|
419 Throws : none
|
|
420
|
|
421 =cut
|
|
422
|
|
# Abstract in this base class: always delegates to throw_not_implemented().
sub result_count {
    my ($self) = @_;
    return $self->throw_not_implemented;
}
|
|
427
|
|
428
|
|
429 =head2 default_hit_factory_class
|
|
430
|
|
431 Title : default_hit_factory_class
|
|
432 Usage : $res_factory = $obj->default_hit_factory_class()->new( @args )
|
|
433 Function: Returns the name of the default class to be used for creating
|
|
434 Bio::Search::Hit::HitI objects.
|
|
435 Example :
|
|
436 Returns : A string containing the name of a class that implements
|
|
437 the Bio::Search::Hit::HitI interface.
|
|
438 Args : none
|
|
439 Comments: Bio::SearchIO does not implement this method. It is meant to throw a
|
|
440 NotImplemented exception, but that call is currently commented out.
|
|
441
|
|
442 See L<Bio::Search::Hit::HitI>
|
|
443
|
|
444 =cut
|
|
445
|
|
# Placeholder for the name of the default hit factory class.  The
# not-implemented exception is disabled for now (see TODO).
sub default_hit_factory_class {
    my ($self) = @_;
    # TODO: Uncomment this when Jason's SearchIO code conforms
    # $self->throw_not_implemented;

    # NOTE(review): with the throw disabled, the sub has always handed back
    # its invocant (the value of its only statement).  That is spelled out
    # here so the behaviour is visible; confirm whether any caller relies
    # on it before changing it.
    return $self;
}
|
|
451
|
|
452 =head2 _load_format_module
|
|
453
|
|
454 Title : _load_format_module
|
|
455 Usage : *INTERNAL SearchIO stuff*
|
|
456 Function: Loads up (like use) a module at run time on demand
|
|
457 Example :
|
|
458 Returns :
|
|
459 Args :
|
|
460
|
|
461 =cut
|
|
462
|
|
# Loads the parser module Bio::SearchIO::<format> on demand through
# _load_module().  If loading raises an exception, a diagnostic is printed
# to STDERR and the exception is swallowed.
# Returns the value of _load_module() on success, undef on failure.
sub _load_format_module {
    my ($self, $format) = @_;
    my $module = "Bio::SearchIO::$format";

    # eval yields undef when _load_module() dies, which is exactly the
    # "not ok" value we want to return.
    my $ok = eval { $self->_load_module($module) };

    if ($@) {
        print STDERR <<END;
$self: $format cannot be found
Exception $@
For more information about the SearchIO system please see the SearchIO docs.
This includes ways of checking for formats at compile time, not run time
END
    }

    return $ok;
}
|
|
482
|
|
483
|
|
484 =head2 _guess_format
|
|
485
|
|
486 Title : _guess_format
|
|
487 Usage : $obj->_guess_format($filename)
|
|
488 Function:
|
|
489 Example :
|
|
490 Returns : guessed format of filename (lower case)
|
|
491 Args :
|
|
492
|
|
493 =cut
|
|
494
|
|
495
|
|
# Guesses the report format from a file name.
#
# Args    : $filename
# Returns : 'blastxml', 'blast', 'fasta' or 'exonerate'; nothing (undef in
#           scalar context) when no name is given or no pattern matches.
sub _guess_format {
    my $class = shift;
    my $name  = shift;    # lexical copy: do not clobber the caller's $_
    return unless $name;

    # The XML test must come before the plain BLAST test: every name that
    # qualifies as blastxml also contains "blast", so checking 'blast'
    # first would make this branch unreachable.
    return 'blastxml'  if $name =~ /blast/i and $name =~ /\.xml$/i;
    return 'blast'     if $name =~ /blast/i or $name =~ /\.bl\w$/i;
    return 'fasta'     if $name =~ /fasta/i or $name =~ /\.fas$/i;
    return 'exonerate' if $name =~ /\.exonerate/i or $name =~ /\.exon/i;

    # Unknown format: return nothing explicitly instead of leaking the
    # value of the last failed match.
    return;
}
|
|
504
|
|
# Closes the stream.  If a writer is attached, the text returned by its
# end_report() method is printed first; the parent class close() is then
# invoked with the remaining arguments and its result returned.
sub close {
    my ($self, @rest) = @_;

    my $writer = $self->writer;
    if ($writer) {
        $self->_print( $writer->end_report() );
    }

    return $self->SUPER::close(@rest);
}
|
|
512
|
|
# Destructor: makes sure the stream is closed when the object goes away.
sub DESTROY {
    my ($self) = @_;
    $self->close();
}
|
|
517
|
|
# tie() hook: wraps the object passed to tie() in a small hash, stored
# under the 'processor' key, blessed into $class.
sub TIEHANDLE {
    my ($class, $processor) = @_;
    my %state = ( processor => $processor );
    return bless \%state, $class;
}
|
|
522
|
|
# tie() hook for <$fh>.  In scalar context returns the processor's next
# result; in list context keeps calling next_result() until it returns a
# false value and returns everything collected.
sub READLINE {
    my ($self) = @_;
    my $parser = $self->{'processor'};

    if (wantarray) {
        my @results;
        while ( my $result = $parser->next_result() ) {
            push @results, $result;
        }
        return @results;
    }

    return $parser->next_result();
}
|
|
530
|
|
# tie() hook for print $fh ...: forwards the printed items to the
# processor's write_result() method and returns its result.
sub PRINT {
    my ($self, @items) = @_;
    return $self->{'processor'}->write_result(@items);
}
|
|
535
|
|
536 1;
|
|
537
|
|
538 __END__
|