comparison variant_effect_predictor/Bio/SearchIO.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $Id: SearchIO.pm,v 1.18 2002/12/13 13:54:03 jason Exp $
2 #
3 # BioPerl module for Bio::SearchIO
4 #
5 # Cared for by Jason Stajich <jason@bioperl.org>
6 #
7 # Copyright Jason Stajich
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::SearchIO - Driver for parsing Sequence Database Searches (Blast,FASTA,...)
16
17 =head1 SYNOPSIS
18
    use Bio::SearchIO;
    # format can be e.g. 'fasta', 'blast', 'blastxml'
    my $searchio = new Bio::SearchIO( -format => 'blastxml',
                                      -file   => 'blastout.xml' );
    while ( my $result = $searchio->next_result() ) {
        while( my $hit = $result->next_hit ) {
            # process the Bio::Search::Hit::HitI object
            while( my $hsp = $hit->next_hsp ) {
                # process the Bio::Search::HSP::HSPI object
            }
        }
    }
30
31 =head1 DESCRIPTION
32
This is a driver for instantiating a parser for report files from
sequence database searches. This object serves as a wrapper for the
format parsers in Bio::SearchIO::* - you should not need to ever
use those format parsers directly. (For people used to the SeqIO
system, we are deliberately using the same pattern.)
38
39 Once you get a SearchIO object, calling next_result() gives you back
40 a L<Bio::Search::Result::ResultI> compliant object, which is an object that
41 represents one Blast/Fasta/HMMER whatever report.
42
43 =head1 FEEDBACK
44
45 =head2 Mailing Lists
46
47 User feedback is an integral part of the evolution of this and other
48 Bioperl modules. Send your comments and suggestions preferably to
49 the Bioperl mailing list. Your participation is much appreciated.
50
51 bioperl-l@bioperl.org - General discussion
52 http://bioperl.org/MailList.shtml - About the mailing lists
53
54 =head2 Reporting Bugs
55
56 Report bugs to the Bioperl bug tracking system to help us keep track
57 of the bugs and their resolution. Bug reports can be submitted via
58 email or the web:
59
60 bioperl-bugs@bioperl.org
61 http://bugzilla.bioperl.org/
62
63 =head1 AUTHOR - Jason Stajich & Steve Chervitz
64
65 Email jason@bioperl.org
66 Email sac@bioperl.org
67
68 =head1 CONTRIBUTORS
69
70 Additional contributors names and emails here
71
72 =head1 APPENDIX
73
74 The rest of the documentation details each of the object methods.
75 Internal methods are usually preceded with a _
76
77 =cut
78
79
80 # Let the code begin...
81
82
83 package Bio::SearchIO;
84 use strict;
85 use vars qw(@ISA);
86
87 # Object preamble - inherits from Bio::Root::IO
88
89 use Bio::Root::IO;
90 use Bio::Event::EventGeneratorI;
91 use Bio::SearchIO::SearchResultEventBuilder;
92 use Bio::AnalysisParserI;
93 use Symbol();
94
95 @ISA = qw( Bio::Root::IO Bio::Event::EventGeneratorI Bio::AnalysisParserI);
96
97 =head2 new
98
 Title   : new
 Usage   : my $obj = new Bio::SearchIO();
 Function: Builds a new Bio::SearchIO object
 Returns : Bio::SearchIO initialized with the correct format
 Args    : -file           => $filename
           -format         => format
           -fh             => filehandle to attach to
           -result_factory => Object implementing Bio::Factory::ResultFactoryI
           -hit_factory    => Object implementing Bio::Factory::HitFactoryI
           -writer         => Object implementing Bio::SearchIO::SearchWriterI
           -output_format  => output format, which will dynamically load writer
110
111 See L<Bio::Factory::ResultFactoryI>, L<Bio::Factory::HitFactoryI>,
112 L<Bio::SearchIO::SearchWriterI>
113
114 =cut
115
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # Called on a concrete format class (Bio::SearchIO::<format>): build the
    # object through the parent constructor and run the shared initializer.
    if ( $class =~ /Bio::SearchIO::(\S+)/ ) {
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }

    # Called on Bio::SearchIO itself: act as a factory that picks the
    # format-specific parser class and delegates construction to it.
    my %param = @args;

    # Add a lower-cased alias for every argument name so that the lookups
    # below work regardless of how the caller capitalised the keys.
    my @names  = keys %param;
    my @values = values %param;
    @param{ map { lc $_ } @names } = @values;

    # Explicit -format wins; otherwise guess from the file name (or the
    # first command line argument); otherwise fall back to 'blast'.
    my $format = $param{'-format'}
        || $class->_guess_format( $param{'-file'} || $ARGV[0] )
        || 'blast';

    my $output_format = $param{'-output_format'};
    my $writer        = undef;

    if ( defined $output_format ) {
        # -writer and -output_format are mutually exclusive. A throw-away
        # root object is created only so that there is something to call
        # throw() on.
        if ( defined $param{'-writer'} ) {
            Bio::Root::Root->new()->throw("Both writer and output format specified - not good");
        }

        # 'blast' is an alias for the plain-text result writer.
        $output_format = 'TextResultWriter' if $output_format =~ /^blast$/i;

        # Load the writer class on demand and hand an instance to the
        # parser constructor through the -writer argument.
        my $output_module = "Bio::SearchIO::Writer::" . $output_format;
        $class->_load_module($output_module);
        $writer = $output_module->new();
        push @args, "-writer", $writer;
    }

    # normalize capitalization to lower case
    $format = lc $format;

    # Give up (undef) when the parser module for this format cannot be loaded.
    if ( !$class->_load_format_module($format) ) {
        return undef;
    }

    my $parser_class = "Bio::SearchIO::${format}";
    return $parser_class->new(@args);
}
158
159 =head2 newFh
160
161 Title : newFh
162 Usage : $fh = Bio::SearchIO->newFh(-file=>$filename,
163 -format=>'Format')
164 Function: does a new() followed by an fh()
165 Example : $fh = Bio::SearchIO->newFh(-file=>$filename,
166 -format=>'Format')
167 $result = <$fh>; # read a ResultI object
168 print $fh $result; # write a ResultI object
169 Returns : filehandle tied to the Bio::SearchIO::Fh class
170 Args :
171
172 =cut
173
sub newFh {
    my ($class, @args) = @_;

    # Build the SearchIO object first; pass the failure on if new() gave
    # back nothing usable.
    my $self = $class->new(@args);
    return unless $self;

    # Wrap the object in a tied filehandle.
    return $self->fh;
}
179
180 =head2 fh
181
182 Title : fh
183 Usage : $obj->fh
184 Function:
185 Example : $fh = $obj->fh; # make a tied filehandle
186 $result = <$fh>; # read a ResultI object
187 print $fh $result; # write a ResultI object
188 Returns : filehandle tied to the Bio::SearchIO::Fh class
189 Args :
190
191 =cut
192
193
sub fh {
    my ($self) = @_;
    my $class = ref($self) || $self;

    # Create an anonymous glob and tie it to this class, handing over the
    # SearchIO object; TIEHANDLE stores it as the handle's processor.
    my $glob = Symbol::gensym();
    tie ${$glob}, $class, $self;
    return $glob;
}
201
202 =head2 attach_EventHandler
203
 Title   : attach_EventHandler
 Usage   : $parser->attach_EventHandler($handler)
 Function: Adds an event handler to listen for events
 Returns : none
 Args    : Bio::SearchIO::EventHandlerI
209
210 See L<Bio::SearchIO::EventHandlerI>
211
212 =cut
213
# Install $handler as the event handler of this parser.
#
# Args    : $handler - object implementing Bio::SearchIO::EventHandlerI
# Returns : nothing
# Notes   : A false $handler is silently ignored. A handler that is not a
#           Bio::SearchIO::EventHandlerI triggers a warning and is NOT
#           installed; the previously attached handler (if any) stays in
#           place, which is what the warning text announces.
sub attach_EventHandler {
    my ($self, $handler) = @_;
    return if !$handler;

    if ( !$handler->isa('Bio::SearchIO::EventHandlerI') ) {
        $self->warn("Ignoring request to attatch handler ".ref($handler). ' because it is not a Bio::SearchIO::EventHandlerI');
        # Actually ignore the request, as the warning says.
        return;
    }

    $self->{'_handler'} = $handler;
    return;
}
223
224 =head2 _eventHandler
225
226 Title : _eventHandler
227 Usage : private
228 Function: Get the EventHandler
229 Returns : Bio::SearchIO::EventHandlerI
230 Args : none
231
232 See L<Bio::SearchIO::EventHandlerI>
233
234 =cut
235
sub _eventHandler {
    my $self = shift;

    # Whatever attach_EventHandler() stored (undef if nothing is attached).
    return $self->{'_handler'};
}
240
# Instance initializer, run by new() for concrete Bio::SearchIO::* classes.
#
# Args    : the same named arguments that were given to new(); this method
#           itself reads -writer, -result_factory, -hit_factory and
#           -use_factories and passes the full list on to _initialize_io().
# Effects : resets the event handler slot, sets up the IO layer, attaches a
#           default Bio::SearchIO::SearchResultEventBuilder, clears
#           '_reporttype', installs the writer when one was supplied and,
#           when -use_factories is true, fills in missing result/hit
#           factories.
sub _initialize {
    my ($self, @args) = @_;
    $self->{'_handler'} = undef;
    # not really necessary unless we put more in RootI
    #$self->SUPER::_initialize(@args);

    # initialize the IO part
    $self->_initialize_io(@args);

    # Arrow call instead of indirect-object syntax ("new Class()"): this
    # package defines its own new(), and the arrow form does not depend on
    # the parser's heuristics to be read as a class method call.
    $self->attach_EventHandler( Bio::SearchIO::SearchResultEventBuilder->new() );
    $self->{'_reporttype'} = '';

    my ( $writer, $rfactory, $hfactory, $use_factories ) =
        $self->_rearrange([qw(WRITER
                              RESULT_FACTORY
                              HIT_FACTORY
                              USE_FACTORIES)], @args);

    $self->writer( $writer ) if $writer;

    # TODO: Resolve this issue:
    # The $use_factories flag is a temporary hack to allow factory-based and
    # non-factory based SearchIO objects to co-exist.
    # steve --- Sat Dec 22 04:41:20 2001
    if( $use_factories) {
        # Only fill in a factory that has not been set already; prefer the
        # one passed in and fall back to an instance of the default class.
        # NOTE(review): default_result_factory_class is not defined in the
        # visible part of this file - presumably supplied by the concrete
        # format class; confirm.
        if( not defined $self->{'_result_factory'}) {
            $self->result_factory( $rfactory || $self->default_result_factory_class->new );
        }
        if( not defined $self->{'_hit_factory'}) {
            $self->hit_factory( $hfactory || $self->default_hit_factory_class->new );
        }
    }
}
273
274 =head2 next_result
275
276 Title : next_result
277 Usage : $result = stream->next_result
278 Function: Reads the next ResultI object from the stream and returns it.
279
280 Certain driver modules may encounter entries in the stream that
281 are either misformatted or that use syntax not yet understood
282 by the driver. If such an incident is recoverable, e.g., by
283 dismissing a feature of a feature table or some other non-mandatory
284 part of an entry, the driver will issue a warning. In the case
285 of a non-recoverable situation an exception will be thrown.
286 Do not assume that you can resume parsing the same stream after
287 catching the exception. Note that you can always turn recoverable
288 errors into exceptions by calling $stream->verbose(2) (see
289 Bio::Root::RootI POD page).
290 Returns : A Bio::Search::Result::ResultI object
291 Args : n/a
292
293 See L<Bio::Root::RootI>
294
295 =cut
296
sub next_result {
    my $self = shift;

    # Abstract here: the base class has no parser of its own, so it only
    # reports that the method is not implemented.
    $self->throw_not_implemented();
}
301
302 =head2 write_result
303
 Title   : write_result
 Usage   : $stream->write_result($result_result, @other_args)
 Function: Writes data from the $result_result object into the stream.
         : Delegates to the to_string() method of the associated
         : WriterI object.
 Returns : 1 on success; a missing writer raises an exception (see Throws)
 Args    : Bio::Search::Result::ResultI object,
         : plus any other arguments for the Writer
 Throws  : Bio::Root::Exception if a Writer has not been set.
313
314 See L<Bio::Root::Exception>
315
316 =cut
317
sub write_result {
    my ($self, $result, @args) = @_;

    # Without a writer object there is nothing that could render the result.
    $self->throw("ResultWriter not defined.")
        unless ref $self->{'_result_writer'};

    # Let the writer turn the result into text, then send it to the stream.
    my $str = $self->writer->to_string( $result, @args );
    $self->_print( "$str" );

    return 1;
}
330
331
332 =head2 writer
333
 Title   : writer
 Usage   : $writer = $stream->writer;
 Function: Sets/Gets a SearchWriterI object to be used for this searchIO.
 Returns : The current Bio::SearchIO::SearchWriterI object
           (undef if none has been set)
 Args    : Bio::SearchIO::SearchWriterI object (when setting)
 Throws  : Bio::Root::Exception if a non-Bio::SearchIO::SearchWriterI object
           is passed in.
341
342 =cut
343
sub writer {
    my ($self, $writer) = @_;

    # Setter part: only runs when a defined value was passed in.
    if ( defined $writer ) {
        if ( ref($writer) and $writer->isa( 'Bio::SearchIO::SearchWriterI' ) ) {
            $self->{'_result_writer'} = $writer;
        }
        else {
            # Defined, but not an object of the required interface.
            $self->throw("Can't set ResultWriter. Not a Bio::SearchIO::SearchWriterI: $writer");
        }
    }

    # Getter part: always hand back the currently stored writer.
    return $self->{'_result_writer'};
}
354
355
356 =head2 hit_factory
357
358 Title : hit_factory
359 Usage : $hit_factory = $stream->hit_factory; (get)
360 : $stream->hit_factory( $factory ); (set)
361 Function: Sets/Gets a factory object to create hit objects for this SearchIO
362 Returns : Bio::Factory::HitFactoryI object
363 Args : Bio::Factory::HitFactoryI object (when setting)
364 Throws : Bio::Root::Exception if a non-Bio::Factory::HitFactoryI object
365 is passed in.
366 Comments: A SearchIO implementation should provide a default hit factory.
367
368 See L<Bio::Factory::HitFactoryI>
369
370 =cut
371
sub hit_factory {
    my ($self, $fact) = @_;

    # Accept the argument only when it is an object of the hit factory
    # interface; any other defined value is rejected with an exception.
    my $acceptable = ( ref $fact and $fact->isa( 'Bio::Factory::HitFactoryI' ) );
    if ( $acceptable ) {
        $self->{'_hit_factory'} = $fact;
    }
    elsif ( defined $fact ) {
        $self->throw("Can't set HitFactory. Not a Bio::Factory::HitFactoryI: $fact");
    }

    # Called without argument this is a plain getter.
    return $self->{'_hit_factory'};
}
382
383 =head2 result_factory
384
385 Title : result_factory
386 Usage : $result_factory = $stream->result_factory; (get)
387 : $stream->result_factory( $factory ); (set)
388 Function: Sets/Gets a factory object to create result objects for this
389 SearchIO.
390 Returns : Bio::Factory::ResultFactoryI object
391 Args : Bio::Factory::ResultFactoryI object (when setting)
392 Throws : Bio::Root::Exception if a non-Bio::Factory::ResultFactoryI object
393 is passed in.
394 Comments: A SearchIO implementation should provide a default result factory.
395
396 See L<Bio::Factory::ResultFactoryI>
397
398 =cut
399
sub result_factory {
    my ($self, $fact) = @_;

    # Accept the argument only when it is an object of the result factory
    # interface; any other defined value is rejected with an exception.
    my $acceptable = ( ref $fact and $fact->isa( 'Bio::Factory::ResultFactoryI' ) );
    if ( $acceptable ) {
        $self->{'_result_factory'} = $fact;
    }
    elsif ( defined $fact ) {
        $self->throw("Can't set ResultFactory. Not a Bio::Factory::ResultFactoryI: $fact");
    }

    # Called without argument this is a plain getter.
    return $self->{'_result_factory'};
}
410
411
412 =head2 result_count
413
414 Title : result_count
415 Usage : $num = $stream->result_count;
416 Function: Gets the number of Blast results that have been parsed.
417 Returns : integer
418 Args : none
419 Throws : none
420
421 =cut
422
sub result_count {
    my ($self) = @_;

    # Abstract here: the base class keeps no count of its own and only
    # reports that the method is not implemented.
    $self->throw_not_implemented();
}
427
428
429 =head2 default_hit_factory_class
430
 Title   : default_hit_factory_class
 Usage   : $hit_factory = $obj->default_hit_factory_class()->new( @args )
 Function: Returns the name of the default class to be used for creating
           Bio::Search::Hit::HitI objects.
 Example :
 Returns : A string containing the name of a class that implements
           the Bio::Search::Hit::HitI interface.
 Args    : none
 Comments: Bio::SearchIO does not implement this method. It is intended to
           throw a NotImplemented exception, but that call is currently
           commented out in the code, so no exception is raised here.
441
442 See L<Bio::Search::Hit::HitI>
443
444 =cut
445
sub default_hit_factory_class {
    # Placeholder in the base class. With the throw below disabled, the
    # assignment is the last statement evaluated, so the sub's value is
    # the invocant itself rather than a class name.
    my $self = shift @_;
    # TODO: Uncomment this when Jason's SearchIO code conforms
    # $self->throw_not_implemented;
}
451
452 =head2 _load_format_module
453
454 Title : _load_format_module
455 Usage : *INTERNAL SearchIO stuff*
456 Function: Loads up (like use) a module at run time on demand
457 Example :
458 Returns :
459 Args :
460
461 =cut
462
sub _load_format_module {
    my ($self, $format) = @_;
    my $module = "Bio::SearchIO::$format";

    # Try to load the parser class; an exception from the loader leaves
    # $ok undefined and the reason in $@.
    my $ok = eval { $self->_load_module($module) };

    if ( $@ ) {
        # Report the failure on STDERR instead of re-throwing; the caller
        # learns about it through the false return value.
        print STDERR "$self: $format cannot be found\n"
            . "Exception $@\n"
            . "For more information about the SearchIO system please see the SearchIO docs.\n"
            . "This includes ways of checking for formats at compile time, not run time\n";
    }

    return $ok;
}
482
483
484 =head2 _guess_format
485
486 Title : _guess_format
487 Usage : $obj->_guess_format($filename)
488 Function:
489 Example :
490 Returns : guessed format of filename (lower case)
491 Args :
492
493 =cut
494
495
# Guess the report format from a file name.
#
# Args    : $filename - name of the report file
# Returns : 'blastxml', 'blast', 'fasta' or 'exonerate'; nothing (undef in
#           scalar context) when the name is false or matches no pattern.
# Notes   : - The name is held in a lexical, so the caller's $_ is left
#             untouched.
#           - 'blastxml' is tested before 'blast'. Every blastxml name also
#             contains "blast", so testing 'blast' first made the blastxml
#             branch unreachable.
sub _guess_format {
    my ($class, $filename) = @_;
    return unless $filename;

    return 'blastxml'  if $filename =~ /blast/i and $filename =~ /\.xml$/i;
    return 'blast'     if $filename =~ /blast/i or $filename =~ /\.bl\w$/i;
    return 'fasta'     if $filename =~ /fasta/i or $filename =~ /\.fas$/i;
    return 'exonerate' if $filename =~ /\.exonerate/i or $filename =~ /\.exon/i;

    # No pattern matched: the caller falls back to its own default.
    return;
}
504
# Close the stream. When a writer is attached, the text returned by its
# end_report() is printed first; the actual closing is delegated to the
# parent class, whose return value is passed through.
sub close {
    my ($self, @args) = @_;
    $self->_print( $self->writer->end_report() ) if $self->writer;
    $self->SUPER::close(@args);
}
512
# Destructor: make sure the stream is closed (and the writer's end-of-report
# text emitted, see close()) when the object goes away.
sub DESTROY {
    my ($self) = @_;
    $self->close();
}
517
# tie() constructor for the filehandle interface (see fh()): remembers the
# SearchIO object that does the real work under the 'processor' key.
sub TIEHANDLE {
    my ($class, $processor) = @_;
    my %state = ( processor => $processor );
    return bless \%state, $class;
}
522
# <$fh> on the tied handle. In scalar context one result is read from the
# processor; in list context results are read until next_result() returns
# a false value, and all of them are returned.
sub READLINE {
    my $self = shift;
    return $self->{'processor'}->next_result() unless wantarray;

    my @results;
    while ( my $result = $self->{'processor'}->next_result() ) {
        push @results, $result;
    }
    return @results;
}
530
# print {$fh} ... on the tied handle: everything printed is handed to the
# processor's write_result(), whose return value is passed back.
sub PRINT {
    my ($self, @items) = @_;
    $self->{'processor'}->write_result(@items);
}
535
536 1;
537
538 __END__