Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/SearchIO.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 # $Id: SearchIO.pm,v 1.18 2002/12/13 13:54:03 jason Exp $ | |
2 # | |
3 # BioPerl module for Bio::SearchIO | |
4 # | |
5 # Cared for by Jason Stajich <jason@bioperl.org> | |
6 # | |
7 # Copyright Jason Stajich | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::SearchIO - Driver for parsing Sequence Database Searches (Blast,FASTA,...) | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 use Bio::SearchIO; | |
20 # format can be e.g. 'blast', 'fasta', 'blastxml' | |
21 my $searchio = new Bio::SearchIO( -format => 'blastxml', | |
22 -file => 'blastout.xml' ); | |
23 while ( my $result = $searchio->next_result() ) { | |
24 while( my $hit = $result->next_hit ) { | |
25 # process the Bio::Search::Hit::HitI object | |
26 while( my $hsp = $hit->next_hsp ) { | |
27 # process the Bio::Search::HSP::HSPI object | |
28 } | |
29 } | |
30 | |
31 =head1 DESCRIPTION | |
32 | |
33 This is a driver for instantiating a parser for report files from | |
34 sequence database searches. This object serves as a wrapper for the | |
35 format parsers in Bio::SearchIO::* - you should not need to ever | |
36 use those format parsers directly. (For people used to the SeqIO | |
37 system, we are deliberately using the same pattern). | |
38 | |
39 Once you get a SearchIO object, calling next_result() gives you back | |
40 a L<Bio::Search::Result::ResultI> compliant object, which is an object that | |
41 represents one Blast/Fasta/HMMER whatever report. | |
42 | |
43 =head1 FEEDBACK | |
44 | |
45 =head2 Mailing Lists | |
46 | |
47 User feedback is an integral part of the evolution of this and other | |
48 Bioperl modules. Send your comments and suggestions preferably to | |
49 the Bioperl mailing list. Your participation is much appreciated. | |
50 | |
51 bioperl-l@bioperl.org - General discussion | |
52 http://bioperl.org/MailList.shtml - About the mailing lists | |
53 | |
54 =head2 Reporting Bugs | |
55 | |
56 Report bugs to the Bioperl bug tracking system to help us keep track | |
57 of the bugs and their resolution. Bug reports can be submitted via | |
58 email or the web: | |
59 | |
60 bioperl-bugs@bioperl.org | |
61 http://bugzilla.bioperl.org/ | |
62 | |
63 =head1 AUTHOR - Jason Stajich & Steve Chervitz | |
64 | |
65 Email jason@bioperl.org | |
66 Email sac@bioperl.org | |
67 | |
68 =head1 CONTRIBUTORS | |
69 | |
70 Additional contributors names and emails here | |
71 | |
72 =head1 APPENDIX | |
73 | |
74 The rest of the documentation details each of the object methods. | |
75 Internal methods are usually preceded with a _ | |
76 | |
77 =cut | |
78 | |
79 | |
80 # Let the code begin... | |
81 | |
82 | |
83 package Bio::SearchIO; | |
84 use strict; | |
85 use vars qw(@ISA); | |
86 | |
87 # Object preamble - inherits from Bio::Root::IO | |
88 | |
89 use Bio::Root::IO; | |
90 use Bio::Event::EventGeneratorI; | |
91 use Bio::SearchIO::SearchResultEventBuilder; | |
92 use Bio::AnalysisParserI; | |
93 use Symbol(); | |
94 | |
95 @ISA = qw( Bio::Root::IO Bio::Event::EventGeneratorI Bio::AnalysisParserI); | |
96 | |
97 =head2 new | |
98 | |
99 Title : new | |
100 Usage : my $obj = new Bio::SearchIO(); | |
101 Function: Builds a new Bio::SearchIO object | |
102 Returns : Bio::SearchIO initialized with the correct format | |
103 Args : -file => $filename | |
104 -format => format | |
105 -fh => filehandle to attach to | |
106 -result_factory => Object implementing Bio::Factory::ResultFactoryI | |
107 -hit_factory => Object implementing Bio::Factory::HitFactoryI | |
108 -writer => Object implementing Bio::SearchIO::SearchWriterI | |
109 -output_format => output format, which will dynamically load writer | |
110 | |
111 See L<Bio::Factory::ResultFactoryI>, L<Bio::Factory::HitFactoryI>, | |
112 L<Bio::SearchIO::SearchWriterI> | |
113 | |
114 =cut | |
115 | |
# Constructor and format dispatcher.
#
# Called on a format-specific subclass (a class name matching
# Bio::SearchIO::<something>), it builds the object through the parent
# constructor and runs _initialize() on it.
#
# Called on anything else, it works out which format driver to use
# (-format argument, then a guess from -file or $ARGV[0], then 'blast'),
# optionally loads a writer for -output_format, loads the driver module
# and delegates construction to "Bio::SearchIO::<format>".
#
# Returns the new object, or undef if the format module cannot be loaded.
# Throws if both -writer and -output_format are supplied.
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # or do we want to call SUPER on an object if $caller is an
    # object?
    if ( $class =~ /Bio::SearchIO::(\S+)/ ) {
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }

    # Generic entry point: inspect the arguments with case-insensitive keys.
    my %param = @args;
    @param{ map { lc $_ } keys %param } = values %param;    # lowercase keys

    my $format = $param{'-format'}
      || $class->_guess_format( $param{'-file'} || $ARGV[0] )
      || 'blast';

    my $output_format = $param{'-output_format'};
    if ( defined $output_format ) {
        if ( defined $param{'-writer'} ) {
            Bio::Root::Root->new()->throw("Both writer and output format specified - not good");
        }

        # 'blast' output is produced by the TextResultWriter module.
        $output_format = 'TextResultWriter' if $output_format =~ /^blast$/i;

        my $output_module = "Bio::SearchIO::Writer::" . $output_format;
        $class->_load_module($output_module);

        # Hand the freshly built writer on to the format driver.
        push @args, "-writer", $output_module->new();
    }

    # normalize capitalization to lower case
    $format = lc $format;

    $class->_load_format_module($format) or return undef;
    return "Bio::SearchIO::${format}"->new(@args);
}
158 | |
159 =head2 newFh | |
160 | |
161 Title : newFh | |
162 Usage : $fh = Bio::SearchIO->newFh(-file=>$filename, | |
163 -format=>'Format') | |
164 Function: does a new() followed by an fh() | |
165 Example : $fh = Bio::SearchIO->newFh(-file=>$filename, | |
166 -format=>'Format') | |
167 $result = <$fh>; # read a ResultI object | |
168 print $fh $result; # write a ResultI object | |
169 Returns : filehandle tied to the Bio::SearchIO::Fh class | |
170 Args : | |
171 | |
172 =cut | |
173 | |
# Convenience constructor: builds an object with new(@args) and returns
# the tied filehandle produced by its fh() method. Returns nothing when
# new() yields a false value.
sub newFh {
    my ($class, @args) = @_;

    my $self = $class->new(@args);
    return unless $self;

    return $self->fh;
}
179 | |
180 =head2 fh | |
181 | |
182 Title : fh | |
183 Usage : $obj->fh | |
184 Function: | |
185 Example : $fh = $obj->fh; # make a tied filehandle | |
186 $result = <$fh>; # read a ResultI object | |
187 print $fh $result; # write a ResultI object | |
188 Returns : filehandle tied to the Bio::SearchIO::Fh class | |
189 Args : | |
190 | |
191 =cut | |
192 | |
193 | |
# Creates an anonymous glob and ties it to this object's class, passing
# the object itself to TIEHANDLE, so that reading from / printing to the
# returned handle is routed through READLINE / PRINT.
# Returns the tied glob reference.
sub fh {
    my ($self) = @_;
    my $class = ref($self) || $self;

    my $glob = Symbol::gensym();
    tie $$glob, $class, $self;

    return $glob;
}
201 | |
202 =head2 attach_EventHandler | |
203 | |
204 Title : attach_EventHandler | |
205 Usage : $parser->attach_EventHandler($handler) | |
206 Function: Adds an event handler to listen for events | |
207 Returns : none | |
208 Args : Bio::SearchIO::EventHandlerI | |
209 | |
210 See L<Bio::SearchIO::EventHandlerI> | |
211 | |
212 =cut | |
213 | |
# Registers $handler as this parser's event handler.
#
# Args    : $handler - an object (or class) that isa
#           Bio::SearchIO::EventHandlerI
# Returns : nothing
#
# A false $handler is silently ignored. A handler that is not a
# Bio::SearchIO::EventHandlerI triggers a warning and is NOT attached;
# the previous version warned that it was ignoring the request but
# attached the handler anyway.
sub attach_EventHandler {
    my ($self, $handler) = @_;
    return if !$handler;

    # Guard the isa() call so an unblessed reference is rejected with the
    # warning below instead of dying; keep $@ untouched for the caller.
    my $is_handler = do {
        local $@;
        eval { $handler->isa('Bio::SearchIO::EventHandlerI') };
    };

    if ( !$is_handler ) {
        $self->warn("Ignoring request to attatch handler ".ref($handler). ' because it is not a Bio::SearchIO::EventHandlerI');
        return;
    }

    $self->{'_handler'} = $handler;
    return;
}
223 | |
224 =head2 _eventHandler | |
225 | |
226 Title : _eventHandler | |
227 Usage : private | |
228 Function: Get the EventHandler | |
229 Returns : Bio::SearchIO::EventHandlerI | |
230 Args : none | |
231 | |
232 See L<Bio::SearchIO::EventHandlerI> | |
233 | |
234 =cut | |
235 | |
# Private accessor: returns whatever is stored in the '_handler' slot
# (set by attach_EventHandler), or undef if nothing is stored.
sub _eventHandler {
    my $self = shift;
    return $self->{'_handler'};
}
240 | |
# Object initialisation, called from new() for format-specific classes.
#
# Resets the handler slot, initialises the IO layer with the constructor
# arguments, attaches a default SearchResultEventBuilder, clears the
# report type, and then processes the -writer, -result_factory,
# -hit_factory and -use_factories arguments.
sub _initialize {
    my ($self, @args) = @_;

    $self->{'_handler'} = undef;
    # not really necessary unless we put more in RootI
    #$self->SUPER::_initialize(@args);

    # initialize the IO part
    $self->_initialize_io(@args);
    $self->attach_EventHandler( Bio::SearchIO::SearchResultEventBuilder->new() );
    $self->{'_reporttype'} = '';

    my ($writer, $rfactory, $hfactory, $use_factories) = $self->_rearrange(
        [qw(WRITER RESULT_FACTORY HIT_FACTORY USE_FACTORIES)], @args
    );

    $self->writer($writer) if $writer;

    # TODO: Resolve this issue:
    # The $use_factories flag is a temporary hack to allow factory-based and
    # non-factory based SearchIO objects to co-exist.
    # steve --- Sat Dec 22 04:41:20 2001
    if ($use_factories) {
        # Only install a factory when none is present yet; fall back to
        # the class default when the caller did not supply one.
        $self->result_factory( $rfactory || $self->default_result_factory_class->new )
            unless defined $self->{'_result_factory'};
        $self->hit_factory( $hfactory || $self->default_hit_factory_class->new )
            unless defined $self->{'_hit_factory'};
    }
}
273 | |
274 =head2 next_result | |
275 | |
276 Title : next_result | |
277 Usage : $result = $stream->next_result | |
278 Function: Reads the next ResultI object from the stream and returns it. | |
279 | |
280 Certain driver modules may encounter entries in the stream that | |
281 are either misformatted or that use syntax not yet understood | |
282 by the driver. If such an incident is recoverable, e.g., by | |
283 dismissing a feature of a feature table or some other non-mandatory | |
284 part of an entry, the driver will issue a warning. In the case | |
285 of a non-recoverable situation an exception will be thrown. | |
286 Do not assume that you can resume parsing the same stream after | |
287 catching the exception. Note that you can always turn recoverable | |
288 errors into exceptions by calling $stream->verbose(2) (see | |
289 Bio::Root::RootI POD page). | |
290 Returns : A Bio::Search::Result::ResultI object | |
291 Args : n/a | |
292 | |
293 See L<Bio::Root::RootI> | |
294 | |
295 =cut | |
296 | |
# Abstract in this base class: always delegates to throw_not_implemented.
# Format-specific parsers are expected to override it.
sub next_result {
    my $self = shift;
    return $self->throw_not_implemented;
}
301 | |
302 =head2 write_result | |
303 | |
304 Title : write_result | |
305 Usage : $stream->write_result($result_result, @other_args) | |
306 Function: Writes data from the $result_result object into the stream. | |
307 : Delegates to the to_string() method of the associated | |
308 : WriterI object. | |
309 Returns : 1 for success and 0 for error | |
310 Args : Bio::Search::Result::ResultI object, | |
311 : plus any other arguments for the Writer | |
312 Throws : Bio::Root::Exception if a Writer has not been set. | |
313 | |
314 See L<Bio::Root::Exception> | |
315 | |
316 =cut | |
317 | |
# Renders $result through the attached writer's to_string() (passing any
# extra arguments along) and prints the resulting string to the stream.
# Throws when no writer object is stored. Returns 1.
sub write_result {
    my ($self, $result, @args) = @_;

    $self->throw("ResultWriter not defined.")
        unless ref( $self->{'_result_writer'} );

    my $report = $self->writer->to_string( $result, @args );
    #print "Got string: \n$report\n";
    $self->_print("$report");

    return 1;
}
330 | |
331 | |
332 =head2 writer | |
333 | |
334 Title : writer | |
335 Usage : $writer = $stream->writer; | |
336 Function: Sets/Gets a SearchWriterI object to be used for this searchIO. | |
337 Returns : The current Bio::SearchIO::SearchWriterI object (undef if none is set) | |
338 Args : Bio::SearchIO::SearchWriterI object (when setting) | |
339 Throws : Bio::Root::Exception if a non-Bio::SearchIO::SearchWriterI object | |
340 is passed in. | |
341 | |
342 =cut | |
343 | |
# Combined getter/setter for the result writer.
# With a Bio::SearchIO::SearchWriterI object as argument the writer is
# stored; any other defined argument raises an exception.
# Always returns the currently stored writer.
sub writer {
    my ($self, $writer) = @_;

    if ( defined $writer ) {
        my $acceptable = ref($writer)
          && $writer->isa('Bio::SearchIO::SearchWriterI');
        if ($acceptable) {
            $self->{'_result_writer'} = $writer;
        }
        else {
            $self->throw("Can't set ResultWriter. Not a Bio::SearchIO::SearchWriterI: $writer");
        }
    }

    return $self->{'_result_writer'};
}
354 | |
355 | |
356 =head2 hit_factory | |
357 | |
358 Title : hit_factory | |
359 Usage : $hit_factory = $stream->hit_factory; (get) | |
360 : $stream->hit_factory( $factory ); (set) | |
361 Function: Sets/Gets a factory object to create hit objects for this SearchIO | |
362 Returns : Bio::Factory::HitFactoryI object | |
363 Args : Bio::Factory::HitFactoryI object (when setting) | |
364 Throws : Bio::Root::Exception if a non-Bio::Factory::HitFactoryI object | |
365 is passed in. | |
366 Comments: A SearchIO implementation should provide a default hit factory. | |
367 | |
368 See L<Bio::Factory::HitFactoryI> | |
369 | |
370 =cut | |
371 | |
# Combined getter/setter for the hit factory.
# With a Bio::Factory::HitFactoryI object as argument the factory is
# stored; any other defined argument raises an exception.
# Always returns the currently stored factory.
sub hit_factory {
    my ($self, $fact) = @_;

    if ( defined $fact ) {
        my $acceptable = ref($fact)
          && $fact->isa('Bio::Factory::HitFactoryI');
        if ($acceptable) {
            $self->{'_hit_factory'} = $fact;
        }
        else {
            $self->throw("Can't set HitFactory. Not a Bio::Factory::HitFactoryI: $fact");
        }
    }

    return $self->{'_hit_factory'};
}
382 | |
383 =head2 result_factory | |
384 | |
385 Title : result_factory | |
386 Usage : $result_factory = $stream->result_factory; (get) | |
387 : $stream->result_factory( $factory ); (set) | |
388 Function: Sets/Gets a factory object to create result objects for this | |
389 SearchIO. | |
390 Returns : Bio::Factory::ResultFactoryI object | |
391 Args : Bio::Factory::ResultFactoryI object (when setting) | |
392 Throws : Bio::Root::Exception if a non-Bio::Factory::ResultFactoryI object | |
393 is passed in. | |
394 Comments: A SearchIO implementation should provide a default result factory. | |
395 | |
396 See L<Bio::Factory::ResultFactoryI> | |
397 | |
398 =cut | |
399 | |
# Combined getter/setter for the result factory.
# With a Bio::Factory::ResultFactoryI object as argument the factory is
# stored; any other defined argument raises an exception.
# Always returns the currently stored factory.
sub result_factory {
    my ($self, $fact) = @_;

    if ( defined $fact ) {
        my $acceptable = ref($fact)
          && $fact->isa('Bio::Factory::ResultFactoryI');
        if ($acceptable) {
            $self->{'_result_factory'} = $fact;
        }
        else {
            $self->throw("Can't set ResultFactory. Not a Bio::Factory::ResultFactoryI: $fact");
        }
    }

    return $self->{'_result_factory'};
}
410 | |
411 | |
412 =head2 result_count | |
413 | |
414 Title : result_count | |
415 Usage : $num = $stream->result_count; | |
416 Function: Gets the number of Blast results that have been parsed. | |
417 Returns : integer | |
418 Args : none | |
419 Throws : none | |
420 | |
421 =cut | |
422 | |
# Abstract in this base class: always delegates to throw_not_implemented.
sub result_count {
    my ($self) = @_;
    return $self->throw_not_implemented;
}
427 | |
428 | |
429 =head2 default_hit_factory_class | |
430 | |
431 Title : default_hit_factory_class | |
432 Usage : $res_factory = $obj->default_hit_factory_class()->new( @args ) | |
433 Function: Returns the name of the default class to be used for creating | |
434 Bio::Search::Hit::HitI objects. | |
435 Example : | |
436 Returns : A string containing the name of a class that implements | |
437 the Bio::Search::Hit::HitI interface. | |
438 Args : none | |
439 Comments: Bio::SearchIO does not implement this method. The call that would | |
440 throw a NotImplemented exception is currently commented out. | |
441 | |
442 See L<Bio::Search::Hit::HitI> | |
443 | |
444 =cut | |
445 | |
# Placeholder for the default hit-factory class name.
# NOTE(review): with the not-implemented throw disabled, this sub has
# always evaluated to its own invocant (the value of the final
# assignment), not to a factory class name. That result is returned
# explicitly here so the behaviour is visible; confirm what callers
# such as _initialize() actually expect.
sub default_hit_factory_class {
    my ($self) = @_;
    # TODO: Uncomment this when Jason's SearchIO code conforms
    # $self->throw_not_implemented;
    return $self;
}
451 | |
452 =head2 _load_format_module | |
453 | |
454 Title : _load_format_module | |
455 Usage : *INTERNAL SearchIO stuff* | |
456 Function: Loads up (like use) a module at run time on demand | |
457 Example : | |
458 Returns : | |
459 Args : | |
460 | |
461 =cut | |
462 | |
# Loads the driver module "Bio::SearchIO::<format>" at run time through
# _load_module(). A load failure is caught, reported on STDERR together
# with the exception text, and not re-thrown.
# Returns the value of _load_module() on success, undef on failure.
sub _load_format_module {
    my ($self, $format) = @_;
    my $module = "Bio::SearchIO::" . $format;

    my $ok = eval { $self->_load_module($module) };

    if ($@) {
        # Assemble the report as one string so it goes out in a single print.
        my $report = join '',
            "$self: $format cannot be found\n",
            "Exception $@\n",
            "For more information about the SearchIO system please see the SearchIO docs.\n",
            "This includes ways of checking for formats at compile time, not run time\n";
        print STDERR $report;
    }

    return $ok;
}
482 | |
483 | |
484 =head2 _guess_format | |
485 | |
486 Title : _guess_format | |
487 Usage : $obj->_guess_format($filename) | |
488 Function: Guesses the report format from patterns in the filename | |
489 Example : | |
490 Returns : guessed format of filename (lower case) | |
491 Args : | |
492 | |
493 =cut | |
494 | |
495 | |
# Guesses the report format from a file name.
#
# Args    : $filename - name of the report file (may be undef)
# Returns : 'blastxml', 'blast', 'fasta' or 'exonerate'; nothing (undef
#           in scalar context) when the name is false or matches no
#           known pattern.
#
# Changes from the previous version:
#  * the name is held in a lexical instead of being assigned to the
#    global $_, so the caller's $_ is no longer clobbered;
#  * the BLAST XML test runs before the plain BLAST test. It used to be
#    unreachable, because any name containing "blast" had already
#    returned 'blast';
#  * the no-match case returns explicitly.
sub _guess_format {
    my ($class, $filename) = @_;
    return unless $filename;

    # Most specific pattern first: "blast" somewhere plus a .xml suffix.
    return 'blastxml'
        if $filename =~ /blast/i and $filename =~ /\.xml$/i;
    return 'blast'
        if $filename =~ /blast/i or $filename =~ /\.bl\w$/i;
    return 'fasta'
        if $filename =~ /fasta/i or $filename =~ /\.fas$/i;
    return 'exonerate'
        if $filename =~ /\.exonerate/i or $filename =~ /\.exon/i;

    return;
}
504 | |
# Closes the stream. If a writer is attached, its end_report() output is
# printed first; the actual close is then delegated to the parent class
# with any arguments passed through.
sub close {
    my ($self, @args) = @_;

    if ( my $writer = $self->writer ) {
        $self->_print( $writer->end_report() );
    }

    return $self->SUPER::close(@args);
}
512 | |
# Destructor: makes sure the stream is closed when the object goes away.
sub DESTROY {
    my ($self) = @_;
    $self->close();
}
517 | |
# tie() constructor for the filehandle interface: wraps the given object
# in a hash under the 'processor' key and blesses it into $class.
sub TIEHANDLE {
    my ($class, $processor) = @_;
    return bless { processor => $processor }, $class;
}
522 | |
# Tied-handle read. In scalar context returns the processor's next
# result; in list context keeps calling next_result() until it returns a
# false value and returns everything collected.
sub READLINE {
    my ($self) = @_;
    my $processor = $self->{'processor'};

    return $processor->next_result() unless wantarray;

    my @results;
    while ( my $result = $processor->next_result() ) {
        push @results, $result;
    }
    return @results;
}
530 | |
# Tied-handle write: forwards everything printed to the handle to the
# processor's write_result() and returns its result.
sub PRINT {
    my ($self, @items) = @_;
    return $self->{'processor'}->write_result(@items);
}
535 | |
536 1; | |
537 | |
538 __END__ |