diff variant_effect_predictor/Bio/SearchIO.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/SearchIO.pm	Thu Apr 11 06:29:17 2013 -0400
@@ -0,0 +1,538 @@
+# $Id: SearchIO.pm,v 1.18 2002/12/13 13:54:03 jason Exp $
+#
+# BioPerl module for Bio::SearchIO
+#
+# Cared for by Jason Stajich <jason@bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO - Driver for parsing Sequence Database Searches (Blast,FASTA,...)
+
+=head1 SYNOPSIS
+
+    use Bio::SearchIO;
+    # format can be 'fasta', 'blast'
+    my $searchio = new Bio::SearchIO( -format => 'blastxml',
+                                      -file   => 'blastout.xml' );
+    while ( my $result = $searchio->next_result() ) {
+       while( my $hit = $result->next_hit ) {
+	# process the Bio::Search::Hit::HitI object
+           while( my $hsp = $hit->next_hsp ) { 
+	    # process the Bio::Search::HSP::HSPI object
+	}
+    }
+
+=head1 DESCRIPTION
+
+This is a driver for instantiating a parser for report files from
+sequence database searches. This object serves as a wrapper for the
+format parsers in Bio::SearchIO::* - you should not need to ever
+use those format parsers directly. (For people used to the SeqIO
+system it, we are deliberately using the same pattern).
+
+Once you get a SearchIO object, calling next_result() gives you back
+a L<Bio::Search::Result::ResultI> compliant object, which is an object that
+represents one Blast/Fasta/HMMER whatever report.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org              - General discussion
+  http://bioperl.org/MailList.shtml  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  bioperl-bugs@bioperl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Jason Stajich & Steve Chervitz
+
+Email jason@bioperl.org
+Email sac@bioperl.org
+
+=head1 CONTRIBUTORS
+
+Additional contributors names and emails here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO;
+use strict;
+use vars qw(@ISA);
+
+# Object preamble - inherits from Bio::Root::IO
+
+use Bio::Root::IO;
+use Bio::Event::EventGeneratorI;
+use Bio::SearchIO::SearchResultEventBuilder;
+use Bio::AnalysisParserI;
+use Symbol();
+
+@ISA = qw( Bio::Root::IO Bio::Event::EventGeneratorI Bio::AnalysisParserI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO();
+ Function: Builds a new Bio::SearchIO object 
+ Returns : Bio::SearchIO initialized with the correct format
+ Args    : Args    : -file => $filename
+           -format => format
+           -fh => filehandle to attach to
+           -result_factory => Object implementing Bio::Factory::ResultFactoryI
+           -hit_factory    => Object implementing Bio::Factory::HitFactoryI
+           -writer         => Object implementing Bio::SearchIO::SearchWriterI
+           -output_format  => output format, which will dynamically load writer
+
+See L<Bio::Factory::ResultFactoryI>, L<Bio::Factory::HitFactoryI>,
+L<Bio::SearchIO::SearchWriterI>
+
+=cut
+
+sub new {
+  my($caller,@args) = @_;
+  my $class = ref($caller) || $caller;
+    
+  # or do we want to call SUPER on an object if $caller is an
+  # object?
+  if( $class =~ /Bio::SearchIO::(\S+)/ ) {
+    my ($self) = $class->SUPER::new(@args);	
+    $self->_initialize(@args);
+    return $self;
+  } else { 
+    my %param = @args;
+    @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+    my $format = $param{'-format'} ||
+      $class->_guess_format( $param{'-file'} || $ARGV[0] ) || 'blast';
+
+    my $output_format = $param{'-output_format'};
+    my $writer = undef;
+
+    if( defined $output_format ) {
+	if( defined $param{'-writer'} ) {
+	    my $dummy = Bio::Root::Root->new();
+	    $dummy->throw("Both writer and output format specified - not good");
+	}
+
+	if( $output_format =~ /^blast$/i ) {
+	    $output_format = 'TextResultWriter';
+	}
+	my $output_module = "Bio::SearchIO::Writer::".$output_format;
+	$class->_load_module($output_module);
+	$writer = $output_module->new();
+	push(@args,"-writer",$writer);
+    }
+
+
+    # normalize capitalization to lower case
+    $format = "\L$format";
+    
+    return undef unless( $class->_load_format_module($format) );
+    return "Bio::SearchIO::${format}"->new(@args);
+  }
+}
+
+=head2 newFh
+
+ Title   : newFh
+ Usage   : $fh = Bio::SearchIO->newFh(-file=>$filename,
+                                      -format=>'Format')
+ Function: does a new() followed by an fh()
+ Example : $fh = Bio::SearchIO->newFh(-file=>$filename,
+                                      -format=>'Format')
+           $result = <$fh>;   # read a ResultI object
+           print $fh $result; # write a ResultI object
+ Returns : filehandle tied to the Bio::SearchIO::Fh class
+ Args    :
+
+=cut
+
+sub newFh {
+  my $class = shift;
+  return unless my $self = $class->new(@_);
+  return $self->fh;
+}
+
+=head2 fh
+
+ Title   : fh
+ Usage   : $obj->fh
+ Function:
+ Example : $fh = $obj->fh;      # make a tied filehandle
+           $result = <$fh>;     # read a ResultI object
+           print $fh $result;   # write a ResultI object
+ Returns : filehandle tied to the Bio::SearchIO::Fh class
+ Args    :
+
+=cut
+
+
+sub fh {
+  my $self = shift;
+  my $class = ref($self) || $self;
+  my $s = Symbol::gensym;
+  tie $$s,$class,$self;
+  return $s;
+}
+
+=head2 attach_EventHandler
+
+ Title   : attach_EventHandler
+ Usage   : $parser->attatch_EventHandler($handler)
+ Function: Adds an event handler to listen for events
+ Returns : none
+ Args    : Bio::SearchIO::EventHandlerI
+
+See L<Bio::SearchIO::EventHandlerI>
+
+=cut
+
+sub attach_EventHandler{
+    my ($self,$handler) = @_;
+    return if( ! $handler );
+    if( ! $handler->isa('Bio::SearchIO::EventHandlerI') ) {
+	$self->warn("Ignoring request to attatch handler ".ref($handler). ' because it is not a Bio::SearchIO::EventHandlerI');
+    }
+    $self->{'_handler'} = $handler;
+    return;
+}
+
+=head2 _eventHandler
+
+ Title   : _eventHandler
+ Usage   : private
+ Function: Get the EventHandler
+ Returns : Bio::SearchIO::EventHandlerI
+ Args    : none
+
+See L<Bio::SearchIO::EventHandlerI>
+
+=cut
+
+sub _eventHandler{
+   my ($self) = @_;
+   return $self->{'_handler'};
+}
+
+sub _initialize {
+    my($self, @args) = @_;
+    $self->{'_handler'} = undef;
+    # not really necessary unless we put more in RootI
+    #$self->SUPER::_initialize(@args);
+    
+    # initialize the IO part
+    $self->_initialize_io(@args);
+    $self->attach_EventHandler(new Bio::SearchIO::SearchResultEventBuilder());
+    $self->{'_reporttype'} = '';
+
+    my ( $writer, $rfactory, $hfactory, $use_factories ) =
+      $self->_rearrange([qw(WRITER 
+			    RESULT_FACTORY 
+			    HIT_FACTORY
+			    USE_FACTORIES)], @args);
+
+    $self->writer( $writer ) if $writer;
+
+    # TODO: Resolve this issue:
+    # The $use_factories flag is a temporary hack to allow factory-based and 
+    # non-factory based SearchIO objects to co-exist. 
+    # steve --- Sat Dec 22 04:41:20 2001
+    if( $use_factories) {
+      if( not defined $self->{'_result_factory'}) {
+	$self->result_factory( $rfactory || $self->default_result_factory_class->new );
+      }
+      if( not defined $self->{'_hit_factory'}) {
+	$self->hit_factory( $hfactory || $self->default_hit_factory_class->new );
+      }
+    }
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : $result = stream->next_result
+ Function: Reads the next ResultI object from the stream and returns it.
+
+           Certain driver modules may encounter entries in the stream that
+           are either misformatted or that use syntax not yet understood
+           by the driver. If such an incident is recoverable, e.g., by
+           dismissing a feature of a feature table or some other non-mandatory
+           part of an entry, the driver will issue a warning. In the case
+           of a non-recoverable situation an exception will be thrown.
+           Do not assume that you can resume parsing the same stream after
+           catching the exception. Note that you can always turn recoverable
+           errors into exceptions by calling $stream->verbose(2) (see
+           Bio::Root::RootI POD page).
+ Returns : A Bio::Search::Result::ResultI object
+ Args    : n/a
+
+See L<Bio::Root::RootI>
+
+=cut
+
+sub next_result {
+   my ($self) = @_;
+   $self->throw_not_implemented;
+}
+
+=head2 write_result
+
+ Title   : write_result
+ Usage   : $stream->write_result($result_result, @other_args)
+ Function: Writes data from the $result_result object into the stream.
+         : Delegates to the to_string() method of the associated 
+         : WriterI object.
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Search:Result::ResultI object,
+         : plus any other arguments for the Writer
+ Throws  : Bio::Root::Exception if a Writer has not been set.
+
+See L<Bio::Root::Exception>
+
+=cut
+
+sub write_result {
+   my ($self, $result, @args) = @_;
+
+   if( not ref($self->{'_result_writer'}) ) {
+       $self->throw("ResultWriter not defined.");
+   }
+   my $str = $self->writer->to_string( $result, @args );
+   #print "Got string: \n$str\n";
+   $self->_print( "$str" );
+   
+   return 1;
+}
+
+
+=head2 writer
+
+ Title   : writer
+ Usage   : $writer = $stream->writer;
+ Function: Sets/Gets a SearchWriterI object to be used for this searchIO.
+ Returns : 1 for success and 0 for error
+ Args    : Bio::SearchIO::SearchWriterI object (when setting)
+ Throws  : Bio::Root::Exception if a non-Bio::SearchIO::SearchWriterI object
+           is passed in.
+
+=cut
+
+sub writer {
+    my ($self, $writer) = @_;
+    if( ref($writer) and $writer->isa( 'Bio::SearchIO::SearchWriterI' )) {
+        $self->{'_result_writer'} = $writer;
+    }
+    elsif( defined $writer ) {
+        $self->throw("Can't set ResultWriter. Not a Bio::SearchIO::SearchWriterI: $writer");
+    }
+    return $self->{'_result_writer'};
+}
+
+
+=head2 hit_factory
+
+ Title   : hit_factory
+ Usage   : $hit_factory = $stream->hit_factory;  (get)
+         : $stream->hit_factory( $factory );     (set)
+ Function: Sets/Gets a factory object to create hit objects for this SearchIO
+ Returns : Bio::Factory::HitFactoryI object 
+ Args    : Bio::Factory::HitFactoryI object (when setting)
+ Throws  : Bio::Root::Exception if a non-Bio::Factory::HitFactoryI object  
+           is passed in.
+ Comments: A SearchIO implementation should provide a default hit factory.
+
+See L<Bio::Factory::HitFactoryI>
+
+=cut
+
+sub hit_factory {
+    my ($self, $fact) = @_;
+    if( ref $fact and $fact->isa( 'Bio::Factory::HitFactoryI' )) {
+    	   $self->{'_hit_factory'} = $fact;
+    }
+    elsif( defined $fact ) {
+        $self->throw("Can't set HitFactory. Not a Bio::Factory::HitFactoryI: $fact");
+    }
+    return $self->{'_hit_factory'};
+}
+
+=head2 result_factory
+
+ Title   : result_factory
+ Usage   : $result_factory = $stream->result_factory;  (get)
+         : $stream->result_factory( $factory );        (set)
+ Function: Sets/Gets a factory object to create result objects for this
+           SearchIO.
+ Returns : Bio::Factory::ResultFactoryI object 
+ Args    : Bio::Factory::ResultFactoryI object (when setting)
+ Throws  : Bio::Root::Exception if a non-Bio::Factory::ResultFactoryI object
+           is passed in.
+ Comments: A SearchIO implementation should provide a default result factory.
+
+See L<Bio::Factory::ResultFactoryI>
+
+=cut
+
+sub result_factory {
+    my ($self, $fact) = @_;
+    if( ref $fact and $fact->isa( 'Bio::Factory::ResultFactoryI' )) {
+    	   $self->{'_result_factory'} = $fact;
+    }
+    elsif( defined $fact ) {
+        $self->throw("Can't set ResultFactory. Not a Bio::Factory::ResultFactoryI: $fact");
+    }
+    return $self->{'_result_factory'};
+}
+
+
+=head2 result_count
+
+ Title   : result_count
+ Usage   : $num = $stream->result_count;
+ Function: Gets the number of Blast results that have been parsed.
+ Returns : integer
+ Args    : none
+ Throws  : none
+
+=cut
+
+sub result_count {
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+
+=head2 default_hit_factory_class
+
+ Title   : default_hit_factory_class
+ Usage   : $res_factory = $obj->default_hit_factory_class()->new( @args )
+ Function: Returns the name of the default class to be used for creating
+           Bio::Search::Hit::HitI objects.
+ Example :
+ Returns : A string containing the name of a class that implements 
+           the Bio::Search::Hit::HitI interface.
+ Args    : none
+ Comments: Bio::SearchIO does not implement this method. It throws a
+           NotImplemented exception
+
+See L<Bio::Search::Hit::HitI>
+
+=cut
+
+sub default_hit_factory_class {
+    my $self = shift;
+# TODO: Uncomment this when Jason's SearchIO code conforms
+#    $self->throw_not_implemented;
+}
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL SearchIO stuff*
+ Function: Loads up (like use) a module at run time on demand
+ Example : 
+ Returns : 
+ Args    : 
+
+=cut
+
+sub _load_format_module {
+  my ($self,$format) = @_;
+  my $module = "Bio::SearchIO::" . $format;
+  my $ok;
+  
+  eval {
+      $ok = $self->_load_module($module);
+  };
+  if ( $@ ) {
+      print STDERR <<END;
+$self: $format cannot be found
+Exception $@
+For more information about the SearchIO system please see the SearchIO docs.
+This includes ways of checking for formats at compile time, not run time
+END
+  ;
+  }
+  return $ok;
+}
+
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Function:
+ Example :
+ Returns : guessed format of filename (lower case)
+ Args    :
+
+=cut
+
+
+sub _guess_format {
+   my $class = shift;
+   return unless $_ = shift;
+   return 'blast'   if (/blast/i or /\.bl\w$/i);
+   return 'fasta' if (/fasta/i or /\.fas$/i);
+   return 'blastxml' if (/blast/i and /\.xml$/i);
+   return 'exonerate' if ( /\.exonerate/i or /\.exon/i );
+}
+
+sub close { 
+    my $self = shift;
+    if( $self->writer ) {
+	$self->_print($self->writer->end_report());
+    }
+    $self->SUPER::close(@_);
+}
+
+sub DESTROY {
+    my $self = shift;
+    $self->close();
+}
+
+sub TIEHANDLE {
+  my $class = shift;
+  return bless {processor => shift}, $class;
+}
+
+sub READLINE {
+  my $self = shift;
+  return $self->{'processor'}->next_result() unless wantarray;
+  my (@list, $obj);
+  push @list, $obj while $obj = $self->{'processor'}->next_result();
+  return @list;
+}
+
+sub PRINT {
+  my $self = shift;
+  $self->{'processor'}->write_result(@_);
+}
+
+1;
+
+__END__