diff variant_effect_predictor/Bio/SearchIO/SearchResultEventBuilder.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/SearchIO/SearchResultEventBuilder.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,365 @@
+# $Id: SearchResultEventBuilder.pm,v 1.25.2.1 2003/01/17 20:32:54 jason Exp $
+#
+# BioPerl module for Bio::SearchIO::SearchResultEventBuilder
+#
+# Cared for by Jason Stajich <jason@bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::SearchResultEventBuilder - Event Handler for SearchIO events.
+
+=head1 SYNOPSIS
+
+# Do not use this object directly, this object is part of the SearchIO
+# event based parsing system.
+
+=head1 DESCRIPTION
+
+This object handles Search Events generated by the SearchIO classes
+and build appropriate Bio::Search::* objects from them.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org              - General discussion
+  http://bioperl.org/MailList.shtml  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  bioperl-bugs@bioperl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason@bioperl.org
+
+Describe contact details here
+
+=head1 CONTRIBUTORS
+
+Additional contributors names and emails here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO::SearchResultEventBuilder;
+use vars qw(@ISA %KNOWNEVENTS);
+use strict;
+
+use Bio::Root::Root;
+use Bio::SearchIO::EventHandlerI;
+use Bio::Search::HSP::HSPFactory;
+use Bio::Search::Hit::HitFactory;
+use Bio::Search::Result::ResultFactory;
+
+@ISA = qw(Bio::Root::Root Bio::SearchIO::EventHandlerI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::SearchResultEventBuilder();
+ Function: Builds a new Bio::SearchIO::SearchResultEventBuilder object 
+ Returns : Bio::SearchIO::SearchResultEventBuilder
+ Args    : -hsp_factory    => Bio::Factory::ObjectFactoryI
+           -hit_factory    => Bio::Factory::ObjectFactoryI
+           -result_factory => Bio::Factory::ObjectFactoryI
+
+See L<Bio::Factory::ObjectFactoryI> for more information
+
+=cut
+
+sub new { 
+    my ($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+    my ($hspF,$hitF,$resultF) = $self->_rearrange([qw(HSP_FACTORY
+						      HIT_FACTORY
+						      RESULT_FACTORY)],@args);
+    $self->register_factory('hsp', $hspF || Bio::Search::HSP::HSPFactory->new());
+    $self->register_factory('hit', $hitF || Bio::Search::Hit::HitFactory->new());
+    $self->register_factory('result', $resultF || Bio::Search::Result::ResultFactory->new());
+
+    return $self;
+}
+
+# new comes from the superclass
+
+=head2 will_handle
+
+ Title   : will_handle
+ Usage   : if( $handler->will_handle($event_type) ) { ... }
+ Function: Tests if this event builder knows how to process a specific event
+ Returns : boolean
+ Args    : event type name
+
+
+=cut
+
+sub will_handle{
+   my ($self,$type) = @_;
+   # these are the events we recognize
+   return ( $type eq 'hsp' || $type eq 'hit' || $type eq 'result' );
+}
+
+=head2 SAX methods
+
+=cut
+
+=head2 start_result
+
+ Title   : start_result
+ Usage   : $handler->start_result($resulttype)
+ Function: Begins a result event cycle
+ Returns : none 
+ Args    : Type of Report
+
+=cut
+
+sub start_result {
+   my ($self,$type) = @_;
+   $self->{'_resulttype'} = $type;
+   $self->{'_hits'} = [];   
+   $self->{'_hsps'} = [];
+   return;
+}
+
+=head2 end_result
+
+ Title   : end_result
+ Usage   : my @results = $parser->end_result
+ Function: Finishes a result handler cycle 
+ Returns : A Bio::Search::Result::ResultI
+ Args    : none
+
+=cut
+
+sub end_result {
+    my ($self,$type,$data) = @_;    
+    if( defined $data->{'runid'} &&
+	$data->{'runid'} !~ /^\s+$/ ) {	
+
+	if( $data->{'runid'} !~ /^lcl\|/) { 
+	    $data->{"RESULT-query_name"}= $data->{'runid'};
+	} else { 
+	    ($data->{"RESULT-query_name"},$data->{"RESULT-query_description"}) = split(/\s+/,$data->{"RESULT-query_description"},2);
+	}
+	
+	if( my @a = split(/\|/,$data->{'RESULT-query_name'}) ) {
+	    my $acc = pop @a ; # this is for accession |1234|gb|AAABB1.1|AAABB1
+	    # this is for |123|gb|ABC1.1|
+	    $acc = pop @a if( ! defined $acc || $acc =~ /^\s+$/);
+	    $data->{"RESULT-query_accession"}= $acc;
+	}
+	delete $data->{'runid'};
+    }
+    my %args = map { my $v = $data->{$_}; s/RESULT//; ($_ => $v); } 
+               grep { /^RESULT/ } keys %{$data};
+    
+    $args{'-algorithm'} =  uc( $args{'-algorithm_name'} || 
+			       $data->{'RESULT-algorithm_name'} || $type);
+    $args{'-hits'}      =  $self->{'_hits'};
+    my $result = $self->factory('result')->create(%args);
+    $self->{'_hits'} = [];
+    return $result;
+}
+
+=head2 start_hsp
+
+ Title   : start_hsp
+ Usage   : $handler->start_hsp($name,$data)
+ Function: Begins processing a HSP event
+ Returns : none
+ Args    : type of element 
+           associated data (hashref)
+
+=cut
+
+sub start_hsp {
+    my ($self,@args) = @_;
+    return;
+}
+
+=head2 end_hsp
+
+ Title   : end_hsp
+ Usage   : $handler->end_hsp()
+ Function: Finish processing a HSP event
+ Returns : none
+ Args    : type of event and associated hashref
+
+
+=cut
+
+sub end_hsp {
+    my ($self,$type,$data) = @_;
+    # this code is to deal with the fact that Blast XML data
+    # always has start < end and one has to infer strandedness
+    # from the frame which is a problem for the Search::HSP object
+    # which expect to be able to infer strand from the order of 
+    # of the begin/end of the query and hit coordinates
+    if( defined $data->{'HSP-query_frame'} && # this is here to protect from undefs
+	(( $data->{'HSP-query_frame'} < 0 && 
+	   $data->{'HSP-query_start'} < $data->{'HSP-query_end'} ) ||       
+	 $data->{'HSP-query_frame'} > 0 && 
+	 ( $data->{'HSP-query_start'} > $data->{'HSP-query_end'} ) ) 
+	)
+    { 
+	# swap
+	($data->{'HSP-query_start'},
+	 $data->{'HSP-query_end'}) = ($data->{'HSP-query_end'},
+				      $data->{'HSP-query_start'});
+    } 
+    if( defined $data->{'HSP-hit_frame'} && # this is here to protect from undefs
+	((defined $data->{'HSP-hit_frame'} && $data->{'HSP-hit_frame'} < 0 && 
+	  $data->{'HSP-hit_start'} < $data->{'HSP-hit_end'} ) ||       
+	 defined $data->{'HSP-hit_frame'} && $data->{'HSP-hit_frame'} > 0 && 
+	 ( $data->{'HSP-hit_start'} > $data->{'HSP-hit_end'} ) )
+	) 
+    { 
+	# swap
+	($data->{'HSP-hit_start'},
+	 $data->{'HSP-hit_end'}) = ($data->{'HSP-hit_end'},
+				    $data->{'HSP-hit_start'});
+    }
+    $data->{'HSP-query_frame'} ||= 0;
+    $data->{'HSP-hit_frame'} ||= 0;
+    # handle Blast 2.1.2 which did not support data member: hsp_align-len
+    $data->{'HSP-query_length'} ||= length ($data->{'HSP-query_seq'} || '');
+    $data->{'HSP-hit_length'}   ||= length ($data->{'HSP-hit_seq'} || '');
+    $data->{'HSP-hsp_length'}   ||= length ($data->{'HSP-homology_seq'} || '');
+    
+    my %args = map { my $v = $data->{$_}; s/HSP//; ($_ => $v) } 
+               grep { /^HSP/ } keys %{$data};
+    
+    $args{'-algorithm'} =  uc( $args{'-algorithm_name'} || 
+			       $data->{'RESULT-algorithm_name'} || $type);
+    # copy this over from result
+    $args{'-query_name'} = $data->{'RESULT-query_name'};
+    $args{'-hit_name'} = $data->{'HIT-name'};
+    my ($rank) = scalar @{$self->{'_hsps'}} + 1;
+    $args{'-rank'} = $rank;
+
+    my $hsp = $self->factory('hsp')->create(%args);
+    push @{$self->{'_hsps'}}, $hsp;
+    return $hsp;
+}
+
+
+=head2 start_hit
+
+ Title   : start_hit
+ Usage   : $handler->start_hit()
+ Function: Starts a Hit event cycle
+ Returns : none
+ Args    : type of event and associated hashref
+
+
+=cut
+
+sub start_hit{
+    my ($self,$type) = @_;
+    $self->{'_hsps'} = [];    
+    return;
+}
+
+
+=head2 end_hit
+
+ Title   : end_hit
+ Usage   : $handler->end_hit()
+ Function: Ends a Hit event cycle
+ Returns : Bio::Search::Hit::HitI object
+ Args    : type of event and associated hashref
+
+
+=cut
+
+sub end_hit{
+    my ($self,$type,$data) = @_;   
+    my %args = map { my $v = $data->{$_}; s/HIT//; ($_ => $v); } grep { /^HIT/ } keys %{$data};
+
+    # I hate special cases, but this is here because NCBI BLAST XML
+    # doesn't play nice and is undergoing mutation -jason
+    if( $args{'-name'} =~ /BL_ORD_ID/ ) {
+	($args{'-name'}, $args{'-description'}) = split(/\s+/,$args{'-description'},2);
+    }    
+    $args{'-algorithm'} =  uc( $args{'-algorithm_name'} || 
+			       $data->{'RESULT-algorithm_name'} || $type);
+    $args{'-hsps'}      = $self->{'_hsps'};
+    $args{'-query_len'} =  $data->{'RESULT-query_length'};
+    my ($hitrank) = scalar @{$self->{'_hits'}} + 1;
+    $args{'-rank'} = $hitrank;
+    my $hit = $self->factory('hit')->create(%args);
+    push @{$self->{'_hits'}}, $hit;
+    $self->{'_hsps'} = [];
+    return $hit;
+}
+
+=head2 Factory methods
+
+=cut
+
+=head2 register_factory
+
+ Title   : register_factory
+ Usage   : $handler->register_factory('TYPE',$factory);
+ Function: Register a specific factory for a object type class
+ Returns : none
+ Args    : string representing the class and
+           Bio::Factory::ObjectFactoryI
+
+See L<Bio::Factory::ObjectFactoryI> for more information
+
+=cut
+
+sub register_factory{
+   my ($self, $type,$f) = @_;
+   if( ! defined $f || ! ref($f) || 
+       ! $f->isa('Bio::Factory::ObjectFactoryI') ) { 
+       $self->throw("Cannot set factory to value $f".ref($f)."\n");
+   }
+   $self->{'_factories'}->{lc($type)} = $f;
+}
+
+
+=head2 factory
+
+ Title   : factory
+ Usage   : my $f = $handler->factory('TYPE');
+ Function: Retrieves the associated factory for requested 'TYPE'
+ Returns : a Bio::Factory::ObjectFactoryI or undef if none registered
+ Args    : name of factory class to retrieve
+
+See L<Bio::Factory::ObjectFactoryI> for more information
+
+=cut
+
+sub factory{
+   my ($self,$type) = @_;
+   return $self->{'_factories'}->{lc($type)} || $self->throw("No factory registered for $type");
+}
+
+
+1;