diff variant_effect_predictor/Bio/Factory/BlastHitFactory.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Factory/BlastHitFactory.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,249 @@
+#-----------------------------------------------------------------
+# $Id: BlastHitFactory.pm,v 1.7 2002/10/22 09:38:09 sac Exp $
+#
+# BioPerl module for Bio::Factory::BlastHitFactory
+#
+# Cared for by Steve Chervitz <sac@bioperl.org>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::BlastHitFactory - Factory for Bio::Search::Hit::BlastHit objects
+
+=head1 SYNOPSIS
+
+    use Bio::Factory::BlastHitFactory;
+
+    my $hit_fact = Bio::Factory::BlastHitFactory->new();
+
+    my $hit = $hit_fact->create_hit( %parameters );
+
+See documentation for create_hit() for information about C<%parameters>.
+
+=head1 DESCRIPTION
+
+This module encapsulates code for creating Bio::Search::Hit::BlastHit
+and Bio::Search::HSP::BlastHSP objects from traditional BLAST report
+data (i.e., non-XML formatted).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                - General discussion
+  http://bioperl.org/MailList.html     - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via email
+or the web:
+
+  bioperl-bugs@bio.perl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR 
+
+Steve Chervitz E<lt>sac@bioperl.orgE<gt>
+
+See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.
+
+=head1 COPYRIGHT
+
+Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+
+package Bio::Factory::BlastHitFactory;
+
+use strict;
+use Bio::Root::Root;
+use Bio::Factory::HitFactoryI;
+use Bio::Search::Hit::BlastHit;
+
+use vars qw(@ISA);
+
+@ISA = qw(Bio::Root::Root Bio::Factory::HitFactoryI); 
+
+sub new {
+    # Constructor: every argument after the class name is handed,
+    # untouched, to the parent class constructor, and the object it
+    # builds is returned as the factory.
+    my $class   = shift;
+    my $factory = $class->SUPER::new(@_);
+    return $factory;
+}
+
+=head2 create_hit
+
+ Title   : create_hit
+ Usage   : $hit = $factory->create_hit( %params );
+ Function: Creates a new Bio::Search::Hit::BlastHit object given 
+           raw BLAST report data, formatted in traditional BLAST report format.
+ Returns : A single Bio::Search::Hit::BlastHit object
+ Args    : Named parameters to be passed to the BlastHit object.
+           Parameter keys are case-insensitive.
+           See Bio::Search::Hit::BlastHit::new() documentation for 
+           details about these parameters.
+           The only additional parameter required is:
+              -RESULT    => a Bio::Search::Result::BlastResult object.
+           From this result object, the program, query length, 
+           and iteration are obtained and passed on to the BlastHit.
+
+=cut
+
+sub create_hit {
+    my ($self, @args) = @_;
+
+    # Pick out the parameters this factory itself looks at.  The complete
+    # set of named parameters (these included) is still forwarded to the
+    # BlastHit constructor below.
+    my ($blast, $raw_data, $shallow_parse) =
+      $self->_rearrange( [qw(RESULT
+                             RAW_DATA
+                             SHALLOW_PARSE)], @args);
+
+    # -RESULT is documented as required.  Fail with an explicit message
+    # instead of letting the first method call on an undefined value die
+    # with Perl's generic "Can't call method ..." error.
+    defined $blast
+      or $self->throw("create_hit() requires a -RESULT parameter.");
+
+    my %args = @args;
+
+    # Parameter keys are documented as case-insensitive, so a
+    # caller-supplied '-program' (or '-query_len', '-iteration') would
+    # otherwise sit in the hash next to the upper-case keys set below and
+    # the value seen by the hit constructor could depend on hash ordering.
+    # Drop any such spelling so that only the values taken from the result
+    # object are passed on.
+    my %from_result = map { $_ => 1 } qw(PROGRAM QUERY_LEN ITERATION);
+    foreach my $key ( keys %args ) {
+        ( my $canonical = uc $key ) =~ s/^-+//;
+        delete $args{$key} if $from_result{$canonical};
+    }
+
+    $args{'-PROGRAM'}   = $blast->analysis_method;
+    $args{'-QUERY_LEN'} = $blast->query_length;
+    $args{'-ITERATION'} = $blast->iterations;
+
+    my $hit = Bio::Search::Hit::BlastHit->new( %args );
+
+    # HSP objects are only built when a shallow parse was not requested.
+    unless( $shallow_parse ) {
+      $self->_add_hsps( $hit,
+                        $args{'-PROGRAM'},
+                        $args{'-QUERY_LEN'},
+                        $blast->query_name,
+                        @{$raw_data} );
+    }
+
+    return $hit;
+}
+
+#=head2 _add_hsps
+#
+# Usage     : Private method; called automatically by create_hit().
+# Purpose   : Creates a Bio::Search::HSP::BlastHSP object for each HSP found
+#           : in the raw alignment data of one BLAST hit and stores the list
+#           : in the hit object.
+#           : Also sets the hit's description (the lines that precede the
+#           : "Length = N" line) and its length.
+# Returns   : n/a
+# Argument  : (<$BlastHit_object>, <$program_name>, <$query_length>, <$query_name>, <@raw_data>)
+#           : 'raw data list' consists of the traditional BLAST report
+#           : lines for a single hit, supplied as a list of strings.
+#           : $query_length is accepted for interface compatibility but is
+#           : not used by this method.
+# Throws    : Exception if the hit's length has not been set once all data
+#           : lines have been processed.
+#           : Errors raised while constructing a BlastHSP object are not
+#           : trapped here.
+# Comments  : Requires Bio::Search::HSP::BlastHSP.pm.
+#           : An HSP starts at a line beginning with "Score" and ends at the
+#           : next "Score" line, at a terminator line (end, >, Parameters,
+#           : CPU, Database:), or at the end of the data.
+#=cut
+
+#--------------
+sub _add_hsps {
+#--------------
+    my( $self, $hit, $prog, $qlen, $qname, @data ) = @_;
+
+    require Bio::Search::HSP::BlastHSP;
+
+    my $hname     = $hit->name;
+    my $hspCount  = 0;
+    my $in_hsp    = 0;   # true while lines are being collected for an HSP
+    my $have_desc = 0;   # true once the "Length =" line has been seen
+    my( @desc, @hspData, @hspList );
+
+    # Turns the lines collected so far into one BlastHSP object, appends it
+    # to the list and empties the buffer.  The constructor is given its own
+    # copy of the lines so that emptying the buffer cannot affect it.
+    my $finish_hsp = sub {
+        $hspCount++;
+        $self->verbose and do{ print STDERR +( $hspCount % 10 ? "+" : "+\n" ); };
+
+        push @hspList, Bio::Search::HSP::BlastHSP->new
+                              (-RAW_DATA   => [ @hspData ],
+                               -RANK       => $hspCount,
+                               -PROGRAM    => $prog,
+                               -QUERY_NAME => $qname,
+                               -HIT_NAME   => $hname,
+                              );
+        @hspData = ();
+    };
+
+  LINE:
+    foreach my $line ( @data ) {
+
+        if( $line =~ /^\s*Length = ([\d,]+)/ ) {
+            # Save the capture before calling anything else.
+            my $length = $1;
+            $hit->_set_description(@desc);
+            $have_desc = 1;
+            $hit->_set_length($length);
+            next LINE;
+        }
+
+        if( !$have_desc ) {
+            # Everything ahead of the "Length =" line is description text.
+            $line =~ s/^\s+|\s+$//g;
+            push @desc, $line;
+            next LINE;
+        }
+
+        if( $line =~ /^\s*Score/ ) {
+            # A "Score" line opens a new HSP; close the previous one first.
+            $finish_hsp->() if @hspData;
+            $in_hsp = 1;
+            push @hspData, $line;
+            next LINE;
+        }
+
+        # Lines between "Length =" and the first "Score" line, and lines
+        # that follow a terminator, do not belong to any HSP.
+        next LINE unless $in_hsp;
+
+        if( $line =~ /^(end|>|Parameters|CPU|Database:)/ ) {
+            # Terminator: close the last HSP (which may be the first as
+            # well!) and stop collecting, so that later terminator or
+            # trailer lines cannot produce a duplicate HSP from stale data.
+            $finish_hsp->();
+            $in_hsp = 0;
+        } else {
+            push @hspData, $line;
+        }
+    }
+
+    # The data may end without a terminator line; do not silently lose the
+    # HSP that was still being collected.
+    $finish_hsp->() if $in_hsp && @hspData;
+
+    $hit->{'_length'} or $self->throw( "Can't determine hit sequence length.");
+
+    # Adjust logical length based on BLAST flavor.
+    if($prog =~ /TBLAST[NX]/) {
+        $hit->{'_logical_length'} = $hit->{'_length'} / 3;
+    }
+
+    $hit->{'_hsps'} = [ @hspList ];
+
+}
+
+
+
+1;