Mercurial > repos > mahtabm > ensembl

diff variant_effect_predictor/Bio/Index/Blast.pm @ 0:1f6dce3d34e0
Uploaded
author: mahtabm
date: Thu, 11 Apr 2013 02:01:53 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Index/Blast.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,468 @@
+# $Id: Blast.pm,v 1.8.2.1 2003/06/28 21:57:04 jason Exp $
+#
+# BioPerl module for Bio::Index::Blast
+#
+# Cared for by Jason Stajich <jason@cgt.mc.duke.edu>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Index::Blast - Indexes Blast reports and supports retrieval based on query accession(s)
+
+=head1 SYNOPSIS
+
+    use strict;
+    use Bio::Index::Blast;
+    my $index = new Bio::Index::Blast(-filename => $indexfile,
+				      -write_flag => 1);
+    $index->make_index($file1, $file2);
+
+    my $data = $index->get_stream($id);
+
+    my $bplite_report = $index->fetch_report($id);
+    print "query is ", $bplite_report->query, "\n";
+    while( my $sbjct = $bplite_report->nextSbjct ) {
+	print $sbjct->name, "\n";
+	while( my $hsp = $sbjct->nextHSP ) {
+	    print "\t e-value ", $hsp->P,
+	}
+	print "\n";
+    }
+
+=head1 DESCRIPTION
+
+This object allows one to build an index on a blast file (or files)
+and provide quick access to the blast report for that accession.
+Note: for best results 'use strict'.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org            - General discussion
+http://bioperl.org/MailList.shtml  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  bioperl-bugs@bioperl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason@cgt.mc.duke.edu
+
+Describe contact details here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Index::Blast;
+use vars qw(@ISA $VERSION);
+use strict;
+
+use Bio::Root::Root;
+use Bio::Index::Abstract;
+use Bio::Tools::BPlite;
+use IO::String;
+
+@ISA = qw(Bio::Index::Abstract Bio::Root::Root );
+
+BEGIN { 
+    $VERSION = 0.1;
+}
+
+sub _version {
+    return $VERSION;
+}
+
+=head2 new
+
+  Usage   : $index = Bio::Index::Abstract->new(
+                -filename    => $dbm_file,
+                -write_flag  => 0,  
+                -dbm_package => 'DB_File',
+                -verbose     => 0);
+  Function: Returns a new index object.  If filename is
+            specified, then open_dbm() is immediately called. 
+            Bio::Index::Abstract->new() will usually be called
+            directly only when opening an existing index.
+  Returns : A new index object
+  Args    : -filename    The name of the dbm index file.
+            -write_flag  TRUE if write access to the dbm file is
+                         needed.
+            -dbm_package The Perl dbm module to use for the
+                         index.
+            -verbose     Print debugging output to STDERR if
+                         TRUE.
+
+=cut
+
+sub new {
+
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+
+}
+
+=head2 Bio::Index::Blast implemented methods
+
+=cut
+
+=head2 fetch_report
+
+ Title   : fetch_report
+ Usage   : my $blastreport = $idx->fetch_report($id);
+ Function: Returns a Bio::Tools::BPlite report object 
+           for a specific blast report
+ Returns : Bio::Tools::BPlite
+ Args    : valid id
+
+=cut
+
+sub fetch_report{
+    my ($self,$id) = @_;
+    my $fh = $self->get_stream($id);
+    my $report = new Bio::Tools::BPlite(-fh      => $fh,
+					-noclose => 1);
+    return $report;
+}
+
+
+# shamlessly stolen from Bio::Index::Fasta
+
+=head2 id_parser
+
+  Title   : id_parser
+  Usage   : $index->id_parser( CODE )
+  Function: Stores or returns the code used by record_id to
+            parse the ID for record from a string.  Useful
+            for (for instance) specifying a different
+            parser for different flavours of blast dbs. 
+            Returns \&default_id_parser (see below) if not
+            set. If you supply your own id_parser
+            subroutine, then it should expect a fasta
+            description line.  An entry will be added to
+            the index for each string in the list returned.
+  Example : $index->id_parser( \&my_id_parser )
+  Returns : ref to CODE if called without arguments
+  Args    : CODE
+
+=cut
+
+sub id_parser {
+    my( $self, $code ) = @_;
+    
+    if ($code) {
+        $self->{'_id_parser'} = $code;
+    }
+    return $self->{'_id_parser'} || \&default_id_parser;
+}
+
+
+
+=head2 default_id_parser
+
+  Title   : default_id_parser
+  Usage   : $id = default_id_parser( $header )
+  Function: The default Blast Query ID parser for Bio::Index::Blast.pm
+            Returns $1 from applying the regexp /^>\s*(\S+)/
+            to $header.
+  Returns : ID string
+  Args    : a header line string
+
+=cut
+
+sub default_id_parser {    
+    if ($_[0] =~ /^\s*(\S+)/) {
+        return $1;
+    } else {
+        return;
+    }
+}
+
+=head2 Require methods from Bio::Index::Abstract
+
+=cut
+
+=head2 _index_file
+
+  Title   : _index_file
+  Usage   : $index->_index_file( $file_name, $i )
+  Function: Specialist function to index BLAST report file(s).
+            Is provided with a filename and an integer
+            by make_index in its SUPER class.
+  Example : 
+  Returns : 
+  Args    : 
+
+=cut
+
+sub _index_file {
+    my( $self,
+        $file, # File name
+        $i,    # Index-number of file being indexed
+        ) = @_;
+    
+    my( $begin,     # Offset from start of file of the start
+                    # of the last found record.
+        );
+
+    open(BLAST, "<$file") or die("cannot open file $file\n");
+    
+    my (@data, @records);
+    my $indexpoint = 0;
+    my $lastline = 0;
+
+    while(<BLAST> ) {	
+	if( /(T)?BLAST[PNX]/ ) {
+	    if( @data ) { 
+		# if we have already read a report
+		# then store the data for this report 
+		# in the CURRENT index
+		$self->_process_report($indexpoint, $i,join("",@data));
+		
+	    } # handle fencepost problem (beginning) 
+	      # by skipping here when empty
+
+	    # since we are at the beginning of a new report
+	    # store this begin location for the next index	   
+	    $indexpoint = $lastline;
+	    @data = ();
+	}
+	push @data, $_;
+	$lastline = tell(BLAST);
+    }
+    # handle fencepost problem (end)
+    if( @data ) {
+	$self->_process_report($indexpoint,$i,join("",@data));
+    }
+}
+
+sub _process_report {
+    my ($self,$begin,$i,$data) = @_;
+    
+    if( ! $data ) { 
+	$self->warn("calling _process_report without a valid data string"); 
+	return ; 
+    }
+    my $id_parser = $self->id_parser;
+
+    my $datal = new IO::String($data);
+    my $report = new Bio::Tools::BPlite(-fh      => $datal,
+					-noclose => 1);
+    
+    my $query = $report->query;		
+    foreach my $id (&$id_parser($query)) {
+	print "id is $id, begin is $begin\n" if( $self->verbose > 0);
+	$self->add_record($id, $i, $begin);
+    }
+}
+=head2 Bio::Index::Abstract methods
+
+=head2 filename
+
+ Title   : filename
+ Usage   : $value = $self->filename();
+           $self->filename($value);
+ Function: Gets or sets the name of the dbm index file.
+ Returns : The current value of filename
+ Args    : Value of filename if setting, or none if
+           getting the value.
+
+=head2 write_flag
+
+ Title   : write_flag
+ Usage   : $value = $self->write_flag();
+           $self->write_flag($value);
+ Function: Gets or sets the value of write_flag, which
+           is wether the dbm file should be opened with
+           write access.
+ Returns : The current value of write_flag (default 0)
+ Args    : Value of write_flag if setting, or none if
+           getting the value.
+
+=head2 dbm_package
+
+ Usage   : $value = $self->dbm_package();
+           $self->dbm_package($value);
+
+ Function: Gets or sets the name of the Perl dbm module used. 
+           If the value is unset, then it returns the value of
+           the package variable $USE_DBM_TYPE or if that is
+           unset, then it chooses the best available dbm type,
+           choosing 'DB_File' in preference to 'SDBM_File'. 
+           Bio::Abstract::Index may work with other dbm file
+           types.
+
+ Returns : The current value of dbm_package
+ Args    : Value of dbm_package if setting, or none if
+           getting the value.
+
+
+=head2 get_stream
+
+ Title   : get_stream
+ Usage   : $stream = $index->get_stream( $id );
+ Function: Returns a file handle with the file pointer
+           at the approprite place
+
+           This provides for a way to get the actual
+           file contents and not an object 
+
+           WARNING: you must parse the record deliminter
+           *yourself*. Abstract wont do this for you 
+           So this code
+
+           $fh = $index->get_stream($myid);
+           while( <$fh> ) {
+              # do something
+           }
+           will parse the entire file if you don't put in
+           a last statement in, like
+
+           while( <$fh> ) {
+              /^\/\// && last; # end of record
+              # do something
+           }
+
+ Returns : A filehandle object
+ Args    : string represents the accession number
+ Notes   : This method should not be used without forethought 
+
+
+=head2 open_dbm
+
+  Usage   : $index->open_dbm()
+  Function: Opens the dbm file associated with the index
+            object.  Write access is only given if explicitly
+            asked for by calling new(-write => 1) or having set
+            the write_flag(1) on the index object.  The type of
+            dbm file opened is that returned by dbm_package(). 
+            The name of the file to be is opened is obtained by
+            calling the filename() method.
+
+  Example : $index->_open_dbm()
+  Returns : 1 on success
+
+
+=head2 _version
+
+  Title   : _version
+  Usage   : $type = $index->_version()
+  Function: Returns a string which identifes the version of an
+            index module.  Used to permanently identify an index
+            file as having been created by a particular version
+            of the index module.  Must be provided by the sub class
+  Example : 
+  Returns : 
+  Args    : none
+
+=head2 _filename
+
+  Title   : _filename
+  Usage   : $index->_filename( FILE INT )
+  Function: Indexes the file
+  Example : 
+  Returns : 
+  Args    : 
+
+=head2 _file_handle
+
+  Title   : _file_handle
+  Usage   : $fh = $index->_file_handle( INT )
+  Function: Returns an open filehandle for the file
+            index INT.  On opening a new filehandle it
+            caches it in the @{$index->_filehandle} array.
+            If the requested filehandle is already open,
+            it simply returns it from the array.
+  Example : $fist_file_indexed = $index->_file_handle( 0 );
+  Returns : ref to a filehandle
+  Args    : INT
+
+=head2 _file_count
+
+  Title   : _file_count
+  Usage   : $index->_file_count( INT )
+  Function: Used by the index building sub in a sub class to
+            track the number of files indexed.  Sets or gets
+            the number of files indexed when called with or
+            without an argument.
+  Example : 
+  Returns : INT
+  Args    : INT
+
+
+=head2 add_record
+
+  Title   : add_record
+  Usage   : $index->add_record( $id, @stuff );
+  Function: Calls pack_record on @stuff, and adds the result
+            of pack_record to the index database under key $id.
+            If $id is a reference to an array, then a new entry
+            is added under a key corresponding to each element
+            of the array.
+  Example : $index->add_record( $id, $fileNumber, $begin, $end )
+  Returns : TRUE on success or FALSE on failure
+  Args    : ID LIST
+
+=head2 pack_record
+
+  Title   : pack_record
+  Usage   : $packed_string = $index->pack_record( LIST )
+  Function: Packs an array of scalars into a single string
+            joined by ASCII 034 (which is unlikely to be used
+            in any of the strings), and returns it. 
+  Example : $packed_string = $index->pack_record( $fileNumber, $begin, $end )
+  Returns : STRING or undef
+  Args    : LIST
+
+=head2 unpack_record
+
+  Title   : unpack_record
+  Usage   : $index->unpack_record( STRING )
+  Function: Splits the sting provided into an array,
+            splitting on ASCII 034.
+  Example : ( $fileNumber, $begin, $end ) = $index->unpack_record( $self->db->{$id} )
+  Returns : A 3 element ARRAY
+  Args    : STRING containing ASCII 034
+
+=head2 DESTROY
+
+ Title   : DESTROY
+ Usage   : Called automatically when index goes out of scope
+ Function: Closes connection to database and handles to
+           sequence files
+ Returns : NEVER
+ Args    : NONE
+
+
+=cut
+
+
+1;
+
+
+
+1;
author	mahtabm
date	Thu, 11 Apr 2013 02:01:53 -0400
parents
children