diff variant_effect_predictor/Bio/Index/Fasta.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Index/Fasta.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,223 @@
+#
+# $Id: Fasta.pm,v 1.20 2002/10/22 07:38:33 lapp Exp $
+#
+# BioPerl module for Bio::Index::Fasta
+#
+# Cared for by James Gilbert <jgrg@sanger.ac.uk>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Index::Fasta - Interface for indexing (multiple) fasta files
+
+=head1 SYNOPSIS
+
+    # Complete code for making an index for several
+    # fasta files
+    use Bio::Index::Fasta;
+    use strict;
+
+    my $Index_File_Name = shift;
+    my $inx = Bio::Index::Fasta->new(
+        '-filename' => $Index_File_Name,
+        '-write_flag' => 1);
+    $inx->make_index(@ARGV);
+
+    # Print out several sequences present in the index
+    # in Fasta format
+    use Bio::Index::Fasta;
+    use strict;
+
+    my $Index_File_Name = shift;
+    my $inx = Bio::Index::Fasta->new('-filename' => $Index_File_Name);
+    my $out = Bio::SeqIO->new('-format' => 'Fasta','-fh' => \*STDOUT);
+
+    foreach my $id (@ARGV) {
+        my $seq = $inx->fetch($id); # Returns Bio::Seq object
+	$out->write_seq($seq);
+    }
+
+    # or, alternatively
+
+    my $seq = $inx->get_Seq_by_id($id); #identical to fetch
+
+=head1 DESCRIPTION
+
+Inherits functions for managing dbm files from Bio::Index::Abstract.pm,
+and provides the basic functionality for indexing fasta files, and
+retrieving the sequence from them. Note: for best results 'use strict'.
+
+Bio::Index::Fasta supports the Bio::DB::BioSeqI interface, meaning
+it can be used as a sequence database for other parts of bioperl.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org             - General discussion
+  http://bioperl.org/MailList.shtml - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+email or the web:
+
+  bioperl-bugs@bio.perl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - James Gilbert
+
+Email - jgrg@sanger.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Index::Fasta;
+
+use vars qw($VERSION @ISA);
+use strict;
+
+use Bio::Index::AbstractSeq;
+use Bio::Seq;
+
+@ISA = qw(Bio::Index::AbstractSeq);
+
+#
+# Suggested fix by Michael G Schwern <schwern@pobox.com> to
+# get around a clash with CPAN shell...
+#
+# $VERSION is assigned inside a BEGIN block, so it is set at compile
+# time rather than when the module body runs.
+#
+
+BEGIN { 
+    $VERSION = 0.2;
+}
+
+# Accessor for the package-level $VERSION of this module.
+sub _version {
+    my $version = $VERSION;
+    return $version;
+}
+
+=head2 _file_format
+
+ Title   : _file_format
+ Usage   : $format = $index->_file_format
+ Function: Names the sequence file format handled by this
+           package; the SeqIO system needs it when reading
+           a sequence back.
+ Returns : the string 'Fasta'
+ Args    : none
+
+=cut
+
+sub _file_format {
+    my $format = 'Fasta';
+    return $format;
+}
+
+
+
+=head2 _index_file
+
+  Title   : _index_file
+  Usage   : $index->_index_file( $file_name, $i )
+  Function: Specialist function to index FASTA format files.
+            Is provided with a filename and an integer
+            by make_index in its SUPER class.  For every
+            line starting with '>' the id_parser is applied
+            to that line, and each ID it returns is stored
+            via add_record together with $i and the offset
+            of the first character after the '>'.
+  Example : $index->_index_file( 'seqs.fa', 0 )
+  Returns : 1 on success
+  Args    : $file_name - name of the FASTA file to index
+            $i         - index number of the file being indexed
+  Throws  : via $self->throw if the file cannot be opened
+
+=cut
+
+sub _index_file {
+    my( $self,
+        $file, # File name
+        $i,    # Index-number of file being indexed
+        ) = @_;
+
+    my $id_parser = $self->id_parser;
+
+    # Three-argument open with a lexical handle: the file name is never
+    # interpreted as a mode or a pipe, and no package-global handle is
+    # shared between calls.
+    open my $fasta_fh, '<', $file
+        or $self->throw("Can't open file for read : $file ($!)");
+
+    # The read loop assigns to $_; localise it so the caller's $_ is
+    # left untouched.  $_ is kept (rather than a lexical) so that
+    # user-supplied id parsers which look at $_ keep working.
+    local $_;
+
+    # Main indexing loop
+    while (<$fasta_fh>) {
+        next unless /^>/;
+
+        # tell() is now just past the header line, so stepping back by
+        # the line length gives the '>' itself; +1 moves to the first
+        # character after the '>'.
+        my $begin = tell($fasta_fh) - length($_) + 1;
+
+        # A parser may return several IDs for one header line.
+        foreach my $id ($id_parser->($_)) {
+            $self->add_record($id, $i, $begin);
+        }
+    }
+
+    close $fasta_fh;
+    return 1;
+}
+
+=head2 id_parser
+
+  Title   : id_parser
+  Usage   : $index->id_parser( CODE )
+  Function: Gets or sets the code reference used to pull the
+            ID(s) out of a fasta description line.  Handy
+            when different flavours of FASTA file need
+            different parsers.  When called with a true
+            argument, that argument is stored first.  If
+            nothing has been stored, \&default_id_parser
+            (see below) is returned.  A custom parser is
+            handed a fasta description line and the index
+            gets one entry for each string in the list it
+            returns.
+  Example : $index->id_parser( \&my_id_parser )
+  Returns : ref to CODE (the stored parser, or the default)
+  Args    : CODE (optional)
+
+=cut
+
+sub id_parser {
+    my $self = shift;
+    my $code = shift;
+
+    # Only a true argument replaces the stored parser.
+    $self->{'_id_parser'} = $code if $code;
+
+    my $parser = $self->{'_id_parser'};
+    return $parser ? $parser : \&default_id_parser;
+}
+
+
+
+=head2 default_id_parser
+
+  Title   : default_id_parser
+  Usage   : $id = default_id_parser( $header )
+  Function: The ID parser used by Fasta.pm when none has been
+            set.  Matches $header against /^>\s*(\S+)/ and
+            hands back the captured text.
+  Returns : ID string, or nothing (empty list / undef) when
+            $header does not match
+  Args    : a fasta header line string
+
+=cut
+
+sub default_id_parser {
+    my ($header) = @_;
+
+    # Not a header line with an ID: return nothing.
+    return unless $header =~ /^>\s*(\S+)/;
+
+    return $1;
+}
+
+1;