diff variant_effect_predictor/Bio/DB/FileCache.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/DB/FileCache.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,323 @@
+# POD documentation - main docs before the code
+#
+#
+
+=head1 NAME
+
+Bio::DB::FileCache - In file cache for BioSeq objects
+
+=head1 SYNOPSIS
+
+
+
+  $cachedb = Bio::DB::FileCache->new($real_db);
+
+  #
+  # $real_db is a Bio::DB::RandomAccessI database
+  #
+
+  $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN');
+
+  #
+  # $seq is a Bio::Seq object
+  #
+
+  # more control provided with named-parameter form
+
+  $cachedb = Bio::DB::FileCache->new( -seqdb => $real_db,
+				      -file  => $path,
+				      -keep  => $flag,
+				    );
+=head1 DESCRIPTION
+
+This is a disk cache system which saves the objects returned by
+Bio::DB::RandomAccessI on disk.  The disk cache grows without limit,
+while the process is running, but is automatically unlinked at process
+termination unless the -keep flag is set.
+
+This module requires DB_File and Storable.
+
+=head1 CONTACT
+
+Lincoln Stein
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution. Bug reports can be submitted via email
+or the web:
+
+    bioperl-bugs@bio.perl.org
+    http://bugzilla.bioperl.org/
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::FileCache;
+
+use DB_File;
+use Storable qw(freeze thaw);
+use Fcntl qw(O_CREAT O_RDWR O_RDONLY);
+use File::Temp 'tmpnam';
+
+use vars qw(@ISA);
+use strict;
+
+use Bio::Root::Root;
+
+@ISA = qw(Bio::Root::Root Bio::DB::SeqI);
+
+use Bio::DB::SeqI;
+use Bio::Seq::RichSeq;
+use Bio::Location::Split;
+use Bio::Location::Fuzzy;
+use Bio::Seq;
+use Bio::SeqFeature::Generic;
+use Bio::Species;
+use Bio::Annotation::Collection;
+
+=head2 new
+
+ Title   : new
+ Usage   : $db = Bio::DB::FileCache->new(
+                 -seqdb => $db,   # Bio::DB::RandomAccessI database
+                 -file  => $path, # path to index file
+                 -keep  => $flag, # don't unlink index file
+          )
+ Function: creates a new on-disk cache
+ Returns : a Bio::DB::RandomAccessI database
+ Args    : as above
+ Throws  : "Must be a randomaccess database" exception
+           "Could not open primary index file" exception
+
+If no index file is specified, will create a temporary file in your
+system's temporary file directory.  The name of this temporary file
+can be retrieved using file_name().
+
+=cut
+
+sub new {
+    my ($class,@args) = @_;
+
+    my $self = Bio::Root::Root->new();
+    bless $self,$class;
+
+    my ($seqdb,$file_name,$keep) = $self->_rearrange([qw(SEQDB FILE KEEP)],@args);
+
+    if( !defined $seqdb || !ref $seqdb || !$seqdb->isa('Bio::DB::RandomAccessI') ) {
+       $self->throw("Must be a randomaccess database not a [$seqdb]");
+    }
+
+    $self->seqdb($seqdb);
+    $file_name ||= tmpnam();
+    $self->file_name($file_name);
+    $self->keep($keep);
+
+    $self->_open_database($file_name);
+    return $self;
+}
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
+ Function: Gets a Bio::Seq object by its name
+ Returns : a Bio::Seq object
+ Args    : the id (as a string) of a sequence
+ Throws  : "id does not exist" exception
+
+
+=cut
+
+sub get_Seq_by_id{
+   my ($self,$id) = @_;
+
+   # look in the cache first
+   my $obj = $self->_get('id' => $id);
+   return $obj if defined $obj;
+
+   # get object from seqdb
+   $obj = $self->seqdb->get_Seq_by_id($id);
+   $self->_store('id' => $id, $obj);
+
+   return $obj;
+}
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   : $seq = $db->get_Seq_by_acc('X77802');
+ Function: Gets a Bio::Seq object by accession number
+ Returns : A Bio::Seq object
+ Args    : accession number (as a string)
+ Throws  : "acc does not exist" exception
+
+
+=cut
+
+sub get_Seq_by_acc{
+   my ($self,$acc) = @_;
+
+   # look in the cache first
+   my $obj = $self->_get('acc' => $acc);
+   return $obj if defined $obj;
+
+   # get object from seqdb
+   $obj = $self->seqdb->get_Seq_by_acc($acc);
+   $self->_store('acc' => $acc, $obj);
+
+   return $obj;
+}
+
+=head2 seqdb
+
+ Title   : seqdb
+ Usage   : $seqdb = $db->seqdb([$seqdb])
+ Function: gets/sets the Bio::DB::RandomAccessI database
+ Returns : a Bio::DB::RandomAccessI database
+ Args    : new sequence database (optional)
+ Throws  : nothing
+
+=cut
+
+sub seqdb {
+    my ($self, $seqdb) = @_;
+    if ($seqdb) {
+        $self->{'seqdb'} = $seqdb;
+    } else {
+        return $self->{'seqdb'};
+    }
+}
+
+=head2 file_name
+
+ Title   : file_name
+ Usage   : $path = $db->file_name([$file_name])
+ Function: gets/sets the name of the cache file
+ Returns : a path
+ Args    : new cache file name (optional)
+ Throws  : nothing
+
+It probably isn't useful to set the cache file name after you've
+opened it.
+
+=cut
+
+#'
+
+sub file_name {
+  my $self = shift;
+  my $d = $self->{file_name};
+  $self->{file_name} = shift if @_;
+  $d;
+}
+
+=head2 keep
+
+ Title   : keep
+ Usage   : $keep = $db->keep([$flag])
+ Function: gets/sets the value of the "keep" flag
+ Returns : current value
+ Args    : new value (optional)
+ Throws  : nothing
+
+The keep flag will cause the index file to be unlinked when the
+process exits.  Since on some operating systems (Unix, OS/2) the
+unlinking occurs during the new() call immediately after opening the
+file, it probably isn't safe to change this value.
+
+=cut
+
+sub keep {
+  my $self = shift;
+  my $d = $self->{keep};
+  $self->{keep} = shift if @_;
+  $d;
+}
+
+=head2 db
+
+ Title   : db
+ Usage   : $db->db
+ Function: returns tied hash to index database
+ Returns : a Berkeley DB tied hashref
+ Args    : none
+ Throws  : nothing
+
+=cut
+
+sub db { shift->{db} }
+
+=head2 flush
+
+ Title   : flush
+ Usage   : $db->flush
+ Function: flushes the cache
+ Returns : nothing
+ Args    : none
+ Throws  : nothing
+
+=cut
+
+sub flush {
+  my $db = shift->db or return;
+  %{$db} = ();
+}
+
+sub _get {
+  my $self = shift;
+  my ($type,$id) = @_;
+  my $serialized = $self->db->{"${type}_${id}"};
+  my $obj = thaw($serialized);
+  $obj;
+}
+
+sub _store {
+  my $self = shift;
+  my ($type,$id,$obj) = @_;
+  my $serialized = freeze($obj);
+  $self->db->{"${type}_${id}"} = $serialized;
+}
+
+=head2 get_Seq_by_version
+
+ Title   : get_Seq_by_version
+ Usage   : $seq = $db->get_Seq_by_version('X77802.1');
+ Function: Gets a Bio::Seq object by sequence version
+ Returns : A Bio::Seq object
+ Args    : accession.version (as a string)
+ Throws  : "acc.version does not exist" exception
+
+=cut
+
+sub get_Seq_by_version{
+   my ($self,@args) = @_;
+   $self->throw("Not implemented it");
+}
+
+sub DESTROY {
+  my $self = shift;
+  unlink $self->file_name unless $self->keep;
+}
+
+
+sub _open_database {
+  my $self = shift;
+  my $file = shift;
+  my $flags = O_CREAT|O_RDWR;
+  my %db;
+  tie(%db,'DB_File',$file,$flags,0666,$DB_BTREE)
+    or $self->throw("Could not open primary index file");
+  $self->{db} = \%db;
+  unlink $file unless $self->keep;
+}
+
+## End of Package
+
+1;