diff variant_effect_predictor/Bio/DB/InMemoryCache.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/DB/InMemoryCache.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,258 @@
+# POD documentation - main docs before the code
+#
+#
+
+=head1 NAME
+
+Bio::DB::InMemoryCache - Abstract interface for a sequence database
+
+=head1 SYNOPSIS
+
+  $cachedb = Bio::DB::InMemoryCache->new( -seqdb => $real_db,
+                                          -number => 1000);
+
+  #
+  # get a database object somehow using a concrete class
+  #
+
+  $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN');
+
+  #
+  # $seq is a Bio::Seq object
+  #
+
+=head1 DESCRIPTION
+
+This is a memory cache system which saves the objects returned by Bio::DB::RandomAccessI in 
+memory to a hard limit of sequences.
+
+=head1 CONTACT
+
+Ewan Birney
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution. Bug reports can be submitted via email
+or the web:
+
+    bioperl-bugs@bio.perl.org                   
+    http://bugzilla.bioperl.org/           
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::DB::InMemoryCache;
+
+use Bio::DB::SeqI;
+
+use vars qw(@ISA);
+use strict;
+
+use Bio::Root::Root;
+use Bio::Seq;
+
+@ISA = qw(Bio::Root::Root Bio::DB::SeqI);
+
+
+sub new {
+    my ($class,@args) = @_;
+
+    my $self = Bio::Root::Root->new();
+    bless $self,$class;
+
+    my ($seqdb,$number,$agr) = $self->_rearrange([qw(SEQDB NUMBER AGRESSION)],@args);
+
+    if( !defined $seqdb || !ref $seqdb || !$seqdb->isa('Bio::DB::RandomAccessI') ) {
+       $self->throw("Must be a randomaccess database not a [$seqdb]");
+    }
+    if( !defined $number ) {
+        $number = 1000;
+    }
+    
+    $self->seqdb($seqdb);
+    $self->number($number);
+    $self->agr($agr);
+
+    # we consider acc as the primary id here
+    $self->{'_cache_number_hash'} = {};
+    $self->{'_cache_id_hash'}     = {};
+    $self->{'_cache_acc_hash'}    = {};
+    $self->{'_cache_number'}      = 1;
+
+    return $self;
+}
+
+
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
+ Function: Gets a Bio::Seq object by its name
+ Returns : a Bio::Seq object
+ Args    : the id (as a string) of a sequence
+ Throws  : "id does not exist" exception
+
+
+=cut
+
+sub get_Seq_by_id{
+   my ($self,$id) = @_;
+
+   if( defined $self->{'_cache_id_hash'}->{$id} ) {
+     my $acc = $self->{'_cache_id_hash'}->{$id};
+     my $seq = $self->{'_cache_acc_hash'}->{$acc};
+     $self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++;
+     return $seq;
+   } else {
+     return $self->_load_Seq('id',$id);
+   }
+}
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   : $seq = $db->get_Seq_by_acc('X77802');
+ Function: Gets a Bio::Seq object by accession number
+ Returns : A Bio::Seq object
+ Args    : accession number (as a string)
+ Throws  : "acc does not exist" exception
+
+
+=cut
+
+sub get_Seq_by_acc{
+   my ($self,$acc) = @_;
+
+   #print STDERR "In cache get for $acc\n";
+   if( defined $self->{'_cache_acc_hash'}->{$acc} ) {
+       #print STDERR "Returning cached $acc\n";
+       my $seq = $self->{'_cache_acc_hash'}->{$acc};
+       $self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++;
+       return $seq;
+   } else {
+     return $self->_load_Seq('acc',$acc);
+   }
+}
+
+
+
+sub number {
+    my ($self, $number) = @_;
+    if ($number) {
+        $self->{'number'} = $number;
+    } else {
+        return $self->{'number'};
+    }
+}
+
+sub seqdb {
+    my ($self, $seqdb) = @_;
+    if ($seqdb) {
+        $self->{'seqdb'} = $seqdb;
+    } else {
+        return $self->{'seqdb'};
+    }
+}
+
+sub agr {
+    my ($self, $agr) = @_;
+    if ($agr) {
+        $self->{'agr'} = $agr;
+    } else {
+        return $self->{'agr'};
+    }
+}
+
+
+sub _load_Seq {
+  my ($self,$type,$id) = @_;
+
+  my $seq;
+
+  if( $type eq 'id') {
+    $seq = $self->seqdb->get_Seq_by_id($id);
+  }elsif ( $type eq 'acc' ) {
+    $seq = $self->seqdb->get_Seq_by_acc($id);
+  } else {
+    $self->throw("Bad internal error. Don't understand $type");
+  }
+
+  if( $self->agr() ) {
+      #print STDERR "Pulling out into memory\n";
+      my $newseq = Bio::Seq->new( -display_id => $seq->display_id,
+				  -accession_number  => $seq->accession,
+				  -seq        => $seq->seq,
+				  -desc       => $seq->desc,
+				  );
+      if( $self->agr() == 1 ) {
+	  foreach my $sf ( $seq->top_SeqFeatures() ) {
+	      $newseq->add_SeqFeature($sf);
+	  }
+	  
+	  $newseq->annotation($seq->annotation);
+      }
+      $seq = $newseq;
+  }
+
+  if( $self->_number_free < 1 ) {
+    # remove the latest thing from the hash
+    my @accs = sort { $self->{'_cache_number_hash'}->{$a} <=> $self->{'_cache_number_hash'}->{$b} } keys %{$self->{'_cache_number_hash'}};
+
+    my $acc = shift @accs;
+    # remove this guy
+    my $seq = $self->{'_cache_acc_hash'}->{$acc};
+
+    delete $self->{'_cache_number_hash'}->{$acc};
+    delete $self->{'_cache_id_hash'}->{$seq->id};
+    delete $self->{'_cache_acc_hash'}->{$acc};
+  }
+
+  # up the number, register this sequence into the hash.
+  $self->{'_cache_id_hash'}->{$seq->id} = $seq->accession;
+  $self->{'_cache_acc_hash'}->{$seq->accession} = $seq;
+  $self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++;
+
+  return $seq;
+}
+
+
+sub _number_free {
+  my $self = shift;
+
+  return $self->number - scalar(keys %{$self->{'_cache_number_hash'}});
+}
+
+
+
+
+=head2 get_Seq_by_version
+
+ Title   : get_Seq_by_version
+ Usage   : $seq = $db->get_Seq_by_version('X77802.1');
+ Function: Gets a Bio::Seq object by sequence version
+ Returns : A Bio::Seq object
+ Args    : accession.version (as a string)
+ Throws  : "acc.version does not exist" exception
+
+=cut
+
+
+sub get_Seq_by_version{
+   my ($self,@args) = @_;
+   $self->throw("Not implemented it");
+}
+
+
+
+## End of Package
+
+1;