view variant_effect_predictor/Bio/DB/InMemoryCache.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
line wrap: on
line source

# POD documentation - main docs before the code
#
#

=head1 NAME

Bio::DB::InMemoryCache - Abstract interface for a sequence database

=head1 SYNOPSIS

  $cachedb = Bio::DB::InMemoryCache->new( -seqdb => $real_db,
                                          -number => 1000);

  #
  # get a database object somehow using a concrete class
  #

  $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN');

  #
  # $seq is a Bio::Seq object
  #

=head1 DESCRIPTION

This is a memory cache system which saves the objects returned by Bio::DB::RandomAccessI in 
memory to a hard limit of sequences.

=head1 CONTACT

Ewan Birney

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
the bugs and their resolution. Bug reports can be submitted via email
or the web:

    bioperl-bugs@bio.perl.org                   
    http://bugzilla.bioperl.org/           

=head1 APPENDIX

The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _

=cut


# Let the code begin...

package Bio::DB::InMemoryCache;

use Bio::DB::SeqI;

use vars qw(@ISA);
use strict;

use Bio::Root::Root;
use Bio::Seq;

@ISA = qw(Bio::Root::Root Bio::DB::SeqI);


sub new {
    my ($class,@args) = @_;

    my $self = Bio::Root::Root->new();
    bless $self,$class;

    my ($seqdb,$number,$agr) = $self->_rearrange([qw(SEQDB NUMBER AGRESSION)],@args);

    if( !defined $seqdb || !ref $seqdb || !$seqdb->isa('Bio::DB::RandomAccessI') ) {
       $self->throw("Must be a randomaccess database not a [$seqdb]");
    }
    if( !defined $number ) {
        $number = 1000;
    }
    
    $self->seqdb($seqdb);
    $self->number($number);
    $self->agr($agr);

    # we consider acc as the primary id here
    $self->{'_cache_number_hash'} = {};
    $self->{'_cache_id_hash'}     = {};
    $self->{'_cache_acc_hash'}    = {};
    $self->{'_cache_number'}      = 1;

    return $self;
}



=head2 get_Seq_by_id

 Title   : get_Seq_by_id
 Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
 Function: Gets a Bio::Seq object by its name
 Returns : a Bio::Seq object
 Args    : the id (as a string) of a sequence
 Throws  : "id does not exist" exception


=cut

sub get_Seq_by_id{
   my ($self,$id) = @_;

   if( defined $self->{'_cache_id_hash'}->{$id} ) {
     my $acc = $self->{'_cache_id_hash'}->{$id};
     my $seq = $self->{'_cache_acc_hash'}->{$acc};
     $self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++;
     return $seq;
   } else {
     return $self->_load_Seq('id',$id);
   }
}

=head2 get_Seq_by_acc

 Title   : get_Seq_by_acc
 Usage   : $seq = $db->get_Seq_by_acc('X77802');
 Function: Gets a Bio::Seq object by accession number
 Returns : A Bio::Seq object
 Args    : accession number (as a string)
 Throws  : "acc does not exist" exception


=cut

sub get_Seq_by_acc{
   my ($self,$acc) = @_;

   #print STDERR "In cache get for $acc\n";
   if( defined $self->{'_cache_acc_hash'}->{$acc} ) {
       #print STDERR "Returning cached $acc\n";
       my $seq = $self->{'_cache_acc_hash'}->{$acc};
       $self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++;
       return $seq;
   } else {
     return $self->_load_Seq('acc',$acc);
   }
}



sub number {
    my ($self, $number) = @_;
    if ($number) {
        $self->{'number'} = $number;
    } else {
        return $self->{'number'};
    }
}

sub seqdb {
    my ($self, $seqdb) = @_;
    if ($seqdb) {
        $self->{'seqdb'} = $seqdb;
    } else {
        return $self->{'seqdb'};
    }
}

sub agr {
    my ($self, $agr) = @_;
    if ($agr) {
        $self->{'agr'} = $agr;
    } else {
        return $self->{'agr'};
    }
}


sub _load_Seq {
  my ($self,$type,$id) = @_;

  my $seq;

  if( $type eq 'id') {
    $seq = $self->seqdb->get_Seq_by_id($id);
  }elsif ( $type eq 'acc' ) {
    $seq = $self->seqdb->get_Seq_by_acc($id);
  } else {
    $self->throw("Bad internal error. Don't understand $type");
  }

  if( $self->agr() ) {
      #print STDERR "Pulling out into memory\n";
      my $newseq = Bio::Seq->new( -display_id => $seq->display_id,
				  -accession_number  => $seq->accession,
				  -seq        => $seq->seq,
				  -desc       => $seq->desc,
				  );
      if( $self->agr() == 1 ) {
	  foreach my $sf ( $seq->top_SeqFeatures() ) {
	      $newseq->add_SeqFeature($sf);
	  }
	  
	  $newseq->annotation($seq->annotation);
      }
      $seq = $newseq;
  }

  if( $self->_number_free < 1 ) {
    # remove the latest thing from the hash
    my @accs = sort { $self->{'_cache_number_hash'}->{$a} <=> $self->{'_cache_number_hash'}->{$b} } keys %{$self->{'_cache_number_hash'}};

    my $acc = shift @accs;
    # remove this guy
    my $seq = $self->{'_cache_acc_hash'}->{$acc};

    delete $self->{'_cache_number_hash'}->{$acc};
    delete $self->{'_cache_id_hash'}->{$seq->id};
    delete $self->{'_cache_acc_hash'}->{$acc};
  }

  # up the number, register this sequence into the hash.
  $self->{'_cache_id_hash'}->{$seq->id} = $seq->accession;
  $self->{'_cache_acc_hash'}->{$seq->accession} = $seq;
  $self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++;

  return $seq;
}


sub _number_free {
  my $self = shift;

  return $self->number - scalar(keys %{$self->{'_cache_number_hash'}});
}




=head2 get_Seq_by_version

 Title   : get_Seq_by_version
 Usage   : $seq = $db->get_Seq_by_version('X77802.1');
 Function: Gets a Bio::Seq object by sequence version
 Returns : A Bio::Seq object
 Args    : accession.version (as a string)
 Throws  : "acc.version does not exist" exception

=cut


sub get_Seq_by_version{
   my ($self,@args) = @_;
   $self->throw("Not implemented it");
}



## End of Package

1;