comparison variant_effect_predictor/Bio/DB/InMemoryCache.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # POD documentation - main docs before the code
2 #
3 #
4
5 =head1 NAME
6
7 Bio::DB::InMemoryCache - In-memory cache in front of a sequence database
8
9 =head1 SYNOPSIS
10
11 $cachedb = Bio::DB::InMemoryCache->new( -seqdb => $real_db,
12 -number => 1000);
13
14 #
15 # get a database object somehow using a concrete class
16 #
17
18 $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN');
19
20 #
21 # $seq is a Bio::Seq object
22 #
23
24 =head1 DESCRIPTION
25
26 This is a memory cache system which keeps the objects returned by a
27 Bio::DB::RandomAccessI database in memory, up to a hard limit on the number of sequences.
28
29 =head1 CONTACT
30
31 Ewan Birney
32
33 =head2 Reporting Bugs
34
35 Report bugs to the Bioperl bug tracking system to help us keep track
36 of the bugs and their resolution. Bug reports can be submitted via email
37 or the web:
38
39 bioperl-bugs@bio.perl.org
40 http://bugzilla.bioperl.org/
41
42 =head1 APPENDIX
43
44 The rest of the documentation details each of the object
45 methods. Internal methods are usually preceded with a _
46
47 =cut
48
49
50 # Let the code begin...
51
52 package Bio::DB::InMemoryCache;
53
54 use Bio::DB::SeqI;
55
56 use vars qw(@ISA);
57 use strict;
58
59 use Bio::Root::Root;
60 use Bio::Seq;
61
62 @ISA = qw(Bio::Root::Root Bio::DB::SeqI);
63
64
# Constructor.
#
# Named arguments (unpacked by _rearrange in this order):
#   -seqdb     : backing database; must be an object that isa
#                Bio::DB::RandomAccessI (required)
#   -number    : maximum number of sequences kept in the cache
#                (defaults to 1000, must be at least 1)
#   -agression : [sic - the key is spelled with a single 'g'] optional
#                copy mode stored through agr(); when true, loaded
#                sequences are copied into fresh Bio::Seq objects before
#                being cached
#
# Returns the new cache object.
# Throws if -seqdb is missing or is not a Bio::DB::RandomAccessI object,
# or if -number is smaller than 1.
sub new {
    my ($class, @args) = @_;

    my $self = Bio::Root::Root->new();
    bless $self, $class;

    my ($seqdb, $number, $agr) =
        $self->_rearrange([qw(SEQDB NUMBER AGRESSION)], @args);

    # blessed() guards the isa() call: invoking a method on an unblessed
    # reference (e.g. a plain hash ref) would die with a raw Perl error
    # instead of the exception below.
    require Scalar::Util;
    if (   !defined $seqdb
        || !Scalar::Util::blessed($seqdb)
        || !$seqdb->isa('Bio::DB::RandomAccessI') )
    {
        my $shown = defined $seqdb ? $seqdb : '';
        $self->throw("Must be a randomaccess database not a [$shown]");
    }

    $number = 1000 if !defined $number;

    # A limit below 1 can never hold a sequence; reject it here rather than
    # letting the first lookup fail while trying to evict from an empty cache.
    if ( !( $number >= 1 ) ) {
        $self->throw("Cache size (-number) must be at least 1, not [$number]");
    }

    $self->seqdb($seqdb);
    $self->number($number);
    $self->agr($agr);

    # The accession is treated as the primary key of the cache:
    #   _cache_acc_hash    : accession -> cached sequence object
    #   _cache_id_hash     : id        -> accession
    #   _cache_number_hash : accession -> counter value at last use
    #   _cache_number      : next counter value to hand out
    $self->{'_cache_number_hash'} = {};
    $self->{'_cache_id_hash'}     = {};
    $self->{'_cache_acc_hash'}    = {};
    $self->{'_cache_number'}      = 1;

    return $self;
}
92
93
94
95 =head2 get_Seq_by_id
96
97 Title : get_Seq_by_id
98 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
99 Function: Gets a Bio::Seq object by its name
100 Returns : a Bio::Seq object
101 Args : the id (as a string) of a sequence
102 Throws : "id does not exist" exception
103
104
105 =cut
106
# Return the sequence registered under $id, serving it from the cache when
# possible and delegating to _load_Seq('id', $id) otherwise.
#
# A cache hit stamps the entry with the next counter value, marking it as
# the most recently used one.
sub get_Seq_by_id {
    my ($self, $id) = @_;

    my $acc = $self->{'_cache_id_hash'}->{$id};
    if ( defined $acc ) {
        my $seq = $self->{'_cache_acc_hash'}->{$acc};
        if ( defined $seq ) {
            $self->{'_cache_number_hash'}->{ $seq->accession } =
                $self->{'_cache_number'}++;
            return $seq;
        }

        # Stale id entry: eviction only removes the id reported by the
        # evicted object, so another id that pointed at the same accession
        # can outlive it. Drop the leftover and reload instead of calling a
        # method on an undefined sequence.
        delete $self->{'_cache_id_hash'}->{$id};
    }

    return $self->_load_Seq('id', $id);
}
119
120 =head2 get_Seq_by_acc
121
122 Title : get_Seq_by_acc
123 Usage : $seq = $db->get_Seq_by_acc('X77802');
124 Function: Gets a Bio::Seq object by accession number
125 Returns : A Bio::Seq object
126 Args : accession number (as a string)
127 Throws : "acc does not exist" exception
128
129
130 =cut
131
# Return the sequence with accession $acc. A cached object is returned
# directly after being stamped as most recently used; anything else is
# fetched through _load_Seq('acc', $acc).
sub get_Seq_by_acc {
    my ($self, $acc) = @_;

    my $cached = $self->{'_cache_acc_hash'}->{$acc};

    # Not in the cache yet: let the loader fetch and register it.
    return $self->_load_Seq('acc', $acc) unless defined $cached;

    my $stamp = $self->{'_cache_number'}++;
    $self->{'_cache_number_hash'}->{ $cached->accession } = $stamp;

    return $cached;
}
145
146
147
# Combined getter/setter for the maximum number of cached sequences.
# A true argument is stored and returned; with no argument (or a false one)
# the currently stored limit is returned unchanged.
sub number {
    my $self  = shift;
    my $limit = shift;

    return $self->{'number'} unless $limit;
    return $self->{'number'} = $limit;
}
156
# Combined getter/setter for the backing database object.
# A true argument replaces the stored database and is returned; otherwise
# the currently stored database is returned.
sub seqdb {
    my ($self, $db) = @_;

    return $db
        ? ( $self->{'seqdb'} = $db )
        : $self->{'seqdb'};
}
165
# Combined getter/setter for the copy ("aggression") mode read by _load_Seq:
#   false : loaded sequences are cached as returned by the database
#   true  : a new Bio::Seq is built from display_id, accession, seq and desc
#   1     : as above, plus top-level features and annotation are carried over
#
# Any defined argument is stored, including 0, so the mode can be switched
# off again after it was enabled. Always returns the current value.
sub agr {
    my ($self, $agr) = @_;

    if ( defined $agr ) {
        $self->{'agr'} = $agr;
    }

    return $self->{'agr'};
}
174
175
# Fetch a sequence from the backing database and register it in the cache.
#
# Args    : $type - 'id' or 'acc', selecting get_Seq_by_id / get_Seq_by_acc
#                   on the backing database
#           $id   - the id or accession to look up
# Returns : the (possibly copied) sequence object, or nothing when the
#           backing database returned no sequence
# Throws  : when $type is neither 'id' nor 'acc'
#
# When agr() is true the sequence is copied into a new Bio::Seq carrying
# display_id, accession, seq and desc; when agr() == 1 the top-level
# features and the annotation are carried over as well.
sub _load_Seq {
    my ($self, $type, $id) = @_;

    my $seq;
    if ( $type eq 'id' ) {
        $seq = $self->seqdb->get_Seq_by_id($id);
    }
    elsif ( $type eq 'acc' ) {
        $seq = $self->seqdb->get_Seq_by_acc($id);
    }
    else {
        $self->throw("Bad internal error. Don't understand $type");
    }

    # The database had nothing for this key: there is nothing to cache and
    # no object to call methods on.
    return unless defined $seq;

    if ( $self->agr() ) {
        my $copy = Bio::Seq->new(
            -display_id       => $seq->display_id,
            -accession_number => $seq->accession,
            -seq              => $seq->seq,
            -desc             => $seq->desc,
        );
        if ( $self->agr() == 1 ) {
            foreach my $sf ( $seq->top_SeqFeatures() ) {
                $copy->add_SeqFeature($sf);
            }
            $copy->annotation( $seq->annotation );
        }
        $seq = $copy;
    }

    my $acc    = $seq->accession;
    my $stamps = $self->{'_cache_number_hash'};

    # Re-registering an accession that is already cached does not grow the
    # cache, so room only has to be made for new accessions.
    if ( !exists $stamps->{$acc} ) {

        # Evict least-recently-used entries (lowest counter value) until one
        # slot is free. Looping also honours a limit that was lowered after
        # the cache filled up; the emptiness check stops on an empty cache.
        while ( %{$stamps} && $self->_number_free < 1 ) {
            my $oldest;
            foreach my $candidate ( keys %{$stamps} ) {
                $oldest = $candidate
                    if !defined $oldest
                    || $stamps->{$candidate} < $stamps->{$oldest};
            }

            my $evicted = delete $self->{'_cache_acc_hash'}->{$oldest};
            delete $stamps->{$oldest};
            delete $self->{'_cache_id_hash'}->{ $evicted->id }
                if defined $evicted;
        }
    }

    # Register the sequence and stamp it as the most recently used entry.
    $self->{'_cache_id_hash'}->{ $seq->id } = $acc;
    $self->{'_cache_acc_hash'}->{$acc}      = $seq;
    $stamps->{$acc}                         = $self->{'_cache_number'}++;

    return $seq;
}
226
227
# Number of free slots left in the cache: the configured limit minus the
# number of accessions currently tracked in _cache_number_hash.
sub _number_free {
    my ($self) = @_;

    my $capacity = $self->number;
    my $in_use   = keys %{ $self->{'_cache_number_hash'} };

    return $capacity - $in_use;
}
233
234
235
236
237 =head2 get_Seq_by_version
238
239 Title : get_Seq_by_version
240 Usage : $seq = $db->get_Seq_by_version('X77802.1');
241 Function: Gets a Bio::Seq object by sequence version
242 Returns : A Bio::Seq object
243 Args : accession.version (as a string)
244 Throws : "acc.version does not exist" exception
245
246 =cut
247
248
# Lookup by accession.version is not supported by the cache: every call
# raises an exception through throw(); any arguments are ignored.
sub get_Seq_by_version {
    my $self = shift;

    return $self->throw("Not implemented it");
}
253
254
255
256 ## End of Package
257
258 1;