0
|
1 # POD documentation - main docs before the code
|
|
2 #
|
|
3 #
|
|
4
|
|
5 =head1 NAME
|
|
6
|
|
7 Bio::DB::FileCache - In file cache for BioSeq objects
|
|
8
|
|
9 =head1 SYNOPSIS
|
|
10
|
|
11
|
|
12
|
|
13 $cachedb = Bio::DB::FileCache->new($real_db);
|
|
14
|
|
15 #
|
|
16 # $real_db is a Bio::DB::RandomAccessI database
|
|
17 #
|
|
18
|
|
19 $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN');
|
|
20
|
|
21 #
|
|
22 # $seq is a Bio::Seq object
|
|
23 #
|
|
24
|
|
25 # more control provided with named-parameter form
|
|
26
|
|
27 $cachedb = Bio::DB::FileCache->new( -seqdb => $real_db,
|
|
28 -file => $path,
|
|
29 -keep => $flag,
|
|
30 );
|
|
31 =head1 DESCRIPTION
|
|
32
|
|
33 This is a disk cache system which saves the objects returned by
|
|
34 Bio::DB::RandomAccessI on disk. The disk cache grows without limit,
|
|
35 while the process is running, but is automatically unlinked at process
|
|
36 termination unless the -keep flag is set.
|
|
37
|
|
38 This module requires DB_File and Storable.
|
|
39
|
|
40 =head1 CONTACT
|
|
41
|
|
42 Lincoln Stein
|
|
43
|
|
44 =head2 Reporting Bugs
|
|
45
|
|
46 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
47 the bugs and their resolution. Bug reports can be submitted via email
|
|
48 or the web:
|
|
49
|
|
50 bioperl-bugs@bio.perl.org
|
|
51 http://bugzilla.bioperl.org/
|
|
52
|
|
53 =head1 APPENDIX
|
|
54
|
|
55 The rest of the documentation details each of the object
|
|
56 methods. Internal methods are usually preceded with a _
|
|
57
|
|
58 =cut
|
|
59
|
|
60 # Let the code begin...
|
|
61
|
|
62 package Bio::DB::FileCache;
|
|
63
|
|
64 use DB_File;
|
|
65 use Storable qw(freeze thaw);
|
|
66 use Fcntl qw(O_CREAT O_RDWR O_RDONLY);
|
|
67 use File::Temp 'tmpnam';
|
|
68
|
|
69 use vars qw(@ISA);
|
|
70 use strict;
|
|
71
|
|
72 use Bio::Root::Root;
|
|
73
|
|
74 @ISA = qw(Bio::Root::Root Bio::DB::SeqI);
|
|
75
|
|
76 use Bio::DB::SeqI;
|
|
77 use Bio::Seq::RichSeq;
|
|
78 use Bio::Location::Split;
|
|
79 use Bio::Location::Fuzzy;
|
|
80 use Bio::Seq;
|
|
81 use Bio::SeqFeature::Generic;
|
|
82 use Bio::Species;
|
|
83 use Bio::Annotation::Collection;
|
|
84
|
|
85 =head2 new
|
|
86
|
|
87 Title : new
|
|
88 Usage : $db = Bio::DB::FileCache->new(
|
|
89 -seqdb => $db, # Bio::DB::RandomAccessI database
|
|
90 -file => $path, # path to index file
|
|
91 -keep => $flag, # don't unlink index file
|
|
92 )
|
|
93 Function: creates a new on-disk cache
|
|
94 Returns : a Bio::DB::RandomAccessI database
|
|
95 Args : as above
|
|
96 Throws : "Must be a randomaccess database" exception
|
|
97 "Could not open primary index file" exception
|
|
98
|
|
99 If no index file is specified, will create a temporary file in your
|
|
100 system's temporary file directory. The name of this temporary file
|
|
101 can be retrieved using file_name().
|
|
102
|
|
103 =cut
|
|
104
|
|
105 sub new {
|
|
106 my ($class,@args) = @_;
|
|
107
|
|
108 my $self = Bio::Root::Root->new();
|
|
109 bless $self,$class;
|
|
110
|
|
111 my ($seqdb,$file_name,$keep) = $self->_rearrange([qw(SEQDB FILE KEEP)],@args);
|
|
112
|
|
113 if( !defined $seqdb || !ref $seqdb || !$seqdb->isa('Bio::DB::RandomAccessI') ) {
|
|
114 $self->throw("Must be a randomaccess database not a [$seqdb]");
|
|
115 }
|
|
116
|
|
117 $self->seqdb($seqdb);
|
|
118 $file_name ||= tmpnam();
|
|
119 $self->file_name($file_name);
|
|
120 $self->keep($keep);
|
|
121
|
|
122 $self->_open_database($file_name);
|
|
123 return $self;
|
|
124 }
|
|
125
|
|
126 =head2 get_Seq_by_id
|
|
127
|
|
128 Title : get_Seq_by_id
|
|
129 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
|
|
130 Function: Gets a Bio::Seq object by its name
|
|
131 Returns : a Bio::Seq object
|
|
132 Args : the id (as a string) of a sequence
|
|
133 Throws : "id does not exist" exception
|
|
134
|
|
135
|
|
136 =cut
|
|
137
|
|
138 sub get_Seq_by_id{
|
|
139 my ($self,$id) = @_;
|
|
140
|
|
141 # look in the cache first
|
|
142 my $obj = $self->_get('id' => $id);
|
|
143 return $obj if defined $obj;
|
|
144
|
|
145 # get object from seqdb
|
|
146 $obj = $self->seqdb->get_Seq_by_id($id);
|
|
147 $self->_store('id' => $id, $obj);
|
|
148
|
|
149 return $obj;
|
|
150 }
|
|
151
|
|
152 =head2 get_Seq_by_acc
|
|
153
|
|
154 Title : get_Seq_by_acc
|
|
155 Usage : $seq = $db->get_Seq_by_acc('X77802');
|
|
156 Function: Gets a Bio::Seq object by accession number
|
|
157 Returns : A Bio::Seq object
|
|
158 Args : accession number (as a string)
|
|
159 Throws : "acc does not exist" exception
|
|
160
|
|
161
|
|
162 =cut
|
|
163
|
|
164 sub get_Seq_by_acc{
|
|
165 my ($self,$acc) = @_;
|
|
166
|
|
167 # look in the cache first
|
|
168 my $obj = $self->_get('acc' => $acc);
|
|
169 return $obj if defined $obj;
|
|
170
|
|
171 # get object from seqdb
|
|
172 $obj = $self->seqdb->get_Seq_by_acc($acc);
|
|
173 $self->_store('acc' => $acc, $obj);
|
|
174
|
|
175 return $obj;
|
|
176 }
|
|
177
|
|
178 =head2 seqdb
|
|
179
|
|
180 Title : seqdb
|
|
181 Usage : $seqdb = $db->seqdb([$seqdb])
|
|
182 Function: gets/sets the Bio::DB::RandomAccessI database
|
|
183 Returns : a Bio::DB::RandomAccessI database
|
|
184 Args : new sequence database (optional)
|
|
185 Throws : nothing
|
|
186
|
|
187 =cut
|
|
188
|
|
189 sub seqdb {
|
|
190 my ($self, $seqdb) = @_;
|
|
191 if ($seqdb) {
|
|
192 $self->{'seqdb'} = $seqdb;
|
|
193 } else {
|
|
194 return $self->{'seqdb'};
|
|
195 }
|
|
196 }
|
|
197
|
|
198 =head2 file_name
|
|
199
|
|
200 Title : file_name
|
|
201 Usage : $path = $db->file_name([$file_name])
|
|
202 Function: gets/sets the name of the cache file
|
|
203 Returns : a path
|
|
204 Args : new cache file name (optional)
|
|
205 Throws : nothing
|
|
206
|
|
207 It probably isn't useful to set the cache file name after you've
|
|
208 opened it.
|
|
209
|
|
210 =cut
|
|
211
|
|
212 #'
|
|
213
|
|
214 sub file_name {
|
|
215 my $self = shift;
|
|
216 my $d = $self->{file_name};
|
|
217 $self->{file_name} = shift if @_;
|
|
218 $d;
|
|
219 }
|
|
220
|
|
221 =head2 keep
|
|
222
|
|
223 Title : keep
|
|
224 Usage : $keep = $db->keep([$flag])
|
|
225 Function: gets/sets the value of the "keep" flag
|
|
226 Returns : current value
|
|
227 Args : new value (optional)
|
|
228 Throws : nothing
|
|
229
|
|
230 The keep flag will cause the index file to be unlinked when the
|
|
231 process exits. Since on some operating systems (Unix, OS/2) the
|
|
232 unlinking occurs during the new() call immediately after opening the
|
|
233 file, it probably isn't safe to change this value.
|
|
234
|
|
235 =cut
|
|
236
|
|
237 sub keep {
|
|
238 my $self = shift;
|
|
239 my $d = $self->{keep};
|
|
240 $self->{keep} = shift if @_;
|
|
241 $d;
|
|
242 }
|
|
243
|
|
244 =head2 db
|
|
245
|
|
246 Title : db
|
|
247 Usage : $db->db
|
|
248 Function: returns tied hash to index database
|
|
249 Returns : a Berkeley DB tied hashref
|
|
250 Args : none
|
|
251 Throws : nothing
|
|
252
|
|
253 =cut
|
|
254
|
|
255 sub db { shift->{db} }
|
|
256
|
|
257 =head2 flush
|
|
258
|
|
259 Title : flush
|
|
260 Usage : $db->flush
|
|
261 Function: flushes the cache
|
|
262 Returns : nothing
|
|
263 Args : none
|
|
264 Throws : nothing
|
|
265
|
|
266 =cut
|
|
267
|
|
268 sub flush {
|
|
269 my $db = shift->db or return;
|
|
270 %{$db} = ();
|
|
271 }
|
|
272
|
|
273 sub _get {
|
|
274 my $self = shift;
|
|
275 my ($type,$id) = @_;
|
|
276 my $serialized = $self->db->{"${type}_${id}"};
|
|
277 my $obj = thaw($serialized);
|
|
278 $obj;
|
|
279 }
|
|
280
|
|
281 sub _store {
|
|
282 my $self = shift;
|
|
283 my ($type,$id,$obj) = @_;
|
|
284 my $serialized = freeze($obj);
|
|
285 $self->db->{"${type}_${id}"} = $serialized;
|
|
286 }
|
|
287
|
|
288 =head2 get_Seq_by_version
|
|
289
|
|
290 Title : get_Seq_by_version
|
|
291 Usage : $seq = $db->get_Seq_by_version('X77802.1');
|
|
292 Function: Gets a Bio::Seq object by sequence version
|
|
293 Returns : A Bio::Seq object
|
|
294 Args : accession.version (as a string)
|
|
295 Throws : "acc.version does not exist" exception
|
|
296
|
|
297 =cut
|
|
298
|
|
299 sub get_Seq_by_version{
|
|
300 my ($self,@args) = @_;
|
|
301 $self->throw("Not implemented it");
|
|
302 }
|
|
303
|
|
304 sub DESTROY {
|
|
305 my $self = shift;
|
|
306 unlink $self->file_name unless $self->keep;
|
|
307 }
|
|
308
|
|
309
|
|
310 sub _open_database {
|
|
311 my $self = shift;
|
|
312 my $file = shift;
|
|
313 my $flags = O_CREAT|O_RDWR;
|
|
314 my %db;
|
|
315 tie(%db,'DB_File',$file,$flags,0666,$DB_BTREE)
|
|
316 or $self->throw("Could not open primary index file");
|
|
317 $self->{db} = \%db;
|
|
318 unlink $file unless $self->keep;
|
|
319 }
|
|
320
|
|
321 ## End of Package
|
|
322
|
|
323 1;
|