comparison variant_effect_predictor/Bio/DB/FileCache.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # POD documentation - main docs before the code
2 #
3 #
4
5 =head1 NAME
6
7 Bio::DB::FileCache - In file cache for BioSeq objects
8
9 =head1 SYNOPSIS
10
11
12
13 $cachedb = Bio::DB::FileCache->new($real_db);
14
15 #
16 # $real_db is a Bio::DB::RandomAccessI database
17 #
18
19 $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN');
20
21 #
22 # $seq is a Bio::Seq object
23 #
24
25 # more control provided with named-parameter form
26
27 $cachedb = Bio::DB::FileCache->new( -seqdb => $real_db,
28 -file => $path,
29 -keep => $flag,
30 );
31 =head1 DESCRIPTION
32
33 This is a disk cache system which saves the objects returned by
34 Bio::DB::RandomAccessI on disk. The disk cache grows without limit,
35 while the process is running, but is automatically unlinked at process
36 termination unless the -keep flag is set.
37
38 This module requires DB_File and Storable.
39
40 =head1 CONTACT
41
42 Lincoln Stein
43
44 =head2 Reporting Bugs
45
46 Report bugs to the Bioperl bug tracking system to help us keep track
47 the bugs and their resolution. Bug reports can be submitted via email
48 or the web:
49
50 bioperl-bugs@bio.perl.org
51 http://bugzilla.bioperl.org/
52
53 =head1 APPENDIX
54
55 The rest of the documentation details each of the object
56 methods. Internal methods are usually preceded with a _
57
58 =cut
59
60 # Let the code begin...
61
62 package Bio::DB::FileCache;
63
64 use DB_File;
65 use Storable qw(freeze thaw);
66 use Fcntl qw(O_CREAT O_RDWR O_RDONLY);
67 use File::Temp 'tmpnam';
68
69 use vars qw(@ISA);
70 use strict;
71
72 use Bio::Root::Root;
73
74 @ISA = qw(Bio::Root::Root Bio::DB::SeqI);
75
76 use Bio::DB::SeqI;
77 use Bio::Seq::RichSeq;
78 use Bio::Location::Split;
79 use Bio::Location::Fuzzy;
80 use Bio::Seq;
81 use Bio::SeqFeature::Generic;
82 use Bio::Species;
83 use Bio::Annotation::Collection;
84
85 =head2 new
86
87 Title : new
88 Usage : $db = Bio::DB::FileCache->new(
89 -seqdb => $db, # Bio::DB::RandomAccessI database
90 -file => $path, # path to index file
91 -keep => $flag, # don't unlink index file
92 )
93 Function: creates a new on-disk cache
94 Returns : a Bio::DB::RandomAccessI database
95 Args : as above
96 Throws : "Must be a randomaccess database" exception
97 "Could not open primary index file" exception
98
99 If no index file is specified, will create a temporary file in your
100 system's temporary file directory. The name of this temporary file
101 can be retrieved using file_name().
102
103 =cut
104
105 sub new {
106 my ($class,@args) = @_;
107
108 my $self = Bio::Root::Root->new();
109 bless $self,$class;
110
111 my ($seqdb,$file_name,$keep) = $self->_rearrange([qw(SEQDB FILE KEEP)],@args);
112
113 if( !defined $seqdb || !ref $seqdb || !$seqdb->isa('Bio::DB::RandomAccessI') ) {
114 $self->throw("Must be a randomaccess database not a [$seqdb]");
115 }
116
117 $self->seqdb($seqdb);
118 $file_name ||= tmpnam();
119 $self->file_name($file_name);
120 $self->keep($keep);
121
122 $self->_open_database($file_name);
123 return $self;
124 }
125
126 =head2 get_Seq_by_id
127
128 Title : get_Seq_by_id
129 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
130 Function: Gets a Bio::Seq object by its name
131 Returns : a Bio::Seq object
132 Args : the id (as a string) of a sequence
133 Throws : "id does not exist" exception
134
135
136 =cut
137
138 sub get_Seq_by_id{
139 my ($self,$id) = @_;
140
141 # look in the cache first
142 my $obj = $self->_get('id' => $id);
143 return $obj if defined $obj;
144
145 # get object from seqdb
146 $obj = $self->seqdb->get_Seq_by_id($id);
147 $self->_store('id' => $id, $obj);
148
149 return $obj;
150 }
151
152 =head2 get_Seq_by_acc
153
154 Title : get_Seq_by_acc
155 Usage : $seq = $db->get_Seq_by_acc('X77802');
156 Function: Gets a Bio::Seq object by accession number
157 Returns : A Bio::Seq object
158 Args : accession number (as a string)
159 Throws : "acc does not exist" exception
160
161
162 =cut
163
164 sub get_Seq_by_acc{
165 my ($self,$acc) = @_;
166
167 # look in the cache first
168 my $obj = $self->_get('acc' => $acc);
169 return $obj if defined $obj;
170
171 # get object from seqdb
172 $obj = $self->seqdb->get_Seq_by_acc($acc);
173 $self->_store('acc' => $acc, $obj);
174
175 return $obj;
176 }
177
178 =head2 seqdb
179
180 Title : seqdb
181 Usage : $seqdb = $db->seqdb([$seqdb])
182 Function: gets/sets the Bio::DB::RandomAccessI database
183 Returns : a Bio::DB::RandomAccessI database
184 Args : new sequence database (optional)
185 Throws : nothing
186
187 =cut
188
189 sub seqdb {
190 my ($self, $seqdb) = @_;
191 if ($seqdb) {
192 $self->{'seqdb'} = $seqdb;
193 } else {
194 return $self->{'seqdb'};
195 }
196 }
197
198 =head2 file_name
199
200 Title : file_name
201 Usage : $path = $db->file_name([$file_name])
202 Function: gets/sets the name of the cache file
203 Returns : a path
204 Args : new cache file name (optional)
205 Throws : nothing
206
207 It probably isn't useful to set the cache file name after you've
208 opened it.
209
210 =cut
211
212 #'
213
214 sub file_name {
215 my $self = shift;
216 my $d = $self->{file_name};
217 $self->{file_name} = shift if @_;
218 $d;
219 }
220
221 =head2 keep
222
223 Title : keep
224 Usage : $keep = $db->keep([$flag])
225 Function: gets/sets the value of the "keep" flag
226 Returns : current value
227 Args : new value (optional)
228 Throws : nothing
229
230 The keep flag will cause the index file to be unlinked when the
231 process exits. Since on some operating systems (Unix, OS/2) the
232 unlinking occurs during the new() call immediately after opening the
233 file, it probably isn't safe to change this value.
234
235 =cut
236
237 sub keep {
238 my $self = shift;
239 my $d = $self->{keep};
240 $self->{keep} = shift if @_;
241 $d;
242 }
243
244 =head2 db
245
246 Title : db
247 Usage : $db->db
248 Function: returns tied hash to index database
249 Returns : a Berkeley DB tied hashref
250 Args : none
251 Throws : nothing
252
253 =cut
254
255 sub db { shift->{db} }
256
257 =head2 flush
258
259 Title : flush
260 Usage : $db->flush
261 Function: flushes the cache
262 Returns : nothing
263 Args : none
264 Throws : nothing
265
266 =cut
267
268 sub flush {
269 my $db = shift->db or return;
270 %{$db} = ();
271 }
272
273 sub _get {
274 my $self = shift;
275 my ($type,$id) = @_;
276 my $serialized = $self->db->{"${type}_${id}"};
277 my $obj = thaw($serialized);
278 $obj;
279 }
280
281 sub _store {
282 my $self = shift;
283 my ($type,$id,$obj) = @_;
284 my $serialized = freeze($obj);
285 $self->db->{"${type}_${id}"} = $serialized;
286 }
287
288 =head2 get_Seq_by_version
289
290 Title : get_Seq_by_version
291 Usage : $seq = $db->get_Seq_by_version('X77802.1');
292 Function: Gets a Bio::Seq object by sequence version
293 Returns : A Bio::Seq object
294 Args : accession.version (as a string)
295 Throws : "acc.version does not exist" exception
296
297 =cut
298
299 sub get_Seq_by_version{
300 my ($self,@args) = @_;
301 $self->throw("Not implemented it");
302 }
303
304 sub DESTROY {
305 my $self = shift;
306 unlink $self->file_name unless $self->keep;
307 }
308
309
310 sub _open_database {
311 my $self = shift;
312 my $file = shift;
313 my $flags = O_CREAT|O_RDWR;
314 my %db;
315 tie(%db,'DB_File',$file,$flags,0666,$DB_BTREE)
316 or $self->throw("Could not open primary index file");
317 $self->{db} = \%db;
318 unlink $file unless $self->keep;
319 }
320
321 ## End of Package
322
323 1;