Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/DB/FileCache.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # POD documentation - main docs before the code | |
2 # | |
3 # | |
4 | |
5 =head1 NAME | |
6 | |
7 Bio::DB::FileCache - On-disk cache for Bio::Seq objects | |
8 | |
9 =head1 SYNOPSIS | |
10 | |
11 | |
12 | |
13 $cachedb = Bio::DB::FileCache->new($real_db); | |
14 | |
15 # | |
16 # $real_db is a Bio::DB::RandomAccessI database | |
17 # | |
18 | |
19 $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN'); | |
20 | |
21 # | |
22 # $seq is a Bio::Seq object | |
23 # | |
24 | |
25 # more control provided with named-parameter form | |
26 | |
27 $cachedb = Bio::DB::FileCache->new( -seqdb => $real_db, | |
28 -file => $path, | |
29 -keep => $flag, | |
30 ); | |
31 =head1 DESCRIPTION | |
32 | |
33 This is a disk cache system which saves the objects returned by | |
34 Bio::DB::RandomAccessI on disk. The disk cache grows without limit, | |
35 while the process is running, but is automatically unlinked at process | |
36 termination unless the -keep flag is set. | |
37 | |
38 This module requires DB_File and Storable. | |
39 | |
40 =head1 CONTACT | |
41 | |
42 Lincoln Stein | |
43 | |
44 =head2 Reporting Bugs | |
45 | |
46 Report bugs to the Bioperl bug tracking system to help us keep track | |
47 of the bugs and their resolution. Bug reports can be submitted via email | |
48 or the web: | |
49 | |
50 bioperl-bugs@bio.perl.org | |
51 http://bugzilla.bioperl.org/ | |
52 | |
53 =head1 APPENDIX | |
54 | |
55 The rest of the documentation details each of the object | |
56 methods. Internal methods are usually preceded with a _ | |
57 | |
58 =cut | |
59 | |
60 # Let the code begin... | |
61 | |
62 package Bio::DB::FileCache; | |
63 | |
64 use DB_File; | |
65 use Storable qw(freeze thaw); | |
66 use Fcntl qw(O_CREAT O_RDWR O_RDONLY); | |
67 use File::Temp 'tmpnam'; | |
68 | |
69 use vars qw(@ISA); | |
70 use strict; | |
71 | |
72 use Bio::Root::Root; | |
73 | |
74 @ISA = qw(Bio::Root::Root Bio::DB::SeqI); | |
75 | |
76 use Bio::DB::SeqI; | |
77 use Bio::Seq::RichSeq; | |
78 use Bio::Location::Split; | |
79 use Bio::Location::Fuzzy; | |
80 use Bio::Seq; | |
81 use Bio::SeqFeature::Generic; | |
82 use Bio::Species; | |
83 use Bio::Annotation::Collection; | |
84 | |
85 =head2 new | |
86 | |
87 Title : new | |
88 Usage : $db = Bio::DB::FileCache->new( | |
89 -seqdb => $db, # Bio::DB::RandomAccessI database | |
90 -file => $path, # path to index file | |
91 -keep => $flag, # don't unlink index file | |
92 ) | |
93 Function: creates a new on-disk cache | |
94 Returns : a Bio::DB::RandomAccessI database | |
95 Args : as above | |
96 Throws : "Must be a randomaccess database" exception | |
97 "Could not open primary index file" exception | |
98 | |
99 If no index file is specified, will create a temporary file in your | |
100 system's temporary file directory. The name of this temporary file | |
101 can be retrieved using file_name(). | |
102 | |
103 =cut | |
104 | |
# Constructor: wraps a sequence database with an on-disk cache.
#
# Args   : -seqdb => database object, -file => cache file path (optional),
#          -keep => flag; parsed by _rearrange in SEQDB, FILE, KEEP order
# Returns: the newly built object, blessed into the invoking class
# Throws : "Must be a randomaccess database ..." unless the database
#          argument is an object that isa Bio::DB::RandomAccessI
sub new {
    my $class = shift;
    my @args  = @_;

    # Start from a plain Bio::Root::Root object and rebless it into $class.
    my $self = bless Bio::Root::Root->new(), $class;

    my ( $seqdb, $file_name, $keep ) =
        $self->_rearrange( [qw(SEQDB FILE KEEP)], @args );

    my $is_random_access =
        defined $seqdb && ref $seqdb && $seqdb->isa('Bio::DB::RandomAccessI');
    unless ($is_random_access) {
        $self->throw("Must be a randomaccess database not a [$seqdb]");
    }

    $self->seqdb($seqdb);

    # No usable path supplied: fall back to a temporary file name.
    $file_name = tmpnam() unless $file_name;
    $self->file_name($file_name);

    # The keep flag must be in place before the database is opened, because
    # _open_database consults it to decide whether to unlink the file.
    $self->keep($keep);

    $self->_open_database($file_name);
    return $self;
}
125 | |
126 =head2 get_Seq_by_id | |
127 | |
128 Title : get_Seq_by_id | |
129 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN') | |
130 Function: Gets a Bio::Seq object by its name | |
131 Returns : a Bio::Seq object | |
132 Args : the id (as a string) of a sequence | |
133 Throws : "id does not exist" exception | |
134 | |
135 | |
136 =cut | |
137 | |
# Fetch a sequence object by ID, consulting the disk cache before the
# wrapped database.
#
# Args   : $id - the sequence ID (string)
# Returns: the cached object if present, otherwise whatever the wrapped
#          database's get_Seq_by_id returns (possibly undef on a miss)
sub get_Seq_by_id {
    my ( $self, $id ) = @_;

    # Serve from the cache when the entry is already there.
    my $obj = $self->_get( 'id' => $id );
    return $obj if defined $obj;

    # Cache miss: ask the wrapped database.
    $obj = $self->seqdb->get_Seq_by_id($id);

    # Only cache real objects. Storable's freeze() croaks on a
    # non-reference, so a failed lookup (undef) must not be stored.
    $self->_store( 'id' => $id, $obj ) if ref $obj;

    return $obj;
}
151 | |
152 =head2 get_Seq_by_acc | |
153 | |
154 Title : get_Seq_by_acc | |
155 Usage : $seq = $db->get_Seq_by_acc('X77802'); | |
156 Function: Gets a Bio::Seq object by accession number | |
157 Returns : A Bio::Seq object | |
158 Args : accession number (as a string) | |
159 Throws : "acc does not exist" exception | |
160 | |
161 | |
162 =cut | |
163 | |
# Fetch a sequence object by accession number, consulting the disk cache
# before the wrapped database.
#
# Args   : $acc - the accession number (string)
# Returns: the cached object if present, otherwise whatever the wrapped
#          database's get_Seq_by_acc returns (possibly undef on a miss)
sub get_Seq_by_acc {
    my ( $self, $acc ) = @_;

    # Serve from the cache when the entry is already there.
    my $obj = $self->_get( 'acc' => $acc );
    return $obj if defined $obj;

    # Cache miss: ask the wrapped database.
    $obj = $self->seqdb->get_Seq_by_acc($acc);

    # Only cache real objects. Storable's freeze() croaks on a
    # non-reference, so a failed lookup (undef) must not be stored.
    $self->_store( 'acc' => $acc, $obj ) if ref $obj;

    return $obj;
}
177 | |
178 =head2 seqdb | |
179 | |
180 Title : seqdb | |
181 Usage : $seqdb = $db->seqdb([$seqdb]) | |
182 Function: gets/sets the Bio::DB::RandomAccessI database | |
183 Returns : a Bio::DB::RandomAccessI database | |
184 Args : new sequence database (optional) | |
185 Throws : nothing | |
186 | |
187 =cut | |
188 | |
# Combined getter/setter for the wrapped sequence database.
#
# Args   : optional new database; a false value means "just read"
# Returns: the stored database when reading, or the value just stored
#          when writing
sub seqdb {
    my $self   = shift;
    my $new_db = shift;

    # Read access: nothing (or a false value) was supplied.
    return $self->{'seqdb'} unless $new_db;

    return $self->{'seqdb'} = $new_db;
}
197 | |
198 =head2 file_name | |
199 | |
200 Title : file_name | |
201 Usage : $path = $db->file_name([$file_name]) | |
202 Function: gets/sets the name of the cache file | |
203 Returns : a path | |
204 Args : new cache file name (optional) | |
205 Throws : nothing | |
206 | |
207 It probably isn't useful to set the cache file name after you've | |
208 opened it. | |
209 | |
210 =cut | |
211 | |
212 #' | |
213 | |
# Combined getter/setter for the cache file path.
#
# Args   : optional new path
# Returns: the path that was stored BEFORE this call (so a set call hands
#          back the previous value, not the new one)
sub file_name {
    my ( $self, @new_value ) = @_;

    my $previous = $self->{file_name};
    if (@new_value) {
        $self->{file_name} = $new_value[0];
    }
    return $previous;
}
220 | |
221 =head2 keep | |
222 | |
223 Title : keep | |
224 Usage : $keep = $db->keep([$flag]) | |
225 Function: gets/sets the value of the "keep" flag | |
226 Returns : current value | |
227 Args : new value (optional) | |
228 Throws : nothing | |
229 | |
230 Unless the keep flag is set, the index file will be unlinked when the | |
231 process exits. Since on some operating systems (Unix, OS/2) the | |
232 unlinking occurs during the new() call immediately after opening the | |
233 file, it probably isn't safe to change this value. | |
234 | |
235 =cut | |
236 | |
# Combined getter/setter for the "keep" flag.
#
# Args   : optional new flag value
# Returns: the flag value that was stored BEFORE this call
sub keep {
    my ( $self, @new_value ) = @_;

    my $previous = $self->{keep};
    if (@new_value) {
        $self->{keep} = $new_value[0];
    }
    return $previous;
}
243 | |
244 =head2 db | |
245 | |
246 Title : db | |
247 Usage : $db->db | |
248 Function: returns tied hash to index database | |
249 Returns : a Berkeley DB tied hashref | |
250 Args : none | |
251 Throws : nothing | |
252 | |
253 =cut | |
254 | |
# Accessor for the hash reference stored under the object's 'db' key
# (set by _open_database to the tied index hash).
sub db {
    my $self = shift;
    return $self->{db};
}
256 | |
257 =head2 flush | |
258 | |
259 Title : flush | |
260 Usage : $db->flush | |
261 Function: flushes the cache | |
262 Returns : nothing | |
263 Args : none | |
264 Throws : nothing | |
265 | |
266 =cut | |
267 | |
# Empty the cache by clearing every entry of the index hash.
# Does nothing when no index hash is available.
sub flush {
    my $self  = shift;
    my $index = $self->db;
    return unless $index;

    %$index = ();
}
272 | |
# Look up a cached object.
#
# Args   : $type - key namespace (the callers in this file use 'id' and 'acc')
#          $id   - identifier within that namespace
# Returns: the thawed object stored under "<type>_<id>"
sub _get {
    my ( $self, $type, $id ) = @_;

    my $key    = "${type}_${id}";
    my $thawed = thaw( $self->db->{$key} );
    return $thawed;
}
280 | |
# Serialize an object with Storable and save it in the index hash.
#
# Args   : $type - key namespace (the callers in this file use 'id' and 'acc')
#          $id   - identifier within that namespace
#          $obj  - reference to serialize
# Returns: the serialized string that was stored under "<type>_<id>"
sub _store {
    my ( $self, $type, $id, $obj ) = @_;

    my $frozen = freeze($obj);
    my $key    = "${type}_${id}";
    $self->db->{$key} = $frozen;
}
287 | |
288 =head2 get_Seq_by_version | |
289 | |
290 Title : get_Seq_by_version | |
291 Usage : $seq = $db->get_Seq_by_version('X77802.1'); | |
292 Function: Gets a Bio::Seq object by sequence version | |
293 Returns : A Bio::Seq object | |
294 Args : accession.version (as a string) | |
295 Throws : "Not implemented it" exception (always; this method is not implemented) | |
296 | |
297 =cut | |
298 | |
# Placeholder: lookup by accession.version is not supported by this class.
# Every call raises a "Not implemented it" exception via throw().
sub get_Seq_by_version {
    my $self = shift;
    $self->throw("Not implemented it");
}
303 | |
# Destructor: releases the index database and removes the cache file
# unless the keep flag is set.
sub DESTROY {
    my $self = shift;

    # Untie the index hash first so the underlying file handle is closed.
    # Platforms that cannot unlink an open file would otherwise leave the
    # cache file behind.
    my $index = $self->{db};
    if ( ref $index eq 'HASH' && tied %{$index} ) {
        untie %{$index};
    }

    return if $self->keep;

    # The file name is unset when construction failed before it was
    # recorded; there is nothing to remove in that case.
    my $file = $self->file_name;
    unlink $file if defined $file;
}
308 | |
309 | |
# Tie a DB_File BTREE database at the given path (created if missing,
# opened read/write) and remember the tied hash on the object.
#
# Args   : $file - path of the index file
# Throws : "Could not open primary index file" when the tie fails
sub _open_database {
    my ( $self, $file ) = @_;

    my %index;
    my $open_mode = O_CREAT | O_RDWR;
    unless ( tie %index, 'DB_File', $file, $open_mode, 0666, $DB_BTREE ) {
        $self->throw("Could not open primary index file");
    }
    $self->{db} = \%index;

    # Unless the caller asked to keep the cache, drop the directory entry
    # right away.
    unlink $file unless $self->keep;
}
320 | |
321 ## End of Package | |
322 | |
323 1; |