Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/DB/FileCache.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # POD documentation - main docs before the code | |
| 2 # | |
| 3 # | |
| 4 | |
| 5 =head1 NAME | |
| 6 | |
| 7 Bio::DB::FileCache - In file cache for BioSeq objects | |
| 8 | |
| 9 =head1 SYNOPSIS | |
| 10 | |
| 11 | |
| 12 | |
| 13 $cachedb = Bio::DB::FileCache->new($real_db); | |
| 14 | |
| 15 # | |
| 16 # $real_db is a Bio::DB::RandomAccessI database | |
| 17 # | |
| 18 | |
| 19 $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN'); | |
| 20 | |
| 21 # | |
| 22 # $seq is a Bio::Seq object | |
| 23 # | |
| 24 | |
| 25 # more control provided with named-parameter form | |
| 26 | |
| 27 $cachedb = Bio::DB::FileCache->new( -seqdb => $real_db, | |
| 28 -file => $path, | |
| 29 -keep => $flag, | |
| 30 ); | |
| 31 =head1 DESCRIPTION | |
| 32 | |
| 33 This is a disk cache system which saves the objects returned by | |
| 34 Bio::DB::RandomAccessI on disk. The disk cache grows without limit, | |
| 35 while the process is running, but is automatically unlinked at process | |
| 36 termination unless the -keep flag is set. | |
| 37 | |
| 38 This module requires DB_File and Storable. | |
| 39 | |
| 40 =head1 CONTACT | |
| 41 | |
| 42 Lincoln Stein | |
| 43 | |
| 44 =head2 Reporting Bugs | |
| 45 | |
| 46 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 47 of the bugs and their resolution. Bug reports can be submitted via email | |
| 48 or the web: | |
| 49 | |
| 50 bioperl-bugs@bio.perl.org | |
| 51 http://bugzilla.bioperl.org/ | |
| 52 | |
| 53 =head1 APPENDIX | |
| 54 | |
| 55 The rest of the documentation details each of the object | |
| 56 methods. Internal methods are usually preceded with a _ | |
| 57 | |
| 58 =cut | |
| 59 | |
| 60 # Let the code begin... | |
| 61 | |
| 62 package Bio::DB::FileCache; | |
| 63 | |
| 64 use DB_File; | |
| 65 use Storable qw(freeze thaw); | |
| 66 use Fcntl qw(O_CREAT O_RDWR O_RDONLY); | |
| 67 use File::Temp 'tmpnam'; | |
| 68 | |
| 69 use vars qw(@ISA); | |
| 70 use strict; | |
| 71 | |
| 72 use Bio::Root::Root; | |
| 73 | |
| 74 @ISA = qw(Bio::Root::Root Bio::DB::SeqI); | |
| 75 | |
| 76 use Bio::DB::SeqI; | |
| 77 use Bio::Seq::RichSeq; | |
| 78 use Bio::Location::Split; | |
| 79 use Bio::Location::Fuzzy; | |
| 80 use Bio::Seq; | |
| 81 use Bio::SeqFeature::Generic; | |
| 82 use Bio::Species; | |
| 83 use Bio::Annotation::Collection; | |
| 84 | |
| 85 =head2 new | |
| 86 | |
| 87 Title : new | |
| 88 Usage : $db = Bio::DB::FileCache->new( | |
| 89 -seqdb => $db, # Bio::DB::RandomAccessI database | |
| 90 -file => $path, # path to index file | |
| 91 -keep => $flag, # don't unlink index file | |
| 92 ) | |
| 93 Function: creates a new on-disk cache | |
| 94 Returns : a Bio::DB::RandomAccessI database | |
| 95 Args : as above | |
| 96 Throws : "Must be a randomaccess database" exception | |
| 97 "Could not open primary index file" exception | |
| 98 | |
| 99 If no index file is specified, will create a temporary file in your | |
| 100 system's temporary file directory. The name of this temporary file | |
| 101 can be retrieved using file_name(). | |
| 102 | |
| 103 =cut | |
| 104 | |
# Constructor: builds a cache object in front of a sequence database.
#
# Named arguments (parsed by _rearrange):
#   -seqdb  the database to cache; must be a Bio::DB::RandomAccessI object
#   -file   path of the index file; a temporary name is generated if omitted
#   -keep   true to leave the index file in place (not unlinked)
#
# Returns the new cache object.
# Throws "Must be a randomaccess database ..." when -seqdb is missing, is
# not an object, or is not a Bio::DB::RandomAccessI.
sub new {
    my ($class, @args) = @_;

    my $self = Bio::Root::Root->new();
    bless $self, $class;

    my ($seqdb, $file_name, $keep) =
        $self->_rearrange([qw(SEQDB FILE KEEP)], @args);

    # Only blessed references can answer ->isa(); calling it on a plain
    # hash/array reference would die with an unrelated Perl error rather
    # than the documented exception, so check blessedness first.
    require Scalar::Util;
    unless ( defined Scalar::Util::blessed($seqdb)
        && $seqdb->isa('Bio::DB::RandomAccessI') )
    {
        # Avoid interpolating undef into the message.
        my $got = defined $seqdb ? $seqdb : 'undef';
        $self->throw("Must be a randomaccess database not a [$got]");
    }

    $self->seqdb($seqdb);

    # NOTE(review): tmpnam() only returns a name; the file itself is created
    # later by _open_database, so there is a window between the two.
    $file_name ||= tmpnam();
    $self->file_name($file_name);
    $self->keep($keep);

    $self->_open_database($file_name);
    return $self;
}
| 125 | |
| 126 =head2 get_Seq_by_id | |
| 127 | |
| 128 Title : get_Seq_by_id | |
| 129 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN') | |
| 130 Function: Gets a Bio::Seq object by its name | |
| 131 Returns : a Bio::Seq object | |
| 132 Args : the id (as a string) of a sequence | |
| 133 Throws : "id does not exist" exception | |
| 134 | |
| 135 | |
| 136 =cut | |
| 137 | |
# Fetch a sequence by id, consulting the on-disk cache before the backend.
# A cache hit is returned directly; on a miss the object is requested from
# the wrapped database, written to the cache under the 'id' namespace and
# then returned.
sub get_Seq_by_id {
    my $self = shift;
    my $id   = shift;

    # Cache hit: hand back the stored object.
    my $cached = $self->_get('id' => $id);
    if (defined $cached) {
        return $cached;
    }

    # Cache miss: ask the wrapped database and remember the answer.
    my $fetched = $self->seqdb->get_Seq_by_id($id);
    $self->_store('id' => $id, $fetched);

    return $fetched;
}
| 151 | |
| 152 =head2 get_Seq_by_acc | |
| 153 | |
| 154 Title : get_Seq_by_acc | |
| 155 Usage : $seq = $db->get_Seq_by_acc('X77802'); | |
| 156 Function: Gets a Bio::Seq object by accession number | |
| 157 Returns : A Bio::Seq object | |
| 158 Args : accession number (as a string) | |
| 159 Throws : "acc does not exist" exception | |
| 160 | |
| 161 | |
| 162 =cut | |
| 163 | |
# Fetch a sequence by accession number, consulting the on-disk cache before
# the backend. A cache hit is returned directly; on a miss the object is
# requested from the wrapped database, written to the cache under the 'acc'
# namespace and then returned.
sub get_Seq_by_acc {
    my $self = shift;
    my $acc  = shift;

    # Cache hit: hand back the stored object.
    my $cached = $self->_get('acc' => $acc);
    if (defined $cached) {
        return $cached;
    }

    # Cache miss: ask the wrapped database and remember the answer.
    my $fetched = $self->seqdb->get_Seq_by_acc($acc);
    $self->_store('acc' => $acc, $fetched);

    return $fetched;
}
| 177 | |
| 178 =head2 seqdb | |
| 179 | |
| 180 Title : seqdb | |
| 181 Usage : $seqdb = $db->seqdb([$seqdb]) | |
| 182 Function: gets/sets the Bio::DB::RandomAccessI database | |
| 183 Returns : a Bio::DB::RandomAccessI database | |
| 184 Args : new sequence database (optional) | |
| 185 Throws : nothing | |
| 186 | |
| 187 =cut | |
| 188 | |
# Accessor for the wrapped sequence database.
# With a true argument the stored database is replaced; in every case the
# database held after the call is returned. A false argument is treated as
# "no argument", so the stored value cannot be cleared through this method.
sub seqdb {
    my $self     = shift;
    my $new_seqdb = shift;

    $self->{'seqdb'} = $new_seqdb if $new_seqdb;
    return $self->{'seqdb'};
}
| 197 | |
| 198 =head2 file_name | |
| 199 | |
| 200 Title : file_name | |
| 201 Usage : $path = $db->file_name([$file_name]) | |
| 202 Function: gets/sets the name of the cache file | |
| 203 Returns : a path | |
| 204 Args : new cache file name (optional) | |
| 205 Throws : nothing | |
| 206 | |
| 207 It probably isn't useful to set the cache file name after you've | |
| 208 opened it. | |
| 209 | |
| 210 =cut | |
| 211 | |
| 212 #' | |
| 213 | |
# Accessor for the cache file path.
# Always returns the value held BEFORE the call; when an argument is
# supplied (even undef) it becomes the new stored value.
sub file_name {
    my ($self, @new_value) = @_;

    my $previous = $self->{file_name};
    if (@new_value) {
        $self->{file_name} = $new_value[0];
    }
    return $previous;
}
| 220 | |
| 221 =head2 keep | |
| 222 | |
| 223 Title : keep | |
| 224 Usage : $keep = $db->keep([$flag]) | |
| 225 Function: gets/sets the value of the "keep" flag | |
| 226 Returns : current value | |
| 227 Args : new value (optional) | |
| 228 Throws : nothing | |
| 229 | |
| 230 Unless the keep flag is set, the index file will be unlinked when the | |
| 231 process exits. Since on some operating systems (Unix, OS/2) the | |
| 232 unlinking occurs during the new() call immediately after opening the | |
| 233 file, it probably isn't safe to change this value. | |
| 234 | |
| 235 =cut | |
| 236 | |
# Accessor for the "keep" flag.
# Always returns the value held BEFORE the call; when an argument is
# supplied (even undef) it becomes the new stored value.
sub keep {
    my ($self, @new_value) = @_;

    my $previous = $self->{keep};
    if (@new_value) {
        $self->{keep} = $new_value[0];
    }
    return $previous;
}
| 243 | |
| 244 =head2 db | |
| 245 | |
| 246 Title : db | |
| 247 Usage : $db->db | |
| 248 Function: returns tied hash to index database | |
| 249 Returns : a Berkeley DB tied hashref | |
| 250 Args : none | |
| 251 Throws : nothing | |
| 252 | |
| 253 =cut | |
| 254 | |
# Read-only accessor: returns the hash reference stored under the 'db' key
# (set by _open_database to the tied index hash).
sub db {
    my $self = shift;
    return $self->{db};
}
| 256 | |
| 257 =head2 flush | |
| 258 | |
| 259 Title : flush | |
| 260 Usage : $db->flush | |
| 261 Function: flushes the cache | |
| 262 Returns : nothing | |
| 263 Args : none | |
| 264 Throws : nothing | |
| 265 | |
| 266 =cut | |
| 267 | |
# Empty the cache by clearing every entry of the index hash.
# Does nothing when no index hash is available.
sub flush {
    my $self  = shift;
    my $index = $self->db;
    return unless $index;

    %$index = ();
}
| 272 | |
# Look up a cached object.
# The key is "<type>_<id>"; the stored value is deserialised with
# Storable's thaw() and the result is returned.
sub _get {
    my ($self, $type, $id) = @_;

    my $key    = "${type}_${id}";
    my $frozen = $self->db->{$key};

    return thaw($frozen);
}
| 280 | |
# Write an object into the cache under the key "<type>_<id>".
#
# Args   : $type - key namespace (the callers in this file use 'id'/'acc')
#          $id   - identifier within that namespace
#          $obj  - reference to serialise with Storable's freeze()
# Returns: the serialised string that was stored, or nothing when $obj is
#          not a reference and was therefore skipped.
sub _store {
    my $self = shift;
    my ($type, $id, $obj) = @_;

    # Storable::freeze() croaks on anything that is not a reference, so a
    # backend that answers a miss with undef must not reach it; such
    # results are simply not cached.
    return unless ref $obj;

    my $serialized = freeze($obj);
    $self->db->{"${type}_${id}"} = $serialized;
}
| 287 | |
| 288 =head2 get_Seq_by_version | |
| 289 | |
| 290 Title : get_Seq_by_version | |
| 291 Usage : $seq = $db->get_Seq_by_version('X77802.1'); | |
| 292 Function: Gets a Bio::Seq object by sequence version | |
| 293 Returns : A Bio::Seq object | |
| 294 Args : accession.version (as a string) | |
| 295 Throws : "acc.version does not exist" exception | |
| 296 | |
| 297 =cut | |
| 298 | |
# Lookup by accession.version is not supported by the cache: every call
# raises an exception through the object's throw() method.
sub get_Seq_by_version {
    my $self = shift;
    $self->throw("Not implemented it");
}
| 303 | |
# Destructor: removes the index file unless the keep flag is set.
sub DESTROY {
    my $self = shift;
    return if $self->keep;

    # Drop our reference to the tied hash before unlinking so the database
    # handle can be released first; some platforms refuse to remove a file
    # that is still open.
    delete $self->{db};

    # The file name is unset if construction failed before it was recorded.
    my $file = $self->file_name;
    unlink $file if defined $file && length $file;
}
| 308 | |
| 309 | |
# Open (creating if necessary) the DB_File B-tree index at $file and keep
# the tied hash in $self->{db}.
#
# Args   : $file - path of the index file
# Throws : "Could not open primary index file ..." when the tie fails; the
#          message carries the path and the system error to aid diagnosis.
#
# Unless the keep flag is set, the file is unlinked straight after it has
# been opened.
sub _open_database {
    my $self = shift;
    my $file = shift;

    my $flags = O_CREAT | O_RDWR;
    my %db;
    tie(%db, 'DB_File', $file, $flags, 0666, $DB_BTREE)
        or $self->throw("Could not open primary index file $file: $!");
    $self->{db} = \%db;

    unlink $file unless $self->keep;
}
| 320 | |
| 321 ## End of Package | |
| 322 | |
| 323 1; |
