0
|
1
|
|
2 #
|
|
3 # $Id: Abstract.pm,v 1.41 2002/12/17 02:08:36 jason Exp $
|
|
4 #
|
|
5 # BioPerl module for Bio::Index::Abstract
|
|
6 #
|
|
7 # Cared for by Ewan Birney <birney@sanger.ac.uk>
|
|
8 # and James Gilbert <jgrg@sanger.ac.uk>
|
|
9 #
|
|
10 # You may distribute this module under the same terms as perl itself
|
|
11
|
|
12 # POD documentation - main docs before the code
|
|
13
|
|
14 =head1 NAME
|
|
15
|
|
16 Bio::Index::Abstract - Abstract interface for indexing a flat file
|
|
17
|
|
18 =head1 SYNOPSIS
|
|
19
|
|
20 You should not be using this module directly
|
|
21
|
|
22 =head1 USING DB_FILE
|
|
23
|
|
24 To use DB_File and not SDBM for this index, pass the value:
|
|
25
|
|
26 -dbm_package => 'DB_File'
|
|
27
|
|
28 to new (see below).
|
|
29
|
|
30 =head1 DESCRIPTION
|
|
31
|
|
32 This object provides the basic mechanism to associate positions
|
|
33 in files with names. The position and filenames are stored in DBM
|
|
34 which can then be accessed later on. It is the equivalent of flat
|
|
35 file indexing (eg, SRS or efetch).
|
|
36
|
|
37 This object is the guts to the mechanism, which will be used by the
|
|
38 specific objects inheriting from it.
|
|
39
|
|
40 =head1 FEEDBACK
|
|
41
|
|
42 =head2 Mailing Lists
|
|
43
|
|
44 User feedback is an integral part of the evolution of this and other
|
|
45 Bioperl modules. Send your comments and suggestions preferably to one
|
|
46 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
47
|
|
48 bioperl-l@bioperl.org - General discussion
|
|
49 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
50
|
|
51 =head2 Reporting Bugs
|
|
52
|
|
53 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
54 the bugs and their resolution. Bug reports can be submitted via
|
|
55 email or the web:
|
|
56
|
|
57 bioperl-bugs@bio.perl.org
|
|
58 http://bugzilla.bioperl.org/
|
|
59
|
|
60 =head1 AUTHOR - Ewan Birney, James Gilbert
|
|
61
|
|
62 Email - birney@sanger.ac.uk, jgrg@sanger.ac.uk
|
|
63
|
|
64 =head1 APPENDIX
|
|
65
|
|
66 The rest of the documentation details each of the object methods. Internal
|
|
67 methods are usually preceded with an "_" (underscore).
|
|
68
|
|
69 =cut
|
|
70
|
|
71
|
|
72 # Let the code begin...
|
|
73
|
|
74 package Bio::Index::Abstract;
|
|
75
|
|
76 use strict;
|
|
77 use Fcntl qw( O_RDWR O_CREAT O_RDONLY );
|
|
78 use vars qw( $TYPE_AND_VERSION_KEY
|
|
79 @ISA $USE_DBM_TYPE $DB_HASH );
|
|
80
|
|
81 # Object preamble - inheriets from Bio::Root::Object
|
|
82
|
|
83 use Bio::Root::Root;
|
|
84 use Bio::Root::IO;
|
|
85 use Symbol();
|
|
86
|
|
87 @ISA = qw(Bio::Root::Root);
|
|
88
|
|
89 # Generate accessor methods for simple object fields
|
|
90 BEGIN {
|
|
91 foreach my $func (qw(filename write_flag)) {
|
|
92 no strict 'refs';
|
|
93 my $field = "_$func";
|
|
94
|
|
95 *$func = sub {
|
|
96 my( $self, $value ) = @_;
|
|
97
|
|
98 if (defined $value) {
|
|
99 $self->{$field} = $value;
|
|
100 }
|
|
101 return $self->{$field};
|
|
102 }
|
|
103 }
|
|
104 }
|
|
105
|
|
106 =head2 new
|
|
107
|
|
108 Usage : $index = Bio::Index::Abstract->new(
|
|
109 -filename => $dbm_file,
|
|
110 -write_flag => 0,
|
|
111 -dbm_package => 'DB_File',
|
|
112 -verbose => 0);
|
|
113 Function: Returns a new index object. If filename is
|
|
114 specified, then open_dbm() is immediately called.
|
|
115 Bio::Index::Abstract->new() will usually be called
|
|
116 directly only when opening an existing index.
|
|
117 Returns : A new index object
|
|
118 Args : -filename The name of the dbm index file.
|
|
119 -write_flag TRUE if write access to the dbm file is
|
|
120 needed.
|
|
121 -dbm_package The Perl dbm module to use for the
|
|
122 index.
|
|
123 -verbose Print debugging output to STDERR if
|
|
124 TRUE.
|
|
125
|
|
126 =cut
|
|
127
|
|
128 sub new {
|
|
129 my($class, @args) = @_;
|
|
130 my $self = $class->SUPER::new(@args);
|
|
131 my( $filename, $write_flag, $dbm_package, $cachesize, $ffactor ) =
|
|
132 $self->_rearrange([qw(FILENAME
|
|
133 WRITE_FLAG
|
|
134 DBM_PACKAGE
|
|
135 CACHESIZE
|
|
136 FFACTOR
|
|
137 )], @args);
|
|
138
|
|
139 # Store any parameters passed
|
|
140 $self->filename($filename) if $filename;
|
|
141 $self->cachesize($cachesize) if $cachesize;
|
|
142 $self->ffactor($ffactor) if $ffactor;
|
|
143 $self->write_flag($write_flag) if $write_flag;
|
|
144 $self->dbm_package($dbm_package) if $dbm_package;
|
|
145
|
|
146 $self->{'_filehandle'} = []; # Array in which to cache SeqIO objects
|
|
147 $self->{'_DB'} = {}; # Gets tied to the DBM file
|
|
148
|
|
149 # Open database
|
|
150 $self->open_dbm() if $filename;
|
|
151 return $self;
|
|
152 }
|
|
153
|
|
154 =pod
|
|
155
|
|
156 =head2 filename
|
|
157
|
|
158 Title : filename
|
|
159 Usage : $value = $self->filename();
|
|
160 $self->filename($value);
|
|
161 Function: Gets or sets the name of the dbm index file.
|
|
162 Returns : The current value of filename
|
|
163 Args : Value of filename if setting, or none if
|
|
164 getting the value.
|
|
165
|
|
166 =head2 write_flag
|
|
167
|
|
168 Title : write_flag
|
|
169 Usage : $value = $self->write_flag();
|
|
170 $self->write_flag($value);
|
|
171 Function: Gets or sets the value of write_flag, which
|
|
172 is wether the dbm file should be opened with
|
|
173 write access.
|
|
174 Returns : The current value of write_flag (default 0)
|
|
175 Args : Value of write_flag if setting, or none if
|
|
176 getting the value.
|
|
177
|
|
178 =head2 dbm_package
|
|
179
|
|
180 Usage : $value = $self->dbm_package();
|
|
181 $self->dbm_package($value);
|
|
182
|
|
183 Function: Gets or sets the name of the Perl dbm module used.
|
|
184 If the value is unset, then it returns the value of
|
|
185 the package variable $USE_DBM_TYPE or if that is
|
|
186 unset, then it chooses the best available dbm type,
|
|
187 choosing 'DB_File' in preference to 'SDBM_File'.
|
|
188 Bio::Abstract::Index may work with other dbm file
|
|
189 types.
|
|
190
|
|
191 Returns : The current value of dbm_package
|
|
192 Args : Value of dbm_package if setting, or none if
|
|
193 getting the value.
|
|
194
|
|
195 =cut
|
|
196
|
|
197 sub dbm_package {
|
|
198 my( $self, $value ) = @_;
|
|
199 my $to_require = 0;
|
|
200 if( $value || ! $self->{'_dbm_package'} ) {
|
|
201 my $type = $value || $USE_DBM_TYPE || 'DB_File';
|
|
202 if( $type =~ /DB_File/i ) {
|
|
203 eval {
|
|
204 require DB_File;
|
|
205 };
|
|
206 $type = ( $@ ) ? 'SDBM_File' : 'DB_File';
|
|
207 }
|
|
208 if( $type ne 'DB_File' ) {
|
|
209 eval { require "$type.pm"; };
|
|
210 $self->throw($@) if( $@ );
|
|
211 }
|
|
212 $self->{'_dbm_package'} = $type;
|
|
213 if( ! defined $USE_DBM_TYPE ) {
|
|
214 $USE_DBM_TYPE = $self->{'_dbm_package'};
|
|
215 }
|
|
216 }
|
|
217 return $self->{'_dbm_package'};
|
|
218 }
|
|
219
|
|
220 =head2 db
|
|
221
|
|
222 Title : db
|
|
223 Usage : $index->db
|
|
224 Function: Returns a ref to the hash which is tied to the dbm
|
|
225 file. Used internally when adding and retrieving
|
|
226 data from the database.
|
|
227 Example : $db = $index->db();
|
|
228 $db->{ $some_key } = $data
|
|
229 $data = $index->db->{ $some_key };
|
|
230 Returns : ref to HASH
|
|
231 Args : NONE
|
|
232
|
|
233 =cut
|
|
234
|
|
235 sub db {
|
|
236 return $_[0]->{'_DB'};
|
|
237 }
|
|
238
|
|
239
|
|
240 =head2 get_stream
|
|
241
|
|
242 Title : get_stream
|
|
243 Usage : $stream = $index->get_stream( $id );
|
|
244 Function: Returns a file handle with the file pointer
|
|
245 at the approprite place
|
|
246
|
|
247 This provides for a way to get the actual
|
|
248 file contents and not an object
|
|
249
|
|
250 WARNING: you must parse the record deliminter
|
|
251 *yourself*. Abstract wont do this for you
|
|
252 So this code
|
|
253
|
|
254 $fh = $index->get_stream($myid);
|
|
255 while( <$fh> ) {
|
|
256 # do something
|
|
257 }
|
|
258 will parse the entire file if you don't put in
|
|
259 a last statement in, like
|
|
260
|
|
261 while( <$fh> ) {
|
|
262 /^\/\// && last; # end of record
|
|
263 # do something
|
|
264 }
|
|
265
|
|
266 Returns : A filehandle object
|
|
267 Args : string represents the accession number
|
|
268 Notes : This method should not be used without forethought
|
|
269
|
|
270 =cut
|
|
271
|
|
272 #'
|
|
273
|
|
274 sub get_stream {
|
|
275 my ($self,$id) = @_;
|
|
276
|
|
277 my ($desc,$acc,$out);
|
|
278 my $db = $self->db();
|
|
279
|
|
280 if (my $rec = $db->{ $id }) {
|
|
281 my( @record );
|
|
282
|
|
283 my ($file, $begin, $end) = $self->unpack_record( $rec );
|
|
284
|
|
285 # Get the (possibly cached) filehandle
|
|
286 my $fh = $self->_file_handle( $file );
|
|
287
|
|
288 # move to start
|
|
289 seek($fh, $begin, 0);
|
|
290
|
|
291 return $fh;
|
|
292 }
|
|
293 else {
|
|
294 $self->throw("Unable to find a record for $id in the flat file index");
|
|
295 }
|
|
296 }
|
|
297
|
|
298
|
|
299 =head2 cachesize
|
|
300
|
|
301 Usage : $index->cachesize(1000000)
|
|
302 Function: Sets the dbm file cache size for the index.
|
|
303 Needs to be set before the DBM file gets opened.
|
|
304 Example : $index->cachesize(1000000)
|
|
305 Returns : size of the curent cache
|
|
306
|
|
307 =cut
|
|
308
|
|
309 sub cachesize {
|
|
310 my( $self, $size ) = @_;
|
|
311
|
|
312 if(defined $size){
|
|
313 $self->{'_cachesize'} = $size;
|
|
314 }
|
|
315 return ( $self->{'_cachesize'} );
|
|
316
|
|
317 }
|
|
318
|
|
319
|
|
320 =head2 ffactor
|
|
321
|
|
322 Usage : $index->ffactor(1000000)
|
|
323 Function: Sets the dbm file fill factor.
|
|
324 Needs to be set before the DBM file gets opened.
|
|
325
|
|
326 Example : $index->ffactor(1000000)
|
|
327 Returns : size of the curent cache
|
|
328
|
|
329 =cut
|
|
330
|
|
331 sub ffactor {
|
|
332 my( $self, $size ) = @_;
|
|
333
|
|
334 if(defined $size){
|
|
335 $self->{'_ffactor'} = $size;
|
|
336 }
|
|
337 return ( $self->{'_ffactor'} );
|
|
338
|
|
339 }
|
|
340
|
|
341
|
|
342 =head2 open_dbm
|
|
343
|
|
344 Usage : $index->open_dbm()
|
|
345 Function: Opens the dbm file associated with the index
|
|
346 object. Write access is only given if explicitly
|
|
347 asked for by calling new(-write => 1) or having set
|
|
348 the write_flag(1) on the index object. The type of
|
|
349 dbm file opened is that returned by dbm_package().
|
|
350 The name of the file to be is opened is obtained by
|
|
351 calling the filename() method.
|
|
352
|
|
353 Example : $index->_open_dbm()
|
|
354 Returns : 1 on success
|
|
355
|
|
356 =cut
|
|
357
|
|
358 sub open_dbm {
|
|
359 my( $self ) = @_;
|
|
360
|
|
361 my $filename = $self->filename()
|
|
362 or $self->throw("filename() not set");
|
|
363
|
|
364 my $db = $self->db();
|
|
365
|
|
366 # Close the dbm file if already open (maybe we're getting
|
|
367 # or dropping write access
|
|
368 if (ref($db) ne 'HASH') {
|
|
369 untie($db);
|
|
370 }
|
|
371
|
|
372 # What kind of DBM file are we going to open?
|
|
373 my $dbm_type = $self->dbm_package;
|
|
374
|
|
375 # Choose mode for opening dbm file (read/write+create or read-only).
|
|
376 my $mode_flags = $self->write_flag ? O_RDWR|O_CREAT : O_RDONLY;
|
|
377
|
|
378 # Open the dbm file
|
|
379 if ($dbm_type eq 'DB_File') {
|
|
380 my $hash_inf = DB_File::HASHINFO->new();
|
|
381 my $cache = $self->cachesize();
|
|
382 my $ffactor = $self->ffactor();
|
|
383 if ($cache){
|
|
384 $hash_inf->{'cachesize'} = $cache;
|
|
385 }
|
|
386 if ($ffactor){
|
|
387 $hash_inf->{'ffactor'} = $ffactor;
|
|
388 }
|
|
389 tie( %$db, $dbm_type, $filename, $mode_flags, 0644, $hash_inf )
|
|
390 or $self->throw("Can't open '$dbm_type' dbm file '$filename' : $!");
|
|
391 } else {
|
|
392 tie( %$db, $dbm_type, $filename, $mode_flags, 0644 )
|
|
393 or $self->throw("Can't open '$dbm_type' dbm file '$filename' : $!");
|
|
394 }
|
|
395
|
|
396 # The following methods access data in the dbm file:
|
|
397
|
|
398 # Now, if we're a Bio::Index::Abstract caterpillar, then we
|
|
399 # transform ourselves into a Bio::Index::<something> butterfly!
|
|
400 if( ref($self) eq "Bio::Index::Abstract" ) {
|
|
401 my $pkg = $self->_code_base();
|
|
402 bless $self, $pkg;
|
|
403 }
|
|
404
|
|
405 # Check or set this is the right kind and version of index
|
|
406 $self->_type_and_version();
|
|
407
|
|
408 # Check files haven't changed size since they were indexed
|
|
409 $self->_check_file_sizes();
|
|
410
|
|
411 return 1;
|
|
412 }
|
|
413
|
|
414 =head2 _version
|
|
415
|
|
416 Title : _version
|
|
417 Usage : $type = $index->_version()
|
|
418 Function: Returns a string which identifes the version of an
|
|
419 index module. Used to permanently identify an index
|
|
420 file as having been created by a particular version
|
|
421 of the index module. Must be provided by the sub class
|
|
422 Example :
|
|
423 Returns :
|
|
424 Args : none
|
|
425
|
|
426 =cut
|
|
427
|
|
428 sub _version {
|
|
429 my $self = shift;
|
|
430
|
|
431 $self->throw("In Bio::Index::Abstract, no _version method in sub class");
|
|
432 }
|
|
433
|
|
434 =head2 _code_base
|
|
435
|
|
436 Title : _code_base
|
|
437 Usage : $code = $db->_code_base();
|
|
438 Function:
|
|
439 Example :
|
|
440 Returns : Code package to be used with this
|
|
441 Args :
|
|
442
|
|
443
|
|
444 =cut
|
|
445
|
|
446 sub _code_base {
|
|
447 my ($self) = @_;
|
|
448 my $code_key = '__TYPE_AND_VERSION';
|
|
449 my $record;
|
|
450
|
|
451 $record = $self->db->{$code_key};
|
|
452
|
|
453 my($code,$version) = $self->unpack_record($record);
|
|
454 if( wantarray ) {
|
|
455 return ($code,$version);
|
|
456 } else {
|
|
457 return $code;
|
|
458 }
|
|
459 }
|
|
460
|
|
461
|
|
462 =head2 _type_and_version
|
|
463
|
|
464 Title : _type_and_version
|
|
465 Usage : Called by _initalize
|
|
466 Function: Checks that the index opened is made by the same index
|
|
467 module and version of that module that made it. If the
|
|
468 index is empty, then it adds the information to the
|
|
469 database.
|
|
470 Example :
|
|
471 Returns : 1 or exception
|
|
472 Args : none
|
|
473
|
|
474 =cut
|
|
475
|
|
476 sub _type_and_version {
|
|
477 my $self = shift;
|
|
478 my $key = '__TYPE_AND_VERSION';
|
|
479 my $version = $self->_version();
|
|
480 my $type = ref $self;
|
|
481
|
|
482 # Run check or add type and version key if missing
|
|
483 if (my $rec = $self->db->{ $key }) {
|
|
484 my( $db_type, $db_version ) = $self->unpack_record($rec);
|
|
485 $self->throw("This index file is from version [$db_version] - You need to rebuild it to use module version [$version]")
|
|
486 unless $db_version == $version;
|
|
487 $self->throw("This index file is type [$db_type] - Can't access it with module for [$type]")
|
|
488 unless $db_type eq $type;
|
|
489 } else {
|
|
490 $self->add_record( $key, $type, $version )
|
|
491 or $self->throw("Can't add Type and Version record");
|
|
492 }
|
|
493 return 1;
|
|
494 }
|
|
495
|
|
496
|
|
497 =head2 _check_file_sizes
|
|
498
|
|
499 Title : _check_file_sizes
|
|
500 Usage : $index->_check_file_sizes()
|
|
501 Function: Verifies that the files listed in the database
|
|
502 are the same size as when the database was built,
|
|
503 or throws an exception. Called by the new()
|
|
504 function.
|
|
505 Example :
|
|
506 Returns : 1 or exception
|
|
507 Args :
|
|
508
|
|
509 =cut
|
|
510
|
|
511 sub _check_file_sizes {
|
|
512 my $self = shift;
|
|
513 my $num = $self->_file_count() || 0;
|
|
514
|
|
515 for (my $i = 0; $i < $num; $i++) {
|
|
516 my( $file, $stored_size ) = $self->unpack_record( $self->db->{"__FILE_$i"} );
|
|
517 my $size = -s $file;
|
|
518 unless ($size == $stored_size) {
|
|
519 $self->throw("file $i [ $file ] has changed size $stored_size -> $size. This probably means you need to rebuild the index.");
|
|
520 }
|
|
521 }
|
|
522 return 1;
|
|
523 }
|
|
524
|
|
525
|
|
526 =head2 make_index
|
|
527
|
|
528 Title : make_index
|
|
529 Usage : $index->make_index( FILE_LIST )
|
|
530 Function: Takes a list of file names, checks that they are
|
|
531 all fully qualified, and then calls _filename() on
|
|
532 each. It supplies _filename() with the name of the
|
|
533 file, and an integer which is stored with each record
|
|
534 created by _filename(). Can be called multiple times,
|
|
535 and can be used to add to an existing index file.
|
|
536 Example : $index->make_index( '/home/seqs1', '/home/seqs2', '/nfs/pub/big_db' );
|
|
537 Returns : Number of files indexed
|
|
538 Args : LIST OF FILES
|
|
539
|
|
540 =cut
|
|
541
|
|
542 sub make_index {
|
|
543 my($self, @files) = @_;
|
|
544 my $count = 0;
|
|
545 my $recs = 0;
|
|
546 # blow up if write flag is not set. EB fix
|
|
547
|
|
548 if( !defined $self->write_flag ) {
|
|
549 $self->throw("Attempting to make an index on a read-only database. What about a WRITE flag on opening the index?");
|
|
550 }
|
|
551
|
|
552 # We're really fussy/lazy, expecting all file names to be fully qualified
|
|
553 $self->throw("No files to index provided") unless @files;
|
|
554 for(my $i=0;$i<scalar @files; $i++) {
|
|
555 if( $Bio::Root::IO::FILESPECLOADED && File::Spec->can('rel2abs') ) {
|
|
556 if( ! File::Spec->file_name_is_absolute($files[$i]) ) {
|
|
557 $files[$i] = File::Spec->rel2abs($files[$i]);
|
|
558 }
|
|
559 } else {
|
|
560 if( $^O =~ /MSWin/i ) {
|
|
561 ($files[$i] =~ m|^[A-Za-z]:/|) ||
|
|
562 $self->throw("Not an absolute file path '$files[$i]'");
|
|
563 } else {
|
|
564 ($files[$i] =~ m|^/|) ||
|
|
565 $self->throw("Not an absolute file path '$files[$i]'");
|
|
566 }
|
|
567 }
|
|
568 $self->throw("File does not exist '$files[$i]'") unless -e $files[$i];
|
|
569 }
|
|
570
|
|
571 # Add each file to the index
|
|
572 FILE :
|
|
573 foreach my $file (@files) {
|
|
574
|
|
575 my $i; # index for this file
|
|
576
|
|
577 # Get new index for this file and increment file count
|
|
578 if ( defined(my $count = $self->_file_count) ) {
|
|
579 $i = $count;
|
|
580 } else {
|
|
581 $i = 0; $self->_file_count(0);
|
|
582 }
|
|
583
|
|
584 # see whether this file has been already indexed
|
|
585 my ($record,$number,$size);
|
|
586
|
|
587 if( ($record = $self->db->{"__FILENAME_$file"}) ) {
|
|
588 ($number,$size) = $self->unpack_record($record);
|
|
589
|
|
590 # if it is the same size - fine. Otherwise die
|
|
591 if( -s $file == $size ) {
|
|
592 warn "File $file already indexed. Skipping...\n";
|
|
593 next FILE;
|
|
594 } else {
|
|
595 $self->throw("In index, $file has changed size ($size). Indicates that the index is out of date");
|
|
596 }
|
|
597 }
|
|
598
|
|
599 # index this file
|
|
600 warn "Indexing file $file\n" if( $self->verbose > 0);
|
|
601
|
|
602 # this is supplied by the subclass and does the serious work
|
|
603 $recs += $self->_index_file( $file, $i ); # Specific method for each type of index
|
|
604
|
|
605
|
|
606 # Save file name and size for this index
|
|
607 $self->add_record("__FILE_$i", $file, -s $file)
|
|
608 or $self->throw("Can't add data to file: $file");
|
|
609 $self->add_record("__FILENAME_$file", $i, -s $file)
|
|
610 or $self->throw("Can't add data to file: $file");
|
|
611
|
|
612 # increment file lines
|
|
613 $i++; $self->_file_count($i);
|
|
614 my $temp;
|
|
615 $temp = $self->_file_count();
|
|
616
|
|
617
|
|
618 }
|
|
619 return ($count, $recs);
|
|
620 }
|
|
621
|
|
622 =head2 _filename
|
|
623
|
|
624 Title : _filename
|
|
625 Usage : $index->_filename( FILE INT )
|
|
626 Function: Indexes the file
|
|
627 Example :
|
|
628 Returns :
|
|
629 Args :
|
|
630
|
|
631 =cut
|
|
632
|
|
633 sub _index_file {
|
|
634 my $self = shift;
|
|
635
|
|
636 my $pkg = ref($self);
|
|
637 $self->throw("Error: '$pkg' does not provide the _index_file() method");
|
|
638 }
|
|
639
|
|
640
|
|
641
|
|
642 =head2 _file_handle
|
|
643
|
|
644 Title : _file_handle
|
|
645 Usage : $fh = $index->_file_handle( INT )
|
|
646 Function: Returns an open filehandle for the file
|
|
647 index INT. On opening a new filehandle it
|
|
648 caches it in the @{$index->_filehandle} array.
|
|
649 If the requested filehandle is already open,
|
|
650 it simply returns it from the array.
|
|
651 Example : $fist_file_indexed = $index->_file_handle( 0 );
|
|
652 Returns : ref to a filehandle
|
|
653 Args : INT
|
|
654
|
|
655 =cut
|
|
656
|
|
657 sub _file_handle {
|
|
658 my( $self, $i ) = @_;
|
|
659
|
|
660 unless ($self->{'_filehandle'}[$i]) {
|
|
661 my $fh = Symbol::gensym();
|
|
662 my @rec = $self->unpack_record($self->db->{"__FILE_$i"})
|
|
663 or $self->throw("Can't get filename for index : $i");
|
|
664 my $file = $rec[0];
|
|
665 open $fh, $file or $self->throw("Can't read file '$file' : $!");
|
|
666 $self->{'_filehandle'}[$i] = $fh; # Cache filehandle
|
|
667 }
|
|
668 return $self->{'_filehandle'}[$i];
|
|
669 }
|
|
670
|
|
671
|
|
672 =head2 _file_count
|
|
673
|
|
674 Title : _file_count
|
|
675 Usage : $index->_file_count( INT )
|
|
676 Function: Used by the index building sub in a sub class to
|
|
677 track the number of files indexed. Sets or gets
|
|
678 the number of files indexed when called with or
|
|
679 without an argument.
|
|
680 Example :
|
|
681 Returns : INT
|
|
682 Args : INT
|
|
683
|
|
684 =cut
|
|
685
|
|
686 sub _file_count {
|
|
687 my $self = shift;
|
|
688 if (@_) {
|
|
689 $self->db->{'__FILE_COUNT'} = shift;
|
|
690 }
|
|
691 return $self->db->{'__FILE_COUNT'};
|
|
692 }
|
|
693
|
|
694
|
|
695 =head2 add_record
|
|
696
|
|
697 Title : add_record
|
|
698 Usage : $index->add_record( $id, @stuff );
|
|
699 Function: Calls pack_record on @stuff, and adds the result
|
|
700 of pack_record to the index database under key $id.
|
|
701 If $id is a reference to an array, then a new entry
|
|
702 is added under a key corresponding to each element
|
|
703 of the array.
|
|
704 Example : $index->add_record( $id, $fileNumber, $begin, $end )
|
|
705 Returns : TRUE on success or FALSE on failure
|
|
706 Args : ID LIST
|
|
707
|
|
708 =cut
|
|
709
|
|
710 sub add_record {
|
|
711 my( $self, $id, @rec ) = @_;
|
|
712 $self->debug( "Adding key $id\n") if( $self->verbose > 0 );
|
|
713 $self->db->{$id} = $self->pack_record( @rec );
|
|
714 return 1;
|
|
715 }
|
|
716
|
|
717
|
|
718 =head2 pack_record
|
|
719
|
|
720 Title : pack_record
|
|
721 Usage : $packed_string = $index->pack_record( LIST )
|
|
722 Function: Packs an array of scalars into a single string
|
|
723 joined by ASCII 034 (which is unlikely to be used
|
|
724 in any of the strings), and returns it.
|
|
725 Example : $packed_string = $index->pack_record( $fileNumber, $begin, $end )
|
|
726 Returns : STRING or undef
|
|
727 Args : LIST
|
|
728
|
|
729 =cut
|
|
730
|
|
731 sub pack_record {
|
|
732 my( $self, @args ) = @_;
|
|
733 return join "\034", @args;
|
|
734 }
|
|
735
|
|
736 =head2 unpack_record
|
|
737
|
|
738 Title : unpack_record
|
|
739 Usage : $index->unpack_record( STRING )
|
|
740 Function: Splits the sting provided into an array,
|
|
741 splitting on ASCII 034.
|
|
742 Example : ( $fileNumber, $begin, $end ) = $index->unpack_record( $self->db->{$id} )
|
|
743 Returns : A 3 element ARRAY
|
|
744 Args : STRING containing ASCII 034
|
|
745
|
|
746 =cut
|
|
747
|
|
748 sub unpack_record {
|
|
749 my( $self, @args ) = @_;
|
|
750 return split /\034/, $args[0];
|
|
751 }
|
|
752
|
|
753 =head2 count_records
|
|
754
|
|
755 Title : count_records
|
|
756 Usage : $recs = $seqdb->count_records()
|
|
757 Function: return count of all recs in the index
|
|
758 Example :
|
|
759 Returns : a scalar
|
|
760 Args : none
|
|
761
|
|
762
|
|
763 =cut
|
|
764
|
|
765 sub count_records {
|
|
766 my ($self,@args) = @_;
|
|
767 my $db = $self->db;
|
|
768 my $c = 0;
|
|
769 while (my($id, $rec) = each %$db) {
|
|
770 if( $id =~ /^__/ ) {
|
|
771 # internal info
|
|
772 next;
|
|
773 }
|
|
774 $c++;
|
|
775 }
|
|
776
|
|
777 return ($c);
|
|
778 }
|
|
779
|
|
780
|
|
781 =head2 DESTROY
|
|
782
|
|
783 Title : DESTROY
|
|
784 Usage : Called automatically when index goes out of scope
|
|
785 Function: Closes connection to database and handles to
|
|
786 sequence files
|
|
787 Returns : NEVER
|
|
788 Args : NONE
|
|
789
|
|
790
|
|
791 =cut
|
|
792
|
|
793 sub DESTROY {
|
|
794 my $self = shift;
|
|
795 untie($self->{'_DB'});
|
|
796 }
|
|
797
|
|
798 1;
|