Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Index/Abstract.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 | |
| 2 # | |
| 3 # $Id: Abstract.pm,v 1.41 2002/12/17 02:08:36 jason Exp $ | |
| 4 # | |
| 5 # BioPerl module for Bio::Index::Abstract | |
| 6 # | |
| 7 # Cared for by Ewan Birney <birney@sanger.ac.uk> | |
| 8 # and James Gilbert <jgrg@sanger.ac.uk> | |
| 9 # | |
| 10 # You may distribute this module under the same terms as perl itself | |
| 11 | |
| 12 # POD documentation - main docs before the code | |
| 13 | |
| 14 =head1 NAME | |
| 15 | |
| 16 Bio::Index::Abstract - Abstract interface for indexing a flat file | |
| 17 | |
| 18 =head1 SYNOPSIS | |
| 19 | |
| 20 You should not be using this module directly | |
| 21 | |
| 22 =head1 USING DB_FILE | |
| 23 | |
| 24 To use DB_File and not SDBM for this index, pass the value: | |
| 25 | |
| 26 -dbm_package => 'DB_File' | |
| 27 | |
| 28 to new (see below). | |
| 29 | |
| 30 =head1 DESCRIPTION | |
| 31 | |
| 32 This object provides the basic mechanism to associate positions | |
| 33 in files with names. The position and filenames are stored in DBM | |
| 34 which can then be accessed later on. It is the equivalent of flat | |
| 35 file indexing (eg, SRS or efetch). | |
| 36 | |
| 37 This object is the guts to the mechanism, which will be used by the | |
| 38 specific objects inheriting from it. | |
| 39 | |
| 40 =head1 FEEDBACK | |
| 41 | |
| 42 =head2 Mailing Lists | |
| 43 | |
| 44 User feedback is an integral part of the evolution of this and other | |
| 45 Bioperl modules. Send your comments and suggestions preferably to one | |
| 46 of the Bioperl mailing lists. Your participation is much appreciated. | |
| 47 | |
| 48 bioperl-l@bioperl.org - General discussion | |
| 49 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 50 | |
| 51 =head2 Reporting Bugs | |
| 52 | |
| 53 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 54 the bugs and their resolution. Bug reports can be submitted via | |
| 55 email or the web: | |
| 56 | |
| 57 bioperl-bugs@bio.perl.org | |
| 58 http://bugzilla.bioperl.org/ | |
| 59 | |
| 60 =head1 AUTHOR - Ewan Birney, James Gilbert | |
| 61 | |
| 62 Email - birney@sanger.ac.uk, jgrg@sanger.ac.uk | |
| 63 | |
| 64 =head1 APPENDIX | |
| 65 | |
| 66 The rest of the documentation details each of the object methods. Internal | |
| 67 methods are usually preceded with an "_" (underscore). | |
| 68 | |
| 69 =cut | |
| 70 | |
| 71 | |
| 72 # Let the code begin... | |
| 73 | |
| 74 package Bio::Index::Abstract; | |
| 75 | |
| 76 use strict; | |
| 77 use Fcntl qw( O_RDWR O_CREAT O_RDONLY ); | |
| 78 use vars qw( $TYPE_AND_VERSION_KEY | |
| 79 @ISA $USE_DBM_TYPE $DB_HASH ); | |
| 80 | |
| 81 # Object preamble - inheriets from Bio::Root::Object | |
| 82 | |
| 83 use Bio::Root::Root; | |
| 84 use Bio::Root::IO; | |
| 85 use Symbol(); | |
| 86 | |
| 87 @ISA = qw(Bio::Root::Root); | |
| 88 | |
| 89 # Generate accessor methods for simple object fields | |
| 90 BEGIN { | |
| 91 foreach my $func (qw(filename write_flag)) { | |
| 92 no strict 'refs'; | |
| 93 my $field = "_$func"; | |
| 94 | |
| 95 *$func = sub { | |
| 96 my( $self, $value ) = @_; | |
| 97 | |
| 98 if (defined $value) { | |
| 99 $self->{$field} = $value; | |
| 100 } | |
| 101 return $self->{$field}; | |
| 102 } | |
| 103 } | |
| 104 } | |
| 105 | |
| 106 =head2 new | |
| 107 | |
| 108 Usage : $index = Bio::Index::Abstract->new( | |
| 109 -filename => $dbm_file, | |
| 110 -write_flag => 0, | |
| 111 -dbm_package => 'DB_File', | |
| 112 -verbose => 0); | |
| 113 Function: Returns a new index object. If filename is | |
| 114 specified, then open_dbm() is immediately called. | |
| 115 Bio::Index::Abstract->new() will usually be called | |
| 116 directly only when opening an existing index. | |
| 117 Returns : A new index object | |
| 118 Args : -filename The name of the dbm index file. | |
| 119 -write_flag TRUE if write access to the dbm file is | |
| 120 needed. | |
| 121 -dbm_package The Perl dbm module to use for the | |
| 122 index. | |
| 123 -verbose Print debugging output to STDERR if | |
| 124 TRUE. | |
| 125 | |
| 126 =cut | |
| 127 | |
| 128 sub new { | |
| 129 my($class, @args) = @_; | |
| 130 my $self = $class->SUPER::new(@args); | |
| 131 my( $filename, $write_flag, $dbm_package, $cachesize, $ffactor ) = | |
| 132 $self->_rearrange([qw(FILENAME | |
| 133 WRITE_FLAG | |
| 134 DBM_PACKAGE | |
| 135 CACHESIZE | |
| 136 FFACTOR | |
| 137 )], @args); | |
| 138 | |
| 139 # Store any parameters passed | |
| 140 $self->filename($filename) if $filename; | |
| 141 $self->cachesize($cachesize) if $cachesize; | |
| 142 $self->ffactor($ffactor) if $ffactor; | |
| 143 $self->write_flag($write_flag) if $write_flag; | |
| 144 $self->dbm_package($dbm_package) if $dbm_package; | |
| 145 | |
| 146 $self->{'_filehandle'} = []; # Array in which to cache SeqIO objects | |
| 147 $self->{'_DB'} = {}; # Gets tied to the DBM file | |
| 148 | |
| 149 # Open database | |
| 150 $self->open_dbm() if $filename; | |
| 151 return $self; | |
| 152 } | |
| 153 | |
| 154 =pod | |
| 155 | |
| 156 =head2 filename | |
| 157 | |
| 158 Title : filename | |
| 159 Usage : $value = $self->filename(); | |
| 160 $self->filename($value); | |
| 161 Function: Gets or sets the name of the dbm index file. | |
| 162 Returns : The current value of filename | |
| 163 Args : Value of filename if setting, or none if | |
| 164 getting the value. | |
| 165 | |
| 166 =head2 write_flag | |
| 167 | |
| 168 Title : write_flag | |
| 169 Usage : $value = $self->write_flag(); | |
| 170 $self->write_flag($value); | |
| 171 Function: Gets or sets the value of write_flag, which | |
| 172 is wether the dbm file should be opened with | |
| 173 write access. | |
| 174 Returns : The current value of write_flag (default 0) | |
| 175 Args : Value of write_flag if setting, or none if | |
| 176 getting the value. | |
| 177 | |
| 178 =head2 dbm_package | |
| 179 | |
| 180 Usage : $value = $self->dbm_package(); | |
| 181 $self->dbm_package($value); | |
| 182 | |
| 183 Function: Gets or sets the name of the Perl dbm module used. | |
| 184 If the value is unset, then it returns the value of | |
| 185 the package variable $USE_DBM_TYPE or if that is | |
| 186 unset, then it chooses the best available dbm type, | |
| 187 choosing 'DB_File' in preference to 'SDBM_File'. | |
| 188 Bio::Abstract::Index may work with other dbm file | |
| 189 types. | |
| 190 | |
| 191 Returns : The current value of dbm_package | |
| 192 Args : Value of dbm_package if setting, or none if | |
| 193 getting the value. | |
| 194 | |
| 195 =cut | |
| 196 | |
| 197 sub dbm_package { | |
| 198 my( $self, $value ) = @_; | |
| 199 my $to_require = 0; | |
| 200 if( $value || ! $self->{'_dbm_package'} ) { | |
| 201 my $type = $value || $USE_DBM_TYPE || 'DB_File'; | |
| 202 if( $type =~ /DB_File/i ) { | |
| 203 eval { | |
| 204 require DB_File; | |
| 205 }; | |
| 206 $type = ( $@ ) ? 'SDBM_File' : 'DB_File'; | |
| 207 } | |
| 208 if( $type ne 'DB_File' ) { | |
| 209 eval { require "$type.pm"; }; | |
| 210 $self->throw($@) if( $@ ); | |
| 211 } | |
| 212 $self->{'_dbm_package'} = $type; | |
| 213 if( ! defined $USE_DBM_TYPE ) { | |
| 214 $USE_DBM_TYPE = $self->{'_dbm_package'}; | |
| 215 } | |
| 216 } | |
| 217 return $self->{'_dbm_package'}; | |
| 218 } | |
| 219 | |
| 220 =head2 db | |
| 221 | |
| 222 Title : db | |
| 223 Usage : $index->db | |
| 224 Function: Returns a ref to the hash which is tied to the dbm | |
| 225 file. Used internally when adding and retrieving | |
| 226 data from the database. | |
| 227 Example : $db = $index->db(); | |
| 228 $db->{ $some_key } = $data | |
| 229 $data = $index->db->{ $some_key }; | |
| 230 Returns : ref to HASH | |
| 231 Args : NONE | |
| 232 | |
| 233 =cut | |
| 234 | |
| 235 sub db { | |
| 236 return $_[0]->{'_DB'}; | |
| 237 } | |
| 238 | |
| 239 | |
| 240 =head2 get_stream | |
| 241 | |
| 242 Title : get_stream | |
| 243 Usage : $stream = $index->get_stream( $id ); | |
| 244 Function: Returns a file handle with the file pointer | |
| 245 at the approprite place | |
| 246 | |
| 247 This provides for a way to get the actual | |
| 248 file contents and not an object | |
| 249 | |
| 250 WARNING: you must parse the record deliminter | |
| 251 *yourself*. Abstract wont do this for you | |
| 252 So this code | |
| 253 | |
| 254 $fh = $index->get_stream($myid); | |
| 255 while( <$fh> ) { | |
| 256 # do something | |
| 257 } | |
| 258 will parse the entire file if you don't put in | |
| 259 a last statement in, like | |
| 260 | |
| 261 while( <$fh> ) { | |
| 262 /^\/\// && last; # end of record | |
| 263 # do something | |
| 264 } | |
| 265 | |
| 266 Returns : A filehandle object | |
| 267 Args : string represents the accession number | |
| 268 Notes : This method should not be used without forethought | |
| 269 | |
| 270 =cut | |
| 271 | |
| 272 #' | |
| 273 | |
| 274 sub get_stream { | |
| 275 my ($self,$id) = @_; | |
| 276 | |
| 277 my ($desc,$acc,$out); | |
| 278 my $db = $self->db(); | |
| 279 | |
| 280 if (my $rec = $db->{ $id }) { | |
| 281 my( @record ); | |
| 282 | |
| 283 my ($file, $begin, $end) = $self->unpack_record( $rec ); | |
| 284 | |
| 285 # Get the (possibly cached) filehandle | |
| 286 my $fh = $self->_file_handle( $file ); | |
| 287 | |
| 288 # move to start | |
| 289 seek($fh, $begin, 0); | |
| 290 | |
| 291 return $fh; | |
| 292 } | |
| 293 else { | |
| 294 $self->throw("Unable to find a record for $id in the flat file index"); | |
| 295 } | |
| 296 } | |
| 297 | |
| 298 | |
| 299 =head2 cachesize | |
| 300 | |
| 301 Usage : $index->cachesize(1000000) | |
| 302 Function: Sets the dbm file cache size for the index. | |
| 303 Needs to be set before the DBM file gets opened. | |
| 304 Example : $index->cachesize(1000000) | |
| 305 Returns : size of the curent cache | |
| 306 | |
| 307 =cut | |
| 308 | |
| 309 sub cachesize { | |
| 310 my( $self, $size ) = @_; | |
| 311 | |
| 312 if(defined $size){ | |
| 313 $self->{'_cachesize'} = $size; | |
| 314 } | |
| 315 return ( $self->{'_cachesize'} ); | |
| 316 | |
| 317 } | |
| 318 | |
| 319 | |
| 320 =head2 ffactor | |
| 321 | |
| 322 Usage : $index->ffactor(1000000) | |
| 323 Function: Sets the dbm file fill factor. | |
| 324 Needs to be set before the DBM file gets opened. | |
| 325 | |
| 326 Example : $index->ffactor(1000000) | |
| 327 Returns : size of the curent cache | |
| 328 | |
| 329 =cut | |
| 330 | |
| 331 sub ffactor { | |
| 332 my( $self, $size ) = @_; | |
| 333 | |
| 334 if(defined $size){ | |
| 335 $self->{'_ffactor'} = $size; | |
| 336 } | |
| 337 return ( $self->{'_ffactor'} ); | |
| 338 | |
| 339 } | |
| 340 | |
| 341 | |
| 342 =head2 open_dbm | |
| 343 | |
| 344 Usage : $index->open_dbm() | |
| 345 Function: Opens the dbm file associated with the index | |
| 346 object. Write access is only given if explicitly | |
| 347 asked for by calling new(-write => 1) or having set | |
| 348 the write_flag(1) on the index object. The type of | |
| 349 dbm file opened is that returned by dbm_package(). | |
| 350 The name of the file to be is opened is obtained by | |
| 351 calling the filename() method. | |
| 352 | |
| 353 Example : $index->_open_dbm() | |
| 354 Returns : 1 on success | |
| 355 | |
| 356 =cut | |
| 357 | |
| 358 sub open_dbm { | |
| 359 my( $self ) = @_; | |
| 360 | |
| 361 my $filename = $self->filename() | |
| 362 or $self->throw("filename() not set"); | |
| 363 | |
| 364 my $db = $self->db(); | |
| 365 | |
| 366 # Close the dbm file if already open (maybe we're getting | |
| 367 # or dropping write access | |
| 368 if (ref($db) ne 'HASH') { | |
| 369 untie($db); | |
| 370 } | |
| 371 | |
| 372 # What kind of DBM file are we going to open? | |
| 373 my $dbm_type = $self->dbm_package; | |
| 374 | |
| 375 # Choose mode for opening dbm file (read/write+create or read-only). | |
| 376 my $mode_flags = $self->write_flag ? O_RDWR|O_CREAT : O_RDONLY; | |
| 377 | |
| 378 # Open the dbm file | |
| 379 if ($dbm_type eq 'DB_File') { | |
| 380 my $hash_inf = DB_File::HASHINFO->new(); | |
| 381 my $cache = $self->cachesize(); | |
| 382 my $ffactor = $self->ffactor(); | |
| 383 if ($cache){ | |
| 384 $hash_inf->{'cachesize'} = $cache; | |
| 385 } | |
| 386 if ($ffactor){ | |
| 387 $hash_inf->{'ffactor'} = $ffactor; | |
| 388 } | |
| 389 tie( %$db, $dbm_type, $filename, $mode_flags, 0644, $hash_inf ) | |
| 390 or $self->throw("Can't open '$dbm_type' dbm file '$filename' : $!"); | |
| 391 } else { | |
| 392 tie( %$db, $dbm_type, $filename, $mode_flags, 0644 ) | |
| 393 or $self->throw("Can't open '$dbm_type' dbm file '$filename' : $!"); | |
| 394 } | |
| 395 | |
| 396 # The following methods access data in the dbm file: | |
| 397 | |
| 398 # Now, if we're a Bio::Index::Abstract caterpillar, then we | |
| 399 # transform ourselves into a Bio::Index::<something> butterfly! | |
| 400 if( ref($self) eq "Bio::Index::Abstract" ) { | |
| 401 my $pkg = $self->_code_base(); | |
| 402 bless $self, $pkg; | |
| 403 } | |
| 404 | |
| 405 # Check or set this is the right kind and version of index | |
| 406 $self->_type_and_version(); | |
| 407 | |
| 408 # Check files haven't changed size since they were indexed | |
| 409 $self->_check_file_sizes(); | |
| 410 | |
| 411 return 1; | |
| 412 } | |
| 413 | |
| 414 =head2 _version | |
| 415 | |
| 416 Title : _version | |
| 417 Usage : $type = $index->_version() | |
| 418 Function: Returns a string which identifes the version of an | |
| 419 index module. Used to permanently identify an index | |
| 420 file as having been created by a particular version | |
| 421 of the index module. Must be provided by the sub class | |
| 422 Example : | |
| 423 Returns : | |
| 424 Args : none | |
| 425 | |
| 426 =cut | |
| 427 | |
| 428 sub _version { | |
| 429 my $self = shift; | |
| 430 | |
| 431 $self->throw("In Bio::Index::Abstract, no _version method in sub class"); | |
| 432 } | |
| 433 | |
| 434 =head2 _code_base | |
| 435 | |
| 436 Title : _code_base | |
| 437 Usage : $code = $db->_code_base(); | |
| 438 Function: | |
| 439 Example : | |
| 440 Returns : Code package to be used with this | |
| 441 Args : | |
| 442 | |
| 443 | |
| 444 =cut | |
| 445 | |
| 446 sub _code_base { | |
| 447 my ($self) = @_; | |
| 448 my $code_key = '__TYPE_AND_VERSION'; | |
| 449 my $record; | |
| 450 | |
| 451 $record = $self->db->{$code_key}; | |
| 452 | |
| 453 my($code,$version) = $self->unpack_record($record); | |
| 454 if( wantarray ) { | |
| 455 return ($code,$version); | |
| 456 } else { | |
| 457 return $code; | |
| 458 } | |
| 459 } | |
| 460 | |
| 461 | |
| 462 =head2 _type_and_version | |
| 463 | |
| 464 Title : _type_and_version | |
| 465 Usage : Called by _initalize | |
| 466 Function: Checks that the index opened is made by the same index | |
| 467 module and version of that module that made it. If the | |
| 468 index is empty, then it adds the information to the | |
| 469 database. | |
| 470 Example : | |
| 471 Returns : 1 or exception | |
| 472 Args : none | |
| 473 | |
| 474 =cut | |
| 475 | |
| 476 sub _type_and_version { | |
| 477 my $self = shift; | |
| 478 my $key = '__TYPE_AND_VERSION'; | |
| 479 my $version = $self->_version(); | |
| 480 my $type = ref $self; | |
| 481 | |
| 482 # Run check or add type and version key if missing | |
| 483 if (my $rec = $self->db->{ $key }) { | |
| 484 my( $db_type, $db_version ) = $self->unpack_record($rec); | |
| 485 $self->throw("This index file is from version [$db_version] - You need to rebuild it to use module version [$version]") | |
| 486 unless $db_version == $version; | |
| 487 $self->throw("This index file is type [$db_type] - Can't access it with module for [$type]") | |
| 488 unless $db_type eq $type; | |
| 489 } else { | |
| 490 $self->add_record( $key, $type, $version ) | |
| 491 or $self->throw("Can't add Type and Version record"); | |
| 492 } | |
| 493 return 1; | |
| 494 } | |
| 495 | |
| 496 | |
| 497 =head2 _check_file_sizes | |
| 498 | |
| 499 Title : _check_file_sizes | |
| 500 Usage : $index->_check_file_sizes() | |
| 501 Function: Verifies that the files listed in the database | |
| 502 are the same size as when the database was built, | |
| 503 or throws an exception. Called by the new() | |
| 504 function. | |
| 505 Example : | |
| 506 Returns : 1 or exception | |
| 507 Args : | |
| 508 | |
| 509 =cut | |
| 510 | |
| 511 sub _check_file_sizes { | |
| 512 my $self = shift; | |
| 513 my $num = $self->_file_count() || 0; | |
| 514 | |
| 515 for (my $i = 0; $i < $num; $i++) { | |
| 516 my( $file, $stored_size ) = $self->unpack_record( $self->db->{"__FILE_$i"} ); | |
| 517 my $size = -s $file; | |
| 518 unless ($size == $stored_size) { | |
| 519 $self->throw("file $i [ $file ] has changed size $stored_size -> $size. This probably means you need to rebuild the index."); | |
| 520 } | |
| 521 } | |
| 522 return 1; | |
| 523 } | |
| 524 | |
| 525 | |
| 526 =head2 make_index | |
| 527 | |
| 528 Title : make_index | |
| 529 Usage : $index->make_index( FILE_LIST ) | |
| 530 Function: Takes a list of file names, checks that they are | |
| 531 all fully qualified, and then calls _filename() on | |
| 532 each. It supplies _filename() with the name of the | |
| 533 file, and an integer which is stored with each record | |
| 534 created by _filename(). Can be called multiple times, | |
| 535 and can be used to add to an existing index file. | |
| 536 Example : $index->make_index( '/home/seqs1', '/home/seqs2', '/nfs/pub/big_db' ); | |
| 537 Returns : Number of files indexed | |
| 538 Args : LIST OF FILES | |
| 539 | |
| 540 =cut | |
| 541 | |
| 542 sub make_index { | |
| 543 my($self, @files) = @_; | |
| 544 my $count = 0; | |
| 545 my $recs = 0; | |
| 546 # blow up if write flag is not set. EB fix | |
| 547 | |
| 548 if( !defined $self->write_flag ) { | |
| 549 $self->throw("Attempting to make an index on a read-only database. What about a WRITE flag on opening the index?"); | |
| 550 } | |
| 551 | |
| 552 # We're really fussy/lazy, expecting all file names to be fully qualified | |
| 553 $self->throw("No files to index provided") unless @files; | |
| 554 for(my $i=0;$i<scalar @files; $i++) { | |
| 555 if( $Bio::Root::IO::FILESPECLOADED && File::Spec->can('rel2abs') ) { | |
| 556 if( ! File::Spec->file_name_is_absolute($files[$i]) ) { | |
| 557 $files[$i] = File::Spec->rel2abs($files[$i]); | |
| 558 } | |
| 559 } else { | |
| 560 if( $^O =~ /MSWin/i ) { | |
| 561 ($files[$i] =~ m|^[A-Za-z]:/|) || | |
| 562 $self->throw("Not an absolute file path '$files[$i]'"); | |
| 563 } else { | |
| 564 ($files[$i] =~ m|^/|) || | |
| 565 $self->throw("Not an absolute file path '$files[$i]'"); | |
| 566 } | |
| 567 } | |
| 568 $self->throw("File does not exist '$files[$i]'") unless -e $files[$i]; | |
| 569 } | |
| 570 | |
| 571 # Add each file to the index | |
| 572 FILE : | |
| 573 foreach my $file (@files) { | |
| 574 | |
| 575 my $i; # index for this file | |
| 576 | |
| 577 # Get new index for this file and increment file count | |
| 578 if ( defined(my $count = $self->_file_count) ) { | |
| 579 $i = $count; | |
| 580 } else { | |
| 581 $i = 0; $self->_file_count(0); | |
| 582 } | |
| 583 | |
| 584 # see whether this file has been already indexed | |
| 585 my ($record,$number,$size); | |
| 586 | |
| 587 if( ($record = $self->db->{"__FILENAME_$file"}) ) { | |
| 588 ($number,$size) = $self->unpack_record($record); | |
| 589 | |
| 590 # if it is the same size - fine. Otherwise die | |
| 591 if( -s $file == $size ) { | |
| 592 warn "File $file already indexed. Skipping...\n"; | |
| 593 next FILE; | |
| 594 } else { | |
| 595 $self->throw("In index, $file has changed size ($size). Indicates that the index is out of date"); | |
| 596 } | |
| 597 } | |
| 598 | |
| 599 # index this file | |
| 600 warn "Indexing file $file\n" if( $self->verbose > 0); | |
| 601 | |
| 602 # this is supplied by the subclass and does the serious work | |
| 603 $recs += $self->_index_file( $file, $i ); # Specific method for each type of index | |
| 604 | |
| 605 | |
| 606 # Save file name and size for this index | |
| 607 $self->add_record("__FILE_$i", $file, -s $file) | |
| 608 or $self->throw("Can't add data to file: $file"); | |
| 609 $self->add_record("__FILENAME_$file", $i, -s $file) | |
| 610 or $self->throw("Can't add data to file: $file"); | |
| 611 | |
| 612 # increment file lines | |
| 613 $i++; $self->_file_count($i); | |
| 614 my $temp; | |
| 615 $temp = $self->_file_count(); | |
| 616 | |
| 617 | |
| 618 } | |
| 619 return ($count, $recs); | |
| 620 } | |
| 621 | |
| 622 =head2 _filename | |
| 623 | |
| 624 Title : _filename | |
| 625 Usage : $index->_filename( FILE INT ) | |
| 626 Function: Indexes the file | |
| 627 Example : | |
| 628 Returns : | |
| 629 Args : | |
| 630 | |
| 631 =cut | |
| 632 | |
| 633 sub _index_file { | |
| 634 my $self = shift; | |
| 635 | |
| 636 my $pkg = ref($self); | |
| 637 $self->throw("Error: '$pkg' does not provide the _index_file() method"); | |
| 638 } | |
| 639 | |
| 640 | |
| 641 | |
| 642 =head2 _file_handle | |
| 643 | |
| 644 Title : _file_handle | |
| 645 Usage : $fh = $index->_file_handle( INT ) | |
| 646 Function: Returns an open filehandle for the file | |
| 647 index INT. On opening a new filehandle it | |
| 648 caches it in the @{$index->_filehandle} array. | |
| 649 If the requested filehandle is already open, | |
| 650 it simply returns it from the array. | |
| 651 Example : $fist_file_indexed = $index->_file_handle( 0 ); | |
| 652 Returns : ref to a filehandle | |
| 653 Args : INT | |
| 654 | |
| 655 =cut | |
| 656 | |
| 657 sub _file_handle { | |
| 658 my( $self, $i ) = @_; | |
| 659 | |
| 660 unless ($self->{'_filehandle'}[$i]) { | |
| 661 my $fh = Symbol::gensym(); | |
| 662 my @rec = $self->unpack_record($self->db->{"__FILE_$i"}) | |
| 663 or $self->throw("Can't get filename for index : $i"); | |
| 664 my $file = $rec[0]; | |
| 665 open $fh, $file or $self->throw("Can't read file '$file' : $!"); | |
| 666 $self->{'_filehandle'}[$i] = $fh; # Cache filehandle | |
| 667 } | |
| 668 return $self->{'_filehandle'}[$i]; | |
| 669 } | |
| 670 | |
| 671 | |
| 672 =head2 _file_count | |
| 673 | |
| 674 Title : _file_count | |
| 675 Usage : $index->_file_count( INT ) | |
| 676 Function: Used by the index building sub in a sub class to | |
| 677 track the number of files indexed. Sets or gets | |
| 678 the number of files indexed when called with or | |
| 679 without an argument. | |
| 680 Example : | |
| 681 Returns : INT | |
| 682 Args : INT | |
| 683 | |
| 684 =cut | |
| 685 | |
| 686 sub _file_count { | |
| 687 my $self = shift; | |
| 688 if (@_) { | |
| 689 $self->db->{'__FILE_COUNT'} = shift; | |
| 690 } | |
| 691 return $self->db->{'__FILE_COUNT'}; | |
| 692 } | |
| 693 | |
| 694 | |
| 695 =head2 add_record | |
| 696 | |
| 697 Title : add_record | |
| 698 Usage : $index->add_record( $id, @stuff ); | |
| 699 Function: Calls pack_record on @stuff, and adds the result | |
| 700 of pack_record to the index database under key $id. | |
| 701 If $id is a reference to an array, then a new entry | |
| 702 is added under a key corresponding to each element | |
| 703 of the array. | |
| 704 Example : $index->add_record( $id, $fileNumber, $begin, $end ) | |
| 705 Returns : TRUE on success or FALSE on failure | |
| 706 Args : ID LIST | |
| 707 | |
| 708 =cut | |
| 709 | |
| 710 sub add_record { | |
| 711 my( $self, $id, @rec ) = @_; | |
| 712 $self->debug( "Adding key $id\n") if( $self->verbose > 0 ); | |
| 713 $self->db->{$id} = $self->pack_record( @rec ); | |
| 714 return 1; | |
| 715 } | |
| 716 | |
| 717 | |
| 718 =head2 pack_record | |
| 719 | |
| 720 Title : pack_record | |
| 721 Usage : $packed_string = $index->pack_record( LIST ) | |
| 722 Function: Packs an array of scalars into a single string | |
| 723 joined by ASCII 034 (which is unlikely to be used | |
| 724 in any of the strings), and returns it. | |
| 725 Example : $packed_string = $index->pack_record( $fileNumber, $begin, $end ) | |
| 726 Returns : STRING or undef | |
| 727 Args : LIST | |
| 728 | |
| 729 =cut | |
| 730 | |
| 731 sub pack_record { | |
| 732 my( $self, @args ) = @_; | |
| 733 return join "\034", @args; | |
| 734 } | |
| 735 | |
| 736 =head2 unpack_record | |
| 737 | |
| 738 Title : unpack_record | |
| 739 Usage : $index->unpack_record( STRING ) | |
| 740 Function: Splits the sting provided into an array, | |
| 741 splitting on ASCII 034. | |
| 742 Example : ( $fileNumber, $begin, $end ) = $index->unpack_record( $self->db->{$id} ) | |
| 743 Returns : A 3 element ARRAY | |
| 744 Args : STRING containing ASCII 034 | |
| 745 | |
| 746 =cut | |
| 747 | |
| 748 sub unpack_record { | |
| 749 my( $self, @args ) = @_; | |
| 750 return split /\034/, $args[0]; | |
| 751 } | |
| 752 | |
| 753 =head2 count_records | |
| 754 | |
| 755 Title : count_records | |
| 756 Usage : $recs = $seqdb->count_records() | |
| 757 Function: return count of all recs in the index | |
| 758 Example : | |
| 759 Returns : a scalar | |
| 760 Args : none | |
| 761 | |
| 762 | |
| 763 =cut | |
| 764 | |
| 765 sub count_records { | |
| 766 my ($self,@args) = @_; | |
| 767 my $db = $self->db; | |
| 768 my $c = 0; | |
| 769 while (my($id, $rec) = each %$db) { | |
| 770 if( $id =~ /^__/ ) { | |
| 771 # internal info | |
| 772 next; | |
| 773 } | |
| 774 $c++; | |
| 775 } | |
| 776 | |
| 777 return ($c); | |
| 778 } | |
| 779 | |
| 780 | |
| 781 =head2 DESTROY | |
| 782 | |
| 783 Title : DESTROY | |
| 784 Usage : Called automatically when index goes out of scope | |
| 785 Function: Closes connection to database and handles to | |
| 786 sequence files | |
| 787 Returns : NEVER | |
| 788 Args : NONE | |
| 789 | |
| 790 | |
| 791 =cut | |
| 792 | |
| 793 sub DESTROY { | |
| 794 my $self = shift; | |
| 795 untie($self->{'_DB'}); | |
| 796 } | |
| 797 | |
| 798 1; |
