package FileIO::FingerprintsSDFileIO;
#
# $RCSfile: FingerprintsSDFileIO.pm,v $
# $Date: 2015/02/28 20:48:43 $
# $Revision: 1.18 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Scalar::Util ();
use TextUtil ();
use FileUtil ();
use SDFileUtil ();
use Fingerprints::FingerprintsStringUtil ();
use FileIO::FileIO;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(FileIO::FileIO Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(IsFingerprintsSDFile);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Class constructor...
#
# Takes property names and values as a flat list. The object is created by the
# parent class constructor, re-blessed into the requested class, populated with
# default object data and finally updated with the specified properties.
#
# Returns the new object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Initialize object...
  my $This = $Class->SUPER::new();
  bless $This, ref($Class) || $Class;
  $This->_InitializeFingerprintsSDFileIO();

  $This->_InitializeFingerprintsSDFileIOProperties(%NamesAndValues);

  return $This;
}

# Initialize object data...
#
sub _InitializeFingerprintsSDFileIO {
  my($This) = @_;

  # Fingerprints string data format during read/write...
  #
  # For file read:
  #
  #  AutoDetect  - automatically detect format of fingerprints string
  #  FingerprintsBitVectorString - Bit vector fingerprints string format
  #  FingerprintsVectorString - Vector fingerprints string format
  #
  #  Default value: AutoDetect
  #
  # For file write:
  #
  #  FingerprintsBitVectorString - Bit vector fingerprints string format
  #  FingerprintsVectorString - Vector fingerprints string format
  #
  #  Default value: undef
  #
  $This->{FingerprintsStringMode} = undef;

  # For file read:
  #
  #   o Fingerprints bit-vector and vector object for current fingerprints string
  #
  # For file write:
  #
  #   o Fingerprints bit-vector and vector object for current fingerprints string
  #   o Any supported fingerprints object: PathLengthFingerprints, ExtendedConnectivity, and so on.
  #
  $This->{FingerprintsObject} = undef;

  # Fingerprints SD file data field label during read/write
  #
  # For file read:
  #
  #   Value of AutoDetect implies use first data field containing the word Fingerprints in its
  #   data field label to retrieve fingerprints string data. Otherwise, a valid data field name
  #   must be specified.
  #
  # For file write:
  #
  #   Data field label to use for writing fingerprints string. Default: Fingerprints
  #
  $This->{FingerprintsFieldLabel} = undef;

  # Fingerprints string for current compound during read/write...
  $This->{FingerprintsString} = undef;

  # First compound data string read/write...
  $This->{FirstCompoundDataIO} = 1;

  # Current fingerprints string data compound number during read/write...
  $This->{CompoundNum} = 0;

  # Compound data string during read/write...
  $This->{CompoundString} = undef;

  # Initialize parameters for read...
  $This->_InitializeFingerprintsSDFileIORead();

  # Initialize parameters for write...
  $This->_InitializeFingerprintsSDFileIOWrite();

  return $This;
}

# Initialize class ...
sub _InitializeClass {
  #Class name...
  $ClassName = __PACKAGE__;

}

# Initialize object data for reading fingerprints SD file...
#
sub _InitializeFingerprintsSDFileIORead {
  my($This) = @_;

  # Compound ID mode to use for retrieving compound IDs for fingerprints...
  #
  # Specify how to generate compound IDs: use a SD file datafield value; use molname line from
  # SD file; generate a sequential ID with specific prefix; use combination of both MolName and
  # LabelPrefix with usage of LabelPrefix values for empty molname lines.
  #
  # Possible values: DataField, MolName, LabelPrefix, or MolNameOrLabelPrefix. Default: LabelPrefix.
  #
  # For MolNameOrLabelPrefix value of CompoundIDMode, molname line in SD file takes precedence over
  # sequential compound IDs generated using LabelPrefix and only empty molname values are replaced
  # with sequential compound IDs.
  #
  $This->{CompoundIDMode} = 'LabelPrefix';

  #
  # Compound ID data field label name whose value is used as compound ID during DataField value of
  # CompoundIDMode
  #
  $This->{CompoundIDFieldLabel} = undef;

  # A prefix string used for generating compound IDs like LabelPrefixString<Number> during LabelPrefix
  # or MolNameOrLabelPrefix value of CompoundIDMode. Default value, Cmpd, generates compound IDs
  # which look like Cmpd<Number>.
  #
  $This->{CompoundIDPrefix} = 'Cmpd';

  # By default, the fingerprints data in the fingerprints data field is validated before
  # generating fingerprints objects. A false value skips validation during read and the
  # data is assumed to be valid...
  #
  $This->{ValidateData} = 1;

  # Level of detail to print during validation of data for invalid or missing data...
  $This->{DetailLevel} = 1;

  # Number of missing and invalid fingerprints string data compound strings...
  $This->{NumOfCmpdsWithMissingData} = 0;
  $This->{NumOfCmpdsWithInvalidData} = 0;

  # Compound ID for current fingerprints string...
  $This->{CompoundID} = undef;

  # Compound data field labels and values map for current compound data...
  %{$This->{DataFieldLabelsAndValues}} = ();

  # Status of data in fingerprints SD file...
  $This->{ValidFileData} = 0;

  $This->{ValidCompoundIDField} = 0;
  $This->{ValidFingerprintsField} = 0;

  $This->{ValidFingerprintsStringMode} = 0;

  return $This;
}

# Initialize object data for writing fingerprints SD file...
#
sub _InitializeFingerprintsSDFileIOWrite {
  my($This) = @_;

  # Fingerprints bit vector string format...
  #
  # Possible values: BinaryString or HexadecimalString [Default]
  #
  # Default BitStringFormat is set during first write using Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat.
  #
  $This->{BitStringFormat} = undef;

  # Bits order in fingerprints bit vector string...
  #
  # Ascending - First bit in each byte as the lowest bit [Default]
  # Descending - First bit in each byte as the highest bit
  #
  # Default BitsOrder is set during first write using Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder.
  #
  $This->{BitsOrder} = undef;

  # Fingerprints vector string format...
  #
  # Possible values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, ValuesString
  #
  # Default VectorStringFormat is set during first write using Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat.
  # For fingerprints vector object containing vector NumericalValues, it corresponds to IDsAndValuesString; otherwise,
  # it's set to ValuesString.
  #
  $This->{VectorStringFormat} = undef;

  # Overwriting existing file...
  $This->{Overwrite} = 0;

  return $This;
}

# Initialize object values...
#
# Applies each specified property through its Set<PropertyName> method, makes
# sure the named file looks like a fingerprints SD file and then finishes the
# setup for the read or write/append mode reported by GetMode.
#
# Croaks when no Name is specified or the file doesn't appear to be a
# fingerprints SD file.
#
sub _InitializeFingerprintsSDFileIOProperties {
  my($This, %NamesAndValues) = @_;

  # A file name is mandatory: check it before invoking any property setter so
  # that a missing name is always reported as such...
  if (!exists $NamesAndValues{Name}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";
  }

  # All other property names and values along with all Set/Get<PropertyName> methods
  # are implemented on-demand using ObjectProperty class.

  my($Name, $Value, $MethodName);
  while (($Name, $Value) = each %NamesAndValues) {
    $MethodName = "Set${Name}";
    $This->$MethodName($Value);
  }

  # Make sure it's a fingerprints file...
  $Name = $NamesAndValues{Name};
  if (!$This->IsFingerprintsSDFile($Name)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be fingerprints format...";
  }

  if ($This->GetMode() =~ /^Read$/i) {
    $This->_InitializeFingerprintsSDFileIOReadProperties(%NamesAndValues);
  }
  elsif ($This->GetMode() =~ /^(Write|Append)$/i) {
    $This->_InitializeFingerprintsSDFileIOWriteProperties(%NamesAndValues);
  }

  return $This;
}

# Initialize object properties for reading fingerprints SD file...
#
# Fills in AutoDetect defaults for FingerprintsStringMode and
# FingerprintsFieldLabel, croaks when DataField CompoundIDMode is used without
# a CompoundIDFieldLabel and then prepares the object for reading.
#
sub _InitializeFingerprintsSDFileIOReadProperties {
  my($This, %NamesAndValues) = @_;

  # Set default value for FingerprintsStringMode...
  if (!$This->{FingerprintsStringMode}) {
    $This->{FingerprintsStringMode} = 'AutoDetect';
  }

  # Set default value for FingerprintsFieldLabel...
  if (!$This->{FingerprintsFieldLabel}) {
    $This->{FingerprintsFieldLabel} = 'AutoDetect';
  }

  # Check compound ID data field...
  if (($This->{CompoundIDMode} =~ /^DataField$/i) && (!defined($This->{CompoundIDFieldLabel}))) {
    croak "Error: ${ClassName}->: Object can't be instantiated: Compound ID data field lable must be specifed using \"CompoundIDFieldLabel\" during \"DataField\" value of \"CompoundIDMode\"...";
  }

  $This->_PrepareForReadingFingerprintsSDFileData();

  return $This;
}

# Initialize object properties for writing fingerprints SD file...
#
# Croaks unless FingerprintsStringMode was specified and is one of the two
# values supported for write/append; defaults FingerprintsFieldLabel to
# Fingerprints and then prepares the object for writing.
#
sub _InitializeFingerprintsSDFileIOWriteProperties {
  my($This, %NamesAndValues) = @_;

  # Check FingerprintsStringMode value...
  if (!exists $NamesAndValues{FingerprintsStringMode}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying FingerprintsStringMode...";
  }

  if ($This->{FingerprintsStringMode} !~ /^(FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->: Object can't be instantiated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, is not valid; Supported values for write/append: FingerprintsBitVectorString or FingerprintsVectorString...";
  }

  # Set default value for FingerprintsFieldLabel...
  if (!$This->{FingerprintsFieldLabel}) {
    $This->{FingerprintsFieldLabel} = 'Fingerprints';
  }

  $This->_PrepareForWritingFingerprintsSDFileData();

  return $This;
}

# Set FingerprintsStringMode...
#
# Croaks unless the value is AutoDetect, FingerprintsBitVectorString or
# FingerprintsVectorString (case insensitive). Returns the object.
#
sub SetFingerprintsStringMode {
  my($This, $Value) = @_;

  # AutoDetect  - automatically detect format of fingerprints string
  # FingerprintsBitVectorString - Bit vector fingerprints string format
  # FingerprintsVectorString - Vector fingerprints string format

  if ($Value !~ /^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->SetFingerprintsStringMode: FingerprintsStringMode value, $Value, is not valid; Supported values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString...";
  }

  $This->{FingerprintsStringMode} = $Value;

  return $This;
}

# Set CompoundIDMode...
#
# Croaks unless the value is DataField, MolName, LabelPrefix or
# MolNameOrLabelPrefix (case insensitive). Returns the object.
#
sub SetCompoundIDMode {
  my($This, $Value) = @_;

  if ($Value !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i) {
    # Report this method's own name so the failing call can be located...
    croak "Error: ${ClassName}->SetCompoundIDMode: CompoundIDMode value, $Value, is not valid; Supported values: DataField, MolName, LabelPrefix, or MolNameOrLabelPrefix...";
  }

  $This->{CompoundIDMode} = $Value;

  return $This;
}

# Set DetailLevel...
#
# Croaks unless TextUtil::IsPositiveInteger accepts the value. Returns the object.
#
sub SetDetailLevel {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetDetailLevel: DetailLevel value, $Value, is not valid; Supported values: > 0...";
  }

  $This->{DetailLevel} = $Value;

  return $This;
}

# Set BitStringFormat...
#
# Croaks unless the value is BinaryString or HexadecimalString (case
# insensitive). Returns the object.
#
sub SetBitStringFormat {
  my($This, $Value) = @_;

  if ($Value !~ /^(BinaryString|HexadecimalString)$/i) {
    croak "Error: ${ClassName}->SetBitStringFormat: BitStringFormat value, $Value, is not valid; Supported values: BinaryString or HexadecimalString...";
  }

  $This->{BitStringFormat} = $Value;

  return $This;
}

# Set BitsOrder...
#
# Croaks unless the value is Ascending or Descending (case insensitive).
# Returns the object.
#
sub SetBitsOrder {
  my($This, $Value) = @_;

  # Ascending - First bit in each byte as the lowest bit
  # Descending - First bit in each byte as the highest bit
  #
  if ($Value !~ /^(Ascending|Descending)$/i) {
    # Name the property actually being set...
    croak "Error: ${ClassName}->SetBitsOrder: BitsOrder value, $Value, is not valid; Supported values: Ascending or Descending...";
  }

  $This->{BitsOrder} = $Value;

  return $This;
}

# Set VectorStringFormat...
#
# Croaks unless the value is one of the supported vector string formats (case
# insensitive). Returns the object.
#
sub SetVectorStringFormat {
  my($This, $Value) = @_;

  # Possible values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, ValuesString

  if ($Value !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i) {
    # Name the property actually being set...
    croak "Error: ${ClassName}->SetVectorStringFormat: VectorStringFormat value, $Value, is not valid; Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString...";
  }

  $This->{VectorStringFormat} = $Value;

  return $This;
}

# Get compound string for current compound with optional removal of fingerprints data..
#
# With a true RemoveFingerprintsData argument, and when the compound string
# contains the fingerprints data field, the value returned by
# SDFileUtil::RemoveCmpdDataHeaderLabelAndValue is handed back; otherwise the
# stored compound string is returned as it is.
#
sub GetCompoundString {
  my($This, $RemoveFingerprintsData) = @_;

  $RemoveFingerprintsData = defined $RemoveFingerprintsData ? $RemoveFingerprintsData : 0;

  if ($RemoveFingerprintsData && $This->_IsFingerprintsDataPresentInCompoundString()) {
    return SDFileUtil::RemoveCmpdDataHeaderLabelAndValue($This->{CompoundString}, $This->{FingerprintsFieldLabel});
  }

  return $This->{CompoundString};
}

# Set compound string for current compound..
#
sub SetCompoundString {
  my($This, $CompoundString) = @_;

  $This->{CompoundString} = $CompoundString;

  return $This;
}

# Get fingerprints object for current compound using fingerprints, fingerprints bit-vector
# fingerprints vector object. Fingerprints object correspond to any of supported fingerprints
# objects such as PathLengthFingerprints, ExtendedConnectivity, and so on.
#
sub GetFingerprints {
  my($This) = @_;

  return $This->{FingerprintsObject};
}

# Set fingerprints object for current compound...
#
sub SetFingerprints {
  my($This, $FingerprintsObject) = @_;

  $This->{FingerprintsObject} = $FingerprintsObject;

  return $This;
}

# Get fingerprints string for current compound...
#
# Returns the literal string None when no (or a false) fingerprints string is set.
#
sub GetFingerprintsString {
  my($This) = @_;

  return $This->{FingerprintsString} ? $This->{FingerprintsString} : 'None';
}

# Set fingerprints string for current compound...
#
sub SetFingerprintsString {
  my($This, $FingerprintsString) = @_;

  $This->{FingerprintsString} = $FingerprintsString;

  return $This;
}

# Does fingerprints SD file contain valid data?
#
sub IsFingerprintsFileDataValid {
  my($This) = @_;

  return $This->{ValidFileData} ? 1 : 0;
}

# Does current compound contains valid fingerprints object data?
#
sub IsFingerprintsDataValid {
  my($This) = @_;

  return defined $This->{FingerprintsObject} ? 1 : 0;
}

# Read next available compound data string, process it and generate appropriate fingerprints
# objects...
#
# Returns undef when no more compound data is available; otherwise returns the
# object, even for compounds whose fingerprints data is missing or invalid. Use
# IsFingerprintsDataValid to find out whether a fingerprints object was generated.
#
sub Read {
  my($This) = @_;

  # Read compound data string...
  if (!$This->_ReadCompoundDataString()) {
    return undef;
  }

  # No need to process invalid SD file with invalid data...
  if (!$This->{ValidFileData}) {
    if ($This->{ValidateData}) {
      $This->{NumOfCmpdsWithMissingData} += 1;
    }
    return $This;
  }

  # Perform data validation...
  if ($This->{ValidateData}) {
    if (!$This->_ValidateReadCompoundDataString()) {
      return $This;
    }
  }

  # Setup fingerprints string after checking again to handle problematic data for
  # non-validated compound string data...
  #
  my($FingerprintsFieldLabel);
  $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};
  if (exists $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel}) {
    $This->{FingerprintsString} = $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel};
  }

  # Generate fingerprints object...
  $This->_GenerateFingerprintsObject();

  # Setup fingerprints compound ID for fingerprints string...
  $This->_GenerateCompoundID();

  return $This;
}

# Read next available compound data string, process it and generate appropriate fingerprints
# objects...
#
# Same as Read.
#
sub Next {
  my($This) = @_;

  return $This->Read();
}

# Read compound data string...
#
# Resets per-compound data, reads the next compound string from the file handle
# using SDFileUtil::ReadCmpdString and sets up its data field labels and values.
#
# Returns 0 when no compound string could be read; 1 otherwise.
#
sub _ReadCompoundDataString {
  my($This) = @_;
  my(@CmpdLines);

  if ($This->{FirstCompoundDataIO}) {
    $This->_ProcessFirstCompoundDataStringRead();
  }

  # Initialize data for current compound data string...
  $This->_InitializeReadCompoundDataString();

  # Get next compound data line...
  $This->{CompoundString} = SDFileUtil::ReadCmpdString($This->{FileHandle});
  if (!$This->{CompoundString}) {
    return 0;
  }

  $This->{CompoundNum} += 1;

  # Set up data field labels and values...
  @CmpdLines = split "\n", $This->{CompoundString};
  %{$This->{DataFieldLabelsAndValues}} = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);

  return 1;
}

# Initialize compound data string for reading...
#
sub _InitializeReadCompoundDataString {
  my($This) = @_;

  $This->{CompoundID} = undef;
  $This->{CompoundString} = undef;

  %{$This->{DataFieldLabelsAndValues}} = ();

  $This->{FingerprintsObject} = undef;
  $This->{FingerprintsString} = undef;

  return $This;
}

# Validate compound data string containing fingerprints data...
#
# Counts the compound as missing data when the fingerprints data field is
# absent, and as invalid data when its fingerprints string fails validation or
# its type/description differ from those of the first fingerprints string.
# Warnings are issued for DetailLevel values of 2 and above.
#
# Returns 1 for valid data; 0 otherwise.
#
sub _ValidateReadCompoundDataString {
  my($This) = @_;
  my($FingerprintsFieldLabel);

  $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

  # Check for missing data...
  if (!exists $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel}) {
    # Missing data...
    $This->{NumOfCmpdsWithMissingData} += 1;
    if ($This->{DetailLevel} >= 3) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains no fingerprints data: $This->{CompoundString}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains no fingerprints data...";
    }
    return 0;
  }

  # Check for invalid data...
  my($InvalidFingerprintsData, $FingerprintsType, $FingerprintsDescription);

  $InvalidFingerprintsData = 0;

  if (Fingerprints::FingerprintsStringUtil::AreFingerprintsStringValuesValid($This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel})) {
    ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel});
    if (defined($FingerprintsType) && defined($FingerprintsDescription)) {
      # Type and description come from file data: quote them so that they are
      # compared literally instead of being interpreted as regex patterns...
      if ($This->{FirstFingerprintsStringType} !~ /^\Q$FingerprintsType\E$/i || $This->{FirstFingerprintsStringDescription} !~ /^\Q$FingerprintsDescription\E$/i) {
        $InvalidFingerprintsData = 1;
      }
    }
    else {
      $InvalidFingerprintsData = 1;
    }
  }
  else {
    $InvalidFingerprintsData = 1;
  }

  if ($InvalidFingerprintsData) {
    $This->{NumOfCmpdsWithInvalidData} += 1;
    if ($This->{DetailLevel} >= 3) {
      # Show the offending compound data, as done for missing data above...
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains invalid fingerprints data: $This->{CompoundString}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains invalid fingerprints data...";
    }
    return 0;
  }

  return 1;
}

# Setup fingerprints compound ID for fingerprints string...
#
# The ID is derived according to CompoundIDMode: prefix followed by compound
# number, value of the specified data field (empty string when absent), first
# line of the compound string, or first line with a fallback to prefix and
# compound number when TextUtil::IsNotEmpty rejects it.
#
sub _GenerateCompoundID {
  my($This) = @_;
  my($CompoundID, $MolName);

  $CompoundID = '';

  if ($This->{CompoundIDMode} =~ /^LabelPrefix$/i) {
    $CompoundID = "$This->{CompoundIDPrefix}$This->{CompoundNum}";
  }
  elsif ($This->{CompoundIDMode} =~ /^DataField$/i) {
    my($SpecifiedDataField);
    $SpecifiedDataField = $This->{CompoundIDFieldLabel};
    $CompoundID = exists $This->{DataFieldLabelsAndValues}{$SpecifiedDataField} ? $This->{DataFieldLabelsAndValues}{$SpecifiedDataField} : '';
  }
  elsif ($This->{CompoundIDMode} =~ /^MolNameOrLabelPrefix$/i) {
    ($MolName) = split "\n", $This->{CompoundString};
    $CompoundID = TextUtil::IsNotEmpty($MolName) ? $MolName : "$This->{CompoundIDPrefix}$This->{CompoundNum}";
  }
  elsif ($This->{CompoundIDMode} =~ /^MolName$/i) {
    ($MolName) = split "\n", $This->{CompoundString};
    $CompoundID = $MolName;
  }

  $This->{CompoundID} = $CompoundID;

  return $This;
}

# Process first compound data string read...
#
# Only clears the FirstCompoundDataIO flag.
#
sub _ProcessFirstCompoundDataStringRead {
  my($This) = @_;

  $This->{FirstCompoundDataIO} = 0;

  return $This;
}

# Get ready for reading fingerprints SD file...
#
sub _PrepareForReadingFingerprintsSDFileData {
  my($This) = @_;

  # Retrieve SD file data fields information....
  $This->_RetrieveSDFileDataFields();

  # Validate compound and fingerprints field information...
  $This->_ValidateReadCompoundIDField();
  $This->_ValidateReadFingerprintsField();

  # Validate fingerprints string mode information...
  if ($This->{ValidFingerprintsField}) {
    $This->_ValidateReadFingerprintsStringMode();
  }

  # Set status of SD file data...
  $This->{ValidFileData} = ($This->{ValidCompoundIDField} && $This->{ValidFingerprintsField} && $This->{ValidFingerprintsStringMode}) ? 1 : 0;

  return $This;
}

# Retrieve information data fields and fingerprints string...
#
# Reads the first compound string from the SD file on a private file handle and
# stores its data field labels and values in FirstDataFieldLabelsAndValues.
#
# Croaks when the file doesn't exist or can't be opened.
#
sub _RetrieveSDFileDataFields {
  my($This) = @_;
  my($SDFile, $SDFileHandle, $CmpdString, @CmpdLines);

  $SDFile = $This->{Name};

  if (!(-e $SDFile)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $SDFile, doesn't exist...";
  }

  # Three-argument open with a lexical handle: the file name is never
  # interpreted as an open mode and no global bareword handle is involved...
  if (!open $SDFileHandle, '<', $SDFile) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: Couldn't open input SD file $SDFile: $! ...";
  }
  $CmpdString = SDFileUtil::ReadCmpdString($SDFileHandle);
  close $SDFileHandle;

  # Set up data field labels and values for first compound string data...
  @CmpdLines = defined($CmpdString) ? split("\n", $CmpdString) : ();

  %{$This->{FirstDataFieldLabelsAndValues}} = ();
  %{$This->{FirstDataFieldLabelsAndValues}} = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);

  return $This;
}

# Validate compound ID field information...
#
# For DataField CompoundIDMode, the specified compound ID data field must exist
# in the first compound data. Sets ValidCompoundIDField and returns 1 or 0.
#
sub _ValidateReadCompoundIDField {
  my($This) = @_;
  my($SpecifiedDataField);

  $This->{ValidCompoundIDField} = 0;

  if ($This->{CompoundIDMode} =~ /^DataField$/i) {
    $SpecifiedDataField = $This->{CompoundIDFieldLabel};
    if (! exists $This->{FirstDataFieldLabelsAndValues}{$SpecifiedDataField}) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundIDField: Compound ID data field, $SpecifiedDataField, specified using \"CompoundIDField\" in \"DataField\" \"CompoundIDMode\" doesn't exist...";
      return 0;
    }
  }

  $This->{ValidCompoundIDField} = 1;

  return 1;
}

# Validate fingerprints string field information...
#
# An explicitly specified fingerprints field label must exist in the first
# compound data. For AutoDetect, a data field label containing Fingerprints is
# picked and stored as FingerprintsFieldLabel. Sets ValidFingerprintsField and
# returns 1 or 0.
#
sub _ValidateReadFingerprintsField {
  my($This) = @_;
  my($FingerprintsFieldLabel);

  $This->{ValidFingerprintsField} = 0;

  $FingerprintsFieldLabel = '';

  if ($This->{FingerprintsFieldLabel} !~ /^AutoDetect$/i) {
    $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};
    if (! exists $This->{FirstDataFieldLabelsAndValues}{$FingerprintsFieldLabel}) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsField: Fingerprints data field value, $FingerprintsFieldLabel, specified using \"FingerprintsFieldLabel\" doesn't exist...";
      return 0;
    }
  }
  else {
    # Make sure default fingerprints field does exist...
    my($FingerprintsFieldFound, $DataFieldLabel);
    $FingerprintsFieldFound = 0;

    # Hash keys come back in no particular order: go through them sorted so
    # that the same field is picked on every run when several labels match...
    DATAFIELDLABEL: for $DataFieldLabel (sort keys %{$This->{FirstDataFieldLabelsAndValues}}) {
      if ($DataFieldLabel =~ /Fingerprints/i) {
        $FingerprintsFieldFound = 1;
        $FingerprintsFieldLabel = $DataFieldLabel;
        last DATAFIELDLABEL;
      }
    }
    if (!$FingerprintsFieldFound) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsField: Data field label containing \"Fingerprints\" string in its name doesn't exist...";
      return 0;
    }
  }

  $This->{ValidFingerprintsField} = 1;
  $This->{FingerprintsFieldLabel} = $FingerprintsFieldLabel;

  return 1;
}

# Validate fingerprints string mode information...
#
# Compares the type of the first fingerprints string against the specified
# FingerprintsStringMode, or detects the mode from it for AutoDetect. On
# success, the bit-vector/vector mode flags along with type and description of
# the first fingerprints string are stored. Returns 1 or 0.
#
sub _ValidateReadFingerprintsStringMode {
  my($This) = @_;
  my($FingerprintsBitVectorStringMode, $FingerprintsVectorStringMode, $FirstFingerprintsStringType, $FirstFingerprintsStringDescription, $FingerprintsFieldLabel, $FingerprintsType, $FingerprintsDescription);

  $This->{ValidFingerprintsStringMode} = 0;

  $This->{FingerprintsBitVectorStringMode} = 0;
  $This->{FingerprintsVectorStringMode} = 0;

  $This->{FirstFingerprintsStringType} = '';
  $This->{FirstFingerprintsStringDescription} = '';

  $FingerprintsBitVectorStringMode = 0;
  $FingerprintsVectorStringMode = 0;

  $FirstFingerprintsStringType = '';
  $FirstFingerprintsStringDescription = '';

  $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

  ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($This->{FirstDataFieldLabelsAndValues}{$FingerprintsFieldLabel});

  if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
    if ($FingerprintsType !~ /^FingerprintsBitVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsBitVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsBitVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsBitVector';
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }
  elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
    if ($FingerprintsType !~ /^FingerprintsVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsVector';
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }
  else {
    # AutoDetect mode...
    if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
      $FingerprintsBitVectorStringMode = 1;
    }
    elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
      $FingerprintsVectorStringMode = 1;
    }
    else {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, identified during, AutoDetect, value of \"FingerprintsStringMode\" is not valid; Supported fingerprints types: FingerprintBitVector or FingerprintsVector...";
      return 0;
    }
    $FirstFingerprintsStringType = $FingerprintsType;
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }

  $This->{ValidFingerprintsStringMode} = 1;

  $This->{FingerprintsBitVectorStringMode} = $FingerprintsBitVectorStringMode;
  $This->{FingerprintsVectorStringMode} = $FingerprintsVectorStringMode;

  $This->{FirstFingerprintsStringType} = $FirstFingerprintsStringType;
  $This->{FirstFingerprintsStringDescription} = $FirstFingerprintsStringDescription;

  return 1;
}

# Write fingerprints string generated from specified fingerprints,
# fingerprints-bit vector, or
# fingerprints vector object and other data to SD file...
#
# The fingerprints string is generated from the specified fingerprints object,
# added to the specified compound string as fingerprints data field and the
# resulting compound string is written out. Returns the object.
#
sub WriteFingerprints {
  my($This, $FingerprintsObject, $CompoundString) = @_;

  # Initialize data for current compound...
  $This->_InitializeWriteCompoundDataString();

  # Set fingerprints object...
  $This->{FingerprintsObject} = $FingerprintsObject;

  # Generate fingerprints string...
  $This->_GenerateFingerprintsString();

  # Set and update compound string...
  $This->{CompoundString} = $CompoundString;
  $This->_AddFingerprintsDataToCompoundString();

  # Write it out...
  $This->_WriteCompoundDataString();

  return $This;
}

# Write fingerprints string and other data to SD file...
#
# Note:
#   o FingerprintsStringMode, BitStringFormat, BitsOrder, VectorStringFormat values
#     are ignored during writing of fingerprints and it's written to the file as it is.
#   o CompoundString contains no fingerprints data
#
sub WriteFingerprintsString {
  my($This, $FingerprintsString, $CompoundString) = @_;

  # Initialize data for current compound...
  $This->_InitializeWriteCompoundDataString();

  # Set fingerprints string...
  $This->{FingerprintsString} = $FingerprintsString;

  # Generate fingerprints object...
  $This->_GenerateFingerprintsObject();

  # Set and update compound string...
  $This->{CompoundString} = $CompoundString;
  $This->_AddFingerprintsDataToCompoundString();

  # Write it out...
  $This->_WriteCompoundDataString();

  return $This;
}

# Initialize compound data string for writing...
#
# Clears compound string, fingerprints object and fingerprints string.
#
sub _InitializeWriteCompoundDataString {
  my($This) = @_;

  $This->{CompoundString} = undef;

  $This->{FingerprintsObject} = undef;
  $This->{FingerprintsString} = undef;

  return $This;
}

# Write compound data string...
#
# Bumps the compound number and prints the current compound string followed by
# a newline to the object's file handle.
#
sub _WriteCompoundDataString {
  my($This) = @_;
  my($FileHandle);

  if ($This->{FirstCompoundDataIO}) {
    $This->_ProcessFirstCompoundDataStringWrite();
  }

  $This->{CompoundNum} += 1;
  $FileHandle = $This->{FileHandle};

  print {$FileHandle} "$This->{CompoundString}\n";

  return $This;
}

# Process first compound data string write...
#
# Only clears the FirstCompoundDataIO flag.
#
sub _ProcessFirstCompoundDataStringWrite {
  my($This) = @_;

  $This->{FirstCompoundDataIO} = 0;

  return $This;
}

# Get ready for writing fingerprints SD file...
#
# Croaks when the file already exists and Overwrite is not set. Sets the
# bit-vector/vector mode flags from FingerprintsStringMode and fills in default
# values for the corresponding string format properties.
#
sub _PrepareForWritingFingerprintsSDFileData {
  my($This) = @_;
  my($SDFile);

  $SDFile = $This->{Name};
  if (!$This->{Overwrite}) {
    if (-e $SDFile) {
      croak "Error: ${ClassName}->New: Object can't be instantiated: File, $SDFile, already exist. Use overwrite option...";
    }
  }

  # Setup FingerprintsStringMode status...

  $This->{FingerprintsBitVectorStringMode} = 0;
  $This->{FingerprintsVectorStringMode} = 0;
  $This->{ValidFingerprintsStringMode} = 0;

  if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
    $This->{FingerprintsBitVectorStringMode} = 1;
  }
  elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
    $This->{FingerprintsVectorStringMode} = 1;
  }

  $This->{ValidFingerprintsStringMode} = ($This->{FingerprintsBitVectorStringMode} || $This->{FingerprintsVectorStringMode}) ? 1 : 0;

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->_SetDefaultBitStringFormat();
    $This->_SetDefaultBitsOrder();
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    $This->_SetDefaultVectorStringFormat();
  }

  return $This;
}

# Set default value for bit string format...
#
# BitStringFormat is set to the default value supplied by FingerprintsStringUtil
# only when it doesn't already have a true value. Returns the object itself.
#
sub _SetDefaultBitStringFormat {
  my($This) = @_;

  if (!$This->{BitStringFormat}) {
    $This->{BitStringFormat} = Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat();
  }

  return $This;
}

# Set default value for bits order...
#
# BitsOrder is set to the default value supplied by FingerprintsStringUtil only
# when it doesn't already have a true value. Returns the object itself.
#
sub _SetDefaultBitsOrder {
  my($This) = @_;

  if (!$This->{BitsOrder}) {
    $This->{BitsOrder} = Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder();
  }

  return $This;
}

# Set default value for vector string format...
#
# The default is requested for the current FingerprintsObject; nothing is set
# when no fingerprints object is available or VectorStringFormat already has a
# true value. Returns the object itself.
#
sub _SetDefaultVectorStringFormat {
  my($This) = @_;

  if (!$This->{VectorStringFormat} && $This->{FingerprintsObject}) {
    $This->{VectorStringFormat} = Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat($This->{FingerprintsObject});
  }

  return $This;
}

# Add fingerprints data to compound string...
#
# Any existing data for FingerprintsFieldLabel is removed with a warning. The
# record terminator, $$$$, at the end of the compound string is stripped, the
# fingerprints data field is appended, and the terminator is added back.
# Returns the object itself.
#
sub _AddFingerprintsDataToCompoundString {
  my($This) = @_;
  my($CmpdString);

  # Check and remove existing fingerprints data...
  if ($This->_IsFingerprintsDataPresentInCompoundString()) {
    carp "Warning: ${ClassName}->_AddFingerprintsDataToCompoundString: The compound string already contains fingerprints data corresponding to fingerprints field label $This->{FingerprintsFieldLabel}; It has been replaced with new fingerprints data...";
    $This->{CompoundString} = SDFileUtil::RemoveCmpdDataHeaderLabelAndValue($This->{CompoundString}, $This->{FingerprintsFieldLabel});
  }

  $CmpdString = $This->{CompoundString};

  # Remove the record terminator along with any trailing white space or line
  # ending. A plain $ anchor misses a terminator followed by \r\n or spaces,
  # which would leave two terminators in the record, and it keeps a stray
  # newline for a terminator followed by \n.
  $CmpdString =~ s/\$\$\$\$\s*\z//;

  $This->{CompoundString} = "${CmpdString}> <$This->{FingerprintsFieldLabel}>\n$This->{FingerprintsString}\n\n\$\$\$\$";

  return $This;
}

# Is fingerprints data already present in compound string?
#
# Returns 1 when the compound string contains <FingerprintsFieldLabel>, and 0
# otherwise, including when the compound string or the field label is empty.
#
sub _IsFingerprintsDataPresentInCompoundString {
  my($This) = @_;
  my($FingerprintsFieldLabel);

  if (TextUtil::IsEmpty($This->{CompoundString}) || TextUtil::IsEmpty($This->{FingerprintsFieldLabel})) {
    return 0;
  }

  $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

  # Match the label literally: it may contain characters, such as parentheses,
  # brackets or periods, which would otherwise be treated as regex operators.
  return ($This->{CompoundString} =~ /<\Q$FingerprintsFieldLabel\E>/) ? 1 : 0;
}

# Generate fingerprints object using current fingerprints string...
#
# FingerprintsObject is reset to undef and then parsed from FingerprintsString
# according to the active string mode flag. Returns the object itself, or undef
# when a fingerprints string is available but neither mode flag is set.
#
sub _GenerateFingerprintsObject {
  my($This) = @_;

  $This->{FingerprintsObject} = undef;

  if (!$This->{FingerprintsString}) {
    return $This;
  }

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsBitVectorString($This->{FingerprintsString});
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsVectorString($This->{FingerprintsString});
  }
  else {
    # Kept as an explicit undef: callers may rely on it to detect an unsupported mode.
    return undef;
  }

  return $This;
}

# Generate fingerprints string using current fingerprints object...
#
# FingerprintsString is reset to an empty string and then generated from
# FingerprintsObject using the format values for the active string mode flag.
# It stays empty when no fingerprints object is available or neither mode flag
# is set. Returns the object itself.
#
sub _GenerateFingerprintsString {
  my($This) = @_;

  $This->{FingerprintsString} = '';

  if (!$This->{FingerprintsObject}) {
    return $This;
  }

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{BitStringFormat}, $This->{BitsOrder});
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{VectorStringFormat});
  }

  return $This;
}

# Is it a fingerprints file?
#
# It may be called as a function with a file name, or as an object method, in
# which case the file name is the second parameter. The decision is made by
# checking the file extension against sdf and sd.
#
sub IsFingerprintsSDFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  # Object method invocation passes the object first and the file name second...
  my $CalledAsMethod = ((@_ == 2) && (_IsFingerprintsSDFileIO($FirstParameter))) ? 1 : 0;
  my $FileName = $CalledAsMethod ? $SecondParameter : $FirstParameter;

  # Check file extension...
  my $IsSDFile = FileUtil::CheckFileType($FileName, "sdf sd");

  return $IsSDFile;
}

# Is it a FingerprintsSDFileIO object?
#
# Returns 1 for a blessed reference which is an instance of this class or one
# of its subclasses, and 0 for anything else.
#
sub _IsFingerprintsSDFileIO {
  my($Object) = @_;

  if (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) {
    return 1;
  }

  return 0;
}