MayaChemTools

   1 package FileIO::FingerprintsSDFileIO;
   2 #
   3 # $RCSfile: FingerprintsSDFileIO.pm,v $
   4 # $Date: 2015/02/28 20:48:43 $
   5 # $Revision: 1.18 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Carp;
  31 use Exporter;
  32 use Scalar::Util ();
  33 use TextUtil ();
  34 use FileUtil ();
  35 use SDFileUtil ();
  36 use Fingerprints::FingerprintsStringUtil ();
  37 use FileIO::FileIO;
  38 
  39 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  40 
  41 @ISA = qw(FileIO::FileIO Exporter);
  42 @EXPORT = qw();
  43 @EXPORT_OK = qw(IsFingerprintsSDFile);
  44 
  45 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  46 
  47 # Setup class variables...
  48 my($ClassName);
  49 _InitializeClass();
  50 
  51 # Class constructor...
  52 sub new {
  53   my($Class, %NamesAndValues) = @_;
  54 
  55   # Initialize object...
  56   my $This = $Class->SUPER::new();
  57   bless $This, ref($Class) || $Class;
  58   $This->_InitializeFingerprintsSDFileIO();
  59 
  60   $This->_InitializeFingerprintsSDFileIOProperties(%NamesAndValues);
  61 
  62   return $This;
  63 }
  64 
  65 # Initialize object data...
  66 #
  67 sub _InitializeFingerprintsSDFileIO {
  68   my($This) = @_;
  69 
  70   # Fingerprints string data format during read/write...
  71   #
  72   # For file read:
  73   #
  74   # AutoDetect  - automatically detect format of fingerprints string
  75   # FingerprintsBitVectorString - Bit vector fingerprints string format
  76   # FingerprintsVectorString - Vector fingerprints string format
  77   #
  78   # Default value: AutoDetect
  79   #
  80   # For file write:
  81   #
  82   # FingerprintsBitVectorString - Bit vector fingerprints string format
  83   # FingerprintsVectorString - Vector fingerprints string format
  84   #
  85   # Default value: undef
  86   #
  87   $This->{FingerprintsStringMode} = undef;
  88 
  89   # For file read:
  90   #
  91   #   o Fingerprints bit-vector and vector object for current fingerprints string
  92   #
  93   # For file write:
  94   #
  95   #   o Fingerprints bit-vector and vector object for current fingerprints string
  96   #   o Any supported fingerprints object: PathLengthFingerprints, ExtendedConnectivity, and so on.
  97   #
  98   $This->{FingerprintsObject} = undef;
  99 
 100   # Fingerprints SD file data field label during read/write
 101   #
 102   # For file read:
 103   #
 104   # Value of AutoDetect implies use first data field containing the word Fingerprints in its
 105   # data field label to retrieve fingerprints string data. Othwewise, a valid data field name
 106   # must be specified.
 107   #
 108   # For file write:
 109   #
 110   # Data field label to use for writing fingerprints string. Default: Fingerprints
 111   #
 112   $This->{FingerprintsFieldLabel} = undef;
 113 
 114   # Fingepritns string for current line during read/write...
 115   $This->{FingerprintsString} = undef;
 116 
 117   # First compound data string read/write...
 118   $This->{FirstCompoundDataIO} = 1;
 119 
 120   # Current fingerprints string data compound number during read/write...
 121   $This->{CompoundNum} = 0;
 122 
 123   # Compound data string during read/write...
 124   $This->{CompoundString} = undef;
 125 
 126   # Initialize parameters for read...
 127   $This->_InitializeFingerprintsSDFileIORead();
 128 
 129   # Initialize parameters for write...
 130   $This->_InitializeFingerprintsSDFileIOWrite();
 131 
 132   return $This;
 133 }
 134 
 135 # Initialize class ...
 136 sub _InitializeClass {
 137   #Class name...
 138   $ClassName = __PACKAGE__;
 139 
 140 }
 141 
 142 # Initialize object data for reading fingerprints SD file...
 143 #
 144 sub _InitializeFingerprintsSDFileIORead {
 145   my($This) = @_;
 146 
 147   # Compound ID mode to use for retrieving compound IDs for fingerprints...
 148   #
 149   # Specify how to generate compound IDs: use a SD file datafield value; use molname line from
 150   # SD file; generate a sequential ID with specific prefix; use combination of both MolName and
 151   # LabelPrefix with usage of LabelPrefix values for empty molname lines.
 152   #
 153   # Possible values: DataField, MolName, LabelPrefix, or MolNameOrLabelPrefix. Default: LabelPrefix.
 154   #
 155   # For MolNameAndLabelPrefix value of CompoundIDMode, molname line in SD file takes precedence over
 156   # sequential compound IDs generated using LabelPrefix and only empty molname values are replaced
 157   # with sequential compound IDs.
 158   #
 159   $This->{CompoundIDMode} = 'LabelPrefix';
 160 
 161   #
 162   # Compound ID data field label name whose value is used as compound ID during DatafField value of
 163   # CompoundIDMode
 164   #
 165   $This->{CompoundIDFieldLabel} = undef;
 166 
 167   # A prefix string used for generating compound IDs like LabelPrefixString<Number> during LabelPrefix
 168   # or MolNameOrLabelPrefix value of  CompoundIDMode. Default value, Cmpd, generates compound IDs
 169   # which look like Cmpd<Number>.
 170   #
 171   $This->{CompoundIDPrefix} = 'Cmpd';
 172 
 173   # By default, the fingerprints data corresponding to FingerprintsCol is assumed to
 174   # be valid and no validation is performed before generating fingerprints objects...
 175   #
 176   $This->{ValidateData} = 1;
 177 
 178   # Level of detail to print during validation of data for invalid or missing data...
 179   $This->{DetailLevel} = 1;
 180 
 181   # Number of missing and invalid fingerprints string data compound strings...
 182   $This->{NumOfCmpdsWithMissingData} = 0;
 183   $This->{NumOfCmpdsWithInvalidData} = 0;
 184 
 185   # Compound ID for current fingerprints string...
 186   $This->{CompoundID} = undef;
 187 
 188   # Compound data field labels and values map for current compound data...
 189   %{$This->{DataFieldLabelsAndValues}} = ();
 190 
 191   # Status of data in fingerprints SD file...
 192   $This->{ValidFileData} = 0;
 193 
 194   $This->{ValidCompoundIDField} = 0;
 195   $This->{ValidFingerprintsField} = 0;
 196 
 197   $This->{ValidFingerprintsStringMode} = 0;
 198 
 199   return $This;
 200 }
 201 
 202 # Initialize object data for writing fingerprints SD file...
 203 #
 204 sub _InitializeFingerprintsSDFileIOWrite {
 205   my($This) = @_;
 206 
 207   # Fingerprints bit vector string format...
 208   #
 209   # Possible values: BinaryString or HexadecimalString [Default]
 210   #
 211   # Default BitStringFormat is set during first write using Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat.
 212   #
 213   $This->{BitStringFormat} = undef;
 214 
 215   # Bits order in fingerprints bit vector string...
 216   #
 217   # Ascending - First bit in each byte as the lowest bit [Default]
 218   # Descending - First bit in each byte as the highest bit
 219   #
 220   # Default BitsOrder is set during first write using Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder.
 221   #
 222   $This->{BitsOrder} = undef;
 223 
 224   # Fingerprints vector string format...
 225   #
 226   # Possible values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, ValuesString
 227   #
 228   # Default VectorStringFormat is set during first write using Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat.
 229   # For fingerprints vector object containing vector NumericalValues, it corresponds to IDsAndValuesString; othwerwise,
 230   # it's set to ValuesString.
 231   #
 232   $This->{VectorStringFormat} = undef;
 233 
 234   # Overwriting existing file...
 235   $This->{Overwrite} = 0;
 236 
 237   return $This;
 238 }
 239 
 240 # Initialize object values...
 241 sub _InitializeFingerprintsSDFileIOProperties {
 242   my($This, %NamesAndValues) = @_;
 243 
 244   # All other property names and values along with all Set/Get<PropertyName> methods
 245   # are implemented on-demand using ObjectProperty class.
 246 
 247   my($Name, $Value, $MethodName);
 248   while (($Name, $Value) = each  %NamesAndValues) {
 249     $MethodName = "Set${Name}";
 250     $This->$MethodName($Value);
 251   }
 252 
 253   if (!exists $NamesAndValues{Name}) {
 254     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";
 255   }
 256 
 257   # Make sure it's a fingerprints file...
 258   $Name = $NamesAndValues{Name};
 259   if (!$This->IsFingerprintsSDFile($Name)) {
 260     croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be fingerprints format...";
 261   }
 262 
 263   if ($This->GetMode() =~ /^Read$/i) {
 264     $This->_InitializeFingerprintsSDFileIOReadProperties(%NamesAndValues);
 265   }
 266   elsif ($This->GetMode() =~ /^(Write|Append)$/i) {
 267     $This->_InitializeFingerprintsSDFileIOWriteProperties(%NamesAndValues);
 268   }
 269 
 270   return $This;
 271 }
 272 
 273 # Initialize object properties for reading fingerprints SD file...
 274 #
 275 sub _InitializeFingerprintsSDFileIOReadProperties {
 276   my($This, %NamesAndValues) = @_;
 277 
 278   # Set default value for FingerprintsStringMode...
 279   if (!$This->{FingerprintsStringMode}) {
 280     $This->{FingerprintsStringMode} = 'AutoDetect';
 281   }
 282 
 283   # Set default value for FingerprintsFieldLabel...
 284   if (!$This->{FingerprintsFieldLabel}) {
 285     $This->{FingerprintsFieldLabel} = 'AutoDetect';
 286   }
 287 
 288   # Check compound ID data field...
 289   if (($This->{CompoundIDMode} =~ /^DataField$/i) && (!defined($This->{CompoundIDFieldLabel}))) {
 290     croak "Error: ${ClassName}->: Object can't be instantiated: Compound ID data field lable must be specifed using \"CompoundIDFieldLabel\" during \"DataField\" value of \"CompoundIDMode\"...";
 291   }
 292 
 293   $This->_PrepareForReadingFingerprintsSDFileData();
 294 
 295   return $This;
 296 }
 297 
 298 # Initialize object properties for writing fingerprints SD file...
 299 #
 300 sub _InitializeFingerprintsSDFileIOWriteProperties {
 301   my($This, %NamesAndValues) = @_;
 302 
 303   # Check FingerprintsStringMode value...
 304   if (!exists $NamesAndValues{FingerprintsStringMode}) {
 305     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying FingerprintsStringMode...";
 306   }
 307 
 308   if ($This->{FingerprintsStringMode} !~ /^(FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
 309     croak "Error: ${ClassName}->: Object can't be instantiated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, is not valid; Supported values for write/append: FingerprintsBitVectorString or FingerprintsVectorString...";
 310   }
 311 
 312   # Set default value for FingerprintsFieldLabel...
 313   if (!$This->{FingerprintsFieldLabel}) {
 314     $This->{FingerprintsFieldLabel} = 'Fingerprints';
 315   }
 316 
 317   $This->_PrepareForWritingFingerprintsSDFileData();
 318 
 319   return $This;
 320 }
 321 
 322 # Set FingerprintsStringMode...
 323 #
 324 sub SetFingerprintsStringMode {
 325   my($This, $Value) = @_;
 326 
 327   # AutoDetect - automatically detect format of fingerprints string
 328   # FingerprintsBitVectorString - Bit vector fingerprints string format
 329   # FingerprintsVectorString - Vector fingerprints string format
 330 
 331   if ($Value !~ /^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
 332     croak "Error: ${ClassName}->SetFingerprintsStringMode: FingerprintsStringMode value, $Value, is not valid; Supported values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString...";
 333   }
 334 
 335   $This->{FingerprintsStringMode} = $Value;
 336 
 337   return $This;
 338 }
 339 
 340 # Set CompoundIDMode...
 341 #
 342 sub SetCompoundIDMode {
 343   my($This, $Value) = @_;
 344 
 345   if ($Value !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i) {
 346     croak "Error: ${ClassName}->SetFingerprintsStringMode: CompoundIDMode value, $Value, is not valid; Supported values: DataField, MolName, LabelPrefix, or MolNameOrLabelPrefix...";
 347   }
 348 
 349   $This->{CompoundIDMode} = $Value;
 350 
 351   return $This;
 352 }
 353 
 354 # Set DetailLevel...
 355 #
 356 sub SetDetailLevel {
 357   my($This, $Value) = @_;
 358 
 359   if (!TextUtil::IsPositiveInteger($Value)) {
 360     croak "Error: ${ClassName}->SetDetailLevel: DetailLevel value, $Value, is not valid; Supported values: > 0...";
 361   }
 362 
 363   $This->{DetailLevel} = $Value;
 364 
 365   return $This;
 366 }
 367 
 368 # Set BitStringFormat...
 369 #
 370 sub SetBitStringFormat {
 371   my($This, $Value) = @_;
 372 
 373   if ($Value !~ /^(BinaryString|HexadecimalString)$/i) {
 374     croak "Error: ${ClassName}->SetBitStringFormat: BitStringFormat value, $Value, is not valid; Supported values: BinaryString or HexadecimalString...";
 375   }
 376 
 377   $This->{BitStringFormat} = $Value;
 378 
 379   return $This;
 380 }
 381 
 382 # Set BitsOrder...
 383 #
 384 sub SetBitsOrder {
 385   my($This, $Value) = @_;
 386 
 387   # Ascending - First bit in each byte as the lowest bit
 388   # Descending - First bit in each byte as the highest bit
 389   #
 390   if ($Value !~ /^(Ascending|Descending)$/i) {
 391     croak "Error: ${ClassName}->SetBitsOrder: FingerprintsStringMode value, $Value, is not valid; Supported values: Ascending or Descending...";
 392   }
 393 
 394   $This->{BitsOrder} = $Value;
 395 
 396   return $This;
 397 }
 398 
 399 # Set VectorStringFormat...
 400 #
 401 sub SetVectorStringFormat {
 402   my($This, $Value) = @_;
 403 
 404   # Possible values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, ValuesString
 405 
 406   if ($Value !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i) {
 407     croak "Error: ${ClassName}->SetVectorStringFormat: FingerprintsStringMode value, $Value, is not valid; Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString...";
 408   }
 409 
 410   $This->{VectorStringFormat} = $Value;
 411 
 412   return $This;
 413 }
 414 
 415 # Get compound string for current compound with optional removal of fingerprints data..
 416 #
 417 sub GetCompoundString {
 418   my($This, $RemoveFingerprintsData) = @_;
 419 
 420   $RemoveFingerprintsData = defined $RemoveFingerprintsData ? $RemoveFingerprintsData : 0;
 421 
 422   if ($RemoveFingerprintsData && $This->_IsFingerprintsDataPresentInCompoundString()) {
 423     return SDFileUtil::RemoveCmpdDataHeaderLabelAndValue($This->{CompoundString}, $This->{FingerprintsFieldLabel});
 424   }
 425 
 426   return $This->{CompoundString};
 427 }
 428 
 429 # Set compound string for current compound..
 430 #
 431 sub SetCompoundString {
 432   my($This, $CompoundString) = @_;
 433 
 434   $This->{CompoundString} = $CompoundString;
 435 
 436   return $This;
 437 }
 438 
 439 # Get fingerprints object for current compound using fingerprints, fingerprints bit-vector
 440 # fingerprints vector object. Fingerprints object correspond to any of supported fingerprints
 441 # objects such as PathLengthFingerprints, ExtendedConnectivity, and so on.
 442 #
 443 sub GetFingerprints {
 444   my($This) = @_;
 445 
 446   return $This->{FingerprintsObject};
 447 }
 448 
 449 # Set fingerprints object for current compound...
 450 #
 451 sub SetFingerprints {
 452   my($This, $FingerprintsObject) = @_;
 453 
 454   $This->{FingerprintsObject} = $FingerprintsObject;
 455 
 456   return $This;
 457 }
 458 
 459 # Get fingerprints string  for current compound...
 460 #
 461 sub GetFingerprintsString {
 462   my($This) = @_;
 463 
 464   return $This->{FingerprintsString} ? $This->{FingerprintsString} : 'None';
 465 }
 466 
 467 # Set fingerprints string for current compound...
 468 #
 469 sub SetFingerprintsString {
 470   my($This, $FingerprintsString) = @_;
 471 
 472   $This->{FingerprintsString} = $FingerprintsString;
 473 
 474   return $This;
 475 }
 476 
 477 # Does fingerprints SD file contain valid data?
 478 #
 479 sub IsFingerprintsFileDataValid {
 480   my($This) = @_;
 481 
 482   return $This->{ValidFileData} ? 1 : 0;
 483 }
 484 
 485 # Does current compound contains valid fingerprints object data?
 486 #
 487 sub IsFingerprintsDataValid {
 488   my($This) = @_;
 489 
 490   return defined $This->{FingerprintsObject} ? 1 : 0;
 491 }
 492 
 493 # Read next available compound data string,  process it and generate appropriate fingerprints
 494 # objects...
 495 #
 496 sub Read {
 497   my($This) = @_;
 498 
 499   # Read compound data string...
 500   if (!$This->_ReadCompoundDataString()) {
 501     return undef;
 502   }
 503 
 504   # No need to process invalid SD file with invalid data...
 505   if (!$This->{ValidFileData}) {
 506     if ($This->{ValidateData}) {
 507       $This->{NumOfCmpdsWithMissingData} += 1;
 508     }
 509     return $This;
 510   }
 511 
 512   # Perform data validation...
 513   if ($This->{ValidateData}) {
 514     if (!$This->_ValidateReadCompoundDataString()) {
 515       return $This;
 516     }
 517   }
 518 
 519   # Setup fingerprints string after checking again to handle problematic data for
 520   # non-validated compound string data...
 521   #
 522   my($FingerprintsFieldLabel);
 523   $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};
 524   if (exists $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel}) {
 525     $This->{FingerprintsString} = $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel};
 526   }
 527 
 528   # Generate fingeprints object...
 529   $This->_GenerateFingerprintsObject();
 530 
 531   # Setup fingerprints compound ID for fingerprints string...
 532   $This->_GenerateCompoundID();
 533 
 534   return $This;
 535 }
 536 
 537 # Read next available compound data string,  process it and generate appropriate fingerprints
 538 # objects...
 539 #
 540 sub Next {
 541   my($This) = @_;
 542 
 543   return $This->Read();
 544 }
 545 
 546 # Read compound data string...
 547 #
 548 sub _ReadCompoundDataString {
 549   my($This) = @_;
 550   my(@CmpdLines);
 551 
 552   if ($This->{FirstCompoundDataIO}) {
 553     $This->_ProcessFirstCompoundDataStringRead();
 554   }
 555 
 556   # Initialize data for current compound data string...
 557   $This->_InitializeReadCompoundDataString();
 558 
 559   # Get next compound data line...
 560   $This->{CompoundString} = SDFileUtil::ReadCmpdString($This->{FileHandle});
 561   if (!$This->{CompoundString}) {
 562     return 0;
 563   }
 564 
 565   $This->{CompoundNum} += 1;
 566 
 567   # Set up data field labels and values...
 568   @CmpdLines = split "\n", $This->{CompoundString};
 569   %{$This->{DataFieldLabelsAndValues}} = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
 570 
 571   return 1;
 572 }
 573 
 574 # Initialize compound data string for reading...
 575 #
 576 sub _InitializeReadCompoundDataString {
 577   my($This) = @_;
 578 
 579   $This->{CompoundID} = undef;
 580   $This->{CompoundString} = undef;
 581 
 582   %{$This->{DataFieldLabelsAndValues}} = ();
 583 
 584   $This->{FingerprintsObject} = undef;
 585   $This->{FingerprintsString} = undef;
 586 
 587   return $This;
 588 }
 589 
 590 # Validate compound data string containing fingerprints data...
 591 #
 592 sub _ValidateReadCompoundDataString {
 593   my($This) = @_;
 594   my($FingerprintsFieldLabel);
 595 
 596   $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};
 597 
 598   # Check for missing data...
 599   if (!exists $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel}) {
 600     # Missing data...
 601     $This->{NumOfCmpdsWithMissingData} += 1;
 602     if ($This->{DetailLevel} >= 3) {
 603       carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains no fingerprints data: $This->{CompoundString}...";
 604     }
 605     elsif ($This->{DetailLevel} >= 2) {
 606       carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains no fingerprints data...";
 607     }
 608     return 0;
 609   }
 610 
 611   # Check for invalid data...
 612   my($InvalidFingerprintsData, $FingerprintsType, $FingerprintsDescription);
 613 
 614   $InvalidFingerprintsData = 0;
 615 
 616   if (Fingerprints::FingerprintsStringUtil::AreFingerprintsStringValuesValid($This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel})) {
 617     ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel});
 618     if (defined($FingerprintsType) && defined($FingerprintsDescription)) {
 619       if ($This->{FirstFingerprintsStringType} !~ /^$FingerprintsType$/i || $This->{FirstFingerprintsStringDescription} !~ /^$FingerprintsDescription$/i) {
 620         $InvalidFingerprintsData = 1;
 621       }
 622     }
 623     else {
 624       $InvalidFingerprintsData = 1;
 625     }
 626   }
 627   else {
 628     $InvalidFingerprintsData = 1;
 629   }
 630 
 631   if ($InvalidFingerprintsData) {
 632     $This->{NumOfCmpdsWithInvalidData} += 1;
 633     if ($This->{DetailLevel} >= 3) {
 634       carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains invalid fingerprints data: $This->{DataLine}...";
 635     }
 636     elsif ($This->{DetailLevel} >= 2) {
 637       carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains invalid fingerprints data...";
 638     }
 639     return 0;
 640   }
 641 
 642   return 1;
 643 }
 644 
 645 # Setup fingerprints compound ID for fingerprints string...
 646 sub _GenerateCompoundID {
 647   my($This) = @_;
 648   my($CompoundID, $MolName);
 649 
 650   $CompoundID = '';
 651 
 652   if ($This->{CompoundIDMode} =~ /^LabelPrefix$/i) {
 653     $CompoundID = "$This->{CompoundIDPrefix}$This->{CompoundNum}";
 654   }
 655   elsif ($This->{CompoundIDMode} =~ /^DataField$/i) {
 656     my($SpecifiedDataField);
 657     $SpecifiedDataField = $This->{CompoundIDFieldLabel};
 658     $CompoundID = exists $This->{DataFieldLabelsAndValues}{$SpecifiedDataField} ?  $This->{DataFieldLabelsAndValues}{$SpecifiedDataField} : '';
 659   }
 660   elsif ($This->{CompoundIDMode} =~ /^MolNameOrLabelPrefix$/i) {
 661     ($MolName) = split "\n", $This->{CompoundString};
 662     $CompoundID = TextUtil::IsNotEmpty($MolName) ? $MolName : "$This->{CompoundIDPrefix}$This->{CompoundNum}";
 663   }
 664   elsif ($This->{CompoundIDMode} =~ /^MolName$/i) {
 665     ($MolName) = split "\n", $This->{CompoundString};
 666     $CompoundID = $MolName;
 667   }
 668 
 669   $This->{CompoundID} = $CompoundID;
 670 
 671   return $This;
 672 }
 673 
 674 # Process first compound data string read...
 675 #
 676 sub _ProcessFirstCompoundDataStringRead {
 677   my($This) = @_;
 678   my($Line, $FileHandle);
 679 
 680   $This->{FirstCompoundDataIO} = 0;
 681 
 682   return $This;
 683 }
 684 
 685 # Get ready for reading fingerprints SD file...
 686 #
 687 sub _PrepareForReadingFingerprintsSDFileData {
 688   my($This) = @_;
 689 
 690   # Retrieve SD file data fields information....
 691   $This->_RetrieveSDFileDataFields();
 692 
 693   # Validate compound and fingerprints field information...
 694   $This->_ValidateReadCompoundIDField();
 695   $This->_ValidateReadFingerprintsField();
 696 
 697   # Validate fingeprints string mode information...
 698   if ($This->{ValidFingerprintsField}) {
 699     $This->_ValidateReadFingerprintsStringMode();
 700   }
 701 
 702   # Set status of SD file data...
 703   $This->{ValidFileData} = ($This->{ValidCompoundIDField} && $This->{ValidFingerprintsField} && $This->{ValidFingerprintsStringMode}) ? 1 : 0;
 704 
 705   return $This;
 706 }
 707 
 708 # Retrieve information data fields and fingerprints string...
 709 #
 710 sub _RetrieveSDFileDataFields {
 711   my($This) = @_;
 712   my($SDFile, $CmpdString, @CmpdLines);
 713 
 714   $SDFile = $This->{Name};
 715 
 716   if (!(-e $SDFile)) {
 717     croak "Error: ${ClassName}->New: Object can't be instantiated: File, $SDFile, doesn't exist...";
 718   }
 719 
 720   if (!open SDFILE, "$SDFile") {
 721     croak "Error: ${ClassName}->New: Object can't be instantiated: Couldn't open input SD file $SDFile: $! ...";
 722   }
 723   $CmpdString = SDFileUtil::ReadCmpdString(\*SDFILE);
 724   close SDFILE;
 725 
 726   # Set up data field labels and values for first compound string data...
 727   @CmpdLines = split "\n", $CmpdString;
 728 
 729   %{$This->{FirstDataFieldLabelsAndValues}} = ();
 730   %{$This->{FirstDataFieldLabelsAndValues}} = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
 731 
 732   return $This;
 733 }
 734 
 735 # Validate compound ID field information...
 736 #
 737 sub _ValidateReadCompoundIDField {
 738   my($This) = @_;
 739   my($SpecifiedDataField);
 740 
 741   $This->{ValidCompoundIDField} = 0;
 742 
 743   if ($This->{CompoundIDMode} =~ /^DataField$/i) {
 744     $SpecifiedDataField = $This->{CompoundIDFieldLabel};
 745     if (! exists $This->{FirstDataFieldLabelsAndValues}{$SpecifiedDataField}) {
 746       carp "Warning: ${ClassName}->_ValidateReadCompoundIDField: Compound ID data field, $SpecifiedDataField, specified using \"CompoundIDField\" in \"DataField\" \"CompoundIDMode\" doesn't exist...";
 747       return 0;
 748     }
 749   }
 750 
 751   $This->{ValidCompoundIDField} = 1;
 752 
 753   return 1;
 754 }
 755 
 756 # Validate fingerprints string field information...
 757 #
 758 sub _ValidateReadFingerprintsField {
 759   my($This) = @_;
 760   my($FingerprintsFieldLabel);
 761 
 762   $This->{ValidFingerprintsField} = 0;
 763 
 764   $FingerprintsFieldLabel = '';
 765 
 766   if ($This->{FingerprintsFieldLabel} !~ /^AutoDetect$/i) {
 767     $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};
 768     if (! exists $This->{FirstDataFieldLabelsAndValues}{$FingerprintsFieldLabel}) {
 769       carp "Warning: ${ClassName}->_ValidateReadFingerprintsField: Fingerprints data field value, $FingerprintsFieldLabel, specified using \"FingerprintsFieldLabel\" doesn't exist...";
 770       return 0;
 771     }
 772   }
 773   else {
 774     # Make sure default fingerprints field does exist...
 775     my($FingerprintsFieldFound, $DataFieldLabel);
 776     $FingerprintsFieldFound = 0;
 777 
 778     DATAFIELDLABEL: for $DataFieldLabel (keys %{$This->{FirstDataFieldLabelsAndValues}}) {
 779       if ($DataFieldLabel =~ /Fingerprints/i) {
 780         $FingerprintsFieldFound = 1;
 781         $FingerprintsFieldLabel = $DataFieldLabel;
 782         last DATAFIELDLABEL;
 783       }
 784     }
 785     if (!$FingerprintsFieldFound) {
 786       carp "Warning: ${ClassName}->_ValidateReadFingerprintsField: Data field label containing \"Fingerprints\" string in its name doesn't exist...";
 787       return 0;
 788     }
 789   }
 790 
 791   $This->{ValidFingerprintsField} = 1;
 792   $This->{FingerprintsFieldLabel} = $FingerprintsFieldLabel;
 793 
 794   return 1;
 795 }
 796 
 797 # Validate fingerprints string mode information...
 798 #
 799 sub _ValidateReadFingerprintsStringMode {
 800   my($This) = @_;
 801   my($FingerprintsBitVectorStringMode, $FingerprintsVectorStringMode, $FirstFingerprintsStringType, $FirstFingerprintsStringDescription, $FingerprintsFieldLabel, $FingerprintsType, $FingerprintsDescription);
 802 
 803   $This->{ValidFingerprintsStringMode} = 0;
 804 
 805   $This->{FingerprintsBitVectorStringMode} = 0;
 806   $This->{FingerprintsVectorStringMode} = 0;
 807 
 808   $This->{FirstFingerprintsStringType} = '';
 809   $This->{FirstFingerprintsStringDescription} = '';
 810 
 811   $FingerprintsBitVectorStringMode = 0;
 812   $FingerprintsVectorStringMode = 0;
 813 
 814   $FirstFingerprintsStringType = '';
 815   $FirstFingerprintsStringDescription = '';
 816 
 817   $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};
 818 
 819   ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($This->{FirstDataFieldLabelsAndValues}{$FingerprintsFieldLabel});
 820 
 821   if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
 822     if ($FingerprintsType !~ /^FingerprintsBitVector$/i) {
 823       carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsBitVectorString, specified using \"FingerprintsStringMode\"...";
 824       return 0;
 825     }
 826     $FingerprintsBitVectorStringMode = 1;
 827     $FirstFingerprintsStringType = 'FingerprintsBitVector';
 828     $FirstFingerprintsStringDescription = $FingerprintsDescription;
 829   }
 830   elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
 831     if ($FingerprintsType !~ /^FingerprintsVector$/i) {
 832       carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsVectorString, specified using \"FingerprintsStringMode\"...";
 833       return 0;
 834     }
 835     $FingerprintsVectorStringMode = 1;
 836     $FirstFingerprintsStringType = 'FingerprintsVector';
 837     $FirstFingerprintsStringDescription = $FingerprintsDescription;
 838   }
 839   else {
 840     # AutoDetect mode...
 841     if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
 842       $FingerprintsBitVectorStringMode = 1;
 843     }
 844     elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
 845       $FingerprintsVectorStringMode = 1;
 846     }
 847     else {
 848       carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, identified during, AutoDetect, value of \"FingerprintsStringMode\" is not valid; Supported fingerprints types: FingerprintBitVector or FingerprintsVector...";
 849       return 0;
 850     }
 851     $FirstFingerprintsStringType = $FingerprintsType;
 852     $FirstFingerprintsStringDescription = $FingerprintsDescription;
 853   }
 854 
 855   $This->{ValidFingerprintsStringMode} = 1;
 856 
 857   $This->{FingerprintsBitVectorStringMode} = $FingerprintsBitVectorStringMode;
 858   $This->{FingerprintsVectorStringMode} = $FingerprintsVectorStringMode;
 859 
 860   $This->{FirstFingerprintsStringType} = $FirstFingerprintsStringType;
 861   $This->{FirstFingerprintsStringDescription} = $FirstFingerprintsStringDescription;
 862 
 863   return 1;
 864 }
 865 
 # Write a fingerprints, fingerprints-bit vector, or fingerprints vector object along with
 # compound data to SD file...
 #
 # The fingerprints object is turned into a fingerprints string, the string is added to the
 # specified compound string as a data field, and the updated compound string is written
 # out. The FingerprintsSDFileIO object itself is returned.
 #
 sub WriteFingerprints {
   my $This = shift;
   my($FingerprintsObject, $CompoundString) = @_;

   # Start with a clean slate for current compound...
   $This->_InitializeWriteCompoundDataString();

   # Keep track of fingerprints object and compound string...
   $This->{FingerprintsObject} = $FingerprintsObject;
   $This->{CompoundString} = $CompoundString;

   # Generate fingerprints string, add it to compound string, and write it out...
   for my $Step (qw(_GenerateFingerprintsString _AddFingerprintsDataToCompoundString _WriteCompoundDataString)) {
     $This->$Step();
   }

   return $This;
 }
 890 
 # Write an already generated fingerprints string along with compound data to SD file...
 #
 # Notes:
 #   o The fingerprints string is added to the compound string as it is; no BitStringFormat,
 #     BitsOrder, or VectorStringFormat value is applied to it here.
 #   o A fingerprints object is generated from the string only when a fingerprints
 #     string mode flag is set.
 #   o CompoundString is expected to contain no fingerprints data; any existing data for
 #     the fingerprints field label is replaced with a warning.
 #
 # The FingerprintsSDFileIO object itself is returned.
 #
 sub WriteFingerprintsString {
   my $This = shift;
   my($FingerprintsString, $CompoundString) = @_;

   # Start with a clean slate for current compound...
   $This->_InitializeWriteCompoundDataString();

   # Track fingerprints string and its corresponding fingerprints object...
   $This->{FingerprintsString} = $FingerprintsString;
   $This->_GenerateFingerprintsObject();

   # Add fingerprints data to compound string and write it out...
   $This->{CompoundString} = $CompoundString;
   $This->_AddFingerprintsDataToCompoundString();
   $This->_WriteCompoundDataString();

   return $This;
 }
 919 
 # Reset per compound data before writing next compound...
 #
 # CompoundString, FingerprintsObject, and FingerprintsString are all set to undef.
 # The FingerprintsSDFileIO object itself is returned.
 #
 sub _InitializeWriteCompoundDataString {
   my($This) = @_;

   for my $Key (qw(CompoundString FingerprintsObject FingerprintsString)) {
     $This->{$Key} = undef;
   }

   return $This;
 }
 932 
 # Write compound data string...
 #
 # First write processing is performed when FirstCompoundDataIO flag is set. CompoundNum
 # is incremented and CompoundString, followed by a new line, is printed to FileHandle.
 # The FingerprintsSDFileIO object itself is returned.
 #
 sub _WriteCompoundDataString {
   my($This) = @_;

   $This->_ProcessFirstCompoundDataStringWrite() if $This->{FirstCompoundDataIO};

   $This->{CompoundNum} += 1;

   my $FileHandle = $This->{FileHandle};
   print {$FileHandle} $This->{CompoundString} . "\n";

   return $This;
 }
 950 
 # Process first compound data string write...
 #
 # The only processing performed is the reset of FirstCompoundDataIO flag, which
 # prevents this method from being invoked again during subsequent writes. The
 # FingerprintsSDFileIO object itself is returned.
 #
 sub _ProcessFirstCompoundDataStringWrite {
   my($This) = @_;

   $This->{FirstCompoundDataIO} = 0;

   return $This;
 }
 961 
 # Get ready for writing fingerprints SD file...
 #
 # Croaks when the SD file already exists and Overwrite is not set. Otherwise,
 # FingerprintsBitVectorStringMode, FingerprintsVectorStringMode, and
 # ValidFingerprintsStringMode flags are set up using FingerprintsStringMode value,
 # followed by setting up default format values for the selected mode. The
 # FingerprintsSDFileIO object itself is returned.
 #
 sub _PrepareForWritingFingerprintsSDFileData {
   my($This) = @_;

   # Make sure an existing file is not clobbered by mistake...
   my $SDFile = $This->{Name};
   if (!$This->{Overwrite} && (-e $SDFile)) {
     croak "Error: ${ClassName}->New: Object can't be instantiated: File, $SDFile, already exist. Use overwrite option...";
   }

   # Setup FingerprintsStringMode status...
   my $BitVectorStringMode = ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) ? 1 : 0;
   my $VectorStringMode = ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) ? 1 : 0;

   $This->{FingerprintsBitVectorStringMode} = $BitVectorStringMode;
   $This->{FingerprintsVectorStringMode} = $VectorStringMode;
   $This->{ValidFingerprintsStringMode} = ($BitVectorStringMode || $VectorStringMode) ? 1 : 0;

   # Setup default format values for selected mode...
   if ($BitVectorStringMode) {
     $This->_SetDefaultBitStringFormat();
     $This->_SetDefaultBitsOrder();
   }
   elsif ($VectorStringMode) {
     $This->_SetDefaultVectorStringFormat();
   }

   return $This;
 }
1000 
# Set default value for bit string format...
#
# An already specified BitStringFormat value is left alone; otherwise, the default
# value supplied by FingerprintsStringUtil is used. The FingerprintsSDFileIO object
# itself is returned.
#
sub _SetDefaultBitStringFormat {
  my($This) = @_;

  $This->{BitStringFormat} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat();

  return $This;
}
1012 
# Set default value for bits order...
#
# An already specified BitsOrder value is left alone; otherwise, the default value
# supplied by FingerprintsStringUtil is used. The FingerprintsSDFileIO object itself
# is returned.
#
sub _SetDefaultBitsOrder {
  my($This) = @_;

  $This->{BitsOrder} = Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder() unless $This->{BitsOrder};

  return $This;
}
1024 
# Set default value for vector string format...
#
# The default value is retrieved from FingerprintsStringUtil using the current
# fingerprints object. Nothing is done when VectorStringFormat is already specified
# or no fingerprints object is available. The FingerprintsSDFileIO object itself is
# returned.
#
sub _SetDefaultVectorStringFormat {
  my($This) = @_;

  # Already specified...
  return $This if $This->{VectorStringFormat};

  # No fingerprints object to figure out the default value...
  return $This unless $This->{FingerprintsObject};

  $This->{VectorStringFormat} = Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat($This->{FingerprintsObject});

  return $This;
}
1036 
# Add fingerprints data to compound string...
#
# Any fingerprints data already present for the fingerprints field label is removed
# after issuing a warning. The trailing $$$$ is then dropped from the compound string,
# and a data field containing the fingerprints field label and the fingerprints string
# is added, followed by a new $$$$ terminator. The FingerprintsSDFileIO object itself
# is returned.
#
sub _AddFingerprintsDataToCompoundString {
  my($This) = @_;

  # Check and remove existing fingerprints data...
  if ($This->_IsFingerprintsDataPresentInCompoundString()) {
    carp "Warning: ${ClassName}->_AddFingerprintsDataToCompoundString: The compound string already contains fingerprints data corresponding to fingerprints field label $This->{FingerprintsFieldLabel}; It has been replaced with new fingerprints data...";
    $This->{CompoundString} = SDFileUtil::RemoveCmpdDataHeaderLabelAndValue($This->{CompoundString}, $This->{FingerprintsFieldLabel});
  }

  # Drop the terminator from a copy of compound string...
  (my $CmpdString = $This->{CompoundString}) =~ s/\$\$\$\$$//;

  # Add data field for fingerprints along with a new terminator...
  $This->{CompoundString} = $CmpdString . ">  <" . $This->{FingerprintsFieldLabel} . ">\n" . $This->{FingerprintsString} . "\n\n\$\$\$\$";

  return $This;
}
1057 
# Is fingerprints data already present in compound string?
#
# Returns 1 when the compound string contains the fingerprints field label enclosed
# in angle brackets; otherwise, returns 0. An empty compound string or an empty
# fingerprints field label always leads to 0.
#
sub _IsFingerprintsDataPresentInCompoundString {
  my($This) = @_;
  my($FingerprintsFieldLabel);

  if (TextUtil::IsEmpty($This->{CompoundString}) || TextUtil::IsEmpty($This->{FingerprintsFieldLabel})) {
    return 0;
  }

  $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

  # Match the label literally: any regular expression metacharacters in the label, such
  # as parentheses or brackets, must not be interpreted as part of the pattern...
  return ($This->{CompoundString} =~ /<\Q$FingerprintsFieldLabel\E>/) ? 1 : 0;
}
1072 
# Generate fingerprints object using current fingerprints string...
#
# FingerprintsObject is always reset to undef first. For a non-empty fingerprints
# string, the string is parsed as a fingerprints bit-vector or fingerprints vector
# string depending on which mode flag is set, and the FingerprintsSDFileIO object
# itself is returned. It is also returned for an empty fingerprints string. With a
# non-empty string and no mode flag set, undef is returned.
#
sub _GenerateFingerprintsObject {
  my($This) = @_;

  $This->{FingerprintsObject} = undef;

  # Nothing to parse...
  return $This unless $This->{FingerprintsString};

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsBitVectorString($This->{FingerprintsString});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsVectorString($This->{FingerprintsString});
    return $This;
  }

  # No fingerprints string mode available to drive parsing...
  return undef;
}
1096 
# Generate fingerprints string using current fingerprints object...
#
# FingerprintsString is always reset to an empty string first. When a fingerprints
# object is available, the string is generated using BitStringFormat and BitsOrder
# in fingerprints bit-vector string mode, or VectorStringFormat in fingerprints
# vector string mode. The FingerprintsSDFileIO object itself is returned.
#
sub _GenerateFingerprintsString {
  my($This) = @_;

  $This->{FingerprintsString} = '';

  # Nothing to generate from...
  return $This unless $This->{FingerprintsObject};

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{BitStringFormat}, $This->{BitsOrder});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{VectorStringFormat});
  }

  return $This;
}
1117 
# Is it a fingerprints SD file?
#
# This can be invoked either as a function with a file name or as a method on a
# FingerprintsSDFileIO object with a file name. The check relies on file extension
# only: the status returned by FileUtil::CheckFileType for "sdf sd" extensions is
# passed back to the caller.
#
sub IsFingerprintsSDFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  # During method style invocation, file name follows the object...
  my $FileName = ((@_ == 2) && (_IsFingerprintsSDFileIO($FirstParameter))) ? $SecondParameter : $FirstParameter;

  # Check file extension...
  my $Status = FileUtil::CheckFileType($FileName, "sdf sd");

  return $Status;
}
1135 
# Is it a FingerprintsSDFileIO object?
#
# Returns 1 for a blessed reference which is an instance of this class or one of
# its derived classes; otherwise, returns 0.
#
sub _IsFingerprintsSDFileIO {
  my($Object) = @_;

  # Anything not blessed can't be an object...
  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
1142