MayaChemTools

   1 package Fingerprints::AtomTypesFingerprints;
   2 #
   3 # $RCSfile: AtomTypesFingerprints.pm,v $
   4 # $Date: 2015/02/28 20:48:54 $
   5 # $Revision: 1.22 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Carp;
  31 use Exporter;
  32 use Text::ParseWords;
  33 use TextUtil ();
  34 use FileUtil ();
  35 use MathUtil ();
  36 use Fingerprints::Fingerprints;
  37 use Molecule;
  38 use AtomTypes::AtomicInvariantsAtomTypes;
  39 use AtomTypes::DREIDINGAtomTypes;
  40 use AtomTypes::EStateAtomTypes;
  41 use AtomTypes::FunctionalClassAtomTypes;
  42 use AtomTypes::MMFF94AtomTypes;
  43 use AtomTypes::SLogPAtomTypes;
  44 use AtomTypes::SYBYLAtomTypes;
  45 use AtomTypes::TPSAAtomTypes;
  46 use AtomTypes::UFFAtomTypes;
  47 
  48 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  49 
  50 @ISA = qw(Fingerprints::Fingerprints Exporter);
  51 @EXPORT = qw();
  52 @EXPORT_OK = qw();
  53 
  54 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  55 
  56 # Setup class variables...
  57 my($ClassName);
  58 _InitializeClass();
  59 
  60 # Overload Perl functions...
  61 use overload '""' => 'StringifyAtomTypesFingerprints';
  62 
  63 # Class constructor...
  64 sub new {
  65   my($Class, %NamesAndValues) = @_;
  66 
  67   # Initialize object...
  68   my $This = $Class->SUPER::new();
  69   bless $This, ref($Class) || $Class;
  70   $This->_InitializeAtomTypesFingerprints();
  71 
  72   $This->_InitializeAtomTypesFingerprintsProperties(%NamesAndValues);
  73 
  74   return $This;
  75 }
  76 
  77 # Initialize object data...
  78 #
  79 sub _InitializeAtomTypesFingerprints {
  80   my($This) = @_;
  81 
  82   # Type of atom type fingerprint to generate:
  83   #
  84   # AtomTypesCount - A vector containing count of atom types
  85   # AtomTypesBits - A bit vector indicating presence/absence of atom types
  86   #
  87   $This->{Type} = '';
  88 
  89   # AtomTypes to use for generating fingerprints...
  90   #
  91   # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
  92   # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
  93   # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
  94   #
  95   $This->{AtomIdentifierType} = '';
  96 
  97   # AtomTypesSetToUse for AtomTypesCount:
  98   #
  99   # ArbitrarySize - Corrresponds to only AtomTypes atom types detected in molecule
 100   # FixedSize - Corresponds to fixed number of atom types previously defined for
 101   #             specific atom types.
 102   #
 103   # The default AtomTypesSetToUse value for AtomTypesCount fingerprints type: ArbitrarySize.
 104   #
 105   # Possible values: ArbitrarySize or FixedSize. However, for AtomTypesBits fingerprints type, only FixedSize
 106   # value is allowed.
 107   #
 108   $This->{AtomTypesSetToUse} = '';
 109 
 110   # By default, hydrogens are ignored during fingerprint generation...
 111   $This->{IgnoreHydrogens} = 1;
 112 
 113   # Assigned AtomTypes atom types...
 114   %{$This->{AtomTypes}} = ();
 115 
 116   # AtomTypes atom types count for generating atom types count and bits fingerprints...
 117   %{$This->{AtomTypesCount}} = ();
 118 }
 119 
 120 # Initialize class ...
 121 sub _InitializeClass {
 122   #Class name...
 123   $ClassName = __PACKAGE__;
 124 }
 125 
 126 # Initialize object properties....
 127 sub _InitializeAtomTypesFingerprintsProperties {
 128   my($This, %NamesAndValues) = @_;
 129 
 130   my($Name, $Value, $MethodName);
 131   while (($Name, $Value) = each  %NamesAndValues) {
 132     $MethodName = "Set${Name}";
 133     $This->$MethodName($Value);
 134   }
 135 
 136   # Make sure molecule object was specified...
 137   if (!exists $NamesAndValues{Molecule}) {
 138     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
 139   }
 140 
 141   # Make sure type and identifier type were specified...
 142   if (!exists $NamesAndValues{Type}) {
 143     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying type...";
 144   }
 145   if (!exists $NamesAndValues{AtomIdentifierType}) {
 146     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
 147   }
 148 
 149   if ($This->{Type} =~ /^AtomTypesCount$/i) {
 150     $This->_InitializeAtomTypesCount();
 151   }
 152   elsif ($This->{Type} =~ /^AtomTypesBits$/i) {
 153     $This->_InitializeAtomTypesBits();
 154   }
 155   else {
 156     croak "Error: ${ClassName}->_InitializeAtomTypesFingerprintsProperties: Unknown AtomTypes fingerprints type: $This->{Type}; Supported fingerprints types: AtomTypesCount or AtomTypesBits...";
 157   }
 158 
 159   return $This;
 160 }
 161 
 162 # Initialize atom type counts...
 163 #
 164 sub _InitializeAtomTypesCount {
 165   my($This) = @_;
 166 
 167   # Set default AtomTypesSetToUse...
 168   if (!$This->{AtomTypesSetToUse}) {
 169     $This->{AtomTypesSetToUse} = ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) ? 'FixedSize' : 'ArbitrarySize';
 170   }
 171 
 172   # Make sure AtomTypesSetToUse value is okay...
 173   $This->_ValidateAtomTypesSetToUse($This->{AtomTypesSetToUse});
 174 
 175   # Vector type and type of values...
 176   $This->{VectorType} = 'FingerprintsVector';
 177 
 178   if ($This->{AtomTypesSetToUse} =~ /^FixedSize$/i) {
 179     $This->{FingerprintsVectorType} = 'OrderedNumericalValues';
 180   }
 181   else {
 182     $This->{FingerprintsVectorType} = 'NumericalValues';
 183   }
 184 
 185   $This->_InitializeFingerprintsVector();
 186 
 187   return $This;
 188 }
 189 
 190 # Initialize atom types bits...
 191 #
 192 sub _InitializeAtomTypesBits {
 193   my($This) = @_;
 194 
 195   # Set default AtomTypesSetToUse...
 196   $This->{AtomTypesSetToUse} = 'FixedSize';
 197 
 198   # Make sure AtomTypesSetToUse value is okay...
 199   $This->_ValidateAtomTypesSetToUse($This->{AtomTypesSetToUse});
 200 
 201   # Vector type...
 202   $This->{VectorType} = 'FingerprintsBitVector';
 203 
 204   # Vector size...
 205   $This->{Size} = $This->_GetFixedSizeAtomTypesSetSize();
 206 
 207   $This->_InitializeFingerprintsBitVector();
 208 
 209   return $This;
 210 }
 211 
 212 # Set type...
 213 #
 214 sub SetType {
 215   my($This, $Type) = @_;
 216 
 217   if ($This->{Type}) {
 218     croak "Error: ${ClassName}->SetType: Can't change type:  It's already set...";
 219   }
 220 
 221   if ($Type =~ /^AtomTypesCount$/i) {
 222     $This->{Type} = 'AtomTypesCount';;
 223   }
 224   elsif ($Type =~ /^AtomTypesBits$/i) {
 225     $This->{Type} = 'AtomTypesBits';;
 226   }
 227   else {
 228     croak "Error: ${ClassName}->SetType: Unknown AtomTypes fingerprints type: $Type; Supported fingerprints types: AtomTypesCount or AtomTypesBit...";
 229   }
 230   return $This;
 231 }
 232 
 233 # Disable set size method...
 234 #
 235 sub SetSize {
 236   my($This, $Type) = @_;
 237 
 238   croak "Error: ${ClassName}->SetSize: Can't change size:  It's not allowed...";
 239 }
 240 
 241 # Set atom types set to use...
 242 #
 243 sub SetAtomTypesSetToUse {
 244   my($This, $Value) = @_;
 245 
 246   if ($This->{AtomTypesSetToUse}) {
 247     croak "Error: ${ClassName}->SetAtomTypesSetToUse: Can't change size:  It's already set...";
 248   }
 249 
 250   $This->_ValidateAtomTypesSetToUse($Value);
 251 
 252   $This->{AtomTypesSetToUse} = $Value;
 253 
 254   return $This;
 255 }
 256 
 257 # Validate AtomTypesSetToUse value...
 258 #
 259 sub _ValidateAtomTypesSetToUse {
 260   my($This, $Value) = @_;
 261 
 262   if ($Value !~ /^(ArbitrarySize|FixedSize)/i) {
 263     croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Unknown AtomTypesSetToUse value: $Value; Supported values: ArbitrarySize or FixedSize";
 264   }
 265 
 266   if ($Value =~ /^ArbitrarySize$/i && $This->{Type} =~ /^AtomTypesBits$/i) {
 267     croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Specified AtomTypesSetToUse value, $Value, is not allowed for AtomTypesBits fingerprints...";
 268   }
 269 
 270   if ($Value =~ /^FixedSize$/i && $This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 271     croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Specified AtomTypesSetToUse value, $Value, is not allowed for AtomicInvariantsAtomTypes fingerprints...";
 272   }
 273 
 274   if ($Value =~ /^FixedSize$/i && $This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 275     croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Specified AtomTypesSetToUse value, $Value, is not allowed for FunctionalClassAtomTypes fingerprints...";
 276   }
 277 
 278   if ($Value =~ /^ArbitrarySize$/i && $This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
 279     croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Specified AtomTypesSetToUse value, $Value, is not allowed for TPSAAtomTypes fingerprints...";
 280   }
 281 
 282   return $This;
 283 }
 284 
 285 # Set atom identifier type...
 286 #
 287 sub SetAtomIdentifierType {
 288   my($This, $IdentifierType) = @_;
 289 
 290   if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 291     croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
 292   }
 293 
 294   if ($This->{AtomIdentifierType}) {
 295     croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type:  It's already set...";
 296   }
 297 
 298   if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i && $This->{AtomTypesSetToUse} =~ /^FixedSize$/i) {
 299     croak "Error: ${ClassName}->SetAtomIdentifierType: Specified AtomTypesSetToUse value, $IdentifierType, is not allowed for AtomicInvariantsAtomTypes fingerprints...";
 300   }
 301 
 302   if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i && $This->{AtomTypesSetToUse} =~ /^FixedSize$/i) {
 303     croak "Error: ${ClassName}->SetAtomIdentifierType: Specified AtomTypesSetToUse value, $IdentifierType, is not allowed for FunctionalClassAtomTypes fingerprints...";
 304   }
 305 
 306   $This->{AtomIdentifierType} = $IdentifierType;
 307 
 308   # Initialize atom identifier type information...
 309   $This->_InitializeAtomIdentifierTypeInformation();
 310 
 311   return $This;
 312 }
 313 
 314 # Generate fingerprints description...
 315 #
 316 sub GetDescription {
 317   my($This) = @_;
 318 
 319   # Is description explicity set?
 320   if (exists $This->{Description}) {
 321     return $This->{Description};
 322   }
 323 
 324   # Generate fingerprints description...
 325 
 326   return "$This->{Type}:$This->{AtomIdentifierType}:$This->{AtomTypesSetToUse}";
 327 }
 328 
 329 # Generate atom types fingerprints...
 330 #
 331 # The current release of MayaChemTools supports generation of two types of AtomTypes
 332 # fingerprints corresponding to non-hydrogen and/or hydrogen atoms:
 333 #
 334 # AtomTypesCount - A vector containing count of  atom types
 335 # AtomTypesBits - A bit vector indicating presence/absence of atom types
 336 #
 337 # For AtomTypesCount fingerprints, two types of atom types set size is allowed:
 338 #
  339 # ArbitrarySize - Corresponds to only atom types detected in molecule
 340 # FixedSize - Corresponds to fixed number of atom types previously defined
 341 #
  342 # For AtomTypesBits fingerprints, only FixedSize atom type set is allowed.
 343 #
 344 # The fixed size atom type set size used during generation of fingerprints corresponding
 345 # to FixedSize value of AtomTypesSetToUse contains all possible atom types in datafiles
 346 # distributed with MayaChemTools release for each supported type.
 347 #
  348 # Combination of Type and AtomTypesSetToUse allow generation of 22 different types of
 349 # AtomTypes fingerprints:
 350 #
 351 # Type                  AtomIdentifierType           AtomTypesSetToUse
 352 #
 353 # AtomTypesCount        AtomicInvariantsAtomTypes    ArbitrarySize
 354 #
 355 # AtomTypesCount        DREIDINGAtomTypes            ArbitrarySize
 356 # AtomTypesCount        DREIDINGAtomTypes            FixedSize
 357 # AtomTypesBits         DREIDINGAtomTypes            FixedSize
 358 #
 359 # AtomTypesCount        EStateAtomTypes              ArbitrarySize
 360 # AtomTypesCount        EStateAtomTypes              FixedSize
 361 # AtomTypesBits         EStateAtomTypes              FixedSize
 362 #
 363 # AtomTypesCount        FunctionalClassAtomTypes    ArbitrarySize
 364 #
 365 # AtomTypesCount        MMFF94AtomTypes              ArbitrarySize
 366 # AtomTypesCount        MMFF94AtomTypes              FixedSize
 367 # AtomTypesBits         MMFF94AtomTypes              FixedSize
 368 #
 369 # AtomTypesCount        SLogPAtomTypes               ArbitrarySize
 370 # AtomTypesCount        SLogPAtomTypes               FixedSize
 371 # AtomTypesBits         SLogPAtomTypes               FixedSize
 372 #
 373 # AtomTypesCount        SYBYLAtomTypes               ArbitrarySize
 374 # AtomTypesCount        SYBYLAtomTypes               FixedSize
 375 # AtomTypesBits         SYBYLAtomTypes               FixedSize
 376 #
 377 # AtomTypesCount        TPSAAtomTypes                 FixedSize
 378 # AtomTypesBits         TPSAAtomTypes                 FixedSize
 379 #
 380 # AtomTypesCount        UFFAtomTypes                 ArbitrarySize
 381 # AtomTypesCount        UFFAtomTypes                 FixedSize
 382 # AtomTypesBits         UFFAtomTypes                 FixedSize
 383 #
 384 sub GenerateFingerprints {
 385   my($This) = @_;
 386 
 387   # Cache appropriate molecule data...
 388   $This->_SetupMoleculeDataCache();
 389 
 390   # Check and assign appropriate atom types...
 391   if (!$This->_AssignAtomTypes()) {
 392     carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
 393     return $This;
 394   }
 395 
 396   # Count atom types...
 397   $This->_CountAtomTypes();
 398 
 399   # Set final fingerprints...
 400   $This->_SetFinalFingerprints();
 401 
 402   # Clear cached molecule data...
 403   $This->_ClearMoleculeDataCache();
 404 
 405   return $This;
 406 }
 407 
 408 # Assign appropriate atom types...
 409 #
 410 sub _AssignAtomTypes {
 411   my($This) = @_;
 412   my($SpecifiedAtomTypes, $Atom, $AtomID);
 413 
 414   %{$This->{AtomTypes}} = ();
 415   $SpecifiedAtomTypes = undef;
 416 
 417   IDENTIFIERTYPE: {
 418     if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 419       $SpecifiedAtomTypes = new AtomTypes::AtomicInvariantsAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens}, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
 420       last IDENTIFIERTYPE;
 421     }
 422 
 423     if ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
 424       $SpecifiedAtomTypes = new AtomTypes::DREIDINGAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
 425       last IDENTIFIERTYPE;
 426     }
 427 
 428     if ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
 429       $SpecifiedAtomTypes = new AtomTypes::EStateAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
 430       last IDENTIFIERTYPE;
 431     }
 432 
 433     if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 434       $SpecifiedAtomTypes = new AtomTypes::FunctionalClassAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens}, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
 435       last IDENTIFIERTYPE;
 436     }
 437 
 438     if ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
 439       $SpecifiedAtomTypes = new AtomTypes::MMFF94AtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
 440       last IDENTIFIERTYPE;
 441     }
 442 
 443     if ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
 444       $SpecifiedAtomTypes = new AtomTypes::SLogPAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
 445       last IDENTIFIERTYPE;
 446     }
 447     if ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
 448       $SpecifiedAtomTypes = new AtomTypes::SYBYLAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
 449       last IDENTIFIERTYPE;
 450     }
 451 
 452     if ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
 453       $SpecifiedAtomTypes = new AtomTypes::TPSAAtomTypes('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
 454       last IDENTIFIERTYPE;
 455     }
 456 
 457     if ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
 458       $SpecifiedAtomTypes = new AtomTypes::UFFAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
 459       last IDENTIFIERTYPE;
 460     }
 461 
 462     croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
 463   }
 464 
 465   # Assign atom types...
 466   $SpecifiedAtomTypes->AssignAtomTypes();
 467 
 468   # Make sure atom types assignment is successful...
 469   if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
 470     return undef;
 471   }
 472 
 473   # Collect assigned atom types...
 474   for $Atom (@{$This->{Atoms}}) {
 475     $AtomID = $Atom->GetID();
 476     $This->{AtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
 477   }
 478 
 479   return $This;
 480 }
 481 
 482 # Count atom types...
 483 #
 484 sub _CountAtomTypes {
 485   my($This) = @_;
 486   my($Atom, $AtomID, $AtomType);
 487 
 488   %{$This->{AtomTypesCount}} = ();
 489 
 490   for $Atom (@{$This->{Atoms}}) {
 491     $AtomID = $Atom->GetID();
 492     $AtomType = $This->{AtomTypes}{$AtomID};
 493 
 494     if (!exists $This->{AtomTypesCount}{$AtomType}) {
 495       $This->{AtomTypesCount}{$AtomType} = 0;
 496     }
 497 
 498     $This->{AtomTypesCount}{$AtomType} += 1;
 499   }
 500   return $This;
 501 }
 502 
 503 # Set final fingerprints...
 504 #
 505 sub _SetFinalFingerprints {
 506   my($This) = @_;
 507 
 508   # Mark successful generation of fingerprints...
 509   $This->{FingerprintsGenerated} = 1;
 510 
 511   if ($This->{Type} =~ /^AtomTypesCount$/i) {
 512     $This->_SetFinalAtomTypesCountFingerprints();
 513   }
 514   elsif ($This->{Type} =~ /^AtomTypesBits$/i) {
 515     $This->_SetFinalAtomTypesBitsFingerprints();
 516   }
 517   return $This;
 518 }
 519 
 520 # Set final final fingerpritns for atom types count...
 521 #
 522 sub _SetFinalAtomTypesCountFingerprints {
 523   my($This) = @_;
 524   my($AtomType, @Values, @IDs);
 525 
 526   @Values = ();
 527   @IDs = ();
 528 
 529   if ($This->{AtomTypesSetToUse} =~ /^FixedSize$/i) {
 530     for $AtomType (@{$This->_GetFixedSizeAtomTypesSet()}) {
 531       push @IDs, $AtomType;
 532       push @Values, exists($This->{AtomTypesCount}{$AtomType}) ? $This->{AtomTypesCount}{$AtomType} : 0;
 533     }
 534   }
 535   else {
 536     for $AtomType (sort keys %{$This->{AtomTypesCount}}) {
 537       push @IDs, $AtomType;
 538       push @Values, $This->{AtomTypesCount}{$AtomType};
 539     }
 540   }
 541 
 542   # Add IDs and values to fingerprint vector...
 543   if (@IDs) {
 544     $This->{FingerprintsVector}->AddValueIDs(\@IDs);
 545   }
 546   $This->{FingerprintsVector}->AddValues(\@Values);
 547 
 548   return $This;
 549 }
 550 
 551 # Set final final fingerpritns for atom types count bits...
 552 #
 553 sub _SetFinalAtomTypesBitsFingerprints {
 554   my($This) = @_;
 555   my($AtomType, $SkipPosCheck, $AtomTypeNum, $AtomTypeBitIndex);
 556 
 557   $SkipPosCheck = 1;
 558   $AtomTypeNum = 0;
 559 
 560   ATOMTYPE: for $AtomType (@{$This->_GetFixedSizeAtomTypesSet()}) {
 561     $AtomTypeNum++;
 562     if (!(exists($This->{AtomTypesCount}{$AtomType}) && $This->{AtomTypesCount}{$AtomType})) {
 563       next ATOMTYPE;
 564     }
 565     $AtomTypeBitIndex = $AtomTypeNum - 1;
 566     $This->{FingerprintsBitVector}->SetBit($AtomTypeBitIndex, $SkipPosCheck);
 567   }
 568 
 569   return $This;
 570 }
 571 
 572 # Cache  appropriate molecule data...
 573 #
 574 sub _SetupMoleculeDataCache {
 575   my($This) = @_;
 576 
 577   if ($This->{IgnoreHydrogens}) {
 578     # Get all non-hydrogen atoms...
 579     my($NegateAtomCheckMethod);
 580     $NegateAtomCheckMethod = 1;
 581 
 582     @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms("IsHydrogen", $NegateAtomCheckMethod);
 583   }
 584   else {
 585     @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();
 586   }
 587 
 588   return $This;
 589 }
 590 
 591 # Clear cached molecule data...
 592 #
 593 sub _ClearMoleculeDataCache {
 594   my($This) = @_;
 595 
 596   @{$This->{Atoms}} = ();
 597 
 598   return $This;
 599 }
 600 
 601 # Get fixed size atom types set size...
 602 #
 603 sub _GetFixedSizeAtomTypesSetSize {
 604   my($This) = @_;
 605   my($Size);
 606 
 607   $Size = 0;
 608 
 609   IDENTIFIERTYPE: {
 610     if ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
 611       $Size = $This->{IgnoreHydrogens} ? scalar @{AtomTypes::DREIDINGAtomTypes::GetAllPossibleDREIDINGNonHydrogenAtomTypes()} : scalar @{AtomTypes::DREIDINGAtomTypes::GetAllPossibleDREIDINGAtomTypes()};
 612       last IDENTIFIERTYPE;
 613     }
 614 
 615     if ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
 616       $Size = $This->{IgnoreHydrogens} ? scalar @{AtomTypes::EStateAtomTypes::GetAllPossibleEStateNonHydrogenAtomTypes()} : scalar @{AtomTypes::EStateAtomTypes::GetAllPossibleEStateAtomTypes()};
 617       last IDENTIFIERTYPE;
 618     }
 619 
 620     if ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
 621       $Size = $This->{IgnoreHydrogens} ? scalar @{AtomTypes::MMFF94AtomTypes::GetAllPossibleMMFF94NonHydrogenAtomTypes()} : scalar @{AtomTypes::MMFF94AtomTypes::GetAllPossibleMMFF94AtomTypes()};
 622       last IDENTIFIERTYPE;
 623     }
 624 
 625     if ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
 626       $Size = $This->{IgnoreHydrogens} ? scalar @{AtomTypes::SLogPAtomTypes::GetAllPossibleSLogPNonHydrogenAtomTypes()} : scalar @{AtomTypes::SLogPAtomTypes::GetAllPossibleSLogPAtomTypes()};
 627       last IDENTIFIERTYPE;
 628     }
 629 
 630     if ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
 631       $Size = $This->{IgnoreHydrogens} ? scalar @{AtomTypes::SYBYLAtomTypes::GetAllPossibleSYBYLNonHydrogenAtomTypes()} : scalar @{AtomTypes::SYBYLAtomTypes::GetAllPossibleSYBYLAtomTypes()};
 632       last IDENTIFIERTYPE;
 633     }
 634 
 635     if ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
 636       $Size =  scalar @{AtomTypes::TPSAAtomTypes::GetAllPossibleTPSAAtomTypes()};
 637       last IDENTIFIERTYPE;
 638     }
 639 
 640     if ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
 641       $Size = $This->{IgnoreHydrogens} ? scalar @{AtomTypes::UFFAtomTypes::GetAllPossibleUFFNonHydrogenAtomTypes()} : scalar @{AtomTypes::UFFAtomTypes::GetAllPossibleUFFAtomTypes()};
 642       last IDENTIFIERTYPE;
 643     }
 644 
 645     croak "Error: ${ClassName}->_GetFixedSizeAtomTypesSetSize: Atom types set size for atom indentifier type, $This->{AtomIdentifierType}, is not available...";
 646   }
 647 
 648   return $Size;
 649 }
 650 
 651 # Get fixed size atom types set...
 652 #
 653 sub _GetFixedSizeAtomTypesSet {
 654   my($This) = @_;
 655   my($AtomTypesRef);
 656 
 657   $AtomTypesRef = undef;
 658 
 659   IDENTIFIERTYPE: {
 660     if ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
 661       $AtomTypesRef = $This->{IgnoreHydrogens} ? AtomTypes::DREIDINGAtomTypes::GetAllPossibleDREIDINGNonHydrogenAtomTypes() : AtomTypes::DREIDINGAtomTypes::GetAllPossibleDREIDINGAtomTypes();
 662       last IDENTIFIERTYPE;
 663     }
 664 
 665     if ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
 666       $AtomTypesRef = $This->{IgnoreHydrogens} ? AtomTypes::EStateAtomTypes::GetAllPossibleEStateNonHydrogenAtomTypes() : AtomTypes::EStateAtomTypes::GetAllPossibleEStateAtomTypes();
 667       last IDENTIFIERTYPE;
 668     }
 669 
 670     if ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
 671       $AtomTypesRef = $This->{IgnoreHydrogens} ? AtomTypes::MMFF94AtomTypes::GetAllPossibleMMFF94NonHydrogenAtomTypes() : AtomTypes::MMFF94AtomTypes::GetAllPossibleMMFF94AtomTypes();
 672       last IDENTIFIERTYPE;
 673     }
 674 
 675     if ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
 676       $AtomTypesRef = $This->{IgnoreHydrogens} ? AtomTypes::SLogPAtomTypes::GetAllPossibleSLogPNonHydrogenAtomTypes() : AtomTypes::SLogPAtomTypes::GetAllPossibleSLogPAtomTypes();
 677       last IDENTIFIERTYPE;
 678     }
 679 
 680     if ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
 681       $AtomTypesRef = $This->{IgnoreHydrogens} ? AtomTypes::SYBYLAtomTypes::GetAllPossibleSYBYLNonHydrogenAtomTypes() : AtomTypes::SYBYLAtomTypes::GetAllPossibleSYBYLAtomTypes();
 682       last IDENTIFIERTYPE;
 683     }
 684 
 685     if ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
 686       $AtomTypesRef = AtomTypes::TPSAAtomTypes::GetAllPossibleTPSAAtomTypes();
 687       last IDENTIFIERTYPE;
 688     }
 689 
 690     if ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
 691       $AtomTypesRef = $This->{IgnoreHydrogens} ? AtomTypes::UFFAtomTypes::GetAllPossibleUFFNonHydrogenAtomTypes() : AtomTypes::UFFAtomTypes::GetAllPossibleUFFAtomTypes();
 692       last IDENTIFIERTYPE;
 693     }
 694 
 695     croak "Error: ${ClassName}->_GetFixedSizeAtomTypesSet: Atom types set for atom indentifier type, $This->{AtomIdentifierType}, is not available...";
 696   }
 697 
 698   return $AtomTypesRef;
 699 }
 700 
  701 # Initialize atom identifier type information...
 702 #
 703 # Current supported values:
 704 #
 705 # AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
 706 # MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
 707 #
 708 sub _InitializeAtomIdentifierTypeInformation {
 709   my($This) = @_;
 710 
 711   if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 712     $This->_InitializeAtomicInvariantsAtomTypesInformation();
 713   }
 714   elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 715     $This->_InitializeFunctionalClassAtomTypesInformation();
 716   }
 717   elsif ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 718     # Nothing to do for now...
 719   }
 720   else {
 721     croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
 722   }
 723 
 724   return $This;
 725 }
 726 
 727 # Initialize atomic invariants atom types to use for generating atom IDs in atom pairs...
 728 #
 729 # Let:
 730 #   AS = Atom symbol corresponding to element symbol
 731 #
 732 #   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
 733 #   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
 734 #   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
 735 #   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 736 #   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 737 #   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 738 #   H<n>   = Number of implicit and explicit hydrogens for atom
 739 #   Ar     = Aromatic annotation indicating whether atom is aromatic
  740 #   RA     = Ring atom annotation indicating whether atom is in a ring
 741 #   FC<+n/-n> = Formal charge assigned to atom
 742 #   MN<n> = Mass number indicating isotope other than most abundant isotope
 743 #   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
 744 #
 745 #   AtomTypeIDx = Atomic invariants atom type for atom x
 746 #   AtomTypeIDy = Atomic invariants atom type for atom y
 747 #   Dn   = Topological distance between atom x and y
 748 #
 749 # Then:
 750 #
 751 #   AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
 752 #
 753 #     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
 754 #
  755 # Except for AS, which is a required atomic invariant in atom type AtomIDs, all other atomic invariants are
 756 # optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
 757 # AtomID specification doesn't include atomic invariants with zero or undefined values.
 758 #
 759 # Examples of  AtomIDs:
 760 #
 761 #   O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge
  762 #   O.X1.BO1.FC-1 - Hydroxyl oxygen in carboxylate with explicit negative charge
  763 #   O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon
  764 #   O.X2.BO2 - Hydroxyl oxygen in carboxylate attached to carbonyl carbon and another heavy atom
 765 #
 766 #   C.X2.BO3.H1.Ar - Aromatic carbon
 767 #
 768 sub _InitializeAtomicInvariantsAtomTypesInformation {
 769   my($This) = @_;
 770 
 771   # Default atomic invariants to use for generating atom pair atom IDs: AS, X, BO, H, FC
 772   #
 773   @{$This->{AtomicInvariantsToUse}} = ();
 774   @{$This->{AtomicInvariantsToUse}} = ('AS', 'X', 'BO', 'H', 'FC');
 775 
 776   return $This;
 777 }
 778 
 779 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
 780 # class, to use for generating atom identifiers...
 781 #
 782 # Let:
 783 #   HBD: HydrogenBondDonor
 784 #   HBA: HydrogenBondAcceptor
 785 #   PI :  PositivelyIonizable
 786 #   NI : NegativelyIonizable
 787 #   Ar : Aromatic
 788 #   Hal : Halogen
 789 #   H : Hydrophobic
 790 #   RA : RingAtom
 791 #   CA : ChainAtom
 792 #
 793 # Then:
 794 #
  795 #   Functional class atom type specification for an atom corresponds to:
 796 #
 797 #     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
 798 #
 799 #   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
 800 #
 801 #   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
 802 #
 803 #     HydrogenBondDonor: NH, NH2, OH
 804 #     HydrogenBondAcceptor: N[!H], O
 805 #     PositivelyIonizable: +, NH2
 806 #     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
 807 #
 808 sub _InitializeFunctionalClassAtomTypesInformation {
 809   my($This) = @_;
 810 
 811   # Default functional class atom typess to use for generating atom identifiers
 812   # are: HBD, HBA, PI, NI, Ar, Hal
 813   #
 814   @{$This->{FunctionalClassesToUse}} = ();
 815   @{$This->{FunctionalClassesToUse}} = ('HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal');
 816 
 817   return $This;
 818 }
 819 
 820 # Set atomic invariants to use for atom IDs...
 821 #
 822 sub SetAtomicInvariantsToUse {
 823   my($This, @Values) = @_;
 824   my($FirstValue, $TypeOfFirstValue, $AtomicInvariant, $SpecifiedAtomicInvariant, $AtomicInvariantValue, @SpecifiedAtomicInvariants, @AtomicInvariantsToUse);
 825 
 826   if (!@Values) {
 827     carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
 828     return;
 829   }
 830 
 831   $FirstValue = $Values[0];
 832   $TypeOfFirstValue = ref $FirstValue;
 833 
 834   @SpecifiedAtomicInvariants = ();
 835   @AtomicInvariantsToUse = ();
 836 
 837   if ($TypeOfFirstValue =~ /^ARRAY/) {
 838     push @SpecifiedAtomicInvariants, @{$FirstValue};
 839   }
 840   else {
 841     push @SpecifiedAtomicInvariants, @Values;
 842   }
 843 
 844   # Make sure specified AtomicInvariants are valid...
 845   for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
 846     if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
 847       croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
 848     }
 849     $AtomicInvariant = $SpecifiedAtomicInvariant;
 850     push @AtomicInvariantsToUse, $AtomicInvariant;
 851   }
 852 
 853   # Set atomic invariants to use...
 854   @{$This->{AtomicInvariantsToUse}} = ();
 855   push @{$This->{AtomicInvariantsToUse}}, @AtomicInvariantsToUse;
 856 
 857   return $This;
 858 }
 859 
 860 # Set functional classes to use for generation of intial atom indentifiers...
 861 #
 862 sub SetFunctionalClassesToUse {
 863   my($This, @Values) = @_;
 864   my($FirstValue, $TypeOfFirstValue, $FunctionalClass, $SpecifiedFunctionalClass, @SpecifiedFunctionalClasses, @FunctionalClassesToUse);
 865 
 866   if (!@Values) {
 867     carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
 868     return;
 869   }
 870 
 871   if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
 872     carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
 873     return;
 874   }
 875 
 876   $FirstValue = $Values[0];
 877   $TypeOfFirstValue = ref $FirstValue;
 878 
 879   @SpecifiedFunctionalClasses = ();
 880   @FunctionalClassesToUse = ();
 881 
 882   if ($TypeOfFirstValue =~ /^ARRAY/) {
 883     push @SpecifiedFunctionalClasses, @{$FirstValue};
 884   }
 885   else {
 886     push @SpecifiedFunctionalClasses, @Values;
 887   }
 888 
 889   # Make sure specified FunctionalClasses are valid...
 890   for $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
 891     if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) {
 892       croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
 893     }
 894     push @FunctionalClassesToUse, $SpecifiedFunctionalClass;
 895   }
 896 
 897   # Set functional classes to use...
 898   @{$This->{FunctionalClassesToUse}} = ();
 899   push @{$This->{FunctionalClassesToUse}}, @FunctionalClassesToUse;
 900 
 901   return $This;
 902 }
 903 
 904 # Return a string containg data for AtomTypesFingerprints object...
 905 sub StringifyAtomTypesFingerprints {
 906   my($This) = @_;
 907   my($FingerprintsString, $IgnoreHydrogens);
 908 
 909   $FingerprintsString = "Type: $This->{Type}; AtomIdentifierType: $This->{AtomIdentifierType}; AtomTypesSetToUse: $This->{AtomTypesSetToUse}";
 910 
 911   if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 912     my($AtomicInvariant, @AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants);
 913 
 914     @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
 915     %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();
 916 
 917     for $AtomicInvariant (@AtomicInvariantsOrder) {
 918       push @AtomicInvariants, "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}";
 919     }
 920 
 921     $FingerprintsString .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
 922     $FingerprintsString .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
 923     $FingerprintsString .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
 924   }
 925   elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 926     my($FunctionalClass, @FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses);
 927 
 928     @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
 929     %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
 930 
 931     for $FunctionalClass (@FunctionalClassesOrder) {
 932       push @FunctionalClasses, "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}";
 933     }
 934 
 935     $FingerprintsString .= "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
 936     $FingerprintsString .= "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
 937     $FingerprintsString .= "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
 938   }
 939 
 940 
 941   $IgnoreHydrogens = $This->{IgnoreHydrogens} ? "Yes" : "No";
 942   $FingerprintsString .= "; IgnoreHydrogens: $IgnoreHydrogens";
 943 
 944   if ($This->{Type} =~ /^AtomTypesCount$/i) {
 945     $FingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";
 946   }
 947   elsif ($This->{Type} =~ /^AtomTypesBits$/i) {
 948     $FingerprintsString .= "; FingerprintsBitVector: < $This->{FingerprintsBitVector} >";
 949   }
 950 
 951   return $FingerprintsString;
 952 }
 953