MayaChemTools

   1 package Fingerprints::ExtendedConnectivityFingerprints;
   2 #
   3 # $RCSfile: ExtendedConnectivityFingerprints.pm,v $
   4 # $Date: 2015/02/28 20:48:54 $
   5 # $Revision: 1.39 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Carp;
  31 use Exporter;
  32 use TextUtil ();
  33 use MathUtil ();
  34 use Fingerprints::Fingerprints;
  35 use Molecule;
  36 use AtomTypes::AtomicInvariantsAtomTypes;
  37 use AtomTypes::FunctionalClassAtomTypes;
  38 use AtomTypes::DREIDINGAtomTypes;
  39 use AtomTypes::EStateAtomTypes;
  40 use AtomTypes::MMFF94AtomTypes;
  41 use AtomTypes::SLogPAtomTypes;
  42 use AtomTypes::SYBYLAtomTypes;
  43 use AtomTypes::TPSAAtomTypes;
  44 use AtomTypes::UFFAtomTypes;
  45 
  # Package variables used for inheritance and by Exporter...
  our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

  # Parent classes; no names are exported from this package...
  @ISA = ('Fingerprints::Fingerprints', 'Exporter');
  @EXPORT = ();
  @EXPORT_OK = ();

  %EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

  # Class name used as a prefix in error and warning messages; it is filled in
  # by _InitializeClass at module load time...
  my $ClassName;
  _InitializeClass();

  # Objects are stringified through StringifyExtendedConnectivityFingerprints...
  use overload '""' => 'StringifyExtendedConnectivityFingerprints';
  60 
  # Class constructor...
  #
  # The object is created by the parent class constructor, reblessed into the
  # invoking class, given its default values and finally updated using the
  # specified names and values. Returns the new object.
  #
  sub new {
    my($Class, %NamesAndValues) = @_;

    # Invocant may be a class name or an already instantiated object...
    my $ObjectClass = ref($Class) || $Class;

    my $This = $Class->SUPER::new();
    bless $This, $ObjectClass;

    # Defaults first, followed by caller supplied properties...
    $This->_InitializeExtendedConnectivityFingerprints();
    $This->_InitializeExtendedConnectivityFingerprintsProperties(%NamesAndValues);

    return $This;
  }
  74 
  # Initialize object data...
  #
  # Assigns default values to the fingerprints parameters and empties all the
  # hashes used as work areas during generation of fingerprints.
  #
  sub _InitializeExtendedConnectivityFingerprints {
    my($This) = @_;

    # Type of fingerprint to generate:
    #
    # ExtendedConnectivity - Set of integer identifiers corresponding to structurally unique features
    # ExtendedConnectivityCount - Set of integer identifiers corresponding to structurally unique features and their count
    # ExtendedConnectivityBits - A bit vector indicating presence/absence of structurally unique features
    #
    $This->{Type} = 'ExtendedConnectivity';

    # Radius of atomic neighborhoods for extended connectivity...
    $This->{NeighborhoodRadius} = 2;

    # Size of bit vector used during generation of ExtendedConnectivityBits
    # fingerprints along with its minimum and maximum allowed values...
    $This->{Size} = 1024;
    $This->{MinSize} = 32;
    $This->{MaxSize} = 2**32;

    # Type of atom attributes used for initial identifier assignment to non-hydrogen
    # atoms during the calculation of extended connectivity fingerprints [ Ref 48, Ref 52 ].
    #
    # Currently supported values are: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes,
    # DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
    # TPSAAtomTypes, UFFAtomTypes
    #
    # It's left empty here and must be set explicitly.
    #
    $This->{AtomIdentifierType} = '';

    # Random number generator used during generation of fingerprints bit-vector
    # string: Perl CORE::rand or MayaChemTools MathUtil::random function.
    #
    # The random number generator implemented in MayaChemTools is a variant of
    # linear congruential generator (LCG) as described by Miller et al. [ Ref 120 ].
    # It is also referred to as Lehmer random number generator or Park-Miller
    # random number generator.
    #
    # Unlike Perl's core random number generator function rand, the random number
    # generator implemented in MayaChemTools, MathUtil::random, generates consistent
    # random values across different platforms for a specific random seed and leads
    # to generation of portable fingerprints bit-vector strings.
    #
    $This->{UsePerlCoreRandom} = 1;

    # Work areas emptied in place:
    #
    # AtomNeighborhoods - Atom neighborhoods up to specified neighborhood radius
    # AtomIdentifiers - Atom identifiers at different neighborhood radii
    # UniqueAtomIdentifiers, UniqueAtomIdentifiersCount - Atom identifiers, and their
    #   count, which are unique by value at different neighborhood radii
    # StructurallyUniqueAtomIdentifiers, StructurallyUniqueAtomIdentifiersCount - Atom
    #   identifiers, and their count, which are structurally unique at different
    #   neighborhood radii
    #
    for my $WorkAreaName (qw(AtomNeighborhoods AtomIdentifiers UniqueAtomIdentifiers UniqueAtomIdentifiersCount StructurallyUniqueAtomIdentifiers StructurallyUniqueAtomIdentifiersCount)) {
      %{$This->{$WorkAreaName}} = ();
    }

    # Structure feature information at different neighborhood radii used during
    # removal of atom identifiers which are structurally equivalent...
    %{$This->{StructureFeatures}} = ();
  }
 140 
 # Initialize class data: keep the package name in $ClassName, which is used
 # as a prefix in error and warning messages...
 sub _InitializeClass {
   $ClassName = __PACKAGE__;

   return $ClassName;
 }
 146 
 # Initialize object properties...
 #
 # Each specified name and value pair is passed to the corresponding Set<Name>
 # method. After that, presence of Molecule and AtomIdentifierType is verified,
 # any specified Size is checked to be a power of 2, and the fingerprints
 # vector or bit vector matching the fingerprints type is initialized.
 #
 # Croaks for missing Molecule or AtomIdentifierType, an invalid Size and an
 # unknown fingerprints type. Returns the object.
 #
 sub _InitializeExtendedConnectivityFingerprintsProperties {
   my($This, %NamesAndValues) = @_;

   for my $Name (keys %NamesAndValues) {
     my $Value = $NamesAndValues{$Name};
     my $MethodName = "Set${Name}";
     $This->$MethodName($Value);
   }

   # Make sure molecule object was specified...
   croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule..."
     unless exists $NamesAndValues{Molecule};

   # Make sure AtomIdentifierType was specified...
   croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType..."
     unless exists $NamesAndValues{AtomIdentifierType};

   # Make sure any specified size is a power of 2...
   if (exists $NamesAndValues{Size} && !TextUtil::IsNumberPowerOfNumber($NamesAndValues{Size}, 2)) {
     croak "Error: ${ClassName}->New: Specified size value, $NamesAndValues{Size}, must be power of 2...";
   }

   # Set up storage for the fingerprints...
   if ($This->{Type} =~ /^ExtendedConnectivityBits$/i) {
     $This->_InitializeExtendedConnectivityBitsFingerprintsBitVector();
   }
   elsif ($This->{Type} =~ /^ExtendedConnectivityCount$/i) {
     $This->_InitializeExtendedConnectivityCountFingerprintsVector();
   }
   elsif ($This->{Type} =~ /^ExtendedConnectivity$/i) {
     $This->_InitializeExtendedConnectivityFingerprintsVector();
   }
   else {
     croak "Error: ${ClassName}->_InitializeExtendedConnectivityFingerprintsProperties: Unknown ExtendedConnectivity fingerprints type: $This->{Type}; Supported fingerprints types: ExtendedConnectivity, ExtendedConnectivityCount or ExtendedConnectivityBits...";
   }

   return $This;
 }
 189 
 # Initialize extended connectivity fingerprints vector...
 #
 # Records that fingerprints are kept in a FingerprintsVector containing
 # AlphaNumericalValues and then calls _InitializeFingerprintsVector.
 # Returns the object.
 #
 sub _InitializeExtendedConnectivityFingerprintsVector {
   my $This = shift;

   # Type of vector along with type of FingerprintsVector...
   @{$This}{'VectorType', 'FingerprintsVectorType'} = ('FingerprintsVector', 'AlphaNumericalValues');

   $This->_InitializeFingerprintsVector();

   return $This;
 }
 205 
 # Initialize extended connectivity count fingerprints vector...
 #
 # Records that fingerprints are kept in a FingerprintsVector containing
 # NumericalValues and then calls _InitializeFingerprintsVector.
 # Returns the object.
 #
 sub _InitializeExtendedConnectivityCountFingerprintsVector {
   my $This = shift;

   # Type of vector along with type of FingerprintsVector...
   @{$This}{'VectorType', 'FingerprintsVectorType'} = ('FingerprintsVector', 'NumericalValues');

   $This->_InitializeFingerprintsVector();

   return $This;
 }
 221 
 # Initialize extended connectivity bits fingerprints bit vector...
 #
 # Records that fingerprints are kept in a FingerprintsBitVector and then
 # calls _InitializeFingerprintsBitVector. Returns the object.
 #
 sub _InitializeExtendedConnectivityBitsFingerprintsBitVector {
   my $This = shift;

   # Type of vector...
   $This->{'VectorType'} = 'FingerprintsBitVector';

   $This->_InitializeFingerprintsBitVector();

   return $This;
 }
 234 
 # Set type...
 #
 # Accepts ExtendedConnectivity, ExtendedConnectivityCount or
 # ExtendedConnectivityBits, matched without regard to case, and stores the
 # name using its standard capitalization. Croaks for any other value and
 # reports the value which was specified. Returns the object.
 #
 sub SetType {
   my($This, $Type) = @_;

   # An undefined type is handled as an empty, and thus unknown, type...
   my $SpecifiedType = defined $Type ? $Type : '';

   for my $SupportedType ('ExtendedConnectivity', 'ExtendedConnectivityCount', 'ExtendedConnectivityBits') {
     if ($SpecifiedType =~ /^$SupportedType$/i) {
       $This->{Type} = $SupportedType;
       return $This;
     }
   }

   # Report the rejected value and not the type currently set in the object...
   croak "Error: ${ClassName}->SetType: Unknown ExtendedConnectivity fingerprints type: $SpecifiedType; Supported fingerprints types: ExtendedConnectivity, ExtendedConnectivityCount or ExtendedConnectivityBits...";
 }
 254 
 # Disable vector type change...
 #
 # Always croaks: the vector type can't be changed by the caller.
 #
 sub SetVectorType {
   my($This, $Type) = @_;

   Carp::croak("Error: ${ClassName}->SetVectorType: Can't change vector type...");
 }
 264 
 # Disable fingerprints vector type change...
 #
 # Always croaks: the fingerprints vector type can't be changed by the caller.
 #
 sub SetFingerprintsVectorType {
   my($This, $Type) = @_;

   Carp::croak("Error: ${ClassName}->SetFingerprintsVectorType: Can't change fingerprints vector type...");
 }
 274 
 # Set initial atom identifier type...
 #
 # The specified value must match, without regard to case, one of the supported
 # atom types names and it can be set only once for an object. The value is
 # stored as specified and _InitializeAtomIdentifierTypeInformation is called
 # afterwards. Croaks for an unsupported value or when a type is already set.
 # Returns the object.
 #
 sub SetAtomIdentifierType {
   my($This, $IdentifierType) = @_;

   my @SupportedTypes = qw(AtomicInvariantsAtomTypes FunctionalClassAtomTypes DREIDINGAtomTypes EStateAtomTypes MMFF94AtomTypes SLogPAtomTypes SYBYLAtomTypes TPSAAtomTypes UFFAtomTypes);
   my $SupportedTypesPattern = join('|', @SupportedTypes);

   if ($IdentifierType !~ /^($SupportedTypesPattern)$/i) {
     croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes and UFFAtomTypes.";
   }

   # Any earlier assignment is final...
   croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type:  It's already set..."
     if $This->{AtomIdentifierType};

   $This->{AtomIdentifierType} = $IdentifierType;

   # Initialize identifier type information...
   $This->_InitializeAtomIdentifierTypeInformation();

   return $This;
 }
 295 
 # Set atom neighborhood radius...
 #
 # The value must pass TextUtil::IsInteger and must not be negative; otherwise
 # the method croaks. Returns the object.
 #
 sub SetNeighborhoodRadius {
   my($This, $Value) = @_;

   croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid:  It must be an  integer..."
     unless TextUtil::IsInteger($Value);

   croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid:  It must be >= 0..."
     if $Value < 0;

   $This->{NeighborhoodRadius} = $Value;

   return $This;
 }
 312 
 # Generate fingerprints description...
 #
 # Returns the explicitly set description whenever the Description key exists
 # in the object. Otherwise, the description is put together from fingerprints
 # type, atom identifier type and neighborhood radius as:
 #
 #   <Type>:<AtomIdentifierType>:Radius<NeighborhoodRadius>
 #
 sub GetDescription {
   my($This) = @_;

   # Is description explicitly set?
   return $This->{Description} if exists $This->{Description};

   return join(':', $This->{Type}, $This->{AtomIdentifierType}, "Radius$This->{NeighborhoodRadius}");
 }
 327 
 # Generate fingerprints...
 #
 # Methodology:
 #   . Assign initial atom identifiers to all non-hydrogen atoms in the molecule
 #
 #   . Remove duplicates from the initial identifiers and add them to list corresponding
 #     to molecule fingerprint
 #
 #   . For NeighborhoodRadius value of 0, just return the molecule fingerprint list
 #
 #   . For each NeighborhoodRadius level
 #      . For each non-hydrogen CentralAtom at this NeighborhoodRadius level
 #         . For each non-hydrogen SuccessorNeighborAtom
 #           . Collect (BondOrder AtomIdentifier) pair of values corresponding to
 #             (CentralAtom SuccessorNeighborAtom) and add it to a list
 #
 #         . Sort list containing (BondOrder AtomIdentifier) pairs first by BondOrder followed
 #           by AtomIdentifiers to make these values graph invariant
 #         . Generate a hash code for the values in the list
 #         . Assign hash code as new atom identifier at the current NeighborhoodRadius level
 #         . Save all atoms and bonds corresponding to the substructure involved in
 #           generating the hash code to be used for identifying structural duplicate hash code
 #
 #         . Add the new identifier to the molecule fingerprint list making sure it's not a duplicate
 #           identifier
 #
 #   Hash code atom identifier deduplication:
 #     . Track/remove the identifier generated at higher neighborhood radius level
 #
 #   Structural atom identifier deduplication:
 #     . For equivalent atoms and bonds corresponding to substructure at a NeighborhoodRadius level,
 #       track/remove the atom identifier with largest value
 #
 # A warning is issued and no fingerprints are set whenever initial atom
 # identifiers can't be assigned. The cached molecule data is cleared for both
 # the successful and the unsuccessful outcome. Returns the object.
 #
 sub GenerateFingerprints {
   my($This) = @_;

   # Cache appropriate molecule data...
   $This->_SetupMoleculeDataCache();

   if ($This->_AssignInitialAtomIdentifiers()) {
     # Identify atom neighborhoods up to specified radius...
     $This->_GetAtomNeighborhoods();

     # Assign atom identifiers to central atoms considering atom neighborhoods at each
     # radius level...
     $This->_AssignAtomIdentifiersToAtomNeighborhoods();

     # Remove duplicate identifiers...
     $This->_RemoveDuplicateAtomIdentifiers();

     # Set final fingerprints...
     $This->_SetFinalFingerprints();
   }
   else {
     carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
   }

   # Clear cached molecule data irrespective of the outcome so that a failed
   # run doesn't leave the cache populated...
   $This->_ClearMoleculeDataCache();

   return $This;
 }
 392 
 # Assign appropriate initial atom identifiers...
 #
 #   Generation of initial identifier for a specific atom involves:
 #     . An atom types object matching AtomIdentifierType is created and used to
 #       assign atom types
 #     . The atom type assigned to the atom, or the string None for an atom without
 #       an atom type, is used as initial atom identifier string
 #     . A hash code for the string is generated using TextUtil::HashCode function and
 #       stored as atom identifier of the atom at radius level 0
 #
 # Returns the object for a successful assignment and nothing, which is false
 # in boolean context, when atom types couldn't be assigned. Croaks for an
 # unsupported AtomIdentifierType.
 #
 sub _AssignInitialAtomIdentifiers {
   my($This) = @_;

   # Initialize atom identifiers...
   $This->_InitializeAtomIdentifiers();

   # Set up constructor parameters for each supported atom types class. All of
   # them ignore hydrogens except for TPSAAtomTypes which is passed its own
   # set of options...
   my $IgnoreHydrogens = 1;
   my @CommonParameters = ('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);

   my %ConstructorParameters = map { ($_ => [@CommonParameters]) } qw(DREIDINGAtomTypes EStateAtomTypes MMFF94AtomTypes SLogPAtomTypes SYBYLAtomTypes UFFAtomTypes);
   $ConstructorParameters{AtomicInvariantsAtomTypes} = [@CommonParameters, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse}];
   $ConstructorParameters{FunctionalClassAtomTypes} = [@CommonParameters, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse}];
   $ConstructorParameters{TPSAAtomTypes} = ['Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0];

   # Find the supported name matching the specified type without regard to case...
   my($AtomTypesName) = grep { $This->{AtomIdentifierType} =~ /^$_$/i } sort keys %ConstructorParameters;
   if (!defined $AtomTypesName) {
     croak "Error: ${ClassName}->_AssignInitialAtomIdentifiers: Couldn't assign intial atom identifiers: InitialAtomIdentifierType $This->{AtomIdentifierType} is not supported...";
   }

   my $AtomTypesClass = "AtomTypes::${AtomTypesName}";
   my $SpecifiedAtomTypes = $AtomTypesClass->new(@{$ConstructorParameters{$AtomTypesName}});

   # Assign atom types...
   $SpecifiedAtomTypes->AssignAtomTypes();

   # Make sure atom types assignment is successful...
   if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
     return;
   }

   # Assign atom identifiers at radius 0...
   my $Radius = 0;
   for my $Atom (@{$This->{Atoms}}) {
     my $AtomID = $Atom->GetID();

     my $AtomType = $SpecifiedAtomTypes->GetAtomType($Atom);
     my $InitialAtomTypeString = $AtomType ? $AtomType : 'None';

     $This->{AtomIdentifiers}{$Radius}{$AtomID} = TextUtil::HashCode($InitialAtomTypeString);
   }

   return $This;
 }
 484 
 # Initialize atom identifiers...
 #
 # Empties atom identifiers and then sets up empty hashes for each radius
 # level from 0 up to NeighborhoodRadius to hold:
 #
 #   . Atom identifiers: key and value correspond to AtomID and AtomIdentifier
 #   . Unique and structurally unique atom identifiers: key and value correspond
 #     to AtomIdentifier and AtomID
 #   . Counts for unique and structurally unique atom identifiers
 #
 sub _InitializeAtomIdentifiers {
   my($This) = @_;

   my @PerRadiusHashNames = qw(AtomIdentifiers UniqueAtomIdentifiers UniqueAtomIdentifiersCount StructurallyUniqueAtomIdentifiers StructurallyUniqueAtomIdentifiersCount);

   %{$This->{AtomIdentifiers}} = ();

   for my $CurrentRadius (0 .. $This->{NeighborhoodRadius}) {
     for my $HashName (@PerRadiusHashNames) {
       %{$This->{$HashName}{$CurrentRadius}} = ();
     }
   }
 }
 507 
 # Collect atom neighborhoods up to specified neighborhood radius...
 #
 # Atom neighborhoods are emptied first. Nothing else is done, and nothing is
 # returned, for a radius less than 1: at radius level 0, it's just the atoms.
 # Otherwise, the neighborhoods with successor atoms retrieved from the molecule
 # for each cached atom are stored by radius level and AtomID, in the order in
 # which they are returned starting from radius level 0, and the object is
 # returned.
 #
 sub _GetAtomNeighborhoods {
   my($This) = @_;

   %{$This->{AtomNeighborhoods}} = ();

   my $Radius = $This->{NeighborhoodRadius};
   return if $Radius < 1;

   # Initialize neighborhoods at different radii...
   for my $CurrentRadius (0 .. $Radius) {
     %{$This->{AtomNeighborhoods}{$CurrentRadius}} = ();
   }

   my $Molecule = $This->GetMolecule();

   # Collect available atom neighborhoods at different neighborhood levels for each atom...
   for my $Atom (@{$This->{Atoms}}) {
     my $AtomID = $Atom->GetID();
     my @NeighborhoodsWithSuccessorAtoms = $Molecule->GetAtomNeighborhoodsWithSuccessorAtomsAndRadiusUpto($Atom, $Radius);

     for my $CurrentRadius (0 .. $#NeighborhoodsWithSuccessorAtoms) {
       $This->{AtomNeighborhoods}{$CurrentRadius}{$AtomID} = $NeighborhoodsWithSuccessorAtoms[$CurrentRadius];
     }
   }

   return $This;
 }
 542 
 # Assign atom identifiers to central atom at each neighborhood radius level...
 #
 # Nothing is done, and nothing is returned, for a neighborhood radius less
 # than 1. Otherwise, the object is returned.
 #
 sub _AssignAtomIdentifiersToAtomNeighborhoods {
   my($This) = @_;

   return if $This->{NeighborhoodRadius} < 1;

   # Go over the atom neighborhoods at each radius up to specified radius and assign atom
   # identifiers using their connected successor atoms and their identifiers.
   #
   # For a neighborhood atom at a specified radius, the successor connected atoms correspond
   # to next radius level and the last set of neighborhood atoms don't have any successor connected
   # atoms. Additionally, radius level 0 just corresponds to initial atom identifiers.
   #
   # So in order to process atom neighborhoods up to specified radius level, the last atom neighborhood
   # doesn't need to be processed: it gets processed at previous radius level as successor connected
   # atoms.
   #
   for my $Radius (0 .. ($This->{NeighborhoodRadius} - 1)) {
     ATOM: for my $Atom (@{$This->{Atoms}}) {
       my $AtomID = $Atom->GetID();

       # Are there any available atom neighborhoods at this radius?
       next ATOM unless exists $This->{AtomNeighborhoods}{$Radius}{$AtomID};

       # Go over neighborhood atoms and their successor connected atoms at this radius and collect
       # atom identifiers of successor atoms grouped by the order of the bond connecting them to
       # their neighborhood atom. An order of 1.5 is used for aromatic bonds.
       #
       my %BondOrdersAndAtomIdentifiers = ();
       my $SuccessorAtomCount = 0;

       NEIGHBORHOODS: for my $NeighborhoodAtomWithSuccessorAtomsRef (@{$This->{AtomNeighborhoods}{$Radius}{$AtomID}}) {
         my($NeighborhoodAtom, $SuccessorAtomsRef) = @{$NeighborhoodAtomWithSuccessorAtomsRef};

         # Any connected successors for the NeighborhoodAtom?
         next NEIGHBORHOODS unless @{$SuccessorAtomsRef};

         SUCCESSORATOM: for my $SuccessorAtom (@{$SuccessorAtomsRef}) {
           # Skip successor hydrogen atom...
           next SUCCESSORATOM if $SuccessorAtom->IsHydrogen();

           my $SuccessorAtomID = $SuccessorAtom->GetID();
           $SuccessorAtomCount++;

           my $Bond = $NeighborhoodAtom->GetBondToAtom($SuccessorAtom);
           my $BondOrder = $Bond->IsAromatic() ? "1.5" : $Bond->GetBondOrder();

           push @{$BondOrdersAndAtomIdentifiers{$BondOrder}}, $This->{AtomIdentifiers}{$Radius}{$SuccessorAtomID};
         }
       }

       # No new identifier without any non-hydrogen successor atom...
       next ATOM unless $SuccessorAtomCount;

       # Assign a new atom identifier at the next radius level...
       $This->_AssignAtomIdentifierToAtomNeighborhood($AtomID, $Radius, \%BondOrdersAndAtomIdentifiers);
     }
   }

   return $This;
 }
 616 
 # Generate and assign atom identifier for AtomID using atom neighborhood at next radius level...
 #
 # The identifier string starts with the next radius level and the atom identifier
 # of the atom at the specified radius. It's followed by (BondOrder AtomIdentifier)
 # pairs for successor atoms sorted numerically by bond order and then by atom
 # identifier in order to make the final atom identifier graph invariant. All
 # these values are joined without any delimiter and the hash code generated by
 # TextUtil::HashCode for the string is stored as atom identifier of the atom at
 # the next radius level. Returns the object.
 #
 sub _AssignAtomIdentifierToAtomNeighborhood {
   my($This, $AtomID, $Radius, $BondOrdersAndAtomIdentifiersRef) = @_;

   my $NextRadius = $Radius + 1;
   my @AtomIdentifiersInfo = ($NextRadius, $This->{AtomIdentifiers}{$Radius}{$AtomID});

   for my $BondOrder (sort { $a <=> $b } keys %{$BondOrdersAndAtomIdentifiersRef}) {
     my @SortedSuccessorAtomIdentifiers = sort { $a <=> $b } @{$BondOrdersAndAtomIdentifiersRef->{$BondOrder}};
     push @AtomIdentifiersInfo, map { ($BondOrder, $_) } @SortedSuccessorAtomIdentifiers;
   }

   # Assign atom identifier to the atom at next radius level...
   $This->{AtomIdentifiers}{$NextRadius}{$AtomID} = TextUtil::HashCode(join("", @AtomIdentifiersInfo));

   return $This;
 }
 646 
 # Remove duplicate atom identifiers...
 #
 # Duplicates are removed by value first and by structure afterwards. Returns
 # the object.
 #
 sub _RemoveDuplicateAtomIdentifiers {
   my($This) = @_;

   for my $MethodName ('_RemoveDuplicateIdentifiersByValue', '_RemoveStructurallyDuplicateIdenfiers') {
     $This->$MethodName();
   }

   return $This;
 }
 657 
 # Remove duplicate identifiers at each radius level by just using their value...
 #
 # For each radius level, the first atom found with a specific atom identifier
 # is stored in UniqueAtomIdentifiers and the number of atoms sharing that
 # identifier is tracked in UniqueAtomIdentifiersCount. Atoms without any
 # identifier at a radius level are skipped. Returns the object.
 #
 sub _RemoveDuplicateIdentifiersByValue {
   my($This) = @_;

   for my $Radius (0 .. $This->{NeighborhoodRadius}) {
     ATOM: for my $Atom (@{$This->{Atoms}}) {
       my $AtomID = $Atom->GetID();
       next ATOM unless exists $This->{AtomIdentifiers}{$Radius}{$AtomID};

       my $AtomIdentifier = $This->{AtomIdentifiers}{$Radius}{$AtomID};

       if (exists $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier}) {
         # It's a duplicate atom identifier at this radius level...
         $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} += 1;
       }
       else {
         # First atom with this identifier at this radius level...
         $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier} = $AtomID;
         $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} = 1;
       }
     }
   }

   return $This;
 }
 682 
 # Remove structurally duplicate identifiers at each radius level...
 #
 # Methodology:
 #   . Set up structure features for atom IDs involved in unique atom identifiers
 #   . Go over unique atom identifiers at each radius level, in numerically ascending
 #     order, and look for a structurally similar atom identifier
 #   . Add the count of an atom identifier with a similar atom identifier to the
 #     structurally unique count of that similar atom identifier
 #   . Keep all other atom identifiers, along with their count, as structurally
 #     unique atom identifiers
 #
 # Returns the object.
 #
 sub _RemoveStructurallyDuplicateIdenfiers {
   my($This) = @_;

   # Setup structure features...
   $This->_SetupStructureFeaturesForAtomIDsInvolvedInUniqueIdentifiers();

   # Identify structurally unique identifiers...
   for my $Radius (0 .. $This->{NeighborhoodRadius}) {
     for my $AtomIdentifier (sort { $a <=> $b } keys %{$This->{UniqueAtomIdentifiers}{$Radius}}) {
       my $AtomID = $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier};

       my($SimilarAtomIdentifierRadius, $SimilarAtomIdentifier) = $This->_FindStructurallySimilarAtomIdentifier($Radius, $AtomID, $AtomIdentifier);
       my $AtomIdentifierCount = $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier};

       if ($SimilarAtomIdentifier) {
         # Current atom identifier is similar to an earlier structurally unique atom identifier...
         $This->{StructurallyUniqueAtomIdentifiersCount}{$SimilarAtomIdentifierRadius}{$SimilarAtomIdentifier} += $AtomIdentifierCount;
       }
       else {
         # Structurally unique atom identifier count starts as the unique atom identifier count...
         $This->{StructurallyUniqueAtomIdentifiers}{$Radius}{$AtomIdentifier} = $AtomID;
         $This->{StructurallyUniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} = $AtomIdentifierCount;
       }
     }
   }

   return $This;
 }
 718 
 # Set final fingerprints vector...
 #
 # Marks fingerprints as generated and calls the method setting final
 # fingerprints for the fingerprints type of the object. No method is called
 # for a type which doesn't match any of the three known types. Returns the
 # object.
 #
 sub _SetFinalFingerprints {
   my($This) = @_;

   # Mark successful generation of fingerprints...
   $This->{FingerprintsGenerated} = 1;

   my @TypePatternsAndMethodNames = (
     [qr/^ExtendedConnectivity$/i, '_SetFinalExtendedConnectivityFingerprints'],
     [qr/^ExtendedConnectivityCount$/i, '_SetFinalExtendedConnectivityCountFingerprints'],
     [qr/^ExtendedConnectivityBits$/i, '_SetFinalExtendedConnectivityBitsFingerprints'],
   );

   TYPE: for my $TypePatternAndMethodNameRef (@TypePatternsAndMethodNames) {
     my($TypePattern, $MethodName) = @{$TypePatternAndMethodNameRef};
     if ($This->{Type} =~ $TypePattern) {
       $This->$MethodName();
       last TYPE;
     }
   }

   return $This;
 }
 739 
 # Set final extended connectivity fingerprints vector...
 #
 # Structurally unique atom identifiers are collected radius level by radius
 # level, sorted numerically within each level, and added as values to the
 # fingerprints vector. Returns the object.
 #
 sub _SetFinalExtendedConnectivityFingerprints {
   my($This) = @_;

   my @AtomIdentifiers = ();
   for my $Radius (0 .. $This->{NeighborhoodRadius}) {
     push @AtomIdentifiers, sort { $a <=> $b } keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}};
   }

   # Add atom identifiers to fingerprint vector...
   $This->{FingerprintsVector}->AddValues(\@AtomIdentifiers);

   return $This;
 }
 758 
 # Set final extended connectivity count fingerprints vector...
 #
 # Structurally unique atom identifiers are collected radius level by radius
 # level, sorted numerically within each level, along with their counts. The
 # atom identifiers are added to the fingerprints vector as value IDs and the
 # counts as values. Returns the object.
 #
 sub _SetFinalExtendedConnectivityCountFingerprints {
   my($This) = @_;

   my(@AtomIdentifiers, @AtomIdentifiersCount);

   for my $Radius (0 .. $This->{NeighborhoodRadius}) {
     my @SortedAtomIdentifiers = sort { $a <=> $b } keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}};

     push @AtomIdentifiers, @SortedAtomIdentifiers;
     push @AtomIdentifiersCount, map { $This->{StructurallyUniqueAtomIdentifiersCount}{$Radius}{$_} } @SortedAtomIdentifiers;
   }

   # Add atom identifiers to fingerprint vector as value IDs...
   $This->{FingerprintsVector}->AddValueIDs(\@AtomIdentifiers);

   # Add atom identifiers count to fingerprint vector as values...
   $This->{FingerprintsVector}->AddValues(\@AtomIdentifiersCount);

   return $This;
 }
 782 
 783 # Set final extended connectivity bits fingerpritns vector...
 784 #
 785 sub _SetFinalExtendedConnectivityBitsFingerprints {
 786   my($This) = @_;
 787   my($Radius, $AtomIdentifier, $FingerprintsBitVector, $Size, $SkipBitPosCheck, $AtomIdentifierBitPos, $SetBitNum);
 788 
 789   $FingerprintsBitVector = $This->{FingerprintsBitVector};
 790 
 791   $Size = $This->{Size};
 792 
 793   $SkipBitPosCheck = 1;
 794 
 795   for $Radius (0 .. $This->{NeighborhoodRadius}) {
 796     for $AtomIdentifier (keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}}) {
 797       # Set random number seed...
 798       if ($This->{UsePerlCoreRandom}) {
 799         CORE::srand($AtomIdentifier);
 800       }
 801       else {
 802         MathUtil::srandom($AtomIdentifier);
 803       }
 804 
 805       # Set bit position...
 806       $AtomIdentifierBitPos = $This->{UsePerlCoreRandom} ? int(CORE::rand($Size)) : int(MathUtil::random($Size));
 807       $FingerprintsBitVector->SetBit($AtomIdentifierBitPos, $SkipBitPosCheck);
 808     }
 809   }
 810   return $This;
 811 }
 812 
 813 
 814 # Identify structurally unique identifiers by comparing structure features involved in
 815 # generating identifiear by comparing it agains all the previous structurally unique
 816 # identifiers...
 817 #
 818 sub _FindStructurallySimilarAtomIdentifier {
 819   my($This, $SpecifiedRadius, $SpecifiedAtomID, $SpecifiedAtomIdentifier) = @_;
 820   my($Radius, $AtomID, $AtomIdentifier, $FeatureAtomCount, $FeatureAtomIDsRef,  $SpecifiedFeatureAtomID, $SpecifiedFeatureAtomCount, $SpecifiedFeatureAtomIDsRef);
 821 
 822   if ($SpecifiedRadius == 0) {
 823     # After duplicate removal by value, all identifier at radius level 0 would be structurally unique...
 824     return (undef, undef);
 825   }
 826 
 827   $SpecifiedFeatureAtomCount = $This->{StructureFeatures}{AtomCount}{$SpecifiedRadius}{$SpecifiedAtomID};
 828   $SpecifiedFeatureAtomIDsRef = $This->{StructureFeatures}{AtomIDs}{$SpecifiedRadius}{$SpecifiedAtomID};
 829 
 830   # No need to compare features at radius 0...
 831   for $Radius (1 .. $SpecifiedRadius) {
 832     ATOMIDENTIFIER: for $AtomIdentifier (keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}}) {
 833       $AtomID = $This->{StructurallyUniqueAtomIdentifiers}{$Radius}{$AtomIdentifier};
 834 
 835       $FeatureAtomCount = $This->{StructureFeatures}{AtomCount}{$Radius}{$AtomID};
 836       $FeatureAtomIDsRef = $This->{StructureFeatures}{AtomIDs}{$Radius}{$AtomID};
 837 
 838       if ($SpecifiedFeatureAtomCount != $FeatureAtomCount) {
 839         # Couldn't be structurally equivalent...
 840         next ATOMIDENTIFIER;
 841       }
 842       for $SpecifiedFeatureAtomID (keys % {$SpecifiedFeatureAtomIDsRef}) {
 843         if (! exists $FeatureAtomIDsRef->{$SpecifiedFeatureAtomID}) {
 844           # For structural equivalency, all atom in specified feature must also be present in a previously
 845           # identified structurally unique structure feature...
 846           next ATOMIDENTIFIER;
 847         }
 848       }
 849       # Found structurally equivalent feature...
 850       return ($Radius, $AtomIdentifier);
 851     }
 852   }
 853   return (undef, undef);
 854 }
 855 
 856 # Setup structure features for atom IDs involved in unique atom identifiers at all
 857 # radii level...
 858 #
 859 sub _SetupStructureFeaturesForAtomIDsInvolvedInUniqueIdentifiers {
 860   my($This) = @_;
 861   my($Radius, $PreviousRadius, $Atom, $AtomID, $AtomIdentifier, $NeighborhoodAtomID, $NeighborhoodAtomsWithSuccessorAtomsRef, $NeighborhoodAtom, $NeighborhoodAtomSuccessorAtomsRef, %AtomIDs);
 862 
 863   $This->_InitializeStructureFeatures();
 864 
 865   # Collect atom IDs involved in unique atom identifiers...
 866   %AtomIDs = ();
 867   for $Radius (0 .. $This->{NeighborhoodRadius}) {
 868     for $AtomIdentifier (keys %{$This->{UniqueAtomIdentifiers}{$Radius}}) {
 869       $AtomID = $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier};
 870       $AtomIDs{$AtomID} = $AtomID;
 871     }
 872   }
 873 
 874   # Setup structure features...
 875   for $Radius (0 .. $This->{NeighborhoodRadius}) {
 876     for $AtomID (keys %AtomIDs) {
 877       my($StructureFeatureAtomCount, %StructureFeatureAtomIDs);
 878 
 879       $StructureFeatureAtomCount = 0;
 880       %StructureFeatureAtomIDs = ();
 881 
 882       # Get partial structure features for the atom at previous radius level...
 883       $PreviousRadius = $Radius - 1;
 884       if ($PreviousRadius >= 0) {
 885         $StructureFeatureAtomCount += $This->{StructureFeatures}{AtomCount}{$PreviousRadius}{$AtomID};
 886         %StructureFeatureAtomIDs = %{$This->{StructureFeatures}{AtomIDs}{$PreviousRadius}{$AtomID}};
 887       }
 888 
 889       # Get all neighborhood atom at this radius level...
 890       if (exists($This->{AtomNeighborhoods}{$Radius}) && exists($This->{AtomNeighborhoods}{$Radius}{$AtomID})) {
 891         NEIGHBORHOODS: for $NeighborhoodAtomsWithSuccessorAtomsRef (@{$This->{AtomNeighborhoods}{$Radius}{$AtomID}}) {
 892           ($NeighborhoodAtom, $NeighborhoodAtomSuccessorAtomsRef) = @{$NeighborhoodAtomsWithSuccessorAtomsRef};
 893           if ($NeighborhoodAtom->IsHydrogen()) {
 894             next NEIGHBORHOODS;
 895           }
 896           $NeighborhoodAtomID = $NeighborhoodAtom->GetID();
 897           $StructureFeatureAtomCount++;
 898           $StructureFeatureAtomIDs{$NeighborhoodAtomID} = $NeighborhoodAtomID;
 899         }
 900       }
 901 
 902       # Assign structure features to atom at this radius level...
 903       $This->{StructureFeatures}{AtomCount}{$Radius}{$AtomID} = $StructureFeatureAtomCount;
 904       $This->{StructureFeatures}{AtomIDs}{$Radius}{$AtomID} = \%StructureFeatureAtomIDs;
 905     }
 906   }
 907   return $This;
 908 }
 909 
 910 # Intialize structure features at each radius level...
 911 #
 912 sub _InitializeStructureFeatures {
 913   my($This) = @_;
 914   my($Radius, $CurrentRadius, $Atom, $AtomID);
 915 
 916   # Initialize all structure features...
 917 
 918   %{$This->{StructureFeatures}} = ();
 919   %{$This->{StructureFeatures}{AtomCount}} = ();
 920   %{$This->{StructureFeatures}{AtomIDs}} = ();
 921 
 922   $Radius = $This->{NeighborhoodRadius};
 923   for $CurrentRadius (0 .. $Radius) {
 924     # Structure features for at specific radii accessed using atom IDs...
 925     %{$This->{StructureFeatures}{AtomCount}{$CurrentRadius}} = ();
 926     %{$This->{StructureFeatures}{AtomIDs}{$CurrentRadius}} = ();
 927   }
 928   return $This;
 929 }
 930 
 931 # Cache  appropriate molecule data...
 932 #
 933 sub _SetupMoleculeDataCache {
 934   my($This) = @_;
 935 
 936   # Get all non-hydrogen atoms...
 937   my($NegateAtomCheckMethod);
 938   $NegateAtomCheckMethod = 1;
 939   @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms("IsHydrogen", $NegateAtomCheckMethod);
 940 
 941   return $This;
 942 }
 943 
 944 # Clear cached molecule data...
 945 #
 946 sub _ClearMoleculeDataCache {
 947   my($This) = @_;
 948 
 949   @{$This->{Atoms}} = ();
 950 
 951   return $This;
 952 }
 953 
 954 # Initialize atom indentifier type information...
 955 #
 956 # Current supported values:
 957 #
 958 # AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
 959 # MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
 960 #
 961 sub _InitializeAtomIdentifierTypeInformation {
 962   my($This) = @_;
 963 
 964   IDENTIFIERTYPE: {
 965     if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 966       $This->_InitializeAtomicInvariantsAtomTypesInformation();
 967       last IDENTIFIERTYPE;
 968     }
 969     if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 970       $This->_InitializeFunctionalClassAtomTypesInformation();
 971       last IDENTIFIERTYPE;
 972     }
 973     if ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 974       # Nothing to do for now...
 975       last IDENTIFIERTYPE;
 976     }
 977     carp "Warning: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
 978   }
 979   return $This;
 980 }
 981 
 982 # Initialize atomic invariants atom types, generated by AtomTypes::AtomicInvariantsAtomTypes
 983 # class, to use for generating initial atom identifiers...
 984 #
 985 # Let:
 986 #   AS = Atom symbol corresponding to element symbol
 987 #
 988 #   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
 989 #   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
 990 #   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
 991 #   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 992 #   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 993 #   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 994 #   H<n>   = Number of implicit and explicit hydrogens for atom
 995 #   Ar     = Aromatic annotation indicating whether atom is aromatic
 996 #   RA     = Ring atom annotation indicating whether atom is a ring
 997 #   FC<+n/-n> = Formal charge assigned to atom
 998 #   MN<n> = Mass number indicating isotope other than most abundant isotope
 999 #   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
1000 #
1001 # Then:
1002 #
1003 #   Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
1004 #
1005 #     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
1006 #
1007 # Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
1008 # optional.
1009 #
1010 # Default atomic invariants used for generating inital atom identifiers are [ Ref 24 ]:
1011 #
1012 #   AS, X<n>, BO<n>, H<n>, FC<+n/-n>, MN<n>
1013 #
1014 # In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words
1015 # are also allowed:
1016 #
1017 # X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
1018 # BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
1019 # LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
1020 # SB :  NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
1021 # DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
1022 # TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
1023 # H :  NumOfImplicitAndExplicitHydrogens
1024 # Ar : Aromatic
1025 # RA : RingAtom
1026 # FC : FormalCharge
1027 # MN : MassNumber
1028 # SM : SpinMultiplicity
1029 #
1030 sub _InitializeAtomicInvariantsAtomTypesInformation {
1031   my($This) = @_;
1032 
1033   # Default atomic invariants to use for generating initial atom identifiers are: AS, X, BO, LBO, H, FC
1034   #
1035   @{$This->{AtomicInvariantsToUse}} = ();
1036   @{$This->{AtomicInvariantsToUse}} = ('AS', 'X', 'BO', 'H', 'FC', 'MN');
1037 
1038   return $This;
1039 }
1040 
# Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
# class, to use for generating initial atom identifiers...
#
# Let:
#   HBD: HydrogenBondDonor
#   HBA: HydrogenBondAcceptor
#   PI :  PositivelyIonizable
#   NI : NegativelyIonizable
#   Ar : Aromatic
#   Hal : Halogen
#   H : Hydrophobic
#   RA : RingAtom
#   CA : ChainAtom
#
# Then:
#
#   Functional class atom type specification for an atom corresponds to:
#
#     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
#
#   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
#
#   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
#
#     HydrogenBondDonor: NH, NH2, OH
#     HydrogenBondAcceptor: N[!H], O
#     PositivelyIonizable: +, NH2
#     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
#
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;
  my(@DefaultFunctionalClasses);

  # Default functional class atom types to use for generating initial atom identifiers
  # are: HBD, HBA, PI, NI, Ar, Hal
  #
  @DefaultFunctionalClasses = qw(HBD HBA PI NI Ar Hal);

  # Replace the contents of the existing list in place...
  @{$This->{FunctionalClassesToUse}} = @DefaultFunctionalClasses;

  return $This;
}
1081 
# Set atomic invariants to use for generation of initial atom identifiers...
#
# Values may be passed either as a list of atomic invariants or as a reference to an
# array containing them.
#
# Returns the object itself after replacing AtomicInvariantsToUse with the specified
# values. A warning is issued and nothing is returned when no values are specified or
# when AtomIdentifierType is not AtomicInvariantsAtomTypes. An exception is raised,
# leaving AtomicInvariantsToUse unchanged, when any specified atomic invariant is not
# available in AtomTypes::AtomicInvariantsAtomTypes.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;
  my($SpecifiedAtomicInvariant, @SpecifiedAtomicInvariants);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  if ($This->{AtomIdentifierType} !~ /^AtomicInvariantsAtomTypes$/i) {
    # Report this method's own name in the warning...
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: AtomicInvariantsToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # An array reference as first value holds all the specified values...
  @SpecifiedAtomicInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomicInvariants are valid before changing anything...
  for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
    if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
      croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
    }
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = @SpecifiedAtomicInvariants;

  return $This;
}
1126 
# Set functional classes to use for generation of initial atom identifiers...
#
# Values may be passed either as a list of functional classes or as a reference to an
# array containing them.
#
# Returns the object itself after replacing FunctionalClassesToUse with the specified
# values. A warning is issued and nothing is returned when no values are specified or
# when AtomIdentifierType is not FunctionalClassAtomTypes. An exception is raised,
# leaving FunctionalClassesToUse unchanged, when any specified functional class is not
# available in AtomTypes::FunctionalClassAtomTypes.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;
  my($Candidate, @Candidates, @Accepted);

  unless (@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # An array reference as first value holds all the specified values...
  @Candidates = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Accept the specified functional classes only when all of them are valid...
  @Accepted = ();
  for $Candidate (@Candidates) {
    unless (AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($Candidate)) {
      croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $Candidate, is not supported...\n ";
    }
    push @Accepted, $Candidate;
  }

  # Set functional classes to use...
  @{$This->{FunctionalClassesToUse}} = @Accepted;

  return $This;
}
1170 
# Return a string containing data for ExtendedConnectivityFingerprints object...
#
# The string is a list of "Name: Value" pairs delimited by "; " and made up of:
#
#   . InitialAtomIdentifierType and NeighborhoodRadius
#   . Atomic invariants or functional classes to use along with their order and available
#     values, for AtomicInvariantsAtomTypes or FunctionalClassAtomTypes identifier types
#   . Size, MinSize, MaxSize, NumOfOnBits, BitDensity and FingerprintsBitVector for
#     ExtendedConnectivityBits type; NumOfIdentifiers and FingerprintsVector for other types
#
sub StringifyExtendedConnectivityFingerprints {
  my($This) = @_;
  my($Text);

  $Text = "InitialAtomIdentifierType: $This->{AtomIdentifierType}; NeighborhoodRadius: $This->{NeighborhoodRadius}";

  # Atom identifier type specific information...
  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my(@InvariantsOrder, @InvariantsInfo, %AvailableInvariants);

    @InvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    %AvailableInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

    # Pair each atomic invariant, in order, with its available value...
    @InvariantsInfo = map { "$_: $AvailableInvariants{$_}" } @InvariantsOrder;

    $Text .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    $Text .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@InvariantsOrder, ", ", 0) . ">";
    $Text .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@InvariantsInfo, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my(@ClassesOrder, @ClassesInfo, %AvailableClasses);

    @ClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    %AvailableClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

    # Pair each functional class, in order, with its available value...
    @ClassesInfo = map { "$_: $AvailableClasses{$_}" } @ClassesOrder;

    $Text .= "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    $Text .= "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@ClassesOrder, ", ", 0) . ">";
    $Text .= "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@ClassesInfo, ", ", 0) . ">";
  }

  if ($This->{Type} =~ /^ExtendedConnectivityBits$/i) {
    my($NumOfSetBits, $BitDensity);

    # Size...
    $Text .= "; Size: $This->{Size}; MinSize: $This->{MinSize}; MaxSize: $This->{MaxSize}";

    # Fingerprint bit density and num of bits set...
    $NumOfSetBits = $This->{FingerprintsBitVector}->GetNumOfSetBits();
    $BitDensity = $This->{FingerprintsBitVector}->GetFingerprintsBitDensity();
    $Text .= "; NumOfOnBits: $NumOfSetBits; BitDensity: $BitDensity";

    # FingerprintsBitVector...
    $Text .= "; FingerprintsBitVector: < $This->{FingerprintsBitVector} >";

    return $Text;
  }

  # Number of identifiers...
  $Text .= "; NumOfIdentifiers: " . $This->{FingerprintsVector}->GetNumOfValues();

  # FingerprintsVector...
  $Text .= "; FingerprintsVector: < $This->{FingerprintsVector} >";

  return $Text;
}
1229