MayaChemTools

   1 package Fingerprints::TopologicalAtomTripletsFingerprints;
   2 #
   3 # $RCSfile: TopologicalAtomTripletsFingerprints.pm,v $
   4 # $Date: 2015/02/28 20:48:54 $
   5 # $Revision: 1.15 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
   19 # any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Carp;
  31 use Exporter;
  32 use Fingerprints::Fingerprints;
  33 use TextUtil ();
  34 use Molecule;
  35 use AtomTypes::AtomicInvariantsAtomTypes;
  36 use AtomTypes::DREIDINGAtomTypes;
  37 use AtomTypes::EStateAtomTypes;
  38 use AtomTypes::FunctionalClassAtomTypes;
  39 use AtomTypes::MMFF94AtomTypes;
  40 use AtomTypes::SLogPAtomTypes;
  41 use AtomTypes::SYBYLAtomTypes;
  42 use AtomTypes::TPSAAtomTypes;
  43 use AtomTypes::UFFAtomTypes;
  44 
  45 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  46 
  47 @ISA = qw(Fingerprints::Fingerprints Exporter);
  48 @EXPORT = qw();
  49 @EXPORT_OK = qw();
  50 
  51 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  52 
  53 # Setup class variables...
  54 my($ClassName);
  55 _InitializeClass();
  56 
  57 # Overload Perl functions...
  58 use overload '""' => 'StringifyTopologicalAtomTripletsFingerprints';
  59 
  60 # Class constructor...
  61 sub new {
  62   my($Class, %NamesAndValues) = @_;
  63 
  64   # Initialize object...
  65   my $This = $Class->SUPER::new();
  66   bless $This, ref($Class) || $Class;
  67   $This->_InitializeTopologicalAtomTripletsFingerprints();
  68 
  69   $This->_InitializeTopologicalAtomTripletsFingerprintsProperties(%NamesAndValues);
  70 
  71   return $This;
  72 }
  73 
  74 # Initialize object data...
  75 #
  76 sub _InitializeTopologicalAtomTripletsFingerprints {
  77   my($This) = @_;
  78 
  79   # Type of fingerprint...
  80   $This->{Type} = 'TopologicalAtomTriplets';
  81 
  82   # Type of vector...
  83   $This->{VectorType} = 'FingerprintsVector';
  84 
  85   # Type of FingerprintsVector...
  86   $This->{FingerprintsVectorType} = 'NumericalValues';
  87 
  88   # Minimum and maximum bond distance between atom paris...
  89   $This->{MinDistance} = 1;
  90   $This->{MaxDistance} = 10;
  91 
  92   # Determines whether to apply triangle inequality to distance triplets...
  93   #
  94   $This->{UseTriangleInequality} = 0;
  95 
  96   # Atom identifier type to use for atom IDs in atom triplets...
  97   #
  98   # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
  99   # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
 100   # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
 101   #
 102   $This->{AtomIdentifierType} = '';
 103 
 104   # Atom types assigned to each heavy atom...
 105   #
 106   %{$This->{AssignedAtomTypes}} = ();
 107 
 108   # All atom triplets between minimum and maximum distance...
 109   #
 110   @{$This->{AtomTripletsIDs}} = ();
 111   %{$This->{AtomTripletsCount}} = ();
 112 }
 113 
 114 # Initialize class ...
 115 sub _InitializeClass {
 116   #Class name...
 117   $ClassName = __PACKAGE__;
 118 }
 119 
 120 # Initialize object properties....
 121 sub _InitializeTopologicalAtomTripletsFingerprintsProperties {
 122   my($This, %NamesAndValues) = @_;
 123 
 124   my($Name, $Value, $MethodName);
 125   while (($Name, $Value) = each  %NamesAndValues) {
 126     $MethodName = "Set${Name}";
 127     $This->$MethodName($Value);
 128   }
 129 
 130   # Make sure molecule object was specified...
 131   if (!exists $NamesAndValues{Molecule}) {
 132     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
 133   }
 134   if (!exists $NamesAndValues{AtomIdentifierType}) {
 135     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
 136   }
 137 
 138   $This->_InitializeFingerprintsVector();
 139 
 140   return $This;
 141 }
 142 
 143 # Set minimum distance for atom triplets...
 144 #
 145 sub SetMinDistance {
 146   my($This, $Value) = @_;
 147 
 148   if (!TextUtil::IsPositiveInteger($Value)) {
 149     croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid:  It must be a positive integer...";
 150   }
 151   $This->{MinDistance} = $Value;
 152 
 153   return $This;
 154 }
 155 
 156 # Set maximum distance for atom triplets...
 157 #
 158 sub SetMaxDistance {
 159   my($This, $Value) = @_;
 160 
 161   if (!TextUtil::IsPositiveInteger($Value)) {
 162     croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid:  It must be a positive integer...";
 163   }
 164   $This->{MaxDistance} = $Value;
 165 
 166   return $This;
 167 }
 168 
 169 # Set atom identifier type..
 170 #
 171 sub SetAtomIdentifierType {
 172   my($This, $IdentifierType) = @_;
 173 
 174   if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 175     croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
 176   }
 177 
 178   if ($This->{AtomIdentifierType}) {
 179     croak "Error: ${ClassName}->SeAtomIdentifierType: Can't change intial atom identifier type:  It's already set...";
 180   }
 181 
 182   $This->{AtomIdentifierType} = $IdentifierType;
 183 
 184   # Initialize atom identifier type information...
 185   $This->_InitializeAtomIdentifierTypeInformation();
 186 
 187   return $This;
 188 }
 189 
 190 # Generate fingerprints description...
 191 #
 192 sub GetDescription {
 193   my($This) = @_;
 194 
 195   # Is description explicity set?
 196   if (exists $This->{Description}) {
 197     return $This->{Description};
 198   }
 199 
 200   # Generate fingerprints description...
 201 
 202   return "$This->{Type}:$This->{AtomIdentifierType}:MinDistance$This->{MinDistance}:MaxDistance$This->{MaxDistance}";
 203 }
 204 
 205 # Generate topological atom triplets fingerprints...
 206 #
 207 # Let:
 208 #
 209 #   AT = Any of the supported atom types
 210 #
 211 #   ATx = Atom type for  atom x
 212 #   ATy = Atom type for  atom y
 213 #   ATz = Atom type for  atom z
 214 #
 215 #   Dxy = Distance between Px and Py
 216 #   Dxz = Distance between Px and Pz
 217 #   Dyz = Distance between Py and Pz
 218 #
 219 # Then:
 220 #
 221 #   ATx-Dyz-ATy-Dxz-ATz-Dxy = Atom triplet ID for atom types ATx, ATy and Atz
 222 #
 223 # Methodology:
 224 #   . Generate a distance matrix.
 225 #   . Assign atom types to all the atoms.
 226 #   . Using distance matrix and atom types, count occurrence of unique atom triplets
 227 #     within specified distance range along with optional trinagle inequality
 228 #
 229 # Notes:
 230 #   . Hydrogen atoms are ignored during the fingerprint generation.
 231 #   . For a molecule containing N atoms with all different atom type, the total number of
 232 #     possible unique atom triplets without applying triangle inquality check corresponds to:
 233 #
 234 #     Factorial( N ) / ( Factorial( N - 3 ) * Factorial (3) )
 235 #
 236 #     However, due to similar atom types assigned to atoms in a molecule for a specific atom
 237 #     typing methodology and specified distance range used during fingerprints generation, the
 238 #     actual number of unique triplets is usually smaller than the theoretical limit.
 239 #
 240 sub GenerateFingerprints {
 241   my($This) = @_;
 242 
 243   if ($This->{MinDistance} > $This->{MaxDistance}) {
 244     croak "Error: ${ClassName}->GenerateTopologicalAtomTripletsFingerprints: No fingerpritns generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
 245   }
 246 
 247   # Cache appropriate molecule data...
 248   $This->_SetupMoleculeDataCache();
 249 
 250   # Generate distance matrix...
 251   if (!$This->_SetupDistanceMatrix()) {
 252     carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't generate distance matrix...";
 253     return $This;
 254   }
 255 
 256   # Assign atom types to all heavy atoms...
 257   if (!$This->_AssignAtomTypes()) {
 258     carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
 259     return $This;
 260   }
 261 
 262   # Intialize values of toplogical atom triplets...
 263   $This->_InitializeToplogicalAtomTriplets();
 264 
 265   # Count atom triplets...
 266   $This->_GenerateAndCountAtomTriplets();
 267 
 268   # Set final fingerprints...
 269   $This->_SetFinalFingerprints();
 270 
 271   # Clear cached molecule data...
 272   $This->_ClearMoleculeDataCache();
 273 
 274   return $This;
 275 }
 276 
 277 # Setup distance matrix...
 278 #
 279 sub _SetupDistanceMatrix {
 280   my($This) = @_;
 281 
 282   $This->{DistanceMatrix} = $This->GetMolecule()->GetDistanceMatrix();
 283 
 284   if (!$This->{DistanceMatrix}) {
 285     return undef;
 286   }
 287 
 288   return $This;
 289 }
 290 
 291 # Assign appropriate atom types to all heavy atoms...
 292 #
 293 sub _AssignAtomTypes {
 294   my($This) = @_;
 295   my($SpecifiedAtomTypes, $Atom, $AtomID, $IgnoreHydrogens);
 296 
 297   %{$This->{AssignedAtomTypes}} = ();
 298   $IgnoreHydrogens = 1;
 299 
 300   $SpecifiedAtomTypes = undef;
 301 
 302   IDENTIFIERTYPE: {
 303     if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 304       $SpecifiedAtomTypes = new AtomTypes::AtomicInvariantsAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
 305       last IDENTIFIERTYPE;
 306     }
 307 
 308     if ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
 309       $SpecifiedAtomTypes = new AtomTypes::DREIDINGAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 310       last IDENTIFIERTYPE;
 311     }
 312 
 313     if ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
 314       $SpecifiedAtomTypes = new AtomTypes::EStateAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 315       last IDENTIFIERTYPE;
 316     }
 317 
 318     if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 319       $SpecifiedAtomTypes = new AtomTypes::FunctionalClassAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
 320       last IDENTIFIERTYPE;
 321     }
 322 
 323     if ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
 324       $SpecifiedAtomTypes = new AtomTypes::MMFF94AtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 325       last IDENTIFIERTYPE;
 326     }
 327 
 328     if ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
 329       $SpecifiedAtomTypes = new AtomTypes::SLogPAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 330       last IDENTIFIERTYPE;
 331     }
 332     if ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
 333       $SpecifiedAtomTypes = new AtomTypes::SYBYLAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 334       last IDENTIFIERTYPE;
 335     }
 336 
 337     if ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
 338       $SpecifiedAtomTypes = new AtomTypes::TPSAAtomTypes('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
 339       last IDENTIFIERTYPE;
 340     }
 341 
 342     if ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
 343       $SpecifiedAtomTypes = new AtomTypes::UFFAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 344       last IDENTIFIERTYPE;
 345     }
 346 
 347     croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
 348   }
 349 
 350   # Assign atom types...
 351   $SpecifiedAtomTypes->AssignAtomTypes();
 352 
 353   # Make sure atom types assignment is successful...
 354   if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
 355     return undef;
 356   }
 357 
 358   # Collect assigned atom types...
 359   ATOM: for $Atom (@{$This->{Atoms}}) {
 360     if ($Atom->IsHydrogen()) {
 361       next ATOM;
 362     }
 363     $AtomID = $Atom->GetID();
 364     $This->{AssignedAtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
 365   }
 366 
 367   return $This;
 368 }
 369 
 370 # Initialize topological atom triplets between specified distance range...
 371 #
 372 sub _InitializeToplogicalAtomTriplets {
 373   my($This) = @_;
 374   my($Distance);
 375 
 376   @{$This->{AtomTripletsIDs}} = ();
 377   %{$This->{AtomTripletsCount}} = ();
 378 
 379   return $This;
 380 }
 381 
 382 # Count atom triplets between mininum and maximum distance at each
 383 # distance using distance matrix and atom types assiged to each heavy
 384 # atom.
 385 #
 386 sub _GenerateAndCountAtomTriplets {
 387   my($This) = @_;
 388   my($NumOfAtoms, $AtomIndex1, $AtomIndex2, $AtomIndex3, $AtomID1, $AtomID2, $AtomID3, $AtomType1, $AtomType2, $AtomType3, $Distance12, $Distance13, $Distance23, $SkipIndexCheck, $DistanceMatrix, $AtomTripletID);
 389 
 390   $NumOfAtoms = @{$This->{Atoms}};
 391   $DistanceMatrix = $This->{DistanceMatrix};
 392   $SkipIndexCheck = 0;
 393 
 394   ATOMINDEX1: for $AtomIndex1 (0 .. ($NumOfAtoms - 1)) {
 395     $AtomID1 = $This->{AtomIndexToID}{$AtomIndex1};
 396     if (!exists($This->{AssignedAtomTypes}{$AtomID1})) {
 397       next ATOMINDEX1;
 398     }
 399     $AtomType1 = $This->{AssignedAtomTypes}{$AtomID1};
 400 
 401     ATOMINDEX2: for $AtomIndex2 (($AtomIndex1 + 1) .. ($NumOfAtoms - 1)) {
 402       $AtomID2 = $This->{AtomIndexToID}{$AtomIndex2};
 403       if (!exists($This->{AssignedAtomTypes}{$AtomID2})) {
 404         next ATOMINDEX2;
 405       }
 406       $AtomType2 = $This->{AssignedAtomTypes}{$AtomID2};
 407 
 408       $Distance12 = $DistanceMatrix->GetValue($AtomIndex1, $AtomIndex2, $SkipIndexCheck);
 409       if ($Distance12 < $This->{MinDistance} || $Distance12 > $This->{MaxDistance}) {
 410         next ATOMINDEX2;
 411       }
 412 
 413       ATOMINDEX3: for $AtomIndex3 (($AtomIndex2 + 1) .. ($NumOfAtoms - 1)) {
 414         $AtomID3 = $This->{AtomIndexToID}{$AtomIndex3};
 415         if (!exists($This->{AssignedAtomTypes}{$AtomID3})) {
 416           next ATOMINDEX3;
 417         }
 418         $AtomType3 = $This->{AssignedAtomTypes}{$AtomID3};
 419 
 420         $Distance13 = $DistanceMatrix->GetValue($AtomIndex1, $AtomIndex3, $SkipIndexCheck);
 421         $Distance23 = $DistanceMatrix->GetValue($AtomIndex2, $AtomIndex3, $SkipIndexCheck);
 422 
 423         if ($Distance13 < $This->{MinDistance} || $Distance13 > $This->{MaxDistance}) {
 424           next ATOMINDEX3;
 425         }
 426         if ($Distance23 < $This->{MinDistance} || $Distance23 > $This->{MaxDistance}) {
 427           next ATOMINDEX3;
 428         }
 429         if ($This->{UseTriangleInequality} && !$This->_DoDistancesSatisfyTriangleInequality($Distance12, $Distance13, $Distance23)) {
 430           next ATOMINDEX3;
 431         }
 432 
 433         $AtomTripletID = $This->_GetAtomTripletID($AtomType1, $Distance23, $AtomType2, $Distance13, $AtomType3, $Distance12);
 434         if (!exists $This->{AtomTripletsCount}{$AtomTripletID}) {
 435           $This->{AtomTripletsCount}{$AtomTripletID} = 0;
 436         }
 437         $This->{AtomTripletsCount}{$AtomTripletID} += 1;
 438       }
 439     }
 440   }
 441   return $This;
 442 }
 443 
 444 # Check triangle inequality...
 445 #
 446 sub _DoDistancesSatisfyTriangleInequality {
 447   my($This, $Distance1, $Distance2, $Distance3) = @_;
 448 
 449   if ( !($Distance1 > abs($Distance2 - $Distance3) && $Distance1 < ($Distance2 + $Distance3)) ) {
 450     return 0;
 451   }
 452   if ( !($Distance2 > abs($Distance1 - $Distance3) && $Distance2 < ($Distance1 + $Distance3)) ) {
 453     return 0;
 454   }
 455   if ( !($Distance3 > abs($Distance1 - $Distance2) && $Distance3 < ($Distance1 + $Distance2)) ) {
 456     return 0;
 457   }
 458   return 1;
 459 }
 460 
 461 # Get atom triplet ID corresponding to atom types and distances corresponding to atom triplet...
 462 #
 463 sub _GetAtomTripletID {
 464   my($This, $ATx, $Dyz, $ATy, $Dxz, $ATz, $Dxy) = @_;
 465   my($AtomTripletID, @AtomIDs);
 466 
 467   @AtomIDs = ();
 468 
 469   @AtomIDs = sort("${ATx}-D${Dyz}", "${ATy}-D${Dxz}", "${ATz}-D${Dxy}");
 470   $AtomTripletID = join "-", @AtomIDs;
 471 
 472   return $AtomTripletID;
 473 }
 474 
 475 # Set final fingerpritns vector...
 476 #
 477 sub _SetFinalFingerprints {
 478   my($This) = @_;
 479   my($AtomTripletID, $Value, @Values);
 480 
 481   # Mark successful generation of fingerprints...
 482   $This->{FingerprintsGenerated} = 1;
 483 
 484   @Values = ();
 485   @{$This->{AtomTripletsIDs}} = ();
 486 
 487   for $AtomTripletID (sort keys %{$This->{AtomTripletsCount}}) {
 488     push @{$This->{AtomTripletsIDs}}, $AtomTripletID;
 489     $Value = $This->{AtomTripletsCount}{$AtomTripletID};
 490     push @Values, $Value;
 491   }
 492 
 493   # Add AtomTripletsIDs and values to fingerprint vector...
 494   $This->{FingerprintsVector}->AddValueIDs(\@{$This->{AtomTripletsIDs}});
 495   $This->{FingerprintsVector}->AddValues(\@Values);
 496 
 497   return $This;
 498 }
 499 
 500 # Get atom triplet IDs corresponding to atom triplets count values in fingerprint
 501 # vector as an array or reference to an array...
 502 #
 503 # AtomTripletIDs list differes in molecules and is generated during finalization
 504 # of fingerprints to make sure the fingerprint vector containing count values
 505 # matches the atom triplets array.
 506 #
 507 sub GetAtomTripletIDs {
 508   my($This) = @_;
 509 
 510   return wantarray ? @{$This->{AtomTripletsIDs}} : \@{$This->{AtomTripletsIDs}};
 511 }
 512 
 513 # Cache  appropriate molecule data...
 514 #
 515 sub _SetupMoleculeDataCache {
 516   my($This) = @_;
 517 
 518   # Get all atoms including hydrogens to correctly map atom indices to atom IDs for
 519   # usage of distance matrix. The hydrogen atoms are ignored during processing...
 520   #
 521   @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();
 522 
 523   # Get all atom IDs...
 524   my(@AtomIDs);
 525   @AtomIDs = ();
 526   @AtomIDs =  map { $_->GetID() } @{$This->{Atoms}};
 527 
 528   # Set AtomIndex to AtomID hash...
 529   %{$This->{AtomIndexToID}} = ();
 530   @{$This->{AtomIndexToID}}{ (0 .. $#AtomIDs) } = @AtomIDs;
 531 
 532   return $This;
 533 }
 534 
 535 # Set atomic invariants to use for atom identifiers...
 536 #
 537 sub SetAtomicInvariantsToUse {
 538   my($This, @Values) = @_;
 539   my($FirstValue, $TypeOfFirstValue, $AtomicInvariant, $SpecifiedAtomicInvariant, $AtomicInvariantValue, @SpecifiedAtomicInvariants, @AtomicInvariantsToUse);
 540 
 541   if (!@Values) {
 542     carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
 543     return;
 544   }
 545 
 546   $FirstValue = $Values[0];
 547   $TypeOfFirstValue = ref $FirstValue;
 548 
 549   @SpecifiedAtomicInvariants = ();
 550   @AtomicInvariantsToUse = ();
 551 
 552   if ($TypeOfFirstValue =~ /^ARRAY/) {
 553     push @SpecifiedAtomicInvariants, @{$FirstValue};
 554   }
 555   else {
 556     push @SpecifiedAtomicInvariants, @Values;
 557   }
 558 
 559   # Make sure specified AtomicInvariants are valid...
 560   for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
 561     if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
 562       croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
 563     }
 564     $AtomicInvariant = $SpecifiedAtomicInvariant;
 565     push @AtomicInvariantsToUse, $AtomicInvariant;
 566   }
 567 
 568   # Set atomic invariants to use...
 569   @{$This->{AtomicInvariantsToUse}} = ();
 570   push @{$This->{AtomicInvariantsToUse}}, @AtomicInvariantsToUse;
 571 
 572   return $This;
 573 }
 574 
 575 # Set functional classes to use for atom identifiers...
 576 #
 577 sub SetFunctionalClassesToUse {
 578   my($This, @Values) = @_;
 579   my($FirstValue, $TypeOfFirstValue, $FunctionalClass, $SpecifiedFunctionalClass, @SpecifiedFunctionalClasses, @FunctionalClassesToUse);
 580 
 581   if (!@Values) {
 582     carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
 583     return;
 584   }
 585 
 586   if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
 587     carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
 588     return;
 589   }
 590 
 591   $FirstValue = $Values[0];
 592   $TypeOfFirstValue = ref $FirstValue;
 593 
 594   @SpecifiedFunctionalClasses = ();
 595   @FunctionalClassesToUse = ();
 596 
 597   if ($TypeOfFirstValue =~ /^ARRAY/) {
 598     push @SpecifiedFunctionalClasses, @{$FirstValue};
 599   }
 600   else {
 601     push @SpecifiedFunctionalClasses, @Values;
 602   }
 603 
 604   # Make sure specified FunctionalClasses are valid...
 605   for $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
 606     if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) {
 607       croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
 608     }
 609     push @FunctionalClassesToUse, $SpecifiedFunctionalClass;
 610   }
 611 
 612   # Set functional classes to use...
 613   @{$This->{FunctionalClassesToUse}} = ();
 614   push @{$This->{FunctionalClassesToUse}}, @FunctionalClassesToUse;
 615 
 616   return $This;
 617 }
 618 
 619 # Initialize atom indentifier type information...
 620 #
 621 # Current supported values:
 622 #
 623 # AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
 624 # MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
 625 #
 626 sub _InitializeAtomIdentifierTypeInformation {
 627   my($This) = @_;
 628 
 629   if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 630     $This->_InitializeAtomicInvariantsAtomTypesInformation();
 631   }
 632   elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 633     $This->_InitializeFunctionalClassAtomTypesInformation();
 634   }
 635   elsif ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 636     # Nothing to do for now...
 637   }
 638   else {
 639     croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
 640   }
 641 
 642   return $This;
 643 }
 644 
 645 # Initialize atomic invariants atom types to use for generating atom IDs in atom triplets...
 646 #
 647 # Let:
 648 #   AS = Atom symbol corresponding to element symbol
 649 #
 650 #   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
 651 #   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
 652 #   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
 653 #   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 654 #   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 655 #   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 656 #   H<n>   = Number of implicit and explicit hydrogens for atom
 657 #   Ar     = Aromatic annotation indicating whether atom is aromatic
 658 #   RA     = Ring atom annotation indicating whether atom is a ring
 659 #   FC<+n/-n> = Formal charge assigned to atom
 660 #   MN<n> = Mass number indicating isotope other than most abundant isotope
 661 #   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
 662 #
 663 #   ATx = Atomic invariants atom type for atom x
 664 #   ATy = Atomic invariants atom type for atom y
 665 #   ATz = Atomic invariants atom type for atom z
 666 #
 667 #   Dxy = Distance between Px and Py
 668 #   Dxz = Distance between Px and Pz
 669 #   Dyz = Distance between Py and Pz
 670 #
 671 # Then:
 672 #
 673 #   Atom triplet AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
 674 #
 675 #     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
 676 #
 677 #  Toplogical atom triplet ID between atom IDs ATx, ATy and ATz corresponds to:
 678 #
 679 #    ATx-Dyz-ATy-Dxz-ATz-Dxy
 680 #
 681 # Except for AS which is a required atomic invariant in atom triplet AtomIDs, all other atomic invariants are
 682 # optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
 683 # AtomID specification doesn't include atomic invariants with zero or undefined values.
 684 #
 685 # Examples of atom triplet AtomIDs:
 686 #
 687 #   O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge
 688 #   O.X1.BO1.FC-1 - Hydroxyl ozygen in carboxylate with explicit negative charge
 689 #   O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon
 690 #   O.X2.BO2 - Hydroxyl ozygen in carboxylate attached to carbonyl carbon and another heavy atom
 691 #
 692 #   C.X2.BO3.H1.Ar - Aromatic carbon
 693 #
 694 sub _InitializeAtomicInvariantsAtomTypesInformation {
 695   my($This) = @_;
 696 
 697   # Default atomic invariants to use for generating atom triplet atom IDs: AS, X, BO, H, FC
 698   #
 699   @{$This->{AtomicInvariantsToUse}} = ();
 700   @{$This->{AtomicInvariantsToUse}} = ('AS', 'X', 'BO', 'H', 'FC');
 701 
 702   return $This;
 703 }
 704 
 705 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
 706 # class, to use for generating atom identifiers...
 707 #
 708 # Let:
 709 #   HBD: HydrogenBondDonor
 710 #   HBA: HydrogenBondAcceptor
 711 #   PI :  PositivelyIonizable
 712 #   NI : NegativelyIonizable
 713 #   Ar : Aromatic
 714 #   Hal : Halogen
 715 #   H : Hydrophobic
 716 #   RA : RingAtom
 717 #   CA : ChainAtom
 718 #
 719 # Then:
 720 #
 721 #   Functiononal class atom type specification for an atom corresponds to:
 722 #
 723 #     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
 724 #
 725 #   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
 726 #
 727 #   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
 728 #
 729 #     HydrogenBondDonor: NH, NH2, OH
 730 #     HydrogenBondAcceptor: N[!H], O
 731 #     PositivelyIonizable: +, NH2
 732 #     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
 733 #
 734 sub _InitializeFunctionalClassAtomTypesInformation {
 735   my($This) = @_;
 736 
 737   # Default functional class atom typess to use for generating atom identifiers
 738   # are: HBD, HBA, PI, NI, Ar, Hal
 739   #
 740   @{$This->{FunctionalClassesToUse}} = ();
 741   @{$This->{FunctionalClassesToUse}} = ('HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal');
 742 
 743   return $This;
 744 }
 745 
 746 # Clear cached molecule data...
 747 #
 748 sub _ClearMoleculeDataCache {
 749   my($This) = @_;
 750 
 751   @{$This->{Atoms}} = ();
 752 
 753   return $This;
 754 }
 755 
 756 # Return a string containg data for TopologicalAtomTripletsFingerprints object...
 757 #
 758 sub StringifyTopologicalAtomTripletsFingerprints {
 759   my($This) = @_;
 760   my($FingerprintsString);
 761 
 762   # Type of fingerprint...
 763   $FingerprintsString = "Fingerprint type: $This->{Type}; AtomIdentifierType: $This->{AtomIdentifierType}";
 764 
 765   # Min and max distance...
 766   $FingerprintsString .= "; MinDistance:  $This->{MinDistance}; MaxDistance: $This->{MaxDistance}; UseTriangleInequality: " . ($This->{UseTriangleInequality} ? "Yes" : "No");
 767 
 768   if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 769     my($AtomicInvariant, @AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants);
 770 
 771     @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
 772     %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();
 773 
 774     for $AtomicInvariant (@AtomicInvariantsOrder) {
 775       push @AtomicInvariants, "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}";
 776     }
 777 
 778     $FingerprintsString .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
 779     $FingerprintsString .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
 780     $FingerprintsString .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
 781   }
 782   elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 783     my($FunctionalClass, @FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses);
 784 
 785     @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
 786     %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
 787 
 788     for $FunctionalClass (@FunctionalClassesOrder) {
 789       push @FunctionalClasses, "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}";
 790     }
 791 
 792     $FingerprintsString .= "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
 793     $FingerprintsString .= "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
 794     $FingerprintsString .= "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
 795   }
 796 
 797   # Total number of atom triplets...
 798   $FingerprintsString .= "; NumOfAtomTriplets: " . $This->{FingerprintsVector}->GetNumOfValues();
 799 
 800   # FingerprintsVector...
 801   $FingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";
 802 
 803   return $FingerprintsString;
 804 }
 805