MayaChemTools

   1 package Fingerprints::TopologicalPharmacophoreAtomPairsFingerprints;
   2 #
   3 # $RCSfile: TopologicalPharmacophoreAtomPairsFingerprints.pm,v $
   4 # $Date: 2015/02/28 20:48:54 $
   5 # $Revision: 1.34 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Carp;
  31 use Exporter;
  32 use Fingerprints::Fingerprints;
  33 use TextUtil ();
  34 use MathUtil ();
  35 use Molecule;
  36 use AtomTypes::FunctionalClassAtomTypes;
  37 
  38 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  39 
  40 @ISA = qw(Fingerprints::Fingerprints Exporter);
  41 @EXPORT = qw();
  42 @EXPORT_OK = qw();
  43 
  44 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  45 
  46 # Setup class variables...
  47 my($ClassName);
  48 _InitializeClass();
  49 
  50 # Overload Perl functions...
  51 use overload '""' => 'StringifyTopologicalPharmacophoreAtomPairsFingerprints';
  52 
  53 # Class constructor...
  54 sub new {
  55   my($Class, %NamesAndValues) = @_;
  56 
  57   # Initialize object...
  58   my $This = $Class->SUPER::new();
  59   bless $This, ref($Class) || $Class;
  60   $This->_InitializeTopologicalPharmacophoreAtomPairsFingerprints();
  61 
  62   $This->_InitializeTopologicalPharmacophoreAtomPairsFingerprintsProperties(%NamesAndValues);
  63 
  64   return $This;
  65 }
  66 
  67 # Initialize object data...
  68 #
  69 sub _InitializeTopologicalPharmacophoreAtomPairsFingerprints {
  70   my($This) = @_;
  71 
  72   # Type of fingerprint...
  73   $This->{Type} = 'TopologicalPharmacophoreAtomPairs';
  74 
  75   # Type of vector...
  76   $This->{VectorType} = 'FingerprintsVector';
  77 
  78   # AtomPairsSetSizeToUse...
  79   #
  80   # ArbitrarySize - Corrresponds to atom pairs with non-zero count
  81   # FixedSize - Corresponds to all atom pairs with zero and non-zero count
  82   #
  83   # Possible values: ArbitrarySize or FixedSize. Default: ArbitrarySize
  84   #
  85   $This->{AtomPairsSetSizeToUse} = '';
  86 
  87   # Type of FingerprintsVector...
  88   #
  89   # OrderedNumericalValues - For ArbitrarySize value of AtomPairsSetSizeToUse
  90   # NumericalValues - For FixedSize value of AtomPairsSetSizeToUse
  91   #
  92   # Possible values: OrderedNumericalValues or NumericalValues. Default: NumericalValues
  93   #
  94   $This->{FingerprintsVectorType} = '';
  95 
  96   # Vector values precision for real values which might be generated after
  97   # normalization and fuzzification...
  98   $This->{ValuesPrecision} = 2;
  99 
 100   # Minimum and maximum bond distance between pharmacophore atom paris...
 101   $This->{MinDistance} = 1;
 102   $This->{MaxDistance} = 10;
 103 
 104   # Initialize atom types and weight information...
 105   $This->_InitializePharmacophoreAtomTypesAndWeightInformation();
 106 
 107   # Normalization methodology to use for scaling the occurance count of pharmacophore atom
 108   # pairs at various distances.
 109   #
 110   # Possible values: None, ByHeavyAtomsCount, ByAtomTypesCount. Default: None
 111   #
 112   $This->{NormalizationMethodology} = 'None';
 113 
 114   # Initialize fuzzification parameters...
 115   #
 116   $This->_InitializeFuzzificationInformation();
 117 
 118   # Pharmacophore types assigned to each heavy atom...
 119   #
 120   %{$This->{AssignedAtomTypes}} = ();
 121 
 122   # Assigned Atom types count of each type in the molecule...
 123   #
 124   %{$This->{AssignedAtomTypesCount}} = ();
 125 
 126   # All pharmacophore atom pairs between minimum and maximum distance...
 127   #
 128   @{$This->{AtomPairsIDs}} = ();
 129   %{$This->{AtomPairsCount}} = ();
 130 }
 131 
 132 # Inialize pharmacophore atom types and weight information...
 133 #
 134 sub _InitializePharmacophoreAtomTypesAndWeightInformation {
 135   my($This) = @_;
 136 
 137   # Default pharmacophore atom types to use for atom pairs fingerprint generation
 138   # are: HBD, HBA, PI, NI, H
 139   #
 140   @{$This->{AtomTypesToUse}} = ();
 141   @{$This->{AtomTypesToUse}} = sort ('HBD', 'HBA', 'PI', 'NI', 'H');
 142 
 143   # Weight of the various pharmacophore atom types to use for their contribution to atom
 144   # pair interaction. It allows to increase the importance of specific pharmacophore atom
 145   # types in the generted fingerprints.
 146   #
 147   # A value of 0 eliminates the contribution by a particular pharmacophore atom
 148   # type and 2 doubles its contribution.
 149   #
 150   my($AtomType, %AvailableAtomTypes);
 151 
 152   %AvailableAtomTypes = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
 153 
 154   %{$This->{AtomTypesWeight}} = ();
 155   for $AtomType (keys %AvailableAtomTypes) {
 156     $This->{AtomTypesWeight}{$AtomType} = 1;
 157   }
 158   return $This;
 159 }
 160 
 161 # Initialize fuzzification information...
 162 #
 163 sub _InitializeFuzzificationInformation {
 164   my($This) = @_;
 165 
 166   # To fuzz or not to fuzz atom pairs count. Default: No fuzzication
 167   #
 168   $This->{FuzzifyAtomPairsCount} = 0;
 169 
 170   # When to fuzz atom pair count...
 171   #
 172   # Possible values: BeforeNormalization or AfterNormalization. Default: AfterNormalization
 173   #
 174   $This->{FuzzificationMode} = 'AfterNormalization';
 175 
 176   # How to fuzz atom pair count...
 177   #
 178   # Possible values: FuzzyBinning or FuzzyBinSmoothing. Default: FuzzyBinning
 179   #
 180   $This->{FuzzificationMethodology} = 'FuzzyBinning';
 181 
 182   # By how much to fuzz atom pairs count...
 183   #
 184   $This->{FuzzFactor} = 0.15;
 185 
 186   return $This;
 187 }
 188 
 189 # Initialize class ...
 190 sub _InitializeClass {
 191   #Class name...
 192   $ClassName = __PACKAGE__;
 193 }
 194 
 195 # Initialize object properties....
 196 sub _InitializeTopologicalPharmacophoreAtomPairsFingerprintsProperties {
 197   my($This, %NamesAndValues) = @_;
 198 
 199   my($Name, $Value, $MethodName);
 200   while (($Name, $Value) = each  %NamesAndValues) {
 201     $MethodName = "Set${Name}";
 202     $This->$MethodName($Value);
 203   }
 204 
 205   # Make sure molecule object was specified...
 206   if (!exists $NamesAndValues{Molecule}) {
 207     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
 208   }
 209 
 210   $This->_InitializeTopologicalPharmacophoreAtomPairsFingerprintsVector();
 211 
 212   return $This;
 213 }
 214 
 215 # Initialize fingerprints vector...
 216 #
 217 sub _InitializeTopologicalPharmacophoreAtomPairsFingerprintsVector {
 218   my($This) = @_;
 219 
 220   if (!$This->{AtomPairsSetSizeToUse}) {
 221     $This->{AtomPairsSetSizeToUse} =  'ArbitrarySize';
 222   }
 223 
 224   # Vector type and type of values...
 225   $This->{VectorType} = 'FingerprintsVector';
 226 
 227   if ($This->{AtomPairsSetSizeToUse} =~ /^FixedSize$/i) {
 228     $This->{FingerprintsVectorType} = 'OrderedNumericalValues';
 229   }
 230   else {
 231     $This->{FingerprintsVectorType} = 'NumericalValues';
 232   }
 233 
 234   $This->_InitializeFingerprintsVector();
 235 }
 236 
 237 # Set atom parits set size to use...
 238 #
 239 sub SetAtomPairsSetSizeToUse {
 240   my($This, $Value) = @_;
 241 
 242   if ($This->{AtomPairsSetSizeToUse}) {
 243     croak "Error: ${ClassName}->SetAtomPairsSetSizeToUse: Can't change size:  It's already set...";
 244   }
 245 
 246   if ($Value !~ /^(ArbitrarySize|FixedSize)$/i) {
 247     croak "Error: ${ClassName}->SetAtomPairsSetSizeToUse: Unknown AtomPairsSetSizeToUse value: $Value; Supported values: ArbitrarySize or FixedSize";
 248   }
 249 
 250   $This->{AtomPairsSetSizeToUse} = $Value;
 251 
 252   return $This;
 253 }
 254 
 255 # Disable change of AvailableAtomTypes...
 256 #
 257 sub SetAvailableAtomTypes {
 258   my($This) = @_;
 259 
 260   carp "Warning: ${ClassName}->SetAvailableAtomTypes: AvailableAtomTypes value can't be set...";
 261 
 262   return $This;
 263 }
 264 
 265 # Set atom types to use for atom pairs...
 266 #
 267 sub SetAtomTypesToUse {
 268   my($This, @Values) = @_;
 269   my($FirstValue, $TypeOfFirstValue, $AtomType, $SpecifiedAtomType, @SpecifiedAtomTypes, @AtomTypesToUse);
 270 
 271   if (!@Values) {
 272     carp "Warning: ${ClassName}->SetAtomTypesToUse: No values specified...";
 273     return;
 274   }
 275 
 276   $FirstValue = $Values[0];
 277   $TypeOfFirstValue = ref $FirstValue;
 278 
 279   @SpecifiedAtomTypes = ();
 280   @AtomTypesToUse = ();
 281 
 282   if ($TypeOfFirstValue =~ /^ARRAY/) {
 283     push @SpecifiedAtomTypes, @{$FirstValue};
 284   }
 285   else {
 286     push @SpecifiedAtomTypes, @Values;
 287   }
 288 
 289   # Make sure specified AtomTypes are valid...
 290   for $SpecifiedAtomType (@SpecifiedAtomTypes) {
 291     if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedAtomType)) {
 292       croak "Error: ${ClassName}->SetAtomTypesToUse: Specified atom type, $SpecifiedAtomType, is not supported...\n ";
 293     }
 294     $AtomType = $SpecifiedAtomType;
 295     push @AtomTypesToUse, $AtomType;
 296   }
 297 
 298   # Set atom types to use...
 299   @{$This->{AtomTypesToUse}} = ();
 300   push @{$This->{AtomTypesToUse}}, sort @AtomTypesToUse;
 301 
 302   return $This;
 303 }
 304 
 305 # Set vector values precision for real values which might be generated after
 306 # normalization and fuzzification...
 307 #
 308 sub SetValuesPrecision {
 309   my($This, $Value) = @_;
 310 
 311   if (!TextUtil::IsPositiveInteger($Value)) {
 312     croak "Error: ${ClassName}->SetValuesPrecision: ValuesPrecision value, $Value, is not valid:  It must be a positive integer...";
 313   }
 314   $This->{ValuesPrecision} = $Value;
 315 
 316   return $This;
 317 }
 318 
 319 # Set minimum distance for pharmacophore atom pairs...
 320 #
 321 sub SetMinDistance {
 322   my($This, $Value) = @_;
 323 
 324   if (!TextUtil::IsInteger($Value)) {
 325     croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid:  It must be an integer...";
 326   }
 327   $This->{MinDistance} = $Value;
 328 
 329   return $This;
 330 }
 331 
 332 # Set maximum distance for pharmacophore atom pairs...
 333 #
 334 sub SetMaxDistance {
 335   my($This, $Value) = @_;
 336 
 337   if (!TextUtil::IsPositiveInteger($Value)) {
 338     croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid:  It must be a positive integer...";
 339   }
 340   $This->{MaxDistance} = $Value;
 341 
 342   return $This;
 343 }
 344 
 345 # Set normalization methodology to use for scaling the occurance count of pharmacophore atom
 346 # pairs over distance range beween minimum and maximum distance.
 347 #
 348 sub SetNormalizationMethodology {
 349   my($This, $Value) = @_;
 350 
 351   if ($Value !~ /^(ByHeavyAtomsCount|ByAtomTypesCount|None)$/i) {
 352     croak "Error: ${ClassName}->SetNormalizationMethodology: NormalizationMethodology value, $Value, is not valid. Supported values: None, ByHeavyAtomsCount or ByAtomTypesCount...";
 353   }
 354 
 355   $This->{NormalizationMethodology} = $Value;
 356 
 357   return $This;
 358 }
 359 
 360 # Set weight of the various pharmacophore atom types to use for their contribution to atom
 361 # pair interaction using atom types label and value hash.
 362 #
 363 # It allows to increase the importance of specific pharmacophore atom
 364 # types in the generted fingerprints.
 365 #
 366 # A value of 0 eliminates the contribution by a particular pharmacophore atom
 367 # type and 2 doubles its contribution.
 368 #
 369 sub SetAtomTypesWeight {
 370   my($This, %AtomTypesWeight) = @_;
 371   my($AtomType, $Weight);
 372 
 373   while (($AtomType, $Weight) = each %AtomTypesWeight) {
 374     if (!exists $This->{AtomTypesWeight}{$AtomType}) {
 375       croak "Error: ${ClassName}->SetAtomTypesWeight: AtomTypeWeight for $AtomType couldn't be set: Unknown atom type...";
 376     }
 377     if (!(TextUtil::IsFloat($Weight) && ($Weight >= 0))) {
 378       croak "Error: ${ClassName}->SetAtomTypesWeight: Specified weight value, $Weight, for AtomType, $AtomType, muts be >= 0...";
 379     }
 380     $This->{AtomTypesWeight}{$AtomType}  = $Weight;
 381   }
 382 }
 383 
 384 # Set fuzzification methodology to use for fuzzifying atom pairs count...
 385 #
 386 sub SetFuzzificationMethodology {
 387   my($This, $Value) = @_;
 388 
 389   if ($Value !~ /^(FuzzyBinning|FuzzyBinSmoothing)$/i) {
 390     croak "Error: ${ClassName}->SetFuzzificationMethodology: FuzzificationMethodology value, $Value, is not valid. Supported values: FuzzyBinning or FuzzyBinSmoothing...";
 391   }
 392 
 393   $This->{FuzzificationMethodology} = $Value;
 394 
 395   return $This;
 396 }
 397 
 398 # Set fuzzification mode for fuzzifying atom pairs count...
 399 #
 400 sub SetFuzzificationMode {
 401   my($This, $Value) = @_;
 402 
 403   if ($Value !~ /^(BeforeNormalization|AfterNormalization)$/i) {
 404     croak "Error: ${ClassName}->SetFuzzificationMode: FuzzificationMode value, $Value, is not valid. Supported values: BeforeNormalization or AfterNormalization...";
 405   }
 406 
 407   $This->{FuzzificationMode} = $Value;
 408 
 409   return $This;
 410 }
 411 
 412 # Set fuzz factor values used for fuzzifying atom pairs count...
 413 #
 414 sub SetFuzzFactor {
 415   my($This, $Value) = @_;
 416 
 417   if ($This->{FuzzificationMethodology} =~ /^FuzzyBinning$/i) {
 418     if (!(TextUtil::IsFloat($Value) && $Value >=0 && $Value <= 1.0)) {
 419       croak "Error: ${ClassName}->SetFuzzFactor: Specified fuzz factor value, $Value, must be >= 0 and <= 1...";
 420     }
 421   }
 422   elsif ($This->{FuzzificationMethodology} =~ /^FuzzyBinSmoothing$/i) {
 423     if (!(TextUtil::IsFloat($Value) && $Value >=0 && $Value <= 0.5)) {
 424       croak "Error: ${ClassName}->SetFuzzFactor: Specified fuzz factor value, $Value, must be >= 0 and <= 0.5...";
 425     }
 426   }
 427   else {
 428     croak "Error: ${ClassName}->SetFuzzFactor: Fuzz factor value can't be changed: Uknown FuzzificationMethodology: $This->{FuzzificationMethodology}...";
 429   }
 430 
 431   $This->{FuzzFactor} = $Value;
 432 
 433   return $This;
 434 }
 435 
 436 # Generate fingerprints description...
 437 #
 438 sub GetDescription {
 439   my($This) = @_;
 440 
 441   # Is description explicity set?
 442   if (exists $This->{Description}) {
 443     return $This->{Description};
 444   }
 445 
 446   # Generate fingerprints description...
 447 
 448   return "$This->{Type}:$This->{AtomPairsSetSizeToUse}:MinDistance$This->{MinDistance}:MaxDistance$This->{MaxDistance}";
 449 }
 450 
 451 # Generate topological pharmacophore atom pairs [ Ref 60-62, Ref 65, Ref 68 ] fingerprints...
 452 #
 453 # Methodology:
 454 #   . Generate a distance matrix.
 455 #   . Assign pharmacophore atom types to all the atoms.
 456 #   . Initialize pharmacophore atom pairs basis set for all unique pairs between
 457 #     minimum and maximum distance.
 458 #   . Using distance matrix and pharmacophore atom types, count occurance of
 459 #     unique atom pairs between specified distance range - It corresponds to the
 460 #     correlation-vector for the atom pairs.
 461 #       . Weigh contribution of each atom type to atom pair interaction by its specified
 462 #         weight during occurance count.
 463 #       . Assign count to appropriate distance bin for a specific atom pair
 464 #
 465 #   . Normalize occurance count of pharmacophore atom pairs by heavy atom count
 466 #     or sum of AtomTypeCounts of each pharmacophore atom type in the atom pair
 467 #     at a specific distance.
 468 #
 469 #   . Fuzzify occurance count of pharmacophore atom pairs using FuzzyBinning or
 470 #     FuzzySmothing methodology.
 471 #
 472 # Notes:
 473 #   . Hydrogen atoms are ignored during the fingerprint generation.
 474 #
 475 sub GenerateFingerprints {
 476   my($This) = @_;
 477 
 478   if ($This->{MinDistance} > $This->{MaxDistance}) {
 479     croak "Error: ${ClassName}->GenerateTopologicalPharmacophoreAtomPairsFingerprints: No fingerpritns generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
 480   }
 481 
 482   # Cache appropriate molecule data...
 483   $This->_SetupMoleculeDataCache();
 484 
 485   # Generate distance matrix...
 486   if (!$This->_SetupDistanceMatrix()) {
 487     carp "Warning: ${ClassName}->GenerateFingerprints: Fingerprints generation didn't succeed: Couldn't generate distance matrix...";
 488     return $This;
 489   }
 490 
 491   # Assign pharmacohore atom types to all heavy atoms...
 492   $This->_AssignPharmacophoreAtomTypes();
 493 
 494   # Initialize values of all possible pharmacohore atom pairs...
 495   $This->_InitializePharmacophoreAtomPairs();
 496 
 497   # Count atom pairs...
 498   $This->_CountPharmacohoreAtomPairs();
 499 
 500   # Fuzzify atom pairs count...
 501   if ($This->{FuzzificationMode} =~ /^BeforeNormalization$/i) {
 502     $This->_FuzzifyPharmacohoreAtomPairsCount();
 503   }
 504 
 505   # Normalize atom pairs count...
 506   $This->_NormalizePharmacohoreAtomPairsCount();
 507 
 508   # Fuzzify atom pairs count...
 509   if ($This->{FuzzificationMode} =~ /^AfterNormalization$/i) {
 510     $This->_FuzzifyPharmacohoreAtomPairsCount();
 511   }
 512 
 513   # Set final fingerprints...
 514   $This->_SetFinalFingerprints();
 515 
 516   # Clear cached molecule data...
 517   $This->_ClearMoleculeDataCache();
 518 
 519   return $This;
 520 }
 521 
 522 # Setup distance matrix...
 523 #
 524 sub _SetupDistanceMatrix {
 525   my($This) = @_;
 526 
 527   $This->{DistanceMatrix} = $This->GetMolecule()->GetDistanceMatrix();
 528 
 529   if (!$This->{DistanceMatrix}) {
 530     return undef;
 531   }
 532 
 533   return $This;
 534 }
 535 
 536 # Assign pharmacohore atom types to all heavy atoms and count each atom
 537 # types assigned...
 538 #
 539 sub _AssignPharmacophoreAtomTypes {
 540   my($This) = @_;
 541   my($Atom, $AtomID, $AtomType, $AssignedAtomType, $FunctionalClassAtomTypes);
 542 
 543   # Assign topological pharmacophore atom types...
 544   $FunctionalClassAtomTypes = new AtomTypes::FunctionalClassAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => 1, 'FunctionalClassesToUse' => $This->{AtomTypesToUse});
 545   $FunctionalClassAtomTypes->AssignAtomTypes();
 546 
 547   %{$This->{AssignedAtomTypes}} = ();
 548 
 549   # Initialize assigned atom types count...
 550   %{$This->{AssignedAtomTypesCount}} = ();
 551   for $AtomType (@{$This->{AtomTypesToUse}}) {
 552     $This->{AssignedAtomTypesCount}{$AtomType} = 0;
 553   }
 554 
 555   $This->{HeavyAtomCount} = 0;
 556 
 557   ATOM: for $Atom (@{$This->{Atoms}}) {
 558     if ($Atom->IsHydrogen()) {
 559       next ATOM;
 560     }
 561     $This->{HeavyAtomCount} += 1;
 562 
 563     $AtomID = $Atom->GetID();
 564 
 565     # Collect all possible pharmacophore atom types which could be assigned to atom...
 566     my(@AtomTypes);
 567 
 568     @AtomTypes = ();
 569     $AssignedAtomType = $FunctionalClassAtomTypes->GetAtomType($Atom);
 570     if ($AssignedAtomType && $AssignedAtomType !~ /^None$/i) {
 571       push @AtomTypes, split /\./, $AssignedAtomType;
 572       for $AtomType (@AtomTypes) {
 573         $This->{AssignedAtomTypesCount}{$AtomType} += 1;
 574       }
 575     }
 576 
 577     # Assign phramacophore types to atom...
 578     $AtomID = $Atom->GetID();
 579     $This->{AssignedAtomTypes}{$AtomID} = \@AtomTypes;
 580   }
 581   return $This;
 582 }
 583 
 584 # Initialize values of all possible pharmacohore atom pairs...
 585 #
 586 # Let:
 587 #   Dmin = Minimum distance correspoding to number of bonds between two atoms
 588 #   Dmax = Maximum distance correspoding to number of bonds between two atoms
 589 #   D = Distance correspoding to number of bonds between two atoms
 590 #
 591 #   P = Number of pharmacophore atom types to consider
 592 #   PPDn = Number of possible unique pharmacophore atom pairs at a distance Dn
 593 #
 594 #   PPT = Total number of possible pharmacophore atom pairs at all distances between Dmin and Dmax
 595 #
 596 # Then:
 597 #
 598 #   PPD =  (P * (P - 1))/2 + P
 599 #
 600 #   PPT = ((Dmax - Dmin) + 1) * ((P * (P - 1))/2 + P)
 601 #       = ((Dmax - Dmin) + 1) * PPD
 602 #
 603 #
 604 # So for default values of Dmin = 1, Dmax = 10 and P = 5,
 605 #
 606 #   PPD =  (5 * (5 - 1))/2 + 5 = 15
 607 #   PPT = ((10 - 1) + 1) * 15 = 150
 608 #
 609 # the pharmacophore atom pairs bais set includes 150 values.
 610 #
 611 sub _InitializePharmacophoreAtomPairs {
 612   my($This) = @_;
 613   my($Distance, $Index1, $Index2, $AtomType1, $AtomType2);
 614 
 615   %{$This->{AtomPairsCount}} = ();
 616 
 617   for $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
 618     %{$This->{AtomPairsCount}{$Distance}} = ();
 619 
 620     for $Index1 (0 .. $#{$This->{AtomTypesToUse}}) {
 621       $AtomType1 = $This->{AtomTypesToUse}[$Index1];
 622       %{$This->{AtomPairsCount}{$Distance}{$AtomType1}} = ();
 623 
 624       for $Index2 ($Index1 .. $#{$This->{AtomTypesToUse}}) {
 625         $AtomType2 = $This->{AtomTypesToUse}[$Index2];
 626         $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} = 0;
 627       }
 628     }
 629   }
 630   return $This;
 631 }
 632 
 633 # Count pharmacophore atom pairs between mininum and maximum distance at each
 634 # distance using distance matrix and pharmacophore atom types assiged to each heavy
 635 # atom.
 636 #
 637 # Let:
 638 #   Px = Pharmacophore atom type x
 639 #   Py = Pharmacophore atom type y
 640 #   Dn = Distance between Px and Py in specified distance range
 641 #
 642 # Then:
 643 #   Px-Dn-Py = Pharmacophore atom pair ID for atom types Px and Py at distance Dn
 644 #
 645 # For example: H-D1-H, H-D2-HBA, PI-D5-PI and so on
 646 #
 647 # Notes:
 648 #   . The row and column indices of distance matrix correspond to atom indices.
 649 #   . Distance value of BigNumber implies the atom is not connected to any other atom.
 650 #   . Due to symmetric nature of distance matrix, only upper or lower triangular matrix
 651 #     needs to be processed during identification and count of pharmacophore atom pairs.
 652 #
 653 sub _CountPharmacohoreAtomPairs {
 654   my($This) = @_;
 655   my($NumOfRows, $NumOfCols, $RowIndex, $ColIndex, $DistanceMatrix, $Distance, $AtomID1, $AtomID2, $AtomType1, $AtomType2, $SkipIndexCheck, $CountIncrement);
 656 
 657   $DistanceMatrix = $This->{DistanceMatrix};
 658   ($NumOfRows, $NumOfCols) = $DistanceMatrix->GetSize();
 659   $SkipIndexCheck = 0;
 660 
 661   ROWINDEX: for $RowIndex (0 .. ($NumOfRows - 1) ) {
 662     $AtomID1 = $This->{AtomIndexToID}{$RowIndex};
 663     if ( !((exists($This->{AssignedAtomTypes}{$AtomID1}) && @{$This->{AssignedAtomTypes}{$AtomID1}})) ) {
 664       next ROWINDEX;
 665     }
 666 
 667     COLINDEX: for $ColIndex ($RowIndex .. ($NumOfCols - 1) ) {
 668       $AtomID2 = $This->{AtomIndexToID}{$ColIndex};
 669       if ( !((exists($This->{AssignedAtomTypes}{$AtomID2}) && @{$This->{AssignedAtomTypes}{$AtomID2}})) ) {
 670         next COLINDEX;
 671       }
 672 
 673       $Distance = $DistanceMatrix->GetValue($RowIndex, $ColIndex, $SkipIndexCheck);
 674       if ($Distance < $This->{MinDistance} || $Distance > $This->{MaxDistance}) {
 675         next COLINDEX;
 676       }
 677 
 678       ATOMTYPE1: for $AtomType1 (@{$This->{AssignedAtomTypes}{$AtomID1}}) {
 679         if ($This->{AtomTypesWeight}{$AtomType1} == 0) {
 680           next ATOMTYPE1;
 681         }
 682         ATOMTYPE2: for $AtomType2 (@{$This->{AssignedAtomTypes}{$AtomID2}}) {
 683           if ($This->{AtomTypesWeight}{$AtomType2} == 0) {
 684             next ATOMTYPE2;
 685           }
 686           $CountIncrement = $This->{AtomTypesWeight}{$AtomType1} * $This->{AtomTypesWeight}{$AtomType2};
 687           if ($AtomType1 le $AtomType2) {
 688             $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} += $CountIncrement;
 689           }
 690           else {
 691             $This->{AtomPairsCount}{$Distance}{$AtomType2}{$AtomType1} += $CountIncrement;
 692           }
 693         }
 694       }
 695     }
 696   }
 697   return $This;
 698 }
 699 
 700 # Normalize the occurance count of pharmacophore atom pairs over the specified distance
 701 # range...
 702 #
 703 sub _NormalizePharmacohoreAtomPairsCount {
 704   my($This) = @_;
 705 
 706   METHODOLOGY: {
 707     if ($This->{NormalizationMethodology} =~ /^None$/i) {
 708       last METHODOLOGY;
 709     }
 710     if ($This->{NormalizationMethodology} =~ /^ByHeavyAtomsCount$/i) {
 711       $This->_NormalizeAtomPairsCountByHeavyAtomsCount();
 712       last METHODOLOGY;
 713     }
 714     if ($This->{NormalizationMethodology} =~ /^ByAtomTypesCount$/i) {
 715       $This->_NormalizeAtomPairsCountByAtomTypesCount();
 716       last METHODOLOGY;
 717     }
 718     croak "Error: ${ClassName}->_NormalizePharmacohoreAtomPairsCount: Unknown NormalizationMethodology: $This->{NormalizationMethodology}...";
 719   }
 720   return $This;
 721 }
 722 
 723 
 724 # Normalize the occurance count of pharmacophore atom pairs at various distances by
 725 # heavy atom count...
 726 #
 727 sub _NormalizeAtomPairsCountByHeavyAtomsCount {
 728   my($This) = @_;
 729   my($Distance, $AtomType1, $AtomType2);
 730 
 731   if ($This->{HeavyAtomCount} == 0) {
 732     return $This;
 733   }
 734 
 735   for $Distance (keys %{$This->{AtomPairsCount}} ) {
 736     for $AtomType1 (keys %{$This->{AtomPairsCount}{$Distance}} ) {
 737       ATOMTYPE2: for $AtomType2 (keys %{$This->{AtomPairsCount}{$Distance}{$AtomType1}} ) {
 738         if ($This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} == 0) {
 739           next ATOMTYPE2;
 740         }
 741         $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} /= $This->{HeavyAtomCount};
 742       }
 743     }
 744   }
 745   return $This;
 746 }
 747 
 748 # Normalize the occurance count of pharmacophore atom pairs at various distances by
 749 # dividing it using sum of the count of each pharmacophore atom type present in the
 750 # molecule for the corresponding atom pair.
 751 #
 752 sub _NormalizeAtomPairsCountByAtomTypesCount {
 753   my($This) = @_;
 754   my($Distance, $AtomType1, $AtomType2, $AtomType1Count, $AtomType2Count, $NormalizationFactor);
 755 
 756   for $Distance (keys %{$This->{AtomPairsCount}} ) {
 757     for $AtomType1 (keys %{$This->{AtomPairsCount}{$Distance}} ) {
 758       ATOMTYPE2: for $AtomType2 (keys %{$This->{AtomPairsCount}{$Distance}{$AtomType1}} ) {
 759         if ($This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} == 0) {
 760           next ATOMTYPE2;
 761         }
 762         $NormalizationFactor = $This->{AssignedAtomTypesCount}{$AtomType1} + $This->{AssignedAtomTypesCount}{$AtomType2};
 763         $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} /= $NormalizationFactor;
 764       }
 765     }
 766   }
 767   return $This;
 768 }
 769 
 770 # Fuzzify pharmacophore atom pairs count...
 771 #
 772 # Let:
 773 #   Px = Pharmacophore atom type x
 774 #   Py = Pharmacophore atom type y
 775 #
 776 #   PPxy = Pharmacophore atom pair between atom type Px and Py
 777 #
 778 #   PPxyDn = Pharmacophore atom pairs count between atom type Px and Py at distance Dn
 779 #   PPxyDn-1 = Pharmacophore atom pairs count between atom type Px and Py at distance Dn - 1
 780 #   PPxyDn+1 = Pharmacophore atom pairs count between atom type Px and Py at distance Dn + 1
 781 #
 782 #   FF = FuzzFactor for FuzzyBinning and FuzzyBinSmoothing
 783 #
 784 # Then:
 785 #
 786 # For FuzzyBinning:
 787 #
 788 #   PPxyDn = PPxyDn (Unchanged)
 789 #
 790 #   PPxyDn-1 = PPxyDn-1 + PPxyDn * FF
 791 #   PPxyDn+1 = PPxyDn+1 + PPxyDn * FF
 792 #
 793 # For FuzzyBinSmoothing:
 794 #
 795 #   PPxyDn = PPxyDn - PPxyDn * 2FF for Dmin < Dn < Dmax
 796 #   PPxyDn = PPxyDn - PPxyDn * FF for Dn = Dmin or Dmax
 797 #
 798 #   PPxyDn-1 = PPxyDn-1 + PPxyDn * FF
 799 #   PPxyDn+1 = PPxyDn+1 + PPxyDn * FF
 800 #
 801 # In both fuzzification schemes, a value of 0 for FF implies no fuzzification of occurance counts.
 802 # A value of 1 during FuzzyBinning corresponds to maximum fuzzification of occurance counts;
 803 # however, a value of 1 during FuzzyBinSmoothing ends up completely distributing the value over
 804 # the previous and next distance bins.
 805 #
 806 # So for default value of FuzzFactor (FF) 0.15, the occurance count of pharmacohore atom pairs
 807 # at distance Dn during FuzzyBinning is left unchanged and the counts at distances Dn -1 and Dn + 1
 808 # are incremened by PPxyDn * 0.15.
 809 #
 810 # And during FuzzyBinSmoothing the occurance counts at Distance Dn is scaled back using multiplicate
 811 # factor of (1 - 2*0.15) and the occurance counts at distances Dn -1 and Dn + 1 are incremened by
 812 # PPxyDn * 0.15. In otherwords, occurance bin count is smoothed out by distributing it over the
 813 # previous and next distance value.
 814 #
 815 sub _FuzzifyPharmacohoreAtomPairsCount {
 816   my($This) = @_;
 817   my($Index1, $Index2, $AtomType1, $AtomType2, $CurrentDistance, $CurrentCount, $NextDistance, $NextCount, $PreviousDistance, $ModifyCurrentCount, $ChangeInCountValue);
 818 
 819   if (!($This->{FuzzifyAtomPairsCount} && $This->{FuzzFactor} > 0)) {
 820     return $This;
 821   }
 822 
 823   $ModifyCurrentCount = ($This->{FuzzificationMethodology} =~ /^FuzzyBinSmoothing$/i) ? 1 : 0;
 824 
 825   for $Index1 (0 .. $#{$This->{AtomTypesToUse}}) {
 826     $AtomType1 = $This->{AtomTypesToUse}[$Index1];
 827     for $Index2 ($Index1 .. $#{$This->{AtomTypesToUse}}) {
 828       $AtomType2 = $This->{AtomTypesToUse}[$Index2];
 829 
 830       $CurrentCount = 0; $NextCount = 0;
 831 
 832       $NextDistance = $This->{MinDistance};
 833       $NextCount = $This->{AtomPairsCount}{$NextDistance}{$AtomType1}{$AtomType2};
 834 
 835       DISTANCE: for $CurrentDistance ($This->{MinDistance} .. $This->{MaxDistance}) {
 836         $NextDistance = $CurrentDistance + 1;
 837         $PreviousDistance = $CurrentDistance - 1;
 838 
 839         $CurrentCount = $NextCount;
 840         $NextCount = ($CurrentDistance < $This->{MaxDistance}) ? $This->{AtomPairsCount}{$NextDistance}{$AtomType1}{$AtomType2} : 0;
 841 
 842         if ($CurrentCount == 0) {
 843           # No contribution to fuzzy binning from this distance...
 844           next DISTANCE;
 845         }
 846 
 847         $ChangeInCountValue = $CurrentCount * $This->{FuzzFactor};
 848 
 849         if ($CurrentDistance > $This->{MinDistance}) {
 850           # Increment count at previous distance...
 851           $This->{AtomPairsCount}{$PreviousDistance}{$AtomType1}{$AtomType2} += $ChangeInCountValue;
 852         }
 853 
 854         if ($ModifyCurrentCount) {
 855           # Decrement count at current distance for FuzzyBinSmoothing...
 856           if ($CurrentDistance > $This->{MinDistance} && $CurrentDistance < $This->{MaxDistance}) {
 857             $This->{AtomPairsCount}{$CurrentDistance}{$AtomType1}{$AtomType2} -= 2 * $ChangeInCountValue;
 858           }
 859           else {
 860             $This->{AtomPairsCount}{$CurrentDistance}{$AtomType1}{$AtomType2} -= $ChangeInCountValue;
 861           }
 862         }
 863 
 864         if ($CurrentDistance < $This->{MaxDistance}) {
 865           # Increment count at next distance...
 866           $This->{AtomPairsCount}{$NextDistance}{$AtomType1}{$AtomType2} += $ChangeInCountValue;
 867         }
 868       }
 869     }
 870   }
 871   return $This;
 872 }
 873 
 874 # Set final fingerpritns vector...
 875 #
 876 sub _SetFinalFingerprints {
 877   my($This) = @_;
 878   my($Distance, $Index1, $Index2, $AtomType1, $AtomType2, $Value, $RoundOffValues, $ValuesPrecision, $UseArbitrarySetSize, @Values);
 879 
 880   # Mark successful generation of fingerprints...
 881   $This->{FingerprintsGenerated} = 1;
 882 
 883   @Values = ();
 884   @{$This->{AtomPairsIDs}} = ();
 885 
 886   # Do values need to be rounded off?
 887   $RoundOffValues = (($This->{NormalizationMethodology} !~ /^None$/i) || ($This->{FuzzifyAtomPairsCount})) ? 1 : 0;
 888   $ValuesPrecision = $This->{ValuesPrecision};
 889 
 890   # Is it an ArbitraySize atom pairs set size?
 891   $UseArbitrarySetSize = $This->{AtomPairsSetSizeToUse} =~ /^ArbitrarySize$/i ? 1 : 0;
 892 
 893   # Collect all atom paris count values...
 894   for $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
 895     for $Index1 (0 .. $#{$This->{AtomTypesToUse}}) {
 896       $AtomType1 = $This->{AtomTypesToUse}[$Index1];
 897       INDEX2: for $Index2 ($Index1 .. $#{$This->{AtomTypesToUse}}) {
 898         $AtomType2 = $This->{AtomTypesToUse}[$Index2];
 899 
 900         # Atom pair count...
 901         $Value = $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2};
 902         if ($RoundOffValues) {
 903           $Value = MathUtil::round($Value, $This->{ValuesPrecision}) + 0;
 904         }
 905 
 906         # Ignore or not to ignore...
 907         if ($UseArbitrarySetSize && $Value == 0) {
 908           next INDEX2;
 909         }
 910 
 911         push @{$This->{AtomPairsIDs}}, "${AtomType1}-D${Distance}-${AtomType2}";
 912         push @Values, $Value;
 913       }
 914     }
 915   }
 916 
 917   # Add AtomPairsIDs and count values to fingerprint vector...
 918   $This->{FingerprintsVector}->AddValueIDs(\@{$This->{AtomPairsIDs}});
 919   $This->{FingerprintsVector}->AddValues(\@Values);
 920 
 921   return $This;
 922 }
 923 
 924 # Get pharmacophore atom pair IDs corresponding to atom pairs count values in
 925 # fingerprint vector as an array or reference to an array...
 926 #
 927 # AtomPairIDs list  is generated during finalization  of fingerprints  and the fingerprint
 928 # vector containing count values matches the atom pairs array.
 929 #
 930 #
 931 sub GetAtomPairIDs {
 932   my($This) = @_;
 933 
 934   return wantarray ? @{$This->{AtomPairsIDs}} : \@{$This->{AtomPairsIDs}};
 935 }
 936 
 937 # Cache  appropriate molecule data...
 938 #
 939 sub _SetupMoleculeDataCache {
 940   my($This) = @_;
 941 
 942   # Get all atoms including hydrogens to correctly map atom indices to atom IDs for
 943   # usage of distance matrix. The hydrogen atoms are ignored during processing...
 944   #
 945   @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();
 946 
 947   # Get all atom IDs...
 948   my(@AtomIDs);
 949   @AtomIDs = ();
 950   @AtomIDs =  map { $_->GetID() } @{$This->{Atoms}};
 951 
 952   # Set AtomIndex to AtomID hash...
 953   %{$This->{AtomIndexToID}} = ();
 954   @{$This->{AtomIndexToID}}{ (0 .. $#AtomIDs) } = @AtomIDs;
 955 
 956   return $This;
 957 }
 958 
 959 # Clear cached molecule data...
 960 #
 961 sub _ClearMoleculeDataCache {
 962   my($This) = @_;
 963 
 964   @{$This->{Atoms}} = ();
 965 
 966   return $This;
 967 }
 968 
 969 
 970 # Return a string containg data for TopologicalPharmacophoreAtomPairsFingerprints object...
 971 sub StringifyTopologicalPharmacophoreAtomPairsFingerprints {
 972   my($This) = @_;
 973   my($FingerprintsString);
 974 
 975   # Type of fingerprint...
 976   $FingerprintsString = "Fingerprint type: $This->{Type}; AtomPairsSetSizeToUse: $This->{AtomPairsSetSizeToUse}";
 977 
 978   # Min and max distance...
 979   $FingerprintsString .= "; MinDistance:  $This->{MinDistance}; MaxDistance: $This->{MaxDistance}";
 980 
 981   # Pharmacophore type labels and description...
 982   my($AtomType, @AtomTypes, @AtomTypesOrder, %AvailableAtomTypes);
 983 
 984   @AtomTypesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
 985   %AvailableAtomTypes = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
 986 
 987   @AtomTypes = ();
 988   for $AtomType (@AtomTypesOrder) {
 989     push @AtomTypes, "$AtomType: $AvailableAtomTypes{$AtomType}";
 990   }
 991 
 992   $FingerprintsString .= "; AtomTypesToUse: <" . TextUtil::JoinWords(\@{$This->{AtomTypesToUse}}, ", ", 0) . ">";
 993   $FingerprintsString .= "; AtomTypesOrder: <" . TextUtil::JoinWords(\@AtomTypesOrder, ", ", 0) . ">";
 994   $FingerprintsString .= "; AvailableAtomTypes: <" . TextUtil::JoinWords(\@AtomTypes, ", ", 0) . ">";
 995 
 996   # Normalization method...
 997   $FingerprintsString .= "; NormalizationMethodology: $This->{NormalizationMethodology}";
 998 
 999   # Weights...
1000   my($FirstLabel, $Label, $Weight);
1001 
1002   $FingerprintsString .= "; AtomTypesWeight <Labels: Weight>: <";
1003   $FirstLabel = 1;
1004   for $Label (sort @{$This->{AtomTypesToUse}}) {
1005     $Weight = $This->{AtomTypesWeight}{$Label};
1006     if ($FirstLabel) {
1007       $FirstLabel = 0;
1008       $FingerprintsString .= " ${Label}: ${Weight}";
1009     }
1010     else {
1011       $FingerprintsString .= "; ${Label}: ${Weight}";
1012     }
1013   }
1014   $FingerprintsString .= ">";
1015 
1016   # Fuzzification of count...
1017   my($FuzzifyFlag);
1018   $FuzzifyFlag = $This->{FuzzifyAtomPairsCount} ? "Yes" : "No";
1019   $FingerprintsString .= "; FuzzifyAtomPairsCount: $FuzzifyFlag; FuzzificationMode: $This->{FuzzificationMode}; FuzzificationMethodology: $This->{FuzzificationMethodology}; FuzzFactor: $This->{FuzzFactor}";
1020 
1021   # Total number of pharmacophore atom pairs...
1022   $FingerprintsString .= "; NumOfAtomPairs: " . $This->{FingerprintsVector}->GetNumOfValues();
1023 
1024   # FingerprintsVector...
1025   $FingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";
1026 
1027   return $FingerprintsString;
1028 }
1029