package Fingerprints::TopologicalPharmacophoreAtomPairsFingerprints;
#
# $RCSfile: TopologicalPharmacophoreAtomPairsFingerprints.pm,v $
# $Date: 2015/02/28 20:48:54 $
# $Revision: 1.34 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Fingerprints::Fingerprints;
use TextUtil ();
use MathUtil ();
use Molecule;
use AtomTypes::FunctionalClassAtomTypes;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(Fingerprints::Fingerprints Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Overload Perl functions...
use overload '""' => 'StringifyTopologicalPharmacophoreAtomPairsFingerprints';

# Class constructor...
#
# Takes a list of property names and values. Each pair is applied through the
# corresponding Set<Name> method; a Molecule entry is mandatory and its absence
# makes the properties initializer croak.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Initialize object...
  my $This = $Class->SUPER::new();
  bless $This, ref($Class) || $Class;
  $This->_InitializeTopologicalPharmacophoreAtomPairsFingerprints();

  $This->_InitializeTopologicalPharmacophoreAtomPairsFingerprintsProperties(%NamesAndValues);

  return $This;
}

# Initialize object data...
#
sub _InitializeTopologicalPharmacophoreAtomPairsFingerprints {
  my($This) = @_;

  # Type of fingerprint...
  $This->{Type} = 'TopologicalPharmacophoreAtomPairs';

  # Type of vector...
  $This->{VectorType} = 'FingerprintsVector';

  # AtomPairsSetSizeToUse...
  #
  # ArbitrarySize - Corresponds to atom pairs with non-zero count
  # FixedSize - Corresponds to all atom pairs with zero and non-zero count
  #
  # Possible values: ArbitrarySize or FixedSize. Default: ArbitrarySize
  #
  # It is left empty here; the default is filled in during initialization of
  # the fingerprints vector when no explicit value has been set.
  #
  $This->{AtomPairsSetSizeToUse} = '';

  # Type of FingerprintsVector...
  #
  # OrderedNumericalValues - For FixedSize value of AtomPairsSetSizeToUse
  # NumericalValues - For ArbitrarySize value of AtomPairsSetSizeToUse
  #
  # Possible values: OrderedNumericalValues or NumericalValues. Default: NumericalValues
  #
  $This->{FingerprintsVectorType} = '';

  # Vector values precision for real values which might be generated after
  # normalization and fuzzification...
  $This->{ValuesPrecision} = 2;

  # Minimum and maximum bond distance between pharmacophore atom pairs...
  $This->{MinDistance} = 1;
  $This->{MaxDistance} = 10;

  # Initialize atom types and weight information...
  $This->_InitializePharmacophoreAtomTypesAndWeightInformation();

  # Normalization methodology to use for scaling the occurrence count of pharmacophore atom
  # pairs at various distances.
  #
  # Possible values: None, ByHeavyAtomsCount, ByAtomTypesCount. Default: None
  #
  $This->{NormalizationMethodology} = 'None';

  # Initialize fuzzification parameters...
  #
  $This->_InitializeFuzzificationInformation();

  # Pharmacophore types assigned to each heavy atom...
  #
  %{$This->{AssignedAtomTypes}} = ();

  # Assigned Atom types count of each type in the molecule...
  #
  %{$This->{AssignedAtomTypesCount}} = ();

  # All pharmacophore atom pairs between minimum and maximum distance...
  #
  @{$This->{AtomPairsIDs}} = ();
  %{$This->{AtomPairsCount}} = ();
}

# Initialize pharmacophore atom types and weight information...
#
sub _InitializePharmacophoreAtomTypesAndWeightInformation {
  my($This) = @_;

  # Default pharmacophore atom types to use for atom pairs fingerprint generation
  # are: HBD, HBA, PI, NI, H
  #
  @{$This->{AtomTypesToUse}} = ();
  @{$This->{AtomTypesToUse}} = sort ('HBD', 'HBA', 'PI', 'NI', 'H');

  # Weight of the various pharmacophore atom types to use for their contribution to atom
  # pair interaction. It allows to increase the importance of specific pharmacophore atom
  # types in the generated fingerprints.
  #
  # A value of 0 eliminates the contribution by a particular pharmacophore atom
  # type and 2 doubles its contribution.
  #
  my($AtomType, %AvailableAtomTypes);

  %AvailableAtomTypes = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

  # Every available functional class starts out with a weight of 1...
  %{$This->{AtomTypesWeight}} = ();
  for $AtomType (keys %AvailableAtomTypes) {
    $This->{AtomTypesWeight}{$AtomType} = 1;
  }
  return $This;
}

# Initialize fuzzification information...
#
sub _InitializeFuzzificationInformation {
  my($This) = @_;

  # To fuzz or not to fuzz atom pairs count. Default: No fuzzification
  #
  $This->{FuzzifyAtomPairsCount} = 0;

  # When to fuzz atom pair count...
  #
  # Possible values: BeforeNormalization or AfterNormalization. Default: AfterNormalization
  #
  $This->{FuzzificationMode} = 'AfterNormalization';

  # How to fuzz atom pair count...
  #
  # Possible values: FuzzyBinning or FuzzyBinSmoothing. Default: FuzzyBinning
  #
  $This->{FuzzificationMethodology} = 'FuzzyBinning';

  # By how much to fuzz atom pairs count...
  #
  $This->{FuzzFactor} = 0.15;

  return $This;
}

# Initialize class ...
sub _InitializeClass {
  #Class name...
  $ClassName = __PACKAGE__;
}

# Initialize object properties....
#
# Applies each specified name and value pair through its Set<Name> method and
# then sets up the fingerprints vector. Croaks when no Molecule is specified.
#
sub _InitializeTopologicalPharmacophoreAtomPairsFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  # Apply the properties in a deterministic order instead of hash iteration
  # order. FuzzFactor goes last: SetFuzzFactor validates its value against the
  # current FuzzificationMethodology, so a methodology specified in the same
  # call has to be in place first; otherwise the accepted range would depend
  # on the order in which the hash happens to return its keys.
  #
  my($Name, $MethodName, @Names, @OrderedNames);

  @Names = sort keys %NamesAndValues;
  @OrderedNames = ((grep { $_ ne 'FuzzFactor' } @Names), (grep { $_ eq 'FuzzFactor' } @Names));

  for $Name (@OrderedNames) {
    $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }

  $This->_InitializeTopologicalPharmacophoreAtomPairsFingerprintsVector();

  return $This;
}

# Initialize fingerprints vector...
#
# Defaults AtomPairsSetSizeToUse to ArbitrarySize when it hasn't been set and
# picks the matching type of vector values: OrderedNumericalValues for
# FixedSize and NumericalValues otherwise.
#
sub _InitializeTopologicalPharmacophoreAtomPairsFingerprintsVector {
  my($This) = @_;

  if (!$This->{AtomPairsSetSizeToUse}) {
    $This->{AtomPairsSetSizeToUse} = 'ArbitrarySize';
  }

  # Vector type and type of values...
  $This->{VectorType} = 'FingerprintsVector';

  if ($This->{AtomPairsSetSizeToUse} =~ /^FixedSize$/i) {
    $This->{FingerprintsVectorType} = 'OrderedNumericalValues';
  }
  else {
    $This->{FingerprintsVectorType} = 'NumericalValues';
  }

  $This->_InitializeFingerprintsVector();
}

# Set atom pairs set size to use...
#
# Accepts ArbitrarySize or FixedSize (case insensitive). The value can be set
# only once; a second attempt croaks.
#
sub SetAtomPairsSetSizeToUse {
  my($This, $Value) = @_;

  if ($This->{AtomPairsSetSizeToUse}) {
    croak "Error: ${ClassName}->SetAtomPairsSetSizeToUse: Can't change size:  It's already set...";
  }

  if ($Value !~ /^(ArbitrarySize|FixedSize)$/i) {
    croak "Error: ${ClassName}->SetAtomPairsSetSizeToUse: Unknown AtomPairsSetSizeToUse value: $Value; Supported values: ArbitrarySize or FixedSize";
  }

  $This->{AtomPairsSetSizeToUse} = $Value;

  return $This;
}

# Disable change of AvailableAtomTypes...
#
sub SetAvailableAtomTypes {
  my($This) = @_;

  carp "Warning: ${ClassName}->SetAvailableAtomTypes: AvailableAtomTypes value can't be set...";

  return $This;
}

# Set atom types to use for atom pairs...
#
# The atom types may be specified as a list or as a reference to an array.
# Each one must be an available functional class; otherwise the call croaks.
# The stored list is sorted. A call without any values only warns and returns
# nothing.
#
sub SetAtomTypesToUse {
  my($This, @Values) = @_;
  my($FirstValue, $TypeOfFirstValue, $AtomType, $SpecifiedAtomType, @SpecifiedAtomTypes, @AtomTypesToUse);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomTypesToUse: No values specified...";
    return;
  }

  $FirstValue = $Values[0];
  $TypeOfFirstValue = ref $FirstValue;

  @SpecifiedAtomTypes = ();
  @AtomTypesToUse = ();

  if ($TypeOfFirstValue =~ /^ARRAY/) {
    push @SpecifiedAtomTypes, @{$FirstValue};
  }
  else {
    push @SpecifiedAtomTypes, @Values;
  }

  # Make sure specified AtomTypes are valid...
  for $SpecifiedAtomType (@SpecifiedAtomTypes) {
    if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedAtomType)) {
      croak "Error: ${ClassName}->SetAtomTypesToUse: Specified atom type, $SpecifiedAtomType, is not supported...\n ";
    }
    $AtomType = $SpecifiedAtomType;
    push @AtomTypesToUse, $AtomType;
  }

  # Set atom types to use...
  @{$This->{AtomTypesToUse}} = ();
  push @{$This->{AtomTypesToUse}}, sort @AtomTypesToUse;

  return $This;
}

# Set vector values precision for real values which might be generated after
# normalization and fuzzification...
#
sub SetValuesPrecision {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetValuesPrecision: ValuesPrecision value, $Value, is not valid:  It must be a positive integer...";
  }
  $This->{ValuesPrecision} = $Value;

  return $This;
}

# Set minimum distance for pharmacophore atom pairs...
#
sub SetMinDistance {
  my($This, $Value) = @_;

  if (!TextUtil::IsInteger($Value)) {
    croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid:  It must be an integer...";
  }
  $This->{MinDistance} = $Value;

  return $This;
}

# Set maximum distance for pharmacophore atom pairs...
#
sub SetMaxDistance {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid:  It must be a positive integer...";
  }
  $This->{MaxDistance} = $Value;

  return $This;
}

# Set normalization methodology to use for scaling the occurrence count of pharmacophore atom
# pairs over distance range between minimum and maximum distance.
#
sub SetNormalizationMethodology {
  my($This, $Value) = @_;

  if ($Value !~ /^(ByHeavyAtomsCount|ByAtomTypesCount|None)$/i) {
    croak "Error: ${ClassName}->SetNormalizationMethodology: NormalizationMethodology value, $Value, is not valid. Supported values: None, ByHeavyAtomsCount or ByAtomTypesCount...";
  }

  $This->{NormalizationMethodology} = $Value;

  return $This;
}

# Set weight of the various pharmacophore atom types to use for their contribution to atom
# pair interaction using atom types label and value hash.
#
# It allows to increase the importance of specific pharmacophore atom
# types in the generated fingerprints.
#
# A value of 0 eliminates the contribution by a particular pharmacophore atom
# type and 2 doubles its contribution.
#
# The weights may be specified as a flat list of label and value pairs or as a
# single reference to a hash of them; the latter is what a constructor entry
# delivers, as each property is handed to its setter as one value. Croaks for
# an atom type without an existing weight entry and for a weight which is not
# a float >= 0. Weights are stored as they are validated, so the entries
# processed before an invalid one stay in effect. Returns the object like the
# other setters.
#
sub SetAtomTypesWeight {
  my($This, @Values) = @_;
  my($AtomType, $Weight, %AtomTypesWeight);

  if (@Values == 1 && ref($Values[0]) eq 'HASH') {
    %AtomTypesWeight = %{$Values[0]};
  }
  else {
    %AtomTypesWeight = @Values;
  }

  while (($AtomType, $Weight) = each %AtomTypesWeight) {
    if (!exists $This->{AtomTypesWeight}{$AtomType}) {
      croak "Error: ${ClassName}->SetAtomTypesWeight: AtomTypeWeight for $AtomType couldn't be set: Unknown atom type...";
    }
    if (!(TextUtil::IsFloat($Weight) && ($Weight >= 0))) {
      croak "Error: ${ClassName}->SetAtomTypesWeight: Specified weight value, $Weight, for AtomType, $AtomType, muts be >= 0...";
    }
    $This->{AtomTypesWeight}{$AtomType} = $Weight;
  }

  return $This;
}

# Set fuzzification methodology to use for fuzzifying atom pairs count...
#
sub SetFuzzificationMethodology {
  my($This, $Value) = @_;

  if ($Value !~ /^(FuzzyBinning|FuzzyBinSmoothing)$/i) {
    croak "Error: ${ClassName}->SetFuzzificationMethodology: FuzzificationMethodology value, $Value, is not valid. Supported values: FuzzyBinning or FuzzyBinSmoothing...";
  }

  $This->{FuzzificationMethodology} = $Value;

  return $This;
}

# Set fuzzification mode for fuzzifying atom pairs count...
#
sub SetFuzzificationMode {
  my($This, $Value) = @_;

  if ($Value !~ /^(BeforeNormalization|AfterNormalization)$/i) {
    croak "Error: ${ClassName}->SetFuzzificationMode: FuzzificationMode value, $Value, is not valid. Supported values: BeforeNormalization or AfterNormalization...";
  }

  $This->{FuzzificationMode} = $Value;

  return $This;
}

# Set fuzz factor values used for fuzzifying atom pairs count...
#
# The valid range depends on the FuzzificationMethodology in effect at the
# time of the call: 0 to 1 for FuzzyBinning and 0 to 0.5 for FuzzyBinSmoothing.
# Set the methodology before the fuzz factor.
#
sub SetFuzzFactor {
  my($This, $Value) = @_;

  if ($This->{FuzzificationMethodology} =~ /^FuzzyBinning$/i) {
    if (!(TextUtil::IsFloat($Value) && $Value >=0 && $Value <= 1.0)) {
      croak "Error: ${ClassName}->SetFuzzFactor: Specified fuzz factor value, $Value, must be >= 0 and <= 1...";
    }
  }
  elsif ($This->{FuzzificationMethodology} =~ /^FuzzyBinSmoothing$/i) {
    if (!(TextUtil::IsFloat($Value) && $Value >=0 && $Value <= 0.5)) {
      croak "Error: ${ClassName}->SetFuzzFactor: Specified fuzz factor value, $Value, must be >= 0 and <= 0.5...";
    }
  }
  else {
    croak "Error: ${ClassName}->SetFuzzFactor: Fuzz factor value can't be changed: Uknown FuzzificationMethodology: $This->{FuzzificationMethodology}...";
  }

  $This->{FuzzFactor} = $Value;

  return $This;
}

# Generate fingerprints description...
#
# An explicitly set description takes precedence over the generated one.
#
sub GetDescription {
  my($This) = @_;

  # Is description explicitly set?
  if (exists $This->{Description}) {
    return $This->{Description};
  }

  # Generate fingerprints description...

  return "$This->{Type}:$This->{AtomPairsSetSizeToUse}:MinDistance$This->{MinDistance}:MaxDistance$This->{MaxDistance}";
}

# Generate topological pharmacophore atom pairs [ Ref 60-62, Ref 65, Ref 68 ] fingerprints...
#
# Methodology:
#   . Generate a distance matrix.
#   . Assign pharmacophore atom types to all the atoms.
#   . Initialize pharmacophore atom pairs basis set for all unique pairs between
#     minimum and maximum distance.
#   . Using distance matrix and pharmacophore atom types, count occurrence of
#     unique atom pairs between specified distance range - It corresponds to the
#     correlation-vector for the atom pairs.
#         . Weigh contribution of each atom type to atom pair interaction by its specified
#           weight during occurrence count.
#         . Assign count to appropriate distance bin for a specific atom pair
#
#   . Normalize occurrence count of pharmacophore atom pairs by heavy atom count
#     or sum of AtomTypeCounts of each pharmacophore atom type in the atom pair
#     at a specific distance.
#
#   . Fuzzify occurrence count of pharmacophore atom pairs using FuzzyBinning or
#     FuzzySmoothing methodology.
#
# Notes:
#   . Hydrogen atoms are ignored during the fingerprint generation.
#
# Croaks when MinDistance exceeds MaxDistance. When the distance matrix can't
# be generated, it warns and returns the object without fingerprints; the
# cached molecule data is released on that path as well.
#
sub GenerateFingerprints {
  my($This) = @_;

  if ($This->{MinDistance} > $This->{MaxDistance}) {
    croak "Error: ${ClassName}->GenerateFingerprints: No fingerpritns generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
  }

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Generate distance matrix...
  if (!$This->_SetupDistanceMatrix()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: Fingerprints generation didn't succeed: Couldn't generate distance matrix...";

    # Don't keep the cached atoms around after a failed attempt...
    $This->_ClearMoleculeDataCache();

    return $This;
  }

  # Assign pharmacophore atom types to all heavy atoms...
  $This->_AssignPharmacophoreAtomTypes();

  # Initialize values of all possible pharmacophore atom pairs...
  $This->_InitializePharmacophoreAtomPairs();

  # Count atom pairs...
  $This->_CountPharmacohoreAtomPairs();

  # Fuzzify atom pairs count...
  if ($This->{FuzzificationMode} =~ /^BeforeNormalization$/i) {
    $This->_FuzzifyPharmacohoreAtomPairsCount();
  }

  # Normalize atom pairs count...
  $This->_NormalizePharmacohoreAtomPairsCount();

  # Fuzzify atom pairs count...
  if ($This->{FuzzificationMode} =~ /^AfterNormalization$/i) {
    $This->_FuzzifyPharmacohoreAtomPairsCount();
  }

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}

# Setup distance matrix...
#
# Returns the object on success and nothing (false) when the molecule doesn't
# provide a distance matrix.
#
sub _SetupDistanceMatrix {
  my($This) = @_;

  $This->{DistanceMatrix} = $This->GetMolecule()->GetDistanceMatrix();

  if (!$This->{DistanceMatrix}) {
    return;
  }

  return $This;
}

# Assign pharmacophore atom types to all heavy atoms and count each atom
# types assigned...
#
sub _AssignPharmacophoreAtomTypes {
  my($This) = @_;
  my($Atom, $AtomID, $AtomType, $AssignedAtomType, $FunctionalClassAtomTypes);

  # Assign topological pharmacophore atom types...
  $FunctionalClassAtomTypes = AtomTypes::FunctionalClassAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => 1, 'FunctionalClassesToUse' => $This->{AtomTypesToUse});
  $FunctionalClassAtomTypes->AssignAtomTypes();

  %{$This->{AssignedAtomTypes}} = ();

  # Initialize assigned atom types count...
  %{$This->{AssignedAtomTypesCount}} = ();
  for $AtomType (@{$This->{AtomTypesToUse}}) {
    $This->{AssignedAtomTypesCount}{$AtomType} = 0;
  }

  $This->{HeavyAtomCount} = 0;

  ATOM: for $Atom (@{$This->{Atoms}}) {
    if ($Atom->IsHydrogen()) {
      next ATOM;
    }
    $This->{HeavyAtomCount} += 1;

    $AtomID = $Atom->GetID();

    # Collect all possible pharmacophore atom types which could be assigned to atom.
    # Multiple types assigned to an atom are delimited by a period...
    my(@AtomTypes);

    @AtomTypes = ();
    $AssignedAtomType = $FunctionalClassAtomTypes->GetAtomType($Atom);
    if ($AssignedAtomType && $AssignedAtomType !~ /^None$/i) {
      push @AtomTypes, split /\./, $AssignedAtomType;
      for $AtomType (@AtomTypes) {
        $This->{AssignedAtomTypesCount}{$AtomType} += 1;
      }
    }

    # Assign pharmacophore types to atom; the list stays empty for a heavy atom
    # without any assigned type...
    $This->{AssignedAtomTypes}{$AtomID} = \@AtomTypes;
  }
  return $This;
}

# Initialize values of all possible pharmacophore atom pairs...
#
# Let:
#   Dmin = Minimum distance corresponding to number of bonds between two atoms
#   Dmax = Maximum distance corresponding to number of bonds between two atoms
#   D = Distance corresponding to number of bonds between two atoms
#
#   P = Number of pharmacophore atom types to consider
#   PPDn = Number of possible unique pharmacophore atom pairs at a distance Dn
#
#   PPT = Total number of possible pharmacophore atom pairs at all distances between Dmin and Dmax
#
# Then:
#
#   PPD =  (P * (P - 1))/2 + P
#
#   PPT = ((Dmax - Dmin) + 1) * ((P * (P - 1))/2 + P)
#       = ((Dmax - Dmin) + 1) * PPD
#
#
# So for default values of Dmin = 1, Dmax = 10 and P = 5,
#
#   PPD =  (5 * (5 - 1))/2 + 5 = 15
#   PPT = ((10 - 1) + 1) * 15 = 150
#
# the pharmacophore atom pairs basis set includes 150 values.
#
sub _InitializePharmacophoreAtomPairs {
  my($This) = @_;
  my($Distance, $Index1, $Index2, $AtomType1, $AtomType2);

  %{$This->{AtomPairsCount}} = ();

  for $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
    %{$This->{AtomPairsCount}{$Distance}} = ();

    for $Index1 (0 .. $#{$This->{AtomTypesToUse}}) {
      $AtomType1 = $This->{AtomTypesToUse}[$Index1];
      %{$This->{AtomPairsCount}{$Distance}{$AtomType1}} = ();

      # Start at Index1 to cover each unordered pair of types once, including
      # the pair of a type with itself...
      for $Index2 ($Index1 .. $#{$This->{AtomTypesToUse}}) {
        $AtomType2 = $This->{AtomTypesToUse}[$Index2];
        $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} = 0;
      }
    }
  }
  return $This;
}

# Count pharmacophore atom pairs between minimum and maximum distance at each
# distance using distance matrix and pharmacophore atom types assigned to each heavy
# atom.
#
# Let:
#   Px = Pharmacophore atom type x
#   Py = Pharmacophore atom type y
#   Dn = Distance between Px and Py in specified distance range
#
# Then:
#   Px-Dn-Py = Pharmacophore atom pair ID for atom types Px and Py at distance Dn
#
# For example: H-D1-H, H-D2-HBA, PI-D5-PI and so on
#
# Notes:
#   . The row and column indices of distance matrix correspond to atom indices.
#   . Distance value of BigNumber implies the atom is not connected to any other atom.
#   . Due to symmetric nature of distance matrix, only upper or lower triangular matrix
#     needs to be processed during identification and count of pharmacophore atom pairs.
#
sub _CountPharmacohoreAtomPairs {
  my($This) = @_;
  my($NumOfRows, $NumOfCols, $RowIndex, $ColIndex, $DistanceMatrix, $Distance, $AtomID1, $AtomID2, $AtomType1, $AtomType2, $SkipIndexCheck, $CountIncrement);

  $DistanceMatrix = $This->{DistanceMatrix};
  ($NumOfRows, $NumOfCols) = $DistanceMatrix->GetSize();
  $SkipIndexCheck = 0;

  ROWINDEX: for $RowIndex (0 .. ($NumOfRows - 1) ) {
    $AtomID1 = $This->{AtomIndexToID}{$RowIndex};

    # Atoms without any assigned pharmacophore type don't contribute...
    if ( !((exists($This->{AssignedAtomTypes}{$AtomID1}) && @{$This->{AssignedAtomTypes}{$AtomID1}})) ) {
      next ROWINDEX;
    }

    COLINDEX: for $ColIndex ($RowIndex .. ($NumOfCols - 1) ) {
      $AtomID2 = $This->{AtomIndexToID}{$ColIndex};
      if ( !((exists($This->{AssignedAtomTypes}{$AtomID2}) && @{$This->{AssignedAtomTypes}{$AtomID2}})) ) {
        next COLINDEX;
      }

      $Distance = $DistanceMatrix->GetValue($RowIndex, $ColIndex, $SkipIndexCheck);
      if ($Distance < $This->{MinDistance} || $Distance > $This->{MaxDistance}) {
        next COLINDEX;
      }

      ATOMTYPE1: for $AtomType1 (@{$This->{AssignedAtomTypes}{$AtomID1}}) {
        if ($This->{AtomTypesWeight}{$AtomType1} == 0) {
          next ATOMTYPE1;
        }
        ATOMTYPE2: for $AtomType2 (@{$This->{AssignedAtomTypes}{$AtomID2}}) {
          if ($This->{AtomTypesWeight}{$AtomType2} == 0) {
            next ATOMTYPE2;
          }
          $CountIncrement = $This->{AtomTypesWeight}{$AtomType1} * $This->{AtomTypesWeight}{$AtomType2};

          # Counts are kept under the string-wise smaller atom type first, which
          # matches the sorted order of the atom types to use...
          if ($AtomType1 le $AtomType2) {
            $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} += $CountIncrement;
          }
          else {
            $This->{AtomPairsCount}{$Distance}{$AtomType2}{$AtomType1} += $CountIncrement;
          }
        }
      }
    }
  }
  return $This;
}

# Normalize the occurrence count of pharmacophore atom pairs over the specified distance
# range...
#
sub _NormalizePharmacohoreAtomPairsCount {
  my($This) = @_;

  METHODOLOGY: {
    if ($This->{NormalizationMethodology} =~ /^None$/i) {
      last METHODOLOGY;
    }
    if ($This->{NormalizationMethodology} =~ /^ByHeavyAtomsCount$/i) {
      $This->_NormalizeAtomPairsCountByHeavyAtomsCount();
      last METHODOLOGY;
    }
    if ($This->{NormalizationMethodology} =~ /^ByAtomTypesCount$/i) {
      $This->_NormalizeAtomPairsCountByAtomTypesCount();
      last METHODOLOGY;
    }
    croak "Error: ${ClassName}->_NormalizePharmacohoreAtomPairsCount: Unknown NormalizationMethodology: $This->{NormalizationMethodology}...";
  }
  return $This;
}


# Normalize the occurrence count of pharmacophore atom pairs at various distances by
# heavy atom count...
#
sub _NormalizeAtomPairsCountByHeavyAtomsCount {
  my($This) = @_;
  my($Distance, $AtomType1, $AtomType2);

  # Nothing to scale by without any heavy atom...
  if ($This->{HeavyAtomCount} == 0) {
    return $This;
  }

  for $Distance (keys %{$This->{AtomPairsCount}} ) {
    for $AtomType1 (keys %{$This->{AtomPairsCount}{$Distance}} ) {
      ATOMTYPE2: for $AtomType2 (keys %{$This->{AtomPairsCount}{$Distance}{$AtomType1}} ) {
        if ($This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} == 0) {
          next ATOMTYPE2;
        }
        $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} /= $This->{HeavyAtomCount};
      }
    }
  }
  return $This;
}

# Normalize the occurrence count of pharmacophore atom pairs at various distances by
# dividing it using sum of the count of each pharmacophore atom type present in the
# molecule for the corresponding atom pair.
#
sub _NormalizeAtomPairsCountByAtomTypesCount {
  my($This) = @_;
  my($Distance, $AtomType1, $AtomType2, $AtomType1Count, $AtomType2Count, $NormalizationFactor);

  for $Distance (keys %{$This->{AtomPairsCount}} ) {
    for $AtomType1 (keys %{$This->{AtomPairsCount}{$Distance}} ) {
      ATOMTYPE2: for $AtomType2 (keys %{$This->{AtomPairsCount}{$Distance}{$AtomType1}} ) {
        # Only non-zero counts are divided...
        if ($This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} == 0) {
          next ATOMTYPE2;
        }
        $NormalizationFactor = $This->{AssignedAtomTypesCount}{$AtomType1} + $This->{AssignedAtomTypesCount}{$AtomType2};
        $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} /=  $NormalizationFactor;
      }
    }
  }
  return $This;
}

# Fuzzify pharmacophore atom pairs count...
#
# Let:
#   Px = Pharmacophore atom type x
#   Py = Pharmacophore atom type y
#
#   PPxy = Pharmacophore atom pair between atom type Px and Py
#
#   PPxyDn = Pharmacophore atom pairs count between atom type Px and Py at distance Dn
#   PPxyDn-1 = Pharmacophore atom pairs count between atom type Px and Py at distance Dn - 1
#   PPxyDn+1 = Pharmacophore atom pairs count between atom type Px and Py at distance Dn + 1
#
#   FF = FuzzFactor for FuzzyBinning and FuzzyBinSmoothing
#
# Then:
#
#   For FuzzyBinning:
#
#   PPxyDn = PPxyDn (Unchanged)
#
#   PPxyDn-1 = PPxyDn-1 + PPxyDn * FF
#   PPxyDn+1 = PPxyDn+1 + PPxyDn * FF
#
#   For FuzzyBinSmoothing:
#
#   PPxyDn = PPxyDn - PPxyDn * 2FF for Dmin < Dn < Dmax
#   PPxyDn = PPxyDn - PPxyDn * FF for Dn = Dmin or Dmax
#
#   PPxyDn-1 = PPxyDn-1 + PPxyDn * FF
#   PPxyDn+1 = PPxyDn+1 + PPxyDn * FF
#
# In both fuzzification schemes, a value of 0 for FF implies no fuzzification of occurrence counts.
# A value of 1 during FuzzyBinning corresponds to maximum fuzzification of occurrence counts;
# however, a value of 0.5, the largest FF accepted for FuzzyBinSmoothing, ends up completely
# distributing the value at a distance between Dmin and Dmax over the previous and next
# distance bins.
#
# So for default value of FuzzFactor (FF) 0.15, the occurrence count of pharmacophore atom pairs
# at distance Dn during FuzzyBinning is left unchanged and the counts at distances Dn - 1 and Dn + 1
# are incremented by PPxyDn * 0.15.
#
# And during FuzzyBinSmoothing the occurrence count at distance Dn is scaled back using a multiplicative
# factor of (1 - 2*0.15) and the occurrence counts at distances Dn - 1 and Dn + 1 are incremented by
# PPxyDn * 0.15. In other words, the occurrence bin count is smoothed out by distributing it over the
# previous and next distance value.
#
sub _FuzzifyPharmacohoreAtomPairsCount {
  my($This) = @_;

  # Nothing to do unless fuzzification is on and the fuzz factor is positive...
  return $This unless ($This->{FuzzifyAtomPairsCount} && $This->{FuzzFactor} > 0);

  # Only FuzzyBinSmoothing takes away from the bin being distributed...
  my $SmoothCurrentBin = ($This->{FuzzificationMethodology} =~ /^FuzzyBinSmoothing$/i) ? 1 : 0;

  my $FirstDistance = $This->{MinDistance};
  my $LastDistance = $This->{MaxDistance};
  my $FuzzFactor = $This->{FuzzFactor};

  my @Types = @{$This->{AtomTypesToUse}};

  for my $TypeIndex1 (0 .. $#Types) {
    my $Type1 = $Types[$TypeIndex1];

    for my $TypeIndex2 ($TypeIndex1 .. $#Types) {
      my $Type2 = $Types[$TypeIndex2];

      # The count of the upcoming bin is picked up before that bin receives
      # its share from the current one; each bin is consequently distributed
      # using its count prior to fuzzification...
      my $UpcomingCount = $This->{AtomPairsCount}{$FirstDistance}{$Type1}{$Type2};

      DISTANCE: for my $Distance ($FirstDistance .. $LastDistance) {
        my $Before = $Distance - 1;
        my $After = $Distance + 1;

        my $Count = $UpcomingCount;
        $UpcomingCount = 0;
        if ($Distance < $LastDistance) {
          $UpcomingCount = $This->{AtomPairsCount}{$After}{$Type1}{$Type2};
        }

        # An empty bin has nothing to hand over to its neighbors...
        next DISTANCE if ($Count == 0);

        my $Share = $Count * $FuzzFactor;

        # Previous distance, if there is one...
        if ($Distance > $FirstDistance) {
          $This->{AtomPairsCount}{$Before}{$Type1}{$Type2} += $Share;
        }

        # Current distance: a bin in the middle gives away two shares and a
        # bin at either end of the range just one...
        if ($SmoothCurrentBin) {
          if ($Distance > $FirstDistance && $Distance < $LastDistance) {
            $This->{AtomPairsCount}{$Distance}{$Type1}{$Type2} -= 2 * $Share;
          }
          else {
            $This->{AtomPairsCount}{$Distance}{$Type1}{$Type2} -= $Share;
          }
        }

        # Next distance, if there is one...
        if ($Distance < $LastDistance) {
          $This->{AtomPairsCount}{$After}{$Type1}{$Type2} += $Share;
        }
      }
    }
  }
  return $This;
}

# Set final fingerprints vector...
#
# Walks over all distances and unique atom type pairs in order, collects the
# atom pair IDs along with their count values and adds both to the
# fingerprints vector. Pairs with a zero value are left out for ArbitrarySize.
#
sub _SetFinalFingerprints {
  my($This) = @_;

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  my @PairValues = ();
  @{$This->{AtomPairsIDs}} = ();

  # Real values show up only after normalization or fuzzification...
  my $RoundValues = (($This->{NormalizationMethodology} !~ /^None$/i) || ($This->{FuzzifyAtomPairsCount})) ? 1 : 0;
  my $Precision = $This->{ValuesPrecision};

  # Zero values are dropped only for ArbitrarySize atom pairs set size...
  my $DropZeroValues = ($This->{AtomPairsSetSizeToUse} =~ /^ArbitrarySize$/i) ? 1 : 0;

  my @Types = @{$This->{AtomTypesToUse}};

  for my $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
    for my $TypeIndex1 (0 .. $#Types) {
      my $Type1 = $Types[$TypeIndex1];

      PAIR: for my $TypeIndex2 ($TypeIndex1 .. $#Types) {
        my $Type2 = $Types[$TypeIndex2];

        my $PairValue = $This->{AtomPairsCount}{$Distance}{$Type1}{$Type2};
        if ($RoundValues) {
          $PairValue = MathUtil::round($PairValue, $Precision) + 0;
        }

        next PAIR if ($DropZeroValues && $PairValue == 0);

        push @{$This->{AtomPairsIDs}}, "${Type1}-D${Distance}-${Type2}";
        push @PairValues, $PairValue;
      }
    }
  }

  # Add AtomPairsIDs and count values to fingerprint vector...
  $This->{FingerprintsVector}->AddValueIDs(\@{$This->{AtomPairsIDs}});
  $This->{FingerprintsVector}->AddValues(\@PairValues);

  return $This;
}

# Get pharmacophore atom pair IDs corresponding to atom pairs count values in
# fingerprint vector as an array or reference to an array...
#
# The list of IDs is put together while the fingerprints are finalized, so it
# lines up with the count values in the fingerprint vector.
#
sub GetAtomPairIDs {
  my($This) = @_;

  if (wantarray) {
    return @{$This->{AtomPairsIDs}};
  }
  return \@{$This->{AtomPairsIDs}};
}

# Cache appropriate molecule data...
#
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # All atoms, hydrogens included, are retrieved to keep atom indices in line
  # with the rows and columns of the distance matrix. The hydrogens are skipped
  # later on during processing...
  #
  @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();

  # IDs of the atoms in the order of the atoms...
  my @AtomIDs = map { $_->GetID() } @{$This->{Atoms}};

  # Map position of each atom to its ID...
  %{$This->{AtomIndexToID}} = ();
  for my $AtomIndex (0 .. $#AtomIDs) {
    $This->{AtomIndexToID}{$AtomIndex} = $AtomIDs[$AtomIndex];
  }

  return $This;
}

# Clear cached molecule data...
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  # Empty out the list of cached atoms...
  @{$This->{Atoms}} = ();

  return $This;
}


# Return a string containing data for TopologicalPharmacophoreAtomPairsFingerprints object...
sub StringifyTopologicalPharmacophoreAtomPairsFingerprints {
  my($This) = @_;

  # Type of fingerprint along with min and max distance...
  my $Text = "Fingerprint type: $This->{Type}; AtomPairsSetSizeToUse: $This->{AtomPairsSetSizeToUse}";
  $Text .= "; MinDistance: $This->{MinDistance}; MaxDistance: $This->{MaxDistance}";

  # Pharmacophore type labels and description...
  my @TypesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
  my %TypeDescriptions = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
  my @DescribedTypes = map { "${_}: $TypeDescriptions{$_}" } @TypesOrder;

  $Text .= "; AtomTypesToUse: <" . TextUtil::JoinWords(\@{$This->{AtomTypesToUse}}, ", ", 0) . ">";
  $Text .= "; AtomTypesOrder: <" . TextUtil::JoinWords(\@TypesOrder, ", ", 0) . ">";
  $Text .= "; AvailableAtomTypes: <" . TextUtil::JoinWords(\@DescribedTypes, ", ", 0) . ">";

  # Normalization method...
  $Text .= "; NormalizationMethodology: $This->{NormalizationMethodology}";

  # Weights of the atom types in use: a leading space and semicolons between
  # the entries; nothing at all goes between the brackets for an empty list...
  my @WeightEntries = map { "${_}: $This->{AtomTypesWeight}{$_}" } sort @{$This->{AtomTypesToUse}};

  $Text .= "; AtomTypesWeight <Labels: Weight>: <";
  if (@WeightEntries) {
    $Text .= " " . join("; ", @WeightEntries);
  }
  $Text .= ">";

  # Fuzzification of count...
  my $Fuzzify = "No";
  if ($This->{FuzzifyAtomPairsCount}) {
    $Fuzzify = "Yes";
  }
  $Text .= "; FuzzifyAtomPairsCount: $Fuzzify; FuzzificationMode: $This->{FuzzificationMode}; FuzzificationMethodology: $This->{FuzzificationMethodology}; FuzzFactor: $This->{FuzzFactor}";

  # Total number of pharmacophore atom pairs...
  $Text .= "; NumOfAtomPairs: " . $This->{FingerprintsVector}->GetNumOfValues();

  # FingerprintsVector...
  $Text .= "; FingerprintsVector: < $This->{FingerprintsVector} >";

  return $Text;
}