package Fingerprints::TopologicalAtomPairsFingerprints;
#
# $RCSfile: TopologicalAtomPairsFingerprints.pm,v $
# $Date: 2015/02/28 20:48:54 $
# $Revision: 1.30 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Fingerprints::Fingerprints;
use TextUtil ();
use Molecule;
use AtomTypes::AtomicInvariantsAtomTypes;
use AtomTypes::DREIDINGAtomTypes;
use AtomTypes::EStateAtomTypes;
use AtomTypes::FunctionalClassAtomTypes;
use AtomTypes::MMFF94AtomTypes;
use AtomTypes::SLogPAtomTypes;
use AtomTypes::SYBYLAtomTypes;
use AtomTypes::TPSAAtomTypes;
use AtomTypes::UFFAtomTypes;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

# Inherit from the generic fingerprints class; nothing is exported...
@ISA = ('Fingerprints::Fingerprints', 'Exporter');
@EXPORT = ();
@EXPORT_OK = ();

%EXPORT_TAGS = ('all' => [ @EXPORT, @EXPORT_OK ]);

# Setup class variables: $ClassName is filled in by _InitializeClass and is
# used as the prefix of error and warning messages...
my $ClassName;
_InitializeClass();

# Overload Perl functions: string interpolation of an object calls the
# stringify method...
use overload ('""' => 'StringifyTopologicalAtomPairsFingerprints');

# Class constructor...
#
# Parameters:
#   . $Class          - class name or an existing object
#   . %NamesAndValues - property name and value pairs; Molecule and
#                       AtomIdentifierType are required
#
# Returns a new object; croaks when a required property is missing or a
# property value is rejected by its setter.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Initialize object...
  my $This = $Class->SUPER::new();
  bless $This, ref($Class) || $Class;
  $This->_InitializeTopologicalAtomPairsFingerprints();

  $This->_InitializeTopologicalAtomPairsFingerprintsProperties(%NamesAndValues);

  return $This;
}

# Initialize object data...
#
# Sets default values for all the object data used by this class.
#
sub _InitializeTopologicalAtomPairsFingerprints {
  my($This) = @_;

  # Type of fingerprint...
  $This->{Type} = 'TopologicalAtomPairs';

  # Type of vector...
  $This->{VectorType} = 'FingerprintsVector';

  # Type of FingerprintsVector...
  $This->{FingerprintsVectorType} = 'NumericalValues';

  # Minimum and maximum bond distance between atom pairs...
  $This->{MinDistance} = 1;
  $This->{MaxDistance} = 10;

  # Atom identifier type to use for atom IDs in atom pairs...
  #
  # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
  # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
  # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
  #
  $This->{AtomIdentifierType} = '';

  # Atom types assigned to each heavy atom...
  #
  %{$This->{AssignedAtomTypes}} = ();

  # All atom pairs between minimum and maximum distance...
  #
  @{$This->{AtomPairsIDs}} = ();
  %{$This->{AtomPairsCount}} = ();
}

# Initialize class ...
sub _InitializeClass {
  #Class name...
  $ClassName = __PACKAGE__;
}

# Initialize object properties....
#
# Each specified property is applied through its Set<Name> method.
#
# The order of application matters: SetAtomIdentifierType resets
# AtomicInvariantsToUse and FunctionalClassesToUse to their default values, and
# SetFunctionalClassesToUse ignores its values unless AtomIdentifierType is
# already FunctionalClassAtomTypes. Hash iteration order is unspecified, so
# AtomIdentifierType is always applied first and the remaining properties are
# applied in sorted name order to make the outcome independent of hash order.
#
# Croaks when Molecule or AtomIdentifierType is not specified.
#
sub _InitializeTopologicalAtomPairsFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }
  if (!exists $NamesAndValues{AtomIdentifierType}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
  }

  my($Name, $MethodName, @Names);

  # AtomIdentifierType first; everything else afterwards...
  @Names = ('AtomIdentifierType', sort grep { $_ ne 'AtomIdentifierType' } keys %NamesAndValues);

  for $Name (@Names) {
    $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  $This->_InitializeFingerprintsVector();

  return $This;
}

# Set minimum distance for atom pairs...
#
# Croaks unless the value is a positive integer; returns the object.
#
sub SetMinDistance {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid:  It must be a positive integer...";
  }
  $This->{MinDistance} = $Value;

  return $This;
}

# Set maximum distance for atom pairs...
#
# Croaks unless the value is a positive integer; returns the object.
#
sub SetMaxDistance {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid:  It must be a positive integer...";
  }
  $This->{MaxDistance} = $Value;

  return $This;
}

# Set atom identifier type..
#
# The type can be set only once. Croaks for an unsupported type or when a type
# has already been set. After storing the type, the type specific default
# information is initialized.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
  }

  if ($This->{AtomIdentifierType}) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type:  It's already set...";
  }

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize atom identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}

# Generate fingerprints description...
#
# Returns the explicitly set description when one exists; otherwise a string
# built from the fingerprints type, atom identifier type and distance range.
#
sub GetDescription {
  my($This) = @_;

  # Is description explicity set?
  if (exists $This->{Description}) {
    return $This->{Description};
  }

  # Generate fingerprints description...

  return "$This->{Type}:$This->{AtomIdentifierType}:MinDistance$This->{MinDistance}:MaxDistance$This->{MaxDistance}";
}

# Generate topological atom pairs [ Ref 57, Ref 59, Ref 72 ] fingerprints...
#
# Methodology:
#   . Generate a distance matrix.
#   . Assign atom types to all the atoms.
#   . Using distance matrix and atom types, count occurrence of
#     unique atom pairs within specified distance range - It corresponds to the
#     correlation-vector for the atom pairs.
#
# Notes:
#   . Hydrogen atoms are ignored during the fingerprint generation.
#   . The cached molecule data is cleared on the failure paths as well as after
#     a successful generation.
#
# Returns the object. Croaks when MinDistance is greater than MaxDistance;
# warns and returns without generating fingerprints when the distance matrix
# or the atom types are not available.
#
sub GenerateFingerprints {
  my($This) = @_;

  if ($This->{MinDistance} > $This->{MaxDistance}) {
    croak "Error: ${ClassName}->GenerateFingerprints: No fingerpritns generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
  }

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Generate distance matrix...
  if (!$This->_SetupDistanceMatrix()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't generate distance matrix...";
    $This->_ClearMoleculeDataCache();
    return $This;
  }

  # Assign atom types to all heavy atoms...
  if (!$This->_AssignAtomTypes()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
    $This->_ClearMoleculeDataCache();
    return $This;
  }

  # Intialize values of toplogical atom pairs...
  $This->_InitializeToplogicalAtomPairs();

  # Count atom pairs...
  $This->_GenerateAndCountAtomPairs();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}

# Setup distance matrix...
#
# Returns the object on success and nothing (false) when the molecule doesn't
# provide a distance matrix.
#
sub _SetupDistanceMatrix {
  my($This) = @_;

  $This->{DistanceMatrix} = $This->GetMolecule()->GetDistanceMatrix();

  if (!$This->{DistanceMatrix}) {
    # Bare return: false in scalar context and an empty list in list context...
    return;
  }

  return $This;
}

# Assign appropriate atom types to all heavy atoms...
#
# An atom types object corresponding to AtomIdentifierType is created, asked to
# assign atom types, and the resulting atom types are collected by atom ID for
# all non-hydrogen atoms in the cached atoms list.
#
# Returns the object on success and nothing (false) when the atom types
# assignment is reported as unsuccessful. Croaks for an unknown atom identifier
# type.
#
sub _AssignAtomTypes {
  my($This) = @_;
  my($SpecifiedAtomTypes, $Atom, $AtomID, $IgnoreHydrogens, $TypeFound, $TypeSpec, @SupportedTypes);

  %{$This->{AssignedAtomTypes}} = ();
  $IgnoreHydrogens = 1;

  # Supported atom identifier types along with the options, in addition to
  # Molecule, passed to the constructor of the corresponding AtomTypes class...
  #
  @SupportedTypes = (
    ['AtomicInvariantsAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse}],
    ['DREIDINGAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
    ['EStateAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
    ['FunctionalClassAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse}],
    ['MMFF94AtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
    ['SLogPAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
    ['SYBYLAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
    ['TPSAAtomTypes', 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0],
    ['UFFAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
  );

  $SpecifiedAtomTypes = undef;
  $TypeFound = 0;

  IDENTIFIERTYPE: for $TypeSpec (@SupportedTypes) {
    my($TypeName, @Options) = @{$TypeSpec};

    if ($This->{AtomIdentifierType} !~ /^\Q$TypeName\E$/i) {
      next IDENTIFIERTYPE;
    }

    # Explicit Class->new call instead of indirect object syntax: this
    # package defines its own new method...
    my $AtomTypesClass = "AtomTypes::${TypeName}";
    $SpecifiedAtomTypes = $AtomTypesClass->new('Molecule' => $This->{Molecule}, @Options);

    $TypeFound = 1;
    last IDENTIFIERTYPE;
  }

  if (!$TypeFound) {
    croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  # Assign atom types...
  $SpecifiedAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    # Bare return: false in scalar context and an empty list in list context...
    return;
  }

  # Collect assigned atom types...
  ATOM: for $Atom (@{$This->{Atoms}}) {
    if ($Atom->IsHydrogen()) {
      next ATOM;
    }
    $AtomID = $Atom->GetID();
    $This->{AssignedAtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
  }

  return $This;
}

# Initialize topological atom pairs between specified distance range...
#
# Empties the atom pair IDs and counts, and sets up an empty count hash for
# each distance from MinDistance to MaxDistance.
#
sub _InitializeToplogicalAtomPairs {
  my($This) = @_;
  my($Distance);

  @{$This->{AtomPairsIDs}} = ();
  %{$This->{AtomPairsCount}} = ();

  for $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
    %{$This->{AtomPairsCount}{$Distance}} = ();
  }

  return $This;
}

# Count atom pairs between minimum and maximum distance at each
# distance using distance matrix and atom types assigned to each heavy
# atom.
#
# Notes:
#   . The row and column indices of distance matrix correspond to atom indices.
#   . Distance value of BigNumber implies the atom is not connected to any other atom.
#   . Due to symmetric nature of distance matrix, only upper or lower triangular matrix
#     needs to be processed during identification and count of atom pairs.
#
sub _GenerateAndCountAtomPairs {
  my $This = shift;

  my $Matrix = $This->{DistanceMatrix};
  my($RowCount, $ColCount) = $Matrix->GetSize();
  my $SkipIndexCheck = 0;

  # Walk through the upper triangle of the distance matrix...
  ROW: for my $Row (0 .. ($RowCount - 1)) {
    my $FirstAtomID = $This->{AtomIndexToID}{$Row};

    # Atoms without an assigned atom type don't take part in atom pairs...
    next ROW unless exists($This->{AssignedAtomTypes}{$FirstAtomID});
    my $FirstType = $This->{AssignedAtomTypes}{$FirstAtomID};

    COL: for my $Col (($Row + 1) .. ($ColCount - 1)) {
      my $SecondAtomID = $This->{AtomIndexToID}{$Col};
      next COL unless exists($This->{AssignedAtomTypes}{$SecondAtomID});

      my $PairDistance = $Matrix->GetValue($Row, $Col, $SkipIndexCheck);
      next COL if ($PairDistance < $This->{MinDistance} || $PairDistance > $This->{MaxDistance});

      my $SecondType = $This->{AssignedAtomTypes}{$SecondAtomID};

      # Atom types in a pair are kept in string sort order so that both
      # orientations of a pair map to the same count...
      my @OrderedTypes = ($FirstType le $SecondType) ? ($FirstType, $SecondType) : ($SecondType, $FirstType);
      $This->_SetAtomPairsCount($PairDistance, @OrderedTypes);
    }
  }

  return $This;
}

# Set atom pairs count for a specific atom type pair at a specific distance...
#
# The count is incremented by one; missing intermediate hashes and a missing
# count are created as needed, so the first occurrence ends up with a count
# of one.
#
sub _SetAtomPairsCount {
  my($This, $Distance, $AtomType1, $AtomType2) = @_;

  $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} += 1;

  return $This;
}

# Set final fingerprints vector...
#
# Atom pair IDs, in the format AtomType1-D<Distance>-AtomType2, and their
# counts are collected by increasing distance and sorted atom types, and then
# added to the fingerprints vector.
#
sub _SetFinalFingerprints {
  my $This = shift;

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  my(@PairIDs, @PairCounts);

  for my $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
    my $CountsAtDistance = \%{$This->{AtomPairsCount}{$Distance}};

    for my $FirstType (sort keys %{$CountsAtDistance}) {
      my $CountsForFirstType = \%{$CountsAtDistance->{$FirstType}};

      for my $SecondType (sort keys %{$CountsForFirstType}) {
        push @PairIDs, "${FirstType}-D${Distance}-${SecondType}";
        push @PairCounts, $CountsForFirstType->{$SecondType};
      }
    }
  }

  # Refill the existing IDs array in place...
  @{$This->{AtomPairsIDs}} = @PairIDs;

  # Add AtomPairsIDs and values to fingerprint vector...
  my $Vector = $This->{FingerprintsVector};
  $Vector->AddValueIDs(\@{$This->{AtomPairsIDs}});
  $Vector->AddValues(\@PairCounts);

  return $This;
}

# Get atom pair IDs corresponding to atom pairs count values in fingerprint
# vector as an array or reference to an array...
#
# AtomPairIDs list differs in molecules and is generated during finalization
# of fingerprints to make sure the fingerprint vector containing count values
# matches the atom pairs array.
#
sub GetAtomPairIDs {
  my $This = shift;

  if (wantarray) {
    return @{$This->{AtomPairsIDs}};
  }

  return \@{$This->{AtomPairsIDs}};
}

# Cache appropriate molecule data...
#
sub _SetupMoleculeDataCache {
  my $This = shift;

  # All atoms, hydrogens included, are cached: positions in this list are used
  # to map distance matrix indices to atom IDs. Hydrogens are skipped later
  # during processing...
  #
  @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();

  # Atom IDs in the same order as the cached atoms...
  my @IDs = map { $_->GetID() } @{$This->{Atoms}};

  # Atom index to atom ID hash...
  %{$This->{AtomIndexToID}} = ();
  for my $Index (0 .. $#IDs) {
    $This->{AtomIndexToID}{$Index} = $IDs[$Index];
  }

  return $This;
}

# Clear cached molecule data...
#
sub _ClearMoleculeDataCache {
  my $This = shift;

  @{$This->{Atoms}} = ();

  return $This;
}

# Set atomic invariants to use for atom identifiers...
#
# Values may be passed as a list or as a reference to an array. Warns and
# returns nothing when no values are given; croaks on the first unsupported
# atomic invariant; otherwise stores the values and returns the object.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;

  unless (@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  # An array reference as the first value supplies the whole list...
  my @Requested = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomicInvariants are valid...
  for my $Invariant (@Requested) {
    next if AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($Invariant);
    croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $Invariant, is not supported...\n ";
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = @Requested;

  return $This;
}

# Set functional classes to use for atom identifiers...
#
# Values may be passed as a list or as a reference to an array. Warns and
# returns nothing when no values are given or when AtomIdentifierType is not
# FunctionalClassAtomTypes; croaks on the first unsupported functional class;
# otherwise stores the values and returns the object.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;

  unless (@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # An array reference as the first value supplies the whole list...
  my @Requested = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified FunctionalClasses are valid...
  for my $Class (@Requested) {
    next if AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($Class);
    croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $Class, is not supported...\n ";
  }

  # Set functional classes to use...
  @{$This->{FunctionalClassesToUse}} = @Requested;

  return $This;
}

# Initialize atom identifier type information...
#
# Current supported values:
#
# AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
sub _InitializeAtomIdentifierTypeInformation {
  my $This = shift;

  my $Type = $This->{AtomIdentifierType};

  if ($Type =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
    return $This;
  }

  if ($Type =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
    return $This;
  }

  # Nothing to do for now for the remaining supported types...
  if ($Type =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    return $This;
  }

  croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
}

# Initialize atomic invariants atom types to use for generating atom identifiers...
#
# Let:
#   AS = Atom symbol corresponding to element symbol
#
#   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
#   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
#   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
#   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   H<n>   = Number of implicit and explicit hydrogens for atom
#   Ar     = Aromatic annotation indicating whether atom is aromatic
#   RA     = Ring atom annotation indicating whether atom is in a ring
#   FC<+n/-n> = Formal charge assigned to atom
#   MN<n> = Mass number indicating isotope other than most abundant isotope
#   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
#
#   AtomTypeIDx = Atomic invariants atom type for atom x
#   AtomTypeIDy = Atomic invariants atom type for atom y
#   Dn   = Topological distance between atom x and y
#
# Then:
#
#   Atom pair AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
#
#     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
#
#   AtomPairID corresponds to:
#
#     AtomTypeIDx-D<n>-AtomTypeIDy
#
# Except for AS which is a required atomic invariant in atom pair AtomIDs, all other atomic invariants are
# optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
# AtomID specification doesn't include atomic invariants with zero or undefined values.
#
# Examples of atom pair AtomIDs:
#
#   O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge
#   O.X1.BO1.FC-1 - Hydroxyl oxygen in carboxylate with explicit negative charge
#   O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon
#   O.X2.BO2 - Hydroxyl oxygen in carboxylate attached to carbonyl carbon and another heavy atom
#
#   C.X2.BO3.H1.Ar - Aromatic carbon
#
# Examples of AtomPairIDs:
#
#   C.X2.BO2.H3-D1-O.X1.BO1 - Carbon with two heavy atom neighbors attached to oxygen at bond distance 1(methanol)
#
#   C.X2.BO3.H1.Ar-D3-C.X2.BO3.H1.Ar - Two aromatic carbons at bond distance 3 where each carbon has
#                                      two heavy atom neighbors and bond order of 3 (benzene)
#
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my $This = shift;

  # Default atomic invariants to use for generating atom pair atom IDs: AS, X, BO, H, FC
  #
  my @DefaultInvariants = qw(AS X BO H FC);

  # The list is assigned into the existing array, when there is one, instead
  # of replacing the array reference...
  @{$This->{AtomicInvariantsToUse}} = @DefaultInvariants;

  return $This;
}

# Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
# class, to use for generating atom identifiers...
#
# Let:
#   HBD: HydrogenBondDonor
#   HBA: HydrogenBondAcceptor
#   PI :  PositivelyIonizable
#   NI : NegativelyIonizable
#   Ar : Aromatic
#   Hal : Halogen
#   H : Hydrophobic
#   RA : RingAtom
#   CA : ChainAtom
#
# Then:
#
#   Functional class atom type specification for an atom corresponds to:
#
#     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
#
#   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
#
#   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
#
#     HydrogenBondDonor: NH, NH2, OH
#     HydrogenBondAcceptor: N[!H], O
#     PositivelyIonizable: +, NH2
#     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
#
sub _InitializeFunctionalClassAtomTypesInformation {
  my $This = shift;

  # Default functional class atom types to use for generating atom identifiers
  # are: HBD, HBA, PI, NI, Ar, Hal
  #
  my @DefaultClasses = qw(HBD HBA PI NI Ar Hal);

  # The list is assigned into the existing array, when there is one, instead
  # of replacing the array reference...
  @{$This->{FunctionalClassesToUse}} = @DefaultClasses;

  return $This;
}

# Return a string containing data for TopologicalAtomPairsFingerprints object...
#
# The string is a list of "Name: Value" entries separated by "; ". Entries
# describing atomic invariants or functional classes are included only for the
# corresponding atom identifier types.
#
sub StringifyTopologicalAtomPairsFingerprints {
  my $This = shift;
  my @Entries;

  # Type of fingerprint...
  push @Entries, "Fingerprint type: $This->{Type}", "AtomIdentifierType: $This->{AtomIdentifierType}";

  # Min and max distance...
  push @Entries, "MinDistance: $This->{MinDistance}", "MaxDistance: $This->{MaxDistance}";

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my @Order = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    my %Available = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();
    my @Described = map { "$_: $Available{$_}" } @Order;

    push @Entries, "AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    push @Entries, "AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@Order, ", ", 0) . ">";
    push @Entries, "AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@Described, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my @Order = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    my %Available = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
    my @Described = map { "$_: $Available{$_}" } @Order;

    push @Entries, "FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    push @Entries, "FunctionalClassesOrder: <" . TextUtil::JoinWords(\@Order, ", ", 0) . ">";
    push @Entries, "AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@Described, ", ", 0) . ">";
  }

  # Total number of atom pairs...
  push @Entries, "NumOfAtomPairs: " . $This->{FingerprintsVector}->GetNumOfValues();

  # FingerprintsVector...
  push @Entries, "FingerprintsVector: < $This->{FingerprintsVector} >";

  return join("; ", @Entries);
}