1 package Fingerprints::TopologicalAtomTripletsFingerprints; 2 # 3 # $RCSfile: TopologicalAtomTripletsFingerprints.pm,v $ 4 # $Date: 2015/02/28 20:48:54 $ 5 # $Revision: 1.15 $ 6 # 7 # Author: Manish Sud <msud@san.rr.com> 8 # 9 # Copyright (C) 2015 Manish Sud. All rights reserved. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 use strict; 30 use Carp; 31 use Exporter; 32 use Fingerprints::Fingerprints; 33 use TextUtil (); 34 use Molecule; 35 use AtomTypes::AtomicInvariantsAtomTypes; 36 use AtomTypes::DREIDINGAtomTypes; 37 use AtomTypes::EStateAtomTypes; 38 use AtomTypes::FunctionalClassAtomTypes; 39 use AtomTypes::MMFF94AtomTypes; 40 use AtomTypes::SLogPAtomTypes; 41 use AtomTypes::SYBYLAtomTypes; 42 use AtomTypes::TPSAAtomTypes; 43 use AtomTypes::UFFAtomTypes; 44 45 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 46 47 @ISA = qw(Fingerprints::Fingerprints Exporter); 48 @EXPORT = qw(); 49 @EXPORT_OK = qw(); 50 51 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); 52 53 # Setup class variables... 54 my($ClassName); 55 _InitializeClass(); 56 57 # Overload Perl functions... 
use overload '""' => 'StringifyTopologicalAtomTripletsFingerprints';

# Class constructor...
#
# Creates the base object through the parent class constructor, reblesses it
# into the requested class, loads the default settings and then applies the
# property names and values supplied by the caller.
#
sub new {
  my($Class, %NamesAndValues) = @_;
  my($This);

  # Initialize object...
  $This = $Class->SUPER::new();
  bless($This, (ref($Class) || $Class));

  $This->_InitializeTopologicalAtomTripletsFingerprints();
  $This->_InitializeTopologicalAtomTripletsFingerprintsProperties(%NamesAndValues);

  return $This;
}

# Load default values for all object data used by this class...
#
sub _InitializeTopologicalAtomTripletsFingerprints {
  my($This) = @_;
  my($Name, %ScalarDefaults);

  %ScalarDefaults = (
    # Type of fingerprint...
    'Type' => 'TopologicalAtomTriplets',

    # Type of vector...
    'VectorType' => 'FingerprintsVector',

    # Type of FingerprintsVector...
    'FingerprintsVectorType' => 'NumericalValues',

    # Minimum and maximum bond distance between atom pairs...
    'MinDistance' => 1,
    'MaxDistance' => 10,

    # Determines whether to apply triangle inequality to distance triplets...
    'UseTriangleInequality' => 0,

    # Atom identifier type to use for atom IDs in atom triplets; it stays empty
    # until SetAtomIdentifierType is called. Currently supported values are:
    # AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
    # FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
    # TPSAAtomTypes, UFFAtomTypes
    'AtomIdentifierType' => '',
  );
  for $Name (keys %ScalarDefaults) {
    $This->{$Name} = $ScalarDefaults{$Name};
  }

  # Atom types assigned to each heavy atom...
  %{$This->{AssignedAtomTypes}} = ();

  # All atom triplets between minimum and maximum distance along with their
  # occurrence count...
  @{$This->{AtomTripletsIDs}} = ();
  %{$This->{AtomTripletsCount}} = ();
}

# Initialize class data: remember the package name used as prefix in error
# and warning messages...
#
sub _InitializeClass {
  $ClassName = __PACKAGE__;
}

# Initialize object properties....
# Apply the caller supplied names and values by invoking the corresponding
# Set<Name> method for each of them, make sure the mandatory properties were
# specified and initialize the fingerprints vector...
#
# AtomIdentifierType is always applied first and the remaining properties
# follow in sorted name order. SetAtomIdentifierType resets AtomicInvariantsToUse
# and FunctionalClassesToUse to their default values and SetFunctionalClassesToUse
# refuses to work before the identifier type is known; applying the properties
# in hash order made the outcome depend on the order in which Perl happened to
# return the hash keys.
#
sub _InitializeTopologicalAtomTripletsFingerprintsProperties {
  my($This, %NamesAndValues) = @_;
  my($Name, $MethodName, @Names);

  @Names = grep { $_ ne 'AtomIdentifierType' } keys %NamesAndValues;
  @Names = sort @Names;
  if (exists $NamesAndValues{AtomIdentifierType}) {
    unshift @Names, 'AtomIdentifierType';
  }

  for $Name (@Names) {
    $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }
  if (!exists $NamesAndValues{AtomIdentifierType}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
  }

  $This->_InitializeFingerprintsVector();

  return $This;
}

# Set minimum distance for atom triplets...
#
# Croaks unless TextUtil::IsPositiveInteger accepts the value; returns the object.
#
sub SetMinDistance {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid:  It must be a positive integer...";
  }
  $This->{MinDistance} = $Value;

  return $This;
}

# Set maximum distance for atom triplets...
#
# Croaks unless TextUtil::IsPositiveInteger accepts the value; returns the object.
# The relation to MinDistance is checked later by GenerateFingerprints.
#
sub SetMaxDistance {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid:  It must be a positive integer...";
  }
  $This->{MaxDistance} = $Value;

  return $This;
}

# Set atom identifier type..
#
# The value is matched case insensitively against the supported type names and
# can be set only once for an object. Setting it also loads the default
# atomic invariants or functional classes for the types which use them.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not valid. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
  }

  if ($This->{AtomIdentifierType}) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change initial atom identifier type:  It's already set...";
  }

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize atom identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}

# Generate fingerprints description...
#
# An explicitly set Description wins; otherwise the description is assembled
# from the fingerprints type, atom identifier type and distance range.
#
sub GetDescription {
  my($This) = @_;

  # Is description explicitly set?
  if (exists $This->{Description}) {
    return $This->{Description};
  }

  # Generate fingerprints description...

  return "$This->{Type}:$This->{AtomIdentifierType}:MinDistance$This->{MinDistance}:MaxDistance$This->{MaxDistance}";
}

# Generate topological atom triplets fingerprints...
#
# Let:
#
#   AT = Any of the supported atom types
#
#   ATx = Atom type for atom x
#   ATy = Atom type for atom y
#   ATz = Atom type for atom z
#
#   Dxy = Distance between Px and Py
#   Dxz = Distance between Px and Pz
#   Dyz = Distance between Py and Pz
#
# Then:
#
#   ATx-Dyz-ATy-Dxz-ATz-Dxy = Atom triplet ID for atom types ATx, ATy and ATz
#
# Methodology:
#   . Generate a distance matrix.
#   . Assign atom types to all the atoms.
#   . Using distance matrix and atom types, count occurrence of unique atom triplets
#     within specified distance range along with optional triangle inequality
#
# Notes:
#   . Hydrogen atoms are ignored during the fingerprint generation.
#   . For a molecule containing N atoms with all different atom type, the total number of
#     possible unique atom triplets without applying triangle inequality check corresponds to:
#
#     Factorial( N ) / ( Factorial( N - 3 ) * Factorial (3) )
#
#     However, due to similar atom types assigned to atoms in a molecule for a specific atom
#     typing methodology and specified distance range used during fingerprints generation, the
#     actual number of unique triplets is usually smaller than the theoretical limit.
#
# The method croaks for MinDistance > MaxDistance. A failure to generate the
# distance matrix or to assign atom types results in a warning; the object is
# returned without fingerprints and with the cached molecule data cleared.
#
sub GenerateFingerprints {
  my($This) = @_;

  if ($This->{MinDistance} > $This->{MaxDistance}) {
    croak "Error: ${ClassName}->GenerateFingerprints: No fingerprints generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
  }

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Generate distance matrix...
  if (!$This->_SetupDistanceMatrix()) {
    # Don't hold on to cached atoms after an unsuccessful run...
    $This->_ClearMoleculeDataCache();
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't generate distance matrix...";
    return $This;
  }

  # Assign atom types to all heavy atoms...
  if (!$This->_AssignAtomTypes()) {
    # Don't hold on to cached atoms after an unsuccessful run...
    $This->_ClearMoleculeDataCache();
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
    return $This;
  }

  # Initialize values of topological atom triplets...
  $This->_InitializeToplogicalAtomTriplets();

  # Count atom triplets...
  $This->_GenerateAndCountAtomTriplets();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}

# Setup distance matrix...
#
# Retrieves the distance matrix from the molecule and keeps it in the object.
# Returns the object on success and undef when the molecule didn't provide a
# distance matrix.
#
sub _SetupDistanceMatrix {
  my($This) = @_;

  $This->{DistanceMatrix} = $This->GetMolecule()->GetDistanceMatrix();

  if (!$This->{DistanceMatrix}) {
    return undef;
  }

  return $This;
}

# Assign appropriate atom types to all heavy atoms...
#
# An atom types object of the class matching AtomIdentifierType is created for
# the molecule and asked to assign atom types. The types of all non-hydrogen
# atoms in the cached atoms list are then collected into AssignedAtomTypes
# keyed by atom ID.
#
# Returns the object on success and undef when the atom types class reports
# an unsuccessful assignment; croaks for an unknown atom identifier type.
#
sub _AssignAtomTypes {
  my($This) = @_;
  my($SpecifiedAtomTypes, $Atom, $AtomID, $IgnoreHydrogens, $TypeName, $AtomTypesClass, %ConstructorParameters);

  %{$This->{AssignedAtomTypes}} = ();
  $IgnoreHydrogens = 1;

  # Parameters, besides Molecule, passed to the constructor of each atom types
  # class. TPSAAtomTypes is the only class which doesn't receive IgnoreHydrogens...
  #
  %ConstructorParameters = (
    'AtomicInvariantsAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse}],
    'DREIDINGAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'EStateAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'FunctionalClassAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse}],
    'MMFF94AtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'SLogPAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'SYBYLAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'TPSAAtomTypes' => ['IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0],
    'UFFAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
  );

  # Identifier type is matched case insensitively, the way SetAtomIdentifierType
  # accepts it...
  ($TypeName) = grep { $This->{AtomIdentifierType} =~ /^\Q$_\E$/i } keys %ConstructorParameters;
  if (!defined $TypeName) {
    croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  $AtomTypesClass = "AtomTypes::${TypeName}";
  $SpecifiedAtomTypes = $AtomTypesClass->new('Molecule' => $This->{Molecule}, @{$ConstructorParameters{$TypeName}});

  # Assign atom types...
  $SpecifiedAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return undef;
  }

  # Collect assigned atom types...
  ATOM: for $Atom (@{$This->{Atoms}}) {
    if ($Atom->IsHydrogen()) {
      next ATOM;
    }
    $AtomID = $Atom->GetID();
    $This->{AssignedAtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
  }

  return $This;
}

# Initialize topological atom triplets between specified distance range...
#
# Empties the list of atom triplet IDs and the atom triplets count before a
# new round of fingerprints generation.
#
sub _InitializeToplogicalAtomTriplets {
  my($This) = @_;

  @{$This->{AtomTripletsIDs}} = ();
  %{$This->{AtomTripletsCount}} = ();

  return $This;
}

# Count atom triplets between minimum and maximum distance at each
# distance using distance matrix and atom types assigned to each heavy
# atom.
#
# Every combination of three atoms with an assigned atom type is visited in
# increasing atom index order. A combination is counted when all three
# pairwise distances retrieved from the distance matrix lie between MinDistance
# and MaxDistance and, for UseTriangleInequality, the distances also pass
# _DoDistancesSatisfyTriangleInequality. Counts are accumulated in
# AtomTripletsCount keyed by the atom triplet ID.
#
sub _GenerateAndCountAtomTriplets {
  my($This) = @_;
  my($DistanceMatrix, $SkipIndexCheck, $MinDistance, $MaxDistance, $AtomIndex, $AtomID, $LastPosition, $Position1, $Position2, $Position3, $AtomIndex1, $AtomIndex2, $AtomIndex3, $AtomType1, $AtomType2, $AtomType3, $Distance12, $Distance13, $Distance23, $AtomTripletID, @TypedAtoms);

  $DistanceMatrix = $This->{DistanceMatrix};
  $SkipIndexCheck = 0;
  $MinDistance = $This->{MinDistance};
  $MaxDistance = $This->{MaxDistance};

  # Collect atom index and atom type for atoms with an assigned atom type; all
  # other atoms, i.e. the hydrogens, don't participate in atom triplets...
  @TypedAtoms = ();
  for $AtomIndex (0 .. $#{$This->{Atoms}}) {
    $AtomID = $This->{AtomIndexToID}{$AtomIndex};
    if (exists($This->{AssignedAtomTypes}{$AtomID})) {
      push @TypedAtoms, [$AtomIndex, $This->{AssignedAtomTypes}{$AtomID}];
    }
  }
  $LastPosition = $#TypedAtoms;

  FIRSTATOM: for $Position1 (0 .. $LastPosition) {
    ($AtomIndex1, $AtomType1) = @{$TypedAtoms[$Position1]};

    SECONDATOM: for $Position2 (($Position1 + 1) .. $LastPosition) {
      ($AtomIndex2, $AtomType2) = @{$TypedAtoms[$Position2]};

      $Distance12 = $DistanceMatrix->GetValue($AtomIndex1, $AtomIndex2, $SkipIndexCheck);
      next SECONDATOM if ($Distance12 < $MinDistance || $Distance12 > $MaxDistance);

      THIRDATOM: for $Position3 (($Position2 + 1) .. $LastPosition) {
        ($AtomIndex3, $AtomType3) = @{$TypedAtoms[$Position3]};

        $Distance13 = $DistanceMatrix->GetValue($AtomIndex1, $AtomIndex3, $SkipIndexCheck);
        $Distance23 = $DistanceMatrix->GetValue($AtomIndex2, $AtomIndex3, $SkipIndexCheck);

        next THIRDATOM if ($Distance13 < $MinDistance || $Distance13 > $MaxDistance);
        next THIRDATOM if ($Distance23 < $MinDistance || $Distance23 > $MaxDistance);

        if ($This->{UseTriangleInequality} && !$This->_DoDistancesSatisfyTriangleInequality($Distance12, $Distance13, $Distance23)) {
          next THIRDATOM;
        }

        # Each atom type is paired with the distance between the other two atoms...
        $AtomTripletID = $This->_GetAtomTripletID($AtomType1, $Distance23, $AtomType2, $Distance13, $AtomType3, $Distance12);
        $This->{AtomTripletsCount}{$AtomTripletID} += 1;
      }
    }
  }
  return $This;
}

# Check triangle inequality...
#
# Returns 1 when each distance is strictly larger than the absolute difference
# and strictly smaller than the sum of the other two distances; 0 otherwise.
#
sub _DoDistancesSatisfyTriangleInequality {
  my($This, $Distance1, $Distance2, $Distance3) = @_;
  my($Sides, $Side, $OtherSide1, $OtherSide2);

  for $Sides ([$Distance1, $Distance2, $Distance3], [$Distance2, $Distance1, $Distance3], [$Distance3, $Distance1, $Distance2]) {
    ($Side, $OtherSide1, $OtherSide2) = @{$Sides};
    unless ($Side > abs($OtherSide1 - $OtherSide2) && $Side < ($OtherSide1 + $OtherSide2)) {
      return 0;
    }
  }
  return 1;
}

# Get atom triplet ID corresponding to atom types and distances corresponding to atom triplet...
#
# Each atom type is combined with the distance passed right after it into a
# "<AtomType>-D<Distance>" string; the three strings are sorted and joined by
# "-" so that the ID doesn't depend on the order of atoms in the triplet.
#
sub _GetAtomTripletID {
  my($This, $ATx, $Dyz, $ATy, $Dxz, $ATz, $Dxy) = @_;
  my(@AtomIDs);

  @AtomIDs = ("${ATx}-D${Dyz}", "${ATy}-D${Dxz}", "${ATz}-D${Dxy}");

  return join("-", sort(@AtomIDs));
}

# Set final fingerprints vector...
#
# Marks the fingerprints as generated, stores the sorted atom triplet IDs and
# adds the IDs along with their counts, in the same order, to the fingerprints
# vector.
#
sub _SetFinalFingerprints {
  my($This) = @_;
  my(@Values);

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  @{$This->{AtomTripletsIDs}} = sort keys %{$This->{AtomTripletsCount}};
  @Values = map { $This->{AtomTripletsCount}{$_} } @{$This->{AtomTripletsIDs}};

  # Add AtomTripletsIDs and values to fingerprint vector...
  $This->{FingerprintsVector}->AddValueIDs(\@{$This->{AtomTripletsIDs}});
  $This->{FingerprintsVector}->AddValues(\@Values);

  return $This;
}

# Get atom triplet IDs corresponding to atom triplets count values in fingerprint
# vector as an array or reference to an array...
#
# AtomTripletIDs list differs between molecules and is generated during finalization
# of fingerprints to make sure the fingerprint vector containing count values
# matches the atom triplets array.
#
sub GetAtomTripletIDs {
  my($This) = @_;

  if (wantarray) {
    return @{$This->{AtomTripletsIDs}};
  }
  return \@{$This->{AtomTripletsIDs}};
}

# Cache appropriate molecule data...
#
# Stores the atoms of the molecule in Atoms and the mapping of their position
# in that list to atom ID in AtomIndexToID.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;
  my($Atom, $AtomIndex, @AtomIDs);

  # Get all atoms including hydrogens to correctly map atom indices to atom IDs for
  # usage of distance matrix. The hydrogen atoms are ignored during processing...
  #
  @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();

  # Get all atom IDs...
  @AtomIDs = ();
  for $Atom (@{$This->{Atoms}}) {
    push @AtomIDs, $Atom->GetID();
  }

  # Set AtomIndex to AtomID hash...
  %{$This->{AtomIndexToID}} = ();
  for $AtomIndex (0 .. $#AtomIDs) {
    $This->{AtomIndexToID}{$AtomIndex} = $AtomIDs[$AtomIndex];
  }

  return $This;
}

# Set atomic invariants to use for atom identifiers...
#
# The atomic invariants are specified either as a list of values or as a
# reference to an array. A call without any values results in a warning and
# leaves the object unchanged; an unsupported atomic invariant leads to croak
# before anything is changed.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;
  my($SpecifiedAtomicInvariant, @SpecifiedAtomicInvariants);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  @SpecifiedAtomicInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomicInvariants are valid...
  for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
    if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
      croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
    }
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = @SpecifiedAtomicInvariants;

  return $This;
}

# Set functional classes to use for atom identifiers...
#
# The functional classes are specified either as a list of values or as a
# reference to an array. A call without any values, or for an object whose
# AtomIdentifierType is not FunctionalClassAtomTypes, results in a warning and
# leaves the object unchanged; an unsupported functional class leads to croak
# before anything is changed.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;
  my($FirstValue, $TypeOfFirstValue, $SpecifiedFunctionalClass, @SpecifiedFunctionalClasses, @FunctionalClassesToUse);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
    # The property of this class is named AtomIdentifierType...
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for AtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  $FirstValue = $Values[0];
  $TypeOfFirstValue = ref $FirstValue;

  @SpecifiedFunctionalClasses = ();
  @FunctionalClassesToUse = ();

  if ($TypeOfFirstValue =~ /^ARRAY/) {
    push @SpecifiedFunctionalClasses, @{$FirstValue};
  }
  else {
    push @SpecifiedFunctionalClasses, @Values;
  }

  # Make sure specified FunctionalClasses are valid...
  for $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
    if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) {
      croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
    }
    push @FunctionalClassesToUse, $SpecifiedFunctionalClass;
  }

  # Set functional classes to use...
  @{$This->{FunctionalClassesToUse}} = ();
  push @{$This->{FunctionalClassesToUse}}, @FunctionalClassesToUse;

  return $This;
}

# Initialize atom identifier type information...
#
# Current supported values:
#
# AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# Default values are loaded only for AtomicInvariantsAtomTypes and
# FunctionalClassAtomTypes; nothing needs to be done for the other supported
# types and any other value leads to croak.
#
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;
  my($IdentifierType);

  $IdentifierType = $This->{AtomIdentifierType};

  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
    return $This;
  }

  if ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
    return $This;
  }

  if ($IdentifierType =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    # Nothing to do for now...
    return $This;
  }

  croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
}

# Initialize atomic invariants atom types to use for generating atom IDs in atom triplets...
#
# Let:
#   AS = Atom symbol corresponding to element symbol
#
#   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
#   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
#   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
#   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   H<n>   = Number of implicit and explicit hydrogens for atom
#   Ar     = Aromatic annotation indicating whether atom is aromatic
#   RA     = Ring atom annotation indicating whether atom is a ring atom
#   FC<+n/-n> = Formal charge assigned to atom
#   MN<n> = Mass number indicating isotope other than most abundant isotope
#   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
#
#   ATx = Atomic invariants atom type for atom x
#   ATy = Atomic invariants atom type for atom y
#   ATz = Atomic invariants atom type for atom z
#
#   Dxy = Distance between Px and Py
#   Dxz = Distance between Px and Pz
#   Dyz = Distance between Py and Pz
#
# Then:
#
#   Atom triplet AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
#
#     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
#
#   Topological atom triplet ID between atom IDs ATx, ATy and ATz corresponds to:
#
#     ATx-Dyz-ATy-Dxz-ATz-Dxy
#
# Except for AS which is a required atomic invariant in atom triplet AtomIDs, all other atomic invariants are
# optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
# AtomID specification doesn't include atomic invariants with zero or undefined values.
#
# Examples of atom triplet AtomIDs:
#
#   O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge
#   O.X1.BO1.FC-1 - Hydroxyl oxygen in carboxylate with explicit negative charge
#   O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon
#   O.X2.BO2 - Hydroxyl oxygen in carboxylate attached to carbonyl carbon and another heavy atom
#
#   C.X2.BO3.H1.Ar - Aromatic carbon
#
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;

  # Default atomic invariants to use for generating atom triplet atom IDs: AS, X, BO, H, FC
  #
  @{$This->{AtomicInvariantsToUse}} = qw(AS X BO H FC);

  return $This;
}

# Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
# class, to use for generating atom identifiers...
#
# Let:
#   HBD: HydrogenBondDonor
#   HBA: HydrogenBondAcceptor
#   PI :  PositivelyIonizable
#   NI : NegativelyIonizable
#   Ar : Aromatic
#   Hal : Halogen
#   H : Hydrophobic
#   RA : RingAtom
#   CA : ChainAtom
#
# Then:
#
#   Functional class atom type specification for an atom corresponds to:
#
#     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
#
#   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
#
#   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
#
#     HydrogenBondDonor: NH, NH2, OH
#     HydrogenBondAcceptor: N[!H], O
#     PositivelyIonizable: +, NH2
#     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
#
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;

  # Default functional class atom types to use for generating atom identifiers
  # are: HBD, HBA, PI, NI, Ar, Hal
  #
  @{$This->{FunctionalClassesToUse}} = qw(HBD HBA PI NI Ar Hal);

  return $This;
}

# Clear cached molecule data...
#
# Empties the list of atoms cached by _SetupMoleculeDataCache.
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  @{$This->{Atoms}} = ();

  return $This;
}

# Return a string containing data for TopologicalAtomTripletsFingerprints object...
#
# The string is a "; " delimited list of "Name: Value" entries covering the
# fingerprint type, atom identifier type, distance range, usage of triangle
# inequality, atom type specific settings for AtomicInvariantsAtomTypes and
# FunctionalClassAtomTypes, number of atom triplets and the fingerprints vector.
#
sub StringifyTopologicalAtomTripletsFingerprints {
  my($This) = @_;
  my(@Entries);

  # Type of fingerprint...
  @Entries = ("Fingerprint type: $This->{Type}", "AtomIdentifierType: $This->{AtomIdentifierType}");

  # Min and max distance...
  push @Entries, "MinDistance: $This->{MinDistance}", "MaxDistance: $This->{MaxDistance}";
  if ($This->{UseTriangleInequality}) {
    push @Entries, "UseTriangleInequality: Yes";
  }
  else {
    push @Entries, "UseTriangleInequality: No";
  }

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my(@AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants);

    @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

    @AtomicInvariants = map { "$_: $AvailableAtomicInvariants{$_}" } @AtomicInvariantsOrder;

    push @Entries, "AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    push @Entries, "AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
    push @Entries, "AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my(@FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses);

    @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

    @FunctionalClasses = map { "$_: $AvailableFunctionalClasses{$_}" } @FunctionalClassesOrder;

    push @Entries, "FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    push @Entries, "FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
    push @Entries, "AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
  }

  # Total number of atom triplets...
  push @Entries, "NumOfAtomTriplets: " . $This->{FingerprintsVector}->GetNumOfValues();

  # FingerprintsVector...
  push @Entries, "FingerprintsVector: < $This->{FingerprintsVector} >";

  return join("; ", @Entries);
}