package Fingerprints::ExtendedConnectivityFingerprints;
#
# $RCSfile: ExtendedConnectivityFingerprints.pm,v $
# $Date: 2015/02/28 20:48:54 $
# $Revision: 1.39 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use TextUtil ();
use MathUtil ();
use Fingerprints::Fingerprints;
use Molecule;
# One atom types class for each supported AtomIdentifierType value...
use AtomTypes::AtomicInvariantsAtomTypes;
use AtomTypes::FunctionalClassAtomTypes;
use AtomTypes::DREIDINGAtomTypes;
use AtomTypes::EStateAtomTypes;
use AtomTypes::MMFF94AtomTypes;
use AtomTypes::SLogPAtomTypes;
use AtomTypes::SYBYLAtomTypes;
use AtomTypes::TPSAAtomTypes;
use AtomTypes::UFFAtomTypes;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

# Methods not defined in this package are inherited from Fingerprints::Fingerprints...
@ISA = qw(Fingerprints::Fingerprints Exporter);
# Nothing is exported: the class is used through its methods...
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
#
# $ClassName is used as a prefix in error and warning messages. It has to be
# declared before _InitializeClass is called, as that function assigns to it.
#
my($ClassName);
_InitializeClass();

# Overload Perl functions...
use overload '""' => 'StringifyExtendedConnectivityFingerprints';

# Class constructor...
#
# Parameters:
#   $Class          - Class name, or an existing object whose class is reused
#   %NamesAndValues - Property names and values passed on to
#                     _InitializeExtendedConnectivityFingerprintsProperties
#
# Returns the newly created object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Create the object using the parent class constructor and bless it into this class...
  my $This = bless $Class->SUPER::new(), ref($Class) || $Class;

  # Assign default values first and then apply the specified properties...
  $This->_InitializeExtendedConnectivityFingerprints();
  $This->_InitializeExtendedConnectivityFingerprintsProperties(%NamesAndValues);

  return $This;
}

# Initialize object data...
#
# Assigns default values to the fingerprints options and empties all the
# hashes used to hold intermediate data during fingerprints generation.
#
sub _InitializeExtendedConnectivityFingerprints {
  my($This) = @_;

  my %DefaultValues = (
    # Type of fingerprint to generate:
    #
    # ExtendedConnectivity - Set of integer identifiers corresponding to structurally unique features
    # ExtendedConnectivityCount - Set of integer identifiers corresponding to structurally unique features and their count
    # ExtendedConnectivityBits - A bit vector indicating presence/absence of structurally unique features
    #
    Type => 'ExtendedConnectivity',

    # Atomic neighborhoods radius for extended connectivity...
    NeighborhoodRadius => 2,

    # Size of bit vector to use during generation of ExtendedConnectivityBits fingerprints...
    Size => 1024,

    # Min and max size of bit vector to use during generation of ExtendedConnectivityBits fingerprints...
    MinSize => 32,
    MaxSize => 2**32,

    # Type of atom attributes to use for initial identifier assignment to non-hydrogen atoms
    # during the calculation of extended connectivity fingerprints [ Ref 48, Ref 52 ]...
    #
    # Currently supported values are: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes,
    # DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
    # TPSAAtomTypes, UFFAtomTypes
    #
    AtomIdentifierType => '',

    # Random number generator to use during generation of fingerprints bit-vector
    # string: Perl CORE::rand or MayaChemTools MathUtil::random function.
    #
    # The random number generator implemented in MayaChemTools is a variant of
    # linear congruential generator (LCG) as described by Miller et al. [ Ref 120 ].
    # It is also referred to as Lehmer random number generator or Park-Miller
    # random number generator.
    #
    # Unlike Perl's core random number generator function rand, the random number
    # generator implemented in MayaChemTools, MathUtil::random, generates consistent
    # random values across different platforms for a specific random seed and leads
    # to generation of portable fingerprints bit-vector strings.
    #
    UsePerlCoreRandom => 1,
  );
  @{$This}{keys %DefaultValues} = values %DefaultValues;

  # Empty out, in place, the hashes holding data generated for each radius level:
  #
  # AtomNeighborhoods - Atom neighborhoods up to specified neighborhood radius
  # AtomIdentifiers - Atom identifiers at different neighborhoods up to specified neighborhood radius
  # UniqueAtomIdentifiers, UniqueAtomIdentifiersCount - Unique atom identifiers, and their
  #   count, at different neighborhoods up to specified neighborhood radius
  # StructurallyUniqueAtomIdentifiers, StructurallyUniqueAtomIdentifiersCount - Structurally
  #   unique atom identifiers, and their count, at different neighborhoods up to specified
  #   neighborhood radius
  #
  for my $DataName (qw(AtomNeighborhoods AtomIdentifiers UniqueAtomIdentifiers UniqueAtomIdentifiersCount StructurallyUniqueAtomIdentifiers StructurallyUniqueAtomIdentifiersCount)) {
    %{$This->{$DataName}} = ();
  }

  # Structure feature information at different neighborhoods up to specified neighborhood
  # radius used during removal of atom identifiers which are structurally equivalent...
  %{$This->{StructureFeatures}} = ();
}

# Initialize class ...
#
# Called once while the module is being loaded to set up the class name used
# in error and warning messages.
#
sub _InitializeClass {
  # Class name...
  $ClassName = __PACKAGE__;
}

# Initialize object properties....
#
# Applies the specified property names and values by calling the corresponding
# Set<Name> methods, makes sure the required properties were specified, and
# sets up the vector or bit vector matching the fingerprints type.
#
# Parameters:
#   %NamesAndValues - Property names and values; Molecule and AtomIdentifierType
#                     are required
#
# Returns the object. Croaks when a required property is missing, when Size is
# not a power of 2, or when the fingerprints type is not known.
#
sub _InitializeExtendedConnectivityFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  # Each property is set through its Set<Name> method so that any validation
  # performed by the method is applied...
  my($Name, $Value, $MethodName);
  while (($Name, $Value) = each %NamesAndValues) {
    $MethodName = "Set${Name}";
    $This->$MethodName($Value);
  }

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }

  # Make sure AtomIdentifierType was specified...
  if (!exists $NamesAndValues{AtomIdentifierType}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
  }

  # Make sure it's power of 2...
  if (exists $NamesAndValues{Size}) {
    if (!TextUtil::IsNumberPowerOfNumber($NamesAndValues{Size}, 2)) {
      croak "Error: ${ClassName}->New: Specified size value, $NamesAndValues{Size}, must be power of 2...";
    }
  }

  # Set up the vector corresponding to the fingerprints type...
  if ($This->{Type} =~ /^ExtendedConnectivity$/i) {
    $This->_InitializeExtendedConnectivityFingerprintsVector();
  }
  elsif ($This->{Type} =~ /^ExtendedConnectivityCount$/i) {
    $This->_InitializeExtendedConnectivityCountFingerprintsVector();
  }
  elsif ($This->{Type} =~ /^ExtendedConnectivityBits$/i) {
    $This->_InitializeExtendedConnectivityBitsFingerprintsBitVector();
  }
  else {
    croak "Error: ${ClassName}->_InitializeExtendedConnectivityFingerprintsProperties: Unknown ExtendedConnectivity fingerprints type: $This->{Type}; Supported fingerprints types: ExtendedConnectivity, ExtendedConnectivityCount or ExtendedConnectivityBits...";
  }

  return $This;
}

# Initialize extended connectivity fingerprints vector...
#
# Used for the ExtendedConnectivity fingerprints type. The vector itself is set
# up by _InitializeFingerprintsVector, which is not defined in this part of the
# file (presumably inherited from the parent class).
#
sub _InitializeExtendedConnectivityFingerprintsVector {
  my($This) = @_;

  # Type of vector...
  $This->{VectorType} = 'FingerprintsVector';

  # Type of FingerprintsVector...
  $This->{FingerprintsVectorType} = 'AlphaNumericalValues';

  $This->_InitializeFingerprintsVector();

  return $This;
}

# Initialize extended connectivity count fingerprints vector...
#
# Used for the ExtendedConnectivityCount fingerprints type.
#
sub _InitializeExtendedConnectivityCountFingerprintsVector {
  my($This) = @_;

  # Type of vector...
  $This->{VectorType} = 'FingerprintsVector';

  # Type of FingerprintsVector...
  $This->{FingerprintsVectorType} = 'NumericalValues';

  $This->_InitializeFingerprintsVector();

  return $This;
}

# Initialize extended connectivity bit fingerprints vector...
#
# Used for the ExtendedConnectivityBits fingerprints type.
#
sub _InitializeExtendedConnectivityBitsFingerprintsBitVector {
  my($This) = @_;

  # Type of vector...
  $This->{VectorType} = 'FingerprintsBitVector';

  $This->_InitializeFingerprintsBitVector();

  return $This;
}

# Set type...
#
# Parameters:
#   $Type - ExtendedConnectivity, ExtendedConnectivityCount or
#           ExtendedConnectivityBits; letter case is ignored and the name is
#           stored using its standard capitalization
#
# Returns the object. Croaks for any other value.
#
sub SetType {
  my($This, $Type) = @_;

  if ($Type =~ /^ExtendedConnectivity$/i) {
    $This->{Type} = 'ExtendedConnectivity';
  }
  elsif ($Type =~ /^ExtendedConnectivityCount$/i) {
    $This->{Type} = 'ExtendedConnectivityCount';
  }
  elsif ($Type =~ /^ExtendedConnectivityBits$/i) {
    $This->{Type} = 'ExtendedConnectivityBits';
  }
  else {
    # Report the rejected value and not the current type, which is still valid...
    croak "Error: ${ClassName}->SetType: Unknown ExtendedConnectivity fingerprints type: $Type; Supported fingerprints types: ExtendedConnectivity, ExtendedConnectivityCount or ExtendedConnectivityBits...";
  }
  return $This;
}

# Disable vector type change...
#
# Always croaks: the vector type is assigned during object initialization
# using the fingerprints type.
#
sub SetVectorType {
  my($This, $Type) = @_;

  croak "Error: ${ClassName}->SetVectorType: Can't change vector type...";
}

# Disable vector type change...
#
# Always croaks: the fingerprints vector type is assigned during object
# initialization using the fingerprints type.
#
sub SetFingerprintsVectorType {
  my($This, $Type) = @_;

  croak "Error: ${ClassName}->SetFingerprintsVectorType: Can't change fingerprints vector type...";
}

# Set initial atom identifier type...
#
# Parameters:
#   $IdentifierType - Name of one of the supported atom types; letter case is
#                     ignored and the value is stored as specified
#
# Returns the object. Croaks when the value is not supported or when the
# identifier type has already been set.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|FunctionalClassAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes and UFFAtomTypes.";
  }

  # The identifier type may be set only once...
  if ($This->{AtomIdentifierType}) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type:  It's already set...";
  }

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}

# Set atom neighborhood radius...
#
# Parameters:
#   $Value - Neighborhood radius; it must be an integer >= 0
#
# Returns the object. Croaks for any other value.
#
sub SetNeighborhoodRadius {
  my($This, $Value) = @_;

  if (!TextUtil::IsInteger($Value)) {
    croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid:  It must be an  integer...";
  }

  if ($Value < 0 ) {
    croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid:  It must be >= 0...";
  }
  $This->{NeighborhoodRadius} = $Value;

  return $This;
}

# Generate fingerprints description...
#
# Returns the explicitly set description when available; otherwise, a string
# of the form <Type>:<AtomIdentifierType>:Radius<NeighborhoodRadius>.
#
sub GetDescription {
  my($This) = @_;

  # Is description explicitly set?
  if (exists $This->{Description}) {
    return $This->{Description};
  }

  # Generate fingerprints description...

  return "$This->{Type}:$This->{AtomIdentifierType}:Radius$This->{NeighborhoodRadius}";
}

# Generate fingerprints...
#
# Methodology:
#   . Assign initial atom identifiers to all non-hydrogen atoms in the molecule
#
#   . Remove duplicates from the initial identifiers and add them to list corresponding
#     to molecule fingerprint
#
#   . For NeighborhoodRadius value of 0, just return the molecule fingerprint list
#
#   . For each NeighborhoodRadius level
#      . For each non-hydrogen CentralAtom at this NeighborhoodRadius level
#         . For each non-hydrogen SuccessorNeighborAtom
#           . Collect (BondOrder AtomIdentifier) pair of values corresponding to
#             (CentralAtom SuccessorNeighborAtom) and add it to a list
#
#         . Sort list containing (BondOrder AtomIdentifier) pairs first by BondOrder followed
#           by AtomIdentifiers to make these values graph invariant
#         . Generate a hash code for the values in the list
#         . Assign hash code as new atom identifier at the current NeighborhoodRadius level
#         . Save all atoms and bonds corresponding to the substructure involved in
#           generating the hash code to be used for identifying structural duplicate hash code
#
#         . Add the new identifier to the molecule fingerprint list making sure it's not a duplicate
#           identifier
#
#   Hash code atom identifier deduplication:
#     . Track/remove the identifier generated at higher neighborhood radius level
#
#  Structural atom identifier deduplication:
#    . For equivalent atoms and bonds corresponding to substructure at a NeighborhoodRadius level,
#      track/remove the atom identifier with largest value
#
# Returns the object. When atom types can't be assigned to all atoms, a warning
# is issued and no fingerprints are generated.
#
sub GenerateFingerprints {
  my($This) = @_;

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Assign initial atom identifiers...
  if (!$This->_AssignInitialAtomIdentifiers()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";

    # Don't leave cached molecule data behind on failure...
    $This->_ClearMoleculeDataCache();

    return $This;
  }

  # Identify atom neighborhoods up to specified radius...
  $This->_GetAtomNeighborhoods();

  # Assign atom identifiers to central atoms considering atom neighborhoods at each
  # radius level...
  $This->_AssignAtomIdentifiersToAtomNeighborhoods();

  # Remove duplicates identifiers...
  $This->_RemoveDuplicateAtomIdentifiers();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}

# Assign appropriate initial atom identifiers...
#
#   Generation of initial identifier for a specific atom involves:
#     . Values of the specified atom attributes are appended in a specific order to
#       generate an initial atom identifier string
#     . A 32 bit unsigned integer hash key, using TextUtil::HashCode function, is
#       generated for the atom identifier and assigned to the atom as initial
#       atom identifier.
#
# Returns the object on success and nothing (a false value) when atom types
# couldn't be assigned. Croaks when the atom identifier type is not supported.
#
sub _AssignInitialAtomIdentifiers {
  my($This) = @_;

  # Initialize atom identifiers...
  $This->_InitializeAtomIdentifiers();

  # Set up atom types...
  my $IgnoreHydrogens = 1;
  my @CommonParameters = ('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);

  # Atom types name along with the parameters for its AtomTypes::<Name> class constructor.
  # TPSAAtomTypes is the only one which is not passed IgnoreHydrogens...
  my @SupportedAtomTypes = (
    ['AtomicInvariantsAtomTypes', [@CommonParameters, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse}]],
    ['FunctionalClassAtomTypes', [@CommonParameters, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse}]],
    ['DREIDINGAtomTypes', [@CommonParameters]],
    ['EStateAtomTypes', [@CommonParameters]],
    ['MMFF94AtomTypes', [@CommonParameters]],
    ['SLogPAtomTypes', [@CommonParameters]],
    ['SYBYLAtomTypes', [@CommonParameters]],
    ['TPSAAtomTypes', ['Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0]],
    ['UFFAtomTypes', [@CommonParameters]],
  );

  my($AtomTypesName, $ParametersRef);
  ATOMTYPES: for my $SupportedAtomTypesRef (@SupportedAtomTypes) {
    my $SupportedName = $SupportedAtomTypesRef->[0];
    if ($This->{AtomIdentifierType} =~ /^\Q$SupportedName\E$/i) {
      ($AtomTypesName, $ParametersRef) = @{$SupportedAtomTypesRef};
      last ATOMTYPES;
    }
  }
  if (!defined $AtomTypesName) {
    croak "Error: ${ClassName}->_AssignInitialAtomIdentifiers: Couldn't assign intial atom identifiers: InitialAtomIdentifierType $This->{AtomIdentifierType} is not supported...";
  }

  my $AtomTypesClass = "AtomTypes::${AtomTypesName}";
  my $SpecifiedAtomTypes = $AtomTypesClass->new(@{$ParametersRef});

  # Assign atom types...
  $SpecifiedAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return;
  }

  # Assign atom identifiers at radius 0...
  my $Radius = 0;
  for my $Atom (@{$This->{Atoms}}) {
    my $AtomID = $Atom->GetID();

    # Atoms without an atom type are all assigned the identifier for 'None'...
    my $AtomType = $SpecifiedAtomTypes->GetAtomType($Atom);
    my $InitialAtomTypeString = $AtomType ? $AtomType : 'None';

    $This->{AtomIdentifiers}{$Radius}{$AtomID} = TextUtil::HashCode($InitialAtomTypeString);
  }

  return $This;
}

# Initialize atom identifiers...
#
# Empties out atom identifiers data for each radius level from 0 up to the
# specified neighborhood radius. Returns the object.
#
sub _InitializeAtomIdentifiers {
  my($This) = @_;

  %{$This->{AtomIdentifiers}} = ();
  for my $CurrentRadius (0 .. $This->{NeighborhoodRadius}) {
    # Atom identifiers key and value correspond to AtomID and AtomIdentifier
    %{$This->{AtomIdentifiers}{$CurrentRadius}} = ();

    # Unique and structurally unique identifiers key and value correspond to AtomIdentifier and AtomID
    %{$This->{UniqueAtomIdentifiers}{$CurrentRadius}} = ();
    %{$This->{UniqueAtomIdentifiersCount}{$CurrentRadius}} = ();

    %{$This->{StructurallyUniqueAtomIdentifiers}{$CurrentRadius}} = ();
    %{$This->{StructurallyUniqueAtomIdentifiersCount}{$CurrentRadius}} = ();
  }

  return $This;
}

# Collect atom neighborhoods up to specified neighborhood radius...
#
# For each cached atom, the neighborhoods returned by the molecule are stored
# in AtomNeighborhoods by radius level and atom ID. Nothing is collected, and
# nothing is returned, for a neighborhood radius less than 1.
#
sub _GetAtomNeighborhoods {
  my($This) = @_;

  %{$This->{AtomNeighborhoods}} = ();

  my $Radius = $This->{NeighborhoodRadius};
  if ($Radius < 1) {
    # At radius level 0, it's just the atoms...
    return;
  }

  # Initialize neighborhood at different radii...
  for my $CurrentRadius (0 .. $Radius) {
    %{$This->{AtomNeighborhoods}{$CurrentRadius}} = ();
  }

  my $Molecule = $This->GetMolecule();

  # Collect available atom neighborhoods at different neighborhood levels for each atom...
  for my $Atom (@{$This->{Atoms}}) {
    my $AtomID = $Atom->GetID();

    # Neighborhoods are returned in order of radius level starting with radius 0...
    my $CurrentRadius = 0;
    for my $AtomsNeighborhoodWithSuccessorAtomsRef ($Molecule->GetAtomNeighborhoodsWithSuccessorAtomsAndRadiusUpto($Atom, $Radius)) {
      $This->{AtomNeighborhoods}{$CurrentRadius}{$AtomID} = $AtomsNeighborhoodWithSuccessorAtomsRef;
      $CurrentRadius++;
    }
  }
  return $This;
}

# Assign atom identifiers to central atom at each neighborhood radius level...
#
# For each radius level below the specified neighborhood radius and each cached
# atom, the bond orders and current identifiers of the non-hydrogen successor
# atoms in the atom's neighborhood are collected, and a new identifier is
# assigned to the atom by _AssignAtomIdentifierToAtomNeighborhood.
#
# Returns the object, or nothing for a neighborhood radius less than 1.
#
sub _AssignAtomIdentifiersToAtomNeighborhoods {
  my($This) = @_;

  return if ($This->{NeighborhoodRadius} < 1);

  # Go over the atom neighborhoods at each radius up to specified radius and assign atom
  # identifiers using their connected successor atoms and their identifiers.
  #
  # For a neighborhood atom at a specified radius, the successor connected atoms correspond
  # to next radius level and the last set of neighborhood atoms don't have any successor connected
  # atoms. Additionally, radius level 0 just corresponds to initial atom identifiers.
  #
  # So in order to process atom neighborhood up to specified radius level, the last atom neighborhood
  # doesn't need to be processed: it gets processed at previous radius level as successor connected
  # atoms.
  #
  my $LastRadius = $This->{NeighborhoodRadius} - 1;

  for my $Radius (0 .. $LastRadius) {
    ATOM: for my $Atom (@{$This->{Atoms}}) {
      my $AtomID = $Atom->GetID();

      # Are there any available atom neighborhoods at this radius?
      next ATOM unless exists $This->{AtomNeighborhoods}{$Radius}{$AtomID};

      # Go over neighborhood atoms and their successor connected atoms at this radius and collect
      # (BondOrder AtomIdentifier) values for bonded atom pairs: atom identifiers are grouped
      # by bond order.
      #
      my %IdentifiersByBondOrder = ();
      my $NonHydrogenSuccessorCount = 0;

      NEIGHBORHOODS: for my $NeighborhoodRef (@{$This->{AtomNeighborhoods}{$Radius}{$AtomID}}) {
        my($NeighborhoodAtom, $SuccessorAtomsRef) = @{$NeighborhoodRef};

        # Any connected successors for the NeighborhoodAtom?
        next NEIGHBORHOODS unless @{$SuccessorAtomsRef};

        SUCCESSORATOM: for my $SuccessorAtom (@{$SuccessorAtomsRef}) {
          # Skip successor hydrogen atom...
          next SUCCESSORATOM if $SuccessorAtom->IsHydrogen();

          my $SuccessorAtomID = $SuccessorAtom->GetID();
          $NonHydrogenSuccessorCount++;

          # Aromatic bonds are all assigned a bond order of 1.5...
          my $Bond = $NeighborhoodAtom->GetBondToAtom($SuccessorAtom);
          my $BondOrder = $Bond->IsAromatic() ? "1.5" : $Bond->GetBondOrder();

          push @{$IdentifiersByBondOrder{$BondOrder}}, $This->{AtomIdentifiers}{$Radius}{$SuccessorAtomID};
        }
      }

      # Nothing to do without any non-hydrogen successor atoms...
      next ATOM unless $NonHydrogenSuccessorCount;

      # Assign a new atom identifier at the next radius level...
      $This->_AssignAtomIdentifierToAtomNeighborhood($AtomID, $Radius, \%IdentifiersByBondOrder);
    }
  }
  return $This;
}

# Generate and assign atom identifier for AtomID using atom neighborhood at next radius level...
#
# Parameters:
#   $AtomID                          - ID of the central atom
#   $Radius                          - Current radius level; the new identifier is assigned
#                                      at radius level $Radius + 1
#   $BondOrdersAndAtomIdentifiersRef - Reference to a hash with bond orders as keys and
#                                      references to lists of successor atom identifiers
#                                      as values
#
# Returns the object.
#
sub _AssignAtomIdentifierToAtomNeighborhood {
  my($This, $AtomID, $Radius, $BondOrdersAndAtomIdentifiersRef) = @_;

  my $NextRadius = $Radius + 1;

  # Start with the next radius level and the current identifier of the atom...
  my @AtomIdentifiersInfo = ($NextRadius, $This->{AtomIdentifiers}{$Radius}{$AtomID});

  # Sort out successor atom bond order and identifier pairs by bond order followed by atom identifiers
  # in order to make the final atom identifier graph invariant...
  #
  for my $BondOrder (sort { $a <=> $b } keys %{$BondOrdersAndAtomIdentifiersRef}) {
    for my $SuccessorAtomIdentifier (sort { $a <=> $b } @{$BondOrdersAndAtomIdentifiersRef->{$BondOrder}}) {
      push @AtomIdentifiersInfo, ($BondOrder, $SuccessorAtomIdentifier);
    }
  }

  # The values are joined without any delimiter: the generated identifiers depend on it,
  # so the format of this string must not be changed...
  my $AtomIdentifierString = join("", @AtomIdentifiersInfo);
  my $AtomIdentifier = TextUtil::HashCode($AtomIdentifierString);

  # Assign atom identifier to the atom at next radius level...
  $This->{AtomIdentifiers}{$NextRadius}{$AtomID} = $AtomIdentifier;

  return $This;
}

# Remove duplicates atom identifiers...
#
# Duplicates are removed first by identifier value and then by structure.
# Returns the object.
#
sub _RemoveDuplicateAtomIdentifiers {
  my($This) = @_;

  $This->_RemoveDuplicateIdentifiersByValue();
  $This->_RemoveStructurallyDuplicateIdenfiers();

  return $This;
}

# Remove duplicate identifiers at each radius level by just using their value...
#
# For each radius level, UniqueAtomIdentifiers maps each distinct identifier to
# the ID of the first atom it was found for and UniqueAtomIdentifiersCount
# maps it to the number of atoms sharing it. Returns the object.
#
sub _RemoveDuplicateIdentifiersByValue {
  my($This) = @_;

  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    ATOM: for my $Atom (@{$This->{Atoms}}) {
      my $AtomID = $Atom->GetID();

      # Not all atoms are assigned an identifier at each radius level...
      if (!exists $This->{AtomIdentifiers}{$Radius}{$AtomID}) {
        next ATOM;
      }
      my $AtomIdentifier = $This->{AtomIdentifiers}{$Radius}{$AtomID};
      if (exists $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier}) {
        # It's a duplicate atom identifier at this radius level...
        $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} += 1;
        next ATOM;
      }
      $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier} = $AtomID;
      $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} = 1;
    }
  }
  return $This;
}

# Remove structurally duplicate identifiers at each radius level...
#
# Methodology:
#   . For unique atom identifiers at each radius level, assign complete structure features
#     in terms all the bonds involved to generate that identifier
#   . Use the complete structure features to remove atom identifiers which are
#     structurally equivalent which can also be at earlier radii levels
#
# The count for an identifier found to be structurally equivalent to an earlier
# identifier is added to the count for the earlier identifier. Returns the object.
#
sub _RemoveStructurallyDuplicateIdenfiers {
  my($This) = @_;

  # Setup structure features...
  $This->_SetupStructureFeaturesForAtomIDsInvolvedInUniqueIdentifiers();

  # Identify structurally unique identifiers...
  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    ATOMIDENTIFIER: for my $AtomIdentifier (sort { $a <=> $b } keys %{$This->{UniqueAtomIdentifiers}{$Radius}}) {
      my $AtomID = $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier};

      my($SimilarAtomIdentifierRadius, $SimilarAtomIdentifier) = $This->_FindStructurallySimilarAtomIdentifier($Radius, $AtomID, $AtomIdentifier);

      # Check for a defined value and not a true value: 0 is a valid identifier...
      if (defined $SimilarAtomIdentifier) {
        # Current atom identifier is similar to an earlier structurally unique atom identifier...
        $This->{StructurallyUniqueAtomIdentifiersCount}{$SimilarAtomIdentifierRadius}{$SimilarAtomIdentifier} += $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier};
        next ATOMIDENTIFIER;
      }
      $This->{StructurallyUniqueAtomIdentifiers}{$Radius}{$AtomIdentifier} = $AtomID;

      # Set structurally unique atom identifier count to the unique atom identifiers count...
      $This->{StructurallyUniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} = $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier};
    }
  }
  return $This;
}

# Set final fingerprints vector...
#
# Marks fingerprints as generated and adds the structurally unique identifiers
# to the vector or bit vector corresponding to the fingerprints type. Returns
# the object.
#
sub _SetFinalFingerprints {
  my($This) = @_;

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  if ($This->{Type} =~ /^ExtendedConnectivity$/i) {
    $This->_SetFinalExtendedConnectivityFingerprints();
  }
  elsif ($This->{Type} =~ /^ExtendedConnectivityCount$/i) {
    $This->_SetFinalExtendedConnectivityCountFingerprints();
  }
  elsif ($This->{Type} =~ /^ExtendedConnectivityBits$/i) {
    $This->_SetFinalExtendedConnectivityBitsFingerprints();
  }

  return $This;
}

# Set final extended connectivity fingerprints vector...
#
# Structurally unique identifiers are added as values to the fingerprints
# vector in order of radius level and, at each radius level, in ascending
# numerical order. Returns the object.
#
sub _SetFinalExtendedConnectivityFingerprints {
  my($This) = @_;

  my @AtomIdentifiers = ();

  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    push @AtomIdentifiers, sort { $a <=> $b } keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}};
  }
  # Add atom identifiers to fingerprint vector...
  $This->{FingerprintsVector}->AddValues(\@AtomIdentifiers);

  return $This;
}

# Set final extended connectivity count fingerprints vector...
#
# Structurally unique identifiers are added as value IDs, and their counts as
# values, to the fingerprints vector in order of radius level and, at each
# radius level, in ascending numerical order of identifiers. Returns the object.
#
sub _SetFinalExtendedConnectivityCountFingerprints {
  my($This) = @_;

  my(@AtomIdentifiers, @AtomIdentifiersCount);

  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    my @RadiusAtomIdentifiers = sort { $a <=> $b } keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}};

    push @AtomIdentifiers, @RadiusAtomIdentifiers;
    push @AtomIdentifiersCount, map { $This->{StructurallyUniqueAtomIdentifiersCount}{$Radius}{$_} } @RadiusAtomIdentifiers;
  }

  # Add atom identifiers to fingerprint vector as value IDs...
  $This->{FingerprintsVector}->AddValueIDs(\@AtomIdentifiers);

  # Add atom identifiers count to fingerprint vector as values...
  $This->{FingerprintsVector}->AddValues(\@AtomIdentifiersCount);

  return $This;
}

# Set final extended connectivity bits fingerprints vector...
#
# Each structurally unique identifier is used as the seed for the random number
# generator; the first random number generated in the range from 0 to Size is
# truncated to an integer and used as the position of the bit to set. Returns
# the object.
#
sub _SetFinalExtendedConnectivityBitsFingerprints {
  my($This) = @_;

  my $BitVector = $This->{FingerprintsBitVector};
  my $BitVectorSize = $This->{Size};
  my $SkipBitPosCheck = 1;

  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    for my $AtomIdentifier (keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}}) {
      # Set random number seed and get a random number using the selected generator...
      my $RandomNumber;
      if ($This->{UsePerlCoreRandom}) {
        CORE::srand($AtomIdentifier);
        $RandomNumber = CORE::rand($BitVectorSize);
      }
      else {
        MathUtil::srandom($AtomIdentifier);
        $RandomNumber = MathUtil::random($BitVectorSize);
      }

      # Set bit position...
      $BitVector->SetBit(int($RandomNumber), $SkipBitPosCheck);
    }
  }
  return $This;
}


# Identify structurally unique identifiers by comparing structure features involved in
# generating identifier by comparing it against all the previous structurally unique
# identifiers...
#
# Parameters:
#   $SpecifiedRadius         - Radius level of the identifier to check
#   $SpecifiedAtomID         - ID of the atom the identifier was generated for
#   $SpecifiedAtomIdentifier - The identifier; it's not used during the comparison
#
# Returns (Radius, AtomIdentifier) for a structurally equivalent identifier
# when one is found; otherwise, (undef, undef).
#
sub _FindStructurallySimilarAtomIdentifier {
  my($This, $SpecifiedRadius, $SpecifiedAtomID, $SpecifiedAtomIdentifier) = @_;

  # After duplicate removal by value, all identifiers at radius level 0 would be structurally unique...
  return (undef, undef) if ($SpecifiedRadius == 0);

  my $SpecifiedAtomCount = $This->{StructureFeatures}{AtomCount}{$SpecifiedRadius}{$SpecifiedAtomID};
  my $SpecifiedAtomIDsRef = $This->{StructureFeatures}{AtomIDs}{$SpecifiedRadius}{$SpecifiedAtomID};

  # No need to compare features at radius 0...
  for my $Radius (1 .. $SpecifiedRadius) {
    ATOMIDENTIFIER: for my $AtomIdentifier (keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}}) {
      my $AtomID = $This->{StructurallyUniqueAtomIdentifiers}{$Radius}{$AtomIdentifier};

      my $AtomCount = $This->{StructureFeatures}{AtomCount}{$Radius}{$AtomID};
      my $AtomIDsRef = $This->{StructureFeatures}{AtomIDs}{$Radius}{$AtomID};

      # Features with different atom counts couldn't be structurally equivalent...
      next ATOMIDENTIFIER if ($SpecifiedAtomCount != $AtomCount);

      # For structural equivalency, all atoms in specified feature must also be present in a previously
      # identified structurally unique structure feature...
      for my $FeatureAtomID (keys %{$SpecifiedAtomIDsRef}) {
        next ATOMIDENTIFIER unless exists $AtomIDsRef->{$FeatureAtomID};
      }

      # Found structurally equivalent feature...
      return ($Radius, $AtomIdentifier);
    }
  }
  return (undef, undef);
}

# Setup structure features for atom IDs involved in unique atom identifiers at all
# radii level...
#
# The structure features for an atom at a radius level correspond to the count
# and the IDs of all non-hydrogen neighborhood atoms from radius level 0 up to
# that radius level. Returns the object.
#
sub _SetupStructureFeaturesForAtomIDsInvolvedInUniqueIdentifiers {
  my($This) = @_;

  $This->_InitializeStructureFeatures();

  # Collect atom IDs involved in unique atom identifiers...
  my %AtomIDs = ();
  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    for my $AtomID (values %{$This->{UniqueAtomIdentifiers}{$Radius}}) {
      $AtomIDs{$AtomID} = $AtomID;
    }
  }

  # Setup structure features...
  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    for my $AtomID (keys %AtomIDs) {
      # A new hash is used for each atom at each radius level as its reference is saved below...
      my $FeatureAtomCount = 0;
      my %FeatureAtomIDs = ();

      # Get partial structure features for the atom at previous radius level...
      if ($Radius > 0) {
        my $PreviousRadius = $Radius - 1;
        $FeatureAtomCount += $This->{StructureFeatures}{AtomCount}{$PreviousRadius}{$AtomID};
        %FeatureAtomIDs = %{$This->{StructureFeatures}{AtomIDs}{$PreviousRadius}{$AtomID}};
      }

      # Get all neighborhood atoms at this radius level...
      if (exists($This->{AtomNeighborhoods}{$Radius}) && exists($This->{AtomNeighborhoods}{$Radius}{$AtomID})) {
        NEIGHBORHOODS: for my $NeighborhoodRef (@{$This->{AtomNeighborhoods}{$Radius}{$AtomID}}) {
          # Successor atoms of the neighborhood atom are not needed here...
          my($NeighborhoodAtom) = @{$NeighborhoodRef};
          next NEIGHBORHOODS if $NeighborhoodAtom->IsHydrogen();

          my $NeighborhoodAtomID = $NeighborhoodAtom->GetID();
          $FeatureAtomCount++;
          $FeatureAtomIDs{$NeighborhoodAtomID} = $NeighborhoodAtomID;
        }
      }

      # Assign structure features to atom at this radius level...
      $This->{StructureFeatures}{AtomCount}{$Radius}{$AtomID} = $FeatureAtomCount;
      $This->{StructureFeatures}{AtomIDs}{$Radius}{$AtomID} = \%FeatureAtomIDs;
    }
  }
  return $This;
}

# Initialize structure features at each radius level...
#
# Empties out AtomCount and AtomIDs structure features data for each radius
# level from 0 up to the specified neighborhood radius. Returns the object.
#
sub _InitializeStructureFeatures {
  my($This) = @_;

  # Initialize all structure features...
  %{$This->{StructureFeatures}} = ();
  for my $FeatureName (qw(AtomCount AtomIDs)) {
    %{$This->{StructureFeatures}{$FeatureName}} = ();
  }

  for my $CurrentRadius (0 .. $This->{NeighborhoodRadius}) {
    # Structure features at specific radii accessed using atom IDs...
    %{$This->{StructureFeatures}{AtomCount}{$CurrentRadius}} = ();
    %{$This->{StructureFeatures}{AtomIDs}{$CurrentRadius}} = ();
  }
  return $This;
}

# Cache appropriate molecule data...
#
# Saves all non-hydrogen atoms in the molecule as Atoms. Returns the object.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # Get all non-hydrogen atoms: atoms for which the IsHydrogen check is not true...
  my $NegateAtomCheckMethod = 1;
  my $Molecule = $This->GetMolecule();
  @{$This->{Atoms}} = $Molecule->GetAtoms("IsHydrogen", $NegateAtomCheckMethod);

  return $This;
}

# Clear cached molecule data...
#
sub _ClearMoleculeDataCache {
    my $This = shift;

    # Empty the cached atom list in place...
    @{ $This->{Atoms} } = ();

    return $This;
}

# Initialize atom identifier type information...
#
# Current supported values:
#
# AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# The identifier type is matched case-insensitively. Only the first two types
# need any initialization; an unrecognized type just triggers a warning.
#
sub _InitializeAtomIdentifierTypeInformation {
    my($This) = @_;

    if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
        $This->_InitializeAtomicInvariantsAtomTypesInformation();
    }
    elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
        $This->_InitializeFunctionalClassAtomTypesInformation();
    }
    elsif ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
        # Nothing to do for now...
    }
    else {
        carp "Warning: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
    }

    return $This;
}

# Initialize atomic invariants atom types, generated by AtomTypes::AtomicInvariantsAtomTypes
# class, to use for generating initial atom identifiers...
#
# Let:
#   AS = Atom symbol corresponding to element symbol
#
#   X<n>      = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
#   BO<n>     = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
#   LBO<n>    = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
#   SB<n>     = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   DB<n>     = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   TB<n>     = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   H<n>      = Number of implicit and explicit hydrogens for atom
#   Ar        = Aromatic annotation indicating whether atom is aromatic
#   RA        = Ring atom annotation indicating whether atom is in a ring
#   FC<+n/-n> = Formal charge assigned to atom
#   MN<n>     = Mass number indicating isotope other than most abundant isotope
#   SM<n>     = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
#
# Then:
#
#   Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
#
#     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
#
# Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
# optional.
#
# Default atomic invariants used for generating initial atom identifiers are [ Ref 24 ]:
#
#   AS, X<n>, BO<n>, H<n>, FC<+n/-n>, MN<n>
#
# In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words
# are also allowed:
#
#   X   : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
#   BO  : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
#   LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
#   SB  : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
#   DB  : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
#   TB  : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
#   H   : NumOfImplicitAndExplicitHydrogens
#   Ar  : Aromatic
#   RA  : RingAtom
#   FC  : FormalCharge
#   MN  : MassNumber
#   SM  : SpinMultiplicity
#
sub _InitializeAtomicInvariantsAtomTypesInformation {
    my $This = shift;

    # Default atomic invariants to use for generating initial atom identifiers
    # are: AS, X, BO, H, FC, MN
    #
    my @DefaultAtomicInvariants = qw(AS X BO H FC MN);

    # Replace the contents of the stored list in place...
    @{$This->{AtomicInvariantsToUse}} = @DefaultAtomicInvariants;

    return $This;
}

# Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
# class, to use for generating initial atom identifiers...
#
# Let:
#   HBD: HydrogenBondDonor
#   HBA: HydrogenBondAcceptor
#   PI : PositivelyIonizable
#   NI : NegativelyIonizable
#   Ar : Aromatic
#   Hal : Halogen
#   H : Hydrophobic
#   RA : RingAtom
#   CA : ChainAtom
#
# Then:
#
#   Functional class atom type specification for an atom corresponds to:
#
#     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
#
# Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
#
# FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
#
#   HydrogenBondDonor: NH, NH2, OH
#   HydrogenBondAcceptor: N[!H], O
#   PositivelyIonizable: +, NH2
#   NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
#
sub _InitializeFunctionalClassAtomTypesInformation {
    my $This = shift;

    # Default functional class atom types to use for generating initial atom
    # identifiers are: HBD, HBA, PI, NI, Ar, Hal
    #
    my @DefaultFunctionalClasses = qw(HBD HBA PI NI Ar Hal);

    # Replace the contents of the stored list in place...
    @{$This->{FunctionalClassesToUse}} = @DefaultFunctionalClasses;

    return $This;
}

# Set atomic invariants to use for generation of initial atom identifiers...
#
# The atomic invariants may be passed either as a list of names or as a
# reference to an array of names; only the first argument is inspected to tell
# the two forms apart. Each name is checked with
# AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable and an
# unsupported name raises a fatal error via croak, leaving the stored list
# untouched. On success the stored AtomicInvariantsToUse list is replaced and
# the object is returned.
#
# A warning is issued and nothing is changed (bare return) when no values are
# specified or when AtomIdentifierType is not AtomicInvariantsAtomTypes.
#
sub SetAtomicInvariantsToUse {
    my($This, @Values) = @_;
    my($FirstValue, $SpecifiedAtomicInvariant, @SpecifiedAtomicInvariants, @AtomicInvariantsToUse);

    if (!@Values) {
        carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
        return;
    }

    if ($This->{AtomIdentifierType} !~ /^AtomicInvariantsAtomTypes$/i) {
        # The warning names this method, so it points at the call that failed...
        carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: AtomicInvariantsToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
        return;
    }

    # Accept both an array reference and a plain list of values...
    $FirstValue = $Values[0];
    if (ref($FirstValue) =~ /^ARRAY/) {
        @SpecifiedAtomicInvariants = @{$FirstValue};
    }
    else {
        @SpecifiedAtomicInvariants = @Values;
    }

    # Make sure specified AtomicInvariants are valid; validate everything before
    # touching the stored list so a failure leaves it unchanged...
    @AtomicInvariantsToUse = ();
    for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
        if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
            croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
        }
        push @AtomicInvariantsToUse, $SpecifiedAtomicInvariant;
    }

    # Set atomic invariants to use...
    @{$This->{AtomicInvariantsToUse}} = @AtomicInvariantsToUse;

    return $This;
}

# Set functional classes to use for generation of initial atom identifiers...
#
# The functional classes may be passed either as a list of names or as a
# reference to an array of names; only the first argument is inspected to tell
# the two forms apart. Each name is checked with
# AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable and an
# unsupported name raises a fatal error via croak. On success the stored
# FunctionalClassesToUse list is replaced and the object is returned.
#
# A warning is issued and nothing is changed (bare return) when no values are
# specified or when AtomIdentifierType is not FunctionalClassAtomTypes.
#
sub SetFunctionalClassesToUse {
    my($This, @Values) = @_;

    unless (@Values) {
        carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
        return;
    }

    unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
        carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
        return;
    }

    # Accept both an array reference and a plain list of values...
    my @SpecifiedFunctionalClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

    # Make sure specified FunctionalClasses are valid before storing any...
    my @ValidatedFunctionalClasses = ();
    for my $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
        unless (AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) {
            croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
        }
        push @ValidatedFunctionalClasses, $SpecifiedFunctionalClass;
    }

    # Set functional classes to use...
    @{$This->{FunctionalClassesToUse}} = @ValidatedFunctionalClasses;

    return $This;
}

# Return a string containing data for ExtendedConnectivityFingerprints object...
sub StringifyExtendedConnectivityFingerprints {
    my($This) = @_;

    # The result is a list of "Name: value" fields separated by "; ". The
    # identifier type and radius always come first; atom type specific fields
    # are present only for AtomicInvariantsAtomTypes and FunctionalClassAtomTypes;
    # the remaining fields describe either the bit vector (for
    # ExtendedConnectivityBits) or the fingerprints vector (any other type).
    my @Fields = ();

    push @Fields, "InitialAtomIdentifierType: $This->{AtomIdentifierType}";
    push @Fields, "NeighborhoodRadius: $This->{NeighborhoodRadius}";

    if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
        my @InvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
        my %AvailableInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

        # One "abbreviation: description" entry per invariant, in the order
        # reported by the atom types class...
        my @InvariantEntries = map { "$_: $AvailableInvariants{$_}" } @InvariantsOrder;

        push @Fields, "AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
        push @Fields, "AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@InvariantsOrder, ", ", 0) . ">";
        push @Fields, "AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@InvariantEntries, ", ", 0) . ">";
    }
    elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
        my @ClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
        my %AvailableClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

        # One "abbreviation: description" entry per functional class, in the
        # order reported by the atom types class...
        my @ClassEntries = map { "$_: $AvailableClasses{$_}" } @ClassesOrder;

        push @Fields, "FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
        push @Fields, "FunctionalClassesOrder: <" . TextUtil::JoinWords(\@ClassesOrder, ", ", 0) . ">";
        push @Fields, "AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@ClassEntries, ", ", 0) . ">";
    }

    if ($This->{Type} =~ /^ExtendedConnectivityBits$/i) {
        # Size...
        push @Fields, "Size: $This->{Size}";
        push @Fields, "MinSize: $This->{MinSize}";
        push @Fields, "MaxSize: $This->{MaxSize}";

        # Fingerprint bit density and num of bits set...
        my $NumOfSetBits = $This->{FingerprintsBitVector}->GetNumOfSetBits();
        my $BitDensity = $This->{FingerprintsBitVector}->GetFingerprintsBitDensity();
        push @Fields, "NumOfOnBits: $NumOfSetBits";
        push @Fields, "BitDensity: $BitDensity";

        # FingerprintsBitVector...
        push @Fields, "FingerprintsBitVector: < $This->{FingerprintsBitVector} >";
    }
    else {
        # Number of identifiers...
        push @Fields, "NumOfIdentifiers: " . $This->{FingerprintsVector}->GetNumOfValues();

        # FingerprintsVector...
        push @Fields, "FingerprintsVector: < $This->{FingerprintsVector} >";
    }

    return join("; ", @Fields);
}