1 package Fingerprints::AtomTypesFingerprints; 2 # 3 # $RCSfile: AtomTypesFingerprints.pm,v $ 4 # $Date: 2015/02/28 20:48:54 $ 5 # $Revision: 1.22 $ 6 # 7 # Author: Manish Sud <msud@san.rr.com> 8 # 9 # Copyright (C) 2015 Manish Sud. All rights reserved. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 use strict; 30 use Carp; 31 use Exporter; 32 use Text::ParseWords; 33 use TextUtil (); 34 use FileUtil (); 35 use MathUtil (); 36 use Fingerprints::Fingerprints; 37 use Molecule; 38 use AtomTypes::AtomicInvariantsAtomTypes; 39 use AtomTypes::DREIDINGAtomTypes; 40 use AtomTypes::EStateAtomTypes; 41 use AtomTypes::FunctionalClassAtomTypes; 42 use AtomTypes::MMFF94AtomTypes; 43 use AtomTypes::SLogPAtomTypes; 44 use AtomTypes::SYBYLAtomTypes; 45 use AtomTypes::TPSAAtomTypes; 46 use AtomTypes::UFFAtomTypes; 47 48 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 49 50 @ISA = qw(Fingerprints::Fingerprints Exporter); 51 @EXPORT = qw(); 52 @EXPORT_OK = qw(); 53 54 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); 55 56 # Setup class variables... 57 my($ClassName); 58 _InitializeClass(); 59 60 # Overload Perl functions... 
use overload '""' => 'StringifyAtomTypesFingerprints';

# Class constructor...
#
# Takes a list of property names and values. Molecule, Type and AtomIdentifierType
# must be present; a missing one causes a croak. Returns the new object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Initialize object...
  my $This = $Class->SUPER::new();
  bless $This, ref($Class) || $Class;
  $This->_InitializeAtomTypesFingerprints();

  $This->_InitializeAtomTypesFingerprintsProperties(%NamesAndValues);

  return $This;
}

# Initialize object data...
#
sub _InitializeAtomTypesFingerprints {
  my($This) = @_;

  # Type of atom type fingerprint to generate:
  #
  # AtomTypesCount - A vector containing count of atom types
  # AtomTypesBits - A bit vector indicating presence/absence of atom types
  #
  $This->{Type} = '';

  # AtomTypes to use for generating fingerprints...
  #
  # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
  # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
  # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
  #
  $This->{AtomIdentifierType} = '';

  # AtomTypesSetToUse for AtomTypesCount:
  #
  # ArbitrarySize - Corresponds to only AtomTypes atom types detected in molecule
  # FixedSize - Corresponds to fixed number of atom types previously defined for
  #             specific atom types.
  #
  # The default AtomTypesSetToUse value for AtomTypesCount fingerprints type: ArbitrarySize.
  #
  # Possible values: ArbitrarySize or FixedSize. However, for AtomTypesBits fingerprints type, only FixedSize
  # value is allowed.
  #
  $This->{AtomTypesSetToUse} = '';

  # By default, hydrogens are ignored during fingerprint generation...
  $This->{IgnoreHydrogens} = 1;

  # Assigned AtomTypes atom types...
  %{$This->{AtomTypes}} = ();

  # AtomTypes atom types count for generating atom types count and bits fingerprints...
  %{$This->{AtomTypesCount}} = ();
}

# Initialize class ...
sub _InitializeClass {
  #Class name...
  $ClassName = __PACKAGE__;
}

# Initialize object properties....
#
# Each name in %NamesAndValues is applied through its Set<Name> method. Type,
# AtomIdentifierType and AtomTypesSetToUse are applied first, in that order, and
# the remaining names follow in sorted order. The order matters:
# SetAtomIdentifierType installs default AtomicInvariantsToUse and
# FunctionalClassesToUse values, and SetFunctionalClassesToUse ignores its input
# until the identifier type is known. Walking the hash in its internal order
# could let those defaults overwrite values supplied by the caller.
#
# Croaks when Molecule, Type or AtomIdentifierType is missing, or when Type is
# not a supported fingerprints type. Returns the object.
#
sub _InitializeAtomTypesFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  my @PriorityNames = grep { exists $NamesAndValues{$_} } qw(Type AtomIdentifierType AtomTypesSetToUse);
  my %IsPriorityName = map { $_ => 1 } @PriorityNames;
  my @RemainingNames = grep { !$IsPriorityName{$_} } sort keys %NamesAndValues;

  for my $Name (@PriorityNames, @RemainingNames) {
    my $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }

  # Make sure type and identifier type were specified...
  if (!exists $NamesAndValues{Type}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying type...";
  }
  if (!exists $NamesAndValues{AtomIdentifierType}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
  }

  if ($This->{Type} =~ /^AtomTypesCount$/i) {
    $This->_InitializeAtomTypesCount();
  }
  elsif ($This->{Type} =~ /^AtomTypesBits$/i) {
    $This->_InitializeAtomTypesBits();
  }
  else {
    croak "Error: ${ClassName}->_InitializeAtomTypesFingerprintsProperties: Unknown AtomTypes fingerprints type: $This->{Type}; Supported fingerprints types: AtomTypesCount or AtomTypesBits...";
  }

  return $This;
}

# Initialize atom type counts...
#
# Picks a default AtomTypesSetToUse when none was specified (FixedSize for
# TPSAAtomTypes, ArbitrarySize otherwise), validates it, and sets up the
# fingerprints vector.
#
sub _InitializeAtomTypesCount {
  my($This) = @_;

  # Set default AtomTypesSetToUse...
  if (!$This->{AtomTypesSetToUse}) {
    $This->{AtomTypesSetToUse} = ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) ? 'FixedSize' : 'ArbitrarySize';
  }

  # Make sure AtomTypesSetToUse value is okay...
  $This->_ValidateAtomTypesSetToUse($This->{AtomTypesSetToUse});

  # Vector type and type of values...
  $This->{VectorType} = 'FingerprintsVector';

  if ($This->{AtomTypesSetToUse} =~ /^FixedSize$/i) {
    $This->{FingerprintsVectorType} = 'OrderedNumericalValues';
  }
  else {
    $This->{FingerprintsVectorType} = 'NumericalValues';
  }

  $This->_InitializeFingerprintsVector();

  return $This;
}

# Initialize atom types bits...
#
# AtomTypesBits fingerprints always use the FixedSize atom types set; the bit
# vector size is the size of that set.
#
sub _InitializeAtomTypesBits {
  my($This) = @_;

  # Set default AtomTypesSetToUse...
  $This->{AtomTypesSetToUse} = 'FixedSize';

  # Make sure AtomTypesSetToUse value is okay...
  $This->_ValidateAtomTypesSetToUse($This->{AtomTypesSetToUse});

  # Vector type...
  $This->{VectorType} = 'FingerprintsBitVector';

  # Vector size...
  $This->{Size} = $This->_GetFixedSizeAtomTypesSetSize();

  $This->_InitializeFingerprintsBitVector();

  return $This;
}

# Set type...
#
# Accepts AtomTypesCount or AtomTypesBits (case insensitive) and stores the
# canonical spelling. Croaks when the type is already set or is not supported.
#
sub SetType {
  my($This, $Type) = @_;

  if ($This->{Type}) {
    croak "Error: ${ClassName}->SetType: Can't change type: It's already set...";
  }

  if ($Type =~ /^AtomTypesCount$/i) {
    $This->{Type} = 'AtomTypesCount';
  }
  elsif ($Type =~ /^AtomTypesBits$/i) {
    $This->{Type} = 'AtomTypesBits';
  }
  else {
    croak "Error: ${ClassName}->SetType: Unknown AtomTypes fingerprints type: $Type; Supported fingerprints types: AtomTypesCount or AtomTypesBits...";
  }
  return $This;
}

# Disable set size method...
#
# Always croaks: the size is derived from the atom types set.
#
sub SetSize {
  my($This, $Type) = @_;

  croak "Error: ${ClassName}->SetSize: Can't change size: It's not allowed...";
}

# Set atom types set to use...
#
# Stores the value after validating it. Croaks when a value is already set or
# when the new value fails validation. Returns the object.
#
sub SetAtomTypesSetToUse {
  my($This, $Value) = @_;

  if ($This->{AtomTypesSetToUse}) {
    croak "Error: ${ClassName}->SetAtomTypesSetToUse: Can't change size: It's already set...";
  }

  $This->_ValidateAtomTypesSetToUse($Value);

  $This->{AtomTypesSetToUse} = $Value;

  return $This;
}

# Validate AtomTypesSetToUse value...
#
# The value must be exactly ArbitrarySize or FixedSize (case insensitive). It is
# also checked against the Type and AtomIdentifierType stored on the object:
#
# ArbitrarySize is not allowed for AtomTypesBits or TPSAAtomTypes
# FixedSize is not allowed for AtomicInvariantsAtomTypes or FunctionalClassAtomTypes
#
# Croaks on the first violated rule; returns the object otherwise.
#
sub _ValidateAtomTypesSetToUse {
  my($This, $Value) = @_;

  # Anchor both ends: the checks below and all later uses match the whole value,
  # so a value with trailing text would slip through and then match nothing...
  if ($Value !~ /^(ArbitrarySize|FixedSize)$/i) {
    croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Unknown AtomTypesSetToUse value: $Value; Supported values: ArbitrarySize or FixedSize";
  }

  if ($Value =~ /^ArbitrarySize$/i && $This->{Type} =~ /^AtomTypesBits$/i) {
    croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Specified AtomTypesSetToUse value, $Value, is not allowed for AtomTypesBits fingerprints...";
  }

  if ($Value =~ /^FixedSize$/i && $This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Specified AtomTypesSetToUse value, $Value, is not allowed for AtomicInvariantsAtomTypes fingerprints...";
  }

  if ($Value =~ /^FixedSize$/i && $This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Specified AtomTypesSetToUse value, $Value, is not allowed for FunctionalClassAtomTypes fingerprints...";
  }

  if ($Value =~ /^ArbitrarySize$/i && $This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
    croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Specified AtomTypesSetToUse value, $Value, is not allowed for TPSAAtomTypes fingerprints...";
  }

  return $This;
}

# Set atom identifier type...
#
# Validates and stores the atom identifier type, then initializes the
# information specific to that type.
#
# Croaks when:
#
# . The value is not one of the supported atom types
# . An atom identifier type is already set
# . AtomTypesSetToUse is already FixedSize and the value is AtomicInvariantsAtomTypes
#   or FunctionalClassAtomTypes, neither of which has a fixed size atom types set
#
# Returns the object.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
  }

  if ($This->{AtomIdentifierType}) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type: It's already set...";
  }

  # The stored identifier type is known to be empty here, so the compatibility
  # checks have to look at the incoming value...
  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i && $This->{AtomTypesSetToUse} =~ /^FixedSize$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified AtomTypesSetToUse value, $This->{AtomTypesSetToUse}, is not allowed for AtomicInvariantsAtomTypes fingerprints...";
  }

  if ($IdentifierType =~ /^FunctionalClassAtomTypes$/i && $This->{AtomTypesSetToUse} =~ /^FixedSize$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified AtomTypesSetToUse value, $This->{AtomTypesSetToUse}, is not allowed for FunctionalClassAtomTypes fingerprints...";
  }

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize atom identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}

# Generate fingerprints description...
#
# Returns the explicitly set description when there is one; otherwise a string
# of the form Type:AtomIdentifierType:AtomTypesSetToUse.
#
sub GetDescription {
  my($This) = @_;

  # Is description explicitly set?
  if (exists $This->{Description}) {
    return $This->{Description};
  }

  # Generate fingerprints description...

  return "$This->{Type}:$This->{AtomIdentifierType}:$This->{AtomTypesSetToUse}";
}

# Generate atom types fingerprints...
#
# The current release of MayaChemTools supports generation of two types of AtomTypes
# fingerprints corresponding to non-hydrogen and/or hydrogen atoms:
#
# AtomTypesCount - A vector containing count of atom types
# AtomTypesBits - A bit vector indicating presence/absence of atom types
#
# For AtomTypesCount fingerprints, two types of atom types set size is allowed:
#
# ArbitrarySize - Corresponds to only atom types detected in molecule
# FixedSize - Corresponds to fixed number of atom types previously defined
#
# For AtomTypesBits fingerprints, only FixedSize atom type set is allowed.
#
# The fixed size atom type set size used during generation of fingerprints corresponding
# to FixedSize value of AtomTypesSetToUse contains all possible atom types in datafiles
# distributed with MayaChemTools release for each supported type.
#
# Combination of Type and AtomTypesSetToUse allow generation of 22 different types of
# AtomTypes fingerprints:
#
# Type                  AtomIdentifierType           AtomTypesSetToUse
#
# AtomTypesCount        AtomicInvariantsAtomTypes    ArbitrarySize
#
# AtomTypesCount        DREIDINGAtomTypes            ArbitrarySize
# AtomTypesCount        DREIDINGAtomTypes            FixedSize
# AtomTypesBits         DREIDINGAtomTypes            FixedSize
#
# AtomTypesCount        EStateAtomTypes              ArbitrarySize
# AtomTypesCount        EStateAtomTypes              FixedSize
# AtomTypesBits         EStateAtomTypes              FixedSize
#
# AtomTypesCount        FunctionalClassAtomTypes     ArbitrarySize
#
# AtomTypesCount        MMFF94AtomTypes              ArbitrarySize
# AtomTypesCount        MMFF94AtomTypes              FixedSize
# AtomTypesBits         MMFF94AtomTypes              FixedSize
#
# AtomTypesCount        SLogPAtomTypes               ArbitrarySize
# AtomTypesCount        SLogPAtomTypes               FixedSize
# AtomTypesBits         SLogPAtomTypes               FixedSize
#
# AtomTypesCount        SYBYLAtomTypes               ArbitrarySize
# AtomTypesCount        SYBYLAtomTypes               FixedSize
# AtomTypesBits         SYBYLAtomTypes               FixedSize
#
# AtomTypesCount        TPSAAtomTypes                FixedSize
# AtomTypesBits         TPSAAtomTypes                FixedSize
#
# AtomTypesCount        UFFAtomTypes                 ArbitrarySize
# AtomTypesCount        UFFAtomTypes                 FixedSize
# AtomTypesBits         UFFAtomTypes                 FixedSize
#
# Returns the object. When atom types can't be assigned to all atoms, a warning
# is issued and no fingerprints are set; the cached molecule data is cleared on
# that path as well.
#
sub GenerateFingerprints {
  my($This) = @_;

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Check and assign appropriate atom types...
  if (!$This->_AssignAtomTypes()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";

    # Don't leave the cached atoms behind on failure...
    $This->_ClearMoleculeDataCache();

    return $This;
  }

  # Count atom types...
  $This->_CountAtomTypes();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}

# Assign appropriate atom types...
#
# Creates the atom types object matching AtomIdentifierType, runs its atom types
# assignment, and stores the assigned type of each cached atom in AtomTypes
# keyed by atom ID.
#
# Returns the object on success and undef when the assignment is not successful.
# Croaks for an unknown atom identifier type.
#
sub _AssignAtomTypes {
  my($This) = @_;
  my($SpecifiedAtomTypes);

  %{$This->{AtomTypes}} = ();
  $SpecifiedAtomTypes = undef;

  # Arguments shared by all atom types classes except TPSAAtomTypes...
  my @CommonArguments = ('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::AtomicInvariantsAtomTypes->new(@CommonArguments, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
  }
  elsif ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::DREIDINGAtomTypes->new(@CommonArguments);
  }
  elsif ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::EStateAtomTypes->new(@CommonArguments);
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::FunctionalClassAtomTypes->new(@CommonArguments, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
  }
  elsif ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::MMFF94AtomTypes->new(@CommonArguments);
  }
  elsif ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::SLogPAtomTypes->new(@CommonArguments);
  }
  elsif ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::SYBYLAtomTypes->new(@CommonArguments);
  }
  elsif ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::TPSAAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
  }
  elsif ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::UFFAtomTypes->new(@CommonArguments);
  }
  else {
    croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  # Assign atom types...
  $SpecifiedAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return undef;
  }

  # Collect assigned atom types...
  for my $Atom (@{$This->{Atoms}}) {
    my $AtomID = $Atom->GetID();
    $This->{AtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
  }

  return $This;
}

# Count atom types...
#
# Tallies how many cached atoms carry each assigned atom type. The tally is kept
# in AtomTypesCount and is rebuilt from scratch on every call.
#
sub _CountAtomTypes {
  my($This) = @_;

  %{$This->{AtomTypesCount}} = ();

  for my $CachedAtom (@{$This->{Atoms}}) {
    my $AssignedAtomType = $This->{AtomTypes}{$CachedAtom->GetID()};
    $This->{AtomTypesCount}{$AssignedAtomType}++;
  }

  return $This;
}

# Set final fingerprints...
#
# Marks the fingerprints as generated and hands off to the routine matching the
# fingerprints type.
#
sub _SetFinalFingerprints {
  my($This) = @_;
  my $FingerprintsType = $This->{Type};

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  if ($FingerprintsType =~ /^AtomTypesCount$/i) {
    $This->_SetFinalAtomTypesCountFingerprints();
  }
  elsif ($FingerprintsType =~ /^AtomTypesBits$/i) {
    $This->_SetFinalAtomTypesBitsFingerprints();
  }

  return $This;
}

# Set final fingerprints for atom types count...
#
# FixedSize: one value for every atom type in the fixed size set, in the order of
# the set, with 0 for atom types not found in the molecule.
#
# ArbitrarySize: one value for every atom type found, sorted by atom type.
#
sub _SetFinalAtomTypesCountFingerprints {
  my($This) = @_;
  my $CountOf = $This->{AtomTypesCount};

  my @AtomTypeIDs = ($This->{AtomTypesSetToUse} =~ /^FixedSize$/i)
    ? @{$This->_GetFixedSizeAtomTypesSet()}
    : sort keys %{$CountOf};

  my @AtomTypeCounts = map { exists($CountOf->{$_}) ? $CountOf->{$_} : 0 } @AtomTypeIDs;

  # Add IDs and values to fingerprint vector...
  $This->{FingerprintsVector}->AddValueIDs(\@AtomTypeIDs) if @AtomTypeIDs;
  $This->{FingerprintsVector}->AddValues(\@AtomTypeCounts);

  return $This;
}

# Set final fingerprints for atom types count bits...
#
# Turns on one bit for each atom type of the fixed size set that has a non-zero
# count. The bit index is the zero-based position of the atom type in the set.
#
sub _SetFinalAtomTypesBitsFingerprints {
  my($This) = @_;

  my $SkipPosCheck = 1;
  my @FixedAtomTypes = @{$This->_GetFixedSizeAtomTypesSet()};

  for my $BitIndex (0 .. $#FixedAtomTypes) {
    my $AtomType = $FixedAtomTypes[$BitIndex];
    my $IsPresent = exists($This->{AtomTypesCount}{$AtomType}) && $This->{AtomTypesCount}{$AtomType};

    if ($IsPresent) {
      $This->{FingerprintsBitVector}->SetBit($BitIndex, $SkipPosCheck);
    }
  }

  return $This;
}

# Cache appropriate molecule data...
#
# Stores in Atoms the atoms to fingerprint: all atoms of the molecule, or only
# those for which IsHydrogen is negated when IgnoreHydrogens is set.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # "IsHydrogen" together with a true negation flag selects non-hydrogen atoms...
  my @AtomSelection = $This->{IgnoreHydrogens} ? ("IsHydrogen", 1) : ();

  @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms(@AtomSelection);

  return $This;
}

# Clear cached molecule data...
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  @{$This->{Atoms}} = ();

  return $This;
}

# Get fixed size atom types set size...
#
# Returns the number of atom types in the fixed size set for AtomIdentifierType,
# using the non-hydrogen set when IgnoreHydrogens is set. TPSAAtomTypes has a
# single set. Croaks for identifier types without a fixed size set.
#
sub _GetFixedSizeAtomTypesSetSize {
  my($This) = @_;
  my $IdentifierType = $This->{AtomIdentifierType};
  my $HeavyAtomsOnly = $This->{IgnoreHydrogens};
  my $Size = 0;

  if ($IdentifierType =~ /^DREIDINGAtomTypes$/i) {
    $Size = $HeavyAtomsOnly
      ? scalar @{AtomTypes::DREIDINGAtomTypes::GetAllPossibleDREIDINGNonHydrogenAtomTypes()}
      : scalar @{AtomTypes::DREIDINGAtomTypes::GetAllPossibleDREIDINGAtomTypes()};
  }
  elsif ($IdentifierType =~ /^EStateAtomTypes$/i) {
    $Size = $HeavyAtomsOnly
      ? scalar @{AtomTypes::EStateAtomTypes::GetAllPossibleEStateNonHydrogenAtomTypes()}
      : scalar @{AtomTypes::EStateAtomTypes::GetAllPossibleEStateAtomTypes()};
  }
  elsif ($IdentifierType =~ /^MMFF94AtomTypes$/i) {
    $Size = $HeavyAtomsOnly
      ? scalar @{AtomTypes::MMFF94AtomTypes::GetAllPossibleMMFF94NonHydrogenAtomTypes()}
      : scalar @{AtomTypes::MMFF94AtomTypes::GetAllPossibleMMFF94AtomTypes()};
  }
  elsif ($IdentifierType =~ /^SLogPAtomTypes$/i) {
    $Size = $HeavyAtomsOnly
      ? scalar @{AtomTypes::SLogPAtomTypes::GetAllPossibleSLogPNonHydrogenAtomTypes()}
      : scalar @{AtomTypes::SLogPAtomTypes::GetAllPossibleSLogPAtomTypes()};
  }
  elsif ($IdentifierType =~ /^SYBYLAtomTypes$/i) {
    $Size = $HeavyAtomsOnly
      ? scalar @{AtomTypes::SYBYLAtomTypes::GetAllPossibleSYBYLNonHydrogenAtomTypes()}
      : scalar @{AtomTypes::SYBYLAtomTypes::GetAllPossibleSYBYLAtomTypes()};
  }
  elsif ($IdentifierType =~ /^TPSAAtomTypes$/i) {
    $Size = scalar @{AtomTypes::TPSAAtomTypes::GetAllPossibleTPSAAtomTypes()};
  }
  elsif ($IdentifierType =~ /^UFFAtomTypes$/i) {
    $Size = $HeavyAtomsOnly
      ? scalar @{AtomTypes::UFFAtomTypes::GetAllPossibleUFFNonHydrogenAtomTypes()}
      : scalar @{AtomTypes::UFFAtomTypes::GetAllPossibleUFFAtomTypes()};
  }
  else {
    croak "Error: ${ClassName}->_GetFixedSizeAtomTypesSetSize: Atom types set size for atom indentifier type, $This->{AtomIdentifierType}, is not available...";
  }

  return $Size;
}

# Get fixed size atom types set...
#
# Returns a reference to the list of atom types in the fixed size set for
# AtomIdentifierType, following the same selection rules as
# _GetFixedSizeAtomTypesSetSize. Croaks for identifier types without a fixed
# size set.
#
sub _GetFixedSizeAtomTypesSet {
  my($This) = @_;
  my $IdentifierType = $This->{AtomIdentifierType};
  my $HeavyAtomsOnly = $This->{IgnoreHydrogens};
  my $AtomTypesRef = undef;

  if ($IdentifierType =~ /^DREIDINGAtomTypes$/i) {
    $AtomTypesRef = $HeavyAtomsOnly
      ? AtomTypes::DREIDINGAtomTypes::GetAllPossibleDREIDINGNonHydrogenAtomTypes()
      : AtomTypes::DREIDINGAtomTypes::GetAllPossibleDREIDINGAtomTypes();
  }
  elsif ($IdentifierType =~ /^EStateAtomTypes$/i) {
    $AtomTypesRef = $HeavyAtomsOnly
      ? AtomTypes::EStateAtomTypes::GetAllPossibleEStateNonHydrogenAtomTypes()
      : AtomTypes::EStateAtomTypes::GetAllPossibleEStateAtomTypes();
  }
  elsif ($IdentifierType =~ /^MMFF94AtomTypes$/i) {
    $AtomTypesRef = $HeavyAtomsOnly
      ? AtomTypes::MMFF94AtomTypes::GetAllPossibleMMFF94NonHydrogenAtomTypes()
      : AtomTypes::MMFF94AtomTypes::GetAllPossibleMMFF94AtomTypes();
  }
  elsif ($IdentifierType =~ /^SLogPAtomTypes$/i) {
    $AtomTypesRef = $HeavyAtomsOnly
      ? AtomTypes::SLogPAtomTypes::GetAllPossibleSLogPNonHydrogenAtomTypes()
      : AtomTypes::SLogPAtomTypes::GetAllPossibleSLogPAtomTypes();
  }
  elsif ($IdentifierType =~ /^SYBYLAtomTypes$/i) {
    $AtomTypesRef = $HeavyAtomsOnly
      ? AtomTypes::SYBYLAtomTypes::GetAllPossibleSYBYLNonHydrogenAtomTypes()
      : AtomTypes::SYBYLAtomTypes::GetAllPossibleSYBYLAtomTypes();
  }
  elsif ($IdentifierType =~ /^TPSAAtomTypes$/i) {
    $AtomTypesRef = AtomTypes::TPSAAtomTypes::GetAllPossibleTPSAAtomTypes();
  }
  elsif ($IdentifierType =~ /^UFFAtomTypes$/i) {
    $AtomTypesRef = $HeavyAtomsOnly
      ? AtomTypes::UFFAtomTypes::GetAllPossibleUFFNonHydrogenAtomTypes()
      : AtomTypes::UFFAtomTypes::GetAllPossibleUFFAtomTypes();
  }
  else {
    croak "Error: ${ClassName}->_GetFixedSizeAtomTypesSet: Atom types set for atom indentifier type, $This->{AtomIdentifierType}, is not available...";
  }

  return $AtomTypesRef;
}

# Initialize atom identifier type information...
#
# Current supported values:
#
# AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# Only AtomicInvariantsAtomTypes and FunctionalClassAtomTypes have additional
# information to set up; the other supported types need none. Croaks for any
# other value. Returns the object.
#
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;
  my $IdentifierType = $This->{AtomIdentifierType};

  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
    return $This;
  }

  if ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
    return $This;
  }

  # Nothing to do for now for the remaining supported types...
  if ($IdentifierType =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    return $This;
  }

  croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
}

# Initialize atomic invariants atom types to use for generating atom IDs in atom pairs...
#
# Let:
#   AS = Atom symbol corresponding to element symbol
#
#   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
#   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
#   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
#   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   H<n>   = Number of implicit and explicit hydrogens for atom
#   Ar     = Aromatic annotation indicating whether atom is aromatic
#   RA     = Ring atom annotation indicating whether atom is in a ring
#   FC<+n/-n> = Formal charge assigned to atom
#   MN<n> = Mass number indicating isotope other than most abundant isotope
#   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
#
#   AtomTypeIDx = Atomic invariants atom type for atom x
#   AtomTypeIDy = Atomic invariants atom type for atom y
#   Dn   = Topological distance between atom x and y
#
# Then:
#
#   AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
#
#     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
#
# Except for AS, which is a required atomic invariant in AtomIDs, all other atomic
# invariants are optional. Default atomic invariants used for AtomID are: AS, X<n>,
# BO<n>, H<n>, FC<+n/-n>. AtomID specification doesn't include atomic invariants with
# zero or undefined values.
#
# Examples of AtomIDs:
#
#   O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge
#   O.X1.BO1.FC-1 - Hydroxyl oxygen in carboxylate with explicit negative charge
#   O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon
#   O.X2.BO2 - Hydroxyl oxygen in carboxylate attached to carbonyl carbon and another heavy atom
#
#   C.X2.BO3.H1.Ar - Aromatic carbon
#
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;

  # Default atomic invariants to use for generating atom pair atom IDs: AS, X, BO, H, FC
  #
  @{$This->{AtomicInvariantsToUse}} = qw(AS X BO H FC);

  return $This;
}

# Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
# class, to use for generating atom identifiers...
#
# Let:
#   HBD: HydrogenBondDonor
#   HBA: HydrogenBondAcceptor
#   PI :  PositivelyIonizable
#   NI : NegativelyIonizable
#   Ar : Aromatic
#   Hal : Halogen
#   H : Hydrophobic
#   RA : RingAtom
#   CA : ChainAtom
#
# Then:
#
#   Functional class atom type specification for an atom corresponds to:
#
#     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
#
#   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
#
#   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
#
#     HydrogenBondDonor: NH, NH2, OH
#     HydrogenBondAcceptor: N[!H], O
#     PositivelyIonizable: +, NH2
#     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
#
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;

  # Default functional class atom types to use for generating atom identifiers
  # are: HBD, HBA, PI, NI, Ar, Hal
  #
  @{$This->{FunctionalClassesToUse}} = qw(HBD HBA PI NI Ar Hal);

  return $This;
}

# Set atomic invariants to use for atom IDs...
#
# The atomic invariants may be passed as a list or as a reference to an array.
# With no values, a warning is issued and nothing is returned. Croaks, leaving
# the current setting untouched, when a value is not an available atomic
# invariant. Returns the object otherwise.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;

  unless (@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  # A leading array reference carries the whole list...
  my @RequestedInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomicInvariants are valid...
  for my $RequestedInvariant (@RequestedInvariants) {
    AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($RequestedInvariant)
      or croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $RequestedInvariant, is not supported...\n ";
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = @RequestedInvariants;

  return $This;
}

# Set functional classes to use for generation of initial atom identifiers...
#
# The functional classes may be passed as a list or as a reference to an array.
#
# A warning is issued and nothing is returned when no values are given or when
# the atom identifier type is not FunctionalClassAtomTypes. Croaks, leaving the
# current setting untouched, when a value is not an available functional class.
# Returns the object otherwise.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;

  unless (@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # A leading array reference carries the whole list...
  my @RequestedClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified FunctionalClasses are valid...
  for my $RequestedClass (@RequestedClasses) {
    AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($RequestedClass)
      or croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $RequestedClass, is not supported...\n ";
  }

  # Set functional classes to use...
  @{$This->{FunctionalClassesToUse}} = @RequestedClasses;

  return $This;
}

# Return a string containing data for AtomTypesFingerprints object...
# Builds the string used when the object is evaluated in string context.
#
# The string is a "; " separated list of "Name: Value" entries: the fingerprints
# type, atom identifier type and atom types set, followed by the atomic
# invariants or functional classes details for those two identifier types, the
# hydrogens setting, and the fingerprints vector or bit vector.
#
sub StringifyAtomTypesFingerprints {
  my($This) = @_;

  my @Entries = ("Type: $This->{Type}; AtomIdentifierType: $This->{AtomIdentifierType}; AtomTypesSetToUse: $This->{AtomTypesSetToUse}");

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my @InvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    my %NameOfInvariant = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();
    my @InvariantLabels = map { "$_: $NameOfInvariant{$_}" } @InvariantsOrder;

    push @Entries, "AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    push @Entries, "AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@InvariantsOrder, ", ", 0) . ">";
    push @Entries, "AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@InvariantLabels, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my @ClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    my %NameOfClass = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
    my @ClassLabels = map { "$_: $NameOfClass{$_}" } @ClassesOrder;

    push @Entries, "FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    push @Entries, "FunctionalClassesOrder: <" . TextUtil::JoinWords(\@ClassesOrder, ", ", 0) . ">";
    push @Entries, "AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@ClassLabels, ", ", 0) . ">";
  }

  push @Entries, "IgnoreHydrogens: " . ($This->{IgnoreHydrogens} ? "Yes" : "No");

  # Interpolating the vector relies on its own string conversion...
  if ($This->{Type} =~ /^AtomTypesCount$/i) {
    push @Entries, "FingerprintsVector: < $This->{FingerprintsVector} >";
  }
  elsif ($This->{Type} =~ /^AtomTypesBits$/i) {
    push @Entries, "FingerprintsBitVector: < $This->{FingerprintsBitVector} >";
  }

  return join("; ", @Entries);
}