1 package Fingerprints::AtomNeighborhoodsFingerprints; 2 # 3 # $RCSfile: AtomNeighborhoodsFingerprints.pm,v $ 4 # $Date: 2015/02/28 20:48:53 $ 5 # $Revision: 1.27 $ 6 # 7 # Author: Manish Sud <msud@san.rr.com> 8 # 9 # Copyright (C) 2015 Manish Sud. All rights reserved. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 use strict; 30 use Carp; 31 use Exporter; 32 use Fingerprints::Fingerprints; 33 use TextUtil (); 34 use Molecule; 35 use AtomTypes::AtomicInvariantsAtomTypes; 36 use AtomTypes::DREIDINGAtomTypes; 37 use AtomTypes::EStateAtomTypes; 38 use AtomTypes::FunctionalClassAtomTypes; 39 use AtomTypes::MMFF94AtomTypes; 40 use AtomTypes::SLogPAtomTypes; 41 use AtomTypes::SYBYLAtomTypes; 42 use AtomTypes::TPSAAtomTypes; 43 use AtomTypes::UFFAtomTypes; 44 45 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 46 47 @ISA = qw(Fingerprints::Fingerprints Exporter); 48 @EXPORT = qw(); 49 @EXPORT_OK = qw(); 50 51 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); 52 53 # Setup class variables... 54 my($ClassName); 55 _InitializeClass(); 56 57 # Overload Perl functions... 58 use overload '""' => 'StringifyAtomNeighborhoodsFingerprints'; 59 60 # Class constructor... 
sub new {
  my $Class = shift;
  my %NamesAndValues = @_;

  # Let the parent class create the base object and rebless it into this class...
  my $This = $Class->SUPER::new();
  bless $This, (ref($Class) || $Class);

  # Install class defaults first and then apply caller specified properties...
  $This->_InitializeAtomNeighborhoodsFingerprints();
  $This->_InitializeAtomNeighborhoodsFingerprintsProperties(%NamesAndValues);

  return $This;
}

# Set default values for all object data used by this class...
#
sub _InitializeAtomNeighborhoodsFingerprints {
  my $This = shift;

  # Scalar defaults:
  #
  #   Type, VectorType, FingerprintsVectorType - Kind of fingerprints and vector used to store them
  #   MinNeighborhoodRadius, MaxNeighborhoodRadius - Atomic neighborhoods radii range
  #   AtomIdentifierType - Atom typing methodology; it must be set explicitly to one of:
  #
  #     AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
  #     FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
  #     SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
  #
  my %ScalarDefaults = (
    'Type' => 'AtomNeighborhoods',
    'VectorType' => 'FingerprintsVector',
    'FingerprintsVectorType' => 'AlphaNumericalValues',
    'MinNeighborhoodRadius' => 0,
    'MaxNeighborhoodRadius' => 2,
    'AtomIdentifierType' => '',
  );
  for my $Key (keys %ScalarDefaults) {
    $This->{$Key} = $ScalarDefaults{$Key};
  }

  # Empty containers:
  #
  #   AssignedAtomTypes - Atom types assigned to each heavy atom
  #   AtomNeighborhoods - Atom neighborhoods within specified radii
  #   NeighborhoodAtomTypesCount - Atom types count at different neighborhood radii
  #
  for my $Key ('AssignedAtomTypes', 'AtomNeighborhoods', 'NeighborhoodAtomTypesCount') {
    %{$This->{$Key}} = ();
  }

  # Atom neighborhood identifiers generated using specified atom identifier type...
  @{$This->{AtomNeighborhoodsIdentifiers}} = ();
}

# Initialize class data...
sub _InitializeClass {
  # Package name is used as a prefix in error and warning messages...
  $ClassName = __PACKAGE__;
}

# Initialize object properties....
# Apply caller specified properties through their Set<Name> methods, make sure
# required properties were specified, and initialize the fingerprints vector...
#
# Parameters:
#   %NamesAndValues - Property names and values; each name must have a matching
#                     Set<Name> method.
#
# Croaks when Molecule or AtomIdentifierType is missing, or Size is specified.
#
sub _InitializeAtomNeighborhoodsFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  # Apply properties in a deterministic order with AtomIdentifierType first. Perl doesn't
  # guarantee any hash traversal order and the setters are order dependent:
  #
  #   . SetAtomIdentifierType installs default AtomicInvariantsToUse and FunctionalClassesToUse
  #     values which would silently replace user specified values applied before it.
  #   . SetFunctionalClassesToUse ignores its values unless AtomIdentifierType has already
  #     been set to FunctionalClassAtomTypes.
  #
  my($Name, $MethodName, @Names);

  @Names = sort keys %NamesAndValues;
  @Names = ((grep { $_ eq 'AtomIdentifierType' } @Names), (grep { $_ ne 'AtomIdentifierType' } @Names));

  for $Name (@Names) {
    $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }
  if (exists $NamesAndValues{Size}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated with a user specified size: It's an arbitrary length vector...";
  }
  if (!exists $NamesAndValues{AtomIdentifierType}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
  }

  $This->_InitializeFingerprintsVector();

  return $This;
}

# Set atom identifier type..
#
# The type name is matched without regard to case and it can only be set once for
# an object. Setting it also initializes type specific default information.
#
# Croaks when the specified type is not supported or a type has already been set.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
  }

  if ($This->{AtomIdentifierType}) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type: It's already set...";
  }

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize atom identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}

# Set minimum atom neighborhood radius...
#
# Parameters:
#   $Value - Non-negative integer radius.
#
# Croaks when the value is not an integer or is negative.
#
sub SetMinNeighborhoodRadius {
  my($This, $Value) = @_;

  if (!TextUtil::IsInteger($Value)) {
    croak "Error: ${ClassName}->SetMinNeighborhoodRadius: MinNeighborhoodRadius value, $Value, is not valid: It must be an integer...";
  }

  if ($Value < 0 ) {
    croak "Error: ${ClassName}->SetMinNeighborhoodRadius: MinNeighborhoodRadius value, $Value, is not valid: It must be >= 0...";
  }
  $This->{MinNeighborhoodRadius} = $Value;

  return $This;
}

# Set maximum atom neighborhood radius...
#
# Parameters:
#   $Value - Non-negative integer radius.
#
# Croaks when the value is not an integer or is negative.
#
sub SetMaxNeighborhoodRadius {
  my($This, $Value) = @_;

  if (!TextUtil::IsInteger($Value)) {
    croak "Error: ${ClassName}->SetMaxNeighborhoodRadius: MaxNeighborhoodRadius value, $Value, is not valid: It must be an integer...";
  }

  if ($Value < 0 ) {
    croak "Error: ${ClassName}->SetMaxNeighborhoodRadius: MaxNeighborhoodRadius value, $Value, is not valid: It must be >= 0...";
  }
  $This->{MaxNeighborhoodRadius} = $Value;

  return $This;
}

# Generate fingerprints description...
#
# Returns an explicitly set Description, when available, or a string of the form:
#
#   <Type>:<AtomIdentifierType>:MinRadius<n>:MaxRadius<n>
#
sub GetDescription {
  my($This) = @_;

  # Is description explicitly set?
  if (exists $This->{Description}) {
    return $This->{Description};
  }

  # Generate fingerprints description...

  return "$This->{Type}:$This->{AtomIdentifierType}:MinRadius$This->{MinNeighborhoodRadius}:MaxRadius$This->{MaxNeighborhoodRadius}";
}

# Generate atom neighborhood [ Ref 53-56, Ref 73 ] fingerprints...
#
# Methodology:
#   . Assign atom types to all non-hydrogen atoms in the molecule
#   . Get atom neighborhoods up to MaxNeighborhoodRadius
#   . Count unique atom types at each neighborhood radii for all heavy atoms
#   . Generate neighborhood identifiers for all neighborhoods around central
#     heavy atom
#   . Atom neighborhood identifier for a specific radii is generated using neighborhood
#     radius, assigned atom type and its count as follows:
#
#     NR<n>-<AtomType>-ATC<n>
#
#   . Atom neighborhood identifier for a central atom at all specified radii is generated
#     by concatenating neighborhood identifiers at each radii by colon:
#
#     NR<n>-<AtomType>-ATC<n>:NR<n>-<AtomType>-ATC<n>:
#
#   . Set final fingerprints as list of neighborhood atom identifiers
#
# Croaks when MinNeighborhoodRadius is greater than MaxNeighborhoodRadius. When atom
# types can't be assigned to all atoms, a warning is issued and the object is returned
# without generating any fingerprints.
#
sub GenerateFingerprints {
  my($This) = @_;

  # Equal radii are valid: fingerprints are generated for a single radius...
  if ($This->{MinNeighborhoodRadius} > $This->{MaxNeighborhoodRadius}) {
    croak "Error: ${ClassName}->GenerateFingerprints: No fingerprints generated: MinNeighborhoodRadius, $This->{MinNeighborhoodRadius}, must be less than or equal to MaxNeighborhoodRadius, $This->{MaxNeighborhoodRadius}...";
  }

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Assign atom types to all heavy atoms...
  if (!$This->_AssignAtomTypes()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";

    # Don't leave cached atoms behind on the failure path...
    $This->_ClearMoleculeDataCache();

    return $This;
  }

  # Initialize atom neighborhoods information...
  $This->_InitializeAtomNeighborhoods();

  # Identify atom neighborhoods within specified radii...
  $This->_GetAtomNeighborhoods();

  # Count atom neighborhoods atom types...
  $This->_CountAtomNeighborhoodsAtomTypes();

  # Generate atom neighborhood identifiers...
  $This->_GenerateAtomNeighborhoodIdentifiers();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}

# Assign appropriate atom types to all heavy atoms...
#
# Instantiates the AtomTypes class corresponding to AtomIdentifierType, assigns atom
# types, and collects the assigned type of each cached non-hydrogen atom into
# AssignedAtomTypes keyed by atom ID.
#
# Returns the object on success and nothing (false) when atom types couldn't be assigned
# successfully. Croaks for an unknown AtomIdentifierType.
#
sub _AssignAtomTypes {
  my($This) = @_;
  my($SpecifiedAtomTypes, $Atom, $AtomID, $IgnoreHydrogens, @CommonParameters);

  %{$This->{AssignedAtomTypes}} = ();
  $IgnoreHydrogens = 1;

  $SpecifiedAtomTypes = undef;

  # Parameters shared by all atom types classes except TPSAAtomTypes...
  @CommonParameters = ('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);

  # Constructors are invoked as Class->new(...): indirect object syntax is avoided as its
  # parsing relies on heuristics and this package defines its own new subroutine...
  #
  IDENTIFIERTYPE: {
    if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
      $SpecifiedAtomTypes = AtomTypes::AtomicInvariantsAtomTypes->new(@CommonParameters, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
      last IDENTIFIERTYPE;
    }

    if ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
      $SpecifiedAtomTypes = AtomTypes::DREIDINGAtomTypes->new(@CommonParameters);
      last IDENTIFIERTYPE;
    }

    if ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
      $SpecifiedAtomTypes = AtomTypes::EStateAtomTypes->new(@CommonParameters);
      last IDENTIFIERTYPE;
    }

    if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
      $SpecifiedAtomTypes = AtomTypes::FunctionalClassAtomTypes->new(@CommonParameters, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
      last IDENTIFIERTYPE;
    }

    if ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
      $SpecifiedAtomTypes = AtomTypes::MMFF94AtomTypes->new(@CommonParameters);
      last IDENTIFIERTYPE;
    }

    if ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
      $SpecifiedAtomTypes = AtomTypes::SLogPAtomTypes->new(@CommonParameters);
      last IDENTIFIERTYPE;
    }

    if ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
      $SpecifiedAtomTypes = AtomTypes::SYBYLAtomTypes->new(@CommonParameters);
      last IDENTIFIERTYPE;
    }

    if ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
      # TPSA atom types take their own set of parameters...
      $SpecifiedAtomTypes = AtomTypes::TPSAAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
      last IDENTIFIERTYPE;
    }

    if ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
      $SpecifiedAtomTypes = AtomTypes::UFFAtomTypes->new(@CommonParameters);
      last IDENTIFIERTYPE;
    }

    croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  # Assign atom types...
  $SpecifiedAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful. A bare return is false in both
  # scalar and list contexts...
  if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return;
  }

  # Collect assigned atom types...
  ATOM: for $Atom (@{$This->{Atoms}}) {
    if ($Atom->IsHydrogen()) {
      next ATOM;
    }
    $AtomID = $Atom->GetID();
    $This->{AssignedAtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
  }

  return $This;
}

# Initialize atom neighborhoods data for specified radii range...
#
# NeighborhoodAtomTypesCount gets an empty entry for each radius between minimum and
# maximum neighborhood radii; AtomNeighborhoods gets an empty entry for each radius
# from 0 to maximum neighborhood radius.
#
sub _InitializeAtomNeighborhoods {
  my($This) = @_;
  my($Radius);

  # Initialize atom neighborhood count information between specified radii...
  %{$This->{NeighborhoodAtomTypesCount}} = ();

  for $Radius ($This->{MinNeighborhoodRadius} .. $This->{MaxNeighborhoodRadius}) {
    %{$This->{NeighborhoodAtomTypesCount}{$Radius}} = ();
  }

  # Initialize atom neighborhoods atoms information at all specified radii...
  #
  %{$This->{AtomNeighborhoods}} = ();

  for $Radius (0 .. $This->{MaxNeighborhoodRadius}) {
    %{$This->{AtomNeighborhoods}{$Radius}} = ();
  }

  return $This;
}

# Collect atom neighborhoods upto maximum neighborhood radius...
#
# Notes:
#   .
#     Fingerprints are only generated for neighborhoods between specified minimum
#     and maximum neighborhood radii.
#
sub _GetAtomNeighborhoods {
  my $This = shift;

  my $MaxRadius = $This->{MaxNeighborhoodRadius};
  my $Molecule = $This->GetMolecule();

  for my $CentralAtom (@{$This->{Atoms}}) {
    my $CentralAtomID = $CentralAtom->GetID();

    if ($MaxRadius == 0) {
      # Atom is its own neighborhood at 0 radius...
      $This->{AtomNeighborhoods}{0}{$CentralAtomID} = [$CentralAtom];
      next;
    }

    # Neighborhoods are stored by their position in the returned list, starting
    # with radius 0...
    my @NeighborhoodsAtomsRefs = $Molecule->GetAtomNeighborhoodsWithRadiusUpto($CentralAtom, $MaxRadius);
    for my $Radius (0 .. $#NeighborhoodsAtomsRefs) {
      $This->{AtomNeighborhoods}{$Radius}{$CentralAtomID} = $NeighborhoodsAtomsRefs[$Radius];
    }
  }

  return $This;
}

# Count atom neighborhoods atom types for each non-hydrogen central atoms with
# neighborhoods in specified radii range...
#
sub _CountAtomNeighborhoodsAtomTypes {
  my $This = shift;

  my $MinRadius = $This->{MinNeighborhoodRadius};
  my $MaxRadius = $This->{MaxNeighborhoodRadius};

  # Only radii within the specified range contribute to the counts...
  my @Radii = grep { !($_ < $MinRadius || $_ > $MaxRadius) } sort { $a <=> $b } keys %{$This->{AtomNeighborhoods}};

  for my $Radius (@Radii) {
    for my $CentralAtomID (keys %{$This->{AtomNeighborhoods}{$Radius}}) {
      my $MembersRef = $This->{AtomNeighborhoods}{$Radius}{$CentralAtomID};

      for my $Member (@{$MembersRef}) {
        # Hydrogens are not counted...
        next if $Member->IsHydrogen();

        my $MemberID = $Member->GetID();
        my $MemberAtomType = $This->{AssignedAtomTypes}{$MemberID};

        # Count entry for central atom and atom type is created on first use...
        $This->{NeighborhoodAtomTypesCount}{$Radius}{$CentralAtomID}{$MemberAtomType}++;
      }
    }
  }

  return $This;
}

# Generate atom neighborhood identifiers for each non-hydrogen atom using atom
# neighborhood atom types and their count information...
#
# Let:
#   NR<n> = Neighborhood radius
#   AtomType = Assigned atom type
#   ATC<n> = AtomType count
#
# Then:
#
#   AtomNeighborhoodAtomIdentifier for a neighborhood atom corresponds to:
#
#     NR<n>-<AtomType>-ATC<n>
#
#   AtomNeighborhoodsIdentifier for all specified atom neighborhoods of an atom
#   corresponds to:
#
#     NR<n>-<AtomType>-ATC<n>:NR<n>-<AtomType>-ATC<n>:...
#
sub _GenerateAtomNeighborhoodIdentifiers {
  my $This = shift;

  @{$This->{AtomNeighborhoodsIdentifiers}} = ();

  for my $CentralAtom (@{$This->{Atoms}}) {
    my $CentralAtomID = $CentralAtom->GetID();
    my @IdentifierParts = ();

    for my $Radius ($This->{MinNeighborhoodRadius} .. $This->{MaxNeighborhoodRadius}) {
      # Skip radii without any counted atom types for the central atom...
      next unless exists $This->{NeighborhoodAtomTypesCount}{$Radius}{$CentralAtomID};

      my $AtomTypesCountRef = $This->{NeighborhoodAtomTypesCount}{$Radius}{$CentralAtomID};
      for my $AtomTypeName (sort keys %{$AtomTypesCountRef}) {
        push @IdentifierParts, join("-", "NR${Radius}", $AtomTypeName, "ATC" . $AtomTypesCountRef->{$AtomTypeName});
      }
    }

    # One identifier per central atom: per radius identifiers joined by colon...
    push @{$This->{AtomNeighborhoodsIdentifiers}}, join(":", @IdentifierParts);
  }

  return $This;
}

# Set final fingerprints vector...
#
sub _SetFinalFingerprints {
  my $This = shift;

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  # Sort atom neighborhoods identifiers in place...
  my @SortedIdentifiers = sort @{$This->{AtomNeighborhoodsIdentifiers}};
  @{$This->{AtomNeighborhoodsIdentifiers}} = @SortedIdentifiers;

  # Add sorted identifiers to FingerprintsVector which is already defined during
  # initialization containing AlphaNumericalValues...
  #
  $This->{FingerprintsVector}->AddValues($This->{AtomNeighborhoodsIdentifiers});

  return $This;
}

# Cache appropriate molecule data...
#
sub _SetupMoleculeDataCache {
  my $This = shift;

  # Get all non-hydrogen atoms: a true second argument negates the IsHydrogen check...
  my $Molecule = $This->GetMolecule();
  @{$This->{Atoms}} = $Molecule->GetAtoms("IsHydrogen", 1);

  return $This;
}

# Clear cached molecule data...
#
sub _ClearMoleculeDataCache {
  my $This = shift;

  @{$This->{Atoms}} = ();

  return $This;
}

# Set atomic invariants to use for atom identifiers...
#
# Values may be specified as a list or as a reference to an array. A warning is
# issued and nothing is changed when no values are specified. Croaks when a
# specified atomic invariant is not supported.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;

  unless (@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  # Type of the first value decides how the values were specified...
  my @RequestedAtomicInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomicInvariants are valid...
  my @ValidAtomicInvariants = ();
  for my $RequestedAtomicInvariant (@RequestedAtomicInvariants) {
    unless (AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($RequestedAtomicInvariant)) {
      croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $RequestedAtomicInvariant, is not supported...\n ";
    }
    push @ValidAtomicInvariants, $RequestedAtomicInvariant;
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = @ValidAtomicInvariants;

  return $This;
}

# Set functional classes to use for atom identifiers...
#
# Values may be specified as a list or as a reference to an array. A warning is
# issued and nothing is changed when no values are specified or AtomIdentifierType
# is not FunctionalClassAtomTypes. Croaks when a specified functional class is not
# supported.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;

  unless (@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # Type of the first value decides how the values were specified...
  my @RequestedFunctionalClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified FunctionalClasses are valid...
  my @ValidFunctionalClasses = ();
  for my $RequestedFunctionalClass (@RequestedFunctionalClasses) {
    unless (AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($RequestedFunctionalClass)) {
      croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $RequestedFunctionalClass, is not supported...\n ";
    }
    push @ValidFunctionalClasses, $RequestedFunctionalClass;
  }

  # Set functional classes to use...
  @{$This->{FunctionalClassesToUse}} = @ValidFunctionalClasses;

  return $This;
}

# Initialize atom identifier type information...
#
# Current supported values:
#
# AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# Croaks for any other value.
#
sub _InitializeAtomIdentifierTypeInformation {
  my $This = shift;

  # Types with their own default information...
  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
    return $This;
  }

  if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
    return $This;
  }

  # Nothing to do for now for the rest of the supported types...
  if ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    return $This;
  }

  croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
}

# Initialize atomic invariants atom types to use for generating atom identifiers...
#
# Let:
#   AS = Atom symbol corresponding to element symbol
#
#   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
#   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
#   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
#   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   H<n>   = Number of implicit and explicit hydrogens for atom
#   Ar     = Aromatic annotation indicating whether atom is aromatic
#   RA     = Ring atom annotation indicating whether atom is in a ring
#   FC<+n/-n> = Formal charge assigned to atom
#   MN<n> = Mass number indicating isotope other than most abundant isotope
#   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
#
# Then:
#
#   Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
#
#     AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
#
# Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
# optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
# AtomID specification doesn't include atomic invariants with zero or undefined values.
#
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my $This = shift;

  # Default atomic invariants to use for generating atom neighborhood atom IDs: AS, X, BO, H, FC
  #
  @{$This->{AtomicInvariantsToUse}} = qw(AS X BO H FC);

  return $This;
}

# Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
# class, to use for generating atom identifiers...
#
# Let:
#   HBD: HydrogenBondDonor
#   HBA: HydrogenBondAcceptor
#   PI :  PositivelyIonizable
#   NI : NegativelyIonizable
#   Ar : Aromatic
#   Hal : Halogen
#   H : Hydrophobic
#   RA : RingAtom
#   CA : ChainAtom
#
# Then:
#
#   Functional class atom type specification for an atom corresponds to:
#
#     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
#
#   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
#
#   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
#
#     HydrogenBondDonor: NH, NH2, OH
#     HydrogenBondAcceptor: N[!H], O
#     PositivelyIonizable: +, NH2
#     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
#
sub _InitializeFunctionalClassAtomTypesInformation {
  my $This = shift;

  # Default functional class atom types to use for generating atom identifiers
  # are: HBD, HBA, PI, NI, Ar, Hal
  #
  @{$This->{FunctionalClassesToUse}} = qw(HBD HBA PI NI Ar Hal);

  return $This;
}

# Return a string containing data for AtomNeighborhoodsFingerprints object...
#
# The string is a list of "Name: Value" sections delimited by "; ". Sections describing
# atomic invariants or functional classes are only present for the corresponding
# atom identifier types.
#
sub StringifyAtomNeighborhoodsFingerprints {
  my $This = shift;
  my @Sections = ();

  # Type of fingerprint and its radii...
  push @Sections, "Fingerprint type: $This->{Type}";
  push @Sections, "AtomIdentifierType: $This->{AtomIdentifierType}";
  push @Sections, "MinNeighborhoodRadius: $This->{MinNeighborhoodRadius}";
  push @Sections, "MaxNeighborhoodRadius: $This->{MaxNeighborhoodRadius}";

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my @InvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    my %AvailableInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();
    my @InvariantsDescriptions = map { "$_: $AvailableInvariants{$_}" } @InvariantsOrder;

    push @Sections, "AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    push @Sections, "AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@InvariantsOrder, ", ", 0) . ">";
    push @Sections, "AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@InvariantsDescriptions, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my @ClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    my %AvailableClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
    my @ClassesDescriptions = map { "$_: $AvailableClasses{$_}" } @ClassesOrder;

    push @Sections, "FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    push @Sections, "FunctionalClassesOrder: <" . TextUtil::JoinWords(\@ClassesOrder, ", ", 0) . ">";
    push @Sections, "AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@ClassesDescriptions, ", ", 0) . ">";
  }

  # Total number of atom neighborhood atom IDs...
  push @Sections, "NumOfAtomNeighborhoodAtomIdentifiers: " . $This->{FingerprintsVector}->GetNumOfValues();

  # FingerprintsVector...
  push @Sections, "FingerprintsVector: < $This->{FingerprintsVector} >";

  return join("; ", @Sections);
}