Mercurial > repos > deepakjadmin > mayatool3_test3
comparison mayachemtools/lib/Fingerprints/TopologicalAtomTripletsFingerprints.pm @ 0:73ae111cf86f draft
Uploaded
| author | deepakjadmin |
|---|---|
| date | Wed, 20 Jan 2016 11:55:01 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:73ae111cf86f |
|---|---|
| 1 package Fingerprints::TopologicalAtomTripletsFingerprints; | |
| 2 # | |
| 3 # $RCSfile: TopologicalAtomTripletsFingerprints.pm,v $ | |
| 4 # $Date: 2015/02/28 20:48:54 $ | |
| 5 # $Revision: 1.15 $ | |
| 6 # | |
| 7 # Author: Manish Sud <msud@san.rr.com> | |
| 8 # | |
| 9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 10 # | |
| 11 # This file is part of MayaChemTools. | |
| 12 # | |
| 13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 14 # the terms of the GNU Lesser General Public License as published by the Free | |
| 15 # Software Foundation; either version 3 of the License, or (at your option) any | |
| 16 # later version. | |
| 17 # | |
| 18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
| 19 # any warranty; without even the implied warranty of merchantability of fitness | |
| 20 # for a particular purpose. See the GNU Lesser General Public License for more | |
| 21 # details. | |
| 22 # | |
| 23 # You should have received a copy of the GNU Lesser General Public License | |
| 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
| 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
| 26 # Boston, MA, 02111-1307, USA. | |
| 27 # | |
| 28 | |
| 29 use strict; | |
| 30 use Carp; | |
| 31 use Exporter; | |
| 32 use Fingerprints::Fingerprints; | |
| 33 use TextUtil (); | |
| 34 use Molecule; | |
| 35 use AtomTypes::AtomicInvariantsAtomTypes; | |
| 36 use AtomTypes::DREIDINGAtomTypes; | |
| 37 use AtomTypes::EStateAtomTypes; | |
| 38 use AtomTypes::FunctionalClassAtomTypes; | |
| 39 use AtomTypes::MMFF94AtomTypes; | |
| 40 use AtomTypes::SLogPAtomTypes; | |
| 41 use AtomTypes::SYBYLAtomTypes; | |
| 42 use AtomTypes::TPSAAtomTypes; | |
| 43 use AtomTypes::UFFAtomTypes; | |
| 44 | |
# Package globals used for inheritance and exporting...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = ('Fingerprints::Fingerprints', 'Exporter');

# Nothing is exported by this class...
@EXPORT = ();
@EXPORT_OK = ();
%EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

# Class name, set up by _InitializeClass, used as a prefix in error and warning messages...
my $ClassName;
_InitializeClass();

# Use the class specific method to convert an object into a string...
use overload ('""' => 'StringifyTopologicalAtomTripletsFingerprints');
| 59 | |
| 60 # Class constructor... | |
# Create a new TopologicalAtomTripletsFingerprints object...
#
# Arguments: a class name, or an existing object, followed by name and value
# pairs which are passed on to the properties initialization method.
#
# Returns the new object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Let the base class create the object and bless it into the requested class...
  my $TargetClass = ref($Class) || $Class;
  my $This = bless $Class->SUPER::new(), $TargetClass;

  # Setup default values first and then apply specified names and values...
  $This->_InitializeTopologicalAtomTripletsFingerprints();
  $This->_InitializeTopologicalAtomTripletsFingerprintsProperties(%NamesAndValues);

  return $This;
}
| 73 | |
| 74 # Initialize object data... | |
| 75 # | |
# Set default values for all object data used by this class...
#
sub _InitializeTopologicalAtomTripletsFingerprints {
  my($This) = @_;

  # Type of fingerprint, type of vector and type of values stored in the vector...
  @{$This}{'Type', 'VectorType', 'FingerprintsVectorType'} = ('TopologicalAtomTriplets', 'FingerprintsVector', 'NumericalValues');

  # Default minimum and maximum bond distance between atoms in a triplet...
  @{$This}{'MinDistance', 'MaxDistance'} = (1, 10);

  # Triangle inequality is not applied to distance triplets by default...
  $This->{UseTriangleInequality} = 0;

  # Atom identifier type to use for atom IDs in atom triplets. It starts out empty
  # and is set through SetAtomIdentifierType to one of these supported values:
  # AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
  # FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
  # TPSAAtomTypes, UFFAtomTypes
  #
  $This->{AtomIdentifierType} = '';

  # Atom types assigned to each heavy atom...
  %{$This->{AssignedAtomTypes}} = ();

  # Atom triplet IDs and their counts...
  @{$This->{AtomTripletsIDs}} = ();
  %{$This->{AtomTripletsCount}} = ();
}
| 113 | |
| 114 # Initialize class ... | |
# Setup class level data...
#
sub _InitializeClass {
  # The package name is used as a prefix in error and warning messages...
  return $ClassName = __PACKAGE__;
}
| 119 | |
| 120 # Initialize object properties.... | |
# Initialize object properties using specified names and values...
#
# Arguments: name and value pairs. Each value is passed to the method named
# Set<Name>. Molecule and AtomIdentifierType are required.
#
# Returns the object. Croaks when a required name is missing.
#
sub _InitializeTopologicalAtomTripletsFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  # Check required names first, before any set method has been called...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }
  if (!exists $NamesAndValues{AtomIdentifierType}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
  }

  # Order of keys in a hash is not predictable, and the order of set method calls
  # matters: SetAtomIdentifierType installs default values for AtomicInvariantsToUse
  # and FunctionalClassesToUse, and SetFunctionalClassesToUse ignores its values
  # until the atom identifier type is known. Apply the required names first and
  # the remaining names in sorted order...
  #
  my @RequiredNames = ('Molecule', 'AtomIdentifierType');
  my %IsRequiredName = map { $_ => 1 } @RequiredNames;
  my @OptionalNames = sort grep { !$IsRequiredName{$_} } keys %NamesAndValues;

  for my $Name (@RequiredNames, @OptionalNames) {
    my $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  $This->_InitializeFingerprintsVector();

  return $This;
}
| 142 | |
| 143 # Set minimum distance for atom triplets... | |
| 144 # | |
# Set minimum bond distance between atoms in atom triplets...
#
# Arguments: a positive integer. Returns the object; croaks for any other value.
#
sub SetMinDistance {
  my($This, $Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid: It must be a positive integer...";

  $This->{MinDistance} = $Value;

  return $This;
}
| 155 | |
| 156 # Set maximum distance for atom triplets... | |
| 157 # | |
# Set maximum bond distance between atoms in atom triplets...
#
# Arguments: a positive integer. Returns the object; croaks for any other value.
#
sub SetMaxDistance {
  my($This, $Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid: It must be a positive integer...";

  $This->{MaxDistance} = $Value;

  return $This;
}
| 168 | |
| 169 # Set atom identifier type.. | |
| 170 # | |
# Set atom identifier type to use for atom IDs in atom triplets...
#
# Arguments: one of the supported atom identifier types; the name is matched
# without regard to case.
#
# Returns the object. Croaks when the type is not supported or when an atom
# identifier type has already been set for the object.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not valid. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
  }

  # Atom identifier type can be set only once for an object...
  if ($This->{AtomIdentifierType}) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change initial atom identifier type: It's already set...";
  }

  $This->{AtomIdentifierType} = $IdentifierType;

  # Setup default values which depend on the atom identifier type...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}
| 189 | |
| 190 # Generate fingerprints description... | |
| 191 # | |
# Get fingerprints description...
#
# Returns the explicitly set description when one exists; otherwise, a description
# is generated from the fingerprints type, atom identifier type, and minimum and
# maximum distance.
#
sub GetDescription {
  my($This) = @_;

  # An explicitly set description takes precedence...
  return $This->{Description} if exists $This->{Description};

  my @DescriptionParts = ($This->{Type}, $This->{AtomIdentifierType}, "MinDistance$This->{MinDistance}", "MaxDistance$This->{MaxDistance}");

  return join(':', @DescriptionParts);
}
| 204 | |
| 205 # Generate topological atom triplets fingerprints... | |
| 206 # | |
| 207 # Let: | |
| 208 # | |
| 209 # AT = Any of the supported atom types | |
| 210 # | |
| 211 # ATx = Atom type for atom x | |
| 212 # ATy = Atom type for atom y | |
| 213 # ATz = Atom type for atom z | |
| 214 # | |
| 215 # Dxy = Distance between Px and Py | |
| 216 # Dxz = Distance between Px and Pz | |
| 217 # Dyz = Distance between Py and Pz | |
| 218 # | |
| 219 # Then: | |
| 220 # | |
| 221 # ATx-Dyz-ATy-Dxz-ATz-Dxy = Atom triplet ID for atom types ATx, ATy and ATz | |
| 222 # | |
| 223 # Methodology: | |
| 224 # . Generate a distance matrix. | |
| 225 # . Assign atom types to all the atoms. | |
| 226 # . Using distance matrix and atom types, count occurrence of unique atom triplets | |
| 227 # within specified distance range along with optional triangle inequality | |
| 228 # | |
| 229 # Notes: | |
| 230 # . Hydrogen atoms are ignored during the fingerprint generation. | |
| 231 # . For a molecule containing N atoms with all different atom type, the total number of | |
| 232 # possible unique atom triplets without applying triangle inequality check corresponds to: | |
| 233 # | |
| 234 # Factorial( N ) / ( Factorial( N - 3 ) * Factorial (3) ) | |
| 235 # | |
| 236 # However, due to similar atom types assigned to atoms in a molecule for a specific atom | |
| 237 # typing methodology and specified distance range used during fingerprints generation, the | |
| 238 # actual number of unique triplets is usually smaller than the theoretical limit. | |
| 239 # | |
# Generate topological atom triplets fingerprints...
#
# Steps: cache molecule data, setup distance matrix, assign atom types, count atom
# triplets, and set final fingerprints.
#
# Returns the object. Croaks when MinDistance is greater than MaxDistance. A
# warning is issued and no fingerprints are set when the distance matrix can't
# be generated or atom types can't be assigned.
#
sub GenerateFingerprints {
  my($This) = @_;

  if ($This->{MinDistance} > $This->{MaxDistance}) {
    croak "Error: ${ClassName}->GenerateFingerprints: No fingerprints generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
  }

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Generate distance matrix...
  if (!$This->_SetupDistanceMatrix()) {
    # Don't leave cached molecule data behind on failure...
    $This->_ClearMoleculeDataCache();
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't generate distance matrix...";
    return $This;
  }

  # Assign atom types to all heavy atoms...
  if (!$This->_AssignAtomTypes()) {
    # Don't leave cached molecule data behind on failure...
    $This->_ClearMoleculeDataCache();
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
    return $This;
  }

  # Initialize values of topological atom triplets...
  $This->_InitializeToplogicalAtomTriplets();

  # Count atom triplets...
  $This->_GenerateAndCountAtomTriplets();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}
| 276 | |
| 277 # Setup distance matrix... | |
| 278 # | |
# Get distance matrix from the molecule and keep it in the object...
#
# Returns the object on success and undef when no distance matrix is available.
#
sub _SetupDistanceMatrix {
  my($This) = @_;

  my $DistanceMatrix = $This->GetMolecule()->GetDistanceMatrix();
  $This->{DistanceMatrix} = $DistanceMatrix;

  return $DistanceMatrix ? $This : undef;
}
| 290 | |
| 291 # Assign appropriate atom types to all heavy atoms... | |
| 292 # | |
# Assign appropriate atom types to all heavy atoms...
#
# An atom types object corresponding to the atom identifier type is created and
# used to assign atom types; the assigned types are collected, by atom ID, for
# all non-hydrogen atoms.
#
# Returns the object on success and undef when atom types assignment is not
# successful. Croaks for an unknown atom identifier type.
#
sub _AssignAtomTypes {
  my($This) = @_;

  %{$This->{AssignedAtomTypes}} = ();

  my $IgnoreHydrogens = 1;
  my @CommonParameters = ('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);

  # Atom identifier type names along with parameters for their constructors. The
  # class for each atom identifier type is named AtomTypes::<Name>...
  #
  my @AtomTypesSpecifications = (
    ['AtomicInvariantsAtomTypes', [@CommonParameters, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse}]],
    ['DREIDINGAtomTypes', [@CommonParameters]],
    ['EStateAtomTypes', [@CommonParameters]],
    ['FunctionalClassAtomTypes', [@CommonParameters, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse}]],
    ['MMFF94AtomTypes', [@CommonParameters]],
    ['SLogPAtomTypes', [@CommonParameters]],
    ['SYBYLAtomTypes', [@CommonParameters]],
    # TPSA atom types are created without IgnoreHydrogens...
    ['TPSAAtomTypes', ['Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0]],
    ['UFFAtomTypes', [@CommonParameters]],
  );

  my $SpecifiedAtomTypes = undef;

  SPECIFICATION: for my $Specification (@AtomTypesSpecifications) {
    my($TypeName, $Parameters) = @{$Specification};

    # Match atom identifier type without regard to case...
    if ($This->{AtomIdentifierType} !~ /^\Q$TypeName\E$/i) {
      next SPECIFICATION;
    }

    my $AtomTypesClass = "AtomTypes::${TypeName}";
    $SpecifiedAtomTypes = $AtomTypesClass->new(@{$Parameters});
    last SPECIFICATION;
  }

  if (!defined $SpecifiedAtomTypes) {
    croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  # Assign atom types...
  $SpecifiedAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return undef;
  }

  # Collect assigned atom types for heavy atoms...
  ATOM: for my $Atom (@{$This->{Atoms}}) {
    if ($Atom->IsHydrogen()) {
      next ATOM;
    }
    my $AtomID = $Atom->GetID();
    $This->{AssignedAtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
  }

  return $This;
}
| 369 | |
| 370 # Initialize topological atom triplets between specified distance range... | |
| 371 # | |
# Clear atom triplet IDs and atom triplet counts before counting atom triplets...
#
# Returns the object.
#
sub _InitializeToplogicalAtomTriplets {
  my($This) = @_;

  %{$This->{AtomTripletsCount}} = ();
  @{$This->{AtomTripletsIDs}} = ();

  return $This;
}
| 381 | |
| 382 # Count atom triplets between minimum and maximum distance at each | |
| 383 # distance using distance matrix and atom types assigned to each heavy | |
| 384 # atom. | |
| 385 # | |
# Count atom triplets using distance matrix and atom types assigned to atoms...
#
# All combinations of three atoms with assigned atom types are considered. A
# triplet is counted when all three pairwise distances are between MinDistance
# and MaxDistance and, when UseTriangleInequality is set, the distances satisfy
# triangle inequality. Counts are kept in AtomTripletsCount by atom triplet ID.
#
# Returns the object.
#
sub _GenerateAndCountAtomTriplets {
  my($This) = @_;

  my $DistanceMatrix = $This->{DistanceMatrix};
  my $LastAtomIndex = @{$This->{Atoms}} - 1;
  my $SkipIndexCheck = 0;

  my($MinDistance, $MaxDistance) = @{$This}{'MinDistance', 'MaxDistance'};

  FIRSTATOM: for my $Index1 (0 .. $LastAtomIndex) {
    my $ID1 = $This->{AtomIndexToID}{$Index1};
    # Atoms without assigned atom types are ignored...
    next FIRSTATOM unless exists($This->{AssignedAtomTypes}{$ID1});
    my $Type1 = $This->{AssignedAtomTypes}{$ID1};

    SECONDATOM: for my $Index2 (($Index1 + 1) .. $LastAtomIndex) {
      my $ID2 = $This->{AtomIndexToID}{$Index2};
      next SECONDATOM unless exists($This->{AssignedAtomTypes}{$ID2});
      my $Type2 = $This->{AssignedAtomTypes}{$ID2};

      my $Distance12 = $DistanceMatrix->GetValue($Index1, $Index2, $SkipIndexCheck);
      next SECONDATOM if $Distance12 < $MinDistance || $Distance12 > $MaxDistance;

      THIRDATOM: for my $Index3 (($Index2 + 1) .. $LastAtomIndex) {
        my $ID3 = $This->{AtomIndexToID}{$Index3};
        next THIRDATOM unless exists($This->{AssignedAtomTypes}{$ID3});
        my $Type3 = $This->{AssignedAtomTypes}{$ID3};

        my $Distance13 = $DistanceMatrix->GetValue($Index1, $Index3, $SkipIndexCheck);
        my $Distance23 = $DistanceMatrix->GetValue($Index2, $Index3, $SkipIndexCheck);

        next THIRDATOM if $Distance13 < $MinDistance || $Distance13 > $MaxDistance;
        next THIRDATOM if $Distance23 < $MinDistance || $Distance23 > $MaxDistance;

        if ($This->{UseTriangleInequality}) {
          next THIRDATOM unless $This->_DoDistancesSatisfyTriangleInequality($Distance12, $Distance13, $Distance23);
        }

        # Each atom type is paired with the distance between the other two atoms...
        my $AtomTripletID = $This->_GetAtomTripletID($Type1, $Distance23, $Type2, $Distance13, $Type3, $Distance12);
        $This->{AtomTripletsCount}{$AtomTripletID}++;
      }
    }
  }

  return $This;
}
| 443 | |
| 444 # Check triangle inequality... | |
| 445 # | |
# Check whether three distances satisfy triangle inequality...
#
# Each distance must be greater than the absolute difference of the other two
# distances and less than their sum.
#
# Returns 1 when all three distances pass the check; otherwise, 0.
#
sub _DoDistancesSatisfyTriangleInequality {
  my($This, $Distance1, $Distance2, $Distance3) = @_;

  # Each entry: distance to check followed by the other two distances...
  my @Arrangements = (
    [$Distance1, $Distance2, $Distance3],
    [$Distance2, $Distance1, $Distance3],
    [$Distance3, $Distance1, $Distance2],
  );

  for my $Arrangement (@Arrangements) {
    my($Distance, $OtherDistance1, $OtherDistance2) = @{$Arrangement};

    unless ($Distance > abs($OtherDistance1 - $OtherDistance2) && $Distance < ($OtherDistance1 + $OtherDistance2)) {
      return 0;
    }
  }

  return 1;
}
| 460 | |
| 461 # Get atom triplet ID corresponding to atom types and distances corresponding to atom triplet... | |
| 462 # | |
# Get atom triplet ID corresponding to atom types and distances...
#
# Each atom type is combined with its distance as <AtomType>-D<Distance>; the
# three parts are sorted and joined using hyphens, making the ID independent of
# the order in which atoms are specified.
#
sub _GetAtomTripletID {
  my($This, $ATx, $Dyz, $ATy, $Dxz, $ATz, $Dxy) = @_;

  my @TripletParts = ("${ATx}-D${Dyz}", "${ATy}-D${Dxz}", "${ATz}-D${Dxy}");

  return join("-", sort @TripletParts);
}
| 474 | |
| 475 # Set final fingerprints vector... | |
| 476 # | |
# Add sorted atom triplet IDs and their counts to the fingerprints vector...
#
# Returns the object.
#
sub _SetFinalFingerprints {
  my($This) = @_;

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  # Atom triplet IDs in sorted order along with corresponding counts...
  my @SortedAtomTripletIDs = sort keys %{$This->{AtomTripletsCount}};
  my @Values = map { $This->{AtomTripletsCount}{$_} } @SortedAtomTripletIDs;

  @{$This->{AtomTripletsIDs}} = @SortedAtomTripletIDs;

  # Add AtomTripletsIDs and values to fingerprint vector...
  $This->{FingerprintsVector}->AddValueIDs(\@{$This->{AtomTripletsIDs}});
  $This->{FingerprintsVector}->AddValues(\@Values);

  return $This;
}
| 499 | |
| 500 # Get atom triplet IDs corresponding to atom triplets count values in fingerprint | |
| 501 # vector as an array or reference to an array... | |
| 502 # | |
| 503 # AtomTripletIDs list differs between molecules and is generated during finalization | |
| 504 # of fingerprints to make sure the fingerprint vector containing count values | |
| 505 # matches the atom triplets array. | |
| 506 # | |
# Get atom triplet IDs corresponding to count values in the fingerprints vector...
#
# Returns a list of atom triplet IDs in list context and a reference to the
# array of atom triplet IDs kept in the object in scalar context.
#
sub GetAtomTripletIDs {
  my($This) = @_;

  if (wantarray) {
    return @{$This->{AtomTripletsIDs}};
  }

  return \@{$This->{AtomTripletsIDs}};
}
| 512 | |
| 513 # Cache appropriate molecule data... | |
| 514 # | |
# Cache atoms of the molecule and a map of atom indices to atom IDs...
#
# Returns the object.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # Get all atoms including hydrogens to correctly map atom indices to atom IDs for
  # usage of distance matrix. The hydrogen atoms are ignored during processing...
  #
  @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();

  # Get all atom IDs in the order of atoms...
  my @AtomIDs = ();
  for my $Atom (@{$This->{Atoms}}) {
    push @AtomIDs, $Atom->GetID();
  }

  # Set AtomIndex to AtomID hash...
  %{$This->{AtomIndexToID}} = ();
  for my $AtomIndex (0 .. $#AtomIDs) {
    $This->{AtomIndexToID}{$AtomIndex} = $AtomIDs[$AtomIndex];
  }

  return $This;
}
| 534 | |
| 535 # Set atomic invariants to use for atom identifiers... | |
| 536 # | |
# Set atomic invariants to use for atom identifiers...
#
# Arguments: a list of atomic invariants or a reference to an array containing
# them.
#
# Returns the object. A warning is issued and nothing is returned when no values
# are specified. Croaks when a specified atomic invariant is not available.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;

  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  # Values may be specified as a list or as a reference to an array...
  my @SpecifiedAtomicInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomicInvariants are valid...
  my @AtomicInvariantsToUse = ();
  for my $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
    AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)
      or croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";

    push @AtomicInvariantsToUse, $SpecifiedAtomicInvariant;
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = @AtomicInvariantsToUse;

  return $This;
}
| 574 | |
| 575 # Set functional classes to use for atom identifiers... | |
| 576 # | |
# Set functional classes to use for atom identifiers...
#
# Arguments: a list of functional classes or a reference to an array containing
# them.
#
# Returns the object. A warning is issued and nothing is returned when no values
# are specified or when the atom identifier type is not FunctionalClassAtomTypes.
# Croaks when a specified functional class is not available.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;

  if (!@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # Values may be specified as a list or as a reference to an array...
  my @SpecifiedFunctionalClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified FunctionalClasses are valid...
  my @FunctionalClassesToUse = ();
  for my $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
    AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)
      or croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";

    push @FunctionalClassesToUse, $SpecifiedFunctionalClass;
  }

  # Set functional classes to use...
  @{$This->{FunctionalClassesToUse}} = @FunctionalClassesToUse;

  return $This;
}
| 618 | |
| 619 # Initialize atom identifier type information... | |
| 620 # | |
| 621 # Current supported values: | |
| 622 # | |
| 623 # AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, | |
| 624 # MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes | |
| 625 # | |
# Setup default values which depend on the atom identifier type...
#
# Default values are set up for AtomicInvariantsAtomTypes and
# FunctionalClassAtomTypes; nothing is done for other supported types.
#
# Returns the object. Croaks for an unknown atom identifier type.
#
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;

  my $IdentifierType = $This->{AtomIdentifierType};

  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
    return $This;
  }

  if ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
    return $This;
  }

  # Nothing to do for the remaining supported types...
  if ($IdentifierType !~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  return $This;
}
| 644 | |
| 645 # Initialize atomic invariants atom types to use for generating atom IDs in atom triplets... | |
| 646 # | |
| 647 # Let: | |
| 648 # AS = Atom symbol corresponding to element symbol | |
| 649 # | |
| 650 # X<n> = Number of non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 651 # BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 652 # LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 653 # SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 654 # DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 655 # TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 656 # H<n> = Number of implicit and explicit hydrogens for atom | |
| 657 # Ar = Aromatic annotation indicating whether atom is aromatic | |
| 658 # RA = Ring atom annotation indicating whether atom is in a ring | |
| 659 # FC<+n/-n> = Formal charge assigned to atom | |
| 660 # MN<n> = Mass number indicating isotope other than most abundant isotope | |
| 661 # SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet) | |
| 662 # | |
| 663 # ATx = Atomic invariants atom type for atom x | |
| 664 # ATy = Atomic invariants atom type for atom y | |
| 665 # ATz = Atomic invariants atom type for atom z | |
| 666 # | |
| 667 # Dxy = Distance between Px and Py | |
| 668 # Dxz = Distance between Px and Pz | |
| 669 # Dyz = Distance between Py and Pz | |
| 670 # | |
| 671 # Then: | |
| 672 # | |
| 673 # Atom triplet AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
| 674 # | |
| 675 # AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n> | |
| 676 # | |
| 677 # Topological atom triplet ID between atom IDs ATx, ATy and ATz corresponds to: | |
| 678 # | |
| 679 # ATx-Dyz-ATy-Dxz-ATz-Dxy | |
| 680 # | |
| 681 # Except for AS which is a required atomic invariant in atom triplet AtomIDs, all other atomic invariants are | |
| 682 # optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>. | |
| 683 # AtomID specification doesn't include atomic invariants with zero or undefined values. | |
| 684 # | |
| 685 # Examples of atom triplet AtomIDs: | |
| 686 # | |
| 687 # O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge | |
| 688 # O.X1.BO1.FC-1 - Hydroxyl oxygen in carboxylate with explicit negative charge | |
| 689 # O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon | |
| 690 # O.X2.BO2 - Hydroxyl oxygen in carboxylate attached to carbonyl carbon and another heavy atom | |
| 691 # | |
| 692 # C.X2.BO3.H1.Ar - Aromatic carbon | |
| 693 # | |
# Set default atomic invariants to use for generating atom triplet atom IDs...
#
# Returns the object.
#
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;

  # Default atomic invariants: AS, X, BO, H, FC
  #
  @{$This->{AtomicInvariantsToUse}} = qw(AS X BO H FC);

  return $This;
}
| 704 | |
| 705 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes | |
| 706 # class, to use for generating atom identifiers... | |
| 707 # | |
| 708 # Let: | |
| 709 # HBD: HydrogenBondDonor | |
| 710 # HBA: HydrogenBondAcceptor | |
| 711 # PI : PositivelyIonizable | |
| 712 # NI : NegativelyIonizable | |
| 713 # Ar : Aromatic | |
| 714 # Hal : Halogen | |
| 715 # H : Hydrophobic | |
| 716 # RA : RingAtom | |
| 717 # CA : ChainAtom | |
| 718 # | |
| 719 # Then: | |
| 720 # | |
| 721 # Functional class atom type specification for an atom corresponds to: | |
| 722 # | |
| 723 # Ar.CA.H.HBA.HBD.Hal.NI.PI.RA | |
| 724 # | |
| 725 # Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal | |
| 726 # | |
| 727 # FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]: | |
| 728 # | |
| 729 # HydrogenBondDonor: NH, NH2, OH | |
| 730 # HydrogenBondAcceptor: N[!H], O | |
| 731 # PositivelyIonizable: +, NH2 | |
| 732 # NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH | |
| 733 # | |
# Set default functional classes to use for generating atom identifiers...
#
# Returns the object.
#
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;

  # Default functional classes: HBD, HBA, PI, NI, Ar, Hal
  #
  @{$This->{FunctionalClassesToUse}} = qw(HBD HBA PI NI Ar Hal);

  return $This;
}
| 745 | |
| 746 # Clear cached molecule data... | |
| 747 # | |
# Clear atoms cached by _SetupMoleculeDataCache...
#
# Returns the object.
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  # Empty the cached atoms array in place...
  $#{$This->{Atoms}} = -1;

  return $This;
}
| 755 | |
| 756 # Return a string containing data for TopologicalAtomTripletsFingerprints object... | |
| 757 # | |
| 758 sub StringifyTopologicalAtomTripletsFingerprints { | |
| 759 my($This) = @_; | |
| 760 my($FingerprintsString); | |
| 761 | |
| 762 # Type of fingerprint... | |
| 763 $FingerprintsString = "Fingerprint type: $This->{Type}; AtomIdentifierType: $This->{AtomIdentifierType}"; | |
| 764 | |
| 765 # Min and max distance... | |
| 766 $FingerprintsString .= "; MinDistance: $This->{MinDistance}; MaxDistance: $This->{MaxDistance}; UseTriangleInequality: " . ($This->{UseTriangleInequality} ? "Yes" : "No"); | |
| 767 | |
| 768 if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) { | |
| 769 my($AtomicInvariant, @AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants); | |
| 770 | |
| 771 @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder(); | |
| 772 %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants(); | |
| 773 | |
| 774 for $AtomicInvariant (@AtomicInvariantsOrder) { | |
| 775 push @AtomicInvariants, "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}"; | |
| 776 } | |
| 777 | |
| 778 $FingerprintsString .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">"; | |
| 779 $FingerprintsString .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">"; | |
| 780 $FingerprintsString .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">"; | |
| 781 } | |
| 782 elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) { | |
| 783 my($FunctionalClass, @FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses); | |
| 784 | |
| 785 @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder(); | |
| 786 %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses(); | |
| 787 | |
| 788 for $FunctionalClass (@FunctionalClassesOrder) { | |
| 789 push @FunctionalClasses, "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}"; | |
| 790 } | |
| 791 | |
| 792 $FingerprintsString .= "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">"; | |
| 793 $FingerprintsString .= "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">"; | |
| 794 $FingerprintsString .= "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">"; | |
| 795 } | |
| 796 | |
| 797 # Total number of atom triplets... | |
| 798 $FingerprintsString .= "; NumOfAtomTriplets: " . $This->{FingerprintsVector}->GetNumOfValues(); | |
| 799 | |
| 800 # FingerprintsVector... | |
| 801 $FingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >"; | |
| 802 | |
| 803 return $FingerprintsString; | |
| 804 } | |
| 805 | |
| 806 1; | |
| 807 | |
| 808 __END__ | |
| 809 | |
| 810 =head1 NAME | |
| 811 | |
| 812 TopologicalAtomTripletsFingerprints | |
| 813 | |
| 814 =head1 SYNOPSIS | |
| 815 | |
| 816 use Fingerprints::TopologicalAtomTripletsFingerprints; | |
| 817 | |
| 818 use Fingerprints::TopologicalAtomTripletsFingerprints qw(:all); | |
| 819 | |
| 820 =head1 DESCRIPTION | |
| 821 | |
| 822 B<TopologicalAtomTripletsFingerprints> [ Ref 57, Ref 59, Ref 72 ] class provides the following methods: | |
| 823 | |
| 824 new, GenerateFingerprints, GetAtomTripletIDs, GetDescription, | |
| 825 SetAtomIdentifierType, SetAtomicInvariantsToUse, SetFunctionalClassesToUse, | |
| 826 SetMaxDistance, SetMinDistance, StringifyTopologicalAtomTripletsFingerprints | |
| 827 | |
| 828 B<TopologicalAtomTripletsFingerprints> is derived from B<Fingerprints> class which in turn | |
| 829 is derived from B<ObjectProperty> base class that provides methods not explicitly defined | |
| 830 in B<TopologicalAtomTripletsFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's | |
| 831 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property: | |
| 832 | |
| 833 Set<PropertyName>(<PropertyValue>); | |
| 834 $PropertyValue = Get<PropertyName>(); | |
| 835 Delete<PropertyName>(); | |
| 836 | |
| 837 The current release of MayaChemTools supports generation of B<TopologicalAtomTripletsFingerprints> | |
| 838 corresponding to the following B<AtomIdentifierTypes>: | |
| 839 | |
| 840 AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
| 841 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, | |
| 842 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes | |
| 843 | |
| 844 Based on the values specified for B<AtomIdentifierType> along with other specified | |
| 845 parameters such as B<AtomicInvariantsToUse> and B<FunctionalClassesToUse>, initial | |
| 846 atom types are assigned to all non-hydrogen atoms in a molecule. Using the distance | |
| 847 matrix for the molecule and initial atom types assigned to non-hydrogen atoms, all unique atom | |
| 848 triplets within B<MinDistance> and B<MaxDistance> are identified and counted. An atom triplet | |
| 849 identifier is generated for each unique atom triplet; the format of atom triplet identifier is: | |
| 850 | |
| 851 <ATx>-Dyz-<ATy>-Dxz-<ATz>-Dxy | |
| 852 | |
| 853 ATx, ATy, ATz: Atom types assigned to atom x, atom y, and atom z | |
| 854 Dxy: Distance between atom x and atom y | |
| 855 Dxz: Distance between atom x and atom z | |
| 856 Dyz: Distance between atom y and atom z | |
| 857 | |
| 858 where <ATx>-Dyz <= <ATy>-Dxz <= <ATz>-Dxy | |
| 859 | |
| 860 The atom triplet identifiers for all unique atom triplets corresponding to non-hydrogen atoms constitute | |
| 861 topological atom triplets fingerprints of the molecule. | |
| 862 | |
| 863 The current release of MayaChemTools generates the following types of topological atom triplets | |
| 864 fingerprints vector strings: | |
| 865 | |
| 866 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M | |
| 867 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1 | |
| 868 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1 | |
| 869 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1 | |
| 870 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....; | |
| 871 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2 | |
| 872 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8... | |
| 873 | |
| 874 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M | |
| 875 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesPairsString | |
| 876 ;C.X1.BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 1 C.X1.BO1.H3-D1-C.X2.BO | |
| 877 2.H2-D10-C.X3.BO4-D9 2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 2 C.X | |
| 878 1.BO1.H3-D1-C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 2 C.X1.BO1.H3-D1-C.X2.BO2.H2 | |
| 879 -D6-C.X3.BO3.H1-D5 2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3.BO3.H1-D7 2... | |
| 880 | |
| 881 FingerprintsVector;TopologicalAtomTriplets:DREIDINGAtomTypes:MinDistan | |
| 882 ce1:MaxDistance10;2377;NumericalValues;IDsAndValuesString;C_2-D1-C_2-D | |
| 883 9-C_3-D10 C_2-D1-C_2-D9-C_R-D10 C_2-D1-C_3-D1-C_3-D2 C_2-D1-C_3-D10-C_ | |
| 884 3-D9 C_2-D1-C_3-D2-C_3-D3 C_2-D1-C_3-D2-C_R-D3 C_2-D1-C_3-D3-C_3-D4 C_ | |
| 885 2-D1-C_3-D3-N_R-D4 C_2-D1-C_3-D3-O_3-D2 C_2-D1-C_3-D4-C_3-D5 C_2-D...; | |
| 886 1 1 1 2 1 1 3 1 1 2 2 1 1 1 1 1 1 1 1 2 1 3 4 5 1 1 6 4 2 2 3 1 1 1 2 | |
| 887 2 1 2 1 1 2 2 2 1 2 1 2 1 1 3 3 2 6 4 2 1 1 1 2 2 1 1 1 1 1 1 1 1 1... | |
| 888 | |
| 889 FingerprintsVector;TopologicalAtomTriplets:EStateAtomTypes:MinDistance | |
| 890 1:MaxDistance10;3298;NumericalValues;IDsAndValuesString;aaCH-D1-aaCH-D | |
| 891 1-aaCH-D2 aaCH-D1-aaCH-D1-aasC-D2 aaCH-D1-aaCH-D10-aaCH-D9 aaCH-D1-aaC | |
| 892 H-D10-aasC-D9 aaCH-D1-aaCH-D2-aaCH-D3 aaCH-D1-aaCH-D2-aasC-D1 aaCH-D1- | |
| 893 aaCH-D2-aasC-D3 aaCH-D1-aaCH-D3-aasC-D2 aaCH-D1-aaCH-D4-aasC-D5 aa...; | |
| 894 6 4 24 4 16 8 8 4 8 8 8 12 10 14 4 16 24 4 12 2 2 4 1 10 2 2 15 2 2 2 | |
| 895 2 2 2 14 4 2 2 2 2 1 2 10 2 2 4 1 2 4 8 3 3 3 4 6 4 2 2 3 3 1 1 1 2 1 | |
| 896 2 2 4 2 3 2 1 2 4 5 3 2 2 1 2 4 3 2 8 12 6 2 2 4 4 7 1 4 2 4 2 2 2 ... | |
| 897 | |
| 898 FingerprintsVector;TopologicalAtomTriplets:FunctionalClassAtomTypes:Mi | |
| 899 nDistance1:MaxDistance10;2182;NumericalValues;IDsAndValuesString;Ar-D1 | |
| 900 -Ar-D1-Ar-D2 Ar-D1-Ar-D1-Ar.HBA-D2 Ar-D1-Ar-D10-Ar-D9 Ar-D1-Ar-D10-Hal | |
| 901 -D9 Ar-D1-Ar-D2-Ar-D2 Ar-D1-Ar-D2-Ar-D3 Ar-D1-Ar-D2-Ar.HBA-D1 Ar-D1-Ar | |
| 902 -D2-Ar.HBA-D2 Ar-D1-Ar-D2-Ar.HBA-D3 Ar-D1-Ar-D2-HBD-D1 Ar-D1-Ar-D2...; | |
| 903 27 1 32 2 2 63 3 2 1 2 1 2 3 1 1 40 3 1 2 2 2 2 4 2 2 47 4 2 2 1 2 1 5 | |
| 904 2 2 51 4 3 1 3 1 9 1 1 50 3 3 4 1 9 50 2 2 3 3 5 45 1 1 1 2 1 2 2 3 3 | |
| 905 4 4 3 2 1 1 3 4 5 5 3 1 2 3 2 3 5 7 2 7 3 7 1 1 2 2 2 2 3 1 4 3 1 2... | |
| 906 | |
| 907 FingerprintsVector;TopologicalAtomTriplets:MMFF94AtomTypes:MinDistance | |
| 908 1:MaxDistance10;2966;NumericalValues;IDsAndValuesString;C5A-D1-C5A-D1- | |
| 909 N5-D2 C5A-D1-C5A-D2-C5B-D2 C5A-D1-C5A-D3-CB-D2 C5A-D1-C5A-D3-CR-D2 C5A | |
| 910 -D1-C5B-D1-C5B-D2 C5A-D1-C5B-D2-C=ON-D1 C5A-D1-C5B-D2-CB-D1 C5A-D1-C5B | |
| 911 -D3-C=ON-D2 C5A-D1-C5B-D3-CB-D2 C5A-D1-C=ON-D3-NC=O-D2 C5A-D1-C=ON-D3- | |
| 912 O=CN-D2 C5A-D1-C=ON-D4-NC=O-D3 C5A-D1-C=ON-D4-O=CN-D3 C5A-D1-CB-D1-... | |
| 913 | |
| 914 FingerprintsVector;TopologicalAtomTriplets:SLogPAtomTypes:MinDistance1 | |
| 915 :MaxDistance10;3710;NumericalValues;IDsAndValuesString;C1-D1-C1-D1-C11 | |
| 916 -D2 C1-D1-C1-D1-CS-D2 C1-D1-C1-D10-C5-D9 C1-D1-C1-D3-C10-D2 C1-D1-C1-D | |
| 917 3-C5-D2 C1-D1-C1-D3-CS-D2 C1-D1-C1-D3-CS-D4 C1-D1-C1-D4-C10-D5 C1-D1-C | |
| 918 1-D4-C11-D5 C1-D1-C1-D5-C10-D4 C1-D1-C1-D5-C5-D4 C1-D1-C1-D6-C11-D7 C1 | |
| 919 -D1-C1-D6-CS-D5 C1-D1-C1-D6-CS-D7 C1-D1-C1-D8-C11-D9 C1-D1-C1-D8-CS... | |
| 920 | |
| 921 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1 | |
| 922 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C | |
| 923 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3- | |
| 924 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2 | |
| 925 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C. | |
| 926 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7... | |
| 927 | |
| 928 FingerprintsVector;TopologicalAtomTriplets:TPSAAtomTypes:MinDistance1: | |
| 929 MaxDistance10;1007;NumericalValues;IDsAndValuesString;N21-D1-N7-D3-Non | |
| 930 e-D4 N21-D1-N7-D5-None-D4 N21-D1-None-D1-None-D2 N21-D1-None-D2-None-D | |
| 931 2 N21-D1-None-D2-None-D3 N21-D1-None-D3-None-D4 N21-D1-None-D4-None-D5 | |
| 932 N21-D1-None-D4-O3-D3 N21-D1-None-D4-O4-D3 N21-D1-None-D5-None-D6 N21- | |
| 933 D1-None-D6-None-D7 N21-D1-None-D6-O4-D5 N21-D1-None-D7-None-D8 N21-... | |
| 934 | |
| 935 FingerprintsVector;TopologicalAtomTriplets:UFFAtomTypes:MinDistance1:M | |
| 936 axDistance10;2377;NumericalValues;IDsAndValuesString;C_2-D1-C_2-D9-C_3 | |
| 937 -D10 C_2-D1-C_2-D9-C_R-D10 C_2-D1-C_3-D1-C_3-D2 C_2-D1-C_3-D10-C_3-D9 | |
| 938 C_2-D1-C_3-D2-C_3-D3 C_2-D1-C_3-D2-C_R-D3 C_2-D1-C_3-D3-C_3-D4 C_2-D1- | |
| 939 C_3-D3-N_R-D4 C_2-D1-C_3-D3-O_3-D2 C_2-D1-C_3-D4-C_3-D5 C_2-D1-C_3-D5- | |
| 940 C_3-D6 C_2-D1-C_3-D5-O_3-D4 C_2-D1-C_3-D6-C_3-D7 C_2-D1-C_3-D7-C_3-... | |
| 941 | |
| 942 =head2 METHODS | |
| 943 | |
| 944 =over 4 | |
| 945 | |
| 946 =item B<new> | |
| 947 | |
| 948 $NewTopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints( | |
| 949 %NamesAndValues); | |
| 950 | |
| 951 Using specified I<TopologicalAtomTripletsFingerprints> property names and values hash, B<new> | |
| 952 method creates a new object and returns a reference to newly created B<TopologicalAtomTripletsFingerprints> | |
| 953 object. By default, the following properties are initialized: | |
| 954 | |
| 955 Molecule = '' | |
| 956 Type = 'TopologicalAtomTriplets' | |
| 957 MinDistance = 1 | |
| 958 MaxDistance = 10 | |
| 959 UseTriangleInequality = 1 | |
| 960 AtomIdentifierType = '' | |
| 961 AtomicInvariantsToUse = ['AS', 'X', 'BO', 'H', 'FC'] | |
| 962 FunctionalClassesToUse = ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal'] | |
| 963 | |
| 964 Examples: | |
| 965 | |
| 966 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints( | |
| 967 'Molecule' => $Molecule, | |
| 968 'AtomIdentifierType' => | |
| 969 'AtomicInvariantsAtomTypes'); | |
| 970 | |
| 971 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints( | |
| 972 'Molecule' => $Molecule, | |
| 973 'MinDistance' => 1, | |
| 974 'MaxDistance' => 10, | |
| 975 'AtomIdentifierType' => | |
| 976 'AtomicInvariantsAtomTypes', | |
| 977 'AtomicInvariantsToUse' => | |
| 978 ['AS', 'X', 'BO', 'H', 'FC'] ); | |
| 979 | |
| 980 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints( | |
| 981 'Molecule' => $Molecule, | |
| 982 'AtomIdentifierType' => | |
| 983 'DREIDINGAtomTypes'); | |
| 984 | |
| 985 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints( | |
| 986 'Molecule' => $Molecule, | |
| 987 'AtomIdentifierType' => | |
| 988 'MMFF94AtomTypes'); | |
| 989 | |
| 990 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints( | |
| 991 'Molecule' => $Molecule, | |
| 992 'AtomIdentifierType' => | |
| 993 'TPSAAtomTypes'); | |
| 994 | |
| 995 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints( | |
| 996 'Molecule' => $Molecule, | |
| 997 'MinDistance' => 1, | |
| 998 'MaxDistance' => 10, | |
| 999 'AtomIdentifierType' => | |
| 1000 'FunctionalClassAtomTypes', | |
| 1001 'FunctionalClassesToUse' => | |
| 1002 ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal']); | |
| 1003 | |
| 1004 $TopologicalAtomTripletsFingerprints->GenerateFingerprints(); | |
| 1005 print "$TopologicalAtomTripletsFingerprints\n"; | |
| 1006 | |
| 1007 =item B<GetDescription> | |
| 1008 | |
| 1009 $Return = $TopologicalAtomTripletsFingerprints->GetDescription(); | |
| 1010 | |
| 1011 Returns a string containing description of topological atom triplets fingerprints. | |
| 1012 | |
| 1013 =item B<GenerateFingerprints> | |
| 1014 | |
| 1015 $TopologicalAtomTripletsFingerprints->GenerateFingerprints(); | |
| 1016 | |
| 1017 Generates topological atom triplets fingerprints and returns I<TopologicalAtomTripletsFingerprints>. | |
| 1018 | |
| 1019 =item B<GetAtomTripletIDs> | |
| 1020 | |
| 1021 $AtomTripletIDsRef = $TopologicalAtomTripletsFingerprints->GetAtomTripletIDs(); | |
| 1022 @AtomTripletIDs = $TopologicalAtomTripletsFingerprints->GetAtomTripletIDs(); | |
| 1023 | |
| 1024 Returns atom triplet IDs corresponding to atom triplets count values in topological atom triplets | |
| 1025 fingerprints vector as an array or reference to an array. | |
| 1026 | |
| 1027 =item B<SetAtomIdentifierType> | |
| 1028 | |
| 1029 $TopologicalAtomTripletsFingerprints->SetAtomIdentifierType($IdentifierType); | |
| 1030 | |
| 1031 Sets atom I<IdentifierType> to use during atom triplets fingerprints generation and | |
| 1032 returns I<TopologicalAtomTripletsFingerprints>. | |
| 1033 | |
| 1034 Possible values: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
| 1035 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, | |
| 1036 TPSAAtomTypes, UFFAtomTypes>. | |
| 1037 | |
| 1038 =item B<SetAtomicInvariantsToUse> | |
| 1039 | |
| 1040 $TopologicalAtomTripletsFingerprints->SetAtomicInvariantsToUse($ValuesRef); | |
| 1041 $TopologicalAtomTripletsFingerprints->SetAtomicInvariantsToUse(@Values); | |
| 1042 | |
| 1043 Sets atomic invariants to use during I<AtomicInvariantsAtomTypes> value of I<AtomIdentifierType> | |
| 1044 for topological atom triplets fingerprints generation and returns I<TopologicalAtomTripletsFingerprints>. | |
| 1045 | |
| 1046 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB, | |
| 1047 H, Ar, RA, FC, MN, SM>. Default value: I<AS,X,BO,H,FC>. | |
| 1048 | |
| 1049 The atomic invariants abbreviations correspond to: | |
| 1050 | |
| 1051 AS = Atom symbol corresponding to element symbol | |
| 1052 | |
| 1053 X<n> = Number of non-hydrogen atom neighbors or heavy atoms | |
| 1054 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms | |
| 1055 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms | |
| 1056 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms | |
| 1057 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms | |
| 1058 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms | |
| 1059 H<n> = Number of implicit and explicit hydrogens for atom | |
| 1060 Ar = Aromatic annotation indicating whether atom is aromatic | |
| 1061 RA = Ring atom annotation indicating whether atom is in a ring | |
| 1062 FC<+n/-n> = Formal charge assigned to atom | |
| 1063 MN<n> = Mass number indicating isotope other than most abundant isotope | |
| 1064 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or | |
| 1065 3 (triplet) | |
| 1066 | |
| 1067 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
| 1068 | |
| 1069 AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n> | |
| 1070 | |
| 1071 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are | |
| 1072 optional. Atom type specification doesn't include atomic invariants with zero or undefined values. | |
| 1073 | |
| 1074 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words | |
| 1075 are also allowed: | |
| 1076 | |
| 1077 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors | |
| 1078 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms | |
| 1079 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms | |
| 1080 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms | |
| 1081 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms | |
| 1082 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms | |
| 1083 H : NumOfImplicitAndExplicitHydrogens | |
| 1084 Ar : Aromatic | |
| 1085 RA : RingAtom | |
| 1086 FC : FormalCharge | |
| 1087 MN : MassNumber | |
| 1088 SM : SpinMultiplicity | |
| 1089 | |
| 1090 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant | |
| 1091 atom types. | |
| 1092 | |
| 1093 =item B<SetFunctionalClassesToUse> | |
| 1094 | |
| 1095 $TopologicalAtomTripletsFingerprints->SetFunctionalClassesToUse($ValuesRef); | |
| 1096 $TopologicalAtomTripletsFingerprints->SetFunctionalClassesToUse(@Values); | |
| 1097 | |
| 1098 Sets functional classes to use during I<FunctionalClassAtomTypes> value of I<AtomIdentifierType> | |
| 1099 for topological atom triplets fingerprints generation and returns I<TopologicalAtomTripletsFingerprints>. | |
| 1100 | |
| 1101 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>. | |
| 1102 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>. | |
| 1103 | |
| 1104 The functional class abbreviations correspond to: | |
| 1105 | |
| 1106 HBD: HydrogenBondDonor | |
| 1107 HBA: HydrogenBondAcceptor | |
| 1108 PI : PositivelyIonizable | |
| 1109 NI : NegativelyIonizable | |
| 1110 Ar : Aromatic | |
| 1111 Hal : Halogen | |
| 1112 H : Hydrophobic | |
| 1113 RA : RingAtom | |
| 1114 CA : ChainAtom | |
| 1115 | |
| 1116 Functional class atom type specification for an atom corresponds to: | |
| 1117 | |
| 1118 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA or None | |
| 1119 | |
| 1120 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom | |
| 1121 types. It uses the following definitions [ Ref 60-61, Ref 65-66 ]: | |
| 1122 | |
| 1123 HydrogenBondDonor: NH, NH2, OH | |
| 1124 HydrogenBondAcceptor: N[!H], O | |
| 1125 PositivelyIonizable: +, NH2 | |
| 1126 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH | |
| 1127 | |
| 1128 =item B<SetMaxDistance> | |
| 1129 | |
| 1130 $TopologicalAtomTripletsFingerprints->SetMaxDistance($Distance); | |
| 1131 | |
| 1132 Sets maximum distance to use during topological atom triplets fingerprints generation and | |
| 1133 returns I<TopologicalAtomTripletsFingerprints>. | |
| 1134 | |
| 1135 =item B<SetMinDistance> | |
| 1136 | |
| 1137 $TopologicalAtomTripletsFingerprints->SetMinDistance($Distance); | |
| 1138 | |
| 1139 Sets minimum distance to use during topological atom triplets fingerprints generation and | |
| 1140 returns I<TopologicalAtomTripletsFingerprints>. | |
| 1141 | |
| 1142 =item B<StringifyTopologicalAtomTripletsFingerprints> | |
| 1143 | |
| 1144 $String = $TopologicalAtomTripletsFingerprints-> | |
| 1145 StringifyTopologicalAtomTripletsFingerprints(); | |
| 1146 | |
| 1147 Returns a string containing information about I<TopologicalAtomTripletsFingerprints> object. | |
| 1148 | |
| 1149 =back | |
| 1150 | |
| 1151 =head1 AUTHOR | |
| 1152 | |
| 1153 Manish Sud <msud@san.rr.com> | |
| 1154 | |
| 1155 =head1 SEE ALSO | |
| 1156 | |
| 1157 Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm, | |
| 1158 AtomTypesFingerprints.pm, EStateIndiciesFingerprints.pm, ExtendedConnectivityFingerprints.pm, | |
| 1159 MACCSKeys.pm, PathLengthFingerprints.pm, TopologicalAtomPairsFingerprints.pm, | |
| 1160 TopologicalAtomTorsionsFingerprints.pm, TopologicalPharmacophoreAtomPairsFingerprints.pm, | |
| 1161 TopologicalPharmacophoreAtomTripletsFingerprints.pm | |
| 1162 | |
| 1163 =head1 COPYRIGHT | |
| 1164 | |
| 1165 Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 1166 | |
| 1167 This file is part of MayaChemTools. | |
| 1168 | |
| 1169 MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 1170 the terms of the GNU Lesser General Public License as published by the Free | |
| 1171 Software Foundation; either version 3 of the License, or (at your option) | |
| 1172 any later version. | |
| 1173 | |
| 1174 =cut |
