Mercurial > repos > deepakjadmin > mayatool3_test1
comparison lib/Fingerprints/PathLengthFingerprints.pm @ 1:2abf0d43254d draft
Uploaded
| author | deepakjadmin |
|---|---|
| date | Wed, 20 Jan 2016 09:10:43 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:1791cb0984a7 | 1:2abf0d43254d |
|---|---|
| 1 package Fingerprints::PathLengthFingerprints; | |
| 2 # | |
| 3 # $RCSfile: PathLengthFingerprints.pm,v $ | |
| 4 # $Date: 2015/02/28 20:48:54 $ | |
| 5 # $Revision: 1.39 $ | |
| 6 # | |
| 7 # Author: Manish Sud <msud@san.rr.com> | |
| 8 # | |
| 9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 10 # | |
| 11 # This file is part of MayaChemTools. | |
| 12 # | |
| 13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 14 # the terms of the GNU Lesser General Public License as published by the Free | |
| 15 # Software Foundation; either version 3 of the License, or (at your option) any | |
| 16 # later version. | |
| 17 # | |
| 18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
| 19 # any warranty; without even the implied warranty of merchantability of fitness | |
| 20 # for a particular purpose. See the GNU Lesser General Public License for more | |
| 21 # details. | |
| 22 # | |
| 23 # You should have received a copy of the GNU Lesser General Public License | |
| 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
| 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
| 26 # Boston, MA, 02111-1307, USA. | |
| 27 # | |
| 28 | |
| 29 use strict; | |
| 30 use Carp; | |
| 31 use Exporter; | |
| 32 use TextUtil (); | |
| 33 use MathUtil (); | |
| 34 use Fingerprints::Fingerprints; | |
| 35 use Molecule; | |
| 36 use AtomTypes::AtomicInvariantsAtomTypes; | |
| 37 use AtomTypes::DREIDINGAtomTypes; | |
| 38 use AtomTypes::EStateAtomTypes; | |
| 39 use AtomTypes::FunctionalClassAtomTypes; | |
| 40 use AtomTypes::MMFF94AtomTypes; | |
| 41 use AtomTypes::SLogPAtomTypes; | |
| 42 use AtomTypes::SYBYLAtomTypes; | |
| 43 use AtomTypes::TPSAAtomTypes; | |
| 44 use AtomTypes::UFFAtomTypes; | |
| 45 | |
# Package globals used for inheritance and by Exporter...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

# Inherit from the generic fingerprints class; nothing is exported...
@ISA = ('Fingerprints::Fingerprints', 'Exporter');
@EXPORT = ();
@EXPORT_OK = ();

%EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

# Class-level data: $ClassName is filled in by _InitializeClass...
my $ClassName;
_InitializeClass();

# Stringification of objects is delegated to StringifyPathLengthFingerprints...
use overload '""' => 'StringifyPathLengthFingerprints';
| 60 | |
| 61 # Class constructor... | |
sub new {
    # Class constructor.
    #
    # Builds the object through the parent class constructor, blesses it into
    # this class, sets default values, and then applies the caller-supplied
    # name/value pairs.
    #
    # Arguments: class name (or object) followed by a list of name/value pairs.
    # Returns:   the newly created object.
    #
    my($Class, %Options) = @_;

    my $This = $Class->SUPER::new();
    my $Package = ref($Class) || $Class;
    bless $This, $Package;

    $This->_InitializePathLengthFingerprints();
    $This->_InitializePathLengthFingerprintsProperties(%Options);

    return $This;
}
| 74 | |
| 75 # Initialize object data... | |
| 76 # | |
sub _InitializePathLengthFingerprints {
    # Populate the object with the default settings used during generation of
    # path length fingerprints.
    #
    my($This) = @_;

    # Type of fingerprints to generate:
    #
    #   PathLengthBits  - A bit vector indicating presence/absence of atom paths
    #   PathLengthCount - A vector containing count of atom paths
    #
    # and type of vector: FingerprintsBitVector or FingerprintsVector.
    #
    ($This->{Type}, $This->{VectorType}) = ('', '');

    # Default, minimum, and maximum size. Although any arbitrary size can be
    # specified, the bit vector used to store bits works on a vector size which
    # is a power of 2; additional bits are automatically added and cleared.
    #
    ($This->{Size}, $This->{MinSize}, $This->{MaxSize}) = (1024, 32, 2**32);

    # Minimum and maximum path lengths to use for fingerprints generation...
    ($This->{MinLength}, $This->{MaxLength}) = (1, 8);

    # Number of bits to set for each atom path for FingerprintsBitVector...
    $This->{NumOfBitsToSetPerPath} = 1;

    # Atom identifier type to use for path atoms during fingerprints generation.
    #
    # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
    # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
    # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
    #
    $This->{AtomIdentifierType} = '';

    # Atom types assigned to atoms...
    %{$This->{AssignedAtomTypes}} = ();

    # For molecules containing rings, atom paths starting from each atom can be
    # traversed in four different ways:
    #
    #   . Atom paths without any rings and sharing of bonds in traversed paths.
    #   . Atom paths containing rings and without any sharing of bonds in traversed paths
    #   . All possible atom paths without any rings and sharing of bonds in traversed paths
    #   . All possible atom paths containing rings and with sharing of bonds in traversed paths.
    #
    # Atom path traversal is terminated at the last ring atom. For molecules
    # containing no rings, first two and last two types described above are
    # equivalent.
    #
    # AllowSharedBonds and AllowRings select which of these kinds of paths are
    # used for fingerprints generation.
    #
    # In addition to atom symbols, bond symbols are also used to generate a
    # string for atom paths. These atom path strings are hashed to a 32 bit
    # integer key which in turn is used as a seed for a random number generation
    # in range of 1 to fingerprint size for setting corresponding bit in bit
    # vector.
    #
    # UseBondSymbols controls whether bond symbols take part in the atom path
    # strings and consequently in the fingerprints.
    #
    # Combination of AllowSharedBonds, AllowRings, and UseBondSymbols allows
    # generation of 8 different types of path length fingerprints:
    #
    #   AllowSharedBonds  AllowRings  UseBondSymbols  PathLengthFingerprintsType
    #
    #   No                No          Yes             AtomPathsNoCyclesWithBondSymbols
    #   No                Yes         Yes             AtomPathsWithCyclesWithBondSymbols
    #
    #   Yes               No          Yes             AllAtomPathsNoCyclesWithBondSymbols
    #   Yes               Yes         Yes             AllAtomPathsWithCyclesWithBondSymbols [ DEFAULT ]
    #
    #   No                No          No              AtomPathsNoCyclesNoBondSymbols
    #   No                Yes         No              AtomPathsWithCyclesNoBondSymbols
    #
    #   Yes               No          No              AllAtomPathsNoCyclesNoBondSymbols
    #   Yes               Yes         No              AllAtomPathsWithCyclesNoBondSymbols
    #

    # By default: atom paths are allowed to share bonds already traversed, rings
    # are included in paths, bond symbols are included in atom path strings, and
    # only structurally unique atom paths are used to generate atom path strings...
    $This->{AllowSharedBonds} = 1;
    $This->{AllowRings} = 1;
    $This->{UseBondSymbols} = 1;
    $This->{UseUniquePaths} = 1;

    # Random number generator to use during generation of fingerprints bit-vector
    # string: Perl CORE::rand or MayaChemTools MathUtil::random function.
    #
    # The random number generator implemented in MayaChemTools is a variant of
    # linear congruential generator (LCG) as described by Miller et al. [ Ref 120 ].
    # It is also referred to as Lehmer random number generator or Park-Miller
    # random number generator.
    #
    # Unlike Perl's core random number generator function rand, the random number
    # generator implemented in MayaChemTools, MathUtil::random, generates
    # consistent random values across different platforms for a specific random
    # seed and leads to generation of portable fingerprints bit-vector strings.
    #
    $This->{UsePerlCoreRandom} = 1;

    # Bond symbols to use during generation of atom path strings...
    %{$This->{BondOrderToSymbol}} = ('1' => '', '1.5' => ':', '2' => '=', '3' => '#');

    # BondSymbols map to use for bonded atom IDs during atom path strings...
    %{$This->{BondSymbols}} = ();

    # Path atom IDs used to remove duplicate paths...
    %{$This->{UniqueLinearAtomPathsIDs}} = ();
    %{$This->{UniqueCyclicAtomPathsIDs}} = ();

    # Reference to all the atom paths up to specified path length...
    $This->{AtomPathsRef} = '';

    # Atom paths strings created using specified atom types and bond symbols...
    %{$This->{AtomPathsStrings}} = ();
}
| 202 | |
| 203 # Initialize class ... | |
sub _InitializeClass {
    # Store this package's name in the file-level $ClassName variable, which is
    # interpolated into the error and warning messages of this class...
    return $ClassName = __PACKAGE__;
}
| 208 | |
| 209 # Initialize object properties.... | |
sub _InitializePathLengthFingerprintsProperties {
    # Apply the name/value pairs supplied to the constructor and finish the
    # type-specific setup.
    #
    # Each pair is handed to the matching Set<Name> method. Molecule, Type, and
    # AtomIdentifierType are required; Size, when given, must be a power of 2.
    # Croaks when any of these conditions is not met or the type is unknown.
    #
    # Returns: the object.
    #
    my($This, %NamesAndValues) = @_;

    # Hand each value to its setter method...
    for my $PropertyName (keys %NamesAndValues) {
        my $SetterName = "Set${PropertyName}";
        $This->$SetterName($NamesAndValues{$PropertyName});
    }

    # Required parameters...
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule..."
        unless exists $NamesAndValues{Molecule};

    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying Type..."
        unless exists $NamesAndValues{Type};

    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType..."
        unless exists $NamesAndValues{AtomIdentifierType};

    # An explicitly specified size must be a power of 2...
    if (exists $NamesAndValues{Size} && !TextUtil::IsNumberPowerOfNumber($NamesAndValues{Size}, 2)) {
        croak "Error: ${ClassName}->New: Specified size value, $NamesAndValues{Size}, must be power of 2...";
    }

    # Set up the vector corresponding to the fingerprints type...
    my $FingerprintsType = $This->{Type};
    if ($FingerprintsType =~ /^PathLengthBits$/i) {
        $This->_InitializePathLengthBits();
    }
    elsif ($FingerprintsType =~ /^PathLengthCount$/i) {
        $This->_InitializePathLengthCount();
    }
    else {
        croak "Error: ${ClassName}->_InitializePathLengthFingerprintsProperties: Unknown PathLength type: $This->{Type}; Supported PathLength type : PathLengthBits or PathLengthCount......";
    }

    return $This;
}
| 251 | |
| 252 # Initialize PathLength bits... | |
| 253 # | |
sub _InitializePathLengthBits {
    # Set up the object for PathLengthBits fingerprints: mark the vector type as
    # FingerprintsBitVector and initialize the bit vector.
    #
    # Returns: the object.
    #
    my $This = shift;

    $This->{VectorType} = 'FingerprintsBitVector';
    $This->_InitializeFingerprintsBitVector();

    return $This;
}
| 264 | |
| 265 # Initialize PathLength key count... | |
| 266 # | |
sub _InitializePathLengthCount {
    # Set up the object for PathLengthCount fingerprints: mark the vector type
    # as FingerprintsVector holding NumericalValues and initialize the vector.
    #
    # Returns: the object.
    #
    my $This = shift;

    @{$This}{'VectorType', 'FingerprintsVectorType'} = ('FingerprintsVector', 'NumericalValues');
    $This->_InitializeFingerprintsVector();

    return $This;
}
| 278 | |
| 279 # Set type... | |
| 280 # | |
sub SetType {
    # Set the fingerprints type to PathLengthBits or PathLengthCount.
    #
    # The name is matched case-insensitively and stored in its canonical
    # spelling. Croaks when a type is already set or the name is unknown.
    #
    # Returns: the object.
    #
    my($This, $Type) = @_;

    croak "Error: ${ClassName}->SetType: Can't change type: It's already set..." if $This->{Type};

    for my $CanonicalName ('PathLengthBits', 'PathLengthCount') {
        if ($Type =~ /^${CanonicalName}$/i) {
            $This->{Type} = $CanonicalName;
            return $This;
        }
    }

    croak "Error: ${ClassName}->SetType: Unknown PathLength keys: $Type; Supported PathLength types: PathLengthBits or PathLengthCount...";
}
| 299 | |
| 300 # Disable vector type change... | |
| 301 # | |
sub SetVectorType {
    # The vector type can't be changed by callers: this method always croaks
    # and never returns.
    #
    my($This, $Type) = @_;

    croak "Error: ${ClassName}->SetVectorType: Can't change vector type...";
}
| 309 | |
| 310 # Disable vector type change... | |
| 311 # | |
sub SetFingerprintsVectorType {
    # The fingerprints vector type can't be changed by callers: this method
    # always croaks and never returns.
    #
    my($This, $Type) = @_;

    croak "Error: ${ClassName}->SetFingerprintsVectorType: Can't change fingerprints vector type...";
}
| 319 | |
| 320 # Set atom identifier type to use for path length atom identifiers... | |
| 321 # | |
sub SetAtomIdentifierType {
    # Set the atom identifier type used for path atoms.
    #
    # The value must match one of the supported atom types names
    # (case-insensitive) and can be set only once; otherwise this method croaks.
    # The value is stored as specified and the atom identifier type information
    # is initialized afterwards.
    #
    # Returns: the object.
    #
    my($This, $IdentifierType) = @_;

    my $SupportedNames = join '|', qw(
        AtomicInvariantsAtomTypes DREIDINGAtomTypes EStateAtomTypes
        FunctionalClassAtomTypes MMFF94AtomTypes SLogPAtomTypes
        SYBYLAtomTypes TPSAAtomTypes UFFAtomTypes
    );

    # Validity of the name is checked before the "already set" condition...
    unless ($IdentifierType =~ /^($SupportedNames)$/i) {
        croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
    }

    croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change atom identifier type: It's already set..."
        if $This->{AtomIdentifierType};

    $This->{AtomIdentifierType} = $IdentifierType;
    $This->_InitializeAtomIdentifierTypeInformation();

    return $This;
}
| 340 | |
| 341 # Set minimum path length... | |
| 342 # | |
sub SetMinLength {
    # Set the minimum atom path length. Croaks unless the value is a positive
    # integer as determined by TextUtil::IsPositiveInteger.
    #
    # Returns: the object.
    #
    my($This, $Value) = @_;

    croak "Error: ${ClassName}->SetMinLength: MinLength value, $Value, is not valid: It must be a positive integer..."
        unless TextUtil::IsPositiveInteger($Value);

    $This->{MinLength} = $Value;

    return $This;
}
| 353 | |
| 354 # Set maximum path length... | |
| 355 # | |
sub SetMaxLength {
    # Set the maximum atom path length. Croaks unless the value is a positive
    # integer as determined by TextUtil::IsPositiveInteger.
    #
    # Returns: the object.
    #
    my($This, $Value) = @_;

    croak "Error: ${ClassName}->SetMaxLength: MaxLength value, $Value, is not valid: It must be a positive integer..."
        unless TextUtil::IsPositiveInteger($Value);

    $This->{MaxLength} = $Value;

    return $This;
}
| 366 | |
| 367 # Set number of bits to set for each path... | |
| 368 # | |
sub SetNumOfBitsToSetPerPath {
    # Set the number of bits to set for each atom path. Croaks unless the value
    # is a positive integer as determined by TextUtil::IsPositiveInteger.
    #
    # Returns: the object.
    #
    my($This, $Value) = @_;

    croak "Error: ${ClassName}->SetNumOfBitsToSetPerPath: NumOfBitsToSetPerPath value, $Value, is not valid: It must be a positive integer..."
        unless TextUtil::IsPositiveInteger($Value);

    $This->{NumOfBitsToSetPerPath} = $Value;

    return $This;
}
| 379 | |
| 380 # Generate fingerprints description... | |
| 381 # | |
sub GetDescription {
    # Return the fingerprints description.
    #
    # An explicitly set Description is returned as is. Otherwise the description
    # is assembled as:
    #
    #   <Type>:<AtomIdentifierType>:MinLength<MinLength>:MaxLength<MaxLength>
    #
    my($This) = @_;

    return $This->{Description} if exists $This->{Description};

    return join ':', $This->{Type}, $This->{AtomIdentifierType}, "MinLength$This->{MinLength}", "MaxLength$This->{MaxLength}";
}
| 394 | |
| 395 # Generate path length fingerprints... | |
| 396 # | |
sub GenerateFingerprints {
    # Generate path length fingerprints.
    #
    # Steps: cache molecule data, assign atom types, set up the bond symbols map
    # (when UseBondSymbols is set), enumerate atom paths up to MaxLength, turn
    # them into atom path strings, set the final fingerprints, and clear the
    # cached molecule data.
    #
    # Croaks when MinLength is greater than MaxLength. When atom types can't be
    # assigned, a warning is issued and no fingerprints are generated; the
    # cached molecule data is cleared on that path as well, so a failed run does
    # not leave it on the object.
    #
    # Returns: the object.
    #
    my($This) = @_;

    if ($This->{MinLength} > $This->{MaxLength}) {
        croak "Error: ${ClassName}->GenerateFingerprints: No fingerpritns generated: MinLength, $This->{MinLength}, must be <= MaxLength, $This->{MaxLength}...";
    }

    # Cache appropriate molecule data...
    $This->_SetupMoleculeDataCache();

    # Assign atom types to all atoms...
    if (!$This->_AssignAtomTypes()) {
        carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";

        # Don't leave the cached molecule data behind on failure...
        $This->_ClearMoleculeDataCache();

        return $This;
    }

    # Setup bond symbol map...
    if ($This->{UseBondSymbols}) {
        $This->_InitializeBondSymbols();
    }

    # Generate appropriate atom paths...
    $This->_GenerateAtomPathsUpToMaxLength();

    # Initialize atom path strings...
    $This->_InitializeAtomPathsStrings();

    # Generate appropriate atom path strings for unique atom paths...
    $This->_GenerateAtomPathsStrings();

    # Set final fingerprints...
    $This->_SetFinalFingerprints();

    # Clear cached molecule data...
    $This->_ClearMoleculeDataCache();

    return $This;
}
| 435 | |
| 436 # Assign appropriate atom types to all atoms... | |
| 437 # | |
sub _AssignAtomTypes {
    # Assign atom types of the kind named by AtomIdentifierType to all atoms and
    # collect them in AssignedAtomTypes, keyed by atom ID.
    #
    # The atom types object is created with direct method call syntax
    # (Class->new) instead of indirect object syntax (new Class), which is
    # ambiguous to the parser. Hydrogens are not ignored; for TPSA atom types,
    # phosphorus and sulfur are not ignored either.
    #
    # Croaks on an unknown atom identifier type.
    #
    # Returns: the object on success; undef when atom types assignment is not
    # successful for the molecule.
    #
    my($This) = @_;
    my($SpecifiedAtomTypes, $AtomIdentifierType, $IgnoreHydrogens, @CommonParameters);

    %{$This->{AssignedAtomTypes}} = ();
    $IgnoreHydrogens = 0;

    $AtomIdentifierType = $This->{AtomIdentifierType};
    @CommonParameters = ('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);

    $SpecifiedAtomTypes = undef;

    if ($AtomIdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::AtomicInvariantsAtomTypes->new(@CommonParameters, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
    }
    elsif ($AtomIdentifierType =~ /^DREIDINGAtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::DREIDINGAtomTypes->new(@CommonParameters);
    }
    elsif ($AtomIdentifierType =~ /^EStateAtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::EStateAtomTypes->new(@CommonParameters);
    }
    elsif ($AtomIdentifierType =~ /^FunctionalClassAtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::FunctionalClassAtomTypes->new(@CommonParameters, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
    }
    elsif ($AtomIdentifierType =~ /^MMFF94AtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::MMFF94AtomTypes->new(@CommonParameters);
    }
    elsif ($AtomIdentifierType =~ /^SLogPAtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::SLogPAtomTypes->new(@CommonParameters);
    }
    elsif ($AtomIdentifierType =~ /^SYBYLAtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::SYBYLAtomTypes->new(@CommonParameters);
    }
    elsif ($AtomIdentifierType =~ /^TPSAAtomTypes$/i) {
        # TPSA atom types take their own set of parameters...
        $SpecifiedAtomTypes = AtomTypes::TPSAAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
    }
    elsif ($AtomIdentifierType =~ /^UFFAtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::UFFAtomTypes->new(@CommonParameters);
    }
    else {
        croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
    }

    # Assign atom types...
    $SpecifiedAtomTypes->AssignAtomTypes();

    # Make sure atom types assignment is successful. An explicit undef is
    # returned, as before, so the result is the same in any calling context...
    if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
        return undef;
    }

    # Collect assigned atom types...
    for my $Atom (@{$This->{Atoms}}) {
        $This->{AssignedAtomTypes}{$Atom->GetID()} = $SpecifiedAtomTypes->GetAtomType($Atom);
    }

    return $This;
}
| 511 | |
| 512 # Setup bond symbol map for atoms to speed up generation of path length identifiers | |
| 513 # during fingerprints generation... | |
| 514 # | |
sub _InitializeBondSymbols {
    # Build the BondSymbols map for all bonds in the molecule.
    #
    # The map is keyed by the two bonded atom IDs, smaller ID first. Aromatic
    # bonds get ':'; other bonds get the symbol from BondOrderToSymbol or, for
    # an unmapped bond order, the bond order itself. The map is always emptied
    # first and stays empty when UseBondSymbols is not set.
    #
    # Returns: the object.
    #
    my($This) = @_;

    %{$This->{BondSymbols}} = ();

    return $This unless $This->{UseBondSymbols};

    for my $Bond ($This->{Molecule}->GetBonds()) {
        my $Order = $Bond->GetBondOrder();

        my $Symbol;
        if ($Bond->IsAromatic()) {
            $Symbol = ':';
        }
        elsif (exists $This->{BondOrderToSymbol}{$Order}) {
            $Symbol = $This->{BondOrderToSymbol}{$Order};
        }
        else {
            $Symbol = $Order;
        }

        my($FirstAtom, $SecondAtom) = $Bond->GetAtoms();
        my($LowID, $HighID) = ($FirstAtom->GetID(), $SecondAtom->GetID());
        ($LowID, $HighID) = ($HighID, $LowID) if $LowID > $HighID;

        # The inner hash for the smaller atom ID is created on first use...
        $This->{BondSymbols}{$LowID}{$HighID} = $Symbol;
    }

    return $This;
}
| 541 | |
| 542 # Get appropriate atom paths with length up to MaxLength... | |
| 543 # | |
sub _GenerateAtomPathsUpToMaxLength {
    # Retrieve atom paths with length up to MaxLength from the molecule and
    # store the result in AtomPathsRef.
    #
    # With AllowSharedBonds set, GetAllAtomPathsWithLengthUpto is used;
    # otherwise GetAtomPathsWithLengthUpto. AllowRings is passed along in both
    # cases.
    #
    # Returns: the object.
    #
    my($This) = @_;

    my $PathsMethodName = $This->{AllowSharedBonds} ? 'GetAllAtomPathsWithLengthUpto' : 'GetAtomPathsWithLengthUpto';
    my @PathsArguments = ($This->{MaxLength}, $This->{AllowRings});

    $This->{AtomPathsRef} = $This->{Molecule}->$PathsMethodName(@PathsArguments);

    return $This;
}
| 562 | |
| 563 # Initialize atom paths strings at various pathlength levels... | |
| 564 # | |
sub _InitializeAtomPathsStrings {
    # Reset AtomPathsStrings so that it holds one empty hash for each path
    # length from MinLength to MaxLength.
    #
    # Returns: the object.
    #
    my($This) = @_;

    %{$This->{AtomPathsStrings}} = map { ($_ => {}) } ($This->{MinLength} .. $This->{MaxLength});

    return $This;
}
| 577 | |
| 578 # Generate appropriate atom path strings for unique atom paths... | |
| 579 # | |
sub _GenerateAtomPathsStrings {
    # Generate atom path strings for all atom paths in AtomPathsRef containing
    # at least MinLength atoms.
    #
    # With UseUniquePaths set, each path goes through
    # _GenerateAtomPathStringUsingUniquePath; otherwise through
    # _GenerateAtomPathString.
    #
    # Returns: the object.
    #
    my($This) = @_;

    my $ShortestAllowed = $This->{MinLength};
    my $GeneratorName = $This->{UseUniquePaths} ? '_GenerateAtomPathStringUsingUniquePath' : '_GenerateAtomPathString';

    for my $PathAtomsRef (@{$This->{AtomPathsRef}}) {
        # Skip paths shorter than the minimum length...
        next if scalar(@{$PathAtomsRef}) < $ShortestAllowed;

        $This->$GeneratorName($PathAtomsRef);
    }

    return $This;
}
| 601 | |
| 602 # Generate atom path string using unique path... | |
| 603 # | |
sub _GenerateAtomPathStringUsingUniquePath {
    # Generate the atom path string for a path, choosing between the cyclic and
    # linear handlers.
    #
    # The cycle check is performed only when AllowRings is set; all other paths
    # are handled as linear paths.
    #
    # Returns: the object.
    #
    my($This, $PathAtomsRef) = @_;

    my $TreatAsCyclic = $This->{AllowRings} && $This->_DoesAtomPathContainsCycle($PathAtomsRef);

    if ($TreatAsCyclic) {
        $This->_GenerateAtomPathStringUsingUniquePathContainingCycle($PathAtomsRef);
    }
    else {
        $This->_GenerateAtomPathStringUsingUniqueLinearPath($PathAtomsRef);
    }

    return $This;
}
| 615 | |
| 616 # Generate atom path string for specified path containing no cycle... | |
| 617 # | |
sub _GenerateAtomPathStringUsingUniqueLinearPath {
    # Generate the atom path string for a path without a cycle, but only when
    # _IsUniqueLinearAtomPath reports it as a unique linear atom path.
    #
    # Returns: the object.
    #
    my($This, $PathAtomsRef) = @_;

    if ($This->_IsUniqueLinearAtomPath($PathAtomsRef)) {
        $This->_GenerateAtomPathString($PathAtomsRef);
    }

    return $This;
}
| 630 | |
| 631 # Is it a structurally unique linear path? | |
| 632 # | |
| 633 # For a path to be structurally unique, all of its atom IDs must be different from any | |
| 634 # earlier path atom IDs. In order to generate atom path atom ID invariant of the atom | |
| 635 # order in the molecule, atom IDs are sorted numerically before generating the path ID. | |
| 636 # | |
| 637 # Notes: | |
| 638 # . Atom path ID doesn't reflect the order of atoms in the atom path. | |
| 639 # | |
sub _IsUniqueLinearAtomPath {
    # Check whether the set of atom IDs in a path has been seen before.
    #
    # The path ID is made up of the numerically sorted atom IDs joined by '-',
    # so it doesn't depend on the order of atoms in the path. A path ID not seen
    # before is recorded in UniqueLinearAtomPathsIDs.
    #
    # Returns: 1 for a new path ID; 0 for one already recorded.
    #
    my($This, $PathAtomsRef) = @_;

    my $PathID = join '-', sort { $a <=> $b } map { $_->GetID() } @{$PathAtomsRef};

    return 0 if exists $This->{UniqueLinearAtomPathsIDs}{$PathID};

    # First time this set of atoms shows up...
    $This->{UniqueLinearAtomPathsIDs}{$PathID} = 1;

    return 1;
}
| 657 | |
| 658 # Generate atom path string for specified path containing a cycle... | |
| 659 # | |
sub _GenerateAtomPathStringUsingUniquePathContainingCycle {
    # Generate the atom path string for a path containing a cycle, but only when
    # _IsUniqueAtomPathContainingCycle reports it as unique.
    #
    # A cycle closing atom index of 0 means the whole path is a cycle; any other
    # index is passed on to the handler for paths that contain a cycle.
    #
    # Returns: the object.
    #
    my($This, $PathAtomsRef) = @_;

    return $This unless $This->_IsUniqueAtomPathContainingCycle($PathAtomsRef);

    # The closing atom index is requested in list context...
    my($ClosingAtomIndex) = $This->_GetAtomPathCycleClosingAtomIndex($PathAtomsRef);

    if ($ClosingAtomIndex == 0) {
        $This->_GenerateUniqueAtomPathStringForPathCycle($PathAtomsRef);
    }
    else {
        $This->_GenerateUniqueAtomPathStringForPathContainingCycle($PathAtomsRef, $ClosingAtomIndex);
    }

    return $This;
}
| 680 | |
| 681 # Generate a unique atom path string for a cyclic path by generating atom path | |
| 682 # strings for all possible paths in the cycle and keeping the lexicographically smallest | |
| 683 # one. | |
| 684 # | |
| 685 # Although all the paths enumerated during atom path string generation are also | |
| 686 # present in the initial paths list, the structural uniqueness check would detect | |
| 687 # them earlier and this method ends up being invoked only once for the first cyclic path. | |
| 688 # | |
| 689 # For atom paths containing same atom types and bond symbols, atom path strings | |
| 690 # would be same for the paths. | |
| 691 # | |
sub _GenerateUniqueAtomPathStringForPathCycle {
    # Generate a unique atom path string for a cyclic path and count it in
    # AtomPathsStrings under the path length.
    #
    # When all path atom symbols and all path bond symbols are the same, the
    # path is handed to _GenerateAtomPathString and its result is returned.
    # Otherwise the cycle is written out starting from each of its atoms, in
    # forward and reverse order, and the lexicographically smallest string is
    # kept.
    #
    # Returns: the object, or the result of _GenerateAtomPathString for the
    # uniform case.
    #
    my($This, $PathAtomsRef) = @_;

    if ($This->_AreAllPathAtomsSymbolsSame($PathAtomsRef) && $This->_AreAllPathBondSymbolsSame($PathAtomsRef)) {
        return $This->_GenerateAtomPathString($PathAtomsRef);
    }

    my $PathLength = scalar @{$PathAtomsRef};

    # Path atoms without the last atom...
    my @RingAtoms = @{$PathAtomsRef}[0 .. $PathLength - 2];

    my $SmallestPathString;
    for my $StartIndex (0 .. $#RingAtoms) {
        # Cycle starting at this atom; the start atom is repeated at the end to
        # complete the cycle...
        my @RotatedAtoms = (
            @RingAtoms[$StartIndex .. $#RingAtoms],
            @RingAtoms[0 .. $StartIndex - 1],
            $RingAtoms[$StartIndex],
        );

        my $SymbolsRef = $This->_GenerateAtomPathSymbols(\@RotatedAtoms);
        my $ForwardString = join '', @{$SymbolsRef};
        my $BackwardString = join '', reverse @{$SymbolsRef};

        my $CandidateString = ($BackwardString le $ForwardString) ? $BackwardString : $ForwardString;

        if (!defined($SmallestPathString) || $CandidateString le $SmallestPathString) {
            $SmallestPathString = $CandidateString;
        }
    }

    # No candidate at all leaves an empty path string...
    $SmallestPathString = '' unless defined $SmallestPathString;

    # Count the path string; a missing entry starts at 1...
    $This->{AtomPathsStrings}{$PathLength}{$SmallestPathString} += 1;

    return $This;
}
| 771 | |
| 772 # | |
| 773 # Generate a unique atom path string for paths containing a cycle closed by | |
| 774 # the specified atom index and the last atom index. | |
| 775 # | |
| 776 # The following methodology is used to generate atom path string which is | |
| 777 # independent of initial atom ordering: | |
| 778 # . Generate atom paths string from first atom to the atom before the first cycle | |
| 779 # closing atom. | |
| 780 # . Generate atom path string from atoms from first cycle closing atom index to | |
| 781 # the last path atom in both forward and reverse order. And select the lexicographically | |
| 782 # smallest atom path string. | |
| 783 # . Combine atom path string generated in first step with second step to generate | |
| 784 # final atom path string. | |
| 785 # | |
| 786 sub _GenerateUniqueAtomPathStringForPathContainingCycle { | |
| 787 my($This, $PathAtomsRef, $CycleClosingAtomIndex) = @_; | |
| 788 my($Index, $PathLength, $LastIndex, $LinearPartStartIndex, $LinearPartEndIndex, $CyclicPartStartIndex, $CyclicPartEndIndex, $CyclicPartAtomPathSymbolsRef, $CyclicPartAtomPathString, $ReverseCyclicPartAtomPathString, $AtomPathString, $AtomPathSymbolsRef, @CyclicPartPathAtoms, @PathAtoms); | |
| 789 | |
| 790 $PathLength = scalar @{$PathAtomsRef}; | |
| 791 $LastIndex = $PathLength - 1; | |
| 792 | |
| 793 @PathAtoms = (); | |
| 794 | |
| 795 # Get path atoms corresponding to linear part of the path... | |
| 796 $LinearPartStartIndex = 0; $LinearPartEndIndex = $CycleClosingAtomIndex - 1; | |
| 797 | |
| 798 for $Index ($LinearPartStartIndex .. $LinearPartEndIndex) { | |
| 799 push @PathAtoms, $PathAtomsRef->[$Index]; | |
| 800 } | |
| 801 | |
| 802 # Get atoms correcponding to cyclic part of the path... | |
| 803 @CyclicPartPathAtoms = (); | |
| 804 $CyclicPartStartIndex = $CycleClosingAtomIndex; $CyclicPartEndIndex = $LastIndex; | |
| 805 | |
| 806 for $Index ($CyclicPartStartIndex .. $CyclicPartEndIndex) { | |
| 807 push @CyclicPartPathAtoms, $PathAtomsRef->[$Index]; | |
| 808 } | |
| 809 | |
| 810 # Setup a lexicographically smaller atom path string for cyclic part... | |
| 811 | |
| 812 $CyclicPartAtomPathSymbolsRef = $This->_GenerateAtomPathSymbols(\@CyclicPartPathAtoms); | |
| 813 $CyclicPartAtomPathString = join '', @{$CyclicPartAtomPathSymbolsRef}; | |
| 814 $ReverseCyclicPartAtomPathString = join '', reverse @{$CyclicPartAtomPathSymbolsRef}; | |
| 815 | |
| 816 # Setup atom path corresponding to linear part and lexigraphicall smaller cyclic part... | |
| 817 | |
| 818 if ($ReverseCyclicPartAtomPathString le $CyclicPartAtomPathString) { | |
| 819 push @PathAtoms, reverse @CyclicPartPathAtoms; | |
| 820 } | |
| 821 else { | |
| 822 push @PathAtoms, @CyclicPartPathAtoms; | |
| 823 } | |
| 824 | |
| 825 # Setup final atom path string... | |
| 826 | |
| 827 $AtomPathSymbolsRef = $This->_GenerateAtomPathSymbols(\@PathAtoms); | |
| 828 $AtomPathString = join '', @{$AtomPathSymbolsRef}; | |
| 829 | |
| 830 if (exists $This->{AtomPathsStrings}{$PathLength}{$AtomPathString}) { | |
| 831 $This->{AtomPathsStrings}{$PathLength}{$AtomPathString} += 1; | |
| 832 } | |
| 833 else { | |
| 834 $This->{AtomPathsStrings}{$PathLength}{$AtomPathString} = 1; | |
| 835 } | |
| 836 | |
| 837 return $This; | |
| 838 } | |
| 839 | |
# Does atom path contain a cycle?
#
# An atom path is considered to contain a cycle when:
#   . It has at least three atoms
#   . ID of its last atom matches ID of the first atom or any other earlier atom
#     in the path
#
# Returns 1 for a path containing a cycle and 0 otherwise.
#
sub _DoesAtomPathContainsCycle {
  my($This, $PathAtomsRef) = @_;

  my $NumOfPathAtoms = scalar @{$PathAtomsRef};
  return 0 if $NumOfPathAtoms <= 2;

  my $ClosingAtomID = $PathAtomsRef->[$NumOfPathAtoms - 1]->GetID();

  # Compare ID of each earlier atom against the last atom ID...
  for my $Position (0 .. ($NumOfPathAtoms - 2)) {
    return 1 if $PathAtomsRef->[$Position]->GetID() == $ClosingAtomID;
  }

  return 0;
}
| 873 | |
# Get atom path cycle closing atom index...
#
# Returns index of the first atom in the path, excluding the last atom, whose ID
# matches ID of the last path atom; undef is returned when no such atom exists.
#
sub _GetAtomPathCycleClosingAtomIndex {
  my($This, $PathAtomsRef) = @_;

  my $FinalIndex = scalar(@{$PathAtomsRef}) - 1;
  my $FinalAtomID = $PathAtomsRef->[$FinalIndex]->GetID();

  # Walk forward and stop at the first atom sharing ID with the last atom...
  my $Position = 0;
  while ($Position < $FinalIndex) {
    if ($PathAtomsRef->[$Position]->GetID() == $FinalAtomID) {
      return $Position;
    }
    $Position++;
  }

  return undef;
}
| 896 | |
# Is it a structurally unique path containing a cycle?
#
# For atom paths containing cycles, the last atom ID is equal either to the first
# atom ID or to the ID of some other earlier atom in the path.
#
# Uniqueness is tracked, independent of the initial atom ordering, using a path ID
# built from atom IDs:
#
#   . For paths with same first and last atom IDs:
#       . Drop the last atom ID
#       . Sort the remaining atom IDs numerically
#       . Append the first sorted atom ID to the end to close the cycle
#       . Join the IDs with '-'
#
#   . For paths with last atom ID equal to some other atom ID:
#       . Sort all atom IDs numerically and join them with '-'
#
# Returns 0 when the path ID has been seen before; otherwise the path ID is
# recorded in UniqueCyclicAtomPathsIDs and 1 is returned.
#
sub _IsUniqueAtomPathContainingCycle {
  my($This, $PathAtomsRef) = @_;

  my @AtomIDs = map { $_->GetID(); } @{$PathAtomsRef};

  my $FirstID = $PathAtomsRef->[0]->GetID();
  my $FinalID = $PathAtomsRef->[scalar(@{$PathAtomsRef}) - 1]->GetID();

  my @PathIDParts;
  if ($FirstID == $FinalID) {
    # Ring closed on the first atom: sort without the repeated last ID and close it again...
    my @RingIDs = sort { $a <=> $b } @AtomIDs[0 .. ($#AtomIDs - 1)];
    @PathIDParts = (@RingIDs, $RingIDs[0]);
  }
  else {
    @PathIDParts = sort { $a <=> $b } @AtomIDs;
  }
  my $CyclicPathID = join '-', @PathIDParts;

  return 0 if exists $This->{UniqueCyclicAtomPathsIDs}{$CyclicPathID};

  # First time this set of cycle atoms is seen...
  $This->{UniqueCyclicAtomPathsIDs}{$CyclicPathID} = 1;

  return 1;
}
| 951 | |
# Generate atom path string for specified atom path and update its count...
#
# A path and its reverse correspond to the same atoms. Both the forward and the
# reverse atom path strings are looked up in AtomPathsStrings, keyed by number of
# path atoms, and the count of the first one found is incremented. When neither is
# present, the lexicographically smaller string is added with a count of 1.
#
# Returns the object itself.
#
sub _GenerateAtomPathString {
  my($This, $PathAtomsRef) = @_;

  my $NumOfPathAtoms = scalar @{$PathAtomsRef};

  # Path atom and bond symbols in forward and reverse order...
  my $PathSymbolsRef = $This->_GenerateAtomPathSymbols($PathAtomsRef);
  my $ForwardString = join '', @{$PathSymbolsRef};
  my $BackwardString = join '', reverse @{$PathSymbolsRef};

  my $PathCountsRef = ($This->{AtomPathsStrings}{$NumOfPathAtoms} ||= {});

  # Count against an existing entry; forward string is checked first...
  for my $KnownCandidate ($ForwardString, $BackwardString) {
    if (exists $PathCountsRef->{$KnownCandidate}) {
      $PathCountsRef->{$KnownCandidate} += 1;
      return $This;
    }
  }

  # Use lexicographically smaller atom path string as the new path ID...
  my $CanonicalString = ($ForwardString le $BackwardString) ? $ForwardString : $BackwardString;
  $PathCountsRef->{$CanonicalString} = 1;

  return $This;
}
| 989 | |
# Are atom types for all path atoms same?
#
# Compares assigned atom type of each path atom, looked up by atom ID in
# AssignedAtomTypes, against assigned atom type of the first path atom. Returns 1
# when all of them are the same string and 0 otherwise.
#
sub _AreAllPathAtomsSymbolsSame {
  my($This, $PathAtomsRef) = @_;

  my $ReferenceAtomType = $This->{AssignedAtomTypes}{ $PathAtomsRef->[0]->GetID() };

  for my $Position (1 .. $#{$PathAtomsRef}) {
    my $CurrentAtomType = $This->{AssignedAtomTypes}{ $PathAtomsRef->[$Position]->GetID() };
    return 0 if $CurrentAtomType ne $ReferenceAtomType;
  }

  return 1;
}
| 1009 | |
# Are bond symbols for all path bonds same?
#
# Bond symbols are looked up in BondSymbols using IDs of the two bonded atoms with
# the numerically smaller atom ID as the first key. Symbol of each bond between
# successive path atoms is compared against symbol of the first path bond.
#
# Returns 1 when:
#   . Bond symbols are not being used (UseBondSymbols is false)
#   . Path has fewer than two atoms and consequently no bonds to compare
#   . All path bond symbols are the same string
#
# Otherwise, 0 is returned.
#
sub _AreAllPathBondSymbolsSame {
  my($This, $PathAtomsRef) = @_;

  # During no usage of bond symbols, just ignore them and assume they are same...
  if (!$This->{UseBondSymbols}) {
    return 1;
  }

  # A path without a second atom has no bonds; avoid calling GetID on a missing atom...
  if (@{$PathAtomsRef} < 2) {
    return 1;
  }

  my($Index, $AtomID, $BondedAtomID, $BondAtomID1, $BondAtomID2, $FirstBondSymbol, $BondSymbol);

  for $Index (0 .. ($#{$PathAtomsRef} - 1)) {
    $AtomID = $PathAtomsRef->[$Index]->GetID();
    $BondedAtomID = $PathAtomsRef->[$Index + 1]->GetID();

    # Bond symbols are stored with the smaller atom ID first...
    ($BondAtomID1, $BondAtomID2) = ($AtomID < $BondedAtomID) ? ($AtomID, $BondedAtomID) : ($BondedAtomID, $AtomID);
    $BondSymbol = $This->{BondSymbols}{$BondAtomID1}{$BondAtomID2};

    if ($Index == 0) {
      # First bond is the reference for all other bonds...
      $FirstBondSymbol = $BondSymbol;
      next;
    }

    if ($BondSymbol ne $FirstBondSymbol) {
      return 0;
    }
  }
  return 1;
}
| 1040 | |
# Generate atom path symbols...
#
# Returns a reference to a list of symbols for the specified path atoms:
#   . For a single atom path or when bond symbols are not being used, the list
#     contains assigned atom type of each path atom in path order.
#   . Otherwise, the list alternates between assigned atom types and bond symbols:
#     AtomType1, BondSymbol12, AtomType2, BondSymbol23, AtomType3...
#
# Bond symbols are looked up in BondSymbols with the numerically smaller atom ID
# as the first key.
#
sub _GenerateAtomPathSymbols {
  my($This, $PathAtomsRef) = @_;
  my(@PathSymbols);

  @PathSymbols = ();

  # Atom types only: single atom path or bond information is being ignored...
  if (@{$PathAtomsRef} == 1 || !$This->{UseBondSymbols}) {
    for my $PathAtom (@{$PathAtomsRef}) {
      push @PathSymbols, $This->{AssignedAtomTypes}{ $PathAtom->GetID() };
    }
    return \@PathSymbols;
  }

  # Atom type of first atom in path...
  my $PreviousAtomID = $PathAtomsRef->[0]->GetID();
  push @PathSymbols, $This->{AssignedAtomTypes}{$PreviousAtomID};

  # Bond symbol followed by atom type for each of the remaining atoms...
  for my $Position (1 .. $#{$PathAtomsRef}) {
    my $CurrentAtomID = $PathAtomsRef->[$Position]->GetID();

    my($LowAtomID, $HighAtomID) = ($PreviousAtomID < $CurrentAtomID) ? ($PreviousAtomID, $CurrentAtomID) : ($CurrentAtomID, $PreviousAtomID);
    push @PathSymbols, $This->{BondSymbols}{$LowAtomID}{$HighAtomID};
    push @PathSymbols, $This->{AssignedAtomTypes}{$CurrentAtomID};

    $PreviousAtomID = $CurrentAtomID;
  }

  return \@PathSymbols;
}
| 1083 | |
# Set final fingerprints...
#
# Marks fingerprints as generated and, based on fingerprints type, sets up either
# the fingerprints bit vector (PathLengthBits) or the fingerprints vector
# (PathLengthCount). Nothing else is done for any other type.
#
# Returns the object itself.
#
sub _SetFinalFingerprints {
  my($This) = @_;

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  my $FingerprintsType = $This->{Type};

  if ($FingerprintsType =~ /^PathLengthBits$/i) {
    $This->_SetFinalFingerprintsBitVector();
    return $This;
  }

  if ($FingerprintsType =~ /^PathLengthCount$/i) {
    $This->_SetFinalFingerprintsVector();
  }

  return $This;
}
| 1101 | |
# Set final fingerprints bit vector...
#
# For each atom path string in AtomPathsStrings:
#   . Hash the string using TextUtil::HashCode
#   . Use the hash code as seed for the random number generator: Perl core
#     generator when UsePerlCoreRandom is set; otherwise, MathUtil generator
#   . Generate NumOfBitsToSetPerPath random numbers, truncated to integers, using
#     Size as the argument to the generator and set the corresponding bits
#
# Bits are set with bit position checking turned off. Returns the object itself.
#
sub _SetFinalFingerprintsBitVector {
  my($This) = @_;

  my $BitVector = $This->{FingerprintsBitVector};
  my $NumOfBits = $This->{Size};
  my $BitsPerPath = $This->{NumOfBitsToSetPerPath};
  my $SkipBitPosCheck = 1;

  for my $PathLength (keys %{$This->{AtomPathsStrings}}) {
    for my $AtomPathString (keys %{$This->{AtomPathsStrings}{$PathLength}}) {
      # Seeding with the hash code makes bit positions a function of the path string...
      my $Seed = TextUtil::HashCode($AtomPathString);

      if ($This->{UsePerlCoreRandom}) {
        CORE::srand($Seed);
      }
      else {
        MathUtil::srandom($Seed);
      }

      for my $BitNum (1 .. $BitsPerPath) {
        my $BitPos;
        if ($This->{UsePerlCoreRandom}) {
          $BitPos = int(CORE::rand($NumOfBits));
        }
        else {
          $BitPos = int(MathUtil::random($NumOfBits));
        }
        $BitVector->SetBit($BitPos, $SkipBitPosCheck);
      }
    }
  }

  return $This;
}
| 1135 | |
# Set final fingerprints vector...
#
# Walks AtomPathsStrings by numerically sorted path length and, within each path
# length, by string sorted atom path strings. Atom path strings are added to the
# fingerprints vector as value IDs and their counts as the corresponding values.
#
# Returns the object itself.
#
sub _SetFinalFingerprintsVector {
  my($This) = @_;
  my(@PathStrings, @PathCounts);

  for my $PathLength (sort { $a <=> $b } keys %{$This->{AtomPathsStrings}}) {
    my $PathCountsRef = $This->{AtomPathsStrings}{$PathLength};

    for my $AtomPathString (sort keys %{$PathCountsRef}) {
      push @PathStrings, $AtomPathString;
      push @PathCounts, $PathCountsRef->{$AtomPathString};
    }
  }

  # Add atom path strings as IDs and their counts as values to fingerprints vector...
  $This->{FingerprintsVector}->AddValueIDs(\@PathStrings);
  $This->{FingerprintsVector}->AddValues(\@PathCounts);

  return $This;
}
| 1160 | |
# Cache appropriate molecule data...
#
# Stores the list of atoms returned by the molecule in the Atoms list of the
# object. Returns the object itself.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # Get all atoms...
  my $Molecule = $This->GetMolecule();
  @{$This->{Atoms}} = $Molecule->GetAtoms();

  return $This;
}
| 1171 | |
# Clear cached molecule data...
#
# Empties the Atoms list in place and resets AtomPathsRef to an empty string.
# Returns the object itself.
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  # Clear atoms by truncating the list...
  $#{$This->{Atoms}} = -1;

  # Clear path atoms...
  $This->{AtomPathsRef} = '';

  return $This;
}
| 1185 | |
# Set atomic invariants to use for atom identifiers...
#
# Atomic invariants may be specified either as a list of values or as a reference
# to an array of values; only the first argument is used when it is an array
# reference.
#
# A warning is issued and nothing is returned when no values are specified. An
# error is raised, without changing the object, for the first specified atomic
# invariant which is not reported as available by AtomTypes::AtomicInvariantsAtomTypes.
# Otherwise, AtomicInvariantsToUse is replaced by the specified values and the
# object itself is returned.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;

  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  # Values come in as a reference to an array or as a plain list...
  my @SpecifiedAtomicInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure all specified atomic invariants are valid before changing anything...
  for my $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
    if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
      croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
    }
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = @SpecifiedAtomicInvariants;

  return $This;
}
| 1225 | |
# Set functional classes to use for atom identifiers...
#
# Functional classes may be specified either as a list of values or as a reference
# to an array of values; only the first argument is used when it is an array
# reference.
#
# A warning is issued and nothing is returned when no values are specified or when
# AtomIdentifierType of the object is not FunctionalClassAtomTypes. An error is
# raised, without changing the object, for the first specified functional class
# which is not reported as available by AtomTypes::FunctionalClassAtomTypes.
# Otherwise, FunctionalClassesToUse is replaced by the specified values and the
# object itself is returned.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;

  if (!@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # Values come in as a reference to an array or as a plain list...
  my @SpecifiedFunctionalClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure all specified functional classes are valid before changing anything...
  for my $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
    if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) {
      croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
    }
  }

  # Set functional classes to use...
  @{$This->{FunctionalClassesToUse}} = @SpecifiedFunctionalClasses;

  return $This;
}
| 1269 | |
# Initialize atom identifier type information...
#
# Current supported values:
#
# AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# Default values to use are set up for AtomicInvariantsAtomTypes and
# FunctionalClassAtomTypes; nothing is done for the other supported types. An error
# is raised for any other value of AtomIdentifierType. Returns the object itself.
#
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;

  my $IdentifierType = $This->{AtomIdentifierType};

  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
    return $This;
  }

  if ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
    return $This;
  }

  # Nothing to do for the remaining supported types; anything else is an error...
  if ($IdentifierType !~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  return $This;
}
| 1295 | |
# Initialize atomic invariants atom types to use for generating atom identifiers...
#
# Let:
#   AS = Atom symbol corresponding to element symbol
#
#   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
#   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
#   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
#   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   H<n>   = Number of implicit and explicit hydrogens for atom
#   Ar     = Aromatic annotation indicating whether atom is aromatic
#   RA     = Ring atom annotation indicating whether atom is in a ring
#   FC<+n/-n> = Formal charge assigned to atom
#   MN<n> = Mass number indicating isotope other than most abundant isotope
#   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
#
# Then:
#
#   Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
#
#     AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
#
# Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
# optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
# AtomID specification doesn't include atomic invariants with zero or undefined values.
#
# Returns the object itself.
#
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;

  # Default atomic invariants to use for generating atom IDs: AS, X, BO, H, FC
  #
  my @DefaultAtomicInvariants = ('AS', 'X', 'BO', 'H', 'FC');
  @{$This->{AtomicInvariantsToUse}} = @DefaultAtomicInvariants;

  return $This;
}
| 1334 | |
# Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
# class, to use for generating atom identifiers...
#
# Let:
#   HBD: HydrogenBondDonor
#   HBA: HydrogenBondAcceptor
#   PI : PositivelyIonizable
#   NI : NegativelyIonizable
#   Ar : Aromatic
#   Hal : Halogen
#   H : Hydrophobic
#   RA : RingAtom
#   CA : ChainAtom
#
# Then:
#
#   Functional class atom type specification for an atom corresponds to:
#
#     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
#
# Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
#
# FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
#
#   HydrogenBondDonor: NH, NH2, OH
#   HydrogenBondAcceptor: N[!H], O
#   PositivelyIonizable: +, NH2
#   NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
#
# Returns the object itself.
#
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;

  # Default functional class atom types to use for generating atom identifiers
  # are: HBD, HBA, PI, NI, Ar, Hal
  #
  my @DefaultFunctionalClasses = ('HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal');
  @{$This->{FunctionalClassesToUse}} = @DefaultFunctionalClasses;

  return $This;
}
| 1375 | |
# Return a string containing data for PathLengthFingerprints object...
#
# The string is a list of "Name: Value" pairs delimited by "; " and contains:
#   . Fingerprints type and atom identifier type
#   . Minimum and maximum path lengths
#   . Fingerprints generation control values reported as Yes or No: UseUniquePaths,
#     AllowSharedBonds, AllowRings and UseBondSymbols
#   . For AtomicInvariantsAtomTypes and FunctionalClassAtomTypes atom identifier
#     types: values to use, their order and the available values
#   . For PathLengthBits type: size information, number of bits set per path, number
#     of on bits, bit density and the stringified fingerprints bit vector
#   . For PathLengthCount type: the stringified fingerprints vector
#
sub StringifyPathLengthFingerprints {
  my($This) = @_;
  my($PathLengthsFingerprintsString);

  # Type of fingerprint...
  $PathLengthsFingerprintsString = "Fingerprint type: $This->{Type}; AtomIdentifierType: $This->{AtomIdentifierType}";

  # Path length...
  $PathLengthsFingerprintsString .= "; MinPathLength: $This->{MinLength}; MaxPathLength: $This->{MaxLength}";

  # Fingerprint generation control...
  my($AllowSharedBonds, $AllowRings, $UseBondSymbols, $UseUniquePaths);

  $AllowSharedBonds = $This->{AllowSharedBonds} ? "Yes" : "No";
  $AllowRings = $This->{AllowRings} ? "Yes" : "No";
  $UseBondSymbols = $This->{UseBondSymbols} ? "Yes" : "No";

  # Report the UseUniquePaths setting itself and not the value of UseBondSymbols...
  $UseUniquePaths = $This->{UseUniquePaths} ? "Yes" : "No";

  $PathLengthsFingerprintsString .= "; UseUniquePaths: $UseUniquePaths; AllowSharedBonds: $AllowSharedBonds; AllowRings: $AllowRings; UseBondSymbols: $UseBondSymbols";

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my($AtomicInvariant, @AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants);

    @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

    # Pair each atomic invariant, in order, with its value from the available atomic invariants...
    for $AtomicInvariant (@AtomicInvariantsOrder) {
      push @AtomicInvariants, "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}";
    }

    $PathLengthsFingerprintsString .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    $PathLengthsFingerprintsString .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
    $PathLengthsFingerprintsString .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my($FunctionalClass, @FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses);

    @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

    # Pair each functional class, in order, with its value from the available functional classes...
    for $FunctionalClass (@FunctionalClassesOrder) {
      push @FunctionalClasses, "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}";
    }

    $PathLengthsFingerprintsString .= "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    $PathLengthsFingerprintsString .= "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
    $PathLengthsFingerprintsString .= "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
  }

  if ($This->{Type} =~ /^PathLengthBits$/i) {
    # Size...
    $PathLengthsFingerprintsString .= "; Size: $This->{Size}; MinSize: $This->{MinSize}; MaxSize: $This->{MaxSize}";

    # NumOfBitsToSetPerPath...
    $PathLengthsFingerprintsString .= "; NumOfBitsToSetPerPath: $This->{NumOfBitsToSetPerPath}";

    # Fingerprint bit density and num of bits set...
    my($NumOfSetBits, $BitDensity);
    $NumOfSetBits = $This->{FingerprintsBitVector}->GetNumOfSetBits();
    $BitDensity = $This->{FingerprintsBitVector}->GetFingerprintsBitDensity();
    $PathLengthsFingerprintsString .= "; NumOfOnBits: $NumOfSetBits; BitDensity: $BitDensity";

    $PathLengthsFingerprintsString .= "; FingerprintsBitVector: < $This->{FingerprintsBitVector} >";
  }
  elsif ($This->{Type} =~ /^PathLengthCount$/i) {
    $PathLengthsFingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";
  }

  return $PathLengthsFingerprintsString;
}
| 1448 | |
| 1449 1; | |
| 1450 | |
| 1451 __END__ | |
| 1452 | |
| 1453 =head1 NAME | |
| 1454 | |
| 1455 PathLengthFingerprints | |
| 1456 | |
| 1457 =head1 SYNOPSIS | |
| 1458 | |
| 1459 use Fingerprints::PathLengthFingerprints; | |
| 1460 | |
| 1461 use Fingerprints::PathLengthFingerprints qw(:all); | |
| 1462 | |
| 1463 =head1 DESCRIPTION | |
| 1464 | |
| 1465 B<PathLengthFingerprints> class provides the following methods: | |
| 1466 | |
| 1467 new, GenerateFingerprints, GetDescription, SetAtomIdentifierType, | |
| 1468 SetAtomicInvariantsToUse, SetFunctionalClassesToUse, SetMaxLength, | |
| 1469 SetMinLength, SetNumOfBitsToSetPerPath, SetType, | |
| 1470 StringifyPathLengthFingerprints | |
| 1471 | |
| 1472 B<PathLengthFingerprints> is derived from B<Fingerprints> class which in turn | |
| 1473 is derived from B<ObjectProperty> base class that provides methods not explicitly defined | |
| 1474 in B<PathLengthFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's | |
| 1475 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property: | |
| 1476 | |
| 1477 Set<PropertyName>(<PropertyValue>); | |
| 1478 $PropertyValue = Get<PropertyName>(); | |
| 1479 Delete<PropertyName>(); | |
| 1480 | |
| 1481 The current release of MayaChemTools supports generation of B<PathLengthFingerprints> | |
| 1482 corresponding to following B<AtomIdentifierTypes>: | |
| 1483 | |
| 1484 AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
| 1485 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, | |
| 1486 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes | |
| 1487 | |
| 1488 Based on the values specified for B<Type>, B<AtomIdentifierType>, B<MinPathLength> and | |
| 1489 B<MaxPathLength>, all appropriate atom paths are generated for each atom in the molecule | |
| 1490 and collected in a list and the list is filtered to remove any structurally duplicate paths as | |
| 1491 indicated by the value of B<UseUniquePaths>. | |
| 1492 | |
| 1493 For molecules containing rings, atom paths starting from each atom can be traversed in four | |
| 1494 different ways: | |
| 1495 | |
| 1496 o Atom paths without any rings and sharing of bonds in traversed paths. | |
| 1497 o Atom paths containing rings and without any sharing of bonds in | |
| 1498 traversed paths | |
| 1499 o All possible atom paths without any rings and with sharing of bonds in | |
| 1500 traversed paths | |
| 1501 o All possible atom paths containing rings and with sharing of bonds in | |
| 1502 traversed paths. | |
| 1503 | |
| 1504 Atom path traversal is terminated at the last ring atom. For molecules containing no rings, | |
| 1505 first two and last two types described above are equivalent. | |
| 1506 | |
| 1507 B<AllowSharedBonds> and B<AllowRings> allow generation of different types of paths | |
| 1508 to be used for fingerprints generation. | |
| 1509 | |
| 1510 The combination of B<AllowSharedBonds>, B<AllowRings>, and B<UseBondSymbols> allows generation of | |
| 1511 8 different types of path length fingerprints: | |
| 1512 | |
| 1513 AllowSharedBonds AllowRings UseBondSymbols | |
| 1514 | |
| 1515 0 0 1 - AtomPathsNoCyclesWithBondSymbols | |
| 1516 0 1 1 - AtomPathsWithCyclesWithBondSymbols | |
| 1517 | |
| 1518 1 0 1 - AllAtomPathsNoCyclesWithBondSymbols | |
| 1519 1 1 1 - AllAtomPathsWithCyclesWithBondSymbols | |
| 1520 [ DEFAULT ] | |
| 1521 | |
| 1522 0 0 0 - AtomPathsNoCyclesNoBondSymbols | |
| 1523 0 1 0 - AtomPathsWithCyclesNoBondSymbols | |
| 1524 | |
| 1525 1 0 0 - AllAtomPathsNoCyclesNoBondSymbols | |
| 1526 1 1 0 - AllAtomPathsWithCyclesNoBondSymbols | |
| 1527 | |
| 1528 Additionally, the value of B<AtomIdentifierType>, in conjunction with the corresponding | |
| 1529 specified values for B<AtomicInvariantsToUse> and B<FunctionalClassesToUse>, changes the nature | |
| 1530 of atom path length strings and the fingerprints. | |
| 1531 | |
| 1532 For each atom path in the filtered atom paths list, an atom path string is created using value of | |
| 1533 B<AtomIdentifierType> and specified values to use for a particular atom identifier type. | |
| 1534 Value of B<UseBondSymbols> controls whether bond order symbols are used during generation | |
| 1535 of atom path string. Atom symbol corresponds to element symbol and characters used to represent | |
| 1536 bond order are: I<1 - None; 2 - '='; 3 - '#'; 1.5 or aromatic - ':'; others: bond order value>. By default, | |
| 1537 bond symbols are included in atom path strings. Exclusion of bond symbols in atom path strings | |
| 1538 results in fingerprints which correspond purely to atom paths without considering bonds. | |
| 1539 | |
| 1540 B<UseUniquePaths> controls whether structurally duplicate atom path strings are removed | |
| 1541 from the list. | |
| 1542 | |
| 1543 For I<PathLengthBits> value of B<Type>, each atom path is hashed to a 32 bit unsigned | |
| 1544 integer key using B<TextUtil::HashCode> function. Using the hash key as a seed for a random number | |
| 1545 generator, a random integer value between 0 and B<Size> is used to set corresponding bits | |
| 1546 in the fingerprint bit-vector string. Value of B<NumOfBitsToSetPerPath> option controls the number | |
| 1547 of times a random number is generated to set corresponding bits. | |
| 1548 | |
| 1549 For I<PathLengthCount> value of B<Type>, the number of times an atom path appears | |
| 1550 is tracked and a fingerprints count-string corresponding to count of atom paths is generated. | |
| 1551 | |
| 1552 The current release of MayaChemTools generates the following types of path length | |
| 1553 fingerprints bit-vector and vector strings: | |
| 1554 | |
| 1555 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng | |
| 1556 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110 | |
| 1557 0100010101011000101001011100110001000010001001101000001001001001001000 | |
| 1558 0010110100000111001001000001001010100100100000000011000000101001011100 | |
| 1559 0010000001000101010100000100111100110111011011011000000010110111001101 | |
| 1560 0101100011000000010001000011000010100011101100001000001000100000000... | |
| 1561 | |
| 1562 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng | |
| 1563 th1:MaxLength8;1024;HexadecimalString;Ascending;48caa1315d82d91122b029 | |
| 1564 42861c9409a4208182d12015509767bd0867653604481a8b1288000056090583603078 | |
| 1565 9cedae54e26596889ab121309800900490515224208421502120a0dd9200509723ae89 | |
| 1566 00024181b86c0122821d4e4880c38620dab280824b455404009f082003d52c212b4e6d | |
| 1567 6ea05280140069c780290c43 | |
| 1568 | |
| 1569 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength | |
| 1570 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2 | |
| 1571 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X | |
| 1572 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1 | |
| 1573 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO | |
| 1574 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C.... | |
| 1575 | |
| 1576 FingerprintsVector;PathLengthCount:DREIDINGAtomTypes:MinLength1:MaxLen | |
| 1577 gth8;410;NumericalValues;IDsAndValuesPairsString;C_2 2 C_3 9 C_R 22 F_ | |
| 1578 1 N_3 1 N_R 1 O_2 2 O_3 3 C_2=O_2 2 C_2C_3 1 C_2C_R 1 C_2N_3 1 C_2O_3 | |
| 1579 1 C_3C_3 7 C_3C_R 1 C_3N_R 1 C_3O_3 2 C_R:C_R 21 C_R:N_R 2 C_RC_R 2 C | |
| 1580 _RF_ 1 C_RN_3 1 C_2C_3C_3 1 C_2C_R:C_R 2 C_2N_3C_R 1 C_3C_2=O_2 1 C_3C | |
| 1581 _2O_3 1 C_3C_3C_3 5 C_3C_3C_R 2 C_3C_3N_R 1 C_3C_3O_3 4 C_3C_R:C_R ... | |
| 1582 | |
| 1583 FingerprintsVector;PathLengthCount:EStateAtomTypes:MinLength1:MaxLengt | |
| 1584 h8;454;NumericalValues;IDsAndValuesPairsString;aaCH 14 aasC 8 aasN 1 d | |
| 1585 O 2 dssC 2 sCH3 2 sF 1 sOH 3 ssCH2 4 ssNH 1 sssCH 3 aaCH:aaCH 10 aaCH: | |
| 1586 aasC 8 aasC:aasC 3 aasC:aasN 2 aasCaasC 2 aasCdssC 1 aasCsF 1 aasCssNH | |
| 1587 1 aasCsssCH 1 aasNssCH2 1 dO=dssC 2 dssCsOH 1 dssCssCH2 1 dssCssNH 1 | |
| 1588 sCH3sssCH 2 sOHsssCH 2 ssCH2ssCH2 1 ssCH2sssCH 4 aaCH:aaCH:aaCH 6 a... | |
| 1589 | |
| 1590 FingerprintsVector;PathLengthCount:FunctionalClassAtomTypes:MinLength1 | |
| 1591 :MaxLength8;404;NumericalValues;IDsAndValuesPairsString;Ar 22 Ar.HBA 1 | |
| 1592 HBA 2 HBA.HBD 3 HBD 1 Hal 1 NI 1 None 10 Ar.HBA:Ar 2 Ar.HBANone 1 Ar: | |
| 1593 Ar 21 ArAr 2 ArHBD 1 ArHal 1 ArNone 2 HBA.HBDNI 1 HBA.HBDNone 2 HBA=NI | |
| 1594 1 HBA=None 1 HBDNone 1 NINone 1 NoneNone 7 Ar.HBA:Ar:Ar 2 Ar.HBA:ArAr | |
| 1595 1 Ar.HBA:ArNone 1 Ar.HBANoneNone 1 Ar:Ar.HBA:Ar 1 Ar:Ar.HBANone 2 ... | |
| 1596 | |
| 1597 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt | |
| 1598 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1 | |
| 1599 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N | |
| 1600 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1 | |
| 1601 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR | |
| 1602 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ... | |
| 1603 | |
| 1604 FingerprintsVector;PathLengthCount:SLogPAtomTypes:MinLength1:MaxLength | |
| 1605 8;518;NumericalValues;IDsAndValuesPairsString;C1 5 C10 1 C11 1 C14 1 C | |
| 1606 18 14 C20 4 C21 2 C22 1 C5 2 CS 2 F 1 N11 1 N4 1 O10 1 O2 3 O9 1 C10C1 | |
| 1607 1 C10N11 1 C11C1 2 C11C21 1 C14:C18 2 C14F 1 C18:C18 10 C18:C20 4 C18 | |
| 1608 :C22 2 C1C5 1 C1CS 4 C20:C20 1 C20:C21 1 C20:N11 1 C20C20 2 C21:C21 1 | |
| 1609 C21:N11 1 C21C5 1 C22N4 1 C5=O10 1 C5=O9 1 C5N4 1 C5O2 1 CSO2 2 C10... | |
| 1610 | |
| 1611 FingerprintsVector;PathLengthCount:SYBYLAtomTypes:MinLength1:MaxLength | |
| 1612 8;412;NumericalValues;IDsAndValuesPairsString;C.2 2 C.3 9 C.ar 22 F 1 | |
| 1613 N.am 1 N.ar 1 O.2 1 O.3 2 O.co2 2 C.2=O.2 1 C.2=O.co2 1 C.2C.3 1 C.2C. | |
| 1614 ar 1 C.2N.am 1 C.2O.co2 1 C.3C.3 7 C.3C.ar 1 C.3N.ar 1 C.3O.3 2 C.ar:C | |
| 1615 .ar 21 C.ar:N.ar 2 C.arC.ar 2 C.arF 1 C.arN.am 1 C.2C.3C.3 1 C.2C.ar:C | |
| 1616 .ar 2 C.2N.amC.ar 1 C.3C.2=O.co2 1 C.3C.2O.co2 1 C.3C.3C.3 5 C.3C.3... | |
| 1617 | |
| 1618 FingerprintsVector;PathLengthCount:TPSAAtomTypes:MinLength1:MaxLength8 | |
| 1619 ;331;NumericalValues;IDsAndValuesPairsString;N21 1 N7 1 None 34 O3 2 O | |
| 1620 4 3 N21:None 2 N21None 1 N7None 2 None:None 21 None=O3 2 NoneNone 13 N | |
| 1621 oneO4 3 N21:None:None 2 N21:NoneNone 2 N21NoneNone 1 N7None:None 2 N7N | |
| 1622 one=O3 1 N7NoneNone 1 None:N21:None 1 None:N21None 2 None:None:None 20 | |
| 1623 None:NoneNone 12 NoneN7None 1 NoneNone=O3 2 NoneNoneNone 8 NoneNon... | |
| 1624 | |
| 1625 FingerprintsVector;PathLengthCount:UFFAtomTypes:MinLength1:MaxLength8; | |
| 1626 410;NumericalValues;IDsAndValuesPairsString;C_2 2 C_3 9 C_R 22 F_ 1 N_ | |
| 1627 3 1 N_R 1 O_2 2 O_3 3 C_2=O_2 2 C_2C_3 1 C_2C_R 1 C_2N_3 1 C_2O_3 1 C_ | |
| 1628 3C_3 7 C_3C_R 1 C_3N_R 1 C_3O_3 2 C_R:C_R 21 C_R:N_R 2 C_RC_R 2 C_RF_ | |
| 1629 1 C_RN_3 1 C_2C_3C_3 1 C_2C_R:C_R 2 C_2N_3C_R 1 C_3C_2=O_2 1 C_3C_2O_3 | |
| 1630 1 C_3C_3C_3 5 C_3C_3C_R 2 C_3C_3N_R 1 C_3C_3O_3 4 C_3C_R:C_R 1 C_3... | |
| 1631 | |
| 1632 =head2 METHODS | |
| 1633 | |
| 1634 =over 4 | |
| 1635 | |
| 1636 =item B<new> | |
| 1637 | |
| 1638 $NewPathLengthFingerprints = new PathLengthFingerprints( | |
| 1639 %NamesAndValues); | |
| 1640 | |
| 1641 Using specified I<PathLengthFingerprints> property names and values hash, B<new> method creates a new object | |
| 1642 and returns a reference to newly created B<PathLengthFingerprints> object. By default, the following properties are | |
| 1643 initialized: | |
| 1644 | |
| 1645 Molecule = '' | |
| 1646 Type = '' | |
| 1647 Size = 1024 | |
| 1648 MinSize = 32 | |
| 1649 MaxSize = 2**32 | |
| 1650 NumOfBitsToSetPerPath = 1 | |
| 1651 MinLength = 1 | |
| 1652 MaxLength = 8 | |
| 1653 AllowSharedBonds = 1 | |
| 1654 AllowRings = 1 | |
| 1655 UseBondSymbols = 1 | |
| 1656 UseUniquePaths = '' | |
| 1657 AtomIdentifierType = '' | |
| 1658 AtomicInvariantsToUse = ['AS'] | |
| 1659 FunctionalClassesToUse = ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal'] | |
| 1660 | |
| 1661 Examples: | |
| 1662 | |
| 1663 $PathLengthFingerprints = new PathLengthFingerprints( | |
| 1664 'Molecule' => $Molecule, | |
| 1665 'Type' => 'PathLengthBits', | |
| 1666 'AtomIdentifierType' => | |
| 1667 'AtomicInvariantsAtomTypes'); | |
| 1668 | |
| 1669 $PathLengthFingerprints = new PathLengthFingerprints( | |
| 1670 'Molecule' => $Molecule, | |
| 1671 'Type' => 'PathLengthBits', | |
| 1672 'Size' => 1024, | |
| 1673 'MinLength' => 1, | |
| 1674 'MaxLength' => 8, | |
| 1675 'AllowRings' => 1, | |
| 1676 'AllowSharedBonds' => 1, | |
| 1677 'UseBondSymbols' => 1, | |
| 1678 'UseUniquePaths' => 1, | |
| 1679 'AtomIdentifierType' => | |
| 1680 'AtomicInvariantsAtomTypes', | |
| 1681 'AtomicInvariantsToUse' => ['AS']); | |
| 1682 | |
| 1683 $PathLengthFingerprints = new PathLengthFingerprints( | |
| 1684 'Molecule' => $Molecule, | |
| 1685 'Type' => 'PathLengthCount', | |
| 1686 'MinLength' => 1, | |
| 1687 'MaxLength' => 8, | |
| 1688 'AllowRings' => 1, | |
| 1689 'AllowSharedBonds' => 1, | |
| 1690 'UseBondSymbols' => 1, | |
| 1691 'UseUniquePaths' => 1, | |
| 1692 'AtomIdentifierType' => | |
| 1693 'AtomicInvariantsAtomTypes', | |
| 1694 'AtomicInvariantsToUse' => ['AS']); | |
| 1695 | |
| 1696 $PathLengthFingerprints = new PathLengthFingerprints( | |
| 1697 'Molecule' => $Molecule, | |
| 1698 'Type' => 'PathLengthBits', | |
| 1699 'AtomIdentifierType' => | |
| 1700 'SLogPAtomTypes'); | |
| 1701 | |
| 1702 $PathLengthFingerprints = new PathLengthFingerprints( | |
| 1703 'Molecule' => $Molecule, | |
| 1704 'Type' => 'PathLengthCount', | |
| 1705 'AtomIdentifierType' => | |
| 1706 'SYBYLAtomTypes'); | |
| 1707 | |
| 1708 $PathLengthFingerprints = new PathLengthFingerprints( | |
| 1709 'Molecule' => $Molecule, | |
| 1710 'Type' => 'PathLengthBits', | |
| 1711 'AtomIdentifierType' => | |
| 1712 'FunctionalClassAtomTypes', | |
| 1713 'FunctionalClassesToUse' => ['HBD', 'HBA', 'Ar']); | |
| 1714 | |
| 1715 $PathLengthFingerprints->GenerateFingerprints(); | |
| 1716 print "$PathLengthFingerprints\n"; | |
| 1717 | |
| 1718 =item B<GetDescription> | |
| 1719 | |
| 1720 $Description = $PathLengthFingerprints->GetDescription(); | |
| 1721 | |
| 1722 Returns a string containing description of path length fingerprints. | |
| 1723 | |
| 1724 =item B<GenerateFingerprints> | |
| 1725 | |
| 1726 $PathLengthFingerprints->GenerateFingerprints(); | |
| 1727 | |
| 1728 Generates path length fingerprints and returns I<PathLengthFingerprints>. | |
| 1729 | |
| 1730 =item B<SetMaxLength> | |
| 1731 | |
| 1732 $PathLengthFingerprints->SetMaxLength($Length); | |
| 1733 | |
| 1734 Sets maximum value of atom path length to be used during atom path length fingerprints | |
| 1735 generation and returns I<PathLengthFingerprints>. | |
| 1736 | |
| 1737 =item B<SetAtomIdentifierType> | |
| 1738 | |
| 1739 $PathLengthFingerprints->SetAtomIdentifierType($IdentifierType); | |
| 1740 | |
| 1741 Sets atom I<IdentifierType> to use during path length fingerprints generation and | |
| 1742 returns I<PathLengthFingerprints>. | |
| 1743 | |
| 1744 Possible values: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
| 1745 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, | |
| 1746 TPSAAtomTypes, UFFAtomTypes>. | |
| 1747 | |
| 1748 =item B<SetAtomicInvariantsToUse> | |
| 1749 | |
| 1750 $PathLengthFingerprints->SetAtomicInvariantsToUse($ValuesRef); | |
| 1751 $PathLengthFingerprints->SetAtomicInvariantsToUse(@Values); | |
| 1752 | |
| 1753 Sets atomic invariants to use during I<AtomicInvariantsAtomTypes> value of I<AtomIdentifierType> | |
| 1754 for path length fingerprints generation and returns I<PathLengthFingerprints>. | |
| 1755 | |
| 1756 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB, | |
| 1757 H, Ar, RA, FC, MN, SM>. Default value: I<AS>. | |
| 1758 | |
| 1759 The atomic invariants abbreviations correspond to: | |
| 1760 | |
| 1761 AS = Atom symbol corresponding to element symbol | |
| 1762 | |
| 1763 X<n> = Number of non-hydrogen atom neighbors or heavy atoms | |
| 1764 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms | |
| 1765 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms | |
| 1766 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms | |
| 1767 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms | |
| 1768 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms | |
| 1769 H<n> = Number of implicit and explicit hydrogens for atom | |
| 1770 Ar = Aromatic annotation indicating whether atom is aromatic | |
| 1771 RA = Ring atom annotation indicating whether atom is in a ring | |
| 1772 FC<+n/-n> = Formal charge assigned to atom | |
| 1773 MN<n> = Mass number indicating isotope other than most abundant isotope | |
| 1774 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or | |
| 1775 3 (triplet) | |
| 1776 | |
| 1777 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
| 1778 | |
| 1779 AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n> | |
| 1780 | |
| 1781 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are | |
| 1782 optional. Atom type specification doesn't include atomic invariants with zero or undefined values. | |
| 1783 | |
| 1784 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words | |
| 1785 are also allowed: | |
| 1786 | |
| 1787 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors | |
| 1788 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms | |
| 1789 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms | |
| 1790 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms | |
| 1791 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms | |
| 1792 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms | |
| 1793 H : NumOfImplicitAndExplicitHydrogens | |
| 1794 Ar : Aromatic | |
| 1795 RA : RingAtom | |
| 1796 FC : FormalCharge | |
| 1797 MN : MassNumber | |
| 1798 SM : SpinMultiplicity | |
| 1799 | |
| 1800 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant | |
| 1801 atom types. | |
| 1802 | |
| 1803 =item B<SetFunctionalClassesToUse> | |
| 1804 | |
| 1805 $PathLengthFingerprints->SetFunctionalClassesToUse($ValuesRef); | |
| 1806 $PathLengthFingerprints->SetFunctionalClassesToUse(@Values); | |
| 1807 | |
| 1808 Sets functional classes to use during I<FunctionalClassAtomTypes> value of I<AtomIdentifierType> | |
| 1809 for path length fingerprints generation and returns I<PathLengthFingerprints>. | |
| 1810 | |
| 1811 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>. | |
| 1812 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>. | |
| 1813 | |
| 1814 The functional class abbreviations correspond to: | |
| 1815 | |
| 1816 HBD: HydrogenBondDonor | |
| 1817 HBA: HydrogenBondAcceptor | |
| 1818 PI : PositivelyIonizable | |
| 1819 NI : NegativelyIonizable | |
| 1820 Ar : Aromatic | |
| 1821 Hal : Halogen | |
| 1822 H : Hydrophobic | |
| 1823 RA : RingAtom | |
| 1824 CA : ChainAtom | |
| 1825 | |
| 1826 Functional class atom type specification for an atom corresponds to: | |
| 1827 | |
| 1828 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA or None | |
| 1829 | |
| 1830 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom | |
| 1831 types. It uses following definitions [ Ref 60-61, Ref 65-66 ]: | |
| 1832 | |
| 1833 HydrogenBondDonor: NH, NH2, OH | |
| 1834 HydrogenBondAcceptor: N[!H], O | |
| 1835 PositivelyIonizable: +, NH2 | |
| 1836 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH | |
| 1837 | |
| 1838 =item B<SetMinLength> | |
| 1839 | |
| 1840 $PathLengthFingerprints->SetMinLength($Length); | |
| 1841 | |
| 1842 Sets minimum value of atom path length to be used during atom path length fingerprints | |
| 1843 generation and returns I<PathLengthFingerprints>. | |
| 1844 | |
| 1845 =item B<SetMaxLength> | |
| 1846 | |
| 1847 $PathLengthFingerprints->SetMaxLength($Length); | |
| 1848 | |
| 1849 Sets maximum value of atom path length to be used during atom path length fingerprints | |
| 1850 generation and returns I<PathLengthFingerprints>. | |
| 1851 | |
| 1852 =item B<SetNumOfBitsToSetPerPath> | |
| 1853 | |
| 1854 $PathLengthFingerprints->SetNumOfBitsToSetPerPath($NumOfBits); | |
| 1855 | |
| 1856 Sets number of bits to set for each path for I<PathLengthBits> B<Type> during path length fingerprints | |
| 1857 generation and returns I<PathLengthFingerprints>. | |
| 1858 | |
| 1859 =item B<SetType> | |
| 1860 | |
| 1861 $PathLengthFingerprints->SetType($Type); | |
| 1862 | |
| 1863 Sets type of path length fingerprints and returns I<PathLengthFingerprints>. Possible values: | |
| 1864 I<PathLengthBits or PathLengthCount>. | |
| 1865 | |
| 1866 =item B<StringifyPathLengthFingerprints> | |
| 1867 | |
| 1868 $String = $PathLengthFingerprints->StringifyPathLengthFingerprints(); | |
| 1869 | |
| 1870 Returns a string containing information about I<PathLengthFingerprints> object. | |
| 1871 | |
| 1872 =back | |
| 1873 | |
| 1874 =head1 AUTHOR | |
| 1875 | |
| 1876 Manish Sud <msud@san.rr.com> | |
| 1877 | |
| 1878 =head1 SEE ALSO | |
| 1879 | |
| 1880 Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm, | |
| 1881 AtomTypesFingerprints.pm, EStateIndiciesFingerprints.pm, ExtendedConnectivityFingerprints.pm, | |
| 1882 MACCSKeys.pm, TopologicalAtomPairsFingerprints.pm, TopologicalAtomTripletsFingerprints.pm, | |
| 1883 TopologicalAtomTorsionsFingerprints.pm, TopologicalPharmacophoreAtomPairsFingerprints.pm, | |
| 1884 TopologicalPharmacophoreAtomTripletsFingerprints.pm | |
| 1885 | |
| 1886 =head1 COPYRIGHT | |
| 1887 | |
| 1888 Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 1889 | |
| 1890 This file is part of MayaChemTools. | |
| 1891 | |
| 1892 MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 1893 the terms of the GNU Lesser General Public License as published by the Free | |
| 1894 Software Foundation; either version 3 of the License, or (at your option) | |
| 1895 any later version. | |
| 1896 | |
| 1897 =cut |
