Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/Fingerprints/ExtendedConnectivityFingerprints.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4816e4a8ae95 |
---|---|
1 package Fingerprints::ExtendedConnectivityFingerprints; | |
2 # | |
3 # $RCSfile: ExtendedConnectivityFingerprints.pm,v $ | |
4 # $Date: 2015/02/28 20:48:54 $ | |
5 # $Revision: 1.39 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability or fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use Carp; | |
31 use Exporter; | |
32 use TextUtil (); | |
33 use MathUtil (); | |
34 use Fingerprints::Fingerprints; | |
35 use Molecule; | |
36 use AtomTypes::AtomicInvariantsAtomTypes; | |
37 use AtomTypes::FunctionalClassAtomTypes; | |
38 use AtomTypes::DREIDINGAtomTypes; | |
39 use AtomTypes::EStateAtomTypes; | |
40 use AtomTypes::MMFF94AtomTypes; | |
41 use AtomTypes::SLogPAtomTypes; | |
42 use AtomTypes::SYBYLAtomTypes; | |
43 use AtomTypes::TPSAAtomTypes; | |
44 use AtomTypes::UFFAtomTypes; | |
45 | |
# Package level variables used for inheritance and by Exporter...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

# Inherit from the generic fingerprints class; nothing is exported...
@ISA = ('Fingerprints::Fingerprints', 'Exporter');
@EXPORT = ();
@EXPORT_OK = ();

%EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my $ClassName;
_InitializeClass();

# Stringification of an object is delegated to StringifyExtendedConnectivityFingerprints...
use overload '""' => 'StringifyExtendedConnectivityFingerprints';
60 | |
# Class constructor: create the parent class object, re-bless it into this class,
# assign default values and then apply the specified name and value pairs...
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Start from the object generated by the parent class...
  my $This = $Class->SUPER::new();

  # Allow invocation as a class method as well as an object method...
  my $ClassToUse = ref($Class) || $Class;
  bless $This, $ClassToUse;

  # Defaults go first so that explicitly specified properties can override them...
  $This->_InitializeExtendedConnectivityFingerprints();
  $This->_InitializeExtendedConnectivityFingerprintsProperties(%NamesAndValues);

  return $This;
}
74 | |
# Assign default values to all object data used during fingerprints generation...
#
sub _InitializeExtendedConnectivityFingerprints {
  my($This) = @_;

  # Type of fingerprint to generate:
  #
  # ExtendedConnectivity - Set of integer identifiers corresponding to structurally unique features
  # ExtendedConnectivityCount - Set of integer identifiers corresponding to structurally unique features and their count
  # ExtendedConnectivityBits - A bit vector indicating presence/absence of structurally unique features
  #
  $This->{Type} = 'ExtendedConnectivity';

  # Radius of atomic neighborhoods for extended connectivity...
  $This->{NeighborhoodRadius} = 2;

  # Size of the bit vector used during generation of ExtendedConnectivityBits
  # fingerprints along with its min and max size...
  @{$This}{qw(Size MinSize MaxSize)} = (1024, 32, 2**32);

  # Type of atom attributes to use for initial identifier assignment to non-hydrogen
  # atoms during the calculation of extended connectivity fingerprints [ Ref 48, Ref 52 ].
  #
  # Currently supported values are: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes,
  # DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
  # TPSAAtomTypes, UFFAtomTypes
  #
  # It starts out empty and must be set explicitly...
  #
  $This->{AtomIdentifierType} = '';

  # Random number generator to use during generation of fingerprints bit-vector
  # string: Perl CORE::rand or MayaChemTools MathUtil::random function.
  #
  # MathUtil::random is a variant of linear congruential generator (LCG) as described
  # by Miller et al. [ Ref 120 ], also referred to as Lehmer or Park-Miller random
  # number generator. Unlike Perl's core rand, it generates consistent random values
  # across different platforms for a specific random seed and leads to generation of
  # portable fingerprints bit-vector strings.
  #
  # Perl's core generator is used by default...
  #
  $This->{UsePerlCoreRandom} = 1;

  # Working data collected up to the specified neighborhood radius:
  #
  # AtomNeighborhoods - Atom neighborhoods
  # AtomIdentifiers - Atom identifiers at different neighborhoods
  # UniqueAtomIdentifiers, UniqueAtomIdentifiersCount - Atom identifiers which are unique
  #   by value and their count
  # StructurallyUniqueAtomIdentifiers, StructurallyUniqueAtomIdentifiersCount - Atom identifiers
  #   which are structurally unique and their count
  #
  my($DataName);
  for $DataName (qw(AtomNeighborhoods AtomIdentifiers UniqueAtomIdentifiers UniqueAtomIdentifiersCount StructurallyUniqueAtomIdentifiers StructurallyUniqueAtomIdentifiersCount)) {
    %{$This->{$DataName}} = ();
  }

  # Structure feature information at different neighborhoods up to specified neighborhood
  # radius used during removal of atom identifiers which are structurally equivalent...
  %{$This->{StructureFeatures}} = ();
}
140 | |
# Initialize class level data...
sub _InitializeClass {
  # Class name is used as a prefix in error and warning messages...
  return $ClassName = __PACKAGE__;
}
146 | |
# Apply the specified name and value pairs to the object, validate them and set up
# the vector or bit-vector corresponding to the fingerprints type...
#
# Each name is mapped to a Set<Name> method which is invoked with its value. Molecule and
# AtomIdentifierType must be specified; Size, when specified, must be a power of 2.
#
sub _InitializeExtendedConnectivityFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  # Hand over each value to its Set<Name> method...
  for my $Name (keys %NamesAndValues) {
    my $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  # Make sure molecule object was specified...
  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule..."
    unless exists $NamesAndValues{Molecule};

  # Make sure AtomIdentifierType was specified...
  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType..."
    unless exists $NamesAndValues{AtomIdentifierType};

  # Make sure size is a power of 2...
  if (exists $NamesAndValues{Size} && !TextUtil::IsNumberPowerOfNumber($NamesAndValues{Size}, 2)) {
    croak "Error: ${ClassName}->New: Specified size value, $NamesAndValues{Size}, must be power of 2...";
  }

  # Set up vector or bit-vector for the fingerprints type...
  FINGERPRINTSTYPE: {
    if ($This->{Type} =~ /^ExtendedConnectivity$/i) {
      $This->_InitializeExtendedConnectivityFingerprintsVector();
      last FINGERPRINTSTYPE;
    }
    if ($This->{Type} =~ /^ExtendedConnectivityCount$/i) {
      $This->_InitializeExtendedConnectivityCountFingerprintsVector();
      last FINGERPRINTSTYPE;
    }
    if ($This->{Type} =~ /^ExtendedConnectivityBits$/i) {
      $This->_InitializeExtendedConnectivityBitsFingerprintsBitVector();
      last FINGERPRINTSTYPE;
    }
    croak "Error: ${ClassName}->_InitializeExtendedConnectivityFingerprintsProperties: Unknown ExtendedConnectivity fingerprints type: $This->{Type}; Supported fingerprints types: ExtendedConnectivity, ExtendedConnectivityCount or ExtendedConnectivityBits...";
  }

  return $This;
}
189 | |
# Set up fingerprints vector for ExtendedConnectivity fingerprints: a FingerprintsVector
# containing AlphaNumericalValues...
#
sub _InitializeExtendedConnectivityFingerprintsVector {
  my($This) = @_;

  # Type of vector followed by type of FingerprintsVector...
  @{$This}{qw(VectorType FingerprintsVectorType)} = ('FingerprintsVector', 'AlphaNumericalValues');

  $This->_InitializeFingerprintsVector();

  return $This;
}
205 | |
# Set up fingerprints vector for ExtendedConnectivityCount fingerprints: a FingerprintsVector
# containing NumericalValues...
#
sub _InitializeExtendedConnectivityCountFingerprintsVector {
  my($This) = @_;

  # Type of vector followed by type of FingerprintsVector...
  @{$This}{qw(VectorType FingerprintsVectorType)} = ('FingerprintsVector', 'NumericalValues');

  $This->_InitializeFingerprintsVector();

  return $This;
}
221 | |
# Set up fingerprints bit-vector for ExtendedConnectivityBits fingerprints...
#
sub _InitializeExtendedConnectivityBitsFingerprintsBitVector {
  my($This) = @_;

  # Bits fingerprints are stored in a bit-vector instead of a vector...
  my $VectorType = 'FingerprintsBitVector';
  $This->{VectorType} = $VectorType;

  $This->_InitializeFingerprintsBitVector();

  return $This;
}
234 | |
# Set type of fingerprints to generate...
#
# Supported values, matched case-insensitively, are: ExtendedConnectivity,
# ExtendedConnectivityCount and ExtendedConnectivityBits. The value stored in the
# object always uses the canonical capitalization.
#
# Croaks for any other value; the error message reports the rejected value.
#
sub SetType {
  my($This, $Type) = @_;

  if ($Type =~ /^ExtendedConnectivity$/i) {
    $This->{Type} = 'ExtendedConnectivity';
  }
  elsif ($Type =~ /^ExtendedConnectivityCount$/i) {
    $This->{Type} = 'ExtendedConnectivityCount';
  }
  elsif ($Type =~ /^ExtendedConnectivityBits$/i) {
    $This->{Type} = 'ExtendedConnectivityBits';
  }
  else {
    # Report the specified value and not the type currently set for the object...
    croak "Error: ${ClassName}->SetType: Unknown ExtendedConnectivity fingerprints type: $Type; Supported fingerprints types: ExtendedConnectivity, ExtendedConnectivityCount or ExtendedConnectivityBits...";
  }
  return $This;
}
254 | |
# Disable vector type change: vector type is assigned during object initialization
# according to the fingerprints type. Always croaks...
#
sub SetVectorType {
  my($This, $Type) = @_;

  croak "Error: ${ClassName}->SetVectorType: Can't change vector type...";
}
264 | |
# Disable fingerprints vector type change: it is assigned during object initialization
# according to the fingerprints type. Always croaks...
#
sub SetFingerprintsVectorType {
  my($This, $Type) = @_;

  croak "Error: ${ClassName}->SetFingerprintsVectorType: Can't change fingerprints vector type...";
}
274 | |
# Set initial atom identifier type...
#
# The type is matched case-insensitively against the supported atom types and can be
# set only once for an object: croaks for an unsupported value or when a type has
# already been set. After a successful assignment, the identifier type information
# is initialized.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  my $SupportedTypesPattern = qr/^(AtomicInvariantsAtomTypes|FunctionalClassAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i;

  # Validity of the specified value is checked before anything else...
  croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes and UFFAtomTypes."
    unless $IdentifierType =~ $SupportedTypesPattern;

  # Default value is an empty string; anything else implies an earlier assignment...
  croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type: It's already set..."
    if $This->{AtomIdentifierType};

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}
295 | |
# Set atom neighborhood radius...
#
# The value must be an integer >= 0; croaks otherwise.
#
sub SetNeighborhoodRadius {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid: It must be an integer..."
    unless TextUtil::IsInteger($Value);

  croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid: It must be >= 0..."
    if $Value < 0;

  $This->{NeighborhoodRadius} = $Value;

  return $This;
}
312 | |
# Get fingerprints description...
#
# An explicitly set description is returned as is. Otherwise, the description is
# generated using fingerprints type, atom identifier type and neighborhood radius.
#
sub GetDescription {
  my($This) = @_;

  return exists $This->{Description}
    ? $This->{Description}
    : "$This->{Type}:$This->{AtomIdentifierType}:Radius$This->{NeighborhoodRadius}";
}
327 | |
# Generate fingerprints...
#
# Methodology:
#   . Assign initial atom identifiers to all non-hydrogen atoms in the molecule
#
#   . Remove duplicates from the initial identifiers and add them to list corresponding
#     to molecule fingerprint
#
#   . For NeighborhoodRadius value of 0, just return the molecule fingerprint list
#
#   . For each NeighborhoodRadius level
#      . For each non-hydrogen CentralAtom at this NeighborhoodRadius level
#         . For each non-hydrogen SuccessorNeighborAtom
#           . Collect (BondOrder AtomIdentifier) pair of values corresponding to
#             (CentralAtom SuccessorNeighborAtom) and add it to a list
#
#         . Sort list containing (BondOrder AtomIdentifier) pairs first by BondOrder followed
#           by AtomIdentifiers to make these values graph invariant
#         . Generate a hash code for the values in the list
#         . Assign hash code as new atom identifier at the current NeighborhoodRadius level
#         . Save all atoms and bonds corresponding to the substructure involved in
#           generating the hash code to be used for identifying structural duplicate hash code
#
#         . Add the new identifier to the molecule fingerprint list making sure it's not a duplicate
#           identifier
#
# Hash code atom identifier deduplication:
#   . Track/remove the identifier generated at higher neighborhood radius level
#
# Structural atom identifier deduplication:
#   . For equivalent atoms and bonds corresponding to substructure at a NeighborhoodRadius level,
#     track/remove the atom identifier with largest value
#
# Returns the object itself. When initial atom identifiers can't be assigned to all
# atoms, a warning is issued and no fingerprints are generated; the cached molecule
# data is cleared in both cases.
#
sub GenerateFingerprints {
  my($This) = @_;

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Assign initial atom identifiers...
  if (!$This->_AssignInitialAtomIdentifiers()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";

    # Don't leave cached molecule data behind on the failure path...
    $This->_ClearMoleculeDataCache();

    return $This;
  }

  # Identify atom neighborhoods up to specified radius...
  $This->_GetAtomNeighborhoods();

  # Assign atom identifiers to central atoms considering atom neighborhoods at each
  # radius level...
  $This->_AssignAtomIdentifiersToAtomNeighborhoods();

  # Remove duplicate identifiers...
  $This->_RemoveDuplicateAtomIdentifiers();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}
392 | |
# Assign appropriate initial atom identifiers...
#
# Generation of initial identifier for a specific atom involves:
#   . Atom types corresponding to the specified AtomIdentifierType are assigned to
#     atoms in the molecule
#   . Atom type for the atom, or 'None' for an atom without any atom type, is used as
#     initial atom identifier string
#   . A hash key, using TextUtil::HashCode function, is generated for the atom
#     identifier string and assigned to the atom as initial atom identifier at
#     radius level 0
#
# Returns undef when atom types assignment doesn't succeed; otherwise, returns the object.
#
sub _AssignInitialAtomIdentifiers {
  my($This) = @_;

  # Initialize atom identifiers...
  $This->_InitializeAtomIdentifiers();

  # Set up atom types...
  my $IgnoreHydrogens = 1;
  my $SpecifiedAtomTypes = undef;

  # Parameters shared by all atom types classes except TPSAAtomTypes...
  my @CommonParameters = ('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::AtomicInvariantsAtomTypes->new(@CommonParameters, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::FunctionalClassAtomTypes->new(@CommonParameters, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
  }
  elsif ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::DREIDINGAtomTypes->new(@CommonParameters);
  }
  elsif ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::EStateAtomTypes->new(@CommonParameters);
  }
  elsif ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::MMFF94AtomTypes->new(@CommonParameters);
  }
  elsif ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::SLogPAtomTypes->new(@CommonParameters);
  }
  elsif ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::SYBYLAtomTypes->new(@CommonParameters);
  }
  elsif ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
    # TPSA atom types are set up with their own parameters: no IgnoreHydrogens...
    $SpecifiedAtomTypes = AtomTypes::TPSAAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
  }
  elsif ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::UFFAtomTypes->new(@CommonParameters);
  }
  else {
    croak "Error: ${ClassName}->_AssignInitialAtomIdentifiers: Couldn't assign intial atom identifiers: InitialAtomIdentifierType $This->{AtomIdentifierType} is not supported...";
  }

  # Assign atom types...
  $SpecifiedAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  return undef unless $SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful();

  # Assign atom identifiers at radius 0...
  my $Radius = 0;
  for my $Atom (@{$This->{Atoms}}) {
    my $AtomID = $Atom->GetID();

    # Atoms without any atom type fall back to 'None'...
    my $InitialAtomTypeString = $SpecifiedAtomTypes->GetAtomType($Atom) || 'None';

    $This->{AtomIdentifiers}{$Radius}{$AtomID} = TextUtil::HashCode($InitialAtomTypeString);
  }

  return $This;
}
484 | |
# Initialize atom identifiers...
#
# All existing atom identifiers are cleared and empty hashes are set up for each
# radius level, 0 up to NeighborhoodRadius, for:
#
#   . AtomIdentifiers: key and value correspond to AtomID and AtomIdentifier
#   . Unique and structurally unique atom identifiers: key and value correspond to
#     AtomIdentifier and AtomID, along with their counts
#
sub _InitializeAtomIdentifiers {
  my($This) = @_;

  my @PerRadiusDataNames = qw(AtomIdentifiers UniqueAtomIdentifiers UniqueAtomIdentifiersCount StructurallyUniqueAtomIdentifiers StructurallyUniqueAtomIdentifiersCount);

  %{$This->{AtomIdentifiers}} = ();

  for my $CurrentRadius (0 .. $This->{NeighborhoodRadius}) {
    for my $DataName (@PerRadiusDataNames) {
      %{$This->{$DataName}{$CurrentRadius}} = ();
    }
  }
}
507 | |
# Collect atom neighborhoods up to specified neighborhood radius...
#
# Any previously collected neighborhoods are cleared. For a radius less than 1, nothing
# is collected and an empty return is performed; otherwise, neighborhoods with successor
# atoms retrieved from the molecule for each atom are stored by radius level and AtomID.
#
sub _GetAtomNeighborhoods {
  my($This) = @_;

  %{$This->{AtomNeighborhoods}} = ();

  my $Radius = $This->{NeighborhoodRadius};

  # At radius level 0, it's just the atoms...
  return if $Radius < 1;

  # Initialize neighborhood at different radii...
  for my $RadiusLevel (0 .. $Radius) {
    %{$This->{AtomNeighborhoods}{$RadiusLevel}} = ();
  }

  my $Molecule = $This->GetMolecule();

  # Collect available atom neighborhoods at different neighborhood levels for each atom:
  # position of a neighborhood in the retrieved list is used as its radius level...
  for my $Atom (@{$This->{Atoms}}) {
    my $AtomID = $Atom->GetID();
    my @NeighborhoodsWithSuccessorAtomsRefs = $Molecule->GetAtomNeighborhoodsWithSuccessorAtomsAndRadiusUpto($Atom, $Radius);

    for my $CurrentRadius (0 .. $#NeighborhoodsWithSuccessorAtomsRefs) {
      $This->{AtomNeighborhoods}{$CurrentRadius}{$AtomID} = $NeighborhoodsWithSuccessorAtomsRefs[$CurrentRadius];
    }
  }
  return $This;
}
542 | |
# Assign atom identifiers to central atom at each neighborhood radius level...
#
# Go over the atom neighborhoods at each radius up to specified radius and assign atom
# identifiers using their connected successor atoms and their identifiers.
#
# For a neighborhood atom at a specified radius, the successor connected atoms correspond
# to next radius level and the last set of neighborhood atoms don't have any successor
# connected atoms. Additionally, radius level 0 just corresponds to initial atom identifiers.
#
# So in order to process atom neighborhoods up to specified radius level, the last atom
# neighborhood doesn't need to be processed: it gets processed at previous radius level as
# successor connected atoms.
#
# For a radius less than 1, nothing is done and an empty return is performed.
#
sub _AssignAtomIdentifiersToAtomNeighborhoods {
  my($This) = @_;

  return if $This->{NeighborhoodRadius} < 1;

  my $LastRadiusToProcess = $This->{NeighborhoodRadius} - 1;

  RADIUS: for my $Radius (0 .. $LastRadiusToProcess) {
    ATOM: for my $Atom (@{$This->{Atoms}}) {
      my $AtomID = $Atom->GetID();

      # Are there any available atom neighborhoods at this radius?
      next ATOM unless exists $This->{AtomNeighborhoods}{$Radius}{$AtomID};

      # Go over neighborhood atoms and their successor connected atoms at this radius and
      # collect atom identifiers of successor atoms grouped by the order of the bond to
      # their neighborhood atom...
      #
      my %BondOrdersAndAtomIdentifiers = ();
      my $SuccessorAtomCount = 0;

      NEIGHBORHOODS: for my $NeighborhoodAtomsWithSuccessorAtomsRef (@{$This->{AtomNeighborhoods}{$Radius}{$AtomID}}) {
        my($NeighborhoodAtom, $NeighborhoodAtomSuccessorAtomsRef) = @{$NeighborhoodAtomsWithSuccessorAtomsRef};

        # Any connected successors for the NeighborhoodAtom?
        next NEIGHBORHOODS unless @{$NeighborhoodAtomSuccessorAtomsRef};

        SUCCESSORATOM: for my $SuccessorAtom (@{$NeighborhoodAtomSuccessorAtomsRef}) {
          # Skip successor hydrogen atom...
          next SUCCESSORATOM if $SuccessorAtom->IsHydrogen();

          my $SuccessorAtomID = $SuccessorAtom->GetID();
          $SuccessorAtomCount++;

          # Aromatic bonds use a bond order of 1.5...
          my $Bond = $NeighborhoodAtom->GetBondToAtom($SuccessorAtom);
          my $BondOrder = $Bond->IsAromatic() ? "1.5" : $Bond->GetBondOrder();

          # List for a new bond order is created automatically by push...
          push @{$BondOrdersAndAtomIdentifiers{$BondOrder}}, $This->{AtomIdentifiers}{$Radius}{$SuccessorAtomID};
        }
      }

      # No new identifier without any non-hydrogen successor atoms...
      next ATOM unless $SuccessorAtomCount;

      # Assign a new atom identifier at the next radius level...
      $This->_AssignAtomIdentifierToAtomNeighborhood($AtomID, $Radius, \%BondOrdersAndAtomIdentifiers);
    }
  }
  return $This;
}
616 | |
# Generate and assign atom identifier for AtomID at next radius level...
#
# The new identifier is a hash code, generated by TextUtil::HashCode, for the string
# containing the following values concatenated without any delimiter:
#
#   . Next radius level
#   . Atom identifier at the current radius level
#   . (BondOrder AtomIdentifier) pairs for successor atoms, sorted numerically by bond
#     order followed by atom identifier in order to make the final atom identifier
#     graph invariant
#
sub _AssignAtomIdentifierToAtomNeighborhood {
  my($This, $AtomID, $Radius, $BondOrdersAndAtomIdentifiersRef) = @_;

  my $NextRadius = $Radius + 1;

  my @AtomIdentifiersInfo = ($NextRadius, $This->{AtomIdentifiers}{$Radius}{$AtomID});

  for my $BondOrder (sort { $a <=> $b } keys %{$BondOrdersAndAtomIdentifiersRef}) {
    my @SortedSuccessorAtomIdentifiers = sort { $a <=> $b } @{$BondOrdersAndAtomIdentifiersRef->{$BondOrder}};
    push @AtomIdentifiersInfo, map { ($BondOrder, $_) } @SortedSuccessorAtomIdentifiers;
  }

  # Assign atom identifier to the atom at next radius level...
  $This->{AtomIdentifiers}{$NextRadius}{$AtomID} = TextUtil::HashCode(join("", @AtomIdentifiersInfo));

  return $This;
}
646 | |
# Remove duplicate atom identifiers...
#
# Duplicates are first removed by value; structural duplicates are then removed from
# the atom identifiers which are unique by value.
#
sub _RemoveDuplicateAtomIdentifiers {
  my($This) = @_;

  for my $MethodName (qw(_RemoveDuplicateIdentifiersByValue _RemoveStructurallyDuplicateIdenfiers)) {
    $This->$MethodName();
  }

  return $This;
}
657 | |
# Remove duplicate identifiers at each radius level by just using their value...
#
# For each atom identifier value at a radius level, the AtomID of the first atom with
# that value is tracked along with the number of atoms having that value.
#
sub _RemoveDuplicateIdentifiersByValue {
  my($This) = @_;

  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    ATOM: for my $Atom (@{$This->{Atoms}}) {
      my $AtomID = $Atom->GetID();

      # Atoms without an identifier at this radius level are ignored...
      next ATOM unless exists $This->{AtomIdentifiers}{$Radius}{$AtomID};

      my $AtomIdentifier = $This->{AtomIdentifiers}{$Radius}{$AtomID};

      if (!exists $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier}) {
        # First atom with this identifier at this radius level...
        $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier} = $AtomID;
        $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} = 1;
      }
      else {
        # It's a duplicate atom identifier at this radius level...
        $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} += 1;
      }
    }
  }
  return $This;
}
682 | |
# Remove structurally duplicate identifiers at each radius level...
#
# Methodology:
#   . For unique atom identifiers at each radius level, assign complete structure features
#     in terms of all the bonds involved to generate that identifier
#   . Use the complete structure features to remove atom identifiers which are
#     structurally equivalent which can also be at earlier radii levels
#
# Count for an atom identifier which is structurally equivalent to an earlier one is
# added to the count of the earlier identifier.
#
sub _RemoveStructurallyDuplicateIdenfiers {
  my($This) = @_;

  # Setup structure features...
  $This->_SetupStructureFeaturesForAtomIDsInvolvedInUniqueIdentifiers();

  # Identify structurally unique identifiers: atom identifiers are processed in
  # ascending numerical order at each radius level...
  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    my @SortedAtomIdentifiers = sort { $a <=> $b } keys %{$This->{UniqueAtomIdentifiers}{$Radius}};

    for my $AtomIdentifier (@SortedAtomIdentifiers) {
      my $AtomID = $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier};

      my($SimilarAtomIdentifierRadius, $SimilarAtomIdentifier) = $This->_FindStructurallySimilarAtomIdentifier($Radius, $AtomID, $AtomIdentifier);

      my $AtomIdentifierCount = $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier};

      if (!$SimilarAtomIdentifier) {
        # It's a structurally unique atom identifier: its count starts out as the
        # unique atom identifier count...
        $This->{StructurallyUniqueAtomIdentifiers}{$Radius}{$AtomIdentifier} = $AtomID;
        $This->{StructurallyUniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} = $AtomIdentifierCount;
      }
      else {
        # Current atom identifier is similar to an earlier structurally unique atom identifier...
        $This->{StructurallyUniqueAtomIdentifiersCount}{$SimilarAtomIdentifierRadius}{$SimilarAtomIdentifier} += $AtomIdentifierCount;
      }
    }
  }
  return $This;
}
718 | |
# Set final fingerprints vector or bit-vector...
#
# Fingerprints are marked as generated and the method corresponding to the fingerprints
# type is invoked; nothing else is done for any other fingerprints type.
#
sub _SetFinalFingerprints {
  my($This) = @_;

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  # Fingerprints type pattern and the method to invoke for it: first match wins...
  my @TypePatternsAndMethodNames = (
    [ qr/^ExtendedConnectivity$/i, '_SetFinalExtendedConnectivityFingerprints' ],
    [ qr/^ExtendedConnectivityCount$/i, '_SetFinalExtendedConnectivityCountFingerprints' ],
    [ qr/^ExtendedConnectivityBits$/i, '_SetFinalExtendedConnectivityBitsFingerprints' ],
  );

  my $Type = $This->{Type};
  TYPE: for my $TypePatternAndMethodNameRef (@TypePatternsAndMethodNames) {
    my($TypePattern, $MethodName) = @{$TypePatternAndMethodNameRef};
    if ($Type =~ $TypePattern) {
      $This->$MethodName();
      last TYPE;
    }
  }

  return $This;
}
739 | |
# Set final extended connectivity fingerprints vector...
#
# Structurally unique atom identifiers, ordered by radius level and sorted numerically
# at each radius level, are added to the fingerprints vector as values.
#
sub _SetFinalExtendedConnectivityFingerprints {
  my($This) = @_;

  my @AtomIdentifiers = map {
    my $Radius = $_;
    sort { $a <=> $b } keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}};
  } (0 .. $This->{NeighborhoodRadius});

  # Add atom identifiers to fingerprint vector...
  $This->{FingerprintsVector}->AddValues(\@AtomIdentifiers);

  return $This;
}
758 | |
# Set final extended connectivity count fingerprints vector...
#
# Structurally unique atom identifiers, ordered by radius level and sorted numerically
# at each radius level, are added to the fingerprints vector as value IDs and their
# counts are added as values in the same order.
#
sub _SetFinalExtendedConnectivityCountFingerprints {
  my($This) = @_;

  my(@AtomIdentifiers, @AtomIdentifiersCount);

  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    my @SortedAtomIdentifiers = sort { $a <=> $b } keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}};

    push @AtomIdentifiers, @SortedAtomIdentifiers;
    push @AtomIdentifiersCount, map { $This->{StructurallyUniqueAtomIdentifiersCount}{$Radius}{$_} } @SortedAtomIdentifiers;
  }

  # Add atom identifiers to fingerprint vector as value IDs...
  $This->{FingerprintsVector}->AddValueIDs(\@AtomIdentifiers);

  # Add atom identifiers count to fingerprint vector as values...
  $This->{FingerprintsVector}->AddValues(\@AtomIdentifiersCount);

  return $This;
}
782 | |
# Set final extended connectivity bits fingerprints vector...
#
# Each structurally unique atom identifier, at every radius from 0 up to
# NeighborhoodRadius, is used to seed a random number generator; the first
# number drawn in the range of Size is truncated to an integer and used as the
# bit position to set in FingerprintsBitVector. Perl's core generator is used
# when UsePerlCoreRandom is true, otherwise the one supplied by MathUtil.
# Note that seeding the core generator changes its state for the rest of the
# program. Returns the object itself.
#
sub _SetFinalExtendedConnectivityBitsFingerprints {
  my($This) = @_;
  my($BitVector, $NumOfBits, $SkipBitPosCheck);

  $BitVector = $This->{FingerprintsBitVector};
  $NumOfBits = $This->{Size};

  # Passed along as the second argument of each SetBit call...
  $SkipBitPosCheck = 1;

  for my $Level (0 .. $This->{NeighborhoodRadius}) {
    for my $Seed (keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Level}}) {
      my($BitPos);

      # Seed the generator with the identifier and draw the bit position...
      if ($This->{UsePerlCoreRandom}) {
        CORE::srand($Seed);
        $BitPos = int(CORE::rand($NumOfBits));
      }
      else {
        MathUtil::srandom($Seed);
        $BitPos = int(MathUtil::random($NumOfBits));
      }

      $BitVector->SetBit($BitPos, $SkipBitPosCheck);
    }
  }
  return $This;
}
812 | |
813 | |
# Look for a previously recorded structurally unique atom identifier whose
# structure feature is equivalent to the feature of the specified atom...
#
# The feature of SpecifiedAtomID at SpecifiedRadius is compared against the
# features behind every structurally unique identifier at radii 1 through
# SpecifiedRadius. Two features are treated as equivalent when their atom counts
# are equal and every atom ID of the specified feature is present in the other
# feature.
#
# Returns (Radius, AtomIdentifier) of the first equivalent feature encountered,
# or (undef, undef) when there is none or SpecifiedRadius is 0.
# SpecifiedAtomIdentifier is accepted but not used in the comparison.
#
sub _FindStructurallySimilarAtomIdentifier {
  my($This, $SpecifiedRadius, $SpecifiedAtomID, $SpecifiedAtomIdentifier) = @_;
  my($TargetAtomCount, $TargetAtomIDsRef);

  # After duplicate removal by value, all identifiers at radius level 0 would be structurally unique...
  return (undef, undef) if ($SpecifiedRadius == 0);

  $TargetAtomCount = $This->{StructureFeatures}{AtomCount}{$SpecifiedRadius}{$SpecifiedAtomID};
  $TargetAtomIDsRef = $This->{StructureFeatures}{AtomIDs}{$SpecifiedRadius}{$SpecifiedAtomID};

  # No need to compare features at radius 0...
  for my $Level (1 .. $SpecifiedRadius) {
    CANDIDATE: for my $CandidateIdentifier (keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Level}}) {
      my($CandidateAtomID, $CandidateAtomCount, $CandidateAtomIDsRef, @MissingAtomIDs);

      $CandidateAtomID = $This->{StructurallyUniqueAtomIdentifiers}{$Level}{$CandidateIdentifier};

      $CandidateAtomCount = $This->{StructureFeatures}{AtomCount}{$Level}{$CandidateAtomID};
      $CandidateAtomIDsRef = $This->{StructureFeatures}{AtomIDs}{$Level}{$CandidateAtomID};

      # Different atom counts couldn't be structurally equivalent...
      next CANDIDATE unless ($TargetAtomCount == $CandidateAtomCount);

      # For structural equivalency, all atoms in the specified feature must also be
      # present in the previously identified structurally unique feature...
      @MissingAtomIDs = grep { !exists $CandidateAtomIDsRef->{$_} } keys %{$TargetAtomIDsRef};
      next CANDIDATE if (@MissingAtomIDs);

      # Found structurally equivalent feature...
      return ($Level, $CandidateIdentifier);
    }
  }
  return (undef, undef);
}
855 | |
# Setup structure features for atom IDs involved in unique atom identifiers at all
# radii levels...
#
# Structure features are re-initialized first. Then, for every atom ID referenced
# by UniqueAtomIdentifiers at any radius, and for each radius from 0 up to
# NeighborhoodRadius, two values are recorded under StructureFeatures:
#
#   AtomCount - count carried over from the previous radius plus the number of
#               non-hydrogen neighborhood atoms listed at this radius
#   AtomIDs   - hash of atom IDs carried over from the previous radius plus the
#               IDs of the non-hydrogen neighborhood atoms listed at this radius
#
# Returns the object itself.
#
sub _SetupStructureFeaturesForAtomIDsInvolvedInUniqueIdentifiers {
  my($This) = @_;
  my(%CentralAtomIDs);

  $This->_InitializeStructureFeatures();

  # Collect atom IDs involved in unique atom identifiers...
  %CentralAtomIDs = ();
  for my $Level (0 .. $This->{NeighborhoodRadius}) {
    for my $Identifier (keys %{$This->{UniqueAtomIdentifiers}{$Level}}) {
      my $CentralAtomID = $This->{UniqueAtomIdentifiers}{$Level}{$Identifier};
      $CentralAtomIDs{$CentralAtomID} = $CentralAtomID;
    }
  }

  # Setup structure features...
  for my $Level (0 .. $This->{NeighborhoodRadius}) {
    my $InnerLevel = $Level - 1;

    for my $CentralAtomID (keys %CentralAtomIDs) {
      my($FeatureAtomCount, %FeatureAtomIDs, @NeighborhoodEntries);

      $FeatureAtomCount = 0;
      %FeatureAtomIDs = ();

      # Start from the features accumulated for the atom at the previous radius level...
      if ($InnerLevel >= 0) {
        $FeatureAtomCount += $This->{StructureFeatures}{AtomCount}{$InnerLevel}{$CentralAtomID};
        %FeatureAtomIDs = %{$This->{StructureFeatures}{AtomIDs}{$InnerLevel}{$CentralAtomID}};
      }

      # Get all neighborhood atoms at this radius level; each entry holds the
      # neighborhood atom followed by a reference to its successor atoms...
      @NeighborhoodEntries = ();
      if (exists($This->{AtomNeighborhoods}{$Level}) && exists($This->{AtomNeighborhoods}{$Level}{$CentralAtomID})) {
        @NeighborhoodEntries = @{$This->{AtomNeighborhoods}{$Level}{$CentralAtomID}};
      }

      for my $Entry (@NeighborhoodEntries) {
        my($NeighborhoodAtom) = @{$Entry};

        # Hydrogens don't contribute to structure features...
        next if ($NeighborhoodAtom->IsHydrogen());

        my $NeighborhoodAtomID = $NeighborhoodAtom->GetID();
        $FeatureAtomCount++;
        $FeatureAtomIDs{$NeighborhoodAtomID} = $NeighborhoodAtomID;
      }

      # Assign structure features to atom at this radius level...
      $This->{StructureFeatures}{AtomCount}{$Level}{$CentralAtomID} = $FeatureAtomCount;
      $This->{StructureFeatures}{AtomIDs}{$Level}{$CentralAtomID} = \%FeatureAtomIDs;
    }
  }
  return $This;
}
909 | |
# Initialize structure features at each radius level...
#
# The StructureFeatures hash is emptied in place and repopulated with AtomCount
# and AtomIDs hashes, each holding one empty hash per radius level from 0 up to
# NeighborhoodRadius; those per-radius hashes are later accessed using atom IDs.
# Returns the object itself.
#
sub _InitializeStructureFeatures {
  my($This) = @_;

  # Empty the existing container in place so that it stays the same hash...
  %{$This->{StructureFeatures}} = ();

  for my $FeatureType ('AtomCount', 'AtomIDs') {
    $This->{StructureFeatures}{$FeatureType} = {};

    # Structure features at specific radii are accessed using atom IDs...
    for my $Level (0 .. $This->{NeighborhoodRadius}) {
      $This->{StructureFeatures}{$FeatureType}{$Level} = {};
    }
  }
  return $This;
}
930 | |
# Cache appropriate molecule data...
#
# Stores, in the Atoms array of the object, the list returned by the molecule's
# GetAtoms method when called with the "IsHydrogen" check method and the negate
# flag turned on, i.e. the non-hydrogen atoms. Returns the object itself.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;
  my($Molecule, $NegateAtomCheckMethod);

  $Molecule = $This->GetMolecule();

  # Get all non-hydrogen atoms...
  $NegateAtomCheckMethod = 1;
  @{$This->{Atoms}} = $Molecule->GetAtoms("IsHydrogen", $NegateAtomCheckMethod);

  return $This;
}
943 | |
# Clear cached molecule data...
#
# Empties the Atoms array of the object in place, creating it when it doesn't
# exist yet. Returns the object itself.
#
sub _ClearMoleculeDataCache {
  my($This) = @_;
  my($AtomsCacheRef);

  $AtomsCacheRef = \@{$This->{Atoms}};
  @{$AtomsCacheRef} = ();

  return $This;
}
953 | |
# Initialize atom identifier type information...
#
# Current supported values:
#
# AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# AtomicInvariantsAtomTypes and FunctionalClassAtomTypes trigger their own
# initialization methods; the other supported types need no setup. A warning is
# issued for any other value. Type names are matched ignoring case. Returns the
# object itself.
#
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
  }
  elsif ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    # Nothing to do for now...
  }
  else {
    carp "Warning: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  return $This;
}
981 | |
982 # Initialize atomic invariants atom types, generated by AtomTypes::AtomicInvariantsAtomTypes | |
983 # class, to use for generating initial atom identifiers... | |
984 # | |
985 # Let: | |
986 # AS = Atom symbol corresponding to element symbol | |
987 # | |
988 # X<n> = Number of non-hydrogen atom neighbors or heavy atoms attached to atom | |
989 # BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom | |
990 # LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom | |
991 # SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
992 # DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
993 # TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
994 # H<n> = Number of implicit and explicit hydrogens for atom | |
995 # Ar = Aromatic annotation indicating whether atom is aromatic | |
996 # RA = Ring atom annotation indicating whether atom is in a ring | |
997 # FC<+n/-n> = Formal charge assigned to atom | |
998 # MN<n> = Mass number indicating isotope other than most abundant isotope | |
999 # SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet) | |
1000 # | |
1001 # Then: | |
1002 # | |
1003 # Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
1004 # | |
1005 # AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n> | |
1006 # | |
1007 # Except for AS which is a required atomic invariant in atom types, all other atomic invariants are | |
1008 # optional. | |
1009 # | |
1010 # Default atomic invariants used for generating initial atom identifiers are [ Ref 24 ]: | |
1011 # | |
1012 # AS, X<n>, BO<n>, H<n>, FC<+n/-n>, MN<n> | |
1013 # | |
1014 # In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words | |
1015 # are also allowed: | |
1016 # | |
1017 # X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors | |
1018 # BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms | |
1019 # LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms | |
1020 # SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms | |
1021 # DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms | |
1022 # TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms | |
1023 # H : NumOfImplicitAndExplicitHydrogens | |
1024 # Ar : Aromatic | |
1025 # RA : RingAtom | |
1026 # FC : FormalCharge | |
1027 # MN : MassNumber | |
1028 # SM : SpinMultiplicity | |
1029 # | |
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;
  my(@DefaultAtomicInvariants);

  # Default atomic invariants to use for generating initial atom identifiers are: AS, X, BO, H, FC, MN
  #
  @DefaultAtomicInvariants = ('AS', 'X', 'BO', 'H', 'FC', 'MN');

  # Overwrite the contents of the existing array, creating it when missing...
  @{$This->{AtomicInvariantsToUse}} = @DefaultAtomicInvariants;

  return $This;
}
1040 | |
1041 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes | |
1042 # class, to use for generating initial atom identifiers... | |
1043 # | |
1044 # Let: | |
1045 # HBD: HydrogenBondDonor | |
1046 # HBA: HydrogenBondAcceptor | |
1047 # PI : PositivelyIonizable | |
1048 # NI : NegativelyIonizable | |
1049 # Ar : Aromatic | |
1050 # Hal : Halogen | |
1051 # H : Hydrophobic | |
1052 # RA : RingAtom | |
1053 # CA : ChainAtom | |
1054 # | |
1055 # Then: | |
1056 # | |
1057 # Functional class atom type specification for an atom corresponds to: | |
1058 # | |
1059 # Ar.CA.H.HBA.HBD.Hal.NI.PI.RA | |
1060 # | |
1061 # Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal | |
1062 # | |
1063 # FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]: | |
1064 # | |
1065 # HydrogenBondDonor: NH, NH2, OH | |
1066 # HydrogenBondAcceptor: N[!H], O | |
1067 # PositivelyIonizable: +, NH2 | |
1068 # NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH | |
1069 # | |
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;
  my(@DefaultFunctionalClasses);

  # Default functional class atom types to use for generating initial atom identifiers
  # are: HBD, HBA, PI, NI, Ar, Hal
  #
  @DefaultFunctionalClasses = ('HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal');

  # Overwrite the contents of the existing array, creating it when missing...
  @{$This->{FunctionalClassesToUse}} = @DefaultFunctionalClasses;

  return $This;
}
1081 | |
# Set atomic invariants to use for generation of initial atom identifiers...
#
# Values may be passed either as a list of atomic invariants or as a reference
# to an array of them; when the first value is an array reference, only that
# array is used.
#
# A warning is issued and nothing is returned when no values are specified or
# when AtomIdentifierType of the object is not AtomicInvariantsAtomTypes. The
# method croaks, leaving the current setting untouched, as soon as a specified
# value is rejected by AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable.
# Otherwise, AtomicInvariantsToUse is replaced by the specified values and the
# object itself is returned.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;
  my($FirstValue, $TypeOfFirstValue, $SpecifiedAtomicInvariant, @SpecifiedAtomicInvariants, @AtomicInvariantsToUse);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  if ($This->{AtomIdentifierType} !~ /^AtomicInvariantsAtomTypes$/i) {
    # Report this method's own name; the message used to name a different method...
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: AtomicInvariantsToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  $FirstValue = $Values[0];
  $TypeOfFirstValue = ref $FirstValue;

  @SpecifiedAtomicInvariants = ();
  @AtomicInvariantsToUse = ();

  if ($TypeOfFirstValue =~ /^ARRAY/) {
    push @SpecifiedAtomicInvariants, @{$FirstValue};
  }
  else {
    push @SpecifiedAtomicInvariants, @Values;
  }

  # Make sure specified AtomicInvariants are valid; all values are checked before
  # the current setting is modified...
  for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
    if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
      croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
    }
    push @AtomicInvariantsToUse, $SpecifiedAtomicInvariant;
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = ();
  push @{$This->{AtomicInvariantsToUse}}, @AtomicInvariantsToUse;

  return $This;
}
1126 | |
# Set functional classes to use for generation of initial atom identifiers...
#
# Values may be passed either as a list of functional classes or as a reference
# to an array of them; when the first value is an array reference, only that
# array is used.
#
# A warning is issued and nothing is returned when no values are specified or
# when AtomIdentifierType of the object is not FunctionalClassAtomTypes. The
# method croaks, leaving the current setting untouched, as soon as a specified
# value is rejected by AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable.
# Otherwise, FunctionalClassesToUse is replaced by the specified values and the
# object itself is returned.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;
  my(@RequestedFunctionalClasses, @ValidatedFunctionalClasses);

  unless (@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # Work on a copy of the values supplied as an array reference or as a list...
  @RequestedFunctionalClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified FunctionalClasses are valid...
  @ValidatedFunctionalClasses = ();
  for my $SpecifiedFunctionalClass (@RequestedFunctionalClasses) {
    AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)
      or croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";

    push @ValidatedFunctionalClasses, $SpecifiedFunctionalClass;
  }

  # Set functional classes to use...
  @{$This->{FunctionalClassesToUse}} = @ValidatedFunctionalClasses;

  return $This;
}
1170 | |
# Return a string containing data for ExtendedConnectivityFingerprints object...
#
# The string is a "; " delimited list of "Name: Value" pairs. It always starts
# with the atom identifier type and the neighborhood radius. Atomic invariants or
# functional classes information (values in use, order and available values) is
# included for the corresponding atom identifier types. For ExtendedConnectivityBits
# type, it ends with size information, number of on bits, bit density and the
# stringified fingerprints bit vector; for all other types, it ends with the
# number of identifiers and the stringified fingerprints vector.
#
sub StringifyExtendedConnectivityFingerprints {
  my($This) = @_;
  my(@Sections);

  # Each section after the first one carries its own leading delimiter...
  @Sections = ();
  push @Sections, "InitialAtomIdentifierType: $This->{AtomIdentifierType}; NeighborhoodRadius: $This->{NeighborhoodRadius}";

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my(@AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants);

    @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

    @AtomicInvariants = map { my $AtomicInvariant = $_; "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}" } @AtomicInvariantsOrder;

    push @Sections, "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    push @Sections, "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
    push @Sections, "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my(@FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses);

    @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

    @FunctionalClasses = map { my $FunctionalClass = $_; "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}" } @FunctionalClassesOrder;

    push @Sections, "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    push @Sections, "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
    push @Sections, "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
  }

  if ($This->{Type} =~ /^ExtendedConnectivityBits$/i) {
    my($NumOfSetBits, $BitDensity);

    # Size...
    push @Sections, "; Size: $This->{Size}; MinSize: $This->{MinSize}; MaxSize: $This->{MaxSize}";

    # Fingerprint bit density and num of bits set...
    $NumOfSetBits = $This->{FingerprintsBitVector}->GetNumOfSetBits();
    $BitDensity = $This->{FingerprintsBitVector}->GetFingerprintsBitDensity();
    push @Sections, "; NumOfOnBits: $NumOfSetBits; BitDensity: $BitDensity";

    push @Sections, "; FingerprintsBitVector: < $This->{FingerprintsBitVector} >";
  }
  else {
    # Number of identifiers...
    push @Sections, "; NumOfIdentifiers: " . $This->{FingerprintsVector}->GetNumOfValues();

    # FingerprintsVector...
    push @Sections, "; FingerprintsVector: < $This->{FingerprintsVector} >";
  }

  return join('', @Sections);
}
1229 | |
1230 1; | |
1231 | |
1232 __END__ | |
1233 | |
1234 =head1 NAME | |
1235 | |
1236 ExtendedConnectivityFingerprints | |
1237 | |
1238 =head1 SYNOPSIS | |
1239 | |
1240 use Fingerprints::ExtendedConnectivityFingerprints; | |
1241 | |
1242 use Fingerprints::ExtendedConnectivityFingerprints qw(:all); | |
1243 | |
1244 =head1 DESCRIPTION | |
1245 | |
1246 ExtendedConnectivityFingerprints [ Ref 48, Ref 52 ] class provides the following methods: | |
1247 | |
1248 new, GenerateFingerprints, GetDescription, SetAtomIdentifierType, | |
1249 SetAtomicInvariantsToUse, SetFunctionalClassesToUse, SetNeighborhoodRadius, | |
1250 StringifyExtendedConnectivityFingerprints | |
1251 | |
1252 B<ExtendedConnectivityFingerprints> is derived from B<Fingerprints> class which in turn | |
1253 is derived from B<ObjectProperty> base class that provides methods not explicitly defined | |
1254 in B<ExtendedConnectivityFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's | |
1255 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property: | |
1256 | |
1257 Set<PropertyName>(<PropertyValue>); | |
1258 $PropertyValue = Get<PropertyName>(); | |
1259 Delete<PropertyName>(); | |
1260 | |
1261 The current release of MayaChemTools supports generation of B<ExtendedConnectivityFingerprints> | |
1262 corresponding to the following B<AtomIdentifierTypes>: | |
1263 | |
1264 AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
1265 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, | |
1266 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes | |
1267 | |
1268 Based on the values specified for B<AtomIdentifierType>, B<AtomicInvariantsToUse> | |
1269 and B<FunctionalClassesToUse>, initial atom types are assigned to all non-hydrogen atoms in | |
1270 a molecule and these atom types strings are converted into initial atom identifier integers using | |
1271 B<TextUtil::HashCode> function. The duplicate atom identifiers are removed. | |
1272 | |
1273 For B<NeighborhoodRadius> value of I<0>, the initial set of unique atom identifiers comprises | |
1274 the molecule fingerprints. Otherwise, atom neighborhoods are generated for each non-hydrogen | |
1275 atom up-to specified B<NeighborhoodRadius> value. For each non-hydrogen central atom | |
1276 at a specific radius, its neighbors at next radius level along with their bond orders and previously | |
1277 calculated atom identifiers are collected which in turn are used to generate a new integer | |
1278 atom identifier; the bond orders and atom identifier pairs list is first sorted by bond order | |
1279 followed by atom identifiers to make these values graph invariant. | |
1280 | |
1281 After integer atom identifiers have been generated for all non-hydrogen atoms at all specified | |
1282 neighborhood radii, the duplicate integer atom identifiers corresponding to same hash code | |
1283 value generated using B<TextUtil::HashCode> are tracked by keeping the atom identifiers at | |
1284 lower radius. Additionally, all structurally duplicate integer atom identifiers at each specified | |
1285 radius are also tracked by identifying equivalent atom and bonds corresponding to substructures | |
1286 used for generating atom identifier and keeping integer atom identifier with lowest value. | |
1287 | |
1288 For I<ExtendedConnectivity> value of fingerprints B<Type>, the duplicate identifiers are | |
1289 removed from the list and the unique atom identifiers constitute the extended connectivity | |
1290 fingerprints of a molecule. | |
1291 | |
1292 For I<ExtendedConnectivityCount> value of fingerprints B<Type>, the occurrences of each | |
1293 unique atom identifier are counted and the unique atom identifiers along with their | |
1294 count constitute the extended connectivity fingerprints of a molecule. | |
1295 | |
1296 For I<ExtendedConnectivityBits> value of fingerprints B<Type>, the unique atom identifiers | |
1297 are used as a random number seed to generate a random integer value between 0 and B<Size> which | |
1298 in turn is used to set corresponding bits in the fingerprint bit-vector string. | |
1299 | |
1300 The current release of MayaChemTools generates the following types of extended connectivity | |
1301 fingerprints vector strings: | |
1302 | |
1303 FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi | |
1304 us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391 | |
1305 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414 | |
1306 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103 | |
1307 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338 | |
1308 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303... | |
1309 | |
1310 FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes | |
1311 :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524 | |
1312 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 | |
1313 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...; | |
1314 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2 | |
1315 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1 | |
1316 | |
1317 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp | |
1318 es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100 | |
1319 0000000001010000000110000011000000000000100000000000000000000000100001 | |
1320 1000000110000000000000000000000000010011000000000000000000000000010000 | |
1321 0000000000000000000000000010000000000000000001000000000000000000000000 | |
1322 0000000000010000100001000000000000101000000000000000100000000000000... | |
1323 | |
1324 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp | |
1325 es:Radius2;1024;HexadecimalString;Ascending;000000010050c0600800000803 | |
1326 0300000091000004000000020000100000000124008200020000000040020000000000 | |
1327 2080000000820040010020000000008040000000000080001000000000400000000000 | |
1328 4040000090000061010000000800200000000000001400000000020080000000000020 | |
1329 00008020200000408000 | |
1330 | |
1331 FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu | |
1332 s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8 | |
1333 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567 | |
1334 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012 | |
1335 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455 | |
1336 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404... | |
1337 | |
1338 FingerprintsVector;ExtendedConnectivityCount:FunctionalClassAtomTypes: | |
1339 Radius2;57;NumericalValues;IDsAndValuesString;24769214 508787397 85039 | |
1340 3286 862102353 981185303 1231636850 1649386610 1941540674 263599683 32 | |
1341 9205671 571109041 639579325 683993318 723853089 810600886 885767127...; | |
1342 1 1 1 10 2 22 3 1 3 3 1 1 1 3 2 2 1 2 2 2 3 1 1 1 1 1 14 1 1 1 1 1 1 2 | |
1343 1 2 1 1 2 2 1 1 2 1 1 1 2 1 1 2 1 1 1 1 1 1 1 | |
1344 | |
1345 FingerprintsBitVector;ExtendedConnectivityBits:FunctionalClassAtomType | |
1346 s:Radius2;1024;BinaryString;Ascending;00000000000000000000100000000000 | |
1347 0000000001000100000000001000000000000000000000000000000000101000000010 | |
1348 0000001000000000010000000000000000000000000000000000000000000000000100 | |
1349 0000000000001000000000000001000000000001001000000000000000000000000000 | |
1350 0000000000000000100000000000001000000000000000000000000000000000000... | |
1351 | |
1352 FingerprintsVector;ExtendedConnectivity:DREIDINGAtomTypes:Radius2;56;A | |
1353 lphaNumericalValues;ValuesString;280305427 357928343 721790579 1151822 | |
1354 898 1207111054 1380963747 1568213839 1603445250 4559268 55012922 18094 | |
1355 0813 335715751 534801009 684609658 829361048 972945982 999881534 10076 | |
1356 55741 1213692591 1222032501 1224517934 1235687794 1244268533 152812070 | |
1357 0 1629595024 1856308891 1978806036 2001865095 2096549435 172675415 ... | |
1358 | |
1359 FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp | |
1360 haNumericalValues;ValuesString;25189973 528584866 662581668 671034184 | |
1361 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450 | |
1362 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430 | |
1363 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134 | |
1364 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566... | |
1365 | |
1366 FingerprintsVector;ExtendedConnectivity:MMFF94AtomTypes:Radius2;64;Alp | |
1367 haNumericalValues;ValuesString;224051550 746527773 998750766 103704190 | |
1368 2 1239701709 1248384926 1259447756 1521678386 1631549126 1909437580 20 | |
1369 37095052 2104274756 2117729376 8770364 31445800 81450228 314289324 344 | |
1370 041929 581773587 638555787 692022098 811840536 929651561 936421792 988 | |
1371 636432 1048624296 1054288509 1369487579 1454058929 1519352190 17271... | |
1372 | |
1373 FingerprintsVector;ExtendedConnectivity:SLogPAtomTypes:Radius2;71;Alph | |
1374 aNumericalValues;ValuesString;78989290 116507218 489454042 888737940 1 | |
1375 162561799 1241797255 1251494264 1263717127 1471206899 1538061784 17654 | |
1376 07295 1795036542 1809833874 2020454493 2055310842 2117729376 11868981 | |
1377 56731842 149505242 184525155 196984339 288181334 481409282 556716568 6 | |
1378 41915747 679881756 721736571 794256218 908276640 992898760 10987549... | |
1379 | |
1380 FingerprintsVector;ExtendedConnectivity:SYBYLAtomTypes:Radius2;58;Alph | |
1381 aNumericalValues;ValuesString;199957044 313356892 455463968 465982819 | |
1382 1225318176 1678585943 1883366064 1963811677 2117729376 113784599 19153 | |
1383 8837 196629033 263865277 416380653 477036669 681527491 730724924 90906 | |
1384 5537 1021959189 1133014972 1174311016 1359441203 1573452838 1661585138 | |
1385 1668649038 1684198062 1812312554 1859266290 1891651106 2072549404 ... | |
1386 | |
1387 FingerprintsVector;ExtendedConnectivity:TPSAAtomTypes:Radius2;47;Alpha | |
1388 NumericalValues;ValuesString;20818206 259344053 862102353 1331904542 1 | |
1389 700688206 265614156 363161397 681332588 810600886 885767127 950172500 | |
1390 951454814 1059668746 1247054493 1382302230 1399502637 1805025917 19189 | |
1391 39561 2114677228 2126402271 8130483 17645742 32278373 149975755 160327 | |
1392 654 256360355 279492740 291251259 317592700 333763396 972105960 101... | |
1393 | |
1394 FingerprintsVector;ExtendedConnectivity:UFFAtomTypes:Radius2;56;AlphaN | |
1395 umericalValues;ValuesString;280305427 357928343 721790579 1151822898 1 | |
1396 207111054 1380963747 1568213839 1603445250 4559268 55012922 180940813 | |
1397 335715751 534801009 684609658 829361048 972945982 999881534 1007655741 | |
1398 1213692591 1222032501 1224517934 1235687794 1244268533 1528120700 162 | |
1399 9595024 1856308891 1978806036 2001865095 2096549435 172675415 18344... | |
1400 | |
1401 =head2 METHODS | |
1402 | |
1403 =over 4 | |
1404 | |
1405 =item B<new> | |
1406 | |
1407 $NewExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints( | |
1408 %NamesAndValues); | |
1409 | |
1410 Using specified I<ExtendedConnectivityFingerprints> property names and values hash, B<new> | |
1411 method creates a new object and returns a reference to newly created B<ExtendedConnectivityFingerprints> | |
1412 object. By default, the following properties are initialized: | |
1413 | |
1414 Molecule = '' | |
1415 Type = 'ExtendedConnectivity' | |
1416 NeighborhoodRadius = 2 | |
1417 AtomIdentifierType = '' | |
1418 AtomicInvariantsToUse = ['AS', 'X', 'BO', 'H', 'FC', 'MN'] | |
1419 FunctionalClassesToUse = ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal'] | |
1420 | |
1421 Examples: | |
1422 | |
1423 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints( | |
1424 'Molecule' => $Molecule, | |
1425 'AtomIdentifierType' => | |
1426 'AtomicInvariantsAtomTypes'); | |
1427 | |
1428 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints( | |
1429 'Type' => 'ExtendedConnectivityCount', | |
1430 'Molecule' => $Molecule, | |
1431 'AtomIdentifierType' => | |
1432 'AtomicInvariantsAtomTypes'); | |
1433 | |
1434 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints( | |
1435 'Type' => 'ExtendedConnectivityBits', | |
1436 'Molecule' => $Molecule, | |
1437 'Size' => 1024, | |
1438 'AtomIdentifierType' => | |
1439 'AtomicInvariantsAtomTypes'); | |
1440 | |
1441 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints( | |
1442 'Type' => 'ExtendedConnectivity', | |
1443 'Molecule' => $Molecule, | |
1444 'NeighborhoodRadius' => 2, | |
1445 'AtomIdentifierType' => | |
1446 'AtomicInvariantsAtomTypes', | |
1447 'AtomicInvariantsToUse' => | |
1448 ['AS', 'X', 'BO', 'H', 'FC', 'MN'] ); | |
1449 | |
1450 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints( | |
1451 'Type' => 'ExtendedConnectivity', | |
1452 'Molecule' => $Molecule, | |
1453 'NeighborhoodRadius' => 2, | |
1454 'AtomIdentifierType' => | |
1455 'FunctionalClassAtomTypes', | |
1456 'FunctionalClassesToUse' => | |
1457 ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal'] ); | |
1458 | |
1459 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints( | |
1460 'Type' => 'ExtendedConnectivity', | |
1461 'Molecule' => $Molecule, | |
1462 'AtomIdentifierType' => | |
1463 'MMFF94AtomTypes'); | |
1464 | |
1465 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints( | |
1466 'Type' => 'ExtendedConnectivityCount', | |
1467 'Molecule' => $Molecule, | |
1468 'AtomIdentifierType' => | |
1469 'MMFF94AtomTypes'); | |
1470 | |
1471 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints( | |
1472 'Type' => 'ExtendedConnectivityCount', | |
1473 'Molecule' => $Molecule, | |
1474 'AtomIdentifierType' => | |
1475 'SLogPAtomTypes'); | |
1476 | |
1477 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints( | |
1478 'Type' => 'ExtendedConnectivity', | |
1479 'Molecule' => $Molecule, | |
1480 'AtomIdentifierType' => | |
1481 'SLogPAtomTypes'); | |
1482 | |
1483 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints( | |
1484 'Type' => 'ExtendedConnectivity', | |
1485 'Molecule' => $Molecule, | |
1486 'AtomIdentifierType' => | |
1487 'SYBYLAtomTypes'); | |
1488 | |
1489 $ExtendedConnectivityFingerprints->GenerateFingerprints(); | |
1490 print "$ExtendedConnectivityFingerprints\n"; | |
1491 | |
1492 =item B<GenerateFingerprints> | |
1493 | |
1494 $ExtendedConnectivityFingerprints->GenerateFingerprints(); | |
1495 | |
1496 Generates extended connectivity fingerprints and returns I<ExtendedConnectivityFingerprints>. | |
1497 | |
1498 =item B<GetDescription> | |
1499 | |
1500 $Description = $ExtendedConnectivityFingerprints->GetDescription(); | |
1501 | |
1502 Returns a string containing a description of the extended connectivity | |
1503 fingerprints. | |
1504 | |
1505 =item B<SetAtomIdentifierType> | |
1506 | |
1507 $ExtendedConnectivityFingerprints->SetAtomIdentifierType($IdentifierType); | |
1508 | |
1509 Sets atom I<IdentifierType> to use during extended connectivity fingerprints generation and | |
1510 returns I<ExtendedConnectivityFingerprints>. | |
1511 | |
1512 Possible values: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
1513 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, | |
1514 TPSAAtomTypes, UFFAtomTypes>. | |
1515 | |
1516 =item B<SetAtomicInvariantsToUse> | |
1517 | |
1518 $ExtendedConnectivityFingerprints->SetAtomicInvariantsToUse($ValuesRef); | |
1519 $ExtendedConnectivityFingerprints->SetAtomicInvariantsToUse(@Values); | |
1520 | |
1521 Sets atomic invariants to use when I<AtomIdentifierType> is I<AtomicInvariantsAtomTypes> | |
1522 during extended connectivity fingerprints generation and returns I<ExtendedConnectivityFingerprints>. | |
1523 | |
1524 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB, | |
1525 H, Ar, RA, FC, MN, SM>. Default value [ Ref 24 ]: I<AS,X,BO,H,FC,MN>. | |
1526 | |
1527 The atomic invariants abbreviations correspond to: | |
1528 | |
1529 AS = Atom symbol corresponding to element symbol | |
1530 | |
1531 X<n> = Number of non-hydrogen atom neighbors or heavy atoms | |
1532 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms | |
1533 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms | |
1534 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms | |
1535 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms | |
1536 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms | |
1537 H<n> = Number of implicit and explicit hydrogens for atom | |
1538 Ar = Aromatic annotation indicating whether atom is aromatic | |
1539 RA = Ring atom annotation indicating whether atom is in a ring | |
1540 FC<+n/-n> = Formal charge assigned to atom | |
1541 MN<n> = Mass number indicating isotope other than most abundant isotope | |
1542 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or | |
1543 3 (triplet) | |
1544 | |
1545 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
1546 | |
1547 AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n> | |
1548 | |
1549 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are | |
1550 optional. Atom type specification doesn't include atomic invariants with zero or undefined values. | |
1551 | |
1552 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words | |
1553 are also allowed: | |
1554 | |
1555 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors | |
1556 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms | |
1557 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms | |
1558 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms | |
1559 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms | |
1560 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms | |
1561 H : NumOfImplicitAndExplicitHydrogens | |
1562 Ar : Aromatic | |
1563 RA : RingAtom | |
1564 FC : FormalCharge | |
1565 MN : MassNumber | |
1566 SM : SpinMultiplicity | |
1567 | |
1568 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant | |
1569 atom types. | |
1570 | |
1571 =item B<SetFunctionalClassesToUse> | |
1572 | |
1573 $ExtendedConnectivityFingerprints->SetFunctionalClassesToUse($ValuesRef); | |
1574 $ExtendedConnectivityFingerprints->SetFunctionalClassesToUse(@Values); | |
1575 | |
1576 Sets functional classes to use when I<AtomIdentifierType> is I<FunctionalClassAtomTypes> | |
1577 during extended connectivity fingerprints generation and returns I<ExtendedConnectivityFingerprints>. | |
1578 | |
1579 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>. | |
1580 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>. | |
1581 | |
1582 The functional class abbreviations correspond to: | |
1583 | |
1584 HBD: HydrogenBondDonor | |
1585 HBA: HydrogenBondAcceptor | |
1586 PI : PositivelyIonizable | |
1587 NI : NegativelyIonizable | |
1588 Ar : Aromatic | |
1589 Hal : Halogen | |
1590 H : Hydrophobic | |
1591 RA : RingAtom | |
1592 CA : ChainAtom | |
1593 | |
1594 Functional class atom type specification for an atom corresponds to: | |
1595 | |
1596 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA or None | |
1597 | |
1598 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom | |
1599 types. It uses the following definitions [ Ref 60-61, Ref 65-66 ]: | |
1600 | |
1601 HydrogenBondDonor: NH, NH2, OH | |
1602 HydrogenBondAcceptor: N[!H], O | |
1603 PositivelyIonizable: +, NH2 | |
1604 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH | |
1605 | |
1606 =item B<SetNeighborhoodRadius> | |
1607 | |
1608 $ExtendedConnectivityFingerprints->SetNeighborhoodRadius($Radius); | |
1609 | |
1610 Sets neighborhood radius to use during extended connectivity fingerprints generation and | |
1611 returns I<ExtendedConnectivityFingerprints>. | |
1612 | |
1613 =item B<StringifyExtendedConnectivityFingerprints> | |
1614 | |
1615 $String = $Fingerprints->StringifyExtendedConnectivityFingerprints(); | |
1616 | |
1617 Returns a string containing information about I<ExtendedConnectivityFingerprints> object. | |
1618 | |
1619 =back | |
1620 | |
1621 =head1 AUTHOR | |
1622 | |
1623 Manish Sud <msud@san.rr.com> | |
1624 | |
1625 =head1 SEE ALSO | |
1626 | |
1627 Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm, | |
1628 AtomTypesFingerprints.pm, EStateIndiciesFingerprints.pm, MACCSKeys.pm, | |
1629 PathLengthFingerprints.pm, TopologicalAtomPairsFingerprints.pm, | |
1630 TopologicalAtomTripletsFingerprints.pm, TopologicalAtomTorsionsFingerprints.pm, | |
1631 TopologicalPharmacophoreAtomPairsFingerprints.pm, | |
1632 TopologicalPharmacophoreAtomTripletsFingerprints.pm | |
1633 | |
1634 | |
1635 =head1 COPYRIGHT | |
1636 | |
1637 Copyright (C) 2015 Manish Sud. All rights reserved. | |
1638 | |
1639 This file is part of MayaChemTools. | |
1640 | |
1641 MayaChemTools is free software; you can redistribute it and/or modify it under | |
1642 the terms of the GNU Lesser General Public License as published by the Free | |
1643 Software Foundation; either version 3 of the License, or (at your option) | |
1644 any later version. | |
1645 | |
1646 =cut |