comparison lib/Fingerprints/ExtendedConnectivityFingerprints.pm @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 package Fingerprints::ExtendedConnectivityFingerprints;
2 #
3 # $RCSfile: ExtendedConnectivityFingerprints.pm,v $
4 # $Date: 2015/02/28 20:48:54 $
5 # $Revision: 1.39 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Carp;
31 use Exporter;
32 use TextUtil ();
33 use MathUtil ();
34 use Fingerprints::Fingerprints;
35 use Molecule;
36 use AtomTypes::AtomicInvariantsAtomTypes;
37 use AtomTypes::FunctionalClassAtomTypes;
38 use AtomTypes::DREIDINGAtomTypes;
39 use AtomTypes::EStateAtomTypes;
40 use AtomTypes::MMFF94AtomTypes;
41 use AtomTypes::SLogPAtomTypes;
42 use AtomTypes::SYBYLAtomTypes;
43 use AtomTypes::TPSAAtomTypes;
44 use AtomTypes::UFFAtomTypes;
45
46 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
47
48 @ISA = qw(Fingerprints::Fingerprints Exporter);
49 @EXPORT = qw();
50 @EXPORT_OK = qw();
51
52 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);
53
54 # Setup class variables...
55 my($ClassName);
56 _InitializeClass();
57
58 # Overload Perl functions...
59 use overload '""' => 'StringifyExtendedConnectivityFingerprints';
60
# Class constructor...
#
# Creates the object through the parent class constructor, loads the default
# settings and then applies the caller supplied property names and values.
#
# Parameters:
#   $Class          - Class name or an existing object of this class
#   %NamesAndValues - Property names and values handed over to the Set<Name> methods
#
# Returns the newly created object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Allow the constructor to be invoked on a class name or on an object...
  my $TargetClass = ref($Class) || $Class;
  my $This = bless $Class->SUPER::new(), $TargetClass;

  # Defaults first, caller specified values next...
  $This->_InitializeExtendedConnectivityFingerprints();
  $This->_InitializeExtendedConnectivityFingerprintsProperties(%NamesAndValues);

  return $This;
}
74
# Initialize object data...
#
# Loads the default settings and empty working data structures into the object.
#
sub _InitializeExtendedConnectivityFingerprints {
  my($This) = @_;

  my %DefaultSettings = (
    # Type of fingerprint to generate:
    #
    # ExtendedConnectivity - Set of integer identifiers corresponding to structurally unique features
    # ExtendedConnectivityCount - Set of integer identifiers corresponding to structurally unique features and their count
    # ExtendedConnectivityBits - A bit vector indicating presence/absence of structurally unique features
    #
    Type => 'ExtendedConnectivity',

    # Atomic neighborhoods radius for extended connectivity...
    NeighborhoodRadius => 2,

    # Size of bit vector to use during generation of ExtendedConnectivityBits fingerprints
    # along with its minimum and maximum allowed size...
    Size => 1024,
    MinSize => 32,
    MaxSize => 2**32,

    # Type of atom attributes to use for initial identifier assignment to non-hydrogen atoms
    # during the calculation of extended connectivity fingerprints [ Ref 48, Ref 52 ]...
    #
    # Currently supported values are: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes,
    # DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
    # TPSAAtomTypes, UFFAtomTypes
    #
    AtomIdentifierType => '',

    # Random number generator to use during generation of fingerprints bit-vector
    # string: Perl CORE::rand or MayaChemTools MathUtil::random function.
    #
    # The random number generator implemented in MayaChemTools is a variant of
    # linear congruential generator (LCG) as described by Miller et al. [ Ref 120 ].
    # It is also referred to as Lehmer random number generator or Park-Miller
    # random number generator.
    #
    # Unlike Perl's core random number generator function rand, the random number
    # generator implemented in MayaChemTools, MathUtil::random, generates consistent
    # random values across different platforms for a specific random seed and leads
    # to generation of portable fingerprints bit-vector strings.
    #
    UsePerlCoreRandom => 1,
  );
  @{$This}{keys %DefaultSettings} = values %DefaultSettings;

  # Working data, each starting out as an empty hash:
  #
  # AtomNeighborhoods - Atom neighborhoods up to specified neighborhood radius
  # AtomIdentifiers - Atom identifiers at different neighborhoods up to specified neighborhood radius
  # UniqueAtomIdentifiers/Count - Unique atom identifiers, and their counts, at different neighborhoods
  # StructurallyUniqueAtomIdentifiers/Count - Structurally unique atom identifiers, and their counts,
  #   at different neighborhoods
  #
  my $WorkingDataName;
  for $WorkingDataName (qw(AtomNeighborhoods AtomIdentifiers UniqueAtomIdentifiers UniqueAtomIdentifiersCount StructurallyUniqueAtomIdentifiers StructurallyUniqueAtomIdentifiersCount)) {
    %{$This->{$WorkingDataName}} = ();
  }

  # Structure feature information at different neighborhoods up to specified neighborhood
  # radius used during removal of atom identifiers which are structurally equivalent...
  %{$This->{StructureFeatures}} = ();
}
140
# Initialize class...
#
# Stores the name of this package in the class variable used as prefix in
# error and warning messages.
#
sub _InitializeClass {
  return $ClassName = __PACKAGE__;
}
146
# Initialize object properties...
#
# Applies each specified name and value pair through the corresponding Set<Name>
# method, validates required parameters and initializes the vector matching
# the fingerprints type.
#
# Croaks when Molecule or AtomIdentifierType is missing, when a specified Size is
# not a power of 2, or when the fingerprints type is unknown.
#
# Returns the object.
#
sub _InitializeExtendedConnectivityFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  # Hand each value over to its setter method...
  for my $Name (keys %NamesAndValues) {
    my $SetterName = "Set${Name}";
    $This->$SetterName($NamesAndValues{$Name});
  }

  # Make sure molecule object was specified...
  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule..."
    unless exists $NamesAndValues{Molecule};

  # Make sure AtomIdentifierType was specified...
  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType..."
    unless exists $NamesAndValues{AtomIdentifierType};

  # Make sure size, when specified, is a power of 2...
  if (exists $NamesAndValues{Size} && !TextUtil::IsNumberPowerOfNumber($NamesAndValues{Size}, 2)) {
    croak "Error: ${ClassName}->New: Specified size value, $NamesAndValues{Size}, must be power of 2...";
  }

  # Pick the vector initialization method for the fingerprints type...
  my $InitializerName = ($This->{Type} =~ /^ExtendedConnectivity$/i) ? '_InitializeExtendedConnectivityFingerprintsVector'
    : ($This->{Type} =~ /^ExtendedConnectivityCount$/i) ? '_InitializeExtendedConnectivityCountFingerprintsVector'
    : ($This->{Type} =~ /^ExtendedConnectivityBits$/i) ? '_InitializeExtendedConnectivityBitsFingerprintsBitVector'
    : undef;

  if (!defined $InitializerName) {
    croak "Error: ${ClassName}->_InitializeExtendedConnectivityFingerprintsProperties: Unknown ExtendedConnectivity fingerprints type: $This->{Type}; Supported fingerprints types: ExtendedConnectivity, ExtendedConnectivityCount or ExtendedConnectivityBits...";
  }
  $This->$InitializerName();

  return $This;
}
189
# Initialize extended connectivity fingerprints vector...
#
# Marks the object as using a FingerprintsVector holding AlphaNumericalValues
# and then invokes _InitializeFingerprintsVector.
#
# Returns the object.
#
sub _InitializeExtendedConnectivityFingerprintsVector {
  my($This) = @_;

  # Type of vector and type of FingerprintsVector...
  @{$This}{qw(VectorType FingerprintsVectorType)} = ('FingerprintsVector', 'AlphaNumericalValues');

  $This->_InitializeFingerprintsVector();

  return $This;
}
205
# Initialize extended connectivity count fingerprints vector...
#
# Marks the object as using a FingerprintsVector holding NumericalValues
# and then invokes _InitializeFingerprintsVector.
#
# Returns the object.
#
sub _InitializeExtendedConnectivityCountFingerprintsVector {
  my($This) = @_;

  # Type of vector and type of FingerprintsVector...
  @{$This}{qw(VectorType FingerprintsVectorType)} = ('FingerprintsVector', 'NumericalValues');

  $This->_InitializeFingerprintsVector();

  return $This;
}
221
# Initialize extended connectivity bits fingerprints bit vector...
#
# Marks the object as using a FingerprintsBitVector and then invokes
# _InitializeFingerprintsBitVector.
#
# Returns the object.
#
sub _InitializeExtendedConnectivityBitsFingerprintsBitVector {
  my($This) = @_;

  # Type of vector...
  my $VectorType = 'FingerprintsBitVector';
  $This->{VectorType} = $VectorType;

  $This->_InitializeFingerprintsBitVector();

  return $This;
}
234
# Set type...
#
# Sets the fingerprints type. The specified value is matched without regard to
# case and stored using its canonical spelling.
#
# Parameters:
#   $Type - ExtendedConnectivity, ExtendedConnectivityCount or ExtendedConnectivityBits
#
# Returns the object. Croaks on any other value; the error message reports the
# rejected value.
#
sub SetType {
  my($This, $Type) = @_;
  my($SupportedType);

  for $SupportedType (qw(ExtendedConnectivity ExtendedConnectivityCount ExtendedConnectivityBits)) {
    if ($Type =~ /^\Q$SupportedType\E$/i) {
      $This->{Type} = $SupportedType;
      return $This;
    }
  }

  # Report the value which was rejected and not the type currently set on the object...
  croak "Error: ${ClassName}->SetType: Unknown ExtendedConnectivity fingerprints type: $Type; Supported fingerprints types: ExtendedConnectivity, ExtendedConnectivityCount or ExtendedConnectivityBits...";
}
254
# Disable vector type change...
#
# Always croaks: the vector type can't be changed through this method.
#
sub SetVectorType {
  croak "Error: ${ClassName}->SetVectorType: Can't change vector type...";
}
264
# Disable fingerprints vector type change...
#
# Always croaks: the fingerprints vector type can't be changed through this method.
#
sub SetFingerprintsVectorType {
  croak "Error: ${ClassName}->SetFingerprintsVectorType: Can't change fingerprints vector type...";
}
274
# Set initial atom identifier type...
#
# Stores the atom type scheme used to assign initial atom identifiers and
# then invokes _InitializeAtomIdentifierTypeInformation.
#
# Parameters:
#   $IdentifierType - One of the supported atom types names, matched without regard to case
#
# Returns the object. Croaks for an unsupported value or when an identifier type
# has already been set on the object.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  # Reject unsupported values...
  croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes and UFFAtomTypes."
    unless $IdentifierType =~ /^(AtomicInvariantsAtomTypes|FunctionalClassAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i;

  # The identifier type can be set only once...
  croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type: It's already set..."
    if $This->{AtomIdentifierType};

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}
295
# Set atom neighborhood radius...
#
# Parameters:
#   $Value - Neighborhood radius; it must be an integer >= 0
#
# Returns the object. Croaks when the value is not an integer or is negative.
#
sub SetNeighborhoodRadius {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid: It must be an integer..."
    unless TextUtil::IsInteger($Value);

  croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid: It must be >= 0..."
    if $Value < 0;

  $This->{NeighborhoodRadius} = $Value;

  return $This;
}
312
# Generate fingerprints description...
#
# Returns the explicitly set description when the object has one; otherwise, a
# description of the form Type:AtomIdentifierType:Radius<NeighborhoodRadius> is
# generated from the current settings.
#
sub GetDescription {
  my($This) = @_;

  return exists $This->{Description}
    ? $This->{Description}
    : "$This->{Type}:$This->{AtomIdentifierType}:Radius$This->{NeighborhoodRadius}";
}
327
# Generate fingerprints...
#
# Methodology:
#   . Assign initial atom identifiers to all non-hydrogen atoms in the molecule
#
#   . Remove duplicates from the initial identifiers and add them to list corresponding
#     to molecule fingerprint
#
#   . For NeighborhoodRadius value of 0, just return the molecule fingerprint list
#
#   . For each NeighborhoodRadius level
#      . For each non-hydrogen CentralAtom at this NeighborhoodRadius level
#         . For each non-hydrogen SuccessorNeighborAtom
#           . Collect (BondOrder AtomIdentifier) pair of values corresponding to
#             (CentralAtom SuccessorNeighborAtom) and add it to a list
#
#         . Sort list containing (BondOrder AtomIdentifier) pairs first by BondOrder followed
#           by AtomIdentifiers to make these values graph invariant
#         . Generate a hash code for the values in the list
#         . Assign hash code as new atom identifier at the current NeighborhoodRadius level
#         . Save all atoms and bonds corresponding to the substructure involved in
#           generating the hash code to be used for identifying structural duplicate hash code
#
#         . Add the new identifier to the molecule fingerprint list making sure it's not a duplicate
#           identifier
#
#   Hash code atom identifier deduplication:
#     . Track/remove the identifier generated at higher neighborhood radius level
#
#   Structural atom identifier deduplication:
#     . For equivalent atoms and bonds corresponding to substructure at a NeighborhoodRadius level,
#       track/remove the atom identifier with largest value
#
# Returns the object. When initial atom identifiers can't be assigned, a warning is
# issued, no fingerprints are set and the cached molecule data is still cleared.
#
sub GenerateFingerprints {
  my($This) = @_;

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Assign initial atom identifiers...
  if (!$This->_AssignInitialAtomIdentifiers()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";

    # Clear cached molecule data on this early exit as well so that it doesn't
    # stay around on the object after an unsuccessful run...
    $This->_ClearMoleculeDataCache();

    return $This;
  }

  # Identify atom neighborhoods up to specified radius...
  $This->_GetAtomNeighborhoods();

  # Assign atom identifiers to central atoms considering atom neighborhoods at each
  # radius level...
  $This->_AssignAtomIdentifiersToAtomNeighborhoods();

  # Remove duplicate identifiers...
  $This->_RemoveDuplicateAtomIdentifiers();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}
392
# Assign appropriate initial atom identifiers...
#
# Generation of initial identifier for a specific atom involves:
#   . An atom types object matching AtomIdentifierType is created for the molecule and
#     used to assign atom types
#   . The atom type assigned to the atom, or the string 'None' when the atom has no
#     atom type, is used as the initial atom identifier string
#   . The value returned by TextUtil::HashCode function for the identifier string is
#     assigned to the atom as its initial atom identifier at radius level 0
#
# Returns the object on success and undef when atom types assignment is reported as
# unsuccessful. Croaks for an unsupported AtomIdentifierType.
#
sub _AssignInitialAtomIdentifiers {
  my($This) = @_;

  # Initialize atom identifiers...
  $This->_InitializeAtomIdentifiers();

  # Set up atom types...
  my $IgnoreHydrogens = 1;

  # Supported atom types along with the parameters, in addition to Molecule, passed to
  # the constructor of the corresponding AtomTypes::<Name> class...
  my @SupportedAtomTypes = (
    ['AtomicInvariantsAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse}],
    ['FunctionalClassAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse}],
    ['DREIDINGAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
    ['EStateAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
    ['MMFF94AtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
    ['SLogPAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
    ['SYBYLAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
    ['TPSAAtomTypes', 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0],
    ['UFFAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
  );

  my($SpecifiedAtomTypes, $TypeSupported, $SupportedAtomTypeRef);
  $SpecifiedAtomTypes = undef;
  $TypeSupported = 0;

  SUPPORTEDTYPE: for $SupportedAtomTypeRef (@SupportedAtomTypes) {
    my($TypeName, @TypeParameters) = @{$SupportedAtomTypeRef};

    if ($This->{AtomIdentifierType} !~ /^\Q$TypeName\E$/i) {
      next SUPPORTEDTYPE;
    }

    # Invoke the constructor as a class method instead of relying on indirect object syntax...
    my $AtomTypesClass = "AtomTypes::${TypeName}";
    $SpecifiedAtomTypes = $AtomTypesClass->new('Molecule' => $This->{Molecule}, @TypeParameters);
    $TypeSupported = 1;

    last SUPPORTEDTYPE;
  }

  if (!$TypeSupported) {
    croak "Error: ${ClassName}->_AssignInitialAtomIdentifiers: Couldn't assign intial atom identifiers: InitialAtomIdentifierType $This->{AtomIdentifierType} is not supported...";
  }

  # Assign atom types...
  $SpecifiedAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return undef;
  }

  # Assign atom identifiers at radius 0...
  my $Radius = 0;
  for my $Atom (@{$This->{Atoms}}) {
    my $AtomID = $Atom->GetID();

    # Atoms without an assigned atom type are identified by the string 'None'...
    my $AtomType = $SpecifiedAtomTypes->GetAtomType($Atom);
    my $InitialAtomTypeString = $AtomType ? $AtomType : 'None';

    $This->{AtomIdentifiers}{$Radius}{$AtomID} = TextUtil::HashCode($InitialAtomTypeString);
  }

  return $This;
}
484
# Initialize atom identifiers...
#
# Clears all atom identifiers and sets up empty hashes for the atom identifiers,
# the unique identifiers and the structurally unique identifiers, along with their
# counts, at each radius level from 0 up to NeighborhoodRadius.
#
sub _InitializeAtomIdentifiers {
  my($This) = @_;

  # Atom identifiers key and value correspond to AtomID and AtomIdentifier. Unique and
  # structurally unique identifiers key and value correspond to AtomIdentifier and AtomID.
  #
  my @PerRadiusDataNames = qw(AtomIdentifiers UniqueAtomIdentifiers UniqueAtomIdentifiersCount StructurallyUniqueAtomIdentifiers StructurallyUniqueAtomIdentifiersCount);

  my $MaxRadius = $This->{NeighborhoodRadius};

  %{$This->{AtomIdentifiers}} = ();
  for my $CurrentRadius (0 .. $MaxRadius) {
    for my $DataName (@PerRadiusDataNames) {
      %{$This->{$DataName}{$CurrentRadius}} = ();
    }
  }

}
507
# Collect atom neighborhoods up to specified neighborhood radius...
#
# For each cached atom, the neighborhoods returned by the molecule method
# GetAtomNeighborhoodsWithSuccessorAtomsAndRadiusUpto are stored by radius level
# and atom ID, with the first returned neighborhood stored at radius level 0.
#
# Returns nothing for a NeighborhoodRadius less than 1 and the object otherwise.
#
sub _GetAtomNeighborhoods {
  my($This) = @_;

  %{$This->{AtomNeighborhoods}} = ();

  my $MaxRadius = $This->{NeighborhoodRadius};

  # At radius level 0, it's just the atoms...
  return if $MaxRadius < 1;

  # Initialize neighborhood at different radii...
  %{$This->{AtomNeighborhoods}{$_}} = () for (0 .. $MaxRadius);

  my $Molecule = $This->GetMolecule();

  # Collect available atom neighborhoods at different neighborhood levels for each atom...
  for my $Atom (@{$This->{Atoms}}) {
    my $AtomID = $Atom->GetID();
    my @NeighborhoodsByRadius = $Molecule->GetAtomNeighborhoodsWithSuccessorAtomsAndRadiusUpto($Atom, $MaxRadius);

    for my $CurrentRadius (0 .. $#NeighborhoodsByRadius) {
      $This->{AtomNeighborhoods}{$CurrentRadius}{$AtomID} = $NeighborhoodsByRadius[$CurrentRadius];
    }
  }
  return $This;
}
542
# Assign atom identifiers to central atom at each neighborhood radius level...
#
# Go over the atom neighborhoods at each radius up to specified radius and assign atom
# identifiers using their connected successor atoms and their identifiers.
#
# For a neighborhood atom at a specified radius, the successor connected atoms correspond
# to next radius level and the last set of neighborhood atoms don't have any successor connected
# atoms. Additionally, radius level 0 just corresponds to initial atom identifiers.
#
# So in order to process atom neighborhood up to specified radius level, the last atom neighborhood
# doesn't need to be processed: it gets processed at previous radius level as successor connected
# atoms.
#
# Returns nothing for a NeighborhoodRadius less than 1 and the object otherwise.
#
sub _AssignAtomIdentifiersToAtomNeighborhoods {
  my($This) = @_;

  return if $This->{NeighborhoodRadius} < 1;

  RADIUS: for my $Radius (0 .. ($This->{NeighborhoodRadius} - 1)) {
    ATOM: for my $Atom (@{$This->{Atoms}}) {
      my $AtomID = $Atom->GetID();

      # Are there any available atom neighborhoods at this radius?
      next ATOM unless exists $This->{AtomNeighborhoods}{$Radius}{$AtomID};

      # Go over neighborhood atoms and their successor connected atoms at this radius and collect
      # atom identifiers of the successor atoms grouped by the order of the bond to them; bond
      # order for aromatic bonds is set to 1.5...
      #
      my %BondOrdersAndAtomIdentifiers = ();
      my $SuccessorAtomCount = 0;

      NEIGHBORHOODS: for my $NeighborhoodRef (@{$This->{AtomNeighborhoods}{$Radius}{$AtomID}}) {
        my($NeighborhoodAtom, $SuccessorAtomsRef) = @{$NeighborhoodRef};

        # Any connected successors for the NeighborhoodAtom?
        next NEIGHBORHOODS unless @{$SuccessorAtomsRef};

        SUCCESSORATOM: for my $SuccessorAtom (@{$SuccessorAtomsRef}) {
          # Skip successor hydrogen atom...
          next SUCCESSORATOM if $SuccessorAtom->IsHydrogen();

          my $SuccessorAtomID = $SuccessorAtom->GetID();
          $SuccessorAtomCount++;

          my $Bond = $NeighborhoodAtom->GetBondToAtom($SuccessorAtom);
          my $BondOrder = $Bond->IsAromatic() ? "1.5" : $Bond->GetBondOrder();

          push @{$BondOrdersAndAtomIdentifiers{$BondOrder}}, $This->{AtomIdentifiers}{$Radius}{$SuccessorAtomID};
        }
      }

      # Nothing to assign without any non-hydrogen successor atoms...
      next ATOM unless $SuccessorAtomCount;

      # Assign a new atom identifier at the next radius level...
      $This->_AssignAtomIdentifierToAtomNeighborhood($AtomID, $Radius, \%BondOrdersAndAtomIdentifiers);
    }
  }
  return $This;
}
616
# Generate and assign atom identifier for AtomID using atom neighborhood at next radius level...
#
# The identifier string is made up of the next radius level, the atom identifier of
# the atom at the specified radius level and the (BondOrder AtomIdentifier) pairs of
# its successor atoms; its TextUtil::HashCode value becomes the atom identifier at the
# next radius level.
#
# Parameters:
#   $AtomID                          - ID of the central atom
#   $Radius                          - Current radius level
#   $BondOrdersAndAtomIdentifiersRef - Reference to a hash with bond orders as keys and
#                                      references to arrays of successor atom identifiers as values
#
# Returns the object.
#
sub _AssignAtomIdentifierToAtomNeighborhood {
  my($This, $AtomID, $Radius, $BondOrdersAndAtomIdentifiersRef) = @_;

  my $NextRadius = $Radius + 1;

  my @IdentifierParts = ($NextRadius, $This->{AtomIdentifiers}{$Radius}{$AtomID});

  # Sort out successor atom bond order and identifier pairs by bond order followed by atom identifiers
  # in order to make the final atom identifier graph invariant...
  #
  for my $BondOrder (sort { $a <=> $b } keys %{$BondOrdersAndAtomIdentifiersRef}) {
    my @SortedSuccessorIdentifiers = sort { $a <=> $b } @{$BondOrdersAndAtomIdentifiersRef->{$BondOrder}};
    push @IdentifierParts, map { ($BondOrder, $_) } @SortedSuccessorIdentifiers;
  }

  # Assign hash code of the identifier string to the atom at next radius level...
  $This->{AtomIdentifiers}{$NextRadius}{$AtomID} = TextUtil::HashCode(join("", @IdentifierParts));

  return $This;
}
646
# Remove duplicate atom identifiers...
#
# Duplicates are removed first by identifier value and then by structural
# equivalence, in that order.
#
# Returns the object.
#
sub _RemoveDuplicateAtomIdentifiers {
  my($This) = @_;

  for my $RemovalMethodName (qw(_RemoveDuplicateIdentifiersByValue _RemoveStructurallyDuplicateIdenfiers)) {
    $This->$RemovalMethodName();
  }

  return $This;
}
657
# Remove duplicate identifiers at each radius level by just using their value...
#
# At each radius level, the first atom encountered with a specific identifier value
# is recorded as the atom for that unique identifier; any later atom with the same
# value only increments the count of the unique identifier.
#
# Returns the object.
#
sub _RemoveDuplicateIdentifiersByValue {
  my($This) = @_;

  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    ATOM: for my $Atom (@{$This->{Atoms}}) {
      my $AtomID = $Atom->GetID();

      # No identifier assigned to the atom at this radius level...
      next ATOM unless exists $This->{AtomIdentifiers}{$Radius}{$AtomID};

      my $AtomIdentifier = $This->{AtomIdentifiers}{$Radius}{$AtomID};

      if (!exists $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier}) {
        # First occurrence of the atom identifier at this radius level...
        $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier} = $AtomID;
        $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} = 1;
      }
      else {
        # It's a duplicate atom identifier at this radius level...
        $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} += 1;
      }
    }
  }
  return $This;
}
682
# Remove structurally duplicate identifiers at each radius level...
#
# Methodology:
#   . For unique atom identifiers at each radius level, assign complete structure features
#     in terms of all the bonds involved to generate that identifier
#   . Use the complete structure features to remove atom identifiers which are
#     structurally equivalent which can also be at earlier radii levels
#
# The count of an identifier found to be structurally equivalent to an earlier
# structurally unique identifier is added to the count of that earlier identifier.
#
# Returns the object.
#
sub _RemoveStructurallyDuplicateIdenfiers {
  my($This) = @_;
  my($Radius, $AtomID, $AtomIdentifier, $SimilarAtomIdentifierRadius, $SimilarAtomIdentifier);

  # Setup structure features...
  $This->_SetupStructureFeaturesForAtomIDsInvolvedInUniqueIdentifiers();

  # Identify structurally unique identifiers...
  for $Radius (0 .. $This->{NeighborhoodRadius}) {
    ATOMIDENTIFIER: for $AtomIdentifier (sort { $a <=> $b } keys %{$This->{UniqueAtomIdentifiers}{$Radius}}) {
      $AtomID = $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier};

      ($SimilarAtomIdentifierRadius, $SimilarAtomIdentifier) = $This->_FindStructurallySimilarAtomIdentifier($Radius, $AtomID, $AtomIdentifier);

      # A missing match is reported as undef. Check for definedness instead of truth
      # so that a matching identifier with a value of 0 is not mistaken for no match...
      if (defined $SimilarAtomIdentifier) {
        # Current atom identifier is similar to an earlier structurally unique atom identifier...
        $This->{StructurallyUniqueAtomIdentifiersCount}{$SimilarAtomIdentifierRadius}{$SimilarAtomIdentifier} += $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier};
        next ATOMIDENTIFIER;
      }
      $This->{StructurallyUniqueAtomIdentifiers}{$Radius}{$AtomIdentifier} = $AtomID;

      # Set structurally unique atom identifier count to the unique atom identifiers count...
      $This->{StructurallyUniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} = $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier};
    }
  }
  return $This;
}
718
# Set final fingerprints vector...
#
# Marks the fingerprints as generated and invokes the method setting the final
# fingerprints for the current fingerprints type. Nothing further is done for an
# unrecognized type.
#
# Returns the object.
#
sub _SetFinalFingerprints {
  my($This) = @_;

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  my $SetterName = ($This->{Type} =~ /^ExtendedConnectivity$/i) ? '_SetFinalExtendedConnectivityFingerprints'
    : ($This->{Type} =~ /^ExtendedConnectivityCount$/i) ? '_SetFinalExtendedConnectivityCountFingerprints'
    : ($This->{Type} =~ /^ExtendedConnectivityBits$/i) ? '_SetFinalExtendedConnectivityBitsFingerprints'
    : undef;

  $This->$SetterName() if defined $SetterName;

  return $This;
}
739
# Set final extended connectivity fingerprints vector...
#
# Structurally unique atom identifiers are collected radius level by radius level,
# sorted numerically within each level, and added to the fingerprints vector as values.
#
# Returns the object.
#
sub _SetFinalExtendedConnectivityFingerprints {
  my($This) = @_;

  my @CollectedIdentifiers = ();
  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    push @CollectedIdentifiers, sort { $a <=> $b } keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}};
  }

  # Add atom identifiers to fingerprint vector...
  $This->{FingerprintsVector}->AddValues(\@CollectedIdentifiers);

  return $This;
}
758
# Set final extended connectivity count fingerprints vector...
#
# Structurally unique atom identifiers, sorted numerically within each radius level,
# are added to the fingerprints vector as value IDs and their counts are added as
# the corresponding values.
#
# Returns the object.
#
sub _SetFinalExtendedConnectivityCountFingerprints {
  my($This) = @_;

  my(@CollectedIdentifiers, @CollectedCounts);

  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    for my $AtomIdentifier (sort { $a <=> $b } keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}}) {
      my $AtomIdentifierCount = $This->{StructurallyUniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier};

      push @CollectedIdentifiers, $AtomIdentifier;
      push @CollectedCounts, $AtomIdentifierCount;
    }
  }

  my $FingerprintsVector = $This->{FingerprintsVector};

  # Add atom identifiers to fingerprint vector as value IDs...
  $FingerprintsVector->AddValueIDs(\@CollectedIdentifiers);

  # Add atom identifiers count to fingerprint vector as values...
  $FingerprintsVector->AddValues(\@CollectedCounts);

  return $This;
}
782
# Set final extended connectivity bits fingerprints bit vector...
#
# For each structurally unique atom identifier, the random number generator selected
# by UsePerlCoreRandom is seeded with the identifier and the integer part of the random
# number generated for Size is used as the position of the bit to set in the
# fingerprints bit vector.
#
# Returns the object.
#
sub _SetFinalExtendedConnectivityBitsFingerprints {
  my($This) = @_;

  my $FingerprintsBitVector = $This->{FingerprintsBitVector};
  my $Size = $This->{Size};

  # Flag passed as second parameter to SetBit...
  my $SkipBitPosCheck = 1;

  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    for my $AtomIdentifier (keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}}) {
      my $AtomIdentifierBitPos;

      # Set random number seed and generate bit position...
      if ($This->{UsePerlCoreRandom}) {
        CORE::srand($AtomIdentifier);
        $AtomIdentifierBitPos = int(CORE::rand($Size));
      }
      else {
        MathUtil::srandom($AtomIdentifier);
        $AtomIdentifierBitPos = int(MathUtil::random($Size));
      }

      $FingerprintsBitVector->SetBit($AtomIdentifierBitPos, $SkipBitPosCheck);
    }
  }
  return $This;
}
812
813
# Identify structurally unique identifiers by comparing structure features involved in
# generating an identifier against all the previously identified structurally unique
# identifiers...
#
# Two structure features are considered equivalent when their atom counts are equal and
# all the atom IDs in the specified feature are present in the other feature.
#
# Parameters:
#   $SpecifiedRadius         - Radius level of the identifier to check
#   $SpecifiedAtomID         - ID of the atom corresponding to the identifier
#   $SpecifiedAtomIdentifier - Identifier to check
#
# Returns the radius level and identifier of an equivalent structurally unique identifier
# or (undef, undef) when none is found.
#
sub _FindStructurallySimilarAtomIdentifier {
  my($This, $SpecifiedRadius, $SpecifiedAtomID, $SpecifiedAtomIdentifier) = @_;

  # After duplicate removal by value, all identifiers at radius level 0 would be structurally unique...
  return (undef, undef) if $SpecifiedRadius == 0;

  my $SpecifiedFeatureAtomCount = $This->{StructureFeatures}{AtomCount}{$SpecifiedRadius}{$SpecifiedAtomID};
  my $SpecifiedFeatureAtomIDsRef = $This->{StructureFeatures}{AtomIDs}{$SpecifiedRadius}{$SpecifiedAtomID};

  # No need to compare features at radius 0...
  for my $Radius (1 .. $SpecifiedRadius) {
    ATOMIDENTIFIER: for my $AtomIdentifier (keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}}) {
      my $AtomID = $This->{StructurallyUniqueAtomIdentifiers}{$Radius}{$AtomIdentifier};

      my $FeatureAtomCount = $This->{StructureFeatures}{AtomCount}{$Radius}{$AtomID};
      my $FeatureAtomIDsRef = $This->{StructureFeatures}{AtomIDs}{$Radius}{$AtomID};

      # Couldn't be structurally equivalent with different atom counts...
      next ATOMIDENTIFIER unless $SpecifiedFeatureAtomCount == $FeatureAtomCount;

      # For structural equivalency, all atoms in specified feature must also be present in a previously
      # identified structurally unique structure feature...
      my @MissingAtomIDs = grep { !exists $FeatureAtomIDsRef->{$_} } keys %{$SpecifiedFeatureAtomIDsRef};
      next ATOMIDENTIFIER if @MissingAtomIDs;

      # Found structurally equivalent feature...
      return ($Radius, $AtomIdentifier);
    }
  }
  return (undef, undef);
}
855
# Build structure features, at every radius level, for all atom IDs referenced by unique
# atom identifiers...
#
# The structure feature of an atom at a radius level consists of its feature at the previous
# radius level extended by the non-hydrogen atoms in its neighborhood at the current radius
# level. Both the atom count and the atom IDs are stored in StructureFeatures. Returns the
# object.
#
sub _SetupStructureFeaturesForAtomIDsInvolvedInUniqueIdentifiers {
  my($This) = @_;
  my(%InvolvedAtomIDs);

  $This->_InitializeStructureFeatures();

  # Collect atom IDs referenced by unique atom identifiers at any radius level...
  %InvolvedAtomIDs = ();
  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    for my $AtomIdentifier (keys %{$This->{UniqueAtomIdentifiers}{$Radius}}) {
      my $InvolvedAtomID = $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier};
      $InvolvedAtomIDs{$InvolvedAtomID} = $InvolvedAtomID;
    }
  }

  # Radius levels are processed in ascending order as each level builds on the previous one...
  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    my $PreviousRadius = $Radius - 1;

    for my $AtomID (keys %InvolvedAtomIDs) {
      my $FeatureAtomCount = 0;
      my %FeatureAtomIDs = ();

      # Start from the feature already assembled for this atom at the previous radius level...
      if ($PreviousRadius >= 0) {
        $FeatureAtomCount += $This->{StructureFeatures}{AtomCount}{$PreviousRadius}{$AtomID};
        %FeatureAtomIDs = %{$This->{StructureFeatures}{AtomIDs}{$PreviousRadius}{$AtomID}};
      }

      # Add non-hydrogen neighborhood atoms available at this radius level...
      if (exists($This->{AtomNeighborhoods}{$Radius}) && exists($This->{AtomNeighborhoods}{$Radius}{$AtomID})) {
        for my $NeighborhoodEntryRef (@{$This->{AtomNeighborhoods}{$Radius}{$AtomID}}) {
          # Each entry pairs a neighborhood atom with its successor atoms; only the atom is used here...
          my($NeighborhoodAtom) = @{$NeighborhoodEntryRef};

          unless ($NeighborhoodAtom->IsHydrogen()) {
            my $NeighborhoodAtomID = $NeighborhoodAtom->GetID();
            $FeatureAtomCount++;
            $FeatureAtomIDs{$NeighborhoodAtomID} = $NeighborhoodAtomID;
          }
        }
      }

      # Record the feature for this atom at this radius level...
      $This->{StructureFeatures}{AtomCount}{$Radius}{$AtomID} = $FeatureAtomCount;
      $This->{StructureFeatures}{AtomIDs}{$Radius}{$AtomID} = \%FeatureAtomIDs;
    }
  }
  return $This;
}
909
# Initialize structure features at each radius level...
#
# StructureFeatures is emptied in place and set up with AtomCount and AtomIDs entries,
# each holding an empty hash for every radius level from 0 to NeighborhoodRadius. Returns
# the object.
#
sub _InitializeStructureFeatures {
  my($This) = @_;

  # Discard any previously collected features...
  %{$This->{StructureFeatures}} = (AtomCount => {}, AtomIDs => {});

  # Structure features at a specific radius level are accessed using atom IDs...
  for my $CurrentRadius (0 .. $This->{NeighborhoodRadius}) {
    for my $FeatureName ('AtomCount', 'AtomIDs') {
      $This->{StructureFeatures}{$FeatureName}{$CurrentRadius} = {};
    }
  }
  return $This;
}
930
# Cache appropriate molecule data...
#
# Stores the atoms returned by the molecule's GetAtoms method, called with the
# IsHydrogen check negated, in Atoms and returns the object.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;
  my($Molecule, @NonHydrogenAtoms);

  # Get all non-hydrogen atoms: a true second argument negates the IsHydrogen check...
  $Molecule = $This->GetMolecule();
  @NonHydrogenAtoms = $Molecule->GetAtoms("IsHydrogen", 1);

  @{$This->{Atoms}} = @NonHydrogenAtoms;

  return $This;
}
943
# Clear cached molecule data...
#
# Empties Atoms in place and returns the object.
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  # Truncate the cached atoms list to zero elements...
  $#{$This->{Atoms}} = -1;

  return $This;
}
953
# Initialize atom identifier type information...
#
# Currently supported values:
#
# AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# Type names are matched case insensitively. A warning is issued for any other value.
# Returns the object.
#
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
  }
  elsif ($This->{AtomIdentifierType} !~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    # Remaining supported types need no initialization for now; anything else is unknown...
    carp "Warning: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }
  return $This;
}
981
982 # Initialize atomic invariants atom types, generated by AtomTypes::AtomicInvariantsAtomTypes
983 # class, to use for generating initial atom identifiers...
984 #
985 # Let:
986 # AS = Atom symbol corresponding to element symbol
987 #
988 # X<n> = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
989 # BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
990 # LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
991 # SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
992 # DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
993 # TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
994 # H<n> = Number of implicit and explicit hydrogens for atom
995 # Ar = Aromatic annotation indicating whether atom is aromatic
996 # RA = Ring atom annotation indicating whether atom is in a ring
997 # FC<+n/-n> = Formal charge assigned to atom
998 # MN<n> = Mass number indicating isotope other than most abundant isotope
999 # SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
1000 #
1001 # Then:
1002 #
1003 # Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
1004 #
1005 # AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
1006 #
1007 # Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
1008 # optional.
1009 #
1010 # Default atomic invariants used for generating initial atom identifiers are [ Ref 24 ]:
1011 #
1012 # AS, X<n>, BO<n>, H<n>, FC<+n/-n>, MN<n>
1013 #
1014 # In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words
1015 # are also allowed:
1016 #
1017 # X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
1018 # BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
1019 # LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
1020 # SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
1021 # DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
1022 # TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
1023 # H : NumOfImplicitAndExplicitHydrogens
1024 # Ar : Aromatic
1025 # RA : RingAtom
1026 # FC : FormalCharge
1027 # MN : MassNumber
1028 # SM : SpinMultiplicity
1029 #
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;
  my(@DefaultAtomicInvariants);

  # Default atomic invariants to use for generating initial atom identifiers are: AS, X, BO, H, FC, MN
  #
  @DefaultAtomicInvariants = qw(AS X BO H FC MN);

  # Replace the contents of the list in place...
  @{$This->{AtomicInvariantsToUse}} = @DefaultAtomicInvariants;

  return $This;
}
1040
1041 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
1042 # class, to use for generating initial atom identifiers...
1043 #
1044 # Let:
1045 # HBD: HydrogenBondDonor
1046 # HBA: HydrogenBondAcceptor
1047 # PI : PositivelyIonizable
1048 # NI : NegativelyIonizable
1049 # Ar : Aromatic
1050 # Hal : Halogen
1051 # H : Hydrophobic
1052 # RA : RingAtom
1053 # CA : ChainAtom
1054 #
1055 # Then:
1056 #
1057 # Functional class atom type specification for an atom corresponds to:
1058 #
1059 # Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
1060 #
1061 # Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
1062 #
1063 # FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
1064 #
1065 # HydrogenBondDonor: NH, NH2, OH
1066 # HydrogenBondAcceptor: N[!H], O
1067 # PositivelyIonizable: +, NH2
1068 # NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
1069 #
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;
  my(@DefaultFunctionalClasses);

  # Default functional class atom types to use for generating initial atom identifiers
  # are: HBD, HBA, PI, NI, Ar, Hal
  #
  @DefaultFunctionalClasses = qw(HBD HBA PI NI Ar Hal);

  # Replace the contents of the list in place...
  @{$This->{FunctionalClassesToUse}} = @DefaultFunctionalClasses;

  return $This;
}
1081
# Set atomic invariants to use for generation of initial atom identifiers...
#
# Values may be specified either as a list or as a reference to an array. Each value is
# checked using AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable; an
# unsupported value is a fatal error raised before AtomicInvariantsToUse is modified.
#
# A warning is issued and nothing is returned when no values are specified or when
# AtomIdentifierType is not AtomicInvariantsAtomTypes. Otherwise, the object is returned.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;
  my($FirstValue, $TypeOfFirstValue, $SpecifiedAtomicInvariant, @SpecifiedAtomicInvariants, @AtomicInvariantsToUse);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  if ($This->{AtomIdentifierType} !~ /^AtomicInvariantsAtomTypes$/i) {
    # The warning names this method so that it points at the actual caller mistake...
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: AtomicInvariantsToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  $FirstValue = $Values[0];
  $TypeOfFirstValue = ref $FirstValue;

  @SpecifiedAtomicInvariants = ();
  @AtomicInvariantsToUse = ();

  # An array reference as the first value supplies the whole list...
  if ($TypeOfFirstValue =~ /^ARRAY/) {
    push @SpecifiedAtomicInvariants, @{$FirstValue};
  }
  else {
    push @SpecifiedAtomicInvariants, @Values;
  }

  # Make sure specified AtomicInvariants are valid...
  for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
    if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
      croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
    }
    push @AtomicInvariantsToUse, $SpecifiedAtomicInvariant;
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = ();
  push @{$This->{AtomicInvariantsToUse}}, @AtomicInvariantsToUse;

  return $This;
}
1126
# Set functional classes to use for generation of initial atom identifiers...
#
# Values may be specified either as a list or as a reference to an array. Each value is
# checked using AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable; an
# unsupported value is a fatal error raised before FunctionalClassesToUse is modified.
#
# A warning is issued and nothing is returned when no values are specified or when
# AtomIdentifierType is not FunctionalClassAtomTypes. Otherwise, the object is returned.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;
  my($SpecifiedFunctionalClass, @RequestedFunctionalClasses, @ValidFunctionalClasses);

  unless (@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # An array reference as the first value supplies the whole list...
  @RequestedFunctionalClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified FunctionalClasses are valid...
  @ValidFunctionalClasses = ();
  for $SpecifiedFunctionalClass (@RequestedFunctionalClasses) {
    AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)
      or croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
    push @ValidFunctionalClasses, $SpecifiedFunctionalClass;
  }

  # Set functional classes to use...
  @{$This->{FunctionalClassesToUse}} = @ValidFunctionalClasses;

  return $This;
}
1170
# Return a string containing data for ExtendedConnectivityFingerprints object...
#
# The string always starts with the atom identifier type and the neighborhood radius. It is
# followed by atomic invariants or functional classes information for the corresponding
# atom identifier types, and ends with either bit vector or vector information depending
# on whether Type is ExtendedConnectivityBits.
#
sub StringifyExtendedConnectivityFingerprints {
  my($This) = @_;
  my(@Pieces);

  @Pieces = ("InitialAtomIdentifierType: $This->{AtomIdentifierType}; NeighborhoodRadius: $This->{NeighborhoodRadius}");

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my(@InvariantsOrder, @InvariantDescriptions, %AvailableInvariants);

    @InvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    %AvailableInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

    # Pair each abbreviation with its available description...
    @InvariantDescriptions = map { "$_: $AvailableInvariants{$_}" } @InvariantsOrder;

    push @Pieces, "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    push @Pieces, "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@InvariantsOrder, ", ", 0) . ">";
    push @Pieces, "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@InvariantDescriptions, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my(@ClassesOrder, @ClassDescriptions, %AvailableClasses);

    @ClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    %AvailableClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

    # Pair each abbreviation with its available description...
    @ClassDescriptions = map { "$_: $AvailableClasses{$_}" } @ClassesOrder;

    push @Pieces, "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    push @Pieces, "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@ClassesOrder, ", ", 0) . ">";
    push @Pieces, "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@ClassDescriptions, ", ", 0) . ">";
  }

  if ($This->{Type} =~ /^ExtendedConnectivityBits$/i) {
    # Size...
    push @Pieces, "; Size: $This->{Size}; MinSize: $This->{MinSize}; MaxSize: $This->{MaxSize}";

    # Fingerprint bit density and num of bits set...
    my $NumOfSetBits = $This->{FingerprintsBitVector}->GetNumOfSetBits();
    my $BitDensity = $This->{FingerprintsBitVector}->GetFingerprintsBitDensity();
    push @Pieces, "; NumOfOnBits: $NumOfSetBits; BitDensity: $BitDensity";

    push @Pieces, "; FingerprintsBitVector: < $This->{FingerprintsBitVector} >";
  }
  else {
    # Number of identifiers...
    push @Pieces, "; NumOfIdentifiers: " . $This->{FingerprintsVector}->GetNumOfValues();

    # FingerprintsVector...
    push @Pieces, "; FingerprintsVector: < $This->{FingerprintsVector} >";
  }

  return join('', @Pieces);
}
1229
1230 1;
1231
1232 __END__
1233
1234 =head1 NAME
1235
1236 ExtendedConnectivityFingerprints
1237
1238 =head1 SYNOPSIS
1239
1240 use Fingerprints::ExtendedConnectivityFingerprints;
1241
1242 use Fingerprints::ExtendedConnectivityFingerprints qw(:all);
1243
1244 =head1 DESCRIPTION
1245
1246 ExtendedConnectivityFingerprints [ Ref 48, Ref 52 ] class provides the following methods:
1247
1248 new, GenerateFingerprints, GetDescription, SetAtomIdentifierType,
1249 SetAtomicInvariantsToUse, SetFunctionalClassesToUse, SetNeighborhoodRadius,
1250 StringifyExtendedConnectivityFingerprints
1251
1252 B<ExtendedConnectivityFingerprints> is derived from B<Fingerprints> class which in turn
1253 is derived from B<ObjectProperty> base class that provides methods not explicitly defined
1254 in B<ExtendedConnectivityFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's
1255 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property:
1256
1257 Set<PropertyName>(<PropertyValue>);
1258 $PropertyValue = Get<PropertyName>();
1259 Delete<PropertyName>();
1260
1261 The current release of MayaChemTools supports generation of B<ExtendedConnectivityFingerprints>
1262 corresponding to following B<AtomIdentifierTypes>:
1263
1264 AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
1265 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
1266 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
1267
1268 Based on the values specified for B<AtomIdentifierType>, B<AtomicInvariantsToUse>
1269 and B<FunctionalClassesToUse>, initial atom types are assigned to all non-hydrogen atoms in
1270 a molecule and these atom types strings are converted into initial atom identifier integers using
1271 B<TextUtil::HashCode> function. The duplicate atom identifiers are removed.
1272
1273 For B<NeighborhoodRadius> value of I<0>, the initial set of unique atom identifiers comprises
1274 the molecule fingerprints. Otherwise, atom neighborhoods are generated for each non-hydrogen
1275 atom up-to specified B<NeighborhoodRadius> value. For each non-hydrogen central atom
1276 at a specific radius, its neighbors at next radius level along with their bond orders and previously
1277 calculated atom identifiers are collected which in turn are used to generate a new integer
1278 atom identifier; the bond orders and atom identifier pairs list is first sorted by bond order
1279 followed by atom identifiers to make these values graph invariant.
1280
1281 After integer atom identifiers have been generated for all non-hydrogen atoms at all specified
1282 neighborhood radii, the duplicate integer atom identifiers corresponding to same hash code
1283 value generated using B<TextUtil::HashCode> are tracked by keeping the atom identifiers at
1284 lower radius. Additionally, all structurally duplicate integer atom identifiers at each specified
1285 radius are also tracked by identifying equivalent atom and bonds corresponding to substructures
1286 used for generating atom identifier and keeping integer atom identifier with lowest value.
1287
1288 For I<ExtendedConnectivity> value of fingerprints B<Type>, the duplicate identifiers are
1289 removed from the list and the unique atom identifiers constitute the extended connectivity
1290 fingerprints of a molecule.
1291
1292 For I<ExtendedConnectivityCount> value of fingerprints B<Type>, the occurrences of each
1293 unique atom identifier are counted and the unique atom identifiers along with their
1294 counts constitute the extended connectivity fingerprints of a molecule.
1295
1296 For I<ExtendedConnectivityBits> value of fingerprints B<Type>, each unique atom identifier
1297 is used as a random number seed to generate a random integer value between 0 and B<Size> which
1298 in turn is used to set the corresponding bit in the fingerprint bit-vector string.
1299
1300 The current release of MayaChemTools generates the following types of extended connectivity
1301 fingerprints vector strings:
1302
1303 FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi
1304 us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391
1305 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414
1306 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103
1307 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338
1308 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303...
1309
1310 FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes
1311 :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524
1312 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649
1313 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...;
1314 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2
1315 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1
1316
1317 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp
1318 es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100
1319 0000000001010000000110000011000000000000100000000000000000000000100001
1320 1000000110000000000000000000000000010011000000000000000000000000010000
1321 0000000000000000000000000010000000000000000001000000000000000000000000
1322 0000000000010000100001000000000000101000000000000000100000000000000...
1323
1324 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp
1325 es:Radius2;1024;HexadecimalString;Ascending;000000010050c0600800000803
1326 0300000091000004000000020000100000000124008200020000000040020000000000
1327 2080000000820040010020000000008040000000000080001000000000400000000000
1328 4040000090000061010000000800200000000000001400000000020080000000000020
1329 00008020200000408000
1330
1331 FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu
1332 s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8
1333 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567
1334 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012
1335 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455
1336 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404...
1337
1338 FingerprintsVector;ExtendedConnectivityCount:FunctionalClassAtomTypes:
1339 Radius2;57;NumericalValues;IDsAndValuesString;24769214 508787397 85039
1340 3286 862102353 981185303 1231636850 1649386610 1941540674 263599683 32
1341 9205671 571109041 639579325 683993318 723853089 810600886 885767127...;
1342 1 1 1 10 2 22 3 1 3 3 1 1 1 3 2 2 1 2 2 2 3 1 1 1 1 1 14 1 1 1 1 1 1 2
1343 1 2 1 1 2 2 1 1 2 1 1 1 2 1 1 2 1 1 1 1 1 1 1
1344
1345 FingerprintsBitVector;ExtendedConnectivityBits:FunctionalClassAtomType
1346 s:Radius2;1024;BinaryString;Ascending;00000000000000000000100000000000
1347 0000000001000100000000001000000000000000000000000000000000101000000010
1348 0000001000000000010000000000000000000000000000000000000000000000000100
1349 0000000000001000000000000001000000000001001000000000000000000000000000
1350 0000000000000000100000000000001000000000000000000000000000000000000...
1351
1352 FingerprintsVector;ExtendedConnectivity:DREIDINGAtomTypes:Radius2;56;A
1353 lphaNumericalValues;ValuesString;280305427 357928343 721790579 1151822
1354 898 1207111054 1380963747 1568213839 1603445250 4559268 55012922 18094
1355 0813 335715751 534801009 684609658 829361048 972945982 999881534 10076
1356 55741 1213692591 1222032501 1224517934 1235687794 1244268533 152812070
1357 0 1629595024 1856308891 1978806036 2001865095 2096549435 172675415 ...
1358
1359 FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp
1360 haNumericalValues;ValuesString;25189973 528584866 662581668 671034184
1361 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450
1362 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430
1363 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134
1364 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566...
1365
1366 FingerprintsVector;ExtendedConnectivity:MMFF94AtomTypes:Radius2;64;Alp
1367 haNumericalValues;ValuesString;224051550 746527773 998750766 103704190
1368 2 1239701709 1248384926 1259447756 1521678386 1631549126 1909437580 20
1369 37095052 2104274756 2117729376 8770364 31445800 81450228 314289324 344
1370 041929 581773587 638555787 692022098 811840536 929651561 936421792 988
1371 636432 1048624296 1054288509 1369487579 1454058929 1519352190 17271...
1372
1373 FingerprintsVector;ExtendedConnectivity:SLogPAtomTypes:Radius2;71;Alph
1374 aNumericalValues;ValuesString;78989290 116507218 489454042 888737940 1
1375 162561799 1241797255 1251494264 1263717127 1471206899 1538061784 17654
1376 07295 1795036542 1809833874 2020454493 2055310842 2117729376 11868981
1377 56731842 149505242 184525155 196984339 288181334 481409282 556716568 6
1378 41915747 679881756 721736571 794256218 908276640 992898760 10987549...
1379
1380 FingerprintsVector;ExtendedConnectivity:SYBYLAtomTypes:Radius2;58;Alph
1381 aNumericalValues;ValuesString;199957044 313356892 455463968 465982819
1382 1225318176 1678585943 1883366064 1963811677 2117729376 113784599 19153
1383 8837 196629033 263865277 416380653 477036669 681527491 730724924 90906
1384 5537 1021959189 1133014972 1174311016 1359441203 1573452838 1661585138
1385 1668649038 1684198062 1812312554 1859266290 1891651106 2072549404 ...
1386
1387 FingerprintsVector;ExtendedConnectivity:TPSAAtomTypes:Radius2;47;Alpha
1388 NumericalValues;ValuesString;20818206 259344053 862102353 1331904542 1
1389 700688206 265614156 363161397 681332588 810600886 885767127 950172500
1390 951454814 1059668746 1247054493 1382302230 1399502637 1805025917 19189
1391 39561 2114677228 2126402271 8130483 17645742 32278373 149975755 160327
1392 654 256360355 279492740 291251259 317592700 333763396 972105960 101...
1393
1394 FingerprintsVector;ExtendedConnectivity:UFFAtomTypes:Radius2;56;AlphaN
1395 umericalValues;ValuesString;280305427 357928343 721790579 1151822898 1
1396 207111054 1380963747 1568213839 1603445250 4559268 55012922 180940813
1397 335715751 534801009 684609658 829361048 972945982 999881534 1007655741
1398 1213692591 1222032501 1224517934 1235687794 1244268533 1528120700 162
1399 9595024 1856308891 1978806036 2001865095 2096549435 172675415 18344...
1400
1401 =head2 METHODS
1402
1403 =over 4
1404
1405 =item B<new>
1406
1407 $NewExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints(
1408 %NamesAndValues);
1409
1410 Using specified I<ExtendedConnectivityFingerprints> property names and values hash, B<new>
1411 method creates a new object and returns a reference to newly created B<ExtendedConnectivityFingerprints>
1412 object. By default, the following properties are initialized:
1413
1414 Molecule = ''
1415 Type = 'ExtendedConnectivity'
1416 NeighborhoodRadius = 2
1417 AtomIdentifierType = ''
1418 AtomicInvariantsToUse = ['AS', 'X', 'BO', 'H', 'FC', 'MN']
1419 FunctionalClassesToUse = ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal']
1420
1421 Examples:
1422
1423 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints(
1424 'Molecule' => $Molecule,
1425 'AtomIdentifierType' =>
1426 'AtomicInvariantsAtomTypes');
1427
1428 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints(
1429 'Type' => 'ExtendedConnectivityCount',
1430 'Molecule' => $Molecule,
1431 'AtomIdentifierType' =>
1432 'AtomicInvariantsAtomTypes');
1433
1434 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints(
1435 'Type' => 'ExtendedConnectivityBits',
1436 'Molecule' => $Molecule,
1437 'Size' => 1024,
1438 'AtomIdentifierType' =>
1439 'AtomicInvariantsAtomTypes');
1440
1441 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints(
1442 'Type' => 'ExtendedConnectivity',
1443 'Molecule' => $Molecule,
1444 'NeighborhoodRadius' => 2,
1445 'AtomIdentifierType' =>
1446 'AtomicInvariantsAtomTypes',
1447 'AtomicInvariantsToUse' =>
1448 ['AS', 'X', 'BO', 'H', 'FC', 'MN'] );
1449
1450 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints(
1451 'Type' => 'ExtendedConnectivity',
1452 'Molecule' => $Molecule,
1453 'NeighborhoodRadius' => 2,
1454 'AtomIdentifierType' =>
1455 'FunctionalClassAtomTypes',
1456 'FunctionalClassesToUse' =>
1457 ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal'] );
1458
1459 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints(
1460 'Type' => 'ExtendedConnectivity',
1461 'Molecule' => $Molecule,
1462 'AtomIdentifierType' =>
1463 'MMFF94AtomTypes');
1464
1465 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints(
1466 'Type' => 'ExtendedConnectivityCount',
1467 'Molecule' => $Molecule,
1468 'AtomIdentifierType' =>
1469 'MMFF94AtomTypes');
1470
1471 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints(
1472 'Type' => 'ExtendedConnectivityCount',
1473 'Molecule' => $Molecule,
1474 'AtomIdentifierType' =>
1475 'SLogPAtomTypes');
1476
1477 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints(
1478 'Type' => 'ExtendedConnectivity',
1479 'Molecule' => $Molecule,
1480 'AtomIdentifierType' =>
1481 'SLogPAtomTypes');
1482
1483 $ExtendedConnectivityFingerprints = new ExtendedConnectivityFingerprints(
1484 'Type' => 'ExtendedConnectivity',
1485 'Molecule' => $Molecule,
1486 'AtomIdentifierType' =>
1487 'SYBYLAtomTypes');
1488
1489 $ExtendedConnectivityFingerprints->GenerateFingerprints();
1490 print "$ExtendedConnectivityFingerprints\n";
1491
1492 =item B<GenerateFingerprints>
1493
1494 $ExtendedConnectivityFingerprints->GenerateFingerprints();
1495
1496 Generates extended connectivity fingerprints and returns I<ExtendedConnectivityFingerprints>.
1497
1498 =item B<GetDescription>
1499
1500 $Description = $ExtendedConnectivityFingerprints->GetDescription();
1501
1502 Returns a string containing description of extended connectivity
1503 fingerprints.
1504
1505 =item B<SetAtomIdentifierType>
1506
1507 $ExtendedConnectivityFingerprints->SetAtomIdentifierType($IdentifierType);
1508
1509 Sets atom I<IdentifierType> to use during extended connectivity fingerprints generation and
1510 returns I<ExtendedConnectivityFingerprints>.
1511
1512 Possible values: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
1513 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
1514 TPSAAtomTypes, UFFAtomTypes>.
1515
1516 =item B<SetAtomicInvariantsToUse>
1517
1518 $ExtendedConnectivityFingerprints->SetAtomicInvariantsToUse($ValuesRef);
1519 $ExtendedConnectivityFingerprints->SetAtomicInvariantsToUse(@Values);
1520
1521 Sets atomic invariants to use during I<AtomicInvariantsAtomTypes> value of I<AtomIdentifierType>
1522 for extended connectivity fingerprints generation and returns I<ExtendedConnectivityFingerprints>.
1523
1524 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB,
1525 H, Ar, RA, FC, MN, SM>. Default value [ Ref 24 ]: I<AS,X,BO,H,FC,MN>.
1526
1527 The atomic invariants abbreviations correspond to:
1528
1529 AS = Atom symbol corresponding to element symbol
1530
1531 X<n> = Number of non-hydrogen atom neighbors or heavy atoms
1532 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms
1533 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms
1534 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms
1535 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms
1536 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms
1537 H<n> = Number of implicit and explicit hydrogens for atom
1538 Ar = Aromatic annotation indicating whether atom is aromatic
1539 RA = Ring atom annotation indicating whether atom is in a ring
1540 FC<+n/-n> = Formal charge assigned to atom
1541 MN<n> = Mass number indicating isotope other than most abundant isotope
1542 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or
1543 3 (triplet)
1544
1545 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
1546
1547 AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
1548
1549 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
1550 optional. Atom type specification doesn't include atomic invariants with zero or undefined values.
1551
1552 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words
1553 are also allowed:
1554
1555 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
1556 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
1557 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
1558 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
1559 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
1560 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
1561 H : NumOfImplicitAndExplicitHydrogens
1562 Ar : Aromatic
1563 RA : RingAtom
1564 FC : FormalCharge
1565 MN : MassNumber
1566 SM : SpinMultiplicity
1567
1568 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant
1569 atom types.
1570
1571 =item B<SetFunctionalClassesToUse>
1572
1573 $ExtendedConnectivityFingerprints->SetFunctionalClassesToUse($ValuesRef);
1574 $ExtendedConnectivityFingerprints->SetFunctionalClassesToUse(@Values);
1575
1576 Sets functional class invariants to use when I<AtomIdentifierType> is I<FunctionalClassAtomTypes>
1577 during extended connectivity fingerprints generation and returns I<ExtendedConnectivityFingerprints>.
1578
1579 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>.
1580 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>.
1581
1582 The functional class abbreviations correspond to:
1583
1584 HBD: HydrogenBondDonor
1585 HBA: HydrogenBondAcceptor
1586 PI : PositivelyIonizable
1587 NI : NegativelyIonizable
1588 Ar : Aromatic
1589 Hal : Halogen
1590 H : Hydrophobic
1591 RA : RingAtom
1592 CA : ChainAtom
1593
1594 Functional class atom type specification for an atom corresponds to:
1595
1596 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA or None
1597
1598 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom
1599 types. It uses the following definitions [ Ref 60-61, Ref 65-66 ]:
1600
1601 HydrogenBondDonor: NH, NH2, OH
1602 HydrogenBondAcceptor: N[!H], O
1603 PositivelyIonizable: +, NH2
1604 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
1605
1606 =item B<SetNeighborhoodRadius>
1607
1608 $ExtendedConnectivityFingerprints->SetNeighborhoodRadius($Radius);
1609
1610 Sets neighborhood radius to use during extended connectivity fingerprints generation and
1611 returns I<ExtendedConnectivityFingerprints>.
1612
1613 =item B<StringifyExtendedConnectivityFingerprints>
1614
1615 $String = $Fingerprints->StringifyExtendedConnectivityFingerprints();
1616
1617 Returns a string containing information about I<ExtendedConnectivityFingerprints> object.
1618
1619 =back
1620
1621 =head1 AUTHOR
1622
1623 Manish Sud <msud@san.rr.com>
1624
1625 =head1 SEE ALSO
1626
1627 Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm,
1628 AtomTypesFingerprints.pm, EStateIndiciesFingerprints.pm, MACCSKeys.pm,
1629 PathLengthFingerprints.pm, TopologicalAtomPairsFingerprints.pm,
1630 TopologicalAtomTripletsFingerprints.pm, TopologicalAtomTorsionsFingerprints.pm,
1631 TopologicalPharmacophoreAtomPairsFingerprints.pm,
1632 TopologicalPharmacophoreAtomTripletsFingerprints.pm
1633
1634
1635 =head1 COPYRIGHT
1636
1637 Copyright (C) 2015 Manish Sud. All rights reserved.
1638
1639 This file is part of MayaChemTools.
1640
1641 MayaChemTools is free software; you can redistribute it and/or modify it under
1642 the terms of the GNU Lesser General Public License as published by the Free
1643 Software Foundation; either version 3 of the License, or (at your option)
1644 any later version.
1645
1646 =cut