0
|
1 package Fingerprints::PathLengthFingerprints;
|
|
2 #
|
|
3 # $RCSfile: PathLengthFingerprints.pm,v $
|
|
4 # $Date: 2015/02/28 20:48:54 $
|
|
5 # $Revision: 1.39 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability or fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use Carp;
|
|
31 use Exporter;
|
|
32 use TextUtil ();
|
|
33 use MathUtil ();
|
|
34 use Fingerprints::Fingerprints;
|
|
35 use Molecule;
|
|
36 use AtomTypes::AtomicInvariantsAtomTypes;
|
|
37 use AtomTypes::DREIDINGAtomTypes;
|
|
38 use AtomTypes::EStateAtomTypes;
|
|
39 use AtomTypes::FunctionalClassAtomTypes;
|
|
40 use AtomTypes::MMFF94AtomTypes;
|
|
41 use AtomTypes::SLogPAtomTypes;
|
|
42 use AtomTypes::SYBYLAtomTypes;
|
|
43 use AtomTypes::TPSAAtomTypes;
|
|
44 use AtomTypes::UFFAtomTypes;
|
|
45
|
|
# Package globals used for inheritance and by Exporter...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

# Inherit from the generic fingerprints class; nothing is exported...
@ISA = ('Fingerprints::Fingerprints', 'Exporter');
@EXPORT = ();
@EXPORT_OK = ();

%EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

# Setup class variables: the class name is used as a prefix in the error
# messages raised by the methods of this package...
my $ClassName;
_InitializeClass();

# Overload Perl functions: using an object as a string invokes the named method...
use overload '""' => 'StringifyPathLengthFingerprints';
|
|
60
|
|
# Class constructor...
#
# Arguments: class name (or an existing object) followed by name/value pairs.
#
# The parent class constructor supplies the base object, which is re-blessed
# into this class, populated with default values, and then updated using the
# specified names and values.
#
# Returns the new object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Initialize object...
  my $This = bless $Class->SUPER::new(), (ref($Class) || $Class);

  $This->_InitializePathLengthFingerprints();
  $This->_InitializePathLengthFingerprintsProperties(%NamesAndValues);

  return $This;
}
|
|
74
|
|
# Initialize object data...
#
# Sets every object attribute used by this class to its default value. Hash
# valued attributes are emptied in place.
#
sub _InitializePathLengthFingerprints {
  my($This) = @_;

  # Type of fingerprints to generate:
  #
  # PathLengthBits - A bit vector indicating presence/absence of atom paths
  # PathLengthCount - A vector containing count of atom paths
  #
  # Type of vector used to hold them: FingerprintsBitVector or FingerprintsVector
  #
  @{$This}{'Type', 'VectorType'} = ('', '');

  # Default, minimum, and maximum size. Although any arbitrary size can be
  # specified, the bit vector used to store bits works on a vector size which is
  # a power of 2; additional bits are automatically added and cleared.
  #
  @{$This}{'Size', 'MinSize', 'MaxSize'} = (1024, 32, 2**32);

  # Minimum and maximum path lengths to use for fingerprints generation...
  @{$This}{'MinLength', 'MaxLength'} = (1, 8);

  # Number of bits to set for each atom path for FingerprintsBitVector...
  $This->{NumOfBitsToSetPerPath} = 1;

  # Atom identifier type to use for path atoms during fingerprints generation...
  #
  # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
  # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
  # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
  #
  $This->{AtomIdentifierType} = '';

  # Atom types assigned to atoms...
  %{$This->{AssignedAtomTypes}} = ();

  # For molecules containing rings, atom paths starting from each atom can be
  # traversed in four different ways:
  #
  #   . Atom paths without any rings and without sharing of bonds in traversed paths
  #   . Atom paths containing rings and without sharing of bonds in traversed paths
  #   . All possible atom paths without any rings and with sharing of bonds in traversed paths
  #   . All possible atom paths containing rings and with sharing of bonds in traversed paths
  #
  # Atom path traversal is terminated at the last ring atom. For molecules
  # containing no rings, the first two and the last two types described above
  # are equivalent.
  #
  # AllowSharedBonds and AllowRings select which of these different types of
  # paths are used for fingerprints generation.
  #
  # In addition to atom symbols, bond symbols are also used to generate a string
  # for atom paths. These atom path strings are hashed to a 32 bit integer key,
  # which in turn is used as a seed for random number generation in the range of
  # 1 to fingerprint size for setting the corresponding bit in the bit vector.
  #
  # UseBondSymbols controls whether bond symbols are included in the atom path
  # strings and consequently in the fingerprints.
  #
  # The combination of AllowSharedBonds, AllowRings, and UseBondSymbols allows
  # generation of 8 different types of path length fingerprints:
  #
  #   AllowSharedBonds  AllowRings  UseBondSymbols  PathLengthFingerprintsType
  #
  #   No                No          Yes             AtomPathsNoCyclesWithBondSymbols
  #   No                Yes         Yes             AtomPathsWithCyclesWithBondSymbols
  #
  #   Yes               No          Yes             AllAtomPathsNoCyclesWithBondSymbols
  #   Yes               Yes         Yes             AllAtomPathsWithCyclesWithBondSymbols [ DEFAULT ]
  #
  #   No                No          No              AtomPathsNoCyclesNoBondSymbols
  #   No                Yes         No              AtomPathsWithCyclesNoBondSymbols
  #
  #   Yes               No          No              AllAtomPathsNoCyclesNoBondSymbols
  #   Yes               Yes         No              AllAtomPathsWithCyclesNoBondSymbols
  #

  # By default: paths may share bonds already traversed, rings are included in
  # paths, and bond symbols are included in atom path strings...
  @{$This}{'AllowSharedBonds', 'AllowRings', 'UseBondSymbols'} = (1, 1, 1);

  # By default only structurally unique atom paths are used for generation of
  # atom path strings...
  $This->{UseUniquePaths} = 1;

  # Random number generator to use during generation of fingerprints bit-vector
  # string: Perl CORE::rand or MayaChemTools MathUtil::random function.
  #
  # The random number generator implemented in MayaChemTools is a variant of
  # linear congruential generator (LCG) as described by Miller et al. [ Ref 120 ].
  # It is also referred to as Lehmer random number generator or Park-Miller
  # random number generator.
  #
  # Unlike Perl's core random number generator function rand, the random number
  # generator implemented in MayaChemTools, MathUtil::random, generates consistent
  # random values across different platforms for a specific random seed and leads
  # to generation of portable fingerprints bit-vector strings.
  #
  $This->{UsePerlCoreRandom} = 1;

  # Bond symbols to use during generation of atom path strings...
  %{$This->{BondOrderToSymbol}} = ('1' => '', '1.5' => ':', '2' => '=', '3' => '#');

  # Bond symbols map for bonded atom IDs to use during generation of atom path strings...
  %{$This->{BondSymbols}} = ();

  # Path atom IDs used to remove duplicate paths...
  %{$This->{UniqueLinearAtomPathsIDs}} = ();
  %{$This->{UniqueCyclicAtomPathsIDs}} = ();

  # Reference to all the atom paths up to the specified path length...
  $This->{AtomPathsRef} = '';

  # Atom path strings created using specified atom types and bond symbols...
  %{$This->{AtomPathsStrings}} = ();
}
|
|
202
|
|
# Initialize class: store the name of this package in the file-scoped class
# name variable, which is interpolated into error and warning messages...
#
sub _InitializeClass {
  $ClassName = __PACKAGE__;
}
|
|
208
|
|
# Initialize object properties...
#
# Arguments: name/value pairs specified during object instantiation.
#
# Each value is handed to the corresponding Set<Name> method. Molecule, Type,
# and AtomIdentifierType must be present, and any specified Size must be a
# power of 2. The vector matching the fingerprints type is then initialized.
#
# Returns the object; croaks on missing or invalid values.
#
sub _InitializePathLengthFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  # Pass each specified value to its setter method...
  for my $Name (keys %NamesAndValues) {
    my $Value = $NamesAndValues{$Name};
    my $MethodName = "Set${Name}";
    $This->$MethodName($Value);
  }

  # Make sure molecule object was specified...
  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule..."
    unless exists $NamesAndValues{Molecule};

  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying Type..."
    unless exists $NamesAndValues{Type};

  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType..."
    unless exists $NamesAndValues{AtomIdentifierType};

  # Make sure it's power of 2...
  if (exists($NamesAndValues{Size}) && !TextUtil::IsNumberPowerOfNumber($NamesAndValues{Size}, 2)) {
    croak "Error: ${ClassName}->New: Specified size value, $NamesAndValues{Size}, must be power of 2...";
  }

  # Setup the vector corresponding to the fingerprints type...
  if ($This->{Type} =~ /^PathLengthBits$/i) {
    return $This->_InitializePathLengthBits() && $This;
  }
  if ($This->{Type} =~ /^PathLengthCount$/i) {
    return $This->_InitializePathLengthCount() && $This;
  }

  croak "Error: ${ClassName}->_InitializePathLengthFingerprintsProperties: Unknown PathLength type: $This->{Type}; Supported PathLength type : PathLengthBits or PathLengthCount......";
}
|
|
251
|
|
# Initialize PathLength bits...
#
# Marks the object as holding a fingerprints bit vector and asks the object to
# initialize that bit vector. Returns the object.
#
sub _InitializePathLengthBits {
  my($This) = @_;

  # Vector type...
  $This->{VectorType} = 'FingerprintsBitVector';
  $This->_InitializeFingerprintsBitVector();

  return $This;
}
|
|
264
|
|
# Initialize PathLength key count...
#
# Marks the object as holding a fingerprints vector of numerical values and
# asks the object to initialize that vector. Returns the object.
#
sub _InitializePathLengthCount {
  my($This) = @_;

  # Vector type and type of values...
  @{$This}{'VectorType', 'FingerprintsVectorType'} = ('FingerprintsVector', 'NumericalValues');
  $This->_InitializeFingerprintsVector();

  return $This;
}
|
|
278
|
|
# Set type...
#
# Arguments: fingerprints type, PathLengthBits or PathLengthCount, matched
# without regard to case and stored using the canonical spelling.
#
# Returns the object; croaks when the type is already set or isn't supported.
#
sub SetType {
  my($This, $Type) = @_;

  if ($This->{Type}) {
    croak "Error: ${ClassName}->SetType: Can't change type: It's already set...";
  }

  # Find the canonical name of the specified type...
  my($SupportedType) = grep { $Type =~ /^$_$/i } ('PathLengthBits', 'PathLengthCount');
  if (!defined $SupportedType) {
    croak "Error: ${ClassName}->SetType: Unknown PathLength keys: $Type; Supported PathLength types: PathLengthBits or PathLengthCount...";
  }
  $This->{Type} = $SupportedType;

  return $This;
}
|
|
299
|
|
# Disable vector type change...
#
# The vector type follows from the fingerprints type, so this setter always
# croaks and never returns.
#
sub SetVectorType {
  croak "Error: ${ClassName}->SetVectorType: Can't change vector type...";
}
|
|
309
|
|
# Disable fingerprints vector type change...
#
# The type of values held by the fingerprints vector is fixed by this class,
# so this setter always croaks and never returns.
#
sub SetFingerprintsVectorType {
  croak "Error: ${ClassName}->SetFingerprintsVectorType: Can't change fingerprints vector type...";
}
|
|
319
|
|
# Set atom identifier type to use for path length atom identifiers...
#
# Arguments: name of a supported atom types class, matched without regard to
# case. The value is stored exactly as specified.
#
# Returns the object; croaks when the value isn't supported or an atom
# identifier type is already set.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  unless ($IdentifierType =~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
  }

  # The identifier type can only be set once...
  croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change atom identifier type: It's already set..."
    if $This->{AtomIdentifierType};

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize atom identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}
|
|
340
|
|
# Set minimum path length...
#
# Arguments: minimum path length, which must be a positive integer.
#
# Returns the object; croaks on an invalid value.
#
sub SetMinLength {
  my($This, $Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetMinLength: MinLength value, $Value, is not valid: It must be a positive integer...";

  $This->{MinLength} = $Value;

  return $This;
}
|
|
353
|
|
# Set maximum path length...
#
# Arguments: maximum path length, which must be a positive integer.
#
# Returns the object; croaks on an invalid value.
#
sub SetMaxLength {
  my($This, $Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetMaxLength: MaxLength value, $Value, is not valid: It must be a positive integer...";

  $This->{MaxLength} = $Value;

  return $This;
}
|
|
366
|
|
# Set number of bits to set for each path...
#
# Arguments: number of bits, which must be a positive integer.
#
# Returns the object; croaks on an invalid value.
#
sub SetNumOfBitsToSetPerPath {
  my($This, $Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetNumOfBitsToSetPerPath: NumOfBitsToSetPerPath value, $Value, is not valid: It must be a positive integer...";

  $This->{NumOfBitsToSetPerPath} = $Value;

  return $This;
}
|
|
379
|
|
# Generate fingerprints description...
#
# Returns the explicitly set description when the object has one. Otherwise the
# description is built from the fingerprints type, the atom identifier type,
# and the minimum and maximum path lengths.
#
sub GetDescription {
  my($This) = @_;

  # An explicitly set description wins...
  return $This->{Description} if exists $This->{Description};

  return "$This->{Type}:$This->{AtomIdentifierType}:MinLength$This->{MinLength}:MaxLength$This->{MaxLength}";
}
|
|
394
|
|
# Generate path length fingerprints...
#
# Assigns atom types to the atoms of the molecule, enumerates atom paths up to
# MaxLength, converts them into atom path strings, and sets the final
# fingerprints.
#
# Returns the object. Croaks when MinLength is greater than MaxLength. When
# atom types can't be assigned to all atoms, a warning is issued and the object
# is returned without generating any fingerprints.
#
sub GenerateFingerprints {
  my($This) = @_;

  if ($This->{MinLength} > $This->{MaxLength}) {
    croak "Error: ${ClassName}->GenerateFingerprints: No fingerpritns generated: MinLength, $This->{MinLength}, must be <= MaxLength, $This->{MaxLength}...";
  }

  # Forget the atom paths seen during any earlier invocation; otherwise every
  # path would be rejected as a duplicate when fingerprints are generated again
  # for the same object...
  %{$This->{UniqueLinearAtomPathsIDs}} = ();
  %{$This->{UniqueCyclicAtomPathsIDs}} = ();

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Assign atom types to all atoms...
  if (!$This->_AssignAtomTypes()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";

    # Don't leave the cached molecule data behind on failure...
    $This->_ClearMoleculeDataCache();

    return $This;
  }

  # Setup bond symbol map...
  if ($This->{UseBondSymbols}) {
    $This->_InitializeBondSymbols();
  }

  # Generate appropriate atom paths...
  $This->_GenerateAtomPathsUpToMaxLength();

  # Initialize atom path strings...
  $This->_InitializeAtomPathsStrings();

  # Generate appropriate atom path strings for unique atom paths...
  $This->_GenerateAtomPathsStrings();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}
|
|
435
|
|
# Assign appropriate atom types to all atoms...
#
# Creates the atom types object matching AtomIdentifierType, asks it to assign
# atom types, and collects the assigned atom type for each cached atom into
# AssignedAtomTypes keyed by atom ID.
#
# Returns the object on success and undef when atom types assignment isn't
# successful; croaks on an unknown atom identifier type.
#
sub _AssignAtomTypes {
  my($This) = @_;

  %{$This->{AssignedAtomTypes}} = ();

  my $IgnoreHydrogens = 0;

  # Supported atom identifier types along with the options passed, in addition
  # to the molecule, to the constructor of the class AtomTypes::<Name>.
  #
  # NOTE(review): AtomicInvariantsToUse and FunctionalClassesToUse aren't set
  # anywhere in this method; presumably they are set up elsewhere for the
  # corresponding atom identifier types.
  #
  my @AtomTypesSpecs = (
    ['AtomicInvariantsAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse}],
    ['DREIDINGAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
    ['EStateAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
    ['FunctionalClassAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse}],
    ['MMFF94AtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
    ['SLogPAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
    ['SYBYLAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
    ['TPSAAtomTypes', 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0],
    ['UFFAtomTypes', 'IgnoreHydrogens' => $IgnoreHydrogens],
  );

  my $SpecifiedAtomTypes;

  IDENTIFIERTYPE: for my $SpecRef (@AtomTypesSpecs) {
    my($TypeName, @TypeOptions) = @{$SpecRef};

    next IDENTIFIERTYPE unless $This->{AtomIdentifierType} =~ /^\Q$TypeName\E$/i;

    # Direct method invocation on the class name instead of the ambiguous
    # indirect object syntax, "new Class(...)"...
    my $AtomTypesClass = "AtomTypes::${TypeName}";
    $SpecifiedAtomTypes = $AtomTypesClass->new('Molecule' => $This->{Molecule}, @TypeOptions);

    last IDENTIFIERTYPE;
  }

  if (!defined $SpecifiedAtomTypes) {
    croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  # Assign atom types...
  $SpecifiedAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return undef;
  }

  # Collect assigned atom types...
  for my $Atom (@{$This->{Atoms}}) {
    $This->{AssignedAtomTypes}{$Atom->GetID()} = $SpecifiedAtomTypes->GetAtomType($Atom);
  }

  return $This;
}
|
|
511
|
|
# Setup bond symbol map for atoms to speed up generation of path length
# identifiers during fingerprints generation...
#
# BondSymbols is emptied and, when UseBondSymbols is set, filled with the bond
# symbol for each bond of the molecule, keyed first by the smaller and then by
# the larger atom ID of the bonded atoms. Aromatic bonds use ':'; other bonds
# use the symbol mapped to their bond order, or the bond order itself when it
# has no mapped symbol.
#
# Returns the object.
#
sub _InitializeBondSymbols {
  my($This) = @_;

  %{$This->{BondSymbols}} = ();

  return $This unless $This->{UseBondSymbols};

  for my $Bond ($This->{Molecule}->GetBonds()) {
    my $BondOrder = $Bond->GetBondOrder();

    my $BondSymbol;
    if ($Bond->IsAromatic()) {
      $BondSymbol = ':';
    }
    elsif (exists $This->{BondOrderToSymbol}{$BondOrder}) {
      $BondSymbol = $This->{BondOrderToSymbol}{$BondOrder};
    }
    else {
      $BondSymbol = $BondOrder;
    }

    # Order the bonded atom IDs numerically so that each bond is stored once...
    my($Atom1, $Atom2) = $Bond->GetAtoms();
    my($SmallerAtomID, $LargerAtomID) = sort { $a <=> $b } ($Atom1->GetID(), $Atom2->GetID());

    $This->{BondSymbols}{$SmallerAtomID}{$LargerAtomID} = $BondSymbol;
  }

  return $This;
}
|
|
541
|
|
# Get appropriate atom paths with length up to MaxLength...
#
# The molecule is asked for all atom paths when sharing of bonds is allowed and
# for atom paths otherwise; MaxLength and AllowRings are passed along. The
# result is stored in AtomPathsRef.
#
# Returns the object.
#
sub _GenerateAtomPathsUpToMaxLength {
  my($This) = @_;

  my $PathsMethodName = $This->{AllowSharedBonds} ? 'GetAllAtomPathsWithLengthUpto' : 'GetAtomPathsWithLengthUpto';

  $This->{AtomPathsRef} = $This->{Molecule}->$PathsMethodName($This->{MaxLength}, $This->{AllowRings});

  return $This;
}
|
|
562
|
|
# Initialize atom paths strings at various pathlength levels...
#
# AtomPathsStrings is emptied and given an empty map for each path length from
# MinLength to MaxLength. Returns the object.
#
sub _InitializeAtomPathsStrings {
  my($This) = @_;

  %{$This->{AtomPathsStrings}} = ();

  $This->{AtomPathsStrings}{$_} = {} for ($This->{MinLength} .. $This->{MaxLength});

  return $This;
}
|
|
577
|
|
# Generate appropriate atom path strings for unique atom paths...
#
# Atom paths shorter than MinLength are skipped. Each remaining path is handed
# to the unique path string generator when UseUniquePaths is set and to the
# plain path string generator otherwise.
#
# Returns the object.
#
sub _GenerateAtomPathsStrings {
  my($This) = @_;

  my $MinPathLength = $This->{MinLength};
  my $GeneratorMethodName = $This->{UseUniquePaths} ? '_GenerateAtomPathStringUsingUniquePath' : '_GenerateAtomPathString';

  PATHATOMS: for my $PathAtomsRef (@{$This->{AtomPathsRef}}) {
    next PATHATOMS if scalar(@{$PathAtomsRef}) < $MinPathLength;

    $This->$GeneratorMethodName($PathAtomsRef);
  }

  return $This;
}
|
|
601
|
|
# Generate atom path string using unique path...
#
# Arguments: reference to the list of path atoms.
#
# Paths containing a cycle are handled by the cyclic path string generator,
# but only when rings are allowed; all other paths go to the linear one.
#
# Returns the object.
#
sub _GenerateAtomPathStringUsingUniquePath {
  my($This, $PathAtomsRef) = @_;

  my $IsCyclicPath = $This->{AllowRings} && $This->_DoesAtomPathContainsCycle($PathAtomsRef);

  if ($IsCyclicPath) {
    $This->_GenerateAtomPathStringUsingUniquePathContainingCycle($PathAtomsRef);
  }
  else {
    $This->_GenerateAtomPathStringUsingUniqueLinearPath($PathAtomsRef);
  }

  return $This;
}
|
|
615
|
|
# Generate atom path string for specified path containing no cycle...
#
# Arguments: reference to the list of path atoms.
#
# The atom path string is generated only for a linear atom path which is
# structurally unique. Returns the object.
#
sub _GenerateAtomPathStringUsingUniqueLinearPath {
  my($This, $PathAtomsRef) = @_;

  # Is it a unique linear atom path?
  if ($This->_IsUniqueLinearAtomPath($PathAtomsRef)) {
    $This->_GenerateAtomPathString($PathAtomsRef);
  }

  return $This;
}
|
|
630
|
|
# Is it a structurally unique linear path?
#
# For a path to be structurally unique, its set of atom IDs must be different
# from the atom IDs of any earlier path. In order to make the atom path ID
# independent of the order of atoms in the path, atom IDs are sorted
# numerically before generating the path ID.
#
# Notes:
#   . Atom path ID doesn't reflect the order of atoms in the atom path.
#
# Arguments: reference to the list of path atoms.
#
# Returns 1 for a path seen for the first time, which is also recorded as
# seen, and 0 otherwise.
#
sub _IsUniqueLinearAtomPath {
  my($This, $PathAtomsRef) = @_;

  my $AtomPathID = join '-', sort { $a <=> $b } map { $_->GetID() } @{$PathAtomsRef};

  return 0 if exists $This->{UniqueLinearAtomPathsIDs}{$AtomPathID};

  # It's a unique atom path...
  $This->{UniqueLinearAtomPathsIDs}{$AtomPathID} = 1;

  return 1;
}
|
|
657
|
|
# Generate atom path string for specified path containing a cycle...
#
# Arguments: reference to the list of path atoms.
#
# Nothing is generated unless the path is a unique atom path containing a
# cycle. A path whose cycle is closed by its first atom is handled as a path
# cycle; any other path is handled as a linear part followed by a cycle.
#
# Returns the object.
#
sub _GenerateAtomPathStringUsingUniquePathContainingCycle {
  my($This, $PathAtomsRef) = @_;

  # Is it a unique atom path containing a cycle?
  return $This unless $This->_IsUniqueAtomPathContainingCycle($PathAtomsRef);

  my($CycleClosingPathAtomIndex) = $This->_GetAtomPathCycleClosingAtomIndex($PathAtomsRef);

  if ($CycleClosingPathAtomIndex == 0) {
    # The whole path is a cycle...
    $This->_GenerateUniqueAtomPathStringForPathCycle($PathAtomsRef);
    return $This;
  }

  $This->_GenerateUniqueAtomPathStringForPathContainingCycle($PathAtomsRef, $CycleClosingPathAtomIndex);

  return $This;
}
|
|
680
|
|
# Generate a unique atom path string for a cyclic path by generating atom path
# strings for all possible paths in the cycle and keeping the lexicographically
# smallest one.
#
# Although all the paths enumerated during atom path string generation are also
# present in the initial paths list, the structural uniqueness check detects
# them earlier and this method ends up being invoked only once, for the first
# cyclic path.
#
# For atom paths containing the same atom types and bond symbols, atom path
# strings would be the same for all the paths, so the string for the specified
# path is generated directly.
#
# Arguments: reference to the list of path atoms, whose last atom closes the
# cycle started by its first atom.
#
# Returns the object, or whatever the plain atom path string generator returns
# when all the atom and bond symbols are the same.
#
sub _GenerateUniqueAtomPathStringForPathCycle {
  my($This, $PathAtomsRef) = @_;

  if ($This->_AreAllPathAtomsSymbolsSame($PathAtomsRef) && $This->_AreAllPathBondSymbolsSame($PathAtomsRef)) {
    return $This->_GenerateAtomPathString($PathAtomsRef);
  }

  my $PathLength = scalar @{$PathAtomsRef};

  # Atoms of the cycle without the repeated cycle closing atom at the end...
  my @CycleAtoms = @{$PathAtomsRef}[0 .. ($PathLength - 2)];

  # Generate an atom path string for the cycle started at each of its atoms,
  # keeping the lexicographically smaller of its forward and reverse forms...
  my @CandidateAtomPathStrings = ();
  for my $StartIndex (0 .. $#CycleAtoms) {
    # Rotate the cycle to start at this atom and complete it by adding the
    # first atom as the last atom...
    my @RotatedPathAtoms = (
      @CycleAtoms[$StartIndex .. $#CycleAtoms],
      @CycleAtoms[0 .. ($StartIndex - 1)],
      $CycleAtoms[$StartIndex],
    );

    my $AtomPathSymbolsRef = $This->_GenerateAtomPathSymbols(\@RotatedPathAtoms);

    my $ForwardAtomPathString = join '', @{$AtomPathSymbolsRef};
    my $ReverseAtomPathString = join '', reverse @{$AtomPathSymbolsRef};

    push @CandidateAtomPathStrings, ($ReverseAtomPathString le $ForwardAtomPathString) ? $ReverseAtomPathString : $ForwardAtomPathString;
  }

  # Select the lexicographically smallest atom path string...
  my $FinalAtomPathString = '';
  if (@CandidateAtomPathStrings) {
    $FinalAtomPathString = shift @CandidateAtomPathStrings;
    for my $CandidateAtomPathString (@CandidateAtomPathStrings) {
      if ($CandidateAtomPathString le $FinalAtomPathString) {
        $FinalAtomPathString = $CandidateAtomPathString;
      }
    }
  }

  # Count the final atom path string...
  $This->{AtomPathsStrings}{$PathLength}{$FinalAtomPathString} += 1;

  return $This;
}
|
|
771
|
|
#
# Generate a unique atom path string for paths containing a cycle closed by
# the specified atom index and the last atom index.
#
# The following methodology is used to generate an atom path string which is
# independent of the initial atom ordering:
#   . Collect atoms from the first atom to the atom before the first cycle
#     closing atom as the linear part of the path.
#   . Generate atom path strings for atoms from the first cycle closing atom
#     index to the last path atom in both forward and reverse order, and select
#     the order giving the lexicographically smaller atom path string.
#   . Combine atoms from the first step with atoms from the second step to
#     generate the final atom path string.
#
# Arguments: reference to the list of path atoms; index of the path atom
# which closes the cycle.
#
# Returns the object.
#
sub _GenerateUniqueAtomPathStringForPathContainingCycle {
  my($This, $PathAtomsRef, $CycleClosingAtomIndex) = @_;

  my $PathLength = scalar @{$PathAtomsRef};

  # Split path atoms into the linear part and the cyclic part of the path...
  my @LinearPartPathAtoms = @{$PathAtomsRef}[0 .. ($CycleClosingAtomIndex - 1)];
  my @CyclicPartPathAtoms = @{$PathAtomsRef}[$CycleClosingAtomIndex .. ($PathLength - 1)];

  # Setup forward and reverse atom path strings for the cyclic part...
  my $CyclicPartAtomPathSymbolsRef = $This->_GenerateAtomPathSymbols(\@CyclicPartPathAtoms);
  my $ForwardCyclicPartAtomPathString = join '', @{$CyclicPartAtomPathSymbolsRef};
  my $ReverseCyclicPartAtomPathString = join '', reverse @{$CyclicPartAtomPathSymbolsRef};

  # Follow the linear part by the cyclic part in the order which corresponds to
  # the lexicographically smaller atom path string...
  my @PathAtoms = (
    @LinearPartPathAtoms,
    ($ReverseCyclicPartAtomPathString le $ForwardCyclicPartAtomPathString) ? reverse(@CyclicPartPathAtoms) : @CyclicPartPathAtoms,
  );

  # Setup and count the final atom path string...
  my $AtomPathString = join '', @{$This->_GenerateAtomPathSymbols(\@PathAtoms)};

  $This->{AtomPathsStrings}{$PathLength}{$AtomPathString} += 1;

  return $This;
}
|
|
839
|
|
840 # Does atom path contain a cycle?
|
|
841 #
|
|
842 # For an atom path to contain cycle, it must satisfy the following conditions:
|
|
843 # . Pathlength >= 3
|
|
844 # . Last atom ID is equal to first atom ID or some other atom ID besides itself
|
|
845 #
|
|
sub _DoesAtomPathContainsCycle {
  my($This, $PathAtomsRef) = @_;

  # Return 1 when the ID of the last atom in the path matches the ID of any
  # earlier atom in the path; otherwise return 0. Paths containing two or
  # fewer atoms are never considered to contain a cycle.
  #
  my $NumOfPathAtoms = scalar @{$PathAtomsRef};
  return 0 if ($NumOfPathAtoms <= 2);

  my $FinalAtomID = $PathAtomsRef->[$NumOfPathAtoms - 1]->GetID();

  # Compare every atom ahead of the last one against the last atom ID...
  my @EarlierPathAtoms = @{$PathAtomsRef}[0 .. ($NumOfPathAtoms - 2)];
  for my $EarlierAtom (@EarlierPathAtoms) {
    return 1 if ($EarlierAtom->GetID() == $FinalAtomID);
  }

  return 0;
}
|
|
873
|
|
874 # Get atom path cycle closing atom index...
|
|
875 #
|
|
sub _GetAtomPathCycleClosingAtomIndex {
  my($This, $PathAtomsRef) = @_;

  # Return the index of the first atom in the path, excluding the last atom
  # itself, whose ID equals the ID of the last atom. Returns undef when no
  # such atom exists.
  #
  my $FinalAtomIndex = scalar(@{$PathAtomsRef}) - 1;
  my $FinalAtomID = $PathAtomsRef->[$FinalAtomIndex]->GetID();

  my $CandidateIndex = 0;
  while ($CandidateIndex < $FinalAtomIndex) {
    if ($PathAtomsRef->[$CandidateIndex]->GetID() == $FinalAtomID) {
      # Found the atom at which the cycle closes...
      return $CandidateIndex;
    }
    $CandidateIndex++;
  }

  return undef;
}
|
|
896
|
|
897 # Is it a structurally unique path containing a cycle?
|
|
898 #
|
|
899 # For atom paths containing cycles, last atom ID is either equal to first atom ID or
|
|
900 # some other atom ID besides itself.
|
|
901 #
|
|
902 # In order to determine whether it is structurally unique, independent of initial atom ordering,
|
|
903 # the following methodology is used:
|
|
904 #
|
|
905 # . For paths with same first and last atom IDs:
|
|
906 # . Remove the last atom ID from atom path
|
|
907 # . Sort atom IDs in the path
|
|
908 # . Add first atom ID from the sorted list to the end of list to complete the cycle
|
|
909 # . Generate a atom path ID
|
|
910 # . Use final path ID to track uniqueness of path containing cycle.
|
|
911 #
|
|
912 # . For paths with last atom ID equal to some other atom ID besides itself:
|
|
913 # . Sort atom IDs in atom path
|
|
914 # . Generate atom path ID and use it to track uniqueness of atom paths.
|
|
915 #
|
|
sub _IsUniqueAtomPathContainingCycle {
  my($This, $PathAtomsRef) = @_;

  # Return 1 the first time a cyclic path over a given collection of atom IDs
  # is seen and record it in $This->{UniqueCyclicAtomPathsIDs}; return 0 for
  # any later path that maps to the same ID.
  #
  # The path ID is the '-' joined list of numerically sorted atom IDs. When
  # the first and last atom IDs are equal, the last ID is dropped before
  # sorting and the smallest ID is appended afterwards to close the cycle.
  #
  my @AllAtomIDs = map { $_->GetID(); } @{$PathAtomsRef};

  my $StartAtomID = $PathAtomsRef->[0]->GetID();
  my $EndAtomID = $PathAtomsRef->[scalar(@{$PathAtomsRef}) - 1]->GetID();

  my @CanonicalAtomIDs;
  if ($StartAtomID == $EndAtomID) {
    # Drop the repeated closing atom, sort, and close the cycle again...
    my @RingAtomIDs = sort { $a <=> $b } @AllAtomIDs[0 .. ($#AllAtomIDs - 1)];
    @CanonicalAtomIDs = (@RingAtomIDs, $RingAtomIDs[0]);
  }
  else {
    @CanonicalAtomIDs = sort { $a <=> $b } @AllAtomIDs;
  }
  my $CanonicalPathID = join '-', @CanonicalAtomIDs;

  return 0 if (exists $This->{UniqueCyclicAtomPathsIDs}{$CanonicalPathID});

  # First occurrence of this cyclic path...
  $This->{UniqueCyclicAtomPathsIDs}{$CanonicalPathID} = 1;

  return 1;
}
|
|
951
|
|
952 # Generate atom path string for specified atom path...
|
|
953 #
|
|
sub _GenerateAtomPathString {
  my($This, $PathAtomsRef) = @_;

  # Count the specified atom path in
  # $This->{AtomPathsStrings}{<number of path atoms>}.
  #
  # A path and its reverse are treated as the same path. If either direction
  # already has an entry, that entry is incremented; otherwise a new entry
  # with a count of 1 is created under the lexicographically smaller string.
  #
  # Returns $This.
  #
  my $NumOfPathAtoms = scalar @{$PathAtomsRef};

  my $PathSymbolsRef = $This->_GenerateAtomPathSymbols($PathAtomsRef);
  my $ForwardPathString = join '', @{$PathSymbolsRef};

  # Path already present in forward direction?
  if (exists $This->{AtomPathsStrings}{$NumOfPathAtoms}{$ForwardPathString}) {
    $This->{AtomPathsStrings}{$NumOfPathAtoms}{$ForwardPathString} += 1;
    return $This;
  }

  # Path already present in reverse direction?
  my $BackwardPathString = join '', reverse @{$PathSymbolsRef};
  if (exists $This->{AtomPathsStrings}{$NumOfPathAtoms}{$BackwardPathString}) {
    $This->{AtomPathsStrings}{$NumOfPathAtoms}{$BackwardPathString} += 1;
    return $This;
  }

  # New path: track it using the lexicographically smaller string...
  my $NewPathString = ($ForwardPathString le $BackwardPathString) ? $ForwardPathString : $BackwardPathString;
  $This->{AtomPathsStrings}{$NumOfPathAtoms}{$NewPathString} = 1;

  return $This;
}
|
|
989
|
|
990 # Are atom types for all path atoms same?
|
|
991 #
|
|
sub _AreAllPathAtomsSymbolsSame {
  my($This, $PathAtomsRef) = @_;

  # Return 1 when every atom in the path has the same entry in
  # $This->{AssignedAtomTypes} as the first atom; otherwise return 0.
  #
  my $ReferenceAtomType = $This->{AssignedAtomTypes}{$PathAtomsRef->[0]->GetID()};

  my @RemainingPathAtoms = @{$PathAtomsRef}[1 .. $#{$PathAtomsRef}];
  for my $PathAtom (@RemainingPathAtoms) {
    if ($This->{AssignedAtomTypes}{$PathAtom->GetID()} ne $ReferenceAtomType) {
      return 0;
    }
  }

  return 1;
}
|
|
1009
|
|
1010 # Are bond symbols for all path bonds same?
|
|
1011 #
|
|
sub _AreAllPathBondSymbolsSame {
  my($This, $PathAtomsRef) = @_;

  # Return 1 when the bond symbols between all consecutive atom pairs in the
  # path are identical; otherwise return 0.
  #
  # Bond symbols are looked up in $This->{BondSymbols}{<ID1>}{<ID2>} with the
  # numerically smaller atom ID as the first key. When bond symbols are not
  # in use, they are ignored and treated as same.
  #
  if (!$This->{UseBondSymbols}) {
    return 1;
  }

  # A path with fewer than two atoms has no bonds to compare; treat it as
  # same instead of calling GetID on a missing atom...
  if (@{$PathAtomsRef} < 2) {
    return 1;
  }

  my($FirstBondSymbol);

  for my $Index (0 .. ($#{$PathAtomsRef} - 1)) {
    my $AtomID = $PathAtomsRef->[$Index]->GetID();
    my $BondedAtomID = $PathAtomsRef->[$Index + 1]->GetID();

    my($BondAtomID1, $BondAtomID2) = ($AtomID < $BondedAtomID) ? ($AtomID, $BondedAtomID) : ($BondedAtomID, $AtomID);
    my $BondSymbol = $This->{BondSymbols}{$BondAtomID1}{$BondAtomID2};

    if ($Index == 0) {
      # First bond serves as the reference for all later bonds...
      $FirstBondSymbol = $BondSymbol;
      next;
    }

    if ($BondSymbol ne $FirstBondSymbol) {
      return 0;
    }
  }

  return 1;
}
|
|
1040
|
|
1041 # Generate atom path symbols...
|
|
1042 #
|
|
sub _GenerateAtomPathSymbols {
  my($This, $PathAtomsRef) = @_;

  # Return a reference to a list of symbols describing the path.
  #
  # For a single atom path, or when bond symbols are not in use, the list
  # holds the $This->{AssignedAtomTypes} entry of each atom in path order.
  # Otherwise atom types alternate with the bond symbol found in
  # $This->{BondSymbols}{<ID1>}{<ID2>}, keyed with the smaller atom ID first.
  #
  my @PathSymbols = ();
  my $FinalAtomIndex = $#{$PathAtomsRef};

  # Atom types only...
  if ($FinalAtomIndex == 0 || !$This->{UseBondSymbols}) {
    for my $PathAtom (@{$PathAtomsRef}) {
      push @PathSymbols, $This->{AssignedAtomTypes}{$PathAtom->GetID()};
    }
    return \@PathSymbols;
  }

  # Atom type of first atom followed by bond symbol and atom type pairs...
  my $CurrentAtomID = $PathAtomsRef->[0]->GetID();
  push @PathSymbols, $This->{AssignedAtomTypes}{$CurrentAtomID};

  for my $NextAtomIndex (1 .. $FinalAtomIndex) {
    my $NextAtomID = $PathAtomsRef->[$NextAtomIndex]->GetID();

    # Bond symbols are stored under the smaller atom ID first...
    my($SmallerAtomID, $LargerAtomID) = ($CurrentAtomID, $NextAtomID);
    if (!($CurrentAtomID < $NextAtomID)) {
      ($SmallerAtomID, $LargerAtomID) = ($NextAtomID, $CurrentAtomID);
    }

    push @PathSymbols, $This->{BondSymbols}{$SmallerAtomID}{$LargerAtomID};
    push @PathSymbols, $This->{AssignedAtomTypes}{$NextAtomID};

    $CurrentAtomID = $NextAtomID;
  }

  return \@PathSymbols;
}
|
|
1083
|
|
1084 # Set final fingerprints...
|
|
1085 #
|
|
sub _SetFinalFingerprints {
  my($This) = @_;

  # Flag fingerprints as generated and hand off to the method matching
  # $This->{Type}: bit vector for PathLengthBits, count vector for
  # PathLengthCount. Any other type sets only the flag. Returns $This.
  #
  $This->{FingerprintsGenerated} = 1;

  my $FingerprintsType = $This->{Type};

  if ($FingerprintsType =~ /^PathLengthCount$/i) {
    $This->_SetFinalFingerprintsVector();
  }
  elsif ($FingerprintsType =~ /^PathLengthBits$/i) {
    $This->_SetFinalFingerprintsBitVector();
  }

  return $This;
}
|
|
1101
|
|
1102 # Set final fingerprints bit vector...
|
|
1103 #
|
|
sub _SetFinalFingerprintsBitVector {
  my($This) = @_;

  # Set bits in $This->{FingerprintsBitVector} for every atom path string in
  # $This->{AtomPathsStrings}.
  #
  # For each path string, the random number generator is seeded with the
  # TextUtil::HashCode value of the string and then
  # $This->{NumOfBitsToSetPerPath} bit positions are drawn using $This->{Size}
  # as the argument to the random function. $This->{UsePerlCoreRandom}
  # selects Perl's core srand/rand over MathUtil::srandom/random.
  #
  # Returns $This.
  #
  my $BitVector = $This->{FingerprintsBitVector};
  my $NumOfBits = $This->{Size};
  my $BitsPerPath = $This->{NumOfBitsToSetPerPath};

  # Passed as second argument to SetBit...
  my $SkipBitPosCheck = 1;

  for my $PathLength (keys %{$This->{AtomPathsStrings}}) {
    for my $PathString (keys %{$This->{AtomPathsStrings}{$PathLength}}) {
      my $PathHashCode = TextUtil::HashCode($PathString);

      # Seed first, then draw bit positions...
      if ($This->{UsePerlCoreRandom}) {
        CORE::srand($PathHashCode);
      }
      else {
        MathUtil::srandom($PathHashCode);
      }

      for my $BitNum (1 .. $BitsPerPath) {
        my($BitPos);
        if ($This->{UsePerlCoreRandom}) {
          $BitPos = int(CORE::rand($NumOfBits));
        }
        else {
          $BitPos = int(MathUtil::random($NumOfBits));
        }
        $BitVector->SetBit($BitPos, $SkipBitPosCheck);
      }
    }
  }

  return $This;
}
|
|
1135
|
|
1136 # Set final fingerprints vector...
|
|
1137 #
|
|
sub _SetFinalFingerprintsVector {
  my($This) = @_;

  # Add atom path strings as value IDs and their counts as values to
  # $This->{FingerprintsVector}. Entries are ordered by numerically sorted
  # path length and then by string sorted atom path string. Returns $This.
  #
  my(@PathStrings, @PathCounts);

  for my $PathLength (sort { $a <=> $b } keys %{$This->{AtomPathsStrings}}) {
    my $PathCountsRef = $This->{AtomPathsStrings}{$PathLength};
    my @SortedPathStrings = sort keys %{$PathCountsRef};

    push @PathStrings, @SortedPathStrings;
    push @PathCounts, @{$PathCountsRef}{@SortedPathStrings};
  }

  # IDs first and then the corresponding values...
  my $FingerprintsVector = $This->{FingerprintsVector};
  $FingerprintsVector->AddValueIDs(\@PathStrings);
  $FingerprintsVector->AddValues(\@PathCounts);

  return $This;
}
|
|
1160
|
|
1161 # Cache appropriate molecule data...
|
|
1162 #
|
|
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # Store the list returned by GetAtoms() of the object's molecule in the
  # array referenced by $This->{Atoms}, replacing its contents in place.
  # Returns $This.
  #
  my $Molecule = $This->GetMolecule();
  my @MoleculeAtoms = $Molecule->GetAtoms();

  @{$This->{Atoms}} = @MoleculeAtoms;

  return $This;
}
|
|
1171
|
|
1172 # Clear cached molecule data...
|
|
1173 #
|
|
sub _ClearMoleculeDataCache {
  my($This) = @_;

  # Drop cached data: reset $This->{AtomPathsRef} to an empty string and
  # empty the array referenced by $This->{Atoms} in place. Returns $This.
  #
  $This->{AtomPathsRef} = '';

  @{$This->{Atoms}} = ();

  return $This;
}
|
|
1185
|
|
1186 # Set atomic invariants to use atom identifiers...
|
|
1187 #
|
|
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;

  # Replace the contents of @{$This->{AtomicInvariantsToUse}} with the
  # specified atomic invariants.
  #
  # Values may be passed as a list or as a reference to an array in the
  # first argument. With no values, a warning is issued and nothing is
  # returned. An invariant rejected by
  # AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable causes
  # a croak before anything is changed. Returns $This on success.
  #
  unless (@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  # Array reference or plain list?
  my @SpecifiedAtomicInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure all specified atomic invariants are valid...
  for my $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
    next if AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant);
    croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
  }

  @{$This->{AtomicInvariantsToUse}} = @SpecifiedAtomicInvariants;

  return $This;
}
|
|
1225
|
|
1226 # Set functional classes to use for atom identifiers...
|
|
1227 #
|
|
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;

  # Replace the contents of @{$This->{FunctionalClassesToUse}} with the
  # specified functional classes.
  #
  # Values may be passed as a list or as a reference to an array in the
  # first argument. With no values, or when $This->{AtomIdentifierType} is
  # not FunctionalClassAtomTypes, a warning is issued and nothing is
  # returned. A class rejected by
  # AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable causes a
  # croak before anything is changed. Returns $This on success.
  #
  unless (@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # Array reference or plain list?
  my @SpecifiedFunctionalClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure all specified functional classes are valid...
  for my $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
    next if AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass);
    croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
  }

  @{$This->{FunctionalClassesToUse}} = @SpecifiedFunctionalClasses;

  return $This;
}
|
|
1269
|
|
1270 # Initialize atom identifier type information...
|
|
1271 #
|
|
1272 # Current supported values:
|
|
1273 #
|
|
1274 # AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
|
|
1275 # MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
|
|
1276 #
|
|
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;

  # Set up defaults that depend on $This->{AtomIdentifierType}.
  #
  # AtomicInvariantsAtomTypes and FunctionalClassAtomTypes have their own
  # initialization methods; the other supported types need no setup. An
  # unsupported type causes a croak. Returns $This.
  #
  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
    return $This;
  }

  if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
    return $This;
  }

  if ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    # Nothing to do for now...
    return $This;
  }

  croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
}
|
|
1295
|
|
1296 # Initialize atomic invariants atom types to use for generating atom identifiers...
|
|
1297 #
|
|
1298 # Let:
|
|
1299 # AS = Atom symbol corresponding to element symbol
|
|
1300 #
|
|
1301 # X<n> = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
1302 # BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
1303 # LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
1304 # SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
1305 # DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
1306 # TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
1307 # H<n> = Number of implicit and explicit hydrogens for atom
|
|
1308 # Ar = Aromatic annotation indicating whether atom is aromatic
|
|
1309 # RA = Ring atom annotation indicating whether atom is a ring
|
|
1310 # FC<+n/-n> = Formal charge assigned to atom
|
|
1311 # MN<n> = Mass number indicating isotope other than most abundant isotope
|
|
1312 # SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
|
|
1313 #
|
|
1314 # Then:
|
|
1315 #
|
|
1316 # Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
|
|
1317 #
|
|
1318 # AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
|
|
1319 #
|
|
1320 # Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
|
|
1321 # optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
|
|
1322 # AtomID specification doesn't include atomic invariants with zero or undefined values.
|
|
1323 #
|
|
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;

  # Reset @{$This->{AtomicInvariantsToUse}} to the default atomic
  # invariants: AS, X, BO, H, FC. Returns $This.
  #
  my @DefaultAtomicInvariants = qw(AS X BO H FC);

  @{$This->{AtomicInvariantsToUse}} = @DefaultAtomicInvariants;

  return $This;
}
|
|
1334
|
|
1335 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
|
|
1336 # class, to use for generating atom identifiers...
|
|
1337 #
|
|
1338 # Let:
|
|
1339 # HBD: HydrogenBondDonor
|
|
1340 # HBA: HydrogenBondAcceptor
|
|
1341 # PI : PositivelyIonizable
|
|
1342 # NI : NegativelyIonizable
|
|
1343 # Ar : Aromatic
|
|
1344 # Hal : Halogen
|
|
1345 # H : Hydrophobic
|
|
1346 # RA : RingAtom
|
|
1347 # CA : ChainAtom
|
|
1348 #
|
|
1349 # Then:
|
|
1350 #
|
|
1351 # Functional class atom type specification for an atom corresponds to:
|
|
1352 #
|
|
1353 # Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
|
|
1354 #
|
|
1355 # Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
|
|
1356 #
|
|
1357 # FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
|
|
1358 #
|
|
1359 # HydrogenBondDonor: NH, NH2, OH
|
|
1360 # HydrogenBondAcceptor: N[!H], O
|
|
1361 # PositivelyIonizable: +, NH2
|
|
1362 # NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
|
|
1363 #
|
|
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;

  # Reset @{$This->{FunctionalClassesToUse}} to the default functional
  # classes: HBD, HBA, PI, NI, Ar, Hal. Returns $This.
  #
  my @DefaultFunctionalClasses = qw(HBD HBA PI NI Ar Hal);

  @{$This->{FunctionalClassesToUse}} = @DefaultFunctionalClasses;

  return $This;
}
|
|
1375
|
|
1376 # Return a string containing data for PathLengthFingerprints object...
|
|
1377 #
|
|
sub StringifyPathLengthFingerprints {
  my($This) = @_;

  # Return a one-line, '; ' separated description of the object made up of:
  #
  #   . Fingerprint type and atom identifier type
  #   . Minimum and maximum path lengths
  #   . Path generation flags reported as Yes/No: UseUniquePaths,
  #     AllowSharedBonds, AllowRings, UseBondSymbols
  #   . Atomic invariants or functional classes information for
  #     AtomicInvariantsAtomTypes or FunctionalClassAtomTypes identifiers
  #   . Size, bits per path, bit counts and bit vector for PathLengthBits,
  #     or the fingerprints vector for PathLengthCount
  #
  my($PathLengthsFingerprintsString);

  # Type of fingerprint...
  $PathLengthsFingerprintsString = "Fingerprint type: $This->{Type}; AtomIdentifierType: $This->{AtomIdentifierType}";

  # Path length...
  $PathLengthsFingerprintsString .= "; MinPathLength: $This->{MinLength}; MaxPathLength: $This->{MaxLength}";

  # Fingerprint generation control...
  my($AllowSharedBonds, $AllowRings, $UseBondSymbols, $UseUniquePaths);

  $AllowSharedBonds = $This->{AllowSharedBonds} ? "Yes" : "No";
  $AllowRings = $This->{AllowRings} ? "Yes" : "No";
  $UseBondSymbols = $This->{UseBondSymbols} ? "Yes" : "No";

  # Report the unique paths setting itself and not the bond symbols setting...
  $UseUniquePaths = $This->{UseUniquePaths} ? "Yes" : "No";

  $PathLengthsFingerprintsString .= "; UseUniquePaths: $UseUniquePaths; AllowSharedBonds: $AllowSharedBonds; AllowRings: $AllowRings; UseBondSymbols: $UseBondSymbols";

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my($AtomicInvariant, @AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants);

    @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

    # Pair each invariant with its entry in the available invariants hash...
    for $AtomicInvariant (@AtomicInvariantsOrder) {
      push @AtomicInvariants, "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}";
    }

    $PathLengthsFingerprintsString .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    $PathLengthsFingerprintsString .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
    $PathLengthsFingerprintsString .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my($FunctionalClass, @FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses);

    @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

    # Pair each class with its entry in the available classes hash...
    for $FunctionalClass (@FunctionalClassesOrder) {
      push @FunctionalClasses, "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}";
    }

    $PathLengthsFingerprintsString .= "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    $PathLengthsFingerprintsString .= "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
    $PathLengthsFingerprintsString .= "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
  }

  if ($This->{Type} =~ /^PathLengthBits$/i) {
    # Size...
    $PathLengthsFingerprintsString .= "; Size: $This->{Size}; MinSize: $This->{MinSize}; MaxSize: $This->{MaxSize}";

    # NumOfBitsToSetPerPath...
    $PathLengthsFingerprintsString .= "; NumOfBitsToSetPerPath: $This->{NumOfBitsToSetPerPath}";

    # Fingerprint bit density and num of bits set...
    my($NumOfSetBits, $BitDensity);
    $NumOfSetBits = $This->{FingerprintsBitVector}->GetNumOfSetBits();
    $BitDensity = $This->{FingerprintsBitVector}->GetFingerprintsBitDensity();
    $PathLengthsFingerprintsString .= "; NumOfOnBits: $NumOfSetBits; BitDensity: $BitDensity";

    $PathLengthsFingerprintsString .= "; FingerprintsBitVector: < $This->{FingerprintsBitVector} >";
  }
  elsif ($This->{Type} =~ /^PathLengthCount$/i) {
    $PathLengthsFingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";
  }

  return $PathLengthsFingerprintsString;
}
|
|
1448
|
|
1449 1;
|
|
1450
|
|
1451 __END__
|
|
1452
|
|
1453 =head1 NAME
|
|
1454
|
|
1455 PathLengthFingerprints
|
|
1456
|
|
1457 =head1 SYNOPSIS
|
|
1458
|
|
1459 use Fingerprints::PathLengthFingerprints;
|
|
1460
|
|
1461 use Fingerprints::PathLengthFingerprints qw(:all);
|
|
1462
|
|
1463 =head1 DESCRIPTION
|
|
1464
|
|
1465 B<PathLengthFingerprints> class provides the following methods:
|
|
1466
|
|
1467 new, GenerateFingerprints, GetDescription, SetAtomIdentifierType,
|
|
1468 SetAtomicInvariantsToUse, SetFunctionalClassesToUse, SetMaxLength,
|
|
1469 SetMinLength, SetNumOfBitsToSetPerPath, SetType,
|
|
1470 StringifyPathLengthFingerprints
|
|
1471
|
|
1472 B<PathLengthFingerprints> is derived from B<Fingerprints> class which in turn
|
|
1473 is derived from B<ObjectProperty> base class that provides methods not explicitly defined
|
|
1474 in B<PathLengthFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's
|
|
1475 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property:
|
|
1476
|
|
1477 Set<PropertyName>(<PropertyValue>);
|
|
1478 $PropertyValue = Get<PropertyName>();
|
|
1479 Delete<PropertyName>();
|
|
1480
|
|
1481 The current release of MayaChemTools supports generation of B<PathLengthFingerprints>
|
|
1482 corresponding to following B<AtomIdentifierTypes>:
|
|
1483
|
|
1484 AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
|
|
1485 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
|
|
1486 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
|
|
1487
|
|
1488 Based on the values specified for B<Type>, B<AtomIdentifierType>, B<MinPathLength> and
|
|
1489 B<MaxPathLength>, all appropriate atom paths are generated for each atom in the molecule
|
|
1490 and collected in a list and the list is filtered to remove any structurally duplicate paths as
|
|
1491 indicated by the value of B<UseUniquePaths>.
|
|
1492
|
|
1493 For molecules containing rings, atom paths starting from each atom can be traversed in four
|
|
1494 different ways:
|
|
1495
|
|
1496 o Atom paths without any rings and sharing of bonds in traversed paths.
|
|
1497 o Atom paths containing rings and without any sharing of bonds in
|
|
1498 traversed paths
|
|
1499 o All possible atom paths without any rings and sharing of bonds in
|
|
1500 traversed paths
|
|
1501 o All possible atom paths containing rings and with sharing of bonds in
|
|
1502 traversed paths.
|
|
1503
|
|
1504 Atom path traversal is terminated at the last ring atom. For molecules containing no rings,
|
|
1505 first two and last two types described above are equivalent.
|
|
1506
|
|
1507 B<AllowSharedBonds> and B<AllowRings> allow generation of different types of paths
|
|
1508 to be used for fingerprints generation.
|
|
1509
|
|
1510 The combination of B<AllowSharedBonds>, B<AllowRings>, and B<UseBondSymbols> allows generation of
|
|
1511 8 different types of path length fingerprints:
|
|
1512
|
|
1513 AllowSharedBonds AllowRings UseBondSymbols
|
|
1514
|
|
1515 0 0 1 - AtomPathsNoCyclesWithBondSymbols
|
|
1516 0 1 1 - AtomPathsWithCyclesWithBondSymbols
|
|
1517
|
|
1518 1 0 1 - AllAtomPathsNoCyclesWithBondSymbols
|
|
1519 1 1 1 - AllAtomPathsWithCyclesWithBondSymbols
|
|
1520 [ DEFAULT ]
|
|
1521
|
|
1522 0 0 0 - AtomPathsNoCyclesNoBondSymbols
|
|
1523 0 1 0 - AtomPathsWithCyclesNoBondSymbols
|
|
1524
|
|
1525 1 0 0 - AllAtomPathsNoCyclesNoBondSymbols
|
|
1526 1 1 0 - AllAtomPathsWithCyclesNoWithBondSymbols
|
|
1527
|
|
1528 Additionally, possible values for option B<--AtomIdentifierType> in conjunction with corresponding
|
|
1529 specified values for B<AtomicInvariantsToUse> and B<FunctionalClassesToUse> change the nature
|
|
1530 of atom path length strings and the fingerprints.
|
|
1531
|
|
1532 For each atom path in the filtered atom paths list, an atom path string is created using value of
|
|
1533 B<AtomIdentifierType> and specified values to use for a particular atom identifier type.
|
|
1534 Value of B<UseBondSymbols> controls whether bond order symbols are used during generation
|
|
1535 of atom path string. Atom symbol corresponds to element symbol and characters used to represent
|
|
1536 bond order are: I<1 - None; 2 - '='; 3 - '#'; 1.5 or aromatic - ':'; others: bond order value>. By default,
|
|
1537 bond symbols are included in atom path strings. Exclusion of bond symbols in atom path strings
|
|
1538 results in fingerprints which correspond purely to atom paths without considering bonds.
|
|
1539
|
|
1540 B<UseUniquePaths> controls whether structurally duplicate atom path strings are removed
|
|
1541 from the list.
|
|
1542
|
|
1543 For I<PathLengthBits> value of B<Type>, each atom path is hashed to a 32 bit unsigned
|
|
1544 integer key using B<TextUtil::HashCode> function. Using the hash key as a seed for a random number
|
|
1545 generator, a random integer value between 0 and B<Size> is used to set corresponding bits
|
|
1546 in the fingerprint bit-vector string. Value of B<NumOfBitsToSetPerPath> option controls the number
|
|
1547 of times a random number is generated to set corresponding bits.
|
|
1548
|
|
1549 For I<PathLengthCount> value of B<Type>, the number of times an atom path appears
|
|
1550 is tracked and a fingerprints count-string corresponding to count of atom paths is generated.
|
|
1551
|
|
1552 The current release of MayaChemTools generates the following types of path length
|
|
1553 fingerprints bit-vector and vector strings:
|
|
1554
|
|
1555 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
|
|
1556 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110
|
|
1557 0100010101011000101001011100110001000010001001101000001001001001001000
|
|
1558 0010110100000111001001000001001010100100100000000011000000101001011100
|
|
1559 0010000001000101010100000100111100110111011011011000000010110111001101
|
|
1560 0101100011000000010001000011000010100011101100001000001000100000000...
|
|
1561
|
|
1562 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
|
|
1563 th1:MaxLength8;1024;HexadecimalString;Ascending;48caa1315d82d91122b029
|
|
1564 42861c9409a4208182d12015509767bd0867653604481a8b1288000056090583603078
|
|
1565 9cedae54e26596889ab121309800900490515224208421502120a0dd9200509723ae89
|
|
1566 00024181b86c0122821d4e4880c38620dab280824b455404009f082003d52c212b4e6d
|
|
1567 6ea05280140069c780290c43
|
|
1568
|
|
1569 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength
|
|
1570 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2
|
|
1571 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X
|
|
1572 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1
|
|
1573 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO
|
|
1574 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C....
|
|
1575
|
|
1576 FingerprintsVector;PathLengthCount:DREIDINGAtomTypes:MinLength1:MaxLen
|
|
1577 gth8;410;NumericalValues;IDsAndValuesPairsString;C_2 2 C_3 9 C_R 22 F_
|
|
1578 1 N_3 1 N_R 1 O_2 2 O_3 3 C_2=O_2 2 C_2C_3 1 C_2C_R 1 C_2N_3 1 C_2O_3
|
|
1579 1 C_3C_3 7 C_3C_R 1 C_3N_R 1 C_3O_3 2 C_R:C_R 21 C_R:N_R 2 C_RC_R 2 C
|
|
1580 _RF_ 1 C_RN_3 1 C_2C_3C_3 1 C_2C_R:C_R 2 C_2N_3C_R 1 C_3C_2=O_2 1 C_3C
|
|
1581 _2O_3 1 C_3C_3C_3 5 C_3C_3C_R 2 C_3C_3N_R 1 C_3C_3O_3 4 C_3C_R:C_R ...
|
|
1582
|
|
1583 FingerprintsVector;PathLengthCount:EStateAtomTypes:MinLength1:MaxLengt
|
|
1584 h8;454;NumericalValues;IDsAndValuesPairsString;aaCH 14 aasC 8 aasN 1 d
|
|
1585 O 2 dssC 2 sCH3 2 sF 1 sOH 3 ssCH2 4 ssNH 1 sssCH 3 aaCH:aaCH 10 aaCH:
|
|
1586 aasC 8 aasC:aasC 3 aasC:aasN 2 aasCaasC 2 aasCdssC 1 aasCsF 1 aasCssNH
|
|
1587 1 aasCsssCH 1 aasNssCH2 1 dO=dssC 2 dssCsOH 1 dssCssCH2 1 dssCssNH 1
|
|
1588 sCH3sssCH 2 sOHsssCH 2 ssCH2ssCH2 1 ssCH2sssCH 4 aaCH:aaCH:aaCH 6 a...
|
|
1589
|
|
1590 FingerprintsVector;PathLengthCount:FunctionalClassAtomTypes:MinLength1
|
|
1591 :MaxLength8;404;NumericalValues;IDsAndValuesPairsString;Ar 22 Ar.HBA 1
|
|
1592 HBA 2 HBA.HBD 3 HBD 1 Hal 1 NI 1 None 10 Ar.HBA:Ar 2 Ar.HBANone 1 Ar:
|
|
1593 Ar 21 ArAr 2 ArHBD 1 ArHal 1 ArNone 2 HBA.HBDNI 1 HBA.HBDNone 2 HBA=NI
|
|
1594 1 HBA=None 1 HBDNone 1 NINone 1 NoneNone 7 Ar.HBA:Ar:Ar 2 Ar.HBA:ArAr
|
|
1595 1 Ar.HBA:ArNone 1 Ar.HBANoneNone 1 Ar:Ar.HBA:Ar 1 Ar:Ar.HBANone 2 ...
|
|
1596
|
|
1597 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt
|
|
1598 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1
|
|
1599 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N
|
|
1600 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1
|
|
1601 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR
|
|
1602 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ...
|
|
1603
|
|
1604 FingerprintsVector;PathLengthCount:SLogPAtomTypes:MinLength1:MaxLength
|
|
1605 8;518;NumericalValues;IDsAndValuesPairsString;C1 5 C10 1 C11 1 C14 1 C
|
|
1606 18 14 C20 4 C21 2 C22 1 C5 2 CS 2 F 1 N11 1 N4 1 O10 1 O2 3 O9 1 C10C1
|
|
1607 1 C10N11 1 C11C1 2 C11C21 1 C14:C18 2 C14F 1 C18:C18 10 C18:C20 4 C18
|
|
1608 :C22 2 C1C5 1 C1CS 4 C20:C20 1 C20:C21 1 C20:N11 1 C20C20 2 C21:C21 1
|
|
1609 C21:N11 1 C21C5 1 C22N4 1 C5=O10 1 C5=O9 1 C5N4 1 C5O2 1 CSO2 2 C10...
|
|
1610
|
|
1611 FingerprintsVector;PathLengthCount:SYBYLAtomTypes:MinLength1:MaxLength
|
|
1612 8;412;NumericalValues;IDsAndValuesPairsString;C.2 2 C.3 9 C.ar 22 F 1
|
|
1613 N.am 1 N.ar 1 O.2 1 O.3 2 O.co2 2 C.2=O.2 1 C.2=O.co2 1 C.2C.3 1 C.2C.
|
|
1614 ar 1 C.2N.am 1 C.2O.co2 1 C.3C.3 7 C.3C.ar 1 C.3N.ar 1 C.3O.3 2 C.ar:C
|
|
1615 .ar 21 C.ar:N.ar 2 C.arC.ar 2 C.arF 1 C.arN.am 1 C.2C.3C.3 1 C.2C.ar:C
|
|
1616 .ar 2 C.2N.amC.ar 1 C.3C.2=O.co2 1 C.3C.2O.co2 1 C.3C.3C.3 5 C.3C.3...
|
|
1617
|
|
1618 FingerprintsVector;PathLengthCount:TPSAAtomTypes:MinLength1:MaxLength8
|
|
1619 ;331;NumericalValues;IDsAndValuesPairsString;N21 1 N7 1 None 34 O3 2 O
|
|
1620 4 3 N21:None 2 N21None 1 N7None 2 None:None 21 None=O3 2 NoneNone 13 N
|
|
1621 oneO4 3 N21:None:None 2 N21:NoneNone 2 N21NoneNone 1 N7None:None 2 N7N
|
|
1622 one=O3 1 N7NoneNone 1 None:N21:None 1 None:N21None 2 None:None:None 20
|
|
1623 None:NoneNone 12 NoneN7None 1 NoneNone=O3 2 NoneNoneNone 8 NoneNon...
|
|
1624
|
|
1625 FingerprintsVector;PathLengthCount:UFFAtomTypes:MinLength1:MaxLength8;
|
|
1626 410;NumericalValues;IDsAndValuesPairsString;C_2 2 C_3 9 C_R 22 F_ 1 N_
|
|
1627 3 1 N_R 1 O_2 2 O_3 3 C_2=O_2 2 C_2C_3 1 C_2C_R 1 C_2N_3 1 C_2O_3 1 C_
|
|
1628 3C_3 7 C_3C_R 1 C_3N_R 1 C_3O_3 2 C_R:C_R 21 C_R:N_R 2 C_RC_R 2 C_RF_
|
|
1629 1 C_RN_3 1 C_2C_3C_3 1 C_2C_R:C_R 2 C_2N_3C_R 1 C_3C_2=O_2 1 C_3C_2O_3
|
|
1630 1 C_3C_3C_3 5 C_3C_3C_R 2 C_3C_3N_R 1 C_3C_3O_3 4 C_3C_R:C_R 1 C_3...
|
|
1631
|
|
1632 =head2 METHODS
|
|
1633
|
|
1634 =over 4
|
|
1635
|
|
1636 =item B<new>
|
|
1637
|
|
1638 $NewPathLengthFingerprints = new PathLengthFingerprints(
|
|
1639 %NamesAndValues);
|
|
1640
|
|
1641 Using specified I<PathLengthFingerprints> property names and values hash, B<new> method creates a new object
|
|
1642 and returns a reference to newly created B<PathLengthFingerprints> object. By default, the following properties are
|
|
1643 initialized:
|
|
1644
|
|
1645 Molecule = '';
|
|
1646 Type = ''
|
|
1647 Size = 1024
|
|
1648 MinSize = 32
|
|
1649 MaxSize = 2**32
|
|
1650 NumOfBitsToSetPerPath = 1
|
|
1651 MinLength = 1
|
|
1652 MaxLength = 8
|
|
1653 AllowSharedBonds = 1
|
|
1654 AllowRings = 1
|
|
1655 UseBondSymbols = 1
|
|
1656 UseUniquePaths = ''
|
|
1657 AtomIdentifierType = ''
|
|
1658 AtomicInvariantsToUse = ['AS']
|
|
1659 FunctionalClassesToUse = ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal']
|
|
1660
|
|
1661 Examples:
|
|
1662
|
|
1663 $PathLengthFingerprints = new PathLengthFingerprints(
|
|
1664 'Molecule' => $Molecule,
|
|
1665 'Type' => 'PathLengthBits',
|
|
1666 'AtomIdentifierType' =>
|
|
1667 'AtomicInvariantsAtomTypes');
|
|
1668
|
|
1669 $PathLengthFingerprints = new PathLengthFingerprints(
|
|
1670 'Molecule' => $Molecule,
|
|
1671 'Type' => 'PathLengthBits',
|
|
1672 'Size' => 1024,
|
|
1673 'MinLength' => 1,
|
|
1674 'MaxLength' => 8,
|
|
1675 'AllowRings' => 1,
|
|
1676 'AllowSharedBonds' => 1,
|
|
1677 'UseBondSymbols' => 1,
|
|
1678 'UseUniquePaths' => 1,
|
|
1679 'AtomIdentifierType' =>
|
|
1680 'AtomicInvariantsAtomTypes',
|
|
1681 'AtomicInvariantsToUse' => ['AS']);
|
|
1682
|
|
1683 $PathLengthFingerprints = new PathLengthFingerprints(
|
|
1684 'Molecule' => $Molecule,
|
|
1685 'Type' => 'PathLengthCount',
|
|
1686 'MinLength' => 1,
|
|
1687 'MaxLength' => 8,
|
|
1688 'AllowRings' => 1,
|
|
1689 'AllowSharedBonds' => 1,
|
|
1690 'UseBondSymbols' => 1,
|
|
1691 'UseUniquePaths' => 1,
|
|
1692 'AtomIdentifierType' =>
|
|
1693 'AtomicInvariantsAtomTypes',
|
|
1694 'AtomicInvariantsToUse' => ['AS']);
|
|
1695
|
|
1696 $PathLengthFingerprints = new PathLengthFingerprints(
|
|
1697 'Molecule' => $Molecule,
|
|
1698 'Type' => 'PathLengthBits',
|
|
1699 'AtomIdentifierType' =>
|
|
1700 'SLogPAtomTypes');
|
|
1701
|
|
1702 $PathLengthFingerprints = new PathLengthFingerprints(
|
|
1703 'Molecule' => $Molecule,
|
|
1704 'Type' => 'PathLengthCount',
|
|
1705 'AtomIdentifierType' =>
|
|
1706 'SYBYLAtomTypes');
|
|
1707
|
|
1708 $PathLengthFingerprints = new PathLengthFingerprints(
|
|
1709 'Molecule' => $Molecule,
|
|
1710 'Type' => 'PathLengthBits',
|
|
1711 'AtomIdentifierType' =>
|
|
1712 'FunctionalClassAtomTypes',
|
|
1713 'FunctionalClassesToUse' => ['HBD', 'HBA', 'Ar']);
|
|
1714
|
|
1715 $PathLengthFingerprints->GenerateFingerprints();
|
|
1716 print "$PathLengthFingerprints\n";
|
|
1717
|
|
1718 =item B<GetDescription>
|
|
1719
|
|
1720 $Description = $PathLengthFingerprints->GetDescription();
|
|
1721
|
|
1722 Returns a string containing description of path length fingerprints.
|
|
1723
|
|
1724 =item B<GenerateFingerprints>
|
|
1725
|
|
1726 $PathLengthFingerprints->GenerateFingerprints();
|
|
1727
|
|
1728 Generates path length fingerprints and returns I<PathLengthFingerprints>.
|
|
1729
|
|
1730 =item B<SetMaxLength>
|
|
1731
|
|
1732 $PathLengthFingerprints->SetMaxLength($Length);
|
|
1733
|
|
1734 Sets maximum value of atom path length to be used during atom path length fingerprints
|
|
1735 generation and returns I<PathLengthFingerprints>.
|
|
1736
|
|
1737 =item B<SetAtomIdentifierType>
|
|
1738
|
|
1739 $PathLengthFingerprints->SetAtomIdentifierType($IdentifierType);
|
|
1740
|
|
1741 Sets atom I<IdentifierType> to use during path length fingerprints generation and
|
|
1742 returns I<PathLengthFingerprints>.
|
|
1743
|
|
1744 Possible values: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
|
|
1745 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
|
|
1746 TPSAAtomTypes, UFFAtomTypes>.
|
|
1747
|
|
1748 =item B<SetAtomicInvariantsToUse>
|
|
1749
|
|
1750 $PathLengthFingerprints->SetAtomicInvariantsToUse($ValuesRef);
|
|
1751 $PathLengthFingerprints->SetAtomicInvariantsToUse(@Values);
|
|
1752
|
|
1753 Sets atomic invariants to use during I<AtomicInvariantsAtomTypes> value of I<AtomIdentifierType>
|
|
1754 for path length fingerprints generation and returns I<PathLengthFingerprints>.
|
|
1755
|
|
1756 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB,
|
|
1757 H, Ar, RA, FC, MN, SM>. Default value: I<AS>.
|
|
1758
|
|
1759 The atomic invariants abbreviations correspond to:
|
|
1760
|
|
1761 AS = Atom symbol corresponding to element symbol
|
|
1762
|
|
1763 X<n> = Number of non-hydrogen atom neighbors or heavy atoms
|
|
1764 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms
|
|
1765 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms
|
|
1766 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms
|
|
1767 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms
|
|
1768 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms
|
|
1769 H<n> = Number of implicit and explicit hydrogens for atom
|
|
1770 Ar = Aromatic annotation indicating whether atom is aromatic
|
|
1771 RA = Ring atom annotation indicating whether atom is in a ring
|
|
1772 FC<+n/-n> = Formal charge assigned to atom
|
|
1773 MN<n> = Mass number indicating isotope other than most abundant isotope
|
|
1774 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or
|
|
1775 3 (triplet)
|
|
1776
|
|
1777 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
|
|
1778
|
|
1779 AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
|
|
1780
|
|
1781 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
|
|
1782 optional. Atom type specification doesn't include atomic invariants with zero or undefined values.
|
|
1783
|
|
1784 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words
|
|
1785 are also allowed:
|
|
1786
|
|
1787 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
|
|
1788 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
|
|
1789 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
|
|
1790 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
|
|
1791 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
|
|
1792 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
|
|
1793 H : NumOfImplicitAndExplicitHydrogens
|
|
1794 Ar : Aromatic
|
|
1795 RA : RingAtom
|
|
1796 FC : FormalCharge
|
|
1797 MN : MassNumber
|
|
1798 SM : SpinMultiplicity
|
|
1799
|
|
1800 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant
|
|
1801 atom types.
|
|
1802
|
|
1803 =item B<SetFunctionalClassesToUse>
|
|
1804
|
|
1805 $PathLengthFingerprints->SetFunctionalClassesToUse($ValuesRef);
|
|
1806 $PathLengthFingerprints->SetFunctionalClassesToUse(@Values);
|
|
1807
|
|
1808 Sets functional classes invariants to use during I<FunctionalClassAtomTypes> value of I<AtomIdentifierType>
|
|
1809 for path length fingerprints generation and returns I<PathLengthFingerprints>.
|
|
1810
|
|
1811 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>.
|
|
1812 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>.
|
|
1813
|
|
1814 The functional class abbreviations correspond to:
|
|
1815
|
|
1816 HBD: HydrogenBondDonor
|
|
1817 HBA: HydrogenBondAcceptor
|
|
1818 PI : PositivelyIonizable
|
|
1819 NI : NegativelyIonizable
|
|
1820 Ar : Aromatic
|
|
1821 Hal : Halogen
|
|
1822 H : Hydrophobic
|
|
1823 RA : RingAtom
|
|
1824 CA : ChainAtom
|
|
1825
|
|
1826 Functional class atom type specification for an atom corresponds to:
|
|
1827
|
|
1828 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA or None
|
|
1829
|
|
1830 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom
|
|
1831 types. It uses the following definitions [ Ref 60-61, Ref 65-66 ]:
|
|
1832
|
|
1833 HydrogenBondDonor: NH, NH2, OH
|
|
1834 HydrogenBondAcceptor: N[!H], O
|
|
1835 PositivelyIonizable: +, NH2
|
|
1836 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
|
|
1837
|
|
1838 =item B<SetMinLength>
|
|
1839
|
|
1840 $PathLengthFingerprints->SetMinLength($Length);
|
|
1841
|
|
1842 Sets minimum value of atom path length to be used during atom path length fingerprints
|
|
1843 generation and returns I<PathLengthFingerprints>.
|
|
1844
|
|
1845 =item B<SetMaxLength>
|
|
1846
|
|
1847 $PathLengthFingerprints->SetMaxLength($Length);
|
|
1848
|
|
1849 Sets maximum value of atom path length to be used during atom path length fingerprints
|
|
1850 generation and returns I<PathLengthFingerprints>.
|
|
1851
|
|
1852 =item B<SetNumOfBitsToSetPerPath>
|
|
1853
|
|
1854 $PathLengthFingerprints->SetNumOfBitsToSetPerPath($NumOfBits);
|
|
1855
|
|
1856 Sets number of bits to set for each path for I<PathLengthBits> B<Type> during path length fingerprints
|
|
1857 generation and returns I<PathLengthFingerprints>.
|
|
1858
|
|
1859 =item B<SetType>
|
|
1860
|
|
1861 $PathLengthFingerprints->SetType($Type);
|
|
1862
|
|
1863 Sets type of path length fingerprints and returns I<PathLengthFingerprints>. Possible values:
|
|
1864 I<PathLengthBits or PathLengthCount>.
|
|
1865
|
|
1866 =item B<StringifyPathLengthFingerprints>
|
|
1867
|
|
1868 $String = $PathLengthFingerprints->StringifyPathLengthFingerprints();
|
|
1869
|
|
1870 Returns a string containing information about I<PathLengthFingerprints> object.
|
|
1871
|
|
1872 =back
|
|
1873
|
|
1874 =head1 AUTHOR
|
|
1875
|
|
1876 Manish Sud <msud@san.rr.com>
|
|
1877
|
|
1878 =head1 SEE ALSO
|
|
1879
|
|
1880 Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm,
|
|
1881 AtomTypesFingerprints.pm, EStateIndiciesFingerprints.pm, ExtendedConnectivityFingerprints.pm,
|
|
1882 MACCSKeys.pm, TopologicalAtomPairsFingerprints.pm, TopologicalAtomTripletsFingerprints.pm,
|
|
1883 TopologicalAtomTorsionsFingerprints.pm, TopologicalPharmacophoreAtomPairsFingerprints.pm,
|
|
1884 TopologicalPharmacophoreAtomTripletsFingerprints.pm
|
|
1885
|
|
1886 =head1 COPYRIGHT
|
|
1887
|
|
1888 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
1889
|
|
1890 This file is part of MayaChemTools.
|
|
1891
|
|
1892 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
1893 the terms of the GNU Lesser General Public License as published by the Free
|
|
1894 Software Foundation; either version 3 of the License, or (at your option)
|
|
1895 any later version.
|
|
1896
|
|
1897 =cut
|