Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/Fingerprints/PathLengthFingerprints.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4816e4a8ae95 |
---|---|
1 package Fingerprints::PathLengthFingerprints; | |
2 # | |
3 # $RCSfile: PathLengthFingerprints.pm,v $ | |
4 # $Date: 2015/02/28 20:48:54 $ | |
5 # $Revision: 1.39 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability of fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use Carp; | |
31 use Exporter; | |
32 use TextUtil (); | |
33 use MathUtil (); | |
34 use Fingerprints::Fingerprints; | |
35 use Molecule; | |
36 use AtomTypes::AtomicInvariantsAtomTypes; | |
37 use AtomTypes::DREIDINGAtomTypes; | |
38 use AtomTypes::EStateAtomTypes; | |
39 use AtomTypes::FunctionalClassAtomTypes; | |
40 use AtomTypes::MMFF94AtomTypes; | |
41 use AtomTypes::SLogPAtomTypes; | |
42 use AtomTypes::SYBYLAtomTypes; | |
43 use AtomTypes::TPSAAtomTypes; | |
44 use AtomTypes::UFFAtomTypes; | |
45 | |
# Package globals consulted by Exporter and by Perl's method resolution. These
# are declared with "our" instead of the obsolete "use vars" pragma...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

# Inherit from Fingerprints::Fingerprints and Exporter; nothing is exported...
@ISA = qw(Fingerprints::Fingerprints Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Overload Perl functions: using an object as a string invokes the method
# named StringifyPathLengthFingerprints...
use overload '""' => 'StringifyPathLengthFingerprints';
60 | |
# Class constructor...
#
# Takes the class name (or an existing object) followed by a list of property
# name and value pairs. The object is created by the base class constructor,
# blessed into this class, loaded with default settings, and then configured
# using the specified names and values.
#
# Returns the new object.
#
sub new {
  my($Class, %NamesAndValues) = @_;
  my($This);

  # Create the base object and bless it into this class...
  $This = bless $Class->SUPER::new(), (ref($Class) || $Class);

  # Setup defaults before applying any caller specified properties...
  $This->_InitializePathLengthFingerprints();
  $This->_InitializePathLengthFingerprintsProperties(%NamesAndValues);

  return $This;
}
74 | |
# Initialize object data...
#
# Loads the object with the default value of every setting and with empty
# containers for the data generated during fingerprints generation.
#
sub _InitializePathLengthFingerprints {
  my($This) = @_;
  my($ContainerName);

  # Type of fingerprint to generate...
  #
  # PathLengthBits - A bit vector indicating presence/absence of atom paths
  # PathLengthCount - A vector containing count of atom paths
  #
  $This->{Type} = '';

  # Type of vector: FingerprintsBitVector or FingerprintsVector
  $This->{VectorType} = '';

  # Set default, minimum, and maximum size. Although any arbitrary size can
  # be specified, the bit vector used to store bits works on a vector size which
  # is a power of 2 and additional bits are automatically added and cleared.
  #
  ($This->{Size}, $This->{MinSize}, $This->{MaxSize}) = (1024, 32, 2**32);

  # Minimum and maximum path lengths to use for fingerprints generation...
  ($This->{MinLength}, $This->{MaxLength}) = (1, 8);

  # Number of bits to set for each atom path for FingerprintsBitVector...
  $This->{NumOfBitsToSetPerPath} = 1;

  # Atom identifier type to use for path atoms during fingerprints generation...
  #
  # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
  # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
  # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
  #
  $This->{AtomIdentifierType} = '';

  # For molecules containing rings, atom paths starting from each atom can be
  # traversed in four different ways:
  #
  # . Atom paths without any rings and sharing of bonds in traversed paths.
  # . Atom paths containing rings and without any sharing of bonds in traversed paths
  # . All possible atom paths without any rings and sharing of bonds in traversed paths
  # . All possible atom paths containing rings and with sharing of bonds in traversed paths.
  #
  # Atom path traversal is terminated at the last ring atom. For molecules containing
  # no rings, first two and last two types described above are equivalent.
  #
  # AllowSharedBonds and AllowRings variables allow generation of different types of
  # paths to be used for fingerprints generation.
  #
  # In addition to atom symbols, bond symbols are also used to generate a string
  # for atom paths. These atom path strings are hashed to a 32 bit integer key which
  # in turn is used as a seed for a random number generation in range of 1 to
  # fingerprint size for setting corresponding bit in bit vector.
  #
  # UseBondSymbols variable controls whether bond symbols are included in atom path
  # strings and consequently in fingerprints.
  #
  # Combination of AllowSharedBonds, AllowRings, and UseBondSymbols allows generation
  # of 8 different types of path length fingerprints:
  #
  # AllowSharedBonds AllowRings UseBondSymbols PathLengthFingerprintsType
  #
  # No  No  Yes AtomPathsNoCyclesWithBondSymbols
  # No  Yes Yes AtomPathsWithCyclesWithBondSymbols
  #
  # Yes No  Yes AllAtomPathsNoCyclesWithBondSymbols
  # Yes Yes Yes AllAtomPathsWithCyclesWithBondSymbols [ DEFAULT ]
  #
  # No  No  No  AtomPathsNoCyclesNoBondSymbols
  # No  Yes No  AtomPathsWithCyclesNoBondSymbols
  #
  # Yes No  No  AllAtomPathsNoCyclesNoBondSymbols
  # Yes Yes No  AllAtomPathsWithCyclesNoBondSymbols
  #

  # By default, atom paths starting from atoms are allowed to share bonds
  # already traversed, rings are included in paths, bond symbols are included in
  # atom path strings, and only structurally unique atom paths are used for
  # generation of atom path strings...
  $This->{AllowSharedBonds} = 1;
  $This->{AllowRings} = 1;
  $This->{UseBondSymbols} = 1;
  $This->{UseUniquePaths} = 1;

  # Random number generator to use during generation of fingerprints bit-vector
  # string: Perl CORE::rand or MayaChemTools MathUtil::random function.
  #
  # The random number generator implemented in MayaChemTools is a variant of
  # linear congruential generator (LCG) as described by Miller et al. [ Ref 120 ].
  # It is also referred to as Lehmer random number generator or Park-Miller
  # random number generator.
  #
  # Unlike Perl's core random number generator function rand, the random number
  # generator implemented in MayaChemTools, MathUtil::random, generates consistent
  # random values across different platforms for a specific random seed and leads
  # to generation of portable fingerprints bit-vector strings.
  #
  $This->{UsePerlCoreRandom} = 1;

  # Bond symbols to use during generation of atom path strings...
  %{$This->{BondOrderToSymbol}} = ('1' => '', '1.5' => ':', '2' => '=', '3' => '#');

  # Empty containers:
  #
  # AssignedAtomTypes - Atom types assigned to atoms
  # BondSymbols - Bond symbols map for bonded atom IDs used during atom path strings
  # UniqueLinearAtomPathsIDs, UniqueCyclicAtomPathsIDs - Path atom IDs to remove duplicate paths
  #
  for $ContainerName (qw(AssignedAtomTypes BondSymbols UniqueLinearAtomPathsIDs UniqueCyclicAtomPathsIDs)) {
    %{$This->{$ContainerName}} = ();
  }

  # Reference to all the atom paths up to specified path length...
  $This->{AtomPathsRef} = '';

  # Atom paths strings created using specified atom types and bond symbols...
  %{$This->{AtomPathsStrings}} = ();
}
202 | |
# Initialize class ...
#
# Stores the name of this package in the file scoped $ClassName variable, which
# is interpolated into the error and warning messages of this class.
#
sub _InitializeClass {
  # Class name...
  $ClassName = __PACKAGE__;
}
208 | |
# Initialize object properties....
#
# Calls the Set<Name> method for each specified name and value pair, makes sure
# Molecule, Type, and AtomIdentifierType were specified and that any specified
# Size is a power of 2, and then sets up the vector matching the fingerprints
# type.
#
# Croaks when a required property is missing, the size is not a power of 2, or
# the type is not PathLengthBits or PathLengthCount.
#
# Returns the object itself.
#
sub _InitializePathLengthFingerprintsProperties {
  my($This, %NamesAndValues) = @_;
  my($Name, $MethodName);

  # Hand each value over to its setter method...
  for $Name (keys %NamesAndValues) {
    $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  # Make sure molecule object was specified...
  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule..." if !exists $NamesAndValues{Molecule};

  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying Type..." if !exists $NamesAndValues{Type};

  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType..." if !exists $NamesAndValues{AtomIdentifierType};

  # Make sure it's power of 2...
  if (exists($NamesAndValues{Size}) && !TextUtil::IsNumberPowerOfNumber($NamesAndValues{Size}, 2)) {
    croak "Error: ${ClassName}->New: Specified size value, $NamesAndValues{Size}, must be power of 2...";
  }

  # Reject unknown types before setting up the type specific vector...
  if ($This->{Type} !~ /^(?:PathLengthBits|PathLengthCount)$/i) {
    croak "Error: ${ClassName}->_InitializePathLengthFingerprintsProperties: Unknown PathLength type: $This->{Type}; Supported PathLength type : PathLengthBits or PathLengthCount......";
  }

  if ($This->{Type} =~ /^PathLengthBits$/i) {
    $This->_InitializePathLengthBits();
  }
  else {
    $This->_InitializePathLengthCount();
  }

  return $This;
}
251 | |
# Initialize PathLength bits...
#
# Marks the object as using a fingerprints bit vector and sets up that bit
# vector by calling _InitializeFingerprintsBitVector.
#
# Returns the object itself.
#
sub _InitializePathLengthBits {
  my($This) = @_;
  my($VectorType);

  # Vector type...
  $VectorType = 'FingerprintsBitVector';
  $This->{VectorType} = $VectorType;

  $This->_InitializeFingerprintsBitVector();

  return $This;
}
264 | |
# Initialize PathLength key count...
#
# Marks the object as using a fingerprints vector holding numerical values and
# sets up that vector by calling _InitializeFingerprintsVector.
#
# Returns the object itself.
#
sub _InitializePathLengthCount {
  my($This) = @_;

  # Vector type and type of values...
  ($This->{VectorType}, $This->{FingerprintsVectorType}) = ('FingerprintsVector', 'NumericalValues');

  $This->_InitializeFingerprintsVector();

  return $This;
}
278 | |
# Set type...
#
# Accepts PathLengthBits or PathLengthCount in any letter case and stores the
# type using its canonical spelling. The type can be set only once.
#
# Croaks when the type is already set or the specified value is not supported.
#
# Returns the object itself.
#
sub SetType {
  my($This, $Type) = @_;
  my($SupportedType);

  croak "Error: ${ClassName}->SetType: Can't change type: It's already set..." if $This->{Type};

  # Store the canonical spelling of the first supported type that matches...
  for $SupportedType ('PathLengthBits', 'PathLengthCount') {
    if ($Type =~ /^$SupportedType$/i) {
      $This->{Type} = $SupportedType;
      return $This;
    }
  }

  croak "Error: ${ClassName}->SetType: Unknown PathLength keys: $Type; Supported PathLength types: PathLengthBits or PathLengthCount...";
}
299 | |
# Disable vector type change...
#
# The vector type is set up during object initialization according to the
# fingerprints type; this method always croaks and never returns.
#
sub SetVectorType {
  croak "Error: ${ClassName}->SetVectorType: Can't change vector type...";
}
309 | |
# Disable fingerprints vector type change...
#
# The fingerprints vector type is set up during object initialization; this
# method always croaks and never returns.
#
sub SetFingerprintsVectorType {
  croak "Error: ${ClassName}->SetFingerprintsVectorType: Can't change fingerprints vector type...";
}
319 | |
# Set atom identifier type to use for path length atom identifiers...
#
# The specified value is validated against the list of supported atom types,
# ignoring letter case, and stored as specified. The atom identifier type can
# be set only once; setting it also triggers initialization of the atom
# identifier type information.
#
# Croaks when the value is not supported or the type is already set.
#
# Returns the object itself.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  # Validate the value first; an invalid value is reported even when the type
  # is already set...
  unless ($IdentifierType =~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
  }

  croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change atom identifier type: It's already set..." if $This->{AtomIdentifierType};

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize atom identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}
340 | |
# Set minimum path length...
#
# Croaks unless the value is a positive integer according to
# TextUtil::IsPositiveInteger.
#
# Returns the object itself.
#
sub SetMinLength {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetMinLength: MinLength value, $Value, is not valid: It must be a positive integer..." unless TextUtil::IsPositiveInteger($Value);

  $This->{MinLength} = $Value;

  return $This;
}
353 | |
# Set maximum path length...
#
# Croaks unless the value is a positive integer according to
# TextUtil::IsPositiveInteger.
#
# Returns the object itself.
#
sub SetMaxLength {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetMaxLength: MaxLength value, $Value, is not valid: It must be a positive integer..." unless TextUtil::IsPositiveInteger($Value);

  $This->{MaxLength} = $Value;

  return $This;
}
366 | |
# Set number of bits to set for each path...
#
# Croaks unless the value is a positive integer according to
# TextUtil::IsPositiveInteger.
#
# Returns the object itself.
#
sub SetNumOfBitsToSetPerPath {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetNumOfBitsToSetPerPath: NumOfBitsToSetPerPath value, $Value, is not valid: It must be a positive integer..." unless TextUtil::IsPositiveInteger($Value);

  $This->{NumOfBitsToSetPerPath} = $Value;

  return $This;
}
379 | |
# Generate fingerprints description...
#
# Returns the explicitly set description when one exists. Otherwise, returns a
# description made up of the fingerprints type, the atom identifier type, and
# the minimum and maximum path lengths.
#
sub GetDescription {
  my($This) = @_;

  # Is description explicitly set? Otherwise, generate fingerprints description...
  return exists($This->{Description})
    ? $This->{Description}
    : "$This->{Type}:$This->{AtomIdentifierType}:MinLength$This->{MinLength}:MaxLength$This->{MaxLength}";
}
394 | |
# Generate path length fingerprints...
#
# Performs the following steps in order: validates the path length range,
# caches molecule data, assigns atom types, sets up the bond symbol map when
# bond symbols are used, generates atom paths up to the maximum length,
# converts the paths to atom path strings, sets the final fingerprints, and
# clears the cached molecule data.
#
# Croaks when MinLength is greater than MaxLength. When atom types can't be
# assigned to all atoms, a warning is issued and no fingerprints are generated;
# the cached molecule data is cleared on that path as well, so that a failed
# run doesn't leave the cache populated.
#
# Returns the object itself.
#
sub GenerateFingerprints {
  my($This) = @_;

  if ($This->{MinLength} > $This->{MaxLength}) {
    croak "Error: ${ClassName}->GenerateFingerprints: No fingerpritns generated: MinLength, $This->{MinLength}, must be <= MaxLength, $This->{MaxLength}...";
  }

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Assign atom types to all atoms...
  if (!$This->_AssignAtomTypes()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";

    # Release the data cached above before giving up...
    $This->_ClearMoleculeDataCache();

    return $This;
  }

  # Setup bond symbol map...
  if ($This->{UseBondSymbols}) {
    $This->_InitializeBondSymbols();
  }

  # Generate appropriate atom paths...
  $This->_GenerateAtomPathsUpToMaxLength();

  # Initialize atom path strings...
  $This->_InitializeAtomPathsStrings();

  # Generate appropriate atom path strings for unique atom paths...
  $This->_GenerateAtomPathsStrings();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}
435 | |
# Assign appropriate atom types to all atoms...
#
# Creates the atom types object matching AtomIdentifierType, which is compared
# ignoring letter case, asks it to assign atom types, and collects the type
# assigned to each cached atom into AssignedAtomTypes keyed by atom ID.
#
# Croaks when AtomIdentifierType is not one of the supported types.
#
# Returns the object itself on success. Returns nothing (undef in scalar
# context, empty list in list context) when atom types couldn't be assigned
# successfully, so the result is false in either context.
#
sub _AssignAtomTypes {
  my($This) = @_;
  my($SpecifiedAtomTypes, $Atom, $IgnoreHydrogens, $TypeName, $AtomTypesClass, %ConstructorParams);

  %{$This->{AssignedAtomTypes}} = ();
  $IgnoreHydrogens = 0;

  # Constructor parameters, in addition to Molecule, for each supported atom
  # identifier type. The class implementing a type is AtomTypes::<TypeName>...
  %ConstructorParams = (
    'AtomicInvariantsAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse}],
    'DREIDINGAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'EStateAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'FunctionalClassAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse}],
    'MMFF94AtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'SLogPAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'SYBYLAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'TPSAAtomTypes' => ['IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0],
    'UFFAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
  );

  $AtomTypesClass = undef;
  $SpecifiedAtomTypes = undef;

  TYPENAME: for $TypeName (sort keys %ConstructorParams) {
    if ($This->{AtomIdentifierType} =~ /^$TypeName$/i) {
      $AtomTypesClass = "AtomTypes::${TypeName}";
      $SpecifiedAtomTypes = $AtomTypesClass->new('Molecule' => $This->{Molecule}, @{$ConstructorParams{$TypeName}});
      last TYPENAME;
    }
  }

  if (!defined $AtomTypesClass) {
    croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  # Assign atom types...
  $SpecifiedAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return;
  }

  # Collect assigned atom types...
  for $Atom (@{$This->{Atoms}}) {
    $This->{AssignedAtomTypes}{$Atom->GetID()} = $SpecifiedAtomTypes->GetAtomType($Atom);
  }

  return $This;
}
511 | |
# Setup bond symbol map for atoms to speed up generation of path length
# identifiers during fingerprints generation...
#
# The map is emptied and, when bond symbols are in use, refilled with one entry
# for each bond of the molecule. Entries are keyed first by the numerically
# smaller and then by the numerically larger atom ID of the bonded atoms. The
# symbol is ':' for aromatic bonds; otherwise, it's the symbol mapped to the
# bond order or the bond order itself when no symbol is mapped to it.
#
# Returns the object itself.
#
sub _InitializeBondSymbols {
  my($This) = @_;
  my($Bond, $Order, $Symbol, $FirstAtom, $SecondAtom, $LowAtomID, $HighAtomID);

  %{$This->{BondSymbols}} = ();

  return $This unless $This->{UseBondSymbols};

  for $Bond ($This->{Molecule}->GetBonds()) {
    $Order = $Bond->GetBondOrder();

    if ($Bond->IsAromatic()) {
      $Symbol = ':';
    }
    elsif (exists $This->{BondOrderToSymbol}{$Order}) {
      $Symbol = $This->{BondOrderToSymbol}{$Order};
    }
    else {
      $Symbol = $Order;
    }

    ($FirstAtom, $SecondAtom) = $Bond->GetAtoms();
    ($LowAtomID, $HighAtomID) = ($FirstAtom->GetID(), $SecondAtom->GetID());

    # Always key the map by the smaller atom ID first...
    ($LowAtomID, $HighAtomID) = ($HighAtomID, $LowAtomID) if $LowAtomID > $HighAtomID;

    $This->{BondSymbols}{$LowAtomID}{$HighAtomID} = $Symbol;
  }

  return $This;
}
541 | |
# Get appropriate atom paths with length up to MaxLength...
#
# Asks the molecule for its atom paths, passing MaxLength and AllowRings, and
# stores the result in AtomPathsRef. GetAllAtomPathsWithLengthUpto is used when
# sharing of bonds is allowed; otherwise, GetAtomPathsWithLengthUpto is used.
#
# Returns the object itself.
#
sub _GenerateAtomPathsUpToMaxLength {
  my($This) = @_;
  my($PathsMethodName);

  $PathsMethodName = $This->{AllowSharedBonds} ? 'GetAllAtomPathsWithLengthUpto' : 'GetAtomPathsWithLengthUpto';

  $This->{AtomPathsRef} = $This->{Molecule}->$PathsMethodName($This->{MaxLength}, $This->{AllowRings});

  return $This;
}
562 | |
# Initialize atom paths strings at various pathlength levels...
#
# Replaces the contents of AtomPathsStrings with an empty hash for each path
# length from MinLength to MaxLength.
#
# Returns the object itself.
#
sub _InitializeAtomPathsStrings {
  my($This) = @_;

  %{$This->{AtomPathsStrings}} = map { ($_ => {}) } ($This->{MinLength} .. $This->{MaxLength});

  return $This;
}
577 | |
# Generate appropriate atom path strings for unique atom paths...
#
# Goes over all atom paths in AtomPathsRef, skips those with fewer atoms than
# MinLength, and hands each remaining path to
# _GenerateAtomPathStringUsingUniquePath when only unique paths are used or to
# _GenerateAtomPathString otherwise.
#
# Returns the object itself.
#
sub _GenerateAtomPathsStrings {
  my($This, $PathAtomsRef) = @_;
  my($MinPathLength, $GeneratorMethodName);

  $MinPathLength = $This->{MinLength};
  $GeneratorMethodName = $This->{UseUniquePaths} ? '_GenerateAtomPathStringUsingUniquePath' : '_GenerateAtomPathString';

  for $PathAtomsRef (@{$This->{AtomPathsRef}}) {
    # Ignore paths shorter than the minimum path length...
    next if scalar(@{$PathAtomsRef}) < $MinPathLength;

    $This->$GeneratorMethodName($PathAtomsRef);
  }

  return $This;
}
601 | |
# Generate atom path string using unique path...
#
# Paths containing a cycle are handled by
# _GenerateAtomPathStringUsingUniquePathContainingCycle, provided rings are
# allowed; all other paths are handled by
# _GenerateAtomPathStringUsingUniqueLinearPath.
#
# Returns the object itself.
#
sub _GenerateAtomPathStringUsingUniquePath {
  my($This, $PathAtomsRef) = @_;
  my($HandlerMethodName);

  # The cycle check is skipped altogether when rings are not allowed...
  $HandlerMethodName = ($This->{AllowRings} && $This->_DoesAtomPathContainsCycle($PathAtomsRef))
    ? '_GenerateAtomPathStringUsingUniquePathContainingCycle'
    : '_GenerateAtomPathStringUsingUniqueLinearPath';

  $This->$HandlerMethodName($PathAtomsRef);

  return $This;
}
615 | |
# Generate atom path string for specified path containing no cycle...
#
# The atom path string is generated only for a path which is a unique linear
# atom path; any other path is silently ignored.
#
# Returns the object itself.
#
sub _GenerateAtomPathStringUsingUniqueLinearPath {
  my($This, $PathAtomsRef) = @_;

  # Is it a unique linear atom path?
  #
  $This->_GenerateAtomPathString($PathAtomsRef) if $This->_IsUniqueLinearAtomPath($PathAtomsRef);

  return $This;
}
630 | |
# Is it a structurally unique linear path?
#
# For a path to be structurally unique, its set of atom IDs must be different
# from the set of atom IDs of any earlier path. In order to generate a path ID
# which is invariant of the order of atoms in the path, atom IDs are sorted
# numerically before generating the path ID.
#
# Notes:
# . Atom path ID doesn't reflect the order of atoms in the atom path.
# . The path ID of a unique path is recorded in UniqueLinearAtomPathsIDs.
#
# Returns 1 for a unique path and 0 for a path seen earlier.
#
sub _IsUniqueLinearAtomPath {
  my($This, $PathAtomsRef) = @_;
  my($AtomPathID);

  $AtomPathID = join '-', sort { $a <=> $b } map { $_->GetID() } @{$PathAtomsRef};

  return 0 if exists $This->{UniqueLinearAtomPathsIDs}{$AtomPathID};

  # It's a unique atom path...
  $This->{UniqueLinearAtomPathsIDs}{$AtomPathID} = 1;

  return 1;
}
657 | |
# Generate atom path string for specified path containing a cycle...
#
# A path which is not a unique atom path containing a cycle is ignored. For a
# unique path, the index of the cycle closing atom decides how the atom path
# string is generated: index 0 corresponds to a path which is a cycle as a
# whole, and any other index corresponds to a path with a linear part followed
# by a cycle.
#
# Returns the object itself.
#
sub _GenerateAtomPathStringUsingUniquePathContainingCycle {
  my($This, $PathAtomsRef) = @_;
  my($ClosingAtomIndex);

  # Is it a unique atom path containing a cycle?
  #
  return $This unless $This->_IsUniqueAtomPathContainingCycle($PathAtomsRef);

  ($ClosingAtomIndex) = $This->_GetAtomPathCycleClosingAtomIndex($PathAtomsRef);

  if ($ClosingAtomIndex != 0) {
    $This->_GenerateUniqueAtomPathStringForPathContainingCycle($PathAtomsRef, $ClosingAtomIndex);
  }
  else {
    $This->_GenerateUniqueAtomPathStringForPathCycle($PathAtomsRef);
  }

  return $This;
}
680 | |
# Generate a unique atom path string for a cyclic path by generating atom path
# strings for all possible paths in the cycle and keeping the lexicographically
# smallest one.
#
# Although all the paths enumerated during atom path string generation are also
# present in the initial paths list, the structural uniqueness check would
# detect them earlier and this method ends up being invoked only once for the
# first cyclic path.
#
# For atom paths containing the same atom types and bond symbols, atom path
# strings would be the same for all the paths; the atom path string is
# generated directly from the specified path in that case and the value
# returned by _GenerateAtomPathString is passed back to the caller.
#
# Otherwise, the count of the smallest atom path string is incremented in
# AtomPathsStrings for the path length and the object itself is returned.
#
sub _GenerateUniqueAtomPathStringForPathCycle {
  my($This, $PathAtomsRef) = @_;

  if ($This->_AreAllPathAtomsSymbolsSame($PathAtomsRef) && $This->_AreAllPathBondSymbolsSame($PathAtomsRef)) {
    return $This->_GenerateAtomPathString($PathAtomsRef);
  }

  # Generate all possible atom path strings and select the lexicographically smallest one...
  my($PathLength, $LastIndex, $StartIndex, $SymbolsRef, $ForwardString, $BackwardString, $CandidateString, $SmallestString, $SmallestStringFound, @RotatedPathAtoms);

  $PathLength = scalar @{$PathAtomsRef};
  $LastIndex = $PathLength - 1;

  $SmallestString = '';
  $SmallestStringFound = 0;

  # The last path atom repeats the first one to close the cycle; so, only the
  # atoms before it are used as starting points...
  for $StartIndex (0 .. ($LastIndex - 1)) {
    # Start the cycle at the current atom, wrap around to the atoms preceding it,
    # and complete the cycle by adding the starting atom as the last atom...
    @RotatedPathAtoms = (
      @{$PathAtomsRef}[$StartIndex .. ($LastIndex - 1)],
      @{$PathAtomsRef}[0 .. ($StartIndex - 1)],
      $PathAtomsRef->[$StartIndex]
    );

    # Generate atom path string in both directions and keep the smaller one...
    $SymbolsRef = $This->_GenerateAtomPathSymbols(\@RotatedPathAtoms);

    $ForwardString = join '', @{$SymbolsRef};
    $BackwardString = join '', reverse @{$SymbolsRef};

    $CandidateString = ($BackwardString le $ForwardString) ? $BackwardString : $ForwardString;

    # Update final atom path string...
    if (!$SmallestStringFound || $CandidateString le $SmallestString) {
      $SmallestString = $CandidateString;
      $SmallestStringFound = 1;
    }
  }

  # Set final atom path string: a missing count starts at zero...
  #
  $This->{AtomPathsStrings}{$PathLength}{$SmallestString} += 1;

  return $This;
}
771 | |
#
# Generate a unique atom path string for paths containing a cycle closed by
# the specified atom index and the last atom index.
#
# The following methodology is used to generate atom path string which is
# independent of initial atom ordering:
# . Take the atoms from the first atom to the atom before the first cycle
# closing atom as the linear part of the path.
# . Generate atom path string for atoms from the first cycle closing atom index
# to the last path atom in both forward and reverse order, and select the atom
# order leading to the lexicographically smaller atom path string.
# . Combine the linear part with the selected order of the cyclic part to
# generate the final atom path string.
#
# The count of the final atom path string is incremented in AtomPathsStrings
# for the path length.
#
# Returns the object itself.
#
sub _GenerateUniqueAtomPathStringForPathContainingCycle {
  my($This, $PathAtomsRef, $CycleClosingAtomIndex) = @_;
  my($PathLength, $LastIndex, $CyclicSymbolsRef, $ForwardCyclicString, $BackwardCyclicString, $AtomPathString, @LinearPartAtoms, @CyclicPartAtoms, @PathAtoms);

  $PathLength = scalar @{$PathAtomsRef};
  $LastIndex = $PathLength - 1;

  # Get path atoms corresponding to linear and cyclic parts of the path...
  @LinearPartAtoms = @{$PathAtomsRef}[0 .. ($CycleClosingAtomIndex - 1)];
  @CyclicPartAtoms = @{$PathAtomsRef}[$CycleClosingAtomIndex .. $LastIndex];

  # Setup atom path strings for cyclic part in both directions...
  $CyclicSymbolsRef = $This->_GenerateAtomPathSymbols(\@CyclicPartAtoms);
  $ForwardCyclicString = join '', @{$CyclicSymbolsRef};
  $BackwardCyclicString = join '', reverse @{$CyclicSymbolsRef};

  # Setup atom path corresponding to linear part and lexicographically smaller
  # cyclic part...
  @PathAtoms = (
    @LinearPartAtoms,
    ($BackwardCyclicString le $ForwardCyclicString) ? reverse(@CyclicPartAtoms) : @CyclicPartAtoms
  );

  # Setup final atom path string: a missing count starts at zero...
  $AtomPathString = join '', @{$This->_GenerateAtomPathSymbols(\@PathAtoms)};

  $This->{AtomPathsStrings}{$PathLength}{$AtomPathString} += 1;

  return $This;
}
839 | |
840 # Does atom path contain a cycle? | |
841 # | |
842 # For an atom path to contain cycle, it must satisfy the following conditions: | |
843 # . Pathlength >= 3 | |
844 # . Last atom ID is equal to first atom ID or some other atom ID besides itself | |
845 # | |
sub _DoesAtomPathContainsCycle {
  my($This, $PathAtomsRef) = @_;

  # Returns 1 when ID of the last path atom matches ID of any earlier path
  # atom; otherwise, returns 0. Paths with two or fewer atoms are never
  # treated as containing a cycle...

  my $FinalIndex = $#{$PathAtomsRef};
  return 0 if $FinalIndex < 2;

  my $ClosingAtomID = $PathAtomsRef->[$FinalIndex]->GetID();

  # Compare all atoms preceding the last atom against the last atom ID...
  for my $PathAtom (@{$PathAtomsRef}[0 .. ($FinalIndex - 1)]) {
    return 1 if $PathAtom->GetID() == $ClosingAtomID;
  }
  return 0;
}
873 | |
874 # Get atom path cycle closing atom index... | |
875 # | |
sub _GetAtomPathCycleClosingAtomIndex {
  my($This, $PathAtomsRef) = @_;

  # Returns index of the first path atom whose ID matches ID of the last path
  # atom, or undef when no earlier path atom shares that ID...

  my $FinalIndex = $#{$PathAtomsRef};
  my $ClosingAtomID = $PathAtomsRef->[$FinalIndex]->GetID();

  my $CandidateIndex = 0;
  while ($CandidateIndex < $FinalIndex) {
    if ($PathAtomsRef->[$CandidateIndex]->GetID() == $ClosingAtomID) {
      # It's a cycle closing atom...
      return $CandidateIndex;
    }
    $CandidateIndex++;
  }

  # Explicit undef is retained so that exactly one value is returned in any context...
  return undef;
}
896 | |
897 # Is it a structurally unique path containing a cycle? | |
898 # | |
899 # For atom paths containing cycles, last atom ID is either equal to first atom ID or | |
900 # some other atom ID besides itself. | |
901 # | |
902 # In order to determine whether it's structurally unique independent of initial atom ordering, | |
903 # the following methodology is used: | |
904 # | |
905 # . For paths with same first and last atom IDs: | |
906 # . Remove the last atom ID from atom path | |
907 # . Sort atom IDs in the path | |
908 # . Add first atom ID from the sorted list to the end of list to complete the cycle | |
909 # . Generate an atom path ID | |
910 # . Use final path ID to track uniqueness of path containing cycle. | |
911 # | |
912 # . For paths with last atom ID equal to some other atom ID besides itself: | |
913 # . Sort atom IDs in atom path | |
914 # . Generate atom path ID and use it to track uniqueness of atom paths. | |
915 # | |
sub _IsUniqueAtomPathContainingCycle {
  my($This, $PathAtomsRef) = @_;

  # Returns 1 the first time a particular set of path atom IDs is seen and
  # records it in $This->{UniqueCyclicAtomPathsIDs}; returns 0 for any later
  # path mapping to the same atom path ID...

  my @AtomIDs = map { $_->GetID() } @{$PathAtomsRef};

  my $HeadAtomID = $PathAtomsRef->[0]->GetID();
  my $TailAtomID = $PathAtomsRef->[-1]->GetID();
  my $ClosesOnFirstAtom = ($HeadAtomID == $TailAtomID) ? 1 : 0;

  # Drop the repeated last atom ID for paths starting and ending at same atom...
  pop @AtomIDs if $ClosesOnFirstAtom;

  my @OrderedAtomIDs = sort { $a <=> $b } @AtomIDs;

  # ... and complete the cycle using the smallest atom ID.
  push @OrderedAtomIDs, $OrderedAtomIDs[0] if $ClosesOnFirstAtom;

  my $PathKey = join '-', @OrderedAtomIDs;

  return 0 if exists $This->{UniqueCyclicAtomPathsIDs}{$PathKey};

  # It's a unique atom path containing a cycle...
  $This->{UniqueCyclicAtomPathsIDs}{$PathKey} = 1;

  return 1;
}
951 | |
952 # Generate atom path string for specified atom path... | |
953 # | |
sub _GenerateAtomPathString {
  my($This, $PathAtomsRef) = @_;

  # Updates count for the atom path in $This->{AtomPathsStrings}, keyed by
  # number of path atoms. A path and its reverse are counted under one string.
  # Returns $This...

  my $NumOfPathAtoms = scalar @{$PathAtomsRef};

  # Generate path atom and bond symbols along with both traversal directions...
  my $PathSymbolsRef = $This->_GenerateAtomPathSymbols($PathAtomsRef);
  my $ForwardPathString = join '', @{$PathSymbolsRef};
  my $ReversePathString = join '', reverse @{$PathSymbolsRef};

  # Increment count for an already tracked string; forward direction is checked first...
  for my $KnownPathString ($ForwardPathString, $ReversePathString) {
    next unless exists $This->{AtomPathsStrings}{$NumOfPathAtoms}{$KnownPathString};

    $This->{AtomPathsStrings}{$NumOfPathAtoms}{$KnownPathString} += 1;
    return $This;
  }

  # Neither direction is tracked: use lexicographically smaller string as PathID...
  my $NewPathString = ($ForwardPathString le $ReversePathString) ? $ForwardPathString : $ReversePathString;
  $This->{AtomPathsStrings}{$NumOfPathAtoms}{$NewPathString} = 1;

  return $This;
}
989 | |
990 # Are atom types for all path atoms same? | |
991 # | |
sub _AreAllPathAtomsSymbolsSame {
  my($This, $PathAtomsRef) = @_;

  # Returns 1 when every path atom has the same assigned atom type as the
  # first path atom; otherwise, returns 0. Atom types are looked up by atom ID
  # in $This->{AssignedAtomTypes}...

  my($FirstPathAtom, @RemainingPathAtoms) = @{$PathAtomsRef};
  my $ReferenceAtomType = $This->{AssignedAtomTypes}{$FirstPathAtom->GetID()};

  for my $PathAtom (@RemainingPathAtoms) {
    return 0 if $This->{AssignedAtomTypes}{$PathAtom->GetID()} ne $ReferenceAtomType;
  }
  return 1;
}
1009 | |
1010 # Are bond symbols for all path bonds same? | |
1011 # | |
sub _AreAllPathBondSymbolsSame {
  my($This, $PathAtomsRef) = @_;

  # Returns 1 when bond symbols for all bonds between successive path atoms are
  # identical; otherwise, returns 0. Bond symbols are retrieved from
  # $This->{BondSymbols} using IDs of the bonded atoms...

  # During no usage of bond symbols, just ignore them and assume they are same...
  if (!$This->{UseBondSymbols}) {
    return 1;
  }

  # A path with fewer than two atoms contains no bonds. Without this check,
  # GetID would be called on a non-existent second path atom...
  if (@{$PathAtomsRef} < 2) {
    return 1;
  }

  my($Index, $AtomID, $BondedAtomID, $BondAtomID1, $BondAtomID2, $FirstBondSymbol, $BondSymbol);

  for $Index (0 .. ($#{$PathAtomsRef} - 1)) {
    $AtomID = $PathAtomsRef->[$Index]->GetID();
    $BondedAtomID = $PathAtomsRef->[$Index + 1]->GetID();

    # Bond symbols are keyed using the smaller atom ID first...
    ($BondAtomID1, $BondAtomID2) = ($AtomID < $BondedAtomID) ? ($AtomID, $BondedAtomID) : ($BondedAtomID, $AtomID);
    $BondSymbol = $This->{BondSymbols}{$BondAtomID1}{$BondAtomID2};

    if ($Index == 0) {
      # First bond provides the reference bond symbol...
      $FirstBondSymbol = $BondSymbol;
      next;
    }

    if ($BondSymbol ne $FirstBondSymbol) {
      return 0;
    }
  }
  return 1;
}
1040 | |
1041 # Generate atom path symbols... | |
1042 # | |
sub _GenerateAtomPathSymbols {
  my($This, $PathAtomsRef) = @_;

  # Returns a reference to a list containing assigned atom types for path
  # atoms, with a bond symbol placed between successive atom types whenever
  # $This->{UseBondSymbols} is set...

  # Atom types alone are enough for a single atom or during no usage of bond symbols...
  if (@{$PathAtomsRef} == 1 || !$This->{UseBondSymbols}) {
    my @AtomTypeSymbols = map { my $PathAtomID = $_->GetID(); $This->{AssignedAtomTypes}{$PathAtomID} } @{$PathAtomsRef};
    return \@AtomTypeSymbols;
  }

  # Start with atom type of first atom in path...
  my $PreviousAtomID = $PathAtomsRef->[0]->GetID();
  my @PathSymbols = ($This->{AssignedAtomTypes}{$PreviousAtomID});

  # Append bond symbol and atom type for each subsequent atom in path...
  for my $Position (1 .. $#{$PathAtomsRef}) {
    my $CurrentAtomID = $PathAtomsRef->[$Position]->GetID();

    # Bond symbols are keyed using the smaller atom ID first...
    my($LowAtomID, $HighAtomID) = ($PreviousAtomID < $CurrentAtomID) ? ($PreviousAtomID, $CurrentAtomID) : ($CurrentAtomID, $PreviousAtomID);

    push @PathSymbols, $This->{BondSymbols}{$LowAtomID}{$HighAtomID}, $This->{AssignedAtomTypes}{$CurrentAtomID};

    $PreviousAtomID = $CurrentAtomID;
  }
  return \@PathSymbols;
}
1083 | |
1084 # Set final fingerprints... | |
1085 # | |
sub _SetFinalFingerprints {
  my($This) = @_;

  # Flags fingerprints as generated and then hands off to the method matching
  # $This->{Type}. No method is invoked for any other type. Returns $This...

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  # Fingerprints type patterns along with methods to finalize them...
  my @FinalizationMethods = (
    [qr/^PathLengthBits$/i, '_SetFinalFingerprintsBitVector'],
    [qr/^PathLengthCount$/i, '_SetFinalFingerprintsVector'],
  );

  for my $MethodInfoRef (@FinalizationMethods) {
    my($TypePattern, $MethodName) = @{$MethodInfoRef};
    next unless $This->{Type} =~ $TypePattern;

    $This->$MethodName();
    last;
  }

  return $This;
}
1101 | |
1102 # Set final fingerprints bit vector... | |
1103 # | |
sub _SetFinalFingerprintsBitVector {
  my($This) = @_;

  # For each atom path string in $This->{AtomPathsStrings}, seeds a random
  # number generator using hash code of the string and sets
  # $This->{NumOfBitsToSetPerPath} bits, at random positions below
  # $This->{Size}, in $This->{FingerprintsBitVector}. Returns $This...

  my $BitVector = $This->{FingerprintsBitVector};
  my $NumOfBits = $This->{Size};
  my $BitsPerPath = $This->{NumOfBitsToSetPerPath};

  # Passed as second argument to SetBit...
  my $SkipBitPosCheck = 1;

  for my $PathLength (keys %{$This->{AtomPathsStrings}}) {
    for my $PathString (keys %{$This->{AtomPathsStrings}{$PathLength}}) {
      my $Seed = TextUtil::HashCode($PathString);

      if ($This->{UsePerlCoreRandom}) {
        # Perl core random number generator...
        CORE::srand($Seed);
        for (1 .. $BitsPerPath) {
          $BitVector->SetBit(int(CORE::rand($NumOfBits)), $SkipBitPosCheck);
        }
      }
      else {
        # MathUtil random number generator...
        MathUtil::srandom($Seed);
        for (1 .. $BitsPerPath) {
          $BitVector->SetBit(int(MathUtil::random($NumOfBits)), $SkipBitPosCheck);
        }
      }
    }
  }
  return $This;
}
1135 | |
1136 # Set final fingerprints vector... | |
1137 # | |
sub _SetFinalFingerprintsVector {
  my($This) = @_;

  # Adds atom path strings as value IDs and their counts as values to
  # $This->{FingerprintsVector}. Path lengths are processed in ascending
  # numerical order and atom path strings, within a path length, in string
  # sort order. Returns $This...

  my(@PathStringIDs, @PathCounts);

  for my $PathLength (sort { $a <=> $b } keys %{$This->{AtomPathsStrings}}) {
    my $PathCountsRef = $This->{AtomPathsStrings}{$PathLength};
    my @SortedPathStrings = sort keys %{$PathCountsRef};

    push @PathStringIDs, @SortedPathStrings;
    push @PathCounts, @{$PathCountsRef}{@SortedPathStrings};
  }

  # Add PathLengthIDs and values to fingerprint vector...
  $This->{FingerprintsVector}->AddValueIDs(\@PathStringIDs);
  $This->{FingerprintsVector}->AddValues(\@PathCounts);

  return $This;
}
1160 | |
1161 # Cache appropriate molecule data... | |
1162 # | |
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # Stores atoms returned by GetAtoms, for the molecule returned by
  # GetMolecule, in the list referenced by $This->{Atoms}. Returns $This...

  my $Molecule = $This->GetMolecule();
  my @MoleculeAtoms = $Molecule->GetAtoms();

  # Replace contents of the cached atoms list...
  @{$This->{Atoms}} = @MoleculeAtoms;

  return $This;
}
1171 | |
1172 # Clear cached molecule data... | |
1173 # | |
sub _ClearMoleculeDataCache {
  my($This) = @_;

  # Empties the list referenced by $This->{Atoms} and resets
  # $This->{AtomPathsRef} to an empty string. Returns $This...

  # Drop path atoms reference...
  $This->{AtomPathsRef} = '';

  # Empty cached atoms list without replacing the list itself...
  @{$This->{Atoms}} = ();

  return $This;
}
1185 | |
1186 # Set atomic invariants to use atom identifiers... | |
1187 # | |
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;

  # Parameters:
  #   @Values - atomic invariants specified as a list or as a reference to a list
  #
  # Warns and returns nothing when no values are specified; croaks on the
  # first atomic invariant reported as unavailable by
  # AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable. Otherwise,
  # stores the values in $This->{AtomicInvariantsToUse} and returns $This...

  unless (@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  # Values are taken from the first argument when it's an array reference...
  my @RequestedInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure all specified atomic invariants are valid before changing anything...
  my @ValidatedInvariants;
  for my $RequestedInvariant (@RequestedInvariants) {
    AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($RequestedInvariant)
      or croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $RequestedInvariant, is not supported...\n ";

    push @ValidatedInvariants, $RequestedInvariant;
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = @ValidatedInvariants;

  return $This;
}
1225 | |
1226 # Set functional classes to use for atom identifiers... | |
1227 # | |
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;

  # Parameters:
  #   @Values - functional classes specified as a list or as a reference to a list
  #
  # Warns and returns nothing when no values are specified or when
  # $This->{AtomIdentifierType} is not FunctionalClassAtomTypes; croaks on the
  # first functional class reported as unavailable by
  # AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable. Otherwise,
  # stores the values in $This->{FunctionalClassesToUse} and returns $This...

  unless (@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # Values are taken from the first argument when it's an array reference...
  my @RequestedClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure all specified functional classes are valid before changing anything...
  my @ValidatedClasses;
  for my $RequestedClass (@RequestedClasses) {
    AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($RequestedClass)
      or croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $RequestedClass, is not supported...\n ";

    push @ValidatedClasses, $RequestedClass;
  }

  # Set functional classes to use...
  @{$This->{FunctionalClassesToUse}} = @ValidatedClasses;

  return $This;
}
1269 | |
1270 # Initialize atom identifier type information... | |
1271 # | |
1272 # Current supported values: | |
1273 # | |
1274 # AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, | |
1275 # MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes | |
1276 # | |
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;

  # Sets up defaults for the atom identifier type in
  # $This->{AtomIdentifierType}; croaks for an unknown type. Returns $This...

  my $IdentifierType = $This->{AtomIdentifierType};

  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
    return $This;
  }

  if ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
    return $This;
  }

  # Nothing to do for other supported atom types for now...
  unless ($IdentifierType =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  return $This;
}
1295 | |
1296 # Initialize atomic invariants atom types to use for generating atom identifiers... | |
1297 # | |
1298 # Let: | |
1299 # AS = Atom symbol corresponding to element symbol | |
1300 # | |
1301 # X<n> = Number of non-hydrogen atom neighbors or heavy atoms attached to atom | |
1302 # BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom | |
1303 # LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom | |
1304 # SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
1305 # DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
1306 # TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
1307 # H<n> = Number of implicit and explicit hydrogens for atom | |
1308 # Ar = Aromatic annotation indicating whether atom is aromatic | |
1309 # RA = Ring atom annotation indicating whether atom is a ring | |
1310 # FC<+n/-n> = Formal charge assigned to atom | |
1311 # MN<n> = Mass number indicating isotope other than most abundant isotope | |
1312 # SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet) | |
1313 # | |
1314 # Then: | |
1315 # | |
1316 # Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
1317 # | |
1318 # AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n> | |
1319 # | |
1320 # Except for AS which is a required atomic invariant in atom types, all other atomic invariants are | |
1321 # optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>. | |
1322 # AtomID specification doesn't include atomic invariants with zero or undefined values. | |
1323 # | |
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;

  # Resets the list referenced by $This->{AtomicInvariantsToUse} to the
  # default atomic invariants: AS, X, BO, H, FC. Returns $This...

  my @DefaultAtomicInvariants = qw(AS X BO H FC);
  @{$This->{AtomicInvariantsToUse}} = @DefaultAtomicInvariants;

  return $This;
}
1334 | |
1335 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes | |
1336 # class, to use for generating atom identifiers... | |
1337 # | |
1338 # Let: | |
1339 # HBD: HydrogenBondDonor | |
1340 # HBA: HydrogenBondAcceptor | |
1341 # PI : PositivelyIonizable | |
1342 # NI : NegativelyIonizable | |
1343 # Ar : Aromatic | |
1344 # Hal : Halogen | |
1345 # H : Hydrophobic | |
1346 # RA : RingAtom | |
1347 # CA : ChainAtom | |
1348 # | |
1349 # Then: | |
1350 # | |
1351 # Functional class atom type specification for an atom corresponds to: | |
1352 # | |
1353 # Ar.CA.H.HBA.HBD.Hal.NI.PI.RA | |
1354 # | |
1355 # Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal | |
1356 # | |
1357 # FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]: | |
1358 # | |
1359 # HydrogenBondDonor: NH, NH2, OH | |
1360 # HydrogenBondAcceptor: N[!H], O | |
1361 # PositivelyIonizable: +, NH2 | |
1362 # NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH | |
1363 # | |
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;

  # Resets the list referenced by $This->{FunctionalClassesToUse} to the
  # default functional classes: HBD, HBA, PI, NI, Ar, Hal. Returns $This...

  my @DefaultFunctionalClasses = qw(HBD HBA PI NI Ar Hal);
  @{$This->{FunctionalClassesToUse}} = @DefaultFunctionalClasses;

  return $This;
}
1375 | |
1376 # Return a string containing data for PathLengthFingerprints object... | |
1377 # | |
sub StringifyPathLengthFingerprints {
  my($This) = @_;
  my($PathLengthsFingerprintsString);

  # Returns a string made up of "Name: Value" pairs, delimited by "; ",
  # describing the fingerprints type, atom identifier type, path lengths, path
  # generation control flags, atom identifier type specific settings and the
  # fingerprints bit vector or vector itself...

  # Type of fingerprint...
  $PathLengthsFingerprintsString = "Fingerprint type: $This->{Type}; AtomIdentifierType: $This->{AtomIdentifierType}";

  # Path length...
  $PathLengthsFingerprintsString .= "; MinPathLength: $This->{MinLength}; MaxPathLength: $This->{MaxLength}";

  # Fingerprint generation control...
  my($AllowSharedBonds, $AllowRings, $UseBondSymbols, $UseUniquePaths);

  $AllowSharedBonds = $This->{AllowSharedBonds} ? "Yes" : "No";
  $AllowRings = $This->{AllowRings} ? "Yes" : "No";
  $UseBondSymbols = $This->{UseBondSymbols} ? "Yes" : "No";

  # Report UseUniquePaths from its own value instead of echoing UseBondSymbols...
  $UseUniquePaths = $This->{UseUniquePaths} ? "Yes" : "No";

  $PathLengthsFingerprintsString .= "; UseUniquePaths: $UseUniquePaths; AllowSharedBonds: $AllowSharedBonds; AllowRings: $AllowRings; UseBondSymbols: $UseBondSymbols";

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my($AtomicInvariant, @AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants);

    @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

    # Pair up each available atomic invariant with its value...
    for $AtomicInvariant (@AtomicInvariantsOrder) {
      push @AtomicInvariants, "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}";
    }

    $PathLengthsFingerprintsString .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    $PathLengthsFingerprintsString .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
    $PathLengthsFingerprintsString .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my($FunctionalClass, @FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses);

    @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

    # Pair up each available functional class with its value...
    for $FunctionalClass (@FunctionalClassesOrder) {
      push @FunctionalClasses, "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}";
    }

    $PathLengthsFingerprintsString .= "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    $PathLengthsFingerprintsString .= "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
    $PathLengthsFingerprintsString .= "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
  }

  if ($This->{Type} =~ /^PathLengthBits$/i) {
    # Size...
    $PathLengthsFingerprintsString .= "; Size: $This->{Size}; MinSize: $This->{MinSize}; MaxSize: $This->{MaxSize}";

    # NumOfBitsToSetPerPath...
    $PathLengthsFingerprintsString .= "; NumOfBitsToSetPerPath: $This->{NumOfBitsToSetPerPath}";

    # Fingerprint bit density and num of bits set...
    my($NumOfSetBits, $BitDensity);
    $NumOfSetBits = $This->{FingerprintsBitVector}->GetNumOfSetBits();
    $BitDensity = $This->{FingerprintsBitVector}->GetFingerprintsBitDensity();
    $PathLengthsFingerprintsString .= "; NumOfOnBits: $NumOfSetBits; BitDensity: $BitDensity";

    $PathLengthsFingerprintsString .= "; FingerprintsBitVector: < $This->{FingerprintsBitVector} >";
  }
  elsif ($This->{Type} =~ /^PathLengthCount$/i) {
    $PathLengthsFingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";
  }

  return $PathLengthsFingerprintsString;
}
1448 | |
1449 1; | |
1450 | |
1451 __END__ | |
1452 | |
1453 =head1 NAME | |
1454 | |
1455 PathLengthFingerprints | |
1456 | |
1457 =head1 SYNOPSIS | |
1458 | |
1459 use Fingerprints::PathLengthFingerprints; | |
1460 | |
1461 use Fingerprints::PathLengthFingerprints qw(:all); | |
1462 | |
1463 =head1 DESCRIPTION | |
1464 | |
1465 B<PathLengthFingerprints> class provides the following methods: | |
1466 | |
1467 new, GenerateFingerprints, GetDescription, SetAtomIdentifierType, | |
1468 SetAtomicInvariantsToUse, SetFunctionalClassesToUse, SetMaxLength, | |
1469 SetMinLength, SetNumOfBitsToSetPerPath, SetType, | |
1470 StringifyPathLengthFingerprints | |
1471 | |
1472 B<PathLengthFingerprints> is derived from B<Fingerprints> class which in turn | |
1473 is derived from B<ObjectProperty> base class that provides methods not explicitly defined | |
1474 in B<PathLengthFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's | |
1475 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property: | |
1476 | |
1477 Set<PropertyName>(<PropertyValue>); | |
1478 $PropertyValue = Get<PropertyName>(); | |
1479 Delete<PropertyName>(); | |
1480 | |
1481 The current release of MayaChemTools supports generation of B<PathLengthFingerprints> | |
1482 corresponding to following B<AtomIdentifierTypes>: | |
1483 | |
1484 AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
1485 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, | |
1486 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes | |
1487 | |
1488 Based on the values specified for B<Type>, B<AtomIdentifierType>, B<MinPathLength> and | |
1489 B<MaxPathLength>, all appropriate atom paths are generated for each atom in the molecule | |
1490 and collected in a list and the list is filtered to remove any structurally duplicate paths as | |
1491 indicated by the value of B<UseUniquePaths>. | |
1492 | |
1493 For molecules containing rings, atom paths starting from each atom can be traversed in four | |
1494 different ways: | |
1495 | |
1496 o Atom paths without any rings and sharing of bonds in traversed paths. | |
1497 o Atom paths containing rings and without any sharing of bonds in | |
1498 traversed paths | |
1499 o All possible atom paths without any rings and with sharing of bonds in | |
1500 traversed paths | |
1501 o All possible atom paths containing rings and with sharing of bonds in | |
1502 traversed paths. | |
1503 | |
1504 Atom path traversal is terminated at the last ring atom. For molecules containing no rings, | |
1505 first two and last two types described above are equivalent. | |
1506 | |
1507 B<AllowSharedBonds> and B<AllowRings> allow generation of different types of paths | |
1508 to be used for fingerprints generation. | |
1509 | |
1510 The combination of B<AllowSharedBonds>, B<AllowRings>, and B<UseBondSymbols> allows generation of | |
1511 8 different types of path length fingerprints: | |
1512 | |
1513 AllowSharedBonds AllowRings UseBondSymbols | |
1514 | |
1515 0 0 1 - AtomPathsNoCyclesWithBondSymbols | |
1516 0 1 1 - AtomPathsWithCyclesWithBondSymbols | |
1517 | |
1518 1 0 1 - AllAtomPathsNoCyclesWithBondSymbols | |
1519 1 1 1 - AllAtomPathsWithCyclesWithBondSymbols | |
1520 [ DEFAULT ] | |
1521 | |
1522 0 0 0 - AtomPathsNoCyclesNoBondSymbols | |
1523 0 1 0 - AtomPathsWithCyclesNoBondSymbols | |
1524 | |
1525 1 0 0 - AllAtomPathsNoCyclesNoBondSymbols | |
1526 1 1 0 - AllAtomPathsWithCyclesNoBondSymbols | |
1527 | |
1528 Additionally, possible values for option B<AtomIdentifierType> in conjunction with corresponding | |
1529 specified values for B<AtomicInvariantsToUse> and B<FunctionalClassesToUse> change the nature | |
1530 of atom path length strings and the fingerprints. | |
1531 | |
1532 For each atom path in the filtered atom paths list, an atom path string is created using value of | |
1533 B<AtomIdentifierType> and specified values to use for a particular atom identifier type. | |
1534 Value of B<UseBondSymbols> controls whether bond order symbols are used during generation | |
1535 of atom path string. Atom symbol corresponds to element symbol and characters used to represent | |
1536 bond order are: I<1 - None; 2 - '='; 3 - '#'; 1.5 or aromatic - ':'; others: bond order value>. By default, | |
1537 bond symbols are included in atom path strings. Exclusion of bond symbols in atom path strings | |
1538 results in fingerprints which correspond purely to atom paths without considering bonds. | |
1539 | |
1540 B<UseUniquePaths> controls the removal of structurally duplicate atom path strings | |
1541 from the list. | |
1542 | |
1543 For I<PathLengthBits> value of B<Type>, each atom path is hashed to a 32 bit unsigned | |
1544 integer key using B<TextUtil::HashCode> function. Using the hash key as a seed for a random number | |
1545 generator, a random integer value between 0 and B<Size> is used to set corresponding bits | |
1546 in the fingerprint bit-vector string. Value of B<NumOfBitsToSetPerPath> option controls the number | |
1547 of times a random number is generated to set corresponding bits. | |
1548 | |
1549 For I<PathLengthCount> value of B<Type>, the number of times an atom path appears | |
1550 is tracked and a fingerprints count-string corresponding to count of atom paths is generated. | |
1551 | |
1552 The current release of MayaChemTools generates the following types of path length | |
1553 fingerprints bit-vector and vector strings: | |
1554 | |
1555 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng | |
1556 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110 | |
1557 0100010101011000101001011100110001000010001001101000001001001001001000 | |
1558 0010110100000111001001000001001010100100100000000011000000101001011100 | |
1559 0010000001000101010100000100111100110111011011011000000010110111001101 | |
1560 0101100011000000010001000011000010100011101100001000001000100000000... | |
1561 | |
1562 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng | |
1563 th1:MaxLength8;1024;HexadecimalString;Ascending;48caa1315d82d91122b029 | |
1564 42861c9409a4208182d12015509767bd0867653604481a8b1288000056090583603078 | |
1565 9cedae54e26596889ab121309800900490515224208421502120a0dd9200509723ae89 | |
1566 00024181b86c0122821d4e4880c38620dab280824b455404009f082003d52c212b4e6d | |
1567 6ea05280140069c780290c43 | |
1568 | |
1569 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength | |
1570 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2 | |
1571 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X | |
1572 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1 | |
1573 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO | |
1574 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C.... | |
1575 | |
1576 FingerprintsVector;PathLengthCount:DREIDINGAtomTypes:MinLength1:MaxLen | |
1577 gth8;410;NumericalValues;IDsAndValuesPairsString;C_2 2 C_3 9 C_R 22 F_ | |
1578 1 N_3 1 N_R 1 O_2 2 O_3 3 C_2=O_2 2 C_2C_3 1 C_2C_R 1 C_2N_3 1 C_2O_3 | |
1579 1 C_3C_3 7 C_3C_R 1 C_3N_R 1 C_3O_3 2 C_R:C_R 21 C_R:N_R 2 C_RC_R 2 C | |
1580 _RF_ 1 C_RN_3 1 C_2C_3C_3 1 C_2C_R:C_R 2 C_2N_3C_R 1 C_3C_2=O_2 1 C_3C | |
1581 _2O_3 1 C_3C_3C_3 5 C_3C_3C_R 2 C_3C_3N_R 1 C_3C_3O_3 4 C_3C_R:C_R ... | |
1582 | |
1583 FingerprintsVector;PathLengthCount:EStateAtomTypes:MinLength1:MaxLengt | |
1584 h8;454;NumericalValues;IDsAndValuesPairsString;aaCH 14 aasC 8 aasN 1 d | |
1585 O 2 dssC 2 sCH3 2 sF 1 sOH 3 ssCH2 4 ssNH 1 sssCH 3 aaCH:aaCH 10 aaCH: | |
1586 aasC 8 aasC:aasC 3 aasC:aasN 2 aasCaasC 2 aasCdssC 1 aasCsF 1 aasCssNH | |
1587 1 aasCsssCH 1 aasNssCH2 1 dO=dssC 2 dssCsOH 1 dssCssCH2 1 dssCssNH 1 | |
1588 sCH3sssCH 2 sOHsssCH 2 ssCH2ssCH2 1 ssCH2sssCH 4 aaCH:aaCH:aaCH 6 a... | |
1589 | |
1590 FingerprintsVector;PathLengthCount:FunctionalClassAtomTypes:MinLength1 | |
1591 :MaxLength8;404;NumericalValues;IDsAndValuesPairsString;Ar 22 Ar.HBA 1 | |
1592 HBA 2 HBA.HBD 3 HBD 1 Hal 1 NI 1 None 10 Ar.HBA:Ar 2 Ar.HBANone 1 Ar: | |
1593 Ar 21 ArAr 2 ArHBD 1 ArHal 1 ArNone 2 HBA.HBDNI 1 HBA.HBDNone 2 HBA=NI | |
1594 1 HBA=None 1 HBDNone 1 NINone 1 NoneNone 7 Ar.HBA:Ar:Ar 2 Ar.HBA:ArAr | |
1595 1 Ar.HBA:ArNone 1 Ar.HBANoneNone 1 Ar:Ar.HBA:Ar 1 Ar:Ar.HBANone 2 ... | |
1596 | |
1597 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt | |
1598 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1 | |
1599 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N | |
1600 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1 | |
1601 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR | |
1602 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ... | |
1603 | |
1604 FingerprintsVector;PathLengthCount:SLogPAtomTypes:MinLength1:MaxLength | |
1605 8;518;NumericalValues;IDsAndValuesPairsString;C1 5 C10 1 C11 1 C14 1 C | |
1606 18 14 C20 4 C21 2 C22 1 C5 2 CS 2 F 1 N11 1 N4 1 O10 1 O2 3 O9 1 C10C1 | |
1607 1 C10N11 1 C11C1 2 C11C21 1 C14:C18 2 C14F 1 C18:C18 10 C18:C20 4 C18 | |
1608 :C22 2 C1C5 1 C1CS 4 C20:C20 1 C20:C21 1 C20:N11 1 C20C20 2 C21:C21 1 | |
1609 C21:N11 1 C21C5 1 C22N4 1 C5=O10 1 C5=O9 1 C5N4 1 C5O2 1 CSO2 2 C10... | |
1610 | |
1611 FingerprintsVector;PathLengthCount:SYBYLAtomTypes:MinLength1:MaxLength | |
1612 8;412;NumericalValues;IDsAndValuesPairsString;C.2 2 C.3 9 C.ar 22 F 1 | |
1613 N.am 1 N.ar 1 O.2 1 O.3 2 O.co2 2 C.2=O.2 1 C.2=O.co2 1 C.2C.3 1 C.2C. | |
1614 ar 1 C.2N.am 1 C.2O.co2 1 C.3C.3 7 C.3C.ar 1 C.3N.ar 1 C.3O.3 2 C.ar:C | |
1615 .ar 21 C.ar:N.ar 2 C.arC.ar 2 C.arF 1 C.arN.am 1 C.2C.3C.3 1 C.2C.ar:C | |
1616 .ar 2 C.2N.amC.ar 1 C.3C.2=O.co2 1 C.3C.2O.co2 1 C.3C.3C.3 5 C.3C.3... | |
1617 | |
1618 FingerprintsVector;PathLengthCount:TPSAAtomTypes:MinLength1:MaxLength8 | |
1619 ;331;NumericalValues;IDsAndValuesPairsString;N21 1 N7 1 None 34 O3 2 O | |
1620 4 3 N21:None 2 N21None 1 N7None 2 None:None 21 None=O3 2 NoneNone 13 N | |
1621 oneO4 3 N21:None:None 2 N21:NoneNone 2 N21NoneNone 1 N7None:None 2 N7N | |
1622 one=O3 1 N7NoneNone 1 None:N21:None 1 None:N21None 2 None:None:None 20 | |
1623 None:NoneNone 12 NoneN7None 1 NoneNone=O3 2 NoneNoneNone 8 NoneNon... | |
1624 | |
1625 FingerprintsVector;PathLengthCount:UFFAtomTypes:MinLength1:MaxLength8; | |
1626 410;NumericalValues;IDsAndValuesPairsString;C_2 2 C_3 9 C_R 22 F_ 1 N_ | |
1627 3 1 N_R 1 O_2 2 O_3 3 C_2=O_2 2 C_2C_3 1 C_2C_R 1 C_2N_3 1 C_2O_3 1 C_ | |
1628 3C_3 7 C_3C_R 1 C_3N_R 1 C_3O_3 2 C_R:C_R 21 C_R:N_R 2 C_RC_R 2 C_RF_ | |
1629 1 C_RN_3 1 C_2C_3C_3 1 C_2C_R:C_R 2 C_2N_3C_R 1 C_3C_2=O_2 1 C_3C_2O_3 | |
1630 1 C_3C_3C_3 5 C_3C_3C_R 2 C_3C_3N_R 1 C_3C_3O_3 4 C_3C_R:C_R 1 C_3... | |
1631 | |
1632 =head2 METHODS | |
1633 | |
1634 =over 4 | |
1635 | |
1636 =item B<new> | |
1637 | |
1638 $NewPathLengthFingerprints = new PathLengthFingerprints( | |
1639 %NamesAndValues); | |
1640 | |
1641 Using specified I<PathLengthFingerprints> property names and values hash, B<new> method creates a new object | |
1642 and returns a reference to newly created B<PathLengthFingerprints> object. By default, the following properties are | |
1643 initialized: | |
1644 | |
1645 Molecule = '' | |
1646 Type = '' | |
1647 Size = 1024 | |
1648 MinSize = 32 | |
1649 MaxSize = 2**32 | |
1650 NumOfBitsToSetPerPath = 1 | |
1651 MinLength = 1 | |
1652 MaxLength = 8 | |
1653 AllowSharedBonds = 1 | |
1654 AllowRings = 1 | |
1655 UseBondSymbols = 1 | |
1656 UseUniquePaths = '' | |
1657 AtomIdentifierType = '' | |
1658 AtomicInvariantsToUse = ['AS'] | |
1659 FunctionalClassesToUse = ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal'] | |
1660 | |
1661 Examples: | |
1662 | |
1663 $PathLengthFingerprints = new PathLengthFingerprints( | |
1664 'Molecule' => $Molecule, | |
1665 'Type' => 'PathLengthBits', | |
1666 'AtomIdentifierType' => | |
1667 'AtomicInvariantsAtomTypes'); | |
1668 | |
1669 $PathLengthFingerprints = new PathLengthFingerprints( | |
1670 'Molecule' => $Molecule, | |
1671 'Type' => 'PathLengthBits', | |
1672 'Size' => 1024, | |
1673 'MinLength' => 1, | |
1674 'MaxLength' => 8, | |
1675 'AllowRings' => 1, | |
1676 'AllowSharedBonds' => 1, | |
1677 'UseBondSymbols' => 1, | |
1678 'UseUniquePaths' => 1, | |
1679 'AtomIdentifierType' => | |
1680 'AtomicInvariantsAtomTypes', | |
1681 'AtomicInvariantsToUse' => ['AS']); | |
1682 | |
1683 $PathLengthFingerprints = new PathLengthFingerprints( | |
1684 'Molecule' => $Molecule, | |
1685 'Type' => 'PathLengthCount', | |
1686 'MinLength' => 1, | |
1687 'MaxLength' => 8, | |
1688 'AllowRings' => 1, | |
1689 'AllowSharedBonds' => 1, | |
1690 'UseBondSymbols' => 1, | |
1691 'UseUniquePaths' => 1, | |
1692 'AtomIdentifierType' => | |
1693 'AtomicInvariantsAtomTypes', | |
1694 'AtomicInvariantsToUse' => ['AS']); | |
1695 | |
1696 $PathLengthFingerprints = new PathLengthFingerprints( | |
1697 'Molecule' => $Molecule, | |
1698 'Type' => 'PathLengthBits', | |
1699 'AtomIdentifierType' => | |
1700 'SLogPAtomTypes'); | |
1701 | |
1702 $PathLengthFingerprints = new PathLengthFingerprints( | |
1703 'Molecule' => $Molecule, | |
1704 'Type' => 'PathLengthCount', | |
1705 'AtomIdentifierType' => | |
1706 'SYBYLAtomTypes'); | |
1707 | |
1708 $PathLengthFingerprints = new PathLengthFingerprints( | |
1709 'Molecule' => $Molecule, | |
1710 'Type' => 'PathLengthBits', | |
1711 'AtomIdentifierType' => | |
1712 'FunctionalClassAtomTypes', | |
1713 'FunctionalClassesToUse' => ['HBD', 'HBA', 'Ar']); | |
1714 | |
1715 $PathLengthFingerprints->GenerateFingerprints(); | |
1716 print "$PathLengthFingerprints\n"; | |
1717 | |
1718 =item B<GetDescription> | |
1719 | |
1720 $Description = $PathLengthFingerprints->GetDescription(); | |
1721 | |
1722 Returns a string containing description of path length fingerprints. | |
1723 | |
1724 =item B<GenerateFingerprints> | |
1725 | |
1726 $PathLengthFingerprints->GenerateFingerprints(); | |
1727 | |
1728 Generates path length fingerprints and returns I<PathLengthFingerprints>. | |
1729 | |
1730 =item B<SetMaxLength> | |
1731 | |
1732 $PathLengthFingerprints->SetMaxLength($Length); | |
1733 | |
1734 Sets maximum value of atom path length to be used during atom path length fingerprints | |
1735 generation and returns I<PathLengthFingerprints>. | |
1736 | |
1737 =item B<SetAtomIdentifierType> | |
1738 | |
1739 $PathLengthFingerprints->SetAtomIdentifierType($IdentifierType); | |
1740 | |
1741 Sets atom I<IdentifierType> to use during path length fingerprints generation and | |
1742 returns I<PathLengthFingerprints>. | |
1743 | |
1744 Possible values: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
1745 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, | |
1746 TPSAAtomTypes, UFFAtomTypes>. | |
1747 | |
1748 =item B<SetAtomicInvariantsToUse> | |
1749 | |
1750 $PathLengthFingerprints->SetAtomicInvariantsToUse($ValuesRef); | |
1751 $PathLengthFingerprints->SetAtomicInvariantsToUse(@Values); | |
1752 | |
1753 Sets atomic invariants to use during I<AtomicInvariantsAtomTypes> value of I<AtomIdentifierType> | |
1754 for path length fingerprints generation and returns I<PathLengthFingerprints>. | |
1755 | |
1756 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB, | |
1757 H, Ar, RA, FC, MN, SM>. Default value: I<AS>. | |
1758 | |
1759 The atomic invariants abbreviations correspond to: | |
1760 | |
1761 AS = Atom symbol corresponding to element symbol | |
1762 | |
1763 X<n> = Number of non-hydrogen atom neighbors or heavy atoms | |
1764 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms | |
1765 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms | |
1766 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms | |
1767 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms | |
1768 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms | |
1769 H<n> = Number of implicit and explicit hydrogens for atom | |
1770 Ar = Aromatic annotation indicating whether atom is aromatic | |
1771 RA = Ring atom annotation indicating whether atom is in a ring | |
1772 FC<+n/-n> = Formal charge assigned to atom | |
1773 MN<n> = Mass number indicating isotope other than most abundant isotope | |
1774 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or | |
1775 3 (triplet) | |
1776 | |
1777 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
1778 | |
1779 AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n> | |
1780 | |
1781 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are | |
1782 optional. Atom type specification doesn't include atomic invariants with zero or undefined values. | |
1783 | |
1784 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words | |
1785 are also allowed: | |
1786 | |
1787 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors | |
1788 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms | |
1789 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms | |
1790 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms | |
1791 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms | |
1792 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms | |
1793 H : NumOfImplicitAndExplicitHydrogens | |
1794 Ar : Aromatic | |
1795 RA : RingAtom | |
1796 FC : FormalCharge | |
1797 MN : MassNumber | |
1798 SM : SpinMultiplicity | |
1799 | |
1800 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant | |
1801 atom types. | |
1802 | |
1803 =item B<SetFunctionalClassesToUse> | |
1804 | |
1805 $PathLengthFingerprints->SetFunctionalClassesToUse($ValuesRef); | |
1806 $PathLengthFingerprints->SetFunctionalClassesToUse(@Values); | |
1807 | |
1808 Sets functional classes invariants to use during I<FunctionalClassAtomTypes> value of I<AtomIdentifierType> | |
1809 for path length fingerprints generation and returns I<PathLengthFingerprints>. | |
1810 | |
1811 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>. | |
1812 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>. | |
1813 | |
1814 The functional class abbreviations correspond to: | |
1815 | |
1816 HBD: HydrogenBondDonor | |
1817 HBA: HydrogenBondAcceptor | |
1818 PI : PositivelyIonizable | |
1819 NI : NegativelyIonizable | |
1820 Ar : Aromatic | |
1821 Hal : Halogen | |
1822 H : Hydrophobic | |
1823 RA : RingAtom | |
1824 CA : ChainAtom | |
1825 | |
1826 Functional class atom type specification for an atom corresponds to: | |
1827 | |
1828 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA or None | |
1829 | |
1830 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom | |
1831 types. It uses the following definitions [ Ref 60-61, Ref 65-66 ]: | |
1832 | |
1833 HydrogenBondDonor: NH, NH2, OH | |
1834 HydrogenBondAcceptor: N[!H], O | |
1835 PositivelyIonizable: +, NH2 | |
1836 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH | |
1837 | |
1838 =item B<SetMinLength> | |
1839 | |
1840 $PathLengthFingerprints->SetMinLength($Length); | |
1841 | |
1842 Sets minimum value of atom path length to be used during atom path length fingerprints | |
1843 generation and returns I<PathLengthFingerprints>. | |
1844 | |
1845 =item B<SetMaxLength> | |
1846 | |
1847 $PathLengthFingerprints->SetMaxLength($Length); | |
1848 | |
1849 Sets maximum value of atom path length to be used during atom path length fingerprints | |
1850 generation and returns I<PathLengthFingerprints>. | |
1851 | |
1852 =item B<SetNumOfBitsToSetPerPath> | |
1853 | |
1854 $PathLengthFingerprints->SetNumOfBitsToSetPerPath($NumOfBits); | |
1855 | |
1856 Sets number of bits to set for each path for I<PathLengthBits> value of B<Type> during path length fingerprints | |
1857 generation and returns I<PathLengthFingerprints>. | |
1858 | |
1859 =item B<SetType> | |
1860 | |
1861 $PathLengthFingerprints->SetType($Type); | |
1862 | |
1863 Sets type of path length fingerprints and returns I<PathLengthFingerprints>. Possible values: | |
1864 I<PathLengthBits or PathLengthCount>. | |
1865 | |
1866 =item B<StringifyPathLengthFingerprints> | |
1867 | |
1868 $String = $PathLengthFingerprints->StringifyPathLengthFingerprints(); | |
1869 | |
1870 Returns a string containing information about I<PathLengthFingerprints> object. | |
1871 | |
1872 =back | |
1873 | |
1874 =head1 AUTHOR | |
1875 | |
1876 Manish Sud <msud@san.rr.com> | |
1877 | |
1878 =head1 SEE ALSO | |
1879 | |
1880 Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm, | |
1881 AtomTypesFingerprints.pm, EStateIndiciesFingerprints.pm, ExtendedConnectivityFingerprints.pm, | |
1882 MACCSKeys.pm, TopologicalAtomPairsFingerprints.pm, TopologicalAtomTripletsFingerprints.pm, | |
1883 TopologicalAtomTorsionsFingerprints.pm, TopologicalPharmacophoreAtomPairsFingerprints.pm, | |
1884 TopologicalPharmacophoreAtomTripletsFingerprints.pm | |
1885 | |
1886 =head1 COPYRIGHT | |
1887 | |
1888 Copyright (C) 2015 Manish Sud. All rights reserved. | |
1889 | |
1890 This file is part of MayaChemTools. | |
1891 | |
1892 MayaChemTools is free software; you can redistribute it and/or modify it under | |
1893 the terms of the GNU Lesser General Public License as published by the Free | |
1894 Software Foundation; either version 3 of the License, or (at your option) | |
1895 any later version. | |
1896 | |
1897 =cut |