0
|
1 package Fingerprints::TopologicalAtomTripletsFingerprints;
|
|
2 #
|
|
3 # $RCSfile: TopologicalAtomTripletsFingerprints.pm,v $
|
|
4 # $Date: 2015/02/28 20:48:54 $
|
|
5 # $Revision: 1.15 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability of fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use Carp;
|
|
31 use Exporter;
|
|
32 use Fingerprints::Fingerprints;
|
|
33 use TextUtil ();
|
|
34 use Molecule;
|
|
35 use AtomTypes::AtomicInvariantsAtomTypes;
|
|
36 use AtomTypes::DREIDINGAtomTypes;
|
|
37 use AtomTypes::EStateAtomTypes;
|
|
38 use AtomTypes::FunctionalClassAtomTypes;
|
|
39 use AtomTypes::MMFF94AtomTypes;
|
|
40 use AtomTypes::SLogPAtomTypes;
|
|
41 use AtomTypes::SYBYLAtomTypes;
|
|
42 use AtomTypes::TPSAAtomTypes;
|
|
43 use AtomTypes::UFFAtomTypes;
|
|
44
|
|
# Package globals used by Exporter and for inheritance...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

# Nothing is exported: the class is used through its object interface...
@ISA = qw(Fingerprints::Fingerprints Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Class name used as a prefix in error and warning messages; it's set up by
# _InitializeClass when the module is loaded...
my($ClassName);
_InitializeClass();

# Stringification of an object is delegated to the named method...
use overload '""' => 'StringifyTopologicalAtomTripletsFingerprints';
|
|
59
|
|
# Class constructor...
#
# Creates the base class object, blesses it into this class (or into the class of
# the object used as invocant), sets default values and then applies the
# caller supplied names and values.
#
sub new {
  my($Class, %NamesAndValues) = @_;
  my($This, $TargetClass);

  # Initialize object...
  $This = $Class->SUPER::new();
  $TargetClass = ref($Class) || $Class;
  bless $This, $TargetClass;

  # Defaults first, then explicitly specified properties...
  $This->_InitializeTopologicalAtomTripletsFingerprints();
  $This->_InitializeTopologicalAtomTripletsFingerprintsProperties(%NamesAndValues);

  return $This;
}
|
|
73
|
|
# Initialize object data to default values...
#
sub _InitializeTopologicalAtomTripletsFingerprints {
  my($This) = @_;
  my(%ScalarDefaults);

  %ScalarDefaults = (
    # Type of fingerprint...
    Type => 'TopologicalAtomTriplets',

    # Type of vector...
    VectorType => 'FingerprintsVector',

    # Type of FingerprintsVector...
    FingerprintsVectorType => 'NumericalValues',

    # Minimum and maximum bond distance between atom pairs...
    MinDistance => 1,
    MaxDistance => 10,

    # Determines whether to apply triangle inequality to distance triplets...
    UseTriangleInequality => 0,

    # Atom identifier type to use for atom IDs in atom triplets...
    #
    # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
    # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
    # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
    #
    AtomIdentifierType => '',
  );
  @{$This}{keys %ScalarDefaults} = values %ScalarDefaults;

  # Atom types assigned to each heavy atom...
  %{$This->{AssignedAtomTypes}} = ();

  # All atom triplets between minimum and maximum distance...
  @{$This->{AtomTripletsIDs}} = ();
  %{$This->{AtomTripletsCount}} = ();
}
|
|
113
|
|
# Initialize class data...
#
# Records the package name in the file scoped $ClassName variable which is used
# as a prefix in error and warning messages.
#
sub _InitializeClass {
  $ClassName = __PACKAGE__;
}
|
|
119
|
|
# Initialize object properties using the names and values specified during
# object instantiation...
#
# Each name is mapped to its Set<Name> method. AtomIdentifierType is always applied
# first and the remaining names are applied in sorted order, instead of hash
# iteration order, because:
#
#   . SetAtomIdentifierType resets AtomicInvariantsToUse/FunctionalClassesToUse to
#     their default values, which would silently discard values applied before it.
#   . SetFunctionalClassesToUse ignores its values, with a warning, until
#     AtomIdentifierType has been set to FunctionalClassAtomTypes.
#
# Croaks when Molecule or AtomIdentifierType is not specified. Returns the object.
#
sub _InitializeTopologicalAtomTripletsFingerprintsProperties {
  my($This, %NamesAndValues) = @_;
  my($Name, $MethodName, @OtherNames, @OrderedNames);

  @OtherNames = grep { $_ ne 'AtomIdentifierType' } keys %NamesAndValues;
  @OrderedNames = sort @OtherNames;
  if (exists $NamesAndValues{AtomIdentifierType}) {
    unshift @OrderedNames, 'AtomIdentifierType';
  }

  for $Name (@OrderedNames) {
    $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }
  if (!exists $NamesAndValues{AtomIdentifierType}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
  }

  $This->_InitializeFingerprintsVector();

  return $This;
}
|
|
142
|
|
# Set minimum bond distance between atoms in atom triplets...
#
# The value must be a positive integer; otherwise, the method croaks. Returns the
# object.
#
sub SetMinDistance {
  my($This, $Value) = @_;
  my($IsValid);

  $IsValid = TextUtil::IsPositiveInteger($Value);
  if (!$IsValid) {
    croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid: It must be a positive integer...";
  }

  $This->{MinDistance} = $Value;
  return $This;
}
|
|
155
|
|
# Set maximum bond distance between atoms in atom triplets...
#
# The value must be a positive integer; otherwise, the method croaks. Returns the
# object.
#
sub SetMaxDistance {
  my($This, $Value) = @_;
  my($IsValid);

  $IsValid = TextUtil::IsPositiveInteger($Value);
  if (!$IsValid) {
    croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid: It must be a positive integer...";
  }

  $This->{MaxDistance} = $Value;
  return $This;
}
|
|
168
|
|
# Set atom identifier type...
#
# The specified type must match, ignoring case, one of the supported atom
# identifier types. The type can be set only once for an object: an attempt to
# change an already set type croaks. After the type is set, the type specific
# information is initialized. Returns the object.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not valid. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
  }

  if ($This->{AtomIdentifierType}) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change initial atom identifier type: It's already set...";
  }

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize atom identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}
|
|
189
|
|
# Get fingerprints description...
#
# Returns the explicitly set description when one exists; otherwise, a description
# is put together from the fingerprint type, atom identifier type and the distance
# range.
#
sub GetDescription {
  my($This) = @_;

  # An explicitly set description takes precedence...
  return $This->{Description} if exists $This->{Description};

  return join(':', $This->{Type}, $This->{AtomIdentifierType}, "MinDistance$This->{MinDistance}", "MaxDistance$This->{MaxDistance}");
}
|
|
204
|
|
205 # Generate topological atom triplets fingerprints...
|
|
206 #
|
|
207 # Let:
|
|
208 #
|
|
209 # AT = Any of the supported atom types
|
|
210 #
|
|
211 # ATx = Atom type for atom x
|
|
212 # ATy = Atom type for atom y
|
|
213 # ATz = Atom type for atom z
|
|
214 #
|
|
215 # Dxy = Distance between Px and Py
|
|
216 # Dxz = Distance between Px and Pz
|
|
217 # Dyz = Distance between Py and Pz
|
|
218 #
|
|
219 # Then:
|
|
220 #
|
|
#   ATx-Dyz-ATy-Dxz-ATz-Dxy = Atom triplet ID for atom types ATx, ATy and ATz
|
|
222 #
|
|
223 # Methodology:
|
|
224 # . Generate a distance matrix.
|
|
225 # . Assign atom types to all the atoms.
|
|
226 # . Using distance matrix and atom types, count occurrence of unique atom triplets
|
|
#     within specified distance range along with optional triangle inequality
|
|
228 #
|
|
229 # Notes:
|
|
230 # . Hydrogen atoms are ignored during the fingerprint generation.
|
|
231 # . For a molecule containing N atoms with all different atom type, the total number of
|
|
#     possible unique atom triplets without applying triangle inequality check corresponds to:
|
|
233 #
|
|
234 # Factorial( N ) / ( Factorial( N - 3 ) * Factorial (3) )
|
|
235 #
|
|
236 # However, due to similar atom types assigned to atoms in a molecule for a specific atom
|
|
237 # typing methodology and specified distance range used during fingerprints generation, the
|
|
238 # actual number of unique triplets is usually smaller than the theoretical limit.
|
|
239 #
|
|
# Generate topological atom triplets fingerprints for the molecule...
#
# Croaks when MinDistance is greater than MaxDistance. When the distance matrix
# can't be generated or atom types can't be assigned, a warning is issued, no
# fingerprints are generated and the cached molecule data is cleared before
# returning, exactly as it is after a successful generation. Returns the object.
#
sub GenerateFingerprints {
  my($This) = @_;

  if ($This->{MinDistance} > $This->{MaxDistance}) {
    croak "Error: ${ClassName}->GenerateFingerprints: No fingerprints generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
  }

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Generate distance matrix...
  if (!$This->_SetupDistanceMatrix()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't generate distance matrix...";
    # Don't hold on to cached atoms after a failed generation...
    $This->_ClearMoleculeDataCache();
    return $This;
  }

  # Assign atom types to all heavy atoms...
  if (!$This->_AssignAtomTypes()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
    # Don't hold on to cached atoms after a failed generation...
    $This->_ClearMoleculeDataCache();
    return $This;
  }

  # Initialize values of topological atom triplets...
  $This->_InitializeToplogicalAtomTriplets();

  # Count atom triplets...
  $This->_GenerateAndCountAtomTriplets();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}
|
|
276
|
|
# Setup distance matrix...
#
# Caches the distance matrix retrieved from the molecule in the object. Returns the
# object when a matrix is available; otherwise, undef.
#
sub _SetupDistanceMatrix {
  my($This) = @_;
  my($Matrix);

  $Matrix = $This->GetMolecule()->GetDistanceMatrix();
  $This->{DistanceMatrix} = $Matrix;

  return $Matrix ? $This : undef;
}
|
|
290
|
|
# Assign appropriate atom types to all heavy atoms...
#
# An atom types object corresponding to AtomIdentifierType is instantiated and
# used to assign atom types. Returns undef when the assignment doesn't succeed;
# otherwise, the atom types of all non-hydrogen atoms are collected by atom ID in
# AssignedAtomTypes and the object is returned. Croaks for an unknown atom
# identifier type.
#
sub _AssignAtomTypes {
  my($This) = @_;
  my($IgnoreHydrogens, $TypeName, $MatchedTypeName, $AtomTypesClass, $AtomTypes, $Atom, @Parameters);

  %{$This->{AssignedAtomTypes}} = ();
  $IgnoreHydrogens = 1;

  # Identify the supported type name, ignoring case...
  $MatchedTypeName = undef;
  TYPENAME: for $TypeName (qw(AtomicInvariantsAtomTypes DREIDINGAtomTypes EStateAtomTypes FunctionalClassAtomTypes MMFF94AtomTypes SLogPAtomTypes SYBYLAtomTypes TPSAAtomTypes UFFAtomTypes)) {
    if ($This->{AtomIdentifierType} =~ /^\Q$TypeName\E$/i) {
      $MatchedTypeName = $TypeName;
      last TYPENAME;
    }
  }
  if (!defined $MatchedTypeName) {
    croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  # Setup constructor parameters: TPSA atom types take their own set of options
  # instead of IgnoreHydrogens...
  if ($MatchedTypeName eq 'TPSAAtomTypes') {
    @Parameters = ('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
  }
  else {
    @Parameters = ('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
    if ($MatchedTypeName eq 'AtomicInvariantsAtomTypes') {
      push @Parameters, ('AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
    }
    elsif ($MatchedTypeName eq 'FunctionalClassAtomTypes') {
      push @Parameters, ('FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
    }
  }

  $AtomTypesClass = "AtomTypes::${MatchedTypeName}";
  $AtomTypes = $AtomTypesClass->new(@Parameters);

  # Assign atom types...
  $AtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$AtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return undef;
  }

  # Collect assigned atom types for heavy atoms...
  for $Atom (@{$This->{Atoms}}) {
    next if $Atom->IsHydrogen();
    $This->{AssignedAtomTypes}{$Atom->GetID()} = $AtomTypes->GetAtomType($Atom);
  }

  return $This;
}
|
|
369
|
|
# Initialize topological atom triplets data...
#
# Empties the atom triplet counts and IDs, in place, before a new round of triplets
# generation. Returns the object.
#
sub _InitializeToplogicalAtomTriplets {
  my($This) = @_;

  %{$This->{AtomTripletsCount}} = ();
  @{$This->{AtomTripletsIDs}} = ();

  return $This;
}
|
|
381
|
|
# Count atom triplets using distance matrix and atom types assigned to each heavy
# atom...
#
# Every combination of three atoms with assigned atom types is considered. A
# triplet is counted only when all three pairwise distances are between minimum
# and maximum distance and, when UseTriangleInequality is set, the distances
# satisfy the triangle inequality. Counts are collected by atom triplet ID in
# AtomTripletsCount. Returns the object.
#
sub _GenerateAndCountAtomTriplets {
  my($This) = @_;
  my($Matrix, $MinDistance, $MaxDistance, $SkipIndexCheck, $Index, $AtomID, $LastPosition, $Position1, $Position2, $Position3, $Index1, $Index2, $Index3, $Type1, $Type2, $Type3, $Distance12, $Distance13, $Distance23, $TripletID, @TypedAtoms);

  $Matrix = $This->{DistanceMatrix};
  ($MinDistance, $MaxDistance) = ($This->{MinDistance}, $This->{MaxDistance});
  $SkipIndexCheck = 0;

  # Collect index and atom type for atoms with assigned atom types: atoms without
  # an assigned type, such as hydrogens, never take part in a triplet...
  @TypedAtoms = ();
  for $Index (0 .. (@{$This->{Atoms}} - 1)) {
    $AtomID = $This->{AtomIndexToID}{$Index};
    next unless exists($This->{AssignedAtomTypes}{$AtomID});
    push @TypedAtoms, [$Index, $This->{AssignedAtomTypes}{$AtomID}];
  }
  $LastPosition = $#TypedAtoms;

  for $Position1 (0 .. $LastPosition) {
    ($Index1, $Type1) = @{$TypedAtoms[$Position1]};

    POSITION2: for $Position2 (($Position1 + 1) .. $LastPosition) {
      ($Index2, $Type2) = @{$TypedAtoms[$Position2]};

      # No need to look for a third atom when the first two are out of range...
      $Distance12 = $Matrix->GetValue($Index1, $Index2, $SkipIndexCheck);
      next POSITION2 if ($Distance12 < $MinDistance || $Distance12 > $MaxDistance);

      POSITION3: for $Position3 (($Position2 + 1) .. $LastPosition) {
        ($Index3, $Type3) = @{$TypedAtoms[$Position3]};

        $Distance13 = $Matrix->GetValue($Index1, $Index3, $SkipIndexCheck);
        $Distance23 = $Matrix->GetValue($Index2, $Index3, $SkipIndexCheck);

        next POSITION3 if ($Distance13 < $MinDistance || $Distance13 > $MaxDistance);
        next POSITION3 if ($Distance23 < $MinDistance || $Distance23 > $MaxDistance);
        next POSITION3 if ($This->{UseTriangleInequality} && !$This->_DoDistancesSatisfyTriangleInequality($Distance12, $Distance13, $Distance23));

        # Each atom type is paired with the distance between the other two atoms...
        $TripletID = $This->_GetAtomTripletID($Type1, $Distance23, $Type2, $Distance13, $Type3, $Distance12);
        $This->{AtomTripletsCount}{$TripletID}++;
      }
    }
  }
  return $This;
}
|
|
443
|
|
# Check triangle inequality...
#
# Returns 1 when each of the three distances is greater than the absolute
# difference and less than the sum of the other two distances; otherwise, 0. The
# comparisons are strict: a distance equal to the sum of the other two doesn't
# satisfy the check.
#
sub _DoDistancesSatisfyTriangleInequality {
  my($This, $Distance1, $Distance2, $Distance3) = @_;
  my($Arrangement, $Side, $OtherSide1, $OtherSide2);

  # Test each distance in turn against the other two...
  for $Arrangement ([$Distance1, $Distance2, $Distance3], [$Distance2, $Distance1, $Distance3], [$Distance3, $Distance1, $Distance2]) {
    ($Side, $OtherSide1, $OtherSide2) = @{$Arrangement};
    unless ($Side > abs($OtherSide1 - $OtherSide2) && $Side < ($OtherSide1 + $OtherSide2)) {
      return 0;
    }
  }
  return 1;
}
|
|
460
|
|
# Get atom triplet ID corresponding to atom types and distances of an atom
# triplet...
#
# Each atom type is combined with the distance between the other two atoms as
# <AtomType>-D<Distance>. The three parts are sorted as strings, which makes the
# ID independent of the order of atoms, and joined by hyphens.
#
sub _GetAtomTripletID {
  my($This, $ATx, $Dyz, $ATy, $Dxz, $ATz, $Dxy) = @_;
  my(@Parts, @SortedParts);

  @Parts = ("${ATx}-D${Dyz}", "${ATy}-D${Dxz}", "${ATz}-D${Dxy}");
  @SortedParts = sort @Parts;

  return join("-", @SortedParts);
}
|
|
474
|
|
# Set final fingerprints vector...
#
# Marks fingerprints as generated, stores the sorted atom triplet IDs in
# AtomTripletsIDs, and adds the IDs along with their counts, in the same order, to
# the fingerprints vector. Returns the object.
#
sub _SetFinalFingerprints {
  my($This) = @_;
  my(@SortedIDs, @Counts);

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  # Counts are retrieved in the order of sorted IDs to keep them aligned...
  @SortedIDs = sort keys %{$This->{AtomTripletsCount}};
  @Counts = @{$This->{AtomTripletsCount}}{@SortedIDs};

  @{$This->{AtomTripletsIDs}} = @SortedIDs;

  # Add AtomTripletsIDs and values to fingerprint vector...
  $This->{FingerprintsVector}->AddValueIDs($This->{AtomTripletsIDs});
  $This->{FingerprintsVector}->AddValues(\@Counts);

  return $This;
}
|
|
499
|
|
# Get atom triplet IDs corresponding to atom triplets count values in fingerprint
# vector as an array or reference to an array...
#
# AtomTripletIDs list differs between molecules and is generated during
# finalization of fingerprints to make sure the fingerprint vector containing
# count values matches the atom triplets array.
#
sub GetAtomTripletIDs {
  my($This) = @_;

  # List context gets the IDs; any other context gets a reference to the array
  # held by the object...
  if (wantarray) {
    return @{$This->{AtomTripletsIDs}};
  }
  return \@{$This->{AtomTripletsIDs}};
}
|
|
512
|
|
# Cache appropriate molecule data...
#
# Caches the atoms of the molecule in Atoms along with a mapping of their
# positions in that list to atom IDs in AtomIndexToID. Returns the object.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;
  my($Molecule, @AtomIDs);

  # Get all atoms including hydrogens to correctly map atom indices to atom IDs for
  # usage of distance matrix. The hydrogen atoms are ignored during processing...
  #
  $Molecule = $This->GetMolecule();
  @{$This->{Atoms}} = $Molecule->GetAtoms();

  # Get all atom IDs in the order of cached atoms...
  @AtomIDs = map { $_->GetID() } @{$This->{Atoms}};

  # Set AtomIndex to AtomID hash...
  %{$This->{AtomIndexToID}} = map { ($_ => $AtomIDs[$_]) } (0 .. $#AtomIDs);

  return $This;
}
|
|
534
|
|
# Set atomic invariants to use for atom identifiers...
#
# The atomic invariants may be specified as a list of values or as a reference to
# an array. A warning is issued and nothing is set when no values are specified.
# Croaks when a specified atomic invariant is not available. Returns the object
# after the values have been set.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;
  my($AtomicInvariant, @SpecifiedAtomicInvariants);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  # Values are taken from the first argument when it's an array reference...
  @SpecifiedAtomicInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomicInvariants are valid...
  for $AtomicInvariant (@SpecifiedAtomicInvariants) {
    if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($AtomicInvariant)) {
      croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $AtomicInvariant, is not supported...\n ";
    }
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = @SpecifiedAtomicInvariants;

  return $This;
}
|
|
574
|
|
# Set functional classes to use for atom identifiers...
#
# The functional classes may be specified as a list of values or as a reference to
# an array. A warning is issued and nothing is set when no values are specified
# or when AtomIdentifierType is not FunctionalClassAtomTypes. Croaks when a
# specified functional class is not available. Returns the object after the
# values have been set.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;
  my($FunctionalClass, @SpecifiedFunctionalClasses);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # Values are taken from the first argument when it's an array reference...
  @SpecifiedFunctionalClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified FunctionalClasses are valid...
  for $FunctionalClass (@SpecifiedFunctionalClasses) {
    if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($FunctionalClass)) {
      croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $FunctionalClass, is not supported...\n ";
    }
  }

  # Set functional classes to use...
  @{$This->{FunctionalClassesToUse}} = @SpecifiedFunctionalClasses;

  return $This;
}
|
|
618
|
|
# Initialize atom identifier type information...
#
# Current supported values:
#
# AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# Only AtomicInvariantsAtomTypes and FunctionalClassAtomTypes have additional
# information to initialize. Croaks for an unknown atom identifier type. Returns
# the object.
#
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
    return $This;
  }

  if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
    return $This;
  }

  if ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    # Nothing to do for now...
    return $This;
  }

  croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
}
|
|
644
|
|
645 # Initialize atomic invariants atom types to use for generating atom IDs in atom triplets...
|
|
646 #
|
|
647 # Let:
|
|
648 # AS = Atom symbol corresponding to element symbol
|
|
649 #
|
|
650 # X<n> = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
651 # BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
652 # LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
653 # SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
654 # DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
655 # TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
656 # H<n> = Number of implicit and explicit hydrogens for atom
|
|
657 # Ar = Aromatic annotation indicating whether atom is aromatic
|
|
#   RA = Ring atom annotation indicating whether atom is in a ring
|
|
659 # FC<+n/-n> = Formal charge assigned to atom
|
|
660 # MN<n> = Mass number indicating isotope other than most abundant isotope
|
|
661 # SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
|
|
662 #
|
|
663 # ATx = Atomic invariants atom type for atom x
|
|
664 # ATy = Atomic invariants atom type for atom y
|
|
665 # ATz = Atomic invariants atom type for atom z
|
|
666 #
|
|
667 # Dxy = Distance between Px and Py
|
|
668 # Dxz = Distance between Px and Pz
|
|
669 # Dyz = Distance between Py and Pz
|
|
670 #
|
|
671 # Then:
|
|
672 #
|
|
673 # Atom triplet AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
|
|
674 #
|
|
675 # AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
|
|
676 #
|
|
677 # Toplogical atom triplet ID between atom IDs ATx, ATy and ATz corresponds to:
|
|
678 #
|
|
679 # ATx-Dyz-ATy-Dxz-ATz-Dxy
|
|
680 #
|
|
681 # Except for AS which is a required atomic invariant in atom triplet AtomIDs, all other atomic invariants are
|
|
682 # optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
|
|
683 # AtomID specification doesn't include atomic invariants with zero or undefined values.
|
|
684 #
|
|
685 # Examples of atom triplet AtomIDs:
|
|
686 #
|
|
687 # O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge
|
|
#   O.X1.BO1.FC-1 - Hydroxyl oxygen in carboxylate with explicit negative charge
|
|
689 # O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon
|
|
#   O.X2.BO2 - Hydroxyl oxygen in carboxylate attached to carbonyl carbon and another heavy atom
|
|
691 #
|
|
692 # C.X2.BO3.H1.Ar - Aromatic carbon
|
|
693 #
|
|
# Set default atomic invariants to use for generating atom IDs in atom triplets...
#
# The defaults are: AS, X, BO, H, FC. Returns the object.
#
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;

  @{$This->{AtomicInvariantsToUse}} = qw(AS X BO H FC);

  return $This;
}
|
|
704
|
|
705 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
|
|
706 # class, to use for generating atom identifiers...
|
|
707 #
|
|
708 # Let:
|
|
709 # HBD: HydrogenBondDonor
|
|
710 # HBA: HydrogenBondAcceptor
|
|
711 # PI : PositivelyIonizable
|
|
712 # NI : NegativelyIonizable
|
|
713 # Ar : Aromatic
|
|
714 # Hal : Halogen
|
|
715 # H : Hydrophobic
|
|
716 # RA : RingAtom
|
|
717 # CA : ChainAtom
|
|
718 #
|
|
719 # Then:
|
|
720 #
|
|
#   Functional class atom type specification for an atom corresponds to:
|
|
722 #
|
|
723 # Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
|
|
724 #
|
|
725 # Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
|
|
726 #
|
|
727 # FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
|
|
728 #
|
|
729 # HydrogenBondDonor: NH, NH2, OH
|
|
730 # HydrogenBondAcceptor: N[!H], O
|
|
731 # PositivelyIonizable: +, NH2
|
|
732 # NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
|
|
733 #
|
|
# Set default functional classes to use for generating atom identifiers...
#
# The defaults are: HBD, HBA, PI, NI, Ar, Hal. Returns the object.
#
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;

  @{$This->{FunctionalClassesToUse}} = qw(HBD HBA PI NI Ar Hal);

  return $This;
}
|
|
745
|
|
# Clear cached molecule data...
#
# Empties, in place, the list of atoms cached by _SetupMoleculeDataCache. Returns
# the object.
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  @{$This->{Atoms}} = ();

  return $This;
}
|
|
755
|
|
# Return a string containing data for TopologicalAtomTripletsFingerprints object...
#
# The string is a list of "Name: Value" fields delimited by "; ". Fields describing
# atomic invariants or functional classes are included only for the
# corresponding atom identifier types. This method is also used to stringify the
# object through overloading of '""'.
#
sub StringifyTopologicalAtomTripletsFingerprints {
  my($This) = @_;
  my(@Fields);

  # Type of fingerprint, atom identifier type and distance settings...
  @Fields = (
    "Fingerprint type: $This->{Type}",
    "AtomIdentifierType: $This->{AtomIdentifierType}",
    "MinDistance: $This->{MinDistance}",
    "MaxDistance: $This->{MaxDistance}",
    "UseTriangleInequality: " . ($This->{UseTriangleInequality} ? "Yes" : "No"),
  );

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my(@AtomicInvariantsOrder, @DescribedAtomicInvariants, %AvailableAtomicInvariants);

    @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();
    @DescribedAtomicInvariants = map { "$_: $AvailableAtomicInvariants{$_}" } @AtomicInvariantsOrder;

    push @Fields, "AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    push @Fields, "AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
    push @Fields, "AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@DescribedAtomicInvariants, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my(@FunctionalClassesOrder, @DescribedFunctionalClasses, %AvailableFunctionalClasses);

    @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
    @DescribedFunctionalClasses = map { "$_: $AvailableFunctionalClasses{$_}" } @FunctionalClassesOrder;

    push @Fields, "FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    push @Fields, "FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
    push @Fields, "AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@DescribedFunctionalClasses, ", ", 0) . ">";
  }

  # Total number of atom triplets...
  push @Fields, "NumOfAtomTriplets: " . $This->{FingerprintsVector}->GetNumOfValues();

  # FingerprintsVector...
  push @Fields, "FingerprintsVector: < $This->{FingerprintsVector} >";

  return join("; ", @Fields);
}
|
|
805
|
|
806 1;
|
|
807
|
|
808 __END__
|
|
809
|
|
810 =head1 NAME
|
|
811
|
|
812 TopologicalAtomTripletsFingerprints
|
|
813
|
|
814 =head1 SYNOPSIS
|
|
815
|
|
816 use Fingerprints::TopologicalAtomTripletsFingerprints;
|
|
817
|
|
818 use Fingerprints::TopologicalAtomTripletsFingerprints qw(:all);
|
|
819
|
|
820 =head1 DESCRIPTION
|
|
821
|
|
822 B<TopologicalAtomTripletsFingerprints> [ Ref 57, Ref 59, Ref 72 ] class provides the following methods:
|
|
823
|
|
824 new, GenerateFingerprints, GetAtomTripletIDs, GetDescription,
|
|
825 SetAtomIdentifierType, SetAtomicInvariantsToUse, SetFunctionalClassesToUse,
|
|
826 SetMaxDistance, SetMinDistance, StringifyTopologicalAtomTripletsFingerprints
|
|
827
|
|
828 B<TopologicalAtomTripletsFingerprints> is derived from B<Fingerprints> class which in turn
|
|
829 is derived from B<ObjectProperty> base class that provides methods not explicitly defined
|
|
830 in B<TopologicalAtomTripletsFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's
|
|
831 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property:
|
|
832
|
|
833 Set<PropertyName>(<PropertyValue>);
|
|
834 $PropertyValue = Get<PropertyName>();
|
|
835 Delete<PropertyName>();
|
|
836
|
|
837 The current release of MayaChemTools supports generation of B<TopologicalAtomTripletsFingerprints>
|
|
838 corresponding to the following B<AtomIdentifierTypes>:
|
|
839
|
|
840 AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
|
|
841 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
|
|
842 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
|
|
843
|
|
844 Based on the values specified for B<AtomIdentifierType> along with other specified
|
|
845 parameters such as B<AtomicInvariantsToUse> and B<FunctionalClassesToUse>, initial
|
|
846 atom types are assigned to all non-hydrogen atoms in a molecule. Using the distance
|
|
847 matrix for the molecule and initial atom types assigned to non-hydrogen atoms, all unique atom
|
|
848 triplets within B<MinDistance> and B<MaxDistance> are identified and counted. An atom triplet
|
|
849 identifier is generated for each unique atom triplet; the format of atom triplet identifier is:
|
|
850
|
|
851 <ATx>-Dyz-<ATy>-Dxz-<ATz>-Dxy
|
|
852
|
|
853 ATx, ATy, ATz: Atom types assigned to atom x, atom y, and atom z
|
|
854 Dxy: Distance between atom x and atom y
|
|
855 Dxz: Distance between atom x and atom z
|
|
856 Dyz: Distance between atom y and atom z
|
|
857
|
|
858 where <ATx>-Dyz <= <ATy>-Dxz <= <ATz>-Dxy
|
|
859
|
|
860 The atom triplet identifiers for all unique atom triplets corresponding to non-hydrogen atoms constitute
|
|
861 topological atom triplets fingerprints of the molecule.
|
|
862
|
|
863 The current release of MayaChemTools generates the following types of topological atom triplets
|
|
864 fingerprints vector strings:
|
|
865
|
|
866 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M
|
|
867 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1
|
|
868 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1
|
|
869 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1
|
|
870 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....;
|
|
871 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2
|
|
872 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8...
|
|
873
|
|
874 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M
|
|
875 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesPairsString
|
|
876 ;C.X1.BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 1 C.X1.BO1.H3-D1-C.X2.BO
|
|
877 2.H2-D10-C.X3.BO4-D9 2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 2 C.X
|
|
878 1.BO1.H3-D1-C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 2 C.X1.BO1.H3-D1-C.X2.BO2.H2
|
|
879 -D6-C.X3.BO3.H1-D5 2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3.BO3.H1-D7 2...
|
|
880
|
|
881 FingerprintsVector;TopologicalAtomTriplets:DREIDINGAtomTypes:MinDistan
|
|
882 ce1:MaxDistance10;2377;NumericalValues;IDsAndValuesString;C_2-D1-C_2-D
|
|
883 9-C_3-D10 C_2-D1-C_2-D9-C_R-D10 C_2-D1-C_3-D1-C_3-D2 C_2-D1-C_3-D10-C_
|
|
884 3-D9 C_2-D1-C_3-D2-C_3-D3 C_2-D1-C_3-D2-C_R-D3 C_2-D1-C_3-D3-C_3-D4 C_
|
|
885 2-D1-C_3-D3-N_R-D4 C_2-D1-C_3-D3-O_3-D2 C_2-D1-C_3-D4-C_3-D5 C_2-D...;
|
|
886 1 1 1 2 1 1 3 1 1 2 2 1 1 1 1 1 1 1 1 2 1 3 4 5 1 1 6 4 2 2 3 1 1 1 2
|
|
887 2 1 2 1 1 2 2 2 1 2 1 2 1 1 3 3 2 6 4 2 1 1 1 2 2 1 1 1 1 1 1 1 1 1...
|
|
888
|
|
889 FingerprintsVector;TopologicalAtomTriplets:EStateAtomTypes:MinDistance
|
|
890 1:MaxDistance10;3298;NumericalValues;IDsAndValuesString;aaCH-D1-aaCH-D
|
|
891 1-aaCH-D2 aaCH-D1-aaCH-D1-aasC-D2 aaCH-D1-aaCH-D10-aaCH-D9 aaCH-D1-aaC
|
|
892 H-D10-aasC-D9 aaCH-D1-aaCH-D2-aaCH-D3 aaCH-D1-aaCH-D2-aasC-D1 aaCH-D1-
|
|
893 aaCH-D2-aasC-D3 aaCH-D1-aaCH-D3-aasC-D2 aaCH-D1-aaCH-D4-aasC-D5 aa...;
|
|
894 6 4 24 4 16 8 8 4 8 8 8 12 10 14 4 16 24 4 12 2 2 4 1 10 2 2 15 2 2 2
|
|
895 2 2 2 14 4 2 2 2 2 1 2 10 2 2 4 1 2 4 8 3 3 3 4 6 4 2 2 3 3 1 1 1 2 1
|
|
896 2 2 4 2 3 2 1 2 4 5 3 2 2 1 2 4 3 2 8 12 6 2 2 4 4 7 1 4 2 4 2 2 2 ...
|
|
897
|
|
898 FingerprintsVector;TopologicalAtomTriplets:FunctionalClassAtomTypes:Mi
|
|
899 nDistance1:MaxDistance10;2182;NumericalValues;IDsAndValuesString;Ar-D1
|
|
900 -Ar-D1-Ar-D2 Ar-D1-Ar-D1-Ar.HBA-D2 Ar-D1-Ar-D10-Ar-D9 Ar-D1-Ar-D10-Hal
|
|
901 -D9 Ar-D1-Ar-D2-Ar-D2 Ar-D1-Ar-D2-Ar-D3 Ar-D1-Ar-D2-Ar.HBA-D1 Ar-D1-Ar
|
|
902 -D2-Ar.HBA-D2 Ar-D1-Ar-D2-Ar.HBA-D3 Ar-D1-Ar-D2-HBD-D1 Ar-D1-Ar-D2...;
|
|
903 27 1 32 2 2 63 3 2 1 2 1 2 3 1 1 40 3 1 2 2 2 2 4 2 2 47 4 2 2 1 2 1 5
|
|
904 2 2 51 4 3 1 3 1 9 1 1 50 3 3 4 1 9 50 2 2 3 3 5 45 1 1 1 2 1 2 2 3 3
|
|
905 4 4 3 2 1 1 3 4 5 5 3 1 2 3 2 3 5 7 2 7 3 7 1 1 2 2 2 2 3 1 4 3 1 2...
|
|
906
|
|
907 FingerprintsVector;TopologicalAtomTriplets:MMFF94AtomTypes:MinDistance
|
|
908 1:MaxDistance10;2966;NumericalValues;IDsAndValuesString;C5A-D1-C5A-D1-
|
|
909 N5-D2 C5A-D1-C5A-D2-C5B-D2 C5A-D1-C5A-D3-CB-D2 C5A-D1-C5A-D3-CR-D2 C5A
|
|
910 -D1-C5B-D1-C5B-D2 C5A-D1-C5B-D2-C=ON-D1 C5A-D1-C5B-D2-CB-D1 C5A-D1-C5B
|
|
911 -D3-C=ON-D2 C5A-D1-C5B-D3-CB-D2 C5A-D1-C=ON-D3-NC=O-D2 C5A-D1-C=ON-D3-
|
|
912 O=CN-D2 C5A-D1-C=ON-D4-NC=O-D3 C5A-D1-C=ON-D4-O=CN-D3 C5A-D1-CB-D1-...
|
|
913
|
|
914 FingerprintsVector;TopologicalAtomTriplets:SLogPAtomTypes:MinDistance1
|
|
915 :MaxDistance10;3710;NumericalValues;IDsAndValuesString;C1-D1-C1-D1-C11
|
|
916 -D2 C1-D1-C1-D1-CS-D2 C1-D1-C1-D10-C5-D9 C1-D1-C1-D3-C10-D2 C1-D1-C1-D
|
|
917 3-C5-D2 C1-D1-C1-D3-CS-D2 C1-D1-C1-D3-CS-D4 C1-D1-C1-D4-C10-D5 C1-D1-C
|
|
918 1-D4-C11-D5 C1-D1-C1-D5-C10-D4 C1-D1-C1-D5-C5-D4 C1-D1-C1-D6-C11-D7 C1
|
|
919 -D1-C1-D6-CS-D5 C1-D1-C1-D6-CS-D7 C1-D1-C1-D8-C11-D9 C1-D1-C1-D8-CS...
|
|
920
|
|
921 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1
|
|
922 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C
|
|
923 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3-
|
|
924 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2
|
|
925 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C.
|
|
926 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7...
|
|
927
|
|
928 FingerprintsVector;TopologicalAtomTriplets:TPSAAtomTypes:MinDistance1:
|
|
929 MaxDistance10;1007;NumericalValues;IDsAndValuesString;N21-D1-N7-D3-Non
|
|
930 e-D4 N21-D1-N7-D5-None-D4 N21-D1-None-D1-None-D2 N21-D1-None-D2-None-D
|
|
931 2 N21-D1-None-D2-None-D3 N21-D1-None-D3-None-D4 N21-D1-None-D4-None-D5
|
|
932 N21-D1-None-D4-O3-D3 N21-D1-None-D4-O4-D3 N21-D1-None-D5-None-D6 N21-
|
|
933 D1-None-D6-None-D7 N21-D1-None-D6-O4-D5 N21-D1-None-D7-None-D8 N21-...
|
|
934
|
|
935 FingerprintsVector;TopologicalAtomTriplets:UFFAtomTypes:MinDistance1:M
|
|
936 axDistance10;2377;NumericalValues;IDsAndValuesString;C_2-D1-C_2-D9-C_3
|
|
937 -D10 C_2-D1-C_2-D9-C_R-D10 C_2-D1-C_3-D1-C_3-D2 C_2-D1-C_3-D10-C_3-D9
|
|
938 C_2-D1-C_3-D2-C_3-D3 C_2-D1-C_3-D2-C_R-D3 C_2-D1-C_3-D3-C_3-D4 C_2-D1-
|
|
939 C_3-D3-N_R-D4 C_2-D1-C_3-D3-O_3-D2 C_2-D1-C_3-D4-C_3-D5 C_2-D1-C_3-D5-
|
|
940 C_3-D6 C_2-D1-C_3-D5-O_3-D4 C_2-D1-C_3-D6-C_3-D7 C_2-D1-C_3-D7-C_3-...
|
|
941
|
|
942 =head2 METHODS
|
|
943
|
|
944 =over 4
|
|
945
|
|
946 =item B<new>
|
|
947
|
|
948 $NewTopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints(
|
|
949 %NamesAndValues);
|
|
950
|
|
951 Using specified I<TopologicalAtomTripletsFingerprints> property names and values hash, B<new>
|
|
952 method creates a new object and returns a reference to newly created B<TopologicalAtomTripletsFingerprints>
|
|
953 object. By default, the following properties are initialized:
|
|
954
|
|
955 Molecule = ''
|
|
956 Type = 'TopologicalAtomTriplets'
|
|
957 MinDistance = 1
|
|
958 MaxDistance = 10
|
|
959 UseTriangleInequality = 1
|
|
960 AtomIdentifierType = ''
|
|
961 AtomicInvariantsToUse = ['AS', 'X', 'BO', 'H', 'FC']
|
|
962 FunctionalClassesToUse = ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal']
|
|
963
|
|
964 Examples:
|
|
965
|
|
966 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints(
|
|
967 'Molecule' => $Molecule,
|
|
968 'AtomIdentifierType' =>
|
|
969 'AtomicInvariantsAtomTypes');
|
|
970
|
|
971 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints(
|
|
972 'Molecule' => $Molecule,
|
|
973 'MinDistance' => 1,
|
|
974 'MaxDistance' => 10,
|
|
975 'AtomIdentifierType' =>
|
|
976 'AtomicInvariantsAtomTypes',
|
|
977 'AtomicInvariantsToUse' =>
|
|
978 ['AS', 'X', 'BO', 'H', 'FC'] );
|
|
979
|
|
980 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints(
|
|
981 'Molecule' => $Molecule,
|
|
982 'AtomIdentifierType' =>
|
|
983 'DREIDINGAtomTypes');
|
|
984
|
|
985 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints(
|
|
986 'Molecule' => $Molecule,
|
|
987 'AtomIdentifierType' =>
|
|
988 'MMFF94AtomTypes');
|
|
989
|
|
990 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints(
|
|
991 'Molecule' => $Molecule,
|
|
992 'AtomIdentifierType' =>
|
|
993 'TPSAAtomTypes');
|
|
994
|
|
995 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints(
|
|
996 'Molecule' => $Molecule,
|
|
997 'MinDistance' => 1,
|
|
998 'MaxDistance' => 10,
|
|
999 'AtomIdentifierType' =>
|
|
1000 'FunctionalClassAtomTypes',
|
|
1001 'FunctionalClassesToUse' =>
|
|
1002 ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal']);
|
|
1003
|
|
1004 $TopologicalAtomTripletsFingerprints->GenerateFingerprints();
|
|
1005 print "$TopologicalAtomTripletsFingerprints\n";
|
|
1006
|
|
1007 =item B<GetDescription>
|
|
1008
|
|
1009 $Return = $TopologicalAtomTripletsFingerprints->GetDescription();
|
|
1010
|
|
1011 Returns a string containing description of topological atom triplets fingerprints.
|
|
1012
|
|
1013 =item B<GenerateFingerprints>
|
|
1014
|
|
1015 $TopologicalAtomTripletsFingerprints->GenerateFingerprints();
|
|
1016
|
|
1017 Generates topological atom triplets fingerprints and returns I<TopologicalAtomTripletsFingerprints>.
|
|
1018
|
|
1019 =item B<GetAtomTripletIDs>
|
|
1020
|
|
1021 $AtomTripletIDsRef = $TopologicalAtomTripletsFingerprints->GetAtomTripletIDs();
|
|
1022 @AtomTripletIDs = $TopologicalAtomTripletsFingerprints->GetAtomTripletIDs();
|
|
1023
|
|
1024 Returns atom triplet IDs corresponding to atom triplets count values in topological atom triplets
|
|
1025 fingerprints vector as an array or reference to an array.
|
|
1026
|
|
1027 =item B<SetAtomIdentifierType>
|
|
1028
|
|
1029 $TopologicalAtomTripletsFingerprints->SetAtomIdentifierType($IdentifierType);
|
|
1030
|
|
1031 Sets atom I<IdentifierType> to use during atom triplets fingerprints generation and
|
|
1032 returns I<TopologicalAtomTripletsFingerprints>.
|
|
1033
|
|
1034 Possible values: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
|
|
1035 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
|
|
1036 TPSAAtomTypes, UFFAtomTypes>.
|
|
1037
|
|
1038 =item B<SetAtomicInvariantsToUse>
|
|
1039
|
|
1040 $TopologicalAtomTripletsFingerprints->SetAtomicInvariantsToUse($ValuesRef);
|
|
1041 $TopologicalAtomTripletsFingerprints->SetAtomicInvariantsToUse(@Values);
|
|
1042
|
|
1043 Sets atomic invariants to use during I<AtomicInvariantsAtomTypes> value of I<AtomIdentifierType>
|
|
1044 for topological atom triplets fingerprints generation and returns I<TopologicalAtomTripletsFingerprints>.
|
|
1045
|
|
1046 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB,
|
|
1047 H, Ar, RA, FC, MN, SM>. Default value: I<AS,X,BO,H,FC>.
|
|
1048
|
|
1049 The atomic invariants abbreviations correspond to:
|
|
1050
|
|
1051 AS = Atom symbol corresponding to element symbol
|
|
1052
|
|
1053 X<n> = Number of non-hydrogen atom neighbors or heavy atoms
|
|
1054 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms
|
|
1055 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms
|
|
1056 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms
|
|
1057 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms
|
|
1058 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms
|
|
1059 H<n> = Number of implicit and explicit hydrogens for atom
|
|
1060 Ar = Aromatic annotation indicating whether atom is aromatic
|
|
1061 RA = Ring atom annotation indicating whether atom is in a ring
|
|
1062 FC<+n/-n> = Formal charge assigned to atom
|
|
1063 MN<n> = Mass number indicating isotope other than most abundant isotope
|
|
1064 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or
|
|
1065 3 (triplet)
|
|
1066
|
|
1067 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
|
|
1068
|
|
1069 AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
|
|
1070
|
|
1071 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
|
|
1072 optional. Atom type specification doesn't include atomic invariants with zero or undefined values.
|
|
1073
|
|
1074 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words
|
|
1075 are also allowed:
|
|
1076
|
|
1077 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
|
|
1078 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
|
|
1079 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
|
|
1080 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
|
|
1081 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
|
|
1082 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
|
|
1083 H : NumOfImplicitAndExplicitHydrogens
|
|
1084 Ar : Aromatic
|
|
1085 RA : RingAtom
|
|
1086 FC : FormalCharge
|
|
1087 MN : MassNumber
|
|
1088 SM : SpinMultiplicity
|
|
1089
|
|
1090 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant
|
|
1091 atom types.
|
|
1092
|
|
1093 =item B<SetFunctionalClassesToUse>
|
|
1094
|
|
1095 $TopologicalAtomTripletsFingerprints->SetFunctionalClassesToUse($ValuesRef);
|
|
1096 $TopologicalAtomTripletsFingerprints->SetFunctionalClassesToUse(@Values);
|
|
1097
|
|
1098 Sets functional classes to use during I<FunctionalClassAtomTypes> value of I<AtomIdentifierType>
|
|
1099 for topological atom triplets fingerprints generation and returns I<TopologicalAtomTripletsFingerprints>.
|
|
1100
|
|
1101 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>.
|
|
1102 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>.
|
|
1103
|
|
1104 The functional class abbreviations correspond to:
|
|
1105
|
|
1106 HBD: HydrogenBondDonor
|
|
1107 HBA: HydrogenBondAcceptor
|
|
1108 PI : PositivelyIonizable
|
|
1109 NI : NegativelyIonizable
|
|
1110 Ar : Aromatic
|
|
1111 Hal : Halogen
|
|
1112 H : Hydrophobic
|
|
1113 RA : RingAtom
|
|
1114 CA : ChainAtom
|
|
1115
|
|
1116 Functional class atom type specification for an atom corresponds to:
|
|
1117
|
|
1118 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA or None
|
|
1119
|
|
1120 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom
|
|
1121 types. It uses the following definitions [ Ref 60-61, Ref 65-66 ]:
|
|
1122
|
|
1123 HydrogenBondDonor: NH, NH2, OH
|
|
1124 HydrogenBondAcceptor: N[!H], O
|
|
1125 PositivelyIonizable: +, NH2
|
|
1126 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
|
|
1127
|
|
1128 =item B<SetMaxDistance>
|
|
1129
|
|
1130 $TopologicalAtomTripletsFingerprints->SetMaxDistance($Distance);
|
|
1131
|
|
1132 Sets maximum distance to use during topological atom triplets fingerprints generation and
|
|
1133 returns I<TopologicalAtomTripletsFingerprints>.
|
|
1134
|
|
1135 =item B<SetMinDistance>
|
|
1136
|
|
1137 $TopologicalAtomTripletsFingerprints->SetMinDistance($Distance);
|
|
1138
|
|
1139 Sets minimum distance to use during topological atom triplets fingerprints generation and
|
|
1140 returns I<TopologicalAtomTripletsFingerprints>.
|
|
1141
|
|
1142 =item B<StringifyTopologicalAtomTripletsFingerprints>
|
|
1143
|
|
1144 $String = $TopologicalAtomTripletsFingerprints->
|
|
1145 StringifyTopologicalAtomTripletsFingerprints();
|
|
1146
|
|
1147 Returns a string containing information about I<TopologicalAtomTripletsFingerprints> object.
|
|
1148
|
|
1149 =back
|
|
1150
|
|
1151 =head1 AUTHOR
|
|
1152
|
|
1153 Manish Sud <msud@san.rr.com>
|
|
1154
|
|
1155 =head1 SEE ALSO
|
|
1156
|
|
1157 Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm,
|
|
1158 AtomTypesFingerprints.pm, EStateIndiciesFingerprints.pm, ExtendedConnectivityFingerprints.pm,
|
|
1159 MACCSKeys.pm, PathLengthFingerprints.pm, TopologicalAtomPairsFingerprints.pm,
|
|
1160 TopologicalAtomTorsionsFingerprints.pm, TopologicalPharmacophoreAtomPairsFingerprints.pm,
|
|
1161 TopologicalPharmacophoreAtomTripletsFingerprints.pm
|
|
1162
|
|
1163 =head1 COPYRIGHT
|
|
1164
|
|
1165 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
1166
|
|
1167 This file is part of MayaChemTools.
|
|
1168
|
|
1169 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
1170 the terms of the GNU Lesser General Public License as published by the Free
|
|
1171 Software Foundation; either version 3 of the License, or (at your option)
|
|
1172 any later version.
|
|
1173
|
|
1174 =cut
|