Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/Fingerprints/TopologicalAtomTripletsFingerprints.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4816e4a8ae95 |
---|---|
1 package Fingerprints::TopologicalAtomTripletsFingerprints; | |
2 # | |
3 # $RCSfile: TopologicalAtomTripletsFingerprints.pm,v $ | |
4 # $Date: 2015/02/28 20:48:54 $ | |
5 # $Revision: 1.15 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability of fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use Carp; | |
31 use Exporter; | |
32 use Fingerprints::Fingerprints; | |
33 use TextUtil (); | |
34 use Molecule; | |
35 use AtomTypes::AtomicInvariantsAtomTypes; | |
36 use AtomTypes::DREIDINGAtomTypes; | |
37 use AtomTypes::EStateAtomTypes; | |
38 use AtomTypes::FunctionalClassAtomTypes; | |
39 use AtomTypes::MMFF94AtomTypes; | |
40 use AtomTypes::SLogPAtomTypes; | |
41 use AtomTypes::SYBYLAtomTypes; | |
42 use AtomTypes::TPSAAtomTypes; | |
43 use AtomTypes::UFFAtomTypes; | |
44 | |
# Package symbols consulted by Perl's inheritance mechanism and by Exporter...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = qw(Fingerprints::Fingerprints Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Class name used as a prefix in error and warning messages; assigned by _InitializeClass...
my($ClassName);
_InitializeClass();

# Render objects used in string context through the stringify method...
use overload '""' => 'StringifyTopologicalAtomTripletsFingerprints';
59 | |
60 # Class constructor... | |
sub new {
    # Build the parent class object, re-bless it into the requested class, install
    # the default settings and finally apply the caller supplied name/value pairs.
    #
    # Returns the new object.
    my $Class = shift;
    my %NamesAndValues = @_;

    my $This = $Class->SUPER::new();
    bless $This, ref($Class) || $Class;

    # Defaults first, so that explicit values specified by the caller win...
    $This->_InitializeTopologicalAtomTripletsFingerprints();
    $This->_InitializeTopologicalAtomTripletsFingerprintsProperties(%NamesAndValues);

    return $This;
}
73 | |
74 # Initialize object data... | |
75 # | |
sub _InitializeTopologicalAtomTripletsFingerprints {
    # Install default settings and empty result containers on the object.
    my($This) = @_;

    # Scalar defaults, listed as name/value pairs:
    #   . Type, VectorType, FingerprintsVectorType: kind of fingerprints and vector
    #   . MinDistance, MaxDistance: bond distance range between atom pairs
    #   . UseTriangleInequality: whether to apply triangle inequality to distance triplets
    #   . AtomIdentifierType: empty until set; supported values are AtomicInvariantsAtomTypes,
    #     DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes,
    #     SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
    #
    my @ScalarDefaults = (
        Type => 'TopologicalAtomTriplets',
        VectorType => 'FingerprintsVector',
        FingerprintsVectorType => 'NumericalValues',
        MinDistance => 1,
        MaxDistance => 10,
        UseTriangleInequality => 0,
        AtomIdentifierType => '',
    );
    while (my($Name, $Value) = splice(@ScalarDefaults, 0, 2)) {
        $This->{$Name} = $Value;
    }

    # Atom types assigned to each heavy atom...
    %{$This->{AssignedAtomTypes}} = ();

    # All atom triplets between minimum and maximum distance along with their counts...
    @{$This->{AtomTripletsIDs}} = ();
    %{$This->{AtomTripletsCount}} = ();
}
113 | |
114 # Initialize class ... | |
sub _InitializeClass {
    # Remember the package name; it prefixes the error and warning messages
    # issued by the methods of this class...
    $ClassName = __PACKAGE__;
}
119 | |
120 # Initialize object properties.... | |
sub _InitializeTopologicalAtomTripletsFingerprintsProperties {
    # Apply the name/value pairs specified during object construction by calling
    # the corresponding Set<Name> method for each of them, and then initialize the
    # fingerprints vector.
    #
    # Parameters:
    #   %NamesAndValues - property names and values; Molecule and AtomIdentifierType
    #                     are required.
    #
    # Returns the object. Croaks when a required property is missing; any error
    # raised by an individual Set<Name> method propagates to the caller.
    my($This, %NamesAndValues) = @_;

    # Make sure required properties were specified before changing the object...
    if (!exists $NamesAndValues{Molecule}) {
        croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
    }
    if (!exists $NamesAndValues{AtomIdentifierType}) {
        croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
    }

    # The order of application matters and must not depend on hash ordering:
    # SetAtomIdentifierType installs default AtomicInvariantsToUse or
    # FunctionalClassesToUse values, and SetFunctionalClassesToUse ignores its
    # values unless the identifier type is already FunctionalClassAtomTypes. So
    # AtomIdentifierType goes first; the rest follow in sorted order to keep the
    # sequence deterministic.
    #
    my @OtherNames = grep { $_ ne 'AtomIdentifierType' } keys %NamesAndValues;
    for my $Name ('AtomIdentifierType', sort @OtherNames) {
        my $MethodName = "Set${Name}";
        $This->$MethodName($NamesAndValues{$Name});
    }

    $This->_InitializeFingerprintsVector();

    return $This;
}
142 | |
143 # Set minimum distance for atom triplets... | |
144 # | |
sub SetMinDistance {
    # Store the minimum bond distance used for atom triplets.
    #
    # Croaks unless TextUtil::IsPositiveInteger accepts the value; returns the object.
    my($This, $Value) = @_;

    croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid: It must be a positive integer..."
        unless TextUtil::IsPositiveInteger($Value);

    $This->{MinDistance} = $Value;

    return $This;
}
155 | |
156 # Set maximum distance for atom triplets... | |
157 # | |
sub SetMaxDistance {
    # Store the maximum bond distance used for atom triplets.
    #
    # Croaks unless TextUtil::IsPositiveInteger accepts the value; returns the object.
    my($This, $Value) = @_;

    croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid: It must be a positive integer..."
        unless TextUtil::IsPositiveInteger($Value);

    $This->{MaxDistance} = $Value;

    return $This;
}
168 | |
169 # Set atom identifier type.. | |
170 # | |
sub SetAtomIdentifierType {
    # Set the atom identifier type used for atom IDs in atom triplets and
    # initialize the information associated with it.
    #
    # Parameters:
    #   $IdentifierType - one of the supported atom types class names; matched
    #                     case insensitively and stored as specified.
    #
    # Returns the object. Croaks when the value is not supported or when an
    # atom identifier type has already been set on the object.
    my($This, $IdentifierType) = @_;

    if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
        croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not valid. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
    }

    # The identifier type can be set only once for an object...
    if ($This->{AtomIdentifierType}) {
        croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change initial atom identifier type: It's already set...";
    }

    $This->{AtomIdentifierType} = $IdentifierType;

    # Initialize atom identifier type information...
    $This->_InitializeAtomIdentifierTypeInformation();

    return $This;
}
189 | |
190 # Generate fingerprints description... | |
191 # | |
sub GetDescription {
    # Return the fingerprints description: the explicitly set Description when
    # present, otherwise a colon delimited string built from the type, atom
    # identifier type and distance range.
    my($This) = @_;

    return $This->{Description} if exists $This->{Description};

    return join(':',
        $This->{Type},
        $This->{AtomIdentifierType},
        "MinDistance$This->{MinDistance}",
        "MaxDistance$This->{MaxDistance}",
    );
}
204 | |
205 # Generate topological atom triplets fingerprints... | |
206 # | |
207 # Let: | |
208 # | |
209 # AT = Any of the supported atom types | |
210 # | |
211 # ATx = Atom type for atom x | |
212 # ATy = Atom type for atom y | |
213 # ATz = Atom type for atom z | |
214 # | |
215 # Dxy = Distance between atom x and atom y | |
216 # Dxz = Distance between atom x and atom z | |
217 # Dyz = Distance between atom y and atom z | |
218 # | |
219 # Then: | |
220 # | |
221 # ATx-Dyz-ATy-Dxz-ATz-Dxy = Atom triplet ID for atom types ATx, ATy and ATz | |
222 # | |
223 # Methodology: | |
224 # . Generate a distance matrix. | |
225 # . Assign atom types to all the atoms. | |
226 # . Using distance matrix and atom types, count occurrence of unique atom triplets | |
227 # within specified distance range along with optional triangle inequality | |
228 # | |
229 # Notes: | |
230 # . Hydrogen atoms are ignored during the fingerprint generation. | |
231 # . For a molecule containing N atoms with all different atom types, the total number of | |
232 # possible unique atom triplets without applying triangle inequality check corresponds to: | |
233 # | |
234 # Factorial( N ) / ( Factorial( N - 3 ) * Factorial (3) ) | |
235 # | |
236 # However, due to similar atom types assigned to atoms in a molecule for a specific atom | |
237 # typing methodology and specified distance range used during fingerprints generation, the | |
238 # actual number of unique triplets is usually smaller than the theoretical limit. | |
239 # | |
sub GenerateFingerprints {
    # Generate topological atom triplets fingerprints for the molecule.
    #
    # Steps: cache molecule data, set up the distance matrix, assign atom types,
    # count atom triplets and store the final fingerprints vector.
    #
    # Returns the object in all non-fatal cases. When the distance matrix or the
    # atom types can't be generated, a warning is issued and the object is returned
    # without reaching _SetFinalFingerprints, which is the step that marks
    # successful generation. Croaks when MinDistance is greater than MaxDistance.
    my($This) = @_;

    if ($This->{MinDistance} > $This->{MaxDistance}) {
        croak "Error: ${ClassName}->GenerateFingerprints: No fingerprints generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
    }

    # Cache appropriate molecule data...
    $This->_SetupMoleculeDataCache();

    # Generate distance matrix...
    if (!$This->_SetupDistanceMatrix()) {
        # Release cached molecule data on failure as well...
        $This->_ClearMoleculeDataCache();
        carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't generate distance matrix...";
        return $This;
    }

    # Assign atom types to all heavy atoms...
    if (!$This->_AssignAtomTypes()) {
        # Release cached molecule data on failure as well...
        $This->_ClearMoleculeDataCache();
        carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
        return $This;
    }

    # Initialize values of topological atom triplets...
    $This->_InitializeToplogicalAtomTriplets();

    # Count atom triplets...
    $This->_GenerateAndCountAtomTriplets();

    # Set final fingerprints...
    $This->_SetFinalFingerprints();

    # Clear cached molecule data...
    $This->_ClearMoleculeDataCache();

    return $This;
}
276 | |
277 # Setup distance matrix... | |
278 # | |
sub _SetupDistanceMatrix {
    # Fetch the distance matrix from the molecule and keep it on the object.
    #
    # Returns the object, or undef when the molecule yields no distance matrix.
    my($This) = @_;

    my $Matrix = $This->GetMolecule()->GetDistanceMatrix();
    $This->{DistanceMatrix} = $Matrix;

    return $Matrix ? $This : undef;
}
290 | |
291 # Assign appropriate atom types to all heavy atoms... | |
292 # | |
sub _AssignAtomTypes {
    # Assign atom types to the atoms of the molecule using the atom types class
    # corresponding to AtomIdentifierType, and collect the assigned type of each
    # non-hydrogen atom into AssignedAtomTypes keyed by atom ID.
    #
    # Returns the object, or undef when the atom types class reports that the
    # assignment was not successful. Croaks for an unknown atom identifier type.
    my($This) = @_;

    %{$This->{AssignedAtomTypes}} = ();

    # Constructor options passed after Molecule for each supported atom types class.
    # All classes except TPSAAtomTypes are told to ignore hydrogens...
    #
    my @IgnoreHydrogens = ('IgnoreHydrogens' => 1);
    my %ConstructorOptions = (
        AtomicInvariantsAtomTypes => [@IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse}],
        DREIDINGAtomTypes => [@IgnoreHydrogens],
        EStateAtomTypes => [@IgnoreHydrogens],
        FunctionalClassAtomTypes => [@IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse}],
        MMFF94AtomTypes => [@IgnoreHydrogens],
        SLogPAtomTypes => [@IgnoreHydrogens],
        SYBYLAtomTypes => [@IgnoreHydrogens],
        TPSAAtomTypes => ['IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0],
        UFFAtomTypes => [@IgnoreHydrogens],
    );

    # Identifier type is matched case insensitively against the supported names...
    my($TypeName) = grep { $This->{AtomIdentifierType} =~ /^\Q$_\E$/i } sort keys %ConstructorOptions;
    if (!defined $TypeName) {
        croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
    }

    # Direct method call on the class name instead of indirect object syntax...
    my $AtomTypesClass = "AtomTypes::${TypeName}";
    my $SpecifiedAtomTypes = $AtomTypesClass->new('Molecule' => $This->{Molecule}, @{$ConstructorOptions{$TypeName}});

    # Assign atom types...
    $SpecifiedAtomTypes->AssignAtomTypes();

    # Make sure atom types assignment is successful...
    if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
        return undef;
    }

    # Collect assigned atom types for all non-hydrogen atoms...
    for my $Atom (@{$This->{Atoms}}) {
        next if $Atom->IsHydrogen();
        $This->{AssignedAtomTypes}{$Atom->GetID()} = $SpecifiedAtomTypes->GetAtomType($Atom);
    }

    return $This;
}
369 | |
370 # Initialize topological atom triplets between specified distance range... | |
371 # | |
sub _InitializeToplogicalAtomTriplets {
    # Empty the atom triplets IDs list and the atom triplets count table before
    # a new round of counting. Returns the object.
    my($This) = @_;

    $#{$This->{AtomTripletsIDs}} = -1;
    %{$This->{AtomTripletsCount}} = ();

    return $This;
}
381 | |
382 # Count atom triplets between minimum and maximum distance at each | |
383 # distance using distance matrix and atom types assigned to each heavy | |
384 # atom. | |
385 # | |
sub _GenerateAndCountAtomTriplets {
    # Count the occurrence of each atom triplet ID over all triplets of atoms
    # with assigned atom types whose three pairwise distances lie within
    # MinDistance and MaxDistance, optionally requiring the distances to satisfy
    # the triangle inequality. Counts are accumulated in AtomTripletsCount.
    #
    # Returns the object.
    my($This) = @_;

    my $DistanceMatrix = $This->{DistanceMatrix};
    my $SkipIndexCheck = 0;
    my($MinDistance, $MaxDistance) = ($This->{MinDistance}, $This->{MaxDistance});

    # Collect, in increasing order, the indices of atoms with an assigned atom
    # type; atoms without one, such as hydrogens, never take part in a triplet...
    my(@TypedIndices, %AtomTypeAtIndex);
    for my $AtomIndex (0 .. (@{$This->{Atoms}} - 1)) {
        my $AtomID = $This->{AtomIndexToID}{$AtomIndex};
        next unless exists($This->{AssignedAtomTypes}{$AtomID});
        push @TypedIndices, $AtomIndex;
        $AtomTypeAtIndex{$AtomIndex} = $This->{AssignedAtomTypes}{$AtomID};
    }

    my $LastPosition = $#TypedIndices;

    FIRST: for my $Position1 (0 .. $LastPosition) {
        my $AtomIndex1 = $TypedIndices[$Position1];

        SECOND: for my $Position2 (($Position1 + 1) .. $LastPosition) {
            my $AtomIndex2 = $TypedIndices[$Position2];

            my $Distance12 = $DistanceMatrix->GetValue($AtomIndex1, $AtomIndex2, $SkipIndexCheck);
            next SECOND if ($Distance12 < $MinDistance || $Distance12 > $MaxDistance);

            THIRD: for my $Position3 (($Position2 + 1) .. $LastPosition) {
                my $AtomIndex3 = $TypedIndices[$Position3];

                my $Distance13 = $DistanceMatrix->GetValue($AtomIndex1, $AtomIndex3, $SkipIndexCheck);
                my $Distance23 = $DistanceMatrix->GetValue($AtomIndex2, $AtomIndex3, $SkipIndexCheck);

                next THIRD if ($Distance13 < $MinDistance || $Distance13 > $MaxDistance);
                next THIRD if ($Distance23 < $MinDistance || $Distance23 > $MaxDistance);
                next THIRD if ($This->{UseTriangleInequality} && !$This->_DoDistancesSatisfyTriangleInequality($Distance12, $Distance13, $Distance23));

                # Each atom type is paired with the distance between the other two atoms...
                my $AtomTripletID = $This->_GetAtomTripletID($AtomTypeAtIndex{$AtomIndex1}, $Distance23, $AtomTypeAtIndex{$AtomIndex2}, $Distance13, $AtomTypeAtIndex{$AtomIndex3}, $Distance12);

                $This->{AtomTripletsCount}{$AtomTripletID} = 0 unless exists $This->{AtomTripletsCount}{$AtomTripletID};
                $This->{AtomTripletsCount}{$AtomTripletID}++;
            }
        }
    }
    return $This;
}
443 | |
444 # Check triangle inequality... | |
445 # | |
sub _DoDistancesSatisfyTriangleInequality {
    # Check whether three distances satisfy the strict triangle inequality: each
    # distance must be greater than the absolute difference of the other two and
    # less than their sum.
    #
    # Returns 1 when all three distances pass and 0 otherwise.
    my($This, $Distance1, $Distance2, $Distance3) = @_;

    my @Checks = (
        [$Distance1, $Distance2, $Distance3],
        [$Distance2, $Distance1, $Distance3],
        [$Distance3, $Distance1, $Distance2],
    );

    for my $Check (@Checks) {
        my($Side, $OtherA, $OtherB) = @{$Check};
        return 0 unless ($Side > abs($OtherA - $OtherB) && $Side < ($OtherA + $OtherB));
    }

    return 1;
}
460 | |
461 # Get atom triplet ID corresponding to atom types and distances corresponding to atom triplet... | |
462 # | |
sub _GetAtomTripletID {
    # Build the atom triplet ID from three atom types, each paired with a
    # distance as "<AtomType>-D<Distance>". The three pairs are sorted as strings
    # before being joined with "-", so the ID does not depend on the order in
    # which the atoms are specified.
    my($This, $ATx, $Dyz, $ATy, $Dxz, $ATz, $Dxy) = @_;

    my @TypeDistancePairs = ([$ATx, $Dyz], [$ATy, $Dxz], [$ATz, $Dxy]);
    my @Labels = map("$_->[0]-D$_->[1]", @TypeDistancePairs);
    my @SortedLabels = sort @Labels;

    return join("-", @SortedLabels);
}
474 | |
475 # Set final fingerprints vector... | |
476 # | |
sub _SetFinalFingerprints {
    # Mark the fingerprints as generated and transfer the atom triplet IDs,
    # sorted as strings, along with their counts to the fingerprints vector.
    #
    # Returns the object.
    my($This) = @_;

    $This->{FingerprintsGenerated} = 1;

    # IDs and count values are kept in the same order...
    my @SortedIDs = sort keys %{$This->{AtomTripletsCount}};
    my @Values = @{$This->{AtomTripletsCount}}{@SortedIDs};

    @{$This->{AtomTripletsIDs}} = @SortedIDs;

    # Add AtomTripletsIDs and values to fingerprint vector...
    $This->{FingerprintsVector}->AddValueIDs(\@{$This->{AtomTripletsIDs}});
    $This->{FingerprintsVector}->AddValues(\@Values);

    return $This;
}
499 | |
500 # Get atom triplet IDs corresponding to atom triplets count values in fingerprint | |
501 # vector as an array or reference to an array... | |
502 # | |
503 # AtomTripletIDs list differs between molecules and is generated during finalization | |
504 # of fingerprints to make sure the fingerprint vector containing count values | |
505 # matches the atom triplets array. | |
506 # | |
sub GetAtomTripletIDs {
    # Return the atom triplet IDs: as a list in list context, or as a reference
    # to the array kept on the object otherwise.
    my($This) = @_;

    if (wantarray) {
        return @{$This->{AtomTripletsIDs}};
    }

    return \@{$This->{AtomTripletsIDs}};
}
512 | |
513 # Cache appropriate molecule data... | |
514 # | |
sub _SetupMoleculeDataCache {
    # Cache the atoms of the molecule along with a map from atom index to atom ID.
    #
    # All atoms including hydrogens are retrieved to correctly map atom indices to
    # atom IDs for usage of distance matrix; hydrogen atoms are ignored later
    # during processing. Returns the object.
    my($This) = @_;

    @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();

    # Atom IDs in the same order as the cached atoms...
    my @AtomIDs = map { $_->GetID() } @{$This->{Atoms}};

    # Set AtomIndex to AtomID hash...
    my %IndexToID = ();
    @IndexToID{ (0 .. $#AtomIDs) } = @AtomIDs;
    %{$This->{AtomIndexToID}} = %IndexToID;

    return $This;
}
534 | |
535 # Set atomic invariants to use for atom identifiers... | |
536 # | |
sub SetAtomicInvariantsToUse {
    # Set the atomic invariants to use for atom identifiers.
    #
    # Values may be specified as a list or as a reference to an array. A warning
    # is issued and nothing is returned when no values are specified. Croaks when
    # a specified atomic invariant is not available; otherwise returns the object.
    my($This, @Values) = @_;

    if (!@Values) {
        carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
        return;
    }

    # An array reference as the first value supplies the whole list...
    my @SpecifiedAtomicInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

    # Make sure specified AtomicInvariants are valid before changing the object...
    my @AtomicInvariantsToUse = ();
    for my $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
        AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)
            or croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
        push @AtomicInvariantsToUse, $SpecifiedAtomicInvariant;
    }

    # Set atomic invariants to use...
    @{$This->{AtomicInvariantsToUse}} = @AtomicInvariantsToUse;

    return $This;
}
574 | |
575 # Set functional classes to use for atom identifiers... | |
576 # | |
sub SetFunctionalClassesToUse {
    # Set the functional classes to use for atom identifiers.
    #
    # Values may be specified as a list or as a reference to an array. A warning
    # is issued and nothing is returned when no values are specified or when the
    # atom identifier type is not FunctionalClassAtomTypes. Croaks when a
    # specified functional class is not available; otherwise returns the object.
    my($This, @Values) = @_;

    if (!@Values) {
        carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
        return;
    }

    if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
        carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
        return;
    }

    # An array reference as the first value supplies the whole list...
    my @SpecifiedFunctionalClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

    # Make sure specified FunctionalClasses are valid before changing the object...
    my @FunctionalClassesToUse = ();
    for my $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
        AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)
            or croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
        push @FunctionalClassesToUse, $SpecifiedFunctionalClass;
    }

    # Set functional classes to use...
    @{$This->{FunctionalClassesToUse}} = @FunctionalClassesToUse;

    return $This;
}
618 | |
619 # Initialize atom identifier type information... | |
620 # | |
621 # Current supported values: | |
622 # | |
623 # AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, | |
624 # MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes | |
625 # | |
sub _InitializeAtomIdentifierTypeInformation {
    # Initialize the information that goes along with the current atom identifier
    # type: default atomic invariants for AtomicInvariantsAtomTypes and default
    # functional classes for FunctionalClassAtomTypes. The other supported types
    # need nothing for now.
    #
    # Returns the object. Croaks for an unknown atom identifier type.
    my($This) = @_;

    my $IdentifierType = $This->{AtomIdentifierType};

    if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
        $This->_InitializeAtomicInvariantsAtomTypesInformation();
        return $This;
    }

    if ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
        $This->_InitializeFunctionalClassAtomTypesInformation();
        return $This;
    }

    if ($IdentifierType !~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
        croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
    }

    return $This;
}
644 | |
645 # Initialize atomic invariants atom types to use for generating atom IDs in atom triplets... | |
646 # | |
647 # Let: | |
648 # AS = Atom symbol corresponding to element symbol | |
649 # | |
650 # X<n> = Number of non-hydrogen atom neighbors or heavy atoms attached to atom | |
651 # BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom | |
652 # LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom | |
653 # SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
654 # DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
655 # TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
656 # H<n> = Number of implicit and explicit hydrogens for atom | |
657 # Ar = Aromatic annotation indicating whether atom is aromatic | |
658 # RA = Ring atom annotation indicating whether atom is in a ring | |
659 # FC<+n/-n> = Formal charge assigned to atom | |
660 # MN<n> = Mass number indicating isotope other than most abundant isotope | |
661 # SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet) | |
662 # | |
663 # ATx = Atomic invariants atom type for atom x | |
664 # ATy = Atomic invariants atom type for atom y | |
665 # ATz = Atomic invariants atom type for atom z | |
666 # | |
667 # Dxy = Distance between atom x and atom y | |
668 # Dxz = Distance between atom x and atom z | |
669 # Dyz = Distance between atom y and atom z | |
670 # | |
671 # Then: | |
672 # | |
673 # Atom triplet AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
674 # | |
675 # AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n> | |
676 # | |
677 # Topological atom triplet ID between atom IDs ATx, ATy and ATz corresponds to: | |
678 # | |
679 # ATx-Dyz-ATy-Dxz-ATz-Dxy | |
680 # | |
681 # Except for AS which is a required atomic invariant in atom triplet AtomIDs, all other atomic invariants are | |
682 # optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>. | |
683 # AtomID specification doesn't include atomic invariants with zero or undefined values. | |
684 # | |
685 # Examples of atom triplet AtomIDs: | |
686 # | |
687 # O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge | |
688 # O.X1.BO1.FC-1 - Hydroxyl oxygen in carboxylate with explicit negative charge | |
689 # O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon | |
690 # O.X2.BO2 - Hydroxyl oxygen in carboxylate attached to carbonyl carbon and another heavy atom | |
691 # | |
692 # C.X2.BO3.H1.Ar - Aromatic carbon | |
693 # | |
sub _InitializeAtomicInvariantsAtomTypesInformation {
    # Install the default atomic invariants used for generating atom triplet
    # atom IDs: AS, X, BO, H, FC. Returns the object.
    my($This) = @_;

    @{$This->{AtomicInvariantsToUse}} = qw(AS X BO H FC);

    return $This;
}
704 | |
705 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes | |
706 # class, to use for generating atom identifiers... | |
707 # | |
708 # Let: | |
709 # HBD: HydrogenBondDonor | |
710 # HBA: HydrogenBondAcceptor | |
711 # PI : PositivelyIonizable | |
712 # NI : NegativelyIonizable | |
713 # Ar : Aromatic | |
714 # Hal : Halogen | |
715 # H : Hydrophobic | |
716 # RA : RingAtom | |
717 # CA : ChainAtom | |
718 # | |
719 # Then: | |
720 # | |
721 # Functional class atom type specification for an atom corresponds to: | |
722 # | |
723 # Ar.CA.H.HBA.HBD.Hal.NI.PI.RA | |
724 # | |
725 # Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal | |
726 # | |
727 # FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]: | |
728 # | |
729 # HydrogenBondDonor: NH, NH2, OH | |
730 # HydrogenBondAcceptor: N[!H], O | |
731 # PositivelyIonizable: +, NH2 | |
732 # NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH | |
733 # | |
sub _InitializeFunctionalClassAtomTypesInformation {
    # Install the default functional classes used for generating atom
    # identifiers: HBD, HBA, PI, NI, Ar, Hal. Returns the object.
    my($This) = @_;

    @{$This->{FunctionalClassesToUse}} = qw(HBD HBA PI NI Ar Hal);

    return $This;
}
745 | |
746 # Clear cached molecule data... | |
747 # | |
sub _ClearMoleculeDataCache {
    # Empty the list of atoms cached on the object. Returns the object.
    my($This) = @_;

    $#{$This->{Atoms}} = -1;

    return $This;
}
755 | |
756 # Return a string containing data for TopologicalAtomTripletsFingerprints object... | |
757 # | |
sub StringifyTopologicalAtomTripletsFingerprints {
    # Return a string describing the object as a sequence of "Name: Value"
    # fields delimited by "; ": fingerprint type, atom identifier type, distance
    # range, triangle inequality usage, atomic invariants or functional classes
    # information for the corresponding atom identifier types, number of atom
    # triplets and the fingerprints vector.
    my($This) = @_;

    my @Fields = (
        "Fingerprint type: $This->{Type}",
        "AtomIdentifierType: $This->{AtomIdentifierType}",
        "MinDistance: $This->{MinDistance}",
        "MaxDistance: $This->{MaxDistance}",
        "UseTriangleInequality: " . ($This->{UseTriangleInequality} ? "Yes" : "No"),
    );

    if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
        my @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
        my %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

        # Each available atomic invariant is shown with its description in the defined order...
        my @AtomicInvariants = map("$_: $AvailableAtomicInvariants{$_}", @AtomicInvariantsOrder);

        push @Fields, "AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
        push @Fields, "AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
        push @Fields, "AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
    }
    elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
        my @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
        my %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

        # Each available functional class is shown with its description in the defined order...
        my @FunctionalClasses = map("$_: $AvailableFunctionalClasses{$_}", @FunctionalClassesOrder);

        push @Fields, "FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
        push @Fields, "FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
        push @Fields, "AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
    }

    # Total number of atom triplets...
    push @Fields, "NumOfAtomTriplets: " . $This->{FingerprintsVector}->GetNumOfValues();

    # FingerprintsVector...
    push @Fields, "FingerprintsVector: < $This->{FingerprintsVector} >";

    return join("; ", @Fields);
}
805 | |
806 1; | |
807 | |
808 __END__ | |
809 | |
810 =head1 NAME | |
811 | |
812 TopologicalAtomTripletsFingerprints | |
813 | |
814 =head1 SYNOPSIS | |
815 | |
816 use Fingerprints::TopologicalAtomTripletsFingerprints; | |
817 | |
818 use Fingerprints::TopologicalAtomTripletsFingerprints qw(:all); | |
819 | |
820 =head1 DESCRIPTION | |
821 | |
822 B<TopologicalAtomTripletsFingerprints> [ Ref 57, Ref 59, Ref 72 ] class provides the following methods: | |
823 | |
824 new, GenerateFingerprints, GetAtomTripletIDs, GetDescription, | |
825 SetAtomIdentifierType, SetAtomicInvariantsToUse, SetFunctionalClassesToUse, | |
826 SetMaxDistance, SetMinDistance, StringifyTopologicalAtomTripletsFingerprints | |
827 | |
828 B<TopologicalAtomTripletsFingerprints> is derived from B<Fingerprints> class which in turn | |
829 is derived from B<ObjectProperty> base class that provides methods not explicitly defined | |
830 in B<TopologicalAtomTripletsFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's | |
831 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property: | |
832 | |
833 Set<PropertyName>(<PropertyValue>); | |
834 $PropertyValue = Get<PropertyName>(); | |
835 Delete<PropertyName>(); | |
836 | |
837 The current release of MayaChemTools supports generation of B<TopologicalAtomTripletsFingerprints> | |
838 corresponding to the following B<AtomIdentifierTypes>: | |
839 | |
840 AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
841 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, | |
842 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes | |
843 | |
844 Based on the values specified for B<AtomIdentifierType> along with other specified | |
845 parameters such as B<AtomicInvariantsToUse> and B<FunctionalClassesToUse>, initial | |
846 atom types are assigned to all non-hydrogen atoms in a molecule. Using the distance | |
847 matrix for the molecule and initial atom types assigned to non-hydrogen atoms, all unique atom | |
848 triplets within B<MinDistance> and B<MaxDistance> are identified and counted. An atom triplet | |
849 identifier is generated for each unique atom triplet; the format of atom triplet identifier is: | |
850 | |
851 <ATx>-Dyz-<ATy>-Dxz-<ATz>-Dxy | |
852 | |
853 ATx, ATy, ATz: Atom types assigned to atom x, atom y, and atom z | |
854 Dxy: Distance between atom x and atom y | |
855 Dxz: Distance between atom x and atom z | |
856 Dyz: Distance between atom y and atom z | |
857 | |
858 where <ATx>-Dyz <= <ATy>-Dxz <= <ATz>-Dxy | |
859 | |
860 The atom triplet identifiers for all unique atom triplets corresponding to non-hydrogen atoms constitute | |
861 topological atom triplets fingerprints of the molecule. | |
862 | |
863 The current release of MayaChemTools generates the following types of topological atom triplets | |
864 fingerprints vector strings: | |
865 | |
866 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M | |
867 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1 | |
868 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1 | |
869 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1 | |
870 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....; | |
871 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2 | |
872 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8... | |
873 | |
874 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M | |
875 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesPairsString | |
876 ;C.X1.BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 1 C.X1.BO1.H3-D1-C.X2.BO | |
877 2.H2-D10-C.X3.BO4-D9 2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 2 C.X | |
878 1.BO1.H3-D1-C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 2 C.X1.BO1.H3-D1-C.X2.BO2.H2 | |
879 -D6-C.X3.BO3.H1-D5 2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3.BO3.H1-D7 2... | |
880 | |
881 FingerprintsVector;TopologicalAtomTriplets:DREIDINGAtomTypes:MinDistan | |
882 ce1:MaxDistance10;2377;NumericalValues;IDsAndValuesString;C_2-D1-C_2-D | |
883 9-C_3-D10 C_2-D1-C_2-D9-C_R-D10 C_2-D1-C_3-D1-C_3-D2 C_2-D1-C_3-D10-C_ | |
884 3-D9 C_2-D1-C_3-D2-C_3-D3 C_2-D1-C_3-D2-C_R-D3 C_2-D1-C_3-D3-C_3-D4 C_ | |
885 2-D1-C_3-D3-N_R-D4 C_2-D1-C_3-D3-O_3-D2 C_2-D1-C_3-D4-C_3-D5 C_2-D...; | |
886 1 1 1 2 1 1 3 1 1 2 2 1 1 1 1 1 1 1 1 2 1 3 4 5 1 1 6 4 2 2 3 1 1 1 2 | |
887 2 1 2 1 1 2 2 2 1 2 1 2 1 1 3 3 2 6 4 2 1 1 1 2 2 1 1 1 1 1 1 1 1 1... | |
888 | |
889 FingerprintsVector;TopologicalAtomTriplets:EStateAtomTypes:MinDistance | |
890 1:MaxDistance10;3298;NumericalValues;IDsAndValuesString;aaCH-D1-aaCH-D | |
891 1-aaCH-D2 aaCH-D1-aaCH-D1-aasC-D2 aaCH-D1-aaCH-D10-aaCH-D9 aaCH-D1-aaC | |
892 H-D10-aasC-D9 aaCH-D1-aaCH-D2-aaCH-D3 aaCH-D1-aaCH-D2-aasC-D1 aaCH-D1- | |
893 aaCH-D2-aasC-D3 aaCH-D1-aaCH-D3-aasC-D2 aaCH-D1-aaCH-D4-aasC-D5 aa...; | |
894 6 4 24 4 16 8 8 4 8 8 8 12 10 14 4 16 24 4 12 2 2 4 1 10 2 2 15 2 2 2 | |
895 2 2 2 14 4 2 2 2 2 1 2 10 2 2 4 1 2 4 8 3 3 3 4 6 4 2 2 3 3 1 1 1 2 1 | |
896 2 2 4 2 3 2 1 2 4 5 3 2 2 1 2 4 3 2 8 12 6 2 2 4 4 7 1 4 2 4 2 2 2 ... | |
897 | |
898 FingerprintsVector;TopologicalAtomTriplets:FunctionalClassAtomTypes:Mi | |
899 nDistance1:MaxDistance10;2182;NumericalValues;IDsAndValuesString;Ar-D1 | |
900 -Ar-D1-Ar-D2 Ar-D1-Ar-D1-Ar.HBA-D2 Ar-D1-Ar-D10-Ar-D9 Ar-D1-Ar-D10-Hal | |
901 -D9 Ar-D1-Ar-D2-Ar-D2 Ar-D1-Ar-D2-Ar-D3 Ar-D1-Ar-D2-Ar.HBA-D1 Ar-D1-Ar | |
902 -D2-Ar.HBA-D2 Ar-D1-Ar-D2-Ar.HBA-D3 Ar-D1-Ar-D2-HBD-D1 Ar-D1-Ar-D2...; | |
903 27 1 32 2 2 63 3 2 1 2 1 2 3 1 1 40 3 1 2 2 2 2 4 2 2 47 4 2 2 1 2 1 5 | |
904 2 2 51 4 3 1 3 1 9 1 1 50 3 3 4 1 9 50 2 2 3 3 5 45 1 1 1 2 1 2 2 3 3 | |
905 4 4 3 2 1 1 3 4 5 5 3 1 2 3 2 3 5 7 2 7 3 7 1 1 2 2 2 2 3 1 4 3 1 2... | |
906 | |
907 FingerprintsVector;TopologicalAtomTriplets:MMFF94AtomTypes:MinDistance | |
908 1:MaxDistance10;2966;NumericalValues;IDsAndValuesString;C5A-D1-C5A-D1- | |
909 N5-D2 C5A-D1-C5A-D2-C5B-D2 C5A-D1-C5A-D3-CB-D2 C5A-D1-C5A-D3-CR-D2 C5A | |
910 -D1-C5B-D1-C5B-D2 C5A-D1-C5B-D2-C=ON-D1 C5A-D1-C5B-D2-CB-D1 C5A-D1-C5B | |
911 -D3-C=ON-D2 C5A-D1-C5B-D3-CB-D2 C5A-D1-C=ON-D3-NC=O-D2 C5A-D1-C=ON-D3- | |
912 O=CN-D2 C5A-D1-C=ON-D4-NC=O-D3 C5A-D1-C=ON-D4-O=CN-D3 C5A-D1-CB-D1-... | |
913 | |
914 FingerprintsVector;TopologicalAtomTriplets:SLogPAtomTypes:MinDistance1 | |
915 :MaxDistance10;3710;NumericalValues;IDsAndValuesString;C1-D1-C1-D1-C11 | |
916 -D2 C1-D1-C1-D1-CS-D2 C1-D1-C1-D10-C5-D9 C1-D1-C1-D3-C10-D2 C1-D1-C1-D | |
917 3-C5-D2 C1-D1-C1-D3-CS-D2 C1-D1-C1-D3-CS-D4 C1-D1-C1-D4-C10-D5 C1-D1-C | |
918 1-D4-C11-D5 C1-D1-C1-D5-C10-D4 C1-D1-C1-D5-C5-D4 C1-D1-C1-D6-C11-D7 C1 | |
919 -D1-C1-D6-CS-D5 C1-D1-C1-D6-CS-D7 C1-D1-C1-D8-C11-D9 C1-D1-C1-D8-CS... | |
920 | |
921 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1 | |
922 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C | |
923 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3- | |
924 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2 | |
925 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C. | |
926 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7... | |
927 | |
928 FingerprintsVector;TopologicalAtomTriplets:TPSAAtomTypes:MinDistance1: | |
929 MaxDistance10;1007;NumericalValues;IDsAndValuesString;N21-D1-N7-D3-Non | |
930 e-D4 N21-D1-N7-D5-None-D4 N21-D1-None-D1-None-D2 N21-D1-None-D2-None-D | |
931 2 N21-D1-None-D2-None-D3 N21-D1-None-D3-None-D4 N21-D1-None-D4-None-D5 | |
932 N21-D1-None-D4-O3-D3 N21-D1-None-D4-O4-D3 N21-D1-None-D5-None-D6 N21- | |
933 D1-None-D6-None-D7 N21-D1-None-D6-O4-D5 N21-D1-None-D7-None-D8 N21-... | |
934 | |
935 FingerprintsVector;TopologicalAtomTriplets:UFFAtomTypes:MinDistance1:M | |
936 axDistance10;2377;NumericalValues;IDsAndValuesString;C_2-D1-C_2-D9-C_3 | |
937 -D10 C_2-D1-C_2-D9-C_R-D10 C_2-D1-C_3-D1-C_3-D2 C_2-D1-C_3-D10-C_3-D9 | |
938 C_2-D1-C_3-D2-C_3-D3 C_2-D1-C_3-D2-C_R-D3 C_2-D1-C_3-D3-C_3-D4 C_2-D1- | |
939 C_3-D3-N_R-D4 C_2-D1-C_3-D3-O_3-D2 C_2-D1-C_3-D4-C_3-D5 C_2-D1-C_3-D5- | |
940 C_3-D6 C_2-D1-C_3-D5-O_3-D4 C_2-D1-C_3-D6-C_3-D7 C_2-D1-C_3-D7-C_3-... | |
941 | |
942 =head2 METHODS | |
943 | |
944 =over 4 | |
945 | |
946 =item B<new> | |
947 | |
948 $NewTopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints( | |
949 %NamesAndValues); | |
950 | |
951 Using specified I<TopologicalAtomTripletsFingerprints> property names and values hash, B<new> | |
952 method creates a new object and returns a reference to newly created B<TopologicalAtomTripletsFingerprints> | |
953 object. By default, the following properties are initialized: | |
954 | |
955 Molecule = '' | |
956 Type = 'TopologicalAtomTriplets' | |
957 MinDistance = 1 | |
958 MaxDistance = 10 | |
959 UseTriangleInequality = 1 | |
960 AtomIdentifierType = '' | |
961 AtomicInvariantsToUse = ['AS', 'X', 'BO', 'H', 'FC'] | |
962 FunctionalClassesToUse = ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal'] | |
963 | |
964 Examples: | |
965 | |
966 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints( | |
967 'Molecule' => $Molecule, | |
968 'AtomIdentifierType' => | |
969 'AtomicInvariantsAtomTypes'); | |
970 | |
971 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints( | |
972 'Molecule' => $Molecule, | |
973 'MinDistance' => 1, | |
974 'MaxDistance' => 10, | |
975 'AtomIdentifierType' => | |
976 'AtomicInvariantsAtomTypes', | |
977 'AtomicInvariantsToUse' => | |
978 ['AS', 'X', 'BO', 'H', 'FC'] ); | |
979 | |
980 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints( | |
981 'Molecule' => $Molecule, | |
982 'AtomIdentifierType' => | |
983 'DREIDINGAtomTypes'); | |
984 | |
985 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints( | |
986 'Molecule' => $Molecule, | |
987 'AtomIdentifierType' => | |
988 'MMFF94AtomTypes'); | |
989 | |
990 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints( | |
991 'Molecule' => $Molecule, | |
992 'AtomIdentifierType' => | |
993 'TPSAAtomTypes'); | |
994 | |
995 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints( | |
996 'Molecule' => $Molecule, | |
997 'MinDistance' => 1, | |
998 'MaxDistance' => 10, | |
999 'AtomIdentifierType' => | |
1000 'FunctionalClassAtomTypes', | |
1001 'FunctionalClassesToUse' => | |
1002 ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal']); | |
1003 | |
1004 $TopologicalAtomTripletsFingerprints->GenerateFingerprints(); | |
1005 print "$TopologicalAtomTripletsFingerprints\n"; | |
1006 | |
1007 =item B<GetDescription> | |
1008 | |
1009 $Return = $TopologicalAtomTripletsFingerprints->GetDescription(); | |
1010 | |
1011 Returns a string containing description of topological atom triplets fingerprints. | |
1012 | |
1013 =item B<GenerateFingerprints> | |
1014 | |
1015 $TopologicalAtomTripletsFingerprints->GenerateFingerprints(); | |
1016 | |
1017 Generates topological atom triplets fingerprints and returns I<TopologicalAtomTripletsFingerprints>. | |
1018 | |
1019 =item B<GetAtomTripletIDs> | |
1020 | |
1021 $AtomTripletIDsRef = $TopologicalAtomTripletsFingerprints->GetAtomTripletIDs(); | |
1022 @AtomTripletIDs = $TopologicalAtomTripletsFingerprints->GetAtomTripletIDs(); | |
1023 | |
1024 Returns atom triplet IDs corresponding to atom triplets count values in topological atom triplets | |
1025 fingerprints vector as an array or reference to an array. | |
1026 | |
1027 =item B<SetAtomIdentifierType> | |
1028 | |
1029 $TopologicalAtomTripletsFingerprints->SetAtomIdentifierType($IdentifierType); | |
1030 | |
1031 Sets atom I<IdentifierType> to use during atom triplets fingerprints generation and | |
1032 returns I<TopologicalAtomTripletsFingerprints>. | |
1033 | |
1034 Possible values: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
1035 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, | |
1036 TPSAAtomTypes, UFFAtomTypes>. | |
1037 | |
1038 =item B<SetAtomicInvariantsToUse> | |
1039 | |
1040 $TopologicalAtomTripletsFingerprints->SetAtomicInvariantsToUse($ValuesRef); | |
1041 $TopologicalAtomTripletsFingerprints->SetAtomicInvariantsToUse(@Values); | |
1042 | |
1043 Sets atomic invariants to use during I<AtomicInvariantsAtomTypes> value of I<AtomIdentifierType> | |
1044 for topological atom triplets fingerprints generation and returns I<TopologicalAtomTripletsFingerprints>. | |
1045 | |
1046 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB, | |
1047 H, Ar, RA, FC, MN, SM>. Default value: I<AS,X,BO,H,FC>. | |
1048 | |
1049 The atomic invariants abbreviations correspond to: | |
1050 | |
1051 AS = Atom symbol corresponding to element symbol | |
1052 | |
1053 X<n> = Number of non-hydrogen atom neighbors or heavy atoms | |
1054 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms | |
1055 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms | |
1056 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms | |
1057 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms | |
1058 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms | |
1059 H<n> = Number of implicit and explicit hydrogens for atom | |
1060 Ar = Aromatic annotation indicating whether atom is aromatic | |
1061 RA = Ring atom annotation indicating whether atom is in a ring | |
1062 FC<+n/-n> = Formal charge assigned to atom | |
1063 MN<n> = Mass number indicating isotope other than most abundant isotope | |
1064 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or | |
1065 3 (triplet) | |
1066 | |
1067 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
1068 | |
1069 AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n> | |
1070 | |
1071 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are | |
1072 optional. Atom type specification doesn't include atomic invariants with zero or undefined values. | |
1073 | |
1074 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words | |
1075 are also allowed: | |
1076 | |
1077 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors | |
1078 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms | |
1079 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms | |
1080 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms | |
1081 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms | |
1082 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms | |
1083 H : NumOfImplicitAndExplicitHydrogens | |
1084 Ar : Aromatic | |
1085 RA : RingAtom | |
1086 FC : FormalCharge | |
1087 MN : MassNumber | |
1088 SM : SpinMultiplicity | |
1089 | |
1090 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant | |
1091 atom types. | |
1092 | |
1093 =item B<SetFunctionalClassesToUse> | |
1094 | |
1095 $TopologicalAtomTripletsFingerprints->SetFunctionalClassesToUse($ValuesRef); | |
1096 $TopologicalAtomTripletsFingerprints->SetFunctionalClassesToUse(@Values); | |
1097 | |
1098 Sets functional classes to use during I<FunctionalClassAtomTypes> value of I<AtomIdentifierType> | |
1099 for topological atom triplets fingerprints generation and returns I<TopologicalAtomTripletsFingerprints>. | |
1100 | |
1101 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>. | |
1102 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>. | |
1103 | |
1104 The functional class abbreviations correspond to: | |
1105 | |
1106 HBD: HydrogenBondDonor | |
1107 HBA: HydrogenBondAcceptor | |
1108 PI : PositivelyIonizable | |
1109 NI : NegativelyIonizable | |
1110 Ar : Aromatic | |
1111 Hal : Halogen | |
1112 H : Hydrophobic | |
1113 RA : RingAtom | |
1114 CA : ChainAtom | |
1115 | |
1116 Functional class atom type specification for an atom corresponds to: | |
1117 | |
1118 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA or None | |
1119 | |
1120 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom | |
1121 types. It uses the following definitions [ Ref 60-61, Ref 65-66 ]: | |
1122 | |
1123 HydrogenBondDonor: NH, NH2, OH | |
1124 HydrogenBondAcceptor: N[!H], O | |
1125 PositivelyIonizable: +, NH2 | |
1126 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH | |
1127 | |
1128 =item B<SetMaxDistance> | |
1129 | |
1130 $TopologicalAtomTripletsFingerprints->SetMaxDistance($Distance); | |
1131 | |
1132 Sets maximum distance to use during topological atom triplets fingerprints generation and | |
1133 returns I<TopologicalAtomTripletsFingerprints>. | |
1134 | |
1135 =item B<SetMinDistance> | |
1136 | |
1137 $TopologicalAtomTripletsFingerprints->SetMinDistance($Distance); | |
1138 | |
1139 Sets minimum distance to use during topological atom triplets fingerprints generation and | |
1140 returns I<TopologicalAtomTripletsFingerprints>. | |
1141 | |
1142 =item B<StringifyTopologicalAtomTripletsFingerprints> | |
1143 | |
1144 $String = $TopologicalAtomTripletsFingerprints-> | |
1145 StringifyTopologicalAtomTripletsFingerprints(); | |
1146 | |
1147 Returns a string containing information about I<TopologicalAtomTripletsFingerprints> object. | |
1148 | |
1149 =back | |
1150 | |
1151 =head1 AUTHOR | |
1152 | |
1153 Manish Sud <msud@san.rr.com> | |
1154 | |
1155 =head1 SEE ALSO | |
1156 | |
1157 Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm, | |
1158 AtomTypesFingerprints.pm, EStateIndiciesFingerprints.pm, ExtendedConnectivityFingerprints.pm, | |
1159 MACCSKeys.pm, PathLengthFingerprints.pm, TopologicalAtomPairsFingerprints.pm, | |
1160 TopologicalAtomTorsionsFingerprints.pm, TopologicalPharmacophoreAtomPairsFingerprints.pm, | |
1161 TopologicalPharmacophoreAtomTripletsFingerprints.pm | |
1162 | |
1163 =head1 COPYRIGHT | |
1164 | |
1165 Copyright (C) 2015 Manish Sud. All rights reserved. | |
1166 | |
1167 This file is part of MayaChemTools. | |
1168 | |
1169 MayaChemTools is free software; you can redistribute it and/or modify it under | |
1170 the terms of the GNU Lesser General Public License as published by the Free | |
1171 Software Foundation; either version 3 of the License, or (at your option) | |
1172 any later version. | |
1173 | |
1174 =cut |