comparison lib/Fingerprints/TopologicalAtomTripletsFingerprints.pm @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 package Fingerprints::TopologicalAtomTripletsFingerprints;
2 #
3 # $RCSfile: TopologicalAtomTripletsFingerprints.pm,v $
4 # $Date: 2015/02/28 20:48:54 $
5 # $Revision: 1.15 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Carp;
31 use Exporter;
32 use Fingerprints::Fingerprints;
33 use TextUtil ();
34 use Molecule;
35 use AtomTypes::AtomicInvariantsAtomTypes;
36 use AtomTypes::DREIDINGAtomTypes;
37 use AtomTypes::EStateAtomTypes;
38 use AtomTypes::FunctionalClassAtomTypes;
39 use AtomTypes::MMFF94AtomTypes;
40 use AtomTypes::SLogPAtomTypes;
41 use AtomTypes::SYBYLAtomTypes;
42 use AtomTypes::TPSAAtomTypes;
43 use AtomTypes::UFFAtomTypes;
44
# Package variables used for inheritance and by Exporter. Declared with our
# instead of the obsolete vars pragma...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

# Inherit from the generic fingerprints class; nothing is exported...
@ISA = qw(Fingerprints::Fingerprints Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
#
# $ClassName is a file scoped lexical used as prefix in error and warning
# messages; its value is assigned by _InitializeClass.
my($ClassName);
_InitializeClass();

# Overload Perl functions...
#
# Using an object in string context calls StringifyTopologicalAtomTripletsFingerprints.
use overload '""' => 'StringifyTopologicalAtomTripletsFingerprints';
59
# Class constructor...
#
# Creates the object using the constructor of the parent class, blesses it into
# this class, assigns default values to object data and applies the specified
# NAME => VALUE pairs through the corresponding Set<Name> methods.
#
# Returns the new object.
#
sub new {
    my($Class, %NamesAndValues) = @_;
    my($This, $ClassToBless);

    # Support invocation both as a class method and as an object method...
    $ClassToBless = ref($Class) || $Class;

    $This = bless $Class->SUPER::new(), $ClassToBless;

    # Defaults first, then caller specified properties...
    $This->_InitializeTopologicalAtomTripletsFingerprints();
    $This->_InitializeTopologicalAtomTripletsFingerprintsProperties(%NamesAndValues);

    return $This;
}
73
# Initialize object data to default values...
#
# Scalar defaults are collected in a table and copied into the object; the
# container valued members are emptied afterwards.
#
sub _InitializeTopologicalAtomTripletsFingerprints {
    my($This) = @_;
    my(%ScalarDefaults);

    %ScalarDefaults = (
        # Type of fingerprint...
        'Type' => 'TopologicalAtomTriplets',

        # Type of vector...
        'VectorType' => 'FingerprintsVector',

        # Type of FingerprintsVector...
        'FingerprintsVectorType' => 'NumericalValues',

        # Minimum and maximum bond distance between atom pairs...
        'MinDistance' => 1,
        'MaxDistance' => 10,

        # Determines whether to apply triangle inequality to distance triplets...
        'UseTriangleInequality' => 0,

        # Atom identifier type to use for atom IDs in atom triplets...
        #
        # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
        # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
        # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
        #
        'AtomIdentifierType' => '',
    );
    @{$This}{keys %ScalarDefaults} = values %ScalarDefaults;

    # Atom types assigned to each heavy atom...
    %{$This->{AssignedAtomTypes}} = ();

    # All atom triplets between minimum and maximum distance...
    @{$This->{AtomTripletsIDs}} = ();
    %{$This->{AtomTripletsCount}} = ();
}
113
# Initialize class level data...
#
# Stores the name of this package in the file scoped variable $ClassName, which
# is interpolated into the error and warning messages raised by this class.
#
sub _InitializeClass {
    $ClassName = __PACKAGE__;
}
119
# Initialize object properties using the NAME => VALUE pairs passed to the
# constructor...
#
# Each pair is applied by calling the method Set<NAME>(VALUE).
#
# Molecule and AtomIdentifierType are required; the method croaks, before any
# property is set, when either one is missing.
#
# AtomIdentifierType is always applied first and the remaining properties are
# applied in sorted name order. The order matters: SetAtomIdentifierType
# resets AtomicInvariantsToUse and FunctionalClassesToUse to their default
# values, and SetFunctionalClassesToUse ignores its values unless the atom
# identifier type has already been set. Iterating over the hash with each, as
# done previously, made the outcome depend on Perl's hash ordering.
#
# Returns the object.
#
sub _InitializeTopologicalAtomTripletsFingerprintsProperties {
    my($This, %NamesAndValues) = @_;
    my($Name, $MethodName, @PropertyNames);

    # Make sure molecule object was specified...
    if (!exists $NamesAndValues{Molecule}) {
        croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
    }
    if (!exists $NamesAndValues{AtomIdentifierType}) {
        croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
    }

    # Atom identifier type goes first; everything else follows in a
    # deterministic order...
    @PropertyNames = ('AtomIdentifierType', grep { $_ ne 'AtomIdentifierType' } sort keys %NamesAndValues);

    for $Name (@PropertyNames) {
        $MethodName = "Set${Name}";
        $This->$MethodName($NamesAndValues{$Name});
    }

    $This->_InitializeFingerprintsVector();

    return $This;
}
142
# Set minimum distance for atom triplets...
#
# Croaks unless TextUtil::IsPositiveInteger accepts the specified value.
# Returns the object.
#
sub SetMinDistance {
    my($This, $Value) = @_;

    croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid: It must be a positive integer..."
        unless TextUtil::IsPositiveInteger($Value);

    $This->{MinDistance} = $Value;

    return $This;
}
155
# Set maximum distance for atom triplets...
#
# Croaks unless TextUtil::IsPositiveInteger accepts the specified value.
# Returns the object.
#
sub SetMaxDistance {
    my($This, $Value) = @_;

    croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid: It must be a positive integer..."
        unless TextUtil::IsPositiveInteger($Value);

    $This->{MaxDistance} = $Value;

    return $This;
}
168
# Set atom identifier type...
#
# The specified name is matched, ignoring case, against the supported atom
# types: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
# FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
# TPSAAtomTypes and UFFAtomTypes.
#
# Croaks when the name is not supported or when an atom identifier type has
# already been set for the object. After storing the value, the information
# specific to the atom identifier type is initialized.
#
# Returns the object.
#
sub SetAtomIdentifierType {
    my($This, $IdentifierType) = @_;

    if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
        croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
    }

    # The type can be set only once per object. The message now names this
    # method correctly...
    if ($This->{AtomIdentifierType}) {
        croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type: It's already set...";
    }

    $This->{AtomIdentifierType} = $IdentifierType;

    # Initialize atom identifier type information...
    $This->_InitializeAtomIdentifierTypeInformation();

    return $This;
}
189
# Get fingerprints description...
#
# Returns the explicitly set description when one exists in the object.
# Otherwise, a description is generated in the following format:
#
#   <Type>:<AtomIdentifierType>:MinDistance<MinDistance>:MaxDistance<MaxDistance>
#
sub GetDescription {
    my($This) = @_;

    # Is description explicitly set?
    return $This->{Description} if exists $This->{Description};

    # Generate fingerprints description...
    return join(":", $This->{Type}, $This->{AtomIdentifierType}, "MinDistance$This->{MinDistance}", "MaxDistance$This->{MaxDistance}");
}
204
205 # Generate topological atom triplets fingerprints...
206 #
207 # Let:
208 #
209 # AT = Any of the supported atom types
210 #
211 # ATx = Atom type for atom x
212 # ATy = Atom type for atom y
213 # ATz = Atom type for atom z
214 #
215 # Dxy = Distance between atom x and atom y
216 # Dxz = Distance between atom x and atom z
217 # Dyz = Distance between atom y and atom z
218 #
219 # Then:
220 #
221 # ATx-Dyz-ATy-Dxz-ATz-Dxy = Atom triplet ID for atom types ATx, ATy and ATz
222 #
223 # Methodology:
224 # . Generate a distance matrix.
225 # . Assign atom types to all the atoms.
226 # . Using distance matrix and atom types, count occurrence of unique atom triplets
227 # within specified distance range along with optional triangle inequality check.
228 #
229 # Notes:
230 # . Hydrogen atoms are ignored during the fingerprint generation.
231 # . For a molecule containing N atoms with all different atom type, the total number of
232 # possible unique atom triplets without applying triangle inequality check corresponds to:
233 #
234 # Factorial( N ) / ( Factorial( N - 3 ) * Factorial (3) )
235 #
236 # However, due to similar atom types assigned to atoms in a molecule for a specific atom
237 # typing methodology and specified distance range used during fingerprints generation, the
238 # actual number of unique triplets is usually smaller than the theoretical limit.
239 #
sub GenerateFingerprints {
    my($This) = @_;

    # Entry point for fingerprints generation...
    #
    # Croaks when MinDistance is greater than MaxDistance. When the distance
    # matrix can't be generated or atom types can't be assigned, a warning is
    # issued and the object is returned without generated fingerprints.
    #
    # Cached molecule data is now cleared on those failure paths as well;
    # previously only the successful path released it.
    #
    # Returns the object.

    if ($This->{MinDistance} > $This->{MaxDistance}) {
        croak "Error: ${ClassName}->GenerateFingerprints: No fingerpritns generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
    }

    # Cache appropriate molecule data...
    $This->_SetupMoleculeDataCache();

    # Generate distance matrix...
    if (!$This->_SetupDistanceMatrix()) {
        carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't generate distance matrix...";
        $This->_ClearMoleculeDataCache();
        return $This;
    }

    # Assign atom types to all heavy atoms...
    if (!$This->_AssignAtomTypes()) {
        carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
        $This->_ClearMoleculeDataCache();
        return $This;
    }

    # Initialize values of topological atom triplets...
    $This->_InitializeToplogicalAtomTriplets();

    # Count atom triplets...
    $This->_GenerateAndCountAtomTriplets();

    # Set final fingerprints...
    $This->_SetFinalFingerprints();

    # Clear cached molecule data...
    $This->_ClearMoleculeDataCache();

    return $This;
}
276
# Setup distance matrix...
#
# Stores the distance matrix retrieved from the molecule in the object.
# Returns the object when a matrix is available; undef otherwise.
#
sub _SetupDistanceMatrix {
    my($This) = @_;
    my($Molecule);

    $Molecule = $This->GetMolecule();
    $This->{DistanceMatrix} = $Molecule->GetDistanceMatrix();

    return $This->{DistanceMatrix} ? $This : undef;
}
290
# Assign appropriate atom types to all heavy atoms...
#
# An atom types object of the class AtomTypes::<AtomIdentifierType> is created
# for the molecule and asked to assign atom types. The constructor parameters
# for each supported atom identifier type are kept in a table: all types get
# Molecule and IgnoreHydrogens, except TPSAAtomTypes which gets Molecule,
# IgnorePhosphorus and IgnoreSulfur. AtomicInvariantsAtomTypes and
# FunctionalClassAtomTypes additionally get the AtomicInvariantsToUse and
# FunctionalClassesToUse values stored in the object.
#
# Atom types objects are created using Class->new(...) instead of indirect
# object syntax.
#
# Returns the object on success, and undef when atom types assignment is not
# successful. Croaks for an unknown atom identifier type.
#
sub _AssignAtomTypes {
    my($This) = @_;
    my($SpecifiedAtomTypes, $Atom, $AtomID, $IgnoreHydrogens, $AtomTypesName, $AtomTypesClass, @CommonParameters, %ParametersByAtomTypesName);

    %{$This->{AssignedAtomTypes}} = ();
    $IgnoreHydrogens = 1;

    @CommonParameters = ('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);

    %ParametersByAtomTypesName = (
        'AtomicInvariantsAtomTypes' => [@CommonParameters, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse}],
        'DREIDINGAtomTypes' => [@CommonParameters],
        'EStateAtomTypes' => [@CommonParameters],
        'FunctionalClassAtomTypes' => [@CommonParameters, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse}],
        'MMFF94AtomTypes' => [@CommonParameters],
        'SLogPAtomTypes' => [@CommonParameters],
        'SYBYLAtomTypes' => [@CommonParameters],
        'TPSAAtomTypes' => ['Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0],
        'UFFAtomTypes' => [@CommonParameters],
    );

    # Names are distinct, so at most one entry matches the case insensitive
    # comparison...
    ($AtomTypesName) = grep { $This->{AtomIdentifierType} =~ /^\Q$_\E$/i } keys %ParametersByAtomTypesName;

    if (!defined $AtomTypesName) {
        croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
    }

    $AtomTypesClass = "AtomTypes::${AtomTypesName}";
    $SpecifiedAtomTypes = $AtomTypesClass->new(@{$ParametersByAtomTypesName{$AtomTypesName}});

    # Assign atom types...
    $SpecifiedAtomTypes->AssignAtomTypes();

    # Make sure atom types assignment is successful...
    if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
        return undef;
    }

    # Collect assigned atom types; hydrogens are skipped...
    ATOM: for $Atom (@{$This->{Atoms}}) {
        if ($Atom->IsHydrogen()) {
            next ATOM;
        }
        $AtomID = $Atom->GetID();
        $This->{AssignedAtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
    }

    return $This;
}
369
# Initialize topological atom triplets data...
#
# Empties, in place, the list of atom triplet IDs and the atom triplet counts
# kept in the object. Returns the object.
#
sub _InitializeToplogicalAtomTriplets {
    my($This) = @_;

    @{$This->{AtomTripletsIDs}} = ();
    %{$This->{AtomTripletsCount}} = ();

    return $This;
}
381
# Count atom triplets using distance matrix and atom types assigned to each
# heavy atom...
#
# Atom indices without an assigned atom type are dropped first. Every
# combination of three remaining atoms, in increasing index order, is counted
# when all three pairwise distances lie between MinDistance and MaxDistance
# and, provided UseTriangleInequality is set, the distances satisfy the
# triangle inequality. Counts are collected in AtomTripletsCount keyed by atom
# triplet ID.
#
# Returns the object.
#
sub _GenerateAndCountAtomTriplets {
    my($This) = @_;
    my($DistanceMatrix, $SkipIndexCheck, $MinDistance, $MaxDistance, $IsOutOfRange, $AtomIndex, $AtomID, $Position1, $Position2, $Position3, $AtomIndex1, $AtomIndex2, $AtomIndex3, $Distance12, $Distance13, $Distance23, $AtomTripletID, @TypedAtomIndices, %AtomTypeAtIndex);

    $DistanceMatrix = $This->{DistanceMatrix};
    $SkipIndexCheck = 0;

    $MinDistance = $This->{MinDistance};
    $MaxDistance = $This->{MaxDistance};
    $IsOutOfRange = sub { my($Distance) = @_; return ($Distance < $MinDistance || $Distance > $MaxDistance); };

    # Collect indices of atoms with assigned atom types along with the types...
    @TypedAtomIndices = ();
    %AtomTypeAtIndex = ();
    ATOMINDEX: for $AtomIndex (0 .. (@{$This->{Atoms}} - 1)) {
        $AtomID = $This->{AtomIndexToID}{$AtomIndex};
        if (!exists($This->{AssignedAtomTypes}{$AtomID})) {
            next ATOMINDEX;
        }
        push @TypedAtomIndices, $AtomIndex;
        $AtomTypeAtIndex{$AtomIndex} = $This->{AssignedAtomTypes}{$AtomID};
    }

    POSITION1: for $Position1 (0 .. $#TypedAtomIndices) {
        $AtomIndex1 = $TypedAtomIndices[$Position1];

        POSITION2: for $Position2 (($Position1 + 1) .. $#TypedAtomIndices) {
            $AtomIndex2 = $TypedAtomIndices[$Position2];

            $Distance12 = $DistanceMatrix->GetValue($AtomIndex1, $AtomIndex2, $SkipIndexCheck);
            next POSITION2 if $IsOutOfRange->($Distance12);

            POSITION3: for $Position3 (($Position2 + 1) .. $#TypedAtomIndices) {
                $AtomIndex3 = $TypedAtomIndices[$Position3];

                $Distance13 = $DistanceMatrix->GetValue($AtomIndex1, $AtomIndex3, $SkipIndexCheck);
                $Distance23 = $DistanceMatrix->GetValue($AtomIndex2, $AtomIndex3, $SkipIndexCheck);

                next POSITION3 if $IsOutOfRange->($Distance13);
                next POSITION3 if $IsOutOfRange->($Distance23);

                if ($This->{UseTriangleInequality} && !$This->_DoDistancesSatisfyTriangleInequality($Distance12, $Distance13, $Distance23)) {
                    next POSITION3;
                }

                # Each atom type is paired with the distance between the other two atoms...
                $AtomTripletID = $This->_GetAtomTripletID($AtomTypeAtIndex{$AtomIndex1}, $Distance23, $AtomTypeAtIndex{$AtomIndex2}, $Distance13, $AtomTypeAtIndex{$AtomIndex3}, $Distance12);
                $This->{AtomTripletsCount}{$AtomTripletID}++;
            }
        }
    }
    return $This;
}
443
# Check triangle inequality...
#
# Returns 1 when each distance is strictly greater than the absolute difference
# of the other two distances and strictly smaller than their sum; 0 otherwise.
#
sub _DoDistancesSatisfyTriangleInequality {
    my($This, $Distance1, $Distance2, $Distance3) = @_;
    my($Side, $OtherSide1, $OtherSide2, $Sides);

    # Apply the same test to each distance against the remaining two...
    for $Sides ([$Distance1, $Distance2, $Distance3], [$Distance2, $Distance1, $Distance3], [$Distance3, $Distance1, $Distance2]) {
        ($Side, $OtherSide1, $OtherSide2) = @{$Sides};
        unless ($Side > abs($OtherSide1 - $OtherSide2) && $Side < ($OtherSide1 + $OtherSide2)) {
            return 0;
        }
    }
    return 1;
}
460
# Get atom triplet ID corresponding to atom types and distances of an atom triplet...
#
# Each atom type is combined with its paired distance as <AtomType>-D<Distance>;
# the three parts are sorted as strings and joined using "-".
#
sub _GetAtomTripletID {
    my($This, $ATx, $Dyz, $ATy, $Dxz, $ATz, $Dxy) = @_;
    my(@Parts, @SortedParts);

    @Parts = map { "$_->[0]-D$_->[1]" } ([$ATx, $Dyz], [$ATy, $Dxz], [$ATz, $Dxy]);
    @SortedParts = sort @Parts;

    return join("-", @SortedParts);
}
474
# Set final fingerprints vector...
#
# Marks the fingerprints as generated, stores the sorted atom triplet IDs in
# the object, and adds those IDs along with their counts, in matching order,
# to the fingerprints vector.
#
# Returns the object.
#
sub _SetFinalFingerprints {
    my($This) = @_;
    my(@Values);

    # Mark successful generation of fingerprints...
    $This->{FingerprintsGenerated} = 1;

    # Sorted IDs and corresponding count values...
    @{$This->{AtomTripletsIDs}} = sort keys %{$This->{AtomTripletsCount}};
    @Values = @{$This->{AtomTripletsCount}}{ @{$This->{AtomTripletsIDs}} };

    # Add AtomTripletsIDs and values to fingerprint vector...
    $This->{FingerprintsVector}->AddValueIDs($This->{AtomTripletsIDs});
    $This->{FingerprintsVector}->AddValues(\@Values);

    return $This;
}
499
# Get atom triplet IDs corresponding to atom triplets count values in fingerprint
# vector...
#
# In list context, the IDs are returned as a list; otherwise, a reference to
# the array of IDs kept in the object is returned.
#
# The list of IDs differs between molecules. It is generated during
# finalization of fingerprints so that its order matches the count values in
# the fingerprints vector.
#
sub GetAtomTripletIDs {
    my($This) = @_;

    if (wantarray) {
        return @{$This->{AtomTripletsIDs}};
    }
    return \@{$This->{AtomTripletsIDs}};
}
512
# Cache appropriate molecule data...
#
# Stores all atoms of the molecule in Atoms and a map from position of an atom
# in that list to its atom ID in AtomIndexToID. Returns the object.
#
sub _SetupMoleculeDataCache {
    my($This) = @_;
    my($AtomIndex);

    # Get all atoms including hydrogens to correctly map atom indices to atom IDs for
    # usage of distance matrix. The hydrogen atoms are ignored during processing...
    #
    @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();

    # Set AtomIndex to AtomID hash...
    %{$This->{AtomIndexToID}} = ();
    for $AtomIndex (0 .. $#{$This->{Atoms}}) {
        $This->{AtomIndexToID}{$AtomIndex} = $This->{Atoms}[$AtomIndex]->GetID();
    }

    return $This;
}
534
# Set atomic invariants to use for atom identifiers...
#
# Values may be specified as a list or as a reference to an array. Issues a
# warning and returns nothing when no values are specified. Croaks when any
# value is not an available atomic invariant; the object is left unchanged in
# that case.
#
# Returns the object.
#
sub SetAtomicInvariantsToUse {
    my($This, @Values) = @_;
    my($SpecifiedAtomicInvariant, @SpecifiedAtomicInvariants);

    if (!@Values) {
        carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
        return;
    }

    # An array reference as first value supplies the whole list...
    @SpecifiedAtomicInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

    # Make sure specified AtomicInvariants are valid...
    for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
        AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)
            or croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
    }

    # Set atomic invariants to use...
    @{$This->{AtomicInvariantsToUse}} = @SpecifiedAtomicInvariants;

    return $This;
}
574
# Set functional classes to use for atom identifiers...
#
# Values may be specified as a list or as a reference to an array. Issues a
# warning and returns nothing when no values are specified or when the atom
# identifier type of the object is not FunctionalClassAtomTypes. Croaks when
# any value is not an available functional class; the object is left unchanged
# in that case.
#
# Returns the object.
#
sub SetFunctionalClassesToUse {
    my($This, @Values) = @_;
    my($SpecifiedFunctionalClass, @SpecifiedFunctionalClasses);

    if (!@Values) {
        carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
        return;
    }

    unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
        carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
        return;
    }

    # An array reference as first value supplies the whole list...
    @SpecifiedFunctionalClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

    # Make sure specified FunctionalClasses are valid...
    for $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
        AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)
            or croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
    }

    # Set functional classes to use...
    @{$This->{FunctionalClassesToUse}} = @SpecifiedFunctionalClasses;

    return $This;
}
618
# Initialize atom identifier type information...
#
# Current supported values:
#
# AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# Only AtomicInvariantsAtomTypes and FunctionalClassAtomTypes need additional
# setup. Croaks for an unknown atom identifier type. Returns the object.
#
sub _InitializeAtomIdentifierTypeInformation {
    my($This) = @_;
    my($IdentifierType);

    $IdentifierType = $This->{AtomIdentifierType};

    if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
        $This->_InitializeAtomicInvariantsAtomTypesInformation();
        return $This;
    }

    if ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
        $This->_InitializeFunctionalClassAtomTypesInformation();
        return $This;
    }

    # Nothing to do for the remaining supported types for now...
    if ($IdentifierType !~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
        croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
    }

    return $This;
}
644
645 # Initialize atomic invariants atom types to use for generating atom IDs in atom triplets...
646 #
647 # Let:
648 # AS = Atom symbol corresponding to element symbol
649 #
650 # X<n> = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
651 # BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
652 # LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
653 # SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
654 # DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
655 # TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
656 # H<n> = Number of implicit and explicit hydrogens for atom
657 # Ar = Aromatic annotation indicating whether atom is aromatic
658 # RA = Ring atom annotation indicating whether atom is in a ring
659 # FC<+n/-n> = Formal charge assigned to atom
660 # MN<n> = Mass number indicating isotope other than most abundant isotope
661 # SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
662 #
663 # ATx = Atomic invariants atom type for atom x
664 # ATy = Atomic invariants atom type for atom y
665 # ATz = Atomic invariants atom type for atom z
666 #
667 # Dxy = Distance between atom x and atom y
668 # Dxz = Distance between atom x and atom z
669 # Dyz = Distance between atom y and atom z
670 #
671 # Then:
672 #
673 # Atom triplet AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
674 #
675 # AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
676 #
677 # Topological atom triplet ID between atom IDs ATx, ATy and ATz corresponds to:
678 #
679 # ATx-Dyz-ATy-Dxz-ATz-Dxy
680 #
681 # Except for AS which is a required atomic invariant in atom triplet AtomIDs, all other atomic invariants are
682 # optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
683 # AtomID specification doesn't include atomic invariants with zero or undefined values.
684 #
685 # Examples of atom triplet AtomIDs:
686 #
687 # O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge
688 # O.X1.BO1.FC-1 - Hydroxyl oxygen in carboxylate with explicit negative charge
689 # O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon
690 # O.X2.BO2 - Hydroxyl oxygen in carboxylate attached to carbonyl carbon and another heavy atom
691 #
692 # C.X2.BO3.H1.Ar - Aromatic carbon
693 #
sub _InitializeAtomicInvariantsAtomTypesInformation {
    my($This) = @_;

    # Replace any existing list, in place, with the default atomic invariants
    # used for generating atom triplet atom IDs: AS, X, BO, H, FC
    #
    @{$This->{AtomicInvariantsToUse}} = qw(AS X BO H FC);

    return $This;
}
704
705 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
706 # class, to use for generating atom identifiers...
707 #
708 # Let:
709 # HBD: HydrogenBondDonor
710 # HBA: HydrogenBondAcceptor
711 # PI : PositivelyIonizable
712 # NI : NegativelyIonizable
713 # Ar : Aromatic
714 # Hal : Halogen
715 # H : Hydrophobic
716 # RA : RingAtom
717 # CA : ChainAtom
718 #
719 # Then:
720 #
721 # Functional class atom type specification for an atom corresponds to:
722 #
723 # Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
724 #
725 # Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
726 #
727 # FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
728 #
729 # HydrogenBondDonor: NH, NH2, OH
730 # HydrogenBondAcceptor: N[!H], O
731 # PositivelyIonizable: +, NH2
732 # NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
733 #
sub _InitializeFunctionalClassAtomTypesInformation {
    my($This) = @_;

    # Replace any existing list, in place, with the default functional classes
    # used for generating atom identifiers: HBD, HBA, PI, NI, Ar, Hal
    #
    @{$This->{FunctionalClassesToUse}} = qw(HBD HBA PI NI Ar Hal);

    return $This;
}
745
# Clear cached molecule data...
#
# Empties, in place, the list of atoms cached in the object. Returns the object.
#
sub _ClearMoleculeDataCache {
    my($This) = @_;

    # Truncate the cached atoms array to zero elements...
    $#{$This->{Atoms}} = -1;

    return $This;
}
755
# Return a string containing data for TopologicalAtomTripletsFingerprints object...
#
# The string is a list of "Name: Value" sections separated by "; ". It covers
# fingerprint type, atom identifier type, distance range, usage of triangle
# inequality, information specific to AtomicInvariantsAtomTypes or
# FunctionalClassAtomTypes, the number of atom triplets, and the fingerprints
# vector.
#
sub StringifyTopologicalAtomTripletsFingerprints {
    my($This) = @_;
    my(@Sections);

    # Type of fingerprint...
    push @Sections, "Fingerprint type: $This->{Type}", "AtomIdentifierType: $This->{AtomIdentifierType}";

    # Min and max distance...
    push @Sections, "MinDistance: $This->{MinDistance}", "MaxDistance: $This->{MaxDistance}";
    push @Sections, "UseTriangleInequality: " . ($This->{UseTriangleInequality} ? "Yes" : "No");

    if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
        my(@AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants);

        @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
        %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

        @AtomicInvariants = map { "$_: $AvailableAtomicInvariants{$_}" } @AtomicInvariantsOrder;

        push @Sections, "AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
        push @Sections, "AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
        push @Sections, "AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
    }
    elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
        my(@FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses);

        @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
        %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

        @FunctionalClasses = map { "$_: $AvailableFunctionalClasses{$_}" } @FunctionalClassesOrder;

        push @Sections, "FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
        push @Sections, "FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
        push @Sections, "AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
    }

    # Total number of atom triplets...
    push @Sections, "NumOfAtomTriplets: " . $This->{FingerprintsVector}->GetNumOfValues();

    # FingerprintsVector...
    push @Sections, "FingerprintsVector: < $This->{FingerprintsVector} >";

    return join("; ", @Sections);
}
805
806 1;
807
808 __END__
809
810 =head1 NAME
811
812 TopologicalAtomTripletsFingerprints
813
814 =head1 SYNOPSIS
815
816 use Fingerprints::TopologicalAtomTripletsFingerprints;
817
818 use Fingerprints::TopologicalAtomTripletsFingerprints qw(:all);
819
820 =head1 DESCRIPTION
821
822 B<TopologicalAtomTripletsFingerprints> [ Ref 57, Ref 59, Ref 72 ] class provides the following methods:
823
824 new, GenerateFingerprints, GetAtomTripletIDs, GetDescription,
825 SetAtomIdentifierType, SetAtomicInvariantsToUse, SetFunctionalClassesToUse,
826 SetMaxDistance, SetMinDistance, StringifyTopologicalAtomTripletsFingerprints
827
828 B<TopologicalAtomTripletsFingerprints> is derived from B<Fingerprints> class which in turn
829 is derived from B<ObjectProperty> base class that provides methods not explicitly defined
830 in B<TopologicalAtomTripletsFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's
831 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property:
832
833 Set<PropertyName>(<PropertyValue>);
834 $PropertyValue = Get<PropertyName>();
835 Delete<PropertyName>();
836
837 The current release of MayaChemTools supports generation of B<TopologicalAtomTripletsFingerprints>
838 corresponding to the following B<AtomIdentifierTypes>:
839
840 AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
841 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
842 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
843
844 Based on the values specified for B<AtomIdentifierType> along with other specified
845 parameters such as B<AtomicInvariantsToUse> and B<FunctionalClassesToUse>, initial
846 atom types are assigned to all non-hydrogen atoms in a molecule. Using the distance
847 matrix for the molecule and initial atom types assigned to non-hydrogen atoms, all unique atom
848 triplets within B<MinDistance> and B<MaxDistance> are identified and counted. An atom triplet
849 identifier is generated for each unique atom triplet; the format of atom triplet identifier is:
850
851 <ATx>-Dyz-<ATy>-Dxz-<ATz>-Dxy
852
853 ATx, ATy, ATz: Atom types assigned to atom x, atom y, and atom z
854 Dxy: Distance between atom x and atom y
855 Dxz: Distance between atom x and atom z
856 Dyz: Distance between atom y and atom z
857
858 where <ATx>-Dyz <= <ATy>-Dxz <= <ATz>-Dxy
859
860 The atom triplet identifiers for all unique atom triplets corresponding to non-hydrogen atoms constitute
861 topological atom triplets fingerprints of the molecule.
862
863 The current release of MayaChemTools generates the following types of topological atom triplets
864 fingerprints vector strings:
865
866 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M
867 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1
868 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1
869 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1
870 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....;
871 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2
872 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8...
873
874 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M
875 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesPairsString
876 ;C.X1.BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 1 C.X1.BO1.H3-D1-C.X2.BO
877 2.H2-D10-C.X3.BO4-D9 2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 2 C.X
878 1.BO1.H3-D1-C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 2 C.X1.BO1.H3-D1-C.X2.BO2.H2
879 -D6-C.X3.BO3.H1-D5 2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3.BO3.H1-D7 2...
880
881 FingerprintsVector;TopologicalAtomTriplets:DREIDINGAtomTypes:MinDistan
882 ce1:MaxDistance10;2377;NumericalValues;IDsAndValuesString;C_2-D1-C_2-D
883 9-C_3-D10 C_2-D1-C_2-D9-C_R-D10 C_2-D1-C_3-D1-C_3-D2 C_2-D1-C_3-D10-C_
884 3-D9 C_2-D1-C_3-D2-C_3-D3 C_2-D1-C_3-D2-C_R-D3 C_2-D1-C_3-D3-C_3-D4 C_
885 2-D1-C_3-D3-N_R-D4 C_2-D1-C_3-D3-O_3-D2 C_2-D1-C_3-D4-C_3-D5 C_2-D...;
886 1 1 1 2 1 1 3 1 1 2 2 1 1 1 1 1 1 1 1 2 1 3 4 5 1 1 6 4 2 2 3 1 1 1 2
887 2 1 2 1 1 2 2 2 1 2 1 2 1 1 3 3 2 6 4 2 1 1 1 2 2 1 1 1 1 1 1 1 1 1...
888
889 FingerprintsVector;TopologicalAtomTriplets:EStateAtomTypes:MinDistance
890 1:MaxDistance10;3298;NumericalValues;IDsAndValuesString;aaCH-D1-aaCH-D
891 1-aaCH-D2 aaCH-D1-aaCH-D1-aasC-D2 aaCH-D1-aaCH-D10-aaCH-D9 aaCH-D1-aaC
892 H-D10-aasC-D9 aaCH-D1-aaCH-D2-aaCH-D3 aaCH-D1-aaCH-D2-aasC-D1 aaCH-D1-
893 aaCH-D2-aasC-D3 aaCH-D1-aaCH-D3-aasC-D2 aaCH-D1-aaCH-D4-aasC-D5 aa...;
894 6 4 24 4 16 8 8 4 8 8 8 12 10 14 4 16 24 4 12 2 2 4 1 10 2 2 15 2 2 2
895 2 2 2 14 4 2 2 2 2 1 2 10 2 2 4 1 2 4 8 3 3 3 4 6 4 2 2 3 3 1 1 1 2 1
896 2 2 4 2 3 2 1 2 4 5 3 2 2 1 2 4 3 2 8 12 6 2 2 4 4 7 1 4 2 4 2 2 2 ...
897
898 FingerprintsVector;TopologicalAtomTriplets:FunctionalClassAtomTypes:Mi
899 nDistance1:MaxDistance10;2182;NumericalValues;IDsAndValuesString;Ar-D1
900 -Ar-D1-Ar-D2 Ar-D1-Ar-D1-Ar.HBA-D2 Ar-D1-Ar-D10-Ar-D9 Ar-D1-Ar-D10-Hal
901 -D9 Ar-D1-Ar-D2-Ar-D2 Ar-D1-Ar-D2-Ar-D3 Ar-D1-Ar-D2-Ar.HBA-D1 Ar-D1-Ar
902 -D2-Ar.HBA-D2 Ar-D1-Ar-D2-Ar.HBA-D3 Ar-D1-Ar-D2-HBD-D1 Ar-D1-Ar-D2...;
903 27 1 32 2 2 63 3 2 1 2 1 2 3 1 1 40 3 1 2 2 2 2 4 2 2 47 4 2 2 1 2 1 5
904 2 2 51 4 3 1 3 1 9 1 1 50 3 3 4 1 9 50 2 2 3 3 5 45 1 1 1 2 1 2 2 3 3
905 4 4 3 2 1 1 3 4 5 5 3 1 2 3 2 3 5 7 2 7 3 7 1 1 2 2 2 2 3 1 4 3 1 2...
906
907 FingerprintsVector;TopologicalAtomTriplets:MMFF94AtomTypes:MinDistance
908 1:MaxDistance10;2966;NumericalValues;IDsAndValuesString;C5A-D1-C5A-D1-
909 N5-D2 C5A-D1-C5A-D2-C5B-D2 C5A-D1-C5A-D3-CB-D2 C5A-D1-C5A-D3-CR-D2 C5A
910 -D1-C5B-D1-C5B-D2 C5A-D1-C5B-D2-C=ON-D1 C5A-D1-C5B-D2-CB-D1 C5A-D1-C5B
911 -D3-C=ON-D2 C5A-D1-C5B-D3-CB-D2 C5A-D1-C=ON-D3-NC=O-D2 C5A-D1-C=ON-D3-
912 O=CN-D2 C5A-D1-C=ON-D4-NC=O-D3 C5A-D1-C=ON-D4-O=CN-D3 C5A-D1-CB-D1-...
913
914 FingerprintsVector;TopologicalAtomTriplets:SLogPAtomTypes:MinDistance1
915 :MaxDistance10;3710;NumericalValues;IDsAndValuesString;C1-D1-C1-D1-C11
916 -D2 C1-D1-C1-D1-CS-D2 C1-D1-C1-D10-C5-D9 C1-D1-C1-D3-C10-D2 C1-D1-C1-D
917 3-C5-D2 C1-D1-C1-D3-CS-D2 C1-D1-C1-D3-CS-D4 C1-D1-C1-D4-C10-D5 C1-D1-C
918 1-D4-C11-D5 C1-D1-C1-D5-C10-D4 C1-D1-C1-D5-C5-D4 C1-D1-C1-D6-C11-D7 C1
919 -D1-C1-D6-CS-D5 C1-D1-C1-D6-CS-D7 C1-D1-C1-D8-C11-D9 C1-D1-C1-D8-CS...
920
921 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1
922 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C
923 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3-
924 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2
925 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C.
926 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7...
927
928 FingerprintsVector;TopologicalAtomTriplets:TPSAAtomTypes:MinDistance1:
929 MaxDistance10;1007;NumericalValues;IDsAndValuesString;N21-D1-N7-D3-Non
930 e-D4 N21-D1-N7-D5-None-D4 N21-D1-None-D1-None-D2 N21-D1-None-D2-None-D
931 2 N21-D1-None-D2-None-D3 N21-D1-None-D3-None-D4 N21-D1-None-D4-None-D5
932 N21-D1-None-D4-O3-D3 N21-D1-None-D4-O4-D3 N21-D1-None-D5-None-D6 N21-
933 D1-None-D6-None-D7 N21-D1-None-D6-O4-D5 N21-D1-None-D7-None-D8 N21-...
934
935 FingerprintsVector;TopologicalAtomTriplets:UFFAtomTypes:MinDistance1:M
936 axDistance10;2377;NumericalValues;IDsAndValuesString;C_2-D1-C_2-D9-C_3
937 -D10 C_2-D1-C_2-D9-C_R-D10 C_2-D1-C_3-D1-C_3-D2 C_2-D1-C_3-D10-C_3-D9
938 C_2-D1-C_3-D2-C_3-D3 C_2-D1-C_3-D2-C_R-D3 C_2-D1-C_3-D3-C_3-D4 C_2-D1-
939 C_3-D3-N_R-D4 C_2-D1-C_3-D3-O_3-D2 C_2-D1-C_3-D4-C_3-D5 C_2-D1-C_3-D5-
940 C_3-D6 C_2-D1-C_3-D5-O_3-D4 C_2-D1-C_3-D6-C_3-D7 C_2-D1-C_3-D7-C_3-...
941
942 =head2 METHODS
943
944 =over 4
945
946 =item B<new>
947
948 $NewTopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints(
949 %NamesAndValues);
950
951 Using specified I<TopologicalAtomTripletsFingerprints> property names and values hash, B<new>
952 method creates a new object and returns a reference to newly created B<TopologicalAtomTripletsFingerprints>
953 object. By default, the following properties are initialized:
954
955 Molecule = ''
956 Type = 'TopologicalAtomTriplets'
957 MinDistance = 1
958 MaxDistance = 10
959 UseTriangleInequality = 1
960 AtomIdentifierType = ''
961 AtomicInvariantsToUse = ['AS', 'X', 'BO', 'H', 'FC']
962 FunctionalClassesToUse = ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal']
963
964 Examples:
965
966 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints(
967 'Molecule' => $Molecule,
968 'AtomIdentifierType' =>
969 'AtomicInvariantsAtomTypes');
970
971 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints(
972 'Molecule' => $Molecule,
973 'MinDistance' => 1,
974 'MaxDistance' => 10,
975 'AtomIdentifierType' =>
976 'AtomicInvariantsAtomTypes',
977 'AtomicInvariantsToUse' =>
978 ['AS', 'X', 'BO', 'H', 'FC'] );
979
980 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints(
981 'Molecule' => $Molecule,
982 'AtomIdentifierType' =>
983 'DREIDINGAtomTypes');
984
985 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints(
986 'Molecule' => $Molecule,
987 'AtomIdentifierType' =>
988 'MMFF94AtomTypes');
989
990 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints(
991 'Molecule' => $Molecule,
992 'AtomIdentifierType' =>
993 'TPSAAtomTypes');
994
995 $TopologicalAtomTripletsFingerprints = new TopologicalAtomTripletsFingerprints(
996 'Molecule' => $Molecule,
997 'MinDistance' => 1,
998 'MaxDistance' => 10,
999 'AtomIdentifierType' =>
1000 'FunctionalClassAtomTypes',
1001 'FunctionalClassesToUse' =>
1002 ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal']);
1003
1004 $TopologicalAtomTripletsFingerprints->GenerateFingerprints();
1005 print "$TopologicalAtomTripletsFingerprints\n";
1006
1007 =item B<GetDescription>
1008
1009 $Return = $TopologicalAtomTripletsFingerprints->GetDescription();
1010
1011 Returns a string containing description of topological atom triplets fingerprints.
1012
1013 =item B<GenerateFingerprints>
1014
1015 $TopologicalAtomTripletsFingerprints->GenerateFingerprints();
1016
1017 Generates topological atom triplets fingerprints and returns I<TopologicalAtomTripletsFingerprints>.
1018
1019 =item B<GetAtomTripletIDs>
1020
1021 $AtomTripletIDsRef = $TopologicalAtomTripletsFingerprints->GetAtomTripletIDs();
1022 @AtomTripletIDs = $TopologicalAtomTripletsFingerprints->GetAtomTripletIDs();
1023
1024 Returns atom triplet IDs corresponding to atom triplets count values in topological atom triplets
1025 fingerprints vector as an array or reference to an array.
1026
1027 =item B<SetAtomIdentifierType>
1028
1029 $TopologicalAtomTripletsFingerprints->SetAtomIdentifierType($IdentifierType);
1030
1031 Sets atom I<IdentifierType> to use during atom triplets fingerprints generation and
1032 returns I<TopologicalAtomTripletsFingerprints>.
1033
1034 Possible values: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
1035 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
1036 TPSAAtomTypes, UFFAtomTypes>.
1037
1038 =item B<SetAtomicInvariantsToUse>
1039
1040 $TopologicalAtomTripletsFingerprints->SetAtomicInvariantsToUse($ValuesRef);
1041 $TopologicalAtomTripletsFingerprints->SetAtomicInvariantsToUse(@Values);
1042
1043 Sets atomic invariants to use when I<AtomIdentifierType> is set to I<AtomicInvariantsAtomTypes>
1044 for topological atom triplets fingerprints generation and returns I<TopologicalAtomTripletsFingerprints>.
1045
1046 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB,
1047 H, Ar, RA, FC, MN, SM>. Default value: I<AS,X,BO,H,FC>.
1048
1049 The atomic invariants abbreviations correspond to:
1050
1051 AS = Atom symbol corresponding to element symbol
1052
1053 X<n> = Number of non-hydrogen atom neighbors or heavy atoms
1054 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms
1055 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms
1056 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms
1057 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms
1058 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms
1059 H<n> = Number of implicit and explicit hydrogens for atom
1060 Ar = Aromatic annotation indicating whether atom is aromatic
1061 RA = Ring atom annotation indicating whether atom is in a ring
1062 FC<+n/-n> = Formal charge assigned to atom
1063 MN<n> = Mass number indicating isotope other than most abundant isotope
1064 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or
1065 3 (triplet)
1066
1067 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
1068
1069 AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
1070
1071 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
1072 optional. Atom type specification doesn't include atomic invariants with zero or undefined values.
1073
1074 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words
1075 are also allowed:
1076
1077 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
1078 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
1079 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
1080 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
1081 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
1082 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
1083 H : NumOfImplicitAndExplicitHydrogens
1084 Ar : Aromatic
1085 RA : RingAtom
1086 FC : FormalCharge
1087 MN : MassNumber
1088 SM : SpinMultiplicity
1089
1090 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant
1091 atom types.
1092
1093 =item B<SetFunctionalClassesToUse>
1094
1095 $TopologicalAtomTripletsFingerprints->SetFunctionalClassesToUse($ValuesRef);
1096 $TopologicalAtomTripletsFingerprints->SetFunctionalClassesToUse(@Values);
1097
1098 Sets functional classes to use when I<AtomIdentifierType> is set to I<FunctionalClassAtomTypes>
1099 for topological atom triplets fingerprints generation and returns I<TopologicalAtomTripletsFingerprints>.
1100
1101 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>.
1102 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>.
1103
1104 The functional class abbreviations correspond to:
1105
1106 HBD: HydrogenBondDonor
1107 HBA: HydrogenBondAcceptor
1108 PI : PositivelyIonizable
1109 NI : NegativelyIonizable
1110 Ar : Aromatic
1111 Hal : Halogen
1112 H : Hydrophobic
1113 RA : RingAtom
1114 CA : ChainAtom
1115
1116 Functional class atom type specification for an atom corresponds to:
1117
1118 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA or None
1119
1120 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom
1121 types. It uses the following definitions [ Ref 60-61, Ref 65-66 ]:
1122
1123 HydrogenBondDonor: NH, NH2, OH
1124 HydrogenBondAcceptor: N[!H], O
1125 PositivelyIonizable: +, NH2
1126 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
1127
1128 =item B<SetMaxDistance>
1129
1130 $TopologicalAtomTripletsFingerprints->SetMaxDistance($Distance);
1131
1132 Sets maximum distance to use during topological atom triplets fingerprints generation and
1133 returns I<TopologicalAtomTripletsFingerprints>.
1134
1135 =item B<SetMinDistance>
1136
1137 $TopologicalAtomTripletsFingerprints->SetMinDistance($Distance);
1138
1139 Sets minimum distance to use during topological atom triplets fingerprints generation and
1140 returns I<TopologicalAtomTripletsFingerprints>.
1141
1142 =item B<StringifyTopologicalAtomTripletsFingerprints>
1143
1144 $String = $TopologicalAtomTripletsFingerprints->
1145 StringifyTopologicalAtomTripletsFingerprints();
1146
1147 Returns a string containing information about I<TopologicalAtomTripletsFingerprints> object.
1148
1149 =back
1150
1151 =head1 AUTHOR
1152
1153 Manish Sud <msud@san.rr.com>
1154
1155 =head1 SEE ALSO
1156
1157 Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm,
1158 AtomTypesFingerprints.pm, EStateIndiciesFingerprints.pm, ExtendedConnectivityFingerprints.pm,
1159 MACCSKeys.pm, PathLengthFingerprints.pm, TopologicalAtomPairsFingerprints.pm,
1160 TopologicalAtomTorsionsFingerprints.pm, TopologicalPharmacophoreAtomPairsFingerprints.pm,
1161 TopologicalPharmacophoreAtomTripletsFingerprints.pm
1162
1163 =head1 COPYRIGHT
1164
1165 Copyright (C) 2015 Manish Sud. All rights reserved.
1166
1167 This file is part of MayaChemTools.
1168
1169 MayaChemTools is free software; you can redistribute it and/or modify it under
1170 the terms of the GNU Lesser General Public License as published by the Free
1171 Software Foundation; either version 3 of the License, or (at your option)
1172 any later version.
1173
1174 =cut