comparison lib/Fingerprints/TopologicalPharmacophoreAtomTripletsFingerprints.pm @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 package Fingerprints::TopologicalPharmacophoreAtomTripletsFingerprints;
2 #
3 # $RCSfile: TopologicalPharmacophoreAtomTripletsFingerprints.pm,v $
4 # $Date: 2015/02/28 20:48:54 $
5 # $Revision: 1.30 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Carp;
31 use Exporter;
32 use Fingerprints::Fingerprints;
33 use TextUtil ();
34 use MathUtil ();
35 use Molecule;
36 use AtomTypes::FunctionalClassAtomTypes;
37
# Package-level inheritance and export configuration. Declared with "our"
# rather than the superseded "use vars" pragma. Nothing is exported: both
# export lists are empty and the class is used through its methods.
our @ISA = qw(Fingerprints::Fingerprints Exporter);
our @EXPORT = qw();
our @EXPORT_OK = qw();

our %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Overload Perl functions: stringifying an object yields its textual report...
use overload '""' => 'StringifyTopologicalPharmacophoreAtomTripletsFingerprints';
52
53 # Class constructor...
sub new {
  my($Class, %NamesAndValues) = @_;

  # The invocant may be a class name or an existing object; either way the
  # object built by the parent constructor is blessed into the resolved class...
  my $TargetClass = ref($Class) || $Class;
  my $This = bless($Class->SUPER::new(), $TargetClass);

  # Install the defaults first, then apply the caller supplied properties...
  $This->_InitializeTopologicalPharmacophoreAtomTripletsFingerprints();
  $This->_InitializeTopologicalPharmacophoreAtomTripletsFingerprintsProperties(%NamesAndValues);

  return $This;
}
66
67 # Initialize object data...
68 #
sub _InitializeTopologicalPharmacophoreAtomTripletsFingerprints {
  my($This) = @_;

  # Scalar defaults, installed in one pass...
  #
  # Type, VectorType:
  #   Name of the fingerprint and of the vector class holding its values.
  #
  # AtomTripletsSetSizeToUse:
  #   ArbitrarySize - Corresponds to atom triplets with non-zero count
  #   FixedSize - Corresponds to all atom triplets with zero and non-zero count
  #   Starts out empty; ArbitrarySize is substituted while the fingerprints
  #   vector is initialized unless a value has been set by then.
  #
  # FingerprintsVectorType:
  #   NumericalValues - For ArbitrarySize value of AtomTripletsSetSizeToUse
  #   OrderedNumericalValues - For FixedSize value of AtomTripletsSetSizeToUse
  #   Starts out empty; derived while the fingerprints vector is initialized.
  #
  # MinDistance, MaxDistance, DistanceBinSize:
  #   Minimum and maximum bond distance between pharmacophore atom pairs
  #   corresponding to atom triplets, and the size of the bins used for binning
  #   distances. In order to distribute distance bins of equal size, the last bin
  #   is allowed to go past the maximum distance by up to the distance bin size.
  #   The defaults of 1, 10 and 2 [ Ref 70 ] generate these 5 distance bins:
  #   [1, 2] [3, 4] [5, 6] [7, 8] [9, 10]
  #
  # UseTriangleInequality:
  #   Whether the triangle inequality is applied to distance triplets during
  #   basis set generation.
  #
  my %ScalarDefaults = (
    Type => 'TopologicalPharmacophoreAtomTriplets',
    VectorType => 'FingerprintsVector',
    AtomTripletsSetSizeToUse => '',
    FingerprintsVectorType => '',
    MinDistance => 1,
    MaxDistance => 10,
    DistanceBinSize => 2,
    UseTriangleInequality => 1,
  );
  @{$This}{keys %ScalarDefaults} = values %ScalarDefaults;

  # Default pharmacophore atom types...
  $This->_InitializeToplogicalPharmacophoreAtomTypesInformation();

  # Pharmacophore types assigned to each heavy atom, filled in during
  # fingerprints generation...
  %{$This->{AssignedAtomTypes}} = ();

  # Atom triplets bookkeeping: the list of triplet IDs and the count recorded
  # for each ID...
  %{$This->{AtomTriplets}} = (IDs => []);
  %{$This->{AtomTriplets}{Count}} = ();
}
128
129 # Initialize class ...
sub _InitializeClass {
  # Remember the package name; it is interpolated into the error and warning
  # messages emitted by this class...
  $ClassName = __PACKAGE__;
}
134
135 # Initialize object properties....
sub _InitializeTopologicalPharmacophoreAtomTripletsFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  # Hand every name and value pair to the matching Set<Name> method...
  while (my($PropertyName, $PropertyValue) = each %NamesAndValues) {
    my $SetterName = "Set${PropertyName}";
    $This->$SetterName($PropertyValue);
  }

  # A molecule is mandatory; the check runs after the setters have been called...
  exists $NamesAndValues{Molecule}
    or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";

  $This->_InitializeTopologicalPharmacophoreAtomTripletsFingerprintsVector();

  return $This;
}
153
154 # Initialize fingerprints vector...
155 #
sub _InitializeTopologicalPharmacophoreAtomTripletsFingerprintsVector {
  my($This) = @_;

  # Fall back to ArbitrarySize when no set size has been chosen...
  $This->{AtomTripletsSetSizeToUse} ||= 'ArbitrarySize';

  # Vector type and type of values: FixedSize uses ordered numerical values,
  # anything else uses plain numerical values...
  $This->{VectorType} = 'FingerprintsVector';
  $This->{FingerprintsVectorType} = ($This->{AtomTripletsSetSizeToUse} =~ /^FixedSize$/i)
    ? 'OrderedNumericalValues'
    : 'NumericalValues';

  $This->_InitializeFingerprintsVector();
}
175
176 # Set atom triplets set size to use...
177 #
sub SetAtomTripletsSetSizeToUse {
  my($This, $Value) = @_;

  # The size can be chosen only once per object...
  croak "Error: ${ClassName}->SetAtomTripletsSetSizeToUse: Can't change size: It's already set..."
    if $This->{AtomTripletsSetSizeToUse};

  # Only the two documented values are accepted, in any letter case...
  croak "Error: ${ClassName}->SetAtomTripletsSetSizeToUse: Unknown AtomTripletsSetSizeToUse value: $Value; Supported values: ArbitrarySize or FixedSize"
    unless $Value =~ /^(ArbitrarySize|FixedSize)$/i;

  $This->{AtomTripletsSetSizeToUse} = $Value;

  return $This;
}
193
194 # Initialize topological atom types, generated by AtomTypes::FunctionalClassAtomTypes
195 # class, to use for atom triplets fingerprint generation...
196 #
197 # Let:
198 # HBD: HydrogenBondDonor
199 # HBA: HydrogenBondAcceptor
200 # PI : PositivelyIonizable
201 # NI : NegativelyIonizable
202 # Ar : Aromatic
203 # Hal : Halogen
204 # H : Hydrophobic
205 # RA : RingAtom
206 # CA : ChainAtom
207 #
208 # Then:
209 #
210 # Functional class atom type specification for an atom corresponds to:
211 #
212 # Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
213 #
214 # Default pharmacophore atom types [ Ref 71 ] to use for atom triplets fingerprint generation
215 # are: HBD, HBA, PI, NI, H, Ar
216 #
217 # FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
218 #
219 # HydrogenBondDonor: NH, NH2, OH
220 # HydrogenBondAcceptor: N[!H], O
221 # PositivelyIonizable: +, NH2
222 # NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
223 #
sub _InitializeToplogicalPharmacophoreAtomTypesInformation {
  my($This) = @_;

  # Default pharmacophore atom types for atom triplets fingerprint generation:
  # HBD, HBA, PI, NI, H and Ar, stored in sorted order...
  @{$This->{AtomTypesToUse}} = sort qw(HBD HBA PI NI H Ar);

  return $This;
}
235
236 # Set atom types to use for atom triplets...
237 #
sub SetAtomTypesToUse {
  my($This, @Values) = @_;

  # Nothing to set: warn and return nothing...
  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomTypesToUse: No values specified...";
    return;
  }

  # Atom types arrive either as a plain list or as one array reference...
  my @SpecifiedAtomTypes = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Every specified atom type must be a known functional class; the first
  # unknown one aborts the call...
  my @ValidatedAtomTypes;
  for my $SpecifiedAtomType (@SpecifiedAtomTypes) {
    AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedAtomType)
      or croak "Error: ${ClassName}->SetAtomTypesToUse: Specified atom type, $SpecifiedAtomType, is not supported...\n ";
    push @ValidatedAtomTypes, $SpecifiedAtomType;
  }

  # Store the atom types in sorted order...
  @{$This->{AtomTypesToUse}} = sort @ValidatedAtomTypes;

  return $This;
}
275
276 # Set minimum distance for pharmacophore atom pairs in atom triplets...
277 #
sub SetMinDistance {
  my($This, $Value) = @_;

  # Only values accepted by TextUtil::IsPositiveInteger are stored...
  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid: It must be a positive integer...";

  $This->{MinDistance} = $Value;

  return $This;
}
288
289 # Set maximum distance for pharmacophore atom pairs in atom triplets...
290 #
sub SetMaxDistance {
  my($This, $Value) = @_;

  # Only values accepted by TextUtil::IsPositiveInteger are stored...
  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid: It must be a positive integer...";

  $This->{MaxDistance} = $Value;

  return $This;
}
301
302 # Set distance bin size for binning pharmacophore atom pair distances in atom triplets...
303 #
sub SetDistanceBinSize {
  my($This, $Value) = @_;

  # Only values accepted by TextUtil::IsPositiveInteger are stored...
  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetDistanceBinSize: DistanceBinSize value, $Value, is not valid: It must be a positive integer...";

  $This->{DistanceBinSize} = $Value;

  return $This;
}
314
315 # Generate fingerprints description...
316 #
sub GetDescription {
  my($This) = @_;

  # An explicitly set description takes precedence over the generated one...
  return $This->{Description} if exists $This->{Description};

  # Otherwise describe the fingerprints by type, set size and distance range...
  return "$This->{Type}:$This->{AtomTripletsSetSizeToUse}:MinDistance$This->{MinDistance}:MaxDistance$This->{MaxDistance}";
}
329
330 # Generate topological pharmacophore atom triplets [ Ref 66, Ref 68-71 ] fingerprints...
331 #
332 # Let:
333 #
334 # P = Any of the supported pharmacophore atom types
335 #
336 # Px = Pharmacophore atom x
337 # Py = Pharmacophore atom y
338 # Pz = Pharmacophore atom z
339 #
340 # Dxy = Distance or lower bound of binned distance between Px and Py
341 # Dxz = Distance or lower bound of binned distance between Px and Pz
342 # Dyz = Distance or lower bound of binned distance between Py and Pz
343 #
344 # Then:
345 # PxDyz-PyDxz-PzDxy = Pharmacophore atom triplet ID for atoms Px, Py and Pz
346 #
347 # For example: H1-H1-H1, H2-HBA-H2 and so on
348 #
349 # Methodology:
350 # . Generate a distance matrix.
351 # . Using specified minimum, maximum and distance bin size, generate a binned distance
352 # matrix from distance matrix. The lower distance bound on the distance bin is used
353 # in the binned distance matrix and atom triplet IDs.
354 # . Assign pharmacophore atom types to all the atoms.
355 # . Initialize pharmacophore atom triplets basis set for all unique triplets constituting
356 # atom pairs binned distances between minimum and maximum distance.
357 # . Optionally, triangle inequality is also applied, which means:
358 # . Distance or binned distance between any two pairs in a triplet must be less than the
359 # sum of distances or binned distances between other two pairs and greater than the
360 # difference of distances between other pairs.
361 # . Using binned distance matrix and pharmacophore atom types, count occurrence of
362 # unique atom triplets.
363 #
364 # Notes:
365 # . Hydrogen atoms are ignored during the fingerprint generation.
366 #
sub GenerateFingerprints {
  my($This) = @_;

  # Runs the complete generation pipeline: cache molecule data, build the
  # distance matrix and its binned form, assign pharmacophore atom types, set
  # up the atom triplets basis set, count the triplets and store the result in
  # the fingerprints vector.
  #
  # Returns: $This.
  # Croaks:  when MinDistance is greater than MaxDistance.
  # Warns:   when no distance matrix could be generated; $This is then returned
  #          without fingerprints.

  if ($This->{MinDistance} > $This->{MaxDistance}) {
    # The message names this method, which is the one callers actually invoke...
    croak "Error: ${ClassName}->GenerateFingerprints: No fingerprints generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
  }

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Generate distance matrix...
  if (!$This->_SetupDistanceMatrix()) {
    # Release the cached molecule data on this exit path as well, so that a
    # failed run does not leave it attached to the object...
    $This->_ClearMoleculeDataCache();
    carp "Warning: ${ClassName}->GenerateFingerprints: Fingerprints generation didn't succeed: Couldn't generate distance matrix...";
    return $This;
  }

  # Generate binned distance matrix...
  $This->_GenerateBinnedDistanceMatrix();

  # Assign pharmacophore atom types to all heavy atoms...
  $This->_AssignPharmacophoreAtomTypes();

  # Initialize values of all possible pharmacophore atom triplets...
  $This->_InitializePharmacophoreAtomTriplets();

  # Count atom triplets...
  $This->_CountPharmacohoreAtomTriplets();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}
403
404 # Setup distance matrix...
405 #
sub _SetupDistanceMatrix {
  my($This) = @_;

  # Ask the molecule for its distance matrix and keep whatever comes back...
  my $DistanceMatrix = $This->GetMolecule()->GetDistanceMatrix();
  $This->{DistanceMatrix} = $DistanceMatrix;

  # A false matrix signals failure to the caller through undef...
  return $DistanceMatrix ? $This : undef;
}
417
418 # Generate binned distance matrix for distances with in the specified distance ranges...
419 #
sub _GenerateBinnedDistanceMatrix {
  my($This) = @_;

  # Builds $This->{BinnedDistanceMatrix} from $This->{DistanceMatrix}: every
  # distance inside [MinDistance, MaxDistance] is replaced by the lower bound
  # of its distance bin, symmetrically for both (row, col) and (col, row).
  # Distances outside the range are skipped, so those entries keep whatever
  # value a new matrix starts with (presumably 0, which the triplet counting
  # code treats as "no usable distance" - TODO confirm against Matrix).
  #
  # Returns: $This.

  my $DistanceMatrix = $This->{DistanceMatrix};
  my($NumOfRows, $NumOfCols) = $DistanceMatrix->GetSize();

  # Direct method call instead of indirect object syntax: "new Matrix(...)"
  # is resolved by parser heuristics and this package has its own new()...
  my $BinnedDistanceMatrix = Matrix->new($NumOfRows, $NumOfCols);

  my($MinDistance, $MaxDistance, $DistanceBinSize) = @{$This}{qw(MinDistance MaxDistance DistanceBinSize)};

  # Map each distance covered by a bin to the lower bound of that bin. The
  # last bin may extend past MaxDistance; such distances are never looked up
  # because of the range check below...
  my %DistanceToBinnedDistance;
  for (my $LowerBound = $MinDistance; $LowerBound <= $MaxDistance; $LowerBound += $DistanceBinSize) {
    for my $Distance ($LowerBound .. ($LowerBound + $DistanceBinSize - 1)) {
      $DistanceToBinnedDistance{$Distance} = $LowerBound;
    }
  }

  # Walk the upper triangle and mirror each binned value...
  my $SkipIndexCheck = 0;
  for my $RowIndex (0 .. ($NumOfRows - 1)) {
    for my $ColIndex (($RowIndex + 1) .. ($NumOfCols - 1)) {
      my $Distance = $DistanceMatrix->GetValue($RowIndex, $ColIndex, $SkipIndexCheck);
      next if $Distance < $MinDistance || $Distance > $MaxDistance;

      my $BinnedDistance = $DistanceToBinnedDistance{$Distance};
      $BinnedDistanceMatrix->SetValue($RowIndex, $ColIndex, $BinnedDistance, $SkipIndexCheck);
      $BinnedDistanceMatrix->SetValue($ColIndex, $RowIndex, $BinnedDistance, $SkipIndexCheck);
    }
  }

  $This->{BinnedDistanceMatrix} = $BinnedDistanceMatrix;

  return $This;
}
457
458 # Assign pharmacohore atom types to all heavy atoms...
459 #
sub _AssignPharmacophoreAtomTypes {
  my($This) = @_;

  # Fills $This->{AssignedAtomTypes} with one entry per non-hydrogen atom in
  # $This->{Atoms}: atom ID => reference to the list of pharmacophore atom
  # types assigned to that atom. The list is empty when no type was assigned.
  #
  # Returns: $This.

  # Direct method call instead of indirect object syntax, which is resolved
  # by parser heuristics and is fragile in a package with its own new()...
  my $FunctionalClassAtomTypes = AtomTypes::FunctionalClassAtomTypes->new(
    'Molecule' => $This->{Molecule},
    'IgnoreHydrogens' => 1,
    'FunctionalClassesToUse' => $This->{AtomTypesToUse},
  );
  $FunctionalClassAtomTypes->AssignAtomTypes();

  %{$This->{AssignedAtomTypes}} = ();

  for my $Atom (@{$This->{Atoms}}) {
    next if $Atom->IsHydrogen();

    my $AtomID = $Atom->GetID();
    my $AtomType = $FunctionalClassAtomTypes->GetAtomType($Atom);

    # The assigned type is a dot separated list of functional classes; an
    # empty value or "None" means nothing was assigned...
    my @AtomTypes = ($AtomType && $AtomType !~ /^None$/i) ? split(/\./, $AtomType) : ();

    $This->{AssignedAtomTypes}{$AtomID} = \@AtomTypes;
  }

  return $This;
}
488
489 # Initialize pharmacophore atom triplets basis set for all unique triplets constituting atom pairs
490 # binned distances between minimum and maximum distance and optionally applying triangle
491 # inequality. The DistanceBinSize determines the size of the distance bins. The lower distance
492 # bound, along with specified pharmacophore types, is used during generation of atom triplet
493 # IDs.
494 #
495 #
sub _InitializePharmacophoreAtomTriplets {
  my($This) = @_;

  # Lower bounds of all distance bins, in increasing order...
  my @BinLowerBounds;
  for (my $LowerBound = $This->{MinDistance}; $LowerBound <= $This->{MaxDistance}; $LowerBound += $This->{DistanceBinSize}) {
    push @BinLowerBounds, $LowerBound;
  }

  # Enumerate binned distance triplets; the third distance starts at the first
  # one. Every atom type combination of an accepted distance triplet gets an
  # ID with a zero count, unless that ID has been seen before...
  for my $BinnedDistance12 (@BinLowerBounds) {
    for my $BinnedDistance13 (@BinLowerBounds) {
      for my $BinnedDistance23 (grep { $_ >= $BinnedDistance12 } @BinLowerBounds) {
        next if $This->{UseTriangleInequality}
          && !$This->_DoDistancesSatisfyTriangleInequality($BinnedDistance12, $BinnedDistance13, $BinnedDistance23);

        for my $AtomType1 (@{$This->{AtomTypesToUse}}) {
          for my $AtomType2 (@{$This->{AtomTypesToUse}}) {
            for my $AtomType3 (@{$This->{AtomTypesToUse}}) {
              my $AtomTripletID = $This->_GetAtomTripletID($AtomType1, $BinnedDistance23, $AtomType2, $BinnedDistance13, $AtomType3, $BinnedDistance12);
              next if exists $This->{AtomTriplets}{Count}{$AtomTripletID};

              # Unique atom triplets information...
              push @{$This->{AtomTriplets}{IDs}}, $AtomTripletID;
              $This->{AtomTriplets}{Count}{$AtomTripletID} = 0;
            }
          }
        }
      }
    }
  }

  return $This;
}
525
526 # Check triangle inequality...
527 #
sub _DoDistancesSatisfyTriangleInequality {
  my($This, $Distance1, $Distance2, $Distance3) = @_;

  # Each distance in turn must be strictly greater than the absolute difference
  # of the other two and strictly less than their sum...
  my @Distances = ($Distance1, $Distance2, $Distance3);
  for my $Index (0 .. 2) {
    my($Side, $OtherA, $OtherB) = @Distances[$Index, ($Index + 1) % 3, ($Index + 2) % 3];
    return 0 unless ($Side > abs($OtherA - $OtherB) && $Side < ($OtherA + $OtherB));
  }

  return 1;
}
542
543 # Count pharmacophore atom triplets...
544 #
sub _CountPharmacohoreAtomTriplets {
  my($This) = @_;

  my $BinnedDistanceMatrix = $This->{BinnedDistanceMatrix};
  my $SkipIndexCheck = 0;
  my $LastAtomIndex = @{$This->{Atoms}} - 1;

  # Collect, in index order, the atoms that carry at least one assigned
  # pharmacophore atom type; all other atoms never take part in a triplet...
  my(@TypedAtomIndices, %TypesAtIndex);
  for my $AtomIndex (0 .. $LastAtomIndex) {
    my $AtomID = $This->{AtomIndexToID}{$AtomIndex};
    next unless exists($This->{AssignedAtomTypes}{$AtomID}) && @{$This->{AssignedAtomTypes}{$AtomID}};

    push @TypedAtomIndices, $AtomIndex;
    $TypesAtIndex{$AtomIndex} = $This->{AssignedAtomTypes}{$AtomID};
  }

  # Visit every combination of three typed atoms with increasing indices...
  my $LastPosition = $#TypedAtomIndices;
  for my $Position1 (0 .. $LastPosition) {
    my $AtomIndex1 = $TypedAtomIndices[$Position1];

    for my $Position2 (($Position1 + 1) .. $LastPosition) {
      my $AtomIndex2 = $TypedAtomIndices[$Position2];

      # A zero binned distance marks a pair without a usable distance...
      my $BinnedDistance12 = $BinnedDistanceMatrix->GetValue($AtomIndex1, $AtomIndex2, $SkipIndexCheck);
      next if $BinnedDistance12 == 0;

      for my $Position3 (($Position2 + 1) .. $LastPosition) {
        my $AtomIndex3 = $TypedAtomIndices[$Position3];

        my $BinnedDistance13 = $BinnedDistanceMatrix->GetValue($AtomIndex1, $AtomIndex3, $SkipIndexCheck);
        my $BinnedDistance23 = $BinnedDistanceMatrix->GetValue($AtomIndex2, $AtomIndex3, $SkipIndexCheck);
        next if $BinnedDistance13 == 0 || $BinnedDistance23 == 0;

        next if $This->{UseTriangleInequality}
          && !$This->_DoDistancesSatisfyTriangleInequality($BinnedDistance12, $BinnedDistance13, $BinnedDistance23);

        # Count one triplet for every combination of the atom types assigned
        # to the three atoms, using the binned distances...
        for my $AtomType1 (@{$TypesAtIndex{$AtomIndex1}}) {
          for my $AtomType2 (@{$TypesAtIndex{$AtomIndex2}}) {
            for my $AtomType3 (@{$TypesAtIndex{$AtomIndex3}}) {
              my $AtomTripletID = $This->_GetAtomTripletID($AtomType1, $BinnedDistance23, $AtomType2, $BinnedDistance13, $AtomType3, $BinnedDistance12);
              $This->{AtomTriplets}{Count}{$AtomTripletID} += 1;
            }
          }
        }
      }
    }
  }

  return $This;
}
598
599 # Set final fingerprints vector...
600 #
sub _SetFinalFingerprints {
  my($This) = @_;

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  my $AllIDsRef = \@{$This->{AtomTriplets}{IDs}};
  my $CountOf = $This->{AtomTriplets}{Count};
  my @Values;

  if ($This->{AtomTripletsSetSizeToUse} =~ /^ArbitrarySize$/i) {
    # ArbitrarySize: keep only the atom triplets with a non-zero count...
    my @IDs = grep { $CountOf->{$_} != 0 } @{$AllIDsRef};
    @Values = @{$CountOf}{@IDs};
    $This->{FingerprintsVector}->AddValueIDs(\@IDs);
  }
  else {
    # Otherwise: every atom triplet of the basis set, zero counts included...
    @Values = map { $CountOf->{$_} } @{$AllIDsRef};
    $This->{FingerprintsVector}->AddValueIDs($AllIDsRef);
  }

  # Set atom triplets count values for fingerprint vector...
  $This->{FingerprintsVector}->AddValues(\@Values);

  return $This;
}
641
642 # Return an array or reference to an array containing atom triplet IDs...
643 #
sub GetAtomTripletIDs {
  my($This) = @_;

  # List context receives the IDs themselves; any other context receives a
  # reference to the stored IDs array...
  my $AtomTripletIDsRef = \@{$This->{AtomTriplets}{IDs}};

  return wantarray ? @{$AtomTripletIDsRef} : $AtomTripletIDsRef;
}
649
650 # Get pharmacophore atom triplet ID corresponding to atom types and distances
651 # corresponding to atom triplet...
652 #
sub _GetAtomTripletID {
  my($This, $Px, $Dyz, $Py, $Dxz, $Pz, $Dxy) = @_;

  # Pair each atom type with the distance between the other two atoms, then
  # sort the three labels so the ID is independent of the atom order...
  my @AtomLabels = ($Px . $Dyz, $Py . $Dxz, $Pz . $Dxy);

  return join("-", sort @AtomLabels);
}
664
665 # Cache appropriate molecule data...
666 #
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # All atoms, hydrogens included, are cached so that atom indices map onto
  # atom IDs the same way the distance matrix indexes them. The hydrogen atoms
  # are ignored during processing...
  my @Atoms = $This->GetMolecule()->GetAtoms();
  @{$This->{Atoms}} = @Atoms;

  # Atom index to atom ID map...
  my @AtomIDs = map { $_->GetID() } @Atoms;
  %{$This->{AtomIndexToID}} = map { ($_ => $AtomIDs[$_]) } 0 .. $#AtomIDs;

  return $This;
}
686
687 # Clear cached molecule data...
688 #
sub _ClearMoleculeDataCache {
  my($This) = @_;

  # Empty the cached atoms list in place; nothing else is touched here...
  @{$This->{Atoms}} = ();

  return $This;
}
696
697
698 # Return a string containing data for TopologicalPharmacophoreAtomTripletsFingerprints object...
699 #
sub StringifyTopologicalPharmacophoreAtomTripletsFingerprints {
  my($This) = @_;

  # Pharmacophore atom type labels and descriptions, in the order reported by
  # the functional class atom types module...
  my @AtomTypesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
  my %AvailableAtomTypes = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
  my @AtomTypes = map { my $AtomType = $_; "$AtomType: $AvailableAtomTypes{$AtomType}" } @AtomTypesOrder;

  # The report is assembled from consecutive sections: fingerprint type,
  # distances information, atom types, number of atom triplets and finally
  # the fingerprints vector itself...
  my @Sections = (
    "Fingerprint type: $This->{Type}; AtomTripletsSetSizeToUse: $This->{AtomTripletsSetSizeToUse}",
    "; MinDistance: $This->{MinDistance}; MaxDistance: $This->{MaxDistance}; DistanceBinSize: $This->{DistanceBinSize}; UseTriangleInequality: " . ($This->{UseTriangleInequality} ? "Yes" : "No"),
    "; AtomTypesToUse: <" . TextUtil::JoinWords(\@{$This->{AtomTypesToUse}}, ", ", 0) . ">",
    "; AtomTypesOrder: <" . TextUtil::JoinWords(\@AtomTypesOrder, ", ", 0) . ">",
    "; AvailableAtomTypes: <" . TextUtil::JoinWords(\@AtomTypes, ", ", 0) . ">",
    "; NumOfAtomTriplets: " . $This->{FingerprintsVector}->GetNumOfValues(),
    "; FingerprintsVector: < $This->{FingerprintsVector} >",
  );

  return join('', @Sections);
}
733
734 1;
735
736 __END__
737
738 =head1 NAME
739
740 TopologicalPharmacophoreAtomTripletsFingerprints
741
742 =head1 SYNOPSIS
743
744 use Fingerprints::TopologicalPharmacophoreAtomTripletsFingerprints;
745
746 use Fingerprints::TopologicalPharmacophoreAtomTripletsFingerprints qw(:all);
747
748 =head1 DESCRIPTION
749
750 B<TopologicalPharmacophoreAtomTripletsFingerprints> [ Ref 66, Ref 68-71 ] class provides
751 the following methods:
752
753 new, GenerateFingerprints, GetDescription, GetAtomTripletIDs,
754 SetAtomTypesToUse, SetDistanceBinSize, SetMaxDistance, SetMinDistance,
755 StringifyTopologicalPharmacophoreAtomTripletsFingerprints
756
757 B<TopologicalPharmacophoreAtomTripletsFingerprints> is derived from B<Fingerprints> class
758 which in turn is derived from B<ObjectProperty> base class that provides methods not explicitly
759 defined in B<TopologicalPharmacophoreAtomTripletsFingerprints>, B<Fingerprints> or B<ObjectProperty>
760 classes using Perl's AUTOLOAD functionality. These methods are generated on-the-fly for a specified
761 object property:
762
763 Set<PropertyName>(<PropertyValue>);
764 $PropertyValue = Get<PropertyName>();
765 Delete<PropertyName>();
766
767 Based on the values specified for B<AtomTypesToUse>, pharmacophore atom types are
768 assigned to all non-hydrogen atoms in a molecule and a distance matrix is generated.
769 Using B<MinDistance>, B<MaxDistance>, and B<DistanceBinSize> values, a
770 binned distance matrix is generated with lower bound on the distance bin as the distance
771 in distance matrix; the lower bound on the distance bin is also used as the distance between
772 atom pairs for generation of atom triplet identifiers.
773
774 A pharmacophore atom triplets basis set is generated for all unique atom triplets constituting
775 atom pairs binned distances between B<--MinDistance> and B<--MaxDistance>. The value
776 of B<--UseTriangleInequality> determines whether the triangle inequality test is applied during
777 generation of atom triplets basis set. The lower distance bound, along with specified pharmacophore
778 types, is used during generation of atom triplet IDs.
779
780 Let:
781
782 P = Valid pharmacophore atom type
783
784 Px = Pharmacophore atom x
785 Py = Pharmacophore atom y
786 Pz = Pharmacophore atom z
787
788 Dmin = Minimum distance corresponding to number of bonds between two atoms
789 Dmax = Maximum distance corresponding to number of bonds between two atoms
790 D = Distance corresponding to number of bonds between two atoms
791
792 Bsize = Distance bin size
793 Nbins = Number of distance bins
794
795 Dxy = Distance or lower bound of binned distance between Px and Py
796 Dxz = Distance or lower bound of binned distance between Px and Pz
797 Dyz = Distance or lower bound of binned distance between Py and Pz
798
799 Then:
800
801 PxDyz-PyDxz-PzDxy = Pharmacophore atom triplet IDs for atom types Px,
802 Py, and Pz
803
804 For example: H1-H1-H1, H2-HBA-H2 and so on.
805
806 For default values of Dmin = 1 , Dmax = 10 and Bsize = 2, the number of
807 distance bins, Nbins = 5, are:
808
809 [1, 2] [3, 4] [5, 6] [7, 8] [9 10]
810
811 and atom triplet basis set size is 2692.
812
813 Atom triplet basis set size for various values of Dmin, Dmax and Bsize in
814 conjunction with usage of triangle inequality is:
815
816 Dmin Dmax Bsize UseTriangleInequality TripletBasisSetSize
817 1 10 2 No 4960
818 1 10 2 Yes 2692 [ Default ]
819 2 12 2 No 8436
820 2 12 2 Yes 4494
821
822
823 Using binned distance matrix and pharmacophore atom types, occurrence of unique pharmacophore
824 atom triplets is counted.
825
826 The final pharmacophore atom triplets count along with atom triplet identifiers involving all non-hydrogen
827 atoms constitute pharmacophore topological atom triplets fingerprints of the molecule.
828
829 For I<ArbitrarySize> value of B<AtomTripletsSetSizeToUse>, the fingerprint vector correspond to
830 only those topological pharmacophore atom triplets which are present and have non-zero count. However,
831 for I<FixedSize> value of B<AtomTripletsSetSizeToUse>, the fingerprint vector contains all possible
832 valid topological pharmacophore atom triplets with both zero and non-zero count values.
833
834 The current release of MayaChemTools generates the following types of topological pharmacophore
835 atom triplets fingerprints vector strings:
836
837 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize:
838 MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1-
839 Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1
840 -H1-HBD1 Ar1-HBA1-HBD1 H1-H1-H1 H1-H1-HBA1 H1-H1-HBD1 H1-HBA1-HBA1 H1-
841 HBA1-HBD1 H1-HBA1-NI1 H1-HBD1-NI1 HBA1-HBA1-NI1 HBA1-HBD1-NI1 Ar1-...;
842 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23
843 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1
844 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ...
845
846 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD
847 istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106
848 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0
849 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26
850 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0
851 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ...
852
853 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD
854 istance1:MaxDistance10;2692;OrderedNumericalValues;IDsAndValuesString;
855 Ar1-Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-Ar1-NI1 Ar1-Ar1-P
856 I1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1-H1-HBD1 Ar1-H1-NI1 Ar1-H1-PI1 Ar1-HBA1-HB
857 A1 Ar1-HBA1-HBD1 Ar1-HBA1-NI1 Ar1-HBA1-PI1 Ar1-HBD1-HBD1 Ar1-HBD1-...;
858 46 106 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1
859 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145
860 132 26 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 ...
861
862 =head2 METHODS
863
864 =over 4
865
866 =item B<new>
867
868 $TPATFP = new TopologicalPharmacophoreAtomTripletsFingerprints(
869 %NamesAndValues);
870
871 Using specified I<TopologicalPharmacophoreAtomTripletsFingerprints> property names and values hash, B<new>
872 method creates a new object and returns a reference to newly created B<TopologicalPharmacophoreAtomTripletsFingerprints>
873 object. By default, the following properties are initialized:
874
875 Molecule = ''
876 Type = 'TopologicalPharmacophoreAtomTriplets'
877 MinDistance = 1
878 MaxDistance = 10
879 DistanceBinSize = 2
880 UseTriangleInequality = 1
881 AtomTypesToUse = ['HBD', 'HBA', 'PI', 'NI', 'H', 'Ar']
882
883 Examples:
884
885 $TPATFP = new TopologicalPharmacophoreAtomTripletsFingerprints(
886 'Molecule' => $Molecule);
887
888 $TPATFP = new TopologicalPharmacophoreAtomTripletsFingerprints(
889 'Molecule' => $Molecule,
890 'AtomTripletsSetSizeToUse' => 'ArbitrarySize',
891 'MinDistance' => 1,
892 'MaxDistance' => 10,
893 'DistanceBinSize' => 2,
894 'AtomTypesToUse' => ['HBD', 'HBA', 'PI', 'NI', 'H', 'Ar'],
895 'UseTriangleInequality' => 1);
896
897 $TPATFP = new TopologicalPharmacophoreAtomTripletsFingerprints(
898 'Molecule' => $Molecule,
899 'AtomTripletsSetSizeToUse' => 'FixedSize',
900 'MinDistance' => 1,
901 'MaxDistance' => 10,
902 'DistanceBinSize' => 2,
903 'AtomTypesToUse' => ['HBD', 'HBA', 'PI', 'NI', 'H', 'Ar'],
904 'UseTriangleInequality' => 1);
905
906 $TPATFP->GenerateFingerprints();
907 print "$TPATFP\n";
908
909 =item B<GetDescription>
910
911 $Description = $TopologicalPharmacophoreAtomTripletsFP->GetDescription();
912
913 Returns a string containing description of topological pharmacophore atom triplets fingerprints.
914
915 =item B<GenerateFingerprints>
916
917 $TopologicalPharmacophoreAtomTripletsFP->GenerateFingerprints();
918
919 Generates topological pharmacophore atom triplets fingerprints and returns
920 I<TopologicalPharmacophoreAtomTripletsFP>.
921
922 =item B<GetAtomTripletIDs>
923
924 $AtomTripletsIDsRef = $TopologicalPharmacophoreATFP->GetAtomTripletIDs();
925 @AtomTripletIDs = $TopologicalPharmacophoreATFP->GetAtomTripletIDs();
926
927 Returns atom triplet IDs corresponding to atom triplets count values in topological pharmacophore
928 atom triplet fingerprints vector as an array or reference to an array.
929
930 =item B<AtomTripletsSetSizeToUse>
931
932 $TPAFP->AtomTripletsSetSizeToUse($Values);
933
934 Sets pharmacophore atom triplets set size to use for topological pharmacophore fingerprints
935 generation and returns I<TopologicalPharmacophoreAtomTripletsFingerprints>.
936
937 Possible values for pharmacophore atom triplets set size are: I<ArbitrarySize, FixedSize>.
938 Default value: I<ArbitrarySize>.
939
940 For I<ArbitrarySize> value of B<AtomTripletsSetSizeToUse>, the fingerprint vector corresponds to
941 only those topological pharmacophore atom triplets which are present and have non-zero count. However,
942 for I<FixedSize> value of B<AtomTripletsSetSizeToUse>, the fingerprint vector contains all possible
943 valid topological pharmacophore atom triplets with both zero and non-zero count values.
944
945 =item B<SetAtomTypesToUse>
946
947 $TopologicalPharmacophoreAtomTripletsFP->SetAtomTypesToUse($ValuesRef);
948 $TopologicalPharmacophoreAtomTripletsFP->SetAtomTypesToUse(@Values);
949
950 Sets pharmacophore atom types to use for topological pharmacophore fingerprints
951 generation and returns I<TopologicalPharmacophoreAtomTripletsFingerprints>.
952
953 Possible values for pharmacophore atom types are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>.
954 Default value [ Ref 71 ] : I<HBD,HBA,PI,NI,H,Ar>.
955
956 The pharmacophore atom types abbreviations correspond to:
957
958 HBD: HydrogenBondDonor
959 HBA: HydrogenBondAcceptor
960 PI : PositivelyIonizable
961 NI : NegativelyIonizable
962 Ar : Aromatic
963 Hal : Halogen
964 H : Hydrophobic
965 RA : RingAtom
966 CA : ChainAtom
967
968 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign pharmacophore atom
969 types. It uses following definitions [ Ref 60-61, Ref 65-66 ]:
970
971 HydrogenBondDonor: NH, NH2, OH
972 HydrogenBondAcceptor: N[!H], O
973 PositivelyIonizable: +, NH2
974 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
975
976
977 =item B<SetDistanceBinSize>
978
979 $TopologicalPharmacophoreAtomTripletsFP->SetDistanceBinSize($Value);
980
981 Sets distance bin size used to bin distances between atom pairs in atom triplets and returns
982 I<TopologicalPharmacophoreAtomTripletsFP>.
983
984 For default B<MinDistance> and B<MaxDistance> values of 1 and 10 with B<DistanceBinSize>
985 of 2 [ Ref 70 ], the following 5 distance bins are generated:
986
987 [1, 2] [3, 4] [5, 6] [7, 8] [9, 10]
988
989 The lower distance bound on the distance bin is used to bin the distance between atom pairs in
990 atom triplets. So in the previous example, atom pairs with distances 1 and 2 fall in first distance
991 bin, atom pairs with distances 3 and 4 fall in second distance bin and so on.
992
993 In order to distribute distance bins of equal size, the last bin is allowed to go past B<MaxDistance>
994 by up to distance bin size. For example, B<MinDistance> and B<MaxDistance> values of 2 and 10
995 with B<DistanceBinSize> of 2 generates the following 5 distance bins:
996
997 [2, 3] [4, 5] [6, 7] [8, 9] [10, 11]
998
999
1000 =item B<SetMaxDistance>
1001
1002 $TopologicalPharmacophoreAtomTripletsFP->SetMaxDistance($Value);
1003
1004 Sets maximum bond distance between atom pairs corresponding to atom triplets for
1005 generating topological pharmacophore atom triplets fingerprints and returns
1006 I<TopologicalPharmacophoreAtomTripletsFP>.
1007
1008 =item B<SetMinDistance>
1009
1010 $TopologicalPharmacophoreAtomTripletsFP->SetMinDistance($Value);
1011
1012 Sets minimum bond distance between atom pairs corresponding to atom triplets for
1013 generating topological pharmacophore atom triplets fingerprints and returns
1014 I<TopologicalPharmacophoreAtomTripletsFP>.
1015
1016 =item B<StringifyTopologicalPharmacophoreAtomTripletsFingerprints>
1017
1018 $String = $TopologicalPharmacophoreAtomTripletsFingerprints->
1019 StringifyTopologicalPharmacophoreAtomTripletsFingerprints();
1020
1021 Returns a string containing information about I<TopologicalPharmacophoreAtomTripletsFingerprints> object.
1022
1023 =back
1024
1025 =head1 AUTHOR
1026
1027 Manish Sud <msud@san.rr.com>
1028
1029 =head1 SEE ALSO
1030
1031 Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm,
1032 AtomTypesFingerprints.pm, EStateIndiciesFingerprints.pm, ExtendedConnectivityFingerprints.pm,
1033 MACCSKeys.pm, PathLengthFingerprints.pm, TopologicalAtomPairsFingerprints.pm,
1034 TopologicalAtomTripletsFingerprints.pm, TopologicalAtomTorsionsFingerprints.pm,
1035 TopologicalPharmacophoreAtomPairsFingerprints.pm
1036
1037 =head1 COPYRIGHT
1038
1039 Copyright (C) 2015 Manish Sud. All rights reserved.
1040
1041 This file is part of MayaChemTools.
1042
1043 MayaChemTools is free software; you can redistribute it and/or modify it under
1044 the terms of the GNU Lesser General Public License as published by the Free
1045 Software Foundation; either version 3 of the License, or (at your option)
1046 any later version.
1047
1048 =cut