comparison mayachemtool/mayachemtools/lib/Fingerprints/TopologicalPharmacophoreAtomPairsFingerprints.pm @ 0:a4a2ad5a214e draft default tip

Uploaded
author deepakjadmin
date Thu, 05 Nov 2015 02:37:56 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a4a2ad5a214e
1 package Fingerprints::TopologicalPharmacophoreAtomPairsFingerprints;
2 #
3 # $RCSfile: TopologicalPharmacophoreAtomPairsFingerprints.pm,v $
4 # $Date: 2015/02/28 20:48:54 $
5 # $Revision: 1.34 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Carp;
31 use Exporter;
32 use Fingerprints::Fingerprints;
33 use TextUtil ();
34 use MathUtil ();
35 use Molecule;
36 use AtomTypes::FunctionalClassAtomTypes;
37
# Package globals consulted by Exporter and by method resolution (@ISA)...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

# Inherit from the generic fingerprints class; nothing is exported...
@ISA = qw(Fingerprints::Fingerprints Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables: $ClassName is used as prefix in error and warning
# messages and is assigned by _InitializeClass...
my($ClassName);
_InitializeClass();

# Overload Perl functions: stringification of the object...
use overload '""' => 'StringifyTopologicalPharmacophoreAtomPairsFingerprints';
52
# Class constructor...
#
# Creates the object using the parent class constructor, blesses it into the
# requested class, assigns default values and then applies the properties
# specified as name and value pairs.
#
sub new {
  my($Class, %NamesAndValues) = @_;
  my($This, $ObjectClass);

  # Initialize object...
  $This = $Class->SUPER::new();
  $ObjectClass = ref($Class) || $Class;
  bless $This, $ObjectClass;

  # Defaults first, followed by explicitly specified properties...
  $This->_InitializeTopologicalPharmacophoreAtomPairsFingerprints();
  $This->_InitializeTopologicalPharmacophoreAtomPairsFingerprintsProperties(%NamesAndValues);

  return $This;
}
66
# Initialize object data with default values...
#
sub _InitializeTopologicalPharmacophoreAtomPairsFingerprints {
  my($This) = @_;
  my($Name, %Defaults);

  %Defaults = (
    # Type of fingerprint...
    'Type' => 'TopologicalPharmacophoreAtomPairs',

    # Type of vector...
    'VectorType' => 'FingerprintsVector',

    # AtomPairsSetSizeToUse...
    #
    # ArbitrarySize - Corresponds to atom pairs with non-zero count
    # FixedSize - Corresponds to all atom pairs with zero and non-zero count
    #
    # Possible values: ArbitrarySize or FixedSize. It is left empty here and
    # set to ArbitrarySize during initialization of the fingerprints vector
    # unless a value has been specified.
    #
    'AtomPairsSetSizeToUse' => '',

    # Type of FingerprintsVector...
    #
    # NumericalValues - For ArbitrarySize value of AtomPairsSetSizeToUse
    # OrderedNumericalValues - For FixedSize value of AtomPairsSetSizeToUse
    #
    # It is left empty here and assigned during initialization of the
    # fingerprints vector.
    #
    'FingerprintsVectorType' => '',

    # Vector values precision for real values which might be generated after
    # normalization and fuzzification...
    'ValuesPrecision' => 2,

    # Minimum and maximum bond distance between pharmacophore atom pairs...
    'MinDistance' => 1,
    'MaxDistance' => 10,

    # Normalization methodology to use for scaling the occurrence count of
    # pharmacophore atom pairs at various distances.
    #
    # Possible values: None, ByHeavyAtomsCount, ByAtomTypesCount. Default: None
    #
    'NormalizationMethodology' => 'None',
  );
  for $Name (keys %Defaults) {
    $This->{$Name} = $Defaults{$Name};
  }

  # Atom types, atom type weights and fuzzification parameters...
  $This->_InitializePharmacophoreAtomTypesAndWeightInformation();
  $This->_InitializeFuzzificationInformation();

  # Pharmacophore types assigned to each heavy atom and the count of each
  # assigned type in the molecule...
  %{$This->{AssignedAtomTypes}} = ();
  %{$This->{AssignedAtomTypesCount}} = ();

  # All pharmacophore atom pairs between minimum and maximum distance...
  @{$This->{AtomPairsIDs}} = ();
  %{$This->{AtomPairsCount}} = ();
}
131
# Initialize pharmacophore atom types and weight information...
#
sub _InitializePharmacophoreAtomTypesAndWeightInformation {
  my($This) = @_;
  my(%AvailableAtomTypes);

  # Default pharmacophore atom types to use for atom pairs fingerprint
  # generation, kept in sorted order: HBD, HBA, PI, NI, H
  #
  @{$This->{AtomTypesToUse}} = sort qw(HBD HBA PI NI H);

  # Weight of the various pharmacophore atom types to use for their
  # contribution to atom pair interaction. A value of 0 eliminates the
  # contribution by a particular pharmacophore atom type and 2 doubles its
  # contribution. Every available functional class starts with a weight of 1.
  #
  %AvailableAtomTypes = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
  %{$This->{AtomTypesWeight}} = map { ($_, 1) } keys %AvailableAtomTypes;

  return $This;
}
160
# Initialize fuzzification information with default values...
#
sub _InitializeFuzzificationInformation {
  my($This) = @_;
  my($Name, %FuzzificationDefaults);

  %FuzzificationDefaults = (
    # To fuzz or not to fuzz atom pairs count. Default: No fuzzification
    'FuzzifyAtomPairsCount' => 0,

    # When to fuzz atom pair count: BeforeNormalization or AfterNormalization
    'FuzzificationMode' => 'AfterNormalization',

    # How to fuzz atom pair count: FuzzyBinning or FuzzyBinSmoothing
    'FuzzificationMethodology' => 'FuzzyBinning',

    # By how much to fuzz atom pairs count...
    'FuzzFactor' => 0.15,
  );

  for $Name (keys %FuzzificationDefaults) {
    $This->{$Name} = $FuzzificationDefaults{$Name};
  }

  return $This;
}
188
# Initialize class level data: record the package name which is used as
# prefix in error and warning messages...
sub _InitializeClass {
  # Class name...
  return $ClassName = __PACKAGE__;
}
194
# Initialize object properties...
#
# Each specified name and value pair is applied by invoking the corresponding
# Set<Name> method. A Molecule must be among the specified names; otherwise
# the method croaks. The fingerprints vector is initialized at the end.
#
# Returns the object.
#
sub _InitializeTopologicalPharmacophoreAtomPairsFingerprintsProperties {
  my($This, %NamesAndValues) = @_;
  my($Name, $MethodName, @Names);

  # SetFuzzFactor validates its value against the current
  # FuzzificationMethodology. Hash ordering is not predictable, so FuzzFactor
  # is applied after all other properties to make the validation independent
  # of the order in which names come out of the hash.
  @Names = grep { $_ ne 'FuzzFactor' } keys %NamesAndValues;
  if (exists $NamesAndValues{FuzzFactor}) {
    push @Names, 'FuzzFactor';
  }

  for $Name (@Names) {
    $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }

  $This->_InitializeTopologicalPharmacophoreAtomPairsFingerprintsVector();

  return $This;
}
214
# Initialize fingerprints vector...
#
# AtomPairsSetSizeToUse defaults to ArbitrarySize when it hasn't been set.
# FixedSize corresponds to OrderedNumericalValues vector values; anything
# else corresponds to NumericalValues.
#
sub _InitializeTopologicalPharmacophoreAtomPairsFingerprintsVector {
  my($This) = @_;

  $This->{AtomPairsSetSizeToUse} ||= 'ArbitrarySize';

  # Vector type and type of values...
  $This->{VectorType} = 'FingerprintsVector';
  $This->{FingerprintsVectorType} = ($This->{AtomPairsSetSizeToUse} =~ /^FixedSize$/i) ? 'OrderedNumericalValues' : 'NumericalValues';

  return $This->_InitializeFingerprintsVector();
}
236
# Set atom pairs set size to use...
#
# The size can be set only once; supported values are ArbitrarySize and
# FixedSize (case insensitive).
#
sub SetAtomPairsSetSizeToUse {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetAtomPairsSetSizeToUse: Can't change size: It's already set..."
    if $This->{AtomPairsSetSizeToUse};

  croak "Error: ${ClassName}->SetAtomPairsSetSizeToUse: Unknown AtomPairsSetSizeToUse value: $Value; Supported values: ArbitrarySize or FixedSize"
    unless $Value =~ /^(ArbitrarySize|FixedSize)$/i;

  $This->{AtomPairsSetSizeToUse} = $Value;

  return $This;
}
254
# Disable change of AvailableAtomTypes: a warning is issued and the object is
# returned unchanged...
#
sub SetAvailableAtomTypes {
  my $This = shift;

  carp "Warning: ${ClassName}->SetAvailableAtomTypes: AvailableAtomTypes value can't be set...";

  return $This;
}
264
# Set atom types to use for atom pairs...
#
# The atom types may be specified as a list or as a reference to an array.
# Each specified type must be an available functional class; the validated
# types are stored in sorted order. A call without any values only issues a
# warning.
#
sub SetAtomTypesToUse {
  my($This, @Values) = @_;
  my($SpecifiedAtomType, @SpecifiedAtomTypes);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomTypesToUse: No values specified...";
    return;
  }

  # First value is either a reference to an array holding all the types or
  # the first type of the list...
  @SpecifiedAtomTypes = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomTypes are valid...
  for $SpecifiedAtomType (@SpecifiedAtomTypes) {
    AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedAtomType)
      or croak "Error: ${ClassName}->SetAtomTypesToUse: Specified atom type, $SpecifiedAtomType, is not supported...\n ";
  }

  # Set atom types to use...
  @{$This->{AtomTypesToUse}} = sort @SpecifiedAtomTypes;

  return $This;
}
304
# Set vector values precision for real values which might be generated after
# normalization and fuzzification. The value must be a positive integer...
#
sub SetValuesPrecision {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetValuesPrecision: ValuesPrecision value, $Value, is not valid: It must be a positive integer..."
    unless TextUtil::IsPositiveInteger($Value);

  $This->{ValuesPrecision} = $Value;

  return $This;
}
318
# Set minimum bond distance for pharmacophore atom pairs. The value must be
# an integer...
#
sub SetMinDistance {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid: It must be an integer..."
    unless TextUtil::IsInteger($Value);

  $This->{MinDistance} = $Value;

  return $This;
}
331
# Set maximum bond distance for pharmacophore atom pairs. The value must be
# a positive integer...
#
sub SetMaxDistance {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid: It must be a positive integer..."
    unless TextUtil::IsPositiveInteger($Value);

  $This->{MaxDistance} = $Value;

  return $This;
}
344
# Set normalization methodology to use for scaling the occurrence count of
# pharmacophore atom pairs over the distance range between minimum and maximum
# distance. Supported values: None, ByHeavyAtomsCount or ByAtomTypesCount.
#
sub SetNormalizationMethodology {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetNormalizationMethodology: NormalizationMethodology value, $Value, is not valid. Supported values: None, ByHeavyAtomsCount or ByAtomTypesCount..."
    unless $Value =~ /^(ByHeavyAtomsCount|ByAtomTypesCount|None)$/i;

  $This->{NormalizationMethodology} = $Value;

  return $This;
}
359
# Set weight of the various pharmacophore atom types to use for their
# contribution to atom pair interaction using atom types label and value hash.
#
# It allows to increase the importance of specific pharmacophore atom
# types in the generated fingerprints.
#
# A value of 0 eliminates the contribution by a particular pharmacophore atom
# type and 2 doubles its contribution.
#
# The labels and values may be specified as a list of pairs or as a single
# reference to a hash. All the specified pairs are validated before any weight
# is changed: an unknown atom type or a weight which is not a float >= 0 causes
# a croak and leaves the current weights untouched.
#
# Returns the object.
#
sub SetAtomTypesWeight {
  my($This, @Values) = @_;
  my($AtomType, $Weight, %AtomTypesWeight);

  # A lone hash reference is what gets passed when the weights are specified
  # as a single value, e.g. through the name and value pairs of the constructor...
  if (@Values == 1 && ref($Values[0]) eq 'HASH') {
    %AtomTypesWeight = %{$Values[0]};
  }
  else {
    %AtomTypesWeight = @Values;
  }

  # Validate everything first; sorted order keeps error reporting deterministic...
  for $AtomType (sort keys %AtomTypesWeight) {
    $Weight = $AtomTypesWeight{$AtomType};
    if (!exists $This->{AtomTypesWeight}{$AtomType}) {
      croak "Error: ${ClassName}->SetAtomTypesWeight: AtomTypeWeight for $AtomType couldn't be set: Unknown atom type...";
    }
    if (!(TextUtil::IsFloat($Weight) && ($Weight >= 0))) {
      croak "Error: ${ClassName}->SetAtomTypesWeight: Specified weight value, $Weight, for AtomType, $AtomType, muts be >= 0...";
    }
  }

  for $AtomType (keys %AtomTypesWeight) {
    $This->{AtomTypesWeight}{$AtomType} = $AtomTypesWeight{$AtomType};
  }

  return $This;
}
383
# Set fuzzification methodology to use for fuzzifying atom pairs count.
# Supported values: FuzzyBinning or FuzzyBinSmoothing...
#
sub SetFuzzificationMethodology {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetFuzzificationMethodology: FuzzificationMethodology value, $Value, is not valid. Supported values: FuzzyBinning or FuzzyBinSmoothing..."
    unless $Value =~ /^(FuzzyBinning|FuzzyBinSmoothing)$/i;

  $This->{FuzzificationMethodology} = $Value;

  return $This;
}
397
# Set fuzzification mode for fuzzifying atom pairs count. Supported values:
# BeforeNormalization or AfterNormalization...
#
sub SetFuzzificationMode {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetFuzzificationMode: FuzzificationMode value, $Value, is not valid. Supported values: BeforeNormalization or AfterNormalization..."
    unless $Value =~ /^(BeforeNormalization|AfterNormalization)$/i;

  $This->{FuzzificationMode} = $Value;

  return $This;
}
411
# Set fuzz factor value used for fuzzifying atom pairs count...
#
# The valid range depends on the current FuzzificationMethodology: 0 to 1 for
# FuzzyBinning and 0 to 0.5 for FuzzyBinSmoothing.
#
sub SetFuzzFactor {
  my($This, $Value) = @_;
  my($Methodology, $MaxFuzzFactor, $MaxFuzzFactorLabel);

  $Methodology = $This->{FuzzificationMethodology};

  # Upper limit for the current methodology...
  if ($Methodology =~ /^FuzzyBinning$/i) {
    ($MaxFuzzFactor, $MaxFuzzFactorLabel) = (1.0, '1');
  }
  elsif ($Methodology =~ /^FuzzyBinSmoothing$/i) {
    ($MaxFuzzFactor, $MaxFuzzFactorLabel) = (0.5, '0.5');
  }
  else {
    croak "Error: ${ClassName}->SetFuzzFactor: Fuzz factor value can't be changed: Uknown FuzzificationMethodology: $This->{FuzzificationMethodology}...";
  }

  croak "Error: ${ClassName}->SetFuzzFactor: Specified fuzz factor value, $Value, must be >= 0 and <= ${MaxFuzzFactorLabel}..."
    unless (TextUtil::IsFloat($Value) && $Value >=0 && $Value <= $MaxFuzzFactor);

  $This->{FuzzFactor} = $Value;

  return $This;
}
435
# Get fingerprints description...
#
# An explicitly set description is returned as it is; otherwise, the
# description is put together from the fingerprints type, atom pairs set size
# and distance range.
#
sub GetDescription {
  my($This) = @_;

  # Is description explicitly set?
  return $This->{Description} if exists $This->{Description};

  # Generate fingerprints description...
  return join(':', $This->{Type}, $This->{AtomPairsSetSizeToUse}, "MinDistance$This->{MinDistance}", "MaxDistance$This->{MaxDistance}");
}
450
451 # Generate topological pharmacophore atom pairs [ Ref 60-62, Ref 65, Ref 68 ] fingerprints...
452 #
453 # Methodology:
454 # . Generate a distance matrix.
455 # . Assign pharmacophore atom types to all the atoms.
456 # . Initialize pharmacophore atom pairs basis set for all unique pairs between
457 # minimum and maximum distance.
458 # . Using distance matrix and pharmacophore atom types, count occurance of
459 # unique atom pairs between specified distance range - It corresponds to the
460 # correlation-vector for the atom pairs.
461 # . Weigh contribution of each atom type to atom pair interaction by its specified
462 # weight during occurance count.
463 # . Assign count to appropriate distance bin for a specific atom pair
464 #
465 # . Normalize occurance count of pharmacophore atom pairs by heavy atom count
466 # or sum of AtomTypeCounts of each pharmacophore atom type in the atom pair
467 # at a specific distance.
468 #
469 # . Fuzzify occurance count of pharmacophore atom pairs using FuzzyBinning or
470 # FuzzyBinSmoothing methodology.
471 #
472 # Notes:
473 # . Hydrogen atoms are ignored during the fingerprint generation.
474 #
sub GenerateFingerprints {
  my($This) = @_;

  # Drives the complete fingerprints generation: caches molecule data, sets up
  # the distance matrix, assigns pharmacophore atom types, counts atom pairs,
  # normalizes and fuzzifies the counts and stores the final values in the
  # fingerprints vector.
  #
  # Croaks when MinDistance is greater than MaxDistance. When the distance
  # matrix can't be generated, a warning is issued and no fingerprints are
  # set. Returns the object in both cases.

  if ($This->{MinDistance} > $This->{MaxDistance}) {
    croak "Error: ${ClassName}->GenerateFingerprints: No fingerprints generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
  }

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Generate distance matrix...
  if (!$This->_SetupDistanceMatrix()) {
    # Don't leave the cached molecule data behind on failure...
    $This->_ClearMoleculeDataCache();
    carp "Warning: ${ClassName}->GenerateFingerprints: Fingerprints generation didn't succeed: Couldn't generate distance matrix...";
    return $This;
  }

  # Assign pharmacophore atom types to all heavy atoms...
  $This->_AssignPharmacophoreAtomTypes();

  # Initialize values of all possible pharmacophore atom pairs...
  $This->_InitializePharmacophoreAtomPairs();

  # Count atom pairs...
  $This->_CountPharmacohoreAtomPairs();

  # Fuzzify atom pairs count before normalization...
  if ($This->{FuzzificationMode} =~ /^BeforeNormalization$/i) {
    $This->_FuzzifyPharmacohoreAtomPairsCount();
  }

  # Normalize atom pairs count...
  $This->_NormalizePharmacohoreAtomPairsCount();

  # Fuzzify atom pairs count after normalization...
  if ($This->{FuzzificationMode} =~ /^AfterNormalization$/i) {
    $This->_FuzzifyPharmacohoreAtomPairsCount();
  }

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}
521
# Setup distance matrix...
#
# The distance matrix retrieved from the molecule is stored in the object.
# Returns the object, or undef when no distance matrix is available.
#
sub _SetupDistanceMatrix {
  my($This) = @_;
  my($Molecule);

  $Molecule = $This->GetMolecule();
  $This->{DistanceMatrix} = $Molecule->GetDistanceMatrix();

  return $This->{DistanceMatrix} ? $This : undef;
}
535
# Assign pharmacophore atom types to all heavy atoms and count each atom
# type assigned...
#
# Sets up:
#   . AssignedAtomTypes: atom ID mapped to a reference to the list of types
#     assigned to the atom; the list is empty for atoms without any type
#   . AssignedAtomTypesCount: atom type mapped to number of atoms assigned
#   . HeavyAtomCount: number of non-hydrogen atoms
#
# Returns the object.
#
sub _AssignPharmacophoreAtomTypes {
  my($This) = @_;
  my($Atom, $AtomType, $AssignedAtomType, $FunctionalClassAtomTypes);

  # Assign topological pharmacophore atom types...
  $FunctionalClassAtomTypes = AtomTypes::FunctionalClassAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => 1, 'FunctionalClassesToUse' => $This->{AtomTypesToUse});
  $FunctionalClassAtomTypes->AssignAtomTypes();

  %{$This->{AssignedAtomTypes}} = ();

  # Initialize assigned atom types count...
  %{$This->{AssignedAtomTypesCount}} = ();
  for $AtomType (@{$This->{AtomTypesToUse}}) {
    $This->{AssignedAtomTypesCount}{$AtomType} = 0;
  }

  $This->{HeavyAtomCount} = 0;

  ATOM: for $Atom (@{$This->{Atoms}}) {
    if ($Atom->IsHydrogen()) {
      next ATOM;
    }
    $This->{HeavyAtomCount} += 1;

    # Collect all possible pharmacophore atom types which could be assigned
    # to atom. A fresh array is needed for each atom as its reference is
    # stored in the object...
    my(@AtomTypes) = ();

    $AssignedAtomType = $FunctionalClassAtomTypes->GetAtomType($Atom);
    if ($AssignedAtomType && $AssignedAtomType !~ /^None$/i) {
      # The assigned type string is split on '.' into individual types...
      @AtomTypes = split /\./, $AssignedAtomType;
      for $AtomType (@AtomTypes) {
        $This->{AssignedAtomTypesCount}{$AtomType} += 1;
      }
    }

    # Assign pharmacophore types to atom...
    $This->{AssignedAtomTypes}{$Atom->GetID()} = \@AtomTypes;
  }
  return $This;
}
583
584 # Initialize values of all possible pharmacohore atom pairs...
585 #
586 # Let:
587 # Dmin = Minimum distance correspoding to number of bonds between two atoms
588 # Dmax = Maximum distance correspoding to number of bonds between two atoms
589 # D = Distance correspoding to number of bonds between two atoms
590 #
591 # P = Number of pharmacophore atom types to consider
592 # PPDn = Number of possible unique pharmacophore atom pairs at a distance Dn
593 #
594 # PPT = Total number of possible pharmacophore atom pairs at all distances between Dmin and Dmax
595 #
596 # Then:
597 #
598 # PPD = (P * (P - 1))/2 + P
599 #
600 # PPT = ((Dmax - Dmin) + 1) * ((P * (P - 1))/2 + P)
601 # = ((Dmax - Dmin) + 1) * PPD
602 #
603 #
604 # So for default values of Dmin = 1, Dmax = 10 and P = 5,
605 #
606 # PPD = (5 * (5 - 1))/2 + 5 = 15
607 # PPT = ((10 - 1) + 1) * 15 = 150
608 #
609 # the pharmacophore atom pairs basis set includes 150 values.
610 #
sub _InitializePharmacophoreAtomPairs {
  my($This) = @_;
  my($Distance, $FirstIndex, $LastIndex, $FirstAtomType, @AtomTypes);

  # Sets the count of every unique atom pair, at each distance between
  # MinDistance and MaxDistance, to zero. For a pair, the second atom type
  # never precedes the first one in AtomTypesToUse.

  %{$This->{AtomPairsCount}} = ();

  @AtomTypes = @{$This->{AtomTypesToUse}};
  $LastIndex = $#AtomTypes;

  for $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
    %{$This->{AtomPairsCount}{$Distance}} = ();

    for $FirstIndex (0 .. $LastIndex) {
      $FirstAtomType = $AtomTypes[$FirstIndex];

      # Pair the first atom type with itself and all the types after it...
      $This->{AtomPairsCount}{$Distance}{$FirstAtomType} = { map { ($_, 0) } @AtomTypes[$FirstIndex .. $LastIndex] };
    }
  }
  return $This;
}
632
633 # Count pharmacophore atom pairs between mininum and maximum distance at each
634 # distance using distance matrix and pharmacophore atom types assiged to each heavy
635 # atom.
636 #
637 # Let:
638 # Px = Pharmacophore atom type x
639 # Py = Pharmacophore atom type y
640 # Dn = Distance between Px and Py in specified distance range
641 #
642 # Then:
643 # Px-Dn-Py = Pharmacophore atom pair ID for atom types Px and Py at distance Dn
644 #
645 # For example: H-D1-H, H-D2-HBA, PI-D5-PI and so on
646 #
647 # Notes:
648 # . The row and column indices of distance matrix correspond to atom indices.
649 # . Distance value of BigNumber implies the atom is not connected to any other atom.
650 # . Due to symmetric nature of distance matrix, only upper or lower triangular matrix
651 # needs to be processed during identification and count of pharmacophore atom pairs.
652 #
sub _CountPharmacohoreAtomPairs {
  my($This) = @_;
  my($RowCount, $ColCount, $Row, $Col, $Matrix, $Separation, $RowAtomID, $ColAtomID, $RowAtomType, $ColAtomType, $RowWeight, $ColWeight, $FirstAtomType, $SecondAtomType, $NoIndexCheck);

  # Walks over the upper triangle of the distance matrix, diagonal included,
  # and for each pair of atoms with assigned types at a distance within the
  # specified range increments the count of every combination of their types
  # by the product of the weights of the two types.

  $Matrix = $This->{DistanceMatrix};
  ($RowCount, $ColCount) = $Matrix->GetSize();
  $NoIndexCheck = 0;

  for $Row (0 .. ($RowCount - 1) ) {
    $RowAtomID = $This->{AtomIndexToID}{$Row};

    # Skip atoms without any assigned pharmacophore type...
    next unless (exists($This->{AssignedAtomTypes}{$RowAtomID}) && @{$This->{AssignedAtomTypes}{$RowAtomID}});

    for $Col ($Row .. ($ColCount - 1) ) {
      $ColAtomID = $This->{AtomIndexToID}{$Col};
      next unless (exists($This->{AssignedAtomTypes}{$ColAtomID}) && @{$This->{AssignedAtomTypes}{$ColAtomID}});

      # Only distances in the specified range contribute...
      $Separation = $Matrix->GetValue($Row, $Col, $NoIndexCheck);
      next if ($Separation < $This->{MinDistance} || $Separation > $This->{MaxDistance});

      for $RowAtomType (@{$This->{AssignedAtomTypes}{$RowAtomID}}) {
        $RowWeight = $This->{AtomTypesWeight}{$RowAtomType};
        next if ($RowWeight == 0);

        for $ColAtomType (@{$This->{AssignedAtomTypes}{$ColAtomID}}) {
          $ColWeight = $This->{AtomTypesWeight}{$ColAtomType};
          next if ($ColWeight == 0);

          # Atom pair counts are keyed with the types in string sort order...
          ($FirstAtomType, $SecondAtomType) = ($RowAtomType le $ColAtomType) ? ($RowAtomType, $ColAtomType) : ($ColAtomType, $RowAtomType);
          $This->{AtomPairsCount}{$Separation}{$FirstAtomType}{$SecondAtomType} += $RowWeight * $ColWeight;
        }
      }
    }
  }
  return $This;
}
699
# Normalize the occurrence count of pharmacophore atom pairs over the
# specified distance range using the current NormalizationMethodology; an
# unknown methodology causes a croak...
#
sub _NormalizePharmacohoreAtomPairsCount {
  my($This) = @_;
  my($Methodology);

  $Methodology = $This->{NormalizationMethodology};

  if ($Methodology =~ /^None$/i) {
    # Nothing to do...
  }
  elsif ($Methodology =~ /^ByHeavyAtomsCount$/i) {
    $This->_NormalizeAtomPairsCountByHeavyAtomsCount();
  }
  elsif ($Methodology =~ /^ByAtomTypesCount$/i) {
    $This->_NormalizeAtomPairsCountByAtomTypesCount();
  }
  else {
    croak "Error: ${ClassName}->_NormalizePharmacohoreAtomPairsCount: Unknown NormalizationMethodology: $This->{NormalizationMethodology}...";
  }

  return $This;
}
722
723
# Normalize the occurrence count of pharmacophore atom pairs at various
# distances by dividing each non-zero count by the heavy atom count; nothing
# is changed when the heavy atom count is zero...
#
sub _NormalizeAtomPairsCountByHeavyAtomsCount {
  my($This) = @_;
  my($CountsAtDistance, $CountsForAtomType, $AtomType2);

  return $This if ($This->{HeavyAtomCount} == 0);

  for $CountsAtDistance (values %{$This->{AtomPairsCount}}) {
    for $CountsForAtomType (values %{$CountsAtDistance}) {
      for $AtomType2 (keys %{$CountsForAtomType}) {
        # Zero counts stay as they are...
        next if ($CountsForAtomType->{$AtomType2} == 0);
        $CountsForAtomType->{$AtomType2} /= $This->{HeavyAtomCount};
      }
    }
  }
  return $This;
}
747
# Normalize the occurrence count of pharmacophore atom pairs at various
# distances by dividing each non-zero count by the sum of the assigned atom
# types count of the two pharmacophore atom types in the atom pair...
#
sub _NormalizeAtomPairsCountByAtomTypesCount {
  my($This) = @_;
  my($CountsAtDistance, $CountsForAtomType, $AtomType1, $AtomType2, $TypesCount);

  $TypesCount = $This->{AssignedAtomTypesCount};

  for $CountsAtDistance (values %{$This->{AtomPairsCount}}) {
    for $AtomType1 (keys %{$CountsAtDistance}) {
      $CountsForAtomType = $CountsAtDistance->{$AtomType1};
      for $AtomType2 (keys %{$CountsForAtomType}) {
        # Zero counts stay as they are...
        next if ($CountsForAtomType->{$AtomType2} == 0);
        $CountsForAtomType->{$AtomType2} /= ($TypesCount->{$AtomType1} + $TypesCount->{$AtomType2});
      }
    }
  }
  return $This;
}
769
770 # Fuzzify pharmacophore atom pairs count...
771 #
772 # Let:
773 # Px = Pharmacophore atom type x
774 # Py = Pharmacophore atom type y
775 #
776 # PPxy = Pharmacophore atom pair between atom type Px and Py
777 #
778 # PPxyDn = Pharmacophore atom pairs count between atom type Px and Py at distance Dn
779 # PPxyDn-1 = Pharmacophore atom pairs count between atom type Px and Py at distance Dn - 1
780 # PPxyDn+1 = Pharmacophore atom pairs count between atom type Px and Py at distance Dn + 1
781 #
782 # FF = FuzzFactor for FuzzyBinning and FuzzyBinSmoothing
783 #
784 # Then:
785 #
786 # For FuzzyBinning:
787 #
788 # PPxyDn = PPxyDn (Unchanged)
789 #
790 # PPxyDn-1 = PPxyDn-1 + PPxyDn * FF
791 # PPxyDn+1 = PPxyDn+1 + PPxyDn * FF
792 #
793 # For FuzzyBinSmoothing:
794 #
795 # PPxyDn = PPxyDn - PPxyDn * 2FF for Dmin < Dn < Dmax
796 # PPxyDn = PPxyDn - PPxyDn * FF for Dn = Dmin or Dmax
797 #
798 # PPxyDn-1 = PPxyDn-1 + PPxyDn * FF
799 # PPxyDn+1 = PPxyDn+1 + PPxyDn * FF
800 #
801 # In both fuzzification schemes, a value of 0 for FF implies no fuzzification of occurance counts.
802 # A value of 1 during FuzzyBinning corresponds to maximum fuzzification of occurance counts;
803 # however, a value of 0.5, the maximum allowed during FuzzyBinSmoothing, ends up completely
804 # distributing the value at a distance between Dmin and Dmax over the previous and next distance bins.
805 #
806 # So for default value of FuzzFactor (FF) 0.15, the occurance count of pharmacohore atom pairs
807 # at distance Dn during FuzzyBinning is left unchanged and the counts at distances Dn -1 and Dn + 1
808 # are incremened by PPxyDn * 0.15.
809 #
810 # And during FuzzyBinSmoothing the occurance counts at Distance Dn is scaled back using multiplicate
811 # factor of (1 - 2*0.15) and the occurance counts at distances Dn -1 and Dn + 1 are incremened by
812 # PPxyDn * 0.15. In otherwords, occurance bin count is smoothed out by distributing it over the
813 # previous and next distance value.
814 #
sub _FuzzifyPharmacohoreAtomPairsCount {
  my($This) = @_;
  my($FirstIndex, $SecondIndex, $LastIndex, $AtomType1, $AtomType2, $Distance, $Count, $UpcomingCount, $Delta, $SmoothBins, $MinDistance, $MaxDistance);

  # Spreads a FuzzFactor fraction of the count at each distance over the
  # previous and next distance bins; for FuzzyBinSmoothing, the spread amount
  # is also taken away from the count at the distance itself. Nothing is done
  # unless fuzzification is turned on and FuzzFactor is greater than zero.

  return $This unless ($This->{FuzzifyAtomPairsCount} && $This->{FuzzFactor} > 0);

  $SmoothBins = ($This->{FuzzificationMethodology} =~ /^FuzzyBinSmoothing$/i) ? 1 : 0;

  ($MinDistance, $MaxDistance) = ($This->{MinDistance}, $This->{MaxDistance});
  $LastIndex = $#{$This->{AtomTypesToUse}};

  for $FirstIndex (0 .. $LastIndex) {
    $AtomType1 = $This->{AtomTypesToUse}[$FirstIndex];

    for $SecondIndex ($FirstIndex .. $LastIndex) {
      $AtomType2 = $This->{AtomTypesToUse}[$SecondIndex];

      # The count for the upcoming distance is always picked up before the
      # current distance adds its contribution to it; consequently, the
      # amount spread from a distance is based on its count before fuzzification.
      $UpcomingCount = $This->{AtomPairsCount}{$MinDistance}{$AtomType1}{$AtomType2};

      for $Distance ($MinDistance .. $MaxDistance) {
        $Count = $UpcomingCount;
        $UpcomingCount = ($Distance < $MaxDistance) ? $This->{AtomPairsCount}{$Distance + 1}{$AtomType1}{$AtomType2} : 0;

        # No contribution to fuzzy binning from this distance...
        next if ($Count == 0);

        $Delta = $Count * $This->{FuzzFactor};

        # Increment count at previous distance...
        if ($Distance > $MinDistance) {
          $This->{AtomPairsCount}{$Distance - 1}{$AtomType1}{$AtomType2} += $Delta;
        }

        # Decrement count at current distance for FuzzyBinSmoothing: twice
        # the change inside the distance range and once at its ends...
        if ($SmoothBins) {
          $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} -= ($Distance > $MinDistance && $Distance < $MaxDistance) ? 2 * $Delta : $Delta;
        }

        # Increment count at next distance...
        if ($Distance < $MaxDistance) {
          $This->{AtomPairsCount}{$Distance + 1}{$AtomType1}{$AtomType2} += $Delta;
        }
      }
    }
  }
  return $This;
}
873
# Set final fingerprints vector...
#
# Collects atom pair IDs, formatted as <AtomType1>-D<Distance>-<AtomType2>,
# along with their count values and adds them to the fingerprints vector.
# Counts are rounded off whenever normalization or fuzzification is in use;
# zero counts are left out for ArbitrarySize atom pairs set size.
#
sub _SetFinalFingerprints {
  my($This) = @_;
  my($Distance, $FirstIndex, $SecondIndex, $LastIndex, $AtomType1, $AtomType2, $Count, $NeedsRounding, $SkipZeroCounts, @Counts);

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  @Counts = ();
  @{$This->{AtomPairsIDs}} = ();

  # Values stay as they are only without normalization and fuzzification...
  $NeedsRounding = (($This->{NormalizationMethodology} =~ /^None$/i) && !$This->{FuzzifyAtomPairsCount}) ? 0 : 1;

  # Is it an ArbitrarySize atom pairs set size?
  $SkipZeroCounts = ($This->{AtomPairsSetSizeToUse} =~ /^ArbitrarySize$/i) ? 1 : 0;

  $LastIndex = $#{$This->{AtomTypesToUse}};

  # Collect all atom pairs count values...
  for $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
    for $FirstIndex (0 .. $LastIndex) {
      $AtomType1 = $This->{AtomTypesToUse}[$FirstIndex];

      for $SecondIndex ($FirstIndex .. $LastIndex) {
        $AtomType2 = $This->{AtomTypesToUse}[$SecondIndex];

        # Atom pair count...
        $Count = $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2};
        $Count = MathUtil::round($Count, $This->{ValuesPrecision}) + 0 if $NeedsRounding;

        # Ignore or not to ignore...
        next if ($SkipZeroCounts && $Count == 0);

        push @{$This->{AtomPairsIDs}}, "${AtomType1}-D${Distance}-${AtomType2}";
        push @Counts, $Count;
      }
    }
  }

  # Add AtomPairsIDs and count values to fingerprint vector...
  $This->{FingerprintsVector}->AddValueIDs(\@{$This->{AtomPairsIDs}});
  $This->{FingerprintsVector}->AddValues(\@Counts);

  return $This;
}
923
# Get pharmacophore atom pair IDs corresponding to atom pairs count values in
# fingerprint vector as an array or reference to an array...
#
# AtomPairIDs list is generated during finalization of fingerprints and the
# fingerprint vector containing count values matches the atom pairs array.
#
sub GetAtomPairIDs {
  my($This) = @_;

  # List context gets the IDs themselves...
  if (wantarray) {
    return @{$This->{AtomPairsIDs}};
  }
  return \@{$This->{AtomPairsIDs}};
}
936
# Cache appropriate molecule data...
#
# Stores the list returned by GetAtoms() of the molecule under Atoms and maps each
# position in that list to the ID of the atom at that position under AtomIndexToID.
#
# Returns the object itself.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # All atoms, hydrogens included, are kept so that atom indices line up with atom
  # IDs for usage of distance matrix. The hydrogen atoms are ignored during processing...
  #
  my $Molecule = $This->GetMolecule();
  @{$This->{Atoms}} = $Molecule->GetAtoms();

  # Collect atom IDs in the same order as the cached atoms...
  my @AtomIDs = ();
  for my $Atom (@{$This->{Atoms}}) {
    push @AtomIDs, $Atom->GetID();
  }

  # Rebuild AtomIndex to AtomID hash from scratch...
  %{$This->{AtomIndexToID}} = ();
  for my $AtomIndex (0 .. $#AtomIDs) {
    $This->{AtomIndexToID}{$AtomIndex} = $AtomIDs[$AtomIndex];
  }

  return $This;
}
958
# Clear cached molecule data...
#
# Empties the cached Atoms list in place; AtomIndexToID is not touched here.
#
# Returns the object itself.
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  # Truncate the cached list down to zero elements...
  $#{$This->{Atoms}} = -1;

  return $This;
}
968
969
# Return a string containing data for TopologicalPharmacophoreAtomPairsFingerprints object...
#
# The string is a "; " delimited sequence of labeled entries covering fingerprint
# type, atom pairs set size, distance range, atom types, normalization, atom type
# weights, fuzzification settings, number of atom pairs and the fingerprints vector.
#
sub StringifyTopologicalPharmacophoreAtomPairsFingerprints {
  my($This) = @_;
  my(@Entries);

  # Type of fingerprint and atom pairs set size...
  push @Entries, "Fingerprint type: $This->{Type}";
  push @Entries, "AtomPairsSetSizeToUse: $This->{AtomPairsSetSizeToUse}";

  # Min and max distance...
  push @Entries, "MinDistance: $This->{MinDistance}";
  push @Entries, "MaxDistance: $This->{MaxDistance}";

  # Pharmacophore type labels and description...
  my @TypesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
  my %TypeDescriptions = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
  my @DescribedTypes = map { "${_}: $TypeDescriptions{$_}" } @TypesOrder;

  push @Entries, "AtomTypesToUse: <" . TextUtil::JoinWords(\@{$This->{AtomTypesToUse}}, ", ", 0) . ">";
  push @Entries, "AtomTypesOrder: <" . TextUtil::JoinWords(\@TypesOrder, ", ", 0) . ">";
  push @Entries, "AvailableAtomTypes: <" . TextUtil::JoinWords(\@DescribedTypes, ", ", 0) . ">";

  # Normalization method...
  push @Entries, "NormalizationMethodology: $This->{NormalizationMethodology}";

  # Weights, listed by sorted label; a non-empty list is preceded by a single space...
  my @WeightEntries = map { "${_}: $This->{AtomTypesWeight}{$_}" } sort @{$This->{AtomTypesToUse}};
  my $WeightsList = @WeightEntries ? " " . join("; ", @WeightEntries) : "";
  push @Entries, "AtomTypesWeight <Labels: Weight>: <${WeightsList}>";

  # Fuzzification of count...
  my $FuzzifyFlag = "No";
  $FuzzifyFlag = "Yes" if $This->{FuzzifyAtomPairsCount};
  push @Entries, "FuzzifyAtomPairsCount: $FuzzifyFlag";
  push @Entries, "FuzzificationMode: $This->{FuzzificationMode}";
  push @Entries, "FuzzificationMethodology: $This->{FuzzificationMethodology}";
  push @Entries, "FuzzFactor: $This->{FuzzFactor}";

  # Total number of pharmacophore atom pairs...
  push @Entries, "NumOfAtomPairs: " . $This->{FingerprintsVector}->GetNumOfValues();

  # FingerprintsVector...
  push @Entries, "FingerprintsVector: < $This->{FingerprintsVector} >";

  return join("; ", @Entries);
}
1029
1030 1;
1031
1032 __END__
1033
1034 =head1 NAME
1035
1036 TopologicalPharmacophoreAtomPairsFingerprints
1037
1038 =head1 SYNOPSIS
1039
1040 use Fingerprints::TopologicalPharmacophoreAtomPairsFingerprints;
1041
1042 use Fingerprints::TopologicalPharmacophoreAtomPairsFingerprints qw(:all);
1043
1044 =head1 DESCRIPTION
1045
1046 B<TopologicalPharmacophoreAtomPairsFingerprints> [ Ref 60-62, Ref 65, Ref 68 ] class provides
1047 the following methods:
1048
1049 new, GenerateFingerprints, GetDescription, GetAtomPairIDs, SetAtomPairsSetSizeToUse, SetAtomTypesToUse,
1050 SetAtomTypesWeight, SetFuzzFactor, SetFuzzificationMethodology,
1051 SetFuzzificationMode, SetMaxDistance, SetMinDistance,
1052 SetNormalizationMethodology, SetValuesPrecision,
1053 StringifyTopologicalPharmacophoreAtomPairsFingerprints
1054
1055 B<TopologicalPharmacophoreAtomPairsFingerprints> is derived from B<Fingerprints> class which in turn
1056 is derived from B<ObjectProperty> base class that provides methods not explicitly defined
1057 in B<TopologicalPharmacophoreAtomPairsFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's
1058 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property:
1059
1060 Set<PropertyName>(<PropertyValue>);
1061 $PropertyValue = Get<PropertyName>();
1062 Delete<PropertyName>();
1063
1064 Based on the values specified for B<AtomTypesToUse>, pharmacophore atom types are
1065 assigned to all non-hydrogen atoms in a molecule and a distance matrix is generated.
1066 A pharmacophore atom pairs basis set is initialized for all unique possible pairs within
1067 B<MinDistance> and B<MaxDistance> range.
1068
1069 Let:
1070
1071 P = Valid pharmacophore atom type
1072
1073 Px = Pharmacophore atom type x
1074 Py = Pharmacophore atom type y
1075
1076 Dmin = Minimum distance corresponding to number of bonds between two atoms
1077 Dmax = Maximum distance corresponding to number of bonds between two atoms
1078 D = Distance corresponding to number of bonds between two atoms
1079
1080 Px-Dn-Py = Pharmacophore atom pair ID for atom types Px and Py at distance Dn
1081
1082 P = Number of pharmacophore atom types to consider
1083 PPD = Number of possible unique pharmacophore atom pairs at any one distance Dn
1084
1085 PPT = Total number of possible pharmacophore atom pairs at all distances between Dmin and Dmax
1086
1087 Then:
1088
1089 PPD = (P * (P - 1))/2 + P
1090
1091 PPT = ((Dmax - Dmin) + 1) * ((P * (P - 1))/2 + P)
1092 = ((Dmax - Dmin) + 1) * PPD
1093
1094 So for default values of Dmin = 1, Dmax = 10 and P = 5,
1095
1096 PPD = (5 * (5 - 1))/2 + 5 = 15
1097 PPT = ((10 - 1) + 1) * 15 = 150
1098
1099 The pharmacophore atom pairs basis set includes 150 values.
1100
1101 The atom pair IDs correspond to:
1102
1103 Px-Dn-Py = Pharmacophore atom pair ID for atom types Px and Py at distance Dn
1104
1105 For example: H-D1-H, H-D2-HBA, PI-D5-PI and so on
1106
1107 Using distance matrix and pharmacophore atom types, occurrence of unique pharmacophore atom
1108 pairs is counted. The contribution of each atom type to atom pair interaction is optionally
1109 weighted by specified B<AtomTypesWeight> before assigning its count to appropriate distance
1110 bin. Based on B<NormalizationMethodology> option, pharmacophore atom pairs count is optionally
1111 normalized. Additionally, pharmacophore atom pairs count is optionally fuzzified before or after
1112 the normalization controlled by values of B<FuzzifyAtomPairsCount>, B<FuzzificationMode>,
1113 B<FuzzificationMethodology> and B<FuzzFactor>.
1114
1115 The final pharmacophore atom pairs count along with atom pair identifiers involving all non-hydrogen
1116 atoms, with optional normalization and fuzzification, constitute pharmacophore topological atom pairs
1117 fingerprints of the molecule.
1118
1119 For I<ArbitrarySize> value of B<AtomPairsSetSizeToUse>, the fingerprint vector corresponds to
1120 only those topological pharmacophore atom pairs which are present and have non-zero count. However,
1121 for I<FixedSize> value of B<AtomPairsSetSizeToUse>, the fingerprint vector contains all possible
1122 valid topological pharmacophore atom pairs with both zero and non-zero count values.
1123
1124 The current release of MayaChemTools generates the following types of topological pharmacophore
1125 atom pairs fingerprints vector strings:
1126
1127 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min
1128 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H
1129 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2-
1130 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H
1131 BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...;
1132 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10
1133 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1
1134
1135 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist
1136 ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0
1137 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1
1138 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0
1139 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0
1140 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18...
1141
1142 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist
1143 ance1:MaxDistance10;150;OrderedNumericalValues;IDsAndValuesString;H-D1
1144 -H H-D1-HBA H-D1-HBD H-D1-NI H-D1-PI HBA-D1-HBA HBA-D1-HBD HBA-D1-NI H
1145 BA-D1-PI HBD-D1-HBD HBD-D1-NI HBD-D1-PI NI-D1-NI NI-D1-PI PI-D1-PI H-D
1146 2-H H-D2-HBA H-D2-HBD H-D2-NI H-D2-PI HBA-D2-HBA HBA-D2-HBD HBA-D2...;
1147 18 0 0 1 0 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3
1148 1 0 0 0 1 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0
1149 1 0 0 1 0 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0
1150
1151 =head2 METHODS
1152
1153 =over 4
1154
1155 =item B<new>
1156
1157 $TPAPFP = new TopologicalPharmacophoreAtomPairsFingerprints(
1158 %NamesAndValues);
1159
1160 Using specified I<TopologicalPharmacophoreAtomPairsFingerprints> property names and
1161 values hash, B<new> method creates a new object and returns a reference to newly created
1162 B<TopologicalPharmacophoreAtomPairsFingerprints> object. By default, the following properties
1163 are initialized:
1164
1165 Molecule = ''
1166 Type = 'TopologicalPharmacophoreAtomPairs'
1167 MinDistance = 1
1168 MaxDistance = 10
1169 NormalizationMethodology = 'None'
1170 AtomTypesToUse = ['HBD', 'HBA', 'PI', 'NI', 'H']
1171
1172 FuzzifyAtomPairsCount = 0
1173 FuzzificationMode = 'AfterNormalization'
1174 FuzzificationMethodology = 'FuzzyBinning'
1175 FuzzFactor = 0.15
1176
1177 ValuesPrecision = 2
1178
1179 Examples:
1180
1181 $TPAPFP = new TopologicalPharmacophoreAtomPairsFingerprints(
1182 'Molecule' => $Molecule);
1183
1184 $TPAPFP = new TopologicalPharmacophoreAtomPairsFingerprints(
1185 'Molecule' => $Molecule,
1186 'AtomPairsSetSizeToUse' => 'ArbitrarySize',
1187 'MinDistance' => 1,
1188 'MaxDistance' => 10,
1189 'NormalizationMethodology' => 'None',
1190 'AtomTypesToUse' => ['HBD', 'HBA', 'PI', 'NI', 'H'],
1191 'FuzzifyAtomPairsCount' => 0);
1192
1193 $TPAPFP = new TopologicalPharmacophoreAtomPairsFingerprints(
1194 'Molecule' => $Molecule,
1195 'AtomPairsSetSizeToUse' => 'FixedSize',
1196 'MinDistance' => 1,
1197 'MaxDistance' => 10,
1198 'NormalizationMethodology' => 'None',
1199 'AtomTypesToUse' => ['HBD', 'HBA', 'PI', 'NI', 'H'],
1200 'FuzzifyAtomPairsCount' => 1,
1201 'FuzzificationMethodology' => 'FuzzyBinning',
1202 'FuzzFactor' => 0.15,
1203 'ValuesPrecision' => 2);
1204
1205 $TPAPFP->GenerateFingerprints();
1206 print "$TPAPFP\n";
1207
1208 =item B<GetDescription>
1209
1210 $Description = $TopologicalPharmacophoreAtomPairsFP->GetDescription();
1211
1212 Returns a string containing description of topological pharmacophore atom pairs fingerprints.
1213
1214 =item B<GenerateFingerprints>
1215
1216 $TopologicalPharmacophoreAtomPairsFP->GenerateFingerprints();
1217
1218 Generates topological pharmacophore atom pairs fingerprints and returns
1219 I<TopologicalPharmacophoreAtomPairsFP>.
1220
1221 =item B<GetAtomPairIDs>
1222
1223 $AtomPairIDsRef = $TopologicalPharmacophoreAtomPairsFP->GetAtomPairIDs();
1224 @AtomPairIDs = $TopologicalPharmacophoreAtomPairsFP->GetAtomPairIDs();
1225
1226 Returns atom pair IDs corresponding to atom pairs count values in topological pharmacophore
1227 atom pairs fingerprints vector as an array or reference to an array.
1228
1229 =item B<SetAtomPairsSetSizeToUse>
1230
1231 $TopologicalPharmacophoreAtomPairsFP->SetAtomPairsSetSizeToUse($Values);
1232
1233 Sets pharmacophore atom pairs set size to use for topological pharmacophore fingerprints
1234 generation and returns I<TopologicalPharmacophoreAtomPairsFingerprints>.
1235
1236 Possible values for pharmacophore atom pairs set size are: I<ArbitrarySize, FixedSize>.
1237 Default value: I<ArbitrarySize>.
1238
1239 For I<ArbitrarySize> value of B<AtomPairsSetSizeToUse>, the fingerprint vector corresponds to
1240 only those topological pharmacophore atom pairs which are present and have non-zero count. However,
1241 for I<FixedSize> value of B<AtomPairsSetSizeToUse>, the fingerprint vector contains all possible
1242 valid topological pharmacophore atom pairs with both zero and non-zero count values.
1243
1244 =item B<SetAtomTypesToUse>
1245
1246 $TopologicalPharmacophoreAtomPairsFP->SetAtomTypesToUse($ValuesRef);
1247 $TopologicalPharmacophoreAtomPairsFP->SetAtomTypesToUse(@Values);
1248
1249 Sets pharmacophore atom types to use for topological pharmacophore fingerprints
1250 generation and returns I<TopologicalPharmacophoreAtomPairsFingerprints>.
1251
1252 Possible values for pharmacophore atom types are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>.
1253 Default value [ Ref 60-62 ] : I<HBD,HBA,PI,NI,H>.
1254
1255 The pharmacophore atom types abbreviations correspond to:
1256
1257 HBD: HydrogenBondDonor
1258 HBA: HydrogenBondAcceptor
1259 PI : PositivelyIonizable
1260 NI : NegativelyIonizable
1261 Ar : Aromatic
1262 Hal : Halogen
1263 H : Hydrophobic
1264 RA : RingAtom
1265 CA : ChainAtom
1266
1267 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign pharmacophore atom
1268 types. It uses following definitions [ Ref 60-61, Ref 65-66 ]:
1269
1270 HydrogenBondDonor: NH, NH2, OH
1271 HydrogenBondAcceptor: N[!H], O
1272 PositivelyIonizable: +, NH2
1273 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
1274
1275 =item B<SetAtomTypesWeight>
1276
1277 $TopologicalPharmacophoreAtomPairsFP->SetAtomTypesWeight(
1278 %AtomTypesToWeight);
1279
1280 Sets weights of specified pharmacophore atom types to use during calculation of their contribution
1281 to atom pair count and returns I<TopologicalPharmacophoreAtomPairsFP>. Default values: I<1 for
1282 each atom type>.
1283
1284 The weight values allow to increase the importance of specific pharmacophore atom type
1285 in the generated fingerprints. A weight value of 0 for an atom type eliminates its contribution to
1286 atom pair count whereas a weight value of 2 doubles its contribution.
1287
1288 =item B<SetFuzzFactor>
1289
1290 $TopologicalPharmacophoreAtomPairsFP->SetFuzzFactor($Value);
1291
1292 Sets fuzz factor value to use during fuzzification of atom pairs count and returns
1293 I<TopologicalPharmacophoreAtomPairsFP>. Default value: I<0.15>.
1294
1295 Valid values: For I<FuzzyBinning> value of B<FuzzificationMethodology>: I<between 0 and 1.0>; For
1296 I<FuzzyBinSmoothing> value of B<FuzzificationMethodology>: I<between 0 and 0.5>.
1297
1298 =item B<SetFuzzificationMethodology>
1299
1300 $TopologicalPharmacophoreAtomPairsFP->SetFuzzificationMethodology($Value);
1301
1302 Sets fuzzification methodology to use for fuzzification of atom pairs count and returns
1303 I<TopologicalPharmacophoreAtomPairsFP>. Default value: I<FuzzyBinning>. Possible values:
1304 I<FuzzyBinning | FuzzyBinSmoothing>.
1305
1306 In conjunction with values for options B<FuzzifyAtomPairsCount>, B<FuzzificationMode> and
1307 B<FuzzFactor>, B<FuzzificationMethodology> option is used to fuzzify pharmacophore atom
1308 pairs count.
1309
1310 Let:
1311
1312 Px = Pharmacophore atom type x
1313 Py = Pharmacophore atom type y
1314 PPxy = Pharmacophore atom pair between atom type Px and Py
1315
1316 PPxyDn = Pharmacophore atom pairs count between atom type Px and Py
1317 at distance Dn
1318 PPxyDn-1 = Pharmacophore atom pairs count between atom type Px and Py
1319 at distance Dn - 1
1320 PPxyDn+1 = Pharmacophore atom pairs count between atom type Px and Py
1321 at distance Dn + 1
1322
1323 FF = FuzzFactor for FuzzyBinning and FuzzyBinSmoothing
1324
1325 Then:
1326
1327 For I<FuzzyBinning>:
1328
1329 PPxyDn = PPxyDn (Unchanged)
1330
1331 PPxyDn-1 = PPxyDn-1 + PPxyDn * FF
1332 PPxyDn+1 = PPxyDn+1 + PPxyDn * FF
1333
1334 For I<FuzzyBinSmoothing>:
1335
1336 PPxyDn = PPxyDn - PPxyDn * 2FF for Dmin < Dn < Dmax
1337 PPxyDn = PPxyDn - PPxyDn * FF for Dn = Dmin or Dmax
1338
1339 PPxyDn-1 = PPxyDn-1 + PPxyDn * FF
1340 PPxyDn+1 = PPxyDn+1 + PPxyDn * FF
1341
1342 In both fuzzification schemes, a value of 0 for FF implies no fuzzification of occurrence counts.
1343 A value of 1 during I<FuzzyBinning> corresponds to maximum fuzzification of occurrence counts;
1344 however, a value of 1 during I<FuzzyBinSmoothing> ends up completely distributing the value over
1345 the previous and next distance bins.
1346
1347 So for default value of B<FuzzFactor> (FF) 0.15, the occurrence count of pharmacophore atom pairs
1348 at distance Dn during FuzzyBinning is left unchanged and the counts at distances Dn - 1 and Dn + 1
1349 are incremented by PPxyDn * 0.15.
1350
1351 And during I<FuzzyBinSmoothing> the occurrence count at distance Dn is scaled back using a multiplicative
1352 factor of (1 - 2*0.15) and the occurrence counts at distances Dn - 1 and Dn + 1 are incremented by
1353 PPxyDn * 0.15. In other words, occurrence bin count is smoothed out by distributing it over the
1354 previous and next distance value.
1355
1356 =item B<SetFuzzificationMode>
1357
1358 $TopologicalPharmacophoreAtomPairsFP->SetFuzzificationMode($Value);
1359
1360 Sets fuzzification mode to use for fuzzification of atom pairs count and returns
1361 I<TopologicalPharmacophoreAtomPairsFP>. Default value: I<AfterNormalization>. Possible values:
1362 I<BeforeNormalization | AfterNormalization>.
1363
1364 =item B<SetMaxDistance>
1365
1366 $TopologicalPharmacophoreAtomPairsFP->SetMaxDistance($Value);
1367
1368 Sets maximum bond distance between atom pairs for generating topological pharmacophore atom
1369 pairs fingerprints and returns I<TopologicalPharmacophoreAtomPairsFP>.
1370
1371 =item B<SetMinDistance>
1372
1373 $TopologicalPharmacophoreAtomPairsFP->SetMinDistance($Value);
1374
1375 Sets minimum bond distance between atom pairs for generating topological pharmacophore atom
1376 pairs fingerprints and returns I<TopologicalPharmacophoreAtomPairsFP>.
1377
1378 =item B<SetNormalizationMethodology>
1379
1380 $TopologicalPharmacophoreAtomPairsFP->SetNormalizationMethodology($Value);
1381
1382 Sets normalization methodology to use for scaling the occurrence count of pharmacophore atom
1383 pairs within specified distance range and returns I<TopologicalPharmacophoreAtomPairsFP>.
1384 Default value: I<None>. Possible values: I<None, ByHeavyAtomsCount or ByAtomTypesCount>.
1385
1386 =item B<SetValuesPrecision>
1387
1388 $TopologicalPharmacophoreAtomPairsFP->SetValuesPrecision($Value);
1389
1390 Sets precision of atom pairs count real values which might be generated after normalization
1391 or fuzzification and returns I<TopologicalPharmacophoreAtomPairsFP>. Default: up to I<2> decimal
1392 places.
1393
1394 =item B<StringifyTopologicalPharmacophoreAtomPairsFingerprints>
1395
1396 $String = $TopologicalPharmacophoreAtomPairsFP->
1397 StringifyTopologicalPharmacophoreAtomPairsFingerprints();
1398
1399 Returns a string containing information about I<TopologicalPharmacophoreAtomPairsFingerprints> object.
1400
1401 =back
1402
1403 =head1 AUTHOR
1404
1405 Manish Sud <msud@san.rr.com>
1406
1407 =head1 SEE ALSO
1408
1409 Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm,
1410 AtomTypesFingerprints.pm, EStateIndiciesFingerprints.pm, ExtendedConnectivityFingerprints.pm,
1411 MACCSKeys.pm, PathLengthFingerprints.pm, TopologicalAtomPairsFingerprints.pm,
1412 TopologicalAtomTripletsFingerprints.pm, TopologicalAtomTorsionsFingerprints.pm,
1413 TopologicalPharmacophoreAtomTripletsFingerprints.pm
1414
1415 =head1 COPYRIGHT
1416
1417 Copyright (C) 2015 Manish Sud. All rights reserved.
1418
1419 This file is part of MayaChemTools.
1420
1421 MayaChemTools is free software; you can redistribute it and/or modify it under
1422 the terms of the GNU Lesser General Public License as published by the Free
1423 Software Foundation; either version 3 of the License, or (at your option)
1424 any later version.
1425
1426 =cut