0
|
1 package MolecularDescriptors::MolecularComplexityDescriptors;
|
|
2 #
|
|
3 # $RCSfile: MolecularComplexityDescriptors.pm,v $
|
|
4 # $Date: 2015/02/28 20:49:20 $
|
|
5 # $Revision: 1.15 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability of fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use Carp;
|
|
31 use Exporter;
|
|
32 use Scalar::Util ();
|
|
33 use TextUtil ();
|
|
34 use MathUtil ();
|
|
35 use Atom;
|
|
36 use Molecule;
|
|
37 use MolecularDescriptors::MolecularDescriptors;
|
|
38 use AtomTypes::AtomicInvariantsAtomTypes;
|
|
39 use AtomTypes::FunctionalClassAtomTypes;
|
|
40 use Fingerprints::AtomTypesFingerprints;
|
|
41 use Fingerprints::ExtendedConnectivityFingerprints;
|
|
42 use Fingerprints::MACCSKeys;
|
|
43 use Fingerprints::PathLengthFingerprints;
|
|
44 use Fingerprints::TopologicalAtomPairsFingerprints;
|
|
45 use Fingerprints::TopologicalAtomTripletsFingerprints;
|
|
46 use Fingerprints::TopologicalAtomTorsionsFingerprints;
|
|
47 use Fingerprints::TopologicalPharmacophoreAtomPairsFingerprints;
|
|
48 use Fingerprints::TopologicalPharmacophoreAtomTripletsFingerprints;
|
|
49
|
|
# Package variables consulted by Exporter and by Perl's method resolution...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

# Inherit from the generic descriptors class; nothing is exported by default and
# the two class functions below are available on request...
@ISA = ('MolecularDescriptors::MolecularDescriptors', 'Exporter');
@EXPORT = ();
@EXPORT_OK = ('GetDescriptorNames', 'GetMolecularComplexityTypeAbbreviation');

%EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

# Setup class variables: both are assigned by _InitializeClass() at load time...
my($ClassName, @DescriptorNames);
_InitializeClass();

# Overload Perl functions: string conversion of an object is handled by the method
# named here...
use overload '""' => 'StringifyMolecularComplexityDescriptors';
|
|
64
|
|
# Class constructor...
#
# Builds the object through the parent class constructor, blesses it into the
# requested class (an existing object may also be used as the invocant), assigns
# default property values and finally applies the caller supplied name and value
# pairs. Returns the new object.
#
sub new {
  my $Class = shift;
  my(%NamesAndValues) = @_;

  # Initialize object...
  my $This = $Class->SUPER::new();
  bless($This, ref($Class) || $Class);

  # Defaults first, then explicitly specified properties...
  $This->_InitializeMolecularComplexityDescriptors();
  $This->_InitializeMolecularComplexityDescriptorsProperties(%NamesAndValues);

  return $This;
}
|
|
78
|
|
# Initialize class ...
#
# Stores the package name (used as a prefix in error messages) and the list of
# descriptor names provided by this class in the file level class variables.
#
sub _InitializeClass {
  # Class name...
  $ClassName = __PACKAGE__;

  # Descriptor names: a single descriptor is calculated by this class...
  @DescriptorNames = qw(MolecularComplexity);
}
|
|
88
|
|
# Get descriptor names as an array.
#
# This functionality can be either invoked as a class function or an
# object method; any arguments passed in are ignored.
#
sub GetDescriptorNames {
  my(@Names) = @DescriptorNames;

  return @Names;
}
|
|
97
|
|
# Initialize object data...
#
# Assigns the default value of every property used by this class and then
# initializes descriptor names and values. Returns the object.
#
sub _InitializeMolecularComplexityDescriptors {
  my($This) = @_;
  my(%DefaultValues);

  %DefaultValues = (
    # Type of MolecularDescriptor...
    'Type' => 'MolecularComplexity',

    # Molecular complexity corresponds to the number of bits-set or unique keys
    # [ Ref 117-119 ] in molecular fingerprints. Supported fingerprints types:
    #
    # AtomTypesFingerprints
    # ExtendedConnectivityFingerprints
    # MACCSKeys
    # PathLengthFingerprints
    # TopologicalAtomPairsFingerprints
    # TopologicalAtomTripletsFingerprints
    # TopologicalAtomTorsionsFingerprints
    # TopologicalPharmacophoreAtomPairsFingerprints
    # TopologicalPharmacophoreAtomTripletsFingerprints
    #
    # Default: MACCSKeys. It is left empty here and assigned during properties
    # initialization.
    #
    'MolecularComplexityType' => '',

    # Atom types to use for generating fingerprints...
    #
    # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
    # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
    # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
    #
    # Notes:
    #   . AtomicInvariantsAtomTypes is the default for all supported MolecularComplexityType
    #     values except TopologicalPharmacophoreAtomPairsFingerprints and
    #     TopologicalPharmacophoreAtomTripletsFingerprints
    #   . This value is not used for MACCSKeys
    #   . FunctionalClassAtomTypes is the only valid value during topological pharmacophore
    #     fingerprints.
    #
    #   . Default values for AtomicInvariantsToUse and FunctionalClassesToUse are set
    #     appropriately for different types of fingerprints as shown below.
    #
    #     MolecularComplexityType                 AtomicInvariantsToUse
    #
    #     AtomTypesFingerprints                   AS, X, BO, H, FC
    #     TopologicalAtomPairsFingerprints        AS, X, BO, H, FC
    #     TopologicalAtomTripletsFingerprints     AS, X, BO, H, FC
    #     TopologicalAtomTorsionsFingerprints     AS, X, BO, H, FC
    #
    #     ExtendedConnectivityFingerprints        AS, X, BO, H, FC, MN
    #     PathLengthFingerprints                  AS
    #
    #     Default for FunctionalClassesToUse for all fingerprints is set to:
    #
    #     HBD, HBA, PI, NI, Ar, Hal
    #
    #     except for the following two MolecularComplexityType fingerprints:
    #
    #     TopologicalPharmacophoreAtomPairsFingerprints       HBD, HBA, PI, NI, H
    #     TopologicalPharmacophoreAtomTripletsFingerprints    HBD, HBA, PI, NI, H, Ar
    #
    'AtomIdentifierType' => '',

    # Size of MACCS key set: 166 or 322...
    'MACCSKeysSize' => 166,

    # Atomic neighborhoods radius for extended connectivity fingerprints...
    'NeighborhoodRadius' => 2,

    # Minimum and maximum path lengths to use for path length fingerprints...
    'MinPathLength' => 1,
    'MaxPathLength' => 8,

    # By default bond symbols are included in atom path strings used to generate
    # path length fingerprints...
    'UseBondSymbols' => 1,

    # Minimum and maximum bond distance between atom pairs during topological
    # atom pairs/triplets fingerprints...
    'MinDistance' => 1,
    'MaxDistance' => 10,

    # Determines whether to apply triangle inequality to distance triplets...
    #
    # Default for TopologicalAtomTripletsFingerprints: 0
    # Default for TopologicalPharmacophoreAtomTripletsFingerprints: 1
    #
    # It is left empty here and assigned during properties initialization.
    #
    'UseTriangleInequality' => '',

    # Distance bin size used for binning distances during generation of
    # topological pharmacophore atom triplets fingerprints...
    'DistanceBinSize' => 2,

    # Normalization methodology to use for scaling the number of bits-set or unique
    # keys for:
    #
    # ExtendedConnectivityFingerprints
    # TopologicalPharmacophoreAtomPairsFingerprints
    # TopologicalPharmacophoreAtomTripletsFingerprints
    #
    # This option is ignored for all other types of fingerprints.
    #
    # Possible values during extended connectivity fingerprints: None or
    # ByHeavyAtomsCount. Default: None.
    #
    # Possible values during topological pharmacophore atom pairs and triplets
    # fingerprints: None, or ByPossibleKeysCount. Default: None. ByPossibleKeysCount
    # corresponds to total number of possible topological pharmacophore atom pairs
    # or triplets in a molecule.
    #
    'NormalizationMethodology' => 'None',
  );

  # Copy all defaults into the object in one step: keys and values of an unmodified
  # hash are returned in matching order...
  @{$This}{keys %DefaultValues} = values %DefaultValues;

  # Initialize descriptor names and values...
  $This->_InitializeDescriptorNamesAndValues(@DescriptorNames);

  return $This;
}
|
|
218
|
|
# Initialize object properties...
#
# Applies the caller supplied property names and values by invoking the
# corresponding Set<Name> method for each pair and then fills in the defaults
# which depend on MolecularComplexityType. Returns the object.
#
# MolecularComplexityType, when specified, is always applied first: setters for
# NormalizationMethodology and AtomIdentifierType validate their values against
# it, and iterating over the hash directly made that validation depend on hash
# ordering. The remaining properties are applied in sorted name order to keep
# the behavior reproducible from run to run.
#
sub _InitializeMolecularComplexityDescriptorsProperties {
  my($This, %NamesAndValues) = @_;
  my($Name, $MethodName, @Names);

  @Names = grep { $_ ne 'MolecularComplexityType' } keys %NamesAndValues;
  @Names = sort @Names;
  if (exists $NamesAndValues{MolecularComplexityType}) {
    unshift @Names, 'MolecularComplexityType';
  }

  for $Name (@Names) {
    $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  # Make sure MolecularComplexityType is set. The default is assigned only after
  # the explicit values have been applied, so values specified without a type are
  # not validated against it...
  if (!exists $NamesAndValues{MolecularComplexityType}) {
    $This->{MolecularComplexityType} = 'MACCSKeys';
  }

  # Make sure AtomIdentifierType is set: it's not used for MACCSKeys...
  if ($This->{MolecularComplexityType} !~ /^MACCSKeys$/i) {
    if (!exists $NamesAndValues{AtomIdentifierType}) {
      $This->_InitializeAtomIdentifierType();
    }
  }

  # Make sure UseTriangleInequality is set: 1 for topological pharmacophore atom
  # triplets and 0 for topological atom triplets...
  if ($This->{MolecularComplexityType} =~ /^(TopologicalAtomTripletsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) {
    if (!exists $NamesAndValues{UseTriangleInequality}) {
      $This->{UseTriangleInequality} = ($This->{MolecularComplexityType} =~ /^TopologicalPharmacophoreAtomTripletsFingerprints$/i) ? 1 : 0;
    }
  }

  return $This;
}
|
|
251
|
|
# Initialize atom identifier type...
#
# Assigns the default atom identifier type for the current molecular complexity
# type through SetAtomIdentifierType: FunctionalClassAtomTypes for the two
# topological pharmacophore fingerprints and AtomicInvariantsAtomTypes for all
# others. Nothing is done for MACCSKeys. Returns the object.
#
sub _InitializeAtomIdentifierType {
  my($This) = @_;

  # Atom identifier type is not used for MACCS keys...
  return $This if ($This->{MolecularComplexityType} =~ /^MACCSKeys$/i);

  my($DefaultType) = 'AtomicInvariantsAtomTypes';
  if ($This->{MolecularComplexityType} =~ /^(TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) {
    $DefaultType = 'FunctionalClassAtomTypes';
  }

  $This->SetAtomIdentifierType($DefaultType);

  return $This;
}
|
|
268
|
|
# Get abbreviation for specified molecular complexity type or using descriptors object...
#
# This functionality can be either invoked as a class function or an
# object method. The type name is matched without regard to case and an empty
# string is returned for an unknown type.
#
sub GetMolecularComplexityTypeAbbreviation {
  my($FirstParameter) = @_;
  my($ComplexityType, $TypeName, $Abbreviation, $LookupKey, @NamesAndAbbreviations, %AbbreviationFor);

  # Either a descriptors object or a complexity type name is passed in...
  $ComplexityType = _IsMolecularComplexityDescriptors($FirstParameter) ? $FirstParameter->{MolecularComplexityType} : $FirstParameter;

  # NOTE(review): TopologicalAtomTripletsFingerprints and
  # TopologicalAtomTorsionsFingerprints share the abbreviation TATFP - confirm
  # whether that's intended before relying on abbreviations to tell them apart.
  @NamesAndAbbreviations = (
    'AtomTypesFingerprints' => 'ATFP',
    'ExtendedConnectivityFingerprints' => 'ECFP',
    'MACCSKeys' => 'MACCSKeys',
    'PathLengthFingerprints' => 'PLFP',
    'TopologicalAtomPairsFingerprints' => 'TAPFP',
    'TopologicalAtomTripletsFingerprints' => 'TATFP',
    'TopologicalAtomTorsionsFingerprints' => 'TATFP',
    'TopologicalPharmacophoreAtomPairsFingerprints' => 'TPAPFP',
    'TopologicalPharmacophoreAtomTripletsFingerprints' => 'TPATFP',
  );

  # Key the lookup table by lower case names...
  %AbbreviationFor = ();
  while (($TypeName, $Abbreviation) = splice(@NamesAndAbbreviations, 0, 2)) {
    $AbbreviationFor{lc $TypeName} = $Abbreviation;
  }

  $LookupKey = lc $ComplexityType;
  if (exists $AbbreviationFor{$LookupKey}) {
    return $AbbreviationFor{$LookupKey};
  }
  return '';
}
|
|
295
|
|
# Set MACCS key set size...
#
# Only the 166 and 322 key sets are supported. Croaks for any other value and
# returns the object otherwise.
#
sub SetMACCSKeysSize {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetMACCSKeysSize: Size value, $Value, is not valid: It must be a positive integer...";
  }

  # The whole value must be 166 or 322: without an anchor at the end, any integer
  # merely starting with those digits, such as 1660 or 3225, was accepted.
  # Trailing white space is still tolerated to keep accepting values which were
  # valid before.
  if ($Value !~ /^(166|322)\s*$/) {
    croak "Error: ${ClassName}->SetMACCSKeysSize: The current release of MayaChemTools doesn't support MDL MACCS $Value keys...";
  }
  $This->{MACCSKeysSize} = $Value;

  return $This;
}
|
|
311
|
|
# Set minimum path length...
#
# Croaks unless TextUtil::IsPositiveInteger accepts the value; returns the
# object otherwise.
#
sub SetMinPathLength {
  my $This = shift;
  my($Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetMinPathLength: MinPathLength value, $Value, is not valid: It must be a positive integer...";

  $This->{MinPathLength} = $Value;

  return $This;
}
|
|
324
|
|
# Set maximum path length...
#
# Croaks unless TextUtil::IsPositiveInteger accepts the value; returns the
# object otherwise.
#
sub SetMaxPathLength {
  my $This = shift;
  my($Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetMaxPathLength: MaxPathLength value, $Value, is not valid: It must be a positive integer...";

  $This->{MaxPathLength} = $Value;

  return $This;
}
|
|
337
|
|
# Set minimum bond distance between atom pairs during topological and topological
# pharmacophore atom pairs/triplets fingerprints...
#
# Croaks unless TextUtil::IsPositiveInteger accepts the value; returns the
# object otherwise.
#
sub SetMinDistance {
  my $This = shift;
  my($Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid: It must be a positive integer...";

  $This->{MinDistance} = $Value;

  return $This;
}
|
|
351
|
|
# Set maximum bond distance between atom pairs during topological and topological
# pharmacophore atom pairs/triplets fingerprints...
#
# Croaks unless TextUtil::IsPositiveInteger accepts the value; returns the
# object otherwise.
#
sub SetMaxDistance {
  my $This = shift;
  my($Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid: It must be a positive integer...";

  $This->{MaxDistance} = $Value;

  return $This;
}
|
|
365
|
|
# Set atom neighborhood radius...
#
# The radius must be an integer, as determined by TextUtil::IsInteger, which is
# greater than or equal to zero. Croaks otherwise; returns the object on success.
#
sub SetNeighborhoodRadius {
  my $This = shift;
  my($Value) = @_;

  TextUtil::IsInteger($Value)
    or croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid: It must be an integer...";

  # A radius of zero is allowed; negative values are not...
  ($Value < 0)
    and croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid: It must be >= 0...";

  $This->{NeighborhoodRadius} = $Value;

  return $This;
}
|
|
382
|
|
# Set molecular complexity type...
#
# The value is compared against the supported fingerprints type names without
# regard to case and stored as specified. Croaks for an unsupported value;
# returns the object otherwise.
#
sub SetMolecularComplexityType {
  my($This, $Value) = @_;
  my($SupportedTypesPattern);

  $SupportedTypesPattern = join('|', qw(
    AtomTypesFingerprints
    ExtendedConnectivityFingerprints
    MACCSKeys
    PathLengthFingerprints
    TopologicalAtomPairsFingerprints
    TopologicalAtomTripletsFingerprints
    TopologicalAtomTorsionsFingerprints
    TopologicalPharmacophoreAtomPairsFingerprints
    TopologicalPharmacophoreAtomTripletsFingerprints
  ));

  unless ($Value =~ /^($SupportedTypesPattern)$/i) {
    croak "Error: ${ClassName}->SetMolecularComplexityType: MolecularComplexityType value, $Value, is not valid. Supported values: AtomTypesFingerprints, ExtendedConnectivityFingerprints, MACCSKeys, PathLengthFingerprints, TopologicalAtomPairsFingerprints, TopologicalAtomTripletsFingerprints, TopologicalAtomTorsionsFingerprints, TopologicalPharmacophoreAtomPairsFingerprints, or TopologicalPharmacophoreAtomTripletsFingerprints...";
  }

  $This->{MolecularComplexityType} = $Value;

  return $This;
}
|
|
396
|
|
# Set distance bin size for binning pharmacophore atom pair distances in atom triplets...
#
# Croaks unless TextUtil::IsPositiveInteger accepts the value; returns the
# object otherwise.
#
sub SetDistanceBinSize {
  my $This = shift;
  my($Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetDistanceBinSize: DistanceBinSize value, $Value, is not valid: It must be a positive integer...";

  $This->{DistanceBinSize} = $Value;

  return $This;
}
|
|
409
|
|
# Set normalization methodology to use for scaling the number of bits-set or unique keys
# in fingerprints...
#
# Supported values are None, ByHeavyAtomsCount and ByPossibleKeysCount. When the
# molecular complexity type is already available, the value is also checked
# against it: normalization is limited to extended connectivity fingerprints
# (None or ByHeavyAtomsCount) and topological pharmacophore atom pairs/triplets
# fingerprints (None or ByPossibleKeysCount). Croaks for an invalid value;
# returns the object otherwise.
#
sub SetNormalizationMethodology {
  my($This, $Value) = @_;
  my($ComplexityType);

  unless ($Value =~ /^(ByHeavyAtomsCount|ByPossibleKeysCount|None)$/i) {
    croak "Error: ${ClassName}->SetNormalizationMethodology: NormalizationMethodology value, $Value, is not valid. Supported values: None, ByHeavyAtomsCount or ByPossibleKeysCount...";
  }

  # Type specific checks are possible only after complexity type has been set...
  $ComplexityType = $This->{MolecularComplexityType};
  if ($ComplexityType) {
    if ($ComplexityType =~ /^ExtendedConnectivityFingerprints$/i) {
      if ($Value !~ /^(ByHeavyAtomsCount|None)$/i) {
        croak "Error: ${ClassName}->SetNormalizationMethodology: NormalizationMethodology value, $Value, is not valid for MolecularComplexityType: $This->{MolecularComplexityType}. Supported values: None or ByHeavyAtomsCount...";
      }
    }
    elsif ($ComplexityType =~ /^(TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) {
      if ($Value !~ /^(ByPossibleKeysCount|None)$/i) {
        croak "Error: ${ClassName}->SetNormalizationMethodology: NormalizationMethodology value, $Value, is not valid for MolecularComplexityType: $This->{MolecularComplexityType}. Supported values: None or ByPossibleKeysCount...";
      }
    }
    else {
      croak "Error: ${ClassName}->SetNormalizationMethodology: Normalization is not supported for MolecularComplexityType: $This->{MolecularComplexityType}. Valid MolecularComplexityType values: ExtendedConnectivityFingerprints, TopologicalPharmacophoreAtomPairsFingerprints, or TopologicalPharmacophoreAtomTripletsFingerprints...\n";
    }
  }

  $This->{NormalizationMethodology} = $Value;

  return $This;
}
|
|
438
|
|
# Set initial atom identifier type...
#
# The value must be one of the supported atom types; FunctionalClassAtomTypes is
# the only value accepted for topological pharmacophore fingerprints. The type
# can be set only once: an attempt to change it is an error. After storing the
# value, identifier type information is initialized. Croaks on any error;
# returns the object otherwise.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  unless ($IdentifierType =~ /^(AtomicInvariantsAtomTypes|FunctionalClassAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes and UFFAtomTypes.";
  }

  # FunctionalClassAtomTypes is the only valid atom identifier type for pharmacophore fingerprints...
  if ($This->{MolecularComplexityType} =~ /^(TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i
      && $IdentifierType !~ /^FunctionalClassAtomTypes$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported type for $This->{MolecularComplexityType} complexity type: FunctionalClassAtomTypes.";
  }

  # An already assigned type can't be replaced...
  croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type: It's already set..."
    if ($This->{AtomIdentifierType});

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}
|
|
466
|
|
467 # Calculate molecular complexity [ Ref 117-119 ] of a molecule using its fingerprints.
|
|
468 #
|
|
469 # The current release of MayaChemTools supports calculation of molecular complexity
|
|
470 # corresponding to the number of bits-set or unique keys in molecular fingerprints. The
|
|
471 # following types of fingerprints based molecular complexity measures are supported:
|
|
472 #
|
|
473 # AtomTypesFingerprints
|
|
474 # ExtendedConnectivityFingerprints
|
|
475 # MACCSKeys
|
|
476 # PathLengthFingerprints
|
|
477 # TopologicalAtomPairsFingerprints
|
|
478 # TopologicalAtomTripletsFingerprints
|
|
479 # TopologicalAtomTorsionsFingerprints
|
|
480 # TopologicalPharmacophoreAtomPairsFingerprints
|
|
481 # TopologicalPharmacophoreAtomTripletsFingerprints
|
|
482 #
|
|
483 # After the molecular complexity value has been calculated, it can also be normalized by
|
|
484 # scaling the number of bits-set or unique keys for the following types of fingerprints:
|
|
485 #
|
|
486 # ExtendedConnectivityFingerprints
|
|
487 # TopologicalPharmacophoreAtomPairsFingerprints
|
|
488 # TopologicalPharmacophoreAtomTripletsFingerprints
|
|
489 #
|
|
490 # Two types of normalization methodologies are supported: by heavy atoms count for
|
|
491 # extended connectivity fingerprints; by possible keys count for topological pharmacophore
|
|
492 # atom pairs and triplets fingerprints.
|
|
493 #
|
|
sub GenerateDescriptors {
  my($This) = @_;
  my($Succeeded);

  # Start with freshly initialized descriptor values. On failure, a warning is
  # issued and undef is returned; the object is returned on success...
  $This->_InitializeDescriptorValues();

  # Check availability of molecule...
  unless ($This->{Molecule}) {
    carp "Warning: ${ClassName}->GenerateDescriptors: $This->{Type} molecular descriptors generation didn't succeed: Molecule data is not available: Molecule object hasn't been set...";
    return undef;
  }

  # Calculate descriptor values...
  $Succeeded = $This->_CalculateDescriptorValues();
  unless ($Succeeded) {
    carp "Warning: ${ClassName}->GenerateDescriptors: $This->{Type} molecular descriptors generation didn't succeed: Couldn't calculate MolecularComplexity values corresponding to assigned MolecularComplexity atom types...";
    return undef;
  }

  # Set final descriptor values...
  $This->_SetFinalDescriptorValues();

  return $This;
}
|
|
517
|
|
# Calculate molecular complexity value...
#
# Sets up the fingerprints object for the current molecular complexity type,
# generates fingerprints, derives the complexity value from them and normalizes
# it unless the normalization methodology is None. Returns the object on success
# and undef when any of these steps fails.
#
sub _CalculateDescriptorValues {
  my($This) = @_;
  my($FingerprintsObject, $MethodName, $RequestedType, $SupportedType);

  # Complexity type is validated without regard to case and stored as specified,
  # whereas method names are case sensitive: map the stored value to the
  # canonical name, so that a valid value such as maccskeys reaches
  # _SetupMACCSKeys. Any other value falls back to the plain concatenation used
  # previously...
  $RequestedType = $This->{MolecularComplexityType};
  $MethodName = "_Setup" . $RequestedType;
  TYPE: for $SupportedType (qw(AtomTypesFingerprints ExtendedConnectivityFingerprints MACCSKeys PathLengthFingerprints TopologicalAtomPairsFingerprints TopologicalAtomTripletsFingerprints TopologicalAtomTorsionsFingerprints TopologicalPharmacophoreAtomPairsFingerprints TopologicalPharmacophoreAtomTripletsFingerprints)) {
    if (lc($SupportedType) eq lc($RequestedType)) {
      $MethodName = "_Setup" . $SupportedType;
      last TYPE;
    }
  }

  # Setup fingerprints object and generate fingerprints...
  $FingerprintsObject = $This->$MethodName();

  $FingerprintsObject->GenerateFingerprints();

  # Make sure fingerprints generation is successful...
  if (!$FingerprintsObject->IsFingerprintsGenerationSuccessful()) {
    return undef;
  }

  if (!$This->_CalculateMolecularComplexity($FingerprintsObject)) {
    return undef;
  }

  # Normalize molecular complexity...
  if ($This->{NormalizationMethodology} !~ /^None$/i) {
    if (!$This->_NormalizeMolecularComplexity($FingerprintsObject)) {
      return undef;
    }
  }

  return $This;
}
|
|
548
|
|
# Setup atom types fingerprints...
#
# Creates an atom types count fingerprints object for the molecule, with hydrogens
# ignored, and passes on the atom identifier type values to use. Returns the
# fingerprints object.
#
sub _SetupAtomTypesFingerprints {
  my($This) = @_;

  my $FingerprintsObject = Fingerprints::AtomTypesFingerprints->new(
    'Molecule' => $This->{Molecule},
    'Type' => 'AtomTypesCount',
    'AtomIdentifierType' => $This->{AtomIdentifierType},
    'IgnoreHydrogens' => 1,
  );
  $This->_SetAtomIdentifierTypeValuesToUse($FingerprintsObject);

  return $FingerprintsObject;
}
|
|
560
|
|
# Setup extended connectivity fingerprints...
#
# Creates an extended connectivity fingerprints object for the molecule using
# the current neighborhood radius and passes on the atom identifier type values
# to use. Returns the fingerprints object.
#
sub _SetupExtendedConnectivityFingerprints {
  my($This) = @_;

  my $FingerprintsObject = Fingerprints::ExtendedConnectivityFingerprints->new(
    'Molecule' => $This->{Molecule},
    'Type' => 'ExtendedConnectivity',
    'NeighborhoodRadius' => $This->{NeighborhoodRadius},
    'AtomIdentifierType' => $This->{AtomIdentifierType},
  );
  $This->_SetAtomIdentifierTypeValuesToUse($FingerprintsObject);

  return $FingerprintsObject;
}
|
|
572
|
|
# Setup MACCS keys...
#
# Creates a MACCS key bits fingerprints object of the current key set size for
# the molecule and returns it. Atom identifier type is not involved.
#
sub _SetupMACCSKeys {
  my($This) = @_;

  return Fingerprints::MACCSKeys->new(
    'Molecule' => $This->{Molecule},
    'Type' => 'MACCSKeyBits',
    'Size' => $This->{MACCSKeysSize},
  );
}
|
|
583
|
|
# Set up path length fingerprints...
#
# Creates a path length count fingerprints object for the molecule using the
# current path length limits and bond symbols setting, with rings, shared bonds
# and unique paths turned on, and passes on the atom identifier type values to
# use. Returns the fingerprints object.
#
sub _SetupPathLengthFingerprints {
  my($This) = @_;

  my $FingerprintsObject = Fingerprints::PathLengthFingerprints->new(
    'Molecule' => $This->{Molecule},
    'Type' => 'PathLengthCount',
    'AtomIdentifierType' => $This->{AtomIdentifierType},
    'MinLength' => $This->{MinPathLength},
    'MaxLength' => $This->{MaxPathLength},
    'AllowRings' => 1,
    'AllowSharedBonds' => 1,
    'UseBondSymbols' => $This->{UseBondSymbols},
    'UseUniquePaths' => 1,
  );
  $This->_SetAtomIdentifierTypeValuesToUse($FingerprintsObject);

  return $FingerprintsObject;
}
|
|
595
|
|
# Setup topological atom pairs fingerprints...
#
# Creates a topological atom pairs fingerprints object for the molecule using
# the current bond distance limits and passes on the atom identifier type values
# to use. Returns the fingerprints object.
#
sub _SetupTopologicalAtomPairsFingerprints {
  my($This) = @_;

  my $FingerprintsObject = Fingerprints::TopologicalAtomPairsFingerprints->new(
    'Molecule' => $This->{Molecule},
    'MinDistance' => $This->{MinDistance},
    'MaxDistance' => $This->{MaxDistance},
    'AtomIdentifierType' => $This->{AtomIdentifierType},
  );
  $This->_SetAtomIdentifierTypeValuesToUse($FingerprintsObject);

  return $FingerprintsObject;
}
|
|
607
|
|
# Setup topological atom triplets fingerprints...
#
# Creates a topological atom triplets fingerprints object for the molecule using
# the current bond distance limits and triangle inequality setting, and passes
# on the atom identifier type values to use. Returns the fingerprints object.
#
sub _SetupTopologicalAtomTripletsFingerprints {
  my($This) = @_;

  my $FingerprintsObject = Fingerprints::TopologicalAtomTripletsFingerprints->new(
    'Molecule' => $This->{Molecule},
    'MinDistance' => $This->{MinDistance},
    'MaxDistance' => $This->{MaxDistance},
    'UseTriangleInequality' => $This->{UseTriangleInequality},
    'AtomIdentifierType' => $This->{AtomIdentifierType},
  );
  $This->_SetAtomIdentifierTypeValuesToUse($FingerprintsObject);

  return $FingerprintsObject;
}
|
|
619
|
|
# Setup topological atom torsions fingerprints...
#
# Creates a topological atom torsions fingerprints object for the molecule and
# passes on the atom identifier type values to use. Returns the fingerprints
# object.
#
sub _SetupTopologicalAtomTorsionsFingerprints {
  my($This) = @_;

  my $FingerprintsObject = Fingerprints::TopologicalAtomTorsionsFingerprints->new(
    'Molecule' => $This->{Molecule},
    'AtomIdentifierType' => $This->{AtomIdentifierType},
  );
  $This->_SetAtomIdentifierTypeValuesToUse($FingerprintsObject);

  return $FingerprintsObject;
}
|
|
632
|
|
# Setup TopologicalPharmacophoreAtomPairsFingerprints...
#
# Creates a topological pharmacophore atom pairs fingerprints object for the
# molecule using the current bond distance limits and functional classes. The
# fingerprints object itself is asked not to normalize: scaling, if any, is
# applied by this class. Returns the fingerprints object.
#
sub _SetupTopologicalPharmacophoreAtomPairsFingerprints {
  my($This) = @_;
  my($SetSizeToUse);

  # Use fixed size to get total number of possible keys for normalization...
  if ($This->{NormalizationMethodology} =~ /^ByPossibleKeysCount$/i) {
    $SetSizeToUse = 'FixedSize';
  }
  else {
    $SetSizeToUse = 'ArbitrarySize';
  }

  return Fingerprints::TopologicalPharmacophoreAtomPairsFingerprints->new(
    'Molecule' => $This->{Molecule},
    'AtomPairsSetSizeToUse' => $SetSizeToUse,
    'MinDistance' => $This->{MinDistance},
    'MaxDistance' => $This->{MaxDistance},
    'AtomTypesToUse' => \@{$This->{FunctionalClassesToUse}},
    'NormalizationMethodology' => 'None',
    'ValuesPrecision' => 2,
  );
}
|
|
646
|
|
# Setup TopologicalPharmacophoreAtomTripletsFingerprints...
#
# Creates a topological pharmacophore atom triplets fingerprints object for the
# molecule using the current bond distance limits, distance bin size, triangle
# inequality setting and functional classes. Returns the fingerprints object.
#
sub _SetupTopologicalPharmacophoreAtomTripletsFingerprints {
  my($This) = @_;
  my($SetSizeToUse);

  # Use fixed size to get total number of possible keys for normalization...
  if ($This->{NormalizationMethodology} =~ /^ByPossibleKeysCount$/i) {
    $SetSizeToUse = 'FixedSize';
  }
  else {
    $SetSizeToUse = 'ArbitrarySize';
  }

  return Fingerprints::TopologicalPharmacophoreAtomTripletsFingerprints->new(
    'Molecule' => $This->{Molecule},
    'AtomTripletsSetSizeToUse' => $SetSizeToUse,
    'MinDistance' => $This->{MinDistance},
    'MaxDistance' => $This->{MaxDistance},
    'DistanceBinSize' => $This->{DistanceBinSize},
    'UseTriangleInequality' => $This->{UseTriangleInequality},
    'AtomTypesToUse' => \@{$This->{FunctionalClassesToUse}},
  );
}
|
|
660
|
|
# Normalize molecular complexity value...
#
# Dispatches to normalization by heavy atoms count for extended connectivity
# fingerprints or by possible keys count for topological pharmacophore atom
# pairs/triplets fingerprints, and returns the result of that call. For any other
# combination of complexity type and normalization methodology, a warning is
# issued and undef is returned.
#
sub _NormalizeMolecularComplexity {
  my($This, $FingerprintsObject) = @_;
  my($ComplexityType, $Methodology);

  $ComplexityType = $This->{MolecularComplexityType};
  $Methodology = $This->{NormalizationMethodology};

  if ($ComplexityType =~ /^ExtendedConnectivityFingerprints$/i && $Methodology =~ /^ByHeavyAtomsCount$/i) {
    return $This->_NormalizeMolecularComplexityByHeavyAtomsCount($FingerprintsObject);
  }

  if ($ComplexityType =~ /^(TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i && $Methodology =~ /^ByPossibleKeysCount$/i) {
    return $This->_NormalizeMolecularComplexityByPossibleKeysCount($FingerprintsObject);
  }

  warn "Warning: ${ClassName}->_NormalizeMolecularComplexity: NormalizationMethodology value, $This->{NormalizationMethodology}, is not valid. Supported values: ByHeavyAtomsCount or ByPossibleKeysCount...";

  return undef;
}
|
|
677
|
|
# Normalize molecular complexity value by heavy atom count...
#
# Divides the current complexity value by the number of heavy atoms reported by
# the molecule and stores the result of MathUtil::round, called with 2 as its
# second argument. The value is left unchanged when the heavy atoms count is
# zero. The fingerprints object is not used. Returns the object.
#
sub _NormalizeMolecularComplexityByHeavyAtomsCount {
  my($This, $FingerprintsObject) = @_;
  my($HeavyAtomsCount);

  $HeavyAtomsCount = $This->{Molecule}->GetNumOfHeavyAtoms();

  # Nothing to scale by, which also avoids a division by zero...
  if ($HeavyAtomsCount) {
    $This->{MolecularComplexity} = MathUtil::round($This->{MolecularComplexity} / $HeavyAtomsCount, 2) + 0;
  }

  return $This;
}
|
|
694
|
|
# Normalize molecular complexity value by possible keys count...
#
# Divides the current complexity value by the number of values in the
# fingerprints vector of the specified fingerprints object and stores the result
# of MathUtil::round, called with 2 as its second argument. The value is left
# unchanged when that number is zero. Returns the object.
#
sub _NormalizeMolecularComplexityByPossibleKeysCount {
  my($This, $FingerprintsObject) = @_;
  my($PossibleKeysCount);

  $PossibleKeysCount = $FingerprintsObject->GetFingerprintsVector()->GetNumOfValues();

  # Nothing to scale by, which also avoids a division by zero...
  if ($PossibleKeysCount) {
    $This->{MolecularComplexity} = MathUtil::round($This->{MolecularComplexity} / $PossibleKeysCount, 2) + 0;
  }

  return $This;
}
|
|
711
|
|
# Calculate molecular complexity value using fingerprints objects...
#
# Hands the bit vector or the vector of the fingerprints object, depending on
# its vector type, to the matching calculation method and returns the result of
# that call. For any other vector type, a warning is issued and undef is
# returned.
#
sub _CalculateMolecularComplexity {
  my($This, $FingerprintsObject) = @_;

  return $This->_CalculateMolecularComplexityUsingFingerprintsBitVector($FingerprintsObject->GetFingerprintsBitVector())
    if ($FingerprintsObject->GetVectorType() =~ /^FingerprintsBitVector$/i);

  return $This->_CalculateMolecularComplexityUsingFingerprintsVector($FingerprintsObject->GetFingerprintsVector())
    if ($FingerprintsObject->GetVectorType() =~ /^FingerprintsVector$/i);

  warn "Warning: ${ClassName}->_CalculateMolecularComplexity: Fingerprints vector type is not valid. Supported values: FingerprintsBitVector or FingerprintsVector...";

  return undef;
}
|
|
729
|
|
730 # Calculate molecular complexity value using fingerprints vector...
|
|
731 #
|
|
sub _CalculateMolecularComplexityUsingFingerprintsVector {
  my($This, $FingerprintsVector) = @_;
  my($KeysCount);

  # Numerical vectors contribute their non-zero values count; every other
  # vector type contributes its total values count.
  if ($FingerprintsVector->GetType() =~ /^(OrderedNumericalValues|NumericalValues)$/i) {
    $KeysCount = $FingerprintsVector->GetNumOfNonZeroValues();
  }
  else {
    $KeysCount = $FingerprintsVector->GetNumOfValues();
  }

  $This->{MolecularComplexity} = $KeysCount;

  return $This;
}
|
|
739
|
|
740 # Calculate molecular complexity value using fingerprints bit vector...
|
|
741 #
|
|
sub _CalculateMolecularComplexityUsingFingerprintsBitVector {
  my($This, $FingerprintsBitVector) = @_;

  # Complexity is the set bits count reported by the bit vector...
  my $SetBitsCount = $FingerprintsBitVector->GetNumOfSetBits();
  $This->{MolecularComplexity} = $SetBitsCount;

  return $This;
}
|
|
749
|
|
750 # Setup final descriptor values...
|
|
751 #
|
|
sub _SetFinalDescriptorValues {
  my($This) = @_;
  my(@FinalValues);

  # Mark generation as done before the value is handed over...
  $This->{DescriptorsGenerated} = 1;

  # The single descriptor value is the calculated molecular complexity...
  @FinalValues = ($This->{MolecularComplexity});
  $This->SetDescriptorValues(@FinalValues);

  return $This;
}
|
|
761
|
|
762 # Set atom identifier type to use for generating fingerprints...
|
|
763 #
|
|
sub _SetAtomIdentifierTypeValuesToUse {
  my($This, $FingerprintsObject) = @_;

  # Forward the atom identifier specific settings stored on this object to
  # the fingerprints object. Only atomic invariants and functional class atom
  # types carry extra settings; the other supported types need none.
  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    $FingerprintsObject->SetAtomicInvariantsToUse(\@{$This->{AtomicInvariantsToUse}});
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    $FingerprintsObject->SetFunctionalClassesToUse(\@{$This->{FunctionalClassesToUse}});
  }
  elsif ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    # Nothing to do for now...
  }
  else {
    # Unsupported identifier type is a fatal error reported at the caller...
    croak "Error: The value specified, $This->{AtomIdentifierType}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
  }

  # Return the object explicitly, like the sibling methods, instead of
  # leaking the value of whichever statement happened to run last.
  return $This;
}
|
|
780
|
|
781 # Initialize atom identifier type information...
|
|
782 #
|
|
783 # Current supported values:
|
|
784 #
|
|
785 # AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
|
|
786 # MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
|
|
787 #
|
|
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;
  my($IdentifierType);

  $IdentifierType = $This->{AtomIdentifierType};

  # Only atomic invariants and functional class atom types have defaults to
  # set up; the remaining supported types need no initialization for now and
  # anything else triggers a warning.
  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
  }
  elsif ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
  }
  elsif ($IdentifierType !~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    carp "Warning: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  return $This;
}
|
|
808
|
|
809 # Initialize atomic invariants atom types, generated by AtomTypes::AtomicInvariantsAtomTypes
|
|
810 # class, to use for generating initial atom identifiers...
|
|
811 #
|
|
812 # Let:
|
|
813 # AS = Atom symbol corresponding to element symbol
|
|
814 #
|
|
815 # X<n> = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
816 # BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
817 # LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
818 # SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
819 # DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
820 # TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
821 # H<n> = Number of implicit and explicit hydrogens for atom
|
|
822 # Ar = Aromatic annotation indicating whether atom is aromatic
|
|
823 # RA = Ring atom annotation indicating whether atom is a ring
|
|
824 # FC<+n/-n> = Formal charge assigned to atom
|
|
825 # MN<n> = Mass number indicating isotope other than most abundant isotope
|
|
826 # SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
|
|
827 #
|
|
828 # Then:
|
|
829 #
|
|
830 # Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
|
|
831 #
|
|
832 # AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
|
|
833 #
|
|
834 # Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
|
|
835 # optional.
|
|
836 #
|
|
837 # Default atomic invariants used for generating initial atom identifiers are [ Ref 24 ]:
|
|
838 #
|
|
839 # AS, X<n>, BO<n>, H<n>, FC<+n/-n>, MN<n>
|
|
840 #
|
|
841 # In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words
|
|
842 # are also allowed:
|
|
843 #
|
|
844 # X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
|
|
845 # BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
|
|
846 # LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
|
|
847 # SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
|
|
848 # DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
|
|
849 # TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
|
|
850 # H : NumOfImplicitAndExplicitHydrogens
|
|
851 # Ar : Aromatic
|
|
852 # RA : RingAtom
|
|
853 # FC : FormalCharge
|
|
854 # MN : MassNumber
|
|
855 # SM : SpinMultiplicity
|
|
856 #
|
|
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;
  my($ComplexityType, @DefaultInvariants);

  $ComplexityType = $This->{MolecularComplexityType};

  # Complexity types not listed below end up with an empty list...
  @DefaultInvariants = ();

  if ($ComplexityType =~ /^PathLengthFingerprints$/i) {
    @DefaultInvariants = ('AS');
  }
  elsif ($ComplexityType =~ /^ExtendedConnectivityFingerprints$/i) {
    @DefaultInvariants = ('AS', 'X', 'BO', 'H', 'FC', 'MN');
  }
  elsif ($ComplexityType =~ /^(AtomTypesFingerprints|TopologicalAtomPairsFingerprints|TopologicalAtomTripletsFingerprints|TopologicalAtomTorsionsFingerprints)$/i) {
    @DefaultInvariants = ('AS', 'X', 'BO', 'H', 'FC');
  }

  # Assign through the existing array so its reference stays the same...
  @{$This->{AtomicInvariantsToUse}} = @DefaultInvariants;

  return $This;
}
|
|
874
|
|
875 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
|
|
876 # class, to use for generating initial atom identifiers...
|
|
877 #
|
|
878 # Let:
|
|
879 # HBD: HydrogenBondDonor
|
|
880 # HBA: HydrogenBondAcceptor
|
|
881 # PI : PositivelyIonizable
|
|
882 # NI : NegativelyIonizable
|
|
883 # Ar : Aromatic
|
|
884 # Hal : Halogen
|
|
885 # H : Hydrophobic
|
|
886 # RA : RingAtom
|
|
887 # CA : ChainAtom
|
|
888 #
|
|
889 # Then:
|
|
890 #
|
|
891 # Functional class atom type specification for an atom corresponds to:
|
|
892 #
|
|
893 # Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
|
|
894 #
|
|
895 # Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
|
|
896 #
|
|
897 # FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
|
|
898 #
|
|
899 # HydrogenBondDonor: NH, NH2, OH
|
|
900 # HydrogenBondAcceptor: N[!H], O
|
|
901 # PositivelyIonizable: +, NH2
|
|
902 # NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
|
|
903 #
|
|
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;
  my($ComplexityType, @DefaultClasses);

  $ComplexityType = $This->{MolecularComplexityType};

  # Complexity types not listed below end up with an empty list...
  @DefaultClasses = ();

  if ($ComplexityType =~ /^TopologicalPharmacophoreAtomTripletsFingerprints$/i) {
    @DefaultClasses = ('HBD', 'HBA', 'PI', 'NI', 'H', 'Ar');
  }
  elsif ($ComplexityType =~ /^TopologicalPharmacophoreAtomPairsFingerprints$/i) {
    @DefaultClasses = ('HBD', 'HBA', 'PI', 'NI', 'H');
  }
  elsif ($ComplexityType =~ /^(AtomTypesFingerprints|ExtendedConnectivityFingerprints|PathLengthFingerprints|TopologicalAtomPairsFingerprints|TopologicalAtomTripletsFingerprints|TopologicalAtomTorsionsFingerprints)$/i) {
    @DefaultClasses = ('HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal');
  }

  # Assign through the existing array so its reference stays the same...
  @{$This->{FunctionalClassesToUse}} = @DefaultClasses;

  return $This;
}
|
|
921
|
|
922 # Set atomic invariants to use for generation of initial atom identifiers...
|
|
923 #
|
|
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;
  my($Requested, @RequestedInvariants, @ValidatedInvariants);

  # Nothing to set without values: warn and return nothing...
  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  # Atomic invariants only apply to AtomicInvariantsAtomTypes identifiers...
  if ($This->{AtomIdentifierType} !~ /^AtomicInvariantsAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: AtomicInvariantsToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # Values arrive either as one array reference or as a plain list...
  @RequestedInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Validate everything before touching the object, so a bad value croaks
  # without leaving a partially updated list behind.
  @ValidatedInvariants = ();
  for $Requested (@RequestedInvariants) {
    if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($Requested)) {
      croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $Requested, is not supported...\n ";
    }
    push @ValidatedInvariants, $Requested;
  }

  # Replace contents of the existing array with the validated values...
  @{$This->{AtomicInvariantsToUse}} = @ValidatedInvariants;

  return $This;
}
|
|
966
|
|
967 # Set functional classes to use for generation of initial atom identifiers...
|
|
968 #
|
|
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;
  my($Requested, @RequestedClasses, @ValidatedClasses);

  # Nothing to set without values: warn and return nothing...
  if (!@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  # Functional classes only apply to FunctionalClassAtomTypes identifiers...
  if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # Values arrive either as one array reference or as a plain list...
  @RequestedClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Validate everything before touching the object, so a bad value croaks
  # without leaving a partially updated list behind.
  @ValidatedClasses = ();
  for $Requested (@RequestedClasses) {
    if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($Requested)) {
      croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $Requested, is not supported...\n ";
    }
    push @ValidatedClasses, $Requested;
  }

  # Replace contents of the existing array with the validated values...
  @{$This->{FunctionalClassesToUse}} = @ValidatedClasses;

  return $This;
}
|
|
1010
|
|
1011 # Return a string containing data for MolecularComplexityDescriptors object...
|
|
1012 #
|
|
sub StringifyMolecularComplexityDescriptors {
  my($This) = @_;
  my($ComplexityType, $YesOrNo, @Fields);

  # Fields are collected in order and joined with "; " at the very end. The
  # first field carries the descriptor type, the complexity type and the
  # descriptor names and values data.
  @Fields = ("MolecularDescriptorType: $This->{Type}; MolecularComplexityType: $This->{MolecularComplexityType}; " . $This->_StringifyDescriptorNamesAndValues());

  $ComplexityType = $This->{MolecularComplexityType};
  $YesOrNo = sub { return $_[0] ? "Yes" : "No"; };

  # Setup fingerprints specific information...
  if ($ComplexityType =~ /^MACCSKeys$/i) {
    push @Fields, "MACCSKeysSize = $This->{MACCSKeysSize}";
  }
  elsif ($ComplexityType =~ /^ExtendedConnectivityFingerprints$/i) {
    push @Fields, "NeighborhoodRadius = $This->{NeighborhoodRadius}";
    push @Fields, "NormalizationMethodology = $This->{NormalizationMethodology}";
  }
  elsif ($ComplexityType =~ /^PathLengthFingerprints$/i) {
    push @Fields, "MinPathLength = $This->{MinPathLength}";
    push @Fields, "MaxPathLength = $This->{MaxPathLength}";
    push @Fields, "UseBondSymbols: " . $YesOrNo->($This->{UseBondSymbols});
  }
  elsif ($ComplexityType =~ /^(TopologicalAtomPairsFingerprints|TopologicalAtomTorsionsFingerprints)$/i) {
    # Atom pairs and atom torsions report the same two distance fields...
    push @Fields, "MinDistance = $This->{MinDistance}";
    push @Fields, "MaxDistance = $This->{MaxDistance}";
  }
  elsif ($ComplexityType =~ /^TopologicalAtomTripletsFingerprints$/i) {
    push @Fields, "MinDistance = $This->{MinDistance}";
    push @Fields, "MaxDistance = $This->{MaxDistance}";
    push @Fields, "UseTriangleInequality: " . $YesOrNo->($This->{UseTriangleInequality});
  }
  elsif ($ComplexityType =~ /^TopologicalPharmacophoreAtomPairsFingerprints$/i) {
    push @Fields, "MinDistance = $This->{MinDistance}";
    push @Fields, "MaxDistance = $This->{MaxDistance}";
    push @Fields, "NormalizationMethodology = $This->{NormalizationMethodology}";
  }
  elsif ($ComplexityType =~ /^TopologicalPharmacophoreAtomTripletsFingerprints$/i) {
    push @Fields, "MinDistance = $This->{MinDistance}";
    push @Fields, "MaxDistance = $This->{MaxDistance}";
    push @Fields, "NormalizationMethodology = $This->{NormalizationMethodology}";
    push @Fields, "DistanceBinSize: $This->{DistanceBinSize}";
    push @Fields, "UseTriangleInequality: " . $YesOrNo->($This->{UseTriangleInequality});
  }

  # Setup atom identifier information; skipped for MACCSKeys...
  if ($ComplexityType =~ /^(AtomTypesFingerprints|ExtendedConnectivityFingerprints|PathLengthFingerprints|TopologicalAtomPairsFingerprints|TopologicalAtomTripletsFingerprints|TopologicalAtomTorsionsFingerprints|TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) {
    push @Fields, "AtomIdentifierType = $This->{AtomIdentifierType}";

    if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
      my(@InvariantsOrder, @InvariantsInfo, %InvariantsMap);

      @InvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
      %InvariantsMap = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

      # One "key: value" entry per invariant, following the reported order...
      @InvariantsInfo = map { "$_: $InvariantsMap{$_}" } @InvariantsOrder;

      push @Fields, "AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
      push @Fields, "AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@InvariantsOrder, ", ", 0) . ">";
      push @Fields, "AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@InvariantsInfo, ", ", 0) . ">";
    }
    elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
      my(@ClassesOrder, @ClassesInfo, %ClassesMap);

      @ClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
      %ClassesMap = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

      # One "key: value" entry per class, following the reported order...
      @ClassesInfo = map { "$_: $ClassesMap{$_}" } @ClassesOrder;

      push @Fields, "FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
      push @Fields, "FunctionalClassesOrder: <" . TextUtil::JoinWords(\@ClassesOrder, ", ", 0) . ">";
      push @Fields, "AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@ClassesInfo, ", ", 0) . ">";
    }
  }

  return join("; ", @Fields);
}
|
|
1080
|
|
1081 # Is it a MolecularComplexityDescriptors object?
|
|
sub _IsMolecularComplexityDescriptors {
  my($Object) = @_;

  # Only a blessed reference that isa $ClassName qualifies; the blessed check
  # comes first so isa is never invoked on a non-object.
  if (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) {
    return 1;
  }

  return 0;
}
|
|
1087
|
|
1088 1;
|
|
1089
|
|
1090 __END__
|
|
1091
|
|
1092 =head1 NAME
|
|
1093
|
|
1094 MolecularComplexityDescriptors
|
|
1095
|
|
1096 =head1 SYNOPSIS
|
|
1097
|
|
1098 use MolecularDescriptors::MolecularComplexityDescriptors;
|
|
1099
|
|
1100 use MolecularDescriptors::MolecularComplexityDescriptors qw(:all);
|
|
1101
|
|
1102 =head1 DESCRIPTION
|
|
1103
|
|
1104 B<MolecularComplexityDescriptors> class provides the following methods:
|
|
1105
|
|
1106 new, GenerateDescriptors, GetDescriptorNames,
|
|
1107 GetMolecularComplexityTypeAbbreviation, MACCSKeysSize, SetAtomIdentifierType,
|
|
1108 SetAtomicInvariantsToUse, SetDistanceBinSize, SetFunctionalClassesToUse,
|
|
1109 SetMaxDistance, SetMaxPathLength, SetMinDistance, SetMinPathLength,
|
|
1110 SetMolecularComplexityType, SetNeighborhoodRadius, SetNormalizationMethodology,
|
|
1111 StringifyMolecularComplexityDescriptors
|
|
1112
|
|
1113 B<MolecularComplexityDescriptors> is derived from B<MolecularDescriptors> class which in turn
|
|
1114 is derived from B<ObjectProperty> base class that provides methods not explicitly defined
|
|
1115 in B<MolecularComplexityDescriptors>, B<MolecularDescriptors> or B<ObjectProperty> classes using Perl's
|
|
1116 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property:
|
|
1117
|
|
1118 Set<PropertyName>(<PropertyValue>);
|
|
1119 $PropertyValue = Get<PropertyName>();
|
|
1120 Delete<PropertyName>();
|
|
1121
|
|
1122 The current release of MayaChemTools supports calculation of molecular complexity using
|
|
1123 I<MolecularComplexityType> parameter corresponding to number of bits-set or unique
|
|
1124 keys [ Ref 117-119 ] in molecular fingerprints. The valid values for I<MolecularComplexityType>
|
|
1125 are:
|
|
1126
|
|
1127 AtomTypesFingerprints
|
|
1128 ExtendedConnectivityFingerprints
|
|
1129 MACCSKeys
|
|
1130 PathLengthFingerprints
|
|
1131 TopologicalAtomPairsFingerprints
|
|
1132 TopologicalAtomTripletsFingerprints
|
|
1133 TopologicalAtomTorsionsFingerprints
|
|
1134 TopologicalPharmacophoreAtomPairsFingerprints
|
|
1135 TopologicalPharmacophoreAtomTripletsFingerprints
|
|
1136
|
|
1137 Default value for I<MolecularComplexityType>: I<MACCSKeys>.
|
|
1138
|
|
1139 I<AtomIdentifierType> parameter name corresponds to atom types used during generation of
|
|
1140 fingerprints. The valid values for I<AtomIdentifierType> are: I<AtomicInvariantsAtomTypes,
|
|
1141 DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes,
|
|
1142 SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes>. I<AtomicInvariantsAtomTypes>
|
|
1143 is not supported for following values of I<MolecularComplexityType>: I<MACCSKeys,
|
|
1144 TopologicalPharmacophoreAtomPairsFingerprints, TopologicalPharmacophoreAtomTripletsFingerprints>.
|
|
1145 I<FunctionalClassAtomTypes> is the only valid value of I<AtomIdentifierType> for topological
|
|
1146 pharmacophore fingerprints.
|
|
1147
|
|
1148 Default value for I<AtomIdentifierType>: I<AtomicInvariantsAtomTypes> for all fingerprints;
|
|
1149 I<FunctionalClassAtomTypes> for topological pharmacophore fingerprints.
|
|
1150
|
|
1151 I<AtomicInvariantsToUse> parameter name and values are used during I<AtomicInvariantsAtomTypes>
|
|
1152 value of parameter I<AtomIdentifierType>. It's a list of space separated valid atomic invariant atom types.
|
|
1153
|
|
1154 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB, H, Ar, RA, FC, MN, SM>.
|
|
1155 Default value for I<AtomicInvariantsToUse> parameter are set differently for different fingerprints
|
|
1156 using I<MolecularComplexityType> parameter as shown below:
|
|
1157
|
|
1158 MolecularComplexityType AtomicInvariantsToUse
|
|
1159
|
|
1160 AtomTypesFingerprints AS X BO H FC
|
|
1161 TopologicalAtomPairsFingerprints AS X BO H FC
|
|
1162 TopologicalAtomTripletsFingerprints AS X BO H FC
|
|
1163 TopologicalAtomTorsionsFingerprints AS X BO H FC
|
|
1164
|
|
1165 ExtendedConnectivityFingerprints AS X BO H FC MN
|
|
1166 PathLengthFingerprints AS
|
|
1167
|
|
1168 I<FunctionalClassesToUse> parameter name and values are used during I<FunctionalClassAtomTypes>
|
|
1169 value of parameter I<AtomIdentifierType>. It's a list of space separated valid functional class atom types.
|
|
1170
|
|
1171 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>.
|
|
1172
|
|
1173 Default value for I<FunctionalClassesToUse> parameter is set to:
|
|
1174
|
|
1175 HBD HBA PI NI Ar Hal
|
|
1176
|
|
1177 for all fingerprints except for the following two I<MolecularComplexityType> fingerprints:
|
|
1178
|
|
1179 MolecularComplexityType FunctionalClassesToUse
|
|
1180
|
|
1181 TopologicalPharmacophoreAtomPairsFingerprints HBD HBA PI NI H
|
|
1182 TopologicalPharmacophoreAtomTripletsFingerprints HBD HBA PI NI H Ar
|
|
1183
|
|
1184 I<MACCSKeysSize> parameter name is only used during I<MACCSKeys> value of
|
|
1185 I<MolecularComplexityType> and corresponds to size of MACCS key set. Possible
|
|
1186 values: I<166 or 322>. Default value: I<166>.
|
|
1187
|
|
1188 I<NeighborhoodRadius> parameter name is only used during I<ExtendedConnectivityFingerprints>
|
|
1189 value of I<MolecularComplexityType> and corresponds to atomic neighborhoods radius for
|
|
1190 generating extended connectivity fingerprints. Possible values: positive integer. Default value:
|
|
1191 I<2>.
|
|
1192
|
|
1193 I<MinPathLength> and I<MaxPathLength> parameters are only used during I<PathLengthFingerprints>
|
|
1194 value of I<MolecularComplexityType> and correspond to minimum and maximum path lengths to use
|
|
1195 for generating path length fingerprints. Possible values: positive integers. Default value: I<MinPathLength - 1>;
|
|
1196 I<MaxPathLength - 8>.
|
|
1197
|
|
1198 I<UseBondSymbols> parameter is only used during I<PathLengthFingerprints> value of
|
|
1199 I<MolecularComplexityType> and indicates whether bond symbols are included in atom path
|
|
1200 strings used to generate path length fingerprints. Possible value: I<Yes or No>. Default value:
|
|
1201 I<Yes>.
|
|
1202
|
|
1203 I<MinDistance> and I<MaxDistance> parameters are only used during I<TopologicalAtomPairsFingerprints>
|
|
1204 and I<TopologicalAtomTripletsFingerprints> values of I<MolecularComplexityType> and correspond to
|
|
1205 minimum and maximum bond distance between atom pairs during topological pharmacophore fingerprints.
|
|
1206 Possible values: positive integers. Default value: I<MinDistance - 1>; I<MaxDistance - 10>.
|
|
1207
|
|
1208 I<UseTriangleInequality> parameter is used during these values for I<MolecularComplexityType>:
|
|
1209 I<TopologicalAtomTripletsFingerprints> and I<TopologicalPharmacophoreAtomTripletsFingerprints>.
|
|
1210 Possible values: I<Yes or No>. It determines whether to apply triangle inequality to distance triplets.
|
|
1211 Default value: I<TopologicalAtomTripletsFingerprints - No>;
|
|
1212 I<TopologicalPharmacophoreAtomTripletsFingerprints - Yes>.
|
|
1213
|
|
1214 I<DistanceBinSize> parameter is used during I<TopologicalPharmacophoreAtomTripletsFingerprints>
|
|
1215 value of I<MolecularComplexityType> and corresponds to distance bin size used for binning
|
|
1216 distances during generation of topological pharmacophore atom triplets fingerprints. Possible
|
|
1217 value: positive integer. Default value: I<2>.
|
|
1218
|
|
1219 I<NormalizationMethodology> is only used for these values for I<MolecularComplexityType>:
|
|
1220 I<ExtendedConnectivityFingerprints>, I<TopologicalPharmacophoreAtomPairsFingerprints>
|
|
1221 and I<TopologicalPharmacophoreAtomTripletsFingerprints>. It corresponds to normalization
|
|
1222 methodology to use for scaling the number of bits-set or unique keys during generation of
|
|
1223 fingerprints. Possible values during I<ExtendedConnectivityFingerprints>: I<None or
|
|
1224 ByHeavyAtomsCount>; Default value: I<None>. Possible values during topological
|
|
1225 pharmacophore atom pairs and triplets fingerprints: I<None or ByPossibleKeysCount>;
|
|
1226 Default value: I<None>. I<ByPossibleKeysCount> corresponds to total number of
|
|
1227 possible topological pharmacophore atom pairs or triplets in a molecule.
|
|
1228
|
|
1229 =head2 METHODS
|
|
1230
|
|
1231 =over 4
|
|
1232
|
|
1233 =item B<new>
|
|
1234
|
|
1235 $NewMolecularComplexityDescriptors = new MolecularDescriptors::
|
|
1236 MolecularComplexityDescriptors(
|
|
1237 %NamesAndValues);
|
|
1238
|
|
1239 Using specified I<MolecularComplexityDescriptors> property names and values hash, B<new>
|
|
1240 method creates a new object and returns a reference to newly created B<MolecularComplexityDescriptors>
|
|
1241 object. By default, the following properties are initialized:
|
|
1242
|
|
1243 Molecule = ''
|
|
1244 Type = 'MolecularComplexity'
|
|
1245 MolecularComplexityType = 'MACCSKeys'
|
|
1246 AtomIdentifierType = ''
|
|
1247 MACCSKeysSize = 166
|
|
1248 NeighborhoodRadius = 2
|
|
1249 MinPathLength = 1
|
|
1250 MaxPathLength = 8
|
|
1251 UseBondSymbols = 1
|
|
1252 MinDistance = 1
|
|
1253 MaxDistance = 10
|
|
1254 UseTriangleInequality = ''
|
|
1255 DistanceBinSize = 2
|
|
1256 NormalizationMethodology = 'None'
|
|
1257 @DescriptorNames = ('MolecularComplexity')
|
|
1258 @DescriptorValues = ('None')
|
|
1259
|
|
1260 Examples:
|
|
1261
|
|
1262 $MolecularComplexityDescriptors = new MolecularDescriptors::
|
|
1263 MolecularComplexityDescriptors(
|
|
1264 'Molecule' => $Molecule);
|
|
1265
|
|
1266 $MolecularComplexityDescriptors = new MolecularDescriptors::
|
|
1267 MolecularComplexityDescriptors();
|
|
1268
|
|
1269 $MolecularComplexityDescriptors->SetMolecule($Molecule);
|
|
1270 $MolecularComplexityDescriptors->GenerateDescriptors();
|
|
1271 print "MolecularComplexityDescriptors: $MolecularComplexityDescriptors\n";
|
|
1272
|
|
1273
|
|
1274 =item B<GenerateDescriptors>
|
|
1275
|
|
1276 $MolecularComplexityDescriptors->GenerateDescriptors();
|
|
1277
|
|
1278 Calculates MolecularComplexity value for a molecule and returns I<MolecularComplexityDescriptors>.
|
|
1279
|
|
1280 =item B<GetDescriptorNames>
|
|
1281
|
|
1282 @DescriptorNames = $MolecularComplexityDescriptors->GetDescriptorNames();
|
|
1283 @DescriptorNames = MolecularDescriptors::MolecularComplexityDescriptors::
|
|
1284 GetDescriptorNames();
|
|
1285
|
|
1286 Returns all available descriptor names as an array.
|
|
1287
|
|
1288 =item B<GetMolecularComplexityTypeAbbreviation>
|
|
1289
|
|
1290 $Abbrev = $MolecularComplexityDescriptors->
|
|
1291 GetMolecularComplexityTypeAbbreviation();
|
|
1292 $Abbrev = MolecularDescriptors::MolecularComplexityDescriptors::
|
|
1293 GetMolecularComplexityTypeAbbreviation($ComplexityType);
|
|
1294
|
|
1295 Returns abbreviation for a specified molecular complexity type or corresponding to
|
|
1296 I<MolecularComplexityDescriptors> object.
|
|
1297
|
|
1298 =item B<SetMACCSKeysSize>
|
|
1299
|
|
1300 $MolecularComplexityDescriptors->SetMACCSKeysSize($Size);
|
|
1301
|
|
1302 Sets MACCS keys size and returns I<MolecularComplexityDescriptors>.
|
|
1303
|
|
1304 =item B<SetAtomIdentifierType>
|
|
1305
|
|
1306 $MolecularComplexityDescriptors->SetAtomIdentifierType($IdentifierType);
|
|
1307
|
|
1308 Sets atom I<IdentifierType> to use during fingerprints generation corresponding to
|
|
1309 I<MolecularComplexityType> and returns I<MolecularComplexityDescriptors>.
|
|
1310
|
|
1311 Possible values: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
|
|
1312 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
|
|
1313 TPSAAtomTypes, UFFAtomTypes>.
|
|
1314
|
|
1315 =item B<SetAtomicInvariantsToUse>
|
|
1316
|
|
1317 $MolecularComplexityDescriptors->SetAtomicInvariantsToUse($ValuesRef);
|
|
1318 $MolecularComplexityDescriptors->SetAtomicInvariantsToUse(@Values);
|
|
1319
|
|
1320 Sets atomic invariants to use during I<AtomicInvariantsAtomTypes> value of I<AtomIdentifierType>
|
|
1321 for fingerprints generation and returns I<MolecularComplexityDescriptors>.
|
|
1322
|
|
1323 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB,
|
|
1324 H, Ar, RA, FC, MN, SM>. Default value [ Ref 24 ]: I<AS,X,BO,H,FC,MN>.
|
|
1325
|
|
1326 The atomic invariants abbreviations correspond to:
|
|
1327
|
|
1328 AS = Atom symbol corresponding to element symbol
|
|
1329
|
|
1330 X<n> = Number of non-hydrogen atom neighbors or heavy atoms
|
|
1331 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms
|
|
1332 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms
|
|
1333 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms
|
|
1334 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms
|
|
1335 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms
|
|
1336 H<n> = Number of implicit and explicit hydrogens for atom
|
|
1337 Ar = Aromatic annotation indicating whether atom is aromatic
|
|
1338 RA = Ring atom annotation indicating whether atom is a ring
|
|
1339 FC<+n/-n> = Formal charge assigned to atom
|
|
1340 MN<n> = Mass number indicating isotope other than most abundant isotope
|
|
1341 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or
|
|
1342 3 (triplet)
|
|
1343
|
|
1344 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
|
|
1345
|
|
1346 AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
|
|
1347
|
|
1348 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
|
|
1349 optional. Atom type specification doesn't include atomic invariants with zero or undefined values.
|
|
1350
|
|
1351 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words
|
|
1352 are also allowed:
|
|
1353
|
|
1354 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
|
|
1355 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
|
|
1356 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
|
|
1357 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
|
|
1358 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
|
|
1359 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
|
|
1360 H : NumOfImplicitAndExplicitHydrogens
|
|
1361 Ar : Aromatic
|
|
1362 RA : RingAtom
|
|
1363 FC : FormalCharge
|
|
1364 MN : MassNumber
|
|
1365 SM : SpinMultiplicity
|
|
1366
|
|
1367 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant
|
|
1368 atom types.
|
|
1369
|
|
1370 =item B<SetDistanceBinSize>
|
|
1371
|
|
1372 $MolecularComplexityDescriptors->SetDistanceBinSize($BinSize);
|
|
1373
|
|
1374 Sets distance bin size used to bin distances between atom pairs in atom triplets for
|
|
1375 topological pharmacophore atom triplets fingerprints generation and returns
|
|
1376 I<MolecularComplexityDescriptors>.
|
|
1377
|
|
1378 =item B<SetFunctionalClassesToUse>
|
|
1379
|
|
1380 $MolecularComplexityDescriptors->SetFunctionalClassesToUse($ValuesRef);
|
|
1381 $MolecularComplexityDescriptors->SetFunctionalClassesToUse(@Values);
|
|
1382
|
|
1383 Sets functional classes invariants to use during I<FunctionalClassAtomTypes> value of I<AtomIdentifierType>
|
|
1384 for fingerprints generation and returns I<MolecularComplexityDescriptors>.
|
|
1385
|
|
1386 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>.
|
|
1387 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>.
|
|
1388
|
|
1389 The functional class abbreviations correspond to:
|
|
1390
|
|
1391 HBD: HydrogenBondDonor
|
|
1392 HBA: HydrogenBondAcceptor
|
|
1393 PI : PositivelyIonizable
|
|
1394 NI : NegativelyIonizable
|
|
1395 Ar : Aromatic
|
|
1396 Hal : Halogen
|
|
1397 H : Hydrophobic
|
|
1398 RA : RingAtom
|
|
1399 CA : ChainAtom
|
|
1400
|
|
1401 Functional class atom type specification for an atom corresponds to:
|
|
1402
|
|
1403 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA or None
|
|
1404
|
|
1405 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom
|
|
1406 types. It uses the following definitions [ Ref 60-61, Ref 65-66 ]:
|
|
1407
|
|
1408 HydrogenBondDonor: NH, NH2, OH
|
|
1409 HydrogenBondAcceptor: N[!H], O
|
|
1410 PositivelyIonizable: +, NH2
|
|
1411 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
|
|
1412
|
|
1413 =item B<SetMaxDistance>
|
|
1414
|
|
1415 $MolecularComplexityDescriptors->SetMaxDistance($MaxDistance);
|
|
1416
|
|
1417 Sets maximum distance to use during topological atom pairs and triplets fingerprints
|
|
1418 generation and returns I<MolecularComplexityDescriptors>.
|
|
1419
|
|
1420 =item B<SetMaxPathLength>
|
|
1421
|
|
1422 $MolecularComplexityDescriptors->SetMaxPathLength($Length);
|
|
1423
|
|
1424 Sets maximum path length to use during path length fingerprints generation and returns
|
|
1425 I<MolecularComplexityDescriptors>.
|
|
1426
|
|
1427 =item B<SetMinDistance>
|
|
1428
|
|
1429 $MolecularComplexityDescriptors->SetMinDistance($MinDistance);
|
|
1430
|
|
1431 Sets minimum distance to use during topological atom pairs and triplets fingerprints
|
|
1432 generation and returns I<MolecularComplexityDescriptors>.
|
|
1433
|
|
1434 =item B<SetMinPathLength>
|
|
1435
|
|
1436 $MolecularComplexityDescriptors->SetMinPathLength($MinPathLength);
|
|
1437
|
|
1438 Sets minimum path length to use during path length fingerprints generation and returns
|
|
1439 I<MolecularComplexityDescriptors>.
|
|
1440
|
|
1441 =item B<SetMolecularComplexityType>
|
|
1442
|
|
1443 $MolecularComplexityDescriptors->SetMolecularComplexityType($ComplexityType);
|
|
1444
|
|
1445 Sets molecular complexity type to use for calculating its value and returns
|
|
1446 I<MolecularComplexityDescriptors>.
|
|
1447
|
|
1448 =item B<SetNeighborhoodRadius>
|
|
1449
|
|
1450 $MolecularComplexityDescriptors->SetNeighborhoodRadius($Radius);
|
|
1451
|
|
1452 Sets neighborhood radius to use during extended connectivity fingerprints generation and
|
|
1453 returns I<MolecularComplexityDescriptors>.
|
|
1454
|
|
1455 =item B<SetNormalizationMethodology>
|
|
1456
|
|
1457 $MolecularComplexityDescriptors->SetNormalizationMethodology($Methodology);
|
|
1458
|
|
1459 Sets normalization methodology to use during calculation of molecular complexity
|
|
1460 corresponding to extended connectivity, topological pharmacophore atom pairs and
|
|
1461 triplets fingerprints, and returns I<MolecularComplexityDescriptors>.
|
|
1462
|
|
1463 =item B<StringifyMolecularComplexityDescriptors>
|
|
1464
|
|
1465 $String = $MolecularComplexityDescriptors->
|
|
1466 StringifyMolecularComplexityDescriptors();
|
|
1467
|
|
1468 Returns a string containing information about I<MolecularComplexityDescriptors> object.
|
|
1469
|
|
1470 =back
|
|
1471
|
|
1472 =head1 AUTHOR
|
|
1473
|
|
1474 Manish Sud <msud@san.rr.com>
|
|
1475
|
|
1476 =head1 SEE ALSO
|
|
1477
|
|
1478 MolecularDescriptors.pm, MolecularDescriptorsGenerator.pm
|
|
1479
|
|
1480 =head1 COPYRIGHT
|
|
1481
|
|
1482 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
1483
|
|
1484 This file is part of MayaChemTools.
|
|
1485
|
|
1486 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
1487 the terms of the GNU Lesser General Public License as published by the Free
|
|
1488 Software Foundation; either version 3 of the License, or (at your option)
|
|
1489 any later version.
|
|
1490
|
|
1491 =cut
|