Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/MolecularDescriptors/MolecularComplexityDescriptors.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4816e4a8ae95 |
---|---|
1 package MolecularDescriptors::MolecularComplexityDescriptors; | |
2 # | |
3 # $RCSfile: MolecularComplexityDescriptors.pm,v $ | |
4 # $Date: 2015/02/28 20:49:20 $ | |
5 # $Revision: 1.15 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability or fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use Carp; | |
31 use Exporter; | |
32 use Scalar::Util (); | |
33 use TextUtil (); | |
34 use MathUtil (); | |
35 use Atom; | |
36 use Molecule; | |
37 use MolecularDescriptors::MolecularDescriptors; | |
38 use AtomTypes::AtomicInvariantsAtomTypes; | |
39 use AtomTypes::FunctionalClassAtomTypes; | |
40 use Fingerprints::AtomTypesFingerprints; | |
41 use Fingerprints::ExtendedConnectivityFingerprints; | |
42 use Fingerprints::MACCSKeys; | |
43 use Fingerprints::PathLengthFingerprints; | |
44 use Fingerprints::TopologicalAtomPairsFingerprints; | |
45 use Fingerprints::TopologicalAtomTripletsFingerprints; | |
46 use Fingerprints::TopologicalAtomTorsionsFingerprints; | |
47 use Fingerprints::TopologicalPharmacophoreAtomPairsFingerprints; | |
48 use Fingerprints::TopologicalPharmacophoreAtomTripletsFingerprints; | |
49 | |
# Package variables consulted by Perl for inheritance and by Exporter...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

# Nothing is exported by default; the two helper functions are available on request
# or through the "all" tag...
@ISA = qw(MolecularDescriptors::MolecularDescriptors Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(GetDescriptorNames GetMolecularComplexityTypeAbbreviation);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# File scoped class data; values are assigned by _InitializeClass...
my($ClassName, @DescriptorNames);
_InitializeClass();

# Stringification of objects is delegated to the named method...
use overload '""' => 'StringifyMolecularComplexityDescriptors';
64 | |
# Class constructor...
#
# Creates the object through the parent class constructor, blesses it into the
# requested class, installs default property values and then applies the caller
# supplied names and values.
#
# The invocant may be either a class name or an existing object.
#
sub new {
  my($Class, %NamesAndValues) = @_;
  my($This, $TargetClass);

  $TargetClass = ref($Class) || $Class;
  $This = bless $Class->SUPER::new(), $TargetClass;

  # Defaults first, then explicitly specified properties...
  $This->_InitializeMolecularComplexityDescriptors();
  $This->_InitializeMolecularComplexityDescriptorsProperties(%NamesAndValues);

  return $This;
}
78 | |
# Initialize class data...
#
# Assigns the file scoped class name, which is used as a prefix in error and warning
# messages, and the list of descriptor names calculated by this class.
#
sub _InitializeClass {
  $ClassName = __PACKAGE__;

  @DescriptorNames = qw(MolecularComplexity);
}
88 | |
# Return the names of the descriptors calculated by this class as a list.
#
# Any arguments are ignored, so this can be called either as a class function or as
# an object method.
#
sub GetDescriptorNames {
  return @DescriptorNames;
}
97 | |
# Initialize object data...
#
# Installs the default value of every property used during calculation of molecular
# complexity and then initializes descriptor names and values.
#
sub _InitializeMolecularComplexityDescriptors {
  my($This) = @_;
  my(@PropertyDefaults, $Name, $Value);

  @PropertyDefaults = (
    # Type of MolecularDescriptor...
    'Type' => 'MolecularComplexity',

    # The current release of MayaChemTools supports calculation of molecular complexity
    # corresponding to number of bits-set or unique keys [ Ref 117-119 ] in molecular
    # fingerprints. The following types of fingerprints based molecular complexity
    # measures are supported:
    #
    # AtomTypesFingerprints
    # ExtendedConnectivityFingerprints
    # MACCSKeys
    # PathLengthFingerprints
    # TopologicalAtomPairsFingerprints
    # TopologicalAtomTripletsFingerprints
    # TopologicalAtomTorsionsFingerprints
    # TopologicalPharmacophoreAtomPairsFingerprints
    # TopologicalPharmacophoreAtomTripletsFingerprints
    #
    # Default: MACCSKeys
    #
    'MolecularComplexityType' => '',

    # Atom types to use for generating fingerprints...
    #
    # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
    # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
    # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
    #
    # Notes:
    # . AtomicInvariantsAtomTypes is the default for all supported MolecularComplexityType
    #   values except TopologicalPharmacophoreAtomPairsFingerprints and
    #   TopologicalPharmacophoreAtomTripletsFingerprints
    # . This value is not used for MACCSKeys
    # . FunctionalClassAtomTypes is the only valid value during topological pharmacophore
    #   fingerprints.
    # . Default values for AtomicInvariantsToUse and FunctionalClassesToUse are set
    #   appropriately for different types of fingerprints as shown below.
    #
    # MolecularComplexityType               AtomicInvariantsToUse
    #
    # AtomTypesFingerprints                 AS, X, BO, H, FC
    # TopologicalAtomPairsFingerprints      AS, X, BO, H, FC
    # TopologicalAtomTripletsFingerprints   AS, X, BO, H, FC
    # TopologicalAtomTorsionsFingerprints   AS, X, BO, H, FC
    #
    # ExtendedConnectivityFingerprints      AS, X, BO, H, FC, MN
    # PathLengthFingerprints                AS
    #
    # Default for FunctionalClassesToUse for all fingerprints is set to:
    #
    # HBD, HBA, PI, NI, Ar, Hal
    #
    # except for the following two MolecularComplexityType fingerprints:
    #
    # TopologicalPharmacophoreAtomPairsFingerprints      HBD, HBA, PI, NI, H
    # TopologicalPharmacophoreAtomTripletsFingerprints   HBD, HBA, PI, NI, H, Ar
    #
    'AtomIdentifierType' => '',

    # Size of MACCS key set: 166 or 322...
    'MACCSKeysSize' => 166,

    # Atomic neighborhoods radius for extended connectivity fingerprints...
    'NeighborhoodRadius' => 2,

    # Minimum and maximum path lengths to use for path length fingerprints...
    'MinPathLength' => 1,
    'MaxPathLength' => 8,

    # By default bond symbols are included in atom path strings used to generate
    # path length fingerprints...
    'UseBondSymbols' => 1,

    # Minimum and maximum bond distance between atom pairs during topological
    # atom pairs/triplets fingerprints...
    'MinDistance' => 1,
    'MaxDistance' => 10,

    # Determines whether to apply triangle inequality to distance triplets...
    #
    # Default for TopologicalAtomTripletsFingerprints: 0
    # Default for TopologicalPharmacophoreAtomTripletsFingerprints: 1
    #
    'UseTriangleInequality' => '',

    # Distance bin size used for binning distances during generation of
    # topological pharmacophore atom triplets fingerprints...
    'DistanceBinSize' => 2,

    # Normalization methodology to use for scaling the number of bits-set or unique
    # keys for:
    #
    # ExtendedConnectivityFingerprints
    # TopologicalPharmacophoreAtomPairsFingerprints
    # TopologicalPharmacophoreAtomTripletsFingerprints
    #
    # This option is ignored for all other types of fingerprints.
    #
    # Possible values during extended connectivity fingerprints: None or
    # ByHeavyAtomsCount. Default: None.
    #
    # Possible values during topological pharmacophore atom pairs and triplets
    # fingerprints: None or ByPossibleKeysCount. Default: None. ByPossibleKeysCount
    # corresponds to total number of possible topological pharmacophore atom pairs
    # or triplets in a molecule.
    #
    'NormalizationMethodology' => 'None',
  );

  # Apply the defaults in the order listed above...
  while (@PropertyDefaults) {
    ($Name, $Value) = splice(@PropertyDefaults, 0, 2);
    $This->{$Name} = $Value;
  }

  # Initialize descriptor names and values...
  $This->_InitializeDescriptorNamesAndValues(@DescriptorNames);

  return $This;
}
218 | |
# Initialize object properties...
#
# Invokes Set<Name>(<Value>) for each specified name and value pair and then fills in
# defaults, which depend on the molecular complexity type, for MolecularComplexityType,
# AtomIdentifierType and UseTriangleInequality when these were not specified.
#
# NOTE(review): setters are invoked in hash order, which is unspecified. Validation in
# one setter that reads a property assigned by another (e.g. AtomIdentifierType against
# MolecularComplexityType) therefore appears to depend on that order - confirm whether
# an explicit order is wanted.
#
sub _InitializeMolecularComplexityDescriptorsProperties {
  my($This, %NamesAndValues) = @_;
  my($SetterName);

  for my $Name (keys %NamesAndValues) {
    $SetterName = "Set${Name}";
    $This->$SetterName($NamesAndValues{$Name});
  }

  # Make sure MolecularComplexityType is set...
  if (!exists $NamesAndValues{MolecularComplexityType}) {
    $This->{MolecularComplexityType} = 'MACCSKeys';
  }

  # Make sure AtomIdentifierType is set; it is not used for MACCSKeys...
  if ($This->{MolecularComplexityType} !~ /^MACCSKeys$/i && !exists $NamesAndValues{AtomIdentifierType}) {
    $This->_InitializeAtomIdentifierType();
  }

  # Make sure UseTriangleInequality is set for atom triplets fingerprints...
  if (!exists $NamesAndValues{UseTriangleInequality}) {
    if ($This->{MolecularComplexityType} =~ /^TopologicalPharmacophoreAtomTripletsFingerprints$/i) {
      $This->{UseTriangleInequality} = 1;
    }
    elsif ($This->{MolecularComplexityType} =~ /^TopologicalAtomTripletsFingerprints$/i) {
      $This->{UseTriangleInequality} = 0;
    }
  }

  return $This;
}
251 | |
# Initialize atom identifier type...
#
# Nothing is done for MACCSKeys. For the two topological pharmacophore fingerprints
# types, the atom identifier type is set to FunctionalClassAtomTypes; for every other
# type it is set to AtomicInvariantsAtomTypes.
#
sub _InitializeAtomIdentifierType {
  my($This) = @_;
  my($DefaultIdentifierType);

  if ($This->{MolecularComplexityType} !~ /^MACCSKeys$/i) {
    $DefaultIdentifierType = 'AtomicInvariantsAtomTypes';
    if ($This->{MolecularComplexityType} =~ /^(TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) {
      $DefaultIdentifierType = 'FunctionalClassAtomTypes';
    }

    $This->SetAtomIdentifierType($DefaultIdentifierType);
  }

  return $This;
}
268 | |
# Get abbreviation for a molecular complexity type...
#
# The first parameter is either a descriptors object, in which case its
# MolecularComplexityType is used, or a molecular complexity type name. Names are
# matched case insensitively. An empty string is returned for an unknown type.
#
# NOTE(review): TopologicalAtomTripletsFingerprints and
# TopologicalAtomTorsionsFingerprints both map to 'TATFP' - confirm this is intended.
#
sub GetMolecularComplexityTypeAbbreviation {
  my($FirstParameter) = @_;
  my($ComplexityType, $LookupKey, %AbbreviationMap);

  $ComplexityType = _IsMolecularComplexityDescriptors($FirstParameter) ? $FirstParameter->{MolecularComplexityType} : $FirstParameter;

  %AbbreviationMap = (
    lc 'AtomTypesFingerprints' => 'ATFP',
    lc 'ExtendedConnectivityFingerprints' => 'ECFP',
    lc 'MACCSKeys' => 'MACCSKeys',
    lc 'PathLengthFingerprints' => 'PLFP',
    lc 'TopologicalAtomPairsFingerprints' => 'TAPFP',
    lc 'TopologicalAtomTripletsFingerprints' => 'TATFP',
    lc 'TopologicalAtomTorsionsFingerprints' => 'TATFP',
    lc 'TopologicalPharmacophoreAtomPairsFingerprints' => 'TPAPFP',
    lc 'TopologicalPharmacophoreAtomTripletsFingerprints' => 'TPATFP',
  );

  $LookupKey = lc $ComplexityType;
  if (exists $AbbreviationMap{$LookupKey}) {
    return $AbbreviationMap{$LookupKey};
  }

  return '';
}
295 | |
# Set MACCS key set size...
#
# The value must be a positive integer and exactly 166 or 322; any other value causes
# an exception. The supported sizes check is anchored at both ends so that values
# which merely start with 166 or 322 (e.g. 1660 or 3225) are rejected as well.
#
# Returns the object.
#
sub SetMACCSKeysSize {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetMACCSKeysSize: Size value, $Value, is not valid: It must be a positive integer...";
  }
  if ($Value !~ /^(166|322)$/) {
    croak "Error: ${ClassName}->SetMACCSKeysSize: The current release of MayaChemTools doesn't support MDL MACCS $Value keys...";
  }
  $This->{MACCSKeysSize} = $Value;

  return $This;
}
311 | |
# Set minimum path length used for path length fingerprints...
#
# Raises an exception unless the value is a positive integer. Returns the object.
#
sub SetMinPathLength {
  my($This, $Value) = @_;

  unless (TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetMinPathLength: MinPathLength value, $Value, is not valid: It must be a positive integer...";
  }

  $This->{MinPathLength} = $Value;
  return $This;
}
324 | |
# Set maximum path length used for path length fingerprints...
#
# Raises an exception unless the value is a positive integer. Returns the object.
#
sub SetMaxPathLength {
  my($This, $Value) = @_;

  unless (TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetMaxPathLength: MaxPathLength value, $Value, is not valid: It must be a positive integer...";
  }

  $This->{MaxPathLength} = $Value;
  return $This;
}
337 | |
# Set minimum bond distance between atom pairs during topological and topological
# pharmacophore atom pairs/triplets fingerprints...
#
# Raises an exception unless the value is a positive integer. Returns the object.
#
sub SetMinDistance {
  my($This, $Value) = @_;

  unless (TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid: It must be a positive integer...";
  }

  $This->{MinDistance} = $Value;
  return $This;
}
351 | |
# Set maximum bond distance between atom pairs during topological and topological
# pharmacophore atom pairs/triplets fingerprints...
#
# Raises an exception unless the value is a positive integer. Returns the object.
#
sub SetMaxDistance {
  my($This, $Value) = @_;

  unless (TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid: It must be a positive integer...";
  }

  $This->{MaxDistance} = $Value;
  return $This;
}
365 | |
# Set atom neighborhood radius used for extended connectivity fingerprints...
#
# Raises an exception unless the value is an integer greater than or equal to zero.
# Returns the object.
#
sub SetNeighborhoodRadius {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid: It must be an integer..."
    if !TextUtil::IsInteger($Value);

  croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid: It must be >= 0..."
    if $Value < 0;

  $This->{NeighborhoodRadius} = $Value;

  return $This;
}
382 | |
# Set molecular complexity type...
#
# The value is matched case insensitively against the supported fingerprints types and
# is stored as specified. Raises an exception for an unsupported value. Returns the
# object.
#
sub SetMolecularComplexityType {
  my($This, $Value) = @_;

  if ($Value =~ /^(AtomTypesFingerprints|ExtendedConnectivityFingerprints|MACCSKeys|PathLengthFingerprints|TopologicalAtomPairsFingerprints|TopologicalAtomTripletsFingerprints|TopologicalAtomTorsionsFingerprints|TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) {
    $This->{MolecularComplexityType} = $Value;
    return $This;
  }

  croak "Error: ${ClassName}->SetMolecularComplexityType: MolecularComplexityType value, $Value, is not valid. Supported values: AtomTypesFingerprints, ExtendedConnectivityFingerprints, MACCSKeys, PathLengthFingerprints, TopologicalAtomPairsFingerprints, TopologicalAtomTripletsFingerprints, TopologicalAtomTorsionsFingerprints, TopologicalPharmacophoreAtomPairsFingerprints, or TopologicalPharmacophoreAtomTripletsFingerprints...";
}
396 | |
# Set distance bin size for binning pharmacophore atom pair distances in atom triplets...
#
# Raises an exception unless the value is a positive integer. Returns the object.
#
sub SetDistanceBinSize {
  my($This, $Value) = @_;

  unless (TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetDistanceBinSize: DistanceBinSize value, $Value, is not valid: It must be a positive integer...";
  }

  $This->{DistanceBinSize} = $Value;
  return $This;
}
409 | |
# Set normalization methodology to use for scaling the number of bits-set or unique keys
# in fingerprints...
#
# The value must be None, ByHeavyAtomsCount or ByPossibleKeysCount. When
# MolecularComplexityType has already been set, the value is additionally checked
# against it: normalization is rejected for all types other than extended connectivity
# and topological pharmacophore atom pairs/triplets fingerprints; ByPossibleKeysCount
# is rejected for extended connectivity fingerprints; ByHeavyAtomsCount is rejected
# for topological pharmacophore fingerprints.
#
# Raises an exception for an invalid value. Returns the object.
#
sub SetNormalizationMethodology {
  my($This, $Value) = @_;
  my($TypeIsSet);

  unless ($Value =~ /^(ByHeavyAtomsCount|ByPossibleKeysCount|None)$/i) {
    croak "Error: ${ClassName}->SetNormalizationMethodology: NormalizationMethodology value, $Value, is not valid. Supported values: None, ByHeavyAtomsCount or ByPossibleKeysCount...";
  }

  # Type specific checks apply only after the type is available...
  $TypeIsSet = $This->{MolecularComplexityType} ? 1 : 0;

  if ($TypeIsSet && $This->{MolecularComplexityType} !~ /^(ExtendedConnectivityFingerprints|TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) {
    croak "Error: ${ClassName}->SetNormalizationMethodology: Normalization is not supported for MolecularComplexityType: $This->{MolecularComplexityType}. Valid MolecularComplexityType values: ExtendedConnectivityFingerprints, TopologicalPharmacophoreAtomPairsFingerprints, or TopologicalPharmacophoreAtomTripletsFingerprints...\n";
  }

  if ($TypeIsSet && $This->{MolecularComplexityType} =~ /^ExtendedConnectivityFingerprints$/i && $Value !~ /^(ByHeavyAtomsCount|None)$/i) {
    croak "Error: ${ClassName}->SetNormalizationMethodology: NormalizationMethodology value, $Value, is not valid for MolecularComplexityType: $This->{MolecularComplexityType}. Supported values: None or ByHeavyAtomsCount...";
  }

  if ($TypeIsSet && $This->{MolecularComplexityType} =~ /^(TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i && $Value !~ /^(ByPossibleKeysCount|None)$/i) {
    croak "Error: ${ClassName}->SetNormalizationMethodology: NormalizationMethodology value, $Value, is not valid for MolecularComplexityType: $This->{MolecularComplexityType}. Supported values: None or ByPossibleKeysCount...";
  }

  $This->{NormalizationMethodology} = $Value;

  return $This;
}
438 | |
# Set initial atom identifier type...
#
# The value must be one of the supported atom types names. FunctionalClassAtomTypes is
# the only value accepted when MolecularComplexityType is one of the two topological
# pharmacophore fingerprints types. The atom identifier type can be set only once.
# After it is stored, identifier type information is initialized.
#
# Raises an exception on any violation. Returns the object.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;
  my($PharmacophoreFingerprints);

  unless ($IdentifierType =~ /^(AtomicInvariantsAtomTypes|FunctionalClassAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes and UFFAtomTypes.";
  }

  # FunctionalClassAtomTypes is the only valid atom identifier type for pharmacophore fingerprints...
  $PharmacophoreFingerprints = ($This->{MolecularComplexityType} =~ /^(TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) ? 1 : 0;
  if ($PharmacophoreFingerprints && $IdentifierType !~ /^FunctionalClassAtomTypes$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported type for $This->{MolecularComplexityType} complexity type: FunctionalClassAtomTypes.";
  }

  # A previously assigned type can't be replaced...
  croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type: It's already set..."
    if $This->{AtomIdentifierType};

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}
466 | |
# Calculate molecular complexity [ Ref 117-119 ] of a molecule using its fingerprints.
#
# The current release of MayaChemTools supports calculation of molecular complexity
# corresponding to the number of bits-set or unique keys in molecular fingerprints. The
# following types of fingerprints based molecular complexity measures are supported:
#
# AtomTypesFingerprints
# ExtendedConnectivityFingerprints
# MACCSKeys
# PathLengthFingerprints
# TopologicalAtomPairsFingerprints
# TopologicalAtomTripletsFingerprints
# TopologicalAtomTorsionsFingerprints
# TopologicalPharmacophoreAtomPairsFingerprints
# TopologicalPharmacophoreAtomTripletsFingerprints
#
# After the molecular complexity value has been calculated, it can also be normalized
# by scaling the number of bits-set or unique keys for the following types of
# fingerprints:
#
# ExtendedConnectivityFingerprints
# TopologicalPharmacophoreAtomPairsFingerprints
# TopologicalPharmacophoreAtomTripletsFingerprints
#
# Two types of normalization methodologies are supported: by heavy atoms count for
# extended connectivity fingerprints; by possible keys count for topological
# pharmacophore atom pairs and triplets fingerprints.
#
# Returns the object on success. A warning is issued and undef is returned when the
# molecule is not available or the descriptor values can't be calculated.
#
sub GenerateDescriptors {
  my($This) = @_;

  # Start from a clean set of descriptor values...
  $This->_InitializeDescriptorValues();

  # A molecule is required...
  unless ($This->{Molecule}) {
    carp "Warning: ${ClassName}->GenerateDescriptors: $This->{Type} molecular descriptors generation didn't succeed: Molecule data is not available: Molecule object hasn't been set...";
    return undef;
  }

  # Calculate descriptor values...
  unless ($This->_CalculateDescriptorValues()) {
    carp "Warning: ${ClassName}->GenerateDescriptors: $This->{Type} molecular descriptors generation didn't succeed: Couldn't calculate MolecularComplexity values corresponding to assigned MolecularComplexity atom types...";
    return undef;
  }

  # Publish final descriptor values...
  $This->_SetFinalDescriptorValues();

  return $This;
}
517 | |
# Calculate molecular complexity value...
#
# Sets up the fingerprints object for the current MolecularComplexityType, generates
# fingerprints, derives the molecular complexity value from them and normalizes it
# unless NormalizationMethodology is None.
#
# MolecularComplexityType is validated case insensitively by SetMolecularComplexityType
# and stored as specified. The name of the setup method is therefore resolved case
# insensitively here; building it directly from the stored value fails for values
# such as 'maccskeys'. A value which doesn't correspond to any supported type still
# leads to a direct "_Setup<Value>" method call.
#
# Returns the object on success and undef on failure.
#
sub _CalculateDescriptorValues {
  my($This) = @_;
  my($FingerprintsObject, $MethodName, $ComplexityType, %SetupMethodNameMap);

  # Map lower case complexity type names to setup method names...
  %SetupMethodNameMap = map { (lc($_) => "_Setup$_") } qw(AtomTypesFingerprints ExtendedConnectivityFingerprints MACCSKeys PathLengthFingerprints TopologicalAtomPairsFingerprints TopologicalAtomTripletsFingerprints TopologicalAtomTorsionsFingerprints TopologicalPharmacophoreAtomPairsFingerprints TopologicalPharmacophoreAtomTripletsFingerprints);

  # Setup fingerprints object and generate fingerprints...
  $ComplexityType = $This->{MolecularComplexityType};
  $MethodName = exists $SetupMethodNameMap{lc $ComplexityType} ? $SetupMethodNameMap{lc $ComplexityType} : "_Setup" . $ComplexityType;
  $FingerprintsObject = $This->$MethodName();

  $FingerprintsObject->GenerateFingerprints();

  # Make sure fingerprints generation is successful...
  if (!$FingerprintsObject->IsFingerprintsGenerationSuccessful()) {
    return undef;
  }

  if (!$This->_CalculateMolecularComplexity($FingerprintsObject)) {
    return undef;
  }

  # Normalize molecular complexity...
  if ($This->{NormalizationMethodology} !~ /^None$/i) {
    if (!$This->_NormalizeMolecularComplexity($FingerprintsObject)) {
      return undef;
    }
  }

  return $This;
}
548 | |
# Setup atom types fingerprints...
#
# Creates an atom types count fingerprints object for the molecule which ignores
# hydrogens, applies the atom identifier type values to use and returns the
# fingerprints object.
#
sub _SetupAtomTypesFingerprints {
  my($This) = @_;

  my $AtomTypesFingerprints = Fingerprints::AtomTypesFingerprints->new(
    'Molecule' => $This->{Molecule},
    'Type' => 'AtomTypesCount',
    'AtomIdentifierType' => $This->{AtomIdentifierType},
    'IgnoreHydrogens' => 1
  );
  $This->_SetAtomIdentifierTypeValuesToUse($AtomTypesFingerprints);

  return $AtomTypesFingerprints;
}
560 | |
# Setup extended connectivity fingerprints...
#
# Creates an extended connectivity fingerprints object for the molecule using the
# current neighborhood radius, applies the atom identifier type values to use and
# returns the fingerprints object.
#
sub _SetupExtendedConnectivityFingerprints {
  my($This) = @_;

  my $ExtendedConnectivityFingerprints = Fingerprints::ExtendedConnectivityFingerprints->new(
    'Molecule' => $This->{Molecule},
    'Type' => 'ExtendedConnectivity',
    'NeighborhoodRadius' => $This->{NeighborhoodRadius},
    'AtomIdentifierType' => $This->{AtomIdentifierType}
  );
  $This->_SetAtomIdentifierTypeValuesToUse($ExtendedConnectivityFingerprints);

  return $ExtendedConnectivityFingerprints;
}
572 | |
# Setup MACCS keys...
#
# Creates and returns a MACCS key bits fingerprints object of the current key set
# size for the molecule. Atom identifier type is not used.
#
sub _SetupMACCSKeys {
  my($This) = @_;

  return Fingerprints::MACCSKeys->new(
    'Molecule' => $This->{Molecule},
    'Type' => 'MACCSKeyBits',
    'Size' => $This->{MACCSKeysSize}
  );
}
583 | |
# Setup path length fingerprints...
#
# Creates a path length count fingerprints object for the molecule using the current
# minimum and maximum path lengths and bond symbols usage, with rings, shared bonds
# and unique paths turned on. Applies the atom identifier type values to use and
# returns the fingerprints object.
#
sub _SetupPathLengthFingerprints {
  my($This) = @_;

  my $PathLengthFingerprints = Fingerprints::PathLengthFingerprints->new(
    'Molecule' => $This->{Molecule},
    'Type' => 'PathLengthCount',
    'AtomIdentifierType' => $This->{AtomIdentifierType},
    'MinLength' => $This->{MinPathLength},
    'MaxLength' => $This->{MaxPathLength},
    'AllowRings' => 1,
    'AllowSharedBonds' => 1,
    'UseBondSymbols' => $This->{UseBondSymbols},
    'UseUniquePaths' => 1
  );
  $This->_SetAtomIdentifierTypeValuesToUse($PathLengthFingerprints);

  return $PathLengthFingerprints;
}
595 | |
# Setup topological atom pairs fingerprints...
#
# Creates a topological atom pairs fingerprints object for the molecule using the
# current minimum and maximum bond distances, applies the atom identifier type values
# to use and returns the fingerprints object.
#
sub _SetupTopologicalAtomPairsFingerprints {
  my($This) = @_;

  my $AtomPairsFingerprints = Fingerprints::TopologicalAtomPairsFingerprints->new(
    'Molecule' => $This->{Molecule},
    'MinDistance' => $This->{MinDistance},
    'MaxDistance' => $This->{MaxDistance},
    'AtomIdentifierType' => $This->{AtomIdentifierType}
  );
  $This->_SetAtomIdentifierTypeValuesToUse($AtomPairsFingerprints);

  return $AtomPairsFingerprints;
}
607 | |
# Setup topological atom triplets fingerprints...
#
# Creates a topological atom triplets fingerprints object for the molecule using the
# current minimum and maximum bond distances and triangle inequality setting, applies
# the atom identifier type values to use and returns the fingerprints object.
#
sub _SetupTopologicalAtomTripletsFingerprints {
  my($This) = @_;

  my $AtomTripletsFingerprints = Fingerprints::TopologicalAtomTripletsFingerprints->new(
    'Molecule' => $This->{Molecule},
    'MinDistance' => $This->{MinDistance},
    'MaxDistance' => $This->{MaxDistance},
    'UseTriangleInequality' => $This->{UseTriangleInequality},
    'AtomIdentifierType' => $This->{AtomIdentifierType}
  );
  $This->_SetAtomIdentifierTypeValuesToUse($AtomTripletsFingerprints);

  return $AtomTripletsFingerprints;
}
619 | |
# Setup topological atom torsions fingerprints...
#
# Creates a topological atom torsions fingerprints object for the molecule, applies
# the atom identifier type values to use and returns the fingerprints object.
#
sub _SetupTopologicalAtomTorsionsFingerprints {
  my($This) = @_;

  my $AtomTorsionsFingerprints = Fingerprints::TopologicalAtomTorsionsFingerprints->new(
    'Molecule' => $This->{Molecule},
    'AtomIdentifierType' => $This->{AtomIdentifierType}
  );
  $This->_SetAtomIdentifierTypeValuesToUse($AtomTorsionsFingerprints);

  return $AtomTorsionsFingerprints;
}
632 | |
# Setup topological pharmacophore atom pairs fingerprints...
#
# Creates and returns a topological pharmacophore atom pairs fingerprints object for
# the molecule. A fixed size atom pairs set is requested for ByPossibleKeysCount
# normalization, in order to get the total number of possible keys; an arbitrary size
# set is requested otherwise. Normalization by the fingerprints object itself is
# turned off.
#
sub _SetupTopologicalPharmacophoreAtomPairsFingerprints {
  my($This) = @_;
  my($SetSizeToUse);

  if ($This->{NormalizationMethodology} =~ /^ByPossibleKeysCount$/i) {
    $SetSizeToUse = 'FixedSize';
  }
  else {
    $SetSizeToUse = 'ArbitrarySize';
  }

  return Fingerprints::TopologicalPharmacophoreAtomPairsFingerprints->new(
    'Molecule' => $This->{Molecule},
    'AtomPairsSetSizeToUse' => $SetSizeToUse,
    'MinDistance' => $This->{MinDistance},
    'MaxDistance' => $This->{MaxDistance},
    'AtomTypesToUse' => \@{$This->{FunctionalClassesToUse}},
    'NormalizationMethodology' => 'None',
    'ValuesPrecision' => 2
  );
}
646 | |
# Setup topological pharmacophore atom triplets fingerprints...
#
# Creates and returns a topological pharmacophore atom triplets fingerprints object
# for the molecule. A fixed size atom triplets set is requested for
# ByPossibleKeysCount normalization, in order to get the total number of possible
# keys; an arbitrary size set is requested otherwise.
#
sub _SetupTopologicalPharmacophoreAtomTripletsFingerprints {
  my($This) = @_;
  my($SetSizeToUse);

  if ($This->{NormalizationMethodology} =~ /^ByPossibleKeysCount$/i) {
    $SetSizeToUse = 'FixedSize';
  }
  else {
    $SetSizeToUse = 'ArbitrarySize';
  }

  return Fingerprints::TopologicalPharmacophoreAtomTripletsFingerprints->new(
    'Molecule' => $This->{Molecule},
    'AtomTripletsSetSizeToUse' => $SetSizeToUse,
    'MinDistance' => $This->{MinDistance},
    'MaxDistance' => $This->{MaxDistance},
    'DistanceBinSize' => $This->{DistanceBinSize},
    'UseTriangleInequality' => $This->{UseTriangleInequality},
    'AtomTypesToUse' => \@{$This->{FunctionalClassesToUse}}
  );
}
660 | |
# Normalize molecular complexity value...
#
# Dispatches to normalization by heavy atoms count for extended connectivity
# fingerprints and to normalization by possible keys count for topological
# pharmacophore atom pairs/triplets fingerprints. For any other combination of
# MolecularComplexityType and NormalizationMethodology, a warning is issued and
# undef is returned.
#
sub _NormalizeMolecularComplexity {
  my($This, $FingerprintsObject) = @_;

  if ($This->{MolecularComplexityType} =~ /^ExtendedConnectivityFingerprints$/i && $This->{NormalizationMethodology} =~ /^ByHeavyAtomsCount$/i) {
    return $This->_NormalizeMolecularComplexityByHeavyAtomsCount($FingerprintsObject);
  }

  if ($This->{MolecularComplexityType} =~ /^(TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i && $This->{NormalizationMethodology} =~ /^ByPossibleKeysCount$/i) {
    return $This->_NormalizeMolecularComplexityByPossibleKeysCount($FingerprintsObject);
  }

  warn "Warning: ${ClassName}->_NormalizeMolecularComplexity: NormalizationMethodology value, $This->{NormalizationMethodology}, is not valid. Supported values: ByHeavyAtomsCount or ByPossibleKeysCount...";

  return undef;
}
677 | |
# Normalize molecular complexity value by heavy atoms count...
#
# Divides the molecular complexity value by the number of heavy atoms in the molecule
# and rounds the result to two decimal places. The value is left unchanged when the
# number of heavy atoms is zero. Returns the object.
#
sub _NormalizeMolecularComplexityByHeavyAtomsCount {
  my($This, $FingerprintsObject) = @_;
  my($HeavyAtomsCount);

  $HeavyAtomsCount = $This->{Molecule}->GetNumOfHeavyAtoms();

  if ($HeavyAtomsCount) {
    $This->{MolecularComplexity} = MathUtil::round($This->{MolecularComplexity} / $HeavyAtomsCount, 2) + 0;
  }

  return $This;
}
694 | |
# Normalize molecular complexity value by possible keys count...
#
# Divides the molecular complexity value by the number of values in the fingerprints
# vector of the specified fingerprints object and rounds the result to two decimal
# places. The value is left unchanged when the number of values is zero. Returns the
# object.
#
sub _NormalizeMolecularComplexityByPossibleKeysCount {
  my($This, $FingerprintsObject) = @_;
  my($PossibleKeysCount);

  $PossibleKeysCount = $FingerprintsObject->GetFingerprintsVector()->GetNumOfValues();

  if ($PossibleKeysCount) {
    $This->{MolecularComplexity} = MathUtil::round($This->{MolecularComplexity} / $PossibleKeysCount, 2) + 0;
  }

  return $This;
}
711 | |
# Calculate molecular complexity value using fingerprints object...
#
# Dispatches on the vector type of the fingerprints object: FingerprintsBitVector or
# FingerprintsVector. For any other vector type, a warning is issued and undef is
# returned.
#
sub _CalculateMolecularComplexity {
  my($This, $FingerprintsObject) = @_;

  if ($FingerprintsObject->GetVectorType() =~ /^FingerprintsBitVector$/i) {
    return $This->_CalculateMolecularComplexityUsingFingerprintsBitVector($FingerprintsObject->GetFingerprintsBitVector());
  }

  if ($FingerprintsObject->GetVectorType() =~ /^FingerprintsVector$/i) {
    return $This->_CalculateMolecularComplexityUsingFingerprintsVector($FingerprintsObject->GetFingerprintsVector());
  }

  warn "Warning: ${ClassName}->_CalculateMolecularComplexity: Fingerprints vector type is not valid. Supported values: FingerprintsBitVector or FingerprintsVector...";

  return undef;
}
729 | |
# Calculate molecular complexity value using fingerprints vector...
#
# For vectors of type OrderedNumericalValues or NumericalValues, the value is
# taken from GetNumOfNonZeroValues(); for all other vector types, it is taken
# from GetNumOfValues(). Returns the object itself.
#
sub _CalculateMolecularComplexityUsingFingerprintsVector {
  my($This, $FingerprintsVector) = @_;

  if ($FingerprintsVector->GetType() =~ /^(OrderedNumericalValues|NumericalValues)$/i) {
    $This->{MolecularComplexity} = $FingerprintsVector->GetNumOfNonZeroValues();
  }
  else {
    $This->{MolecularComplexity} = $FingerprintsVector->GetNumOfValues();
  }

  return $This;
}
739 | |
# Calculate molecular complexity value using fingerprints bit vector...
#
# The value is whatever GetNumOfSetBits() reports for the specified bit vector.
# Returns the object itself.
#
sub _CalculateMolecularComplexityUsingFingerprintsBitVector {
  my $This = shift;
  my $FingerprintsBitVector = shift;

  $This->{MolecularComplexity} = $FingerprintsBitVector->GetNumOfSetBits();

  return $This;
}
749 | |
# Setup final descriptor values...
#
# Marks descriptors as generated and hands the stored MolecularComplexity value
# to SetDescriptorValues. Returns the object itself.
#
sub _SetFinalDescriptorValues {
  my $This = shift;

  $This->{DescriptorsGenerated} = 1;
  $This->SetDescriptorValues($This->{MolecularComplexity});

  return $This;
}
761 | |
# Set atom identifier type to use for generating fingerprints...
#
# Passes the atomic invariants or functional classes stored in the object on to
# the specified fingerprints object, depending on the atom identifier type. The
# other supported atom identifier types need no additional values. An
# unsupported atom identifier type is a fatal error.
#
sub _SetAtomIdentifierTypeValuesToUse {
  my($This, $FingerprintsObject) = @_;
  my $IdentifierType = $This->{AtomIdentifierType};

  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    $FingerprintsObject->SetAtomicInvariantsToUse(\@{$This->{AtomicInvariantsToUse}});
  }
  elsif ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    $FingerprintsObject->SetFunctionalClassesToUse(\@{$This->{FunctionalClassesToUse}});
  }
  elsif ($IdentifierType =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    # Nothing to do for now...
  }
  else {
    croak "Error: The value specified, $This->{AtomIdentifierType}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
  }
}
780 | |
# Initialize atom identifier type information...
#
# Current supported values:
#
# AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# Additional information is set up only for AtomicInvariantsAtomTypes and
# FunctionalClassAtomTypes. An unknown atom identifier type leads to a warning.
# Returns the object itself in all cases.
#
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;
  my $IdentifierType = $This->{AtomIdentifierType};

  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
  }
  elsif ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
  }
  elsif ($IdentifierType =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    # Nothing to do for now...
  }
  else {
    carp "Warning: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  return $This;
}
808 | |
809 # Initialize atomic invariants atom types, generated by AtomTypes::AtomicInvariantsAtomTypes | |
810 # class, to use for generating initial atom identifiers... | |
811 # | |
812 # Let: | |
813 # AS = Atom symbol corresponding to element symbol | |
814 # | |
815 # X<n> = Number of non-hydrogen atom neighbors or heavy atoms attached to atom | |
816 # BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom | |
817 # LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom | |
818 # SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
819 # DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
820 # TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
821 # H<n> = Number of implicit and explicit hydrogens for atom | |
822 # Ar = Aromatic annotation indicating whether atom is aromatic | |
823 # RA = Ring atom annotation indicating whether atom is in a ring | |
824 # FC<+n/-n> = Formal charge assigned to atom | |
825 # MN<n> = Mass number indicating isotope other than most abundant isotope | |
826 # SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet) | |
827 # | |
828 # Then: | |
829 # | |
830 # Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
831 # | |
832 # AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n> | |
833 # | |
834 # Except for AS which is a required atomic invariant in atom types, all other atomic invariants are | |
835 # optional. | |
836 # | |
837 # Default atomic invariants used for generating initial atom identifiers are [ Ref 24 ]: | |
838 # | |
839 # AS, X<n>, BO<n>, H<n>, FC<+n/-n>, MN<n> | |
840 # | |
841 # In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words | |
842 # are also allowed: | |
843 # | |
844 # X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors | |
845 # BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms | |
846 # LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms | |
847 # SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms | |
848 # DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms | |
849 # TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms | |
850 # H : NumOfImplicitAndExplicitHydrogens | |
851 # Ar : Aromatic | |
852 # RA : RingAtom | |
853 # FC : FormalCharge | |
854 # MN : MassNumber | |
855 # SM : SpinMultiplicity | |
856 # | |
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;
  my $ComplexityType = $This->{MolecularComplexityType};

  # Default atomic invariants depend on the type of fingerprints; any type not
  # listed here ends up with an empty list...
  my @DefaultAtomicInvariants =
      ($ComplexityType =~ /^(AtomTypesFingerprints|TopologicalAtomPairsFingerprints|TopologicalAtomTripletsFingerprints|TopologicalAtomTorsionsFingerprints)$/i) ? ('AS', 'X', 'BO', 'H', 'FC')
    : ($ComplexityType =~ /^ExtendedConnectivityFingerprints$/i) ? ('AS', 'X', 'BO', 'H', 'FC', 'MN')
    : ($ComplexityType =~ /^PathLengthFingerprints$/i) ? ('AS')
    : ();

  # Replace contents of the existing list in place...
  @{$This->{AtomicInvariantsToUse}} = @DefaultAtomicInvariants;

  return $This;
}
874 | |
875 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes | |
876 # class, to use for generating initial atom identifiers... | |
877 # | |
878 # Let: | |
879 # HBD: HydrogenBondDonor | |
880 # HBA: HydrogenBondAcceptor | |
881 # PI : PositivelyIonizable | |
882 # NI : NegativelyIonizable | |
883 # Ar : Aromatic | |
884 # Hal : Halogen | |
885 # H : Hydrophobic | |
886 # RA : RingAtom | |
887 # CA : ChainAtom | |
888 # | |
889 # Then: | |
890 # | |
891 # Functional class atom type specification for an atom corresponds to: | |
892 # | |
893 # Ar.CA.H.HBA.HBD.Hal.NI.PI.RA | |
894 # | |
895 # Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal | |
896 # | |
897 # FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]: | |
898 # | |
899 # HydrogenBondDonor: NH, NH2, OH | |
900 # HydrogenBondAcceptor: N[!H], O | |
901 # PositivelyIonizable: +, NH2 | |
902 # NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH | |
903 # | |
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;
  my $ComplexityType = $This->{MolecularComplexityType};

  # Default functional classes depend on the type of fingerprints; any type not
  # listed here ends up with an empty list...
  my @DefaultFunctionalClasses =
      ($ComplexityType =~ /^(AtomTypesFingerprints|ExtendedConnectivityFingerprints|PathLengthFingerprints|TopologicalAtomPairsFingerprints|TopologicalAtomTripletsFingerprints|TopologicalAtomTorsionsFingerprints)$/i) ? ('HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal')
    : ($ComplexityType =~ /^TopologicalPharmacophoreAtomPairsFingerprints$/i) ? ('HBD', 'HBA', 'PI', 'NI', 'H')
    : ($ComplexityType =~ /^TopologicalPharmacophoreAtomTripletsFingerprints$/i) ? ('HBD', 'HBA', 'PI', 'NI', 'H', 'Ar')
    : ();

  # Replace contents of the existing list in place...
  @{$This->{FunctionalClassesToUse}} = @DefaultFunctionalClasses;

  return $This;
}
921 | |
# Set atomic invariants to use for generation of initial atom identifiers...
#
# Values may be specified as a list or as a reference to an array. A warning is
# issued and nothing is changed when no values are specified or when the atom
# identifier type is not AtomicInvariantsAtomTypes. All specified values are
# validated before any of them is stored; an unsupported value is a fatal error.
# Returns the object itself on success.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;

  unless (@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  unless ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: AtomicInvariantsToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # An array reference as the first value supplies the whole list...
  my @SpecifiedAtomicInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomicInvariants are valid...
  my @AtomicInvariantsToUse = ();
  for my $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
    unless (AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
      croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
    }
    push @AtomicInvariantsToUse, $SpecifiedAtomicInvariant;
  }

  # Set atomic invariants to use, replacing contents of the existing list in place...
  @{$This->{AtomicInvariantsToUse}} = @AtomicInvariantsToUse;

  return $This;
}
966 | |
# Set functional classes to use for generation of initial atom identifiers...
#
# Values may be specified as a list or as a reference to an array. A warning is
# issued and nothing is changed when no values are specified or when the atom
# identifier type is not FunctionalClassAtomTypes. All specified values are
# validated before any of them is stored; an unsupported value is a fatal error.
# Returns the object itself on success.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;

  unless (@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # An array reference as the first value supplies the whole list...
  my @SpecifiedFunctionalClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified FunctionalClasses are valid...
  my @FunctionalClassesToUse = ();
  for my $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
    unless (AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) {
      croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
    }
    push @FunctionalClassesToUse, $SpecifiedFunctionalClass;
  }

  # Set functional classes to use, replacing contents of the existing list in place...
  @{$This->{FunctionalClassesToUse}} = @FunctionalClassesToUse;

  return $This;
}
1010 | |
# Return a string containing data for MolecularComplexityDescriptors object...
#
# The string starts with descriptor type, molecular complexity type, and
# descriptor names and values. It is followed by the parameters relevant to the
# molecular complexity type and, for fingerprints relying on atom identifiers,
# by the atom identifier type along with atomic invariants or functional classes
# information.
#
sub StringifyMolecularComplexityDescriptors {
  my($This) = @_;

  my $ComplexityDescriptorsString = "MolecularDescriptorType: $This->{Type}; MolecularComplexityType: $This->{MolecularComplexityType}; " . $This->_StringifyDescriptorNamesAndValues();

  my $ComplexityType = $This->{MolecularComplexityType};

  # Setup fingerprints specific information...
  if ($ComplexityType =~ /^MACCSKeys$/i) {
    $ComplexityDescriptorsString .= "; MACCSKeysSize = $This->{MACCSKeysSize}";
  }
  elsif ($ComplexityType =~ /^ExtendedConnectivityFingerprints$/i) {
    $ComplexityDescriptorsString .= "; NeighborhoodRadius = $This->{NeighborhoodRadius}; NormalizationMethodology = $This->{NormalizationMethodology}";
  }
  elsif ($ComplexityType =~ /^PathLengthFingerprints$/i) {
    my $UseBondSymbols = $This->{UseBondSymbols} ? "Yes" : "No";
    $ComplexityDescriptorsString .= "; MinPathLength = $This->{MinPathLength}; MaxPathLength = $This->{MaxPathLength}; UseBondSymbols: " . $UseBondSymbols;
  }
  elsif ($ComplexityType =~ /^TopologicalAtomPairsFingerprints$/i) {
    $ComplexityDescriptorsString .= "; MinDistance = $This->{MinDistance}; MaxDistance = $This->{MaxDistance}";
  }
  elsif ($ComplexityType =~ /^TopologicalAtomTripletsFingerprints$/i) {
    my $UseTriangleInequality = $This->{UseTriangleInequality} ? "Yes" : "No";
    $ComplexityDescriptorsString .= "; MinDistance = $This->{MinDistance}; MaxDistance = $This->{MaxDistance}; UseTriangleInequality: " . $UseTriangleInequality;
  }
  elsif ($ComplexityType =~ /^TopologicalAtomTorsionsFingerprints$/i) {
    $ComplexityDescriptorsString .= "; MinDistance = $This->{MinDistance}; MaxDistance = $This->{MaxDistance}";
  }
  elsif ($ComplexityType =~ /^TopologicalPharmacophoreAtomPairsFingerprints$/i) {
    $ComplexityDescriptorsString .= "; MinDistance = $This->{MinDistance}; MaxDistance = $This->{MaxDistance}; NormalizationMethodology = $This->{NormalizationMethodology}";
  }
  elsif ($ComplexityType =~ /^TopologicalPharmacophoreAtomTripletsFingerprints$/i) {
    my $UseTriangleInequality = $This->{UseTriangleInequality} ? "Yes" : "No";
    $ComplexityDescriptorsString .= "; MinDistance = $This->{MinDistance}; MaxDistance = $This->{MaxDistance}; NormalizationMethodology = $This->{NormalizationMethodology}; DistanceBinSize: $This->{DistanceBinSize}; UseTriangleInequality: " . $UseTriangleInequality;
  }

  # Fingerprints which don't rely on atom identifiers have nothing more to report...
  if ($ComplexityType !~ /^(AtomTypesFingerprints|ExtendedConnectivityFingerprints|PathLengthFingerprints|TopologicalAtomPairsFingerprints|TopologicalAtomTripletsFingerprints|TopologicalAtomTorsionsFingerprints|TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) {
    return $ComplexityDescriptorsString;
  }

  # Setup atom identifier information...
  $ComplexityDescriptorsString .= "; AtomIdentifierType = $This->{AtomIdentifierType}";

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    my %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

    # Pair up each atomic invariant with its description in the defined order...
    my @AtomicInvariants = map {
      my $AtomicInvariant = $_;
      "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}";
    } @AtomicInvariantsOrder;

    $ComplexityDescriptorsString .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    $ComplexityDescriptorsString .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
    $ComplexityDescriptorsString .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    my %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

    # Pair up each functional class with its description in the defined order...
    my @FunctionalClasses = map {
      my $FunctionalClass = $_;
      "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}";
    } @FunctionalClassesOrder;

    $ComplexityDescriptorsString .= "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    $ComplexityDescriptorsString .= "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
    $ComplexityDescriptorsString .= "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
  }

  return $ComplexityDescriptorsString;
}
1080 | |
# Is it a MolecularComplexityDescriptors object?
#
# Returns 1 for a blessed reference which is an instance of this class or of a
# class derived from it, and 0 for everything else.
#
sub _IsMolecularComplexityDescriptors {
  my($Object) = @_;

  # Anything which is not a blessed reference can't be an object...
  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
1087 | |
1088 1; | |
1089 | |
1090 __END__ | |
1091 | |
1092 =head1 NAME | |
1093 | |
1094 MolecularComplexityDescriptors | |
1095 | |
1096 =head1 SYNOPSIS | |
1097 | |
1098 use MolecularDescriptors::MolecularComplexityDescriptors; | |
1099 | |
1100 use MolecularDescriptors::MolecularComplexityDescriptors qw(:all); | |
1101 | |
1102 =head1 DESCRIPTION | |
1103 | |
1104 B<MolecularComplexityDescriptors> class provides the following methods: | |
1105 | |
1106 new, GenerateDescriptors, GetDescriptorNames, | |
1107 GetMolecularComplexityTypeAbbreviation, SetMACCSKeysSize, SetAtomIdentifierType, | |
1108 SetAtomicInvariantsToUse, SetDistanceBinSize, SetFunctionalClassesToUse, | |
1109 SetMaxDistance, SetMaxPathLength, SetMinDistance, SetMinPathLength, | |
1110 SetMolecularComplexityType, SetNeighborhoodRadius, SetNormalizationMethodology, | |
1111 StringifyMolecularComplexityDescriptors | |
1112 | |
1113 B<MolecularComplexityDescriptors> is derived from B<MolecularDescriptors> class which in turn | |
1114 is derived from B<ObjectProperty> base class that provides methods not explicitly defined | |
1115 in B<MolecularComplexityDescriptors>, B<MolecularDescriptors> or B<ObjectProperty> classes using Perl's | |
1116 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property: | |
1117 | |
1118 Set<PropertyName>(<PropertyValue>); | |
1119 $PropertyValue = Get<PropertyName>(); | |
1120 Delete<PropertyName>(); | |
1121 | |
1122 The current release of MayaChemTools supports calculation of molecular complexity using | |
1123 I<MolecularComplexityType> parameter corresponding to number of bits-set or unique | |
1124 keys [ Ref 117-119 ] in molecular fingerprints. The valid values for I<MolecularComplexityType> | |
1125 are: | |
1126 | |
1127 AtomTypesFingerprints | |
1128 ExtendedConnectivityFingerprints | |
1129 MACCSKeys | |
1130 PathLengthFingerprints | |
1131 TopologicalAtomPairsFingerprints | |
1132 TopologicalAtomTripletsFingerprints | |
1133 TopologicalAtomTorsionsFingerprints | |
1134 TopologicalPharmacophoreAtomPairsFingerprints | |
1135 TopologicalPharmacophoreAtomTripletsFingerprints | |
1136 | |
1137 Default value for I<MolecularComplexityType>: I<MACCSKeys>. | |
1138 | |
1139 I<AtomIdentifierType> parameter name corresponds to atom types used during generation of | |
1140 fingerprints. The valid values for I<AtomIdentifierType> are: I<AtomicInvariantsAtomTypes, | |
1141 DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, | |
1142 SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes>. I<AtomicInvariantsAtomTypes> | |
1143 is not supported for following values of I<MolecularComplexityType>: I<MACCSKeys, | |
1144 TopologicalPharmacophoreAtomPairsFingerprints, TopologicalPharmacophoreAtomTripletsFingerprints>. | |
1145 I<FunctionalClassAtomTypes> is the only valid value of I<AtomIdentifierType> for topological | |
1146 pharmacophore fingerprints. | |
1147 | |
1148 Default value for I<AtomIdentifierType>: I<AtomicInvariantsAtomTypes> for all fingerprints; | |
1149 I<FunctionalClassAtomTypes> for topological pharmacophore fingerprints. | |
1150 | |
1151 I<AtomicInvariantsToUse> parameter name and values are used during I<AtomicInvariantsAtomTypes> | |
1152 value of parameter I<AtomIdentifierType>. It's a list of space separated valid atomic invariant atom types. | |
1153 | |
1154 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB, H, Ar, RA, FC, MN, SM>. | |
1155 Default value for I<AtomicInvariantsToUse> parameter are set differently for different fingerprints | |
1156 using I<MolecularComplexityType> parameter as shown below: | |
1157 | |
1158 MolecularComplexityType AtomicInvariantsToUse | |
1159 | |
1160 AtomTypesFingerprints AS X BO H FC | |
1161 TopologicalAtomPairsFingerprints AS X BO H FC | |
1162 TopologicalAtomTripletsFingerprints AS X BO H FC | |
1163 TopologicalAtomTorsionsFingerprints AS X BO H FC | |
1164 | |
1165 ExtendedConnectivityFingerprints AS X BO H FC MN | |
1166 PathLengthFingerprints AS | |
1167 | |
1168 I<FunctionalClassesToUse> parameter name and values are used during I<FunctionalClassAtomTypes> | |
1169 value of parameter I<AtomIdentifierType>. It's a list of space separated valid atom functional classes. | |
1170 | |
1171 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>. | |
1172 | |
1173 Default value for I<FunctionalClassesToUse> parameter is set to: | |
1174 | |
1175 HBD HBA PI NI Ar Hal | |
1176 | |
1177 for all fingerprints except for the following two I<MolecularComplexityType> fingerprints: | |
1178 | |
1179 MolecularComplexityType FunctionalClassesToUse | |
1180 | |
1181 TopologicalPharmacophoreAtomPairsFingerprints HBD HBA PI NI H | |
1182 TopologicalPharmacophoreAtomTripletsFingerprints HBD HBA PI NI H Ar | |
1183 | |
1184 I<MACCSKeysSize> parameter name is only used during I<MACCSKeys> value of | |
1185 I<MolecularComplexityType> and corresponds to size of MACCS key set. Possible | |
1186 values: I<166 or 322>. Default value: I<166>. | |
1187 | |
1188 I<NeighborhoodRadius> parameter name is only used during I<ExtendedConnectivityFingerprints> | |
1189 value of I<MolecularComplexityType> and corresponds to atomic neighborhoods radius for | |
1190 generating extended connectivity fingerprints. Possible values: positive integer. Default value: | |
1191 I<2>. | |
1192 | |
1193 I<MinPathLength> and I<MaxPathLength> parameters are only used during I<PathLengthFingerprints> | |
1194 value of I<MolecularComplexityType> and correspond to minimum and maximum path lengths to use | |
1195 for generating path length fingerprints. Possible values: positive integers. Default value: I<MinPathLength - 1>; | |
1196 I<MaxPathLength - 8>. | |
1197 | |
1198 I<UseBondSymbols> parameter is only used during I<PathLengthFingerprints> value of | |
1199 I<MolecularComplexityType> and indicates whether bond symbols are included in atom path | |
1200 strings used to generate path length fingerprints. Possible value: I<Yes or No>. Default value: | |
1201 I<Yes>. | |
1202 | |
1203 I<MinDistance> and I<MaxDistance> parameters are only used during I<TopologicalAtomPairsFingerprints> | |
1204 and I<TopologicalAtomTripletsFingerprints> values of I<MolecularComplexityType> and correspond to | |
1205 minimum and maximum bond distance between atom pairs during topological pharmacophore fingerprints. | |
1206 Possible values: positive integers. Default value: I<MinDistance - 1>; I<MaxDistance - 10>. | |
1207 | |
1208 I<UseTriangleInequality> parameter is used during these values for I<MolecularComplexityType>: | |
1209 I<TopologicalAtomTripletsFingerprints> and I<TopologicalPharmacophoreAtomTripletsFingerprints>. | |
1210 Possible values: I<Yes or No>. It determines whether to apply triangle inequality to distance triplets. | |
1211 Default value: I<TopologicalAtomTripletsFingerprints - No>; | |
1212 I<TopologicalPharmacophoreAtomTripletsFingerprints - Yes>. | |
1213 | |
1214 I<DistanceBinSize> parameter is used during I<TopologicalPharmacophoreAtomTripletsFingerprints> | |
1215 value of I<MolecularComplexityType> and corresponds to distance bin size used for binning | |
1216 distances during generation of topological pharmacophore atom triplets fingerprints. Possible | |
1217 value: positive integer. Default value: I<2>. | |
1218 | |
1219 I<NormalizationMethodology> is only used for these values for I<MolecularComplexityType>: | |
1220 I<ExtendedConnectivityFingerprints>, I<TopologicalPharmacophoreAtomPairsFingerprints> | |
1221 and I<TopologicalPharmacophoreAtomTripletsFingerprints>. It corresponds to normalization | |
1222 methodology to use for scaling the number of bits-set or unique keys during generation of | |
1223 fingerprints. Possible values during I<ExtendedConnectivityFingerprints>: I<None or | |
1224 ByHeavyAtomsCount>; Default value: I<None>. Possible values during topological | |
1225 pharmacophore atom pairs and triplets fingerprints: I<None or ByPossibleKeysCount>; | |
1226 Default value: I<None>. I<ByPossibleKeysCount> corresponds to total number of | |
1227 possible topological pharmacophore atom pairs or triplets in a molecule. | |
1228 | |
1229 =head2 METHODS | |
1230 | |
1231 =over 4 | |
1232 | |
1233 =item B<new> | |
1234 | |
1235 $NewMolecularComplexityDescriptors = new MolecularDescriptors:: | |
1236 MolecularComplexityDescriptors( | |
1237 %NamesAndValues); | |
1238 | |
1239 Using specified I<MolecularComplexityDescriptors> property names and values hash, B<new> | |
1240 method creates a new object and returns a reference to newly created B<MolecularComplexityDescriptors> | |
1241 object. By default, the following properties are initialized: | |
1242 | |
1243 Molecule = '' | |
1244 Type = 'MolecularComplexity' | |
1245 MolecularComplexityType = 'MACCSKeys' | |
1246 AtomIdentifierType = '' | |
1247 MACCSKeysSize = 166 | |
1248 NeighborhoodRadius = 2 | |
1249 MinPathLength = 1 | |
1250 MaxPathLength = 8 | |
1251 UseBondSymbols = 1 | |
1252 MinDistance = 1 | |
1253 MaxDistance = 10 | |
1254 UseTriangleInequality = '' | |
1255 DistanceBinSize = 2 | |
1256 NormalizationMethodology = 'None' | |
1257 @DescriptorNames = ('MolecularComplexity') | |
1258 @DescriptorValues = ('None') | |
1259 | |
1260 Examples: | |
1261 | |
1262 $MolecularComplexityDescriptors = new MolecularDescriptors:: | |
1263 MolecularComplexityDescriptors( | |
1264 'Molecule' => $Molecule); | |
1265 | |
1266 $MolecularComplexityDescriptors = new MolecularDescriptors:: | |
1267 MolecularComplexityDescriptors(); | |
1268 | |
1269 $MolecularComplexityDescriptors->SetMolecule($Molecule); | |
1270 $MolecularComplexityDescriptors->GenerateDescriptors(); | |
1271 print "MolecularComplexityDescriptors: $MolecularComplexityDescriptors\n"; | |
1272 | |
1273 | |
1274 =item B<GenerateDescriptors> | |
1275 | |
1276 $MolecularComplexityDescriptors->GenerateDescriptors(); | |
1277 | |
1278 Calculates MolecularComplexity value for a molecule and returns I<MolecularComplexityDescriptors>. | |
1279 | |
1280 =item B<GetDescriptorNames> | |
1281 | |
1282 @DescriptorNames = $MolecularComplexityDescriptors->GetDescriptorNames(); | |
1283 @DescriptorNames = MolecularDescriptors::MolecularComplexityDescriptors:: | |
1284 GetDescriptorNames(); | |
1285 | |
1286 Returns all available descriptor names as an array. | |
1287 | |
1288 =item B<GetMolecularComplexityTypeAbbreviation> | |
1289 | |
1290 $Abbrev = $MolecularComplexityDescriptors-> | |
1291 GetMolecularComplexityTypeAbbreviation(); | |
1292 $Abbrev = MolecularDescriptors::MolecularComplexityDescriptors:: | |
1293 GetMolecularComplexityTypeAbbreviation($ComplexityType); | |
1294 | |
1295 Returns abbreviation for a specified molecular complexity type or corresponding to | |
1296 I<MolecularComplexityDescriptors> object. | |
1297 | |
1298 =item B<SetMACCSKeysSize> | |
1299 | |
1300 $MolecularComplexityDescriptors->SetMACCSKeysSize($Size); | |
1301 | |
1302 Sets MACCS keys size and returns I<MolecularComplexityDescriptors>. | |
1303 | |
1304 =item B<SetAtomIdentifierType> | |
1305 | |
1306 $MolecularComplexityDescriptors->SetAtomIdentifierType($IdentifierType); | |
1307 | |
1308 Sets atom I<IdentifierType> to use during fingerprints generation corresponding to | |
1309 I<MolecularComplexityType> and returns I<MolecularComplexityDescriptors>. | |
1310 | |
1311 Possible values: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
1312 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, | |
1313 TPSAAtomTypes, UFFAtomTypes>. | |
1314 | |
1315 =item B<SetAtomicInvariantsToUse> | |
1316 | |
1317 $MolecularComplexityDescriptors->SetAtomicInvariantsToUse($ValuesRef); | |
1318 $MolecularComplexityDescriptors->SetAtomicInvariantsToUse(@Values); | |
1319 | |
1320 Sets atomic invariants to use during I<AtomicInvariantsAtomTypes> value of I<AtomIdentifierType> | |
1321 for fingerprints generation and returns I<MolecularComplexityDescriptors>. | |
1322 | |
1323 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB, | |
1324 H, Ar, RA, FC, MN, SM>. Default value [ Ref 24 ]: I<AS,X,BO,H,FC,MN>. | |
1325 | |
1326 The atomic invariants abbreviations correspond to: | |
1327 | |
1328 AS = Atom symbol corresponding to element symbol | |
1329 | |
1330 X<n> = Number of non-hydrogen atom neighbors or heavy atoms | |
1331 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms | |
1332 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms | |
1333 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms | |
1334 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms | |
1335 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms | |
1336 H<n> = Number of implicit and explicit hydrogens for atom | |
1337 Ar = Aromatic annotation indicating whether atom is aromatic | |
1338 RA = Ring atom annotation indicating whether atom is in a ring | |
1339 FC<+n/-n> = Formal charge assigned to atom | |
1340 MN<n> = Mass number indicating isotope other than most abundant isotope | |
1341 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or | |
1342 3 (triplet) | |
1343 | |
1344 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
1345 | |
1346 AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n> | |
1347 | |
1348 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are | |
1349 optional. Atom type specification doesn't include atomic invariants with zero or undefined values. | |
1350 | |
1351 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words | |
1352 are also allowed: | |
1353 | |
1354 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors | |
1355 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms | |
1356 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms | |
1357 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms | |
1358 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms | |
1359 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms | |
1360 H : NumOfImplicitAndExplicitHydrogens | |
1361 Ar : Aromatic | |
1362 RA : RingAtom | |
1363 FC : FormalCharge | |
1364 MN : MassNumber | |
1365 SM : SpinMultiplicity | |
1366 | |
1367 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant | |
1368 atom types. | |
1369 | |
1370 =item B<SetDistanceBinSize> | |
1371 | |
1372 $MolecularComplexityDescriptors->SetDistanceBinSize($BinSize); | |
1373 | |
1374 Sets distance bin size used to bin distances between atom pairs in atom triplets for | |
1375 topological pharmacophore atom triplets fingerprints generation and returns | |
1376 I<MolecularComplexityDescriptors>. | |
1377 | |
1378 =item B<SetFunctionalClassesToUse> | |
1379 | |
1380 $MolecularComplexityDescriptors->SetFunctionalClassesToUse($ValuesRef); | |
1381 $MolecularComplexityDescriptors->SetFunctionalClassesToUse(@Values); | |
1382 | |
1383 Sets functional classes invariants to use during I<FunctionalClassAtomTypes> value of I<AtomIdentifierType> | |
1384 for fingerprints generation and returns I<MolecularComplexityDescriptors>. | |
1385 | |
1386 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>. | |
1387 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>. | |
1388 | |
1389 The functional class abbreviations correspond to: | |
1390 | |
1391 HBD: HydrogenBondDonor | |
1392 HBA: HydrogenBondAcceptor | |
1393 PI : PositivelyIonizable | |
1394 NI : NegativelyIonizable | |
1395 Ar : Aromatic | |
1396 Hal : Halogen | |
1397 H : Hydrophobic | |
1398 RA : RingAtom | |
1399 CA : ChainAtom | |
1400 | |
1401 Functional class atom type specification for an atom corresponds to: | |
1402 | |
1403 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA or None | |
1404 | |
1405 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom | |
1406 types. It uses the following definitions [ Ref 60-61, Ref 65-66 ]: | |
1407 | |
1408 HydrogenBondDonor: NH, NH2, OH | |
1409 HydrogenBondAcceptor: N[!H], O | |
1410 PositivelyIonizable: +, NH2 | |
1411 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH | |
1412 | |
1413 =item B<SetMaxDistance> | |
1414 | |
1415 $MolecularComplexityDescriptors->SetMaxDistance($MaxDistance); | |
1416 | |
1417 Sets maximum distance to use during topological atom pairs and triplets fingerprints | |
1418 generation and returns I<MolecularComplexityDescriptors>. | |
1419 | |
1420 =item B<SetMaxPathLength> | |
1421 | |
1422 $MolecularComplexityDescriptors->SetMaxPathLength($Length); | |
1423 | |
1424 Sets maximum path length to use during path length fingerprints generation and returns | |
1425 I<MolecularComplexityDescriptors>. | |
1426 | |
1427 =item B<SetMinDistance> | |
1428 | |
1429 $MolecularComplexityDescriptors->SetMinDistance($MinDistance); | |
1430 | |
1431 Sets minimum distance to use during topological atom pairs and triplets fingerprints | |
1432 generation and returns I<MolecularComplexityDescriptors>. | |
1433 | |
1434 =item B<SetMinPathLength> | |
1435 | |
1436 $MolecularComplexityDescriptors->SetMinPathLength($MinPathLength); | |
1437 | |
1438 Sets minimum path length to use during path length fingerprints generation and returns | |
1439 I<MolecularComplexityDescriptors>. | |
1440 | |
1441 =item B<SetMolecularComplexityType> | |
1442 | |
1443 $MolecularComplexityDescriptors->SetMolecularComplexityType($ComplexityType); | |
1444 | |
1445 Sets molecular complexity type to use for calculating its value and returns | |
1446 I<MolecularComplexityDescriptors>. | |
1447 | |
1448 =item B<SetNeighborhoodRadius> | |
1449 | |
1450 $MolecularComplexityDescriptors->SetNeighborhoodRadius($Radius); | |
1451 | |
1452 Sets neighborhood radius to use during extended connectivity fingerprints generation and | |
1453 returns I<MolecularComplexityDescriptors>. | |
1454 | |
1455 =item B<SetNormalizationMethodology> | |
1456 | |
1457 $MolecularComplexityDescriptors->SetNormalizationMethodology($Methodology); | |
1458 | |
1459 Sets normalization methodology to use during calculation of molecular complexity | |
1460 corresponding to extended connectivity, topological pharmacophore atom pairs and | |
1461 triplets fingerprints and returns I<MolecularComplexityDescriptors>. | |
1462 | |
1463 =item B<StringifyMolecularComplexityDescriptors> | |
1464 | |
1465 $String = $MolecularComplexityDescriptors-> | |
1466 StringifyMolecularComplexityDescriptors(); | |
1467 | |
1468 Returns a string containing information about I<MolecularComplexityDescriptors> object. | |
1469 | |
1470 =back | |
1471 | |
1472 =head1 AUTHOR | |
1473 | |
1474 Manish Sud <msud@san.rr.com> | |
1475 | |
1476 =head1 SEE ALSO | |
1477 | |
1478 MolecularDescriptors.pm, MolecularDescriptorsGenerator.pm | |
1479 | |
1480 =head1 COPYRIGHT | |
1481 | |
1482 Copyright (C) 2015 Manish Sud. All rights reserved. | |
1483 | |
1484 This file is part of MayaChemTools. | |
1485 | |
1486 MayaChemTools is free software; you can redistribute it and/or modify it under | |
1487 the terms of the GNU Lesser General Public License as published by the Free | |
1488 Software Foundation; either version 3 of the License, or (at your option) | |
1489 any later version. | |
1490 | |
1491 =cut |