comparison lib/Fingerprints/EStateIndiciesFingerprints.pm @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 package Fingerprints::EStateIndiciesFingerprints;
2 #
3 # $RCSfile: EStateIndiciesFingerprints.pm,v $
4 # $Date: 2015/02/28 20:48:54 $
5 # $Revision: 1.19 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Carp;
31 use Exporter;
32 use Text::ParseWords;
33 use TextUtil ();
34 use FileUtil ();
35 use MathUtil ();
36 use Fingerprints::Fingerprints;
37 use Molecule;
38 use AtomTypes::EStateAtomTypes;
39 use AtomicDescriptors::EStateValuesDescriptors;
40
# Package globals consulted by Perl's method resolution (@ISA) and by Exporter.
# Declared with "our", which supersedes the older "use vars" pragma...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = qw(Fingerprints::Fingerprints Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
#
# $ClassName is a file-scoped lexical filled in by _InitializeClass; it is
# interpolated into the error and warning messages raised by this module.
my($ClassName);
_InitializeClass();

# Overload Perl functions...
#
# Stringification of an object is delegated to StringifyEStateIndiciesFingerprints.
use overload '""' => 'StringifyEStateIndiciesFingerprints';
55
# Class constructor...
#
# Creates the object through the parent class constructor, blesses it into the
# requested class (or into the class of the invoking object), applies the default
# values for this fingerprints type and then applies the caller supplied property
# names and values.
#
# Returns the newly created object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Initialize object...
  my $This = $Class->SUPER::new();
  my $ObjectClass = ref($Class) || $Class;
  bless $This, $ObjectClass;

  # Defaults first, explicitly specified properties next...
  $This->_InitializeEStateIndiciesFingerprints();
  $This->_InitializeEStateIndiciesFingerprintsProperties(%NamesAndValues);

  return $This;
}
69
# Initialize object data...
#
# Sets the default values of all object properties used by this class.
#
sub _InitializeEStateIndiciesFingerprints {
  my($This) = @_;

  # Type: EStateIndicies is a vector containing sum of E-state values for E-state
  # atom types.
  #
  # EStateAtomTypesSetToUse for EStateIndicies:
  #
  # ArbitrarySize - Corresponds to only E-state atom types detected in molecule
  # FixedSize - Corresponds to fixed number of E-state atom types previously defined [ Ref 77 ]
  #
  # Possible values: ArbitrarySize or FixedSize. It is left empty here; the default
  # value, ArbitrarySize, is filled in by _InitializeEstateIndicies when no value
  # has been specified.
  #
  # ValuesPrecision: Vector values precision for real values during E-state indicies.
  #
  @{$This}{qw(Type EStateAtomTypesSetToUse ValuesPrecision)} = ('EStateIndicies', '', 3);

  # Assigned E-state atom types, keyed by atom ID...
  %{$This->{EStateAtomTypes}} = ();

  # Calculated E-state values, keyed by atom ID...
  %{$This->{EStateValues}} = ();

  # Calculated E-state indicies, keyed by E-state atom type...
  %{$This->{EStateIndicies}} = ();
}
99
# Initialize class ...
#
# Stores the name of this package in the file-scoped $ClassName variable.
#
sub _InitializeClass {
  $ClassName = __PACKAGE__;
}
106
# Initialize object properties....
#
# Each specified property is applied by calling the Set<Name> method on the
# object. A Molecule property is mandatory: its absence is reported with croak
# after the specified properties have been applied. The fingerprints vector
# setup is performed last.
#
# Returns the object.
#
sub _InitializeEStateIndiciesFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  for my $PropertyName (keys %NamesAndValues) {
    my $SetterName = "Set${PropertyName}";
    $This->$SetterName($NamesAndValues{$PropertyName});
  }

  # Make sure molecule object was specified...
  exists $NamesAndValues{Molecule}
    or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";

  $This->_InitializeEstateIndicies();

  return $This;
}
126
# Initialize E-state indicies...
#
# Fills in the default EStateAtomTypesSetToUse value when none has been set,
# selects the vector types corresponding to it and initializes the fingerprints
# vector.
#
# Returns the object.
#
sub _InitializeEstateIndicies {
  my($This) = @_;

  # Set default EStateAtomTypesSetToUse...
  $This->{EStateAtomTypesSetToUse} ||= 'ArbitrarySize';

  # Vector type...
  $This->{VectorType} = 'FingerprintsVector';

  # FixedSize uses ordered values; anything else uses plain numerical values...
  my $IsFixedSize = ($This->{EStateAtomTypesSetToUse} =~ /^FixedSize$/i);
  $This->{FingerprintsVectorType} = $IsFixedSize ? 'OrderedNumericalValues' : 'NumericalValues';

  $This->_InitializeFingerprintsVector();

  return $This;
}
151
# Disable set size method...
#
# Always raises an error through croak: changing the size is not allowed for
# this fingerprints class.
#
sub SetSize {
  my($This, $Type) = @_;

  croak("Error: ${ClassName}->SetSize: Can't change size: It's not allowed...");
}
159
# Set E-state atom types set to use...
#
# Parameters:
#   $Value - ArbitrarySize or FixedSize; letter case is ignored.
#
# Returns the object.
#
# Raises an error through croak when a value has already been set or when the
# specified value is not one of the supported values.
#
sub SetEStateAtomTypesSetToUse {
  my($This, $Value) = @_;

  if ($This->{EStateAtomTypesSetToUse}) {
    croak "Error: ${ClassName}->SetEStateAtomTypesSetToUse: Can't change size: It's already set...";
  }

  # The pattern is anchored at both ends, like the /^FixedSize$/i checks performed
  # during vector setup and fingerprints generation. Without the end anchor a value
  # such as FixedSizeFoo passes validation here and is then silently handled as
  # arbitrary size by those checks.
  if ($Value !~ /^(ArbitrarySize|FixedSize)$/i) {
    croak "Error: ${ClassName}->SetEStateAtomTypesSetToUse: Unknown EStateAtomTypesSetToUse value: $Value; Supported values: ArbitrarySize or FixedSize";
  }

  $This->{EStateAtomTypesSetToUse} = $Value;

  return $This;
}
177
# Set vector values precision for real values for E-state indicies...
#
# Parameters:
#   $Value - Precision to store; it must pass TextUtil::IsPositiveInteger.
#
# Returns the object. Raises an error through croak for any other value.
#
sub SetValuesPrecision {
  my($This, $Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetValuesPrecision: ValuesPrecision value, $Value, is not valid: It must be a positive integer...";

  $This->{ValuesPrecision} = $Value;

  return $This;
}
190
# Generate fingerprints description...
#
# Returns the explicitly set Description property when the object has one;
# otherwise, returns a description generated as <Type>:<EStateAtomTypesSetToUse>.
#
sub GetDescription {
  my($This) = @_;

  return (exists $This->{Description})
    ? $This->{Description}
    : "$This->{Type}:$This->{EStateAtomTypesSetToUse}";
}
205
# Generate electrotopological state indicies (E-state) [ Ref 75-78 ] fingerprints for
# non-hydrogen atoms in a molecule...
#
# EStateIndicies fingerprints constitute a vector containing sum of E-state values
# for E-state atom types. Two types of E-state atom types set size are allowed:
#
# ArbitrarySize - Corresponds to only E-state atom types detected in molecule
# FixedSize - Corresponds to fixed number of E-state atom types previously defined
#
# Module AtomTypes::EStateAtomTypes.pm is used to assign E-state atom types to
# non-hydrogen atoms in the molecule which is able to assign atom types to any valid
# atom group. However, for FixedSize value of EStateAtomTypesSetToUse, only a fixed
# set of E-state atom types corresponding to specific atom groups [ Appendix III in
# Ref 77 ] are used for fingerprints.
#
# The fixed size E-state atom type set size used during generation of fingerprints corresponding
# to FixedSize value of EStateAtomTypesSetToUse contains 87 E-state non-hydrogen atom types
# in EStateAtomTypes.csv data file distributed with MayaChemTools.
#
# Combination of Type and EStateAtomTypesSetToUse allow generation of 2 different types of
# E-state indicies fingerprints:
#
# Type EStateAtomTypesSetToUse
#
# EStateIndicies ArbitrarySize [ default fingerprints ]
# EStateIndicies FixedSize
#
# The default is to generate EStateIndicies type fingerprints corresponding to ArbitrarySize as
# EStateAtomTypesSetToUse value.
#
# Returns the object in all cases. A failure to assign E-state atom types or to
# calculate E-state values is reported as a warning through carp and no final
# fingerprints are set.
#
sub GenerateFingerprints {
  my($This) = @_;

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Assign E-state atom types...
  if (!$This->_AssignEStateAtomTypes()) {
    # Release the cached molecule data on the failure path as well...
    $This->_ClearMoleculeDataCache();
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{Type} fingerprints generation didn't succeed: Couldn't assign valid E-state atom types to all atoms...";
    return $This;
  }

  # Calculate E-state indicies...
  if (!$This->_CalculateEStateIndicies()) {
    # Release the cached molecule data on the failure path as well...
    $This->_ClearMoleculeDataCache();
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{Type} fingerprints generation didn't succeed: Couldn't calculate E-state values for all atoms...";
    return $This;
  }

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}
263
# Assign E-state atom types...
#
# Clears previously collected atom types, assigns E-state atom types to the
# molecule using AtomTypes::EStateAtomTypes with hydrogens ignored and stores
# the type of each cached atom in EStateAtomTypes keyed by atom ID.
#
# Returns the object on success; undef when the atom types assignment is not
# successful.
#
sub _AssignEStateAtomTypes {
  my($This) = @_;

  %{$This->{EStateAtomTypes}} = ();

  # Assign E-state atom types...
  #
  # The constructor is called as a class method. The indirect object form,
  # new Class(...), is only parsed as a method call here while the class is
  # already loaded at compile time, because this package defines its own new.
  my $EStateAtomTypes = AtomTypes::EStateAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => 1);
  $EStateAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$EStateAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return undef;
  }

  # Collect assigned atom types...
  for my $Atom (@{$This->{Atoms}}) {
    my $AtomID = $Atom->GetID();
    my $AtomType = $EStateAtomTypes->GetAtomType($Atom);

    $This->{EStateAtomTypes}{$AtomID} = $AtomType;
  }
  return $This;
}
290
# Calculate E-state indicies by summing up E-state values for specific
# E-state atom types...
#
# For each cached atom, its E-state value is added to the entry in
# EStateIndicies for its assigned E-state atom type.
#
# Returns the object on success; undef when E-state values can't be calculated.
#
sub _CalculateEStateIndicies {
  my($This) = @_;

  # Start from empty sums, as is done for EStateAtomTypes and EStateValues by
  # the methods collecting them; otherwise a repeated call adds to the sums
  # left over from the previous call...
  %{$This->{EStateIndicies}} = ();

  # Calculate E-state values to generate E-state indicies...
  if (!$This->_CalculateEStateValuesDescriptors()) {
    return undef;
  }

  # Calculate E-state indicies...
  for my $Atom (@{$This->{Atoms}}) {
    my $AtomID = $Atom->GetID();

    my $AtomType = $This->{EStateAtomTypes}{$AtomID};
    my $EStateValue = $This->{EStateValues}{$AtomID};

    if (!exists $This->{EStateIndicies}{$AtomType}) {
      $This->{EStateIndicies}{$AtomType} = 0;
    }

    $This->{EStateIndicies}{$AtomType} += $EStateValue;
  }
  return $This;
}
318
# Calculate E-state values for E-state indicies...
#
# Clears previously collected values, generates E-state values for the molecule
# using AtomicDescriptors::EStateValuesDescriptors and stores the value of each
# cached atom in EStateValues keyed by atom ID.
#
# Returns the object on success; undef when the descriptors generation is not
# successful.
#
sub _CalculateEStateValuesDescriptors {
  my($This) = @_;

  %{$This->{EStateValues}} = ();

  # Calculate and assign E-state values...
  #
  # The constructor is called as a class method. The indirect object form,
  # new Class(...), is only parsed as a method call here while the class is
  # already loaded at compile time, because this package defines its own new.
  my $EStateValuesDescriptors = AtomicDescriptors::EStateValuesDescriptors->new('Molecule' => $This->{Molecule});
  $EStateValuesDescriptors->GenerateDescriptors();

  # Make sure E-state values calculation is successful...
  if (!$EStateValuesDescriptors->IsDescriptorsGenerationSuccessful()) {
    return undef;
  }

  # Collect assigned E-state values...
  for my $Atom (@{$This->{Atoms}}) {
    my $AtomID = $Atom->GetID();
    my $EStateValue = $EStateValuesDescriptors->GetDescriptorValue($Atom);

    $This->{EStateValues}{$AtomID} = $EStateValue;
  }
  return $This;
}
344
# Set final fingerprints for E-state indicies...
#
# Marks the fingerprints as generated and adds value IDs and values to the
# fingerprints vector. IDs are E-state atom types prefixed with S and values are
# E-state indicies rounded to ValuesPrecision:
#
# FixedSize - One entry for each atom type returned by
#             GetAllPossibleEStateNonHydrogenAtomTypes, in the returned
#             order, using 0 for atom types without a calculated value
# Otherwise - One entry for each atom type with a calculated value, sorted
#             by atom type
#
# Returns the object.
#
sub _SetFinalFingerprints {
  my($This) = @_;

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  my $Precision = $This->{ValuesPrecision};
  my(@AtomTypes, @VectorIDs, @VectorValues);

  if ($This->{EStateAtomTypesSetToUse} =~ /^FixedSize$/i) {
    # Use fixed size E-state atom types set for non-hydrogen atoms...
    @AtomTypes = @{AtomTypes::EStateAtomTypes::GetAllPossibleEStateNonHydrogenAtomTypes()};
    @VectorValues = map {
      exists($This->{EStateIndicies}{$_}) ? MathUtil::round($This->{EStateIndicies}{$_}, $Precision) : 0;
    } @AtomTypes;
  }
  else {
    @AtomTypes = sort keys %{$This->{EStateIndicies}};
    @VectorValues = map { MathUtil::round($This->{EStateIndicies}{$_}, $Precision); } @AtomTypes;
  }
  @VectorIDs = map("S$_", @AtomTypes);

  # Add IDs and values to fingerprint vector...
  $This->{FingerprintsVector}->AddValueIDs(\@VectorIDs) if @VectorIDs;
  $This->{FingerprintsVector}->AddValues(\@VectorValues);

  return $This;
}
381
# Cache appropriate molecule data...
#
# Stores in Atoms the atoms returned by the molecule's GetAtoms method for the
# IsHydrogen check method with the negate flag turned on; that is, all
# non-hydrogen atoms.
#
# Returns the object.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # Get all non-hydrogen atoms...
  my $Molecule = $This->GetMolecule();
  my $NegateCheck = 1;
  @{$This->{Atoms}} = $Molecule->GetAtoms("IsHydrogen", $NegateCheck);

  return $This;
}
394
# Clear cached molecule data...
#
# Empties the list of atoms stored in Atoms and returns the object.
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  # The existing array is emptied in place...
  @{$This->{Atoms}} = ();

  return $This;
}
404
# Return a string containing data for EStateIndiciesFingerprints object...
#
# The string lists Type, EStateAtomTypesSetToUse and the stringified
# fingerprints vector, separated by semicolons.
#
sub StringifyEStateIndiciesFingerprints {
  my($This) = @_;

  return join('; ',
    "Type: $This->{Type}",
    "EStateAtomTypesSetToUse: $This->{EStateAtomTypesSetToUse}",
    "FingerprintsVector: < $This->{FingerprintsVector} >",
  );
}
418
419 1;
420
421 __END__
422
423 =head1 NAME
424
425 EStateIndiciesFingerprints
426
427 =head1 SYNOPSIS
428
429 use Fingerprints::EStateIndiciesFingerprints;
430
431 use Fingerprints::EStateIndiciesFingerprints qw(:all);
432
433 =head1 DESCRIPTION
434
435 B<EStateIndiciesFingerprints> [ Ref 75-78 ] class provides the following methods:
436
437 new, GenerateFingerprints, GetDescription, SetEStateAtomTypesSetToUse,
438 SetValuesPrecision, StringifyEStateIndiciesFingerprints
439
440 B<EStateIndiciesFingerprints> is derived from B<Fingerprints> class which in turn
441 is derived from B<ObjectProperty> base class that provides methods not explicitly defined
442 in B<EStateIndiciesFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's
443 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property:
444
445 Set<PropertyName>(<PropertyValue>);
446 $PropertyValue = Get<PropertyName>();
447 Delete<PropertyName>();
448
449 E-state atom types are assigned to all non-hydrogen atoms in a molecule using module
450 AtomTypes::EStateAtomTypes.pm and E-state values are calculated using module
451 AtomicDescriptors::EStateValuesDescriptors.pm. Using E-state atom types and E-state values,
452 B<EStateIndiciesFingerprints> constituting sum of E-state values for E-state atom types
453 are generated.
454
455 Two types of E-state atom types set size are allowed:
456
457 ArbitrarySize - Corresponds to only E-state atom types detected
458 in molecule
459 FixedSize - Corresponds to fixed number of E-state atom types previously
460 defined
461
462 Module AtomTypes::EStateAtomTypes.pm, used to assign E-state atom types to
463 non-hydrogen atoms in the molecule, is able to assign atom types to any valid
464 atom group. However, for I<FixedSize> value of B<EStateAtomTypesSetToUse>, only a
465 fixed set of E-state atom types corresponding to specific atom groups [ Appendix III in
466 Ref 77 ] are used for fingerprints.
467
468 The fixed size E-state atom type set size used during generation of fingerprints contains
469 87 E-state non-hydrogen atom types in EStateAtomTypes.csv data file distributed with
470 MayaChemTools.
471
472 Combination of Type and EStateAtomTypesSetToUse allow generation of 2 different types of
473 E-state indicies fingerprints:
474
475 Type EStateAtomTypesSetToUse
476
477 EStateIndicies ArbitrarySize [ default fingerprints ]
478 EStateIndicies FixedSize
479
480 The current release of MayaChemTools generates the following types of E-state
481 fingerprints vector strings:
482
483 FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs
484 AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN
485 H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3
486 .024 -2.270
487
488 FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues;
489 ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435
490 4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1
491 4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
492 0 0 0 0 0 0 0 0 0 0 0 0 0 0
493
494 FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues;
495 IDsAndValuesString;SsLi SssBe SssssBem SsBH2 SssBH SsssB SssssBm SsCH3
496 SdCH2 SssCH2 StCH SdsCH SaaCH SsssCH SddC StsC SdssC SaasC SaaaC Sssss
497 C SsNH3p SsNH2 SssNH2p SdNH SssNH SaaNH StN SsssNHp SdsN SaaN SsssN Sd
498 0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435 4.387 0 0 0
499 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 14.006 0 0 0 0
500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0...
501
502 =head2 METHODS
503
504 =over 4
505
506 =item B<new>
507
508 $EStateIndiciesFingerprints = new EStateIndiciesFingerprints(%NamesAndValues);
509
510 Using specified I<EStateIndiciesFingerprints> property names and values hash, B<new> method creates a new object
511 and returns a reference to newly created B<EStateIndiciesFingerprints> object. By default, the
512 following properties are initialized:
513
514 Molecule = '';
515 Type = 'EStateIndicies'
516 EStateAtomTypesSetToUse = 'ArbitrarySize'
517 ValuesPrecision = 3
518
519 Examples:
520
521 $EStateIndiciesFingerprints = new EStateIndiciesFingerprints(
522 'Molecule' => $Molecule,
523 'EStateAtomTypesSetToUse' =>
524 'ArbitrarySize');
525
526 $EStateIndiciesFingerprints = new EStateIndiciesFingerprints(
527 'Molecule' => $Molecule,
528 'EStateAtomTypesSetToUse' =>
529 'FixedSize');
530
531 $EStateIndiciesFingerprints->GenerateFingerprints();
532 print "$EStateIndiciesFingerprints\n";
533
534 =item B<GenerateFingerprints>
535
536 $EStateIndiciesFingerprints = $EStateIndiciesFingerprints->
537 GenerateFingerprints();
538
539 Generates E-state indicies fingerprints and returns I<EStateIndiciesFingerprints>.
540
541 =item B<GetDescription>
542
543 $Description = $EStateIndiciesFingerprints->GetDescription();
544
545 Returns a string containing description of E-state indicies fingerprints.
546
547 =item B<SetEStateAtomTypesSetToUse>
548
549 $EStateIndiciesFingerprints->SetEStateAtomTypesSetToUse($Value);
550
551 Sets I<Value> of I<EStateAtomTypesSetToUse> and returns I<EStateIndiciesFingerprints>.
552 Possible values: I<ArbitrarySize or FixedSize>. Default value: I<ArbitrarySize>.
553
554 =item B<SetValuesPrecision>
555
556 $EStateIndiciesFingerprints->SetValuesPrecision($Precision);
557
558 Sets precision of E-state values to use during generation of E-state indices fingerprints
559 and returns I<EStateIndiciesFingerprints>. Possible values: I<Positive integers>.
560 Default value: I<3>.
561
562 =item B<StringifyEStateIndiciesFingerprints>
563
564 $String = $EStateIndiciesFingerprints->StringifyEStateIndiciesFingerprints();
565
566 Returns a string containing information about I<EStateIndiciesFingerprints> object.
567
568 =back
569
570 =head1 AUTHOR
571
572 Manish Sud <msud@san.rr.com>
573
574 =head1 SEE ALSO
575
576 Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm, AtomTypesFingerprints.pm,
577 ExtendedConnectivityFingerprints.pm, MACCSKeys.pm, PathLengthFingerprints.pm,
578 TopologicalAtomPairsFingerprints.pm, TopologicalAtomTripletsFingerprints.pm,
579 TopologicalAtomTorsionsFingerprints.pm, TopologicalPharmacophoreAtomPairsFingerprints.pm,
580 TopologicalPharmacophoreAtomTripletsFingerprints.pm
581
582 =head1 COPYRIGHT
583
584 Copyright (C) 2015 Manish Sud. All rights reserved.
585
586 This file is part of MayaChemTools.
587
588 MayaChemTools is free software; you can redistribute it and/or modify it under
589 the terms of the GNU Lesser General Public License as published by the Free
590 Software Foundation; either version 3 of the License, or (at your option)
591 any later version.
592
593 =cut