MayaChemTools

   1 package Fingerprints::EStateIndiciesFingerprints;
   2 #
   3 # $RCSfile: EStateIndiciesFingerprints.pm,v $
   4 # $Date: 2015/02/28 20:48:54 $
   5 # $Revision: 1.19 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Carp;
  31 use Exporter;
  32 use Text::ParseWords;
  33 use TextUtil ();
  34 use FileUtil ();
  35 use MathUtil ();
  36 use Fingerprints::Fingerprints;
  37 use Molecule;
  38 use AtomTypes::EStateAtomTypes;
  39 use AtomicDescriptors::EStateValuesDescriptors;
  40 
  41 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  42 
  43 @ISA = qw(Fingerprints::Fingerprints Exporter);
  44 @EXPORT = qw();
  45 @EXPORT_OK = qw();
  46 
  47 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  48 
  49 # Setup class variables...
  50 my($ClassName);
  51 _InitializeClass();
  52 
  53 # Overload Perl functions...
  54 use overload '""' => 'StringifyEStateIndiciesFingerprints';
  55 
  56 # Class constructor...
  57 sub new {
  58   my($Class, %NamesAndValues) = @_;
  59 
  60   # Initialize object...
  61   my $This = $Class->SUPER::new();
  62   bless $This, ref($Class) || $Class;
  63   $This->_InitializeEStateIndiciesFingerprints();
  64 
  65   $This->_InitializeEStateIndiciesFingerprintsProperties(%NamesAndValues);
  66 
  67   return $This;
  68 }
  69 
  70 # Initialize object data...
  71 #
  72 sub _InitializeEStateIndiciesFingerprints {
  73   my($This) = @_;
  74 
  75   # EStateIndicies is a vector containing sum of E-state values for E-state atom types
  76   #
  77   $This->{Type} = 'EStateIndicies';
  78 
  79   # EStateAtomTypesSetToUse for EStateIndicies:
  80   #
  81   # ArbitrarySize - Corrresponds to only E-state atom types detected in molecule
  82   # FixedSize - Corresponds to fixed number of E-state atom types previously defined [ Ref 77 ]
  83   #
  84   # The default EStateAtomTypesSetToUse value for EStateIndicies fingerprints type: ArbitrarySize.
  85   # Possible values: ArbitrarySize or FixedSize.
  86   #
  87   $This->{EStateAtomTypesSetToUse} = '';
  88 
  89   # Assigned E-state atom types...
  90   %{$This->{EStateAtomTypes}} = ();
  91 
  92   # Vector values precision for real values during E-state indicies...
  93   $This->{ValuesPrecision} = 3;
  94 
  95   # Calculated E-state values and indicies for generating E-state indicies fingerprints...
  96   %{$This->{EStateValues}} = ();
  97   %{$This->{EStateIndicies}} = ();
  98 }
  99 
 100 # Initialize class ...
 101 sub _InitializeClass {
 102   #Class name...
 103   $ClassName = __PACKAGE__;
 104 
 105 }
 106 
 107 # Initialize object properties....
 108 sub _InitializeEStateIndiciesFingerprintsProperties {
 109   my($This, %NamesAndValues) = @_;
 110 
 111   my($Name, $Value, $MethodName);
 112   while (($Name, $Value) = each  %NamesAndValues) {
 113     $MethodName = "Set${Name}";
 114     $This->$MethodName($Value);
 115   }
 116 
 117   # Make sure molecule object was specified...
 118   if (!exists $NamesAndValues{Molecule}) {
 119     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
 120   }
 121 
 122   $This->_InitializeEstateIndicies();
 123 
 124   return $This;
 125 }
 126 
 127 # Initialize E-state indicies...
 128 #
 129 sub _InitializeEstateIndicies {
 130   my($This) = @_;
 131 
 132   # Set default EStateAtomTypesSetToUse...
 133   if (!$This->{EStateAtomTypesSetToUse}) {
 134     $This->{EStateAtomTypesSetToUse} = 'ArbitrarySize';
 135   }
 136 
 137   # Vector type...
 138   $This->{VectorType} = 'FingerprintsVector';
 139 
 140   if ($This->{EStateAtomTypesSetToUse} =~ /^FixedSize$/i) {
 141     $This->{FingerprintsVectorType} = 'OrderedNumericalValues';
 142   }
 143   else {
 144     $This->{FingerprintsVectorType} = 'NumericalValues';
 145   }
 146 
 147   $This->_InitializeFingerprintsVector();
 148 
 149   return $This;
 150 }
 151 
 152 # Disable set size method...
 153 #
 154 sub SetSize {
 155   my($This, $Type) = @_;
 156 
 157   croak "Error: ${ClassName}->SetSize: Can't change size:  It's not allowed...";
 158 }
 159 
 160 # Set E-state atom types set to use...
 161 #
 162 sub SetEStateAtomTypesSetToUse {
 163   my($This, $Value) = @_;
 164 
 165   if ($This->{EStateAtomTypesSetToUse}) {
 166     croak "Error: ${ClassName}->SetEStateAtomTypesSetToUse: Can't change size:  It's already set...";
 167   }
 168 
 169   if ($Value !~ /^(ArbitrarySize|FixedSize)/i) {
 170     croak "Error: ${ClassName}->SetEStateAtomTypesSetToUse: Unknown EStateAtomTypesSetToUse value: $Value; Supported values: ArbitrarySize or FixedSize";
 171   }
 172 
 173   $This->{EStateAtomTypesSetToUse} = $Value;
 174 
 175   return $This;
 176 }
 177 
 178 # Set vector values precision for real values for E-state indicies...
 179 #
 180 sub SetValuesPrecision {
 181   my($This, $Value) = @_;
 182 
 183   if (!TextUtil::IsPositiveInteger($Value)) {
 184     croak "Error: ${ClassName}->SetValuesPrecision: ValuesPrecision value, $Value, is not valid:  It must be a positive integer...";
 185   }
 186   $This->{ValuesPrecision} = $Value;
 187 
 188   return $This;
 189 }
 190 
 191 # Generate fingerprints description...
 192 #
 193 sub GetDescription {
 194   my($This) = @_;
 195 
 196   # Is description explicity set?
 197   if (exists $This->{Description}) {
 198     return $This->{Description};
 199   }
 200 
 201   # Generate fingerprints description...
 202 
 203   return "$This->{Type}:$This->{EStateAtomTypesSetToUse}";
 204 }
 205 
 206 # Generate electrotopological state indicies (E-state) [ Ref 75-78 ] fingerprints for
 207 # non-hydrogen atoms in a molecule...
 208 #
 209 # EStateIndicies fingerprints constitute a vector containing sum of E-state values
 210 # for E-state atom types. Two types of E-state atom types set size are allowed:
 211 #
  212 # ArbitrarySize - Corresponds to only E-state atom types detected in molecule
 213 # FixedSize - Corresponds to fixed number of E-state atom types previously defined
 214 #
 215 # Module AtomTypes::EStateAtomTypes.pm is used to assign E-state atom types to
 216 # non-hydrogen atoms in the molecule which is able to assign atom types to any valid
 217 # atom group. However, for FixedSize value of EStateAtomTypesSetToUse, only a fixed
 218 # set of E-state atom types corresponding to specific atom groups [ Appendix III in
 219 # Ref 77 ] are used for fingerprints.
 220 #
  221 # The fixed size E-state atom type set used during generation of fingerprints corresponding
  222 # to FixedSize value of EStateAtomTypesSetToUse contains 87 E-state non-hydrogen atom types
 223 # in EStateAtomTypes.csv data file distributed with MayaChemTools.
 224 #
 225 # Combination of Type and EStateAtomTypesSetToUse allow generation of 2 different types of
 226 # E-state indicies fingerprints:
 227 #
 228 # Type                        EStateAtomTypesSetToUse
 229 #
 230 # EStateIndicies               ArbitrarySize      [ default fingerprints ]
 231 # EStateIndicies               FixedSize
 232 #
  233 # The default is to generate EStateIndicies type fingerprints corresponding to ArbitrarySize as
  234 # EStateAtomTypesSetToUse value.
 235 #
 236 #
 237 sub GenerateFingerprints {
 238   my($This) = @_;
 239 
 240   # Cache appropriate molecule data...
 241   $This->_SetupMoleculeDataCache();
 242 
 243   # Assign E-state atom types...
 244   if (!$This->_AssignEStateAtomTypes()) {
 245     carp "Warning: ${ClassName}->GenerateFingerprints: $This->{Type} fingerprints generation didn't succeed: Couldn't assign valid E-state atom types to all atoms...";
 246     return $This;
 247   }
 248 
 249   # Calculate E-state indicies...
 250   if (!$This->_CalculateEStateIndicies()) {
 251     carp "Warning: ${ClassName}->GenerateFingerprints: $This->{Type} fingerprints generation didn't succeed: Couldn't calculate E-state values for all atoms...";
 252     return $This;
 253   }
 254 
 255   # Set final fingerprints...
 256   $This->_SetFinalFingerprints();
 257 
 258   # Clear cached molecule data...
 259   $This->_ClearMoleculeDataCache();
 260 
 261   return $This;
 262 }
 263 
 264 # Assign E-state atom types...
 265 #
 266 sub _AssignEStateAtomTypes {
 267   my($This) = @_;
 268   my($EStateAtomTypes, $Atom, $AtomID, $AtomType);
 269 
 270   %{$This->{EStateAtomTypes}} = ();
 271 
 272   # Assign E-state atom types...
 273   $EStateAtomTypes = new AtomTypes::EStateAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => 1);
 274   $EStateAtomTypes->AssignAtomTypes();
 275 
 276   # Make sure atom types assignment is successful...
 277   if (!$EStateAtomTypes->IsAtomTypesAssignmentSuccessful()) {
 278     return undef;
 279   }
 280 
 281   # Collect assigned atom types...
 282   for $Atom (@{$This->{Atoms}}) {
 283     $AtomID = $Atom->GetID();
 284 
 285     $AtomType = $EStateAtomTypes->GetAtomType($Atom);
 286     $This->{EStateAtomTypes}{$AtomID} = $AtomType;
 287   }
 288   return $This;
 289 }
 290 
 291 # Calculate E-state indicies by summing up E-state values for specific
 292 # E-state atom types...
 293 #
 294 sub _CalculateEStateIndicies {
 295   my($This) = @_;
 296   my($Atom, $AtomID, $AtomType, $EStateValue);
 297 
 298   # Calculate E-state values to generate E-state indicies...
 299   if (!$This->_CalculateEStateValuesDescriptors()) {
 300     return undef;
 301   }
 302 
 303   # Calculate E-state indicies...
 304   for $Atom (@{$This->{Atoms}}) {
 305     $AtomID = $Atom->GetID();
 306 
 307     $AtomType = $This->{EStateAtomTypes}{$AtomID};
 308     $EStateValue = $This->{EStateValues}{$AtomID};
 309 
 310     if (!exists $This->{EStateIndicies}{$AtomType}) {
 311       $This->{EStateIndicies}{$AtomType} = 0;
 312     }
 313 
 314     $This->{EStateIndicies}{$AtomType} += $EStateValue;
 315   }
 316   return $This;
 317 }
 318 
 319 # Calculate E-state values for E-state indicies...
 320 #
 321 sub _CalculateEStateValuesDescriptors {
 322   my($This) = @_;
 323   my($EStateValuesDescriptors, $Atom, $AtomID, $EStateValue);
 324 
 325   %{$This->{EStateValues}} = ();
 326 
 327   # Calculate and assign E-state values...
 328   $EStateValuesDescriptors = new AtomicDescriptors::EStateValuesDescriptors('Molecule' => $This->{Molecule});
 329   $EStateValuesDescriptors->GenerateDescriptors();
 330 
 331   # Make sure E-state values calculation is successful...
 332   if (!$EStateValuesDescriptors->IsDescriptorsGenerationSuccessful()) {
 333     return undef;
 334   }
 335 
 336   # Collect assigned E-state values...
 337   for $Atom (@{$This->{Atoms}}) {
 338     $AtomID = $Atom->GetID();
 339     $EStateValue = $EStateValuesDescriptors->GetDescriptorValue($Atom);
 340     $This->{EStateValues}{$AtomID} = $EStateValue;
 341   }
 342   return $This;
 343 }
 344 
 345 # Set final final fingerpritns for E-state indicies...
 346 #
 347 sub _SetFinalFingerprints {
 348   my($This) = @_;
 349   my($AtomType, $ValuesPrecision, $EStateAtomTypesDataRef, @Values, @IDs);
 350 
 351   # Mark successful generation of fingerprints...
 352   $This->{FingerprintsGenerated} = 1;
 353 
 354   @Values = ();
 355   @IDs = ();
 356 
 357   $ValuesPrecision = $This->{ValuesPrecision};
 358 
 359   if ($This->{EStateAtomTypesSetToUse} =~ /^FixedSize$/i) {
 360     # Use fixed size E-state atom types set for non-hydrogen atoms...
 361     for $AtomType (@{AtomTypes::EStateAtomTypes::GetAllPossibleEStateNonHydrogenAtomTypes()}) {
 362       push @IDs, "S${AtomType}";
 363       push @Values, exists($This->{EStateIndicies}{$AtomType}) ? MathUtil::round($This->{EStateIndicies}{$AtomType}, $ValuesPrecision) : 0;
 364     }
 365   }
 366   else {
 367     for $AtomType (sort keys %{$This->{EStateIndicies}}) {
 368       push @IDs, "S${AtomType}";
 369       push @Values, MathUtil::round($This->{EStateIndicies}{$AtomType}, $ValuesPrecision);
 370     }
 371   }
 372 
 373   # Add IDs and values to fingerprint vector...
 374   if (@IDs) {
 375     $This->{FingerprintsVector}->AddValueIDs(\@IDs);
 376   }
 377   $This->{FingerprintsVector}->AddValues(\@Values);
 378 
 379   return $This;
 380 }
 381 
 382 # Cache  appropriate molecule data...
 383 #
 384 sub _SetupMoleculeDataCache {
 385   my($This) = @_;
 386 
 387   # Get all non-hydrogen atoms...
 388   my($NegateAtomCheckMethod);
 389   $NegateAtomCheckMethod = 1;
 390   @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms("IsHydrogen", $NegateAtomCheckMethod);
 391 
 392   return $This;
 393 }
 394 
 395 # Clear cached molecule data...
 396 #
 397 sub _ClearMoleculeDataCache {
 398   my($This) = @_;
 399 
 400   @{$This->{Atoms}} = ();
 401 
 402   return $This;
 403 }
 404 
 405 # Return a string containg data for EStateIndiciesFingerprints object...
 406 sub StringifyEStateIndiciesFingerprints {
 407   my($This) = @_;
 408   my($EStateIndiciesFingerprintsString);
 409 
 410   # Type of Keys...
 411   $EStateIndiciesFingerprintsString = "Type: $This->{Type}; EStateAtomTypesSetToUse: $This->{EStateAtomTypesSetToUse}";
 412 
 413   # Fingerprint vector...
 414   $EStateIndiciesFingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";
 415 
 416   return $EStateIndiciesFingerprintsString;
 417 }
 418