MayaChemTools

   1 package AtomicDescriptors::EStateValuesDescriptors;
   2 #
   3 # $RCSfile: EStateValuesDescriptors.pm,v $
   4 # $Date: 2015/02/28 20:47:49 $
   5 # $Revision: 1.16 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Carp;
  31 use Exporter;
  32 use Scalar::Util ();
  33 use Matrix;
  34 use Constants;
  35 use TextUtil ();
  36 use MathUtil ();
  37 use StatisticsUtil ();
  38 use Atom;
  39 use Molecule;
  40 use AtomicDescriptors::AtomicDescriptors;
  41 
  42 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  43 
  44 @ISA = qw(AtomicDescriptors::AtomicDescriptors Exporter);
  45 @EXPORT = qw();
  46 @EXPORT_OK = qw();
  47 
  48 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  49 
  50 # Setup class variables...
  51 my($ClassName);
  52 _InitializeClass();
  53 
  54 # Overload Perl functions...
  55 use overload '""' => 'StringifyEStateValuesDescriptors';
  56 
  57 # Class constructor...
  58 sub new {
  59   my($Class, %NamesAndValues) = @_;
  60 
  61   # Initialize object...
  62   my $This = $Class->SUPER::new();
  63   bless $This, ref($Class) || $Class;
  64   $This->_InitializeEStateValuesDescriptors();
  65 
  66   $This->_InitializeEStateValuesDescriptorsProperties(%NamesAndValues);
  67 
  68   return $This;
  69 }
  70 
  71 # Initialize class ...
  72 sub _InitializeClass {
  73   #Class name...
  74   $ClassName = __PACKAGE__;
  75 }
  76 
  77 
  78 # Initialize object data...
  79 #
  80 sub _InitializeEStateValuesDescriptors {
  81   my($This) = @_;
  82 
  83   # Type of AtomicDescriptor...
  84   $This->{Type} = 'EStateValue';
  85 
  86   # Intrinsic state values calculated for non-hydrogen atom...
  87   #
  88   %{$This->{IStateValues}} = ();
  89 
  90   # Calculatetion of E-state values for types for hydrogens is not supported...
  91   $This->{IgnoreHydrogens} = 1;
  92 
  93   # Perturbation to intrinsic state values of atoms corresponding to all non-hydrogen
  94   # atom pairs...
  95   #
  96   %{$This->{DeltaIStateMatrix}} = ();
  97 
  98   # E-state values calculated for non-hydrogen atoms...
  99   #
 100   %{$This->{EStateValues}} = ();
 101 
 102   return $This;
 103 }
 104 
 105 # Initialize object properties...
 106 #
 107 sub _InitializeEStateValuesDescriptorsProperties {
 108   my($This, %NamesAndValues) = @_;
 109 
 110   my($Name, $Value, $MethodName);
 111   while (($Name, $Value) = each  %NamesAndValues) {
 112     $MethodName = "Set${Name}";
 113     $This->$MethodName($Value);
 114   }
 115 
 116   # Make sure molecule object was specified...
 117   if (!exists $NamesAndValues{Molecule}) {
 118     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
 119   }
 120 
 121   # Intialize atomic descriptor values...
 122   $This->_InitializeDescriptorValues();
 123 
 124   return $This;
 125 }
 126 
 127 # Disable change of ignore hydrogens...
 128 #
 129 sub SetIgnoreHydrogens {
 130   my($This, $IgnoreHydrogens) = @_;
 131 
 132   carp "Warning: ${ClassName}->SetIgnoreHydrogens: Ignore hydrogens value can't be changed: It's not supported...";
 133 
 134   return $This;
 135 }
 136 
 137 # Generate electrotopological state (E-state) values [ Ref 75-78 ] for all atoms
 138 # in the molecule...
 139 #
 140 # Calculation of E-state values for non-hydrogen atoms:
 141 #
 142 # Let:
 143 #
 144 #   N = Principal quantum number or period number corresponding to element symbol
 145 #
 146 #   Sigma = Number of sigma electrons involves in bonds to hydrogen and non-hydrogen atoms
 147 #           attached to atom
 148 #         = Number of sigma bonds to hydrogen and non-hydrogen atoms attached to atom
 149 #   PI = Number of PI electrons involved in bonds to non-hydrogen atoms attached to atom
 150 #       = Number of PI bonds to non-hydrogen atoms attached to atom
 151 #
 152 #   LP = Number of lone pair electrons on atom
 153 #
 154 #   Zv = Number of electrons in valence shell of atom
 155 #
 156 #   X = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
 157 #   H = Number of implicit and explicit hydrogens for atom
 158 #
 159 #   Delta = Number of sigma electrons involved to bonds to non-hydrogen atoms
 160 #   DeltaV = ValenceDelta = Number of valence shell electrons not involved in bonding to hydrogen atoms
 161 #
 162 #   Ii = Intrinsic state value for atom i
 163 #
 164 #   DeltaIi = Sum of perturbations to intrinsic state value Ii of atom i by all other atoms besides atom i
 165 #
 166 #   DeltaIij = Perturbation to intrinsic state value Ii of atom i by atom j
 167 #
 168 #   Dij = Graph/bond distance between atom i and j
 169 #   Rij = Dij + 1
 170 #
 171 #   Si = E-state value for atom i
 172 #
 173 #
 174 # Then:
 175 #
 176 #   Delta = Sigma - H = X
 177 #
 178 #   DeltaV = Zv - H
 179 #      = Sigma + PI + LP - H
 180 #
 181 #   Ii = ( ( ( 2 / N ) ** 2 ) * DeltaV + 1 ) / Delta
 182 #
 183 #   Si = Ii + DeltaIi
 184 #
 185 #   DeltaIij = (Ii - Ij) / (Rij ** 2) for j not equal to i
 186 #
 187 #   DeltaIji = - DeltaIij
 188 #
 189 #   DeltaIi = SUM ( (Ii - Ij) / (Rij ** 2) ) for j = 1 to num of atoms skipping atom i
 190 #
 191 # Methodology:
 192 #   . Calculate intrinsic state values for atoms.
 193 #   . Generate a distance matrix.
 194 #   . Use distance matrix to calculate DeltaIij matrix with each row i containing
 195 #     DeltaIij values corresponding to perturbation to intrinsic state value of atom
 196 #     i by atom j.
 197 #   . Calculate E-state values using DeltaIij matrix.
 198 #   . Assign E-state values to atoms.
 199 #
 200 # Notes:
 201 #   . The current release of MayaChemTools doesn't support calculation of Hydrogen
 202 #     E-state values.
 203 #
 204 sub GenerateDescriptors {
 205   my($This) = @_;
 206 
 207   # Cache appropriate molecule data...
 208   $This->_SetupMoleculeDataCache();
 209 
 210   # Generate distance matrix...
 211   if (!$This->_SetupDistanceMatrix()) {
 212     carp "Warning: ${ClassName}->GenerateDescriptors: E-state values description generation didn't succeed: Couldn't generate a distance matrix...";
 213     return $This;
 214   }
 215 
 216   # Calculate EState values..
 217   if (!$This->_CalculateEStateValuesDescriptors()) {
 218     carp "Warning: ${ClassName}->GenerateDescriptors: E-state values description generation didn't succeed: Couldn't calculate IState values for all atoms...";
 219     return $This;
 220   }
 221 
 222   # Set final descriptor values...
 223   $This->_SetFinalValues();
 224 
 225   # Clear cached molecule data...
 226   $This->_ClearMoleculeDataCache();
 227 
 228   return $This;
 229 }
 230 
 231 # Calculate E-state values for non-hydrogen atoms...
 232 #
 233 sub _CalculateEStateValuesDescriptors {
 234   my($This) = @_;
 235   my($DeltaIStateMatrix, $NumOfRows, $NumOfCols, $RowIndex, $AtomID, $EStateValue, $IStateValue, $DeltaIStateValue, @DeltaIStateMatrixRowValues);
 236 
 237   %{$This->{EStateValues}} = ();
 238 
 239   # Calculate intrinsic state values for non-hydrogen atoms...
 240   if (!$This->_CalculateIStateValues()) {
 241     return undef;
 242   }
 243 
 244   # Calculate delta intrinsic state matrix for non-hydrogen atoms...
 245   $This->_CalculateDeltaIStateMatrix();
 246 
 247   # Get DeltaIState matrix information...
 248   $DeltaIStateMatrix = $This->{DeltaIStateMatrix};
 249   ($NumOfRows, $NumOfCols) = $DeltaIStateMatrix->GetSize();
 250 
 251   # Calculate EState values...
 252   ROWINDEX: for $RowIndex (0 .. ($NumOfRows - 1) ) {
 253     $AtomID = $This->{AtomIndexToID}{$RowIndex};
 254     if ( !(exists($This->{IStateValues}{$AtomID})) ) {
 255       next ROWINDEX;
 256     }
 257     $IStateValue = $This->{IStateValues}{$AtomID};
 258 
 259     @DeltaIStateMatrixRowValues = $DeltaIStateMatrix->GetRowValues($RowIndex);
 260     $DeltaIStateValue = StatisticsUtil::Sum(\@DeltaIStateMatrixRowValues);
 261 
 262     $EStateValue = $IStateValue + $DeltaIStateValue;
 263 
 264     $This->{EStateValues}{$AtomID} = $EStateValue;
 265   }
 266   return $This;
 267 }
 268 
 269 # Calculate intrinsic state values for non-hydrogen atoms...
 270 #
 271 sub _CalculateIStateValues {
 272   my($This) = @_;
 273   my($Atom, $AtomID);
 274 
 275   %{$This->{IStateValues}} = ();
 276 
 277   ATOM: for $Atom (@{$This->{Atoms}}) {
 278     # Irrespective of IgoreHydrogens value, just ignore hydrogens...
 279     if ($Atom->IsHydrogen()) {
 280       next ATOM;
 281     }
 282     $AtomID = $Atom->GetID();
 283     $This->{IStateValues}{$AtomID} = $This->_CalculateIStateValue($Atom);
 284     if (!defined($This->{IStateValues}{$AtomID})) {
 285       return undef;
 286     }
 287   }
 288   return $This;
 289 }
 290 
 291 # Calculation intrinsic state value for non-hydrogen...
 292 #
 293 sub _CalculateIStateValue {
 294   my($This, $Atom) = @_;
 295   my($IStateValue, $Delta, $DeltaV, $PeriodNumber);
 296 
 297   $PeriodNumber = $Atom->GetPeriodNumber();
 298   ($Delta, $DeltaV) = $This->_GetDeltaValues($Atom);
 299 
 300   if (!(defined($Delta) && defined($PeriodNumber) && defined($DeltaV))) {
 301     return undef;
 302   }
 303 
 304   $IStateValue = ($PeriodNumber && $Delta) ? (((2/$PeriodNumber)**2)*$DeltaV + 1)/$Delta : 0;
 305 
 306   return $IStateValue;
 307 }
 308 
 309 # Get Delta and DeltaV values for atom...
 310 #
 311 sub _GetDeltaValues {
 312   my($This, $Atom) = @_;
 313   my($Delta, $DeltaV, $ValenceElectrons, $NumOfHydrogens);
 314 
 315   ($Delta, $DeltaV) = (undef, undef);
 316 
 317   $ValenceElectrons = $Atom->GetValenceElectrons();
 318   $NumOfHydrogens = $Atom->GetAtomicInvariantValue('H');
 319 
 320   $Delta = $Atom->GetAtomicInvariantValue('X');
 321   if (defined($ValenceElectrons) && defined($NumOfHydrogens)) {
 322     $DeltaV = $ValenceElectrons - $NumOfHydrogens;
 323   }
 324 
 325   return ($Delta, $DeltaV);
 326 }
 327 
 328 # Calculate DeltaIState matrix for atoms with each row i containing DeltaIij values
 329 # corresponding atom atoms i and j.
 330 #
 331 # Notes:
 332 #   . Matrix elements corresponding to hydrogen atoms or unconnected
 333 #     are assigned zero value.
 334 #
 335 sub _CalculateDeltaIStateMatrix {
 336   my($This) = @_;
 337   my($DistanceMatrix, $NumOfRows, $NumOfCols, $RowIndex, $ColIndex, $AtomID1, $AtomID2, $DeltaIStateMatrix, $IStateValue1, $IStateValue2, $GraphDistance, $DeltaIState12, $DeltaIState21, $SkipIndexCheck);
 338 
 339   # Get distance matrix information...
 340   $DistanceMatrix = $This->{DistanceMatrix};
 341   ($NumOfRows, $NumOfCols) = $DistanceMatrix->GetSize();
 342 
 343   # Initialize DeltaIState matrix...
 344   $This->{DeltaIStateMatrix} = new Matrix($NumOfRows, $NumOfCols);
 345   $DeltaIStateMatrix = $This->{DeltaIStateMatrix};
 346 
 347   $SkipIndexCheck = 1;
 348 
 349   # Calculate DeltaIState matrix values...
 350   ROWINDEX: for $RowIndex (0 .. ($NumOfRows - 1) ) {
 351     $AtomID1 = $This->{AtomIndexToID}{$RowIndex};
 352     if (!exists($This->{IStateValues}{$AtomID1})) {
 353       next ROWINDEX;
 354     }
 355     $IStateValue1 = $This->{IStateValues}{$AtomID1};
 356 
 357     COLINDEX: for $ColIndex (($RowIndex + 1) .. ($NumOfCols - 1) ) {
 358       $AtomID2 = $This->{AtomIndexToID}{$ColIndex};
 359       if (!exists($This->{IStateValues}{$AtomID2})) {
 360         next COLINDEX;
 361       }
 362       $IStateValue2 = $This->{IStateValues}{$AtomID2};
 363 
 364       # Make sure it's a connected atom...
 365       $GraphDistance = $DistanceMatrix->GetValue($RowIndex, $ColIndex, $SkipIndexCheck);
 366       if ($GraphDistance >= BigNumber) {
 367         next COLINDEX;
 368       }
 369 
 370       $DeltaIState12 = ($IStateValue1 - $IStateValue2)/(($GraphDistance + 1)**2);
 371       $DeltaIState21 = -$DeltaIState12;
 372 
 373       # Set DeltaIState values...
 374       $DeltaIStateMatrix->SetValue($RowIndex, $ColIndex, $DeltaIState12, $SkipIndexCheck);
 375       $DeltaIStateMatrix->SetValue($ColIndex, $RowIndex, $DeltaIState21, $SkipIndexCheck);
 376     }
 377   }
 378 }
 379 
 380 # Setup distance matrix...
 381 #
 382 sub _SetupDistanceMatrix {
 383   my($This) = @_;
 384 
 385   $This->{DistanceMatrix} = $This->GetMolecule()->GetDistanceMatrix();
 386 
 387   if (!defined($This->{DistanceMatrix})) {
 388     return undef;
 389   }
 390 
 391   return $This;
 392 }
 393 
 394 # Setup final descriptor values...
 395 #
 396 sub _SetFinalValues {
 397   my($This) = @_;
 398   my($Atom, $AtomID, $EStateValue);
 399 
 400   ATOM: for $Atom (@{$This->{Atoms}}) {
 401     $AtomID = $Atom->GetID();
 402     if (!exists $This->{EStateValues}{$AtomID}) {
 403       next ATOM;
 404     }
 405     $EStateValue = $This->{EStateValues}{$AtomID};
 406     $This->SetDescriptorValue($Atom, $EStateValue);
 407   }
 408 
 409   return $This;
 410 }
 411 
 412 # Cache  appropriate molecule data...
 413 #
 414 sub _SetupMoleculeDataCache {
 415   my($This) = @_;
 416 
 417   # Get all atoms including hydrogens to correctly map atom indicies to atom IDs for
 418   # usage of distance matrix.
 419   #
 420   @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();
 421 
 422   # Get all atom IDs...
 423   my(@AtomIDs);
 424   @AtomIDs = ();
 425   @AtomIDs =  map { $_->GetID() } @{$This->{Atoms}};
 426 
 427   # Set AtomIndex to AtomID hash...
 428   %{$This->{AtomIndexToID}} = ();
 429   @{$This->{AtomIndexToID}}{ (0 .. $#AtomIDs) } = @AtomIDs;
 430 
 431   return $This;
 432 }
 433 
 434 # Clear cached molecule data...
 435 #
 436 sub _ClearMoleculeDataCache {
 437   my($This) = @_;
 438 
 439   @{$This->{Atoms}} = ();
 440 
 441   return $This;
 442 }
 443 
 444 # Return a string containg data for EStateValuesDescriptors object...
 445 #
 446 sub StringifyEStateValuesDescriptors {
 447   my($This) = @_;
 448   my($EStateValuesDescriptorsString);
 449 
 450   # Type of AtomicValues...
 451   $EStateValuesDescriptorsString = "AtomicDescriptorType: $This->{Type}; IgnoreHydrogens: " . ($This->{IgnoreHydrogens} ? "Yes" : "No");
 452 
 453   # Setup atomic descriptor information...
 454   my($AtomID, $DescriptorValue, @DescriptorValuesInfo, %DescriptorValues);
 455 
 456   @DescriptorValuesInfo = ();
 457   %DescriptorValues = $This->GetDescriptorValues();
 458 
 459   for $AtomID (sort { $a <=> $b } keys %DescriptorValues) {
 460     $DescriptorValue = $DescriptorValues{$AtomID};
 461     $DescriptorValue = (TextUtil::IsEmpty($DescriptorValue) || $DescriptorValue =~ /^None$/i) ? 'None' : MathUtil::round($DescriptorValue, 3) + 0;
 462     push @DescriptorValuesInfo, "$AtomID:$DescriptorValue";
 463   }
 464   $EStateValuesDescriptorsString .= "; AtomIDs:EStateValuesDescriptors: <" . TextUtil::JoinWords(\@DescriptorValuesInfo, ", ", 0) . ">";
 465 
 466   return $EStateValuesDescriptorsString;
 467 }
 468 
 469 # Is it a EStateValuesDescriptors object?
 470 sub _IsEStateValuesDescriptors {
 471   my($Object) = @_;
 472 
 473   return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
 474 }
 475