package Fingerprints::EStateIndiciesFingerprints;
#
# $RCSfile: EStateIndiciesFingerprints.pm,v $
# $Date: 2015/02/28 20:48:54 $
# $Revision: 1.19 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Text::ParseWords;
use TextUtil ();
use FileUtil ();
use MathUtil ();
use Fingerprints::Fingerprints;
use Molecule;
use AtomTypes::EStateAtomTypes;
use AtomicDescriptors::EStateValuesDescriptors;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(Fingerprints::Fingerprints Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Overload Perl functions...
use overload '""' => 'StringifyEStateIndiciesFingerprints';

# Class constructor...
#
# Takes a list of property name and value pairs. The object is created by the
# parent class constructor, re-blessed into this class, given its default data
# and then configured from the specified pairs. A Molecule pair is required.
#
# Returns the new object; croaks when no Molecule is specified.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Initialize object...
  my $This = $Class->SUPER::new();
  bless $This, ref($Class) || $Class;
  $This->_InitializeEStateIndiciesFingerprints();

  $This->_InitializeEStateIndiciesFingerprintsProperties(%NamesAndValues);

  return $This;
}

# Initialize object data...
#
# Sets the default values for all object data used by this class and returns
# the object.
#
sub _InitializeEStateIndiciesFingerprints {
  my($This) = @_;

  # EStateIndicies is a vector containing sum of E-state values for E-state atom types
  #
  $This->{Type} = 'EStateIndicies';

  # EStateAtomTypesSetToUse for EStateIndicies:
  #
  # ArbitrarySize - Corresponds to only E-state atom types detected in molecule
  # FixedSize - Corresponds to fixed number of E-state atom types previously defined [ Ref 77 ]
  #
  # The default EStateAtomTypesSetToUse value for EStateIndicies fingerprints type: ArbitrarySize.
  # Possible values: ArbitrarySize or FixedSize.
  #
  # It is left empty here so that SetEStateAtomTypesSetToUse can detect an
  # explicitly specified value; the default is applied after the properties
  # have been processed.
  #
  $This->{EStateAtomTypesSetToUse} = '';

  # Assigned E-state atom types...
  %{$This->{EStateAtomTypes}} = ();

  # Vector values precision for real values during E-state indicies...
  $This->{ValuesPrecision} = 3;

  # Calculated E-state values and indicies for generating E-state indicies fingerprints...
  %{$This->{EStateValues}} = ();
  %{$This->{EStateIndicies}} = ();

  return $This;
}

# Initialize class ...
sub _InitializeClass {
  #Class name...
  $ClassName = __PACKAGE__;

}

# Initialize object properties....
#
# Calls the Set<Name> method for each specified property name and value pair
# and finishes the setup of E-state indicies. Returns the object; croaks when
# no Molecule is specified.
#
sub _InitializeEStateIndiciesFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  # Make sure molecule object was specified. This is checked before any
  # property is set so that no setter runs on an object which can't be
  # instantiated anyway...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }

  # Set properties in sorted name order to keep the sequence of setter calls
  # the same from run to run...
  my($Name, $MethodName);
  for $Name (sort keys %NamesAndValues) {
    $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  $This->_InitializeEstateIndicies();

  return $This;
}

# Initialize E-state indicies...
#
# Applies the default EStateAtomTypesSetToUse value when none was specified,
# selects the vector types matching it and initializes the fingerprints vector.
# Returns the object.
#
sub _InitializeEstateIndicies {
  my($This) = @_;

  # Set default EStateAtomTypesSetToUse...
  if (!$This->{EStateAtomTypesSetToUse}) {
    $This->{EStateAtomTypesSetToUse} = 'ArbitrarySize';
  }

  # Vector type...
  $This->{VectorType} = 'FingerprintsVector';

  if ($This->{EStateAtomTypesSetToUse} =~ /^FixedSize$/i) {
    $This->{FingerprintsVectorType} = 'OrderedNumericalValues';
  }
  else {
    $This->{FingerprintsVectorType} = 'NumericalValues';
  }

  $This->_InitializeFingerprintsVector();

  return $This;
}

# Disable set size method...
#
# Always croaks: the size can't be changed for this fingerprints type.
#
sub SetSize {
  my($This, $Type) = @_;

  croak "Error: ${ClassName}->SetSize: Can't change size: It's not allowed...";
}

# Set E-state atom types set to use...
#
# Supported values, matched without regard to case: ArbitrarySize or FixedSize.
# The value is stored as specified. Returns the object; croaks when a value has
# already been set or when the specified value is not supported.
#
sub SetEStateAtomTypesSetToUse {
  my($This, $Value) = @_;

  if ($This->{EStateAtomTypesSetToUse}) {
    croak "Error: ${ClassName}->SetEStateAtomTypesSetToUse: Can't change size: It's already set...";
  }

  # The match is anchored at both ends: the rest of this class tests the value
  # against /^FixedSize$/i, so a value with trailing characters would pass an
  # unanchored check here and then silently fall through to the ArbitrarySize
  # code path...
  if ($Value !~ /^(?:ArbitrarySize|FixedSize)$/i) {
    croak "Error: ${ClassName}->SetEStateAtomTypesSetToUse: Unknown EStateAtomTypesSetToUse value: $Value; Supported values: ArbitrarySize or FixedSize";
  }

  $This->{EStateAtomTypesSetToUse} = $Value;

  return $This;
}

# Set vector values precision for real values for E-state indicies...
#
# Returns the object; croaks when TextUtil::IsPositiveInteger rejects the value.
#
sub SetValuesPrecision {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetValuesPrecision: ValuesPrecision value, $Value, is not valid:  It must be a positive integer...";
  }
  $This->{ValuesPrecision} = $Value;

  return $This;
}

# Generate fingerprints description...
#
# Returns the explicitly set description when there is one; otherwise a string
# of the form <Type>:<EStateAtomTypesSetToUse>.
#
sub GetDescription {
  my($This) = @_;

  # Is description explicitly set?
  if (exists $This->{Description}) {
    return $This->{Description};
  }

  # Generate fingerprints description...

  return "$This->{Type}:$This->{EStateAtomTypesSetToUse}";
}

# Generate electrotopological state indicies (E-state) [ Ref 75-78 ] fingerprints for
# non-hydrogen atoms in a molecule...
#
# EStateIndicies fingerprints constitute a vector containing sum of E-state values
# for E-state atom types. Two types of E-state atom types set size are allowed:
#
# ArbitrarySize - Corresponds to only E-state atom types detected in molecule
# FixedSize - Corresponds to fixed number of E-state atom types previously defined
#
# Module AtomTypes::EStateAtomTypes.pm is used to assign E-state atom types to
# non-hydrogen atoms in the molecule which is able to assign atom types to any valid
# atom group. However, for FixedSize value of EStateAtomTypesSetToUse, only a fixed
# set of E-state atom types corresponding to specific atom groups [ Appendix III in
# Ref 77 ] are used for fingerprints.
#
# The fixed size E-state atom type set size used during generation of fingerprints corresponding
# FixedSize value of EStateAtomTypesSetToUse contains 87 E-state non-hydrogen atom types
# in EStateAtomTypes.csv data file distributed with MayaChemTools.
#
# Combination of Type and EStateAtomTypesSetToUse allow generation of 2 different types of
# E-state indicies fingerprints:
#
# Type                        EStateAtomTypesSetToUse
#
# EStateIndicies              ArbitrarySize      [ default fingerprints ]
# EStateIndicies              FixedSize
#
# The default is to generate EStateIndicies type fingerprints corresponding to ArbitrarySize as
# EStateAtomTypesSetToUse value.
#
# Always returns the object. When atom types can't be assigned or E-state values
# can't be calculated, a warning is issued and no fingerprints are set.
#
# NOTE(review): values are added to the existing fingerprints vector and the
# vector is not reset here - confirm whether calling this method more than once
# on the same object is expected to be supported.
#
sub GenerateFingerprints {
  my($This) = @_;

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Assign E-state atom types...
  if (!$This->_AssignEStateAtomTypes()) {
    # Don't leave cached atoms behind on failure...
    $This->_ClearMoleculeDataCache();
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{Type} fingerprints generation didn't succeed: Couldn't assign valid E-state atom types to all atoms...";
    return $This;
  }

  # Calculate E-state indicies...
  if (!$This->_CalculateEStateIndicies()) {
    # Don't leave cached atoms behind on failure...
    $This->_ClearMoleculeDataCache();
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{Type} fingerprints generation didn't succeed: Couldn't calculate E-state values for all atoms...";
    return $This;
  }

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}

# Assign E-state atom types...
#
# Stores the assigned atom type of each cached atom in EStateAtomTypes, keyed
# by atom ID. Returns the object on success and nothing when the atom types
# assignment is not successful.
#
sub _AssignEStateAtomTypes {
  my($This) = @_;
  my($EStateAtomTypes, $Atom, $AtomID, $AtomType);

  %{$This->{EStateAtomTypes}} = ();

  # Assign E-state atom types...
  $EStateAtomTypes = AtomTypes::EStateAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => 1);
  $EStateAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$EStateAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return;
  }

  # Collect assigned atom types...
  for $Atom (@{$This->{Atoms}}) {
    $AtomID = $Atom->GetID();

    $AtomType = $EStateAtomTypes->GetAtomType($Atom);
    $This->{EStateAtomTypes}{$AtomID} = $AtomType;
  }
  return $This;
}

# Calculate E-state indicies by summing up E-state values for specific
# E-state atom types...
#
# Stores the sums in EStateIndicies, keyed by E-state atom type. Returns the
# object on success and nothing when E-state values can't be calculated.
#
sub _CalculateEStateIndicies {
  my($This) = @_;
  my($Atom, $AtomID, $AtomType, $EStateValue);

  # Start from empty sums, as is done for the atom types and E-state values,
  # so that values from an earlier run are not added in again...
  %{$This->{EStateIndicies}} = ();

  # Calculate E-state values to generate E-state indicies...
  if (!$This->_CalculateEStateValuesDescriptors()) {
    return;
  }

  # Calculate E-state indicies...
  for $Atom (@{$This->{Atoms}}) {
    $AtomID = $Atom->GetID();

    $AtomType = $This->{EStateAtomTypes}{$AtomID};
    $EStateValue = $This->{EStateValues}{$AtomID};

    if (!exists $This->{EStateIndicies}{$AtomType}) {
      $This->{EStateIndicies}{$AtomType} = 0;
    }

    $This->{EStateIndicies}{$AtomType} += $EStateValue;
  }
  return $This;
}

# Calculate E-state values for E-state indicies...
#
# Stores the E-state value of each cached atom in EStateValues, keyed by atom
# ID. Returns the object on success and nothing when the descriptors generation
# is not successful.
#
sub _CalculateEStateValuesDescriptors {
  my($This) = @_;
  my($EStateValuesDescriptors, $Atom, $AtomID, $EStateValue);

  %{$This->{EStateValues}} = ();

  # Calculate and assign E-state values...
  $EStateValuesDescriptors = AtomicDescriptors::EStateValuesDescriptors->new('Molecule' => $This->{Molecule});
  $EStateValuesDescriptors->GenerateDescriptors();

  # Make sure E-state values calculation is successful...
  if (!$EStateValuesDescriptors->IsDescriptorsGenerationSuccessful()) {
    return;
  }

  # Collect assigned E-state values...
  for $Atom (@{$This->{Atoms}}) {
    $AtomID = $Atom->GetID();
    $EStateValue = $EStateValuesDescriptors->GetDescriptorValue($Atom);
    $This->{EStateValues}{$AtomID} = $EStateValue;
  }
  return $This;
}

# Set final fingerprints for E-state indicies...
#
# Adds one value, rounded to ValuesPrecision, and one ID of the form S<AtomType>
# per E-state atom type to the fingerprints vector. For FixedSize, all possible
# non-hydrogen atom types are used in the order they are returned and atom types
# without a sum get a value of 0; otherwise only atom types with a sum are used,
# in sorted order. Returns the object.
#
sub _SetFinalFingerprints {
  my($This) = @_;
  my($AtomType, $ValuesPrecision, @Values, @IDs);

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  @Values = ();
  @IDs = ();

  $ValuesPrecision = $This->{ValuesPrecision};

  if ($This->{EStateAtomTypesSetToUse} =~ /^FixedSize$/i) {
    # Use fixed size E-state atom types set for non-hydrogen atoms...
    for $AtomType (@{AtomTypes::EStateAtomTypes::GetAllPossibleEStateNonHydrogenAtomTypes()}) {
      push @IDs, "S${AtomType}";
      push @Values, exists($This->{EStateIndicies}{$AtomType}) ? MathUtil::round($This->{EStateIndicies}{$AtomType}, $ValuesPrecision) : 0;
    }
  }
  else {
    for $AtomType (sort keys %{$This->{EStateIndicies}}) {
      push @IDs, "S${AtomType}";
      push @Values, MathUtil::round($This->{EStateIndicies}{$AtomType}, $ValuesPrecision);
    }
  }

  # Add IDs and values to fingerprint vector...
  if (@IDs) {
    $This->{FingerprintsVector}->AddValueIDs(\@IDs);
  }
  $This->{FingerprintsVector}->AddValues(\@Values);

  return $This;
}

# Cache appropriate molecule data...
#
# Stores the non-hydrogen atoms of the molecule in Atoms. Returns the object.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # Get all non-hydrogen atoms...
  my($NegateAtomCheckMethod);
  $NegateAtomCheckMethod = 1;
  @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms("IsHydrogen", $NegateAtomCheckMethod);

  return $This;
}

# Clear cached molecule data...
#
# Empties the Atoms list. Returns the object.
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  @{$This->{Atoms}} = ();

  return $This;
}

# Return a string containing data for EStateIndiciesFingerprints object...
#
# This method implements the overloaded string conversion of the object.
#
sub StringifyEStateIndiciesFingerprints {
  my($This) = @_;
  my($EStateIndiciesFingerprintsString);

  # Type of Keys...
  $EStateIndiciesFingerprintsString = "Type: $This->{Type}; EStateAtomTypesSetToUse: $This->{EStateAtomTypesSetToUse}";

  # Fingerprint vector...
  $EStateIndiciesFingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";

  return $EStateIndiciesFingerprintsString;
}