diff lib/Fingerprints/EStateIndiciesFingerprints.pm @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/Fingerprints/EStateIndiciesFingerprints.pm	Wed Jan 20 09:23:18 2016 -0500
@@ -0,0 +1,593 @@
+package Fingerprints::EStateIndiciesFingerprints;
+#
+# $RCSfile: EStateIndiciesFingerprints.pm,v $
+# $Date: 2015/02/28 20:48:54 $
+# $Revision: 1.19 $
+#
+# Author: Manish Sud <msud@san.rr.com>
+#
+# Copyright (C) 2015 Manish Sud. All rights reserved.
+#
+# This file is part of MayaChemTools.
+#
+# MayaChemTools is free software; you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option) any
+# later version.
+#
+# MayaChemTools is distributed in the hope that it will be useful, but without
+# any warranty; without even the implied warranty of merchantability of fitness
+# for a particular purpose.  See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
+# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
+# Boston, MA, 02111-1307, USA.
+#
+
+use strict;
+use Carp;
+use Exporter;
+use Text::ParseWords;
+use TextUtil ();
+use FileUtil ();
+use MathUtil ();
+use Fingerprints::Fingerprints;
+use Molecule;
+use AtomTypes::EStateAtomTypes;
+use AtomicDescriptors::EStateValuesDescriptors;
+
+# Package globals used for inheritance and by Exporter...
+use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
+
+# Inherit from Fingerprints::Fingerprints; both export lists are empty, so no
+# symbols are exported from this package...
+@ISA = qw(Fingerprints::Fingerprints Exporter);
+@EXPORT = qw();
+@EXPORT_OK = qw();
+
+# The ':all' tag is the combination of both (empty) export lists...
+%EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
+
+# Setup class variables: $ClassName is assigned by _InitializeClass at load time
+# and is used as the prefix of error and warning messages in this file...
+my($ClassName);
+_InitializeClass();
+
+# Overload Perl functions: interpolating an object into a string invokes
+# StringifyEStateIndiciesFingerprints...
+use overload '""' => 'StringifyEStateIndiciesFingerprints';
+
+# Class constructor: builds the base fingerprints object, re-blesses it into the
+# requested class, sets the E-state indicies defaults and then applies the
+# caller supplied property names and values...
+#
+sub new {
+  my $Class = shift;
+  my %NamesAndValues = @_;
+
+  # Let the parent class create the object, then make sure it is blessed into
+  # the class (or the class of the object) the constructor was invoked on...
+  my $This = $Class->SUPER::new();
+  my $TargetClass = ref($Class) || $Class;
+  bless $This, $TargetClass;
+
+  # Defaults first, caller specified properties afterwards...
+  $This->_InitializeEStateIndiciesFingerprints();
+  $This->_InitializeEStateIndiciesFingerprintsProperties(%NamesAndValues);
+
+  return $This;
+}
+
+# Set up the default instance data specific to E-state indicies fingerprints...
+#
+sub _InitializeEStateIndiciesFingerprints {
+  my $This = shift;
+
+  # Fingerprints type: a vector containing the sum of E-state values for each
+  # E-state atom type...
+  $This->{Type} = 'EStateIndicies';
+
+  # E-state atom types set to use for the fingerprints:
+  #
+  # ArbitrarySize - Corresponds to only E-state atom types detected in molecule
+  # FixedSize - Corresponds to fixed number of E-state atom types previously defined [ Ref 77 ]
+  #
+  # It is left empty here; _InitializeEstateIndicies falls back to ArbitrarySize
+  # when no value has been set explicitly.
+  #
+  $This->{EStateAtomTypesSetToUse} = '';
+
+  # Precision handed to MathUtil::round for the real values in the vector...
+  $This->{ValuesPrecision} = 3;
+
+  # Per atom data, keyed by atom ID: assigned E-state atom types and calculated
+  # E-state values...
+  %{$This->{EStateAtomTypes}} = ();
+  %{$This->{EStateValues}} = ();
+
+  # Sum of E-state values keyed by E-state atom type...
+  %{$This->{EStateIndicies}} = ();
+}
+
+# Record the name of this package in the file level $ClassName variable; it is
+# used as the prefix of error and warning messages...
+sub _InitializeClass {
+  return $ClassName = __PACKAGE__;
+}
+
+# Apply the property names and values passed to the constructor by calling the
+# matching Set<Name> method for each of them, and finish the E-state indicies
+# setup. Croaks when no Molecule property was supplied...
+sub _InitializeEStateIndiciesFingerprintsProperties {
+  my($This, %NamesAndValues) = @_;
+
+  for my $PropertyName (keys %NamesAndValues) {
+    my $SetterName = "Set${PropertyName}";
+    $This->$SetterName($NamesAndValues{$PropertyName});
+  }
+
+  # A molecule is mandatory: fingerprints can't be generated without it...
+  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule..."
+    unless exists $NamesAndValues{Molecule};
+
+  $This->_InitializeEstateIndicies();
+
+  return $This;
+}
+
+# Finish E-state indicies setup after the object properties have been set:
+# apply the default atom types set, choose the vector types and initialize the
+# fingerprints vector...
+#
+sub _InitializeEstateIndicies {
+  my $This = shift;
+
+  # Fall back to ArbitrarySize when no E-state atom types set was specified...
+  $This->{EStateAtomTypesSetToUse} ||= 'ArbitrarySize';
+
+  # Vector type...
+  $This->{VectorType} = 'FingerprintsVector';
+
+  # FixedSize uses ordered numerical values; anything else uses plain numerical
+  # values...
+  $This->{FingerprintsVectorType} = ($This->{EStateAtomTypesSetToUse} =~ /^FixedSize$/i)
+    ? 'OrderedNumericalValues'
+    : 'NumericalValues';
+
+  $This->_InitializeFingerprintsVector();
+
+  return $This;
+}
+
+# Setting the size is not supported for E-state indicies fingerprints: any call
+# croaks, regardless of the value passed in...
+#
+sub SetSize {
+  my $This = shift;
+
+  croak "Error: ${ClassName}->SetSize: Can't change size:  It's not allowed...";
+}
+
+# Set E-state atom types set to use...
+#
+# Value must be exactly ArbitrarySize or FixedSize (case insensitive). The value
+# can be set only once: croaks when it has already been set or when Value is not
+# one of the supported values. Returns the object.
+#
+sub SetEStateAtomTypesSetToUse {
+  my($This, $Value) = @_;
+
+  if ($This->{EStateAtomTypesSetToUse}) {
+    croak "Error: ${ClassName}->SetEStateAtomTypesSetToUse: Can't change size:  It's already set...";
+  }
+
+  # Match the whole value: the rest of this class tests it with /^FixedSize$/i,
+  # so a value with trailing characters (e.g. FixedSizeFoo) would otherwise be
+  # accepted here and then silently treated as ArbitrarySize...
+  if ($Value !~ /^(ArbitrarySize|FixedSize)$/i) {
+    croak "Error: ${ClassName}->SetEStateAtomTypesSetToUse: Unknown EStateAtomTypesSetToUse value: $Value; Supported values: ArbitrarySize or FixedSize";
+  }
+
+  $This->{EStateAtomTypesSetToUse} = $Value;
+
+  return $This;
+}
+
+# Set the precision used for the real values in the E-state indicies vector.
+# Croaks unless TextUtil::IsPositiveInteger accepts Value; returns the object...
+#
+sub SetValuesPrecision {
+  my $This = shift;
+  my($Value) = @_;
+
+  croak "Error: ${ClassName}->SetValuesPrecision: ValuesPrecision value, $Value, is not valid:  It must be a positive integer..."
+    unless TextUtil::IsPositiveInteger($Value);
+
+  $This->{ValuesPrecision} = $Value;
+
+  return $This;
+}
+
+# Return the fingerprints description: an explicitly set Description when one
+# exists, otherwise a generated "<Type>:<EStateAtomTypesSetToUse>" string...
+#
+sub GetDescription {
+  my $This = shift;
+
+  return exists $This->{Description}
+    ? $This->{Description}
+    : "$This->{Type}:$This->{EStateAtomTypesSetToUse}";
+}
+
+# Generate electrotopological state indicies (E-state) [ Ref 75-78 ] fingerprints for
+# non-hydrogen atoms in a molecule...
+#
+# EStateIndicies fingerprints constitute a vector containing sum of E-state values
+# for E-state atom types. Two types of E-state atom types set size are allowed:
+#
+# ArbitrarySize - Corresponds to only E-state atom types detected in molecule
+# FixedSize - Corresponds to fixed number of E-state atom types previously defined
+#
+# Module AtomTypes::EStateAtomTypes.pm is used to assign E-state atom types to
+# non-hydrogen atoms in the molecule which is able to assign atom types to any valid
+# atom group. However, for FixedSize value of EStateAtomTypesSetToUse, only a fixed
+# set of E-state atom types corresponding to specific atom groups [ Appendix III in
+# Ref 77 ] are used for fingerprints.
+#
+# The fixed size E-state atom type set used during generation of fingerprints corresponding
+# to FixedSize value of EStateAtomTypesSetToUse contains 87 E-state non-hydrogen atom types
+# in EStateAtomTypes.csv data file distributed with MayaChemTools.
+#
+# Combination of Type and EStateAtomTypesSetToUse allow generation of 2 different types of
+# E-state indicies fingerprints:
+#
+# Type                        EStateAtomTypesSetToUse
+#
+# EStateIndicies               ArbitrarySize      [ default fingerprints ]
+# EStateIndicies               FixedSize
+#
+# The default is to generate EStateIndicies type fingerprints corresponding to ArbitrarySize as
+# EStateAtomTypesSetToUse value.
+#
+#
+sub GenerateFingerprints {
+  my($This) = @_;
+
+  # Always returns the object. When atom types assignment or E-state values
+  # calculation fails, a warning is issued and the final fingerprints are not
+  # set; the cached molecule data is released on every path.
+
+  # Cache appropriate molecule data...
+  $This->_SetupMoleculeDataCache();
+
+  # Assign E-state atom types...
+  if (!$This->_AssignEStateAtomTypes()) {
+    # Don't leave the cached atoms behind on failure...
+    $This->_ClearMoleculeDataCache();
+    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{Type} fingerprints generation didn't succeed: Couldn't assign valid E-state atom types to all atoms...";
+    return $This;
+  }
+
+  # Calculate E-state indicies...
+  if (!$This->_CalculateEStateIndicies()) {
+    # Don't leave the cached atoms behind on failure...
+    $This->_ClearMoleculeDataCache();
+    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{Type} fingerprints generation didn't succeed: Couldn't calculate E-state values for all atoms...";
+    return $This;
+  }
+
+  # Set final fingerprints...
+  $This->_SetFinalFingerprints();
+
+  # Clear cached molecule data...
+  $This->_ClearMoleculeDataCache();
+
+  return $This;
+}
+
+# Assign E-state atom types to the cached non-hydrogen atoms and store them in
+# EStateAtomTypes keyed by atom ID. Returns the object on success and undef
+# when the atom types assignment is not successful...
+#
+sub _AssignEStateAtomTypes {
+  my $This = shift;
+
+  # Start from a clean slate...
+  %{$This->{EStateAtomTypes}} = ();
+
+  # Let the atom types class do the assignment, ignoring hydrogens...
+  my $EStateAtomTypes = AtomTypes::EStateAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => 1);
+  $EStateAtomTypes->AssignAtomTypes();
+
+  # Bail out unless atom types assignment is successful...
+  return undef unless $EStateAtomTypes->IsAtomTypesAssignmentSuccessful();
+
+  # Collect assigned atom types...
+  for my $Atom (@{$This->{Atoms}}) {
+    my $AtomID = $Atom->GetID();
+    $This->{EStateAtomTypes}{$AtomID} = $EStateAtomTypes->GetAtomType($Atom);
+  }
+
+  return $This;
+}
+
+# Calculate E-state indicies by summing up E-state values for specific
+# E-state atom types...
+#
+# The sums are stored in EStateIndicies keyed by E-state atom type. Returns the
+# object on success and undef when E-state values can't be calculated.
+#
+sub _CalculateEStateIndicies {
+  my($This) = @_;
+  my($Atom, $AtomID, $AtomType, $EStateValue);
+
+  # Discard sums from any earlier calculation, as is done for atom types and
+  # E-state values; otherwise repeated fingerprints generation would keep adding
+  # to the previous totals...
+  %{$This->{EStateIndicies}} = ();
+
+  # Calculate E-state values to generate E-state indicies...
+  if (!$This->_CalculateEStateValuesDescriptors()) {
+    return undef;
+  }
+
+  # Calculate E-state indicies...
+  for $Atom (@{$This->{Atoms}}) {
+    $AtomID = $Atom->GetID();
+
+    $AtomType = $This->{EStateAtomTypes}{$AtomID};
+    $EStateValue = $This->{EStateValues}{$AtomID};
+
+    if (!exists $This->{EStateIndicies}{$AtomType}) {
+      $This->{EStateIndicies}{$AtomType} = 0;
+    }
+
+    $This->{EStateIndicies}{$AtomType} += $EStateValue;
+  }
+  return $This;
+}
+
+# Calculate E-state values for the cached non-hydrogen atoms and store them in
+# EStateValues keyed by atom ID. Returns the object on success and undef when
+# the descriptors generation is not successful...
+#
+sub _CalculateEStateValuesDescriptors {
+  my $This = shift;
+
+  # Start from a clean slate...
+  %{$This->{EStateValues}} = ();
+
+  # Let the descriptors class calculate E-state values for the molecule...
+  my $EStateValuesDescriptors = AtomicDescriptors::EStateValuesDescriptors->new('Molecule' => $This->{Molecule});
+  $EStateValuesDescriptors->GenerateDescriptors();
+
+  # Bail out unless E-state values calculation is successful...
+  return undef unless $EStateValuesDescriptors->IsDescriptorsGenerationSuccessful();
+
+  # Collect calculated E-state values...
+  for my $Atom (@{$This->{Atoms}}) {
+    my $AtomID = $Atom->GetID();
+    $This->{EStateValues}{$AtomID} = $EStateValuesDescriptors->GetDescriptorValue($Atom);
+  }
+
+  return $This;
+}
+
+# Set final fingerprints for E-state indicies: mark the generation as
+# successful and load IDs and rounded values into the fingerprints vector...
+#
+sub _SetFinalFingerprints {
+  my $This = shift;
+
+  # Mark successful generation of fingerprints...
+  $This->{FingerprintsGenerated} = 1;
+
+  my $ValuesPrecision = $This->{ValuesPrecision};
+
+  # FixedSize walks the full list of possible non-hydrogen E-state atom types;
+  # otherwise only the atom types found in the molecule are used, sorted by name...
+  my @AtomTypes = ($This->{EStateAtomTypesSetToUse} =~ /^FixedSize$/i)
+    ? @{AtomTypes::EStateAtomTypes::GetAllPossibleEStateNonHydrogenAtomTypes()}
+    : sort keys %{$This->{EStateIndicies}};
+
+  # Atom types without a calculated value contribute 0...
+  my(@IDs, @Values);
+  for my $AtomType (@AtomTypes) {
+    push @IDs, "S${AtomType}";
+    push @Values, exists($This->{EStateIndicies}{$AtomType})
+      ? MathUtil::round($This->{EStateIndicies}{$AtomType}, $ValuesPrecision)
+      : 0;
+  }
+
+  # Add IDs and values to fingerprint vector; IDs are added only when there are any...
+  $This->{FingerprintsVector}->AddValueIDs(\@IDs) if @IDs;
+  $This->{FingerprintsVector}->AddValues(\@Values);
+
+  return $This;
+}
+
+# Cache the non-hydrogen atoms of the molecule in Atoms for use during
+# fingerprints generation...
+#
+sub _SetupMoleculeDataCache {
+  my $This = shift;
+
+  # Negate the IsHydrogen atom check to get all non-hydrogen atoms...
+  my $NegateAtomCheckMethod = 1;
+  my $Molecule = $This->GetMolecule();
+  @{$This->{Atoms}} = $Molecule->GetAtoms("IsHydrogen", $NegateAtomCheckMethod);
+
+  return $This;
+}
+
+# Empty the list of atoms cached by _SetupMoleculeDataCache and return the
+# object...
+#
+sub _ClearMoleculeDataCache {
+  my $This = shift;
+
+  # Truncate the cached atoms array in place...
+  $#{$This->{Atoms}} = -1;
+
+  return $This;
+}
+
+# Return a string containing data for EStateIndiciesFingerprints object: its
+# type, the E-state atom types set in use and the fingerprints vector...
+sub StringifyEStateIndiciesFingerprints {
+  my $This = shift;
+
+  my @Parts = (
+    "Type: $This->{Type}",
+    "EStateAtomTypesSetToUse: $This->{EStateAtomTypesSetToUse}",
+    "FingerprintsVector: < $This->{FingerprintsVector} >",
+  );
+
+  return join('; ', @Parts);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+EStateIndiciesFingerprints
+
+=head1 SYNOPSIS
+
+use Fingerprints::EStateIndiciesFingerprints;
+
+use Fingerprints::EStateIndiciesFingerprints qw(:all);
+
+=head1 DESCRIPTION
+
+B<EStateIndiciesFingerprints> [ Ref 75-78 ] class provides the following methods:
+
+new, GenerateFingerprints, GetDescription, SetEStateAtomTypesSetToUse,
+SetValuesPrecision, StringifyEStateIndiciesFingerprints
+
+B<EStateIndiciesFingerprints> is derived from B<Fingerprints> class which in turn
+is derived from B<ObjectProperty> base class that provides methods not explicitly defined
+in B<EStateIndiciesFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's
+AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property:
+
+    Set<PropertyName>(<PropertyValue>);
+    $PropertyValue = Get<PropertyName>();
+    Delete<PropertyName>();
+
+E-state atom types are assigned to all non-hydrogen atoms in a molecule using module
+AtomTypes::EStateAtomTypes.pm and E-state values are calculated using module
+AtomicDescriptors::EStateValuesDescriptors.pm. Using E-state atom types and E-state values,
+B<EStateIndiciesFingerprints> constituting sum of E-state values for E-state atom types
+are generated.
+
+Two types of E-state atom types set size are allowed:
+
+    ArbitrarySize - Corresponds to only E-state atom types detected
+                    in molecule
+    FixedSize - Corresponds to fixed number of E-state atom types previously
+                defined
+
+Module AtomTypes::EStateAtomTypes.pm, used to assign E-state atom types to
+non-hydrogen atoms in the molecule, is able to assign atom types to any valid
+atom group. However, for I<FixedSize> value of B<EStateAtomTypesSetToUse>, only a
+fixed set of E-state atom types corresponding to specific atom groups [ Appendix III in
+Ref 77 ] are used for fingerprints.
+
+The fixed size E-state atom type set size used during generation of fingerprints contains
+87 E-state non-hydrogen atom types in EStateAtomTypes.csv data file distributed with
+MayaChemTools.
+
+Combination of Type and EStateAtomTypesSetToUse allow generation of 2 different types of
+E-state indicies fingerprints:
+
+    Type                        EStateAtomTypesSetToUse
+
+    EStateIndicies              ArbitrarySize      [ default fingerprints ]
+    EStateIndicies              FixedSize
+
+The current release of MayaChemTools generates the following types of E-state
+fingerprints vector strings:
+
+    FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs
+    AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN
+    H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3
+    .024 -2.270
+
+    FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues;
+    ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435
+    4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1
+    4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+    0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+    FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues;
+    IDsAndValuesString;SsLi SssBe SssssBem SsBH2 SssBH SsssB SssssBm SsCH3
+    SdCH2 SssCH2 StCH SdsCH SaaCH SsssCH SddC StsC SdssC SaasC SaaaC Sssss
+    C SsNH3p SsNH2 SssNH2p SdNH SssNH SaaNH StN SsssNHp SdsN SaaN SsssN Sd
+    0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435 4.387 0 0 0
+    0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 14.006 0 0 0 0
+    0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0...
+
+=head2 METHODS
+
+=over 4
+
+=item B<new>
+
+    $EStateIndiciesFingerprints = new EStateIndiciesFingerprints(%NamesAndValues);
+
+Using specified I<EStateIndiciesFingerprints> property names and values hash, B<new> method creates a new object
+and returns a reference to newly created B<EStateIndiciesFingerprints> object. By default, the
+following properties are initialized:
+
+    Molecule = '';
+    Type = 'EStateIndicies'
+    EStateAtomTypesSetToUse = 'ArbitrarySize'
+    ValuesPrecision = 3
+
+Examples:
+
+    $EStateIndiciesFingerprints = new EStateIndiciesFingerprints(
+                              'Molecule' => $Molecule,
+                              'EStateAtomTypesSetToUse' =>
+                                              'ArbitrarySize');
+
+    $EStateIndiciesFingerprints = new EStateIndiciesFingerprints(
+                              'Molecule' => $Molecule,
+                              'EStateAtomTypesSetToUse' =>
+                                              'FixedSize');
+
+    $EStateIndiciesFingerprints->GenerateFingerprints();
+    print "$EStateIndiciesFingerprints\n";
+
+=item B<GenerateFingerprints>
+
+    $EStateIndiciesFingerprints = $EStateIndiciesFingerprints->
+                                  GenerateFingerprints();
+
+Generates E-state indicies fingerprints and returns I<EStateIndiciesFingerprints>.
+
+=item B<GetDescription>
+
+    $Description = $EStateIndiciesFingerprints->GetDescription();
+
+Returns a string containing description of E-state indicies fingerprints.
+
+=item B<SetEStateAtomTypesSetToUse>
+
+    $EStateIndiciesFingerprints->SetEStateAtomTypesSetToUse($Value);
+
+Sets I<Value> of I<EStateAtomTypesSetToUse> and returns I<EStateIndiciesFingerprints>.
+Possible values: I<ArbitrarySize or FixedSize>. Default value: I<ArbitrarySize>.
+
+=item B<SetValuesPrecision>
+
+    $EStateIndiciesFingerprints->SetValuesPrecision($Precision);
+
+Sets precision of E-state values to use during generation of E-state indicies fingerprints
+and returns I<EStateIndiciesFingerprints>. Possible values: I<Positive integers>.
+Default value: I<3>.
+
+=item B<StringifyEStateIndiciesFingerprints>
+
+    $String = $EStateIndiciesFingerprints->StringifyEStateIndiciesFingerprints();
+
+Returns a string containing information about I<EStateIndiciesFingerprints> object.
+
+=back
+
+=head1 AUTHOR
+
+Manish Sud <msud@san.rr.com>
+
+=head1 SEE ALSO
+
+Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm, AtomTypesFingerprints.pm,
+ExtendedConnectivityFingerprints.pm, MACCSKeys.pm, PathLengthFingerprints.pm,
+TopologicalAtomPairsFingerprints.pm, TopologicalAtomTripletsFingerprints.pm,
+TopologicalAtomTorsionsFingerprints.pm, TopologicalPharmacophoreAtomPairsFingerprints.pm,
+TopologicalPharmacophoreAtomTripletsFingerprints.pm
+
+=head1 COPYRIGHT
+
+Copyright (C) 2015 Manish Sud. All rights reserved.
+
+This file is part of MayaChemTools.
+
+MayaChemTools is free software; you can redistribute it and/or modify it under
+the terms of the GNU Lesser General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+=cut