Mercurial > repos > deepakjadmin > mayatool3_test2
view lib/Fingerprints/EStateIndiciesFingerprints.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
line wrap: on
line source
package Fingerprints::EStateIndiciesFingerprints;
#
# $RCSfile: EStateIndiciesFingerprints.pm,v $
# $Date: 2015/02/28 20:48:54 $
# $Revision: 1.19 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Text::ParseWords;
use TextUtil ();
use FileUtil ();
use MathUtil ();
use Fingerprints::Fingerprints;
use Molecule;
use AtomTypes::EStateAtomTypes;
use AtomicDescriptors::EStateValuesDescriptors;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(Fingerprints::Fingerprints Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Overload Perl functions...
use overload '""' => 'StringifyEStateIndiciesFingerprints';

# Class constructor...
#
# Takes the invocant (class name or existing object) followed by a hash of
# property names and values. The parent constructor builds the object, which
# is then blessed into the invocant's class, given its default settings and
# finally configured from the supplied names and values.
#
# Returns the newly created object.
#
sub new {
  my($Class, %NamesAndValues) = @_;
  my($This, $TargetClass);

  # Let the parent class create the object...
  $This = $Class->SUPER::new();

  # An object invocant contributes its class name; a string is used as is...
  $TargetClass = ref($Class) || $Class;
  bless $This, $TargetClass;

  # Defaults first, caller specified properties second...
  $This->_InitializeEStateIndiciesFingerprints();
  $This->_InitializeEStateIndiciesFingerprintsProperties(%NamesAndValues);

  return $This;
}

# Initialize object data...
#
# Sets the default values of all object data used by this class: fingerprints
# type, E-state atom types set to use (left empty so that it can be set once),
# values precision and the hashes holding assigned atom types, E-state values
# and E-state indicies.
#
# Returns the object.
#
sub _InitializeEStateIndiciesFingerprints {
  my($This) = @_;

  # EStateIndicies is a vector containing sum of E-state values for E-state atom types
  #
  $This->{Type} = 'EStateIndicies';

  # EStateAtomTypesSetToUse for EStateIndicies:
  #
  # ArbitrarySize - Corresponds to only E-state atom types detected in molecule
  # FixedSize - Corresponds to fixed number of E-state atom types previously defined [ Ref 77 ]
  #
  # The default EStateAtomTypesSetToUse value for EStateIndicies fingerprints type: ArbitrarySize.
  # Possible values: ArbitrarySize or FixedSize.
  #
  # It is intentionally left empty here: SetEStateAtomTypesSetToUse refuses to
  # overwrite a non-empty value and _InitializeEstateIndicies supplies the default.
  #
  $This->{EStateAtomTypesSetToUse} = '';

  # Assigned E-state atom types...
  %{$This->{EStateAtomTypes}} = ();

  # Vector values precision for real values during E-state indicies...
  $This->{ValuesPrecision} = 3;

  # Calculated E-state values and indicies for generating E-state indicies fingerprints...
  %{$This->{EStateValues}} = ();
  %{$This->{EStateIndicies}} = ();

  return $This;
}

# Initialize class ...
#
# Stores the package name in the file level $ClassName variable used in
# error and warning messages.
#
sub _InitializeClass {
  #Class name...
  $ClassName = __PACKAGE__;
}

# Initialize object properties....
#
# For each name and value pair, the method Set<Name> is invoked with the value.
# A Molecule entry must be present in the supplied hash; otherwise the method
# croaks. Finally the fingerprints vector is set up.
#
# Returns the object.
#
sub _InitializeEStateIndiciesFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  my($Name, $Value, $MethodName);
  while (($Name, $Value) = each %NamesAndValues) {
    $MethodName = "Set${Name}";
    $This->$MethodName($Value);
  }

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }

  $This->_InitializeEstateIndicies();

  return $This;
}

# Initialize E-state indicies...
#
# Applies the default EStateAtomTypesSetToUse value when none was specified,
# picks the fingerprints vector type that goes with it and calls
# _InitializeFingerprintsVector, which is not defined in this file.
#
# Returns the object.
#
sub _InitializeEstateIndicies {
  my($This) = @_;

  # Set default EStateAtomTypesSetToUse...
  if (!$This->{EStateAtomTypesSetToUse}) {
    $This->{EStateAtomTypesSetToUse} = 'ArbitrarySize';
  }

  # Vector type...
  $This->{VectorType} = 'FingerprintsVector';

  # A fixed set of atom types gives values in a known order; otherwise the
  # values are accompanied by IDs...
  if ($This->{EStateAtomTypesSetToUse} =~ /^FixedSize$/i) {
    $This->{FingerprintsVectorType} = 'OrderedNumericalValues';
  }
  else {
    $This->{FingerprintsVectorType} = 'NumericalValues';
  }

  $This->_InitializeFingerprintsVector();

  return $This;
}

# Disable set size method...
#
# Always croaks: the size of these fingerprints can't be set by the caller.
#
sub SetSize {
  my($This, $Type) = @_;

  croak "Error: ${ClassName}->SetSize: Can't change size:  It's not allowed...";
}

# Set E-state atom types set to use...
#
# Value must be exactly ArbitrarySize or FixedSize, ignoring case. The match
# is anchored at both ends so that the accepted values are the same ones
# recognized by the /^FixedSize$/i checks made elsewhere in this class; a
# value such as FixedSizeX is rejected instead of silently being treated as
# ArbitrarySize.
#
# Croaks when a value has already been set or the value is not supported.
# Returns the object.
#
sub SetEStateAtomTypesSetToUse {
  my($This, $Value) = @_;

  if ($This->{EStateAtomTypesSetToUse}) {
    croak "Error: ${ClassName}->SetEStateAtomTypesSetToUse: Can't change size:  It's already set...";
  }

  if ($Value !~ /^(ArbitrarySize|FixedSize)$/i) {
    croak "Error: ${ClassName}->SetEStateAtomTypesSetToUse: Unknown EStateAtomTypesSetToUse value: $Value; Supported values: ArbitrarySize or FixedSize";
  }

  $This->{EStateAtomTypesSetToUse} = $Value;

  return $This;
}

# Set vector values precision for real values for E-state indicies...
#
# Croaks unless TextUtil::IsPositiveInteger accepts the value.
# Returns the object.
#
sub SetValuesPrecision {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetValuesPrecision: ValuesPrecision value, $Value, is not valid:  It must be a positive integer...";
  }
  $This->{ValuesPrecision} = $Value;

  return $This;
}

# Generate fingerprints description...
#
# Returns the explicitly set Description when the key exists; otherwise a
# string of the form <Type>:<EStateAtomTypesSetToUse>.
#
sub GetDescription {
  my($This) = @_;

  # Is description explicitly set?
  if (exists $This->{Description}) {
    return $This->{Description};
  }

  # Generate fingerprints description...

  return "$This->{Type}:$This->{EStateAtomTypesSetToUse}";
}

# Generate electrotopological state indicies (E-state) [ Ref 75-78 ] fingerprints for
# non-hydrogen atoms in a molecule...
#
# EStateIndicies fingerprints constitute a vector containing sum of E-state values
# for E-state atom types.
# Two types of E-state atom types set size are allowed:
#
#   ArbitrarySize - Corresponds to only E-state atom types detected in molecule
#   FixedSize - Corresponds to fixed number of E-state atom types previously defined
#
# Module AtomTypes::EStateAtomTypes.pm is used to assign E-state atom types to
# non-hydrogen atoms in the molecule which is able to assign atom types to any valid
# atom group. However, for FixedSize value of EStateAtomTypesSetToUse, only a fixed
# set of E-state atom types corresponding to specific atom groups [ Appendix III in
# Ref 77 ] are used for fingerprints.
#
# The fixed size E-state atom type set size used during generation of fingerprints corresponding
# FixedSize value of EStateAtomTypesSetToUse contains 87 E-state non-hydrogen atom types
# in EStateAtomTypes.csv data file distributed with MayaChemTools.
#
# Combination of Type and EStateAtomTypesSetToUse allow generation of 2 different types of
# E-state indicies fingerprints:
#
# Type                        EStateAtomTypesSetToUse
#
# EStateIndicies              ArbitrarySize      [ default fingerprints ]
# EStateIndicies              FixedSize
#
# The default is generate EStateIndicies type fingerprints corresponding to ArbitrarySize as
# EStateAtomTypesSetToUse value.
#
# GenerateFingerprints caches the non-hydrogen atoms, assigns E-state atom
# types, calculates E-state indicies and stores them in the fingerprints
# vector. When atom type assignment or E-state value calculation fails, a
# warning is issued and no fingerprints are set. The cached atom list is
# cleared on every exit path, including the failure ones.
#
# Returns the object in all cases.
#
sub GenerateFingerprints {
  my($This) = @_;

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Assign E-state atom types...
  if (!$This->_AssignEStateAtomTypes()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{Type} fingerprints generation didn't succeed: Couldn't assign valid E-state atom types to all atoms...";
    # Don't leave the cached atoms behind on failure...
    $This->_ClearMoleculeDataCache();
    return $This;
  }

  # Calculate E-state indicies...
  if (!$This->_CalculateEStateIndicies()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{Type} fingerprints generation didn't succeed: Couldn't calculate E-state values for all atoms...";
    # Don't leave the cached atoms behind on failure...
    $This->_ClearMoleculeDataCache();
    return $This;
  }

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}

# Assign E-state atom types...
#
# Runs AtomTypes::EStateAtomTypes on the molecule with hydrogens ignored and
# records the atom type of each cached atom in EStateAtomTypes, keyed by atom ID.
#
# Returns the object on success and undef when the assignment is not successful.
#
sub _AssignEStateAtomTypes {
  my($This) = @_;
  my($EStateAtomTypes, $Atom, $AtomID, $AtomType);

  %{$This->{EStateAtomTypes}} = ();

  # Assign E-state atom types...
  $EStateAtomTypes = AtomTypes::EStateAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => 1);
  $EStateAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$EStateAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return undef;
  }

  # Collect assigned atom types...
  for $Atom (@{$This->{Atoms}}) {
    $AtomID = $Atom->GetID();
    $AtomType = $EStateAtomTypes->GetAtomType($Atom);
    $This->{EStateAtomTypes}{$AtomID} = $AtomType;
  }
  return $This;
}

# Calculate E-state indicies by summing up E-state values for specific
# E-state atom types...
#
# Any previously calculated indicies are discarded first, in the same way
# the atom types and E-state values hashes are reset by their own methods,
# so that sums don't accumulate across repeated calls.
#
# Returns the object on success and undef when E-state values can't be calculated.
#
sub _CalculateEStateIndicies {
  my($This) = @_;
  my($Atom, $AtomID, $AtomType, $EStateValue);

  %{$This->{EStateIndicies}} = ();

  # Calculate E-state values to generate E-state indicies...
  if (!$This->_CalculateEStateValuesDescriptors()) {
    return undef;
  }

  # Calculate E-state indicies...
  for $Atom (@{$This->{Atoms}}) {
    $AtomID = $Atom->GetID();
    $AtomType = $This->{EStateAtomTypes}{$AtomID};
    $EStateValue = $This->{EStateValues}{$AtomID};

    if (!exists $This->{EStateIndicies}{$AtomType}) {
      $This->{EStateIndicies}{$AtomType} = 0;
    }

    $This->{EStateIndicies}{$AtomType} += $EStateValue;
  }
  return $This;
}

# Calculate E-state values for E-state indicies...
#
# Runs AtomicDescriptors::EStateValuesDescriptors on the molecule and records
# the descriptor value of each cached atom in EStateValues, keyed by atom ID.
#
# Returns the object on success and undef when the calculation is not successful.
#
sub _CalculateEStateValuesDescriptors {
  my($This) = @_;
  my($EStateValuesDescriptors, $Atom, $AtomID, $EStateValue);

  %{$This->{EStateValues}} = ();

  # Calculate and assign E-state values...
  $EStateValuesDescriptors = AtomicDescriptors::EStateValuesDescriptors->new('Molecule' => $This->{Molecule});
  $EStateValuesDescriptors->GenerateDescriptors();

  # Make sure E-state values calculation is successful...
  if (!$EStateValuesDescriptors->IsDescriptorsGenerationSuccessful()) {
    return undef;
  }

  # Collect assigned E-state values...
  for $Atom (@{$This->{Atoms}}) {
    $AtomID = $Atom->GetID();
    $EStateValue = $EStateValuesDescriptors->GetDescriptorValue($Atom);
    $This->{EStateValues}{$AtomID} = $EStateValue;
  }
  return $This;
}

# Set final fingerprints for E-state indicies...
#
# Builds parallel lists of IDs, atom type prefixed with S, and rounded
# E-state indicies and adds them to the fingerprints vector. For FixedSize,
# every atom type returned by GetAllPossibleEStateNonHydrogenAtomTypes gets an
# entry, with 0 for atom types not present; otherwise only the atom types
# found are used, in sorted order.
#
# Returns the object.
#
sub _SetFinalFingerprints {
  my($This) = @_;
  my($AtomType, $ValuesPrecision, $EStateAtomTypesDataRef, @Values, @IDs);

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  @Values = ();
  @IDs = ();

  $ValuesPrecision = $This->{ValuesPrecision};

  if ($This->{EStateAtomTypesSetToUse} =~ /^FixedSize$/i) {
    # Use fixed size E-state atom types set for non-hydrogen atoms...
    for $AtomType (@{AtomTypes::EStateAtomTypes::GetAllPossibleEStateNonHydrogenAtomTypes()}) {
      push @IDs, "S${AtomType}";
      push @Values, exists($This->{EStateIndicies}{$AtomType}) ? MathUtil::round($This->{EStateIndicies}{$AtomType}, $ValuesPrecision) : 0;
    }
  }
  else {
    for $AtomType (sort keys %{$This->{EStateIndicies}}) {
      push @IDs, "S${AtomType}";
      push @Values, MathUtil::round($This->{EStateIndicies}{$AtomType}, $ValuesPrecision);
    }
  }

  # Add IDs and values to fingerprint vector...
  if (@IDs) {
    $This->{FingerprintsVector}->AddValueIDs(\@IDs);
  }
  $This->{FingerprintsVector}->AddValues(\@Values);

  return $This;
}

# Cache appropriate molecule data...
#
# Stores in Atoms the list returned by the molecule's GetAtoms for the
# IsHydrogen check with negation requested.
#
# Returns the object.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # Get all non-hydrogen atoms...
  my($NegateAtomCheckMethod);
  $NegateAtomCheckMethod = 1;
  @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms("IsHydrogen", $NegateAtomCheckMethod);

  return $This;
}

# Clear cached molecule data...
#
# Empties the cached atom list and returns the object.
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  @{$This->{Atoms}} = ();

  return $This;
}

# Return a string containing data for EStateIndiciesFingerprints object...
#
# This method also implements the overloaded string conversion of the object.
#
sub StringifyEStateIndiciesFingerprints {
  my($This) = @_;
  my($EStateIndiciesFingerprintsString);

  # Type of Keys...
  $EStateIndiciesFingerprintsString = "Type: $This->{Type}; EStateAtomTypesSetToUse: $This->{EStateAtomTypesSetToUse}";

  # Fingerprint vector...
  $EStateIndiciesFingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";

  return $EStateIndiciesFingerprintsString;
}

1;

__END__

=head1 NAME

EStateIndiciesFingerprints

=head1 SYNOPSIS

use Fingerprints::EStateIndiciesFingerprints;

use Fingerprints::EStateIndiciesFingerprints qw(:all);

=head1 DESCRIPTION

B<EStateIndiciesFingerprints> [ Ref 75-78 ] class provides the following methods:

new, GenerateFingerprints, GetDescription, SetEStateAtomTypesSetToUse,
SetValuesPrecision, StringifyEStateIndiciesFingerprints

B<EStateIndiciesFingerprints> is derived from B<Fingerprints> class which in turn
is derived from B<ObjectProperty> base class that provides methods not explicitly defined
in B<EStateIndiciesFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's
AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property:

    Set<PropertyName>(<PropertyValue>);
    $PropertyValue = Get<PropertyName>();
    Delete<PropertyName>();

E-state atom types are assigned to all non-hydrogen atoms in a molecule using module
AtomTypes::EStateAtomTypes.pm and E-state values are calculated using module
AtomicDescriptors::EStateValuesDescriptors.pm. Using E-state atom types and E-state values,
B<EStateIndiciesFingerprints> constituting sum of E-state values for E-state atom types
are generated.

Two types of E-state atom types set size are allowed:

    ArbitrarySize - Corresponds to only E-state atom types detected in molecule
    FixedSize - Corresponds to fixed number of E-state atom types previously defined

Module AtomTypes::EStateAtomTypes.pm, used to assign E-state atom types to
non-hydrogen atoms in the molecule, is able to assign atom types to any valid
atom group. However, for I<FixedSize> value of B<EStateAtomTypesSetToUse>, only a
fixed set of E-state atom types corresponding to specific atom groups [ Appendix III
in Ref 77 ] are used for fingerprints.

The fixed size E-state atom type set size used during generation of fingerprints contains
87 E-state non-hydrogen atom types in EStateAtomTypes.csv data file distributed with
MayaChemTools.

Combination of Type and EStateAtomTypesSetToUse allow generation of 2 different types of
E-state indicies fingerprints:

    Type                        EStateAtomTypesSetToUse

    EStateIndicies              ArbitrarySize      [ default fingerprints ]
    EStateIndicies              FixedSize

The current release of MayaChemTools generates the following types of E-state
fingerprints vector strings:

    FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs
    AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN
    H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3
    .024 -2.270

    FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues;
    ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435
    4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1
    4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
    0 0 0 0 0 0 0 0 0 0 0 0 0 0

    FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues;
    IDsAndValuesString;SsLi SssBe SssssBem SsBH2 SssBH SsssB SssssBm SsCH3
    SdCH2 SssCH2 StCH SdsCH SaaCH SsssCH SddC StsC SdssC SaasC SaaaC Sssss
    C SsNH3p SsNH2 SssNH2p SdNH SssNH SaaNH StN SsssNHp SdsN SaaN SsssN Sd
    0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435 4.387 0 0 0
    0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 14.006 0 0 0 0
    0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0...

=head2 METHODS

=over 4

=item B<new>

    $EStateIndiciesFingerprints = new EStateIndiciesFingerprints(%NamesAndValues);

Using specified I<EStateIndiciesFingerprints> property names and values hash, B<new>
method creates a new object and returns a reference to newly created
B<EStateIndiciesFingerprints> object.

By default, the following properties are initialized:

    Molecule = '';
    Type = 'EStateIndicies'
    EStateAtomTypesSetToUse = 'ArbitrarySize'
    ValuesPrecision = 3

Examples:

    $EStateIndiciesFingerprints = new EStateIndiciesFingerprints(
                              'Molecule' => $Molecule,
                              'EStateAtomTypesSetToUse' => 'ArbitrarySize');

    $EStateIndiciesFingerprints = new EStateIndiciesFingerprints(
                              'Molecule' => $Molecule,
                              'EStateAtomTypesSetToUse' => 'FixedSize');

    $EStateIndiciesFingerprints->GenerateFingerprints();
    print "$EStateIndiciesFingerprints\n";

=item B<GenerateFingerprints>

    $EStateIndiciesFingerprints = $EStateIndiciesFingerprints->
                                  GenerateFingerprints();

Generates E-state indicies fingerprints and returns I<EStateIndiciesFingerprints>.

=item B<GetDescription>

    $Description = $EStateIndiciesFingerprints->GetDescription();

Returns a string containing description of E-state indicies fingerprints.

=item B<SetEStateAtomTypesSetToUse>

    $EStateIndiciesFingerprints->SetEStateAtomTypesSetToUse($Value);

Sets I<Value> of I<EStateAtomTypesSetToUse> and returns I<EStateIndiciesFingerprints>.
Possible values: I<ArbitrarySize or FixedSize>. Default value: I<ArbitrarySize>.

=item B<SetValuesPrecision>

    $EStateIndiciesFingerprints->SetValuesPrecision($Precision);

Sets precision of E-state values to use during generation of E-state indices fingerprints
and returns I<EStateIndiciesFingerprints>. Possible values: I<Positive integers>.
Default value: I<3>.

=item B<StringifyEStateIndiciesFingerprints>

    $String = $EStateIndiciesFingerprints->StringifyEStateIndiciesFingerprints();

Returns a string containing information about I<EStateIndiciesFingerprints> object.

=back

=head1 AUTHOR

Manish Sud <msud@san.rr.com>

=head1 SEE ALSO

Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm,
AtomTypesFingerprints.pm, ExtendedConnectivityFingerprints.pm, MACCSKeys.pm,
PathLengthFingerprints.pm, TopologicalAtomPairsFingerprints.pm,
TopologicalAtomTripletsFingerprints.pm, TopologicalAtomTorsionsFingerprints.pm,
TopologicalPharmacophoreAtomPairsFingerprints.pm,
TopologicalPharmacophoreAtomTripletsFingerprints.pm

=head1 COPYRIGHT

Copyright (C) 2015 Manish Sud. All rights reserved.

This file is part of MayaChemTools.

MayaChemTools is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.

=cut