Mercurial > repos > deepakjadmin > mayatool3_test2
diff lib/Fingerprints/TopologicalPharmacophoreAtomTripletsFingerprints.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/Fingerprints/TopologicalPharmacophoreAtomTripletsFingerprints.pm Wed Jan 20 09:23:18 2016 -0500 @@ -0,0 +1,1048 @@
package Fingerprints::TopologicalPharmacophoreAtomTripletsFingerprints;
#
# $RCSfile: TopologicalPharmacophoreAtomTripletsFingerprints.pm,v $
# $Date: 2015/02/28 20:48:54 $
# $Revision: 1.30 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Fingerprints::Fingerprints;
use TextUtil ();
use MathUtil ();
use Molecule;
use AtomTypes::FunctionalClassAtomTypes;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

# Inherit from the generic fingerprints class; nothing is exported...
@ISA = qw(Fingerprints::Fingerprints Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
#
# $ClassName holds the package name and is interpolated into the error and
# warning messages issued by the methods of this class.
#
my($ClassName);
_InitializeClass();

# Overload Perl functions...
#
# Using an object in string context calls the stringify method named below.
#
use overload '""' => 'StringifyTopologicalPharmacophoreAtomTripletsFingerprints';

# Class constructor...
#
# Takes a hash of property names and values, creates the object through the
# parent class constructor, sets default values, applies the specified
# properties and returns the new object. The properties initialization croaks
# when no Molecule is specified.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Initialize object...
  my $This = $Class->SUPER::new();
  bless $This, ref($Class) || $Class;
  $This->_InitializeTopologicalPharmacophoreAtomTripletsFingerprints();

  $This->_InitializeTopologicalPharmacophoreAtomTripletsFingerprintsProperties(%NamesAndValues);

  return $This;
}

# Initialize object data...
#
# Sets default values for all the properties used during fingerprints
# generation and empties the containers holding assigned atom types and
# atom triplets information.
#
sub _InitializeTopologicalPharmacophoreAtomTripletsFingerprints {
  my($This) = @_;

  # Type of fingerprint...
  $This->{Type} = 'TopologicalPharmacophoreAtomTriplets';

  # Type of vector...
  $This->{VectorType} = 'FingerprintsVector';

  # AtomTripletsSetSizeToUse...
  #
  # ArbitrarySize - Corresponds to atom triplets with non-zero count
  # FixedSize - Corresponds to all atom triplets with zero and non-zero count
  #
  # Possible values: ArbitrarySize or FixedSize. Default: ArbitrarySize
  #
  # It is left empty here; the default is applied during initialization of
  # the fingerprints vector so that it can be explicitly set exactly once.
  #
  $This->{AtomTripletsSetSizeToUse} = '';

  #
  # NumericalValues - For ArbitrarySize value of AtomTripletsSetSizeToUse
  # OrderedNumericalValues - For FixedSize value of AtomTripletsSetSizeToUse
  #
  # Possible values: OrderedNumericalValues or NumericalValues. Default: NumericalValues
  #
  $This->{FingerprintsVectorType} = '';

  # Minimum and maximum bond distance between pharmacophore atom pairs corresponding to
  # atom triplets and distance bin size used for binning distances.
  #
  # In order to distribute distance bins of equal size, the last bin is allowed to go past the
  # maximum distance specified by up to distance bin size.
  #
  # The default MinDistance and MaxDistance values of 1 and 10 with DistanceBinSize of
  # 2 [ Ref 70 ] generates the following 5 distance bins: [1, 2] [3, 4] [5, 6] [7, 8] [9, 10]
  #
  $This->{MinDistance} = 1;
  $This->{MaxDistance} = 10;

  # Distance bin size used for binning distances...
  #
  $This->{DistanceBinSize} = 2;

  # Determines whether to apply triangle inequality to distances triplets during basis set generation...
  #
  $This->{UseTriangleInequality} = 1;

  # Initialize pharmacophore atom types information...
  $This->_InitializeToplogicalPharmacophoreAtomTypesInformation();

  # Pharmacophore types assigned to each heavy atom...
  #
  %{$This->{AssignedAtomTypes}} = ();

  # All pharmacophore atom triplets between minimum and maximum distance...
  #
  # IDs holds the atom triplet IDs and Count maps each ID to its count.
  #
  %{$This->{AtomTriplets}} = ();
  @{$This->{AtomTriplets}{IDs}} = ();
  %{$This->{AtomTriplets}{Count}} = ();
}

# Initialize class ...
sub _InitializeClass {
  # Class name...
  $ClassName = __PACKAGE__;
}

# Initialize object properties....
#
# Calls Set<Name>(<Value>) for each specified name and value pair, makes sure
# a Molecule was specified and initializes the fingerprints vector.
#
sub _InitializeTopologicalPharmacophoreAtomTripletsFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  my($Name, $Value, $MethodName);
  while (($Name, $Value) = each %NamesAndValues) {
    # Setters not explicitly defined in this file are presumably provided by
    # the parent classes - TODO confirm against Fingerprints::Fingerprints.
    $MethodName = "Set${Name}";
    $This->$MethodName($Value);
  }

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }
  $This->_InitializeTopologicalPharmacophoreAtomTripletsFingerprintsVector();

  return $This;
}

# Initialize fingerprints vector...
#
# Applies the default ArbitrarySize value of AtomTripletsSetSizeToUse when none
# was specified, sets the type of the fingerprints vector values based on it and
# calls _InitializeFingerprintsVector, which is not defined in this file.
#
sub _InitializeTopologicalPharmacophoreAtomTripletsFingerprintsVector {
  my($This) = @_;

  if (!$This->{AtomTripletsSetSizeToUse}) {
    $This->{AtomTripletsSetSizeToUse} =  'ArbitrarySize';
  }

  # Vector type and type of values...
  $This->{VectorType} = 'FingerprintsVector';

  # FixedSize uses OrderedNumericalValues; any other value uses NumericalValues...
  if ($This->{AtomTripletsSetSizeToUse} =~ /^FixedSize$/i) {
    $This->{FingerprintsVectorType} = 'OrderedNumericalValues';
  }
  else {
    $This->{FingerprintsVectorType} = 'NumericalValues';
  }

  $This->_InitializeFingerprintsVector();
}

# Set atom triplets set size to use...
#
# The size can be set only once and must be either ArbitrarySize or FixedSize;
# the comparison ignores case and the value is stored exactly as specified.
# Croaks on a second attempt to set it or on an unsupported value. Returns
# the object.
#
sub SetAtomTripletsSetSizeToUse {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetAtomTripletsSetSizeToUse: Can't change size: It's already set..."
    if $This->{AtomTripletsSetSizeToUse};

  unless ($Value =~ /^(ArbitrarySize|FixedSize)$/i) {
    croak "Error: ${ClassName}->SetAtomTripletsSetSizeToUse: Unknown AtomTripletsSetSizeToUse value: $Value; Supported values: ArbitrarySize or FixedSize";
  }

  $This->{AtomTripletsSetSizeToUse} = $Value;

  return $This;
}

# Initialize topological atom types, generated by AtomTypes::FunctionalClassAtomTypes
# class, to use for atom triplets fingerprint generation...
#
# Let:
#   HBD: HydrogenBondDonor
#   HBA: HydrogenBondAcceptor
#   PI :  PositivelyIonizable
#   NI : NegativelyIonizable
#   Ar : Aromatic
#   Hal : Halogen
#   H : Hydrophobic
#   RA : RingAtom
#   CA : ChainAtom
#
# Then:
#
#   Functional class atom type specification for an atom corresponds to:
#
#     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
#
#   Default pharmacophore atom types [ Ref 71 ] to use for atom triplets fingerprint generation
#   are: HBD, HBA, PI, NI, H, Ar
#
#   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
#
#     HydrogenBondDonor: NH, NH2, OH
#     HydrogenBondAcceptor: N[!H], O
#     PositivelyIonizable: +, NH2
#     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
#
sub _InitializeToplogicalPharmacophoreAtomTypesInformation {
  my($This) = @_;

  # The default types are kept in sorted order, the same way explicitly
  # specified types are stored...
  my @DefaultAtomTypes = sort qw(HBD HBA PI NI H Ar);

  @{$This->{AtomTypesToUse}} = @DefaultAtomTypes;

  return $This;
}

# Set atom types to use for atom triplets...
#
# Accepts either a list of atom types or a reference to an array of atom
# types. Each type is checked using IsFunctionalClassAvailable from
# AtomTypes::FunctionalClassAtomTypes before any change is made, and the
# accepted types are stored in sorted order. Warns and returns nothing when
# no values are specified; croaks on an unsupported type; returns the object
# otherwise.
#
sub SetAtomTypesToUse {
  my($This, @Values) = @_;

  unless (@Values) {
    carp "Warning: ${ClassName}->SetAtomTypesToUse: No values specified...";
    return;
  }

  # An array reference as the first value supplies the whole list...
  my @RequestedAtomTypes = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomTypes are valid...
  my @ValidatedAtomTypes = ();
  for my $RequestedAtomType (@RequestedAtomTypes) {
    AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($RequestedAtomType)
      or croak "Error: ${ClassName}->SetAtomTypesToUse: Specified atom type, $RequestedAtomType, is not supported...\n ";
    push @ValidatedAtomTypes, $RequestedAtomType;
  }

  # Set atom types to use...
  @{$This->{AtomTypesToUse}} = sort @ValidatedAtomTypes;

  return $This;
}

# Set minimum distance for pharmacophore atom pairs in atom triplets...
#
# Croaks unless TextUtil::IsPositiveInteger accepts the value. Returns the object.
#
sub SetMinDistance {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid:  It must be a positive integer..."
    unless TextUtil::IsPositiveInteger($Value);

  $This->{MinDistance} = $Value;

  return $This;
}

# Set maximum distance for pharmacophore atom pairs in atom triplets...
#
# Croaks unless TextUtil::IsPositiveInteger accepts the value. Returns the object.
#
sub SetMaxDistance {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid:  It must be a positive integer..."
    unless TextUtil::IsPositiveInteger($Value);

  $This->{MaxDistance} = $Value;

  return $This;
}

# Set distance bin size for binning pharmacophore atom pair distances in atom triplets...
#
# Croaks unless TextUtil::IsPositiveInteger accepts the value. Returns the object.
#
sub SetDistanceBinSize {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetDistanceBinSize: DistanceBinSize value, $Value, is not valid:  It must be a positive integer...";
  }
  $This->{DistanceBinSize} = $Value;

  return $This;
}

# Generate fingerprints description...
#
# Returns the explicitly set Description when one exists; otherwise, a
# description of the form:
#
#   <Type>:<AtomTripletsSetSizeToUse>:MinDistance<N>:MaxDistance<M>
#
sub GetDescription {
  my($This) = @_;

  # Is description explicitly set?
  if (exists $This->{Description}) {
    return $This->{Description};
  }

  # Generate fingerprints description...

  return "$This->{Type}:$This->{AtomTripletsSetSizeToUse}:MinDistance$This->{MinDistance}:MaxDistance$This->{MaxDistance}";
}

# Generate topological pharmacophore atom triplets [ Ref 66, Ref 68-71 ] fingerprints...
#
# Let:
#
#   P = Any of the supported pharmacophore atom types
#
#   Px = Pharmacophore atom x
#   Py = Pharmacophore atom y
#   Pz = Pharmacophore atom z
#
#   Dxy = Distance or lower bound of binned distance between Px and Py
#   Dxz = Distance or lower bound of binned distance between Px and Pz
#   Dyz = Distance or lower bound of binned distance between Py and Pz
#
# Then:
#   PxDyz-PyDxz-PzDxy = Pharmacophore atom triplet ID for atoms Px, Py and Pz
#
# For example: H1-H1-H1, H2-HBA-H2 and so on
#
# Methodology:
#   . Generate a distance matrix.
#   . Using specified minimum, maximum and distance bin size, generate a binned distance
#     matrix from distance matrix. The lower distance bound on the distance bin is used
#     in the binned distance matrix and atom triplet IDs.
#   . Assign pharmacophore atom types to all the atoms.
#   . Initialize pharmacophore atom triplets basis set for all unique triplets constituting
#     atom pairs binned distances between minimum and maximum distance.
#       . Optionally, triangle inequality is also applied which means:
#           . Distance or binned distance between any two pairs in a triplet must be less than the
#             sum of distances or binned distances between other two pairs and greater than the
#             difference of distances between other pairs.
#   . Using binned distance matrix and pharmacophore atom types, count occurrence of
#     unique atom triplets.
#
# Notes:
#   . Hydrogen atoms are ignored during the fingerprint generation.
#
# Croaks when MinDistance is greater than MaxDistance. Warns and returns the
# object without generating fingerprints when the distance matrix can't be
# generated. Returns the object.
#
sub GenerateFingerprints {
  my($This) = @_;

  if ($This->{MinDistance} > $This->{MaxDistance}) {
    croak "Error: ${ClassName}->GenerateFingerprints: No fingerprints generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
  }

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Generate distance matrix...
  if (!$This->_SetupDistanceMatrix()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: Fingerprints generation didn't succeed: Couldn't generate distance matrix...";

    # Release the cached molecule data on the failure path as well; otherwise
    # the object keeps holding on to the atoms cached above...
    $This->_ClearMoleculeDataCache();

    return $This;
  }

  # Generate binned distance matrix...
  $This->_GenerateBinnedDistanceMatrix();

  # Assign pharmacophore atom types to all heavy atoms...
  $This->_AssignPharmacophoreAtomTypes();

  # Initialize values of all possible pharmacophore atom triplets...
  $This->_InitializePharmacophoreAtomTriplets();

  # Count atom triplets...
  $This->_CountPharmacohoreAtomTriplets();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}

# Setup distance matrix...
#
# Stores the distance matrix returned by the molecule as DistanceMatrix.
# Returns the object on success; otherwise, returns nothing, which is false
# in both scalar and list context.
#
sub _SetupDistanceMatrix {
  my($This) = @_;

  $This->{DistanceMatrix} = $This->GetMolecule()->GetDistanceMatrix();

  if (!$This->{DistanceMatrix}) {
    return;
  }

  return $This;
}

# Generate binned distance matrix for distances with in the specified distance ranges...
#
# Creates a matrix of the same size as the distance matrix and, for each pair
# of different atoms with a distance between MinDistance and MaxDistance, sets
# the lower bound of the corresponding distance bin for both (row, col) and
# (col, row). Values for all other pairs are not set. Stores the matrix as
# BinnedDistanceMatrix and returns the object.
#
sub _GenerateBinnedDistanceMatrix {
  my($This) = @_;

  my $Distances = $This->{DistanceMatrix};
  my($RowCount, $ColCount) = $Distances->GetSize();

  # Initialize binned distance matrix...
  my $BinnedDistances = new Matrix($RowCount, $ColCount);

  # Map each distance covered by a bin to the lower bound of that bin...
  my %BinStartOf = ();
  my $BinStart = $This->{MinDistance};
  while ($BinStart <= $This->{MaxDistance}) {
    my $BinEnd = $BinStart + $This->{DistanceBinSize} - 1;
    $BinStartOf{$_} = $BinStart for $BinStart .. $BinEnd;
    $BinStart += $This->{DistanceBinSize};
  }

  # Go over the upper triangle and mirror each value into the lower one...
  my $SkipIndexCheck = 0;
  for my $Row (0 .. $RowCount - 1) {
    for my $Col ($Row + 1 .. $ColCount - 1) {
      my $PairDistance = $Distances->GetValue($Row, $Col, $SkipIndexCheck);
      next if $PairDistance < $This->{MinDistance} || $PairDistance > $This->{MaxDistance};

      my $PairBin = $BinStartOf{$PairDistance};
      $BinnedDistances->SetValue($Row, $Col, $PairBin, $SkipIndexCheck);
      $BinnedDistances->SetValue($Col, $Row, $PairBin, $SkipIndexCheck);
    }
  }

  $This->{BinnedDistanceMatrix} = $BinnedDistances;

  return $This;
}

# Assign pharmacophore atom types to all heavy atoms...
#
# Uses AtomTypes::FunctionalClassAtomTypes with AtomTypesToUse to assign atom
# types and stores, for each non-hydrogen atom ID, a reference to the list
# obtained by splitting its assigned type on '.'. The list is empty when no
# type, or a type of None, is assigned. Returns the object.
#
sub _AssignPharmacophoreAtomTypes {
  my($This) = @_;

  # Assign topological pharmacophore atom types...
  my $TypeAssigner = new AtomTypes::FunctionalClassAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => 1, 'FunctionalClassesToUse' => $This->{AtomTypesToUse});
  $TypeAssigner->AssignAtomTypes();

  %{$This->{AssignedAtomTypes}} = ();

  for my $CachedAtom (@{$This->{Atoms}}) {
    next if $CachedAtom->IsHydrogen();

    my $CachedAtomID = $CachedAtom->GetID();
    my $TypeLabel = $TypeAssigner->GetAtomType($CachedAtom);

    my @PharmacophoreTypes = ($TypeLabel && $TypeLabel !~ /^None$/i) ? split(/\./, $TypeLabel) : ();

    # Assign pharmacophore types list to atom...
    $This->{AssignedAtomTypes}{$CachedAtomID} = \@PharmacophoreTypes;
  }
  return $This;
}

# Initialize pharmacophore atom triplets basis set for all unique triplets constituting atom pairs
# binned distances between minimum and maximum distance and optionally applying triangle
# inequality. The DistanceBinSize determines the size of the distance bins. The lower distance
# bound, along with specified pharmacophore types, is used during generation of atom triplet
# IDs.
#
# Each new atom triplet ID is appended to the list of IDs with its count set to 0; IDs
# which already have a count are skipped. Returns the object.
#
sub _InitializePharmacophoreAtomTriplets {
  my($This) = @_;

  # Lower bounds of all the distance bins...
  my @BinStarts = ();
  for (my $BinStart = $This->{MinDistance}; $BinStart <= $This->{MaxDistance}; $BinStart += $This->{DistanceBinSize}) {
    push @BinStarts, $BinStart;
  }

  # Initialize atom triplets information...
  for my $Index12 (0 .. $#BinStarts) {
    my $Bin12 = $BinStarts[$Index12];

    for my $Bin13 (@BinStarts) {
      # The third distance starts at the first one...
      for my $Index23 ($Index12 .. $#BinStarts) {
        my $Bin23 = $BinStarts[$Index23];

        next if $This->{UseTriangleInequality} && !$This->_DoDistancesSatisfyTriangleInequality($Bin12, $Bin13, $Bin23);

        for my $Type1 (@{$This->{AtomTypesToUse}}) {
          for my $Type2 (@{$This->{AtomTypesToUse}}) {
            for my $Type3 (@{$This->{AtomTypesToUse}}) {
              my $TripletID = $This->_GetAtomTripletID($Type1, $Bin23, $Type2, $Bin13, $Type3, $Bin12);
              next if exists $This->{AtomTriplets}{Count}{$TripletID};

              # Unique atom triplets information...
              push @{$This->{AtomTriplets}{IDs}}, $TripletID;
              $This->{AtomTriplets}{Count}{$TripletID} = 0;
            }
          }
        }
      }
    }
  }
  return $This;
}

# Check triangle inequality...
#
# Returns 1 when each of the three distances is greater than the absolute
# difference of the other two and less than their sum; otherwise, returns 0.
#
sub _DoDistancesSatisfyTriangleInequality {
  my($This, $Distance1, $Distance2, $Distance3) = @_;

  # Each entry is a side followed by the other two sides...
  my @SideChecks = (
    [$Distance1, $Distance2, $Distance3],
    [$Distance2, $Distance1, $Distance3],
    [$Distance3, $Distance1, $Distance2],
  );

  for my $SideCheck (@SideChecks) {
    my($Side, $OtherSide1, $OtherSide2) = @{$SideCheck};
    return 0 unless $Side > abs($OtherSide1 - $OtherSide2) && $Side < ($OtherSide1 + $OtherSide2);
  }
  return 1;
}

# Count pharmacophore atom triplets...
#
# Goes over all combinations of three different atoms, in increasing atom
# index order, which have assigned pharmacophore types. Combinations with a
# binned distance of 0 between any pair of atoms, or not satisfying the
# triangle inequality when it is in use, are skipped. The count is incremented
# for the triplet ID of each combination of the types assigned to the three
# atoms. Returns the object.
#
sub _CountPharmacohoreAtomTriplets {
  my($This) = @_;

  my $LastAtomIndex = @{$This->{Atoms}} - 1;
  my $BinnedDistances = $This->{BinnedDistanceMatrix};
  my $SkipIndexCheck = 0;

  # Pharmacophore types assigned to the atom at an index; an empty list
  # for atoms without any assigned types...
  my $GetTypesAtIndex = sub {
    my($AtomIndex) = @_;
    my $AtomID = $This->{AtomIndexToID}{$AtomIndex};

    return exists($This->{AssignedAtomTypes}{$AtomID}) ? @{$This->{AssignedAtomTypes}{$AtomID}} : ();
  };

  for my $Index1 (0 .. $LastAtomIndex) {
    my @Types1 = $GetTypesAtIndex->($Index1);
    next unless @Types1;

    for my $Index2 ($Index1 + 1 .. $LastAtomIndex) {
      my @Types2 = $GetTypesAtIndex->($Index2);
      next unless @Types2;

      my $Bin12 = $BinnedDistances->GetValue($Index1, $Index2, $SkipIndexCheck);
      next if $Bin12 == 0;

      for my $Index3 ($Index2 + 1 .. $LastAtomIndex) {
        my @Types3 = $GetTypesAtIndex->($Index3);
        next unless @Types3;

        my $Bin13 = $BinnedDistances->GetValue($Index1, $Index3, $SkipIndexCheck);
        my $Bin23 = $BinnedDistances->GetValue($Index2, $Index3, $SkipIndexCheck);
        next if $Bin13 == 0 || $Bin23 == 0;

        next if $This->{UseTriangleInequality} && !$This->_DoDistancesSatisfyTriangleInequality($Bin12, $Bin13, $Bin23);

        # Go over possible pharmacophore triplets for the three pharmacophore atoms using the
        # binned distances...
        for my $Type1 (@Types1) {
          for my $Type2 (@Types2) {
            for my $Type3 (@Types3) {
              my $TripletID = $This->_GetAtomTripletID($Type1, $Bin23, $Type2, $Bin13, $Type3, $Bin12);
              $This->{AtomTriplets}{Count}{$TripletID} += 1;
            }
          }
        }
      }
    }
  }
  return $This;
}

# Set final fingerprints vector...
#
# Marks the fingerprints as generated and adds atom triplet IDs followed by
# their counts to the fingerprints vector. For ArbitrarySize, only the atom
# triplets with a non-zero count are added; otherwise, all of them are added.
# Returns the object.
#
sub _SetFinalFingerprints {
  my($This) = @_;

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  my(@IDs, @Values);

  if ($This->{AtomTripletsSetSizeToUse} =~ /^ArbitrarySize$/i) {
    # Keep only the atom triplets which are present...
    for my $TripletID (@{$This->{AtomTriplets}{IDs}}) {
      my $TripletCount = $This->{AtomTriplets}{Count}{$TripletID};
      next if $TripletCount == 0;

      push @IDs, $TripletID;
      push @Values, $TripletCount;
    }

    # Set atom triplet IDs for fingerprint vector...
    $This->{FingerprintsVector}->AddValueIDs(\@IDs);
  }
  else {
    @Values = map { $This->{AtomTriplets}{Count}{$_} } @{$This->{AtomTriplets}{IDs}};

    # Set atom triplet IDs for fingerprint vector...
    $This->{FingerprintsVector}->AddValueIDs(\@{$This->{AtomTriplets}{IDs}});
  }

  # Set atom triplets count values for fingerprint vector...
  $This->{FingerprintsVector}->AddValues(\@Values);

  return $This;
}

# Return an array or reference to an array containing atom triplet IDs...
#
# In list context, the IDs are returned as a list; otherwise, a reference to
# the array holding the IDs in the object is returned.
#
sub GetAtomTripletIDs {
  my($This) = @_;

  if (wantarray) {
    return @{$This->{AtomTriplets}{IDs}};
  }

  return \@{$This->{AtomTriplets}{IDs}};
}

# Get pharmacophore atom triplet ID corresponding to atom types and distances
# corresponding to atom triplet...
#
# Each atom type is concatenated with the distance between the other two
# atoms, and the three resulting strings are sorted and joined by '-'.
# Consequently, the ID doesn't depend on the order of atoms in the triplet.
#
sub _GetAtomTripletID {
  my($This, $Px, $Dyz, $Py, $Dxz, $Pz, $Dxy) = @_;

  return join "-", sort("${Px}${Dyz}", "${Py}${Dxz}", "${Pz}${Dxy}");
}

# Cache appropriate molecule data...
#
# Caches all atoms of the molecule along with a map of atom index, which is
# the position of the atom in the cached list, to atom ID. Returns the object.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # Get all atoms including hydrogens to correctly map atom indices to atom IDs for
  # usage of distance matrix. The hydrogen atoms are ignored during processing...
  #
  @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();

  # Set AtomIndex to AtomID hash...
  %{$This->{AtomIndexToID}} = ();

  my $AtomIndex = 0;
  for my $CachedAtom (@{$This->{Atoms}}) {
    $This->{AtomIndexToID}{$AtomIndex} = $CachedAtom->GetID();
    $AtomIndex += 1;
  }

  return $This;
}

# Clear cached molecule data...
#
# Empties the list of cached atoms. Returns the object.
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  @{$This->{Atoms}} = ();

  return $This;
}


# Return a string containing data for TopologicalPharmacophoreAtomTripletsFingerprints object...
#
# The string contains the fingerprint type, the parameters used for fingerprints
# generation, the atom types information, number of values in the fingerprints
# vector and the fingerprints vector itself. This method is used by the '""'
# overload of this class.
#
sub StringifyTopologicalPharmacophoreAtomTripletsFingerprints {
  my($This) = @_;
  my(@Fragments);

  # Type of fingerprint and distances information...
  @Fragments = (
    "Fingerprint type: $This->{Type}; AtomTripletsSetSizeToUse: $This->{AtomTripletsSetSizeToUse}",
    "; MinDistance: $This->{MinDistance}; MaxDistance: $This->{MaxDistance}; DistanceBinSize: $This->{DistanceBinSize}; UseTriangleInequality: " . ($This->{UseTriangleInequality} ? "Yes" : "No"),
  );

  # Pharmacophore atom type labels and description...
  my @ClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
  my %DescriptionOf = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

  my @DescribedClasses = map { "$_: $DescriptionOf{$_}" } @ClassesOrder;

  push @Fragments, "; AtomTypesToUse: <" . TextUtil::JoinWords(\@{$This->{AtomTypesToUse}}, ", ", 0) . ">";
  push @Fragments, "; AtomTypesOrder: <" . TextUtil::JoinWords(\@ClassesOrder, ", ", 0) . ">";
  push @Fragments, "; AvailableAtomTypes: <" . TextUtil::JoinWords(\@DescribedClasses, ", ", 0) . ">";

  # Total number of pharmacophore atom triplets...
  push @Fragments, "; NumOfAtomTriplets: " . $This->{FingerprintsVector}->GetNumOfValues();

  # FingerprintsVector...
  push @Fragments, "; FingerprintsVector: < $This->{FingerprintsVector} >";

  return join '', @Fragments;
}

1;

__END__

=head1 NAME

TopologicalPharmacophoreAtomTripletsFingerprints

=head1 SYNOPSIS

use Fingerprints::TopologicalPharmacophoreAtomTripletsFingerprints;

use Fingerprints::TopologicalPharmacophoreAtomTripletsFingerprints qw(:all);

=head1 DESCRIPTION

B<TopologicalPharmacophoreAtomTripletsFingerprints> [ Ref 66, Ref 68-71 ] class provides
the following methods:

new, GenerateFingerprints, GetDescription, GetAtomTripletIDs,
SetAtomTripletsSetSizeToUse, SetAtomTypesToUse, SetDistanceBinSize,
SetMaxDistance, SetMinDistance,
StringifyTopologicalPharmacophoreAtomTripletsFingerprints

B<TopologicalPharmacophoreAtomTripletsFingerprints> is derived from B<Fingerprints> class
which in turn is derived from B<ObjectProperty> base class that provides methods not explicitly
defined in B<TopologicalPharmacophoreAtomTripletsFingerprints>, B<Fingerprints> or B<ObjectProperty>
classes using Perl's AUTOLOAD functionality.
These methods are generated on-the-fly for a specified +object property: + + Set<PropertyName>(<PropertyValue>); + $PropertyValue = Get<PropertyName>(); + Delete<PropertyName>(); + +Based on the values specified for B<AtomTypesToUse>, pharmacophore atom types are +assigned to all non-hydrogen atoms in a molecule and a distance matrix is generated. +Using B<MinDistance>, B<MaxDistance>, and B<DistanceBinSize> values, a +binned distance matrix is generated with lower bound on the distance bin as the distance +in distance matrix; the lower bound on the distance bin is also used as the distance between +atom pairs for generation of atom triplet identifiers. + +A pharmacophore atom triplets basis set is generated for all unique atom triplets constituting +atom pairs binned distances between B<--MinDistance> and B<--MaxDistance>. The value +of B<--UseTriangleInequality> determines whether the triangle inequality test is applied during +generation of atom triplets basis set. The lower distance bound, along with specified pharmacophore +types, is used during generation of atom triplet IDs. + + Let: + + P = Valid pharmacophore atom type + + Px = Pharmacophore atom x + Py = Pharmacophore atom y + Pz = Pharmacophore atom z + + Dmin = Minimum distance corresponding to number of bonds between two atoms + Dmax = Maximum distance corresponding to number of bonds between two atoms + D = Distance corresponding to number of bonds between two atom + + Bsize = Distance bin size + Nbins = Number of distance bins + + Dxy = Distance or lower bound of binned distance between Px and Py + Dxz = Distance or lower bound of binned distance between Px and Pz + Dyz = Distance or lower bound of binned distance between Py and Pz + + Then: + + PxDyz-PyDxz-PzDxy = Pharmacophore atom triplet IDs for atom types Px, + Py, and Pz + + For example: H1-H1-H1, H2-HBA-H2 and so on. 
+ + For default values of Dmin = 1, Dmax = 10 and Bsize = 2, the number of + distance bins, Nbins = 5, are: + + [1, 2] [3, 4] [5, 6] [7, 8] [9, 10] + + and atom triplet basis set size is 2692. + + Atom triplet basis set size for various values of Dmin, Dmax and Bsize in + conjunction with usage of triangle inequality is: + + Dmin Dmax Bsize UseTriangleInequality TripletBasisSetSize + 1 10 2 No 4960 + 1 10 2 Yes 2692 [ Default ] + 2 12 2 No 8436 + 2 12 2 Yes 4494 + + +Using binned distance matrix and pharmacophore atom types, occurrence of unique pharmacophore +atom triplets is counted. + +The final pharmacophore atom triplets count along with atom triplet identifiers involving all non-hydrogen +atoms constitute pharmacophore topological atom triplets fingerprints of the molecule. + +For I<ArbitrarySize> value of B<AtomTripletsSetSizeToUse>, the fingerprint vector corresponds to +only those topological pharmacophore atom triplets which are present and have non-zero count. However, +for I<FixedSize> value of B<AtomTripletsSetSizeToUse>, the fingerprint vector contains all possible +valid topological pharmacophore atom triplets with both zero and non-zero count values. + +The current release of MayaChemTools generates the following types of topological pharmacophore +atom triplets fingerprints vector strings: + + FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize: + MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1- + Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1 + -H1-HBD1 Ar1-HBA1-HBD1 H1-H1-H1 H1-H1-HBA1 H1-H1-HBD1 H1-HBA1-HBA1 H1- + HBA1-HBD1 H1-HBA1-NI1 H1-HBD1-NI1 HBA1-HBA1-NI1 HBA1-HBD1-NI1 Ar1-...; + 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23 + 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1 + 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ... 
+ + FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD + istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106 + 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0 + 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26 + 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0 + 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ... + + FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD + istance1:MaxDistance10;2692;OrderedNumericalValues;IDsAndValuesString; + Ar1-Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-Ar1-NI1 Ar1-Ar1-P + I1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1-H1-HBD1 Ar1-H1-NI1 Ar1-H1-PI1 Ar1-HBA1-HB + A1 Ar1-HBA1-HBD1 Ar1-HBA1-NI1 Ar1-HBA1-PI1 Ar1-HBD1-HBD1 Ar1-HBD1-...; + 46 106 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 + 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 + 132 26 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 ... + +=head2 METHODS + +=over 4 + +=item B<new> + + $TPATFP = new TopologicalPharmacophoreAtomTripletsFingerprints( + %NamesAndValues); + +Using specified I<TopologicalPharmacophoreAtomTripletsFingerprints> property names and values hash, B<new> +method creates a new object and returns a reference to newly created B<TopologicalPharmacophoreAtomTripletsFingerprints> +object. 
By default, the following properties are initialized: + + Molecule = '' + Type = 'TopologicalPharmacophoreAtomTriplets' + MinDistance = 1 + MaxDistance = 10 + DistanceBinSize = 2 + UseTriangleInequality = 1 + AtomTypesToUse = ['HBD', 'HBA', 'PI', 'NI', 'H', 'Ar'] + +Examples: + + $TPATFP = new TopologicalPharmacophoreAtomTripletsFingerprints( + 'Molecule' => $Molecule); + + $TPATFP = new TopologicalPharmacophoreAtomTripletsFingerprints( + 'Molecule' => $Molecule, + 'AtomTripletsSetSizeToUse' => 'ArbitrarySize', + 'MinDistance' => 1, + 'MaxDistance' => 10, + 'DistanceBinSize' => 2, + 'AtomTypesToUse' => ['HBD', 'HBA', 'PI', 'NI', 'H', 'Ar'], + 'UseTriangleInequality' => 1); + + $TPATFP = new TopologicalPharmacophoreAtomTripletsFingerprints( + 'Molecule' => $Molecule, + 'AtomTripletsSetSizeToUse' => 'FixedSize', + 'MinDistance' => 1, + 'MaxDistance' => 10, + 'DistanceBinSize' => 2, + 'AtomTypesToUse' => ['HBD', 'HBA', 'PI', 'NI', 'H', 'Ar'], + 'UseTriangleInequality' => 1); + + $TPATFP->GenerateFingerprints(); + print "$TPATFP\n"; + +=item B<GetDescription> + + $Description = $TopologicalPharmacophoreAtomTripletsFP->GetDescription(); + +Returns a string containing description of topological pharmacophore atom triplets fingerprints. + +=item B<GenerateFingerprints> + + $TopologicalPharmacophoreAtomTripletsFP->GenerateFingerprints(); + +Generates topological pharmacophore atom triplets fingerprints and returns +I<TopologicalPharmacophoreAtomTripletsFP>. + +=item B<GetAtomTripletIDs> + + $AtomTripletsIDsRef = $TopologicalPharmacophoreATFP->GetAtomTripletIDs(); + @AtomTripletIDs = $TopologicalPharmacophoreATFP->GetAtomTripletIDs(); + +Returns atom triplet IDs corresponding to atom triplets count values in topological pharmacophore +atom triplet fingerprints vector as an array or reference to an array. 
+ +=item B<AtomTripletsSetSizeToUse> + + $TPAFP->AtomTripletsSetSizeToUse($Values); + +Sets pharmacophore atom triplets set size to use for topological pharmacophore fingerprints +generation and returns I<TopologicalPharmacophoreAtomTripletsFingerprints>. + +Possible values for pharmacophore atom triplets set size are: I<ArbitrarySize, FixedSize>. +Default value: I<ArbitrarySize>. + +For I<ArbitrarySize> value of B<AtomTripletsSetSizeToUse>, the fingerprint vector corresponds to +only those topological pharmacophore atom triplets which are present and have non-zero count. However, +for I<FixedSize> value of B<AtomTripletsSetSizeToUse>, the fingerprint vector contains all possible +valid topological pharmacophore atom triplets with both zero and non-zero count values. + +=item B<SetAtomTypesToUse> + + $TopologicalPharmacophoreAtomTripletsFP->SetAtomTypesToUse($ValuesRef); + $TopologicalPharmacophoreAtomTripletsFP->SetAtomTypesToUse(@Values); + +Sets pharmacophore atom types to use for topological pharmacophore fingerprints +generation and returns I<TopologicalPharmacophoreAtomTripletsFingerprints>. + +Possible values for pharmacophore atom types are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>. +Default value [ Ref 71 ] : I<HBD,HBA,PI,NI,H,Ar>. + +The pharmacophore atom types abbreviations correspond to: + + HBD: HydrogenBondDonor + HBA: HydrogenBondAcceptor + PI : PositivelyIonizable + NI : NegativelyIonizable + Ar : Aromatic + Hal : Halogen + H : Hydrophobic + RA : RingAtom + CA : ChainAtom + +I<AtomTypes::FunctionalClassAtomTypes> module is used to assign pharmacophore atom +types. 
It uses the following definitions [ Ref 60-61, Ref 65-66 ]: + + HydrogenBondDonor: NH, NH2, OH + HydrogenBondAcceptor: N[!H], O + PositivelyIonizable: +, NH2 + NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH + + +=item B<SetDistanceBinSize> + + $TopologicalPharmacophoreAtomTripletsFP->SetDistanceBinSize($Value); + +Sets distance bin size used to bin distances between atom pairs in atom triplets and returns +I<TopologicalPharmacophoreAtomTripletsFP>. + +For default B<MinDistance> and B<MaxDistance> values of 1 and 10 with B<DistanceBinSize> +of 2 [ Ref 70 ], the following 5 distance bins are generated: + + [1, 2] [3, 4] [5, 6] [7, 8] [9, 10] + +The lower distance bound on the distance bin is used to bin the distance between atom pairs in +atom triplets. So in the previous example, atom pairs with distances 1 and 2 fall in the first distance +bin, atom pairs with distances 3 and 4 fall in the second distance bin and so on. + +In order to distribute distance bins of equal size, the last bin is allowed to go past B<MaxDistance> +by up to distance bin size. For example, B<MinDistance> and B<MaxDistance> values of 2 and 10 +with B<DistanceBinSize> of 2 generate the following 5 distance bins: + + [2, 3] [4, 5] [6, 7] [8, 9] [10, 11] + + +=item B<SetMaxDistance> + + $TopologicalPharmacophoreAtomTripletsFP->SetMaxDistance($Value); + +Sets maximum bond distance between atom pairs corresponding to atom triplets for +generating topological pharmacophore atom triplets fingerprints and returns +I<TopologicalPharmacophoreAtomTripletsFP>. + +=item B<SetMinDistance> + + $TopologicalPharmacophoreAtomTripletsFP->SetMinDistance($Value); + +Sets minimum bond distance between atom pairs corresponding to atom triplets for +generating topological pharmacophore atom triplets fingerprints and returns +I<TopologicalPharmacophoreAtomTripletsFP>. 
+ +=item B<StringifyTopologicalPharmacophoreAtomTripletsFingerprints> + + $String = $TopologicalPharmacophoreAtomTripletsFingerprints-> + StringifyTopologicalPharmacophoreAtomTripletsFingerprints(); + +Returns a string containing information about the I<TopologicalPharmacophoreAtomTripletsFingerprints> object. + +=back + +=head1 AUTHOR + +Manish Sud <msud@san.rr.com> + +=head1 SEE ALSO + +Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm, +AtomTypesFingerprints.pm, EStateIndiciesFingerprints.pm, ExtendedConnectivityFingerprints.pm, +MACCSKeys.pm, PathLengthFingerprints.pm, TopologicalAtomPairsFingerprints.pm, +TopologicalAtomTripletsFingerprints.pm, TopologicalAtomTorsionsFingerprints.pm, +TopologicalPharmacophoreAtomPairsFingerprints.pm + +=head1 COPYRIGHT + +Copyright (C) 2015 Manish Sud. All rights reserved. + +This file is part of MayaChemTools. + +MayaChemTools is free software; you can redistribute it and/or modify it under +the terms of the GNU Lesser General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) +any later version. + +=cut