Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/MolecularDescriptors/MolecularComplexityDescriptors.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
| author | deepakjadmin |
|---|---|
| date | Wed, 20 Jan 2016 09:23:18 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4816e4a8ae95 |
|---|---|
| 1 package MolecularDescriptors::MolecularComplexityDescriptors; | |
| 2 # | |
| 3 # $RCSfile: MolecularComplexityDescriptors.pm,v $ | |
| 4 # $Date: 2015/02/28 20:49:20 $ | |
| 5 # $Revision: 1.15 $ | |
| 6 # | |
| 7 # Author: Manish Sud <msud@san.rr.com> | |
| 8 # | |
| 9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 10 # | |
| 11 # This file is part of MayaChemTools. | |
| 12 # | |
| 13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 14 # the terms of the GNU Lesser General Public License as published by the Free | |
| 15 # Software Foundation; either version 3 of the License, or (at your option) any | |
| 16 # later version. | |
| 17 # | |
| 18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
| 19 # any warranty; without even the implied warranty of merchantability of fitness | |
| 20 # for a particular purpose. See the GNU Lesser General Public License for more | |
| 21 # details. | |
| 22 # | |
| 23 # You should have received a copy of the GNU Lesser General Public License | |
| 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
| 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
| 26 # Boston, MA, 02111-1307, USA. | |
| 27 # | |
| 28 | |
| 29 use strict; | |
| 30 use Carp; | |
| 31 use Exporter; | |
| 32 use Scalar::Util (); | |
| 33 use TextUtil (); | |
| 34 use MathUtil (); | |
| 35 use Atom; | |
| 36 use Molecule; | |
| 37 use MolecularDescriptors::MolecularDescriptors; | |
| 38 use AtomTypes::AtomicInvariantsAtomTypes; | |
| 39 use AtomTypes::FunctionalClassAtomTypes; | |
| 40 use Fingerprints::AtomTypesFingerprints; | |
| 41 use Fingerprints::ExtendedConnectivityFingerprints; | |
| 42 use Fingerprints::MACCSKeys; | |
| 43 use Fingerprints::PathLengthFingerprints; | |
| 44 use Fingerprints::TopologicalAtomPairsFingerprints; | |
| 45 use Fingerprints::TopologicalAtomTripletsFingerprints; | |
| 46 use Fingerprints::TopologicalAtomTorsionsFingerprints; | |
| 47 use Fingerprints::TopologicalPharmacophoreAtomPairsFingerprints; | |
| 48 use Fingerprints::TopologicalPharmacophoreAtomTripletsFingerprints; | |
| 49 | |
# Package globals used for inheritance and by Exporter. Declared with our, which
# supersedes the obsolete vars pragma...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = qw(MolecularDescriptors::MolecularDescriptors Exporter);

# Nothing is exported by default; the two class level functions are available on request...
@EXPORT = qw();
@EXPORT_OK = qw(GetDescriptorNames GetMolecularComplexityTypeAbbreviation);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName, @DescriptorNames);
_InitializeClass();

# Overload Perl functions: string interpolation of an object is handled by the named method...
use overload '""' => 'StringifyMolecularComplexityDescriptors';
| 64 | |
# Class constructor...
#
# Parameters:
#   $Class - class name or an existing object of this class
#   %NamesAndValues - optional property names and values; these are handed over
#     to _InitializeMolecularComplexityDescriptorsProperties
#
# Returns the newly created object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Let the base class create the object and rebless it into the requested class...
  my $This = bless $Class->SUPER::new(), ref($Class) || $Class;

  # Defaults first, followed by any caller specified properties...
  $This->_InitializeMolecularComplexityDescriptors();
  $This->_InitializeMolecularComplexityDescriptorsProperties(%NamesAndValues);

  return $This;
}
| 78 | |
# Initialize class level data: the class name used in messages and the list of
# descriptor names calculated by this class...
#
sub _InitializeClass {
  ($ClassName, @DescriptorNames) = (__PACKAGE__, 'MolecularComplexity');
}
| 88 | |
# Return the names of the descriptors calculated by this class as a list.
#
# No arguments are examined, so it works both as a class function and as an
# object method.
#
sub GetDescriptorNames {
  my(@Names) = @DescriptorNames;

  return @Names;
}
| 97 | |
# Initialize object data to its default values...
#
# The defaults are kept in an ordered list of name and value pairs and copied into
# the object in that order. Returns the object.
#
sub _InitializeMolecularComplexityDescriptors {
  my($This) = @_;
  my(@DefaultNamesAndValues, $Name, $Value);

  @DefaultNamesAndValues = (
    # Type of MolecularDescriptor...
    'Type' => 'MolecularComplexity',

    # Molecular complexity corresponds to the number of bits-set or unique keys
    # [ Ref 117-119 ] in molecular fingerprints. Supported fingerprints types:
    #
    #   AtomTypesFingerprints
    #   ExtendedConnectivityFingerprints
    #   MACCSKeys
    #   PathLengthFingerprints
    #   TopologicalAtomPairsFingerprints
    #   TopologicalAtomTripletsFingerprints
    #   TopologicalAtomTorsionsFingerprints
    #   TopologicalPharmacophoreAtomPairsFingerprints
    #   TopologicalPharmacophoreAtomTripletsFingerprints
    #
    # It is left empty here; the default, MACCSKeys, is assigned during properties
    # initialization when no type has been specified.
    #
    'MolecularComplexityType' => '',

    # Atom types to use for generating fingerprints...
    #
    # Supported values: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
    # FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
    # TPSAAtomTypes, UFFAtomTypes
    #
    # Notes:
    #   . AtomicInvariantsAtomTypes is the default for all MolecularComplexityType values
    #     except TopologicalPharmacophoreAtomPairsFingerprints and
    #     TopologicalPharmacophoreAtomTripletsFingerprints.
    #   . This value is not used for MACCSKeys.
    #   . FunctionalClassAtomTypes is the only valid value for topological pharmacophore
    #     fingerprints.
    #   . Default values for AtomicInvariantsToUse and FunctionalClassesToUse depend on
    #     the type of fingerprints:
    #
    #     MolecularComplexityType               AtomicInvariantsToUse
    #
    #     AtomTypesFingerprints                 AS, X, BO, H, FC
    #     TopologicalAtomPairsFingerprints      AS, X, BO, H, FC
    #     TopologicalAtomTripletsFingerprints   AS, X, BO, H, FC
    #     TopologicalAtomTorsionsFingerprints   AS, X, BO, H, FC
    #
    #     ExtendedConnectivityFingerprints      AS, X, BO, H, FC, MN
    #     PathLengthFingerprints                AS
    #
    #     FunctionalClassesToUse defaults to HBD, HBA, PI, NI, Ar, Hal for all
    #     fingerprints except:
    #
    #     TopologicalPharmacophoreAtomPairsFingerprints     HBD, HBA, PI, NI, H
    #     TopologicalPharmacophoreAtomTripletsFingerprints  HBD, HBA, PI, NI, H, Ar
    #
    'AtomIdentifierType' => '',

    # Size of MACCS key set: 166 or 322...
    'MACCSKeysSize' => 166,

    # Atomic neighborhoods radius for extended connectivity fingerprints...
    'NeighborhoodRadius' => 2,

    # Minimum and maximum path lengths to use for path length fingerprints...
    'MinPathLength' => 1,
    'MaxPathLength' => 8,

    # By default, bond symbols are included in atom path strings used to generate
    # path length fingerprints...
    'UseBondSymbols' => 1,

    # Minimum and maximum bond distance between atom pairs during topological
    # atom pairs/triplets fingerprints...
    'MinDistance' => 1,
    'MaxDistance' => 10,

    # Determines whether to apply triangle inequality to distance triplets...
    #
    # Default for TopologicalAtomTripletsFingerprints: 0
    # Default for TopologicalPharmacophoreAtomTripletsFingerprints: 1
    #
    'UseTriangleInequality' => '',

    # Distance bin size used for binning distances during generation of
    # topological pharmacophore atom triplets fingerprints...
    'DistanceBinSize' => 2,

    # Normalization methodology to use for scaling the number of bits-set or unique
    # keys for:
    #
    #   ExtendedConnectivityFingerprints
    #   TopologicalPharmacophoreAtomPairsFingerprints
    #   TopologicalPharmacophoreAtomTripletsFingerprints
    #
    # This option is ignored for all other types of fingerprints.
    #
    # Possible values during extended connectivity fingerprints: None or
    # ByHeavyAtomsCount. Default: None.
    #
    # Possible values during topological pharmacophore atom pairs and triplets
    # fingerprints: None or ByPossibleKeysCount. Default: None. ByPossibleKeysCount
    # corresponds to total number of possible topological pharmacophore atom pairs
    # or triplets in a molecule.
    #
    'NormalizationMethodology' => 'None',
  );

  # Copy the defaults into the object, one name and value pair at a time...
  while (($Name, $Value) = splice(@DefaultNamesAndValues, 0, 2)) {
    $This->{$Name} = $Value;
  }

  # Initialize descriptor names and values...
  $This->_InitializeDescriptorNamesAndValues(@DescriptorNames);

  return $This;
}
| 218 | |
# Initialize object properties from caller specified names and values...
#
# Each property is applied by calling the corresponding Set<Name> method.
#
# SetNormalizationMethodology and SetAtomIdentifierType validate their value against
# the current MolecularComplexityType. An explicitly specified MolecularComplexityType
# is consequently applied before anything else, and the remaining properties are
# applied in sorted name order, so that validation doesn't depend on the internal
# ordering of the hash.
#
# Afterwards, defaults are filled in for MolecularComplexityType, AtomIdentifierType
# and UseTriangleInequality whenever the caller didn't specify them.
#
# Parameters:
#   %NamesAndValues - property names and values
#
# Returns the object.
#
sub _InitializeMolecularComplexityDescriptorsProperties {
  my($This, %NamesAndValues) = @_;
  my($MethodName);

  # Complexity type goes first: other setters check their values against it...
  if (exists $NamesAndValues{MolecularComplexityType}) {
    $This->SetMolecularComplexityType($NamesAndValues{MolecularComplexityType});
  }

  for my $Name (sort keys %NamesAndValues) {
    next if $Name eq 'MolecularComplexityType';

    $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  # Make sure MolecularComplexityType is set...
  if (!exists $NamesAndValues{MolecularComplexityType}) {
    $This->{MolecularComplexityType} = 'MACCSKeys';
  }

  # Make sure AtomIdentifierType is set; it's not used for MACCSKeys...
  if ($This->{MolecularComplexityType} !~ /^MACCSKeys$/i) {
    if (!exists $NamesAndValues{AtomIdentifierType}) {
      $This->_InitializeAtomIdentifierType();
    }
  }

  # Make sure UseTriangleInequality is set: on for pharmacophore triplets, off for
  # atom triplets...
  if ($This->{MolecularComplexityType} =~ /^(TopologicalAtomTripletsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) {
    if (!exists $NamesAndValues{UseTriangleInequality}) {
      $This->{UseTriangleInequality} = ($This->{MolecularComplexityType} =~ /^TopologicalPharmacophoreAtomTripletsFingerprints$/i) ? 1 : 0;
    }
  }

  return $This;
}
| 251 | |
# Initialize atom identifier type to the default for the current complexity type...
#
# Nothing is done for MACCSKeys. FunctionalClassAtomTypes is used for the two
# topological pharmacophore fingerprints; AtomicInvariantsAtomTypes is used for
# everything else. Returns the object.
#
sub _InitializeAtomIdentifierType {
  my($This) = @_;
  my $ComplexityType = $This->{MolecularComplexityType};

  return $This if $ComplexityType =~ /^MACCSKeys$/i;

  if ($ComplexityType =~ /^(TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) {
    $This->SetAtomIdentifierType('FunctionalClassAtomTypes');
  }
  else {
    $This->SetAtomIdentifierType('AtomicInvariantsAtomTypes');
  }

  return $This;
}
| 268 | |
# Get abbreviation for a molecular complexity type...
#
# The first parameter is either a descriptors object, in which case its
# MolecularComplexityType is used, or the name of a complexity type. It can be
# invoked as a class function or an object method.
#
# The lookup ignores case. An empty string is returned for unrecognized types.
#
sub GetMolecularComplexityTypeAbbreviation {
  my($FirstParameter) = @_;
  my($ComplexityType, $TypeKey, $TypeName, $Abbrev, @TypeNamesAndAbbrevs, %AbbrevForType);

  $ComplexityType = _IsMolecularComplexityDescriptors($FirstParameter) ? $FirstParameter->{MolecularComplexityType} : $FirstParameter;

  # NOTE(review): TopologicalAtomTripletsFingerprints and TopologicalAtomTorsionsFingerprints
  # share the abbreviation TATFP - confirm whether that is intended.
  @TypeNamesAndAbbrevs = (
    'AtomTypesFingerprints' => 'ATFP',
    'ExtendedConnectivityFingerprints' => 'ECFP',
    'MACCSKeys' => 'MACCSKeys',
    'PathLengthFingerprints' => 'PLFP',
    'TopologicalAtomPairsFingerprints' => 'TAPFP',
    'TopologicalAtomTripletsFingerprints' => 'TATFP',
    'TopologicalAtomTorsionsFingerprints' => 'TATFP',
    'TopologicalPharmacophoreAtomPairsFingerprints' => 'TPAPFP',
    'TopologicalPharmacophoreAtomTripletsFingerprints' => 'TPATFP',
  );

  # Key the lookup table by lower case type names...
  %AbbrevForType = ();
  while (($TypeName, $Abbrev) = splice(@TypeNamesAndAbbrevs, 0, 2)) {
    $AbbrevForType{lc $TypeName} = $Abbrev;
  }

  $TypeKey = lc $ComplexityType;
  if (exists $AbbrevForType{$TypeKey}) {
    return $AbbrevForType{$TypeKey};
  }

  return '';
}
| 295 | |
# Set MACCS key set size...
#
# Parameters:
#   $Value - size of the MACCS key set; it must be exactly 166 or 322
#
# Returns the object. Croaks for any other value.
#
sub SetMACCSKeysSize {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetMACCSKeysSize: Size value, $Value, is not valid: It must be a positive integer...";
  }

  # The pattern is anchored at both ends: without the end anchor, values that merely
  # start with a supported size, such as 1660 or 3221, would be accepted...
  if ($Value !~ /^(166|322)$/) {
    croak "Error: ${ClassName}->SetMACCSKeysSize: The current release of MayaChemTools doesn't support MDL MACCS $Value keys...";
  }
  $This->{MACCSKeysSize} = $Value;

  return $This;
}
| 311 | |
# Set minimum path length used for path length fingerprints...
#
# Croaks unless $Value is a positive integer; returns the object.
#
sub SetMinPathLength {
  my($This, $Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetMinPathLength: MinPathLength value, $Value, is not valid: It must be a positive integer...";

  $This->{MinPathLength} = $Value;

  return $This;
}
| 324 | |
# Set maximum path length used for path length fingerprints...
#
# Croaks unless $Value is a positive integer; returns the object.
#
sub SetMaxPathLength {
  my($This, $Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetMaxPathLength: MaxPathLength value, $Value, is not valid: It must be a positive integer...";

  $This->{MaxPathLength} = $Value;

  return $This;
}
| 337 | |
# Set minimum bond distance between atom pairs during topological and topological
# pharmacophore atom pairs/triplets fingerprints...
#
# Croaks unless $Value is a positive integer; returns the object.
#
sub SetMinDistance {
  my($This, $Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid: It must be a positive integer...";

  $This->{MinDistance} = $Value;

  return $This;
}
| 351 | |
# Set maximum bond distance between atom pairs during topological and topological
# pharmacophore atom pairs/triplets fingerprints...
#
# Croaks unless $Value is a positive integer; returns the object.
#
sub SetMaxDistance {
  my($This, $Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid: It must be a positive integer...";

  $This->{MaxDistance} = $Value;

  return $This;
}
| 365 | |
# Set atom neighborhood radius used for extended connectivity fingerprints...
#
# Croaks unless $Value is an integer greater than or equal to zero; returns the object.
#
sub SetNeighborhoodRadius {
  my($This, $Value) = @_;

  TextUtil::IsInteger($Value)
    or croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid: It must be an integer...";

  croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid: It must be >= 0..."
    if $Value < 0;

  $This->{NeighborhoodRadius} = $Value;

  return $This;
}
| 382 | |
# Set molecular complexity type...
#
# The value is matched, ignoring case, against the names of the supported
# fingerprints types and is stored exactly as specified. Croaks for unsupported
# values; returns the object.
#
sub SetMolecularComplexityType {
  my($This, $Value) = @_;

  unless ($Value =~ /^(AtomTypesFingerprints|ExtendedConnectivityFingerprints|MACCSKeys|PathLengthFingerprints|TopologicalAtomPairsFingerprints|TopologicalAtomTripletsFingerprints|TopologicalAtomTorsionsFingerprints|TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) {
    croak "Error: ${ClassName}->SetMolecularComplexityType: MolecularComplexityType value, $Value, is not valid. Supported values: AtomTypesFingerprints, ExtendedConnectivityFingerprints, MACCSKeys, PathLengthFingerprints, TopologicalAtomPairsFingerprints, TopologicalAtomTripletsFingerprints, TopologicalAtomTorsionsFingerprints, TopologicalPharmacophoreAtomPairsFingerprints, or TopologicalPharmacophoreAtomTripletsFingerprints...";
  }

  $This->{MolecularComplexityType} = $Value;

  return $This;
}
| 396 | |
# Set distance bin size for binning pharmacophore atom pair distances in atom triplets...
#
# Croaks unless $Value is a positive integer; returns the object.
#
sub SetDistanceBinSize {
  my($This, $Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetDistanceBinSize: DistanceBinSize value, $Value, is not valid: It must be a positive integer...";

  $This->{DistanceBinSize} = $Value;

  return $This;
}
| 409 | |
# Set normalization methodology to use for scaling the number of bits-set or unique keys
# in fingerprints...
#
# Parameters:
#   $Value - None, ByHeavyAtomsCount or ByPossibleKeysCount; case is ignored
#
# None is the default for all complexity types and is always accepted. A request
# for an actual normalization is checked against MolecularComplexityType when it has
# already been set: ByHeavyAtomsCount is valid only for ExtendedConnectivityFingerprints
# and ByPossibleKeysCount is valid only for the two topological pharmacophore
# fingerprints.
#
# Returns the object. Croaks for invalid values.
#
sub SetNormalizationMethodology {
  my($This, $Value) = @_;

  if ($Value !~ /^(ByHeavyAtomsCount|ByPossibleKeysCount|None)$/i) {
    croak "Error: ${ClassName}->SetNormalizationMethodology: NormalizationMethodology value, $Value, is not valid. Supported values: None, ByHeavyAtomsCount or ByPossibleKeysCount...";
  }

  # Explicitly specifying the default, None, must not fail for fingerprints which
  # don't support normalization; only real normalization requests are cross-checked...
  if ($This->{MolecularComplexityType} && $Value !~ /^None$/i) {
    if ($This->{MolecularComplexityType} !~ /^(ExtendedConnectivityFingerprints|TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) {
      croak "Error: ${ClassName}->SetNormalizationMethodology: Normalization is not supported for MolecularComplexityType: $This->{MolecularComplexityType}. Valid MolecularComplexityType values: ExtendedConnectivityFingerprints, TopologicalPharmacophoreAtomPairsFingerprints, or TopologicalPharmacophoreAtomTripletsFingerprints...\n";
    }

    if ($This->{MolecularComplexityType} =~ /^ExtendedConnectivityFingerprints$/i && $Value !~ /^(ByHeavyAtomsCount|None)$/i) {
      croak "Error: ${ClassName}->SetNormalizationMethodology: NormalizationMethodology value, $Value, is not valid for MolecularComplexityType: $This->{MolecularComplexityType}. Supported values: None or ByHeavyAtomsCount...";
    }

    if ($This->{MolecularComplexityType} =~ /^(TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i && $Value !~ /^(ByPossibleKeysCount|None)$/i) {
      croak "Error: ${ClassName}->SetNormalizationMethodology: NormalizationMethodology value, $Value, is not valid for MolecularComplexityType: $This->{MolecularComplexityType}. Supported values: None or ByPossibleKeysCount...";
    }
  }

  $This->{NormalizationMethodology} = $Value;

  return $This;
}
| 438 | |
# Set initial atom identifier type...
#
# The value must be one of the supported atom types names; case is ignored. For the
# two topological pharmacophore fingerprints, FunctionalClassAtomTypes is the only
# acceptable value. The type can be set only once: croaks when it's already set.
#
# After the value is stored, _InitializeAtomIdentifierTypeInformation is invoked.
# Returns the object.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  unless ($IdentifierType =~ /^(AtomicInvariantsAtomTypes|FunctionalClassAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes and UFFAtomTypes.";
  }

  # FunctionalClassAtomTypes is the only valid atom identifier type for pharmacophore fingerprints...
  if ($This->{MolecularComplexityType} =~ /^(TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i && $IdentifierType !~ /^FunctionalClassAtomTypes$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported type for $This->{MolecularComplexityType} complexity type: FunctionalClassAtomTypes.";
  }

  croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type: It's already set..."
    if $This->{AtomIdentifierType};

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}
| 466 | |
# Calculate molecular complexity [ Ref 117-119 ] of a molecule using its fingerprints.
#
# Molecular complexity corresponds to the number of bits-set or unique keys in
# molecular fingerprints. The following types of fingerprints based molecular
# complexity measures are supported:
#
#   AtomTypesFingerprints
#   ExtendedConnectivityFingerprints
#   MACCSKeys
#   PathLengthFingerprints
#   TopologicalAtomPairsFingerprints
#   TopologicalAtomTripletsFingerprints
#   TopologicalAtomTorsionsFingerprints
#   TopologicalPharmacophoreAtomPairsFingerprints
#   TopologicalPharmacophoreAtomTripletsFingerprints
#
# After the molecular complexity value has been calculated, it can also be normalized
# by scaling the number of bits-set or unique keys for the following types of
# fingerprints:
#
#   ExtendedConnectivityFingerprints
#   TopologicalPharmacophoreAtomPairsFingerprints
#   TopologicalPharmacophoreAtomTripletsFingerprints
#
# Two normalization methodologies are supported: by heavy atoms count for extended
# connectivity fingerprints; by possible keys count for topological pharmacophore
# atom pairs and triplets fingerprints.
#
# Returns the object on success. A warning is issued and undef is returned when no
# molecule has been set or the calculation fails.
#
sub GenerateDescriptors {
  my($This) = @_;

  # Initialize descriptor values...
  $This->_InitializeDescriptorValues();

  # A molecule is required...
  unless ($This->{Molecule}) {
    carp "Warning: ${ClassName}->GenerateDescriptors: $This->{Type} molecular descriptors generation didn't succeed: Molecule data is not available: Molecule object hasn't been set...";
    return undef;
  }

  # Calculate descriptor values...
  unless ($This->_CalculateDescriptorValues()) {
    carp "Warning: ${ClassName}->GenerateDescriptors: $This->{Type} molecular descriptors generation didn't succeed: Couldn't calculate MolecularComplexity values corresponding to assigned MolecularComplexity atom types...";
    return undef;
  }

  # Set final descriptor values...
  $This->_SetFinalDescriptorValues();

  return $This;
}
| 517 | |
# Calculate molecular complexity value...
#
# A fingerprints object is set up by the _Setup<MolecularComplexityType> method, its
# fingerprints are generated, and the molecular complexity value is calculated from
# them. The value is normalized unless NormalizationMethodology is None.
#
# SetMolecularComplexityType accepts type names in any case and stores them as
# specified. The stored name is consequently mapped to its canonical spelling before
# the setup method name is built: without it, a value such as maccskeys would pass
# validation and then fail at the method call.
#
# Returns the object on success and undef on failure.
#
sub _CalculateDescriptorValues {
  my($This) = @_;
  my($FingerprintsObject, $MethodName, $ComplexityType, @SupportedTypes);

  @SupportedTypes = qw(AtomTypesFingerprints ExtendedConnectivityFingerprints MACCSKeys PathLengthFingerprints TopologicalAtomPairsFingerprints TopologicalAtomTripletsFingerprints TopologicalAtomTorsionsFingerprints TopologicalPharmacophoreAtomPairsFingerprints TopologicalPharmacophoreAtomTripletsFingerprints);

  # Use canonical spelling of the type; an unrecognized value is used as it is...
  $ComplexityType = $This->{MolecularComplexityType};
  for my $SupportedType (@SupportedTypes) {
    if (lc($SupportedType) eq lc($ComplexityType)) {
      $ComplexityType = $SupportedType;
      last;
    }
  }

  # Setup fingerprints object and generate fingerprints...
  $MethodName = "_Setup" . $ComplexityType;
  $FingerprintsObject = $This->$MethodName();

  $FingerprintsObject->GenerateFingerprints();

  # Make sure fingerprints generation is successful...
  if (!$FingerprintsObject->IsFingerprintsGenerationSuccessful()) {
    return undef;
  }

  if (!$This->_CalculateMolecularComplexity($FingerprintsObject)) {
    return undef;
  }

  # Normalize molecular complexity...
  if ($This->{NormalizationMethodology} !~ /^None$/i) {
    if (!$This->_NormalizeMolecularComplexity($FingerprintsObject)) {
      return undef;
    }
  }

  return $This;
}
| 548 | |
# Setup atom types fingerprints...
#
# Creates an AtomTypesCount fingerprints object, with hydrogens ignored, for the
# molecule using the current atom identifier type and passes it to
# _SetAtomIdentifierTypeValuesToUse. Returns the fingerprints object.
#
sub _SetupAtomTypesFingerprints {
  my($This) = @_;
  my($FingerprintsObject);

  # Explicit class method call: indirect object syntax relies on parser heuristics,
  # which is fragile in a package defining its own new...
  $FingerprintsObject = Fingerprints::AtomTypesFingerprints->new('Molecule' => $This->{Molecule}, 'Type' => 'AtomTypesCount', 'AtomIdentifierType' => $This->{AtomIdentifierType}, 'IgnoreHydrogens' => 1);
  $This->_SetAtomIdentifierTypeValuesToUse($FingerprintsObject);

  return $FingerprintsObject;
}
| 560 | |
# Setup extended connectivity fingerprints...
#
# Creates an ExtendedConnectivity fingerprints object for the molecule using the
# current neighborhood radius and atom identifier type, and passes it to
# _SetAtomIdentifierTypeValuesToUse. Returns the fingerprints object.
#
sub _SetupExtendedConnectivityFingerprints {
  my($This) = @_;
  my($FingerprintsObject);

  # Explicit class method call: indirect object syntax relies on parser heuristics,
  # which is fragile in a package defining its own new...
  $FingerprintsObject = Fingerprints::ExtendedConnectivityFingerprints->new('Molecule' => $This->{Molecule}, 'Type' => 'ExtendedConnectivity', 'NeighborhoodRadius' => $This->{NeighborhoodRadius}, 'AtomIdentifierType' => $This->{AtomIdentifierType});
  $This->_SetAtomIdentifierTypeValuesToUse($FingerprintsObject);

  return $FingerprintsObject;
}
| 572 | |
# Setup MACCS keys...
#
# Creates a MACCSKeyBits fingerprints object for the molecule using the current
# MACCS key set size. Atom identifier type isn't used. Returns the fingerprints
# object.
#
sub _SetupMACCSKeys {
  my($This) = @_;
  my($FingerprintsObject);

  # Explicit class method call: indirect object syntax relies on parser heuristics,
  # which is fragile in a package defining its own new...
  $FingerprintsObject = Fingerprints::MACCSKeys->new('Molecule' => $This->{Molecule}, 'Type' => 'MACCSKeyBits', 'Size' => $This->{MACCSKeysSize});

  return $FingerprintsObject;
}
| 583 | |
# Setup path length fingerprints...
#
# Creates a PathLengthCount fingerprints object for the molecule using the current
# minimum and maximum path lengths, bond symbols usage and atom identifier type;
# rings, shared bonds and unique paths are turned on. The object is passed to
# _SetAtomIdentifierTypeValuesToUse. Returns the fingerprints object.
#
sub _SetupPathLengthFingerprints {
  my($This) = @_;
  my($FingerprintsObject);

  # Explicit class method call: indirect object syntax relies on parser heuristics,
  # which is fragile in a package defining its own new...
  $FingerprintsObject = Fingerprints::PathLengthFingerprints->new('Molecule' => $This->{Molecule}, 'Type' => 'PathLengthCount', 'AtomIdentifierType' => $This->{AtomIdentifierType}, 'MinLength' => $This->{MinPathLength}, 'MaxLength' => $This->{MaxPathLength}, 'AllowRings' => 1, 'AllowSharedBonds' => 1, 'UseBondSymbols' => $This->{UseBondSymbols}, 'UseUniquePaths' => 1);
  $This->_SetAtomIdentifierTypeValuesToUse($FingerprintsObject);

  return $FingerprintsObject;
}
| 595 | |
# Setup topological atom pairs fingerprints...
#
# Creates a fingerprints object for the molecule using the current minimum and
# maximum distances and atom identifier type, and passes it to
# _SetAtomIdentifierTypeValuesToUse. Returns the fingerprints object.
#
sub _SetupTopologicalAtomPairsFingerprints {
  my($This) = @_;
  my($FingerprintsObject);

  # Explicit class method call: indirect object syntax relies on parser heuristics,
  # which is fragile in a package defining its own new...
  $FingerprintsObject = Fingerprints::TopologicalAtomPairsFingerprints->new('Molecule' => $This->{Molecule}, 'MinDistance' => $This->{MinDistance}, 'MaxDistance' => $This->{MaxDistance}, 'AtomIdentifierType' => $This->{AtomIdentifierType});
  $This->_SetAtomIdentifierTypeValuesToUse($FingerprintsObject);

  return $FingerprintsObject;
}
| 607 | |
# Setup topological atom triplets fingerprints...
#
# Creates a fingerprints object for the molecule using the current minimum and
# maximum distances, triangle inequality usage and atom identifier type, and passes
# it to _SetAtomIdentifierTypeValuesToUse. Returns the fingerprints object.
#
sub _SetupTopologicalAtomTripletsFingerprints {
  my($This) = @_;
  my($FingerprintsObject);

  # Explicit class method call: indirect object syntax relies on parser heuristics,
  # which is fragile in a package defining its own new...
  $FingerprintsObject = Fingerprints::TopologicalAtomTripletsFingerprints->new('Molecule' => $This->{Molecule}, 'MinDistance' => $This->{MinDistance}, 'MaxDistance' => $This->{MaxDistance}, 'UseTriangleInequality' => $This->{UseTriangleInequality}, 'AtomIdentifierType' => $This->{AtomIdentifierType});
  $This->_SetAtomIdentifierTypeValuesToUse($FingerprintsObject);

  return $FingerprintsObject;
}
| 619 | |
# Setup topological atom torsions fingerprints...
#
# Creates a fingerprints object for the molecule using the current atom identifier
# type and passes it to _SetAtomIdentifierTypeValuesToUse. Returns the fingerprints
# object.
#
sub _SetupTopologicalAtomTorsionsFingerprints {
  my($This) = @_;
  my($FingerprintsObject);

  # Explicit class method call: indirect object syntax relies on parser heuristics,
  # which is fragile in a package defining its own new...
  $FingerprintsObject = Fingerprints::TopologicalAtomTorsionsFingerprints->new('Molecule' => $This->{Molecule}, 'AtomIdentifierType' => $This->{AtomIdentifierType});

  $This->_SetAtomIdentifierTypeValuesToUse($FingerprintsObject);

  return $FingerprintsObject;
}
| 632 | |
# Setup topological pharmacophore atom pairs fingerprints...
#
# Creates a fingerprints object for the molecule using the current minimum and
# maximum distances and functional classes. FixedSize atom pairs set is requested
# during ByPossibleKeysCount normalization and ArbitrarySize otherwise.
# Normalization by the fingerprints object itself is turned off. Returns the
# fingerprints object.
#
sub _SetupTopologicalPharmacophoreAtomPairsFingerprints {
  my($This) = @_;
  my($FingerprintsObject, $AtomPairsSetSizeToUse);

  # Use fixed size to get total number of possible keys for normalization...
  $AtomPairsSetSizeToUse = ($This->{NormalizationMethodology} =~ /^ByPossibleKeysCount$/i) ? 'FixedSize' : 'ArbitrarySize';

  # Explicit class method call: indirect object syntax relies on parser heuristics,
  # which is fragile in a package defining its own new...
  $FingerprintsObject = Fingerprints::TopologicalPharmacophoreAtomPairsFingerprints->new('Molecule' => $This->{Molecule}, 'AtomPairsSetSizeToUse' => $AtomPairsSetSizeToUse, 'MinDistance' => $This->{MinDistance}, 'MaxDistance' => $This->{MaxDistance}, 'AtomTypesToUse' => \@{$This->{FunctionalClassesToUse}}, 'NormalizationMethodology' => 'None', 'ValuesPrecision' => 2);

  return $FingerprintsObject;
}
| 646 | |
# Setup topological pharmacophore atom triplets fingerprints...
#
# Creates a fingerprints object for the molecule using the current minimum and
# maximum distances, distance bin size, triangle inequality usage and functional
# classes. FixedSize atom triplets set is requested during ByPossibleKeysCount
# normalization and ArbitrarySize otherwise. Returns the fingerprints object.
#
sub _SetupTopologicalPharmacophoreAtomTripletsFingerprints {
  my($This) = @_;
  my($FingerprintsObject, $AtomTripletsSetSizeToUse);

  # Use fixed size to get total number of possible keys for normalization...
  $AtomTripletsSetSizeToUse = ($This->{NormalizationMethodology} =~ /^ByPossibleKeysCount$/i) ? 'FixedSize' : 'ArbitrarySize';

  # Explicit class method call: indirect object syntax relies on parser heuristics,
  # which is fragile in a package defining its own new...
  $FingerprintsObject = Fingerprints::TopologicalPharmacophoreAtomTripletsFingerprints->new('Molecule' => $This->{Molecule}, 'AtomTripletsSetSizeToUse' => $AtomTripletsSetSizeToUse, 'MinDistance' => $This->{MinDistance}, 'MaxDistance' => $This->{MaxDistance}, 'DistanceBinSize' => $This->{DistanceBinSize}, 'UseTriangleInequality' => $This->{UseTriangleInequality}, 'AtomTypesToUse' => \@{$This->{FunctionalClassesToUse}});

  return $FingerprintsObject;
}
| 660 | |
# Normalize molecular complexity value...
#
# Dispatches to normalization by heavy atoms count for extended connectivity
# fingerprints or by possible keys count for topological pharmacophore fingerprints.
# Any other combination of complexity type and normalization methodology causes a
# warning and undef is returned.
#
sub _NormalizeMolecularComplexity {
  my($This, $FingerprintsObject) = @_;
  my $ComplexityType = $This->{MolecularComplexityType};
  my $Methodology = $This->{NormalizationMethodology};

  if ($ComplexityType =~ /^ExtendedConnectivityFingerprints$/i && $Methodology =~ /^ByHeavyAtomsCount$/i) {
    return $This->_NormalizeMolecularComplexityByHeavyAtomsCount($FingerprintsObject);
  }

  if ($ComplexityType =~ /^(TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i && $Methodology =~ /^ByPossibleKeysCount$/i) {
    return $This->_NormalizeMolecularComplexityByPossibleKeysCount($FingerprintsObject);
  }

  warn "Warning: ${ClassName}->_NormalizeMolecularComplexity: NormalizationMethodology value, $This->{NormalizationMethodology}, is not valid. Supported values: ByHeavyAtomsCount or ByPossibleKeysCount...";

  return undef;
}
| 677 | |
# Normalize molecular complexity value by heavy atom count...
#
# The value is divided by the number of heavy atoms in the molecule and rounded
# through MathUtil::round with 2 as its second argument. It's left unchanged when
# the molecule reports no heavy atoms. The fingerprints object is not used.
# Returns the object.
#
sub _NormalizeMolecularComplexityByHeavyAtomsCount {
  my($This, $FingerprintsObject) = @_;

  my $HeavyAtomsCount = $This->{Molecule}->GetNumOfHeavyAtoms();

  # Avoid division by zero...
  if ($HeavyAtomsCount) {
    my $ScaledValue = $This->{MolecularComplexity} / $HeavyAtomsCount;
    $This->{MolecularComplexity} = MathUtil::round($ScaledValue, 2) + 0;
  }

  return $This;
}
| 694 | |
| 695 # Normalize molecular complexity value by possible keys count... | |
| 696 # | |
| 697 sub _NormalizeMolecularComplexityByPossibleKeysCount { | |
| 698 my($This, $FingerprintsObject) = @_; | |
| 699 my($NumOfPossibleKeys, $NormalizedComplexity); | |
| 700 | |
| 701 $NumOfPossibleKeys = $FingerprintsObject->GetFingerprintsVector()->GetNumOfValues(); | |
| 702 if (!$NumOfPossibleKeys) { | |
| 703 return $This; | |
| 704 } | |
| 705 | |
| 706 $NormalizedComplexity = $This->{MolecularComplexity} / $NumOfPossibleKeys; | |
| 707 $This->{MolecularComplexity} = MathUtil::round($NormalizedComplexity, 2) + 0; | |
| 708 | |
| 709 return $This; | |
| 710 } | |
| 711 | |
| 712 # Calculate molecular complexity value using fingerprints objects... | |
| 713 # | |
| 714 sub _CalculateMolecularComplexity { | |
| 715 my($This, $FingerprintsObject) = @_; | |
| 716 | |
| 717 if ($FingerprintsObject->GetVectorType() =~ /^FingerprintsBitVector$/i) { | |
| 718 return $This->_CalculateMolecularComplexityUsingFingerprintsBitVector($FingerprintsObject->GetFingerprintsBitVector()); | |
| 719 } | |
| 720 elsif ($FingerprintsObject->GetVectorType() =~ /^FingerprintsVector$/i) { | |
| 721 return $This->_CalculateMolecularComplexityUsingFingerprintsVector($FingerprintsObject->GetFingerprintsVector()); | |
| 722 } | |
| 723 else { | |
| 724 warn "Warning: ${ClassName}->_CalculateMolecularComplexity: Fingerprints vector type is not valid. Supported values: FingerprintsBitVector or FingerprintsVector..."; | |
| 725 } | |
| 726 | |
| 727 return undef; | |
| 728 } | |
| 729 | |
| 730 # Calculate molecular complexity value using fingerprints vector... | |
| 731 # | |
| 732 sub _CalculateMolecularComplexityUsingFingerprintsVector { | |
| 733 my($This, $FingerprintsVector) = @_; | |
| 734 | |
| 735 $This->{MolecularComplexity} = ($FingerprintsVector->GetType() =~ /^(OrderedNumericalValues|NumericalValues)$/i) ? $FingerprintsVector->GetNumOfNonZeroValues() : $FingerprintsVector->GetNumOfValues(); | |
| 736 | |
| 737 return $This; | |
| 738 } | |
| 739 | |
| 740 # Calculate molecular complexity value using fingerprints bit vector... | |
| 741 # | |
| 742 sub _CalculateMolecularComplexityUsingFingerprintsBitVector { | |
| 743 my($This, $FingerprintsBitVector) = @_; | |
| 744 | |
| 745 $This->{MolecularComplexity} = $FingerprintsBitVector->GetNumOfSetBits(); | |
| 746 | |
| 747 return $This; | |
| 748 } | |
| 749 | |
| 750 # Setup final descriptor values... | |
| 751 # | |
| 752 sub _SetFinalDescriptorValues { | |
| 753 my($This) = @_; | |
| 754 | |
| 755 $This->{DescriptorsGenerated} = 1; | |
| 756 | |
| 757 $This->SetDescriptorValues($This->{MolecularComplexity}); | |
| 758 | |
| 759 return $This; | |
| 760 } | |
| 761 | |
| 762 # Set atom identifier type to use for generating fingerprints... | |
| 763 # | |
| 764 sub _SetAtomIdentifierTypeValuesToUse { | |
| 765 my($This, $FingerprintsObject) = @_; | |
| 766 | |
| 767 if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) { | |
| 768 $FingerprintsObject->SetAtomicInvariantsToUse(\@{$This->{AtomicInvariantsToUse}}); | |
| 769 } | |
| 770 elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) { | |
| 771 $FingerprintsObject->SetFunctionalClassesToUse(\@{$This->{FunctionalClassesToUse}}); | |
| 772 } | |
| 773 elsif ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) { | |
| 774 # Nothing to do for now... | |
| 775 } | |
| 776 else { | |
| 777 croak "Error: The value specified, $This->{AtomIdentifierType}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n"; | |
| 778 } | |
| 779 } | |
| 780 | |
| 781 # Initialize atom identifier type information... | |
| 782 # | |
| 783 # Current supported values: | |
| 784 # | |
| 785 # AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
| 786 # MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes | |
| 787 # | |
| 788 sub _InitializeAtomIdentifierTypeInformation { | |
| 789 my($This) = @_; | |
| 790 | |
| 791 IDENTIFIERTYPE: { | |
| 792 if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) { | |
| 793 $This->_InitializeAtomicInvariantsAtomTypesInformation(); | |
| 794 last IDENTIFIERTYPE; | |
| 795 } | |
| 796 if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) { | |
| 797 $This->_InitializeFunctionalClassAtomTypesInformation(); | |
| 798 last IDENTIFIERTYPE; | |
| 799 } | |
| 800 if ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) { | |
| 801 # Nothing to do for now... | |
| 802 last IDENTIFIERTYPE; | |
| 803 } | |
| 804 carp "Warning: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}..."; | |
| 805 } | |
| 806 return $This; | |
| 807 } | |
| 808 | |
| 809 # Initialize atomic invariants atom types, generated by AtomTypes::AtomicInvariantsAtomTypes | |
| 810 # class, to use for generating initial atom identifiers... | |
| 811 # | |
| 812 # Let: | |
| 813 # AS = Atom symbol corresponding to element symbol | |
| 814 # | |
| 815 # X<n> = Number of non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 816 # BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 817 # LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 818 # SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 819 # DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 820 # TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 821 # H<n> = Number of implicit and explicit hydrogens for atom | |
| 822 # Ar = Aromatic annotation indicating whether atom is aromatic | |
| 823 # RA = Ring atom annotation indicating whether atom is in a ring | |
| 824 # FC<+n/-n> = Formal charge assigned to atom | |
| 825 # MN<n> = Mass number indicating isotope other than most abundant isotope | |
| 826 # SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet) | |
| 827 # | |
| 828 # Then: | |
| 829 # | |
| 830 # Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
| 831 # | |
| 832 # AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n> | |
| 833 # | |
| 834 # Except for AS which is a required atomic invariant in atom types, all other atomic invariants are | |
| 835 # optional. | |
| 836 # | |
| 837 # Default atomic invariants used for generating initial atom identifiers are [ Ref 24 ]: | |
| 838 # | |
| 839 # AS, X<n>, BO<n>, H<n>, FC<+n/-n>, MN<n> | |
| 840 # | |
| 841 # In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words | |
| 842 # are also allowed: | |
| 843 # | |
| 844 # X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors | |
| 845 # BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms | |
| 846 # LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms | |
| 847 # SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms | |
| 848 # DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms | |
| 849 # TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms | |
| 850 # H : NumOfImplicitAndExplicitHydrogens | |
| 851 # Ar : Aromatic | |
| 852 # RA : RingAtom | |
| 853 # FC : FormalCharge | |
| 854 # MN : MassNumber | |
| 855 # SM : SpinMultiplicity | |
| 856 # | |
| 857 sub _InitializeAtomicInvariantsAtomTypesInformation { | |
| 858 my($This) = @_; | |
| 859 | |
| 860 @{$This->{AtomicInvariantsToUse}} = (); | |
| 861 | |
| 862 if ($This->{MolecularComplexityType} =~ /^(AtomTypesFingerprints|TopologicalAtomPairsFingerprints|TopologicalAtomTripletsFingerprints|TopologicalAtomTorsionsFingerprints)$/i) { | |
| 863 @{$This->{AtomicInvariantsToUse}} = ('AS', 'X', 'BO', 'H', 'FC'); | |
| 864 } | |
| 865 elsif ($This->{MolecularComplexityType} =~ /^ExtendedConnectivityFingerprints$/i) { | |
| 866 @{$This->{AtomicInvariantsToUse}} = ('AS', 'X', 'BO', 'H', 'FC', 'MN'); | |
| 867 } | |
| 868 elsif ($This->{MolecularComplexityType} =~ /^PathLengthFingerprints$/i) { | |
| 869 @{$This->{AtomicInvariantsToUse}} = ('AS'); | |
| 870 } | |
| 871 | |
| 872 return $This; | |
| 873 } | |
| 874 | |
| 875 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes | |
| 876 # class, to use for generating initial atom identifiers... | |
| 877 # | |
| 878 # Let: | |
| 879 # HBD: HydrogenBondDonor | |
| 880 # HBA: HydrogenBondAcceptor | |
| 881 # PI : PositivelyIonizable | |
| 882 # NI : NegativelyIonizable | |
| 883 # Ar : Aromatic | |
| 884 # Hal : Halogen | |
| 885 # H : Hydrophobic | |
| 886 # RA : RingAtom | |
| 887 # CA : ChainAtom | |
| 888 # | |
| 889 # Then: | |
| 890 # | |
| 891 # Functional class atom type specification for an atom corresponds to: | |
| 892 # | |
| 893 # Ar.CA.H.HBA.HBD.Hal.NI.PI.RA | |
| 894 # | |
| 895 # Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal | |
| 896 # | |
| 897 # FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]: | |
| 898 # | |
| 899 # HydrogenBondDonor: NH, NH2, OH | |
| 900 # HydrogenBondAcceptor: N[!H], O | |
| 901 # PositivelyIonizable: +, NH2 | |
| 902 # NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH | |
| 903 # | |
| 904 sub _InitializeFunctionalClassAtomTypesInformation { | |
| 905 my($This) = @_; | |
| 906 | |
| 907 @{$This->{FunctionalClassesToUse}} = (); | |
| 908 | |
| 909 if ($This->{MolecularComplexityType} =~ /^(AtomTypesFingerprints|ExtendedConnectivityFingerprints|PathLengthFingerprints|TopologicalAtomPairsFingerprints|TopologicalAtomTripletsFingerprints|TopologicalAtomTorsionsFingerprints)$/i) { | |
| 910 @{$This->{FunctionalClassesToUse}} = ('HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal'); | |
| 911 } | |
| 912 elsif ($This->{MolecularComplexityType} =~ /^TopologicalPharmacophoreAtomPairsFingerprints$/i) { | |
| 913 @{$This->{FunctionalClassesToUse}} = ('HBD', 'HBA', 'PI', 'NI', 'H'); | |
| 914 } | |
| 915 elsif ($This->{MolecularComplexityType} =~ /^TopologicalPharmacophoreAtomTripletsFingerprints$/i) { | |
| 916 @{$This->{FunctionalClassesToUse}} = ('HBD', 'HBA', 'PI', 'NI', 'H', 'Ar'); | |
| 917 } | |
| 918 | |
| 919 return $This; | |
| 920 } | |
| 921 | |
| 922 # Set atomic invariants to use for generation of initial atom identifiers... | |
| 923 # | |
| 924 sub SetAtomicInvariantsToUse { | |
| 925 my($This, @Values) = @_; | |
| 926 my($FirstValue, $TypeOfFirstValue, $AtomicInvariant, $SpecifiedAtomicInvariant, @SpecifiedAtomicInvariants, @AtomicInvariantsToUse); | |
| 927 | |
| 928 if (!@Values) { | |
| 929 carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified..."; | |
| 930 return; | |
| 931 } | |
| 932 | |
| 933 if ($This->{AtomIdentifierType} !~ /^AtomicInvariantsAtomTypes$/i) { | |
| 934 carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: AtomicInvariantsToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}..."; | |
| 935 return; | |
| 936 } | |
| 937 | |
| 938 $FirstValue = $Values[0]; | |
| 939 $TypeOfFirstValue = ref $FirstValue; | |
| 940 | |
| 941 @SpecifiedAtomicInvariants = (); | |
| 942 @AtomicInvariantsToUse = (); | |
| 943 | |
| 944 if ($TypeOfFirstValue =~ /^ARRAY/) { | |
| 945 push @SpecifiedAtomicInvariants, @{$FirstValue}; | |
| 946 } | |
| 947 else { | |
| 948 push @SpecifiedAtomicInvariants, @Values; | |
| 949 } | |
| 950 | |
| 951 # Make sure specified AtomicInvariants are valid... | |
| 952 for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) { | |
| 953 if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) { | |
| 954 croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n "; | |
| 955 } | |
| 956 $AtomicInvariant = $SpecifiedAtomicInvariant; | |
| 957 push @AtomicInvariantsToUse, $AtomicInvariant; | |
| 958 } | |
| 959 | |
| 960 # Set atomic invariants to use... | |
| 961 @{$This->{AtomicInvariantsToUse}} = (); | |
| 962 push @{$This->{AtomicInvariantsToUse}}, @AtomicInvariantsToUse; | |
| 963 | |
| 964 return $This; | |
| 965 } | |
| 966 | |
| 967 # Set functional classes to use for generation of initial atom identifiers... | |
| 968 # | |
| 969 sub SetFunctionalClassesToUse { | |
| 970 my($This, @Values) = @_; | |
| 971 my($FirstValue, $TypeOfFirstValue, $FunctionalClass, $SpecifiedFunctionalClass, @SpecifiedFunctionalClasses, @FunctionalClassesToUse); | |
| 972 | |
| 973 if (!@Values) { | |
| 974 carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified..."; | |
| 975 return; | |
| 976 } | |
| 977 | |
| 978 if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) { | |
| 979 carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}..."; | |
| 980 return; | |
| 981 } | |
| 982 | |
| 983 $FirstValue = $Values[0]; | |
| 984 $TypeOfFirstValue = ref $FirstValue; | |
| 985 | |
| 986 @SpecifiedFunctionalClasses = (); | |
| 987 @FunctionalClassesToUse = (); | |
| 988 | |
| 989 if ($TypeOfFirstValue =~ /^ARRAY/) { | |
| 990 push @SpecifiedFunctionalClasses, @{$FirstValue}; | |
| 991 } | |
| 992 else { | |
| 993 push @SpecifiedFunctionalClasses, @Values; | |
| 994 } | |
| 995 | |
| 996 # Make sure specified FunctionalClasses are valid... | |
| 997 for $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) { | |
| 998 if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) { | |
| 999 croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n "; | |
| 1000 } | |
| 1001 push @FunctionalClassesToUse, $SpecifiedFunctionalClass; | |
| 1002 } | |
| 1003 | |
| 1004 # Set functional classes to use... | |
| 1005 @{$This->{FunctionalClassesToUse}} = (); | |
| 1006 push @{$This->{FunctionalClassesToUse}}, @FunctionalClassesToUse; | |
| 1007 | |
| 1008 return $This; | |
| 1009 } | |
| 1010 | |
| 1011 # Return a string containing data for MolecularComplexityDescriptors object... | |
| 1012 # | |
| 1013 sub StringifyMolecularComplexityDescriptors { | |
| 1014 my($This) = @_; | |
| 1015 my($ComplexityDescriptorsString, $Nothing); | |
| 1016 | |
| 1017 $ComplexityDescriptorsString = "MolecularDescriptorType: $This->{Type}; MolecularComplexityType: $This->{MolecularComplexityType}; " . $This->_StringifyDescriptorNamesAndValues(); | |
| 1018 | |
| 1019 # Setup fingerprints specific information... | |
| 1020 if ($This->{MolecularComplexityType} =~ /^MACCSKeys$/i) { | |
| 1021 $ComplexityDescriptorsString .= "; MACCSKeysSize = $This->{MACCSKeysSize}"; | |
| 1022 } | |
| 1023 elsif ($This->{MolecularComplexityType} =~ /^ExtendedConnectivityFingerprints$/i) { | |
| 1024 $ComplexityDescriptorsString .= "; NeighborhoodRadius = $This->{NeighborhoodRadius}; NormalizationMethodology = $This->{NormalizationMethodology}"; | |
| 1025 } | |
| 1026 elsif ($This->{MolecularComplexityType} =~ /^PathLengthFingerprints$/i) { | |
| 1027 $ComplexityDescriptorsString .= "; MinPathLength = $This->{MinPathLength}; MaxPathLength = $This->{MaxPathLength}; UseBondSymbols: " . ($This->{UseBondSymbols} ? "Yes" : "No"); | |
| 1028 } | |
| 1029 elsif ($This->{MolecularComplexityType} =~ /^TopologicalAtomPairsFingerprints$/i) { | |
| 1030 $ComplexityDescriptorsString .= "; MinDistance = $This->{MinDistance}; MaxDistance = $This->{MaxDistance}"; | |
| 1031 } | |
| 1032 elsif ($This->{MolecularComplexityType} =~ /^TopologicalAtomTripletsFingerprints$/i) { | |
| 1033 $ComplexityDescriptorsString .= "; MinDistance = $This->{MinDistance}; MaxDistance = $This->{MaxDistance}; UseTriangleInequality: " . ($This->{UseTriangleInequality} ? "Yes" : "No"); | |
| 1034 } | |
| 1035 elsif ($This->{MolecularComplexityType} =~ /^TopologicalAtomTorsionsFingerprints$/i) { | |
| 1036 $ComplexityDescriptorsString .= "; MinDistance = $This->{MinDistance}; MaxDistance = $This->{MaxDistance}"; | |
| 1037 } | |
| 1038 elsif ($This->{MolecularComplexityType} =~ /^TopologicalPharmacophoreAtomPairsFingerprints$/i) { | |
| 1039 $ComplexityDescriptorsString .= "; MinDistance = $This->{MinDistance}; MaxDistance = $This->{MaxDistance}; NormalizationMethodology = $This->{NormalizationMethodology}"; | |
| 1040 } | |
| 1041 elsif ($This->{MolecularComplexityType} =~ /^TopologicalPharmacophoreAtomTripletsFingerprints$/i) { | |
| 1042 $ComplexityDescriptorsString .= "; MinDistance = $This->{MinDistance}; MaxDistance = $This->{MaxDistance}; NormalizationMethodology = $This->{NormalizationMethodology}; DistanceBinSize: $This->{DistanceBinSize}; UseTriangleInequality: " . ($This->{UseTriangleInequality} ? "Yes" : "No"); | |
| 1043 } | |
| 1044 | |
| 1045 # Setup atom identifier information... | |
| 1046 if ($This->{MolecularComplexityType} =~ /^(AtomTypesFingerprints|ExtendedConnectivityFingerprints|PathLengthFingerprints|TopologicalAtomPairsFingerprints|TopologicalAtomTripletsFingerprints|TopologicalAtomTorsionsFingerprints|TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) { | |
| 1047 $ComplexityDescriptorsString .= "; AtomIdentifierType = $This->{AtomIdentifierType}"; | |
| 1048 | |
| 1049 if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) { | |
| 1050 my($AtomicInvariant, @AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants); | |
| 1051 | |
| 1052 @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder(); | |
| 1053 %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants(); | |
| 1054 | |
| 1055 for $AtomicInvariant (@AtomicInvariantsOrder) { | |
| 1056 push @AtomicInvariants, "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}"; | |
| 1057 } | |
| 1058 | |
| 1059 $ComplexityDescriptorsString .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">"; | |
| 1060 $ComplexityDescriptorsString .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">"; | |
| 1061 $ComplexityDescriptorsString .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">"; | |
| 1062 } | |
| 1063 elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) { | |
| 1064 my($FunctionalClass, @FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses); | |
| 1065 | |
| 1066 @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder(); | |
| 1067 %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses(); | |
| 1068 | |
| 1069 for $FunctionalClass (@FunctionalClassesOrder) { | |
| 1070 push @FunctionalClasses, "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}"; | |
| 1071 } | |
| 1072 | |
| 1073 $ComplexityDescriptorsString .= "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">"; | |
| 1074 $ComplexityDescriptorsString .= "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">"; | |
| 1075 $ComplexityDescriptorsString .= "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">"; | |
| 1076 } | |
| 1077 } | |
| 1078 return $ComplexityDescriptorsString; | |
| 1079 } | |
| 1080 | |
| 1081 # Is it a MolecularComplexityDescriptors object? | |
| 1082 sub _IsMolecularComplexityDescriptors { | |
| 1083 my($Object) = @_; | |
| 1084 | |
| 1085 return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0; | |
| 1086 } | |
| 1087 | |
| 1088 1; | |
| 1089 | |
| 1090 __END__ | |
| 1091 | |
| 1092 =head1 NAME | |
| 1093 | |
| 1094 MolecularComplexityDescriptors | |
| 1095 | |
| 1096 =head1 SYNOPSIS | |
| 1097 | |
| 1098 use MolecularDescriptors::MolecularComplexityDescriptors; | |
| 1099 | |
| 1100 use MolecularDescriptors::MolecularComplexityDescriptors qw(:all); | |
| 1101 | |
| 1102 =head1 DESCRIPTION | |
| 1103 | |
| 1104 B<MolecularComplexityDescriptors> class provides the following methods: | |
| 1105 | |
| 1106 new, GenerateDescriptors, GetDescriptorNames, | |
| 1107 GetMolecularComplexityTypeAbbreviation, SetMACCSKeysSize, SetAtomIdentifierType, | |
| 1108 SetAtomicInvariantsToUse, SetDistanceBinSize, SetFunctionalClassesToUse, | |
| 1109 SetMaxDistance, SetMaxPathLength, SetMinDistance, SetMinPathLength, | |
| 1110 SetMolecularComplexityType, SetNeighborhoodRadius, SetNormalizationMethodology, | |
| 1111 StringifyMolecularComplexityDescriptors | |
| 1112 | |
| 1113 B<MolecularComplexityDescriptors> is derived from B<MolecularDescriptors> class which in turn | |
| 1114 is derived from B<ObjectProperty> base class that provides methods not explicitly defined | |
| 1115 in B<MolecularComplexityDescriptors>, B<MolecularDescriptors> or B<ObjectProperty> classes using Perl's | |
| 1116 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property: | |
| 1117 | |
| 1118 Set<PropertyName>(<PropertyValue>); | |
| 1119 $PropertyValue = Get<PropertyName>(); | |
| 1120 Delete<PropertyName>(); | |
| 1121 | |
| 1122 The current release of MayaChemTools supports calculation of molecular complexity using | |
| 1123 I<MolecularComplexityType> parameter corresponding to number of bits-set or unique | |
| 1124 keys [ Ref 117-119 ] in molecular fingerprints. The valid values for I<MolecularComplexityType> | |
| 1125 are: | |
| 1126 | |
| 1127 AtomTypesFingerprints | |
| 1128 ExtendedConnectivityFingerprints | |
| 1129 MACCSKeys | |
| 1130 PathLengthFingerprints | |
| 1131 TopologicalAtomPairsFingerprints | |
| 1132 TopologicalAtomTripletsFingerprints | |
| 1133 TopologicalAtomTorsionsFingerprints | |
| 1134 TopologicalPharmacophoreAtomPairsFingerprints | |
| 1135 TopologicalPharmacophoreAtomTripletsFingerprints | |
| 1136 | |
| 1137 Default value for I<MolecularComplexityType>: I<MACCSKeys>. | |
| 1138 | |
| 1139 I<AtomIdentifierType> parameter name corresponds to atom types used during generation of | |
| 1140 fingerprints. The valid values for I<AtomIdentifierType> are: I<AtomicInvariantsAtomTypes, | |
| 1141 DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, | |
| 1142 SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes>. I<AtomicInvariantsAtomTypes> | |
| 1143 is not supported for following values of I<MolecularComplexityType>: I<MACCSKeys, | |
| 1144 TopologicalPharmacophoreAtomPairsFingerprints, TopologicalPharmacophoreAtomTripletsFingerprints>. | |
| 1145 I<FunctionalClassAtomTypes> is the only valid value of I<AtomIdentifierType> for topological | |
| 1146 pharmacophore fingerprints. | |
| 1147 | |
| 1148 Default value for I<AtomIdentifierType>: I<AtomicInvariantsAtomTypes> for all fingerprints; | |
| 1149 I<FunctionalClassAtomTypes> for topological pharmacophore fingerprints. | |
| 1150 | |
| 1151 I<AtomicInvariantsToUse> parameter name and values are used during I<AtomicInvariantsAtomTypes> | |
| 1152 value of parameter I<AtomIdentifierType>. It's a list of space separated valid atomic invariant atom types. | |
| 1153 | |
| 1154 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB, H, Ar, RA, FC, MN, SM>. | |
| 1155 Default values for I<AtomicInvariantsToUse> parameter are set differently for different fingerprints | |
| 1156 using I<MolecularComplexityType> parameter as shown below: | |
| 1157 | |
| 1158 MolecularComplexityType AtomicInvariantsToUse | |
| 1159 | |
| 1160 AtomTypesFingerprints AS X BO H FC | |
| 1161 TopologicalAtomPairsFingerprints AS X BO H FC | |
| 1162 TopologicalAtomTripletsFingerprints AS X BO H FC | |
| 1163 TopologicalAtomTorsionsFingerprints AS X BO H FC | |
| 1164 | |
| 1165 ExtendedConnectivityFingerprints AS X BO H FC MN | |
| 1166 PathLengthFingerprints AS | |
| 1167 | |
| 1168 I<FunctionalClassesToUse> parameter name and values are used during I<FunctionalClassAtomTypes> | |
| 1169 value of parameter I<AtomIdentifierType>. It's a list of space separated valid functional classes. | |
| 1170 | |
| 1171 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>. | |
| 1172 | |
| 1173 Default value for I<FunctionalClassesToUse> parameter is set to: | |
| 1174 | |
| 1175 HBD HBA PI NI Ar Hal | |
| 1176 | |
| 1177 for all fingerprints except for the following two I<MolecularComplexityType> fingerprints: | |
| 1178 | |
| 1179 MolecularComplexityType FunctionalClassesToUse | |
| 1180 | |
| 1181 TopologicalPharmacophoreAtomPairsFingerprints HBD HBA PI NI H | |
| 1182 TopologicalPharmacophoreAtomTripletsFingerprints HBD HBA PI NI H Ar | |
| 1183 | |
| 1184 I<MACCSKeysSize> parameter name is only used during I<MACCSKeys> value of | |
| 1185 I<MolecularComplexityType> and corresponds to size of MACCS key set. Possible | |
| 1186 values: I<166 or 322>. Default value: I<166>. | |
| 1187 | |
| 1188 I<NeighborhoodRadius> parameter name is only used during I<ExtendedConnectivityFingerprints> | |
| 1189 value of I<MolecularComplexityType> and corresponds to atomic neighborhoods radius for | |
| 1190 generating extended connectivity fingerprints. Possible values: positive integer. Default value: | |
| 1191 I<2>. | |
| 1192 | |
| 1193 I<MinPathLength> and I<MaxPathLength> parameters are only used during I<PathLengthFingerprints> | |
| 1194 value of I<MolecularComplexityType> and correspond to minimum and maximum path lengths to use | |
| 1195 for generating path length fingerprints. Possible values: positive integers. Default value: I<MinPathLength - 1>; | |
| 1196 I<MaxPathLength - 8>. | |
| 1197 | |
| 1198 I<UseBondSymbols> parameter is only used during I<PathLengthFingerprints> value of | |
| 1199 I<MolecularComplexityType> and indicates whether bond symbols are included in atom path | |
| 1200 strings used to generate path length fingerprints. Possible value: I<Yes or No>. Default value: | |
| 1201 I<Yes>. | |
| 1202 | |
| 1203 I<MinDistance> and I<MaxDistance> parameters are only used during I<TopologicalAtomPairsFingerprints> | |
| 1204 and I<TopologicalAtomTripletsFingerprints> values of I<MolecularComplexityType> and correspond to | |
| 1205 minimum and maximum bond distance between atom pairs during topological pharmacophore fingerprints. | |
| 1206 Possible values: positive integers. Default value: I<MinDistance - 1>; I<MaxDistance - 10>. | |
| 1207 | |
| 1208 I<UseTriangleInequality> parameter is used during these values for I<MolecularComplexityType>: | |
| 1209 I<TopologicalAtomTripletsFingerprints> and I<TopologicalPharmacophoreAtomTripletsFingerprints>. | |
| 1210 Possible values: I<Yes or No>. It determines whether to apply triangle inequality to distance triplets. | |
| 1211 Default value: I<TopologicalAtomTripletsFingerprints - No>; | |
| 1212 I<TopologicalPharmacophoreAtomTripletsFingerprints - Yes>. | |
| 1213 | |
| 1214 I<DistanceBinSize> parameter is used during I<TopologicalPharmacophoreAtomTripletsFingerprints> | |
| 1215 value of I<MolecularComplexityType> and corresponds to distance bin size used for binning | |
| 1216 distances during generation of topological pharmacophore atom triplets fingerprints. Possible | |
| 1217 value: positive integer. Default value: I<2>. | |
| 1218 | |
| 1219 I<NormalizationMethodology> is only used for these values for I<MolecularComplexityType>: | |
| 1220 I<ExtendedConnectivityFingerprints>, I<TopologicalPharmacophoreAtomPairsFingerprints> | |
| 1221 and I<TopologicalPharmacophoreAtomTripletsFingerprints>. It corresponds to normalization | |
| 1222 methodology to use for scaling the number of bits-set or unique keys during generation of | |
| 1223 fingerprints. Possible values during I<ExtendedConnectivityFingerprints>: I<None or | |
| 1224 ByHeavyAtomsCount>; Default value: I<None>. Possible values during topological | |
| 1225 pharmacophore atom pairs and triplets fingerprints: I<None or ByPossibleKeysCount>; | |
| 1226 Default value: I<None>. I<ByPossibleKeysCount> corresponds to total number of | |
| 1227 possible topological pharmacophore atom pairs or triplets in a molecule. | |
| 1228 | |
| 1229 =head2 METHODS | |
| 1230 | |
| 1231 =over 4 | |
| 1232 | |
| 1233 =item B<new> | |
| 1234 | |
| 1235 $NewMolecularComplexityDescriptors = new MolecularDescriptors:: | |
| 1236 MolecularComplexityDescriptors( | |
| 1237 %NamesAndValues); | |
| 1238 | |
| 1239 Using specified I<MolecularComplexityDescriptors> property names and values hash, B<new> | |
| 1240 method creates a new object and returns a reference to newly created B<MolecularComplexityDescriptors> | |
| 1241 object. By default, the following properties are initialized: | |
| 1242 | |
| 1243 Molecule = '' | |
| 1244 Type = 'MolecularComplexity' | |
| 1245 MolecularComplexityType = 'MACCSKeys' | |
| 1246 AtomIdentifierType = '' | |
| 1247 MACCSKeysSize = 166 | |
| 1248 NeighborhoodRadius = 2 | |
| 1249 MinPathLength = 1 | |
| 1250 MaxPathLength = 8 | |
| 1251 UseBondSymbols = 1 | |
| 1252 MinDistance = 1 | |
| 1253 MaxDistance = 10 | |
| 1254 UseTriangleInequality = '' | |
| 1255 DistanceBinSize = 2 | |
| 1256 NormalizationMethodology = 'None' | |
| 1257 @DescriptorNames = ('MolecularComplexity') | |
| 1258 @DescriptorValues = ('None') | |
| 1259 | |
| 1260 Examples: | |
| 1261 | |
| 1262 $MolecularComplexityDescriptors = new MolecularDescriptors:: | |
| 1263 MolecularComplexityDescriptors( | |
| 1264 'Molecule' => $Molecule); | |
| 1265 | |
| 1266 $MolecularComplexityDescriptors = new MolecularDescriptors:: | |
| 1267 MolecularComplexityDescriptors(); | |
| 1268 | |
| 1269 $MolecularComplexityDescriptors->SetMolecule($Molecule); | |
| 1270 $MolecularComplexityDescriptors->GenerateDescriptors(); | |
| 1271 print "MolecularComplexityDescriptors: $MolecularComplexityDescriptors\n"; | |
| 1272 | |
| 1273 | |
| 1274 =item B<GenerateDescriptors> | |
| 1275 | |
| 1276 $MolecularComplexityDescriptors->GenerateDescriptors(); | |
| 1277 | |
| 1278 Calculates MolecularComplexity value for a molecule and returns I<MolecularComplexityDescriptors>. | |
| 1279 | |
| 1280 =item B<GetDescriptorNames> | |
| 1281 | |
| 1282 @DescriptorNames = $MolecularComplexityDescriptors->GetDescriptorNames(); | |
| 1283 @DescriptorNames = MolecularDescriptors::MolecularComplexityDescriptors:: | |
| 1284 GetDescriptorNames(); | |
| 1285 | |
| 1286 Returns all available descriptor names as an array. | |
| 1287 | |
| 1288 =item B<GetMolecularComplexityTypeAbbreviation> | |
| 1289 | |
| 1290 $Abbrev = $MolecularComplexityDescriptors-> | |
| 1291 GetMolecularComplexityTypeAbbreviation(); | |
| 1292 $Abbrev = MolecularDescriptors::MolecularComplexityDescriptors:: | |
| 1293 GetMolecularComplexityTypeAbbreviation($ComplexityType); | |
| 1294 | |
| 1295 Returns abbreviation for a specified molecular complexity type or corresponding to | |
| 1296 I<MolecularComplexityDescriptors> object. | |
| 1297 | |
| 1298 =item B<SetMACCSKeysSize> | |
| 1299 | |
| 1300 $MolecularComplexityDescriptors->SetMACCSKeysSize($Size); | |
| 1301 | |
| 1302 Sets MACCS keys size and returns I<MolecularComplexityDescriptors>. | |
| 1303 | |
| 1304 =item B<SetAtomIdentifierType> | |
| 1305 | |
| 1306 $MolecularComplexityDescriptors->SetAtomIdentifierType($IdentifierType); | |
| 1307 | |
| 1308 Sets atom I<IdentifierType> to use during fingerprints generation corresponding to | |
| 1309 I<MolecularComplexityType> and returns I<MolecularComplexityDescriptors>. | |
| 1310 | |
| 1311 Possible values: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
| 1312 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, | |
| 1313 TPSAAtomTypes, UFFAtomTypes>. | |
| 1314 | |
| 1315 =item B<SetAtomicInvariantsToUse> | |
| 1316 | |
| 1317 $MolecularComplexityDescriptors->SetAtomicInvariantsToUse($ValuesRef); | |
| 1318 $MolecularComplexityDescriptors->SetAtomicInvariantsToUse(@Values); | |
| 1319 | |
| 1320 Sets atomic invariants to use for fingerprints generation when I<AtomIdentifierType> is | |
| 1321 I<AtomicInvariantsAtomTypes> and returns I<MolecularComplexityDescriptors>. | |
| 1322 | |
| 1323 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB, | |
| 1324 H, Ar, RA, FC, MN, SM>. Default value [ Ref 24 ]: I<AS,X,BO,H,FC,MN>. | |
| 1325 | |
| 1326 The atomic invariants abbreviations correspond to: | |
| 1327 | |
| 1328 AS = Atom symbol corresponding to element symbol | |
| 1329 | |
| 1330 X<n> = Number of non-hydrogen atom neighbors or heavy atoms | |
| 1331 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms | |
| 1332 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms | |
| 1333 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms | |
| 1334 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms | |
| 1335 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms | |
| 1336 H<n> = Number of implicit and explicit hydrogens for atom | |
| 1337 Ar = Aromatic annotation indicating whether atom is aromatic | |
| 1338 RA = Ring atom annotation indicating whether atom is in a ring | |
| 1339 FC<+n/-n> = Formal charge assigned to atom | |
| 1340 MN<n> = Mass number indicating isotope other than most abundant isotope | |
| 1341 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or | |
| 1342 3 (triplet) | |
| 1343 | |
| 1344 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
| 1345 | |
| 1346 AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n> | |
| 1347 | |
| 1348 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are | |
| 1349 optional. Atom type specification doesn't include atomic invariants with zero or undefined values. | |
| 1350 | |
| 1351 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words | |
| 1352 are also allowed: | |
| 1353 | |
| 1354 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors | |
| 1355 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms | |
| 1356 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms | |
| 1357 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms | |
| 1358 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms | |
| 1359 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms | |
| 1360 H : NumOfImplicitAndExplicitHydrogens | |
| 1361 Ar : Aromatic | |
| 1362 RA : RingAtom | |
| 1363 FC : FormalCharge | |
| 1364 MN : MassNumber | |
| 1365 SM : SpinMultiplicity | |
| 1366 | |
| 1367 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant | |
| 1368 atom types. | |
| 1369 | |
| 1370 =item B<SetDistanceBinSize> | |
| 1371 | |
| 1372 $MolecularComplexityDescriptors->SetDistanceBinSize($BinSize); | |
| 1373 | |
| 1374 Sets distance bin size used to bin distances between atom pairs in atom triplets for | |
| 1375 topological pharmacophore atom triplets fingerprints generation and returns | |
| 1376 I<MolecularComplexityDescriptors>. | |
| 1377 | |
| 1378 =item B<SetFunctionalClassesToUse> | |
| 1379 | |
| 1380 $MolecularComplexityDescriptors->SetFunctionalClassesToUse($ValuesRef); | |
| 1381 $MolecularComplexityDescriptors->SetFunctionalClassesToUse(@Values); | |
| 1382 | |
| 1383 Sets functional classes to use for fingerprints generation when I<AtomIdentifierType> is | |
| 1384 I<FunctionalClassAtomTypes> and returns I<MolecularComplexityDescriptors>. | |
| 1385 | |
| 1386 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>. | |
| 1387 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>. | |
| 1388 | |
| 1389 The functional class abbreviations correspond to: | |
| 1390 | |
| 1391 HBD: HydrogenBondDonor | |
| 1392 HBA: HydrogenBondAcceptor | |
| 1393 PI : PositivelyIonizable | |
| 1394 NI : NegativelyIonizable | |
| 1395 Ar : Aromatic | |
| 1396 Hal : Halogen | |
| 1397 H : Hydrophobic | |
| 1398 RA : RingAtom | |
| 1399 CA : ChainAtom | |
| 1400 | |
| 1401 Functional class atom type specification for an atom corresponds to: | |
| 1402 | |
| 1403 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA or None | |
| 1404 | |
| 1405 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom | |
| 1406 types. It uses the following definitions [ Ref 60-61, Ref 65-66 ]: | |
| 1407 | |
| 1408 HydrogenBondDonor: NH, NH2, OH | |
| 1409 HydrogenBondAcceptor: N[!H], O | |
| 1410 PositivelyIonizable: +, NH2 | |
| 1411 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH | |
| 1412 | |
| 1413 =item B<SetMaxDistance> | |
| 1414 | |
| 1415 $MolecularComplexityDescriptors->SetMaxDistance($MaxDistance); | |
| 1416 | |
| 1417 Sets maximum distance to use during topological atom pairs and triplets fingerprints | |
| 1418 generation and returns I<MolecularComplexityDescriptors>. | |
| 1419 | |
| 1420 =item B<SetMaxPathLength> | |
| 1421 | |
| 1422 $MolecularComplexityDescriptors->SetMaxPathLength($Length); | |
| 1423 | |
| 1424 Sets maximum path length to use during path length fingerprints generation and returns | |
| 1425 I<MolecularComplexityDescriptors>. | |
| 1426 | |
| 1427 =item B<SetMinDistance> | |
| 1428 | |
| 1429 $MolecularComplexityDescriptors->SetMinDistance($MinDistance); | |
| 1430 | |
| 1431 Sets minimum distance to use during topological atom pairs and triplets fingerprints | |
| 1432 generation and returns I<MolecularComplexityDescriptors>. | |
| 1433 | |
| 1434 =item B<SetMinPathLength> | |
| 1435 | |
| 1436 $MolecularComplexityDescriptors->SetMinPathLength($MinPathLength); | |
| 1437 | |
| 1438 Sets minimum path length to use during path length fingerprints generation and returns | |
| 1439 I<MolecularComplexityDescriptors>. | |
| 1440 | |
| 1441 =item B<SetMolecularComplexityType> | |
| 1442 | |
| 1443 $MolecularComplexityDescriptors->SetMolecularComplexityType($ComplexityType); | |
| 1444 | |
| 1445 Sets molecular complexity type to use for calculating its value and returns | |
| 1446 I<MolecularComplexityDescriptors>. | |
| 1447 | |
| 1448 =item B<SetNeighborhoodRadius> | |
| 1449 | |
| 1450 $MolecularComplexityDescriptors->SetNeighborhoodRadius($Radius); | |
| 1451 | |
| 1452 Sets neighborhood radius to use during extended connectivity fingerprints generation and | |
| 1453 returns I<MolecularComplexityDescriptors>. | |
| 1454 | |
| 1455 =item B<SetNormalizationMethodology> | |
| 1456 | |
| 1457 $MolecularComplexityDescriptors->SetNormalizationMethodology($Methodology); | |
| 1458 | |
| 1459 Sets normalization methodology to use during calculation of molecular complexity | |
| 1460 corresponding to extended connectivity, topological pharmacophore atom pairs and | |
| 1461 triplets fingerprints and returns I<MolecularComplexityDescriptors>. | |
| 1462 | |
| 1463 =item B<StringifyMolecularComplexityDescriptors> | |
| 1464 | |
| 1465 $String = $MolecularComplexityDescriptors-> | |
| 1466 StringifyMolecularComplexityDescriptors(); | |
| 1467 | |
| 1468 Returns a string containing information about I<MolecularComplexityDescriptors> object. | |
| 1469 | |
| 1470 =back | |
| 1471 | |
| 1472 =head1 AUTHOR | |
| 1473 | |
| 1474 Manish Sud <msud@san.rr.com> | |
| 1475 | |
| 1476 =head1 SEE ALSO | |
| 1477 | |
| 1478 MolecularDescriptors.pm, MolecularDescriptorsGenerator.pm | |
| 1479 | |
| 1480 =head1 COPYRIGHT | |
| 1481 | |
| 1482 Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 1483 | |
| 1484 This file is part of MayaChemTools. | |
| 1485 | |
| 1486 MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 1487 the terms of the GNU Lesser General Public License as published by the Free | |
| 1488 Software Foundation; either version 3 of the License, or (at your option) | |
| 1489 any later version. | |
| 1490 | |
| 1491 =cut |
