Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/Fingerprints/AtomNeighborhoodsFingerprints.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
| author | deepakjadmin |
|---|---|
| date | Wed, 20 Jan 2016 09:23:18 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4816e4a8ae95 |
|---|---|
| 1 package Fingerprints::AtomNeighborhoodsFingerprints; | |
| 2 # | |
| 3 # $RCSfile: AtomNeighborhoodsFingerprints.pm,v $ | |
| 4 # $Date: 2015/02/28 20:48:53 $ | |
| 5 # $Revision: 1.27 $ | |
| 6 # | |
| 7 # Author: Manish Sud <msud@san.rr.com> | |
| 8 # | |
| 9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 10 # | |
| 11 # This file is part of MayaChemTools. | |
| 12 # | |
| 13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 14 # the terms of the GNU Lesser General Public License as published by the Free | |
| 15 # Software Foundation; either version 3 of the License, or (at your option) any | |
| 16 # later version. | |
| 17 # | |
| 18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
| 19 # any warranty; without even the implied warranty of merchantability of fitness | |
| 20 # for a particular purpose. See the GNU Lesser General Public License for more | |
| 21 # details. | |
| 22 # | |
| 23 # You should have received a copy of the GNU Lesser General Public License | |
| 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
| 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
| 26 # Boston, MA, 02111-1307, USA. | |
| 27 # | |
| 28 | |
| 29 use strict; | |
| 30 use Carp; | |
| 31 use Exporter; | |
| 32 use Fingerprints::Fingerprints; | |
| 33 use TextUtil (); | |
| 34 use Molecule; | |
| 35 use AtomTypes::AtomicInvariantsAtomTypes; | |
| 36 use AtomTypes::DREIDINGAtomTypes; | |
| 37 use AtomTypes::EStateAtomTypes; | |
| 38 use AtomTypes::FunctionalClassAtomTypes; | |
| 39 use AtomTypes::MMFF94AtomTypes; | |
| 40 use AtomTypes::SLogPAtomTypes; | |
| 41 use AtomTypes::SYBYLAtomTypes; | |
| 42 use AtomTypes::TPSAAtomTypes; | |
| 43 use AtomTypes::UFFAtomTypes; | |
| 44 | |
| 45 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); | |
| 46 | |
| 47 @ISA = qw(Fingerprints::Fingerprints Exporter); | |
| 48 @EXPORT = qw(); | |
| 49 @EXPORT_OK = qw(); | |
| 50 | |
| 51 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); | |
| 52 | |
| 53 # Setup class variables... | |
| 54 my($ClassName); | |
| 55 _InitializeClass(); | |
| 56 | |
| 57 # Overload Perl functions... | |
| 58 use overload '""' => 'StringifyAtomNeighborhoodsFingerprints'; | |
| 59 | |
| 60 # Class constructor... | |
| 61 sub new { | |
| 62 my($Class, %NamesAndValues) = @_; | |
| 63 | |
| 64 # Initialize object... | |
| 65 my $This = $Class->SUPER::new(); | |
| 66 bless $This, ref($Class) || $Class; | |
| 67 $This->_InitializeAtomNeighborhoodsFingerprints(); | |
| 68 | |
| 69 $This->_InitializeAtomNeighborhoodsFingerprintsProperties(%NamesAndValues); | |
| 70 | |
| 71 return $This; | |
| 72 } | |
| 73 | |
| 74 # Initialize object data... | |
| 75 # | |
| 76 sub _InitializeAtomNeighborhoodsFingerprints { | |
| 77 my($This) = @_; | |
| 78 | |
| 79 # Type of fingerprint... | |
| 80 $This->{Type} = 'AtomNeighborhoods'; | |
| 81 | |
| 82 # Type of vector... | |
| 83 $This->{VectorType} = 'FingerprintsVector'; | |
| 84 | |
| 85 # Type of FingerprintsVector... | |
| 86 $This->{FingerprintsVectorType} = 'AlphaNumericalValues'; | |
| 87 | |
| 88 # Minimum and maximum atomic neighborhoods radii... | |
| 89 $This->{MinNeighborhoodRadius} = 0; | |
| 90 $This->{MaxNeighborhoodRadius} = 2; | |
| 91 | |
| 92 # Atom identifier type to use for atom IDs in atom neighborhood atoms... | |
| 93 # | |
| 94 # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, | |
| 95 # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, | |
| 96 # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes | |
| 97 # | |
| 98 $This->{AtomIdentifierType} = ''; | |
| 99 | |
| 100 # Atom types assigned to each heavy atom... | |
| 101 %{$This->{AssignedAtomTypes}} = (); | |
| 102 | |
| 103 # Atom neighborhoods within specified atom radii... | |
| 104 %{$This->{AtomNeighborhoods}} = (); | |
| 105 | |
| 106 # Atom neighborhoods atom types count at different neighborhoods... | |
| 107 %{$This->{NeighborhoodAtomTypesCount}} = (); | |
| 108 | |
| 109 # Atom neighborhood identifiers using specified atom identifier types methodology... | |
| 110 @{$This->{AtomNeighborhoodsIdentifiers}} = (); | |
| 111 } | |
| 112 | |
| 113 # Initialize class ... | |
| 114 sub _InitializeClass { | |
| 115 #Class name... | |
| 116 $ClassName = __PACKAGE__; | |
| 117 } | |
| 118 | |
| 119 # Initialize object properties.... | |
| 120 sub _InitializeAtomNeighborhoodsFingerprintsProperties { | |
| 121 my($This, %NamesAndValues) = @_; | |
| 122 | |
| 123 my($Name, $Value, $MethodName); | |
| 124 while (($Name, $Value) = each %NamesAndValues) { | |
| 125 $MethodName = "Set${Name}"; | |
| 126 $This->$MethodName($Value); | |
| 127 } | |
| 128 | |
| 129 # Make sure molecule object was specified... | |
| 130 if (!exists $NamesAndValues{Molecule}) { | |
| 131 croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule..."; | |
| 132 } | |
| 133 if (exists $NamesAndValues{Size}) { | |
| 134 croak "Error: ${ClassName}->New: Object can't be instantiated with a user specified size: It's an arbitrary length vector..."; | |
| 135 } | |
| 136 if (!exists $NamesAndValues{AtomIdentifierType}) { | |
| 137 croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType..."; | |
| 138 } | |
| 139 | |
| 140 $This->_InitializeFingerprintsVector(); | |
| 141 | |
| 142 return $This; | |
| 143 } | |
| 144 | |
| 145 # Set atom identifier type.. | |
| 146 # | |
| 147 sub SetAtomIdentifierType { | |
| 148 my($This, $IdentifierType) = @_; | |
| 149 | |
| 150 if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) { | |
| 151 croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes."; | |
| 152 } | |
| 153 | |
| 154 if ($This->{AtomIdentifierType}) { | |
| 155 croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type: It's already set..."; | |
| 156 } | |
| 157 | |
| 158 $This->{AtomIdentifierType} = $IdentifierType; | |
| 159 | |
| 160 # Initialize atom identifier type information... | |
| 161 $This->_InitializeAtomIdentifierTypeInformation(); | |
| 162 | |
| 163 return $This; | |
| 164 } | |
| 165 | |
| 166 # Set minimum atom neighborhood radius... | |
| 167 # | |
| 168 sub SetMinNeighborhoodRadius { | |
| 169 my($This, $Value) = @_; | |
| 170 | |
| 171 if (!TextUtil::IsInteger($Value)) { | |
| 172 croak "Error: ${ClassName}->SetMinNeighborhoodRadius: MinNeighborhoodRadius value, $Value, is not valid: It must be an integer..."; | |
| 173 } | |
| 174 | |
| 175 if ($Value < 0 ) { | |
| 176 croak "Error: ${ClassName}->SetMinNeighborhoodRadius: MinNeighborhoodRadius value, $Value, is not valid: It must be >= 0..."; | |
| 177 } | |
| 178 $This->{MinNeighborhoodRadius} = $Value; | |
| 179 | |
| 180 return $This; | |
| 181 } | |
| 182 | |
| 183 # Set maximum atom neighborhood radius... | |
| 184 # | |
| 185 sub SetMaxNeighborhoodRadius { | |
| 186 my($This, $Value) = @_; | |
| 187 | |
| 188 if (!TextUtil::IsInteger($Value)) { | |
| 189 croak "Error: ${ClassName}->SetMaxNeighborhoodRadius: MaxNeighborhoodRadius value, $Value, is not valid: It must be an integer..."; | |
| 190 } | |
| 191 | |
| 192 if ($Value < 0 ) { | |
| 193 croak "Error: ${ClassName}->SetMaxNeighborhoodRadius: MaxNeighborhoodRadius value, $Value, is not valid: It must be >= 0..."; | |
| 194 } | |
| 195 $This->{MaxNeighborhoodRadius} = $Value; | |
| 196 | |
| 197 return $This; | |
| 198 } | |
| 199 | |
| 200 # Generate fingerprints description... | |
| 201 # | |
| 202 sub GetDescription { | |
| 203 my($This) = @_; | |
| 204 | |
| 205 # Is description explicitly set? | |
| 206 if (exists $This->{Description}) { | |
| 207 return $This->{Description}; | |
| 208 } | |
| 209 | |
| 210 # Generate fingerprints description... | |
| 211 | |
| 212 return "$This->{Type}:$This->{AtomIdentifierType}:MinRadius$This->{MinNeighborhoodRadius}:MaxRadius$This->{MaxNeighborhoodRadius}"; | |
| 213 } | |
| 214 | |
| 215 # Generate atom neighborhood [ Ref 53-56, Ref 73 ] fingerprints... | |
| 216 # | |
| 217 # Methodology: | |
| 218 # . Assign atom types to all non-hydrogen atoms in the molecule | |
| 219 # . Get atom neighborhoods up to MaxNeighborhoodRadius | |
| 220 # . Count unique atom types at each neighborhood radius for all heavy atoms | |
| 221 # . Generate neighborhood identifiers for all neighborhoods around central | |
| 222 # heavy atom | |
| 223 # . Atom neighborhood identifier for a specific radius is generated using neighborhood | |
| 224 # radius, assigned atom type and its count as follows: | |
| 225 # | |
| 226 # NR<n>-<AtomType>-ATC<n> | |
| 227 # | |
| 228 # . Atom neighborhood identifier for a central atom at all specified radii is generated | |
| 229 # by concatenating neighborhood identifiers at each radius with colons: | |
| 230 # | |
| 231 # NR<n>-<AtomType>-ATC<n>:NR<n>-<AtomType>-ATC<n>: | |
| 232 # | |
| 233 # . Set final fingerprints as list of neighborhood atom identifiers | |
| 234 # | |
| 235 sub GenerateFingerprints { | |
| 236 my($This) = @_; | |
| 237 | |
| 238 if ($This->{MinNeighborhoodRadius} > $This->{MaxNeighborhoodRadius}) { | |
| 239 croak "Error: ${ClassName}->GenerateFingerprints: No fingerpritns generated: MinLength, $This->{MinNeighborhoodRadius}, must be less than MaxLength, $This->{MaxNeighborhoodRadius}..."; | |
| 240 } | |
| 241 | |
| 242 # Cache appropriate molecule data... | |
| 243 $This->_SetupMoleculeDataCache(); | |
| 244 | |
| 245 # Assign atom types to all heavy atoms... | |
| 246 if (!$This->_AssignAtomTypes()) { | |
| 247 carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms..."; | |
| 248 return $This; | |
| 249 } | |
| 250 | |
| 251 # Initialize atom neighborhoods information... | |
| 252 $This->_InitializeAtomNeighborhoods(); | |
| 253 | |
| 254 # Identify atom neighborhoods within specified radii... | |
| 255 $This->_GetAtomNeighborhoods(); | |
| 256 | |
| 257 # Count atom neighborhoods atom types... | |
| 258 $This->_CountAtomNeighborhoodsAtomTypes(); | |
| 259 | |
| 260 # Generate atom neighborhood identifiers... | |
| 261 $This->_GenerateAtomNeighborhoodIdentifiers(); | |
| 262 | |
| 263 # Set final fingerprints... | |
| 264 $This->_SetFinalFingerprints(); | |
| 265 | |
| 266 # Clear cached molecule data... | |
| 267 $This->_ClearMoleculeDataCache(); | |
| 268 | |
| 269 return $This; | |
| 270 } | |
| 271 | |
| 272 # Assign appropriate atom types to all heavy atoms... | |
| 273 # | |
| 274 sub _AssignAtomTypes { | |
| 275 my($This) = @_; | |
| 276 my($SpecifiedAtomTypes, $Atom, $AtomID, $IgnoreHydrogens); | |
| 277 | |
| 278 %{$This->{AssignedAtomTypes}} = (); | |
| 279 $IgnoreHydrogens = 1; | |
| 280 | |
| 281 $SpecifiedAtomTypes = undef; | |
| 282 | |
| 283 IDENTIFIERTYPE: { | |
| 284 if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) { | |
| 285 $SpecifiedAtomTypes = new AtomTypes::AtomicInvariantsAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse}); | |
| 286 last IDENTIFIERTYPE; | |
| 287 } | |
| 288 | |
| 289 if ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) { | |
| 290 $SpecifiedAtomTypes = new AtomTypes::DREIDINGAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens); | |
| 291 last IDENTIFIERTYPE; | |
| 292 } | |
| 293 | |
| 294 if ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) { | |
| 295 $SpecifiedAtomTypes = new AtomTypes::EStateAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens); | |
| 296 last IDENTIFIERTYPE; | |
| 297 } | |
| 298 | |
| 299 if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) { | |
| 300 $SpecifiedAtomTypes = new AtomTypes::FunctionalClassAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse}); | |
| 301 last IDENTIFIERTYPE; | |
| 302 } | |
| 303 | |
| 304 if ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) { | |
| 305 $SpecifiedAtomTypes = new AtomTypes::MMFF94AtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens); | |
| 306 last IDENTIFIERTYPE; | |
| 307 } | |
| 308 | |
| 309 if ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) { | |
| 310 $SpecifiedAtomTypes = new AtomTypes::SLogPAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens); | |
| 311 last IDENTIFIERTYPE; | |
| 312 } | |
| 313 if ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) { | |
| 314 $SpecifiedAtomTypes = new AtomTypes::SYBYLAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens); | |
| 315 last IDENTIFIERTYPE; | |
| 316 } | |
| 317 | |
| 318 if ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) { | |
| 319 $SpecifiedAtomTypes = new AtomTypes::TPSAAtomTypes('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0); | |
| 320 last IDENTIFIERTYPE; | |
| 321 } | |
| 322 | |
| 323 if ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) { | |
| 324 $SpecifiedAtomTypes = new AtomTypes::UFFAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens); | |
| 325 last IDENTIFIERTYPE; | |
| 326 } | |
| 327 | |
| 328 croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}..."; | |
| 329 } | |
| 330 | |
| 331 # Assign atom types... | |
| 332 $SpecifiedAtomTypes->AssignAtomTypes(); | |
| 333 | |
| 334 # Make sure atom types assignment is successful... | |
| 335 if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) { | |
| 336 return undef; | |
| 337 } | |
| 338 | |
| 339 # Collect assigned atom types... | |
| 340 ATOM: for $Atom (@{$This->{Atoms}}) { | |
| 341 if ($Atom->IsHydrogen()) { | |
| 342 next ATOM; | |
| 343 } | |
| 344 $AtomID = $Atom->GetID(); | |
| 345 $This->{AssignedAtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom); | |
| 346 } | |
| 347 | |
| 348 return $This; | |
| 349 } | |
| 350 | |
| 351 # Initialize atom neighborhoods and their atom types count data for specified radii range... | |
| 352 # | |
| 353 sub _InitializeAtomNeighborhoods { | |
| 354 my($This) = @_; | |
| 355 my($Radius); | |
| 356 | |
| 357 # Initialize atom neighborhood count information between specified radii... | |
| 358 %{$This->{NeighborhoodAtomTypesCount}} = (); | |
| 359 | |
| 360 for $Radius ($This->{MinNeighborhoodRadius} .. $This->{MaxNeighborhoodRadius}) { | |
| 361 %{$This->{NeighborhoodAtomTypesCount}{$Radius}} = (); | |
| 362 } | |
| 363 | |
| 364 # Initialize atom neighborhoods atoms information at all specified radii... | |
| 365 # | |
| 366 %{$This->{AtomNeighborhoods}} = (); | |
| 367 | |
| 368 for $Radius (0 .. $This->{MaxNeighborhoodRadius}) { | |
| 369 %{$This->{AtomNeighborhoods}{$Radius}} = (); | |
| 370 } | |
| 371 | |
| 372 return $This; | |
| 373 } | |
| 374 | |
| 375 # Collect atom neighborhoods up to maximum neighborhood radius... | |
| 376 # | |
| 377 # Notes: | |
| 378 # . Fingerprints are only generated for neighborhoods between specified minimum | |
| 379 # and maximum neighborhood radii. | |
| 380 # | |
| 381 sub _GetAtomNeighborhoods { | |
| 382 my($This) = @_; | |
| 383 my($Atom, $AtomID, $MaxRadius, $Radius, $Molecule); | |
| 384 | |
| 385 $MaxRadius = $This->{MaxNeighborhoodRadius}; | |
| 386 $Molecule = $This->GetMolecule(); | |
| 387 | |
| 388 # Collect atom neighborhoods... | |
| 389 | |
| 390 ATOM: for $Atom (@{$This->{Atoms}}) { | |
| 391 $AtomID = $Atom->GetID(); | |
| 392 $Radius = 0; | |
| 393 | |
| 394 if ($MaxRadius == 0) { | |
| 395 # Atom is its own neighborhood at 0 radius... | |
| 396 my(@AtomNeighborhoodsAtoms); | |
| 397 | |
| 398 @AtomNeighborhoodsAtoms = ($Atom); | |
| 399 $This->{AtomNeighborhoods}{$Radius}{$AtomID} = \@AtomNeighborhoodsAtoms; | |
| 400 | |
| 401 next ATOM; | |
| 402 } | |
| 403 | |
| 404 # Collect available atom neighborhoods at different neighborhood radii levels... | |
| 405 my($AtomNeighborhoodAtomsRef); | |
| 406 | |
| 407 for $AtomNeighborhoodAtomsRef ($Molecule->GetAtomNeighborhoodsWithRadiusUpto($Atom, $MaxRadius)) { | |
| 408 $This->{AtomNeighborhoods}{$Radius}{$AtomID} = $AtomNeighborhoodAtomsRef; | |
| 409 $Radius++; | |
| 410 } | |
| 411 } | |
| 412 return $This; | |
| 413 } | |
| 414 | |
| 415 # Count atom neighborhoods atom types for each non-hydrogen central atoms with | |
| 416 # neighborhoods in specified radii range... | |
| 417 # | |
| 418 sub _CountAtomNeighborhoodsAtomTypes { | |
| 419 my($This) = @_; | |
| 420 my($AtomID, $NeighborhoodAtomID, $Radius, $NeighborhoodAtom, $NeighborhoodAtomType, $AtomNeighborhoodAtomsRef); | |
| 421 | |
| 422 RADIUS: for $Radius (sort { $a <=> $b } keys %{$This->{AtomNeighborhoods}} ) { | |
| 423 if ($Radius < $This->{MinNeighborhoodRadius} || $Radius > $This->{MaxNeighborhoodRadius}) { | |
| 424 next RADIUS; | |
| 425 } | |
| 426 # Go over the neighborhoods of each atom at the current radius... | |
| 427 for $AtomID (keys %{$This->{AtomNeighborhoods}{$Radius}}) { | |
| 428 $AtomNeighborhoodAtomsRef = $This->{AtomNeighborhoods}{$Radius}{$AtomID}; | |
| 429 NEIGHBORHOODATOM: for $NeighborhoodAtom (@{$AtomNeighborhoodAtomsRef}) { | |
| 430 if ($NeighborhoodAtom->IsHydrogen()) { | |
| 431 next NEIGHBORHOODATOM; | |
| 432 } | |
| 433 $NeighborhoodAtomID = $NeighborhoodAtom->GetID(); | |
| 434 $NeighborhoodAtomType = $This->{AssignedAtomTypes}{$NeighborhoodAtomID}; | |
| 435 | |
| 436 # Count neighborhood atom types for each atom at different radii... | |
| 437 if (!exists $This->{NeighborhoodAtomTypesCount}{$Radius}{$AtomID}) { | |
| 438 %{$This->{NeighborhoodAtomTypesCount}{$Radius}{$AtomID}} = (); | |
| 439 } | |
| 440 if (exists $This->{NeighborhoodAtomTypesCount}{$Radius}{$AtomID}{$NeighborhoodAtomType}) { | |
| 441 $This->{NeighborhoodAtomTypesCount}{$Radius}{$AtomID}{$NeighborhoodAtomType} += 1; | |
| 442 } | |
| 443 else { | |
| 444 $This->{NeighborhoodAtomTypesCount}{$Radius}{$AtomID}{$NeighborhoodAtomType} = 1; | |
| 445 } | |
| 446 } | |
| 447 } | |
| 448 } | |
| 449 return $This; | |
| 450 } | |
| 451 | |
| 452 # Generate atom neighborhood identifiers for each non-hydrogen atom using atom | |
| 453 # neighborhood atom types and their count information... | |
| 454 # | |
| 455 # Let: | |
| 456 # NR<n> = Neighborhood radius | |
| 457 # AtomType = Assigned atom type | |
| 458 # ATC<n> = AtomType count | |
| 459 # | |
| 460 # Then: | |
| 461 # | |
| 462 # AtomNeighborhoodAtomIdentifier for a neighborhood atom generated for | |
| 463 # AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
| 464 # | |
| 465 # NR<n>-<AtomType>-ATC<n> | |
| 466 # | |
| 467 # AtomNeighborhoodsIdentifier for all specified atom neighborhoods of an atom generated for | |
| 468 # AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
| 469 # | |
| 470 # NR<n>-<AtomType>-ATC<n>:NR<n>-<AtomType>-ATC<n>:... | |
| 471 # | |
| 472 sub _GenerateAtomNeighborhoodIdentifiers { | |
| 473 my($This) = @_; | |
| 474 my($Atom, $AtomID, $Radius, $AtomType, $AtomTypeCount, $AtomNeighborhoodIdentifier, @AtomNeighborhoodIdentifiers); | |
| 475 | |
| 476 @{$This->{AtomNeighborhoodsIdentifiers}} = (); | |
| 477 | |
| 478 for $Atom (@{$This->{Atoms}}) { | |
| 479 $AtomID = $Atom->GetID(); | |
| 480 @AtomNeighborhoodIdentifiers = (); | |
| 481 RADIUS: for $Radius ($This->{MinNeighborhoodRadius} .. $This->{MaxNeighborhoodRadius}) { | |
| 482 if (!exists $This->{NeighborhoodAtomTypesCount}{$Radius}{$AtomID}) { | |
| 483 next RADIUS; | |
| 484 } | |
| 485 for $AtomType (sort keys %{$This->{NeighborhoodAtomTypesCount}{$Radius}{$AtomID}}) { | |
| 486 $AtomTypeCount = $This->{NeighborhoodAtomTypesCount}{$Radius}{$AtomID}{$AtomType}; | |
| 487 push @AtomNeighborhoodIdentifiers, "NR${Radius}-${AtomType}-ATC${AtomTypeCount}"; | |
| 488 } | |
| 489 } | |
| 490 $AtomNeighborhoodIdentifier = join(":", @AtomNeighborhoodIdentifiers); | |
| 491 push @{$This->{AtomNeighborhoodsIdentifiers}}, $AtomNeighborhoodIdentifier; | |
| 492 } | |
| 493 | |
| 494 return $This; | |
| 495 } | |
| 496 | |
| 497 # Set final fingerprints vector... | |
| 498 # | |
| 499 sub _SetFinalFingerprints { | |
| 500 my($This) = @_; | |
| 501 | |
| 502 # Mark successful generation of fingerprints... | |
| 503 $This->{FingerprintsGenerated} = 1; | |
| 504 | |
| 505 # Sort AtomNeighborhoodsIdentifiers.. | |
| 506 # | |
| 507 @{$This->{AtomNeighborhoodsIdentifiers}} = sort @{$This->{AtomNeighborhoodsIdentifiers}}; | |
| 508 | |
| 509 # Add sorted atom neighborhood identifiers to FingerprintsVector which is already defined | |
| 510 # during initialization containing AlphaNumericalValues... | |
| 511 # | |
| 512 $This->{FingerprintsVector}->AddValues(\@{$This->{AtomNeighborhoodsIdentifiers}}); | |
| 513 | |
| 514 return $This; | |
| 515 } | |
| 516 | |
| 517 # Cache appropriate molecule data... | |
| 518 # | |
| 519 sub _SetupMoleculeDataCache { | |
| 520 my($This) = @_; | |
| 521 | |
| 522 # Get all non-hydrogen atoms... | |
| 523 my($NegateAtomCheckMethod); | |
| 524 $NegateAtomCheckMethod = 1; | |
| 525 @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms("IsHydrogen", $NegateAtomCheckMethod); | |
| 526 | |
| 527 return $This; | |
| 528 } | |
| 529 | |
| 530 # Clear cached molecule data... | |
| 531 # | |
| 532 sub _ClearMoleculeDataCache { | |
| 533 my($This) = @_; | |
| 534 | |
| 535 @{$This->{Atoms}} = (); | |
| 536 | |
| 537 return $This; | |
| 538 } | |
| 539 | |
| 540 # Set atomic invariants to use for atom identifiers... | |
| 541 # | |
| 542 sub SetAtomicInvariantsToUse { | |
| 543 my($This, @Values) = @_; | |
| 544 my($FirstValue, $TypeOfFirstValue, $AtomicInvariant, $SpecifiedAtomicInvariant, $AtomicInvariantValue, @SpecifiedAtomicInvariants, @AtomicInvariantsToUse); | |
| 545 | |
| 546 if (!@Values) { | |
| 547 carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified..."; | |
| 548 return; | |
| 549 } | |
| 550 | |
| 551 $FirstValue = $Values[0]; | |
| 552 $TypeOfFirstValue = ref $FirstValue; | |
| 553 | |
| 554 @SpecifiedAtomicInvariants = (); | |
| 555 @AtomicInvariantsToUse = (); | |
| 556 | |
| 557 if ($TypeOfFirstValue =~ /^ARRAY/) { | |
| 558 push @SpecifiedAtomicInvariants, @{$FirstValue}; | |
| 559 } | |
| 560 else { | |
| 561 push @SpecifiedAtomicInvariants, @Values; | |
| 562 } | |
| 563 | |
| 564 # Make sure specified AtomicInvariants are valid... | |
| 565 for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) { | |
| 566 if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) { | |
| 567 croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n "; | |
| 568 } | |
| 569 $AtomicInvariant = $SpecifiedAtomicInvariant; | |
| 570 push @AtomicInvariantsToUse, $AtomicInvariant; | |
| 571 } | |
| 572 | |
| 573 # Set atomic invariants to use... | |
| 574 @{$This->{AtomicInvariantsToUse}} = (); | |
| 575 push @{$This->{AtomicInvariantsToUse}}, @AtomicInvariantsToUse; | |
| 576 | |
| 577 return $This; | |
| 578 } | |
| 579 | |
| 580 # Set functional classes to use for atom identifiers... | |
| 581 # | |
| 582 sub SetFunctionalClassesToUse { | |
| 583 my($This, @Values) = @_; | |
| 584 my($FirstValue, $TypeOfFirstValue, $FunctionalClass, $SpecifiedFunctionalClass, @SpecifiedFunctionalClasses, @FunctionalClassesToUse); | |
| 585 | |
| 586 if (!@Values) { | |
| 587 carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified..."; | |
| 588 return; | |
| 589 } | |
| 590 | |
| 591 if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) { | |
| 592 carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}..."; | |
| 593 return; | |
| 594 } | |
| 595 | |
| 596 $FirstValue = $Values[0]; | |
| 597 $TypeOfFirstValue = ref $FirstValue; | |
| 598 | |
| 599 @SpecifiedFunctionalClasses = (); | |
| 600 @FunctionalClassesToUse = (); | |
| 601 | |
| 602 if ($TypeOfFirstValue =~ /^ARRAY/) { | |
| 603 push @SpecifiedFunctionalClasses, @{$FirstValue}; | |
| 604 } | |
| 605 else { | |
| 606 push @SpecifiedFunctionalClasses, @Values; | |
| 607 } | |
| 608 | |
| 609 # Make sure specified FunctionalClasses are valid... | |
| 610 for $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) { | |
| 611 if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) { | |
| 612 croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n "; | |
| 613 } | |
| 614 push @FunctionalClassesToUse, $SpecifiedFunctionalClass; | |
| 615 } | |
| 616 | |
| 617 # Set functional classes to use... | |
| 618 @{$This->{FunctionalClassesToUse}} = (); | |
| 619 push @{$This->{FunctionalClassesToUse}}, @FunctionalClassesToUse; | |
| 620 | |
| 621 return $This; | |
| 622 } | |
| 623 | |
| 624 # Initialize atom identifier type information... | |
| 625 # | |
| 626 # Current supported values: | |
| 627 # | |
| 628 # AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, | |
| 629 # MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes | |
| 630 # | |
| 631 sub _InitializeAtomIdentifierTypeInformation { | |
| 632 my($This) = @_; | |
| 633 | |
| 634 if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) { | |
| 635 $This->_InitializeAtomicInvariantsAtomTypesInformation(); | |
| 636 } | |
| 637 elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) { | |
| 638 $This->_InitializeFunctionalClassAtomTypesInformation(); | |
| 639 } | |
| 640 elsif ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) { | |
| 641 # Nothing to do for now... | |
| 642 } | |
| 643 else { | |
| 644 croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}..."; | |
| 645 } | |
| 646 | |
| 647 return $This; | |
| 648 } | |
| 649 | |
| 650 # Initialize atomic invariants atom types to use for generating atom identifiers... | |
| 651 # | |
| 652 # Let: | |
| 653 # AS = Atom symbol corresponding to element symbol | |
| 654 # | |
| 655 # X<n> = Number of non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 656 # BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 657 # LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 658 # SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 659 # DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 660 # TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom | |
| 661 # H<n> = Number of implicit and explicit hydrogens for atom | |
| 662 # Ar = Aromatic annotation indicating whether atom is aromatic | |
| 663 # RA = Ring atom annotation indicating whether atom is in a ring | |
| 664 # FC<+n/-n> = Formal charge assigned to atom | |
| 665 # MN<n> = Mass number indicating isotope other than most abundant isotope | |
| 666 # SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet) | |
| 667 # | |
| 668 # Then: | |
| 669 # | |
| 670 # Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
| 671 # | |
| 672 # AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n> | |
| 673 # | |
| 674 # Except for AS which is a required atomic invariant in atom types, all other atomic invariants are | |
| 675 # optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>. | |
| 676 # AtomID specification doesn't include atomic invariants with zero or undefined values. | |
| 677 # | |
| 678 sub _InitializeAtomicInvariantsAtomTypesInformation { | |
| 679 my($This) = @_; | |
| 680 | |
| 681 # Default atomic invariants to use for generating atom neighborhood atom IDs: AS, X, BO, H, FC | |
| 682 # | |
| 683 @{$This->{AtomicInvariantsToUse}} = (); | |
| 684 @{$This->{AtomicInvariantsToUse}} = ('AS', 'X', 'BO', 'H', 'FC'); | |
| 685 | |
| 686 return $This; | |
| 687 } | |
| 688 | |
| 689 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes | |
| 690 # class, to use for generating atom identifiers... | |
| 691 # | |
| 692 # Let: | |
| 693 # HBD: HydrogenBondDonor | |
| 694 # HBA: HydrogenBondAcceptor | |
| 695 # PI : PositivelyIonizable | |
| 696 # NI : NegativelyIonizable | |
| 697 # Ar : Aromatic | |
| 698 # Hal : Halogen | |
| 699 # H : Hydrophobic | |
| 700 # RA : RingAtom | |
| 701 # CA : ChainAtom | |
| 702 # | |
| 703 # Then: | |
| 704 # | |
| 705 # Functional class atom type specification for an atom corresponds to: | |
| 706 # | |
| 707 # Ar.CA.H.HBA.HBD.Hal.NI.PI.RA | |
| 708 # | |
| 709 # Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal | |
| 710 # | |
| 711 # FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]: | |
| 712 # | |
| 713 # HydrogenBondDonor: NH, NH2, OH | |
| 714 # HydrogenBondAcceptor: N[!H], O | |
| 715 # PositivelyIonizable: +, NH2 | |
| 716 # NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH | |
| 717 # | |
| 718 sub _InitializeFunctionalClassAtomTypesInformation { | |
| 719 my($This) = @_; | |
| 720 | |
| 721 # Default functional class atom types to use for generating atom identifiers | |
| 722 # are: HBD, HBA, PI, NI, Ar, Hal | |
| 723 # | |
| 724 @{$This->{FunctionalClassesToUse}} = (); | |
| 725 @{$This->{FunctionalClassesToUse}} = ('HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal'); | |
| 726 | |
| 727 return $This; | |
| 728 } | |
| 729 | |
| 730 # Return a string containing data for AtomNeighborhoodsFingerprints object... | |
| 731 # | |
| 732 sub StringifyAtomNeighborhoodsFingerprints { | |
| 733 my($This) = @_; | |
| 734 my($FingerprintsString); | |
| 735 | |
| 736 # Type of fingerprint... | |
| 737 $FingerprintsString = "Fingerprint type: $This->{Type}; AtomIdentifierType: $This->{AtomIdentifierType}; MinNeighborhoodRadius: $This->{MinNeighborhoodRadius}; MaxNeighborhoodRadius: $This->{MaxNeighborhoodRadius}"; | |
| 738 | |
| 739 if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) { | |
| 740 my($AtomicInvariant, @AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants); | |
| 741 | |
| 742 @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder(); | |
| 743 %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants(); | |
| 744 | |
| 745 for $AtomicInvariant (@AtomicInvariantsOrder) { | |
| 746 push @AtomicInvariants, "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}"; | |
| 747 } | |
| 748 | |
| 749 $FingerprintsString .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">"; | |
| 750 $FingerprintsString .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">"; | |
| 751 $FingerprintsString .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">"; | |
| 752 } | |
| 753 elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) { | |
| 754 my($FunctionalClass, @FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses); | |
| 755 | |
| 756 @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder(); | |
| 757 %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses(); | |
| 758 | |
| 759 for $FunctionalClass (@FunctionalClassesOrder) { | |
| 760 push @FunctionalClasses, "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}"; | |
| 761 } | |
| 762 | |
| 763 $FingerprintsString .= "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">"; | |
| 764 $FingerprintsString .= "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">"; | |
| 765 $FingerprintsString .= "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">"; | |
| 766 } | |
| 767 | |
| 768 # Total number of atom neighborhood atom IDs... | |
| 769 $FingerprintsString .= "; NumOfAtomNeighborhoodAtomIdentifiers: " . $This->{FingerprintsVector}->GetNumOfValues(); | |
| 770 | |
| 771 # FingerprintsVector... | |
| 772 $FingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >"; | |
| 773 | |
| 774 return $FingerprintsString; | |
| 775 } | |
| 776 | |
| 777 1; | |
| 778 | |
| 779 __END__ | |
| 780 | |
| 781 =head1 NAME | |
| 782 | |
| 783 AtomNeighborhoodsFingerprints | |
| 784 | |
| 785 =head1 SYNOPSIS | |
| 786 | |
| 787 use Fingerprints::AtomNeighborhoodsFingerprints; | |
| 788 | |
| 789 use Fingerprints::AtomNeighborhoodsFingerprints qw(:all); | |
| 790 | |
| 791 =head1 DESCRIPTION | |
| 792 | |
| 793 B<AtomNeighborhoodsFingerprints> [ Ref 53-56, Ref 73 ] class provides the following methods: | |
| 794 | |
| 795 new, GenerateFingerprints, GetDescription, SetAtomIdentifierType, | |
| 796 SetAtomicInvariantsToUse, SetFunctionalClassesToUse, SetMaxNeighborhoodRadius, | |
| 797 SetMinNeighborhoodRadius, StringifyAtomNeighborhoodsFingerprints | |
| 798 | |
| 799 B<AtomNeighborhoodsFingerprints> is derived from B<Fingerprints> class which in turn | |
| 800 is derived from B<ObjectProperty> base class that provides methods not explicitly defined | |
| 801 in B<AtomNeighborhoodsFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's | |
| 802 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property: | |
| 803 | |
| 804 Set<PropertyName>(<PropertyValue>); | |
| 805 $PropertyValue = Get<PropertyName>(); | |
| 806 Delete<PropertyName>(); | |
| 807 | |
| 808 The current release of MayaChemTools supports generation of B<AtomNeighborhoodsFingerprints> | |
| 809 corresponding to following B<AtomIdentifierTypes>: | |
| 810 | |
| 811 AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
| 812 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, | |
| 813 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes | |
| 814 | |
| 815 Based on the values specified for B<AtomIdentifierType> along with other specified | |
| 816 parameters such as B<AtomicInvariantsToUse> and B<FunctionalClassesToUse>, initial atom types are | |
| 817 assigned to all non-hydrogen atoms in a molecule. Using atom neighborhoods | |
| 818 around each non-hydrogen central atom corresponding to radii between specified values | |
| 819 B<MinNeighborhoodRadius> and B<MaxNeighborhoodRadius>, unique atom types at each radius | |
| 820 level are counted and an atom neighborhood identifier is generated. | |
| 821 | |
| 822 The format of an atom neighborhood identifier around a central non-hydrogen atom at a | |
| 823 specific radius is: | |
| 824 | |
| 825 NR<n>-<AtomType>-ATC<n> | |
| 826 | |
| 827 NR: Neighborhood radius | |
| 828 AtomType: Assigned atom type | |
| 829 ATC: Atom type count | |
| 830 | |
| 831 The atom neighborhood identifier for a non-hydrogen central atom corresponding to all specified radii | |
| 832 is generated by concatenating neighborhood identifiers at each radius using colon as a delimiter: | |
| 833 | |
| 834 NR<n>-<AtomType>-ATC<n>:NR<n>-<AtomType>-ATC<n>:... | |
| 835 | |
| 836 The atom neighborhood identifiers for all non-hydrogen central atoms at all specified radii are | |
| 837 concatenated using space as a delimiter and constitute atom neighborhood fingerprint of the molecule. | |
| 838 | |
| 839 The current release of MayaChemTools generates the following types of atom neighborhoods | |
| 840 fingerprints vector strings: | |
| 841 | |
| 842 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi | |
| 843 us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT | |
| 844 C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X | |
| 845 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A | |
| 846 TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2 | |
| 847 -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B... | |
| 848 | |
| 849 FingerprintsVector;AtomNeighborhoods:DREIDINGAtomTypes:MinRadius0:MaxR | |
| 850 adius2;41;AlphaNumericalValues;ValuesString;NR0-C_2-ATC1:NR1-C_3-ATC1: | |
| 851 NR1-O_2-ATC1:NR1-O_3-ATC1:NR2-C_3-ATC1 NR0-C_2-ATC1:NR1-C_R-ATC1:NR1-N | |
| 852 _3-ATC1:NR1-O_2-ATC1:NR2-C_R-ATC3 NR0-C_3-ATC1:NR1-C_2-ATC1:NR1-C_3-AT | |
| 853 C1:NR2-C_3-ATC1:NR2-O_2-ATC1:NR2-O_3-ATC2 NR0-C_3-ATC1:NR1-C_3-ATC1:NR | |
| 854 1-N_R-ATC1:NR2-C_3-ATC1:NR2-C_R-ATC2 NR0-C_3-ATC1:NR1-C_3-ATC1:NR2-... | |
| 855 | |
| 856 FingerprintsVector;AtomNeighborhoods:EStateAtomTypes:MinRadius0:MaxRad | |
| 857 ius2;41;AlphaNumericalValues;ValuesString;NR0-aaCH-ATC1:NR1-aaCH-ATC1: | |
| 858 NR1-aasC-ATC1:NR2-aaCH-ATC1:NR2-aasC-ATC1:NR2-sF-ATC1 NR0-aaCH-ATC1:NR | |
| 859 1-aaCH-ATC1:NR1-aasC-ATC1:NR2-aaCH-ATC1:NR2-aasC-ATC1:NR2-sF-ATC1 NR0- | |
| 860 aaCH-ATC1:NR1-aaCH-ATC1:NR1-aasC-ATC1:NR2-aaCH-ATC1:NR2-aasC-ATC2 NR0- | |
| 861 aaCH-ATC1:NR1-aaCH-ATC1:NR1-aasC-ATC1:NR2-aaCH-ATC1:NR2-aasC-ATC2 N... | |
| 862 | |
| 863 FingerprintsVector;AtomNeighborhoods:FunctionalClassAtomTypes:MinRadiu | |
| 864 s0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-Ar-ATC1:NR1-Ar- | |
| 865 ATC1:NR1-Ar.HBA-ATC1:NR1-None-ATC1:NR2-Ar-ATC2:NR2-None-ATC4 NR0-Ar-AT | |
| 866 C1:NR1-Ar-ATC2:NR1-Ar.HBA-ATC1:NR2-Ar-ATC5:NR2-None-ATC1 NR0-Ar-ATC1:N | |
| 867 R1-Ar-ATC2:NR1-HBD-ATC1:NR2-Ar-ATC2:NR2-None-ATC1 NR0-Ar-ATC1:NR1-Ar-A | |
| 868 TC2:NR1-Hal-ATC1:NR2-Ar-ATC2 NR0-Ar-ATC1:NR1-Ar-ATC2:NR1-None-ATC1:... | |
| 869 | |
| 870 FingerprintsVector;AtomNeighborhoods:MMFF94AtomTypes:MinRadius0:MaxRad | |
| 871 ius2;41;AlphaNumericalValues;ValuesString;NR0-C5A-ATC1:NR1-C5B-ATC1:NR | |
| 872 1-CB-ATC1:NR1-N5-ATC1:NR2-C5A-ATC1:NR2-C5B-ATC1:NR2-CB-ATC3:NR2-CR-ATC | |
| 873 1 NR0-C5A-ATC1:NR1-C5B-ATC1:NR1-CR-ATC1:NR1-N5-ATC1:NR2-C5A-ATC1:NR2-C | |
| 874 5B-ATC1:NR2-C=ON-ATC1:NR2-CR-ATC3 NR0-C5B-ATC1:NR1-C5A-ATC1:NR1-C5B-AT | |
| 875 C1:NR1-C=ON-ATC1:NR2-C5A-ATC1:NR2-CB-ATC1:NR2-CR-ATC1:NR2-N5-ATC1:N... | |
| 876 | |
| 877 FingerprintsVector;AtomNeighborhoods:SLogPAtomTypes:MinRadius0:MaxRadi | |
| 878 us2;41;AlphaNumericalValues;ValuesString;NR0-C1-ATC1:NR1-C10-ATC1:NR1- | |
| 879 CS-ATC1:NR2-C1-ATC1:NR2-N11-ATC1:NR2-O2-ATC1 NR0-C1-ATC1:NR1-C11-ATC1: | |
| 880 NR2-C1-ATC1:NR2-C21-ATC1 NR0-C1-ATC1:NR1-C11-ATC1:NR2-C1-ATC1:NR2-C21- | |
| 881 ATC1 NR0-C1-ATC1:NR1-C5-ATC1:NR1-CS-ATC1:NR2-C1-ATC1:NR2-O2-ATC2:NR2-O | |
| 882 9-ATC1 NR0-C1-ATC1:NR1-CS-ATC2:NR2-C1-ATC2:NR2-O2-ATC2 NR0-C10-ATC1... | |
| 883 | |
| 884 FingerprintsVector;AtomNeighborhoods:SYBYLAtomTypes:MinRadius0:MaxRadi | |
| 885 us2;41;AlphaNumericalValues;ValuesString;NR0-C.2-ATC1:NR1-C.3-ATC1:NR1 | |
| 886 -O.co2-ATC2:NR2-C.3-ATC1 NR0-C.2-ATC1:NR1-C.ar-ATC1:NR1-N.am-ATC1:NR1- | |
| 887 O.2-ATC1:NR2-C.ar-ATC3 NR0-C.3-ATC1:NR1-C.2-ATC1:NR1-C.3-ATC1:NR2-C.3- | |
| 888 ATC1:NR2-O.3-ATC1:NR2-O.co2-ATC2 NR0-C.3-ATC1:NR1-C.3-ATC1:NR1-N.ar-AT | |
| 889 C1:NR2-C.3-ATC1:NR2-C.ar-ATC2 NR0-C.3-ATC1:NR1-C.3-ATC1:NR2-C.3-ATC... | |
| 890 | |
| 891 FingerprintsVector;AtomNeighborhoods:TPSAAtomTypes:MinRadius0:MaxRadiu | |
| 892 s2;41;AlphaNumericalValues;ValuesString;NR0-N21-ATC1:NR1-None-ATC3:NR2 | |
| 893 -None-ATC5 NR0-N7-ATC1:NR1-None-ATC2:NR2-None-ATC3:NR2-O3-ATC1 NR0-Non | |
| 894 e-ATC1:NR1-N21-ATC1:NR1-None-ATC1:NR2-None-ATC3 NR0-None-ATC1:NR1-N21- | |
| 895 ATC1:NR1-None-ATC2:NR2-None-ATC6 NR0-None-ATC1:NR1-N21-ATC1:NR1-None-A | |
| 896 TC2:NR2-None-ATC6 NR0-None-ATC1:NR1-N7-ATC1:NR1-None-ATC1:NR1-O3-AT... | |
| 897 | |
| 898 FingerprintsVector;AtomNeighborhoods:UFFAtomTypes:MinRadius0:MaxRadius | |
| 899 2;41;AlphaNumericalValues;ValuesString;NR0-C_2-ATC1:NR1-C_3-ATC1:NR1-O | |
| 900 _2-ATC1:NR1-O_3-ATC1:NR2-C_3-ATC1 NR0-C_2-ATC1:NR1-C_R-ATC1:NR1-N_3-AT | |
| 901 C1:NR1-O_2-ATC1:NR2-C_R-ATC3 NR0-C_3-ATC1:NR1-C_2-ATC1:NR1-C_3-ATC1:NR | |
| 902 2-C_3-ATC1:NR2-O_2-ATC1:NR2-O_3-ATC2 NR0-C_3-ATC1:NR1-C_3-ATC1:NR1-N_R | |
| 903 -ATC1:NR2-C_3-ATC1:NR2-C_R-ATC2 NR0-C_3-ATC1:NR1-C_3-ATC1:NR2-C_3-A... | |
| 904 | |
| 905 =head2 METHODS | |
| 906 | |
| 907 =over 4 | |
| 908 | |
| 909 =item B<new> | |
| 910 | |
| 911 $NewAtomNeighborhoodsFingerprints = new AtomNeighborhoodsFingerprints( | |
| 912 %NamesAndValues); | |
| 913 | |
| 914 Using specified I<AtomNeighborhoodsFingerprints> property names and values hash, B<new> | |
| 915 method creates a new object and returns a reference to newly created B<AtomNeighborhoodsFingerprints> | |
| 916 object. By default, the following properties are initialized: | |
| 917 | |
| 918 Molecule = '' | |
| 919 Type = 'AtomNeighborhoods' | |
| 920 MinNeighborhoodRadius = 0 | |
| 921 MaxNeighborhoodRadius = 2 | |
| 922 AtomIdentifierType = '' | |
| 923 AtomicInvariantsToUse = ['AS', 'X', 'BO', 'H', 'FC', 'MN'] | |
| 924 FunctionalClassesToUse = ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal'] | |
| 925 | |
| 926 Examples: | |
| 927 | |
| 928 $AtomNeighborhoodsFingerprints = new AtomNeighborhoodsFingerprints( | |
| 929 'Molecule' => $Molecule, | |
| 930 'AtomIdentifierType' => | |
| 931 "AtomicInvariantsAtomTypes"); | |
| 932 | |
| 933 $AtomNeighborhoodsFingerprints = new AtomNeighborhoodsFingerprints( | |
| 934 'Molecule' => $Molecule, | |
| 935 'MinNeighborhoodRadius' => 0, | |
| 936 'MaxNeighborhoodRadius' => 2, | |
| 937 'AtomIdentifierType' => | |
| 938 'AtomicInvariantsAtomTypes', | |
| 939 'AtomicInvariantsToUse' => | |
| 940 ['AS', 'X', 'BO', 'H', 'FC'] ); | |
| 941 | |
| 942 $AtomNeighborhoodsFingerprints = new AtomNeighborhoodsFingerprints( | |
| 943 'Molecule' => $Molecule, | |
| 944 'AtomIdentifierType' => | |
| 945 'SYBYLAtomTypes'); | |
| 946 | |
| 947 $AtomNeighborhoodsFingerprints = new AtomNeighborhoodsFingerprints( | |
| 948 'Molecule' => $Molecule, | |
| 949 'AtomIdentifierType' => | |
| 950 'MMFF94AtomTypes'); | |
| 951 | |
| 952 $AtomNeighborhoodsFingerprints = new AtomNeighborhoodsFingerprints( | |
| 953 'Molecule' => $Molecule, | |
| 954 'AtomIdentifierType' => | |
| 955 'AtomicInvariantsAtomTypes'); | |
| 956 | |
| 957 $AtomNeighborhoodsFingerprints = new AtomNeighborhoodsFingerprints( | |
| 958 'Molecule' => $Molecule, | |
| 959 'MinNeighborhoodRadius' => 0, | |
| 960 'MaxNeighborhoodRadius' => 2, | |
| 961 'AtomIdentifierType' => | |
| 962 'FunctionalClassAtomTypes', | |
| 963 'FunctionalClassesToUse' => | |
| 964 ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal'] ); | |
| 965 | |
| 966 $AtomNeighborhoodsFingerprints->GenerateFingerprints(); | |
| 967 print "$AtomNeighborhoodsFingerprints\n"; | |
| 968 | |
| 969 =item B<GenerateFingerprints> | |
| 970 | |
| 971 $AtomNeighborhoodsFingerprints->GenerateFingerprints(); | |
| 972 | |
| 973 Generates atom neighborhood fingerprints and returns I<AtomNeighborhoodsFingerprints>. | |
| 974 | |
| 975 =item B<GetDescription> | |
| 976 | |
| 977 $Description = $AtomNeighborhoodsFingerprints->GetDescription(); | |
| 978 | |
| 979 Returns a string containing description of atom neighborhood fingerprints. | |
| 980 | |
| 981 =item B<SetAtomIdentifierType> | |
| 982 | |
| 983 $AtomNeighborhoodsFingerprints->SetAtomIdentifierType($IdentifierType); | |
| 984 | |
| 985 Sets atom I<IdentifierType> to use during atom neighborhood fingerprints generation and | |
| 986 returns I<AtomNeighborhoodsFingerprints>. | |
| 987 | |
| 988 Possible values: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
| 989 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, | |
| 990 TPSAAtomTypes, UFFAtomTypes>. | |
| 991 | |
| 992 =item B<SetAtomicInvariantsToUse> | |
| 993 | |
| 994 $AtomNeighborhoodsFingerprints->SetAtomicInvariantsToUse($ValuesRef); | |
| 995 $AtomNeighborhoodsFingerprints->SetAtomicInvariantsToUse(@Values); | |
| 996 | |
| 997 Sets atomic invariants to use during I<AtomicInvariantsAtomTypes> value of I<AtomIdentifierType> | |
| 998 for atom neighborhood fingerprints generation and returns I<AtomNeighborhoodsFingerprints>. | |
| 999 | |
| 1000 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB, | |
| 1001 H, Ar, RA, FC, MN, SM>. Default value: I<AS,X,BO,H,FC>. | |
| 1002 | |
| 1003 The atomic invariants abbreviations correspond to: | |
| 1004 | |
| 1005 AS = Atom symbol corresponding to element symbol | |
| 1006 | |
| 1007 X<n> = Number of non-hydrogen atom neighbors or heavy atoms | |
| 1008 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms | |
| 1009 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms | |
| 1010 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms | |
| 1011 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms | |
| 1012 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms | |
| 1013 H<n> = Number of implicit and explicit hydrogens for atom | |
| 1014 Ar = Aromatic annotation indicating whether atom is aromatic | |
| 1015 RA = Ring atom annotation indicating whether atom is in a ring | |
| 1016 FC<+n/-n> = Formal charge assigned to atom | |
| 1017 MN<n> = Mass number indicating isotope other than most abundant isotope | |
| 1018 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or | |
| 1019 3 (triplet) | |
| 1020 | |
| 1021 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
| 1022 | |
| 1023 AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n> | |
| 1024 | |
| 1025 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are | |
| 1026 optional. Atom type specification doesn't include atomic invariants with zero or undefined values. | |
| 1027 | |
| 1028 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words | |
| 1029 are also allowed: | |
| 1030 | |
| 1031 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors | |
| 1032 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms | |
| 1033 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms | |
| 1034 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms | |
| 1035 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms | |
| 1036 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms | |
| 1037 H : NumOfImplicitAndExplicitHydrogens | |
| 1038 Ar : Aromatic | |
| 1039 RA : RingAtom | |
| 1040 FC : FormalCharge | |
| 1041 MN : MassNumber | |
| 1042 SM : SpinMultiplicity | |
| 1043 | |
| 1044 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant | |
| 1045 atom types. | |
| 1046 | |
| 1047 =item B<SetFunctionalClassesToUse> | |
| 1048 | |
| 1049 $AtomNeighborhoodsFingerprints->SetFunctionalClassesToUse($ValuesRef); | |
| 1050 $AtomNeighborhoodsFingerprints->SetFunctionalClassesToUse(@Values); | |
| 1051 | |
| 1052 Sets functional classes to use during I<FunctionalClassAtomTypes> value of I<AtomIdentifierType> | |
| 1053 for atom neighborhoods fingerprints generation and returns I<AtomNeighborhoodsFingerprints>. | |
| 1054 | |
| 1055 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>. | |
| 1056 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>. | |
| 1057 | |
| 1058 The functional class abbreviations correspond to: | |
| 1059 | |
| 1060 HBD: HydrogenBondDonor | |
| 1061 HBA: HydrogenBondAcceptor | |
| 1062 PI : PositivelyIonizable | |
| 1063 NI : NegativelyIonizable | |
| 1064 Ar : Aromatic | |
| 1065 Hal : Halogen | |
| 1066 H : Hydrophobic | |
| 1067 RA : RingAtom | |
| 1068 CA : ChainAtom | |
| 1069 | |
| 1070 Functional class atom type specification for an atom corresponds to: | |
| 1071 | |
| 1072 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA or None | |
| 1073 | |
| 1074 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom | |
| 1075 types. It uses following definitions [ Ref 60-61, Ref 65-66 ]: | |
| 1076 | |
| 1077 HydrogenBondDonor: NH, NH2, OH | |
| 1078 HydrogenBondAcceptor: N[!H], O | |
| 1079 PositivelyIonizable: +, NH2 | |
| 1080 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH | |
| 1081 | |
| 1082 =item B<SetMaxNeighborhoodRadius> | |
| 1083 | |
| 1084 $AtomNeighborhoodsFingerprints->SetMaxNeighborhoodRadius($Radius); | |
| 1085 | |
| 1086 Sets maximum neighborhood radius to use during atom neighborhood fingerprints generation and | |
| 1087 returns I<AtomNeighborhoodsFingerprints>. | |
| 1088 | |
| 1089 =item B<SetMinNeighborhoodRadius> | |
| 1090 | |
| 1091 $AtomNeighborhoodsFingerprints->SetMinNeighborhoodRadius($Radius); | |
| 1092 | |
| 1093 Sets minimum neighborhood radius to use during atom neighborhood fingerprints generation and | |
| 1094 returns I<AtomNeighborhoodsFingerprints>. | |
| 1095 | |
| 1096 =item B<StringifyAtomNeighborhoodsFingerprints> | |
| 1097 | |
| 1098 $String = $Fingerprints->StringifyAtomNeighborhoodsFingerprints(); | |
| 1099 | |
| 1100 Returns a string containing information about I<AtomNeighborhoodsFingerprints> object. | |
| 1101 | |
| 1102 =back | |
| 1103 | |
| 1104 =head1 AUTHOR | |
| 1105 | |
| 1106 Manish Sud <msud@san.rr.com> | |
| 1107 | |
| 1108 =head1 SEE ALSO | |
| 1109 | |
| 1110 Fingerprints.pm, FingerprintsStringUtil.pm, AtomTypesFingerprints.pm, EStateIndiciesFingerprints.pm, | |
| 1111 ExtendedConnectivityFingerprints.pm, MACCSKeys.pm, PathLengthFingerprints.pm, | |
| 1112 TopologicalAtomPairsFingerprints.pm, TopologicalAtomTripletsFingerprints.pm, | |
| 1113 TopologicalAtomTorsionsFingerprints.pm, TopologicalPharmacophoreAtomPairsFingerprints.pm, | |
| 1114 TopologicalPharmacophoreAtomTripletsFingerprints.pm | |
| 1115 | |
| 1116 =head1 COPYRIGHT | |
| 1117 | |
| 1118 Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 1119 | |
| 1120 This file is part of MayaChemTools. | |
| 1121 | |
| 1122 MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 1123 the terms of the GNU Lesser General Public License as published by the Free | |
| 1124 Software Foundation; either version 3 of the License, or (at your option) | |
| 1125 any later version. | |
| 1126 | |
| 1127 =cut |
