MayaChemTools

   1 package AtomTypes::DREIDINGAtomTypes;
   2 #
   3 # $RCSfile: DREIDINGAtomTypes.pm,v $
   4 # $Date: 2015/02/28 20:48:03 $
   5 # $Revision: 1.17 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Carp;
  31 use Exporter;
  32 use Scalar::Util ();
  33 use AtomTypes::AtomTypes;
  34 use Molecule;
  35 
  # Package variables used for inheritance and for exporting functions...
  use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

  # Inherit from the generic atom types class and from Exporter...
  @ISA = ('AtomTypes::AtomTypes', 'Exporter');

  # Functions exported by default; no functions are exported only on request...
  @EXPORT = (
    'GetDREIDINGAtomTypesData',
    'GetAllPossibleDREIDINGAtomTypes',
    'GetAllPossibleDREIDINGNonHydrogenAtomTypes',
  );
  @EXPORT_OK = ();

  %EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

  # Class variables: class name used in messages and the map holding DREIDING
  # atom types data, which is loaded on demand...
  my($ClassName, %DREIDINGAtomTypesDataMap);
  _InitializeClass();

  # Using an object as a string invokes StringifyDREIDINGAtomTypes...
  use overload '""' => 'StringifyDREIDINGAtomTypes';
  50 
  # Class constructor: creates a DREIDINGAtomTypes object using the specified
  # names and values. Each name and value pair is applied through the
  # corresponding Set<Name> method; the properties initializer croaks unless a
  # Molecule name and value pair is present...
  #
  sub new {
    my($Class, %NamesAndValues) = @_;

    # Works for both class name and object invocations...
    my $TargetClass = ref($Class) || $Class;

    # Let the parent class build the object and bless it into the target class...
    my $This = $Class->SUPER::new();
    bless $This, $TargetClass;

    # Defaults first, followed by caller specified properties...
    $This->_InitializeDREIDINGAtomTypes();
    $This->_InitializeDREIDINGAtomTypesProperties(%NamesAndValues);

    return $This;
  }
  64 
  # Initialize class level data: record the class name used in messages and
  # start out with an empty atom types data map...
  #
  sub _InitializeClass {
    # Name of this package...
    $ClassName = __PACKAGE__;

    # Nothing is loaded up front; the data map gets filled in on demand...
    %DREIDINGAtomTypesDataMap = ();
  }
  73 
  74 
  # Set up default object data and return the object:
  #
  #     o Type: 'DREIDING'
  #     o IgnoreHydrogens: 0, implying atom types are also assigned to hydrogens
  #
  sub _InitializeDREIDINGAtomTypes {
    my($This) = @_;

    @{$This}{'Type', 'IgnoreHydrogens'} = ('DREIDING', 0);

    return $This;
  }
  88 
  # Apply the specified names and values to the object by invoking the method
  # named Set<Name> with the value for each pair, and return the object.
  #
  # Croaks, after all pairs have been applied, when no Molecule name and value
  # pair was specified...
  #
  sub _InitializeDREIDINGAtomTypesProperties {
    my($This, %NamesAndValues) = @_;

    for my $Name (keys %NamesAndValues) {
      my $SetterName = "Set${Name}";
      $This->$SetterName($NamesAndValues{$Name});
    }

    # Make sure molecule object was specified...
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule..." unless exists $NamesAndValues{Molecule};

    return $This;
  }
 107 
 # Get DREIDING atom types and associated data, loaded on first use from the
 # DREIDING data file, as a reference to the class level data hash. With
 # $DataRef as the returned reference, the hash data format is:
 #
 # @{$DataRef->{AtomTypes}} - Array of all possible atom types for all atoms
 # @{$DataRef->{NonHydrogenAtomTypes}} - Array of all possible atom types for non-hydrogen atoms
 # @{$DataRef->{ColLabels}} - Array of column labels
 # %{$DataRef->{DataCol<Num>}} - Hash keys pair: <DataCol<Num>, AtomType>
 #
 # The reference points to the shared class data and not to a copy.
 #
 # This functionality can be either invoked as a class function or an
 # object method.
 #
 sub GetDREIDINGAtomTypesData {
   # Load data unless it's already available...
   _CheckAndLoadDREIDINGAtomTypesData();

   my $DataMapRef = \%DREIDINGAtomTypesDataMap;

   return $DataMapRef;
 }
 126 
 # Get a reference to an array containing all possible DREIDING atom types,
 # covering both hydrogen and non-hydrogen atoms...
 #
 # This functionality can be either invoked as a class function or an
 # object method; any arguments are ignored.
 #
 sub GetAllPossibleDREIDINGAtomTypes {
   my $AtomTypesRef = _GetAllPossibleDREIDINGAtomTypes();

   return $AtomTypesRef;
 }
 136 
 # Get a reference to an array containing all possible DREIDING atom types
 # for non-hydrogen atoms only...
 #
 # This functionality can be either invoked as a class function or an
 # object method; any arguments are ignored.
 #
 sub GetAllPossibleDREIDINGNonHydrogenAtomTypes {
   # A true argument restricts the list to non-hydrogen atom types...
   return _GetAllPossibleDREIDINGAtomTypes(1);
 }
 149 
 # Get all possible DREIDING atom types as an array reference. A true value
 # for NonHydrogensOnly selects atom types for non-hydrogen atoms; otherwise,
 # including when it's not specified, atom types for all atoms are returned...
 #
 sub _GetAllPossibleDREIDINGAtomTypes {
   my($NonHydrogensOnly) = @_;

   # Pick the data map key holding the requested list...
   my $DataKey = 'AtomTypes';
   if (defined($NonHydrogensOnly) && $NonHydrogensOnly) {
     $DataKey = 'NonHydrogenAtomTypes';
   }

   my $DREIDINGAtomTypesDataRef = GetDREIDINGAtomTypesData();

   return \@{$DREIDINGAtomTypesDataRef->{$DataKey}};
 }
 162 
 # Assign DREIDING [ Ref 88 ] atom types to all atoms in the molecule and
 # return the object. Hydrogens are skipped when IgnoreHydrogens is set...
 #
 # Notes:
 #     o 37 DREIDING atom types are listed
 #     o AtomTypes::DREIDINGAtomTypes.pm module is used to assign DREIDING atom types
 #     o Units:
 #         o ValenceBondRadius and NonBondRadius: Angstroms
 #         o ValenceAngle: Degrees
 #     o Five-character mnemonic label for DREIDING atom types
 #         o First two characters correspond to chemical symbol with an underscore as second
 #           character for elements with one character symbol
 #         o Third character describes hybridization: 1 - linear (sp); 2 - trigonal (sp2);
 #           3 - tetrahedral (sp3); R - sp2 involved in resonance situation
 #         o Fourth character used to indicate number of implicit hydrogens
 #         o Fourth and fifth characters are used as indicators of alternate parameters: formal
 #           oxidation state, bridging hydrogens and so on. The _HB type denotes a hydrogen atom
 #           capable of forming hydrogen bonds attached to (N, O, F). The H_b is the bridging
 #           hydrogen of diborane.
 #
 sub AssignAtomTypes {
   my($This) = @_;

   my @Atoms = $This->GetMolecule()->GetAtoms();

   for my $Atom (@Atoms) {
     # Leave out hydrogens on request...
     next if ($This->{IgnoreHydrogens} && $Atom->IsHydrogen());

     my $AtomType = $This->_GetAtomType($Atom);
     $This->SetAtomType($Atom, $AtomType);
   }

   return $This;
 }
 196 
 # Get DREIDING atom type for atom by handing it over to the element specific
 # method. Element checks are performed in the listed order and the first
 # matching check decides the method; atoms matching none of the checks are
 # handled by _GetAtomTypeForOtherAtoms...
 #
 sub _GetAtomType {
   my($This, $Atom) = @_;

   # Ordered pairs of element check and atom type method names...
   my @ElementHandlers = (
     ['IsCarbon', '_GetAtomTypeForCarbon'],
     ['IsNitrogen', '_GetAtomTypeForNitrogen'],
     ['IsOxygen', '_GetAtomTypeForOxygen'],
     ['IsPhosphorus', '_GetAtomTypeForPhosphorus'],
     ['IsSulfur', '_GetAtomTypeForSulfur'],
     ['IsHydrogen', '_GetAtomTypeForHydrogen'],
   );

   my $HandlerName = '_GetAtomTypeForOtherAtoms';
   HANDLER: for my $ElementHandler (@ElementHandlers) {
     my($CheckName, $MethodName) = @{$ElementHandler};
     if ($Atom->$CheckName()) {
       $HandlerName = $MethodName;
       last HANDLER;
     }
   }

   my $AtomType = $This->$HandlerName($Atom);

   return $AtomType;
 }
 235 
 # Get DREIDING atom type for Carbon atom...
 #
 # Aromatic carbons are typed as C_R. Otherwise, the atom type is decided by
 # the number of pi bonds to non-hydrogen atoms: 0 - C_3; 1 - C_2; 2 - C_1.
 # For any other number of pi bonds, a warning is issued and 'None' is
 # returned.
 #
 sub _GetAtomTypeForCarbon {
   my($This, $Atom) = @_;

   if ($Atom->IsAromatic()) {
     return 'C_R';
   }

   # Number of sigma bonds plays no role in typing; only pi bonds are needed...
   my(undef, $NumOfPiBonds) = $Atom->GetNumOfSigmaAndPiBondsToNonHydrogenAtoms();

   # Only single bonds...
   if ($NumOfPiBonds == 0) {
     return 'C_3';
   }

   # One double bond...
   if ($NumOfPiBonds == 1) {
     return 'C_2';
   }

   # One triple bond or two double bonds...
   if ($NumOfPiBonds == 2) {
     return 'C_1';
   }

   carp "Warning: ${ClassName}->_GetAtomTypeForCarbon: DREIDING atom types for Carbon can't be assigned...";

   return 'None';
 }
 279 
 # Get DREIDING atom type for Nitrogen atom...
 #
 # Aromatic nitrogens are typed as N_R. Otherwise, the atom type is decided by
 # the number of pi bonds to non-hydrogen atoms: 0 - N_3; 1 - N_2; 2 - N_1.
 # For any other number of pi bonds, a warning is issued and 'None' is
 # returned.
 #
 sub _GetAtomTypeForNitrogen {
   my($This, $Atom) = @_;

   if ($Atom->IsAromatic()) {
     return 'N_R';
   }

   # Number of sigma bonds plays no role in typing; only pi bonds are needed...
   my(undef, $NumOfPiBonds) = $Atom->GetNumOfSigmaAndPiBondsToNonHydrogenAtoms();

   # Only single bonds...
   if ($NumOfPiBonds == 0) {
     return 'N_3';
   }

   # One double bond...
   if ($NumOfPiBonds == 1) {
     return 'N_2';
   }

   # One triple bond or two double bonds...
   if ($NumOfPiBonds == 2) {
     return 'N_1';
   }

   carp "Warning: ${ClassName}->_GetAtomTypeForNitrogen: DREIDING atom types for Nitrogen can't be assigned...";

   return 'None';
 }
 323 
 # Get DREIDING atom type for Oxygen atom...
 #
 # Aromatic oxygens are typed as O_R. Otherwise, the atom type is decided by
 # the number of pi bonds to non-hydrogen atoms: 0 - O_3; 1 - O_2; 2 - O_1.
 # For any other number of pi bonds, a warning is issued and 'None' is
 # returned.
 #
 sub _GetAtomTypeForOxygen {
   my($This, $Atom) = @_;

   if ($Atom->IsAromatic()) {
     return 'O_R';
   }

   # Number of sigma bonds plays no role in typing; only pi bonds are needed...
   my(undef, $NumOfPiBonds) = $Atom->GetNumOfSigmaAndPiBondsToNonHydrogenAtoms();

   # Only single bonds...
   if ($NumOfPiBonds == 0) {
     return 'O_3';
   }

   # One double bond...
   if ($NumOfPiBonds == 1) {
     return 'O_2';
   }

   # One triple bond or two double bonds...
   if ($NumOfPiBonds == 2) {
     return 'O_1';
   }

   carp "Warning: ${ClassName}->_GetAtomTypeForOxygen: DREIDING atom types for Oxygen can't be assigned...";

   return 'None';
 }
 367 
 # Get DREIDING atom type for Phosphorus atom: P_3 is assigned irrespective
 # of the bonding environment of the atom...
 #
 sub _GetAtomTypeForPhosphorus {
   my($This, $Atom) = @_;

   return 'P_3';
 }
 378 
 # Get DREIDING atom type for Sulfur atom: S_3 is assigned irrespective of
 # the bonding environment of the atom...
 #
 sub _GetAtomTypeForSulfur {
   my($This, $Atom) = @_;

   return 'S_3';
 }
 389 
 # Get DREIDING atom type for Hydrogen atom...
 #
 # The atom type is decided by non-hydrogen atom neighbors:
 #
 #     o More than one neighbor: H___b - bridging hydrogen as in B2H6
 #     o One neighbor corresponding to N, O or F: H__HB - hydrogen capable of
 #       hydrogen bonding
 #     o Anything else, including no non-hydrogen neighbor at all: H_
 #
 sub _GetAtomTypeForHydrogen {
   my($This, $Atom) = @_;

   my @NonHydrogenAtomNeighbors = $Atom->GetNonHydrogenAtomNeighbors();
   my $NumOfNeighbors = scalar @NonHydrogenAtomNeighbors;

   if ($NumOfNeighbors > 1) {
     # Bridging hydrogen as in B2H6
     return 'H___b';
   }

   if ($NumOfNeighbors == 0) {
     # No heavy atom to look at, e.g. H2 or an isolated hydrogen; without this
     # check, a method would be invoked on an undefined neighbor atom...
     return 'H_';
   }

   my $NeighborAtom = $NonHydrogenAtomNeighbors[0];
   if ($NeighborAtom->GetAtomicNumber() =~ /^(7|8|9)$/) {
     # Involved in hydrogen bonding due to its attachment to N, O, or F
     return 'H__HB';
   }

   return 'H_';
 }
 418 
 # Get DREIDING atom type for atoms other than Carbon, Nitrogen, Oxygen,
 # Phosphorus, Sulfur and Hydrogen using atomic number and atom symbol:
 #
 #     o F, Cl, Br, I: atom symbol, with an underscore appended to one
 #       character symbols
 #     o B: B_3 when number of non-hydrogen atom neighbors plus hydrogens is 4;
 #       B_2 otherwise
 #     o Al, Si, Ga, Ge, As, Se, In, Sn, Sb, Te: atom symbol followed by 3
 #     o Na, Ca, Fe, Zn: atom symbol
 #
 # For any other atom, a warning is issued and 'None' is returned.
 #
 sub _GetAtomTypeForOtherAtoms {
   my($This, $Atom) = @_;

   my $AtomicNumber = $Atom->GetAtomicNumber();
   my $AtomSymbol = $Atom->GetAtomSymbol();

   # F, Cl, Br, I
   if ($AtomicNumber =~ /^(9|17|35|53)$/i) {
     return $AtomSymbol if (length($AtomSymbol) != 1);
     return "${AtomSymbol}_";
   }

   # B: B_2 and B_3
   if ($AtomicNumber =~ /^5$/i) {
     my $NumOfNeighbors = $Atom->GetNumOfNonHydrogenAtomNeighbors() + $Atom->GetAtomicInvariantValue('H');
     return ($NumOfNeighbors == 4) ? "B_3" : "B_2";
   }

   # Al, Si, Ga, Ge, As, Se, In, Sn, Sb, Te
   if ($AtomicNumber =~ /^(13|14|31|32|33|34|49|50|51|52)$/i) {
     return "${AtomSymbol}3";
   }

   # Na, Ca, Fe, Zn
   if ($AtomicNumber =~ /^(11|20|26|30)$/i) {
     return $AtomSymbol;
   }

   carp "Warning: ${ClassName}->_GetAtomTypeForOtherAtoms: DREIDING atom types for atom, $AtomSymbol, with atomic number, $AtomicNumber, cann't be assigned...";

   return 'None';
 }
 462 
 # Return a string containing data for DREIDINGAtomTypes object: type of atom
 # types, IgnoreHydrogens as Yes or No, and AtomID:AtomType pairs listed in
 # increasing numerical order of atom IDs. 'None' is shown for atom IDs
 # without a true atom type value...
 #
 sub StringifyDREIDINGAtomTypes {
   my($This) = @_;

   my $IgnoreHydrogensLabel = $This->{IgnoreHydrogens} ? "Yes" : "No";

   # Setup atom types information...
   my %AssignedAtomTypes = $This->GetAtomTypes();
   my @AtomTypesInfo = map { "$_:" . ($AssignedAtomTypes{$_} || 'None') } sort { $a <=> $b } keys %AssignedAtomTypes;

   return "AtomTypes: $This->{Type}; IgnoreHydrogens: " . $IgnoreHydrogensLabel
     . "; AtomIDs:AtomTypes: <" . TextUtil::JoinWords(\@AtomTypesInfo, ", ", 0) . ">";
 }
 486 
 # Is it a DREIDINGAtomTypes object? Returns 1 for a blessed reference which
 # is an instance of this class, or of a class derived from it, and 0 for
 # everything else...
 #
 sub _IsDREIDINGAtomTypes {
   my($Object) = @_;

   # Plain scalars and unblessed references can't be objects...
   return 0 unless Scalar::Util::blessed($Object);

   return $Object->isa($ClassName) ? 1 : 0;
 }
 493 
 # Check and load DREIDING atom types data: loading is skipped when the
 # AtomTypes key is already present in the class data map...
 #
 sub _CheckAndLoadDREIDINGAtomTypesData {
   # Is it already loaded?
   return if exists $DREIDINGAtomTypesDataMap{AtomTypes};

   _LoadDREIDINGAtomTypesData();
 }
 505 
 # Load DREIDING atom types data from data/DREIDINGAtomTypes.csv, located
 # under MayaChemTools lib directory, into the class data map assuming first
 # column to be atom type symbol. Croaks when the data file doesn't exist...
 #
 # Format:
 #
 # "AtomType","ValenceBondRadius","ValenceAngle"
 # "H_","0.330","180.0"
 # "C_3","0.770","109.471"
 # "C_R","0.700","120.0"
 # "C_2","0.670","120.0"
 # "C_1","0.602","180.0"
 # "N_3","0.702","106.7"
 #
 sub _LoadDREIDINGAtomTypesData {
   my $LibDir = FileUtil::GetMayaChemToolsLibDirName();
   my $DataFile = "${LibDir}/data/DREIDINGAtomTypes.csv";

   unless (-e $DataFile) {
     croak "Error: MayaChemTools package file, $DataFile, is missing: Possible installation problems...";
   }

   # Start with an empty map before handing it over to the generic loader...
   %DREIDINGAtomTypesDataMap = ();
   AtomTypes::AtomTypes::LoadAtomTypesData($DataFile, \%DREIDINGAtomTypesDataMap);
 }
 531