MayaChemTools

   1 package AtomTypes::AtomTypes;
   2 #
   3 # $RCSfile: AtomTypes.pm,v $
   4 # $Date: 2015/02/28 20:48:03 $
   5 # $Revision: 1.21 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Carp;
  31 use Exporter;
  32 use Scalar::Util ();
  33 use Text::ParseWords;
  34 use ObjectProperty;
  35 use TextUtil ();
  36 
  37 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  38 
  39 @ISA = qw(ObjectProperty Exporter);
  40 @EXPORT = qw(LoadAtomTypesData);
  41 @EXPORT_OK = qw();
  42 
  43 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  44 
  45 # Setup class variables...
  46 my($ClassName);
  47 _InitializeClass();
  48 
  49 # Class constructor...
  50 sub new {
  51   my($Class, %NamesAndValues) = @_;
  52 
  53   # Initialize object...
  54   my $This = {};
  55   bless $This, ref($Class) || $Class;
  56   $This->_InitializeAtomTypes();
  57 
  58   $This->_InitializeAtomTypesProperties(%NamesAndValues);
  59 
  60   return $This;
  61 }
  62 
  63 # Initialize object data...
  64 #
  65 sub _InitializeAtomTypes {
  66   my($This) = @_;
  67 
  68   # Molecule object...
  69   $This->{Molecule} = '';
  70 
  71   # Type of AtomType...
  72   $This->{Type} = '';
  73 
  74   # By default, atom types are also assigned to hydrogens...
  75   $This->{IgnoreHydrogens} = 0;
  76 
  77 }
  78 
  79 # Initialize class ...
  80 sub _InitializeClass {
  81   #Class name...
  82   $ClassName = __PACKAGE__;
  83 }
  84 
  85 
  86 # Initialize object properties....
  87 sub _InitializeAtomTypesProperties {
  88   my($This, %NamesAndValues) = @_;
  89 
  90   my($Name, $Value, $MethodName);
  91   while (($Name, $Value) = each  %NamesAndValues) {
  92     $MethodName = "Set${Name}";
  93     $This->$MethodName($Value);
  94   }
  95 
  96   return $This;
  97 }
  98 
  99 # Set molecule object and make sure it's not already set...
 100 #
 101 sub SetMolecule {
 102   my($This, $Molecule) = @_;
 103 
 104   if ($This->{Molecule}) {
 105     croak "Error: ${ClassName}->SetMolecule: Can't change molecule object:  It's already set...";
 106   }
 107   $This->{Molecule} = $Molecule;
 108 
 109   # Weaken the reference to disable increment of reference count...
 110   Scalar::Util::weaken($This->{Molecule});
 111 
 112   return $This;
 113 }
 114 
 115 # Set type and make sure it's not already set...
 116 #
 117 sub SetType {
 118   my($This, $Type) = @_;
 119 
 120   if ($This->{Type}) {
 121     croak "Error: ${ClassName}->SetType: Can't change AtomType type:  It's already set...";
 122   }
 123   $This->{Type} = $Type;
 124 
 125   return $This;
 126 }
 127 
 128 # Set specific atom type...
 129 #
 130 sub SetAtomType {
 131   my($This, $Atom, $AtomType) = @_;
 132   my($MethodName);
 133 
 134   # Assign AtomType to Atom...
 135   $MethodName = "Set" . $This->{Type} . "AtomType";
 136   $Atom->$MethodName($AtomType);
 137 
 138   return $This;
 139 }
 140 
 141 # Get specific atom type...
 142 #
 143 sub GetAtomType {
 144   my($This, $Atom) = @_;
 145   my($MethodName, $AtomType);
 146 
 147   $MethodName = "Get" . $This->{Type} . "AtomType";
 148   $AtomType = $Atom->$MethodName();
 149 
 150   return defined $AtomType ? $AtomType : 'None';
 151 }
 152 
 153 # Get atom types for all atoms as a hash with atom ID and atom types as
 154 # key/value pairs.
 155 #
 156 # Notes:
 157 #   . Irrespective of ignore hydrogens value, atom type for hydrogens are also
 158 #     returned. Based on value of ignore hydrogens, atom type assignment methodology
 159 #     might igonore hydrogens and value of None is returned for the hydrogens.
 160 #
 161 sub GetAtomTypes {
 162   my($This) = @_;
 163   my($Atom, $AtomID,  %AtomTypesMap);
 164 
 165   %AtomTypesMap = ();
 166 
 167   if (!$This->{Molecule}) {
 168     return %AtomTypesMap;
 169   }
 170 
 171   for $Atom ($This->{Molecule}->GetAtoms()) {
 172     $AtomID = $Atom->GetID();
 173     $AtomTypesMap{$AtomID} = $This->GetAtomType($Atom);
 174   }
 175 
 176   return %AtomTypesMap;
 177 }
 178 
 179 # Are all atoms types successfully assigned?
 180 #
 181 # Notes:
 182 #   . Dynamic checking of atom types assignment for atoms eliminates the need
 183 #     to check and synchronize valid atom types during SetAtomType.
 184 #
 185 sub IsAtomTypesAssignmentSuccessful {
 186   my($This) = @_;
 187   my($Atom, $AtomType);
 188 
 189   ATOM: for $Atom ($This->{Molecule}->GetAtoms()) {
 190     if ($Atom->IsHydrogen() && $This->{IgnoreHydrogens}) {
 191       next ATOM;
 192     }
 193     $AtomType = $This->GetAtomType($Atom);
 194     if ($AtomType =~ /^None$/i) {
 195       return 0;
 196     }
 197   }
 198 
 199   return 1;
 200 }
 201 
 202 # Load atom types data from the specified CSV atom type file into the specified
 203 # hash reference.
 204 #
 205 # The lines starting with # are treated as comments and ignored. First line
 206 # not starting with # must contain column labels and the number of columns in
 207 # all other data rows must match the number of column labels.
 208 #
 209 # The first column is assumed to contain atom types; all other columns contain data
 210 # as indicated in their column labels.
 211 #
 212 # In order to avoid dependence of data access on the specified column labels, the
 213 # column data is loaded into hash with Column<Num> and AtomType as hash keys;
 214 # however, the data for the first column which is treated as AtomTypes is also loaded
 215 # into an array with AtomTypes as hash key. The format of the data structure loaded
 216 # into a specified hash reference is:
 217 #
 218 # @{$AtomTypesDataMapRef->{AtomTypes}} - Array of all possible atom types for all atoms
 219 # @{$AtomTypesDataMapRef->{NonHydrogenAtomTypes}} - Array of all possible atom types for non-hydrogen atoms
 220 # @{$AtomTypesDataMapRef->{ColLabels}} - Array of column labels
 221 # %{$AtomTypesDataMapRef->{DataCol<Num>}} - Hash keys pair: <DataCol<Num>, AtomType>
 222 #
 223 # Caveats:
 224 #   . The column number start from 1.
 225 #   . Column data for first column is not loaded into <Column<Num>, AtomType> hash keys pairs.
 226 #
 227 # AtomType file examples: SYBYLAtomTypes.csv, DREIDINGAtomTypes.csv,
 228 # MMFF94AtomTypes.csv etc.
 229 #
 230 # This functionality can be either invoked as a class function or an
 231 # object method.
 232 #
 233 sub LoadAtomTypesData {
 234   my($FirstParameter, @OtherParamaters) = @_;
 235   my($AtomTypesDataFile, $AtomTypesDataMapRef, $InDelim, $Line, $NumOfCols, $ColIndex, $ColNum, $ColLabel, $ColValue, $AtomType, %AtomTypes, @LineWords, @ColLabels, @ColDataLabels);
 236 
 237   if (Scalar::Util::blessed($FirstParameter)) {
 238     ($AtomTypesDataFile, $AtomTypesDataMapRef) = @OtherParamaters;
 239   }
 240   else {
 241     ($AtomTypesDataFile, $AtomTypesDataMapRef) = @_;
 242   }
 243 
 244   $InDelim = "\,";
 245   open ATOMTYPESDATAFILE, "$AtomTypesDataFile" or croak "Couldn't open $AtomTypesDataFile: $! ...";
 246 
 247   # Skip lines up to column labels...
 248   LINE: while ($Line = TextUtil::GetTextLine(\*ATOMTYPESDATAFILE)) {
 249     if ($Line !~ /^#/) {
 250       last LINE;
 251     }
 252   }
 253 
 254   # Initialize data map...
 255   %{$AtomTypesDataMapRef} = ();
 256   @{$AtomTypesDataMapRef->{AtomTypes}} = ();
 257   @{$AtomTypesDataMapRef->{NonHydrogenAtomTypes}} = ();
 258   @{$AtomTypesDataMapRef->{ColLabels}} = ();
 259 
 260   %AtomTypes = ();
 261 
 262   # Process column labels...
 263   @ColLabels= quotewords($InDelim, 0, $Line);
 264   $NumOfCols = @ColLabels;
 265   push @{$AtomTypesDataMapRef->{ColLabels}}, @ColLabels;
 266 
 267   # Set up column data labels for storing the data...
 268   @ColDataLabels = ();
 269   for $ColNum (1 .. $NumOfCols) {
 270     $ColLabel = "DataCol${ColNum}";
 271     push @ColDataLabels, $ColLabel;
 272   }
 273 
 274   # Initialize column data hash...
 275   for $ColIndex (1 .. $#ColDataLabels) {
 276     $ColLabel = $ColDataLabels[$ColIndex];
 277     %{$AtomTypesDataMapRef->{$ColLabel}} = ();
 278   }
 279 
 280   # Process atom types data assuming first column to be atom type..
 281   LINE: while ($Line = TextUtil::GetTextLine(\*ATOMTYPESDATAFILE)) {
 282     if ($Line =~ /^#/) {
 283       next LINE;
 284     }
 285     @LineWords = quotewords($InDelim, 0, $Line);
 286     if (@LineWords != $NumOfCols) {
 287       croak "Error: The number of data fields, @LineWords, in $AtomTypesDataFile must be $NumOfCols.\nLine: $Line...";
 288     }
 289     $AtomType = $LineWords[0];
 290     if (exists $AtomTypes{$AtomType}) {
 291       carp "Warning: Ignoring data for atom type, $AtomType, in file $AtomTypesDataFile: It has already been loaded.\nLine: $Line....";
 292       next LINE;
 293     }
 294 
 295     $AtomTypes{$AtomType} = $AtomType;
 296     push @{$AtomTypesDataMapRef->{AtomTypes}}, $AtomType;
 297 
 298     # Is it a non-hydrogen atom type?
 299     if ($AtomType !~ /^H/i || $AtomType =~ /^(HAL|HET|HEV)$/i || $AtomType =~ /^(He4|Ho6|Hf3|Hg1)/) {
 300       # Non-hydrogen SYBYL atom types starting with H: HAL, HET, HEV
 301       # Non-hydrogen UFF atom types starting with H: He4+4, Ho6+3, Hf3+4, Hg1+2
 302       #
 303       push @{$AtomTypesDataMapRef->{NonHydrogenAtomTypes}}, $AtomType;
 304     }
 305 
 306     # Track column data values...
 307     for $ColIndex (1 .. $#LineWords) {
 308       $ColLabel = $ColDataLabels[$ColIndex]; $ColValue = $LineWords[$ColIndex];
 309       $AtomTypesDataMapRef->{$ColLabel}{$AtomType} = $ColValue;
 310     }
 311   }
 312   close ATOMTYPESDATAFILE;
 313 }
 314