package AtomTypes::AtomTypes;
#
# $RCSfile: AtomTypes.pm,v $
# $Date: 2015/02/28 20:48:03 $
# $Revision: 1.21 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use warnings;
use Carp;
use Exporter;
use Scalar::Util ();
use Text::ParseWords;
use ObjectProperty;
use TextUtil ();

our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = qw(ObjectProperty Exporter);
@EXPORT = qw(LoadAtomTypesData);
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Class constructor...
#
# Parameters:
#   %NamesAndValues - Property names and values; each pair is applied by
#                     calling the Set<Name> method with the value.
#
# Returns the new object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Initialize object...
  my $This = {};
  bless $This, ref($Class) || $Class;
  $This->_InitializeAtomTypes();

  $This->_InitializeAtomTypesProperties(%NamesAndValues);

  return $This;
}

# Initialize object data...
#
sub _InitializeAtomTypes {
  my($This) = @_;

  # Molecule object...
  $This->{Molecule} = '';

  # Type of AtomType...
  $This->{Type} = '';

  # By default, atom types are also assigned to hydrogens...
  $This->{IgnoreHydrogens} = 0;

}

# Initialize class ...
sub _InitializeClass {
  #Class name...
  $ClassName = __PACKAGE__;
}


# Initialize object properties by calling Set<Name>(Value) for each
# specified name and value pair...
#
# Returns the object.
#
sub _InitializeAtomTypesProperties {
  my($This, %NamesAndValues) = @_;

  while (my($Name, $Value) = each %NamesAndValues) {
    my $MethodName = "Set${Name}";
    $This->$MethodName($Value);
  }

  return $This;
}

# Set molecule object and make sure it's not already set...
#
# Croaks when a molecule has already been set. Returns the object.
#
sub SetMolecule {
  my($This, $Molecule) = @_;

  if ($This->{Molecule}) {
    croak "Error: ${ClassName}->SetMolecule: Can't change molecule object:  It's already set...";
  }
  $This->{Molecule} = $Molecule;

  # Weaken the reference to disable increment of reference count...
  Scalar::Util::weaken($This->{Molecule});

  return $This;
}

# Set type and make sure it's not already set...
#
# Croaks when a type has already been set. Returns the object.
#
sub SetType {
  my($This, $Type) = @_;

  if ($This->{Type}) {
    croak "Error: ${ClassName}->SetType: Can't change AtomType type:  It's already set...";
  }
  $This->{Type} = $Type;

  return $This;
}

# Set specific atom type...
#
# The atom type is stored on the atom by calling its Set<Type>AtomType
# method, where <Type> is the type of this object. Returns the object.
#
sub SetAtomType {
  my($This, $Atom, $AtomType) = @_;

  # Assign AtomType to Atom...
  my $MethodName = "Set" . $This->{Type} . "AtomType";
  $Atom->$MethodName($AtomType);

  return $This;
}

# Get specific atom type...
#
# The atom type is retrieved from the atom by calling its Get<Type>AtomType
# method, where <Type> is the type of this object. The string 'None' is
# returned when that method returns an undefined value.
#
sub GetAtomType {
  my($This, $Atom) = @_;

  my $MethodName = "Get" . $This->{Type} . "AtomType";
  my $AtomType = $Atom->$MethodName();

  return defined $AtomType ? $AtomType : 'None';
}

# Get atom types for all atoms as a hash with atom ID and atom types as
# key/value pairs.
#
# Notes:
#   . Irrespective of ignore hydrogens value, atom type for hydrogens are also
#     returned. Based on value of ignore hydrogens, atom type assignment methodology
#     might ignore hydrogens and value of None is returned for the hydrogens.
#   . An empty hash is returned when no molecule is set.
#
sub GetAtomTypes {
  my($This) = @_;
  my(%AtomTypesMap);

  %AtomTypesMap = ();

  if (!$This->{Molecule}) {
    return %AtomTypesMap;
  }

  for my $Atom ($This->{Molecule}->GetAtoms()) {
    $AtomTypesMap{ $Atom->GetID() } = $This->GetAtomType($Atom);
  }

  return %AtomTypesMap;
}

# Are all atoms types successfully assigned?
#
# Returns 1 when every atom, skipping hydrogens if they are being ignored,
# has an atom type other than None; otherwise, returns 0.
#
# Notes:
#   . Dynamic checking of atom types assignment for atoms eliminates the need
#     to check and synchronize valid atom types during SetAtomType.
#   . 0 is returned when no molecule is available.
#
sub IsAtomTypesAssignmentSuccessful {
  my($This) = @_;

  # The molecule reference is weak and may also have never been set; like
  # GetAtomTypes, don't call methods on a missing molecule...
  if (!$This->{Molecule}) {
    return 0;
  }

  ATOM: for my $Atom ($This->{Molecule}->GetAtoms()) {
    if ($Atom->IsHydrogen() && $This->{IgnoreHydrogens}) {
      next ATOM;
    }
    my $AtomType = $This->GetAtomType($Atom);
    if ($AtomType =~ /^None$/i) {
      return 0;
    }
  }

  return 1;
}

# Load atom types data from the specified CSV atom type file into the specified
# hash reference.
#
# The lines starting with # are treated as comments and ignored. First line
# not starting with # must contain column labels and the number of columns in
# all other data rows must match the number of column labels.
#
# The first column is assumed to contain atom types; all other columns contain data
# as indicated in their column labels.
#
# In order to avoid dependence of data access on the specified column labels, the
# column data is loaded into hash with Column<Num> and AtomType as hash keys;
# however, the data for the first column which is treated as AtomTypes is also loaded
# into an array with AtomTypes as hash key. The format of the data structure loaded
# into a specified hash reference is:
#
# @{$AtomTypesDataMapRef->{AtomTypes}} - Array of all possible atom types for all atoms
# @{$AtomTypesDataMapRef->{NonHydrogenAtomTypes}} - Array of all possible atom types for non-hydrogen atoms
# @{$AtomTypesDataMapRef->{ColLabels}} - Array of column labels
# %{$AtomTypesDataMapRef->{DataCol<Num>}} - Hash keys pair: <DataCol<Num>, AtomType>
#
# Caveats:
#   . The column number start from 1.
#   . Column data for first column is not loaded into <Column<Num>, AtomType> hash keys pairs.
#   . Any data already present in the specified hash reference is discarded.
#   . Data for an atom type which has already been loaded is ignored with a warning.
#
# Errors:
#   . Croaks when the file can't be opened or when the number of fields in a
#     data row doesn't match the number of column labels.
#
# AtomType file examples: SYBYLAtomTypes.csv, DREIDINGAtomTypes.csv,
# MMFF94AtomTypes.csv etc.
#
# This functionality can be either invoked as a class function or an
# object method.
#
sub LoadAtomTypesData {
  my($FirstParameter, @OtherParameters) = @_;

  # Skip over the object when invoked as an object method...
  my($AtomTypesDataFile, $AtomTypesDataMapRef) = Scalar::Util::blessed($FirstParameter) ? @OtherParameters : @_;

  my $InDelim = "\,";

  # Three-argument open keeps characters in the file name from being treated
  # as an open mode; the lexical file handle is closed automatically on any
  # croak below...
  open my $AtomTypesDataFH, '<', $AtomTypesDataFile or croak "Couldn't open $AtomTypesDataFile: $! ...";

  # Skip lines up to column labels...
  my $Line;
  LINE: while ($Line = TextUtil::GetTextLine($AtomTypesDataFH)) {
    if ($Line !~ /^#/) {
      last LINE;
    }
  }

  # Initialize data map...
  %{$AtomTypesDataMapRef} = ();
  @{$AtomTypesDataMapRef->{AtomTypes}} = ();
  @{$AtomTypesDataMapRef->{NonHydrogenAtomTypes}} = ();
  @{$AtomTypesDataMapRef->{ColLabels}} = ();

  # Process column labels...
  my @ColLabels = quotewords($InDelim, 0, $Line);
  my $NumOfCols = @ColLabels;
  push @{$AtomTypesDataMapRef->{ColLabels}}, @ColLabels;

  # Set up column data labels for storing the data...
  my @ColDataLabels = map { "DataCol$_" } (1 .. $NumOfCols);

  # Initialize column data hash; the first column holds atom types and
  # doesn't get a column data hash...
  for my $ColIndex (1 .. $#ColDataLabels) {
    %{$AtomTypesDataMapRef->{ $ColDataLabels[$ColIndex] }} = ();
  }

  # Process atom types data assuming first column to be atom type..
  my %AtomTypes = ();
  LINE: while ($Line = TextUtil::GetTextLine($AtomTypesDataFH)) {
    if ($Line =~ /^#/) {
      next LINE;
    }
    my @LineWords = quotewords($InDelim, 0, $Line);
    if (@LineWords != $NumOfCols) {
      # Report the number of fields and not the fields themselves...
      my $NumOfLineWords = scalar @LineWords;
      croak "Error: The number of data fields, $NumOfLineWords, in $AtomTypesDataFile must be $NumOfCols.\nLine: $Line...";
    }
    my $AtomType = $LineWords[0];
    if (exists $AtomTypes{$AtomType}) {
      carp "Warning: Ignoring data for atom type, $AtomType, in file $AtomTypesDataFile: It has already been loaded.\nLine: $Line....";
      next LINE;
    }

    $AtomTypes{$AtomType} = $AtomType;
    push @{$AtomTypesDataMapRef->{AtomTypes}}, $AtomType;

    # Is it a non-hydrogen atom type?
    if ($AtomType !~ /^H/i || $AtomType =~ /^(HAL|HET|HEV)$/i || $AtomType =~ /^(He4|Ho6|Hf3|Hg1)/) {
      # Non-hydrogen SYBYL atom types starting with H: HAL, HET, HEV
      # Non-hydrogen UFF atom types starting with H: He4+4, Ho6+3, Hf3+4, Hg1+2
      #
      push @{$AtomTypesDataMapRef->{NonHydrogenAtomTypes}}, $AtomType;
    }

    # Track column data values...
    for my $ColIndex (1 .. $#LineWords) {
      my $ColLabel = $ColDataLabels[$ColIndex];
      $AtomTypesDataMapRef->{$ColLabel}{$AtomType} = $LineWords[$ColIndex];
    }
  }
  close $AtomTypesDataFH;
}