Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/FileIO/MDLMolFileIO.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
| author | deepakjadmin |
|---|---|
| date | Wed, 20 Jan 2016 09:23:18 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4816e4a8ae95 |
|---|---|
| 1 package FileIO::MDLMolFileIO; | |
| 2 # | |
| 3 # $RCSfile: MDLMolFileIO.pm,v $ | |
| 4 # $Date: 2015/02/28 20:48:43 $ | |
| 5 # $Revision: 1.32 $ | |
| 6 # | |
| 7 # Author: Manish Sud <msud@san.rr.com> | |
| 8 # | |
| 9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 10 # | |
| 11 # This file is part of MayaChemTools. | |
| 12 # | |
| 13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 14 # the terms of the GNU Lesser General Public License as published by the Free | |
| 15 # Software Foundation; either version 3 of the License, or (at your option) any | |
| 16 # later version. | |
| 17 # | |
| 18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
| 19 # any warranty; without even the implied warranty of merchantability or fitness | |
| 20 # for a particular purpose. See the GNU Lesser General Public License for more | |
| 21 # details. | |
| 22 # | |
| 23 # You should have received a copy of the GNU Lesser General Public License | |
| 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
| 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
| 26 # Boston, MA, 02111-1307, USA. | |
| 27 # | |
| 28 | |
| 29 use strict; | |
| 30 use Carp; | |
| 31 use Exporter; | |
| 32 use Scalar::Util (); | |
| 33 use TextUtil (); | |
| 34 use FileUtil (); | |
| 35 use SDFileUtil (); | |
| 36 use FileIO::FileIO; | |
| 37 use Molecule; | |
| 38 | |
| 39 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); | |
| 40 | |
| 41 @ISA = qw(FileIO::FileIO Exporter); | |
| 42 @EXPORT = qw(); | |
| 43 @EXPORT_OK = qw(IsMDLMolFile); | |
| 44 | |
| 45 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); | |
| 46 | |
| 47 # Setup class variables... | |
| 48 my($ClassName); | |
| 49 _InitializeClass(); | |
| 50 | |
| 51 # Class constructor... | |
# Construct a new MDLMolFileIO object.
#
# Parameters:
#   $Class          - class name or an existing object of this class
#   %NamesAndValues - property names and values; each one is applied through
#                     the corresponding Set<PropertyName> method
#
# Returns the newly created object.
sub new {
  my($Class, %NamesAndValues) = @_;

  # Let the base class build the object, then rebless it into this class...
  my $This = $Class->SUPER::new();
  my $TargetClass = ref($Class) || $Class;
  bless $This, $TargetClass;

  $This->_InitializeMDLMolFileIO();
  $This->_InitializeMDLMolFileIOProperties(%NamesAndValues);

  return $This;
}
| 64 | |
| 65 # Initialize any local object data... | |
| 66 # | |
# Hook for object level defaults. Currently a no-op: the base class set up
# everything this class needs. Returns the object it was given.
sub _InitializeMDLMolFileIO {
  my $This = shift;

  return $This;
}
| 74 | |
| 75 # Initialize class ... | |
# One time class setup: remember the package name in the file level
# $ClassName variable, which is used in messages and in isa checks.
sub _InitializeClass {
  $ClassName = __PACKAGE__;
}
| 81 | |
| 82 # Initialize object values... | |
# Apply the constructor's property names and values to the object.
#
# Each pair is forwarded to the matching Set<PropertyName> method. After all
# pairs are applied, the call croaks unless a Name (file name) was supplied and
# IsMDLMolFile accepts it.
#
# Returns the object.
sub _InitializeMDLMolFileIOProperties {
  my($This, %NamesAndValues) = @_;

  # Set<PropertyName> methods are provided on demand by the object property machinery...
  for my $PropertyName (keys %NamesAndValues) {
    my $SetterName = "Set${PropertyName}";
    $This->$SetterName($NamesAndValues{$PropertyName});
  }

  exists $NamesAndValues{Name}
    or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";

  # Make sure it's a MDLMol file...
  my $FileName = $NamesAndValues{Name};
  $This->IsMDLMolFile($FileName)
    or croak "Error: ${ClassName}->New: Object can't be instantiated: File, $FileName, doesn't appear to be MDLMol format...";

  return $This;
}
| 107 | |
| 108 # Is it a MDLMol file? | |
# Check whether a file is a MDLMol file.
#
# May be called as a method ($Object->IsMDLMolFile($FileName)) or as a plain
# function (IsMDLMolFile($FileName)).
#
# Returns the status reported by FileUtil::CheckFileType for the "mol" file type.
sub IsMDLMolFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  # With two arguments and an object in front, the file name is the second one...
  my $CalledAsMethod = ((@_ == 2) && (_IsMDLMolFileIO($FirstParameter)));
  my $FileName = $CalledAsMethod ? $SecondParameter : $FirstParameter;

  # Check file extension...
  return scalar FileUtil::CheckFileType($FileName, "mol");
}
| 125 | |
| 126 # Read molecule from file and return molecule object... | |
# Read the next compound from the object's file handle and return what
# ParseMoleculeString produces for it.
sub ReadMolecule {
  my $This = shift;

  my $FileHandle = $This->GetFileHandle();

  return $This->ParseMoleculeString(SDFileUtil::ReadCmpdString($FileHandle));
}
| 134 | |
| 135 # Write compound data using Molecule object... | |
# Write a molecule to the object's file handle in MDLMol format.
#
# Parameters:
#   $Molecule - object answering true to IsMolecule()
#
# Warns and writes nothing when no molecule object is given. Always returns
# the MDLMolFileIO object.
sub WriteMolecule {
  my($This, $Molecule) = @_;

  unless (defined($Molecule) && $Molecule->IsMolecule()) {
    carp "Warning: ${ClassName}->WriteMolecule: No data written: Molecule object is not specified...";
    return $This;
  }

  my $FileHandle = $This->GetFileHandle();
  my $MoleculeString = $This->GenerateMoleculeString($Molecule);

  print {$FileHandle} $MoleculeString . "\n";

  return $This;
}
| 150 | |
| 151 # Retrieve molecule string... | |
# Read the next compound from the object's file handle and return it as
# delivered by SDFileUtil::ReadCmpdString, without parsing.
sub ReadMoleculeString {
  my $This = shift;

  my $FileHandle = $This->GetFileHandle();

  return SDFileUtil::ReadCmpdString($FileHandle);
}
| 159 | |
| 160 # Parse molecule string and return molecule object. ParseMoleculeString supports two invocation methods: class | |
| 161 # method or a package function. | |
| 162 # | |
# Parse a MDLMol formatted string and return a Molecule object.
#
# May be called as a method ($Object->ParseMoleculeString($MoleculeString)) or
# as a plain function (ParseMoleculeString($MoleculeString)).
#
# Layout assumed by the line indices used below:
#   line 0       molecule name
#   line 1       misc info (user initials, program name, date, ...)
#   line 2       comments
#   line 3       counts (atom count, bond count, chiral flag, ...)
#   line 4...    atom lines, then bond lines, then property lines
#
# Returns undef for a missing or empty molecule string.
#
# Notes:
#   . The property block scan stops at the M END line, at a $$$$ delimiter, or
#     when the data runs out. The last check keeps a truncated block without
#     a terminator from looping forever.
#   . Property keywords are matched with flexible spacing after the leading
#     "M" or "A".
#
sub ParseMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;
  my($MoleculeString);

  # With two arguments and an object in front, the string is the second one...
  if ((@_ == 2) && (_IsMDLMolFileIO($FirstParameter))) {
    $MoleculeString = $SecondParameter;
  }
  else {
    $MoleculeString = $FirstParameter;
  }
  if (!$MoleculeString) {
    return undef;
  }

  my(@MoleculeLines);
  @MoleculeLines = split /\n/, $MoleculeString;

  # Create molecule object and set molecule level native and MDL properties...
  my $Molecule = Molecule->new();

  # Set valence model for calculating implicit hydrogens...
  $Molecule->SetValenceModel('MDLValenceModel');

  # Process header lines...
  my($MoleculeName) = SDFileUtil::ParseCmpdMolNameLine($MoleculeLines[0]);
  $MoleculeName = TextUtil::RemoveTrailingWhiteSpaces($MoleculeName);
  $Molecule->SetName($MoleculeName);

  my($UserInitial, $ProgramName, $Date, $Code, $ScalingFactor1, $ScalingFactor2, $Energy, $RegistryNum) = SDFileUtil::ParseCmpdMiscInfoLine($MoleculeLines[1]);
  $Molecule->SetProperties('MDLUserInitial' => $UserInitial, 'MDLProgramName' => $ProgramName, 'MDLDate' => $Date, 'MDLCode' => $Code, 'MDLScalingFactor1' => $ScalingFactor1, 'MDLScalingFactor2' => $ScalingFactor2, 'MDLEnergy' => $Energy, 'MDLRegistryNum' => $RegistryNum);

  my($Comments) = SDFileUtil::ParseCmpdCommentsLine($MoleculeLines[2]);
  $Molecule->SetProperties('MDLComments' => $Comments);

  my($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) = SDFileUtil::ParseCmpdCountsLine($MoleculeLines[3]);
  $Molecule->SetProperties('MDLChiralFlag' => $ChiralFlag, 'MDLPropertyCount' => $PropertyCount, 'MDLVersion' => $Version);

  # Process atom data...
  my($LineIndex, $FirstAtomLineIndex, $LastAtomLineIndex, $AtomNum, %AtomNumToAtomMap);

  $AtomNum = 0;
  %AtomNumToAtomMap = ();
  $FirstAtomLineIndex = 4;
  $LastAtomLineIndex = $FirstAtomLineIndex + $AtomCount - 1;

  for ($LineIndex = $FirstAtomLineIndex; $LineIndex <= $LastAtomLineIndex; $LineIndex++) {
    $AtomNum++;
    my($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity) = SDFileUtil::ParseCmpdAtomLine($MoleculeLines[$LineIndex]);

    my $Atom = Atom->new('AtomSymbol' => $AtomSymbol, 'XYZ' => [$AtomX, $AtomY, $AtomZ]);

    if ($MassDifference && $MassDifference != 0) {
      _ProcessMassDifference($Atom, $MassDifference);
    }
    if ($Charge && $Charge != 0) {
      _ProcessCharge($Atom, $Charge);
    }
    if ($StereoParity && $StereoParity != 0) {
      _ProcessStereoParity($Atom, $StereoParity);
    }

    # Atom numbers in bond and property lines are 1 based positions in the atom block...
    $AtomNumToAtomMap{$AtomNum} = $Atom;
    $Molecule->AddAtom($Atom);
  }

  # Process bond data...
  my($FirstBondLineIndex, $LastBondLineIndex);

  $FirstBondLineIndex = $FirstAtomLineIndex + $AtomCount;
  $LastBondLineIndex = $FirstAtomLineIndex + $AtomCount + $BondCount - 1;

  for ($LineIndex = $FirstBondLineIndex; $LineIndex <= $LastBondLineIndex; $LineIndex++) {
    my($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = SDFileUtil::ParseCmpdBondLine($MoleculeLines[$LineIndex]);

    my $Atom1 = $AtomNumToAtomMap{$FirstAtomNum};
    my $Atom2 = $AtomNumToAtomMap{$SecondAtomNum};

    my($InternalBondOrder, $InternalBondType) = SDFileUtil::MDLBondTypeToInternalBondOrder($BondType);
    my $Bond = Bond->new('Atoms' => [$Atom1, $Atom2], 'BondOrder' => $InternalBondOrder);
    $Bond->SetBondType($InternalBondType);

    if ($BondStereo && $BondStereo != 0) {
      _ProcessBondStereo($Bond, $BondStereo);
    }

    $Molecule->AddBond($Bond);
  }

  # Process available property block lines starting with A aaa, M CHG, M ISO and M RAD. All other
  # property block lines are for query or specific display purposes and are ignored for now.
  #
  my($PropertyLineIndex, $PropertyLine, $FirstChargeOrRadicalLine, @ValuePairs);

  $PropertyLineIndex = $FirstAtomLineIndex + $AtomCount + $BondCount;
  $PropertyLine = $MoleculeLines[$PropertyLineIndex];
  $FirstChargeOrRadicalLine = 1;

  # An undefined line means the data ended without a terminator: stop instead of spinning...
  PROPERTYLINE: while (defined($PropertyLine) && $PropertyLine !~ /^M\s+END/i) {
    if ($PropertyLine =~ /\$\$\$\$/) {
      last PROPERTYLINE;
    }
    if ($PropertyLine =~ /^M\s+(CHG|RAD)/i) {
      my $PropertyType = uc $1;

      # The first M CHG or M RAD line clears charge and radical values set from the atom block...
      if ($FirstChargeOrRadicalLine) {
        $FirstChargeOrRadicalLine = 0;
        _ZeroOutAtomsChargeAndRadicalValues(\%AtomNumToAtomMap);
      }
      if ($PropertyType eq 'CHG') {
        @ValuePairs = SDFileUtil::ParseCmpdChargePropertyLine($PropertyLine);
        _ProcessChargeProperty(\@ValuePairs, \%AtomNumToAtomMap);
      }
      else {
        @ValuePairs = SDFileUtil::ParseCmpdRadicalPropertyLine($PropertyLine);
        _ProcessRadicalProperty(\@ValuePairs, \%AtomNumToAtomMap);
      }
    }
    elsif ($PropertyLine =~ /^M\s+ISO/i) {
      @ValuePairs = SDFileUtil::ParseCmpdIsotopePropertyLine($PropertyLine);
      _ProcessIsotopeProperty(\@ValuePairs, \%AtomNumToAtomMap);
    }
    elsif ($PropertyLine =~ /^A\s+/i) {
      # Atom alias takes two lines: the A line and the line holding the alias text...
      my($NextPropertyLine);
      $PropertyLineIndex++;
      $NextPropertyLine = $MoleculeLines[$PropertyLineIndex];
      @ValuePairs = SDFileUtil::ParseCmpdAtomAliasPropertyLine($PropertyLine, $NextPropertyLine);
      _ProcessAtomAliasProperty(\@ValuePairs, \%AtomNumToAtomMap);
    }
    $PropertyLineIndex++;
    $PropertyLine = $MoleculeLines[$PropertyLineIndex];
  }

  # Store input molecule string as generic property of molecule...
  $Molecule->SetInputMoleculeString($MoleculeString);

  return $Molecule;
}
| 304 | |
| 305 # Generate molecule string using molecule object... | |
# Generate a MDLMol formatted string for a molecule object.
#
# May be called as a method ($Object->GenerateMoleculeString($Molecule)) or as
# a plain function (GenerateMoleculeString($Molecule)).
#
# Output order: molecule name line, misc info line, comments line, counts line,
# atom lines, bond lines, isotope/charge/radical/atom alias property lines and
# the terminating M END line, joined by newlines.
#
# Returns undef when no molecule is given.
#
sub GenerateMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;

  # With two arguments and an object in front, the molecule is the second one...
  my $CalledAsMethod = ((@_ == 2) && (_IsMDLMolFileIO($FirstParameter)));
  my $Molecule = $CalledAsMethod ? $SecondParameter : $FirstParameter;

  if (!defined($Molecule)) {
    return undef;
  }

  my @MoleculeLines = ();

  # First line: Molname line...
  push @MoleculeLines, SDFileUtil::GenerateCmpdMolNameLine($Molecule->GetName());

  # Second line: Misc info; program name and user initials are left empty...
  my $ProgramName = '';
  my $UserInitial = '';
  my $Code = $Molecule->IsThreeDimensional() ? '3D' : '2D';
  push @MoleculeLines, SDFileUtil::GenerateCmpdMiscInfoLine($ProgramName, $UserInitial, $Code);

  # Third line: Comments line; MDLComments wins over Comments...
  my $Comments = '';
  if ($Molecule->HasProperty('MDLComments')) {
    $Comments = $Molecule->GetMDLComments();
  }
  elsif ($Molecule->HasProperty('Comments')) {
    $Comments = $Molecule->GetComments();
  }
  push @MoleculeLines, SDFileUtil::GenerateCmpdCommentsLine($Comments);

  # Fourth line: Counts line for V2000
  my $AtomCount = $Molecule->GetNumOfAtoms();
  my $BondCount = $Molecule->GetNumOfBonds();
  my $ChiralFlag = 0;
  push @MoleculeLines, SDFileUtil::GenerateCmpdCountsLine($AtomCount, $BondCount, $ChiralFlag);

  # Atom lines; property values are collected here and written after the bond lines...
  my(@IsotopePairs, @ChargePairs, @RadicalPairs, @AtomAliasPairs, %AtomIDToNum);

  my @Atoms = $Molecule->GetAtoms();
  my $AtomNum = 0;

  for my $Atom (@Atoms) {
    $AtomNum++;
    my $AtomID = $Atom->GetID();
    $AtomIDToNum{$AtomID} = $AtomNum;

    my $AtomSymbol = $Atom->GetAtomSymbol();
    my($AtomX, $AtomY, $AtomZ) = $Atom->GetXYZ();

    # Mass difference goes on the atom line; mass number is held for M ISO property lines...
    my $MassDifference = _GetMassDifference($Atom);
    if ($MassDifference) {
      my $IsotopeValue = _GetIsotopePropertyValue($Atom);
      push @IsotopePairs, ($AtomNum, $IsotopeValue) if $IsotopeValue;
    }

    # MDL charge goes on the atom line; formal charge is held for M CHG property lines...
    my $Charge = _GetCharge($Atom);
    if ($Charge) {
      my $ChargeValue = _GetChargePropertyValue($Atom);
      push @ChargePairs, ($AtomNum, $ChargeValue) if $ChargeValue;
    }

    # Hold any radical value for M RAD property lines...
    my $RadicalValue = _GetRadicalPropertyValue($Atom);
    if ($RadicalValue) {
      push @RadicalPairs, ($AtomNum, $RadicalValue);
    }

    # Hold any atom alias value for A xxx property lines...
    my $AtomAliasValue = _GetAtomAliasPropertyValue($Atom);
    if ($AtomAliasValue) {
      push @AtomAliasPairs, ($AtomNum, $AtomAliasValue);

      # Write carbon as atom symbol: the atom alias overrides it during parsing...
      $AtomSymbol = "C";
    }

    my $StereoParity = _GetStereoParity($Atom);

    push @MoleculeLines, SDFileUtil::GenerateCmpdAtomLine($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity);
  }

  # Bond lines; each bond is seen from both of its atoms and is written only from the lower numbered one...
  for my $FirstAtom (@Atoms) {
    my $FirstAtomID = $FirstAtom->GetID();
    my $FirstAtomNum = $AtomIDToNum{$FirstAtomID};

    my @Bonds = $FirstAtom->GetBonds();
    BOND: for my $Bond (@Bonds) {
      my $SecondAtom = $Bond->GetBondedAtom($FirstAtom);
      my $SecondAtomID = $SecondAtom->GetID();
      my $SecondAtomNum = $AtomIDToNum{$SecondAtomID};

      next BOND if ($FirstAtomNum >= $SecondAtomNum);

      my $BondOrder = $Bond->GetBondOrder();
      my $BondType = $Bond->GetBondType();
      my $MDLBondType = SDFileUtil::InternalBondOrderToMDLBondType($BondOrder, $BondType);

      my $MDLBondStereo = _GetBondStereo($Bond);

      push @MoleculeLines, SDFileUtil::GenerateCmpdBondLine($FirstAtomNum, $SecondAtomNum, $MDLBondType, $MDLBondStereo);
    }
  }

  # Property lines...
  if (@IsotopePairs) {
    push @MoleculeLines, SDFileUtil::GenerateCmpdIsotopePropertyLines(\@IsotopePairs);
  }
  if (@ChargePairs) {
    push @MoleculeLines, SDFileUtil::GenerateCmpdChargePropertyLines(\@ChargePairs);
  }
  if (@RadicalPairs) {
    push @MoleculeLines, SDFileUtil::GenerateCmpdRadicalPropertyLines(\@RadicalPairs);
  }
  if (@AtomAliasPairs) {
    push @MoleculeLines, SDFileUtil::GenerateCmpdAtomAliasPropertyLines(\@AtomAliasPairs);
  }

  push @MoleculeLines, "M END";

  return join "\n", @MoleculeLines;
}
| 451 | |
| 452 # Process MassDifference value and set atom's mass number... | |
| 453 # | |
# Convert an atom block mass difference into a mass number and store it on
# the atom.
#
# Parameters:
#   $Atom           - atom object to update
#   $MassDifference - mass difference value read from the atom line
#
# The mass difference is ignored, with a warning, when the atom has no atomic
# number or no known mass number to offset. A resulting mass number that is
# not a natural isotope is still assigned, after a warning.
#
sub _ProcessMassDifference {
  my($Atom, $MassDifference) = @_;
  my($MassNumber, $NewMassNumber, $AtomicNumber);

  $AtomicNumber = $Atom->GetAtomicNumber();

  if (!$AtomicNumber) {
    carp "Warning: ${ClassName}->_ProcessMassDifference: Ignoring specified mass difference value, $MassDifference, in SD file: Assigned to non standard element...";
    return;
  }

  # The offset needs a starting mass number; without one the difference can't be applied...
  $MassNumber = $Atom->GetMassNumber();
  if (!$MassNumber) {
    carp "Warning: ${ClassName}->_ProcessMassDifference: Ignoring specified mass difference value, $MassDifference, in SD file: Unknown MassNumber value...";
    return;
  }

  $NewMassNumber = $MassNumber + $MassDifference;
  if (!PeriodicTable::IsElementNaturalIsotopeMassNumber($AtomicNumber, $NewMassNumber)) {
    my($AtomSymbol) = $Atom->GetAtomSymbol();
    carp "Warning: ${ClassName}->_ProcessMassDifference: Unknown mass number, $NewMassNumber, corresponding to specified mass difference value, $MassDifference, in SD for atom with atomic number, $AtomicNumber, and atomic symbol, $AtomSymbol. The mass number value has been assigned. Don't forget to Set ExactMass property explicitly; otherwise, GetExactMass method would return mass of most abundant isotope...\n";
  }

  # Use SetProperty method instead of SetMassNumber to skip explicit checks on MassNumber value...
  $Atom->SetProperty('MassNumber', $NewMassNumber);
}
| 478 | |
| 479 # Get mass difference value... | |
# Return the atom's mass number minus the mass number of the most abundant
# natural isotope of its element. Returns 0 when any of the values needed is
# undefined or when the two mass numbers are equal.
sub _GetMassDifference {
  my($Atom) = @_;

  my $MassNumber = $Atom->GetMassNumber();
  return 0 unless defined $MassNumber;

  my $AtomicNumber = $Atom->GetAtomicNumber();
  return 0 unless defined $AtomicNumber;

  my $MostAbundantMassNumber = PeriodicTable::GetElementMostAbundantNaturalIsotopeMassNumber($AtomicNumber);
  return 0 unless defined($MostAbundantMassNumber);

  if ($MassNumber != $MostAbundantMassNumber) {
    return $MassNumber - $MostAbundantMassNumber;
  }
  return 0;
}
| 497 | |
| 498 # Process formal charge value and assign it to atom as formal charge... | |
# Translate an atom block charge value with SDFileUtil::MDLChargeToInternalCharge
# and assign the result to the atom as its formal charge.
sub _ProcessCharge {
  my($Atom, $Charge) = @_;

  my $InternalCharge = SDFileUtil::MDLChargeToInternalCharge($Charge);

  return $Atom->SetFormalCharge($InternalCharge);
}
| 506 | |
| 507 # Get MDL formal charge value ... | |
# Return the atom block charge value for an atom: its formal charge passed
# through SDFileUtil::InternalChargeToMDLCharge, or 0 when the atom has no
# FormalCharge property or the formal charge is false.
sub _GetCharge {
  my($Atom) = @_;

  return 0 unless $Atom->HasProperty('FormalCharge');

  my $InternalCharge = $Atom->GetFormalCharge();
  return 0 unless $InternalCharge;

  return scalar SDFileUtil::InternalChargeToMDLCharge($InternalCharge);
}
| 521 | |
| 522 # Process stereo parity value and assign it to atom as MDL property... | |
| 523 # | |
| 524 # Notes: | |
| 525 # . Mark atom as chiral center | |
| 526 # . Assign any explicit Clockwise (parity 1), CounterClockwise (parity 2) or either value (parity 3) as property of atom. | |
| 527 # . MDL values of Clockwise and CounterClockwise don't correspond to priority assigned to ligands around | |
| 528 # stereo center using CIP scheme; consequently, these values can't be used to set internal Stereochemistry for | |
| 529 # an atom. | |
| 530 # | |
# Flag the atom as a stereo center and keep the raw stereo parity value read
# from the atom line as the atom's MDLStereoParity property.
sub _ProcessStereoParity {
  my($Atom, $StereoParity) = @_;

  $Atom->SetStereoCenter('1');

  return $Atom->SetMDLStereoParity($StereoParity);
}
| 537 | |
| 538 # Set stereo parity value to zero for now: The current release of MayaChemTools hasn't implemented | |
| 539 # functionality to determine chirality. | |
| 540 # | |
# Return the stereo parity value to write on an atom line. Always 0: the atom
# passed in is not inspected.
sub _GetStereoParity {
  my($Atom) = @_;

  return 0;
}
| 549 | |
| 550 # Process bond stereo value... | |
# Translate a bond line stereo value with
# SDFileUtil::MDLBondStereoToInternalBondStereochemistry and, when the
# translation is a true value, assign it to the bond.
sub _ProcessBondStereo {
  my($Bond, $BondStereo) = @_;

  my $Stereochemistry = SDFileUtil::MDLBondStereoToInternalBondStereochemistry($BondStereo);

  if ($Stereochemistry) {
    $Bond->SetBondStereochemistry($Stereochemistry);
  }
}
| 560 | |
| 561 # Get MDLBondStereo value... | |
# Return the bond line stereo value for a bond.
#
# The bond is classified as Up, Down, UpOrDown or CisOrTrans, first match wins,
# and the class is translated with
# SDFileUtil::InternalBondStereochemistryToMDLBondStereo. Cis and Trans bonds
# are written as CisOrTrans. Returns 0 when none of the checks match.
sub _GetBondStereo {
  my($Bond) = @_;

  my $InternalBondStereo = '';
  if ($Bond->IsUp()) {
    $InternalBondStereo = 'Up';
  }
  elsif ($Bond->IsDown()) {
    $InternalBondStereo = 'Down';
  }
  elsif ($Bond->IsUpOrDown()) {
    $InternalBondStereo = 'UpOrDown';
  }
  elsif ($Bond->IsCisOrTrans() || $Bond->IsCis() || $Bond->IsTrans()) {
    $InternalBondStereo = 'CisOrTrans';
  }

  return 0 unless $InternalBondStereo;

  return scalar SDFileUtil::InternalBondStereochemistryToMDLBondStereo($InternalBondStereo);
}
| 595 | |
| 596 # Zero out charge and radical values specified for atoms... | |
# Remove FormalCharge and SpinMultiplicity properties from every atom in the map.
#
# Parameters:
#   $AtomNumToAtomMapRef - reference to a hash of atom number to atom object
#
# The two properties are checked independently so that an atom carrying both
# loses both.
sub _ZeroOutAtomsChargeAndRadicalValues {
  my($AtomNumToAtomMapRef) = @_;

  for my $Atom (values %{$AtomNumToAtomMapRef}) {
    for my $PropertyName ('FormalCharge', 'SpinMultiplicity') {
      if ($Atom->HasProperty($PropertyName)) {
        $Atom->DeleteProperty($PropertyName);
      }
    }
  }
}
| 610 | |
| 611 # Process charge property value pairs... | |
# Apply (atom number, charge) pairs from a M CHG property line.
#
# Parameters:
#   $ValuePairsRef       - reference to a flat list of atom number and charge values
#   $AtomNumToAtomMapRef - reference to a hash of atom number to atom object
#
# Pairs with a false charge or an unknown atom number are skipped. A warning
# is issued when the atom already has a spin multiplicity.
sub _ProcessChargeProperty {
  my($ValuePairsRef, $AtomNumToAtomMapRef) = @_;

  return unless (defined($ValuePairsRef) && @{$ValuePairsRef});

  # Work on a copy so that the caller's list is left alone...
  my @Pairs = @{$ValuePairsRef};

  VALUEPAIRS: while (@Pairs) {
    my($AtomNum, $Charge) = splice(@Pairs, 0, 2);

    next VALUEPAIRS if !$Charge;
    next VALUEPAIRS if !exists $AtomNumToAtomMapRef->{$AtomNum};

    my $Atom = $AtomNumToAtomMapRef->{$AtomNum};
    if ($Atom->HasProperty('SpinMultiplicity')) {
      carp "Warning: ${ClassName}->_ProcessChargeProperty: Setting formal charge on atom number, $AtomNum, with already assigned spin multiplicity value...";
    }
    $Atom->SetFormalCharge($Charge);
  }
}
| 636 | |
| 637 # Get charge property value for an atom... | |
# Return the value to write on a M CHG property line for an atom: its formal
# charge, or 0 when the atom has no FormalCharge property.
sub _GetChargePropertyValue {
  my($Atom) = @_;

  return 0 unless $Atom->HasProperty('FormalCharge');

  return scalar $Atom->GetFormalCharge();
}
| 648 | |
| 649 # Process radical property value pairs... | |
# Apply (atom number, radical) pairs from a M RAD property line.
#
# Parameters:
#   $ValuePairsRef       - reference to a flat list of atom number and radical values
#   $AtomNumToAtomMapRef - reference to a hash of atom number to atom object
#
# Pairs with a false radical value or an unknown atom number are skipped. The
# radical value is translated with SDFileUtil::MDLRadicalToInternalSpinMultiplicity
# before it is assigned. A warning is issued when the atom already has a formal charge.
sub _ProcessRadicalProperty {
  my($ValuePairsRef, $AtomNumToAtomMapRef) = @_;

  return unless (defined($ValuePairsRef) && @{$ValuePairsRef});

  # Work on a copy so that the caller's list is left alone...
  my @Pairs = @{$ValuePairsRef};

  VALUEPAIRS: while (@Pairs) {
    my($AtomNum, $Radical) = splice(@Pairs, 0, 2);

    next VALUEPAIRS if !$Radical;
    next VALUEPAIRS if !exists $AtomNumToAtomMapRef->{$AtomNum};

    my $Atom = $AtomNumToAtomMapRef->{$AtomNum};
    if ($Atom->HasProperty('FormalCharge')) {
      carp "Warning: ${ClassName}->_ProcessRadicalProperty: Setting spin multiplicity on atom number, $AtomNum, with already assigned formal charge value...";
    }
    my $SpinMultiplicity = SDFileUtil::MDLRadicalToInternalSpinMultiplicity($Radical);
    $Atom->SetSpinMultiplicity($SpinMultiplicity);
  }
}
| 675 | |
| 676 # Get radical property value for an atom... | |
# Return the value to write on a M RAD property line for an atom: its spin
# multiplicity passed through SDFileUtil::InternalSpinMultiplicityToMDLRadical,
# or 0 when the atom has no SpinMultiplicity property.
sub _GetRadicalPropertyValue {
  my($Atom) = @_;

  return 0 unless $Atom->HasProperty('SpinMultiplicity');

  my $SpinMultiplicity = $Atom->GetSpinMultiplicity();

  return scalar SDFileUtil::InternalSpinMultiplicityToMDLRadical($SpinMultiplicity);
}
| 688 | |
| 689 # Process isotope property value pairs... | |
# Apply (atom number, mass number) pairs from a M ISO property line.
#
# Parameters:
#   $ValuePairsRef       - reference to a flat list of atom number and mass number values
#   $AtomNumToAtomMapRef - reference to a hash of atom number to atom object
#
# Pairs with a false mass number or an unknown atom number are skipped. A mass
# number that is not a natural isotope of the element is still assigned, after
# a warning.
sub _ProcessIsotopeProperty {
  my($ValuePairsRef, $AtomNumToAtomMapRef) = @_;

  if (!(defined($ValuePairsRef) && @{$ValuePairsRef})) {
    return;
  }
  my($Index, $ValuePairsCount, $AtomNum, $MassNumber, $Atom, $AtomicNumber);

  $ValuePairsCount = scalar @{$ValuePairsRef};
  VALUEPAIRS: for ($Index = 0; $Index < $ValuePairsCount; $Index += 2) {
    $AtomNum = $ValuePairsRef->[$Index];
    $MassNumber = $ValuePairsRef->[$Index + 1];
    if (!$MassNumber) {
      next VALUEPAIRS;
    }
    if (!exists $AtomNumToAtomMapRef->{$AtomNum}) {
      next VALUEPAIRS;
    }
    $Atom = $AtomNumToAtomMapRef->{$AtomNum};
    $AtomicNumber = $Atom->GetAtomicNumber();

    if (!PeriodicTable::IsElementNaturalIsotopeMassNumber($AtomicNumber, $MassNumber)) {
      my($AtomSymbol) = $Atom->GetAtomSymbol();
      carp "Warning: ${ClassName}->_ProcessIsotopeProperty: Unknown mass number, $MassNumber, specified on M ISO property line for atom number, $AtomNum, in SD for atom with atomic number, $AtomicNumber, and atomic symbol, $AtomSymbol. The mass number value has been assigned. Don't forget to Set ExactMass property explicitly; otherwise, GetExactMass method would return mass of most abundant isotope...\n";
    }

    # Use SetProperty method instead of SetMassNumber to skip explicit checks on MassNumber value...
    $Atom->SetProperty('MassNumber', $MassNumber);
  }
}
| 719 | |
| 720 # Get isotope property value for an atom... | |
# Return the value to write on a M ISO property line for an atom: its mass
# number, or 0 when the atom has no MassNumber property.
sub _GetIsotopePropertyValue {
  my($Atom) = @_;

  return 0 unless $Atom->HasProperty('MassNumber');

  return scalar $Atom->GetMassNumber();
}
| 731 | |
| 732 # Process atom alias property value pairs... | |
# Apply (atom number, atom alias) pairs from atom alias property lines.
#
# Parameters:
#   $ValuePairsRef       - reference to a flat list of atom number and alias values
#   $AtomNumToAtomMapRef - reference to a hash of atom number to atom object
#
# Pairs with a false or unknown atom number are skipped. An alias that is
# empty after trimming is skipped with a warning. Otherwise the alias becomes
# the atom symbol and is also kept as the AtomAlias property.
sub _ProcessAtomAliasProperty {
  my($ValuePairsRef, $AtomNumToAtomMapRef) = @_;

  return unless (defined($ValuePairsRef) && @{$ValuePairsRef});

  # Work on a copy so that the caller's list is left alone...
  my @Pairs = @{$ValuePairsRef};

  VALUEPAIRS: while (@Pairs) {
    my($AtomNum, $AtomAlias) = splice(@Pairs, 0, 2);

    next VALUEPAIRS if !$AtomNum;
    next VALUEPAIRS if !exists $AtomNumToAtomMapRef->{$AtomNum};

    $AtomAlias = TextUtil::RemoveLeadingAndTrailingWhiteSpaces($AtomAlias);
    if (TextUtil::IsEmpty($AtomAlias)) {
      carp("Warning: ${ClassName}->_ProcessAtomAliasProperty: Ignoring atom alias property line: No Atom alias value specified...");
      next VALUEPAIRS;
    }

    # Set atom symbol to atom alias which sets atomic number automatically...
    my $Atom = $AtomNumToAtomMapRef->{$AtomNum};
    $Atom->SetAtomSymbol($AtomAlias);

    $Atom->SetProperty('AtomAlias', $AtomAlias);
  }
}
| 763 | |
| 764 # Get atom alias property value for an atom... | |
# Return the atom's alias for the atom alias property lines, or undef when
# the atom has no AtomAlias property.
sub _GetAtomAliasPropertyValue {
  my($Atom) = @_;

  if ($Atom->HasProperty('AtomAlias')) {
    return scalar $Atom->GetAtomAlias();
  }
  return undef;
}
| 775 | |
| 776 # Is it a MDLMolFileIO object? | |
# Return 1 when the argument is a blessed object of this class or of a class
# derived from it; 0 otherwise.
sub _IsMDLMolFileIO {
  my($Object) = @_;

  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
| 782 | |
| 783 | |
| 784 1; | |
| 785 | |
| 786 __END__ | |
| 787 | |
| 788 =head1 NAME | |
| 789 | |
| 790 MDLMolFileIO | |
| 791 | |
| 792 =head1 SYNOPSIS | |
| 793 | |
| 794 use FileIO::MDLMolFileIO; | |
| 795 | |
| 796 use FileIO::MDLMolFileIO qw(:all); | |
| 797 | |
| 798 =head1 DESCRIPTION | |
| 799 | |
| 800 B<MDLMolFileIO> class provides the following methods: | |
| 801 | |
| 802 new, GenerateMoleculeString, IsMDLMolFile, ParseMoleculeString, ReadMolecule, | |
| 803 ReadMoleculeString, WriteMolecule | |
| 804 | |
| 805 The following methods can also be used as functions: | |
| 806 | |
| 807 GenerateMoleculeString, IsMDLMolFile, ParseMoleculeString | |
| 808 | |
| 809 Data specific to B<MDLMolFileIO> class not directly used by B<Molecule>, B<Atom> and | |
| 810 B<Bond> objects - data label/value pairs, atom StereoParity and so on - is associated with | |
| 811 and retrieved from appropriate objects using the following methods: | |
| 812 | |
| 813 SetMDL<PropertyName> | |
| 814 GetMDL<PropertyName>. | |
| 815 | |
| 816 B<MDLMolFileIO> class is derived from I<FileIO> class and uses its methods to support | |
| 817 generic file related functionality. | |
| 818 | |
| 819 =head2 METHODS | |
| 820 | |
| 821 =over 4 | |
| 822 | |
| 823 =item B<new> | |
| 824 | |
| 825 $NewMDLMolFileIO = new FileIO::MDLMolFileIO(%NamesAndValues); | |
| 826 | |
| 827 Using specified I<MDLMolFileIO> property names and values hash, B<new> method creates a new object | |
| 828 and returns a reference to newly created B<MDLMolFileIO> object. | |
| 829 | |
| 830 =item B<GenerateMoleculeString> | |
| 831 | |
| 832 $MoleculeString = $MDLMolFileIO->GenerateMoleculeString($Molecule); | |
| 833 $MoleculeString = FileIO::MDLMolFileIO::GenerateMoleculeString($Molecule); | |
| 834 | |
| 835 Returns a B<MoleculeString> in MDLMol format corresponding to I<Molecule>. | |
| 836 | |
| 837 =item B<IsMDLMolFile> | |
| 838 | |
| 839 $Status = $MDLMolFileIO->IsMDLMolFile($FileName); | |
| 840 $Status = FileIO::MDLMolFileIO::IsMDLMolFile($FileName); | |
| 841 | |
| 842 Returns 1 or 0 based on whether I<FileName> is a MDLMol file. | |
| 843 | |
| 844 =item B<ParseMoleculeString> | |
| 845 | |
| 846 $Molecule = $MDLMolFileIO->ParseMoleculeString($MoleculeString); | |
| 847 $Molecule = FileIO::MDLMolFileIO::ParseMoleculeString($MoleculeString); | |
| 848 | |
| 849 Parses I<MoleculeString> and returns a B<Molecule> object. | |
| 850 | |
| 851 =item B<ReadMolecule> | |
| 852 | |
| 853 $Molecule = $MDLMolFileIO->ReadMolecule(); | |
| 854 | |
| 855 Reads data for the compound from the file using the file handle held by the | |
| 856 B<MDLMolFileIO> object, creates, and returns a B<Molecule> object. | |
| 857 | |
| 858 =item B<ReadMoleculeString> | |
| 859 | |
| 860 $MoleculeString = $MDLMolFileIO->ReadMoleculeString(); | |
| 861 | |
| 862 Reads data for the compound from the file using the file handle held by the | |
| 863 B<MDLMolFileIO> object and returns a B<MoleculeString> corresponding to compound | |
| 864 structure and other associated data. | |
| 865 | |
| 866 =item B<WriteMolecule> | |
| 867 | |
| 868 $MDLMolFileIO->WriteMolecule($Molecule); | |
| 869 | |
| 870 Writes I<Molecule> data to a file in MDLMol format and returns B<MDLMolFileIO>. | |
| 871 | |
| 872 =back | |
| 873 | |
| 874 =head1 AUTHOR | |
| 875 | |
| 876 Manish Sud <msud@san.rr.com> | |
| 877 | |
| 878 =head1 SEE ALSO | |
| 879 | |
| 880 MoleculeFileIO.pm, SDFileIO.pm | |
| 881 | |
| 882 =head1 COPYRIGHT | |
| 883 | |
| 884 Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 885 | |
| 886 This file is part of MayaChemTools. | |
| 887 | |
| 888 MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 889 the terms of the GNU Lesser General Public License as published by the Free | |
| 890 Software Foundation; either version 3 of the License, or (at your option) | |
| 891 any later version. | |
| 892 | |
| 893 =cut |
