Mercurial > repos > deepakjadmin > mayatool3_test3
comparison mayachemtools/lib/FileIO/MDLMolFileIO.pm @ 0:73ae111cf86f draft
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 11:55:01 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:73ae111cf86f |
---|---|
1 package FileIO::MDLMolFileIO; | |
2 # | |
3 # $RCSfile: MDLMolFileIO.pm,v $ | |
4 # $Date: 2015/02/28 20:48:43 $ | |
5 # $Revision: 1.32 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability or fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use Carp; | |
31 use Exporter; | |
32 use Scalar::Util (); | |
33 use TextUtil (); | |
34 use FileUtil (); | |
35 use SDFileUtil (); | |
36 use FileIO::FileIO; | |
37 use Molecule; | |
38 | |
# Package-level inheritance and export tables...
#
# These package variables are declared with "our" instead of the obsolete
# "use vars" pragma; the names and values are unchanged.
#
our @ISA         = qw(FileIO::FileIO Exporter);
our @EXPORT      = qw();
our @EXPORT_OK   = qw(IsMDLMolFile);

our %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
#
# $ClassName is a file-scoped lexical used in warning and error messages and
# by _IsMDLMolFileIO; it is filled in by _InitializeClass when the module is
# loaded.
#
my($ClassName);
_InitializeClass();
50 | |
# Class constructor...
#
# Creates the object through the base class constructor, blesses it into the
# requested class, and applies the specified property names and values.
#
# Parameters:
#   . Class - class name or an existing object whose class is reused
#   . NamesAndValues - hash of property names and values; handed unchanged to
#     _InitializeMDLMolFileIOProperties
#
# Returns:
#   . Newly created object
#
sub new {
    my($Class, %NamesAndValues) = @_;

    # Allow invocation on an existing object as well as on a class name...
    my $TargetClass = ref($Class) || $Class;

    my $This = $Class->SUPER::new();
    bless $This, $TargetClass;

    $This->_InitializeMDLMolFileIO();
    $This->_InitializeMDLMolFileIOProperties(%NamesAndValues);

    return $This;
}
64 | |
# Initialize any local object data...
#
# Currently a placeholder: no object data is set up here. Returns the
# object it was given.
#
sub _InitializeMDLMolFileIO {
    my $This = shift;

    return $This;
}
74 | |
# Initialize class ...
#
# Records the name of this package in the file-scoped $ClassName variable.
#
sub _InitializeClass {
    return $ClassName = __PACKAGE__;
}
81 | |
# Initialize object values...
#
# Each specified property name is turned into a call to the corresponding
# Set<PropertyName> method; those methods are not defined in this file.
#
# Parameters:
#   . NamesAndValues - hash of property names and values; "Name" (the file
#     name) is mandatory
#
# Returns:
#   . Object
#
# Croaks when no Name is specified or when IsMDLMolFile rejects the name.
#
sub _InitializeMDLMolFileIOProperties {
    my($This, %NamesAndValues) = @_;

    for my $PropertyName (keys %NamesAndValues) {
        my $SetterName = "Set${PropertyName}";
        $This->$SetterName($NamesAndValues{$PropertyName});
    }

    exists $NamesAndValues{Name}
        or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";

    # Make sure it's a MDLMol file...
    my $FileName = $NamesAndValues{Name};
    $This->IsMDLMolFile($FileName)
        or croak "Error: ${ClassName}->New: Object can't be instantiated: File, $FileName, doesn't appear to be MDLMol format...";

    return $This;
}
107 | |
# Is it a MDLMol file?
#
# Supports two invocation styles: as an object method,
# $MDLMolFileIO->IsMDLMolFile($FileName), or as a function,
# IsMDLMolFile($FileName).
#
# Returns whatever FileUtil::CheckFileType reports for the file name and the
# "mol" type.
#
# The ($;$) prototype is retained as part of the exported interface.
#
sub IsMDLMolFile ($;$) {
    my($FirstParameter, $SecondParameter) = @_;

    # Default to the function-call form; switch when invoked on an object...
    my $FileName = $FirstParameter;
    if ((@_ == 2) && _IsMDLMolFileIO($FirstParameter)) {
        $FileName = $SecondParameter;
    }

    # Check file extension...
    my $IsMolFile = FileUtil::CheckFileType($FileName, "mol");

    return $IsMolFile;
}
125 | |
# Read molecule from file and return molecule object...
#
# Reads one compound string through the object's file handle and hands it to
# ParseMoleculeString.
#
sub ReadMolecule {
    my($This) = @_;

    my $FileHandle = $This->GetFileHandle();

    # Collected in list context on purpose: whatever ReadCmpdString returns is
    # forwarded as the argument list of ParseMoleculeString...
    my @CmpdStringData = SDFileUtil::ReadCmpdString($FileHandle);

    return $This->ParseMoleculeString(@CmpdStringData);
}
134 | |
# Write compound data using Molecule object...
#
# Generates the molecule string for the specified molecule and prints it,
# followed by a new line, to the object's file handle.
#
# Returns:
#   . Object; also returned, after a warning, when no valid Molecule is given
#
sub WriteMolecule {
    my($This, $Molecule) = @_;

    unless (defined($Molecule) && $Molecule->IsMolecule()) {
        carp "Warning: ${ClassName}->WriteMolecule: No data written: Molecule object is not specified...";
        return $This;
    }

    my $FileHandle = $This->GetFileHandle();
    my $MoleculeString = $This->GenerateMoleculeString($Molecule);

    print {$FileHandle} $MoleculeString . "\n";

    return $This;
}
150 | |
# Retrieve molecule string...
#
# Returns whatever SDFileUtil::ReadCmpdString reads from the object's file
# handle, without parsing it.
#
sub ReadMoleculeString {
    my $This = shift;

    my $FileHandle = $This->GetFileHandle();

    return SDFileUtil::ReadCmpdString($FileHandle);
}
159 | |
# Parse molecule string and return molecule object...
#
# ParseMoleculeString supports two invocation methods: as an object method,
# $MDLMolFileIO->ParseMoleculeString($MoleculeString), or as a package
# function, FileIO::MDLMolFileIO::ParseMoleculeString($MoleculeString).
#
# Parameters:
#   . MoleculeString - text of one MDL MOL record; lines are separated by "\n"
#
# Returns:
#   . Molecule object, or undef for an empty or otherwise false string
#
# Notes:
#   . Lines are consumed by position: name, misc info, comments, counts line,
#     AtomCount atom lines, BondCount bond lines and then property lines.
#   . Property lines are read until a "M END" line, a line containing "$$$$",
#     or the end of the string, whichever comes first. Without the end-of-string
#     bound a record missing its terminator would never finish parsing.
#   . Any amount of white space is accepted after the leading "M" on property
#     lines, so both "M END" and "M  END" terminate the block.
#
sub ParseMoleculeString {
    my($FirstParameter, $SecondParameter) = @_;
    my($MoleculeString);

    # A method call supplies the object first; a function call only the string...
    if ((@_ == 2) && (_IsMDLMolFileIO($FirstParameter))) {
        $MoleculeString = $SecondParameter;
    }
    else {
        $MoleculeString = $FirstParameter;
    }
    if (!$MoleculeString) {
        # Explicit undef is kept, rather than a bare return, so that existing
        # callers get the same value in list context...
        return undef;
    }
    my @MoleculeLines = split /\n/, $MoleculeString;

    # Create molecule object and set molecule level native and MDL properties...
    #
    my $Molecule = Molecule->new();

    # Set valence model for calculating implicit hydrogens...
    $Molecule->SetValenceModel('MDLValenceModel');

    # Process headers data: name, misc info, comments and counts lines...
    my($MoleculeName) = SDFileUtil::ParseCmpdMolNameLine($MoleculeLines[0]);
    $MoleculeName = TextUtil::RemoveTrailingWhiteSpaces($MoleculeName);
    $Molecule->SetName($MoleculeName);

    my($UserInitial, $ProgramName, $Date, $Code, $ScalingFactor1, $ScalingFactor2, $Energy, $RegistryNum) = SDFileUtil::ParseCmpdMiscInfoLine($MoleculeLines[1]);
    $Molecule->SetProperties('MDLUserInitial' => $UserInitial, 'MDLProgramName' => $ProgramName, 'MDLDate' => $Date, 'MDLCode' => $Code, 'MDLScalingFactor1' => $ScalingFactor1, 'MDLScalingFactor2' => $ScalingFactor2, 'MDLEnergy' => $Energy, 'MDLRegistryNum' => $RegistryNum);

    my($Comments) = SDFileUtil::ParseCmpdCommentsLine($MoleculeLines[2]);
    $Molecule->SetProperties('MDLComments' => $Comments);

    my($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) = SDFileUtil::ParseCmpdCountsLine($MoleculeLines[3]);

    $Molecule->SetProperties('MDLChiralFlag' => $ChiralFlag, 'MDLPropertyCount' => $PropertyCount, 'MDLVersion' => $Version);

    # Process atom data...
    #
    # Atoms are numbered from 1 in the order of their lines; bond and property
    # lines refer to atoms by these numbers.
    #
    my($FirstAtomLineIndex, $LastAtomLineIndex, $AtomNum, %AtomNumToAtomMap);

    $AtomNum = 0;
    %AtomNumToAtomMap = ();
    $FirstAtomLineIndex = 4;
    $LastAtomLineIndex = $FirstAtomLineIndex + $AtomCount - 1;

    for my $LineIndex ($FirstAtomLineIndex .. $LastAtomLineIndex) {
        $AtomNum++;
        my($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity) = SDFileUtil::ParseCmpdAtomLine($MoleculeLines[$LineIndex]);

        my $Atom = Atom->new('AtomSymbol' => $AtomSymbol, 'XYZ' => [$AtomX, $AtomY, $AtomZ]);

        # The numeric test also rejects values which are true as strings but
        # zero as numbers, for example " 0" or "0.0"...
        if ($MassDifference && $MassDifference != 0) {
            _ProcessMassDifference($Atom, $MassDifference);
        }
        if ($Charge && $Charge != 0) {
            _ProcessCharge($Atom, $Charge);
        }
        if ($StereoParity && $StereoParity != 0) {
            _ProcessStereoParity($Atom, $StereoParity);
        }

        $AtomNumToAtomMap{$AtomNum} = $Atom;
        $Molecule->AddAtom($Atom);
    }

    # Process bond data...
    my($FirstBondLineIndex, $LastBondLineIndex);

    $FirstBondLineIndex = $FirstAtomLineIndex + $AtomCount;
    $LastBondLineIndex = $FirstAtomLineIndex + $AtomCount + $BondCount - 1;

    for my $LineIndex ($FirstBondLineIndex .. $LastBondLineIndex) {
        my($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = SDFileUtil::ParseCmpdBondLine($MoleculeLines[$LineIndex]);

        my $Atom1 = $AtomNumToAtomMap{$FirstAtomNum};
        my $Atom2 = $AtomNumToAtomMap{$SecondAtomNum};

        my($InternalBondOrder, $InternalBondType) = SDFileUtil::MDLBondTypeToInternalBondOrder($BondType);
        my $Bond = Bond->new('Atoms' => [$Atom1, $Atom2], 'BondOrder' => $InternalBondOrder);
        $Bond->SetBondType($InternalBondType);

        if ($BondStereo && $BondStereo != 0) {
            _ProcessBondStereo($Bond, $BondStereo);
        }

        $Molecule->AddBond($Bond);
    }

    # Process available property block lines starting with A aaa, M CHG, M ISO and M RAD. All other property blocks
    # lines are for query or specific display purposes and are ignored for now.
    #
    my($PropertyLineIndex, $FirstChargeOrRadicalLine);

    $PropertyLineIndex = $FirstAtomLineIndex + $AtomCount + $BondCount;
    $FirstChargeOrRadicalLine = 1;

    PROPERTYLINE: while ($PropertyLineIndex <= $#MoleculeLines) {
        my $PropertyLine = $MoleculeLines[$PropertyLineIndex];

        if ($PropertyLine =~ /^M\s+END/i) {
            last PROPERTYLINE;
        }
        if ($PropertyLine =~ /\$\$\$\$/) {
            last PROPERTYLINE;
        }
        if ($PropertyLine =~ /^M\s+(?:CHG|RAD)/i) {
            # Charge and radical property lines take the place of any values
            # picked up from the atom lines: clear those once, on the first
            # such line...
            if ($FirstChargeOrRadicalLine) {
                $FirstChargeOrRadicalLine = 0;
                _ZeroOutAtomsChargeAndRadicalValues(\%AtomNumToAtomMap);
            }
            if ($PropertyLine =~ /^M\s+CHG/i) {
                my @ValuePairs = SDFileUtil::ParseCmpdChargePropertyLine($PropertyLine);
                _ProcessChargeProperty(\@ValuePairs, \%AtomNumToAtomMap);
            }
            else {
                my @ValuePairs = SDFileUtil::ParseCmpdRadicalPropertyLine($PropertyLine);
                _ProcessRadicalProperty(\@ValuePairs, \%AtomNumToAtomMap);
            }
        }
        elsif ($PropertyLine =~ /^M\s+ISO/i) {
            my @ValuePairs = SDFileUtil::ParseCmpdIsotopePropertyLine($PropertyLine);
            _ProcessIsotopeProperty(\@ValuePairs, \%AtomNumToAtomMap);
        }
        elsif ($PropertyLine =~ /^A /i) {
            # Atom alias data spans two lines: consume the following line as well...
            $PropertyLineIndex++;
            my $NextPropertyLine = $MoleculeLines[$PropertyLineIndex];
            my @ValuePairs = SDFileUtil::ParseCmpdAtomAliasPropertyLine($PropertyLine, $NextPropertyLine);
            _ProcessAtomAliasProperty(\@ValuePairs, \%AtomNumToAtomMap);
        }
        $PropertyLineIndex++;
    }

    # Store input molecule string as generic property of molecule...
    $Molecule->SetInputMoleculeString($MoleculeString);

    return $Molecule;
}
304 | |
# Generate molecule string using molecule object...
#
# GenerateMoleculeString can be invoked as an object method,
# $MDLMolFileIO->GenerateMoleculeString($Molecule), or as a package function,
# FileIO::MDLMolFileIO::GenerateMoleculeString($Molecule).
#
# Returns:
#   . Lines of the record joined by "\n" with no trailing new line, or undef
#     when no molecule is specified
#
# Notes:
#   . Output order: name, misc info, comments, counts, atom lines, bond lines,
#     isotope, charge, radical and atom alias property lines, terminator line.
#   . Atoms are numbered from 1 in the order returned by GetAtoms. Each bond is
#     written once, from its lower numbered atom.
#   . Chiral flag and atom stereo parity are always written as 0.
#   . NOTE(review): the terminator literal appears here with a single space;
#     the CTfile format writes it as "M  END" - confirm the literal against the
#     upstream source.
#
sub GenerateMoleculeString {
    my($FirstParameter, $SecondParameter) = @_;

    my $Molecule = ((@_ == 2) && (_IsMDLMolFileIO($FirstParameter))) ? $SecondParameter : $FirstParameter;
    if (!defined($Molecule)) {
        return undef;
    }

    my @MoleculeLines = ();

    # First line: Molname line...
    push @MoleculeLines, SDFileUtil::GenerateCmpdMolNameLine($Molecule->GetName());

    # Second line: Misc info; program name and user initial are left empty...
    my($ProgramName, $UserInitial) = ('', '');
    my $Code = $Molecule->IsThreeDimensional() ? '3D' : '2D';
    push @MoleculeLines, SDFileUtil::GenerateCmpdMiscInfoLine($ProgramName, $UserInitial, $Code);

    # Third line: Comments line; MDLComments takes precedence over Comments...
    my $Comments = '';
    if ($Molecule->HasProperty('MDLComments')) {
        $Comments = $Molecule->GetMDLComments();
    }
    elsif ($Molecule->HasProperty('Comments')) {
        $Comments = $Molecule->GetComments();
    }
    push @MoleculeLines, SDFileUtil::GenerateCmpdCommentsLine($Comments);

    # Fourth line: Counts line for V2000
    my $AtomCount = $Molecule->GetNumOfAtoms();
    my $BondCount = $Molecule->GetNumOfBonds();
    my $ChiralFlag = 0;
    push @MoleculeLines, SDFileUtil::GenerateCmpdCountsLine($AtomCount, $BondCount, $ChiralFlag);

    # Atom lines; property values are collected as flat (AtomNum, Value) lists...
    my(%AtomIDToNum, @IsotopePairs, @ChargePairs, @RadicalPairs, @AtomAliasPairs);

    my @Atoms = $Molecule->GetAtoms();

    my $AtomNum = 0;
    for my $Atom (@Atoms) {
        $AtomNum++;
        $AtomIDToNum{$Atom->GetID()} = $AtomNum;

        my $AtomSymbol = $Atom->GetAtomSymbol();
        my($AtomX, $AtomY, $AtomZ) = $Atom->GetXYZ();

        # Setup mass difference and hold mass number for M ISO property lines...
        my $MassDifference = _GetMassDifference($Atom);
        if ($MassDifference) {
            my $IsotopeValue = _GetIsotopePropertyValue($Atom);
            push @IsotopePairs, ($AtomNum, $IsotopeValue) if $IsotopeValue;
        }

        # Setup charge and hold it for M CHG property lines...
        my $Charge = _GetCharge($Atom);
        if ($Charge) {
            my $ChargeValue = _GetChargePropertyValue($Atom);
            push @ChargePairs, ($AtomNum, $ChargeValue) if $ChargeValue;
        }

        # Hold any radical values for M RAD property lines...
        my $RadicalValue = _GetRadicalPropertyValue($Atom);
        push @RadicalPairs, ($AtomNum, $RadicalValue) if $RadicalValue;

        # Hold any atom alias value for A xxx property lines....
        my $AtomAliasValue = _GetAtomAliasPropertyValue($Atom);
        if ($AtomAliasValue) {
            push @AtomAliasPairs, ($AtomNum, $AtomAliasValue);

            # Set AtomSymbol to carbon as atom alias would override its value during parsing...
            $AtomSymbol = "C";
        }

        # Setup stereo parity...
        my $StereoParity = _GetStereoParity($Atom);

        push @MoleculeLines, SDFileUtil::GenerateCmpdAtomLine($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity);
    }

    # Bond lines...
    for my $FirstAtom (@Atoms) {
        my $FirstAtomNum = $AtomIDToNum{$FirstAtom->GetID()};

        BOND: for my $Bond ($FirstAtom->GetBonds()) {
            my $SecondAtom = $Bond->GetBondedAtom($FirstAtom);
            my $SecondAtomNum = $AtomIDToNum{$SecondAtom->GetID()};

            # Each bond is reachable from both of its atoms: write it only from
            # the lower numbered one...
            next BOND if $FirstAtomNum >= $SecondAtomNum;

            # Setup BondType...
            my $BondOrder = $Bond->GetBondOrder();
            my $BondType = $Bond->GetBondType();
            my $MDLBondType = SDFileUtil::InternalBondOrderToMDLBondType($BondOrder, $BondType);

            # Setup BondStereo...
            my $MDLBondStereo = _GetBondStereo($Bond);

            push @MoleculeLines, SDFileUtil::GenerateCmpdBondLine($FirstAtomNum, $SecondAtomNum, $MDLBondType, $MDLBondStereo);
        }
    }

    # Property lines...
    push @MoleculeLines, SDFileUtil::GenerateCmpdIsotopePropertyLines(\@IsotopePairs) if @IsotopePairs;
    push @MoleculeLines, SDFileUtil::GenerateCmpdChargePropertyLines(\@ChargePairs) if @ChargePairs;
    push @MoleculeLines, SDFileUtil::GenerateCmpdRadicalPropertyLines(\@RadicalPairs) if @RadicalPairs;
    push @MoleculeLines, SDFileUtil::GenerateCmpdAtomAliasPropertyLines(\@AtomAliasPairs) if @AtomAliasPairs;

    push @MoleculeLines, "M END";

    return join "\n", @MoleculeLines;
}
451 | |
# Process MassDifference value and set atom's mass number...
#
# The new mass number is the atom's current mass number plus the specified
# mass difference.
#
# Parameters:
#   . Atom - atom object to update
#   . MassDifference - mass difference value read from the atom line
#
# Notes:
#   . Nothing is assigned, and a warning is issued, when the atom has no
#     atomic number or no mass number to apply the difference to.
#   . A mass number which is not a known natural isotope is still assigned,
#     after a warning which reports the assigned value.
#
sub _ProcessMassDifference {
    my($Atom, $MassDifference) = @_;
    my($MassNumber, $NewMassNumber, $AtomicNumber);

    $AtomicNumber = $Atom->GetAtomicNumber();

    if (!$AtomicNumber) {
        carp "Warning: ${ClassName}->_ProcessMassDifference: Ignoring specified mass difference value, $MassDifference, in SD file: Assigned to non standard element...";
        return;
    }
    $MassNumber = $Atom->GetMassNumber();
    # Test the mass number itself: without it the difference has no base value...
    if (!$MassNumber) {
        carp "Warning: ${ClassName}->_ProcessMassDifference: Ignoring specified mass difference value, $MassDifference, in SD file: Unknown MassNumber value...";
        return;
    }
    $NewMassNumber = $MassNumber + $MassDifference;
    if (!PeriodicTable::IsElementNaturalIsotopeMassNumber($AtomicNumber, $NewMassNumber)) {
        my($AtomSymbol) = $Atom->GetAtomSymbol();
        carp "Warning: ${ClassName}->_ProcessMassDifference: Unknown mass number, $NewMassNumber, corresponding to specified mass difference value, $MassDifference, in SD for atom with atomic number, $AtomicNumber, and atomic symbol, $AtomSymbol. The mass number value has been assigned. Don't forget to Set ExactMass property explicitly; otherwise, GetExactMass method would return mass of most abundant isotope...\n";
    }

    # Use SetProperty method instead of SetMassNumber to skip explicit checks on MassNumber value...
    return $Atom->SetProperty('MassNumber', $NewMassNumber);
}
478 | |
# Get mass difference value...
#
# Returns the atom's mass number minus the mass number of the most abundant
# natural isotope for its atomic number. Returns 0 when the two are equal or
# when the mass number, the atomic number or the most abundant mass number is
# not defined.
#
sub _GetMassDifference {
    my($Atom) = @_;

    my $MassNumber = $Atom->GetMassNumber();
    return 0 if !defined $MassNumber;

    my $AtomicNumber = $Atom->GetAtomicNumber();
    return 0 if !defined $AtomicNumber;

    my $MostAbundantMassNumber = PeriodicTable::GetElementMostAbundantNaturalIsotopeMassNumber($AtomicNumber);
    if (defined($MostAbundantMassNumber) && $MassNumber != $MostAbundantMassNumber) {
        return $MassNumber - $MostAbundantMassNumber;
    }

    return 0;
}
497 | |
# Process formal charge value and assign it to atom as formal charge...
#
# The MDL value is converted by SDFileUtil::MDLChargeToInternalCharge before
# being set on the atom.
#
sub _ProcessCharge {
    my($Atom, $Charge) = @_;

    my $FormalCharge = SDFileUtil::MDLChargeToInternalCharge($Charge);

    return $Atom->SetFormalCharge($FormalCharge);
}
506 | |
# Get MDL formal charge value ...
#
# Returns 0 when the atom has no FormalCharge property or when its formal
# charge is false; otherwise returns the value converted by
# SDFileUtil::InternalChargeToMDLCharge.
#
sub _GetCharge {
    my($Atom) = @_;

    return 0 if !$Atom->HasProperty('FormalCharge');

    my $FormalCharge = $Atom->GetFormalCharge();
    return 0 if !$FormalCharge;

    my $MDLCharge = SDFileUtil::InternalChargeToMDLCharge($FormalCharge);

    return $MDLCharge;
}
521 | |
# Process stereo parity value and assign it to atom as MDL property...
#
# Notes:
#   . Mark atom as chiral center
#   . Assign any explicit Clockwise (parity 1), CounterClockwise (parity 2) or either value (parity 3) as property of atom.
#   . MDL values of Clockwise and CounterClockwise don't correspond to priority assigned to ligands around
#     stereo center using CIP scheme; consequently, these values can't be used to set internal Stereochemistry for
#     an atom.
#
sub _ProcessStereoParity {
    my($Atom, $StereoParity) = @_;

    # Flag the stereo center first; then keep the MDL parity value as it is...
    $Atom->SetStereoCenter('1');

    return $Atom->SetMDLStereoParity($StereoParity);
}
537 | |
# Set stereo parity value to zero for now: The current release of MayaChemTools hasn't implemented
# functionality to determine chirality.
#
# The atom argument is accepted but not used.
#
sub _GetStereoParity {
    return 0;
}
549 | |
# Process bond stereo value...
#
# Converts the MDL bond stereo value with
# SDFileUtil::MDLBondStereoToInternalBondStereochemistry and sets the result
# on the bond. The bond is left untouched when the converted value is false.
#
sub _ProcessBondStereo {
    my($Bond, $BondStereo) = @_;

    my $BondStereochemistry = SDFileUtil::MDLBondStereoToInternalBondStereochemistry($BondStereo);

    $Bond->SetBondStereochemistry($BondStereochemistry) if $BondStereochemistry;
}
560 | |
# Get MDLBondStereo value...
#
# The bond is classified by the first matching test, in this order: Up, Down,
# UpOrDown and CisOrTrans, where Cis and Trans bonds also count as CisOrTrans.
# The name is converted by SDFileUtil::InternalBondStereochemistryToMDLBondStereo.
# Returns 0 when no test matches.
#
sub _GetBondStereo {
    my($Bond) = @_;

    my $BondStereochemistry = '';
    if ($Bond->IsUp()) {
        $BondStereochemistry = 'Up';
    }
    elsif ($Bond->IsDown()) {
        $BondStereochemistry = 'Down';
    }
    elsif ($Bond->IsUpOrDown()) {
        $BondStereochemistry = 'UpOrDown';
    }
    elsif ($Bond->IsCisOrTrans() || $Bond->IsCis() || $Bond->IsTrans()) {
        $BondStereochemistry = 'CisOrTrans';
    }

    my $MDLBondStereo = 0;
    if ($BondStereochemistry) {
        $MDLBondStereo = SDFileUtil::InternalBondStereochemistryToMDLBondStereo($BondStereochemistry);
    }

    return $MDLBondStereo;
}
595 | |
# Zero out charge and radical values specified for atoms...
#
# Parameters:
#   . AtomNumToAtomMapRef - reference to a hash of atom numbers and atom objects
#
# Notes:
#   . FormalCharge and SpinMultiplicity are checked independently, so an atom
#     carrying both has both removed.
#
sub _ZeroOutAtomsChargeAndRadicalValues {
    my($AtomNumToAtomMapRef) = @_;

    for my $Atom (values %{$AtomNumToAtomMapRef}) {
        for my $PropertyName ('FormalCharge', 'SpinMultiplicity') {
            if ($Atom->HasProperty($PropertyName)) {
                $Atom->DeleteProperty($PropertyName);
            }
        }
    }

    return;
}
610 | |
# Process charge property value pairs...
#
# Parameters:
#   . ValuePairsRef - reference to a flat list of atom number and charge pairs
#   . AtomNumToAtomMapRef - reference to a hash of atom numbers and atom objects
#
# Notes:
#   . Pairs with a false charge value or an unknown atom number are skipped.
#   . A warning is issued when the atom already has a spin multiplicity; the
#     formal charge is set anyway.
#
sub _ProcessChargeProperty {
    my($ValuePairsRef, $AtomNumToAtomMapRef) = @_;

    return unless defined($ValuePairsRef) && @{$ValuePairsRef};

    # Work on a copy so the caller's list is left as it is...
    my @ValuePairs = @{$ValuePairsRef};

    VALUEPAIRS: while (@ValuePairs) {
        my($AtomNum, $Charge) = splice(@ValuePairs, 0, 2);

        next VALUEPAIRS if !$Charge;
        next VALUEPAIRS if !exists $AtomNumToAtomMapRef->{$AtomNum};

        my $Atom = $AtomNumToAtomMapRef->{$AtomNum};
        if ($Atom->HasProperty('SpinMultiplicity')) {
            carp "Warning: ${ClassName}->_ProcessChargeProperty: Setting formal charge on atom number, $AtomNum, with already assigned spin multiplicity value...";
        }
        $Atom->SetFormalCharge($Charge);
    }

    return;
}
636 | |
# Get charge property value for an atom...
#
# Returns the atom's formal charge when it has a FormalCharge property;
# otherwise returns 0.
#
sub _GetChargePropertyValue {
    my($Atom) = @_;

    my $Charge = $Atom->HasProperty('FormalCharge') ? $Atom->GetFormalCharge() : 0;

    return $Charge;
}
648 | |
# Process radical property value pairs...
#
# Parameters:
#   . ValuePairsRef - reference to a flat list of atom number and radical pairs
#   . AtomNumToAtomMapRef - reference to a hash of atom numbers and atom objects
#
# Notes:
#   . Pairs with a false radical value or an unknown atom number are skipped.
#   . The radical value is converted by
#     SDFileUtil::MDLRadicalToInternalSpinMultiplicity before it is set.
#   . A warning is issued when the atom already has a formal charge; the spin
#     multiplicity is set anyway.
#
sub _ProcessRadicalProperty {
    my($ValuePairsRef, $AtomNumToAtomMapRef) = @_;

    return unless defined($ValuePairsRef) && @{$ValuePairsRef};

    # Work on a copy so the caller's list is left as it is...
    my @ValuePairs = @{$ValuePairsRef};

    VALUEPAIRS: while (@ValuePairs) {
        my($AtomNum, $Radical) = splice(@ValuePairs, 0, 2);

        next VALUEPAIRS if !$Radical;
        next VALUEPAIRS if !exists $AtomNumToAtomMapRef->{$AtomNum};

        my $Atom = $AtomNumToAtomMapRef->{$AtomNum};
        if ($Atom->HasProperty('FormalCharge')) {
            carp "Warning: ${ClassName}->_ProcessRadicalProperty: Setting spin multiplicity on atom number, $AtomNum, with already assigned formal charge value...";
        }
        my $SpinMultiplicity = SDFileUtil::MDLRadicalToInternalSpinMultiplicity($Radical);
        $Atom->SetSpinMultiplicity($SpinMultiplicity);
    }

    return;
}
675 | |
# Get radical property value for an atom...
#
# Returns 0 when the atom has no SpinMultiplicity property; otherwise returns
# its spin multiplicity converted by
# SDFileUtil::InternalSpinMultiplicityToMDLRadical.
#
sub _GetRadicalPropertyValue {
    my($Atom) = @_;

    return 0 if !$Atom->HasProperty('SpinMultiplicity');

    my $SpinMultiplicity = $Atom->GetSpinMultiplicity();
    my $Radical = SDFileUtil::InternalSpinMultiplicityToMDLRadical($SpinMultiplicity);

    return $Radical;
}
688 | |
# Process isotope property value pairs...
#
# Parameters:
#   . ValuePairsRef - reference to a flat list of atom number and mass number pairs
#   . AtomNumToAtomMapRef - reference to a hash of atom numbers and atom objects
#
# Notes:
#   . Pairs with a false mass number or an unknown atom number are skipped.
#   . A mass number which is not a known natural isotope is still assigned,
#     after a warning.
#
sub _ProcessIsotopeProperty {
    my($ValuePairsRef, $AtomNumToAtomMapRef) = @_;

    if (!(defined($ValuePairsRef) && @{$ValuePairsRef})) {
        return;
    }
    my($Index, $ValuePairsCount, $AtomNum, $MassNumber, $Atom, $AtomicNumber);

    $ValuePairsCount = scalar @{$ValuePairsRef};
    VALUEPAIRS: for ($Index = 0; $Index < $ValuePairsCount; $Index +=2) {
        $AtomNum = $ValuePairsRef->[$Index]; $MassNumber = $ValuePairsRef->[$Index + 1];
        if (!$MassNumber) {
            next VALUEPAIRS;
        }
        if (!exists $AtomNumToAtomMapRef->{$AtomNum}) {
            next VALUEPAIRS;
        }
        $Atom = $AtomNumToAtomMapRef->{$AtomNum};
        $AtomicNumber = $Atom->GetAtomicNumber();

        if (!PeriodicTable::IsElementNaturalIsotopeMassNumber($AtomicNumber, $MassNumber)) {
            my($AtomSymbol) = $Atom->GetAtomSymbol();
            # The warning names this sub so the message can be traced back to it...
            carp "Warning: ${ClassName}->_ProcessIsotopeProperty: Unknown mass number, $MassNumber, specified on M ISO property line for atom number, $AtomNum, in SD for atom with atomic number, $AtomicNumber, and atomic symbol, $AtomSymbol. The mass number value has been assigned. Don't forget to Set ExactMass property explicitly; otherwise, GetExactMass method would return mass of most abundant isotope...\n";
        }

        # Use SetProperty method instead of SetMassNumber to skip explicit checks on MassNumber value...
        $Atom->SetProperty('MassNumber', $MassNumber);
    }
}
719 | |
# Get isotope property value for an atom...
#
# Returns the atom's mass number when it has a MassNumber property; otherwise
# returns 0.
#
sub _GetIsotopePropertyValue {
    my($Atom) = @_;

    my $MassNumber = $Atom->HasProperty('MassNumber') ? $Atom->GetMassNumber() : 0;

    return $MassNumber;
}
731 | |
# Process atom alias property value pairs...
#
# Parameters:
#   . ValuePairsRef - reference to a flat list of atom number and atom alias pairs
#   . AtomNumToAtomMapRef - reference to a hash of atom numbers and atom objects
#
# Notes:
#   . Pairs with a false or unknown atom number are skipped.
#   . Leading and trailing white space is removed from the alias; an empty
#     alias is skipped after a warning.
#   . The alias becomes the atom symbol and is also stored as AtomAlias property.
#
sub _ProcessAtomAliasProperty {
    my($ValuePairsRef, $AtomNumToAtomMapRef) = @_;

    return unless defined($ValuePairsRef) && @{$ValuePairsRef};

    # Work on a copy so the caller's list is left as it is...
    my @ValuePairs = @{$ValuePairsRef};

    VALUEPAIRS: while (@ValuePairs) {
        my($AtomNum, $AtomAlias) = splice(@ValuePairs, 0, 2);

        next VALUEPAIRS if !$AtomNum;
        next VALUEPAIRS if !exists $AtomNumToAtomMapRef->{$AtomNum};

        $AtomAlias = TextUtil::RemoveLeadingAndTrailingWhiteSpaces($AtomAlias);
        if (TextUtil::IsEmpty($AtomAlias)) {
            carp("Warning: ${ClassName}->_ProcessAtomAliasProperty: Ignoring atom alias property line: No Atom alias value specified...");
            next VALUEPAIRS;
        }

        # Set atom symbol to atom alias which sets atomic number automatically...
        my $Atom = $AtomNumToAtomMapRef->{$AtomNum};
        $Atom->SetAtomSymbol($AtomAlias);

        $Atom->SetProperty('AtomAlias', $AtomAlias);
    }

    return;
}
763 | |
# Get atom alias property value for an atom...
#
# Returns the atom's alias when it has an AtomAlias property; otherwise
# returns undef.
#
sub _GetAtomAliasPropertyValue {
    my($Atom) = @_;

    my $AtomAlias = $Atom->HasProperty('AtomAlias') ? $Atom->GetAtomAlias() : undef;

    return $AtomAlias;
}
775 | |
# Is it a MDLMolFileIO object?
#
# Returns 1 when the argument is a blessed reference which isa the class named
# in $ClassName; otherwise returns 0.
#
sub _IsMDLMolFileIO {
    my($Object) = @_;

    return 0 if !Scalar::Util::blessed($Object);

    return $Object->isa($ClassName) ? 1 : 0;
}
782 | |
783 | |
784 1; | |
785 | |
786 __END__ | |
787 | |
788 =head1 NAME | |
789 | |
790 MDLMolFileIO | |
791 | |
792 =head1 SYNOPSIS | |
793 | |
794 use FileIO::MDLMolFileIO; | |
795 | |
796 use FileIO::MDLMolFileIO qw(:all); | |
797 | |
798 =head1 DESCRIPTION | |
799 | |
800 B<MDLMolFileIO> class provides the following methods: | |
801 | |
802 new, GenerateMoleculeString, IsMDLMolFile, ParseMoleculeString, ReadMolecule, | |
803 ReadMoleculeString, WriteMolecule | |
804 | |
805 The following methods can also be used as functions: | |
806 | |
807 GenerateMoleculeString, IsMDLMolFile, ParseMoleculeString | |
808 | |
809 Data specific to B<MDLMolFileIO> class not directly used by B<Molecule>, B<Atom> and | |
810 B<Bond> objects - data label/value pairs, atom StereoParity and so on - is associated to | |
811 and retrieved from appropriate objects using following methods: | |
812 | |
813 SetMDL<PropertyName> | |
814 GetMDL<PropertyName>. | |
815 | |
816 B<MDLMolFileIO> class is derived from I<FileIO> class and uses its methods to support | |
817 generic file related functionality. | |
818 | |
819 =head2 METHODS | |
820 | |
821 =over 4 | |
822 | |
823 =item B<new> | |
824 | |
825 $NewMDLMolFileIO = new FileIO::MDLMolFileIO(%NamesAndValues); | |
826 | |
827 Using specified I<MDLMolFileIO> property names and values hash, B<new> method creates a new object | |
828 and returns a reference to newly created B<MDLMolFileIO> object. | |
829 | |
830 =item B<GenerateMoleculeString> | |
831 | |
832 $MoleculeString = $MDLMolFileIO->GenerateMoleculeString($Molecule); | |
833 $MoleculeString = FileIO::MDLMolFileIO::GenerateMoleculeString($Molecule); | |
834 | |
835 Returns a B<MoleculeString> in MDLMol format corresponding to I<Molecule>. | |
836 | |
837 =item B<IsMDLMolFile> | |
838 | |
839 $Status = $MDLMolFileIO->IsMDLMolFile($FileName); | |
840 $Status = FileIO::MDLMolFileIO::IsMDLMolFile($FileName); | |
841 | |
842 Returns 1 or 0 based on whether I<FileName> is a MDLMol file. | |
843 | |
844 =item B<ParseMoleculeString> | |
845 | |
846 $Molecule = $MDLMolFileIO->ParseMoleculeString($MoleculeString); | |
847 $Molecule = FileIO::MDLMolFileIO::ParseMoleculeString($MoleculeString); | |
848 | |
849 Parses I<MoleculeString> and returns a B<Molecule> object. | |
850 | |
851 =item B<ReadMolecule> | |
852 | |
853 $Molecule = $MDLMolFileIO->ReadMolecule(); | |
854 | |
855 Reads data for the compound in a file using the file handle of B<MDLMolFileIO> object, | |
856 creates, and returns a B<Molecule> object. | |
857 | |
858 =item B<ReadMoleculeString> | |
859 | |
860 $MoleculeString = $MDLMolFileIO->ReadMoleculeString(); | |
861 | |
862 Reads data for the compound in a file using the file handle of B<MDLMolFileIO> object and | |
863 returns a B<MoleculeString> corresponding to compound structure and other associated | |
864 data. | |
865 | |
866 =item B<WriteMolecule> | |
867 | |
868 $MDLMolFileIO->WriteMolecule($Molecule); | |
869 | |
870 Writes I<Molecule> data to a file in MDLMol format and returns B<MDLMolFileIO>. | |
871 | |
872 =back | |
873 | |
874 =head1 AUTHOR | |
875 | |
876 Manish Sud <msud@san.rr.com> | |
877 | |
878 =head1 SEE ALSO | |
879 | |
880 MoleculeFileIO.pm, SDFileIO.pm | |
881 | |
882 =head1 COPYRIGHT | |
883 | |
884 Copyright (C) 2015 Manish Sud. All rights reserved. | |
885 | |
886 This file is part of MayaChemTools. | |
887 | |
888 MayaChemTools is free software; you can redistribute it and/or modify it under | |
889 the terms of the GNU Lesser General Public License as published by the Free | |
890 Software Foundation; either version 3 of the License, or (at your option) | |
891 any later version. | |
892 | |
893 =cut |