0
|
1 package FileIO::MDLMolFileIO;
|
|
2 #
|
|
3 # $RCSfile: MDLMolFileIO.pm,v $
|
|
4 # $Date: 2015/02/28 20:48:43 $
|
|
5 # $Revision: 1.32 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability or fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use Carp;
|
|
31 use Exporter;
|
|
32 use Scalar::Util ();
|
|
33 use TextUtil ();
|
|
34 use FileUtil ();
|
|
35 use SDFileUtil ();
|
|
36 use FileIO::FileIO;
|
|
37 use Molecule;
|
|
38
|
|
# Package globals consumed by Exporter and by method resolution...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = qw(FileIO::FileIO Exporter);

# Nothing is exported by default; IsMDLMolFile is available on request or through
# the "all" tag...
@EXPORT = ();
@EXPORT_OK = qw(IsMDLMolFile);
%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# File scoped class name: it's assigned by _InitializeClass during module load and
# used in messages and type checks throughout this file...
my $ClassName;
_InitializeClass();
|
|
50
|
|
# Class constructor: Build the object through the base class constructor, bless it
# into the requested class and apply the specified property names and values.
#
# Parameters:
#   . $Class - class name or an existing object whose class is reused.
#   . %NamesAndValues - property names and values; each one is applied through the
#     corresponding Set<Name> method by _InitializeMDLMolFileIOProperties.
#
# Returns:
#   . Newly created object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Let the base class set up the object before re-blessing it...
  my $This = bless($Class->SUPER::new(), ref($Class) || $Class);

  $This->_InitializeMDLMolFileIO();
  $This->_InitializeMDLMolFileIOProperties(%NamesAndValues);

  return $This;
}
|
|
64
|
|
# Hook for initializing object data local to this class. There is none at present:
# this method only hands the object back to its caller.
#
# Returns:
#   . The object it was called on.
#
sub _InitializeMDLMolFileIO {
  my $This = shift;

  return $This;
}
|
|
74
|
|
# Initialize class level data: Record the name of this package in the file scoped
# $ClassName variable.
#
sub _InitializeClass {
  # Package name as seen by the compiler...
  $ClassName = __PACKAGE__;
}
|
|
81
|
|
# Initialize object properties from the specified names and values.
#
# Each name is turned into a Set<Name> method call on the object. After all values
# are applied, the presence of a Name property (the file name) is verified along
# with the file type; the method croaks on a missing name or a file which
# IsMDLMolFile doesn't accept.
#
# Returns:
#   . The object it was called on.
#
sub _InitializeMDLMolFileIOProperties {
  my($This, %NamesAndValues) = @_;

  # Apply every specified value through its Set<PropertyName> method...
  for my $PropertyName (keys %NamesAndValues) {
    my $SetterName = "Set${PropertyName}";
    $This->$SetterName($NamesAndValues{$PropertyName});
  }

  # File name is mandatory...
  exists $NamesAndValues{Name}
    or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";

  # Make sure it's a MDLMol file...
  my $FileName = $NamesAndValues{Name};
  $This->IsMDLMolFile($FileName)
    or croak "Error: ${ClassName}->New: Object can't be instantiated: File, $FileName, doesn't appear to be MDLMol format...";

  return $This;
}
|
|
107
|
|
# Is it a MDLMol file? IsMDLMolFile supports two invocation methods: object method
# or a package function.
#
# Parameters:
#   . $FileName - name of the file to check; it's the second argument for a method
#     call and the first argument otherwise.
#
# Returns:
#   . Status reported by FileUtil::CheckFileType for the "mol" file type.
#
sub IsMDLMolFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  # An MDLMolFileIO object in front of the file name indicates a method call...
  my $InvokedAsMethod = ((@_ == 2) && (_IsMDLMolFileIO($FirstParameter)));
  my $FileName = $InvokedAsMethod ? $SecondParameter : $FirstParameter;

  # Check file extension...
  my $Status = FileUtil::CheckFileType($FileName, "mol");

  return $Status;
}
|
|
125
|
|
# Read compound data from the file handle associated with this object and return
# the molecule object created by ParseMoleculeString.
#
sub ReadMolecule {
  my $This = shift;

  my $FileHandle = $This->GetFileHandle();
  my @CmpdString = SDFileUtil::ReadCmpdString($FileHandle);

  return $This->ParseMoleculeString(@CmpdString);
}
|
|
134
|
|
# Write compound data for a molecule object to the file handle associated with
# this object.
#
# Parameters:
#   . $Molecule - molecule object to write out.
#
# Returns:
#   . The object it was called on. A warning is issued and nothing is written for
#     a missing molecule or one whose IsMolecule method returns false.
#
sub WriteMolecule {
  my($This, $Molecule) = @_;

  unless (defined($Molecule) && $Molecule->IsMolecule()) {
    carp "Warning: ${ClassName}->WriteMolecule: No data written: Molecule object is not specified...";
    return $This;
  }

  my $FileHandle = $This->GetFileHandle();
  my $MoleculeString = $This->GenerateMoleculeString($Molecule);

  print {$FileHandle} $MoleculeString . "\n";

  return $This;
}
|
|
150
|
|
# Retrieve the text of the next compound from the file handle associated with this
# object, as returned by SDFileUtil::ReadCmpdString, without parsing it.
#
sub ReadMoleculeString {
  my $This = shift;

  my $FileHandle = $This->GetFileHandle();

  return SDFileUtil::ReadCmpdString($FileHandle);
}
|
|
159
|
|
# Parse molecule string and return molecule object. ParseMoleculeString supports two invocation
# methods: class method or a package function.
#
# Parameters:
#   . $MoleculeString - text of one compound: molecule name, misc info and comments lines
#     followed by the counts line, atom lines, bond lines and property lines.
#
# Returns:
#   . Molecule object or undef for an empty or undefined molecule string.
#
# Notes:
#   . Property line tags are matched with flexible white space after the leading M: both the
#     standard two space form (M  CHG, M  END) and a single space form are recognized.
#   . Processing of property lines stops at the M  END line, at a $$$$ delimiter or at the end
#     of the available lines; a truncated molecule string can't cause an endless loop.
#
sub ParseMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;
  my($This, $MoleculeString);

  if ((@_ == 2) && (_IsMDLMolFileIO($FirstParameter))) {
    ($This, $MoleculeString) = ($FirstParameter, $SecondParameter);
  }
  else {
    $MoleculeString = $FirstParameter;
    $This = undef;
  }
  if (!$MoleculeString) {
    return undef;
  }
  my($LineIndex, @MoleculeLines);
  @MoleculeLines = split /\n/, $MoleculeString;

  # Create molecule object and set molecule level native and MDL properties...
  #
  # Explicit Class->new calls are used throughout: this package defines its own new method
  # which makes the indirect object form ambiguous...
  #
  my($Molecule);
  $Molecule = Molecule->new();

  # Set valence model for calculating implicit hydrogens...
  $Molecule->SetValenceModel('MDLValenceModel');

  # Process headers data: molecule name line...
  $LineIndex = 0;
  my($MoleculeName) = SDFileUtil::ParseCmpdMolNameLine($MoleculeLines[$LineIndex]);
  $MoleculeName = TextUtil::RemoveTrailingWhiteSpaces($MoleculeName);
  $Molecule->SetName($MoleculeName);

  # Misc info line...
  $LineIndex++;
  my($UserInitial, $ProgramName, $Date, $Code, $ScalingFactor1, $ScalingFactor2, $Energy, $RegistryNum) = SDFileUtil::ParseCmpdMiscInfoLine($MoleculeLines[$LineIndex]);
  $Molecule->SetProperties('MDLUserInitial' => $UserInitial, 'MDLProgramName' => $ProgramName, 'MDLDate' => $Date, 'MDLCode' => $Code, 'MDLScalingFactor1' => $ScalingFactor1, 'MDLScalingFactor2' => $ScalingFactor2, 'MDLEnergy' => $Energy, 'MDLRegistryNum' => $RegistryNum);

  # Comments line...
  $LineIndex++;
  my($Comments) = SDFileUtil::ParseCmpdCommentsLine($MoleculeLines[$LineIndex]);
  $Molecule->SetProperties('MDLComments' => $Comments);

  # Counts line...
  $LineIndex++;
  my($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) = SDFileUtil::ParseCmpdCountsLine($MoleculeLines[$LineIndex]);

  $Molecule->SetProperties('MDLChiralFlag' => $ChiralFlag, 'MDLPropertyCount' => $PropertyCount, 'MDLVersion' => $Version);

  # Process atom data: atom lines follow the four header lines and atoms are numbered from 1
  # in the order of their appearance...
  my($FirstAtomLineIndex, $LastAtomLineIndex, $AtomNum, $AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity, $Atom, %AtomNumToAtomMap);

  $AtomNum = 0;
  %AtomNumToAtomMap = ();
  $FirstAtomLineIndex = 4; $LastAtomLineIndex = $FirstAtomLineIndex + $AtomCount - 1;

  for ($LineIndex = $FirstAtomLineIndex; $LineIndex <= $LastAtomLineIndex; $LineIndex++) {
    $AtomNum++;
    ($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity) = SDFileUtil::ParseCmpdAtomLine($MoleculeLines[$LineIndex]);

    $Atom = Atom->new('AtomSymbol' => $AtomSymbol, 'XYZ' => [$AtomX, $AtomY, $AtomZ]);

    if ($MassDifference && $MassDifference != 0) {
      _ProcessMassDifference($Atom, $MassDifference);
    }
    if ($Charge && $Charge != 0) {
      _ProcessCharge($Atom, $Charge);
    }
    if ($StereoParity && $StereoParity != 0) {
      _ProcessStereoParity($Atom, $StereoParity);
    }

    $AtomNumToAtomMap{$AtomNum} = $Atom;
    $Molecule->AddAtom($Atom);
  }

  # Process bond data: bond lines refer to atoms by the numbers assigned above...
  my($FirstBondLineIndex, $LastBondLineIndex, $FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo, $InternalBondOrder, $InternalBondType, $Bond, $Atom1, $Atom2);

  $FirstBondLineIndex = $FirstAtomLineIndex + $AtomCount;
  $LastBondLineIndex = $FirstAtomLineIndex + $AtomCount + $BondCount - 1;

  for ($LineIndex = $FirstBondLineIndex; $LineIndex <= $LastBondLineIndex; $LineIndex++) {
    ($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = SDFileUtil::ParseCmpdBondLine($MoleculeLines[$LineIndex]);

    $Atom1 = $AtomNumToAtomMap{$FirstAtomNum};
    $Atom2 = $AtomNumToAtomMap{$SecondAtomNum};

    ($InternalBondOrder, $InternalBondType) = SDFileUtil::MDLBondTypeToInternalBondOrder($BondType);
    $Bond = Bond->new('Atoms' => [$Atom1, $Atom2], 'BondOrder' => $InternalBondOrder);
    $Bond->SetBondType($InternalBondType);

    if ($BondStereo && $BondStereo != 0) {
      _ProcessBondStereo($Bond, $BondStereo);
    }

    $Molecule->AddBond($Bond);
  }

  # Process available property block lines starting with A  aaa, M  CHG, M  ISO and M  RAD. All
  # other property block lines are for query or specific display purposes and are ignored for now.
  #
  my($PropertyLineIndex, $PropertyLine, $FirstChargeOrRadicalLine, @ValuePairs);

  $PropertyLineIndex = $FirstAtomLineIndex + $AtomCount + $BondCount;
  $PropertyLine = $MoleculeLines[$PropertyLineIndex];
  $FirstChargeOrRadicalLine = 1;

  # The defined check ends the loop once the available lines are exhausted; without it, a
  # molecule string lacking the M  END line would be processed forever...
  PROPERTYLINE: while (defined($PropertyLine) && $PropertyLine !~ /^M\s+END/i) {
    if ($PropertyLine =~ /\$\$\$\$/) {
      last PROPERTYLINE;
    }
    if ($PropertyLine =~ /^M\s+(?:CHG|RAD)/i) {
      # The first charge or radical property line resets charge and radical values assigned
      # to all atoms using atom lines...
      if ($FirstChargeOrRadicalLine) {
        $FirstChargeOrRadicalLine = 0;
        _ZeroOutAtomsChargeAndRadicalValues(\%AtomNumToAtomMap);
      }
      if ($PropertyLine =~ /^M\s+CHG/i) {
        @ValuePairs = SDFileUtil::ParseCmpdChargePropertyLine($PropertyLine);
        _ProcessChargeProperty(\@ValuePairs, \%AtomNumToAtomMap);
      }
      elsif ($PropertyLine =~ /^M\s+RAD/i) {
        @ValuePairs = SDFileUtil::ParseCmpdRadicalPropertyLine($PropertyLine);
        _ProcessRadicalProperty(\@ValuePairs, \%AtomNumToAtomMap);
      }
    }
    elsif ($PropertyLine =~ /^M\s+ISO/i) {
      @ValuePairs = SDFileUtil::ParseCmpdIsotopePropertyLine($PropertyLine);
      _ProcessIsotopeProperty(\@ValuePairs, \%AtomNumToAtomMap);
    }
    elsif ($PropertyLine =~ /^A /i) {
      # Atom alias spans two lines: the current line and the one following it...
      my($NextPropertyLine);
      $PropertyLineIndex++;
      $NextPropertyLine = $MoleculeLines[$PropertyLineIndex];
      @ValuePairs = SDFileUtil::ParseCmpdAtomAliasPropertyLine($PropertyLine, $NextPropertyLine);
      _ProcessAtomAliasProperty(\@ValuePairs, \%AtomNumToAtomMap);
    }
    $PropertyLineIndex++;
    $PropertyLine = $MoleculeLines[$PropertyLineIndex];
  }

  # Store input molecule string as generic property of molecule...
  $Molecule->SetInputMoleculeString($MoleculeString);

  return $Molecule;
}
|
|
304
|
|
# Generate molecule string using molecule object. GenerateMoleculeString supports two invocation
# methods: class method or a package function.
#
# Parameters:
#   . $Molecule - molecule object to convert.
#
# Returns:
#   . Molecule string containing newline delimited lines: molecule name, misc info, comments and
#     counts lines followed by atom lines, bond lines, available property lines and the
#     terminating M  END line. No trailing newline is added. undef is returned for an undefined
#     molecule.
#
# Notes:
#   . The terminating line is written as M  END with two spaces after M as required by MDL
#     CTfile format.
#
sub GenerateMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;
  my($This, $Molecule);

  if ((@_ == 2) && (_IsMDLMolFileIO($FirstParameter))) {
    ($This, $Molecule) = ($FirstParameter, $SecondParameter);
  }
  else {
    $Molecule = $FirstParameter;
    $This = undef;
  }
  if (!defined($Molecule)) {
    return undef;
  }
  my(@MoleculeLines);
  @MoleculeLines = ();

  # First line: Molname line...
  push @MoleculeLines, SDFileUtil::GenerateCmpdMolNameLine($Molecule->GetName());

  # Second line: Misc info. Program name and user initial are left empty...
  my($ProgramName, $UserInitial, $Code);
  $ProgramName = ''; $UserInitial = '';
  $Code = $Molecule->IsThreeDimensional() ? '3D' : '2D';

  push @MoleculeLines, SDFileUtil::GenerateCmpdMiscInfoLine($ProgramName, $UserInitial, $Code);

  # Third line: Comments line. MDLComments property takes precedence over Comments property...
  my($Comments);
  $Comments = $Molecule->HasProperty('MDLComments') ? $Molecule->GetMDLComments() : ($Molecule->HasProperty('Comments') ? $Molecule->GetComments() : '');
  push @MoleculeLines, SDFileUtil::GenerateCmpdCommentsLine($Comments);

  # Fourth line: Counts line for V2000. Chiral flag is always written as zero...
  my($AtomCount, $BondCount, $ChiralFlag);
  $AtomCount = $Molecule->GetNumOfAtoms();
  $BondCount = $Molecule->GetNumOfBonds();
  $ChiralFlag = 0;
  push @MoleculeLines, SDFileUtil::GenerateCmpdCountsLine($AtomCount, $BondCount, $ChiralFlag);

  # Atom lines: atoms are numbered from 1 in the order returned by GetAtoms...
  my($Atom, $AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity, $AtomNum, $AtomID, @Atoms, %AtomIDToNum);
  my($ChargePropertyValue, $IsotopePropertyValue, $RadicalPropertyValue, $AtomAliasPropertyValue, @IsotopePropertyValuePairs, @ChargePropertyValuePairs, @RadicalPropertyValuePairs, @AtomAliasPropertyValuePairs);

  @ChargePropertyValuePairs = ();
  @IsotopePropertyValuePairs = ();
  @RadicalPropertyValuePairs = ();
  @AtomAliasPropertyValuePairs = ();

  @Atoms = $Molecule->GetAtoms();

  $AtomNum = 0;
  for $Atom (@Atoms) {
    $AtomNum++;
    $AtomID = $Atom->GetID();
    $AtomIDToNum{$AtomID} = $AtomNum;

    $AtomSymbol = $Atom->GetAtomSymbol();
    ($AtomX, $AtomY, $AtomZ) = $Atom->GetXYZ();

    # Setup mass difference...
    $MassDifference = _GetMassDifference($Atom);
    if ($MassDifference) {
      # Hold it for M  ISO property lines...
      $IsotopePropertyValue = _GetIsotopePropertyValue($Atom);
      if ($IsotopePropertyValue) {
        push @IsotopePropertyValuePairs, ($AtomNum, $IsotopePropertyValue);
      }
    }

    # Setup charge...
    $Charge = _GetCharge($Atom);
    if ($Charge) {
      # Hold it for M  CHG property lines...
      $ChargePropertyValue = _GetChargePropertyValue($Atom);
      if ($ChargePropertyValue) {
        push @ChargePropertyValuePairs, ($AtomNum, $ChargePropertyValue);
      }
    }

    # Hold any radical values for M  RAD property lines...
    $RadicalPropertyValue = _GetRadicalPropertyValue($Atom);
    if ($RadicalPropertyValue) {
      push @RadicalPropertyValuePairs, ($AtomNum, $RadicalPropertyValue);
    }

    # Hold any atom alias value for A  xxx property lines....
    $AtomAliasPropertyValue = _GetAtomAliasPropertyValue($Atom);
    if ($AtomAliasPropertyValue) {
      push @AtomAliasPropertyValuePairs, ($AtomNum, $AtomAliasPropertyValue);

      # Set AtomSymbol to carbon as atom alias would override its value during parsing...
      $AtomSymbol = "C";
    }

    # Setup stereo parity...
    $StereoParity = _GetStereoParity($Atom);

    push @MoleculeLines, SDFileUtil::GenerateCmpdAtomLine($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity);
  }

  # Bond lines: bonds are collected by going over bonds of each atom. A bond is written out
  # only when its first atom number is smaller than its second atom number, so it appears once...
  my($FirstAtomID, $FirstAtom, $FirstAtomNum, $SecondAtomID, $SecondAtom, $SecondAtomNum, $MDLBondType, $BondOrder, $BondType, $MDLBondStereo, $Bond, @Bonds);
  for $FirstAtom (@Atoms) {
    $FirstAtomID = $FirstAtom->GetID();
    $FirstAtomNum = $AtomIDToNum{$FirstAtomID};

    @Bonds = $FirstAtom->GetBonds();
    BOND: for $Bond (@Bonds) {
      $SecondAtom = $Bond->GetBondedAtom($FirstAtom);
      $SecondAtomID = $SecondAtom->GetID();
      $SecondAtomNum = $AtomIDToNum{$SecondAtomID};
      if ($FirstAtomNum >= $SecondAtomNum) {
        next BOND;
      }
      # Setup BondType...
      $BondOrder = $Bond->GetBondOrder();
      $BondType = $Bond->GetBondType();
      $MDLBondType = SDFileUtil::InternalBondOrderToMDLBondType($BondOrder, $BondType);

      # Setup BondStereo...
      $MDLBondStereo = _GetBondStereo($Bond);

      push @MoleculeLines, SDFileUtil::GenerateCmpdBondLine($FirstAtomNum, $SecondAtomNum, $MDLBondType, $MDLBondStereo);
    }
  }

  # Property lines...
  if (@IsotopePropertyValuePairs) {
    push @MoleculeLines, SDFileUtil::GenerateCmpdIsotopePropertyLines(\@IsotopePropertyValuePairs);
  }
  if (@ChargePropertyValuePairs) {
    push @MoleculeLines, SDFileUtil::GenerateCmpdChargePropertyLines(\@ChargePropertyValuePairs);
  }
  if (@RadicalPropertyValuePairs) {
    push @MoleculeLines, SDFileUtil::GenerateCmpdRadicalPropertyLines(\@RadicalPropertyValuePairs);
  }
  if (@AtomAliasPropertyValuePairs) {
    push @MoleculeLines, SDFileUtil::GenerateCmpdAtomAliasPropertyLines(\@AtomAliasPropertyValuePairs);
  }

  # Terminating line: two spaces after M...
  push @MoleculeLines, "M  END";

  return join "\n", @MoleculeLines;
}
|
|
451
|
|
# Process MassDifference value and set atom's mass number...
#
# Parameters:
#   . $Atom - atom object to update.
#   . $MassDifference - mass difference value specified on the atom line.
#
# Notes:
#   . The mass difference is ignored with a warning for an atom without an atomic number and
#     for an atom whose current mass number is not available.
#   . A mass number which is not a known natural isotope of the element is still assigned
#     after issuing a warning.
#
sub _ProcessMassDifference {
  my($Atom, $MassDifference) = @_;
  my($MassNumber, $NewMassNumber, $AtomicNumber);

  $AtomicNumber = $Atom->GetAtomicNumber();

  if (!$AtomicNumber) {
    carp "Warning: ${ClassName}->_ProcessMassDifference: Ignoring specified mass difference value, $MassDifference, in SD file: Assigned to non standard element...";
    return;
  }

  # It's the mass number, not the mass difference, which must be known in order to derive
  # the new mass number...
  $MassNumber = $Atom->GetMassNumber();
  if (!$MassNumber) {
    carp "Warning: ${ClassName}->_ProcessMassDifference: Ignoring specified mass difference value, $MassDifference, in SD file: Unknown MassNumber value...";
    return;
  }
  $NewMassNumber = $MassNumber + $MassDifference;
  if (!PeriodicTable::IsElementNaturalIsotopeMassNumber($AtomicNumber, $NewMassNumber)) {
    my($AtomSymbol) = $Atom->GetAtomSymbol();
    # Report the derived mass number: that's the value which is unknown and gets assigned...
    carp "Warning: ${ClassName}->_ProcessMassDifference: Unknown mass number, $NewMassNumber, corresponding to specified mass difference value, $MassDifference, in SD for atom with atomic number, $AtomicNumber, and atomic symbol, $AtomSymbol. The mass number value has been assigned. Don't forget to Set ExactMass property explicitly; otherwise, GetExactMass method would return mass of most abundant isotope...\n";
  }

  # Use SetProperty method instead of SetMassNumber to skip explicit checks on MassNumber value...
  $Atom->SetProperty('MassNumber', $NewMassNumber);
}
|
|
478
|
|
# Get mass difference value for an atom: its mass number minus the mass number of the most
# abundant natural isotope of its element.
#
# Returns:
#   . Mass difference or 0 when the mass number, the atomic number or the most abundant mass
#     number is not defined or when both mass numbers are equal.
#
sub _GetMassDifference {
  my($Atom) = @_;

  my $MassNumber = $Atom->GetMassNumber();
  return 0 unless defined $MassNumber;

  my $AtomicNumber = $Atom->GetAtomicNumber();
  return 0 unless defined $AtomicNumber;

  my $MostAbundantMassNumber = PeriodicTable::GetElementMostAbundantNaturalIsotopeMassNumber($AtomicNumber);
  return 0 unless defined $MostAbundantMassNumber;

  return ($MassNumber != $MostAbundantMassNumber) ? ($MassNumber - $MostAbundantMassNumber) : 0;
}
|
|
497
|
|
# Convert the charge value specified on an atom line into an internal charge value using
# SDFileUtil::MDLChargeToInternalCharge and assign it to the atom as its formal charge...
#
sub _ProcessCharge {
  my($Atom, $MDLCharge) = @_;

  my $FormalCharge = SDFileUtil::MDLChargeToInternalCharge($MDLCharge);

  $Atom->SetFormalCharge($FormalCharge);
}
|
|
506
|
|
# Get MDL charge value for an atom line...
#
# Returns:
#   . Value returned by SDFileUtil::InternalChargeToMDLCharge for the formal charge of the
#     atom or 0 for an atom without a FormalCharge property or with a zero formal charge.
#
sub _GetCharge {
  my($Atom) = @_;

  return 0 unless $Atom->HasProperty('FormalCharge');

  my $FormalCharge = $Atom->GetFormalCharge();
  return 0 unless $FormalCharge;

  my $MDLCharge = SDFileUtil::InternalChargeToMDLCharge($FormalCharge);

  return $MDLCharge;
}
|
|
521
|
|
# Process stereo parity value specified on an atom line...
#
# Notes:
#   . The atom is marked as a stereo center.
#   . The explicit parity value - Clockwise (parity 1), CounterClockwise (parity 2) or either
#     (parity 3) - is stored as MDLStereoParity property of the atom.
#   . MDL values of Clockwise and CounterClockwise don't correspond to priority assigned to
#     ligands around stereo center using CIP scheme; consequently, these values can't be used
#     to set internal Stereochemistry for an atom.
#
sub _ProcessStereoParity {
  my $Atom = shift;
  my $StereoParity = shift;

  $Atom->SetStereoCenter('1');
  $Atom->SetMDLStereoParity($StereoParity);
}
|
|
537
|
|
# Get stereo parity value for an atom line. It's always zero for now: The current release of
# MayaChemTools hasn't implemented functionality to determine chirality.
#
sub _GetStereoParity {
  my($Atom) = @_;

  return 0;
}
|
|
549
|
|
# Process bond stereo value specified on a bond line: the value is converted using
# SDFileUtil::MDLBondStereoToInternalBondStereochemistry and assigned to the bond as its
# stereochemistry. Nothing is assigned when the conversion yields a false value...
#
sub _ProcessBondStereo {
  my($Bond, $MDLBondStereo) = @_;

  my $BondStereochemistry = SDFileUtil::MDLBondStereoToInternalBondStereochemistry($MDLBondStereo);

  $Bond->SetBondStereochemistry($BondStereochemistry) if $BondStereochemistry;
}
|
|
560
|
|
# Get MDL bond stereo value for a bond line...
#
# The bond is classified as Up, Down, UpOrDown or CisOrTrans - checked in that order, with
# Cis and Trans bonds reported as CisOrTrans - and the classification is converted using
# SDFileUtil::InternalBondStereochemistryToMDLBondStereo.
#
# Returns:
#   . Converted value or 0 for a bond without any of these stereo assignments.
#
sub _GetBondStereo {
  my($Bond) = @_;

  my $BondStereochemistry =
      $Bond->IsUp()       ? 'Up'
    : $Bond->IsDown()     ? 'Down'
    : $Bond->IsUpOrDown() ? 'UpOrDown'
    : ($Bond->IsCisOrTrans() || $Bond->IsCis() || $Bond->IsTrans()) ? 'CisOrTrans'
    :                       '';

  return 0 unless $BondStereochemistry;

  my $MDLBondStereo = SDFileUtil::InternalBondStereochemistryToMDLBondStereo($BondStereochemistry);

  return $MDLBondStereo;
}
|
|
595
|
|
# Zero out charge and radical values specified for atoms...
#
# Parameters:
#   . $AtomNumToAtomMapRef - reference to a hash mapping atom numbers to atom objects.
#
# Notes:
#   . FormalCharge and SpinMultiplicity properties are deleted independently of each other:
#     an atom carrying both properties loses both.
#
sub _ZeroOutAtomsChargeAndRadicalValues {
  my($AtomNumToAtomMapRef) = @_;
  my($Atom, $PropertyName);

  for $Atom (values %{$AtomNumToAtomMapRef}) {
    for $PropertyName ('FormalCharge', 'SpinMultiplicity') {
      if ($Atom->HasProperty($PropertyName)) {
        $Atom->DeleteProperty($PropertyName);
      }
    }
  }
}
|
|
610
|
|
# Process charge property value pairs...
#
# Parameters:
#   . $ValuePairsRef - reference to a flat list of atom number and charge pairs.
#   . $AtomNumToAtomMapRef - reference to a hash mapping atom numbers to atom objects.
#
# Notes:
#   . Pairs with a false charge value or an unknown atom number are skipped.
#   . A warning is issued before setting formal charge on an atom which already has a
#     SpinMultiplicity property.
#
sub _ProcessChargeProperty {
  my($ValuePairsRef, $AtomNumToAtomMapRef) = @_;

  return unless (defined($ValuePairsRef) && @{$ValuePairsRef});

  # Consume a copy of the list two values at a time...
  my @RemainingValues = @{$ValuePairsRef};
  while (@RemainingValues) {
    my($AtomNum, $Charge) = splice(@RemainingValues, 0, 2);

    next unless $Charge;
    next unless exists $AtomNumToAtomMapRef->{$AtomNum};

    my $Atom = $AtomNumToAtomMapRef->{$AtomNum};
    if ($Atom->HasProperty('SpinMultiplicity')) {
      carp "Warning: ${ClassName}->_ProcessChargeProperty: Setting formal charge on atom number, $AtomNum, with already assigned spin multiplicity value...";
    }
    $Atom->SetFormalCharge($Charge);
  }
}
|
|
636
|
|
# Get charge property value for an atom: its formal charge or 0 for an atom without a
# FormalCharge property...
#
sub _GetChargePropertyValue {
  my($Atom) = @_;

  my $Charge = $Atom->HasProperty('FormalCharge') ? $Atom->GetFormalCharge() : 0;

  return $Charge;
}
|
|
648
|
|
# Process radical property value pairs...
#
# Parameters:
#   . $ValuePairsRef - reference to a flat list of atom number and radical value pairs.
#   . $AtomNumToAtomMapRef - reference to a hash mapping atom numbers to atom objects.
#
# Notes:
#   . Pairs with a false radical value or an unknown atom number are skipped.
#   . The radical value is converted using SDFileUtil::MDLRadicalToInternalSpinMultiplicity
#     before it's assigned as spin multiplicity.
#   . A warning is issued before setting spin multiplicity on an atom which already has a
#     FormalCharge property.
#
sub _ProcessRadicalProperty {
  my($ValuePairsRef, $AtomNumToAtomMapRef) = @_;

  return unless (defined($ValuePairsRef) && @{$ValuePairsRef});

  # Consume a copy of the list two values at a time...
  my @RemainingValues = @{$ValuePairsRef};
  while (@RemainingValues) {
    my($AtomNum, $Radical) = splice(@RemainingValues, 0, 2);

    next unless $Radical;
    next unless exists $AtomNumToAtomMapRef->{$AtomNum};

    my $Atom = $AtomNumToAtomMapRef->{$AtomNum};
    if ($Atom->HasProperty('FormalCharge')) {
      carp "Warning: ${ClassName}->_ProcessRadicalProperty: Setting spin multiplicity on atom number, $AtomNum, with already assigned formal charge value...";
    }
    my $SpinMultiplicity = SDFileUtil::MDLRadicalToInternalSpinMultiplicity($Radical);
    $Atom->SetSpinMultiplicity($SpinMultiplicity);
  }
}
|
|
675
|
|
# Get radical property value for an atom: its spin multiplicity converted using
# SDFileUtil::InternalSpinMultiplicityToMDLRadical or 0 for an atom without a
# SpinMultiplicity property...
#
sub _GetRadicalPropertyValue {
  my($Atom) = @_;

  return 0 unless $Atom->HasProperty('SpinMultiplicity');

  my $SpinMultiplicity = $Atom->GetSpinMultiplicity();
  my $Radical = SDFileUtil::InternalSpinMultiplicityToMDLRadical($SpinMultiplicity);

  return $Radical;
}
|
|
688
|
|
# Process isotope property value pairs...
#
# Parameters:
#   . $ValuePairsRef - reference to a flat list of atom number and mass number pairs.
#   . $AtomNumToAtomMapRef - reference to a hash mapping atom numbers to atom objects.
#
# Notes:
#   . Pairs with a false mass number or an unknown atom number are skipped.
#   . A mass number which is not a known natural isotope of the element is still assigned
#     after issuing a warning.
#
sub _ProcessIsotopeProperty {
  my($ValuePairsRef, $AtomNumToAtomMapRef) = @_;

  if (!(defined($ValuePairsRef) && @{$ValuePairsRef})) {
    return;
  }
  my($Index, $ValuePairsCount, $AtomNum, $MassNumber, $Atom, $AtomicNumber);

  $ValuePairsCount = scalar @{$ValuePairsRef};
  VALUEPAIRS: for ($Index = 0; $Index < $ValuePairsCount; $Index +=2) {
    $AtomNum = $ValuePairsRef->[$Index]; $MassNumber = $ValuePairsRef->[$Index + 1];
    if (!$MassNumber) {
      next VALUEPAIRS;
    }
    if (!exists $AtomNumToAtomMapRef->{$AtomNum}) {
      next VALUEPAIRS;
    }
    $Atom = $AtomNumToAtomMapRef->{$AtomNum};
    $AtomicNumber = $Atom->GetAtomicNumber();

    if (!PeriodicTable::IsElementNaturalIsotopeMassNumber($AtomicNumber, $MassNumber)) {
      my($AtomSymbol) = $Atom->GetAtomSymbol();
      # The warning names this subroutine so that its origin can be located...
      carp "Warning: ${ClassName}->_ProcessIsotopeProperty: Unknown mass number, $MassNumber, specified on M ISO property line for atom number, $AtomNum, in SD for atom with atomic number, $AtomicNumber, and atomic symbol, $AtomSymbol. The mass number value has been assigned. Don't forget to Set ExactMass property explicitly; otherwise, GetExactMass method would return mass of most abundant isotope...\n";
    }

    # Use SetProperty method instead of SetMassNumber to skip explicit checks on MassNumber value...
    $Atom->SetProperty('MassNumber', $MassNumber);
  }
}
|
|
719
|
|
# Get isotope property value for an atom: its mass number or 0 for an atom without a
# MassNumber property...
#
sub _GetIsotopePropertyValue {
  my($Atom) = @_;

  my $MassNumber = $Atom->HasProperty('MassNumber') ? $Atom->GetMassNumber() : 0;

  return $MassNumber;
}
|
|
731
|
|
# Process atom alias property value pairs...
#
# Parameters:
#   . $ValuePairsRef - reference to a flat list of atom number and atom alias pairs.
#   . $AtomNumToAtomMapRef - reference to a hash mapping atom numbers to atom objects.
#
# Notes:
#   . Pairs with a false or unknown atom number are skipped.
#   . Leading and trailing white spaces are removed from the alias; an empty alias is
#     ignored with a warning.
#   . The alias is assigned to the atom both as its atom symbol and as AtomAlias property.
#
sub _ProcessAtomAliasProperty {
  my($ValuePairsRef, $AtomNumToAtomMapRef) = @_;

  return unless (defined($ValuePairsRef) && @{$ValuePairsRef});

  # Consume a copy of the list two values at a time...
  my @RemainingValues = @{$ValuePairsRef};
  while (@RemainingValues) {
    my($AtomNum, $AtomAlias) = splice(@RemainingValues, 0, 2);

    next unless $AtomNum;
    next unless exists $AtomNumToAtomMapRef->{$AtomNum};

    $AtomAlias = TextUtil::RemoveLeadingAndTrailingWhiteSpaces($AtomAlias);
    if (TextUtil::IsEmpty($AtomAlias)) {
      carp("Warning: ${ClassName}->_ProcessAtomAliasProperty: Ignoring atom alias property line: No Atom alias value specified...");
      next;
    }

    # Set atom symbol to atom alias which sets atomic number automatically...
    my $Atom = $AtomNumToAtomMapRef->{$AtomNum};
    $Atom->SetAtomSymbol($AtomAlias);

    $Atom->SetProperty('AtomAlias', $AtomAlias);
  }
}
|
|
763
|
|
# Get atom alias property value for an atom: value of its AtomAlias property or undef for
# an atom without that property...
#
sub _GetAtomAliasPropertyValue {
  my($Atom) = @_;

  my $AtomAlias = $Atom->HasProperty('AtomAlias') ? $Atom->GetAtomAlias() : undef;

  return $AtomAlias;
}
|
|
775
|
|
# Is it a MDLMolFileIO object?
#
# Returns:
#   . 1 for a blessed reference which is an instance of this class or of a class derived
#     from it; otherwise, 0.
#
sub _IsMDLMolFileIO {
  my($Object) = @_;

  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
|
|
782
|
|
783
|
|
784 1;
|
|
785
|
|
786 __END__
|
|
787
|
|
788 =head1 NAME
|
|
789
|
|
790 MDLMolFileIO
|
|
791
|
|
792 =head1 SYNOPSIS
|
|
793
|
|
794 use FileIO::MDLMolFileIO;
|
|
795
|
|
796 use FileIO::MDLMolFileIO qw(:all);
|
|
797
|
|
798 =head1 DESCRIPTION
|
|
799
|
|
800 B<MDLMolFileIO> class provides the following methods:
|
|
801
|
|
802 new, GenerateMoleculeString, IsMDLMolFile, ParseMoleculeString, ReadMolecule,
|
|
803 ReadMoleculeString, WriteMolecule
|
|
804
|
|
805 The following methods can also be used as functions:
|
|
806
|
|
807 GenerateMoleculeString, IsMDLMolFile, ParseMoleculeString
|
|
808
|
|
809 Data specific to B<MDLMolFileIO> class not directly used by B<Molecule>, B<Atom> and
|
|
810 B<Bond> objects - data label/value pairs, atom StereoParity and so on - is associated to
|
|
811 and retrieved from appropriate objects using following methods:
|
|
812
|
|
813 SetMDL<PropertyName>
|
|
814 GetMDL<PropertyName>.
|
|
815
|
|
816 B<MDLMolFileIO> class is derived from I<FileIO> class and uses its methods to support
|
|
817 generic file related functionality.
|
|
818
|
|
819 =head2 METHODS
|
|
820
|
|
821 =over 4
|
|
822
|
|
823 =item B<new>
|
|
824
|
|
825 $NewMDLMolFileIO = new FileIO::MDLMolFileIO(%NamesAndValues);
|
|
826
|
|
827 Using specified I<MDLMolFileIO> property names and values hash, B<new> method creates a new object
|
|
828 and returns a reference to newly created B<MDLMolFileIO> object.
|
|
829
|
|
830 =item B<GenerateMoleculeString>
|
|
831
|
|
832 $MoleculeString = $MDLMolFileIO->GenerateMoleculeString($Molecule);
|
|
833 $MoleculeString = FileIO::MDLMolFileIO::GenerateMoleculeString($Molecule);
|
|
834
|
|
835 Returns a B<MoleculeString> in MDLMol format corresponding to I<Molecule>.
|
|
836
|
|
837 =item B<IsMDLMolFile>
|
|
838
|
|
839 $Status = $MDLMolFileIO->IsMDLMolFile($FileName);
|
|
840 $Status = FileIO::MDLMolFileIO::IsMDLMolFile($FileName);
|
|
841
|
|
842 Returns 1 or 0 based on whether I<FileName> is a MDLMol file.
|
|
843
|
|
844 =item B<ParseMoleculeString>
|
|
845
|
|
846 $Molecule = $MDLMolFileIO->ParseMoleculeString($MoleculeString);
|
|
847 $Molecule = FileIO::MDLMolFileIO::ParseMoleculeString($MoleculeString);
|
|
848
|
|
849 Parses I<MoleculeString> and returns a B<Molecule> object.
|
|
850
|
|
851 =item B<ReadMolecule>
|
|
852
|
|
853 $Molecule = $MDLMolFileIO->ReadMolecule($FileHandle);
|
|
854
|
|
855 Reads data for the compound in a file using already opened I<FileHandle>, creates,
|
|
856 and returns a B<Molecule> object.
|
|
857
|
|
858 =item B<ReadMoleculeString>
|
|
859
|
|
860 $MoleculeString = $MDLMolFileIO->ReadMoleculeString($FileHandle);
|
|
861
|
|
862 Reads data for the compound in a file using already opened I<FileHandle> and
|
|
863 returns a B<MoleculeString> corresponding to compound structure and other associated
|
|
864 data.
|
|
865
|
|
866 =item B<WriteMolecule>
|
|
867
|
|
868 $MDLMolFileIO->WriteMolecule($Molecule);
|
|
869
|
|
870 Writes I<Molecule> data to a file in MDLMol format and returns B<MDLMolFileIO>.
|
|
871
|
|
872 =back
|
|
873
|
|
874 =head1 AUTHOR
|
|
875
|
|
876 Manish Sud <msud@san.rr.com>
|
|
877
|
|
878 =head1 SEE ALSO
|
|
879
|
|
880 MoleculeFileIO.pm, SDFileIO.pm
|
|
881
|
|
882 =head1 COPYRIGHT
|
|
883
|
|
884 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
885
|
|
886 This file is part of MayaChemTools.
|
|
887
|
|
888 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
889 the terms of the GNU Lesser General Public License as published by the Free
|
|
890 Software Foundation; either version 3 of the License, or (at your option)
|
|
891 any later version.
|
|
892
|
|
893 =cut
|