comparison mayachemtools/lib/FileIO/MDLMolFileIO.pm @ 0:73ae111cf86f draft

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 11:55:01 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:73ae111cf86f
1 package FileIO::MDLMolFileIO;
2 #
3 # $RCSfile: MDLMolFileIO.pm,v $
4 # $Date: 2015/02/28 20:48:43 $
5 # $Revision: 1.32 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Carp;
31 use Exporter;
32 use Scalar::Util ();
33 use TextUtil ();
34 use FileUtil ();
35 use SDFileUtil ();
36 use FileIO::FileIO;
37 use Molecule;
38
# Package globals consulted by Exporter and by method resolution...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

# Inherit generic file handling from FileIO; nothing is exported by default and
# IsMDLMolFile is available on request or through the 'all' tag...
@ISA = ('FileIO::FileIO', 'Exporter');
@EXPORT = ();
@EXPORT_OK = ('IsMDLMolFile');

%EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

# Setup class variables: $ClassName is filled in by _InitializeClass and used in messages...
my($ClassName);
_InitializeClass();
50
# Class constructor: let the base class create the object, rebless it into the requested
# class and apply the specified property names and values...
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Base class object reblessed into this class (or the class of an existing object)...
  my $This = bless $Class->SUPER::new(), (ref($Class) || $Class);

  $This->_InitializeMDLMolFileIO();
  $This->_InitializeMDLMolFileIOProperties(%NamesAndValues);

  return $This;
}
64
# Hook for object data local to this class. There is none at present: base class FileIO
# handles default class variables. Returns the object itself...
#
sub _InitializeMDLMolFileIO {
  my $This = shift;

  return $This;
}
74
# Initialize class data: record the name of this package for use in warning and
# error messages...
#
sub _InitializeClass {
  $ClassName = __PACKAGE__;
}
81
# Initialize object values using specified property names and values...
#
# Each name is passed on to the corresponding Set<PropertyName> method; those methods are
# implemented on-demand using ObjectProperty class. A Name property holding a MDLMol file
# name must be present; otherwise, object creation is aborted. Returns the object itself...
#
sub _InitializeMDLMolFileIOProperties {
  my($This, %NamesAndValues) = @_;

  for my $PropertyName (keys %NamesAndValues) {
    my $SetterName = "Set${PropertyName}";
    $This->$SetterName($NamesAndValues{$PropertyName});
  }

  unless (exists $NamesAndValues{Name}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";
  }

  # Make sure it's a MDLMol file...
  my $Name = $NamesAndValues{Name};
  unless ($This->IsMDLMolFile($Name)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be MDLMol format...";
  }

  return $This;
}
107
# Is it a MDLMol file? Supports invocation as a method or as a package function; the
# decision is based on file extension only...
#
sub IsMDLMolFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  # A method call passes the object first; a function call passes just the file name...
  my $FileName = ((@_ == 2) && (_IsMDLMolFileIO($FirstParameter))) ? $SecondParameter : $FirstParameter;

  # Check file extension...
  my $Status = FileUtil::CheckFileType($FileName, "mol");

  return $Status;
}
125
# Read next compound string using the file handle of this object, parse it and return
# the result of ParseMoleculeString...
#
sub ReadMolecule {
  my $This = shift;

  my $InputHandle = $This->GetFileHandle();

  return $This->ParseMoleculeString(SDFileUtil::ReadCmpdString($InputHandle));
}
134
# Write compound data using Molecule object...
#
# The molecule string generated for Molecule is written, followed by a new line, to the
# file handle of this object. Nothing is written and a warning is issued when Molecule is
# missing or is not a molecule. Returns the object itself...
#
sub WriteMolecule {
  my($This, $Molecule) = @_;

  unless (defined($Molecule) && $Molecule->IsMolecule()) {
    carp "Warning: ${ClassName}->WriteMolecule: No data written: Molecule object is not specified...";
    return $This;
  }

  my $OutputHandle = $This->GetFileHandle();
  print {$OutputHandle} $This->GenerateMoleculeString($Molecule) . "\n";

  return $This;
}
150
# Retrieve next compound string, without parsing it, using the file handle of this object...
#
sub ReadMoleculeString {
  my $This = shift;

  my $InputHandle = $This->GetFileHandle();

  return SDFileUtil::ReadCmpdString($InputHandle);
}
159
# Parse molecule string and return molecule object. ParseMoleculeString supports two invocation
# methods: class method or a package function.
#
# Parameters:
#   . MoleculeString: Text of one MDLMol record with lines separated by new line characters.
#
# Returns:
#   . A new Molecule object; undef for an empty or missing MoleculeString.
#
# Notes:
#   . Lines 1 to 3 (name, misc info, comments) and the counts line are stored on the molecule as
#     its name and as MDL<PropertyName> properties.
#   . Atom and bond counts from the counts line decide which lines are treated as atom and bond lines.
#   . Property block lines are scanned until M END, $$$$ or the last available line; the last
#     condition keeps a truncated record from being scanned for ever.
#   . Property line tags are matched with any amount of white space between their two parts.
#   . The input string is stored on the molecule using SetInputMoleculeString.
#
sub ParseMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;

  # A method call passes the object first; a function call passes just the molecule string...
  my $MoleculeString = ((@_ == 2) && (_IsMDLMolFileIO($FirstParameter))) ? $SecondParameter : $FirstParameter;
  if (!$MoleculeString) {
    return undef;
  }
  my @MoleculeLines = split /\n/, $MoleculeString;

  # Create molecule object and set molecule level native and MDL properties...
  #
  my $Molecule = Molecule->new();

  # Set valence model for calculating implicit hydrogens...
  $Molecule->SetValenceModel('MDLValenceModel');

  # Process headers data: molecule name line...
  my($MoleculeName) = SDFileUtil::ParseCmpdMolNameLine($MoleculeLines[0]);
  $MoleculeName = TextUtil::RemoveTrailingWhiteSpaces($MoleculeName);
  $Molecule->SetName($MoleculeName);

  # Misc info line...
  my($UserInitial, $ProgramName, $Date, $Code, $ScalingFactor1, $ScalingFactor2, $Energy, $RegistryNum) = SDFileUtil::ParseCmpdMiscInfoLine($MoleculeLines[1]);
  $Molecule->SetProperties('MDLUserInitial' => $UserInitial, 'MDLProgramName' => $ProgramName, 'MDLDate' => $Date, 'MDLCode' => $Code, 'MDLScalingFactor1' => $ScalingFactor1, 'MDLScalingFactor2' => $ScalingFactor2, 'MDLEnergy' => $Energy, 'MDLRegistryNum' => $RegistryNum);

  # Comments line...
  my($Comments) = SDFileUtil::ParseCmpdCommentsLine($MoleculeLines[2]);
  $Molecule->SetProperties('MDLComments' => $Comments);

  # Counts line...
  my($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) = SDFileUtil::ParseCmpdCountsLine($MoleculeLines[3]);
  $Molecule->SetProperties('MDLChiralFlag' => $ChiralFlag, 'MDLPropertyCount' => $PropertyCount, 'MDLVersion' => $Version);

  # Process atom data: atom lines start right after the four header lines and atoms are
  # numbered from 1 in order of appearance...
  my($FirstAtomLineIndex, $LastAtomLineIndex, $AtomNum, %AtomNumToAtomMap);

  $AtomNum = 0;
  %AtomNumToAtomMap = ();
  $FirstAtomLineIndex = 4;
  $LastAtomLineIndex = $FirstAtomLineIndex + $AtomCount - 1;

  for my $LineIndex ($FirstAtomLineIndex .. $LastAtomLineIndex) {
    $AtomNum++;
    my($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity) = SDFileUtil::ParseCmpdAtomLine($MoleculeLines[$LineIndex]);

    my $Atom = Atom->new('AtomSymbol' => $AtomSymbol, 'XYZ' => [$AtomX, $AtomY, $AtomZ]);

    if ($MassDifference && $MassDifference != 0) {
      _ProcessMassDifference($Atom, $MassDifference);
    }
    if ($Charge && $Charge != 0) {
      _ProcessCharge($Atom, $Charge);
    }
    if ($StereoParity && $StereoParity != 0) {
      _ProcessStereoParity($Atom, $StereoParity);
    }

    $AtomNumToAtomMap{$AtomNum} = $Atom;
    $Molecule->AddAtom($Atom);
  }

  # Process bond data: bond lines follow atom lines...
  my($FirstBondLineIndex, $LastBondLineIndex);

  $FirstBondLineIndex = $FirstAtomLineIndex + $AtomCount;
  $LastBondLineIndex = $FirstAtomLineIndex + $AtomCount + $BondCount - 1;

  for my $LineIndex ($FirstBondLineIndex .. $LastBondLineIndex) {
    my($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = SDFileUtil::ParseCmpdBondLine($MoleculeLines[$LineIndex]);

    my $Atom1 = $AtomNumToAtomMap{$FirstAtomNum};
    my $Atom2 = $AtomNumToAtomMap{$SecondAtomNum};

    my($InternalBondOrder, $InternalBondType) = SDFileUtil::MDLBondTypeToInternalBondOrder($BondType);
    my $Bond = Bond->new('Atoms' => [$Atom1, $Atom2], 'BondOrder' => $InternalBondOrder);
    $Bond->SetBondType($InternalBondType);

    if ($BondStereo && $BondStereo != 0) {
      _ProcessBondStereo($Bond, $BondStereo);
    }

    $Molecule->AddBond($Bond);
  }

  # Process available property block lines starting with A aaa, M CHG, M ISO and M RAD. All other
  # property block lines are for query or specific display purposes and are ignored for now.
  #
  my($PropertyLineIndex, $FirstChargeOrRadicalLine, @ValuePairs);

  $PropertyLineIndex = $FirstAtomLineIndex + $AtomCount + $BondCount;
  $FirstChargeOrRadicalLine = 1;

  # Running past the last line yields an undefined value, which ends the scan for records
  # without a terminating M END line...
  PROPERTYLINE: while (defined(my $PropertyLine = $MoleculeLines[$PropertyLineIndex])) {
    if ($PropertyLine =~ /^M\s+END/i || $PropertyLine =~ /\$\$\$\$/) {
      last PROPERTYLINE;
    }
    if ($PropertyLine =~ /^M\s+(?:CHG|RAD)/i) {
      # First charge or radical property line: drop values picked up from atom lines...
      if ($FirstChargeOrRadicalLine) {
        $FirstChargeOrRadicalLine = 0;
        _ZeroOutAtomsChargeAndRadicalValues(\%AtomNumToAtomMap);
      }
      if ($PropertyLine =~ /^M\s+CHG/i) {
        @ValuePairs = SDFileUtil::ParseCmpdChargePropertyLine($PropertyLine);
        _ProcessChargeProperty(\@ValuePairs, \%AtomNumToAtomMap);
      }
      else {
        @ValuePairs = SDFileUtil::ParseCmpdRadicalPropertyLine($PropertyLine);
        _ProcessRadicalProperty(\@ValuePairs, \%AtomNumToAtomMap);
      }
    }
    elsif ($PropertyLine =~ /^M\s+ISO/i) {
      @ValuePairs = SDFileUtil::ParseCmpdIsotopePropertyLine($PropertyLine);
      _ProcessIsotopeProperty(\@ValuePairs, \%AtomNumToAtomMap);
    }
    elsif ($PropertyLine =~ /^A\s+/i) {
      # Atom alias takes two lines: the A line and the line after it, which is consumed here...
      $PropertyLineIndex++;
      my $NextPropertyLine = $MoleculeLines[$PropertyLineIndex];
      @ValuePairs = SDFileUtil::ParseCmpdAtomAliasPropertyLine($PropertyLine, $NextPropertyLine);
      _ProcessAtomAliasProperty(\@ValuePairs, \%AtomNumToAtomMap);
    }
    $PropertyLineIndex++;
  }

  # Store input molecule string as generic property of molecule...
  $Molecule->SetInputMoleculeString($MoleculeString);

  return $Molecule;
}
304
# Generate molecule string using molecule object. GenerateMoleculeString supports two invocation
# methods: class method or a package function.
#
# Parameters:
#   . Molecule: Molecule object to write out.
#
# Returns:
#   . A string containing header lines, counts line, atom lines, bond lines, any M ISO, M CHG,
#     M RAD and atom alias property lines, and a final M  END line, joined by new line
#     characters without a trailing new line; undef for an undefined Molecule.
#
# Notes:
#   . Chiral flag is always written as 0 and atom stereo parity comes from _GetStereoParity.
#   . Each bond is written once: from the atom with lower atom number to the one with higher number.
#   . The terminator is written as M followed by two spaces and END, as required by MDL MOL file format.
#
sub GenerateMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;

  # A method call passes the object first; a function call passes just the molecule...
  my $Molecule = ((@_ == 2) && (_IsMDLMolFileIO($FirstParameter))) ? $SecondParameter : $FirstParameter;
  if (!defined($Molecule)) {
    return undef;
  }
  my @MoleculeLines = ();

  # First line: Molname line...
  push @MoleculeLines, SDFileUtil::GenerateCmpdMolNameLine($Molecule->GetName());

  # Second line: Misc info...
  my($ProgramName, $UserInitial) = ('', '');
  my $Code = $Molecule->IsThreeDimensional() ? '3D' : '2D';
  push @MoleculeLines, SDFileUtil::GenerateCmpdMiscInfoLine($ProgramName, $UserInitial, $Code);

  # Third line: Comments line; MDLComments property is preferred over Comments property...
  my $Comments = $Molecule->HasProperty('MDLComments') ? $Molecule->GetMDLComments() : ($Molecule->HasProperty('Comments') ? $Molecule->GetComments() : '');
  push @MoleculeLines, SDFileUtil::GenerateCmpdCommentsLine($Comments);

  # Fourth line: Counts line for V2000
  my $AtomCount = $Molecule->GetNumOfAtoms();
  my $BondCount = $Molecule->GetNumOfBonds();
  my $ChiralFlag = 0;
  push @MoleculeLines, SDFileUtil::GenerateCmpdCountsLine($AtomCount, $BondCount, $ChiralFlag);

  # Atom lines...
  my(@IsotopePropertyValuePairs, @ChargePropertyValuePairs, @RadicalPropertyValuePairs, @AtomAliasPropertyValuePairs, %AtomIDToNum);

  my @Atoms = $Molecule->GetAtoms();

  my $AtomNum = 0;
  for my $Atom (@Atoms) {
    $AtomNum++;
    $AtomIDToNum{$Atom->GetID()} = $AtomNum;

    my $AtomSymbol = $Atom->GetAtomSymbol();
    my($AtomX, $AtomY, $AtomZ) = $Atom->GetXYZ();

    # Setup mass difference...
    my $MassDifference = _GetMassDifference($Atom);
    if ($MassDifference) {
      # Hold it for M ISO property lines...
      my $IsotopePropertyValue = _GetIsotopePropertyValue($Atom);
      if ($IsotopePropertyValue) {
        push @IsotopePropertyValuePairs, ($AtomNum, $IsotopePropertyValue);
      }
    }

    # Setup charge...
    my $Charge = _GetCharge($Atom);
    if ($Charge) {
      # Hold it for M CHG property lines...
      my $ChargePropertyValue = _GetChargePropertyValue($Atom);
      if ($ChargePropertyValue) {
        push @ChargePropertyValuePairs, ($AtomNum, $ChargePropertyValue);
      }
    }

    # Hold any radical values for M RAD property lines...
    my $RadicalPropertyValue = _GetRadicalPropertyValue($Atom);
    if ($RadicalPropertyValue) {
      push @RadicalPropertyValuePairs, ($AtomNum, $RadicalPropertyValue);
    }

    # Hold any atom alias value for A xxx property lines....
    my $AtomAliasPropertyValue = _GetAtomAliasPropertyValue($Atom);
    if ($AtomAliasPropertyValue) {
      push @AtomAliasPropertyValuePairs, ($AtomNum, $AtomAliasPropertyValue);

      # Set AtomSymbol to carbon as atom alias would override its value during parsing...
      $AtomSymbol = "C";
    }

    # Setup stereo parity...
    my $StereoParity = _GetStereoParity($Atom);

    push @MoleculeLines, SDFileUtil::GenerateCmpdAtomLine($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity);
  }

  # Bond lines...
  for my $FirstAtom (@Atoms) {
    my $FirstAtomNum = $AtomIDToNum{$FirstAtom->GetID()};

    my @Bonds = $FirstAtom->GetBonds();
    BOND: for my $Bond (@Bonds) {
      my $SecondAtom = $Bond->GetBondedAtom($FirstAtom);
      my $SecondAtomNum = $AtomIDToNum{$SecondAtom->GetID()};

      # Every bond shows up for both of its atoms: write it for the lower numbered one only...
      if ($FirstAtomNum >= $SecondAtomNum) {
        next BOND;
      }
      # Setup BondType...
      my $BondOrder = $Bond->GetBondOrder();
      my $BondType = $Bond->GetBondType();
      my $MDLBondType = SDFileUtil::InternalBondOrderToMDLBondType($BondOrder, $BondType);

      # Setup BondStereo...
      my $MDLBondStereo = _GetBondStereo($Bond);

      push @MoleculeLines, SDFileUtil::GenerateCmpdBondLine($FirstAtomNum, $SecondAtomNum, $MDLBondType, $MDLBondStereo);
    }
  }

  # Property lines...
  if (@IsotopePropertyValuePairs) {
    push @MoleculeLines, SDFileUtil::GenerateCmpdIsotopePropertyLines(\@IsotopePropertyValuePairs);
  }
  if (@ChargePropertyValuePairs) {
    push @MoleculeLines, SDFileUtil::GenerateCmpdChargePropertyLines(\@ChargePropertyValuePairs);
  }
  if (@RadicalPropertyValuePairs) {
    push @MoleculeLines, SDFileUtil::GenerateCmpdRadicalPropertyLines(\@RadicalPropertyValuePairs);
  }
  if (@AtomAliasPropertyValuePairs) {
    push @MoleculeLines, SDFileUtil::GenerateCmpdAtomAliasPropertyLines(\@AtomAliasPropertyValuePairs);
  }

  # Terminator: two spaces between M and END...
  push @MoleculeLines, "M  END";

  return join "\n", @MoleculeLines;
}
451
# Process MassDifference value and set atom's mass number...
#
# Parameters:
#   . Atom: Atom object read from an atom line.
#   . MassDifference: Mass difference value from the atom line; callers pass non-zero values only.
#
# Notes:
#   . The value is ignored, with a warning, for atoms without an atomic number and for atoms
#     whose current mass number is not known.
#   . A new mass number which is not a natural isotope is still assigned after a warning.
#
sub _ProcessMassDifference {
  my($Atom, $MassDifference) = @_;

  my $AtomicNumber = $Atom->GetAtomicNumber();
  if (!$AtomicNumber) {
    carp "Warning: ${ClassName}->_ProcessMassDifference: Ignoring specified mass difference value, $MassDifference, in SD file: Assigned to non standard element...";
    return;
  }

  # Mass difference is relative to current mass number: nothing can be done without it...
  my $MassNumber = $Atom->GetMassNumber();
  if (!$MassNumber) {
    carp "Warning: ${ClassName}->_ProcessMassDifference: Ignoring specified mass difference value, $MassDifference, in SD file: Unknown MassNumber value...";
    return;
  }

  my $NewMassNumber = $MassNumber + $MassDifference;
  if (!PeriodicTable::IsElementNaturalIsotopeMassNumber($AtomicNumber, $NewMassNumber)) {
    my($AtomSymbol) = $Atom->GetAtomSymbol();
    carp "Warning: ${ClassName}->_ProcessMassDifference: Unknown mass number, $NewMassNumber, corresponding to specified mass difference value, $MassDifference, in SD for atom with atomic number, $AtomicNumber, and atomic symbol, $AtomSymbol. The mass number value has been assigned. Don't forget to Set ExactMass property explicitly; otherwise, GetExactMass method would return mass of most abundant isotope...\n";
  }

  # Use SetProperty method instead of SetMassNumber to skip explicit checks on MassNumber value...
  $Atom->SetProperty('MassNumber', $NewMassNumber);
}
478
# Get mass difference value: mass number of the atom minus mass number of the most abundant
# natural isotope of its element; 0 when any of these values is not available...
#
sub _GetMassDifference {
  my($Atom) = @_;

  my $MassNumber = $Atom->GetMassNumber();
  return 0 unless defined $MassNumber;

  my $AtomicNumber = $Atom->GetAtomicNumber();
  return 0 unless defined $AtomicNumber;

  my $MostAbundantMassNumber = PeriodicTable::GetElementMostAbundantNaturalIsotopeMassNumber($AtomicNumber);
  return 0 unless defined $MostAbundantMassNumber;

  return ($MassNumber != $MostAbundantMassNumber) ? $MassNumber - $MostAbundantMassNumber : 0;
}
497
# Map charge value from an atom line to internal charge and assign it to atom as
# formal charge...
#
sub _ProcessCharge {
  my($Atom, $Charge) = @_;

  my $FormalCharge = SDFileUtil::MDLChargeToInternalCharge($Charge);
  $Atom->SetFormalCharge($FormalCharge);
}
506
# Get MDL formal charge value for an atom line: 0 unless the atom has a non-zero
# FormalCharge property...
#
sub _GetCharge {
  my($Atom) = @_;

  return 0 unless $Atom->HasProperty('FormalCharge');

  my $FormalCharge = $Atom->GetFormalCharge();
  return 0 unless $FormalCharge;

  my $MDLCharge = SDFileUtil::InternalChargeToMDLCharge($FormalCharge);
  return $MDLCharge;
}
521
# Process stereo parity value and assign it to atom as MDL property...
#
# Notes:
# . Atom is marked as a stereo center.
# . Any explicit Clockwise (parity 1), CounterClockwise (parity 2) or either value (parity 3) is
#   kept as MDLStereoParity property of atom.
# . MDL values of Clockwise and CounterClockwise don't correspond to priority assigned to ligands
#   around stereo center using CIP scheme; consequently, these values can't be used to set
#   internal Stereochemistry for an atom.
#
sub _ProcessStereoParity {
  my $Atom = shift;
  my $StereoParity = shift;

  $Atom->SetStereoCenter('1');
  $Atom->SetMDLStereoParity($StereoParity);
}
537
# Stereo parity value for an atom line. It's always zero for now: The current release of
# MayaChemTools hasn't implemented functionality to determine chirality.
#
sub _GetStereoParity {
  my($Atom) = @_;

  return 0;
}
549
# Process bond stereo value: map it to internal bond stereochemistry and assign it to bond
# when the mapping gives a value...
#
sub _ProcessBondStereo {
  my($Bond, $BondStereo) = @_;

  my $BondStereochemistry = SDFileUtil::MDLBondStereoToInternalBondStereochemistry($BondStereo);
  if ($BondStereochemistry) {
    $Bond->SetBondStereochemistry($BondStereochemistry);
  }
}
560
# Get MDLBondStereo value for a bond...
#
# The first matching bond state - Up, Down, UpOrDown or any of CisOrTrans, Cis and Trans - is
# mapped to its MDL value; 0 is returned for all other bonds.
#
sub _GetBondStereo {
  my($Bond) = @_;

  my $InternalBondStereo =
      $Bond->IsUp() ? 'Up'
    : $Bond->IsDown() ? 'Down'
    : $Bond->IsUpOrDown() ? 'UpOrDown'
    : ($Bond->IsCisOrTrans() || $Bond->IsCis() || $Bond->IsTrans()) ? 'CisOrTrans'
    : '';

  return 0 unless $InternalBondStereo;

  my $BondStereo = SDFileUtil::InternalBondStereochemistryToMDLBondStereo($InternalBondStereo);
  return $BondStereo;
}
595
# Zero out charge and radical values specified for atoms...
#
# Parameters:
#   . AtomNumToAtomMapRef: Reference to a hash mapping atom numbers to Atom objects.
#
# Both FormalCharge and SpinMultiplicity properties are removed from every atom having them;
# an atom with both properties loses both.
#
sub _ZeroOutAtomsChargeAndRadicalValues {
  my($AtomNumToAtomMapRef) = @_;

  for my $Atom (values %{$AtomNumToAtomMapRef}) {
    for my $PropertyName ('FormalCharge', 'SpinMultiplicity') {
      if ($Atom->HasProperty($PropertyName)) {
        $Atom->DeleteProperty($PropertyName);
      }
    }
  }
}
610
# Process charge property value pairs: a flat list of atom number and charge values...
#
# Pairs with no charge value or with an atom number missing from the atom map are skipped.
# A warning is issued before setting formal charge on an atom with spin multiplicity.
#
sub _ProcessChargeProperty {
  my($ValuePairsRef, $AtomNumToAtomMapRef) = @_;

  return unless (defined($ValuePairsRef) && @{$ValuePairsRef});

  # Go over a copy of the list two values at a time...
  my @RemainingValues = @{$ValuePairsRef};
  VALUEPAIRS: while (my($AtomNum, $Charge) = splice(@RemainingValues, 0, 2)) {
    next VALUEPAIRS unless $Charge;
    next VALUEPAIRS unless exists $AtomNumToAtomMapRef->{$AtomNum};

    my $Atom = $AtomNumToAtomMapRef->{$AtomNum};
    if ($Atom->HasProperty('SpinMultiplicity')) {
      carp "Warning: ${ClassName}->_ProcessChargeProperty: Setting formal charge on atom number, $AtomNum, with already assigned spin multiplicity value...";
    }
    $Atom->SetFormalCharge($Charge);
  }
}
636
# Get charge property value for an atom: its formal charge or 0 without FormalCharge property...
#
sub _GetChargePropertyValue {
  my($Atom) = @_;

  return 0 unless $Atom->HasProperty('FormalCharge');

  my $FormalCharge = $Atom->GetFormalCharge();
  return $FormalCharge;
}
648
# Process radical property value pairs: a flat list of atom number and radical values...
#
# Pairs with no radical value or with an atom number missing from the atom map are skipped.
# A warning is issued before setting spin multiplicity on an atom with formal charge.
#
sub _ProcessRadicalProperty {
  my($ValuePairsRef, $AtomNumToAtomMapRef) = @_;

  return unless (defined($ValuePairsRef) && @{$ValuePairsRef});

  # Go over a copy of the list two values at a time...
  my @RemainingValues = @{$ValuePairsRef};
  VALUEPAIRS: while (my($AtomNum, $Radical) = splice(@RemainingValues, 0, 2)) {
    next VALUEPAIRS unless $Radical;
    next VALUEPAIRS unless exists $AtomNumToAtomMapRef->{$AtomNum};

    my $Atom = $AtomNumToAtomMapRef->{$AtomNum};
    if ($Atom->HasProperty('FormalCharge')) {
      carp "Warning: ${ClassName}->_ProcessRadicalProperty: Setting spin multiplicity on atom number, $AtomNum, with already assigned formal charge value...";
    }
    my $SpinMultiplicity = SDFileUtil::MDLRadicalToInternalSpinMultiplicity($Radical);
    $Atom->SetSpinMultiplicity($SpinMultiplicity);
  }
}
675
# Get radical property value for an atom: MDL radical value for its spin multiplicity or
# 0 without SpinMultiplicity property...
#
sub _GetRadicalPropertyValue {
  my($Atom) = @_;

  return 0 unless $Atom->HasProperty('SpinMultiplicity');

  my $SpinMultiplicity = $Atom->GetSpinMultiplicity();
  my $Radical = SDFileUtil::InternalSpinMultiplicityToMDLRadical($SpinMultiplicity);

  return $Radical;
}
688
# Process isotope property value pairs...
#
# Parameters:
#   . ValuePairsRef: Reference to a flat list of atom number and mass number values.
#   . AtomNumToAtomMapRef: Reference to a hash mapping atom numbers to Atom objects.
#
# Notes:
#   . Pairs with no mass number or with an atom number missing from the atom map are skipped.
#   . A mass number which is not a natural isotope of the element is still assigned after
#     a warning.
#
sub _ProcessIsotopeProperty {
  my($ValuePairsRef, $AtomNumToAtomMapRef) = @_;

  if (!(defined($ValuePairsRef) && @{$ValuePairsRef})) {
    return;
  }

  my $ValuePairsCount = scalar @{$ValuePairsRef};
  VALUEPAIRS: for (my $Index = 0; $Index < $ValuePairsCount; $Index += 2) {
    my($AtomNum, $MassNumber) = ($ValuePairsRef->[$Index], $ValuePairsRef->[$Index + 1]);
    if (!$MassNumber) {
      next VALUEPAIRS;
    }
    if (!exists $AtomNumToAtomMapRef->{$AtomNum}) {
      next VALUEPAIRS;
    }
    my $Atom = $AtomNumToAtomMapRef->{$AtomNum};
    my $AtomicNumber = $Atom->GetAtomicNumber();

    if (!PeriodicTable::IsElementNaturalIsotopeMassNumber($AtomicNumber, $MassNumber)) {
      my($AtomSymbol) = $Atom->GetAtomSymbol();
      carp "Warning: ${ClassName}->_ProcessIsotopeProperty: Unknown mass number, $MassNumber, specified on M ISO property line for atom number, $AtomNum, in SD for atom with atomic number, $AtomicNumber, and atomic symbol, $AtomSymbol. The mass number value has been assigned. Don't forget to Set ExactMass property explicitly; otherwise, GetExactMass method would return mass of most abundant isotope...\n";
    }

    # Use SetProperty method instead of SetMassNumber to skip explicit checks on MassNumber value...
    $Atom->SetProperty('MassNumber', $MassNumber);
  }
}
719
# Get isotope property value for an atom: its mass number or 0 without MassNumber property...
#
sub _GetIsotopePropertyValue {
  my($Atom) = @_;

  return 0 unless $Atom->HasProperty('MassNumber');

  my $MassNumber = $Atom->GetMassNumber();
  return $MassNumber;
}
731
# Process atom alias property value pairs: a flat list of atom number and atom alias values...
#
# Pairs with no atom number or with an atom number missing from the atom map are skipped.
# An alias which is empty after removing surrounding white space is ignored with a warning.
# Otherwise, the alias becomes atom symbol of the atom and is kept as its AtomAlias property.
#
sub _ProcessAtomAliasProperty {
  my($ValuePairsRef, $AtomNumToAtomMapRef) = @_;

  return unless (defined($ValuePairsRef) && @{$ValuePairsRef});

  # Go over a copy of the list two values at a time...
  my @RemainingValues = @{$ValuePairsRef};
  VALUEPAIRS: while (my($AtomNum, $AtomAlias) = splice(@RemainingValues, 0, 2)) {
    next VALUEPAIRS unless $AtomNum;
    next VALUEPAIRS unless exists $AtomNumToAtomMapRef->{$AtomNum};

    $AtomAlias = TextUtil::RemoveLeadingAndTrailingWhiteSpaces($AtomAlias);
    if (TextUtil::IsEmpty($AtomAlias)) {
      carp("Warning: ${ClassName}->_ProcessAtomAliasProperty: Ignoring atom alias property line: No Atom alias value specified...");
      next VALUEPAIRS;
    }

    # Set atom symbol to atom alias which sets atomic number automatically...
    my $Atom = $AtomNumToAtomMapRef->{$AtomNum};
    $Atom->SetAtomSymbol($AtomAlias);

    $Atom->SetProperty('AtomAlias', $AtomAlias);
  }
}
763
# Get atom alias property value for an atom: undef without AtomAlias property...
#
sub _GetAtomAliasPropertyValue {
  my($Atom) = @_;

  my $AtomAlias = $Atom->HasProperty('AtomAlias') ? $Atom->GetAtomAlias() : undef;

  return $AtomAlias;
}
775
# Is it a MDLMolFileIO object? Returns 1 for a blessed reference which is of this class or
# of a class derived from it; 0 for everything else...
#
sub _IsMDLMolFileIO {
  my($Object) = @_;

  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
782
783
784 1;
785
786 __END__
787
788 =head1 NAME
789
790 MDLMolFileIO
791
792 =head1 SYNOPSIS
793
794 use FileIO::MDLMolFileIO;
795
796 use FileIO::MDLMolFileIO qw(:all);
797
798 =head1 DESCRIPTION
799
800 B<MDLMolFileIO> class provides the following methods:
801
802 new, GenerateMoleculeString, IsMDLMolFile, ParseMoleculeString, ReadMolecule,
803 ReadMoleculeString, WriteMolecule
804
805 The following methods can also be used as functions:
806
807 GenerateMoleculeString, IsMDLMolFile, ParseMoleculeString
808
809 Data specific to B<MDLMolFileIO> class not directly used by B<Molecule>, B<Atom> and
810 B<Bond> objects - data label/value pairs, atom StereoParity and so on - is associated with
811 and retrieved from appropriate objects using the following methods:
812
813 SetMDL<PropertyName>
814 GetMDL<PropertyName>.
815
816 B<MDLMolFileIO> class is derived from I<FileIO> class and uses its methods to support
817 generic file related functionality.
818
819 =head2 METHODS
820
821 =over 4
822
823 =item B<new>
824
825 $NewMDLMolFileIO = new FileIO::MDLMolFileIO(%NamesAndValues);
826
827 Using specified I<MDLMolFileIO> property names and values hash, B<new> method creates a new object
828 and returns a reference to newly created B<MDLMolFileIO> object.
829
830 =item B<GenerateMoleculeString>
831
832 $MoleculeString = $MDLMolFileIO->GenerateMoleculeString($Molecule);
833 $MoleculeString = FileIO::MDLMolFileIO::GenerateMoleculeString($Molecule);
834
835 Returns a B<MoleculeString> in MDLMol format corresponding to I<Molecule>.
836
837 =item B<IsMDLMolFile>
838
839 $Status = $MDLMolFileIO->IsMDLMolFile($FileName);
840 $Status = FileIO::MDLMolFileIO::IsMDLMolFile($FileName);
841
842 Returns 1 or 0 based on whether I<FileName> is a MDLMol file.
843
844 =item B<ParseMoleculeString>
845
846 $Molecule = $MDLMolFileIO->ParseMoleculeString($MoleculeString);
847 $Molecule = FileIO::MDLMolFileIO::ParseMoleculeString($MoleculeString);
848
849 Parses I<MoleculeString> and returns a B<Molecule> object.
850
851 =item B<ReadMolecule>
852
853 $Molecule = $MDLMolFileIO->ReadMolecule();
854
855 Reads data for the next compound using the file handle associated with the
856 B<MDLMolFileIO> object, creates, and returns a B<Molecule> object.
857
858 =item B<ReadMoleculeString>
859
860 $MoleculeString = $MDLMolFileIO->ReadMoleculeString();
861
862 Reads data for the next compound using the file handle associated with the
863 B<MDLMolFileIO> object and returns a B<MoleculeString> corresponding to compound
864 structure and other associated data.
865
866 =item B<WriteMolecule>
867
868 $MDLMolFileIO->WriteMolecule($Molecule);
869
870 Writes I<Molecule> data to a file in MDLMol format and returns B<MDLMolFileIO>.
871
872 =back
873
874 =head1 AUTHOR
875
876 Manish Sud <msud@san.rr.com>
877
878 =head1 SEE ALSO
879
880 MoleculeFileIO.pm, SDFileIO.pm
881
882 =head1 COPYRIGHT
883
884 Copyright (C) 2015 Manish Sud. All rights reserved.
885
886 This file is part of MayaChemTools.
887
888 MayaChemTools is free software; you can redistribute it and/or modify it under
889 the terms of the GNU Lesser General Public License as published by the Free
890 Software Foundation; either version 3 of the License, or (at your option)
891 any later version.
892
893 =cut