0
|
1 <html>
|
|
2 <head>
|
|
3 <title>MayaChemTools:Code:SDFileUtil.pm</title>
|
|
4 <meta http-equiv="content-type" content="text/html;charset=utf-8">
|
|
5 <link rel="stylesheet" type="text/css" href="../../../css/MayaChemToolsCode.css">
|
|
6 </head>
|
|
7 <body leftmargin="20" rightmargin="20" topmargin="10" bottommargin="10">
|
|
8 <br/>
|
|
9 <center>
|
|
10 <a href="http://www.mayachemtools.org" title="MayaChemTools Home"><img src="../../../images/MayaChemToolsLogo.gif" border="0" alt="MayaChemTools"></a>
|
|
11 </center>
|
|
12 <br/>
|
|
13 <pre>
|
|
14 <a name="package-SDFileUtil-"></a> 1 <span class="k">package </span><span class="i">SDFileUtil</span><span class="sc">;</span>
|
|
15 2 <span class="c">#</span>
|
|
16 3 <span class="c"># $RCSfile: SDFileUtil.pm,v $</span>
|
|
17 4 <span class="c"># $Date: 2015/02/28 20:47:18 $</span>
|
|
18 5 <span class="c"># $Revision: 1.49 $</span>
|
|
19 6 <span class="c">#</span>
|
|
20 7 <span class="c"># Author: Manish Sud &lt;msud@san.rr.com&gt;</span>
|
|
21 8 <span class="c">#</span>
|
|
22 9 <span class="c"># Copyright (C) 2015 Manish Sud. All rights reserved.</span>
|
|
23 10 <span class="c">#</span>
|
|
24 11 <span class="c"># This file is part of MayaChemTools.</span>
|
|
25 12 <span class="c">#</span>
|
|
26 13 <span class="c"># MayaChemTools is free software; you can redistribute it and/or modify it under</span>
|
|
27 14 <span class="c"># the terms of the GNU Lesser General Public License as published by the Free</span>
|
|
28 15 <span class="c"># Software Foundation; either version 3 of the License, or (at your option) any</span>
|
|
29 16 <span class="c"># later version.</span>
|
|
30 17 <span class="c">#</span>
|
|
31 18 <span class="c"># MayaChemTools is distributed in the hope that it will be useful, but without</span>
|
|
32 19 <span class="c"># any warranty; without even the implied warranty of merchantability of fitness</span>
|
|
33 20 <span class="c"># for a particular purpose. See the GNU Lesser General Public License for more</span>
|
|
34 21 <span class="c"># details.</span>
|
|
35 22 <span class="c">#</span>
|
|
36 23 <span class="c"># You should have received a copy of the GNU Lesser General Public License</span>
|
|
37 24 <span class="c"># along with MayaChemTools; if not, see &lt;http://www.gnu.org/licenses/&gt; or</span>
|
|
38 25 <span class="c"># write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,</span>
|
|
39 26 <span class="c"># Boston, MA, 02111-1307, USA.</span>
|
|
40 27 <span class="c">#</span>
|
|
41 28
|
|
42 29 <span class="k">use</span> <span class="w">strict</span><span class="sc">;</span>
|
|
43 30 <span class="k">use</span> <span class="w">Exporter</span><span class="sc">;</span>
|
|
44 31 <span class="k">use</span> <span class="w">Carp</span><span class="sc">;</span>
|
|
45 32 <span class="k">use</span> <span class="w">PeriodicTable</span> <span class="q">qw(IsElement)</span><span class="sc">;</span>
|
|
46 33 <span class="k">use</span> <span class="w">TimeUtil</span> <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
47 34
|
|
48 35 <span class="k">use</span> <span class="w">vars</span> <span class="q">qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS)</span><span class="sc">;</span>
|
|
49 36
|
|
50 37 <span class="i">@ISA</span> = <span class="q">qw(Exporter)</span><span class="sc">;</span>
|
|
51 38 <span class="i">@EXPORT</span> = <span class="q">qw(GenerateCmpdAtomLine GenerateCmpdBondLine GenerateCmpdChargePropertyLines GenerateCmpdCommentsLine GenerateCmpdCountsLine GenerateCmpdAtomAliasPropertyLines GenerateCmpdIsotopePropertyLines GenerateCmpdDataHeaderLabelsAndValuesLines GenerateCmpdMiscInfoLine GenerateCmpdRadicalPropertyLines GenerateCmpdMolNameLine GenerateEmptyCtabBlockLines GenerateMiscLineDateStamp GetAllAndCommonCmpdDataHeaderLabels GetCmpdDataHeaderLabels GetCmpdDataHeaderLabelsAndValues GetCmpdFragments GetCtabLinesCount GetUnknownAtoms GetInvalidAtomNumbers MDLChargeToInternalCharge InternalChargeToMDLCharge MDLBondTypeToInternalBondOrder InternalBondOrderToMDLBondType MDLBondStereoToInternalBondStereochemistry InternalBondStereochemistryToMDLBondStereo InternalSpinMultiplicityToMDLRadical MDLRadicalToInternalSpinMultiplicity IsCmpd3D IsCmpd2D ParseCmpdAtomLine ParseCmpdBondLine ParseCmpdCommentsLine ParseCmpdCountsLine ParseCmpdMiscInfoLine ParseCmpdMolNameLine ParseCmpdAtomAliasPropertyLine ParseCmpdChargePropertyLine ParseCmpdIsotopePropertyLine ParseCmpdRadicalPropertyLine ReadCmpdString RemoveCmpdDataHeaderLabelAndValue WashCmpd)</span><span class="sc">;</span>
|
|
52 39 <span class="i">@EXPORT_OK</span> = <span class="q">qw()</span><span class="sc">;</span>
|
|
53 40 <span class="i">%EXPORT_TAGS</span> = <span class="s">(</span><span class="w">all</span> <span class="cm">=></span> <span class="s">[</span><span class="i">@EXPORT</span><span class="cm">,</span> <span class="i">@EXPORT_OK</span><span class="s">]</span><span class="s">)</span><span class="sc">;</span>
|
|
54 41
|
|
55 42 <span class="c"># Format data for compounds count line...</span>
|
|
56 <a name="GenerateCmpdCountsLine-"></a> 43 <span class="k">sub </span><span class="m">GenerateCmpdCountsLine</span> <span class="s">{</span>
|
|
57 44 <span class="k">my</span><span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
58 45
|
|
59 46 <span class="k">if</span> <span class="s">(</span><span class="i">@_</span> == <span class="n">5</span><span class="s">)</span> <span class="s">{</span>
|
|
60 47 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
61 48 <span class="s">}</span>
|
|
62 49 <span class="k">elsif</span> <span class="s">(</span><span class="i">@_</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span>
|
|
63 50 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
64 51 <span class="i">$PropertyCount</span> = <span class="n">999</span><span class="sc">;</span>
|
|
65 52 <span class="i">$Version</span> = <span class="q">"V2000"</span><span class="sc">;</span>
|
|
66 53 <span class="s">}</span>
|
|
67 54 <span class="k">else</span> <span class="s">{</span>
|
|
68 55 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
69 56 <span class="i">$ChiralFlag</span> = <span class="n">0</span><span class="sc">;</span>
|
|
70 57 <span class="i">$PropertyCount</span> = <span class="n">999</span><span class="sc">;</span>
|
|
71 58 <span class="i">$Version</span> = <span class="q">"V2000"</span><span class="sc">;</span>
|
|
72 59 <span class="s">}</span>
|
|
73 60 <span class="k">if</span> <span class="s">(</span><span class="i">$AtomCount</span> > <span class="n">999</span><span class="s">)</span> <span class="s">{</span>
|
|
74 61 <span class="w">croak</span> <span class="q">"Error: SDFileUtil::GenerateCmpdCountsLine: The atom count, $AtomCount, exceeds maximum of 999 allowed for CTAB version 2000. The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools..."</span><span class="sc">;</span>
|
|
75 62 <span class="s">}</span>
|
|
76 63 <span class="i">$Line</span> = <span class="k">sprintf</span> <span class="q">"%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%6s"</span><span class="cm">,</span> <span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="sc">;</span>
|
|
77 64
|
|
78 65 <span class="k">return</span> <span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
79 66 <span class="s">}</span>
|
|
80 67
|
|
81 68 <span class="c"># Generate comments line...</span>
|
|
82 <a name="GenerateCmpdCommentsLine-"></a> 69 <span class="k">sub </span><span class="m">GenerateCmpdCommentsLine</span> <span class="s">{</span>
|
|
83 70 <span class="k">my</span><span class="s">(</span><span class="i">$Comments</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
84 71 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
85 72
|
|
86 73 <span class="i">$Line</span> = <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$Comments</span><span class="s">)</span> > <span class="n">80</span><span class="s">)</span> ? <span class="k">substr</span><span class="s">(</span><span class="i">$Comments</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">80</span><span class="s">)</span> <span class="co">:</span> <span class="i">$Comments</span><span class="sc">;</span>
|
|
87 74
|
|
88 75 <span class="k">return</span> <span class="i">$Line</span><span class="sc">;</span>
|
|
89 76 <span class="s">}</span>
|
|
90 77
|
|
91 78 <span class="c"># Generate molname line...</span>
|
|
92 <a name="GenerateCmpdMolNameLine-"></a> 79 <span class="k">sub </span><span class="m">GenerateCmpdMolNameLine</span> <span class="s">{</span>
|
|
93 80 <span class="k">my</span><span class="s">(</span><span class="i">$MolName</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
94 81 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
95 82
|
|
96 83 <span class="i">$Line</span> = <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$MolName</span><span class="s">)</span> > <span class="n">80</span><span class="s">)</span> ? <span class="k">substr</span><span class="s">(</span><span class="i">$MolName</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">80</span><span class="s">)</span> <span class="co">:</span> <span class="i">$MolName</span><span class="sc">;</span>
|
|
97 84
|
|
98 85 <span class="k">return</span> <span class="i">$Line</span><span class="sc">;</span>
|
|
99 86 <span class="s">}</span>
|
|
100 87
|
|
101 88 <span class="c"># Generate data for compounds misc info line...</span>
|
|
102 <a name="GenerateCmpdMiscInfoLine-"></a> 89 <span class="k">sub </span><span class="m">GenerateCmpdMiscInfoLine</span> <span class="s">{</span>
|
|
103 90 <span class="k">my</span><span class="s">(</span><span class="i">$ProgramName</span><span class="cm">,</span> <span class="i">$UserInitial</span><span class="cm">,</span> <span class="i">$Code</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
104 91 <span class="k">my</span><span class="s">(</span><span class="i">$Date</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
105 92
|
|
106 93 <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">defined</span><span class="s">(</span><span class="i">$ProgramName</span><span class="s">)</span> && <span class="i">$ProgramName</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
107 94 <span class="i">$ProgramName</span> = <span class="q">"MayaChem"</span><span class="sc">;</span>
|
|
108 95 <span class="s">}</span>
|
|
109 96 <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">defined</span><span class="s">(</span><span class="i">$UserInitial</span><span class="s">)</span> && <span class="i">$UserInitial</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
110 97 <span class="i">$UserInitial</span> = <span class="q">" "</span><span class="sc">;</span>
|
|
111 98 <span class="s">}</span>
|
|
112 99 <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">defined</span><span class="s">(</span><span class="i">$Code</span><span class="s">)</span> && <span class="i">$Code</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
113 100 <span class="i">$Code</span> = <span class="q">"2D"</span><span class="sc">;</span>
|
|
114 101 <span class="s">}</span>
|
|
115 102
|
|
116 103 <span class="k">if</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$ProgramName</span><span class="s">)</span> > <span class="n">8</span><span class="s">)</span> <span class="s">{</span>
|
|
117 104 <span class="i">$ProgramName</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$ProgramName</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">8</span><span class="s">)</span><span class="sc">;</span>
|
|
118 105 <span class="s">}</span>
|
|
119 106 <span class="k">if</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$UserInitial</span><span class="s">)</span> > <span class="n">2</span><span class="s">)</span> <span class="s">{</span>
|
|
120 107 <span class="i">$UserInitial</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$UserInitial</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">2</span><span class="s">)</span><span class="sc">;</span>
|
|
121 108 <span class="s">}</span>
|
|
122 109 <span class="k">if</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$Code</span><span class="s">)</span> > <span class="n">2</span><span class="s">)</span> <span class="s">{</span>
|
|
123 110 <span class="i">$Code</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$Code</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">2</span><span class="s">)</span><span class="sc">;</span>
|
|
124 111 <span class="s">}</span>
|
|
125 112 <span class="i">$Date</span> = <span class="i">GenerateMiscLineDateStamp</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
126 113
|
|
127 114 <span class="i">$Line</span> = <span class="q">"${UserInitial}${ProgramName}${Date}${Code}"</span><span class="sc">;</span>
|
|
128 115
|
|
129 116 <span class="k">return</span> <span class="i">$Line</span><span class="sc">;</span>
|
|
130 117 <span class="s">}</span>
|
|
131 118
|
|
132 119 <span class="c"># Generate lines for an empty CTAB block: blank molname, misc info, blank comments, counts and M END lines...</span>
|
|
133 <a name="GenerateEmptyCtabBlockLines-"></a> 120 <span class="k">sub </span><span class="m">GenerateEmptyCtabBlockLines</span> <span class="s">{</span>
|
|
134 121 <span class="k">my</span><span class="s">(</span><span class="i">$Date</span><span class="cm">,</span> <span class="i">$Lines</span><span class="s">)</span><span class="sc">;</span>
|
|
135 122
|
|
136 123 <span class="k">if</span> <span class="s">(</span><span class="i">@_</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span>
|
|
137 124 <span class="s">(</span><span class="i">$Date</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
138 125 <span class="s">}</span>
|
|
139 126 <span class="k">else</span> <span class="s">{</span>
|
|
140 127 <span class="i">$Date</span> = <span class="i">GenerateMiscLineDateStamp</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
141 128 <span class="s">}</span>
|
|
142 129 <span class="c"># First line: Blank molname line...</span>
|
|
143 130 <span class="c"># Second line: Misc info...</span>
|
|
144 131 <span class="c"># Third line: Blank comments line...</span>
|
|
145 132 <span class="c"># Fourth line: Counts line reflecting empty structure data block...</span>
|
|
146 133 <span class="i">$Lines</span> = <span class="q">"\n"</span><span class="sc">;</span>
|
|
147 134 <span class="i">$Lines</span> .= <span class="q">" MayaChem${Date}2D\n"</span><span class="sc">;</span>
|
|
148 135 <span class="i">$Lines</span> .= <span class="q">"\n"</span><span class="sc">;</span>
|
|
149 136 <span class="i">$Lines</span> .= <span class="i">GenerateCmpdCountsLine</span><span class="s">(</span><span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="s">)</span> . <span class="q">"\n"</span><span class="sc">;</span>
|
|
150 137 <span class="i">$Lines</span> .= <span class="q">"M END"</span><span class="sc">;</span>
|
|
151 138
|
|
152 139 <span class="k">return</span> <span class="i">$Lines</span><span class="sc">;</span>
|
|
153 140 <span class="s">}</span>
|
|
154 141
|
|
155 142 <span class="c"># Generate SD file date stamp...</span>
|
|
156 <a name="GenerateMiscLineDateStamp-"></a> 143 <span class="k">sub </span><span class="m">GenerateMiscLineDateStamp</span> <span class="s">{</span>
|
|
157 144 <span class="k">return</span> <span class="i">TimeUtil::SDFileTimeStamp</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
158 145 <span class="s">}</span>
|
|
159 146
|
|
160 147 <span class="c"># Generate data for compound atom line...</span>
|
|
161 148 <span class="c">#</span>
|
|
162 <a name="GenerateCmpdAtomLine-"></a> 149 <span class="k">sub </span><span class="m">GenerateCmpdAtomLine</span> <span class="s">{</span>
|
|
163 150 <span class="k">my</span><span class="s">(</span><span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
164 151 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
165 152
|
|
166 153 <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$MassDifference</span><span class="s">)</span> <span class="s">{</span>
|
|
167 154 <span class="i">$MassDifference</span> = <span class="n">0</span><span class="sc">;</span>
|
|
168 155 <span class="s">}</span>
|
|
169 156 <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$Charge</span><span class="s">)</span> <span class="s">{</span>
|
|
170 157 <span class="i">$Charge</span> = <span class="n">0</span><span class="sc">;</span>
|
|
171 158 <span class="s">}</span>
|
|
172 159 <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$StereoParity</span><span class="s">)</span> <span class="s">{</span>
|
|
173 160 <span class="i">$StereoParity</span> = <span class="n">0</span><span class="sc">;</span>
|
|
174 161 <span class="s">}</span>
|
|
175 162 <span class="i">$Line</span> = <span class="k">sprintf</span> <span class="q">"%10.4f%10.4f%10.4f %-3s%2i%3i%3i 0 0 0 0 0 0 0 0 0"</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="sc">;</span>
|
|
176 163
|
|
177 164 <span class="k">return</span> <span class="i">$Line</span>
|
|
178 165 <span class="s">}</span>
|
|
179 166
|
|
180 167 <span class="c"># Generate data for compound bond line...</span>
|
|
181 168 <span class="c">#</span>
|
|
182 <a name="GenerateCmpdBondLine-"></a> 169 <span class="k">sub </span><span class="m">GenerateCmpdBondLine</span> <span class="s">{</span>
|
|
183 170 <span class="k">my</span><span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
184 171 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
185 172
|
|
186 173 <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$BondStereo</span><span class="s">)</span> <span class="s">{</span>
|
|
187 174 <span class="i">$BondStereo</span> = <span class="n">0</span><span class="sc">;</span>
|
|
188 175 <span class="s">}</span>
|
|
189 176 <span class="i">$Line</span> = <span class="k">sprintf</span> <span class="q">"%3i%3i%3i%3i 0 0 0"</span><span class="cm">,</span> <span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="sc">;</span>
|
|
190 177
|
|
191 178 <span class="k">return</span> <span class="i">$Line</span>
|
|
192 179 <span class="s">}</span>
|
|
193 180
|
|
194 181 <span class="c"># Generate charge property lines for CTAB block...</span>
|
|
195 182 <span class="c">#</span>
|
|
196 <a name="GenerateCmpdChargePropertyLines-"></a> 183 <span class="k">sub </span><span class="m">GenerateCmpdChargePropertyLines</span> <span class="s">{</span>
|
|
197 184 <span class="k">my</span><span class="s">(</span><span class="i">$ChargeValuePairsRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
198 185
|
|
199 186 <span class="k">return</span> <span class="i">_GenerateCmpdGenericPropertyLines</span><span class="s">(</span><span class="q">'Charge'</span><span class="cm">,</span> <span class="i">$ChargeValuePairsRef</span><span class="s">)</span><span class="sc">;</span>
|
|
200 187 <span class="s">}</span>
|
|
201 188
|
|
202 189 <span class="c"># Generate isotope property lines for CTAB block...</span>
|
|
203 190 <span class="c">#</span>
|
|
204 <a name="GenerateCmpdIsotopePropertyLines-"></a> 191 <span class="k">sub </span><span class="m">GenerateCmpdIsotopePropertyLines</span> <span class="s">{</span>
|
|
205 192 <span class="k">my</span><span class="s">(</span><span class="i">$IsotopeValuePairsRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
206 193
|
|
207 194 <span class="k">return</span> <span class="i">_GenerateCmpdGenericPropertyLines</span><span class="s">(</span><span class="q">'Isotope'</span><span class="cm">,</span> <span class="i">$IsotopeValuePairsRef</span><span class="s">)</span><span class="sc">;</span>
|
|
208 195 <span class="s">}</span>
|
|
209 196
|
|
210 197 <span class="c"># Generate radical property lines for CTAB block...</span>
|
|
211 198 <span class="c">#</span>
|
|
212 <a name="GenerateCmpdRadicalPropertyLines-"></a> 199 <span class="k">sub </span><span class="m">GenerateCmpdRadicalPropertyLines</span> <span class="s">{</span>
|
|
213 200 <span class="k">my</span><span class="s">(</span><span class="i">$RadicalValuePairsRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
214 201
|
|
215 202 <span class="k">return</span> <span class="i">_GenerateCmpdGenericPropertyLines</span><span class="s">(</span><span class="q">'Radical'</span><span class="cm">,</span> <span class="i">$RadicalValuePairsRef</span><span class="s">)</span><span class="sc">;</span>
|
|
216 203 <span class="s">}</span>
|
|
217 204
|
|
218 205 <span class="c"># Generate atom alias property lines for CTAB block...</span>
|
|
219 206 <span class="c">#</span>
|
|
220 207 <span class="c"># Atom alias property line format:</span>
|
|
221 208 <span class="c">#</span>
|
|
222 209 <span class="c"># A aaa</span>
|
|
223 210 <span class="c"># x...</span>
|
|
224 211 <span class="c">#</span>
|
|
225 212 <span class="c"># aaa: Atom number</span>
|
|
226 213 <span class="c"># x: Atom alias in next line</span>
|
|
227 214 <span class="c">#</span>
|
|
228 <a name="GenerateCmpdAtomAliasPropertyLines-"></a> 215 <span class="k">sub </span><span class="m">GenerateCmpdAtomAliasPropertyLines</span> <span class="s">{</span>
|
|
229 216 <span class="k">my</span><span class="s">(</span><span class="i">$PropertyValuePairsRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
230 217 <span class="k">my</span><span class="s">(</span><span class="i">$Index</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$AtomAlias</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">@PropertyLines</span><span class="s">)</span><span class="sc">;</span>
|
|
231 218
|
|
232 219 <span class="i">@PropertyLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
233 220
|
|
234 221 <span class="k">for</span> <span class="s">(</span><span class="i">$Index</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$Index</span> < <span class="i">$#</span>{<span class="i">$PropertyValuePairsRef</span>}<span class="sc">;</span> <span class="i">$Index</span> += <span class="n">2</span><span class="s">)</span> <span class="s">{</span>
|
|
235 222 <span class="i">$AtomNum</span> = <span class="i">$PropertyValuePairsRef</span>->[<span class="i">$Index</span>]<span class="sc">;</span>
|
|
236 223 <span class="i">$AtomAlias</span> = <span class="i">$PropertyValuePairsRef</span>->[<span class="i">$Index</span> + <span class="n">1</span>]<span class="sc">;</span>
|
|
237 224
|
|
238 225 <span class="i">$Line</span> = <span class="q">"A "</span> . <span class="k">sprintf</span> <span class="q">"%3i"</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="sc">;</span>
|
|
239 226
|
|
240 227 <span class="k">push</span> <span class="i">@PropertyLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="sc">;</span>
|
|
241 228 <span class="k">push</span> <span class="i">@PropertyLines</span><span class="cm">,</span> <span class="i">$AtomAlias</span><span class="sc">;</span>
|
|
242 229 <span class="s">}</span>
|
|
243 230
|
|
244 231 <span class="k">return</span> <span class="i">@PropertyLines</span><span class="sc">;</span>
|
|
245 232 <span class="s">}</span>
|
|
246 233
|
|
247 234 <span class="c"># Generate data header labels and values lines...</span>
|
|
248 235 <span class="c">#</span>
|
|
249 <a name="GenerateCmpdDataHeaderLabelsAndValuesLines-"></a> 236 <span class="k">sub </span><span class="m">GenerateCmpdDataHeaderLabelsAndValuesLines</span> <span class="s">{</span>
|
|
250 237 <span class="k">my</span><span class="s">(</span><span class="i">$DataHeaderLabelsRef</span><span class="cm">,</span> <span class="i">$DataHeaderLabelsAndValuesRef</span><span class="cm">,</span> <span class="i">$SortDataLabels</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
251 238 <span class="k">my</span><span class="s">(</span><span class="i">$DataLabel</span><span class="cm">,</span> <span class="i">$DataValue</span><span class="cm">,</span> <span class="i">@DataLabels</span><span class="cm">,</span> <span class="i">@DataLines</span><span class="s">)</span><span class="sc">;</span>
|
|
252 239
|
|
253 240 <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$SortDataLabels</span><span class="s">)</span> <span class="s">{</span>
|
|
254 241 <span class="i">$SortDataLabels</span> = <span class="n">0</span><span class="sc">;</span>
|
|
255 242 <span class="s">}</span>
|
|
256 243
|
|
257 244 <span class="i">@DataLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
258 245 <span class="i">@DataLabels</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
259 246 <span class="k">if</span> <span class="s">(</span><span class="i">$SortDataLabels</span><span class="s">)</span> <span class="s">{</span>
|
|
260 247 <span class="k">push</span> <span class="i">@DataLabels</span><span class="cm">,</span> <span class="k">sort</span> <span class="i">@</span>{<span class="i">$DataHeaderLabelsRef</span>}<span class="sc">;</span>
|
|
261 248 <span class="s">}</span>
|
|
262 249 <span class="k">else</span> <span class="s">{</span>
|
|
263 250 <span class="k">push</span> <span class="i">@DataLabels</span><span class="cm">,</span> <span class="i">@</span>{<span class="i">$DataHeaderLabelsRef</span>}<span class="sc">;</span>
|
|
264 251 <span class="s">}</span>
|
|
265 252 <span class="k">for</span> <span class="i">$DataLabel</span> <span class="s">(</span><span class="i">@DataLabels</span><span class="s">)</span> <span class="s">{</span>
|
|
266 253 <span class="i">$DataValue</span> = <span class="q">''</span><span class="sc">;</span>
|
|
267 254 <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$DataHeaderLabelsAndValuesRef</span>->{<span class="i">$DataLabel</span>}<span class="s">)</span> <span class="s">{</span>
|
|
268 255 <span class="i">$DataValue</span> = <span class="i">$DataHeaderLabelsAndValuesRef</span>->{<span class="i">$DataLabel</span>}<span class="sc">;</span>
|
|
269 256 <span class="s">}</span>
|
|
270 257 <span class="k">push</span> <span class="i">@DataLines</span><span class="cm">,</span> <span class="s">(</span><span class="q">"&gt; &lt;${DataLabel}&gt;"</span><span class="cm">,</span> <span class="q">"$DataValue"</span><span class="cm">,</span> <span class="q">""</span><span class="s">)</span><span class="sc">;</span>
|
|
271 258 <span class="s">}</span>
|
|
272 259 <span class="k">return</span> <span class="i">@DataLines</span><span class="sc">;</span>
|
|
273 260 <span class="s">}</span>
|
|
274 261
|
|
275 262 <span class="c"># Parse data field header in SD file and return lists of all and common data field</span>
|
|
276 263 <span class="c"># labels.</span>
|
|
277 <a name="GetAllAndCommonCmpdDataHeaderLabels-"></a> 264 <span class="k">sub </span><span class="m">GetAllAndCommonCmpdDataHeaderLabels</span> <span class="s">{</span>
|
|
278 265 <span class="k">my</span><span class="s">(</span><span class="i">$SDFileRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
279 266 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdCount</span><span class="cm">,</span> <span class="i">$CmpdString</span><span class="cm">,</span> <span class="i">$Label</span><span class="cm">,</span> <span class="i">@CmpdLines</span><span class="cm">,</span> <span class="i">@DataFieldLabels</span><span class="cm">,</span> <span class="i">@CommonDataFieldLabels</span><span class="cm">,</span> <span class="i">%DataFieldLabelsMap</span><span class="s">)</span><span class="sc">;</span>
|
|
280 267
|
|
281 268 <span class="i">$CmpdCount</span> = <span class="n">0</span><span class="sc">;</span>
|
|
282 269 <span class="i">@DataFieldLabels</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
283 270 <span class="i">@CommonDataFieldLabels</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
284 271 <span class="i">%DataFieldLabelsMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
285 272
|
|
286 273 <span class="k">while</span> <span class="s">(</span><span class="i">$CmpdString</span> = <span class="i">ReadCmpdString</span><span class="s">(</span><span class="i">$SDFileRef</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
287 274 <span class="i">$CmpdCount</span>++<span class="sc">;</span>
|
|
288 275 <span class="i">@CmpdLines</span> = <span class="k">split</span> <span class="q">"\n"</span><span class="cm">,</span> <span class="i">$CmpdString</span><span class="sc">;</span>
|
|
289 276 <span class="c"># Process compound data header labels and figure out which ones are present for</span>
|
|
290 277 <span class="c"># all the compounds...</span>
|
|
291 278 <span class="k">if</span> <span class="s">(</span><span class="i">@DataFieldLabels</span><span class="s">)</span> <span class="s">{</span>
|
|
292 279 <span class="k">my</span> <span class="s">(</span><span class="i">@CmpdDataFieldLabels</span><span class="s">)</span> = <span class="i">GetCmpdDataHeaderLabels</span><span class="s">(</span>\<span class="i">@CmpdLines</span><span class="s">)</span><span class="sc">;</span>
|
|
293 280 <span class="k">my</span><span class="s">(</span><span class="i">%CmpdDataFieldLabelsMap</span><span class="s">)</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
294 281 <span class="c"># Setup a map for the current labels...</span>
|
|
295 282 <span class="k">for</span> <span class="i">$Label</span> <span class="s">(</span><span class="i">@CmpdDataFieldLabels</span><span class="s">)</span> <span class="s">{</span>
|
|
296 283 <span class="i">$CmpdDataFieldLabelsMap</span>{<span class="i">$Label</span>} = <span class="q">"PresentInSome"</span><span class="sc">;</span>
|
|
297 284 <span class="s">}</span>
|
|
298 285 <span class="c"># Check the presence of old labels in this compound; otherwise, mark 'em as present in some...</span>
|
|
299 286 <span class="k">for</span> <span class="i">$Label</span> <span class="s">(</span><span class="i">@DataFieldLabels</span><span class="s">)</span> <span class="s">{</span>
|
|
300 287 <span class="k">if</span> <span class="s">(</span>!<span class="i">$CmpdDataFieldLabelsMap</span>{<span class="i">$Label</span>}<span class="s">)</span> <span class="s">{</span>
|
|
301 288 <span class="i">$DataFieldLabelsMap</span>{<span class="i">$Label</span>} = <span class="q">"PresentInSome"</span><span class="sc">;</span>
|
|
302 289 <span class="s">}</span>
|
|
303 290 <span class="s">}</span>
|
|
304 291 <span class="c"># Check the presence of this compound's labels in the old labels; otherwise, add 'em...</span>
|
|
305 292 <span class="k">for</span> <span class="i">$Label</span> <span class="s">(</span><span class="i">@CmpdDataFieldLabels</span> <span class="s">)</span> <span class="s">{</span>
|
|
306 293 <span class="k">if</span> <span class="s">(</span>!<span class="i">$DataFieldLabelsMap</span>{<span class="i">$Label</span>}<span class="s">)</span> <span class="s">{</span>
|
|
307 294 <span class="c"># It's a new label...</span>
|
|
308 295 <span class="k">push</span> <span class="i">@DataFieldLabels</span><span class="cm">,</span> <span class="i">$Label</span><span class="sc">;</span>
|
|
309 296 <span class="i">$DataFieldLabelsMap</span>{<span class="i">$Label</span>} = <span class="q">"PresentInSome"</span><span class="sc">;</span>
|
|
310 297 <span class="s">}</span>
|
|
311 298 <span class="s">}</span>
|
|
312 299 <span class="s">}</span>
|
|
313 300 <span class="k">else</span> <span class="s">{</span>
|
|
314 301 <span class="c"># Get the initial label set and set up a map...</span>
|
|
315 302 <span class="i">@DataFieldLabels</span> = <span class="i">GetCmpdDataHeaderLabels</span><span class="s">(</span>\<span class="i">@CmpdLines</span><span class="s">)</span><span class="sc">;</span>
|
|
316 303 <span class="k">for</span> <span class="i">$Label</span> <span class="s">(</span><span class="i">@DataFieldLabels</span><span class="s">)</span> <span class="s">{</span>
|
|
317 304 <span class="i">$DataFieldLabelsMap</span>{<span class="i">$Label</span>} = <span class="q">"PresentInAll"</span><span class="sc">;</span>
|
|
318 305 <span class="s">}</span>
|
|
319 306 <span class="s">}</span>
|
|
320 307 <span class="s">}</span>
|
|
321 308 <span class="c"># Identify the common data field labels...</span>
|
|
322 309 <span class="i">@CommonDataFieldLabels</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
323 310 <span class="k">for</span> <span class="i">$Label</span> <span class="s">(</span><span class="i">@DataFieldLabels</span><span class="s">)</span> <span class="s">{</span>
|
|
324 311 <span class="k">if</span> <span class="s">(</span><span class="i">$DataFieldLabelsMap</span>{<span class="i">$Label</span>} <span class="k">eq</span> <span class="q">"PresentInAll"</span><span class="s">)</span> <span class="s">{</span>
|
|
325 312 <span class="k">push</span> <span class="i">@CommonDataFieldLabels</span><span class="cm">,</span> <span class="i">$Label</span><span class="sc">;</span>
|
|
326 313 <span class="s">}</span>
|
|
327 314 <span class="s">}</span>
|
|
328 315 <span class="k">return</span> <span class="s">(</span><span class="i">$CmpdCount</span><span class="cm">,</span> \<span class="i">@DataFieldLabels</span><span class="cm">,</span> \<span class="i">@CommonDataFieldLabels</span><span class="s">)</span><span class="sc">;</span>
|
|
329 316 <span class="s">}</span>
|
|
330 317
|
|
331 318 <span class="c"># Parse all the data header labels and return 'em as a list...</span>
|
|
332 319 <span class="c">#</span>
|
|
333 320 <span class="c"># Format:</span>
|
|
334 321 <span class="c">#</span>
|
|
335 322 <span class="c">#> Data header line</span>
|
|
336 323 <span class="c">#Data line(s)</span>
|
|
337 324 <span class="c">#Blank line</span>
|
|
338 325 <span class="c">#</span>
|
|
339 326 <span class="c"># [Data Header] (one line) precedes each item of data, starts with a greater than (>) sign, and</span>
|
|
340 327 <span class="c"># contains at least one of the following:</span>
|
|
341 328 <span class="c"># The field name enclosed in angle brackets. For example: <melting.point></span>
|
|
342 329 <span class="c"># The field number, DTn, where n represents the number assigned to the field in a MACCS-II database</span>
|
|
343 330 <span class="c">#</span>
|
|
344 331 <span class="c">#Optional information for the data header includes:</span>
|
|
345 332 <span class="c"># The compound's external and internal registry numbers. External registry numbers must be enclosed in parentheses.</span>
|
|
346 333 <span class="c"># Any combination of information</span>
|
|
347 334 <span class="c">#</span>
|
|
348 335 <span class="c">#The following are examples of valid data headers:</span>
|
|
349 336 <span class="c">#> <MELTING.POINT></span>
|
|
350 337 <span class="c">#> 55 (MD-08974) <BOILING.POINT> DT12</span>
|
|
351 338 <span class="c">#> DT12 55</span>
|
|
352 339 <span class="c">#> (MD-0894) <BOILING.POINT> FROM ARCHIVES</span>
|
|
353 340 <span class="c">#</span>
|
|
354 341 <span class="c">#Note: Sometimes the last blank line is missing and the data is followed directly by $$$$</span>
|
|
355 342 <span class="c">#</span>
|
|
356 <a name="GetCmpdDataHeaderLabels-"></a> 343 <span class="k">sub </span><span class="m">GetCmpdDataHeaderLabels</span> <span class="s">{</span>
|
|
357 344 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
358 345 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLine</span><span class="cm">,</span> <span class="i">$Label</span><span class="cm">,</span> <span class="i">@Labels</span><span class="s">)</span><span class="sc">;</span>
|
|
359 346
|
|
360 347 <span class="i">@Labels</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
361 348 <span class="j">CMPDLINE:</span> <span class="k">for</span> <span class="i">$CmpdLine</span> <span class="s">(</span><span class="i">@$CmpdLines</span><span class="s">)</span> <span class="s">{</span>
|
|
362 349 <span class="k">if</span> <span class="s">(</span><span class="i">$CmpdLine</span> !~ <span class="q">/^>/</span><span class="s">)</span> <span class="s">{</span>
|
|
363 350 <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span>
|
|
364 351 <span class="s">}</span>
|
|
365 352 <span class="c"># Does the line contain a field name enclosed in angle brackets?</span>
|
|
366 353 <span class="s">(</span><span class="i">$Label</span><span class="s">)</span> = <span class="i">$CmpdLine</span> =~ <span class="q">/<.*?>/g</span><span class="sc">;</span>
|
|
367 354 <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span><span class="s">(</span><span class="i">$Label</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
368 355 <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span>
|
|
369 356 <span class="s">}</span>
|
|
370 357 <span class="i">$Label</span> =~ <span class="q">s/(<|>)//g</span><span class="sc">;</span>
|
|
371 358 <span class="k">push</span> <span class="i">@Labels</span><span class="cm">,</span> <span class="i">$Label</span><span class="sc">;</span>
|
|
372 359 <span class="s">}</span>
|
|
373 360 <span class="k">return</span> <span class="s">(</span><span class="i">@Labels</span><span class="s">)</span><span class="sc">;</span>
|
|
374 361 <span class="s">}</span>
|
|
375 362
|
|
376 363 <span class="c"># Parse all the data header labels and values</span>
|
|
377 <a name="GetCmpdDataHeaderLabelsAndValues-"></a> 364 <span class="k">sub </span><span class="m">GetCmpdDataHeaderLabelsAndValues</span> <span class="s">{</span>
|
|
378 365 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
379 366 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLine</span><span class="cm">,</span> <span class="i">$CurrentLabel</span><span class="cm">,</span> <span class="i">$Label</span><span class="cm">,</span> <span class="i">$Value</span><span class="cm">,</span> <span class="i">$ValueCount</span><span class="cm">,</span> <span class="i">$ProcessingLabelData</span><span class="cm">,</span> <span class="i">@Values</span><span class="cm">,</span> <span class="i">%DataFields</span><span class="s">)</span><span class="sc">;</span>
|
|
380 367
|
|
381 368 <span class="i">%DataFields</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
382 369 <span class="i">$ProcessingLabelData</span> = <span class="n">0</span><span class="sc">;</span>
|
|
383 370 <span class="i">$ValueCount</span> = <span class="n">0</span><span class="sc">;</span>
|
|
384 371 <span class="j">CMPDLINE:</span> <span class="k">for</span> <span class="i">$CmpdLine</span> <span class="s">(</span><span class="i">@$CmpdLines</span><span class="s">)</span> <span class="s">{</span>
|
|
385 372 <span class="k">if</span> <span class="s">(</span><span class="i">$CmpdLine</span> =~ <span class="q">/^\$\$\$\$/</span><span class="s">)</span> <span class="s">{</span>
|
|
386 373 <span class="k">last</span> <span class="j">CMPDLINE</span><span class="sc">;</span>
|
|
387 374 <span class="s">}</span>
|
|
388 375 <span class="k">if</span> <span class="s">(</span><span class="i">$CmpdLine</span> =~ <span class="q">/^>/</span><span class="s">)</span> <span class="s">{</span>
|
|
389 376 <span class="c"># Does the line contain a field name enclosed in angle brackets?</span>
|
|
390 377 <span class="s">(</span><span class="i">$Label</span><span class="s">)</span> = <span class="i">$CmpdLine</span> =~ <span class="q">/<.*?>/g</span><span class="sc">;</span>
|
|
391 378 <span class="k">if</span> <span class="s">(</span><span class="k">defined</span> <span class="i">$Label</span><span class="s">)</span> <span class="s">{</span>
|
|
392 379 <span class="i">$CurrentLabel</span> = <span class="i">$Label</span><span class="sc">;</span>
|
|
393 380 <span class="i">$CurrentLabel</span> =~ <span class="q">s/(<|>)//g</span><span class="sc">;</span>
|
|
394 381 <span class="i">$ProcessingLabelData</span> = <span class="n">0</span><span class="sc">;</span>
|
|
395 382 <span class="i">$ValueCount</span> = <span class="n">0</span><span class="sc">;</span>
|
|
396 383
|
|
397 384 <span class="k">if</span> <span class="s">(</span><span class="i">$CurrentLabel</span><span class="s">)</span> <span class="s">{</span>
|
|
398 385 <span class="i">$ProcessingLabelData</span> = <span class="n">1</span><span class="sc">;</span>
|
|
399 386 <span class="i">$DataFields</span>{<span class="i">$CurrentLabel</span>} = <span class="q">''</span><span class="sc">;</span>
|
|
400 387 <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span>
|
|
401 388 <span class="s">}</span>
|
|
402 389 <span class="s">}</span>
|
|
403 390 <span class="k">else</span> <span class="s">{</span>
|
|
404 391 <span class="k">if</span> <span class="s">(</span>!<span class="i">$ProcessingLabelData</span><span class="s">)</span> <span class="s">{</span>
|
|
405 392 <span class="c"># Data line containing no <label> as allowed by SDF format. Just ignore it...</span>
|
|
406 393 <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span>
|
|
407 394 <span class="s">}</span>
|
|
408 395 <span class="s">}</span>
|
|
409 396 <span class="s">}</span>
|
|
410 397 <span class="k">if</span> <span class="s">(</span>!<span class="i">$ProcessingLabelData</span><span class="s">)</span> <span class="s">{</span>
|
|
411 398 <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span>
|
|
412 399 <span class="s">}</span>
|
|
413 400 <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">defined</span><span class="s">(</span><span class="i">$CmpdLine</span><span class="s">)</span> && <span class="k">length</span><span class="s">(</span><span class="i">$CmpdLine</span><span class="s">)</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
414 401 <span class="c"># Blank line terminates value for a label...</span>
|
|
415 402 <span class="i">$CurrentLabel</span> = <span class="q">''</span><span class="sc">;</span>
|
|
416 403 <span class="i">$ValueCount</span> = <span class="n">0</span><span class="sc">;</span>
|
|
417 404 <span class="i">$ProcessingLabelData</span> = <span class="n">0</span><span class="sc">;</span>
|
|
418 405 <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span>
|
|
419 406 <span class="s">}</span>
|
|
420 407 <span class="i">$ValueCount</span>++<span class="sc">;</span>
|
|
421 408 <span class="i">$Value</span> = <span class="i">$CmpdLine</span><span class="sc">;</span>
|
|
422 409
|
|
423 410 <span class="k">if</span> <span class="s">(</span><span class="i">$ValueCount</span> > <span class="n">1</span><span class="s">)</span> <span class="s">{</span>
|
|
424 411 <span class="i">$DataFields</span>{<span class="i">$CurrentLabel</span>} .= <span class="q">"\n"</span> . <span class="i">$Value</span><span class="sc">;</span>
|
|
425 412 <span class="s">}</span>
|
|
426 413 <span class="k">else</span> <span class="s">{</span>
|
|
427 414 <span class="i">$DataFields</span>{<span class="i">$CurrentLabel</span>} = <span class="i">$Value</span><span class="sc">;</span>
|
|
428 415 <span class="s">}</span>
|
|
429 416 <span class="s">}</span>
|
|
430 417 <span class="k">return</span> <span class="s">(</span><span class="i">%DataFields</span><span class="s">)</span><span class="sc">;</span>
|
|
431 418 <span class="s">}</span>
|
|
432 419
|
|
433 420 <span class="c"># Return an updated compound string after removing data header label along with its</span>
|
|
434 421 <span class="c"># value from the specified compound string...</span>
|
|
435 422 <span class="c">#</span>
|
|
436 <a name="RemoveCmpdDataHeaderLabelAndValue-"></a> 423 <span class="k">sub </span><span class="m">RemoveCmpdDataHeaderLabelAndValue</span> <span class="s">{</span>
|
|
437 424 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdString</span><span class="cm">,</span> <span class="i">$DataHeaderLabel</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
438 425 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">$PorcessingDataHeaderLabel</span><span class="cm">,</span> <span class="i">@CmpdLines</span><span class="s">)</span><span class="sc">;</span>
|
|
439 426
|
|
440 427 <span class="i">@CmpdLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
441 428 <span class="i">$PorcessingDataHeaderLabel</span> = <span class="n">0</span><span class="sc">;</span>
|
|
442 429
|
|
443 430 <span class="j">CMPDLINE:</span> <span class="k">for</span> <span class="i">$Line</span> <span class="s">(</span><span class="k">split</span> <span class="q">"\n"</span><span class="cm">,</span> <span class="i">$CmpdString</span><span class="s">)</span> <span class="s">{</span>
|
|
444 431 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^>/</span> && <span class="i">$Line</span> =~ <span class="q">/<$DataHeaderLabel>/i</span><span class="s">)</span> <span class="s">{</span>
|
|
445 432 <span class="i">$PorcessingDataHeaderLabel</span> = <span class="n">1</span><span class="sc">;</span>
|
|
446 433 <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span>
|
|
447 434 <span class="s">}</span>
|
|
448 435
|
|
449 436 <span class="k">if</span> <span class="s">(</span><span class="i">$PorcessingDataHeaderLabel</span><span class="s">)</span> <span class="s">{</span>
|
|
450 437 <span class="c"># A blank line or the $$$$ terminator marks the end of the data value being removed...</span>
|
|
451 438 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^\$\$\$\$/</span><span class="s">)</span> <span class="s">{</span>
|
|
452 439 <span class="k">push</span> <span class="i">@CmpdLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="sc">;</span>
|
|
453 440 <span class="i">$PorcessingDataHeaderLabel</span> = <span class="n">0</span><span class="sc">;</span>
|
|
454 441 <span class="s">}</span>
|
|
455 442 <span class="k">elsif</span> <span class="s">(</span>!<span class="k">length</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
456 443 <span class="i">$PorcessingDataHeaderLabel</span> = <span class="n">0</span><span class="sc">;</span>
|
|
457 444 <span class="s">}</span>
|
|
458 445 <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span>
|
|
459 446 <span class="s">}</span>
|
|
460 447
|
|
461 448 <span class="c"># Keep compound lines that are not part of the removed data field...</span>
|
|
462 449 <span class="k">push</span> <span class="i">@CmpdLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="sc">;</span>
|
|
463 450 <span class="s">}</span>
|
|
464 451
|
|
465 452 <span class="k">return</span> <span class="k">join</span> <span class="q">"\n"</span><span class="cm">,</span> <span class="i">@CmpdLines</span><span class="sc">;</span>
|
|
466 453 <span class="s">}</span>
|
|
467 454
|
|
468 455 <span class="c">#</span>
|
|
469 456 <span class="c"># Using bond blocks, figure out the number of disconnected fragments and</span>
|
|
470 457 <span class="c"># return their values along with the atom numbers in a string delimited by new</span>
|
|
471 458 <span class="c"># line character.</span>
|
|
472 459 <span class="c">#</span>
|
|
473 <a name="GetCmpdFragments-"></a> 460 <span class="k">sub </span><span class="m">GetCmpdFragments</span> <span class="s">{</span>
|
|
474 461 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
475 462 <span class="k">my</span><span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">@AtomConnections</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$FragmentString</span><span class="cm">,</span> <span class="i">$FragmentCount</span><span class="cm">,</span> <span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$Index</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$NbrAtomNum</span><span class="cm">,</span> <span class="i">@ProcessedAtoms</span><span class="cm">,</span> <span class="i">$ProcessedAtomCount</span><span class="cm">,</span> <span class="i">$ProcessAtomNum</span><span class="cm">,</span> <span class="i">@ProcessingAtoms</span><span class="cm">,</span> <span class="i">@ConnectedAtoms</span><span class="cm">,</span> <span class="i">%Fragments</span><span class="cm">,</span> <span class="i">$FragmentNum</span><span class="cm">,</span> <span class="i">$AFragmentString</span><span class="s">)</span><span class="sc">;</span>
|
|
476 463
|
|
477 464 <span class="c"># Setup the connection table for each atom...</span>
|
|
478 465 <span class="i">@AtomConnections</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
479 466 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="s">)</span> = <span class="i">ParseCmpdCountsLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="s">)</span><span class="sc">;</span>
|
|
480 467 <span class="k">for</span> <span class="i">$AtomNum</span> <span class="s">(</span><span class="n">1</span> .. <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span>
|
|
481 468 <span class="i">%</span>{<span class="i">$AtomConnections</span>[<span class="i">$AtomNum</span>]} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
482 469 <span class="s">}</span>
|
|
483 470 <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span> + <span class="i">$AtomCount</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span> + <span class="i">$BondCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span>
|
|
484 471 <span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="s">)</span> = <span class="i">ParseCmpdBondLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="s">)</span><span class="sc">;</span>
|
|
485 472 <span class="k">if</span> <span class="s">(</span>!<span class="i">$AtomConnections</span>[<span class="i">$FirstAtomNum</span>]{<span class="i">$SecondAtomNum</span>}<span class="s">)</span> <span class="s">{</span>
|
|
486 473 <span class="i">$AtomConnections</span>[<span class="i">$FirstAtomNum</span>]{<span class="i">$SecondAtomNum</span>} = <span class="i">$BondType</span><span class="sc">;</span>
|
|
487 474 <span class="s">}</span>
|
|
488 475 <span class="k">if</span> <span class="s">(</span>!<span class="i">$AtomConnections</span>[<span class="i">$SecondAtomNum</span>]{<span class="i">$FirstAtomNum</span>}<span class="s">)</span> <span class="s">{</span>
|
|
489 476 <span class="i">$AtomConnections</span>[<span class="i">$SecondAtomNum</span>]{<span class="i">$FirstAtomNum</span>} = <span class="i">$BondType</span><span class="sc">;</span>
|
|
490 477 <span class="s">}</span>
|
|
491 478 <span class="s">}</span>
|
|
492 479
|
|
493 480 <span class="c">#Get set to count fragments...</span>
|
|
494 481 <span class="i">$ProcessedAtomCount</span> = <span class="n">0</span><span class="sc">;</span>
|
|
495 482 <span class="i">$FragmentNum</span> = <span class="n">0</span><span class="sc">;</span>
|
|
496 483 <span class="i">%Fragments</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
497 484 <span class="i">@ProcessedAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
498 485 <span class="k">for</span> <span class="i">$AtomNum</span> <span class="s">(</span><span class="n">1</span> .. <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span>
|
|
499 486 <span class="i">$ProcessedAtoms</span>[<span class="i">$AtomNum</span>] = <span class="n">0</span><span class="sc">;</span>
|
|
500 487 <span class="s">}</span>
|
|
501 488 <span class="k">while</span> <span class="s">(</span><span class="i">$ProcessedAtomCount</span> < <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span>
|
|
502 489 <span class="i">@ProcessingAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
503 490 <span class="i">@ConnectedAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
504 491 <span class="j">ATOMNUM:</span> <span class="k">for</span> <span class="i">$AtomNum</span> <span class="s">(</span><span class="n">1</span> .. <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span>
|
|
505 492 <span class="k">if</span> <span class="s">(</span>!<span class="i">$ProcessedAtoms</span>[<span class="i">$AtomNum</span>]<span class="s">)</span> <span class="s">{</span>
|
|
506 493 <span class="i">$ProcessedAtomCount</span>++<span class="sc">;</span>
|
|
507 494 <span class="i">$ProcessedAtoms</span>[<span class="i">$AtomNum</span>] = <span class="n">1</span><span class="sc">;</span>
|
|
508 495 <span class="k">push</span> <span class="i">@ProcessingAtoms</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="sc">;</span>
|
|
509 496 <span class="i">$FragmentNum</span>++<span class="sc">;</span>
|
|
510 497 <span class="i">@</span>{<span class="i">$Fragments</span>{<span class="i">$FragmentNum</span>} } = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
511 498 <span class="k">push</span> <span class="i">@</span>{<span class="i">$Fragments</span>{<span class="i">$FragmentNum</span>} }<span class="cm">,</span> <span class="i">$AtomNum</span><span class="sc">;</span>
|
|
512 499 <span class="k">last</span> <span class="j">ATOMNUM</span><span class="sc">;</span>
|
|
513 500 <span class="s">}</span>
|
|
514 501 <span class="s">}</span>
|
|
515 502
|
|
516 503 <span class="c"># Go over the neighbors and follow the connection trail while collecting the</span>
|
|
517 504 <span class="c"># atoms numbers present in the connected fragment...</span>
|
|
518 505 <span class="k">while</span> <span class="s">(</span><span class="i">@ProcessingAtoms</span><span class="s">)</span> <span class="s">{</span>
|
|
519 506 <span class="k">for</span> <span class="s">(</span><span class="i">$Index</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$Index</span> < <span class="i">@ProcessingAtoms</span><span class="sc">;</span> <span class="i">$Index</span>++<span class="s">)</span> <span class="s">{</span>
|
|
520 507 <span class="i">$ProcessAtomNum</span> = <span class="i">$ProcessingAtoms</span>[<span class="i">$Index</span>]<span class="sc">;</span>
|
|
521 508 <span class="k">for</span> <span class="i">$NbrAtomNum</span> <span class="s">(</span><span class="k">keys</span> <span class="i">%</span>{<span class="i">$AtomConnections</span>[<span class="i">$ProcessAtomNum</span>]}<span class="s">)</span> <span class="s">{</span>
|
|
522 509 <span class="k">if</span> <span class="s">(</span>!<span class="i">$ProcessedAtoms</span>[<span class="i">$NbrAtomNum</span>]<span class="s">)</span> <span class="s">{</span>
|
|
523 510 <span class="i">$ProcessedAtomCount</span>++<span class="sc">;</span>
|
|
524 511 <span class="i">$ProcessedAtoms</span>[<span class="i">$NbrAtomNum</span>] = <span class="n">1</span><span class="sc">;</span>
|
|
525 512 <span class="k">push</span> <span class="i">@ConnectedAtoms</span><span class="cm">,</span> <span class="i">$NbrAtomNum</span><span class="sc">;</span>
|
|
526 513 <span class="k">push</span> <span class="i">@</span>{ <span class="i">$Fragments</span>{<span class="i">$FragmentNum</span>} }<span class="cm">,</span> <span class="i">$NbrAtomNum</span><span class="sc">;</span>
|
|
527 514 <span class="s">}</span>
|
|
528 515 <span class="s">}</span>
|
|
529 516 <span class="s">}</span>
|
|
530 517 <span class="i">@ProcessingAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
531 518 <span class="i">@ProcessingAtoms</span> = <span class="i">@ConnectedAtoms</span><span class="sc">;</span>
|
|
532 519 <span class="i">@ConnectedAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
533 520 <span class="s">}</span>
|
|
534 521 <span class="s">}</span>
|
|
535 522 <span class="i">$FragmentCount</span> = <span class="i">$FragmentNum</span><span class="sc">;</span>
|
|
536 523 <span class="i">$FragmentString</span> = <span class="q">""</span><span class="sc">;</span>
|
|
537 524
|
|
538 525 <span class="c"># Sort out the fragments by size...</span>
|
|
539 526 <span class="k">for</span> <span class="i">$FragmentNum</span> <span class="s">(</span><span class="k">sort</span> <span class="s">{</span> <span class="i">@</span>{<span class="i">$Fragments</span>{<span class="i">$b</span>}} <=> <span class="i">@</span>{<span class="i">$Fragments</span>{<span class="i">$a</span>}} <span class="s">}</span> <span class="k">keys</span> <span class="i">%Fragments</span> <span class="s">)</span> <span class="s">{</span>
|
|
540 527 <span class="c"># Sort the atoms in a fragment by their numbers...</span>
|
|
541 528 <span class="i">$AFragmentString</span> = <span class="k">join</span> <span class="q">" "</span><span class="cm">,</span> <span class="k">sort</span> <span class="s">{</span> <span class="i">$a</span> <=> <span class="i">$b</span> <span class="s">}</span> <span class="i">@</span>{ <span class="i">$Fragments</span>{<span class="i">$FragmentNum</span>} }<span class="sc">;</span>
|
|
542 529 <span class="k">if</span> <span class="s">(</span><span class="i">$FragmentString</span><span class="s">)</span> <span class="s">{</span>
|
|
543 530 <span class="i">$FragmentString</span> .= <span class="q">"\n"</span> . <span class="i">$AFragmentString</span><span class="sc">;</span>
|
|
544 531 <span class="s">}</span>
|
|
545 532 <span class="k">else</span> <span class="s">{</span>
|
|
546 533 <span class="i">$FragmentString</span> = <span class="i">$AFragmentString</span><span class="sc">;</span>
|
|
547 534 <span class="s">}</span>
|
|
548 535 <span class="s">}</span>
|
|
549 536 <span class="k">return</span> <span class="s">(</span><span class="i">$FragmentCount</span><span class="cm">,</span> <span class="i">$FragmentString</span><span class="s">)</span><span class="sc">;</span>
|
|
550 537 <span class="s">}</span>
|
|
551 538
|
|
552 539 <span class="c"># Count the number of lines present between the 4th line and the line containing "M END"</span>
|
|
553 <a name="GetCtabLinesCount-"></a> 540 <span class="k">sub </span><span class="m">GetCtabLinesCount</span> <span class="s">{</span>
|
|
554 541 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
555 542 <span class="k">my</span><span class="s">(</span><span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$CtabLinesCount</span><span class="s">)</span><span class="sc">;</span>
|
|
556 543
|
|
557 544 <span class="i">$CtabLinesCount</span> = <span class="n">0</span><span class="sc">;</span>
|
|
558 545 <span class="j">LINE:</span> <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="i">@$CmpdLines</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span>
|
|
559 546 <span class="c">#</span>
|
|
560 547 <span class="c"># Any line after atom and bond data starting with anything other than space or</span>
|
|
561 548 <span class="c"># a digit indicates end of Ctab atom/bond data block...</span>
|
|
562 549 <span class="c">#</span>
|
|
563 550 <span class="k">if</span> <span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>] !~ <span class="q">/^[0-9 ]/</span><span class="s">)</span> <span class="s">{</span>
|
|
564 551 <span class="i">$CtabLinesCount</span> = <span class="i">$LineIndex</span> - <span class="n">4</span><span class="sc">;</span>
|
|
565 552 <span class="k">last</span> <span class="j">LINE</span><span class="sc">;</span>
|
|
566 553 <span class="s">}</span>
|
|
567 554 <span class="s">}</span>
|
|
568 555 <span class="k">return</span> <span class="i">$CtabLinesCount</span><span class="sc">;</span>
|
|
569 556 <span class="s">}</span>
|
|
570 557
|
|
571 558 <span class="c"># Using atom blocks, count the number of atoms which contain special element</span>
|
|
572 559 <span class="c"># symbols not present in the periodic table.</span>
|
|
573 <a name="GetUnknownAtoms-"></a> 560 <span class="k">sub </span><span class="m">GetUnknownAtoms</span> <span class="s">{</span>
|
|
574 561 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
575 562 <span class="k">my</span><span class="s">(</span><span class="i">$UnknownAtomCount</span><span class="cm">,</span> <span class="i">$UnknownAtoms</span><span class="cm">,</span> <span class="i">$UnknownAtomLines</span><span class="cm">,</span> <span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="s">)</span><span class="sc">;</span>
|
|
576 563
|
|
577 564 <span class="i">$UnknownAtomCount</span> = <span class="n">0</span><span class="sc">;</span>
|
|
578 565 <span class="i">$UnknownAtoms</span> = <span class="q">""</span><span class="sc">;</span>
|
|
579 566 <span class="i">$UnknownAtomLines</span> = <span class="q">""</span><span class="sc">;</span>
|
|
580 567 <span class="s">(</span><span class="i">$AtomCount</span><span class="s">)</span> = <span class="i">ParseCmpdCountsLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="s">)</span><span class="sc">;</span>
|
|
581 568 <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span>
|
|
582 569 <span class="s">(</span><span class="i">$AtomSymbol</span><span class="s">)</span> = <span class="i">ParseCmpdAtomLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="s">)</span><span class="sc">;</span>
|
|
583 570 <span class="k">if</span> <span class="s">(</span>!<span class="i">IsElement</span><span class="s">(</span><span class="i">$AtomSymbol</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
584 571 <span class="i">$UnknownAtomCount</span>++<span class="sc">;</span>
|
|
585 572 <span class="i">$UnknownAtoms</span> .= <span class="q">" $AtomSymbol"</span><span class="sc">;</span>
|
|
586 573 <span class="k">if</span> <span class="s">(</span><span class="i">$UnknownAtomLines</span><span class="s">)</span> <span class="s">{</span>
|
|
587 574 <span class="i">$UnknownAtomLines</span> .= <span class="q">"\n"</span> . <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span>
|
|
588 575 <span class="s">}</span>
|
|
589 576 <span class="k">else</span> <span class="s">{</span>
|
|
590 577 <span class="i">$UnknownAtomLines</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span>
|
|
591 578 <span class="s">}</span>
|
|
592 579 <span class="s">}</span>
|
|
593 580 <span class="s">}</span>
|
|
594 581 <span class="k">return</span> <span class="s">(</span><span class="i">$UnknownAtomCount</span><span class="cm">,</span> <span class="i">$UnknownAtoms</span><span class="cm">,</span> <span class="i">$UnknownAtomLines</span><span class="s">)</span><span class="sc">;</span>
|
|
595 582 <span class="s">}</span>
|
|
596 583
|
|
597 584 <span class="c"># Check z coordinates of all atoms to see whether any of them is non-zero</span>
|
|
598 585 <span class="c"># which makes the compound geometry three dimensional...</span>
|
|
599 586 <span class="c">#</span>
|
|
600 <a name="IsCmpd3D-"></a> 587 <span class="k">sub </span><span class="m">IsCmpd3D</span> <span class="s">{</span>
|
|
601 588 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
602 589 <span class="k">my</span><span class="s">(</span><span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="s">)</span><span class="sc">;</span>
|
|
603 590
|
|
604 591 <span class="s">(</span><span class="i">$AtomCount</span><span class="s">)</span> = <span class="i">ParseCmpdCountsLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="s">)</span><span class="sc">;</span>
|
|
605 592 <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span>
|
|
606 593 <span class="s">(</span><span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="s">)</span> = <span class="i">ParseCmpdAtomLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="s">)</span><span class="sc">;</span>
|
|
607 594 <span class="k">if</span> <span class="s">(</span><span class="i">$AtomZ</span> != <span class="n">0</span><span class="s">)</span> <span class="s">{</span>
|
|
608 595 <span class="k">return</span> <span class="n">1</span><span class="sc">;</span>
|
|
609 596 <span class="s">}</span>
|
|
610 597 <span class="s">}</span>
|
|
611 598 <span class="k">return</span> <span class="n">0</span><span class="sc">;</span>
|
|
612 599 <span class="s">}</span>
|
|
613 600
|
|
614 601 <span class="c"># Check whether it's a 2D compound...</span>
|
|
615 602 <span class="c">#</span>
|
|
616 <a name="IsCmpd2D-"></a> 603 <span class="k">sub </span><span class="m">IsCmpd2D</span> <span class="s">{</span>
|
|
617 604 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
618 605
|
|
619 606 <span class="k">return</span> <span class="i">IsCmpd3D</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> ? <span class="n">0</span> <span class="co">:</span> <span class="n">1</span><span class="sc">;</span>
|
|
620 607 <span class="s">}</span>
|
|
621 608
|
|
622 609 <span class="c"># Using bond blocks, count the number of bond lines which contain atom numbers</span>
|
|
623 610 <span class="c"># greater than atom count specified in compound count line...</span>
|
|
624 611 <span class="c">#</span>
|
|
625 <a name="GetInvalidAtomNumbers-"></a> 612 <span class="k">sub </span><span class="m">GetInvalidAtomNumbers</span> <span class="s">{</span>
|
|
626 613 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
627 614 <span class="k">my</span><span class="s">(</span><span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$InvalidAtomNumbersCount</span><span class="cm">,</span> <span class="i">$InvalidAtomNumbers</span><span class="cm">,</span> <span class="i">$InvalidAtomNumberLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">$InvalidAtomPropertyLine</span><span class="cm">,</span> <span class="i">$ValuePairIndex</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$Value</span><span class="cm">,</span> <span class="i">@ValuePairs</span><span class="s">)</span><span class="sc">;</span>
|
|
628 615
|
|
629 616 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="s">)</span> = <span class="i">ParseCmpdCountsLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="s">)</span><span class="sc">;</span>
|
|
630 617
|
|
631 618 <span class="i">$InvalidAtomNumbersCount</span> = <span class="n">0</span><span class="sc">;</span>
|
|
632 619 <span class="i">$InvalidAtomNumbers</span> = <span class="q">""</span><span class="sc">;</span>
|
|
633 620 <span class="i">$InvalidAtomNumberLines</span> = <span class="q">""</span><span class="sc">;</span>
|
|
634 621
|
|
635 622 <span class="c"># Go over bond block lines...</span>
|
|
636 623 <span class="j">LINE:</span> <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span> + <span class="i">$AtomCount</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span> + <span class="i">$BondCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span>
|
|
637 624 <span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="s">)</span> = <span class="i">ParseCmpdBondLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="s">)</span><span class="sc">;</span>
|
|
638 625 <span class="k">if</span> <span class="s">(</span><span class="i">$FirstAtomNum</span> <= <span class="i">$AtomCount</span> && <span class="i">$SecondAtomNum</span> <= <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span>
|
|
639 626 <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span>
|
|
640 627 <span class="s">}</span>
|
|
641 628 <span class="k">if</span> <span class="s">(</span><span class="i">$FirstAtomNum</span> > <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span>
|
|
642 629 <span class="i">$InvalidAtomNumbersCount</span>++<span class="sc">;</span>
|
|
643 630 <span class="i">$InvalidAtomNumbers</span> .= <span class="q">" $FirstAtomNum"</span><span class="sc">;</span>
|
|
644 631 <span class="s">}</span>
|
|
645 632 <span class="k">if</span> <span class="s">(</span><span class="i">$SecondAtomNum</span> > <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span>
|
|
646 633 <span class="i">$InvalidAtomNumbersCount</span>++<span class="sc">;</span>
|
|
647 634 <span class="i">$InvalidAtomNumbers</span> .= <span class="q">" $SecondAtomNum"</span><span class="sc">;</span>
|
|
648 635 <span class="s">}</span>
|
|
649 636 <span class="k">if</span> <span class="s">(</span><span class="i">$InvalidAtomNumberLines</span><span class="s">)</span> <span class="s">{</span>
|
|
650 637 <span class="i">$InvalidAtomNumberLines</span> .= <span class="q">"\n"</span> . <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span>
|
|
651 638 <span class="s">}</span>
|
|
652 639 <span class="k">else</span> <span class="s">{</span>
|
|
653 640 <span class="i">$InvalidAtomNumberLines</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span>
|
|
654 641 <span class="s">}</span>
|
|
655 642 <span class="s">}</span>
|
|
656 643 <span class="c"># Go over property lines before M END...</span>
|
|
657 644 <span class="c">#</span>
|
|
658 645 <span class="j">LINE:</span> <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span> + <span class="i">$BondCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="i">@$CmpdLines</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span>
|
|
659 646 <span class="i">$Line</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span>
|
|
660 647 <span class="i">@ValuePairs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
661 648 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M END/i</span><span class="s">)</span> <span class="s">{</span>
|
|
662 649 <span class="k">last</span> <span class="j">LINE</span><span class="sc">;</span>
|
|
663 650 <span class="s">}</span>
|
|
664 651 <span class="i">@ValuePairs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
665 652 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M CHG/i</span><span class="s">)</span> <span class="s">{</span>
|
|
666 653 <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdChargePropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
667 654 <span class="s">}</span>
|
|
668 655 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M RAD/i</span><span class="s">)</span> <span class="s">{</span>
|
|
669 656 <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdRadicalPropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
670 657 <span class="s">}</span>
|
|
671 658 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M ISO/i</span><span class="s">)</span> <span class="s">{</span>
|
|
672 659 <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdIsotopePropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
673 660 <span class="s">}</span>
|
|
674 661 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^A /i</span><span class="s">)</span> <span class="s">{</span>
|
|
675 662 <span class="k">my</span><span class="s">(</span><span class="i">$NextLine</span><span class="s">)</span><span class="sc">;</span>
|
|
676 663 <span class="i">$LineIndex</span>++<span class="sc">;</span>
|
|
677 664 <span class="i">$NextLine</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span>
|
|
678 665 <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdAtomAliasPropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">$NextLine</span><span class="s">)</span><span class="sc">;</span>
|
|
679 666 <span class="s">}</span>
|
|
680 667 <span class="k">else</span> <span class="s">{</span>
|
|
681 668 <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span>
|
|
682 669 <span class="s">}</span>
|
|
683 670
|
|
684 671 <span class="i">$InvalidAtomPropertyLine</span> = <span class="n">0</span><span class="sc">;</span>
|
|
685 672 <span class="k">for</span> <span class="s">(</span><span class="i">$ValuePairIndex</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$ValuePairIndex</span> < <span class="i">$#ValuePairs</span><span class="sc">;</span> <span class="i">$ValuePairIndex</span> += <span class="n">2</span><span class="s">)</span> <span class="s">{</span>
|
|
686 673 <span class="i">$AtomNum</span> = <span class="i">$ValuePairs</span>[<span class="i">$ValuePairIndex</span>]<span class="sc">;</span> <span class="i">$Value</span> = <span class="i">$ValuePairs</span>[<span class="i">$ValuePairIndex</span> + <span class="n">1</span>]<span class="sc">;</span>
|
|
687 674 <span class="k">if</span> <span class="s">(</span><span class="i">$AtomNum</span> > <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span>
|
|
688 675 <span class="i">$InvalidAtomPropertyLine</span> = <span class="n">1</span><span class="sc">;</span>
|
|
689 676 <span class="i">$InvalidAtomNumbersCount</span>++<span class="sc">;</span>
|
|
690 677 <span class="i">$InvalidAtomNumbers</span> .= <span class="q">" $AtomNum"</span><span class="sc">;</span>
|
|
691 678 <span class="s">}</span>
|
|
692 679 <span class="s">}</span>
|
|
693 680 <span class="k">if</span> <span class="s">(</span><span class="i">$InvalidAtomPropertyLine</span><span class="s">)</span> <span class="s">{</span>
|
|
694 681 <span class="k">if</span> <span class="s">(</span><span class="i">$InvalidAtomNumberLines</span><span class="s">)</span> <span class="s">{</span>
|
|
695 682 <span class="i">$InvalidAtomNumberLines</span> .= <span class="q">"\n"</span> . <span class="i">$Line</span><span class="sc">;</span>
|
|
696 683 <span class="s">}</span>
|
|
697 684 <span class="k">else</span> <span class="s">{</span>
|
|
698 685 <span class="i">$InvalidAtomNumberLines</span> = <span class="i">$Line</span><span class="sc">;</span>
|
|
699 686 <span class="s">}</span>
|
|
700 687 <span class="s">}</span>
|
|
701 688 <span class="s">}</span>
|
|
702 689
|
|
703 690 <span class="k">return</span> <span class="s">(</span><span class="i">$InvalidAtomNumbersCount</span><span class="cm">,</span> <span class="i">$InvalidAtomNumbers</span><span class="cm">,</span> <span class="i">$InvalidAtomNumberLines</span><span class="s">)</span><span class="sc">;</span>
|
|
704 691 <span class="s">}</span>
|
|
705 692
|
|
706 693 <span class="c"># Ctab lines: Atom block</span>
|
|
707 694 <span class="c">#</span>
|
|
708 695 <span class="c"># Format: xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee</span>
|
|
709 696 <span class="c"># A10 A10 A10 xA3 A2A3 A3 A3 A3 A3 A3 A3 A3 A3 A3 A3</span>
|
|
710 697 <span class="c"># x,y,z: Atom coordinates</span>
|
|
711 698 <span class="c"># aaa: Atom symbol. Entry in periodic table or L for atom list, A, Q, * for unspecified</span>
|
|
712 699 <span class="c"># atom, and LP for lone pair, or R# for Rgroup label</span>
|
|
713 700 <span class="c"># dd: Mass difference. -3, -2, -1, 0, 1, 2, 3, 4 (0 for value beyond these limits)</span>
|
|
714 701 <span class="c"># ccc: Charge. 0 = uncharged or value other than these, 1 = +3, 2 = +2, 3 = +1,</span>
|
|
715 702 <span class="c"># 4 = doublet radical, 5 = -1, 6 = -2, 7 = -3</span>
|
|
716 703 <span class="c"># sss: Atom stereo parity. 0 = not stereo, 1 = odd, 2 = even, 3 = either or unmarked stereo center</span>
|
|
717 704 <span class="c"># hhh: Hydrogen count + 1. 1 = H0, 2 = H1, 3 = H2, 4 = H3, 5 = H4</span>
|
|
718 705 <span class="c"># bbb: Stereo care box. 0 = ignore stereo configuration of this double bond atom, 1 = stereo</span>
|
|
719 706 <span class="c"># configuration of double bond atom must match</span>
|
|
720 707 <span class="c"># vvv: Valence. 0 = no marking (default), (1 to 14) = (1 to 14), 15 = zero valence</span>
|
|
721 708 <span class="c"># HHH: H0 designator. 0 = not specified, 1 = no H atoms allowed (redundant due to hhh)</span>
|
|
722 709 <span class="c"># rrr: Not used</span>
|
|
723 710 <span class="c"># iii: Not used</span>
|
|
724 711 <span class="c"># mmm: Atom-atom mapping number. 1 - number of atoms</span>
|
|
725 712 <span class="c"># nnn: Inversion/retention flag. 0 = property not applied, 1 = configuration is inverted,</span>
|
|
726 713 <span class="c"># 2 = configuration is retained.</span>
|
|
727 714 <span class="c"># eee: Exact change flag. 0 = property not applied, 1 = change on atom must be</span>
|
|
728 715 <span class="c"># exactly as shown</span>
|
|
729 716 <span class="c">#</span>
|
|
730 717 <span class="c"># Notes:</span>
|
|
731 718 <span class="c"># . StereoParity: 1 - ClockwiseStereo, 2 - AntiClockwiseStereo; 3 - Either; 0 - none. These</span>
|
|
732 719 <span class="c"># values determine chirality around the chiral center; a non-zero value indicates atom</span>
|
|
733 720 <span class="c"># has been marked as chiral center.</span>
|
|
734 721 <span class="c">#</span>
|
|
735 <a name="ParseCmpdAtomLine-"></a> 722 <span class="k">sub </span><span class="m">ParseCmpdAtomLine</span> <span class="s">{</span>
|
|
736 723 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
737 724 <span class="k">my</span> <span class="s">(</span><span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="s">)</span><span class="sc">;</span>
|
|
738 725
|
|
739 726 <span class="s">(</span><span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="s">)</span> = <span class="s">(</span><span class="q">''</span><span class="s">)</span> x <span class="n">7</span><span class="sc">;</span>
|
|
740 727 <span class="k">if</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> > <span class="n">31</span><span class="s">)</span> <span class="s">{</span>
|
|
741 728 <span class="s">(</span><span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A10A10A10xA3A2A3A3"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
742 729 <span class="s">}</span>
|
|
743 730 <span class="k">else</span> <span class="s">{</span>
|
|
744 731 <span class="s">(</span><span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A10A10A10"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
745 732 <span class="s">}</span>
|
|
746 733 <span class="k">return</span> <span class="s">(</span><span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="s">)</span><span class="sc">;</span>
|
|
747 734 <span class="s">}</span>
|
|
748 735
|
|
749 736 <span class="c"># Map MDL charge value used in SD and MOL files to internal charge used by MayaChemTools.</span>
|
|
750 737 <span class="c">#</span>
|
|
751 <a name="MDLChargeToInternalCharge-"></a> 738 <span class="k">sub </span><span class="m">MDLChargeToInternalCharge</span> <span class="s">{</span>
|
|
752 739 <span class="k">my</span><span class="s">(</span><span class="i">$MDLCharge</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
753 740 <span class="k">my</span><span class="s">(</span><span class="i">$InternalCharge</span><span class="s">)</span><span class="sc">;</span>
|
|
754 741
|
|
755 742 <span class="j">CHARGE:</span> <span class="s">{</span>
|
|
756 743 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">0</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">0</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span>
|
|
757 744 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span>
|
|
758 745 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">2</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span>
|
|
759 746 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span>
|
|
760 747 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">5</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">-1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span>
|
|
761 748 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">6</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">-2</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span>
|
|
762 749 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">7</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">-3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span>
|
|
763 750 <span class="c"># All other MDL charge values, including 4 corresponding to "doublet radical",</span>
|
|
764 751 <span class="c"># are assigned internal value of 0.</span>
|
|
765 752 <span class="i">$InternalCharge</span> = <span class="n">0</span><span class="sc">;</span>
|
|
766 753 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> != <span class="n">4</span><span class="s">)</span> <span class="s">{</span>
|
|
767 754 <span class="w">carp</span> <span class="q">"Warning: MDLChargeToInternalCharge: MDL charge value, $MDLCharge, is not supported: An internal charge value, 0, has been assigned..."</span><span class="sc">;</span>
|
|
768 755 <span class="s">}</span>
|
|
769 756 <span class="s">}</span>
|
|
770 757 <span class="k">return</span> <span class="i">$InternalCharge</span><span class="sc">;</span>
|
|
771 758 <span class="s">}</span>
|
|
772 759
|
|
773 760 <span class="c"># Map internal charge used by MayaChemTools to MDL charge value used in SD and MOL files.</span>
|
|
774 761 <span class="c">#</span>
|
|
775 <a name="InternalChargeToMDLCharge-"></a> 762 <span class="k">sub </span><span class="m">InternalChargeToMDLCharge</span> <span class="s">{</span>
|
|
776 763 <span class="k">my</span><span class="s">(</span><span class="i">$InternalCharge</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
777 764 <span class="k">my</span><span class="s">(</span><span class="i">$MDLCharge</span><span class="s">)</span><span class="sc">;</span>
|
|
778 765
|
|
779 766 <span class="j">CHARGE:</span> <span class="s">{</span>
|
|
780 767 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span>
|
|
781 768 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">2</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span>
|
|
782 769 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span>
|
|
783 770 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">-1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">5</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span>
|
|
784 771 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">-2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">6</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span>
|
|
785 772 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">-3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">7</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span>
|
|
786 773 <span class="c"># All other internal charge values, including 0, are assigned MDL charge</span>
|
|
787 774 <span class="c"># value of 0.</span>
|
|
788 775 <span class="i">$MDLCharge</span> = <span class="n">0</span><span class="sc">;</span>
|
|
789 776 <span class="s">}</span>
|
|
790 777 <span class="k">return</span> <span class="i">$MDLCharge</span><span class="sc">;</span>
|
|
791 778 <span class="s">}</span>
|
|
792 779
|
|
793 780 <span class="c"># Ctab lines: Bond block</span>
|
|
794 781 <span class="c">#</span>
|
|
795 782 <span class="c"># Format: 111222tttsssxxxrrrccc</span>
|
|
796 783 <span class="c">#</span>
|
|
797 784 <span class="c"># 111: First atom number.</span>
|
|
798 785 <span class="c"># 222: Second atom number.</span>
|
|
799 786 <span class="c"># ttt: Bond type. 1 = Single, 2 = Double, 3 = Triple, 4 = Aromatic, 5 = Single or Double,</span>
|
|
800 787 <span class="c"># 6 = Single or Aromatic, 7 = Double or Aromatic, 8 = Any</span>
|
|
801 788 <span class="c"># sss: Bond stereo. Single bonds: 0 = not stereo, 1 = Up, 4 = Either, 6 = Down,</span>
|
|
802 789 <span class="c"># Double bonds: 0 = Use x-, y-, z-coords from atom block to determine cis or trans,</span>
|
|
803 790 <span class="c"># 3 = Cis or trans (either) double bond</span>
|
|
804 791 <span class="c"># xxx: Not used</span>
|
|
805 792 <span class="c"># rrr: Bond topology. 0 = Either, 1 = Ring, 2 = Chain</span>
|
|
806 793 <span class="c"># ccc: Reacting center status. 0 = unmarked, 1 = a center, -1 = not a center,</span>
|
|
807 794 <span class="c"># Additional: 2 = no change, 4 = bond made/broken, 8 = bond order changes, 12 = 4 + 8</span>
|
|
808 795 <span class="c"># (both made/broken and changes); 5 = (4 + 1), 9 = (8 + 1), and 13 = (12 + 1) are also possible</span>
|
|
809 796 <span class="c">#</span>
|
|
810 <a name="ParseCmpdBondLine-"></a> 797 <span class="k">sub </span><span class="m">ParseCmpdBondLine</span> <span class="s">{</span>
|
|
811 798 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
812 799 <span class="k">my</span><span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span><span class="sc">;</span>
|
|
813 800
|
|
814 801 <span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span> = <span class="k">map</span> <span class="s">{</span><span class="q">s/ //g</span><span class="sc">;</span> <span class="i">$_</span><span class="s">}</span> <span class="k">unpack</span><span class="s">(</span><span class="q">"A3A3A3A3"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
815 802 <span class="k">return</span> <span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span><span class="sc">;</span>
|
|
816 803 <span class="s">}</span>
|
|
817 804
|
|
818 805 <span class="c"># Map MDL bond type value used in SD and MOL files to internal bond order and bond types</span>
|
|
819 806 <span class="c"># values used by MayaChemTools...</span>
|
|
820 807 <span class="c">#</span>
|
|
821 <a name="MDLBondTypeToInternalBondOrder-"></a> 808 <span class="k">sub </span><span class="m">MDLBondTypeToInternalBondOrder</span> <span class="s">{</span>
|
|
822 809 <span class="k">my</span><span class="s">(</span><span class="i">$MDLBondType</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
823 810 <span class="k">my</span><span class="s">(</span><span class="i">$InternalBondOrder</span><span class="cm">,</span> <span class="i">$InternalBondType</span><span class="s">)</span><span class="sc">;</span>
|
|
824 811
|
|
825 812 <span class="i">$InternalBondType</span> = <span class="q">''</span><span class="sc">;</span>
|
|
826 813
|
|
827 814 <span class="j">BONDTYPE:</span> <span class="s">{</span>
|
|
828 815 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">1</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'Single'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span>
|
|
829 816 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">2</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'Double'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span>
|
|
830 817 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">3</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'Triple'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span>
|
|
831 818 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">4</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">1.5</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'Aromatic'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> <span class="c"># Aromatic</span>
|
|
832 819 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">5</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">1</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'SingleOrDouble'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> <span class="c"># Single or Double</span>
|
|
833 820 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">6</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">1</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'SingleOrAromatic'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> <span class="c"># Single or Aromatic</span>
|
|
834 821 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">7</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">2</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'DoubleOrAromatic'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> <span class="c"># Double or Aromatic</span>
|
|
835 822 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">8</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">1</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'Any'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> <span class="c"># Any</span>
|
|
836 823 <span class="c">#</span>
|
|
837 824 <span class="c"># Although MDL aromatic bond values are used for query only and explicit Kekule bond order</span>
|
|
838 825 <span class="c"># values must be assigned, internal value of 1.5 is allowed to indicate aromatic bond orders.</span>
|
|
839 826 <span class="c">#</span>
|
|
840 827 <span class="c"># MDL bond type values meant to be used for structure queries by MDL products - 5 = Single or Double,</span>
|
|
841 828 <span class="c"># 6 = Single or Aromatic, 7 = Double or Aromatic, 8 = Any - are handled above: 5, 6 and 8 are assigned</span>
|
|
842 829 <span class="c"># internal bond order of 1 and 7 is assigned 2. Any other value is assigned bond order 1 and type Single.</span>
|
|
843 830 <span class="c">#</span>
|
|
844 831 <span class="i">$InternalBondOrder</span> = <span class="n">1</span><span class="sc">;</span>
|
|
845 832 <span class="i">$InternalBondType</span> = <span class="q">'Single'</span><span class="sc">;</span>
|
|
846 833
|
|
847 834 <span class="w">carp</span> <span class="q">"Warning: MDLBondTypeToInternalBondOrder: MDL bond type value, $MDLBondType, is not supported: An internal bond order value, 0, has been assigned..."</span><span class="sc">;</span>
|
|
848 835 <span class="s">}</span>
|
|
849 836 <span class="k">return</span> <span class="s">(</span><span class="i">$InternalBondOrder</span><span class="cm">,</span> <span class="i">$InternalBondType</span><span class="s">)</span><span class="sc">;</span>
|
|
850 837 <span class="s">}</span>
|
|
851 838
|
|
852 839 <span class="c"># Map internal bond order and bond type values used by MayaChemTools to MDL bond type value used</span>
|
|
853 840 <span class="c"># in SD and MOL files...</span>
|
|
854 841 <span class="c">#</span>
|
|
855 <a name="InternalBondOrderToMDLBondType-"></a> 842 <span class="k">sub </span><span class="m">InternalBondOrderToMDLBondType</span> <span class="s">{</span>
|
|
856 843 <span class="k">my</span><span class="s">(</span><span class="i">$InternalBondOrder</span><span class="cm">,</span> <span class="i">$InternalBondType</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
857 844 <span class="k">my</span><span class="s">(</span><span class="i">$MDLBondType</span><span class="s">)</span><span class="sc">;</span>
|
|
858 845
|
|
859 846 <span class="j">BONDTYPE:</span> <span class="s">{</span>
|
|
860 847 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondOrder</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span>
|
|
861 848 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondType</span> =~ <span class="q">/^SingleOrDouble$/i</span><span class="s">)</span> <span class="s">{</span>
|
|
862 849 <span class="i">$MDLBondType</span> = <span class="n">5</span><span class="sc">;</span>
|
|
863 850 <span class="s">}</span>
|
|
864 851 <span class="k">elsif</span> <span class="s">(</span><span class="i">$InternalBondType</span> =~ <span class="q">/^SingleOrAromatic$/i</span><span class="s">)</span> <span class="s">{</span>
|
|
865 852 <span class="i">$MDLBondType</span> = <span class="n">6</span><span class="sc">;</span>
|
|
866 853 <span class="s">}</span>
|
|
867 854 <span class="k">elsif</span> <span class="s">(</span><span class="i">$InternalBondType</span> =~ <span class="q">/^Any$/i</span><span class="s">)</span> <span class="s">{</span>
|
|
868 855 <span class="i">$MDLBondType</span> = <span class="n">8</span><span class="sc">;</span>
|
|
869 856 <span class="s">}</span>
|
|
870 857 <span class="k">else</span> <span class="s">{</span>
|
|
871 858 <span class="i">$MDLBondType</span> = <span class="n">1</span><span class="sc">;</span>
|
|
872 859 <span class="s">}</span>
|
|
873 860 <span class="i">$MDLBondType</span> = <span class="n">1</span><span class="sc">;</span>
|
|
874 861 <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span>
|
|
875 862 <span class="s">}</span>
|
|
876 863 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondOrder</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span>
|
|
877 864 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondType</span> =~ <span class="q">/^DoubleOrAromatic$/i</span><span class="s">)</span> <span class="s">{</span>
|
|
878 865 <span class="i">$MDLBondType</span> = <span class="n">7</span><span class="sc">;</span>
|
|
879 866 <span class="s">}</span>
|
|
880 867 <span class="k">else</span> <span class="s">{</span>
|
|
881 868 <span class="i">$MDLBondType</span> = <span class="n">2</span><span class="sc">;</span>
|
|
882 869 <span class="s">}</span>
|
|
883 870 <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span>
|
|
884 871 <span class="s">}</span>
|
|
885 872 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondOrder</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondType</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span>
|
|
886 873 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondOrder</span> == <span class="n">1.5</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondType</span> = <span class="n">4</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span>
|
|
887 874 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondType</span> =~ <span class="q">/^Any$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondType</span> = <span class="n">8</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span>
|
|
888 875
|
|
889 876 <span class="i">$MDLBondType</span> = <span class="n">1</span><span class="sc">;</span>
|
|
890 877
|
|
891 878 <span class="w">carp</span> <span class="q">"Warning: InternalBondOrderToMDLBondType: Internal bond order and type values, $InternalBondOrder and $InternalBondType, don't match any valid MDL bond type: MDL bond type value, 1, has been assigned..."</span><span class="sc">;</span>
|
|
892 879 <span class="s">}</span>
|
|
893 880 <span class="k">return</span> <span class="i">$MDLBondType</span><span class="sc">;</span>
|
|
894 881 <span class="s">}</span>
|
|
895 882
|
|
896 883 <span class="c"># Third line: Comments - A blank line is also allowed.</span>
|
|
897 <a name="ParseCmpdCommentsLine-"></a> 884 <span class="k">sub </span><span class="m">ParseCmpdCommentsLine</span> <span class="s">{</span>
|
|
898 885 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
899 886 <span class="k">my</span><span class="s">(</span><span class="i">$Comments</span><span class="s">)</span><span class="sc">;</span>
|
|
900 887
|
|
901 888 <span class="i">$Comments</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A80"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
902 889
|
|
903 890 <span class="k">return</span> <span class="s">(</span><span class="i">$Comments</span><span class="s">)</span><span class="sc">;</span>
|
|
904 891 <span class="s">}</span>
|
|
905 892
|
|
906 893 <span class="c"># Map MDL bond stereo value used in SD and MOL files to internal bond stereochemistry values used by MayaChemTools...</span>
|
|
907 894 <span class="c">#</span>
|
|
908 <a name="MDLBondStereoToInternalBondStereochemistry-"></a> 895 <span class="k">sub </span><span class="m">MDLBondStereoToInternalBondStereochemistry</span> <span class="s">{</span>
|
|
909 896 <span class="k">my</span><span class="s">(</span><span class="i">$MDLBondStereo</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
910 897 <span class="k">my</span><span class="s">(</span><span class="i">$InternalBondStereo</span><span class="s">)</span><span class="sc">;</span>
|
|
911 898
|
|
912 899 <span class="i">$InternalBondStereo</span> = <span class="q">''</span><span class="sc">;</span>
|
|
913 900
|
|
914 901 <span class="j">BONDSTEREO:</span> <span class="s">{</span>
|
|
915 902 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondStereo</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondStereo</span> = <span class="q">'Up'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span>
|
|
916 903 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondStereo</span> == <span class="n">4</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondStereo</span> = <span class="q">'UpOrDown'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span>
|
|
917 904 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondStereo</span> == <span class="n">6</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondStereo</span> = <span class="q">'Down'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span>
|
|
918 905 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondStereo</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondStereo</span> = <span class="q">'CisOrTrans'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span>
|
|
919 906 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondStereo</span> == <span class="n">0</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondStereo</span> = <span class="q">'None'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span>
|
|
920 907
|
|
921 908 <span class="i">$InternalBondStereo</span> = <span class="q">''</span><span class="sc">;</span>
|
|
922 909 <span class="w">carp</span> <span class="q">"Warning: MDLBondStereoToInternalBondType: MDL bond stereo value, $MDLBondStereo, is not supported: It has been ignored and bond order would be used to determine bond type..."</span><span class="sc">;</span>
|
|
923 910 <span class="s">}</span>
|
|
924 911 <span class="k">return</span> <span class="i">$InternalBondStereo</span><span class="sc">;</span>
|
|
925 912 <span class="s">}</span>
|
|
926 913
|
|
927 914 <span class="c"># Map internal bond stereochemistry values used by MayaChemTools to MDL bond stereo value used in SD and MOL files...</span>
|
|
928 915 <span class="c">#</span>
|
|
929 <a name="InternalBondStereochemistryToMDLBondStereo-"></a> 916 <span class="k">sub </span><span class="m">InternalBondStereochemistryToMDLBondStereo</span> <span class="s">{</span>
|
|
930 917 <span class="k">my</span><span class="s">(</span><span class="i">$InternalBondStereo</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
931 918 <span class="k">my</span><span class="s">(</span><span class="i">$MDLBondStereo</span><span class="s">)</span><span class="sc">;</span>
|
|
932 919
|
|
933 920 <span class="i">$MDLBondStereo</span> = <span class="n">0</span><span class="sc">;</span>
|
|
934 921
|
|
935 922 <span class="j">BONDSTEREO:</span> <span class="s">{</span>
|
|
936 923 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondStereo</span> =~ <span class="q">/^Up$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondStereo</span> = <span class="n">1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span>
|
|
937 924 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondStereo</span> =~ <span class="q">/^UpOrDown$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondStereo</span> = <span class="n">4</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span>
|
|
938 925 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondStereo</span> =~ <span class="q">/^Down$/</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondStereo</span> = <span class="n">6</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span>
|
|
939 926 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondStereo</span> =~ <span class="q">/^CisOrTrans$/</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondStereo</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span>
|
|
940 927
|
|
941 928 <span class="i">$MDLBondStereo</span> = <span class="n">0</span><span class="sc">;</span>
|
|
942 929 <span class="s">}</span>
|
|
943 930 <span class="k">return</span> <span class="i">$MDLBondStereo</span><span class="sc">;</span>
|
|
944 931 <span class="s">}</span>
|
|
945 932
|
|
946 933 <span class="c"># Fourth line: Counts</span>
|
|
947 934 <span class="c">#</span>
|
|
948 935 <span class="c"># Format: aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv</span>
|
|
949 936 <span class="c">#</span>
|
|
950 937 <span class="c"># aaa: number of atoms; bbb: number of bonds; lll: number of atom lists; fff: (obsolete)</span>
|
|
951 938 <span class="c"># ccc: chiral flag: 0=not chiral, 1=chiral; sss: number of stext entries; xxx,rrr,ppp,iii:</span>
|
|
952 939 <span class="c"># (obsolete); mmm: number of lines of additional properties, including the M END line, No</span>
|
|
953 940 <span class="c"># longer supported, default is set to 999; vvvvvv: version</span>
|
|
954 941
|
|
955 <a name="ParseCmpdCountsLine-"></a> 942 <span class="k">sub </span><span class="m">ParseCmpdCountsLine</span> <span class="s">{</span>
|
|
956 943 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
957 944 <span class="k">my</span><span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span><span class="sc">;</span>
|
|
958 945
|
|
959 946 <span class="k">if</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> >= <span class="n">39</span><span class="s">)</span> <span class="s">{</span>
|
|
960 947 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A3A3x3x3A3x3x3x3x3x3A3A6"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
961 948 <span class="s">}</span>
|
|
962 949 <span class="k">elsif</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> >= <span class="n">15</span><span class="s">)</span> <span class="s">{</span>
|
|
963 950 <span class="s">(</span><span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span> = <span class="s">(</span><span class="q">"999"</span><span class="cm">,</span> <span class="q">"v2000"</span><span class="s">)</span><span class="sc">;</span>
|
|
964 951 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A3A3x3x3A3"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
965 952 <span class="s">}</span>
|
|
966 953 <span class="k">else</span> <span class="s">{</span>
|
|
967 954 <span class="s">(</span><span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span> = <span class="s">(</span><span class="q">"0"</span><span class="cm">,</span> <span class="q">"999"</span><span class="cm">,</span> <span class="q">"v2000"</span><span class="s">)</span><span class="sc">;</span>
|
|
968 955 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A3A3"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
969 956 <span class="s">}</span>
|
|
970 957
|
|
971 958 <span class="k">if</span> <span class="s">(</span><span class="i">$Version</span> =~ <span class="q">/V3000/i</span><span class="s">)</span> <span class="s">{</span>
|
|
972 959 <span class="c"># Current version of MayaChemTools modules and classes for processing MDL MOL and SD don't support</span>
|
|
973 960 <span class="c"># V3000. So instead of relying on callers, just exit with an error to disable any processing of V3000</span>
|
|
974 961 <span class="c"># format.</span>
|
|
975 962 <span class="w">croak</span> <span class="q">"Error: SDFileUtil::ParseCmpdCountsLine: The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools..."</span><span class="sc">;</span>
|
|
976 963 <span class="s">}</span>
|
|
977 964
|
|
978 965 <span class="k">return</span> <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span><span class="sc">;</span>
|
|
979 966 <span class="s">}</span>
|
|
980 967
|
|
981 968 <span class="c"># Second line: Misc info</span>
|
|
982 969 <span class="c">#</span>
|
|
983 970 <span class="c"># Format: IIPPPPPPPPMMDDYYHHmmddSSssssssssssEEEEEEEEEEEERRRRRR</span>
|
|
984 971 <span class="c"># A2A8 A10 A2I2A10 A12 A6</span>
|
|
985 972 <span class="c"># User's first and last initials (I), program name (P), date/time (M/D/Y,H:m),</span>
|
|
986 973 <span class="c"># dimensional codes - 2D or 3D (d),scaling factors (S, s), energy (E) if modeling program input,</span>
|
|
987 974 <span class="c"># internal registry number (R) if input through MDL form. A blank line is also allowed.</span>
|
|
988 <a name="ParseCmpdMiscInfoLine-"></a> 975 <span class="k">sub </span><span class="m">ParseCmpdMiscInfoLine</span> <span class="s">{</span>
|
|
989 976 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
990 977 <span class="k">my</span><span class="s">(</span><span class="i">$UserInitial</span><span class="cm">,</span> <span class="i">$ProgramName</span><span class="cm">,</span> <span class="i">$Date</span><span class="cm">,</span> <span class="i">$Code</span><span class="cm">,</span> <span class="i">$ScalingFactor1</span><span class="cm">,</span> <span class="i">$ScalingFactor2</span><span class="cm">,</span> <span class="i">$Energy</span><span class="cm">,</span> <span class="i">$RegistryNum</span><span class="s">)</span><span class="sc">;</span>
|
|
991 978
|
|
992 979 <span class="s">(</span><span class="i">$UserInitial</span><span class="cm">,</span> <span class="i">$ProgramName</span><span class="cm">,</span> <span class="i">$Date</span><span class="cm">,</span> <span class="i">$Code</span><span class="cm">,</span> <span class="i">$ScalingFactor1</span><span class="cm">,</span> <span class="i">$ScalingFactor2</span><span class="cm">,</span> <span class="i">$Energy</span><span class="cm">,</span> <span class="i">$RegistryNum</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A2A8A10A2A2A10A12A6"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
993 980 <span class="k">return</span> <span class="s">(</span><span class="i">$UserInitial</span><span class="cm">,</span> <span class="i">$ProgramName</span><span class="cm">,</span> <span class="i">$Date</span><span class="cm">,</span> <span class="i">$Code</span><span class="cm">,</span> <span class="i">$ScalingFactor1</span><span class="cm">,</span> <span class="i">$ScalingFactor2</span><span class="cm">,</span> <span class="i">$Energy</span><span class="cm">,</span> <span class="i">$RegistryNum</span><span class="s">)</span><span class="sc">;</span>
|
|
994 981 <span class="s">}</span>
|
|
995 982
|
|
996 983 <span class="c"># First line: Molecule name. This line is unformatted, but like all other lines in a</span>
|
|
997 984 <span class="c"># molfile may not extend beyond column 80. A blank line is also allowed.</span>
|
|
998 <a name="ParseCmpdMolNameLine-"></a> 985 <span class="k">sub </span><span class="m">ParseCmpdMolNameLine</span> <span class="s">{</span>
|
|
999 986 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
1000 987 <span class="k">my</span><span class="s">(</span><span class="i">$MolName</span><span class="s">)</span><span class="sc">;</span>
|
|
1001 988
|
|
1002 989 <span class="i">$MolName</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A80"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
1003 990
|
|
1004 991 <span class="k">return</span> <span class="s">(</span><span class="i">$MolName</span><span class="s">)</span><span class="sc">;</span>
|
|
1005 992 <span class="s">}</span>
|
|
1006 993
|
|
1007 994 <span class="c"># Parse atom alias property line in CTAB generic properties block.</span>
|
|
1008 995 <span class="c">#</span>
|
|
1009 996 <span class="c"># Atom alias property line format:</span>
|
|
1010 997 <span class="c">#</span>
|
|
1011 998 <span class="c"># A aaa</span>
|
|
1012 999 <span class="c"># x...</span>
|
|
1013 1000 <span class="c">#</span>
|
|
1014 1001 <span class="c"># aaa: Atom number</span>
|
|
1015 1002 <span class="c"># x: Atom alias in next line</span>
|
|
1016 1003 <span class="c">#</span>
|
|
1017 <a name="ParseCmpdAtomAliasPropertyLine-"></a>1004 <span class="k">sub </span><span class="m">ParseCmpdAtomAliasPropertyLine</span> <span class="s">{</span>
|
|
1018 1005 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">$NextLine</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
1019 1006 <span class="k">my</span><span class="s">(</span><span class="i">$Label</span><span class="cm">,</span> <span class="i">$AtomNumber</span><span class="cm">,</span> <span class="i">$AtomAlias</span><span class="s">)</span><span class="sc">;</span>
|
|
1020 1007
|
|
1021 1008 <span class="s">(</span><span class="i">$Label</span><span class="cm">,</span> <span class="i">$AtomNumber</span><span class="s">)</span> = <span class="k">split</span><span class="s">(</span><span class="q">' '</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
1022 1009 <span class="i">$AtomAlias</span> = <span class="i">$NextLine</span><span class="sc">;</span>
|
|
1023 1010
|
|
1024 1011 <span class="k">if</span> <span class="s">(</span>!<span class="i">$AtomAlias</span><span class="s">)</span> <span class="s">{</span>
|
|
1025 1012 <span class="w">carp</span> <span class="q">"Warning: _ParseCmpdAtomAliasPropertyLine: No atom alias value specified on the line following atom alias property line..."</span><span class="sc">;</span>
|
|
1026 1013 <span class="s">}</span>
|
|
1027 1014
|
|
1028 1015 <span class="k">return</span> <span class="s">(</span><span class="i">$AtomNumber</span><span class="cm">,</span> <span class="i">$AtomAlias</span><span class="s">)</span><span class="sc">;</span>
|
|
1029 1016 <span class="s">}</span>
|
|
1030 1017
|
|
1031 1018 <span class="c"># Parse charge property line in CTAB generic properties block.</span>
|
|
1032 1019 <span class="c">#</span>
|
|
1033 1020 <span class="c"># Charge property line format:</span>
|
|
1034 1021 <span class="c">#</span>
|
|
1035 1022 <span class="c"># M CHGnn8 aaa vvv ...</span>
|
|
1036 1023 <span class="c">#</span>
|
|
1037 1024 <span class="c"># nn8: Number of value pairs. Maximum of 8 pairs allowed.</span>
|
|
1038 1025 <span class="c"># aaa: Atom number</span>
|
|
1039 1026 <span class="c"># vvv: -15 to +15. Default of 0 = uncharged atom. When present, this property supersedes</span>
|
|
1040 1027 <span class="c"># all charge and radical values in the atom block, forcing a 0 charge on all atoms not</span>
|
|
1041 1028 <span class="c"># listed in an M CHG or M RAD line.</span>
|
|
1042 1029 <span class="c">#</span>
|
|
1043 <a name="ParseCmpdChargePropertyLine-"></a>1030 <span class="k">sub </span><span class="m">ParseCmpdChargePropertyLine</span> <span class="s">{</span>
|
|
1044 1031 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
1045 1032
|
|
1046 1033 <span class="k">return</span> <span class="i">_ParseCmpdGenericPropertyLine</span><span class="s">(</span><span class="q">'Charge'</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
1047 1034 <span class="s">}</span>
|
|
1048 1035
|
|
1049 1036
|
|
1050 1037 <span class="c"># Parse isotope property line in CTAB generic properties block.</span>
|
|
1051 1038 <span class="c">#</span>
|
|
1052 1039 <span class="c"># Isotope property line format:</span>
|
|
1053 1040 <span class="c">#</span>
|
|
1054 1041 <span class="c"># M ISOnn8 aaa vvv ...</span>
|
|
1055 1042 <span class="c">#</span>
|
|
1056 1043 <span class="c"># nn8: Number of value pairs. Maximum of 8 pairs allowed.</span>
|
|
1057 1044 <span class="c"># aaa: Atom number</span>
|
|
1058 1045 <span class="c"># vvv: Absolute mass of the atom isotope as a positive integer. When present, this property</span>
|
|
1059 1046 <span class="c"># supersedes all isotope values in the atom block. Default (no entry) means natural</span>
|
|
1060 1047 <span class="c"># abundance. The difference between this absolute mass value and the natural</span>
|
|
1061 1048 <span class="c"># abundance value specified in the PTABLE.DAT file must be within the range of -18</span>
|
|
1062 1049 <span class="c"># to +12</span>
|
|
1063 1050 <span class="c">#</span>
|
|
1064 1051 <span class="c"># Notes:</span>
|
|
1065 1052 <span class="c"># . Values correspond to mass numbers...</span>
|
|
1066 1053 <span class="c">#</span>
|
|
1067 <a name="ParseCmpdIsotopePropertyLine-"></a>1054 <span class="k">sub </span><span class="m">ParseCmpdIsotopePropertyLine</span> <span class="s">{</span>
|
|
1068 1055 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
1069 1056
|
|
1070 1057 <span class="k">return</span> <span class="i">_ParseCmpdGenericPropertyLine</span><span class="s">(</span><span class="q">'Isotope'</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
1071 1058 <span class="s">}</span>
|
|
1072 1059
|
|
1073 1060 <span class="c"># Parse radical property line in CTAB generic properties block.</span>
|
|
1074 1061 <span class="c">#</span>
|
|
1075 1062 <span class="c"># Radical property line format:</span>
|
|
1076 1063 <span class="c">#</span>
|
|
1077 1064 <span class="c"># M RADnn8 aaa vvv ...</span>
|
|
1078 1065 <span class="c">#</span>
|
|
1079 1066 <span class="c"># nn8: Number of value pairs. Maximum of 8 pairs allowed.</span>
|
|
1080 1067 <span class="c"># aaa: Atom number</span>
|
|
1081 1068 <span class="c"># vvv: Default of 0 = no radical, 1 = singlet, 2 = doublet, 3 = triplet. When</span>
|
|
1082 1069 <span class="c"># present, this property supersedes all charge and radical values in the atom block,</span>
|
|
1083 1070 <span class="c"># forcing a 0 (zero) charge and radical on all atoms not listed in an M CHG or</span>
|
|
1084 1071 <span class="c"># M RAD line.</span>
|
|
1085 1072 <span class="c">#</span>
|
|
1086 <a name="ParseCmpdRadicalPropertyLine-"></a>1073 <span class="k">sub </span><span class="m">ParseCmpdRadicalPropertyLine</span> <span class="s">{</span>
|
|
1087 1074 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
1088 1075
|
|
1089 1076 <span class="k">return</span> <span class="i">_ParseCmpdGenericPropertyLine</span><span class="s">(</span><span class="q">'Radical'</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
1090 1077 <span class="s">}</span>
|
|
1091 1078
|
|
1092 1079 <span class="c"># Map MDL radical stereo value used in SD and MOL files to internal spin multiplicity values used by MayaChemTools...</span>
|
|
1093 1080 <span class="c">#</span>
|
|
1094 <a name="MDLRadicalToInternalSpinMultiplicity-"></a>1081 <span class="k">sub </span><span class="m">MDLRadicalToInternalSpinMultiplicity</span> <span class="s">{</span>
|
|
1095 1082 <span class="k">my</span><span class="s">(</span><span class="i">$MDLRadical</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
1096 1083 <span class="k">my</span><span class="s">(</span><span class="i">$InternalSpinMultiplicity</span><span class="s">)</span><span class="sc">;</span>
|
|
1097 1084
|
|
1098 1085 <span class="i">$InternalSpinMultiplicity</span> = <span class="q">''</span><span class="sc">;</span>
|
|
1099 1086
|
|
1100 1087 <span class="j">SPINMULTIPLICITY:</span> <span class="s">{</span>
|
|
1101 1088 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLRadical</span> == <span class="n">0</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalSpinMultiplicity</span> = <span class="n">0</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span>
|
|
1102 1089 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLRadical</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalSpinMultiplicity</span> = <span class="n">1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span>
|
|
1103 1090 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLRadical</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalSpinMultiplicity</span> = <span class="n">2</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span>
|
|
1104 1091 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLRadical</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalSpinMultiplicity</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span>
|
|
1105 1092 <span class="i">$InternalSpinMultiplicity</span> = <span class="q">''</span><span class="sc">;</span>
|
|
1106 1093 <span class="w">carp</span> <span class="q">"Warning: MDLRadicalToInternalSpinMultiplicity: MDL radical value, $MDLRadical, specifed on line M RAD is not supported..."</span><span class="sc">;</span>
|
|
1107 1094 <span class="s">}</span>
|
|
1108 1095 <span class="k">return</span> <span class="i">$InternalSpinMultiplicity</span><span class="sc">;</span>
|
|
1109 1096 <span class="s">}</span>
|
|
1110 1097
|
|
1111 1098 <span class="c"># Map internal spin multiplicity values used by MayaChemTools to MDL radical stereo value used in SD and MOL files...</span>
|
|
1112 1099 <span class="c">#</span>
|
|
1113 <a name="InternalSpinMultiplicityToMDLRadical-"></a>1100 <span class="k">sub </span><span class="m">InternalSpinMultiplicityToMDLRadical</span> <span class="s">{</span>
|
|
1114 1101 <span class="k">my</span><span class="s">(</span><span class="i">$InternalSpinMultiplicity</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
1115 1102 <span class="k">my</span><span class="s">(</span><span class="i">$MDLRadical</span><span class="s">)</span><span class="sc">;</span>
|
|
1116 1103
|
|
1117 1104 <span class="i">$MDLRadical</span> = <span class="n">0</span><span class="sc">;</span>
|
|
1118 1105
|
|
1119 1106 <span class="j">SPINMULTIPLICITY:</span> <span class="s">{</span>
|
|
1120 1107 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalSpinMultiplicity</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLRadical</span> = <span class="n">1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span>
|
|
1121 1108 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalSpinMultiplicity</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLRadical</span> = <span class="n">2</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span>
|
|
1122 1109 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalSpinMultiplicity</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLRadical</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span>
|
|
1123 1110 <span class="i">$MDLRadical</span> = <span class="n">0</span><span class="sc">;</span>
|
|
1124 1111 <span class="s">}</span>
|
|
1125 1112 <span class="k">return</span> <span class="i">$MDLRadical</span><span class="sc">;</span>
|
|
1126 1113 <span class="s">}</span>
|
|
1127 1114
|
|
1128 1115 <span class="c"># Process generic CTAB property line...</span>
|
|
1129 <a name="_ParseCmpdGenericPropertyLine-"></a>1116 <span class="k">sub </span><span class="m">_ParseCmpdGenericPropertyLine</span> <span class="s">{</span>
|
|
1130 1117 <span class="k">my</span><span class="s">(</span><span class="i">$PropertyName</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
1131 1118
|
|
1132 1119 <span class="k">my</span><span class="s">(</span><span class="i">$Label</span><span class="cm">,</span> <span class="i">$PropertyLabel</span><span class="cm">,</span> <span class="i">$ValuesCount</span><span class="cm">,</span> <span class="i">$ValuePairsCount</span><span class="cm">,</span> <span class="i">@ValuePairs</span><span class="s">)</span><span class="sc">;</span>
|
|
1133 1120
|
|
1134 1121 <span class="i">@ValuePairs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
1135 1122 <span class="s">(</span><span class="i">$Label</span><span class="cm">,</span> <span class="i">$PropertyLabel</span><span class="cm">,</span> <span class="i">$ValuesCount</span><span class="cm">,</span> <span class="i">@ValuePairs</span><span class="s">)</span> = <span class="k">split</span><span class="s">(</span><span class="q">' '</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
1136 1123 <span class="i">$ValuePairsCount</span> = <span class="s">(</span><span class="k">scalar</span> <span class="i">@ValuePairs</span><span class="s">)</span>/<span class="n">2</span><span class="sc">;</span>
|
|
1137 1124 <span class="k">if</span> <span class="s">(</span><span class="i">$ValuesCount</span> != <span class="i">$ValuePairsCount</span><span class="s">)</span> <span class="s">{</span>
|
|
1138 1125 <span class="w">carp</span> <span class="q">"Warning: _ParseCmpdGenericPropertyLine: Number of atom number and $PropertyName value paris specified on $Label $PropertyLabel property line, $ValuePairsCount, does not match expected value of $ValuesCount..."</span><span class="sc">;</span>
|
|
1139 1126 <span class="s">}</span>
|
|
1140 1127
|
|
1141 1128 <span class="k">return</span> <span class="s">(</span><span class="i">@ValuePairs</span><span class="s">)</span><span class="sc">;</span>
|
|
1142 1129 <span class="s">}</span>
|
|
1143 1130
|
|
1144 1131 <span class="c"># Generic CTAB property lines for charge, isotope and radical properties...</span>
|
|
1145 1132 <span class="c">#</span>
|
|
1146 <a name="_GenerateCmpdGenericPropertyLines-"></a>1133 <span class="k">sub </span><span class="m">_GenerateCmpdGenericPropertyLines</span> <span class="s">{</span>
|
|
1147 1134 <span class="k">my</span><span class="s">(</span><span class="i">$PropertyName</span><span class="cm">,</span> <span class="i">$PropertyValuePairsRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
1148 1135 <span class="k">my</span><span class="s">(</span><span class="i">$Index</span><span class="cm">,</span> <span class="i">$PropertyLabel</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$PropertyValue</span><span class="cm">,</span> <span class="i">@PropertyLines</span><span class="s">)</span><span class="sc">;</span>
|
|
1149 1136
|
|
1150 1137 <span class="i">@PropertyLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
1151 1138 <span class="j">NAME:</span> <span class="s">{</span>
|
|
1152 1139 <span class="k">if</span> <span class="s">(</span><span class="i">$PropertyName</span> =~ <span class="q">/^Charge$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$PropertyLabel</span> = <span class="q">"M CHG"</span><span class="sc">;</span> <span class="k">last</span> <span class="j">NAME</span><span class="sc">;</span> <span class="s">}</span>
|
|
1153 1140 <span class="k">if</span> <span class="s">(</span><span class="i">$PropertyName</span> =~ <span class="q">/^Isotope$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$PropertyLabel</span> = <span class="q">"M ISO"</span><span class="sc">;</span> <span class="k">last</span> <span class="j">NAME</span><span class="sc">;</span> <span class="s">}</span>
|
|
1154 1141 <span class="k">if</span> <span class="s">(</span><span class="i">$PropertyName</span> =~ <span class="q">/^Radical$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$PropertyLabel</span> = <span class="q">"M RAD"</span><span class="sc">;</span> <span class="k">last</span> <span class="j">NAME</span><span class="sc">;</span> <span class="s">}</span>
|
|
1155 1142 <span class="w">carp</span> <span class="q">"Warning: _GenerateCmpdGenericPropertyLines: Unknown property name, $PropertyName, specified..."</span><span class="sc">;</span>
|
|
1156 1143 <span class="k">return</span> <span class="i">@PropertyLines</span><span class="sc">;</span>
|
|
1157 1144 <span class="s">}</span>
|
|
1158 1145
|
|
1159 1146 <span class="c"># A maximum of 8 property pair values allowed per line...</span>
|
|
1160 1147 <span class="i">$PropertyCount</span> = <span class="n">0</span><span class="sc">;</span>
|
|
1161 1148 <span class="i">$Line</span> = <span class="q">''</span><span class="sc">;</span>
|
|
1162 1149 <span class="k">for</span> <span class="s">(</span><span class="i">$Index</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$Index</span> &lt; <span class="i">$#</span>{<span class="i">$PropertyValuePairsRef</span>}<span class="sc">;</span> <span class="i">$Index</span> += <span class="n">2</span><span class="s">)</span> <span class="s">{</span>
|
|
1163 1150 <span class="k">if</span> <span class="s">(</span><span class="i">$PropertyCount</span> > <span class="n">8</span><span class="s">)</span> <span class="s">{</span>
|
|
1164 1151 <span class="c"># Setup property line...</span>
|
|
1165 1152 <span class="i">$Line</span> = <span class="q">"${PropertyLabel} 8${Line}"</span><span class="sc">;</span>
|
|
1166 1153 <span class="k">push</span> <span class="i">@PropertyLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="sc">;</span>
|
|
1167 1154
|
|
1168 1155 <span class="i">$PropertyCount</span> = <span class="n">0</span><span class="sc">;</span>
|
|
1169 1156 <span class="i">$Line</span> = <span class="q">''</span><span class="sc">;</span>
|
|
1170 1157 <span class="s">}</span>
|
|
1171 1158 <span class="i">$PropertyCount</span>++<span class="sc">;</span>
|
|
1172 1159 <span class="i">$AtomNum</span> = <span class="i">$PropertyValuePairsRef</span>->[<span class="i">$Index</span>]<span class="sc">;</span>
|
|
1173 1160 <span class="i">$PropertyValue</span> = <span class="i">$PropertyValuePairsRef</span>->[<span class="i">$Index</span> + <span class="n">1</span>]<span class="sc">;</span>
|
|
1174 1161 <span class="i">$Line</span> .= <span class="k">sprintf</span> <span class="q">" %3i %3i"</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$PropertyValue</span><span class="sc">;</span>
|
|
1175 1162 <span class="s">}</span>
|
|
1176 1163 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span><span class="s">)</span> <span class="s">{</span>
|
|
1177 1164 <span class="i">$Line</span> = <span class="q">"${PropertyLabel} ${PropertyCount}${Line}"</span><span class="sc">;</span>
|
|
1178 1165 <span class="k">push</span> <span class="i">@PropertyLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="sc">;</span>
|
|
1179 1166 <span class="s">}</span>
|
|
1180 1167 <span class="k">return</span> <span class="i">@PropertyLines</span><span class="sc">;</span>
|
|
1181 1168 <span class="s">}</span>
|
|
1182 1169
|
|
1183 1170 <span class="c">#</span>
|
|
1184 1171 <span class="c"># Read compound data into a string and return its value</span>
|
|
1185 <a name="ReadCmpdString-"></a>1172 <span class="k">sub </span><span class="m">ReadCmpdString</span> <span class="s">{</span>
|
|
1186 1173 <span class="k">my</span><span class="s">(</span><span class="i">$SDFileRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
1187 1174 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdString</span><span class="s">)</span><span class="sc">;</span>
|
|
1188 1175
|
|
1189 1176 <span class="i">$CmpdString</span> = <span class="q">""</span><span class="sc">;</span>
|
|
1190 1177 <span class="j">LINE:</span> <span class="k">while</span> <span class="s">(</span><span class="k">defined</span><span class="s">(</span><span class="i">$_</span> = <span class="q">&lt;$SDFileRef&gt;</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
1191 1178 <span class="c"># Change Windows and Mac new line char to UNIX...</span>
|
|
1192 1179 <span class="q">s/(\r\n)|(\r)/\n/g</span><span class="sc">;</span>
|
|
1193 1180
|
|
1194 1181 <span class="k">if</span> <span class="s">(</span><span class="q">/^\$\$\$\$/</span><span class="s">)</span> <span class="s">{</span>
|
|
1195 1182 <span class="c"># Take out any new line char at the end by explicitly removing it instead of using</span>
|
|
1196 1183 <span class="c"># chomp, which might not always work correctly on files generated on a system</span>
|
|
1197 1184 <span class="c"># with a value of input line separator different from the current system...</span>
|
|
1198 1185 <span class="q">s/\n$//g</span><span class="sc">;</span>
|
|
1199 1186
|
|
1200 1187 <span class="c"># Doesn't hurt to chomp...</span>
|
|
1201 1188 <span class="k">chomp</span><span class="sc">;</span>
|
|
1202 1189
|
|
1203 1190 <span class="i">$CmpdString</span> .= <span class="i">$_</span><span class="sc">;</span>
|
|
1204 1191 <span class="k">last</span> <span class="j">LINE</span><span class="sc">;</span>
|
|
1205 1192 <span class="s">}</span>
|
|
1206 1193 <span class="k">else</span> <span class="s">{</span>
|
|
1207 1194 <span class="i">$CmpdString</span> .= <span class="i">$_</span><span class="sc">;</span>
|
|
1208 1195 <span class="s">}</span>
|
|
1209 1196 <span class="s">}</span>
|
|
1210 1197 <span class="k">return</span> <span class="i">$CmpdString</span><span class="sc">;</span>
|
|
1211 1198 <span class="s">}</span>
|
|
1212 1199
|
|
1213 1200 <span class="c"># Find out the number of fragments in the compounds. And for the compound with</span>
|
|
1214 1201 <span class="c"># more than one fragment, remove all the others besides the largest one.</span>
|
|
1215 <a name="WashCmpd-"></a>1202 <span class="k">sub </span><span class="m">WashCmpd</span> <span class="s">{</span>
|
|
1216 1203 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
1217 1204 <span class="k">my</span><span class="s">(</span><span class="i">$WashedCmpdString</span><span class="cm">,</span> <span class="i">$FragmentCount</span><span class="cm">,</span> <span class="i">$Fragments</span><span class="s">)</span><span class="sc">;</span>
|
|
1218 1205
|
|
1219 1206 <span class="i">$WashedCmpdString</span> = <span class="q">""</span><span class="sc">;</span>
|
|
1220 1207 <span class="s">(</span><span class="i">$FragmentCount</span><span class="cm">,</span> <span class="i">$Fragments</span><span class="s">)</span> = <span class="i">GetCmpdFragments</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span><span class="sc">;</span>
|
|
1221 1208 <span class="k">if</span> <span class="s">(</span><span class="i">$FragmentCount</span> > <span class="n">1</span><span class="s">)</span> <span class="s">{</span>
|
|
1222 1209 <span class="c"># Go over the compound data for the largest fragment including property</span>
|
|
1223 1210 <span class="c"># data...</span>
|
|
1224 1211 <span class="k">my</span> <span class="s">(</span><span class="i">@AllFragments</span><span class="cm">,</span> <span class="i">@LargestFragment</span><span class="cm">,</span> <span class="i">%LargestFragmentAtoms</span><span class="cm">,</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">$Index</span><span class="cm">,</span> <span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$NewAtomCount</span><span class="cm">,</span> <span class="i">$NewBondCount</span><span class="cm">,</span> <span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="cm">,</span> <span class="i">$FirstNewAtomNum</span><span class="cm">,</span> <span class="i">$SecondNewAtomNum</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$BondLine</span><span class="cm">,</span> <span class="i">$MENDLineIndex</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">$Value</span><span class="cm">,</span> <span class="i">@ValuePairs</span><span class="cm">,</span> <span class="i">@NewValuePairs</span><span class="cm">,</span> <span class="i">$ValuePairIndex</span><span class="cm">,</span> <span class="i">$NewAtomNum</span><span class="cm">,</span> <span class="i">@NewPropertyLines</span><span class="s">)</span><span class="sc">;</span>
|
|
1225 1212
|
|
1226 1213 <span class="i">@AllFragments</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> <span class="i">@LargestFragment</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
1227 1214 <span class="i">%LargestFragmentAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
1228 1215 <span class="i">@AllFragments</span> = <span class="k">split</span> <span class="q">"\n"</span><span class="cm">,</span> <span class="i">$Fragments</span><span class="sc">;</span>
|
|
1229 1216 <span class="i">@LargestFragment</span> = <span class="k">split</span> <span class="q">" "</span><span class="cm">,</span> <span class="i">$AllFragments</span>[<span class="n">0</span>]<span class="sc">;</span>
|
|
1230 1217 <span class="k">for</span> <span class="i">$Index</span> <span class="s">(</span><span class="n">0</span> .. <span class="i">$#LargestFragment</span><span class="s">)</span> <span class="s">{</span>
|
|
1231 1218 <span class="c"># Map old atom numbers to new atom numbers as the fragment atom numbers are sorted</span>
|
|
1232 1219 <span class="c"># from lowest to highest old atom numbers...</span>
|
|
1233 1220 <span class="i">$LargestFragmentAtoms</span>{<span class="i">$LargestFragment</span>[<span class="i">$Index</span>]} = <span class="i">$Index</span> + <span class="n">1</span><span class="sc">;</span>
|
|
1234 1221 <span class="s">}</span>
|
|
1235 1222 <span class="i">@WashedCmpdLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
1236 1223 <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="n">0</span>]<span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="n">1</span>]<span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="n">2</span>]<span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="sc">;</span>
|
|
1237 1224 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="s">)</span> = <span class="i">ParseCmpdCountsLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="s">)</span><span class="sc">;</span>
|
|
1238 1225 <span class="i">$NewAtomCount</span> = <span class="i">@LargestFragment</span><span class="sc">;</span>
|
|
1239 1226 <span class="i">$NewBondCount</span> = <span class="n">0</span><span class="sc">;</span>
|
|
1240 1227 <span class="i">$AtomNum</span> = <span class="n">0</span><span class="sc">;</span>
|
|
1241 1228 <span class="c"># Retrieve the largest fragment atom lines...</span>
|
|
1242 1229 <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span><span class="sc">;</span> <span class="i">$LineIndex</span> &lt; <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span>
|
|
1243 1230 <span class="i">$AtomNum</span>++<span class="sc">;</span>
|
|
1244 1231 <span class="k">if</span> <span class="s">(</span><span class="i">$LargestFragmentAtoms</span>{<span class="i">$AtomNum</span>}<span class="s">)</span> <span class="s">{</span>
|
|
1245 1232 <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span>
|
|
1246 1233 <span class="s">}</span>
|
|
1247 1234 <span class="s">}</span>
|
|
1248 1235 <span class="c"># Retrieve the largest fragment bond lines...</span>
|
|
1249 1236 <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span> + <span class="i">$AtomCount</span><span class="sc">;</span> <span class="i">$LineIndex</span> &lt; <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span> + <span class="i">$BondCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span>
|
|
1250 1237 <span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span> = <span class="i">ParseCmpdBondLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="s">)</span><span class="sc">;</span>
|
|
1251 1238 <span class="k">if</span> <span class="s">(</span><span class="i">$LargestFragmentAtoms</span>{<span class="i">$FirstAtomNum</span>} && <span class="i">$LargestFragmentAtoms</span>{<span class="i">$SecondAtomNum</span>}<span class="s">)</span> <span class="s">{</span>
|
|
1252 1239 <span class="i">$NewBondCount</span>++<span class="sc">;</span>
|
|
1253 1240 <span class="c"># Set up bond line with new atom number mapping...</span>
|
|
1254 1241 <span class="i">$FirstNewAtomNum</span> = <span class="i">$LargestFragmentAtoms</span>{<span class="i">$FirstAtomNum</span>}<span class="sc">;</span>
|
|
1255 1242 <span class="i">$SecondNewAtomNum</span> = <span class="i">$LargestFragmentAtoms</span>{<span class="i">$SecondAtomNum</span>}<span class="sc">;</span>
|
|
1256 1243 <span class="i">$BondLine</span> = <span class="i">GenerateCmpdBondLine</span><span class="s">(</span><span class="i">$FirstNewAtomNum</span><span class="cm">,</span> <span class="i">$SecondNewAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span><span class="sc">;</span>
|
|
1257 1244 <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">$BondLine</span><span class="sc">;</span>
|
|
1258 1245 <span class="s">}</span>
|
|
1259 1246 <span class="s">}</span>
|
|
1260 1247 <span class="c"># Get property lines for CHG, ISO and RAD label and map the old atom numbers to new</span>
|
|
1261 1248 <span class="c"># atom numbers; other property lines before M END line are skipped as atom numbers for</span>
|
|
1262 1249 <span class="c"># other properties might not be valid anymore...</span>
|
|
1263 1250 <span class="c">#</span>
|
|
1264 1251 <span class="i">$MENDLineIndex</span> = <span class="i">$LineIndex</span><span class="sc">;</span>
|
|
1265 1252 <span class="j">LINE:</span> <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span> + <span class="i">$BondCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="i">@$CmpdLines</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span>
|
|
1266 1253 <span class="i">$Line</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span>
|
|
1267 1254 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M END/i</span><span class="s">)</span> <span class="s">{</span>
|
|
1268 1255 <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="q">"M END"</span><span class="sc">;</span>
|
|
1269 1256 <span class="i">$MENDLineIndex</span> = <span class="i">$LineIndex</span><span class="sc">;</span>
|
|
1270 1257 <span class="k">last</span> <span class="j">LINE</span><span class="sc">;</span>
|
|
1271 1258 <span class="s">}</span>
|
|
1272 1259
|
|
1273 1260 <span class="i">@ValuePairs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
1274 1261 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M CHG/i</span><span class="s">)</span> <span class="s">{</span>
|
|
1275 1262 <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdChargePropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
1276 1263 <span class="s">}</span>
|
|
1277 1264 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M RAD/i</span><span class="s">)</span> <span class="s">{</span>
|
|
1278 1265 <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdRadicalPropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
1279 1266 <span class="s">}</span>
|
|
1280 1267 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M ISO/i</span><span class="s">)</span> <span class="s">{</span>
|
|
1281 1268 <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdIsotopePropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span>
|
|
1282 1269 <span class="s">}</span>
|
|
1283 1270 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^A /i</span><span class="s">)</span> <span class="s">{</span>
|
|
1284 1271 <span class="k">my</span><span class="s">(</span><span class="i">$NextLine</span><span class="s">)</span><span class="sc">;</span>
|
|
1285 1272 <span class="i">$LineIndex</span>++<span class="sc">;</span>
|
|
1286 1273 <span class="i">$NextLine</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span>
|
|
1287 1274 <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdAtomAliasPropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">$NextLine</span><span class="s">)</span><span class="sc">;</span>
|
|
1288 1275 <span class="s">}</span>
|
|
1289 1276 <span class="k">else</span> <span class="s">{</span>
|
|
1290 1277 <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span>
|
|
1291 1278 <span class="s">}</span>
|
|
1292 1279
|
|
1293 1280 <span class="k">if</span> <span class="s">(</span>!<span class="i">@ValuePairs</span><span class="s">)</span> <span class="s">{</span>
|
|
1294 1281 <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span>
|
|
1295 1282 <span class="s">}</span>
|
|
1296 1283
|
|
1297 1284 <span class="c"># Collect values for valid atom numbers with mapping to new atom numbers...</span>
|
|
1298 1285 <span class="i">@NewValuePairs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
1299 1286 <span class="j">VALUEINDEX:</span> <span class="k">for</span> <span class="s">(</span><span class="i">$ValuePairIndex</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$ValuePairIndex</span> < <span class="i">$#ValuePairs</span><span class="sc">;</span> <span class="i">$ValuePairIndex</span> += <span class="n">2</span><span class="s">)</span> <span class="s">{</span>
|
|
1300 1287 <span class="i">$AtomNum</span> = <span class="i">$ValuePairs</span>[<span class="i">$ValuePairIndex</span>]<span class="sc">;</span> <span class="i">$Value</span> = <span class="i">$ValuePairs</span>[<span class="i">$ValuePairIndex</span> + <span class="n">1</span>]<span class="sc">;</span>
|
|
1301 1288 <span class="k">if</span> <span class="s">(</span>!<span class="k">exists</span> <span class="i">$LargestFragmentAtoms</span>{<span class="i">$AtomNum</span>}<span class="s">)</span> <span class="s">{</span>
|
|
1302 1289 <span class="k">next</span> <span class="j">VALUEINDEX</span><span class="sc">;</span>
|
|
1303 1290 <span class="s">}</span>
|
|
1304 1291 <span class="i">$NewAtomNum</span> = <span class="i">$LargestFragmentAtoms</span>{<span class="i">$AtomNum</span>}<span class="sc">;</span>
|
|
1305 1292 <span class="k">push</span> <span class="i">@NewValuePairs</span><span class="cm">,</span> <span class="s">(</span><span class="i">$NewAtomNum</span><span class="cm">,</span> <span class="i">$Value</span><span class="s">)</span>
|
|
1306 1293 <span class="s">}</span>
|
|
1307 1294 <span class="k">if</span> <span class="s">(</span>!<span class="i">@NewValuePairs</span><span class="s">)</span> <span class="s">{</span>
|
|
1308 1295 <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span>
|
|
1309 1296 <span class="s">}</span>
|
|
1310 1297 <span class="i">@NewPropertyLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
1311 1298 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M CHG/i</span><span class="s">)</span> <span class="s">{</span>
|
|
1312 1299 <span class="i">@NewPropertyLines</span> = <span class="i">GenerateCmpdChargePropertyLines</span><span class="s">(</span>\<span class="i">@NewValuePairs</span><span class="s">)</span><span class="sc">;</span>
|
|
1313 1300 <span class="s">}</span>
|
|
1314 1301 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M RAD/i</span><span class="s">)</span> <span class="s">{</span>
|
|
1315 1302 <span class="i">@NewPropertyLines</span> = <span class="i">GenerateCmpdRadicalPropertyLines</span><span class="s">(</span>\<span class="i">@NewValuePairs</span><span class="s">)</span><span class="sc">;</span>
|
|
1316 1303 <span class="s">}</span>
|
|
1317 1304 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M ISO/i</span><span class="s">)</span> <span class="s">{</span>
|
|
1318 1305 <span class="i">@NewPropertyLines</span> = <span class="i">GenerateCmpdIsotopePropertyLines</span><span class="s">(</span>\<span class="i">@NewValuePairs</span><span class="s">)</span><span class="sc">;</span>
|
|
1319 1306 <span class="s">}</span>
|
|
1320 1307 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^A /i</span><span class="s">)</span> <span class="s">{</span>
|
|
1321 1308 <span class="i">@NewPropertyLines</span> = <span class="i">GenerateCmpdAtomAliasPropertyLines</span><span class="s">(</span>\<span class="i">@NewValuePairs</span><span class="s">)</span><span class="sc">;</span>
|
|
1322 1309 <span class="s">}</span>
|
|
1323 1310 <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">@NewPropertyLines</span><span class="sc">;</span>
|
|
1324 1311 <span class="s">}</span>
|
|
1325 1312
|
|
1326 1313 <span class="c"># Retrieve rest of the data label and value property data...</span>
|
|
1327 1314 <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="s">(</span><span class="n">1</span> + <span class="i">$MENDLineIndex</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="i">@$CmpdLines</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span>
|
|
1328 1315 <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span>
|
|
1329 1316 <span class="s">}</span>
|
|
1330 1317 <span class="c"># Update atom and bond count line...</span>
|
|
1331 1318 <span class="i">$WashedCmpdLines</span>[<span class="n">3</span>] = <span class="i">GenerateCmpdCountsLine</span><span class="s">(</span><span class="i">$NewAtomCount</span><span class="cm">,</span> <span class="i">$NewBondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="s">)</span><span class="sc">;</span>
|
|
1332 1319
|
|
1333 1320 <span class="i">$WashedCmpdString</span> = <span class="k">join</span> <span class="q">"\n"</span><span class="cm">,</span> <span class="i">@WashedCmpdLines</span><span class="sc">;</span>
|
|
1334 1321 <span class="s">}</span>
|
|
1335 1322 <span class="k">return</span> <span class="s">(</span><span class="i">$FragmentCount</span><span class="cm">,</span> <span class="i">$Fragments</span><span class="cm">,</span> <span class="i">$WashedCmpdString</span><span class="s">)</span><span class="sc">;</span>
|
|
1336 1323 <span class="s">}</span>
|
|
1337 1324
|
|
1338 <a name="EOF-"></a></pre>
|
|
1339 <p> </p>
|
|
1340 <br />
|
|
1341 <center>
|
|
1342 <img src="../../../images/h2o2.png">
|
|
1343 </center>
|
|
1344 </body>
|
|
1345 </html>
|