Mercurial > repos > deepakjadmin > mayatool3_test3
view mayachemtools/docs/modules/html/code/SDFileUtil.html @ 0:73ae111cf86f draft
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 11:55:01 -0500 |
parents | |
children |
line wrap: on
line source
<html> <head> <title>MayaChemTools:Code:SDFileUtil.pm</title> <meta http-equiv="content-type" content="text/html;charset=utf-8"> <link rel="stylesheet" type="text/css" href="../../../css/MayaChemToolsCode.css"> </head> <body leftmargin="20" rightmargin="20" topmargin="10" bottommargin="10"> <br/> <center> <a href="http://www.mayachemtools.org" title="MayaChemTools Home"><img src="../../../images/MayaChemToolsLogo.gif" border="0" alt="MayaChemTools"></a> </center> <br/> <pre> <a name="package-SDFileUtil-"></a> 1 <span class="k">package </span><span class="i">SDFileUtil</span><span class="sc">;</span> 2 <span class="c">#</span> 3 <span class="c"># $RCSfile: SDFileUtil.pm,v $</span> 4 <span class="c"># $Date: 2015/02/28 20:47:18 $</span> 5 <span class="c"># $Revision: 1.49 $</span> 6 <span class="c">#</span> 7 <span class="c"># Author: Manish Sud &lt;msud@san.rr.com&gt;</span> 8 <span class="c">#</span> 9 <span class="c"># Copyright (C) 2015 Manish Sud. All rights reserved.</span> 10 <span class="c">#</span> 11 <span class="c"># This file is part of MayaChemTools.</span> 12 <span class="c">#</span> 13 <span class="c"># MayaChemTools is free software; you can redistribute it and/or modify it under</span> 14 <span class="c"># the terms of the GNU Lesser General Public License as published by the Free</span> 15 <span class="c"># Software Foundation; either version 3 of the License, or (at your option) any</span> 16 <span class="c"># later version.</span> 17 <span class="c">#</span> 18 <span class="c"># MayaChemTools is distributed in the hope that it will be useful, but without</span> 19 <span class="c"># any warranty; without even the implied warranty of merchantability of fitness</span> 20 <span class="c"># for a particular purpose. 
See the GNU Lesser General Public License for more</span> 21 <span class="c"># details.</span> 22 <span class="c">#</span> 23 <span class="c"># You should have received a copy of the GNU Lesser General Public License</span> 24 <span class="c"># along with MayaChemTools; if not, see &lt;http://www.gnu.org/licenses/&gt; or</span> 25 <span class="c"># write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,</span> 26 <span class="c"># Boston, MA, 02111-1307, USA.</span> 27 <span class="c">#</span> 28 29 <span class="k">use</span> <span class="w">strict</span><span class="sc">;</span> 30 <span class="k">use</span> <span class="w">Exporter</span><span class="sc">;</span> 31 <span class="k">use</span> <span class="w">Carp</span><span class="sc">;</span> 32 <span class="k">use</span> <span class="w">PeriodicTable</span> <span class="q">qw(IsElement)</span><span class="sc">;</span> 33 <span class="k">use</span> <span class="w">TimeUtil</span> <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 34 35 <span class="k">use</span> <span class="w">vars</span> <span class="q">qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS)</span><span class="sc">;</span> 36 37 <span class="i">@ISA</span> = <span class="q">qw(Exporter)</span><span class="sc">;</span> 38 <span class="i">@EXPORT</span> = <span class="q">qw(GenerateCmpdAtomLine GenerateCmpdBondLine GenerateCmpdChargePropertyLines GenerateCmpdCommentsLine GenerateCmpdCountsLine GenerateCmpdAtomAliasPropertyLines GenerateCmpdIsotopePropertyLines GenerateCmpdDataHeaderLabelsAndValuesLines GenerateCmpdMiscInfoLine GenerateCmpdRadicalPropertyLines GenerateCmpdMolNameLine GenerateEmptyCtabBlockLines GenerateMiscLineDateStamp GetAllAndCommonCmpdDataHeaderLabels GetCmpdDataHeaderLabels GetCmpdDataHeaderLabelsAndValues GetCmpdFragments GetCtabLinesCount GetUnknownAtoms GetInvalidAtomNumbers MDLChargeToInternalCharge InternalChargeToMDLCharge MDLBondTypeToInternalBondOrder InternalBondOrderToMDLBondType 
MDLBondStereoToInternalBondStereochemistry InternalBondStereochemistryToMDLBondStereo InternalSpinMultiplicityToMDLRadical MDLRadicalToInternalSpinMultiplicity IsCmpd3D IsCmpd2D ParseCmpdAtomLine ParseCmpdBondLine ParseCmpdCommentsLine ParseCmpdCountsLine ParseCmpdMiscInfoLine ParseCmpdMolNameLine ParseCmpdAtomAliasPropertyLine ParseCmpdChargePropertyLine ParseCmpdIsotopePropertyLine ParseCmpdRadicalPropertyLine ReadCmpdString RemoveCmpdDataHeaderLabelAndValue WashCmpd)</span><span class="sc">;</span> 39 <span class="i">@EXPORT_OK</span> = <span class="q">qw()</span><span class="sc">;</span> 40 <span class="i">%EXPORT_TAGS</span> = <span class="s">(</span><span class="w">all</span> <span class="cm">=></span> <span class="s">[</span><span class="i">@EXPORT</span><span class="cm">,</span> <span class="i">@EXPORT_OK</span><span class="s">]</span><span class="s">)</span><span class="sc">;</span> 41 42 <span class="c"># Format data for compounds count line...</span> <a name="GenerateCmpdCountsLine-"></a> 43 <span class="k">sub </span><span class="m">GenerateCmpdCountsLine</span> <span class="s">{</span> 44 <span class="k">my</span><span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 45 46 <span class="k">if</span> <span class="s">(</span><span class="i">@_</span> == <span class="n">5</span><span class="s">)</span> <span class="s">{</span> 47 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span 
class="i">$Version</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 48 <span class="s">}</span> 49 <span class="k">elsif</span> <span class="s">(</span><span class="i">@_</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> 50 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 51 <span class="i">$PropertyCount</span> = <span class="n">999</span><span class="sc">;</span> 52 <span class="i">$Version</span> = <span class="q">"V2000"</span><span class="sc">;</span> 53 <span class="s">}</span> 54 <span class="k">else</span> <span class="s">{</span> 55 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 56 <span class="i">$ChiralFlag</span> = <span class="n">0</span><span class="sc">;</span> 57 <span class="i">$PropertyCount</span> = <span class="n">999</span><span class="sc">;</span> 58 <span class="i">$Version</span> = <span class="q">"V2000"</span><span class="sc">;</span> 59 <span class="s">}</span> 60 <span class="k">if</span> <span class="s">(</span><span class="i">$AtomCount</span> > <span class="n">999</span><span class="s">)</span> <span class="s">{</span> 61 <span class="w">croak</span> <span class="q">"Error: SDFileUtil::GenerateCmpdCountsLine: The atom count, $AtomCount, exceeds maximum of 999 allowed for CTAB version 2000. 
The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools..."</span><span class="sc">;</span> 62 <span class="s">}</span> 63 <span class="i">$Line</span> = <span class="k">sprintf</span> <span class="q">"%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%6s"</span><span class="cm">,</span> <span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="sc">;</span> 64 65 <span class="k">return</span> <span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 66 <span class="s">}</span> 67 68 <span class="c"># Generate comments line...</span> <a name="GenerateCmpdCommentsLine-"></a> 69 <span class="k">sub </span><span class="m">GenerateCmpdCommentsLine</span> <span class="s">{</span> 70 <span class="k">my</span><span class="s">(</span><span class="i">$Comments</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 71 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 72 73 <span class="i">$Line</span> = <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$Comments</span><span class="s">)</span> > <span class="n">80</span><span class="s">)</span> ? 
<span class="k">substr</span><span class="s">(</span><span class="i">$Comments</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">80</span><span class="s">)</span> <span class="co">:</span> <span class="i">$Comments</span><span class="sc">;</span> 74 75 <span class="k">return</span> <span class="i">$Line</span><span class="sc">;</span> 76 <span class="s">}</span> 77 78 <span class="c"># Generate molname line...</span> <a name="GenerateCmpdMolNameLine-"></a> 79 <span class="k">sub </span><span class="m">GenerateCmpdMolNameLine</span> <span class="s">{</span> 80 <span class="k">my</span><span class="s">(</span><span class="i">$MolName</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 81 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 82 83 <span class="i">$Line</span> = <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$MolName</span><span class="s">)</span> > <span class="n">80</span><span class="s">)</span> ? 
<span class="k">substr</span><span class="s">(</span><span class="i">$MolName</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">80</span><span class="s">)</span> <span class="co">:</span> <span class="i">$MolName</span><span class="sc">;</span> 84 85 <span class="k">return</span> <span class="i">$Line</span><span class="sc">;</span> 86 <span class="s">}</span> 87 88 <span class="c"># Generate data for compounds misc info line...</span> <a name="GenerateCmpdMiscInfoLine-"></a> 89 <span class="k">sub </span><span class="m">GenerateCmpdMiscInfoLine</span> <span class="s">{</span> 90 <span class="k">my</span><span class="s">(</span><span class="i">$ProgramName</span><span class="cm">,</span> <span class="i">$UserInitial</span><span class="cm">,</span> <span class="i">$Code</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 91 <span class="k">my</span><span class="s">(</span><span class="i">$Date</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 92 93 <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">defined</span><span class="s">(</span><span class="i">$ProgramName</span><span class="s">)</span> && <span class="i">$ProgramName</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 94 <span class="i">$ProgramName</span> = <span class="q">"MayaChem"</span><span class="sc">;</span> 95 <span class="s">}</span> 96 <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">defined</span><span class="s">(</span><span class="i">$UserInitial</span><span class="s">)</span> && <span class="i">$UserInitial</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 97 <span class="i">$UserInitial</span> = <span class="q">" "</span><span class="sc">;</span> 98 <span class="s">}</span> 99 <span class="k">if</span> <span 
class="s">(</span>!<span class="s">(</span><span class="k">defined</span><span class="s">(</span><span class="i">$Code</span><span class="s">)</span> && <span class="i">$Code</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 100 <span class="i">$Code</span> = <span class="q">"2D"</span><span class="sc">;</span> 101 <span class="s">}</span> 102 103 <span class="k">if</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$ProgramName</span><span class="s">)</span> > <span class="n">8</span><span class="s">)</span> <span class="s">{</span> 104 <span class="i">$ProgramName</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$ProgramName</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">8</span><span class="s">)</span><span class="sc">;</span> 105 <span class="s">}</span> 106 <span class="k">if</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$UserInitial</span><span class="s">)</span> > <span class="n">2</span><span class="s">)</span> <span class="s">{</span> 107 <span class="i">$UserInitial</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$UserInitial</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">2</span><span class="s">)</span><span class="sc">;</span> 108 <span class="s">}</span> 109 <span class="k">if</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$Code</span><span class="s">)</span> > <span class="n">2</span><span class="s">)</span> <span class="s">{</span> 110 <span class="i">$Code</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$Code</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">2</span><span class="s">)</span><span class="sc">;</span> 111 <span 
class="s">}</span> 112 <span class="i">$Date</span> = <span class="i">GenerateMiscLineDateStamp</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 113 114 <span class="i">$Line</span> = <span class="q">"${UserInitial}${ProgramName}${Date}${Code}"</span><span class="sc">;</span> 115 116 <span class="k">return</span> <span class="i">$Line</span><span class="sc">;</span> 117 <span class="s">}</span> 118 119 <span class="c"># Generate lines for an empty CTAB block...</span> <a name="GenerateEmptyCtabBlockLines-"></a> 120 <span class="k">sub </span><span class="m">GenerateEmptyCtabBlockLines</span> <span class="s">{</span> 121 <span class="k">my</span><span class="s">(</span><span class="i">$Date</span><span class="cm">,</span> <span class="i">$Lines</span><span class="s">)</span><span class="sc">;</span> 122 123 <span class="k">if</span> <span class="s">(</span><span class="i">@_</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> 124 <span class="s">(</span><span class="i">$Date</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 125 <span class="s">}</span> 126 <span class="k">else</span> <span class="s">{</span> 127 <span class="i">$Date</span> = <span class="i">GenerateMiscLineDateStamp</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 128 <span class="s">}</span> 129 <span class="c"># First line: Blank molname line...</span> 130 <span class="c"># Second line: Misc info...</span> 131 <span class="c"># Third line: Blank comments line...</span> 132 <span class="c"># Fourth line: Counts line reflecting empty structure data block...</span> 133 <span class="i">$Lines</span> = <span class="q">"\n"</span><span class="sc">;</span> 134 <span class="i">$Lines</span> .= <span class="q">" MayaChem${Date}2D\n"</span><span class="sc">;</span> 135 <span class="i">$Lines</span> .= <span class="q">"\n"</span><span class="sc">;</span> 136 <span 
class="i">$Lines</span> .= <span class="i">GenerateCmpdCountsLine</span><span class="s">(</span><span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="s">)</span> . <span class="q">"\n"</span><span class="sc">;</span> 137 <span class="i">$Lines</span> .= <span class="q">"M END"</span><span class="sc">;</span> 138 139 <span class="k">return</span> <span class="i">$Lines</span><span class="sc">;</span> 140 <span class="s">}</span> 141 142 <span class="c"># Generate SD file date stamp...</span> <a name="GenerateMiscLineDateStamp-"></a> 143 <span class="k">sub </span><span class="m">GenerateMiscLineDateStamp</span> <span class="s">{</span> 144 <span class="k">return</span> <span class="i">TimeUtil::SDFileTimeStamp</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 145 <span class="s">}</span> 146 147 <span class="c"># Generate data for compound atom line...</span> 148 <span class="c">#</span> <a name="GenerateCmpdAtomLine-"></a> 149 <span class="k">sub </span><span class="m">GenerateCmpdAtomLine</span> <span class="s">{</span> 150 <span class="k">my</span><span class="s">(</span><span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 151 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 152 153 <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$MassDifference</span><span class="s">)</span> <span class="s">{</span> 154 <span 
class="i">$MassDifference</span> = <span class="n">0</span><span class="sc">;</span> 155 <span class="s">}</span> 156 <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$Charge</span><span class="s">)</span> <span class="s">{</span> 157 <span class="i">$Charge</span> = <span class="n">0</span><span class="sc">;</span> 158 <span class="s">}</span> 159 <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$StereoParity</span><span class="s">)</span> <span class="s">{</span> 160 <span class="i">$StereoParity</span> = <span class="n">0</span><span class="sc">;</span> 161 <span class="s">}</span> 162 <span class="i">$Line</span> = <span class="k">sprintf</span> <span class="q">"%10.4f%10.4f%10.4f %-3s%2i%3i%3i 0 0 0 0 0 0 0 0 0"</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="sc">;</span> 163 164 <span class="k">return</span> <span class="i">$Line</span> 165 <span class="s">}</span> 166 167 <span class="c"># Generate data for compound bond line...</span> 168 <span class="c">#</span> <a name="GenerateCmpdBondLine-"></a> 169 <span class="k">sub </span><span class="m">GenerateCmpdBondLine</span> <span class="s">{</span> 170 <span class="k">my</span><span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 171 <span class="k">my</span><span 
class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 172 173 <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$BondStereo</span><span class="s">)</span> <span class="s">{</span> 174 <span class="i">$BondStereo</span> = <span class="n">0</span><span class="sc">;</span> 175 <span class="s">}</span> 176 <span class="i">$Line</span> = <span class="k">sprintf</span> <span class="q">"%3i%3i%3i%3i 0 0 0"</span><span class="cm">,</span> <span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="sc">;</span> 177 178 <span class="k">return</span> <span class="i">$Line</span> 179 <span class="s">}</span> 180 181 <span class="c"># Generate charge property lines for CTAB block...</span> 182 <span class="c">#</span> <a name="GenerateCmpdChargePropertyLines-"></a> 183 <span class="k">sub </span><span class="m">GenerateCmpdChargePropertyLines</span> <span class="s">{</span> 184 <span class="k">my</span><span class="s">(</span><span class="i">$ChargeValuePairsRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 185 186 <span class="k">return</span> <span class="i">_GenerateCmpdGenericPropertyLines</span><span class="s">(</span><span class="q">'Charge'</span><span class="cm">,</span> <span class="i">$ChargeValuePairsRef</span><span class="s">)</span><span class="sc">;</span> 187 <span class="s">}</span> 188 189 <span class="c"># Generate isotope property lines for CTAB block...</span> 190 <span class="c">#</span> <a name="GenerateCmpdIsotopePropertyLines-"></a> 191 <span class="k">sub </span><span class="m">GenerateCmpdIsotopePropertyLines</span> <span class="s">{</span> 192 <span class="k">my</span><span class="s">(</span><span class="i">$IsotopeValuePairsRef</span><span class="s">)</span> 
= <span class="i">@_</span><span class="sc">;</span> 193 194 <span class="k">return</span> <span class="i">_GenerateCmpdGenericPropertyLines</span><span class="s">(</span><span class="q">'Isotope'</span><span class="cm">,</span> <span class="i">$IsotopeValuePairsRef</span><span class="s">)</span><span class="sc">;</span> 195 <span class="s">}</span> 196 197 <span class="c"># Generate radical property lines for CTAB block...</span> 198 <span class="c">#</span> <a name="GenerateCmpdRadicalPropertyLines-"></a> 199 <span class="k">sub </span><span class="m">GenerateCmpdRadicalPropertyLines</span> <span class="s">{</span> 200 <span class="k">my</span><span class="s">(</span><span class="i">$RadicalValuePairsRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 201 202 <span class="k">return</span> <span class="i">_GenerateCmpdGenericPropertyLines</span><span class="s">(</span><span class="q">'Radical'</span><span class="cm">,</span> <span class="i">$RadicalValuePairsRef</span><span class="s">)</span><span class="sc">;</span> 203 <span class="s">}</span> 204 205 <span class="c"># Generate atom alias property lines for CTAB block...</span> 206 <span class="c">#</span> 207 <span class="c"># Atom alias property line format:</span> 208 <span class="c">#</span> 209 <span class="c"># A aaa</span> 210 <span class="c"># x...</span> 211 <span class="c">#</span> 212 <span class="c"># aaa: Atom number</span> 213 <span class="c"># x: Atom alias in next line</span> 214 <span class="c">#</span> <a name="GenerateCmpdAtomAliasPropertyLines-"></a> 215 <span class="k">sub </span><span class="m">GenerateCmpdAtomAliasPropertyLines</span> <span class="s">{</span> 216 <span class="k">my</span><span class="s">(</span><span class="i">$PropertyValuePairsRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 217 <span class="k">my</span><span class="s">(</span><span class="i">$Index</span><span 
class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$AtomAlias</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">@PropertyLines</span><span class="s">)</span><span class="sc">;</span> 218 219 <span class="i">@PropertyLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 220 221 <span class="k">for</span> <span class="s">(</span><span class="i">$Index</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$Index</span> < <span class="i">$#</span>{<span class="i">$PropertyValuePairsRef</span>}<span class="sc">;</span> <span class="i">$Index</span> += <span class="n">2</span><span class="s">)</span> <span class="s">{</span> 222 <span class="i">$AtomNum</span> = <span class="i">$PropertyValuePairsRef</span>->[<span class="i">$Index</span>]<span class="sc">;</span> 223 <span class="i">$AtomAlias</span> = <span class="i">$PropertyValuePairsRef</span>->[<span class="i">$Index</span> + <span class="n">1</span>]<span class="sc">;</span> 224 225 <span class="i">$Line</span> = <span class="q">"A "</span> . 
<span class="k">sprintf</span> <span class="q">"%3i"</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="sc">;</span> 226 227 <span class="k">push</span> <span class="i">@PropertyLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="sc">;</span> 228 <span class="k">push</span> <span class="i">@PropertyLines</span><span class="cm">,</span> <span class="i">$AtomAlias</span><span class="sc">;</span> 229 <span class="s">}</span> 230 231 <span class="k">return</span> <span class="i">@PropertyLines</span><span class="sc">;</span> 232 <span class="s">}</span> 233 234 <span class="c"># Generate data header labels and values lines...</span> 235 <span class="c">#</span> <a name="GenerateCmpdDataHeaderLabelsAndValuesLines-"></a> 236 <span class="k">sub </span><span class="m">GenerateCmpdDataHeaderLabelsAndValuesLines</span> <span class="s">{</span> 237 <span class="k">my</span><span class="s">(</span><span class="i">$DataHeaderLabelsRef</span><span class="cm">,</span> <span class="i">$DataHeaderLabelsAndValuesRef</span><span class="cm">,</span> <span class="i">$SortDataLabels</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 238 <span class="k">my</span><span class="s">(</span><span class="i">$DataLabel</span><span class="cm">,</span> <span class="i">$DataValue</span><span class="cm">,</span> <span class="i">@DataLabels</span><span class="cm">,</span> <span class="i">@DataLines</span><span class="s">)</span><span class="sc">;</span> 239 240 <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$SortDataLabels</span><span class="s">)</span> <span class="s">{</span> 241 <span class="i">$SortDataLabels</span> = <span class="n">0</span><span class="sc">;</span> 242 <span class="s">}</span> 243 244 <span class="i">@DataLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 245 <span class="i">@DataLabels</span> = <span 
class="s">(</span><span class="s">)</span><span class="sc">;</span> 246 <span class="k">if</span> <span class="s">(</span><span class="i">$SortDataLabels</span><span class="s">)</span> <span class="s">{</span> 247 <span class="k">push</span> <span class="i">@DataLabels</span><span class="cm">,</span> <span class="k">sort</span> <span class="i">@</span>{<span class="i">$DataHeaderLabelsRef</span>}<span class="sc">;</span> 248 <span class="s">}</span> 249 <span class="k">else</span> <span class="s">{</span> 250 <span class="k">push</span> <span class="i">@DataLabels</span><span class="cm">,</span> <span class="i">@</span>{<span class="i">$DataHeaderLabelsRef</span>}<span class="sc">;</span> 251 <span class="s">}</span> 252 <span class="k">for</span> <span class="i">$DataLabel</span> <span class="s">(</span><span class="i">@DataLabels</span><span class="s">)</span> <span class="s">{</span> 253 <span class="i">$DataValue</span> = <span class="q">''</span><span class="sc">;</span> 254 <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$DataHeaderLabelsAndValuesRef</span>->{<span class="i">$DataLabel</span>}<span class="s">)</span> <span class="s">{</span> 255 <span class="i">$DataValue</span> = <span class="i">$DataHeaderLabelsAndValuesRef</span>->{<span class="i">$DataLabel</span>}<span class="sc">;</span> 256 <span class="s">}</span> 257 <span class="k">push</span> <span class="i">@DataLines</span><span class="cm">,</span> <span class="s">(</span><span class="q">"> <${DataLabel}>"</span><span class="cm">,</span> <span class="q">"$DataValue"</span><span class="cm">,</span> <span class="q">""</span><span class="s">)</span><span class="sc">;</span> 258 <span class="s">}</span> 259 <span class="k">return</span> <span class="i">@DataLines</span><span class="sc">;</span> 260 <span class="s">}</span> 261 262 <span class="c"># Parse data field header in SD file and return lists of all and common data field</span> 263 <span 
class="c"># labels.</span> <a name="GetAllAndCommonCmpdDataHeaderLabels-"></a> 264 <span class="k">sub </span><span class="m">GetAllAndCommonCmpdDataHeaderLabels</span> <span class="s">{</span> 265 <span class="k">my</span><span class="s">(</span><span class="i">$SDFileRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 266 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdCount</span><span class="cm">,</span> <span class="i">$CmpdString</span><span class="cm">,</span> <span class="i">$Label</span><span class="cm">,</span> <span class="i">@CmpdLines</span><span class="cm">,</span> <span class="i">@DataFieldLabels</span><span class="cm">,</span> <span class="i">@CommonDataFieldLabels</span><span class="cm">,</span> <span class="i">%DataFieldLabelsMap</span><span class="s">)</span><span class="sc">;</span> 267 268 <span class="i">$CmpdCount</span> = <span class="n">0</span><span class="sc">;</span> 269 <span class="i">@DataFieldLabels</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 270 <span class="i">@CommonDataFieldLabels</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 271 <span class="i">%DataFieldLabelsMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 272 273 <span class="k">while</span> <span class="s">(</span><span class="i">$CmpdString</span> = <span class="i">ReadCmpdString</span><span class="s">(</span><span class="i">$SDFileRef</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 274 <span class="i">$CmpdCount</span>++<span class="sc">;</span> 275 <span class="i">@CmpdLines</span> = <span class="k">split</span> <span class="q">"\n"</span><span class="cm">,</span> <span class="i">$CmpdString</span><span class="sc">;</span> 276 <span class="c"># Process compound data header labels and figure out which ones are present for</span> 277 <span class="c"># all the 
compounds...</span> 278 <span class="k">if</span> <span class="s">(</span><span class="i">@DataFieldLabels</span><span class="s">)</span> <span class="s">{</span> 279 <span class="k">my</span> <span class="s">(</span><span class="i">@CmpdDataFieldLabels</span><span class="s">)</span> = <span class="i">GetCmpdDataHeaderLabels</span><span class="s">(</span>\<span class="i">@CmpdLines</span><span class="s">)</span><span class="sc">;</span> 280 <span class="k">my</span><span class="s">(</span><span class="i">%CmpdDataFieldLabelsMap</span><span class="s">)</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 281 <span class="c"># Setup a map for the current labels...</span> 282 <span class="k">for</span> <span class="i">$Label</span> <span class="s">(</span><span class="i">@CmpdDataFieldLabels</span><span class="s">)</span> <span class="s">{</span> 283 <span class="i">$CmpdDataFieldLabelsMap</span>{<span class="i">$Label</span>} = <span class="q">"PresentInSome"</span><span class="sc">;</span> 284 <span class="s">}</span> 285 <span class="c"># Check the presence of old labels in this compound; otherwise, mark 'em as present in some...</span> 286 <span class="k">for</span> <span class="i">$Label</span> <span class="s">(</span><span class="i">@DataFieldLabels</span><span class="s">)</span> <span class="s">{</span> 287 <span class="k">if</span> <span class="s">(</span>!<span class="i">$CmpdDataFieldLabelsMap</span>{<span class="i">$Label</span>}<span class="s">)</span> <span class="s">{</span> 288 <span class="i">$DataFieldLabelsMap</span>{<span class="i">$Label</span>} = <span class="q">"PresentInSome"</span><span class="sc">;</span> 289 <span class="s">}</span> 290 <span class="s">}</span> 291 <span class="c"># Check the presence of this compound's labels in the old labels; otherwise, add 'em...</span> 292 <span class="k">for</span> <span class="i">$Label</span> <span class="s">(</span><span class="i">@CmpdDataFieldLabels</span> <span class="s">)</span> <span 
class="s">{</span> 293 <span class="k">if</span> <span class="s">(</span>!<span class="i">$DataFieldLabelsMap</span>{<span class="i">$Label</span>}<span class="s">)</span> <span class="s">{</span> 294 <span class="c"># It's a new label...</span> 295 <span class="k">push</span> <span class="i">@DataFieldLabels</span><span class="cm">,</span> <span class="i">$Label</span><span class="sc">;</span> 296 <span class="i">$DataFieldLabelsMap</span>{<span class="i">$Label</span>} = <span class="q">"PresentInSome"</span><span class="sc">;</span> 297 <span class="s">}</span> 298 <span class="s">}</span> 299 <span class="s">}</span> 300 <span class="k">else</span> <span class="s">{</span> 301 <span class="c"># Get the initial label set and set up a map...</span> 302 <span class="i">@DataFieldLabels</span> = <span class="i">GetCmpdDataHeaderLabels</span><span class="s">(</span>\<span class="i">@CmpdLines</span><span class="s">)</span><span class="sc">;</span> 303 <span class="k">for</span> <span class="i">$Label</span> <span class="s">(</span><span class="i">@DataFieldLabels</span><span class="s">)</span> <span class="s">{</span> 304 <span class="i">$DataFieldLabelsMap</span>{<span class="i">$Label</span>} = <span class="q">"PresentInAll"</span><span class="sc">;</span> 305 <span class="s">}</span> 306 <span class="s">}</span> 307 <span class="s">}</span> 308 <span class="c"># Identify the common data field labels...</span> 309 <span class="i">@CommonDataFieldLabels</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 310 <span class="k">for</span> <span class="i">$Label</span> <span class="s">(</span><span class="i">@DataFieldLabels</span><span class="s">)</span> <span class="s">{</span> 311 <span class="k">if</span> <span class="s">(</span><span class="i">$DataFieldLabelsMap</span>{<span class="i">$Label</span>} <span class="k">eq</span> <span class="q">"PresentInAll"</span><span class="s">)</span> <span class="s">{</span> 312 <span 
class="k">push</span> <span class="i">@CommonDataFieldLabels</span><span class="cm">,</span> <span class="i">$Label</span><span class="sc">;</span> 313 <span class="s">}</span> 314 <span class="s">}</span> 315 <span class="k">return</span> <span class="s">(</span><span class="i">$CmpdCount</span><span class="cm">,</span> \<span class="i">@DataFieldLabels</span><span class="cm">,</span> \<span class="i">@CommonDataFieldLabels</span><span class="s">)</span><span class="sc">;</span> 316 <span class="s">}</span> 317 318 <span class="c"># Parse all the data header labels and return 'em as a list...</span> 319 <span class="c">#</span> 320 <span class="c"># Format:</span> 321 <span class="c">#</span> 322 <span class="c">#&gt; Data header line</span> 323 <span class="c">#Data line(s)</span> 324 <span class="c">#Blank line</span> 325 <span class="c">#</span> 326 <span class="c"># [Data Header] (one line) precedes each item of data, starts with a greater than (&gt;) sign, and</span> 327 <span class="c"># contains at least one of the following:</span> 328 <span class="c"># The field name enclosed in angle brackets. For example: &lt;melting.point&gt;</span> 329 <span class="c"># The field number, DTn , where n represents the number assigned to the field in a MACCS-II database</span> 330 <span class="c">#</span> 331 <span class="c">#Optional information for the data header includes:</span> 332 <span class="c"># The compound’s external and internal registry numbers. 
External registry numbers must be enclosed in parentheses.</span> 333 <span class="c"># Any combination of information</span> 334 <span class="c">#</span> 335 <span class="c">#The following are examples of valid data headers:</span> 336 <span class="c">#&gt; &lt;MELTING.POINT&gt;</span> 337 <span class="c">#&gt; 55 (MD-08974) &lt;BOILING.POINT&gt; DT12</span> 338 <span class="c">#&gt; DT12 55</span> 339 <span class="c">#&gt; (MD-0894) &lt;BOILING.POINT&gt; FROM ARCHIVES</span> 340 <span class="c">#</span> 341 <span class="c">#Notes: Sometimes last blank line is missing and can be just followed by $$$$</span> 342 <span class="c">#</span> <a name="GetCmpdDataHeaderLabels-"></a> 343 <span class="k">sub </span><span class="m">GetCmpdDataHeaderLabels</span> <span class="s">{</span> 344 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 345 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLine</span><span class="cm">,</span> <span class="i">$Label</span><span class="cm">,</span> <span class="i">@Labels</span><span class="s">)</span><span class="sc">;</span> 346 347 <span class="i">@Labels</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 348 <span class="j">CMPDLINE:</span> <span class="k">for</span> <span class="i">$CmpdLine</span> <span class="s">(</span><span class="i">@$CmpdLines</span><span class="s">)</span> <span class="s">{</span> 349 <span class="k">if</span> <span class="s">(</span><span class="i">$CmpdLine</span> !~ <span class="q">/^&gt;/</span><span class="s">)</span> <span class="s">{</span> 350 <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span> 351 <span class="s">}</span> 352 <span class="c"># Does the line contain a field name enclosed in angular brackets?</span> 353 <span class="s">(</span><span class="i">$Label</span><span class="s">)</span> = <span class="i">$CmpdLine</span> =~ <span 
class="q">/<.*?>/g</span><span class="sc">;</span> 354 <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span><span class="s">(</span><span class="i">$Label</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 355 <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span> 356 <span class="s">}</span> 357 <span class="i">$Label</span> =~ <span class="q">s/(<|>)//g</span><span class="sc">;</span> 358 <span class="k">push</span> <span class="i">@Labels</span><span class="cm">,</span> <span class="i">$Label</span><span class="sc">;</span> 359 <span class="s">}</span> 360 <span class="k">return</span> <span class="s">(</span><span class="i">@Labels</span><span class="s">)</span><span class="sc">;</span> 361 <span class="s">}</span> 362 363 <span class="c"># Parse all the data header labels and values</span> <a name="GetCmpdDataHeaderLabelsAndValues-"></a> 364 <span class="k">sub </span><span class="m">GetCmpdDataHeaderLabelsAndValues</span> <span class="s">{</span> 365 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 366 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLine</span><span class="cm">,</span> <span class="i">$CurrentLabel</span><span class="cm">,</span> <span class="i">$Label</span><span class="cm">,</span> <span class="i">$Value</span><span class="cm">,</span> <span class="i">$ValueCount</span><span class="cm">,</span> <span class="i">$ProcessingLabelData</span><span class="cm">,</span> <span class="i">@Values</span><span class="cm">,</span> <span class="i">%DataFields</span><span class="s">)</span><span class="sc">;</span> 367 368 <span class="i">%DataFields</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 369 <span class="i">$ProcessingLabelData</span> = <span class="n">0</span><span class="sc">;</span> 370 <span 
class="i">$ValueCount</span> = <span class="n">0</span><span class="sc">;</span> 371 <span class="j">CMPDLINE:</span> <span class="k">for</span> <span class="i">$CmpdLine</span> <span class="s">(</span><span class="i">@$CmpdLines</span><span class="s">)</span> <span class="s">{</span> 372 <span class="k">if</span> <span class="s">(</span><span class="i">$CmpdLine</span> =~ <span class="q">/^\$\$\$\$/</span><span class="s">)</span> <span class="s">{</span> 373 <span class="k">last</span> <span class="j">CMPDLINE</span><span class="sc">;</span> 374 <span class="s">}</span> 375 <span class="k">if</span> <span class="s">(</span><span class="i">$CmpdLine</span> =~ <span class="q">/^>/</span><span class="s">)</span> <span class="s">{</span> 376 <span class="c"># Does the line contains field name enclosed in angular brackets?</span> 377 <span class="s">(</span><span class="i">$Label</span><span class="s">)</span> = <span class="i">$CmpdLine</span> =~ <span class="q">/<.*?>/g</span><span class="sc">;</span> 378 <span class="k">if</span> <span class="s">(</span><span class="k">defined</span> <span class="i">$Label</span><span class="s">)</span> <span class="s">{</span> 379 <span class="i">$CurrentLabel</span> = <span class="i">$Label</span><span class="sc">;</span> 380 <span class="i">$CurrentLabel</span> =~ <span class="q">s/(<|>)//g</span><span class="sc">;</span> 381 <span class="i">$ProcessingLabelData</span> = <span class="n">0</span><span class="sc">;</span> 382 <span class="i">$ValueCount</span> = <span class="n">0</span><span class="sc">;</span> 383 384 <span class="k">if</span> <span class="s">(</span><span class="i">$CurrentLabel</span><span class="s">)</span> <span class="s">{</span> 385 <span class="i">$ProcessingLabelData</span> = <span class="n">1</span><span class="sc">;</span> 386 <span class="i">$DataFields</span>{<span class="i">$CurrentLabel</span>} = <span class="q">''</span><span class="sc">;</span> 387 <span class="k">next</span> <span 
class="j">CMPDLINE</span><span class="sc">;</span> 388 <span class="s">}</span> 389 <span class="s">}</span> 390 <span class="k">else</span> <span class="s">{</span> 391 <span class="k">if</span> <span class="s">(</span>!<span class="i">$ProcessingLabelData</span><span class="s">)</span> <span class="s">{</span> 392 <span class="c"># Data line containing no &lt;label&gt; as allowed by SDF format. Just ignore it...</span> 393 <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span> 394 <span class="s">}</span> 395 <span class="s">}</span> 396 <span class="s">}</span> 397 <span class="k">if</span> <span class="s">(</span>!<span class="i">$ProcessingLabelData</span><span class="s">)</span> <span class="s">{</span> 398 <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span> 399 <span class="s">}</span> 400 <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">defined</span><span class="s">(</span><span class="i">$CmpdLine</span><span class="s">)</span> &amp;&amp; <span class="k">length</span><span class="s">(</span><span class="i">$CmpdLine</span><span class="s">)</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 401 <span class="c"># Blank line terminates value for a label...</span> 402 <span class="i">$CurrentLabel</span> = <span class="q">''</span><span class="sc">;</span> 403 <span class="i">$ValueCount</span> = <span class="n">0</span><span class="sc">;</span> 404 <span class="i">$ProcessingLabelData</span> = <span class="n">0</span><span class="sc">;</span> 405 <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span> 406 <span class="s">}</span> 407 <span class="i">$ValueCount</span>++<span class="sc">;</span> 408 <span class="i">$Value</span> = <span class="i">$CmpdLine</span><span class="sc">;</span> 409 410 <span class="k">if</span> <span class="s">(</span><span class="i">$ValueCount</span> &gt; <span class="n">1</span><span 
class="s">)</span> <span class="s">{</span> 411 <span class="i">$DataFields</span>{<span class="i">$CurrentLabel</span>} .= <span class="q">"\n"</span> . <span class="i">$Value</span><span class="sc">;</span> 412 <span class="s">}</span> 413 <span class="k">else</span> <span class="s">{</span> 414 <span class="i">$DataFields</span>{<span class="i">$CurrentLabel</span>} = <span class="i">$Value</span><span class="sc">;</span> 415 <span class="s">}</span> 416 <span class="s">}</span> 417 <span class="k">return</span> <span class="s">(</span><span class="i">%DataFields</span><span class="s">)</span><span class="sc">;</span> 418 <span class="s">}</span> 419 420 <span class="c"># Return an updated compound string after removing data header label along with its</span> 421 <span class="c"># value from the specified compound string...</span> 422 <span class="c">#</span> <a name="RemoveCmpdDataHeaderLabelAndValue-"></a> 423 <span class="k">sub </span><span class="m">RemoveCmpdDataHeaderLabelAndValue</span> <span class="s">{</span> 424 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdString</span><span class="cm">,</span> <span class="i">$DataHeaderLabel</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 425 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">$PorcessingDataHeaderLabel</span><span class="cm">,</span> <span class="i">@CmpdLines</span><span class="s">)</span><span class="sc">;</span> 426 427 <span class="i">@CmpdLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 428 <span class="i">$PorcessingDataHeaderLabel</span> = <span class="n">0</span><span class="sc">;</span> 429 430 <span class="j">CMPDLINE:</span> <span class="k">for</span> <span class="i">$Line</span> <span class="s">(</span><span class="k">split</span> <span class="q">"\n"</span><span class="cm">,</span> <span class="i">$CmpdString</span><span 
class="s">)</span> <span class="s">{</span> 431 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^>/</span> && <span class="i">$Line</span> =~ <span class="q">/<$DataHeaderLabel>/i</span><span class="s">)</span> <span class="s">{</span> 432 <span class="i">$PorcessingDataHeaderLabel</span> = <span class="n">1</span><span class="sc">;</span> 433 <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span> 434 <span class="s">}</span> 435 436 <span class="k">if</span> <span class="s">(</span><span class="i">$PorcessingDataHeaderLabel</span><span class="s">)</span> <span class="s">{</span> 437 <span class="c"># Blank line indicates end of fingerprints data value...</span> 438 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^\$\$\$\$/</span><span class="s">)</span> <span class="s">{</span> 439 <span class="k">push</span> <span class="i">@CmpdLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="sc">;</span> 440 <span class="i">$PorcessingDataHeaderLabel</span> = <span class="n">0</span><span class="sc">;</span> 441 <span class="s">}</span> 442 <span class="k">elsif</span> <span class="s">(</span>!<span class="k">length</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 443 <span class="i">$PorcessingDataHeaderLabel</span> = <span class="n">0</span><span class="sc">;</span> 444 <span class="s">}</span> 445 <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span> 446 <span class="s">}</span> 447 448 <span class="c"># Track compound lines without fingerprints data...</span> 449 <span class="k">push</span> <span class="i">@CmpdLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="sc">;</span> 450 <span class="s">}</span> 451 452 <span class="k">return</span> <span class="k">join</span> <span 
class="q">"\n"</span><span class="cm">,</span> <span class="i">@CmpdLines</span><span class="sc">;</span> 453 <span class="s">}</span> 454 455 <span class="c">#</span> 456 <span class="c"># Using bond blocks, figure out the number of disconnected fragments and</span> 457 <span class="c"># return their values along with the atom numbers in a string delimited by new</span> 458 <span class="c"># line character.</span> 459 <span class="c">#</span> <a name="GetCmpdFragments-"></a> 460 <span class="k">sub </span><span class="m">GetCmpdFragments</span> <span class="s">{</span> 461 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 462 <span class="k">my</span><span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">@AtomConnections</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$FragmentString</span><span class="cm">,</span> <span class="i">$FragmentCount</span><span class="cm">,</span> <span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$Index</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$NbrAtomNum</span><span class="cm">,</span> <span class="i">@ProcessedAtoms</span><span class="cm">,</span> <span class="i">$ProcessedAtomCount</span><span class="cm">,</span> <span class="i">$ProcessAtomNum</span><span class="cm">,</span> <span class="i">@ProcessingAtoms</span><span class="cm">,</span> <span class="i">@ConnectedAtoms</span><span class="cm">,</span> <span class="i">%Fragments</span><span class="cm">,</span> <span class="i">$FragmentNum</span><span class="cm">,</span> <span 
class="i">$AFragmentString</span><span class="s">)</span><span class="sc">;</span> 463 464 <span class="c"># Setup the connection table for each atom...</span> 465 <span class="i">@AtomConnections</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 466 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="s">)</span> = <span class="i">ParseCmpdCountsLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="s">)</span><span class="sc">;</span> 467 <span class="k">for</span> <span class="i">$AtomNum</span> <span class="s">(</span><span class="n">1</span> .. <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span> 468 <span class="i">%</span>{<span class="i">$AtomConnections</span>[<span class="i">$AtomNum</span>]} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 469 <span class="s">}</span> 470 <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span> + <span class="i">$AtomCount</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span> + <span class="i">$BondCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> 471 <span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="s">)</span> = <span class="i">ParseCmpdBondLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="s">)</span><span class="sc">;</span> 472 <span class="k">if</span> <span class="s">(</span>!<span class="i">$AtomConnections</span>[<span 
class="i">$FirstAtomNum</span>]{<span class="i">$SecondAtomNum</span>}<span class="s">)</span> <span class="s">{</span> 473 <span class="i">$AtomConnections</span>[<span class="i">$FirstAtomNum</span>]{<span class="i">$SecondAtomNum</span>} = <span class="i">$BondType</span><span class="sc">;</span> 474 <span class="s">}</span> 475 <span class="k">if</span> <span class="s">(</span>!<span class="i">$AtomConnections</span>[<span class="i">$SecondAtomNum</span>]{<span class="i">$FirstAtomNum</span>}<span class="s">)</span> <span class="s">{</span> 476 <span class="i">$AtomConnections</span>[<span class="i">$SecondAtomNum</span>]{<span class="i">$FirstAtomNum</span>} = <span class="i">$BondType</span><span class="sc">;</span> 477 <span class="s">}</span> 478 <span class="s">}</span> 479 480 <span class="c">#Get set to count fragments...</span> 481 <span class="i">$ProcessedAtomCount</span> = <span class="n">0</span><span class="sc">;</span> 482 <span class="i">$FragmentNum</span> = <span class="n">0</span><span class="sc">;</span> 483 <span class="i">%Fragments</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 484 <span class="i">@ProcessedAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 485 <span class="k">for</span> <span class="i">$AtomNum</span> <span class="s">(</span><span class="n">1</span> .. 
<span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span> 486 <span class="i">$ProcessedAtoms</span>[<span class="i">$AtomNum</span>] = <span class="n">0</span><span class="sc">;</span> 487 <span class="s">}</span> 488 <span class="k">while</span> <span class="s">(</span><span class="i">$ProcessedAtomCount</span> < <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span> 489 <span class="i">@ProcessingAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 490 <span class="i">@ConnectedAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 491 <span class="j">ATOMNUM:</span> <span class="k">for</span> <span class="i">$AtomNum</span> <span class="s">(</span><span class="n">1</span> .. <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span> 492 <span class="k">if</span> <span class="s">(</span>!<span class="i">$ProcessedAtoms</span>[<span class="i">$AtomNum</span>]<span class="s">)</span> <span class="s">{</span> 493 <span class="i">$ProcessedAtomCount</span>++<span class="sc">;</span> 494 <span class="i">$ProcessedAtoms</span>[<span class="i">$AtomNum</span>] = <span class="n">1</span><span class="sc">;</span> 495 <span class="k">push</span> <span class="i">@ProcessingAtoms</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="sc">;</span> 496 <span class="i">$FragmentNum</span>++<span class="sc">;</span> 497 <span class="i">@</span>{<span class="i">$Fragments</span>{<span class="i">$FragmentNum</span>} } = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 498 <span class="k">push</span> <span class="i">@</span>{<span class="i">$Fragments</span>{<span class="i">$FragmentNum</span>} }<span class="cm">,</span> <span class="i">$AtomNum</span><span class="sc">;</span> 499 <span class="k">last</span> <span class="j">ATOMNUM</span><span class="sc">;</span> 500 <span class="s">}</span> 
501 <span class="s">}</span> 502 503 <span class="c"># Go over the neighbors and follow the connection trail while collecting the</span> 504 <span class="c"># atoms numbers present in the connected fragment...</span> 505 <span class="k">while</span> <span class="s">(</span><span class="i">@ProcessingAtoms</span><span class="s">)</span> <span class="s">{</span> 506 <span class="k">for</span> <span class="s">(</span><span class="i">$Index</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$Index</span> < <span class="i">@ProcessingAtoms</span><span class="sc">;</span> <span class="i">$Index</span>++<span class="s">)</span> <span class="s">{</span> 507 <span class="i">$ProcessAtomNum</span> = <span class="i">$ProcessingAtoms</span>[<span class="i">$Index</span>]<span class="sc">;</span> 508 <span class="k">for</span> <span class="i">$NbrAtomNum</span> <span class="s">(</span><span class="k">keys</span> <span class="i">%</span>{<span class="i">$AtomConnections</span>[<span class="i">$ProcessAtomNum</span>]}<span class="s">)</span> <span class="s">{</span> 509 <span class="k">if</span> <span class="s">(</span>!<span class="i">$ProcessedAtoms</span>[<span class="i">$NbrAtomNum</span>]<span class="s">)</span> <span class="s">{</span> 510 <span class="i">$ProcessedAtomCount</span>++<span class="sc">;</span> 511 <span class="i">$ProcessedAtoms</span>[<span class="i">$NbrAtomNum</span>] = <span class="n">1</span><span class="sc">;</span> 512 <span class="k">push</span> <span class="i">@ConnectedAtoms</span><span class="cm">,</span> <span class="i">$NbrAtomNum</span><span class="sc">;</span> 513 <span class="k">push</span> <span class="i">@</span>{ <span class="i">$Fragments</span>{<span class="i">$FragmentNum</span>} }<span class="cm">,</span> <span class="i">$NbrAtomNum</span><span class="sc">;</span> 514 <span class="s">}</span> 515 <span class="s">}</span> 516 <span class="s">}</span> 517 <span class="i">@ProcessingAtoms</span> = <span 
class="s">(</span><span class="s">)</span><span class="sc">;</span> 518 <span class="i">@ProcessingAtoms</span> = <span class="i">@ConnectedAtoms</span><span class="sc">;</span> 519 <span class="i">@ConnectedAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 520 <span class="s">}</span> 521 <span class="s">}</span> 522 <span class="i">$FragmentCount</span> = <span class="i">$FragmentNum</span><span class="sc">;</span> 523 <span class="i">$FragmentString</span> = <span class="q">""</span><span class="sc">;</span> 524 525 <span class="c"># Sort out the fragments by size...</span> 526 <span class="k">for</span> <span class="i">$FragmentNum</span> <span class="s">(</span><span class="k">sort</span> <span class="s">{</span> <span class="i">@</span>{<span class="i">$Fragments</span>{<span class="i">$b</span>}} <=> <span class="i">@</span>{<span class="i">$Fragments</span>{<span class="i">$a</span>}} <span class="s">}</span> <span class="k">keys</span> <span class="i">%Fragments</span> <span class="s">)</span> <span class="s">{</span> 527 <span class="c"># Sort the atoms in a fragment by their numbers...</span> 528 <span class="i">$AFragmentString</span> = <span class="k">join</span> <span class="q">" "</span><span class="cm">,</span> <span class="k">sort</span> <span class="s">{</span> <span class="i">$a</span> <=> <span class="i">$b</span> <span class="s">}</span> <span class="i">@</span>{ <span class="i">$Fragments</span>{<span class="i">$FragmentNum</span>} }<span class="sc">;</span> 529 <span class="k">if</span> <span class="s">(</span><span class="i">$FragmentString</span><span class="s">)</span> <span class="s">{</span> 530 <span class="i">$FragmentString</span> .= <span class="q">"\n"</span> . 
<span class="i">$AFragmentString</span><span class="sc">;</span> 531 <span class="s">}</span> 532 <span class="k">else</span> <span class="s">{</span> 533 <span class="i">$FragmentString</span> = <span class="i">$AFragmentString</span><span class="sc">;</span> 534 <span class="s">}</span> 535 <span class="s">}</span> 536 <span class="k">return</span> <span class="s">(</span><span class="i">$FragmentCount</span><span class="cm">,</span> <span class="i">$FragmentString</span><span class="s">)</span><span class="sc">;</span> 537 <span class="s">}</span> 538 539 <span class="c"># Count the number of lines present between the 4th line and the line containing "M END"</span> <a name="GetCtabLinesCount-"></a> 540 <span class="k">sub </span><span class="m">GetCtabLinesCount</span> <span class="s">{</span> 541 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 542 <span class="k">my</span><span class="s">(</span><span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$CtabLinesCount</span><span class="s">)</span><span class="sc">;</span> 543 544 <span class="i">$CtabLinesCount</span> = <span class="n">0</span><span class="sc">;</span> 545 <span class="j">LINE:</span> <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span><span class="sc">;</span> <span class="i">$LineIndex</span> &lt; <span class="i">@$CmpdLines</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> 546 <span class="c">#</span> 547 <span class="c"># Any line after atom and bond data starting with anything other than space or</span> 548 <span class="c"># a digit indicates end of Ctab atom/bond data block...</span> 549 <span class="c">#</span> 550 <span class="k">if</span> <span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>] !~ <span 
class="q">/^[0-9 ]/</span><span class="s">)</span> <span class="s">{</span> 551 <span class="i">$CtabLinesCount</span> = <span class="i">$LineIndex</span> - <span class="n">4</span><span class="sc">;</span> 552 <span class="k">last</span> <span class="j">LINE</span><span class="sc">;</span> 553 <span class="s">}</span> 554 <span class="s">}</span> 555 <span class="k">return</span> <span class="i">$CtabLinesCount</span><span class="sc">;</span> 556 <span class="s">}</span> 557 558 <span class="c"># Using atom blocks, count the number of atoms which contain special element</span> 559 <span class="c"># symbols not present in the periodic table.</span> <a name="GetUnknownAtoms-"></a> 560 <span class="k">sub </span><span class="m">GetUnknownAtoms</span> <span class="s">{</span> 561 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 562 <span class="k">my</span><span class="s">(</span><span class="i">$UnknownAtomCount</span><span class="cm">,</span> <span class="i">$UnknownAtoms</span><span class="cm">,</span> <span class="i">$UnknownAtomLines</span><span class="cm">,</span> <span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="s">)</span><span class="sc">;</span> 563 564 <span class="i">$UnknownAtomCount</span> = <span class="n">0</span><span class="sc">;</span> 565 <span class="i">$UnknownAtoms</span> = <span class="q">""</span><span class="sc">;</span> 566 <span class="i">$UnknownAtomLines</span> = <span class="q">""</span><span class="sc">;</span> 567 <span class="s">(</span><span class="i">$AtomCount</span><span class="s">)</span> = <span class="i">ParseCmpdCountsLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="s">)</span><span class="sc">;</span> 568 <span class="k">for</span> <span 
class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> 569 <span class="s">(</span><span class="i">$AtomSymbol</span><span class="s">)</span> = <span class="i">ParseCmpdAtomLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="s">)</span><span class="sc">;</span> 570 <span class="k">if</span> <span class="s">(</span>!<span class="i">IsElement</span><span class="s">(</span><span class="i">$AtomSymbol</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 571 <span class="i">$UnknownAtomCount</span>++<span class="sc">;</span> 572 <span class="i">$UnknownAtoms</span> .= <span class="q">" $AtomSymbol"</span><span class="sc">;</span> 573 <span class="k">if</span> <span class="s">(</span><span class="i">$UnknownAtomLines</span><span class="s">)</span> <span class="s">{</span> 574 <span class="i">$UnknownAtomLines</span> .= <span class="q">"\n"</span> . 
<span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> 575 <span class="s">}</span> 576 <span class="k">else</span> <span class="s">{</span> 577 <span class="i">$UnknownAtomLines</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> 578 <span class="s">}</span> 579 <span class="s">}</span> 580 <span class="s">}</span> 581 <span class="k">return</span> <span class="s">(</span><span class="i">$UnknownAtomCount</span><span class="cm">,</span> <span class="i">$UnknownAtoms</span><span class="cm">,</span> <span class="i">$UnknownAtomLines</span><span class="s">)</span><span class="sc">;</span> 582 <span class="s">}</span> 583 584 <span class="c"># Check z coordinates of all atoms to see whether any of them is non-zero</span> 585 <span class="c"># which makes the compound geometry three dimensional...</span> 586 <span class="c">#</span> <a name="IsCmpd3D-"></a> 587 <span class="k">sub </span><span class="m">IsCmpd3D</span> <span class="s">{</span> 588 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 589 <span class="k">my</span><span class="s">(</span><span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="s">)</span><span class="sc">;</span> 590 591 <span class="s">(</span><span class="i">$AtomCount</span><span class="s">)</span> = <span class="i">ParseCmpdCountsLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="s">)</span><span class="sc">;</span> 592 <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span 
class="n">4</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> 593 <span class="s">(</span><span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="s">)</span> = <span class="i">ParseCmpdAtomLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="s">)</span><span class="sc">;</span> 594 <span class="k">if</span> <span class="s">(</span><span class="i">$AtomZ</span> != <span class="n">0</span><span class="s">)</span> <span class="s">{</span> 595 <span class="k">return</span> <span class="n">1</span><span class="sc">;</span> 596 <span class="s">}</span> 597 <span class="s">}</span> 598 <span class="k">return</span> <span class="n">0</span><span class="sc">;</span> 599 <span class="s">}</span> 600 601 <span class="c"># Check whether it's a 2D compound...</span> 602 <span class="c">#</span> <a name="IsCmpd2D-"></a> 603 <span class="k">sub </span><span class="m">IsCmpd2D</span> <span class="s">{</span> 604 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 605 606 <span class="k">return</span> <span class="i">IsCmpd3D</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> ? 
<span class="n">0</span> <span class="co">:</span> <span class="n">1</span><span class="sc">;</span> 607 <span class="s">}</span> 608 609 <span class="c"># Using bond blocks, count the number of bond lines which contain atom numbers</span> 610 <span class="c"># greater than atom count specified in compound count line...</span> 611 <span class="c">#</span> <a name="GetInvalidAtomNumbers-"></a> 612 <span class="k">sub </span><span class="m">GetInvalidAtomNumbers</span> <span class="s">{</span> 613 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 614 <span class="k">my</span><span class="s">(</span><span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$InvalidAtomNumbersCount</span><span class="cm">,</span> <span class="i">$InvalidAtomNumbers</span><span class="cm">,</span> <span class="i">$InvalidAtomNumberLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">$InvalidAtomPropertyLine</span><span class="cm">,</span> <span class="i">$ValuePairIndex</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$Value</span><span class="cm">,</span> <span class="i">@ValuePairs</span><span class="s">)</span><span class="sc">;</span> 615 616 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="s">)</span> = <span class="i">ParseCmpdCountsLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="s">)</span><span class="sc">;</span> 617 618 <span class="i">$InvalidAtomNumbersCount</span> 
= <span class="n">0</span><span class="sc">;</span> 619 <span class="i">$InvalidAtomNumbers</span> = <span class="q">""</span><span class="sc">;</span> 620 <span class="i">$InvalidAtomNumberLines</span> = <span class="q">""</span><span class="sc">;</span> 621 622 <span class="c"># Go over bond block lines...</span> 623 <span class="j">LINE:</span> <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span> + <span class="i">$AtomCount</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span> + <span class="i">$BondCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> 624 <span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="s">)</span> = <span class="i">ParseCmpdBondLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="s">)</span><span class="sc">;</span> 625 <span class="k">if</span> <span class="s">(</span><span class="i">$FirstAtomNum</span> <= <span class="i">$AtomCount</span> && <span class="i">$SecondAtomNum</span> <= <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span> 626 <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> 627 <span class="s">}</span> 628 <span class="k">if</span> <span class="s">(</span><span class="i">$FirstAtomNum</span> > <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span> 629 <span class="i">$InvalidAtomNumbersCount</span>++<span class="sc">;</span> 630 <span class="i">$InvalidAtomNumbers</span> .= <span class="q">" $FirstAtomNum"</span><span class="sc">;</span> 631 <span class="s">}</span> 632 <span class="k">if</span> <span class="s">(</span><span 
class="i">$SecondAtomNum</span> > <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span> 633 <span class="i">$InvalidAtomNumbersCount</span>++<span class="sc">;</span> 634 <span class="i">$InvalidAtomNumbers</span> .= <span class="q">" $SecondAtomNum"</span><span class="sc">;</span> 635 <span class="s">}</span> 636 <span class="k">if</span> <span class="s">(</span><span class="i">$InvalidAtomNumberLines</span><span class="s">)</span> <span class="s">{</span> 637 <span class="i">$InvalidAtomNumberLines</span> .= <span class="q">"\n"</span> . <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> 638 <span class="s">}</span> 639 <span class="k">else</span> <span class="s">{</span> 640 <span class="i">$InvalidAtomNumberLines</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> 641 <span class="s">}</span> 642 <span class="s">}</span> 643 <span class="c"># Go over property lines before M END...</span> 644 <span class="c">#</span> 645 <span class="j">LINE:</span> <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span> + <span class="i">$BondCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="i">@$CmpdLines</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> 646 <span class="i">$Line</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> 647 <span class="i">@ValuePairs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 648 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M END/i</span><span class="s">)</span> <span class="s">{</span> 649 <span class="k">last</span> <span 
class="j">LINE</span><span class="sc">;</span> 650 <span class="s">}</span> 651 <span class="i">@ValuePairs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 652 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M CHG/i</span><span class="s">)</span> <span class="s">{</span> 653 <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdChargePropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 654 <span class="s">}</span> 655 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M RAD/i</span><span class="s">)</span> <span class="s">{</span> 656 <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdRadicalPropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 657 <span class="s">}</span> 658 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M ISO/i</span><span class="s">)</span> <span class="s">{</span> 659 <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdIsotopePropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 660 <span class="s">}</span> 661 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^A /i</span><span class="s">)</span> <span class="s">{</span> 662 <span class="k">my</span><span class="s">(</span><span class="i">$NextLine</span><span class="s">)</span><span class="sc">;</span> 663 <span class="i">$LineIndex</span>++<span class="sc">;</span> 664 <span class="i">$NextLine</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> 665 <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdAtomAliasPropertyLine</span><span class="s">(</span><span 
class="i">$Line</span><span class="cm">,</span> <span class="i">$NextLine</span><span class="s">)</span><span class="sc">;</span> 666 <span class="s">}</span> 667 <span class="k">else</span> <span class="s">{</span> 668 <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> 669 <span class="s">}</span> 670 671 <span class="i">$InvalidAtomPropertyLine</span> = <span class="n">0</span><span class="sc">;</span> 672 <span class="k">for</span> <span class="s">(</span><span class="i">$ValuePairIndex</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$ValuePairIndex</span> < <span class="i">$#ValuePairs</span><span class="sc">;</span> <span class="i">$ValuePairIndex</span> += <span class="n">2</span><span class="s">)</span> <span class="s">{</span> 673 <span class="i">$AtomNum</span> = <span class="i">$ValuePairs</span>[<span class="i">$ValuePairIndex</span>]<span class="sc">;</span> <span class="i">$Value</span> = <span class="i">$ValuePairs</span>[<span class="i">$ValuePairIndex</span> + <span class="n">1</span>]<span class="sc">;</span> 674 <span class="k">if</span> <span class="s">(</span><span class="i">$AtomNum</span> > <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span> 675 <span class="i">$InvalidAtomPropertyLine</span> = <span class="n">1</span><span class="sc">;</span> 676 <span class="i">$InvalidAtomNumbersCount</span>++<span class="sc">;</span> 677 <span class="i">$InvalidAtomNumbers</span> .= <span class="q">" $AtomNum"</span><span class="sc">;</span> 678 <span class="s">}</span> 679 <span class="s">}</span> 680 <span class="k">if</span> <span class="s">(</span><span class="i">$InvalidAtomPropertyLine</span><span class="s">)</span> <span class="s">{</span> 681 <span class="k">if</span> <span class="s">(</span><span class="i">$InvalidAtomNumberLines</span><span class="s">)</span> <span class="s">{</span> 682 <span class="i">$InvalidAtomNumberLines</span> .= <span 
class="q">"\n"</span> . <span class="i">$Line</span><span class="sc">;</span> 683 <span class="s">}</span> 684 <span class="k">else</span> <span class="s">{</span> 685 <span class="i">$InvalidAtomNumberLines</span> = <span class="i">$Line</span><span class="sc">;</span> 686 <span class="s">}</span> 687 <span class="s">}</span> 688 <span class="s">}</span> 689 690 <span class="k">return</span> <span class="s">(</span><span class="i">$InvalidAtomNumbersCount</span><span class="cm">,</span> <span class="i">$InvalidAtomNumbers</span><span class="cm">,</span> <span class="i">$InvalidAtomNumberLines</span><span class="s">)</span><span class="sc">;</span> 691 <span class="s">}</span> 692 693 <span class="c"># Ctab lines: Atom block</span> 694 <span class="c">#</span> 695 <span class="c"># Format: xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee</span> 696 <span class="c"># A10 A10 A10 xA3 A2A3 A3 A3 A3 A3 A3 A3 A3 A3 A3 A3</span> 697 <span class="c"># x,y,z: Atom coordinates</span> 698 <span class="c"># aaa: Atom symbol. Entry in periodic table or L for atom list, A, Q, * for unspecified</span> 699 <span class="c"># atom, and LP for lone pair, or R# for Rgroup label</span> 700 <span class="c"># dd: Mass difference. -3, -2, -1, 0, 1, 2, 3, 4 (0 for value beyond these limits)</span> 701 <span class="c"># ccc: Charge. 0 = uncharged or value other than these, 1 = +3, 2 = +2, 3 = +1,</span> 702 <span class="c"># 4 = doublet radical, 5 = -1, 6 = -2, 7 = -3</span> 703 <span class="c"># sss: Atom stereo parity. 0 = not stereo, 1 = odd, 2 = even, 3 = either or unmarked stereo center</span> 704 <span class="c"># hhh: Hydrogen count + 1. 1 = H0, 2 = H1, 3 = H2, 4 = H3, 5 = H4</span> 705 <span class="c"># bbb: Stereo care box. 0 = ignore stereo configuration of this double bond atom, 1 = stereo</span> 706 <span class="c"># configuration of double bond atom must match</span> 707 <span class="c"># vvv: Valence. 
0 = no marking (default), (1 to 14) = (1 to 14), 15 = zero valence</span> 708 <span class="c"># HHH: H0 designator. 0 = not specified, 1 = no H atoms allowed (redundant due to hhh)</span> 709 <span class="c"># rrr: Not used</span> 710 <span class="c"># iii: Not used</span> 711 <span class="c"># mmm: Atom-atom mapping number. 1 - number of atoms</span> 712 <span class="c"># nnn: Inversion/retention flag. 0 = property not applied, 1 = configuration is inverted,</span> 713 <span class="c"># 2 = configuration is retained.</span> 714 <span class="c"># eee: Exact change flag. 0 = property not applied, 1 = change on atom must be</span> 715 <span class="c"># exactly as shown</span> 716 <span class="c">#</span> 717 <span class="c"># Notes:</span> 718 <span class="c"># . StereoParity: 1 - ClockwiseStereo, 2 - AntiClockwiseStereo; 3 - Either; 0 - none. These</span> 719 <span class="c"># values determine chirality around the chiral center; a non zero value indicates atom</span> 720 <span class="c"># has been marked as chiral center.</span> 721 <span class="c">#</span> <a name="ParseCmpdAtomLine-"></a> 722 <span class="k">sub </span><span class="m">ParseCmpdAtomLine</span> <span class="s">{</span> 723 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 724 <span class="k">my</span> <span class="s">(</span><span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="s">)</span><span class="sc">;</span> 725 726 <span class="s">(</span><span class="i">$AtomX</span><span class="cm">,</span> <span
class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="s">)</span> = <span class="s">(</span><span class="q">''</span><span class="s">)</span> x <span class="n">7</span><span class="sc">;</span> 727 <span class="k">if</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> > <span class="n">31</span><span class="s">)</span> <span class="s">{</span> 728 <span class="s">(</span><span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A10A10A10xA3A2A3A3"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 729 <span class="s">}</span> 730 <span class="k">else</span> <span class="s">{</span> 731 <span class="s">(</span><span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A10A10A10"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 732 <span class="s">}</span> 733 <span class="k">return</span> <span class="s">(</span><span 
class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="s">)</span><span class="sc">;</span> 734 <span class="s">}</span> 735 736 <span class="c"># Map MDL charge value used in SD and MOL files to internal charge used by MayaChemTools.</span> 737 <span class="c">#</span> <a name="MDLChargeToInternalCharge-"></a> 738 <span class="k">sub </span><span class="m">MDLChargeToInternalCharge</span> <span class="s">{</span> 739 <span class="k">my</span><span class="s">(</span><span class="i">$MDLCharge</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 740 <span class="k">my</span><span class="s">(</span><span class="i">$InternalCharge</span><span class="s">)</span><span class="sc">;</span> 741 742 <span class="j">CHARGE:</span> <span class="s">{</span> 743 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">0</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">0</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> 744 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> 745 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">2</span><span class="s">)</span> <span 
class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">2</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> 746 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> 747 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">5</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">-1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> 748 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">6</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">-2</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> 749 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">7</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">-3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> 750 <span class="c"># All other MDL charge values, including 4 corresponding to "doublet radical",</span> 751 <span class="c"># are assigned internal value of 0.</span> 752 <span class="i">$InternalCharge</span> = <span class="n">0</span><span class="sc">;</span> 753 <span class="k">if</span> <span class="s">(</span><span 
class="i">$MDLCharge</span> != <span class="n">4</span><span class="s">)</span> <span class="s">{</span> 754 <span class="w">carp</span> <span class="q">"Warning: MDLChargeToInternalCharge: MDL charge value, $MDLCharge, is not supported: An internal charge value, 0, has been assigned..."</span><span class="sc">;</span> 755 <span class="s">}</span> 756 <span class="s">}</span> 757 <span class="k">return</span> <span class="i">$InternalCharge</span><span class="sc">;</span> 758 <span class="s">}</span> 759 760 <span class="c"># Map internal charge used by MayaChemTools to MDL charge value used in SD and MOL files.</span> 761 <span class="c">#</span> <a name="InternalChargeToMDLCharge-"></a> 762 <span class="k">sub </span><span class="m">InternalChargeToMDLCharge</span> <span class="s">{</span> 763 <span class="k">my</span><span class="s">(</span><span class="i">$InternalCharge</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 764 <span class="k">my</span><span class="s">(</span><span class="i">$MDLCharge</span><span class="s">)</span><span class="sc">;</span> 765 766 <span class="j">CHARGE:</span> <span class="s">{</span> 767 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> 768 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">2</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> 769 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span 
class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> 770 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">-1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">5</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> 771 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">-2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">6</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> 772 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">-3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">7</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> 773 <span class="c"># All other internal charge values, which have no corresponding MDL charge value,</span> 774 <span class="c"># are assigned MDL charge value of 0.</span> 775 <span class="i">$MDLCharge</span> = <span class="n">0</span><span class="sc">;</span> 776 <span class="s">}</span> 777 <span class="k">return</span> <span class="i">$MDLCharge</span><span class="sc">;</span> 778 <span class="s">}</span> 779 780 <span class="c"># Ctab lines: Bond block</span> 781 <span class="c">#</span> 782 <span class="c"># Format: 111222tttsssxxxrrrccc</span> 783 <span class="c">#</span> 784 <span class="c"># 111: First atom number.</span> 785 <span
class="c"># 222: Second atom number.</span> 786 <span class="c"># ttt: Bond type. 1 = Single, 2 = Double, 3 = Triple, 4 = Aromatic, 5 = Single or Double,</span> 787 <span class="c"># 6 = Single or Aromatic, 7 = Double or Aromatic, 8 = Any</span> 788 <span class="c"># sss: Bond stereo. Single bonds: 0 = not stereo, 1 = Up, 4 = Either, 6 = Down,</span> 789 <span class="c"># Double bonds: 0 = Use x-, y-, z-coords from atom block to determine cis or trans,</span> 790 <span class="c"># 3 = Cis or trans (either) double bond</span> 791 <span class="c"># xxx: Not used</span> 792 <span class="c"># rrr: Bond topology. 0 = Either, 1 = Ring, 2 = Chain</span> 793 <span class="c"># ccc: Reacting center status. 0 = unmarked, 1 = a center, -1 = not a center,</span> 794 <span class="c"># Additional: 2 = no change,4 = bond made/broken, 8 = bond order changes 12 = 4+8</span> 795 <span class="c"># (both made/broken and changes); 5 = (4 + 1), 9 = (8 + 1), and 13 = (12 + 1) are also possible</span> 796 <span class="c">#</span> <a name="ParseCmpdBondLine-"></a> 797 <span class="k">sub </span><span class="m">ParseCmpdBondLine</span> <span class="s">{</span> 798 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 799 <span class="k">my</span><span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span><span class="sc">;</span> 800 801 <span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span> = <span class="k">map</span> <span class="s">{</span><span class="q">s/ //g</span><span 
class="sc">;</span> <span class="i">$_</span><span class="s">}</span> <span class="k">unpack</span><span class="s">(</span><span class="q">"A3A3A3A3"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 802 <span class="k">return</span> <span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span><span class="sc">;</span> 803 <span class="s">}</span> 804 805 <span class="c"># Map MDL bond type value used in SD and MOL files to internal bond order and bond types</span> 806 <span class="c"># values used by MayaChemTools...</span> 807 <span class="c">#</span> <a name="MDLBondTypeToInternalBondOrder-"></a> 808 <span class="k">sub </span><span class="m">MDLBondTypeToInternalBondOrder</span> <span class="s">{</span> 809 <span class="k">my</span><span class="s">(</span><span class="i">$MDLBondType</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 810 <span class="k">my</span><span class="s">(</span><span class="i">$InternalBondOrder</span><span class="cm">,</span> <span class="i">$InternalBondType</span><span class="s">)</span><span class="sc">;</span> 811 812 <span class="i">$InternalBondType</span> = <span class="q">''</span><span class="sc">;</span> 813 814 <span class="j">BONDTYPE:</span> <span class="s">{</span> 815 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">1</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'Single'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> 816 
<span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">2</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'Double'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> 817 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">3</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'Triple'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> 818 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">4</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">1.5</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'Aromatic'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> <span class="c"># Aromatic</span> 819 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">5</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">1</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'SingleOrDouble'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> <span class="c"># Single or Double</span> 820 <span class="k">if</span> <span
class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">6</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">1</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'SingleOrAromatic'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> <span class="c"># Single or Aromatic</span> 821 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">7</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">2</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'DoubleOrAromatic'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> <span class="c"># Double or Aromatic</span> 822 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">8</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">1</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'Any'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> <span class="c"># Any</span> 823 <span class="c">#</span> 824 <span class="c"># Although MDL aromatic bond values are used for query only and explicit Kekule bond order</span> 825 <span class="c"># values must be assigned, internal value of 1.5 is allowed to indicate aromatic bond orders.</span> 826 <span class="c">#</span> 827 <span class="c"># All other MDL bond type values - 5 = Single or Double, 6 = Single or Aromatic, 7 = Double or Aromatic,</span> 828 <span class="c"># 8 = Any - are also assigned appropriate internal value
of 1: These are meant to be used for</span> 829 <span class="c"># structure queries by MDL products.</span> 830 <span class="c">#</span> 831 <span class="i">$InternalBondOrder</span> = <span class="n">1</span><span class="sc">;</span> 832 <span class="i">$InternalBondType</span> = <span class="q">'Single'</span><span class="sc">;</span> 833 834 <span class="w">carp</span> <span class="q">"Warning: MDLBondTypeToInternalBondOrder: MDL bond type value, $MDLBondType, is not supported: An internal bond order value, 0, has been assigned..."</span><span class="sc">;</span> 835 <span class="s">}</span> 836 <span class="k">return</span> <span class="s">(</span><span class="i">$InternalBondOrder</span><span class="cm">,</span> <span class="i">$InternalBondType</span><span class="s">)</span><span class="sc">;</span> 837 <span class="s">}</span> 838 839 <span class="c"># Map internal bond order and bond type values used by MayaChemTools to MDL bond type value used</span> 840 <span class="c"># in SD and MOL files...</span> 841 <span class="c">#</span> <a name="InternalBondOrderToMDLBondType-"></a> 842 <span class="k">sub </span><span class="m">InternalBondOrderToMDLBondType</span> <span class="s">{</span> 843 <span class="k">my</span><span class="s">(</span><span class="i">$InternalBondOrder</span><span class="cm">,</span> <span class="i">$InternalBondType</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 844 <span class="k">my</span><span class="s">(</span><span class="i">$MDLBondType</span><span class="s">)</span><span class="sc">;</span> 845 846 <span class="j">BONDTYPE:</span> <span class="s">{</span> 847 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondOrder</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> 848 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondType</span> =~ <span class="q">/^SingleOrDouble$/i</span><span class="s">)</span> <span 
class="s">{</span> 849 <span class="i">$MDLBondType</span> = <span class="n">5</span><span class="sc">;</span> 850 <span class="s">}</span> 851 <span class="k">elsif</span> <span class="s">(</span><span class="i">$InternalBondType</span> =~ <span class="q">/^SingleOrAromatic$/i</span><span class="s">)</span> <span class="s">{</span> 852 <span class="i">$MDLBondType</span> = <span class="n">6</span><span class="sc">;</span> 853 <span class="s">}</span> 854 <span class="k">elsif</span> <span class="s">(</span><span class="i">$InternalBondType</span> =~ <span class="q">/^Any$/i</span><span class="s">)</span> <span class="s">{</span> 855 <span class="i">$MDLBondType</span> = <span class="n">8</span><span class="sc">;</span> 856 <span class="s">}</span> 857 <span class="k">else</span> <span class="s">{</span> 858 <span class="i">$MDLBondType</span> = <span class="n">1</span><span class="sc">;</span> 859 <span class="s">}</span> 860 <span class="i">$MDLBondType</span> = <span class="n">1</span><span class="sc">;</span> 861 <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span> 862 <span class="s">}</span> 863 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondOrder</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> 864 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondType</span> =~ <span class="q">/^DoubleOrAromatic$/i</span><span class="s">)</span> <span class="s">{</span> 865 <span class="i">$MDLBondType</span> = <span class="n">7</span><span class="sc">;</span> 866 <span class="s">}</span> 867 <span class="k">else</span> <span class="s">{</span> 868 <span class="i">$MDLBondType</span> = <span class="n">2</span><span class="sc">;</span> 869 <span class="s">}</span> 870 <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span> 871 <span class="s">}</span> 872 <span class="k">if</span> <span class="s">(</span><span 
class="i">$InternalBondOrder</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondType</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> 873 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondOrder</span> == <span class="n">1.5</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondType</span> = <span class="n">4</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> 874 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondType</span> =~ <span class="q">/^Any$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondType</span> = <span class="n">8</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> 875 876 <span class="i">$MDLBondType</span> = <span class="n">1</span><span class="sc">;</span> 877 878 <span class="w">carp</span> <span class="q">"Warning: InternalBondOrderToMDLBondType: Internal bond order and type values, $InternalBondOrder and $InternalBondType, don't match any valid MDL bond type: MDL bond type value, 1, has been assigned..."</span><span class="sc">;</span> 879 <span class="s">}</span> 880 <span class="k">return</span> <span class="i">$MDLBondType</span><span class="sc">;</span> 881 <span class="s">}</span> 882 883 <span class="c"># Third line: Comments - A blank line is also allowed.</span> <a name="ParseCmpdCommentsLine-"></a> 884 <span class="k">sub </span><span class="m">ParseCmpdCommentsLine</span> <span class="s">{</span> 885 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 886 <span 
class="k">my</span><span class="s">(</span><span class="i">$Comments</span><span class="s">)</span><span class="sc">;</span> 887 888 <span class="i">$Comments</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A80"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 889 890 <span class="k">return</span> <span class="s">(</span><span class="i">$Comments</span><span class="s">)</span><span class="sc">;</span> 891 <span class="s">}</span> 892 893 <span class="c"># Map MDL bond stereo value used in SD and MOL files to internal bond stereochemistry values used by MayaChemTools...</span> 894 <span class="c">#</span> <a name="MDLBondStereoToInternalBondStereochemistry-"></a> 895 <span class="k">sub </span><span class="m">MDLBondStereoToInternalBondStereochemistry</span> <span class="s">{</span> 896 <span class="k">my</span><span class="s">(</span><span class="i">$MDLBondStereo</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 897 <span class="k">my</span><span class="s">(</span><span class="i">$InternalBondStereo</span><span class="s">)</span><span class="sc">;</span> 898 899 <span class="i">$InternalBondStereo</span> = <span class="q">''</span><span class="sc">;</span> 900 901 <span class="j">BONDSTEREO:</span> <span class="s">{</span> 902 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondStereo</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondStereo</span> = <span class="q">'Up'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> 903 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondStereo</span> == <span class="n">4</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondStereo</span> = <span 
class="q">'UpOrDown'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> 904 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondStereo</span> == <span class="n">6</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondStereo</span> = <span class="q">'Down'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> 905 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondStereo</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondStereo</span> = <span class="q">'CisOrTrans'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> 906 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondStereo</span> == <span class="n">0</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondStereo</span> = <span class="q">'None'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> 907 908 <span class="i">$InternalBondStereo</span> = <span class="q">''</span><span class="sc">;</span> 909 <span class="w">carp</span> <span class="q">"Warning: MDLBondStereoToInternalBondType: MDL bond stereo value, $MDLBondStereo, is not supported: It has been ignored and bond order would be used to determine bond type..."</span><span class="sc">;</span> 910 <span class="s">}</span> 911 <span class="k">return</span> <span class="i">$InternalBondStereo</span><span class="sc">;</span> 912 <span class="s">}</span> 913 914 <span class="c"># Map internal bond stereochemistry values used by MayaChemTools to MDL bond stereo value used in SD and MOL files...</span> 915 <span 
class="c">#</span> <a name="InternalBondStereochemistryToMDLBondStereo-"></a> 916 <span class="k">sub </span><span class="m">InternalBondStereochemistryToMDLBondStereo</span> <span class="s">{</span> 917 <span class="k">my</span><span class="s">(</span><span class="i">$InternalBondStereo</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 918 <span class="k">my</span><span class="s">(</span><span class="i">$MDLBondStereo</span><span class="s">)</span><span class="sc">;</span> 919 920 <span class="i">$MDLBondStereo</span> = <span class="n">0</span><span class="sc">;</span> 921 922 <span class="j">BONDSTEREO:</span> <span class="s">{</span> 923 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondStereo</span> =~ <span class="q">/^Up$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondStereo</span> = <span class="n">1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> 924 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondStereo</span> =~ <span class="q">/^UpOrDown$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondStereo</span> = <span class="n">4</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> 925 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondStereo</span> =~ <span class="q">/^Down$/</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondStereo</span> = <span class="n">6</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> 926 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondStereo</span> =~ <span class="q">/^CisOrTrans$/</span><span class="s">)</span> <span 
class="s">{</span> <span class="i">$MDLBondStereo</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> 927 928 <span class="i">$MDLBondStereo</span> = <span class="n">0</span><span class="sc">;</span> 929 <span class="s">}</span> 930 <span class="k">return</span> <span class="i">$MDLBondStereo</span><span class="sc">;</span> 931 <span class="s">}</span> 932 933 <span class="c"># Fourth line: Counts</span> 934 <span class="c">#</span> 935 <span class="c"># Format: aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv</span> 936 <span class="c">#</span> 937 <span class="c"># aaa: number of atoms; bbb: number of bonds; lll: number of atom lists; fff: (obsolete)</span> 938 <span class="c"># ccc: chiral flag: 0=not chiral, 1=chiral; sss: number of stext entries; xxx,rrr,ppp,iii:</span> 939 <span class="c"># (obsolete); mmm: number of lines of additional properties, including the M END line, No</span> 940 <span class="c"># longer supported, default is set to 999; vvvvvv: version</span> 941 <a name="ParseCmpdCountsLine-"></a> 942 <span class="k">sub </span><span class="m">ParseCmpdCountsLine</span> <span class="s">{</span> 943 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 944 <span class="k">my</span><span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span><span class="sc">;</span> 945 946 <span class="k">if</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> >= <span class="n">39</span><span class="s">)</span> <span 
class="s">{</span> 947 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A3A3x3x3A3x3x3x3x3x3A3A6"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 948 <span class="s">}</span> 949 <span class="k">elsif</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> >= <span class="n">15</span><span class="s">)</span> <span class="s">{</span> 950 <span class="s">(</span><span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span> = <span class="s">(</span><span class="q">"999"</span><span class="cm">,</span> <span class="q">"v2000"</span><span class="s">)</span><span class="sc">;</span> 951 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A3A3x3x3A3"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 952 <span class="s">}</span> 953 <span class="k">else</span> <span class="s">{</span> 954 <span class="s">(</span><span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span> = <span class="s">(</span><span class="q">"0"</span><span class="cm">,</span> <span class="q">"999"</span><span class="cm">,</span> <span class="q">"v2000"</span><span 
class="s">)</span><span class="sc">;</span> 955 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A3A3"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 956 <span class="s">}</span> 957 958 <span class="k">if</span> <span class="s">(</span><span class="i">$Version</span> =~ <span class="q">/V3000/i</span><span class="s">)</span> <span class="s">{</span> 959 <span class="c"># Current version of MayaChemTools modules and classes for processing MDL MOL and SD don't support</span> 960 <span class="c"># V3000. So instead of relying on callers, just exit with an error to disable any processing of V3000</span> 961 <span class="c"># format.</span> 962 <span class="w">croak</span> <span class="q">"Error: SDFileUtil::ParseCmpdCountsLine: The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools..."</span><span class="sc">;</span> 963 <span class="s">}</span> 964 965 <span class="k">return</span> <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span><span class="sc">;</span> 966 <span class="s">}</span> 967 968 <span class="c"># Second line: Misc info</span> 969 <span class="c">#</span> 970 <span class="c"># Format: IIPPPPPPPPMMDDYYHHmmddSSssssssssssEEEEEEEEEEEERRRRRR</span> 971 <span class="c"># A2A8 A10 A2I2A10 A12 A6</span> 972 <span class="c"># User's first and last initials (I), program name (P), date/time (M/D/Y,H:m),</span> 973 <span class="c"># dimensional codes - 2D or 3D (d),scaling factors (S, s), energy (E) if 
modeling program input,</span> 974 <span class="c"># internal registry number (R) if input through MDL form. A blank line is also allowed.</span> <a name="ParseCmpdMiscInfoLine-"></a> 975 <span class="k">sub </span><span class="m">ParseCmpdMiscInfoLine</span> <span class="s">{</span> 976 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 977 <span class="k">my</span><span class="s">(</span><span class="i">$UserInitial</span><span class="cm">,</span> <span class="i">$ProgramName</span><span class="cm">,</span> <span class="i">$Date</span><span class="cm">,</span> <span class="i">$Code</span><span class="cm">,</span> <span class="i">$ScalingFactor1</span><span class="cm">,</span> <span class="i">$ScalingFactor2</span><span class="cm">,</span> <span class="i">$Energy</span><span class="cm">,</span> <span class="i">$RegistryNum</span><span class="s">)</span><span class="sc">;</span> 978 979 <span class="s">(</span><span class="i">$UserInitial</span><span class="cm">,</span> <span class="i">$ProgramName</span><span class="cm">,</span> <span class="i">$Date</span><span class="cm">,</span> <span class="i">$Code</span><span class="cm">,</span> <span class="i">$ScalingFactor1</span><span class="cm">,</span> <span class="i">$ScalingFactor2</span><span class="cm">,</span> <span class="i">$Energy</span><span class="cm">,</span> <span class="i">$RegistryNum</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A2A8A10A2A2A10A12A6"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 980 <span class="k">return</span> <span class="s">(</span><span class="i">$UserInitial</span><span class="cm">,</span> <span class="i">$ProgramName</span><span class="cm">,</span> <span class="i">$Date</span><span class="cm">,</span> <span class="i">$Code</span><span class="cm">,</span> <span 
class="i">$ScalingFactor1</span><span class="cm">,</span> <span class="i">$ScalingFactor2</span><span class="cm">,</span> <span class="i">$Energy</span><span class="cm">,</span> <span class="i">$RegistryNum</span><span class="s">)</span><span class="sc">;</span> 981 <span class="s">}</span> 982 983 <span class="c"># First line: Molecule name. This line is unformatted, but like all other lines in a</span> 984 <span class="c"># molfile may not extend beyond column 80. A blank line is also allowed.</span> <a name="ParseCmpdMolNameLine-"></a> 985 <span class="k">sub </span><span class="m">ParseCmpdMolNameLine</span> <span class="s">{</span> 986 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 987 <span class="k">my</span><span class="s">(</span><span class="i">$MolName</span><span class="s">)</span><span class="sc">;</span> 988 989 <span class="i">$MolName</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A80"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 990 991 <span class="k">return</span> <span class="s">(</span><span class="i">$MolName</span><span class="s">)</span><span class="sc">;</span> 992 <span class="s">}</span> 993 994 <span class="c"># Parse atom alias property line in CTAB generic properties block.</span> 995 <span class="c">#</span> 996 <span class="c"># Atom alias property line format:</span> 997 <span class="c">#</span> 998 <span class="c"># A aaa</span> 999 <span class="c"># x...</span> 1000 <span class="c">#</span> 1001 <span class="c"># aaa: Atom number</span> 1002 <span class="c"># x: Atom alias in next line</span> 1003 <span class="c">#</span> <a name="ParseCmpdAtomAliasPropertyLine-"></a>1004 <span class="k">sub </span><span class="m">ParseCmpdAtomAliasPropertyLine</span> <span class="s">{</span> 1005 <span class="k">my</span><span 
class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">$NextLine</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 1006 <span class="k">my</span><span class="s">(</span><span class="i">$Label</span><span class="cm">,</span> <span class="i">$AtomNumber</span><span class="cm">,</span> <span class="i">$AtomAlias</span><span class="s">)</span><span class="sc">;</span> 1007 1008 <span class="s">(</span><span class="i">$Label</span><span class="cm">,</span> <span class="i">$AtomNumber</span><span class="s">)</span> = <span class="k">split</span><span class="s">(</span><span class="q">' '</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 1009 <span class="i">$AtomAlias</span> = <span class="i">$NextLine</span><span class="sc">;</span> 1010 1011 <span class="k">if</span> <span class="s">(</span>!<span class="i">$AtomAlias</span><span class="s">)</span> <span class="s">{</span> 1012 <span class="w">carp</span> <span class="q">"Warning: _ParseCmpdAtomAliasPropertyLine: No atom alias value specified on the line following atom alias property line..."</span><span class="sc">;</span> 1013 <span class="s">}</span> 1014 1015 <span class="k">return</span> <span class="s">(</span><span class="i">$AtomNumber</span><span class="cm">,</span> <span class="i">$AtomAlias</span><span class="s">)</span><span class="sc">;</span> 1016 <span class="s">}</span> 1017 1018 <span class="c"># Parse charge property line in CTAB generic properties block.</span> 1019 <span class="c">#</span> 1020 <span class="c"># Charge property line format:</span> 1021 <span class="c">#</span> 1022 <span class="c"># M CHGnn8 aaa vvv ...</span> 1023 <span class="c">#</span> 1024 <span class="c"># nn8: Number of value pairs. Maximum of 8 pairs allowed.</span> 1025 <span class="c"># aaa: Atom number</span> 1026 <span class="c"># vvv: -15 to +15. Default of 0 = uncharged atom. 
When present, this property supersedes</span> 1027 <span class="c"># all charge and radical values in the atom block, forcing a 0 charge on all atoms not</span> 1028 <span class="c"># listed in an M CHG or M RAD line.</span> 1029 <span class="c">#</span> <a name="ParseCmpdChargePropertyLine-"></a>1030 <span class="k">sub </span><span class="m">ParseCmpdChargePropertyLine</span> <span class="s">{</span> 1031 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 1032 1033 <span class="k">return</span> <span class="i">_ParseCmpdGenericPropertyLine</span><span class="s">(</span><span class="q">'Charge'</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 1034 <span class="s">}</span> 1035 1036 1037 <span class="c"># Parse isotope property line in CTAB generic properties block.</span> 1038 <span class="c">#</span> 1039 <span class="c"># Isotope property line format:</span> 1040 <span class="c">#</span> 1041 <span class="c"># M ISOnn8 aaa vvv ...</span> 1042 <span class="c">#</span> 1043 <span class="c"># nn8: Number of value pairs. Maximum of 8 pairs allowed.</span> 1044 <span class="c"># aaa: Atom number</span> 1045 <span class="c"># vvv: Absolute mass of the atom isotope as a positive integer. When present, this property</span> 1046 <span class="c"># supersedes all isotope values in the atom block. Default (no entry) means natural</span> 1047 <span class="c"># abundance. The difference between this absolute mass value and the natural</span> 1048 <span class="c"># abundance value specified in the PTABLE.DAT file must be within the range of -18</span> 1049 <span class="c"># to +12</span> 1050 <span class="c">#</span> 1051 <span class="c"># Notes:</span> 1052 <span class="c"># . 
Values correspond to mass numbers...</span> 1053 <span class="c">#</span> <a name="ParseCmpdIsotopePropertyLine-"></a>1054 <span class="k">sub </span><span class="m">ParseCmpdIsotopePropertyLine</span> <span class="s">{</span> 1055 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 1056 1057 <span class="k">return</span> <span class="i">_ParseCmpdGenericPropertyLine</span><span class="s">(</span><span class="q">'Isotope'</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 1058 <span class="s">}</span> 1059 1060 <span class="c"># Parse radical property line in CTAB generic properties block.</span> 1061 <span class="c">#</span> 1062 <span class="c"># Radical property line format:</span> 1063 <span class="c">#</span> 1064 <span class="c"># M RADnn8 aaa vvv ...</span> 1065 <span class="c">#</span> 1066 <span class="c"># nn8: Number of value pairs. Maximum of 8 pairs allowed.</span> 1067 <span class="c"># aaa: Atom number</span> 1068 <span class="c"># vvv: Default of 0 = no radical, 1 = singlet, 2 = doublet, 3 = triplet. 
When</span> 1069 <span class="c"># present, this property supersedes all charge and radical values in the atom block,</span> 1070 <span class="c"># forcing a 0 (zero) charge and radical on all atoms not listed in an M CHG or</span> 1071 <span class="c"># M RAD line.</span> 1072 <span class="c">#</span> <a name="ParseCmpdRadicalPropertyLine-"></a>1073 <span class="k">sub </span><span class="m">ParseCmpdRadicalPropertyLine</span> <span class="s">{</span> 1074 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 1075 1076 <span class="k">return</span> <span class="i">_ParseCmpdGenericPropertyLine</span><span class="s">(</span><span class="q">'Radical'</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 1077 <span class="s">}</span> 1078 1079 <span class="c"># Map MDL radical stereo value used in SD and MOL files to internal spin multiplicity values used by MayaChemTools...</span> 1080 <span class="c">#</span> <a name="MDLRadicalToInternalSpinMultiplicity-"></a>1081 <span class="k">sub </span><span class="m">MDLRadicalToInternalSpinMultiplicity</span> <span class="s">{</span> 1082 <span class="k">my</span><span class="s">(</span><span class="i">$MDLRadical</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 1083 <span class="k">my</span><span class="s">(</span><span class="i">$InternalSpinMultiplicity</span><span class="s">)</span><span class="sc">;</span> 1084 1085 <span class="i">$InternalSpinMultiplicity</span> = <span class="q">''</span><span class="sc">;</span> 1086 1087 <span class="j">SPINMULTIPLICITY:</span> <span class="s">{</span> 1088 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLRadical</span> == <span class="n">0</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalSpinMultiplicity</span> = <span 
class="n">0</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span> 1089 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLRadical</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalSpinMultiplicity</span> = <span class="n">1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span> 1090 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLRadical</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalSpinMultiplicity</span> = <span class="n">2</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span> 1091 <span class="k">if</span> <span class="s">(</span><span class="i">$MDLRadical</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalSpinMultiplicity</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span> 1092 <span class="i">$InternalSpinMultiplicity</span> = <span class="q">''</span><span class="sc">;</span> 1093 <span class="w">carp</span> <span class="q">"Warning: MDLRadicalToInternalSpinMultiplicity: MDL radical value, $MDLRadical, specifed on line M RAD is not supported..."</span><span class="sc">;</span> 1094 <span class="s">}</span> 1095 <span class="k">return</span> <span class="i">$InternalSpinMultiplicity</span><span class="sc">;</span> 1096 <span class="s">}</span> 1097 1098 <span class="c"># Map internal spin multiplicity values used by MayaChemTools to MDL radical stereo value used in SD and MOL files...</span> 1099 <span class="c">#</span> <a 
name="InternalSpinMultiplicityToMDLRadical-"></a>1100 <span class="k">sub </span><span class="m">InternalSpinMultiplicityToMDLRadical</span> <span class="s">{</span> 1101 <span class="k">my</span><span class="s">(</span><span class="i">$InternalSpinMultiplicity</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 1102 <span class="k">my</span><span class="s">(</span><span class="i">$MDLRadical</span><span class="s">)</span><span class="sc">;</span> 1103 1104 <span class="i">$MDLRadical</span> = <span class="n">0</span><span class="sc">;</span> 1105 1106 <span class="j">SPINMULTIPLICITY:</span> <span class="s">{</span> 1107 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalSpinMultiplicity</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLRadical</span> = <span class="n">1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span> 1108 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalSpinMultiplicity</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLRadical</span> = <span class="n">2</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span> 1109 <span class="k">if</span> <span class="s">(</span><span class="i">$InternalSpinMultiplicity</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLRadical</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span> 1110 <span class="i">$MDLRadical</span> = <span class="n">0</span><span class="sc">;</span> 1111 <span class="s">}</span> 1112 <span class="k">return</span> <span 
class="i">$MDLRadical</span><span class="sc">;</span> 1113 <span class="s">}</span> 1114 1115 <span class="c"># Process generic CTAB property line...</span> <a name="_ParseCmpdGenericPropertyLine-"></a>1116 <span class="k">sub </span><span class="m">_ParseCmpdGenericPropertyLine</span> <span class="s">{</span> 1117 <span class="k">my</span><span class="s">(</span><span class="i">$PropertyName</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 1118 1119 <span class="k">my</span><span class="s">(</span><span class="i">$Label</span><span class="cm">,</span> <span class="i">$PropertyLabel</span><span class="cm">,</span> <span class="i">$ValuesCount</span><span class="cm">,</span> <span class="i">$ValuePairsCount</span><span class="cm">,</span> <span class="i">@ValuePairs</span><span class="s">)</span><span class="sc">;</span> 1120 1121 <span class="i">@ValuePairs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1122 <span class="s">(</span><span class="i">$Label</span><span class="cm">,</span> <span class="i">$PropertyLabel</span><span class="cm">,</span> <span class="i">$ValuesCount</span><span class="cm">,</span> <span class="i">@ValuePairs</span><span class="s">)</span> = <span class="k">split</span><span class="s">(</span><span class="q">' '</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 1123 <span class="i">$ValuePairsCount</span> = <span class="s">(</span><span class="k">scalar</span> <span class="i">@ValuePairs</span><span class="s">)</span>/<span class="n">2</span><span class="sc">;</span> 1124 <span class="k">if</span> <span class="s">(</span><span class="i">$ValuesCount</span> != <span class="i">$ValuePairsCount</span><span class="s">)</span> <span class="s">{</span> 1125 <span class="w">carp</span> <span class="q">"Warning: _ParseCmpdGenericPropertyLine: Number of atom number 
and $PropertyName value paris specified on $Label $PropertyLabel property line, $ValuePairsCount, does not match expected value of $ValuesCount..."</span><span class="sc">;</span> 1126 <span class="s">}</span> 1127 1128 <span class="k">return</span> <span class="s">(</span><span class="i">@ValuePairs</span><span class="s">)</span><span class="sc">;</span> 1129 <span class="s">}</span> 1130 1131 <span class="c"># Generic CTAB property lines for charge, isotope and radical properties...</span> 1132 <span class="c">#</span> <a name="_GenerateCmpdGenericPropertyLines-"></a>1133 <span class="k">sub </span><span class="m">_GenerateCmpdGenericPropertyLines</span> <span class="s">{</span> 1134 <span class="k">my</span><span class="s">(</span><span class="i">$PropertyName</span><span class="cm">,</span> <span class="i">$PropertyValuePairsRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 1135 <span class="k">my</span><span class="s">(</span><span class="i">$Index</span><span class="cm">,</span> <span class="i">$PropertyLabel</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$PropertyValue</span><span class="cm">,</span> <span class="i">@PropertyLines</span><span class="s">)</span><span class="sc">;</span> 1136 1137 <span class="i">@PropertyLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1138 <span class="j">NAME:</span> <span class="s">{</span> 1139 <span class="k">if</span> <span class="s">(</span><span class="i">$PropertyName</span> =~ <span class="q">/^Charge$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$PropertyLabel</span> = <span class="q">"M CHG"</span><span class="sc">;</span> <span class="k">last</span> <span class="j">NAME</span><span class="sc">;</span> <span class="s">}</span> 1140 <span 
class="k">if</span> <span class="s">(</span><span class="i">$PropertyName</span> =~ <span class="q">/^Isotope$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$PropertyLabel</span> = <span class="q">"M ISO"</span><span class="sc">;</span> <span class="k">last</span> <span class="j">NAME</span><span class="sc">;</span> <span class="s">}</span> 1141 <span class="k">if</span> <span class="s">(</span><span class="i">$PropertyName</span> =~ <span class="q">/^Radical$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$PropertyLabel</span> = <span class="q">"M RAD"</span><span class="sc">;</span> <span class="k">last</span> <span class="j">NAME</span><span class="sc">;</span> <span class="s">}</span> 1142 <span class="w">carp</span> <span class="q">"Warning: _GenerateCmpdGenericPropertyLines: Unknown property name, $PropertyName, specified..."</span><span class="sc">;</span> 1143 <span class="k">return</span> <span class="i">@PropertyLines</span><span class="sc">;</span> 1144 <span class="s">}</span> 1145 1146 <span class="c"># A maximum of 8 property pair values allowed per line...</span> 1147 <span class="i">$PropertyCount</span> = <span class="n">0</span><span class="sc">;</span> 1148 <span class="i">$Line</span> = <span class="q">''</span><span class="sc">;</span> 1149 <span class="k">for</span> <span class="s">(</span><span class="i">$Index</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$Index</span> &lt; <span class="i">$#</span>{<span class="i">$PropertyValuePairsRef</span>}<span class="sc">;</span> <span class="i">$Index</span> += <span class="n">2</span><span class="s">)</span> <span class="s">{</span> 1150 <span class="k">if</span> <span class="s">(</span><span class="i">$PropertyCount</span> > <span class="n">8</span><span class="s">)</span> <span class="s">{</span> 1151 <span class="c"># Setup property line...</span> 1152 <span class="i">$Line</span> = <span class="q">"${PropertyLabel} 
8${Line}"</span><span class="sc">;</span> 1153 <span class="k">push</span> <span class="i">@PropertyLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="sc">;</span> 1154 1155 <span class="i">$PropertyCount</span> = <span class="n">0</span><span class="sc">;</span> 1156 <span class="i">$Line</span> = <span class="q">''</span><span class="sc">;</span> 1157 <span class="s">}</span> 1158 <span class="i">$PropertyCount</span>++<span class="sc">;</span> 1159 <span class="i">$AtomNum</span> = <span class="i">$PropertyValuePairsRef</span>->[<span class="i">$Index</span>]<span class="sc">;</span> 1160 <span class="i">$PropertyValue</span> = <span class="i">$PropertyValuePairsRef</span>->[<span class="i">$Index</span> + <span class="n">1</span>]<span class="sc">;</span> 1161 <span class="i">$Line</span> .= <span class="k">sprintf</span> <span class="q">" %3i %3i"</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$PropertyValue</span><span class="sc">;</span> 1162 <span class="s">}</span> 1163 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span><span class="s">)</span> <span class="s">{</span> 1164 <span class="i">$Line</span> = <span class="q">"${PropertyLabel} ${PropertyCount}${Line}"</span><span class="sc">;</span> 1165 <span class="k">push</span> <span class="i">@PropertyLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="sc">;</span> 1166 <span class="s">}</span> 1167 <span class="k">return</span> <span class="i">@PropertyLines</span><span class="sc">;</span> 1168 <span class="s">}</span> 1169 1170 <span class="c">#</span> 1171 <span class="c"># Read compound data into a string and return its value</span> <a name="ReadCmpdString-"></a>1172 <span class="k">sub </span><span class="m">ReadCmpdString</span> <span class="s">{</span> 1173 <span class="k">my</span><span class="s">(</span><span class="i">$SDFileRef</span><span class="s">)</span> 
= <span class="i">@_</span><span class="sc">;</span> 1174 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdString</span><span class="s">)</span><span class="sc">;</span> 1175 1176 <span class="i">$CmpdString</span> = <span class="q">""</span><span class="sc">;</span> 1177 <span class="j">LINE:</span> <span class="k">while</span> <span class="s">(</span><span class="k">defined</span><span class="s">(</span><span class="i">$_</span> = <span class="q">&lt;$SDFileRef&gt;</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1178 <span class="c"># Change Windows and Mac new line char to UNIX...</span> 1179 <span class="q">s/(\r\n)|(\r)/\n/g</span><span class="sc">;</span> 1180 1181 <span class="k">if</span> <span class="s">(</span><span class="q">/^\$\$\$\$/</span><span class="s">)</span> <span class="s">{</span> 1182 <span class="c"># Take out any new line char at the end by explicitly removing it instead of using</span> 1183 <span class="c"># chomp, which might not always work correctly on files generated on a system</span> 1184 <span class="c"># with a value of input line separator different from the current system...</span> 1185 <span class="q">s/\n$//g</span><span class="sc">;</span> 1186 1187 <span class="c"># Doesn't hurt to chomp...</span> 1188 <span class="k">chomp</span><span class="sc">;</span> 1189 1190 <span class="i">$CmpdString</span> .= <span class="i">$_</span><span class="sc">;</span> 1191 <span class="k">last</span> <span class="j">LINE</span><span class="sc">;</span> 1192 <span class="s">}</span> 1193 <span class="k">else</span> <span class="s">{</span> 1194 <span class="i">$CmpdString</span> .= <span class="i">$_</span><span class="sc">;</span> 1195 <span class="s">}</span> 1196 <span class="s">}</span> 1197 <span class="k">return</span> <span class="i">$CmpdString</span><span class="sc">;</span> 1198 <span class="s">}</span> 1199 1200 <span class="c"># Find out the number of fragments in the compounds. 
And for the compound with</span> 1201 <span class="c"># more than one fragment, remove all the others besides the largest one.</span> <a name="WashCmpd-"></a>1202 <span class="k">sub </span><span class="m">WashCmpd</span> <span class="s">{</span> 1203 <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 1204 <span class="k">my</span><span class="s">(</span><span class="i">$WashedCmpdString</span><span class="cm">,</span> <span class="i">$FragmentCount</span><span class="cm">,</span> <span class="i">$Fragments</span><span class="s">)</span><span class="sc">;</span> 1205 1206 <span class="i">$WashedCmpdString</span> = <span class="q">""</span><span class="sc">;</span> 1207 <span class="s">(</span><span class="i">$FragmentCount</span><span class="cm">,</span> <span class="i">$Fragments</span><span class="s">)</span> = <span class="i">GetCmpdFragments</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span><span class="sc">;</span> 1208 <span class="k">if</span> <span class="s">(</span><span class="i">$FragmentCount</span> > <span class="n">1</span><span class="s">)</span> <span class="s">{</span> 1209 <span class="c"># Go over the compound data for the largest fragment including property</span> 1210 <span class="c"># data...</span> 1211 <span class="k">my</span> <span class="s">(</span><span class="i">@AllFragments</span><span class="cm">,</span> <span class="i">@LargestFragment</span><span class="cm">,</span> <span class="i">%LargestFragmentAtoms</span><span class="cm">,</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">$Index</span><span class="cm">,</span> <span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$NewAtomCount</span><span class="cm">,</span> 
<span class="i">$NewBondCount</span><span class="cm">,</span> <span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="cm">,</span> <span class="i">$FirstNewAtomNum</span><span class="cm">,</span> <span class="i">$SecondNewAtomNum</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$BondLine</span><span class="cm">,</span> <span class="i">$MENDLineIndex</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">$Value</span><span class="cm">,</span> <span class="i">@ValuePairs</span><span class="cm">,</span> <span class="i">@NewValuePairs</span><span class="cm">,</span> <span class="i">$ValuePairIndex</span><span class="cm">,</span> <span class="i">$NewAtomNum</span><span class="cm">,</span> <span class="i">@NewPropertyLines</span><span class="s">)</span><span class="sc">;</span> 1212 1213 <span class="i">@AllFragments</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> <span class="i">@LargestFragment</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1214 <span class="i">%LargestFragmentAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1215 <span class="i">@AllFragments</span> = <span class="k">split</span> <span class="q">"\n"</span><span class="cm">,</span> <span class="i">$Fragments</span><span class="sc">;</span> 1216 <span class="i">@LargestFragment</span> = <span class="k">split</span> <span class="q">" "</span><span class="cm">,</span> <span class="i">$AllFragments</span>[<span class="n">0</span>]<span class="sc">;</span> 1217 <span class="k">for</span> <span class="i">$Index</span> <span class="s">(</span><span class="n">0</span> .. 
<span class="i">$#LargestFragment</span><span class="s">)</span> <span class="s">{</span> 1218 <span class="c"># Map old atom numbers to new atom numbers as the fragment atom numbers are sorted</span> 1219 <span class="c"># from lowest to highest old atom numbers...</span> 1220 <span class="i">$LargestFragmentAtoms</span>{<span class="i">$LargestFragment</span>[<span class="i">$Index</span>]} = <span class="i">$Index</span> + <span class="n">1</span><span class="sc">;</span> 1221 <span class="s">}</span> 1222 <span class="i">@WashedCmpdLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1223 <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="n">0</span>]<span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="n">1</span>]<span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="n">2</span>]<span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="sc">;</span> 1224 <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="s">)</span> = <span class="i">ParseCmpdCountsLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="s">)</span><span class="sc">;</span> 1225 <span class="i">$NewAtomCount</span> = <span class="i">@LargestFragment</span><span class="sc">;</span> 1226 <span class="i">$NewBondCount</span> = <span class="n">0</span><span class="sc">;</span> 1227 <span class="i">$AtomNum</span> = <span class="n">0</span><span class="sc">;</span> 1228 <span class="c"># Retrieve the largest fragment atom lines...</span> 1229 <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span><span class="sc">;</span> <span class="i">$LineIndex</span> 
&lt; <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> 1230 <span class="i">$AtomNum</span>++<span class="sc">;</span> 1231 <span class="k">if</span> <span class="s">(</span><span class="i">$LargestFragmentAtoms</span>{<span class="i">$AtomNum</span>}<span class="s">)</span> <span class="s">{</span> 1232 <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> 1233 <span class="s">}</span> 1234 <span class="s">}</span> 1235 <span class="c"># Retrieve the largest fragment bond lines...</span> 1236 <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span> + <span class="i">$AtomCount</span><span class="sc">;</span> <span class="i">$LineIndex</span> &lt; <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span> + <span class="i">$BondCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> 1237 <span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span> = <span class="i">ParseCmpdBondLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="s">)</span><span class="sc">;</span> 1238 <span class="k">if</span> <span class="s">(</span><span class="i">$LargestFragmentAtoms</span>{<span class="i">$FirstAtomNum</span>} && <span class="i">$LargestFragmentAtoms</span>{<span class="i">$SecondAtomNum</span>}<span class="s">)</span> <span class="s">{</span> 1239 
<span class="i">$NewBondCount</span>++<span class="sc">;</span> 1240 <span class="c"># Set up bond line with new atom number mapping...</span> 1241 <span class="i">$FirstNewAtomNum</span> = <span class="i">$LargestFragmentAtoms</span>{<span class="i">$FirstAtomNum</span>}<span class="sc">;</span> 1242 <span class="i">$SecondNewAtomNum</span> = <span class="i">$LargestFragmentAtoms</span>{<span class="i">$SecondAtomNum</span>}<span class="sc">;</span> 1243 <span class="i">$BondLine</span> = <span class="i">GenerateCmpdBondLine</span><span class="s">(</span><span class="i">$FirstNewAtomNum</span><span class="cm">,</span> <span class="i">$SecondNewAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span><span class="sc">;</span> 1244 <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">$BondLine</span><span class="sc">;</span> 1245 <span class="s">}</span> 1246 <span class="s">}</span> 1247 <span class="c"># Get property lines for CHG, ISO and RAD label and map the old atom numbers to new</span> 1248 <span class="c"># atom numbers; Others, property lines before M END line are skipped as atom numbers for</span> 1249 <span class="c"># other properties might not be valid anymore...</span> 1250 <span class="c">#</span> 1251 <span class="i">$MENDLineIndex</span> = <span class="i">$LineIndex</span><span class="sc">;</span> 1252 <span class="j">LINE:</span> <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span> + <span class="i">$BondCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span> &lt; <span class="i">@$CmpdLines</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> 1253 <span class="i">$Line</span> = 
<span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> 1254 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M END/i</span><span class="s">)</span> <span class="s">{</span> 1255 <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="q">"M END"</span><span class="sc">;</span> 1256 <span class="i">$MENDLineIndex</span> = <span class="i">$LineIndex</span><span class="sc">;</span> 1257 <span class="k">last</span> <span class="j">LINE</span><span class="sc">;</span> 1258 <span class="s">}</span> 1259 1260 <span class="i">@ValuePairs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1261 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M CHG/i</span><span class="s">)</span> <span class="s">{</span> 1262 <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdChargePropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 1263 <span class="s">}</span> 1264 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M RAD/i</span><span class="s">)</span> <span class="s">{</span> 1265 <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdRadicalPropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 1266 <span class="s">}</span> 1267 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M ISO/i</span><span class="s">)</span> <span class="s">{</span> 1268 <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdIsotopePropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 1269 <span class="s">}</span> 1270 <span class="k">elsif</span> <span 
class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^A /i</span><span class="s">)</span> <span class="s">{</span> 1271 <span class="k">my</span><span class="s">(</span><span class="i">$NextLine</span><span class="s">)</span><span class="sc">;</span> 1272 <span class="i">$LineIndex</span>++<span class="sc">;</span> 1273 <span class="i">$NextLine</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> 1274 <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdAtomAliasPropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">$NextLine</span><span class="s">)</span><span class="sc">;</span> 1275 <span class="s">}</span> 1276 <span class="k">else</span> <span class="s">{</span> 1277 <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> 1278 <span class="s">}</span> 1279 1280 <span class="k">if</span> <span class="s">(</span>!<span class="i">@ValuePairs</span><span class="s">)</span> <span class="s">{</span> 1281 <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> 1282 <span class="s">}</span> 1283 1284 <span class="c"># Collect values for valid atom numbers with mapping to new atom numbers...</span> 1285 <span class="i">@NewValuePairs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1286 <span class="j">VALUEINDEX:</span> <span class="k">for</span> <span class="s">(</span><span class="i">$ValuePairIndex</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$ValuePairIndex</span> &lt; <span class="i">$#ValuePairs</span><span class="sc">;</span> <span class="i">$ValuePairIndex</span> += <span class="n">2</span><span class="s">)</span> <span class="s">{</span> 1287 <span class="i">$AtomNum</span> = <span class="i">$ValuePairs</span>[<span class="i">$ValuePairIndex</span>]<span class="sc">;</span> <span class="i">$Value</span> = <span 
class="i">$ValuePairs</span>[<span class="i">$ValuePairIndex</span> + <span class="n">1</span>]<span class="sc">;</span> 1288 <span class="k">if</span> <span class="s">(</span>!<span class="k">exists</span> <span class="i">$LargestFragmentAtoms</span>{<span class="i">$AtomNum</span>}<span class="s">)</span> <span class="s">{</span> 1289 <span class="k">next</span> <span class="j">VALUEINDEX</span><span class="sc">;</span> 1290 <span class="s">}</span> 1291 <span class="i">$NewAtomNum</span> = <span class="i">$LargestFragmentAtoms</span>{<span class="i">$AtomNum</span>}<span class="sc">;</span> 1292 <span class="k">push</span> <span class="i">@NewValuePairs</span><span class="cm">,</span> <span class="s">(</span><span class="i">$NewAtomNum</span><span class="cm">,</span> <span class="i">$Value</span><span class="s">)</span> 1293 <span class="s">}</span> 1294 <span class="k">if</span> <span class="s">(</span>!<span class="i">@NewValuePairs</span><span class="s">)</span> <span class="s">{</span> 1295 <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> 1296 <span class="s">}</span> 1297 <span class="i">@NewPropertyLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1298 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M CHG/i</span><span class="s">)</span> <span class="s">{</span> 1299 <span class="i">@NewPropertyLines</span> = <span class="i">GenerateCmpdChargePropertyLines</span><span class="s">(</span>\<span class="i">@NewValuePairs</span><span class="s">)</span><span class="sc">;</span> 1300 <span class="s">}</span> 1301 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M RAD/i</span><span class="s">)</span> <span class="s">{</span> 1302 <span class="i">@NewPropertyLines</span> = <span class="i">GenerateCmpdRadicalPropertyLines</span><span class="s">(</span>\<span 
class="i">@NewValuePairs</span><span class="s">)</span><span class="sc">;</span> 1303 <span class="s">}</span> 1304 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M ISO/i</span><span class="s">)</span> <span class="s">{</span> 1305 <span class="i">@NewPropertyLines</span> = <span class="i">GenerateCmpdIsotopePropertyLines</span><span class="s">(</span>\<span class="i">@NewValuePairs</span><span class="s">)</span><span class="sc">;</span> 1306 <span class="s">}</span> 1307 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^A /i</span><span class="s">)</span> <span class="s">{</span> 1308 <span class="i">@NewPropertyLines</span> = <span class="i">GenerateCmpdAtomAliasPropertyLines</span><span class="s">(</span>\<span class="i">@NewValuePairs</span><span class="s">)</span><span class="sc">;</span> 1309 <span class="s">}</span> 1310 <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">@NewPropertyLines</span><span class="sc">;</span> 1311 <span class="s">}</span> 1312 1313 <span class="c"># Retrieve rest of the data label and value property data...</span> 1314 <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="s">(</span><span class="n">1</span> + <span class="i">$MENDLineIndex</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span> &lt; <span class="i">@$CmpdLines</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> 1315 <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> 1316 <span class="s">}</span> 1317 <span class="c"># Update atom and bond count line...</span> 1318 <span class="i">$WashedCmpdLines</span>[<span 
class="n">3</span>] = <span class="i">GenerateCmpdCountsLine</span><span class="s">(</span><span class="i">$NewAtomCount</span><span class="cm">,</span> <span class="i">$NewBondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="s">)</span><span class="sc">;</span> 1319 1320 <span class="i">$WashedCmpdString</span> = <span class="k">join</span> <span class="q">"\n"</span><span class="cm">,</span> <span class="i">@WashedCmpdLines</span><span class="sc">;</span> 1321 <span class="s">}</span> 1322 <span class="k">return</span> <span class="s">(</span><span class="i">$FragmentCount</span><span class="cm">,</span> <span class="i">$Fragments</span><span class="cm">,</span> <span class="i">$WashedCmpdString</span><span class="s">)</span><span class="sc">;</span> 1323 <span class="s">}</span> 1324 <a name="EOF-"></a></pre> <p> </p> <br /> <center> <img src="../../../images/h2o2.png" alt=""> </center> </body> </html>