| 0 | 1 <html> | 
|  | 2 <head> | 
|  | 3 <title>MayaChemTools:Code:SDFileUtil.pm</title> | 
|  | 4 <meta http-equiv="content-type" content="text/html;charset=utf-8"> | 
|  | 5 <link rel="stylesheet" type="text/css" href="../../../css/MayaChemToolsCode.css"> | 
|  | 6 </head> | 
|  | 7 <body leftmargin="20" rightmargin="20" topmargin="10" bottommargin="10"> | 
|  | 8 <br/> | 
|  | 9 <center> | 
|  | 10 <a href="http://www.mayachemtools.org" title="MayaChemTools Home"><img src="../../../images/MayaChemToolsLogo.gif" border="0" alt="MayaChemTools"></a> | 
|  | 11 </center> | 
|  | 12 <br/> | 
|  | 13 <pre> | 
|  | 14 <a name="package-SDFileUtil-"></a>   1 <span class="k">package </span><span class="i">SDFileUtil</span><span class="sc">;</span> | 
|  | 15    2 <span class="c">#</span> | 
|  | 16    3 <span class="c"># $RCSfile: SDFileUtil.pm,v $</span> | 
|  | 17    4 <span class="c"># $Date: 2015/02/28 20:47:18 $</span> | 
|  | 18    5 <span class="c"># $Revision: 1.49 $</span> | 
|  | 19    6 <span class="c">#</span> | 
|  | 20    7 <span class="c"># Author: Manish Sud &lt;msud@san.rr.com&gt;</span> | 
|  | 21    8 <span class="c">#</span> | 
|  | 22    9 <span class="c"># Copyright (C) 2015 Manish Sud. All rights reserved.</span> | 
|  | 23   10 <span class="c">#</span> | 
|  | 24   11 <span class="c"># This file is part of MayaChemTools.</span> | 
|  | 25   12 <span class="c">#</span> | 
|  | 26   13 <span class="c"># MayaChemTools is free software; you can redistribute it and/or modify it under</span> | 
|  | 27   14 <span class="c"># the terms of the GNU Lesser General Public License as published by the Free</span> | 
|  | 28   15 <span class="c"># Software Foundation; either version 3 of the License, or (at your option) any</span> | 
|  | 29   16 <span class="c"># later version.</span> | 
|  | 30   17 <span class="c">#</span> | 
|  | 31   18 <span class="c"># MayaChemTools is distributed in the hope that it will be useful, but without</span> | 
|  | 32   19 <span class="c"># any warranty; without even the implied warranty of merchantability or fitness</span> | 
|  | 33   20 <span class="c"># for a particular purpose.  See the GNU Lesser General Public License for more</span> | 
|  | 34   21 <span class="c"># details.</span> | 
|  | 35   22 <span class="c">#</span> | 
|  | 36   23 <span class="c"># You should have received a copy of the GNU Lesser General Public License</span> | 
|  | 37   24 <span class="c"># along with MayaChemTools; if not, see &lt;http://www.gnu.org/licenses/&gt; or</span> | 
|  | 38   25 <span class="c"># write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,</span> | 
|  | 39   26 <span class="c"># Boston, MA, 02111-1307, USA.</span> | 
|  | 40   27 <span class="c">#</span> | 
|  | 41   28 | 
|  | 42   29 <span class="k">use</span> <span class="w">strict</span><span class="sc">;</span> | 
|  | 43   30 <span class="k">use</span> <span class="w">Exporter</span><span class="sc">;</span> | 
|  | 44   31 <span class="k">use</span> <span class="w">Carp</span><span class="sc">;</span> | 
|  | 45   32 <span class="k">use</span> <span class="w">PeriodicTable</span> <span class="q">qw(IsElement)</span><span class="sc">;</span> | 
|  | 46   33 <span class="k">use</span> <span class="w">TimeUtil</span> <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 47   34 | 
|  | 48   35 <span class="k">use</span> <span class="w">vars</span> <span class="q">qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS)</span><span class="sc">;</span> | 
|  | 49   36 | 
|  | 50   37 <span class="i">@ISA</span> = <span class="q">qw(Exporter)</span><span class="sc">;</span> | 
|  | 51   38 <span class="i">@EXPORT</span> = <span class="q">qw(GenerateCmpdAtomLine GenerateCmpdBondLine GenerateCmpdChargePropertyLines GenerateCmpdCommentsLine GenerateCmpdCountsLine GenerateCmpdAtomAliasPropertyLines GenerateCmpdIsotopePropertyLines GenerateCmpdDataHeaderLabelsAndValuesLines GenerateCmpdMiscInfoLine GenerateCmpdRadicalPropertyLines GenerateCmpdMolNameLine GenerateEmptyCtabBlockLines GenerateMiscLineDateStamp GetAllAndCommonCmpdDataHeaderLabels GetCmpdDataHeaderLabels GetCmpdDataHeaderLabelsAndValues GetCmpdFragments GetCtabLinesCount GetUnknownAtoms GetInvalidAtomNumbers MDLChargeToInternalCharge InternalChargeToMDLCharge MDLBondTypeToInternalBondOrder InternalBondOrderToMDLBondType MDLBondStereoToInternalBondStereochemistry InternalBondStereochemistryToMDLBondStereo InternalSpinMultiplicityToMDLRadical MDLRadicalToInternalSpinMultiplicity IsCmpd3D IsCmpd2D ParseCmpdAtomLine ParseCmpdBondLine ParseCmpdCommentsLine ParseCmpdCountsLine ParseCmpdMiscInfoLine ParseCmpdMolNameLine ParseCmpdAtomAliasPropertyLine ParseCmpdChargePropertyLine ParseCmpdIsotopePropertyLine ParseCmpdRadicalPropertyLine ReadCmpdString RemoveCmpdDataHeaderLabelAndValue WashCmpd)</span><span class="sc">;</span> | 
|  | 52   39 <span class="i">@EXPORT_OK</span> = <span class="q">qw()</span><span class="sc">;</span> | 
|  | 53   40 <span class="i">%EXPORT_TAGS</span> = <span class="s">(</span><span class="w">all</span>  <span class="cm">=></span> <span class="s">[</span><span class="i">@EXPORT</span><span class="cm">,</span> <span class="i">@EXPORT_OK</span><span class="s">]</span><span class="s">)</span><span class="sc">;</span> | 
|  | 54   41 | 
|  | 55   42 <span class="c"># Format data for compounds count line...</span> | 
|  | 56 <a name="GenerateCmpdCountsLine-"></a>  43 <span class="k">sub </span><span class="m">GenerateCmpdCountsLine</span> <span class="s">{</span> | 
|  | 57   44   <span class="k">my</span><span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 58   45 | 
|  | 59   46   <span class="k">if</span> <span class="s">(</span><span class="i">@_</span> == <span class="n">5</span><span class="s">)</span> <span class="s">{</span> | 
|  | 60   47     <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 61   48   <span class="s">}</span> | 
|  | 62   49   <span class="k">elsif</span> <span class="s">(</span><span class="i">@_</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> | 
|  | 63   50     <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 64   51     <span class="i">$PropertyCount</span> = <span class="n">999</span><span class="sc">;</span> | 
|  | 65   52     <span class="i">$Version</span> = <span class="q">"V2000"</span><span class="sc">;</span> | 
|  | 66   53   <span class="s">}</span> | 
|  | 67   54   <span class="k">else</span> <span class="s">{</span> | 
|  | 68   55     <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 69   56     <span class="i">$ChiralFlag</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 70   57     <span class="i">$PropertyCount</span> = <span class="n">999</span><span class="sc">;</span> | 
|  | 71   58     <span class="i">$Version</span> = <span class="q">"V2000"</span><span class="sc">;</span> | 
|  | 72   59   <span class="s">}</span> | 
|  | 73   60   <span class="k">if</span> <span class="s">(</span><span class="i">$AtomCount</span> > <span class="n">999</span><span class="s">)</span> <span class="s">{</span> | 
|  | 74   61     <span class="w">croak</span> <span class="q">"Error: SDFileUtil::GenerateCmpdCountsLine: The atom count, $AtomCount, exceeds maximum of 999 allowed for CTAB version 2000. The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools..."</span><span class="sc">;</span> | 
|  | 75   62   <span class="s">}</span> | 
|  | 76   63   <span class="i">$Line</span> = <span class="k">sprintf</span> <span class="q">"%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%6s"</span><span class="cm">,</span> <span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="sc">;</span> | 
|  | 77   64 | 
|  | 78   65   <span class="k">return</span> <span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 79   66 <span class="s">}</span> | 
|  | 80   67 | 
|  | 81   68 <span class="c"># Generate comments line...</span> | 
|  | 82 <a name="GenerateCmpdCommentsLine-"></a>  69 <span class="k">sub </span><span class="m">GenerateCmpdCommentsLine</span> <span class="s">{</span> | 
|  | 83   70   <span class="k">my</span><span class="s">(</span><span class="i">$Comments</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 84   71   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 85   72 | 
|  | 86   73   <span class="i">$Line</span> = <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$Comments</span><span class="s">)</span> > <span class="n">80</span><span class="s">)</span> ? <span class="k">substr</span><span class="s">(</span><span class="i">$Comments</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">80</span><span class="s">)</span> <span class="co">:</span> <span class="i">$Comments</span><span class="sc">;</span> | 
|  | 87   74 | 
|  | 88   75   <span class="k">return</span> <span class="i">$Line</span><span class="sc">;</span> | 
|  | 89   76 <span class="s">}</span> | 
|  | 90   77 | 
|  | 91   78 <span class="c"># Generate molname line...</span> | 
|  | 92 <a name="GenerateCmpdMolNameLine-"></a>  79 <span class="k">sub </span><span class="m">GenerateCmpdMolNameLine</span> <span class="s">{</span> | 
|  | 93   80   <span class="k">my</span><span class="s">(</span><span class="i">$MolName</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 94   81   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 95   82 | 
|  | 96   83   <span class="i">$Line</span> = <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$MolName</span><span class="s">)</span> > <span class="n">80</span><span class="s">)</span> ? <span class="k">substr</span><span class="s">(</span><span class="i">$MolName</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">80</span><span class="s">)</span> <span class="co">:</span> <span class="i">$MolName</span><span class="sc">;</span> | 
|  | 97   84 | 
|  | 98   85   <span class="k">return</span> <span class="i">$Line</span><span class="sc">;</span> | 
|  | 99   86 <span class="s">}</span> | 
|  | 100   87 | 
|  | 101   88 <span class="c"># Generate data for compounds misc info line...</span> | 
|  | 102 <a name="GenerateCmpdMiscInfoLine-"></a>  89 <span class="k">sub </span><span class="m">GenerateCmpdMiscInfoLine</span> <span class="s">{</span> | 
|  | 103   90   <span class="k">my</span><span class="s">(</span><span class="i">$ProgramName</span><span class="cm">,</span> <span class="i">$UserInitial</span><span class="cm">,</span> <span class="i">$Code</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 104   91   <span class="k">my</span><span class="s">(</span><span class="i">$Date</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 105   92 | 
|  | 106   93   <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">defined</span><span class="s">(</span><span class="i">$ProgramName</span><span class="s">)</span> && <span class="i">$ProgramName</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 107   94     <span class="i">$ProgramName</span> = <span class="q">"MayaChem"</span><span class="sc">;</span> | 
|  | 108   95   <span class="s">}</span> | 
|  | 109   96   <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">defined</span><span class="s">(</span><span class="i">$UserInitial</span><span class="s">)</span> && <span class="i">$UserInitial</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 110   97     <span class="i">$UserInitial</span> = <span class="q">"  "</span><span class="sc">;</span> | 
|  | 111   98   <span class="s">}</span> | 
|  | 112   99   <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">defined</span><span class="s">(</span><span class="i">$Code</span><span class="s">)</span> && <span class="i">$Code</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 113  100     <span class="i">$Code</span> = <span class="q">"2D"</span><span class="sc">;</span> | 
|  | 114  101   <span class="s">}</span> | 
|  | 115  102 | 
|  | 116  103   <span class="k">if</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$ProgramName</span><span class="s">)</span> > <span class="n">8</span><span class="s">)</span> <span class="s">{</span> | 
|  | 117  104     <span class="i">$ProgramName</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$ProgramName</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">8</span><span class="s">)</span><span class="sc">;</span> | 
|  | 118  105   <span class="s">}</span> | 
|  | 119  106   <span class="k">if</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$UserInitial</span><span class="s">)</span> > <span class="n">2</span><span class="s">)</span> <span class="s">{</span> | 
|  | 120  107     <span class="i">$UserInitial</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$UserInitial</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">2</span><span class="s">)</span><span class="sc">;</span> | 
|  | 121  108   <span class="s">}</span> | 
|  | 122  109   <span class="k">if</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$Code</span><span class="s">)</span> > <span class="n">2</span><span class="s">)</span> <span class="s">{</span> | 
|  | 123  110     <span class="i">$Code</span> = <span class="k">substr</span><span class="s">(</span><span class="i">$Code</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">2</span><span class="s">)</span><span class="sc">;</span> | 
|  | 124  111   <span class="s">}</span> | 
|  | 125  112   <span class="i">$Date</span> = <span class="i">GenerateMiscLineDateStamp</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 126  113 | 
|  | 127  114   <span class="i">$Line</span> = <span class="q">"${UserInitial}${ProgramName}${Date}${Code}"</span><span class="sc">;</span> | 
|  | 128  115 | 
|  | 129  116   <span class="k">return</span> <span class="i">$Line</span><span class="sc">;</span> | 
|  | 130  117 <span class="s">}</span> | 
|  | 131  118 | 
|  | 132  119 <span class="c"># Generate lines for an empty CTAB block: blank molname, misc info, blank comments, counts and M  END lines...</span> | 
|  | 133 <a name="GenerateEmptyCtabBlockLines-"></a> 120 <span class="k">sub </span><span class="m">GenerateEmptyCtabBlockLines</span> <span class="s">{</span> | 
|  | 134  121   <span class="k">my</span><span class="s">(</span><span class="i">$Date</span><span class="cm">,</span> <span class="i">$Lines</span><span class="s">)</span><span class="sc">;</span> | 
|  | 135  122 | 
|  | 136  123   <span class="k">if</span> <span class="s">(</span><span class="i">@_</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> | 
|  | 137  124     <span class="s">(</span><span class="i">$Date</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 138  125   <span class="s">}</span> | 
|  | 139  126   <span class="k">else</span> <span class="s">{</span> | 
|  | 140  127     <span class="i">$Date</span> = <span class="i">GenerateMiscLineDateStamp</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 141  128   <span class="s">}</span> | 
|  | 142  129   <span class="c"># First line: Blank molname line...</span> | 
|  | 143  130   <span class="c"># Second line: Misc info...</span> | 
|  | 144  131   <span class="c"># Third line: Blank comments line...</span> | 
|  | 145  132   <span class="c"># Fourth line: Counts line reflecting empty structure data block...</span> | 
|  | 146  133   <span class="i">$Lines</span> = <span class="q">"\n"</span><span class="sc">;</span> | 
|  | 147  134   <span class="i">$Lines</span> .= <span class="q">"  MayaChem${Date}2D\n"</span><span class="sc">;</span> | 
|  | 148  135   <span class="i">$Lines</span> .= <span class="q">"\n"</span><span class="sc">;</span> | 
|  | 149  136   <span class="i">$Lines</span> .= <span class="i">GenerateCmpdCountsLine</span><span class="s">(</span><span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="cm">,</span> <span class="n">0</span><span class="s">)</span> . <span class="q">"\n"</span><span class="sc">;</span> | 
|  | 150  137   <span class="i">$Lines</span> .= <span class="q">"M  END"</span><span class="sc">;</span> | 
|  | 151  138 | 
|  | 152  139   <span class="k">return</span> <span class="i">$Lines</span><span class="sc">;</span> | 
|  | 153  140 <span class="s">}</span> | 
|  | 154  141 | 
|  | 155  142 <span class="c"># Generate SD file date stamp...</span> | 
|  | 156 <a name="GenerateMiscLineDateStamp-"></a> 143 <span class="k">sub </span><span class="m">GenerateMiscLineDateStamp</span> <span class="s">{</span> | 
|  | 157  144   <span class="k">return</span> <span class="i">TimeUtil::SDFileTimeStamp</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 158  145 <span class="s">}</span> | 
|  | 159  146 | 
|  | 160  147 <span class="c"># Generate data for compound atom line...</span> | 
|  | 161  148 <span class="c">#</span> | 
|  | 162 <a name="GenerateCmpdAtomLine-"></a> 149 <span class="k">sub </span><span class="m">GenerateCmpdAtomLine</span> <span class="s">{</span> | 
|  | 163  150   <span class="k">my</span><span class="s">(</span><span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 164  151   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 165  152 | 
|  | 166  153   <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$MassDifference</span><span class="s">)</span> <span class="s">{</span> | 
|  | 167  154     <span class="i">$MassDifference</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 168  155   <span class="s">}</span> | 
|  | 169  156   <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$Charge</span><span class="s">)</span> <span class="s">{</span> | 
|  | 170  157     <span class="i">$Charge</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 171  158   <span class="s">}</span> | 
|  | 172  159   <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$StereoParity</span><span class="s">)</span> <span class="s">{</span> | 
|  | 173  160     <span class="i">$StereoParity</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 174  161   <span class="s">}</span> | 
|  | 175  162   <span class="i">$Line</span> = <span class="k">sprintf</span> <span class="q">"%10.4f%10.4f%10.4f %-3s%2i%3i%3i  0  0  0  0  0  0  0  0  0"</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="sc">;</span> | 
|  | 176  163 | 
|  | 177  164   <span class="k">return</span> <span class="i">$Line</span> | 
|  | 178  165 <span class="s">}</span> | 
|  | 179  166 | 
|  | 180  167 <span class="c"># Generate data for compound bond line...</span> | 
|  | 181  168 <span class="c">#</span> | 
|  | 182 <a name="GenerateCmpdBondLine-"></a> 169 <span class="k">sub </span><span class="m">GenerateCmpdBondLine</span> <span class="s">{</span> | 
|  | 183  170   <span class="k">my</span><span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 184  171   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 185  172 | 
|  | 186  173   <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$BondStereo</span><span class="s">)</span> <span class="s">{</span> | 
|  | 187  174     <span class="i">$BondStereo</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 188  175   <span class="s">}</span> | 
|  | 189  176   <span class="i">$Line</span> = <span class="k">sprintf</span> <span class="q">"%3i%3i%3i%3i  0  0  0"</span><span class="cm">,</span> <span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="sc">;</span> | 
|  | 190  177 | 
|  | 191  178   <span class="k">return</span> <span class="i">$Line</span> | 
|  | 192  179 <span class="s">}</span> | 
|  | 193  180 | 
|  | 194  181 <span class="c"># Generate charge property lines for CTAB block...</span> | 
|  | 195  182 <span class="c">#</span> | 
|  | 196 <a name="GenerateCmpdChargePropertyLines-"></a> 183 <span class="k">sub </span><span class="m">GenerateCmpdChargePropertyLines</span> <span class="s">{</span> | 
|  | 197  184   <span class="k">my</span><span class="s">(</span><span class="i">$ChargeValuePairsRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 198  185 | 
|  | 199  186   <span class="k">return</span> <span class="i">_GenerateCmpdGenericPropertyLines</span><span class="s">(</span><span class="q">'Charge'</span><span class="cm">,</span> <span class="i">$ChargeValuePairsRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 200  187 <span class="s">}</span> | 
|  | 201  188 | 
|  | 202  189 <span class="c"># Generate isotope property lines for CTAB block...</span> | 
|  | 203  190 <span class="c">#</span> | 
|  | 204 <a name="GenerateCmpdIsotopePropertyLines-"></a> 191 <span class="k">sub </span><span class="m">GenerateCmpdIsotopePropertyLines</span> <span class="s">{</span> | 
|  | 205  192   <span class="k">my</span><span class="s">(</span><span class="i">$IsotopeValuePairsRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 206  193 | 
|  | 207  194   <span class="k">return</span> <span class="i">_GenerateCmpdGenericPropertyLines</span><span class="s">(</span><span class="q">'Isotope'</span><span class="cm">,</span> <span class="i">$IsotopeValuePairsRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 208  195 <span class="s">}</span> | 
|  | 209  196 | 
|  | 210  197 <span class="c"># Generate radical property lines for CTAB block...</span> | 
|  | 211  198 <span class="c">#</span> | 
|  | 212 <a name="GenerateCmpdRadicalPropertyLines-"></a> 199 <span class="k">sub </span><span class="m">GenerateCmpdRadicalPropertyLines</span> <span class="s">{</span> | 
|  | 213  200   <span class="k">my</span><span class="s">(</span><span class="i">$RadicalValuePairsRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 214  201 | 
|  | 215  202   <span class="k">return</span> <span class="i">_GenerateCmpdGenericPropertyLines</span><span class="s">(</span><span class="q">'Radical'</span><span class="cm">,</span> <span class="i">$RadicalValuePairsRef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 216  203 <span class="s">}</span> | 
|  | 217  204 | 
|  | 218  205 <span class="c"># Generate atom alias property lines for CTAB block...</span> | 
|  | 219  206 <span class="c">#</span> | 
|  | 220  207 <span class="c"># Atom alias property line format:</span> | 
|  | 221  208 <span class="c">#</span> | 
|  | 222  209 <span class="c"># A  aaa</span> | 
|  | 223  210 <span class="c"># x...</span> | 
|  | 224  211 <span class="c">#</span> | 
|  | 225  212 <span class="c">#    aaa: Atom number</span> | 
|  | 226  213 <span class="c">#    x: Atom alias in next line</span> | 
|  | 227  214 <span class="c">#</span> | 
|  | 228 <a name="GenerateCmpdAtomAliasPropertyLines-"></a> 215 <span class="k">sub </span><span class="m">GenerateCmpdAtomAliasPropertyLines</span> <span class="s">{</span> | 
|  | 229  216   <span class="k">my</span><span class="s">(</span><span class="i">$PropertyValuePairsRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 230  217   <span class="k">my</span><span class="s">(</span><span class="i">$Index</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$AtomAlias</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">@PropertyLines</span><span class="s">)</span><span class="sc">;</span> | 
|  | 231  218 | 
|  | 232  219   <span class="i">@PropertyLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 233  220 | 
|  | 234  221   <span class="k">for</span> <span class="s">(</span><span class="i">$Index</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$Index</span> &lt; <span class="i">$#</span>{<span class="i">$PropertyValuePairsRef</span>}<span class="sc">;</span> <span class="i">$Index</span> += <span class="n">2</span><span class="s">)</span> <span class="s">{</span> | 
|  | 235  222     <span class="i">$AtomNum</span> = <span class="i">$PropertyValuePairsRef</span>->[<span class="i">$Index</span>]<span class="sc">;</span> | 
|  | 236  223     <span class="i">$AtomAlias</span> = <span class="i">$PropertyValuePairsRef</span>->[<span class="i">$Index</span> + <span class="n">1</span>]<span class="sc">;</span> | 
|  | 237  224 | 
|  | 238  225     <span class="i">$Line</span> = <span class="q">"A  "</span> . <span class="k">sprintf</span> <span class="q">"%3i"</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="sc">;</span> | 
|  | 239  226 | 
|  | 240  227     <span class="k">push</span> <span class="i">@PropertyLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="sc">;</span> | 
|  | 241  228     <span class="k">push</span> <span class="i">@PropertyLines</span><span class="cm">,</span> <span class="i">$AtomAlias</span><span class="sc">;</span> | 
|  | 242  229   <span class="s">}</span> | 
|  | 243  230 | 
|  | 244  231   <span class="k">return</span> <span class="i">@PropertyLines</span><span class="sc">;</span> | 
|  | 245  232 <span class="s">}</span> | 
|  | 246  233 | 
|  | 247  234 <span class="c"># Generate data header labels and values lines...</span> | 
|  | 248  235 <span class="c">#</span> | 
|  | 249 <a name="GenerateCmpdDataHeaderLabelsAndValuesLines-"></a> 236 <span class="k">sub </span><span class="m">GenerateCmpdDataHeaderLabelsAndValuesLines</span> <span class="s">{</span> | 
|  | 250  237   <span class="k">my</span><span class="s">(</span><span class="i">$DataHeaderLabelsRef</span><span class="cm">,</span> <span class="i">$DataHeaderLabelsAndValuesRef</span><span class="cm">,</span> <span class="i">$SortDataLabels</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 251  238   <span class="k">my</span><span class="s">(</span><span class="i">$DataLabel</span><span class="cm">,</span> <span class="i">$DataValue</span><span class="cm">,</span> <span class="i">@DataLabels</span><span class="cm">,</span> <span class="i">@DataLines</span><span class="s">)</span><span class="sc">;</span> | 
|  | 252  239 | 
|  | 253  240   <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$SortDataLabels</span><span class="s">)</span> <span class="s">{</span> | 
|  | 254  241     <span class="i">$SortDataLabels</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 255  242   <span class="s">}</span> | 
|  | 256  243 | 
|  | 257  244   <span class="i">@DataLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 258  245   <span class="i">@DataLabels</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 259  246   <span class="k">if</span> <span class="s">(</span><span class="i">$SortDataLabels</span><span class="s">)</span> <span class="s">{</span> | 
|  | 260  247     <span class="k">push</span> <span class="i">@DataLabels</span><span class="cm">,</span> <span class="k">sort</span> <span class="i">@</span>{<span class="i">$DataHeaderLabelsRef</span>}<span class="sc">;</span> | 
|  | 261  248   <span class="s">}</span> | 
|  | 262  249   <span class="k">else</span> <span class="s">{</span> | 
|  | 263  250     <span class="k">push</span> <span class="i">@DataLabels</span><span class="cm">,</span>  <span class="i">@</span>{<span class="i">$DataHeaderLabelsRef</span>}<span class="sc">;</span> | 
|  | 264  251   <span class="s">}</span> | 
|  | 265  252   <span class="k">for</span> <span class="i">$DataLabel</span> <span class="s">(</span><span class="i">@DataLabels</span><span class="s">)</span> <span class="s">{</span> | 
|  | 266  253     <span class="i">$DataValue</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 267  254     <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$DataHeaderLabelsAndValuesRef</span>->{<span class="i">$DataLabel</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 268  255       <span class="i">$DataValue</span> = <span class="i">$DataHeaderLabelsAndValuesRef</span>->{<span class="i">$DataLabel</span>}<span class="sc">;</span> | 
|  | 269  256     <span class="s">}</span> | 
|  | 270  257     <span class="k">push</span> <span class="i">@DataLines</span><span class="cm">,</span> <span class="s">(</span><span class="q">">  <${DataLabel}>"</span><span class="cm">,</span> <span class="q">"$DataValue"</span><span class="cm">,</span> <span class="q">""</span><span class="s">)</span><span class="sc">;</span> | 
|  | 271  258   <span class="s">}</span> | 
|  | 272  259   <span class="k">return</span> <span class="i">@DataLines</span><span class="sc">;</span> | 
|  | 273  260 <span class="s">}</span> | 
|  | 274  261 | 
|  | 275  262 <span class="c"># Parse data field header in SD file and return lists of all and common data field</span> | 
|  | 276  263 <span class="c"># labels.</span> | 
|  | 277 <a name="GetAllAndCommonCmpdDataHeaderLabels-"></a> 264 <span class="k">sub </span><span class="m">GetAllAndCommonCmpdDataHeaderLabels</span> <span class="s">{</span> | 
|  | 278  265   <span class="k">my</span><span class="s">(</span><span class="i">$SDFileRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 279  266   <span class="k">my</span><span class="s">(</span><span class="i">$CmpdCount</span><span class="cm">,</span> <span class="i">$CmpdString</span><span class="cm">,</span> <span class="i">$Label</span><span class="cm">,</span> <span class="i">@CmpdLines</span><span class="cm">,</span> <span class="i">@DataFieldLabels</span><span class="cm">,</span> <span class="i">@CommonDataFieldLabels</span><span class="cm">,</span> <span class="i">%DataFieldLabelsMap</span><span class="s">)</span><span class="sc">;</span> | 
|  | 280  267 | 
|  | 281  268   <span class="i">$CmpdCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 282  269   <span class="i">@DataFieldLabels</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 283  270   <span class="i">@CommonDataFieldLabels</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 284  271   <span class="i">%DataFieldLabelsMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 285  272 | 
|  | 286  273   <span class="k">while</span> <span class="s">(</span><span class="i">$CmpdString</span> = <span class="i">ReadCmpdString</span><span class="s">(</span><span class="i">$SDFileRef</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 287  274     <span class="i">$CmpdCount</span>++<span class="sc">;</span> | 
|  | 288  275     <span class="i">@CmpdLines</span> = <span class="k">split</span> <span class="q">"\n"</span><span class="cm">,</span> <span class="i">$CmpdString</span><span class="sc">;</span> | 
|  | 289  276     <span class="c"># Process compound data header labels and figure out which ones are present for</span> | 
|  | 290  277     <span class="c"># all the compounds...</span> | 
|  | 291  278     <span class="k">if</span> <span class="s">(</span><span class="i">@DataFieldLabels</span><span class="s">)</span> <span class="s">{</span> | 
|  | 292  279       <span class="k">my</span> <span class="s">(</span><span class="i">@CmpdDataFieldLabels</span><span class="s">)</span> = <span class="i">GetCmpdDataHeaderLabels</span><span class="s">(</span>\<span class="i">@CmpdLines</span><span class="s">)</span><span class="sc">;</span> | 
|  | 293  280       <span class="k">my</span><span class="s">(</span><span class="i">%CmpdDataFieldLabelsMap</span><span class="s">)</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 294  281       <span class="c"># Setup a map for the current labels...</span> | 
|  | 295  282       <span class="k">for</span> <span class="i">$Label</span> <span class="s">(</span><span class="i">@CmpdDataFieldLabels</span><span class="s">)</span> <span class="s">{</span> | 
|  | 296  283         <span class="i">$CmpdDataFieldLabelsMap</span>{<span class="i">$Label</span>} = <span class="q">"PresentInSome"</span><span class="sc">;</span> | 
|  | 297  284       <span class="s">}</span> | 
|  | 298  285       <span class="c"># Check the presence of old labels in this compound; mark the missing ones as present in only some...</span> | 
|  | 299  286       <span class="k">for</span> <span class="i">$Label</span> <span class="s">(</span><span class="i">@DataFieldLabels</span><span class="s">)</span> <span class="s">{</span> | 
|  | 300  287         <span class="k">if</span> <span class="s">(</span>!<span class="i">$CmpdDataFieldLabelsMap</span>{<span class="i">$Label</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 301  288           <span class="i">$DataFieldLabelsMap</span>{<span class="i">$Label</span>} = <span class="q">"PresentInSome"</span><span class="sc">;</span> | 
|  | 302  289         <span class="s">}</span> | 
|  | 303  290       <span class="s">}</span> | 
|  | 304  291       <span class="c"># Check the presence of this compound's labels in the old labels; otherwise, add 'em...</span> | 
|  | 305  292       <span class="k">for</span> <span class="i">$Label</span> <span class="s">(</span><span class="i">@CmpdDataFieldLabels</span> <span class="s">)</span> <span class="s">{</span> | 
|  | 306  293         <span class="k">if</span> <span class="s">(</span>!<span class="i">$DataFieldLabelsMap</span>{<span class="i">$Label</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 307  294           <span class="c"># It's a new label...</span> | 
|  | 308  295           <span class="k">push</span> <span class="i">@DataFieldLabels</span><span class="cm">,</span> <span class="i">$Label</span><span class="sc">;</span> | 
|  | 309  296           <span class="i">$DataFieldLabelsMap</span>{<span class="i">$Label</span>} = <span class="q">"PresentInSome"</span><span class="sc">;</span> | 
|  | 310  297         <span class="s">}</span> | 
|  | 311  298       <span class="s">}</span> | 
|  | 312  299     <span class="s">}</span> | 
|  | 313  300     <span class="k">else</span> <span class="s">{</span> | 
|  | 314  301       <span class="c"># Get the initial label set and set up a map...</span> | 
|  | 315  302       <span class="i">@DataFieldLabels</span> = <span class="i">GetCmpdDataHeaderLabels</span><span class="s">(</span>\<span class="i">@CmpdLines</span><span class="s">)</span><span class="sc">;</span> | 
|  | 316  303       <span class="k">for</span> <span class="i">$Label</span> <span class="s">(</span><span class="i">@DataFieldLabels</span><span class="s">)</span> <span class="s">{</span> | 
|  | 317  304         <span class="i">$DataFieldLabelsMap</span>{<span class="i">$Label</span>} = <span class="q">"PresentInAll"</span><span class="sc">;</span> | 
|  | 318  305       <span class="s">}</span> | 
|  | 319  306     <span class="s">}</span> | 
|  | 320  307   <span class="s">}</span> | 
|  | 321  308   <span class="c"># Identify the common data field labels...</span> | 
|  | 322  309   <span class="i">@CommonDataFieldLabels</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 323  310   <span class="k">for</span> <span class="i">$Label</span> <span class="s">(</span><span class="i">@DataFieldLabels</span><span class="s">)</span> <span class="s">{</span> | 
|  | 324  311     <span class="k">if</span> <span class="s">(</span><span class="i">$DataFieldLabelsMap</span>{<span class="i">$Label</span>} <span class="k">eq</span> <span class="q">"PresentInAll"</span><span class="s">)</span> <span class="s">{</span> | 
|  | 325  312       <span class="k">push</span> <span class="i">@CommonDataFieldLabels</span><span class="cm">,</span> <span class="i">$Label</span><span class="sc">;</span> | 
|  | 326  313     <span class="s">}</span> | 
|  | 327  314   <span class="s">}</span> | 
|  | 328  315   <span class="k">return</span> <span class="s">(</span><span class="i">$CmpdCount</span><span class="cm">,</span> \<span class="i">@DataFieldLabels</span><span class="cm">,</span> \<span class="i">@CommonDataFieldLabels</span><span class="s">)</span><span class="sc">;</span> | 
|  | 329  316 <span class="s">}</span> | 
|  | 330  317 | 
|  | 331  318 <span class="c"># Parse all the data header labels and return 'em as a list...</span> | 
|  | 332  319 <span class="c">#</span> | 
|  | 333  320 <span class="c"># Format:</span> | 
|  | 334  321 <span class="c">#</span> | 
|  | 335  322 <span class="c">#> Data header line</span> | 
|  | 336  323 <span class="c">#Data line(s)</span> | 
|  | 337  324 <span class="c">#Blank line</span> | 
|  | 338  325 <span class="c">#</span> | 
|  | 339  326 <span class="c"># [Data Header] (one line) precedes each item of data, starts with a greater than (>) sign, and</span> | 
|  | 340  327 <span class="c"># contains at least one of the following:</span> | 
|  | 341  328 <span class="c">#  The field name enclosed in angle brackets. For example: &lt;melting.point&gt;</span> | 
|  | 342  329 <span class="c">#  The field number, DTn , where n represents the number assigned to the field in a MACCS-II database</span> | 
|  | 343  330 <span class="c">#</span> | 
|  | 344  331 <span class="c">#Optional information for the data header includes:</span> | 
|  | 345  332 <span class="c">#  The compound’s external and internal registry numbers. External registry numbers must be enclosed in parentheses.</span> | 
|  | 346  333 <span class="c">#  Any combination of information</span> | 
|  | 347  334 <span class="c">#</span> | 
|  | 348  335 <span class="c">#The following are examples of valid data headers:</span> | 
|  | 349  336 <span class="c">#&gt; &lt;MELTING.POINT&gt;</span> | 
|  | 350  337 <span class="c">#&gt; 55 (MD-08974) &lt;BOILING.POINT&gt; DT12</span> | 
|  | 351  338 <span class="c">#&gt; DT12 55</span> | 
|  | 352  339 <span class="c">#&gt; (MD-0894) &lt;BOILING.POINT&gt; FROM ARCHIVES</span> | 
|  | 353  340 <span class="c">#</span> | 
|  | 354  341 <span class="c">#Notes: Sometimes the last blank line is missing and the data can be followed directly by $$$$</span> | 
|  | 355  342 <span class="c">#</span> | 
|  | 356 <a name="GetCmpdDataHeaderLabels-"></a> 343 <span class="k">sub </span><span class="m">GetCmpdDataHeaderLabels</span> <span class="s">{</span> | 
|  | 357  344   <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 358  345   <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLine</span><span class="cm">,</span> <span class="i">$Label</span><span class="cm">,</span> <span class="i">@Labels</span><span class="s">)</span><span class="sc">;</span> | 
|  | 359  346 | 
|  | 360  347   <span class="i">@Labels</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 361  348   <span class="j">CMPDLINE:</span> <span class="k">for</span> <span class="i">$CmpdLine</span> <span class="s">(</span><span class="i">@$CmpdLines</span><span class="s">)</span> <span class="s">{</span> | 
|  | 362  349     <span class="k">if</span> <span class="s">(</span><span class="i">$CmpdLine</span> !~ <span class="q">/^>/</span><span class="s">)</span> <span class="s">{</span> | 
|  | 363  350       <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span> | 
|  | 364  351     <span class="s">}</span> | 
|  | 365  352     <span class="c"># Does the line contain a field name enclosed in angle brackets?</span> | 
|  | 366  353     <span class="s">(</span><span class="i">$Label</span><span class="s">)</span> = <span class="i">$CmpdLine</span> =~ <span class="q">/<.*?>/g</span><span class="sc">;</span> | 
|  | 367  354     <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span><span class="s">(</span><span class="i">$Label</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 368  355       <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span> | 
|  | 369  356     <span class="s">}</span> | 
|  | 370  357     <span class="i">$Label</span> =~ <span class="q">s/(<|>)//g</span><span class="sc">;</span> | 
|  | 371  358     <span class="k">push</span> <span class="i">@Labels</span><span class="cm">,</span> <span class="i">$Label</span><span class="sc">;</span> | 
|  | 372  359   <span class="s">}</span> | 
|  | 373  360   <span class="k">return</span> <span class="s">(</span><span class="i">@Labels</span><span class="s">)</span><span class="sc">;</span> | 
|  | 374  361 <span class="s">}</span> | 
|  | 375  362 | 
|  | 376  363 <span class="c"># Parse all the data header labels and values</span> | 
|  | 377 <a name="GetCmpdDataHeaderLabelsAndValues-"></a> 364 <span class="k">sub </span><span class="m">GetCmpdDataHeaderLabelsAndValues</span> <span class="s">{</span> | 
|  | 378  365   <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 379  366   <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLine</span><span class="cm">,</span> <span class="i">$CurrentLabel</span><span class="cm">,</span> <span class="i">$Label</span><span class="cm">,</span> <span class="i">$Value</span><span class="cm">,</span> <span class="i">$ValueCount</span><span class="cm">,</span> <span class="i">$ProcessingLabelData</span><span class="cm">,</span> <span class="i">@Values</span><span class="cm">,</span> <span class="i">%DataFields</span><span class="s">)</span><span class="sc">;</span> | 
|  | 380  367 | 
|  | 381  368   <span class="i">%DataFields</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 382  369   <span class="i">$ProcessingLabelData</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 383  370   <span class="i">$ValueCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 384  371   <span class="j">CMPDLINE:</span> <span class="k">for</span> <span class="i">$CmpdLine</span> <span class="s">(</span><span class="i">@$CmpdLines</span><span class="s">)</span> <span class="s">{</span> | 
|  | 385  372     <span class="k">if</span> <span class="s">(</span><span class="i">$CmpdLine</span> =~ <span class="q">/^\$\$\$\$/</span><span class="s">)</span> <span class="s">{</span> | 
|  | 386  373       <span class="k">last</span> <span class="j">CMPDLINE</span><span class="sc">;</span> | 
|  | 387  374     <span class="s">}</span> | 
|  | 388  375     <span class="k">if</span> <span class="s">(</span><span class="i">$CmpdLine</span> =~ <span class="q">/^>/</span><span class="s">)</span> <span class="s">{</span> | 
|  | 389  376       <span class="c"># Does the line contain a field name enclosed in angle brackets?</span> | 
|  | 390  377       <span class="s">(</span><span class="i">$Label</span><span class="s">)</span> = <span class="i">$CmpdLine</span> =~ <span class="q">/<.*?>/g</span><span class="sc">;</span> | 
|  | 391  378       <span class="k">if</span> <span class="s">(</span><span class="k">defined</span> <span class="i">$Label</span><span class="s">)</span> <span class="s">{</span> | 
|  | 392  379         <span class="i">$CurrentLabel</span> = <span class="i">$Label</span><span class="sc">;</span> | 
|  | 393  380         <span class="i">$CurrentLabel</span> =~ <span class="q">s/(<|>)//g</span><span class="sc">;</span> | 
|  | 394  381         <span class="i">$ProcessingLabelData</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 395  382         <span class="i">$ValueCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 396  383 | 
|  | 397  384         <span class="k">if</span> <span class="s">(</span><span class="i">$CurrentLabel</span><span class="s">)</span> <span class="s">{</span> | 
|  | 398  385           <span class="i">$ProcessingLabelData</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 399  386           <span class="i">$DataFields</span>{<span class="i">$CurrentLabel</span>} = <span class="q">''</span><span class="sc">;</span> | 
|  | 400  387           <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span> | 
|  | 401  388         <span class="s">}</span> | 
|  | 402  389       <span class="s">}</span> | 
|  | 403  390       <span class="k">else</span> <span class="s">{</span> | 
|  | 404  391         <span class="k">if</span> <span class="s">(</span>!<span class="i">$ProcessingLabelData</span><span class="s">)</span> <span class="s">{</span> | 
|  | 405  392           <span class="c"># Data header line containing no &lt;label&gt;, as allowed by SDF format. Just ignore it...</span> | 
|  | 406  393           <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span> | 
|  | 407  394         <span class="s">}</span> | 
|  | 408  395       <span class="s">}</span> | 
|  | 409  396     <span class="s">}</span> | 
|  | 410  397     <span class="k">if</span> <span class="s">(</span>!<span class="i">$ProcessingLabelData</span><span class="s">)</span> <span class="s">{</span> | 
|  | 411  398       <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span> | 
|  | 412  399     <span class="s">}</span> | 
|  | 413  400     <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">defined</span><span class="s">(</span><span class="i">$CmpdLine</span><span class="s">)</span> && <span class="k">length</span><span class="s">(</span><span class="i">$CmpdLine</span><span class="s">)</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 414  401       <span class="c"># Blank line terminates value for a label...</span> | 
|  | 415  402       <span class="i">$CurrentLabel</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 416  403       <span class="i">$ValueCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 417  404       <span class="i">$ProcessingLabelData</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 418  405       <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span> | 
|  | 419  406     <span class="s">}</span> | 
|  | 420  407     <span class="i">$ValueCount</span>++<span class="sc">;</span> | 
|  | 421  408     <span class="i">$Value</span> = <span class="i">$CmpdLine</span><span class="sc">;</span> | 
|  | 422  409 | 
|  | 423  410     <span class="k">if</span> <span class="s">(</span><span class="i">$ValueCount</span> > <span class="n">1</span><span class="s">)</span> <span class="s">{</span> | 
|  | 424  411       <span class="i">$DataFields</span>{<span class="i">$CurrentLabel</span>} .= <span class="q">"\n"</span> . <span class="i">$Value</span><span class="sc">;</span> | 
|  | 425  412     <span class="s">}</span> | 
|  | 426  413     <span class="k">else</span> <span class="s">{</span> | 
|  | 427  414       <span class="i">$DataFields</span>{<span class="i">$CurrentLabel</span>} = <span class="i">$Value</span><span class="sc">;</span> | 
|  | 428  415     <span class="s">}</span> | 
|  | 429  416   <span class="s">}</span> | 
|  | 430  417   <span class="k">return</span> <span class="s">(</span><span class="i">%DataFields</span><span class="s">)</span><span class="sc">;</span> | 
|  | 431  418 <span class="s">}</span> | 
|  | 432  419 | 
|  | 433  420 <span class="c"># Return an updated compound string after removing the data header label along with its</span> | 
|  | 434  421 <span class="c"># value from the specified compound string...</span> | 
|  | 435  422 <span class="c">#</span> | 
|  | 436 <a name="RemoveCmpdDataHeaderLabelAndValue-"></a> 423 <span class="k">sub </span><span class="m">RemoveCmpdDataHeaderLabelAndValue</span> <span class="s">{</span> | 
|  | 437  424   <span class="k">my</span><span class="s">(</span><span class="i">$CmpdString</span><span class="cm">,</span> <span class="i">$DataHeaderLabel</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 438  425   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">$PorcessingDataHeaderLabel</span><span class="cm">,</span> <span class="i">@CmpdLines</span><span class="s">)</span><span class="sc">;</span> | 
|  | 439  426 | 
|  | 440  427   <span class="i">@CmpdLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 441  428   <span class="i">$PorcessingDataHeaderLabel</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 442  429 | 
|  | 443  430   <span class="j">CMPDLINE:</span> <span class="k">for</span> <span class="i">$Line</span> <span class="s">(</span><span class="k">split</span> <span class="q">"\n"</span><span class="cm">,</span> <span class="i">$CmpdString</span><span class="s">)</span> <span class="s">{</span> | 
|  | 444  431     <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^>/</span> && <span class="i">$Line</span> =~ <span class="q">/<$DataHeaderLabel>/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 445  432       <span class="i">$PorcessingDataHeaderLabel</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 446  433       <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span> | 
|  | 447  434     <span class="s">}</span> | 
|  | 448  435 | 
|  | 449  436     <span class="k">if</span> <span class="s">(</span><span class="i">$PorcessingDataHeaderLabel</span><span class="s">)</span> <span class="s">{</span> | 
|  | 450  437       <span class="c"># Blank line or $$$$ indicates end of the data value being removed...</span> | 
|  | 451  438       <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^\$\$\$\$/</span><span class="s">)</span> <span class="s">{</span> | 
|  | 452  439         <span class="k">push</span> <span class="i">@CmpdLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="sc">;</span> | 
|  | 453  440         <span class="i">$PorcessingDataHeaderLabel</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 454  441       <span class="s">}</span> | 
|  | 455  442       <span class="k">elsif</span> <span class="s">(</span>!<span class="k">length</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 456  443         <span class="i">$PorcessingDataHeaderLabel</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 457  444       <span class="s">}</span> | 
|  | 458  445       <span class="k">next</span> <span class="j">CMPDLINE</span><span class="sc">;</span> | 
|  | 459  446     <span class="s">}</span> | 
|  | 460  447 | 
|  | 461  448     <span class="c"># Track compound lines that are not part of the removed data field...</span> | 
|  | 462  449     <span class="k">push</span> <span class="i">@CmpdLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="sc">;</span> | 
|  | 463  450   <span class="s">}</span> | 
|  | 464  451 | 
|  | 465  452   <span class="k">return</span> <span class="k">join</span> <span class="q">"\n"</span><span class="cm">,</span> <span class="i">@CmpdLines</span><span class="sc">;</span> | 
|  | 466  453 <span class="s">}</span> | 
|  | 467  454 | 
|  | 468  455 <span class="c">#</span> | 
|  | 469  456 <span class="c"># Using bond blocks, figure out the number of disconnected fragments  and</span> | 
|  | 470  457 <span class="c"># return their values along with the atom numbers in a string delimited by new</span> | 
|  | 471  458 <span class="c"># line character.</span> | 
|  | 472  459 <span class="c">#</span> | 
|  | 473 <a name="GetCmpdFragments-"></a> 460 <span class="k">sub </span><span class="m">GetCmpdFragments</span> <span class="s">{</span> | 
|  | 474  461   <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 475  462   <span class="k">my</span><span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">@AtomConnections</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$FragmentString</span><span class="cm">,</span> <span class="i">$FragmentCount</span><span class="cm">,</span> <span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$Index</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$NbrAtomNum</span><span class="cm">,</span> <span class="i">@ProcessedAtoms</span><span class="cm">,</span> <span class="i">$ProcessedAtomCount</span><span class="cm">,</span> <span class="i">$ProcessAtomNum</span><span class="cm">,</span> <span class="i">@ProcessingAtoms</span><span class="cm">,</span> <span class="i">@ConnectedAtoms</span><span class="cm">,</span> <span class="i">%Fragments</span><span class="cm">,</span> <span class="i">$FragmentNum</span><span class="cm">,</span> <span class="i">$AFragmentString</span><span class="s">)</span><span class="sc">;</span> | 
|  | 476  463 | 
|  | 477  464   <span class="c"># Setup the connection table for each atom...</span> | 
|  | 478  465   <span class="i">@AtomConnections</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 479  466   <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="s">)</span> = <span class="i">ParseCmpdCountsLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="s">)</span><span class="sc">;</span> | 
|  | 480  467   <span class="k">for</span> <span class="i">$AtomNum</span> <span class="s">(</span><span class="n">1</span> .. <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span> | 
|  | 481  468     <span class="i">%</span>{<span class="i">$AtomConnections</span>[<span class="i">$AtomNum</span>]} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 482  469   <span class="s">}</span> | 
|  | 483  470   <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span> + <span class="i">$AtomCount</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span> + <span class="i">$BondCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> | 
|  | 484  471     <span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="s">)</span> = <span class="i">ParseCmpdBondLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="s">)</span><span class="sc">;</span> | 
|  | 485  472     <span class="k">if</span> <span class="s">(</span>!<span class="i">$AtomConnections</span>[<span class="i">$FirstAtomNum</span>]{<span class="i">$SecondAtomNum</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 486  473       <span class="i">$AtomConnections</span>[<span class="i">$FirstAtomNum</span>]{<span class="i">$SecondAtomNum</span>} = <span class="i">$BondType</span><span class="sc">;</span> | 
|  | 487  474     <span class="s">}</span> | 
|  | 488  475     <span class="k">if</span> <span class="s">(</span>!<span class="i">$AtomConnections</span>[<span class="i">$SecondAtomNum</span>]{<span class="i">$FirstAtomNum</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 489  476       <span class="i">$AtomConnections</span>[<span class="i">$SecondAtomNum</span>]{<span class="i">$FirstAtomNum</span>} = <span class="i">$BondType</span><span class="sc">;</span> | 
|  | 490  477     <span class="s">}</span> | 
|  | 491  478   <span class="s">}</span> | 
|  | 492  479 | 
|  | 493  480   <span class="c">#Get set to count fragments...</span> | 
|  | 494  481   <span class="i">$ProcessedAtomCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 495  482   <span class="i">$FragmentNum</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 496  483   <span class="i">%Fragments</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 497  484   <span class="i">@ProcessedAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 498  485   <span class="k">for</span> <span class="i">$AtomNum</span> <span class="s">(</span><span class="n">1</span> .. <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span> | 
|  | 499  486     <span class="i">$ProcessedAtoms</span>[<span class="i">$AtomNum</span>] = <span class="n">0</span><span class="sc">;</span> | 
|  | 500  487   <span class="s">}</span> | 
|  | 501  488   <span class="k">while</span> <span class="s">(</span><span class="i">$ProcessedAtomCount</span> < <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span> | 
|  | 502  489     <span class="i">@ProcessingAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 503  490     <span class="i">@ConnectedAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 504  491     <span class="j">ATOMNUM:</span> <span class="k">for</span> <span class="i">$AtomNum</span> <span class="s">(</span><span class="n">1</span> .. <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span> | 
|  | 505  492       <span class="k">if</span> <span class="s">(</span>!<span class="i">$ProcessedAtoms</span>[<span class="i">$AtomNum</span>]<span class="s">)</span> <span class="s">{</span> | 
|  | 506  493         <span class="i">$ProcessedAtomCount</span>++<span class="sc">;</span> | 
|  | 507  494         <span class="i">$ProcessedAtoms</span>[<span class="i">$AtomNum</span>] = <span class="n">1</span><span class="sc">;</span> | 
|  | 508  495         <span class="k">push</span> <span class="i">@ProcessingAtoms</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="sc">;</span> | 
|  | 509  496         <span class="i">$FragmentNum</span>++<span class="sc">;</span> | 
|  | 510  497         <span class="i">@</span>{<span class="i">$Fragments</span>{<span class="i">$FragmentNum</span>} } = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 511  498         <span class="k">push</span> <span class="i">@</span>{<span class="i">$Fragments</span>{<span class="i">$FragmentNum</span>} }<span class="cm">,</span> <span class="i">$AtomNum</span><span class="sc">;</span> | 
|  | 512  499         <span class="k">last</span> <span class="j">ATOMNUM</span><span class="sc">;</span> | 
|  | 513  500       <span class="s">}</span> | 
|  | 514  501     <span class="s">}</span> | 
|  | 515  502 | 
|  | 516  503     <span class="c"># Go over the neighbors and follow the connection trail while collecting the</span> | 
|  | 517  504     <span class="c"># atom numbers present in the connected fragment...</span> | 
|  | 518  505     <span class="k">while</span> <span class="s">(</span><span class="i">@ProcessingAtoms</span><span class="s">)</span> <span class="s">{</span> | 
|  | 519  506       <span class="k">for</span> <span class="s">(</span><span class="i">$Index</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$Index</span> < <span class="i">@ProcessingAtoms</span><span class="sc">;</span> <span class="i">$Index</span>++<span class="s">)</span> <span class="s">{</span> | 
|  | 520  507         <span class="i">$ProcessAtomNum</span> = <span class="i">$ProcessingAtoms</span>[<span class="i">$Index</span>]<span class="sc">;</span> | 
|  | 521  508         <span class="k">for</span> <span class="i">$NbrAtomNum</span> <span class="s">(</span><span class="k">keys</span> <span class="i">%</span>{<span class="i">$AtomConnections</span>[<span class="i">$ProcessAtomNum</span>]}<span class="s">)</span>  <span class="s">{</span> | 
|  | 522  509           <span class="k">if</span> <span class="s">(</span>!<span class="i">$ProcessedAtoms</span>[<span class="i">$NbrAtomNum</span>]<span class="s">)</span> <span class="s">{</span> | 
|  | 523  510             <span class="i">$ProcessedAtomCount</span>++<span class="sc">;</span> | 
|  | 524  511             <span class="i">$ProcessedAtoms</span>[<span class="i">$NbrAtomNum</span>] = <span class="n">1</span><span class="sc">;</span> | 
|  | 525  512             <span class="k">push</span> <span class="i">@ConnectedAtoms</span><span class="cm">,</span> <span class="i">$NbrAtomNum</span><span class="sc">;</span> | 
|  | 526  513             <span class="k">push</span> <span class="i">@</span>{ <span class="i">$Fragments</span>{<span class="i">$FragmentNum</span>} }<span class="cm">,</span> <span class="i">$NbrAtomNum</span><span class="sc">;</span> | 
|  | 527  514           <span class="s">}</span> | 
|  | 528  515         <span class="s">}</span> | 
|  | 529  516       <span class="s">}</span> | 
|  | 530  517       <span class="i">@ProcessingAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 531  518       <span class="i">@ProcessingAtoms</span> = <span class="i">@ConnectedAtoms</span><span class="sc">;</span> | 
|  | 532  519       <span class="i">@ConnectedAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 533  520     <span class="s">}</span> | 
|  | 534  521   <span class="s">}</span> | 
|  | 535  522   <span class="i">$FragmentCount</span> = <span class="i">$FragmentNum</span><span class="sc">;</span> | 
|  | 536  523   <span class="i">$FragmentString</span> = <span class="q">""</span><span class="sc">;</span> | 
|  | 537  524 | 
|  | 538  525   <span class="c"># Sort out the fragments by size...</span> | 
|  | 539  526   <span class="k">for</span> <span class="i">$FragmentNum</span> <span class="s">(</span><span class="k">sort</span> <span class="s">{</span> <span class="i">@</span>{<span class="i">$Fragments</span>{<span class="i">$b</span>}} <=> <span class="i">@</span>{<span class="i">$Fragments</span>{<span class="i">$a</span>}}  <span class="s">}</span> <span class="k">keys</span> <span class="i">%Fragments</span> <span class="s">)</span> <span class="s">{</span> | 
|  | 540  527     <span class="c"># Sort the atoms in a fragment by their numbers...</span> | 
|  | 541  528     <span class="i">$AFragmentString</span> = <span class="k">join</span> <span class="q">" "</span><span class="cm">,</span> <span class="k">sort</span> <span class="s">{</span> <span class="i">$a</span> <=> <span class="i">$b</span> <span class="s">}</span> <span class="i">@</span>{ <span class="i">$Fragments</span>{<span class="i">$FragmentNum</span>} }<span class="sc">;</span> | 
|  | 542  529     <span class="k">if</span> <span class="s">(</span><span class="i">$FragmentString</span><span class="s">)</span> <span class="s">{</span> | 
|  | 543  530       <span class="i">$FragmentString</span> .=  <span class="q">"\n"</span> . <span class="i">$AFragmentString</span><span class="sc">;</span> | 
|  | 544  531     <span class="s">}</span> | 
|  | 545  532     <span class="k">else</span> <span class="s">{</span> | 
|  | 546  533       <span class="i">$FragmentString</span> = <span class="i">$AFragmentString</span><span class="sc">;</span> | 
|  | 547  534     <span class="s">}</span> | 
|  | 548  535   <span class="s">}</span> | 
|  | 549  536   <span class="k">return</span> <span class="s">(</span><span class="i">$FragmentCount</span><span class="cm">,</span> <span class="i">$FragmentString</span><span class="s">)</span><span class="sc">;</span> | 
|  | 550  537 <span class="s">}</span> | 
|  | 551  538 | 
|  | 552  539 <span class="c"># Count number of lines present in between 4th line and the line containing "M END"</span> | 
|  | 553 <a name="GetCtabLinesCount-"></a> 540 <span class="k">sub </span><span class="m">GetCtabLinesCount</span> <span class="s">{</span> | 
|  | 554  541   <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 555  542   <span class="k">my</span><span class="s">(</span><span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$CtabLinesCount</span><span class="s">)</span><span class="sc">;</span> | 
|  | 556  543 | 
|  | 557  544   <span class="i">$CtabLinesCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 558  545  <span class="j">LINE:</span> <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="i">@$CmpdLines</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> | 
|  | 559  546     <span class="c">#</span> | 
|  | 560  547     <span class="c"># Any line after atom and bond data starting with anything other than space or</span> | 
|  | 561  548     <span class="c"># a digit indicates end of Ctab atom/bond data block...</span> | 
|  | 562  549     <span class="c">#</span> | 
|  | 563  550     <span class="k">if</span> <span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>] !~ <span class="q">/^[0-9 ]/</span><span class="s">)</span> <span class="s">{</span> | 
|  | 564  551       <span class="i">$CtabLinesCount</span> = <span class="i">$LineIndex</span> - <span class="n">4</span><span class="sc">;</span> | 
|  | 565  552       <span class="k">last</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 566  553     <span class="s">}</span> | 
|  | 567  554   <span class="s">}</span> | 
|  | 568  555   <span class="k">return</span> <span class="i">$CtabLinesCount</span><span class="sc">;</span> | 
|  | 569  556 <span class="s">}</span> | 
|  | 570  557 | 
|  | 571  558 <span class="c"># Using atom blocks, count the number of atoms which contain special element</span> | 
|  | 572  559 <span class="c"># symbols not present in the periodic table.</span> | 
|  | 573 <a name="GetUnknownAtoms-"></a> 560 <span class="k">sub </span><span class="m">GetUnknownAtoms</span> <span class="s">{</span> | 
|  | 574  561   <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 575  562   <span class="k">my</span><span class="s">(</span><span class="i">$UnknownAtomCount</span><span class="cm">,</span> <span class="i">$UnknownAtoms</span><span class="cm">,</span> <span class="i">$UnknownAtomLines</span><span class="cm">,</span> <span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="s">)</span><span class="sc">;</span> | 
|  | 576  563 | 
|  | 577  564   <span class="i">$UnknownAtomCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 578  565   <span class="i">$UnknownAtoms</span> = <span class="q">""</span><span class="sc">;</span> | 
|  | 579  566   <span class="i">$UnknownAtomLines</span> = <span class="q">""</span><span class="sc">;</span> | 
|  | 580  567   <span class="s">(</span><span class="i">$AtomCount</span><span class="s">)</span> = <span class="i">ParseCmpdCountsLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="s">)</span><span class="sc">;</span> | 
|  | 581  568   <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> | 
|  | 582  569     <span class="s">(</span><span class="i">$AtomSymbol</span><span class="s">)</span> = <span class="i">ParseCmpdAtomLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="s">)</span><span class="sc">;</span> | 
|  | 583  570     <span class="k">if</span> <span class="s">(</span>!<span class="i">IsElement</span><span class="s">(</span><span class="i">$AtomSymbol</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 584  571       <span class="i">$UnknownAtomCount</span>++<span class="sc">;</span> | 
|  | 585  572       <span class="i">$UnknownAtoms</span> .= <span class="q">" $AtomSymbol"</span><span class="sc">;</span> | 
|  | 586  573       <span class="k">if</span> <span class="s">(</span><span class="i">$UnknownAtomLines</span><span class="s">)</span> <span class="s">{</span> | 
|  | 587  574         <span class="i">$UnknownAtomLines</span> .= <span class="q">"\n"</span> . <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> | 
|  | 588  575       <span class="s">}</span> | 
|  | 589  576       <span class="k">else</span> <span class="s">{</span> | 
|  | 590  577         <span class="i">$UnknownAtomLines</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> | 
|  | 591  578       <span class="s">}</span> | 
|  | 592  579     <span class="s">}</span> | 
|  | 593  580   <span class="s">}</span> | 
|  | 594  581   <span class="k">return</span> <span class="s">(</span><span class="i">$UnknownAtomCount</span><span class="cm">,</span> <span class="i">$UnknownAtoms</span><span class="cm">,</span> <span class="i">$UnknownAtomLines</span><span class="s">)</span><span class="sc">;</span> | 
|  | 595  582 <span class="s">}</span> | 
|  | 596  583 | 
|  | 597  584 <span class="c"># Check z coordinates of all atoms to see whether any of them is non-zero</span> | 
|  | 598  585 <span class="c"># which makes the compound geometry three dimensional...</span> | 
|  | 599  586 <span class="c">#</span> | 
|  | 600 <a name="IsCmpd3D-"></a> 587 <span class="k">sub </span><span class="m">IsCmpd3D</span> <span class="s">{</span> | 
|  | 601  588   <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 602  589   <span class="k">my</span><span class="s">(</span><span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="s">)</span><span class="sc">;</span> | 
|  | 603  590 | 
|  | 604  591   <span class="s">(</span><span class="i">$AtomCount</span><span class="s">)</span> = <span class="i">ParseCmpdCountsLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="s">)</span><span class="sc">;</span> | 
|  | 605  592   <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> | 
|  | 606  593     <span class="s">(</span><span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="s">)</span> = <span class="i">ParseCmpdAtomLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="s">)</span><span class="sc">;</span> | 
|  | 607  594     <span class="k">if</span> <span class="s">(</span><span class="i">$AtomZ</span> != <span class="n">0</span><span class="s">)</span> <span class="s">{</span> | 
|  | 608  595       <span class="k">return</span> <span class="n">1</span><span class="sc">;</span> | 
|  | 609  596     <span class="s">}</span> | 
|  | 610  597   <span class="s">}</span> | 
|  | 611  598   <span class="k">return</span> <span class="n">0</span><span class="sc">;</span> | 
|  | 612  599 <span class="s">}</span> | 
|  | 613  600 | 
|  | 614  601 <span class="c"># Check whether it's a 2D compound...</span> | 
|  | 615  602 <span class="c">#</span> | 
|  | 616 <a name="IsCmpd2D-"></a> 603 <span class="k">sub </span><span class="m">IsCmpd2D</span> <span class="s">{</span> | 
|  | 617  604   <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 618  605 | 
|  | 619  606   <span class="k">return</span> <span class="i">IsCmpd3D</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> ? <span class="n">0</span> <span class="co">:</span> <span class="n">1</span><span class="sc">;</span> | 
|  | 620  607 <span class="s">}</span> | 
|  | 621  608 | 
|  | 622  609 <span class="c"># Using bond blocks, count the number of bond lines which contain atom numbers</span> | 
|  | 623  610 <span class="c"># greater than atom count specified in compound count line...</span> | 
|  | 624  611 <span class="c">#</span> | 
|  | 625 <a name="GetInvalidAtomNumbers-"></a> 612 <span class="k">sub </span><span class="m">GetInvalidAtomNumbers</span> <span class="s">{</span> | 
|  | 626  613   <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 627  614   <span class="k">my</span><span class="s">(</span><span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$InvalidAtomNumbersCount</span><span class="cm">,</span> <span class="i">$InvalidAtomNumbers</span><span class="cm">,</span> <span class="i">$InvalidAtomNumberLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">$InvalidAtomPropertyLine</span><span class="cm">,</span> <span class="i">$ValuePairIndex</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$Value</span><span class="cm">,</span> <span class="i">@ValuePairs</span><span class="s">)</span><span class="sc">;</span> | 
|  | 628  615 | 
|  | 629  616   <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="s">)</span> = <span class="i">ParseCmpdCountsLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="s">)</span><span class="sc">;</span> | 
|  | 630  617 | 
|  | 631  618   <span class="i">$InvalidAtomNumbersCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 632  619   <span class="i">$InvalidAtomNumbers</span> = <span class="q">""</span><span class="sc">;</span> | 
|  | 633  620   <span class="i">$InvalidAtomNumberLines</span> = <span class="q">""</span><span class="sc">;</span> | 
|  | 634  621 | 
|  | 635  622   <span class="c"># Go over bond block lines...</span> | 
|  | 636  623   <span class="j">LINE:</span> <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span> + <span class="i">$AtomCount</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span> + <span class="i">$BondCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> | 
|  | 637  624     <span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="s">)</span> = <span class="i">ParseCmpdBondLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="s">)</span><span class="sc">;</span> | 
|  | 638  625     <span class="k">if</span> <span class="s">(</span><span class="i">$FirstAtomNum</span> <= <span class="i">$AtomCount</span> && <span class="i">$SecondAtomNum</span> <= <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span> | 
|  | 639  626       <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 640  627     <span class="s">}</span> | 
|  | 641  628     <span class="k">if</span> <span class="s">(</span><span class="i">$FirstAtomNum</span> > <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span> | 
|  | 642  629       <span class="i">$InvalidAtomNumbersCount</span>++<span class="sc">;</span> | 
|  | 643  630       <span class="i">$InvalidAtomNumbers</span> .= <span class="q">" $FirstAtomNum"</span><span class="sc">;</span> | 
|  | 644  631     <span class="s">}</span> | 
|  | 645  632     <span class="k">if</span> <span class="s">(</span><span class="i">$SecondAtomNum</span> > <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span> | 
|  | 646  633       <span class="i">$InvalidAtomNumbersCount</span>++<span class="sc">;</span> | 
|  | 647  634       <span class="i">$InvalidAtomNumbers</span> .= <span class="q">" $SecondAtomNum"</span><span class="sc">;</span> | 
|  | 648  635     <span class="s">}</span> | 
|  | 649  636     <span class="k">if</span> <span class="s">(</span><span class="i">$InvalidAtomNumberLines</span><span class="s">)</span> <span class="s">{</span> | 
|  | 650  637       <span class="i">$InvalidAtomNumberLines</span> .= <span class="q">"\n"</span> . <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> | 
|  | 651  638     <span class="s">}</span> | 
|  | 652  639     <span class="k">else</span> <span class="s">{</span> | 
|  | 653  640       <span class="i">$InvalidAtomNumberLines</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> | 
|  | 654  641     <span class="s">}</span> | 
|  | 655  642   <span class="s">}</span> | 
|  | 656  643   <span class="c"># Go over property lines before M  END...</span> | 
|  | 657  644   <span class="c">#</span> | 
|  | 658  645   <span class="j">LINE:</span> <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span> + <span class="i">$BondCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="i">@$CmpdLines</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> | 
|  | 659  646     <span class="i">$Line</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> | 
|  | 660  647     <span class="i">@ValuePairs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 661  648     <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M  END/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 662  649       <span class="k">last</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 663  650     <span class="s">}</span> | 
|  | 664  651     <span class="i">@ValuePairs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 665  652     <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M  CHG/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 666  653       <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdChargePropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 667  654     <span class="s">}</span> | 
|  | 668  655     <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M  RAD/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 669  656       <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdRadicalPropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 670  657     <span class="s">}</span> | 
|  | 671  658     <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M  ISO/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 672  659       <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdIsotopePropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 673  660     <span class="s">}</span> | 
|  | 674  661     <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^A  /i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 675  662       <span class="k">my</span><span class="s">(</span><span class="i">$NextLine</span><span class="s">)</span><span class="sc">;</span> | 
|  | 676  663       <span class="i">$LineIndex</span>++<span class="sc">;</span> | 
|  | 677  664       <span class="i">$NextLine</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> | 
|  | 678  665       <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdAtomAliasPropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">$NextLine</span><span class="s">)</span><span class="sc">;</span> | 
|  | 679  666     <span class="s">}</span> | 
|  | 680  667     <span class="k">else</span> <span class="s">{</span> | 
|  | 681  668       <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 682  669     <span class="s">}</span> | 
|  | 683  670 | 
|  | 684  671     <span class="i">$InvalidAtomPropertyLine</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 685  672     <span class="k">for</span> <span class="s">(</span><span class="i">$ValuePairIndex</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$ValuePairIndex</span> < <span class="i">$#ValuePairs</span><span class="sc">;</span> <span class="i">$ValuePairIndex</span> += <span class="n">2</span><span class="s">)</span> <span class="s">{</span> | 
|  | 686  673       <span class="i">$AtomNum</span> = <span class="i">$ValuePairs</span>[<span class="i">$ValuePairIndex</span>]<span class="sc">;</span> <span class="i">$Value</span> = <span class="i">$ValuePairs</span>[<span class="i">$ValuePairIndex</span> + <span class="n">1</span>]<span class="sc">;</span> | 
|  | 687  674       <span class="k">if</span> <span class="s">(</span><span class="i">$AtomNum</span> > <span class="i">$AtomCount</span><span class="s">)</span> <span class="s">{</span> | 
|  | 688  675         <span class="i">$InvalidAtomPropertyLine</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 689  676         <span class="i">$InvalidAtomNumbersCount</span>++<span class="sc">;</span> | 
|  | 690  677         <span class="i">$InvalidAtomNumbers</span> .= <span class="q">" $AtomNum"</span><span class="sc">;</span> | 
|  | 691  678       <span class="s">}</span> | 
|  | 692  679     <span class="s">}</span> | 
|  | 693  680     <span class="k">if</span> <span class="s">(</span><span class="i">$InvalidAtomPropertyLine</span><span class="s">)</span> <span class="s">{</span> | 
|  | 694  681       <span class="k">if</span> <span class="s">(</span><span class="i">$InvalidAtomNumberLines</span><span class="s">)</span> <span class="s">{</span> | 
|  | 695  682         <span class="i">$InvalidAtomNumberLines</span> .= <span class="q">"\n"</span> . <span class="i">$Line</span><span class="sc">;</span> | 
|  | 696  683       <span class="s">}</span> | 
|  | 697  684       <span class="k">else</span> <span class="s">{</span> | 
|  | 698  685         <span class="i">$InvalidAtomNumberLines</span> = <span class="i">$Line</span><span class="sc">;</span> | 
|  | 699  686       <span class="s">}</span> | 
|  | 700  687     <span class="s">}</span> | 
|  | 701  688   <span class="s">}</span> | 
|  | 702  689 | 
|  | 703  690   <span class="k">return</span> <span class="s">(</span><span class="i">$InvalidAtomNumbersCount</span><span class="cm">,</span> <span class="i">$InvalidAtomNumbers</span><span class="cm">,</span> <span class="i">$InvalidAtomNumberLines</span><span class="s">)</span><span class="sc">;</span> | 
|  | 704  691 <span class="s">}</span> | 
|  | 705  692 | 
|  | 706  693 <span class="c"># Ctab lines: Atom block</span> | 
|  | 707  694 <span class="c">#</span> | 
|  | 708  695 <span class="c"># Format: xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee</span> | 
|  | 709  696 <span class="c">#         A10       A10       A10       xA3 A2A3 A3 A3 A3 A3 A3 A3 A3 A3 A3 A3</span> | 
|  | 710  697 <span class="c"># x,y,z: Atom coordinates</span> | 
|  | 711  698 <span class="c"># aaa: Atom symbol. Entry in periodic table or L for atom list, A, Q, * for unspecified</span> | 
|  | 712  699 <span class="c">#      atom, and LP for lone pair, or R# for Rgroup label</span> | 
|  | 713  700 <span class="c"># dd: Mass difference. -3, -2, -1, 0, 1, 2, 3, 4 (0 for value beyond these limits)</span> | 
|  | 714  701 <span class="c"># ccc: Charge. 0 = uncharged or value other than these, 1 = +3, 2 = +2, 3 = +1,</span> | 
|  | 715  702 <span class="c">#      4 = doublet radical, 5 = -1, 6 = -2, 7 = -3</span> | 
|  | 716  703 <span class="c"># sss: Atom stereo parity. 0 = not stereo, 1 = odd, 2 = even, 3 = either or unmarked stereo center</span> | 
|  | 717  704 <span class="c"># hhh: Hydrogen count + 1. 1 = H0, 2 = H1, 3 = H2, 4 = H3, 5 = H4</span> | 
|  | 718  705 <span class="c"># bbb: Stereo care box. 0 = ignore stereo configuration of this double bond atom, 1 = stereo</span> | 
|  | 719  706 <span class="c">#      configuration of double bond atom must match</span> | 
|  | 720  707 <span class="c"># vvv: Valence. 0 = no marking (default), (1 to 14) = (1 to 14), 15 = zero valence</span> | 
|  | 721  708 <span class="c"># HHH: H0 designator. 0 = not specified, 1 = no H atoms allowed (redundant due to hhh)</span> | 
|  | 722  709 <span class="c"># rrr: Not used</span> | 
|  | 723  710 <span class="c"># iii: Not used</span> | 
|  | 724  711 <span class="c"># mmm: Atom-atom mapping number. 1 - number of atoms</span> | 
|  | 725  712 <span class="c"># nnn: Inversion/retention flag. 0 = property not applied, 1 = configuration is inverted,</span> | 
|  | 726  713 <span class="c">#      2 = configuration is retained.</span> | 
|  | 727  714 <span class="c"># eee: Exact change flag. 0 = property not applied, 1 = change on atom must be</span> | 
|  | 728  715 <span class="c">#      exactly as shown</span> | 
|  | 729  716 <span class="c">#</span> | 
|  | 730  717 <span class="c"># Notes:</span> | 
|  | 731  718 <span class="c">#  . StereoParity: 1 - ClockwiseStereo, 2 - AntiClockwiseStereo; 3 - Either; 0 - none. These</span> | 
|  | 732  719 <span class="c">#    values determine chirality around the chiral center; a non-zero value indicates atom</span> | 
|  | 733  720 <span class="c">#    has been marked as chiral center.</span> | 
|  | 734  721 <span class="c">#</span> | 
|  | 735 <a name="ParseCmpdAtomLine-"></a> 722 <span class="k">sub </span><span class="m">ParseCmpdAtomLine</span> <span class="s">{</span> | 
|  | 736  723   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 737  724   <span class="k">my</span> <span class="s">(</span><span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="s">)</span><span class="sc">;</span> | 
|  | 738  725 | 
|  | 739  726   <span class="s">(</span><span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="s">)</span> = <span class="s">(</span><span class="q">''</span><span class="s">)</span> x <span class="n">7</span><span class="sc">;</span> | 
|  | 740  727   <span class="k">if</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> > <span class="n">31</span><span class="s">)</span> <span class="s">{</span> | 
|  | 741  728     <span class="s">(</span><span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A10A10A10xA3A2A3A3"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 742  729   <span class="s">}</span> | 
|  | 743  730   <span class="k">else</span> <span class="s">{</span> | 
|  | 744  731     <span class="s">(</span><span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$AtomSymbol</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A10A10A10"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 745  732   <span class="s">}</span> | 
|  | 746  733   <span class="k">return</span> <span class="s">(</span><span class="i">$AtomSymbol</span><span class="cm">,</span> <span class="i">$AtomX</span><span class="cm">,</span> <span class="i">$AtomY</span><span class="cm">,</span> <span class="i">$AtomZ</span><span class="cm">,</span> <span class="i">$MassDifference</span><span class="cm">,</span> <span class="i">$Charge</span><span class="cm">,</span> <span class="i">$StereoParity</span><span class="s">)</span><span class="sc">;</span> | 
|  | 747  734 <span class="s">}</span> | 
|  | 748  735 | 
|  | 749  736 <span class="c"># Map MDL charge value used in SD and MOL files to internal charge used by MayaChemTools:</span> | 
|  | 750  737 <span class="c"># 0 to 0, 1 to +3, 2 to +2, 3 to +1, 5 to -1, 6 to -2, 7 to -3 and any other value to 0.</span> | 
|  | 751 <a name="MDLChargeToInternalCharge-"></a> 738 <span class="k">sub </span><span class="m">MDLChargeToInternalCharge</span> <span class="s">{</span> | 
|  | 752  739   <span class="k">my</span><span class="s">(</span><span class="i">$MDLCharge</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 753  740   <span class="k">my</span><span class="s">(</span><span class="i">$InternalCharge</span><span class="s">)</span><span class="sc">;</span> | 
|  | 754  741 | 
|  | 755  742   <span class="j">CHARGE:</span> <span class="s">{</span> | 
|  | 756  743     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">0</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">0</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 757  744     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 758  745     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">2</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 759  746     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 760  747     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">5</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">-1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 761  748     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">6</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">-2</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 762  749     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> == <span class="n">7</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalCharge</span> = <span class="n">-3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 763  750     <span class="c"># All other MDL charge values, including 4 corresponding to "doublet radical",</span> | 
|  | 764  751     <span class="c"># are assigned internal value of 0; a warning is issued for values other than 4.</span> | 
|  | 765  752     <span class="i">$InternalCharge</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 766  753     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLCharge</span> != <span class="n">4</span><span class="s">)</span> <span class="s">{</span> | 
|  | 767  754       <span class="w">carp</span> <span class="q">"Warning: MDLChargeToInternalCharge: MDL charge value, $MDLCharge, is not supported: An internal charge value, 0, has been assigned..."</span><span class="sc">;</span> | 
|  | 768  755     <span class="s">}</span> | 
|  | 769  756   <span class="s">}</span> | 
|  | 770  757   <span class="k">return</span> <span class="i">$InternalCharge</span><span class="sc">;</span> | 
|  | 771  758 <span class="s">}</span> | 
|  | 772  759 | 
|  | 773  760 <span class="c"># Map internal charge used by MayaChemTools to MDL charge value used in SD and MOL files:</span> | 
|  | 774  761 <span class="c"># +3 to 1, +2 to 2, +1 to 3, -1 to 5, -2 to 6, -3 to 7 and any other value, including 0, to 0.</span> | 
|  | 775 <a name="InternalChargeToMDLCharge-"></a> 762 <span class="k">sub </span><span class="m">InternalChargeToMDLCharge</span> <span class="s">{</span> | 
|  | 776  763   <span class="k">my</span><span class="s">(</span><span class="i">$InternalCharge</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 777  764   <span class="k">my</span><span class="s">(</span><span class="i">$MDLCharge</span><span class="s">)</span><span class="sc">;</span> | 
|  | 778  765 | 
|  | 779  766   <span class="j">CHARGE:</span> <span class="s">{</span> | 
|  | 780  767     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 781  768     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">2</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 782  769     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 783  770     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">-1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">5</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 784  771     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">-2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">6</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 785  772     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalCharge</span> == <span class="n">-3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLCharge</span> = <span class="n">7</span><span class="sc">;</span> <span class="k">last</span> <span class="j">CHARGE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 786  773     <span class="c"># All other internal charge values, including 0, are assigned MDL charge value of 0;</span> | 
|  | 787  774     <span class="c"># no warning is issued for unsupported values.</span> | 
|  | 788  775     <span class="i">$MDLCharge</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 789  776   <span class="s">}</span> | 
|  | 790  777   <span class="k">return</span> <span class="i">$MDLCharge</span><span class="sc">;</span> | 
|  | 791  778 <span class="s">}</span> | 
|  | 792  779 | 
|  | 793  780 <span class="c"># Ctab lines: Bond block</span> | 
|  | 794  781 <span class="c">#</span> | 
|  | 795  782 <span class="c"># Format: 111222tttsssxxxrrrccc</span> | 
|  | 796  783 <span class="c">#</span> | 
|  | 797  784 <span class="c"># 111: First atom number.</span> | 
|  | 798  785 <span class="c"># 222: Second atom number.</span> | 
|  | 799  786 <span class="c"># ttt: Bond type. 1 = Single, 2 = Double, 3 = Triple, 4 = Aromatic, 5 = Single or Double,</span> | 
|  | 800  787 <span class="c">#      6 = Single or Aromatic, 7 = Double or Aromatic, 8 = Any</span> | 
|  | 801  788 <span class="c"># sss: Bond stereo. Single bonds: 0 = not stereo, 1 = Up, 4 = Either, 6 = Down,</span> | 
|  | 802  789 <span class="c">#      Double bonds: 0 = Use x-, y-, z-coords from atom block to determine cis or trans,</span> | 
|  | 803  790 <span class="c">#      3 = Cis or trans (either) double bond</span> | 
|  | 804  791 <span class="c"># xxx: Not used</span> | 
|  | 805  792 <span class="c"># rrr: Bond topology. 0 = Either, 1 = Ring, 2 = Chain</span> | 
|  | 806  793 <span class="c"># ccc: Reacting center status. 0 = unmarked, 1 = a center, -1 = not a center,</span> | 
|  | 807  794 <span class="c">#      Additional: 2 = no change, 4 = bond made/broken, 8 = bond order changes, 12 = 4 + 8</span> | 
|  | 808  795 <span class="c">#      (both made/broken and changes); 5 = (4 + 1), 9 = (8 + 1), and 13 = (12 + 1) are also possible</span> | 
|  | 809  796 <span class="c"># Returns first atom number, second atom number, bond type and bond stereo with all spaces removed.</span> | 
|  | 810 <a name="ParseCmpdBondLine-"></a> 797 <span class="k">sub </span><span class="m">ParseCmpdBondLine</span> <span class="s">{</span> | 
|  | 811  798   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 812  799   <span class="k">my</span><span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span><span class="sc">;</span> | 
|  | 813  800 | 
|  | 814  801   <span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span> = <span class="k">map</span> <span class="s">{</span><span class="q">s/ //g</span><span class="sc">;</span> <span class="i">$_</span><span class="s">}</span> <span class="k">unpack</span><span class="s">(</span><span class="q">"A3A3A3A3"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 815  802   <span class="k">return</span> <span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span><span class="sc">;</span> | 
|  | 816  803 <span class="s">}</span> | 
|  | 817  804 | 
|  | 818  805 <span class="c"># Map MDL bond type value used in SD and MOL files to internal bond order and bond type</span> | 
|  | 819  806 <span class="c"># values used by MayaChemTools...</span> | 
|  | 820  807 <span class="c"># Returns a list containing the internal bond order followed by the internal bond type.</span> | 
|  | 821 <a name="MDLBondTypeToInternalBondOrder-"></a> 808 <span class="k">sub </span><span class="m">MDLBondTypeToInternalBondOrder</span> <span class="s">{</span> | 
|  | 822  809   <span class="k">my</span><span class="s">(</span><span class="i">$MDLBondType</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 823  810   <span class="k">my</span><span class="s">(</span><span class="i">$InternalBondOrder</span><span class="cm">,</span> <span class="i">$InternalBondType</span><span class="s">)</span><span class="sc">;</span> | 
|  | 824  811 | 
|  | 825  812   <span class="i">$InternalBondType</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 826  813 | 
|  | 827  814   <span class="j">BONDTYPE:</span> <span class="s">{</span> | 
|  | 828  815     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">1</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'Single'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 829  816     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">2</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'Double'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 830  817     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">3</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'Triple'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 831  818     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">4</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">1.5</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'Aromatic'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> <span class="c"># Aromatic</span> | 
|  | 832  819     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">5</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">1</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'SingleOrDouble'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> <span class="c"># Query: Single or Double</span> | 
|  | 833  820     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">6</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">1</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'SingleOrAromatic'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> <span class="c"># Query: Single or Aromatic</span> | 
|  | 834  821     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">7</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">2</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'DoubleOrAromatic'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> <span class="c"># Query: Double or Aromatic</span> | 
|  | 835  822     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondType</span> == <span class="n">8</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondOrder</span> = <span class="n">1</span><span class="sc">;</span> <span class="i">$InternalBondType</span> = <span class="q">'Any'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> <span class="c"># Query: Any</span> | 
|  | 836  823     <span class="c">#</span> | 
|  | 837  824     <span class="c"># Although MDL aromatic bond values are used for query only and explicit Kekule bond order</span> | 
|  | 838  825     <span class="c"># values must be assigned, internal value of 1.5 is allowed to indicate aromatic bond orders.</span> | 
|  | 839  826     <span class="c">#</span> | 
|  | 840  827     <span class="c"># MDL query bond type values - 5 = Single or Double, 6 = Single or Aromatic, 7 = Double or Aromatic,</span> | 
|  | 841  828     <span class="c"># 8 = Any - are handled above. Any other MDL bond type value is not supported: It is mapped to</span> | 
|  | 842  829     <span class="c"># a single bond with an internal bond order value of 1 and a warning is issued.</span> | 
|  | 843  830     <span class="c">#</span> | 
|  | 844  831     <span class="i">$InternalBondOrder</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 845  832     <span class="i">$InternalBondType</span> = <span class="q">'Single'</span><span class="sc">;</span> | 
|  | 846  833 | 
|  | 847  834     <span class="w">carp</span> <span class="q">"Warning: MDLBondTypeToInternalBondOrder: MDL bond type value, $MDLBondType, is not supported: An internal bond order value, 1, has been assigned..."</span><span class="sc">;</span> | 
|  | 848  835   <span class="s">}</span> | 
|  | 849  836   <span class="k">return</span> <span class="s">(</span><span class="i">$InternalBondOrder</span><span class="cm">,</span> <span class="i">$InternalBondType</span><span class="s">)</span><span class="sc">;</span> | 
|  | 850  837 <span class="s">}</span> | 
|  | 851  838 | 
|  | 852  839 <span class="c"># Map internal bond order and bond type values used by MayaChemTools to MDL bond type value used</span> | 
|  | 853  840 <span class="c"># in SD and MOL files...</span> | 
|  | 854  841 <span class="c"># Unmatched combinations are mapped to MDL bond type 1 and a warning is issued.</span> | 
|  | 855 <a name="InternalBondOrderToMDLBondType-"></a> 842 <span class="k">sub </span><span class="m">InternalBondOrderToMDLBondType</span> <span class="s">{</span> | 
|  | 856  843   <span class="k">my</span><span class="s">(</span><span class="i">$InternalBondOrder</span><span class="cm">,</span> <span class="i">$InternalBondType</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 857  844   <span class="k">my</span><span class="s">(</span><span class="i">$MDLBondType</span><span class="s">)</span><span class="sc">;</span> | 
|  | 858  845 | 
|  | 859  846   <span class="j">BONDTYPE:</span> <span class="s">{</span> | 
|  | 860  847     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondOrder</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> | 
|  | 861  848       <span class="c"># Query bond types sharing bond order 1 are distinguished by internal bond type.</span> | 
|  | 862  849       <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondType</span> =~ <span class="q">/^SingleOrDouble$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 863  850         <span class="i">$MDLBondType</span> = <span class="n">5</span><span class="sc">;</span> | 
|  | 864  851       <span class="s">}</span> | 
|  | 865  852       <span class="k">elsif</span> <span class="s">(</span><span class="i">$InternalBondType</span> =~ <span class="q">/^SingleOrAromatic$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 866  853         <span class="i">$MDLBondType</span> = <span class="n">6</span><span class="sc">;</span> | 
|  | 867  854       <span class="s">}</span> | 
|  | 868  855       <span class="k">elsif</span> <span class="s">(</span><span class="i">$InternalBondType</span> =~ <span class="q">/^Any$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 869  856         <span class="i">$MDLBondType</span> = <span class="n">8</span><span class="sc">;</span> | 
|  | 870  857       <span class="s">}</span> | 
|  | 871  858       <span class="k">else</span> <span class="s">{</span> | 
|  | 872  859         <span class="i">$MDLBondType</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 873  860       <span class="s">}</span> | 
|  | 874  861       <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span> | 
|  | 875  862     <span class="s">}</span> | 
|  | 876  863     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondOrder</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> | 
|  | 877  864       <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondType</span> =~ <span class="q">/^DoubleOrAromatic$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 878  865         <span class="i">$MDLBondType</span> = <span class="n">7</span><span class="sc">;</span> | 
|  | 879  866       <span class="s">}</span> | 
|  | 880  867       <span class="k">else</span> <span class="s">{</span> | 
|  | 881  868         <span class="i">$MDLBondType</span> = <span class="n">2</span><span class="sc">;</span> | 
|  | 882  869       <span class="s">}</span> | 
|  | 883  870       <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span> | 
|  | 884  871     <span class="s">}</span> | 
|  | 885  872     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondOrder</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondType</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 886  873     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondOrder</span> == <span class="n">1.5</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondType</span> = <span class="n">4</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 887  874     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondType</span> =~ <span class="q">/^Any$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondType</span> = <span class="n">8</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDTYPE</span><span class="sc">;</span><span class="s">}</span> | 
|  | 888  875 | 
|  | 889  876     <span class="i">$MDLBondType</span> = <span class="n">1</span><span class="sc">;</span> | 
|  | 890  877 | 
|  | 891  878     <span class="w">carp</span> <span class="q">"Warning: InternalBondOrderToMDLBondType: Internal bond order and type values, $InternalBondOrder and $InternalBondType, don't match any valid MDL bond type: MDL bond type value, 1, has been assigned..."</span><span class="sc">;</span> | 
|  | 892  879   <span class="s">}</span> | 
|  | 893  880   <span class="k">return</span> <span class="i">$MDLBondType</span><span class="sc">;</span> | 
|  | 894  881 <span class="s">}</span> | 
|  | 895  882 | 
|  | 896  883 <span class="c"># Third line: Comments - The first 80 characters with trailing whitespace removed; a blank line is also allowed.</span> | 
|  | 897 <a name="ParseCmpdCommentsLine-"></a> 884 <span class="k">sub </span><span class="m">ParseCmpdCommentsLine</span> <span class="s">{</span> | 
|  | 898  885   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 899  886   <span class="k">my</span><span class="s">(</span><span class="i">$Comments</span><span class="s">)</span><span class="sc">;</span> | 
|  | 900  887 | 
|  | 901  888   <span class="i">$Comments</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A80"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 902  889 | 
|  | 903  890   <span class="k">return</span> <span class="s">(</span><span class="i">$Comments</span><span class="s">)</span><span class="sc">;</span> | 
|  | 904  891 <span class="s">}</span> | 
|  | 905  892 | 
|  | 906  893 <span class="c"># Map MDL bond stereo value used in SD and MOL files to internal bond stereochemistry values used by MayaChemTools...</span> | 
|  | 907  894 <span class="c"># 1 = Up, 4 = UpOrDown, 6 = Down, 3 = CisOrTrans, 0 = None; any other value maps to an empty string with a warning.</span> | 
|  | 908 <a name="MDLBondStereoToInternalBondStereochemistry-"></a> 895 <span class="k">sub </span><span class="m">MDLBondStereoToInternalBondStereochemistry</span> <span class="s">{</span> | 
|  | 909  896   <span class="k">my</span><span class="s">(</span><span class="i">$MDLBondStereo</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 910  897   <span class="k">my</span><span class="s">(</span><span class="i">$InternalBondStereo</span><span class="s">)</span><span class="sc">;</span> | 
|  | 911  898 | 
|  | 912  899   <span class="i">$InternalBondStereo</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 913  900 | 
|  | 914  901   <span class="j">BONDSTEREO:</span> <span class="s">{</span> | 
|  | 915  902     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondStereo</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondStereo</span> = <span class="q">'Up'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> | 
|  | 916  903     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondStereo</span> == <span class="n">4</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondStereo</span> = <span class="q">'UpOrDown'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> | 
|  | 917  904     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondStereo</span> == <span class="n">6</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondStereo</span> = <span class="q">'Down'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> | 
|  | 918  905     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondStereo</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondStereo</span> = <span class="q">'CisOrTrans'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> | 
|  | 919  906     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLBondStereo</span> == <span class="n">0</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalBondStereo</span> = <span class="q">'None'</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> | 
|  | 920  907 | 
|  | 921  908     <span class="i">$InternalBondStereo</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 922  909     <span class="w">carp</span> <span class="q">"Warning: MDLBondStereoToInternalBondStereochemistry: MDL bond stereo value, $MDLBondStereo, is not supported: It has been ignored and bond order would be used to determine bond type..."</span><span class="sc">;</span> | 
|  | 923  910   <span class="s">}</span> | 
|  | 924  911   <span class="k">return</span> <span class="i">$InternalBondStereo</span><span class="sc">;</span> | 
|  | 925  912 <span class="s">}</span> | 
|  | 926  913 | 
|  | 927  914 <span class="c"># Map internal bond stereochemistry values used by MayaChemTools to MDL bond stereo value used in SD and MOL files...</span> | 
|  | 928  915 <span class="c"># Up = 1, UpOrDown = 4, Down = 6, CisOrTrans = 3 (names are matched case-insensitively); anything else = 0.</span> | 
|  | 929 <a name="InternalBondStereochemistryToMDLBondStereo-"></a> 916 <span class="k">sub </span><span class="m">InternalBondStereochemistryToMDLBondStereo</span> <span class="s">{</span> | 
|  | 930  917   <span class="k">my</span><span class="s">(</span><span class="i">$InternalBondStereo</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 931  918   <span class="k">my</span><span class="s">(</span><span class="i">$MDLBondStereo</span><span class="s">)</span><span class="sc">;</span> | 
|  | 932  919 | 
|  | 933  920   <span class="i">$MDLBondStereo</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 934  921 | 
|  | 935  922   <span class="j">BONDSTEREO:</span> <span class="s">{</span> | 
|  | 936  923     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondStereo</span> =~ <span class="q">/^Up$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondStereo</span> = <span class="n">1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> | 
|  | 937  924     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondStereo</span> =~ <span class="q">/^UpOrDown$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondStereo</span> = <span class="n">4</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> | 
|  | 938  925     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondStereo</span> =~ <span class="q">/^Down$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondStereo</span> = <span class="n">6</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> | 
|  | 939  926     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalBondStereo</span> =~ <span class="q">/^CisOrTrans$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLBondStereo</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">BONDSTEREO</span><span class="sc">;</span><span class="s">}</span> | 
|  | 940  927 | 
|  | 941  928     <span class="i">$MDLBondStereo</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 942  929   <span class="s">}</span> | 
|  | 943  930   <span class="k">return</span> <span class="i">$MDLBondStereo</span><span class="sc">;</span> | 
|  | 944  931 <span class="s">}</span> | 
|  | 945  932 | 
|  | 946  933 <span class="c"># Fourth line: Counts</span> | 
|  | 947  934 <span class="c"># Returns (AtomCount, BondCount, ChiralFlag, PropertyCount, Version); croaks when the version field matches V3000.</span> | 
|  | 948  935 <span class="c"># Format: aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv</span> | 
|  | 949  936 <span class="c"># Lines shorter than 39 characters default PropertyCount/Version to "999"/"v2000"; lines shorter than 15 also default ChiralFlag to "0".</span> | 
|  | 950  937 <span class="c"># aaa: number of atoms; bbb: number of bonds; lll: number of atom lists; fff: (obsolete)</span> | 
|  | 951  938 <span class="c"># ccc: chiral flag: 0=not chiral, 1=chiral; sss: number of stext entries; xxx,rrr,ppp,iii:</span> | 
|  | 952  939 <span class="c"># (obsolete); mmm: number of lines of additional properties, including the M END line (no</span> | 
|  | 953  940 <span class="c"># longer supported, default is set to 999); vvvvvv: version</span> | 
|  | 954  941 | 
|  | 955 <a name="ParseCmpdCountsLine-"></a> 942 <span class="k">sub </span><span class="m">ParseCmpdCountsLine</span> <span class="s">{</span> | 
|  | 956  943   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 957  944   <span class="k">my</span><span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span><span class="sc">;</span> | 
|  | 958  945 | 
|  | 959  946   <span class="k">if</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> >= <span class="n">39</span><span class="s">)</span> <span class="s">{</span> | 
|  | 960  947     <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A3A3x3x3A3x3x3x3x3x3A3A6"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 961  948   <span class="s">}</span> | 
|  | 962  949   <span class="k">elsif</span> <span class="s">(</span><span class="k">length</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> >= <span class="n">15</span><span class="s">)</span> <span class="s">{</span> | 
|  | 963  950     <span class="s">(</span><span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span> = <span class="s">(</span><span class="q">"999"</span><span class="cm">,</span> <span class="q">"v2000"</span><span class="s">)</span><span class="sc">;</span> | 
|  | 964  951     <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A3A3x3x3A3"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 965  952   <span class="s">}</span> | 
|  | 966  953   <span class="k">else</span> <span class="s">{</span> | 
|  | 967  954     <span class="s">(</span><span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span> = <span class="s">(</span><span class="q">"0"</span><span class="cm">,</span> <span class="q">"999"</span><span class="cm">,</span> <span class="q">"v2000"</span><span class="s">)</span><span class="sc">;</span> | 
|  | 968  955     <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A3A3"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 969  956   <span class="s">}</span> | 
|  | 970  957 | 
|  | 971  958   <span class="k">if</span> <span class="s">(</span><span class="i">$Version</span> =~ <span class="q">/V3000/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 972  959     <span class="c"># Current version of MayaChemTools modules and classes for processing MDL MOL and SD don't support</span> | 
|  | 973  960     <span class="c"># V3000. So instead of relying on callers, just exit with an error to disable any processing of V3000</span> | 
|  | 974  961     <span class="c"># format.</span> | 
|  | 975  962     <span class="w">croak</span> <span class="q">"Error: SDFileUtil::ParseCmpdCountsLine: The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools..."</span><span class="sc">;</span> | 
|  | 976  963   <span class="s">}</span> | 
|  | 977  964 | 
|  | 978  965   <span class="k">return</span> <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$Version</span><span class="s">)</span><span class="sc">;</span> | 
|  | 979  966 <span class="s">}</span> | 
|  | 980  967 | 
|  | 981  968 <span class="c"># Second line: Misc info</span> | 
|  | 982  969 <span class="c"># Returns (UserInitial, ProgramName, Date, Code, ScalingFactor1, ScalingFactor2, Energy, RegistryNum); the I2 field in the layout below is unpacked as text (A2).</span> | 
|  | 983  970 <span class="c"># Format: IIPPPPPPPPMMDDYYHHmmddSSssssssssssEEEEEEEEEEEERRRRRR</span> | 
|  | 984  971 <span class="c">#         A2A8      A10       A2I2A10       A12         A6</span> | 
|  | 985  972 <span class="c"># User's first and last initials (I), program name (P), date/time (M/D/Y,H:m),</span> | 
|  | 986  973 <span class="c"># dimensional codes - 2D or 3D (d),scaling factors (S, s), energy (E) if modeling program input,</span> | 
|  | 987  974 <span class="c"># internal registry number (R) if input through MDL form. A blank line is also allowed.</span> | 
|  | 988 <a name="ParseCmpdMiscInfoLine-"></a> 975 <span class="k">sub </span><span class="m">ParseCmpdMiscInfoLine</span> <span class="s">{</span> | 
|  | 989  976   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 990  977   <span class="k">my</span><span class="s">(</span><span class="i">$UserInitial</span><span class="cm">,</span> <span class="i">$ProgramName</span><span class="cm">,</span> <span class="i">$Date</span><span class="cm">,</span> <span class="i">$Code</span><span class="cm">,</span> <span class="i">$ScalingFactor1</span><span class="cm">,</span> <span class="i">$ScalingFactor2</span><span class="cm">,</span> <span class="i">$Energy</span><span class="cm">,</span> <span class="i">$RegistryNum</span><span class="s">)</span><span class="sc">;</span> | 
|  | 991  978 | 
|  | 992  979   <span class="s">(</span><span class="i">$UserInitial</span><span class="cm">,</span> <span class="i">$ProgramName</span><span class="cm">,</span> <span class="i">$Date</span><span class="cm">,</span> <span class="i">$Code</span><span class="cm">,</span> <span class="i">$ScalingFactor1</span><span class="cm">,</span> <span class="i">$ScalingFactor2</span><span class="cm">,</span> <span class="i">$Energy</span><span class="cm">,</span> <span class="i">$RegistryNum</span><span class="s">)</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A2A8A10A2A2A10A12A6"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 993  980   <span class="k">return</span> <span class="s">(</span><span class="i">$UserInitial</span><span class="cm">,</span> <span class="i">$ProgramName</span><span class="cm">,</span> <span class="i">$Date</span><span class="cm">,</span> <span class="i">$Code</span><span class="cm">,</span> <span class="i">$ScalingFactor1</span><span class="cm">,</span> <span class="i">$ScalingFactor2</span><span class="cm">,</span> <span class="i">$Energy</span><span class="cm">,</span> <span class="i">$RegistryNum</span><span class="s">)</span><span class="sc">;</span> | 
|  | 994  981 <span class="s">}</span> | 
|  | 995  982 | 
|  | 996  983 <span class="c"># First line: Molecule name. This line is unformatted, but like all other lines in a</span> | 
|  | 997  984 <span class="c"># molfile may not extend beyond column 80. A blank line is also allowed. Returns the first 80 columns with trailing whitespace removed (unpack "A80").</span> | 
|  | 998 <a name="ParseCmpdMolNameLine-"></a> 985 <span class="k">sub </span><span class="m">ParseCmpdMolNameLine</span> <span class="s">{</span> | 
|  | 999  986   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 1000  987   <span class="k">my</span><span class="s">(</span><span class="i">$MolName</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1001  988 | 
|  | 1002  989   <span class="i">$MolName</span> = <span class="k">unpack</span><span class="s">(</span><span class="q">"A80"</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1003  990 | 
|  | 1004  991   <span class="k">return</span> <span class="s">(</span><span class="i">$MolName</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1005  992 <span class="s">}</span> | 
|  | 1006  993 | 
|  | 1007  994 <span class="c"># Parse atom alias property line in CTAB generic properties block.</span> | 
|  | 1008  995 <span class="c"># Takes the property line and the line following it; returns (AtomNumber, AtomAlias).</span> | 
|  | 1009  996 <span class="c"># Atom alias property line format:</span> | 
|  | 1010  997 <span class="c">#</span> | 
|  | 1011  998 <span class="c"># A  aaa</span> | 
|  | 1012  999 <span class="c"># x...</span> | 
|  | 1013 1000 <span class="c">#</span> | 
|  | 1014 1001 <span class="c">#    aaa: Atom number</span> | 
|  | 1015 1002 <span class="c">#    x: Atom alias in next line</span> | 
|  | 1016 1003 <span class="c"># Warns (carp) when the alias line is empty or otherwise false; both values are still returned.</span> | 
|  | 1017 <a name="ParseCmpdAtomAliasPropertyLine-"></a>1004 <span class="k">sub </span><span class="m">ParseCmpdAtomAliasPropertyLine</span> <span class="s">{</span> | 
|  | 1018 1005   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">$NextLine</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 1019 1006   <span class="k">my</span><span class="s">(</span><span class="i">$Label</span><span class="cm">,</span> <span class="i">$AtomNumber</span><span class="cm">,</span> <span class="i">$AtomAlias</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1020 1007 | 
|  | 1021 1008   <span class="s">(</span><span class="i">$Label</span><span class="cm">,</span> <span class="i">$AtomNumber</span><span class="s">)</span> = <span class="k">split</span><span class="s">(</span><span class="q">' '</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1022 1009   <span class="i">$AtomAlias</span> = <span class="i">$NextLine</span><span class="sc">;</span> | 
|  | 1023 1010 | 
|  | 1024 1011   <span class="k">if</span> <span class="s">(</span>!<span class="i">$AtomAlias</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1025 1012     <span class="w">carp</span> <span class="q">"Warning: _ParseCmpdAtomAliasPropertyLine: No atom alias value specified on the line following atom alias property line..."</span><span class="sc">;</span> | 
|  | 1026 1013   <span class="s">}</span> | 
|  | 1027 1014 | 
|  | 1028 1015   <span class="k">return</span> <span class="s">(</span><span class="i">$AtomNumber</span><span class="cm">,</span> <span class="i">$AtomAlias</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1029 1016 <span class="s">}</span> | 
|  | 1030 1017 | 
|  | 1031 1018 <span class="c"># Parse charge property line in CTAB generic properties block.</span> | 
|  | 1032 1019 <span class="c"># Returns the flat list of atom number/charge values produced by _ParseCmpdGenericPropertyLine.</span> | 
|  | 1033 1020 <span class="c"># Charge property line format:</span> | 
|  | 1034 1021 <span class="c">#</span> | 
|  | 1035 1022 <span class="c"># M  CHGnn8 aaa vvv ...</span> | 
|  | 1036 1023 <span class="c">#</span> | 
|  | 1037 1024 <span class="c">#    nn8: Number of value pairs. Maximum of 8 pairs allowed.</span> | 
|  | 1038 1025 <span class="c">#    aaa: Atom number</span> | 
|  | 1039 1026 <span class="c">#    vvv: -15 to +15. Default of 0 = uncharged atom. When present, this property supersedes</span> | 
|  | 1040 1027 <span class="c">#    all charge and radical values in the atom block, forcing a 0 charge on all atoms not</span> | 
|  | 1041 1028 <span class="c">#    listed in an M  CHG or M  RAD line.</span> | 
|  | 1042 1029 <span class="c">#</span> | 
|  | 1043 <a name="ParseCmpdChargePropertyLine-"></a>1030 <span class="k">sub </span><span class="m">ParseCmpdChargePropertyLine</span> <span class="s">{</span> | 
|  | 1044 1031   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 1045 1032 | 
|  | 1046 1033   <span class="k">return</span> <span class="i">_ParseCmpdGenericPropertyLine</span><span class="s">(</span><span class="q">'Charge'</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1047 1034 <span class="s">}</span> | 
|  | 1048 1035 | 
|  | 1049 1036 | 
|  | 1050 1037 <span class="c"># Parse isotope property line in CTAB generic properties block.</span> | 
|  | 1051 1038 <span class="c"># Returns the flat list of atom number/isotope values produced by _ParseCmpdGenericPropertyLine.</span> | 
|  | 1052 1039 <span class="c"># Isotope property line format:</span> | 
|  | 1053 1040 <span class="c">#</span> | 
|  | 1054 1041 <span class="c"># M  ISOnn8 aaa vvv ...</span> | 
|  | 1055 1042 <span class="c">#</span> | 
|  | 1056 1043 <span class="c">#    nn8: Number of value pairs. Maximum of 8 pairs allowed.</span> | 
|  | 1057 1044 <span class="c">#    aaa: Atom number</span> | 
|  | 1058 1045 <span class="c">#    vvv: Absolute mass of the atom isotope as a positive integer. When present, this property</span> | 
|  | 1059 1046 <span class="c">#    supersedes all isotope values in the atom block. Default (no entry) means natural</span> | 
|  | 1060 1047 <span class="c">#    abundance. The difference between this absolute mass value and the natural</span> | 
|  | 1061 1048 <span class="c">#    abundance value specified in the PTABLE.DAT file must be within the range of -18</span> | 
|  | 1062 1049 <span class="c">#    to +12</span> | 
|  | 1063 1050 <span class="c">#</span> | 
|  | 1064 1051 <span class="c"># Notes:</span> | 
|  | 1065 1052 <span class="c">#  . Values correspond to mass numbers...</span> | 
|  | 1066 1053 <span class="c">#</span> | 
|  | 1067 <a name="ParseCmpdIsotopePropertyLine-"></a>1054 <span class="k">sub </span><span class="m">ParseCmpdIsotopePropertyLine</span> <span class="s">{</span> | 
|  | 1068 1055   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 1069 1056 | 
|  | 1070 1057   <span class="k">return</span> <span class="i">_ParseCmpdGenericPropertyLine</span><span class="s">(</span><span class="q">'Isotope'</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1071 1058 <span class="s">}</span> | 
|  | 1072 1059 | 
|  | 1073 1060 <span class="c"># Parse radical property line in CTAB generic properties block.</span> | 
|  | 1074 1061 <span class="c"># Returns the flat list of atom number/radical values produced by _ParseCmpdGenericPropertyLine.</span> | 
|  | 1075 1062 <span class="c"># Radical property line format:</span> | 
|  | 1076 1063 <span class="c">#</span> | 
|  | 1077 1064 <span class="c"># M  RADnn8 aaa vvv ...</span> | 
|  | 1078 1065 <span class="c">#</span> | 
|  | 1079 1066 <span class="c">#    nn8: Number of value pairs. Maximum of 8 pairs allowed.</span> | 
|  | 1080 1067 <span class="c">#    aaa: Atom number</span> | 
|  | 1081 1068 <span class="c">#    vvv: Default of 0 = no radical, 1 = singlet, 2 = doublet, 3 = triplet . When</span> | 
|  | 1082 1069 <span class="c">#    present, this property supersedes all charge and radical values in the atom block,</span> | 
|  | 1083 1070 <span class="c">#    forcing a 0 (zero) charge and radical on all atoms not listed in an M  CHG or</span> | 
|  | 1084 1071 <span class="c">#    M  RAD line.</span> | 
|  | 1085 1072 <span class="c">#</span> | 
|  | 1086 <a name="ParseCmpdRadicalPropertyLine-"></a>1073 <span class="k">sub </span><span class="m">ParseCmpdRadicalPropertyLine</span> <span class="s">{</span> | 
|  | 1087 1074   <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 1088 1075 | 
|  | 1089 1076   <span class="k">return</span> <span class="i">_ParseCmpdGenericPropertyLine</span><span class="s">(</span><span class="q">'Radical'</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1090 1077 <span class="s">}</span> | 
|  | 1091 1078 | 
|  | 1092 1079 <span class="c"># Map MDL radical value used in SD and MOL files to internal spin multiplicity values used by MayaChemTools...</span> | 
|  | 1093 1080 <span class="c"># Values 0 to 3 map to themselves; any other value warns (carp) and returns an empty string.</span> | 
|  | 1094 <a name="MDLRadicalToInternalSpinMultiplicity-"></a>1081 <span class="k">sub </span><span class="m">MDLRadicalToInternalSpinMultiplicity</span> <span class="s">{</span> | 
|  | 1095 1082   <span class="k">my</span><span class="s">(</span><span class="i">$MDLRadical</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 1096 1083   <span class="k">my</span><span class="s">(</span><span class="i">$InternalSpinMultiplicity</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1097 1084 | 
|  | 1098 1085   <span class="i">$InternalSpinMultiplicity</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 1099 1086 | 
|  | 1100 1087   <span class="j">SPINMULTIPLICITY:</span> <span class="s">{</span> | 
|  | 1101 1088     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLRadical</span> == <span class="n">0</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalSpinMultiplicity</span> = <span class="n">0</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span> | 
|  | 1102 1089     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLRadical</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalSpinMultiplicity</span> = <span class="n">1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span> | 
|  | 1103 1090     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLRadical</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalSpinMultiplicity</span> = <span class="n">2</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span> | 
|  | 1104 1091     <span class="k">if</span> <span class="s">(</span><span class="i">$MDLRadical</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$InternalSpinMultiplicity</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span> | 
|  | 1105 1092     <span class="i">$InternalSpinMultiplicity</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 1106 1093     <span class="w">carp</span> <span class="q">"Warning: MDLRadicalToInternalSpinMultiplicity: MDL radical value, $MDLRadical, specifed on line M  RAD is not supported..."</span><span class="sc">;</span> | 
|  | 1107 1094   <span class="s">}</span> | 
|  | 1108 1095   <span class="k">return</span> <span class="i">$InternalSpinMultiplicity</span><span class="sc">;</span> | 
|  | 1109 1096 <span class="s">}</span> | 
|  | 1110 1097 | 
|  | 1111 1098 <span class="c"># Map internal spin multiplicity values used by MayaChemTools to MDL radical value used in SD and MOL files...</span> | 
|  | 1112 1099 <span class="c"># Values 1 to 3 map to themselves; any other value maps to 0.</span> | 
|  | 1113 <a name="InternalSpinMultiplicityToMDLRadical-"></a>1100 <span class="k">sub </span><span class="m">InternalSpinMultiplicityToMDLRadical</span> <span class="s">{</span> | 
|  | 1114 1101   <span class="k">my</span><span class="s">(</span><span class="i">$InternalSpinMultiplicity</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 1115 1102   <span class="k">my</span><span class="s">(</span><span class="i">$MDLRadical</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1116 1103 | 
|  | 1117 1104   <span class="i">$MDLRadical</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 1118 1105 | 
|  | 1119 1106   <span class="j">SPINMULTIPLICITY:</span> <span class="s">{</span> | 
|  | 1120 1107     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalSpinMultiplicity</span> == <span class="n">1</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLRadical</span> = <span class="n">1</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span> | 
|  | 1121 1108     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalSpinMultiplicity</span> == <span class="n">2</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLRadical</span> = <span class="n">2</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span> | 
|  | 1122 1109     <span class="k">if</span> <span class="s">(</span><span class="i">$InternalSpinMultiplicity</span> == <span class="n">3</span><span class="s">)</span> <span class="s">{</span> <span class="i">$MDLRadical</span> = <span class="n">3</span><span class="sc">;</span> <span class="k">last</span> <span class="j">SPINMULTIPLICITY</span><span class="sc">;</span><span class="s">}</span> | 
|  | 1123 1110     <span class="i">$MDLRadical</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 1124 1111   <span class="s">}</span> | 
|  | 1125 1112   <span class="k">return</span> <span class="i">$MDLRadical</span><span class="sc">;</span> | 
|  | 1126 1113 <span class="s">}</span> | 
|  | 1127 1114 | 
|  | 1128 1115 <span class="c"># Process generic CTAB property line: split it on whitespace, warn (carp) when the declared pair count differs from the number of pairs found, and return the flat list of atom number/value entries...</span> | 
|  | 1129 <a name="_ParseCmpdGenericPropertyLine-"></a>1116 <span class="k">sub </span><span class="m">_ParseCmpdGenericPropertyLine</span> <span class="s">{</span> | 
|  | 1130 1117   <span class="k">my</span><span class="s">(</span><span class="i">$PropertyName</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 1131 1118 | 
|  | 1132 1119   <span class="k">my</span><span class="s">(</span><span class="i">$Label</span><span class="cm">,</span> <span class="i">$PropertyLabel</span><span class="cm">,</span> <span class="i">$ValuesCount</span><span class="cm">,</span> <span class="i">$ValuePairsCount</span><span class="cm">,</span> <span class="i">@ValuePairs</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1133 1120 | 
|  | 1134 1121   <span class="i">@ValuePairs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1135 1122   <span class="s">(</span><span class="i">$Label</span><span class="cm">,</span> <span class="i">$PropertyLabel</span><span class="cm">,</span> <span class="i">$ValuesCount</span><span class="cm">,</span> <span class="i">@ValuePairs</span><span class="s">)</span> = <span class="k">split</span><span class="s">(</span><span class="q">' '</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1136 1123   <span class="i">$ValuePairsCount</span> = <span class="s">(</span><span class="k">scalar</span> <span class="i">@ValuePairs</span><span class="s">)</span>/<span class="n">2</span><span class="sc">;</span> | 
|  | 1137 1124   <span class="k">if</span> <span class="s">(</span><span class="i">$ValuesCount</span> != <span class="i">$ValuePairsCount</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1138 1125     <span class="w">carp</span> <span class="q">"Warning: _ParseCmpdGenericPropertyLine: Number of atom number and $PropertyName value paris specified on $Label $PropertyLabel property line, $ValuePairsCount, does not match expected value of $ValuesCount..."</span><span class="sc">;</span> | 
|  | 1139 1126   <span class="s">}</span> | 
|  | 1140 1127 | 
|  | 1141 1128   <span class="k">return</span> <span class="s">(</span><span class="i">@ValuePairs</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1142 1129 <span class="s">}</span> | 
|  | 1143 1130 | 
|  | 1144 1131 <span class="c"># Generate CTAB property lines (M  CHG, M  ISO or M  RAD) for charge, isotope and radical properties from a reference to a flat list of atom number/value pairs...</span> | 
|  | 1145 1132 <span class="c"># Returns the list of lines, each holding at most 8 pairs preceded by the label and the pair count; an unknown property name warns (carp) and returns an empty list.</span> | 
|  | 1146 <a name="_GenerateCmpdGenericPropertyLines-"></a>1133 <span class="k">sub </span><span class="m">_GenerateCmpdGenericPropertyLines</span> <span class="s">{</span> | 
|  | 1147 1134   <span class="k">my</span><span class="s">(</span><span class="i">$PropertyName</span><span class="cm">,</span> <span class="i">$PropertyValuePairsRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 1148 1135   <span class="k">my</span><span class="s">(</span><span class="i">$Index</span><span class="cm">,</span> <span class="i">$PropertyLabel</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">$PropertyCount</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$PropertyValue</span><span class="cm">,</span> <span class="i">@PropertyLines</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1149 1136 | 
|  | 1150 1137   <span class="i">@PropertyLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1151 1138   <span class="j">NAME:</span> <span class="s">{</span> | 
|  | 1152 1139     <span class="k">if</span> <span class="s">(</span><span class="i">$PropertyName</span> =~ <span class="q">/^Charge$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$PropertyLabel</span> = <span class="q">"M  CHG"</span><span class="sc">;</span> <span class="k">last</span> <span class="j">NAME</span><span class="sc">;</span> <span class="s">}</span> | 
|  | 1153 1140     <span class="k">if</span> <span class="s">(</span><span class="i">$PropertyName</span> =~ <span class="q">/^Isotope$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$PropertyLabel</span> = <span class="q">"M  ISO"</span><span class="sc">;</span> <span class="k">last</span> <span class="j">NAME</span><span class="sc">;</span> <span class="s">}</span> | 
|  | 1154 1141     <span class="k">if</span> <span class="s">(</span><span class="i">$PropertyName</span> =~ <span class="q">/^Radical$/i</span><span class="s">)</span> <span class="s">{</span> <span class="i">$PropertyLabel</span> = <span class="q">"M  RAD"</span><span class="sc">;</span> <span class="k">last</span> <span class="j">NAME</span><span class="sc">;</span> <span class="s">}</span> | 
|  | 1155 1142     <span class="w">carp</span> <span class="q">"Warning: _GenerateCmpdGenericPropertyLines: Unknown property name, $PropertyName, specified..."</span><span class="sc">;</span> | 
|  | 1156 1143     <span class="k">return</span> <span class="i">@PropertyLines</span><span class="sc">;</span> | 
|  | 1157 1144   <span class="s">}</span> | 
|  | 1158 1145 | 
|  | 1159 1146   <span class="c"># A maximum of 8 property pair values allowed per line: flush the current line as soon as it holds 8 pairs, before adding the next pair...</span> | 
|  | 1160 1147   <span class="i">$PropertyCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 1161 1148   <span class="i">$Line</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 1162 1149   <span class="k">for</span> <span class="s">(</span><span class="i">$Index</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$Index</span> < <span class="i">$#</span>{<span class="i">$PropertyValuePairsRef</span>}<span class="sc">;</span> <span class="i">$Index</span> += <span class="n">2</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1163 1150     <span class="k">if</span> <span class="s">(</span><span class="i">$PropertyCount</span> >= <span class="n">8</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1164 1151       <span class="c"># Setup property line; it holds exactly 8 pairs at this point, matching the hard-coded count below...</span> | 
|  | 1165 1152       <span class="i">$Line</span> = <span class="q">"${PropertyLabel}  8${Line}"</span><span class="sc">;</span> | 
|  | 1166 1153       <span class="k">push</span> <span class="i">@PropertyLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="sc">;</span> | 
|  | 1167 1154 | 
|  | 1168 1155       <span class="i">$PropertyCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 1169 1156       <span class="i">$Line</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 1170 1157     <span class="s">}</span> | 
|  | 1171 1158     <span class="i">$PropertyCount</span>++<span class="sc">;</span> | 
|  | 1172 1159     <span class="i">$AtomNum</span> = <span class="i">$PropertyValuePairsRef</span>->[<span class="i">$Index</span>]<span class="sc">;</span> | 
|  | 1173 1160     <span class="i">$PropertyValue</span> = <span class="i">$PropertyValuePairsRef</span>->[<span class="i">$Index</span> + <span class="n">1</span>]<span class="sc">;</span> | 
|  | 1174 1161     <span class="i">$Line</span> .= <span class="k">sprintf</span> <span class="q">" %3i %3i"</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$PropertyValue</span><span class="sc">;</span> | 
|  | 1175 1162   <span class="s">}</span> | 
|  | 1176 1163   <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1177 1164     <span class="i">$Line</span> = <span class="q">"${PropertyLabel}  ${PropertyCount}${Line}"</span><span class="sc">;</span> | 
|  | 1178 1165     <span class="k">push</span> <span class="i">@PropertyLines</span><span class="cm">,</span> <span class="i">$Line</span><span class="sc">;</span> | 
|  | 1179 1166   <span class="s">}</span> | 
|  | 1180 1167   <span class="k">return</span> <span class="i">@PropertyLines</span><span class="sc">;</span> | 
|  | 1181 1168 <span class="s">}</span> | 
|  | 1182 1169 | 
|  | 1183 1170 <span class="c">#</span> | 
|  | 1184 1171 <span class="c"># Read compound data into a string and return its value. Reading stops after the $$$$ delimiter line or at end of file; \r\n and \r are converted to \n and the newline ending the $$$$ line is removed.</span> | 
|  | 1185 <a name="ReadCmpdString-"></a>1172 <span class="k">sub </span><span class="m">ReadCmpdString</span> <span class="s">{</span> | 
|  | 1186 1173   <span class="k">my</span><span class="s">(</span><span class="i">$SDFileRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 1187 1174   <span class="k">my</span><span class="s">(</span><span class="i">$CmpdString</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1188 1175 | 
|  | 1189 1176   <span class="i">$CmpdString</span> = <span class="q">""</span><span class="sc">;</span> | 
|  | 1190 1177   <span class="j">LINE:</span> <span class="k">while</span> <span class="s">(</span><span class="k">defined</span><span class="s">(</span><span class="i">$_</span> = <span class="q"><$SDFileRef></span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1191 1178     <span class="c"># Change Windows and Mac new line char to UNIX...</span> | 
|  | 1192 1179     <span class="q">s/(\r\n)|(\r)/\n/g</span><span class="sc">;</span> | 
|  | 1193 1180 | 
|  | 1194 1181     <span class="k">if</span> <span class="s">(</span><span class="q">/^\$\$\$\$/</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1195 1182       <span class="c"># Take out any new line char at the end by explicitly removing it instead of using</span> | 
|  | 1196 1183       <span class="c"># chomp, which might not always work correctly on files generated on a system</span> | 
|  | 1197 1184       <span class="c"># with a value of input line separator different from the current system...</span> | 
|  | 1198 1185       <span class="q">s/\n$//g</span><span class="sc">;</span> | 
|  | 1199 1186 | 
|  | 1200 1187       <span class="c"># Doesn't hurt to chomp...</span> | 
|  | 1201 1188       <span class="k">chomp</span><span class="sc">;</span> | 
|  | 1202 1189 | 
|  | 1203 1190       <span class="i">$CmpdString</span> .=  <span class="i">$_</span><span class="sc">;</span> | 
|  | 1204 1191       <span class="k">last</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 1205 1192     <span class="s">}</span> | 
|  | 1206 1193     <span class="k">else</span> <span class="s">{</span> | 
|  | 1207 1194       <span class="i">$CmpdString</span> .=  <span class="i">$_</span><span class="sc">;</span> | 
|  | 1208 1195     <span class="s">}</span> | 
|  | 1209 1196   <span class="s">}</span> | 
|  | 1210 1197   <span class="k">return</span> <span class="i">$CmpdString</span><span class="sc">;</span> | 
|  | 1211 1198 <span class="s">}</span> | 
|  | 1212 1199 | 
|  | 1213 1200 <span class="c"># Find out the number of fragments in the compounds. And for the compound with</span> | 
|  | 1214 1201 <span class="c"># more than one fragment, remove all the others besides the largest one.</span> | 
|  | 1215 <a name="WashCmpd-"></a>1202 <span class="k">sub </span><span class="m">WashCmpd</span> <span class="s">{</span> | 
|  | 1216 1203   <span class="k">my</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 1217 1204   <span class="k">my</span><span class="s">(</span><span class="i">$WashedCmpdString</span><span class="cm">,</span> <span class="i">$FragmentCount</span><span class="cm">,</span> <span class="i">$Fragments</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1218 1205 | 
|  | 1219 1206   <span class="i">$WashedCmpdString</span> = <span class="q">""</span><span class="sc">;</span> | 
|  | 1220 1207   <span class="s">(</span><span class="i">$FragmentCount</span><span class="cm">,</span> <span class="i">$Fragments</span><span class="s">)</span> = <span class="i">GetCmpdFragments</span><span class="s">(</span><span class="i">$CmpdLines</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1221 1208   <span class="k">if</span> <span class="s">(</span><span class="i">$FragmentCount</span> > <span class="n">1</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1222 1209     <span class="c"># Go over the compound data for the largest fragment including property</span> | 
|  | 1223 1210     <span class="c"># data...</span> | 
|  | 1224 1211     <span class="k">my</span> <span class="s">(</span><span class="i">@AllFragments</span><span class="cm">,</span> <span class="i">@LargestFragment</span><span class="cm">,</span> <span class="i">%LargestFragmentAtoms</span><span class="cm">,</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">$Index</span><span class="cm">,</span> <span class="i">$LineIndex</span><span class="cm">,</span> <span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$NewAtomCount</span><span class="cm">,</span> <span class="i">$NewBondCount</span><span class="cm">,</span> <span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="cm">,</span> <span class="i">$FirstNewAtomNum</span><span class="cm">,</span> <span class="i">$SecondNewAtomNum</span><span class="cm">,</span> <span class="i">$AtomNum</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="cm">,</span> <span class="i">$BondLine</span><span class="cm">,</span> <span class="i">$MENDLineIndex</span><span class="cm">,</span> <span class="i">$Line</span><span class="cm">,</span> <span class="i">$Value</span><span class="cm">,</span> <span class="i">@ValuePairs</span><span class="cm">,</span> <span class="i">@NewValuePairs</span><span class="cm">,</span> <span class="i">$ValuePairIndex</span><span class="cm">,</span> <span class="i">$NewAtomNum</span><span class="cm">,</span> <span class="i">@NewPropertyLines</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1225 1212 | 
|  | 1226 1213     <span class="i">@AllFragments</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> <span class="i">@LargestFragment</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1227 1214     <span class="i">%LargestFragmentAtoms</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1228 1215     <span class="i">@AllFragments</span> = <span class="k">split</span> <span class="q">"\n"</span><span class="cm">,</span> <span class="i">$Fragments</span><span class="sc">;</span> | 
|  | 1229 1216     <span class="i">@LargestFragment</span> = <span class="k">split</span> <span class="q">" "</span><span class="cm">,</span> <span class="i">$AllFragments</span>[<span class="n">0</span>]<span class="sc">;</span> | 
|  | 1230 1217     <span class="k">for</span> <span class="i">$Index</span> <span class="s">(</span><span class="n">0</span> .. <span class="i">$#LargestFragment</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1231 1218       <span class="c"># Map old atom numbers to new atom numbers as the fragment atom numbers are sorted</span> | 
|  | 1232 1219       <span class="c"># from lowest to highest old atom numbers...</span> | 
|  | 1233 1220       <span class="i">$LargestFragmentAtoms</span>{<span class="i">$LargestFragment</span>[<span class="i">$Index</span>]} = <span class="i">$Index</span> + <span class="n">1</span><span class="sc">;</span> | 
|  | 1234 1221     <span class="s">}</span> | 
|  | 1235 1222     <span class="i">@WashedCmpdLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1236 1223     <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="n">0</span>]<span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="n">1</span>]<span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="n">2</span>]<span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="sc">;</span> | 
|  | 1237 1224     <span class="s">(</span><span class="i">$AtomCount</span><span class="cm">,</span> <span class="i">$BondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="s">)</span> = <span class="i">ParseCmpdCountsLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="n">3</span>]<span class="s">)</span><span class="sc">;</span> | 
|  | 1238 1225     <span class="i">$NewAtomCount</span> = <span class="i">@LargestFragment</span><span class="sc">;</span> | 
|  | 1239 1226     <span class="i">$NewBondCount</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 1240 1227     <span class="i">$AtomNum</span> = <span class="n">0</span><span class="sc">;</span> | 
|  | 1241 1228     <span class="c"># Retrieve the largest fragment atom lines...</span> | 
|  | 1242 1229     <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> | 
|  | 1243 1230       <span class="i">$AtomNum</span>++<span class="sc">;</span> | 
|  | 1244 1231       <span class="k">if</span> <span class="s">(</span><span class="i">$LargestFragmentAtoms</span>{<span class="i">$AtomNum</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 1245 1232         <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> | 
|  | 1246 1233       <span class="s">}</span> | 
|  | 1247 1234     <span class="s">}</span> | 
|  | 1248 1235     <span class="c"># Retrieve the largest fragment bond lines...</span> | 
|  | 1249 1236     <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="n">4</span> + <span class="i">$AtomCount</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span> + <span class="i">$BondCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> | 
|  | 1250 1237       <span class="s">(</span><span class="i">$FirstAtomNum</span><span class="cm">,</span> <span class="i">$SecondAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span> = <span class="i">ParseCmpdBondLine</span><span class="s">(</span><span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="s">)</span><span class="sc">;</span> | 
|  | 1251 1238       <span class="k">if</span> <span class="s">(</span><span class="i">$LargestFragmentAtoms</span>{<span class="i">$FirstAtomNum</span>} && <span class="i">$LargestFragmentAtoms</span>{<span class="i">$SecondAtomNum</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 1252 1239         <span class="i">$NewBondCount</span>++<span class="sc">;</span> | 
|  | 1253 1240         <span class="c"># Set up bond line with new atom number mapping...</span> | 
|  | 1254 1241         <span class="i">$FirstNewAtomNum</span> =  <span class="i">$LargestFragmentAtoms</span>{<span class="i">$FirstAtomNum</span>}<span class="sc">;</span> | 
|  | 1255 1242         <span class="i">$SecondNewAtomNum</span> =  <span class="i">$LargestFragmentAtoms</span>{<span class="i">$SecondAtomNum</span>}<span class="sc">;</span> | 
|  | 1256 1243         <span class="i">$BondLine</span> = <span class="i">GenerateCmpdBondLine</span><span class="s">(</span><span class="i">$FirstNewAtomNum</span><span class="cm">,</span> <span class="i">$SecondNewAtomNum</span><span class="cm">,</span> <span class="i">$BondType</span><span class="cm">,</span> <span class="i">$BondStereo</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1257 1244         <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">$BondLine</span><span class="sc">;</span> | 
|  | 1258 1245       <span class="s">}</span> | 
|  | 1259 1246     <span class="s">}</span> | 
|  | 1260 1247     <span class="c"># Get property lines for CHG, ISO and RAD label and map the old atom numbers to new</span> | 
|  | 1261 1248     <span class="c"># atom numbers; other property lines before the M  END line are skipped as atom numbers for</span> | 
|  | 1262 1249     <span class="c"># other properties might not be valid anymore...</span> | 
|  | 1263 1250     <span class="c">#</span> | 
|  | 1264 1251     <span class="i">$MENDLineIndex</span> = <span class="i">$LineIndex</span><span class="sc">;</span> | 
|  | 1265 1252     <span class="j">LINE:</span> <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="s">(</span><span class="n">4</span> + <span class="i">$AtomCount</span> + <span class="i">$BondCount</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="i">@$CmpdLines</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> | 
|  | 1266 1253       <span class="i">$Line</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> | 
|  | 1267 1254       <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M  END/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1268 1255         <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="q">"M  END"</span><span class="sc">;</span> | 
|  | 1269 1256         <span class="i">$MENDLineIndex</span> = <span class="i">$LineIndex</span><span class="sc">;</span> | 
|  | 1270 1257         <span class="k">last</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 1271 1258       <span class="s">}</span> | 
|  | 1272 1259 | 
|  | 1273 1260       <span class="i">@ValuePairs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1274 1261       <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M  CHG/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1275 1262         <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdChargePropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1276 1263       <span class="s">}</span> | 
|  | 1277 1264       <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M  RAD/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1278 1265         <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdRadicalPropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1279 1266       <span class="s">}</span> | 
|  | 1280 1267       <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M  ISO/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1281 1268         <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdIsotopePropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1282 1269       <span class="s">}</span> | 
|  | 1283 1270       <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^A  /i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1284 1271         <span class="k">my</span><span class="s">(</span><span class="i">$NextLine</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1285 1272         <span class="i">$LineIndex</span>++<span class="sc">;</span> | 
|  | 1286 1273         <span class="i">$NextLine</span> = <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> | 
|  | 1287 1274         <span class="i">@ValuePairs</span> = <span class="i">ParseCmpdAtomAliasPropertyLine</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">$NextLine</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1288 1275       <span class="s">}</span> | 
|  | 1289 1276       <span class="k">else</span> <span class="s">{</span> | 
|  | 1290 1277         <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 1291 1278       <span class="s">}</span> | 
|  | 1292 1279 | 
|  | 1293 1280       <span class="k">if</span> <span class="s">(</span>!<span class="i">@ValuePairs</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1294 1281         <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 1295 1282       <span class="s">}</span> | 
|  | 1296 1283 | 
|  | 1297 1284       <span class="c"># Collect values for valid atom numbers with mapping to new atom numbers...</span> | 
|  | 1298 1285       <span class="i">@NewValuePairs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1299 1286       <span class="j">VALUEINDEX:</span> <span class="k">for</span> <span class="s">(</span><span class="i">$ValuePairIndex</span> = <span class="n">0</span><span class="sc">;</span> <span class="i">$ValuePairIndex</span> < <span class="i">$#ValuePairs</span><span class="sc">;</span> <span class="i">$ValuePairIndex</span> += <span class="n">2</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1300 1287         <span class="i">$AtomNum</span> = <span class="i">$ValuePairs</span>[<span class="i">$ValuePairIndex</span>]<span class="sc">;</span> <span class="i">$Value</span> = <span class="i">$ValuePairs</span>[<span class="i">$ValuePairIndex</span> + <span class="n">1</span>]<span class="sc">;</span> | 
|  | 1301 1288         <span class="k">if</span> <span class="s">(</span>!<span class="k">exists</span> <span class="i">$LargestFragmentAtoms</span>{<span class="i">$AtomNum</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 1302 1289           <span class="k">next</span> <span class="j">VALUEINDEX</span><span class="sc">;</span> | 
|  | 1303 1290         <span class="s">}</span> | 
|  | 1304 1291         <span class="i">$NewAtomNum</span> = <span class="i">$LargestFragmentAtoms</span>{<span class="i">$AtomNum</span>}<span class="sc">;</span> | 
|  | 1305 1292         <span class="k">push</span> <span class="i">@NewValuePairs</span><span class="cm">,</span> <span class="s">(</span><span class="i">$NewAtomNum</span><span class="cm">,</span> <span class="i">$Value</span><span class="s">)</span> | 
|  | 1306 1293       <span class="s">}</span> | 
|  | 1307 1294       <span class="k">if</span> <span class="s">(</span>!<span class="i">@NewValuePairs</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1308 1295         <span class="k">next</span> <span class="j">LINE</span><span class="sc">;</span> | 
|  | 1309 1296       <span class="s">}</span> | 
|  | 1310 1297       <span class="i">@NewPropertyLines</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1311 1298       <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M  CHG/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1312 1299         <span class="i">@NewPropertyLines</span> = <span class="i">GenerateCmpdChargePropertyLines</span><span class="s">(</span>\<span class="i">@NewValuePairs</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1313 1300       <span class="s">}</span> | 
|  | 1314 1301       <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M  RAD/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1315 1302         <span class="i">@NewPropertyLines</span> = <span class="i">GenerateCmpdRadicalPropertyLines</span><span class="s">(</span>\<span class="i">@NewValuePairs</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1316 1303       <span class="s">}</span> | 
|  | 1317 1304       <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^M  ISO/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1318 1305         <span class="i">@NewPropertyLines</span> = <span class="i">GenerateCmpdIsotopePropertyLines</span><span class="s">(</span>\<span class="i">@NewValuePairs</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1319 1306       <span class="s">}</span> | 
|  | 1320 1307       <span class="k">elsif</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^A  /i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 1321 1308         <span class="i">@NewPropertyLines</span> = <span class="i">GenerateCmpdAtomAliasPropertyLines</span><span class="s">(</span>\<span class="i">@NewValuePairs</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1322 1309       <span class="s">}</span> | 
|  | 1323 1310       <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">@NewPropertyLines</span><span class="sc">;</span> | 
|  | 1324 1311     <span class="s">}</span> | 
|  | 1325 1312 | 
|  | 1326 1313     <span class="c"># Retrieve rest of the data label and value property data...</span> | 
|  | 1327 1314     <span class="k">for</span> <span class="s">(</span><span class="i">$LineIndex</span> = <span class="s">(</span><span class="n">1</span> + <span class="i">$MENDLineIndex</span><span class="s">)</span><span class="sc">;</span> <span class="i">$LineIndex</span> < <span class="i">@$CmpdLines</span><span class="sc">;</span> <span class="i">$LineIndex</span>++<span class="s">)</span> <span class="s">{</span> | 
|  | 1328 1315       <span class="k">push</span> <span class="i">@WashedCmpdLines</span><span class="cm">,</span> <span class="i">@$CmpdLines</span>[<span class="i">$LineIndex</span>]<span class="sc">;</span> | 
|  | 1329 1316     <span class="s">}</span> | 
|  | 1330 1317     <span class="c"># Update atom and bond count line...</span> | 
|  | 1331 1318     <span class="i">$WashedCmpdLines</span>[<span class="n">3</span>] = <span class="i">GenerateCmpdCountsLine</span><span class="s">(</span><span class="i">$NewAtomCount</span><span class="cm">,</span> <span class="i">$NewBondCount</span><span class="cm">,</span> <span class="i">$ChiralFlag</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1332 1319 | 
|  | 1333 1320     <span class="i">$WashedCmpdString</span> = <span class="k">join</span> <span class="q">"\n"</span><span class="cm">,</span> <span class="i">@WashedCmpdLines</span><span class="sc">;</span> | 
|  | 1334 1321   <span class="s">}</span> | 
|  | 1335 1322   <span class="k">return</span> <span class="s">(</span><span class="i">$FragmentCount</span><span class="cm">,</span> <span class="i">$Fragments</span><span class="cm">,</span> <span class="i">$WashedCmpdString</span><span class="s">)</span><span class="sc">;</span> | 
|  | 1336 1323 <span class="s">}</span> | 
|  | 1337 1324 | 
|  | 1338 <a name="EOF-"></a></pre> | 
|  | 1339 <p> </p> | 
|  | 1340 <br /> | 
|  | 1341 <center> | 
|  | 1342 <img src="../../../images/h2o2.png"> | 
|  | 1343 </center> | 
|  | 1344 </body> | 
|  | 1345 </html> |