Mercurial > repos > deepakjadmin > mayatool3_test2
view docs/scripts/html/code/SimilaritySearchingFingerprints.html @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
line wrap: on
line source
<html> <head> <title>MayaChemTools:Code:SimilaritySearchingFingerprints.pl</title> <meta http-equiv="content-type" content="text/html;charset=utf-8"> <link rel="stylesheet" type="text/css" href="../../../css/MayaChemToolsCode.css"> </head> <body leftmargin="20" rightmargin="20" topmargin="10" bottommargin="10"> <br/> <center> <a href="http://www.mayachemtools.org" title="MayaChemTools Home"><img src="../../../images/MayaChemToolsLogo.gif" border="0" alt="MayaChemTools"></a> </center> <br/> <pre> 1 #!/usr/bin/perl -w 2 <span class="c">#</span> 3 <span class="c"># $RCSfile: SimilaritySearchingFingerprints.pl,v $</span> 4 <span class="c"># $Date: 2015/02/28 20:46:21 $</span> 5 <span class="c"># $Revision: 1.18 $</span> 6 <span class="c">#</span> 7 <span class="c"># Author: Manish Sud &lt;msud@san.rr.com&gt;</span> 8 <span class="c">#</span> 9 <span class="c"># Copyright (C) 2015 Manish Sud. All rights reserved.</span> 10 <span class="c">#</span> 11 <span class="c"># This file is part of MayaChemTools.</span> 12 <span class="c">#</span> 13 <span class="c"># MayaChemTools is free software; you can redistribute it and/or modify it under</span> 14 <span class="c"># the terms of the GNU Lesser General Public License as published by the Free</span> 15 <span class="c"># Software Foundation; either version 3 of the License, or (at your option) any</span> 16 <span class="c"># later version.</span> 17 <span class="c">#</span> 18 <span class="c"># MayaChemTools is distributed in the hope that it will be useful, but without</span> 19 <span class="c"># any warranty; without even the implied warranty of merchantability of fitness</span> 20 <span class="c"># for a particular purpose.
See the GNU Lesser General Public License for more</span> 21 <span class="c"># details.</span> 22 <span class="c">#</span> 23 <span class="c"># You should have received a copy of the GNU Lesser General Public License</span> 24 <span class="c"># along with MayaChemTools; if not, see &lt;http://www.gnu.org/licenses/&gt; or</span> 25 <span class="c"># write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,</span> 26 <span class="c"># Boston, MA, 02111-1307, USA.</span> 27 <span class="c">#</span> 28 29 <span class="k">use</span> <span class="w">strict</span><span class="sc">;</span> 30 <span class="k">use</span> <span class="w">FindBin</span><span class="sc">;</span> <span class="k">use</span> <span class="w">lib</span> <span class="q">"$FindBin::Bin/../lib"</span><span class="sc">;</span> 31 <span class="k">use</span> <span class="w">Getopt::Long</span><span class="sc">;</span> 32 <span class="k">use</span> <span class="w">File::Basename</span><span class="sc">;</span> 33 <span class="k">use</span> <span class="w">Text::ParseWords</span><span class="sc">;</span> 34 <span class="k">use</span> <span class="w">Benchmark</span><span class="sc">;</span> 35 <span class="k">use</span> <span class="w">FileUtil</span><span class="sc">;</span> 36 <span class="k">use</span> <span class="w">TextUtil</span><span class="sc">;</span> 37 <span class="k">use</span> <span class="w">SDFileUtil</span><span class="sc">;</span> 38 <span class="k">use</span> <span class="w">StatisticsUtil</span><span class="sc">;</span> 39 <span class="k">use</span> <span class="w">PseudoHeap</span><span class="sc">;</span> 40 <span class="k">use</span> <span class="w">Fingerprints::FingerprintsFileUtil</span><span class="sc">;</span> 41 <span class="k">use</span> <span class="w">Fingerprints::FingerprintsBitVector</span><span class="sc">;</span> 42 <span class="k">use</span> <span class="w">Fingerprints::FingerprintsVector</span><span class="sc">;</span> 43 44 <span class="k">my</span><span
class="s">(</span><span class="i">$ScriptName</span><span class="cm">,</span> <span class="i">%Options</span><span class="cm">,</span> <span class="i">$StartTime</span><span class="cm">,</span> <span class="i">$EndTime</span><span class="cm">,</span> <span class="i">$TotalTime</span><span class="s">)</span><span class="sc">;</span> 45 46 <span class="c"># Autoflush STDOUT</span> 47 <span class="i">$|</span> = <span class="n">1</span><span class="sc">;</span> 48 49 <span class="c"># Starting message...</span> 50 <span class="i">$ScriptName</span> = <span class="i">basename</span><span class="s">(</span><span class="i">$0</span><span class="s">)</span><span class="sc">;</span> 51 <span class="k">print</span> <span class="q">"\n$ScriptName: Starting...\n\n"</span><span class="sc">;</span> 52 <span class="i">$StartTime</span> = <span class="w">new</span> <span class="w">Benchmark</span><span class="sc">;</span> 53 54 <span class="c"># Get the options and setup script...</span> 55 <span class="i">SetupScriptUsage</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 56 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">help</span>} || <span class="i">@ARGV</span> != <span class="n">2</span><span class="s">)</span> <span class="s">{</span> 57 <span class="k">die</span> <span class="i">GetUsageFromPod</span><span class="s">(</span><span class="q">"$FindBin::Bin/$ScriptName"</span><span class="s">)</span><span class="sc">;</span> 58 <span class="s">}</span> 59 60 <span class="c"># Process reference and database file names...</span> 61 <span class="k">my</span><span class="s">(</span><span class="i">@FingerprintsFilesList</span><span class="s">)</span><span class="sc">;</span> 62 <span class="i">ProcessFingerprintsFileNames</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 63 64 <span class="c"># Process options...</span> 65 <span class="k">print</span> <span 
class="q">"Processing options...\n"</span><span class="sc">;</span> 66 <span class="k">my</span><span class="s">(</span><span class="i">%OptionsInfo</span><span class="s">)</span><span class="sc">;</span> 67 <span class="i">ProcessOptions</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 68 69 <span class="c"># Setup information about fingerprints input and SD/text output files...</span> 70 <span class="k">my</span><span class="s">(</span><span class="i">%FingerprintsFilesInfo</span><span class="cm">,</span> <span class="i">%OutputFilesInfo</span><span class="cm">,</span> <span class="i">%SimilaritySearchInfo</span><span class="s">)</span><span class="sc">;</span> 71 <span class="k">print</span> <span class="q">"Checking and retrieving information from reference and database fingerprints files...\n"</span><span class="sc">;</span> 72 <span class="i">RetrieveFingerprintsFilesInfo</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 73 74 <span class="c"># Perform similarity search...</span> 75 <span class="k">print</span> <span class="q">"Performing similarity search...\n"</span><span class="sc">;</span> 76 <span class="k">my</span><span class="s">(</span><span class="i">%SimilaritySearchResults</span><span class="cm">,</span> <span class="i">%DatabaseFingerprintsFileData</span><span class="s">)</span><span class="sc">;</span> 77 <span class="i">PerformSimilaritySearch</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 78 79 <span class="k">print</span> <span class="q">"\n$ScriptName:Done...\n\n"</span><span class="sc">;</span> 80 81 <span class="i">$EndTime</span> = <span class="w">new</span> <span class="w">Benchmark</span><span class="sc">;</span> 82 <span class="i">$TotalTime</span> = <span class="w">timediff</span> <span class="s">(</span><span class="i">$EndTime</span><span class="cm">,</span> <span class="i">$StartTime</span><span class="s">)</span><span class="sc">;</span>
83 <span class="k">print</span> <span class="q">"Total time: "</span><span class="cm">,</span> <span class="i">timestr</span><span class="s">(</span><span class="i">$TotalTime</span><span class="s">)</span><span class="cm">,</span> <span class="q">"\n"</span><span class="sc">;</span> 84 85 <span class="c">###############################################################################</span> 86 87 <span class="c"># Perform similarity search using fingerprints data in reference and database text files...</span> 88 <span class="c">#</span> <a name="PerformSimilaritySearch-"></a> 89 <span class="k">sub </span><span class="m">PerformSimilaritySearch</span> <span class="s">{</span> 90 91 <span class="k">print</span> <span class="q">"\nProcessing fingerprints data for reference molecules...\n"</span><span class="sc">;</span> 92 <span class="i">ReadReferenceFingerprintsData</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 93 94 <span class="i">InitializeSimilaritySearchResults</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 95 <span class="i">GenerateSimilaritySearchResults</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 96 <span class="i">WriteSimilaritySearchResultFiles</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 97 <span class="s">}</span> 98 99 <span class="c"># Find similar molecules from database molecules for individual or multiple reference molecules...</span> 100 <span class="c">#</span> <a name="GenerateSimilaritySearchResults-"></a> 101 <span class="k">sub </span><span class="m">GenerateSimilaritySearchResults</span> <span class="s">{</span> 102 <span class="k">my</span><span class="s">(</span><span class="i">$DatabaseFingerprintsFileIO</span><span class="cm">,</span> <span class="i">$FingerprintsCount</span><span class="cm">,</span> <span class="i">$IgnoredFingerprintsCount</span><span class="cm">,</span> <span 
class="i">$DatabaseFingerprintsObject</span><span class="cm">,</span> <span class="i">$DatabaseCmpdID</span><span class="cm">,</span> <span class="i">$ReferenceFingerprintsObject</span><span class="cm">,</span> <span class="i">$ReferenceIndex</span><span class="cm">,</span> <span class="i">$ReferenceCmpdID</span><span class="cm">,</span> <span class="i">$ComparisonValue</span><span class="cm">,</span> <span class="i">$FusedComparisonValue</span><span class="cm">,</span> <span class="i">@ComparisonValues</span><span class="s">)</span><span class="sc">;</span> 103 104 <span class="k">print</span> <span class="q">"Processing fingerprints data for database molecules...\n"</span><span class="sc">;</span> 105 106 <span class="s">(</span><span class="i">$FingerprintsCount</span><span class="cm">,</span> <span class="i">$IgnoredFingerprintsCount</span><span class="s">)</span> = <span class="s">(</span><span class="n">0</span><span class="s">)</span> x <span class="n">3</span><span class="sc">;</span> 107 108 <span class="i">$DatabaseFingerprintsFileIO</span> = <span class="i">Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO</span><span class="s">(</span><span class="i">%</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">FingerprintsFileIOParameters</span>}}<span class="s">)</span><span class="sc">;</span> 109 <span class="i">$DatabaseFingerprintsFileIO</span><span class="i">->Open</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 110 111 <span class="i">@ComparisonValues</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 112 113 <span class="j">DATABASEFP:</span> <span class="k">while</span> <span class="s">(</span><span class="i">$DatabaseFingerprintsFileIO</span><span class="i">->Read</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 114 <span class="i">$FingerprintsCount</span>++<span 
class="sc">;</span> 115 116 <span class="k">if</span> <span class="s">(</span>!<span class="i">$DatabaseFingerprintsFileIO</span><span class="i">->IsFingerprintsDataValid</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 117 <span class="i">$IgnoredFingerprintsCount</span>++<span class="sc">;</span> 118 <span class="k">next</span> <span class="j">DATABASEFP</span><span class="sc">;</span> 119 <span class="s">}</span> 120 <span class="i">$DatabaseFingerprintsObject</span> = <span class="i">$DatabaseFingerprintsFileIO</span><span class="i">->GetFingerprints</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 121 <span class="i">$DatabaseCmpdID</span> = <span class="i">$DatabaseFingerprintsFileIO</span><span class="i">->GetCompoundID</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 122 123 <span class="k">if</span> <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">MultipleReferencesMode</span>}<span class="s">)</span> <span class="s">{</span> 124 <span class="i">@ComparisonValues</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 125 <span class="s">}</span> 126 127 <span class="j">REFERENCEFP:</span> <span class="k">for</span> <span class="i">$ReferenceIndex</span> <span class="s">(</span><span class="n">0</span> .. 
<span class="i">$#</span>{<span class="i">$SimilaritySearchInfo</span>{<span class="w">ReferenceCmpdIDsRef</span>}}<span class="s">)</span> <span class="s">{</span> 128 <span class="i">$ReferenceCmpdID</span> = <span class="i">$SimilaritySearchInfo</span>{<span class="w">ReferenceCmpdIDsRef</span>}->[<span class="i">$ReferenceIndex</span>]<span class="sc">;</span> 129 <span class="i">$ReferenceFingerprintsObject</span> = <span class="i">$SimilaritySearchInfo</span>{<span class="w">ReferenceFingerprintsObjectsRef</span>}->[<span class="i">$ReferenceIndex</span>]<span class="sc">;</span> 130 131 <span class="i">$ComparisonValue</span> = <span class="i">CompareReferenceAndDatabaseFingerprintsPair</span><span class="s">(</span><span class="i">$ReferenceFingerprintsObject</span><span class="cm">,</span> <span class="i">$DatabaseFingerprintsObject</span><span class="s">)</span><span class="sc">;</span> 132 <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$ComparisonValue</span><span class="s">)</span> <span class="s">{</span> 133 <span class="k">next</span> <span class="j">REFERENCEFP</span><span class="sc">;</span> 134 <span class="s">}</span> 135 136 <span class="k">if</span> <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">IndividualReferenceMode</span>}<span class="s">)</span> <span class="s">{</span> 137 <span class="i">CollectSimilaritySearchResults</span><span class="s">(</span><span class="i">$DatabaseFingerprintsFileIO</span><span class="cm">,</span> <span class="i">$DatabaseCmpdID</span><span class="cm">,</span> <span class="i">$ComparisonValue</span><span class="cm">,</span> <span class="i">$ReferenceCmpdID</span><span class="s">)</span><span class="sc">;</span> 138 <span class="s">}</span> 139 <span class="k">elsif</span> <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">MultipleReferencesMode</span>}<span class="s">)</span> <span 
class="s">{</span> 140 <span class="k">push</span> <span class="i">@ComparisonValues</span><span class="cm">,</span> <span class="i">$ComparisonValue</span><span class="sc">;</span> 141 <span class="s">}</span> 142 <span class="s">}</span> 143 144 <span class="k">if</span> <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">MultipleReferencesMode</span>}<span class="s">)</span> <span class="s">{</span> 145 <span class="i">$FusedComparisonValue</span> = <span class="i">CalculateGroupFusionComparisonValue</span><span class="s">(</span>\<span class="i">@ComparisonValues</span><span class="s">)</span><span class="sc">;</span> 146 <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$FusedComparisonValue</span><span class="s">)</span> <span class="s">{</span> 147 <span class="k">next</span> <span class="j">DATABASEFP</span><span class="sc">;</span> 148 <span class="s">}</span> 149 <span class="i">CollectSimilaritySearchResults</span><span class="s">(</span><span class="i">$DatabaseFingerprintsFileIO</span><span class="cm">,</span> <span class="i">$DatabaseCmpdID</span><span class="cm">,</span> <span class="i">$FusedComparisonValue</span><span class="s">)</span><span class="sc">;</span> 150 <span class="s">}</span> 151 <span class="s">}</span> 152 <span class="i">$DatabaseFingerprintsFileIO</span><span class="i">->Close</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 153 154 <span class="k">print</span> <span class="q">"Number of fingerprints data entries in database fingerprints file: $FingerprintsCount\n"</span><span class="sc">;</span> 155 <span class="k">print</span> <span class="q">"Number of fingerprints date entries processed successfully: "</span><span class="cm">,</span> <span class="s">(</span><span class="i">$FingerprintsCount</span> - <span class="i">$IgnoredFingerprintsCount</span><span class="s">)</span> <span class="cm">,</span> <span 
class="q">"\n"</span><span class="sc">;</span> 156 <span class="k">print</span> <span class="q">"Number of fingerprints data entries ignored due to missing/invalid data: $IgnoredFingerprintsCount\n\n"</span><span class="sc">;</span> 157 <span class="s">}</span> 158 159 <span class="c"># Compare a pair of reference and database fingerprints objects corresponding to bit-vector or</span> 160 <span class="c"># vectors using specified comparison method and comparison cutoff...</span> 161 <span class="c">#</span> <a name="CompareReferenceAndDatabaseFingerprintsPair-"></a> 162 <span class="k">sub </span><span class="m">CompareReferenceAndDatabaseFingerprintsPair</span> <span class="s">{</span> 163 <span class="k">my</span><span class="s">(</span><span class="i">$ReferenceFingerprintsObject</span><span class="cm">,</span> <span class="i">$DatabaseFingerprintsObject</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 164 <span class="k">my</span><span class="s">(</span><span class="i">$ComparisonMethod</span><span class="cm">,</span> <span class="i">$ComparisonValue</span><span class="s">)</span><span class="sc">;</span> 165 166 <span class="i">$ComparisonMethod</span> = <span class="i">$SimilaritySearchInfo</span>{<span class="w">ComparisonMethod</span>}<span class="sc">;</span> 167 <span class="i">$ComparisonValue</span> = <span class="i">$ReferenceFingerprintsObject</span><span class="i">->$ComparisonMethod</span><span class="s">(</span><span class="i">$DatabaseFingerprintsObject</span><span class="cm">,</span> <span class="i">@</span>{<span class="i">$SimilaritySearchInfo</span>{<span class="w">ComparisonMethodParameters</span>}}<span class="s">)</span><span class="sc">;</span> 168 169 <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$ComparisonValue</span><span class="s">)</span> <span class="s">{</span> 170 <span class="k">warn</span> <span class="q">"Warning: Ignoring fingerprints data 
for reference compound ID "</span><span class="cm">,</span> <span class="i">$ReferenceFingerprintsObject</span><span class="i">->GetID</span><span class="s">(</span><span class="s">)</span><span class="cm">,</span> <span class="q">": Its comparison with database compound ID, "</span><span class="cm">,</span> <span class="i">$DatabaseFingerprintsObject</span><span class="i">->GetID</span><span class="s">(</span><span class="s">)</span><span class="cm">,</span> <span class="q">", failed.\n"</span><span class="sc">;</span> 171 <span class="k">return</span> <span class="k">undef</span><span class="sc">;</span> 172 <span class="s">}</span> 173 174 <span class="i">$ComparisonValue</span> = <span class="k">sprintf</span><span class="s">(</span><span class="q">"%.$OptionsInfo{Precision}f"</span><span class="cm">,</span> <span class="i">$ComparisonValue</span><span class="s">)</span><span class="sc">;</span> 175 176 <span class="c"># Apply any comparison cutoff...</span> 177 <span class="k">if</span> <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">ApplyComparisonCutoff</span>}<span class="s">)</span> <span class="s">{</span> 178 <span class="k">return</span> <span class="i">$SimilaritySearchInfo</span>{<span class="w">KeepTop</span>} ? <span class="s">(</span><span class="i">$ComparisonValue</span> >= <span class="i">$SimilaritySearchInfo</span>{<span class="w">ComparisonCutoff</span>} ? <span class="i">$ComparisonValue</span> <span class="co">:</span> <span class="k">undef</span><span class="s">)</span> <span class="co">:</span> <span class="s">(</span><span class="i">$ComparisonValue</span> <= <span class="i">$SimilaritySearchInfo</span>{<span class="w">ComparisonCutoff</span>} ? 
<span class="i">$ComparisonValue</span> <span class="co">:</span> <span class="k">undef</span><span class="s">)</span><span class="sc">;</span> 179 <span class="s">}</span> 180 <span class="k">else</span> <span class="s">{</span> 181 <span class="k">return</span> <span class="i">$ComparisonValue</span><span class="sc">;</span> 182 <span class="s">}</span> 183 <span class="s">}</span> 184 185 <span class="c"># Calculate group fusion comparison value...</span> 186 <span class="c">#</span> <a name="CalculateGroupFusionComparisonValue-"></a> 187 <span class="k">sub </span><span class="m">CalculateGroupFusionComparisonValue</span> <span class="s">{</span> 188 <span class="k">my</span><span class="s">(</span><span class="i">$ComparisonValuesRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 189 <span class="k">my</span><span class="s">(</span><span class="i">$FusedComparisonValue</span><span class="cm">,</span> <span class="i">@ComparisonValues</span><span class="s">)</span><span class="sc">;</span> 190 191 <span class="k">if</span> <span class="s">(</span>!<span class="i">@</span>{<span class="i">$ComparisonValuesRef</span>}<span class="s">)</span> <span class="s">{</span> 192 <span class="k">return</span> <span class="k">undef</span><span class="sc">;</span> 193 <span class="s">}</span> 194 195 <span class="k">if</span> <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">SortComparisonValues</span>}<span class="s">)</span> <span class="s">{</span> 196 <span class="i">@ComparisonValues</span> = <span class="k">sort</span> <span class="s">{</span> <span class="i">$SimilaritySearchInfo</span>{<span class="w">KeepTop</span>} ? 
<span class="s">(</span><span class="i">$b</span> <=> <span class="i">$a</span><span class="s">)</span> <span class="co">:</span> <span class="s">(</span><span class="i">$a</span> <=> <span class="i">$b</span><span class="s">)</span> <span class="s">}</span> <span class="i">@</span>{<span class="i">$ComparisonValuesRef</span>}<span class="sc">;</span> 197 <span class="k">if</span> <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">UsekNN</span>} && <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">kNN</span>} < <span class="k">scalar</span> <span class="i">@</span>{<span class="i">$ComparisonValuesRef</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 198 <span class="c"># Keep only top kNN values for group fusion...</span> 199 <span class="k">splice</span> <span class="i">@ComparisonValues</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">kNN</span>}<span class="sc">;</span> 200 <span class="s">}</span> 201 <span class="i">$ComparisonValuesRef</span> = \<span class="i">@ComparisonValues</span><span class="sc">;</span> 202 <span class="s">}</span> 203 204 <span class="i">$FusedComparisonValue</span> = <span class="i">&</span>{<span class="i">$SimilaritySearchInfo</span>{<span class="w">GroupFusionMethodRef</span>}}<span class="s">(</span><span class="i">$ComparisonValuesRef</span><span class="s">)</span><span class="sc">;</span> 205 <span class="k">if</span> <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">ApplyPrecisionDuringFusion</span>}<span class="s">)</span> <span class="s">{</span> 206 <span class="i">$FusedComparisonValue</span> = <span class="k">sprintf</span><span class="s">(</span><span class="q">"%.$OptionsInfo{Precision}f"</span><span class="cm">,</span> <span class="i">$FusedComparisonValue</span><span class="s">)</span><span class="sc">;</span> 207 <span class="s">}</span> 208 209 <span 
class="k">return</span> <span class="i">$FusedComparisonValue</span><span class="sc">;</span> 210 <span class="s">}</span> 211 212 <span class="c"># Collect similarity results for individual reference and multiple references search...</span> 213 <span class="c">#</span> <a name="CollectSimilaritySearchResults-"></a> 214 <span class="k">sub </span><span class="m">CollectSimilaritySearchResults</span> <span class="s">{</span> 215 <span class="k">my</span><span class="s">(</span><span class="i">$DatabaseFingerprintsFileIO</span><span class="cm">,</span> <span class="i">$DatabaseCmpdID</span><span class="cm">,</span> <span class="i">$ComparisonValue</span><span class="cm">,</span> <span class="i">$ReferenceCmpdID</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 216 217 <span class="k">if</span> <span class="s">(</span><span class="k">defined</span> <span class="i">$ReferenceCmpdID</span><span class="s">)</span> <span class="s">{</span> 218 <span class="i">$SimilaritySearchResults</span>{<span class="i">$ReferenceCmpdID</span>}<span class="i">->AddKeyValuePair</span><span class="s">(</span><span class="i">$ComparisonValue</span><span class="cm">,</span> <span class="i">$DatabaseCmpdID</span><span class="s">)</span><span class="sc">;</span> 219 <span class="s">}</span> 220 <span class="k">else</span> <span class="s">{</span> 221 <span class="i">$SimilaritySearchResults</span>{<span class="w">ResultsPseudoHeap</span>}<span class="i">->AddKeyValuePair</span><span class="s">(</span><span class="i">$ComparisonValue</span><span class="cm">,</span> <span class="i">$DatabaseCmpdID</span><span class="s">)</span><span class="sc">;</span> 222 <span class="s">}</span> 223 224 <span class="k">if</span> <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">CollectInputFileData</span>}<span class="s">)</span> <span class="s">{</span> 225 <span 
class="i">CollectDatabaseFileData</span><span class="s">(</span><span class="i">$DatabaseCmpdID</span><span class="cm">,</span> <span class="i">$DatabaseFingerprintsFileIO</span><span class="s">)</span><span class="sc">;</span> 226 <span class="s">}</span> 227 <span class="s">}</span> 228 229 <span class="c"># Initialize similarity results for individual or multiple reference molecules...</span> 230 <span class="c">#</span> <a name="InitializeSimilaritySearchResults-"></a> 231 <span class="k">sub </span><span class="m">InitializeSimilaritySearchResults</span> <span class="s">{</span> 232 <span class="k">my</span><span class="s">(</span><span class="i">$ReferenceCmpdID</span><span class="s">)</span><span class="sc">;</span> 233 234 <span class="i">%SimilaritySearchResults</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 235 236 <span class="k">if</span> <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">IndividualReferenceMode</span>}<span class="s">)</span> <span class="s">{</span> 237 <span class="k">for</span> <span class="i">$ReferenceCmpdID</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$SimilaritySearchInfo</span>{<span class="w">ReferenceCmpdIDsRef</span>}}<span class="s">)</span> <span class="s">{</span> 238 <span class="i">$SimilaritySearchResults</span>{<span class="i">$ReferenceCmpdID</span>} = <span class="w">new</span> <span class="i">PseudoHeap</span><span class="s">(</span><span class="q">'Type'</span> <span class="cm">=></span> <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">KeepTop</span>} ? 
<span class="q">'KeepTopN'</span> <span class="co">:</span> <span class="q">'KeepBottomN'</span><span class="s">)</span><span class="cm">,</span> <span class="q">'KeyType'</span> <span class="cm">=></span> <span class="q">'Numeric'</span><span class="cm">,</span> <span class="q">'MaxSize'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="w">MaxSimilarMolecules</span>}<span class="s">)</span><span class="sc">;</span> 239 <span class="s">}</span> 240 <span class="s">}</span> 241 <span class="k">elsif</span> <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">MultipleReferencesMode</span>}<span class="s">)</span> <span class="s">{</span> 242 <span class="i">$SimilaritySearchResults</span>{<span class="w">ResultsPseudoHeap</span>} = <span class="w">new</span> <span class="i">PseudoHeap</span><span class="s">(</span><span class="q">'Type'</span> <span class="cm">=></span> <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">KeepTop</span>} ? 
<span class="q">'KeepTopN'</span> <span class="co">:</span> <span class="q">'KeepBottomN'</span><span class="s">)</span><span class="cm">,</span> <span class="q">'KeyType'</span> <span class="cm">=></span> <span class="q">'Numeric'</span><span class="cm">,</span> <span class="q">'MaxSize'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="w">MaxSimilarMolecules</span>}<span class="s">)</span><span class="sc">;</span> 243 <span class="s">}</span> 244 245 <span class="i">%DatabaseFingerprintsFileData</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 246 <span class="s">}</span> 247 248 <span class="c"># Write out results SD and/or CSV/TSV text files for individual or multiple reference molecules...</span> 249 <span class="c">#</span> <a name="WriteSimilaritySearchResultFiles-"></a> 250 <span class="k">sub </span><span class="m">WriteSimilaritySearchResultFiles</span> <span class="s">{</span> 251 <span class="k">my</span><span class="s">(</span><span class="i">$NewSDFileRef</span><span class="cm">,</span> <span class="i">$NewTextFileRef</span><span class="cm">,</span> <span class="i">$ReferenceCmpdID</span><span class="cm">,</span> <span class="i">$DatabaseCmpdID</span><span class="cm">,</span> <span class="i">$ComparisonValue</span><span class="s">)</span><span class="sc">;</span> 252 253 <span class="s">(</span><span class="i">$NewSDFileRef</span><span class="cm">,</span> <span class="i">$NewTextFileRef</span><span class="s">)</span> = <span class="i">SetupAndOpenOutputFiles</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 254 255 <span class="k">if</span> <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">IndividualReferenceMode</span>}<span class="s">)</span> <span class="s">{</span> 256 <span class="k">for</span> <span class="i">$ReferenceCmpdID</span> <span class="s">(</span><span class="i">@</span>{<span 
class="i">$SimilaritySearchInfo</span>{<span class="w">ReferenceCmpdIDsRef</span>}}<span class="s">)</span> <span class="s">{</span> 257 <span class="k">for</span> <span class="i">$ComparisonValue</span> <span class="s">(</span><span class="i">$SimilaritySearchResults</span>{<span class="i">$ReferenceCmpdID</span>}<span class="i">->GetSortedKeys</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 258 <span class="k">for</span> <span class="i">$DatabaseCmpdID</span> <span class="s">(</span><span class="i">$SimilaritySearchResults</span>{<span class="i">$ReferenceCmpdID</span>}<span class="i">->GetKeyValues</span><span class="s">(</span><span class="i">$ComparisonValue</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 259 <span class="i">WriteDataToOutputFiles</span><span class="s">(</span><span class="i">$NewSDFileRef</span><span class="cm">,</span> <span class="i">$NewTextFileRef</span><span class="cm">,</span> <span class="i">$ComparisonValue</span><span class="cm">,</span> <span class="i">$DatabaseCmpdID</span><span class="cm">,</span> <span class="i">$ReferenceCmpdID</span><span class="s">)</span><span class="sc">;</span> 260 <span class="s">}</span> 261 <span class="s">}</span> 262 <span class="s">}</span> 263 <span class="s">}</span> 264 <span class="k">elsif</span> <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">MultipleReferencesMode</span>}<span class="s">)</span> <span class="s">{</span> 265 <span class="k">for</span> <span class="i">$ComparisonValue</span> <span class="s">(</span><span class="i">$SimilaritySearchResults</span>{<span class="w">ResultsPseudoHeap</span>}<span class="i">->GetSortedKeys</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 266 <span class="k">for</span> <span class="i">$DatabaseCmpdID</span> <span class="s">(</span><span 
class="i">$SimilaritySearchResults</span>{<span class="w">ResultsPseudoHeap</span>}<span class="i">->GetKeyValues</span><span class="s">(</span><span class="i">$ComparisonValue</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 267 <span class="i">WriteDataToOutputFiles</span><span class="s">(</span><span class="i">$NewSDFileRef</span><span class="cm">,</span> <span class="i">$NewTextFileRef</span><span class="cm">,</span> <span class="i">$ComparisonValue</span><span class="cm">,</span> <span class="i">$DatabaseCmpdID</span><span class="s">)</span><span class="sc">;</span> 268 <span class="s">}</span> 269 <span class="s">}</span> 270 <span class="s">}</span> 271 272 <span class="k">if</span> <span class="s">(</span><span class="i">$NewSDFileRef</span><span class="s">)</span> <span class="s">{</span> 273 <span class="k">close</span> <span class="i">$NewSDFileRef</span><span class="sc">;</span> 274 <span class="s">}</span> 275 <span class="k">if</span> <span class="s">(</span><span class="i">$NewTextFileRef</span><span class="s">)</span> <span class="s">{</span> 276 <span class="k">close</span> <span class="i">$NewTextFileRef</span><span class="sc">;</span> 277 <span class="s">}</span> 278 <span class="s">}</span> 279 280 <span class="c"># Write individual reference or multiple references similarity results along with any other data to output files...</span> 281 <span class="c">#</span> <a name="WriteDataToOutputFiles-"></a> 282 <span class="k">sub </span><span class="m">WriteDataToOutputFiles</span> <span class="s">{</span> 283 <span class="k">my</span><span class="s">(</span><span class="i">$NewSDFileRef</span><span class="cm">,</span> <span class="i">$NewTextFileRef</span><span class="cm">,</span> <span class="i">$ComparisonValue</span><span class="cm">,</span> <span class="i">$DatabaseCmpdID</span><span class="cm">,</span> <span class="i">$ReferenceCmpdID</span><span class="s">)</span> = <span class="i">@_</span><span 
class="sc">;</span> 284 285 <span class="k">if</span> <span class="s">(</span><span class="i">$NewSDFileRef</span><span class="s">)</span> <span class="s">{</span> 286 <span class="i">WriteMolStringDataToSDOutputFile</span><span class="s">(</span><span class="i">$DatabaseCmpdID</span><span class="cm">,</span> <span class="i">$NewSDFileRef</span><span class="s">)</span><span class="sc">;</span> 287 <span class="k">if</span> <span class="s">(</span><span class="k">defined</span> <span class="i">$ReferenceCmpdID</span><span class="s">)</span> <span class="s">{</span> 288 <span class="k">print</span> <span class="i">$NewSDFileRef</span> <span class="q">"> <ReferenceCmpdID>\n$ReferenceCmpdID\n\n"</span><span class="sc">;</span> 289 <span class="s">}</span> 290 <span class="k">print</span> <span class="i">$NewSDFileRef</span> <span class="q">"> <DatabaseCmpdID>\n$DatabaseCmpdID\n\n> <ComparisonValue>\n$ComparisonValue\n\n"</span><span class="sc">;</span> 291 <span class="i">WriteDatabaseDataToSDOutputFile</span><span class="s">(</span><span class="i">$DatabaseCmpdID</span><span class="cm">,</span> <span class="i">$NewSDFileRef</span><span class="s">)</span><span class="sc">;</span> 292 <span class="k">print</span> <span class="i">$NewSDFileRef</span> <span class="q">"\$\$\$\$\n"</span><span class="sc">;</span> 293 <span class="s">}</span> 294 295 <span class="k">if</span> <span class="s">(</span><span class="i">$NewTextFileRef</span><span class="s">)</span> <span class="s">{</span> 296 <span class="k">my</span><span class="s">(</span><span class="i">@LineWords</span><span class="s">)</span><span class="sc">;</span> 297 298 <span class="i">@LineWords</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 299 <span class="k">if</span> <span class="s">(</span><span class="k">defined</span> <span class="i">$ReferenceCmpdID</span><span class="s">)</span> <span class="s">{</span> 300 <span class="k">push</span> <span class="i">@LineWords</span><span 
class="cm">,</span> <span class="i">$ReferenceCmpdID</span><span class="sc">;</span> 301 <span class="s">}</span> 302 <span class="k">push</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="s">(</span><span class="i">$DatabaseCmpdID</span><span class="cm">,</span> <span class="i">$ComparisonValue</span><span class="s">)</span><span class="sc">;</span> 303 304 <span class="k">if</span> <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">OutputDataFields</span>} || <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">OutputDataCols</span>}<span class="s">)</span> <span class="s">{</span> 305 <span class="k">push</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="i">RetrieveDatabaseDataForTextOutputFile</span><span class="s">(</span><span class="i">$DatabaseCmpdID</span><span class="s">)</span><span class="sc">;</span> 306 <span class="s">}</span> 307 <span class="k">print</span> <span class="i">$NewTextFileRef</span> <span class="i">JoinWords</span><span class="s">(</span>\<span class="i">@LineWords</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutDelim</span>}<span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutQuote</span>}<span class="s">)</span><span class="cm">,</span> <span class="q">"\n"</span><span class="sc">;</span> 308 <span class="s">}</span> 309 <span class="s">}</span> 310 311 <span class="c"># Open output files...</span> 312 <span class="c">#</span> <a name="SetupAndOpenOutputFiles-"></a> 313 <span class="k">sub </span><span class="m">SetupAndOpenOutputFiles</span> <span class="s">{</span> 314 <span class="k">my</span><span class="s">(</span><span class="i">$NewSDFileRef</span><span class="cm">,</span> <span class="i">$NewTextFileRef</span><span class="cm">,</span> <span class="i">$NewSDFile</span><span class="cm">,</span> 
<span class="i">$NewTextFile</span><span class="s">)</span><span class="sc">;</span> 315 316 <span class="s">(</span><span class="i">$NewSDFileRef</span><span class="cm">,</span> <span class="i">$NewTextFileRef</span><span class="s">)</span> = <span class="s">(</span><span class="k">undef</span><span class="s">)</span> x <span class="n">2</span><span class="sc">;</span> 317 318 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">SDOutput</span>}<span class="s">)</span> <span class="s">{</span> 319 <span class="i">$NewSDFile</span> = <span class="i">$OutputFilesInfo</span>{<span class="w">SDOutFileName</span>}<span class="sc">;</span> 320 <span class="k">print</span> <span class="q">"Generating SD file $NewSDFile...\n"</span><span class="sc">;</span> 321 <span class="k">open</span> <span class="w">NEWSDFILE</span><span class="cm">,</span> <span class="q">">$NewSDFile"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Error: Couldn't open $NewSDFile: $! \n"</span><span class="sc">;</span> 322 <span class="i">$NewSDFileRef</span> = \<span class="i">*NEWSDFILE</span><span class="sc">;</span> 323 <span class="s">}</span> 324 325 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">TextOutput</span>}<span class="s">)</span> <span class="s">{</span> 326 <span class="i">$NewTextFile</span> = <span class="i">$OutputFilesInfo</span>{<span class="w">TextOutFileName</span>}<span class="sc">;</span> 327 <span class="k">print</span> <span class="q">"Generating text file $NewTextFile...\n"</span><span class="sc">;</span> 328 <span class="k">open</span> <span class="w">NEWTEXTFILE</span><span class="cm">,</span> <span class="q">">$NewTextFile"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Error: Couldn't open $NewTextFile: $! 
\n"</span><span class="sc">;</span> 329 <span class="i">$NewTextFileRef</span> = \<span class="i">*NEWTEXTFILE</span><span class="sc">;</span> 330 331 <span class="i">WriteTextFileCoulmnLabels</span><span class="s">(</span>\<span class="i">*NEWTEXTFILE</span><span class="s">)</span><span class="sc">;</span> 332 <span class="s">}</span> 333 334 <span class="k">return</span> <span class="s">(</span><span class="i">$NewSDFileRef</span><span class="cm">,</span> <span class="i">$NewTextFileRef</span><span class="s">)</span><span class="sc">;</span> 335 <span class="s">}</span> 336 337 <span class="c"># Write out approriate column labels to text file...</span> 338 <span class="c">#</span> <a name="WriteTextFileCoulmnLabels-"></a> 339 <span class="k">sub </span><span class="m">WriteTextFileCoulmnLabels</span> <span class="s">{</span> 340 <span class="k">my</span><span class="s">(</span><span class="i">$NewTextFileRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 341 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">@LineWords</span><span class="s">)</span><span class="sc">;</span> 342 343 <span class="i">@LineWords</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 344 345 <span class="k">if</span> <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">IndividualReferenceMode</span>}<span class="s">)</span> <span class="s">{</span> 346 <span class="k">push</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="q">qw(ReferenceCompoundID DatabaseCompoundID ComparisonValue)</span><span class="sc">;</span> 347 <span class="s">}</span> 348 <span class="k">elsif</span> <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">MultipleReferencesMode</span>}<span class="s">)</span> <span class="s">{</span> 349 <span class="k">push</span> <span 
class="i">@LineWords</span><span class="cm">,</span> <span class="q">qw(DatabaseCompoundID ComparisonValue)</span><span class="sc">;</span> 350 <span class="s">}</span> 351 352 <span class="c"># Add columns for other database fingerprints file data to be written to output file...</span> 353 <span class="k">if</span> <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">OutputDataFields</span>}<span class="s">)</span> <span class="s">{</span> 354 <span class="k">push</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataFieldsToOutput</span>}}<span class="sc">;</span> 355 <span class="s">}</span> 356 <span class="k">elsif</span> <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">OutputDataCols</span>}<span class="s">)</span> <span class="s">{</span> 357 <span class="k">push</span> <span class="i">@LineWords</span><span class="cm">,</span> <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataColLabelsToOutput</span>}}<span class="sc">;</span> 358 <span class="s">}</span> 359 360 <span class="i">$Line</span> = <span class="i">JoinWords</span><span class="s">(</span>\<span class="i">@LineWords</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutDelim</span>}<span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">OutQuote</span>}<span class="s">)</span><span class="sc">;</span> 361 <span class="k">print</span> <span class="i">$NewTextFileRef</span> <span class="q">"$Line\n"</span><span class="sc">;</span> 362 <span class="s">}</span> 363 364 <span class="c"># Write molecule string data to SD output file...</span> 365 <span class="c">#</span> <a 
name="WriteMolStringDataToSDOutputFile-"></a> 366 <span class="k">sub </span><span class="m">WriteMolStringDataToSDOutputFile</span> <span class="s">{</span> 367 <span class="k">my</span><span class="s">(</span><span class="i">$DatabaseCmpdID</span><span class="cm">,</span> <span class="i">$NewSDFileRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 368 369 <span class="k">if</span> <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">CollectCmpdStringData</span>}<span class="s">)</span> <span class="s">{</span> 370 <span class="k">my</span><span class="s">(</span><span class="i">$MolString</span><span class="s">)</span><span class="sc">;</span> 371 372 <span class="s">(</span><span class="i">$MolString</span><span class="s">)</span> = <span class="k">split</span> <span class="q">/M END/</span><span class="cm">,</span> <span class="i">$DatabaseFingerprintsFileData</span>{<span class="i">$DatabaseCmpdID</span>}<span class="sc">;</span> 373 <span class="k">print</span> <span class="i">$NewSDFileRef</span> <span class="q">"$MolString\nM END\n"</span><span class="sc">;</span> 374 <span class="s">}</span> 375 <span class="k">else</span> <span class="s">{</span> 376 <span class="c"># Just write out an empty molecule data string...</span> 377 <span class="k">print</span> <span class="i">$NewSDFileRef</span> <span class="i">SDFileUtil::GenerateEmptyCtabBlockLines</span><span class="s">(</span><span class="s">)</span><span class="cm">,</span> <span class="q">"\n"</span><span class="sc">;</span> 378 <span class="s">}</span> 379 <span class="s">}</span> 380 381 <span class="c"># Write database data from SD or Text database file to SD output file...</span> 382 <span class="c">#</span> <a name="WriteDatabaseDataToSDOutputFile-"></a> 383 <span class="k">sub </span><span class="m">WriteDatabaseDataToSDOutputFile</span> <span class="s">{</span> 384 <span class="k">my</span><span 
class="s">(</span><span class="i">$DatabaseCmpdID</span><span class="cm">,</span> <span class="i">$NewSDFileRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 385 386 <span class="k">if</span> <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">OutputDataFields</span>}<span class="s">)</span> <span class="s">{</span> 387 <span class="k">my</span><span class="s">(</span><span class="i">$DataFieldLabel</span><span class="cm">,</span> <span class="i">$DataFieldValue</span><span class="cm">,</span> <span class="i">@CmpdLines</span><span class="cm">,</span> <span class="i">%DataFieldLabelAndValues</span><span class="s">)</span><span class="sc">;</span> 388 389 <span class="i">@CmpdLines</span> = <span class="k">split</span> <span class="q">/\n/</span><span class="cm">,</span> <span class="i">$DatabaseFingerprintsFileData</span>{<span class="i">$DatabaseCmpdID</span>}<span class="sc">;</span> 390 <span class="i">%DataFieldLabelAndValues</span> = <span class="i">GetCmpdDataHeaderLabelsAndValues</span><span class="s">(</span>\<span class="i">@CmpdLines</span><span class="s">)</span><span class="sc">;</span> 391 392 <span class="k">for</span> <span class="i">$DataFieldLabel</span> <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">OutputCurrentDataFields</span>} ? <span class="i">GetCmpdDataHeaderLabels</span><span class="s">(</span>\<span class="i">@CmpdLines</span><span class="s">)</span> <span class="co">:</span> <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataFieldsToOutput</span>}}<span class="s">)</span> <span class="s">{</span> 393 <span class="i">$DataFieldValue</span> = <span class="k">exists</span> <span class="i">$DataFieldLabelAndValues</span>{<span class="i">$DataFieldLabel</span>} ? 
<span class="i">$DataFieldLabelAndValues</span>{<span class="i">$DataFieldLabel</span>} <span class="co">:</span> <span class="q">''</span><span class="sc">;</span> 394 <span class="k">print</span> <span class="i">$NewSDFileRef</span> <span class="q">"> <$DataFieldLabel>\n$DataFieldValue\n\n"</span><span class="sc">;</span> 395 <span class="s">}</span> 396 <span class="s">}</span> 397 <span class="k">elsif</span> <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">OutputDataCols</span>}<span class="s">)</span> <span class="s">{</span> 398 <span class="k">my</span><span class="s">(</span><span class="i">$DataColNum</span><span class="cm">,</span> <span class="i">$DataFieldLabel</span><span class="cm">,</span> <span class="i">$DataFieldValue</span><span class="s">)</span><span class="sc">;</span> 399 400 <span class="k">for</span> <span class="i">$DataColNum</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataColNumsToOutput</span>}}<span class="s">)</span> <span class="s">{</span> 401 <span class="i">$DataFieldLabel</span> = <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataColNumToLabelMap</span>}{<span class="i">$DataColNum</span>}<span class="sc">;</span> 402 <span class="i">$DataFieldValue</span> = <span class="i">$DatabaseFingerprintsFileData</span>{<span class="i">$DatabaseCmpdID</span>}->[<span class="i">$DataColNum</span>]<span class="sc">;</span> 403 <span class="k">print</span> <span class="i">$NewSDFileRef</span> <span class="q">"> <$DataFieldLabel>\n$DataFieldValue\n\n"</span><span class="sc">;</span> 404 <span class="s">}</span> 405 <span class="s">}</span> 406 <span class="s">}</span> 407 408 <span class="c"># Retrieve database data from SD or Text database file for text output file...</span> 409 <span class="c">#</span> <a 
name="RetrieveDatabaseDataForTextOutputFile-"></a> 410 <span class="k">sub </span><span class="m">RetrieveDatabaseDataForTextOutputFile</span> <span class="s">{</span> 411 <span class="k">my</span><span class="s">(</span><span class="i">$DatabaseCmpdID</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 412 <span class="c"># Returns a list of values for the database compound, one per data field or column selected for output; missing values are returned as empty strings...</span> 413 <span class="k">if</span> <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">OutputDataFields</span>}<span class="s">)</span> <span class="s">{</span> 414 <span class="k">my</span><span class="s">(</span><span class="i">@CmpdLines</span><span class="cm">,</span> <span class="i">%DataFieldLabelAndValues</span><span class="s">)</span><span class="sc">;</span> 415 416 <span class="i">@CmpdLines</span> = <span class="k">split</span> <span class="q">/\n/</span><span class="cm">,</span> <span class="i">$DatabaseFingerprintsFileData</span>{<span class="i">$DatabaseCmpdID</span>}<span class="sc">;</span> 417 <span class="i">%DataFieldLabelAndValues</span> = <span class="i">GetCmpdDataHeaderLabelsAndValues</span><span class="s">(</span>\<span class="i">@CmpdLines</span><span class="s">)</span><span class="sc">;</span> 418 419 <span class="k">return</span> <span class="k">map</span> <span class="s">{</span> <span class="k">exists</span> <span class="i">$DataFieldLabelAndValues</span>{<span class="i">$_</span>} ? 
<span class="i">$DataFieldLabelAndValues</span>{<span class="i">$_</span>} <span class="co">:</span> <span class="q">''</span><span class="s">}</span> <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataFieldsToOutput</span>}}<span class="sc">;</span> 420 <span class="s">}</span> 421 <span class="k">elsif</span> <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">OutputDataCols</span>}<span class="s">)</span> <span class="s">{</span> <span class="c"># Data line words are indexed by the selected column numbers, as in the SD output; a missing entry yields one empty string per selected column...</span> 422 <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$DatabaseFingerprintsFileData</span>{<span class="i">$DatabaseCmpdID</span>}<span class="s">)</span> <span class="s">{</span> 423 <span class="k">return</span> <span class="k">map</span> <span class="s">{</span> <span class="i">$DatabaseFingerprintsFileData</span>{<span class="i">$DatabaseCmpdID</span>}->[<span class="i">$_</span>] <span class="s">}</span> 
<span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataColNumsToOutput</span>}}<span class="sc">;</span> 424 <span class="s">}</span> 425 <span class="k">else</span> <span class="s">{</span> 426 <span class="k">return</span> <span class="s">(</span><span class="q">''</span><span class="s">)</span> x <span class="k">scalar</span><span class="s">(</span><span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataColNumsToOutput</span>}}<span class="s">)</span><span class="sc">;</span> 427 <span class="s">}</span> 428 <span class="s">}</span> 429 <span class="s">}</span> 430 431 <span class="c"># Collect database file SD compound string or CSV/TSV data line for generating results</span> 432 <span class="c"># files..</span> 433 <span class="c">#</span> <a name="CollectDatabaseFileData-"></a> 434 <span class="k">sub </span><span class="m">CollectDatabaseFileData</span> <span class="s">{</span> 435 <span class="k">my</span><span class="s">(</span><span class="i">$DatabaseCmpdID</span><span class="cm">,</span> <span class="i">$DatabaseFingerprintsFileIO</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 436 437 <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$DatabaseFingerprintsFileData</span>{<span class="i">$DatabaseCmpdID</span>}<span class="s">)</span> <span class="s">{</span> 438 <span class="k">return</span><span class="sc">;</span> 439 <span class="s">}</span> 440 441 <span class="k">if</span> <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">CollectCmpdStringData</span>}<span class="s">)</span> <span class="s">{</span> 442 <span class="i">$DatabaseFingerprintsFileData</span>{<span class="i">$DatabaseCmpdID</span>} = <span class="i">$DatabaseFingerprintsFileIO</span><span 
class="i">->GetCompoundString</span><span class="s">(</span><span 
class="s">)</span><span class="sc">;</span> 443 <span class="s">}</span> 444 445 <span class="k">if</span> <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">CollectDataLine</span>}<span class="s">)</span> <span class="s">{</span> 446 <span class="k">my</span><span class="s">(</span><span class="i">@DataLineWords</span><span class="s">)</span><span class="sc">;</span> 447 <span class="i">@DataLineWords</span> = <span class="i">$DatabaseFingerprintsFileIO</span><span class="i">->GetDataLineWords</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 448 <span class="i">$DatabaseFingerprintsFileData</span>{<span class="i">$DatabaseCmpdID</span>} = \<span class="i">@DataLineWords</span><span class="sc">;</span> 449 <span class="s">}</span> 450 451 <span class="s">}</span> 452 453 <span class="c"># Read fingerprints data from reference fingerprints file...</span> 454 <span class="c">#</span> <a name="ReadReferenceFingerprintsData-"></a> 455 <span class="k">sub </span><span class="m">ReadReferenceFingerprintsData</span> <span class="s">{</span> 456 <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFileIO</span><span class="s">)</span><span class="sc">;</span> 457 458 <span class="i">$FingerprintsFileIO</span> = <span class="i">Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO</span><span class="s">(</span><span class="i">%</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FingerprintsFileIOParameters</span>}}<span class="s">)</span><span class="sc">;</span> 459 <span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">ReferenceCmpdIDsRef</span>}<span class="cm">,</span> <span class="i">$SimilaritySearchInfo</span>{<span class="w">ReferenceFingerprintsObjectsRef</span>}<span class="s">)</span> = <span 
class="i">Fingerprints::FingerprintsFileUtil::ReadAndProcessFingerpritsData</span><span class="s">(</span><span class="i">$FingerprintsFileIO</span><span class="s">)</span><span class="sc">;</span> 460 461 <span class="s">}</span> 462 463 <span class="c"># Retrieve information about fingerprints files...</span> 464 <span class="c">#</span> <a name="RetrieveFingerprintsFilesInfo-"></a> 465 <span class="k">sub </span><span class="m">RetrieveFingerprintsFilesInfo</span> <span class="s">{</span> 466 467 <span class="i">%FingerprintsFilesInfo</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 468 <span class="i">%OutputFilesInfo</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 469 <span class="i">%SimilaritySearchInfo</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 470 471 <span class="i">%</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 472 <span class="i">%</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 473 474 <span class="c"># Set up reference and database file names...</span> 475 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FileName</span>} = <span class="i">$FingerprintsFilesList</span>[<span class="n">0</span>]<span class="sc">;</span> 476 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">FileName</span>} = <span class="i">$FingerprintsFilesList</span>[<span class="n">1</span>]<span class="sc">;</span> 477 478 <span class="c"># Retrieve information about reference and database fingerprints file...</span> 479 <span class="i">RetrieveReferenceFingerprintsFileInfo</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 
480 <span class="i">RetrieveDatabaseFingerprintsFileInfo</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 481 482 <span class="c"># Setup fingerprints comparison method and associated method parameters...</span> 483 <span class="i">SetupReferenceAndDatabaseFingerprintsComparisonInfo</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 484 485 <span class="c"># Retrieve information for output files...</span> 486 <span class="i">RetrieveOutputFilesInfo</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 487 <span class="s">}</span> 488 489 <span class="c"># Setup reference and database fingerprints comparison method and associated method parameters...</span> 490 <span class="c">#</span> <a name="SetupReferenceAndDatabaseFingerprintsComparisonInfo-"></a> 491 <span class="k">sub </span><span class="m">SetupReferenceAndDatabaseFingerprintsComparisonInfo</span> <span class="s">{</span> 492 493 <span class="c"># Make sure reference and database fingerprints string match...</span> 494 <span class="k">if</span> <span class="s">(</span><span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FirstFingerprintsStringType</span>} !~ <span class="q">/^$FingerprintsFilesInfo{Database}{FirstFingerprintsStringType}$/i</span><span class="s">)</span> || 495 <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FingerprintsBitVectorStringMode</span>} != <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">FingerprintsBitVectorStringMode</span>}<span class="s">)</span> || 496 <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FingerprintsVectorStringMode</span>} != <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span 
class="w">FingerprintsVectorStringMode</span>}<span class="s">)</span> <span class="s">)</span> <span class="s">{</span> 497 <span class="k">die</span> <span class="q">"Error: First reference fingerprints string type, $FingerprintsFilesInfo{Reference}{FirstFingerprintsStringType}, must match first database fingerprints type, $FingerprintsFilesInfo{Database}{FirstFingerprintsStringType}.\n"</span><span class="sc">;</span> 498 <span class="s">}</span> 499 500 <span class="k">if</span> <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FirstFingerprintsStringDescription</span>} !~ <span class="q">/^$FingerprintsFilesInfo{Database}{FirstFingerprintsStringDescription}$/i</span><span class="s">)</span> <span class="s">{</span> 501 <span class="k">warn</span> <span class="q">"Warning: First reference fingerprints string description, $FingerprintsFilesInfo{Reference}{FirstFingerprintsStringDescription}, doesn't match first database fingerprints string description, $FingerprintsFilesInfo{Database}{FirstFingerprintsStringDescription}.\n"</span><span class="sc">;</span> 502 <span class="s">}</span> 503 504 <span class="c"># Setup individual reference and multiple references search mode...</span> 505 <span class="i">$SimilaritySearchInfo</span>{<span class="w">IndividualReferenceMode</span>} = <span class="k">undef</span><span class="sc">;</span> 506 <span class="i">$SimilaritySearchInfo</span>{<span class="w">MultipleReferencesMode</span>} = <span class="k">undef</span><span class="sc">;</span> 507 508 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">Mode</span>} =~ <span class="q">/^IndividualReference$/i</span><span class="s">)</span> <span class="s">{</span> 509 <span class="i">$SimilaritySearchInfo</span>{<span class="w">IndividualReferenceMode</span>} = <span class="n">1</span><span class="sc">;</span> 510 <span class="s">}</span> 511 <span 
class="k">elsif</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">Mode</span>} =~ <span class="q">/^MultipleReferences$/i</span><span class="s">)</span> <span class="s">{</span> 512 <span class="i">$SimilaritySearchInfo</span>{<span class="w">MultipleReferencesMode</span>} = <span class="n">1</span><span class="sc">;</span> 513 <span class="s">}</span> 514 <span class="k">else</span> <span class="s">{</span> 515 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: IndividualReference, MultipleReferences\n"</span><span class="sc">;</span> 516 <span class="s">}</span> 517 518 <span class="c"># Set up reference and database fingerprints similarity search method and paramaters...</span> 519 <span class="k">my</span><span class="s">(</span><span class="i">$ComparisonMeasure</span><span class="cm">,</span> <span class="i">$ComparisonMethod</span><span class="cm">,</span> <span class="i">$ApplyComparisonCutoff</span><span class="cm">,</span> <span class="i">$ComparisonCutoff</span><span class="cm">,</span> <span class="i">$KeepTop</span><span class="cm">,</span> <span class="i">@ComparisonMethodParameters</span><span class="s">)</span><span class="sc">;</span> 520 521 <span class="i">$SimilaritySearchInfo</span>{<span class="w">ComparisonMethod</span>} = <span class="q">''</span><span class="sc">;</span> 522 <span class="i">@</span>{<span class="i">$SimilaritySearchInfo</span>{<span class="w">ComparisonMethodParameters</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 523 524 <span class="i">$SimilaritySearchInfo</span>{<span class="w">ComparisonCutoff</span>} = <span class="q">''</span><span class="sc">;</span> 525 <span class="i">$SimilaritySearchInfo</span>{<span class="w">KeepTop</span>} = <span class="q">''</span><span class="sc">;</span> 526 527 <span class="i">$ComparisonMeasure</span> = <span 
class="q">''</span><span class="sc">;</span> <span class="i">$ComparisonMethod</span> = <span class="q">''</span><span class="sc">;</span> 528 <span class="i">@ComparisonMethodParameters</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 529 530 <span class="j">FINGERPRINTSTYPE:</span> <span class="s">{</span> 531 <span class="k">if</span> <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FingerprintsBitVectorStringMode</span>}<span class="s">)</span> <span class="s">{</span> 532 <span class="i">$ComparisonMeasure</span> = <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedBitVectorComparisonMeasure</span>}<span class="sc">;</span> 533 <span class="i">$ComparisonMethod</span> = <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedBitVectorComparisonMeasureMethod</span>}<span class="sc">;</span> 534 535 <span class="k">if</span> <span class="s">(</span><span class="i">$ComparisonMeasure</span> =~ <span class="q">/^TverskySimilarity$/i</span><span class="s">)</span> <span class="s">{</span> 536 <span class="k">push</span> <span class="i">@ComparisonMethodParameters</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">Alpha</span>}<span class="sc">;</span> 537 <span class="s">}</span> 538 <span class="k">elsif</span> <span class="s">(</span><span class="i">$ComparisonMeasure</span> =~ <span class="q">/^WeightedTverskySimilarity$/i</span><span class="s">)</span> <span class="s">{</span> 539 <span class="k">push</span> <span class="i">@ComparisonMethodParameters</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">Alpha</span>}<span class="sc">;</span> 540 <span class="k">push</span> <span class="i">@ComparisonMethodParameters</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">Beta</span>}<span class="sc">;</span> 541 <span class="s">}</span> 542 <span 
class="k">elsif</span> <span class="s">(</span><span class="i">$ComparisonMeasure</span> =~ <span class="q">/^WeightedTanimotoSimilarity$/i</span><span class="s">)</span> <span class="s">{</span> 543 <span class="k">push</span> <span class="i">@ComparisonMethodParameters</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">Beta</span>}<span class="sc">;</span> 544 <span class="s">}</span> 545 546 <span class="k">last</span> <span class="j">FINGERPRINTSTYPE</span><span class="sc">;</span> 547 <span class="s">}</span> 548 <span class="k">if</span> <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FingerprintsVectorStringMode</span>}<span class="s">)</span> <span class="s">{</span> 549 <span class="k">my</span><span class="s">(</span><span class="i">$SkipValuesCheck</span><span class="s">)</span><span class="sc">;</span> 550 551 <span class="i">$ComparisonMeasure</span> = <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedVectorComparisonMeasure</span>}<span class="sc">;</span> 552 <span class="i">$ComparisonMethod</span> = <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedVectorComparisonMeasuresMethod</span>}<span class="sc">;</span> 553 554 <span class="k">push</span> <span class="i">@ComparisonMethodParameters</span><span class="cm">,</span> <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedVectorComparisonMode</span>}<span class="sc">;</span> 555 556 <span class="i">$SkipValuesCheck</span> = <span class="i">$OptionsInfo</span>{<span class="w">Fast</span>} ? 
<span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> 557 <span class="k">push</span> <span class="i">@ComparisonMethodParameters</span><span class="cm">,</span> <span class="i">$SkipValuesCheck</span><span class="sc">;</span> 558 559 <span class="k">last</span> <span class="j">FINGERPRINTSTYPE</span><span class="sc">;</span> 560 <span class="s">}</span> 561 <span class="k">die</span> <span class="q">"Error: Uknown fingerprints string type. Supported values: FingerprintsBitVectorString or FingerprintsVectorString.\n"</span><span class="sc">;</span> 562 <span class="s">}</span> 563 564 <span class="i">$ApplyComparisonCutoff</span> = <span class="i">$SimilaritySearchInfo</span>{<span class="w">IndividualReferenceMode</span>} ? <span class="n">1</span> <span class="co">:</span> <span class="s">(</span><span class="s">(</span><span class="i">$SimilaritySearchInfo</span>{<span class="w">MultipleReferencesMode</span>} && <span class="i">$OptionsInfo</span>{<span class="w">GroupFusionApplyCutoff</span>}<span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="s">)</span><span class="sc">;</span> 565 566 <span class="i">$ComparisonCutoff</span> = <span class="q">''</span><span class="sc">;</span> <span class="i">$KeepTop</span> = <span class="q">''</span><span class="sc">;</span> 567 <span class="k">if</span> <span class="s">(</span><span class="i">$ComparisonMethod</span> =~ <span class="q">/Distance/i</span><span class="s">)</span> <span class="s">{</span> 568 <span class="i">$ComparisonCutoff</span> = <span class="i">$OptionsInfo</span>{<span class="w">DistanceCutoff</span>}<span class="sc">;</span> 569 <span class="i">$KeepTop</span> = <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">SearchMode</span>} =~ <span class="q">/^SimilaritySearch$/i</span><span class="s">)</span> ? 
<span class="n">0</span> <span class="co">:</span> <span class="n">1</span><span class="sc">;</span> 570 <span class="s">}</span> 571 <span class="k">else</span> <span class="s">{</span> 572 <span class="i">$ComparisonCutoff</span> = <span class="i">$OptionsInfo</span>{<span class="w">SimilarityCutoff</span>}<span class="sc">;</span> 573 <span class="i">$KeepTop</span> = <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">SearchMode</span>} =~ <span class="q">/^SimilaritySearch$/i</span><span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> 574 <span class="s">}</span> 575 576 <span class="i">$SimilaritySearchInfo</span>{<span class="w">ComparisonMethod</span>} = <span class="i">$ComparisonMethod</span><span class="sc">;</span> 577 <span class="i">@</span>{<span class="i">$SimilaritySearchInfo</span>{<span class="w">ComparisonMethodParameters</span>}} = <span class="i">@ComparisonMethodParameters</span><span class="sc">;</span> 578 579 <span class="i">$SimilaritySearchInfo</span>{<span class="w">ComparisonCutoff</span>} = <span class="i">$ComparisonCutoff</span><span class="sc">;</span> 580 <span class="i">$SimilaritySearchInfo</span>{<span class="w">KeepTop</span>} = <span class="i">$KeepTop</span><span class="sc">;</span> 581 <span class="i">$SimilaritySearchInfo</span>{<span class="w">ApplyComparisonCutoff</span>} = <span class="i">$ApplyComparisonCutoff</span><span class="sc">;</span> 582 583 <span class="c"># Setup references to group fusion methods...</span> 584 <span class="i">$SimilaritySearchInfo</span>{<span class="w">GroupFusionMethodRef</span>} = <span class="k">undef</span><span class="sc">;</span> 585 <span class="i">$SimilaritySearchInfo</span>{<span class="w">ApplyPrecisionDuringFusion</span>} = <span class="k">undef</span><span class="sc">;</span> 586 587 <span class="j">FUSIONRULE:</span> <span class="s">{</span> 588 <span class="k">if</span> <span 
class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">GroupFusionRule</span>} =~ <span class="q">/^Max$/i</span><span class="s">)</span> <span class="s">{</span> 589 <span class="c"># It's always the first value in the appropriately sorted list using value of KeepTop...</span> 590 <span class="i">$SimilaritySearchInfo</span>{<span class="w">GroupFusionMethodRef</span>} = <span class="k">sub</span> <span class="s">{</span> <span class="k">my</span><span class="s">(</span><span class="i">$ComparisonValuesRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> <span class="k">return</span> <span class="i">$ComparisonValuesRef</span>->[<span class="n">0</span>]<span class="sc">;</span> <span class="s">}</span><span class="sc">;</span> 591 <span class="k">last</span> <span class="j">FUSIONRULE</span><span class="sc">;</span> 592 <span class="s">}</span> 593 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">GroupFusionRule</span>} =~ <span class="q">/^Min$/i</span><span class="s">)</span> <span class="s">{</span> 594 <span class="c"># It's always the last value in the appropriately sorted list using value of KeepTop...</span> 595 <span class="i">$SimilaritySearchInfo</span>{<span class="w">GroupFusionMethodRef</span>} = <span class="k">sub</span> <span class="s">{</span> <span class="k">my</span><span class="s">(</span><span class="i">$ComparisonValuesRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> <span class="k">return</span> <span class="i">$ComparisonValuesRef</span>->[<span class="i">$#</span>{<span class="i">$ComparisonValuesRef</span>}]<span class="sc">;</span> <span class="s">}</span><span class="sc">;</span> 596 <span class="k">last</span> <span class="j">FUSIONRULE</span><span class="sc">;</span> 597 <span class="s">}</span> 598 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span 
class="w">GroupFusionRule</span>} =~ <span class="q">/^Mean$/i</span><span class="s">)</span> <span class="s">{</span> 599 <span class="i">$SimilaritySearchInfo</span>{<span class="w">GroupFusionMethodRef</span>} = \<span class="i">&StatisticsUtil::Mean</span><span class="sc">;</span> 600 <span class="i">$SimilaritySearchInfo</span>{<span class="w">ApplyPrecisionDuringFusion</span>} = <span class="n">1</span><span class="sc">;</span> 601 <span class="k">last</span> <span class="j">FUSIONRULE</span><span class="sc">;</span> 602 <span class="s">}</span> 603 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">GroupFusionRule</span>} =~ <span class="q">/^Median$/i</span><span class="s">)</span> <span class="s">{</span> 604 <span class="i">$SimilaritySearchInfo</span>{<span class="w">GroupFusionMethodRef</span>} = \<span class="i">&StatisticsUtil::Median</span><span class="sc">;</span> 605 <span class="i">$SimilaritySearchInfo</span>{<span class="w">ApplyPrecisionDuringFusion</span>} = <span class="n">1</span><span class="sc">;</span> 606 <span class="k">last</span> <span class="j">FUSIONRULE</span><span class="sc">;</span> 607 <span class="s">}</span> 608 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">GroupFusionRule</span>} =~ <span class="q">/^Sum$/i</span><span class="s">)</span> <span class="s">{</span> 609 <span class="i">$SimilaritySearchInfo</span>{<span class="w">GroupFusionMethodRef</span>} = \<span class="i">&StatisticsUtil::Sum</span><span class="sc">;</span> 610 <span class="i">$SimilaritySearchInfo</span>{<span class="w">ApplyPrecisionDuringFusion</span>} = <span class="n">1</span><span class="sc">;</span> 611 <span class="k">last</span> <span class="j">FUSIONRULE</span><span class="sc">;</span> 612 <span class="s">}</span> 613 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">GroupFusionRule</span>} 
=~ <span class="q">/^Euclidean$/i</span><span class="s">)</span> <span class="s">{</span> 614 <span class="i">$SimilaritySearchInfo</span>{<span class="w">GroupFusionMethodRef</span>} = \<span class="i">&StatisticsUtil::Euclidean</span><span class="sc">;</span> 615 <span class="i">$SimilaritySearchInfo</span>{<span class="w">ApplyPrecisionDuringFusion</span>} = <span class="n">1</span><span class="sc">;</span> 616 <span class="k">last</span> <span class="j">FUSIONRULE</span><span class="sc">;</span> 617 <span class="s">}</span> 618 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{groupfusionrule}, for option \"-g, --GroupFusionRule\" is not valid. Allowed values: Max, Min, Mean, Median, Sum, Euclidean\n"</span><span class="sc">;</span> 619 <span class="s">}</span> 620 621 <span class="i">$SimilaritySearchInfo</span>{<span class="w">UsekNN</span>} = <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">kNN</span>} !~ <span class="q">/^All$/i</span><span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> 622 <span class="i">$SimilaritySearchInfo</span>{<span class="w">SortComparisonValues</span>} = <span class="s">(</span><span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">GroupFusionRule</span>} =~ <span class="q">/^(Max|Min)$/i</span><span class="s">)</span> || <span class="i">$SimilaritySearchInfo</span>{<span class="w">UsekNN</span>}<span class="s">)</span> ? 
<span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> 623 <span class="s">}</span> 624 625 <span class="c"># Retrieve information about reference fingerprints file...</span> 626 <span class="c">#</span> <a name="RetrieveReferenceFingerprintsFileInfo-"></a> 627 <span class="k">sub </span><span class="m">RetrieveReferenceFingerprintsFileInfo</span> <span class="s">{</span> 628 <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFile</span><span class="cm">,</span> <span class="i">$FileType</span><span class="cm">,</span> <span class="i">$InDelim</span><span class="cm">,</span> <span class="i">$FingerprintsFileIO</span><span class="cm">,</span> <span class="i">$FingerprintsStringMode</span><span class="cm">,</span> <span class="i">$FingerprintsBitVectorStringMode</span><span class="cm">,</span> <span class="i">$FingerprintsVectorStringMode</span><span class="cm">,</span> <span class="i">$FirstFingerprintsStringType</span><span class="cm">,</span> <span class="i">$FirstFingerprintsStringDescription</span><span class="s">)</span><span class="sc">;</span> 629 630 <span class="i">$FingerprintsFile</span> = <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FileName</span>}<span class="sc">;</span> 631 <span class="s">(</span><span class="i">$FileType</span><span class="cm">,</span> <span class="i">$InDelim</span><span class="s">)</span> = <span class="i">RetrieveFingerprintsFileInfo</span><span class="s">(</span><span class="i">$FingerprintsFile</span><span class="s">)</span><span class="sc">;</span> 632 633 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FileType</span>} = <span class="i">$FileType</span><span class="sc">;</span> 634 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">InDelim</span>} = <span class="i">$InDelim</span><span class="sc">;</span> 635 
636 <span class="c"># Setup reference FingerprintsFileIO parameters...</span> 637 <span class="i">%</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FingerprintsFileIOParameters</span>}} = <span class="i">RetrieveFingerprintsFileIOParameters</span><span class="s">(</span><span class="q">'Reference'</span><span class="cm">,</span> <span class="i">$FileType</span><span class="cm">,</span> <span class="i">$FingerprintsFile</span><span class="s">)</span><span class="sc">;</span> 638 639 <span class="c"># Make sure reference fingerprints data file contains valid data and retrieve fingerprints string mode information...</span> 640 <span class="s">(</span><span class="i">$FingerprintsStringMode</span><span class="cm">,</span> <span class="i">$FingerprintsBitVectorStringMode</span><span class="cm">,</span> <span class="i">$FingerprintsVectorStringMode</span><span class="cm">,</span> <span class="i">$FirstFingerprintsStringType</span><span class="cm">,</span> <span class="i">$FirstFingerprintsStringDescription</span><span class="s">)</span> = <span class="i">RetrieveFingerprintsFileFingerprintsStringInfo</span><span class="s">(</span><span class="q">'Reference'</span><span class="cm">,</span> <span class="i">$FingerprintsFile</span><span class="s">)</span><span class="sc">;</span> 641 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FingerprintsStringMode</span>} = <span class="i">$FingerprintsStringMode</span><span class="sc">;</span> 642 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FingerprintsBitVectorStringMode</span>} = <span class="i">$FingerprintsBitVectorStringMode</span><span class="sc">;</span> 643 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FingerprintsVectorStringMode</span>} = <span class="i">$FingerprintsVectorStringMode</span><span class="sc">;</span> 644 <span 
class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FirstFingerprintsStringType</span>} = <span class="i">$FirstFingerprintsStringType</span><span class="sc">;</span> 645 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FirstFingerprintsStringDescription</span>} = <span class="i">$FirstFingerprintsStringDescription</span><span class="sc">;</span> 646 647 <span class="s">}</span> 648 649 <span class="c"># Retrieve information about database fingerprints file...</span> 650 <span class="c">#</span> <a name="RetrieveDatabaseFingerprintsFileInfo-"></a> 651 <span class="k">sub </span><span class="m">RetrieveDatabaseFingerprintsFileInfo</span> <span class="s">{</span> 652 <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFile</span><span class="cm">,</span> <span class="i">$FileType</span><span class="cm">,</span> <span class="i">$InDelim</span><span class="cm">,</span> <span class="i">$FingerprintsFileIO</span><span class="cm">,</span> <span class="i">$FingerprintsStringMode</span><span class="cm">,</span> <span class="i">$FingerprintsBitVectorStringMode</span><span class="cm">,</span> <span class="i">$FingerprintsVectorStringMode</span><span class="cm">,</span> <span class="i">$FirstFingerprintsStringType</span><span class="cm">,</span> <span class="i">$FirstFingerprintsStringDescription</span><span class="s">)</span><span class="sc">;</span> 653 654 <span class="i">$FingerprintsFile</span> = <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">FileName</span>}<span class="sc">;</span> 655 <span class="s">(</span><span class="i">$FileType</span><span class="cm">,</span> <span class="i">$InDelim</span><span class="s">)</span> = <span class="i">RetrieveFingerprintsFileInfo</span><span class="s">(</span><span class="i">$FingerprintsFile</span><span class="s">)</span><span class="sc">;</span> 656 657 <span 
class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">FileType</span>} = <span class="i">$FileType</span><span class="sc">;</span> 658 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">InDelim</span>} = <span class="i">$InDelim</span><span class="sc">;</span> 659 660 <span class="c"># Setup database FingerprintsFileIO parameters...</span> 661 <span class="i">%</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">FingerprintsFileIOParameters</span>}} = <span class="i">RetrieveFingerprintsFileIOParameters</span><span class="s">(</span><span class="q">'Database'</span><span class="cm">,</span> <span class="i">$FileType</span><span class="cm">,</span> <span class="i">$FingerprintsFile</span><span class="s">)</span><span class="sc">;</span> 662 663 <span class="c"># Make sure database fingerprints data file contains valid data and retrieve fingerprints string mode information...</span> 664 <span class="s">(</span><span class="i">$FingerprintsStringMode</span><span class="cm">,</span> <span class="i">$FingerprintsBitVectorStringMode</span><span class="cm">,</span> <span class="i">$FingerprintsVectorStringMode</span><span class="cm">,</span> <span class="i">$FirstFingerprintsStringType</span><span class="cm">,</span> <span class="i">$FirstFingerprintsStringDescription</span><span class="s">)</span> = <span class="i">RetrieveFingerprintsFileFingerprintsStringInfo</span><span class="s">(</span><span class="q">'Database'</span><span class="cm">,</span> <span class="i">$FingerprintsFile</span><span class="s">)</span><span class="sc">;</span> 665 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">FingerprintsStringMode</span>} = <span class="i">$FingerprintsStringMode</span><span class="sc">;</span> 666 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span 
class="w">FingerprintsBitVectorStringMode</span>} = <span class="i">$FingerprintsBitVectorStringMode</span><span class="sc">;</span> 667 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">FingerprintsVectorStringMode</span>} = <span class="i">$FingerprintsVectorStringMode</span><span class="sc">;</span> 668 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">FirstFingerprintsStringType</span>} = <span class="i">$FirstFingerprintsStringType</span><span class="sc">;</span> 669 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">FirstFingerprintsStringDescription</span>} = <span class="i">$FirstFingerprintsStringDescription</span><span class="sc">;</span> 670 671 <span class="c"># Retrieve database fingerprints data field information for output file...</span> 672 <span class="c">#</span> 673 <span class="i">RetrieveDatabaseFingerprintsDataFieldsInfo</span><span class="s">(</span><span class="i">$FingerprintsFile</span><span class="cm">,</span> <span class="i">$FileType</span><span class="cm">,</span> <span class="i">$InDelim</span><span class="s">)</span><span class="sc">;</span> 674 675 <span class="c"># Retrieve database fingerprints text file data columns information for output file...</span> 676 <span class="c">#</span> 677 <span class="i">RetrieveDatabaseFingerprintsDataColsInfo</span><span class="s">(</span><span class="i">$FingerprintsFile</span><span class="cm">,</span> <span class="i">$FileType</span><span class="cm">,</span> <span class="i">$InDelim</span><span class="s">)</span><span class="sc">;</span> 678 679 <span class="c"># Any need to collect database compound string or data line for generation of results files...</span> 680 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">CollectCmpdStringData</span>} = <span class="s">(</span><span class="i">$FileType</span> =~ <span 
class="q">/^SD$/i</span><span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> 681 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">CollectDataLine</span>} = <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^Text$/i</span> && <span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataColsMode</span>} =~ <span class="q">/^(All|Specify)$/i</span><span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> 682 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">CollectInputFileData</span>} = <span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">CollectCmpdStringData</span>} || <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">CollectDataLine</span>}<span class="s">)</span> ? 
<span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> 683 684 <span class="c"># Set maximum number of similar compounds to find for individual reference or set of multiple</span> 685 <span class="c"># reference compounds...</span> 686 <span class="c">#</span> 687 <span class="i">SetMaximumSimilarMoleculesToRetrieve</span><span class="s">(</span><span class="i">$FingerprintsFile</span><span class="cm">,</span> <span class="i">$FileType</span><span class="cm">,</span> <span class="i">$InDelim</span><span class="s">)</span><span class="sc">;</span> 688 <span class="s">}</span> 689 690 <span class="c"># Retrieve database fingerprints data field information...</span> 691 <span class="c">#</span> <a name="RetrieveDatabaseFingerprintsDataFieldsInfo-"></a> 692 <span class="k">sub </span><span class="m">RetrieveDatabaseFingerprintsDataFieldsInfo</span> <span class="s">{</span> 693 <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFile</span><span class="cm">,</span> <span class="i">$FileType</span><span class="cm">,</span> <span class="i">$InDelim</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 694 <span class="k">my</span><span class="s">(</span><span class="i">$CollectDataFields</span><span class="cm">,</span> <span class="i">$CmpdCount</span><span class="cm">,</span> <span class="i">$AllDataFieldsRef</span><span class="cm">,</span> <span class="i">$CommonDataFieldsRef</span><span class="cm">,</span> <span class="i">@DataFieldsToOutput</span><span class="s">)</span><span class="sc">;</span> 695 696 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">OutputDataFields</span>} = <span class="n">0</span><span class="sc">;</span> 697 <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataFieldsToOutput</span>}} = <span class="s">(</span><span 
class="s">)</span><span class="sc">;</span> 698 699 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">OutputCurrentDataFields</span>} = <span class="n">0</span><span class="sc">;</span> 700 701 <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">AllDataFields</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 702 <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">CommonDataFields</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 703 <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">SpecifiedDatabaseDataFields</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 704 705 <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> !~ <span class="q">/^SD$/i</span><span class="s">)</span> <span class="s">{</span> 706 <span class="k">return</span><span class="sc">;</span> 707 <span class="s">}</span> 708 709 <span class="c"># No need to go over SD file and collect data fields for SD file during All DatabaseDataFieldsMode as</span> 710 <span class="c"># they would be retrieved from database SD file compound string during generation of output files...</span> 711 <span class="c">#</span> 712 <span class="i">$CollectDataFields</span> = <span class="s">(</span><span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">TextOutput</span>} && <span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataFieldsMode</span>} =~ <span class="q">/^(All|Common)$/i</span><span class="s">)</span> || <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">SDOutput</span>} && <span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataFieldsMode</span>} =~ <span 
class="q">/^Common$/i</span><span class="s">)</span><span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> 713 714 <span class="s">(</span><span class="i">$CmpdCount</span><span class="cm">,</span> <span class="i">$AllDataFieldsRef</span><span class="cm">,</span> <span class="i">$CommonDataFieldsRef</span><span class="s">)</span> = <span class="s">(</span><span class="k">undef</span><span class="s">)</span> x <span class="n">2</span><span class="sc">;</span> 715 716 <span class="k">if</span> <span class="s">(</span><span class="i">$CollectDataFields</span><span class="s">)</span> <span class="s">{</span> 717 <span class="k">open</span> <span class="w">SDFILE</span><span class="cm">,</span> <span class="q">"$FingerprintsFile"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Error: Couldn't open $FingerprintsFile: $! \n"</span><span class="sc">;</span> 718 <span class="s">(</span><span class="i">$CmpdCount</span><span class="cm">,</span> <span class="i">$AllDataFieldsRef</span><span class="cm">,</span> <span class="i">$CommonDataFieldsRef</span><span class="s">)</span> = <span class="i">GetAllAndCommonCmpdDataHeaderLabels</span><span class="s">(</span>\<span class="i">*SDFILE</span><span class="s">)</span><span class="sc">;</span> 719 <span class="k">close</span> <span class="w">SDFILE</span><span class="sc">;</span> 720 <span class="s">}</span> 721 722 <span class="i">@DataFieldsToOutput</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 723 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataFieldsMode</span>} =~ <span class="q">/^All$/i</span><span class="s">)</span> <span class="s">{</span> 724 <span class="k">if</span> <span class="s">(</span><span class="k">defined</span> <span class="i">$AllDataFieldsRef</span><span class="s">)</span> <span class="s">{</span> 725 <span 
class="k">push</span> <span class="i">@DataFieldsToOutput</span><span class="cm">,</span> <span class="i">@</span>{<span class="i">$AllDataFieldsRef</span>}<span class="sc">;</span> 726 <span class="k">push</span> <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">AllDataFields</span>}}<span class="cm">,</span> <span class="i">@</span>{<span class="i">$AllDataFieldsRef</span>}<span class="sc">;</span> 727 <span class="s">}</span> 728 <span class="k">else</span> <span class="s">{</span> 729 <span class="c"># Retrieve and output data fields and values dynamically...</span> 730 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">OutputCurrentDataFields</span>} = <span class="n">1</span><span class="sc">;</span> 731 <span class="s">}</span> 732 <span class="s">}</span> 733 <span class="k">elsif</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataFieldsMode</span>} =~ <span class="q">/^Common$/i</span><span class="s">)</span> <span class="s">{</span> 734 <span class="k">if</span> <span class="s">(</span><span class="k">defined</span> <span class="i">$CommonDataFieldsRef</span><span class="s">)</span> <span class="s">{</span> 735 <span class="k">push</span> <span class="i">@DataFieldsToOutput</span><span class="cm">,</span> <span class="i">@</span>{<span class="i">$CommonDataFieldsRef</span>}<span class="sc">;</span> 736 <span class="k">push</span> <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">CommonDataFields</span>}}<span class="cm">,</span> <span class="i">@</span>{<span class="i">$CommonDataFieldsRef</span>}<span class="sc">;</span> 737 <span class="s">}</span> 738 <span class="s">}</span> 739 <span class="k">elsif</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataFieldsMode</span>} =~ <span 
class="q">/^Specify$/i</span><span class="s">)</span> <span class="s">{</span> 740 <span class="k">push</span> <span class="i">@DataFieldsToOutput</span><span class="cm">,</span> <span class="i">@</span>{<span class="i">$OptionsInfo</span>{<span class="w">SpecifiedDatabaseDataFields</span>}}<span class="sc">;</span> 741 <span class="k">push</span> <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">SpecifiedDatabaseDataFields</span>}}<span class="cm">,</span> <span class="i">@</span>{<span class="i">$OptionsInfo</span>{<span class="w">SpecifiedDatabaseDataFields</span>}}<span class="sc">;</span> 742 <span class="s">}</span> 743 744 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataFieldsMode</span>} !~ <span class="q">/^CompoundID$/i</span><span class="s">)</span> <span class="s">{</span> 745 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">OutputDataFields</span>} = <span class="n">1</span><span class="sc">;</span> 746 <span class="s">}</span> 747 748 <span class="k">push</span> <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataFieldsToOutput</span>}}<span class="cm">,</span> <span class="i">@DataFieldsToOutput</span><span class="sc">;</span> 749 750 <span class="s">}</span> 751 752 <span class="c"># Retrieve database fingerprints data columns information...</span> 753 <span class="c">#</span> <a name="RetrieveDatabaseFingerprintsDataColsInfo-"></a> 754 <span class="k">sub </span><span class="m">RetrieveDatabaseFingerprintsDataColsInfo</span> <span class="s">{</span> 755 <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFile</span><span class="cm">,</span> <span class="i">$FileType</span><span class="cm">,</span> <span class="i">$InDelim</span><span class="s">)</span> = <span 
class="i">@_</span><span class="sc">;</span> 756 <span class="k">my</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">$ColNum</span><span class="cm">,</span> <span class="i">$ColLabel</span><span class="cm">,</span> <span class="i">$NumOfCols</span><span class="cm">,</span> <span class="i">@DataColLabels</span><span class="cm">,</span> <span class="i">@DataColLabelsToOutput</span><span class="cm">,</span> <span class="i">@DataColNumsToOutput</span><span class="cm">,</span> <span class="i">%DataColLabelToNumMap</span><span class="cm">,</span> <span class="i">%DataColNumToLabelMap</span><span class="s">)</span><span class="sc">;</span> 757 758 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">OutputDataCols</span>} = <span class="n">0</span><span class="sc">;</span> 759 760 <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataColLabels</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 761 <span class="i">%</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataColLabelToNumMap</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 762 <span class="i">%</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataColNumToLabelMap</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 763 764 <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataColNumsToOutput</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 765 <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataColLabelsToOutput</span>}} = <span class="s">(</span><span 
class="s">)</span><span class="sc">;</span> 766 767 <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> !~ <span class="q">/^Text$/i</span><span class="s">)</span> <span class="s">{</span> 768 <span class="k">return</span><span class="sc">;</span> 769 <span class="s">}</span> 770 771 <span class="i">@DataColLabels</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 772 <span class="i">@DataColLabelsToOutput</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 773 <span class="i">@DataColNumsToOutput</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 774 775 <span class="i">%DataColLabelToNumMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 776 <span class="i">%DataColNumToLabelMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 777 778 <span class="c"># Get column label line...</span> 779 <span class="k">open</span> <span class="w">TEXTFILE</span><span class="cm">,</span> <span class="q">"$FingerprintsFile"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Error: Couldn't open $FingerprintsFile: $! \n"</span><span class="sc">;</span> 780 <span class="i">$Line</span> = <span class="i">TextUtil::GetTextLine</span><span class="s">(</span>\<span class="i">*TEXTFILE</span><span class="s">)</span><span class="sc">;</span> 781 <span class="k">close</span> <span class="w">TEXTFILE</span><span class="sc">;</span> 782 783 <span class="i">$InDelim</span> = <span class="s">(</span><span class="i">$InDelim</span> =~ <span class="q">/^Tab$/i</span><span class="s">)</span> ? <span class="q">"\t"</span> <span class="co">:</span> <span class="s">(</span><span class="i">$InDelim</span> =~ <span class="q">/semicolon/i</span> ? 
<span class="q">"\;"</span> <span class="co">:</span> <span class="q">"\,"</span><span class="s">)</span><span class="sc">;</span> 784 785 <span class="i">@DataColLabels</span> = <span class="i">TextUtil::SplitWords</span><span class="s">(</span><span class="i">$Line</span><span class="cm">,</span> <span class="i">$InDelim</span><span class="s">)</span><span class="sc">;</span> 786 <span class="i">$NumOfCols</span> = <span class="k">scalar</span> <span class="i">@DataColLabels</span><span class="sc">;</span> 787 788 <span class="k">for</span> <span class="i">$ColNum</span> <span class="s">(</span><span class="n">0</span> .. <span class="i">$#DataColLabels</span><span class="s">)</span> <span class="s">{</span> 789 <span class="i">$ColLabel</span> = <span class="i">$DataColLabels</span>[<span class="i">$ColNum</span>]<span class="sc">;</span> 790 <span class="i">$DataColLabelToNumMap</span>{<span class="i">$ColLabel</span>} = <span class="i">$ColNum</span><span class="sc">;</span> 791 <span class="i">$DataColNumToLabelMap</span>{<span class="i">$ColNum</span>} = <span class="i">$ColLabel</span><span class="sc">;</span> 792 <span class="s">}</span> 793 794 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataColsMode</span>} =~ <span class="q">/^Specify$/i</span><span class="s">)</span> <span class="s">{</span> 795 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">DatabaseColMode</span>} =~ <span class="q">/^ColNum$/i</span><span class="s">)</span> <span class="s">{</span> 796 <span class="k">for</span> <span class="i">$ColNum</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$OptionsInfo</span>{<span class="w">SpecifiedDatabaseDataCols</span>}}<span class="s">)</span> <span class="s">{</span> 797 <span class="k">if</span> <span class="s">(</span><span class="i">$ColNum</span> > <span class="i">$NumOfCols</span><span 
class="s">)</span> <span class="s">{</span> 798 <span class="k">die</span> <span class="q">"Error: Column number, $ColNum, specified using \"--DatabaseDataCols\" is not valid: It must be <= $NumOfCols\n"</span><span class="sc">;</span> 799 <span class="s">}</span> 800 <span class="k">push</span> <span class="i">@DataColNumsToOutput</span><span class="cm">,</span> <span class="s">(</span><span class="i">$ColNum</span> - <span class="n">1</span><span class="s">)</span><span class="sc">;</span> 801 <span class="s">}</span> 802 <span class="s">}</span> 803 <span class="k">elsif</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">DatabaseColMode</span>} =~ <span class="q">/^ColLabel$/i</span><span class="s">)</span> <span class="s">{</span> 804 <span class="k">for</span> <span class="i">$ColLabel</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$OptionsInfo</span>{<span class="w">SpecifiedDatabaseDataCols</span>}}<span class="s">)</span> <span class="s">{</span> 805 <span class="k">if</span> <span class="s">(</span>!<span class="k">exists</span> <span class="i">$DataColLabelToNumMap</span>{<span class="i">$ColLabel</span>}<span class="s">)</span> <span class="s">{</span> 806 <span class="k">die</span> <span class="q">"Error: Column label, $ColLabel, specified using \"--DatabaseDataCols\" is not valid: It doesn't exist\n"</span><span class="sc">;</span> 807 <span class="s">}</span> 808 <span class="k">push</span> <span class="i">@DataColNumsToOutput</span><span class="cm">,</span> <span class="i">$DataColLabelToNumMap</span>{<span class="i">$ColLabel</span>}<span class="sc">;</span> 809 <span class="s">}</span> 810 <span class="s">}</span> 811 <span class="s">}</span> 812 <span class="k">elsif</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataColsMode</span>} =~ <span class="q">/^All$/i</span><span class="s">)</span> <span class="s">{</span> 813 <span 
class="i">@DataColNumsToOutput</span> = <span class="s">(</span><span class="n">0</span> .. <span class="i">$#DataColLabels</span><span class="s">)</span><span class="sc">;</span> 814 <span class="s">}</span> 815 816 <span class="c"># Setup data column labels to output: one label for each selected column number...</span> 817 <span class="k">if</span> <span class="s">(</span><span class="k">scalar</span> <span class="i">@DataColNumsToOutput</span><span class="s">)</span> <span class="s">{</span> 818 <span class="i">@DataColLabelsToOutput</span> = <span class="k">map</span> <span class="s">{</span> <span class="i">$DataColNumToLabelMap</span>{<span class="i">$_</span>} <span class="s">}</span> <span class="i">@DataColNumsToOutput</span><span class="sc">;</span> 819 <span class="s">}</span> 820 821 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">OutputDataCols</span>} = <span class="k">scalar</span> <span class="i">@DataColNumsToOutput</span> ? 
<span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> 822 823 <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataColLabels</span>}} = <span class="i">@DataColLabels</span><span class="sc">;</span> 824 <span class="i">%</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataColLabelToNumMap</span>}} = <span class="i">%DataColLabelToNumMap</span><span class="sc">;</span> 825 <span class="i">%</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataColNumToLabelMap</span>}} = <span class="i">%DataColNumToLabelMap</span><span class="sc">;</span> 826 827 <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataColNumsToOutput</span>}} = <span class="i">@DataColNumsToOutput</span><span class="sc">;</span> 828 <span class="i">@</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">DataColLabelsToOutput</span>}} = <span class="i">@DataColLabelsToOutput</span><span class="sc">;</span> 829 <span class="s">}</span> 830 831 <span class="c"># Set maximum number of similar compounds to find for individual reference or set of multiple</span> 832 <span class="c"># reference compounds...</span> 833 <span class="c">#</span> <a name="SetMaximumSimilarMoleculesToRetrieve-"></a> 834 <span class="k">sub </span><span class="m">SetMaximumSimilarMoleculesToRetrieve</span> <span class="s">{</span> 835 <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFile</span><span class="cm">,</span> <span class="i">$FileType</span><span class="cm">,</span> <span class="i">$InDelim</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 836 <span class="k">my</span><span class="s">(</span><span 
class="i">$MaxSimilarMolecules</span><span class="cm">,</span> <span class="i">$NumOfDatabaseMolecules</span><span class="cm">,</span> <span class="i">$PercentSimilarMolecules</span><span class="cm">,</span> <span class="i">$Line</span><span class="s">)</span><span class="sc">;</span> 837 838 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">SimilarCountMode</span>} !~ <span class="q">/^PercentSimilar$/i</span><span class="s">)</span> <span class="s">{</span> 839 <span class="k">return</span><span class="sc">;</span> 840 <span class="s">}</span> 841 842 <span class="i">$PercentSimilarMolecules</span> = <span class="i">$OptionsInfo</span>{<span class="w">PercentSimilarMolecules</span>}<span class="sc">;</span> 843 844 <span class="c"># Count database entries to figure out MaxSimilarMolecules using PercentSimilarMolecules</span> 845 <span class="c"># value...</span> 846 <span class="i">$NumOfDatabaseMolecules</span> = <span class="n">0</span><span class="sc">;</span> 847 <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^SD$/i</span> && <span class="k">exists</span><span class="s">(</span><span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">NumOfDatabaseMolecules</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 848 <span class="c"># It might already be counted for SD file...</span> 849 <span class="i">$NumOfDatabaseMolecules</span> = <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">NumOfDatabaseMolecules</span>}<span class="sc">;</span> 850 <span class="s">}</span> 851 <span class="k">else</span> <span class="s">{</span> 852 <span class="k">print</span> <span class="q">"Calculating maximum number of similar molecules to retrieve for \"PercentSimilar\" value of \"--SimilarCountMode\" option by counting number of molecules in database fingerprints 
file...\n"</span><span class="sc">;</span> 853 <span class="k">open</span> <span class="w">FINGERPRINTSFILE</span><span class="cm">,</span> <span class="q">"$FingerprintsFile"</span> <span class="k">or</span> <span class="k">die</span> <span class="q">"Error: Couldn't open $FingerprintsFile: $! \n"</span><span class="sc">;</span> 854 <span class="j">FILETYPE:</span> <span class="s">{</span> 855 <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^SD$/i</span><span class="s">)</span> <span class="s">{</span> 856 <span class="k">while</span> <span class="s">(</span><span class="i">$Line</span> = <span class="i">TextUtil::GetTextLine</span><span class="s">(</span>\<span class="i">*FINGERPRINTSFILE</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 857 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> =~ <span class="q">/^\$\$\$\$/</span><span class="s">)</span> <span class="s">{</span> 858 <span class="i">$NumOfDatabaseMolecules</span>++<span class="sc">;</span> 859 <span class="s">}</span> 860 <span class="s">}</span> 861 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span> 862 <span class="s">}</span> 863 <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^Text$/i</span><span class="s">)</span> <span class="s">{</span> 864 <span class="c"># Ignore column label line...</span> 865 <span class="i">$Line</span> = <span class="i">TextUtil::GetTextLine</span><span class="s">(</span>\<span class="i">*FINGERPRINTSFILE</span><span class="s">)</span><span class="sc">;</span> 866 <span class="k">while</span> <span class="s">(</span><span class="i">$Line</span> = <span class="i">TextUtil::GetTextLine</span><span class="s">(</span>\<span class="i">*FINGERPRINTSFILE</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 867 <span 
class="i">$NumOfDatabaseMolecules</span>++<span class="sc">;</span> 868 <span class="s">}</span> 869 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span> 870 <span class="s">}</span> 871 <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^FP$/i</span><span class="s">)</span> <span class="s">{</span> 872 <span class="k">while</span> <span class="s">(</span><span class="i">$Line</span> = <span class="i">TextUtil::GetTextLine</span><span class="s">(</span>\<span class="i">*FINGERPRINTSFILE</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 873 <span class="k">if</span> <span class="s">(</span><span class="i">$Line</span> !~ <span class="q">/^#/</span><span class="s">)</span> <span class="s">{</span> 874 <span class="i">$NumOfDatabaseMolecules</span>++<span class="sc">;</span> 875 <span class="s">}</span> 876 <span class="s">}</span> 877 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span> 878 <span class="s">}</span> 879 <span class="i">$NumOfDatabaseMolecules</span> = <span class="n">0</span><span class="sc">;</span> 880 <span class="s">}</span> 881 <span class="k">close</span> <span class="w">FINGERPRINTSFILE</span><span class="sc">;</span> 882 <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">NumOfDatabaseMolecules</span>} = <span class="i">$NumOfDatabaseMolecules</span><span class="sc">;</span> 883 <span class="s">}</span> 884 885 <span class="i">$MaxSimilarMolecules</span> = <span class="k">int</span> <span class="s">(</span><span class="s">(</span><span class="i">$NumOfDatabaseMolecules</span> * <span class="i">$PercentSimilarMolecules</span><span class="s">)</span>/<span class="n">100</span><span class="s">)</span><span class="sc">;</span> 886 <span class="k">if</span> <span class="s">(</span><span class="i">$MaxSimilarMolecules</span> < <span class="n">1</span><span 
class="s">)</span> <span class="s">{</span> 887 <span class="i">$MaxSimilarMolecules</span> = <span class="n">1</span><span class="sc">;</span> 888 <span class="s">}</span> 889 890 <span class="i">$OptionsInfo</span>{<span class="w">MaxSimilarMolecules</span>} = <span class="i">$MaxSimilarMolecules</span><span class="sc">;</span> 891 <span class="s">}</span> 892 893 <span class="c"># Retrieve information about fingerprints file...</span> 894 <span class="c">#</span> <a name="RetrieveFingerprintsFileInfo-"></a> 895 <span class="k">sub </span><span class="m">RetrieveFingerprintsFileInfo</span> <span class="s">{</span> 896 <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFile</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 897 <span class="k">my</span><span class="s">(</span><span class="i">$FileType</span><span class="cm">,</span> <span class="i">$InDelim</span><span class="cm">,</span> <span class="i">$FileDir</span><span class="cm">,</span> <span class="i">$FileExt</span><span class="cm">,</span> <span class="i">$FileName</span><span class="s">)</span><span class="sc">;</span> 898 899 <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">-e</span> <span class="i">$FingerprintsFile</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 900 <span class="k">die</span> <span class="q">"Error: Input fingerprints file, $FingerprintsFile, doesn't exist.\n"</span><span class="sc">;</span> 901 <span class="s">}</span> 902 903 <span class="i">$FileType</span> = <span class="i">Fingerprints::FingerprintsFileUtil::GetFingerprintsFileType</span><span class="s">(</span><span class="i">$FingerprintsFile</span><span class="s">)</span><span class="sc">;</span> 904 <span class="k">if</span> <span class="s">(</span><span class="i">IsEmpty</span><span class="s">(</span><span class="i">$FileType</span><span class="s">)</span><span class="s">)</span> <span 
class="s">{</span> 905 <span class="k">die</span> <span class="q">"Error: Input file, $FingerprintsFile, is not a fingerprints file.\n"</span><span class="sc">;</span> 906 <span class="s">}</span> 907 908 <span class="i">$InDelim</span> = <span class="q">''</span><span class="sc">;</span> 909 <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^Text$/i</span><span class="s">)</span> <span class="s">{</span> 910 <span class="i">$FileDir</span> = <span class="q">""</span><span class="sc">;</span> <span class="i">$FileName</span> = <span class="q">""</span><span class="sc">;</span> <span class="i">$FileExt</span> = <span class="q">""</span><span class="sc">;</span> 911 <span class="s">(</span><span class="i">$FileDir</span><span class="cm">,</span> <span class="i">$FileName</span><span class="cm">,</span> <span class="i">$FileExt</span><span class="s">)</span> = <span class="i">ParseFileName</span><span class="s">(</span><span class="i">$FingerprintsFile</span><span class="s">)</span><span class="sc">;</span> 912 <span class="i">$InDelim</span> = <span class="s">(</span><span class="i">$FileExt</span> =~ <span class="q">/^tsv$/i</span><span class="s">)</span> ? 
<span class="q">'Tab'</span> <span class="co">:</span> <span class="i">$OptionsInfo</span>{<span class="w">InDelim</span>}<span class="sc">;</span> 913 <span class="s">}</span> 914 915 <span class="k">return</span> <span class="s">(</span><span class="i">$FileType</span><span class="cm">,</span> <span class="i">$InDelim</span><span class="s">)</span><span class="sc">;</span> 916 <span class="s">}</span> 917 918 <span class="c"># Retrieve fingerprints file IO parameters...</span> 919 <span class="c">#</span> <a name="RetrieveFingerprintsFileIOParameters-"></a> 920 <span class="k">sub </span><span class="m">RetrieveFingerprintsFileIOParameters</span> <span class="s">{</span> 921 <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFileMode</span><span class="cm">,</span> <span class="i">$FileType</span><span class="cm">,</span> <span class="i">$FingerprintsFile</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 922 <span class="k">my</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span><span class="sc">;</span> 923 924 <span class="k">if</span> <span class="s">(</span><span class="i">$FingerprintsFileMode</span> !~ <span class="q">/^(Reference|Database)$/</span><span class="s">)</span> <span class="s">{</span> 925 <span class="k">die</span> <span class="q">"Error: Unknown fingerprints file mode: $FingerprintsFileMode. 
Supported values: Reference or Database\n"</span><span class="sc">;</span> 926 <span class="s">}</span> 927 928 <span class="i">%FingerprintsFileIOParams</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 929 930 <span class="j">FILETYPE:</span> <span class="s">{</span> 931 <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^SD$/i</span><span class="s">)</span> <span class="s">{</span> 932 <span class="i">%FingerprintsFileIOParams</span> = <span class="s">(</span><span class="q">'Name'</span> <span class="cm">=></span> <span class="i">$FingerprintsFile</span><span class="cm">,</span> <span class="q">'Mode'</span> <span class="cm">=></span> <span class="q">'Read'</span><span class="cm">,</span> <span class="q">'FingerprintsStringMode'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="w">FingerprintsMode</span>}<span class="cm">,</span> <span class="q">'ValidateData'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="w">ValidateData</span>}<span class="cm">,</span> <span class="q">'DetailLevel'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="w">Detail</span>}<span class="cm">,</span> <span class="q">'FingerprintsFieldLabel'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="q">"${FingerprintsFileMode}FingerprintsField"</span>}<span class="cm">,</span> <span class="q">'CompoundIDMode'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="q">"${FingerprintsFileMode}CompoundIDMode"</span>}<span class="cm">,</span> <span class="q">'CompoundIDFieldLabel'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="q">"${FingerprintsFileMode}CompoundIDField"</span>}<span class="cm">,</span> <span class="q">'CompoundIDPrefix'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span 
class="q">"${FingerprintsFileMode}CompoundIDPrefix"</span>}<span class="s">)</span><span class="sc">;</span> 933 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span> 934 <span class="s">}</span> 935 <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^FP$/i</span><span class="s">)</span> <span class="s">{</span> 936 <span class="i">%FingerprintsFileIOParams</span> = <span class="s">(</span><span class="q">'Name'</span> <span class="cm">=></span> <span class="i">$FingerprintsFile</span><span class="cm">,</span> <span class="q">'Mode'</span> <span class="cm">=></span> <span class="q">'Read'</span><span class="cm">,</span> <span class="q">'FingerprintsStringMode'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="w">FingerprintsMode</span>}<span class="cm">,</span> <span class="q">'ValidateData'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="w">ValidateData</span>}<span class="cm">,</span> <span class="q">'DetailLevel'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="w">Detail</span>}<span class="s">)</span><span class="sc">;</span> 937 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span> 938 <span class="s">}</span> 939 <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^Text$/i</span><span class="s">)</span> <span class="s">{</span> 940 <span class="i">%FingerprintsFileIOParams</span> = <span class="s">(</span><span class="q">'Name'</span> <span class="cm">=></span> <span class="i">$FingerprintsFile</span><span class="cm">,</span> <span class="q">'Mode'</span> <span class="cm">=></span> <span class="q">'Read'</span><span class="cm">,</span> <span class="q">'FingerprintsStringMode'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="w">FingerprintsMode</span>}<span 
class="cm">,</span> <span class="q">'ValidateData'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="w">ValidateData</span>}<span class="cm">,</span> <span class="q">'DetailLevel'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="w">Detail</span>}<span class="cm">,</span> <span class="q">'FingerprintsCol'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="q">"${FingerprintsFileMode}FingerprintsCol"</span>}<span class="cm">,</span> <span class="q">'ColMode'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="q">"${FingerprintsFileMode}ColMode"</span>}<span class="cm">,</span> <span class="q">'CompoundIDCol'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="q">"${FingerprintsFileMode}CompoundIDCol"</span>}<span class="cm">,</span> <span class="q">'CompoundIDPrefix'</span> <span class="cm">=></span> <span class="i">$OptionsInfo</span>{<span class="q">"${FingerprintsFileMode}CompoundIDPrefix"</span>}<span class="cm">,</span> <span class="q">'InDelim'</span> <span class="cm">=></span> <span class="i">$FingerprintsFilesInfo</span>{<span class="i">$FingerprintsFileMode</span>}{<span class="w">InDelim</span>}<span class="s">)</span><span class="sc">;</span> 941 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span> 942 <span class="s">}</span> 943 <span class="k">die</span> <span class="q">"Error: Fingerprints file type, $FileType, is not valid. 
Supported file types: SD, FP or Text\n"</span><span class="sc">;</span> 944 <span class="s">}</span> 945 946 <span class="k">return</span> <span class="i">%FingerprintsFileIOParams</span><span class="sc">;</span> 947 <span class="s">}</span> 948 949 <span class="c"># Make sure fingerprints data file contains valid data and retrieve fingerprints string mode information...</span> 950 <span class="c"># Error messages name the file by the fingerprints file mode passed in as the first argument.</span> <a name="RetrieveFingerprintsFileFingerprintsStringInfo-"></a> 951 <span class="k">sub </span><span class="m">RetrieveFingerprintsFileFingerprintsStringInfo</span> <span class="s">{</span> 952 <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFileMode</span><span class="cm">,</span> <span class="i">$FingerprintsFile</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> 953 <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFileIO</span><span class="cm">,</span> <span class="i">$FingerprintsStringMode</span><span class="cm">,</span> <span class="i">$FingerprintsBitVectorStringMode</span><span class="cm">,</span> <span class="i">$FingerprintsVectorStringMode</span><span class="cm">,</span> <span class="i">$FirstFingerprintsStringType</span><span class="cm">,</span> <span class="i">$FirstFingerprintsStringDescription</span><span class="s">)</span><span class="sc">;</span> 954 955 <span class="i">$FingerprintsFileIO</span> = <span class="i">Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO</span><span class="s">(</span><span class="i">%</span>{<span class="i">$FingerprintsFilesInfo</span>{<span class="i">$FingerprintsFileMode</span>}{<span class="w">FingerprintsFileIOParameters</span>}}<span class="s">)</span><span class="sc">;</span> 956 <span class="k">if</span> <span class="s">(</span>!<span class="i">$FingerprintsFileIO</span><span class="s">)</span> <span class="s">{</span> 957 <span class="k">die</span> <span class="q">"Error: $FingerprintsFileMode fingerprints file, 
$FingerprintsFile, contains invalid fingerprints data.\n"</span><span class="sc">;</span> 958 <span class="s">}</span> 959 <span class="k">if</span> <span class="s">(</span>!<span class="i">$FingerprintsFileIO</span><span class="i">->IsFingerprintsFileDataValid</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 960 <span class="k">die</span> <span class="q">"Error: $FingerprintsFileMode fingerprints file, $FingerprintsFile, contains invalid fingerprints data.\n"</span><span class="sc">;</span> 961 <span class="s">}</span> 962 963 <span class="i">$FingerprintsStringMode</span> = <span class="i">$FingerprintsFileIO</span><span class="i">->GetFingerprintsStringMode</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 964 <span class="i">$FingerprintsBitVectorStringMode</span> = <span class="i">$FingerprintsFileIO</span><span class="i">->GetFingerprintsBitVectorStringMode</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 965 <span class="i">$FingerprintsVectorStringMode</span> = <span class="i">$FingerprintsFileIO</span><span class="i">->GetFingerprintsVectorStringMode</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 966 967 <span class="i">$FirstFingerprintsStringType</span> = <span class="i">$FingerprintsFileIO</span><span class="i">->GetFirstFingerprintsStringType</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 968 <span class="i">$FirstFingerprintsStringDescription</span> = <span class="i">$FingerprintsFileIO</span><span class="i">->GetFirstFingerprintsStringDescription</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 969 970 <span class="i">$FingerprintsFileIO</span><span class="i">->Close</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 971 972 <span class="k">return</span> <span class="s">(</span><span 
class="i">$FingerprintsStringMode</span><span class="cm">,</span> <span class="i">$FingerprintsBitVectorStringMode</span><span class="cm">,</span> <span class="i">$FingerprintsVectorStringMode</span><span class="cm">,</span> <span class="i">$FirstFingerprintsStringType</span><span class="cm">,</span> <span class="i">$FirstFingerprintsStringDescription</span><span class="s">)</span><span class="sc">;</span> 973 <span class="s">}</span> 974 975 <span class="c"># Retrieve output files names using reference fingerprints file name...</span> 976 <span class="c">#</span> <a name="RetrieveOutputFilesInfo-"></a> 977 <span class="k">sub </span><span class="m">RetrieveOutputFilesInfo</span> <span class="s">{</span> 978 <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFile</span><span class="cm">,</span> <span class="i">$FileDir</span><span class="cm">,</span> <span class="i">$FileExt</span><span class="cm">,</span> <span class="i">$FileName</span><span class="cm">,</span> <span class="i">$OutFileRoot</span><span class="cm">,</span> <span class="i">$SDOutFileName</span><span class="cm">,</span> <span class="i">$TextOutFileName</span><span class="cm">,</span> <span class="i">$SDOutFileExt</span><span class="cm">,</span> <span class="i">$TextOutFileExt</span><span class="cm">,</span> <span class="i">$ReferenceFileName</span><span class="cm">,</span> <span class="i">$DatabaseFileName</span><span class="s">)</span><span class="sc">;</span> 979 980 <span class="i">$OutputFilesInfo</span>{<span class="w">OutFileRoot</span>} = <span class="q">''</span><span class="sc">;</span> 981 <span class="i">$OutputFilesInfo</span>{<span class="w">SDOutFileName</span>} = <span class="q">''</span><span class="sc">;</span> 982 <span class="i">$OutputFilesInfo</span>{<span class="w">TextOutFileName</span>} = <span class="q">''</span><span class="sc">;</span> 983 984 <span class="i">$FingerprintsFile</span> = <span class="i">$FingerprintsFilesInfo</span>{<span 
class="w">Reference</span>}{<span class="w">FileName</span>}<span class="sc">;</span> 985 986 <span class="i">$FileDir</span> = <span class="q">""</span><span class="sc">;</span> <span class="i">$FileName</span> = <span class="q">""</span><span class="sc">;</span> <span class="i">$FileExt</span> = <span class="q">""</span><span class="sc">;</span> 987 <span class="s">(</span><span class="i">$FileDir</span><span class="cm">,</span> <span class="i">$FileName</span><span class="cm">,</span> <span class="i">$FileExt</span><span class="s">)</span> = <span class="i">ParseFileName</span><span class="s">(</span><span class="i">$FingerprintsFile</span><span class="s">)</span><span class="sc">;</span> 988 989 <span class="i">$SDOutFileExt</span> = <span class="q">"sdf"</span><span class="sc">;</span> 990 <span class="i">$TextOutFileExt</span> = <span class="s">(</span><span class="i">$Options</span>{<span class="w">outdelim</span>} =~ <span class="q">/^tab$/i</span><span class="s">)</span> ? <span class="q">"tsv"</span> <span class="co">:</span> <span class="q">"csv"</span><span class="sc">;</span> 991 992 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">OutFileRoot</span>}<span class="s">)</span> <span class="s">{</span> 993 <span class="k">my</span> <span class="s">(</span><span class="i">$RootFileDir</span><span class="cm">,</span> <span class="i">$RootFileName</span><span class="cm">,</span> <span class="i">$RootFileExt</span><span class="s">)</span> = <span class="i">ParseFileName</span><span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">OutFileRoot</span>}<span class="s">)</span><span class="sc">;</span> 994 <span class="k">if</span> <span class="s">(</span><span class="i">$RootFileName</span> && <span class="i">$RootFileExt</span><span class="s">)</span> <span class="s">{</span> 995 <span class="i">$FileName</span> = <span class="i">$RootFileName</span><span class="sc">;</span> 996 <span 
class="s">}</span> 997 <span class="k">else</span> <span class="s">{</span> 998 <span class="i">$FileName</span> = <span class="i">$OptionsInfo</span>{<span class="w">OutFileRoot</span>}<span class="sc">;</span> 999 <span class="s">}</span> 1000 <span class="i">$OutFileRoot</span> = <span class="i">$FileName</span><span class="sc">;</span> 1001 <span class="s">}</span> 1002 <span class="k">else</span> <span class="s">{</span> 1003 <span class="i">$OutFileRoot</span> = <span class="q">"${FileName}SimilaritySearching"</span><span class="sc">;</span> 1004 <span class="s">}</span> 1005 1006 <span class="i">$SDOutFileName</span> = <span class="q">"${OutFileRoot}.${SDOutFileExt}"</span><span class="sc">;</span> 1007 <span class="i">$TextOutFileName</span> = <span class="q">"${OutFileRoot}.${TextOutFileExt}"</span><span class="sc">;</span> 1008 1009 <span class="i">$ReferenceFileName</span> = <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Reference</span>}{<span class="w">FileName</span>}<span class="sc">;</span> 1010 <span class="i">$DatabaseFileName</span> = <span class="i">$FingerprintsFilesInfo</span>{<span class="w">Database</span>}{<span class="w">FileName</span>}<span class="sc">;</span> 1011 1012 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">SDOutput</span>}<span class="s">)</span> <span class="s">{</span> 1013 <span class="k">if</span> <span class="s">(</span><span class="i">$SDOutFileName</span> =~ <span class="q">/^$ReferenceFileName$/i</span><span class="s">)</span> <span class="s">{</span> 1014 <span class="k">die</span> <span class="q">"Error: Same output, $SDOutFileName, and reference input file names.\nSpecify a different name using \"-r --root\" option or use default name.\n"</span><span class="sc">;</span> 1015 <span class="s">}</span> 1016 <span class="k">if</span> <span class="s">(</span><span class="i">$SDOutFileName</span> =~ <span class="q">/^$DatabaseFileName$/i</span><span 
class="s">)</span> <span class="s">{</span> 1017 <span class="k">die</span> <span class="q">"Error: Same output, $SDOutFileName, and database input file names.\nSpecify a different name using \"-r --root\" option or use default name.\n"</span><span class="sc">;</span> 1018 <span class="s">}</span> 1019 <span class="s">}</span> 1020 1021 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">TextOutput</span>}<span class="s">)</span> <span class="s">{</span> 1022 <span class="k">if</span> <span class="s">(</span><span class="i">$TextOutFileName</span> =~ <span class="q">/^$ReferenceFileName$/i</span><span class="s">)</span> <span class="s">{</span> 1023 <span class="k">die</span> <span class="q">"Error: Same output, $TextOutFileName, and reference input file names.\nSpecify a different name using \"-r --root\" option or use default name.\n"</span><span class="sc">;</span> 1024 <span class="s">}</span> 1025 <span class="k">if</span> <span class="s">(</span><span class="i">$TextOutFileName</span> =~ <span class="q">/^$DatabaseFileName$/i</span><span class="s">)</span> <span class="s">{</span> 1026 <span class="k">die</span> <span class="q">"Error: Same output, $TextOutFileName, and database input file names.\nSpecify a different name using \"-r --root\" option or use default name.\n"</span><span class="sc">;</span> 1027 <span class="s">}</span> 1028 <span class="s">}</span> 1029 1030 <span class="k">if</span> <span class="s">(</span>!<span class="i">$OptionsInfo</span>{<span class="w">OverwriteFiles</span>}<span class="s">)</span> <span class="s">{</span> 1031 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">SDOutput</span>}<span class="s">)</span> <span class="s">{</span> 1032 <span class="k">if</span> <span class="s">(</span><span class="k">-e</span> <span class="i">$SDOutFileName</span><span class="s">)</span> <span class="s">{</span> 1033 <span class="k">die</span> 
<span class="q">"Error: The output file $SDOutFileName already exists.\n"</span><span class="sc">;</span> 1034 <span class="s">}</span> 1035 <span class="s">}</span> 1036 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">TextOutput</span>}<span class="s">)</span> <span class="s">{</span> 1037 <span class="k">if</span> <span class="s">(</span><span class="k">-e</span> <span class="i">$TextOutFileName</span><span class="s">)</span> <span class="s">{</span> 1038 <span class="k">die</span> <span class="q">"Error: The output file $TextOutFileName already exists.\n"</span><span class="sc">;</span> 1039 <span class="s">}</span> 1040 <span class="s">}</span> 1041 <span class="s">}</span> 1042 1043 <span class="i">$OutputFilesInfo</span>{<span class="w">OutFileRoot</span>} = <span class="i">$OutFileRoot</span><span class="sc">;</span> 1044 <span class="i">$OutputFilesInfo</span>{<span class="w">SDOutFileName</span>} = <span class="i">$SDOutFileName</span><span class="sc">;</span> 1045 <span class="i">$OutputFilesInfo</span>{<span class="w">TextOutFileName</span>} = <span class="i">$TextOutFileName</span><span class="sc">;</span> 1046 1047 <span class="s">}</span> 1048 1049 <span class="c"># Process input fingerprints file names...</span> 1050 <span class="c">#</span> <a name="ProcessFingerprintsFileNames-"></a>1051 <span class="k">sub </span><span class="m">ProcessFingerprintsFileNames</span> <span class="s">{</span> 1052 <span class="i">@FingerprintsFilesList</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1053 1054 <span class="k">if</span> <span class="s">(</span><span class="i">@ARGV</span> != <span class="n">2</span><span class="s">)</span> <span class="s">{</span> 1055 <span class="k">die</span> <span class="i">GetUsageFromPod</span><span class="s">(</span><span class="q">"$FindBin::Bin/$ScriptName"</span><span class="s">)</span><span class="sc">;</span> 1056 <span 
class="s">}</span> 1057 1058 <span class="c"># Reference fingerprints file name...</span> 1059 <span class="k">push</span> <span class="i">@FingerprintsFilesList</span><span class="cm">,</span> <span class="i">$ARGV</span>[<span class="n">0</span>]<span class="sc">;</span> 1060 1061 <span class="c"># Database fingerprints file name...</span> 1062 <span class="k">push</span> <span class="i">@FingerprintsFilesList</span><span class="cm">,</span> <span class="i">$ARGV</span>[<span class="n">1</span>]<span class="sc">;</span> 1063 1064 <span class="s">}</span> 1065 1066 <span class="c"># Process option values...</span> <a name="ProcessOptions-"></a>1067 <span class="k">sub </span><span class="m">ProcessOptions</span> <span class="s">{</span> 1068 <span class="i">%OptionsInfo</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1069 1070 <span class="i">$OptionsInfo</span>{<span class="w">Mode</span>} = <span class="i">$Options</span>{<span class="w">mode</span>}<span class="sc">;</span> 1071 <span class="i">$OptionsInfo</span>{<span class="w">FingerprintsMode</span>} = <span class="i">$Options</span>{<span class="w">fingerprintsmode</span>}<span class="sc">;</span> 1072 1073 <span class="i">$OptionsInfo</span>{<span class="w">SearchMode</span>} = <span class="i">$Options</span>{<span class="w">searchmode</span>}<span class="sc">;</span> 1074 1075 <span class="i">ProcessBitVectorComparisonOptions</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1076 <span class="i">ProcessVectorComparisonOptions</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1077 1078 <span class="i">$OptionsInfo</span>{<span class="w">GroupFusionRule</span>} = <span class="i">$Options</span>{<span class="w">groupfusionrule</span>}<span class="sc">;</span> 1079 <span class="i">$OptionsInfo</span>{<span class="w">GroupFusionApplyCutoff</span>} = <span class="s">(</span><span class="i">$Options</span>{<span 
class="w">groupfusionapplycutoff</span>} =~ <span class="q">/^Yes$/i</span><span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span><span class="sc">;</span> 1080 1081 <span class="i">$OptionsInfo</span>{<span class="w">SimilarCountMode</span>} = <span class="i">$Options</span>{<span class="w">similarcountmode</span>}<span class="sc">;</span> 1082 <span class="i">$OptionsInfo</span>{<span class="w">NumOfSimilarMolecules</span>} = <span class="i">$Options</span>{<span class="w">numofsimilarmolecules</span>}<span class="sc">;</span> 1083 <span class="i">$OptionsInfo</span>{<span class="w">PercentSimilarMolecules</span>} = <span class="i">$Options</span>{<span class="w">percentsimilarmolecules</span>}<span class="sc">;</span> 1084 1085 <span class="c"># Set MaxSimilarMolecules to NumOfSimilarMolecules. For PercentSimilar value of SimilarCountMode,</span> 1086 <span class="c"># it'll be overwritten using number of entries in database fingerprints file and value of PercentSimilarMolecules...</span> 1087 <span class="c">#</span> 1088 <span class="i">$OptionsInfo</span>{<span class="w">MaxSimilarMolecules</span>} = <span class="i">$OptionsInfo</span>{<span class="w">NumOfSimilarMolecules</span>}<span class="sc">;</span> 1089 1090 <span class="i">$OptionsInfo</span>{<span class="w">SimilarityCutoff</span>} = <span class="i">$Options</span>{<span class="w">similaritycutoff</span>}<span class="sc">;</span> 1091 <span class="i">$OptionsInfo</span>{<span class="w">DistanceCutoff</span>} = <span class="i">$Options</span>{<span class="w">distancecutoff</span>}<span class="sc">;</span> 1092 1093 <span class="i">$OptionsInfo</span>{<span class="w">kNN</span>} = <span class="i">$Options</span>{<span class="w">knn</span>}<span class="sc">;</span> 1094 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">knn</span>} !~ <span class="q">/^All$/i</span><span 
class="s">)</span> <span class="s">{</span> 1095 <span class="k">if</span> <span class="s">(</span>!<span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">knn</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1096 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{knn}, for option \"-k, --KNN\" is not valid. Allowed values: > 0 \n"</span><span class="sc">;</span> 1097 <span class="s">}</span> 1098 <span class="s">}</span> 1099 1100 <span class="i">ProcessReferenceFingerprintsDataOptions</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1101 <span class="i">ProcessDatabaseFingerprintsDataOptions</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1102 1103 <span class="i">$OptionsInfo</span>{<span class="w">Detail</span>} = <span class="i">$Options</span>{<span class="w">detail</span>}<span class="sc">;</span> 1104 1105 <span class="i">$OptionsInfo</span>{<span class="w">InDelim</span>} = <span class="i">$Options</span>{<span class="w">indelim</span>}<span class="sc">;</span> 1106 <span class="i">$OptionsInfo</span>{<span class="w">OutDelim</span>} = <span class="s">(</span><span class="i">$Options</span>{<span class="w">outdelim</span>} =~ <span class="q">/tab/i</span> <span class="s">)</span> ? <span class="q">"\t"</span> <span class="co">:</span> <span class="s">(</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">outdelim</span>} =~ <span class="q">/semicolon/i</span><span class="s">)</span> ? <span class="q">"\;"</span> <span class="co">:</span> <span class="q">"\,"</span><span class="s">)</span><span class="sc">;</span> 1107 <span class="i">$OptionsInfo</span>{<span class="w">OutQuote</span>} = <span class="s">(</span><span class="i">$Options</span>{<span class="w">quote</span>} =~ <span class="q">/^Yes$/i</span><span class="s">)</span> ? 
<span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> 1108 1109 <span class="i">$OptionsInfo</span>{<span class="w">Output</span>} = <span class="i">$Options</span>{<span class="w">output</span>}<span class="sc">;</span> 1110 <span class="i">$OptionsInfo</span>{<span class="w">SDOutput</span>} = <span class="s">(</span><span class="i">$Options</span>{<span class="w">output</span>} =~ <span class="q">/^(SD|Both)$/i</span><span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> 1111 <span class="i">$OptionsInfo</span>{<span class="w">TextOutput</span>} = <span class="s">(</span><span class="i">$Options</span>{<span class="w">output</span>} =~ <span class="q">/^(Text|Both)$/i</span><span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> 1112 1113 <span class="i">$OptionsInfo</span>{<span class="w">OverwriteFiles</span>} = <span class="i">$Options</span>{<span class="w">overwrite</span>} ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> 1114 <span class="i">$OptionsInfo</span>{<span class="w">OutFileRoot</span>} = <span class="i">$Options</span>{<span class="w">root</span>} ? <span class="i">$Options</span>{<span class="w">root</span>} <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> 1115 1116 <span class="i">$OptionsInfo</span>{<span class="w">Fast</span>} = <span class="i">$Options</span>{<span class="w">fast</span>} ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> 1117 <span class="i">$OptionsInfo</span>{<span class="w">ValidateData</span>} = <span class="i">$Options</span>{<span class="w">fast</span>} ? 
<span class="n">0</span> <span class="co">:</span> <span class="n">1</span><span class="sc">;</span> 1118 1119 <span class="i">$OptionsInfo</span>{<span class="w">Precision</span>} = <span class="i">$Options</span>{<span class="w">precision</span>}<span class="sc">;</span> 1120 <span class="s">}</span> 1121 1122 <span class="c"># Process options related to comparison of bit vector strings...</span> 1123 <span class="c">#</span> <a name="ProcessBitVectorComparisonOptions-"></a>1124 <span class="k">sub </span><span class="m">ProcessBitVectorComparisonOptions</span> <span class="s">{</span> 1125 <span class="c"># Setup supported bit vector similarity coefficients for bit vector strings...</span> 1126 <span class="k">my</span><span class="s">(</span><span class="i">$ComparisonMeasure</span><span class="cm">,</span> <span class="i">$SupportedComparisonMeasure</span><span class="cm">,</span> <span class="i">@SupportedComparisonMeasures</span><span class="cm">,</span> <span class="i">%SupportedComparisonMeasuresNameMap</span><span class="cm">,</span> <span class="i">%SupportedComparisonMeasuresMethodMap</span><span class="s">)</span><span class="sc">;</span> 1127 1128 <span class="i">@SupportedComparisonMeasures</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1129 <span class="i">%SupportedComparisonMeasuresNameMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1130 <span class="i">%SupportedComparisonMeasuresMethodMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1131 1132 <span class="k">for</span> <span class="i">$SupportedComparisonMeasure</span> <span class="s">(</span><span class="i">Fingerprints::FingerprintsBitVector::GetSupportedSimilarityCoefficients</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1133 <span class="c"># Similarity coefficient function/method names contain "Coefficient" in their 
names.</span> 1134 <span class="c"># So take 'em out and setup a map to original function/method name...</span> 1135 <span class="i">$ComparisonMeasure</span> = <span class="i">$SupportedComparisonMeasure</span><span class="sc">;</span> 1136 <span class="i">$ComparisonMeasure</span> =~ <span class="q">s/Coefficient$//</span><span class="sc">;</span> 1137 1138 <span class="k">push</span> <span class="i">@SupportedComparisonMeasures</span><span class="cm">,</span> <span class="i">$ComparisonMeasure</span><span class="sc">;</span> 1139 <span class="i">$SupportedComparisonMeasuresNameMap</span>{<span class="k">lc</span><span class="s">(</span><span class="i">$ComparisonMeasure</span><span class="s">)</span>} = <span class="i">$ComparisonMeasure</span><span class="sc">;</span> 1140 <span class="i">$SupportedComparisonMeasuresMethodMap</span>{<span class="k">lc</span><span class="s">(</span><span class="i">$ComparisonMeasure</span><span class="s">)</span>} = <span class="i">$SupportedComparisonMeasure</span><span class="sc">;</span> 1141 <span class="s">}</span> 1142 1143 <span class="c"># Setup similarity coefficient to use for calculating similarity matrices for bit vector strings...</span> 1144 <span class="k">my</span><span class="s">(</span><span class="i">$SpecifiedMeasure</span><span class="cm">,</span> <span class="i">$SpecifiedComparisonMeasureName</span><span class="cm">,</span> <span class="i">$SpecifiedComparisonMeasureMethod</span><span class="s">)</span><span class="sc">;</span> 1145 1146 <span class="i">$SpecifiedComparisonMeasureName</span> = <span class="q">''</span><span class="sc">;</span> 1147 <span class="i">$SpecifiedComparisonMeasureMethod</span> = <span class="q">''</span><span class="sc">;</span> 1148 1149 <span class="i">$SpecifiedMeasure</span> = <span class="i">$Options</span>{<span class="w">bitvectorcomparisonmode</span>}<span class="sc">;</span> 1150 1151 <span class="k">if</span> <span class="s">(</span>! 
<span class="k">exists</span> <span class="i">$SupportedComparisonMeasuresMethodMap</span>{<span class="k">lc</span><span class="s">(</span><span class="i">$SpecifiedMeasure</span><span class="s">)</span>} <span class="s">)</span> <span class="s">{</span> 1152 <span class="k">die</span> <span class="q">"Error: The value specified, $SpecifiedMeasure, for option \"-b --BitVectorComparisonMode\" is not valid.\nAllowed values:"</span><span class="cm">,</span> <span class="i">JoinWords</span><span class="s">(</span>\<span class="i">@SupportedComparisonMeasures</span><span class="cm">,</span> <span class="q">", "</span><span class="cm">,</span> <span class="n">0</span><span class="s">)</span><span class="cm">,</span> <span class="q">"\n"</span><span class="sc">;</span> 1153 <span class="s">}</span> 1154 1155 <span class="i">$SpecifiedComparisonMeasureMethod</span> = <span class="i">$SupportedComparisonMeasuresMethodMap</span>{<span class="k">lc</span><span class="s">(</span><span class="i">$SpecifiedMeasure</span><span class="s">)</span>}<span class="sc">;</span> 1156 <span class="i">$SpecifiedComparisonMeasureName</span> = <span class="i">$SupportedComparisonMeasuresNameMap</span>{<span class="k">lc</span><span class="s">(</span><span class="i">$SpecifiedMeasure</span><span class="s">)</span>}<span class="sc">;</span> 1157 1158 <span class="i">$OptionsInfo</span>{<span class="w">BitVectorComparisonMode</span>} = <span class="i">$Options</span>{<span class="w">bitvectorcomparisonmode</span>}<span class="sc">;</span> 1159 1160 <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedBitVectorComparisonMeasure</span>} = <span class="i">$SpecifiedMeasure</span><span class="sc">;</span> 1161 <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedBitVectorComparisonMeasureName</span>} = <span class="i">$SpecifiedComparisonMeasureName</span><span class="sc">;</span> 1162 <span class="i">$OptionsInfo</span>{<span 
class="w">SpecifiedBitVectorComparisonMeasureMethod</span>} = <span class="i">$SpecifiedComparisonMeasureMethod</span><span class="sc">;</span> 1163 1164 <span class="c"># Make sure valid alpha parameter is specified for Tversky calculation...</span> 1165 <span class="i">$OptionsInfo</span>{<span class="w">Alpha</span>} = <span class="q">''</span><span class="sc">;</span> 1166 <span class="k">if</span> <span class="s">(</span><span class="i">$SpecifiedMeasure</span> =~ <span class="q">/^(TverskySimilarity|WeightedTverskySimilarity)$/i</span><span class="s">)</span> <span class="s">{</span> 1167 <span class="k">if</span> <span class="s">(</span><span class="i">IsEmpty</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">alpha</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1168 <span class="k">die</span> <span class="q">"Error: You must specify a value for \"-a, --alpha\" option in \"TverskySimilarity or WeightedTverskySimilarity\" \"-m --mode\". \n"</span><span class="sc">;</span> 1169 <span class="s">}</span> 1170 <span class="k">my</span><span class="s">(</span><span class="i">$Alpha</span><span class="s">)</span><span class="sc">;</span> 1171 <span class="i">$Alpha</span> = <span class="i">$Options</span>{<span class="w">alpha</span>}<span class="sc">;</span> 1172 <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="i">IsFloat</span><span class="s">(</span><span class="i">$Alpha</span><span class="s">)</span> && <span class="i">$Alpha</span> >=<span class="n">0</span> && <span class="i">$Alpha</span> <= <span class="n">1</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1173 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{alpha}, for option \"-a, --alpha\" is not valid. 
Allowed values: >= 0 and <= 1\n"</span><span class="sc">;</span> 1174 <span class="s">}</span> 1175 <span class="i">$OptionsInfo</span>{<span class="w">Alpha</span>} = <span class="i">$Alpha</span><span class="sc">;</span> 1176 <span class="s">}</span> 1177 1178 <span class="c"># Make sure valid beta parameter is specified for WeightedTanimoto and WeightedTversky</span> 1179 <span class="c"># calculations...</span> 1180 <span class="i">$OptionsInfo</span>{<span class="w">Beta</span>} = <span class="q">''</span><span class="sc">;</span> 1181 <span class="k">if</span> <span class="s">(</span><span class="i">$SpecifiedMeasure</span> =~ <span class="q">/^(WeightedTverskySimilarity|WeightedTanimotoSimilarity)$/i</span><span class="s">)</span> <span class="s">{</span> 1182 <span class="k">if</span> <span class="s">(</span><span class="i">IsEmpty</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">beta</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1183 <span class="k">die</span> <span class="q">"Error: You must specify a value for \"-b, --beta\" option in \"WeightedTverskySimilarity or WeightedTanimotoSimilarity\" \"-m --mode\". 
\n"</span><span class="sc">;</span> 1184 <span class="s">}</span> 1185 <span class="k">my</span><span class="s">(</span><span class="i">$Beta</span><span class="s">)</span><span class="sc">;</span> 1186 <span class="i">$Beta</span> = <span class="i">$Options</span>{<span class="w">beta</span>}<span class="sc">;</span> 1187 <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="i">IsFloat</span><span class="s">(</span><span class="i">$Beta</span><span class="s">)</span> && <span class="i">$Beta</span> >=<span class="n">0</span> && <span class="i">$Beta</span> <= <span class="n">1</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1188 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{beta}, for option \"-b, --beta\" is not valid. Allowed values: >= 0 and <= 1\n"</span><span class="sc">;</span> 1189 <span class="s">}</span> 1190 <span class="i">$OptionsInfo</span>{<span class="w">Beta</span>} = <span class="i">$Beta</span><span class="sc">;</span> 1191 <span class="s">}</span> 1192 <span class="s">}</span> 1193 1194 <span class="c"># Process options related to comparison of vector strings...</span> 1195 <span class="c">#</span> <a name="ProcessVectorComparisonOptions-"></a>1196 <span class="k">sub </span><span class="m">ProcessVectorComparisonOptions</span> <span class="s">{</span> 1197 <span class="c"># Setup specified similarity coefficients for vector strings..</span> 1198 <span class="k">my</span><span class="s">(</span><span class="i">$ComparisonMeasure</span><span class="cm">,</span> <span class="i">$SupportedComparisonMeasure</span><span class="cm">,</span> <span class="i">@SupportedComparisonMeasures</span><span class="cm">,</span> <span class="i">%SupportedComparisonMeasuresNameMap</span><span class="cm">,</span> <span class="i">%SupportedComparisonMeasuresMethodMap</span><span class="s">)</span><span class="sc">;</span> 1199 1200 <span 
class="i">@SupportedComparisonMeasures</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1201 <span class="i">%SupportedComparisonMeasuresNameMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1202 <span class="i">%SupportedComparisonMeasuresMethodMap</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1203 <span class="k">for</span> <span class="i">$SupportedComparisonMeasure</span> <span class="s">(</span><span class="i">Fingerprints::FingerprintsVector::GetSupportedDistanceAndSimilarityCoefficients</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1204 <span class="c"># Similarity and distance coefficient function/method names contain "Coefficient" in their names.</span> 1205 <span class="c"># So take 'em out and setup a map to original function/method name...</span> 1206 <span class="i">$ComparisonMeasure</span> = <span class="i">$SupportedComparisonMeasure</span><span class="sc">;</span> 1207 <span class="k">if</span> <span class="s">(</span><span class="i">$ComparisonMeasure</span> =~ <span class="q">/Coefficient$/i</span><span class="s">)</span> <span class="s">{</span> 1208 <span class="i">$ComparisonMeasure</span> =~ <span class="q">s/Coefficient$//i</span><span class="sc">;</span> 1209 <span class="s">}</span> 1210 <span class="k">push</span> <span class="i">@SupportedComparisonMeasures</span><span class="cm">,</span> <span class="i">$ComparisonMeasure</span><span class="sc">;</span> 1211 <span class="i">$SupportedComparisonMeasuresNameMap</span>{<span class="k">lc</span><span class="s">(</span><span class="i">$ComparisonMeasure</span><span class="s">)</span>} = <span class="i">$ComparisonMeasure</span><span class="sc">;</span> 1212 <span class="i">$SupportedComparisonMeasuresMethodMap</span>{<span class="k">lc</span><span class="s">(</span><span class="i">$ComparisonMeasure</span><span 
class="s">)</span>} = <span class="i">$SupportedComparisonMeasure</span><span class="sc">;</span> 1213 <span class="s">}</span> 1214 1215 <span class="c"># Setup similarity or distance coefficient to use for calculating similarity matrices for vector strings...</span> 1216 <span class="k">my</span><span class="s">(</span><span class="i">$SpecifiedMeasure</span><span class="cm">,</span> <span class="i">$SpecifiedComparisonMeasureName</span><span class="cm">,</span> <span class="i">$SpecifiedComparisonMeasureMethod</span><span class="s">)</span><span class="sc">;</span> 1217 1218 <span class="i">$SpecifiedComparisonMeasureName</span> = <span class="q">''</span><span class="sc">;</span> 1219 <span class="i">$SpecifiedComparisonMeasureMethod</span> = <span class="q">''</span><span class="sc">;</span> 1220 1221 <span class="i">$SpecifiedMeasure</span> = <span class="i">$Options</span>{<span class="w">vectorcomparisonmode</span>}<span class="sc">;</span> 1222 <span class="i">$SpecifiedMeasure</span> =~ <span class="q">s/ //g</span><span class="sc">;</span> 1223 1224 <span class="k">if</span> <span class="s">(</span>! 
<span class="k">exists</span><span class="s">(</span><span class="i">$SupportedComparisonMeasuresMethodMap</span>{<span class="k">lc</span><span class="s">(</span><span class="i">$SpecifiedMeasure</span><span class="s">)</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1225 <span class="k">die</span> <span class="q">"Error: The value specified, $SpecifiedMeasure, for option \"-v --VectorComparisonMode\" is not valid.\nAllowed values:"</span><span class="cm">,</span> <span class="i">JoinWords</span><span class="s">(</span>\<span class="i">@SupportedComparisonMeasures</span><span class="cm">,</span> <span class="q">", "</span><span class="cm">,</span> <span class="n">0</span><span class="s">)</span><span class="cm">,</span> <span class="q">"\n"</span><span class="sc">;</span> 1226 <span class="s">}</span> 1227 1228 <span class="i">$SpecifiedComparisonMeasureMethod</span> = <span class="i">$SupportedComparisonMeasuresMethodMap</span>{<span class="k">lc</span><span class="s">(</span><span class="i">$SpecifiedMeasure</span><span class="s">)</span>}<span class="sc">;</span> 1229 <span class="i">$SpecifiedComparisonMeasureName</span> = <span class="i">$SupportedComparisonMeasuresNameMap</span>{<span class="k">lc</span><span class="s">(</span><span class="i">$SpecifiedMeasure</span><span class="s">)</span>}<span class="sc">;</span> 1230 1231 <span class="i">$OptionsInfo</span>{<span class="w">VectorComparisonMode</span>} = <span class="i">$Options</span>{<span class="w">vectorcomparisonmode</span>}<span class="sc">;</span> 1232 1233 <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedVectorComparisonMeasure</span>} = <span class="i">$SpecifiedMeasure</span><span class="sc">;</span> 1234 <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedVectorComparisonMeasuresName</span>} = <span class="i">$SpecifiedComparisonMeasureName</span><span class="sc">;</span> 1235 <span class="i">$OptionsInfo</span>{<span 
class="w">SpecifiedVectorComparisonMeasuresMethod</span>} = <span class="i">$SpecifiedComparisonMeasureMethod</span><span class="sc">;</span> 1236 1237 <span class="c"># Setup specified vector comparison calculation modes...</span> 1238 <span class="k">my</span><span class="s">(</span><span class="i">$SpecifiedFormulism</span><span class="s">)</span><span class="sc">;</span> 1239 1240 <span class="i">$SpecifiedFormulism</span> = <span class="i">$Options</span>{<span class="w">vectorcomparisonformulism</span>}<span class="sc">;</span> 1241 <span class="i">$SpecifiedFormulism</span> =~ <span class="q">s/ //g</span><span class="sc">;</span> 1242 <span class="k">if</span> <span class="s">(</span><span class="i">$SpecifiedFormulism</span> !~ <span class="q">/^(AlgebraicForm|BinaryForm|SetTheoreticForm)$/i</span><span class="s">)</span> <span class="s">{</span> 1243 <span class="k">die</span> <span class="q">"Error: The value specified, $SpecifiedFormulism, for option \"--VectorComparisonFormulism\" is not valid. 
Allowed values: AlgebraicForm, BinaryForm or SetTheoreticForm\n"</span><span class="sc">;</span> 1244 <span class="s">}</span> 1245 1246 <span class="i">$OptionsInfo</span>{<span class="w">VectorComparisonFormulism</span>} = <span class="i">$Options</span>{<span class="w">vectorcomparisonformulism</span>}<span class="sc">;</span> 1247 <span class="i">$OptionsInfo</span>{<span class="w">SpecifiedVectorComparisonMode</span>} = <span class="i">$SpecifiedFormulism</span><span class="sc">;</span> 1248 1249 <span class="s">}</span> 1250 1251 <span class="c"># Process options related to data retrieval from reference fingerprints SD and CSV/TSV</span> 1252 <span class="c"># text files...</span> 1253 <span class="c">#</span> <a name="ProcessReferenceFingerprintsDataOptions-"></a>1254 <span class="k">sub </span><span class="m">ProcessReferenceFingerprintsDataOptions</span> <span class="s">{</span> 1255 1256 <span class="i">$OptionsInfo</span>{<span class="w">ReferenceCompoundIDPrefix</span>} = <span class="i">$Options</span>{<span class="w">referencecompoundidprefix</span>} ? 
<span class="i">$Options</span>{<span class="w">referencecompoundidprefix</span>} <span class="co">:</span> <span class="q">'Cmpd'</span><span class="sc">;</span> 1257 1258 <span class="c"># Compound ID and fingerprints column options for text files...</span> 1259 1260 <span class="i">$OptionsInfo</span>{<span class="w">ReferenceColMode</span>} = <span class="i">$Options</span>{<span class="w">referencecolmode</span>}<span class="sc">;</span> 1261 1262 <span class="k">if</span> <span class="s">(</span><span class="i">IsNotEmpty</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">referencecompoundidcol</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1263 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">referencecolmode</span>} =~ <span class="q">/^ColNum$/i</span><span class="s">)</span> <span class="s">{</span> 1264 <span class="k">if</span> <span class="s">(</span>!<span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">referencecompoundidcol</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1265 <span class="k">die</span> <span class="q">"Error: Column value, $Options{referencecompoundidcol}, specified using \"--ReferenceCompoundIDCol\" is not valid: Allowed integer values: > 0\n"</span><span class="sc">;</span> 1266 <span class="s">}</span> 1267 <span class="s">}</span> 1268 <span class="i">$OptionsInfo</span>{<span class="w">ReferenceCompoundIDCol</span>} = <span class="i">$Options</span>{<span class="w">referencecompoundidcol</span>}<span class="sc">;</span> 1269 <span class="s">}</span> 1270 <span class="k">else</span> <span class="s">{</span> 1271 <span class="i">$OptionsInfo</span>{<span class="w">ReferenceCompoundIDCol</span>} = <span class="q">'AutoDetect'</span><span class="sc">;</span> 1272 <span class="s">}</span> 1273 1274 <span class="k">if</span> <span 
class="s">(</span><span class="i">IsNotEmpty</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">referencefingerprintscol</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1275 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">referencecolmode</span>} =~ <span class="q">/^ColNum$/i</span><span class="s">)</span> <span class="s">{</span> 1276 <span class="k">if</span> <span class="s">(</span>!<span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">referencefingerprintscol</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1277 <span class="k">die</span> <span class="q">"Error: Column value, $Options{referencefingerprintscol}, specified using \"--ReferenceFingerprintsCol\" is not valid: Allowed integer values: > 0\n"</span><span class="sc">;</span> 1278 <span class="s">}</span> 1279 <span class="s">}</span> 1280 <span class="i">$OptionsInfo</span>{<span class="w">ReferenceFingerprintsCol</span>} = <span class="i">$Options</span>{<span class="w">referencefingerprintscol</span>}<span class="sc">;</span> 1281 <span class="s">}</span> 1282 <span class="k">else</span> <span class="s">{</span> 1283 <span class="i">$OptionsInfo</span>{<span class="w">ReferenceFingerprintsCol</span>} = <span class="q">'AutoDetect'</span><span class="sc">;</span> 1284 <span class="s">}</span> 1285 1286 <span class="k">if</span> <span class="s">(</span><span class="i">IsNotEmpty</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">referencecompoundidcol</span>}<span class="s">)</span> && <span class="i">IsNotEmpty</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">referencefingerprintscol</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1287 <span class="k">if</span> <span class="s">(</span><span 
class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">referencecompoundidcol</span>}<span class="s">)</span> && <span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">referencefingerprintscol</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1288 <span class="k">if</span> <span class="s">(</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">referencecompoundidcol</span>} == <span class="i">$Options</span>{<span class="w">referencefingerprintscol</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1289 <span class="k">die</span> <span class="q">"Error: Values specified using \"--ReferenceCompoundIDCol\" and \"--ReferenceFingerprintsCol\", $Options{referencecompoundidcol}, must be different.\n"</span><span class="sc">;</span> 1290 <span class="s">}</span> 1291 <span class="s">}</span> 1292 <span class="k">else</span> <span class="s">{</span> 1293 <span class="k">if</span> <span class="s">(</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">referencecompoundidcol</span>} <span class="k">eq</span> <span class="i">$Options</span>{<span class="w">referencefingerprintscol</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1294 <span class="k">die</span> <span class="q">"Error: Values specified using \"--ReferenceCompoundIDCol\" and \"--ReferenceFingerprintsCol\", $Options{referencecompoundidcol}, must be different.\n"</span><span class="sc">;</span> 1295 <span class="s">}</span> 1296 <span class="s">}</span> 1297 <span class="s">}</span> 1298 1299 <span class="c"># Compound ID and fingerprints field options for SD files...</span> 1300 1301 <span class="i">$OptionsInfo</span>{<span class="w">ReferenceCompoundIDMode</span>} = <span class="i">$Options</span>{<span class="w">referencecompoundidmode</span>}<span 
class="sc">;</span> 1302 <span class="i">$OptionsInfo</span>{<span class="w">ReferenceCompoundIDField</span>} = <span class="q">''</span><span class="sc">;</span> 1303 1304 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">referencecompoundidmode</span>} =~ <span class="q">/^DataField$/i</span> && !<span class="i">$Options</span>{<span class="w">referencecompoundidfield</span>}<span class="s">)</span> <span class="s">{</span> 1305 <span class="k">die</span> <span class="q">"Error: You must specify a value for \"--ReferenceCompoundIDField\" option in \"DataField\" \"--ReferenceCompoundIDMode\". \n"</span><span class="sc">;</span> 1306 <span class="s">}</span> 1307 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">referencecompoundidfield</span>}<span class="s">)</span> <span class="s">{</span> 1308 <span class="i">$OptionsInfo</span>{<span class="w">ReferenceCompoundIDField</span>} = <span class="i">$Options</span>{<span class="w">referencecompoundidfield</span>}<span class="sc">;</span> 1309 <span class="s">}</span> 1310 1311 <span class="k">if</span> <span class="s">(</span><span class="i">IsNotEmpty</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">referencefingerprintsfield</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1312 <span class="i">$OptionsInfo</span>{<span class="w">ReferenceFingerprintsField</span>} = <span class="i">$Options</span>{<span class="w">referencefingerprintsfield</span>}<span class="sc">;</span> 1313 <span class="s">}</span> 1314 <span class="k">else</span> <span class="s">{</span> 1315 <span class="i">$OptionsInfo</span>{<span class="w">ReferenceFingerprintsField</span>} = <span class="q">'AutoDetect'</span><span class="sc">;</span> 1316 <span class="s">}</span> 1317 1318 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span 
class="w">referencecompoundidfield</span>} && <span class="i">IsNotEmpty</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">referencefingerprintsfield</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1319 <span class="k">if</span> <span class="s">(</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">referencecompoundidfield</span>} <span class="k">eq</span> <span class="i">$Options</span>{<span class="w">referencefingerprintsfield</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1320 <span class="k">die</span> <span class="q">"Error: Values specified using \"--ReferenceCompoundIDField\" and \"--ReferenceFingerprintsfield\", $Options{referencecompoundidfield}, must be different.\n"</span><span class="sc">;</span> 1321 <span class="s">}</span> 1322 <span class="s">}</span> 1323 1324 <span class="s">}</span> 1325 1326 <span class="c"># Process options related to data retrieval from database fingerprints SD and CSV/TSV</span> 1327 <span class="c"># text files...</span> 1328 <span class="c">#</span> <a name="ProcessDatabaseFingerprintsDataOptions-"></a>1329 <span class="k">sub </span><span class="m">ProcessDatabaseFingerprintsDataOptions</span> <span class="s">{</span> 1330 1331 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseCompoundIDPrefix</span>} = <span class="i">$Options</span>{<span class="w">databasecompoundidprefix</span>} ? 
<span class="i">$Options</span>{<span class="w">databasecompoundidprefix</span>} <span class="co">:</span> <span class="q">'Cmpd'</span><span class="sc">;</span> 1332 1333 <span class="c"># Compound ID and fingerprints column options for text files...</span> 1334 1335 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseColMode</span>} = <span class="i">$Options</span>{<span class="w">databasecolmode</span>}<span class="sc">;</span> 1336 1337 <span class="k">if</span> <span class="s">(</span><span class="i">IsNotEmpty</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">databasecompoundidcol</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1338 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">databasecolmode</span>} =~ <span class="q">/^ColNum$/i</span><span class="s">)</span> <span class="s">{</span> 1339 <span class="k">if</span> <span class="s">(</span>!<span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">databasecompoundidcol</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1340 <span class="k">die</span> <span class="q">"Error: Column value, $Options{databasecompoundidcol}, specified using \"--DatabaseCompoundIDCol\" is not valid: Allowed integer values: > 0\n"</span><span class="sc">;</span> 1341 <span class="s">}</span> 1342 <span class="s">}</span> 1343 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseCompoundIDCol</span>} = <span class="i">$Options</span>{<span class="w">databasecompoundidcol</span>}<span class="sc">;</span> 1344 <span class="s">}</span> 1345 <span class="k">else</span> <span class="s">{</span> 1346 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseCompoundIDCol</span>} = <span class="q">'AutoDetect'</span><span class="sc">;</span> 1347 <span class="s">}</span> 1348 1349 <span class="k">if</span> <span 
class="s">(</span><span class="i">IsNotEmpty</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">databasefingerprintscol</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1350 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">databasecolmode</span>} =~ <span class="q">/^ColNum$/i</span><span class="s">)</span> <span class="s">{</span> 1351 <span class="k">if</span> <span class="s">(</span>!<span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">databasefingerprintscol</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1352 <span class="k">die</span> <span class="q">"Error: Column value, $Options{databasefingerprintscol}, specified using \"--DatabaseFingerprintsCol\" is not valid: Allowed integer values: > 0\n"</span><span class="sc">;</span> 1353 <span class="s">}</span> 1354 <span class="s">}</span> 1355 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseFingerprintsCol</span>} = <span class="i">$Options</span>{<span class="w">databasefingerprintscol</span>}<span class="sc">;</span> 1356 <span class="s">}</span> 1357 <span class="k">else</span> <span class="s">{</span> 1358 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseFingerprintsCol</span>} = <span class="q">'AutoDetect'</span><span class="sc">;</span> 1359 <span class="s">}</span> 1360 1361 <span class="k">if</span> <span class="s">(</span><span class="i">IsNotEmpty</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">databasecompoundidcol</span>}<span class="s">)</span> && <span class="i">IsNotEmpty</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">databasefingerprintscol</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1362 <span class="k">if</span> <span class="s">(</span><span 
class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">databasecompoundidcol</span>}<span class="s">)</span> && <span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">databasefingerprintscol</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1363 <span class="k">if</span> <span class="s">(</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">databasecompoundidcol</span>} == <span class="i">$Options</span>{<span class="w">databasefingerprintscol</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1364 <span class="k">die</span> <span class="q">"Error: Values specified using \"--DatabaseCompoundIDCol\" and \"--DatabaseFingerprintsCol\", $Options{databasecompoundidcol}, must be different.\n"</span><span class="sc">;</span> 1365 <span class="s">}</span> 1366 <span class="s">}</span> 1367 <span class="k">else</span> <span class="s">{</span> 1368 <span class="k">if</span> <span class="s">(</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">databasecompoundidcol</span>} <span class="k">eq</span> <span class="i">$Options</span>{<span class="w">databasefingerprintscol</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1369 <span class="k">die</span> <span class="q">"Error: Values specified using \"--DatabaseCompoundIDCol\" and \"--DatabaseFingerprintsCol\", $Options{databasecompoundidcol}, must be different.\n"</span><span class="sc">;</span> 1370 <span class="s">}</span> 1371 <span class="s">}</span> 1372 <span class="s">}</span> 1373 1374 <span class="c"># Database data column options for text files...</span> 1375 1376 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataColsMode</span>} = <span class="i">$Options</span>{<span class="w">databasedatacolsmode</span>}<span class="sc">;</span> 1377 <span 
class="i">$OptionsInfo</span>{<span class="w">DatabaseDataCols</span>} = <span class="q">''</span><span class="sc">;</span> 1378 <span class="i">@</span>{<span class="i">$OptionsInfo</span>{<span class="w">SpecifiedDatabaseDataCols</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1379 1380 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">databasedatacolsmode</span>} =~ <span class="q">/^Specify$/i</span><span class="s">)</span> <span class="s">{</span> 1381 <span class="k">my</span><span class="s">(</span><span class="i">$DatabaseDataCols</span><span class="cm">,</span> <span class="i">$DatabaseColNum</span><span class="cm">,</span> <span class="i">@SpecifiedDataCols</span><span class="s">)</span><span class="sc">;</span> 1382 1383 <span class="k">if</span> <span class="s">(</span>!<span class="i">$Options</span>{<span class="w">databasedatacols</span>}<span class="s">)</span> <span class="s">{</span> 1384 <span class="k">die</span> <span class="q">"Error: You must specify a value for \"--DatabaseDataCols\" option in \"Specify\" \"--DatabaseDataColsMode\". 
\n"</span><span class="sc">;</span> 1385 <span class="s">}</span> 1386 <span class="i">$DatabaseDataCols</span> = <span class="i">$Options</span>{<span class="w">databasedatacols</span>}<span class="sc">;</span> 1387 1388 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">databasecolmode</span>} =~ <span class="q">/^ColNum$/i</span><span class="s">)</span> <span class="s">{</span> 1389 <span class="i">$DatabaseDataCols</span> =~ <span class="q">s/ //g</span><span class="sc">;</span> 1390 <span class="i">@SpecifiedDataCols</span> = <span class="k">split</span> <span class="q">/\,/</span><span class="cm">,</span> <span class="i">$DatabaseDataCols</span><span class="sc">;</span> 1391 <span class="k">for</span> <span class="i">$DatabaseColNum</span> <span class="s">(</span><span class="i">@SpecifiedDataCols</span><span class="s">)</span> <span class="s">{</span> 1392 <span class="k">if</span> <span class="s">(</span>!<span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$DatabaseColNum</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1393 <span class="k">die</span> <span class="q">"Error: Column value, $DatabaseColNum, specified using \"--DatabaseDataCols\" is not valid: Allowed integer values: > 0\n"</span><span class="sc">;</span> 1394 <span class="s">}</span> 1395 <span class="s">}</span> 1396 <span class="s">}</span> 1397 <span class="k">else</span> <span class="s">{</span> 1398 <span class="i">@SpecifiedDataCols</span> = <span class="k">split</span> <span class="q">/\,/</span><span class="cm">,</span> <span class="i">$DatabaseDataCols</span><span class="sc">;</span> 1399 <span class="s">}</span> 1400 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataCols</span>} = <span class="i">$DatabaseDataCols</span><span class="sc">;</span> 1401 <span class="k">push</span> <span class="i">@</span>{<span class="i">$OptionsInfo</span>{<span 
class="w">SpecifiedDatabaseDataCols</span>}}<span class="cm">,</span> <span class="i">@SpecifiedDataCols</span><span class="sc">;</span> 1402 <span class="s">}</span> 1403 <span class="k">elsif</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">databasedatacolsmode</span>} =~ <span class="q">/^All$/i</span><span class="s">)</span> <span class="s">{</span> 1404 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataCols</span>} = <span class="q">'All'</span><span class="sc">;</span> 1405 <span class="s">}</span> 1406 1407 <span class="k">if</span> <span class="s">(</span><span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataColsMode</span>} =~ <span class="q">/^Specify$/i</span> && !<span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataCols</span>}<span class="s">)</span> <span class="s">{</span> 1408 <span class="k">die</span> <span class="q">"Error: You must specify a value for \"--DatabaseDataCols\" option in \"Specify\" \"--DatabaseDataColsMode\". \n"</span><span class="sc">;</span> 1409 <span class="s">}</span> 1410 1411 <span class="c"># Compound ID and fingerprints field options for SD files...</span> 1412 1413 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseCompoundIDMode</span>} = <span class="i">$Options</span>{<span class="w">databasecompoundidmode</span>}<span class="sc">;</span> 1414 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseCompoundIDField</span>} = <span class="i">$Options</span>{<span class="w">databasecompoundidfield</span>} ? 
<span class="i">$Options</span>{<span class="w">databasecompoundidfield</span>} <span class="co">:</span> <span class="q">''</span><span class="sc">;</span> 1415 1416 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">databasecompoundidmode</span>} =~ <span class="q">/^DataField$/i</span><span class="s">)</span> <span class="s">{</span> 1417 <span class="k">if</span> <span class="s">(</span>!<span class="i">$Options</span>{<span class="w">databasecompoundidfield</span>}<span class="s">)</span> <span class="s">{</span> 1418 <span class="k">die</span> <span class="q">"Error: You must specify a value for \"--DatabaseCompoundIDField\" option in \"DataField\" \"--DatabaseCompoundIDMode\". \n"</span><span class="sc">;</span> 1419 <span class="s">}</span> 1420 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseCompoundIDField</span>} = <span class="i">$Options</span>{<span class="w">databasecompoundidfield</span>}<span class="sc">;</span> 1421 <span class="s">}</span> 1422 1423 1424 <span class="k">if</span> <span class="s">(</span><span class="i">IsNotEmpty</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">databasefingerprintsfield</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1425 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseFingerprintsField</span>} = <span class="i">$Options</span>{<span class="w">databasefingerprintsfield</span>}<span class="sc">;</span> 1426 <span class="s">}</span> 1427 <span class="k">else</span> <span class="s">{</span> 1428 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseFingerprintsField</span>} = <span class="q">'AutoDetect'</span><span class="sc">;</span> 1429 <span class="s">}</span> 1430 1431 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">databasecompoundidfield</span>} && <span class="i">IsNotEmpty</span><span class="s">(</span><span 
class="i">$Options</span>{<span class="w">databasefingerprintsfield</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1432 <span class="k">if</span> <span class="s">(</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">databasecompoundidfield</span>} <span class="k">eq</span> <span class="i">$Options</span>{<span class="w">databasefingerprintsfield</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1433 <span class="k">die</span> <span class="q">"Error: Values specified using \"--DatabaseCompoundIDField\" and \"--DatabaseFingerprintsfield\", $Options{databasecompoundidfield}, must be different.\n"</span><span class="sc">;</span> 1434 <span class="s">}</span> 1435 <span class="s">}</span> 1436 1437 <span class="c"># Database data field options for SD files...</span> 1438 1439 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataFieldsMode</span>} = <span class="i">$Options</span>{<span class="w">databasedatafieldsmode</span>}<span class="sc">;</span> 1440 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataFields</span>} = <span class="q">''</span><span class="sc">;</span> 1441 <span class="i">@</span>{<span class="i">$OptionsInfo</span>{<span class="w">SpecifiedDatabaseDataFields</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1442 1443 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">databasedatafieldsmode</span>} =~ <span class="q">/^Specify$/i</span> && !<span class="i">$Options</span>{<span class="w">databasedatafields</span>}<span class="s">)</span> <span class="s">{</span> 1444 <span class="k">die</span> <span class="q">"Error: You must specify a value for \"--DatabaseDataFields\" option in \"Specify\" \"--DatabaseDataFieldsMode\". 
\n"</span><span class="sc">;</span> 1445 <span class="s">}</span> 1446 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">databasedatafields</span>}<span class="s">)</span> <span class="s">{</span> 1447 <span class="k">my</span><span class="s">(</span><span class="i">@SpecifiedDataFields</span><span class="s">)</span><span class="sc">;</span> 1448 <span class="i">$OptionsInfo</span>{<span class="w">DatabaseDataFields</span>} = <span class="i">$Options</span>{<span class="w">databasedatafields</span>}<span class="sc">;</span> 1449 1450 <span class="i">@SpecifiedDataFields</span> = <span class="k">split</span> <span class="q">/\,/</span><span class="cm">,</span> <span class="i">$Options</span>{<span class="w">databasedatafields</span>}<span class="sc">;</span> 1451 <span class="k">push</span> <span class="i">@</span>{<span class="i">$OptionsInfo</span>{<span class="w">SpecifiedDatabaseDataFields</span>}}<span class="cm">,</span> <span class="i">@SpecifiedDataFields</span><span class="sc">;</span> 1452 <span class="s">}</span> 1453 <span class="s">}</span> 1454 1455 <span class="c"># Setup script usage and retrieve command line arguments specified using various options...</span> <a name="SetupScriptUsage-"></a>1456 <span class="k">sub </span><span class="m">SetupScriptUsage</span> <span class="s">{</span> 1457 1458 <span class="c"># Retrieve all the options...</span> 1459 <span class="i">%Options</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> 1460 1461 <span class="i">$Options</span>{<span class="w">alpha</span>} = <span class="n">0.5</span><span class="sc">;</span> 1462 <span class="i">$Options</span>{<span class="w">beta</span>} = <span class="n">1</span><span class="sc">;</span> 1463 1464 <span class="i">$Options</span>{<span class="w">bitvectorcomparisonmode</span>} = <span class="q">"TanimotoSimilarity"</span><span class="sc">;</span> 1465 1466 <span class="i">$Options</span>{<span 
class="w">databasecolmode</span>} = <span class="q">'colnum'</span><span class="sc">;</span> 1467 1468 <span class="i">$Options</span>{<span class="w">databasecompoundidprefix</span>} = <span class="q">'Cmpd'</span><span class="sc">;</span> 1469 <span class="i">$Options</span>{<span class="w">databasecompoundidmode</span>} = <span class="q">'LabelPrefix'</span><span class="sc">;</span> 1470 1471 <span class="i">$Options</span>{<span class="w">databasedatacolsmode</span>} = <span class="q">'CompoundID'</span><span class="sc">;</span> 1472 <span class="i">$Options</span>{<span class="w">databasedatafieldsmode</span>} = <span class="q">'CompoundID'</span><span class="sc">;</span> 1473 1474 <span class="i">$Options</span>{<span class="w">distancecutoff</span>} = <span class="n">10</span><span class="sc">;</span> 1475 1476 <span class="i">$Options</span>{<span class="w">referencecolmode</span>} = <span class="q">'colnum'</span><span class="sc">;</span> 1477 1478 <span class="i">$Options</span>{<span class="w">referencecompoundidprefix</span>} = <span class="q">'Cmpd'</span><span class="sc">;</span> 1479 <span class="i">$Options</span>{<span class="w">referencecompoundidmode</span>} = <span class="q">'LabelPrefix'</span><span class="sc">;</span> 1480 1481 <span class="i">$Options</span>{<span class="w">detail</span>} = <span class="n">1</span><span class="sc">;</span> 1482 1483 <span class="i">$Options</span>{<span class="w">fingerprintsmode</span>} = <span class="q">'AutoDetect'</span><span class="sc">;</span> 1484 <span class="i">$Options</span>{<span class="w">groupfusionrule</span>} = <span class="q">'Max'</span><span class="sc">;</span> 1485 <span class="i">$Options</span>{<span class="w">groupfusionapplycutoff</span>} = <span class="q">'Yes'</span><span class="sc">;</span> 1486 1487 <span class="i">$Options</span>{<span class="w">knn</span>} = <span class="q">'All'</span><span class="sc">;</span> 1488 1489 <span class="i">$Options</span>{<span 
class="w">mode</span>} = <span class="q">'MultipleReferences'</span><span class="sc">;</span> 1490 1491 <span class="i">$Options</span>{<span class="w">numofsimilarmolecules</span>} = <span class="n">10</span><span class="sc">;</span> 1492 <span class="i">$Options</span>{<span class="w">percentsimilarmolecules</span>} = <span class="n">1</span><span class="sc">;</span> 1493 1494 <span class="i">$Options</span>{<span class="w">indelim</span>} = <span class="q">'comma'</span><span class="sc">;</span> 1495 <span class="i">$Options</span>{<span class="w">outdelim</span>} = <span class="q">'comma'</span><span class="sc">;</span> 1496 <span class="i">$Options</span>{<span class="w">quote</span>} = <span class="q">'yes'</span><span class="sc">;</span> 1497 1498 <span class="i">$Options</span>{<span class="w">output</span>} = <span class="q">'text'</span><span class="sc">;</span> 1499 1500 <span class="i">$Options</span>{<span class="w">precision</span>} = <span class="n">2</span><span class="sc">;</span> 1501 1502 <span class="i">$Options</span>{<span class="w">searchmode</span>} = <span class="q">'SimilaritySearch'</span><span class="sc">;</span> 1503 1504 <span class="i">$Options</span>{<span class="w">similarcountmode</span>} = <span class="q">'NumOfSimilar'</span><span class="sc">;</span> 1505 1506 <span class="i">$Options</span>{<span class="w">similaritycutoff</span>} = <span class="n">0.75</span><span class="sc">;</span> 1507 1508 <span class="i">$Options</span>{<span class="w">vectorcomparisonmode</span>} = <span class="q">'TanimotoSimilarity'</span><span class="sc">;</span> 1509 <span class="i">$Options</span>{<span class="w">vectorcomparisonformulism</span>} = <span class="q">'AlgebraicForm'</span><span class="sc">;</span> 1510 1511 <span class="k">if</span> <span class="s">(</span>!<span class="i">GetOptions</span><span class="s">(</span>\<span class="i">%Options</span><span class="cm">,</span> <span class="q">"alpha=f"</span><span class="cm">,</span> <span 
class="q">"beta=f"</span><span class="cm">,</span> <span class="q">"bitvectorcomparisonmode|b=s"</span><span class="cm">,</span> <span class="q">"databasecolmode=s"</span><span class="cm">,</span> <span class="q">"databasecompoundidcol=s"</span><span class="cm">,</span> <span class="q">"databasecompoundidprefix=s"</span><span class="cm">,</span> <span class="q">"databasecompoundidfield=s"</span><span class="cm">,</span> <span class="q">"databasecompoundidmode=s"</span><span class="cm">,</span> <span class="q">"databasedatacols=s"</span><span class="cm">,</span> <span class="q">"databasedatacolsmode=s"</span><span class="cm">,</span> <span class="q">"databasedatafields=s"</span><span class="cm">,</span> <span class="q">"databasedatafieldsmode=s"</span><span class="cm">,</span> <span class="q">"databasefingerprintscol=s"</span><span class="cm">,</span> <span class="q">"databasefingerprintsfield=s"</span><span class="cm">,</span> <span class="q">"distancecutoff=f"</span><span class="cm">,</span> <span class="q">"detail|d=i"</span><span class="cm">,</span> <span class="q">"fast|f"</span><span class="cm">,</span> <span class="q">"fingerprintsmode=s"</span><span class="cm">,</span> <span class="q">"groupfusionrule|g=s"</span><span class="cm">,</span> <span class="cm">,</span> <span class="q">"groupfusionapplycutoff=s"</span><span class="cm">,</span> <span class="q">"help|h"</span><span class="cm">,</span> <span class="q">"indelim=s"</span><span class="cm">,</span> <span class="q">"knn|k=s"</span><span class="cm">,</span> <span class="q">"mode|m=s"</span><span class="cm">,</span> <span class="q">"numofsimilarmolecules|n=i"</span><span class="cm">,</span> <span class="q">"outdelim=s"</span><span class="cm">,</span> <span class="q">"output=s"</span><span class="cm">,</span> <span class="q">"overwrite|o"</span><span class="cm">,</span> <span class="q">"percentsimilarmolecules|p=f"</span><span class="cm">,</span> <span class="q">"precision=s"</span><span class="cm">,</span> 
<span class="q">"quote|q=s"</span><span class="cm">,</span> <span class="q">"referencecolmode=s"</span><span class="cm">,</span> <span class="q">"referencecompoundidcol=s"</span><span class="cm">,</span> <span class="q">"referencecompoundidprefix=s"</span><span class="cm">,</span> <span class="q">"referencecompoundidfield=s"</span><span class="cm">,</span> <span class="q">"referencecompoundidmode=s"</span><span class="cm">,</span> <span class="q">"referencefingerprintscol=s"</span><span class="cm">,</span> <span class="q">"referencefingerprintsfield=s"</span><span class="cm">,</span> <span class="q">"root|r=s"</span><span class="cm">,</span> <span class="q">"searchmode|s=s"</span><span class="cm">,</span> <span class="q">"similarcountmode=s"</span><span class="cm">,</span> <span class="q">"similaritycutoff=f"</span><span class="cm">,</span> <span class="q">"vectorcomparisonmode|v=s"</span><span class="cm">,</span> <span class="q">"vectorcomparisonformulism=s"</span><span class="cm">,</span> <span class="q">"workingdir|w=s"</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1512 <span class="k">die</span> <span class="q">"\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n"</span><span class="sc">;</span> 1513 <span class="s">}</span> 1514 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">workingdir</span>}<span class="s">)</span> <span class="s">{</span> 1515 <span class="k">if</span> <span class="s">(</span>! 
<span class="k">-d</span> <span class="i">$Options</span>{<span class="w">workingdir</span>}<span class="s">)</span> <span class="s">{</span> 1516 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n"</span><span class="sc">;</span> 1517 <span class="s">}</span> 1518 <span class="k">chdir</span> <span class="i">$Options</span>{<span class="w">workingdir</span>} <span class="k">or</span> <span class="k">die</span> <span class="q">"Error: Couldn't chdir $Options{workingdir}: $! \n"</span><span class="sc">;</span> 1519 <span class="s">}</span> 1520 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">databasecolmode</span>} !~ <span class="q">/^(ColNum|ColLabel)$/i</span><span class="s">)</span> <span class="s">{</span> 1521 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{databasecolmode}, for option \"--DatabaseColMode\" is not valid. Allowed values: ColNum, or ColLabel\n"</span><span class="sc">;</span> 1522 <span class="s">}</span> 1523 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">databasecompoundidmode</span>} !~ <span class="q">/^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i</span><span class="s">)</span> <span class="s">{</span> 1524 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{databasecompoundidmode}, for option \"--DatabaseCompoundIDMode\" is not valid. 
Allowed values: DataField, MolName, LabelPrefix or MolNameOrLabelPrefix\n"</span><span class="sc">;</span> 1525 <span class="s">}</span> 1526 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">databasedatacolsmode</span>} !~ <span class="q">/^(All|Specify|CompoundID)$/i</span><span class="s">)</span> <span class="s">{</span> 1527 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{databasedatacolsmode}, for option \"--DatabaseDataColsMode\" is not valid. Allowed values: All, Specify, or CompoundID\n"</span><span class="sc">;</span> 1528 <span class="s">}</span> 1529 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">databasedatafieldsmode</span>} !~ <span class="q">/^(All|Common|Specify|CompoundID)$/i</span><span class="s">)</span> <span class="s">{</span> 1530 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{databasedatafieldsmode}, for option \"--DatabaseDataFieldsMode\" is not valid. Allowed values: All, Common, Specify, or CompoundID\n"</span><span class="sc">;</span> 1531 <span class="s">}</span> 1532 <span class="k">if</span> <span class="s">(</span>!<span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">detail</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1533 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{detail}, for option \"-d, --detail\" is not valid. 
Allowed values: > 0 \n"</span><span class="sc">;</span> 1534 <span class="s">}</span> 1535 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">fingerprintsmode</span>} !~ <span class="q">/^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i</span><span class="s">)</span> <span class="s">{</span> 1536 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{fingerprintsmode}, for option \"--FingerprintsMode\" is not valid. Allowed values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString \n"</span><span class="sc">;</span> 1537 <span class="s">}</span> 1538 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">groupfusionrule</span>} !~ <span class="q">/^(Max|Min|Mean|Median|Sum|Euclidean)$/i</span><span class="s">)</span> <span class="s">{</span> 1539 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{groupfusionrule}, for option \"-g, --GroupFusionRule\" is not valid. Allowed values: Max, Min, Mean, Median, Sum, Euclidean\n"</span><span class="sc">;</span> 1540 <span class="s">}</span> 1541 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">groupfusionapplycutoff</span>} !~ <span class="q">/^(Yes|No)$/i</span><span class="s">)</span> <span class="s">{</span> 1542 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{quote}, for option \"--GroupFusionApplyCutoff\" is not valid. Allowed values: Yes or No\n"</span><span class="sc">;</span> 1543 <span class="s">}</span> 1544 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">indelim</span>} !~ <span class="q">/^(comma|semicolon)$/i</span><span class="s">)</span> <span class="s">{</span> 1545 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{indelim}, for option \"--InDelim\" is not valid. 
Allowed values: comma, or semicolon\n"</span><span class="sc">;</span> 1546 <span class="s">}</span> 1547 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">mode</span>} !~ <span class="q">/^(IndividualReference|MultipleReferences)$/i</span><span class="s">)</span> <span class="s">{</span> 1548 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: IndividualReference, MultipleReferences\n"</span><span class="sc">;</span> 1549 <span class="s">}</span> 1550 <span class="k">if</span> <span class="s">(</span>!<span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">numofsimilarmolecules</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1551 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{numofsimilarmolecules}, for option \"-n, --NumOfSimilarMolecules\" is not valid. Allowed values: > 0 \n"</span><span class="sc">;</span> 1552 <span class="s">}</span> 1553 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">outdelim</span>} !~ <span class="q">/^(comma|semicolon|tab)$/i</span><span class="s">)</span> <span class="s">{</span> 1554 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{outdelim}, for option \"--OutDelim\" is not valid. Allowed values: comma, tab, or semicolon\n"</span><span class="sc">;</span> 1555 <span class="s">}</span> 1556 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">output</span>} !~ <span class="q">/^(SD|text|both)$/i</span><span class="s">)</span> <span class="s">{</span> 1557 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{output}, for option \"--output\" is not valid. 
Allowed values: SD, text, or both\n"</span><span class="sc">;</span> 1558 <span class="s">}</span> 1559 <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="i">IsFloat</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">percentsimilarmolecules</span>}<span class="s">)</span> && <span class="i">$Options</span>{<span class="w">percentsimilarmolecules</span>} > <span class="n">0</span> && <span class="i">$Options</span>{<span class="w">percentsimilarmolecules</span>} <= <span class="n">100</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1560 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{percentsimilarmolecules}, for option \"-p, --PercentSimilarMolecules\" is not valid. Allowed values: > 0 and <= 100 \n"</span><span class="sc">;</span> 1561 <span class="s">}</span> 1562 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">quote</span>} !~ <span class="q">/^(Yes|No)$/i</span><span class="s">)</span> <span class="s">{</span> 1563 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: Yes or No\n"</span><span class="sc">;</span> 1564 <span class="s">}</span> 1565 <span class="k">if</span> <span class="s">(</span>!<span class="i">IsPositiveInteger</span><span class="s">(</span><span class="i">$Options</span>{<span class="w">precision</span>}<span class="s">)</span><span class="s">)</span> <span class="s">{</span> 1566 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{precision}, for option \"--precision\" is not valid. 
Allowed values: > 0 \n"</span><span class="sc">;</span> 1567 <span class="s">}</span> 1568 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">referencecolmode</span>} !~ <span class="q">/^(ColNum|ColLabel)$/i</span><span class="s">)</span> <span class="s">{</span> 1569 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{referencecolmode}, for option \"--ReferenceColMode\" is not valid. Allowed values: ColNum, or ColLabel\n"</span><span class="sc">;</span> 1570 <span class="s">}</span> 1571 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">referencecompoundidmode</span>} !~ <span class="q">/^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i</span><span class="s">)</span> <span class="s">{</span> 1572 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{referencecompoundidmode}, for option \"--ReferenceCompoundIDMode\" is not valid. Allowed values: DataField, MolName, LabelPrefix or MolNameOrLabelPrefix\n"</span><span class="sc">;</span> 1573 <span class="s">}</span> 1574 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">searchmode</span>} !~ <span class="q">/^(SimilaritySearch|DissimilaritySearch)$/i</span><span class="s">)</span> <span class="s">{</span> 1575 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{searchmode}, for option \"-s, --SearchMode\" is not valid. 
Allowed values: SimilaritySearch, DissimilaritySearch \n"</span><span class="sc">;</span> 1576 <span class="s">}</span> 1577 <span class="k">if</span> <span class="s">(</span><span class="i">$Options</span>{<span class="w">similarcountmode</span>} !~ <span class="q">/^(NumOfSimilar|PercentSimilar)$/i</span><span class="s">)</span> <span class="s">{</span> 1578 <span class="k">die</span> <span class="q">"Error: The value specified, $Options{similarcountmode}, for option \"--SimilarCountMode\" is not valid. Allowed values: NumOfSimilar, PercentSimilar \n"</span><span class="sc">;</span> 1579 <span class="s">}</span> 1580 <span class="s">}</span> 1581 <a name="EOF-"></a></pre> <p> </p> <br /> <center> <img src="../../../images/h2o2.png"> </center> </body> </html>