diff mayachemtools/docs/modules/html/code/FingerprintsFileUtil.html @ 0:73ae111cf86f draft

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 11:55:01 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mayachemtools/docs/modules/html/code/FingerprintsFileUtil.html	Wed Jan 20 11:55:01 2016 -0500
@@ -0,0 +1,198 @@
+<html>
+<head>
+<title>MayaChemTools:Code:Fingerprints::FingerprintsFileUtil.pm</title>
+<meta http-equiv="content-type" content="text/html;charset=utf-8">
+<link rel="stylesheet" type="text/css" href="../../../css/MayaChemToolsCode.css">
+</head>
+<body leftmargin="20" rightmargin="20" topmargin="10" bottommargin="10">
+<br/>
+<center>
+<a href="http://www.mayachemtools.org" title="MayaChemTools Home"><img src="../../../images/MayaChemToolsLogo.gif" border="0" alt="MayaChemTools"></a>
+</center>
+<br/>
+<pre>
+<a name="package-Fingerprints::FingerprintsFileUtil-"></a>   1 <span class="k">package </span><span class="i">Fingerprints::FingerprintsFileUtil</span><span class="sc">;</span>
+   2 <span class="c">#</span>
+   3 <span class="c"># $RCSfile: FingerprintsFileUtil.pm,v $</span>
+   4 <span class="c"># $Date: 2015/02/28 20:48:54 $</span>
+   5 <span class="c"># $Revision: 1.14 $</span>
+   6 <span class="c">#</span>
+   7 <span class="c"># Author: Manish Sud &lt;msud@san.rr.com&gt;</span>
+   8 <span class="c">#</span>
+   9 <span class="c"># Copyright (C) 2015 Manish Sud. All rights reserved.</span>
+  10 <span class="c">#</span>
+  11 <span class="c"># This file is part of MayaChemTools.</span>
+  12 <span class="c">#</span>
+  13 <span class="c"># MayaChemTools is free software; you can redistribute it and/or modify it under</span>
+  14 <span class="c"># the terms of the GNU Lesser General Public License as published by the Free</span>
+  15 <span class="c"># Software Foundation; either version 3 of the License, or (at your option) any</span>
+  16 <span class="c"># later version.</span>
+  17 <span class="c">#</span>
+  18 <span class="c"># MayaChemTools is distributed in the hope that it will be useful, but without</span>
+  19 <span class="c"># any warranty; without even the implied warranty of merchantability of fitness</span>
+  20 <span class="c"># for a particular purpose.  See the GNU Lesser General Public License for more</span>
+  21 <span class="c"># details.</span>
+  22 <span class="c">#</span>
+  23 <span class="c"># You should have received a copy of the GNU Lesser General Public License</span>
+  24 <span class="c"># along with MayaChemTools; if not, see &lt;http://www.gnu.org/licenses/&gt; or</span>
+  25 <span class="c"># write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,</span>
+  26 <span class="c"># Boston, MA, 02111-1307, USA.</span>
+  27 <span class="c">#</span>
+  28 
+  29 <span class="k">use</span> <span class="w">strict</span><span class="sc">;</span>
+  30 <span class="k">use</span> <span class="w">Exporter</span><span class="sc">;</span>
+  31 <span class="k">use</span> <span class="w">Carp</span><span class="sc">;</span>
+  32 <span class="k">use</span> <span class="w">TextUtil</span> <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+  33 <span class="k">use</span> <span class="w">FileUtil</span> <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+  34 <span class="k">use</span> <span class="w">FileIO::FingerprintsSDFileIO</span><span class="sc">;</span>
+  35 <span class="k">use</span> <span class="w">FileIO::FingerprintsTextFileIO</span><span class="sc">;</span>
+  36 <span class="k">use</span> <span class="w">FileIO::FingerprintsFPFileIO</span><span class="sc">;</span>
+  37 
+  38 <span class="k">use</span> <span class="w">vars</span> <span class="q">qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS)</span><span class="sc">;</span>
+  39 
+  40 <span class="i">@ISA</span> = <span class="q">qw(Exporter)</span><span class="sc">;</span>
+  41 <span class="i">@EXPORT</span> = <span class="q">qw()</span><span class="sc">;</span>
+  42 <span class="i">@EXPORT_OK</span> = <span class="q">qw(GetFingerprintsFileType ReadAndProcessFingerpritsData  NewFingerprintsFileIO)</span><span class="sc">;</span>
+  43 
+  44 <span class="i">%EXPORT_TAGS</span> = <span class="s">(</span><span class="w">all</span>  <span class="cm">=&gt;</span> <span class="s">[</span><span class="i">@EXPORT</span><span class="cm">,</span> <span class="i">@EXPORT_OK</span><span class="s">]</span><span class="s">)</span><span class="sc">;</span>
+  45 
+  46 <span class="c"># Generate new FingerprintsFileIO object for a SD, FP or Text fingerprints file specified using file name</span>
+  47 <span class="c"># along other appropriate parameters...</span>
+  48 <span class="c">#</span>
+<a name="NewFingerprintsFileIO-"></a>  49 <span class="k">sub </span><span class="m">NewFingerprintsFileIO</span> <span class="s">{</span>
+  50   <span class="k">my</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+  51   <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFileIO</span><span class="cm">,</span> <span class="i">$FileType</span><span class="s">)</span><span class="sc">;</span>
+  52 
+  53   <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">exists</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Name</span>}<span class="s">)</span> &amp;&amp; <span class="i">TextUtil::IsNotEmpty</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Name</span>}<span class="s">)</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
+  54     <span class="w">carp</span> <span class="q">&quot;Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can&#39;t create new FingerprintsFileIO object: File name is not specified...\n&quot;</span><span class="sc">;</span>
+  55     <span class="k">return</span> <span class="k">undef</span><span class="sc">;</span>
+  56   <span class="s">}</span>
+  57 
+  58   <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">exists</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Mode</span>}<span class="s">)</span> &amp;&amp; <span class="i">TextUtil::IsNotEmpty</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Mode</span>}<span class="s">)</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
+  59     <span class="w">carp</span> <span class="q">&quot;Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can&#39;t create new FingerprintsFileIO object: File mode is not specified...\n&quot;</span><span class="sc">;</span>
+  60     <span class="k">return</span> <span class="k">undef</span><span class="sc">;</span>
+  61   <span class="s">}</span>
+  62 
+  63   <span class="i">$FileType</span> = <span class="i">GetFingerprintsFileType</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Name</span>}<span class="s">)</span><span class="sc">;</span>
+  64   <span class="k">if</span> <span class="s">(</span><span class="i">TextUtil::IsEmpty</span><span class="s">(</span><span class="i">$FileType</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
+  65     <span class="w">carp</span> <span class="q">&quot;Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can&#39;t create new FingerprintsFileIO object: File type is not specified...\n&quot;</span><span class="sc">;</span>
+  66     <span class="k">return</span> <span class="k">undef</span><span class="sc">;</span>
+  67   <span class="s">}</span>
+  68 
+  69   <span class="c"># Generate fingerprints IO object...</span>
+  70   <span class="j">FILETYPE:</span> <span class="s">{</span>
+  71     <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^SD$/i</span><span class="s">)</span> <span class="s">{</span>
+  72       <span class="i">$FingerprintsFileIO</span> = <span class="w">new</span> <span class="i">FileIO::FingerprintsSDFileIO</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span><span class="sc">;</span>
+  73       <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
+  74     <span class="s">}</span>
+  75     <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^FP$/i</span><span class="s">)</span> <span class="s">{</span>
+  76       <span class="i">$FingerprintsFileIO</span> = <span class="w">new</span> <span class="i">FileIO::FingerprintsFPFileIO</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span><span class="sc">;</span>
+  77       <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
+  78     <span class="s">}</span>
+  79     <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^Text$/i</span><span class="s">)</span> <span class="s">{</span>
+  80       <span class="i">$FingerprintsFileIO</span> = <span class="w">new</span> <span class="i">FileIO::FingerprintsTextFileIO</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span><span class="sc">;</span>
+  81       <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
+  82     <span class="s">}</span>
+  83     <span class="i">$FingerprintsFileIO</span> = <span class="k">undef</span><span class="sc">;</span>
+  84     <span class="w">carp</span> <span class="q">&quot;Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Fingerprints file type, $FileType, is not valid. Supported file types: SD, FP or Text\n&quot;</span><span class="sc">;</span>
+  85   <span class="s">}</span>
+  86 
+  87   <span class="k">return</span> <span class="i">$FingerprintsFileIO</span><span class="sc">;</span>
+  88 <span class="s">}</span>
+  89 
+  90 <span class="c"># Get fingerpritns file type from fingerprints file name...</span>
+  91 <span class="c">#</span>
+<a name="GetFingerprintsFileType-"></a>  92 <span class="k">sub </span><span class="m">GetFingerprintsFileType</span> <span class="s">{</span>
+  93   <span class="k">my</span><span class="s">(</span><span class="i">$FileName</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+  94   <span class="k">my</span><span class="s">(</span><span class="i">$FileType</span><span class="s">)</span><span class="sc">;</span>
+  95 
+  96   <span class="i">$FileType</span> = <span class="q">&#39;&#39;</span><span class="sc">;</span>
+  97   <span class="j">FILETYPE:</span> <span class="s">{</span>
+  98     <span class="k">if</span> <span class="s">(</span><span class="i">FileUtil::CheckFileType</span><span class="s">(</span><span class="i">$FileName</span><span class="cm">,</span> <span class="q">&quot;sdf sd&quot;</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
+  99       <span class="i">$FileType</span> = <span class="q">&#39;SD&#39;</span><span class="sc">;</span>
+ 100       <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
+ 101     <span class="s">}</span>
+ 102     <span class="k">if</span> <span class="s">(</span><span class="i">FileUtil::CheckFileType</span><span class="s">(</span><span class="i">$FileName</span><span class="cm">,</span> <span class="q">&quot;fpf fp&quot;</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
+ 103       <span class="i">$FileType</span> = <span class="q">&#39;FP&#39;</span><span class="sc">;</span>
+ 104       <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
+ 105     <span class="s">}</span>
+ 106     <span class="k">if</span> <span class="s">(</span><span class="i">FileUtil::CheckFileType</span><span class="s">(</span><span class="i">$FileName</span><span class="cm">,</span> <span class="q">&quot;csv tsv&quot;</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
+ 107       <span class="i">$FileType</span> = <span class="q">&#39;Text&#39;</span><span class="sc">;</span>
+ 108       <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
+ 109     <span class="s">}</span>
+ 110     <span class="i">$FileType</span> = <span class="q">&#39;&#39;</span><span class="sc">;</span>
+ 111     <span class="w">carp</span> <span class="q">&quot;Warning: Fingerprints::FingerprintsFileUtil::GetFingerprintsFileType: Can&#39;t determine fingerprints file type for $FileName: It&#39;s not a fingerprints file...\n&quot;</span><span class="sc">;</span>
+ 112   <span class="s">}</span>
+ 113 
+ 114   <span class="k">return</span> <span class="i">$FileType</span><span class="sc">;</span>
+ 115 <span class="s">}</span>
+ 116 
+ 117 
+ 118 <span class="c"># Process fingerprints bit-vector and vector string data in a file using FingerprintsFileIO</span>
+ 119 <span class="c"># object and return a references to arrays of CompoundIDs and FingerprintsObjects...</span>
+ 120 <span class="c">#</span>
+ 121 <span class="c"># Note:</span>
+ 122 <span class="c">#  . The file open and close is automatically performed during processing.</span>
+ 123 <span class="c">#</span>
+<a name="ReadAndProcessFingerpritsData-"></a> 124 <span class="k">sub </span><span class="m">ReadAndProcessFingerpritsData</span> <span class="s">{</span>
+ 125   <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFileIO</span><span class="cm">,</span> <span class="i">$CheckCompoundIDs</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+ 126   <span class="k">my</span><span class="s">(</span><span class="i">$CompoundID</span><span class="cm">,</span> <span class="i">$FingerprintsCount</span><span class="cm">,</span> <span class="i">$IgnoredFingerprintsCount</span><span class="cm">,</span> <span class="i">@CompundIDs</span><span class="cm">,</span> <span class="i">@FingerprintsObjects</span><span class="cm">,</span> <span class="i">%UniqueCompoundIDs</span><span class="s">)</span><span class="sc">;</span>
+ 127 
+ 128   <span class="k">if</span> <span class="s">(</span>!<span class="i">$FingerprintsFileIO</span><span class="s">)</span> <span class="s">{</span>
+ 129     <span class="k">return</span> <span class="s">(</span><span class="k">undef</span><span class="cm">,</span> <span class="k">undef</span><span class="s">)</span><span class="sc">;</span>
+ 130   <span class="s">}</span>
+ 131   <span class="i">$CheckCompoundIDs</span> = <span class="k">defined</span> <span class="i">$CheckCompoundIDs</span> ? <span class="i">$CheckCompoundIDs</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span>
+ 132 
+ 133   <span class="k">print</span> <span class="q">&quot;\nReading and processing fingerprints data...\n&quot;</span><span class="sc">;</span>
+ 134 
+ 135   <span class="s">(</span><span class="i">$FingerprintsCount</span><span class="cm">,</span> <span class="i">$IgnoredFingerprintsCount</span><span class="s">)</span> = <span class="s">(</span><span class="n">0</span><span class="s">)</span> x <span class="n">3</span><span class="sc">;</span>
+ 136 
+ 137   <span class="i">@CompundIDs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+ 138   <span class="i">@FingerprintsObjects</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+ 139 
+ 140   <span class="i">%UniqueCompoundIDs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+ 141 
+ 142   <span class="c"># Check and open file for reading...</span>
+ 143   <span class="k">if</span> <span class="s">(</span>!<span class="i">$FingerprintsFileIO</span><span class="i">-&gt;GetStatus</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
+ 144     <span class="i">$FingerprintsFileIO</span><span class="i">-&gt;Open</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+ 145   <span class="s">}</span>
+ 146 
+ 147   <span class="j">FINGERPRINTS:</span> <span class="k">while</span> <span class="s">(</span><span class="i">$FingerprintsFileIO</span><span class="i">-&gt;Read</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
+ 148     <span class="i">$FingerprintsCount</span>++<span class="sc">;</span>
+ 149 
+ 150     <span class="k">if</span> <span class="s">(</span>!<span class="i">$FingerprintsFileIO</span><span class="i">-&gt;IsFingerprintsDataValid</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
+ 151       <span class="i">$IgnoredFingerprintsCount</span>++<span class="sc">;</span>
+ 152       <span class="k">next</span> <span class="j">FINGERPRINTS</span><span class="sc">;</span>
+ 153     <span class="s">}</span>
+ 154 
+ 155     <span class="k">if</span> <span class="s">(</span><span class="i">$CheckCompoundIDs</span><span class="s">)</span> <span class="s">{</span>
+ 156       <span class="i">$CompoundID</span> = <span class="i">$FingerprintsFileIO</span><span class="i">-&gt;GetCompoundID</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+ 157       <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$UniqueCompoundIDs</span>{<span class="i">$CompoundID</span>}<span class="s">)</span> <span class="s">{</span>
+ 158         <span class="k">warn</span> <span class="q">&quot;Warning: Ignoring fingerprints data for compound ID $CompoundID: Multiple entries for compound ID in fingerprints file.\n&quot;</span><span class="sc">;</span>
+ 159         <span class="i">$IgnoredFingerprintsCount</span>++<span class="sc">;</span>
+ 160         <span class="k">next</span> <span class="j">FINGERPRINTS</span><span class="sc">;</span>
+ 161       <span class="s">}</span>
+ 162       <span class="i">$UniqueCompoundIDs</span>{<span class="i">$CompoundID</span>} = <span class="i">$CompoundID</span><span class="sc">;</span>
+ 163     <span class="s">}</span>
+ 164 
+ 165     <span class="k">push</span> <span class="i">@FingerprintsObjects</span><span class="cm">,</span> <span class="i">$FingerprintsFileIO</span><span class="i">-&gt;GetFingerprints</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+ 166     <span class="k">push</span> <span class="i">@CompundIDs</span><span class="cm">,</span> <span class="i">$FingerprintsFileIO</span><span class="i">-&gt;GetCompoundID</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+ 167   <span class="s">}</span>
+ 168   <span class="i">$FingerprintsFileIO</span><span class="i">-&gt;Close</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+ 169 
+ 170   <span class="k">print</span> <span class="q">&quot;Number of fingerprints data entries: $FingerprintsCount\n&quot;</span><span class="sc">;</span>
+ 171   <span class="k">print</span> <span class="q">&quot;Number of fingerprints date entries processed successfully: &quot;</span><span class="cm">,</span> <span class="s">(</span><span class="i">$FingerprintsCount</span> - <span class="i">$IgnoredFingerprintsCount</span><span class="s">)</span>  <span class="cm">,</span> <span class="q">&quot;\n&quot;</span><span class="sc">;</span>
+ 172   <span class="k">print</span> <span class="q">&quot;Number of fingerprints data entries ignored due to missing/invalid data: $IgnoredFingerprintsCount\n\n&quot;</span><span class="sc">;</span>
+ 173 
+ 174   <span class="k">return</span> <span class="s">(</span>\<span class="i">@CompundIDs</span><span class="cm">,</span> \<span class="i">@FingerprintsObjects</span><span class="s">)</span><span class="sc">;</span>
+ 175 <span class="s">}</span>
+ 176 
+ 177 
+<a name="EOF-"></a></pre>
+<p>&nbsp;</p>
+<br />
+<center>
+<img src="../../../images/h2o2.png">
+</center>
+</body>
+</html>