comparison mayachemtools/docs/modules/html/code/FingerprintsFileUtil.html @ 0:73ae111cf86f draft

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 11:55:01 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:73ae111cf86f
1 <html>
2 <head>
3 <title>MayaChemTools:Code:Fingerprints::FingerprintsFileUtil.pm</title>
4 <meta http-equiv="content-type" content="text/html;charset=utf-8">
5 <link rel="stylesheet" type="text/css" href="../../../css/MayaChemToolsCode.css">
6 </head>
7 <body leftmargin="20" rightmargin="20" topmargin="10" bottommargin="10">
8 <br/>
9 <center>
10 <a href="http://www.mayachemtools.org" title="MayaChemTools Home"><img src="../../../images/MayaChemToolsLogo.gif" border="0" alt="MayaChemTools"></a>
11 </center>
12 <br/>
13 <pre>
14 <a name="package-Fingerprints::FingerprintsFileUtil-"></a> 1 <span class="k">package </span><span class="i">Fingerprints::FingerprintsFileUtil</span><span class="sc">;</span>
15 2 <span class="c">#</span>
16 3 <span class="c"># $RCSfile: FingerprintsFileUtil.pm,v $</span>
17 4 <span class="c"># $Date: 2015/02/28 20:48:54 $</span>
18 5 <span class="c"># $Revision: 1.14 $</span>
19 6 <span class="c">#</span>
20 7 <span class="c"># Author: Manish Sud &lt;msud@san.rr.com&gt;</span>
21 8 <span class="c">#</span>
22 9 <span class="c"># Copyright (C) 2015 Manish Sud. All rights reserved.</span>
23 10 <span class="c">#</span>
24 11 <span class="c"># This file is part of MayaChemTools.</span>
25 12 <span class="c">#</span>
26 13 <span class="c"># MayaChemTools is free software; you can redistribute it and/or modify it under</span>
27 14 <span class="c"># the terms of the GNU Lesser General Public License as published by the Free</span>
28 15 <span class="c"># Software Foundation; either version 3 of the License, or (at your option) any</span>
29 16 <span class="c"># later version.</span>
30 17 <span class="c">#</span>
31 18 <span class="c"># MayaChemTools is distributed in the hope that it will be useful, but without</span>
32 19 <span class="c"># any warranty; without even the implied warranty of merchantability or fitness</span>
33 20 <span class="c"># for a particular purpose. See the GNU Lesser General Public License for more</span>
34 21 <span class="c"># details.</span>
35 22 <span class="c">#</span>
36 23 <span class="c"># You should have received a copy of the GNU Lesser General Public License</span>
37 24 <span class="c"># along with MayaChemTools; if not, see &lt;http://www.gnu.org/licenses/&gt; or</span>
38 25 <span class="c"># write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,</span>
39 26 <span class="c"># Boston, MA, 02111-1307, USA.</span>
40 27 <span class="c">#</span>
41 28
42 29 <span class="k">use</span> <span class="w">strict</span><span class="sc">;</span>
43 30 <span class="k">use</span> <span class="w">Exporter</span><span class="sc">;</span>
44 31 <span class="k">use</span> <span class="w">Carp</span><span class="sc">;</span>
45 32 <span class="k">use</span> <span class="w">TextUtil</span> <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
46 33 <span class="k">use</span> <span class="w">FileUtil</span> <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
47 34 <span class="k">use</span> <span class="w">FileIO::FingerprintsSDFileIO</span><span class="sc">;</span>
48 35 <span class="k">use</span> <span class="w">FileIO::FingerprintsTextFileIO</span><span class="sc">;</span>
49 36 <span class="k">use</span> <span class="w">FileIO::FingerprintsFPFileIO</span><span class="sc">;</span>
50 37
51 38 <span class="k">use</span> <span class="w">vars</span> <span class="q">qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS)</span><span class="sc">;</span>
52 39
53 40 <span class="i">@ISA</span> = <span class="q">qw(Exporter)</span><span class="sc">;</span>
54 41 <span class="i">@EXPORT</span> = <span class="q">qw()</span><span class="sc">;</span>
55 42 <span class="i">@EXPORT_OK</span> = <span class="q">qw(GetFingerprintsFileType ReadAndProcessFingerpritsData NewFingerprintsFileIO)</span><span class="sc">;</span>
56 43
57 44 <span class="i">%EXPORT_TAGS</span> = <span class="s">(</span><span class="w">all</span> <span class="cm">=&gt;</span> <span class="s">[</span><span class="i">@EXPORT</span><span class="cm">,</span> <span class="i">@EXPORT_OK</span><span class="s">]</span><span class="s">)</span><span class="sc">;</span>
58 45
59 46 <span class="c"># Generate new FingerprintsFileIO object for a SD, FP or Text fingerprints file specified using file name</span>
60 47 <span class="c"># along with other appropriate parameters...</span>
61 48 <span class="c">#</span>
62 <a name="NewFingerprintsFileIO-"></a> 49 <span class="k">sub </span><span class="m">NewFingerprintsFileIO</span> <span class="s">{</span>
63 50 <span class="k">my</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
64 51 <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFileIO</span><span class="cm">,</span> <span class="i">$FileType</span><span class="s">)</span><span class="sc">;</span>
65 52
66 53 <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">exists</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Name</span>}<span class="s">)</span> &amp;&amp; <span class="i">TextUtil::IsNotEmpty</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Name</span>}<span class="s">)</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
67 54 <span class="w">carp</span> <span class="q">&quot;Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can&#39;t create new FingerprintsFileIO object: File name is not specified...\n&quot;</span><span class="sc">;</span>
68 55 <span class="k">return</span> <span class="k">undef</span><span class="sc">;</span>
69 56 <span class="s">}</span>
70 57
71 58 <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">exists</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Mode</span>}<span class="s">)</span> &amp;&amp; <span class="i">TextUtil::IsNotEmpty</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Mode</span>}<span class="s">)</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
72 59 <span class="w">carp</span> <span class="q">&quot;Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can&#39;t create new FingerprintsFileIO object: File mode is not specified...\n&quot;</span><span class="sc">;</span>
73 60 <span class="k">return</span> <span class="k">undef</span><span class="sc">;</span>
74 61 <span class="s">}</span>
75 62
76 63 <span class="i">$FileType</span> = <span class="i">GetFingerprintsFileType</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Name</span>}<span class="s">)</span><span class="sc">;</span>
77 64 <span class="k">if</span> <span class="s">(</span><span class="i">TextUtil::IsEmpty</span><span class="s">(</span><span class="i">$FileType</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
78 65 <span class="w">carp</span> <span class="q">&quot;Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can&#39;t create new FingerprintsFileIO object: File type is not specified...\n&quot;</span><span class="sc">;</span>
79 66 <span class="k">return</span> <span class="k">undef</span><span class="sc">;</span>
80 67 <span class="s">}</span>
81 68
82 69 <span class="c"># Generate fingerprints IO object...</span>
83 70 <span class="j">FILETYPE:</span> <span class="s">{</span>
84 71 <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^SD$/i</span><span class="s">)</span> <span class="s">{</span>
85 72 <span class="i">$FingerprintsFileIO</span> = <span class="w">new</span> <span class="i">FileIO::FingerprintsSDFileIO</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span><span class="sc">;</span>
86 73 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
87 74 <span class="s">}</span>
88 75 <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^FP$/i</span><span class="s">)</span> <span class="s">{</span>
89 76 <span class="i">$FingerprintsFileIO</span> = <span class="w">new</span> <span class="i">FileIO::FingerprintsFPFileIO</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span><span class="sc">;</span>
90 77 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
91 78 <span class="s">}</span>
92 79 <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^Text$/i</span><span class="s">)</span> <span class="s">{</span>
93 80 <span class="i">$FingerprintsFileIO</span> = <span class="w">new</span> <span class="i">FileIO::FingerprintsTextFileIO</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span><span class="sc">;</span>
94 81 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
95 82 <span class="s">}</span>
96 83 <span class="i">$FingerprintsFileIO</span> = <span class="k">undef</span><span class="sc">;</span>
97 84 <span class="w">carp</span> <span class="q">&quot;Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Fingerprints file type, $FileType, is not valid. Supported file types: SD, FP or Text\n&quot;</span><span class="sc">;</span>
98 85 <span class="s">}</span>
99 86
100 87 <span class="k">return</span> <span class="i">$FingerprintsFileIO</span><span class="sc">;</span>
101 88 <span class="s">}</span>
102 89
103 90 <span class="c"># Get fingerprints file type from fingerprints file name...</span>
104 91 <span class="c">#</span>
105 <a name="GetFingerprintsFileType-"></a> 92 <span class="k">sub </span><span class="m">GetFingerprintsFileType</span> <span class="s">{</span>
106 93 <span class="k">my</span><span class="s">(</span><span class="i">$FileName</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
107 94 <span class="k">my</span><span class="s">(</span><span class="i">$FileType</span><span class="s">)</span><span class="sc">;</span>
108 95
109 96 <span class="i">$FileType</span> = <span class="q">&#39;&#39;</span><span class="sc">;</span>
110 97 <span class="j">FILETYPE:</span> <span class="s">{</span>
111 98 <span class="k">if</span> <span class="s">(</span><span class="i">FileUtil::CheckFileType</span><span class="s">(</span><span class="i">$FileName</span><span class="cm">,</span> <span class="q">&quot;sdf sd&quot;</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
112 99 <span class="i">$FileType</span> = <span class="q">&#39;SD&#39;</span><span class="sc">;</span>
113 100 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
114 101 <span class="s">}</span>
115 102 <span class="k">if</span> <span class="s">(</span><span class="i">FileUtil::CheckFileType</span><span class="s">(</span><span class="i">$FileName</span><span class="cm">,</span> <span class="q">&quot;fpf fp&quot;</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
116 103 <span class="i">$FileType</span> = <span class="q">&#39;FP&#39;</span><span class="sc">;</span>
117 104 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
118 105 <span class="s">}</span>
119 106 <span class="k">if</span> <span class="s">(</span><span class="i">FileUtil::CheckFileType</span><span class="s">(</span><span class="i">$FileName</span><span class="cm">,</span> <span class="q">&quot;csv tsv&quot;</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
120 107 <span class="i">$FileType</span> = <span class="q">&#39;Text&#39;</span><span class="sc">;</span>
121 108 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
122 109 <span class="s">}</span>
123 110 <span class="i">$FileType</span> = <span class="q">&#39;&#39;</span><span class="sc">;</span>
124 111 <span class="w">carp</span> <span class="q">&quot;Warning: Fingerprints::FingerprintsFileUtil::GetFingerprintsFileType: Can&#39;t determine fingerprints file type for $FileName: It&#39;s not a fingerprints file...\n&quot;</span><span class="sc">;</span>
125 112 <span class="s">}</span>
126 113
127 114 <span class="k">return</span> <span class="i">$FileType</span><span class="sc">;</span>
128 115 <span class="s">}</span>
129 116
130 117
131 118 <span class="c"># Process fingerprints bit-vector and vector string data in a file using FingerprintsFileIO</span>
132 119 <span class="c"># object and return references to arrays of CompoundIDs and FingerprintsObjects...</span>
133 120 <span class="c">#</span>
134 121 <span class="c"># Note:</span>
135 122 <span class="c"># . The file is opened and closed automatically during processing.</span>
136 123 <span class="c">#</span>
137 <a name="ReadAndProcessFingerpritsData-"></a> 124 <span class="k">sub </span><span class="m">ReadAndProcessFingerpritsData</span> <span class="s">{</span>
138 125 <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFileIO</span><span class="cm">,</span> <span class="i">$CheckCompoundIDs</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
139 126 <span class="k">my</span><span class="s">(</span><span class="i">$CompoundID</span><span class="cm">,</span> <span class="i">$FingerprintsCount</span><span class="cm">,</span> <span class="i">$IgnoredFingerprintsCount</span><span class="cm">,</span> <span class="i">@CompundIDs</span><span class="cm">,</span> <span class="i">@FingerprintsObjects</span><span class="cm">,</span> <span class="i">%UniqueCompoundIDs</span><span class="s">)</span><span class="sc">;</span>
140 127
141 128 <span class="k">if</span> <span class="s">(</span>!<span class="i">$FingerprintsFileIO</span><span class="s">)</span> <span class="s">{</span>
142 129 <span class="k">return</span> <span class="s">(</span><span class="k">undef</span><span class="cm">,</span> <span class="k">undef</span><span class="s">)</span><span class="sc">;</span>
143 130 <span class="s">}</span>
144 131 <span class="i">$CheckCompoundIDs</span> = <span class="k">defined</span> <span class="i">$CheckCompoundIDs</span> ? <span class="i">$CheckCompoundIDs</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span>
145 132
146 133 <span class="k">print</span> <span class="q">&quot;\nReading and processing fingerprints data...\n&quot;</span><span class="sc">;</span>
147 134
148 135 <span class="s">(</span><span class="i">$FingerprintsCount</span><span class="cm">,</span> <span class="i">$IgnoredFingerprintsCount</span><span class="s">)</span> = <span class="s">(</span><span class="n">0</span><span class="s">)</span> x <span class="n">3</span><span class="sc">;</span>
149 136
150 137 <span class="i">@CompundIDs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
151 138 <span class="i">@FingerprintsObjects</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
152 139
153 140 <span class="i">%UniqueCompoundIDs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
154 141
155 142 <span class="c"># Check and open file for reading...</span>
156 143 <span class="k">if</span> <span class="s">(</span>!<span class="i">$FingerprintsFileIO</span><span class="i">-&gt;GetStatus</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
157 144 <span class="i">$FingerprintsFileIO</span><span class="i">-&gt;Open</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
158 145 <span class="s">}</span>
159 146
160 147 <span class="j">FINGERPRINTS:</span> <span class="k">while</span> <span class="s">(</span><span class="i">$FingerprintsFileIO</span><span class="i">-&gt;Read</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
161 148 <span class="i">$FingerprintsCount</span>++<span class="sc">;</span>
162 149
163 150 <span class="k">if</span> <span class="s">(</span>!<span class="i">$FingerprintsFileIO</span><span class="i">-&gt;IsFingerprintsDataValid</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
164 151 <span class="i">$IgnoredFingerprintsCount</span>++<span class="sc">;</span>
165 152 <span class="k">next</span> <span class="j">FINGERPRINTS</span><span class="sc">;</span>
166 153 <span class="s">}</span>
167 154
168 155 <span class="k">if</span> <span class="s">(</span><span class="i">$CheckCompoundIDs</span><span class="s">)</span> <span class="s">{</span>
169 156 <span class="i">$CompoundID</span> = <span class="i">$FingerprintsFileIO</span><span class="i">-&gt;GetCompoundID</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
170 157 <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$UniqueCompoundIDs</span>{<span class="i">$CompoundID</span>}<span class="s">)</span> <span class="s">{</span>
171 158 <span class="k">warn</span> <span class="q">&quot;Warning: Ignoring fingerprints data for compound ID $CompoundID: Multiple entries for compound ID in fingerprints file.\n&quot;</span><span class="sc">;</span>
172 159 <span class="i">$IgnoredFingerprintsCount</span>++<span class="sc">;</span>
173 160 <span class="k">next</span> <span class="j">FINGERPRINTS</span><span class="sc">;</span>
174 161 <span class="s">}</span>
175 162 <span class="i">$UniqueCompoundIDs</span>{<span class="i">$CompoundID</span>} = <span class="i">$CompoundID</span><span class="sc">;</span>
176 163 <span class="s">}</span>
177 164
178 165 <span class="k">push</span> <span class="i">@FingerprintsObjects</span><span class="cm">,</span> <span class="i">$FingerprintsFileIO</span><span class="i">-&gt;GetFingerprints</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
179 166 <span class="k">push</span> <span class="i">@CompundIDs</span><span class="cm">,</span> <span class="i">$FingerprintsFileIO</span><span class="i">-&gt;GetCompoundID</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
180 167 <span class="s">}</span>
181 168 <span class="i">$FingerprintsFileIO</span><span class="i">-&gt;Close</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
182 169
183 170 <span class="k">print</span> <span class="q">&quot;Number of fingerprints data entries: $FingerprintsCount\n&quot;</span><span class="sc">;</span>
184 171 <span class="k">print</span> <span class="q">&quot;Number of fingerprints date entries processed successfully: &quot;</span><span class="cm">,</span> <span class="s">(</span><span class="i">$FingerprintsCount</span> - <span class="i">$IgnoredFingerprintsCount</span><span class="s">)</span> <span class="cm">,</span> <span class="q">&quot;\n&quot;</span><span class="sc">;</span>
185 172 <span class="k">print</span> <span class="q">&quot;Number of fingerprints data entries ignored due to missing/invalid data: $IgnoredFingerprintsCount\n\n&quot;</span><span class="sc">;</span>
186 173
187 174 <span class="k">return</span> <span class="s">(</span>\<span class="i">@CompundIDs</span><span class="cm">,</span> \<span class="i">@FingerprintsObjects</span><span class="s">)</span><span class="sc">;</span>
188 175 <span class="s">}</span>
189 176
190 177
191 <a name="EOF-"></a></pre>
192 <p>&nbsp;</p>
193 <br />
194 <center>
195 <img src="../../../images/h2o2.png">
196 </center>
197 </body>
198 </html>