0
|
1 <html>
|
|
2 <head>
|
|
3 <title>MayaChemTools:Code:Fingerprints::FingerprintsFileUtil.pm</title>
|
|
4 <meta http-equiv="content-type" content="text/html;charset=utf-8">
|
|
5 <link rel="stylesheet" type="text/css" href="../../../css/MayaChemToolsCode.css">
|
|
6 </head>
|
|
7 <body leftmargin="20" rightmargin="20" topmargin="10" bottommargin="10">
|
|
8 <br/>
|
|
9 <center>
|
|
10 <a href="http://www.mayachemtools.org" title="MayaChemTools Home"><img src="../../../images/MayaChemToolsLogo.gif" border="0" alt="MayaChemTools"></a>
|
|
11 </center>
|
|
12 <br/>
|
|
13 <pre>
|
|
14 <a name="package-Fingerprints::FingerprintsFileUtil-"></a> 1 <span class="k">package </span><span class="i">Fingerprints::FingerprintsFileUtil</span><span class="sc">;</span>
|
|
15 2 <span class="c">#</span>
|
|
16 3 <span class="c"># $RCSfile: FingerprintsFileUtil.pm,v $</span>
|
|
17 4 <span class="c"># $Date: 2015/02/28 20:48:54 $</span>
|
|
18 5 <span class="c"># $Revision: 1.14 $</span>
|
|
19 6 <span class="c">#</span>
|
|
20 7 <span class="c"># Author: Manish Sud &lt;msud@san.rr.com&gt;</span>
|
|
21 8 <span class="c">#</span>
|
|
22 9 <span class="c"># Copyright (C) 2015 Manish Sud. All rights reserved.</span>
|
|
23 10 <span class="c">#</span>
|
|
24 11 <span class="c"># This file is part of MayaChemTools.</span>
|
|
25 12 <span class="c">#</span>
|
|
26 13 <span class="c"># MayaChemTools is free software; you can redistribute it and/or modify it under</span>
|
|
27 14 <span class="c"># the terms of the GNU Lesser General Public License as published by the Free</span>
|
|
28 15 <span class="c"># Software Foundation; either version 3 of the License, or (at your option) any</span>
|
|
29 16 <span class="c"># later version.</span>
|
|
30 17 <span class="c">#</span>
|
|
31 18 <span class="c"># MayaChemTools is distributed in the hope that it will be useful, but without</span>
|
|
32 19 <span class="c"># any warranty; without even the implied warranty of merchantability or fitness</span>
|
|
33 20 <span class="c"># for a particular purpose. See the GNU Lesser General Public License for more</span>
|
|
34 21 <span class="c"># details.</span>
|
|
35 22 <span class="c">#</span>
|
|
36 23 <span class="c"># You should have received a copy of the GNU Lesser General Public License</span>
|
|
37 24 <span class="c"># along with MayaChemTools; if not, see &lt;http://www.gnu.org/licenses/&gt; or</span>
|
|
38 25 <span class="c"># write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,</span>
|
|
39 26 <span class="c"># Boston, MA, 02111-1307, USA.</span>
|
|
40 27 <span class="c">#</span>
|
|
41 28
|
|
42 29 <span class="k">use</span> <span class="w">strict</span><span class="sc">;</span>
|
|
43 30 <span class="k">use</span> <span class="w">Exporter</span><span class="sc">;</span>
|
|
44 31 <span class="k">use</span> <span class="w">Carp</span><span class="sc">;</span>
|
|
45 32 <span class="k">use</span> <span class="w">TextUtil</span> <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
46 33 <span class="k">use</span> <span class="w">FileUtil</span> <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
47 34 <span class="k">use</span> <span class="w">FileIO::FingerprintsSDFileIO</span><span class="sc">;</span>
|
|
48 35 <span class="k">use</span> <span class="w">FileIO::FingerprintsTextFileIO</span><span class="sc">;</span>
|
|
49 36 <span class="k">use</span> <span class="w">FileIO::FingerprintsFPFileIO</span><span class="sc">;</span>
|
|
50 37
|
|
51 38 <span class="k">use</span> <span class="w">vars</span> <span class="q">qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS)</span><span class="sc">;</span>
|
|
52 39
|
|
53 40 <span class="i">@ISA</span> = <span class="q">qw(Exporter)</span><span class="sc">;</span>
|
|
54 41 <span class="i">@EXPORT</span> = <span class="q">qw()</span><span class="sc">;</span>
|
|
55 42 <span class="i">@EXPORT_OK</span> = <span class="q">qw(GetFingerprintsFileType ReadAndProcessFingerpritsData NewFingerprintsFileIO)</span><span class="sc">;</span>
|
|
56 43
|
|
57 44 <span class="i">%EXPORT_TAGS</span> = <span class="s">(</span><span class="w">all</span> <span class="cm">=></span> <span class="s">[</span><span class="i">@EXPORT</span><span class="cm">,</span> <span class="i">@EXPORT_OK</span><span class="s">]</span><span class="s">)</span><span class="sc">;</span>
|
|
58 45
|
|
59 46 <span class="c"># Generate new FingerprintsFileIO object for a SD, FP or Text fingerprints file specified using file name</span>
|
|
60 47 <span class="c"># along with other appropriate parameters...</span>
|
|
61 48 <span class="c">#</span>
|
|
62 <a name="NewFingerprintsFileIO-"></a> 49 <span class="k">sub </span><span class="m">NewFingerprintsFileIO</span> <span class="s">{</span>
|
|
63 50 <span class="k">my</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
64 51 <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFileIO</span><span class="cm">,</span> <span class="i">$FileType</span><span class="s">)</span><span class="sc">;</span>
|
|
65 52
|
|
66 53 <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">exists</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Name</span>}<span class="s">)</span> && <span class="i">TextUtil::IsNotEmpty</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Name</span>}<span class="s">)</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
67 54 <span class="w">carp</span> <span class="q">"Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can't create new FingerprintsFileIO object: File name is not specified...\n"</span><span class="sc">;</span>
|
|
68 55 <span class="k">return</span> <span class="k">undef</span><span class="sc">;</span>
|
|
69 56 <span class="s">}</span>
|
|
70 57
|
|
71 58 <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">exists</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Mode</span>}<span class="s">)</span> && <span class="i">TextUtil::IsNotEmpty</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Mode</span>}<span class="s">)</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
72 59 <span class="w">carp</span> <span class="q">"Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can't create new FingerprintsFileIO object: File mode is not specified...\n"</span><span class="sc">;</span>
|
|
73 60 <span class="k">return</span> <span class="k">undef</span><span class="sc">;</span>
|
|
74 61 <span class="s">}</span>
|
|
75 62
|
|
76 63 <span class="i">$FileType</span> = <span class="i">GetFingerprintsFileType</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Name</span>}<span class="s">)</span><span class="sc">;</span>
|
|
77 64 <span class="k">if</span> <span class="s">(</span><span class="i">TextUtil::IsEmpty</span><span class="s">(</span><span class="i">$FileType</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
78 65 <span class="w">carp</span> <span class="q">"Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can't create new FingerprintsFileIO object: File type is not specified...\n"</span><span class="sc">;</span>
|
|
79 66 <span class="k">return</span> <span class="k">undef</span><span class="sc">;</span>
|
|
80 67 <span class="s">}</span>
|
|
81 68
|
|
82 69 <span class="c"># Generate fingerprints IO object...</span>
|
|
83 70 <span class="j">FILETYPE:</span> <span class="s">{</span>
|
|
84 71 <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^SD$/i</span><span class="s">)</span> <span class="s">{</span>
|
|
85 72 <span class="i">$FingerprintsFileIO</span> = <span class="w">new</span> <span class="i">FileIO::FingerprintsSDFileIO</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span><span class="sc">;</span>
|
|
86 73 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
|
|
87 74 <span class="s">}</span>
|
|
88 75 <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^FP$/i</span><span class="s">)</span> <span class="s">{</span>
|
|
89 76 <span class="i">$FingerprintsFileIO</span> = <span class="w">new</span> <span class="i">FileIO::FingerprintsFPFileIO</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span><span class="sc">;</span>
|
|
90 77 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
|
|
91 78 <span class="s">}</span>
|
|
92 79 <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^Text$/i</span><span class="s">)</span> <span class="s">{</span>
|
|
93 80 <span class="i">$FingerprintsFileIO</span> = <span class="w">new</span> <span class="i">FileIO::FingerprintsTextFileIO</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span><span class="sc">;</span>
|
|
94 81 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
|
|
95 82 <span class="s">}</span>
|
|
96 83 <span class="i">$FingerprintsFileIO</span> = <span class="k">undef</span><span class="sc">;</span>
|
|
97 84 <span class="w">carp</span> <span class="q">"Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Fingerprints file type, $FileType, is not valid. Supported file types: SD, FP or Text\n"</span><span class="sc">;</span>
|
|
98 85 <span class="s">}</span>
|
|
99 86
|
|
100 87 <span class="k">return</span> <span class="i">$FingerprintsFileIO</span><span class="sc">;</span>
|
|
101 88 <span class="s">}</span>
|
|
102 89
|
|
103 90 <span class="c"># Get fingerprints file type from fingerprints file name...</span>
|
|
104 91 <span class="c">#</span>
|
|
105 <a name="GetFingerprintsFileType-"></a> 92 <span class="k">sub </span><span class="m">GetFingerprintsFileType</span> <span class="s">{</span>
|
|
106 93 <span class="k">my</span><span class="s">(</span><span class="i">$FileName</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
107 94 <span class="k">my</span><span class="s">(</span><span class="i">$FileType</span><span class="s">)</span><span class="sc">;</span>
|
|
108 95
|
|
109 96 <span class="i">$FileType</span> = <span class="q">''</span><span class="sc">;</span>
|
|
110 97 <span class="j">FILETYPE:</span> <span class="s">{</span>
|
|
111 98 <span class="k">if</span> <span class="s">(</span><span class="i">FileUtil::CheckFileType</span><span class="s">(</span><span class="i">$FileName</span><span class="cm">,</span> <span class="q">"sdf sd"</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
112 99 <span class="i">$FileType</span> = <span class="q">'SD'</span><span class="sc">;</span>
|
|
113 100 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
|
|
114 101 <span class="s">}</span>
|
|
115 102 <span class="k">if</span> <span class="s">(</span><span class="i">FileUtil::CheckFileType</span><span class="s">(</span><span class="i">$FileName</span><span class="cm">,</span> <span class="q">"fpf fp"</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
116 103 <span class="i">$FileType</span> = <span class="q">'FP'</span><span class="sc">;</span>
|
|
117 104 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
|
|
118 105 <span class="s">}</span>
|
|
119 106 <span class="k">if</span> <span class="s">(</span><span class="i">FileUtil::CheckFileType</span><span class="s">(</span><span class="i">$FileName</span><span class="cm">,</span> <span class="q">"csv tsv"</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
120 107 <span class="i">$FileType</span> = <span class="q">'Text'</span><span class="sc">;</span>
|
|
121 108 <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span>
|
|
122 109 <span class="s">}</span>
|
|
123 110 <span class="i">$FileType</span> = <span class="q">''</span><span class="sc">;</span>
|
|
124 111 <span class="w">carp</span> <span class="q">"Warning: Fingerprints::FingerprintsFileUtil::GetFingerprintsFileType: Can't determine fingerprints file type for $FileName: It's not a fingerprints file...\n"</span><span class="sc">;</span>
|
|
125 112 <span class="s">}</span>
|
|
126 113
|
|
127 114 <span class="k">return</span> <span class="i">$FileType</span><span class="sc">;</span>
|
|
128 115 <span class="s">}</span>
|
|
129 116
|
|
130 117
|
|
131 118 <span class="c"># Process fingerprints bit-vector and vector string data in a file using FingerprintsFileIO</span>
|
|
132 119 <span class="c"># object and return references to arrays of CompoundIDs and FingerprintsObjects...</span>
|
|
133 120 <span class="c">#</span>
|
|
134 121 <span class="c"># Note:</span>
|
|
135 122 <span class="c"># . The file is opened and closed automatically during processing.</span>
|
|
136 123 <span class="c">#</span>
|
|
137 <a name="ReadAndProcessFingerpritsData-"></a> 124 <span class="k">sub </span><span class="m">ReadAndProcessFingerpritsData</span> <span class="s">{</span>
|
|
138 125 <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFileIO</span><span class="cm">,</span> <span class="i">$CheckCompoundIDs</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
|
|
139 126 <span class="k">my</span><span class="s">(</span><span class="i">$CompoundID</span><span class="cm">,</span> <span class="i">$FingerprintsCount</span><span class="cm">,</span> <span class="i">$IgnoredFingerprintsCount</span><span class="cm">,</span> <span class="i">@CompundIDs</span><span class="cm">,</span> <span class="i">@FingerprintsObjects</span><span class="cm">,</span> <span class="i">%UniqueCompoundIDs</span><span class="s">)</span><span class="sc">;</span>
|
|
140 127
|
|
141 128 <span class="k">if</span> <span class="s">(</span>!<span class="i">$FingerprintsFileIO</span><span class="s">)</span> <span class="s">{</span>
|
|
142 129 <span class="k">return</span> <span class="s">(</span><span class="k">undef</span><span class="cm">,</span> <span class="k">undef</span><span class="s">)</span><span class="sc">;</span>
|
|
143 130 <span class="s">}</span>
|
|
144 131 <span class="i">$CheckCompoundIDs</span> = <span class="k">defined</span> <span class="i">$CheckCompoundIDs</span> ? <span class="i">$CheckCompoundIDs</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span>
|
|
145 132
|
|
146 133 <span class="k">print</span> <span class="q">"\nReading and processing fingerprints data...\n"</span><span class="sc">;</span>
|
|
147 134
|
|
148 135 <span class="s">(</span><span class="i">$FingerprintsCount</span><span class="cm">,</span> <span class="i">$IgnoredFingerprintsCount</span><span class="s">)</span> = <span class="s">(</span><span class="n">0</span><span class="s">)</span> x <span class="n">3</span><span class="sc">;</span>
|
|
149 136
|
|
150 137 <span class="i">@CompundIDs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
151 138 <span class="i">@FingerprintsObjects</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
152 139
|
|
153 140 <span class="i">%UniqueCompoundIDs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
154 141
|
|
155 142 <span class="c"># Check and open file for reading...</span>
|
|
156 143 <span class="k">if</span> <span class="s">(</span>!<span class="i">$FingerprintsFileIO</span><span class="i">->GetStatus</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
157 144 <span class="i">$FingerprintsFileIO</span><span class="i">->Open</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
158 145 <span class="s">}</span>
|
|
159 146
|
|
160 147 <span class="j">FINGERPRINTS:</span> <span class="k">while</span> <span class="s">(</span><span class="i">$FingerprintsFileIO</span><span class="i">->Read</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
161 148 <span class="i">$FingerprintsCount</span>++<span class="sc">;</span>
|
|
162 149
|
|
163 150 <span class="k">if</span> <span class="s">(</span>!<span class="i">$FingerprintsFileIO</span><span class="i">->IsFingerprintsDataValid</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span>
|
|
164 151 <span class="i">$IgnoredFingerprintsCount</span>++<span class="sc">;</span>
|
|
165 152 <span class="k">next</span> <span class="j">FINGERPRINTS</span><span class="sc">;</span>
|
|
166 153 <span class="s">}</span>
|
|
167 154
|
|
168 155 <span class="k">if</span> <span class="s">(</span><span class="i">$CheckCompoundIDs</span><span class="s">)</span> <span class="s">{</span>
|
|
169 156 <span class="i">$CompoundID</span> = <span class="i">$FingerprintsFileIO</span><span class="i">->GetCompoundID</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
170 157 <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$UniqueCompoundIDs</span>{<span class="i">$CompoundID</span>}<span class="s">)</span> <span class="s">{</span>
|
|
171 158 <span class="k">warn</span> <span class="q">"Warning: Ignoring fingerprints data for compound ID $CompoundID: Multiple entries for compound ID in fingerprints file.\n"</span><span class="sc">;</span>
|
|
172 159 <span class="i">$IgnoredFingerprintsCount</span>++<span class="sc">;</span>
|
|
173 160 <span class="k">next</span> <span class="j">FINGERPRINTS</span><span class="sc">;</span>
|
|
174 161 <span class="s">}</span>
|
|
175 162 <span class="i">$UniqueCompoundIDs</span>{<span class="i">$CompoundID</span>} = <span class="i">$CompoundID</span><span class="sc">;</span>
|
|
176 163 <span class="s">}</span>
|
|
177 164
|
|
178 165 <span class="k">push</span> <span class="i">@FingerprintsObjects</span><span class="cm">,</span> <span class="i">$FingerprintsFileIO</span><span class="i">->GetFingerprints</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
179 166 <span class="k">push</span> <span class="i">@CompundIDs</span><span class="cm">,</span> <span class="i">$FingerprintsFileIO</span><span class="i">->GetCompoundID</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
180 167 <span class="s">}</span>
|
|
181 168 <span class="i">$FingerprintsFileIO</span><span class="i">->Close</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
|
|
182 169
|
|
183 170 <span class="k">print</span> <span class="q">"Number of fingerprints data entries: $FingerprintsCount\n"</span><span class="sc">;</span>
|
|
184 171 <span class="k">print</span> <span class="q">"Number of fingerprints date entries processed successfully: "</span><span class="cm">,</span> <span class="s">(</span><span class="i">$FingerprintsCount</span> - <span class="i">$IgnoredFingerprintsCount</span><span class="s">)</span> <span class="cm">,</span> <span class="q">"\n"</span><span class="sc">;</span>
|
|
185 172 <span class="k">print</span> <span class="q">"Number of fingerprints data entries ignored due to missing/invalid data: $IgnoredFingerprintsCount\n\n"</span><span class="sc">;</span>
|
|
186 173
|
|
187 174 <span class="k">return</span> <span class="s">(</span>\<span class="i">@CompundIDs</span><span class="cm">,</span> \<span class="i">@FingerprintsObjects</span><span class="s">)</span><span class="sc">;</span>
|
|
188 175 <span class="s">}</span>
|
|
189 176
|
|
190 177
|
|
191 <a name="EOF-"></a></pre>
|
|
192 <p> </p>
|
|
193 <br />
|
|
194 <center>
|
|
195 <img src="../../../images/h2o2.png">
|
|
196 </center>
|
|
197 </body>
|
|
198 </html>
|