| 0 | 1 <html> | 
|  | 2 <head> | 
|  | 3 <title>MayaChemTools:Code:Fingerprints::FingerprintsFileUtil.pm</title> | 
|  | 4 <meta http-equiv="content-type" content="text/html;charset=utf-8"> | 
|  | 5 <link rel="stylesheet" type="text/css" href="../../../css/MayaChemToolsCode.css"> | 
|  | 6 </head> | 
|  | 7 <body leftmargin="20" rightmargin="20" topmargin="10" bottommargin="10"> | 
|  | 8 <br/> | 
|  | 9 <center> | 
|  | 10 <a href="http://www.mayachemtools.org" title="MayaChemTools Home"><img src="../../../images/MayaChemToolsLogo.gif" border="0" alt="MayaChemTools"></a> | 
|  | 11 </center> | 
|  | 12 <br/> | 
|  | 13 <pre> | 
|  | 14 <a name="package-Fingerprints::FingerprintsFileUtil-"></a>   1 <span class="k">package </span><span class="i">Fingerprints::FingerprintsFileUtil</span><span class="sc">;</span> | 
|  | 15    2 <span class="c">#</span> | 
|  | 16    3 <span class="c"># $RCSfile: FingerprintsFileUtil.pm,v $</span> | 
|  | 17    4 <span class="c"># $Date: 2015/02/28 20:48:54 $</span> | 
|  | 18    5 <span class="c"># $Revision: 1.14 $</span> | 
|  | 19    6 <span class="c">#</span> | 
|  | 20    7 <span class="c"># Author: Manish Sud &lt;msud@san.rr.com&gt;</span> | 
|  | 21    8 <span class="c">#</span> | 
|  | 22    9 <span class="c"># Copyright (C) 2015 Manish Sud. All rights reserved.</span> | 
|  | 23   10 <span class="c">#</span> | 
|  | 24   11 <span class="c"># This file is part of MayaChemTools.</span> | 
|  | 25   12 <span class="c">#</span> | 
|  | 26   13 <span class="c"># MayaChemTools is free software; you can redistribute it and/or modify it under</span> | 
|  | 27   14 <span class="c"># the terms of the GNU Lesser General Public License as published by the Free</span> | 
|  | 28   15 <span class="c"># Software Foundation; either version 3 of the License, or (at your option) any</span> | 
|  | 29   16 <span class="c"># later version.</span> | 
|  | 30   17 <span class="c">#</span> | 
|  | 31   18 <span class="c"># MayaChemTools is distributed in the hope that it will be useful, but without</span> | 
|  | 32   19 <span class="c"># any warranty; without even the implied warranty of merchantability or fitness</span> | 
|  | 33   20 <span class="c"># for a particular purpose.  See the GNU Lesser General Public License for more</span> | 
|  | 34   21 <span class="c"># details.</span> | 
|  | 35   22 <span class="c">#</span> | 
|  | 36   23 <span class="c"># You should have received a copy of the GNU Lesser General Public License</span> | 
|  | 37   24 <span class="c"># along with MayaChemTools; if not, see &lt;http://www.gnu.org/licenses/&gt; or</span> | 
|  | 38   25 <span class="c"># write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,</span> | 
|  | 39   26 <span class="c"># Boston, MA, 02111-1307, USA.</span> | 
|  | 40   27 <span class="c">#</span> | 
|  | 41   28 | 
|  | 42   29 <span class="k">use</span> <span class="w">strict</span><span class="sc">;</span> | 
|  | 43   30 <span class="k">use</span> <span class="w">Exporter</span><span class="sc">;</span> | 
|  | 44   31 <span class="k">use</span> <span class="w">Carp</span><span class="sc">;</span> | 
|  | 45   32 <span class="k">use</span> <span class="w">TextUtil</span> <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 46   33 <span class="k">use</span> <span class="w">FileUtil</span> <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 47   34 <span class="k">use</span> <span class="w">FileIO::FingerprintsSDFileIO</span><span class="sc">;</span> | 
|  | 48   35 <span class="k">use</span> <span class="w">FileIO::FingerprintsTextFileIO</span><span class="sc">;</span> | 
|  | 49   36 <span class="k">use</span> <span class="w">FileIO::FingerprintsFPFileIO</span><span class="sc">;</span> | 
|  | 50   37 | 
|  | 51   38 <span class="k">use</span> <span class="w">vars</span> <span class="q">qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS)</span><span class="sc">;</span> | 
|  | 52   39 | 
|  | 53   40 <span class="i">@ISA</span> = <span class="q">qw(Exporter)</span><span class="sc">;</span> | 
|  | 54   41 <span class="i">@EXPORT</span> = <span class="q">qw()</span><span class="sc">;</span> | 
|  | 55   42 <span class="i">@EXPORT_OK</span> = <span class="q">qw(GetFingerprintsFileType ReadAndProcessFingerpritsData  NewFingerprintsFileIO)</span><span class="sc">;</span> | 
|  | 56   43 | 
|  | 57   44 <span class="i">%EXPORT_TAGS</span> = <span class="s">(</span><span class="w">all</span>  <span class="cm">=></span> <span class="s">[</span><span class="i">@EXPORT</span><span class="cm">,</span> <span class="i">@EXPORT_OK</span><span class="s">]</span><span class="s">)</span><span class="sc">;</span> | 
|  | 58   45 | 
|  | 59   46 <span class="c"># Generate new FingerprintsFileIO object for a SD, FP or Text fingerprints file specified using file name</span> | 
|  | 60   47 <span class="c"># along with other appropriate parameters...</span> | 
|  | 61   48 <span class="c">#</span> | 
|  | 62 <a name="NewFingerprintsFileIO-"></a>  49 <span class="k">sub </span><span class="m">NewFingerprintsFileIO</span> <span class="s">{</span> | 
|  | 63   50   <span class="k">my</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 64   51   <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFileIO</span><span class="cm">,</span> <span class="i">$FileType</span><span class="s">)</span><span class="sc">;</span> | 
|  | 65   52 | 
|  | 66   53   <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">exists</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Name</span>}<span class="s">)</span> && <span class="i">TextUtil::IsNotEmpty</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Name</span>}<span class="s">)</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 67   54     <span class="w">carp</span> <span class="q">"Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can't create new FingerprintsFileIO object: File name is not specified...\n"</span><span class="sc">;</span> | 
|  | 68   55     <span class="k">return</span> <span class="k">undef</span><span class="sc">;</span> | 
|  | 69   56   <span class="s">}</span> | 
|  | 70   57 | 
|  | 71   58   <span class="k">if</span> <span class="s">(</span>!<span class="s">(</span><span class="k">exists</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Mode</span>}<span class="s">)</span> && <span class="i">TextUtil::IsNotEmpty</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Mode</span>}<span class="s">)</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 72   59     <span class="w">carp</span> <span class="q">"Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can't create new FingerprintsFileIO object: File mode is not specified...\n"</span><span class="sc">;</span> | 
|  | 73   60     <span class="k">return</span> <span class="k">undef</span><span class="sc">;</span> | 
|  | 74   61   <span class="s">}</span> | 
|  | 75   62 | 
|  | 76   63   <span class="i">$FileType</span> = <span class="i">GetFingerprintsFileType</span><span class="s">(</span><span class="i">$FingerprintsFileIOParams</span>{<span class="w">Name</span>}<span class="s">)</span><span class="sc">;</span> | 
|  | 77   64   <span class="k">if</span> <span class="s">(</span><span class="i">TextUtil::IsEmpty</span><span class="s">(</span><span class="i">$FileType</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 78   65     <span class="w">carp</span> <span class="q">"Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can't create new FingerprintsFileIO object: File type is not specified...\n"</span><span class="sc">;</span> | 
|  | 79   66     <span class="k">return</span> <span class="k">undef</span><span class="sc">;</span> | 
|  | 80   67   <span class="s">}</span> | 
|  | 81   68 | 
|  | 82   69   <span class="c"># Generate fingerprints IO object...</span> | 
|  | 83   70   <span class="j">FILETYPE:</span> <span class="s">{</span> | 
|  | 84   71     <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^SD$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 85   72       <span class="i">$FingerprintsFileIO</span> = <span class="w">new</span> <span class="i">FileIO::FingerprintsSDFileIO</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span><span class="sc">;</span> | 
|  | 86   73       <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span> | 
|  | 87   74     <span class="s">}</span> | 
|  | 88   75     <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^FP$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 89   76       <span class="i">$FingerprintsFileIO</span> = <span class="w">new</span> <span class="i">FileIO::FingerprintsFPFileIO</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span><span class="sc">;</span> | 
|  | 90   77       <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span> | 
|  | 91   78     <span class="s">}</span> | 
|  | 92   79     <span class="k">if</span> <span class="s">(</span><span class="i">$FileType</span> =~ <span class="q">/^Text$/i</span><span class="s">)</span> <span class="s">{</span> | 
|  | 93   80       <span class="i">$FingerprintsFileIO</span> = <span class="w">new</span> <span class="i">FileIO::FingerprintsTextFileIO</span><span class="s">(</span><span class="i">%FingerprintsFileIOParams</span><span class="s">)</span><span class="sc">;</span> | 
|  | 94   81       <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span> | 
|  | 95   82     <span class="s">}</span> | 
|  | 96   83     <span class="i">$FingerprintsFileIO</span> = <span class="k">undef</span><span class="sc">;</span> | 
|  | 97   84     <span class="w">carp</span> <span class="q">"Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Fingerprints file type, $FileType, is not valid. Supported file types: SD, FP or Text\n"</span><span class="sc">;</span> | 
|  | 98   85   <span class="s">}</span> | 
|  | 99   86 | 
|  | 100   87   <span class="k">return</span> <span class="i">$FingerprintsFileIO</span><span class="sc">;</span> | 
|  | 101   88 <span class="s">}</span> | 
|  | 102   89 | 
|  | 103   90 <span class="c"># Get fingerprints file type from fingerprints file name...</span> | 
|  | 104   91 <span class="c">#</span> | 
|  | 105 <a name="GetFingerprintsFileType-"></a>  92 <span class="k">sub </span><span class="m">GetFingerprintsFileType</span> <span class="s">{</span> | 
|  | 106   93   <span class="k">my</span><span class="s">(</span><span class="i">$FileName</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 107   94   <span class="k">my</span><span class="s">(</span><span class="i">$FileType</span><span class="s">)</span><span class="sc">;</span> | 
|  | 108   95 | 
|  | 109   96   <span class="i">$FileType</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 110   97   <span class="j">FILETYPE:</span> <span class="s">{</span> | 
|  | 111   98     <span class="k">if</span> <span class="s">(</span><span class="i">FileUtil::CheckFileType</span><span class="s">(</span><span class="i">$FileName</span><span class="cm">,</span> <span class="q">"sdf sd"</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 112   99       <span class="i">$FileType</span> = <span class="q">'SD'</span><span class="sc">;</span> | 
|  | 113  100       <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span> | 
|  | 114  101     <span class="s">}</span> | 
|  | 115  102     <span class="k">if</span> <span class="s">(</span><span class="i">FileUtil::CheckFileType</span><span class="s">(</span><span class="i">$FileName</span><span class="cm">,</span> <span class="q">"fpf fp"</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 116  103       <span class="i">$FileType</span> = <span class="q">'FP'</span><span class="sc">;</span> | 
|  | 117  104       <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span> | 
|  | 118  105     <span class="s">}</span> | 
|  | 119  106     <span class="k">if</span> <span class="s">(</span><span class="i">FileUtil::CheckFileType</span><span class="s">(</span><span class="i">$FileName</span><span class="cm">,</span> <span class="q">"csv tsv"</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 120  107       <span class="i">$FileType</span> = <span class="q">'Text'</span><span class="sc">;</span> | 
|  | 121  108       <span class="k">last</span> <span class="j">FILETYPE</span><span class="sc">;</span> | 
|  | 122  109     <span class="s">}</span> | 
|  | 123  110     <span class="i">$FileType</span> = <span class="q">''</span><span class="sc">;</span> | 
|  | 124  111     <span class="w">carp</span> <span class="q">"Warning: Fingerprints::FingerprintsFileUtil::GetFingerprintsFileType: Can't determine fingerprints file type for $FileName: It's not a fingerprints file...\n"</span><span class="sc">;</span> | 
|  | 125  112   <span class="s">}</span> | 
|  | 126  113 | 
|  | 127  114   <span class="k">return</span> <span class="i">$FileType</span><span class="sc">;</span> | 
|  | 128  115 <span class="s">}</span> | 
|  | 129  116 | 
|  | 130  117 | 
|  | 131  118 <span class="c"># Process fingerprints bit-vector and vector string data in a file using FingerprintsFileIO</span> | 
|  | 132  119 <span class="c"># object and return references to arrays of CompoundIDs and FingerprintsObjects...</span> | 
|  | 133  120 <span class="c">#</span> | 
|  | 134  121 <span class="c"># Note:</span> | 
|  | 135  122 <span class="c">#  . The file open and close is automatically performed during processing.</span> | 
|  | 136  123 <span class="c">#</span> | 
|  | 137 <a name="ReadAndProcessFingerpritsData-"></a> 124 <span class="k">sub </span><span class="m">ReadAndProcessFingerpritsData</span> <span class="s">{</span> | 
|  | 138  125   <span class="k">my</span><span class="s">(</span><span class="i">$FingerprintsFileIO</span><span class="cm">,</span> <span class="i">$CheckCompoundIDs</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> | 
|  | 139  126   <span class="k">my</span><span class="s">(</span><span class="i">$CompoundID</span><span class="cm">,</span> <span class="i">$FingerprintsCount</span><span class="cm">,</span> <span class="i">$IgnoredFingerprintsCount</span><span class="cm">,</span> <span class="i">@CompundIDs</span><span class="cm">,</span> <span class="i">@FingerprintsObjects</span><span class="cm">,</span> <span class="i">%UniqueCompoundIDs</span><span class="s">)</span><span class="sc">;</span> | 
|  | 140  127 | 
|  | 141  128   <span class="k">if</span> <span class="s">(</span>!<span class="i">$FingerprintsFileIO</span><span class="s">)</span> <span class="s">{</span> | 
|  | 142  129     <span class="k">return</span> <span class="s">(</span><span class="k">undef</span><span class="cm">,</span> <span class="k">undef</span><span class="s">)</span><span class="sc">;</span> | 
|  | 143  130   <span class="s">}</span> | 
|  | 144  131   <span class="i">$CheckCompoundIDs</span> = <span class="k">defined</span> <span class="i">$CheckCompoundIDs</span> ? <span class="i">$CheckCompoundIDs</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span> | 
|  | 145  132 | 
|  | 146  133   <span class="k">print</span> <span class="q">"\nReading and processing fingerprints data...\n"</span><span class="sc">;</span> | 
|  | 147  134 | 
|  | 148  135   <span class="s">(</span><span class="i">$FingerprintsCount</span><span class="cm">,</span> <span class="i">$IgnoredFingerprintsCount</span><span class="s">)</span> = <span class="s">(</span><span class="n">0</span><span class="s">)</span> x <span class="n">3</span><span class="sc">;</span> | 
|  | 149  136 | 
|  | 150  137   <span class="i">@CompundIDs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 151  138   <span class="i">@FingerprintsObjects</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 152  139 | 
|  | 153  140   <span class="i">%UniqueCompoundIDs</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 154  141 | 
|  | 155  142   <span class="c"># Check and open file for reading...</span> | 
|  | 156  143   <span class="k">if</span> <span class="s">(</span>!<span class="i">$FingerprintsFileIO</span><span class="i">->GetStatus</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 157  144     <span class="i">$FingerprintsFileIO</span><span class="i">->Open</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 158  145   <span class="s">}</span> | 
|  | 159  146 | 
|  | 160  147   <span class="j">FINGERPRINTS:</span> <span class="k">while</span> <span class="s">(</span><span class="i">$FingerprintsFileIO</span><span class="i">->Read</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 161  148     <span class="i">$FingerprintsCount</span>++<span class="sc">;</span> | 
|  | 162  149 | 
|  | 163  150     <span class="k">if</span> <span class="s">(</span>!<span class="i">$FingerprintsFileIO</span><span class="i">->IsFingerprintsDataValid</span><span class="s">(</span><span class="s">)</span><span class="s">)</span> <span class="s">{</span> | 
|  | 164  151       <span class="i">$IgnoredFingerprintsCount</span>++<span class="sc">;</span> | 
|  | 165  152       <span class="k">next</span> <span class="j">FINGERPRINTS</span><span class="sc">;</span> | 
|  | 166  153     <span class="s">}</span> | 
|  | 167  154 | 
|  | 168  155     <span class="k">if</span> <span class="s">(</span><span class="i">$CheckCompoundIDs</span><span class="s">)</span> <span class="s">{</span> | 
|  | 169  156       <span class="i">$CompoundID</span> = <span class="i">$FingerprintsFileIO</span><span class="i">->GetCompoundID</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 170  157       <span class="k">if</span> <span class="s">(</span><span class="k">exists</span> <span class="i">$UniqueCompoundIDs</span>{<span class="i">$CompoundID</span>}<span class="s">)</span> <span class="s">{</span> | 
|  | 171  158         <span class="k">warn</span> <span class="q">"Warning: Ignoring fingerprints data for compound ID $CompoundID: Multiple entries for compound ID in fingerprints file.\n"</span><span class="sc">;</span> | 
|  | 172  159         <span class="i">$IgnoredFingerprintsCount</span>++<span class="sc">;</span> | 
|  | 173  160         <span class="k">next</span> <span class="j">FINGERPRINTS</span><span class="sc">;</span> | 
|  | 174  161       <span class="s">}</span> | 
|  | 175  162       <span class="i">$UniqueCompoundIDs</span>{<span class="i">$CompoundID</span>} = <span class="i">$CompoundID</span><span class="sc">;</span> | 
|  | 176  163     <span class="s">}</span> | 
|  | 177  164 | 
|  | 178  165     <span class="k">push</span> <span class="i">@FingerprintsObjects</span><span class="cm">,</span> <span class="i">$FingerprintsFileIO</span><span class="i">->GetFingerprints</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 179  166     <span class="k">push</span> <span class="i">@CompundIDs</span><span class="cm">,</span> <span class="i">$FingerprintsFileIO</span><span class="i">->GetCompoundID</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 180  167   <span class="s">}</span> | 
|  | 181  168   <span class="i">$FingerprintsFileIO</span><span class="i">->Close</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> | 
|  | 182  169 | 
|  | 183  170   <span class="k">print</span> <span class="q">"Number of fingerprints data entries: $FingerprintsCount\n"</span><span class="sc">;</span> | 
|  | 184  171   <span class="k">print</span> <span class="q">"Number of fingerprints date entries processed successfully: "</span><span class="cm">,</span> <span class="s">(</span><span class="i">$FingerprintsCount</span> - <span class="i">$IgnoredFingerprintsCount</span><span class="s">)</span>  <span class="cm">,</span> <span class="q">"\n"</span><span class="sc">;</span> | 
|  | 185  172   <span class="k">print</span> <span class="q">"Number of fingerprints data entries ignored due to missing/invalid data: $IgnoredFingerprintsCount\n\n"</span><span class="sc">;</span> | 
|  | 186  173 | 
|  | 187  174   <span class="k">return</span> <span class="s">(</span>\<span class="i">@CompundIDs</span><span class="cm">,</span> \<span class="i">@FingerprintsObjects</span><span class="s">)</span><span class="sc">;</span> | 
|  | 188  175 <span class="s">}</span> | 
|  | 189  176 | 
|  | 190  177 | 
|  | 191 <a name="EOF-"></a></pre> | 
|  | 192 <p> </p> | 
|  | 193 <br /> | 
|  | 194 <center> | 
|  | 195 <img src="../../../images/h2o2.png" alt=""> | 
|  | 196 </center> | 
|  | 197 </body> | 
|  | 198 </html> |