0
|
1 #!/usr/bin/perl -w
|
|
2 #
|
|
3 # $RCSfile: InfoFingerprintsFiles.pl,v $
|
|
4 # $Date: 2015/02/28 20:46:20 $
|
|
5 # $Revision: 1.20 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability of fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use FindBin; use lib "$FindBin::Bin/../lib";
|
|
31 use Getopt::Long;
|
|
32 use File::Basename;
|
|
33 use Text::ParseWords;
|
|
34 use Benchmark;
|
|
35 use FileUtil;
|
|
36 use TextUtil;
|
|
37 use Fingerprints::FingerprintsFileUtil;
|
|
38 use Fingerprints::FingerprintsStringUtil;
|
|
39
|
|
# File-scoped state. These lexicals are read and written by the subroutines
# defined further down in this file, so they must be declared before them.
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";

# Direct class method call; the indirect object form ("new Benchmark") is
# ambiguous to the parser and discouraged by perlobj.
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
    # Usage text is emitted through die, so the script stops here.
    die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

# Expand the command line arguments into the list of candidate input files...
my(@FingerprintsFilesList);
@FingerprintsFilesList = ExpandFileNames(\@ARGV, "sdf sd fpf fp csv tsv");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
print "Checking input fingerprints file(s)...\n";
my(%FingerprintsFilesInfo);
RetrieveFingerprintsFilesInfo();

# Process input files..
my($FileIndex);
if (@FingerprintsFilesList > 1) {
    print "\nProcessing fingerprints files...\n";
}
for $FileIndex (0 .. $#FingerprintsFilesList) {
    # Files flagged as not okay were already reported with a warning.
    if ($FingerprintsFilesInfo{FileOkay}[$FileIndex]) {
        print "\nProcessing file $FingerprintsFilesList[$FileIndex]...\n";
        ListFingerprintsFileInfo($FileIndex);
    }
}
ListTotalSizeOfFiles();

print "\n$ScriptName:Done...\n\n";

# Report elapsed time...
$EndTime = Benchmark->new;
$TotalTime = timediff ($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
|
|
87
|
|
88 ###############################################################################
|
|
89
|
|
# List appropriate information for one fingerprints file...
#
# Parameter:
#   $FileIndex - position of the file in @FingerprintsFilesList; also used to
#                index the per-file arrays kept in %FingerprintsFilesInfo.
#
# Reads every data entry of the file through a FingerprintsFileIO object,
# tallies entries by category and prints the totals, the file type specific
# header information, the optional average bit density, and the file size
# and modification time recorded in %FingerprintsFilesInfo.
#
sub ListFingerprintsFileInfo {
    my($FileIndex) = @_;

    my $FileType = $FingerprintsFilesInfo{FileType}[$FileIndex];
    my $IsSDFile = ($FileType =~ /^SD$/i);
    my $DataEntryLabel = $IsSDFile ? 'compounds' : 'lines';

    # Running tallies for the entries read from the file...
    my %Count = map { $_ => 0 } qw(Total Valid Invalid Missing BitVector Vector);
    my $BitDensitySum = 0;

    my $FingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$FileIndex]});
    $FingerprintsFileIO->Open();

    # Queried once, right after opening; the result is applied to every
    # entry read below.
    my $FileDataIsInvalid = !$FingerprintsFileIO->IsFingerprintsFileDataValid();

    ENTRY: while ($FingerprintsFileIO->Read()) {
        $Count{Total}++;

        # Missing data...
        if ($FileDataIsInvalid) {
            $Count{Missing}++;
            ListEmptyOrInvalidFingerprintsDataInfo('EmptyData', $FingerprintsFileIO, $FileType)
                if ($OptionsInfo{ValidateData} || $OptionsInfo{CountEmptyFingerprints});
            next ENTRY;
        }

        # Invalid data...
        unless ($FingerprintsFileIO->IsFingerprintsDataValid()) {
            $Count{Invalid}++;
            ListEmptyOrInvalidFingerprintsDataInfo('InvalidData', $FingerprintsFileIO, $FileType)
                if $OptionsInfo{ValidateData};
            next ENTRY;
        }
        $Count{Valid}++;

        my $Fingerprints = $FingerprintsFileIO->GetFingerprints();
        my $VectorType = $Fingerprints->GetVectorType();

        if ($VectorType =~ /^FingerprintsBitVector$/i) {
            $Count{BitVector}++;
            # Bit density is only accumulated when the average is requested.
            $BitDensitySum += $Fingerprints->GetFingerprintsBitDensity()
                if $OptionsInfo{ListAverageBitDensity};
        }
        elsif ($VectorType =~ /^FingerprintsVector$/i) {
            $Count{Vector}++;
        }

        ListFingerprintsDataEntryInfo($FingerprintsFileIO, $FileType)
            if $OptionsInfo{ListFingerprintsDataEntryInfo};
    }
    $FingerprintsFileIO->Close();

    print "\nFingerprints file type: $FileType\n";
    print $IsSDFile ? "Number of compounds: $Count{Total}\n" : "Number of data lines: $Count{Total}\n";

    # Still uses the IO object after Close(), exactly as before.
    ListFileTypeHeaderInfo($FingerprintsFileIO, $FileType);

    print "\nNumber of $DataEntryLabel with valid fingerprints string data: $Count{Valid}\n";
    print "Number of $DataEntryLabel with bit-vector fingerprints string data: $Count{BitVector}\n";
    print "Number of $DataEntryLabel with vector fingerprints string data: $Count{Vector}\n";

    if ($OptionsInfo{CountEmptyFingerprints}) {
        print "Number of $DataEntryLabel with missing fingerprints data: $Count{Missing}\n";
        print "Number of $DataEntryLabel with invalid fingerprints data: $Count{Invalid}\n";
    }

    # Skipped when there are no bit-vector entries, which also avoids a
    # division by zero.
    if ($OptionsInfo{ListAverageBitDensity} && $Count{BitVector}) {
        # Round to two decimals; adding 0 drops trailing zeros.
        my $AverageBitDensity = sprintf("%.2f", $BitDensitySum/$Count{BitVector}) + 0;
        print "\nAverage bit density: $AverageBitDensity\n";
    }

    # File size and modification information...
    print "\nFile size: ", FormatFileSize($FingerprintsFilesInfo{FileSize}[$FileIndex]), " \n";
    print "Last modified: ", $FingerprintsFilesInfo{FileLastModified}[$FileIndex], " \n";
}
|
|
180
|
|
# Report a data entry which has no fingerprints data or invalid fingerprints data...
#
# Parameters:
#   $Mode               - 'EmptyData' (case-insensitive) reports "no" fingerprints
#                         data; any other value reports "invalid" data.
#   $FingerprintsFileIO - IO object positioned on the entry being reported.
#   $FileType           - 'SD' entries are identified by compound number; all
#                         other file types by data line number.
#
# The amount of output follows $OptionsInfo{DetailLevel}: >= 3 also prints the
# raw compound string or data line, >= 1 prints only the entry number.
#
sub ListEmptyOrInvalidFingerprintsDataInfo {
    my($Mode, $FingerprintsFileIO, $FileType) = @_;

    my $Problem = ($Mode =~ /^EmptyData$/i) ? "no" : "invalid";
    my $IsSDFile = ($FileType =~ /^SD$/i);

    # Same message shape for both kinds of files; only the label and the
    # accessors differ.
    my $EntryLabel = $IsSDFile ? "Compound number" : "Data line number";
    my $EntryNum = $IsSDFile ? $FingerprintsFileIO->GetCompoundNum() : $FingerprintsFileIO->GetLineNum();

    if ($OptionsInfo{DetailLevel} >= 3 ) {
        # The raw entry text is fetched only when it is going to be printed.
        my $EntryText = $IsSDFile ? $FingerprintsFileIO->GetCompoundString() : $FingerprintsFileIO->GetDataLine();
        print "$EntryLabel $EntryNum contains $Problem fingerprints data: $EntryText \n";
    }
    elsif ($OptionsInfo{DetailLevel} >= 1 ) {
        print "$EntryLabel $EntryNum contains $Problem fingerprints data...\n";
    }
}
|
|
214
|
|
# Print a one line, "; " separated description of the current fingerprints entry...
#
# Parameters:
#   $FingerprintsFileIO - IO object positioned on a valid fingerprints entry.
#   $FileType           - 'SD' entries are labeled by compound number; all
#                         other file types by data line number.
#
# Which fields appear on the line is controlled by the List* flags in
# %OptionsInfo.
#
sub ListFingerprintsDataEntryInfo {
    my($FingerprintsFileIO, $FileType) = @_;
    my($Description, $Size, $BitStringFormat, $BitOrder, $BitDensity, $NumOfOnBits, $VectorValuesType, $VectorValuesFormat, $NumOfNonZeroValues);

    my $FingerprintsObject = $FingerprintsFileIO->GetFingerprints();
    my $FingerprintsString = $FingerprintsFileIO->GetFingerprintsString();

    # Initially the vector type reported by the object; replaced below by the
    # type parsed out of the fingerprints string.
    my $FingerprintsType = $FingerprintsObject->GetVectorType();

    if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
        ($FingerprintsType, $Description, $Size, $BitStringFormat, $BitOrder) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($FingerprintsString);

        $BitDensity = $OptionsInfo{ListBitDensity} ? $FingerprintsObject->GetFingerprintsBitDensity() : '';
        $NumOfOnBits = $OptionsInfo{ListNumOfOnBits} ? $FingerprintsObject->GetNumOfSetBits() : '';
    }
    elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
        ($FingerprintsType, $Description, $Size, $VectorValuesType, $VectorValuesFormat) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($FingerprintsString);

        $NumOfNonZeroValues = '';
        if ($OptionsInfo{ListNumOfNonZeroValues}) {
            # Alphanumerical values are reported as 'NA' instead of a count.
            $NumOfNonZeroValues = ($VectorValuesType =~ /^AlphaNumericalValues$/i) ? 'NA' : $FingerprintsObject->GetNumOfNonZeroValues();
        }
    }

    # Collect the requested fields; they are joined with "; " at the end.
    my @Fields = ($FileType =~ /^SD$/i)
        ? ("Compound number: " . $FingerprintsFileIO->GetCompoundNum())
        : ("Data line number: " . $FingerprintsFileIO->GetLineNum());

    push @Fields, "FPType: $FingerprintsType" if $OptionsInfo{ListFingerprintsType};
    push @Fields, "FPDescription: $Description" if $OptionsInfo{ListFingerprintsDescription};
    push @Fields, "FPSize: $Size" if $OptionsInfo{ListFingerprintsSize};

    # This test uses the type parsed from the fingerprints string.
    if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
        push @Fields, "FPBitStringFormat: $BitStringFormat" if $OptionsInfo{ListFingerprintsBitStringFormat};
        push @Fields, "FPBitOrder: $BitOrder" if $OptionsInfo{ListFingerprintsBitOrder};
        push @Fields, "BitDensity: $BitDensity" if $OptionsInfo{ListBitDensity};
        push @Fields, "NumOfOnBits: $NumOfOnBits" if $OptionsInfo{ListNumOfOnBits};
    }
    elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
        push @Fields, "FPVectorValuesType: $VectorValuesType" if $OptionsInfo{ListFingerprintsVectorValuesType};
        push @Fields, "FPVectorValuesFormat: $VectorValuesFormat" if $OptionsInfo{ListFingerprintsVectorValuesFormat};
        push @Fields, "NumOfNonZeroValues: $NumOfNonZeroValues" if $OptionsInfo{ListNumOfNonZeroValues};
    }

    print join("; ", @Fields), "\n";
}
|
|
300
|
|
# Print the header information specific to a file type...
#
# Parameters:
#   $FingerprintsFileIO - IO object for the file being described.
#   $FileType           - 'Text' prints the column count and column labels;
#                         'FP' prints the header data keys and values; any
#                         other type prints nothing.
#
sub ListFileTypeHeaderInfo {
    my($FingerprintsFileIO, $FileType) = @_;

    if ($FileType =~ /^Text$/i) {
        my @ColLabels = $FingerprintsFileIO->GetDataColLabels();
        my $ColCount = scalar(@ColLabels);

        print "Number of columns: $ColCount\n";
        print "Column labels: ", JoinWords(\@ColLabels, ", ", 1), "\n";
    }
    elsif ($FileType =~ /^FP$/i) {
        my %ValueOf = $FingerprintsFileIO->GetHeaderDataKeysAndValues();

        print "\nFP file header data keys and values: \n#\n";
        # Keys are listed in the order returned by GetHeaderDataKeys().
        print "# $_ = $ValueOf{$_}\n" for $FingerprintsFileIO->GetHeaderDataKeys();
        print "#\n";
    }
}
|
|
323
|
|
# Print the total size of all the files which were accepted for processing...
#
# Takes no parameters. Walks @FingerprintsFilesList and sums the sizes stored
# in $FingerprintsFilesInfo{FileSize} for every file whose FileOkay flag is
# set. The total is printed only when more than one file was accepted.
#
sub ListTotalSizeOfFiles {
    my $FileOkayCount = 0;
    my $TotalSize = 0;

    for my $Index (0 .. $#FingerprintsFilesList) {
        # Test the FileOkay flag, not the file name: a file name is true for
        # practically every entry, which made ignored files count as well.
        next unless $FingerprintsFilesInfo{FileOkay}[$Index];

        $FileOkayCount++;
        $TotalSize += $FingerprintsFilesInfo{FileSize}[$Index];
    }

    if ($FileOkayCount > 1) {
        print "\nTotal size of $FileOkayCount files: ", FormatFileSize($TotalSize), "\n";
    }
}
|
|
341
|
|
# Collect per-file information for all the files in @FingerprintsFilesList...
#
# Takes no parameters. Rebuilds %FingerprintsFilesInfo as a hash of parallel
# arrays indexed like @FingerprintsFilesList:
#
#   FileOkay                     - 1 when the file can be processed, else 0
#   FileType                     - file type reported by GetFingerprintsFileType
#   FileSize                     - value returned by FileSize
#   FileLastModified             - "time; date" string
#   InDelim                      - 'Tab' for .tsv files, else $OptionsInfo{InDelim}
#   FingerprintsFileIOParameters - parameters hash for NewFingerprintsFileIO
#
# Files that don't exist or aren't recognized are reported with a warning and
# keep their "not okay" defaults.
#
sub RetrieveFingerprintsFilesInfo {
    %FingerprintsFilesInfo = (
        FileOkay                     => [],
        FileType                     => [],
        FileSize                     => [],
        FileLastModified             => [],
        InDelim                      => [],
        FingerprintsFileIOParameters => [],
    );

    FILELIST: for my $Index (0 .. $#FingerprintsFilesList) {
        my $FingerprintsFile = $FingerprintsFilesList[$Index];

        # Start from "not okay" defaults; they stay in place for skipped files.
        $FingerprintsFilesInfo{FileOkay}[$Index] = 0;
        $FingerprintsFilesInfo{FileType}[$Index] = '';
        $FingerprintsFilesInfo{FileSize}[$Index] = 0;
        $FingerprintsFilesInfo{FileLastModified}[$Index] = '';
        $FingerprintsFilesInfo{InDelim}[$Index] = "";
        $FingerprintsFilesInfo{FingerprintsFileIOParameters}[$Index] = {};

        unless (-e $FingerprintsFile) {
            warn "Warning: Ignoring file $FingerprintsFile: It doesn't exist\n";
            next FILELIST;
        }

        my $FileType = Fingerprints::FingerprintsFileUtil::GetFingerprintsFileType($FingerprintsFile);
        if (IsEmpty($FileType)) {
            warn "Warning: Ignoring file $FingerprintsFile: It's not a fingerprints file\n";
            next FILELIST;
        }

        my($FileDir, $FileName, $FileExt) = ParseFileName($FingerprintsFile);
        my $InDelim = ($FileExt =~ /^tsv$/i) ? 'Tab' : $OptionsInfo{InDelim};

        # Setup FingerprintsFileIO parameters: a common core plus file type
        # specific additions...
        my %IOParameters = ('Name' => $FingerprintsFile, 'Mode' => 'Read', 'ValidateData' => $OptionsInfo{ValidateData}, 'DetailLevel' => 1);
        if ($FileType =~ /^SD$/i) {
            $IOParameters{'FingerprintsFieldLabel'} = $OptionsInfo{FingerprintsFieldLabel};
        }
        elsif ($FileType =~ /^FP$/i) {
            # FP files need nothing beyond the common parameters.
        }
        elsif ($FileType =~ /^Text$/i) {
            # Passes the InDelim option value, not the per-file $InDelim.
            $IOParameters{'FingerprintsCol'} = $OptionsInfo{FingerprintsCol};
            $IOParameters{'ColMode'} = $OptionsInfo{ColMode};
            $IOParameters{'InDelim'} = $OptionsInfo{InDelim};
        }
        else {
            warn "Warning: File type for fingerprints file, $FingerprintsFile, is not valid. Supported file types: SD, FP or Text\n";
            next FILELIST;
        }

        $FingerprintsFilesInfo{FileOkay}[$Index] = 1;
        $FingerprintsFilesInfo{FileType}[$Index] = $FileType;

        $FingerprintsFilesInfo{FileSize}[$Index] = FileSize($FingerprintsFile);
        my($TimeString, $DateString) = FormattedFileModificationTimeAndDate($FingerprintsFile);
        $FingerprintsFilesInfo{FileLastModified}[$Index] = "$TimeString; $DateString";

        $FingerprintsFilesInfo{InDelim}[$Index] = $InDelim;

        # Store a copy of the parameters for this file.
        $FingerprintsFilesInfo{FingerprintsFileIOParameters}[$Index] = { %IOParameters };
    }
}
|
|
415
|
|
# Translate the raw command line options in %Options into %OptionsInfo...
#
# Takes no parameters. Every listing flag becomes 1 when either its own option
# or "--all" was given, and 0 otherwise. Dies when "--FingerprintsCol" is not a
# positive integer while the column mode is ColNum.
#
sub ProcessOptions {
    %OptionsInfo = ();

    # OptionsInfo flag name => command line option which turns it on; "--all"
    # turns on every one of them.
    my %OptionNameFor = (
        ListAverageBitDensity              => 'averagebitdensity',
        ListBitDensity                     => 'bitdensity',
        CountEmptyFingerprints             => 'empty',
        ValidateData                       => 'datacheck',
        ListFingerprintsType               => 'fingerprintstype',
        ListFingerprintsDescription        => 'fingerprintsdescription',
        ListFingerprintsSize               => 'fingerprintssize',
        ListFingerprintsBitStringFormat    => 'fingerprintsbitstringformat',
        ListFingerprintsBitOrder           => 'fingerprintsbitorder',
        ListFingerprintsVectorValuesType   => 'fingerprintsvectorvaluestype',
        ListFingerprintsVectorValuesFormat => 'fingerprintsvectorvaluesformat',
        ListNumOfOnBits                    => 'numofonbits',
        ListNumOfNonZeroValues             => 'numofnonzerovalues',
    );
    for my $FlagName (keys %OptionNameFor) {
        $OptionsInfo{$FlagName} = ($Options{all} || $Options{$OptionNameFor{$FlagName}}) ? 1 : 0;
    }

    # Asking for the average bit density lists the bit density as well...
    $OptionsInfo{ListBitDensity} = 1 if $OptionsInfo{ListAverageBitDensity};

    # By default, count number of rows containing fingerprints data...
    $OptionsInfo{CountFingerprints} = 1;

    $OptionsInfo{ColMode} = $Options{colmode};
    $OptionsInfo{DetailLevel} = $Options{detail};
    $OptionsInfo{InDelim} = $Options{indelim};

    # Fingerprints column: explicit value or auto detection...
    if (IsNotEmpty($Options{fingerprintscol})) {
        if ($Options{colmode} =~ /^ColNum$/i && !IsPositiveInteger($Options{fingerprintscol})) {
            die "Error: Column value, $Options{fingerprintscol}, specified using \"--FingerprintsCol\" is not valid: Allowed integer values: > 0.\n";
        }
        $OptionsInfo{FingerprintsCol} = $Options{fingerprintscol};
    }
    else {
        $OptionsInfo{FingerprintsCol} = 'AutoDetect';
    }

    # Fingerprints SD field label: explicit value or auto detection...
    $OptionsInfo{FingerprintsFieldLabel} = IsNotEmpty($Options{fingerprintsfield}) ? $Options{fingerprintsfield} : 'AutoDetect';

    # Per entry listing is needed as soon as any of these flags is on...
    my @EntryInfoFlags = qw(
        ListFingerprintsType ListFingerprintsDescription ListFingerprintsSize
        ListFingerprintsBitStringFormat ListFingerprintsBitOrder
        ListFingerprintsVectorValuesType ListFingerprintsVectorValuesFormat
        ListBitDensity ListAverageBitDensity ListNumOfOnBits ListNumOfNonZeroValues
    );
    $OptionsInfo{ListFingerprintsDataEntryInfo} = (grep { $OptionsInfo{$_} } @EntryInfoFlags) ? 1 : 0;
}
|
|
473
|
|
# Retrieve the command line options into %Options and validate their values...
#
# Takes no parameters. Seeds %Options with the defaults for colmode, detail and
# indelim, lets GetOptions overwrite them, changes to the working directory
# when one is given, and dies with an error message on any invalid value.
#
sub SetupScriptUsage {
    # Defaults for options which take a value...
    %Options = (colmode => 'colnum', detail => 1, indelim => 'comma');

    my @OptionSpecs = (
        "all|a", "averagebitdensity", "bitdensity", "count", "colmode|c=s",
        "detail|d=i", "datacheck", "empty|e", "fingerprintsfield=s",
        "fingerprintscol=s", "fingerprintstype", "fingerprintsdescription",
        "fingerprintssize", "fingerprintsbitstringformat", "fingerprintsbitorder",
        "fingerprintsvectorvaluestype", "fingerprintsvectorvaluesformat",
        "help|h", "indelim=s", "numofonbits", "numofnonzerovalues", "workingdir|w=s",
    );
    GetOptions(\%Options, @OptionSpecs)
        or die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";

    # Switch to the working directory before any input file is looked at...
    if ($Options{workingdir}) {
        -d $Options{workingdir}
            or die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
        chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
    }

    unless ($Options{colmode} =~ /^(ColNum|ColLabel)$/i) {
        die "Error: The value specified, $Options{colmode}, for option \"-c, --ColMode\" is not valid. Allowed values: ColNum, or ColLabel\n";
    }
    unless (IsPositiveInteger($Options{detail})) {
        die "Error: The value specified, $Options{detail}, for option \"-d, --detail\" is not valid. Allowed values: > 0 \n";
    }
    unless ($Options{indelim} =~ /^(comma|semicolon)$/i) {
        die "Error: The value specified, $Options{indelim}, for option \"--InDelim\" is not valid. Allowed values: comma, or semicolon\n";
    }
}
|
|
503
|
|
504 __END__
|
|
505
|
|
506 =head1 NAME
|
|
507
|
|
508 InfoFingerprintsFiles.pl - List information about fingerprints data in SD, FP and CSV/TSV text file(s)
|
|
509
|
|
510 =head1 SYNOPSIS
|
|
511
|
|
512 InfoFingerprintsFiles.pl SDFile(s) FPFile(s) TextFile(s)...
|
|
513
|
|
514 InfoFingerprintsFiles.pl [B<-a, --all>] [B<--AverageBitDensity>] [B<--BitDensity>]
|
|
515 [B<--count>] [B<-c, --ColMode> I<ColNum | ColLabel>] [B<--DataCheck>]
|
|
516 [B<-d, --detail> I<InfoLevel>] [B<-e, --empty>] [B<--FingerprintsCol> I<col number | col name>]
|
|
517 [B<--FingerprintsField> I<FieldLabel>] [B<--FingerprintsType>] [B<--FingerprintsDescription>]
|
|
518 [B<--FingerprintsSize>] [B<--FingerprintsBitStringFormat>] [B<--FingerprintsBitOrder>]
|
|
519 [B<--FingerprintsVectorValuesType>] [B<--FingerprintsVectorValuesFormat>]
|
|
520 [B<-h, --help>] [B<--InDelim> I<comma | semicolon>]
|
|
521 [B<--NumOfOnBits>] [B<--NumOfNonZeroValues>]
|
|
522 [B<-w, --WorkingDir> dirname] SDFile(s) FPFile(s) TextFile(s)...
|
|
523
|
|
524 =head1 DESCRIPTION
|
|
525
|
|
526 List information about fingerprints data in I<SD, FP and CSV/TSV> text file(s): number of
|
|
527 rows containing fingerprints data, type of fingerprints vector, description and size of fingerprints,
|
|
528 bit density and average bit density for bit-vector fingerprints strings, and so on.
|
|
529
|
|
530 The scripts InfoFingerprintsSDFiles.pl and InfoFingerprintsTextFiles.pl have been removed from the
|
|
531 current release of MayaChemTools and their functionality merged with this script.
|
|
532
|
|
533 The valid I<SDFile> extensions are I<.sdf> and I<.sd>. All SD files in a current directory
|
|
534 can be specified either by I<*.sdf> or the current directory name.
|
|
535
|
|
536 The valid I<FPFile> extensions are I<.fpf> and I<.fp>. All FP files in a current directory
|
|
537 can be specified either by I<*.fpf> or the current directory name.
|
|
538
|
|
539 The valid I<TextFile> extensions are I<.csv> and I<.tsv> for comma/semicolon and tab
|
|
540 delimited text files respectively. All other file names are ignored. All text files in a
|
|
541 current directory can be specified by I<*.csv>, I<*.tsv>, or the current directory
|
|
542 name. The B<--indelim> option determines the format of I<TextFile(s)>. Any file
|
|
543 which doesn't correspond to the format indicated by B<--indelim> option is ignored.
|
|
544
|
|
545 Format of fingerprint strings data in I<SDFile(s), FPFile(s) and TextFile(s)> is automatically
|
|
546 detected.
|
|
547
|
|
548 Example of I<FP> file containing fingerprints bit-vector string data:
|
|
549
|
|
550 #
|
|
551 # Package = MayaChemTools 7.4
|
|
552 # ReleaseDate = Oct 21, 2010
|
|
553 #
|
|
554 # TimeStamp = Mon Mar 7 15:14:01 2011
|
|
555 #
|
|
556 # FingerprintsStringType = FingerprintsBitVector
|
|
557 #
|
|
558 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:...
|
|
559 # Size = 1024
|
|
560 # BitStringFormat = HexadecimalString
|
|
561 # BitsOrder = Ascending
|
|
562 #
|
|
563 Cmpd1 9c8460989ec8a49913991a6603130b0a19e8051c89184414953800cc21510...
|
|
564 Cmpd2 000000249400840040100042011001001980410c000000001010088001120...
|
|
565 ... ...
|
|
566 ... ..
|
|
567
|
|
568 Example of I<FP> file containing fingerprints vector string data:
|
|
569
|
|
570 #
|
|
571 # Package = MayaChemTools 7.4
|
|
572 # ReleaseDate = Oct 21, 2010
|
|
573 #
|
|
574 # TimeStamp = Mon Mar 7 15:14:01 2011
|
|
575 #
|
|
576 # FingerprintsStringType = FingerprintsVector
|
|
577 #
|
|
578 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:...
|
|
579 # VectorStringFormat = IDsAndValuesString
|
|
580 # VectorValuesType = NumericalValues
|
|
581 #
|
|
582 Cmpd1 338;C F N O C:C C:N C=O CC CF CN CO C:C:C C:C:N C:CC C:CF C:CN C:
|
|
583 N:C C:NC CC:N CC=O CCC CCN CCO CNC NC=O O=CO C:C:C:C C:C:C:N C:C:CC...;
|
|
584 33 1 2 5 21 2 2 12 1 3 3 20 2 10 2 2 1 2 2 2 8 2 5 1 1 1 19 2 8 2 2 2 2
|
|
585 6 2 2 2 2 2 2 2 2 3 2 2 1 4 1 5 1 1 18 6 2 2 1 2 10 2 1 2 1 2 2 2 2 ...
|
|
586 Cmpd2 103;C N O C=N C=O CC CN CO CC=O CCC CCN CCO CNC N=CN NC=O NCN O=C
|
|
587 O C CC=O CCCC CCCN CCCO CCNC CNC=N CNC=O CNCN CCCC=O CCCCC CCCCN CC...;
|
|
588 15 4 4 1 2 13 5 2 2 15 5 3 2 2 1 1 1 2 17 7 6 5 1 1 1 2 15 8 5 7 2 2 2 2
|
|
589 1 2 1 1 3 15 7 6 8 3 4 4 3 2 2 1 2 3 14 2 4 7 4 4 4 4 1 1 1 2 1 1 1 ...
|
|
590 ... ...
|
|
591 ... ...
|
|
592
|
|
593 Example of I<SD> file containing fingerprints bit-vector string data:
|
|
594
|
|
595 ... ...
|
|
596 ... ...
|
|
597 $$$$
|
|
598 ... ...
|
|
599 ... ...
|
|
600 ... ...
|
|
601 41 44 0 0 0 0 0 0 0 0999 V2000
|
|
602 -3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
603 ... ...
|
|
604 2 3 1 0 0 0 0
|
|
605 ... ...
|
|
606 M END
|
|
607 > <CmpdID>
|
|
608 Cmpd1
|
|
609
|
|
610 > <PathLengthFingerprints>
|
|
611 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLengt
|
|
612 h1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a49913991a66
|
|
613 03130b0a19e8051c89184414953800cc2151082844a201042800130860308e8204d4028
|
|
614 00831048940e44281c00060449a5000ac80c894114e006321264401600846c050164462
|
|
615 08190410805000304a10205b0100e04c0038ba0fad0209c0ca8b1200012268b61c0026a
|
|
616 aa0660a11014a011d46
|
|
617
|
|
618 $$$$
|
|
619 ... ...
|
|
620 ... ...
|
|
621
|
|
622 Example of CSV I<Text> file containing fingerprints bit-vector string data:
|
|
623
|
|
624 "CompoundID","PathLengthFingerprints"
|
|
625 "Cmpd1","FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes
|
|
626 :MinLength1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a4
|
|
627 9913991a6603130b0a19e8051c89184414953800cc2151082844a20104280013086030
|
|
628 8e8204d402800831048940e44281c00060449a5000ac80c894114e006321264401..."
|
|
629 ... ...
|
|
630 ... ...
|
|
631
|
|
632 The current release of MayaChemTools supports the following types of fingerprint
|
|
633 bit-vector and vector strings:
|
|
634
|
|
635 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi
|
|
636 us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT
|
|
637 C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X
|
|
638 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A
|
|
639 TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2
|
|
640 -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B...
|
|
641
|
|
642 FingerprintsVector;AtomTypesCount:AtomicInvariantsAtomTypes:ArbitraryS
|
|
643 ize;10;NumericalValues;IDsAndValuesString;C.X1.BO1.H3 C.X2.BO2.H2 C.X2
|
|
644 .BO3.H1 C.X3.BO3.H1 C.X3.BO4 F.X1.BO1 N.X2.BO2.H1 N.X3.BO3 O.X1.BO1.H1
|
|
645 O.X1.BO2;2 4 14 3 10 1 1 1 3 2
|
|
646
|
|
647 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:ArbitrarySize;16;Nume
|
|
648 ricalValues;IDsAndValuesString;C1 C10 C11 C14 C18 C20 C21 C22 C5 CS F
|
|
649 N11 N4 O10 O2 O9;5 1 1 1 14 4 2 1 2 2 1 1 1 1 3 1
|
|
650
|
|
651 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:FixedSize;67;OrderedN
|
|
652 umericalValues;IDsAndValuesString;C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C
|
|
653 12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26 C27 CS N1 N
|
|
654 2 N3 N4 N5 N6 N7 N8 N9 N10 N11 N12 N13 N14 NS O1 O2 O3 O4 O5 O6 O7 O8
|
|
655 O9 O10 O11 O12 OS F Cl Br I Hal P S1 S2 S3 Me1 Me2;5 0 0 0 2 0 0 0 0 1
|
|
656 1 0 0 1 0 0 0 14 0 4 2 1 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0...
|
|
657
|
|
658 FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs
|
|
659 AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN
|
|
660 H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3
|
|
661 .024 -2.270
|
|
662
|
|
663 FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues;
|
|
664 ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435
|
|
665 4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1
|
|
666 4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
667 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
668
|
|
669 FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi
|
|
670 us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391
|
|
671 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414
|
|
672 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103
|
|
673 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338
|
|
674 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303...
|
|
675
|
|
676 FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes
|
|
677 :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524
|
|
678 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649
|
|
679 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...;
|
|
680 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2
|
|
681 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1
|
|
682
|
|
683 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp
|
|
684 es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100
|
|
685 0000000001010000000110000011000000000000100000000000000000000000100001
|
|
686 1000000110000000000000000000000000010011000000000000000000000000010000
|
|
687 0000000000000000000000000010000000000000000001000000000000000000000000
|
|
688 0000000000010000100001000000000000101000000000000000100000000000000...
|
|
689
|
|
690 FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu
|
|
691 s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8
|
|
692 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567
|
|
693 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012
|
|
694 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455
|
|
695 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404...
|
|
696
|
|
697 FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp
|
|
698 haNumericalValues;ValuesString;25189973 528584866 662581668 671034184
|
|
699 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450
|
|
700 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430
|
|
701 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134
|
|
702 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566...
|
|
703
|
|
704 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000
|
|
705 0000000000000000000000000000000001001000010010000000010010000000011100
|
|
706 0100101010111100011011000100110110000011011110100110111111111111011111
|
|
707 11111111111110111000
|
|
708
|
|
709 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011
|
|
710 1110011111100101111111000111101100110000000000000011100010000000000000
|
|
711 0000000000000000000000000000000000000000000000101000000000000000000000
|
|
712 0000000000000000000000000000000000000000000000000000000000000000000000
|
|
713 0000000000000000000000000000000000000011000000000000000000000000000000
|
|
714 0000000000000000000000000000000000000000
|
|
715
|
|
716 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri
|
|
717 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
718 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0
|
|
719 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0
|
|
720 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1
|
|
721 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1
|
|
722
|
|
723 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri
|
|
724 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0
|
|
725 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0
|
|
726 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
727 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0
|
|
728 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
|
|
729
|
|
730 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
|
|
731 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110
|
|
732 0100010101011000101001011100110001000010001001101000001001001001001000
|
|
733 0010110100000111001001000001001010100100100000000011000000101001011100
|
|
734 0010000001000101010100000100111100110111011011011000000010110111001101
|
|
735 0101100011000000010001000011000010100011101100001000001000100000000...
|
|
736
|
|
737 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength
|
|
738 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2
|
|
739 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X
|
|
740 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1
|
|
741 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO
|
|
742 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C....
|
|
743
|
|
744 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt
|
|
745 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1
|
|
746 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N
|
|
747 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1
|
|
748 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR
|
|
749 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ...
|
|
750
|
|
751 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD
|
|
752 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1
|
|
753 .H3-D1-C.X3.BO3.H1 C.X2.BO2.H2-D1-C.X2.BO2.H2 C.X2.BO2.H2-D1-C.X3.BO3.
|
|
754 H1 C.X2.BO2.H2-D1-C.X3.BO4 C.X2.BO2.H2-D1-N.X3.BO3 C.X2.BO3.H1-D1-...;
|
|
755 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1
|
|
756 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1...
|
|
757
|
|
758 FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi
|
|
759 stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar-D1-Ar
|
|
760 Ar-D1-Ar.HBA Ar-D1-HBD Ar-D1-Hal Ar-D1-None Ar.HBA-D1-None HBA-D1-NI H
|
|
761 BA-D1-None HBA.HBD-D1-NI HBA.HBD-D1-None HBD-D1-None NI-D1-None No...;
|
|
762 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4
|
|
763 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ...
|
|
764
|
|
765 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3
|
|
766 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-
|
|
767 C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO
|
|
768 2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O...;
|
|
769 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1
|
|
770
|
|
771 FingerprintsVector;TopologicalAtomTorsions:EStateAtomTypes;36;Numerica
|
|
772 lValues;IDsAndValuesString;aaCH-aaCH-aaCH-aaCH aaCH-aaCH-aaCH-aasC aaC
|
|
773 H-aaCH-aasC-aaCH aaCH-aaCH-aasC-aasC aaCH-aaCH-aasC-sF aaCH-aaCH-aasC-
|
|
774 ssNH aaCH-aasC-aasC-aasC aaCH-aasC-aasC-aasN aaCH-aasC-ssNH-dssC a...;
|
|
775 4 4 8 4 2 2 6 2 2 2 4 3 2 1 3 3 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 2
|
|
776
|
|
777 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M
|
|
778 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1
|
|
779 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1
|
|
780 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1
|
|
781 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....;
|
|
782 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2
|
|
783 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8...
|
|
784
|
|
785 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1
|
|
786 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C
|
|
787 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3-
|
|
788 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2
|
|
789 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C.
|
|
790 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7...
|
|
791
|
|
792 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min
|
|
793 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H
|
|
794 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2-
|
|
795 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H
|
|
796 BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...;
|
|
797 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10
|
|
798 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1
|
|
799
|
|
800 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist
|
|
801 ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0
|
|
802 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1
|
|
803 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0
|
|
804 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0
|
|
805 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18...
|
|
806
|
|
807 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize:
|
|
808 MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1-
|
|
809 Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1
|
|
810 -H1-HBD1 Ar1-HBA1-HBD1 H1-H1-H1 H1-H1-HBA1 H1-H1-HBD1 H1-HBA1-HBA1 H1-
|
|
811 HBA1-HBD1 H1-HBA1-NI1 H1-HBD1-NI1 HBA1-HBA1-NI1 HBA1-HBD1-NI1 Ar1-...;
|
|
812 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23
|
|
813 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1
|
|
814 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ...
|
|
815
|
|
816 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD
|
|
817 istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106
|
|
818 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0
|
|
819 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26
|
|
820 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0
|
|
821 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ...
|
|
822
|
|
823 =head1 OPTIONS
|
|
824
|
|
825 =over 4
|
|
826
|
|
827 =item B<-a, --all>
|
|
828
|
|
829 List all the available information.
|
|
830
|
|
831 =item B<--AverageBitDensity>
|
|
832
|
|
833 List average bit density of fingerprint bit-vector strings.
|
|
834
|
|
835 =item B<--BitDensity>
|
|
836
|
|
837 List bit density of fingerprints bit-vector strings data in each row.
|
|
838
|
|
839 =item B<--count>
|
|
840
|
|
841 List number of data entries containing fingerprints bit-vector or vector strings data. This
|
|
842 is B<default behavior>.
|
|
843
|
|
844 =item B<-c, --ColMode> I<ColNum | ColLabel>
|
|
845
|
|
846 Specify how columns are identified in CSV/TSV I<TextFile(s)>: using column number or column
|
|
847 label. Possible values: I<ColNum or ColLabel>. Default value: I<ColNum>.
|
|
848
|
|
849 =item B<-d, --detail> I<InfoLevel>
|
|
850
|
|
851 Level of information to print about lines being ignored. Default: I<1>. Possible values:
|
|
852 I<1, 2 or 3>.
|
|
853
|
|
854 =item B<--DataCheck>
|
|
855
|
|
856 Validate fingerprints data specified using B<--FingerprintsCol> and list information
|
|
857 about missing and invalid data.
|
|
858
|
|
859 =item B<-e, --empty>
|
|
860
|
|
861 List number of rows containing no fingerprints data.
|
|
862
|
|
863 =item B<--FingerprintsCol> I<col number | col name>
|
|
864
|
|
865 This value is B<-c, --ColMode> specific. It corresponds to column in CSV/TSV I<TextFile(s)>
|
|
866 containing fingerprints data. Possible values: I<col number or col label>.
|
|
867 Default value: I<first column containing the word Fingerprints in its column label>.
|
|
868
|
|
869 =item B<--FingerprintsField> I<FieldLabel>
|
|
870
|
|
871 Fingerprints field label to use during listing of fingerprints information for I<SDFile(s)>.
|
|
872 Default value: I<first data field label containing the word Fingerprints in its label>.
|
|
873
|
|
874 =item B<--FingerprintsType>
|
|
875
|
|
876 List types of fingerprint strings: FingerprintsBitVector or FingerprintsVector.
|
|
877
|
|
878 =item B<--FingerprintsDescription>
|
|
879
|
|
880 List types of fingerprints: PathLengthBits, PathLengthCount, MACCSKeyCount,
|
|
881 ExtendedConnectivity and so on.
|
|
882
|
|
883 =item B<--FingerprintsSize>
|
|
884
|
|
885 List size of fingerprints.
|
|
886
|
|
887 =item B<--FingerprintsBitStringFormat>
|
|
888
|
|
889 List format of fingerprint bit-vector strings: BinaryString or HexadecimalString.
|
|
890
|
|
891 =item B<--FingerprintsBitOrder>
|
|
892
|
|
893 List order of bits data in fingerprint bit-vector bit strings: Ascending or Descending.
|
|
894
|
|
895 =item B<--FingerprintsVectorValuesType>
|
|
896
|
|
897 List type of values in fingerprint vector strings: OrderedNumericalValues, NumericalValues or
|
|
898 AlphaNumericalValues.
|
|
899
|
|
900 =item B<--FingerprintsVectorValuesFormat>
|
|
901
|
|
902 List format of values in fingerprint vector strings: ValuesString, IDsAndValuesString,
|
|
903 IDsAndValuesPairsString, ValuesAndIDsString or ValuesAndIDsPairsString.
|
|
904
|
|
905 =item B<-h, --help>
|
|
906
|
|
907 Print this help message.
|
|
908
|
|
909 =item B<--InDelim> I<comma | semicolon>
|
|
910
|
|
911 Input delimiter for CSV I<TextFile(s)>. Possible values: I<comma or semicolon>.
|
|
912 Default value: I<comma>. For TSV files, this option is ignored and I<tab> is used as a
|
|
913 delimiter.
|
|
914
|
|
915 =item B<--NumOfOnBits>
|
|
916
|
|
917 List number of on bits in fingerprints bit-vector strings data in each row.
|
|
918
|
|
919 =item B<--NumOfNonZeroValues>
|
|
920
|
|
921 List number of non-zero values in fingerprints vector strings data in each row.
|
|
922
|
|
923 =item B<-w, --WorkingDir> I<DirName>
|
|
924
|
|
925 Location of working directory. Default: current directory.
|
|
926
|
|
927 =back
|
|
928
|
|
929 =head1 EXAMPLES
|
|
930
|
|
931 To count number of lines containing fingerprints bit-vector or vector strings data present
|
|
932 in FP file, in a column name containing Fingerprint substring in text file, and in a data
|
|
933 field with Fingerprint substring in its label, type:
|
|
934
|
|
935 % InfoFingerprintsFiles.pl SampleFPBin.csv
|
|
936
|
|
937 % InfoFingerprintsFiles.pl SampleFPBin.sdf SampleFPBin.fpf
|
|
938 SampleFPBin.csv
|
|
939
|
|
940 % InfoFingerprintsFiles.pl SampleFPHex.sdf SampleFPHex.fpf
|
|
941 SampleFPHex.csv
|
|
942
|
|
943 % InfoFingerprintsFiles.pl SampleFPcount.sdf SampleFPcount.fpf
|
|
944 SampleFPcount.csv
|
|
945
|
|
946 To list all available information about fingerprints bit-vector or vector strings data present
|
|
947 in FP file, in a column name containing Fingerprint substring in text file, and in a data
|
|
948 field with Fingerprint substring in its label, type:
|
|
949
|
|
950 % InfoFingerprintsFiles.pl -a SampleFPHex.sdf SampleFPHex.fpf
|
|
951 SampleFPHex.csv
|
|
952
|
|
953 % InfoFingerprintsFiles.pl -a SampleFPcount.sdf SampleFPcount.fpf
|
|
954 SampleFPcount.csv
|
|
955
|
|
956 To list all available information about fingerprints bit-vector or vector strings data present in a
|
|
957 column named Fingerprints in text file, type:
|
|
958
|
|
959 % InfoFingerprintsFiles.pl -a --ColMode ColLabel --FingerprintsCol
|
|
960 Fingerprints SampleFPHex.csv
|
|
961
|
|
962 % InfoFingerprintsFiles.pl -a --ColMode ColLabel --FingerprintsCol
|
|
963 Fingerprints SampleFPcount.csv
|
|
964
|
|
965 To list all available information about fingerprints bit-vector or vector strings data present in a
|
|
966 data field named Fingerprints in SD file, type:
|
|
967
|
|
968 % InfoFingerprintsFiles.pl -a --FingerprintsField Fingerprints
|
|
969 SampleFPHex.sdf
|
|
970
|
|
971 % InfoFingerprintsFiles.pl -a --FingerprintsField Fingerprints
|
|
972 SampleFPcount.sdf
|
|
973
|
|
974 To list bit density, average bit density, and number of on bits for fingerprints bit-vector strings data
|
|
975 present in FP file, in a column name containing Fingerprint substring in text file, and in a data
|
|
976 field with Fingerprint substring in its label, type:
|
|
977
|
|
978 % InfoFingerprintsFiles.pl --BitDensity --AverageBitDensity
|
|
979 --NumOfOnBits SampleFPBin.csv SampleFPBin.sdf SampleFPBin.fpf
|
|
980
|
|
981 To list vector values type, format and number of non-zero values for fingerprints vector strings
|
|
982 data present in FP file, in a column name containing Fingerprint substring in text file, and in a data
|
|
983 field with Fingerprint substring in its label along with fingerprints type and description, type:
|
|
984
|
|
985 % InfoFingerprintsFiles.pl --FingerprintsType --FingerprintsDescription
|
|
986 --FingerprintsVectorValuesType --FingerprintsVectorValuesFormat
|
|
987 --NumOfNonZeroValues SampleFPcount.csv SampleFPcount.sdf
|
|
988 SampleFPcount.fpf
|
|
989
|
|
990 =head1 AUTHOR
|
|
991
|
|
992 Manish Sud <msud@san.rr.com>
|
|
993
|
|
994 =head1 SEE ALSO
|
|
995
|
|
996 SimilarityMatricesFingerprints.pl, SimilaritySearchingFingerprints.pl, AtomNeighborhoodsFingerprints.pl,
|
|
997 ExtendedConnectivityFingerprints.pl, MACCSKeysFingerprints.pl,
|
|
998 PathLengthFingerprints.pl, TopologicalAtomPairsFingerprints.pl, TopologicalAtomTorsionsFingerprints.pl,
|
|
999 TopologicalPharmacophoreAtomPairsFingerprints.pl, TopologicalPharmacophoreAtomTripletsFingerprints.pl
|
|
1000
|
|
1001 =head1 COPYRIGHT
|
|
1002
|
|
1003 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
1004
|
|
1005 This file is part of MayaChemTools.
|
|
1006
|
|
1007 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
1008 the terms of the GNU Lesser General Public License as published by the Free
|
|
1009 Software Foundation; either version 3 of the License, or (at your option)
|
|
1010 any later version.
|
|
1011
|
|
1012 =cut
|