comparison bin/InfoFingerprintsFiles.pl @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 #!/usr/bin/perl -w
2 #
3 # $RCSfile: InfoFingerprintsFiles.pl,v $
4 # $Date: 2015/02/28 20:46:20 $
5 # $Revision: 1.20 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability or fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use FindBin; use lib "$FindBin::Bin/../lib";
31 use Getopt::Long;
32 use File::Basename;
33 use Text::ParseWords;
34 use Benchmark;
35 use FileUtil;
36 use TextUtil;
37 use Fingerprints::FingerprintsFileUtil;
38 use Fingerprints::FingerprintsStringUtil;
39
# File-scoped state: these lexicals are read and written by the subroutines
# defined later in this file.
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
$StartTime = Benchmark->new;

# Get the options and setup script; the usage text is shown and the script
# stops when help is requested or no arguments are left on the command line...
SetupScriptUsage();
die GetUsageFromPod("$FindBin::Bin/$ScriptName") if ($Options{help} || @ARGV < 1);

# Expand command line arguments into the list of candidate fingerprints files...
my(@FingerprintsFilesList) = ExpandFileNames(\@ARGV, "sdf sd fpf fp csv tsv");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
print "Checking input fingerprints file(s)...\n";
my(%FingerprintsFilesInfo);
RetrieveFingerprintsFilesInfo();

# Process input files; only the ones marked okay are listed...
print "\nProcessing fingerprints files...\n" if (@FingerprintsFilesList > 1);

FILE: for my $FileIndex (0 .. $#FingerprintsFilesList) {
  next FILE unless $FingerprintsFilesInfo{FileOkay}[$FileIndex];

  print "\nProcessing file $FingerprintsFilesList[$FileIndex]...\n";
  ListFingerprintsFileInfo($FileIndex);
}
ListTotalSizeOfFiles();

print "\n$ScriptName:Done...\n\n";

# Report elapsed time...
$EndTime = Benchmark->new;
$TotalTime = timediff($EndTime, $StartTime);
print "Total time: " . timestr($TotalTime) . "\n";
87
88 ###############################################################################
89
# List appropriate information for one fingerprints file...
#
# Parameters:
#   $FileIndex - index of the file in @FingerprintsFilesList and in the
#                per-file arrays of %FingerprintsFilesInfo
#
# Reads every data entry through a FingerprintsFileIO object, tallies entries
# with missing, invalid, valid, bit-vector and vector fingerprints data,
# optionally lists per-entry details, and prints a summary followed by the
# file size and last modification time.
#
sub ListFingerprintsFileInfo {
  my($FileIndex) = @_;

  my $FileType = $FingerprintsFilesInfo{FileType}[$FileIndex];
  my $IsSDFile = ($FileType =~ /^SD$/i) ? 1 : 0;
  my $DataEntryLabel = $IsSDFile ? 'compounds' : 'lines';

  my($DataEntryCount, $ValidCount, $InvalidCount, $MissingCount, $BitVectorCount, $VectorCount, $BitDensitySum) = (0) x 7;

  my $FingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$FileIndex]});
  $FingerprintsFileIO->Open();

  # File level validity is checked once, before reading; when it fails, every
  # entry read below is counted as missing data...
  my $FileDataIsValid = $FingerprintsFileIO->IsFingerprintsFileDataValid() ? 1 : 0;
  my $ReportMissingData = ($OptionsInfo{ValidateData} || $OptionsInfo{CountEmptyFingerprints}) ? 1 : 0;

  FINGERPRINTS: while ($FingerprintsFileIO->Read()) {
    $DataEntryCount++;

    # Missing data...
    if (!$FileDataIsValid) {
      $MissingCount++;
      ListEmptyOrInvalidFingerprintsDataInfo('EmptyData', $FingerprintsFileIO, $FileType) if $ReportMissingData;
      next FINGERPRINTS;
    }

    # Invalid data...
    if (!$FingerprintsFileIO->IsFingerprintsDataValid()) {
      $InvalidCount++;
      ListEmptyOrInvalidFingerprintsDataInfo('InvalidData', $FingerprintsFileIO, $FileType) if $OptionsInfo{ValidateData};
      next FINGERPRINTS;
    }
    $ValidCount++;

    # Classify valid data by vector type...
    my $FingerprintsObject = $FingerprintsFileIO->GetFingerprints();
    my $FingerprintsType = $FingerprintsObject->GetVectorType();

    if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
      $BitVectorCount++;
      $BitDensitySum += $FingerprintsObject->GetFingerprintsBitDensity() if $OptionsInfo{ListAverageBitDensity};
    }
    elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
      $VectorCount++;
    }

    ListFingerprintsDataEntryInfo($FingerprintsFileIO, $FileType) if $OptionsInfo{ListFingerprintsDataEntryInfo};
  }
  $FingerprintsFileIO->Close();

  # Summary of data entries...
  my $EntryCountLine = $IsSDFile ? "Number of compounds: $DataEntryCount\n" : "Number of data lines: $DataEntryCount\n";
  print "\nFingerprints file type: $FileType\n";
  print $EntryCountLine;

  ListFileTypeHeaderInfo($FingerprintsFileIO, $FileType);

  print "\nNumber of $DataEntryLabel with valid fingerprints string data: $ValidCount\n";
  print "Number of $DataEntryLabel with bit-vector fingerprints string data: $BitVectorCount\n";
  print "Number of $DataEntryLabel with vector fingerprints string data: $VectorCount\n";

  if ($OptionsInfo{CountEmptyFingerprints}) {
    print "Number of $DataEntryLabel with missing fingerprints data: $MissingCount\n";
    print "Number of $DataEntryLabel with invalid fingerprints data: $InvalidCount\n";
  }

  # Average is reported only when at least one bit-vector entry was seen; it
  # is rounded to two decimals and "+ 0" turns it back into a number...
  if ($OptionsInfo{ListAverageBitDensity} && $BitVectorCount) {
    my $AverageBitDensity = sprintf("%.2f", $BitDensitySum/$BitVectorCount) + 0;
    print "\nAverage bit density: $AverageBitDensity\n";
  }

  # File size and modification information...
  print "\nFile size: ", FormatFileSize($FingerprintsFilesInfo{FileSize}[$FileIndex]), " \n";
  print "Last modified: ", $FingerprintsFilesInfo{FileLastModified}[$FileIndex], " \n";
}
180
# List empty or invalid fingerprints file data information...
#
# Parameters:
#   $Mode               - 'EmptyData' reports "no" fingerprints data; any other
#                         value reports "invalid" fingerprints data
#   $FingerprintsFileIO - file IO object positioned at the offending entry
#   $FileType           - 'SD' entries are identified by compound number, all
#                         other types by data line number
#
# At detail level 3 or higher the entry text is printed as well; at levels 1
# and 2 only the entry number is printed.
#
sub ListEmptyOrInvalidFingerprintsDataInfo {
  my($Mode, $FingerprintsFileIO, $FileType) = @_;

  my $ModeInfo = ($Mode =~ /^EmptyData$/i) ? "no" : "invalid";
  my $DetailLevel = $OptionsInfo{DetailLevel};

  if ($FileType =~ /^SD$/i) {
    my $CmpdNum = $FingerprintsFileIO->GetCompoundNum();

    if ($DetailLevel >= 3) {
      my $CmpdString = $FingerprintsFileIO->GetCompoundString();
      print "Compound number $CmpdNum contains $ModeInfo fingerprints data: $CmpdString \n";
    }
    elsif ($DetailLevel >= 1) {
      print "Compound number $CmpdNum contains $ModeInfo fingerprints data...\n";
    }
  }
  else {
    my $LineNum = $FingerprintsFileIO->GetLineNum();

    if ($DetailLevel >= 3) {
      my $DataLine = $FingerprintsFileIO->GetDataLine();
      print "Data line number $LineNum contains $ModeInfo fingerprints data: $DataLine \n";
    }
    elsif ($DetailLevel >= 1) {
      print "Data line number $LineNum contains $ModeInfo fingerprints data...\n";
    }
  }
}
214
# List detailed information about fingerprints data entry...
#
# Parameters:
#   $FingerprintsFileIO - file IO object positioned at a valid data entry
#   $FileType           - 'SD' entries are labeled by compound number, all
#                         other types by data line number
#
# Prints one line for the entry: its number followed by "; Label: value"
# items for each piece of information enabled in %OptionsInfo.
#
sub ListFingerprintsDataEntryInfo {
  my($FingerprintsFileIO, $FileType) = @_;
  my($FingerprintsDescription, $FingerprintsSize, $FingerprintsBitStringFormat, $FingerprintsBitOrder, $BitDensity, $NumOfOnBits, $FingerprintsVectorValuesType, $FingerprintsVectorValuesFormat, $NumOfNonZeroValues);

  my $FingerprintsObject = $FingerprintsFileIO->GetFingerprints();
  my $FingerprintsString = $FingerprintsFileIO->GetFingerprintsString();

  # The type initially comes from the object; it is replaced below by the
  # first value parsed out of the fingerprints string...
  my $FingerprintsType = $FingerprintsObject->GetVectorType();

  if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
    ($FingerprintsType, $FingerprintsDescription, $FingerprintsSize, $FingerprintsBitStringFormat, $FingerprintsBitOrder) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($FingerprintsString);

    $BitDensity = $OptionsInfo{ListBitDensity} ? $FingerprintsObject->GetFingerprintsBitDensity() : '';
    $NumOfOnBits = $OptionsInfo{ListNumOfOnBits} ? $FingerprintsObject->GetNumOfSetBits() : '';
  }
  elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
    ($FingerprintsType, $FingerprintsDescription, $FingerprintsSize, $FingerprintsVectorValuesType, $FingerprintsVectorValuesFormat) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($FingerprintsString);

    # Number of non-zero values is not computed for alphanumerical values...
    $NumOfNonZeroValues = '';
    if ($OptionsInfo{ListNumOfNonZeroValues}) {
      $NumOfNonZeroValues = ($FingerprintsVectorValuesType =~ /^AlphaNumericalValues$/i) ? 'NA' : $FingerprintsObject->GetNumOfNonZeroValues();
    }
  }

  # Assemble the output line...
  my $EntryLine;
  if ($FileType =~ /^SD$/i) {
    $EntryLine = "Compound number: " . $FingerprintsFileIO->GetCompoundNum();
  }
  else {
    $EntryLine = "Data line number: " . $FingerprintsFileIO->GetLineNum();
  }

  $EntryLine .= "; FPType: $FingerprintsType" if $OptionsInfo{ListFingerprintsType};
  $EntryLine .= "; FPDescription: $FingerprintsDescription" if $OptionsInfo{ListFingerprintsDescription};
  $EntryLine .= "; FPSize: $FingerprintsSize" if $OptionsInfo{ListFingerprintsSize};

  # Type specific items are selected using the type parsed from the string...
  if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
    $EntryLine .= "; FPBitStringFormat: $FingerprintsBitStringFormat" if $OptionsInfo{ListFingerprintsBitStringFormat};
    $EntryLine .= "; FPBitOrder: $FingerprintsBitOrder" if $OptionsInfo{ListFingerprintsBitOrder};
    $EntryLine .= "; BitDensity: $BitDensity" if $OptionsInfo{ListBitDensity};
    $EntryLine .= "; NumOfOnBits: $NumOfOnBits" if $OptionsInfo{ListNumOfOnBits};
  }
  elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
    $EntryLine .= "; FPVectorValuesType: $FingerprintsVectorValuesType" if $OptionsInfo{ListFingerprintsVectorValuesType};
    $EntryLine .= "; FPVectorValuesFormat: $FingerprintsVectorValuesFormat" if $OptionsInfo{ListFingerprintsVectorValuesFormat};
    $EntryLine .= "; NumOfNonZeroValues: $NumOfNonZeroValues" if $OptionsInfo{ListNumOfNonZeroValues};
  }

  print $EntryLine;
  print "\n";
}
300
# List file type header information...
#
# Parameters:
#   $FingerprintsFileIO - file IO object for the file being listed
#   $FileType           - 'Text' lists the column count and labels; 'FP' lists
#                         header data keys and values; nothing is printed for
#                         any other type
#
sub ListFileTypeHeaderInfo {
  my($FingerprintsFileIO, $FileType) = @_;

  if ($FileType =~ /^Text$/i) {
    my @DataColLabels = $FingerprintsFileIO->GetDataColLabels();
    my $ColCount = @DataColLabels;

    print "Number of columns: $ColCount\n";
    print "Column labels: ", JoinWords(\@DataColLabels, ", ", 1), "\n";
  }
  elsif ($FileType =~ /^FP$/i) {
    my %HeaderDataKeysAndValues = $FingerprintsFileIO->GetHeaderDataKeysAndValues();

    # Keys are listed in the order returned by GetHeaderDataKeys...
    print "\nFP file header data keys and values: \n#\n";
    for my $Key ($FingerprintsFileIO->GetHeaderDataKeys()) {
      my $Value = $HeaderDataKeysAndValues{$Key};
      print "# $Key = $Value\n";
    }
    print "#\n";
  }
}
323
# Total size of all the files...
#
# Adds up the sizes of the files that passed the checks performed by
# RetrieveFingerprintsFilesInfo and prints the total when more than one such
# file was processed.
#
# Only files flagged in $FingerprintsFilesInfo{FileOkay} are counted. Testing
# the file name itself would count ignored files in the "Total size of N
# files" message and would skip a file literally named "0".
#
sub ListTotalSizeOfFiles {
  my($FileOkayCount, $TotalSize) = (0, 0);

  INDEX: for my $Index (0 .. $#FingerprintsFilesList) {
    next INDEX unless $FingerprintsFilesInfo{FileOkay}[$Index];

    $FileOkayCount++;
    $TotalSize += $FingerprintsFilesInfo{FileSize}[$Index];
  }

  if ($FileOkayCount > 1) {
    print "\nTotal size of $FileOkayCount files: ", FormatFileSize($TotalSize), "\n";
  }
}
341
# Retrieve information about fingerprints files...
#
# Rebuilds %FingerprintsFilesInfo with one slot per entry in
# @FingerprintsFilesList: FileOkay, FileType, FileSize, FileLastModified,
# InDelim and FingerprintsFileIOParameters. Files which don't exist, or whose
# type is empty or not one of SD, FP or Text, are left with FileOkay set to 0
# after a warning.
#
sub RetrieveFingerprintsFilesInfo {
  %FingerprintsFilesInfo = (
    FileOkay => [],
    FileType => [],
    FileSize => [],
    FileLastModified => [],
    InDelim => [],
    FingerprintsFileIOParameters => [],
  );

  FILELIST: for my $Index (0 .. $#FingerprintsFilesList) {
    my $FingerprintsFile = $FingerprintsFilesList[$Index];

    # Start out with "not okay" defaults so that skipped files keep them...
    $FingerprintsFilesInfo{FileOkay}[$Index] = 0;
    $FingerprintsFilesInfo{FileType}[$Index] = '';
    $FingerprintsFilesInfo{FileSize}[$Index] = 0;
    $FingerprintsFilesInfo{FileLastModified}[$Index] = '';
    $FingerprintsFilesInfo{InDelim}[$Index] = "";
    $FingerprintsFilesInfo{FingerprintsFileIOParameters}[$Index] = {};

    if (!(-e $FingerprintsFile)) {
      warn "Warning: Ignoring file $FingerprintsFile: It doesn't exist\n";
      next FILELIST;
    }

    my $FileType = Fingerprints::FingerprintsFileUtil::GetFingerprintsFileType($FingerprintsFile);
    if (IsEmpty($FileType)) {
      warn "Warning: Ignoring file $FingerprintsFile: It's not a fingerprints file\n";
      next FILELIST;
    }

    my($FileDir, $FileName, $FileExt) = ParseFileName($FingerprintsFile);

    # This per-file delimiter is only recorded in FingerprintsFilesInfo; the
    # Text IO parameters below receive the option value as specified...
    my $InDelim = ($FileExt =~ /^tsv$/i) ? 'Tab' : $OptionsInfo{InDelim};

    # Setup FingerprintsFileIO parameters: common ones first, followed by
    # file type specific additions...
    my %IOParameters = ('Name' => $FingerprintsFile, 'Mode' => 'Read', 'ValidateData' => $OptionsInfo{ValidateData}, 'DetailLevel' => 1);
    if ($FileType =~ /^SD$/i) {
      $IOParameters{'FingerprintsFieldLabel'} = $OptionsInfo{FingerprintsFieldLabel};
    }
    elsif ($FileType =~ /^FP$/i) {
      # No additional parameters...
    }
    elsif ($FileType =~ /^Text$/i) {
      $IOParameters{'FingerprintsCol'} = $OptionsInfo{FingerprintsCol};
      $IOParameters{'ColMode'} = $OptionsInfo{ColMode};
      $IOParameters{'InDelim'} = $OptionsInfo{InDelim};
    }
    else {
      warn "Warning: File type for fingerprints file, $FingerprintsFile, is not valid. Supported file types: SD, FP or Text\n";
      next FILELIST;
    }

    # File passed all the checks...
    my($ModifiedTimeString, $ModifiedDateString);

    $FingerprintsFilesInfo{FileOkay}[$Index] = 1;
    $FingerprintsFilesInfo{FileType}[$Index] = $FileType;

    $FingerprintsFilesInfo{FileSize}[$Index] = FileSize($FingerprintsFile);
    ($ModifiedTimeString, $ModifiedDateString) = FormattedFileModificationTimeAndDate($FingerprintsFile);
    $FingerprintsFilesInfo{FileLastModified}[$Index] = "$ModifiedTimeString; $ModifiedDateString";

    $FingerprintsFilesInfo{InDelim}[$Index] = $InDelim;

    $FingerprintsFilesInfo{FingerprintsFileIOParameters}[$Index] = { %IOParameters };
  }
}
415
# Process option values...
#
# Translates the raw command line values in %Options into %OptionsInfo. Each
# listing flag is 1 when either "--all" or its own option was specified and 0
# otherwise. Dies when a fingerprints column is specified in ColNum mode and
# is not a positive integer.
#
sub ProcessOptions {
  %OptionsInfo = ();

  # %OptionsInfo flag name => command line option name which turns it on...
  my %OptionNameForFlag = (
    ListAverageBitDensity => 'averagebitdensity',
    ListBitDensity => 'bitdensity',
    CountEmptyFingerprints => 'empty',
    ValidateData => 'datacheck',
    ListFingerprintsType => 'fingerprintstype',
    ListFingerprintsDescription => 'fingerprintsdescription',
    ListFingerprintsSize => 'fingerprintssize',
    ListFingerprintsBitStringFormat => 'fingerprintsbitstringformat',
    ListFingerprintsBitOrder => 'fingerprintsbitorder',
    ListFingerprintsVectorValuesType => 'fingerprintsvectorvaluestype',
    ListFingerprintsVectorValuesFormat => 'fingerprintsvectorvaluesformat',
    ListNumOfOnBits => 'numofonbits',
    ListNumOfNonZeroValues => 'numofnonzerovalues',
  );
  for my $Flag (keys %OptionNameForFlag) {
    $OptionsInfo{$Flag} = ($Options{all} || $Options{$OptionNameForFlag{$Flag}}) ? 1 : 0;
  }

  # List bit density as well whenever average bit density is listed...
  $OptionsInfo{ListBitDensity} = 1 if $OptionsInfo{ListAverageBitDensity};

  # By default, count number of rows containing fingerprints data...
  $OptionsInfo{CountFingerprints} = 1;

  # Straight copies of option values...
  $OptionsInfo{ColMode} = $Options{colmode};
  $OptionsInfo{DetailLevel} = $Options{detail};
  $OptionsInfo{InDelim} = $Options{indelim};

  # Fingerprints column: validated as a column number only in ColNum mode...
  if (IsNotEmpty($Options{fingerprintscol})) {
    if ($Options{colmode} =~ /^ColNum$/i && !IsPositiveInteger($Options{fingerprintscol})) {
      die "Error: Column value, $Options{fingerprintscol}, specified using \"--FingerprintsCol\" is not valid: Allowed integer values: > 0.\n";
    }
    $OptionsInfo{FingerprintsCol} = $Options{fingerprintscol};
  }
  else {
    $OptionsInfo{FingerprintsCol} = 'AutoDetect';
  }

  # Fingerprints SD field label...
  $OptionsInfo{FingerprintsFieldLabel} = IsNotEmpty($Options{fingerprintsfield}) ? $Options{fingerprintsfield} : 'AutoDetect';

  # Per data entry listing is needed when any of these flags is on...
  my @DataEntryFlags = qw(ListFingerprintsType ListFingerprintsDescription ListFingerprintsSize ListFingerprintsBitStringFormat ListFingerprintsBitOrder ListFingerprintsVectorValuesType ListFingerprintsVectorValuesFormat ListBitDensity ListAverageBitDensity ListNumOfOnBits ListNumOfNonZeroValues);
  my $ActiveFlagCount = grep { $OptionsInfo{$_} } @DataEntryFlags;
  $OptionsInfo{ListFingerprintsDataEntryInfo} = $ActiveFlagCount ? 1 : 0;

}
473
# Setup script usage and retrieve command line arguments specified using various options...
#
# Fills %Options with defaults (colmode 'colnum', detail 1, indelim 'comma')
# and the values parsed by GetOptions, changes to the working directory when
# one is specified, and dies with an error message when option parsing fails
# or the colmode, detail or indelim value is not allowed.
#
sub SetupScriptUsage {

  # Defaults for options which take a value...
  %Options = (
    colmode => 'colnum',
    detail => 1,
    indelim => 'comma',
  );

  # Retrieve all the options...
  my @OptionSpecs = ("all|a", "averagebitdensity", "bitdensity", "count", "colmode|c=s", "detail|d=i", "datacheck", "empty|e", "fingerprintsfield=s", "fingerprintscol=s", "fingerprintstype", "fingerprintsdescription", "fingerprintssize", "fingerprintsbitstringformat", "fingerprintsbitorder", "fingerprintsvectorvaluestype", "fingerprintsvectorvaluesformat", "help|h", "indelim=s", "numofonbits", "numofnonzerovalues", "workingdir|w=s");
  if (!GetOptions(\%Options, @OptionSpecs)) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }

  # Switch to the working directory before any file names are used...
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  # Validate option values...
  if ($Options{colmode} !~ /^(ColNum|ColLabel)$/i) {
    die "Error: The value specified, $Options{colmode}, for option \"-c, --ColMode\" is not valid. Allowed values: ColNum, or ColLabel\n";
  }
  if (!IsPositiveInteger($Options{detail})) {
    die "Error: The value specified, $Options{detail}, for option \"-d, --detail\" is not valid. Allowed values: > 0 \n";
  }
  if ($Options{indelim} !~ /^(comma|semicolon)$/i) {
    die "Error: The value specified, $Options{indelim}, for option \"--InDelim\" is not valid. Allowed values: comma, or semicolon\n";
  }
}
503
504 __END__
505
506 =head1 NAME
507
508 InfoFingerprintsFiles.pl - List information about fingerprints data in SD, FP and CSV/TSV text file(s)
509
510 =head1 SYNOPSIS
511
512 InfoFingerprintsFiles.pl SDFile(s) FPFile(s) TextFile(s)...
513
514 InfoFingerprintsFiles.pl [B<-a, --all>] [B<--AverageBitDensity>] [B<--BitDensity>]
515 [B<--count>] [B<-c, --ColMode> I<ColNum | ColLabel>] [B<--DataCheck>]
516 [B<-d, --detail> I<InfoLevel>] [B<-e, --empty>] [B<--FingerprintsCol> I<col number | col name>]
517 [B<--FingerprintsField> I<FieldLabel>] [B<--FingerprintsType>] [B<--FingerprintsDescription>]
518 [B<--FingerprintsSize>] [B<--FingerprintsBitStringFormat>] [B<--FingerprintsBitOrder>]
519 [B<--FingerprintsVectorValuesType>] [B<--FingerprintsVectorValuesFormat>]
520 [B<-h, --help>] [B<--InDelim> I<comma | semicolon>]
521 [B<--NumOfOnBits>] [B<--NumOfNonZeroValues>]
522 [B<-w, --WorkingDir> dirname] SDFile(s) FPFile(s) TextFile(s)...
523
524 =head1 DESCRIPTION
525
526 List information about fingerprints data in I<SD, FP and CSV/TSV> text file(s): number of
527 rows containing fingerprints data, type of fingerprints vector, description and size of fingerprints,
528 bit density and average bit density for bit-vector fingerprints strings, and so on.
529
530 The scripts InfoFingerprintsSDFiles.pl and InfoFingerprintsTextFiles.pl have been removed from the
531 current release of MayaChemTools and their functionality merged with this script.
532
533 The valid I<SDFile> extensions are I<.sdf> and I<.sd>. All SD files in the current directory
534 can be specified either by I<*.sdf> or the current directory name.
535
536 The valid I<FPFile> extensions are I<.fpf> and I<.fp>. All FP files in the current directory
537 can be specified either by I<*.fpf> or the current directory name.
538
539 The valid I<TextFile> extensions are I<.csv> and I<.tsv> for comma/semicolon and tab
540 delimited text files respectively. All other file names are ignored. All text files in the
541 current directory can be specified by I<*.csv>, I<*.tsv>, or the current directory
542 name. The B<--indelim> option determines the format of I<TextFile(s)>. Any file
543 which doesn't correspond to the format indicated by B<--indelim> option is ignored.
544
545 Format of fingerprint strings data in I<SDFile(s), FPFile(s) and TextFile(s)> is automatically
546 detected.
547
548 Example of I<FP> file containing fingerprints bit-vector string data:
549
550 #
551 # Package = MayaChemTools 7.4
552 # ReleaseDate = Oct 21, 2010
553 #
554 # TimeStamp = Mon Mar 7 15:14:01 2011
555 #
556 # FingerprintsStringType = FingerprintsBitVector
557 #
558 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:...
559 # Size = 1024
560 # BitStringFormat = HexadecimalString
561 # BitsOrder = Ascending
562 #
563 Cmpd1 9c8460989ec8a49913991a6603130b0a19e8051c89184414953800cc21510...
564 Cmpd2 000000249400840040100042011001001980410c000000001010088001120...
565 ... ...
566 ... ..
567
568 Example of I<FP> file containing fingerprints vector string data:
569
570 #
571 # Package = MayaChemTools 7.4
572 # ReleaseDate = Oct 21, 2010
573 #
574 # TimeStamp = Mon Mar 7 15:14:01 2011
575 #
576 # FingerprintsStringType = FingerprintsVector
577 #
578 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:...
579 # VectorStringFormat = IDsAndValuesString
580 # VectorValuesType = NumericalValues
581 #
582 Cmpd1 338;C F N O C:C C:N C=O CC CF CN CO C:C:C C:C:N C:CC C:CF C:CN C:
583 N:C C:NC CC:N CC=O CCC CCN CCO CNC NC=O O=CO C:C:C:C C:C:C:N C:C:CC...;
584 33 1 2 5 21 2 2 12 1 3 3 20 2 10 2 2 1 2 2 2 8 2 5 1 1 1 19 2 8 2 2 2 2
585 6 2 2 2 2 2 2 2 2 3 2 2 1 4 1 5 1 1 18 6 2 2 1 2 10 2 1 2 1 2 2 2 2 ...
586 Cmpd2 103;C N O C=N C=O CC CN CO CC=O CCC CCN CCO CNC N=CN NC=O NCN O=C
587 O C CC=O CCCC CCCN CCCO CCNC CNC=N CNC=O CNCN CCCC=O CCCCC CCCCN CC...;
588 15 4 4 1 2 13 5 2 2 15 5 3 2 2 1 1 1 2 17 7 6 5 1 1 1 2 15 8 5 7 2 2 2 2
589 1 2 1 1 3 15 7 6 8 3 4 4 3 2 2 1 2 3 14 2 4 7 4 4 4 4 1 1 1 2 1 1 1 ...
590 ... ...
591 ... ...
592
593 Example of I<SD> file containing fingerprints bit-vector string data:
594
595 ... ...
596 ... ...
597 $$$$
598 ... ...
599 ... ...
600 ... ...
601 41 44 0 0 0 0 0 0 0 0999 V2000
602 -3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
603 ... ...
604 2 3 1 0 0 0 0
605 ... ...
606 M END
607 > <CmpdID>
608 Cmpd1
609
610 > <PathLengthFingerprints>
611 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLengt
612 h1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a49913991a66
613 03130b0a19e8051c89184414953800cc2151082844a201042800130860308e8204d4028
614 00831048940e44281c00060449a5000ac80c894114e006321264401600846c050164462
615 08190410805000304a10205b0100e04c0038ba0fad0209c0ca8b1200012268b61c0026a
616 aa0660a11014a011d46
617
618 $$$$
619 ... ...
620 ... ...
621
622 Example of CSV I<Text> file containing fingerprints bit-vector string data:
623
624 "CompoundID","PathLengthFingerprints"
625 "Cmpd1","FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes
626 :MinLength1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a4
627 9913991a6603130b0a19e8051c89184414953800cc2151082844a20104280013086030
628 8e8204d402800831048940e44281c00060449a5000ac80c894114e006321264401..."
629 ... ...
630 ... ...
631
632 The current release of MayaChemTools supports the following types of fingerprint
633 bit-vector and vector strings:
634
635 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi
636 us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT
637 C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X
638 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A
639 TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2
640 -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B...
641
642 FingerprintsVector;AtomTypesCount:AtomicInvariantsAtomTypes:ArbitraryS
643 ize;10;NumericalValues;IDsAndValuesString;C.X1.BO1.H3 C.X2.BO2.H2 C.X2
644 .BO3.H1 C.X3.BO3.H1 C.X3.BO4 F.X1.BO1 N.X2.BO2.H1 N.X3.BO3 O.X1.BO1.H1
645 O.X1.BO2;2 4 14 3 10 1 1 1 3 2
646
647 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:ArbitrarySize;16;Nume
648 ricalValues;IDsAndValuesString;C1 C10 C11 C14 C18 C20 C21 C22 C5 CS F
649 N11 N4 O10 O2 O9;5 1 1 1 14 4 2 1 2 2 1 1 1 1 3 1
650
651 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:FixedSize;67;OrderedN
652 umericalValues;IDsAndValuesString;C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C
653 12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26 C27 CS N1 N
654 2 N3 N4 N5 N6 N7 N8 N9 N10 N11 N12 N13 N14 NS O1 O2 O3 O4 O5 O6 O7 O8
655 O9 O10 O11 O12 OS F Cl Br I Hal P S1 S2 S3 Me1 Me2;5 0 0 0 2 0 0 0 0 1
656 1 0 0 1 0 0 0 14 0 4 2 1 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0...
657
658 FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs
659 AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN
660 H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3
661 .024 -2.270
662
663 FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues;
664 ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435
665 4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1
666 4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
667 0 0 0 0 0 0 0 0 0 0 0 0 0 0
668
669 FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi
670 us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391
671 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414
672 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103
673 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338
674 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303...
675
676 FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes
677 :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524
678 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649
679 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...;
680 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2
681 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1
682
683 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp
684 es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100
685 0000000001010000000110000011000000000000100000000000000000000000100001
686 1000000110000000000000000000000000010011000000000000000000000000010000
687 0000000000000000000000000010000000000000000001000000000000000000000000
688 0000000000010000100001000000000000101000000000000000100000000000000...
689
690 FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu
691 s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8
692 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567
693 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012
694 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455
695 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404...
696
697 FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp
698 haNumericalValues;ValuesString;25189973 528584866 662581668 671034184
699 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450
700 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430
701 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134
702 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566...
703
704 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000
705 0000000000000000000000000000000001001000010010000000010010000000011100
706 0100101010111100011011000100110110000011011110100110111111111111011111
707 11111111111110111000
708
709 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011
710 1110011111100101111111000111101100110000000000000011100010000000000000
711 0000000000000000000000000000000000000000000000101000000000000000000000
712 0000000000000000000000000000000000000000000000000000000000000000000000
713 0000000000000000000000000000000000000011000000000000000000000000000000
714 0000000000000000000000000000000000000000
715
716 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri
717 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
718 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0
719 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0
720 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1
721 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1
722
723 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri
724 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0
725 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0
726 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
727 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0
728 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
729
730 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
731 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110
732 0100010101011000101001011100110001000010001001101000001001001001001000
733 0010110100000111001001000001001010100100100000000011000000101001011100
734 0010000001000101010100000100111100110111011011011000000010110111001101
735 0101100011000000010001000011000010100011101100001000001000100000000...
736
737 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength
738 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2
739 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X
740 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1
741 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO
742 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C....
743
744 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt
745 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1
746 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N
747 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1
748 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR
749 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ...
750
751 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD
752 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1
753 .H3-D1-C.X3.BO3.H1 C.X2.BO2.H2-D1-C.X2.BO2.H2 C.X2.BO2.H2-D1-C.X3.BO3.
754 H1 C.X2.BO2.H2-D1-C.X3.BO4 C.X2.BO2.H2-D1-N.X3.BO3 C.X2.BO3.H1-D1-...;
755 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1
756 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1...
757
758 FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi
759 stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar-D1-Ar
760 Ar-D1-Ar.HBA Ar-D1-HBD Ar-D1-Hal Ar-D1-None Ar.HBA-D1-None HBA-D1-NI H
761 BA-D1-None HBA.HBD-D1-NI HBA.HBD-D1-None HBD-D1-None NI-D1-None No...;
762 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4
763 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ...
764
765 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3
766 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-
767 C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO
768 2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O...;
769 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1
770
771 FingerprintsVector;TopologicalAtomTorsions:EStateAtomTypes;36;Numerica
772 lValues;IDsAndValuesString;aaCH-aaCH-aaCH-aaCH aaCH-aaCH-aaCH-aasC aaC
773 H-aaCH-aasC-aaCH aaCH-aaCH-aasC-aasC aaCH-aaCH-aasC-sF aaCH-aaCH-aasC-
774 ssNH aaCH-aasC-aasC-aasC aaCH-aasC-aasC-aasN aaCH-aasC-ssNH-dssC a...;
775 4 4 8 4 2 2 6 2 2 2 4 3 2 1 3 3 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 2
776
777 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M
778 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1
779 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1
780 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1
781 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....;
782 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2
783 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8...
784
785 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1
786 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C
787 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3-
788 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2
789 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C.
790 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7...
791
792 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min
793 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H
794 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2-
795 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H
796 BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...;
797 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10
798 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1
799
800 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist
801 ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0
802 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1
803 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0
804 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0
805 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18...
806
807 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize:
808 MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1-
809 Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1
810 -H1-HBD1 Ar1-HBA1-HBD1 H1-H1-H1 H1-H1-HBA1 H1-H1-HBD1 H1-HBA1-HBA1 H1-
811 HBA1-HBD1 H1-HBA1-NI1 H1-HBD1-NI1 HBA1-HBA1-NI1 HBA1-HBD1-NI1 Ar1-...;
812 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23
813 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1
814 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ...
815
816 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD
817 istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106
818 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0
819 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26
820 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0
821 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ...
822
823 =head1 OPTIONS
824
825 =over 4
826
827 =item B<-a, --all>
828
829 List all the available information.
830
831 =item B<--AverageBitDensity>
832
833 List average bit density of fingerprint bit-vector strings.
834
835 =item B<--BitDensity>
836
837 List bit density of fingerprints bit-vector strings data in each row.
838
839 =item B<--count>
840
841 List number of data entries containing fingerprints bit-vector or vector strings data. This
842 is B<default behavior>.
843
844 =item B<-c, --ColMode> I<ColNum | ColLabel>
845
846 Specify how columns are identified in CSV/TSV I<TextFile(s)>: using column number or column
847 label. Possible values: I<ColNum or ColLabel>. Default value: I<ColNum>
848
849 =item B<-d, --detail> I<InfoLevel>
850
851 Level of information to print about lines being ignored. Default: I<1>. Possible values:
852 I<1, 2 or 3>.
853
854 =item B<--DataCheck>
855
856 Validate fingerprints data specified using B<--FingerprintsCol> and list information
857 about missing and invalid data.
858
859 =item B<-e, --empty>
860
861 List number of rows containing no fingerprints data.
862
863 =item B<--FingerprintsCol> I<col number | col name>
864
865 This value is B<-c, --ColMode> specific. It corresponds to the column in CSV/TSV I<TextFile(s)>
866 containing fingerprints data. Possible values: I<col number or col label>.
867 Default value: I<first column containing the word Fingerprints in its column label>.
868
869 =item B<--FingerprintsField> I<FieldLabel>
870
871 Fingerprints field label to use during listing of fingerprints information for I<SDFile(s)>.
872 Default value: I<first data field label containing the word Fingerprints in its label>.
873
874 =item B<--FingerprintsType>
875
876 List types of fingerprint strings: FingerprintsBitVector or FingerprintsVector.
877
878 =item B<--FingerprintsDescription>
879
880 List types of fingerprints: PathLengthBits, PathLengthCount, MACCSKeyCount,
881 ExtendedConnectivity and so on.
882
883 =item B<--FingerprintsSize>
884
885 List size of fingerprints.
886
887 =item B<--FingerprintsBitStringFormat>
888
889 List format of fingerprint bit-vector strings: BinaryString or HexadecimalString.
890
891 =item B<--FingerprintsBitOrder>
892
893 List order of bits data in fingerprint bit-vector bit strings: Ascending or Descending.
894
895 =item B<--FingerprintsVectorValuesType>
896
897 List type of values in fingerprint vector strings: OrderedNumericalValues, NumericalValues or
898 AlphaNumericalValues.
899
900 =item B<--FingerprintsVectorValuesFormat>
901
902 List format of values in fingerprint vector strings: ValuesString, IDsAndValuesString,
903 IDsAndValuesPairsString, ValuesAndIDsString or ValuesAndIDsPairsString.
904
905 =item B<-h, --help>
906
907 Print this help message.
908
909 =item B<--InDelim> I<comma | semicolon>
910
911 Input delimiter for CSV I<TextFile(s)>. Possible values: I<comma or semicolon>.
912 Default value: I<comma>. For TSV files, this option is ignored and I<tab> is used as a
913 delimiter.
914
915 =item B<--NumOfOnBits>
916
917 List number of on bits in fingerprints bit-vector strings data in each row.
918
919 =item B<--NumOfNonZeroValues>
920
921 List number of non-zero values in fingerprints vector strings data in each row.
922
923 =item B<-w, --WorkingDir> I<DirName>
924
925 Location of working directory. Default: current directory.
926
927 =back
928
929 =head1 EXAMPLES
930
931 To count the number of lines containing fingerprints bit-vector or vector strings data present
932 in an FP file, in a text file column whose label contains the Fingerprint substring, and in a data
933 field with the Fingerprint substring in its label, type:
934
935 % InfoFingerprintsFiles.pl SampleFPBin.csv
936
937 % InfoFingerprintsFiles.pl SampleFPBin.sdf SampleFPBin.fpf
938 SampleFPBin.csv
939
940 % InfoFingerprintsFiles.pl SampleFPHex.sdf SampleFPHex.fpf
941 SampleFPHex.csv
942
943 % InfoFingerprintsFiles.pl SampleFPcount.sdf SampleFPcount.fpf
944 SampleFPcount.csv
945
946 To list all available information about fingerprints bit-vector or vector strings data present
947 in an FP file, in a text file column whose label contains the Fingerprint substring, and in a data
948 field with the Fingerprint substring in its label, type:
949
950 % InfoFingerprintsFiles.pl -a SampleFPHex.sdf SampleFPHex.fpf
951 SampleFPHex.csv
952
953 % InfoFingerprintsFiles.pl -a SampleFPcount.sdf SampleFPcount.fpf
954 SampleFPcount.csv
955
956 To list all available information about fingerprints bit-vector or vector strings data present in a
957 column named Fingerprints in text file, type:
958
959 % InfoFingerprintsFiles.pl -a --ColMode ColLabel --FingerprintsCol
960 Fingerprints SampleFPHex.csv
961
962 % InfoFingerprintsFiles.pl -a --ColMode ColLabel --FingerprintsCol
963 Fingerprints SampleFPcount.csv
964
965 To list all available information about fingerprints bit-vector or vector strings data present in a
966 data field named Fingerprints in an SD file, type:
967
968 % InfoFingerprintsFiles.pl -a --FingerprintsField Fingerprints
969 SampleFPHex.sdf
970
971 % InfoFingerprintsFiles.pl -a --FingerprintsField Fingerprints
972 SampleFPcount.sdf
973
974 To list bit density, average bit density, and number of on bits for fingerprints bit-vector strings data
975 present in an FP file, in a text file column whose label contains the Fingerprint substring, and in a data
976 field with the Fingerprint substring in its label, type:
977
978 % InfoFingerprintsFiles.pl --BitDensity --AverageBitDensity
979 --NumOfOnBits SampleFPBin.csv SampleFPBin.sdf SampleFPBin.fpf
980
981 To list vector values type, format and number of non-zero values for fingerprints vector strings
982 data present in an FP file, in a text file column whose label contains the Fingerprint substring, and in a data
983 field with the Fingerprint substring in its label along with fingerprints type and description, type:
984
985 % InfoFingerprintsFiles.pl --FingerprintsType --FingerprintsDescription
986 --FingerprintsVectorValuesType --FingerprintsVectorValuesFormat
987 --NumOfNonZeroValues SampleFPcount.csv SampleFPcount.sdf
988 SampleFPcount.fpf
989
990 =head1 AUTHOR
991
992 Manish Sud <msud@san.rr.com>
993
994 =head1 SEE ALSO
995
996 SimilarityMatricesFingerprints.pl, SimilaritySearchingFingerprints.pl, AtomNeighborhoodsFingerprints.pl,
997 ExtendedConnectivityFingerprints.pl, MACCSKeysFingerprints.pl,
998 PathLengthFingerprints.pl, TopologicalAtomPairsFingerprints.pl, TopologicalAtomTorsionsFingerprints.pl,
999 TopologicalPharmacophoreAtomPairsFingerprints.pl, TopologicalPharmacophoreAtomTripletsFingerprints.pl
1000
1001 =head1 COPYRIGHT
1002
1003 Copyright (C) 2015 Manish Sud. All rights reserved.
1004
1005 This file is part of MayaChemTools.
1006
1007 MayaChemTools is free software; you can redistribute it and/or modify it under
1008 the terms of the GNU Lesser General Public License as published by the Free
1009 Software Foundation; either version 3 of the License, or (at your option)
1010 any later version.
1011
1012 =cut