Mercurial > repos > deepakjadmin > mayatool3_test3
comparison mayachemtools/bin/InfoFingerprintsFiles.pl @ 0:73ae111cf86f draft
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 11:55:01 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:73ae111cf86f |
---|---|
1 #!/usr/bin/perl -w | |
2 # | |
3 # $RCSfile: InfoFingerprintsFiles.pl,v $ | |
4 # $Date: 2015/02/28 20:46:20 $ | |
5 # $Revision: 1.20 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability of fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use FindBin; use lib "$FindBin::Bin/../lib"; | |
31 use Getopt::Long; | |
32 use File::Basename; | |
33 use Text::ParseWords; | |
34 use Benchmark; | |
35 use FileUtil; | |
36 use TextUtil; | |
37 use Fingerprints::FingerprintsFileUtil; | |
38 use Fingerprints::FingerprintsStringUtil; | |
39 | |
# File-scoped state shared with the subroutines defined further below...
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
# Direct method call instead of indirect object syntax ("new Benchmark")...
$StartTime = Benchmark->new;

# Get the options and setup script; show usage on --help or when no
# input files are specified...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

# Expand command line arguments into a list of candidate input files...
my(@FingerprintsFilesList);
@FingerprintsFilesList = ExpandFileNames(\@ARGV, "sdf sd fpf fp csv tsv");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
print "Checking input fingerprints file(s)...\n";
my(%FingerprintsFilesInfo);
RetrieveFingerprintsFilesInfo();

# Process input files; files not marked okay during retrieval are skipped...
if (@FingerprintsFilesList > 1) {
  print "\nProcessing fingerprints files...\n";
}
for my $FileIndex (0 .. $#FingerprintsFilesList) {
  next unless $FingerprintsFilesInfo{FileOkay}[$FileIndex];

  print "\nProcessing file $FingerprintsFilesList[$FileIndex]...\n";
  ListFingerprintsFileInfo($FileIndex);
}
ListTotalSizeOfFiles();

print "\n$ScriptName:Done...\n\n";

$EndTime = Benchmark->new;
$TotalTime = timediff ($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
87 | |
88 ############################################################################### | |
89 | |
# List appropriate information...
#
# Reads all data entries from the fingerprints file at the specified index in
# the input files list; counts valid, invalid, missing, bit-vector and vector
# fingerprints entries; and prints a summary followed by the file size and the
# last modification time collected earlier for the file.
#
sub ListFingerprintsFileInfo {
  my($FileIndex) = @_;

  my $FileType = $FingerprintsFilesInfo{FileType}[$FileIndex];
  my $IsSDFile = ($FileType =~ /^SD$/i) ? 1 : 0;
  my $DataEntryLabel = $IsSDFile ? 'compounds' : 'lines';

  my $DataEntryCount = 0;
  my $ValidDataEntryCount = 0;
  my $InvalidDataEntryCount = 0;
  my $MissingDataEntryCount = 0;
  my $BitVectorDataEntryCount = 0;
  my $VectorDataEntryCount = 0;
  my $BitDensitySum = 0;

  my $FileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$FileIndex]});
  $FileIO->Open();

  # File level validity is checked only once right after opening the file...
  my $FileDataIsValid = $FileIO->IsFingerprintsFileDataValid() ? 1 : 0;

  ENTRY: while ($FileIO->Read()) {
    $DataEntryCount++;

    # Missing data: every entry is counted as missing for invalid file data...
    if (!$FileDataIsValid) {
      $MissingDataEntryCount++;
      if ($OptionsInfo{ValidateData} || $OptionsInfo{CountEmptyFingerprints}) {
        ListEmptyOrInvalidFingerprintsDataInfo('EmptyData', $FileIO, $FileType);
      }
      next ENTRY;
    }

    # Invalid data...
    if (!$FileIO->IsFingerprintsDataValid()) {
      $InvalidDataEntryCount++;
      if ($OptionsInfo{ValidateData}) {
        ListEmptyOrInvalidFingerprintsDataInfo('InvalidData', $FileIO, $FileType);
      }
      next ENTRY;
    }
    $ValidDataEntryCount++;

    my $Fingerprints = $FileIO->GetFingerprints();
    my $VectorType = $Fingerprints->GetVectorType();

    if ($VectorType =~ /^FingerprintsBitVector$/i) {
      $BitVectorDataEntryCount++;
      # Bit density is accumulated only when its average is to be listed...
      if ($OptionsInfo{ListAverageBitDensity}) {
        $BitDensitySum += $Fingerprints->GetFingerprintsBitDensity();
      }
    }
    elsif ($VectorType =~ /^FingerprintsVector$/i) {
      $VectorDataEntryCount++;
    }

    if ($OptionsInfo{ListFingerprintsDataEntryInfo}) {
      ListFingerprintsDataEntryInfo($FileIO, $FileType);
    }
  }
  $FileIO->Close();

  print "\nFingerprints file type: $FileType\n";
  if ($IsSDFile) {
    print "Number of compounds: $DataEntryCount\n";
  }
  else {
    print "Number of data lines: $DataEntryCount\n";
  }

  ListFileTypeHeaderInfo($FileIO, $FileType);

  print "\nNumber of $DataEntryLabel with valid fingerprints string data: $ValidDataEntryCount\n";
  print "Number of $DataEntryLabel with bit-vector fingerprints string data: $BitVectorDataEntryCount\n";
  print "Number of $DataEntryLabel with vector fingerprints string data: $VectorDataEntryCount\n";

  if ($OptionsInfo{CountEmptyFingerprints}) {
    print "Number of $DataEntryLabel with missing fingerprints data: $MissingDataEntryCount\n";
    print "Number of $DataEntryLabel with invalid fingerprints data: $InvalidDataEntryCount\n";
  }

  # Average is listed only when at least one bit-vector entry was found...
  if ($OptionsInfo{ListAverageBitDensity} && $BitVectorDataEntryCount) {
    my $AverageBitDensity = sprintf("%.2f", $BitDensitySum/$BitVectorDataEntryCount) + 0;
    print "\nAverage bit density: $AverageBitDensity\n";
  }

  # File size and modification information...
  print "\nFile size: ", FormatFileSize($FingerprintsFilesInfo{FileSize}[$FileIndex]), " \n";
  print "Last modified: ", $FingerprintsFilesInfo{FileLastModified}[$FileIndex], " \n";
}
180 | |
# List empty or invalid fingerprints file data information...
#
# Mode is either EmptyData or InvalidData. The amount of information printed
# is controlled by the detail level: the entry number is listed at level 1 or
# higher and the complete compound string or data line at level 3 or higher.
#
sub ListEmptyOrInvalidFingerprintsDataInfo {
  my($Mode, $FingerprintsFileIO, $FileType) = @_;

  my $ModeInfo = 'invalid';
  if ($Mode =~ /^EmptyData$/i) {
    $ModeInfo = 'no';
  }
  my $DetailLevel = $OptionsInfo{DetailLevel};

  # Text and FP files: entries are identified by data line numbers...
  if ($FileType !~ /^SD$/i) {
    my $LineNum = $FingerprintsFileIO->GetLineNum();

    if ($DetailLevel >= 3 ) {
      my $DataLine = $FingerprintsFileIO->GetDataLine();
      print "Data line number $LineNum contains $ModeInfo fingerprints data: $DataLine \n";
    }
    elsif ($DetailLevel >= 1 ) {
      print "Data line number $LineNum contains $ModeInfo fingerprints data...\n";
    }
    return;
  }

  # SD files: entries are identified by compound numbers...
  my $CmpdNum = $FingerprintsFileIO->GetCompoundNum();

  if ($DetailLevel >= 3 ) {
    my $CmpdString = $FingerprintsFileIO->GetCompoundString();
    print "Compound number $CmpdNum contains $ModeInfo fingerprints data: $CmpdString \n";
  }
  elsif ($DetailLevel >= 1 ) {
    print "Compound number $CmpdNum contains $ModeInfo fingerprints data...\n";
  }
  return;
}
214 | |
# List detailed information about fingerprints data entry...
#
# Prints a single line for the current data entry: the compound or data line
# number followed by "; Label: Value" pairs for each piece of information
# requested through the command line options.
#
sub ListFingerprintsDataEntryInfo {
  my($FingerprintsFileIO, $FileType) = @_;
  my($FingerprintsType, $FingerprintsDescription, $FingerprintsSize, $FingerprintsBitStringFormat, $FingerprintsBitOrder, $BitDensity, $NumOfOnBits, $FingerprintsVectorValuesType, $FingerprintsVectorValuesFormat, $NumOfNonZeroValues);

  my $FingerprintsObject = $FingerprintsFileIO->GetFingerprints();
  my $FingerprintsString = $FingerprintsFileIO->GetFingerprintsString();

  $FingerprintsType = $FingerprintsObject->GetVectorType();

  # Collect values; the type is refreshed from the fingerprints string...
  if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
    ($FingerprintsType, $FingerprintsDescription, $FingerprintsSize, $FingerprintsBitStringFormat, $FingerprintsBitOrder) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($FingerprintsString);

    $BitDensity = $OptionsInfo{ListBitDensity} ? $FingerprintsObject->GetFingerprintsBitDensity() : '';
    $NumOfOnBits = $OptionsInfo{ListNumOfOnBits} ? $FingerprintsObject->GetNumOfSetBits() : '';
  }
  elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
    ($FingerprintsType, $FingerprintsDescription, $FingerprintsSize, $FingerprintsVectorValuesType, $FingerprintsVectorValuesFormat) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($FingerprintsString);

    $NumOfNonZeroValues = '';
    if ($OptionsInfo{ListNumOfNonZeroValues}) {
      # The count is reported as NA for alphanumerical values...
      $NumOfNonZeroValues = ($FingerprintsVectorValuesType =~ /^AlphaNumericalValues$/i) ? 'NA' : $FingerprintsObject->GetNumOfNonZeroValues();
    }
  }

  # Assemble the output line piece by piece...
  my @EntryInfo = ();

  if ($FileType =~ /^SD$/i) {
    push @EntryInfo, "Compound number: " . $FingerprintsFileIO->GetCompoundNum();
  }
  else {
    push @EntryInfo, "Data line number: " . $FingerprintsFileIO->GetLineNum();
  }

  push @EntryInfo, "; FPType: $FingerprintsType" if $OptionsInfo{ListFingerprintsType};
  push @EntryInfo, "; FPDescription: $FingerprintsDescription" if $OptionsInfo{ListFingerprintsDescription};
  push @EntryInfo, "; FPSize: $FingerprintsSize" if $OptionsInfo{ListFingerprintsSize};

  if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
    push @EntryInfo, "; FPBitStringFormat: $FingerprintsBitStringFormat" if $OptionsInfo{ListFingerprintsBitStringFormat};
    push @EntryInfo, "; FPBitOrder: $FingerprintsBitOrder" if $OptionsInfo{ListFingerprintsBitOrder};
    push @EntryInfo, "; BitDensity: $BitDensity" if $OptionsInfo{ListBitDensity};
    push @EntryInfo, "; NumOfOnBits: $NumOfOnBits" if $OptionsInfo{ListNumOfOnBits};
  }
  elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
    push @EntryInfo, "; FPVectorValuesType: $FingerprintsVectorValuesType" if $OptionsInfo{ListFingerprintsVectorValuesType};
    push @EntryInfo, "; FPVectorValuesFormat: $FingerprintsVectorValuesFormat" if $OptionsInfo{ListFingerprintsVectorValuesFormat};
    push @EntryInfo, "; NumOfNonZeroValues: $NumOfNonZeroValues" if $OptionsInfo{ListNumOfNonZeroValues};
  }

  print @EntryInfo, "\n";
}
300 | |
# List file type header information...
#
# Text files: number of columns and column labels. FP files: header data keys
# and values. Nothing is listed for any other file type.
#
sub ListFileTypeHeaderInfo {
  my($FingerprintsFileIO, $FileType) = @_;

  if ($FileType =~ /^Text$/i) {
    my @DataColLabels = $FingerprintsFileIO->GetDataColLabels();

    print "Number of columns: " . scalar @DataColLabels . "\n";
    print "Column labels: ", JoinWords(\@DataColLabels, ", ", 1), "\n";
    return;
  }

  return unless $FileType =~ /^FP$/i;

  my %HeaderDataKeysAndValues = $FingerprintsFileIO->GetHeaderDataKeysAndValues();

  print "\nFP file header data keys and values: \n#\n";
  # Keys are listed in the order returned by the file IO object...
  foreach my $Key ($FingerprintsFileIO->GetHeaderDataKeys()) {
    my $Value = $HeaderDataKeysAndValues{$Key};
    print "# $Key = $Value\n";
  }
  print "#\n";
}
323 | |
# Total size of all the files...
#
# Adds up sizes of the input files marked okay during retrieval of files
# information and lists the total only when more than one such file exists.
#
sub ListTotalSizeOfFiles {
  my $FileOkayCount = 0;
  my $TotalSize = 0;

  for my $Index (0 .. $#FingerprintsFilesList) {
    # Check the okay status and not the file name: a name is nearly always
    # true, which made ignored files count towards the number of files...
    next unless $FingerprintsFilesInfo{FileOkay}[$Index];

    $FileOkayCount++;
    $TotalSize += $FingerprintsFilesInfo{FileSize}[$Index];
  }

  if ($FileOkayCount > 1) {
    print "\nTotal size of $FileOkayCount files: ", FormatFileSize($TotalSize), "\n";
  }
}
341 | |
# Retrieve information about fingerprints files...
#
# Sets up %FingerprintsFilesInfo with one entry per input file for each of its
# keys: FileOkay, FileType, FileSize, FileLastModified, InDelim and
# FingerprintsFileIOParameters. Files which don't exist or whose type can't be
# determined are reported by a warning and left with FileOkay set to 0.
#
sub RetrieveFingerprintsFilesInfo {
  %FingerprintsFilesInfo = ();
  for my $InfoKey (qw(FileOkay FileType FileSize FileLastModified InDelim FingerprintsFileIOParameters)) {
    $FingerprintsFilesInfo{$InfoKey} = [];
  }

  FILELIST: for my $Index (0 .. $#FingerprintsFilesList) {
    my $FingerprintsFile = $FingerprintsFilesList[$Index];

    # Start with default values corresponding to a file to be ignored...
    $FingerprintsFilesInfo{FileOkay}[$Index] = 0;
    $FingerprintsFilesInfo{FileType}[$Index] = '';
    $FingerprintsFilesInfo{FileSize}[$Index] = 0;
    $FingerprintsFilesInfo{FileLastModified}[$Index] = '';
    $FingerprintsFilesInfo{InDelim}[$Index] = "";
    $FingerprintsFilesInfo{FingerprintsFileIOParameters}[$Index] = {};

    if (!(-e $FingerprintsFile)) {
      warn "Warning: Ignoring file $FingerprintsFile: It doesn't exist\n";
      next FILELIST;
    }

    my $FileType = Fingerprints::FingerprintsFileUtil::GetFingerprintsFileType($FingerprintsFile);
    if (IsEmpty($FileType)) {
      warn "Warning: Ignoring file $FingerprintsFile: It's not a fingerprints file\n";
      next FILELIST;
    }

    # Only the file extension is needed to pick the delimiter recorded for the file...
    my(undef, undef, $FileExt) = ParseFileName($FingerprintsFile);
    my $InDelim = ($FileExt =~ /^tsv$/i) ? 'Tab' : $OptionsInfo{InDelim};

    # Setup FingerprintsFileIO parameters: start with the parameters common to
    # all file types and add the file type specific ones...
    my %FingerprintsFileIOParameters = ('Name' => $FingerprintsFile, 'Mode' => 'Read', 'ValidateData' => $OptionsInfo{ValidateData}, 'DetailLevel' => 1);
    if ($FileType =~ /^SD$/i) {
      $FingerprintsFileIOParameters{'FingerprintsFieldLabel'} = $OptionsInfo{FingerprintsFieldLabel};
    }
    elsif ($FileType =~ /^FP$/i) {
      # No additional parameters...
    }
    elsif ($FileType =~ /^Text$/i) {
      $FingerprintsFileIOParameters{'FingerprintsCol'} = $OptionsInfo{FingerprintsCol};
      $FingerprintsFileIOParameters{'ColMode'} = $OptionsInfo{ColMode};
      $FingerprintsFileIOParameters{'InDelim'} = $OptionsInfo{InDelim};
    }
    else {
      warn "Warning: File type for fingerprints file, $FingerprintsFile, is not valid. Supported file types: SD, FP or Text\n";
      next FILELIST;
    }

    my($ModifiedTimeString, $ModifiedDateString);

    $FingerprintsFilesInfo{FileOkay}[$Index] = 1;
    $FingerprintsFilesInfo{FileType}[$Index] = $FileType;

    $FingerprintsFilesInfo{FileSize}[$Index] = FileSize($FingerprintsFile);
    ($ModifiedTimeString, $ModifiedDateString) = FormattedFileModificationTimeAndDate($FingerprintsFile);
    $FingerprintsFilesInfo{FileLastModified}[$Index] = "$ModifiedTimeString; $ModifiedDateString";

    $FingerprintsFilesInfo{InDelim}[$Index] = $InDelim;

    $FingerprintsFilesInfo{FingerprintsFileIOParameters}[$Index] = { %FingerprintsFileIOParameters };
  }
}
415 | |
# Process option values...
#
# Maps the command line option values in %Options to the values in
# %OptionsInfo used by the rest of the script.
#
sub ProcessOptions {
  %OptionsInfo = ();

  # On/off options: turned on either explicitly or by the "all" option...
  my %FlagOptionNamesMap = (
    'ListAverageBitDensity' => 'averagebitdensity',
    'ListBitDensity' => 'bitdensity',
    'CountEmptyFingerprints' => 'empty',
    'ValidateData' => 'datacheck',
    'ListFingerprintsType' => 'fingerprintstype',
    'ListFingerprintsDescription' => 'fingerprintsdescription',
    'ListFingerprintsSize' => 'fingerprintssize',
    'ListFingerprintsBitStringFormat' => 'fingerprintsbitstringformat',
    'ListFingerprintsBitOrder' => 'fingerprintsbitorder',
    'ListFingerprintsVectorValuesType' => 'fingerprintsvectorvaluestype',
    'ListFingerprintsVectorValuesFormat' => 'fingerprintsvectorvaluesformat',
    'ListNumOfOnBits' => 'numofonbits',
    'ListNumOfNonZeroValues' => 'numofnonzerovalues',
  );
  for my $InfoKey (keys %FlagOptionNamesMap) {
    my $OptionName = $FlagOptionNamesMap{$InfoKey};
    $OptionsInfo{$InfoKey} = ($Options{all} || $Options{$OptionName}) ? 1 : 0;
  }

  # List bit density as well for average bit density...
  $OptionsInfo{ListBitDensity} = 1 if $OptionsInfo{ListAverageBitDensity};

  # By default, count number of rows containing fingerprints data...
  $OptionsInfo{CountFingerprints} = 1;

  $OptionsInfo{ColMode} = $Options{colmode};
  $OptionsInfo{DetailLevel} = $Options{detail};
  $OptionsInfo{InDelim} = $Options{indelim};

  # Fingerprints column: a column number must be a positive integer...
  if (IsNotEmpty($Options{fingerprintscol})) {
    if ($Options{colmode} =~ /^ColNum$/i && !IsPositiveInteger($Options{fingerprintscol})) {
      die "Error: Column value, $Options{fingerprintscol}, specified using \"--FingerprintsCol\" is not valid: Allowed integer values: > 0.\n";
    }
    $OptionsInfo{FingerprintsCol} = $Options{fingerprintscol};
  }
  else {
    $OptionsInfo{FingerprintsCol} = 'AutoDetect';
  }

  $OptionsInfo{FingerprintsFieldLabel} = IsNotEmpty($Options{fingerprintsfield}) ? $Options{fingerprintsfield} : 'AutoDetect';

  # Information about individual data entries is listed whenever any of the
  # per entry options is turned on...
  my @DataEntryInfoKeys = qw(ListFingerprintsType ListFingerprintsDescription ListFingerprintsSize ListFingerprintsBitStringFormat ListFingerprintsBitOrder ListFingerprintsVectorValuesType ListFingerprintsVectorValuesFormat ListBitDensity ListAverageBitDensity ListNumOfOnBits ListNumOfNonZeroValues);
  $OptionsInfo{ListFingerprintsDataEntryInfo} = (grep { $OptionsInfo{$_} } @DataEntryInfoKeys) ? 1 : 0;
}
473 | |
# Setup script usage and retrieve command line arguments specified using various options...
#
# Initializes %Options with default values, parses the command line, changes
# to the working directory when one is specified, and validates values of the
# options which are restricted to specific choices.
#
sub SetupScriptUsage {

  # Default values...
  %Options = ('colmode' => 'colnum', 'detail' => 1, 'indelim' => 'comma');

  # Retrieve all the options...
  my @OptionSpecs = (
    "all|a", "averagebitdensity", "bitdensity", "count", "colmode|c=s", "detail|d=i", "datacheck", "empty|e",
    "fingerprintsfield=s", "fingerprintscol=s", "fingerprintstype", "fingerprintsdescription", "fingerprintssize",
    "fingerprintsbitstringformat", "fingerprintsbitorder", "fingerprintsvectorvaluestype", "fingerprintsvectorvaluesformat",
    "help|h", "indelim=s", "numofonbits", "numofnonzerovalues", "workingdir|w=s",
  );
  unless (GetOptions(\%Options, @OptionSpecs)) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }

  if ($Options{workingdir}) {
    unless (-d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  # Validate option values...
  unless ($Options{colmode} =~ /^(ColNum|ColLabel)$/i) {
    die "Error: The value specified, $Options{colmode}, for option \"-c, --ColMode\" is not valid. Allowed values: ColNum, or ColLabel\n";
  }
  unless (IsPositiveInteger($Options{detail})) {
    die "Error: The value specified, $Options{detail}, for option \"-d, --detail\" is not valid. Allowed values: > 0 \n";
  }
  unless ($Options{indelim} =~ /^(comma|semicolon)$/i) {
    die "Error: The value specified, $Options{indelim}, for option \"--InDelim\" is not valid. Allowed values: comma, or semicolon\n";
  }
}
503 | |
504 __END__ | |
505 | |
506 =head1 NAME | |
507 | |
508 InfoFingerprintsFiles.pl - List information about fingerprints data in SD, FP and CSV/TSV text file(s) | |
509 | |
510 =head1 SYNOPSIS | |
511 | |
512 InfoFingerprintsFiles.pl SDFile(s) FPFile(s) TextFile(s)... | |
513 | |
514 InfoFingerprintsFiles.pl [B<-a, --all>] [B<--AverageBitDensity>] [B<--BitDensity>] | |
515 [B<--count>] [B<-c, --ColMode> I<ColNum | ColLabel>] [B<--DataCheck>] | |
516 [B<-d, --detail> I<InfoLevel>] [B<-e, --empty>] [B<--FingerprintsCol> I<col number | col name>] | |
517 [B<--FingerprintsField> I<FieldLabel>] [B<--FingerprintsType>] [B<--FingerprintsDescription>] | |
518 [B<--FingerprintsSize>] [B<--FingerprintsBitStringFormat>] [B<--FingerprintsBitOrder>] | |
519 [B<--FingerprintsVectorValuesType>] [B<--FingerprintsVectorValuesFormat>] | |
520 [B<-h, --help>] [B<--InDelim> I<comma | semicolon>] | |
521 [B<--NumOfOnBits>] [B<--NumOfNonZeroValues>] | |
522 [B<-w, --WorkingDir> dirname] SDFile(s) FPFile(s) TextFile(s)... | |
523 | |
524 =head1 DESCRIPTION | |
525 | |
526 List information about fingerprints data in I<SD, FP and CSV/TSV> text file(s): number of | |
527 rows containing fingerprints data, type of fingerprints vector, description and size of fingerprints, | |
528 bit density and average bit density for bit-vector fingerprints strings, and so on. | |
529 | |
530 The scripts InfoFingerprintsSDFiles.pl and InfoFingerprintsTextFiles.pl have been removed from the | |
531 current release of MayaChemTools and their functionality merged with this script. | |
532 | |
533 The valid I<SDFile> extensions are I<.sdf> and I<.sd>. All SD files in a current directory | |
534 can be specified either by I<*.sdf> or the current directory name. | |
535 | |
536 The valid I<FPFile> extensions are I<.fpf> and I<.fp>. All FP files in a current directory | |
537 can be specified either by I<*.fpf> or the current directory name. | |
538 | |
539 The valid I<TextFile> extensions are I<.csv> and I<.tsv> for comma/semicolon and tab | |
540 delimited text files respectively. All other file names are ignored. All text files in a | |
541 current directory can be specified by I<*.csv>, I<*.tsv>, or the current directory | |
542 name. The B<--indelim> option determines the format of I<TextFile(s)>. Any file | |
543 which doesn't correspond to the format indicated by B<--indelim> option is ignored. | |
544 | |
545 Format of fingerprint strings data in I<SDFile(s), FPFile(s) and TextFile(s)> is automatically | |
546 detected. | |
547 | |
548 Example of I<FP> file containing fingerprints bit-vector string data: | |
549 | |
550 # | |
551 # Package = MayaChemTools 7.4 | |
552 # ReleaseDate = Oct 21, 2010 | |
553 # | |
554 # TimeStamp = Mon Mar 7 15:14:01 2011 | |
555 # | |
556 # FingerprintsStringType = FingerprintsBitVector | |
557 # | |
558 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:... | |
559 # Size = 1024 | |
560 # BitStringFormat = HexadecimalString | |
561 # BitsOrder = Ascending | |
562 # | |
563 Cmpd1 9c8460989ec8a49913991a6603130b0a19e8051c89184414953800cc21510... | |
564 Cmpd2 000000249400840040100042011001001980410c000000001010088001120... | |
565 ... ... | |
566 ... .. | |
567 | |
568 Example of I<FP> file containing fingerprints vector string data: | |
569 | |
570 # | |
571 # Package = MayaChemTools 7.4 | |
572 # ReleaseDate = Oct 21, 2010 | |
573 # | |
574 # TimeStamp = Mon Mar 7 15:14:01 2011 | |
575 # | |
576 # FingerprintsStringType = FingerprintsVector | |
577 # | |
578 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:... | |
579 # VectorStringFormat = IDsAndValuesString | |
580 # VectorValuesType = NumericalValues | |
581 # | |
582 Cmpd1 338;C F N O C:C C:N C=O CC CF CN CO C:C:C C:C:N C:CC C:CF C:CN C: | |
583 N:C C:NC CC:N CC=O CCC CCN CCO CNC NC=O O=CO C:C:C:C C:C:C:N C:C:CC...; | |
584 33 1 2 5 21 2 2 12 1 3 3 20 2 10 2 2 1 2 2 2 8 2 5 1 1 1 19 2 8 2 2 2 2 | |
585 6 2 2 2 2 2 2 2 2 3 2 2 1 4 1 5 1 1 18 6 2 2 1 2 10 2 1 2 1 2 2 2 2 ... | |
586 Cmpd2 103;C N O C=N C=O CC CN CO CC=O CCC CCN CCO CNC N=CN NC=O NCN O=C | |
587 O C CC=O CCCC CCCN CCCO CCNC CNC=N CNC=O CNCN CCCC=O CCCCC CCCCN CC...; | |
588 15 4 4 1 2 13 5 2 2 15 5 3 2 2 1 1 1 2 17 7 6 5 1 1 1 2 15 8 5 7 2 2 2 2 | |
589 1 2 1 1 3 15 7 6 8 3 4 4 3 2 2 1 2 3 14 2 4 7 4 4 4 4 1 1 1 2 1 1 1 ... | |
590 ... ... | |
591 ... ... | |
592 | |
593 Example of I<SD> file containing fingerprints bit-vector string data: | |
594 | |
595 ... ... | |
596 ... ... | |
597 $$$$ | |
598 ... ... | |
599 ... ... | |
600 ... ... | |
601 41 44 0 0 0 0 0 0 0 0999 V2000 | |
602 -3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 | |
603 ... ... | |
604 2 3 1 0 0 0 0 | |
605 ... ... | |
606 M END | |
607 > <CmpdID> | |
608 Cmpd1 | |
609 | |
610 > <PathLengthFingerprints> | |
611 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLengt | |
612 h1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a49913991a66 | |
613 03130b0a19e8051c89184414953800cc2151082844a201042800130860308e8204d4028 | |
614 00831048940e44281c00060449a5000ac80c894114e006321264401600846c050164462 | |
615 08190410805000304a10205b0100e04c0038ba0fad0209c0ca8b1200012268b61c0026a | |
616 aa0660a11014a011d46 | |
617 | |
618 $$$$ | |
619 ... ... | |
620 ... ... | |
621 | |
622 Example of CSV I<Text> file containing fingerprints bit-vector string data: | |
623 | |
624 "CompoundID","PathLengthFingerprints" | |
625 "Cmpd1","FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes | |
626 :MinLength1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a4 | |
627 9913991a6603130b0a19e8051c89184414953800cc2151082844a20104280013086030 | |
628 8e8204d402800831048940e44281c00060449a5000ac80c894114e006321264401..." | |
629 ... ... | |
630 ... ... | |
631 | |
632 The current release of MayaChemTools supports the following types of fingerprint | |
633 bit-vector and vector strings: | |
634 | |
635 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi | |
636 us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT | |
637 C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X | |
638 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A | |
639 TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2 | |
640 -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B... | |
641 | |
642 FingerprintsVector;AtomTypesCount:AtomicInvariantsAtomTypes:ArbitraryS | |
643 ize;10;NumericalValues;IDsAndValuesString;C.X1.BO1.H3 C.X2.BO2.H2 C.X2 | |
644 .BO3.H1 C.X3.BO3.H1 C.X3.BO4 F.X1.BO1 N.X2.BO2.H1 N.X3.BO3 O.X1.BO1.H1 | |
645 O.X1.BO2;2 4 14 3 10 1 1 1 3 2 | |
646 | |
647 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:ArbitrarySize;16;Nume | |
648 ricalValues;IDsAndValuesString;C1 C10 C11 C14 C18 C20 C21 C22 C5 CS F | |
649 N11 N4 O10 O2 O9;5 1 1 1 14 4 2 1 2 2 1 1 1 1 3 1 | |
650 | |
651 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:FixedSize;67;OrderedN | |
652 umericalValues;IDsAndValuesString;C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C | |
653 12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26 C27 CS N1 N | |
654 2 N3 N4 N5 N6 N7 N8 N9 N10 N11 N12 N13 N14 NS O1 O2 O3 O4 O5 O6 O7 O8 | |
655 O9 O10 O11 O12 OS F Cl Br I Hal P S1 S2 S3 Me1 Me2;5 0 0 0 2 0 0 0 0 1 | |
656 1 0 0 1 0 0 0 14 0 4 2 1 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0... | |
657 | |
658 FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs | |
659 AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN | |
660 H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3 | |
661 .024 -2.270 | |
662 | |
663 FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues; | |
664 ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435 | |
665 4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1 | |
666 4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
667 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
668 | |
669 FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi | |
670 us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391 | |
671 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414 | |
672 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103 | |
673 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338 | |
674 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303... | |
675 | |
676 FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes | |
677 :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524 | |
678 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 | |
679 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...; | |
680 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2 | |
681 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1 | |
682 | |
683 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp | |
684 es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100 | |
685 0000000001010000000110000011000000000000100000000000000000000000100001 | |
686 1000000110000000000000000000000000010011000000000000000000000000010000 | |
687 0000000000000000000000000010000000000000000001000000000000000000000000 | |
688 0000000000010000100001000000000000101000000000000000100000000000000... | |
689 | |
690 FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu | |
691 s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8 | |
692 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567 | |
693 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012 | |
694 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455 | |
695 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404... | |
696 | |
697 FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp | |
698 haNumericalValues;ValuesString;25189973 528584866 662581668 671034184 | |
699 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450 | |
700 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430 | |
701 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134 | |
702 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566... | |
703 | |
704 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000 | |
705 0000000000000000000000000000000001001000010010000000010010000000011100 | |
706 0100101010111100011011000100110110000011011110100110111111111111011111 | |
707 11111111111110111000 | |
708 | |
709 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011 | |
710 1110011111100101111111000111101100110000000000000011100010000000000000 | |
711 0000000000000000000000000000000000000000000000101000000000000000000000 | |
712 0000000000000000000000000000000000000000000000000000000000000000000000 | |
713 0000000000000000000000000000000000000011000000000000000000000000000000 | |
714 0000000000000000000000000000000000000000 | |
715 | |
716 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri | |
717 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
718 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 | |
719 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0 | |
720 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1 | |
721 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1 | |
722 | |
723 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri | |
724 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0 | |
725 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0 | |
726 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
727 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0 | |
728 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... | |
729 | |
730 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng | |
731 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110 | |
732 0100010101011000101001011100110001000010001001101000001001001001001000 | |
733 0010110100000111001001000001001010100100100000000011000000101001011100 | |
734 0010000001000101010100000100111100110111011011011000000010110111001101 | |
735 0101100011000000010001000011000010100011101100001000001000100000000... | |
736 | |
737 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength | |
738 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2 | |
739 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X | |
740 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1 | |
741 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO | |
742 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C.... | |
743 | |
744 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt | |
745 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1 | |
746 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N | |
747 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1 | |
748 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR | |
749 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ... | |
750 | |
751 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD | |
752 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1 | |
753 .H3-D1-C.X3.BO3.H1 C.X2.BO2.H2-D1-C.X2.BO2.H2 C.X2.BO2.H2-D1-C.X3.BO3. | |
754 H1 C.X2.BO2.H2-D1-C.X3.BO4 C.X2.BO2.H2-D1-N.X3.BO3 C.X2.BO3.H1-D1-...; | |
755 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1 | |
756 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1... | |
757 | |
758 FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi | |
759 stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar-D1-Ar | |
760 Ar-D1-Ar.HBA Ar-D1-HBD Ar-D1-Hal Ar-D1-None Ar.HBA-D1-None HBA-D1-NI H | |
761 BA-D1-None HBA.HBD-D1-NI HBA.HBD-D1-None HBD-D1-None NI-D1-None No...; | |
762 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4 | |
763 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ... | |
764 | |
765 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3 | |
766 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4- | |
767 C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO | |
768 2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O...; | |
769 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1 | |
770 | |
771 FingerprintsVector;TopologicalAtomTorsions:EStateAtomTypes;36;Numerica | |
772 lValues;IDsAndValuesString;aaCH-aaCH-aaCH-aaCH aaCH-aaCH-aaCH-aasC aaC | |
773 H-aaCH-aasC-aaCH aaCH-aaCH-aasC-aasC aaCH-aaCH-aasC-sF aaCH-aaCH-aasC- | |
774 ssNH aaCH-aasC-aasC-aasC aaCH-aasC-aasC-aasN aaCH-aasC-ssNH-dssC a...; | |
775 4 4 8 4 2 2 6 2 2 2 4 3 2 1 3 3 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 2 | |
776 | |
777 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M | |
778 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1 | |
779 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1 | |
780 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1 | |
781 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....; | |
782 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2 | |
783 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8... | |
784 | |
785 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1 | |
786 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C | |
787 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3- | |
788 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2 | |
789 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C. | |
790 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7... | |
791 | |
792 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min | |
793 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H | |
794 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2- | |
795 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H | |
796 BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...; | |
797 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10 | |
798 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1 | |
799 | |
800 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist | |
801 ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0 | |
802 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1 | |
803 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0 | |
804 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0 | |
805 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18... | |
806 | |
807 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize: | |
808 MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1- | |
809 Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1 | |
810 -H1-HBD1 Ar1-HBA1-HBD1 H1-H1-H1 H1-H1-HBA1 H1-H1-HBD1 H1-HBA1-HBA1 H1- | |
811 HBA1-HBD1 H1-HBA1-NI1 H1-HBD1-NI1 HBA1-HBA1-NI1 HBA1-HBD1-NI1 Ar1-...; | |
812 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23 | |
813 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1 | |
814 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ... | |
815 | |
816 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD | |
817 istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106 | |
818 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0 | |
819 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26 | |
820 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0 | |
821 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ... | |
822 | |
823 =head1 OPTIONS | |
824 | |
825 =over 4 | |
826 | |
827 =item B<-a, --all> | |
828 | |
829 List all the available information. | |
830 | |
831 =item B<--AverageBitDensity> | |
832 | |
833 List average bit density of fingerprint bit-vector strings. | |
834 | |
835 =item B<--BitDensity> | |
836 | |
837 List bit density of fingerprints bit-vector strings data in each row. | |
838 | |
839 =item B<--count> | |
840 | |
841 List number of data entries containing fingerprints bit-vector or vector strings data. This | |
842 is the B<default behavior>. | |
843 | |
844 =item B<-c, --ColMode> I<ColNum | ColLabel> | |
845 | |
846 Specify how columns are identified in CSV/TSV I<TextFile(s)>: using column number or column | |
847 label. Possible values: I<ColNum or ColLabel>. Default value: I<ColNum> | |
848 | |
849 =item B<-d, --detail> I<InfoLevel> | |
850 | |
851 Level of information to print about lines being ignored. Default: I<1>. Possible values: | |
852 I<1, 2 or 3>. | |
853 | |
854 =item B<--DataCheck> | |
855 | |
856 Validate fingerprints data specified using B<--FingerprintsCol> and list information | |
857 about missing and invalid data. | |
858 | |
859 =item B<-e, --empty> | |
860 | |
861 List number of rows containing no fingerprints data. | |
862 | |
863 =item B<--FingerprintsCol> I<col number | col name> | |
864 | |
865 This value is B<-c, --colmode> specific. It corresponds to the column in CSV/TSV I<TextFile(s)> | |
866 containing fingerprints data. Possible values: I<col number or col label>. | |
867 Default value: I<first column containing the word Fingerprints in its column label>. | |
868 | |
869 =item B<--FingerprintsField> I<FieldLabel> | |
870 | |
871 Fingerprints field label to use during listing of fingerprints information for I<SDFile(s)>. | |
872 Default value: I<first data field label containing the word Fingerprints in its label>. | |
873 | |
874 =item B<--FingerprintsType> | |
875 | |
876 List types of fingerprint strings: FingerprintsBitVector or FingerprintsVector. | |
877 | |
878 =item B<--FingerprintsDescription> | |
879 | |
880 List types of fingerprints: PathLengthBits, PathLengthCount, MACCSKeyCount, | |
881 ExtendedConnectivity and so on. | |
882 | |
883 =item B<--FingerprintsSize> | |
884 | |
885 List size of fingerprints. | |
886 | |
887 =item B<--FingerprintsBitStringFormat> | |
888 | |
889 List format of fingerprint bit-vector strings: BinaryString or HexadecimalString. | |
890 | |
891 =item B<--FingerprintsBitOrder> | |
892 | |
893 List order of bits data in fingerprint bit-vector bit strings: Ascending or Descending. | |
894 | |
895 =item B<--FingerprintsVectorValuesType> | |
896 | |
897 List type of values in fingerprint vector strings: OrderedNumericalValues, NumericalValues or | |
898 AlphaNumericalValues. | |
899 | |
900 =item B<--FingerprintsVectorValuesFormat> | |
901 | |
902 List format of values in fingerprint vector strings: ValuesString, IDsAndValuesString, | |
903 IDsAndValuesPairsString, ValuesAndIDsString or ValuesAndIDsPairsString. | |
904 | |
905 =item B<-h, --help> | |
906 | |
907 Print this help message. | |
908 | |
909 =item B<--InDelim> I<comma | semicolon> | |
910 | |
911 Input delimiter for CSV I<TextFile(s)>. Possible values: I<comma or semicolon>. | |
912 Default value: I<comma>. For TSV files, this option is ignored and I<tab> is used as a | |
913 delimiter. | |
914 | |
915 =item B<--NumOfOnBits> | |
916 | |
917 List number of on bits in fingerprints bit-vector strings data in each row. | |
918 | |
919 =item B<--NumOfNonZeroValues> | |
920 | |
921 List number of non-zero values in fingerprints vector strings data in each row. | |
922 | |
923 =item B<-w, --WorkingDir> I<DirName> | |
924 | |
925 Location of working directory. Default: current directory. | |
926 | |
927 =back | |
928 | |
929 =head1 EXAMPLES | |
930 | |
931 To count the number of lines containing fingerprints bit-vector or vector strings data present | |
932 in an FP file, in a text file column whose name contains the Fingerprint substring, and in an SD file | |
933 data field whose label contains the Fingerprint substring, type: | |
934 | |
935 % InfoFingerprintsFiles.pl SampleFPBin.csv | |
936 | |
937 % InfoFingerprintsFiles.pl SampleFPBin.sdf SampleFPBin.fpf | |
938 SampleFPBin.csv | |
939 | |
940 % InfoFingerprintsFiles.pl SampleFPHex.sdf SampleFPHex.fpf | |
941 SampleFPHex.csv | |
942 | |
943 % InfoFingerprintsFiles.pl SampleFPcount.sdf SampleFPcount.fpf | |
944 SampleFPcount.csv | |
945 | |
946 To list all available information about fingerprints bit-vector or vector strings data present | |
947 in an FP file, in a text file column whose name contains the Fingerprint substring, and in an SD file | |
948 data field whose label contains the Fingerprint substring, type: | |
949 | |
950 % InfoFingerprintsFiles.pl -a SampleFPHex.sdf SampleFPHex.fpf | |
951 SampleFPHex.csv | |
952 | |
953 % InfoFingerprintsFiles.pl -a SampleFPcount.sdf SampleFPcount.fpf | |
954 SampleFPcount.csv | |
955 | |
956 To list all available information about fingerprints bit-vector or vector strings data present in a | |
957 column named Fingerprints in text file, type: | |
958 | |
959 % InfoFingerprintsFiles.pl -a --ColMode ColLabel --FingerprintsCol | |
960 Fingerprints SampleFPHex.csv | |
961 | |
962 % InfoFingerprintsFiles.pl -a --ColMode ColLabel --FingerprintsCol | |
963 Fingerprints SampleFPcount.csv | |
964 | |
965 To list all available information about fingerprints bit-vector or vector strings data present in a | |
966 data field named Fingerprints in an SD file, type: | |
967 | |
968 % InfoFingerprintsFiles.pl -a --FingerprintsField Fingerprints | |
969 SampleFPHex.sdf | |
970 | |
971 % InfoFingerprintsFiles.pl -a --FingerprintsField Fingerprints | |
972 SampleFPcount.sdf | |
973 | |
974 To list bit density, average bit density, and number of on bits for fingerprints bit-vector strings data | |
975 present in an FP file, in a text file column whose name contains the Fingerprint substring, and in an SD file | |
976 data field whose label contains the Fingerprint substring, type: | |
977 | |
978 % InfoFingerprintsFiles.pl --BitDensity --AverageBitDensity | |
979 --NumOfOnBits SampleFPBin.csv SampleFPBin.sdf SampleFPBin.fpf | |
980 | |
981 To list fingerprints type and description along with vector values type, format and number of non-zero | |
982 values for fingerprints vector strings data present in an FP file, in a text file column whose name contains | |
983 the Fingerprint substring, and in an SD file data field whose label contains the Fingerprint substring, type: | |
984 | |
985 % InfoFingerprintsFiles.pl --FingerprintsType --FingerprintsDescription | |
986 --FingerprintsVectorValuesType --FingerprintsVectorValuesFormat | |
987 --NumOfNonZeroValues SampleFPcount.csv SampleFPcount.sdf | |
988 SampleFPcount.fpf | |
989 | |
990 =head1 AUTHOR | |
991 | |
992 Manish Sud <msud@san.rr.com> | |
993 | |
994 =head1 SEE ALSO | |
995 | |
996 SimilarityMatricesFingerprints.pl, SimilaritySearchingFingerprints.pl, AtomNeighborhoodsFingerprints.pl, | |
997 ExtendedConnectivityFingerprints.pl, MACCSKeysFingerprints.pl, | |
998 PathLengthFingerprints.pl, TopologicalAtomPairsFingerprints.pl, TopologicalAtomTorsionsFingerprints.pl, | |
999 TopologicalPharmacophoreAtomPairsFingerprints.pl, TopologicalPharmacophoreAtomTripletsFingerprints.pl | |
1000 | |
1001 =head1 COPYRIGHT | |
1002 | |
1003 Copyright (C) 2015 Manish Sud. All rights reserved. | |
1004 | |
1005 This file is part of MayaChemTools. | |
1006 | |
1007 MayaChemTools is free software; you can redistribute it and/or modify it under | |
1008 the terms of the GNU Lesser General Public License as published by the Free | |
1009 Software Foundation; either version 3 of the License, or (at your option) | |
1010 any later version. | |
1011 | |
1012 =cut |