Mercurial > repos > deepakjadmin > mayatool3_test3
diff mayachemtools/bin/InfoFingerprintsFiles.pl @ 0:73ae111cf86f draft
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 11:55:01 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mayachemtools/bin/InfoFingerprintsFiles.pl Wed Jan 20 11:55:01 2016 -0500 @@ -0,0 +1,1012 @@ +#!/usr/bin/perl -w +# +# $RCSfile: InfoFingerprintsFiles.pl,v $ +# $Date: 2015/02/28 20:46:20 $ +# $Revision: 1.20 $ +# +# Author: Manish Sud <msud@san.rr.com> +# +# Copyright (C) 2015 Manish Sud. All rights reserved. +# +# This file is part of MayaChemTools. +# +# MayaChemTools is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation; either version 3 of the License, or (at your option) any +# later version. +# +# MayaChemTools is distributed in the hope that it will be useful, but without +# any warranty; without even the implied warranty of merchantability of fitness +# for a particular purpose. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or +# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, +# Boston, MA, 02111-1307, USA. +# + +use strict; +use FindBin; use lib "$FindBin::Bin/../lib"; +use Getopt::Long; +use File::Basename; +use Text::ParseWords; +use Benchmark; +use FileUtil; +use TextUtil; +use Fingerprints::FingerprintsFileUtil; +use Fingerprints::FingerprintsStringUtil; + +my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime); + +# Autoflush STDOUT +$| = 1; + +# Starting message... +$ScriptName = basename($0); +print "\n$ScriptName: Starting...\n\n"; +$StartTime = new Benchmark; + +# Get the options and setup script... +SetupScriptUsage(); +if ($Options{help} || @ARGV < 1) { + die GetUsageFromPod("$FindBin::Bin/$ScriptName"); +} + +my(@FingerprintsFilesList); +@FingerprintsFilesList = ExpandFileNames(\@ARGV, "sdf sd fpf fp csv tsv"); + +# Process options... +print "Processing options...\n"; +my(%OptionsInfo); +ProcessOptions(); + +# Setup information about input files... +print "Checking input fingerprints file(s)...\n"; +my(%FingerprintsFilesInfo); +RetrieveFingerprintsFilesInfo(); + +# Process input files.. +my($FileIndex); +if (@FingerprintsFilesList > 1) { + print "\nProcessing fingerprints files...\n"; +} +for $FileIndex (0 .. $#FingerprintsFilesList) { + if ($FingerprintsFilesInfo{FileOkay}[$FileIndex]) { + print "\nProcessing file $FingerprintsFilesList[$FileIndex]...\n"; + ListFingerprintsFileInfo($FileIndex); + } +} +ListTotalSizeOfFiles(); + +print "\n$ScriptName:Done...\n\n"; + +$EndTime = new Benchmark; +$TotalTime = timediff ($EndTime, $StartTime); +print "Total time: ", timestr($TotalTime), "\n"; + +############################################################################### + +# List approptiate information... +# +sub ListFingerprintsFileInfo { + my($FileIndex) = @_; + my($FileName, $FingerprintsFileIO, $InvalidFingerprintsFileData, $InvalidFingerprintsData, $DataEntryCount, $ValidDataEntryCount, $InvalidDataEntryCount, $MissingDataEntryCount, $BitVectorDataEntryCount, $VectorDataEntryCount, $FingerprintsObject, $FingerprintsType, $TotalBitDensity, $FileType, $DataEntryLabel); + + $FileType = $FingerprintsFilesInfo{FileType}[$FileIndex]; + $DataEntryLabel = ($FileType =~ /^SD$/i) ? 'compounds' : 'lines'; + + ($DataEntryCount, $ValidDataEntryCount, $InvalidDataEntryCount, $MissingDataEntryCount, $BitVectorDataEntryCount, $VectorDataEntryCount, $TotalBitDensity) = (0) x 7; + + $FingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$FileIndex]}); + $FingerprintsFileIO->Open(); + + $InvalidFingerprintsFileData = $FingerprintsFileIO->IsFingerprintsFileDataValid() ? 0 : 1; + + FINGERPRINTS: while ($FingerprintsFileIO->Read()) { + $DataEntryCount++; + + # Missing data... + if ($InvalidFingerprintsFileData) { + $MissingDataEntryCount++; + if ($OptionsInfo{ValidateData} || $OptionsInfo{CountEmptyFingerprints}) { + ListEmptyOrInvalidFingerprintsDataInfo('EmptyData', $FingerprintsFileIO, $FileType); + } + next FINGERPRINTS; + } + $InvalidFingerprintsData = $FingerprintsFileIO->IsFingerprintsDataValid() ? 0 : 1; + + # Invalid data... + if ($InvalidFingerprintsData) { + $InvalidDataEntryCount++; + if ($OptionsInfo{ValidateData}) { + ListEmptyOrInvalidFingerprintsDataInfo('InvalidData', $FingerprintsFileIO, $FileType); + } + next FINGERPRINTS; + } + $ValidDataEntryCount++; + + $FingerprintsObject = $FingerprintsFileIO->GetFingerprints(); + $FingerprintsType = $FingerprintsObject->GetVectorType(); + + if ($FingerprintsType =~ /^FingerprintsBitVector$/i) { + $BitVectorDataEntryCount++; + if ($OptionsInfo{ListAverageBitDensity}) { + $TotalBitDensity += $FingerprintsObject->GetFingerprintsBitDensity(); + } + } + elsif ($FingerprintsType =~ /^FingerprintsVector$/i) { + $VectorDataEntryCount++; + } + + if ($OptionsInfo{ListFingerprintsDataEntryInfo}) { + ListFingerprintsDataEntryInfo($FingerprintsFileIO, $FileType); + } + + } + $FingerprintsFileIO->Close(); + + print "\nFingerprints file type: $FileType\n"; + if ($FileType =~ /^SD$/i) { + print "Number of compounds: $DataEntryCount\n"; + } + else { + print "Number of data lines: $DataEntryCount\n"; + } + + ListFileTypeHeaderInfo($FingerprintsFileIO, $FileType); + + print "\nNumber of $DataEntryLabel with valid fingerprints string data: $ValidDataEntryCount\n"; + print "Number of $DataEntryLabel with bit-vector fingerprints string data: $BitVectorDataEntryCount\n"; + print "Number of $DataEntryLabel with vector fingerprints string data: $VectorDataEntryCount\n"; + + if ($OptionsInfo{CountEmptyFingerprints}) { + print "Number of $DataEntryLabel with missing fingerprints data: $MissingDataEntryCount\n"; + print "Number of $DataEntryLabel with invalid fingerprints data: $InvalidDataEntryCount\n"; + } + + if ($OptionsInfo{ListAverageBitDensity} && $BitVectorDataEntryCount) { + my($AverageBitDensity); + $AverageBitDensity = $TotalBitDensity/$BitVectorDataEntryCount; + $AverageBitDensity = sprintf("%.2f", $AverageBitDensity) + 0; + print "\nAverage bit density: $AverageBitDensity\n"; + } + + + # File size and modification information... + print "\nFile size: ", FormatFileSize($FingerprintsFilesInfo{FileSize}[$FileIndex]), " \n"; + print "Last modified: ", $FingerprintsFilesInfo{FileLastModified}[$FileIndex], " \n"; +} + +# List empty or invalid fingerprints file data information... +# +sub ListEmptyOrInvalidFingerprintsDataInfo { + my($Mode, $FingerprintsFileIO, $FileType) = @_; + my($ModeInfo); + + $ModeInfo = ($Mode =~ /^EmptyData$/i) ? "no" : "invalid"; + + if ($FileType =~ /^SD$/i) { + my($CmpdNum, $CmpdString); + + $CmpdNum = $FingerprintsFileIO->GetCompoundNum(); + if ($OptionsInfo{DetailLevel} >= 3 ) { + $CmpdString = $FingerprintsFileIO->GetCompoundString(); + print "Compound number $CmpdNum contains $ModeInfo fingerprints data: $CmpdString \n"; + } + elsif ($OptionsInfo{DetailLevel} >= 1 ) { + print "Compound number $CmpdNum contains $ModeInfo fingerprints data...\n"; + } + } + else { + my($LineNum, $DataLine); + + $LineNum = $FingerprintsFileIO->GetLineNum(); + if ($OptionsInfo{DetailLevel} >= 3 ) { + $DataLine = $FingerprintsFileIO->GetDataLine(); + print "Data line number $LineNum contains $ModeInfo fingerprints data: $DataLine \n"; + } + elsif ($OptionsInfo{DetailLevel} >= 1 ) { + print "Data line number $LineNum contains $ModeInfo fingerprints data...\n"; + } + } +} + +# List detailed information about fingerprints data entry... +# +sub ListFingerprintsDataEntryInfo { + my($FingerprintsFileIO, $FileType) = @_; + my($FingerprintsObject, $FingerprintsString, $FingerprintsType, $FingerprintsDescription, $FingerprintsSize, $FingerprintsBitStringFormat, $FingerprintsBitOrder, $BitDensity, $NumOfOnBits, $FingerprintsVectorValuesType, $FingerprintsVectorValuesFormat, $NumOfNonZeroValues); + + $FingerprintsObject = $FingerprintsFileIO->GetFingerprints(); + $FingerprintsString = $FingerprintsFileIO->GetFingerprintsString(); + + $FingerprintsType = $FingerprintsObject->GetVectorType(); + + if ($FingerprintsType =~ /^FingerprintsBitVector$/i) { + $BitDensity = ''; + $NumOfOnBits = ''; + + ($FingerprintsType, $FingerprintsDescription, $FingerprintsSize, $FingerprintsBitStringFormat, $FingerprintsBitOrder) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($FingerprintsString); + + if ($OptionsInfo{ListBitDensity} || $OptionsInfo{ListNumOfOnBits}) { + if ($OptionsInfo{ListBitDensity}) { + $BitDensity = $FingerprintsObject->GetFingerprintsBitDensity(); + } + if ($OptionsInfo{ListNumOfOnBits}) { + $NumOfOnBits = $FingerprintsObject->GetNumOfSetBits(); + } + } + } + elsif ($FingerprintsType =~ /^FingerprintsVector$/i) { + $NumOfNonZeroValues = ''; + + ($FingerprintsType, $FingerprintsDescription, $FingerprintsSize, $FingerprintsVectorValuesType, $FingerprintsVectorValuesFormat) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($FingerprintsString); + + if ($OptionsInfo{ListNumOfNonZeroValues}) { + if ($FingerprintsVectorValuesType =~ /^AlphaNumericalValues$/i) { + $NumOfNonZeroValues = 'NA'; + } + else { + $NumOfNonZeroValues = $FingerprintsObject->GetNumOfNonZeroValues(); + } + } + } + + if ($FileType =~ /^SD$/i) { + print "Compound number: " . $FingerprintsFileIO->GetCompoundNum(); + } + else { + print "Data line number: " . $FingerprintsFileIO->GetLineNum(); + } + + if ($OptionsInfo{ListFingerprintsType}) { + print "; FPType: $FingerprintsType"; + } + if ($OptionsInfo{ListFingerprintsDescription}) { + print "; FPDescription: $FingerprintsDescription"; + } + if ($OptionsInfo{ListFingerprintsSize}) { + print "; FPSize: $FingerprintsSize"; + } + + if ($FingerprintsType =~ /^FingerprintsBitVector$/i) { + if ($OptionsInfo{ListFingerprintsBitStringFormat}) { + print "; FPBitStringFormat: $FingerprintsBitStringFormat"; + } + if ($OptionsInfo{ListFingerprintsBitOrder}) { + print "; FPBitOrder: $FingerprintsBitOrder"; + } + if ($OptionsInfo{ListBitDensity}) { + print "; BitDensity: $BitDensity"; + } + if ($OptionsInfo{ListNumOfOnBits}) { + print "; NumOfOnBits: $NumOfOnBits"; + } + } + elsif ($FingerprintsType =~ /^FingerprintsVector$/i) { + if ($OptionsInfo{ListFingerprintsVectorValuesType}) { + print "; FPVectorValuesType: $FingerprintsVectorValuesType"; + } + if ($OptionsInfo{ListFingerprintsVectorValuesFormat}) { + print "; FPVectorValuesFormat: $FingerprintsVectorValuesFormat"; + } + if ($OptionsInfo{ListNumOfNonZeroValues}) { + print "; NumOfNonZeroValues: $NumOfNonZeroValues"; + } + } + print "\n"; +} + +# List file type header information... +# +sub ListFileTypeHeaderInfo { + my($FingerprintsFileIO, $FileType) = @_; + my($Key, $Value, @DataColLabels, %HeaderDataKeysAndValues); + + if ($FileType =~ /^Text$/i) { + @DataColLabels = $FingerprintsFileIO->GetDataColLabels(); + print "Number of columns: " . scalar @DataColLabels . "\n"; + print "Column labels: ", JoinWords(\@DataColLabels, ", ", 1), "\n"; + } + elsif ($FileType =~ /^FP$/i) { + %HeaderDataKeysAndValues = $FingerprintsFileIO->GetHeaderDataKeysAndValues(); + + print "\nFP file header data keys and values: \n#\n"; + for $Key ($FingerprintsFileIO->GetHeaderDataKeys()) { + $Value = $HeaderDataKeysAndValues{$Key}; + print "# $Key = $Value\n"; + } + print "#\n"; + } +} + +# Total size of all the fiels... +sub ListTotalSizeOfFiles { + my($FileOkayCount, $TotalSize, $Index); + + $FileOkayCount = 0; + $TotalSize = 0; + + for $Index (0 .. $#FingerprintsFilesList) { + if ($FingerprintsFilesList[$Index]) { + $FileOkayCount++; + $TotalSize += $FingerprintsFilesInfo{FileSize}[$Index]; + } + } + if ($FileOkayCount > 1) { + print "\nTotal size of $FileOkayCount files: ", FormatFileSize($TotalSize), "\n"; + } +} + +# Retrieve information about fingerprints files... +# +sub RetrieveFingerprintsFilesInfo { + my($FingerprintsFile, $Index, $FileDir, $FileExt, $FileName, $FileType, $InDelim, $ModifiedTimeString, $ModifiedDateString, %FingerprintsFileIOParameters); + + %FingerprintsFilesInfo = (); + @{$FingerprintsFilesInfo{FileOkay}} = (); + @{$FingerprintsFilesInfo{FileType}} = (); + @{$FingerprintsFilesInfo{FileSize}} = (); + @{$FingerprintsFilesInfo{FileLastModified}} = (); + @{$FingerprintsFilesInfo{InDelim}} = (); + + @{$FingerprintsFilesInfo{FingerprintsFileIOParameters}} = (); + + FILELIST: for $Index (0 .. $#FingerprintsFilesList) { + $FingerprintsFile = $FingerprintsFilesList[$Index]; + + $FingerprintsFilesInfo{FileOkay}[$Index] = 0; + $FingerprintsFilesInfo{FileType}[$Index] = ''; + $FingerprintsFilesInfo{FileSize}[$Index] = 0; + $FingerprintsFilesInfo{FileLastModified}[$Index] = ''; + $FingerprintsFilesInfo{InDelim}[$Index] = ""; + + %{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$Index]} = (); + + $FingerprintsFile = $FingerprintsFilesList[$Index]; + if (!(-e $FingerprintsFile)) { + warn "Warning: Ignoring file $FingerprintsFile: It doesn't exist\n"; + next FILELIST; + } + + $FileType = Fingerprints::FingerprintsFileUtil::GetFingerprintsFileType($FingerprintsFile); + if (IsEmpty($FileType)) { + warn "Warning: Ignoring file $FingerprintsFile: It's not a fingerprints file\n"; + next FILELIST; + } + + $FileDir = ""; $FileName = ""; $FileExt = ""; + ($FileDir, $FileName, $FileExt) = ParseFileName($FingerprintsFile); + + $InDelim = ($FileExt =~ /^tsv$/i) ? 'Tab' : $OptionsInfo{InDelim}; + + # Setup FingerprintsFileIO parameters... + %FingerprintsFileIOParameters = (); + FILEIOPARAMETERS: { + if ($FileType =~ /^SD$/i) { + %FingerprintsFileIOParameters = ('Name' => $FingerprintsFile, 'Mode' => 'Read', 'ValidateData' => $OptionsInfo{ValidateData}, 'DetailLevel' => 1, 'FingerprintsFieldLabel' => $OptionsInfo{FingerprintsFieldLabel}); + last FILEIOPARAMETERS; + } + if ($FileType =~ /^FP$/i) { + %FingerprintsFileIOParameters = ('Name' => $FingerprintsFile, 'Mode' => 'Read', 'ValidateData' => $OptionsInfo{ValidateData}, 'DetailLevel' => 1); + last FILEIOPARAMETERS; + } + if ($FileType =~ /^Text$/i) { + %FingerprintsFileIOParameters = ('Name' => $FingerprintsFile, 'Mode' => 'Read', 'ValidateData' => $OptionsInfo{ValidateData}, 'DetailLevel' => 1, 'FingerprintsCol' => $OptionsInfo{FingerprintsCol}, 'ColMode' => $OptionsInfo{ColMode}, 'InDelim' => $OptionsInfo{InDelim}); + last FILEIOPARAMETERS; + } + warn "Warning: File type for fingerprints file, $FingerprintsFile, is not valid. Supported file types: SD, FP or Text\n"; + next FILELIST; + } + + $FingerprintsFilesInfo{FileOkay}[$Index] = 1; + $FingerprintsFilesInfo{FileType}[$Index] = $FileType; + + $FingerprintsFilesInfo{FileSize}[$Index] = FileSize($FingerprintsFile); + ($ModifiedTimeString, $ModifiedDateString) = FormattedFileModificationTimeAndDate($FingerprintsFile); + $FingerprintsFilesInfo{FileLastModified}[$Index] = "$ModifiedTimeString; $ModifiedDateString"; + + $FingerprintsFilesInfo{InDelim}[$Index] = $InDelim; + + %{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$Index]} = %FingerprintsFileIOParameters; + } +} + +# Process option values... +sub ProcessOptions { + %OptionsInfo = (); + + $OptionsInfo{ListAverageBitDensity} = ($Options{all} || $Options{averagebitdensity}) ? 1 :0; + $OptionsInfo{ListBitDensity} = ($Options{all} || $Options{bitdensity}) ? 1 :0; + + if ($OptionsInfo{ListAverageBitDensity}) { + # List bit density as well... + $OptionsInfo{ListBitDensity} = 1; + } + + # By default, count number of rows containing fingerprints data... + $OptionsInfo{CountFingerprints} = 1; + $OptionsInfo{CountEmptyFingerprints} = ($Options{all} || $Options{empty}) ? 1 :0; + + $OptionsInfo{ColMode} = $Options{colmode}; + if (IsNotEmpty($Options{fingerprintscol})) { + if ($Options{colmode} =~ /^ColNum$/i) { + if (!IsPositiveInteger($Options{fingerprintscol})) { + die "Error: Column value, $Options{fingerprintscol}, specified using \"--FingerprintsCol\" is not valid: Allowed integer values: > 0.\n"; + } + } + $OptionsInfo{FingerprintsCol} = $Options{fingerprintscol}; + } + else { + $OptionsInfo{FingerprintsCol} = 'AutoDetect'; + } + + if (IsNotEmpty($Options{fingerprintsfield})) { + $OptionsInfo{FingerprintsFieldLabel} = $Options{fingerprintsfield}; + } + else { + $OptionsInfo{FingerprintsFieldLabel} = 'AutoDetect'; + } + + $OptionsInfo{ValidateData} = ($Options{all} || $Options{datacheck}) ? 1 :0; + $OptionsInfo{DetailLevel} = $Options{detail}; + + $OptionsInfo{ListFingerprintsType} = ($Options{all} || $Options{fingerprintstype}) ? 1 :0; + $OptionsInfo{ListFingerprintsDescription} = ($Options{all} || $Options{fingerprintsdescription}) ? 1 :0; + $OptionsInfo{ListFingerprintsSize} = ($Options{all} || $Options{fingerprintssize}) ? 1 :0; + + $OptionsInfo{ListFingerprintsBitStringFormat} = ($Options{all} || $Options{fingerprintsbitstringformat}) ? 1 :0; + $OptionsInfo{ListFingerprintsBitOrder} = ($Options{all} || $Options{fingerprintsbitorder}) ? 1 :0; + + $OptionsInfo{ListFingerprintsVectorValuesType} = ($Options{all} || $Options{fingerprintsvectorvaluestype}) ? 1 :0; + $OptionsInfo{ListFingerprintsVectorValuesFormat} = ($Options{all} || $Options{fingerprintsvectorvaluesformat}) ? 1 :0; + + $OptionsInfo{InDelim} = $Options{indelim}; + + $OptionsInfo{ListNumOfOnBits} = ($Options{all} || $Options{numofonbits}) ? 1 :0; + $OptionsInfo{ListNumOfNonZeroValues} = ($Options{all} || $Options{numofnonzerovalues}) ? 1 :0; + + $OptionsInfo{ListFingerprintsDataEntryInfo} = ($OptionsInfo{ListFingerprintsType} || $OptionsInfo{ListFingerprintsDescription} || $OptionsInfo{ListFingerprintsSize} || $OptionsInfo{ListFingerprintsBitStringFormat} || $OptionsInfo{ListFingerprintsBitOrder} || $OptionsInfo{ListFingerprintsVectorValuesType} || $OptionsInfo{ListFingerprintsVectorValuesFormat} || $OptionsInfo{ListBitDensity} || $OptionsInfo{ListAverageBitDensity} || $OptionsInfo{ListNumOfOnBits} || $OptionsInfo{ListNumOfNonZeroValues}) ? 1 : 0; + +} + +# Setup script usage and retrieve command line arguments specified using various options... +sub SetupScriptUsage { + + # Retrieve all the options... + %Options = (); + + $Options{colmode} = 'colnum'; + $Options{detail} = 1; + $Options{indelim} = 'comma'; + + if (!GetOptions(\%Options, "all|a", "averagebitdensity", "bitdensity", "count", "colmode|c=s", "detail|d=i", "datacheck", "empty|e", "fingerprintsfield=s", "fingerprintscol=s", "fingerprintstype", "fingerprintsdescription", "fingerprintssize", "fingerprintsbitstringformat", "fingerprintsbitorder", "fingerprintsvectorvaluestype", "fingerprintsvectorvaluesformat", "help|h", "indelim=s", "numofonbits", "numofnonzerovalues", "workingdir|w=s")) { + die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n"; + } + if ($Options{workingdir}) { + if (! -d $Options{workingdir}) { + die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n"; + } + chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n"; + } + if ($Options{colmode} !~ /^(ColNum|ColLabel)$/i) { + die "Error: The value specified, $Options{colmode}, for option \"-c, --ColMode\" is not valid. Allowed values: ColNum, or ColLabel\n"; + } + if (!IsPositiveInteger($Options{detail})) { + die "Error: The value specified, $Options{detail}, for option \"-d, --detail\" is not valid. Allowed values: > 0 \n"; + } + if ($Options{indelim} !~ /^(comma|semicolon)$/i) { + die "Error: The value specified, $Options{indelim}, for option \"--InDelim\" is not valid. Allowed values: comma, or semicolon\n"; + } +} + +__END__ + +=head1 NAME + +InfoFingerprintsFiles.pl - List information about fingerprints data in SD, FP and CSV/TSV text file(s) + +=head1 SYNOPSIS + +InfoFingerprintsFiles.pl SDFile(s) FPFile(s) TextFile(s)... + +InfoFingerprintsFiles.pl [B<-a, --all>] [B<--AverageBitDensity>] [B<--BitDensity>] +[B<-c, --count>] [B<-c, --ColMode> I<ColNum | ColLabel>] [B<--DataCheck>] +[B<-d, --detail> I<InfoLevel>] [B<-e, --empty>] [B<--FingerprintsCol> I<col number | col name>] +[B<--FingerprintsField> I<FieldLabel>] [B<--FingerprintsType>] [B<--FingerprintsDescription>] +[B<--FingerprintsSize>] [B<--FingerprintsBitStringFormat>] [B<--FingerprintsBitOrder>] +[B<--FingerprintsVectorValuesType>] [B<--FingerprintsVectorValuesFormat>] +[B<-h, --help>] [B<--InDelim> I<comma | semicolon>] +[B<--NumOfOnBits>] [B<--NumOfNonZeroValues>] +[B<-w, --WorkingDir> dirname] SDFile(s) FPFile(s) TextFile(s)... + +=head1 DESCRIPTION + +List information about fingerprints data in I<SD, FP and CSV/TSV> text file(s): number of +rows containing fingerprints data, type of fingerprints vector, description and size of fingerprints, +bit density and average bit density for bit-vector fingerprints strings, and so on. + +The scripts InfoFingerprintsSDFiles.pl and InfoFingerprintsTextFiles.pl have been removed from the +current release of MayaChemTools and their functionality merged with this script. + +The valid I<SDFile> extensions are I<.sdf> and I<.sd>. All SD files in a current directory +can be specified either by I<*.sdf> or the current directory name. + +The valid I<FPFile> extensions are I<.fpf> and I<.fp>. All FP files in a current directory +can be specified either by I<*.fpf> or the current directory name. + +The valid I<TextFile> extensions are I<.csv> and I<.tsv> for comma/semicolon and tab +delimited text files respectively. All other file names are ignored. All text files in a +current directory can be specified by I<*.csv>, I<*.tsv>, or the current directory +name. The B<--indelim> option determines the format of I<TextFile(s)>. Any file +which doesn't correspond to the format indicated by B<--indelim> option is ignored. + +Format of fingerprint strings data in I<SDFile(s), FPFile(s) and TextFile(s)> is automatically +detected. + +Example of I<FP> file containing fingerprints bit-vector string data: + + # + # Package = MayaChemTools 7.4 + # ReleaseDate = Oct 21, 2010 + # + # TimeStamp = Mon Mar 7 15:14:01 2011 + # + # FingerprintsStringType = FingerprintsBitVector + # + # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:... + # Size = 1024 + # BitStringFormat = HexadecimalString + # BitsOrder = Ascending + # + Cmpd1 9c8460989ec8a49913991a6603130b0a19e8051c89184414953800cc21510... + Cmpd2 000000249400840040100042011001001980410c000000001010088001120... + ... ... + ... .. + +Example of I<FP> file containing fingerprints vector string data: + + # + # Package = MayaChemTools 7.4 + # ReleaseDate = Oct 21, 2010 + # + # TimeStamp = Mon Mar 7 15:14:01 2011 + # + # FingerprintsStringType = FingerprintsVector + # + # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:... + # VectorStringFormat = IDsAndValuesString + # VectorValuesType = NumericalValues + # + Cmpd1 338;C F N O C:C C:N C=O CC CF CN CO C:C:C C:C:N C:CC C:CF C:CN C: + N:C C:NC CC:N CC=O CCC CCN CCO CNC NC=O O=CO C:C:C:C C:C:C:N C:C:CC...; + 33 1 2 5 21 2 2 12 1 3 3 20 2 10 2 2 1 2 2 2 8 2 5 1 1 1 19 2 8 2 2 2 2 + 6 2 2 2 2 2 2 2 2 3 2 2 1 4 1 5 1 1 18 6 2 2 1 2 10 2 1 2 1 2 2 2 2 ... + Cmpd2 103;C N O C=N C=O CC CN CO CC=O CCC CCN CCO CNC N=CN NC=O NCN O=C + O C CC=O CCCC CCCN CCCO CCNC CNC=N CNC=O CNCN CCCC=O CCCCC CCCCN CC...; + 15 4 4 1 2 13 5 2 2 15 5 3 2 2 1 1 1 2 17 7 6 5 1 1 1 2 15 8 5 7 2 2 2 2 + 1 2 1 1 3 15 7 6 8 3 4 4 3 2 2 1 2 3 14 2 4 7 4 4 4 4 1 1 1 2 1 1 1 ... + ... ... + ... ... + +Example of I<SD> file containing fingerprints bit-vector string data: + + ... ... + ... ... + $$$$ + ... ... + ... ... + ... ... + 41 44 0 0 0 0 0 0 0 0999 V2000 + -3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + ... ... + 2 3 1 0 0 0 0 + ... ... + M END + > <CmpdID> + Cmpd1 + + > <PathLengthFingerprints> + FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLengt + h1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a49913991a66 + 03130b0a19e8051c89184414953800cc2151082844a201042800130860308e8204d4028 + 00831048940e44281c00060449a5000ac80c894114e006321264401600846c050164462 + 08190410805000304a10205b0100e04c0038ba0fad0209c0ca8b1200012268b61c0026a + aa0660a11014a011d46 + + $$$$ + ... ... + ... ... + +Example of CSV I<Text> file containing fingerprints bit-vector string data: + + "CompoundID","PathLengthFingerprints" + "Cmpd1","FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes + :MinLength1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a4 + 9913991a6603130b0a19e8051c89184414953800cc2151082844a20104280013086030 + 8e8204d402800831048940e44281c00060449a5000ac80c894114e006321264401..." + ... ... + ... ... + +The current release of MayaChemTools supports the following types of fingerprint +bit-vector and vector strings: + + FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi + us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT + C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X + 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A + TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2 + -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B... + + FingerprintsVector;AtomTypesCount:AtomicInvariantsAtomTypes:ArbitraryS + ize;10;NumericalValues;IDsAndValuesString;C.X1.BO1.H3 C.X2.BO2.H2 C.X2 + .BO3.H1 C.X3.BO3.H1 C.X3.BO4 F.X1.BO1 N.X2.BO2.H1 N.X3.BO3 O.X1.BO1.H1 + O.X1.BO2;2 4 14 3 10 1 1 1 3 2 + + FingerprintsVector;AtomTypesCount:SLogPAtomTypes:ArbitrarySize;16;Nume + ricalValues;IDsAndValuesString;C1 C10 C11 C14 C18 C20 C21 C22 C5 CS F + N11 N4 O10 O2 O9;5 1 1 1 14 4 2 1 2 2 1 1 1 1 3 1 + + FingerprintsVector;AtomTypesCount:SLogPAtomTypes:FixedSize;67;OrderedN + umericalValues;IDsAndValuesString;C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C + 12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26 C27 CS N1 N + 2 N3 N4 N5 N6 N7 N8 N9 N10 N11 N12 N13 N14 NS O1 O2 O3 O4 O5 O6 O7 O8 + O9 O10 O11 O12 OS F Cl Br I Hal P S1 S2 S3 Me1 Me2;5 0 0 0 2 0 0 0 0 1 + 1 0 0 1 0 0 0 14 0 4 2 1 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0... + + FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs + AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN + H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3 + .024 -2.270 + + FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues; + ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435 + 4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1 + 4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + + FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi + us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391 + 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414 + 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103 + 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338 + 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303... + + FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes + :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524 + 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 + 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...; + 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2 + 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1 + + FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp + es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100 + 0000000001010000000110000011000000000000100000000000000000000000100001 + 1000000110000000000000000000000000010011000000000000000000000000010000 + 0000000000000000000000000010000000000000000001000000000000000000000000 + 0000000000010000100001000000000000101000000000000000100000000000000... + + FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu + s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8 + 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567 + 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012 + 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455 + 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404... + + FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp + haNumericalValues;ValuesString;25189973 528584866 662581668 671034184 + 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450 + 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430 + 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134 + 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566... + + FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000 + 0000000000000000000000000000000001001000010010000000010010000000011100 + 0100101010111100011011000100110110000011011110100110111111111111011111 + 11111111111110111000 + + FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011 + 1110011111100101111111000111101100110000000000000011100010000000000000 + 0000000000000000000000000000000000000000000000101000000000000000000000 + 0000000000000000000000000000000000000000000000000000000000000000000000 + 0000000000000000000000000000000000000011000000000000000000000000000000 + 0000000000000000000000000000000000000000 + + FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri + ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 + 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0 + 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1 + 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1 + + FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri + ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0 + 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... + + FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng + th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110 + 0100010101011000101001011100110001000010001001101000001001001001001000 + 0010110100000111001001000001001010100100100000000011000000101001011100 + 0010000001000101010100000100111100110111011011011000000010110111001101 + 0101100011000000010001000011000010100011101100001000001000100000000... + + FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength + 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2 + C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X + 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1 + 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO + 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C.... + + FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt + h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1 + 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N + 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1 + CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR + OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ... + + FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD + istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1 + .H3-D1-C.X3.BO3.H1 C.X2.BO2.H2-D1-C.X2.BO2.H2 C.X2.BO2.H2-D1-C.X3.BO3. + H1 C.X2.BO2.H2-D1-C.X3.BO4 C.X2.BO2.H2-D1-N.X3.BO3 C.X2.BO3.H1-D1-...; + 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1 + 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1... + + FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi + stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar-D1-Ar + Ar-D1-Ar.HBA Ar-D1-HBD Ar-D1-Hal Ar-D1-None Ar.HBA-D1-None HBA-D1-NI H + BA-D1-None HBA.HBD-D1-NI HBA.HBD-D1-None HBD-D1-None NI-D1-None No...; + 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4 + 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ... + + FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3 + 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4- + C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO + 2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O...; + 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1 + + FingerprintsVector;TopologicalAtomTorsions:EStateAtomTypes;36;Numerica + lValues;IDsAndValuesString;aaCH-aaCH-aaCH-aaCH aaCH-aaCH-aaCH-aasC aaC + H-aaCH-aasC-aaCH aaCH-aaCH-aasC-aasC aaCH-aaCH-aasC-sF aaCH-aaCH-aasC- + ssNH aaCH-aasC-aasC-aasC aaCH-aasC-aasC-aasN aaCH-aasC-ssNH-dssC a...; + 4 4 8 4 2 2 6 2 2 2 4 3 2 1 3 3 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 2 + + FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M + inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1 + .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1 + 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1 + -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....; + 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2 + 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8... + + FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1 + :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C + .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3- + D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2 + -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C. + 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7... + + FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min + Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H + -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2- + HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H + BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...; + 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10 + 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1 + + FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist + ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0 + 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1 + 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0 + 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0 + 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18... + + FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize: + MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1- + Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1 + -H1-HBD1 Ar1-HBA1-HBD1 H1-H1-H1 H1-H1-HBA1 H1-H1-HBD1 H1-HBA1-HBA1 H1- + HBA1-HBD1 H1-HBA1-NI1 H1-HBD1-NI1 HBA1-HBA1-NI1 HBA1-HBD1-NI1 Ar1-...; + 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23 + 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1 + 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ... + + FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD + istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106 + 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0 + 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26 + 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0 + 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ... + +=head1 OPTIONS + +=over 4 + +=item B<-a, --all> + +List all the available information. + +=item B<--AverageBitDensity> + +List average bit density of fingerprint bit-vector strings. + +=item B<--BitDensity> + +List bit density of fingerprints bit-vector strings data in each row. + +=item B<--count> + +List number of data entries containing fingerprints bit-vector or vector strings data. This +is B<default behavior>. + +=item B<-c, --ColMode> I<ColNum | ColLabel> + +Specify how columns are identified in CSV/TSV I<TextFile(s)>: using column number or column +label. Possible values: I<ColNum or ColLabel>. Default value: I<ColNum> + +=item B<-d, --detail> I<InfoLevel> + +Level of information to print about lines being ignored. Default: I<1>. Possible values: +I<1, 2 or 3>. + +=item B<--DataCheck> + +Validate fingerprints data specified using B<--FingerprintsCol> and list information +about missing and invalid data. + +=item B<-e, --empty> + +List number of rows containing no fingerprints data. + +=item B<--FingerprintsCol> I<col number | col name> + +This value is B<-c, --colmode> specific. It corresponds to column in CSV/TSV I<TextFile(s)> +containing fingerprints data. Possible values: I<col number or col label>. +Default value: I<first column containing the word Fingerprints in its column label>. + +=item B<--FingerprintsField> I<FieldLabel> + +Fingerprints field label to use during listing of fingerprints information for I<SDFile(s)>. +Default value: I<first data field label containing the word Fingerprints in its label>. + +=item B<--FingerprintsType> + +List types of fingerprint strings: FingerprintsBitVector or FingerprintsVector. + +=item B<--FingerprintsDescription> + +List types of fingerprints: PathLengthBits, PathLengthCount, MACCSKeyCount, +ExtendedConnectivity and so on. + +=item B<--FingerprintsSize> + +List size of fingerprints. + +=item B<--FingerprintsBitStringFormat> + +List format of fingerprint bit-vector strings: BinaryString or HexadecimalString. + +=item B<--FingerprintsBitOrder> + +List order of bits data in fingerprint bit-vector bit strings: Ascending or Descending. + +=item B<--FingerprintsVectorValuesType> + +List type of values in fingerprint vector strings: OrderedNumericalValues, NumericalValues or +AlphaNumericalValues. + +=item B<--FingerprintsVectorValuesFormat> + +List format of values in fingerprint vector strings: ValuesString, IDsAndValuesString, +IDsAndValuesPairsString, ValuesAndIDsString or ValuesAndIDsPairsString. + +=item B<-h, --help> + +Print this help message. + +=item B<--InDelim> I<comma | semicolon> + +Input delimiter for CSV I<TextFile(s)>. Possible values: I<comma or semicolon>. +Default value: I<comma>. For TSV files, this option is ignored and I<tab> is used as a +delimiter. + +=item B<--NumOfOnBits> + +List number of on bits in fingerprints bit-vector strings data in each row. + +=item B<--NumOfNonZeroValues> + +List number of non-zero values in fingerprints vector strings data in each row. + +=item B<-w, --WorkingDir> I<DirName> + +Location of working directory. Default: current directory. + +=back + +=head1 EXAMPLES + +To count number of lines containing fingerprints bit-vector or vector strings data present +in FP file, in a column name containing Fingerprint substring in text file, and in a data +field with Fingerprint substring in its label, type: + + % InfoFingerprintsFiles.pl SampleFPBin.csv + + % InfoFingerprintsFiles.pl SampleFPBin.sdf SampleFPBin.fpf + SampleFPBin.csv + + % InfoFingerprintsFiles.pl SampleFPHex.sdf SampleFPHex.fpf + SampleFPHex.csv + + % InfoFingerprintsFiles.pl SampleFPcount.sdf SampleFPcount.fpf + SampleFPcount.csv + +To list all available information about fingerprints bit-vector or vector strings data present +in FP file, in a column name containing Fingerprint substring in text file, and in a data +field with Fingerprint substring in its label, type: + + % InfoFingerprintsFiles.pl -a SampleFPHex.sdf SampleFPHex.fpf + SampleFPHex.csv + + % InfoFingerprintsFiles.pl -a SampleFPcount.sdf SampleFPcount.fpf + SampleFPcount.csv + +To list all available information about fingerprints bit-vector or vector strings data present in a +column named Fingerprints in text file, type: + + % InfoFingerprintsFiles.pl -a --ColMode ColLabel --FingerprintsCol + Fingerprints SampleFPHex.sdf + + % InfoFingerprintsFiles.pl -a --ColMode ColLabel --FingerprintsCol + Fingerprints SampleFPcount.csv + +To list all available information about fingerprints bit-vector or vector strings data present in a +data field names Fingerprints in SD file, type: + + % InfoFingerprintsFiles.pl -a --FingerprintsField Fingerprints + SampleFPHex.sdf + + % InfoFingerprintsFiles.pl -a --FingerprintsField Fingerprints + SampleFPcount.sdf + +To list bit density, average bit density, and number of on bits for fingerprints bit-vector strings data +present in FP file, in a column name containing Fingerprint substring in text file, and in a data +field with Fingerprint substring in its label, type: + + % InfoFingerprintsFiles.pl --BitDensity --AverageBitDensity + --NumOfOnBits SampleFPBin.csv SampleFPBin.sdf SampleFPBin.fpf + +To list vector values type, format and number of non-zero values for fingerprints vector strings +data present in FP file, in a column name containing Fingerprint substring in text file, and in a data +field with Fingerprint substring in its label along with fingerprints type and description, type: + + % InfoFingerprintsFiles.pl --FingerprintsType --FingerprintsDescription + --FingerprintsVectorValuesType --FingerprintsVectorValuesFormat + --NumOfNonZeroValues SampleFPcount.csv SampleFPcount.sdf + SampleFPcount.fpf + +=head1 AUTHOR + +Manish Sud <msud@san.rr.com> + +=head1 SEE ALSO + +SimilarityMatricesFingerprints.pl, SimilaritySearchingFingerprints.pl, AtomNeighborhoodsFingerprints.pl, +AtomNeighborhoodsFingerprints.pl, ExtendedConnectivityFingerprints.pl, MACCSKeysFingerprints.pl, +PathLengthFingerprints.pl, TopologicalAtomPairsFingerprints.pl, TopologicalAtomTorsionsFingerprints.pl, +TopologicalPharmacophoreAtomPairsFingerprints.pl, TopologicalPharmacophoreAtomTripletsFingerprints.pl + +=head1 COPYRIGHT + +Copyright (C) 2015 Manish Sud. All rights reserved. + +This file is part of MayaChemTools. + +MayaChemTools is free software; you can redistribute it and/or modify it under +the terms of the GNU Lesser General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) +any later version. + +=cut