annotate mayachemtools/bin/InfoSDFiles.pl @ 9:ab29fa5c8c1f draft default tip

Uploaded
author deepakjadmin
date Thu, 15 Dec 2016 14:18:03 -0500
parents 73ae111cf86f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1 #!/usr/bin/perl -w
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
2 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
3 # $RCSfile: InfoSDFiles.pl,v $
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
4 # $Date: 2015/02/28 20:46:20 $
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
5 # $Revision: 1.35 $
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
6 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
7 # Author: Manish Sud <msud@san.rr.com>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
8 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
10 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
11 # This file is part of MayaChemTools.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
12 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
14 # the terms of the GNU Lesser General Public License as published by the Free
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
15 # Software Foundation; either version 3 of the License, or (at your option) any
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
16 # later version.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
17 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
18 # MayaChemTools is distributed in the hope that it will be useful, but without
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
19 # any warranty; without even the implied warranty of merchantability of fitness
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
20 # for a particular purpose. See the GNU Lesser General Public License for more
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
21 # details.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
22 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
23 # You should have received a copy of the GNU Lesser General Public License
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
26 # Boston, MA, 02111-1307, USA.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
27 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
28
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
29 use strict;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
30 use FindBin; use lib "$FindBin::Bin/../lib";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
31 use Getopt::Long;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
32 use File::Basename;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
33 use Benchmark;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
34 use SDFileUtil;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
35 use TextUtil;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
36 use FileUtil;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
37
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Script-wide state: script name, raw command line options and the benchmark
# objects used to report the total run time.
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName:Starting...\n\n";
# Direct method call instead of indirect object syntax ("new Benchmark"),
# which is parsed heuristically and is disabled under newer feature bundles.
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

my(@SDFilesList);
@SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
print "Checking input SD file(s)...\n";
my(%SDFilesInfo, %SDCmpdsInfo);
RetrieveSDFilesInfo();
InitializeSDCmpdsInfo();

# Process input files..
my($FileIndex);
if (@SDFilesList > 1) {
  print "\nProcessing SD files...\n";
}
for $FileIndex (0 .. $#SDFilesList) {
  # Only files flagged as okay in %SDFilesInfo are listed...
  if ($SDFilesInfo{FileOkay}[$FileIndex]) {
    print "\nProcessing file $SDFilesList[$FileIndex]...\n";
    ListSDFileInfo($FileIndex);
  }
}
ListTotalSizeOfFiles();

print "\n$ScriptName:Done...\n\n";

$EndTime = Benchmark->new;
$TotalTime = timediff($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
86
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
87 ###############################################################################
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
88
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# List appropriate information for the SD file at position $Index in
# @SDFilesList: detailed compound information when
# $OptionsInfo{ProcessCmpdInfo} is set, otherwise just the compound count,
# followed by the file size and last modified values stored in %SDFilesInfo.
sub ListSDFileInfo {
  my($Index) = @_;

  if ($OptionsInfo{ProcessCmpdInfo}) {
    ListCompoundDetailsInfo($Index);
  }
  else {
    ListCompoundCountInfo($Index);
  }

  # File size and modification information...
  print "\nFile size: ", FormatFileSize($SDFilesInfo{FileSize}[$Index]), " \n";
  print "Last modified: ", $SDFilesInfo{FileLastModified}[$Index], " \n";
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
107
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# List number of compounds in SD file...
#
# Counts the lines starting with the "$$$$" record delimiter in the file at
# position $Index in @SDFilesList, adds the count to
# $SDCmpdsInfo{TotalCmpdCount} and prints it. Dies when the file can't be
# opened.
sub ListCompoundCountInfo {
  my($Index) = @_;
  my($SDFile, $CmpdCount);

  $SDFile = $SDFilesList[$Index];
  $CmpdCount = 0;

  # Three-argument open with a lexical handle: the file name is never parsed
  # for mode characters or surrounding whitespace, and the handle can't clash
  # with another SDFILE bareword elsewhere in the script.
  open my $SDFileHandle, '<', $SDFile or die "Couldn't open $SDFile: $! \n";
  # A named line variable keeps the caller's $_ untouched...
  while (my $Line = <$SDFileHandle>) {
    if ($Line =~ /^\$\$\$\$/) {
      $CmpdCount++;
    }
  }
  close $SDFileHandle;

  $SDCmpdsInfo{TotalCmpdCount} += $CmpdCount;

  print "\nNumber of compounds: $CmpdCount\n";
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
129
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# List detailed compound information...
#
# Reads the SD file at position $Index in @SDFilesList one compound record at
# a time using ReadCmpdString and, depending on the flags in %OptionsInfo,
# counts records with:
#   . empty atom/bond blocks (GetCtabLinesCount returns <= 0)
#   . atom/bond line count not matching the counts line
#   . chiral flag set to 1 on the counts line
#   . unknown atom labels (GetUnknownAtoms)
#   . invalid atom numbers (GetInvalidAtomNumbers)
#   . more than one fragment, reported as salts (GetCmpdFragments)
#
# $OptionsInfo{Detail} controls verbosity: <= 1 prints a progress counter
# every 5000 records, >= 2 prints a message for each problem found and >= 3
# additionally prints the complete data of problematic records.
#
# The compound count is added to $SDCmpdsInfo{TotalCmpdCount}. Dies when the
# file can't be opened.
sub ListCompoundDetailsInfo {
  my($Index) = @_;
  my($SDFile, $CmpdCount, $EmptyCtabBlocksCount, $MismatchCtabBlockCount, $ChiralCtabBlockCount, $UnknownAtomsCtabBlockCount, $InvalidAtomNumbersCtabBlockCount, $SaltsCtabBlockCount, $CtabLinesCount, $PrintCmpdCounterHeader, $ProblematicCmpdData, $CmpdString, @CmpdLines);

  $SDFile = $SDFilesList[$Index];

  ($CmpdCount, $EmptyCtabBlocksCount, $MismatchCtabBlockCount, $ChiralCtabBlockCount, $UnknownAtomsCtabBlockCount, $InvalidAtomNumbersCtabBlockCount, $SaltsCtabBlockCount) = (0) x 7;

  # Data field statistics are collected per file; the running total compound
  # count is preserved by InitializeSDCmpdsInfo...
  InitializeSDCmpdsInfo();

  # The enabled checks don't change while the file is being read, so work
  # them out once up front...
  my $CheckEmpty = $OptionsInfo{All} || $OptionsInfo{Empty};
  my $CheckMismatch = $OptionsInfo{All} || $OptionsInfo{Mismatch};
  my $CheckChiral = $OptionsInfo{All} || $OptionsInfo{Chiral};
  my $CheckUnknownAtoms = $OptionsInfo{All} || $OptionsInfo{UnknownAtoms};
  my $CheckInvalidAtomNumbers = $OptionsInfo{All} || $OptionsInfo{InvalidAtomNumbers};
  my $CheckSalts = $OptionsInfo{All} || $OptionsInfo{Salts};

  $PrintCmpdCounterHeader = 1;

  # Three-argument open with a lexical handle: the file name is never parsed
  # for mode characters or surrounding whitespace. The lexical handle is a
  # glob reference just like the \*SDFILE used previously.
  open my $SDFileHandle, '<', $SDFile or die "Couldn't open $SDFile: $! \n";
  while ($CmpdString = ReadCmpdString($SDFileHandle)) {
    $CmpdCount++;
    $ProblematicCmpdData = 0;

    # Progress counter for the terse detail levels...
    if ($OptionsInfo{Detail} <= 1 && ($CmpdCount % 5000) == 0) {
      if ($PrintCmpdCounterHeader) {
        $PrintCmpdCounterHeader = 0;
        print "Processing compounds:";
      }
      print "$CmpdCount...";
    }

    @CmpdLines = split "\n", $CmpdString;
    $CtabLinesCount = GetCtabLinesCount(\@CmpdLines);

    if ($CheckEmpty && $CtabLinesCount <= 0) {
      $EmptyCtabBlocksCount++;
      $ProblematicCmpdData = 1;
    }

    if ($CtabLinesCount > 0) {
      # The fourth line of a record is the counts line...
      my($AtomCount, $BondCount, $ChiralFlag) = ParseCmpdCountsLine($CmpdLines[3]);
      my $CtabCountsMatch = ($CtabLinesCount == ($AtomCount + $BondCount));

      if ($CheckMismatch && !$CtabCountsMatch) {
        $MismatchCtabBlockCount++;
        $ProblematicCmpdData = 1;
        if ($OptionsInfo{Detail} >= 2) {
          print "\nMismatch found: Ctab lines count: $CtabLinesCount; Atoms count: $AtomCount; Bond count: $BondCount\n";
        }
      }

      if ($CheckChiral && $ChiralFlag == 1) {
        $ChiralCtabBlockCount++;
      }

      # Atom and bond level checks are performed only for records whose
      # atom/bond block agrees with the counts line...
      if ($CtabCountsMatch) {
        if ($CheckUnknownAtoms) {
          my($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines) = GetUnknownAtoms(\@CmpdLines);
          if ($UnknownAtomCount) {
            $UnknownAtomsCtabBlockCount++;
            $ProblematicCmpdData = 1;
            if ($OptionsInfo{Detail} >= 2) {
              print "\nUnknown atom(s) found: $UnknownAtomCount\nUnknown atom(s) symbols:$UnknownAtoms\nUnknown atom(s) data lines:\n$UnknownAtomLines\n";
            }
          }
        }
        if ($CheckInvalidAtomNumbers) {
          my($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines) = GetInvalidAtomNumbers(\@CmpdLines);
          if ($InvalidAtomNumbersCount) {
            $InvalidAtomNumbersCtabBlockCount++;
            $ProblematicCmpdData = 1;
            if ($OptionsInfo{Detail} >= 2) {
              print "\nInvalid atom number(s) found: $InvalidAtomNumbersCount\nInvalid atom number(s):$InvalidAtomNumbers\nInvalid atom number(s) data lines:\n$InvalidAtomNumberLines\n";
            }
          }
        }
        if ($CheckSalts) {
          my($FragmentsCount, $Fragments) = GetCmpdFragments(\@CmpdLines);
          if ($FragmentsCount > 1) {
            $SaltsCtabBlockCount++;
            $ProblematicCmpdData = 1;
            if ($OptionsInfo{Detail} >= 2) {
              print "\nSalts found: $FragmentsCount\nSalts atom numbers:\n$Fragments\n";
            }
          }
        }
      }
    }

    if ($OptionsInfo{ProcessCmpdData}) {
      ProcessCmpdInfo(\@CmpdLines, $CmpdCount);
    }

    if ($OptionsInfo{Detail} >= 3 && $ProblematicCmpdData) {
      print "\nCompound data:\n$CmpdString\n\n";
    }
  }
  # Terminate the progress counter line, if one was started...
  if ($OptionsInfo{Detail} <= 1 && !$PrintCmpdCounterHeader) {
    print "\n";
  }
  close $SDFileHandle;

  $SDCmpdsInfo{TotalCmpdCount} += $CmpdCount;

  print "\nNumber of compounds: $CmpdCount\n";

  if ($CheckEmpty) {
    print "Number of empty atom/bond blocks: $EmptyCtabBlocksCount\n";
  }
  if ($CheckMismatch) {
    print "Number of mismatched atom/bond blocks: $MismatchCtabBlockCount\n";
  }
  if ($CheckUnknownAtoms) {
    print "Number of atom blocks with unknown atom labels: $UnknownAtomsCtabBlockCount\n";
  }
  if ($CheckInvalidAtomNumbers) {
    print "Number of bond blocks and atom property blocks with invalid atom numbers: $InvalidAtomNumbersCtabBlockCount\n";
  }
  if ($CheckSalts) {
    print "Number of atom blocks containing salts: $SaltsCtabBlockCount\n";
  }
  if ($CheckChiral) {
    print "Number of chiral atom/bond blocks: $ChiralCtabBlockCount\n";
  }
  if ($OptionsInfo{ProcessCmpdData}) {
    PrintCmpdInfoSummary();
  }

}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
256
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Initialize compound data information for a SD file...
#
# Empties the field label list and all the field label/value count maps in
# %SDCmpdsInfo. The running total compound count is set to zero only when it
# doesn't exist yet, so it keeps accumulating across files.
sub InitializeSDCmpdsInfo {
  my($MapName);

  $SDCmpdsInfo{TotalCmpdCount} = 0 unless exists $SDCmpdsInfo{TotalCmpdCount};

  # Clear in place instead of assigning fresh references...
  @{$SDCmpdsInfo{FieldLabels}} = ();
  for $MapName (qw(FieldLabelsMap NonEmptyFieldValuesCountMap EmptyFieldValuesCountMap NonNumericalFieldValuesCountMap NumericalFieldValuesCountMap)) {
    %{$SDCmpdsInfo{$MapName}} = ();
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
271
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Process compound data header labels and figure out which ones are present for
# all the compounds...
#
# Parameters:
#   $CmpdLinesRef - reference to the array of lines of one compound record
#   $CmpdCount    - record number of the compound in the current file, as
#                   counted by ListCompoundDetailsInfo (first record is 1)
#
# Updates in %SDCmpdsInfo:
#   FieldLabels    - data field labels in order of first appearance
#   FieldLabelsMap - label => "PresentInAll" or "PresentInSome"
#   NonEmptyFieldValuesCountMap / EmptyFieldValuesCountMap - per label value
#     counts, only when $OptionsInfo{CountEmptyData} is set
#   NumericalFieldValuesCountMap / NonNumericalFieldValuesCountMap - per label
#     value counts, only when $OptionsInfo{CheckData} is set
sub ProcessCmpdInfo {
  my($CmpdLinesRef, $CmpdCount) = @_;
  my($Label);

  my(@CmpdFieldLabels) = GetCmpdDataHeaderLabels($CmpdLinesRef);

  if (@{$SDCmpdsInfo{FieldLabels}}) {
    # Setup a map for the current labels...
    my(%CmpdFieldLabelsMap) = ();
    for $Label (@CmpdFieldLabels) {
      $CmpdFieldLabelsMap{$Label} = "PresentInSome";
    }
    # Labels seen earlier but missing from this compound are no longer present
    # in all the compounds...
    for $Label (@{$SDCmpdsInfo{FieldLabels}}) {
      if (!$CmpdFieldLabelsMap{$Label}) {
        $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInSome";
      }
    }
    # Labels appearing for the first time were missing from earlier
    # compounds; add 'em as present in some...
    for $Label (@CmpdFieldLabels) {
      if (!$SDCmpdsInfo{FieldLabelsMap}{$Label}) {
        push @{$SDCmpdsInfo{FieldLabels}}, $Label;
        $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInSome";
      }
    }
  }
  else {
    # No labels collected so far: get the initial label set and set up a map.
    # The labels can be present in all the compounds only when this is the
    # first record; otherwise, the earlier records had no data fields at all.
    my $InitialStatus = ($CmpdCount <= 1) ? "PresentInAll" : "PresentInSome";

    @{$SDCmpdsInfo{FieldLabels}} = @CmpdFieldLabels;
    for $Label (@{$SDCmpdsInfo{FieldLabels}}) {
      $SDCmpdsInfo{FieldLabelsMap}{$Label} = $InitialStatus;
    }
  }

  if ($OptionsInfo{CountEmptyData} || $OptionsInfo{CheckData}) {
    # Count empty and numerical data field values...
    my(%DataFieldAndValues, $Value);

    %DataFieldAndValues = GetCmpdDataHeaderLabelsAndValues($CmpdLinesRef);
    for $Label (keys %DataFieldAndValues) {
      $Value = $DataFieldAndValues{$Label};

      if ($OptionsInfo{CountEmptyData}) {
        if (IsNotEmpty($Value)) {
          $SDCmpdsInfo{NonEmptyFieldValuesCountMap}{$Label}++;
        }
        else {
          # Use the processed detail level like everywhere else in the script
          # rather than the raw command line option...
          if ($OptionsInfo{Detail} >= 2) {
            print "Compound record $CmpdCount: Empty data field <$Label>\n";
          }
          $SDCmpdsInfo{EmptyFieldValuesCountMap}{$Label}++;
        }
      }

      if ($OptionsInfo{CheckData}) {
        if (IsNumerical($Value)) {
          $SDCmpdsInfo{NumericalFieldValuesCountMap}{$Label}++;
        }
        else {
          $SDCmpdsInfo{NonNumericalFieldValuesCountMap}{$Label}++;
        }
      }
    }
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
356
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Print a summary of the data fields tracked in %SDCmpdsInfo: the number of
# fields, all field labels, the labels present in all versus only some of the
# compounds and, when enabled in %OptionsInfo, per-field value counts...
sub PrintCmpdInfoSummary {
  my $AllLabelsRef = $SDCmpdsInfo{FieldLabels};

  # No data field labels: report a zero count and stop...
  if (!@{$AllLabelsRef}) {
    print "\nNumber of data fields: 0\n";
    return;
  }

  my $FieldCount = scalar(@{$AllLabelsRef});
  my @SortedLabels = sort keys %{$SDCmpdsInfo{FieldLabelsMap}};

  print "\nNumber of data fields: ", $FieldCount, "\n";
  print "All data field labels: ";
  print "<$_> " for @SortedLabels;
  print "\n";

  # Split the sorted labels by their presence status...
  my @FieldLabelsPresentInAll = grep { $SDCmpdsInfo{FieldLabelsMap}{$_} eq "PresentInAll" } @SortedLabels;
  my @FieldLabelsPresentInSome = ();
  my $PresentEverywhere = (scalar(@FieldLabelsPresentInAll) == $FieldCount) ? 1 : 0;

  # The all/some breakdown is listed only when at least one label is not
  # marked as present in all compounds...
  if (!$PresentEverywhere) {
    print "Data field labels present in all compounds: ";
    print "<$_> " for @FieldLabelsPresentInAll;
    print "\n";

    @FieldLabelsPresentInSome = grep { $SDCmpdsInfo{FieldLabelsMap}{$_} eq "PresentInSome" } @SortedLabels;
    print "Data field labels present in some compounds: ";
    print "<$_> " for @FieldLabelsPresentInSome;
    print "\n";
  }

  # List empty data field values count...
  if ($OptionsInfo{CountEmptyData}) {
    my $NonEmptyMapRef = \%{$SDCmpdsInfo{NonEmptyFieldValuesCountMap}};
    my $EmptyMapRef = \%{$SDCmpdsInfo{EmptyFieldValuesCountMap}};

    print "\n";
    if ($PresentEverywhere) {
      PrintDataInformation("Number of non-empty values for data field(s)", $AllLabelsRef, $NonEmptyMapRef);
      PrintDataInformation("Number of empty values for data field(s)", $AllLabelsRef, $EmptyMapRef);
    }
    else {
      PrintDataInformation("Number of non-empty values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, $NonEmptyMapRef);
      PrintDataInformation("Number of empty values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, $EmptyMapRef);
      PrintDataInformation("Number of non-empty values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, $NonEmptyMapRef);
      PrintDataInformation("Number of empty values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, $EmptyMapRef);
    }
    print "\n";
  }

  # List numerical data values count...
  if ($OptionsInfo{CheckData}) {
    my $NonNumericalMapRef = \%{$SDCmpdsInfo{NonNumericalFieldValuesCountMap}};
    my $NumericalMapRef = \%{$SDCmpdsInfo{NumericalFieldValuesCountMap}};

    print "\n";
    if ($PresentEverywhere) {
      PrintDataInformation("Number of non-numerical values for data field(s)", $AllLabelsRef, $NonNumericalMapRef);
      PrintDataInformation("Number of numerical values for data field(s)", $AllLabelsRef, $NumericalMapRef);
    }
    else {
      PrintDataInformation("Number of non-numerical values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, $NonNumericalMapRef);
      PrintDataInformation("Number of numerical values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, $NumericalMapRef);
      PrintDataInformation("Number of non-numerical values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, $NonNumericalMapRef);
      PrintDataInformation("Number of numerical values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, $NumericalMapRef);
    }
    print "\n";
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Print one line of the form "InfoLabel:  <Label1> - Value1, <Label2> - Value2".
#
# Arguments: an information label, a reference to an array of data labels and
# a reference to a hash mapping data labels to values. Labels which don't
# exist in the hash are listed with a value of 0...
sub PrintDataInformation {
  my($InfoLabel, $DataLabelRef, $DataLabelToValueMapRef) = @_;

  my @Entries = map {
    my $Value = exists($DataLabelToValueMapRef->{$_}) ? $DataLabelToValueMapRef->{$_} : 0;
    " <$_> - $Value";
  } @{$DataLabelRef};

  print "$InfoLabel: " . join(",", @Entries) . "\n";
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
442
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Print the total compound count and the combined size of all SD files marked
# okay in %SDFilesInfo. Nothing is printed unless more than one file is okay...
sub ListTotalSizeOfFiles {
  # Indices into @SDFilesList of the usable files...
  my @OkayFileIndices = grep { $SDFilesInfo{FileOkay}[$_] } (0 .. $#SDFilesList);
  my $FileOkayCount = scalar(@OkayFileIndices);

  my $TotalSize = 0;
  $TotalSize += $SDFilesInfo{FileSize}[$_] for @OkayFileIndices;

  if ($FileOkayCount > 1) {
    print "\nTotal number of compounds in $FileOkayCount SD files: $SDCmpdsInfo{TotalCmpdCount}\n";
    print "\nTotal size of $FileOkayCount SD files: ", FormatFileSize($TotalSize), "\n";
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
462
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Retrieve information about SD files...
#
# Resets %SDCmpdsInfo and %SDFilesInfo and then, for each file in @SDFilesList,
# records at the same index in %SDFilesInfo:
#
#   FileOkay         - 1 when the file exists, passes CheckFileType for
#                      "sdf sd" and can be opened for reading; otherwise 0
#   FileSize         - value returned by FileSize for okay files; otherwise 0
#   FileLastModified - "time; date" string for okay files; otherwise ''
#
# Files failing any check are skipped with a warning...
sub RetrieveSDFilesInfo {
  my($Index, $SDFile, $ModifiedTimeString, $ModifiedDateString);

  %SDCmpdsInfo = ();

  %SDFilesInfo = ();
  @{$SDFilesInfo{FileOkay}} = ();
  @{$SDFilesInfo{FileSize}} = ();
  @{$SDFilesInfo{FileLastModified}} = ();

  FILELIST: for $Index (0 .. $#SDFilesList) {
    # Assume the worst until the file passes all the checks...
    $SDFilesInfo{FileOkay}[$Index] = 0;
    $SDFilesInfo{FileSize}[$Index] = 0;
    $SDFilesInfo{FileLastModified}[$Index] = '';

    $SDFile = $SDFilesList[$Index];
    if (!(-e $SDFile)) {
      warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($SDFile, "sdf sd")) {
      warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
      next FILELIST;
    }

    # Make sure the file is readable. Use three-argument open with an explicit
    # read mode and a lexical handle: two-argument open interprets leading or
    # trailing mode characters (">", "|") and strips whitespace in the name,
    # which could clobber or misopen an oddly named file...
    my $SDFileHandle;
    if (!open($SDFileHandle, '<', $SDFile)) {
      warn "Warning: Ignoring file $SDFile: Couldn't open it: $! \n";
      next FILELIST;
    }
    close $SDFileHandle;

    $SDFilesInfo{FileOkay}[$Index] = 1;
    $SDFilesInfo{FileSize}[$Index] = FileSize($SDFile);
    ($ModifiedTimeString, $ModifiedDateString) = FormattedFileModificationTimeAndDate($SDFile);
    $SDFilesInfo{FileLastModified}[$Index] = "$ModifiedTimeString; $ModifiedDateString";
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
500
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Process option values...
#
# Rebuilds %OptionsInfo from the command line options in %Options: each flag
# is copied as its value or 0, the detail level is copied as is, and derived
# 1/0 flags indicate which kinds of processing are needed...
sub ProcessOptions {
  %OptionsInfo = ();

  # %OptionsInfo key and corresponding %Options key for simple flags...
  my @FlagKeyPairs = (
    ['All', 'all'],
    ['Chiral', 'chiral'],
    ['Count', 'count'],
    ['DataCheck', 'datacheck'],
    ['Empty', 'empty'],
    ['Fields', 'fields'],
    ['InvalidAtomNumbers', 'invalidatomnumbers'],
    ['Mismatch', 'mismatch'],
    ['Salts', 'salts'],
    ['UnknownAtoms', 'unknownatoms'],
  );
  for my $KeyPairRef (@FlagKeyPairs) {
    my($InfoKey, $OptionKey) = @{$KeyPairRef};
    $OptionsInfo{$InfoKey} = $Options{$OptionKey} || 0;
  }

  $OptionsInfo{Detail} = $Options{detail};

  # 1 when at least one of the named options has a true value; otherwise 0...
  my $AnyOptionSet = sub {
    my(@OptionKeys) = @_;
    return (grep { $Options{$_} } @OptionKeys) ? 1 : 0;
  };

  $OptionsInfo{ProcessCmpdInfo} = $AnyOptionSet->('all', 'chiral', 'empty', 'fields', 'invalidatomnumbers', 'mismatch', 'salts', 'unknownatoms', 'datacheck');

  $OptionsInfo{ProcessCmpdData} = $AnyOptionSet->('all', 'fields', 'empty', 'datacheck');

  $OptionsInfo{CountEmptyData} = $AnyOptionSet->('all', 'empty');
  $OptionsInfo{CheckData} = $AnyOptionSet->('all', 'datacheck');
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
525
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Setup script usage and retrieve command line arguments specified using various options...
#
# Fills %Options through GetOptions, with the detail level defaulting to 1.
# Dies on unrecognized options, on a working directory which is not a
# directory or can't be entered, and on a detail level outside 1 to 3. A valid
# working directory becomes the current directory...
sub SetupScriptUsage {
  my @OptionSpecs = ("all|a", "count|c", "chiral", "datacheck", "detail|d:i", "empty|e", "fields|f", "help|h", "invalidatomnumbers|i", "mismatch|m", "salts|s", "unknownatoms|u", "workingdir|w=s");

  # Setup default and retrieve all the options...
  %Options = ();
  $Options{detail} = 1;

  GetOptions(\%Options, @OptionSpecs)
    or die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";

  if ($Options{workingdir}) {
    (-d $Options{workingdir})
      or die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  unless ($Options{detail} > 0 && $Options{detail} <= 3) {
    die "Error: The value specified, $Options{detail}, for option \"-d --detail\" is not valid. Possible values: 1 to 3\n";
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
545
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
546 __END__
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
547
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
548 =head1 NAME
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
549
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
550 InfoSDFiles.pl - List information about SDFile(s)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
551
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
552 =head1 SYNOPSIS
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
553
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
554 InfoSDFiles.pl SDFile(s)...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
555
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
556 InfoSDFiles.pl [B<-a, --all>] [B<-c, --count>] [B<--chiral>] [B<--datacheck>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
557 [B<-d, --detail> infolevel] [B<-e, --empty>] [B<-f, --fields>] [B<-h, --help>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
558 [B<-i, --invalidatomnumbers>] [B<-m, --mismatch>] [B<-s, --salts>] [B<-u, --unknownatoms>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
559 [B<-w, --workingdir> dirname] SDFile(s)...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
560
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
561 =head1 DESCRIPTION
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
562
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
563 List information about I<SDFile(s)> contents: number of compounds, empty records
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
564 and so on. Multiple SDFile names are separated by spaces. The valid file extensions
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
565 are I<.sdf> and I<.sd>. All other file names are ignored. All the SD files in the current
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
566 directory can be specified either by I<*.sdf> or the current directory name.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
567
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
568 =head1 OPTIONS
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
569
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
570 =over 4
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
571
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
572 =item B<-a, --all>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
573
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
574 List all the available information.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
575
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
576 =item B<-c, --count>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
577
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
578 List number of compounds. This is B<default behavior>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
579
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
580 =item B<--chiral>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
581
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
582 List number of empty atom/bond blocks for compounds with chiral flag set in
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
583 count line.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
584
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
585 =item B<-d, --detail> I<infolevel>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
586
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
587 Level of information to print. Default: 1. Possible values: I<1, 2, or 3>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
588
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
589 =item B<--datacheck>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
590
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
591 List number of numerical and non-numerical values for each data field.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
592
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
593 =item B<-e, --empty>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
594
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
595 List number of empty atom/bond blocks and data fields for compounds.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
596
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
597 =item B<-f, --fields>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
598
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
599 List data field labels present for compounds.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
600
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
601 =item B<-h, --help>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
602
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
603 Print this help message.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
604
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
605 =item B<-i, --invalidatomnumbers>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
606
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
607 List number of bond blocks for compounds which contain invalid atom numbers.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
608
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
609 =item B<-m, --mismatch>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
610
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
611 List number of atom/bond blocks for compounds which don't match with counts
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
612 line information in header block.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
613
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
614 =item B<-s, --salts>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
615
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
616 List number of atom blocks for compounds which contain salts identified as
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
617 disconnected structural units.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
618
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
619 =item B<-u, --unknownatoms>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
620
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
621 List number of atom blocks for compounds which contain special atom symbols
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
622 such as L, Q, *, LP, X, R#, or any other non-periodic-table symbols.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
623
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
624 =item B<-w, --workingdir> I<dirname>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
625
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
626 Location of working directory. Default: current directory.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
627
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
628 =back
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
629
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
630 =head1 EXAMPLES
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
631
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
632 To count compounds in SD file(s), type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
633
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
634 % InfoSDFiles.pl Sample1.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
635 % InfoSDFiles.pl Sample1.sdf Sample2.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
636 % InfoSDFiles.pl *.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
637
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
638 To list all available information for SD file(s), type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
639
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
640 % InfoSDFiles.pl -a *.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
641
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
642 To list all data fields present in Sample.sdf, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
643
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
644 % InfoSDFiles.pl -f Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
645
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
646 To count number of compounds which contain salts and list associated structural
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
647 data, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
648
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
649 % InfoSDFiles.pl -s -d 3 Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
650
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
651 =head1 AUTHOR
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
652
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
653 Manish Sud <msud@san.rr.com>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
654
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
655 =head1 SEE ALSO
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
656
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
657 ExtractFromSDFiles.pl, FilterSDFiles.pl, MergeTextFilesWithSD.pl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
658
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
659 =head1 COPYRIGHT
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
660
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
661 Copyright (C) 2015 Manish Sud. All rights reserved.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
662
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
663 This file is part of MayaChemTools.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
664
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
665 MayaChemTools is free software; you can redistribute it and/or modify it under
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
666 the terms of the GNU Lesser General Public License as published by the Free
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
667 Software Foundation; either version 3 of the License, or (at your option)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
668 any later version.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
669
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
670 =cut