annotate bin/InfoSDFiles.pl @ 3:90ea638ce878 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:11:59 -0500
parents 2abf0d43254d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1 #!/usr/bin/perl -w
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
2 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
3 # $RCSfile: InfoSDFiles.pl,v $
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
4 # $Date: 2015/02/28 20:46:20 $
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
5 # $Revision: 1.35 $
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
6 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
7 # Author: Manish Sud <msud@san.rr.com>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
8 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
10 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
11 # This file is part of MayaChemTools.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
12 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
14 # the terms of the GNU Lesser General Public License as published by the Free
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
15 # Software Foundation; either version 3 of the License, or (at your option) any
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
16 # later version.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
17 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
18 # MayaChemTools is distributed in the hope that it will be useful, but without
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
19 # any warranty; without even the implied warranty of merchantability of fitness
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
20 # for a particular purpose. See the GNU Lesser General Public License for more
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
21 # details.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
22 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
23 # You should have received a copy of the GNU Lesser General Public License
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
26 # Boston, MA, 02111-1307, USA.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
27 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
28
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
29 use strict;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
30 use FindBin; use lib "$FindBin::Bin/../lib";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
31 use Getopt::Long;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
32 use File::Basename;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
33 use Benchmark;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
34 use SDFileUtil;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
35 use TextUtil;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
36 use FileUtil;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
37
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# File-scoped state shared with the subroutines defined later in this file...
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName:Starting...\n\n";
# Direct method call instead of the indirect object form "new Benchmark"...
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
    die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

my(@SDFilesList);
@SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
print "Checking input SD file(s)...\n";
my(%SDFilesInfo, %SDCmpdsInfo);
RetrieveSDFilesInfo();
InitializeSDCmpdsInfo();

# Process input files; files not flagged okay in %SDFilesInfo are skipped...
my($FileIndex);
if (@SDFilesList > 1) {
    print "\nProcessing SD files...\n";
}
for $FileIndex (0 .. $#SDFilesList) {
    if ($SDFilesInfo{FileOkay}[$FileIndex]) {
        print "\nProcessing file $SDFilesList[$FileIndex]...\n";
        ListSDFileInfo($FileIndex);
    }
}
ListTotalSizeOfFiles();

print "\n$ScriptName:Done...\n\n";

# Report elapsed time for the whole run...
$EndTime = Benchmark->new;
$TotalTime = timediff($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
86
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
87 ###############################################################################
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
88
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# List information for one SD file: detailed compound information when
# $OptionsInfo{ProcessCmpdInfo} is set, otherwise just the compound count,
# followed by the file size and last modification time...
#
# Parameters:
#   $Index - index of the file in @SDFilesList and the %SDFilesInfo arrays.
#
sub ListSDFileInfo {
    my($Index) = @_;

    if ($OptionsInfo{ProcessCmpdInfo}) {
        ListCompoundDetailsInfo($Index);
    }
    else {
        ListCompoundCountInfo($Index);
    }

    # File size and modification information...
    print "\nFile size: ", FormatFileSize($SDFilesInfo{FileSize}[$Index]), " \n";
    print "Last modified: ", $SDFilesInfo{FileLastModified}[$Index], " \n";
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
107
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# List number of compounds in SD file by counting the "$$$$" record
# terminator lines, and add that count to $SDCmpdsInfo{TotalCmpdCount}...
#
# Parameters:
#   $Index - index of the file in @SDFilesList.
#
# Dies when the file cannot be opened.
#
sub ListCompoundCountInfo {
    my($Index) = @_;
    my($SDFile, $CmpdCount, $SDFileHandle, $Line);

    $SDFile = $SDFilesList[$Index];
    $CmpdCount = 0;

    # Three-argument open with a lexical handle: the file name is taken
    # literally, so characters such as '>', '|' or surrounding whitespace
    # can't change how the file is opened...
    open($SDFileHandle, '<', $SDFile) or die "Couldn't open $SDFile: $! \n";
    while ($Line = <$SDFileHandle>) {
        if ($Line =~ /^\$\$\$\$/) {
            $CmpdCount++;
        }
    }
    close $SDFileHandle;

    $SDCmpdsInfo{TotalCmpdCount} += $CmpdCount;

    print "\nNumber of compounds: $CmpdCount\n";
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
129
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# List detailed compound information for one SD file...
#
# Reads the file one compound record at a time and, depending on the flags in
# %OptionsInfo (All, Empty, Mismatch, Chiral, UnknownAtoms, InvalidAtomNumbers,
# Salts), counts records with empty, mismatched or chiral atom/bond blocks,
# unknown atom labels, invalid atom numbers, or more than one fragment. With
# $OptionsInfo{Detail} >= 2 each finding is printed as it is found; with
# $OptionsInfo{Detail} >= 3 the full text of each problematic record is printed
# as well. A summary is printed at the end and the number of compounds is added
# to $SDCmpdsInfo{TotalCmpdCount}.
#
# Parameters:
#   $Index - index of the file in @SDFilesList.
#
# Dies when the file cannot be opened.
#
sub ListCompoundDetailsInfo {
    my($Index) = @_;
    my($SDFile, $SDFileHandle, $CmpdCount, $EmptyCtabBlocksCount, $MismatchCtabBlockCount, $ChiralCtabBlockCount, $UnknownAtomsCtabBlockCount, $InvalidAtomNumbersCtabBlockCount, $SaltsCtabBlockCount, $CtabLinesCount, $PrintCmpdCounterHeader, $ProblematicCmpdData, $CmpdString, @CmpdLines);

    $SDFile = $SDFilesList[$Index];

    ($CmpdCount, $EmptyCtabBlocksCount, $MismatchCtabBlockCount, $ChiralCtabBlockCount, $UnknownAtomsCtabBlockCount, $InvalidAtomNumbersCtabBlockCount, $SaltsCtabBlockCount) = (0) x 7;

    # Clear data field information collected for any previous file...
    InitializeSDCmpdsInfo();

    $PrintCmpdCounterHeader = 1;

    # Three-argument open with a lexical handle: the file name is taken
    # literally and no global bareword handle is involved. The lexical handle
    # is a glob reference, just like the \*SDFILE used previously...
    open($SDFileHandle, '<', $SDFile) or die "Couldn't open $SDFile: $! \n";
    while ($CmpdString = ReadCmpdString($SDFileHandle)) {
        $CmpdCount++;
        $ProblematicCmpdData = 0;

        # Progress indicator after every 5000 compounds at low detail levels...
        if ($OptionsInfo{Detail} <= 1 && ($CmpdCount % 5000) == 0) {
            if ($PrintCmpdCounterHeader) {
                $PrintCmpdCounterHeader = 0;
                print "Processing compounds:";
            }
            print "$CmpdCount...";
        }

        @CmpdLines = split "\n", $CmpdString;
        $CtabLinesCount = GetCtabLinesCount(\@CmpdLines);

        if ($OptionsInfo{All} || $OptionsInfo{Empty}) {
            if ($CtabLinesCount <= 0) {
                $EmptyCtabBlocksCount++;
                $ProblematicCmpdData = 1;
            }
        }

        if ($CtabLinesCount > 0) {
            # Fourth line of the record is the counts line...
            my($AtomCount, $BondCount, $ChiralFlag) = ParseCmpdCountsLine($CmpdLines[3]);

            if ($OptionsInfo{All} || $OptionsInfo{Mismatch}) {
                if ($CtabLinesCount != ($AtomCount + $BondCount)) {
                    $MismatchCtabBlockCount++;
                    $ProblematicCmpdData = 1;
                    if ($OptionsInfo{Detail} >= 2) {
                        print "\nMismatch found: Ctab lines count: $CtabLinesCount; Atoms count: $AtomCount; Bond count: $BondCount\n";
                    }
                }
            }

            if ($OptionsInfo{All} || $OptionsInfo{Chiral}) {
                if ($ChiralFlag == 1) {
                    $ChiralCtabBlockCount++;
                }
            }

            # Atom level checks are performed only for consistent atom/bond blocks...
            if ($CtabLinesCount == ($AtomCount + $BondCount)) {
                if ($OptionsInfo{All} || $OptionsInfo{UnknownAtoms}) {
                    my($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines) = GetUnknownAtoms(\@CmpdLines);
                    if ($UnknownAtomCount) {
                        $UnknownAtomsCtabBlockCount++;
                        $ProblematicCmpdData = 1;
                        if ($OptionsInfo{Detail} >= 2) {
                            print "\nUnknown atom(s) found: $UnknownAtomCount\nUnknown atom(s) symbols:$UnknownAtoms\nUnknown atom(s) data lines:\n$UnknownAtomLines\n";
                        }
                    }
                }

                if ($OptionsInfo{All} || $OptionsInfo{InvalidAtomNumbers}) {
                    my($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines) = GetInvalidAtomNumbers(\@CmpdLines);
                    if ($InvalidAtomNumbersCount) {
                        $InvalidAtomNumbersCtabBlockCount++;
                        $ProblematicCmpdData = 1;
                        if ($OptionsInfo{Detail} >= 2) {
                            print "\nInvalid atom number(s) found: $InvalidAtomNumbersCount\nInvalid atom number(s):$InvalidAtomNumbers\nInvalid atom number(s) data lines:\n$InvalidAtomNumberLines\n";
                        }
                    }
                }

                if ($OptionsInfo{All} || $OptionsInfo{Salts}) {
                    my($FragmentsCount, $Fragments) = GetCmpdFragments(\@CmpdLines);
                    # More than one fragment is reported as salts...
                    if ($FragmentsCount > 1) {
                        $SaltsCtabBlockCount++;
                        $ProblematicCmpdData = 1;
                        if ($OptionsInfo{Detail} >= 2) {
                            print "\nSalts found: $FragmentsCount\nSalts atom numbers:\n$Fragments\n";
                        }
                    }
                }
            }
        }

        if ($OptionsInfo{ProcessCmpdData}) {
            ProcessCmpdInfo(\@CmpdLines, $CmpdCount);
        }

        if ($OptionsInfo{Detail} >= 3 && $ProblematicCmpdData) {
            print "\nCompound data:\n$CmpdString\n\n";
        }
    }

    # Terminate the progress indicator line, if one was started...
    if ($OptionsInfo{Detail} <= 1 && !$PrintCmpdCounterHeader) {
        print "\n";
    }
    close $SDFileHandle;

    $SDCmpdsInfo{TotalCmpdCount} += $CmpdCount;

    print "\nNumber of compounds: $CmpdCount\n";

    if ($OptionsInfo{All} || $OptionsInfo{Empty}) {
        print "Number of empty atom/bond blocks: $EmptyCtabBlocksCount\n";
    }
    if ($OptionsInfo{All} || $OptionsInfo{Mismatch}) {
        print "Number of mismatched atom/bond blocks: $MismatchCtabBlockCount\n";
    }
    if ($OptionsInfo{All} || $OptionsInfo{UnknownAtoms}) {
        print "Number of atom blocks with unknown atom labels: $UnknownAtomsCtabBlockCount\n";
    }
    if ($OptionsInfo{All} || $OptionsInfo{InvalidAtomNumbers}) {
        print "Number of bond blocks and atom property blocks with invalid atom numbers: $InvalidAtomNumbersCtabBlockCount\n";
    }
    if ($OptionsInfo{All} || $OptionsInfo{Salts}) {
        print "Number of atom blocks containing salts: $SaltsCtabBlockCount\n";
    }
    if ($OptionsInfo{All} || $OptionsInfo{Chiral}) {
        print "Number of chiral atom/bond blocks: $ChiralCtabBlockCount\n";
    }
    if ($OptionsInfo{ProcessCmpdData}) {
        PrintCmpdInfoSummary();
    }
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
256
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Reset the data field bookkeeping kept in %SDCmpdsInfo. The compound total
# is created on first use and left alone afterwards; the field label list
# and the per-label maps are emptied on every call...
sub InitializeSDCmpdsInfo {
    my($MapName);

    $SDCmpdsInfo{TotalCmpdCount} = 0 unless exists $SDCmpdsInfo{TotalCmpdCount};

    @{$SDCmpdsInfo{FieldLabels}} = ();

    # Empty each map in place rather than assigning a new hash reference...
    for $MapName (qw(FieldLabelsMap NonEmptyFieldValuesCountMap EmptyFieldValuesCountMap NonNumericalFieldValuesCountMap NumericalFieldValuesCountMap)) {
        %{$SDCmpdsInfo{$MapName}} = ();
    }
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
271
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Process compound data header labels and figure out which ones are present for
# all the compounds; optionally count empty/non-empty and numerical/
# non-numerical data field values per label...
#
# Parameters:
#   $CmpdLinesRef - reference to the array of lines of one compound record.
#   $CmpdCount    - 1-based number of the compound record in the current file;
#                   used in messages and to recognize the first record.
#
# Updates FieldLabels, FieldLabelsMap and the four *FieldValuesCountMap
# hashes in %SDCmpdsInfo.
#
sub ProcessCmpdInfo {
    my($CmpdLinesRef, $CmpdCount) = @_;
    my($Label);

    # Labels may be marked "PresentInAll" only on the very first record. An
    # empty label list alone doesn't prove that: the first record may simply
    # have had no data fields, so the record number is checked as well...
    if ((defined($CmpdCount) && $CmpdCount > 1) || @{$SDCmpdsInfo{FieldLabels}}) {
        my(@CmpdFieldLabels) = GetCmpdDataHeaderLabels($CmpdLinesRef);
        my(%CmpdFieldLabelsMap) = ();

        # Setup a map for the current labels...
        for $Label (@CmpdFieldLabels) {
            $CmpdFieldLabelsMap{$Label} = "PresentInSome";
        }

        # Known labels missing from this compound are no longer present in all...
        for $Label (@{$SDCmpdsInfo{FieldLabels}}) {
            if (!$CmpdFieldLabelsMap{$Label}) {
                $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInSome";
            }
        }

        # Labels seen for the first time in this compound are added as present in some...
        for $Label (@CmpdFieldLabels) {
            if (!$SDCmpdsInfo{FieldLabelsMap}{$Label}) {
                # It's a new label...
                push @{$SDCmpdsInfo{FieldLabels}}, $Label;
                $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInSome";
            }
        }
    }
    else {
        # Get the initial label set and set up a map...
        @{$SDCmpdsInfo{FieldLabels}} = GetCmpdDataHeaderLabels($CmpdLinesRef);
        for $Label (@{$SDCmpdsInfo{FieldLabels}}) {
            $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInAll";
        }
    }

    if ($OptionsInfo{CountEmptyData} || $OptionsInfo{CheckData}) {
        # Count empty and numerical data field values...
        my(%DataFieldAndValues, $Value);

        %DataFieldAndValues = GetCmpdDataHeaderLabelsAndValues($CmpdLinesRef);
        for $Label (keys %DataFieldAndValues) {
            $Value = $DataFieldAndValues{$Label};

            if ($OptionsInfo{CountEmptyData}) {
                if (IsNotEmpty($Value)) {
                    $SDCmpdsInfo{NonEmptyFieldValuesCountMap}{$Label}++;
                }
                else {
                    # Use the processed detail level, like the rest of the script...
                    if ($OptionsInfo{Detail} >= 2) {
                        print "Compound record $CmpdCount: Empty data field <$Label>\n";
                    }
                    $SDCmpdsInfo{EmptyFieldValuesCountMap}{$Label}++;
                }
            }

            if ($OptionsInfo{CheckData}) {
                if (IsNumerical($Value)) {
                    $SDCmpdsInfo{NumericalFieldValuesCountMap}{$Label}++;
                }
                else {
                    $SDCmpdsInfo{NonNumericalFieldValuesCountMap}{$Label}++;
                }
            }
        }
    }
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
356
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
357 # Print compound summary...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Print a summary of the data fields recorded in %SDCmpdsInfo.
#
# Always lists the number of data fields. When at least one field is known, it
# also lists all field labels and, if some labels are not marked "PresentInAll",
# the labels present in all compounds and those present in some compounds.
# When $OptionsInfo{CountEmptyData} and/or $OptionsInfo{CheckData} are set, the
# corresponding per-field value counts are printed through PrintDataInformation.
sub PrintCmpdInfoSummary {
  my $FieldCount = scalar(@{$SDCmpdsInfo{FieldLabels}});

  if (!$FieldCount) {
    print "\nNumber of data fields: 0\n";
    return;
  }

  my @SortedLabels = sort keys %{$SDCmpdsInfo{FieldLabelsMap}};

  print "\nNumber of data fields: $FieldCount\n";
  print "All data field labels: ", (map { "<$_> " } @SortedLabels), "\n";

  # Split the labels by how widely they occur across compounds...
  my @LabelsInAll = grep { $SDCmpdsInfo{FieldLabelsMap}{$_} eq "PresentInAll" } @SortedLabels;
  my @LabelsInSome = ();
  my $EveryLabelInAll = (scalar(@LabelsInAll) == $FieldCount) ? 1 : 0;

  if (!$EveryLabelInAll) {
    @LabelsInSome = grep { $SDCmpdsInfo{FieldLabelsMap}{$_} eq "PresentInSome" } @SortedLabels;

    print "Data field labels present in all compounds: ", (map { "<$_> " } @LabelsInAll), "\n";
    print "Data field labels present in some compounds: ", (map { "<$_> " } @LabelsInSome), "\n";
  }

  # Label groups to report on: a single unqualified group when every label is
  # present in all compounds; otherwise separate "all" and "some" groups...
  my @LabelGroups = $EveryLabelInAll
    ? (["", \@{$SDCmpdsInfo{FieldLabels}}])
    : ([" present in all compounds", \@LabelsInAll], [" present in some compounds", \@LabelsInSome]);

  # Each report: controlling option, followed by [value kind, count map name] pairs
  # in the order they are printed...
  my @Reports = (
    ["CountEmptyData", ["non-empty", "NonEmptyFieldValuesCountMap"], ["empty", "EmptyFieldValuesCountMap"]],
    ["CheckData", ["non-numerical", "NonNumericalFieldValuesCountMap"], ["numerical", "NumericalFieldValuesCountMap"]],
  );

  for my $Report (@Reports) {
    my($OptionName, @ValueKinds) = @{$Report};
    next if !$OptionsInfo{$OptionName};

    print "\n";
    for my $LabelGroup (@LabelGroups) {
      my($Scope, $LabelsRef) = @{$LabelGroup};
      for my $ValueKind (@ValueKinds) {
        my($Kind, $CountMapName) = @{$ValueKind};
        PrintDataInformation("Number of $Kind values for data field(s)$Scope", $LabelsRef, \%{$SDCmpdsInfo{$CountMapName}});
      }
    }
    print "\n";
  }
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
430 # List data information...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Print one line of per-label values in the form:
#
#   InfoLabel:  <Label1> - Value1, <Label2> - Value2...
#
# Parameters:
#   $InfoLabel               - Text printed before the colon.
#   $DataLabelRef            - Reference to an array of labels, listed in order.
#   $DataLabelToValueMapRef  - Reference to a hash mapping labels to values; a
#                              label without an entry is reported as 0.
sub PrintDataInformation {
  my($InfoLabel, $DataLabelRef, $DataLabelToValueMapRef) = @_;

  my @Entries = map {
    my $Value = exists($DataLabelToValueMapRef->{$_}) ? $DataLabelToValueMapRef->{$_} : 0;
    " <$_> - $Value";
  } @{$DataLabelRef};

  # Joining avoids having to strip a trailing comma afterwards...
  my $Line = join(",", @Entries);
  print "$InfoLabel: $Line\n";
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
442
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
443 # Total size of all the files...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Print the total compound count and the combined size of the SD files flagged
# as okay in %SDFilesInfo. Nothing is printed unless more than one file is okay.
sub ListTotalSizeOfFiles {
  # Indices into @SDFilesList of the files flagged as okay...
  my @OkayIndices = grep { $SDFilesInfo{FileOkay}[$_] } (0 .. $#SDFilesList);

  my $TotalSize = 0;
  for my $OkayIndex (@OkayIndices) {
    $TotalSize += $SDFilesInfo{FileSize}[$OkayIndex];
  }

  my $FileOkayCount = scalar(@OkayIndices);
  if ($FileOkayCount > 1) {
    print "\nTotal number of compounds in $FileOkayCount SD files: $SDCmpdsInfo{TotalCmpdCount}\n";
    print "\nTotal size of $FileOkayCount SD files: ", FormatFileSize($TotalSize), "\n";
  }
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
462
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
463 # Retrieve information about SD files...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Collect basic information about each file in @SDFilesList.
#
# Resets %SDCmpdsInfo and %SDFilesInfo, then fills three arrays in %SDFilesInfo,
# each indexed in parallel with @SDFilesList:
#
#   FileOkay          - 1 when the file exists, passes CheckFileType for "sdf sd"
#                       and can be opened for reading; 0 otherwise.
#   FileSize          - Value returned by FileSize for okay files; 0 otherwise.
#   FileLastModified  - "time; date" string built from
#                       FormattedFileModificationTimeAndDate for okay files;
#                       empty string otherwise.
#
# A warning is issued for each file that is ignored.
sub RetrieveSDFilesInfo {
  my($Index, $SDFile, $SDFileHandle, $ModifiedTimeString, $ModifiedDateString);

  %SDCmpdsInfo = ();

  %SDFilesInfo = ();
  @{$SDFilesInfo{FileOkay}} = ();
  @{$SDFilesInfo{FileSize}} = ();
  @{$SDFilesInfo{FileLastModified}} = ();

  FILELIST: for $Index (0 .. $#SDFilesList) {
    # Start out pessimistic; entries are updated only after all checks pass...
    $SDFilesInfo{FileOkay}[$Index] = 0;
    $SDFilesInfo{FileSize}[$Index] = 0;
    $SDFilesInfo{FileLastModified}[$Index] = '';

    $SDFile = $SDFilesList[$Index];
    if (!(-e $SDFile)) {
      warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($SDFile, "sdf sd")) {
      warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
      next FILELIST;
    }
    # Readability check only. Use three-argument open with an explicit read mode
    # and a lexical handle, so that mode characters or surrounding whitespace in
    # a file name are never interpreted by open...
    if (!open($SDFileHandle, '<', $SDFile)) {
      warn "Warning: Ignoring file $SDFile: Couldn't open it: $! \n";
      next FILELIST;
    }
    close $SDFileHandle;

    $SDFilesInfo{FileOkay}[$Index] = 1;
    $SDFilesInfo{FileSize}[$Index] = FileSize($SDFile);
    ($ModifiedTimeString, $ModifiedDateString) = FormattedFileModificationTimeAndDate($SDFile);
    $SDFilesInfo{FileLastModified}[$Index] = "$ModifiedTimeString; $ModifiedDateString";
  }
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
500
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
501 # Process option values...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Translate the raw command line options in %Options into %OptionsInfo.
#
# Each simple option is copied as is when set and stored as 0 otherwise. The
# detail level is copied unchanged. Four derived flags (ProcessCmpdInfo,
# ProcessCmpdData, CountEmptyData and CheckData) are set to 1 when any of the
# options they depend on is set, and to 0 otherwise.
sub ProcessOptions {
  %OptionsInfo = ();

  # %OptionsInfo key => name of the command line option it mirrors...
  my %OptionNameOf = (
    All => 'all',
    Chiral => 'chiral',
    Count => 'count',
    DataCheck => 'datacheck',
    Empty => 'empty',
    Fields => 'fields',
    InvalidAtomNumbers => 'invalidatomnumbers',
    Mismatch => 'mismatch',
    Salts => 'salts',
    UnknownAtoms => 'unknownatoms',
  );
  for my $InfoKey (keys %OptionNameOf) {
    $OptionsInfo{$InfoKey} = $Options{$OptionNameOf{$InfoKey}} || 0;
  }

  $OptionsInfo{Detail} = $Options{detail};

  # Returns 1 when at least one of the named options is set; 0 otherwise...
  my $AnyOptionSet = sub {
    my(@OptionNames) = @_;
    return (grep { $Options{$_} } @OptionNames) ? 1 : 0;
  };

  $OptionsInfo{ProcessCmpdInfo} = $AnyOptionSet->(qw(all chiral empty fields invalidatomnumbers mismatch salts unknownatoms datacheck));

  $OptionsInfo{ProcessCmpdData} = $AnyOptionSet->(qw(all fields empty datacheck));

  $OptionsInfo{CountEmptyData} = $AnyOptionSet->(qw(all empty));
  $OptionsInfo{CheckData} = $AnyOptionSet->(qw(all datacheck));
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
525
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
526 # Setup script usage and retrieve command line arguments specified using various options...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Set up option defaults, parse the command line into %Options and validate the
# values which can be checked right away.
#
# Dies with an explanatory message when option parsing fails, when the working
# directory is not a directory or can't be entered, or when the detail level is
# outside 1 to 3. On success, the current directory is changed to the working
# directory, if one was specified.
sub SetupScriptUsage {

  # Defaults first; GetOptions overrides them with whatever was specified...
  %Options = ();
  $Options{detail} = 1;

  my @OptionSpecs = (
    "all|a", "count|c", "chiral", "datacheck", "detail|d:i", "empty|e", "fields|f",
    "help|h", "invalidatomnumbers|i", "mismatch|m", "salts|s", "unknownatoms|u",
    "workingdir|w=s",
  );
  GetOptions(\%Options, @OptionSpecs)
    or die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";

  if ($Options{workingdir}) {
    -d $Options{workingdir}
      or die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    chdir $Options{workingdir}
      or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  # Detail level must fall within 1 to 3...
  unless ($Options{detail} > 0 && $Options{detail} <= 3) {
    die "Error: The value specified, $Options{detail}, for option \"-d --detail\" is not valid. Possible values: 1 to 3\n";
  }
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
545
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
546 __END__
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
547
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
548 =head1 NAME
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
549
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
550 InfoSDFiles.pl - List information about SDFile(s)
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
551
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
552 =head1 SYNOPSIS
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
553
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
554 InfoSDFiles.pl SDFile(s)...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
555
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
556 InfoSDFiles.pl [B<-a, --all>] [B<-c, --count>] [B<--chiral>] [B<--datacheck>]
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
557 [B<-d, --detail> infolevel] [B<-e, --empty>] [B<-f, --fields>] [B<-h, --help>]
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
558 [B<-i, --invalidatomnumbers>] [B<-m, --mismatch>] [B<-s, --salts>] [B<-u, --unknownatoms>]
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
559 [B<-w, --workingdir> dirname] SDFile(s)...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
560
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
561 =head1 DESCRIPTION
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
562
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
563 List information about I<SDFile(s)> contents: number of compounds, empty records
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
564 and so on. Multiple SDFile names are separated by spaces. The valid file extensions
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
565 are I<.sdf> and I<.sd>. All other file names are ignored. All the SD files in the current
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
566 directory can be specified either by I<*.sdf> or the current directory name.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
567
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
568 =head1 OPTIONS
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
569
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
570 =over 4
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
571
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
572 =item B<-a, --all>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
573
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
574 List all the available information.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
575
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
576 =item B<-c, --count>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
577
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
578 List number of compounds. This is B<default behavior>.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
579
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
580 =item B<--chiral>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
581
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
582 List number of empty atom/bond blocks for compounds with chiral flag set in
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
583 count line.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
584
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
585 =item B<-d, --detail> I<infolevel>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
586
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
587 Level of information to print. Default: 1. Possible values: I<1, 2, or 3>.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
588
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
589 =item B<--datacheck>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
590
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
591 List number of numerical and non-numerical values for each data field.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
592
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
593 =item B<-e, --empty>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
594
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
595 List number of empty atom/bond blocks and data fields for compounds.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
596
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
597 =item B<-f, --fields>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
598
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
599 List data field labels present for compounds.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
600
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
601 =item B<-h, --help>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
602
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
603 Print this help message.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
604
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
605 =item B<-i, --invalidatomnumbers>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
606
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
607 List number of bond blocks for compounds which contain invalid atom numbers.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
608
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
609 =item B<-m, --mismatch>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
610
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
611 List number of atom/bond blocks for compounds which don't match with counts
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
612 line information in header block.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
613
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
614 =item B<-s, --salts>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
615
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
616 List number of atom blocks for compounds which contain salts identified as
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
617 disconnected structural units.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
618
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
619 =item B<-u, --unknownatoms>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
620
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
621 List number of atom blocks for compounds which contain special atom symbols
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
622 such as L, Q, *, LP, X, R#, or any other non-periodic-table symbols.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
623
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
624 =item B<-w, --workingdir> I<dirname>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
625
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
626 Location of working directory. Default: current directory.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
627
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
628 =back
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
629
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
630 =head1 EXAMPLES
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
631
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
632 To count compounds in SD file(s), type:
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
633
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
634 % InfoSDFiles.pl Sample1.sdf
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
635 % InfoSDFiles.pl Sample1.sdf Sample2.sdf
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
636 % InfoSDFiles.pl *.sdf
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
637
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
638 To list all available information for SD file(s), type:
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
639
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
640 % InfoSDFiles.pl -a *.sdf
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
641
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
642 To list all data fields present in Sample.sdf, type:
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
643
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
644 % InfoSDFiles.pl -f Sample.sdf
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
645
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
646 To count number of compounds which contain salts and list associated structural
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
647 data, type:
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
648
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
649 % InfoSDFiles.pl -s -d 3 Sample.sdf
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
650
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
651 =head1 AUTHOR
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
652
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
653 Manish Sud <msud@san.rr.com>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
654
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
655 =head1 SEE ALSO
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
656
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
657 ExtractFromSDFiles.pl, FilterSDFiles.pl, MergeTextFilesWithSD.pl
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
658
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
659 =head1 COPYRIGHT
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
660
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
661 Copyright (C) 2015 Manish Sud. All rights reserved.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
662
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
663 This file is part of MayaChemTools.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
664
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
665 MayaChemTools is free software; you can redistribute it and/or modify it under
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
666 the terms of the GNU Lesser General Public License as published by the Free
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
667 Software Foundation; either version 3 of the License, or (at your option)
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
668 any later version.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
669
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
670 =cut