annotate bin/ExtractFromSDFiles.pl @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1 #!/usr/bin/perl -w
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
2 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
3 # $RCSfile: ExtractFromSDFiles.pl,v $
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
4 # $Date: 2015/03/22 19:11:27 $
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
5 # $Revision: 1.48 $
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
6 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
7 # Author: Manish Sud <msud@san.rr.com>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
8 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
10 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
11 # This file is part of MayaChemTools.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
12 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
14 # the terms of the GNU Lesser General Public License as published by the Free
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
15 # Software Foundation; either version 3 of the License, or (at your option) any
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
16 # later version.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
17 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
18 # MayaChemTools is distributed in the hope that it will be useful, but without
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
19 # any warranty; without even the implied warranty of merchantability of fitness
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
20 # for a particular purpose. See the GNU Lesser General Public License for more
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
21 # details.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
22 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
23 # You should have received a copy of the GNU Lesser General Public License
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
26 # Boston, MA, 02111-1307, USA.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
27 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
28
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
29 use strict;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
30 use FindBin; use lib "$FindBin::Bin/../lib";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
31 use Getopt::Long;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
32 use File::Basename;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
33 use Text::ParseWords;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
34 use Benchmark;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
35 use SDFileUtil;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
36 use FileUtil;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
37 use TextUtil;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
38
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# File-scoped state. %Options and $ScriptName are read by the driver code
# below and by subroutines defined later in this file.
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName:Starting...\n\n";

# Use an explicit class method call rather than the indirect object form
# ("new Benchmark"), which perl can misparse when a sub named "new" is in scope.
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  # Usage text goes out through die, so the script stops here.
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

# Expand command line arguments into the list of SD files to process...
my(@SDFilesList);
@SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Collect information about SD files...
print "Checking input SD file(s)...\n";
my(%SDFilesInfo);
RetrieveSDFilesInfo();

# Generate output files...
my($FileIndex);
if (@SDFilesList > 1) {
  print "\nProcessing SD files...\n";
}
for $FileIndex (0 .. $#SDFilesList) {
  # Only files flagged as okay in %SDFilesInfo are processed; others are skipped.
  if ($SDFilesInfo{FileOkay}[$FileIndex]) {
    print "\nProcessing file $SDFilesList[$FileIndex]...\n";
    ExtractFromSDFile($FileIndex);
  }
}
print "\n$ScriptName:Done...\n\n";

# Report elapsed time...
$EndTime = Benchmark->new;
$TotalTime = timediff($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
84
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
85 ###############################################################################
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
86
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Extract data from a SD file...
#
# Opens the input and output files for the SD file at position $FileIndex,
# runs the extraction routine selected by $OptionsInfo{Mode} and then closes
# the files. Dies with the list of allowed mode values when the mode doesn't
# match any supported value.
#
sub ExtractFromSDFile {
  my($FileIndex) = @_;
  my($ModeHandler, $Extractor, @ModeHandlers);

  # Ordered [mode pattern, extraction routine] pairs. Patterns are anchored and
  # case insensitive; the first one matching $OptionsInfo{Mode} is used.
  @ModeHandlers = (
    [qr/^AllDataFields$/i, \&ExtractAllDataFields],
    [qr/^CommonDataFields$/i, \&ExtractCommonDataFields],
    [qr/^DataFields$/i, \&ExtractDataFields],
    [qr/^(DataFieldByList|DatafieldUniqueByList)$/i, \&ExtractDataFieldByList],
    [qr/^DataFieldNotByList$/i, \&ExtractDataFieldNotByList],
    [qr/^DataFieldsByValue$/i, \&ExtractDataFieldsByValue],
    [qr/^DataFieldsByRegex$/i, \&ExtractDataFieldsByRegex],
    [qr/^RandomCmpds$/i, \&ExtractRandomCompounds],
    [qr/^MolNames$/i, \&ExtractMolNames],
    [qr/^RecordNum$/i, \&ExtractRecordNum],
    [qr/^RecordNums$/i, \&ExtractRecordNums],
    [qr/^RecordRange$/i, \&ExtractRecordRange],
    [qr/^2DCmpdRecords$/i, \&Extract2DCmpdRecords],
    [qr/^3DCmpdRecords$/i, \&Extract3DCmpdRecords],
  );

  OpenInputAndOutputFiles($FileIndex);

  $Extractor = undef;
  MODE: for $ModeHandler (@ModeHandlers) {
    if ($OptionsInfo{Mode} =~ $ModeHandler->[0]) {
      $Extractor = $ModeHandler->[1];
      last MODE;
    }
  }

  if (!defined $Extractor) {
    # The allowed values list includes datafieldsbyregex, which is dispatched above.
    die "Error: The value specified, $Options{mode}, for option \"-m --mode\" is not valid. Allowed values: alldatafields, commondatafields, datafields, datafieldsbyvalue, datafieldsbyregex, datafieldbylist, datafielduniquebylist, datafieldnotbylist, molnames, randomcmpds, recordnum, recordnums, recordrange, 2dcmpdrecords, 3dcmpdrecords\n";
  }

  $Extractor->($FileIndex);

  CloseInputAndOutputFiles();
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
155
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Extract all data fields...
#
# Uses the labels stored in $SDFilesInfo{AllDataFieldLabels} for the file at
# $FileIndex as the output labels, then reads compound strings one at a time
# from $SDFilesInfo{InputSDFileRef} and hands each one to the text and SD
# writer routines.
#
sub ExtractAllDataFields {
  my($FileIndex) = @_;
  my(@RecordLines);

  # Copy the labels into the shared DataLabels array before the header is written...
  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  COMPOUND: while (1) {
    # A false value from ReadCmpdString ends the loop...
    $SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef});
    last COMPOUND unless $SDFilesInfo{CmpdString};

    @RecordLines = split "\n", $SDFilesInfo{CmpdString};
    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@RecordLines);

    SetupDataValues();
    WriteTextFileCmpdData();
    WriteSDFileCmpdData();
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
173
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Extract common data fields...
#
# Uses the labels stored in $SDFilesInfo{CommonDataFieldLabels} for the file
# at $FileIndex as the output labels, then reads compound strings one at a
# time from $SDFilesInfo{InputSDFileRef} and hands each one to the text and SD
# writer routines.
#
sub ExtractCommonDataFields {
  my($FileIndex) = @_;
  my(@RecordLines);

  # Copy the labels into the shared DataLabels array before the header is written...
  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{CommonDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  COMPOUND: while (1) {
    # A false value from ReadCmpdString ends the loop...
    $SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef});
    last COMPOUND unless $SDFilesInfo{CmpdString};

    @RecordLines = split "\n", $SDFilesInfo{CmpdString};
    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@RecordLines);

    SetupDataValues();
    WriteTextFileCmpdData();
    WriteSDFileCmpdData();
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
191
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Extract specified data fields...
#
# Uses the labels stored in $OptionsInfo{SpecifiedDataFieldLabels} as the
# output labels (the same list for every file; $FileIndex is accepted but not
# used here), then reads compound strings one at a time from
# $SDFilesInfo{InputSDFileRef} and hands each one to the text and SD writer
# routines.
#
sub ExtractDataFields {
  my($FileIndex) = @_;
  my(@RecordLines);

  # Copy the labels into the shared DataLabels array before the header is written...
  @{$SDFilesInfo{DataLabels}} = @{$OptionsInfo{SpecifiedDataFieldLabels}};
  WriteTextFileColLabels();

  COMPOUND: while (1) {
    # A false value from ReadCmpdString ends the loop...
    $SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef});
    last COMPOUND unless $SDFilesInfo{CmpdString};

    @RecordLines = split "\n", $SDFilesInfo{CmpdString};
    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@RecordLines);

    SetupDataValues();
    WriteTextFileCmpdData();
    WriteSDFileCmpdData();
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
209
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Extract data fields using a list...
#
# Writes out compound records whose value for the data field named by
# $OptionsInfo{SpecifiedDataFieldLabel} is a key of
# $OptionsInfo{SpecifiedDataFieldValues}. Records without that data field are
# skipped.
#
# Behaviour depends on $OptionsInfo{Mode}:
#
#   . DataFieldUniqueByList: only the first record seen for each listed value
#     is written; reading stops once the number of distinct values found
#     reaches $OptionsInfo{SpecifiedDataFieldValuesCount}.
#   . DataFieldByList: every matching record is written. The whole file is
#     scanned. Stopping early after each listed value had been seen once would
#     drop later matching records while keeping earlier duplicates, making the
#     output depend on record order.
#
# NOTE(review): assumes by-list mode is meant to return all matching records;
# confirm against the script's documented behaviour.
#
sub ExtractDataFieldByList {
  my($FileIndex) = @_;
  my($Value, $SpecifiedDataFieldValuesFoundCount, $CurrentValue, $SpecifiedDataFieldLabel, $UniqueByListMode, $ByListMode, $FirstOccurrence, @CmpdLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  # Reset the found status of every specified value for this file...
  for $Value (keys %{$OptionsInfo{SpecifiedDataFieldValues}}) {
    $OptionsInfo{SpecifiedDataFieldValues}{$Value} = "NotFound";
  }
  $SpecifiedDataFieldValuesFoundCount = 0;
  $SpecifiedDataFieldLabel = $OptionsInfo{SpecifiedDataFieldLabel};

  # Mode doesn't change while the file is being read; test it once...
  $UniqueByListMode = ($OptionsInfo{Mode} =~ /^DataFieldUniqueByList$/i) ? 1 : 0;
  $ByListMode = ($OptionsInfo{Mode} =~ /^DataFieldByList$/i) ? 1 : 0;

  CMPDSTRING: while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    @CmpdLines = split "\n", $SDFilesInfo{CmpdString};
    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);

    if (!exists $SDFilesInfo{DataFieldValues}{$SpecifiedDataFieldLabel}) {
      next CMPDSTRING;
    }

    SetupDataValues();

    $CurrentValue = $SDFilesInfo{DataFieldValues}{$SpecifiedDataFieldLabel};
    if (!exists $OptionsInfo{SpecifiedDataFieldValues}{$CurrentValue}) {
      next CMPDSTRING;
    }

    # Track the first time each specified value is encountered...
    $FirstOccurrence = ($OptionsInfo{SpecifiedDataFieldValues}{$CurrentValue} eq "NotFound") ? 1 : 0;
    if ($FirstOccurrence) {
      $SpecifiedDataFieldValuesFoundCount++;
      $OptionsInfo{SpecifiedDataFieldValues}{$CurrentValue} = "Found";
    }

    if ($ByListMode || ($UniqueByListMode && $FirstOccurrence)) {
      WriteSDFileCmpdString();
      WriteTextFileCmpdData();
    }

    # Nothing further can be written in unique mode once all values are found...
    if ($UniqueByListMode && $SpecifiedDataFieldValuesFoundCount >= $OptionsInfo{SpecifiedDataFieldValuesCount}) {
      last CMPDSTRING;
    }
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
260
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Extract data field whose values are not on the specified list...
#
# Writes out compound records that contain the data field named by
# $OptionsInfo{SpecifiedDataFieldLabel}, whose value for it is not empty and
# is not a key of $OptionsInfo{SpecifiedDataFieldValues}. All other records
# are skipped.
#
sub ExtractDataFieldNotByList {
  my($FileIndex) = @_;
  my($FieldLabel, $FieldValue, @RecordLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  $FieldLabel = $OptionsInfo{SpecifiedDataFieldLabel};

  CMPDSTRING: while (1) {
    # A false value from ReadCmpdString ends the loop...
    $SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef});
    last CMPDSTRING unless $SDFilesInfo{CmpdString};

    @RecordLines = split "\n", $SDFilesInfo{CmpdString};
    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@RecordLines);

    # Records without the data field are ignored...
    next CMPDSTRING unless exists $SDFilesInfo{DataFieldValues}{$FieldLabel};

    SetupDataValues();

    $FieldValue = $SDFilesInfo{DataFieldValues}{$FieldLabel};

    # Make sure the current value is not empty and is not on the specified list of values...
    next CMPDSTRING if IsEmpty($FieldValue);
    next CMPDSTRING if exists $OptionsInfo{SpecifiedDataFieldValues}{$FieldValue};

    WriteSDFileCmpdString();
    WriteTextFileCmpdData();
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
292
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Extract data fields by value...
#
# For each compound record, every label in $OptionsInfo{SpecifiedDataFieldLabels}
# that is present in the record is compared against its specified value using
# its specified criterion (eq, le or ge, case insensitive). Comparison is
# numerical when $OptionsInfo{NumericalComparison} is set and string based
# otherwise. A failed comparison counts as one violation; labels missing from
# the record and unrecognized criteria count as none. The record is written
# out when the violation count doesn't exceed $OptionsInfo{Violations}.
#
sub ExtractDataFieldsByValue {
  my($FileIndex) = @_;
  my($FieldValue, $Criterion, $Threshold, $Violations, $Violated, @RecordLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  CMPDSTRING: while (1) {
    # A false value from ReadCmpdString ends the loop...
    $SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef});
    last CMPDSTRING unless $SDFilesInfo{CmpdString};

    @RecordLines = split "\n", $SDFilesInfo{CmpdString};
    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@RecordLines);

    SetupDataValues();
    $Violations = 0;

    LABEL: for my $FieldLabel (@{$OptionsInfo{SpecifiedDataFieldLabels}}) {
      next LABEL unless exists $SDFilesInfo{DataFieldValues}{$FieldLabel};

      $FieldValue = $SDFilesInfo{DataFieldValues}{$FieldLabel};
      $Criterion = $OptionsInfo{SpecifiedDataFieldCriteriaMap}{$FieldLabel};
      $Threshold = $OptionsInfo{SpecifiedDataFieldValuesMap}{$FieldLabel};

      # Each criterion is violated by the opposite of the relation it names...
      if ($Criterion =~ /^eq$/i) {
        $Violated = $OptionsInfo{NumericalComparison} ? ($FieldValue != $Threshold) : ($FieldValue ne $Threshold);
      }
      elsif ($Criterion =~ /^le$/i) {
        $Violated = $OptionsInfo{NumericalComparison} ? ($FieldValue > $Threshold) : ($FieldValue gt $Threshold);
      }
      elsif ($Criterion =~ /^ge$/i) {
        $Violated = $OptionsInfo{NumericalComparison} ? ($FieldValue < $Threshold) : ($FieldValue lt $Threshold);
      }
      else {
        $Violated = 0;
      }

      $Violations++ if $Violated;
    }

    if ($Violations <= $OptionsInfo{Violations}) {
      WriteSDFileCmpdString();
      WriteTextFileCmpdData();
    }
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
338
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Extract compounds whose data field values satisfy regular expression
# criteria...
#
# Arguments:
#   $FileIndex - index of the input SD file; used to look up its data field
#                labels in $SDFilesInfo{AllDataFieldLabels}.
#
# For each compound read from $SDFilesInfo{InputSDFileRef}, every label in
# $OptionsInfo{SpecifiedDataFieldLabels} that is present in the compound is
# checked against its regex from $OptionsInfo{SpecifiedDataFieldRegexMap}:
#
#   . criterion "eq": a violation is counted when the value does not match
#   . criterion "ne": a violation is counted when the value does match
#
# Labels missing from the compound, and criteria other than eq/ne, do not
# count as violations. A compound is written out when its violation count is
# at most $OptionsInfo{Violations}. Matching ignores case when
# $OptionsInfo{RegexIgnoreCase} is set.
#
sub ExtractDataFieldsByRegex {
  my($FileIndex) = @_;
  my(@CmpdLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  # Loop ends when ReadCmpdString returns a false value (presumably at end of file)...
  while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    @CmpdLines = split "\n", $SDFilesInfo{CmpdString};
    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);

    SetupDataValues();
    my $Violations = 0;

    LABEL: for my $FieldLabel (@{$OptionsInfo{SpecifiedDataFieldLabels}}) {
      next LABEL unless exists $SDFilesInfo{DataFieldValues}{$FieldLabel};

      my $FieldValue = $SDFilesInfo{DataFieldValues}{$FieldLabel};
      my $Criterion = $OptionsInfo{SpecifiedDataFieldRegexCriteriaMap}{$FieldLabel};
      my $Regex = $OptionsInfo{SpecifiedDataFieldRegexMap}{$FieldLabel};

      # Decide whether a match is required (eq) or forbidden (ne); anything
      # else is not a recognized criterion and is skipped without matching...
      my $MatchExpected;
      if ($Criterion =~ /^eq$/i) {
        $MatchExpected = 1;
      }
      elsif ($Criterion =~ /^ne$/i) {
        $MatchExpected = 0;
      }
      else {
        next LABEL;
      }

      # The pattern is interpolated at match time, exactly as specified by the user...
      my $Matched = $OptionsInfo{RegexIgnoreCase} ? ($FieldValue =~ /$Regex/i) : ($FieldValue =~ /$Regex/);

      if ($MatchExpected ? !$Matched : $Matched) {
        $Violations++;
      }
    }

    if ($Violations <= $OptionsInfo{Violations}) {
      WriteSDFileCmpdString();
      WriteTextFileCmpdData();
    }
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
382
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Extract a random selection of compounds...
#
# Arguments:
#   $FileIndex - index of the input SD file; used to look up its data field
#                labels and its compound count in %SDFilesInfo.
#
# Exactly min($OptionsInfo{NumOfCmpds}, compound count) distinct record numbers
# are chosen using a random number generator seeded with $OptionsInfo{Seed},
# and the corresponding records are written out in file order.
#
# Note: Earlier behavior drew NumOfCmpds + 1 random numbers and silently lost
# any duplicates, so the number of extracted compounds could be one more, or
# several fewer, than requested. The selection for a given seed may therefore
# differ from what earlier versions produced.
#
sub ExtractRandomCompounds {
  my($FileIndex) = @_;
  my($CmpdNum, $CmpdCount, $TargetCount, $WrittenCount, $RandomIndex, @CmpdLines, %RandomCmpdIndexMap);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  $CmpdCount = $SDFilesInfo{CmpdCount}[$FileIndex];
  srand($OptionsInfo{Seed});

  # Can't select more distinct records than the file contains...
  $TargetCount = $OptionsInfo{NumOfCmpds};
  if ($TargetCount > $CmpdCount) {
    $TargetCount = $CmpdCount;
  }

  # Keep drawing until the requested number of distinct record numbers has
  # been collected; duplicate draws just overwrite an existing key. This always
  # terminates because $TargetCount never exceeds the number of possible values...
  %RandomCmpdIndexMap = ();
  while (keys(%RandomCmpdIndexMap) < $TargetCount) {
    $RandomIndex = int (rand $CmpdCount) + 1;
    $RandomCmpdIndexMap{$RandomIndex} = $RandomIndex;
  }

  $CmpdNum = 0;
  $WrittenCount = 0;

  # Stop reading as soon as all selected records have been written...
  CMPDSTRING: while ($WrittenCount < $TargetCount) {
    $SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef});
    if (!$SDFilesInfo{CmpdString}) {
      last CMPDSTRING;
    }

    $CmpdNum++;
    if (!exists $RandomCmpdIndexMap{$CmpdNum}) {
      next CMPDSTRING;
    }
    $WrittenCount++;

    @CmpdLines = split "\n", $SDFilesInfo{CmpdString};

    WriteSDFileCmpdString();

    if ($OptionsInfo{OutputTextFile}) {
      %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
      SetupDataValues();
      WriteTextFileCmpdData();
    }
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
420
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Extract molecule names...
#
# Arguments:
#   $FileIndex - index of the input SD file; accepted for consistency with the
#                other extraction subs but not used here.
#
# Writes a "MolName" column label followed by one quoted molecule name per
# compound to the text file referenced by $SDFilesInfo{NewTextFileRef}. The
# name is parsed from the first line of each compound record.
#
sub ExtractMolNames {
  my($FileIndex) = @_;
  my($MolName, $NewTextFileRef, @CmpdLines);

  # Assign the label list instead of appending to it: appending leaves any
  # labels from an earlier call in place, so "MolName" would pile up in the
  # header when more than one file is processed...
  @{$SDFilesInfo{DataLabels}} = ("MolName");
  WriteTextFileColLabels();

  $NewTextFileRef = $SDFilesInfo{NewTextFileRef};
  while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    @CmpdLines = split "\n", $SDFilesInfo{CmpdString};
    $MolName = QuoteAWord(ParseCmpdMolNameLine($CmpdLines[0]), $OptionsInfo{OutQuote});
    print $NewTextFileRef "$MolName\n";
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
436
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Extract a specific compound record...
#
# Arguments:
#   $FileIndex - index of the input SD file; used to look up its data field
#                labels in $SDFilesInfo{AllDataFieldLabels}.
#
# Records are counted from 1 as they are read. The record whose number equals
# $OptionsInfo{RecordNum} is written out and reading stops right after it.
#
sub ExtractRecordNum {
  my($FileIndex) = @_;
  my(@CmpdLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  my $RecordIndex = 0;

  while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    $RecordIndex++;

    if ($RecordIndex == $OptionsInfo{RecordNum}) {
      @CmpdLines = split "\n", $SDFilesInfo{CmpdString};
      WriteSDFileCmpdString();

      if ($OptionsInfo{OutputTextFile}) {
        %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
        SetupDataValues();
        WriteTextFileCmpdData();
      }

      # Nothing more to do once the requested record has been handled...
      last;
    }
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
464
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Extract specific compound records...
#
# Arguments:
#   $FileIndex - index of the input SD file; used to look up its data field
#                labels in $SDFilesInfo{AllDataFieldLabels}.
#
# Records are counted from 1 as they are read; those whose number is a key of
# $OptionsInfo{RecordNums} are written out. On a record that is not selected,
# reading stops if the record number is past $OptionsInfo{RecordNumsMax} or if
# $OptionsInfo{RecordNumsCount} records have already been written.
#
sub ExtractRecordNums {
  my($FileIndex) = @_;
  my(@CmpdLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  my $RecordIndex = 0;
  my $WrittenCount = 0;

  RECORD: while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    $RecordIndex++;

    unless (exists $OptionsInfo{RecordNums}{$RecordIndex}) {
      # The stop conditions are checked only on records that are not selected...
      if ($RecordIndex > $OptionsInfo{RecordNumsMax} || $WrittenCount >= $OptionsInfo{RecordNumsCount}) {
        last RECORD;
      }
      next RECORD;
    }

    $WrittenCount++;
    @CmpdLines = split "\n", $SDFilesInfo{CmpdString};

    WriteSDFileCmpdString();

    if ($OptionsInfo{OutputTextFile}) {
      %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
      SetupDataValues();
      WriteTextFileCmpdData();
    }
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
496
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
497
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Extract compounds in a specific record range...
#
# Arguments:
#   $FileIndex - index of the input SD file; used to look up its data field
#                labels in $SDFilesInfo{AllDataFieldLabels}.
#
# Records are counted from 1 as they are read. Those numbered from
# $OptionsInfo{StartRecordNum} through $OptionsInfo{EndRecordNum}, inclusive,
# are written out; reading stops at the first record past the end of the range.
#
sub ExtractRecordRange {
  my($FileIndex) = @_;
  my(@CmpdLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  my $RecordIndex = 0;
  RECORD: while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    $RecordIndex++;

    # The end of range check comes first so that reading stops on the first
    # record past the end, even when that record is also before the start...
    last RECORD if ($RecordIndex > $OptionsInfo{EndRecordNum});
    next RECORD if ($RecordIndex < $OptionsInfo{StartRecordNum});

    @CmpdLines = split "\n", $SDFilesInfo{CmpdString};

    WriteSDFileCmpdString();

    if ($OptionsInfo{OutputTextFile}) {
      %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
      SetupDataValues();
      WriteTextFileCmpdData();
    }
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
526
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Extract 2D compound records...
#
# Arguments:
#   $FileIndex - index of the input SD file; used to look up its data field
#                labels in $SDFilesInfo{AllDataFieldLabels}.
#
# Every compound for which IsCmpd2D returns a true value is written out; all
# other compounds are skipped.
#
sub Extract2DCmpdRecords {
  my($FileIndex) = @_;
  my(@CmpdLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    @CmpdLines = split "\n", $SDFilesInfo{CmpdString};

    if (IsCmpd2D(\@CmpdLines)) {
      WriteSDFileCmpdString();

      if ($OptionsInfo{OutputTextFile}) {
        %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
        SetupDataValues();
        WriteTextFileCmpdData();
      }
    }
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
551
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Extract 3D compound records...
#
# Arguments:
#   $FileIndex - index of the input SD file; used to look up its data field
#                labels in $SDFilesInfo{AllDataFieldLabels}.
#
# Every compound for which IsCmpd3D returns a true value is written out; all
# other compounds are skipped.
#
sub Extract3DCmpdRecords {
  my($FileIndex) = @_;
  my(@CmpdLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    @CmpdLines = split "\n", $SDFilesInfo{CmpdString};

    if (IsCmpd3D(\@CmpdLines)) {
      WriteSDFileCmpdString();

      if ($OptionsInfo{OutputTextFile}) {
        %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
        SetupDataValues();
        WriteTextFileCmpdData();
      }
    }
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
576
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
577
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Open input and output files...
#
# Arguments:
#   $FileIndex - index into @SDFilesList and into the new file name lists in
#                %SDFilesInfo.
#
# Opens the input SD file for reading and, depending on
# $OptionsInfo{OutputSDFile} and $OptionsInfo{OutputTextFile}, the new SD
# and/or text file for writing. References to the open handles are stored in
# $SDFilesInfo{InputSDFileRef}, $SDFilesInfo{NewSDFileRef} and
# $SDFilesInfo{NewTextFileRef}; the output refs are left undefined for outputs
# that are not generated. Dies when any file can't be opened.
#
# The three-argument form of open is used so that characters in a file name
# (leading '>' or '<', leading or trailing '|', surrounding whitespace) are
# never interpreted as an open mode or a command pipe.
#
sub OpenInputAndOutputFiles {
  my($FileIndex) = @_;

  $SDFilesInfo{NewTextFileRef} = undef;
  $SDFilesInfo{NewSDFileRef} = undef;

  if ($OptionsInfo{OutputTextFile} && $OptionsInfo{OutputSDFile}) {
    print "Generating files $SDFilesInfo{NewSDFileName}[$FileIndex] and $SDFilesInfo{NewTextFileName}[$FileIndex]...\n";
  }
  elsif ($OptionsInfo{OutputSDFile}) {
    print "Generating file $SDFilesInfo{NewSDFileName}[$FileIndex]...\n";
  }
  else {
    print "Generating file $SDFilesInfo{NewTextFileName}[$FileIndex]...\n";
  }

  # The named file handles are kept as is, in case code elsewhere in this
  # file refers to them by name...
  if ($OptionsInfo{OutputSDFile}) {
    open NEWSDFILE, ">", $SDFilesInfo{NewSDFileName}[$FileIndex] or die "Error: Couldn't open $SDFilesInfo{NewSDFileName}[$FileIndex]: $! \n";
    $SDFilesInfo{NewSDFileRef} = \*NEWSDFILE;
  }
  if ($OptionsInfo{OutputTextFile}) {
    open NEWTEXTFILE, ">", $SDFilesInfo{NewTextFileName}[$FileIndex] or die "Error: Couldn't open $SDFilesInfo{NewTextFileName}[$FileIndex]: $! \n";
    $SDFilesInfo{NewTextFileRef} = \*NEWTEXTFILE;
  }

  open SDFILE, "<", $SDFilesList[$FileIndex] or die "Error: Couldn't open $SDFilesList[$FileIndex]: $! \n";
  $SDFilesInfo{InputSDFileRef} = \*SDFILE;

}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
608
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Close open input and output files...
#
# Closes whichever of the new SD file, new text file and input SD file handles
# are currently set in %SDFilesInfo, and then resets all three handle refs to
# undef.
#
# Output is buffered, so a failed write (for example, a full disk) may only be
# reported when the handle is closed. A warning is issued for such failures on
# the output files instead of ignoring them; processing continues as before.
#
sub CloseInputAndOutputFiles {
  if ($SDFilesInfo{NewSDFileRef}) {
    close($SDFilesInfo{NewSDFileRef}) or warn "Warning: Couldn't close new SD file: $! \n";
  }
  if ($SDFilesInfo{NewTextFileRef}) {
    close($SDFilesInfo{NewTextFileRef}) or warn "Warning: Couldn't close new text file: $! \n";
  }

  if ($SDFilesInfo{InputSDFileRef}) {
    close $SDFilesInfo{InputSDFileRef};
  }

  $SDFilesInfo{NewTextFileRef} = undef;
  $SDFilesInfo{NewSDFileRef} = undef;
  $SDFilesInfo{InputSDFileRef} = undef;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
626
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Write out column labels for text file...
#
# Does nothing unless $OptionsInfo{OutputTextFile} is set. Otherwise, the
# labels in $SDFilesInfo{DataLabels} are joined using $OptionsInfo{OutDelim}
# and $OptionsInfo{OutQuote} and written as one line to the text file. When
# $OptionsInfo{OutputStrDataString} is set, a "StructureDataString" label is
# added after the data labels; $SDFilesInfo{DataLabels} itself is not changed.
#
sub WriteTextFileColLabels {
  return if !$OptionsInfo{OutputTextFile};

  my $OutFileRef = $SDFilesInfo{NewTextFileRef};
  my $HeaderLine;

  if (!$OptionsInfo{OutputStrDataString}) {
    $HeaderLine = JoinWords(\@{$SDFilesInfo{DataLabels}}, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
  }
  else {
    # Work on a copy so that the extra label doesn't end up in DataLabels...
    my @LabelsWithStrData = (@{$SDFilesInfo{DataLabels}}, "StructureDataString");

    $HeaderLine = JoinWords(\@LabelsWithStrData, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
  }

  print $OutFileRef "$HeaderLine\n";
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
651
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Setup values for data fields...
#
# Fills $SDFilesInfo{DataValues} with one value per label in
# $SDFilesInfo{DataLabels}, in the same order. The value comes from
# $SDFilesInfo{DataFieldValues}; a label with no entry there gets an empty
# string.
#
sub SetupDataValues {
  my @Labels = @{$SDFilesInfo{DataLabels}};
  my @Values = ();

  for my $Label (@Labels) {
    if (exists $SDFilesInfo{DataFieldValues}{$Label}) {
      push @Values, $SDFilesInfo{DataFieldValues}{$Label};
    }
    else {
      push @Values, "";
    }
  }

  @{$SDFilesInfo{DataValues}} = @Values;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
656
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Write out structure data and specific data fields to SD file...
#
# Does nothing unless $OptionsInfo{OutputSDFile} is set. Otherwise, writes to
# the new SD file:
#
#   . the part of $SDFilesInfo{CmpdString} before the first "M  END" line tag,
#     followed by the tag itself
#   . one "> <label>" entry for each label in $SDFilesInfo{DataLabels}, using
#     the value at the same position in $SDFilesInfo{DataValues}
#   . the "$$$$" record terminator
#
# The end of the structure data block is marked in SD files by "M  END", with
# two spaces between M and END. It's matched here with any number of spaces in
# between, so a tag with different spacing is still recognized, and it's always
# written out in the standard two space form.
#
sub WriteSDFileCmpdData {
  my($MolString, $Count, $NewSDFileRef, $MolEndTag);

  if (!$OptionsInfo{OutputSDFile}) {
    return;
  }

  $NewSDFileRef = $SDFilesInfo{NewSDFileRef};

  # Standard tag: M, two spaces, END...
  $MolEndTag = "M" . (" " x 2) . "END";

  # Only the text before the first tag is kept; when there is no tag at all,
  # this is the whole compound string...
  ($MolString) = split /M[ ]+END/, $SDFilesInfo{CmpdString};
  $MolString .= $MolEndTag;
  print $NewSDFileRef "$MolString\n";

  for $Count (0 .. $#{$SDFilesInfo{DataLabels}}) {
    print $NewSDFileRef "> <$SDFilesInfo{DataLabels}[$Count]>\n$SDFilesInfo{DataValues}[$Count]\n\n";
  }
  print $NewSDFileRef "\$\$\$\$\n";
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
676
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Write out compound string...
#
# Does nothing unless $OptionsInfo{OutputSDFile} is set. Otherwise, writes
# $SDFilesInfo{CmpdString} followed by a newline to the new SD file.
#
sub WriteSDFileCmpdString {
  return unless $OptionsInfo{OutputSDFile};

  print {$SDFilesInfo{NewSDFileRef}} "$SDFilesInfo{CmpdString}\n";
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
688
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Write out data for text file...
#
# Emits one line for the current compound to the handle stored in
# $SDFilesInfo{NewTextFileRef}. The line holds the values in
# @{$SDFilesInfo{DataValues}} joined by JoinWords using the configured output
# delimiter and quoting. When $OptionsInfo{OutputStrDataString} is set, the
# structure data string is appended as one more column.
#
# Does nothing unless $OptionsInfo{OutputTextFile} is set.
#
sub WriteTextFileCmpdData {
  my($DataValuesLine, $NewTextFileRef);

  if (!$OptionsInfo{OutputTextFile}) {
    return;
  }

  $NewTextFileRef = $SDFilesInfo{NewTextFileRef};
  $DataValuesLine = JoinWords(\@{$SDFilesInfo{DataValues}}, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});

  # Handle multiple lines data values for data fields by joining 'em using semicolons;
  # the substitution is a no-op when there are no embedded newlines...
  $DataValuesLine =~ s/\n/;/g;

  if ($OptionsInfo{OutputStrDataString}) {
    # Append structure data string...
    my($StrDataString, $OutQuoteValue, $OutDelim, $StrDataStringDelimiter);

    if ($OptionsInfo{StrDataStringWithFields}) {
      # Whole record: connection table along with the data fields...
      $StrDataString = $SDFilesInfo{CmpdString};
    }
    else {
      # Keep only the part up to and including the molfile terminator line. The
      # terminator is "M  END" with two spaces; matching on a single space never
      # finds it and lets the data fields leak into the output...
      ($StrDataString) = split /M  END/, $SDFilesInfo{CmpdString}, 2;
      $StrDataString .= "M  END";
    }

    # Flatten the multi-line structure data into a single text file column...
    $StrDataStringDelimiter = $OptionsInfo{StrDataStringDelimiter};
    $StrDataString =~ s/\n/$StrDataStringDelimiter/g;

    $OutDelim = $OptionsInfo{OutDelim};
    $OutQuoteValue = $OptionsInfo{OutQuote} ? "\"" : "";

    print $NewTextFileRef "$DataValuesLine${OutDelim}${OutQuoteValue}${StrDataString}${OutQuoteValue}\n";
  }
  else {
    print $NewTextFileRef "$DataValuesLine\n";
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
728
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Retrieve information about input SD files...
#
# Resets %SDFilesInfo and fills in the following lists, each indexed by the
# position of the file in @SDFilesList:
#
#   FileOkay              - 1 when the file passed all the checks; 0 otherwise
#   CmpdCount             - number of compound records read while scanning
#                           the file; 0 when no scan is needed for the mode
#   NewSDFileName         - name of the SD file to be generated
#   NewTextFileName       - name of the text file to be generated
#   AllDataFieldLabels    - data field labels collected while scanning
#   CommonDataFieldLabels - labels present in every compound scanned; filled
#                           in only for commondatafields mode
#
# A file is skipped with a warning, and stays marked as not okay, when it
# doesn't exist, isn't a SD file, would be its own output file, would replace
# an existing output file without the overwrite option, or can't be opened.
#
sub RetrieveSDFilesInfo {
  my($SDFile, $Index, $FileDir, $FileExt, $FileName, $NewFileName, $NewSDFileName, $NewTextFileName, $CmpdCount);

  %SDFilesInfo = ();

  @{$SDFilesInfo{FileOkay}} = ();
  @{$SDFilesInfo{CmpdCount}} = ();
  @{$SDFilesInfo{NewTextFileName}} = ();
  @{$SDFilesInfo{NewSDFileName}} = ();

  @{$SDFilesInfo{AllDataFieldLabels}} = ();
  @{$SDFilesInfo{CommonDataFieldLabels}} = ();

  FILELIST: for $Index (0 .. $#SDFilesList) {
    $SDFile = $SDFilesList[$Index];

    # Start out with the file marked as not okay; it's flipped only after all
    # the checks and the scan below have gone through...
    $SDFilesInfo{FileOkay}[$Index] = 0;

    $SDFilesInfo{CmpdCount}[$Index] = 0;
    $SDFilesInfo{NewTextFileName}[$Index] = "";
    $SDFilesInfo{NewSDFileName}[$Index] = "";

    @{$SDFilesInfo{AllDataFieldLabels}[$Index]} = ();
    @{$SDFilesInfo{CommonDataFieldLabels}[$Index]} = ();

    if (!(-e $SDFile)) {
      warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
      next FILELIST;
    }

    if (!CheckFileType($SDFile, "sd sdf")) {
      warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
      next FILELIST;
    }

    # Generate appropriate name for the new output file.
    $FileDir = ""; $FileName = ""; $FileExt = "";
    ($FileDir, $FileName, $FileExt) = ParseFileName($SDFile);
    $NewFileName = $FileName . $OptionsInfo{FileNameMode};

    # A user specified root is honored only for a single input file. When it
    # parses into both a name and an extension, just the name part is used;
    # otherwise, the root is used as specified...
    if ($OptionsInfo{OutFileRoot} && (@SDFilesList == 1)) {
      my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
      if ($RootFileName && $RootFileExt) {
        $NewFileName = $RootFileName;
      }
      else {
        $NewFileName = $OptionsInfo{OutFileRoot};
      }
    }
    $NewSDFileName = $NewFileName . ".$OptionsInfo{SDFileExt}";
    $NewTextFileName = $NewFileName . ".$OptionsInfo{TextFileExt}";

    if ($OptionsInfo{OutputSDFile}) {
      if (lc($NewSDFileName) eq lc($SDFile)) {
        warn "Warning: Ignoring input file $SDFile: Same output, $NewSDFileName, and input file names.\n";
        print "Specify a different name using \"-r --root\" option or use default name.\n";
        next FILELIST;
      }
    }

    if (!$OptionsInfo{Overwrite}) {
      if ($OptionsInfo{OutputSDFile}) {
        if (-e $NewSDFileName) {
          warn "Warning: Ignoring file $SDFile: New file, $NewSDFileName, already exists\n";
          next FILELIST;
        }
      }
      if ($OptionsInfo{OutputTextFile}) {
        if (-e $NewTextFileName) {
          warn "Warning: Ignoring file $SDFile: New file, $NewTextFileName, already exists\n";
          next FILELIST;
        }
      }
    }

    # Use three argument open with an explicit read mode: the file name comes
    # from the command line and must never be interpreted as a mode or a pipe...
    if (!open(SDFILE, "<", $SDFile)) {
      warn "Warning: Ignoring file $SDFile: Couldn't open it: $! \n";
      next FILELIST;
    }

    my($CountCmpds, $CollectDataFields);
    my($CmpdString, @CmpdLines);
    my(@DataFieldLabels) = ();
    my(@CommonDataFieldLabels) = ();
    my(%DataFieldLabelsMap) = ();

    $CountCmpds = ($OptionsInfo{Mode} =~ /^randomcmpds$/i) ? 1 : 0;

    # Data field labels are collected only when a text file is being generated
    # for one of these modes...
    $CollectDataFields = ($OptionsInfo{OutputTextFile} && $OptionsInfo{Mode} =~ /^(?:alldatafields|commondatafields|randomcmpds|datafieldsbyvalue|datafieldsbyregex|datafieldbylist|datafielduniquebylist|datafieldnotbylist|recordnum|recordnums|recordrange)$/i) ? 1 : 0;

    $CmpdCount = 0;
    if ($CountCmpds || $CollectDataFields) {
      CMPDSTRING: while ($CmpdString = ReadCmpdString(\*SDFILE)) {
        $CmpdCount++;

        if ($OptionsInfo{Mode} =~ /^recordnum$/i) {
          if ($CmpdCount == $OptionsInfo{RecordNum}) {
            # Only the labels of the requested record matter; stop scanning...
            @CmpdLines = split "\n", $CmpdString;
            @DataFieldLabels = GetCmpdDataHeaderLabels(\@CmpdLines);
            last CMPDSTRING;
          }
        }

        if ($CollectDataFields) {
          my($Label);
          @CmpdLines = split "\n", $CmpdString;

          # Process compound data header labels and figure out which ones are present for
          # all the compounds...
          #
          # The first compound is identified by its count and not by an empty label
          # list: a leading compound without any data fields would otherwise let the
          # labels of a later compound be marked as present in all the compounds...
          if ($CmpdCount > 1) {
            my (@CmpdDataFieldLabels) = GetCmpdDataHeaderLabels(\@CmpdLines);
            my(%CmpdDataFieldLabelsMap) = ();

            # Setup a map for the current labels...
            for $Label (@CmpdDataFieldLabels) {
              $CmpdDataFieldLabelsMap{$Label} = "PresentInSome";
            }

            # Any known label missing from this compound is no longer present in all...
            for $Label (@DataFieldLabels) {
              if (!$CmpdDataFieldLabelsMap{$Label}) {
                $DataFieldLabelsMap{$Label} = "PresentInSome";
              }
            }

            # Any label of this compound not seen before is added as present in some...
            for $Label (@CmpdDataFieldLabels ) {
              if (!$DataFieldLabelsMap{$Label}) {
                # It's a new label...
                push @DataFieldLabels, $Label;
                $DataFieldLabelsMap{$Label} = "PresentInSome";
              }
            }
          }
          else {
            # Get the initial label set and set up a map...
            @DataFieldLabels = GetCmpdDataHeaderLabels(\@CmpdLines);
            for $Label (@DataFieldLabels) {
              $DataFieldLabelsMap{$Label} = "PresentInAll";
            }
          }
        }
      }

      # Identify the common data field labels once the whole file has been scanned;
      # only the state after the last compound matters...
      if ($CollectDataFields && $OptionsInfo{Mode} =~ /^commondatafields$/i) {
        my($Label);
        for $Label (@DataFieldLabels) {
          if ($DataFieldLabelsMap{$Label} eq "PresentInAll") {
            push @CommonDataFieldLabels, $Label;
          }
        }
      }
    }

    $SDFilesInfo{FileOkay}[$Index] = 1;

    $SDFilesInfo{NewTextFileName}[$Index] = $NewTextFileName;
    $SDFilesInfo{NewSDFileName}[$Index] = $NewSDFileName;

    $SDFilesInfo{CmpdCount}[$Index] = $CmpdCount;

    push @{$SDFilesInfo{AllDataFieldLabels}[$Index]}, @DataFieldLabels;
    push @{$SDFilesInfo{CommonDataFieldLabels}[$Index]}, @CommonDataFieldLabels;

    close SDFILE;
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
891
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
892 # Process options...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
893 sub ProcessOptions {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
894 %OptionsInfo = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
895
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
896 $OptionsInfo{Mode} = $Options{mode};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
897
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
898 $OptionsInfo{InDelim} = "\,";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
899 if ($Options{indelim} =~ /^semicolon$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
900 $OptionsInfo{InDelim} = "\;";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
901 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
902 elsif ($Options{indelim} =~ /^tab$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
903 $OptionsInfo{InDelim} = "\t";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
904 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
905
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
906 $OptionsInfo{OutDelim} = "\,";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
907 if ($Options{outdelim} =~ /^semicolon$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
908 $OptionsInfo{OutDelim} = "\;";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
909 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
910 elsif ($Options{outdelim} =~ /^tab$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
911 $OptionsInfo{OutDelim} = "\t";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
912 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
913
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
914 $OptionsInfo{OutQuote} = ($Options{quote} =~ /^yes$/i) ? 1 : 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
915
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
916 $OptionsInfo{RegexIgnoreCase} = ($Options{regexignorecase} =~ /^yes$/i) ? 1 : 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
917
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
918 $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : undef;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
919 $OptionsInfo{Overwrite} = $Options{overwrite} ? $Options{overwrite} : undef;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
920
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
921 $OptionsInfo{NumOfCmpds} = $Options{numofcmpds};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
922
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
923 $OptionsInfo{ValueComparisonMode} = $Options{valuecomparisonmode};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
924 $OptionsInfo{NumericalComparison} = ($Options{valuecomparisonmode} =~ /^Numeric$/i) ? 1 : 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
925
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
926 $OptionsInfo{Violations} = $Options{violations};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
927 $OptionsInfo{Seed} = $Options{seed};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
928
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
929
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
930 if ($Options{mode} =~ /^(datafields|datafieldsbyregex|datafieldsbyvalue|datafieldbylist|datafielduniquebylist|datafieldnotbylist)$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
931 if ($Options{datafields} || $Options{datafieldsfile}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
932 if ($Options{datafields} && $Options{datafieldsfile}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
933 die "Error: For \"-m --mode\" option values of datafields, datafieldsbyvalue, datafieldsbyregex, datafieldbylist, datafielduniquebylist, or datafieldnotbylist specify only one of the \"-d --datafields\" or \"--datafieldsfile\" option.\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
934 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
935 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
936 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
937 die "Error: For \"-m --mode\" option values of datafields, datafieldsbyvalue, datafieldsbyregex, datafieldbylist, datafielduniquebylist, or datafieldnotbylist specify one of the \"-d --datafields\" or \"--datafieldsfile\" option.\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
938 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
939 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
940 $OptionsInfo{DataFields} = $Options{datafields} ? $Options{datafields} : undef;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
941 $OptionsInfo{DataFieldsFile} = $Options{datafieldsfile} ? $Options{datafieldsfile} : undef;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
942
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
943 $OptionsInfo{RecordNum} = 0; $OptionsInfo{StartRecordNum} = 0; $OptionsInfo{EndRecordNum} = 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
944
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
945 %{$OptionsInfo{RecordNums}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
946 $OptionsInfo{RecordNumsMin} = 0; $OptionsInfo{RecordNumsMax} = 0; $OptionsInfo{RecordNumsCount} = 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
947
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
948 $OptionsInfo{Record} = $Options{record} ? $Options{record} : undef;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
949
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
950 if ($Options{mode} =~ /^(recordnum|recordnums|recordrange)$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
951 if ($Options{record}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
952 my($Record, @RecordSplit);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
953
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
954 $Record = $Options{record};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
955 $Record =~ s/ //g;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
956
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
957 @RecordSplit = split ",", $Record;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
958
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
959 if ($Options{mode} =~ /^recordnum$/i ) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
960 if (@RecordSplit == 1) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
961 $OptionsInfo{RecordNum} = $RecordSplit[0];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
962 if ($OptionsInfo{RecordNum} <= 0) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
963 die "Error: The value specified, $OptionsInfo{RecordNum}, for option \"--records\" is not valid. Allowed values: > 0 \n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
964 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
965 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
966 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
967 die "Error: Invalid number of values, ", scalar(@RecordSplit), ", specified using \"--record\" option: only 1 value is allowed.\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
968 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
969 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
970 elsif ($Options{mode} =~ /^recordnums$/i ) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
971 my($RecordNum, $RecordCount, @SortedRecordSplit);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
972
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
973 @SortedRecordSplit = sort { $a <=> $b } @RecordSplit;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
974
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
975 $RecordCount = 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
976 RECORDNUM: for $RecordNum (@SortedRecordSplit) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
977 if (exists $OptionsInfo{RecordNums}{$RecordNum}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
978 next RECORDNUM;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
979 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
980 $RecordCount++;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
981 $OptionsInfo{RecordNums}{$RecordNum} = $RecordNum;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
982 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
983 $OptionsInfo{RecordNumsCount} = $RecordCount;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
984 $OptionsInfo{RecordNumsMin} = $SortedRecordSplit[0];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
985 $OptionsInfo{RecordNumsMax} = $SortedRecordSplit[$#SortedRecordSplit];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
986 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
987 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
988 if (@RecordSplit == 2) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
989 $OptionsInfo{StartRecordNum} = $RecordSplit[0];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
990 $OptionsInfo{EndRecordNum} = $RecordSplit[1];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
991 if ($OptionsInfo{StartRecordNum} <= 0 || $OptionsInfo{EndRecordNum} <= 0) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
992 die "Error: The value pair specified, $Options{record}, for option \"--records\" is not valid. Allowed values: > 0 \n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
993 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
994 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
995 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
996 die "Error: Invalid number of values, ", scalar(@RecordSplit), ", specified using \"--record\" option: only 2 values is allowed.\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
997 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
998 if ($OptionsInfo{StartRecordNum} > $OptionsInfo{EndRecordNum}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
999 die "Error: Start record number, $OptionsInfo{StartRecordNum}, must be smaller than end record number, $OptionsInfo{EndRecordNum}.\nSpecify different values using \"--record\" option.\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1000 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1001 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1002 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1003 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1004 die "Error: For \"-m --mode\" option values recordnum, recordnums or recordrange, specify \"--record\" option value.\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1005 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1006 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1007
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1008 @{$OptionsInfo{SpecifiedDataFieldLabels}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1009
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1010 my(@Words, $Line, $Value);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1011 if ($Options{mode} =~ /^datafields$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1012 @{$OptionsInfo{SpecifiedDataFieldLabels}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1013 if ($Options{datafields}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1014 @{$OptionsInfo{SpecifiedDataFieldLabels}} = split $OptionsInfo{InDelim}, $Options{datafields};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1015 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1016 elsif ($Options{datafieldsfile}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1017 open DATAFIELDSFILE, "$Options{datafieldsfile}" or die "Error: Couldn't open $Options{datafieldsfile}: $! \n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1018 while ($Line = GetTextLine(\*DATAFIELDSFILE)) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1019 @Words = quotewords($OptionsInfo{InDelim}, 0, $Line);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1020 if (@Words) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1021 push @{$OptionsInfo{SpecifiedDataFieldLabels}}, @Words;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1022 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1023 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1024 close DATAFIELDSFILE;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1025 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1026 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1027 elsif ($Options{mode} =~ /^datafieldsbyvalue$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1028 my(@DataFieldsByValueTriplets);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1029 @DataFieldsByValueTriplets = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1030 if ($Options{datafields}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1031 @DataFieldsByValueTriplets = split $OptionsInfo{InDelim}, $Options{datafields};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1032 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1033 elsif ($Options{datafieldsfile}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1034 open DATAFIELDSFILE, "$Options{datafieldsfile}" or die "Error: Couldn't open $Options{datafieldsfile}: $! \n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1035 while ($Line = GetTextLine(\*DATAFIELDSFILE)) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1036 @Words = quotewords($OptionsInfo{InDelim}, 0, $Line);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1037 if (@Words) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1038 push @DataFieldsByValueTriplets, @Words;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1039 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1040 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1041 close DATAFIELDSFILE;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1042 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1043 if ((@DataFieldsByValueTriplets % 3)) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1044 if ($Options{datafields}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1045 die "Error: Triplets not found in values specified by \"-d --datafields\" option\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1046 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1047 elsif ($Options{datafieldsfile}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1048 die "Error: Triplets not found in values specified by \"--datafieldsfile\" option\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1049 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1050 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1051 my($Index, $Label, $Value, $Criterion);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1052
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1053 @{$OptionsInfo{SpecifiedDataFieldLabels}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1054 %{$OptionsInfo{SpecifiedDataFieldValuesMap}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1055 %{$OptionsInfo{SpecifiedDataFieldCriteriaMap}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1056
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1057 for ($Index = 0; $Index < @DataFieldsByValueTriplets; $Index = $Index + 3) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1058 $Label = $DataFieldsByValueTriplets[$Index];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1059 $Value = $DataFieldsByValueTriplets[$Index + 1];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1060 $Criterion = $DataFieldsByValueTriplets[$Index + 2];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1061
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1062 if ($Criterion =~ /^(eq|le|ge)$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1063 push @{$OptionsInfo{SpecifiedDataFieldLabels}}, $Label;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1064 $OptionsInfo{SpecifiedDataFieldValuesMap}{$Label} = $Value;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1065 $OptionsInfo{SpecifiedDataFieldCriteriaMap}{$Label} = $Criterion;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1066 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1067 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1068 warn "Warning: Ignoring triplet value, $Label $Value $Criterion , specified using \"-d --datafields\" or \"--datafieldsfile\" option: Invalid criterion value: $Criterion\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1069 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1070 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1071 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1072 elsif ($Options{mode} =~ /^datafieldsbyregex$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1073 my(@DataFieldsByRegexTriplets);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1074
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1075 @DataFieldsByRegexTriplets = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1076 if ($Options{datafields}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1077 @DataFieldsByRegexTriplets = quotewords($OptionsInfo{InDelim}, 0, $Options{datafields});
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1078 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1079 elsif ($Options{datafieldsfile}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1080 open DATAFIELDSFILE, "$Options{datafieldsfile}" or die "Error: Couldn't open $Options{datafieldsfile}: $! \n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1081 while ($Line = GetTextLine(\*DATAFIELDSFILE)) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1082 @Words = quotewords($OptionsInfo{InDelim}, 0, $Line);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1083 if (@Words) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1084 push @DataFieldsByRegexTriplets, @Words;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1085 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1086 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1087 close DATAFIELDSFILE;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1088 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1089 if ((@DataFieldsByRegexTriplets % 3)) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1090 if ($Options{datafields}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1091 die "Error: Triplet not found in values specified by \"-d --datafields\" option\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1092 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1093 elsif ($Options{datafieldsfile}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1094 die "Error: Triplet not found in values specified by \"--datafieldsfile\" option\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1095 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1096 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1097
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1098 my($Index, $Label, $Value, $Criterion);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1099
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1100 @{$OptionsInfo{SpecifiedDataFieldLabels}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1101 %{$OptionsInfo{SpecifiedDataFieldRegexMap}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1102 %{$OptionsInfo{SpecifiedDataFieldRegexCriteriaMap}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1103
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1104 for ($Index = 0; $Index < @DataFieldsByRegexTriplets; $Index = $Index + 3) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1105 $Label = $DataFieldsByRegexTriplets[$Index];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1106 $Value = $DataFieldsByRegexTriplets[$Index + 1];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1107 $Criterion = $DataFieldsByRegexTriplets[$Index + 2];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1108
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1109 if ($Criterion =~ /^(eq|ne)$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1110 push @{$OptionsInfo{SpecifiedDataFieldLabels}}, $Label;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1111 $OptionsInfo{SpecifiedDataFieldRegexMap}{$Label} = $Value;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1112 $OptionsInfo{SpecifiedDataFieldRegexCriteriaMap}{$Label} = $Criterion;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1113 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1114 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1115 warn "Warning: Ignoring triplet value, $Label $Value $Criterion , specified using \"-d --datafields\" or \"--datafieldsfile\" option: Invalid criterion value: $Criterion; Supported values: eq or ne\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1116 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1117 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1118 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1119 elsif ($Options{mode} =~ /^(datafieldbylist|datafielduniquebylist|datafieldnotbylist)$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1120 my($Index, @DataFieldAndValuesList);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1121 if ($Options{datafields}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1122 @DataFieldAndValuesList = split $OptionsInfo{InDelim}, $Options{datafields};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1123 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1124 elsif ($Options{datafieldsfile}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1125 open DATAFIELDSFILE, "$Options{datafieldsfile}" or die "Error: Couldn't open $Options{datafieldsfile}: $! \n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1126 while ($Line = GetTextLine(\*DATAFIELDSFILE)) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1127 @Words = quotewords($OptionsInfo{InDelim}, 0, $Line);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1128 if (@Words) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1129 push @DataFieldAndValuesList, @Words;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1130 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1131 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1132 close DATAFIELDSFILE;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1133 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1134 if (@DataFieldAndValuesList < 2) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1135 if ($Options{datafields}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1136 die "Error: Invalid number of values specified by \"-d --datafields\" option\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1137 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1138 elsif ($Options{datafieldsfile}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1139 die "Error: Invalid number values specified by \"--datafieldsfile\" option\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1140 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1141 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1142
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1143 $OptionsInfo{SpecifiedDataFieldLabel} = $DataFieldAndValuesList[0];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1144 $OptionsInfo{SpecifiedDataFieldValuesCount} = @DataFieldAndValuesList - 1;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1145 %{$OptionsInfo{SpecifiedDataFieldValues}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1146
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1147 for ($Index = 1; $Index < @DataFieldAndValuesList; $Index++) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1148 $Value = $DataFieldAndValuesList[$Index];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1149 $OptionsInfo{SpecifiedDataFieldValues}{$Value} = "NotFound";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1150 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1151 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1152
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1153 $OptionsInfo{SDFileExt} = "sdf";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1154 $OptionsInfo{TextFileExt} = "csv";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1155
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1156 if ($Options{outdelim} =~ /^tab$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1157 $OptionsInfo{TextFileExt} = "tsv";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1158 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1159
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1160 if ($Options{mode} =~ /^(alldatafields|molnames)$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1161 $OptionsInfo{OutputSDFile} = 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1162 $OptionsInfo{OutputTextFile} = 1;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1163 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1164 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1165 $OptionsInfo{OutputSDFile} = ($Options{output} =~ /^(SD|both)$/i) ? 1 : 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1166 $OptionsInfo{OutputTextFile} = ($Options{output} =~ /^(text|both)$/i) ? 1 : 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1167 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1168
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1169 $OptionsInfo{StrDataString} = $Options{strdatastring};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1170 $OptionsInfo{OutputStrDataString} = ($Options{strdatastring} =~ /^Yes$/i) ? 1 : 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1171
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1172 $OptionsInfo{StrDataStringDelimiter} = $Options{strdatastringdelimiter};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1173
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1174 if (IsEmpty($Options{strdatastringdelimiter})) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1175 die "Error: No value specified for \"--StrDataStringDelimiter\" option.\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1176 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1177 $OptionsInfo{StrDataStringMode} = $Options{strdatastringmode};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1178 $OptionsInfo{StrDataStringWithFields} = $Options{strdatastringmode} =~ /^StrAndDataFields$/i ? 1 : 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1179
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1180 MODE: {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1181 if ($Options{mode} =~ /^alldatafields$/i) { $OptionsInfo{FileNameMode} = "AllDataDields"; last MODE; }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1182 if ($Options{mode} =~ /^commondatafields$/i) { $OptionsInfo{FileNameMode} = "CommonDataDields"; last MODE; }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1183 if ($Options{mode} =~ /^datafields$/i) { $OptionsInfo{FileNameMode} = "SpecifiedDataFields"; last MODE; }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1184 if ($Options{mode} =~ /^datafieldsbyvalue$/i) { $OptionsInfo{FileNameMode} = "SpecifiedDataFieldsByValue"; last MODE; }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1185 if ($Options{mode} =~ /^datafieldsbyregex$/i) { $OptionsInfo{FileNameMode} = "SpecifiedDataFieldsByRegex"; last MODE; }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1186 if ($Options{mode} =~ /^datafieldbylist$/i) { $OptionsInfo{FileNameMode} = "SpecifiedDataField"; last MODE; }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1187 if ($Options{mode} =~ /^datafielduniquebylist$/i) { $OptionsInfo{FileNameMode} = "SpecifiedUniqueDataField"; last MODE; }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1188 if ($Options{mode} =~ /^datafieldnotbylist$/i) { $OptionsInfo{FileNameMode} = "SpecifiedDataFieldNotByList"; last MODE; }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1189 if ($Options{mode} =~ /^molnames$/i) { $OptionsInfo{FileNameMode} = "MolName"; last MODE; }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1190 if ($Options{mode} =~ /^randomcmpds$/i) { $OptionsInfo{FileNameMode} = "RandomCmpds"; last MODE; }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1191 if ($Options{mode} =~ /^recordnum$/i) { $OptionsInfo{FileNameMode} = "RecordNum$OptionsInfo{RecordNum}"; last MODE; }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1192 if ($Options{mode} =~ /^recordnums$/i) { $OptionsInfo{FileNameMode} = "RecordNums"; last MODE; }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1193 if ($Options{mode} =~ /^recordrange$/i) { $OptionsInfo{FileNameMode} = "RecordNum$OptionsInfo{StartRecordNum}" . "To" . "$OptionsInfo{EndRecordNum}"; last MODE; }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1194 if ($Options{mode} =~ /^2dcmpdrecords$/i) { $OptionsInfo{FileNameMode} = "2DCmpdRecords"; last MODE; }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1195 if ($Options{mode} =~ /^3dcmpdrecords$/i) { $OptionsInfo{FileNameMode} = "3DCmpdRecords"; last MODE; }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1196 die "Error: The value specified, $Options{mode}, for option \"-m --mode\" is not valid. Allowed values: alldatafields, commondatafields, datafields, datafieldsbyvalue, datafieldbylist, datafielduniquebylist, , datafieldnotbylist, molnames, randomcmpds, recordnum, recordnums, recordrange, 2dcmpdrecords, 3dcmpdrecords\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1197 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1198
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1199 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1200
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Setup script usage and retrieve command line arguments specified using various options...
#
# Resets the file-level %Options hash to its default values, parses the command
# line into it using GetOptions, changes into the directory given by
# "-w --workingdir" (when specified), and validates the numeric and enumerated
# option values. Dies with an "Error: ..." message on the first invalid value.
#
sub SetupScriptUsage {

  # Retrieve all the options...
  %Options = ();

  # Default values; any of these may be overridden on the command line...
  $Options{numofcmpds} = 1;
  $Options{mode} = "alldatafields";
  $Options{indelim} = "comma";
  $Options{outdelim} = "comma";
  $Options{output} = "SD";
  $Options{quote} = "yes";
  $Options{regexignorecase} = "yes";
  $Options{valuecomparisonmode} = "numeric";
  $Options{violations} = 0;
  $Options{seed} = 123456789;

  $Options{strdatastring} = "no";
  $Options{strdatastringdelimiter} = "|";
  $Options{strdatastringmode} = "StrOnly";

  my(@OptionsSpecification);
  @OptionsSpecification = (
    "help|h", "datafields|d=s", "datafieldsfile=s", "indelim=s", "mode|m=s",
    "numofcmpds|n=i", "outdelim=s", "output=s", "overwrite|o", "quote|q=s",
    "regexignorecase=s", "record=s", "root|r=s", "seed|s=i", "strdatastring=s",
    "strdatastringdelimiter=s", "strdatastringmode=s", "valuecomparisonmode=s",
    "violations|v=i", "workingdir|w=s"
  );
  if (!GetOptions(\%Options, @OptionsSpecification)) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }

  # Switch to the working directory before any further processing...
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  if ($Options{numofcmpds} < 1) {
    die "Error: The value specified, $Options{numofcmpds}, for option \"-n --numofcmpds\" is not valid. Allowed values: >= 1 \n";
  }
  if ($Options{valuecomparisonmode} !~ /^(Numeric|Alphanumeric)$/i) {
    die "Error: The value specified, $Options{valuecomparisonmode}, for option \"--ValueComparisonMode\" is not valid. Allowed values: Numeric or Alphanumeric\n";
  }
  if ($Options{violations} < 0) {
    die "Error: The value specified, $Options{violations}, for option \"-v --violations\" is not valid. Allowed values: >= 0 \n";
  }

  # Use a single list of mode names for both the validation pattern and the
  # error message to keep the reported allowed values in sync with the values
  # actually accepted (e.g. datafieldsbyregex)...
  my(@ValidModes, $ValidModesPattern);
  @ValidModes = qw(alldatafields commondatafields datafields datafieldsbyvalue datafieldsbyregex datafieldbylist datafielduniquebylist datafieldnotbylist molnames randomcmpds recordnum recordnums recordrange 2dcmpdrecords 3dcmpdrecords);
  $ValidModesPattern = join "|", @ValidModes;
  if ($Options{mode} !~ /^($ValidModesPattern)$/i) {
    die "Error: The value specified, $Options{mode}, for option \"-m --mode\" is not valid. Allowed values: " . join(", ", @ValidModes) . "\n";
  }

  if ($Options{output} !~ /^(SD|text|both)$/i) {
    die "Error: The value specified, $Options{output}, for option \"--output\" is not valid. Allowed values: SD, text, or both\n";
  }
  if ($Options{indelim} !~ /^(comma|semicolon|tab)$/i) {
    die "Error: The value specified, $Options{indelim}, for option \"--indelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
  }
  if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
    die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
  }
  if ($Options{quote} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{regexignorecase} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{regexignorecase}, for option \"--regexignorecase\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{strdatastring} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{strdatastring}, for option \"--StrDataString\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{strdatastringmode} !~ /^(StrOnly|StrAndDataFields)$/i) {
    die "Error: The value specified, $Options{strdatastringmode}, for option \"--StrDataStringMode\" is not valid. Allowed values: StrOnly or StrAndDataFields\n";
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1264
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1265 __END__
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1266
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1267 =head1 NAME
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1268
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1269 ExtractFromSDFiles.pl - Extract specific data from SDFile(s)
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1270
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1271 =head1 SYNOPSIS
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1272
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1273 ExtractFromSDFiles.pl SDFile(s)...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1274
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1275 ExtractFromSDFiles.pl [B<-h, --help>]
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1276 [B<-d, --datafields> "fieldlabel,..." | "fieldlabel,value,criteria..." | "fieldlabel,value,value..."]
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1277 [B<--datafieldsfile> filename] [B<--indelim> comma | tab | semicolon] [B<-m, --mode> alldatafields |
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1278 commondatafields | datafieldnotbylist | datafields | datafieldsbyvalue | datafieldsbyregex | datafieldbylist |
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1279 datafielduniquebylist | molnames | randomcmpds | recordnum | recordnums | recordrange | 2dcmpdrecords |
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1280 3dcmpdrecords ] [B<-n, --numofcmpds> number] [B<--outdelim> comma | tab | semicolon]
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1281 [B<--output> SD | text | both] [B<-o, --overwrite>] [B<-q, --quote> yes | no]
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1282 [B<--record> recnum | startrecnum,endrecnum] [B<--RegexIgnoreCase> I<yes or no>]
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1283 [B<-r, --root> rootname] [B<-s, --seed> number] [B<--StrDataString> yes | no]
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1284 [B<--StrDataStringDelimiter> text] [B<--StrDataStringMode> StrOnly | StrAndDataFields]
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1285 [B<--ValueComparisonMode> I<Numeric | Alphanumeric>]
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1286 [B<-v, --violations> number] [B<-w, --workingdir> dirname] SDFile(s)...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1287
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1288 =head1 DESCRIPTION
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1289
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1290 Extract specific data from I<SDFile(s)> and generate appropriate SD or CSV/TSV text
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1291 file(s). The structure data from SDFile(s) is not transferred to CSV/TSV text file(s).
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1292 Multiple SDFile names are separated by spaces. The valid file extensions are I<.sdf>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1293 and I<.sd>. All other file names are ignored. All the SD files in the current directory
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1294 can be specified either by I<*.sdf> or the current directory name.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1295
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1296 =head1 OPTIONS
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1297
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1298 =over 4
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1299
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1300 =item B<-h, --help>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1301
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1302 Print this help message.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1303
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1304 =item B<-d, --datafields> I<"fieldlabel,..." | "fieldlabel,value,criteria..." | "fieldlabel,value,value,...">
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1305
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1306 This value is mode specific. In general, it's a list of comma separated data field labels
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1307 and associated mode specific values.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1308
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1309 For I<datafields> mode, input value format is: I<fieldlabel,...>. Examples:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1310
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1311 Extreg
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1312 Extreg,CompoundName,ID
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1313
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1314 For I<datafieldsbyvalue> mode, input value format contains these triplets:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1315 I<fieldlabel,value, criteria...>. Possible values for criteria: I<le, ge or eq>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1316 The value of B<--ValueComparisonMode> indicates whether values are
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1317 compared using numerical or string comparison operators. Default is to consider
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1318 data field values as numerical values and use numerical comparison operators.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1319 Examples:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1320
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1321 MolWt,450,le
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1322 MolWt,450,le,LogP,5,le,SumNumNO,10,le,SumNHOH,5,le
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1323
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1324 For I<datafieldsbyregex> mode, input value format contains these triplets:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1325 I<fieldlabel,regex, criteria...>. I<regex> corresponds to any valid regular expression
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1326 and is used to match the values for specified I<fieldlabel>. Possible values for criteria:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1327 I<eq or ne>. For I<eq> and I<ne> values, data field label value is matched with
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1328 regular expression using =~ and !~ respectively. B<--RegexIgnoreCase> option
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1329 value is used to determine whether to ignore letter upper/lower case during
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1330 regular expression match. Examples:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1331
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1332 Name,ol,eq
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1333 Name,'^pat',ne
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1334
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1335 For I<datafieldbylist> and I<datafielduniquebylist> mode, input value format is:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1336 I<fieldlabel,value1,value2...>. This is equivalent to I<datafieldsbyvalue> mode with
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1337 this input value format: I<fieldlabel,value1,eq,fieldlabel,value2,eq,...>. For
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1338 I<datafielduniquebylist> mode, only unique compounds identified by first occurrence
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1339 of I<value> associated with I<fieldlabel> in I<SDFile(s)> are kept; any subsequent compounds
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1340 are simply ignored.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1341
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1342 For I<datafieldnotbylist> mode, input value format is: I<fieldlabel,value1,value2...>. In this
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1343 mode, the script behaves exactly opposite of I<datafieldbylist> mode, and only those compounds
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1344 are extracted whose data field values don't match any specified data field value.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1345
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1346 =item B<--datafieldsfile> I<filename>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1347
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1348 Filename which contains various mode specific values. This option provides a way
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1349 to specify mode specific values in a file instead of entering them on the command
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1350 line using B<-d --datafields>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1351
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1352 For I<datafields> mode, input file lines contain comma delimited field labels:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1353 I<fieldlabel,...>. Example:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1354
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1355 Line 1:MolId
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1356 Line 2:"Extreg",CompoundName,ID
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1357
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1358 For I<datafieldsbyvalue> mode, input file lines contain these comma separated triplets:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1359 I<fieldlabel,value, criteria>. Possible values for criteria: I<le, ge or eq>. Examples:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1360
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1361 Line 1:MolWt,450,le
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1362
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1363 Line 1:"MolWt",450,le,"LogP",5,le,"SumNumNO",10,le,"SumNHOH",5,le
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1364
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1365 Line 1:MolWt,450,le
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1366 Line 2:"LogP",5,le
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1367 Line 3:"SumNumNO",10,le
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1368 Line 4: SumNHOH,5,le
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1369
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1370 For I<datafieldbylist> and I<datafielduniquebylist> mode, input file line format is:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1371
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1372 Line 1:fieldlabel;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1373 Subsequent lines:value1,value2...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1374
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1375 For I<datafieldbylist>, I<datafielduniquebylist>, and I<datafieldnotbylist> mode, input file
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1376 line format is:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1377
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1378 Line 1:fieldlabel;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1379 Subsequent lines:value1,value2...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1380
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1381 For I<datafielduniquebylist> mode, only unique compounds identified by first occurrence
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1382 of I<value> associated with I<fieldlabel> in I<SDFile(s)> are kept; any subsequent compounds
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1383 are simply ignored. Example:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1384
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1385 Line 1: MolID
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1386 Subsequent Lines:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1387 907508
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1388 832291,4642
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1389 "1254","907303"
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1390
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1391 =item B<--indelim> I<comma | tab | semicolon>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1392
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1393 Delimiter used to specify text values for B<-d --datafields> and B<--datafieldsfile> options.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1394 Possible values: I<comma, tab, or semicolon>. Default value: I<comma>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1395
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1396 =item B<-m, --mode> I<alldatafields | commondatafields | datafields | datafieldsbyvalue | datafieldsbyregex | datafieldbylist | datafielduniquebylist | datafieldnotbylist | molnames | randomcmpds | recordnum | recordnums | recordrange | 2dcmpdrecords | 3dcmpdrecords>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1397
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1398 Specify what to extract from I<SDFile(s)>. Possible values: I<alldatafields, commondatafields,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1399 datafields, datafieldsbyvalue, datafieldsbyregex, datafieldbylist, datafielduniquebylist, datafieldnotbylist,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1400 molnames, randomcmpds, recordnum, recordnums, recordrange, 2dcmpdrecords, 3dcmpdrecords>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1401 Default value: I<alldatafields>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1402
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1403 For I<alldatafields> and I<molnames> mode, only a CSV/TSV text file is generated; for all
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1404 other modes, however, a SD file is generated by default - you can change the behavior to generate
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1405 text file using I<--output> option.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1406
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1407 For I<3DCmpdRecords> mode, only those compounds with at least one non-zero value for Z atomic coordinates
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1408 are retrieved; however, during retrieval of compounds in I<2DCmpdRecords> mode, all Z atomic coordinates must
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1409 be zero.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1410
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1411 =item B<-n, --numofcmpds> I<number>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1412
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1413 Number of compounds to extract during I<randomcmpds> mode.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1414
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1415 =item B<--outdelim> I<comma | tab | semicolon>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1416
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1417 Delimiter for output CSV/TSV text file(s). Possible values: I<comma, tab, or semicolon>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1418 Default value: I<comma>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1419
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1420 =item B<--output> I<SD | text | both>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1421
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1422 Type of output files to generate. Possible values: I<SD, text, or both>. Default value: I<SD>. For
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1423 I<alldatafields> and I<molnames> mode, this option is ignored and only a CSV/TSV text file is generated.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1424
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1425 =item B<-o, --overwrite>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1426
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1427 Overwrite existing files.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1428
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1429 =item B<-q, --quote> I<yes | no>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1430
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1431 Put quote around column values in output CSV/TSV text file(s). Possible values:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1432 I<yes or no>. Default value: I<yes>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1433
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1434 =item B<--record> I<recnum | recnums | startrecnum,endrecnum>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1435
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1436 Record number, record numbers or range of records to extract during I<recordnum>, I<recordnums>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1437 and I<recordrange> mode. Input value format is: <num>, <num1,num2,...> and <startnum, endnum>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1438 for I<recordnum>, I<recordnums> and I<recordrange> modes respectively. Default value: none.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1439
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1440 =item B<--RegexIgnoreCase> I<yes | no>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1441
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1442 Specify whether to ignore case during I<datafieldsbyregex> value of B<-m, --mode> option.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1443 Possible values: I<yes or no>. Default value: I<yes>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1444
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1445 =item B<-r, --root> I<rootname>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1446
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1447 New file name is generated using the root: <Root>.<Ext>. Default for new file
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1448 names: <SDFileName><mode>.<Ext>. The file type determines <Ext> value.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1449 The sdf, csv, and tsv <Ext> values are used for SD, comma/semicolon, and tab
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1450 delimited text files respectively. This option is ignored for multiple input files.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1451
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1452 =item B<-s, --seed> I<number>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1453
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1454 Random number seed used for I<randomcmpds> mode. Default: 123456789.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1455
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1456 =item B<--StrDataString> I<yes | no>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1457
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1458 Specify whether to write out structure data string to CSV/TSV text file(s). Possible values:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1459 I<yes or no>. Default value: I<no>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1460
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1461 The value of B<--StrDataStringDelimiter> option is used as a delimiter to join structure
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1462 data lines into a structure data string.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1463
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1464 This option is ignored during generation of SD file(s).
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1465
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1466 =item B<--StrDataStringDelimiter> I<text>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1467
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1468 Delimiter for joining multiple structure data lines into a string before writing to CSV/TSV text
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1469 file(s). Possible values: I<any alphanumeric text>. Default value: I<|>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1470
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1471 This option is ignored during generation of SD file(s).
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1472
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1473 =item B<--StrDataStringMode> I<StrOnly | StrAndDataFields>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1474
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1475 Specify whether to include SD data fields and values along with the structure data into structure
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1476 data string before writing it out to CSV/TSV text file(s). Possible values: I<StrOnly or StrAndDataFields>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1477 Default value: I<StrOnly>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1478
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1479 The value of B<--StrDataStringDelimiter> option is used as a delimiter to join structure
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1480 data lines into a structure data string.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1481
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1482 This option is ignored during generation of SD file(s).
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1483
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1484 =item B<--ValueComparisonMode> I<Numeric | Alphanumeric>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1485
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1486 Specify how to compare data field values during I<datafieldsbyvalue> mode: Compare
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1487 values using either numeric or string (eq, le, ge) comparison operators. Possible values:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1488 I<Numeric or Alphanumeric>. Default value: I<Numeric>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1489
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1490 =item B<-v, --violations> I<number>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1491
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1492 Number of criterion violations allowed for values specified during I<datafieldsbyvalue>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1493 and I<datafieldsbyregex> mode. Default value: I<0>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1494
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1495 =item B<-w, --workingdir> I<dirname>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1496
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1497 Location of working directory. Default: current directory.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1498
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1499 =back
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1500
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1501 =head1 EXAMPLES
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1502
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1503 To retrieve all data fields from SD files and generate CSV text files, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1504
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1505 % ExtractFromSDFiles.pl -o Sample.sdf
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1506 % ExtractFromSDFiles.pl -o *.sdf
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1507
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1508 To retrieve all data fields from SD file and generate CSV text files containing
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1509 a column with structure data as a string with | as line delimiter, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1510
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1511 % ExtractFromSDFiles.pl --StrDataString Yes -o Sample.sdf
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1512
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1513 To retrieve Mol_ID data field from SD file and generate CSV text files containing
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1514 a column with structure data along with all data fields as a string with | as line
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1515 delimiter, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1516
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1517 % ExtractFromSDFiles.pl -m datafields -d "Mol_ID" --StrDataString Yes
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1518 --StrDataStringMode StrAndDataFields --StrDataStringDelimiter "|"
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1519 --output text -o Sample.sdf
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1520
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1521 To retrieve common data fields which exist for all the compounds in
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1522 a SD file and generate a TSV text file NewSample.tsv, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1523
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1524 % ExtractFromSDFiles.pl -m commondatafields --outdelim tab -r NewSample
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1525 --output Text -o Sample.sdf
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1526
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1527 To retrieve Mol_ID, MolWeight, and CompoundName data fields from a SD file and generate a
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1528 CSV text file NewSample.csv, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1529
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1530 % ExtractFromSDFiles.pl -m datafields -d "Mol_ID,MolWeight,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1531 CompoundName" -r NewSample --output Text -o Sample.sdf
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1532
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1533 To retrieve compounds which meet a specific set of criteria - MolWt <= 450,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1534 LogP <= 5 and SumNO <= 10 - from a SD file and generate a new SD file NewSample.sdf,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1535 type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1536
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1537 % ExtractFromSDFiles.pl -m datafieldsbyvalue -d "MolWt,450,le,LogP
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1538 ,5,le,SumNO,10,le" -r NewSample -o Sample.sdf
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1539
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1540 To retrieve compounds from a SD file with a specific set of values for MolID and
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1541 generate a new SD file NewSample.sdf, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1542
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1543 % ExtractFromSDFiles.pl -m datafieldbylist -d "Mol_ID,159,4509,4619"
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1544 -r NewSample -o Sample.sdf
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1545
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1546 To retrieve compounds from a SD file with values for MolID not on a list of specified
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1547 values and generate a new SD file NewSample.sdf, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1548
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1549 % ExtractFromSDFiles.pl -m datafieldnotbylist -d "Mol_ID,159,4509,4619"
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1550 -r NewSample -o Sample.sdf
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1551
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1552 To retrieve 10 random compounds from a SD file and generate a new SD file RandomSample.sdf, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1553
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1554 % ExtractFromSDFiles.pl -m randomcmpds -n 10 -r RandomSample
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1555 -o Sample.sdf
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1556
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1557 To retrieve compound record number 10 from a SD file and generate a new SD file NewSample.sdf, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1558
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1559 % ExtractFromSDFiles.pl -m recordnum --record 10 -r NewSample
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1560 -o Sample.sdf
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1561
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1562 To retrieve compound record numbers 10, 20 and 30 from a SD file and generate a new SD file
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1563 NewSample.sdf, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1564
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1565 % ExtractFromSDFiles.pl -m recordnums --record 10,20,30 -r NewSample
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1566 -o Sample.sdf
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1567
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1568 To retrieve compound records between 10 and 20 from a SD file and generate a new SD
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1569 file NewSample.sdf, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1570
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1571 % ExtractFromSDFiles.pl -m recordrange --record 10,20 -r NewSample
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1572 -o Sample.sdf
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1573
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1574 =head1 AUTHOR
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1575
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1576 Manish Sud <msud@san.rr.com>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1577
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1578 =head1 SEE ALSO
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1579
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1580 FilterSDFiles.pl, InfoSDFiles.pl, SplitSDFiles.pl, MergeTextFilesWithSD.pl
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1581
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1582 =head1 COPYRIGHT
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1583
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1584 Copyright (C) 2015 Manish Sud. All rights reserved.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1585
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1586 This file is part of MayaChemTools.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1587
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1588 MayaChemTools is free software; you can redistribute it and/or modify it under
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1589 the terms of the GNU Lesser General Public License as published by the Free
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1590 Software Foundation; either version 3 of the License, or (at your option)
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1591 any later version.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1592
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1593 =cut