annotate mayachemtools/bin/ExtractFromSDFiles.pl @ 9:ab29fa5c8c1f draft default tip

Uploaded
author deepakjadmin
date Thu, 15 Dec 2016 14:18:03 -0500
parents 73ae111cf86f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1 #!/usr/bin/perl -w
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
2 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
3 # $RCSfile: ExtractFromSDFiles.pl,v $
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
4 # $Date: 2015/03/22 19:11:27 $
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
5 # $Revision: 1.48 $
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
6 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
7 # Author: Manish Sud <msud@san.rr.com>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
8 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
10 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
11 # This file is part of MayaChemTools.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
12 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
14 # the terms of the GNU Lesser General Public License as published by the Free
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
15 # Software Foundation; either version 3 of the License, or (at your option) any
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
16 # later version.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
17 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
18 # MayaChemTools is distributed in the hope that it will be useful, but without
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
19 # any warranty; without even the implied warranty of merchantability or fitness
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
20 # for a particular purpose. See the GNU Lesser General Public License for more
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
21 # details.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
22 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
23 # You should have received a copy of the GNU Lesser General Public License
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
26 # Boston, MA, 02111-1307, USA.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
27 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
28
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
29 use strict;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
30 use FindBin; use lib "$FindBin::Bin/../lib";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
31 use Getopt::Long;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
32 use File::Basename;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
33 use Text::ParseWords;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
34 use Benchmark;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
35 use SDFileUtil;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
36 use FileUtil;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
37 use TextUtil;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
38
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# File-scoped state: %Options is tested below for the help flag; the other
# variables hold the script name and the timing objects used in this section...
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName:Starting...\n\n";
$StartTime = Benchmark->new;

# Get the options and setup script...
# NOTE(review): SetupScriptUsage() is defined elsewhere; presumably it fills in
# %Options from the command line - confirm.
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  # Usage text is emitted through die when help is requested or no file
  # names are left in @ARGV...
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

my(@SDFilesList);
@SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Collect information about SD files...
print "Checking input SD file(s)...\n";
my(%SDFilesInfo);
RetrieveSDFilesInfo();

# Generate output files...
my($FileIndex);
if (@SDFilesList > 1) {
  print "\nProcessing SD files...\n";
}
for $FileIndex (0 .. $#SDFilesList) {
  # Only files flagged as okay in %SDFilesInfo are processed...
  if ($SDFilesInfo{FileOkay}[$FileIndex]) {
    print "\nProcessing file $SDFilesList[$FileIndex]...\n";
    ExtractFromSDFile($FileIndex);
  }
}
print "\n$ScriptName:Done...\n\n";

# Report the elapsed time for the whole run...
$EndTime = Benchmark->new;
$TotalTime = timediff ($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
84
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
85 ###############################################################################
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
86
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Extract data from a SD file...
#
# Opens the input and output files for the SD file at position $FileIndex,
# runs the extraction routine selected by $OptionsInfo{Mode} and then closes
# the files. Dies with a list of the supported modes when the mode does not
# match any of them; in that case the files have already been opened and
# CloseInputAndOutputFiles() is not reached.
#
sub ExtractFromSDFile {
  my($FileIndex) = @_;
  my($ModeExtractor, @ModeHandlers);

  # Mode name patterns paired with their extraction routines. The patterns are
  # tried in the order listed and the first one matching is used...
  @ModeHandlers = (
    [qr/^AllDataFields$/i, \&ExtractAllDataFields],
    [qr/^CommonDataFields$/i, \&ExtractCommonDataFields],
    [qr/^DataFields$/i, \&ExtractDataFields],
    [qr/^(DataFieldByList|DatafieldUniqueByList)$/i, \&ExtractDataFieldByList],
    [qr/^DataFieldNotByList$/i, \&ExtractDataFieldNotByList],
    [qr/^DataFieldsByValue$/i, \&ExtractDataFieldsByValue],
    [qr/^DataFieldsByRegex$/i, \&ExtractDataFieldsByRegex],
    [qr/^RandomCmpds$/i, \&ExtractRandomCompounds],
    [qr/^MolNames$/i, \&ExtractMolNames],
    [qr/^RecordNum$/i, \&ExtractRecordNum],
    [qr/^RecordNums$/i, \&ExtractRecordNums],
    [qr/^RecordRange$/i, \&ExtractRecordRange],
    [qr/^2DCmpdRecords$/i, \&Extract2DCmpdRecords],
    [qr/^3DCmpdRecords$/i, \&Extract3DCmpdRecords],
  );

  OpenInputAndOutputFiles($FileIndex);

  MODE: for my $ModeHandler (@ModeHandlers) {
    if ($OptionsInfo{Mode} =~ $ModeHandler->[0]) {
      $ModeExtractor = $ModeHandler->[1];
      last MODE;
    }
  }

  if (!defined $ModeExtractor) {
    # The list of allowed values covers every mode handled above, including
    # datafieldsbyregex...
    die "Error: The value specified, $Options{mode}, for option \"-m --mode\" is not valid. Allowed values: alldatafields, commondatafields, datafields, datafieldsbyvalue, datafieldsbyregex, datafieldbylist, datafielduniquebylist, datafieldnotbylist, molnames, randomcmpds, recordnum, recordnums, recordrange, 2dcmpdrecords, 3dcmpdrecords\n";
  }

  $ModeExtractor->($FileIndex);

  CloseInputAndOutputFiles();
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
155
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Extract all data fields...
#
# Uses the labels stored in $SDFilesInfo{AllDataFieldLabels} for the file at
# $FileIndex as the data labels and then hands every compound record read
# from the input SD file to the text and SD data writers.
#
sub ExtractAllDataFields {
  my($FileIndex) = @_;
  my(@RecordLines);

  # Data labels for this file; the column labels are written right after...
  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  # Keep reading compound records until ReadCmpdString returns a false value...
  while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    # Data field labels and values of the current record...
    @RecordLines = split /\n/, $SDFilesInfo{CmpdString};
    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@RecordLines);

    SetupDataValues();
    WriteTextFileCmpdData();
    WriteSDFileCmpdData();
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
173
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Extract common data fields...
#
# Uses the labels stored in $SDFilesInfo{CommonDataFieldLabels} for the file at
# $FileIndex as the data labels and then hands every compound record read
# from the input SD file to the text and SD data writers.
#
sub ExtractCommonDataFields {
  my($FileIndex) = @_;
  my(@RecordLines);

  # Data labels for this file; the column labels are written right after...
  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{CommonDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  # Keep reading compound records until ReadCmpdString returns a false value...
  while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    # Data field labels and values of the current record...
    @RecordLines = split /\n/, $SDFilesInfo{CmpdString};
    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@RecordLines);

    SetupDataValues();
    WriteTextFileCmpdData();
    WriteSDFileCmpdData();
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
191
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Extract specified data fields...
#
# Uses the labels in $OptionsInfo{SpecifiedDataFieldLabels} as the data labels
# and then hands every compound record read from the input SD file to the
# text and SD data writers. $FileIndex is accepted for consistency with the
# other extraction routines but is not used here.
#
sub ExtractDataFields {
  my($FileIndex) = @_;
  my(@RecordLines);

  # Data labels come from the options rather than from the file...
  @{$SDFilesInfo{DataLabels}} = @{$OptionsInfo{SpecifiedDataFieldLabels}};
  WriteTextFileColLabels();

  # Keep reading compound records until ReadCmpdString returns a false value...
  while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    # Data field labels and values of the current record...
    @RecordLines = split /\n/, $SDFilesInfo{CmpdString};
    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@RecordLines);

    SetupDataValues();
    WriteTextFileCmpdData();
    WriteSDFileCmpdData();
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
209
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Extract data fields using a list...
#
# Handles both DataFieldByList and DataFieldUniqueByList modes. A compound
# record is a match when its value for the data field named by
# $OptionsInfo{SpecifiedDataFieldLabel} is a key of
# $OptionsInfo{SpecifiedDataFieldValues}:
#
#   . DataFieldByList: every matching record is written.
#   . DataFieldUniqueByList: only the first record for each value is written.
#
# The status of each specified value is reset to "NotFound" up front and
# switched to "Found" on its first match. Records without the specified data
# field are skipped.
#
# NOTE(review): Reading stops as soon as the number of distinct values found
# reaches $OptionsInfo{SpecifiedDataFieldValuesCount}. In DataFieldByList mode
# this means later records repeating an already found value are not written -
# confirm this is the intended behavior.
#
sub ExtractDataFieldByList {
  my($FileIndex) = @_;
  my($Value, $SpecifiedDataFieldValuesFoundCount, $CurrentValue, $SpecifiedDataFieldLabel, $FirstMatchForValue, $WriteAllMatches, $WriteFirstMatchOnly, @CmpdLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  # Mark all specified values as not found yet...
  for $Value (keys %{$OptionsInfo{SpecifiedDataFieldValues}}) {
    $OptionsInfo{SpecifiedDataFieldValues}{$Value} = "NotFound";
  }
  $SpecifiedDataFieldValuesFoundCount = 0;
  $SpecifiedDataFieldLabel = $OptionsInfo{SpecifiedDataFieldLabel};

  # The mode doesn't change while records are read; test it only once...
  $WriteAllMatches = ($OptionsInfo{Mode} =~ /^DataFieldByList$/i) ? 1 : 0;
  $WriteFirstMatchOnly = ($OptionsInfo{Mode} =~ /^DataFieldUniqueByList$/i) ? 1 : 0;

  CMPDSTRING: while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    @CmpdLines = split "\n", $SDFilesInfo{CmpdString};
    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);

    if (!exists $SDFilesInfo{DataFieldValues}{$SpecifiedDataFieldLabel}) {
      next CMPDSTRING;
    }

    SetupDataValues();

    $CurrentValue = $SDFilesInfo{DataFieldValues}{$SpecifiedDataFieldLabel};

    # Ignore records whose value is not on the specified list...
    if (!exists $OptionsInfo{SpecifiedDataFieldValues}{$CurrentValue}) {
      next CMPDSTRING;
    }
    # Nothing is written once the found count has reached the specified count...
    if ($SpecifiedDataFieldValuesFoundCount >= $OptionsInfo{SpecifiedDataFieldValuesCount}) {
      next CMPDSTRING;
    }

    # Track the first match for each specified value...
    $FirstMatchForValue = ($OptionsInfo{SpecifiedDataFieldValues}{$CurrentValue} eq "NotFound") ? 1 : 0;
    if ($FirstMatchForValue) {
      $SpecifiedDataFieldValuesFoundCount++;
      $OptionsInfo{SpecifiedDataFieldValues}{$CurrentValue} = "Found";
    }

    if ($WriteAllMatches || ($WriteFirstMatchOnly && $FirstMatchForValue)) {
      WriteSDFileCmpdString();
      WriteTextFileCmpdData();
    }

    # Stop reading after all specified values have been found...
    if ($SpecifiedDataFieldValuesFoundCount >= $OptionsInfo{SpecifiedDataFieldValuesCount}) {
      last CMPDSTRING;
    }
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
260
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Extract data field whose values are not on the specified list...
#
# Writes the compound records whose value for the data field named by
# $OptionsInfo{SpecifiedDataFieldLabel} is non-empty and is not a key of
# $OptionsInfo{SpecifiedDataFieldValues}. Records without that data field
# are skipped.
#
sub ExtractDataFieldNotByList {
  my($FileIndex) = @_;
  my($FieldLabel, $FieldValue, @RecordLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  $FieldLabel = $OptionsInfo{SpecifiedDataFieldLabel};

  RECORD: while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    @RecordLines = split /\n/, $SDFilesInfo{CmpdString};
    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@RecordLines);

    # The record must contain the specified data field...
    next RECORD unless exists $SDFilesInfo{DataFieldValues}{$FieldLabel};

    SetupDataValues();

    $FieldValue = $SDFilesInfo{DataFieldValues}{$FieldLabel};

    # Make sure the current value is not empty and is not on the specified list of values...
    next RECORD if IsEmpty($FieldValue);
    next RECORD if exists $OptionsInfo{SpecifiedDataFieldValues}{$FieldValue};

    WriteSDFileCmpdString();
    WriteTextFileCmpdData();
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
292
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Extract data fields by value...
#
# For each compound record, the values of the data fields listed in
# $OptionsInfo{SpecifiedDataFieldLabels} are compared against the values in
# $OptionsInfo{SpecifiedDataFieldValuesMap} using the criterion (eq, le or ge)
# from $OptionsInfo{SpecifiedDataFieldCriteriaMap}. The comparison is numeric
# when $OptionsInfo{NumericalComparison} is set and string based otherwise.
# Labels missing from a record are not counted as violations. A record is
# written when its violation count doesn't exceed $OptionsInfo{Violations}.
#
sub ExtractDataFieldsByValue {
  my($FileIndex) = @_;
  my($FieldLabel, $FieldValue, $Criterion, $TargetValue, $ViolationCount, @RecordLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    @RecordLines = split /\n/, $SDFilesInfo{CmpdString};
    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@RecordLines);

    SetupDataValues();
    $ViolationCount = 0;

    LABEL: for $FieldLabel (@{$OptionsInfo{SpecifiedDataFieldLabels}}) {
      # Only data fields present in the record are checked...
      next LABEL unless exists $SDFilesInfo{DataFieldValues}{$FieldLabel};

      $FieldValue = $SDFilesInfo{DataFieldValues}{$FieldLabel};
      $Criterion = $OptionsInfo{SpecifiedDataFieldCriteriaMap}{$FieldLabel};
      $TargetValue = $OptionsInfo{SpecifiedDataFieldValuesMap}{$FieldLabel};

      if ($OptionsInfo{NumericalComparison}) {
        # Numerical comparison...
        if ($Criterion =~ /^eq$/i) {
          $ViolationCount++ if $FieldValue != $TargetValue;
        }
        elsif ($Criterion =~ /^le$/i) {
          $ViolationCount++ if $FieldValue > $TargetValue;
        }
        elsif ($Criterion =~ /^ge$/i) {
          $ViolationCount++ if $FieldValue < $TargetValue;
        }
      }
      else {
        # String comparison...
        if ($Criterion =~ /^eq$/i) {
          $ViolationCount++ if $FieldValue ne $TargetValue;
        }
        elsif ($Criterion =~ /^le$/i) {
          $ViolationCount++ if $FieldValue gt $TargetValue;
        }
        elsif ($Criterion =~ /^ge$/i) {
          $ViolationCount++ if $FieldValue lt $TargetValue;
        }
      }
    }

    # Write out the record as long as violations stay within the allowed limit...
    if ($ViolationCount <= $OptionsInfo{Violations}) {
      WriteSDFileCmpdString();
      WriteTextFileCmpdData();
    }
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
338
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Extract compounds whose data field values satisfy regular expression criteria...
#
# For each compound read from the input SD file, every label listed in
# $OptionsInfo{SpecifiedDataFieldLabels} that is present in the compound is
# tested against its regular expression from SpecifiedDataFieldRegexMap:
#
#   . "eq" criterion is violated when the value doesn't match the regex
#   . "ne" criterion is violated when the value matches the regex
#
# Labels missing from a compound are not counted as violations. The compound is
# written out when its violation count doesn't exceed $OptionsInfo{Violations}.
#
sub ExtractDataFieldsByRegex {
  my($FileIndex) = @_;
  my($Label, $CurrentValue, $SpecifiedRegexCriterion, $SpecifiedRegex, $ValueMatches, $ViolationCount, @CmpdLines, %CompiledRegexMap);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  # Regular expressions are compiled once per label, on first use, instead of
  # being re-interpolated for every compound...
  %CompiledRegexMap = ();

  CMPDSTRING: while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    @CmpdLines = split "\n", $SDFilesInfo{CmpdString};
    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);

    SetupDataValues();
    $ViolationCount = 0;

    LABEL: for $Label (@{$OptionsInfo{SpecifiedDataFieldLabels}}) {
      if (!exists $SDFilesInfo{DataFieldValues}{$Label}) {
        next LABEL;
      }
      $CurrentValue = $SDFilesInfo{DataFieldValues}{$Label};
      $SpecifiedRegexCriterion = $OptionsInfo{SpecifiedDataFieldRegexCriteriaMap}{$Label};

      if (!exists $CompiledRegexMap{$Label}) {
        $SpecifiedRegex = $OptionsInfo{SpecifiedDataFieldRegexMap}{$Label};

        # Wrap the pattern in a non-capturing group so that it's never empty: an
        # empty pattern makes Perl reuse the last successfully matched regex
        # instead of matching everything...
        $CompiledRegexMap{$Label} = $OptionsInfo{RegexIgnoreCase} ? qr/(?:$SpecifiedRegex)/i : qr/(?:$SpecifiedRegex)/;
      }
      $ValueMatches = ($CurrentValue =~ $CompiledRegexMap{$Label}) ? 1 : 0;

      if ($SpecifiedRegexCriterion =~ /^eq$/i) {
        if (!$ValueMatches) { $ViolationCount++; }
      }
      elsif ($SpecifiedRegexCriterion =~ /^ne$/i) {
        if ($ValueMatches) { $ViolationCount++; }
      }
    }

    if ($ViolationCount <= $OptionsInfo{Violations}) {
      WriteSDFileCmpdString();
      WriteTextFileCmpdData();
    }
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
382
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Extract random compounds...
#
# Picks $OptionsInfo{NumOfCmpds} distinct record numbers at random (capped at
# the number of compounds in the input file, $SDFilesInfo{CmpdCount}[$FileIndex])
# after seeding the random number generator with $OptionsInfo{Seed}, and writes
# out the corresponding compounds in the order they occur in the input SD file.
#
sub ExtractRandomCompounds {
  my($FileIndex) = @_;
  my($CmpdNum, $CmpdCount, $NumOfCmpdsToExtract, $ExtractedCmpdCount, $RandomIndex, @CmpdLines, %RandomCmpdIndexMap);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  $CmpdCount = $SDFilesInfo{CmpdCount}[$FileIndex];
  srand($OptionsInfo{Seed});

  # It's not possible to extract more compounds than the file contains...
  $NumOfCmpdsToExtract = ($OptionsInfo{NumOfCmpds} < $CmpdCount) ? $OptionsInfo{NumOfCmpds} : $CmpdCount;

  # Keep drawing until the requested number of distinct record numbers has been
  # collected: repeated draws simply map to an existing key. The loop always
  # ends because the target never exceeds the number of available records...
  %RandomCmpdIndexMap = ();
  while (scalar(keys %RandomCmpdIndexMap) < $NumOfCmpdsToExtract) {
    $RandomIndex = int (rand $CmpdCount) + 1;
    $RandomCmpdIndexMap{$RandomIndex} = $RandomIndex;
  }

  $CmpdNum = 0;
  $ExtractedCmpdCount = 0;
  CMPDSTRING: while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    $CmpdNum++;
    if (!exists $RandomCmpdIndexMap{$CmpdNum}) {
      next CMPDSTRING;
    }

    WriteSDFileCmpdString();

    if ($OptionsInfo{OutputTextFile}) {
      @CmpdLines = split "\n", $SDFilesInfo{CmpdString};
      %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
      SetupDataValues();
      WriteTextFileCmpdData();
    }

    # No need to read the rest of the file after the last selected compound...
    $ExtractedCmpdCount++;
    if ($ExtractedCmpdCount >= $NumOfCmpdsToExtract) {
      last CMPDSTRING;
    }
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
420
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Extract mol names...
#
# Writes a single "MolName" column to the new text file: one row per compound
# in the input SD file, holding the molecule name parsed from the first line of
# the compound string and quoted according to $OptionsInfo{OutQuote}.
#
sub ExtractMolNames {
  my($FileIndex) = @_;
  my($MolName, $NewTextFileRef, @CmpdLines);

  # Assign rather than append: labels left over from a previously processed
  # input file must not end up in the column labels line, as each data row
  # contains only the molecule name...
  @{$SDFilesInfo{DataLabels}} = ("MolName");
  WriteTextFileColLabels();

  $NewTextFileRef = $SDFilesInfo{NewTextFileRef};
  while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    @CmpdLines = split "\n", $SDFilesInfo{CmpdString};
    $MolName = QuoteAWord(ParseCmpdMolNameLine($CmpdLines[0]), $OptionsInfo{OutQuote});
    print $NewTextFileRef "$MolName\n";
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
436
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Extract the single compound record located at position $OptionsInfo{RecordNum}
# in the input SD file. Reading stops as soon as that record has been written...
sub ExtractRecordNum {
  my($FileIndex) = @_;
  my($RecordPos, @RecordLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  $RecordPos = 0;
  RECORD: while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    $RecordPos++;
    next RECORD unless ($RecordPos == $OptionsInfo{RecordNum});

    # Found it: write it out and skip the rest of the file...
    WriteSDFileCmpdString();
    if ($OptionsInfo{OutputTextFile}) {
      @RecordLines = split "\n", $SDFilesInfo{CmpdString};
      %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@RecordLines);
      SetupDataValues();
      WriteTextFileCmpdData();
    }
    last RECORD;
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
464
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Extract the compound records whose positions in the input SD file are keys of
# $OptionsInfo{RecordNums}. Reading stops at the first unlisted record found
# either past $OptionsInfo{RecordNumsMax} or after $OptionsInfo{RecordNumsCount}
# records have been written...
sub ExtractRecordNums {
  my($FileIndex) = @_;
  my($RecordPos, $WrittenCount, @RecordLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  ($RecordPos, $WrittenCount) = (0, 0);

  RECORD: while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    $RecordPos++;

    if (!exists $OptionsInfo{RecordNums}{$RecordPos}) {
      # Not a requested record: quit when nothing more can be extracted...
      if ($RecordPos > $OptionsInfo{RecordNumsMax} || $WrittenCount >= $OptionsInfo{RecordNumsCount}) {
        last RECORD;
      }
      next RECORD;
    }

    $WrittenCount++;
    WriteSDFileCmpdString();

    next RECORD unless $OptionsInfo{OutputTextFile};

    @RecordLines = split "\n", $SDFilesInfo{CmpdString};
    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@RecordLines);
    SetupDataValues();
    WriteTextFileCmpdData();
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
496
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
497
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Extract the compound records located between $OptionsInfo{StartRecordNum} and
# $OptionsInfo{EndRecordNum}, both inclusive. Reading stops at the first record
# past the end of the range...
sub ExtractRecordRange {
  my($FileIndex) = @_;
  my($RecordPos, @RecordLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  $RecordPos = 0;
  RECORD: while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    $RecordPos++;

    # Past the range: nothing left to extract...
    last RECORD if ($RecordPos > $OptionsInfo{EndRecordNum});

    # Before the range: keep reading...
    next RECORD if ($RecordPos < $OptionsInfo{StartRecordNum});

    WriteSDFileCmpdString();

    next RECORD unless $OptionsInfo{OutputTextFile};

    @RecordLines = split "\n", $SDFilesInfo{CmpdString};
    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@RecordLines);
    SetupDataValues();
    WriteTextFileCmpdData();
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
526
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Extract the compound records for which IsCmpd2D reports true; all other
# records in the input SD file are skipped...
sub Extract2DCmpdRecords {
  my($FileIndex) = @_;
  my(@RecordLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  RECORD: while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    @RecordLines = split "\n", $SDFilesInfo{CmpdString};
    next RECORD unless IsCmpd2D(\@RecordLines);

    WriteSDFileCmpdString();

    next RECORD unless $OptionsInfo{OutputTextFile};

    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@RecordLines);
    SetupDataValues();
    WriteTextFileCmpdData();
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
551
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Extract the compound records for which IsCmpd3D reports true; all other
# records in the input SD file are skipped...
sub Extract3DCmpdRecords {
  my($FileIndex) = @_;
  my(@RecordLines);

  @{$SDFilesInfo{DataLabels}} = @{$SDFilesInfo{AllDataFieldLabels}[$FileIndex]};
  WriteTextFileColLabels();

  RECORD: while ($SDFilesInfo{CmpdString} = ReadCmpdString($SDFilesInfo{InputSDFileRef})) {
    @RecordLines = split "\n", $SDFilesInfo{CmpdString};
    next RECORD unless IsCmpd3D(\@RecordLines);

    WriteSDFileCmpdString();

    next RECORD unless $OptionsInfo{OutputTextFile};

    %{$SDFilesInfo{DataFieldValues}} = GetCmpdDataHeaderLabelsAndValues(\@RecordLines);
    SetupDataValues();
    WriteTextFileCmpdData();
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
576
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
577
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Open input and output files...
#
# Announces the file(s) being generated for the SD file at $FileIndex, opens the
# requested new SD and/or text file for writing along with the input SD file for
# reading, and stores references to the file handles in %SDFilesInfo using keys
# NewSDFileRef, NewTextFileRef and InputSDFileRef. Keys for outputs that are not
# requested are left undefined. Dies when a file can't be opened.
#
# Three-argument form of open is used to make sure characters in file names are
# never interpreted as open modes or pipes.
#
sub OpenInputAndOutputFiles {
  my($FileIndex) = @_;

  $SDFilesInfo{NewTextFileRef} = undef;
  $SDFilesInfo{NewSDFileRef} = undef;

  if ($OptionsInfo{OutputTextFile} && $OptionsInfo{OutputSDFile}) {
    print "Generating files $SDFilesInfo{NewSDFileName}[$FileIndex] and $SDFilesInfo{NewTextFileName}[$FileIndex]...\n";
  }
  elsif ($OptionsInfo{OutputSDFile}) {
    print "Generating file $SDFilesInfo{NewSDFileName}[$FileIndex]...\n";
  }
  else {
    print "Generating file $SDFilesInfo{NewTextFileName}[$FileIndex]...\n";
  }

  if ($OptionsInfo{OutputSDFile}) {
    open NEWSDFILE, ">", $SDFilesInfo{NewSDFileName}[$FileIndex] or die "Error: Couldn't open $SDFilesInfo{NewSDFileName}[$FileIndex]: $! \n";
    $SDFilesInfo{NewSDFileRef} = \*NEWSDFILE;
  }
  if ($OptionsInfo{OutputTextFile}) {
    open NEWTEXTFILE, ">", $SDFilesInfo{NewTextFileName}[$FileIndex] or die "Error: Couldn't open $SDFilesInfo{NewTextFileName}[$FileIndex]: $! \n";
    $SDFilesInfo{NewTextFileRef} = \*NEWTEXTFILE;
  }

  open SDFILE, "<", $SDFilesList[$FileIndex] or die "Error: Couldn't open $SDFilesList[$FileIndex]: $! \n";
  $SDFilesInfo{InputSDFileRef} = \*SDFILE;

}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
608
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Close open input and output files...
#
# Closes whichever of the new SD file, new text file and input SD file handles
# are currently set in %SDFilesInfo and resets all three references to undef.
#
# Buffered write errors (e.g. a full disk) may only surface during close, so a
# failure to close an output file is reported as a warning instead of being
# silently ignored; processing continues.
#
sub CloseInputAndOutputFiles {
  if ($SDFilesInfo{NewSDFileRef}) {
    close $SDFilesInfo{NewSDFileRef} or warn "Warning: Couldn't close new SD file: $! \n";
  }
  if ($SDFilesInfo{NewTextFileRef}) {
    close $SDFilesInfo{NewTextFileRef} or warn "Warning: Couldn't close new text file: $! \n";
  }

  if ($SDFilesInfo{InputSDFileRef}) {
    close $SDFilesInfo{InputSDFileRef};
  }

  $SDFilesInfo{NewTextFileRef} = undef;
  $SDFilesInfo{NewSDFileRef} = undef;
  $SDFilesInfo{InputSDFileRef} = undef;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
626
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Write the column labels line to the new text file. Nothing is written unless
# text file output is turned on. A trailing "StructureDataString" label is added
# when $OptionsInfo{OutputStrDataString} is set...
sub WriteTextFileColLabels {
  my($HeaderLine, $TextFileRef, @HeaderLabels);

  return unless $OptionsInfo{OutputTextFile};

  $TextFileRef = $SDFilesInfo{NewTextFileRef};

  if (!$OptionsInfo{OutputStrDataString}) {
    $HeaderLine = JoinWords(\@{$SDFilesInfo{DataLabels}}, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
  }
  else {
    # Work on a copy to leave the data labels themselves untouched...
    @HeaderLabels = ();
    push @HeaderLabels, @{$SDFilesInfo{DataLabels}}, "StructureDataString";

    $HeaderLine = JoinWords(\@HeaderLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
  }
  print $TextFileRef "$HeaderLine\n";
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
651
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Fill $SDFilesInfo{DataValues} with one value per entry in
# $SDFilesInfo{DataLabels}, in the same order: the value from
# $SDFilesInfo{DataFieldValues} for labels present in the current compound and
# an empty string for the others...
sub SetupDataValues {
  my($Label, @Values);

  @Values = ();
  for $Label (@{$SDFilesInfo{DataLabels}}) {
    if (exists $SDFilesInfo{DataFieldValues}{$Label}) {
      push @Values, $SDFilesInfo{DataFieldValues}{$Label};
    }
    else {
      push @Values, "";
    }
  }
  @{$SDFilesInfo{DataValues}} = @Values;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
656
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Write out structure data and specific data fields to SD file...
#
# Nothing is written unless SD file output is turned on. The structure data is
# the part of the current compound string up to and including the connection
# table terminator line; it's followed by one data item for each label in
# $SDFilesInfo{DataLabels}, using the value at the same position in
# $SDFilesInfo{DataValues}, and by the "$$$$" record delimiter.
#
sub WriteSDFileCmpdData {
  my($MolString, $Count, $NewSDFileRef);

  if (!$OptionsInfo{OutputSDFile}) {
    return;
  }

  $NewSDFileRef = $SDFilesInfo{NewSDFileRef};

  # The connection table terminator is "M  END", with two spaces between "M"
  # and "END". Any other spacing fails to match the terminator, causing the
  # whole compound string, data fields included, to be written as structure
  # data...
  ($MolString) = split "M  END", $SDFilesInfo{CmpdString};
  $MolString .= "M  END";
  print $NewSDFileRef "$MolString\n";

  for $Count (0 .. $#{$SDFilesInfo{DataLabels}}) {
    print $NewSDFileRef "> <$SDFilesInfo{DataLabels}[$Count]>\n$SDFilesInfo{DataValues}[$Count]\n\n";
  }
  print $NewSDFileRef "\$\$\$\$\n";
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
676
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Write the current compound string, followed by a newline, to the new SD file.
# Nothing is written unless SD file output is turned on...
sub WriteSDFileCmpdString {
  return unless $OptionsInfo{OutputSDFile};

  my($SDFileRef) = $SDFilesInfo{NewSDFileRef};
  print {$SDFileRef} "$SDFilesInfo{CmpdString}\n";
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
688
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Write out data for text file...
#
# Joins the values in @{$SDFilesInfo{DataValues}} into one delimited line using
# JoinWords with $OptionsInfo{OutDelim} and $OptionsInfo{OutQuote}, and prints
# it to the text file handle stored in $SDFilesInfo{NewTextFileRef}.
#
# When $OptionsInfo{OutputStrDataString} is set, the structure data is appended
# as one extra column: the complete compound string when
# $OptionsInfo{StrDataStringWithFields} is set, otherwise only the part up to
# and including the "M  END" line. Newlines in that column are replaced by
# $OptionsInfo{StrDataStringDelimiter}.
#
# Takes no arguments. Does nothing unless $OptionsInfo{OutputTextFile} is set.
#
sub WriteTextFileCmpdData {
  my($DataValuesLine, $NewTextFileRef);

  if (!$OptionsInfo{OutputTextFile}) {
    return;
  }

  $NewTextFileRef = $SDFilesInfo{NewTextFileRef};
  $DataValuesLine = JoinWords(\@{$SDFilesInfo{DataValues}}, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});

  # Handle multiple lines data values for data fields by joining 'em using semicolons...
  $DataValuesLine =~ s/\n/;/g;

  if ($OptionsInfo{OutputStrDataString}) {
    # Append structure data string...
    my($StrDataString, $OutQuoteValue, $OutDelim, $StrDataStringDelimiter);

    if ($OptionsInfo{StrDataStringWithFields}) {
      $StrDataString = $SDFilesInfo{CmpdString};
    }
    else {
      # The molfile terminator line is "M  END" with two spaces. The tag is kept
      # in a variable and quoted with \Q..\E so the exact spacing is what gets matched...
      my($MolEndTag) = "M  END";

      ($StrDataString) = split /^\Q$MolEndTag\E/m, $SDFilesInfo{CmpdString}, 2;
      $StrDataString = "" unless defined $StrDataString;
      $StrDataString .= $MolEndTag;
    }
    $StrDataStringDelimiter = $OptionsInfo{StrDataStringDelimiter};
    $StrDataString =~ s/\n/$StrDataStringDelimiter/g;

    $OutDelim = $OptionsInfo{OutDelim};
    $OutQuoteValue = $OptionsInfo{OutQuote} ? "\"" : "";

    print $NewTextFileRef "$DataValuesLine${OutDelim}${OutQuoteValue}${StrDataString}${OutQuoteValue}\n";
  }
  else {
    print $NewTextFileRef "$DataValuesLine\n";
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
728
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Retrieve information about input SD files...
#
# Resets %SDFilesInfo and, for each entry in @SDFilesList, fills the following
# per-file slots indexed by the position of the file in @SDFilesList:
#
#   FileOkay              - 1 when the file passed all checks; otherwise 0
#   CmpdCount             - number of compound records read during the scan
#   NewTextFileName       - name of the text output file
#   NewSDFileName         - name of the SD output file
#   AllDataFieldLabels    - data field labels collected from the scanned records
#   CommonDataFieldLabels - labels present in every scanned record; only
#                           collected for commondatafields mode
#
# A file is skipped with a warning, leaving FileOkay at 0, when it does not
# exist, fails CheckFileType, would be overwritten by its own SD output, has an
# existing output file while $OptionsInfo{Overwrite} is not set, or cannot be
# opened.
#
# The records are scanned only when the mode is randomcmpds (to count them) or
# when text output is requested for a mode which needs data field labels.
#
sub RetrieveSDFilesInfo {
  my($SDFile, $Index, $FileDir, $FileExt, $FileName, $NewFileName, $NewSDFileName, $NewTextFileName, $CmpdCount);

  %SDFilesInfo = ();

  @{$SDFilesInfo{FileOkay}} = ();
  @{$SDFilesInfo{CmpdCount}} = ();
  @{$SDFilesInfo{NewTextFileName}} = ();
  @{$SDFilesInfo{NewSDFileName}} = ();

  @{$SDFilesInfo{AllDataFieldLabels}} = ();
  @{$SDFilesInfo{CommonDataFieldLabels}} = ();

  FILELIST: for $Index (0 .. $#SDFilesList) {
    $SDFile = $SDFilesList[$Index];

    $SDFilesInfo{FileOkay}[$Index] = 0;

    $SDFilesInfo{CmpdCount}[$Index] = 0;
    $SDFilesInfo{NewTextFileName}[$Index] = "";
    $SDFilesInfo{NewSDFileName}[$Index] = "";

    @{$SDFilesInfo{AllDataFieldLabels}[$Index]} = ();
    @{$SDFilesInfo{CommonDataFieldLabels}[$Index]} = ();

    if (!(-e $SDFile)) {
      warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
      next FILELIST;
    }

    if (!CheckFileType($SDFile, "sd sdf")) {
      warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
      next FILELIST;
    }

    # Generate appropriate name for the new output file.
    $FileDir = ""; $FileName = ""; $FileExt = "";
    ($FileDir, $FileName, $FileExt) = ParseFileName($SDFile);
    $NewFileName = $FileName . $OptionsInfo{FileNameMode};
    if ($OptionsInfo{OutFileRoot} && (@SDFilesList == 1)) {
      # An explicit output root is honored only for a single input file...
      my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
      if ($RootFileName && $RootFileExt) {
        $NewFileName = $RootFileName;
      }
      else {
        $NewFileName = $OptionsInfo{OutFileRoot};
      }
    }
    $NewSDFileName = $NewFileName . ".$OptionsInfo{SDFileExt}";
    $NewTextFileName = $NewFileName . ".$OptionsInfo{TextFileExt}";

    if ($OptionsInfo{OutputSDFile}) {
      if (lc($NewSDFileName) eq lc($SDFile)) {
        warn "Warning: Ignoring input file $SDFile: Same output, $NewSDFileName, and input file names.\n";
        print "Specify a different name using \"-r --root\" option or use default name.\n";
        next FILELIST;
      }
    }

    if (!$OptionsInfo{Overwrite}) {
      if ($OptionsInfo{OutputSDFile}) {
        if (-e $NewSDFileName) {
          warn "Warning: Ignoring file $SDFile: New file, $NewSDFileName, already exists\n";
          next FILELIST;
        }
      }
      if ($OptionsInfo{OutputTextFile}) {
        if (-e $NewTextFileName) {
          warn "Warning: Ignoring file $SDFile: New file, $NewTextFileName, already exists\n";
          next FILELIST;
        }
      }
    }

    # Three-argument open: the file name is never interpreted as an open mode or a command...
    if (!open SDFILE, "<", $SDFile) {
      warn "Warning: Ignoring file $SDFile: Couldn't open it: $! \n";
      next FILELIST;
    }

    my($CountCmpds, $CollectDataFields);
    my($CmpdString, @CmpdLines, @DataFieldLabels, %DataFieldLabelsMap, @CommonDataFieldLabels);

    $CountCmpds = ($OptionsInfo{Mode} =~ /^randomcmpds$/i) ? 1 : 0;

    # Data field labels are needed only for text file output in these modes...
    $CollectDataFields = ($OptionsInfo{OutputTextFile} && $OptionsInfo{Mode} =~ /^(?:alldatafields|commondatafields|randomcmpds|datafieldsbyvalue|datafieldsbyregex|datafieldbylist|datafielduniquebylist|datafieldnotbylist|recordnum|recordnums|recordrange)$/i) ? 1 : 0;

    $CmpdCount = 0;
    if ($CountCmpds || $CollectDataFields) {
      @DataFieldLabels = ();
      @CommonDataFieldLabels = ();
      %DataFieldLabelsMap = ();
      CMPDSTRING: while ($CmpdString = ReadCmpdString(\*SDFILE)) {
        $CmpdCount++;
        if ($OptionsInfo{Mode} =~ /^recordnum$/i) {
          if ($CmpdCount == $OptionsInfo{RecordNum}) {
            # Only the labels of the requested record matter; stop scanning...
            @CmpdLines = split "\n", $CmpdString;
            @DataFieldLabels = GetCmpdDataHeaderLabels(\@CmpdLines);
            last CMPDSTRING;
          }
        }
        if ($CollectDataFields) {
          my($Label);
          @CmpdLines = split "\n", $CmpdString;
          # Process compound data header labels and figure out which ones are present for
          # all the compounds...
          if (@DataFieldLabels) {
            my (@CmpdDataFieldLabels) = GetCmpdDataHeaderLabels(\@CmpdLines);
            my(%CmpdDataFieldLabelsMap) = ();
            # Setup a map for the current labels...
            for $Label (@CmpdDataFieldLabels) {
              $CmpdDataFieldLabelsMap{$Label} = "PresentInSome";
            }
            # Check the presence of old labels in this compound; demote the missing ones...
            for $Label (@DataFieldLabels) {
              if (!$CmpdDataFieldLabelsMap{$Label}) {
                $DataFieldLabelsMap{$Label} = "PresentInSome";
              }
            }
            # Check the presence of this compound's labels in the old labels; otherwise, add 'em...
            for $Label (@CmpdDataFieldLabels ) {
              if (!$DataFieldLabelsMap{$Label}) {
                # It's a new label...
                push @DataFieldLabels, $Label;
                $DataFieldLabelsMap{$Label} = "PresentInSome";
              }
            }
          }
          else {
            # Get the initial label set and set up a map...
            @DataFieldLabels = GetCmpdDataHeaderLabels(\@CmpdLines);
            for $Label (@DataFieldLabels) {
              $DataFieldLabelsMap{$Label} = "PresentInAll";
            }
          }
          # Identify the common data field labels...
          if ($OptionsInfo{Mode} =~ /^commondatafields$/i) {
            @CommonDataFieldLabels = ();
            for $Label (@DataFieldLabels) {
              if ($DataFieldLabelsMap{$Label} eq "PresentInAll") {
                push @CommonDataFieldLabels, $Label;
              }
            }
          }
        }
      }
    }

    $SDFilesInfo{FileOkay}[$Index] = 1;

    $SDFilesInfo{NewTextFileName}[$Index] = $NewTextFileName;
    $SDFilesInfo{NewSDFileName}[$Index] = $NewSDFileName;

    $SDFilesInfo{CmpdCount}[$Index] = $CmpdCount;

    push @{$SDFilesInfo{AllDataFieldLabels}[$Index]}, @DataFieldLabels;
    push @{$SDFilesInfo{CommonDataFieldLabels}[$Index]}, @CommonDataFieldLabels;

    close SDFILE;
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
891
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
892 # Process options...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
893 sub ProcessOptions {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
894 %OptionsInfo = ();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
895
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
896 $OptionsInfo{Mode} = $Options{mode};
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
897
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
898 $OptionsInfo{InDelim} = "\,";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
899 if ($Options{indelim} =~ /^semicolon$/i) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
900 $OptionsInfo{InDelim} = "\;";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
901 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
902 elsif ($Options{indelim} =~ /^tab$/i) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
903 $OptionsInfo{InDelim} = "\t";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
904 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
905
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
906 $OptionsInfo{OutDelim} = "\,";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
907 if ($Options{outdelim} =~ /^semicolon$/i) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
908 $OptionsInfo{OutDelim} = "\;";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
909 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
910 elsif ($Options{outdelim} =~ /^tab$/i) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
911 $OptionsInfo{OutDelim} = "\t";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
912 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
913
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
914 $OptionsInfo{OutQuote} = ($Options{quote} =~ /^yes$/i) ? 1 : 0;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
915
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
916 $OptionsInfo{RegexIgnoreCase} = ($Options{regexignorecase} =~ /^yes$/i) ? 1 : 0;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
917
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
918 $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : undef;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
919 $OptionsInfo{Overwrite} = $Options{overwrite} ? $Options{overwrite} : undef;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
920
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
921 $OptionsInfo{NumOfCmpds} = $Options{numofcmpds};
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
922
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
923 $OptionsInfo{ValueComparisonMode} = $Options{valuecomparisonmode};
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
924 $OptionsInfo{NumericalComparison} = ($Options{valuecomparisonmode} =~ /^Numeric$/i) ? 1 : 0;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
925
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
926 $OptionsInfo{Violations} = $Options{violations};
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
927 $OptionsInfo{Seed} = $Options{seed};
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
928
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
929
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
930 if ($Options{mode} =~ /^(datafields|datafieldsbyregex|datafieldsbyvalue|datafieldbylist|datafielduniquebylist|datafieldnotbylist)$/i) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
931 if ($Options{datafields} || $Options{datafieldsfile}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
932 if ($Options{datafields} && $Options{datafieldsfile}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
933 die "Error: For \"-m --mode\" option values of datafields, datafieldsbyvalue, datafieldsbyregex, datafieldbylist, datafielduniquebylist, or datafieldnotbylist specify only one of the \"-d --datafields\" or \"--datafieldsfile\" option.\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
934 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
935 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
936 else {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
937 die "Error: For \"-m --mode\" option values of datafields, datafieldsbyvalue, datafieldsbyregex, datafieldbylist, datafielduniquebylist, or datafieldnotbylist specify one of the \"-d --datafields\" or \"--datafieldsfile\" option.\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
938 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
939 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
940 $OptionsInfo{DataFields} = $Options{datafields} ? $Options{datafields} : undef;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
941 $OptionsInfo{DataFieldsFile} = $Options{datafieldsfile} ? $Options{datafieldsfile} : undef;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
942
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
943 $OptionsInfo{RecordNum} = 0; $OptionsInfo{StartRecordNum} = 0; $OptionsInfo{EndRecordNum} = 0;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
944
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
945 %{$OptionsInfo{RecordNums}} = ();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
946 $OptionsInfo{RecordNumsMin} = 0; $OptionsInfo{RecordNumsMax} = 0; $OptionsInfo{RecordNumsCount} = 0;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
947
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
948 $OptionsInfo{Record} = $Options{record} ? $Options{record} : undef;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
949
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
950 if ($Options{mode} =~ /^(recordnum|recordnums|recordrange)$/i) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
951 if ($Options{record}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
952 my($Record, @RecordSplit);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
953
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
954 $Record = $Options{record};
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
955 $Record =~ s/ //g;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
956
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
957 @RecordSplit = split ",", $Record;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
958
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
959 if ($Options{mode} =~ /^recordnum$/i ) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
960 if (@RecordSplit == 1) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
961 $OptionsInfo{RecordNum} = $RecordSplit[0];
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
962 if ($OptionsInfo{RecordNum} <= 0) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
963 die "Error: The value specified, $OptionsInfo{RecordNum}, for option \"--records\" is not valid. Allowed values: > 0 \n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
964 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
965 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
966 else {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
967 die "Error: Invalid number of values, ", scalar(@RecordSplit), ", specified using \"--record\" option: only 1 value is allowed.\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
968 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
969 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
970 elsif ($Options{mode} =~ /^recordnums$/i ) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
971 my($RecordNum, $RecordCount, @SortedRecordSplit);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
972
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
973 @SortedRecordSplit = sort { $a <=> $b } @RecordSplit;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
974
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
975 $RecordCount = 0;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
976 RECORDNUM: for $RecordNum (@SortedRecordSplit) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
977 if (exists $OptionsInfo{RecordNums}{$RecordNum}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
978 next RECORDNUM;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
979 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
980 $RecordCount++;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
981 $OptionsInfo{RecordNums}{$RecordNum} = $RecordNum;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
982 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
983 $OptionsInfo{RecordNumsCount} = $RecordCount;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
984 $OptionsInfo{RecordNumsMin} = $SortedRecordSplit[0];
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
985 $OptionsInfo{RecordNumsMax} = $SortedRecordSplit[$#SortedRecordSplit];
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
986 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
987 else {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
988 if (@RecordSplit == 2) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
989 $OptionsInfo{StartRecordNum} = $RecordSplit[0];
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
990 $OptionsInfo{EndRecordNum} = $RecordSplit[1];
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
991 if ($OptionsInfo{StartRecordNum} <= 0 || $OptionsInfo{EndRecordNum} <= 0) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
992 die "Error: The value pair specified, $Options{record}, for option \"--records\" is not valid. Allowed values: > 0 \n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
993 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
994 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
995 else {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
996 die "Error: Invalid number of values, ", scalar(@RecordSplit), ", specified using \"--record\" option: only 2 values is allowed.\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
997 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
998 if ($OptionsInfo{StartRecordNum} > $OptionsInfo{EndRecordNum}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
999 die "Error: Start record number, $OptionsInfo{StartRecordNum}, must be smaller than end record number, $OptionsInfo{EndRecordNum}.\nSpecify different values using \"--record\" option.\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1000 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1001 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1002 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1003 else {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1004 die "Error: For \"-m --mode\" option values recordnum, recordnums or recordrange, specify \"--record\" option value.\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1005 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1006 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1007
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1008 @{$OptionsInfo{SpecifiedDataFieldLabels}} = ();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1009
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1010 my(@Words, $Line, $Value);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1011 if ($Options{mode} =~ /^datafields$/i) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1012 @{$OptionsInfo{SpecifiedDataFieldLabels}} = ();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1013 if ($Options{datafields}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1014 @{$OptionsInfo{SpecifiedDataFieldLabels}} = split $OptionsInfo{InDelim}, $Options{datafields};
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1015 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1016 elsif ($Options{datafieldsfile}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1017 open DATAFIELDSFILE, "$Options{datafieldsfile}" or die "Error: Couldn't open $Options{datafieldsfile}: $! \n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1018 while ($Line = GetTextLine(\*DATAFIELDSFILE)) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1019 @Words = quotewords($OptionsInfo{InDelim}, 0, $Line);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1020 if (@Words) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1021 push @{$OptionsInfo{SpecifiedDataFieldLabels}}, @Words;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1022 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1023 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1024 close DATAFIELDSFILE;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1025 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1026 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1027 elsif ($Options{mode} =~ /^datafieldsbyvalue$/i) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1028 my(@DataFieldsByValueTriplets);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1029 @DataFieldsByValueTriplets = ();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1030 if ($Options{datafields}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1031 @DataFieldsByValueTriplets = split $OptionsInfo{InDelim}, $Options{datafields};
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1032 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1033 elsif ($Options{datafieldsfile}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1034 open DATAFIELDSFILE, "$Options{datafieldsfile}" or die "Error: Couldn't open $Options{datafieldsfile}: $! \n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1035 while ($Line = GetTextLine(\*DATAFIELDSFILE)) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1036 @Words = quotewords($OptionsInfo{InDelim}, 0, $Line);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1037 if (@Words) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1038 push @DataFieldsByValueTriplets, @Words;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1039 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1040 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1041 close DATAFIELDSFILE;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1042 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1043 if ((@DataFieldsByValueTriplets % 3)) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1044 if ($Options{datafields}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1045 die "Error: Triplets not found in values specified by \"-d --datafields\" option\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1046 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1047 elsif ($Options{datafieldsfile}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1048 die "Error: Triplets not found in values specified by \"--datafieldsfile\" option\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1049 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1050 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1051 my($Index, $Label, $Value, $Criterion);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1052
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1053 @{$OptionsInfo{SpecifiedDataFieldLabels}} = ();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1054 %{$OptionsInfo{SpecifiedDataFieldValuesMap}} = ();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1055 %{$OptionsInfo{SpecifiedDataFieldCriteriaMap}} = ();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1056
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1057 for ($Index = 0; $Index < @DataFieldsByValueTriplets; $Index = $Index + 3) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1058 $Label = $DataFieldsByValueTriplets[$Index];
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1059 $Value = $DataFieldsByValueTriplets[$Index + 1];
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1060 $Criterion = $DataFieldsByValueTriplets[$Index + 2];
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1061
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1062 if ($Criterion =~ /^(eq|le|ge)$/i) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1063 push @{$OptionsInfo{SpecifiedDataFieldLabels}}, $Label;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1064 $OptionsInfo{SpecifiedDataFieldValuesMap}{$Label} = $Value;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1065 $OptionsInfo{SpecifiedDataFieldCriteriaMap}{$Label} = $Criterion;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1066 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1067 else {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1068 warn "Warning: Ignoring triplet value, $Label $Value $Criterion , specified using \"-d --datafields\" or \"--datafieldsfile\" option: Invalid criterion value: $Criterion\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1069 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1070 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1071 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1072 elsif ($Options{mode} =~ /^datafieldsbyregex$/i) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1073 my(@DataFieldsByRegexTriplets);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1074
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1075 @DataFieldsByRegexTriplets = ();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1076 if ($Options{datafields}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1077 @DataFieldsByRegexTriplets = quotewords($OptionsInfo{InDelim}, 0, $Options{datafields});
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1078 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1079 elsif ($Options{datafieldsfile}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1080 open DATAFIELDSFILE, "$Options{datafieldsfile}" or die "Error: Couldn't open $Options{datafieldsfile}: $! \n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1081 while ($Line = GetTextLine(\*DATAFIELDSFILE)) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1082 @Words = quotewords($OptionsInfo{InDelim}, 0, $Line);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1083 if (@Words) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1084 push @DataFieldsByRegexTriplets, @Words;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1085 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1086 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1087 close DATAFIELDSFILE;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1088 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1089 if ((@DataFieldsByRegexTriplets % 3)) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1090 if ($Options{datafields}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1091 die "Error: Triplet not found in values specified by \"-d --datafields\" option\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1092 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1093 elsif ($Options{datafieldsfile}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1094 die "Error: Triplet not found in values specified by \"--datafieldsfile\" option\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1095 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1096 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1097
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1098 my($Index, $Label, $Value, $Criterion);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1099
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1100 @{$OptionsInfo{SpecifiedDataFieldLabels}} = ();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1101 %{$OptionsInfo{SpecifiedDataFieldRegexMap}} = ();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1102 %{$OptionsInfo{SpecifiedDataFieldRegexCriteriaMap}} = ();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1103
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1104 for ($Index = 0; $Index < @DataFieldsByRegexTriplets; $Index = $Index + 3) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1105 $Label = $DataFieldsByRegexTriplets[$Index];
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1106 $Value = $DataFieldsByRegexTriplets[$Index + 1];
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1107 $Criterion = $DataFieldsByRegexTriplets[$Index + 2];
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1108
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1109 if ($Criterion =~ /^(eq|ne)$/i) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1110 push @{$OptionsInfo{SpecifiedDataFieldLabels}}, $Label;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1111 $OptionsInfo{SpecifiedDataFieldRegexMap}{$Label} = $Value;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1112 $OptionsInfo{SpecifiedDataFieldRegexCriteriaMap}{$Label} = $Criterion;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1113 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1114 else {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1115 warn "Warning: Ignoring triplet value, $Label $Value $Criterion , specified using \"-d --datafields\" or \"--datafieldsfile\" option: Invalid criterion value: $Criterion; Supported values: eq or ne\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1116 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1117 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1118 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1119 elsif ($Options{mode} =~ /^(datafieldbylist|datafielduniquebylist|datafieldnotbylist)$/i) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1120 my($Index, @DataFieldAndValuesList);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1121 if ($Options{datafields}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1122 @DataFieldAndValuesList = split $OptionsInfo{InDelim}, $Options{datafields};
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1123 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1124 elsif ($Options{datafieldsfile}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1125 open DATAFIELDSFILE, "$Options{datafieldsfile}" or die "Error: Couldn't open $Options{datafieldsfile}: $! \n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1126 while ($Line = GetTextLine(\*DATAFIELDSFILE)) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1127 @Words = quotewords($OptionsInfo{InDelim}, 0, $Line);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1128 if (@Words) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1129 push @DataFieldAndValuesList, @Words;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1130 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1131 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1132 close DATAFIELDSFILE;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1133 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1134 if (@DataFieldAndValuesList < 2) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1135 if ($Options{datafields}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1136 die "Error: Invalid number of values specified by \"-d --datafields\" option\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1137 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1138 elsif ($Options{datafieldsfile}) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1139 die "Error: Invalid number values specified by \"--datafieldsfile\" option\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1140 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1141 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1142
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1143 $OptionsInfo{SpecifiedDataFieldLabel} = $DataFieldAndValuesList[0];
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1144 $OptionsInfo{SpecifiedDataFieldValuesCount} = @DataFieldAndValuesList - 1;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1145 %{$OptionsInfo{SpecifiedDataFieldValues}} = ();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1146
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1147 for ($Index = 1; $Index < @DataFieldAndValuesList; $Index++) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1148 $Value = $DataFieldAndValuesList[$Index];
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1149 $OptionsInfo{SpecifiedDataFieldValues}{$Value} = "NotFound";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1150 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1151 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1152
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1153 $OptionsInfo{SDFileExt} = "sdf";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1154 $OptionsInfo{TextFileExt} = "csv";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1155
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1156 if ($Options{outdelim} =~ /^tab$/i) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1157 $OptionsInfo{TextFileExt} = "tsv";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1158 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1159
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1160 if ($Options{mode} =~ /^(alldatafields|molnames)$/i) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1161 $OptionsInfo{OutputSDFile} = 0;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1162 $OptionsInfo{OutputTextFile} = 1;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1163 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1164 else {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1165 $OptionsInfo{OutputSDFile} = ($Options{output} =~ /^(SD|both)$/i) ? 1 : 0;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1166 $OptionsInfo{OutputTextFile} = ($Options{output} =~ /^(text|both)$/i) ? 1 : 0;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1167 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1168
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1169 $OptionsInfo{StrDataString} = $Options{strdatastring};
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1170 $OptionsInfo{OutputStrDataString} = ($Options{strdatastring} =~ /^Yes$/i) ? 1 : 0;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1171
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1172 $OptionsInfo{StrDataStringDelimiter} = $Options{strdatastringdelimiter};
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1173
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1174 if (IsEmpty($Options{strdatastringdelimiter})) {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1175 die "Error: No value specified for \"--StrDataStringDelimiter\" option.\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1176 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1177 $OptionsInfo{StrDataStringMode} = $Options{strdatastringmode};
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1178 $OptionsInfo{StrDataStringWithFields} = $Options{strdatastringmode} =~ /^StrAndDataFields$/i ? 1 : 0;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1179
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1180 MODE: {
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1181 if ($Options{mode} =~ /^alldatafields$/i) { $OptionsInfo{FileNameMode} = "AllDataDields"; last MODE; }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1182 if ($Options{mode} =~ /^commondatafields$/i) { $OptionsInfo{FileNameMode} = "CommonDataDields"; last MODE; }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1183 if ($Options{mode} =~ /^datafields$/i) { $OptionsInfo{FileNameMode} = "SpecifiedDataFields"; last MODE; }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1184 if ($Options{mode} =~ /^datafieldsbyvalue$/i) { $OptionsInfo{FileNameMode} = "SpecifiedDataFieldsByValue"; last MODE; }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1185 if ($Options{mode} =~ /^datafieldsbyregex$/i) { $OptionsInfo{FileNameMode} = "SpecifiedDataFieldsByRegex"; last MODE; }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1186 if ($Options{mode} =~ /^datafieldbylist$/i) { $OptionsInfo{FileNameMode} = "SpecifiedDataField"; last MODE; }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1187 if ($Options{mode} =~ /^datafielduniquebylist$/i) { $OptionsInfo{FileNameMode} = "SpecifiedUniqueDataField"; last MODE; }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1188 if ($Options{mode} =~ /^datafieldnotbylist$/i) { $OptionsInfo{FileNameMode} = "SpecifiedDataFieldNotByList"; last MODE; }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1189 if ($Options{mode} =~ /^molnames$/i) { $OptionsInfo{FileNameMode} = "MolName"; last MODE; }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1190 if ($Options{mode} =~ /^randomcmpds$/i) { $OptionsInfo{FileNameMode} = "RandomCmpds"; last MODE; }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1191 if ($Options{mode} =~ /^recordnum$/i) { $OptionsInfo{FileNameMode} = "RecordNum$OptionsInfo{RecordNum}"; last MODE; }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1192 if ($Options{mode} =~ /^recordnums$/i) { $OptionsInfo{FileNameMode} = "RecordNums"; last MODE; }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1193 if ($Options{mode} =~ /^recordrange$/i) { $OptionsInfo{FileNameMode} = "RecordNum$OptionsInfo{StartRecordNum}" . "To" . "$OptionsInfo{EndRecordNum}"; last MODE; }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1194 if ($Options{mode} =~ /^2dcmpdrecords$/i) { $OptionsInfo{FileNameMode} = "2DCmpdRecords"; last MODE; }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1195 if ($Options{mode} =~ /^3dcmpdrecords$/i) { $OptionsInfo{FileNameMode} = "3DCmpdRecords"; last MODE; }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1196 die "Error: The value specified, $Options{mode}, for option \"-m --mode\" is not valid. Allowed values: alldatafields, commondatafields, datafields, datafieldsbyvalue, datafieldbylist, datafielduniquebylist, , datafieldnotbylist, molnames, randomcmpds, recordnum, recordnums, recordrange, 2dcmpdrecords, 3dcmpdrecords\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1197 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1198
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1199 }
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1200
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Setup script usage and retrieve command line arguments specified using various options...
#
# Resets the file-level %Options hash to its default values, lets GetOptions
# overlay whatever was supplied on the command line, and then validates each
# enumerated or numeric option. Any invalid value terminates the script with an
# "Error: ..." message via die.
#
# Takes no arguments and returns nothing meaningful; results are left in %Options.
#
# Side effect: when "-w --workingdir" is specified, the process changes its
# current directory to that value.
#
# NOTE(review): GetOptions is presumably Getopt::Long's (imported elsewhere in
# the file) and therefore parses @ARGV - confirm against the file's use lines.
#
sub SetupScriptUsage {

  # Retrieve all the options...
  %Options = ();

  # Defaults; each may be overridden from the command line below...
  $Options{numofcmpds} = 1;
  $Options{mode} = "alldatafields";
  $Options{indelim} = "comma";
  $Options{outdelim} = "comma";
  $Options{output} = "SD";
  $Options{quote} = "yes";
  $Options{regexignorecase} = "yes";
  $Options{valuecomparisonmode} = "numeric";
  $Options{violations} = 0;
  $Options{seed} = 123456789;

  $Options{strdatastring} = "no";
  $Options{strdatastringdelimiter} = "|";
  $Options{strdatastringmode} = "StrOnly";

  if (!GetOptions(\%Options, "help|h", "datafields|d=s", "datafieldsfile=s", "indelim=s", "mode|m=s", "numofcmpds|n=i", "outdelim=s", "output=s", "overwrite|o", "quote|q=s", "regexignorecase=s", "record=s", "root|r=s", "seed|s=i", "strdatastring=s", "strdatastringdelimiter=s", "strdatastringmode=s", "valuecomparisonmode=s", "violations|v=i", "workingdir|w=s")) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }

  # Switch to the working directory before any further processing...
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  # Validate numeric and enumerated option values...
  if ($Options{numofcmpds} < 1) {
    die "Error: The value specified, $Options{numofcmpds}, for option \"-n --numofcmpds\" is not valid. Allowed values: >= 1 \n";
  }
  if ($Options{valuecomparisonmode} !~ /^(Numeric|Alphanumeric)$/i) {
    die "Error: The value specified, $Options{valuecomparisonmode}, for option \"--ValueComparisonMode\" is not valid. Allowed values: Numeric or Alphanumeric\n";
  }
  if ($Options{violations} < 0) {
    die "Error: The value specified, $Options{violations}, for option \"-v --violations\" is not valid. Allowed values: >= 0 \n";
  }
  if ($Options{mode} !~ /^(alldatafields|commondatafields|datafields|datafieldsbyvalue|datafieldsbyregex|datafieldbylist|datafielduniquebylist|datafieldnotbylist|molnames|randomcmpds|recordnum|recordnums|recordrange|2dcmpdrecords|3dcmpdrecords)$/i) {
    # The list of allowed values must mirror the alternatives in the pattern above...
    die "Error: The value specified, $Options{mode}, for option \"-m --mode\" is not valid. Allowed values: alldatafields, commondatafields, datafields, datafieldsbyvalue, datafieldsbyregex, datafieldbylist, datafielduniquebylist, datafieldnotbylist, molnames, randomcmpds, recordnum, recordnums, recordrange, 2dcmpdrecords, 3dcmpdrecords\n";
  }
  if ($Options{output} !~ /^(SD|text|both)$/i) {
    die "Error: The value specified, $Options{output}, for option \"--output\" is not valid. Allowed values: SD, text, or both\n";
  }
  if ($Options{indelim} !~ /^(comma|semicolon|tab)$/i) {
    die "Error: The value specified, $Options{indelim}, for option \"--indelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
  }
  if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
    die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
  }
  if ($Options{quote} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{regexignorecase} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{regexignorecase}, for option \"--regexignorecase\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{strdatastring} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{strdatastring}, for option \"--StrDataString\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{strdatastringmode} !~ /^(StrOnly|StrAndDataFields)$/i) {
    die "Error: The value specified, $Options{strdatastringmode}, for option \"--StrDataStringMode\" is not valid. Allowed values: StrOnly or StrAndDataFields\n";
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1264
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1265 __END__
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1266
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1267 =head1 NAME
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1268
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1269 ExtractFromSDFiles.pl - Extract specific data from SDFile(s)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1270
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1271 =head1 SYNOPSIS
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1272
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1273 ExtractFromSDFiles.pl SDFile(s)...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1274
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1275 ExtractFromSDFiles.pl [B<-h, --help>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1276 [B<-d, --datafields> "fieldlabel,..." | "fieldlabel,value,criteria..." | "fieldlabel,value,value..."]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1277 [B<--datafieldsfile> filename] [B<--indelim> comma | tab | semicolon] [B<-m, --mode> alldatafields |
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1278 commondatafields | datafieldnotbylist | datafields | datafieldsbyvalue | datafieldsbyregex | datafieldbylist |
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1279 datafielduniquebylist | molnames | randomcmpds | recordnum | recordnums | recordrange | 2dcmpdrecords |
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1280 3dcmpdrecords ] [B<-n, --numofcmpds> number] [B<--outdelim> comma | tab | semicolon]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1281 [B<--output> SD | text | both] [B<-o, --overwrite>] [B<-q, --quote> yes | no]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1282 [B<--record> recnum | startrecnum,endrecnum] [B<--RegexIgnoreCase> I<yes or no>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1283 [B<-r, --root> rootname] [B<-s, --seed> number] [B<--StrDataString> yes | no]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1284 [B<--StrDataStringDelimiter> text] [B<--StrDataStringMode> StrOnly | StrAndDataFields]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1285 [B<--ValueComparisonMode> I<Numeric | Alphanumeric>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1286 [B<-v, --violations> number] [B<-w, --workingdir> dirname] SDFile(s)...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1287
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1288 =head1 DESCRIPTION
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1289
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1290 Extract specific data from I<SDFile(s)> and generate appropriate SD or CSV/TSV text
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1291 file(s). The structure data from SDFile(s) is not transferred to CSV/TSV text file(s).
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1292 Multiple SDFile names are separated by spaces. The valid file extensions are I<.sdf>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1293 and I<.sd>. All other file names are ignored. All the SD files in the current directory
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1294 can be specified either by I<*.sdf> or the current directory name.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1295
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1296 =head1 OPTIONS
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1297
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1298 =over 4
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1299
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1300 =item B<-h, --help>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1301
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1302 Print this help message.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1303
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1304 =item B<-d, --datafields> I<"fieldlabel,..." | "fieldlabel,value,criteria..." | "fieldlabel,value,value,...">
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1305
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1306 This value is mode specific. In general, it's a list of comma separated data field labels
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1307 and associated mode specific values.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1308
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1309 For I<datafields> mode, input value format is: I<fieldlabel,...>. Examples:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1310
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1311 Extreg
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1312 Extreg,CompoundName,ID
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1313
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1314 For I<datafieldsbyvalue> mode, input value format contains these triplets:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1315 I<fieldlabel,value, criteria...>. Possible values for criteria: I<le, ge or eq>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1316 The value of B<--ValueComparisonMode> indicates whether values are
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1317 compared using numerical or string comparison operators. Default is to consider
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1318 data field values as numerical values and use numerical comparison operators.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1319 Examples:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1320
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1321 MolWt,450,le
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1322 MolWt,450,le,LogP,5,le,SumNumNO,10,le,SumNHOH,5,le
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1323
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1324 For I<datafieldsbyregex> mode, input value format contains these triplets:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1325 I<fieldlabel,regex, criteria...>. I<regex> corresponds to any valid regular expression
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1326 and is used to match the values for specified I<fieldlabel>. Possible values for criteria:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1327 I<eq or ne>. For I<eq> and I<ne> criteria, data field label value is matched with
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1328 regular expression using =~ and !~ respectively. B<--RegexIgnoreCase> option
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1329 value is used to determine whether to ignore letter upper/lower case during
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1330 regular expression match. Examples:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1331
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1332 Name,ol,eq
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1333 Name,'^pat',ne
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1334
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1335 For I<datafieldbylist> and I<datafielduniquebylist> mode, input value format is:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1336 I<fieldlabel,value1,value2...>. This is equivalent to I<datafieldsbyvalue> mode with
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1337 this input value format: I<fieldlabel,value1,eq,fieldlabel,value2,eq,...>. For
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1338 I<datafielduniquebylist> mode, only unique compounds identified by first occurrence
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1339 of I<value> associated with I<fieldlabel> in I<SDFile(s)> are kept; any subsequent compounds
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1340 are simply ignored.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1341
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1342 For I<datafieldnotbylist> mode, input value format is: I<fieldlabel,value1,value2...>. In this
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1343 mode, the script behaves exactly opposite of I<datafieldbylist> mode, and only those compounds
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1344 are extracted whose data field values don't match any specified data field value.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1345
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1346 =item B<--datafieldsfile> I<filename>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1347
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1348 Filename which contains various mode specific values. This option provides a way
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1349 to specify mode specific values in a file instead of entering them on the command
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1350 line using B<-d --datafields>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1351
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1352 For I<datafields> mode, input file lines contain comma delimited field labels:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1353 I<fieldlabel,...>. Example:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1354
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1355 Line 1:MolId
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1356 Line 2:"Extreg",CompoundName,ID
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1357
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1358 For I<datafieldsbyvalue> mode, input file lines contain these comma separated triplets:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1359 I<fieldlabel,value, criteria>. Possible values for criteria: I<le, ge or eq>. Examples:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1360
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1361 Line 1:MolWt,450,le
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1362
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1363 Line 1:"MolWt",450,le,"LogP",5,le,"SumNumNO",10,le,"SumNHOH",5,le
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1364
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1365 Line 1:MolWt,450,le
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1366 Line 2:"LogP",5,le
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1367 Line 3:"SumNumNO",10,le
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1368 Line 4: SumNHOH,5,le
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1369
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1370 For I<datafieldbylist> and I<datafielduniquebylist> mode, input file line format is:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1371
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1372 Line 1:fieldlabel;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1373 Subsequent lines:value1,value2...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1374
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1375 For I<datafieldbylist>, I<datafielduniquebylist>, and I<datafieldnotbylist> mode, input file
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1376 line format is:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1377
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1378 Line 1:fieldlabel;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1379 Subsequent lines:value1,value2...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1380
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1381 For I<datafielduniquebylist> mode, only unique compounds identified by first occurrence
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1382 of I<value> associated with I<fieldlabel> in I<SDFile(s)> are kept; any subsequent compounds
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1383 are simply ignored. Example:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1384
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1385 Line 1: MolID
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1386 Subsequent Lines:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1387 907508
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1388 832291,4642
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1389 "1254","907303"
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1390
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1391 =item B<--indelim> I<comma | tab | semicolon>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1392
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1393 Delimiter used to specify text values for B<-d --datafields> and B<--datafieldsfile> options.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1394 Possible values: I<comma, tab, or semicolon>. Default value: I<comma>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1395
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1396 =item B<-m, --mode> I<alldatafields | commondatafields | datafields | datafieldsbyvalue | datafieldsbyregex | datafieldbylist | datafielduniquebylist | datafieldnotbylist | molnames | randomcmpds | recordnum | recordnums | recordrange | 2dcmpdrecords | 3dcmpdrecords>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1397
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1398 Specify what to extract from I<SDFile(s)>. Possible values: I<alldatafields, commondatafields,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1399 datafields, datafieldsbyvalue, datafieldsbyregex, datafieldbylist, datafielduniquebylist, datafieldnotbylist,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1400 molnames, randomcmpds, recordnum, recordnums, recordrange, 2dcmpdrecords, 3dcmpdrecords>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1401 Default value: I<alldatafields>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1402
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1403 For I<alldatafields> and I<molnames> mode, only a CSV/TSV text file is generated; for all
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1404 other modes, however, a SD file is generated by default - you can change the behavior to generate
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1405 text file using I<--output> option.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1406
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1407 For I<3DCmpdRecords> mode, only those compounds with at least one non-zero value for Z atomic coordinates
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1408 are retrieved; however, during retrieval of compounds in I<2DCmpdRecords> mode, all Z atomic coordinates must
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1409 be zero.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1410
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1411 =item B<-n, --numofcmpds> I<number>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1412
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1413 Number of compounds to extract during I<randomcmpds> mode.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1414
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1415 =item B<--outdelim> I<comma | tab | semicolon>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1416
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1417 Delimiter for output CSV/TSV text file(s). Possible values: I<comma, tab, or semicolon>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1418 Default value: I<comma>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1419
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1420 =item B<--output> I<SD | text | both>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1421
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1422 Type of output files to generate. Possible values: I<SD, text, or both>. Default value: I<SD>. For
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1423 I<alldatafields> and I<molnames> mode, this option is ignored and only a CSV/TSV text file is generated.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1424
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1425 =item B<-o, --overwrite>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1426
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1427 Overwrite existing files.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1428
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1429 =item B<-q, --quote> I<yes | no>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1430
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1431 Put quote around column values in output CSV/TSV text file(s). Possible values:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1432 I<yes or no>. Default value: I<yes>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1433
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1434 =item B<--record> I<recnum | recnums | startrecnum,endrecnum>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1435
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1436 Record number, record numbers or range of records to extract during I<recordnum>, I<recordnums>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1437 and I<recordrange> mode. Input value format is: <num>, <num1,num2,...> and <startnum, endnum>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1438 for I<recordnum>, I<recordnums> and I<recordrange> modes respectively. Default value: none.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1439
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1440 =item B<--RegexIgnoreCase> I<yes or no>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1441
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1442 Specify whether to ignore case during I<datafieldsbyregex> value of B<-m, --mode> option.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1443 Possible values: I<yes or no>. Default value: I<yes>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1444
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1445 =item B<-r, --root> I<rootname>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1446
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1447 New file name is generated using the root: <Root>.<Ext>. Default for new file
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1448 names: <SDFileName><mode>.<Ext>. The file type determines <Ext> value.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1449 The sdf, csv, and tsv <Ext> values are used for SD, comma/semicolon, and tab
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1450 delimited text files respectively. This option is ignored for multiple input files.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1451
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1452 =item B<-s, --seed> I<number>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1453
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1454 Random number seed used for I<randomcmpds> mode. Default:123456789.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1455
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1456 =item B<--StrDataString> I<yes | no>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1457
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1458 Specify whether to write out structure data string to CSV/TSV text file(s). Possible values:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1459 I<yes or no>. Default value: I<no>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1460
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1461 The value of B<StrDataStringDelimiter> option is used as a delimiter to join structure
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1462 data lines into a structure data string.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1463
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1464 This option is ignored during generation of SD file(s).
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1465
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1466 =item B<--StrDataStringDelimiter> I<text>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1467
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1468 Delimiter for joining multiple structure data lines into a string before writing to CSV/TSV text
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1469 file(s). Possible values: I<any alphanumeric text>. Default value: I<|>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1470
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1471 This option is ignored during generation of SD file(s).
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1472
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1473 =item B<--StrDataStringMode> I<StrOnly | StrAndDataFields>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1474
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1475 Specify whether to include SD data fields and values along with the structure data into structure
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1476 data string before writing it out to CSV/TSV text file(s). Possible values: I<StrOnly or StrAndDataFields>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1477 Default value: I<StrOnly>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1478
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1479 The value of B<StrDataStringDelimiter> option is used as a delimiter to join structure
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1480 data lines into a structure data string.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1481
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1482 This option is ignored during generation of SD file(s).
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1483
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1484 =item B<--ValueComparisonMode> I<Numeric | Alphanumeric>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1485
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1486 Specify how to compare data field values during I<datafieldsbyvalue> mode: Compare
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1487 values using either numeric or string (eq, le, ge) comparison operators. Possible values:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1488 I<Numeric or Alphanumeric>. Default value: I<Numeric>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1489
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1490 =item B<-v, --violations> I<number>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1491
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1492 Number of criterion violations allowed for values specified during I<datafieldsbyvalue>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1493 and I<datafieldsbyregex> mode. Default value: I<0>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1494
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1495 =item B<-w, --workingdir> I<dirname>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1496
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1497 Location of working directory. Default: current directory.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1498
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1499 =back
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1500
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1501 =head1 EXAMPLES
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1502
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1503 To retrieve all data fields from SD files and generate CSV text files, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1504
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1505 % ExtractFromSDFiles.pl -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1506 % ExtractFromSDFiles.pl -o *.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1507
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1508 To retrieve all data fields from SD file and generate CSV text files containing
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1509 a column with structure data as a string with | as line delimiter, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1510
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1511 % ExtractFromSDFiles.pl --StrDataString Yes -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1512
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1513 To retrieve MOL_ID data field from SD file and generate CSV text files containing
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1514 a column with structure data along with all data fields as a string with | as line
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1515 delimiter, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1516
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1517 % ExtractFromSDFiles.pl -m datafields -d "Mol_ID" --StrDataString Yes
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1518 --StrDataStringMode StrAndDataFields --StrDataStringDelimiter "|"
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1519 --output text -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1520
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1521 To retrieve common data fields which exist for all the compounds in
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1522 a SD file and generate a TSV text file NewSample.tsv, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1523
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1524 % ExtractFromSDFiles.pl -m commondatafields --outdelim tab -r NewSample
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1525 --output Text -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1526
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1527 To retrieve Mol_ID, MolWeight, and CompoundName data fields from a SD file and generate a
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1528 CSV text file NewSample.csv, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1529
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1530 % ExtractFromSDFiles.pl -m datafields -d "Mol_ID,MolWeight,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1531 CompoundName" -r NewSample --output Text -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1532
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1533 To retrieve compounds which meet a specific set of criteria - MolWt <= 450,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1534 LogP <= 5 and SumNO <= 10 - from a SD file and generate a new SD file NewSample.sdf,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1535 type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1536
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1537 % ExtractFromSDFiles.pl -m datafieldsbyvalue -d "MolWt,450,le,LogP
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1538 ,5,le,SumNO,10,le" -r NewSample -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1539
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1540 To retrieve compounds from a SD file with a specific set of values for MolID and
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1541 generate a new SD file NewSample.sdf, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1542
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1543 % ExtractFromSDFiles.pl -m datafieldbylist -d "Mol_ID,159,4509,4619"
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1544 -r NewSample -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1545
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1546 To retrieve compounds from a SD file with values for MolID not on a list of specified
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1547 values and generate a new SD file NewSample.sdf, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1548
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1549 % ExtractFromSDFiles.pl -m datafieldnotbylist -d "Mol_ID,159,4509,4619"
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1550 -r NewSample -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1551
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1552 To retrieve 10 random compounds from a SD file and generate a new SD file RandomSample.sdf, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1553
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1554 % ExtractFromSDFiles.pl -m randomcmpds -n 10 -r RandomSample
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1555 -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1556
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1557 To retrieve compound record number 10 from a SD file and generate a new SD file NewSample.sdf, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1558
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1559 % ExtractFromSDFiles.pl -m recordnum --record 10 -r NewSample
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1560 -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1561
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1562 To retrieve compound record numbers 10, 20 and 30 from a SD file and generate a new SD file
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1563 NewSample.sdf, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1564
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1565 % ExtractFromSDFiles.pl -m recordnums --record 10,20,30 -r NewSample
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1566 -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1567
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1568 To retrieve compound records between 10 and 20 from SD file and generate a new SD
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1569 file NewSample.sdf, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1570
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1571 % ExtractFromSDFiles.pl -m recordrange --record 10,20 -r NewSample
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1572 -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1573
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1574 =head1 AUTHOR
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1575
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1576 Manish Sud <msud@san.rr.com>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1577
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1578 =head1 SEE ALSO
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1579
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1580 FilterSDFiles.pl, InfoSDFiles.pl, SplitSDFiles.pl, MergeTextFilesWithSD.pl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1581
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1582 =head1 COPYRIGHT
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1583
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1584 Copyright (C) 2015 Manish Sud. All rights reserved.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1585
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1586 This file is part of MayaChemTools.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1587
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1588 MayaChemTools is free software; you can redistribute it and/or modify it under
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1589 the terms of the GNU Lesser General Public License as published by the Free
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1590 Software Foundation; either version 3 of the License, or (at your option)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1591 any later version.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1592
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1593 =cut