comparison mayachemtools/bin/JoinSDFiles.pl @ 0:73ae111cf86f draft

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 11:55:01 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:73ae111cf86f
1 #!/usr/bin/perl -w
2 #
3 # $RCSfile: JoinSDFiles.pl,v $
4 # $Date: 2015/02/28 20:46:20 $
5 # $Revision: 1.35 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability or fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use FindBin; use lib "$FindBin::Bin/../lib";
31 use Getopt::Long;
32 use File::Basename;
33 use Benchmark;
34 use SDFileUtil;
35 use FileUtil;
36
# Main script flow: parse the command line, expand and validate the input SD
# file names, work out the output file name, then concatenate the usable
# input files into it and report the elapsed time.
#
# The variables declared here at file scope are shared with the subroutines
# defined further down in this file.
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName:Starting...\n\n";
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

my(@SDFilesList);
@SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");
# Joining needs at least two input files. Testing for "fewer than two" rather
# than "exactly one" also stops the script when the expansion yields no file
# names at all, instead of going on to parse an undefined first file name.
if (@SDFilesList < 2) {
  die "Error: Specify more than one SD file.\n";
}

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
print "Checking input SD files...\n";
my(%SDFilesInfo);
RetrieveSDFilesInfo();

# Join files...
print "\nGenerating new SD file $OptionsInfo{NewSDFile}...\n";
JoinSDFiles();

print "\n$ScriptName:Done...\n\n";

$EndTime = Benchmark->new;
$TotalTime = timediff($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
78
79 ###############################################################################
80
# Join all valid SD files...
#
# Copies every input file flagged as usable in $SDFilesInfo{FileOkay} into the
# file named by $OptionsInfo{NewSDFile}, in the order the files appear in
# @SDFilesList. Carriage-return and CR/LF line endings are rewritten as "\n"
# during the copy.
#
# Takes no arguments and returns nothing useful. Dies when the output file
# can't be opened or closed cleanly, or when an input file can't be opened.
sub JoinSDFiles {
  my($SDFile, $NewSDFile, $NewSDFileHandle, $SDFileHandle, $Line, $LastLine);

  $NewSDFile = $OptionsInfo{NewSDFile};

  # Three-argument open with lexical handles: file names are taken literally
  # and are never scanned for mode characters or surrounding whitespace.
  open $NewSDFileHandle, ">", $NewSDFile or die "Error: Couldn't open $NewSDFile: $! \n";
  FILELIST: for my $FileIndex (0 .. $#SDFilesList) {
    if (!$SDFilesInfo{FileOkay}[$FileIndex]) {
      next FILELIST;
    }
    $SDFile = $SDFilesList[$FileIndex];
    print "\nProcessing file $SDFile...\n";

    open $SDFileHandle, "<", $SDFile or die "Error: Couldn't open $SDFile: $! \n";
    $LastLine = undef;
    while (defined($Line = <$SDFileHandle>)) {
      $Line =~ s/(\r\n)|(\r)/\n/g;
      print $NewSDFileHandle $Line;
      $LastLine = $Line;
    }
    close $SDFileHandle;

    # An input whose final line has no terminator would otherwise be fused
    # with the first line of the next file; finish that line explicitly.
    if (defined($LastLine) && $LastLine !~ /\n\z/) {
      print $NewSDFileHandle "\n";
    }
  }

  # Buffered write failures only become visible when the handle is closed.
  close $NewSDFileHandle or die "Error: Couldn't close $NewSDFile: $! \n";
}
105
# Retrieve information about SD files...
#
# Rebuilds %SDFilesInfo so that $SDFilesInfo{FileOkay}[$Index] is 1 for each
# entry of @SDFilesList that exists, passes CheckFileType for the "sdf sd"
# extensions and can be opened for reading, and 0 otherwise. A warning is
# issued for every file that gets ignored; nothing is fatal here.
#
# Takes no arguments and returns nothing useful.
sub RetrieveSDFilesInfo {
  my($SDFile, $SDFileHandle);

  %SDFilesInfo = ();
  @{$SDFilesInfo{FileOkay}} = ();

  FILELIST: for my $Index (0 .. $#SDFilesList) {
    # Assume the worst until every check below has passed.
    $SDFilesInfo{FileOkay}[$Index] = 0;

    $SDFile = $SDFilesList[$Index];
    if (!(-e $SDFile)) {
      warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($SDFile, "sdf sd")) {
      warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
      next FILELIST;
    }
    # Readability probe only. The explicit read mode and lexical handle keep
    # the file name from being interpreted as anything but a path.
    if (!open($SDFileHandle, "<", $SDFile)) {
      warn "Warning: Ignoring file $SDFile: Couldn't open it: $! \n";
      next FILELIST;
    }
    close $SDFileHandle;

    $SDFilesInfo{FileOkay}[$Index] = 1;
  }
}
134
# Process option values...
#
# Resets %OptionsInfo and fills in OutFileRoot, Overwrite and NewSDFile from
# %Options. The output name comes from the "root" option when it is given,
# and is otherwise built from the name of the first input file and the number
# of input files.
#
# Dies when the output file already exists and "overwrite" is not set, or
# when a name built from "root" matches an input file name, ignoring case.
sub ProcessOptions {
  my($NewSDFile);

  %OptionsInfo = ();

  $OptionsInfo{OutFileRoot} = $Options{root} || undef;
  $OptionsInfo{Overwrite} = $Options{overwrite} || undef;

  # Only the name and extension parts are used; the directory part is dropped.
  my(undef, $NamePart, $ExtPart) = ParseFileName($Options{root} ? $Options{root} : $SDFilesList[0]);

  if (!$Options{root}) {
    $NewSDFile = $NamePart . "1To" . scalar(@SDFilesList) . "Joined.sdf";
  }
  elsif ($NamePart && $ExtPart) {
    $NewSDFile = $NamePart . "." . $ExtPart;
  }
  else {
    $NewSDFile = $Options{root} . ".sdf";
  }

  if (!$Options{overwrite} && -e $NewSDFile) {
    die "Error: The file $NewSDFile already exists.\n";
  }

  if ($Options{root}) {
    # A user-chosen name must not collide with any of the input files.
    for my $InputFile (@SDFilesList) {
      next if lc($NewSDFile) ne lc($InputFile);
      die "Error: Output filename, $NewSDFile, is similar to a input file name.\nSpecify a different name using \"-r --root\" option or use default name.\n";
    }
  }
  $OptionsInfo{NewSDFile} = $NewSDFile;
}
176
# Setup script usage and retrieve command line arguments specified using various options...
#
# Resets %Options and fills it from the command line through GetOptions.
# Recognized options are help (-h), overwrite (-o), root (-r, takes a string)
# and workingdir (-w, takes a string). When a working directory is given,
# the script changes into it.
#
# Dies on unknown or malformed options, when the working directory value is
# not a directory, or when changing into it fails.
sub SetupScriptUsage {
  my(@OptionSpecs);

  # Retrieve all the options...
  %Options = ();
  @OptionSpecs = ("help|h", "overwrite|o", "root|r=s", "workingdir|w=s");
  GetOptions(\%Options, @OptionSpecs)
    or die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";

  if ($Options{workingdir}) {
    -d $Options{workingdir}
      or die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }
}
192
193 __END__
194
195 =head1 NAME
196
197 JoinSDFiles.pl - Join multiple SDFiles into a single SDFile
198
199 =head1 SYNOPSIS
200
201 JoinSDFiles.pl SDFiles...
202
203 JoinSDFiles.pl [B<-h, --help>] [B<-o, --overwrite>] [B<-r, --root> rootname]
204 [B<-w, --workingdir> dirname] SDFiles...
205
206 =head1 DESCRIPTION
207
208 Multiple I<SDFiles> are joined to generate a single SDFile. The file names
209 are separated by spaces. The valid file extensions are I<.sdf> and I<.sd>.
210 All other file names are ignored. All the SD files in the current directory can be
211 specified either by I<*.sdf> or the current directory name.
212
213 =head1 OPTIONS
214
215 =over 4
216
217 =item B<-h, --help>
218
219 Print this help message.
220
221 =item B<-o, --overwrite>
222
223 Overwrite existing files.
224
225 =item B<-r, --root> I<rootname>
226
227 The new SD file name is generated using the root: <Root>.sdf. Default file
228 name: <FirstSDFileName>1To<Count>Joined.sdf.
229
230 =item B<-w, --workingdir> I<dirname>
231
232 Location of working directory. Default: current directory.
233
234 =back
235
236 =head1 EXAMPLES
237
238 To join SD files, type:
239
240 % JoinSDFiles.pl -o Sample1.sdf Sample2.sdf
241 % JoinSDFiles.pl -o *.sdf
242
243 To join all Sample*.sdf files in a directory, SomeDir, and generate a new file NewSample.sdf, type:
244
245 % JoinSDFiles.pl -r NewSample -w SomeDir -o *.sdf
246
247 =head1 AUTHOR
248
249 Manish Sud <msud@san.rr.com>
250
251 =head1 SEE ALSO
252
253 InfoSDFiles.pl, MolFilesToSD.pl, SDToMolFiles.pl, SplitSDFiles.pl
254
255 =head1 COPYRIGHT
256
257 Copyright (C) 2015 Manish Sud. All rights reserved.
258
259 This file is part of MayaChemTools.
260
261 MayaChemTools is free software; you can redistribute it and/or modify it under
262 the terms of the GNU Lesser General Public License as published by the Free
263 Software Foundation; either version 3 of the License, or (at your option)
264 any later version.
265
266 =cut