Mercurial > repos > deepakjadmin > mayatool3_test2
comparison bin/SplitSDFiles.pl @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4816e4a8ae95 |
---|---|
1 #!/usr/bin/perl -w | |
2 # | |
3 # $RCSfile: SplitSDFiles.pl,v $ | |
4 # $Date: 2015/02/28 20:46:21 $ | |
5 # $Revision: 1.36 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability or fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use FindBin; use lib "$FindBin::Bin/../lib"; | |
31 use Getopt::Long; | |
32 use File::Basename; | |
33 use Benchmark; | |
34 use SDFileUtil; | |
35 use FileUtil; | |
36 | |
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT so that progress messages show up as soon as they are printed...
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName:Starting...\n\n";

# Benchmark objects are created with a direct method call instead of the
# ambiguous indirect object syntax ("new Benchmark")...
$StartTime = Benchmark->new;

# Get the options and setup script. Usage is shown when help is requested or
# no input file name is left on the command line after option parsing...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

my(@SDFilesList);
@SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
my(%SDFilesInfo);
print "Checking input SD file(s)...\n";
RetrieveSDFilesInfo();

# Process input files: only the files marked okay by RetrieveSDFilesInfo are split...
my($FileIndex);
if (@SDFilesList > 1) {
  print "\nProcessing SD files...\n";
}
for $FileIndex (0 .. $#SDFilesList) {
  if ($SDFilesInfo{FileOkay}[$FileIndex]) {
    print "\nProcessing file $SDFilesList[$FileIndex]...\n";
    SplitSDFile($FileIndex);
  }
}
print "\n$ScriptName:Done...\n\n";

# Report the elapsed time for the whole run...
$EndTime = Benchmark->new;
$TotalTime = timediff($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
82 | |
83 ############################################################################### | |
84 | |
# Split one input SD file using the strategy selected by the "Mode" option...
#
# $FileIndex is the position of the file in @SDFilesList. "Files" mode splits
# into a fixed number of files; "Cmpds" mode splits by compounds per file. Any
# other mode value results in no action.
#
sub SplitSDFile {
  my($FileIndex) = @_;
  my $Mode = $OptionsInfo{Mode};

  return SplitSDFileByNumOfFiles($FileIndex) if $Mode =~ /^Files$/i;
  return SplitSDFileByNumOfCmpds($FileIndex) if $Mode =~ /^Cmpds$/i;
}
97 | |
# Split SD into specified number of files...
#
# Counts the compound records in the input file (lines starting with "$$$$"),
# divides that count by the "NumOfFiles" option value to get the number of
# compounds per new file and hands over to SplitSDFileByNumOfFilesAndCmpds.
#
# $FileIndex is the position of the file in @SDFilesList. The file is skipped
# with a warning when it can't be opened or when it contains fewer compounds
# than the number of new files.
#
sub SplitSDFileByNumOfFiles {
  my($FileIndex) = @_;
  my($SDFile, $SDFileHandle, $CmpdCount, $MaxCmpdsPerFile, $MaxNumOfFiles);

  $SDFile = $SDFilesList[$FileIndex];

  # Three-argument open with a lexical handle: the file name is always taken
  # literally and never interpreted as an open mode or a pipe...
  if (!open $SDFileHandle, "<", $SDFile) {
    warn "Warning: Ignoring file $SDFile: Couldn't open it: $! \n";
    return;
  }

  $MaxNumOfFiles = $OptionsInfo{NumOfFiles};

  # Count number of compounds to figure out maximum number of compound per file...
  $CmpdCount = 0;
  while (<$SDFileHandle>) {
    if (/^\$\$\$\$/) {
      $CmpdCount++;
    }
  }
  close $SDFileHandle;

  if ($CmpdCount < $MaxNumOfFiles) {
    warn "Warning: Ignoring file $SDFile: Total number of compounds, $CmpdCount, is smaller than number of new files, $MaxNumOfFiles\n";
    return;
  }

  # Integer division: any remaining compounds end up in the last new file...
  $MaxCmpdsPerFile = int($CmpdCount / $MaxNumOfFiles);

  SplitSDFileByNumOfFilesAndCmpds($FileIndex, $MaxNumOfFiles, $MaxCmpdsPerFile);
}
131 | |
# Split SD into files holding the number of compounds given by "NumOfCmpds"...
#
# A value of exactly one compound per file is handled by a dedicated routine;
# every other value goes through the generic per file splitting.
#
sub SplitSDFileByNumOfCmpds {
  my($FileIndex) = @_;

  return SplitSDFileByOneCmpdPerFile($FileIndex) if $OptionsInfo{NumOfCmpds} == 1;

  SplitSDFileByNumOfCmpdsPerFile($FileIndex);
}
144 | |
# Split SD into files containing one compound per file...
#
# $FileIndex is the position of the file in @SDFilesList. Each compound record
# is written to its own file. The new file name root comes from a data field
# value ("DataField" compounds mode) or the first line of the record ("MolName"
# compounds mode), reduced to the characters a-zA-Z0-9_. Otherwise, or when
# that yields nothing usable, the name is <OutFileRoot>Cmpd<RecordNumber>.
#
# The input file is skipped with a warning when it can't be opened. Existing
# output files are skipped with a warning unless overwriting is enabled.
# Failure to create or completely write a new file is fatal.
#
sub SplitSDFileByOneCmpdPerFile {
  my($FileIndex) = @_;
  my($SDFile, $SDFileHandle, $NewSDFile, $NewSDFileRoot, $FileExt, $OutFileRoot, $OverwriteFiles, $UseDataField, $DataFieldName, $UseMolName, $CmpdCount, $CmpdString, @CmpdLines, %DataFieldValues);

  $SDFile = $SDFilesList[$FileIndex];

  # Three-argument open with a lexical handle: the file name is always taken
  # literally and never interpreted as an open mode or a pipe...
  if (!open $SDFileHandle, "<", $SDFile) {
    warn "Warning: Ignoring file $SDFile: Couldn't open it: $! \n";
    return;
  }

  print "\n";

  $CmpdCount = 0;

  $FileExt = $SDFilesInfo{FileExt}[$FileIndex];

  $OutFileRoot = $SDFilesInfo{OutFileRoot}[$FileIndex];
  $OverwriteFiles = $OptionsInfo{OverwriteFiles};

  $UseDataField = ($OptionsInfo{CmpdsMode} =~ /^DataField$/i) ? 1 : 0;
  $DataFieldName = $OptionsInfo{DataField};

  $UseMolName = ($OptionsInfo{CmpdsMode} =~ /^MolName$/i) ? 1 : 0;

  # A lexical file handle is a glob reference just like \*SDFILE, so it can be
  # handed to ReadCmpdString unchanged. NOTE(review): ReadCmpdString comes from
  # SDFileUtil and presumably returns one compound record per call and a false
  # value at the end of the file - confirm against that module.
  CMPDSTRING: while ($CmpdString = ReadCmpdString($SDFileHandle)) {
    $CmpdCount++;

    # Setup SD file name...
    $NewSDFileRoot = '';
    if ($UseDataField) {
      @CmpdLines = split "\n", $CmpdString;
      %DataFieldValues = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
      if (exists $DataFieldValues{$DataFieldName}) {
        $NewSDFileRoot = $DataFieldValues{$DataFieldName};
      }
    }
    elsif ($UseMolName) {
      @CmpdLines = split "\n", $CmpdString;
      $NewSDFileRoot = $CmpdLines[0];
    }

    # Drop any invalid file name characters in data field or molname values...
    if ($NewSDFileRoot) {
      $NewSDFileRoot =~ s/[^a-zA-Z0-9_]//g;
    }

    # Fall back plan for SD file name...
    if (!$NewSDFileRoot) {
      $NewSDFileRoot = "${OutFileRoot}Cmpd${CmpdCount}";
    }

    $NewSDFile = "${NewSDFileRoot}.${FileExt}";

    if (!$OverwriteFiles && -e $NewSDFile) {
      warn "Warning: Ignoring compound number, $CmpdCount, in $SDFile: New SD file, $NewSDFile, already exists\n";
      next CMPDSTRING;
    }

    # Write out new SD file...
    print "Generating $NewSDFile file\n";
    open my $NewSDFileHandle, ">", $NewSDFile or die "Error: Can't open $NewSDFile: $! \n";
    print {$NewSDFileHandle} "$CmpdString\n";

    # Buffered write errors only surface at close, so check it...
    close $NewSDFileHandle or die "Error: Couldn't write $NewSDFile: $! \n";
  }
  close $SDFileHandle;
}
218 | |
# Split SD into files containing specified number of compounds per file...
#
# Counts the compound records in the input file (lines starting with "$$$$"),
# works out how many new files are needed to hold "NumOfCmpds" compounds each
# and hands over to SplitSDFileByNumOfFilesAndCmpds.
#
# $FileIndex is the position of the file in @SDFilesList. The file is skipped
# with a warning when it can't be opened or when it doesn't contain more
# compounds than requested for a single new file.
#
sub SplitSDFileByNumOfCmpdsPerFile {
  my($FileIndex) = @_;
  my($SDFile, $SDFileHandle, $CmpdCount, $MaxCmpdsPerFile, $MaxNumOfFiles);

  $SDFile = $SDFilesList[$FileIndex];

  # Three-argument open with a lexical handle: the file name is always taken
  # literally and never interpreted as an open mode or a pipe...
  if (!open $SDFileHandle, "<", $SDFile) {
    warn "Warning: Ignoring file $SDFile: Couldn't open it: $! \n";
    return;
  }

  $MaxCmpdsPerFile = $OptionsInfo{NumOfCmpds};

  # Count number of compounds to figure out maximum number of files...
  $CmpdCount = 0;
  while (<$SDFileHandle>) {
    if (/^\$\$\$\$/) {
      $CmpdCount++;
    }
  }
  close $SDFileHandle;

  # Nothing to split when everything fits into a single new file...
  if ($CmpdCount <= $MaxCmpdsPerFile) {
    warn "Warning: Ignoring file $SDFile: Total number of compounds, $CmpdCount, is <= specified number of compounds per file, $MaxCmpdsPerFile\n";
    return;
  }

  # Round up so that a partially filled last file is accounted for...
  $MaxNumOfFiles = int($CmpdCount / $MaxCmpdsPerFile);
  if (($MaxNumOfFiles * $MaxCmpdsPerFile) < $CmpdCount) {
    $MaxNumOfFiles++;
  }

  SplitSDFileByNumOfFilesAndCmpds($FileIndex, $MaxNumOfFiles, $MaxCmpdsPerFile);
}
256 | |
# Split SD files into specified number of files with specified number of compounds
# in each file...
#
# Parameters:
#   $FileIndex          - position of the input file in @SDFilesList
#   $NumOfFiles         - number of new files to generate
#   $NumOfCmpdsPerFile  - number of compounds written to each new file
#
# New files are named <OutFileRoot>Part<Number>.<FileExt>. Records are copied
# line by line with Windows and Mac line endings converted to "\n". The last
# new file is never switched away from, so it also receives any compounds left
# over after every file got its share.
#
# The input file is skipped with a warning when it can't be opened or when any
# of the new files already exists and overwriting is not enabled. Failure to
# create or completely write a new file is fatal.
#
sub SplitSDFileByNumOfFilesAndCmpds {
  my($FileIndex, $NumOfFiles, $NumOfCmpdsPerFile) = @_;
  my($SDFile, $SDFileHandle, $NewSDFileHandle, $CmpdCount, $NewFileIndex, $NewFileName, $MaxCmpdsCount, @NewSDFilesList);

  $SDFile = $SDFilesList[$FileIndex];

  # Open the input file just once, using three-argument open with a lexical
  # handle so that the file name is always taken literally...
  if (!open $SDFileHandle, "<", $SDFile) {
    warn "Warning: Ignoring file $SDFile: Couldn't open it: $! \n";
    return;
  }

  # Setup new file names list...
  @NewSDFilesList = ();
  for $NewFileIndex (1 .. $NumOfFiles) {
    $NewFileName = $SDFilesInfo{OutFileRoot}[$FileIndex] . "Part${NewFileIndex}." . $SDFilesInfo{FileExt}[$FileIndex];
    if (!$OptionsInfo{OverwriteFiles} && -e $NewFileName) {
      warn "Warning: Ignoring file $SDFile: New SD file, $NewFileName, already exists\n";
      # Don't leave the input handle open on the way out...
      close $SDFileHandle;
      return;
    }
    push @NewSDFilesList, $NewFileName;
  }

  # Running compound count at which the current new file is complete...
  $MaxCmpdsCount = $NumOfCmpdsPerFile;

  $CmpdCount = 0;
  $NewFileIndex = 1;

  open $NewSDFileHandle, ">", $NewSDFilesList[$NewFileIndex - 1] or die "Error: Can't open $NewSDFilesList[$NewFileIndex -1]: $! \n";
  print "\nGenerating $NewSDFilesList[$NewFileIndex - 1] file\n";

  while (my $Line = <$SDFileHandle>) {
    # Change Windows and Mac new line char to UNIX...
    $Line =~ s/(\r\n)|(\r)/\n/g;
    print {$NewSDFileHandle} $Line;

    # Only the end of a compound record can trigger a switch to the next file...
    if ($Line !~ /^\$\$\$\$/) {
      next;
    }
    $CmpdCount++;

    if ($NewFileIndex > $NumOfFiles || $CmpdCount < $MaxCmpdsCount) {
      next;
    }

    # The current new file is complete. The last new file stays open to collect
    # any remaining compounds...
    if ($NewFileIndex < $NumOfFiles) {
      close $NewSDFileHandle or die "Error: Couldn't write $NewSDFilesList[$NewFileIndex - 1]: $! \n";
    }
    $NewFileIndex++;
    $MaxCmpdsCount = $NumOfCmpdsPerFile * $NewFileIndex;

    if ($NewFileIndex <= $NumOfFiles) {
      open $NewSDFileHandle, ">", $NewSDFilesList[$NewFileIndex - 1] or die "Error: Can't open $NewSDFilesList[$NewFileIndex - 1]: $! \n";
      print "Generating $NewSDFilesList[$NewFileIndex - 1] file...\n";
    }
  }

  # Buffered write errors only surface at close, so check it...
  close $NewSDFileHandle or die "Error: Couldn't write $NewSDFilesList[$#NewSDFilesList]: $! \n";
  close $SDFileHandle;
}
318 | |
# Collect per file information for all input SD files...
#
# Fills %SDFilesInfo with three lists indexed like @SDFilesList: FileOkay (1
# for files to process, 0 otherwise), FileExt and OutFileRoot. Files that
# don't exist or fail the SD file type check are reported with a warning and
# left marked as not okay.
#
# The "OutFileRoot" option is honored only for a single input file; otherwise
# the root of the input file name is used.
#
sub RetrieveSDFilesInfo {
  %SDFilesInfo = (FileOkay => [], FileExt => [], OutFileRoot => []);

  # Root name option applies only to a lone input file...
  my $UseRootOption = ($OptionsInfo{OutFileRoot} && (@SDFilesList == 1));

  for my $Index (0 .. $#SDFilesList) {
    my $SDFile = $SDFilesList[$Index];

    # Start out pessimistic; values are filled in once all checks pass...
    $SDFilesInfo{FileOkay}[$Index] = 0;
    $SDFilesInfo{FileExt}[$Index] = '';
    $SDFilesInfo{OutFileRoot}[$Index] = '';

    unless (-e $SDFile) {
      warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
      next;
    }
    unless (CheckFileType($SDFile, "sd sdf")) {
      warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
      next;
    }

    # Setup output file root...
    my($FileDir, $FileName, $FileExt) = ParseFileName($SDFile);
    my $OutFileRoot;

    if ($UseRootOption) {
      # Use just the name part when the specified root parses into a name and
      # an extension; otherwise, use the option value as it is...
      my($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
      $OutFileRoot = ($RootFileName && $RootFileExt) ? $RootFileName : $OptionsInfo{OutFileRoot};
    }
    else {
      $OutFileRoot = "$FileName";
    }

    $SDFilesInfo{FileOkay}[$Index] = 1;
    $SDFilesInfo{FileExt}[$Index] = $FileExt;
    $SDFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
  }
}
369 | |
# Copy the command line option values from %Options into %OptionsInfo...
#
# A data field name is mandatory, and only recorded, for the "DataField"
# compounds mode during "Cmpds" mode; its absence is a fatal error. Overwrite
# is normalized to 1 or 0, and a missing root name is stored as 0.
#
sub ProcessOptions {
  %OptionsInfo = (
    Mode => $Options{mode},
    CmpdsMode => $Options{cmpdsmode},
    NumOfFiles => $Options{numfiles},
    NumOfCmpds => $Options{numcmpds},
    DataField => '',
  );

  if ($Options{mode} =~ /^Cmpds$/i && $Options{cmpdsmode} =~ /^DataField$/i) {
    $Options{datafield}
      or die "Error: You must specify a value for \"-d, --DataField\" option in \"DataField\" value of \"-c, --CmpdsMode\" during \"Cmpds\" \"-m, --mode\" value. \n";
    $OptionsInfo{DataField} = $Options{datafield};
  }

  $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;

  $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;
}
393 | |
394 | |
# Setup script usage and retrieve command line arguments specified using various options...
#
# Resets %Options to the default values, parses the command line into it with
# GetOptions, changes into the working directory when one is specified and
# validates the values of the mode and count options. Any invalid option or
# value is a fatal error.
#
sub SetupScriptUsage {

  # Retrieve all the options...
  %Options = ();

  # Default values; GetOptions overrides them with any specified values...
  $Options{cmpdsmode} = 'RootPrefix';
  $Options{mode} = 'Files';

  $Options{numfiles} = 2;
  $Options{numcmpds} = 1;

  if (!GetOptions(\%Options, "cmpdsmode|c=s", "datafield|d=s", "help|h", "mode|m=s", "numfiles|n=i", "numcmpds=i", "overwrite|o", "root|r=s", "workingdir|w=s")) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }
  if ($Options{cmpdsmode} !~ /^(DataField|MolName|RootPrefix)$/i) {
    die "Error: The value specified, $Options{cmpdsmode}, for option \"-c, --CmpdsMode\" is not valid. Allowed values: DataField, MolName, RootPrefix\n";
  }
  if ($Options{mode} !~ /^(Cmpds|Files)$/i) {
    die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: Cmpds, Files\n";
  }
  if ($Options{numfiles} < 2) {
    die "Error: The value specified, $Options{numfiles}, for option \"-n --numfiles\" is not valid. Allowed values: >= 2 \n";
  }
  if ($Options{numcmpds} < 1) {
    # This option has no short form; "-n" belongs to "--numfiles"...
    die "Error: The value specified, $Options{numcmpds}, for option \"--numcmpds\" is not valid. Allowed values: >= 1 \n";
  }
}
430 | |
431 __END__ | |
432 | |
433 =head1 NAME | |
434 | |
435 SplitSDFiles.pl - Split SDFile(s) into multiple SD files | |
436 | |
437 =head1 SYNOPSIS | |
438 | |
439 SplitSDFiles.pl SDFile(s)... | |
440 | |
441 SplitSDFiles.pl [B<-c, --CmpdsMode> DataField | MolName | RootPrefix] | |
442 [B<-d, --DataField> DataFieldName] [B<-h, --help>] [B<-m, --mode> Cmpds | Files] | |
443 [B<-n, --numfiles> number] [B<--numcmpds> number] [B<-o, --overwrite>] | |
444 [B<-r, --root> rootname] [B<-w,--workingdir> dirname] SDFile(s)... | |
445 | |
446 =head1 DESCRIPTION | |
447 | |
448 Split I<SDFile(s)> into multiple SD files. Each new SDFile contains a compound | |
449 subset of similar size from the initial file. Multiple I<SDFile(s)> names are separated | |
450 by space. The valid file extensions are I<.sdf> and I<.sd>. All other file names are | |
451 ignored. All the SD files in a current directory can be specified either by I<*.sdf> | |
452 or the current directory name. | |
453 | |
454 =head1 OPTIONS | |
455 | |
456 =over 4 | |
457 | |
458 =item B<-c, --CmpdsMode> I<DataField | MolName | RootPrefix> | |
459 | |
460 This option is only used during I<Cmpds> value of B<-m, --mode> option with | |
461 specified B<--numcmpds> value of 1. | |
462 | |
463 Specify how to generate new file names during I<Cmpds> value of B<-m, --mode> | |
464 option: use I<SDFile(s)> datafield value or molname line for a specific compound; | |
465 generate a sequential ID using root prefix specified by B<-r, --root> option. | |
466 | |
467 Possible values: I<DataField | MolName | RootPrefix>. | |
468 Default: I<RootPrefix>. | |
469 | |
470 For empty I<MolName> and I<DataField> values during these specified modes, file | |
471 name is automatically generated using I<RootPrefix>. | |
472 | |
473 For I<RootPrefix> value of B<-c, --CmpdsMode> option, new file names are | |
474 generated by appending compound record number to value of B<-r, --root> option. | |
475 For example: I<RootName>Cmpd<RecordNumber>.sdf. | |
476 | |
477 Allowed characters in file names are: a-zA-Z0-9_. All other characters in datafield | |
478 values, molname line, and root prefix are ignored during generation of file names. | |
479 | |
480 =item B<-d, --DataField> I<DataFieldName> | |
481 | |
482 This option is only used during I<DataField> value of B<-c, --CmpdsMode> option. | |
483 | |
484 Specify I<SDFile(s)> datafield label name whose value is used for generation of new file | |
485 name for a specific compound. Default value: I<None>. | |
486 | |
487 =item B<-h, --help> | |
488 | |
489 Print this help message. | |
490 | |
491 =item B<-m, --mode> I<Cmpds | Files> | |
492 | |
493 Specify how to split I<SDFile(s)>: split into files with each file containing specified | |
494 number of compounds or split into a specified number of files. | |
495 | |
496 Possible values: I<Cmpds | Files>. Default: I<Files>. | |
497 | |
498 For I<Cmpds> value of B<-m, --mode> option, value of B<--numcmpds> option | |
499 determines the number of new files. And value of B<-n, --numfiles> option is | |
500 used to figure out the number of new files for I<Files> value of B<-m, --mode> option. | |
501 | |
502 =item B<-n, --numfiles> I<number> | |
503 | |
504 Number of new files to generate for each I<SDFile(s)>. Default: I<2>. | |
505 | |
506 This value is only used during I<Files> value of B<-m, --mode> option. | |
507 | |
508 =item B<--numcmpds> I<number> | |
509 | |
510 Number of compounds in each new file corresponding to each I<SDFile(s)>. | |
511 Default: I<1>. | |
512 | |
513 This value is only used during I<Cmpds> value of B<-m, --mode> option. | |
514 | |
515 =item B<-o, --overwrite> | |
516 | |
517 Overwrite existing files. | |
518 | |
519 =item B<-r, --root> I<rootname> | |
520 | |
521 New SD file names are generated using the root: <Root>Part<Count>.sdf. | |
522 Default new file names: <InitialSDFileName>Part<Count>.sdf. This option | |
523 is ignored for multiple input files. | |
524 | |
525 =item B<-w,--workingdir> I<dirname> | |
526 | |
527 Location of working directory. Default: current directory. | |
528 | |
529 =back | |
530 | |
531 =head1 EXAMPLES | |
532 | |
533 To split each SD file into 5 new SD files, type: | |
534 | |
535 % SplitSDFiles.pl -n 5 -o Sample1.sdf Sample2.sdf | |
536 % SplitSDFiles.pl -n 5 -o *.sdf | |
537 | |
538 To split Sample1.sdf into 10 new NewSample*.sdf files, type: | |
539 | |
540 % SplitSDFiles.pl -m Files -n 10 -r NewSample -o Sample1.sdf | |
541 | |
542 To split Sample1.sdf into new NewSample*.sdf files containing maximum of 5 compounds | |
543 in each file, type: | |
544 | |
545 % SplitSDFiles.pl -m Cmpds --numcmpds 5 -r NewSample -o Sample1.sdf | |
546 | |
547 To split Sample1.sdf into new SD files containing one compound each with new file | |
548 names corresponding to molname line, type: | |
549 | |
550 % SplitSDFiles.pl -m Cmpds --numcmpds 1 -c MolName -o Sample1.sdf | |
551 | |
552 To split Sample1.sdf into new SD files containing one compound each with new file | |
553 names corresponding to value of datafield MolID, type: | |
554 | |
555 % SplitSDFiles.pl -m Cmpds --numcmpds 1 -c DataField -d MolID | |
556 -o Sample1.sdf | |
557 | |
558 =head1 AUTHOR | |
559 | |
560 Manish Sud <msud@san.rr.com> | |
561 | |
562 =head1 SEE ALSO | |
563 | |
564 InfoSDFiles.pl, JoinSDFiles.pl, MolFilesToSD.pl, SDToMolFiles.pl | |
565 | |
566 =head1 COPYRIGHT | |
567 | |
568 Copyright (C) 2015 Manish Sud. All rights reserved. | |
569 | |
570 This file is part of MayaChemTools. | |
571 | |
572 MayaChemTools is free software; you can redistribute it and/or modify it under | |
573 the terms of the GNU Lesser General Public License as published by the Free | |
574 Software Foundation; either version 3 of the License, or (at your option) | |
575 any later version. | |
576 | |
577 =cut |