comparison mayachemtools/bin/DBSQLToTextFiles.pl @ 0:73ae111cf86f draft

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 11:55:01 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:73ae111cf86f
1 #!/usr/bin/perl -w
2 #
3 # $RCSfile: DBSQLToTextFiles.pl,v $
4 # $Date: 2015/02/28 20:46:19 $
5 # $Revision: 1.32 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability or fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use FindBin; use lib "$FindBin::Bin/../lib";
31 use Getopt::Long;
32 use File::Basename;
33 use Text::ParseWords;
34 use Benchmark;
35 use FileUtil;
36 use TextUtil;
37 use DBUtil;
38
# File-level state shared with the subroutines defined further down: the
# script name, the parsed command line options and the timing information.
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

# Processed option values; assigned by ProcessOptions()...
my($DBDriver, $DBHost, $DBName, $DBUser, $DBPassword, $DBMode, $ExportDataLabels, $ExportLOBs, $OutDelim, $OutQuote, $ReplaceNullStr);
ProcessOptions();

# Collect input parameters information: RetrieveDBInfo() fills these two
# parallel arrays, one SQL statement and one output file name per index...
print "Checking input parameter(s)...\n";
my(@DBSQLStatements, @DBTextFiles);
RetrieveDBInfo();

# Connect to database...
my($DBHandle);
print "Connecting to $DBDriver:database=$DBName as $DBUser...\n";
$DBHandle = DBConnect($DBDriver, $DBName, $DBHost, $DBUser, $DBPassword);

# Generate text files...
if (@DBTextFiles > 1) {
  print "Generating text files...\n";
}
TEXTFILE: for my $Index (0 .. $#DBTextFiles) {
  my($TextFile, $SQL, $TextFileHandle);

  $TextFile = $DBTextFiles[$Index];
  $SQL = $DBSQLStatements[$Index];

  if (@DBTextFiles > 1) {
    print "\nGenerating text file $TextFile...\n";
  }
  else {
    print "Generating text file $TextFile...\n";
  }
  print "Processing SQL statement \"$SQL\"...\n";

  # Three-argument open with a lexical handle: the file name is never
  # interpreted as an open mode and the handle can't clash with other globals...
  if (!open $TextFileHandle, ">", $TextFile) {
    warn "Warning: Abandoning $TextFile generation: Couldn't open it: $! \n";
    next TEXTFILE;
  }

  # A true return value from DBSQLToTextFile is treated as a failure...
  if (DBSQLToTextFile($DBHandle, $SQL, $TextFileHandle, $OutDelim, $OutQuote, $ExportDataLabels, $ExportLOBs, $ReplaceNullStr)) {
    warn "Warning: Abandoning $TextFile generation...\n";
    # Don't leave the output handle open on the failure path...
    close $TextFileHandle;
    next TEXTFILE;
  }

  # Buffered write errors only surface at close time...
  close $TextFileHandle or warn "Warning: Couldn't close $TextFile: $! \n";
}
print "\nDisconnecting from $DBDriver:database=$DBName...\n";
DBDisconnect($DBHandle);

print "$ScriptName:Done...\n\n";

$EndTime = Benchmark->new;
$TotalTime = timediff ($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
104
105 ###############################################################################
106
# Collect input parameters information...
#
# Goes over all command line parameters in @ARGV and sets up two parallel
# file-level arrays: @DBSQLStatements, containing SQL statements to execute, and
# @DBTextFiles, containing corresponding output file names.
#
# Parameters are interpreted according to $DBMode:
#
#   SQLStatement - Each parameter is a SQL statement. Output file name:
#                  <Root>.<Ext> for "--root" with a single parameter;
#                  otherwise, SQLStatement<No>.<Ext>.
#   SQLFile      - Each parameter is a file containing SQL statements delimited
#                  by ";". Lines starting with "#" or "-" are ignored. Output
#                  file names: <Root><No>.<Ext> for "--root" with a single
#                  parameter; otherwise, <SQLFileName>SQLStatement<No>.<Ext>.
#
# <Ext> is tsv for tab delimited output and csv otherwise. Missing or unreadable
# SQL files are skipped with a warning. The script dies when an output file
# already exists and "--overwrite" hasn't been specified.
#
sub RetrieveDBInfo {
  my($FileExt, $UserFileName, $UseRootName);

  # Setup out file ext...
  $FileExt = ($Options{outdelim} =~ /^tab$/i) ? "tsv" : "csv";

  # Get user specified information: root name is only used for a single input parameter...
  $UseRootName = ($Options{root} && (@ARGV == 1)) ? 1 : 0;
  if ($UseRootName) {
    my($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($Options{root});
    $UserFileName = ($RootFileName && $RootFileExt) ? $RootFileName : $Options{root};
  }

  my($SQL, $SQLNo, $FileName);

  # Go over all the input parameters...
  @DBSQLStatements = ();
  @DBTextFiles = ();
  $SQLNo = 0;
  PARAM: for my $Param (@ARGV) {
    if ($DBMode =~ /^SQLStatement$/i) {
      $SQLNo++;
      $SQL = $Param;
      $FileName = $UseRootName ? $UserFileName : ("SQLStatement" . "$SQLNo");
      $FileName .= ".$FileExt";
      if (!$Options{overwrite} && -e $FileName) {
        die "Error: The file $FileName already exists.\n";
      }
      push @DBSQLStatements, $SQL;
      push @DBTextFiles, $FileName;
    }
    elsif ($DBMode =~ /^SQLFile$/i) {
      # Read SQL file...
      my($SQLFile) = $Param;
      if (! -e $SQLFile) {
        warn "Warning: Ignoring file $SQLFile: It doesn't exist\n";
        next PARAM;
      }
      # Three-argument open with a lexical handle: the file name is never
      # interpreted as an open mode...
      my($SQLFileHandle);
      if (!open $SQLFileHandle, "<", $SQLFile) {
        warn "Warning: Ignoring file $SQLFile: Couldn't open it: $! \n";
        next PARAM;
      }
      my($Line, $SQLString);
      $SQLString = "";
      # NOTE(review): Loop ends on the first false value returned by GetTextLine;
      # its end of file and empty line behavior is not visible here - confirm.
      LINE: while ($Line = GetTextLine($SQLFileHandle)) {
        # Ignore comments line...
        if ($Line =~ /^[#-]/) {
          next LINE;
        }
        # Keep tokens on adjacent lines apart: without a separator, a statement
        # spanning multiple lines gets fused together should GetTextLine strip
        # line terminators (presumably it does - confirm)...
        $SQLString .= "$Line ";
      }
      close $SQLFileHandle;

      # Extract select SQL statements...
      my($SQLFileDir, $SQLFileName, $SQLFileExt) = ParseFileName($SQLFile);
      $SQLNo = 0;
      SQLSPLIT: for my $SQLSplit (split /;/, $SQLString) {
        # Ignore empty fragments such as the one after the last ";"...
        if ($SQLSplit !~ /\S/) {
          next SQLSPLIT;
        }
        $SQLNo++;
        $FileName = $UseRootName ? ("$UserFileName" . "$SQLNo") : ("$SQLFileName" . "SQLStatement" . "$SQLNo");
        $FileName .= ".$FileExt";
        if (!$Options{overwrite} && -e $FileName) {
          die "Error: The file $FileName already exists.\n";
        }
        push @DBSQLStatements, $SQLSplit;
        push @DBTextFiles, $FileName;
      }
    }
  }
}
184
# Process option values...
#
# Converts validated values in %Options, along with DBI_DRIVER, DBI_HOST,
# DBI_NAME, DBI_USER and DBI_PASS environment variables, into the file-level
# variables used by the rest of the script: $DBDriver, $DBHost, $DBName, $DBUser,
# $DBPassword, $DBMode, $ExportLOBs, $ExportDataLabels, $OutDelim, $OutQuote and
# $ReplaceNullStr. A command line option always takes precedence over the
# corresponding environment variable.
#
# Dies for an unsupported database driver name or a missing database
# username/password.
#
sub ProcessOptions {

  # Database driver: normalize name to the spelling used for the driver...
  $DBDriver = $Options{dbdriver} ? $Options{dbdriver} : (exists $ENV{DBI_DRIVER} ? $ENV{DBI_DRIVER} : "") ;
  if ($DBDriver) {
    if ($DBDriver =~ /^Oracle$/i) {
      $DBDriver = "Oracle";
    }
    elsif ($DBDriver =~ /^mysql$/i) {
      $DBDriver = "mysql";
    }
    elsif ($DBDriver =~ /^(Pg|Postgres)$/i) {
      $DBDriver = "Pg";
    }
    else {
      if ($Options{dbdriver}) {
        die "Error: The value specified, $DBDriver, for option \"-d --dbdriver\" is not valid. Allowed values: MySQL, Oracle, Postgres or Pg\n";
      }
      else {
        die "Error: The value specified, $DBDriver, using environment variable DBI_DRIVER not valid. Allowed values: MySQL, Oracle, Postgres or Pg\n";
      }
    }
  }
  else {
    $DBDriver = "mysql";
  }

  # Database host and name...
  $DBHost = $Options{dbhost} ? $Options{dbhost} : (exists $ENV{DBI_HOST} ? $ENV{DBI_HOST} : "127.0.0.1");
  $DBName = $Options{dbname} ? $Options{dbname} : (exists $ENV{DBI_NAME} ? $ENV{DBI_NAME} : "");
  if (!$DBName) {
    # Default database name depends on the driver; no default is set for Oracle...
    if ($DBDriver =~ /^mysql$/i) {
      $DBName = "mysql";
    }
    # Group the alternatives so that both anchors apply to each one of them...
    elsif ($DBDriver =~ /^(Pg|Postgres)$/i) {
      $DBName = "postgres";
    }
  }

  # Database credentials are required...
  $DBUser = $Options{dbusername} ? $Options{dbusername} : (exists $ENV{DBI_USER} ? $ENV{DBI_USER} : "") ;
  if (!$DBUser) {
    die "Error: No database username specified. Use \"--dbusername\" option or environment variable DBI_USER to enter a valid value.\n";
  }
  $DBPassword = $Options{dbpassword} ? $Options{dbpassword} : (exists $ENV{DBI_PASS} ? $ENV{DBI_PASS} : "") ;
  if (!$DBPassword) {
    die "Error: No database password specified. Use \"--dbpassword\" option or environment variable DBI_PASS to enter a valid value.\n";
  }

  $DBMode = $Options{mode};

  # Option values are validated case insensitively by SetupScriptUsage; interpret
  # them the same way to make sure "--exportlobs YES" is not treated as no...
  $ExportLOBs = ($Options{exportlobs} =~ /^yes$/i) ? 1 : 0;
  $ExportDataLabels = ($Options{exportdatalabels} =~ /^yes$/i) ? 1 : 0;

  $OutDelim = ($Options{outdelim} =~ /^tab$/i ) ? "\t" : (($Options{outdelim} =~ /^semicolon$/i) ? "\;" : "\,");
  $OutQuote = ($Options{quote} =~ /^yes$/i) ? 1 : 0;

  $ReplaceNullStr = (defined($Options{replacenullstr}) && length($Options{replacenullstr})) ? $Options{replacenullstr} : "";
}
238
# Setup script usage and retrieve command line arguments specified using various options...
#
# Initializes file-level %Options hash with default values, parses the command
# line using GetOptions, changes to the working directory for "--workingdir"
# option, and validates values of the options which only accept a fixed set of
# values. Dies with an error message on any invalid option or value.
#
sub SetupScriptUsage {

  # Retrieve all the options...
  %Options = ();
  $Options{mode} = "SQLStatement";
  $Options{exportlobs} = "no";
  $Options{exportdatalabels} = "yes";
  $Options{outdelim} = "comma";
  $Options{quote} = "yes";

  if (!GetOptions(\%Options, "dbdriver|d=s", "dbhost=s", "dbname=s", "dbpassword=s", "dbusername=s", "exportdatalabels=s", "exportlobs=s", "help|h", "mode|m=s", "outdelim=s", "overwrite|o", "quote|q=s", "root|r=s", "replacenullstr=s", "workingdir|w=s")) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }

  # Change to the working directory before any file names are resolved...
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  # Validate option values; matching is case insensitive...
  if ($Options{exportdatalabels} !~ /^(yes|no)$/i) {
    # Report the value of the option being validated...
    die "Error: The value specified, $Options{exportdatalabels}, for option \"--exportdatalabels\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{exportlobs} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{exportlobs}, for option \"--exportlobs\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{mode} !~ /^(SQLStatement|SQLFile)$/i) {
    die "Error: The value specified, $Options{mode}, for option \"-m --mode\" is not valid. Allowed values: SQLStatement or SQLFile\n";
  }
  if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
    die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
  }
  if ($Options{quote} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: yes or no\n";
  }
}
275
276 __END__
277
278 =head1 NAME
279
280 DBSQLToTextFiles.pl - Export data from MySQL, Oracle or PostgreSQL database into CSV/TSV text files
281
282 =head1 SYNOPSIS
283
284 DBSQLToTextFiles.pl SQLFileName(s) | SQLSelectStatement(s)...
285
286 DBSQLToTextFiles.pl [B<-d, --dbdriver> mysql | Oracle | Postgres or Pg] [B<--dbhost > hostname]
287 [B<--dbname> databasename] [B<--dbpassword> password] [B<--dbusername> username]
288 [B<--exportdatalabels> yes | no] [B<--exportlobs> yes | no] [B<-h, --help>]
289 [B<-m, --mode> SQLStatement | SQLFile] [B<-o, --overwrite>] [B<--outdelim> comma | tab | semicolon]
290 [B<-q, --quote> yes | no] [B<-r, --root> rootname] [B<--replacenullstr string>]
291 [B<-w --workingdir> dirname] SQLFileName(s) | SQLSelectStatement(s)...
292
293 =head1 DESCRIPTION
294
295 Export data from MySQL, Oracle or PostgreSQL database into CSV/TSV text files. Based on B<-m --mode>
296 option value, two methods of data selection are available: in line SQL select statement(s), or
297 SQL file name(s) containing SQL select statement(s). All command line parameters must
298 correspond to similar mode; mixing of parameters for different modes is not supported.
299
300 =head1 OPTIONS
301
302 =over 4
303
304 =item B<-d, --dbdriver> I<mysql | Oracle | Postgres or Pg>
305
306 Database driver name. Possible values: I<mysql, Oracle, Postgres or Pg>. Default: I<MySQL> or value of
307 environment variable DBI_DRIVER. This script has only been tested with MySQL, Oracle
308 and PostgreSQL drivers.
309
310 =item B<--dbhost > I<hostname>
311
312 Database host name. Default: I<127.0.0.1> for MySQL, Oracle and PostgreSQL. For remote
313 databases, specify complete remote host domain: I<dbhostname.org> or something
314 like it.
315
316 =item B<--dbname> I<databasename>
317
318 Database name. Default: mysql for MySQL, postgres for PostgreSQL and none for Oracle.
319 For connecting to local/remote Oracle databases, this value can be left undefined assuming
320 B<--dbhost> is correctly specified.
321
322 =item B<--dbpassword> I<password>
323
324 Database user password. Default: I<none> and value of environment variable DBI_PASS
325 is used for connecting to database.
326
327 =item B<--dbusername> I<username>
328
329 Database user name. Default: I<none> and value of environment variable DBI_USER is
330 used for connecting to database.
331
332 =item B<--exportdatalabels> I<yes | no>
333
334 This option is mode specific and controls exporting of column data labels during
335 exportdata mode. Possible values: I<yes or no>. Default: I<yes>.
336
337 =item B<--exportlobs> I<yes | no>
338
339 This option is mode specific and controls exporting of CLOB/BLOB data columns during
340 exportdata mode. Possible values: I<yes or no>. Default: I<no>.
341
342 =item B<-h, --help>
343
344 Print this help message.
345
346 =item B<-m, --mode> I<SQLStatement | SQLFile>
347
348 Data selection criterion from database. Two different command line parameter methods
349 are available: in line SQL statement(s) specification or file name(s) containing SQL select
350 statement(s). This value determines how command line parameters are processed.
351
352 Possible values: I<SQLStatement or SQLFile>. Default value: I<SQLStatement>
353
354 In SQLFile mode, SQL file contains select statements delimited by I<;>. And the lines starting
355 with I<#> or I<-> are ignored.
356
357 =item B<-o, --overwrite>
358
359 Overwrite existing files.
360
361 =item B<--outdelim> I<comma | tab | semicolon>
362
363 Output text file delimiter. Possible values: I<comma, tab, or semicolon>
364 Default value: I<comma>.
365
366 =item B<-q, --quote> I<yes | no>
367
368 Put quotes around column values in output text file. Possible values: I<yes or
369 no>. Default value: I<yes>.
370
371 =item B<-r, --root> I<rootname>
372
373 New file name is generated using the root: <Root>.<Ext> in SQLStatement mode, or
374 <Root><No>.<Ext> in SQLFile mode. Default new file names: SQLStatement<No>.<Ext>, or
375 <SQLFileName>SQLStatement<No>.<Ext>. The csv and tsv <Ext> values are used for comma/semicolon,
376 and tab delimited text files respectively. This option is ignored for multiple input parameters.
377
378 =item B<--replacenullstr> I<string>
379
380 Replace NULL or undefined row values with specified value. Default: I<none>
381
382 For importing output text files into MySQL database using "load data local infile '<tablename>.tsv'
383 into table <tablename>" command, use I<--replacenullstr "NULL"> in conjunction with I<--exportdatalabels no>,
384 I<--quote no>, and I<--outdelim tab> options: it'll generate files for direct import into MySQL assuming
385 tables already exist.
386
387 =item B<-w --workingdir> I<dirname>
388
389 Location of working directory. Default: current directory.
390
391 =back
392
393 =head1 EXAMPLES
394
395 To export all data in user_info table from a MySQL server running on a local machine
396 using username/password from DBI_USER and DBI_PASS environmental variables, type:
397
398 % DBSQLToTextFiles.pl -o "select * from user_info"
399
400 To describe user table in a MySQL server running on a remote machine using explicit
401 username/password and capturing the output into a UserTable.csv file, type:
402
403 % DBSQLToTextFiles.pl --dbdriver mysql --dbuser <name> --dbpassword
404 <password> --dbname mysql --dbhost <mysqlhostname.org> -r UserTable
405 -m SQLStatement -o "select * from user_info"
406
407 To describe table all_tables in Oracle running on a remote machine using explicit
408 username/password and capturing the output into a AllTable.tsv file, type:
409
410 % DBSQLToTextFiles.pl --dbdriver Oracle --dbuser <name> --dbpassword
411 <password> --dbhost <oraclehostname.com> -r AllTable -m SQLStatement
412 --outdelim tab --quote no -o "select * from all_tables"
413
414 To run all SQL statements in a file sample.sql on a local Oracle host and capture the output
415 in SampleSQL<No>.csv files, one per statement, type:
416
417 % DBSQLToTextFiles.pl --dbdriver Oracle --dbuser <name> --dbpassword
418 <password> -r SampleSQL -m SQLFile -o sample.sql
419
420 =head1 AUTHOR
421
422 Manish Sud <msud@san.rr.com>
423
424 =head1 SEE ALSO
425
426 DBSchemaTablesToTextFiles.pl, DBTablesToTextFiles.pl
427
428 =head1 COPYRIGHT
429
430 Copyright (C) 2015 Manish Sud. All rights reserved.
431
432 This file is part of MayaChemTools.
433
434 MayaChemTools is free software; you can redistribute it and/or modify it under
435 the terms of the GNU Lesser General Public License as published by the Free
436 Software Foundation; either version 3 of the License, or (at your option)
437 any later version.
438
439 =cut