comparison mayachemtool/mayachemtools/bin/DBSchemaTablesToTextFiles.pl @ 0:68300206e90d draft default tip

Uploaded
author deepakjadmin
date Thu, 05 Nov 2015 02:41:30 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:68300206e90d
1 #!/usr/bin/perl -w
2 #
3 # $RCSfile: DBSchemaTablesToTextFiles.pl,v $
4 # $Date: 2015/02/28 20:46:19 $
5 # $Revision: 1.31 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use FindBin; use lib "$FindBin::Bin/../lib";
31 use Getopt::Long;
32 use File::Basename;
33 use Text::ParseWords;
34 use Benchmark;
35 use FileUtil;
36 use TextUtil;
37 use DBUtil;
38
39 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
40
41 # Autoflush STDOUT
42 $| = 1;
43
44 # Starting message...
45 $ScriptName = basename($0);
46 print "\n$ScriptName: Starting...\n\n";
47 $StartTime = new Benchmark;
48
49 # Get the options and setup script...
50 SetupScriptUsage();
51 if ($Options{help} || @ARGV < 1) {
52 die GetUsageFromPod("$FindBin::Bin/$ScriptName");
53 }
54
55 my($DBDriver, $DBHost, $DBName, $DBUser, $DBPassword, $DBMode, $ExportDataLabels, $ExportLOBs, $OutDelim, $OutQuote, $ReplaceNullStr);
56 ProcessOptions();
57
58 # Connect to database...
59 my($DBHandle);
60 print "Connecting to $DBDriver:database=$DBName as $DBUser...\n";
61 $DBHandle = DBConnect($DBDriver, $DBName, $DBHost, $DBUser, $DBPassword);
62
63 # Collect input parameters information...
64 print "Checking input parameter(s)...\n";
65 my(@DBSchemaNames, @DBTableNames, @DBSQLStatements, @DBTextFiles, $SingleTextFileName);
66 RetrieveDBInfo();
67
68 if ($Options{numoutfiles} =~ /^single$/i ) {
69 GenerateSingleTextFile();
70 }
71 else {
72 GenerateMultipleTextFiles();
73 }
74 print "\nDisconnecting from $DBDriver:database=$DBName...\n";
75 DBDisconnect($DBHandle);
76
77 print "$ScriptName:Done...\n\n";
78
79 $EndTime = new Benchmark;
80 $TotalTime = timediff ($EndTime, $StartTime);
81 print "Total time: ", timestr($TotalTime), "\n";
82
83 ###############################################################################
84
85 # Generate multiple text files...
86 sub GenerateMultipleTextFiles {
87 my($Index, $TextFile, $SQL);
88 if (@DBTextFiles > 1) {
89 print "Generating text files...\n";
90 }
91 TEXTFILE: for $Index (0 .. $#DBTextFiles) {
92 $TextFile = $DBTextFiles[$Index];
93 $SQL = $DBSQLStatements[$Index];
94
95 if (@DBTextFiles > 1) {
96 print "\nGenerating text file $TextFile...\n";
97 }
98 else {
99 print "Generating text file $TextFile...\n";
100 }
101
102 if (!open TEXTFILE, ">$TextFile") {
103 warn "Warning: Abandoning $TextFile generation: Couldn't open it: $! \n";
104 next TEXTFILE;
105 }
106 print "Processing SQL statement \"$SQL\"...\n";
107
108 if (DBSQLToTextFile($DBHandle, $SQL, \*TEXTFILE, $OutDelim, $OutQuote, $ExportDataLabels, $ExportLOBs, $ReplaceNullStr)) {
109 warn "Warning: Abandoning $TextFile generation...\n";
110 next TEXTFILE;
111 }
112 close TEXTFILE;
113 }
114 }
115
116 # Generate single text file...
117 sub GenerateSingleTextFile {
118 my($Index, $TextFile, $SQL, $SchemaName, $TableName);
119
120 open TEXTFILE, ">$SingleTextFileName" or die "Couldn't open $SingleTextFileName: $! \n";
121 print "Generating text file $SingleTextFileName...\n";
122
123 SQL: for $Index (0 .. $#DBSQLStatements) {
124 $SchemaName = $DBSchemaNames[$Index];
125 $TableName = $DBTableNames[$Index];
126 $SQL = $DBSQLStatements[$Index];
127
128 $TableName = qq($SchemaName.$TableName);
129 $TableName = QuoteAWord($TableName, $OutQuote);
130 print TEXTFILE "\n\n$TableName\n";
131
132 if (DBSQLToTextFile($DBHandle, $SQL, \*TEXTFILE, $OutDelim, $OutQuote, $ExportDataLabels, $ExportLOBs, $ReplaceNullStr)) {
133 warn "Warning: Abandoning table $TableName ...\n";
134 next SQL;
135 }
136 }
137 close TEXTFILE;
138 }
139
140 # Collect input parameters information...
141 sub RetrieveDBInfo {
142 my($FileExt, $UserFileName, $FileDBPrefix);
143
144 # Setup out file ext...
145 $FileExt = ($Options{outdelim} =~ /^tab$/i) ? "tsv" : "csv";
146 $FileDBPrefix = ($DBMode =~ /^exportdata$/i) ? "Export" : "Describe";
147
148 # Get user specified information...
149 $UserFileName = "";
150 if ($Options{root} && (@ARGV == 1)) {
151 my($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($Options{root});
152 if ($RootFileName && $RootFileExt) {
153 $UserFileName = $RootFileName;
154 }
155 else {
156 $UserFileName = $Options{root};
157 }
158 }
159 $SingleTextFileName = "";
160 if ($Options{numoutfiles} =~ /^single$/i) {
161 $SingleTextFileName = $UserFileName ? $UserFileName : ("$FileDBPrefix" . "SchemaTables");
162 $SingleTextFileName .= ".$FileExt";
163 }
164
165 # For each input schema name: collect all the table names, set up appropriate
166 # SQL statements, and output file names...
167 #
168 my($SchemaName, $SQL, $FileName, @SchemaTableNames, $TableName);
169 @DBSchemaNames = ();
170 @DBTableNames = ();
171 @DBSQLStatements = ();
172 @DBTextFiles = ();
173 @SchemaTableNames = ();
174 SCHEMANAME: for $SchemaName (@ARGV) {
175 $SchemaName = ($DBDriver =~ /^(mysql|Oracle)$/i) ? uc($SchemaName) : $SchemaName;
176 if (!(@SchemaTableNames = DBFetchSchemaTableNames($DBDriver, $DBHandle, $SchemaName))) {
177 warn "Warning: Ignoring schema $SchemaName...\n";
178 next SCHEMANAME;
179 }
180 # Prepare SQL statement for each table.
181 for $TableName (@SchemaTableNames) {
182 push @DBSchemaNames, $SchemaName;
183 push @DBTableNames, $TableName;
184 $SQL = ($DBMode =~ /^exportdata$/i) ? DBSetupSelectSQL($DBDriver, $TableName, $SchemaName) : DBSetupDescribeSQL($DBDriver, $TableName, $SchemaName);
185 push @DBSQLStatements, $SQL;
186 if ($Options{numoutfiles} =~ /^multiple$/i) {
187 $FileName = $UserFileName ? ("$UserFileName" . "$TableName") : ("$FileDBPrefix" . "$SchemaName" . "$TableName");
188 $FileName .= ".$FileExt";
189 if (!$Options{overwrite}) {
190 if (-e $FileName) {
191 die "Error: The file $FileName already exists.\n";
192 }
193 }
194 push @DBTextFiles, $FileName;
195 }
196 }
197 }
198 }
199
200 # Process option values...
201 sub ProcessOptions {
202
203 $DBDriver = $Options{dbdriver} ? $Options{dbdriver} : (exists $ENV{DBI_DRIVER} ? $ENV{DBI_DRIVER} : "") ;
204 if ($DBDriver) {
205 if ($DBDriver =~ /^oracle$/i) {
206 $DBDriver = "Oracle";
207 }
208 elsif ($DBDriver =~ /^mysql$/i) {
209 $DBDriver = "mysql";
210 }
211 elsif ($DBDriver =~ /^(Pg|Postgres)$/i) {
212 $DBDriver = "Pg";
213 }
214 else {
215 if ($Options{dbdriver}) {
216 die "Error: The value specified, $DBDriver, for option \"-d --dbdriver\" is not valid. Allowed values: MySQL, Oracle, Postgres or Pg\n";
217 }
218 else {
219 die "Error: The value specified, $DBDriver, using environment variable DBI_DRIVER not valid. Allowed values: MySQL, Oracle, Postgres or Pg\n";
220 }
221 }
222 }
223 else {
224 $DBDriver = "mysql";
225 }
226 $DBHost = $Options{dbhost} ? $Options{dbhost} : (exists $ENV{DBI_HOST} ? $ENV{DBI_HOST} : "127.0.0.1");
227 $DBName = $Options{dbname} ? $Options{dbname} : (exists $ENV{DBI_NAME} ? $ENV{DBI_NAME} : "");
228 if (!$DBName) {
229 if ($DBDriver =~ /^mysql$/i) {
230 $DBName = "mysql";
231 }
232 }
233 $DBUser = $Options{dbusername} ? $Options{dbusername} : (exists $ENV{DBI_USER} ? $ENV{DBI_USER} : "") ;
234 if (!$DBUser) {
235 die "Error: No database username specified. Use \"--dbusername\" option or environment variable DBI_USER to enter a valid value.\n";
236 }
237 $DBPassword = $Options{dbpassword} ? $Options{dbpassword} : (exists $ENV{DBI_PASS} ? $ENV{DBI_PASS} : "") ;
238 if (!$DBPassword) {
239 die "Error: No database password specified. Use \"--dbpassword\" option or environment variable DBI_PASS to enter a valid value.\n";
240 }
241 $DBMode = $Options{mode};
242 $ExportLOBs = ($Options{exportlobs} =~ /^yes$/i) ? 1 : 0;
243 $ExportDataLabels = ($DBMode =~ /^describetable$/i) ? 1 : (($Options{exportdatalabels} =~ /^yes$/i) ? 1 : 0);
244
245 $OutDelim = ($Options{outdelim} =~ /^tab$/i ) ? "\t" : (($Options{outdelim} =~ /^semicolon$/i) ? "\;" : "\,");
246 $OutQuote = ($Options{quote} =~ /^yes$/i) ? 1 : 0;
247
248 $ReplaceNullStr = (defined($Options{replacenullstr}) && length($Options{replacenullstr})) ? $Options{replacenullstr} : "";
249 }
250
251 # Setup script usage and retrieve command line arguments specified using various options...
252 sub SetupScriptUsage {
253
254 # Retrieve all the options...
255 %Options = ();
256 $Options{mode} = "exportdata";
257 $Options{exportlobs} = "no";
258 $Options{exportdatalabels} = "yes";
259 $Options{numoutfiles} = "single";
260 $Options{outdelim} = "comma";
261 $Options{quote} = "yes";
262
263 if (!GetOptions(\%Options, "dbdriver|d=s", "dbhost=s", "dbname=s", "dbpassword=s", "dbusername=s", "exportdatalabels=s", "exportlobs=s", "help|h", "mode|m=s", "numoutfiles|n=s", "outdelim=s", "overwrite|o", "quote|q=s", "root|r=s", "replacenullstr=s", "workingdir|w=s")) {
264 die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
265 }
266 if ($Options{workingdir}) {
267 if (! -d $Options{workingdir}) {
268 die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
269 }
270 chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
271 }
272 if ($Options{exportdatalabels} !~ /^(yes|no)$/i) {
273 die "Error: The value specified, $Options{exportlobs}, for option \"--exportdatalabels\" is not valid. Allowed values: yes or no\n";
274 }
275 if ($Options{exportlobs} !~ /^(yes|no)$/i) {
276 die "Error: The value specified, $Options{exportlobs}, for option \"--exportlobs\" is not valid. Allowed values: yes or no\n";
277 }
278 if ($Options{numoutfiles} !~ /^(single|multiple)$/i) {
279 die "Error: The value specified, $Options{mode}, for option \"-n --numoutfiles\" is not valid. Allowed values: single or multiple\n";
280 }
281 if ($Options{mode} !~ /^(exportdata|describetable)$/i) {
282 die "Error: The value specified, $Options{mode}, for option \"-m --mode\" is not valid. Allowed values: exportdata or describetable\n";
283 }
284 if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
285 die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
286 }
287 if ($Options{quote} !~ /^(yes|no)$/i) {
288 die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: yes or no\n";
289 }
290 }
291
292 __END__
293
294 =head1 NAME
295
296 DBSchemaTablesToTextFiles.pl - Export table data from database SchemaName(s) into CSV/TSV text files
297
298 =head1 SYNOPSIS
299
300 DBSchemaTablesToTextFiles.pl SchemaName(s)...
301
302 DBSchemaTablesToTextFiles.pl [B<-d, --dbdriver> mysql | Oracle| Postgres or Pg] [B<--dbhost > hostname]
303 [B<--dbname> databasename] [B<--dbpassword> password] [B<--dbusername> username]
304 [B<--exportdatalabels> yes | no] [B<--exportlobs> yes | no] [B<-h, --help>]
305 [B<-m, --mode> exportdata | describetable] [B<-n, --numoutfilesmode> single | multiple]
306 [B<-o, --overwrite>] [B<--outdelim> comma | tab | semicolon]
307 [B<-q, --quote> yes | no] [B<-r, --root> rootname] [B<--replacenullstr string>]
308 [B<-w --workingdir> dirname] SchemaName(s)...
309
310 =head1 DESCRIPTION
311
312 Export table data from database SchemaName(s) into CSV/TSV text files. Use B<-n --numoutfiles>
313 option to control the number of text files generated for a database schema.
314
315 =head1 OPTIONS
316
317 =over 4
318
319 =item B<-d, --dbdriver> I<mysql | Oracle | Postgres or Pg>
320
321 Database driver name. Possible values: I<mysql, Oracle, Postgres or Pg>. Default: I<MySQL> or value of
322 environment variable DBI_DRIVER. This script has only been tested with MySQL, Oracle
323 and PostgreSQL drivers.
324
325 =item B<--dbhost > I<hostname>
326
327 Database host name. Default: I<127.0.0.1> for both MySQL and Oracle. For remote
328 databases, specify complete remote host domain: I<dbhostname.org> or something
329 like it.
330
331 =item B<--dbname> I<databasename>
332
333 Database name. Default: mysql for MySQL, postgres for PostgreSQL and none for Oracle.
334 For connecting to local/remote Oracle databases, this value can be left undefined assuming
335 B<--dbhost> is correctly specified.
336
337 =item B<--dbpassword> I<password>
338
339 Database user password. Default: I<none> and value of environment variable DBI_PASS
340 is used for connecting to database.
341
342 =item B<--dbusername> I<username>
343
344 Database user name. Default: I<none> and value of environment variable DBI_USER is
345 used for connecting to database.
346
347 =item B<--exportdatalabels> I<yes | no>
348
349 This option is mode specific and controls exporting of column data labels during
350 exportdata mode. Possible values: I<yes or no>. Default: I<yes>.
351
352 =item B<--exportlobs> I<yes | no>
353
354 This option is mode specific and controls exporting of CLOB/BLOB or BYTEA data columns during
355 exportdata mode. Possible values: I<yes or no>. Default: I<no>.
356
357 =item B<-h, --help>
358
359 Print this help message.
360
361 =item B<-m, --mode> I<exportdata | describetable>
362
363 Data selection criterion from database. Possible values: I<exportdata or describetable>.
364 Default value: I<exportdata>.
365
366 =item B<-n, --numoutfilesmode> I<single | multiple>
367
368 Number of CSV/TSV output files to generate: combine output into one file or generate
369 a different file for each table in a schema. Possible values: I<single or multiple>. Default:
370 I<single>.
371
372 In a single output file, data for different tables is separated by a blank line.
373
374 Single outfile option in I<exportdata> mode is quite useful for exporting data from all tables
375 in specifed schemas to one file which can be used for migrating data to another database
376 or simply provide a backup of data; during I<describetable> mode, it provides a means to
377 collect information about columns of all schema tables which can help in creation of these
378 tables on a different database server.
379
380 =item B<-o, --overwrite>
381
382 Overwrite existing files.
383
384 =item B<--outdelim> I<comma | tab | semicolon>
385
386 Output text file delimiter. Possible values: I<comma, tab, or semicolon>.
387 Default value: I<comma>
388
389 =item B<-q, --quote> I<yes | no>
390
391 Put quotes around column values in output text file. Possible values: I<yes or
392 no>. Default value: I<yes>.
393
394 =item B<-r, --root> I<rootname>
395
396 New file name is generated using the root:<Root>.<Ext> and <Root><TableName>.<Ext>
397 for I<single> and I<multiple> B<-n --numoutfiles> option values. Default file name for
398 I<single> B<-n --numoutfiles> option value: <Mode>SchemaTables.<Ext>. Default file
399 names for I<multiple> B<-n --numoutfiles> value: <Mode><SchemaName><TableName>.<Ext>.
400 Based on B<-m --mode> option, I<Export> or I<Describe> <Mode> value is used. The csv and tsv
401 <Ext> values are used for comma/semicolon, and tab delimited text files respectively. This option is
402 ignored for multiple input schema names.
403
404 =item B<--replacenullstr> I<string>
405
406 Replace NULL or undefined row values with specified value. Default: I<none>.
407
408 For importing output text files into MySQL database using "load data local infile '<tablename>.tsv'
409 into table <tablename>" command, use I<--raplacenullstr "NULL"> in conjunction with I<--exportdatalabels no>,
410 I<--quote no>, and I<--outdelim tab> options: it'll generate files for direct import into MySQL assuming
411 tables already exists.
412
413 =item B<-w --workingdir> I<dirname>
414
415 Location of working directory. Default: current directory.
416
417 =back
418
419 =head1 EXAMPLES
420
421 To export data in all tables from mysql schema on a MySQL server running on a local machine
422 using username/password from DBI_USER and DBI_PASS environmental variables, type:
423
424 % DBSchemaTablesToTextFiles.pl mysql
425
426 To describe all tables in mysql and test schemas on a MySQL server running on a remote machine
427 using explicit username/password and capturing the ouput into a DescribeTables.csv file, type:
428
429 % DBSchemaTablesToTextFiles.pl --dbdriver mysql --dbuser <name>
430 --dbpassword <pasword> --dbname mysql --dbhost
431 <mysqlhostname.org> -r DescribeTable -m describetable
432 -o mysql test
433
434 To describe all tables in SCOTT schema in Oracle running on a remote machine using explicit
435 username/password and capturing the ouput into a DescribeAllTable.tsv file, type:
436
437 % DBSchemaTablesToTextFiles.pl --dbdriver Oracle --dbuser <name>
438 --dbpassword <pasword> --dbhost <oraclehostname.com>
439 -r DescribeAllTable -m describetable --outdelim tab --quote no
440 -o SCOTT
441
442 To export data in all tables in mysql and test schemas on a MySQL server running at a local
443 machine using explicit username/password and capturing the data in TSV file for each table with empty
444 values substitued with NULL and clob/blob data, type:
445
446 % DBSchemaTablesToTextFiles.pl --dbdriver Oracle --dbuser <name>
447 --dbpassword <pasword> -r ExportTables --outdelim tab --quote no
448 --replacenullstr "NULL" -m exportdata --exportlobs no --numoutfiles
449 multiple -o user user_info
450
451 =head1 AUTHOR
452
453 Manish Sud <msud@san.rr.com>
454
455 =head1 SEE ALSO
456
457 DBSQLToTextFiles.pl, DBTablesToTextFiles.pl
458
459 =head1 COPYRIGHT
460
461 Copyright (C) 2015 Manish Sud. All rights reserved.
462
463 This file is part of MayaChemTools.
464
465 MayaChemTools is free software; you can redistribute it and/or modify it under
466 the terms of the GNU Lesser General Public License as published by the Free
467 Software Foundation; either version 3 of the License, or (at your option)
468 any later version.
469
470 =cut