MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # $RCSfile: MergeTextFiles.pl,v $
   4 # $Date: 2015/02/28 20:46:20 $
   5 # $Revision: 1.40 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use FindBin; use lib "$FindBin::Bin/../lib";
  31 use Getopt::Long;
  32 use File::Basename;
  33 use Text::ParseWords;
  34 use Benchmark;
  35 use FileHandle;
  36 use FileUtil;
  37 use TextUtil;
  38 
  39 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  40 
  41 # Autoflush STDOUT
  42 $| = 1;
  43 
  44 # Starting message...
  45 $ScriptName = basename $0;
  46 print "\n$ScriptName:Starting...\n\n";
  47 $StartTime = new Benchmark;
  48 
  49 # Get the options and setup script...
  50 SetupScriptUsage();
  51 if ($Options{help} || @ARGV < 1) {
  52   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  53 }
  54 
  55 my(@TextFilesList);
  56 @TextFilesList = ExpandFileNames(\@ARGV, "csv tsv");
  57 
  58 if (@TextFilesList == 1) {
  59   die "Error: Specify more than one text file.\n";
  60 }
  61 
  62 # Process options...
  63 print "Processing options...\n";
  64 my(%OptionsInfo);
  65 ProcessOptions();
  66 
  67 # Setup information about input files...
  68 my(%TextFilesInfo);
  69 print "Checking input text files...\n";
  70 RetrieveTextFilesInfo();
  71 RetrieveColumnsAndKeysInfo();
  72 
  73 # Merge files...
  74 print "\nGenerating new text file $OptionsInfo{NewTextFile}...\n";
  75 MergeTextFiles();
  76 
  77 print "\n$ScriptName:Done...\n\n";
  78 
  79 $EndTime = new Benchmark;
  80 $TotalTime = timediff ($EndTime, $StartTime);
  81 print "Total time: ", timestr($TotalTime), "\n";
  82 
  83 ###############################################################################
  84 
  85 # Merge all valid Text files...
  86 sub MergeTextFiles {
  87   my($Index);
  88 
  89   open NEWTEXTFILE, ">$OptionsInfo{NewTextFile}" or die "Error: Couldn't open $OptionsInfo{NewTextFile}: $! \n";
  90 
  91   WriteNewTextFileColumnLabels(\*NEWTEXTFILE);
  92 
  93   #Open up all the files and skip coumn label line...
  94   @{$TextFilesInfo{FileHandle}} = ();
  95   for $Index (0 .. $#TextFilesList) {
  96     $TextFilesInfo{FileHandle}[$Index] = new FileHandle;
  97 
  98     open $TextFilesInfo{FileHandle}[$Index], "$TextFilesList[$Index]" or die "Error: Couldn't open $TextFilesList[$Index]: $! \n";
  99     GetTextLine($TextFilesInfo{FileHandle}[$Index]);
 100   }
 101 
 102   # Merge files...
 103   if ($OptionsInfo{Keys}) {
 104     MergeColumnValuesUsingKeys(\*NEWTEXTFILE);
 105   }
 106   else {
 107     MergeColumnValues(\*NEWTEXTFILE);
 108   }
 109 
 110   # Close all opened files...
 111   close NEWTEXTFILE;
 112   for $Index (0 .. $#TextFilesList) {
 113     close $TextFilesInfo{FileHandle}[$Index];
 114   }
 115 
 116 }
 117 
 118 # Merge all the column values...
 119 sub MergeColumnValues {
 120   my($NewTextFileRef) = @_;
 121   my($Index, $Line, $InDelim, $Value, $ColNum, @LineWords, @File1LineWords, @ColValues);
 122 
 123   while ($Line = GetTextLine($TextFilesInfo{FileHandle}[0])) {
 124     $InDelim = $TextFilesInfo{InDelim}[0];
 125     @ColValues = ();
 126 
 127     #Collect column values from first file before the merge point...
 128     @File1LineWords = quotewords($InDelim, 0, $Line);
 129     for $ColNum (@{$TextFilesInfo{File1Part1ColNums}}) {
 130       $Value = ($ColNum < @File1LineWords) ? $File1LineWords[$ColNum] : "";
 131       push @ColValues, $Value;
 132     }
 133 
 134     #Collect column values from other text files...
 135     for $Index (1 .. $#TextFilesList) {
 136       $InDelim = $TextFilesInfo{InDelim}[$Index];
 137       if ($Line = GetTextLine($TextFilesInfo{FileHandle}[$Index])) {
 138         @LineWords = quotewords($InDelim, 0, $Line);
 139         for $ColNum (@{$TextFilesInfo{ColToMerge}[$Index]}) {
 140           $Value = ($ColNum < @LineWords) ? $LineWords[$ColNum] : "";
 141           push @ColValues, $Value;
 142         }
 143       }
 144     }
 145 
 146     #Collect column labels from first file after the merge point...
 147     for $ColNum (@{$TextFilesInfo{File1Part2ColNums}}) {
 148       $Value = ($ColNum < @File1LineWords) ? $File1LineWords[$ColNum] : "";
 149       push @ColValues, $Value;
 150     }
 151 
 152     # Write it out...
 153     $Line = JoinWords(\@ColValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 154     print $NewTextFileRef "$Line\n";
 155   }
 156 
 157 }
 158 
 159 # Merge column values using keys...
 160 sub MergeColumnValuesUsingKeys {
 161   my($NewTextFileRef) = @_;
 162   my($Index, $InDelim, $Line, $Value, $ColNum, $KeyColNum, $KeyColValue, @LineWords, @ColValues, @File1LineWords, @TextFilesKeysToLinesMap);
 163 
 164   @TextFilesKeysToLinesMap = ();
 165 
 166   # Retrieve text lines from all the files except for the first file...
 167   for $Index (1 .. $#TextFilesList) {
 168     %{$TextFilesKeysToLinesMap[$Index]} = ();
 169 
 170     $InDelim = $TextFilesInfo{InDelim}[$Index];
 171     $KeyColNum = $TextFilesInfo{KeysToUse}[$Index];
 172 
 173     while ($Line = GetTextLine($TextFilesInfo{FileHandle}[$Index])) {
 174       @LineWords = quotewords($InDelim, 0, $Line);
 175       if ($KeyColNum < @LineWords) {
 176         $KeyColValue = $LineWords[$KeyColNum];
 177         if (length($KeyColValue)) {
 178           if (exists($TextFilesKeysToLinesMap[$Index]{$KeyColValue})) {
 179             warn "Warning: Ignoring line, $Line, in text file $TextFilesList[$Index]: Column key value, $KeyColValue, already exists\n";
 180           }
 181           else {
 182             @{$TextFilesKeysToLinesMap[$Index]{$KeyColValue}} = ();
 183             push @{$TextFilesKeysToLinesMap[$Index]{$KeyColValue}}, @LineWords;
 184           }
 185         }
 186       }
 187     }
 188   }
 189 
 190   while ($Line = GetTextLine($TextFilesInfo{FileHandle}[0])) {
 191     $InDelim = $TextFilesInfo{InDelim}[0];
 192 
 193     @ColValues = ();
 194     @File1LineWords = quotewords($InDelim, 0, $Line);
 195 
 196     $KeyColNum = $TextFilesInfo{KeysToUse}[0];
 197     $KeyColValue = $File1LineWords[$KeyColNum];
 198 
 199     #Collect column values from first file before the merge point...
 200     for $ColNum (@{$TextFilesInfo{File1Part1ColNums}}) {
 201       $Value = ($ColNum < @File1LineWords) ? $File1LineWords[$ColNum] : "";
 202       push @ColValues, $Value;
 203     }
 204 
 205     #Collect column values from other text files...
 206     for $Index (1 .. $#TextFilesList) {
 207       @LineWords = ();
 208       if (exists($TextFilesKeysToLinesMap[$Index]{$KeyColValue})) {
 209         push @LineWords, @{$TextFilesKeysToLinesMap[$Index]{$KeyColValue}};
 210       }
 211       for $ColNum (@{$TextFilesInfo{ColToMerge}[$Index]}) {
 212         $Value = ($ColNum < @LineWords) ? $LineWords[$ColNum] : "";
 213         push @ColValues, $Value;
 214       }
 215     }
 216 
 217     #Collect column labels from first file after the merge point...
 218     for $ColNum (@{$TextFilesInfo{File1Part2ColNums}}) {
 219       $Value = ($ColNum < @File1LineWords) ? $File1LineWords[$ColNum] : "";
 220       push @ColValues, $Value;
 221     }
 222 
 223     # Write it out...
 224     $Line = JoinWords(\@ColValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 225     print $NewTextFileRef "$Line\n";
 226   }
 227 
 228 }
 229 
 230 # Write out column labels for new merged text file...
 231 sub WriteNewTextFileColumnLabels {
 232   my($NewTextFileRef) = @_;
 233   my($Index, $Line, $ColNum, @ColLabels);
 234 
 235   #Write out column labels for the merged text file...
 236   @ColLabels = ();
 237 
 238   #Collect column labels from first file before the merge point...
 239   for $ColNum (@{$TextFilesInfo{File1Part1ColNums}}) {
 240     push @ColLabels, $TextFilesInfo{ColToMergeNumToLabelMap}[0]{$ColNum};
 241   }
 242 
 243   #Collect column labels from other text files...
 244   for $Index (1 .. $#TextFilesList) {
 245     for $ColNum (@{$TextFilesInfo{ColToMerge}[$Index]}) {
 246       push @ColLabels, $TextFilesInfo{ColToMergeNumToLabelMap}[$Index]{$ColNum};
 247     }
 248   }
 249 
 250   #Collect column labels from first file after the merge point...
 251   for $ColNum (@{$TextFilesInfo{File1Part2ColNums}}) {
 252     push @ColLabels, $TextFilesInfo{ColToMergeNumToLabelMap}[0]{$ColNum};
 253   }
 254 
 255   #Write it out...
 256   $Line = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 257   print NEWTEXTFILE "$Line\n";
 258 }
 259 
 260 # Retrieve text file columns and keys information for specified options...
 261 sub RetrieveColumnsAndKeysInfo {
 262   ProcessColumnsInfo();
 263 
 264   if ($OptionsInfo{Keys}) {
 265     ProcessKeysInfo();
 266   }
 267 
 268   ProcessStartColInfo();
 269 }
 270 
 271 # Process specified columns...
 272 sub ProcessColumnsInfo {
 273   my($Index, $SpecifiedColNum, $Values, $ColIndex, $ColNum, $ColLabel, @Words);
 274 
 275   @{$TextFilesInfo{ColSpecified}} = ();
 276   @{$TextFilesInfo{ColToMerge}} = ();
 277   @{$TextFilesInfo{ColToMergeNumToLabelMap}} = ();
 278 
 279   for $Index (0 .. $#TextFilesList) {
 280 
 281     @{$TextFilesInfo{ColSpecified}[$Index]} = ();
 282 
 283     $Values = "all";
 284     if ($OptionsInfo{Columns}) {
 285       $Values = $OptionsInfo{ColValues}[$Index];
 286     }
 287 
 288     if ($Values =~ /all/i) {
 289       if ($OptionsInfo{Mode} =~ /^colnum$/i) {
 290         for $ColNum (1 .. $TextFilesInfo{ColCount}[$Index]) {
 291           push @{$TextFilesInfo{ColSpecified}[$Index]}, $ColNum;
 292         }
 293       }
 294       else {
 295         push @{$TextFilesInfo{ColSpecified}[$Index]}, @{$TextFilesInfo{ColLabels}[$Index]};
 296       }
 297     }
 298     else {
 299       @Words = split ",", $Values;
 300       push @{$TextFilesInfo{ColSpecified}[$Index]}, @Words;
 301     }
 302 
 303     @{$TextFilesInfo{ColToMerge}[$Index]} = ();
 304     %{$TextFilesInfo{ColToMergeNumToLabelMap}[$Index]} = ();
 305 
 306     if ($OptionsInfo{Mode} =~ /^collabel$/i) {
 307       for $ColIndex (0 .. $#{$TextFilesInfo{ColSpecified}[$Index]}) {
 308         $ColLabel = $TextFilesInfo{ColSpecified}[$Index][$ColIndex];
 309         if (exists($TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel})) {
 310           $ColNum = $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel};
 311           push @{$TextFilesInfo{ColToMerge}[$Index]}, $ColNum;
 312           $TextFilesInfo{ColToMergeNumToLabelMap}[$Index]{$ColNum} = $ColLabel;
 313         }
 314         else {
 315           warn "Warning: Ignoring value, $ColLabel, specified using \"-c --column\" option: column name doesn't exist in  $TextFilesList[$Index]  \n";
 316         }
 317       }
 318     }
 319     else {
 320       for $ColIndex (0 .. $#{$TextFilesInfo{ColSpecified}[$Index]}) {
 321         $SpecifiedColNum = $TextFilesInfo{ColSpecified}[$Index][$ColIndex];
 322         if ($SpecifiedColNum > 0 && $SpecifiedColNum <= $TextFilesInfo{ColCount}[$Index]) {
 323           $ColNum = $SpecifiedColNum - 1;
 324           push @{$TextFilesInfo{ColToMerge}[$Index]}, $ColNum;
 325           $TextFilesInfo{ColToMergeNumToLabelMap}[$Index]{$ColNum} = $TextFilesInfo{ColLabels}[$Index][$ColNum];
 326         }
 327         else {
 328           warn "Warning: Ignoring value, $SpecifiedColNum, specified using \"-c --column\" option: column number doesn't exist in  $TextFilesList[$Index]  \n";
 329         }
 330       }
 331     }
 332     my (@ColToMergeSorted) = sort { $a <=> $b } @{$TextFilesInfo{ColToMerge}[$Index]};
 333     @{$TextFilesInfo{ColToMerge}[$Index]} = ();
 334     push @{$TextFilesInfo{ColToMerge}[$Index]}, @ColToMergeSorted;
 335   }
 336 }
 337 
 338 # Process specified key column values...
 339 sub ProcessKeysInfo {
 340   my($Index, $Key, $ColLabel, $ColNum);
 341 
 342   @{$TextFilesInfo{KeysSpecified}} = ();
 343   @{$TextFilesInfo{KeysToUse}} = ();
 344 
 345   for $Index (0 .. $#TextFilesList) {
 346     $Key = $OptionsInfo{KeyValues}[$Index];
 347 
 348     $TextFilesInfo{KeysSpecified}[$Index] = $Key;
 349     $TextFilesInfo{KeysToUse}[$Index] = -1;
 350 
 351     if ($OptionsInfo{Mode} =~ /^collabel$/i) {
 352       $ColLabel = $Key;
 353       if (exists($TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel})) {
 354         $TextFilesInfo{KeysToUse}[$Index] =  $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel};
 355       }
 356       else {
 357         warn "Warning: Ignoring value, $ColLabel, specified using \"-k --keys\" option: column name doesn't exist in  $TextFilesList[$Index]  \n";
 358       }
 359     }
 360     else {
 361       $ColNum = $Key;
 362       if ($ColNum > 0 && $ColNum <= $TextFilesInfo{ColCount}[$Index]) {
 363         $TextFilesInfo{KeysToUse}[$Index] = $ColNum - 1;
 364       }
 365       else {
 366         warn "Warning: Ignoring value, $ColNum, specified using \"-k --keys\" option: column number doesn't exist in  $TextFilesList[$Index]  \n";
 367       }
 368     }
 369   }
 370 
 371   # Modify columns to merge list to make sure the columns identified by key are taken off the list
 372   # except for the first text file...
 373   my(@ColToMergeFiltered);
 374 
 375   for $Index (1 .. $#TextFilesList) {
 376     @ColToMergeFiltered = ();
 377     for $ColNum (@{$TextFilesInfo{ColToMerge}[$Index]}) {
 378       if ($TextFilesInfo{KeysToUse}[$Index] != $ColNum) {
 379         push @ColToMergeFiltered, $ColNum;
 380       }
 381     }
 382     @{$TextFilesInfo{ColToMerge}[$Index]} = ();
 383     push @{$TextFilesInfo{ColToMerge}[$Index]}, @ColToMergeFiltered;
 384   }
 385 }
 386 
 387 # Process specified start column value...
 388 sub ProcessStartColInfo {
 389   my($Index, $ColIndex, $ColNum, $StartColNum, $Part1StartColNum, $Part1EndColNum, $Part2StartColNum, $Part2EndColNum, $BeforeStartColNum, $AfterStartColNum, $FirstColNum, $LastColNum, $FirstIndex, $LastIndex);
 390 
 391   @{$TextFilesInfo{File1Part1ColNums}} = ();
 392   @{$TextFilesInfo{File1Part2ColNums}} = ();
 393 
 394   $StartColNum = "last";
 395   if ($OptionsInfo{StartCol}) {
 396     if (length($OptionsInfo{StartCol})) {
 397       $StartColNum = $OptionsInfo{StartCol}
 398     }
 399   }
 400 
 401   if ($StartColNum !~ /^last$/i) {
 402     if ($OptionsInfo{Mode} =~ /^collabel$/i) {
 403       if (exists($TextFilesInfo{ColLabelToNumMap}[0]{$StartColNum})) {
 404         $StartColNum = $TextFilesInfo{ColLabelToNumMap}[0]{$StartColNum};
 405       }
 406       else {
 407         die "Error: Invalid value $StartColNum specified using \"-s --startcol\" option: column name doesn't exist in  $TextFilesList[0]  \n";
 408       }
 409     }
 410     else {
 411       if ($StartColNum > 0 && $StartColNum <= $TextFilesInfo{ColCount}[0]) {
 412         $StartColNum -= 1;
 413       }
 414       else {
 415         die "Error: Invalid value $StartColNum specified using \"-s --startcol\" option: column number doesn't exist in  $TextFilesList[0]  \n";
 416       }
 417     }
 418   }
 419   else {
 420     $StartColNum = $TextFilesInfo{ColCount}[0] - 1;
 421   }
 422 
 423   # Make sure StartColNum is present on the list of columns to merge for the first text file...
 424   if (!exists($TextFilesInfo{ColToMergeNumToLabelMap}[0]{$StartColNum})) {
 425     die "Error: Invalid value $StartColNum specified using \"-s --startcol\" option: doesn't exist in the specified lists of columns to merge for  $TextFilesList[0]  \n";
 426   }
 427 
 428   # Find out the column number before and after StartColNum in first text file...
 429   $BeforeStartColNum = $StartColNum;
 430   $AfterStartColNum = $StartColNum;
 431 
 432   $FirstIndex = 0; $LastIndex = $#{$TextFilesInfo{ColToMerge}[0]};
 433 
 434   $FirstColNum = $TextFilesInfo{ColToMerge}[0][$FirstIndex];
 435   $LastColNum = $TextFilesInfo{ColToMerge}[0][$LastIndex];
 436 
 437   for $Index (0 .. $LastIndex) {
 438     if ($TextFilesInfo{ColToMerge}[0][$Index] == $StartColNum) {
 439       $BeforeStartColNum = (($Index -1) >= $FirstIndex) ? $TextFilesInfo{ColToMerge}[0][$Index - 1] : ($FirstColNum - 1);
 440       $AfterStartColNum = (($Index + 1) <= $LastIndex) ? $TextFilesInfo{ColToMerge}[0][$Index + 1] : ($LastColNum + 1);
 441     }
 442   }
 443 
 444   if ($OptionsInfo{StartColMode} =~ /^after$/i) {
 445     $Part1StartColNum = $FirstColNum; $Part1EndColNum = $StartColNum;
 446     $Part2StartColNum = $AfterStartColNum; $Part2EndColNum = $LastColNum;
 447   }
 448   else {
 449     $Part1StartColNum = $FirstColNum; $Part1EndColNum = $BeforeStartColNum;
 450     $Part2StartColNum = $StartColNum; $Part2EndColNum = $LastColNum;
 451   }
 452 
 453   @{$TextFilesInfo{File1Part1ColNums}} = ();
 454   @{$TextFilesInfo{File1Part2ColNums}} = ();
 455 
 456   for $ColIndex (0 .. $#{$TextFilesInfo{ColToMerge}[0]}) {
 457     $ColNum = $TextFilesInfo{ColToMerge}[0][$ColIndex];
 458     if ($ColNum >= $Part1StartColNum && $ColNum <= $Part1EndColNum) {
 459       push @{$TextFilesInfo{File1Part1ColNums}}, $ColNum;
 460     }
 461   }
 462 
 463   for $ColIndex (0 .. $#{$TextFilesInfo{ColToMerge}[0]}) {
 464     $ColNum = $TextFilesInfo{ColToMerge}[0][$ColIndex];
 465     if ($ColNum >= $Part2StartColNum && $ColNum <= $Part2EndColNum) {
 466       push @{$TextFilesInfo{File1Part2ColNums}}, $ColNum;
 467     }
 468   }
 469 
 470 }
 471 
 472 # Retrieve information about input text files...
 473 sub RetrieveTextFilesInfo {
 474   my($Index, $TextFile, $FileDir, $FileName, $FileExt, $InDelim, $Line, $ColNum, $ColLabel, $FileNotOkayCount, @ColLabels,);
 475 
 476   %TextFilesInfo = ();
 477 
 478   @{$TextFilesInfo{FileOkay}} = ();
 479   @{$TextFilesInfo{ColCount}} = ();
 480   @{$TextFilesInfo{ColLabels}} = ();
 481   @{$TextFilesInfo{ColLabelToNumMap}} = ();
 482   @{$TextFilesInfo{InDelim}} = ();
 483 
 484   $FileNotOkayCount = 0;
 485 
 486   FILELIST: for $Index (0 .. $#TextFilesList) {
 487     $TextFile = $TextFilesList[$Index];
 488 
 489     $TextFilesInfo{FileOkay}[$Index] = 0;
 490     $TextFilesInfo{ColCount}[$Index] = 0;
 491     $TextFilesInfo{InDelim}[$Index] = "";
 492 
 493     @{$TextFilesInfo{ColLabels}[$Index]} = ();
 494     %{$TextFilesInfo{ColLabelToNumMap}[$Index]} = ();
 495 
 496     if (!(-e $TextFile)) {
 497       warn "Warning: Ignoring file $TextFile: It doesn't exist\n";
 498       $FileNotOkayCount++;
 499       next FILELIST;
 500     }
 501     if (!CheckFileType($TextFile, "csv tsv")) {
 502       warn "Warning: Ignoring file $TextFile: It's not a csv or tsv file\n";
 503       $FileNotOkayCount++;
 504       next FILELIST;
 505     }
 506     ($FileDir, $FileName, $FileExt) = ParseFileName($TextFile);
 507     if ($FileExt =~ /^tsv$/i) {
 508       $InDelim = "\t";
 509     }
 510     else {
 511       $InDelim = "\,";
 512       if ($OptionsInfo{InDelim} !~ /^(comma|semicolon)$/i) {
 513         warn "Warning: Ignoring file $TextFile: The value specified, $OptionsInfo{InDelim}, for option \"--indelim\" is not valid for csv files\n";
 514         $FileNotOkayCount++;
 515         next FILELIST;
 516       }
 517       if ($OptionsInfo{InDelim} =~ /^semicolon$/i) {
 518         $InDelim = "\;";
 519       }
 520     }
 521 
 522     if (!open TEXTFILE, "$TextFile") {
 523       warn "Warning: Ignoring file $TextFile: Couldn't open it: $! \n";
 524       $FileNotOkayCount++;
 525       next FILELIST;
 526     }
 527 
 528     $Line = GetTextLine(\*TEXTFILE);
 529     @ColLabels = quotewords($InDelim, 0, $Line);
 530     close TEXTFILE;
 531 
 532     $TextFilesInfo{FileOkay}[$Index] = 1;
 533     $TextFilesInfo{InDelim}[$Index] = $InDelim;
 534 
 535     $TextFilesInfo{ColCount}[$Index] = @ColLabels;
 536     push @{$TextFilesInfo{ColLabels}[$Index]}, @ColLabels;
 537     for $ColNum (0 .. $#ColLabels) {
 538       $ColLabel = $ColLabels[$ColNum];
 539       $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel} = $ColNum;
 540     }
 541   }
 542   # Make sure all specified files are valid for merging to work properly...
 543   if ($FileNotOkayCount) {
 544     die "Error: Problems with input text file(s)...\n";
 545   }
 546 }
 547 
 548 # Process option values...
 549 sub ProcessOptions {
 550   my($Index, $FileDir, $FileName, $FileExt, $NewTextFile, @ColValues, @KeyValues);
 551 
 552   %OptionsInfo = ();
 553 
 554   $OptionsInfo{Mode} = $Options{mode};
 555 
 556   $OptionsInfo{Columns} = $Options{columns};
 557   @{$OptionsInfo{ColValues}} = ();
 558 
 559   if ($Options{columns}) {
 560     @ColValues = split ";", $Options{columns};
 561     if (@ColValues != @TextFilesList) {
 562       die "Error: Invalid number of values specified by \"-c --columns\" option: it must be equal to number of input text files.\n";
 563     }
 564     for $Index (0 .. $#ColValues) {
 565       if (!length($ColValues[$Index])) {
 566         die "Error: Invalid value specified by \"-c --columns\" option: empty values are not allowed.\n";
 567       }
 568     }
 569     @{$OptionsInfo{ColValues}} = @ColValues;
 570   }
 571 
 572   $OptionsInfo{Keys} = $Options{keys};
 573   @{$OptionsInfo{KeyValues}} = ();
 574 
 575   if ($Options{keys}) {
 576     @KeyValues = split ";", $Options{keys};
 577     if (@KeyValues != @TextFilesList) {
 578       die "Error: Invalid number of values specified by \"-k --keys\" option: it must be equal to number of input text files.\n";
 579     }
 580     for $Index (0 .. $#KeyValues) {
 581       if (!length($KeyValues[$Index])) {
 582         die "Error: Invalid value specified by \"-k --keys\" option: empty values are not allowed.\n";
 583       }
 584     }
 585     @{$OptionsInfo{KeyValues}} = @KeyValues;
 586   }
 587 
 588   $OptionsInfo{InDelim} = $Options{indelim};
 589 
 590   $OptionsInfo{StartCol} = $Options{startcol} ? $Options{startcol} : undef;
 591   $OptionsInfo{StartColMode} = $Options{startcolmode};
 592 
 593   $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : undef;
 594   $OptionsInfo{Overwrite} = $Options{overwrite} ? $Options{overwrite} : undef;
 595 
 596   $OptionsInfo{OutDelim} = ($Options{outdelim} =~ /^tab$/i ) ? "\t" : (($Options{outdelim} =~ /^semicolon$/i) ? "\;" : "\,");
 597   $OptionsInfo{OutQuote} = ($Options{quote} =~ /^yes$/i) ? 1 : 0;
 598 
 599   if ($Options{root}) {
 600     $FileDir = ""; $FileName = ""; $FileExt = "";
 601     ($FileDir, $FileName, $FileExt) = ParseFileName($Options{root});
 602     if ($FileName && $FileExt) {
 603       $NewTextFile = $FileName;
 604     } else {
 605       $NewTextFile =  $Options{root};
 606     }
 607   } else {
 608     $FileDir = ""; $FileName = ""; $FileExt = "";
 609     ($FileDir, $FileName, $FileExt) = ParseFileName($TextFilesList[0]);
 610     $NewTextFile = $FileName . "1To" . @TextFilesList . "Merged";
 611   }
 612   if ($Options{outdelim} =~ /^tab$/i) {
 613     $NewTextFile .= ".tsv";
 614   } else {
 615     $NewTextFile .= ".csv";
 616   }
 617   if (!$Options{overwrite}) {
 618     if (-e $NewTextFile) {
 619       die "Error: The file $NewTextFile already exists.\n";
 620     }
 621   }
 622   if ($Options{root}) {
 623     for $Index (0 .. $#TextFilesList) {
 624       if (lc($NewTextFile) eq lc($TextFilesList[$Index])) {
 625         die "Error: Output filename, $NewTextFile, is similar to a input file name.\nSpecify a different name using \"-r --root\" option or use default name.\n";
 626       }
 627     }
 628   }
 629 
 630   $OptionsInfo{NewTextFile} = $NewTextFile;
 631 }
 632 
 633 # Setup script usage  and retrieve command line arguments specified using various options...
 634 sub SetupScriptUsage {
 635 
 636   # Retrieve all the options...
 637   %Options = ();
 638 
 639   $Options{mode} = "colnum";
 640   $Options{indelim} = "comma";
 641   $Options{outdelim} = "comma";
 642   $Options{quote} = "yes";
 643   $Options{startcolmode} = "after";
 644 
 645   if (!GetOptions(\%Options, "help|h", "indelim=s", "columns|c=s", "keys|k=s", "mode|m=s", "outdelim=s", "overwrite|o", "quote|q=s", "root|r=s", "startcol|s=s", "startcolmode=s", "workingdir|w=s")) {
 646     die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
 647   }
 648   if ($Options{workingdir}) {
 649     if (! -d $Options{workingdir}) {
 650       die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
 651     }
 652     chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
 653   }
 654   if ($Options{mode} !~ /^(colnum|collabel)$/i) {
 655     die "Error: The value specified, $Options{mode}, for option \"-m --mode\" is not valid. Allowed values: colnum, or collabel\n";
 656   }
 657   if ($Options{indelim} !~ /^(comma|semicolon)$/i) {
 658     die "Error: The value specified, $Options{indelim}, for option \"--indelim\" is not valid. Allowed values: comma or semicolon\n";
 659   }
 660   if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
 661     die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
 662   }
 663   if ($Options{quote} !~ /^(yes|no)$/i) {
 664     die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: yes or no\n";
 665   }
 666   if ($Options{startcolmode} !~ /^(before|after)$/i) {
 667     die "Error: The value specified, $Options{quote}, for option \"--startcolmode\" is not valid. Allowed values: before or after\n";
 668   }
 669 }
 670