MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # $RCSfile: AnalyzeTextFilesData.pl,v $
   4 # $Date: 2015/02/28 20:46:04 $
   5 # $Revision: 1.36 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use FindBin; use lib "$FindBin::Bin/../lib";
  31 use Getopt::Long;
  32 use File::Basename;
  33 use Text::ParseWords;
  34 use Benchmark;
  35 use FileUtil;
  36 use TextUtil;
  37 use StatisticsUtil;
  38 
  my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

  # Autoflush STDOUT...
  $| = 1;

  # Starting message...
  $ScriptName = basename($0);
  print "\n$ScriptName: Starting...\n\n";

  # Call the constructor as a class method; the indirect object form, new Benchmark,
  # is ambiguous to the parser...
  $StartTime = Benchmark->new;

  # Get the options and setup script; show the usage when help is requested or no
  # input file is specified on the command line...
  SetupScriptUsage();
  if ($Options{help} || @ARGV < 1) {
    die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  }

  # Expand the command line arguments into a list of text files...
  my(@TextFilesList);
  @TextFilesList = ExpandFileNames(\@ARGV, "csv tsv");

  print "Processing options...\n";
  my(%OptionsInfo);
  ProcessOptions();

  # Collect column information for all the text files...
  print "Checking input text file(s)...\n";
  my(%TextFilesInfo);
  RetrieveTextFilesInfo();
  ProcessColumnsInfo();

  # Generate output files; files marked as not okay in %TextFilesInfo are skipped...
  my($FileIndex);
  if (@TextFilesList > 1) {
    print "\nProcessing text files...\n";
  }
  for $FileIndex (0 .. $#TextFilesList) {
    if ($TextFilesInfo{FileOkay}[$FileIndex]) {
      print "\nProcessing file $TextFilesList[$FileIndex]...\n";
      AnalyzeTextFile($FileIndex);
    }
  }
  print "\n$ScriptName:Done...\n\n";

  # Report the total time taken by the script...
  $EndTime = Benchmark->new;
  $TotalTime = timediff ($EndTime, $StartTime);
  print "Total time: ", timestr($TotalTime), "\n";
  84 
  85 ###############################################################################
  86 
  # Analyze data...
  #
  # Collects, for the text file at position $Index in @TextFilesList, the values of each
  # column listed in $TextFilesInfo{UniqueColNumsToAnalyze}[$Index] and passes them to
  # the Perform* functions corresponding to the specified statistical functions.
  #
  # When $OptionsInfo{CheckData} is set, values failing IsNumerical are left out of the
  # collected column data and reported according to $OptionsInfo{DetailLevel}.
  #
  sub AnalyzeTextFile {
    my($Index) = @_;
    my($TextFile, $TextFileFH, $Line, $InDelim, $ColNum, $Value, @LineWords, @ColNumsToAnalyze, %ColValuesToAnalyzeMap);

    $TextFile = $TextFilesList[$Index];
    $InDelim = $TextFilesInfo{InDelim}[$Index];
    @ColNumsToAnalyze = @{$TextFilesInfo{UniqueColNumsToAnalyze}[$Index]};

    # Start out with an empty list of values for each column...
    %ColValuesToAnalyzeMap = ();
    for $ColNum (@ColNumsToAnalyze) {
      @{$ColValuesToAnalyzeMap{$ColNum}} = ();
    }

    my($LineCount, $InvalidLineCount, @InvalidColLabels);

    # Use three-argument open with a lexical file handle to make sure the file name is
    # never interpreted as an open mode or a command...
    open $TextFileFH, "<", $TextFile or die "Error: Can't open $TextFile: $! \n";

    # Skip over column labels line in text file and collect appropriate column data
    # for analysis...
    $Line = GetTextLine($TextFileFH);
    $LineCount = 1;
    $InvalidLineCount = 0;

    # Check the length of the retrieved line instead of its truth value; otherwise, a data
    # line containing only 0 would end the loop before the end of the file.
    # NOTE(review): assumes GetTextLine returns an empty string or undef at the end of
    # the file - confirm against FileUtil.
    while (defined($Line = GetTextLine($TextFileFH)) && length($Line)) {
      $LineCount++;
      @LineWords = quotewords($InDelim, 0, $Line);
      @InvalidColLabels = ();
      COLNUM: for $ColNum (@ColNumsToAnalyze) {
        $Value = $LineWords[$ColNum];
        if ($OptionsInfo{CheckData} && !IsNumerical($Value)) {
          # Keep track of the column label for reporting and ignore the value...
          push @InvalidColLabels, $TextFilesInfo{ColLabels}[$Index][$ColNum];
          next COLNUM;
        }
        push @{$ColValuesToAnalyzeMap{$ColNum}}, $Value;
      }
      if (@InvalidColLabels) {
        $InvalidLineCount++;
        # The higher the detail level, the more information is printed about the line...
        if ($OptionsInfo{DetailLevel} >= 4) {
          print "Line number $LineCount contains ", scalar(@InvalidColLabels)," non-numerical or empty value(s) for column(s) - ", JoinWords(\@InvalidColLabels, ", ", 0)," - to be analyzed: $Line \n";
        }
        elsif ($OptionsInfo{DetailLevel} >= 3) {
          print "Line number $LineCount contains ", scalar(@InvalidColLabels)," non-numerical or empty value(s) for column(s) - ", JoinWords(\@InvalidColLabels, ", ", 0)," - to be analyzed...\n";
        }
        elsif ($OptionsInfo{DetailLevel} >= 2) {
          print "Line number $LineCount contains ", scalar(@InvalidColLabels)," non-numerical or empty value(s) for columns to be analyzed...\n";
        }
      }
    }
    if ($InvalidLineCount && ($OptionsInfo{DetailLevel} >= 1)) {
      print "Non-numerical or empty data present in $InvalidLineCount line(s)...\n";
    }
    close $TextFileFH;

    # Perform the analysis...
    my(@SpecifiedFunctionNames, $SpecifiedFunction, $FunctionsMapRef);
    @SpecifiedFunctionNames = ();
    $FunctionsMapRef = $OptionsInfo{SpecifiedStatisticalFunctionsMap};

    # Functions calculating a single value for a column are handled together by
    # PerformAnalysis; the rest are handled by their own functions below...
    for $SpecifiedFunction (@{$OptionsInfo{SpecifiedStatisticalFunctions}}) {
      if ($SpecifiedFunction !~ /^(Covariance|Correlation|Frequency|Rsquare|StandardScores|StandardScoresN)$/i) {
        push @SpecifiedFunctionNames, $FunctionsMapRef->{lc($SpecifiedFunction)};
      }
    }
    if (@SpecifiedFunctionNames) {
      PerformAnalysis($Index, \@SpecifiedFunctionNames, \%ColValuesToAnalyzeMap);
    }
    if (exists($FunctionsMapRef->{covariance}) || exists($FunctionsMapRef->{correlation}) || exists($FunctionsMapRef->{rsquare})) {
      if ($OptionsInfo{AllColumnPairs}) {
        PerformMatrixAnalysis($Index, \%ColValuesToAnalyzeMap);
      }
      else {
        # Perform pairwise analysis for specified columns and write out calculated values - correlation
        # rsquare, or covariance - in the same file.
        PerformColumnPairAnalysis($Index, \%ColValuesToAnalyzeMap);
      }
    }
    if (exists($FunctionsMapRef->{standardscores}) || exists($FunctionsMapRef->{standardscoresn})) {
      PerformStandardScoresAnalysis($Index, \%ColValuesToAnalyzeMap);
    }
    if (exists($FunctionsMapRef->{frequency})) {
      PerformFrequencyAnalysis($Index, \%ColValuesToAnalyzeMap);
    }
  }
 169 
 # Calculate values for various statistical functions...
 #
 # Writes a new text file containing one row for each column number in
 # $TextFilesInfo{ColNumsToAnalyze}[$Index]: the column label followed by one value for
 # each function name in $SpecifiedFunctionNamesRef. The values for a column are taken
 # from $ColValuesToAnalyzeMapRef; an empty value is written out for each function
 # when no values are available for the column.
 #
 sub PerformAnalysis {
   my($Index, $SpecifiedFunctionNamesRef, $ColValuesToAnalyzeMapRef) = @_;
   my($NewTextFile, $NewTextFileFH, $Line, $SpecifiedFunction, $Label, @ColLabels, @ColNumsToAnalyze);

   $NewTextFile = $TextFilesInfo{OutFileRoot}[$Index] . $OptionsInfo{FileNameMode} . "." . $TextFilesInfo{OutFileExt}[$Index];

   print "Generating new text file $NewTextFile...\n";
   open $NewTextFileFH, ">", $NewTextFile or die "Error: Can't open $NewTextFile: $! \n";

   # Write out column labels...
   @ColLabels = ();
   push @ColLabels, "ColumnID";
   for $SpecifiedFunction (@{$SpecifiedFunctionNamesRef}) {
     $Label = $SpecifiedFunction;
     if ($SpecifiedFunction =~ /^(KLargest|KSmallest)$/i) {
       # Replace K in the function name by the specified number along with its suffix...
       my($KthValue);
       $KthValue = ($SpecifiedFunction =~ /^KLargest$/i) ? $OptionsInfo{KLargest} : $OptionsInfo{KSmallest};
       $Label = AddNumberSuffix($KthValue) . "$SpecifiedFunction";
       $Label =~ s/K//g;
     }
     elsif ($SpecifiedFunction =~ /^TrimMean$/i) {
       $Label = "${SpecifiedFunction}($OptionsInfo{TrimFraction})";
     }
     push @ColLabels, $Label;
   }
   $Line = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
   print {$NewTextFileFH} "$Line\n";

   # Go over each column to be analyzed...
   @ColNumsToAnalyze = @{$TextFilesInfo{ColNumsToAnalyze}[$Index]};

   my($ColValuesRef, $ColNum, $Value, $FunctionRef, @RowValues, %CalculatedValues);

   # Standard deviation calculated for a column is kept around for its use during
   # the calculation of standard error...
   %CalculatedValues = ();
   for $ColNum (@ColNumsToAnalyze) {
     @RowValues = ();
     # Setup column id...
     push @RowValues, $TextFilesInfo{ColLabels}[$Index][$ColNum];
     $ColValuesRef = \@{$ColValuesToAnalyzeMapRef->{$ColNum}};
     FUNCTIONNAME: for $SpecifiedFunction (@{$SpecifiedFunctionNamesRef}) {
       $Value = "";
       if (!@{$ColValuesRef}) {
         # Invalid column values...
         push @RowValues, $Value;
         next FUNCTIONNAME;
       }
       if ($SpecifiedFunction =~ /^Count$/i) {
         $Value = @{$ColValuesRef};
       }
       else {
         # Statistical functions are invoked using their names. Turn off strict refs only
         # around the function name lookup instead of the rest of this function...
         $FunctionRef = do { no strict 'refs'; \&{$SpecifiedFunction} };

         if ($SpecifiedFunction =~ /^KLargest$/i) {
           $Value = $FunctionRef->($ColValuesRef, $OptionsInfo{KLargest});
         }
         elsif ($SpecifiedFunction =~ /^KSmallest$/i) {
           $Value = $FunctionRef->($ColValuesRef, $OptionsInfo{KSmallest});
         }
         elsif ($SpecifiedFunction =~ /^StandardDeviation$/i) {
           if (!exists($CalculatedValues{$ColNum}{StandardDeviation})) {
             $CalculatedValues{$ColNum}{StandardDeviation} = $FunctionRef->($ColValuesRef);
           }
           $Value = $CalculatedValues{$ColNum}{StandardDeviation};
         }
         elsif ($SpecifiedFunction =~ /^StandardError$/i) {
           if (!exists($CalculatedValues{$ColNum}{StandardDeviation})) {
             $Value = StandardDeviation($ColValuesRef);
             $CalculatedValues{$ColNum}{StandardDeviation} = $Value;
           }
           if (defined $CalculatedValues{$ColNum}{StandardDeviation}) {
             # NOTE(review): the column values are passed as a flattened list after the
             # standard deviation. Confirm against StatisticsUtil whether StandardError
             # expects the number of values, scalar(@{$ColValuesRef}), instead.
             $Value = $FunctionRef->($CalculatedValues{$ColNum}{StandardDeviation}, @{$ColValuesRef});
           }
         }
         elsif ($SpecifiedFunction =~ /^TrimMean$/i) {
           $Value = $FunctionRef->($ColValuesRef, $OptionsInfo{TrimFraction});
         }
         else {
           $Value = $FunctionRef->($ColValuesRef);
         }
       }
       # Format the output value. And add zero to get rid of trailing zeros...
       $Value = (defined($Value) && length($Value)) ? (sprintf("%.$OptionsInfo{Precision}f", $Value) + 0) : "";
       push @RowValues, $Value;
     }
     $Line = JoinWords(\@RowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
     print {$NewTextFileFH} "$Line\n";
   }

   # Errors during buffered writes may not show up until the file is closed...
   close $NewTextFileFH or die "Error: Can't close $NewTextFile: $! \n";
 }
 262 
 # Calculate covariance, correlation, rsquare for specified column pairs....
 #
 # Writes a new text file containing one row for each column pair specified by
 # $TextFilesInfo{ColPairs1ToAnalyze}[$Index] and $TextFilesInfo{ColPairs2ToAnalyze}[$Index]:
 # labels of the two columns followed by the requested values. The correlation value
 # is written out along with rsquare even when only rsquare is requested. Empty values,
 # along with a warning, are written out for a column pair containing different number
 # of values in $ColValuesToAnalyzeMapRef.
 #
 sub PerformColumnPairAnalysis {
   my($Index, $ColValuesToAnalyzeMapRef) = @_;
   my($NewTextFile, $NewTextFileFH, @ColLabels, $Line, $CalculateCorrelation, $CalculateRSquare, $CalculateCovariance);

   $CalculateCorrelation = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{correlation}) ? 1 : 0;
   $CalculateRSquare = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{rsquare}) ? 1 : 0;
   $CalculateCovariance = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{covariance}) ? 1 : 0;

   $NewTextFile = $TextFilesInfo{OutFileRoot}[$Index] . "ColumnPairsAnalysis." .  $TextFilesInfo{OutFileExt}[$Index];
   print "Generating new text file $NewTextFile...\n";
   open $NewTextFileFH, ">", $NewTextFile or die "Error: Can't open $NewTextFile: $! \n";

   # Write out the column labels...
   @ColLabels = ("ColumnID1", "ColumnID2");
   if ($CalculateCorrelation || $CalculateRSquare) {
     push @ColLabels, "Correlation";
     if ($CalculateRSquare) {
       push @ColLabels, "RSquare";
     }
   }
   if ($CalculateCovariance) {
     push @ColLabels, "Covariance";
   }
   $Line = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
   print {$NewTextFileFH} "$Line\n";

   # Go over each column pair...
   my($CorrelationValue, $RSquareValue, $CovarianceValue, $ColIndex, $ColNum1, $ColNum2, $ColValuesRef1, $ColValuesRef2, @ColPairs1ToAnalyze, @ColPairs2ToAnalyze, @RowValues, $Value);

   @ColPairs1ToAnalyze = @{$TextFilesInfo{ColPairs1ToAnalyze}[$Index]};
   @ColPairs2ToAnalyze = @{$TextFilesInfo{ColPairs2ToAnalyze}[$Index]};
   for $ColIndex (0 .. $#ColPairs1ToAnalyze) {
     $ColNum1 = $ColPairs1ToAnalyze[$ColIndex];
     $ColNum2 = $ColPairs2ToAnalyze[$ColIndex];
     $ColValuesRef1 = \@{$ColValuesToAnalyzeMapRef->{$ColNum1}};
     $ColValuesRef2 = \@{$ColValuesToAnalyzeMapRef->{$ColNum2}};

     # Setup column ids...
     @RowValues = ($TextFilesInfo{ColLabels}[$Index][$ColNum1], $TextFilesInfo{ColLabels}[$Index][$ColNum2]);

     if (@$ColValuesRef1 != @$ColValuesRef2) {
       # Print a warning...
       warn "Warning: Skipping analysis for column pair $TextFilesInfo{ColLabels}[$Index][$ColNum1], $TextFilesInfo{ColLabels}[$Index][$ColNum2]: Number of valid data values must be same.\n";

       # Write out an empty value for each value column; the first two column labels
       # correspond to column ids...
       push @RowValues, ("") x (@ColLabels - 2);
     }
     else {
       # Calculate appropriate value...
       if ($CalculateCorrelation || $CalculateRSquare) {
         $CorrelationValue = Correlation($ColValuesRef1, $ColValuesRef2);
         $Value = (defined($CorrelationValue) && length($CorrelationValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $CorrelationValue) + 0) : "";
         push @RowValues, $Value;
         if ($CalculateRSquare) {
           # Use unformatted correlation value to calculate rsquare...
           $RSquareValue = (defined($CorrelationValue) && length($CorrelationValue)) ? ($CorrelationValue ** 2) : "";
           $Value = (length($RSquareValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $RSquareValue) + 0) : "";
           push @RowValues, $Value;
         }
       }
       if ($CalculateCovariance) {
         $CovarianceValue = Covariance($ColValuesRef1, $ColValuesRef2);
         $Value = (defined($CovarianceValue) && length($CovarianceValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $CovarianceValue) + 0) : "";
         push @RowValues, $Value;
       }
     }
     $Line = JoinWords(\@RowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
     print {$NewTextFileFH} "$Line\n";
   }

   # Errors during buffered writes may not show up until the file is closed...
   close $NewTextFileFH or die "Error: Can't close $NewTextFile: $! \n";
 }
 342 
 # Generate histogram numbers...
 #
 # Writes, for each column number in $TextFilesInfo{ColNumsToAnalyze}[$Index], a new text
 # file containing bins and frequency values returned by Frequency for the column values
 # in $ColValuesToAnalyzeMapRef. The bins are specified by $OptionsInfo{BinRange}, when it
 # is not empty, or by $OptionsInfo{NumOfBins}. Only the column labels line is written
 # out for a column without any values.
 #
 sub PerformFrequencyAnalysis {
   my($Index, $ColValuesToAnalyzeMapRef) = @_;
   my($NewTextFile, $NewTextFileFH, @ColLabels, @RowValues, $Line, $ColNum, @ColNumsToAnalyze, $ColValuesRef, $BinValue, $FrequencyValue, $Value, %FrequencyMap);

   @ColNumsToAnalyze = @{$TextFilesInfo{ColNumsToAnalyze}[$Index]};
   for $ColNum (@ColNumsToAnalyze) {
     $NewTextFile = $TextFilesInfo{OutFileRoot}[$Index] . $TextFilesInfo{ColLabels}[$Index][$ColNum] . "FrequencyAnalysis." .  $TextFilesInfo{OutFileExt}[$Index];
     print "Generating new text file $NewTextFile...\n";

     # The file name contains a column label. Use three-argument open to make sure it is
     # always treated as a file name irrespective of the characters in the label...
     $NewTextFileFH = undef;
     open $NewTextFileFH, ">", $NewTextFile or die "Error: Can't open $NewTextFile: $! \n";

     # Write out the column labels...
     @ColLabels = ("Bins", "Frequency");
     $Line = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
     print {$NewTextFileFH} "$Line\n";

     #Calculate and write out frequency values...
     %FrequencyMap = ();
     $ColValuesRef = \@{$ColValuesToAnalyzeMapRef->{$ColNum}};
     if (@$ColValuesRef) {
       if (@{$OptionsInfo{BinRange}}) {
         %FrequencyMap = Frequency($ColValuesRef, \@{$OptionsInfo{BinRange}});
       }
       else {
         %FrequencyMap = Frequency($ColValuesRef, $OptionsInfo{NumOfBins});
       }
     }

     # Bins are written out in ascending numerical order...
     for $BinValue (sort { $a <=> $b } keys %FrequencyMap) {
       $FrequencyValue = $FrequencyMap{$BinValue};

       @RowValues = ();
       $Value = (length($BinValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $BinValue) + 0) : "";
       push @RowValues, $Value;
       $Value = (length($FrequencyValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $FrequencyValue) + 0) : "";
       push @RowValues, $Value;

       $Line = JoinWords(\@RowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
       print {$NewTextFileFH} "$Line\n";
     }

     # Errors during buffered writes may not show up until the file is closed...
     close $NewTextFileFH or die "Error: Can't close $NewTextFile: $! \n";
   }
 }
 386 
 # Calculate covariance, correlation/rsquare matrices....
 #
 # Writes up to three new text files - correlation, rsquare and covariance matrices - for
 # all columns, 0 to $TextFilesInfo{ColCount}[$Index] - 1, using the column values in
 # $ColValuesToAnalyzeMapRef. The correlation matrix is written out along with the rsquare
 # matrix even when only rsquare is requested.
 #
 sub PerformMatrixAnalysis {
   my($Index, $ColValuesToAnalyzeMapRef) = @_;
   my($CorrelationTextFile, $CovarianceTextFile, $RSquareTextFile, $CalculateCorrelation, $CalculateRSquare, $CalculateCovariance);

   $CalculateCorrelation = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{correlation}) ? 1 : 0;
   $CalculateRSquare = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{rsquare}) ? 1 : 0;
   $CalculateCovariance = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{covariance}) ? 1 : 0;

   $CorrelationTextFile = $TextFilesInfo{OutFileRoot}[$Index] . "CorrelationMatrix." .  $TextFilesInfo{OutFileExt}[$Index];
   $RSquareTextFile = $TextFilesInfo{OutFileRoot}[$Index] . "RSquareMatrix." .  $TextFilesInfo{OutFileExt}[$Index];
   $CovarianceTextFile = $TextFilesInfo{OutFileRoot}[$Index] . "CovarianceMatrix." .  $TextFilesInfo{OutFileExt}[$Index];

   # Report the names of the files which are going to be generated. Each file is listed
   # using its own name...
   my(@NewTextFiles, $FileOrFiles);
   @NewTextFiles = ();
   if ($CalculateCorrelation || $CalculateRSquare) {
     push @NewTextFiles, $CorrelationTextFile;
     if ($CalculateRSquare) {
       push @NewTextFiles, $RSquareTextFile;
     }
   }
   if ($CalculateCovariance) {
     push @NewTextFiles, $CovarianceTextFile;
   }
   $FileOrFiles = (@NewTextFiles > 1) ? "files" : "file";
   print "Generating new text $FileOrFiles ", join(", ", @NewTextFiles), "...\n";

   my($CorrelationFH, $RSquareFH, $CovarianceFH);
   if ($CalculateCorrelation || $CalculateRSquare) {
     open $CorrelationFH, ">", $CorrelationTextFile or die "Error: Can't open $CorrelationTextFile: $! \n";
     if ($CalculateRSquare) {
       open $RSquareFH, ">", $RSquareTextFile or die "Error: Can't open $RSquareTextFile: $! \n";
     }
   }
   if ($CalculateCovariance) {
     open $CovarianceFH, ">", $CovarianceTextFile or die "Error: Can't open $CovarianceTextFile: $! \n";
   }

   my($Line, $Value, $CorrelationValue, $CorrelationOkay, $CovarianceValue, $LastColNum, $ColNum, $ColNum1, $ColNum2, $ColValuesRef1, $ColValuesRef2, @ColLabels, @CovarianceRowValues, @CorrelationRowValues, @RSquareRowValues);

   $LastColNum = $TextFilesInfo{ColCount}[$Index] - 1;

   # Write out the column labels; the first label is empty as the first value in each
   # row is a column label...
   @ColLabels = ();
   push @ColLabels, "";
   for $ColNum (0 .. $LastColNum) {
     push @ColLabels, $TextFilesInfo{ColLabels}[$Index][$ColNum];
   }
   $Line = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
   if ($CalculateCorrelation || $CalculateRSquare) {
     print {$CorrelationFH} "$Line\n";
     if ($CalculateRSquare) {
       print {$RSquareFH} "$Line\n";
     }
   }
   if ($CalculateCovariance) {
     print {$CovarianceFH} "$Line\n";
   }

   # Due to symmetric nature of these matrices, only one half needs to be
   # calculated. So, just calculate the lower half and copy it to upper half...
   my(%CorrelationMatrixMap, %RSquareMatrixMap, %CovarianceMatrixMap);

   %CorrelationMatrixMap = (); %RSquareMatrixMap = (); %CovarianceMatrixMap = ();
   for $ColNum1 (0 .. $LastColNum) {
     for $ColNum2 (0 .. $ColNum1) {
       $ColValuesRef1 = \@{$ColValuesToAnalyzeMapRef->{$ColNum1}};
       $ColValuesRef2 = \@{$ColValuesToAnalyzeMapRef->{$ColNum2}};
       if ($CalculateCorrelation || $CalculateRSquare) {
         $CorrelationValue = Correlation($ColValuesRef1, $ColValuesRef2);
         $CorrelationOkay = (defined($CorrelationValue) && length($CorrelationValue)) ? 1 : 0;

         $Value = $CorrelationOkay ? (sprintf("%.$OptionsInfo{Precision}f", $CorrelationValue) + 0) : "";
         $CorrelationMatrixMap{$ColNum1}{$ColNum2} = $Value;
         $CorrelationMatrixMap{$ColNum2}{$ColNum1} = $Value;

         if ($CalculateRSquare) {
           # Use unformatted correlation value to calculate rsquare, as it is done for
           # column pairs analysis, to avoid squaring an already rounded value...
           $Value = $CorrelationOkay ? (sprintf("%.$OptionsInfo{Precision}f", $CorrelationValue ** 2) + 0) : "";
           $RSquareMatrixMap{$ColNum1}{$ColNum2} = $Value;
           $RSquareMatrixMap{$ColNum2}{$ColNum1} = $Value;
         }
       }
       if ($CalculateCovariance) {
         $CovarianceValue = Covariance($ColValuesRef1, $ColValuesRef2);
         $Value = (defined($CovarianceValue) && length($CovarianceValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $CovarianceValue) + 0) : "";
         $CovarianceMatrixMap{$ColNum1}{$ColNum2} = $Value;
         $CovarianceMatrixMap{$ColNum2}{$ColNum1} = $Value;
       }
     }
   }

   # Write out the matrices; each row starts with its column label...
   for $ColNum1 (0 .. $LastColNum) {
     @CorrelationRowValues = ($TextFilesInfo{ColLabels}[$Index][$ColNum1]);
     @RSquareRowValues = ($TextFilesInfo{ColLabels}[$Index][$ColNum1]);
     @CovarianceRowValues = ($TextFilesInfo{ColLabels}[$Index][$ColNum1]);

     for $ColNum2 (0 .. $LastColNum) {
       if ($CalculateCorrelation || $CalculateRSquare) {
         push @CorrelationRowValues, $CorrelationMatrixMap{$ColNum1}{$ColNum2};
         if ($CalculateRSquare) {
           push @RSquareRowValues, $RSquareMatrixMap{$ColNum1}{$ColNum2};
         }
       }
       if ($CalculateCovariance) {
         push @CovarianceRowValues, $CovarianceMatrixMap{$ColNum1}{$ColNum2};
       }
     }
     if ($CalculateCorrelation || $CalculateRSquare) {
       $Line = JoinWords(\@CorrelationRowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
       print {$CorrelationFH} "$Line\n";
       if ($CalculateRSquare) {
         $Line = JoinWords(\@RSquareRowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
         print {$RSquareFH} "$Line\n";
       }
     }
     if ($CalculateCovariance) {
       $Line = JoinWords(\@CovarianceRowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
       print {$CovarianceFH} "$Line\n";
     }
   }

   # Errors during buffered writes may not show up until the files are closed...
   if ($CalculateCorrelation || $CalculateRSquare) {
     close $CorrelationFH or die "Error: Can't close $CorrelationTextFile: $! \n";
     if ($CalculateRSquare) {
       close $RSquareFH or die "Error: Can't close $RSquareTextFile: $! \n";
     }
   }
   if ($CalculateCovariance) {
     close $CovarianceFH or die "Error: Can't close $CovarianceTextFile: $! \n";
   }
 }
 531 
 # Calculate standard scores...
 #
 # Writes a new text file containing, for each data line in the text file at position
 # $Index in @TextFilesList, standard scores for each column number in
 # $TextFilesInfo{ColNumsToAnalyze}[$Index]. The mean and standard deviation values are
 # calculated from the column values in $ColValuesToAnalyzeMapRef.
 #
 # An empty value is written out when the data value is not numerical, with
 # $OptionsInfo{CheckData} set, or when the mean or standard deviation is not available
 # or the standard deviation is zero.
 #
 sub PerformStandardScoresAnalysis {
   my($Index, $ColValuesToAnalyzeMapRef) = @_;
   my($StandardScores, $StandardScoresN, $NewTextFile, $NewTextFileFH, @ColLabels, $Label, $NewLine);

   $StandardScores = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{standardscores}) ? 1 : 0;
   $StandardScoresN = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{standardscoresn}) ? 1 : 0;

   $NewTextFile = $TextFilesInfo{OutFileRoot}[$Index] . "StandardScores." .  $TextFilesInfo{OutFileExt}[$Index];
   print "Generating new text file $NewTextFile...\n";
   open $NewTextFileFH, ">", $NewTextFile or die "Error: Can't open $NewTextFile: $! \n";

   my($ColValuesRef, $ColNum, @ColNumsToAnalyze);
   # Write out column labels...
   @ColLabels = ();
   @ColNumsToAnalyze = @{$TextFilesInfo{ColNumsToAnalyze}[$Index]};
   for $ColNum (@ColNumsToAnalyze) {
     $Label = $TextFilesInfo{ColLabels}[$Index][$ColNum];
     if ($StandardScores) {
       push @ColLabels, "${Label}\(StandardScores)";
     }
     if ($StandardScoresN) {
       push @ColLabels, "${Label}\(StandardScoresN)";
     }
   }
   $NewLine = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
   print {$NewTextFileFH} "$NewLine\n";

   # Go over each column to be analyzed and calculate standard deviation
   # and mean values; a column listed multiple times is processed only once...
   my(%StandardDeviationMap, %StandardDeviationNMap, %MeanMap);
   %StandardDeviationMap = ();
   %StandardDeviationNMap = ();
   %MeanMap = ();
   for $ColNum (@ColNumsToAnalyze) {
     $ColValuesRef = \@{$ColValuesToAnalyzeMapRef->{$ColNum}};
     if (!exists($MeanMap{$ColNum})) {
       $MeanMap{$ColNum} = Mean($ColValuesRef);
     }
     if ($StandardScores && !exists($StandardDeviationMap{$ColNum})) {
       $StandardDeviationMap{$ColNum} = StandardDeviation($ColValuesRef);
     }
     if ($StandardScoresN && !exists($StandardDeviationNMap{$ColNum})) {
       $StandardDeviationNMap{$ColNum} = StandardDeviationN($ColValuesRef);
     }
   }

   # Setup standard deviation maps in the same order as the column labels written
   # out for each column...
   my(@DeviationMapRefs);
   @DeviationMapRefs = ();
   if ($StandardScores) {
     push @DeviationMapRefs, \%StandardDeviationMap;
   }
   if ($StandardScoresN) {
     push @DeviationMapRefs, \%StandardDeviationNMap;
   }

   #
   # Go over each row and calculate standard scores for each column using
   # (x[i] - mean) / deviation, where deviation is the value calculated by
   # StandardDeviation for StandardScores and by StandardDeviationN for
   # StandardScoresN; write out the calculated values as well...

   my($TextFile, $TextFileFH, $InDelim, $Line, $Value, $ValueOkay, $Mean, $Deviation, $DeviationMapRef, $ScoreValue, @RowValues, @LineWords);
   $TextFile = $TextFilesList[$Index];
   $InDelim = $TextFilesInfo{InDelim}[$Index];

   open $TextFileFH, "<", $TextFile or die "Error: Can't open $TextFile: $! \n";

   # Skip over column labels line...
   $Line = GetTextLine($TextFileFH);

   # Check the length of the retrieved line instead of its truth value; otherwise, a data
   # line containing only 0 would end the loop before the end of the file.
   # NOTE(review): assumes GetTextLine returns an empty string or undef at the end of
   # the file - confirm against FileUtil.
   while (defined($Line = GetTextLine($TextFileFH)) && length($Line)) {
     @LineWords = quotewords($InDelim, 0, $Line);
     @RowValues = ();
     for $ColNum (@ColNumsToAnalyze) {
       $Value = $LineWords[$ColNum];
       $ValueOkay = ($OptionsInfo{CheckData} && !IsNumerical($Value)) ? 0 : 1;
       $Mean = $MeanMap{$ColNum};
       for $DeviationMapRef (@DeviationMapRefs) {
         $Deviation = $DeviationMapRef->{$ColNum};
         $ScoreValue = "";
         # A missing or zero standard deviation, for example a column without any valid
         # values or with identical values, would otherwise lead to a fatal division...
         if ($ValueOkay && defined($Mean) && defined($Deviation) && $Deviation != 0) {
           $ScoreValue = sprintf("%.$OptionsInfo{Precision}f", ($Value - $Mean)/$Deviation) + 0;
         }
         push @RowValues, $ScoreValue;
       }
     }
     $NewLine = JoinWords(\@RowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
     print {$NewTextFileFH} "$NewLine\n";
   }
   close $TextFileFH;

   # Errors during buffered writes may not show up until the file is closed...
   close $NewTextFileFH or die "Error: Can't close $NewTextFile: $! \n";
 }
 616 
 617 # Make sure the specified columns exists in text files...
 618 sub ProcessColumnsInfo {
 619   my($Index, $TextFile, $ColNum, $NewColNum, $ColIndex, @ColNumsToAnalyze, %UniqueColNumsToAnalyzeMap);
 620 
 621   @{$TextFilesInfo{ColNumsToAnalyze}} = ();
 622   @{$TextFilesInfo{ColPairs1ToAnalyze}} = ();
 623   @{$TextFilesInfo{ColPairs2ToAnalyze}} = ();
 624   @{$TextFilesInfo{UniqueColNumsToAnalyze}} = ();
 625 
 626   FILELIST: for $Index (0 .. $#TextFilesList) {
 627     $TextFile = $TextFilesList[$Index];
 628 
 629     @{$TextFilesInfo{ColNumsToAnalyze}[$Index]} = ();
 630     @{$TextFilesInfo{ColPairs1ToAnalyze}[$Index]} = ();
 631     @{$TextFilesInfo{ColPairs2ToAnalyze}[$Index]} = ();
 632     @{$TextFilesInfo{UniqueColNumsToAnalyze}[$Index]} = ();
 633 
 634     %UniqueColNumsToAnalyzeMap = ();
 635 
 636     if ($TextFilesInfo{FileOkay}[$Index]) {
 637       @ColNumsToAnalyze = ();
 638       if (@{$OptionsInfo{SpecifiedColumns}}) {
 639         if ($OptionsInfo{ColMode} =~ /^colnum$/i) {
 640           for $ColNum (@{$OptionsInfo{SpecifiedColumns}}) {
 641             if ($ColNum >=1 && $ColNum <= $TextFilesInfo{ColCount}[$Index]) {
 642               $NewColNum = $ColNum -1;
 643               push @ColNumsToAnalyze, $NewColNum;
 644             }
 645           }
 646         }
 647         else {
 648           my($ColLabel);
 649           for $ColLabel (@{$OptionsInfo{SpecifiedColumns}}) {
 650             if (exists($TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel})) {
 651               push @ColNumsToAnalyze, $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel};
 652             }
 653           }
 654         }
 655       }
 656       elsif (defined  $OptionsInfo{Columns} && $OptionsInfo{Columns} =~ /^All$/i) {
 657         for $ColNum (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
 658           push @ColNumsToAnalyze, $ColNum;
 659         }
 660       }
 661       else {
 662         push @ColNumsToAnalyze, 0;
 663       }
 664       if (@ColNumsToAnalyze) {
 665         push @{$TextFilesInfo{ColNumsToAnalyze}[$Index]}, @ColNumsToAnalyze;
 666         # Set up unique columns map as well...
 667         for $ColNum (@ColNumsToAnalyze) {
 668           if (!exists $UniqueColNumsToAnalyzeMap{$ColNum}) {
 669             $UniqueColNumsToAnalyzeMap{$ColNum} = $ColNum;
 670           }
 671         }
 672       }
 673       else {
 674         warn "Warning: Ignoring file $TextFile: None of the columns specified, @{$OptionsInfo{SpecifiedColumns}}, using \"--columns\" option exist.\n";
 675         $TextFilesInfo{FileOkay}[$Index] = 0;
 676         next FILELIST;
 677       }
 678       if (!$OptionsInfo{Overwrite} && exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{frequency})) {
 679         # Make sure specific frequency files don't exist...
 680         my($FrequencyFile);
 681         for $ColNum (@ColNumsToAnalyze) {
 682           $FrequencyFile = $TextFilesInfo{OutFileRoot}[$Index] . $TextFilesInfo{ColLabels}[$Index][$ColNum] . "FrequencyAnalysis." .  $TextFilesInfo{OutFileExt}[$Index];
 683           if (-e $FrequencyFile) {
 684             warn "Warning: Ignoring file $TextFile: The file $FrequencyFile already exists.\n";
 685             $TextFilesInfo{FileOkay}[$Index] = 0;
 686             next FILELIST;
 687           }
 688         }
 689       }
 690       # Setup specified column pairs...
 691       if (exists $OptionsInfo{SpecifiedStatisticalFunctionsMap}{correlation} || exists $OptionsInfo{SpecifiedStatisticalFunctionsMap}{covariance} || exists $OptionsInfo{SpecifiedStatisticalFunctionsMap}{rsquare}) {
 692         my(@ColPairsToAnalyze, $ColNum1, $ColNum2);
 693         if (@{$OptionsInfo{SpecifiedColumnPairs}}) {
 694           # Make sure both columns exist...
 695           if ($OptionsInfo{ColMode} =~ /^colnum$/i) {
 696             for ($ColIndex = 0; (($ColIndex + 1) < @{$OptionsInfo{SpecifiedColumnPairs}}); $ColIndex += 2 ) {
 697               $ColNum1 = $OptionsInfo{SpecifiedColumnPairs}[$ColIndex];
 698               $ColNum2 = $OptionsInfo{SpecifiedColumnPairs}[$ColIndex + 1];
 699               if ($ColNum1 >=1 && $ColNum1 <= $TextFilesInfo{ColCount}[$Index] && $ColNum2 >=1 && $ColNum2 <= $TextFilesInfo{ColCount}[$Index]) {
 700                 $ColNum1 -= 1;
 701                 $ColNum2 -= 1;
 702                 push @ColPairsToAnalyze, ($ColNum1, $ColNum2);
 703               }
 704             }
 705           }
 706           else {
 707             my($ColLabel1, $ColLabel2);
 708             for ($ColIndex = 0; (($ColIndex + 1) < @{$OptionsInfo{SpecifiedColumnPairs}}); $ColIndex += 2 ) {
 709               $ColLabel1 = $OptionsInfo{SpecifiedColumnPairs}[$ColIndex];
 710               $ColLabel2 = $OptionsInfo{SpecifiedColumnPairs}[$ColIndex + 1];
 711               if (exists($TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel1}) && exists($TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel2})) {
 712                 $ColNum1 = $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel1};
 713                 $ColNum2 = $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel2};
 714                 push @ColPairsToAnalyze, ($ColNum1, $ColNum2);
 715               }
 716             }
 717           }
 718         }
 719         elsif ($OptionsInfo{AllColumnPairs}) {
 720           for $ColNum1 (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
 721             for $ColNum2 (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
 722               push @ColPairsToAnalyze, ($ColNum1, $ColNum2);
 723             }
 724           }
 725         }
 726         else {
 727           if ($TextFilesInfo{ColCount}[$Index] >= 2) {
 728             push @ColPairsToAnalyze, (0,1);
 729           }
 730         }
 731         if (@ColPairsToAnalyze) {
 732           if (@ColPairsToAnalyze % 2) {
 733             warn "Warning: Ignoring file $TextFile: Invalid number  values specified using \"--columnpairs\" option: It must contain even number of valid values.\n";
 734             $TextFilesInfo{FileOkay}[$Index] = 0;
 735             next FILELIST;
 736           }
 737           else {
 738             for ($ColIndex = 0; $ColIndex < @ColPairsToAnalyze; $ColIndex += 2) {
 739               push @{$TextFilesInfo{ColPairs1ToAnalyze}[$Index]}, $ColPairsToAnalyze[$ColIndex];
 740               push @{$TextFilesInfo{ColPairs2ToAnalyze}[$Index]}, $ColPairsToAnalyze[$ColIndex + 1];
 741             }
 742             # Set up unique columns map as well...
 743             for $ColNum (@ColPairsToAnalyze) {
 744               if (!exists $UniqueColNumsToAnalyzeMap{$ColNum}) {
 745                 $UniqueColNumsToAnalyzeMap{$ColNum} = $ColNum;
 746               }
 747             }
 748           }
 749         }
 750       }
 751       # Setup uniques columns array...
 752       push @{$TextFilesInfo{UniqueColNumsToAnalyze}[$Index]}, (sort keys %UniqueColNumsToAnalyzeMap);
 753     }
 754   }
 755 }
 756 
 757 # Retrieve information about input text files...
 758 sub RetrieveTextFilesInfo {
 759   my($Index, $TextFile, $FileDir, $FileName, $FileExt, $InDelim, $Line, @ColLabels, $OutFileRoot,  $OutFile, $OutFileExt, $ColNum, $ColLabel);
 760 
 761   %TextFilesInfo = ();
 762 
 763   @{$TextFilesInfo{FileOkay}} = ();
 764   @{$TextFilesInfo{ColCount}} = ();
 765   @{$TextFilesInfo{ColLabels}} = ();
 766   @{$TextFilesInfo{ColLabelToNumMap}} = ();
 767   @{$TextFilesInfo{InDelim}} = ();
 768   @{$TextFilesInfo{OutFileRoot}} = ();
 769   @{$TextFilesInfo{OutFileExt}} = ();
 770 
 771   FILELIST: for $Index (0 .. $#TextFilesList) {
 772     $TextFile = $TextFilesList[$Index];
 773 
 774     $TextFilesInfo{FileOkay}[$Index] = 0;
 775     $TextFilesInfo{ColCount}[$Index] = 0;
 776     $TextFilesInfo{InDelim}[$Index] = "";
 777     $TextFilesInfo{OutFileRoot}[$Index] = "";
 778     $TextFilesInfo{OutFileExt}[$Index] = "";
 779 
 780     @{$TextFilesInfo{ColLabels}[$Index]} = ();
 781     %{$TextFilesInfo{ColLabelToNumMap}[$Index]} = ();
 782 
 783     if (!(-e $TextFile)) {
 784       warn "Warning: Ignoring file $TextFile: It doesn't exist\n";
 785       next FILELIST;
 786     }
 787     if (!CheckFileType($TextFile, "csv tsv")) {
 788       warn "Warning: Ignoring file $TextFile: It's not a csv or tsv file\n";
 789       next FILELIST;
 790     }
 791     ($FileDir, $FileName, $FileExt) = ParseFileName($TextFile);
 792     if ($FileExt =~ /^tsv$/i) {
 793       $InDelim = "\t";
 794     }
 795     else {
 796       $InDelim = "\,";
 797       if ($Options{indelim} !~ /^(comma|semicolon)$/i) {
 798         warn "Warning: Ignoring file $TextFile: The value specified, $Options{indelim}, for option \"--indelim\" is not valid for csv files\n";
 799         next FILELIST;
 800       }
 801       if ($Options{indelim} =~ /^semicolon$/i) {
 802         $InDelim = "\;";
 803       }
 804     }
 805 
 806     if (!open TEXTFILE, "$TextFile") {
 807       warn "Warning: Ignoring file $TextFile: Couldn't open it: $! \n";
 808       next FILELIST;
 809     }
 810 
 811     $Line = GetTextLine(\*TEXTFILE);
 812     @ColLabels = quotewords($InDelim, 0, $Line);
 813     close TEXTFILE;
 814 
 815     $FileDir = ""; $FileName = ""; $FileExt = "";
 816     ($FileDir, $FileName, $FileExt) = ParseFileName($TextFile);
 817     $FileExt = "csv";
 818     if ($Options{outdelim} =~ /^tab$/i) {
 819       $FileExt = "tsv";
 820     }
 821     $OutFileExt = $FileExt;
 822     if ($Options{root} && (@TextFilesList == 1)) {
 823       my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($Options{root});
 824       if ($RootFileName && $RootFileExt) {
 825         $FileName = $RootFileName;
 826       }
 827       else {
 828         $FileName = $Options{root};
 829       }
 830       $OutFileRoot = $FileName;
 831     }
 832     else {
 833       $OutFileRoot = $FileName;
 834     }
 835     $OutFile = $OutFileRoot . $OptionsInfo{FileNameMode} . ".$OutFileExt";
 836 
 837     if (lc($OutFile) eq lc($TextFile)) {
 838       warn "Warning: Ignoring file $TextFile:Output file name, $OutFile, is same as input text file name, $TextFile\n";
 839       next FILELIST;
 840     }
 841     if (!$Options{overwrite}) {
 842       if (-e $OutFile) {
 843         warn "Warning: Ignoring file $TextFile: The file $OutFile already exists\n";
 844         next FILELIST;
 845       }
 846       if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{covariance}) || exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{correlation}) || exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{rsquare})) {
 847         if ($OptionsInfo{AllColumnPairs}) {
 848           if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{covariance}) && (-e "${OutFileRoot}CovarianceMatrix.${FileExt}")) {
 849             warn "Warning: Ignoring file $TextFile: The file ${OutFileRoot}Covariance.${FileExt} already exists.\n";
 850             next FILELIST;
 851           }
 852           if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{correlation}) && (-e "${OutFileRoot}CorrelationMatrix.${FileExt}")) {
 853             warn "Warning: Ignoring file $TextFile: The file ${OutFileRoot}CorrelationMatrix.${FileExt} already exists.\n";
 854             next FILELIST;
 855           }
 856           if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{rsquare}) && (-e "${OutFileRoot}RSquareMatrix.${FileExt}")) {
 857             warn "Warning: Ignoring file $TextFile: The file ${OutFileRoot}RSquareMatrix.${FileExt} already exists.\n";
 858             next FILELIST;
 859           }
 860         }
 861         else {
 862           if (-e "${OutFileRoot}ColumnPairsAnalysis.${FileExt}") {
 863             warn "Warning: Ignoring file $TextFile: The file ${OutFileRoot}ColumnPairsAnalysis.${FileExt} already exists.\n";
 864             next FILELIST;
 865           }
 866         }
 867       }
 868       if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{standardscores}) && (-e "${OutFileRoot}StandardScores.${FileExt}")) {
 869         warn "Warning: Ignoring file $TextFile: The file ${OutFileRoot}StandardScores.${FileExt} already exists.\n";
 870         next FILELIST;
 871       }
 872     }
 873 
 874     $TextFilesInfo{FileOkay}[$Index] = 1;
 875     $TextFilesInfo{InDelim}[$Index] = $InDelim;
 876     $TextFilesInfo{OutFileRoot}[$Index] = "$OutFileRoot";
 877     $TextFilesInfo{OutFileExt}[$Index] = "$OutFileExt";
 878 
 879     $TextFilesInfo{ColCount}[$Index] = @ColLabels;
 880     push @{$TextFilesInfo{ColLabels}[$Index]}, @ColLabels;
 881     for $ColNum (0 .. $#ColLabels) {
 882       $ColLabel = $ColLabels[$ColNum];
 883       $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel} = $ColNum;
 884     }
 885   }
 886 }
 887 
 888 # Process option values...
 889 sub ProcessOptions {
 890   %OptionsInfo = ();
 891 
 892   $OptionsInfo{Mode} = $Options{mode};
 893 
 894   $OptionsInfo{DetailLevel} = $Options{detail};
 895 
 896   # Setup supported statistical functions...
 897   my($SupportedFunction, @SupportedStatisticaFunctions, %SupportedStatisticaFunctionsMap);
 898   %SupportedStatisticaFunctionsMap = ();
 899   @SupportedStatisticaFunctions = qw(Average AverageDeviation Correlation Count Covariance GeometricMean Frequency HarmonicMean KLargest KSmallest Kurtosis Maximum Minimum Mean Median Mode RSquare Skewness Sum SumOfSquares StandardDeviation StandardDeviationN StandardError StandardScores StandardScoresN TrimMean Variance VarianceN);
 900 
 901   for $SupportedFunction (@SupportedStatisticaFunctions) {
 902     $SupportedStatisticaFunctionsMap{lc($SupportedFunction)} = $SupportedFunction;
 903   }
 904 
 905   # Setup a list of functions to use for analysis...
 906   my($SpecifiedFunction);
 907   %{$OptionsInfo{SpecifiedStatisticalFunctionsMap}} = ();
 908   @{$OptionsInfo{SpecifiedStatisticalFunctions}} = ();
 909   # Check mode values...
 910   if ($Options{mode} =~ /^DescriptiveStatisticsBasic$/i ) {
 911     $OptionsInfo{FileNameMode} = "DescriptiveStatisticsBasic";
 912     @{$OptionsInfo{SpecifiedStatisticalFunctions}} = qw(Count Maximum Minimum Mean Median StandardDeviation StandardError Variance Sum);
 913   }
 914   elsif ($Options{mode} =~ /^DescriptiveStatisticsAll$/i ) {
 915     $OptionsInfo{FileNameMode} = "DescriptiveStatisticsAll";
 916     @{$OptionsInfo{SpecifiedStatisticalFunctions}} = qw(Count Maximum Minimum Mean GeometricMean HarmonicMean TrimMean Median Mode StandardDeviation Kurtosis Skewness StandardError Variance  RSquare Frequency  KLargest KSmallest Sum);
 917   }
 918   elsif ($Options{mode} =~ /^All$/i ) {
 919     $OptionsInfo{FileNameMode} = "AllStatistics";
 920     @{$OptionsInfo{SpecifiedStatisticalFunctions}} = @SupportedStatisticaFunctions;
 921   }
 922   else {
 923     $OptionsInfo{FileNameMode} = "SpecifiedStatistics";
 924     # Comma delimited list of functions...
 925     my($Mode, @SpecifiedFunctions, @UnsupportedSpecifiedFunctions);
 926     $Mode = $Options{mode};
 927     $Mode =~ s/ //g;
 928     @SpecifiedFunctions = split ",", $Mode;
 929     @UnsupportedSpecifiedFunctions = ();
 930     for $SpecifiedFunction (@SpecifiedFunctions) {
 931       if (exists($SupportedStatisticaFunctionsMap{lc($SpecifiedFunction)})) {
 932         push @{$OptionsInfo{SpecifiedStatisticalFunctions}}, $SpecifiedFunction;
 933       }
 934       else {
 935         push @UnsupportedSpecifiedFunctions, $SpecifiedFunction;
 936       }
 937     }
 938     if (@UnsupportedSpecifiedFunctions) {
 939       if (@UnsupportedSpecifiedFunctions > 1) {
 940         warn "Error: The values specified - ", JoinWords(\@UnsupportedSpecifiedFunctions, ", ", 0)," - for option \"-m --mode\" are not valid.\n";
 941       }
 942       else {
 943         warn "Error: The value specified, @UnsupportedSpecifiedFunctions , for option \"-m --mode\" is not valid.\n";
 944       }
 945       die "Allowed values:", JoinWords(\@SupportedStatisticaFunctions, ", ", 0), "\n";
 946     }
 947   }
 948   FUNCTION: for $SpecifiedFunction (@{$OptionsInfo{SpecifiedStatisticalFunctions}}) {
 949     if (exists $OptionsInfo{SpecifiedStatisticalFunctionsMap}{lc($SpecifiedFunction)} ) {
 950       next FUNCTION;
 951     }
 952     $OptionsInfo{SpecifiedStatisticalFunctionsMap}{lc($SpecifiedFunction)} = $SupportedStatisticaFunctionsMap{lc($SpecifiedFunction)};
 953   }
 954 
 955   $OptionsInfo{OutDelim} = ($Options{outdelim} =~ /tab/i ) ? "\t" : (($Options{outdelim} =~ /semicolon/i) ? "\;" : "\,");
 956   $OptionsInfo{OutQuote} = ($Options{quote} =~ /yes/i ) ? 1 : 0;
 957 
 958   $OptionsInfo{Overwrite} = defined $Options{overwrite} ? $Options{overwrite} : undef;
 959   $OptionsInfo{Root} = defined $Options{root} ? $Options{root} : undef;
 960 
 961   $OptionsInfo{CheckData} = $Options{fast} ? 0 : 1;
 962   $OptionsInfo{Precision} = $Options{precision};
 963 
 964   $OptionsInfo{KLargest} = $Options{klargest};
 965   $OptionsInfo{KSmallest} = $Options{ksmallest};
 966 
 967   $OptionsInfo{TrimFraction} = $Options{trimfraction};
 968 
 969   # Setup frequency bin values...
 970   $OptionsInfo{NumOfBins} = 10;
 971   @{$OptionsInfo{BinRange}} = ();
 972   if ($Options{frequencybins} =~ /\,/) {
 973     my($BinValue, @SpecifiedBinRange);
 974     @SpecifiedBinRange = split /\,/,  $Options{frequencybins};
 975     if (@SpecifiedBinRange < 2) {
 976       die "Error: The value specified, $Options{frequencybins}, for option \"--frequencybins\" is not valid: Must contain at least two values. \n";
 977     }
 978     for $BinValue (@SpecifiedBinRange) {
 979       if (!IsNumerical($BinValue)) {
 980         die "Error: The value specified, $Options{frequencybins}, for option \"--frequencybins\" is not valid: Contains non numeric values. \n";
 981       }
 982     }
 983     my($Index1, $Index2);
 984     for $Index1 (0 .. $#SpecifiedBinRange) {
 985       for $Index2 (($Index1 + 1) .. $#SpecifiedBinRange) {
 986         if ($SpecifiedBinRange[$Index1] >= $SpecifiedBinRange[$Index2]) {
 987           die "Error: The value specified, $Options{frequencybins}, for option \"--frequencybins\" is not valid: Must contain values in ascending order. \n";
 988         }
 989       }
 990     }
 991     push @{$OptionsInfo{BinRange}}, @SpecifiedBinRange;
 992   }
 993   else {
 994     $OptionsInfo{NumOfBins} = $Options{frequencybins};
 995     if (!IsPositiveInteger($OptionsInfo{NumOfBins})) {
 996       die "Error: The value specified, $Options{frequencybins}, for option \"--frequencybins\" is not valid. Allowed values: positive integer or \"number,number,[number]...\". \n";
 997     }
 998   }
 999 
1000   # Setup specified columns...
1001   $OptionsInfo{ColMode} = $Options{colmode};
1002   $OptionsInfo{Columns} = defined $Options{columns} ? $Options{columns} : undef;
1003 
1004   @{$OptionsInfo{SpecifiedColumns}} = ();
1005   if (defined $Options{columns} && $Options{columns} !~ /^All$/i) {
1006     my(@SpecifiedValues) = split ",", $Options{columns};
1007     if ($Options{colmode} =~ /^colnum$/i) {
1008       my($ColValue);
1009       for $ColValue (@SpecifiedValues) {
1010         if (!IsPositiveInteger($ColValue)) {
1011           die "Error: Column value, $ColValue, specified using \"--columns\" is not valid: Allowed integer values: > 0.\n";
1012         }
1013       }
1014     }
1015     push @{$OptionsInfo{SpecifiedColumns}}, @SpecifiedValues;
1016   }
1017   @{$OptionsInfo{SpecifiedColumnPairs}} = ();
1018   $OptionsInfo{AllColumnPairs} = (defined($Options{columnpairs}) && $Options{columnpairs} =~ /^AllPairs$/i) ? 1 : 0;
1019   if (defined($Options{columnpairs}) && !$OptionsInfo{AllColumnPairs}) {
1020     my(@SpecifiedValues) = split ",", $Options{columnpairs};
1021     if (@SpecifiedValues % 2) {
1022       die "Error: Invalid number of values specified using \"--columnpairs\" option: It must contain even number of values.\n";
1023     }
1024     if ($Options{colmode} =~ /^colnum$/i) {
1025       my($ColValue);
1026       for $ColValue (@SpecifiedValues) {
1027         if (!IsPositiveInteger($ColValue)) {
1028           die "Error: Column value, $ColValue, specified using \"--columnpairs\" is not valid: Allowed integer values: > 0.\n";
1029         }
1030       }
1031     }
1032     push @{$OptionsInfo{SpecifiedColumnPairs}}, @SpecifiedValues;
1033   }
1034 
1035 }
1036 
# Setup script usage and retrieve command line arguments specified using various options...
#
# Initializes %Options with default values, lets GetOptions override them using
# values specified on the command line, changes to the working directory when
# one is specified and dies with an error message on the first invalid value.
#
sub SetupScriptUsage {

  # Default values for the options...
  %Options = (
    colmode => "colnum",
    detail => 1,
    indelim => "comma",
    frequencybins => 10,
    klargest => 2,
    ksmallest => 2,
    mode => "DescriptiveStatisticsBasic",
    outdelim => "comma",
    precision => 2,
    quote => "yes",
    trimfraction => 0.1,
  );

  # Retrieve all the options...
  my @OptionSpecs = ("colmode|c=s", "columns=s", "columnpairs=s", "detail|d=i", "frequencybins=s", "fast|f", "help|h", "indelim=s", "klargest=i", "ksmallest=i", "mode|m=s", "outdelim=s", "overwrite|o", "precision|p=i", "quote|q=s", "root|r=s", "trimfraction=f", "workingdir|w=s");
  GetOptions(\%Options, @OptionSpecs)
    or die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";

  if ($Options{workingdir}) {
    unless (-d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  # Validate option values in a fixed order; the first invalid one ends the script...
  unless ($Options{colmode} =~ /^(colnum|collabel)$/i) {
    die "Error: The value specified, $Options{colmode}, for option \"-c --colmode\" is not valid. Allowed values: colnum or collabel\n";
  }
  unless (IsPositiveInteger($Options{detail})) {
    die "Error: The value specified, $Options{detail}, for option \"-d --detail\" is not valid. Allowed values: > 0\n";
  }
  unless ($Options{indelim} =~ /^(comma|semicolon)$/i) {
    die "Error: The value specified, $Options{indelim}, for option \"--indelim\" is not valid. Allowed values: comma or semicolon\n";
  }
  unless ($Options{outdelim} =~ /^(comma|semicolon|tab)$/i) {
    die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
  }
  unless ($Options{quote} =~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: yes or no\n";
  }
  unless (IsPositiveInteger($Options{precision})) {
    die "Error: The value specified, $Options{precision}, for option \"-p --precision\" is not valid. Allowed values: > 0 \n";
  }
  unless (IsPositiveInteger($Options{klargest})) {
    die "Error: The value specified, $Options{klargest}, for option \"--klargest\" is not valid. Allowed values: > 0 \n";
  }
  unless (IsPositiveInteger($Options{ksmallest})) {
    die "Error: The value specified, $Options{ksmallest}, for option \"--ksmallest\" is not valid. Allowed values: > 0 \n";
  }

  # Trim fraction must be a float lying strictly between 0 and 1; the range is
  # checked only after the value is known to be a float...
  if (!IsFloat($Options{trimfraction}) || $Options{trimfraction} <= 0 || $Options{trimfraction} >= 1.0) {
    die "Error: The value specified, $Options{trimfraction}, for option \"--trimfraction\" is not valid. Allowed values: > 0 and < 1.0\n";
  }
}
1096