annotate bin/AnalyzeTextFilesData.pl @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1 #!/usr/bin/perl -w
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
2 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
3 # $RCSfile: AnalyzeTextFilesData.pl,v $
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
4 # $Date: 2015/02/28 20:46:04 $
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
5 # $Revision: 1.36 $
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
6 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
7 # Author: Manish Sud <msud@san.rr.com>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
8 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
10 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
11 # This file is part of MayaChemTools.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
12 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
14 # the terms of the GNU Lesser General Public License as published by the Free
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
15 # Software Foundation; either version 3 of the License, or (at your option) any
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
16 # later version.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
17 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
18 # MayaChemTools is distributed in the hope that it will be useful, but without
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
19 # any warranty; without even the implied warranty of merchantability or fitness
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
20 # for a particular purpose. See the GNU Lesser General Public License for more
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
21 # details.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
22 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
23 # You should have received a copy of the GNU Lesser General Public License
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
26 # Boston, MA, 02111-1307, USA.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
27 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
28
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
29 use strict;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
30 use FindBin; use lib "$FindBin::Bin/../lib";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
31 use Getopt::Long;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
32 use File::Basename;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
33 use Text::ParseWords;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
34 use Benchmark;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
35 use FileUtil;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
36 use TextUtil;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
37 use StatisticsUtil;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
38
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Script driver: parses options, collects per-file column information and
# runs the requested analysis on each input text file that passed the checks.
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
# Direct method call instead of the parse-ambiguous indirect object form.
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  # No input files or explicit help request: show usage and exit.
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

my(@TextFilesList);
@TextFilesList = ExpandFileNames(\@ARGV, "csv tsv");

print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Collect column information for all the text files...
print "Checking input text file(s)...\n";
my(%TextFilesInfo);
RetrieveTextFilesInfo();
ProcessColumnsInfo();

# Generate output files...
# $FileIndex stays declared at file scope, as in the original layout.
my($FileIndex);
if (@TextFilesList > 1) {
  print "\nProcessing text files...\n";
}
for $FileIndex (0 .. $#TextFilesList) {
  # Only files flagged as okay in %TextFilesInfo are analyzed.
  if ($TextFilesInfo{FileOkay}[$FileIndex]) {
    print "\nProcessing file $TextFilesList[$FileIndex]...\n";
    AnalyzeTextFile($FileIndex);
  }
}
print "\n$ScriptName:Done...\n\n";

$EndTime = Benchmark->new;
$TotalTime = timediff ($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
84
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
85 ###############################################################################
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
86
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
87 # Analyze data...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Read the data lines of one input text file, collect the values of the
# columns to be analyzed and hand them to the requested analysis routines.
#
# Parameters:
#   $Index - position of the file in @TextFilesList and in the per-file
#            arrays of %TextFilesInfo.
#
# Dies when the text file can't be opened. When $OptionsInfo{CheckData} is
# set, non-numerical values are skipped and reported according to
# $OptionsInfo{DetailLevel}.
sub AnalyzeTextFile {
  my($Index) = @_;
  my($TextFile, $TextFileHandle, $Line, $InDelim, $ColNum, $Value, @LineWords, @ColNumsToAnalyze, %ColValuesToAnalyzeMap);

  $TextFile = $TextFilesList[$Index];
  $InDelim = $TextFilesInfo{InDelim}[$Index];
  @ColNumsToAnalyze = @{$TextFilesInfo{UniqueColNumsToAnalyze}[$Index]};

  # Start every column with an empty value list so that later lookups find
  # an array even when no valid data is collected for it.
  %ColValuesToAnalyzeMap = ();
  for $ColNum (@ColNumsToAnalyze) {
    @{$ColValuesToAnalyzeMap{$ColNum}} = ();
  }

  my($LineCount, $InvalidLineCount, @InvalidColLabels);

  # Three-argument open with a lexical handle: the file name is never
  # interpreted as a mode or pipe specification.
  open $TextFileHandle, "<", $TextFile or die "Error: Can't open $TextFile: $! \n";

  # Skip over column labels line in text file and collect appropriate column data
  # for analysis...
  $Line = GetTextLine($TextFileHandle);
  $LineCount = 1;
  $InvalidLineCount = 0;

  # Test for defined and non-empty instead of plain truth; otherwise a data
  # line consisting solely of "0" would end the loop prematurely.
  # NOTE(review): assumes GetTextLine signals end of file with undef or an
  # empty string - both still terminate the loop here; confirm in TextUtil.
  while (defined($Line = GetTextLine($TextFileHandle)) && length($Line)) {
    $LineCount++;
    @LineWords = quotewords($InDelim, 0, $Line);
    @InvalidColLabels = ();
    COLNUM: for $ColNum (@ColNumsToAnalyze) {
      $Value = $LineWords[$ColNum];
      if ($OptionsInfo{CheckData}) {
        if (!IsNumerical($Value)) {
          # Remember the label for reporting and leave the value out.
          push @InvalidColLabels, $TextFilesInfo{ColLabels}[$Index][$ColNum];
          next COLNUM;
        }
      }
      push @{$ColValuesToAnalyzeMap{$ColNum}}, $Value;
    }
    if (@InvalidColLabels) {
      $InvalidLineCount++;
      # Higher detail levels add the column labels and the offending line.
      if ($OptionsInfo{DetailLevel} >=4 ) {
        print "Line number $LineCount contains ", scalar(@InvalidColLabels)," non-numerical or empty value(s) for column(s) - ", JoinWords(\@InvalidColLabels, ", ", 0)," - to be analyzed: $Line \n";
      }
      elsif ($OptionsInfo{DetailLevel} >= 3) {
        print "Line number $LineCount contains ", scalar(@InvalidColLabels)," non-numerical or empty value(s) for column(s) - ", JoinWords(\@InvalidColLabels, ", ", 0)," - to be analyzed...\n";
      }
      elsif ($OptionsInfo{DetailLevel} >= 2) {
        print "Line number $LineCount contains ", scalar(@InvalidColLabels)," non-numerical or empty value(s) for columns to be analyzed...\n";
      }
    }
  }
  if ($InvalidLineCount && ($OptionsInfo{DetailLevel} >= 1)) {
    print "Non-numerical or empty data present in $InvalidLineCount line(s)...\n";
  }
  close $TextFileHandle;

  # Perform the analysis...
  my(@SpecifiedFunctionNames, $SpecifiedFunction);
  @SpecifiedFunctionNames = ();

  # Functions excluded by this pattern are handled by the dedicated routines
  # called further below; all others are evaluated per column.
  for $SpecifiedFunction (@{$OptionsInfo{SpecifiedStatisticalFunctions}}) {
    if ($SpecifiedFunction !~ /^(Covariance|Correlation|Frequency|Rsquare|StandardScores|StandardScoresN)$/i) {
      push @SpecifiedFunctionNames, $OptionsInfo{SpecifiedStatisticalFunctionsMap}{lc($SpecifiedFunction)};
    }
  }
  if (@SpecifiedFunctionNames) {
    PerformAnalysis($Index, \@SpecifiedFunctionNames, \%ColValuesToAnalyzeMap);
  }
  if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{covariance}) || exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{correlation}) || exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{rsquare})) {
    if ($OptionsInfo{AllColumnPairs}) {
      PerformMatrixAnalysis($Index, \%ColValuesToAnalyzeMap);
    }
    else {
      # Perform pairwise analysis for specified columns and write out calculated values - correlation
      # rsquare, or covariance - in the same file.
      PerformColumnPairAnalysis($Index, \%ColValuesToAnalyzeMap);
    }
  }
  if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{standardscores}) || exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{standardscoresn}) ) {
    PerformStandardScoresAnalysis($Index, \%ColValuesToAnalyzeMap);
  }
  if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{frequency})) {
    PerformFrequencyAnalysis($Index, \%ColValuesToAnalyzeMap);
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
169
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
170 # Calculate values for various statistical functions...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Calculate values of the per-column statistical functions and write them
# to a new text file: one header line followed by one row per column.
#
# Parameters:
#   $Index                     - position of the input file in the per-file
#                                arrays of %TextFilesInfo.
#   $SpecifiedFunctionNamesRef - reference to an array of function names;
#                                each name is invoked as a subroutine.
#   $ColValuesToAnalyzeMapRef  - reference to a hash mapping column number to
#                                a reference to an array of collected values.
#
# Dies when the output file can't be opened or closed.
sub PerformAnalysis {
  my($Index, $SpecifiedFunctionNamesRef, $ColValuesToAnalyzeMapRef) = @_;
  my($NewTextFile, $NewTextFileHandle, $Line, $SpecifiedFunction, $Label, @ColLabels, @ColNumsToAnalyze);

  $NewTextFile = $TextFilesInfo{OutFileRoot}[$Index] . $OptionsInfo{FileNameMode} . "." . $TextFilesInfo{OutFileExt}[$Index];

  print "Generating new text file $NewTextFile...\n";
  # Three-argument open with a lexical handle: the file name is never
  # interpreted as a mode or pipe specification.
  open $NewTextFileHandle, ">", $NewTextFile or die "Error: Can't open $NewTextFile: $! \n";

  # Write out column labels...
  @ColLabels = ();
  push @ColLabels, "ColumnID";
  for $SpecifiedFunction (@{$SpecifiedFunctionNamesRef}) {
    $Label = $SpecifiedFunction;
    if ($SpecifiedFunction =~ /^(KLargest|KSmallest)$/i) {
      # Label combines the suffixed K value with the function name minus
      # its "K" characters.
      my($KthValue);
      $KthValue = ($SpecifiedFunction =~ /^KLargest$/i) ? $OptionsInfo{KLargest} : $OptionsInfo{KSmallest};
      $Label = AddNumberSuffix($KthValue) . "$SpecifiedFunction";
      $Label =~ s/K//g;
    }
    elsif ($SpecifiedFunction =~ /^TrimMean$/i) {
      $Label = "${SpecifiedFunction}($OptionsInfo{TrimFraction})";
    }
    push @ColLabels, $Label;
  }
  $Line = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
  print $NewTextFileHandle "$Line\n";

  # Go over each column to be analyzed...
  @ColNumsToAnalyze = @{$TextFilesInfo{ColNumsToAnalyze}[$Index]};

  # Statistical functions are invoked through their name strings, which
  # needs symbolic references; all other strictures stay enabled.
  no strict 'refs';

  my($ColValuesRef, $ColNum, $Value, @RowValues, %CalculatedValues);
  %CalculatedValues = ();
  for $ColNum (@ColNumsToAnalyze) {
    @RowValues = ();
    # Setup column id...
    push @RowValues, $TextFilesInfo{ColLabels}[$Index][$ColNum];
    $ColValuesRef = \@{$ColValuesToAnalyzeMapRef->{$ColNum}};
    FUNCTIONNAME: for $SpecifiedFunction (@{$SpecifiedFunctionNamesRef}) {
      $Value = "";
      if (!@{$ColValuesToAnalyzeMapRef->{$ColNum}}) {
        # Invalid column values...
        push @RowValues, $Value;
        next FUNCTIONNAME;
      }
      if ($SpecifiedFunction =~ /^Count$/i) {
        $Value = scalar(@{$ColValuesToAnalyzeMapRef->{$ColNum}});
      }
      elsif ($SpecifiedFunction =~ /^KLargest$/i) {
        $Value = &$SpecifiedFunction($ColValuesRef, $OptionsInfo{KLargest});
      }
      elsif ($SpecifiedFunction =~ /^KSmallest$/i) {
        $Value = &$SpecifiedFunction($ColValuesRef, $OptionsInfo{KSmallest});
      }
      elsif ($SpecifiedFunction =~ /^StandardDeviation$/i) {
        # Reuse the value when it was already computed for StandardError.
        if (exists($CalculatedValues{$ColNum}{StandardDeviation})) {
          $Value = $CalculatedValues{$ColNum}{StandardDeviation};
        }
        else {
          $Value = &$SpecifiedFunction($ColValuesRef);
          $CalculatedValues{$ColNum}{StandardDeviation} = $Value;
        }
      }
      elsif ($SpecifiedFunction =~ /^StandardError$/i) {
        if (!exists($CalculatedValues{$ColNum}{StandardDeviation})) {
          $Value = StandardDeviation($ColValuesRef);
          $CalculatedValues{$ColNum}{StandardDeviation} = $Value;
        }
        if (defined $CalculatedValues{$ColNum}{StandardDeviation}) {
          # Pass the number of values, not the flattened value list; an
          # array in an argument list would otherwise supply the first data
          # value as the second argument.
          # NOTE(review): assumes StandardError takes (standard deviation,
          # sample size) as documented for StatisticsUtil - confirm.
          $Value = &$SpecifiedFunction($CalculatedValues{$ColNum}{StandardDeviation}, scalar(@{$ColValuesToAnalyzeMapRef->{$ColNum}}));
        }
      }
      elsif ($SpecifiedFunction =~ /^TrimMean$/i) {
        $Value = &$SpecifiedFunction($ColValuesRef, $OptionsInfo{TrimFraction});
      }
      else {
        $Value = &$SpecifiedFunction($ColValuesRef);
      }
      # Format the output value. And add zero to get rid of trailing zeros...
      $Value = (defined($Value) && length($Value)) ? (sprintf("%.$OptionsInfo{Precision}f", $Value) + 0) : "";
      push @RowValues, $Value;
    }
    $Line = JoinWords(\@RowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
    print $NewTextFileHandle "$Line\n";
  }
  # Buffered write errors surface at close time, so check it.
  close $NewTextFileHandle or die "Error: Can't close $NewTextFile: $! \n";
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
262
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
263 # Calculate covariance, correlation, rsquare for specified column pairs....
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
264 sub PerformColumnPairAnalysis {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
265 my($Index, $ColValuesToAnalyzeMapRef) = @_;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
266 my($NewTextFile, @ColLabels, $Line, $CalculateCorrelation, $CalculateRSquare, $CalculateCovariance);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
267 $CalculateCorrelation = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{correlation}) ? 1 : 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
268 $CalculateRSquare = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{rsquare}) ? 1 : 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
269 $CalculateCovariance = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{covariance}) ? 1 : 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
270
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
271 $NewTextFile = $TextFilesInfo{OutFileRoot}[$Index] . "ColumnPairsAnalysis." . $TextFilesInfo{OutFileExt}[$Index];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
272 print "Generating new text file $NewTextFile...\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
273 open NEWTEXTFILE, ">$NewTextFile" or die "Error: Can't open $NewTextFile: $! \n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
274
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
275 # Write out the column labels...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
276 @ColLabels = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
277 push @ColLabels, ("ColumnID1", "ColumnID2");
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
278 if ($CalculateCorrelation || $CalculateRSquare) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
279 push @ColLabels, "Correlation";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
280 if ($CalculateRSquare) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
281 push @ColLabels, "RSquare";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
282 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
283 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
284 if ($CalculateCovariance) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
285 push @ColLabels, "Covariance";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
286 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
287 $Line = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
288 print NEWTEXTFILE "$Line\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
289
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
290 # Go over each column pair...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
291 my($CorrelationValue, $RSquareValue, $CovarianceValue, $ColIndex, $ColNum1, $ColNum2, $ColValuesRef1, $ColValuesRef2, @ColPairs1ToAnalyze, @ColPairs2ToAnalyze, @RowValues, $Value);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
292
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
293 @ColPairs1ToAnalyze = @{$TextFilesInfo{ColPairs1ToAnalyze}[$Index]};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
294 @ColPairs2ToAnalyze = @{$TextFilesInfo{ColPairs2ToAnalyze}[$Index]};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
295 for $ColIndex (0 .. $#ColPairs1ToAnalyze) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
296 @RowValues = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
297 $ColNum1 = $ColPairs1ToAnalyze[$ColIndex];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
298 $ColNum2 = $ColPairs2ToAnalyze[$ColIndex];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
299 $ColValuesRef1 = \@{$ColValuesToAnalyzeMapRef->{$ColNum1}};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
300 $ColValuesRef2 = \@{$ColValuesToAnalyzeMapRef->{$ColNum2}};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
301
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
302 # Setup column ids...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
303 push @RowValues, $TextFilesInfo{ColLabels}[$Index][$ColNum1];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
304 push @RowValues, $TextFilesInfo{ColLabels}[$Index][$ColNum2];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
305
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
306 if (@$ColValuesRef1 != @$ColValuesRef2) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
307 # Print a warning...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
308 warn "Warning: Skipping analysis for column pair $TextFilesInfo{ColLabels}[$Index][$ColNum1], $TextFilesInfo{ColLabels}[$Index][$ColNum2]: Number of valid data values must be same.\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
309 if ($CalculateCorrelation || $CalculateRSquare) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
310 push @RowValues, "";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
311 if ($CalculateRSquare) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
312 push @RowValues, "";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
313 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
314 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
315 if ($CalculateCovariance) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
316 push @RowValues, "";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
317 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
318 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
319 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
320 # Calculate appropriate value...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
321 if ($CalculateCorrelation || $CalculateRSquare) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
322 $CorrelationValue = Correlation($ColValuesRef1, $ColValuesRef2);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
323 $Value = (defined($CorrelationValue) && length($CorrelationValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $CorrelationValue) + 0) : "";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
324 push @RowValues, $Value;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
325 if ($CalculateRSquare) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
326 $RSquareValue = (defined($CorrelationValue) && length($CorrelationValue)) ? ($CorrelationValue ** 2) : "";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
327 $Value = (length($RSquareValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $RSquareValue) + 0) : "";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
328 push @RowValues, $Value;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
329 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
330 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
331 if ($CalculateCovariance) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
332 $CovarianceValue = Covariance($ColValuesRef1, $ColValuesRef2);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
333 $Value = (defined($CovarianceValue) && length($CovarianceValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $CovarianceValue) + 0) : "";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
334 push @RowValues, $Value;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
335 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
336 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
337 $Line = JoinWords(\@RowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
338 print NEWTEXTFILE "$Line\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
339 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
340 close NEWTEXTFILE;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
341 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
342
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Generate histogram numbers...
#
# For each column selected for analysis in the text file at position $Index,
# bin the column values using Frequency and write the bins along with their
# frequencies to a new text file named:
#
#   <OutFileRoot><ColLabel>FrequencyAnalysis.<OutFileExt>
#
# Parameters:
#   $Index                    - Position of the text file in the per-file
#                               arrays stored in %TextFilesInfo.
#   $ColValuesToAnalyzeMapRef - Reference to a hash mapping a column number to
#                               an array of the values collected for it.
#
# Bins are taken from $OptionsInfo{BinRange} when it's not empty; otherwise,
# $OptionsInfo{NumOfBins} is passed to Frequency. A column without any values
# still gets an output file containing only the column labels line.
#
# Dies when an output file can't be opened or closed.
#
sub PerformFrequencyAnalysis {
  my($Index, $ColValuesToAnalyzeMapRef) = @_;
  my($NewTextFile, @ColLabels, @RowValues, $Line, $ColNum, @ColNumsToAnalyze, $ColValuesRef, $BinValue, $FrequencyValue, $Value, %FrequencyMap);

  @ColNumsToAnalyze = @{$TextFilesInfo{ColNumsToAnalyze}[$Index]};
  for $ColNum (@ColNumsToAnalyze) {
    $NewTextFile = $TextFilesInfo{OutFileRoot}[$Index] . $TextFilesInfo{ColLabels}[$Index][$ColNum] . "FrequencyAnalysis." . $TextFilesInfo{OutFileExt}[$Index];
    print "Generating new text file $NewTextFile...\n";

    # The file name contains a column label read from the input file. Use the
    # three-argument form of open to make sure it's always treated as a plain
    # file name and never as part of the open mode...
    open NEWTEXTFILE, ">", $NewTextFile or die "Error: Can't open $NewTextFile: $! \n";

    # Write out the column labels...
    @ColLabels = ("Bins", "Frequency");
    $Line = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
    print NEWTEXTFILE "$Line\n";

    # Calculate and write out frequency values...
    %FrequencyMap = ();
    $ColValuesRef = \@{$ColValuesToAnalyzeMapRef->{$ColNum}};
    if (@$ColValuesRef) {
      if (@{$OptionsInfo{BinRange}}) {
        %FrequencyMap = Frequency($ColValuesRef, \@{$OptionsInfo{BinRange}});
      }
      else {
        %FrequencyMap = Frequency($ColValuesRef, $OptionsInfo{NumOfBins});
      }
    }

    # Bins are hash keys; sort them numerically to write them out in
    # increasing order...
    for $BinValue (sort { $a <=> $b } keys %FrequencyMap) {
      $FrequencyValue = $FrequencyMap{$BinValue};

      # Adding 0 to the formatted string drops any trailing zeros...
      @RowValues = ();
      $Value = (length($BinValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $BinValue) + 0) : "";
      push @RowValues, $Value;
      $Value = (length($FrequencyValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $FrequencyValue) + 0) : "";
      push @RowValues, $Value;

      $Line = JoinWords(\@RowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
      print NEWTEXTFILE "$Line\n";
    }

    # Errors during buffered writes are reported by close...
    close NEWTEXTFILE or die "Error: Can't close $NewTextFile: $! \n";
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
386
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Calculate covariance, correlation/rsquare matrices....
#
# Generate square matrices containing the values calculated for all pairs of
# columns in the text file at position $Index and write each matrix to its
# own text file:
#
#   <OutFileRoot>CorrelationMatrix.<OutFileExt>
#   <OutFileRoot>RSquareMatrix.<OutFileExt>
#   <OutFileRoot>CovarianceMatrix.<OutFileExt>
#
# Parameters:
#   $Index                    - Position of the text file in the per-file
#                               arrays stored in %TextFilesInfo.
#   $ColValuesToAnalyzeMapRef - Reference to a hash mapping a column number to
#                               an array of the values collected for it.
#
# The matrices to generate are determined by the keys correlation, rsquare and
# covariance in $OptionsInfo{SpecifiedStatisticalFunctionsMap}. Specification
# of rsquare always generates the correlation matrix file as well.
#
# The first line and the first column in each file contain column labels.
#
# NOTE(review): The matrices go over all columns in the text file instead of
# just the columns in $TextFilesInfo{ColNumsToAnalyze}; confirm that the caller
# collects values for all columns. A column missing from the map is
# autovivified as an empty array.
#
# Dies when an output file can't be opened or closed.
#
sub PerformMatrixAnalysis {
  my($Index, $ColValuesToAnalyzeMapRef) = @_;
  my($CorrelationTextFile, $CovarianceTextFile, $RSquareTextFile, $CalculateCorrelation, $CalculateRSquare, $CalculateCovariance);

  $CalculateCorrelation = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{correlation}) ? 1 : 0;
  $CalculateRSquare = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{rsquare}) ? 1 : 0;
  $CalculateCovariance = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{covariance}) ? 1 : 0;

  $CorrelationTextFile = $TextFilesInfo{OutFileRoot}[$Index] . "CorrelationMatrix." . $TextFilesInfo{OutFileExt}[$Index];
  $RSquareTextFile = $TextFilesInfo{OutFileRoot}[$Index] . "RSquareMatrix." . $TextFilesInfo{OutFileExt}[$Index];
  $CovarianceTextFile = $TextFilesInfo{OutFileRoot}[$Index] . "CovarianceMatrix." . $TextFilesInfo{OutFileExt}[$Index];

  # Report the name of each file which is actually generated...
  my(@NewTextFiles, $TextFilesList);
  @NewTextFiles = ();
  if ($CalculateCorrelation || $CalculateRSquare) {
    push @NewTextFiles, $CorrelationTextFile;
    if ($CalculateRSquare) {
      push @NewTextFiles, $RSquareTextFile;
    }
  }
  if ($CalculateCovariance) {
    push @NewTextFiles, $CovarianceTextFile;
  }
  $TextFilesList = join(", ", @NewTextFiles);
  if (@NewTextFiles > 1) {
    print "Generating new text files $TextFilesList...\n";
  }
  else {
    print "Generating new text file $TextFilesList...\n";
  }

  # Use the three-argument form of open to make sure file names are never
  # treated as part of the open mode...
  if ($CalculateCorrelation || $CalculateRSquare) {
    open CORRELATIONTEXTFILE, ">", $CorrelationTextFile or die "Error: Can't open $CorrelationTextFile: $! \n";
    if ($CalculateRSquare) {
      open RSQUARETEXTFILE, ">", $RSquareTextFile or die "Error: Can't open $RSquareTextFile: $! \n";
    }
  }
  if ($CalculateCovariance) {
    open COVARIANCETEXTFILE, ">", $CovarianceTextFile or die "Error: Can't open $CovarianceTextFile: $! \n";
  }

  my($Line, $Value, $HasCorrelation, $CorrelationValue, $RSquareValue, $CovarianceValue, $ColNum, $ColNum1, $ColNum2, $ColValuesRef1, $ColValuesRef2, @ColLabels, @CovarianceRowValues, @CorrelationRowValues, @RSquareRowValues);

  # Write out the column labels; the first label is empty as the first column
  # in each row contains a column label...
  @ColLabels = ();
  push @ColLabels, "";
  for $ColNum (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
    push @ColLabels, $TextFilesInfo{ColLabels}[$Index][$ColNum];
  }
  $Line = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
  if ($CalculateCorrelation || $CalculateRSquare) {
    print CORRELATIONTEXTFILE "$Line\n";
    if ($CalculateRSquare) {
      print RSQUARETEXTFILE "$Line\n";
    }
  }
  if ($CalculateCovariance) {
    print COVARIANCETEXTFILE "$Line\n";
  }

  # Due to symmetric nature of these matrices, only one half needs to be
  # calculated. So, just calculate the lower half and copy it to upper half...
  my(%CorrelationMatrixMap, %RSquareMatrixMap, %CovarianceMatrixMap);

  %CorrelationMatrixMap = (); %RSquareMatrixMap = (); %CovarianceMatrixMap = ();
  for $ColNum1 (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
    $ColValuesRef1 = \@{$ColValuesToAnalyzeMapRef->{$ColNum1}};
    for $ColNum2 (0 .. $ColNum1) {
      $ColValuesRef2 = \@{$ColValuesToAnalyzeMapRef->{$ColNum2}};
      if ($CalculateCorrelation || $CalculateRSquare) {
        $CorrelationValue = Correlation($ColValuesRef1, $ColValuesRef2);
        $HasCorrelation = (defined($CorrelationValue) && length($CorrelationValue)) ? 1 : 0;

        # Adding 0 to the formatted string drops any trailing zeros...
        $Value = $HasCorrelation ? (sprintf("%.$OptionsInfo{Precision}f", $CorrelationValue) + 0) : "";
        $CorrelationMatrixMap{$ColNum1}{$ColNum2} = $Value;
        $CorrelationMatrixMap{$ColNum2}{$ColNum1} = $Value;

        if ($CalculateRSquare) {
          # Square the unrounded correlation value to avoid rounding the
          # result twice...
          $RSquareValue = $HasCorrelation ? ($CorrelationValue ** 2) : "";
          $Value = (length($RSquareValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $RSquareValue) + 0) : "";
          $RSquareMatrixMap{$ColNum1}{$ColNum2} = $Value;
          $RSquareMatrixMap{$ColNum2}{$ColNum1} = $Value;
        }
      }
      if ($CalculateCovariance) {
        $CovarianceValue = Covariance($ColValuesRef1, $ColValuesRef2);
        $Value = (defined($CovarianceValue) && length($CovarianceValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $CovarianceValue) + 0) : "";
        $CovarianceMatrixMap{$ColNum1}{$ColNum2} = $Value;
        $CovarianceMatrixMap{$ColNum2}{$ColNum1} = $Value;
      }
    }
  }

  # Write out the matrices...
  for $ColNum1 (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
    @CorrelationRowValues = ();
    @RSquareRowValues = ();
    @CovarianceRowValues = ();
    if ($CalculateCorrelation || $CalculateRSquare) {
      push @CorrelationRowValues, $TextFilesInfo{ColLabels}[$Index][$ColNum1];
      if ($CalculateRSquare) {
        push @RSquareRowValues, $TextFilesInfo{ColLabels}[$Index][$ColNum1];
      }
    }
    if ($CalculateCovariance) {
      push @CovarianceRowValues, $TextFilesInfo{ColLabels}[$Index][$ColNum1];
    }
    for $ColNum2 (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
      if ($CalculateCorrelation || $CalculateRSquare) {
        push @CorrelationRowValues, $CorrelationMatrixMap{$ColNum1}{$ColNum2};
        if ($CalculateRSquare) {
          push @RSquareRowValues, $RSquareMatrixMap{$ColNum1}{$ColNum2};
        }
      }
      if ($CalculateCovariance) {
        push @CovarianceRowValues, $CovarianceMatrixMap{$ColNum1}{$ColNum2};
      }
    }
    if ($CalculateCorrelation || $CalculateRSquare) {
      $Line = JoinWords(\@CorrelationRowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
      print CORRELATIONTEXTFILE "$Line\n";
      if ($CalculateRSquare) {
        $Line = JoinWords(\@RSquareRowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
        print RSQUARETEXTFILE "$Line\n";
      }
    }
    if ($CalculateCovariance) {
      $Line = JoinWords(\@CovarianceRowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
      print COVARIANCETEXTFILE "$Line\n";
    }
  }

  # Errors during buffered writes are reported by close...
  if ($CalculateCorrelation || $CalculateRSquare) {
    close CORRELATIONTEXTFILE or die "Error: Can't close $CorrelationTextFile: $! \n";
    if ($CalculateRSquare) {
      close RSQUARETEXTFILE or die "Error: Can't close $RSquareTextFile: $! \n";
    }
  }
  if ($CalculateCovariance) {
    close COVARIANCETEXTFILE or die "Error: Can't close $CovarianceTextFile: $! \n";
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
531
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Calculate standard scores...
#
# Read the text file at position $Index again and, for each column selected
# for analysis, convert the value in each data row to a standard score:
#
#   (x[i] - mean) / standard deviation
#
# The calculated values are written to a new text file named:
#
#   <OutFileRoot>StandardScores.<OutFileExt>
#
# Parameters:
#   $Index                    - Position of the text file in @TextFilesList
#                               and the per-file arrays in %TextFilesInfo.
#   $ColValuesToAnalyzeMapRef - Reference to a hash mapping a column number to
#                               an array of the values collected for it; it's
#                               used to calculate mean and standard deviation.
#
# The keys standardscores and standardscoresn in
# $OptionsInfo{SpecifiedStatisticalFunctionsMap} determine which scores are
# written out. StandardScores uses the value returned by StandardDeviation and
# StandardScoresN uses the value returned by StandardDeviationN; presumably,
# these correspond to the (n - 1) and n forms of standard deviation - confirm
# against the statistics module.
#
# An empty value is written out whenever a score can't be calculated: the data
# value fails the numerical check performed for $OptionsInfo{CheckData}, or
# the mean or standard deviation is not available, or the standard deviation
# is zero.
#
# Dies when a file can't be opened or the output file can't be closed.
#
sub PerformStandardScoresAnalysis {
  my($Index, $ColValuesToAnalyzeMapRef) = @_;
  my($StandardScores, $StandardScoresN, $NewTextFile, @ColLabels, $Label, $NewLine);

  $StandardScores = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{standardscores}) ? 1 : 0;
  $StandardScoresN = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{standardscoresn}) ? 1 : 0;

  $NewTextFile = $TextFilesInfo{OutFileRoot}[$Index] . "StandardScores." . $TextFilesInfo{OutFileExt}[$Index];
  print "Generating new text file $NewTextFile...\n";
  open NEWTEXTFILE, ">", $NewTextFile or die "Error: Can't open $NewTextFile: $! \n";

  my($ColValuesRef, $ColNum, @ColNumsToAnalyze);
  # Write out column labels...
  @ColLabels = ();
  @ColNumsToAnalyze = @{$TextFilesInfo{ColNumsToAnalyze}[$Index]};
  for $ColNum (@ColNumsToAnalyze) {
    $Label = $TextFilesInfo{ColLabels}[$Index][$ColNum];
    if ($StandardScores) {
      push @ColLabels, "${Label}\(StandardScores)";
    }
    if ($StandardScoresN) {
      push @ColLabels, "${Label}\(StandardScoresN)";
    }
  }
  $NewLine = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
  print NEWTEXTFILE "$NewLine\n";

  # Go over each column to be analyzed and calculate standard deviation
  # and mean values. The checks for existing keys make sure a column number
  # specified multiple times is processed only once...
  my(%StandardDeviationMap, %StandardDeviationNMap, %MeanMap);
  %StandardDeviationMap = ();
  %StandardDeviationNMap = ();
  %MeanMap = ();
  for $ColNum (@ColNumsToAnalyze) {
    $ColValuesRef = \@{$ColValuesToAnalyzeMapRef->{$ColNum}};
    if (!exists($MeanMap{$ColNum})) {
      $MeanMap{$ColNum} = Mean($ColValuesRef);
    }
    if ($StandardScores) {
      if (!exists($StandardDeviationMap{$ColNum})) {
        $StandardDeviationMap{$ColNum} = StandardDeviation($ColValuesRef);
      }
    }
    if ($StandardScoresN) {
      if (!exists($StandardDeviationNMap{$ColNum})) {
        $StandardDeviationNMap{$ColNum} = StandardDeviationN($ColValuesRef);
      }
    }
  }

  # Calculate and format a standard score. An empty string is returned for
  # a missing mean or standard deviation and for a standard deviation of zero
  # to avoid a fatal division by zero...
  my($CalculateScore);
  $CalculateScore = sub {
    my($DataValue, $Mean, $Deviation) = @_;

    if (!(defined($Mean) && length($Mean))) {
      return "";
    }
    if (!(defined($Deviation) && length($Deviation) && $Deviation != 0)) {
      return "";
    }
    # Adding 0 to the formatted string drops any trailing zeros...
    return sprintf("%.$OptionsInfo{Precision}f", ($DataValue - $Mean)/$Deviation) + 0;
  };

  #
  # Go over each row and calculate standard scores for each column
  # using (x[i] - mean) / standard deviation, where the standard deviation
  # comes from StandardDeviation for StandardScores and StandardDeviationN
  # for StandardScoresN; write out the calculated values as well...

  my($TextFile, $InDelim, $Line, $Value, $ValueOkay, $ScoreValue, @RowValues, @LineWords);
  $TextFile = $TextFilesList[$Index];
  $InDelim = $TextFilesInfo{InDelim}[$Index];

  # Use an explicit read mode to make sure the file name is never treated as
  # an open mode or a command...
  open TEXTFILE, "<", $TextFile or die "Error: Can't open $TextFile: $! \n";

  # Skip the column labels line...
  $Line = GetTextLine(\*TEXTFILE);

  # NOTE(review): A data line evaluating to false, such as a line containing
  # only 0, ends this loop early; confirm the value returned by GetTextLine at
  # the end of the file before switching to a defined check.
  while ($Line = GetTextLine(\*TEXTFILE)) {
    @LineWords = quotewords($InDelim, 0, $Line);
    @RowValues = ();
    for $ColNum (@ColNumsToAnalyze) {
      $Value = $LineWords[$ColNum];
      $ValueOkay = ($OptionsInfo{CheckData} && !IsNumerical($Value)) ? 0 : 1;
      if ($StandardScores) {
        $ScoreValue = $ValueOkay ? $CalculateScore->($Value, $MeanMap{$ColNum}, $StandardDeviationMap{$ColNum}) : "";
        push @RowValues, $ScoreValue;
      }
      if ($StandardScoresN) {
        $ScoreValue = $ValueOkay ? $CalculateScore->($Value, $MeanMap{$ColNum}, $StandardDeviationNMap{$ColNum}) : "";
        push @RowValues, $ScoreValue;
      }
    }
    $NewLine = JoinWords(\@RowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
    print NEWTEXTFILE "$NewLine\n";
  }
  close TEXTFILE;

  # Errors during buffered writes are reported by close...
  close NEWTEXTFILE or die "Error: Can't close $NewTextFile: $! \n";
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
616
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
617 # Make sure the specified columns exist in text files...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
618 sub ProcessColumnsInfo {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
619 my($Index, $TextFile, $ColNum, $NewColNum, $ColIndex, @ColNumsToAnalyze, %UniqueColNumsToAnalyzeMap);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
620
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
621 @{$TextFilesInfo{ColNumsToAnalyze}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
622 @{$TextFilesInfo{ColPairs1ToAnalyze}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
623 @{$TextFilesInfo{ColPairs2ToAnalyze}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
624 @{$TextFilesInfo{UniqueColNumsToAnalyze}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
625
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
626 FILELIST: for $Index (0 .. $#TextFilesList) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
627 $TextFile = $TextFilesList[$Index];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
628
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
629 @{$TextFilesInfo{ColNumsToAnalyze}[$Index]} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
630 @{$TextFilesInfo{ColPairs1ToAnalyze}[$Index]} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
631 @{$TextFilesInfo{ColPairs2ToAnalyze}[$Index]} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
632 @{$TextFilesInfo{UniqueColNumsToAnalyze}[$Index]} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
633
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
634 %UniqueColNumsToAnalyzeMap = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
635
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
636 if ($TextFilesInfo{FileOkay}[$Index]) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
637 @ColNumsToAnalyze = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
638 if (@{$OptionsInfo{SpecifiedColumns}}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
639 if ($OptionsInfo{ColMode} =~ /^colnum$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
640 for $ColNum (@{$OptionsInfo{SpecifiedColumns}}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
641 if ($ColNum >=1 && $ColNum <= $TextFilesInfo{ColCount}[$Index]) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
642 $NewColNum = $ColNum -1;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
643 push @ColNumsToAnalyze, $NewColNum;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
644 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
645 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
646 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
647 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
648 my($ColLabel);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
649 for $ColLabel (@{$OptionsInfo{SpecifiedColumns}}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
650 if (exists($TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel})) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
651 push @ColNumsToAnalyze, $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
652 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
653 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
654 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
655 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
656 elsif (defined $OptionsInfo{Columns} && $OptionsInfo{Columns} =~ /^All$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
657 for $ColNum (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
658 push @ColNumsToAnalyze, $ColNum;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
659 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
660 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
661 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
662 push @ColNumsToAnalyze, 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
663 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
664 if (@ColNumsToAnalyze) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
665 push @{$TextFilesInfo{ColNumsToAnalyze}[$Index]}, @ColNumsToAnalyze;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
666 # Set up unique columns map as well...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
667 for $ColNum (@ColNumsToAnalyze) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
668 if (!exists $UniqueColNumsToAnalyzeMap{$ColNum}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
669 $UniqueColNumsToAnalyzeMap{$ColNum} = $ColNum;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
670 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
671 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
672 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
673 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
674 warn "Warning: Ignoring file $TextFile: None of the columns specified, @{$OptionsInfo{SpecifiedColumns}}, using \"--columns\" option exist.\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
675 $TextFilesInfo{FileOkay}[$Index] = 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
676 next FILELIST;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
677 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
678 if (!$OptionsInfo{Overwrite} && exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{frequency})) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
679 # Make sure specific frequency files don't exist...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
680 my($FrequencyFile);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
681 for $ColNum (@ColNumsToAnalyze) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
682 $FrequencyFile = $TextFilesInfo{OutFileRoot}[$Index] . $TextFilesInfo{ColLabels}[$Index][$ColNum] . "FrequencyAnalysis." . $TextFilesInfo{OutFileExt}[$Index];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
683 if (-e $FrequencyFile) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
684 warn "Warning: Ignoring file $TextFile: The file $FrequencyFile already exists.\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
685 $TextFilesInfo{FileOkay}[$Index] = 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
686 next FILELIST;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
687 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
688 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
689 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
690 # Setup specified column pairs...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
691 if (exists $OptionsInfo{SpecifiedStatisticalFunctionsMap}{correlation} || exists $OptionsInfo{SpecifiedStatisticalFunctionsMap}{covariance} || exists $OptionsInfo{SpecifiedStatisticalFunctionsMap}{rsquare}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
692 my(@ColPairsToAnalyze, $ColNum1, $ColNum2);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
693 if (@{$OptionsInfo{SpecifiedColumnPairs}}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
694 # Make sure both columns exist...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
695 if ($OptionsInfo{ColMode} =~ /^colnum$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
696 for ($ColIndex = 0; (($ColIndex + 1) < @{$OptionsInfo{SpecifiedColumnPairs}}); $ColIndex += 2 ) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
697 $ColNum1 = $OptionsInfo{SpecifiedColumnPairs}[$ColIndex];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
698 $ColNum2 = $OptionsInfo{SpecifiedColumnPairs}[$ColIndex + 1];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
699 if ($ColNum1 >=1 && $ColNum1 <= $TextFilesInfo{ColCount}[$Index] && $ColNum2 >=1 && $ColNum2 <= $TextFilesInfo{ColCount}[$Index]) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
700 $ColNum1 -= 1;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
701 $ColNum2 -= 1;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
702 push @ColPairsToAnalyze, ($ColNum1, $ColNum2);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
703 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
704 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
705 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
706 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
707 my($ColLabel1, $ColLabel2);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
708 for ($ColIndex = 0; (($ColIndex + 1) < @{$OptionsInfo{SpecifiedColumnPairs}}); $ColIndex += 2 ) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
709 $ColLabel1 = $OptionsInfo{SpecifiedColumnPairs}[$ColIndex];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
710 $ColLabel2 = $OptionsInfo{SpecifiedColumnPairs}[$ColIndex + 1];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
711 if (exists($TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel1}) && exists($TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel2})) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
712 $ColNum1 = $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel1};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
713 $ColNum2 = $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel2};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
714 push @ColPairsToAnalyze, ($ColNum1, $ColNum2);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
715 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
716 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
717 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
718 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
719 elsif ($OptionsInfo{AllColumnPairs}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
720 for $ColNum1 (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
721 for $ColNum2 (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
722 push @ColPairsToAnalyze, ($ColNum1, $ColNum2);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
723 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
724 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
725 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
726 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
727 if ($TextFilesInfo{ColCount}[$Index] >= 2) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
728 push @ColPairsToAnalyze, (0,1);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
729 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
730 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
731 if (@ColPairsToAnalyze) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
732 if (@ColPairsToAnalyze % 2) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
733 warn "Warning: Ignoring file $TextFile: Invalid number values specified using \"--columnpairs\" option: It must contain even number of valid values.\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
734 $TextFilesInfo{FileOkay}[$Index] = 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
735 next FILELIST;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
736 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
737 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
738 for ($ColIndex = 0; $ColIndex < @ColPairsToAnalyze; $ColIndex += 2) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
739 push @{$TextFilesInfo{ColPairs1ToAnalyze}[$Index]}, $ColPairsToAnalyze[$ColIndex];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
740 push @{$TextFilesInfo{ColPairs2ToAnalyze}[$Index]}, $ColPairsToAnalyze[$ColIndex + 1];
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
741 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
742 # Set up unique columns map as well...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
743 for $ColNum (@ColPairsToAnalyze) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
744 if (!exists $UniqueColNumsToAnalyzeMap{$ColNum}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
745 $UniqueColNumsToAnalyzeMap{$ColNum} = $ColNum;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
746 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
747 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
748 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
749 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
750 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
751 # Setup uniques columns array...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
752 push @{$TextFilesInfo{UniqueColNumsToAnalyze}[$Index]}, (sort keys %UniqueColNumsToAnalyzeMap);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
753 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
754 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
755 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
756
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Retrieve information about input text files...
#
# Resets %TextFilesInfo and, for every entry in @TextFilesList, records per file index:
#
#   FileOkay          - 1 when the file passed all checks below, 0 otherwise
#   InDelim           - input delimiter: tab for tsv files, otherwise comma or semicolon
#                       as selected by the "--indelim" option
#   OutFileRoot       - root used to name output files
#   OutFileExt        - "tsv" when "--outdelim" is tab, otherwise "csv"
#   ColCount          - number of column labels found on the first text line
#   ColLabels         - those column labels, in file order
#   ColLabelToNumMap  - column label to zero-based column index
#
# A file is skipped with a warning, and left flagged as not okay, when it doesn't
# exist, isn't a csv/tsv file, can't be opened, has an invalid "--indelim" value for
# csv input, would be overwritten by its own output, or, without overwrite, when any
# of its output files is already present.
#
# Takes no arguments and returns nothing useful; results are communicated through
# %TextFilesInfo.
#
sub RetrieveTextFilesInfo {
  my($Index, $TextFile, $FileDir, $FileName, $FileExt, $InDelim, $Line, @ColLabels, $OutFileRoot, $OutFile, $OutFileExt, $ColNum, $ColLabel);

  %TextFilesInfo = ();

  @{$TextFilesInfo{FileOkay}} = ();
  @{$TextFilesInfo{ColCount}} = ();
  @{$TextFilesInfo{ColLabels}} = ();
  @{$TextFilesInfo{ColLabelToNumMap}} = ();
  @{$TextFilesInfo{InDelim}} = ();
  @{$TextFilesInfo{OutFileRoot}} = ();
  @{$TextFilesInfo{OutFileExt}} = ();

  FILELIST: for $Index (0 .. $#TextFilesList) {
    $TextFile = $TextFilesList[$Index];

    # Start out with the file marked as not okay; it is flipped only after all checks pass...
    $TextFilesInfo{FileOkay}[$Index] = 0;
    $TextFilesInfo{ColCount}[$Index] = 0;
    $TextFilesInfo{InDelim}[$Index] = "";
    $TextFilesInfo{OutFileRoot}[$Index] = "";
    $TextFilesInfo{OutFileExt}[$Index] = "";

    @{$TextFilesInfo{ColLabels}[$Index]} = ();
    %{$TextFilesInfo{ColLabelToNumMap}[$Index]} = ();

    if (!(-e $TextFile)) {
      warn "Warning: Ignoring file $TextFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($TextFile, "csv tsv")) {
      warn "Warning: Ignoring file $TextFile: It's not a csv or tsv file\n";
      next FILELIST;
    }

    # Setup input delimiter...
    ($FileDir, $FileName, $FileExt) = ParseFileName($TextFile);
    if ($FileExt =~ /^tsv$/i) {
      $InDelim = "\t";
    }
    else {
      $InDelim = "\,";
      if ($Options{indelim} !~ /^(comma|semicolon)$/i) {
        warn "Warning: Ignoring file $TextFile: The value specified, $Options{indelim}, for option \"--indelim\" is not valid for csv files\n";
        next FILELIST;
      }
      if ($Options{indelim} =~ /^semicolon$/i) {
        $InDelim = "\;";
      }
    }

    # Retrieve column labels from the first text line. The file is opened read-only
    # with an explicit mode so that its name is never interpreted as an open mode...
    my($TextFileHandle);
    if (!open $TextFileHandle, "<", $TextFile) {
      warn "Warning: Ignoring file $TextFile: Couldn't open it: $! \n";
      next FILELIST;
    }

    $Line = GetTextLine($TextFileHandle);
    @ColLabels = quotewords($InDelim, 0, $Line);
    close $TextFileHandle;

    # Setup output file root and extension...
    $OutFileExt = ($Options{outdelim} =~ /^tab$/i) ? "tsv" : "csv";

    $OutFileRoot = $FileName;
    if ($Options{root} && (@TextFilesList == 1)) {
      # A specified root is honored only for a single input file; any extension on it is dropped...
      my($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($Options{root});
      $OutFileRoot = ($RootFileName && $RootFileExt) ? $RootFileName : $Options{root};
    }
    $OutFile = $OutFileRoot . $OptionsInfo{FileNameMode} . ".$OutFileExt";

    if (lc($OutFile) eq lc($TextFile)) {
      warn "Warning: Ignoring file $TextFile:Output file name, $OutFile, is same as input text file name, $TextFile\n";
      next FILELIST;
    }

    if (!$Options{overwrite}) {
      # Make sure none of the output files already exist...
      if (-e $OutFile) {
        warn "Warning: Ignoring file $TextFile: The file $OutFile already exists\n";
        next FILELIST;
      }
      if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{covariance}) || exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{correlation}) || exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{rsquare})) {
        if ($OptionsInfo{AllColumnPairs}) {
          # One matrix file per column pair function; the warning names the file actually tested...
          my($FunctionName, $MatrixFile);
          for $FunctionName (qw(Covariance Correlation RSquare)) {
            $MatrixFile = "${OutFileRoot}${FunctionName}Matrix.${OutFileExt}";
            if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{lc($FunctionName)}) && (-e $MatrixFile)) {
              warn "Warning: Ignoring file $TextFile: The file $MatrixFile already exists.\n";
              next FILELIST;
            }
          }
        }
        else {
          if (-e "${OutFileRoot}ColumnPairsAnalysis.${OutFileExt}") {
            warn "Warning: Ignoring file $TextFile: The file ${OutFileRoot}ColumnPairsAnalysis.${OutFileExt} already exists.\n";
            next FILELIST;
          }
        }
      }
      if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{standardscores}) && (-e "${OutFileRoot}StandardScores.${OutFileExt}")) {
        warn "Warning: Ignoring file $TextFile: The file ${OutFileRoot}StandardScores.${OutFileExt} already exists.\n";
        next FILELIST;
      }
    }

    $TextFilesInfo{FileOkay}[$Index] = 1;
    $TextFilesInfo{InDelim}[$Index] = $InDelim;
    $TextFilesInfo{OutFileRoot}[$Index] = "$OutFileRoot";
    $TextFilesInfo{OutFileExt}[$Index] = "$OutFileExt";

    $TextFilesInfo{ColCount}[$Index] = @ColLabels;
    push @{$TextFilesInfo{ColLabels}[$Index]}, @ColLabels;
    for $ColNum (0 .. $#ColLabels) {
      $ColLabel = $ColLabels[$ColNum];
      $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel} = $ColNum;
    }
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
887
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
888 # Process option values...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
889 sub ProcessOptions {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
890 %OptionsInfo = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
891
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
892 $OptionsInfo{Mode} = $Options{mode};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
893
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
894 $OptionsInfo{DetailLevel} = $Options{detail};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
895
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
896 # Setup supported statistical functions...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
897 my($SupportedFunction, @SupportedStatisticaFunctions, %SupportedStatisticaFunctionsMap);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
898 %SupportedStatisticaFunctionsMap = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
899 @SupportedStatisticaFunctions = qw(Average AverageDeviation Correlation Count Covariance GeometricMean Frequency HarmonicMean KLargest KSmallest Kurtosis Maximum Minimum Mean Median Mode RSquare Skewness Sum SumOfSquares StandardDeviation StandardDeviationN StandardError StandardScores StandardScoresN TrimMean Variance VarianceN);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
900
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
901 for $SupportedFunction (@SupportedStatisticaFunctions) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
902 $SupportedStatisticaFunctionsMap{lc($SupportedFunction)} = $SupportedFunction;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
903 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
904
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
905 # Setup a list of functions to use for analysis...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
906 my($SpecifiedFunction);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
907 %{$OptionsInfo{SpecifiedStatisticalFunctionsMap}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
908 @{$OptionsInfo{SpecifiedStatisticalFunctions}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
909 # Check mode values...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
910 if ($Options{mode} =~ /^DescriptiveStatisticsBasic$/i ) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
911 $OptionsInfo{FileNameMode} = "DescriptiveStatisticsBasic";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
912 @{$OptionsInfo{SpecifiedStatisticalFunctions}} = qw(Count Maximum Minimum Mean Median StandardDeviation StandardError Variance Sum);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
913 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
914 elsif ($Options{mode} =~ /^DescriptiveStatisticsAll$/i ) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
915 $OptionsInfo{FileNameMode} = "DescriptiveStatisticsAll";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
916 @{$OptionsInfo{SpecifiedStatisticalFunctions}} = qw(Count Maximum Minimum Mean GeometricMean HarmonicMean TrimMean Median Mode StandardDeviation Kurtosis Skewness StandardError Variance RSquare Frequency KLargest KSmallest Sum);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
917 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
918 elsif ($Options{mode} =~ /^All$/i ) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
919 $OptionsInfo{FileNameMode} = "AllStatistics";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
920 @{$OptionsInfo{SpecifiedStatisticalFunctions}} = @SupportedStatisticaFunctions;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
921 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
922 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
923 $OptionsInfo{FileNameMode} = "SpecifiedStatistics";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
924 # Comma delimited list of functions...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
925 my($Mode, @SpecifiedFunctions, @UnsupportedSpecifiedFunctions);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
926 $Mode = $Options{mode};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
927 $Mode =~ s/ //g;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
928 @SpecifiedFunctions = split ",", $Mode;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
929 @UnsupportedSpecifiedFunctions = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
930 for $SpecifiedFunction (@SpecifiedFunctions) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
931 if (exists($SupportedStatisticaFunctionsMap{lc($SpecifiedFunction)})) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
932 push @{$OptionsInfo{SpecifiedStatisticalFunctions}}, $SpecifiedFunction;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
933 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
934 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
935 push @UnsupportedSpecifiedFunctions, $SpecifiedFunction;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
936 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
937 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
938 if (@UnsupportedSpecifiedFunctions) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
939 if (@UnsupportedSpecifiedFunctions > 1) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
940 warn "Error: The values specified - ", JoinWords(\@UnsupportedSpecifiedFunctions, ", ", 0)," - for option \"-m --mode\" are not valid.\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
941 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
942 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
943 warn "Error: The value specified, @UnsupportedSpecifiedFunctions , for option \"-m --mode\" is not valid.\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
944 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
945 die "Allowed values:", JoinWords(\@SupportedStatisticaFunctions, ", ", 0), "\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
946 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
947 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
948 FUNCTION: for $SpecifiedFunction (@{$OptionsInfo{SpecifiedStatisticalFunctions}}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
949 if (exists $OptionsInfo{SpecifiedStatisticalFunctionsMap}{lc($SpecifiedFunction)} ) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
950 next FUNCTION;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
951 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
952 $OptionsInfo{SpecifiedStatisticalFunctionsMap}{lc($SpecifiedFunction)} = $SupportedStatisticaFunctionsMap{lc($SpecifiedFunction)};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
953 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
954
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
955 $OptionsInfo{OutDelim} = ($Options{outdelim} =~ /tab/i ) ? "\t" : (($Options{outdelim} =~ /semicolon/i) ? "\;" : "\,");
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
956 $OptionsInfo{OutQuote} = ($Options{quote} =~ /yes/i ) ? 1 : 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
957
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
958 $OptionsInfo{Overwrite} = defined $Options{overwrite} ? $Options{overwrite} : undef;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
959 $OptionsInfo{Root} = defined $Options{root} ? $Options{root} : undef;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
960
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
961 $OptionsInfo{CheckData} = $Options{fast} ? 0 : 1;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
962 $OptionsInfo{Precision} = $Options{precision};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
963
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
964 $OptionsInfo{KLargest} = $Options{klargest};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
965 $OptionsInfo{KSmallest} = $Options{ksmallest};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
966
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
967 $OptionsInfo{TrimFraction} = $Options{trimfraction};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
968
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
969 # Setup frequency bin values...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
970 $OptionsInfo{NumOfBins} = 10;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
971 @{$OptionsInfo{BinRange}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
972 if ($Options{frequencybins} =~ /\,/) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
973 my($BinValue, @SpecifiedBinRange);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
974 @SpecifiedBinRange = split /\,/, $Options{frequencybins};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
975 if (@SpecifiedBinRange < 2) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
976 die "Error: The value specified, $Options{frequencybins}, for option \"--frequencybins\" is not valid: Must contain at least two values. \n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
977 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
978 for $BinValue (@SpecifiedBinRange) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
979 if (!IsNumerical($BinValue)) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
980 die "Error: The value specified, $Options{frequencybins}, for option \"--frequencybins\" is not valid: Contains non numeric values. \n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
981 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
982 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
983 my($Index1, $Index2);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
984 for $Index1 (0 .. $#SpecifiedBinRange) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
985 for $Index2 (($Index1 + 1) .. $#SpecifiedBinRange) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
986 if ($SpecifiedBinRange[$Index1] >= $SpecifiedBinRange[$Index2]) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
987 die "Error: The value specified, $Options{frequencybins}, for option \"--frequencybins\" is not valid: Must contain values in ascending order. \n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
988 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
989 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
990 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
991 push @{$OptionsInfo{BinRange}}, @SpecifiedBinRange;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
992 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
993 else {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
994 $OptionsInfo{NumOfBins} = $Options{frequencybins};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
995 if (!IsPositiveInteger($OptionsInfo{NumOfBins})) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
996 die "Error: The value specified, $Options{frequencybins}, for option \"--frequencybins\" is not valid. Allowed values: positive integer or \"number,number,[number]...\". \n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
997 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
998 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
999
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1000 # Setup specified columns...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1001 $OptionsInfo{ColMode} = $Options{colmode};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1002 $OptionsInfo{Columns} = defined $Options{columns} ? $Options{columns} : undef;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1003
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1004 @{$OptionsInfo{SpecifiedColumns}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1005 if (defined $Options{columns} && $Options{columns} !~ /^All$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1006 my(@SpecifiedValues) = split ",", $Options{columns};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1007 if ($Options{colmode} =~ /^colnum$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1008 my($ColValue);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1009 for $ColValue (@SpecifiedValues) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1010 if (!IsPositiveInteger($ColValue)) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1011 die "Error: Column value, $ColValue, specified using \"--columns\" is not valid: Allowed integer values: > 0.\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1012 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1013 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1014 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1015 push @{$OptionsInfo{SpecifiedColumns}}, @SpecifiedValues;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1016 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1017 @{$OptionsInfo{SpecifiedColumnPairs}} = ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1018 $OptionsInfo{AllColumnPairs} = (defined($Options{columnpairs}) && $Options{columnpairs} =~ /^AllPairs$/i) ? 1 : 0;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1019 if (defined($Options{columnpairs}) && !$OptionsInfo{AllColumnPairs}) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1020 my(@SpecifiedValues) = split ",", $Options{columnpairs};
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1021 if (@SpecifiedValues % 2) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1022 die "Error: Invalid number of values specified using \"--columnpairs\" option: It must contain even number of values.\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1023 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1024 if ($Options{colmode} =~ /^colnum$/i) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1025 my($ColValue);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1026 for $ColValue (@SpecifiedValues) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1027 if (!IsPositiveInteger($ColValue)) {
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1028 die "Error: Column value, $ColValue, specified using \"--columnpairs\" is not valid: Allowed integer values: > 0.\n";
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1029 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1030 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1031 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1032 push @{$OptionsInfo{SpecifiedColumnPairs}}, @SpecifiedValues;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1033 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1034
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1035 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1036
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1037 # Setup script usage and retrieve command line arguments specified using various options...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Populate %Options with default values, overlay whatever was specified on the
# command line, and validate those option values which can be checked without
# looking at any input file. The first invalid value terminates the script with
# an explanatory message...
sub SetupScriptUsage {

  # Start from a clean slate holding only the default values...
  %Options = (
    colmode => "colnum",
    detail => 1,
    indelim => "comma",
    frequencybins => 10,
    klargest => 2,
    ksmallest => 2,
    mode => "DescriptiveStatisticsBasic",
    outdelim => "comma",
    precision => 2,
    quote => "yes",
    trimfraction => 0.1,
  );

  # Option specifications handed over to GetOptions...
  my(@OptionSpecifications) = (
    "colmode|c=s", "columns=s", "columnpairs=s", "detail|d=i", "frequencybins=s",
    "fast|f", "help|h", "indelim=s", "klargest=i", "ksmallest=i", "mode|m=s",
    "outdelim=s", "overwrite|o", "precision|p=i", "quote|q=s", "root|r=s",
    "trimfraction=f", "workingdir|w=s",
  );

  GetOptions(\%Options, @OptionSpecifications)
    or die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";

  # Switch over to the working directory before anything else is checked...
  if ($Options{workingdir}) {
    -d $Options{workingdir}
      or die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  # Validate remaining option values; order determines which problem gets reported first...
  unless ($Options{colmode} =~ /^(colnum|collabel)$/i) {
    die "Error: The value specified, $Options{colmode}, for option \"-c --colmode\" is not valid. Allowed values: colnum or collabel\n";
  }

  unless (IsPositiveInteger($Options{detail})) {
    die "Error: The value specified, $Options{detail}, for option \"-d --detail\" is not valid. Allowed values: > 0\n";
  }

  unless ($Options{indelim} =~ /^(comma|semicolon)$/i) {
    die "Error: The value specified, $Options{indelim}, for option \"--indelim\" is not valid. Allowed values: comma or semicolon\n";
  }

  unless ($Options{outdelim} =~ /^(comma|semicolon|tab)$/i) {
    die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
  }

  unless ($Options{quote} =~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: yes or no\n";
  }

  unless (IsPositiveInteger($Options{precision})) {
    die "Error: The value specified, $Options{precision}, for option \"-p --precision\" is not valid. Allowed values: > 0 \n";
  }

  unless (IsPositiveInteger($Options{klargest})) {
    die "Error: The value specified, $Options{klargest}, for option \"--klargest\" is not valid. Allowed values: > 0 \n";
  }

  unless (IsPositiveInteger($Options{ksmallest})) {
    die "Error: The value specified, $Options{ksmallest}, for option \"--ksmallest\" is not valid. Allowed values: > 0 \n";
  }

  # Trim fraction must be a float lying strictly between 0 and 1; the range comparison
  # is attempted only after the value has passed the float check...
  if (!IsFloat($Options{trimfraction}) || $Options{trimfraction} <= 0 || $Options{trimfraction} >= 1.0) {
    die "Error: The value specified, $Options{trimfraction}, for option \"--trimfraction\" is not valid. Allowed values: > 0 and < 1.0\n";
  }
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1096
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1097 __END__
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1098
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1099 =head1 NAME
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1100
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1101 AnalyzeTextFilesData.pl - Analyze numerical column data in TextFile(s)
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1102
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1103 =head1 SYNOPSIS
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1104
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1105 AnalyzeTextFilesData.pl TextFile(s)...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1106
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1107 AnalyzeTextFilesData.pl [B<-c, --colmode> colnum | collabel] [B<--columns> "colnum,[colnum,...]" | "collabel,[collabel,...]" | All]
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1108 [B<--columnpairs> "colnum,colnum,[colnum,colnum]..." | "collabel,collabel,[collabel,collabel]..." | AllPairs]
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1109 [B<-d, --detail> infolevel] [B<-f, --fast>] [B<--frequencybins> number | "number,number,[number,...]"] [B<-h, --help>]
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1110 [B<--indelim> comma | semicolon] [B<--klargest> number] [B<--ksmallest> number]
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1111 [B<-m, --mode> DescriptiveStatisticsBasic | DescriptiveStatisticsAll | All | "function1, [function2,...]"]
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1112 [B<-o, --overwrite>] [B<--outdelim> comma | tab | semicolon] [B<-p, --precision> number]
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1113 [B<-q, --quote> yes | no] [B<-r, --root> rootname] [B<--trimfraction> number] [B<-w, --workingdir> dirname] TextFile(s)...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1114
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1115 =head1 DESCRIPTION
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1116
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1117 Analyze numerical column data in I<TextFile(s)> using a combination of various statistical
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1118 functions; Non-numerical values are simply ignored. For I<Correlation, RSquare, and Covariance>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1119 analysis, the count of valid values in specified column pair must be same; otherwise, column
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1120 pair is ignored. The file names are separated by space. The valid file extensions are I<.csv>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1121 and I<.tsv> for comma/semicolon and tab delimited text files respectively. All other
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1122 file names are ignored. All the text files in a current directory can be specified by
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1123 I<*.csv>, I<*.tsv>, or the current directory name. The B<--indelim> option determines
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1124 the format of I<TextFile(s)>. Any file which doesn't correspond to the format indicated
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1125 by B<--indelim> option is ignored.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1126
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1127 =head1 OPTIONS
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1128
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1129 =over 4
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1130
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1131 =item B<-c, --colmode> I<colnum | collabel>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1132
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1133 Specify how columns are identified in TextFile(s): using column number or column
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1134 label. Possible values: I<colnum or collabel>. Default value: I<colnum>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1135
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1136 =item B<--columns> I<"colnum,[colnum,...]" | "collabel,[collabel]..." | All>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1137
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1138 This value is mode specific. It's a list of comma delimited columns to use
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1139 for data analysis. Default value: I<First column>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1140
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1141 This value is ignored during I<Correlation/Pearson Correlation> and I<Covariance>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1142 data analysis; B<--columnpairs> option is used instead.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1143
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1144 For I<colnum> value of B<-c, --colmode> option, input values format is:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1145 I<colnum,colnum,...>. Example:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1146
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1147 1,3,5
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1148
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1149 For I<collabel> value of B<-c, --colmode> option, input values format is:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1150 I<collabel,collabel,..>. Example:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1151
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1152 ALogP,MolWeight,EC50
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1153
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1154 =item B<--columnpairs> I<"colnum,colnum,[colnum,colnum,...]" | "collabel,collabel,[collabel,collabel,...]" | AllPairs>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1155
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1156 This value is mode specific and is only used for I<Correlation, PearsonCorrelation, or
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1157 Covariance> value of B<-m, --mode> option. It is a comma delimited list of column pairs
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1158 to use for data analysis during I<Correlation> and I<Covariance> calculations. Default value:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1159 I<First column, Second column>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1160
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1161 For I<colnum> value of B<-c, --colmode> option, input values format is:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1162 I<colnum,colnum,[colnum,colnum]...>. Example:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1163
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1164 1,3,5,6,1,6
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1165
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1166 For I<collabel> value of B<-c, --colmode> option, input values format is:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1167 I<collabel,collabel,[collabel,collabel]..>. Example:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1168
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1169 MolWeight,EC50,NumN+O,PSA
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1170
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1171 For I<AllPairs> value of B<--columnpairs> option, all column pairs are used for I<Correlation>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1172 and I<Covariance> calculations.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1173
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1174 =item B<-d, --detail> I<infolevel>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1175
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1176 Level of information to print about column values being ignored. Default: I<1>. Possible values:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1177 1, 2, 3, or 4.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1178
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1179 =item B<-f, --fast>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1180
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1181 In this mode, all the columns specified for analysis are assumed to contain numerical
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1182 data and no checking is performed before analysis. By default, only numerical data is
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1183 used for analysis; other types of column data are ignored.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1184
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1185 =item B<--frequencybins> I<number | "number,number,[number,...]">
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1186
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1187 Specify number of bins or bin range to use for frequency analysis. Default value: I<10>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1188
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1189 Number of bins value along with the smallest and largest value for a column is used to
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1190 group the column values into different groups.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1191
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1192 The bin range list is used to group values for a column into different groups; It must contain
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1193 values in ascending order. Examples:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1194
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1195 10,20,30
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1196 0.1,0.2,0.3,0.4,0.5
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1197
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1198 The frequency value calculated for a specific bin corresponds to all the column values
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1199 which are greater than the previous bin value and less than or equal to the current bin value.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1200
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1201 =item B<-h, --help>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1202
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1203 Print this help message.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1204
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1205 =item B<--indelim> I<comma | semicolon>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1206
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1207 Input delimiter for CSV I<TextFile(s)>. Possible values: I<comma or semicolon>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1208 Default value: I<comma>. For TSV files, this option is ignored and I<tab> is used as a
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1209 delimiter.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1210
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1211 =item B<--klargest> I<number>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1212
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1213 Kth largest value to find by I<KLargest> function. Default value: I<2>. Valid values: positive
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1214 integers.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1215
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1216 =item B<--ksmallest> I<number>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1217
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1218 Kth smallest value to find by I<KSmallest> function. Default value: I<2>. Valid values: positive
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1219 integers.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1220
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1221 =item B<-m, --mode> I<DescriptiveStatisticsBasic | DescriptiveStatisticsAll | All | "function1, [function2,...]">
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1222
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1223 Specify how to analyze data in TextFile(s): calculate basic or all descriptive statistics; or
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1224 use a comma delimited list of supported statistical functions. Possible values:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1225 I<DescriptiveStatisticsBasic | DescriptiveStatisticsAll | All | "function1,[function2]...">. Default
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1226 value: I<DescriptiveStatisticsBasic>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1227
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1228 I<DescriptiveStatisticsBasic> includes these functions: I<Count, Maximum, Minimum, Mean,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1229 Median, Sum, StandardDeviation, StandardError, Variance>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1230
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1231 I<DescriptiveStatisticsAll>, in addition to I<DescriptiveStatisticsBasic> functions, includes:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1232 I<GeometricMean, Frequency, HarmonicMean, KLargest, KSmallest, Kurtosis, Mode, RSquare,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1233 Skewness, TrimMean>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1234
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1235 I<All> uses complete list of supported functions: I<Average, AverageDeviation, Correlation,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1236 Count, Covariance, GeometricMean, Frequency, HarmonicMean, KLargest, KSmallest, Kurtosis,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1237 Maximum, Minimum, Mean, Median, Mode, RSquare, Skewness, Sum,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1238 SumOfSquares, StandardDeviation, StandardDeviationN, StandardError, StandardScores,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1239 StandardScoresN, TrimMean, Variance, VarianceN>. The function names ending with N
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1240 calculate corresponding values assuming an entire population instead of a population sample.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1241
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1242 Here are the formulas for these functions:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1243
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1244 Average: See Mean
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1245
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1246 AverageDeviation: SUM( ABS(x[i] - Xmean) ) / n
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1247
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1248 Correlation: See Pearson Correlation
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1249
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1250 Covariance: SUM( (x[i] - Xmean)(y[i] - Ymean) ) / n
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1251
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1252 GeometricMean: NthROOT( PRODUCT(x[i]) )
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1253
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1254 HarmonicMean: 1 / ( SUM(1/x[i]) / n )
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1255
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1256 Mean: SUM( x[i] ) / n
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1257
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1258 Median: Xsorted[(n - 1)/2 + 1] for odd values of n; (Xsorted[n/2] + Xsorted[n/2 + 1])/2
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1259 for even values of n.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1260
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1261 Kurtosis: [ {n(n + 1)/(n - 1)(n - 2)(n - 3)} SUM{ ((x[i] - Xmean)/STDDEV)^4 } ] -
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1262 {3((n - 1)^2)}/{(n - 2)(n-3)}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1263
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1264 PearsonCorrelation: SUM( (x[i] - Xmean)(y[i] - Ymean) ) / SQRT( SUM( (x[i] - Xmean)^2 )
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1265 (SUM( (y[i] - Ymean)^2 )) )
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1266
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1267 RSquare: PearsonCorrelation^2
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1268
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1269 Skewness: {n/(n - 1)(n - 2)} SUM{ ((x[i] - Xmean)/STDDEV)^3 }
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1270
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1271 StandardDeviation: SQRT ( SUM( (x[i] - Mean)^2 ) / (n - 1) )
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1272
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1273 StandardDeviationN: SQRT ( SUM( (x[i] - Mean)^2 ) / n )
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1274
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1275 StandardError: StandardDeviation / SQRT( n )
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1276
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1277 StandardScore: (x[i] - Mean) / StandardDeviation
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1278
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1279 StandardScoreN: (x[i] - Mean) / StandardDeviationN
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1280
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1281 Variance: SUM( (x[i] - Xmean)^2 / (n - 1) )
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1282
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1283 VarianceN: SUM( (x[i] - Xmean)^2 / n )
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1284
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1285 =item B<-o, --overwrite>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1286
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1287 Overwrite existing files.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1288
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1289 =item B<--outdelim> I<comma | tab | semicolon>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1290
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1291 Output text file delimiter. Possible values: I<comma, tab, or semicolon>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1292 Default value: I<comma>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1293
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1294 =item B<-p, --precision> I<number>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1295
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1296 Precision of calculated values in the output file. Default: up to I<2> decimal places.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1297 Valid values: positive integers.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1298
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1299 =item B<-q, --quote> I<yes | no>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1300
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1301 Put quotes around column values in output text file. Possible values: I<yes or
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1302 no>. Default value: I<yes>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1303
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1304 =item B<-r, --root> I<rootname>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1305
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1306 New text file name is generated using the root: <Root>.<Ext>. Default new file
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1307 name: <InitialTextFileName><Mode>.<Ext>. Based on the specified analysis,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1308 <Mode> corresponds to one of these values: DescriptiveStatisticsBasic,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1309 DescriptiveStatisticsAll, AllStatistics, SpecifiedStatistics, Covariance, Correlation,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1310 Frequency, or StandardScores. The csv, and tsv <Ext> values are used for
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1311 comma/semicolon, and tab delimited text files respectively. This option is ignored for
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1312 multiple input files.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1313
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1314 =item B<--trimfraction> I<number>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1315
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1316 Fraction of data to exclude from the top and bottom of the data set during
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1317 I<TrimMean> calculation. Default value: I<0.1>. Valid values: > 0 and < 1.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1318
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1319 =item B<-w, --workingdir> I<text>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1320
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1321 Location of working directory. Default: current directory.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1322
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1323 =back
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1324
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1325 =head1 EXAMPLES
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1326
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1327 To calculate basic statistics for data in first column and generate a
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1328 NewSample1DescriptiveStatisticsBasic.csv file, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1329
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1330 % AnalyzeTextFilesData.pl -o -r NewSample1 Sample1.csv
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1331
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1332 To calculate basic statistics for data in third column and generate a
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1333 NewSample1DescriptiveStatisticsBasic.csv file, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1334
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1335 % AnalyzeTextFilesData.pl --columns 3 -o -r NewSample1 Sample1.csv
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1336
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1337 To calculate basic statistics for data in MolWeight column and generate a
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1338 NewSample1DescriptiveStatisticsBasic.csv file, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1339
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1340 % AnalyzeTextFilesData.pl --colmode collabel --columns MolWeight -o
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1341 -r NewSample1 Sample1.csv
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1342
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1343 To calculate all available statistics for data in third column and all column pairs,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1344 and generate NewSample1DescriptiveStatisticsAll.csv, NewSample1CovarianceMatrix.csv,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1345 NewSample1CorrelationMatrix.csv, and NewSample1MolWeightFrequencyAnalysis.csv files,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1346 type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1347
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1348 % AnalyzeTextFilesData.pl -m DescriptiveStatisticsAll --columns 3 -o
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1349 --columnpairs AllPairs -r NewSample1 Sample1.csv
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1350
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1351 To compute frequency distribution of data in third column into five bins and
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1352 generate NewSample1MolWeightFrequencyAnalysis.csv, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1353
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1354 % AnalyzeTextFilesData.pl -m Frequency --frequencybins 5 --columns 3
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1355 -o -r NewSample1 Sample1.csv
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1356
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1357 To compute frequency distribution of data in third column into specified bin range
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1358 values, and generate NewSample1MolWeightFrequencyAnalysis.csv, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1359
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1360 % AnalyzeTextFilesData.pl -m Frequency --frequencybins "100,200,400"
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1361 --columns 3 -o -r NewSample1 Sample1.csv
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1362
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1363 To calculate all available statistics for data in all columns and column pairs, type:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1364
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1365 % AnalyzeTextFilesData.pl -m All --columns All --columnpairs
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1366 AllPairs -o -r NewSample1 Sample1.csv
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1367
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1368 =head1 AUTHOR
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1369
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1370 Manish Sud <msud@san.rr.com>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1371
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1372 =head1 SEE ALSO
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1373
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1374 JoinTextFiles.pl, MergeTextFilesWithSD.pl, ModifyTextFilesFormat.pl, SplitTextFiles.pl, TextFilesToHTML.pl
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1375
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1376 =head1 COPYRIGHT
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1377
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1378 Copyright (C) 2015 Manish Sud. All rights reserved.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1379
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1380 This file is part of MayaChemTools.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1381
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1382 MayaChemTools is free software; you can redistribute it and/or modify it under
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1383 the terms of the GNU Lesser General Public License as published by the Free
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1384 Software Foundation; either version 3 of the License, or (at your option)
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1385 any later version.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1386
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1387 =cut