| 
0
 | 
     1 NAME
 | 
| 
 | 
     2     StatisticsUtil
 | 
| 
 | 
     3 
 | 
| 
 | 
     4 SYNOPSIS
 | 
| 
 | 
     5     use StatisticsUtil;
 | 
| 
 | 
     6 
 | 
| 
 | 
     7     use StatisticsUtil qw(:all);
 | 
| 
 | 
     8 
 | 
| 
 | 
     9 DESCRIPTION
 | 
| 
 | 
    10     StatisticsUtil module provides the following functions:
 | 
| 
 | 
    11 
 | 
| 
 | 
    12     Average, AverageDeviation, Correlation, Covariance, Euclidean,
 | 
| 
 | 
    13     Factorial, FactorialDivison, Frequency, GeometricMean, HarmonicMean,
 | 
| 
 | 
    14     KLargest, KSmallest, Kurtosis, Maximum, Mean, Median, Minimum, Mode,
 | 
| 
 | 
    15     PearsonCorrelation, Permutations, Product, RSquare, Range, Skewness,
 | 
| 
 | 
    16     StandardDeviation, StandardDeviationN, StandardError, StandardScores,
 | 
| 
 | 
    17     StandardScoresN, Standardize, Sum, SumOfSquares, TrimMean, Variance,
 | 
| 
 | 
    18     VarianceN
 | 
| 
 | 
    19 
 | 
| 
 | 
    20   METHODS
 | 
| 
 | 
    21     Average
 | 
| 
 | 
    22             $Value = Average(\@DataArray);
 | 
| 
 | 
    23 
 | 
| 
 | 
    24         Computes the mean of an array of numbers: SUM( x[i] ) / n
 | 
| 
 | 
    25 
 | 
| 
 | 
    26     AverageDeviation
 | 
| 
 | 
    27             $Value = AverageDeviation(\@DataArray);
 | 
| 
 | 
    28 
 | 
| 
 | 
    29         Computes the average of the absolute deviation of an array of
 | 
| 
 | 
    30         numbers: SUM( ABS(x[i] - Xmean) ) / n
 | 
| 
 | 
    31 
 | 
| 
 | 
    32     Correlation
 | 
| 
 | 
    33             $Value = Correlation(\@XDataArray, \@YDataArray);
 | 
| 
 | 
    34 
 | 
| 
 | 
    35         Computes the Pearson correlation coefficient between two arrays of
 | 
| 
 | 
    36         numbers: SUM( (x[i] - Xmean)(y[i] - Ymean) ) / SQRT( SUM( (x[i] -
 | 
| 
 | 
    37         Xmean)^2 )(SUM( (y[i] - Ymean)^2 )) )
 | 
| 
 | 
    38 
 | 
| 
 | 
    39     Euclidean
 | 
| 
 | 
    40             $Return = Euclidean(\@DataArray);
 | 
| 
 | 
    41 
 | 
| 
 | 
    42         Computes the euclidean distance of an array of numbers: SQRT( SUM(
 | 
| 
 | 
    43         x[i] ** 2) )
 | 
| 
 | 
    44 
 | 
| 
 | 
    45     Covariance
 | 
| 
 | 
    46             $Value = Covariance(\@XDataArray, \@YDataArray);
 | 
| 
 | 
    47 
 | 
| 
 | 
    48         Computes the covariance between two arrays of numbers: SUM( (x[i] -
 | 
| 
 | 
    49         Xmean) (y[i] - Ymean) ) / n
 | 
| 
 | 
    50 
 | 
| 
 | 
    51     Factorial
 | 
| 
 | 
    52             $Value = Factorial($Num);
 | 
| 
 | 
    53 
 | 
| 
 | 
    54         Computes the factorial of a positive integer.
 | 
| 
 | 
    55 
 | 
| 
 | 
    56     FactorialDivison
 | 
| 
 | 
    57             $Value = FactorialDivision($Numerator, $Denominator);
 | 
| 
 | 
    58 
 | 
| 
 | 
    59         Compute the factorial division of two positive integers.
 | 
| 
 | 
    60 
 | 
| 
 | 
    61     Frequency
 | 
| 
 | 
    62             %FrequencyValues = Frequency(\@DataArray, [$NumOfBins]);
 | 
| 
 | 
    63             %FrequencyValues = Frequency(\@DataArray, [\@BinRange]);
 | 
| 
 | 
    64 
 | 
| 
 | 
    65         A hash array is returned with keys and values representing range and
 | 
| 
 | 
    66         frequency values, respectively. The frequency value for a specific
 | 
| 
 | 
    67         key corresponds to all the values which are greater than the
 | 
| 
 | 
    68         previous key and less than or equal to the current key. A key value
 | 
| 
 | 
    69         representing maximum value is added for generating frequency
 | 
| 
 | 
    70         distribution for specific number of bins, and whenever the maximum
 | 
| 
 | 
    71         array value is greater than the maximum specified in bin range, it
 | 
| 
 | 
    72         is also added to bin range.
 | 
| 
 | 
    73 
 | 
| 
 | 
    74     GeometricMean
 | 
| 
 | 
    75             $Value = GeometricMean(\@DataArray);
 | 
| 
 | 
    76 
 | 
| 
 | 
    77         Computes the geometric mean of an array of numbers: NthROOT(
 | 
| 
 | 
    78         PRODUCT(x[i]) )
 | 
| 
 | 
    79 
 | 
| 
 | 
    80     HarmonicMean
 | 
| 
 | 
    81             $Value = HarmonicMean(\@DataArray);
 | 
| 
 | 
    82 
 | 
| 
 | 
    83         Computes the harmonic mean of an array of numbers: 1 / ( SUM(1/x[i])
 | 
| 
 | 
    84         / n )
 | 
| 
 | 
    85 
 | 
| 
 | 
    86     KLargest
 | 
| 
 | 
    87             $Value = KLargest(\@DataArray, $KthNumber);
 | 
| 
 | 
    88 
 | 
| 
 | 
    89         Returns the k-largest value from an array of numbers.
 | 
| 
 | 
    90 
 | 
| 
 | 
    91     KSmallest
 | 
| 
 | 
    92             $Value = KSmallest(\@DataArray, $KthNumber);
 | 
| 
 | 
    93 
 | 
| 
 | 
    94         Returns the k-smallest value from an array of numbers.
 | 
| 
 | 
    95 
 | 
| 
 | 
    96     Kurtosis
 | 
| 
 | 
    97             $Value = Kurtosis(\@DataArray);
 | 
| 
 | 
    98 
 | 
| 
 | 
    99         Computes the kurtosis of an array of numbers: [ {n(n + 1)/(n - 1)(n
 | 
| 
 | 
   100         - 2)(n - 3)} SUM{ ((x[i] - Xmean)/STDDEV)^4 } ] - {3((n - 1)^2)}/{(n
 | 
| 
 | 
   101         - 2)(n-3)}
 | 
| 
 | 
   102 
 | 
| 
 | 
   103     Maximum
 | 
| 
 | 
   104             $Value = Maximum(\@DataArray);
 | 
| 
 | 
   105 
 | 
| 
 | 
   106         Returns the largest value from an array of numbers.
 | 
| 
 | 
   107 
 | 
| 
 | 
   108     Minimum
 | 
| 
 | 
   109             $Value = Minimum(\@DataArray);
 | 
| 
 | 
   110 
 | 
| 
 | 
   111         Returns the smallest value from an array of numbers.
 | 
| 
 | 
   112 
 | 
| 
 | 
   113     Mean
 | 
| 
 | 
   114             $Value = Mean(\@DataArray);
 | 
| 
 | 
   115 
 | 
| 
 | 
   116         Computes the mean of an array of numbers: SUM( x[i] ) / n
 | 
| 
 | 
   117 
 | 
| 
 | 
   118     Median
 | 
| 
 | 
   119             $Value = Median(\@DataArray);
 | 
| 
 | 
   120 
 | 
| 
 | 
   121         Computes the median value of an array of numbers. For an even number
 | 
| 
 | 
   122         array, it's the average of two middle values.
 | 
| 
 | 
   123 
 | 
| 
 | 
   124         For odd values of n: Xsorted[(n - 1)/2 + 1] For even values of n:
 | 
| 
 | 
   125         (Xsorted[n/2] + Xsorted[n/2 + 1])/2
 | 
| 
 | 
   126 
 | 
| 
 | 
   127     Mode
 | 
| 
 | 
   128             $Value = Mode(\@DataArray);
 | 
| 
 | 
   129 
 | 
| 
 | 
   130         Returns the most frequently occurring value in an array of numbers.
 | 
| 
 | 
   131 
 | 
| 
 | 
   132     PearsonCorrelation
 | 
| 
 | 
   133             $Value = PearsonCorrelation(\@XDataArray, \@YDataArray);
 | 
| 
 | 
   134 
 | 
| 
 | 
   135         Computes the Pearson correlation coefficient between two arrays of
 | 
| 
 | 
   136         numbers: SUM( (x[i] - Xmean)(y[i] - Ymean) ) / SQRT( SUM( (x[i] -
 | 
| 
 | 
   137         Xmean)^2 )(SUM( (y[i] - Ymean)^2 )) )
 | 
| 
 | 
   138 
 | 
| 
 | 
   139     Permutations
 | 
| 
 | 
   140             $PermutationsRef = Permutations(@DataToPermute);
 | 
| 
 | 
   141 
 | 
| 
 | 
   142         Generate all possible permutations or specific permutations of
 | 
| 
 | 
   143         items in an array and return a reference to an array containing
 | 
| 
 | 
   144         array references to generated permutations.
 | 
| 
 | 
   145 
 | 
| 
 | 
   146         This algorithm is based on the example provided by Mark
 | 
| 
 | 
   147         Jason Dominus, and is available at CPAN as mjd_permute standalone
 | 
| 
 | 
   148         script.
 | 
| 
 | 
   149 
 | 
| 
 | 
   150     Product
 | 
| 
 | 
   151             $Value = Product(\@DataArray);
 | 
| 
 | 
   152 
 | 
| 
 | 
   153         Compute the product of an array of numbers.
 | 
| 
 | 
   154 
 | 
| 
 | 
   155     Range
 | 
| 
 | 
   156             ($Smallest, $Largest) = Range(\@DataArray);
 | 
| 
 | 
   157 
 | 
| 
 | 
   158         Return the smallest and largest values from an array of numbers.
 | 
| 
 | 
   159 
 | 
| 
 | 
   160     RSquare
 | 
| 
 | 
   161             $Value = RSquare(\@XDataArray, \@YDataArray);
 | 
| 
 | 
   162 
 | 
| 
 | 
   163         Computes square of the Pearson correlation coefficient between two
 | 
| 
 | 
   164         arrays of numbers.
 | 
| 
 | 
   165 
 | 
| 
 | 
   166     Skewness
 | 
| 
 | 
   167             $Value = Skewness(\@DataArray);
 | 
| 
 | 
   168 
 | 
| 
 | 
   169         Computes the skewness of an array of numbers: {n/(n - 1)(n - 2)}
 | 
| 
 | 
   170         SUM{ ((x[i] - Xmean)/STDDEV)^3 }
 | 
| 
 | 
   171 
 | 
| 
 | 
   172     StandardDeviation
 | 
| 
 | 
   173             $Value = StandardDeviation(\@DataArray);
 | 
| 
 | 
   174 
 | 
| 
 | 
   175         Computes the standard deviation of an array of numbers. SQRT ( SUM(
 | 
| 
 | 
   176         (x[i] - mean)^2 ) / (n - 1) )
 | 
| 
 | 
   177 
 | 
| 
 | 
   178     StandardDeviationN
 | 
| 
 | 
   179             $Value = StandardDeviationN(\@DataArray);
 | 
| 
 | 
   180 
 | 
| 
 | 
   181         Computes the standard deviation of an array of numbers representing
 | 
| 
 | 
   182         entire population: SQRT ( SUM( (x[i] - mean)^2 ) / n )
 | 
| 
 | 
   183 
 | 
| 
 | 
   184     StandardError
 | 
| 
 | 
   185             $Value = StandardError($StandardDeviation, $Count);
 | 
| 
 | 
   186 
 | 
| 
 | 
   187         Computes the standard error using standard deviation and sample
 | 
| 
 | 
   188         size.
 | 
| 
 | 
   189 
 | 
| 
 | 
   190     Standardize
 | 
| 
 | 
   191             $Value = Standardize($Value, $Mean, $StandardDeviation);
 | 
| 
 | 
   192 
 | 
| 
 | 
   193         Standardizes the value using mean and standard deviation.
 | 
| 
 | 
   194 
 | 
| 
 | 
   195     StandardScores
 | 
| 
 | 
   196             @Values = StandardScores(\@DataArray);
 | 
| 
 | 
   197 
 | 
| 
 | 
   198         Computes the standard deviation above the mean for an array of
 | 
| 
 | 
   199         numbers: (x[i] - mean) / STDDEV
 | 
| 
 | 
   200 
 | 
| 
 | 
   201     StandardScoresN
 | 
| 
 | 
   202             @Values = StandardScoresN(\@DataArray);
 | 
| 
 | 
   203 
 | 
| 
 | 
   204         Computes the standard deviation above the mean for an array of
 | 
| 
 | 
   205         numbers representing entire population: (x[i] - mean) / STDDEVN
 | 
| 
 | 
   206 
 | 
| 
 | 
   207     Sum
 | 
| 
 | 
   208             $Value = Sum(\@DataArray);
 | 
| 
 | 
   209 
 | 
| 
 | 
   210         Compute the sum of an array of numbers.
 | 
| 
 | 
   211 
 | 
| 
 | 
   212     SumOfSquares
 | 
| 
 | 
   213             $Value = SumOfSquares(\@DataArray);
 | 
| 
 | 
   214 
 | 
| 
 | 
   215         Computes the sum of the squares of an array of numbers.
 | 
| 
 | 
   216 
 | 
| 
 | 
   217     TrimMean
 | 
| 
 | 
   218             $Value = TrimMean(\@DataArray, $FractionToExclude);
 | 
| 
 | 
   219 
 | 
| 
 | 
   220         Computes the mean of an array of numbers by excluding a fraction of
 | 
| 
 | 
   221         numbers from the top and bottom of the data set.
 | 
| 
 | 
   222 
 | 
| 
 | 
   223     Variance
 | 
| 
 | 
   224             $Value = Variance(\@DataArray);
 | 
| 
 | 
   225 
 | 
| 
 | 
   226         Computes the variance of an array of numbers: SUM( (x[i] - Xmean)^2
 | 
| 
 | 
   227         / (n - 1) )
 | 
| 
 | 
   228 
 | 
| 
 | 
   229     VarianceN
 | 
| 
 | 
   230             $Value = VarianceN(\@DataArray);
 | 
| 
 | 
   231 
 | 
| 
 | 
   232         Compute the variance of an array of numbers representing entire
 | 
| 
 | 
   233         population: SUM( (x[i] - Xmean)^2 / n )
 | 
| 
 | 
   234 
 | 
| 
 | 
   235 AUTHOR
 | 
| 
 | 
   236     Manish Sud <msud@san.rr.com>
 | 
| 
 | 
   237 
 | 
| 
 | 
   238 SEE ALSO
 | 
| 
 | 
   239     Constants.pm, ConversionsUtil.pm, MathUtil.pm
 | 
| 
 | 
   240 
 | 
| 
 | 
   241 COPYRIGHT
 | 
| 
 | 
   242     Copyright (C) 2015 Manish Sud. All rights reserved.
 | 
| 
 | 
   243 
 | 
| 
 | 
   244     This file is part of MayaChemTools.
 | 
| 
 | 
   245 
 | 
| 
 | 
   246     MayaChemTools is free software; you can redistribute it and/or modify it
 | 
| 
 | 
   247     under the terms of the GNU Lesser General Public License as published by
 | 
| 
 | 
   248     the Free Software Foundation; either version 3 of the License, or (at
 | 
| 
 | 
   249     your option) any later version.
 | 
| 
 | 
   250 
 |