MayaChemTools

   1 package StatisticsUtil;
   2 #
   3 # $RCSfile: StatisticsUtil.pm,v $
   4 # $Date: 2015/02/28 20:47:18 $
   5 # $Revision: 1.41 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Exporter;
  31 
  32 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  33 
  34 @ISA = qw(Exporter);
  35 @EXPORT = qw(Average AverageDeviation Covariance Correlation Euclidean Factorial FactorialDivison GeometricMean Frequency HarmonicMean KLargest KSmallest Kurtosis Maximum Minimum Mean Median Mode PearsonCorrelation Permutations Product Range RSquare Skewness Sum SumOfSquares StandardDeviation StandardDeviationN  StandardError Standardize StandardScores StandardScoresN TrimMean Variance VarianceN);
  36 @EXPORT_OK = qw();
  37 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  38 
  39 # Compute the mean of an array of numbers
  40 sub Average {
  41   my($XArrayRef) = @_;
  42   return Mean($XArrayRef);
  43 }
  44 
  45 # Compute the average of the absolute deviation of an array of numbers: SUM( ABS(x[i] - Xmean) ) / n
  46 sub AverageDeviation {
  47   my($XArrayRef) = @_;
  48 
  49   if (!@$XArrayRef) {
  50     return undef;
  51   }
  52   my($AverageDeviation, $Mean, $Value, $SumOfDeviations);
  53 
  54   $AverageDeviation = 0;
  55   $Mean = Mean($XArrayRef);
  56   foreach $Value (@$XArrayRef) {
  57     $SumOfDeviations += abs($Value - $Mean);
  58   }
  59   $AverageDeviation = $SumOfDeviations / @$XArrayRef;
  60 
  61   return $AverageDeviation;
  62 }
  63 
  64 # Compute correlation coefficient between two arrays of numbers
  65 sub Correlation {
  66   my($XArrayRef, $YArrayRef) = @_;
  67   return PearsonCorrelation($XArrayRef, $YArrayRef);
  68 }
  69 
  70 # Compute the covariance between two arrays of numbers: SUM( (x[i] - Xmean) (y[i] - Ymean) ) / n
  71 sub Covariance {
  72   my($XArrayRef, $YArrayRef) = @_;
  73 
  74   if (!(@$XArrayRef && @$YArrayRef && (@$XArrayRef == @$YArrayRef))) {
  75     return undef;
  76   }
  77   my($Covariance, $XMean, $YMean, $Index, $ProductOfDeviations);
  78 
  79   $Covariance = 0;
  80   $XMean = Mean($XArrayRef);
  81   $YMean = Mean($YArrayRef);
  82   $ProductOfDeviations = 0;
  83   for $Index (0 .. $#{@$XArrayRef}) {
  84     $ProductOfDeviations += (($XArrayRef->[$Index] - $XMean) * ($YArrayRef->[$Index] - $YMean));
  85   }
  86   $Covariance = $ProductOfDeviations / @$XArrayRef;
  87   return $Covariance;
  88 }
  89 
  90 # Compute the euclidean distance of an array of numbers: SQRT( SUM( x[i] ** 2) )
  91 sub Euclidean {
  92   my($XArrayRef) = @_;
  93 
  94   if (!@$XArrayRef) {
  95     return undef;
  96   }
  97   my($SumOfSquares);
  98 
  99   $SumOfSquares = SumOfSquares($XArrayRef);
 100 
 101   return sqrt $SumOfSquares;
 102 }
 103 
 104 # Compute factorial of a number...
 105 sub Factorial {
 106   my($Num) = @_;
 107 
 108   return _Factorial($Num, 1);
 109 }
 110 
 111 # Perform factorial division of two numbers...
 112 sub FactorialDivison {
 113   my($Numerator, $Denominator) = @_;
 114 
 115   # Only works for integer numbers...
 116   if ($Numerator <= 0 || ($Numerator != int($Numerator)) ||
 117      $Denominator <= 0 || ($Denominator != int($Denominator)) ) {
 118     return undef;
 119   }
 120   my($LargerNum, $SmallerNum, $Result);
 121   $LargerNum = ($Numerator > $Denominator) ? $Numerator : $Denominator;
 122   $SmallerNum = ($Numerator < $Denominator) ? $Numerator : $Denominator;
 123 
 124   $Result = _Factorial($LargerNum, $SmallerNum);
 125   if ($Numerator < $Denominator) {
 126     $Result = 1/$Result;
 127   }
 128   return $Result;
 129 }
 130 
 131 # Calculate factorial of a number upto a specific limit...
 132 sub _Factorial {
 133   my($Num, $Limit) = @_;
 134 
 135   # Only works for integer numbers...
 136   if ($Num <= 0 || ($Num != int($Num)) || $Limit < 1) {
 137     return undef;
 138   }
 139 
 140   my($Result) = 1;
 141 
 142   while ($Num > $Limit) {
 143     $Result *= $Num;
 144     $Num--;
 145   }
 146   return $Result;
 147 }
 148 
 149 # Generate all possible permuations or a specific permutations of items in an array
 150 # and return a reference to an array containing array references to generated permuations...
 151 #
 152 # This alogrithm is based on the example provided by Mark Jason-Dominus, and is available
 153 # at CPAN as mjd_permute standalone script.
 154 #
 155 sub Permutations {
 156   my(@DataToPermute) = @_;
 157   my($PermutationNum, $NumOfPermutations, @Permutations);
 158 
 159   if (!@DataToPermute) {
 160     return undef;
 161   }
 162 
 163   @Permutations = ();
 164   $NumOfPermutations = Factorial(scalar @DataToPermute);
 165 
 166   for ($PermutationNum = 0; $PermutationNum < $NumOfPermutations; $PermutationNum++) {
 167     my @Permutation = @DataToPermute[_PermutationNumToPermutation($PermutationNum, $#DataToPermute)];
 168     push @Permutations, \@Permutation;
 169   }
 170 
 171   return \@Permutations;
 172 }
 173 
 174 # Generte Nth permutation for a collection of specific size...
 175 #
 176 sub _PermutationNumToPermutation {
 177   my($Num, $Size) = @_;
 178 
 179   return _PatternToPermutation(_PermutationNumToPattern($Num, $Size));
 180 }
 181 
 182 # Generate Nth pattern for a collection of specific size...
 183 #
 184 sub _PermutationNumToPattern {
 185   my($Num, $Size) = @_;
 186   my($Index, @Pattern);
 187 
 188   $Index = 1;
 189 
 190   while ($Index <= $Size + 1) {
 191     push @Pattern, $Num % $Index;
 192     $Num = int($Num/$Index);
 193     $Index++;
 194   }
 195 
 196   return @Pattern;
 197 }
 198 
 199 # Generate permutation of integers from pattern...
 200 #
 201 sub _PatternToPermutation {
 202   my(@Pattern) = @_;
 203   my(@Source, @Permutation);
 204 
 205   @Source = (0 .. $#Pattern);
 206 
 207   while (@Pattern) {
 208     push @Permutation, splice(@Source, (pop @Pattern), 1);
 209   }
 210 
 211   return @Permutation;
 212 }
 213 
 214 # Compute the frequency of occurance of values in an array of numbers. Three different
 215 # invocation methods are supported:
 216 #
 217 # Frequency(\@ArrayRef) : Using the smallest and largest values, group the numbers into
 218 # 10 bins.
 219 #
 220 # Frequency(\@ArrayRef, $NumOfBins) : Using the smallest and largest values, group the
 221 # numbers into specified bins.
 222 #
 223 # Frequency(\@ArrayRef, \@BinRange): Use bin range to goup the values into different bins.
 224 #
 225 # A hash array is returned with keys and values representing range and frequency values respectively.
 226 # The frequency value for a specific key corresponds to all the values which are greater than
 227 # the previous key and less than or equal to the current key. A key value representing maximum value is
 228 # added for generating frequency distribution for specific number of bins, and whenever the maximum
 229 # array value is greater than the maximum specified in bin range, it is also added to bin range.
 230 #
 231 sub Frequency {
 232   my($XArrayRef) = @_;
 233 
 234   if (!@$XArrayRef) {
 235     return undef;
 236   }
 237 
 238   my($BinRange, $NumOfBins, $BinRangeSpecified);
 239 
 240   $BinRangeSpecified = 0;
 241   $NumOfBins = 10;
 242   if (@_ == 2) {
 243     if (ref($_[1]) eq 'ARRAY') {
 244       $BinRange = $_[1];
 245       if (!(@$BinRange && (@$BinRange > 1))) {
 246         return undef;
 247       }
 248       # Make sure the bin range contains values in increasing order...
 249       my($Index1, $Index2);
 250       for $Index1 (0 .. $#{@$BinRange}) {
 251         for $Index2 (($Index1 + 1) .. $#{@$BinRange}) {
 252           if ($BinRange->[$Index1] >= $BinRange->[$Index2]) {
 253             return undef;
 254           }
 255         }
 256       }
 257       $BinRangeSpecified = 1;
 258     }
 259     else {
 260       $NumOfBins = $_[1];
 261       if ($NumOfBins <= 1) {
 262         return undef;
 263       }
 264     }
 265   }
 266 
 267   # Setup range keys...
 268   my(@RangeKeys);
 269   @RangeKeys = ();
 270 
 271   my($MinValue, $MaxValue) = Range($XArrayRef);
 272   if ($BinRangeSpecified) {
 273     push @RangeKeys, @$BinRange;
 274     if ($MaxValue > $RangeKeys[$#RangeKeys]) {
 275       push @RangeKeys, $MaxValue;
 276     }
 277   }
 278   else {
 279     my($MinValue, $MaxValue) = Range($XArrayRef);
 280     my($Interval) = ($MaxValue - $MinValue)/$NumOfBins;
 281     my($KeyValue) = $MinValue + $Interval;
 282     while ($KeyValue  < $MaxValue) {
 283       push @RangeKeys, $KeyValue;
 284       $KeyValue += $Interval;
 285     }
 286     push @RangeKeys, $MaxValue;
 287   }
 288 
 289   #Setup frequency hash array...
 290   my(%FrequencyMap);
 291   %FrequencyMap = ();
 292 
 293   %FrequencyMap = map { $_ => 0 } @RangeKeys;
 294 
 295   # Count values...
 296   my($Key, $Value);
 297 
 298   VALUE: for $Value (@$XArrayRef) {
 299       for $Key (@RangeKeys) {
 300         if ($Value <= $Key) {
 301           $FrequencyMap{$Key} += 1;
 302           next VALUE;
 303         }
 304       }
 305   }
 306   return (%FrequencyMap);
 307 }
 308 
 309 # Compute the geometric mean of an array of numbers: NthROOT( PRODUCT(x[i]) )
 310 sub GeometricMean {
 311   my($XArrayRef) = @_;
 312 
 313   if (!@$XArrayRef) {
 314     return undef;
 315   }
 316   my($Mean, $Product, $Value);
 317   $Product = 1;
 318   foreach $Value (@$XArrayRef) {
 319     if ($Value <= 0 ) {
 320       return undef;
 321     }
 322     $Product *= $Value;
 323   }
 324   $Mean = $Product ** (1 / @$XArrayRef);
 325   return $Mean;
 326 }
 327 
 328 # Compute the harmonic mean of an array of numbers: 1 / ( SUM(1/x[i]) / n )
 329 sub HarmonicMean {
 330   my($XArrayRef) = @_;
 331 
 332   if (!@$XArrayRef) {
 333     return undef;
 334   }
 335   my($Mean, $Sum, $Value);
 336   $Sum = 0;
 337   foreach $Value (@$XArrayRef) {
 338     if ($Value <= 0 ) {
 339       return undef;
 340     }
 341     $Sum += 1/$Value;
 342   }
 343   $Mean = 1/($Sum/@$XArrayRef);
 344   return $Mean;
 345 }
 346 
 347 # Return the k-largest value from an array of numbers
 348 sub KLargest {
 349   my($XArrayRef, $K) = @_;
 350 
 351   if (!(@$XArrayRef && ($K > 0) && ($K <= @$XArrayRef))) {
 352     return undef;
 353   }
 354   my($KLargest, @SortedXArray);
 355   @SortedXArray = sort { $b <=> $a } @$XArrayRef;
 356   $KLargest = $SortedXArray[$K - 1];
 357   return $KLargest;
 358 }
 359 
 360 # Return the k-smallest value from an array of numbers
 361 sub KSmallest {
 362   my($XArrayRef, $K) = @_;
 363 
 364   if (!(@$XArrayRef && ($K > 0) && ($K <= @$XArrayRef))) {
 365     return undef;
 366   }
 367   my($KSmallest, @SortedXArray);
 368   @SortedXArray = sort { $a <=> $b } @$XArrayRef;
 369   $KSmallest = $SortedXArray[$K - 1];
 370   return $KSmallest;
 371 }
 372 
 373 # Compute the kurtosis of an array of numbers:
 374 # [ {n(n + 1)/(n - 1)(n - 2)(n - 3)}  SUM{ ((x[i] - Xmean)/STDDEV)^4 } ] - {3((n - 1)^2)}/{(n - 2)(n-3)}
 375 #
 376 sub Kurtosis {
 377   my($XArrayRef) = @_;
 378 
 379   if (!@$XArrayRef || ((@$XArrayRef - 3) <= 0)) {
 380     return undef;
 381   }
 382   my($Kurtosis, $Mean, $StandardDeviation, $Value);
 383   $Mean = Mean($XArrayRef);
 384   if (!defined $Mean) {
 385     return undef;
 386   }
 387   $StandardDeviation = StandardDeviation($XArrayRef);
 388   if (!(defined $StandardDeviation &&  $StandardDeviation != 0)) {
 389     return undef;
 390   }
 391 
 392   my($SumOfScores, $SampleSize);
 393   $SumOfScores = 0;
 394   for $Value (@$XArrayRef) {
 395     $SumOfScores += (($Value - $Mean)/$StandardDeviation) ** 4;
 396   }
 397   $SampleSize = @$XArrayRef;
 398   $Kurtosis = ((($SampleSize * ($SampleSize + 1))/(($SampleSize - 1) * ($SampleSize - 2) * ($SampleSize - 3))) * $SumOfScores) - ((3 * (($SampleSize - 1) ** 2))/(($SampleSize - 2) * ($SampleSize - 3)));
 399   return $Kurtosis;
 400 }
 401 
 402 # Return the smallest value from an array of numbers
 403 sub Minimum {
 404   my($XArrayRef) = @_;
 405   return KSmallest($XArrayRef, 1);
 406 }
 407 
 408 # Return the largest value from an array of numbers
 409 sub Maximum {
 410   my($XArrayRef) = @_;
 411   return KLargest($XArrayRef, 1);
 412 }
 413 
 414 # Compute the mean of an array of numbers: SUM( x[i] ) / n
 415 sub Mean {
 416   my($XArrayRef) = @_;
 417 
 418   if (!@$XArrayRef) {
 419     return undef;
 420   }
 421   my($Mean, $Sum, $Value);
 422   $Sum = 0;
 423   foreach $Value (@$XArrayRef) {
 424     $Sum += $Value;
 425   }
 426   $Mean = $Sum / @$XArrayRef;
 427   return $Mean;
 428 }
 429 
 430 # Compute the median value of an array of numbers. For an even number array, it's
 431 # the average of two middle values.
 432 #
 433 # For even values of n: Xsorted[(n - 1)/2 + 1]
 434 # For odd values of n: (Xsorted[n/2] + Xsorted[n/2 + 1])/2
 435 #
 436 sub Median {
 437   my($XArrayRef) = @_;
 438 
 439   if (!@$XArrayRef) {
 440     return undef;
 441   }
 442   my($Median, @SortedXArray);
 443   $Median = 0;
 444   @SortedXArray = sort { $a <=> $b } @$XArrayRef;
 445   if (@$XArrayRef % 2) {
 446     my($MidIndex);
 447     $MidIndex = int(@SortedXArray - 1)/2;
 448     $Median = $SortedXArray[$MidIndex];
 449   }
 450   else {
 451     # Even number array...
 452     my($MidPosition);
 453     $MidPosition = int(@SortedXArray / 2);
 454     $Median = ($SortedXArray[$MidPosition - 1] + $SortedXArray[$MidPosition]) / 2;
 455   }
 456   return $Median;
 457 }
 458 
 459 # Return the most frequently occuring value in an array of numbers
 460 sub Mode {
 461   my($XArrayRef) = @_;
 462 
 463   if (!@$XArrayRef) {
 464     return undef;
 465   }
 466   my($Value, %ValueToCountMap, @CountList, @SortedCountList);
 467   %ValueToCountMap = ();
 468   @CountList = ();
 469   @SortedCountList = ();
 470   for $Value (@$XArrayRef) {
 471     if (exists $ValueToCountMap{$Value}) {
 472       $ValueToCountMap{$Value} += 1;
 473     }
 474     else {
 475       $ValueToCountMap{$Value} = 1;
 476     }
 477   }
 478   for $Value (keys %ValueToCountMap) {
 479     push @CountList, $ValueToCountMap{$Value};
 480   }
 481   @SortedCountList = sort { $b <=> $a } @CountList;
 482 
 483   # Make sure the frequency of mode value is greater than one and check for
 484   # multiple modes as well...
 485   #
 486   my($ModeCount, $ModeValue);
 487   $ModeCount = $SortedCountList[0];
 488   if ($ModeCount <= 1) {
 489     return undef;
 490   }
 491   # Get the first mode value...
 492   VALUE: for $Value (keys %ValueToCountMap) {
 493     if ($ValueToCountMap{$Value} == $ModeCount) {
 494       $ModeValue = $Value;
 495       # Set it to zero to skip it next time...
 496       $ValueToCountMap{$Value} = 0;
 497       last VALUE;
 498     }
 499   }
 500 
 501   if (wantarray) {
 502     # Retrieve all the modes...
 503     my(@Modes, $Count);
 504     @Modes = ();
 505     push @Modes, $ModeValue;
 506     for $Count (@SortedCountList) {
 507       if ($Count == $ModeCount) {
 508       VALUE: for $Value (keys %ValueToCountMap) {
 509           if ($ValueToCountMap{$Value} == $ModeCount) {
 510             push @Modes, $Value;
 511             # Set it to zero to skip it next time...
 512             $ValueToCountMap{$Value} = 0;
 513             last VALUE;
 514           }
 515         }
 516       }
 517     }
 518     return sort {$b <=> $a} @Modes;
 519   }
 520   else {
 521     return $ModeValue;
 522   }
 523 }
 524 
 525 
 526 # Compute the Pearson correlation coefficient between two arrays of numbers:
 527 #
 528 #  SUM( (x[i] - Xmean)(y[i] - Ymean) ) / SQRT( SUM( (x[i] - Xmean)^2 )(SUM( (y[i] - Ymean)^2 ))   )
 529 #
 530 # It returns values in the range from -1.0 to 1.0
 531 sub PearsonCorrelation {
 532   my($XArrayRef, $YArrayRef) = @_;
 533 
 534   if (!(@$XArrayRef && @$YArrayRef && (@$XArrayRef == @$YArrayRef))) {
 535     return undef;
 536   }
 537   my($Correlation, $XMean, $YMean, $Index, $XValueDeviation, $YValueDeviation, $SquareOfXDeviations, $SquareOfYDeviations, $ProductOfDeviations);
 538 
 539   $Correlation = 0;
 540   $XMean = Mean($XArrayRef);
 541   $YMean = Mean($YArrayRef);
 542   $ProductOfDeviations = 0; $SquareOfXDeviations = 0; $SquareOfYDeviations = 0;
 543   for $Index (0 .. $#{@$XArrayRef}) {
 544     $XValueDeviation = $XArrayRef->[$Index] - $XMean;
 545     $YValueDeviation = $YArrayRef->[$Index] - $YMean;
 546     $ProductOfDeviations += ($XValueDeviation * $YValueDeviation);
 547     $SquareOfXDeviations += $XValueDeviation ** 2;
 548     $SquareOfYDeviations += $YValueDeviation ** 2;
 549   }
 550   $Correlation = $ProductOfDeviations / sqrt($SquareOfXDeviations *  $SquareOfYDeviations);
 551   return $Correlation;
 552 }
 553 
 554 # Return the smallest and largest values from an array of numbers
 555 sub Range {
 556   my($XArrayRef) = @_;
 557 
 558   if (!@$XArrayRef) {
 559     return (undef, undef);
 560   }
 561   my($Smallest, $Largest, @SortedXArray);
 562   @SortedXArray = sort { $a <=> $b } @$XArrayRef;
 563   $Smallest = $SortedXArray[0];
 564   $Largest = $SortedXArray[$#SortedXArray];
 565   return ($Smallest, $Largest);
 566 }
 567 
 568 # Compute square of the Pearson correlation coefficient between two arrays of numbers.
 569 #
 570 sub RSquare {
 571   my($XArrayRef, $YArrayRef) = @_;
 572   my($RSquare, $Correlation);
 573 
 574   $RSquare = undef;
 575   $Correlation = PearsonCorrelation($XArrayRef, $YArrayRef);
 576   if (defined $Correlation) {
 577     $RSquare = $Correlation ** 2;
 578   }
 579   return $RSquare;
 580 }
 581 
 582 # Compute the skewness of an array of numbers:
 583 #  {n/(n - 1)(n - 2)} SUM{ ((x[i] - Xmean)/STDDEV)^3 }
 584 #
 585 sub Skewness {
 586   my($XArrayRef) = @_;
 587 
 588   if (!@$XArrayRef || ((@$XArrayRef - 2) <= 0)) {
 589     return undef;
 590   }
 591   my($Skewness, $Mean, $StandardDeviation, $Value);
 592   $Mean = Mean($XArrayRef);
 593   if (!defined $Mean) {
 594     return undef;
 595   }
 596   $StandardDeviation = StandardDeviation($XArrayRef);
 597   if (!(defined $StandardDeviation &&  $StandardDeviation != 0)) {
 598     return undef;
 599   }
 600 
 601   my($SumOfScores, $SampleSize);
 602   $SumOfScores = 0;
 603   for $Value (@$XArrayRef) {
 604     $SumOfScores += (($Value - $Mean)/$StandardDeviation) ** 3;
 605   }
 606   $SampleSize = @$XArrayRef;
 607   $Skewness = ($SampleSize/(($SampleSize - 1) * ($SampleSize - 2) )) * $SumOfScores;
 608   return $Skewness;
 609 }
 610 
 611 # Compute the standard deviation of an array of numbers
 612 sub StandardDeviation {
 613   my($XArrayRef) = @_;
 614   return _CalculateStandardDeviation($XArrayRef, 2);
 615 }
 616 
 617 # Compute the standard deviation of an array of numbers representing entire population
 618 sub StandardDeviationN {
 619   my($XArrayRef) = @_;
 620   return _CalculateStandardDeviation($XArrayRef, 1);
 621 }
 622 
 623 # Compute the standard deviation of an array of numbers.
 624 # Mode 1: SQRT ( SUM( (x[i] - mean)^2 ) / n )
 625 # Mode 2: SQRT ( SUM( (x[i] - mean)^2 ) / (n - 1) )
 626 #
 627 sub _CalculateStandardDeviation {
 628   my($XArrayRef, $Mode) = @_;
 629 
 630   if (!@$XArrayRef) {
 631     return undef;
 632   }
 633   my($StandardDeviation, $Value, $SquareOfDeviations, $Mean, $N);
 634 
 635   $StandardDeviation = 0;
 636   $Mean = Mean($XArrayRef);
 637   $SquareOfDeviations = 0;
 638   foreach $Value (@$XArrayRef) {
 639     $SquareOfDeviations += ($Value - $Mean) ** 2;
 640   }
 641   $N = ($Mode == 1) ? @$XArrayRef : (@$XArrayRef - 1);
 642   $StandardDeviation =  sqrt($SquareOfDeviations / $N);
 643 
 644   return $StandardDeviation;
 645 }
 646 
 647 # Compute the standard error using standard deviation and sample size
 648 sub StandardError {
 649   my($StandardDeviation, $Count) = @_;
 650 
 651   if ($Count <= 0) {
 652     return undef;
 653   }
 654   my($StandardError);
 655   $StandardError = $StandardDeviation / sqrt($Count);
 656 
 657   return $StandardError;
 658 }
 659 
 660 # Standardize the value using mean and standard deviation
 661 sub Standardize {
 662   my($Value, $Mean, $StandardDeviation) = @_;
 663 
 664   if ($StandardDeviation <= 0) {
 665     return undef;
 666   }
 667   my($StandardizedValue);
 668   $StandardizedValue = ($Value - $Mean)/$StandardDeviation;
 669 
 670   return $StandardizedValue;
 671 }
 672 
 673 # Compute the standard deviation above the mean for an array of numbers.
 674 sub StandardScores {
 675   my($XArrayRef) = @_;
 676   return _CalculateStandardScores($XArrayRef, 2);
 677 }
 678 
 679 # Compute the standard deviation above the mean for an array of numbers representing entire population
 680 sub StandardScoresN {
 681   my($XArrayRef) = @_;
 682   return _CalculateStandardScores($XArrayRef, 1);
 683 }
 684 
 685 # Compute the standard deviation above the mean for an array of numbers.
 686 # Mode 1:  (x[i] - mean) / n
 687 # Mode 2:  (x[i] - mean) / (n - 1)
 688 #
 689 sub _CalculateStandardScores {
 690   my($XArrayRef, $Mode) = @_;
 691 
 692   if (!@$XArrayRef) {
 693     return undef;
 694   }
 695   my(@StandardScores, $Mean, $StandardDeviation, $Value);
 696 
 697   $Mean = Mean($XArrayRef);
 698   $StandardDeviation = _CalculateStandardDeviation($XArrayRef, $Mode);
 699   if (!(defined($StandardDeviation) && $StandardDeviation > 0)) {
 700     return undef;
 701   }
 702   @StandardScores = ();
 703   for $Value (@$XArrayRef) {
 704     push @StandardScores, ($Value - $Mean)/$StandardDeviation;
 705   }
 706 
 707   return @StandardScores;
 708 }
 709 
 710 # Compute the product of an array of numbers
 711 sub Product {
 712   my($XArrayRef) = @_;
 713 
 714   if (!@$XArrayRef) {
 715     return undef;
 716   }
 717   my($Product, $Value);
 718   $Product = 1;
 719   foreach $Value (@$XArrayRef) {
 720     $Product *= $Value;
 721   }
 722   return $Product;
 723 }
 724 
 725 # Compute the sum of an array of numbers
 726 sub Sum {
 727   my($XArrayRef) = @_;
 728 
 729   if (!@$XArrayRef) {
 730     return undef;
 731   }
 732   my($Sum, $Value);
 733   $Sum = 0;
 734   foreach $Value (@$XArrayRef) {
 735     $Sum += $Value;
 736   }
 737   return $Sum;
 738 }
 739 
 740 # Compute the sum of squares of an array of numbers
 741 sub SumOfSquares {
 742   my($XArrayRef) = @_;
 743 
 744   if (!@$XArrayRef) {
 745     return undef;
 746   }
 747   my($SumOfSquares, $Value);
 748   $SumOfSquares = 0;
 749   foreach $Value (@$XArrayRef) {
 750     $SumOfSquares += $Value ** 2;
 751   }
 752   return $SumOfSquares;
 753 }
 754 
 755 # Compute the mean of an array of numbers by excluding a fraction of
 756 # numbers from the top and bottom of the data set.
 757 sub TrimMean {
 758   my($XArrayRef, $FractionToExclude) = @_;
 759 
 760   if (!(@$XArrayRef && $FractionToExclude > 0 && $FractionToExclude <= 1)) {
 761     return undef;
 762   }
 763   my($NumberToExclude);
 764   $NumberToExclude = int(@$XArrayRef * $FractionToExclude);
 765   $NumberToExclude = ($NumberToExclude % 2) ? ($NumberToExclude - 1) : $NumberToExclude;
 766   if ($NumberToExclude == @$XArrayRef) {
 767     return undef;
 768   }
 769   my($Mean, $Sum, $Index, $FirstIndex, $LastIndex);
 770   $FirstIndex = $NumberToExclude/2;
 771   $LastIndex = @$XArrayRef - ($NumberToExclude/2) - 1;
 772   $Sum = 0;
 773   my(@SortedXArray);
 774   @SortedXArray = sort { $a <=> $b } @$XArrayRef;
 775   for $Index ($FirstIndex .. $LastIndex) {
 776     $Sum += $SortedXArray[$Index];
 777   }
 778   $Mean = $Sum/(@SortedXArray - $NumberToExclude);
 779   return $Mean;
 780 }
 781 
 782 # Compute the variance of an array of numbers
 783 sub Variance {
 784   my($XArrayRef) = @_;
 785   return _CalculateVariance($XArrayRef, 2);
 786 }
 787 
 788 # Compute the variance of an array of numbers representing entire population
 789 sub VarianceN {
 790   my($XArrayRef) = @_;
 791   return _CalculateVariance($XArrayRef, 1);
 792 }
 793 
 794 # Compute the variance of an array of numbers:
 795 # Mode 1: SUM( (x[i] - Xmean)^2  / n )
 796 # Mode 2: SUM( (x[i] - Xmean)^2  / (n - 1) )
 797 #
 798 sub _CalculateVariance {
 799   my($XArrayRef, $Mode) = @_;
 800 
 801   if (!@$XArrayRef) {
 802     return undef;
 803   }
 804   my($Variance, $Value, $SquareOfDeviations, $Mean, $N);
 805 
 806   $Variance = 0;
 807   $Mean = Mean($XArrayRef);
 808   $SquareOfDeviations = 0;
 809   foreach $Value (@$XArrayRef) {
 810     $SquareOfDeviations += ($Value - $Mean) ** 2;
 811   }
 812   $N = ($Mode == 1) ? @$XArrayRef : (@$XArrayRef - 1);
 813   $Variance =  $SquareOfDeviations / $N;
 814 
 815   return $Variance;
 816 }
 817