1 package StatisticsUtil; 2 # 3 # $RCSfile: StatisticsUtil.pm,v $ 4 # $Date: 2015/02/28 20:47:18 $ 5 # $Revision: 1.41 $ 6 # 7 # Author: Manish Sud <msud@san.rr.com> 8 # 9 # Copyright (C) 2015 Manish Sud. All rights reserved. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 use strict; 30 use Exporter; 31 32 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 33 34 @ISA = qw(Exporter); 35 @EXPORT = qw(Average AverageDeviation Covariance Correlation Euclidean Factorial FactorialDivison GeometricMean Frequency HarmonicMean KLargest KSmallest Kurtosis Maximum Minimum Mean Median Mode PearsonCorrelation Permutations Product Range RSquare Skewness Sum SumOfSquares StandardDeviation StandardDeviationN StandardError Standardize StandardScores StandardScoresN TrimMean Variance VarianceN); 36 @EXPORT_OK = qw(); 37 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); 38 39 # Compute the mean of an array of numbers 40 sub Average { 41 my($XArrayRef) = @_; 42 return Mean($XArrayRef); 43 } 44 45 # Compute the average of the absolute deviation of an array of numbers: SUM( ABS(x[i] - Xmean) ) / n 46 sub AverageDeviation { 47 my($XArrayRef) = @_; 48 49 if (!@$XArrayRef) { 50 return undef; 51 } 52 my($AverageDeviation, $Mean, $Value, $SumOfDeviations); 53 54 $AverageDeviation = 0; 55 $Mean = Mean($XArrayRef); 56 foreach $Value (@$XArrayRef) { 57 $SumOfDeviations += abs($Value - $Mean); 58 } 59 $AverageDeviation = $SumOfDeviations / @$XArrayRef; 60 61 return $AverageDeviation; 62 } 63 64 # Compute correlation coefficient between two arrays of numbers 65 sub Correlation { 66 my($XArrayRef, $YArrayRef) = @_; 67 return PearsonCorrelation($XArrayRef, $YArrayRef); 68 } 69 70 # Compute the covariance between two arrays of numbers: SUM( (x[i] - Xmean) (y[i] - Ymean) ) / n 71 sub Covariance { 72 my($XArrayRef, $YArrayRef) = @_; 73 74 if (!(@$XArrayRef && @$YArrayRef && (@$XArrayRef == @$YArrayRef))) { 75 return undef; 76 } 77 my($Covariance, $XMean, $YMean, $Index, $ProductOfDeviations); 78 79 $Covariance = 0; 80 $XMean = Mean($XArrayRef); 81 $YMean = Mean($YArrayRef); 82 $ProductOfDeviations = 0; 83 for $Index (0 .. $#{@$XArrayRef}) { 84 $ProductOfDeviations += (($XArrayRef->[$Index] - $XMean) * ($YArrayRef->[$Index] - $YMean)); 85 } 86 $Covariance = $ProductOfDeviations / @$XArrayRef; 87 return $Covariance; 88 } 89 90 # Compute the euclidean distance of an array of numbers: SQRT( SUM( x[i] ** 2) ) 91 sub Euclidean { 92 my($XArrayRef) = @_; 93 94 if (!@$XArrayRef) { 95 return undef; 96 } 97 my($SumOfSquares); 98 99 $SumOfSquares = SumOfSquares($XArrayRef); 100 101 return sqrt $SumOfSquares; 102 } 103 104 # Compute factorial of a number... 105 sub Factorial { 106 my($Num) = @_; 107 108 return _Factorial($Num, 1); 109 } 110 111 # Perform factorial division of two numbers... 112 sub FactorialDivison { 113 my($Numerator, $Denominator) = @_; 114 115 # Only works for integer numbers... 116 if ($Numerator <= 0 || ($Numerator != int($Numerator)) || 117 $Denominator <= 0 || ($Denominator != int($Denominator)) ) { 118 return undef; 119 } 120 my($LargerNum, $SmallerNum, $Result); 121 $LargerNum = ($Numerator > $Denominator) ? $Numerator : $Denominator; 122 $SmallerNum = ($Numerator < $Denominator) ? $Numerator : $Denominator; 123 124 $Result = _Factorial($LargerNum, $SmallerNum); 125 if ($Numerator < $Denominator) { 126 $Result = 1/$Result; 127 } 128 return $Result; 129 } 130 131 # Calculate factorial of a number upto a specific limit... 132 sub _Factorial { 133 my($Num, $Limit) = @_; 134 135 # Only works for integer numbers... 136 if ($Num <= 0 || ($Num != int($Num)) || $Limit < 1) { 137 return undef; 138 } 139 140 my($Result) = 1; 141 142 while ($Num > $Limit) { 143 $Result *= $Num; 144 $Num--; 145 } 146 return $Result; 147 } 148 149 # Generate all possible permuations or a specific permutations of items in an array 150 # and return a reference to an array containing array references to generated permuations... 151 # 152 # This alogrithm is based on the example provided by Mark Jason-Dominus, and is available 153 # at CPAN as mjd_permute standalone script. 154 # 155 sub Permutations { 156 my(@DataToPermute) = @_; 157 my($PermutationNum, $NumOfPermutations, @Permutations); 158 159 if (!@DataToPermute) { 160 return undef; 161 } 162 163 @Permutations = (); 164 $NumOfPermutations = Factorial(scalar @DataToPermute); 165 166 for ($PermutationNum = 0; $PermutationNum < $NumOfPermutations; $PermutationNum++) { 167 my @Permutation = @DataToPermute[_PermutationNumToPermutation($PermutationNum, $#DataToPermute)]; 168 push @Permutations, \@Permutation; 169 } 170 171 return \@Permutations; 172 } 173 174 # Generte Nth permutation for a collection of specific size... 175 # 176 sub _PermutationNumToPermutation { 177 my($Num, $Size) = @_; 178 179 return _PatternToPermutation(_PermutationNumToPattern($Num, $Size)); 180 } 181 182 # Generate Nth pattern for a collection of specific size... 183 # 184 sub _PermutationNumToPattern { 185 my($Num, $Size) = @_; 186 my($Index, @Pattern); 187 188 $Index = 1; 189 190 while ($Index <= $Size + 1) { 191 push @Pattern, $Num % $Index; 192 $Num = int($Num/$Index); 193 $Index++; 194 } 195 196 return @Pattern; 197 } 198 199 # Generate permutation of integers from pattern... 200 # 201 sub _PatternToPermutation { 202 my(@Pattern) = @_; 203 my(@Source, @Permutation); 204 205 @Source = (0 .. $#Pattern); 206 207 while (@Pattern) { 208 push @Permutation, splice(@Source, (pop @Pattern), 1); 209 } 210 211 return @Permutation; 212 } 213 214 # Compute the frequency of occurance of values in an array of numbers. Three different 215 # invocation methods are supported: 216 # 217 # Frequency(\@ArrayRef) : Using the smallest and largest values, group the numbers into 218 # 10 bins. 219 # 220 # Frequency(\@ArrayRef, $NumOfBins) : Using the smallest and largest values, group the 221 # numbers into specified bins. 222 # 223 # Frequency(\@ArrayRef, \@BinRange): Use bin range to goup the values into different bins. 224 # 225 # A hash array is returned with keys and values representing range and frequency values respectively. 226 # The frequency value for a specific key corresponds to all the values which are greater than 227 # the previous key and less than or equal to the current key. A key value representing maximum value is 228 # added for generating frequency distribution for specific number of bins, and whenever the maximum 229 # array value is greater than the maximum specified in bin range, it is also added to bin range. 230 # 231 sub Frequency { 232 my($XArrayRef) = @_; 233 234 if (!@$XArrayRef) { 235 return undef; 236 } 237 238 my($BinRange, $NumOfBins, $BinRangeSpecified); 239 240 $BinRangeSpecified = 0; 241 $NumOfBins = 10; 242 if (@_ == 2) { 243 if (ref($_[1]) eq 'ARRAY') { 244 $BinRange = $_[1]; 245 if (!(@$BinRange && (@$BinRange > 1))) { 246 return undef; 247 } 248 # Make sure the bin range contains values in increasing order... 249 my($Index1, $Index2); 250 for $Index1 (0 .. $#{@$BinRange}) { 251 for $Index2 (($Index1 + 1) .. $#{@$BinRange}) { 252 if ($BinRange->[$Index1] >= $BinRange->[$Index2]) { 253 return undef; 254 } 255 } 256 } 257 $BinRangeSpecified = 1; 258 } 259 else { 260 $NumOfBins = $_[1]; 261 if ($NumOfBins <= 1) { 262 return undef; 263 } 264 } 265 } 266 267 # Setup range keys... 268 my(@RangeKeys); 269 @RangeKeys = (); 270 271 my($MinValue, $MaxValue) = Range($XArrayRef); 272 if ($BinRangeSpecified) { 273 push @RangeKeys, @$BinRange; 274 if ($MaxValue > $RangeKeys[$#RangeKeys]) { 275 push @RangeKeys, $MaxValue; 276 } 277 } 278 else { 279 my($MinValue, $MaxValue) = Range($XArrayRef); 280 my($Interval) = ($MaxValue - $MinValue)/$NumOfBins; 281 my($KeyValue) = $MinValue + $Interval; 282 while ($KeyValue < $MaxValue) { 283 push @RangeKeys, $KeyValue; 284 $KeyValue += $Interval; 285 } 286 push @RangeKeys, $MaxValue; 287 } 288 289 #Setup frequency hash array... 290 my(%FrequencyMap); 291 %FrequencyMap = (); 292 293 %FrequencyMap = map { $_ => 0 } @RangeKeys; 294 295 # Count values... 296 my($Key, $Value); 297 298 VALUE: for $Value (@$XArrayRef) { 299 for $Key (@RangeKeys) { 300 if ($Value <= $Key) { 301 $FrequencyMap{$Key} += 1; 302 next VALUE; 303 } 304 } 305 } 306 return (%FrequencyMap); 307 } 308 309 # Compute the geometric mean of an array of numbers: NthROOT( PRODUCT(x[i]) ) 310 sub GeometricMean { 311 my($XArrayRef) = @_; 312 313 if (!@$XArrayRef) { 314 return undef; 315 } 316 my($Mean, $Product, $Value); 317 $Product = 1; 318 foreach $Value (@$XArrayRef) { 319 if ($Value <= 0 ) { 320 return undef; 321 } 322 $Product *= $Value; 323 } 324 $Mean = $Product ** (1 / @$XArrayRef); 325 return $Mean; 326 } 327 328 # Compute the harmonic mean of an array of numbers: 1 / ( SUM(1/x[i]) / n ) 329 sub HarmonicMean { 330 my($XArrayRef) = @_; 331 332 if (!@$XArrayRef) { 333 return undef; 334 } 335 my($Mean, $Sum, $Value); 336 $Sum = 0; 337 foreach $Value (@$XArrayRef) { 338 if ($Value <= 0 ) { 339 return undef; 340 } 341 $Sum += 1/$Value; 342 } 343 $Mean = 1/($Sum/@$XArrayRef); 344 return $Mean; 345 } 346 347 # Return the k-largest value from an array of numbers 348 sub KLargest { 349 my($XArrayRef, $K) = @_; 350 351 if (!(@$XArrayRef && ($K > 0) && ($K <= @$XArrayRef))) { 352 return undef; 353 } 354 my($KLargest, @SortedXArray); 355 @SortedXArray = sort { $b <=> $a } @$XArrayRef; 356 $KLargest = $SortedXArray[$K - 1]; 357 return $KLargest; 358 } 359 360 # Return the k-smallest value from an array of numbers 361 sub KSmallest { 362 my($XArrayRef, $K) = @_; 363 364 if (!(@$XArrayRef && ($K > 0) && ($K <= @$XArrayRef))) { 365 return undef; 366 } 367 my($KSmallest, @SortedXArray); 368 @SortedXArray = sort { $a <=> $b } @$XArrayRef; 369 $KSmallest = $SortedXArray[$K - 1]; 370 return $KSmallest; 371 } 372 373 # Compute the kurtosis of an array of numbers: 374 # [ {n(n + 1)/(n - 1)(n - 2)(n - 3)} SUM{ ((x[i] - Xmean)/STDDEV)^4 } ] - {3((n - 1)^2)}/{(n - 2)(n-3)} 375 # 376 sub Kurtosis { 377 my($XArrayRef) = @_; 378 379 if (!@$XArrayRef || ((@$XArrayRef - 3) <= 0)) { 380 return undef; 381 } 382 my($Kurtosis, $Mean, $StandardDeviation, $Value); 383 $Mean = Mean($XArrayRef); 384 if (!defined $Mean) { 385 return undef; 386 } 387 $StandardDeviation = StandardDeviation($XArrayRef); 388 if (!(defined $StandardDeviation && $StandardDeviation != 0)) { 389 return undef; 390 } 391 392 my($SumOfScores, $SampleSize); 393 $SumOfScores = 0; 394 for $Value (@$XArrayRef) { 395 $SumOfScores += (($Value - $Mean)/$StandardDeviation) ** 4; 396 } 397 $SampleSize = @$XArrayRef; 398 $Kurtosis = ((($SampleSize * ($SampleSize + 1))/(($SampleSize - 1) * ($SampleSize - 2) * ($SampleSize - 3))) * $SumOfScores) - ((3 * (($SampleSize - 1) ** 2))/(($SampleSize - 2) * ($SampleSize - 3))); 399 return $Kurtosis; 400 } 401 402 # Return the smallest value from an array of numbers 403 sub Minimum { 404 my($XArrayRef) = @_; 405 return KSmallest($XArrayRef, 1); 406 } 407 408 # Return the largest value from an array of numbers 409 sub Maximum { 410 my($XArrayRef) = @_; 411 return KLargest($XArrayRef, 1); 412 } 413 414 # Compute the mean of an array of numbers: SUM( x[i] ) / n 415 sub Mean { 416 my($XArrayRef) = @_; 417 418 if (!@$XArrayRef) { 419 return undef; 420 } 421 my($Mean, $Sum, $Value); 422 $Sum = 0; 423 foreach $Value (@$XArrayRef) { 424 $Sum += $Value; 425 } 426 $Mean = $Sum / @$XArrayRef; 427 return $Mean; 428 } 429 430 # Compute the median value of an array of numbers. For an even number array, it's 431 # the average of two middle values. 432 # 433 # For even values of n: Xsorted[(n - 1)/2 + 1] 434 # For odd values of n: (Xsorted[n/2] + Xsorted[n/2 + 1])/2 435 # 436 sub Median { 437 my($XArrayRef) = @_; 438 439 if (!@$XArrayRef) { 440 return undef; 441 } 442 my($Median, @SortedXArray); 443 $Median = 0; 444 @SortedXArray = sort { $a <=> $b } @$XArrayRef; 445 if (@$XArrayRef % 2) { 446 my($MidIndex); 447 $MidIndex = int(@SortedXArray - 1)/2; 448 $Median = $SortedXArray[$MidIndex]; 449 } 450 else { 451 # Even number array... 452 my($MidPosition); 453 $MidPosition = int(@SortedXArray / 2); 454 $Median = ($SortedXArray[$MidPosition - 1] + $SortedXArray[$MidPosition]) / 2; 455 } 456 return $Median; 457 } 458 459 # Return the most frequently occuring value in an array of numbers 460 sub Mode { 461 my($XArrayRef) = @_; 462 463 if (!@$XArrayRef) { 464 return undef; 465 } 466 my($Value, %ValueToCountMap, @CountList, @SortedCountList); 467 %ValueToCountMap = (); 468 @CountList = (); 469 @SortedCountList = (); 470 for $Value (@$XArrayRef) { 471 if (exists $ValueToCountMap{$Value}) { 472 $ValueToCountMap{$Value} += 1; 473 } 474 else { 475 $ValueToCountMap{$Value} = 1; 476 } 477 } 478 for $Value (keys %ValueToCountMap) { 479 push @CountList, $ValueToCountMap{$Value}; 480 } 481 @SortedCountList = sort { $b <=> $a } @CountList; 482 483 # Make sure the frequency of mode value is greater than one and check for 484 # multiple modes as well... 485 # 486 my($ModeCount, $ModeValue); 487 $ModeCount = $SortedCountList[0]; 488 if ($ModeCount <= 1) { 489 return undef; 490 } 491 # Get the first mode value... 492 VALUE: for $Value (keys %ValueToCountMap) { 493 if ($ValueToCountMap{$Value} == $ModeCount) { 494 $ModeValue = $Value; 495 # Set it to zero to skip it next time... 496 $ValueToCountMap{$Value} = 0; 497 last VALUE; 498 } 499 } 500 501 if (wantarray) { 502 # Retrieve all the modes... 503 my(@Modes, $Count); 504 @Modes = (); 505 push @Modes, $ModeValue; 506 for $Count (@SortedCountList) { 507 if ($Count == $ModeCount) { 508 VALUE: for $Value (keys %ValueToCountMap) { 509 if ($ValueToCountMap{$Value} == $ModeCount) { 510 push @Modes, $Value; 511 # Set it to zero to skip it next time... 512 $ValueToCountMap{$Value} = 0; 513 last VALUE; 514 } 515 } 516 } 517 } 518 return sort {$b <=> $a} @Modes; 519 } 520 else { 521 return $ModeValue; 522 } 523 } 524 525 526 # Compute the Pearson correlation coefficient between two arrays of numbers: 527 # 528 # SUM( (x[i] - Xmean)(y[i] - Ymean) ) / SQRT( SUM( (x[i] - Xmean)^2 )(SUM( (y[i] - Ymean)^2 )) ) 529 # 530 # It returns values in the range from -1.0 to 1.0 531 sub PearsonCorrelation { 532 my($XArrayRef, $YArrayRef) = @_; 533 534 if (!(@$XArrayRef && @$YArrayRef && (@$XArrayRef == @$YArrayRef))) { 535 return undef; 536 } 537 my($Correlation, $XMean, $YMean, $Index, $XValueDeviation, $YValueDeviation, $SquareOfXDeviations, $SquareOfYDeviations, $ProductOfDeviations); 538 539 $Correlation = 0; 540 $XMean = Mean($XArrayRef); 541 $YMean = Mean($YArrayRef); 542 $ProductOfDeviations = 0; $SquareOfXDeviations = 0; $SquareOfYDeviations = 0; 543 for $Index (0 .. $#{@$XArrayRef}) { 544 $XValueDeviation = $XArrayRef->[$Index] - $XMean; 545 $YValueDeviation = $YArrayRef->[$Index] - $YMean; 546 $ProductOfDeviations += ($XValueDeviation * $YValueDeviation); 547 $SquareOfXDeviations += $XValueDeviation ** 2; 548 $SquareOfYDeviations += $YValueDeviation ** 2; 549 } 550 $Correlation = $ProductOfDeviations / sqrt($SquareOfXDeviations * $SquareOfYDeviations); 551 return $Correlation; 552 } 553 554 # Return the smallest and largest values from an array of numbers 555 sub Range { 556 my($XArrayRef) = @_; 557 558 if (!@$XArrayRef) { 559 return (undef, undef); 560 } 561 my($Smallest, $Largest, @SortedXArray); 562 @SortedXArray = sort { $a <=> $b } @$XArrayRef; 563 $Smallest = $SortedXArray[0]; 564 $Largest = $SortedXArray[$#SortedXArray]; 565 return ($Smallest, $Largest); 566 } 567 568 # Compute square of the Pearson correlation coefficient between two arrays of numbers. 569 # 570 sub RSquare { 571 my($XArrayRef, $YArrayRef) = @_; 572 my($RSquare, $Correlation); 573 574 $RSquare = undef; 575 $Correlation = PearsonCorrelation($XArrayRef, $YArrayRef); 576 if (defined $Correlation) { 577 $RSquare = $Correlation ** 2; 578 } 579 return $RSquare; 580 } 581 582 # Compute the skewness of an array of numbers: 583 # {n/(n - 1)(n - 2)} SUM{ ((x[i] - Xmean)/STDDEV)^3 } 584 # 585 sub Skewness { 586 my($XArrayRef) = @_; 587 588 if (!@$XArrayRef || ((@$XArrayRef - 2) <= 0)) { 589 return undef; 590 } 591 my($Skewness, $Mean, $StandardDeviation, $Value); 592 $Mean = Mean($XArrayRef); 593 if (!defined $Mean) { 594 return undef; 595 } 596 $StandardDeviation = StandardDeviation($XArrayRef); 597 if (!(defined $StandardDeviation && $StandardDeviation != 0)) { 598 return undef; 599 } 600 601 my($SumOfScores, $SampleSize); 602 $SumOfScores = 0; 603 for $Value (@$XArrayRef) { 604 $SumOfScores += (($Value - $Mean)/$StandardDeviation) ** 3; 605 } 606 $SampleSize = @$XArrayRef; 607 $Skewness = ($SampleSize/(($SampleSize - 1) * ($SampleSize - 2) )) * $SumOfScores; 608 return $Skewness; 609 } 610 611 # Compute the standard deviation of an array of numbers 612 sub StandardDeviation { 613 my($XArrayRef) = @_; 614 return _CalculateStandardDeviation($XArrayRef, 2); 615 } 616 617 # Compute the standard deviation of an array of numbers representing entire population 618 sub StandardDeviationN { 619 my($XArrayRef) = @_; 620 return _CalculateStandardDeviation($XArrayRef, 1); 621 } 622 623 # Compute the standard deviation of an array of numbers. 624 # Mode 1: SQRT ( SUM( (x[i] - mean)^2 ) / n ) 625 # Mode 2: SQRT ( SUM( (x[i] - mean)^2 ) / (n - 1) ) 626 # 627 sub _CalculateStandardDeviation { 628 my($XArrayRef, $Mode) = @_; 629 630 if (!@$XArrayRef) { 631 return undef; 632 } 633 my($StandardDeviation, $Value, $SquareOfDeviations, $Mean, $N); 634 635 $StandardDeviation = 0; 636 $Mean = Mean($XArrayRef); 637 $SquareOfDeviations = 0; 638 foreach $Value (@$XArrayRef) { 639 $SquareOfDeviations += ($Value - $Mean) ** 2; 640 } 641 $N = ($Mode == 1) ? @$XArrayRef : (@$XArrayRef - 1); 642 $StandardDeviation = sqrt($SquareOfDeviations / $N); 643 644 return $StandardDeviation; 645 } 646 647 # Compute the standard error using standard deviation and sample size 648 sub StandardError { 649 my($StandardDeviation, $Count) = @_; 650 651 if ($Count <= 0) { 652 return undef; 653 } 654 my($StandardError); 655 $StandardError = $StandardDeviation / sqrt($Count); 656 657 return $StandardError; 658 } 659 660 # Standardize the value using mean and standard deviation 661 sub Standardize { 662 my($Value, $Mean, $StandardDeviation) = @_; 663 664 if ($StandardDeviation <= 0) { 665 return undef; 666 } 667 my($StandardizedValue); 668 $StandardizedValue = ($Value - $Mean)/$StandardDeviation; 669 670 return $StandardizedValue; 671 } 672 673 # Compute the standard deviation above the mean for an array of numbers. 674 sub StandardScores { 675 my($XArrayRef) = @_; 676 return _CalculateStandardScores($XArrayRef, 2); 677 } 678 679 # Compute the standard deviation above the mean for an array of numbers representing entire population 680 sub StandardScoresN { 681 my($XArrayRef) = @_; 682 return _CalculateStandardScores($XArrayRef, 1); 683 } 684 685 # Compute the standard deviation above the mean for an array of numbers. 686 # Mode 1: (x[i] - mean) / n 687 # Mode 2: (x[i] - mean) / (n - 1) 688 # 689 sub _CalculateStandardScores { 690 my($XArrayRef, $Mode) = @_; 691 692 if (!@$XArrayRef) { 693 return undef; 694 } 695 my(@StandardScores, $Mean, $StandardDeviation, $Value); 696 697 $Mean = Mean($XArrayRef); 698 $StandardDeviation = _CalculateStandardDeviation($XArrayRef, $Mode); 699 if (!(defined($StandardDeviation) && $StandardDeviation > 0)) { 700 return undef; 701 } 702 @StandardScores = (); 703 for $Value (@$XArrayRef) { 704 push @StandardScores, ($Value - $Mean)/$StandardDeviation; 705 } 706 707 return @StandardScores; 708 } 709 710 # Compute the product of an array of numbers 711 sub Product { 712 my($XArrayRef) = @_; 713 714 if (!@$XArrayRef) { 715 return undef; 716 } 717 my($Product, $Value); 718 $Product = 1; 719 foreach $Value (@$XArrayRef) { 720 $Product *= $Value; 721 } 722 return $Product; 723 } 724 725 # Compute the sum of an array of numbers 726 sub Sum { 727 my($XArrayRef) = @_; 728 729 if (!@$XArrayRef) { 730 return undef; 731 } 732 my($Sum, $Value); 733 $Sum = 0; 734 foreach $Value (@$XArrayRef) { 735 $Sum += $Value; 736 } 737 return $Sum; 738 } 739 740 # Compute the sum of squares of an array of numbers 741 sub SumOfSquares { 742 my($XArrayRef) = @_; 743 744 if (!@$XArrayRef) { 745 return undef; 746 } 747 my($SumOfSquares, $Value); 748 $SumOfSquares = 0; 749 foreach $Value (@$XArrayRef) { 750 $SumOfSquares += $Value ** 2; 751 } 752 return $SumOfSquares; 753 } 754 755 # Compute the mean of an array of numbers by excluding a fraction of 756 # numbers from the top and bottom of the data set. 757 sub TrimMean { 758 my($XArrayRef, $FractionToExclude) = @_; 759 760 if (!(@$XArrayRef && $FractionToExclude > 0 && $FractionToExclude <= 1)) { 761 return undef; 762 } 763 my($NumberToExclude); 764 $NumberToExclude = int(@$XArrayRef * $FractionToExclude); 765 $NumberToExclude = ($NumberToExclude % 2) ? ($NumberToExclude - 1) : $NumberToExclude; 766 if ($NumberToExclude == @$XArrayRef) { 767 return undef; 768 } 769 my($Mean, $Sum, $Index, $FirstIndex, $LastIndex); 770 $FirstIndex = $NumberToExclude/2; 771 $LastIndex = @$XArrayRef - ($NumberToExclude/2) - 1; 772 $Sum = 0; 773 my(@SortedXArray); 774 @SortedXArray = sort { $a <=> $b } @$XArrayRef; 775 for $Index ($FirstIndex .. $LastIndex) { 776 $Sum += $SortedXArray[$Index]; 777 } 778 $Mean = $Sum/(@SortedXArray - $NumberToExclude); 779 return $Mean; 780 } 781 782 # Compute the variance of an array of numbers 783 sub Variance { 784 my($XArrayRef) = @_; 785 return _CalculateVariance($XArrayRef, 2); 786 } 787 788 # Compute the variance of an array of numbers representing entire population 789 sub VarianceN { 790 my($XArrayRef) = @_; 791 return _CalculateVariance($XArrayRef, 1); 792 } 793 794 # Compute the variance of an array of numbers: 795 # Mode 1: SUM( (x[i] - Xmean)^2 / n ) 796 # Mode 2: SUM( (x[i] - Xmean)^2 / (n - 1) ) 797 # 798 sub _CalculateVariance { 799 my($XArrayRef, $Mode) = @_; 800 801 if (!@$XArrayRef) { 802 return undef; 803 } 804 my($Variance, $Value, $SquareOfDeviations, $Mean, $N); 805 806 $Variance = 0; 807 $Mean = Mean($XArrayRef); 808 $SquareOfDeviations = 0; 809 foreach $Value (@$XArrayRef) { 810 $SquareOfDeviations += ($Value - $Mean) ** 2; 811 } 812 $N = ($Mode == 1) ? @$XArrayRef : (@$XArrayRef - 1); 813 $Variance = $SquareOfDeviations / $N; 814 815 return $Variance; 816 } 817