001/* A library of statistical operations on arrays of doubles. 002 003 Copyright (c) 1998-2013 The Regents of the University of California. 004 All rights reserved. 005 006 Permission is hereby granted, without written agreement and without 007 license or royalty fees, to use, copy, modify, and distribute this 008 software and its documentation for any purpose, provided that the above 009 copyright notice and the following two paragraphs appear in all copies 010 of this software. 011 012 IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY 013 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 014 ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 015 THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF 016 SUCH DAMAGE. 017 018 THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, 019 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 020 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE 021 PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF 022 CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 023 ENHANCEMENTS, OR MODIFICATIONS. 024 025 PT_COPYRIGHT_VERSION_2 026 COPYRIGHTENDKEY 027 028 */ 029package ptolemy.math; 030 031import java.util.Random; 032 033/////////////////////////////////////////////////////////////////// 034//// DoubleArrayStat 035 036/** 037 This class provides a library for statistical operations on arrays of 038 doubles. 039 Unless explicitly noted otherwise, all array arguments are assumed to be 040 non-null. If a null array is passed to a method, a NullPointerException 041 will be thrown in the method or called methods. 042 043 @author Jeff Tsay 044 @version $Id$ 045 @since Ptolemy II 0.3 046 @Pt.ProposedRating Yellow (ctsay) 047 @Pt.AcceptedRating Red (ctsay) 048 */ 049public class DoubleArrayStat extends DoubleArrayMath { 050 // Protected constructor prevents construction of this class. 051 protected DoubleArrayStat() { 052 } 053 054 /////////////////////////////////////////////////////////////////// 055 //// public methods //// 056 057 /** Return a new array that is the auto-correlation of the 058 * argument array, starting and ending at user-specified lag values. 059 * The output array will have length (endLag - startLag + 1). The first 060 * element of the output will have the auto-correlation at a lag of 061 * startLag. The last element of the output will have the 062 * auto-correlation at a lag of endLag. 063 * @param x An array of doubles. 064 * @param N An integer indicating the number of samples to sum over. 065 * @param startLag An int indicating at which lag to start (may be 066 * negative). 067 * @param endLag An int indicating at which lag to end. 068 * @return A new array of doubles. 069 */ 070 public static final double[] autoCorrelation(double[] x, int N, 071 int startLag, int endLag) { 072 int outputLength = endLag - startLag + 1; 073 double[] returnValue = new double[outputLength]; 074 075 for (int lag = startLag; lag <= endLag; lag++) { 076 // Find the most efficient and correct place to start the summation 077 int start = Math.max(0, -lag); 078 079 // Find the most efficient and correct place to end the summation 080 int limit = Math.min(x.length, N); 081 082 limit = Math.min(limit, x.length - lag); 083 084 double sum = 0.0; 085 086 for (int i = start; i < limit; i++) { 087 sum += x[i] * x[i + lag]; 088 } 089 090 returnValue[lag - startLag] = sum; 091 } 092 093 return returnValue; 094 } 095 096 /** Return the auto-correlation of an array at a certain lag value, 097 * summing over a certain number of samples 098 * defined by : 099 * Rxx[d] = sum of i = 0 to N - 1 of x[i] * x[i + d] 100 * N must be non-negative, but large numbers are ok because this 101 * routine will not overrun reading of the arrays. 102 * @param x An array of doubles. 103 * @param N An integer indicating the number of samples to sum over. 104 * @param lag An integer indicating the lag value (may be negative). 105 * @return A double, Rxx[lag]. 106 */ 107 public static double autoCorrelationAt(double[] x, int N, int lag) { 108 // Find the most efficient and correct place to start the summation 109 int start = Math.max(0, -lag); 110 111 // Find the most efficient and correct place to end the summation 112 int limit = Math.min(x.length, N); 113 114 limit = Math.min(limit, x.length - lag); 115 116 double sum = 0.0; 117 118 for (int i = start; i < limit; i++) { 119 sum += x[i] * x[i + lag]; 120 } 121 122 return sum; 123 } 124 125 /** Return a new array that is the cross-correlation of the two 126 * argument arrays, starting and ending at user-specified lag values. 127 * The output array will have length (endLag - startLag + 1). The first 128 * element of the output will have the cross-correlation at a lag of 129 * startLag. The last element of the output will have the 130 * cross-correlation at a lag of endLag. 131 * @param x The first array of doubles. 132 * @param y The second array of doubles. 133 * @param N An integer indicating the number of samples to sum over. 134 * @param startLag An int indicating at which lag to start (may be 135 * negative). 136 * @param endLag An int indicating at which lag to end. 137 * @return A new array of doubles. 138 */ 139 public static final double[] crossCorrelation(double[] x, double[] y, int N, 140 int startLag, int endLag) { 141 int outputLength = endLag - startLag + 1; 142 double[] returnValue = new double[outputLength]; 143 144 for (int lag = startLag; lag <= endLag; lag++) { 145 // Find the most efficient and correct place to start the summation 146 int start = Math.max(0, -lag); 147 148 // Find the most efficient and correct place to end the summation 149 int limit = Math.min(x.length, N); 150 151 limit = Math.min(limit, y.length - lag); 152 153 double sum = 0.0; 154 155 for (int i = start; i < limit; i++) { 156 sum += x[i] * y[i + lag]; 157 } 158 159 returnValue[lag - startLag] = sum; 160 } 161 162 return returnValue; 163 } 164 165 /** Return the cross-correlation of two arrays at a certain lag value. 166 * The cross-correlation is defined by : 167 * Rxy[d] = sum of i = 0 to N - 1 of x[i] * y[i + d] 168 * @param x The first array of doubles. 169 * @param y The second array of doubles. 170 * @param N An integer indicating the number of samples to sum over. 171 * This must be non-negative, but large numbers are ok because this 172 * routine will not overrun reading of the arrays. 173 * @param lag An integer indicating the lag value (may be negative). 174 * @return A double, Rxy[lag]. 175 */ 176 public static double crossCorrelationAt(double[] x, double[] y, int N, 177 int lag) { 178 // Find the most efficient and correct place to start the summation 179 int start = Math.max(0, -lag); 180 181 // Find the most efficient and correct place to end the summation 182 int limit = Math.min(x.length, N); 183 184 limit = Math.min(limit, y.length - lag); 185 186 double sum = 0.0; 187 188 for (int i = start; i < limit; i++) { 189 sum += x[i] * y[i + lag]; 190 } 191 192 return sum; 193 } 194 195 /** Given an array of probabilities, treated as a probability mass 196 * function (pmf), calculate the entropy (in bits). The pmf is 197 * a discrete function that gives the probability of each element. 198 * The sum of the elements in the pmf should be 1, and each element 199 * should be between 0 and 1. 200 * This method does not check to see if the pmf is valid, except for 201 * checking that each entry is non-negative. 202 * The function computed is : 203 * <p> 204 * H(p) = - sum (p[x] * log<sup>2</sup>(p[x])) 205 * </p> 206 * The entropy is always non-negative. 207 * Throw an IllegalArgumentException if the length of the array is 0, 208 * or a negative probability is encountered. 209 * @param p The array of probabilities. 210 * @return The entropy of the array of probabilities. 211 */ 212 public static final double entropy(double[] p) { 213 int length = _nonZeroLength(p, "DoubleArrayStat.entropy"); 214 215 double h = 0.0; 216 217 for (int i = 0; i < length; i++) { 218 if (p[i] < 0.0) { 219 throw new IllegalArgumentException( 220 "ptolemy.math.DoubleArrayStat.entropy() : " 221 + "Negative probability encountered."); 222 } else if (p[i] == 0.0) { 223 // do nothing 224 } else { 225 h -= p[i] * ExtendedMath.log2(p[i]); 226 } 227 } 228 229 return h; 230 } 231 232 /** Return the geometric mean of the elements in the array. This 233 * is defined to be the Nth root of the product of the elements 234 * in the array, where N is the length of the array. 235 * This method is only useful for arrays of non-negative numbers. If 236 * the product of the elements in the array is negative, throw an 237 * IllegalArgumentException. However, the individual elements of the array 238 * are not verified to be non-negative. 239 * Return 1.0 if the length of the array is 0. 240 * @param array An array of doubles. 241 * @return A double. 242 */ 243 public static final double geometricMean(double[] array) { 244 if (array.length < 1) { 245 return 1.0; 246 } 247 248 return Math.pow(productOfElements(array), 1.0 / array.length); 249 } 250 251 /** Return the maximum value in the array. 252 * Throw an exception if the length of the array is 0. 253 * @param array The array of doubles. 254 * @return The maximum value in the array. 255 */ 256 public static final double max(double[] array) { 257 Object[] maxReturn = maxAndIndex(array); 258 return ((Double) maxReturn[0]).doubleValue(); 259 } 260 261 /** Return the maximum value of the array and the index at which the 262 * maximum occurs in the array. The maximum value is wrapped with 263 * the Double class, the index is wrapped with the Integer class, 264 * and an Object array is returned containing these values. 265 * If the array is of length zero, throw an IllegalArgumentException. 266 * @param array An array of doubles. 267 * @return An array of two Objects, returnValue[0] is the Double 268 * representation of the maximum value, returnValue[1] is the Integer 269 * representation of the corresponding index. 270 */ 271 public static final Object[] maxAndIndex(double[] array) { 272 int length = _nonZeroLength(array, "DoubleArrayStat.maxAndIndex"); 273 int maxIndex = 0; 274 275 double maxElement = array[0]; 276 277 for (int i = 1; i < length; i++) { 278 if (array[i] > maxElement) { 279 maxElement = array[i]; 280 maxIndex = i; 281 } 282 } 283 284 return new Object[] { Double.valueOf(maxElement), 285 Integer.valueOf(maxIndex) }; 286 } 287 288 /** Return the arithmetic mean of the elements in the array. 289 * If the length of the array is 0, return a NaN. 290 * @param array The array of doubles. 291 * @return The mean value in the array. 292 */ 293 public static final double mean(double[] array) { 294 _nonZeroLength(array, "DoubleArrayStat.mean"); 295 return sumOfElements(array) / array.length; 296 } 297 298 /** Return the minimum value in the array. 299 * Throw an exception if the length of the array is 0. 300 * @param array The array of doubles. 301 * @return The minimum value in the array. 302 */ 303 public static final double min(double[] array) { 304 Object[] minReturn = minAndIndex(array); 305 return ((Double) minReturn[0]).doubleValue(); 306 } 307 308 /** Return the minimum value of the array and the index at which the 309 * minimum occurs in the array. The minimum value is wrapped with 310 * the Double class, the index is wrapped with the Integer class, 311 * and an Object array is returned containing these values. 312 * If the array is of length zero, throw an IllegalArgumentException. 313 * @param array An array of doubles. 314 * @return An array of two Objects, returnValue[0] is the Double 315 * representation of the minimum value, returnValue[1] is the Integer 316 * representation of the corresponding index. 317 */ 318 public static final Object[] minAndIndex(double[] array) { 319 int length = _nonZeroLength(array, "DoubleArrayStat.minAndIndex"); 320 int minIndex = 0; 321 322 double minElement = array[0]; 323 324 for (int i = 1; i < length; i++) { 325 if (array[i] < minElement) { 326 minElement = array[i]; 327 minIndex = i; 328 } 329 } 330 331 return new Object[] { Double.valueOf(minElement), 332 Integer.valueOf(minIndex) }; 333 } 334 335 /** Return the product of all of the elements in the array. 336 * Return 1.0 if the length of the array is 0. 337 * @param array The array of doubles. 338 * @return The product of the elements in the array. 339 */ 340 public static final double productOfElements(double[] array) { 341 double product = 1.0; 342 343 for (double element : array) { 344 product *= element; 345 } 346 347 return product; 348 } 349 350 /** Return a new array of Bernoulli random variables with a given 351 * probability of success p. On success, the random variable has 352 * value 1.0; on failure the random variable has value 0.0. 353 * @param p The probability, which should be a double between 0.0 354 * and 1.0. The probability is compared to the output of 355 * java.lang.Random.nextDouble(). If the output is less than p, then 356 * the array element will be 1.0. If the output is greater than or 357 * equal to p, then the array element will be 0.0. 358 * @param N The number of elements to allocate. 359 * @return The array of Bernoulli random variables. 360 */ 361 public static final double[] randomBernoulli(double p, int N) { 362 double[] returnValue = new double[N]; 363 364 if (_random == null) { 365 _random = new Random(); 366 } 367 368 for (int i = 0; i < N; i++) { 369 returnValue[i] = _random.nextDouble() < p ? 1.0 : 0.0; 370 } 371 372 return returnValue; 373 } 374 375 /** Return a new array of exponentially distributed doubles with parameter 376 * lambda. The number of elements to allocate is given by N. 377 * @param lambda The lambda, which may not by 0.0. 378 * @param N The number of elements to allocate. 379 * @return The array of exponential random variables. 380 */ 381 public static final double[] randomExponential(double lambda, int N) { 382 double[] returnValue = new double[N]; 383 384 if (_random == null) { 385 _random = new Random(); 386 } 387 388 for (int i = 0; i < N; i++) { 389 double r; 390 391 do { 392 r = _random.nextDouble(); 393 } while (r == 0.0); 394 395 returnValue[i] = -Math.log(r) / lambda; 396 } 397 398 return returnValue; 399 } 400 401 /** Return a new array of Gaussian distributed doubles with a given 402 * mean and standard deviation. The number of elements to allocate 403 * is given by N. 404 * This algorithm is from [1]. 405 * @param mean The mean of the new array. 406 * @param standardDeviation The standard deviation of the new array. 407 * @param N The number of elements to allocate. 408 * @return The array of random Gaussian variables. 409 */ 410 public static final double[] randomGaussian(double mean, 411 double standardDeviation, int N) { 412 double[] returnValue = new double[N]; 413 414 if (_random == null) { 415 _random = new Random(); 416 } 417 418 for (int i = 0; i < N; i++) { 419 returnValue[i] = mean + _random.nextGaussian() * standardDeviation; 420 } 421 422 return returnValue; 423 } 424 425 /** Return a new array of Poisson random variables (as doubles) with 426 * a given mean. The number of elements to allocate is given by N. 427 * This algorithm is from [1]. 428 * @param mean The mean of the new array. 429 * @param N The number of elements to allocate. 430 * @return The array of random Poisson variables. 431 */ 432 public static final double[] randomPoisson(double mean, int N) { 433 double[] returnValue = new double[N]; 434 435 if (_random == null) { 436 _random = new Random(); 437 } 438 439 for (int i = 0; i < N; i++) { 440 double j; 441 double u; 442 double p; 443 double f; 444 445 j = 0.0; 446 f = p = Math.exp(-mean); 447 u = _random.nextDouble(); 448 449 while (f <= u) { 450 p *= mean / (j + 1.0); 451 f += p; 452 j += 1.0; 453 } 454 455 returnValue[i] = j; 456 } 457 458 return returnValue; 459 } 460 461 /** Return a new array of uniformly distributed doubles ranging from 462 * a to b. The number of elements to allocate is given by N. 463 * @param a A double indicating the lower bound. 464 * @param b A double indicating the upper bound. 465 * @param N An int indicating how many elements to generate. 466 * @return A new array of doubles. 467 */ 468 public static double[] randomUniform(double a, double b, int N) { 469 double range = b - a; 470 double[] returnValue = new double[N]; 471 472 if (_random == null) { 473 _random = new Random(); 474 } 475 476 for (int i = 0; i < N; i++) { 477 returnValue[i] = _random.nextDouble() * range + a; 478 } 479 480 return returnValue; 481 } 482 483 /** Given two array's of probabilities, calculate the relative entropy 484 * aka Kullback Leibler distance, D(p || q), (in bits) between the 485 * two probability mass functions. The result will be POSITIVE_INFINITY if 486 * q has a zero probability for a symbol for which p has a non-zero 487 * probability. 488 * The function computed is : 489 * <p> 490 * D(p||q) = - sum (p[x] * log<sup>2</sup>(p[x]/q[x])) 491 * </p> 492 * Throw an IllegalArgumentException if either array has length 0. 493 * If the two arrays do not have the same length, throw an 494 * IllegalArgumentException. 495 * @param p An array of doubles representing the first pmf, p. 496 * @param q An array of doubles representing the second pmf, q. 497 * @return A double representing the relative entropy of the 498 * random variable. 499 */ 500 public static final double relativeEntropy(double[] p, double[] q) { 501 _nonZeroLength(p, "DoubleArrayStat.relativeEntropy"); 502 503 int length = _commonLength(p, q, "DoubleArrayStat.relativeEntropy"); 504 505 double d = 0.0; 506 507 for (int i = 0; i < length; i++) { 508 if (p[i] < 0.0 || q[i] < 0.0) { 509 throw new IllegalArgumentException( 510 "ptolemy.math.DoubleArrayStat.relativeEntropy() : " 511 + "Negative probability encountered."); 512 } else if (p[i] == 0.0) { 513 // do nothing 514 } else if (q[i] == 0.0) { 515 return Double.POSITIVE_INFINITY; 516 } else { 517 d += p[i] * ExtendedMath.log2(p[i] / q[i]); 518 } 519 } 520 521 return d; 522 } 523 524 /** Return the standard deviation of the elements in the array. 525 * Simply return standardDeviation(array, false). 526 * @param array The input array. 527 * @return The standard deviation. 528 */ 529 public static double standardDeviation(double[] array) { 530 return Math.sqrt(variance(array, false)); 531 } 532 533 /** Return the standard deviation of the elements in the array. 534 * The standard deviation is computed as follows : 535 * <p> 536 * <pre> 537 * stdDev = sqrt(variance) 538 * </pre> 539 * <p> 540 * The sample standard deviation is computed as follows 541 * <p> 542 * <pre> 543 * stdDev = sqrt(variance<sub>sample</sub>) 544 * </pre> 545 * <p> 546 * Throw an exception if the array is of length 0, or if the 547 * sample standard deviation is taken on an array of length less than 2. 548 * @param array An array of doubles. 549 * @param sample True if the sample standard deviation is desired. 550 * @return A double. 551 */ 552 public static double standardDeviation(double[] array, boolean sample) { 553 return Math.sqrt(variance(array, sample)); 554 } 555 556 /** Return the sum of all of the elements in the array. 557 * Return 0.0 of the length of the array is 0. 558 * @param array An array of doubles. 559 * @return The sum of all of the elements in the array. 560 */ 561 public static final double sumOfElements(double[] array) { 562 double sum = 0.0; 563 564 for (double element : array) { 565 sum += element; 566 } 567 568 return sum; 569 } 570 571 /** Return the variance of the elements in the array, assuming 572 * sufficient statistics. 573 * Simply return variance(array, false). 574 * Throw a runtime exception if the array is of length 0, or if the 575 * sample variance is taken on an array of length less than 2. 576 * @param array An array of doubles. 577 * @return The variance of the elements in the array. 578 */ 579 public static double variance(double[] array) { 580 return variance(array, false); 581 } 582 583 /** Return the variance of the elements in the array, assuming 584 * sufficient statistics. 585 * The variance is computed as follows : 586 * <p> 587 * <pre> 588 * variance = (sum(X<sup>2</sup>) - (sum(X) / N)<sup>2</sup>) / N 589 * </pre> 590 * <p> 591 * The sample variance is computed as follows : 592 * <p> 593 * <pre> 594 * variance<sub>sample</sub> = 595 * (sum(X<sup>2</sup>) - (sum(X) / N)<sup>2</sup>) / (N - 1) 596 * </pre> 597 * <p> 598 * 599 * Throw a runtime exception if the array is of length 0, or if the 600 * sample variance is taken on an array of length less than 2. 601 * @param array An array of doubles. 602 * @param sample True if the sample standard deviation is desired. 603 * @return The variance of the elements in the array. 604 */ 605 public static double variance(double[] array, boolean sample) { 606 int length = _nonZeroLength(array, "DoubleArrayStat.variance"); 607 608 if (sample && array.length < 2) { 609 throw new IllegalArgumentException( 610 "ptolemy.math.DoubleArrayStat.variance() : " 611 + "sample variance and standard deviation of an array " 612 + "of length less than 2 are not defined."); 613 } 614 615 double ex2 = 0.0; 616 double sum = 0.0; 617 618 for (int i = 0; i < length; i++) { 619 ex2 += array[i] * array[i]; 620 sum += array[i]; 621 } 622 623 double norm = sample ? length - 1 : length; 624 double sumSquaredOverLength = sum * sum / length; 625 return (ex2 - sumSquaredOverLength) / norm; 626 } 627 628 /////////////////////////////////////////////////////////////////// 629 //// protected methods //// 630 631 /** Throw an exception if the array is null or length 0. 632 * Otherwise return the length of the array. 633 * @param array An array of doubles. 634 * @param methodName A String representing the method name of the caller, 635 * without parentheses. 636 * @return The length of the array. 637 */ 638 protected static final int _nonZeroLength(final double[] array, 639 String methodName) { 640 if (array == null) { 641 throw new IllegalArgumentException( 642 "ptolemy.math." + methodName + "() : input array is null."); 643 } 644 645 if (array.length <= 0) { 646 throw new IllegalArgumentException("ptolemy.math." + methodName 647 + "() : input array has length 0."); 648 } 649 650 return array.length; 651 } 652 653 /////////////////////////////////////////////////////////////////// 654 //// private variables //// 655 // Common instance of Random to be shared by all methods that need 656 // a random number. If we do not share a single Random, then 657 // under Windows, closely spaced calls to nextGaussian() on two 658 // separate Randoms could yield the same return value. 659 // FindBugs reports: "Incorrect lazy initialization of static field" 660 // so we mark the volatile. 661 private static volatile Random _random; 662}