001/* A library of statistical operations on arrays of doubles.
002
003 Copyright (c) 1998-2013 The Regents of the University of California.
004 All rights reserved.
005
006 Permission is hereby granted, without written agreement and without
007 license or royalty fees, to use, copy, modify, and distribute this
008 software and its documentation for any purpose, provided that the above
009 copyright notice and the following two paragraphs appear in all copies
010 of this software.
011
012 IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
013 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
014 ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
015 THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
016 SUCH DAMAGE.
017
018 THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
019 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
020 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
021 PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
022 CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
023 ENHANCEMENTS, OR MODIFICATIONS.
024
025 PT_COPYRIGHT_VERSION_2
026 COPYRIGHTENDKEY
027
028 */
029package ptolemy.math;
030
031import java.util.Random;
032
033///////////////////////////////////////////////////////////////////
034//// DoubleArrayStat
035
036/**
037 This class provides a library for statistical operations on arrays of
038 doubles.
039 Unless explicitly noted otherwise, all array arguments are assumed to be
040 non-null. If a null array is passed to a method, a NullPointerException
041 will be thrown in the method or called methods.
042
043 @author Jeff Tsay
044 @version $Id$
045 @since Ptolemy II 0.3
046 @Pt.ProposedRating Yellow (ctsay)
047 @Pt.AcceptedRating Red (ctsay)
048 */
049public class DoubleArrayStat extends DoubleArrayMath {
050    // Protected constructor prevents construction of this class.
051    protected DoubleArrayStat() {
052    }
053
054    ///////////////////////////////////////////////////////////////////
055    ////                         public methods                    ////
056
057    /** Return a new array that is the auto-correlation of the
058     *  argument array, starting and ending at user-specified lag values.
059     *  The output array will have length (endLag - startLag + 1).  The first
060     *  element of the output will have the auto-correlation at a lag of
061     *  startLag. The last element of the output will have the
062     *  auto-correlation at a lag of endLag.
063     *  @param x An array of doubles.
064     *  @param N An integer indicating the number of samples to sum over.
065     *  @param startLag An int indicating at which lag to start (may be
066     *  negative).
067     *  @param endLag An int indicating at which lag to end.
068     *  @return A new array of doubles.
069     */
070    public static final double[] autoCorrelation(double[] x, int N,
071            int startLag, int endLag) {
072        int outputLength = endLag - startLag + 1;
073        double[] returnValue = new double[outputLength];
074
075        for (int lag = startLag; lag <= endLag; lag++) {
076            // Find the most efficient and correct place to start the summation
077            int start = Math.max(0, -lag);
078
079            // Find the most efficient and correct place to end the summation
080            int limit = Math.min(x.length, N);
081
082            limit = Math.min(limit, x.length - lag);
083
084            double sum = 0.0;
085
086            for (int i = start; i < limit; i++) {
087                sum += x[i] * x[i + lag];
088            }
089
090            returnValue[lag - startLag] = sum;
091        }
092
093        return returnValue;
094    }
095
096    /** Return the auto-correlation of an array at a certain lag value,
097     *  summing over a certain number of samples
098     *  defined by :
099     *  Rxx[d] = sum of i = 0 to N - 1 of x[i] * x[i + d]
100     *  N must be non-negative, but large numbers are ok because this
101     *  routine will not overrun reading of the arrays.
102     *  @param x An array of doubles.
103     *  @param N An integer indicating the number of samples to sum over.
104     *  @param lag An integer indicating the lag value (may be negative).
105     *  @return A double, Rxx[lag].
106     */
107    public static double autoCorrelationAt(double[] x, int N, int lag) {
108        // Find the most efficient and correct place to start the summation
109        int start = Math.max(0, -lag);
110
111        // Find the most efficient and correct place to end the summation
112        int limit = Math.min(x.length, N);
113
114        limit = Math.min(limit, x.length - lag);
115
116        double sum = 0.0;
117
118        for (int i = start; i < limit; i++) {
119            sum += x[i] * x[i + lag];
120        }
121
122        return sum;
123    }
124
125    /** Return a new array that is the cross-correlation of the two
126     *  argument arrays, starting and ending at user-specified lag values.
127     *  The output array will have length (endLag - startLag + 1).  The first
128     *  element of the output will have the cross-correlation at a lag of
129     *  startLag. The last element of the output will have the
130     *  cross-correlation at a lag of endLag.
131     *  @param x The first array of doubles.
132     *  @param y The second array of doubles.
133     *  @param N An integer indicating the number of samples to sum over.
134     *  @param startLag An int indicating at which lag to start (may be
135     *  negative).
136     *  @param endLag An int indicating at which lag to end.
137     *  @return A new array of doubles.
138     */
139    public static final double[] crossCorrelation(double[] x, double[] y, int N,
140            int startLag, int endLag) {
141        int outputLength = endLag - startLag + 1;
142        double[] returnValue = new double[outputLength];
143
144        for (int lag = startLag; lag <= endLag; lag++) {
145            // Find the most efficient and correct place to start the summation
146            int start = Math.max(0, -lag);
147
148            // Find the most efficient and correct place to end the summation
149            int limit = Math.min(x.length, N);
150
151            limit = Math.min(limit, y.length - lag);
152
153            double sum = 0.0;
154
155            for (int i = start; i < limit; i++) {
156                sum += x[i] * y[i + lag];
157            }
158
159            returnValue[lag - startLag] = sum;
160        }
161
162        return returnValue;
163    }
164
165    /** Return the cross-correlation of two arrays at a certain lag value.
166     *  The cross-correlation is defined by :
167     *  Rxy[d] = sum of i = 0 to N - 1 of x[i] * y[i + d]
168     *  @param x The first array of doubles.
169     *  @param y The second array of doubles.
170     *  @param N An integer indicating the number of samples to sum over.
171     *  This must be non-negative, but large numbers are ok because this
172     *  routine will not overrun reading of the arrays.
173     *  @param lag An integer indicating the lag value (may be negative).
174     *  @return A double, Rxy[lag].
175     */
176    public static double crossCorrelationAt(double[] x, double[] y, int N,
177            int lag) {
178        // Find the most efficient and correct place to start the summation
179        int start = Math.max(0, -lag);
180
181        // Find the most efficient and correct place to end the summation
182        int limit = Math.min(x.length, N);
183
184        limit = Math.min(limit, y.length - lag);
185
186        double sum = 0.0;
187
188        for (int i = start; i < limit; i++) {
189            sum += x[i] * y[i + lag];
190        }
191
192        return sum;
193    }
194
195    /** Given an array of probabilities, treated as a probability mass
196     *  function (pmf), calculate the entropy (in bits). The pmf is
197     *  a discrete function that gives the probability of each element.
198     *  The sum of the elements in the pmf should be 1, and each element
199     *  should be between 0 and 1.
200     *  This method does not check to see if the pmf is valid, except for
201     *  checking that each entry is non-negative.
202     *  The function computed is :
203     *  <p>
204     *   H(p) = - sum (p[x] * log<sup>2</sup>(p[x]))
205     *  </p>
206     *  The entropy is always non-negative.
207     *  Throw an IllegalArgumentException if the length of the array is 0,
208     *  or a negative probability is encountered.
209     *  @param p The array of probabilities.
210     *  @return The entropy of the array of probabilities.
211     */
212    public static final double entropy(double[] p) {
213        int length = _nonZeroLength(p, "DoubleArrayStat.entropy");
214
215        double h = 0.0;
216
217        for (int i = 0; i < length; i++) {
218            if (p[i] < 0.0) {
219                throw new IllegalArgumentException(
220                        "ptolemy.math.DoubleArrayStat.entropy() : "
221                                + "Negative probability encountered.");
222            } else if (p[i] == 0.0) {
223                // do nothing
224            } else {
225                h -= p[i] * ExtendedMath.log2(p[i]);
226            }
227        }
228
229        return h;
230    }
231
232    /** Return the geometric mean of the elements in the array. This
233     *  is defined to be the Nth root of the product of the elements
234     *  in the array, where N is the length of the array.
235     *  This method is only useful for arrays of non-negative numbers. If
236     *  the product of the elements in the array is negative, throw an
237     *  IllegalArgumentException. However, the individual elements of the array
238     *  are not verified to be non-negative.
239     *  Return 1.0 if the length of the array is 0.
240     *  @param array An array of doubles.
241     *  @return A double.
242     */
243    public static final double geometricMean(double[] array) {
244        if (array.length < 1) {
245            return 1.0;
246        }
247
248        return Math.pow(productOfElements(array), 1.0 / array.length);
249    }
250
251    /** Return the maximum value in the array.
252     *  Throw an exception if the length of the array is 0.
253     *  @param array The array of doubles.
254     *  @return The maximum value in the array.
255     */
256    public static final double max(double[] array) {
257        Object[] maxReturn = maxAndIndex(array);
258        return ((Double) maxReturn[0]).doubleValue();
259    }
260
261    /** Return the maximum value of the array and the index at which the
262     *  maximum occurs in the array. The maximum value is wrapped with
263     *  the Double class, the index is wrapped with the Integer class,
264     *  and an Object array is returned containing these values.
265     *  If the array is of length zero, throw an IllegalArgumentException.
266     *  @param array An array of doubles.
267     *  @return An array of two Objects, returnValue[0] is the Double
268     *  representation of the maximum value, returnValue[1] is the Integer
269     *  representation of the corresponding index.
270     */
271    public static final Object[] maxAndIndex(double[] array) {
272        int length = _nonZeroLength(array, "DoubleArrayStat.maxAndIndex");
273        int maxIndex = 0;
274
275        double maxElement = array[0];
276
277        for (int i = 1; i < length; i++) {
278            if (array[i] > maxElement) {
279                maxElement = array[i];
280                maxIndex = i;
281            }
282        }
283
284        return new Object[] { Double.valueOf(maxElement),
285                Integer.valueOf(maxIndex) };
286    }
287
288    /** Return the arithmetic mean of the elements in the array.
289     *  If the length of the array is 0, return a NaN.
290     *  @param array The array of doubles.
291     *  @return The mean value in the array.
292     */
293    public static final double mean(double[] array) {
294        _nonZeroLength(array, "DoubleArrayStat.mean");
295        return sumOfElements(array) / array.length;
296    }
297
298    /** Return the minimum value in the array.
299     *  Throw an exception if the length of the array is 0.
300     *  @param array The array of doubles.
301     *  @return The minimum value in the array.
302     */
303    public static final double min(double[] array) {
304        Object[] minReturn = minAndIndex(array);
305        return ((Double) minReturn[0]).doubleValue();
306    }
307
308    /** Return the minimum value of the array and the index at which the
309     *  minimum occurs in the array. The minimum value is wrapped with
310     *  the Double class, the index is wrapped with the Integer class,
311     *  and an Object array is returned containing these values.
312     *  If the array is of length zero, throw an IllegalArgumentException.
313     *  @param array An array of doubles.
314     *  @return An array of two Objects, returnValue[0] is the Double
315     *  representation of the minimum value, returnValue[1] is the Integer
316     *  representation of the corresponding index.
317     */
318    public static final Object[] minAndIndex(double[] array) {
319        int length = _nonZeroLength(array, "DoubleArrayStat.minAndIndex");
320        int minIndex = 0;
321
322        double minElement = array[0];
323
324        for (int i = 1; i < length; i++) {
325            if (array[i] < minElement) {
326                minElement = array[i];
327                minIndex = i;
328            }
329        }
330
331        return new Object[] { Double.valueOf(minElement),
332                Integer.valueOf(minIndex) };
333    }
334
335    /** Return the product of all of the elements in the array.
336     *  Return 1.0 if the length of the array is 0.
337     *  @param array The array of doubles.
338     *  @return The product of the elements in the array.
339     */
340    public static final double productOfElements(double[] array) {
341        double product = 1.0;
342
343        for (double element : array) {
344            product *= element;
345        }
346
347        return product;
348    }
349
350    /** Return a new array of Bernoulli random variables with a given
351     *  probability of success p. On success, the random variable has
352     *  value 1.0; on failure the random variable has value 0.0.
353     *  @param p The probability, which should be a double between 0.0
354     *  and 1.0.  The probability is compared to the output of
355     *  java.lang.Random.nextDouble().  If the output is less than p, then
356     *  the array element will be 1.0.  If the output is greater than or
357     *  equal to p, then the array element will be 0.0.
358     *  @param N The number of elements to allocate.
359     *  @return The array of Bernoulli random variables.
360     */
361    public static final double[] randomBernoulli(double p, int N) {
362        double[] returnValue = new double[N];
363
364        if (_random == null) {
365            _random = new Random();
366        }
367
368        for (int i = 0; i < N; i++) {
369            returnValue[i] = _random.nextDouble() < p ? 1.0 : 0.0;
370        }
371
372        return returnValue;
373    }
374
375    /** Return a new array of exponentially distributed doubles with parameter
376     *  lambda. The number of elements to allocate is given by N.
377     *  @param lambda The lambda, which may not by 0.0.
378     *  @param N The number of elements to allocate.
379     *  @return The array of exponential random variables.
380     */
381    public static final double[] randomExponential(double lambda, int N) {
382        double[] returnValue = new double[N];
383
384        if (_random == null) {
385            _random = new Random();
386        }
387
388        for (int i = 0; i < N; i++) {
389            double r;
390
391            do {
392                r = _random.nextDouble();
393            } while (r == 0.0);
394
395            returnValue[i] = -Math.log(r) / lambda;
396        }
397
398        return returnValue;
399    }
400
401    /** Return a new array of Gaussian distributed doubles with a given
402     *  mean and standard deviation. The number of elements to allocate
403     *  is given by N.
404     *  This algorithm is from [1].
405     *  @param mean The mean of the new array.
406     *  @param standardDeviation The standard deviation of the new array.
407     *  @param N The number of elements to allocate.
408     *  @return The array of random Gaussian variables.
409     */
410    public static final double[] randomGaussian(double mean,
411            double standardDeviation, int N) {
412        double[] returnValue = new double[N];
413
414        if (_random == null) {
415            _random = new Random();
416        }
417
418        for (int i = 0; i < N; i++) {
419            returnValue[i] = mean + _random.nextGaussian() * standardDeviation;
420        }
421
422        return returnValue;
423    }
424
425    /** Return a new array of Poisson random variables (as doubles) with
426     *  a given mean. The number of elements to allocate is given by N.
427     *  This algorithm is from [1].
428     *  @param mean The mean of the new array.
429     *  @param N The number of elements to allocate.
430     *  @return The array of random Poisson variables.
431     */
432    public static final double[] randomPoisson(double mean, int N) {
433        double[] returnValue = new double[N];
434
435        if (_random == null) {
436            _random = new Random();
437        }
438
439        for (int i = 0; i < N; i++) {
440            double j;
441            double u;
442            double p;
443            double f;
444
445            j = 0.0;
446            f = p = Math.exp(-mean);
447            u = _random.nextDouble();
448
449            while (f <= u) {
450                p *= mean / (j + 1.0);
451                f += p;
452                j += 1.0;
453            }
454
455            returnValue[i] = j;
456        }
457
458        return returnValue;
459    }
460
461    /** Return a new array of uniformly distributed doubles ranging from
462     *  a to b. The number of elements to allocate is given by N.
463     *  @param a A double indicating the lower bound.
464     *  @param b A double indicating the upper bound.
465     *  @param N An int indicating how many elements to generate.
466     *  @return A new array of doubles.
467     */
468    public static double[] randomUniform(double a, double b, int N) {
469        double range = b - a;
470        double[] returnValue = new double[N];
471
472        if (_random == null) {
473            _random = new Random();
474        }
475
476        for (int i = 0; i < N; i++) {
477            returnValue[i] = _random.nextDouble() * range + a;
478        }
479
480        return returnValue;
481    }
482
483    /** Given two array's of probabilities, calculate the relative entropy
484     *  aka Kullback Leibler distance, D(p || q), (in bits) between the
485     *  two probability mass functions. The result will be POSITIVE_INFINITY if
486     *  q has a zero probability for a symbol for which p has a non-zero
487     *  probability.
488     *  The function computed is :
489     *  <p>
490     *   D(p||q) = - sum (p[x] * log<sup>2</sup>(p[x]/q[x]))
491     *  </p>
492     *  Throw an IllegalArgumentException if either array has length 0.
493     *  If the two arrays do not have the same length, throw an
494     *  IllegalArgumentException.
495     *  @param p An array of doubles representing the first pmf, p.
496     *  @param q An array of doubles representing the second pmf, q.
497     *  @return A double representing the relative entropy of the
498     *  random variable.
499     */
500    public static final double relativeEntropy(double[] p, double[] q) {
501        _nonZeroLength(p, "DoubleArrayStat.relativeEntropy");
502
503        int length = _commonLength(p, q, "DoubleArrayStat.relativeEntropy");
504
505        double d = 0.0;
506
507        for (int i = 0; i < length; i++) {
508            if (p[i] < 0.0 || q[i] < 0.0) {
509                throw new IllegalArgumentException(
510                        "ptolemy.math.DoubleArrayStat.relativeEntropy() : "
511                                + "Negative probability encountered.");
512            } else if (p[i] == 0.0) {
513                // do nothing
514            } else if (q[i] == 0.0) {
515                return Double.POSITIVE_INFINITY;
516            } else {
517                d += p[i] * ExtendedMath.log2(p[i] / q[i]);
518            }
519        }
520
521        return d;
522    }
523
524    /** Return the standard deviation of the elements in the array.
525     *  Simply return standardDeviation(array, false).
526     *  @param array The input array.
527     *  @return The standard deviation.
528     */
529    public static double standardDeviation(double[] array) {
530        return Math.sqrt(variance(array, false));
531    }
532
533    /** Return the standard deviation of the elements in the array.
534     *  The standard deviation is computed as follows :
535     *  <p>
536     *  <pre>
537     *  stdDev = sqrt(variance)
538     *  </pre>
539     *  <p>
540     *  The sample standard deviation is computed as follows
541     *  <p>
542     *  <pre>
543     *  stdDev = sqrt(variance<sub>sample</sub>)
544     *  </pre>
545     *  <p>
546     *  Throw an exception if the array is of length 0, or if the
547     *  sample standard deviation is taken on an array of length less than 2.
548     *  @param array An array of doubles.
549     *  @param sample True if the sample standard deviation is desired.
550     *  @return A double.
551     */
552    public static double standardDeviation(double[] array, boolean sample) {
553        return Math.sqrt(variance(array, sample));
554    }
555
556    /** Return the sum of all of the elements in the array.
557     *  Return 0.0 of the length of the array is 0.
558     *  @param array An array of doubles.
559     *  @return The sum of all of the elements in the array.
560     */
561    public static final double sumOfElements(double[] array) {
562        double sum = 0.0;
563
564        for (double element : array) {
565            sum += element;
566        }
567
568        return sum;
569    }
570
571    /** Return the variance of the elements in the array, assuming
572     *  sufficient statistics.
573     *  Simply return variance(array, false).
574     *  Throw a runtime exception if the array is of length 0, or if the
575     *  sample variance is taken on an array of length less than 2.
576     *  @param array An array of doubles.
577     *  @return The variance of the elements in the array.
578     */
579    public static double variance(double[] array) {
580        return variance(array, false);
581    }
582
583    /** Return the variance of the elements in the array, assuming
584     *  sufficient statistics.
585     *  The variance is computed as follows :
586     *  <p>
587     *  <pre>
588     *  variance = (sum(X<sup>2</sup>) - (sum(X) / N)<sup>2</sup>) / N
589     *  </pre>
590     *  <p>
591     *  The sample variance is computed as follows :
592     *  <p>
593     *  <pre>
594     *  variance<sub>sample</sub> =
595     *       (sum(X<sup>2</sup>) - (sum(X) / N)<sup>2</sup>) / (N - 1)
596     *  </pre>
597     *  <p>
598     *
599     *  Throw a runtime exception if the array is of length 0, or if the
600     *  sample variance is taken on an array of length less than 2.
601     *  @param array An array of doubles.
602     *  @param sample True if the sample standard deviation is desired.
603     *  @return The variance of the elements in the array.
604     */
605    public static double variance(double[] array, boolean sample) {
606        int length = _nonZeroLength(array, "DoubleArrayStat.variance");
607
608        if (sample && array.length < 2) {
609            throw new IllegalArgumentException(
610                    "ptolemy.math.DoubleArrayStat.variance() : "
611                            + "sample variance and standard deviation of an array "
612                            + "of length less than 2 are not defined.");
613        }
614
615        double ex2 = 0.0;
616        double sum = 0.0;
617
618        for (int i = 0; i < length; i++) {
619            ex2 += array[i] * array[i];
620            sum += array[i];
621        }
622
623        double norm = sample ? length - 1 : length;
624        double sumSquaredOverLength = sum * sum / length;
625        return (ex2 - sumSquaredOverLength) / norm;
626    }
627
628    ///////////////////////////////////////////////////////////////////
629    ////                         protected methods                 ////
630
631    /** Throw an exception if the array is null or length 0.
632     *  Otherwise return the length of the array.
633     *  @param array An array of doubles.
634     *  @param methodName A String representing the method name of the caller,
635     *  without parentheses.
636     *  @return The length of the array.
637     */
638    protected static final int _nonZeroLength(final double[] array,
639            String methodName) {
640        if (array == null) {
641            throw new IllegalArgumentException(
642                    "ptolemy.math." + methodName + "() : input array is null.");
643        }
644
645        if (array.length <= 0) {
646            throw new IllegalArgumentException("ptolemy.math." + methodName
647                    + "() : input array has length 0.");
648        }
649
650        return array.length;
651    }
652
653    ///////////////////////////////////////////////////////////////////
654    ////                         private variables                 ////
655    // Common instance of Random to be shared by all methods that need
656    // a random number.  If we do not share a single Random, then
657    // under Windows, closely spaced calls to nextGaussian() on two
658    // separate Randoms could yield the same return value.
659    // FindBugs reports: "Incorrect lazy initialization of static field"
660    // so we mark the volatile.
661    private static volatile Random _random;
662}