001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.descriptive;
018
019 import java.io.Serializable;
020
021 import org.apache.commons.math.MathRuntimeException;
022 import org.apache.commons.math.exception.util.LocalizedFormats;
023 import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
024 import org.apache.commons.math.stat.descriptive.moment.Mean;
025 import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
026 import org.apache.commons.math.stat.descriptive.moment.Variance;
027 import org.apache.commons.math.stat.descriptive.rank.Max;
028 import org.apache.commons.math.stat.descriptive.rank.Min;
029 import org.apache.commons.math.stat.descriptive.summary.Sum;
030 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
031 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
032 import org.apache.commons.math.util.MathUtils;
033 import org.apache.commons.math.util.FastMath;
034
035 /**
036 * <p>
037 * Computes summary statistics for a stream of data values added using the
038 * {@link #addValue(double) addValue} method. The data values are not stored in
039 * memory, so this class can be used to compute statistics for very large data
040 * streams.
041 * </p>
042 * <p>
043 * The {@link StorelessUnivariateStatistic} instances used to maintain summary
044 * state and compute statistics are configurable via setters. For example, the
045 * default implementation for the variance can be overridden by calling
046 * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to
047 * these methods must implement the {@link StorelessUnivariateStatistic}
048 * interface and configuration must be completed before <code>addValue</code>
049 * is called. No configuration is necessary to use the default, commons-math
050 * provided implementations.
051 * </p>
052 * <p>
053 * Note: This class is not thread-safe. Use
054 * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
055 * threads is required.
056 * </p>
057 * @version $Revision: 1042376 $ $Date: 2010-12-05 16:54:55 +0100 (dim. 05 d??c. 2010) $
058 */
059 public class SummaryStatistics implements StatisticalSummary, Serializable {
060
061 /** Serialization UID */
062 private static final long serialVersionUID = -2021321786743555871L;
063
064 /** count of values that have been added */
065 protected long n = 0;
066
067 /** SecondMoment is used to compute the mean and variance */
068 protected SecondMoment secondMoment = new SecondMoment();
069
070 /** sum of values that have been added */
071 protected Sum sum = new Sum();
072
073 /** sum of the square of each value that has been added */
074 protected SumOfSquares sumsq = new SumOfSquares();
075
076 /** min of values that have been added */
077 protected Min min = new Min();
078
079 /** max of values that have been added */
080 protected Max max = new Max();
081
082 /** sumLog of values that have been added */
083 protected SumOfLogs sumLog = new SumOfLogs();
084
085 /** geoMean of values that have been added */
086 protected GeometricMean geoMean = new GeometricMean(sumLog);
087
088 /** mean of values that have been added */
089 protected Mean mean = new Mean();
090
091 /** variance of values that have been added */
092 protected Variance variance = new Variance();
093
094 /** Sum statistic implementation - can be reset by setter. */
095 private StorelessUnivariateStatistic sumImpl = sum;
096
097 /** Sum of squares statistic implementation - can be reset by setter. */
098 private StorelessUnivariateStatistic sumsqImpl = sumsq;
099
100 /** Minimum statistic implementation - can be reset by setter. */
101 private StorelessUnivariateStatistic minImpl = min;
102
103 /** Maximum statistic implementation - can be reset by setter. */
104 private StorelessUnivariateStatistic maxImpl = max;
105
106 /** Sum of log statistic implementation - can be reset by setter. */
107 private StorelessUnivariateStatistic sumLogImpl = sumLog;
108
109 /** Geometric mean statistic implementation - can be reset by setter. */
110 private StorelessUnivariateStatistic geoMeanImpl = geoMean;
111
112 /** Mean statistic implementation - can be reset by setter. */
113 private StorelessUnivariateStatistic meanImpl = mean;
114
115 /** Variance statistic implementation - can be reset by setter. */
116 private StorelessUnivariateStatistic varianceImpl = variance;
117
118 /**
119 * Construct a SummaryStatistics instance
120 */
121 public SummaryStatistics() {
122 }
123
124 /**
125 * A copy constructor. Creates a deep-copy of the {@code original}.
126 *
127 * @param original the {@code SummaryStatistics} instance to copy
128 */
129 public SummaryStatistics(SummaryStatistics original) {
130 copy(original, this);
131 }
132
133 /**
134 * Return a {@link StatisticalSummaryValues} instance reporting current
135 * statistics.
136 * @return Current values of statistics
137 */
138 public StatisticalSummary getSummary() {
139 return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
140 getMax(), getMin(), getSum());
141 }
142
143 /**
144 * Add a value to the data
145 * @param value the value to add
146 */
147 public void addValue(double value) {
148 sumImpl.increment(value);
149 sumsqImpl.increment(value);
150 minImpl.increment(value);
151 maxImpl.increment(value);
152 sumLogImpl.increment(value);
153 secondMoment.increment(value);
154 // If mean, variance or geomean have been overridden,
155 // need to increment these
156 if (!(meanImpl instanceof Mean)) {
157 meanImpl.increment(value);
158 }
159 if (!(varianceImpl instanceof Variance)) {
160 varianceImpl.increment(value);
161 }
162 if (!(geoMeanImpl instanceof GeometricMean)) {
163 geoMeanImpl.increment(value);
164 }
165 n++;
166 }
167
168 /**
169 * Returns the number of available values
170 * @return The number of available values
171 */
172 public long getN() {
173 return n;
174 }
175
176 /**
177 * Returns the sum of the values that have been added
178 * @return The sum or <code>Double.NaN</code> if no values have been added
179 */
180 public double getSum() {
181 return sumImpl.getResult();
182 }
183
184 /**
185 * Returns the sum of the squares of the values that have been added.
186 * <p>
187 * Double.NaN is returned if no values have been added.
188 * </p>
189 * @return The sum of squares
190 */
191 public double getSumsq() {
192 return sumsqImpl.getResult();
193 }
194
195 /**
196 * Returns the mean of the values that have been added.
197 * <p>
198 * Double.NaN is returned if no values have been added.
199 * </p>
200 * @return the mean
201 */
202 public double getMean() {
203 if (mean == meanImpl) {
204 return new Mean(secondMoment).getResult();
205 } else {
206 return meanImpl.getResult();
207 }
208 }
209
210 /**
211 * Returns the standard deviation of the values that have been added.
212 * <p>
213 * Double.NaN is returned if no values have been added.
214 * </p>
215 * @return the standard deviation
216 */
217 public double getStandardDeviation() {
218 double stdDev = Double.NaN;
219 if (getN() > 0) {
220 if (getN() > 1) {
221 stdDev = FastMath.sqrt(getVariance());
222 } else {
223 stdDev = 0.0;
224 }
225 }
226 return stdDev;
227 }
228
229 /**
230 * Returns the variance of the values that have been added.
231 * <p>
232 * Double.NaN is returned if no values have been added.
233 * </p>
234 * @return the variance
235 */
236 public double getVariance() {
237 if (varianceImpl == variance) {
238 return new Variance(secondMoment).getResult();
239 } else {
240 return varianceImpl.getResult();
241 }
242 }
243
244 /**
245 * Returns the maximum of the values that have been added.
246 * <p>
247 * Double.NaN is returned if no values have been added.
248 * </p>
249 * @return the maximum
250 */
251 public double getMax() {
252 return maxImpl.getResult();
253 }
254
255 /**
256 * Returns the minimum of the values that have been added.
257 * <p>
258 * Double.NaN is returned if no values have been added.
259 * </p>
260 * @return the minimum
261 */
262 public double getMin() {
263 return minImpl.getResult();
264 }
265
266 /**
267 * Returns the geometric mean of the values that have been added.
268 * <p>
269 * Double.NaN is returned if no values have been added.
270 * </p>
271 * @return the geometric mean
272 */
273 public double getGeometricMean() {
274 return geoMeanImpl.getResult();
275 }
276
277 /**
278 * Returns the sum of the logs of the values that have been added.
279 * <p>
280 * Double.NaN is returned if no values have been added.
281 * </p>
282 * @return the sum of logs
283 * @since 1.2
284 */
285 public double getSumOfLogs() {
286 return sumLogImpl.getResult();
287 }
288
289 /**
290 * Returns a statistic related to the Second Central Moment. Specifically,
291 * what is returned is the sum of squared deviations from the sample mean
292 * among the values that have been added.
293 * <p>
294 * Returns <code>Double.NaN</code> if no data values have been added and
295 * returns <code>0</code> if there is just one value in the data set.</p>
296 * <p>
297 * @return second central moment statistic
298 * @since 2.0
299 */
300 public double getSecondMoment() {
301 return secondMoment.getResult();
302 }
303
304 /**
305 * Generates a text report displaying summary statistics from values that
306 * have been added.
307 * @return String with line feeds displaying statistics
308 * @since 1.2
309 */
310 @Override
311 public String toString() {
312 StringBuilder outBuffer = new StringBuilder();
313 String endl = "\n";
314 outBuffer.append("SummaryStatistics:").append(endl);
315 outBuffer.append("n: ").append(getN()).append(endl);
316 outBuffer.append("min: ").append(getMin()).append(endl);
317 outBuffer.append("max: ").append(getMax()).append(endl);
318 outBuffer.append("mean: ").append(getMean()).append(endl);
319 outBuffer.append("geometric mean: ").append(getGeometricMean())
320 .append(endl);
321 outBuffer.append("variance: ").append(getVariance()).append(endl);
322 outBuffer.append("sum of squares: ").append(getSumsq()).append(endl);
323 outBuffer.append("standard deviation: ").append(getStandardDeviation())
324 .append(endl);
325 outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl);
326 return outBuffer.toString();
327 }
328
329 /**
330 * Resets all statistics and storage
331 */
332 public void clear() {
333 this.n = 0;
334 minImpl.clear();
335 maxImpl.clear();
336 sumImpl.clear();
337 sumLogImpl.clear();
338 sumsqImpl.clear();
339 geoMeanImpl.clear();
340 secondMoment.clear();
341 if (meanImpl != mean) {
342 meanImpl.clear();
343 }
344 if (varianceImpl != variance) {
345 varianceImpl.clear();
346 }
347 }
348
349 /**
350 * Returns true iff <code>object</code> is a
351 * <code>SummaryStatistics</code> instance and all statistics have the
352 * same values as this.
353 * @param object the object to test equality against.
354 * @return true if object equals this
355 */
356 @Override
357 public boolean equals(Object object) {
358 if (object == this) {
359 return true;
360 }
361 if (object instanceof SummaryStatistics == false) {
362 return false;
363 }
364 SummaryStatistics stat = (SummaryStatistics)object;
365 return MathUtils.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) &&
366 MathUtils.equalsIncludingNaN(stat.getMax(), getMax()) &&
367 MathUtils.equalsIncludingNaN(stat.getMean(), getMean()) &&
368 MathUtils.equalsIncludingNaN(stat.getMin(), getMin()) &&
369 MathUtils.equalsIncludingNaN(stat.getN(), getN()) &&
370 MathUtils.equalsIncludingNaN(stat.getSum(), getSum()) &&
371 MathUtils.equalsIncludingNaN(stat.getSumsq(), getSumsq()) &&
372 MathUtils.equalsIncludingNaN(stat.getVariance(), getVariance());
373 }
374
375 /**
376 * Returns hash code based on values of statistics
377 * @return hash code
378 */
379 @Override
380 public int hashCode() {
381 int result = 31 + MathUtils.hash(getGeometricMean());
382 result = result * 31 + MathUtils.hash(getGeometricMean());
383 result = result * 31 + MathUtils.hash(getMax());
384 result = result * 31 + MathUtils.hash(getMean());
385 result = result * 31 + MathUtils.hash(getMin());
386 result = result * 31 + MathUtils.hash(getN());
387 result = result * 31 + MathUtils.hash(getSum());
388 result = result * 31 + MathUtils.hash(getSumsq());
389 result = result * 31 + MathUtils.hash(getVariance());
390 return result;
391 }
392
393 // Getters and setters for statistics implementations
394 /**
395 * Returns the currently configured Sum implementation
396 * @return the StorelessUnivariateStatistic implementing the sum
397 * @since 1.2
398 */
399 public StorelessUnivariateStatistic getSumImpl() {
400 return sumImpl;
401 }
402
403 /**
404 * <p>
405 * Sets the implementation for the Sum.
406 * </p>
407 * <p>
408 * This method must be activated before any data has been added - i.e.,
409 * before {@link #addValue(double) addValue} has been used to add data;
410 * otherwise an IllegalStateException will be thrown.
411 * </p>
412 * @param sumImpl the StorelessUnivariateStatistic instance to use for
413 * computing the Sum
414 * @throws IllegalStateException if data has already been added (i.e if n >
415 * 0)
416 * @since 1.2
417 */
418 public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
419 checkEmpty();
420 this.sumImpl = sumImpl;
421 }
422
423 /**
424 * Returns the currently configured sum of squares implementation
425 * @return the StorelessUnivariateStatistic implementing the sum of squares
426 * @since 1.2
427 */
428 public StorelessUnivariateStatistic getSumsqImpl() {
429 return sumsqImpl;
430 }
431
432 /**
433 * <p>
434 * Sets the implementation for the sum of squares.
435 * </p>
436 * <p>
437 * This method must be activated before any data has been added - i.e.,
438 * before {@link #addValue(double) addValue} has been used to add data;
439 * otherwise an IllegalStateException will be thrown.
440 * </p>
441 * @param sumsqImpl the StorelessUnivariateStatistic instance to use for
442 * computing the sum of squares
443 * @throws IllegalStateException if data has already been added (i.e if n >
444 * 0)
445 * @since 1.2
446 */
447 public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) {
448 checkEmpty();
449 this.sumsqImpl = sumsqImpl;
450 }
451
452 /**
453 * Returns the currently configured minimum implementation
454 * @return the StorelessUnivariateStatistic implementing the minimum
455 * @since 1.2
456 */
457 public StorelessUnivariateStatistic getMinImpl() {
458 return minImpl;
459 }
460
461 /**
462 * <p>
463 * Sets the implementation for the minimum.
464 * </p>
465 * <p>
466 * This method must be activated before any data has been added - i.e.,
467 * before {@link #addValue(double) addValue} has been used to add data;
468 * otherwise an IllegalStateException will be thrown.
469 * </p>
470 * @param minImpl the StorelessUnivariateStatistic instance to use for
471 * computing the minimum
472 * @throws IllegalStateException if data has already been added (i.e if n >
473 * 0)
474 * @since 1.2
475 */
476 public void setMinImpl(StorelessUnivariateStatistic minImpl) {
477 checkEmpty();
478 this.minImpl = minImpl;
479 }
480
481 /**
482 * Returns the currently configured maximum implementation
483 * @return the StorelessUnivariateStatistic implementing the maximum
484 * @since 1.2
485 */
486 public StorelessUnivariateStatistic getMaxImpl() {
487 return maxImpl;
488 }
489
490 /**
491 * <p>
492 * Sets the implementation for the maximum.
493 * </p>
494 * <p>
495 * This method must be activated before any data has been added - i.e.,
496 * before {@link #addValue(double) addValue} has been used to add data;
497 * otherwise an IllegalStateException will be thrown.
498 * </p>
499 * @param maxImpl the StorelessUnivariateStatistic instance to use for
500 * computing the maximum
501 * @throws IllegalStateException if data has already been added (i.e if n >
502 * 0)
503 * @since 1.2
504 */
505 public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
506 checkEmpty();
507 this.maxImpl = maxImpl;
508 }
509
510 /**
511 * Returns the currently configured sum of logs implementation
512 * @return the StorelessUnivariateStatistic implementing the log sum
513 * @since 1.2
514 */
515 public StorelessUnivariateStatistic getSumLogImpl() {
516 return sumLogImpl;
517 }
518
519 /**
520 * <p>
521 * Sets the implementation for the sum of logs.
522 * </p>
523 * <p>
524 * This method must be activated before any data has been added - i.e.,
525 * before {@link #addValue(double) addValue} has been used to add data;
526 * otherwise an IllegalStateException will be thrown.
527 * </p>
528 * @param sumLogImpl the StorelessUnivariateStatistic instance to use for
529 * computing the log sum
530 * @throws IllegalStateException if data has already been added (i.e if n >
531 * 0)
532 * @since 1.2
533 */
534 public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) {
535 checkEmpty();
536 this.sumLogImpl = sumLogImpl;
537 geoMean.setSumLogImpl(sumLogImpl);
538 }
539
540 /**
541 * Returns the currently configured geometric mean implementation
542 * @return the StorelessUnivariateStatistic implementing the geometric mean
543 * @since 1.2
544 */
545 public StorelessUnivariateStatistic getGeoMeanImpl() {
546 return geoMeanImpl;
547 }
548
549 /**
550 * <p>
551 * Sets the implementation for the geometric mean.
552 * </p>
553 * <p>
554 * This method must be activated before any data has been added - i.e.,
555 * before {@link #addValue(double) addValue} has been used to add data;
556 * otherwise an IllegalStateException will be thrown.
557 * </p>
558 * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for
559 * computing the geometric mean
560 * @throws IllegalStateException if data has already been added (i.e if n >
561 * 0)
562 * @since 1.2
563 */
564 public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) {
565 checkEmpty();
566 this.geoMeanImpl = geoMeanImpl;
567 }
568
569 /**
570 * Returns the currently configured mean implementation
571 * @return the StorelessUnivariateStatistic implementing the mean
572 * @since 1.2
573 */
574 public StorelessUnivariateStatistic getMeanImpl() {
575 return meanImpl;
576 }
577
578 /**
579 * <p>
580 * Sets the implementation for the mean.
581 * </p>
582 * <p>
583 * This method must be activated before any data has been added - i.e.,
584 * before {@link #addValue(double) addValue} has been used to add data;
585 * otherwise an IllegalStateException will be thrown.
586 * </p>
587 * @param meanImpl the StorelessUnivariateStatistic instance to use for
588 * computing the mean
589 * @throws IllegalStateException if data has already been added (i.e if n >
590 * 0)
591 * @since 1.2
592 */
593 public void setMeanImpl(StorelessUnivariateStatistic meanImpl) {
594 checkEmpty();
595 this.meanImpl = meanImpl;
596 }
597
598 /**
599 * Returns the currently configured variance implementation
600 * @return the StorelessUnivariateStatistic implementing the variance
601 * @since 1.2
602 */
603 public StorelessUnivariateStatistic getVarianceImpl() {
604 return varianceImpl;
605 }
606
607 /**
608 * <p>
609 * Sets the implementation for the variance.
610 * </p>
611 * <p>
612 * This method must be activated before any data has been added - i.e.,
613 * before {@link #addValue(double) addValue} has been used to add data;
614 * otherwise an IllegalStateException will be thrown.
615 * </p>
616 * @param varianceImpl the StorelessUnivariateStatistic instance to use for
617 * computing the variance
618 * @throws IllegalStateException if data has already been added (i.e if n >
619 * 0)
620 * @since 1.2
621 */
622 public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) {
623 checkEmpty();
624 this.varianceImpl = varianceImpl;
625 }
626
627 /**
628 * Throws IllegalStateException if n > 0.
629 */
630 private void checkEmpty() {
631 if (n > 0) {
632 throw MathRuntimeException.createIllegalStateException(
633 LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC,
634 n);
635 }
636 }
637
638 /**
639 * Returns a copy of this SummaryStatistics instance with the same internal state.
640 *
641 * @return a copy of this
642 */
643 public SummaryStatistics copy() {
644 SummaryStatistics result = new SummaryStatistics();
645 copy(this, result);
646 return result;
647 }
648
649 /**
650 * Copies source to dest.
651 * <p>Neither source nor dest can be null.</p>
652 *
653 * @param source SummaryStatistics to copy
654 * @param dest SummaryStatistics to copy to
655 * @throws NullPointerException if either source or dest is null
656 */
657 public static void copy(SummaryStatistics source, SummaryStatistics dest) {
658 dest.maxImpl = source.maxImpl.copy();
659 dest.meanImpl = source.meanImpl.copy();
660 dest.minImpl = source.minImpl.copy();
661 dest.sumImpl = source.sumImpl.copy();
662 dest.varianceImpl = source.varianceImpl.copy();
663 dest.sumLogImpl = source.sumLogImpl.copy();
664 dest.sumsqImpl = source.sumsqImpl.copy();
665 if (source.getGeoMeanImpl() instanceof GeometricMean) {
666 // Keep geoMeanImpl, sumLogImpl in synch
667 dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl);
668 } else {
669 dest.geoMeanImpl = source.geoMeanImpl.copy();
670 }
671 SecondMoment.copy(source.secondMoment, dest.secondMoment);
672 dest.n = source.n;
673
674 // Make sure that if stat == statImpl in source, same
675 // holds in dest; otherwise copy stat
676 if (source.geoMean == source.geoMeanImpl) {
677 dest.geoMean = (GeometricMean) dest.geoMeanImpl;
678 } else {
679 GeometricMean.copy(source.geoMean, dest.geoMean);
680 }
681 if (source.max == source.maxImpl) {
682 dest.max = (Max) dest.maxImpl;
683 } else {
684 Max.copy(source.max, dest.max);
685 }
686 if (source.mean == source.meanImpl) {
687 dest.mean = (Mean) dest.meanImpl;
688 } else {
689 Mean.copy(source.mean, dest.mean);
690 }
691 if (source.min == source.minImpl) {
692 dest.min = (Min) dest.minImpl;
693 } else {
694 Min.copy(source.min, dest.min);
695 }
696 if (source.sum == source.sumImpl) {
697 dest.sum = (Sum) dest.sumImpl;
698 } else {
699 Sum.copy(source.sum, dest.sum);
700 }
701 if (source.variance == source.varianceImpl) {
702 dest.variance = (Variance) dest.varianceImpl;
703 } else {
704 Variance.copy(source.variance, dest.variance);
705 }
706 if (source.sumLog == source.sumLogImpl) {
707 dest.sumLog = (SumOfLogs) dest.sumLogImpl;
708 } else {
709 SumOfLogs.copy(source.sumLog, dest.sumLog);
710 }
711 if (source.sumsq == source.sumsqImpl) {
712 dest.sumsq = (SumOfSquares) dest.sumsqImpl;
713 } else {
714 SumOfSquares.copy(source.sumsq, dest.sumsq);
715 }
716 }
717 }