001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.inference;
018
019 import org.apache.commons.math.MathException;
020 import org.apache.commons.math.MathRuntimeException;
021 import org.apache.commons.math.distribution.ChiSquaredDistribution;
022 import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;
023 import org.apache.commons.math.exception.util.LocalizedFormats;
024 import org.apache.commons.math.util.FastMath;
025
026 /**
027 * Implements Chi-Square test statistics defined in the
028 * {@link UnknownDistributionChiSquareTest} interface.
029 *
030 * @version $Revision: 990655 $ $Date: 2010-08-29 23:49:40 +0200 (dim. 29 ao??t 2010) $
031 */
032 public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
033
034 /** Distribution used to compute inference statistics. */
035 private ChiSquaredDistribution distribution;
036
037 /**
038 * Construct a ChiSquareTestImpl
039 */
040 public ChiSquareTestImpl() {
041 this(new ChiSquaredDistributionImpl(1.0));
042 }
043
044 /**
045 * Create a test instance using the given distribution for computing
046 * inference statistics.
047 * @param x distribution used to compute inference statistics.
048 * @since 1.2
049 */
050 public ChiSquareTestImpl(ChiSquaredDistribution x) {
051 super();
052 setDistribution(x);
053 }
054 /**
055 * {@inheritDoc}
056 * <p><strong>Note: </strong>This implementation rescales the
057 * <code>expected</code> array if necessary to ensure that the sum of the
058 * expected and observed counts are equal.</p>
059 *
060 * @param observed array of observed frequency counts
061 * @param expected array of expected frequency counts
062 * @return chi-square test statistic
063 * @throws IllegalArgumentException if preconditions are not met
064 * or length is less than 2
065 */
066 public double chiSquare(double[] expected, long[] observed)
067 throws IllegalArgumentException {
068 if (expected.length < 2) {
069 throw MathRuntimeException.createIllegalArgumentException(
070 LocalizedFormats.INSUFFICIENT_DIMENSION, expected.length, 2);
071 }
072 if (expected.length != observed.length) {
073 throw MathRuntimeException.createIllegalArgumentException(
074 LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, expected.length, observed.length);
075 }
076 checkPositive(expected);
077 checkNonNegative(observed);
078 double sumExpected = 0d;
079 double sumObserved = 0d;
080 for (int i = 0; i < observed.length; i++) {
081 sumExpected += expected[i];
082 sumObserved += observed[i];
083 }
084 double ratio = 1.0d;
085 boolean rescale = false;
086 if (FastMath.abs(sumExpected - sumObserved) > 10E-6) {
087 ratio = sumObserved / sumExpected;
088 rescale = true;
089 }
090 double sumSq = 0.0d;
091 for (int i = 0; i < observed.length; i++) {
092 if (rescale) {
093 final double dev = observed[i] - ratio * expected[i];
094 sumSq += dev * dev / (ratio * expected[i]);
095 } else {
096 final double dev = observed[i] - expected[i];
097 sumSq += dev * dev / expected[i];
098 }
099 }
100 return sumSq;
101 }
102
103 /**
104 * {@inheritDoc}
105 * <p><strong>Note: </strong>This implementation rescales the
106 * <code>expected</code> array if necessary to ensure that the sum of the
107 * expected and observed counts are equal.</p>
108 *
109 * @param observed array of observed frequency counts
110 * @param expected array of expected frequency counts
111 * @return p-value
112 * @throws IllegalArgumentException if preconditions are not met
113 * @throws MathException if an error occurs computing the p-value
114 */
115 public double chiSquareTest(double[] expected, long[] observed)
116 throws IllegalArgumentException, MathException {
117 distribution.setDegreesOfFreedom(expected.length - 1.0);
118 return 1.0 - distribution.cumulativeProbability(
119 chiSquare(expected, observed));
120 }
121
122 /**
123 * {@inheritDoc}
124 * <p><strong>Note: </strong>This implementation rescales the
125 * <code>expected</code> array if necessary to ensure that the sum of the
126 * expected and observed counts are equal.</p>
127 *
128 * @param observed array of observed frequency counts
129 * @param expected array of expected frequency counts
130 * @param alpha significance level of the test
131 * @return true iff null hypothesis can be rejected with confidence
132 * 1 - alpha
133 * @throws IllegalArgumentException if preconditions are not met
134 * @throws MathException if an error occurs performing the test
135 */
136 public boolean chiSquareTest(double[] expected, long[] observed,
137 double alpha) throws IllegalArgumentException, MathException {
138 if ((alpha <= 0) || (alpha > 0.5)) {
139 throw MathRuntimeException.createIllegalArgumentException(
140 LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
141 alpha, 0, 0.5);
142 }
143 return chiSquareTest(expected, observed) < alpha;
144 }
145
146 /**
147 * @param counts array representation of 2-way table
148 * @return chi-square test statistic
149 * @throws IllegalArgumentException if preconditions are not met
150 */
151 public double chiSquare(long[][] counts) throws IllegalArgumentException {
152
153 checkArray(counts);
154 int nRows = counts.length;
155 int nCols = counts[0].length;
156
157 // compute row, column and total sums
158 double[] rowSum = new double[nRows];
159 double[] colSum = new double[nCols];
160 double total = 0.0d;
161 for (int row = 0; row < nRows; row++) {
162 for (int col = 0; col < nCols; col++) {
163 rowSum[row] += counts[row][col];
164 colSum[col] += counts[row][col];
165 total += counts[row][col];
166 }
167 }
168
169 // compute expected counts and chi-square
170 double sumSq = 0.0d;
171 double expected = 0.0d;
172 for (int row = 0; row < nRows; row++) {
173 for (int col = 0; col < nCols; col++) {
174 expected = (rowSum[row] * colSum[col]) / total;
175 sumSq += ((counts[row][col] - expected) *
176 (counts[row][col] - expected)) / expected;
177 }
178 }
179 return sumSq;
180 }
181
182 /**
183 * @param counts array representation of 2-way table
184 * @return p-value
185 * @throws IllegalArgumentException if preconditions are not met
186 * @throws MathException if an error occurs computing the p-value
187 */
188 public double chiSquareTest(long[][] counts)
189 throws IllegalArgumentException, MathException {
190 checkArray(counts);
191 double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
192 distribution.setDegreesOfFreedom(df);
193 return 1 - distribution.cumulativeProbability(chiSquare(counts));
194 }
195
196 /**
197 * @param counts array representation of 2-way table
198 * @param alpha significance level of the test
199 * @return true iff null hypothesis can be rejected with confidence
200 * 1 - alpha
201 * @throws IllegalArgumentException if preconditions are not met
202 * @throws MathException if an error occurs performing the test
203 */
204 public boolean chiSquareTest(long[][] counts, double alpha)
205 throws IllegalArgumentException, MathException {
206 if ((alpha <= 0) || (alpha > 0.5)) {
207 throw MathRuntimeException.createIllegalArgumentException(
208 LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
209 alpha, 0.0, 0.5);
210 }
211 return chiSquareTest(counts) < alpha;
212 }
213
214 /**
215 * @param observed1 array of observed frequency counts of the first data set
216 * @param observed2 array of observed frequency counts of the second data set
217 * @return chi-square test statistic
218 * @throws IllegalArgumentException if preconditions are not met
219 * @since 1.2
220 */
221 public double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
222 throws IllegalArgumentException {
223
224 // Make sure lengths are same
225 if (observed1.length < 2) {
226 throw MathRuntimeException.createIllegalArgumentException(
227 LocalizedFormats.INSUFFICIENT_DIMENSION, observed1.length, 2);
228 }
229 if (observed1.length != observed2.length) {
230 throw MathRuntimeException.createIllegalArgumentException(
231 LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
232 observed1.length, observed2.length);
233 }
234
235 // Ensure non-negative counts
236 checkNonNegative(observed1);
237 checkNonNegative(observed2);
238
239 // Compute and compare count sums
240 long countSum1 = 0;
241 long countSum2 = 0;
242 boolean unequalCounts = false;
243 double weight = 0.0;
244 for (int i = 0; i < observed1.length; i++) {
245 countSum1 += observed1[i];
246 countSum2 += observed2[i];
247 }
248 // Ensure neither sample is uniformly 0
249 if (countSum1 == 0) {
250 throw MathRuntimeException.createIllegalArgumentException(
251 LocalizedFormats.OBSERVED_COUNTS_ALL_ZERO, 1);
252 }
253 if (countSum2 == 0) {
254 throw MathRuntimeException.createIllegalArgumentException(
255 LocalizedFormats.OBSERVED_COUNTS_ALL_ZERO, 2);
256 }
257 // Compare and compute weight only if different
258 unequalCounts = countSum1 != countSum2;
259 if (unequalCounts) {
260 weight = FastMath.sqrt((double) countSum1 / (double) countSum2);
261 }
262 // Compute ChiSquare statistic
263 double sumSq = 0.0d;
264 double dev = 0.0d;
265 double obs1 = 0.0d;
266 double obs2 = 0.0d;
267 for (int i = 0; i < observed1.length; i++) {
268 if (observed1[i] == 0 && observed2[i] == 0) {
269 throw MathRuntimeException.createIllegalArgumentException(
270 LocalizedFormats.OBSERVED_COUNTS_BOTTH_ZERO_FOR_ENTRY, i);
271 } else {
272 obs1 = observed1[i];
273 obs2 = observed2[i];
274 if (unequalCounts) { // apply weights
275 dev = obs1/weight - obs2 * weight;
276 } else {
277 dev = obs1 - obs2;
278 }
279 sumSq += (dev * dev) / (obs1 + obs2);
280 }
281 }
282 return sumSq;
283 }
284
285 /**
286 * @param observed1 array of observed frequency counts of the first data set
287 * @param observed2 array of observed frequency counts of the second data set
288 * @return p-value
289 * @throws IllegalArgumentException if preconditions are not met
290 * @throws MathException if an error occurs computing the p-value
291 * @since 1.2
292 */
293 public double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
294 throws IllegalArgumentException, MathException {
295 distribution.setDegreesOfFreedom((double) observed1.length - 1);
296 return 1 - distribution.cumulativeProbability(
297 chiSquareDataSetsComparison(observed1, observed2));
298 }
299
300 /**
301 * @param observed1 array of observed frequency counts of the first data set
302 * @param observed2 array of observed frequency counts of the second data set
303 * @param alpha significance level of the test
304 * @return true iff null hypothesis can be rejected with confidence
305 * 1 - alpha
306 * @throws IllegalArgumentException if preconditions are not met
307 * @throws MathException if an error occurs performing the test
308 * @since 1.2
309 */
310 public boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2,
311 double alpha) throws IllegalArgumentException, MathException {
312 if ((alpha <= 0) || (alpha > 0.5)) {
313 throw MathRuntimeException.createIllegalArgumentException(
314 LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
315 alpha, 0.0, 0.5);
316 }
317 return chiSquareTestDataSetsComparison(observed1, observed2) < alpha;
318 }
319
320 /**
321 * Checks to make sure that the input long[][] array is rectangular,
322 * has at least 2 rows and 2 columns, and has all non-negative entries,
323 * throwing IllegalArgumentException if any of these checks fail.
324 *
325 * @param in input 2-way table to check
326 * @throws IllegalArgumentException if the array is not valid
327 */
328 private void checkArray(long[][] in) throws IllegalArgumentException {
329
330 if (in.length < 2) {
331 throw MathRuntimeException.createIllegalArgumentException(
332 LocalizedFormats.INSUFFICIENT_DIMENSION, in.length, 2);
333 }
334
335 if (in[0].length < 2) {
336 throw MathRuntimeException.createIllegalArgumentException(
337 LocalizedFormats.INSUFFICIENT_DIMENSION, in[0].length, 2);
338 }
339
340 checkRectangular(in);
341 checkNonNegative(in);
342
343 }
344
345 //--------------------- Private array methods -- should find a utility home for these
346
347 /**
348 * Throws IllegalArgumentException if the input array is not rectangular.
349 *
350 * @param in array to be tested
351 * @throws NullPointerException if input array is null
352 * @throws IllegalArgumentException if input array is not rectangular
353 */
354 private void checkRectangular(long[][] in) {
355 for (int i = 1; i < in.length; i++) {
356 if (in[i].length != in[0].length) {
357 throw MathRuntimeException.createIllegalArgumentException(
358 LocalizedFormats.DIFFERENT_ROWS_LENGTHS,
359 in[i].length, in[0].length);
360 }
361 }
362 }
363
364 /**
365 * Check all entries of the input array are > 0.
366 *
367 * @param in array to be tested
368 * @exception IllegalArgumentException if one entry is not positive
369 */
370 private void checkPositive(double[] in) throws IllegalArgumentException {
371 for (int i = 0; i < in.length; i++) {
372 if (in[i] <= 0) {
373 throw MathRuntimeException.createIllegalArgumentException(
374 LocalizedFormats.NOT_POSITIVE_ELEMENT_AT_INDEX,
375 i, in[i]);
376 }
377 }
378 }
379
380 /**
381 * Check all entries of the input array are >= 0.
382 *
383 * @param in array to be tested
384 * @exception IllegalArgumentException if one entry is negative
385 */
386 private void checkNonNegative(long[] in) throws IllegalArgumentException {
387 for (int i = 0; i < in.length; i++) {
388 if (in[i] < 0) {
389 throw MathRuntimeException.createIllegalArgumentException(
390 LocalizedFormats.NEGATIVE_ELEMENT_AT_INDEX,
391 i, in[i]);
392 }
393 }
394 }
395
396 /**
397 * Check all entries of the input array are >= 0.
398 *
399 * @param in array to be tested
400 * @exception IllegalArgumentException if one entry is negative
401 */
402 private void checkNonNegative(long[][] in) throws IllegalArgumentException {
403 for (int i = 0; i < in.length; i ++) {
404 for (int j = 0; j < in[i].length; j++) {
405 if (in[i][j] < 0) {
406 throw MathRuntimeException.createIllegalArgumentException(
407 LocalizedFormats.NEGATIVE_ELEMENT_AT_2D_INDEX,
408 i, j, in[i][j]);
409 }
410 }
411 }
412 }
413
414 /**
415 * Modify the distribution used to compute inference statistics.
416 *
417 * @param value
418 * the new distribution
419 * @since 1.2
420 */
421 public void setDistribution(ChiSquaredDistribution value) {
422 distribution = value;
423 }
424 }