001 /* =========================================================== 002 * JFreeChart : a free chart library for the Java(tm) platform 003 * =========================================================== 004 * 005 * (C) Copyright 2000-2006, by Object Refinery Limited and Contributors. 006 * 007 * Project Info: http://www.jfree.org/jfreechart/index.html 008 * 009 * This library is free software; you can redistribute it and/or modify it 010 * under the terms of the GNU Lesser General Public License as published by 011 * the Free Software Foundation; either version 2.1 of the License, or 012 * (at your option) any later version. 013 * 014 * This library is distributed in the hope that it will be useful, but 015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 017 * License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this library; if not, write to the Free Software 021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 022 * USA. 023 * 024 * [Java is a trademark or registered trademark of Sun Microsystems, Inc. 025 * in the United States and other countries.] 026 * 027 * --------------- 028 * Statistics.java 029 * --------------- 030 * (C) Copyright 2000-2006, by Matthew Wright and Contributors. 031 * 032 * Original Author: Matthew Wright; 033 * Contributor(s): David Gilbert (for Object Refinery Limited); 034 * 035 * $Id: Statistics.java,v 1.5.2.2 2006/11/16 11:19:47 mungady Exp $ 036 * 037 * Changes (from 08-Nov-2001) 038 * -------------------------- 039 * 08-Nov-2001 : Added standard header and tidied Javadoc comments (DG); 040 * Moved from JFreeChart to package com.jrefinery.data.* in 041 * JCommon class library (DG); 042 * 24-Jun-2002 : Removed unnecessary local variable (DG); 043 * 07-Oct-2002 : Fixed errors reported by Checkstyle (DG); 044 * 26-May-2004 : Moved calculateMean() method from BoxAndWhiskerCalculator (DG); 045 * 02-Jun-2004 : Fixed bug in calculateMedian() method (DG); 046 * 11-Jan-2005 : Removed deprecated code in preparation for the 1.0.0 047 * release (DG); 048 * 049 */ 050 051 package org.jfree.data.statistics; 052 053 import java.util.ArrayList; 054 import java.util.Collection; 055 import java.util.Collections; 056 import java.util.Iterator; 057 import java.util.List; 058 059 /** 060 * A utility class that provides some common statistical functions. 061 */ 062 public abstract class Statistics { 063 064 /** 065 * Returns the mean of an array of numbers. This is equivalent to calling 066 * <code>calculateMean(values, true)</code>. 067 * 068 * @param values the values (<code>null</code> not permitted). 069 * 070 * @return The mean. 071 */ 072 public static double calculateMean(Number[] values) { 073 return calculateMean(values, true); 074 } 075 076 /** 077 * Returns the mean of an array of numbers. 078 * 079 * @param values the values (<code>null</code> not permitted). 080 * @param includeNullAndNaN a flag that controls whether or not 081 * <code>null</code> and <code>Double.NaN</code> values are included 082 * in the calculation (if either is present in the array, the result is 083 * {@link Double#NaN}). 084 * 085 * @return The mean. 086 * 087 * @since 1.0.3 088 */ 089 public static double calculateMean(Number[] values, 090 boolean includeNullAndNaN) { 091 092 if (values == null) { 093 throw new IllegalArgumentException("Null 'values' argument."); 094 } 095 double sum = 0.0; 096 double current; 097 int counter = 0; 098 for (int i = 0; i < values.length; i++) { 099 // treat nulls the same as NaNs 100 if (values[i] != null) { 101 current = values[i].doubleValue(); 102 } 103 else { 104 current = Double.NaN; 105 } 106 // calculate the sum and count 107 if (includeNullAndNaN || !Double.isNaN(current)) { 108 sum = sum + current; 109 counter++; 110 } 111 } 112 double result = (sum / counter); 113 return result; 114 } 115 116 /** 117 * Returns the mean of a collection of <code>Number</code> objects. 118 * 119 * @param values the values (<code>null</code> not permitted). 120 * 121 * @return The mean. 122 */ 123 public static double calculateMean(Collection values) { 124 return calculateMean(values, true); 125 } 126 127 /** 128 * Returns the mean of a collection of <code>Number</code> objects. 129 * 130 * @param values the values (<code>null</code> not permitted). 131 * @param includeNullAndNaN a flag that controls whether or not 132 * <code>null</code> and <code>Double.NaN</code> values are included 133 * in the calculation (if either is present in the array, the result is 134 * {@link Double#NaN}). 135 * 136 * @return The mean. 137 * 138 * @since 1.0.3 139 */ 140 public static double calculateMean(Collection values, 141 boolean includeNullAndNaN) { 142 143 if (values == null) { 144 throw new IllegalArgumentException("Null 'values' argument."); 145 } 146 int count = 0; 147 double total = 0.0; 148 Iterator iterator = values.iterator(); 149 while (iterator.hasNext()) { 150 Object object = iterator.next(); 151 if (object == null) { 152 if (includeNullAndNaN) { 153 return Double.NaN; 154 } 155 } 156 else { 157 if (object instanceof Number) { 158 Number number = (Number) object; 159 double value = number.doubleValue(); 160 if (Double.isNaN(value)) { 161 if (includeNullAndNaN) { 162 return Double.NaN; 163 } 164 } 165 else { 166 total = total + number.doubleValue(); 167 count = count + 1; 168 } 169 } 170 } 171 } 172 return total / count; 173 } 174 175 /** 176 * Calculates the median for a list of values (<code>Number</code> objects). 177 * The list of values will be copied, and the copy sorted, before 178 * calculating the median. To avoid this step (if your list of values 179 * is already sorted), use the {@link #calculateMedian(List, boolean)} 180 * method. 181 * 182 * @param values the values (<code>null</code> permitted). 183 * 184 * @return The median. 185 */ 186 public static double calculateMedian(List values) { 187 return calculateMedian(values, true); 188 } 189 190 /** 191 * Calculates the median for a list of values (<code>Number</code> objects). 192 * If <code>copyAndSort</code> is <code>false</code>, the list is assumed 193 * to be presorted in ascending order by value. 194 * 195 * @param values the values (<code>null</code> permitted). 196 * @param copyAndSort a flag that controls whether the list of values is 197 * copied and sorted. 198 * 199 * @return The median. 200 */ 201 public static double calculateMedian(List values, boolean copyAndSort) { 202 203 double result = Double.NaN; 204 if (values != null) { 205 if (copyAndSort) { 206 int itemCount = values.size(); 207 List copy = new ArrayList(itemCount); 208 for (int i = 0; i < itemCount; i++) { 209 copy.add(i, values.get(i)); 210 } 211 Collections.sort(copy); 212 values = copy; 213 } 214 int count = values.size(); 215 if (count > 0) { 216 if (count % 2 == 1) { 217 if (count > 1) { 218 Number value = (Number) values.get((count - 1) / 2); 219 result = value.doubleValue(); 220 } 221 else { 222 Number value = (Number) values.get(0); 223 result = value.doubleValue(); 224 } 225 } 226 else { 227 Number value1 = (Number) values.get(count / 2 - 1); 228 Number value2 = (Number) values.get(count / 2); 229 result = (value1.doubleValue() + value2.doubleValue()) 230 / 2.0; 231 } 232 } 233 } 234 return result; 235 } 236 237 /** 238 * Calculates the median for a sublist within a list of values 239 * (<code>Number</code> objects). 240 * 241 * @param values the values, in any order (<code>null</code> not 242 * permitted). 243 * @param start the start index. 244 * @param end the end index. 245 * 246 * @return The median. 247 */ 248 public static double calculateMedian(List values, int start, int end) { 249 return calculateMedian(values, start, end, true); 250 } 251 252 /** 253 * Calculates the median for a sublist within a list of values 254 * (<code>Number</code> objects). The entire list will be sorted if the 255 * <code>ascending</code< argument is <code>false</code>. 256 * 257 * @param values the values (<code>null</code> not permitted). 258 * @param start the start index. 259 * @param end the end index. 260 * @param copyAndSort a flag that that controls whether the list of values 261 * is copied and sorted. 262 * 263 * @return The median. 264 */ 265 public static double calculateMedian(List values, int start, int end, 266 boolean copyAndSort) { 267 268 double result = Double.NaN; 269 if (copyAndSort) { 270 List working = new ArrayList(end - start + 1); 271 for (int i = start; i <= end; i++) { 272 working.add(values.get(i)); 273 } 274 Collections.sort(working); 275 result = calculateMedian(working, false); 276 } 277 else { 278 int count = end - start + 1; 279 if (count > 0) { 280 if (count % 2 == 1) { 281 if (count > 1) { 282 Number value 283 = (Number) values.get(start + (count - 1) / 2); 284 result = value.doubleValue(); 285 } 286 else { 287 Number value = (Number) values.get(start); 288 result = value.doubleValue(); 289 } 290 } 291 else { 292 Number value1 = (Number) values.get(start + count / 2 - 1); 293 Number value2 = (Number) values.get(start + count / 2); 294 result 295 = (value1.doubleValue() + value2.doubleValue()) / 2.0; 296 } 297 } 298 } 299 return result; 300 301 } 302 303 /** 304 * Returns the standard deviation of a set of numbers. 305 * 306 * @param data the data (<code>null</code> or zero length array not 307 * permitted). 308 * 309 * @return The standard deviation of a set of numbers. 310 */ 311 public static double getStdDev(Number[] data) { 312 if (data == null) { 313 throw new IllegalArgumentException("Null 'data' array."); 314 } 315 if (data.length == 0) { 316 throw new IllegalArgumentException("Zero length 'data' array."); 317 } 318 double avg = calculateMean(data); 319 double sum = 0.0; 320 321 for (int counter = 0; counter < data.length; counter++) { 322 double diff = data[counter].doubleValue() - avg; 323 sum = sum + diff * diff; 324 } 325 return Math.sqrt(sum / (data.length - 1)); 326 } 327 328 /** 329 * Fits a straight line to a set of (x, y) data, returning the slope and 330 * intercept. 331 * 332 * @param xData the x-data (<code>null</code> not permitted). 333 * @param yData the y-data (<code>null</code> not permitted). 334 * 335 * @return A double array with the intercept in [0] and the slope in [1]. 336 */ 337 public static double[] getLinearFit(Number[] xData, Number[] yData) { 338 339 if (xData == null) { 340 throw new IllegalArgumentException("Null 'xData' argument."); 341 } 342 if (yData == null) { 343 throw new IllegalArgumentException("Null 'yData' argument."); 344 } 345 if (xData.length != yData.length) { 346 throw new IllegalArgumentException( 347 "Statistics.getLinearFit(): array lengths must be equal."); 348 } 349 350 double[] result = new double[2]; 351 // slope 352 result[1] = getSlope(xData, yData); 353 // intercept 354 result[0] = calculateMean(yData) - result[1] * calculateMean(xData); 355 356 return result; 357 358 } 359 360 /** 361 * Finds the slope of a regression line using least squares. 362 * 363 * @param xData the x-values (<code>null</code> not permitted). 364 * @param yData the y-values (<code>null</code> not permitted). 365 * 366 * @return The slope. 367 */ 368 public static double getSlope(Number[] xData, Number[] yData) { 369 370 if (xData == null) { 371 throw new IllegalArgumentException("Null 'xData' argument."); 372 } 373 if (yData == null) { 374 throw new IllegalArgumentException("Null 'yData' argument."); 375 } 376 if (xData.length != yData.length) { 377 throw new IllegalArgumentException("Array lengths must be equal."); 378 } 379 380 // ********* stat function for linear slope ******** 381 // y = a + bx 382 // a = ybar - b * xbar 383 // sum(x * y) - (sum (x) * sum(y)) / n 384 // b = ------------------------------------ 385 // sum (x^2) - (sum(x)^2 / n 386 // ************************************************* 387 388 // sum of x, x^2, x * y, y 389 double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0; 390 int counter; 391 for (counter = 0; counter < xData.length; counter++) { 392 sx = sx + xData[counter].doubleValue(); 393 sxx = sxx + Math.pow(xData[counter].doubleValue(), 2); 394 sxy = sxy + yData[counter].doubleValue() 395 * xData[counter].doubleValue(); 396 sy = sy + yData[counter].doubleValue(); 397 } 398 return (sxy - (sx * sy) / counter) / (sxx - (sx * sx) / counter); 399 400 } 401 402 /** 403 * Calculates the correlation between two datasets. Both arrays should 404 * contain the same number of items. Null values are treated as zero. 405 * <P> 406 * Information about the correlation calculation was obtained from: 407 * 408 * http://trochim.human.cornell.edu/kb/statcorr.htm 409 * 410 * @param data1 the first dataset. 411 * @param data2 the second dataset. 412 * 413 * @return The correlation. 414 */ 415 public static double getCorrelation(Number[] data1, Number[] data2) { 416 if (data1 == null) { 417 throw new IllegalArgumentException("Null 'data1' argument."); 418 } 419 if (data2 == null) { 420 throw new IllegalArgumentException("Null 'data2' argument."); 421 } 422 if (data1.length != data2.length) { 423 throw new IllegalArgumentException( 424 "'data1' and 'data2' arrays must have same length." 425 ); 426 } 427 int n = data1.length; 428 double sumX = 0.0; 429 double sumY = 0.0; 430 double sumX2 = 0.0; 431 double sumY2 = 0.0; 432 double sumXY = 0.0; 433 for (int i = 0; i < n; i++) { 434 double x = 0.0; 435 if (data1[i] != null) { 436 x = data1[i].doubleValue(); 437 } 438 double y = 0.0; 439 if (data2[i] != null) { 440 y = data2[i].doubleValue(); 441 } 442 sumX = sumX + x; 443 sumY = sumY + y; 444 sumXY = sumXY + (x * y); 445 sumX2 = sumX2 + (x * x); 446 sumY2 = sumY2 + (y * y); 447 } 448 return (n * sumXY - sumX * sumY) / Math.pow((n * sumX2 - sumX * sumX) 449 * (n * sumY2 - sumY * sumY), 0.5); 450 } 451 452 /** 453 * Returns a data set for a moving average on the data set passed in. 454 * 455 * @param xData an array of the x data. 456 * @param yData an array of the y data. 457 * @param period the number of data points to average 458 * 459 * @return A double[][] the length of the data set in the first dimension, 460 * with two doubles for x and y in the second dimension 461 */ 462 public static double[][] getMovingAverage(Number[] xData, 463 Number[] yData, 464 int period) { 465 466 // check arguments... 467 if (xData.length != yData.length) { 468 throw new IllegalArgumentException("Array lengths must be equal."); 469 } 470 471 if (period > xData.length) { 472 throw new IllegalArgumentException( 473 "Period can't be longer than dataset." 474 ); 475 } 476 477 double[][] result = new double[xData.length - period][2]; 478 for (int i = 0; i < result.length; i++) { 479 result[i][0] = xData[i + period].doubleValue(); 480 // holds the moving average sum 481 double sum = 0.0; 482 for (int j = 0; j < period; j++) { 483 sum += yData[i + j].doubleValue(); 484 } 485 sum = sum / period; 486 result[i][1] = sum; 487 } 488 return result; 489 490 } 491 492 }