001    /* ===========================================================
002     * JFreeChart : a free chart library for the Java(tm) platform
003     * ===========================================================
004     *
005     * (C) Copyright 2000-2005, by Object Refinery Limited and Contributors.
006     *
007     * Project Info:  http://www.jfree.org/jfreechart/index.html
008     *
009     * This library is free software; you can redistribute it and/or modify it 
010     * under the terms of the GNU Lesser General Public License as published by 
011     * the Free Software Foundation; either version 2.1 of the License, or 
012     * (at your option) any later version.
013     *
014     * This library is distributed in the hope that it will be useful, but 
015     * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
016     * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 
017     * License for more details.
018     *
019     * You should have received a copy of the GNU Lesser General Public
020     * License along with this library; if not, write to the Free Software
021     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, 
022     * USA.  
023     *
024     * [Java is a trademark or registered trademark of Sun Microsystems, Inc. 
025     * in the United States and other countries.]
026     *
027     * ---------------
028     * Statistics.java
029     * ---------------
030     * (C) Copyright 2000-2005, by Matthew Wright and Contributors.
031     *
032     * Original Author:  Matthew Wright;
033     * Contributor(s):   David Gilbert (for Object Refinery Limited);
034     *
035     * $Id: Statistics.java,v 1.5.2.1 2005/10/25 21:34:46 mungady Exp $
036     *
037     * Changes (from 08-Nov-2001)
038     * --------------------------
039     * 08-Nov-2001 : Added standard header and tidied Javadoc comments (DG);
040     *               Moved from JFreeChart to package com.jrefinery.data.* in 
041     *               JCommon class library (DG);
042     * 24-Jun-2002 : Removed unnecessary local variable (DG);
043     * 07-Oct-2002 : Fixed errors reported by Checkstyle (DG);
044     * 26-May-2004 : Moved calculateMean() method from BoxAndWhiskerCalculator (DG);
045     * 02-Jun-2004 : Fixed bug in calculateMedian() method (DG);
046     * 11-Jan-2005 : Removed deprecated code in preparation for the 1.0.0 
047     *               release (DG);
048     *
049     */
050    
051    package org.jfree.data.statistics;
052    
053    import java.util.ArrayList;
054    import java.util.Collection;
055    import java.util.Collections;
056    import java.util.Iterator;
057    import java.util.List;
058    
/**
 * A utility class that provides some simple statistical functions.  All 
 * methods are static and operate on <code>Number</code> objects supplied in
 * arrays, collections or lists.
 */
public abstract class Statistics {

    /**
     * Returns the mean of an array of numbers.
     *
     * @param values  the values (<code>null</code> permitted, returns 
     *                <code>Double.NaN</code>).  The individual elements must 
     *                not be <code>null</code>.
     *
     * @return The mean, or <code>Double.NaN</code> if the array is 
     *         <code>null</code> or empty.
     */
    public static double calculateMean(Number[] values) {
        if (values == null || values.length == 0) {
            return Double.NaN;
        }
        double sum = 0.0;
        for (int i = 0; i < values.length; i++) {
            sum = sum + values[i].doubleValue();
        }
        return sum / values.length;
    }

    /**
     * Returns the mean of a collection of <code>Number</code> objects.  Items
     * in the collection that are <code>null</code> or that are not instances
     * of <code>Number</code> are ignored.
     * 
     * @param values  the values (<code>null</code> permitted, returns 
     *                <code>Double.NaN</code>).
     * 
     * @return The mean, or <code>Double.NaN</code> if the collection is 
     *         <code>null</code> or contains no <code>Number</code> items.
     */
    public static double calculateMean(Collection values) {
        // the documented contract permits null, so guard before iterating
        if (values == null) {
            return Double.NaN;
        }
        int count = 0;
        double total = 0.0;
        Iterator iterator = values.iterator();
        while (iterator.hasNext()) {
            Object object = iterator.next();
            // instanceof is false for null, so null items are skipped here too
            if (object instanceof Number) {
                total = total + ((Number) object).doubleValue();
                count = count + 1;
            }
        }
        if (count == 0) {
            return Double.NaN;
        }
        return total / count;
    }

    /**
     * Calculates the median for a list of values (<code>Number</code> objects).
     * A sorted copy of the list is used for the calculation, the list passed
     * in is not modified.
     * 
     * @param values  the values (<code>null</code> permitted, returns 
     *                <code>Double.NaN</code>).
     * 
     * @return The median, or <code>Double.NaN</code> if the list is 
     *         <code>null</code> or empty.
     */
    public static double calculateMedian(List values) {
        return calculateMedian(values, true);
    }

    /**
     * Calculates the median for a list of values (<code>Number</code> objects).
     * If <code>copyAndSort</code> is <code>false</code>, the list is assumed 
     * to be in ascending order already and is read directly.
     * 
     * @param values  the values (<code>null</code> permitted, returns 
     *                <code>Double.NaN</code>).
     * @param copyAndSort  a flag that controls whether the list of values is
     *                     copied and sorted before the median is extracted.
     * 
     * @return The median, or <code>Double.NaN</code> if the list is 
     *         <code>null</code> or empty.
     */
    public static double calculateMedian(List values, boolean copyAndSort) {
        if (values == null) {
            return Double.NaN;
        }
        List working = values;
        if (copyAndSort) {
            // sort a copy so that the caller's list is left untouched
            working = new ArrayList(values);
            Collections.sort(working);
        }
        return medianOfSortedRange(working, 0, working.size());
    }

    /**
     * Calculates the median for a sublist within a list of values 
     * (<code>Number</code> objects).  The items in the range are copied and
     * sorted before the median is extracted.
     * 
     * @param values  the values (in any order).
     * @param start  the start index (inclusive).
     * @param end  the end index (inclusive).
     * 
     * @return The median, or <code>Double.NaN</code> if the range is empty.
     */
    public static double calculateMedian(List values, int start, int end) {
        return calculateMedian(values, start, end, true);
    }

    /**
     * Calculates the median for a sublist within a list of values 
     * (<code>Number</code> objects).  If <code>copyAndSort</code> is 
     * <code>true</code>, the items in the range <code>start</code> to 
     * <code>end</code> are copied to a new list which is then sorted, 
     * otherwise the items in that range are assumed to be in ascending order
     * already.
     * 
     * @param values  the values.
     * @param start  the start index (inclusive).
     * @param end  the end index (inclusive).
     * @param copyAndSort  a flag that controls whether the items in the range
     *                     are copied and sorted.
     * 
     * @return The median, or <code>Double.NaN</code> if the range is empty 
     *         (that is, <code>end</code> is less than <code>start</code>).
     */
    public static double calculateMedian(List values, int start, int end,
                                         boolean copyAndSort) {
        int count = end - start + 1;
        // an empty range has no median, whichever mode was requested
        if (count <= 0) {
            return Double.NaN;
        }
        if (!copyAndSort) {
            return medianOfSortedRange(values, start, count);
        }
        List working = new ArrayList(count);
        for (int i = start; i <= end; i++) {
            working.add(values.get(i));
        }
        Collections.sort(working);
        return medianOfSortedRange(working, 0, count);
    }

    /**
     * Returns the median of <code>count</code> consecutive items, beginning 
     * at index <code>start</code>, in a list that is already in ascending 
     * order.
     * 
     * @param values  the sorted values (<code>Number</code> objects).
     * @param start  the index of the first item in the range.
     * @param count  the number of items in the range.
     * 
     * @return The median, or <code>Double.NaN</code> if <code>count</code> is
     *         zero or negative.
     */
    private static double medianOfSortedRange(List values, int start, 
                                              int count) {
        if (count <= 0) {
            return Double.NaN;
        }
        int middle = start + count / 2;
        if (count % 2 == 1) {
            // odd item count: the single central item is the median
            return ((Number) values.get(middle)).doubleValue();
        }
        // even item count: average the two central items
        Number lower = (Number) values.get(middle - 1);
        Number upper = (Number) values.get(middle);
        return (lower.doubleValue() + upper.doubleValue()) / 2.0;
    }

    /**
     * Returns the standard deviation of a set of numbers.  The sum of the 
     * squared differences from the mean is divided by <code>n - 1</code> 
     * before the square root is taken.
     *
     * @param data  the data (<code>null</code> not permitted, and no element
     *              may be <code>null</code>).
     *
     * @return The standard deviation of a set of numbers, or 
     *         <code>Double.NaN</code> if the array contains fewer than two 
     *         values.
     */
    public static double getStdDev(Number[] data) {
        // without this guard, an empty array would evaluate 
        // Math.sqrt(0.0 / -1), which is -0.0 rather than "undefined"
        if (data.length == 0) {
            return Double.NaN;
        }
        double avg = calculateMean(data);
        double sum = 0.0;
        for (int i = 0; i < data.length; i++) {
            double diff = data[i].doubleValue() - avg;
            sum = sum + diff * diff;
        }
        // for a single value this is 0.0 / 0, which yields NaN
        return Math.sqrt(sum / (data.length - 1));
    }

    /**
     * Fits a straight line to a set of (x, y) data, returning the slope and
     * intercept.
     *
     * @param xData  the x-data.
     * @param yData  the y-data.
     *
     * @return A double array with the intercept in [0] and the slope in [1].
     * 
     * @throws IllegalArgumentException if the arrays have different lengths.
     */
    public static double[] getLinearFit(Number[] xData, Number[] yData) {

        // check arguments...
        if (xData.length != yData.length) {
            throw new IllegalArgumentException(
                "Statistics.getLinearFit(): array lengths must be equal.");
        }

        double slope = getSlope(xData, yData);
        // the fitted line passes through the point (mean(x), mean(y))
        double intercept = calculateMean(yData) - slope * calculateMean(xData);
        return new double[] {intercept, slope};

    }

    /**
     * Finds the slope of a regression line using least squares.
     *
     * @param xData  an array of Numbers (the x values).
     * @param yData  an array of Numbers (the y values).
     *
     * @return The slope.
     * 
     * @throws IllegalArgumentException if the arrays have different lengths.
     */
    public static double getSlope(Number[] xData, Number[] yData) {

        // check arguments...
        if (xData.length != yData.length) {
            throw new IllegalArgumentException("Array lengths must be equal.");
        }

        // ********* stat function for linear slope ********
        // y = a + bx
        // a = ybar - b * xbar
        //     sum(x * y) - (sum(x) * sum(y)) / n
        // b = ------------------------------------
        //     sum(x^2) - (sum(x)^2) / n
        // *************************************************

        // sum of x, x^2, x * y, y
        double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0;
        int n = xData.length;
        for (int i = 0; i < n; i++) {
            double x = xData[i].doubleValue();
            double y = yData[i].doubleValue();
            sx = sx + x;
            sxx = sxx + x * x;
            sxy = sxy + y * x;
            sy = sy + y;
        }
        return (sxy - (sx * sy) / n) / (sxx - (sx * sx) / n);

    }

    /**
     * Calculates the correlation between two datasets.  Both arrays should 
     * contain the same number of items.  Null values are treated as zero.
     * <P>
     * Information about the correlation calculation was obtained from:
     * 
     * http://trochim.human.cornell.edu/kb/statcorr.htm
     * 
     * @param data1  the first dataset (<code>null</code> not permitted).
     * @param data2  the second dataset (<code>null</code> not permitted).
     * 
     * @return The correlation.
     * 
     * @throws IllegalArgumentException if either array is <code>null</code>,
     *         or the arrays have different lengths.
     */
    public static double getCorrelation(Number[] data1, Number[] data2) {
        if (data1 == null) {
            throw new IllegalArgumentException("Null 'data1' argument.");
        }
        if (data2 == null) {
            throw new IllegalArgumentException("Null 'data2' argument.");
        }
        if (data1.length != data2.length) {
            throw new IllegalArgumentException(
                "'data1' and 'data2' arrays must have same length."
            );   
        }
        int n = data1.length;
        double sumX = 0.0;
        double sumY = 0.0;
        double sumX2 = 0.0;
        double sumY2 = 0.0;
        double sumXY = 0.0;
        for (int i = 0; i < n; i++) {
            // a null item contributes zero to every sum
            double x = (data1[i] != null) ? data1[i].doubleValue() : 0.0;
            double y = (data2[i] != null) ? data2[i].doubleValue() : 0.0;
            sumX = sumX + x;
            sumY = sumY + y;
            sumXY = sumXY + (x * y);
            sumX2 = sumX2 + (x * x);
            sumY2 = sumY2 + (y * y);
        }
        double numerator = n * sumXY - sumX * sumY;
        double denominator = Math.pow(
            (n * sumX2 - sumX * sumX) * (n * sumY2 - sumY * sumY), 0.5
        );
        return numerator / denominator;
    }

    /**
     * Returns a data set for a moving average on the data set passed in.  
     * Row <code>i</code> of the result holds the x-value 
     * <code>xData[i + period]</code> together with the mean of the 
     * <code>period</code> y-values starting at <code>yData[i]</code>.
     *
     * @param xData  an array of the x data.
     * @param yData  an array of the y data.
     * @param period  the number of data points to average
     *
     * @return A double[][] with <code>xData.length - period</code> rows in 
     *         the first dimension, and two doubles for x and y in the second 
     *         dimension
     * 
     * @throws IllegalArgumentException if the arrays have different lengths,
     *         or <code>period</code> is greater than the array length.
     */
    public static double[][] getMovingAverage(Number[] xData, 
                                              Number[] yData, 
                                              int period) {

        // check arguments...
        if (xData.length != yData.length) {
            throw new IllegalArgumentException("Array lengths must be equal.");
        }

        if (period > xData.length) {
            throw new IllegalArgumentException(
                "Period can't be longer than dataset."
            );
        }

        double[][] result = new double[xData.length - period][2];
        for (int i = 0; i < result.length; i++) {
            // total of the y-values in the window [i, i + period - 1]
            double windowTotal = 0.0;
            for (int j = 0; j < period; j++) {
                windowTotal += yData[i + j].doubleValue();
            }
            result[i][0] = xData[i + period].doubleValue();
            result[i][1] = windowTotal / period;
        }
        return result;

    }

}