001    /* ===========================================================
002     * JFreeChart : a free chart library for the Java(tm) platform
003     * ===========================================================
004     *
005     * (C) Copyright 2000-2011, by Object Refinery Limited and Contributors.
006     *
007     * Project Info:  http://www.jfree.org/jfreechart/index.html
008     *
009     * This library is free software; you can redistribute it and/or modify it
010     * under the terms of the GNU Lesser General Public License as published by
011     * the Free Software Foundation; either version 2.1 of the License, or
012     * (at your option) any later version.
013     *
014     * This library is distributed in the hope that it will be useful, but
015     * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
016     * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
017     * License for more details.
018     *
019     * You should have received a copy of the GNU Lesser General Public
020     * License along with this library; if not, write to the Free Software
021     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
022     * USA.
023     *
024     * [Oracle and Java are registered trademarks of Oracle and/or its affiliates. 
025     * Other names may be trademarks of their respective owners.]
026     *
027     * ---------------
028     * Regression.java
029     * ---------------
030     * (C) Copyright 2002-2009, by Object Refinery Limited.
031     *
032     * Original Author:  David Gilbert (for Object Refinery Limited);
033     * Contributor(s):   Peter Kolb (patch 2795746);
034     *
035     * Changes
036     * -------
037     * 30-Sep-2002 : Version 1 (DG);
038     * 18-Aug-2003 : Added 'abstract' (DG);
039     * 15-Jul-2004 : Switched getX() with getXValue() and getY() with
040     *               getYValue() (DG);
041     * 29-May-2009 : Added support for polynomial regression, see patch 2795746
042     *               by Peter Kolb (DG);
043     *
044     */
045    
046    package org.jfree.data.statistics;
047    
048    import org.jfree.data.xy.XYDataset;
049    
050    /**
051     * A utility class for fitting regression curves to data.
052     */
053    public abstract class Regression {
054    
055        /**
056         * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to
057         * the data using ordinary least squares regression.  The result is
058         * returned as a double[], where result[0] --> a, and result[1] --> b.
059         *
060         * @param data  the data.
061         *
062         * @return The parameters.
063         */
064        public static double[] getOLSRegression(double[][] data) {
065    
066            int n = data.length;
067            if (n < 2) {
068                throw new IllegalArgumentException("Not enough data.");
069            }
070    
071            double sumX = 0;
072            double sumY = 0;
073            double sumXX = 0;
074            double sumXY = 0;
075            for (int i = 0; i < n; i++) {
076                double x = data[i][0];
077                double y = data[i][1];
078                sumX += x;
079                sumY += y;
080                double xx = x * x;
081                sumXX += xx;
082                double xy = x * y;
083                sumXY += xy;
084            }
085            double sxx = sumXX - (sumX * sumX) / n;
086            double sxy = sumXY - (sumX * sumY) / n;
087            double xbar = sumX / n;
088            double ybar = sumY / n;
089    
090            double[] result = new double[2];
091            result[1] = sxy / sxx;
092            result[0] = ybar - result[1] * xbar;
093    
094            return result;
095    
096        }
097    
098        /**
099         * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to
100         * the data using ordinary least squares regression. The result is returned
101         * as a double[], where result[0] --> a, and result[1] --> b.
102         *
103         * @param data  the data.
104         * @param series  the series (zero-based index).
105         *
106         * @return The parameters.
107         */
108        public static double[] getOLSRegression(XYDataset data, int series) {
109    
110            int n = data.getItemCount(series);
111            if (n < 2) {
112                throw new IllegalArgumentException("Not enough data.");
113            }
114    
115            double sumX = 0;
116            double sumY = 0;
117            double sumXX = 0;
118            double sumXY = 0;
119            for (int i = 0; i < n; i++) {
120                double x = data.getXValue(series, i);
121                double y = data.getYValue(series, i);
122                sumX += x;
123                sumY += y;
124                double xx = x * x;
125                sumXX += xx;
126                double xy = x * y;
127                sumXY += xy;
128            }
129            double sxx = sumXX - (sumX * sumX) / n;
130            double sxy = sumXY - (sumX * sumY) / n;
131            double xbar = sumX / n;
132            double ybar = sumY / n;
133    
134            double[] result = new double[2];
135            result[1] = sxy / sxx;
136            result[0] = ybar - result[1] * xbar;
137    
138            return result;
139    
140        }
141    
142        /**
143         * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to
144         * the data using a power regression equation.  The result is returned as
145         * an array, where double[0] --> a, and double[1] --> b.
146         *
147         * @param data  the data.
148         *
149         * @return The parameters.
150         */
151        public static double[] getPowerRegression(double[][] data) {
152    
153            int n = data.length;
154            if (n < 2) {
155                throw new IllegalArgumentException("Not enough data.");
156            }
157    
158            double sumX = 0;
159            double sumY = 0;
160            double sumXX = 0;
161            double sumXY = 0;
162            for (int i = 0; i < n; i++) {
163                double x = Math.log(data[i][0]);
164                double y = Math.log(data[i][1]);
165                sumX += x;
166                sumY += y;
167                double xx = x * x;
168                sumXX += xx;
169                double xy = x * y;
170                sumXY += xy;
171            }
172            double sxx = sumXX - (sumX * sumX) / n;
173            double sxy = sumXY - (sumX * sumY) / n;
174            double xbar = sumX / n;
175            double ybar = sumY / n;
176    
177            double[] result = new double[2];
178            result[1] = sxy / sxx;
179            result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar);
180    
181            return result;
182    
183        }
184    
185        /**
186         * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to
187         * the data using a power regression equation.  The result is returned as
188         * an array, where double[0] --> a, and double[1] --> b.
189         *
190         * @param data  the data.
191         * @param series  the series to fit the regression line against.
192         *
193         * @return The parameters.
194         */
195        public static double[] getPowerRegression(XYDataset data, int series) {
196    
197            int n = data.getItemCount(series);
198            if (n < 2) {
199                throw new IllegalArgumentException("Not enough data.");
200            }
201    
202            double sumX = 0;
203            double sumY = 0;
204            double sumXX = 0;
205            double sumXY = 0;
206            for (int i = 0; i < n; i++) {
207                double x = Math.log(data.getXValue(series, i));
208                double y = Math.log(data.getYValue(series, i));
209                sumX += x;
210                sumY += y;
211                double xx = x * x;
212                sumXX += xx;
213                double xy = x * y;
214                sumXY += xy;
215            }
216            double sxx = sumXX - (sumX * sumX) / n;
217            double sxy = sumXY - (sumX * sumY) / n;
218            double xbar = sumX / n;
219            double ybar = sumY / n;
220    
221            double[] result = new double[2];
222            result[1] = sxy / sxx;
223            result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar);
224    
225            return result;
226    
227        }
228    
229        /**
230         * Returns the parameters 'a0', 'a1', 'a2', ..., 'an' for a polynomial 
231         * function of order n, y = a0 + a1 * x + a2 * x^2 + ... + an * x^n,
232         * fitted to the data using a polynomial regression equation.
233         * The result is returned as an array with a length of n + 2,
234         * where double[0] --> a0, double[1] --> a1, .., double[n] --> an.
235         * and double[n + 1] is the correlation coefficient R2
236         * Reference: J. D. Faires, R. L. Burden, Numerische Methoden (german
237         * edition), pp. 243ff and 327ff.
238         *
239         * @param dataset  the dataset (<code>null</code> not permitted).
240         * @param series  the series to fit the regression line against (the series
241         *         must have at least order + 1 non-NaN items).
242         * @param order  the order of the function (> 0).
243         *
244         * @return The parameters.
245         *
246         * @since 1.0.14
247         */
248        public static double[] getPolynomialRegression(XYDataset dataset, int series, int order) {
249            if (dataset == null) {
250                throw new IllegalArgumentException("Null 'dataset' argument.");
251            }
252            int itemCount = dataset.getItemCount(series);
253            if (itemCount < order + 1) {
254                throw new IllegalArgumentException("Not enough data.");
255            }
256            int validItems = 0;
257            double[][] data = new double[2][itemCount];
258            for(int item = 0; item < itemCount; item++){
259                double x = dataset.getXValue(series, item);
260                double y = dataset.getYValue(series, item);
261                if (!Double.isNaN(x) && !Double.isNaN(y)){
262                    data[0][validItems] = x;
263                    data[1][validItems] = y;
264                    validItems++;
265                }
266            }
267            if (validItems < order + 1) {
268                throw new IllegalArgumentException("Not enough data.");
269            }
270            int equations = order + 1;
271            int coefficients = order + 2;
272            double[] result = new double[equations + 1];
273            double[][] matrix = new double[equations][coefficients];
274            double sumX = 0.0;
275            double sumY = 0.0;
276    
277            for(int item = 0; item < validItems; item++){
278                sumX += data[0][item];
279                sumY += data[1][item];
280                for(int eq = 0; eq < equations; eq++){
281                    for(int coe = 0; coe < coefficients - 1; coe++){
282                        matrix[eq][coe] += Math.pow(data[0][item],eq + coe);
283                    }
284                    matrix[eq][coefficients - 1] += data[1][item]
285                            * Math.pow(data[0][item],eq);
286                }
287            }
288            double[][] subMatrix = calculateSubMatrix(matrix);
289            for (int eq = 1; eq < equations; eq++) {
290                matrix[eq][0] = 0;
291                for (int coe = 1; coe < coefficients; coe++) {
292                    matrix[eq][coe] = subMatrix[eq - 1][coe - 1];
293                }
294            }
295            for (int eq = equations - 1; eq > -1; eq--) {
296                double value = matrix[eq][coefficients - 1];
297                for (int coe = eq; coe < coefficients -1; coe++) {
298                    value -= matrix[eq][coe] * result[coe];
299                }
300                result[eq] = value / matrix[eq][eq];
301            }
302            double meanY = sumY / validItems;
303            double yObsSquare = 0.0;
304            double yRegSquare = 0.0;
305            for (int item = 0; item < validItems; item++) {
306                double yCalc = 0;
307                for (int eq = 0; eq < equations; eq++) {
308                    yCalc += result[eq] * Math.pow(data[0][item],eq);
309                }
310                yRegSquare += Math.pow(yCalc - meanY, 2);
311                yObsSquare += Math.pow(data[1][item] - meanY, 2);
312            }
313            double rSquare = yRegSquare / yObsSquare;
314            result[equations] = rSquare;
315            return result;
316        }
317    
318        /**
319         * Returns a matrix with the following features: (1) the number of rows
320         * and columns is 1 less than that of the original matrix; (2)the matrix
321         * is triangular, i.e. all elements a (row, column) with column > row are
322         * zero.  This method is used for calculating a polynomial regression.
323         * 
324         * @param matrix  the start matrix.
325         *
326         * @return The new matrix.
327         */
328        private static double[][] calculateSubMatrix(double[][] matrix){
329            int equations = matrix.length;
330            int coefficients = matrix[0].length;
331            double[][] result = new double[equations - 1][coefficients - 1];
332            for (int eq = 1; eq < equations; eq++) {
333                double factor = matrix[0][0] / matrix[eq][0];
334                for (int coe = 1; coe < coefficients; coe++) {
335                    result[eq - 1][coe -1] = matrix[0][coe] - matrix[eq][coe]
336                            * factor;
337                }
338            }
339            if (equations == 1) {
340                return result;
341            }
342            // check for zero pivot element
343            if (result[0][0] == 0) {
344                boolean found = false;
345                for (int i = 0; i < result.length; i ++) {
346                    if (result[i][0] != 0) {
347                        found = true;
348                        double[] temp = result[0];
349                        for (int j = 0; j < result[i].length; j++) {
350                            result[0][j] = result[i][j];
351                        }
352                        for (int j = 0; j < temp.length; j++) {
353                            result[i][j] = temp[j];
354                        }
355                        break;
356                    }
357                }
358                if (!found) {
359                    System.out.println("Equation has no solution!");
360                    return new double[equations - 1][coefficients - 1];
361                }
362            }
363            double[][] subMatrix = calculateSubMatrix(result);
364            for (int eq = 1; eq < equations -  1; eq++) {
365                result[eq][0] = 0;
366                for (int coe = 1; coe < coefficients - 1; coe++) {
367                    result[eq][coe] = subMatrix[eq - 1][coe - 1];
368                }
369            }
370            return result;
371        }
372    
373    }