main.js (7485B)
1 /** 2 * @license Apache-2.0 3 * 4 * Copyright (c) 2018 The Stdlib Authors. 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 'use strict'; 20 21 // MODULES // 22 23 var isNumber = require( '@stdlib/assert/is-number' ).isPrimitive; 24 var isnan = require( '@stdlib/math/base/assert/is-nan' ); 25 var sqrt = require( '@stdlib/math/base/special/sqrt' ); 26 27 28 // MAIN // 29 30 /** 31 * Returns an accumulator function which incrementally computes a sample Pearson product-moment correlation coefficient. 32 * 33 * ## Method 34 * 35 * - We begin by defining the co-moment \\(C_n\\) 36 * 37 * ```tex 38 * C_n = \sum_{i=1}^{n} ( x_i - \bar{x}_n ) ( y_i - \bar{y}_n ) 39 * ``` 40 * 41 * where \\(\bar{x}_n\\) and \\(\bar{y}_n\\) are the sample means for \\(x\\) and \\(y\\), respectively. 42 * 43 * - Based on Welford's method, we know the update formulas for the sample means are given by 44 * 45 * ```tex 46 * \bar{x}_n = \bar{x}_{n-1} + \frac{x_n - \bar{x}_{n-1}}{n} 47 * ``` 48 * 49 * and 50 * 51 * ```tex 52 * \bar{y}_n = \bar{y}_{n-1} + \frac{y_n - \bar{y}_{n-1}}{n} 53 * ``` 54 * 55 * - Substituting into the equation for \\(C_n\\) and rearranging terms 56 * 57 * ```tex 58 * C_n = C_{n-1} + (x_n - \bar{x}_n) (y_n - \bar{y}_{n-1}) 59 * ``` 60 * 61 * where the apparent asymmetry arises from 62 * 63 * ```tex 64 * x_n - \bar{x}_n = \frac{n-1}{n} (x_n - \bar{x}_{n-1}) 65 * ``` 66 * 67 * and, hence, the update term can be equivalently expressed 68 * 69 * ```tex 70 * \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1}) 71 * ``` 72 * 73 * - The covariance can be defined 74 * 75 * ```tex 76 * \begin{align*} 77 * \operatorname{cov}_n(x,y) &= \frac{C_n}{n} \\ 78 * &= \frac{C_{n-1} + (x_n - \bar{x}_n) (y_n - \bar{y}_{n-1})}{n} \\ 79 * &= \frac{(n-1)\operatorname{cov}_{n-1}(x,y) + (x_n - \bar{x}_n) (y_n - \bar{y}_{n-1})}{n} 80 * \end{align*} 81 * ``` 82 * 83 * - Applying Bessel's correction, we arrive at an update formula for calculating an unbiased sample covariance 84 * 85 * ```tex 86 * \begin{align*} 87 * \operatorname{cov}_n(x,y) &= \frac{n}{n-1}\cdot\frac{(n-1)\operatorname{cov}_{n-1}(x,y) + (x_n - \bar{x}_n) (y_n - \bar{y}_{n-1})}{n} \\ 88 * &= \operatorname{cov}_{n-1}(x,y) + \frac{(x_n - \bar{x}_n) (y_n - \bar{y}_{n-1})}{n-1} \\ 89 * &= \frac{C_{n-1} + (x_n - \bar{x}_n) (y_n - \bar{y}_{n-1})}{n-1} 90 * &= \frac{C_{n-1} + (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_n)}{n-1} 91 * \end{align*} 92 * ``` 93 * 94 * - To calculate the corrected sample standard deviation, we can use Welford's method, which can be derived as follows. We can express the variance as 95 * 96 * ```tex 97 * \begin{align*} 98 * S_n &= n \sigma_n^2 \\ 99 * &= \sum_{i=1}^{n} (x_i - \mu_n)^2 \\ 100 * &= \biggl(\sum_{i=1}^{n} x_i^2 \biggr) - n\mu_n^2 101 * \end{align*} 102 * ``` 103 * 104 * Accordingly, 105 * 106 * ```tex 107 * \begin{align*} 108 * S_n - S_{n-1} &= \sum_{i=1}^{n} x_i^2 - n\mu_n^2 - \sum_{i=1}^{n-1} x_i^2 + (n-1)\mu_{n-1}^2 \\ 109 * &= x_n^2 - n\mu_n^2 + (n-1)\mu_{n-1}^2 \\ 110 * &= x_n^2 - \mu_{n-1}^2 + n(\mu_{n-1}^2 - \mu_n^2) \\ 111 * &= x_n^2 - \mu_{n-1}^2 + n(\mu_{n-1} - \mu_n)(\mu_{n-1} + \mu_n) \\ 112 * &= x_n^2 - \mu_{n-1}^2 + (\mu_{n-1} - x_n)(\mu_{n-1} + \mu_n) \\ 113 * &= x_n^2 - \mu_{n-1}^2 + \mu_{n-1}^2 - x_n\mu_n - x_n\mu_{n-1} + \mu_n\mu_{n-1} \\ 114 * &= x_n^2 - x_n\mu_n - x_n\mu_{n-1} + \mu_n\mu_{n-1} \\ 115 * &= (x_n - \mu_{n-1})(x_n - \mu_n) \\ 116 * &= S_{n-1} + (x_n - \mu_{n-1})(x_n - \mu_n) 117 * \end{align*} 118 * ``` 119 * 120 * where we use the identity 121 * 122 * ```tex 123 * x_n - \mu_{n-1} = n (\mu_n - \mu_{n-1}) 124 * ``` 125 * 126 * - To compute the corrected sample standard deviation, we apply Bessel's correction and take the square root. 127 * 128 * - The sample Pearson product-moment correlation coefficient can thus be calculated as 129 * 130 * ```tex 131 * r = \frac{\operatorname{cov}_n(x,y)}{\sigma_x \sigma_y} 132 * ``` 133 * 134 * where \\(\sigma_x\\) and \\(\sigma_y\\) are the corrected sample standard deviations for \\(x\\) and \\(y\\), respectively. 135 * 136 * @param {number} [meanx] - mean value 137 * @param {number} [meany] - mean value 138 * @throws {TypeError} first argument must be a number primitive 139 * @throws {TypeError} second argument must be a number primitive 140 * @returns {Function} accumulator function 141 * 142 * @example 143 * var accumulator = incrpcorr(); 144 * 145 * var r = accumulator(); 146 * // returns null 147 * 148 * r = accumulator( 2.0, 1.0 ); 149 * // returns 0.0 150 * 151 * r = accumulator( -5.0, 3.14 ); 152 * // returns ~-1.0 153 * 154 * r = accumulator(); 155 * // returns ~-1.0 156 * 157 * @example 158 * var accumulator = incrpcorr( 2.0, -3.0 ); 159 */ 160 function incrpcorr( meanx, meany ) { 161 var M2x; 162 var M2y; 163 var dy1; 164 var dy2; 165 var dy; 166 var dx; 167 var mx; 168 var my; 169 var sx; 170 var sy; 171 var C; 172 var N; 173 174 M2x = 0.0; 175 M2y = 0.0; 176 C = 0.0; 177 N = 0; 178 if ( arguments.length ) { 179 if ( !isNumber( meanx ) ) { 180 throw new TypeError( 'invalid argument. First argument must be a number primitive. Value: `' + meanx + '`.' ); 181 } 182 if ( !isNumber( meany ) ) { 183 throw new TypeError( 'invalid argument. Second argument must be a number primitive. Value: `' + meany + '`.' ); 184 } 185 mx = meanx; 186 my = meany; 187 return accumulator2; 188 } 189 mx = 0.0; 190 my = 0.0; 191 return accumulator1; 192 193 /** 194 * If provided input values, the accumulator function returns an updated sample correlation coefficient. If not provided input values, the accumulator function returns the current sample correlation coefficient. 195 * 196 * @private 197 * @param {number} [x] - new value 198 * @param {number} [y] - new value 199 * @returns {(number|null)} sample correlation coefficient or null 200 */ 201 function accumulator1( x, y ) { 202 var n; 203 if ( arguments.length === 0 ) { 204 if ( N === 0 ) { 205 return null; 206 } 207 if ( N === 1 ) { 208 return ( isnan( M2x ) || isnan( M2y ) ) ? NaN : 0.0; 209 } 210 return ( C/(N-1) ) / ( sx*sy ); 211 } 212 N += 1; 213 214 dx = x - mx; 215 mx += dx / N; 216 M2x += dx * ( x-mx ); 217 218 dy1 = y - my; 219 my += dy1 / N; 220 dy2 = y - my; 221 M2y += dy2 * dy1; 222 223 C += dx * dy2; 224 if ( N < 2 ) { 225 return ( isnan( M2x ) || isnan( M2y ) ) ? NaN : 0.0; 226 } 227 n = N - 1; 228 sx = sqrt( M2x/n ); 229 sy = sqrt( M2y/n ); 230 return ( C/n ) / ( sx*sy ); // Note: why all the dividing by `N`? To avoid overflow. 231 } 232 233 /** 234 * If provided input values, the accumulator function returns an updated sample correlation coefficient. If not provided input values, the accumulator function returns the current sample correlation coefficient. 235 * 236 * @private 237 * @param {number} [x] - new value 238 * @param {number} [y] - new value 239 * @returns {(number|null)} sample correlation coefficient or null 240 */ 241 function accumulator2( x, y ) { 242 if ( arguments.length === 0 ) { 243 if ( N === 0 ) { 244 return null; 245 } 246 return ( C/N ) / ( sx*sy ); 247 } 248 N += 1; 249 250 dx = x - mx; 251 M2x += dx * dx; 252 253 dy = y - my; 254 M2y += dy * dy; 255 256 C += dx * dy; 257 sx = sqrt( M2x/N ); 258 sy = sqrt( M2y/N ); 259 return ( C/N ) / ( sx*sy ); // Note: why all the dividing by `N`? To avoid overflow. 260 } 261 } 262 263 264 // EXPORTS // 265 266 module.exports = incrpcorr;