main.js (13688B)
1 /** 2 * @license Apache-2.0 3 * 4 * Copyright (c) 2018 The Stdlib Authors. 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 'use strict'; 20 21 // MODULES // 22 23 var isPositiveInteger = require( '@stdlib/assert/is-positive-integer' ).isPrimitive; 24 var isSquareMatrix = require( '@stdlib/assert/is-square-matrix' ); 25 var isVectorLike = require( '@stdlib/assert/is-vector-like' ); 26 var Float64Array = require( '@stdlib/array/float64' ); 27 var sqrt = require( '@stdlib/math/base/special/sqrt' ); 28 var ctor = require( '@stdlib/ndarray/ctor' ); 29 var bctor = require( '@stdlib/ndarray/base/ctor' ); 30 var numel = require( '@stdlib/ndarray/base/numel' ); 31 32 33 // FUNCTIONS // 34 35 /** 36 * Returns a matrix. 37 * 38 * @private 39 * @param {PositiveInteger} n - matrix order 40 * @param {boolean} bool - boolean indicating whether to create a low-level ndarray 41 * @returns {ndarray} matrix 42 */ 43 function createMatrix( n, bool ) { 44 var strides; 45 var buffer; 46 var shape; 47 var f; 48 49 if ( bool ) { 50 f = bctor; 51 } else { 52 f = ctor; 53 } 54 buffer = new Float64Array( n*n ); 55 shape = [ n, n ]; 56 strides = [ n, 1 ]; 57 return f( 'float64', buffer, shape, strides, 0, 'row-major' ); 58 } 59 60 /** 61 * Sets the values along the main diagonal of a square matrix. 62 * 63 * @private 64 * @param {ndarray} matrix - input square matrix 65 * @param {number} v - value 66 * @returns {ndarray} input matrix 67 */ 68 function diagonal( matrix, v ) { 69 var M = matrix.shape[ 0 ]; 70 var i; 71 for ( i = 0; i < M; i++ ) { 72 matrix.set( i, i, v ); 73 } 74 return matrix; 75 } 76 77 /** 78 * Returns a vector. 79 * 80 * @private 81 * @param {PositiveInteger} N - number of elements 82 * @returns {ndarray} vector 83 */ 84 function createVector( N ) { 85 var strides; 86 var buffer; 87 var shape; 88 89 buffer = new Float64Array( N ); 90 shape = [ N ]; 91 strides = [ 1 ]; 92 93 return bctor( 'float64', buffer, shape, strides, 0, 'row-major' ); 94 } 95 96 97 // MAIN // 98 99 /** 100 * Returns an accumulator function which incrementally computes a sample Pearson product-moment correlation matrix. 101 * 102 * ## Method 103 * 104 * - For each sample Pearson product-moment correlation coefficient, we begin by defining the co-moment \\(C_{jn}\\) 105 * 106 * ```tex 107 * C_n = \sum_{i=1}^{n} ( x_i - \bar{x}_n ) ( y_i - \bar{y}_n ) 108 * ``` 109 * 110 * where \\(\bar{x}_n\\) and \\(\bar{y}_n\\) are the sample means for \\(x\\) and \\(y\\), respectively. 111 * 112 * - Based on Welford's method, we know the update formulas for the sample means are given by 113 * 114 * ```tex 115 * \bar{x}_n = \bar{x}_{n-1} + \frac{x_n - \bar{x}_{n-1}}{n} 116 * ``` 117 * 118 * and 119 * 120 * ```tex 121 * \bar{y}_n = \bar{y}_{n-1} + \frac{y_n - \bar{y}_{n-1}}{n} 122 * ``` 123 * 124 * - Substituting into the equation for \\(C_n\\) and rearranging terms 125 * 126 * ```tex 127 * C_n = C_{n-1} + (x_n - \bar{x}_n) (y_n - \bar{y}_{n-1}) 128 * ``` 129 * 130 * where the apparent asymmetry arises from 131 * 132 * ```tex 133 * x_n - \bar{x}_n = \frac{n-1}{n} (x_n - \bar{x}_{n-1}) 134 * ``` 135 * 136 * and, hence, the update term can be equivalently expressed 137 * 138 * ```tex 139 * \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1}) 140 * ``` 141 * 142 * - The covariance can be defined 143 * 144 * ```tex 145 * \begin{align*} 146 * \operatorname{cov}_n(x,y) &= \frac{C_n}{n} \\ 147 * &= \frac{C_{n-1} + \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n} \\ 148 * &= \frac{(n-1)\operatorname{cov}_{n-1}(x,y) + \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n} 149 * \end{align*} 150 * ``` 151 * 152 * - Applying Bessel's correction, we arrive at an update formula for calculating an unbiased sample covariance 153 * 154 * ```tex 155 * \begin{align*} 156 * \operatorname{cov}_n(x,y) &= \frac{n}{n-1}\cdot\frac{(n-1)\operatorname{cov}_{n-1}(x,y) + \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n} \\ 157 * &= \operatorname{cov}_{n-1}(x,y) + \frac{(x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n} \\ 158 * &= \frac{C_{n-1}}{n-1} + \frac{(x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n} 159 * &= \frac{C_{n-1} + \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n-1} 160 * \end{align*} 161 * ``` 162 * 163 * - To calculate the corrected sample standard deviation, we can use Welford's method, which can be derived as follows. We can express the variance as 164 * 165 * ```tex 166 * \begin{align*} 167 * S_n &= n \sigma_n^2 \\ 168 * &= \sum_{i=1}^{n} (x_i - \mu_n)^2 \\ 169 * &= \biggl(\sum_{i=1}^{n} x_i^2 \biggr) - n\mu_n^2 170 * \end{align*} 171 * ``` 172 * 173 * Accordingly, 174 * 175 * ```tex 176 * \begin{align*} 177 * S_n - S_{n-1} &= \sum_{i=1}^{n} x_i^2 - n\mu_n^2 - \sum_{i=1}^{n-1} x_i^2 + (n-1)\mu_{n-1}^2 \\ 178 * &= x_n^2 - n\mu_n^2 + (n-1)\mu_{n-1}^2 \\ 179 * &= x_n^2 - \mu_{n-1}^2 + n(\mu_{n-1}^2 - \mu_n^2) \\ 180 * &= x_n^2 - \mu_{n-1}^2 + n(\mu_{n-1} - \mu_n)(\mu_{n-1} + \mu_n) \\ 181 * &= x_n^2 - \mu_{n-1}^2 + (\mu_{n-1} - x_n)(\mu_{n-1} + \mu_n) \\ 182 * &= x_n^2 - \mu_{n-1}^2 + \mu_{n-1}^2 - x_n\mu_n - x_n\mu_{n-1} + \mu_n\mu_{n-1} \\ 183 * &= x_n^2 - x_n\mu_n - x_n\mu_{n-1} + \mu_n\mu_{n-1} \\ 184 * &= (x_n - \mu_{n-1})(x_n - \mu_n) \\ 185 * &= S_{n-1} + (x_n - \mu_{n-1})(x_n - \mu_n) 186 * \end{align*} 187 * ``` 188 * 189 * where we use the identity 190 * 191 * ```tex 192 * x_n - \mu_{n-1} = n (\mu_n - \mu_{n-1}) 193 * ``` 194 * 195 * - To compute the corrected sample standard deviation, we apply Bessel's correction and take the square root. 196 * 197 * - The sample Pearson product-moment correlation coefficient can thus be calculated as 198 * 199 * ```tex 200 * r = \frac{\operatorname{cov}_n(x,y)}{\sigma_x \sigma_y} 201 * ``` 202 * 203 * where \\(\sigma_x\\) and \\(\sigma_y\\) are the corrected sample standard deviations for \\(x\\) and \\(y\\), respectively. 204 * 205 * @param {(PositiveInteger|ndarray)} out - order of the correlation matrix or a square 2-dimensional output ndarray for storing the correlation matrix 206 * @param {ndarray} [means] - mean values 207 * @throws {TypeError} first argument must be either a positive integer or a 2-dimensional ndarray having equal dimensions 208 * @throws {TypeError} second argument must be a 1-dimensional ndarray 209 * @throws {Error} number of means must match correlation matrix dimensions 210 * @returns {Function} accumulator function 211 * 212 * @example 213 * var Float64Array = require( '@stdlib/array/float64' ); 214 * var ndarray = require( '@stdlib/ndarray/ctor' ); 215 * 216 * // Create an output correlation matrix: 217 * var buffer = new Float64Array( 4 ); 218 * var shape = [ 2, 2 ]; 219 * var strides = [ 2, 1 ]; 220 * var offset = 0; 221 * var order = 'row-major'; 222 * 223 * var corr = ndarray( 'float64', buffer, shape, strides, offset, order ); 224 * 225 * // Create a correlation matrix accumulator: 226 * var accumulator = incrpcorrmat( corr ); 227 * 228 * var out = accumulator(); 229 * // returns null 230 * 231 * // Create a data vector: 232 * buffer = new Float64Array( 2 ); 233 * shape = [ 2 ]; 234 * strides = [ 1 ]; 235 * 236 * var vec = ndarray( 'float64', buffer, shape, strides, offset, order ); 237 * 238 * // Provide data to the accumulator: 239 * vec.set( 0, 2.0 ); 240 * vec.set( 1, 1.0 ); 241 * 242 * out = accumulator( vec ); 243 * // returns <ndarray> 244 * 245 * var bool = ( out === corr ); 246 * // returns true 247 * 248 * vec.set( 0, -5.0 ); 249 * vec.set( 1, 3.14 ); 250 * 251 * out = accumulator( vec ); 252 * // returns <ndarray> 253 * 254 * // Retrieve the correlation matrix: 255 * out = accumulator(); 256 * // returns <ndarray> 257 */ 258 function incrpcorrmat( out, means ) { 259 var order; 260 var corr; 261 var M2; 262 var sd; 263 var mu; 264 var C; 265 var d; 266 var N; 267 268 N = 0; 269 if ( isPositiveInteger( out ) ) { 270 order = out; 271 corr = createMatrix( order, false ); 272 } else if ( isSquareMatrix( out ) ) { 273 order = out.shape[ 0 ]; 274 corr = out; 275 } else { 276 throw new TypeError( 'invalid argument. First argument must either specify the order of the correlation matrix or be a square 2-dimensional ndarray for storing the correlation matrix. Value: `' + out + '`.' ); 277 } 278 // Set the values along the correlation matrix diagonal to `1` (i.e., a random variable is always perfectly correlated with itself): 279 corr = diagonal( corr, 1.0 ); 280 281 // Create a scratch array for storing residuals (i.e., `x_i - xbar_{i-1}`): 282 d = new Float64Array( order ); 283 284 // Create a scratch array for storing second moments: 285 M2 = new Float64Array( order ); 286 287 // Create a scratch array for storing standard deviations: 288 sd = new Float64Array( order ); 289 290 // Create a low-level scratch matrix for storing co-moments: 291 C = createMatrix( order, true ); 292 293 if ( arguments.length > 1 ) { 294 if ( !isVectorLike( means ) ) { 295 throw new TypeError( 'invalid argument. Second argument must be a 1-dimensional ndarray. Value: `' + means + '`.' ); 296 } 297 if ( numel( means.shape ) !== order ) { 298 throw new Error( 'invalid argument. The number of elements (means) in the second argument must match correlation matrix dimensions. Expected: '+order+'. Actual: '+numel( means.shape )+'.' ); 299 } 300 mu = means; // TODO: should we copy this? Otherwise, internal state could be "corrupted" due to mutation outside the accumulator 301 return accumulator2; 302 } 303 // Create an ndarray vector for storing sample means (note: an ndarray interface is not necessary, but it reduces implementation complexity by ensuring a consistent abstraction for accessing and updating sample means): 304 mu = createVector( order ); 305 306 return accumulator1; 307 308 /** 309 * If provided a data vector, the accumulator function returns an updated sample correlation matrix. If not provided a data vector, the accumulator function returns the current sample correlation matrix. 310 * 311 * @private 312 * @param {ndarray} [v] - data vector 313 * @throws {TypeError} must provide a 1-dimensional ndarray 314 * @throws {Error} vector length must match correlation matrix dimensions 315 * @returns {(ndarray|null)} sample correlation matrix or null 316 */ 317 function accumulator1( v ) { 318 var denom; 319 var rdx; 320 var cij; 321 var rij; 322 var sdi; 323 var di; 324 var vi; 325 var m; 326 var n; 327 var r; 328 var i; 329 var j; 330 if ( arguments.length === 0 ) { 331 if ( N === 0 ) { 332 return null; 333 } 334 return corr; 335 } 336 if ( !isVectorLike( v ) ) { 337 throw new TypeError( 'invalid argument. Must provide a 1-dimensional ndarray. Value: `' + v + '`.' ); 338 } 339 if ( v.shape[ 0 ] !== order ) { 340 throw new Error( 'invalid argument. Vector length must match correlation matrix dimensions. Expected: '+order+'. Actual: '+v.shape[ 0 ]+'.' ); 341 } 342 n = N; 343 N += 1; 344 r = n / N; 345 346 denom = n || 1; // Bessel's correction (avoiding divide-by-zero below) 347 348 if ( N === 1 ) { 349 for ( i = 0; i < order; i++ ) { 350 vi = v.get( i ); 351 m = mu.get( i ); 352 353 // Compute the residual: 354 di = vi - m; 355 356 // Update the sample mean: 357 m += di / N; 358 mu.set( i, m ); 359 360 // Update the sample standard deviation: 361 d[ i ] = di; 362 M2[ i ] += di * ( vi-m ); 363 sd[ i ] = sqrt( M2[i]/denom ); 364 365 // Update the co-moments and correlation matrix, recognizing that the matrices are symmetric... 366 rdx = r * d[i]; // if `n=0`, `r=0.0` 367 for ( j = 0; j < i; j++ ) { 368 cij = C.get( i, j ) + ( rdx*d[j] ); 369 C.set( i, j, cij ); 370 C.set( j, i, cij ); // via symmetry 371 } 372 } 373 } else { 374 for ( i = 0; i < order; i++ ) { 375 vi = v.get( i ); 376 m = mu.get( i ); 377 378 // Compute the residual: 379 di = vi - m; 380 381 // Update the sample mean: 382 m += di / N; 383 mu.set( i, m ); 384 385 // Update the sample standard deviation: 386 d[ i ] = di; 387 M2[ i ] += di * ( vi-m ); 388 sd[ i ] = sqrt( M2[i]/denom ); 389 390 rdx = r * d[i]; 391 sdi = sd[ i ]; 392 for ( j = 0; j < i; j++ ) { 393 cij = C.get( i, j ) + ( rdx*d[j] ); 394 C.set( i, j, cij ); 395 C.set( j, i, cij ); // via symmetry 396 397 rij = ( cij/denom ) / ( sdi*sd[j] ); 398 corr.set( i, j, rij ); 399 corr.set( j, i, rij ); // via symmetry 400 } 401 } 402 } 403 return corr; 404 } 405 406 /** 407 * If provided a data vector, the accumulator function returns an updated sample correlation matrix. If not provided a data vector, the accumulator function returns the current sample correlation matrix. 408 * 409 * @private 410 * @param {ndarray} [v] - data vector 411 * @throws {TypeError} must provide a 1-dimensional ndarray 412 * @throws {Error} vector length must match correlation matrix dimensions 413 * @returns {(ndarray|null)} sample correlation matrix or null 414 */ 415 function accumulator2( v ) { 416 var rij; 417 var cij; 418 var sdi; 419 var di; 420 var i; 421 var j; 422 if ( arguments.length === 0 ) { 423 if ( N === 0 ) { 424 return null; 425 } 426 return corr; 427 } 428 if ( !isVectorLike( v ) ) { 429 throw new TypeError( 'invalid argument. Must provide a 1-dimensional ndarray. Value: `' + v + '`.' ); 430 } 431 if ( v.shape[ 0 ] !== order ) { 432 throw new Error( 'invalid argument. Vector length must match correlation matrix dimensions. Expected: '+order+'. Actual: '+v.shape[ 0 ]+'.' ); 433 } 434 N += 1; 435 for ( i = 0; i < order; i++ ) { 436 // Compute the residual: 437 di = v.get( i ) - mu.get( i ); 438 d[ i ] = di; 439 440 // Update the standard deviation: 441 M2[ i ] += di * di; 442 sd[ i ] = sqrt( M2[i]/N ); 443 444 // Update the co-moments and correlation matrix, recognizing that the matrices are symmetric... 445 sdi = sd[ i ]; 446 for ( j = 0; j < i; j++ ) { 447 cij = C.get( i, j ) + ( di*d[j] ); 448 C.set( i, j, cij ); 449 C.set( j, i, cij ); // via symmetry 450 451 rij = ( cij/N ) / ( sdi*sd[j] ); 452 corr.set( i, j, rij ); 453 corr.set( j, i, rij ); // via symmetry 454 } 455 } 456 return corr; 457 } 458 } 459 460 461 // EXPORTS // 462 463 module.exports = incrpcorrmat;