main.js (13820B)
1 /** 2 * @license Apache-2.0 3 * 4 * Copyright (c) 2018 The Stdlib Authors. 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 'use strict'; 20 21 // MODULES // 22 23 var isPositiveInteger = require( '@stdlib/assert/is-positive-integer' ).isPrimitive; 24 var isSquareMatrix = require( '@stdlib/assert/is-square-matrix' ); 25 var isVectorLike = require( '@stdlib/assert/is-vector-like' ); 26 var Float64Array = require( '@stdlib/array/float64' ); 27 var sqrt = require( '@stdlib/math/base/special/sqrt' ); 28 var ctor = require( '@stdlib/ndarray/ctor' ); 29 var bctor = require( '@stdlib/ndarray/base/ctor' ); 30 var numel = require( '@stdlib/ndarray/base/numel' ); 31 32 33 // FUNCTIONS // 34 35 /** 36 * Returns a matrix. 37 * 38 * @private 39 * @param {PositiveInteger} n - matrix order 40 * @param {boolean} bool - boolean indicating whether to create a low-level ndarray 41 * @returns {ndarray} matrix 42 */ 43 function createMatrix( n, bool ) { 44 var strides; 45 var buffer; 46 var shape; 47 var f; 48 49 if ( bool ) { 50 f = bctor; 51 } else { 52 f = ctor; 53 } 54 buffer = new Float64Array( n*n ); 55 shape = [ n, n ]; 56 strides = [ n, 1 ]; 57 return f( 'float64', buffer, shape, strides, 0, 'row-major' ); 58 } 59 60 /** 61 * Returns a vector. 62 * 63 * @private 64 * @param {PositiveInteger} N - number of elements 65 * @returns {ndarray} vector 66 */ 67 function createVector( N ) { 68 var strides; 69 var buffer; 70 var shape; 71 72 buffer = new Float64Array( N ); 73 shape = [ N ]; 74 strides = [ 1 ]; 75 76 return bctor( 'float64', buffer, shape, strides, 0, 'row-major' ); 77 } 78 79 80 // MAIN // 81 82 /** 83 * Returns an accumulator function which incrementally computes a sample Pearson product-moment correlation distance matrix. 84 * 85 * ## Method 86 * 87 * - For each sample Pearson product-moment correlation distance, we begin by defining the co-moment \\(C_{jn}\\) 88 * 89 * ```tex 90 * C_n = \sum_{i=1}^{n} ( x_i - \bar{x}_n ) ( y_i - \bar{y}_n ) 91 * ``` 92 * 93 * where \\(\bar{x}_n\\) and \\(\bar{y}_n\\) are the sample means for \\(x\\) and \\(y\\), respectively. 94 * 95 * - Based on Welford's method, we know the update formulas for the sample means are given by 96 * 97 * ```tex 98 * \bar{x}_n = \bar{x}_{n-1} + \frac{x_n - \bar{x}_{n-1}}{n} 99 * ``` 100 * 101 * and 102 * 103 * ```tex 104 * \bar{y}_n = \bar{y}_{n-1} + \frac{y_n - \bar{y}_{n-1}}{n} 105 * ``` 106 * 107 * - Substituting into the equation for \\(C_n\\) and rearranging terms 108 * 109 * ```tex 110 * C_n = C_{n-1} + (x_n - \bar{x}_n) (y_n - \bar{y}_{n-1}) 111 * ``` 112 * 113 * where the apparent asymmetry arises from 114 * 115 * ```tex 116 * x_n - \bar{x}_n = \frac{n-1}{n} (x_n - \bar{x}_{n-1}) 117 * ``` 118 * 119 * and, hence, the update term can be equivalently expressed 120 * 121 * ```tex 122 * \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1}) 123 * ``` 124 * 125 * - The covariance can be defined 126 * 127 * ```tex 128 * \begin{align*} 129 * \operatorname{cov}_n(x,y) &= \frac{C_n}{n} \\ 130 * &= \frac{C_{n-1} + \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n} \\ 131 * &= \frac{(n-1)\operatorname{cov}_{n-1}(x,y) + \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n} 132 * \end{align*} 133 * ``` 134 * 135 * - Applying Bessel's correction, we arrive at an update formula for calculating an unbiased sample covariance 136 * 137 * ```tex 138 * \begin{align*} 139 * \operatorname{cov}_n(x,y) &= \frac{n}{n-1}\cdot\frac{(n-1)\operatorname{cov}_{n-1}(x,y) + \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n} \\ 140 * &= \operatorname{cov}_{n-1}(x,y) + \frac{(x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n} \\ 141 * &= \frac{C_{n-1}}{n-1} + \frac{(x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n} 142 * &= \frac{C_{n-1} + \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n-1} 143 * \end{align*} 144 * ``` 145 * 146 * - To calculate the corrected sample standard deviation, we can use Welford's method, which can be derived as follows. We can express the variance as 147 * 148 * ```tex 149 * \begin{align*} 150 * S_n &= n \sigma_n^2 \\ 151 * &= \sum_{i=1}^{n} (x_i - \mu_n)^2 \\ 152 * &= \biggl(\sum_{i=1}^{n} x_i^2 \biggr) - n\mu_n^2 153 * \end{align*} 154 * ``` 155 * 156 * Accordingly, 157 * 158 * ```tex 159 * \begin{align*} 160 * S_n - S_{n-1} &= \sum_{i=1}^{n} x_i^2 - n\mu_n^2 - \sum_{i=1}^{n-1} x_i^2 + (n-1)\mu_{n-1}^2 \\ 161 * &= x_n^2 - n\mu_n^2 + (n-1)\mu_{n-1}^2 \\ 162 * &= x_n^2 - \mu_{n-1}^2 + n(\mu_{n-1}^2 - \mu_n^2) \\ 163 * &= x_n^2 - \mu_{n-1}^2 + n(\mu_{n-1} - \mu_n)(\mu_{n-1} + \mu_n) \\ 164 * &= x_n^2 - \mu_{n-1}^2 + (\mu_{n-1} - x_n)(\mu_{n-1} + \mu_n) \\ 165 * &= x_n^2 - \mu_{n-1}^2 + \mu_{n-1}^2 - x_n\mu_n - x_n\mu_{n-1} + \mu_n\mu_{n-1} \\ 166 * &= x_n^2 - x_n\mu_n - x_n\mu_{n-1} + \mu_n\mu_{n-1} \\ 167 * &= (x_n - \mu_{n-1})(x_n - \mu_n) \\ 168 * &= S_{n-1} + (x_n - \mu_{n-1})(x_n - \mu_n) 169 * \end{align*} 170 * ``` 171 * 172 * where we use the identity 173 * 174 * ```tex 175 * x_n - \mu_{n-1} = n (\mu_n - \mu_{n-1}) 176 * ``` 177 * 178 * - To compute the corrected sample standard deviation, we apply Bessel's correction and take the square root. 179 * 180 * - The sample Pearson product-moment correlation coefficient can thus be calculated as 181 * 182 * ```tex 183 * r = \frac{\operatorname{cov}_n(x,y)}{\sigma_x \sigma_y} 184 * ``` 185 * 186 * where \\(\sigma_x\\) and \\(\sigma_y\\) are the corrected sample standard deviations for \\(x\\) and \\(y\\), respectively. 187 * 188 * - The sample Pearson product-moment correlation distance is defined as 189 * 190 * ```tex 191 * d = 1 - r = 1 - \frac{\operatorname{cov}_n(x,y)}{\sigma_x \sigma_y} 192 * ``` 193 * 194 * - The implementation thus computes each sample Pearson product-moment correlation coefficient \\(r\\) and subtracts each coefficient from 1. 195 * 196 * @param {(PositiveInteger|ndarray)} out - order of the correlation distance matrix or a square 2-dimensional output ndarray for storing the correlation distance matrix 197 * @param {ndarray} [means] - mean values 198 * @throws {TypeError} first argument must be either a positive integer or a 2-dimensional ndarray having equal dimensions 199 * @throws {TypeError} second argument must be a 1-dimensional ndarray 200 * @throws {Error} number of means must match correlation distance matrix dimensions 201 * @returns {Function} accumulator function 202 * 203 * @example 204 * var Float64Array = require( '@stdlib/array/float64' ); 205 * var ndarray = require( '@stdlib/ndarray/ctor' ); 206 * 207 * // Create an output correlation distance matrix: 208 * var buffer = new Float64Array( 4 ); 209 * var shape = [ 2, 2 ]; 210 * var strides = [ 2, 1 ]; 211 * var offset = 0; 212 * var order = 'row-major'; 213 * 214 * var dist = ndarray( 'float64', buffer, shape, strides, offset, order ); 215 * 216 * // Create a correlation distance matrix accumulator: 217 * var accumulator = incrpcorrdistmat( dist ); 218 * 219 * var out = accumulator(); 220 * // returns null 221 * 222 * // Create a data vector: 223 * buffer = new Float64Array( 2 ); 224 * shape = [ 2 ]; 225 * strides = [ 1 ]; 226 * 227 * var vec = ndarray( 'float64', buffer, shape, strides, offset, order ); 228 * 229 * // Provide data to the accumulator: 230 * vec.set( 0, 2.0 ); 231 * vec.set( 1, 1.0 ); 232 * 233 * out = accumulator( vec ); 234 * // returns <ndarray> 235 * 236 * var bool = ( out === dist ); 237 * // returns true 238 * 239 * vec.set( 0, -5.0 ); 240 * vec.set( 1, 3.14 ); 241 * 242 * out = accumulator( vec ); 243 * // returns <ndarray> 244 * 245 * // Retrieve the correlation distance matrix: 246 * out = accumulator(); 247 * // returns <ndarray> 248 */ 249 function incrpcorrdistmat( out, means ) { 250 var order; 251 var dist; 252 var M2; 253 var sd; 254 var mu; 255 var C; 256 var d; 257 var N; 258 259 N = 0; 260 if ( isPositiveInteger( out ) ) { 261 order = out; 262 dist = createMatrix( order, false ); 263 } else if ( isSquareMatrix( out ) ) { 264 order = out.shape[ 0 ]; 265 dist = out; 266 } else { 267 throw new TypeError( 'invalid argument. First argument must either specify the order of the correlation distance matrix or be a square 2-dimensional ndarray for storing the correlation distance matrix. Value: `' + out + '`.' ); 268 } 269 // Create a scratch array for storing residuals (i.e., `x_i - xbar_{i-1}`): 270 d = new Float64Array( order ); 271 272 // Create a scratch array for storing second moments: 273 M2 = new Float64Array( order ); 274 275 // Create a scratch array for storing standard deviations: 276 sd = new Float64Array( order ); 277 278 // Create a low-level scratch matrix for storing co-moments: 279 C = createMatrix( order, true ); 280 281 if ( arguments.length > 1 ) { 282 if ( !isVectorLike( means ) ) { 283 throw new TypeError( 'invalid argument. Second argument must be a 1-dimensional ndarray. Value: `' + means + '`.' ); 284 } 285 if ( numel( means.shape ) !== order ) { 286 throw new Error( 'invalid argument. The number of elements (means) in the second argument must match correlation distance matrix dimensions. Expected: '+order+'. Actual: '+numel( means.shape )+'.' ); 287 } 288 mu = means; // TODO: should we copy this? Otherwise, internal state could be "corrupted" due to mutation outside the accumulator 289 return accumulator2; 290 } 291 // Create an ndarray vector for storing sample means (note: an ndarray interface is not necessary, but it reduces implementation complexity by ensuring a consistent abstraction for accessing and updating sample means): 292 mu = createVector( order ); 293 294 return accumulator1; 295 296 /** 297 * If provided a data vector, the accumulator function returns an updated sample correlation distance matrix. If not provided a data vector, the accumulator function returns the current sample correlation distance matrix. 298 * 299 * @private 300 * @param {ndarray} [v] - data vector 301 * @throws {TypeError} must provide a 1-dimensional ndarray 302 * @throws {Error} vector length must match correlation distance matrix dimensions 303 * @returns {(ndarray|null)} sample correlation distance matrix or null 304 */ 305 function accumulator1( v ) { 306 var denom; 307 var rdx; 308 var cij; 309 var dij; 310 var sdi; 311 var di; 312 var vi; 313 var m; 314 var n; 315 var r; 316 var i; 317 var j; 318 if ( arguments.length === 0 ) { 319 if ( N === 0 ) { 320 return null; 321 } 322 return dist; 323 } 324 if ( !isVectorLike( v ) ) { 325 throw new TypeError( 'invalid argument. Must provide a 1-dimensional ndarray. Value: `' + v + '`.' ); 326 } 327 if ( v.shape[ 0 ] !== order ) { 328 throw new Error( 'invalid argument. Vector length must match correlation matrix dimensions. Expected: '+order+'. Actual: '+v.shape[ 0 ]+'.' ); 329 } 330 n = N; 331 N += 1; 332 r = n / N; 333 334 denom = n || 1; // Bessel's correction (avoiding divide-by-zero below) 335 336 if ( N === 1 ) { 337 for ( i = 0; i < order; i++ ) { 338 vi = v.get( i ); 339 m = mu.get( i ); 340 341 // Compute the residual: 342 di = vi - m; 343 344 // Update the sample mean: 345 m += di / N; 346 mu.set( i, m ); 347 348 // Update the sample standard deviation: 349 d[ i ] = di; 350 M2[ i ] += di * ( vi-m ); 351 sd[ i ] = sqrt( M2[i]/denom ); 352 353 // Update the co-moments and correlation distance matrix, recognizing that the matrices are symmetric... 354 rdx = r * d[i]; // if `n=0`, `r=0.0` 355 for ( j = 0; j <= i; j++ ) { 356 cij = C.get( i, j ) + ( rdx*d[j] ); 357 C.set( i, j, cij ); 358 C.set( j, i, cij ); // via symmetry 359 } 360 } 361 } else { 362 for ( i = 0; i < order; i++ ) { 363 vi = v.get( i ); 364 m = mu.get( i ); 365 366 // Compute the residual: 367 di = vi - m; 368 369 // Update the sample mean: 370 m += di / N; 371 mu.set( i, m ); 372 373 // Update the sample standard deviation: 374 d[ i ] = di; 375 M2[ i ] += di * ( vi-m ); 376 sd[ i ] = sqrt( M2[i]/denom ); 377 378 // Update the co-moments and correlation distance matrix, recognizing that the matrices are symmetric... 379 rdx = r * d[i]; 380 sdi = sd[ i ]; 381 for ( j = 0; j < i; j++ ) { 382 cij = C.get( i, j ) + ( rdx*d[j] ); 383 C.set( i, j, cij ); 384 C.set( j, i, cij ); // via symmetry 385 386 dij = 1.0 - ( (cij/denom)/(sdi*sd[j]) ); 387 dist.set( i, j, dij ); 388 dist.set( j, i, dij ); // via symmetry 389 } 390 } 391 } 392 return dist; 393 } 394 395 /** 396 * If provided a data vector, the accumulator function returns an updated sample correlation distance matrix. If not provided a data vector, the accumulator function returns the current sample correlation distance matrix. 397 * 398 * @private 399 * @param {ndarray} [v] - data vector 400 * @throws {TypeError} must provide a 1-dimensional ndarray 401 * @throws {Error} vector length must match correlation distance matrix dimensions 402 * @returns {(ndarray|null)} sample correlation distance matrix or null 403 */ 404 function accumulator2( v ) { 405 var dij; 406 var cij; 407 var sdi; 408 var di; 409 var i; 410 var j; 411 if ( arguments.length === 0 ) { 412 if ( N === 0 ) { 413 return null; 414 } 415 return dist; 416 } 417 if ( !isVectorLike( v ) ) { 418 throw new TypeError( 'invalid argument. Must provide a 1-dimensional ndarray. Value: `' + v + '`.' ); 419 } 420 if ( v.shape[ 0 ] !== order ) { 421 throw new Error( 'invalid argument. Vector length must match correlation distance matrix dimensions. Expected: '+order+'. Actual: '+v.shape[ 0 ]+'.' ); 422 } 423 N += 1; 424 for ( i = 0; i < order; i++ ) { 425 // Compute the residual: 426 di = v.get( i ) - mu.get( i ); 427 428 // Update standard deviation: 429 d[ i ] = di; 430 M2[ i ] += di * di; 431 sd[ i ] = sqrt( M2[i]/N ); 432 433 // Update the co-moments and correlation distance matrix, recognizing that the matrices are symmetric... 434 sdi = sd[ i ]; 435 for ( j = 0; j < i; j++ ) { 436 cij = C.get( i, j ) + ( di*d[j] ); 437 C.set( i, j, cij ); 438 C.set( j, i, cij ); // via symmetry 439 440 dij = 1.0 - ( (cij/N)/(sdi*sd[j]) ); 441 dist.set( i, j, dij ); 442 dist.set( j, i, dij ); // via symmetry 443 } 444 } 445 return dist; 446 } 447 } 448 449 450 // EXPORTS // 451 452 module.exports = incrpcorrdistmat;