main.js - time-to-botec - Benchmark sampling in different programming languages

main.js (13688B)
      1 /**
      2 * @license Apache-2.0
      3 *
      4 * Copyright (c) 2018 The Stdlib Authors.
      5 *
      6 * Licensed under the Apache License, Version 2.0 (the "License");
      7 * you may not use this file except in compliance with the License.
      8 * You may obtain a copy of the License at
      9 *
     10 *    http://www.apache.org/licenses/LICENSE-2.0
     11 *
     12 * Unless required by applicable law or agreed to in writing, software
     13 * distributed under the License is distributed on an "AS IS" BASIS,
     14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 * See the License for the specific language governing permissions and
     16 * limitations under the License.
     17 */
     18 
     19 'use strict';
     20 
     21 // MODULES //
     22 
     23 var isPositiveInteger = require( '@stdlib/assert/is-positive-integer' ).isPrimitive;
     24 var isSquareMatrix = require( '@stdlib/assert/is-square-matrix' );
     25 var isVectorLike = require( '@stdlib/assert/is-vector-like' );
     26 var Float64Array = require( '@stdlib/array/float64' );
     27 var sqrt = require( '@stdlib/math/base/special/sqrt' );
     28 var ctor = require( '@stdlib/ndarray/ctor' );
     29 var bctor = require( '@stdlib/ndarray/base/ctor' );
     30 var numel = require( '@stdlib/ndarray/base/numel' );
     31 
     32 
     33 // FUNCTIONS //
     34 
     35 /**
     36 * Returns a matrix.
     37 *
     38 * @private
     39 * @param {PositiveInteger} n - matrix order
     40 * @param {boolean} bool - boolean indicating whether to create a low-level ndarray
     41 * @returns {ndarray} matrix
     42 */
     43 function createMatrix( n, bool ) {
     44 	var strides;
     45 	var buffer;
     46 	var shape;
     47 	var f;
     48 
     49 	if ( bool ) {
     50 		f = bctor;
     51 	} else {
     52 		f = ctor;
     53 	}
     54 	buffer = new Float64Array( n*n );
     55 	shape = [ n, n ];
     56 	strides = [ n, 1 ];
     57 	return f( 'float64', buffer, shape, strides, 0, 'row-major' );
     58 }
     59 
     60 /**
     61 * Sets the values along the main diagonal of a square matrix.
     62 *
     63 * @private
     64 * @param {ndarray} matrix - input square matrix
     65 * @param {number} v - value
     66 * @returns {ndarray} input matrix
     67 */
     68 function diagonal( matrix, v ) {
     69 	var M = matrix.shape[ 0 ];
     70 	var i;
     71 	for ( i = 0; i < M; i++ ) {
     72 		matrix.set( i, i, v );
     73 	}
     74 	return matrix;
     75 }
     76 
     77 /**
     78 * Returns a vector.
     79 *
     80 * @private
     81 * @param {PositiveInteger} N - number of elements
     82 * @returns {ndarray} vector
     83 */
     84 function createVector( N ) {
     85 	var strides;
     86 	var buffer;
     87 	var shape;
     88 
     89 	buffer = new Float64Array( N );
     90 	shape = [ N ];
     91 	strides = [ 1 ];
     92 
     93 	return bctor( 'float64', buffer, shape, strides, 0, 'row-major' );
     94 }
     95 
     96 
     97 // MAIN //
     98 
     99 /**
    100 * Returns an accumulator function which incrementally computes a sample Pearson product-moment correlation matrix.
    101 *
    102 * ## Method
    103 *
    104 * -   For each sample Pearson product-moment correlation coefficient, we begin by defining the co-moment \\(C_{jn}\\)
    105 *
    106 *     ```tex
    107 *     C_n = \sum_{i=1}^{n} ( x_i - \bar{x}_n ) ( y_i - \bar{y}_n )
    108 *     ```
    109 *
    110 *     where \\(\bar{x}_n\\) and \\(\bar{y}_n\\) are the sample means for \\(x\\) and \\(y\\), respectively.
    111 *
    112 * -   Based on Welford's method, we know the update formulas for the sample means are given by
    113 *
    114 *     ```tex
    115 *     \bar{x}_n = \bar{x}_{n-1} + \frac{x_n - \bar{x}_{n-1}}{n}
    116 *     ```
    117 *
    118 *     and
    119 *
    120 *     ```tex
    121 *     \bar{y}_n = \bar{y}_{n-1} + \frac{y_n - \bar{y}_{n-1}}{n}
    122 *     ```
    123 *
    124 * -   Substituting into the equation for \\(C_n\\) and rearranging terms
    125 *
    126 *     ```tex
    127 *     C_n = C_{n-1} + (x_n - \bar{x}_n) (y_n - \bar{y}_{n-1})
    128 *     ```
    129 *
    130 *     where the apparent asymmetry arises from
    131 *
    132 *     ```tex
    133 *     x_n - \bar{x}_n = \frac{n-1}{n} (x_n - \bar{x}_{n-1})
    134 *     ```
    135 *
    136 *     and, hence, the update term can be equivalently expressed
    137 *
    138 *     ```tex
    139 *     \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})
    140 *     ```
    141 *
    142 * -   The covariance can be defined
    143 *
    144 *     ```tex
    145 *     \begin{align*}
    146 *     \operatorname{cov}_n(x,y) &= \frac{C_n}{n} \\
    147 *     &= \frac{C_{n-1} + \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n} \\
    148 *     &= \frac{(n-1)\operatorname{cov}_{n-1}(x,y) + \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n}
    149 *     \end{align*}
    150 *     ```
    151 *
    152 * -   Applying Bessel's correction, we arrive at an update formula for calculating an unbiased sample covariance
    153 *
    154 *     ```tex
    155 *     \begin{align*}
    156 *     \operatorname{cov}_n(x,y) &= \frac{n}{n-1}\cdot\frac{(n-1)\operatorname{cov}_{n-1}(x,y) + \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n} \\
    157 *     &= \operatorname{cov}_{n-1}(x,y) + \frac{(x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n} \\
    158 *     &= \frac{C_{n-1}}{n-1} + \frac{(x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n}
    159 *     &= \frac{C_{n-1} + \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})}{n-1}
    160 *     \end{align*}
    161 *     ```
    162 *
    163 * -   To calculate the corrected sample standard deviation, we can use Welford's method, which can be derived as follows. We can express the variance as
    164 *
    165 *     ```tex
    166 *     \begin{align*}
    167 *     S_n &= n \sigma_n^2 \\
    168 *         &= \sum_{i=1}^{n} (x_i - \mu_n)^2 \\
    169 *         &= \biggl(\sum_{i=1}^{n} x_i^2 \biggr) - n\mu_n^2
    170 *     \end{align*}
    171 *     ```
    172 *
    173 *     Accordingly,
    174 *
    175 *     ```tex
    176 *     \begin{align*}
    177 *     S_n - S_{n-1} &= \sum_{i=1}^{n} x_i^2 - n\mu_n^2 - \sum_{i=1}^{n-1} x_i^2 + (n-1)\mu_{n-1}^2 \\
    178 *                   &= x_n^2 - n\mu_n^2 + (n-1)\mu_{n-1}^2 \\
    179 *                   &= x_n^2 - \mu_{n-1}^2 + n(\mu_{n-1}^2 - \mu_n^2) \\
    180 *                   &= x_n^2 - \mu_{n-1}^2 + n(\mu_{n-1} - \mu_n)(\mu_{n-1} + \mu_n) \\
    181 *                   &= x_n^2 - \mu_{n-1}^2 + (\mu_{n-1} - x_n)(\mu_{n-1} + \mu_n) \\
    182 *                   &= x_n^2 - \mu_{n-1}^2 + \mu_{n-1}^2 - x_n\mu_n - x_n\mu_{n-1} + \mu_n\mu_{n-1} \\
    183 *                   &= x_n^2 - x_n\mu_n - x_n\mu_{n-1} + \mu_n\mu_{n-1} \\
    184 *                   &= (x_n - \mu_{n-1})(x_n - \mu_n) \\
    185 *                   &= S_{n-1} + (x_n - \mu_{n-1})(x_n - \mu_n)
    186 *     \end{align*}
    187 *     ```
    188 *
    189 *     where we use the identity
    190 *
    191 *     ```tex
    192 *     x_n - \mu_{n-1} = n (\mu_n - \mu_{n-1})
    193 *     ```
    194 *
    195 * -   To compute the corrected sample standard deviation, we apply Bessel's correction and take the square root.
    196 *
    197 * -   The sample Pearson product-moment correlation coefficient can thus be calculated as
    198 *
    199 *     ```tex
    200 *     r = \frac{\operatorname{cov}_n(x,y)}{\sigma_x \sigma_y}
    201 *     ```
    202 *
    203 *     where \\(\sigma_x\\) and \\(\sigma_y\\) are the corrected sample standard deviations for \\(x\\) and \\(y\\), respectively.
    204 *
    205 * @param {(PositiveInteger|ndarray)} out - order of the correlation matrix or a square 2-dimensional output ndarray for storing the correlation matrix
    206 * @param {ndarray} [means] - mean values
    207 * @throws {TypeError} first argument must be either a positive integer or a 2-dimensional ndarray having equal dimensions
    208 * @throws {TypeError} second argument must be a 1-dimensional ndarray
    209 * @throws {Error} number of means must match correlation matrix dimensions
    210 * @returns {Function} accumulator function
    211 *
    212 * @example
    213 * var Float64Array = require( '@stdlib/array/float64' );
    214 * var ndarray = require( '@stdlib/ndarray/ctor' );
    215 *
    216 * // Create an output correlation matrix:
    217 * var buffer = new Float64Array( 4 );
    218 * var shape = [ 2, 2 ];
    219 * var strides = [ 2, 1 ];
    220 * var offset = 0;
    221 * var order = 'row-major';
    222 *
    223 * var corr = ndarray( 'float64', buffer, shape, strides, offset, order );
    224 *
    225 * // Create a correlation matrix accumulator:
    226 * var accumulator = incrpcorrmat( corr );
    227 *
    228 * var out = accumulator();
    229 * // returns null
    230 *
    231 * // Create a data vector:
    232 * buffer = new Float64Array( 2 );
    233 * shape = [ 2 ];
    234 * strides = [ 1 ];
    235 *
    236 * var vec = ndarray( 'float64', buffer, shape, strides, offset, order );
    237 *
    238 * // Provide data to the accumulator:
    239 * vec.set( 0, 2.0 );
    240 * vec.set( 1, 1.0 );
    241 *
    242 * out = accumulator( vec );
    243 * // returns <ndarray>
    244 *
    245 * var bool = ( out === corr );
    246 * // returns true
    247 *
    248 * vec.set( 0, -5.0 );
    249 * vec.set( 1, 3.14 );
    250 *
    251 * out = accumulator( vec );
    252 * // returns <ndarray>
    253 *
    254 * // Retrieve the correlation matrix:
    255 * out = accumulator();
    256 * // returns <ndarray>
    257 */
    258 function incrpcorrmat( out, means ) {
    259 	var order;
    260 	var corr;
    261 	var M2;
    262 	var sd;
    263 	var mu;
    264 	var C;
    265 	var d;
    266 	var N;
    267 
    268 	N = 0;
    269 	if ( isPositiveInteger( out ) ) {
    270 		order = out;
    271 		corr = createMatrix( order, false );
    272 	} else if ( isSquareMatrix( out ) ) {
    273 		order = out.shape[ 0 ];
    274 		corr = out;
    275 	} else {
    276 		throw new TypeError( 'invalid argument. First argument must either specify the order of the correlation matrix or be a square 2-dimensional ndarray for storing the correlation matrix. Value: `' + out + '`.' );
    277 	}
    278 	// Set the values along the correlation matrix diagonal to `1` (i.e., a random variable is always perfectly correlated with itself):
    279 	corr = diagonal( corr, 1.0 );
    280 
    281 	// Create a scratch array for storing residuals (i.e., `x_i - xbar_{i-1}`):
    282 	d = new Float64Array( order );
    283 
    284 	// Create a scratch array for storing second moments:
    285 	M2 = new Float64Array( order );
    286 
    287 	// Create a scratch array for storing standard deviations:
    288 	sd = new Float64Array( order );
    289 
    290 	// Create a low-level scratch matrix for storing co-moments:
    291 	C = createMatrix( order, true );
    292 
    293 	if ( arguments.length > 1 ) {
    294 		if ( !isVectorLike( means ) ) {
    295 			throw new TypeError( 'invalid argument. Second argument must be a 1-dimensional ndarray. Value: `' + means + '`.' );
    296 		}
    297 		if ( numel( means.shape ) !== order ) {
    298 			throw new Error( 'invalid argument. The number of elements (means) in the second argument must match correlation matrix dimensions. Expected: '+order+'. Actual: '+numel( means.shape )+'.' );
    299 		}
    300 		mu = means; // TODO: should we copy this? Otherwise, internal state could be "corrupted" due to mutation outside the accumulator
    301 		return accumulator2;
    302 	}
    303 	// Create an ndarray vector for storing sample means (note: an ndarray interface is not necessary, but it reduces implementation complexity by ensuring a consistent abstraction for accessing and updating sample means):
    304 	mu = createVector( order );
    305 
    306 	return accumulator1;
    307 
    308 	/**
    309 	* If provided a data vector, the accumulator function returns an updated sample correlation matrix. If not provided a data vector, the accumulator function returns the current sample correlation matrix.
    310 	*
    311 	* @private
    312 	* @param {ndarray} [v] - data vector
    313 	* @throws {TypeError} must provide a 1-dimensional ndarray
    314 	* @throws {Error} vector length must match correlation matrix dimensions
    315 	* @returns {(ndarray|null)} sample correlation matrix or null
    316 	*/
    317 	function accumulator1( v ) {
    318 		var denom;
    319 		var rdx;
    320 		var cij;
    321 		var rij;
    322 		var sdi;
    323 		var di;
    324 		var vi;
    325 		var m;
    326 		var n;
    327 		var r;
    328 		var i;
    329 		var j;
    330 		if ( arguments.length === 0 ) {
    331 			if ( N === 0 ) {
    332 				return null;
    333 			}
    334 			return corr;
    335 		}
    336 		if ( !isVectorLike( v ) ) {
    337 			throw new TypeError( 'invalid argument. Must provide a 1-dimensional ndarray. Value: `' + v + '`.' );
    338 		}
    339 		if ( v.shape[ 0 ] !== order ) {
    340 			throw new Error( 'invalid argument. Vector length must match correlation matrix dimensions. Expected: '+order+'. Actual: '+v.shape[ 0 ]+'.' );
    341 		}
    342 		n = N;
    343 		N += 1;
    344 		r = n / N;
    345 
    346 		denom = n || 1; // Bessel's correction (avoiding divide-by-zero below)
    347 
    348 		if ( N === 1 ) {
    349 			for ( i = 0; i < order; i++ ) {
    350 				vi = v.get( i );
    351 				m = mu.get( i );
    352 
    353 				// Compute the residual:
    354 				di = vi - m;
    355 
    356 				// Update the sample mean:
    357 				m += di / N;
    358 				mu.set( i, m );
    359 
    360 				// Update the sample standard deviation:
    361 				d[ i ] = di;
    362 				M2[ i ] += di * ( vi-m );
    363 				sd[ i ] = sqrt( M2[i]/denom );
    364 
    365 				// Update the co-moments and correlation matrix, recognizing that the matrices are symmetric...
    366 				rdx = r * d[i]; // if `n=0`, `r=0.0`
    367 				for ( j = 0; j < i; j++ ) {
    368 					cij = C.get( i, j ) + ( rdx*d[j] );
    369 					C.set( i, j, cij );
    370 					C.set( j, i, cij ); // via symmetry
    371 				}
    372 			}
    373 		} else {
    374 			for ( i = 0; i < order; i++ ) {
    375 				vi = v.get( i );
    376 				m = mu.get( i );
    377 
    378 				// Compute the residual:
    379 				di = vi - m;
    380 
    381 				// Update the sample mean:
    382 				m += di / N;
    383 				mu.set( i, m );
    384 
    385 				// Update the sample standard deviation:
    386 				d[ i ] = di;
    387 				M2[ i ] += di * ( vi-m );
    388 				sd[ i ] = sqrt( M2[i]/denom );
    389 
    390 				rdx = r * d[i];
    391 				sdi = sd[ i ];
    392 				for ( j = 0; j < i; j++ ) {
    393 					cij = C.get( i, j ) + ( rdx*d[j] );
    394 					C.set( i, j, cij );
    395 					C.set( j, i, cij ); // via symmetry
    396 
    397 					rij = ( cij/denom ) / ( sdi*sd[j] );
    398 					corr.set( i, j, rij );
    399 					corr.set( j, i, rij ); // via symmetry
    400 				}
    401 			}
    402 		}
    403 		return corr;
    404 	}
    405 
    406 	/**
    407 	* If provided a data vector, the accumulator function returns an updated sample correlation matrix. If not provided a data vector, the accumulator function returns the current sample correlation matrix.
    408 	*
    409 	* @private
    410 	* @param {ndarray} [v] - data vector
    411 	* @throws {TypeError} must provide a 1-dimensional ndarray
    412 	* @throws {Error} vector length must match correlation matrix dimensions
    413 	* @returns {(ndarray|null)} sample correlation matrix or null
    414 	*/
    415 	function accumulator2( v ) {
    416 		var rij;
    417 		var cij;
    418 		var sdi;
    419 		var di;
    420 		var i;
    421 		var j;
    422 		if ( arguments.length === 0 ) {
    423 			if ( N === 0 ) {
    424 				return null;
    425 			}
    426 			return corr;
    427 		}
    428 		if ( !isVectorLike( v ) ) {
    429 			throw new TypeError( 'invalid argument. Must provide a 1-dimensional ndarray. Value: `' + v + '`.' );
    430 		}
    431 		if ( v.shape[ 0 ] !== order ) {
    432 			throw new Error( 'invalid argument. Vector length must match correlation matrix dimensions. Expected: '+order+'. Actual: '+v.shape[ 0 ]+'.' );
    433 		}
    434 		N += 1;
    435 		for ( i = 0; i < order; i++ ) {
    436 			// Compute the residual:
    437 			di = v.get( i ) - mu.get( i );
    438 			d[ i ] = di;
    439 
    440 			// Update the standard deviation:
    441 			M2[ i ] += di * di;
    442 			sd[ i ] = sqrt( M2[i]/N );
    443 
    444 			// Update the co-moments and correlation matrix, recognizing that the matrices are symmetric...
    445 			sdi = sd[ i ];
    446 			for ( j = 0; j < i; j++ ) {
    447 				cij = C.get( i, j ) + ( di*d[j] );
    448 				C.set( i, j, cij );
    449 				C.set( j, i, cij ); // via symmetry
    450 
    451 				rij = ( cij/N ) / ( sdi*sd[j] );
    452 				corr.set( i, j, rij );
    453 				corr.set( j, i, rij ); // via symmetry
    454 			}
    455 		}
    456 		return corr;
    457 	}
    458 }
    459 
    460 
    461 // EXPORTS //
    462 
    463 module.exports = incrpcorrmat;
	time-to-botec Benchmark sampling in different programming languages
	Log \| Files \| Refs \| README