time-to-botec

Benchmark sampling in different programming languages
Log | Files | Refs | README

10d_blocked_accessors.js (9424B)


      1 /**
      2 * @license Apache-2.0
      3 *
      4 * Copyright (c) 2021 The Stdlib Authors.
      5 *
      6 * Licensed under the Apache License, Version 2.0 (the "License");
      7 * you may not use this file except in compliance with the License.
      8 * You may obtain a copy of the License at
      9 *
     10 *    http://www.apache.org/licenses/LICENSE-2.0
     11 *
     12 * Unless required by applicable law or agreed to in writing, software
     13 * distributed under the License is distributed on an "AS IS" BASIS,
     14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 * See the License for the specific language governing permissions and
     16 * limitations under the License.
     17 */
     18 
     19 /* eslint-disable max-depth, max-len */
     20 
     21 'use strict';
     22 
     23 // MODULES //
     24 
     25 var init = require( './init.js' );
     26 
     27 
     28 // MAIN //
     29 
     30 /**
     31 * Applies a unary callback to elements in a ten-dimensional input ndarray and assigns results to elements in an equivalently shaped output ndarray via loop blocking.
     32 *
     33 * @private
     34 * @param {Object} x - object containing input ndarray meta data
     35 * @param {string} x.dtype - data type
     36 * @param {Collection} x.data - data buffer
     37 * @param {NonNegativeIntegerArray} x.shape - dimensions
     38 * @param {IntegerArray} x.strides - stride lengths
     39 * @param {NonNegativeInteger} x.offset - index offset
     40 * @param {string} x.order - specifies whether `x` is row-major (C-style) or column-major (Fortran-style)
     41 * @param {Function} x.getter - callback for accessing `x` data buffer elements
     42 * @param {Object} y - object containing output ndarray meta data
     43 * @param {string} y.dtype - data type
     44 * @param {Collection} y.data - data buffer
     45 * @param {NonNegativeIntegerArray} y.shape - dimensions
     46 * @param {IntegerArray} y.strides - stride lengths
     47 * @param {NonNegativeInteger} y.offset - index offset
     48 * @param {string} y.order - specifies whether `y` is row-major (C-style) or column-major (Fortran-style)
     49 * @param {Function} y.setter - callback for setting `y` data buffer elements
     50 * @param {Callback} fcn - unary callback
     51 *
     52 * @example
     53 * var Complex64Array = require( '@stdlib/array/complex64' );
     54 * var Complex64 = require( '@stdlib/complex/float32' );
     55 * var real = require( '@stdlib/complex/real' );
     56 * var imag = require( '@stdlib/complex/imag' );
     57 *
     58 * function scale( z ) {
     59 *     return new Complex64( real(z)*10.0, imag(z)*10.0 );
     60 * }
     61 *
     62 * // Create data buffers:
     63 * var xbuf = new Complex64Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0 ] );
     64 * var ybuf = new Complex64Array( 4 );
     65 *
     66 * // Define the shape of the input and output arrays:
     67 * var shape = [ 1, 1, 1, 1, 1, 1, 1, 1, 2, 2 ];
     68 *
     69 * // Define the array strides:
     70 * var sx = [ 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 ];
     71 * var sy = [ 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 ];
     72 *
     73 * // Define the index offsets:
     74 * var ox = 0;
     75 * var oy = 0;
     76 *
     77 * // Define getters and setters:
     78 * function getter( buf, idx ) {
     79 *     return buf.get( idx );
     80 * }
     81 *
     82 * function setter( buf, idx, value ) {
     83 *     buf.set( value, idx );
     84 * }
     85 *
     86 * // Create the input and output ndarray-like objects:
     87 * var x = {
     88 *     'dtype': 'complex64',
     89 *     'data': xbuf,
     90 *     'shape': shape,
     91 *     'strides': sx,
     92 *     'offset': ox,
     93 *     'order': 'row-major',
     94 *     'getter': getter
     95 * };
     96 * var y = {
     97 *     'dtype': 'complex64',
     98 *     'data': ybuf,
     99 *     'shape': shape,
    100 *     'strides': sy,
    101 *     'offset': oy,
    102 *     'order': 'row-major',
    103 *     'setter': setter
    104 * };
    105 *
    106 * // Apply the unary function:
    107 * blockedunary10d( x, y, scale );
    108 *
    109 * var v = y.data.get( 0 );
    110 *
    111 * var re = real( v );
    112 * // returns 10.0
    113 *
    114 * var im = imag( v );
    115 * // returns 20.0
    116 */
    117 function blockedunary10d( x, y, fcn ) { // eslint-disable-line max-statements, max-lines-per-function
    118 	var bsize;
    119 	var xbuf;
    120 	var ybuf;
    121 	var get;
    122 	var set;
    123 	var dx0;
    124 	var dx1;
    125 	var dx2;
    126 	var dx3;
    127 	var dx4;
    128 	var dx5;
    129 	var dx6;
    130 	var dx7;
    131 	var dx8;
    132 	var dx9;
    133 	var dy0;
    134 	var dy1;
    135 	var dy2;
    136 	var dy3;
    137 	var dy4;
    138 	var dy5;
    139 	var dy6;
    140 	var dy7;
    141 	var dy8;
    142 	var dy9;
    143 	var ox1;
    144 	var ox2;
    145 	var ox3;
    146 	var ox4;
    147 	var ox5;
    148 	var ox6;
    149 	var ox7;
    150 	var ox8;
    151 	var ox9;
    152 	var oy1;
    153 	var oy2;
    154 	var oy3;
    155 	var oy4;
    156 	var oy5;
    157 	var oy6;
    158 	var oy7;
    159 	var oy8;
    160 	var oy9;
    161 	var sh;
    162 	var s0;
    163 	var s1;
    164 	var s2;
    165 	var s3;
    166 	var s4;
    167 	var s5;
    168 	var s6;
    169 	var s7;
    170 	var s8;
    171 	var s9;
    172 	var sx;
    173 	var sy;
    174 	var ox;
    175 	var oy;
    176 	var ix;
    177 	var iy;
    178 	var i0;
    179 	var i1;
    180 	var i2;
    181 	var i3;
    182 	var i4;
    183 	var i5;
    184 	var i6;
    185 	var i7;
    186 	var i8;
    187 	var i9;
    188 	var j0;
    189 	var j1;
    190 	var j2;
    191 	var j3;
    192 	var j4;
    193 	var j5;
    194 	var j6;
    195 	var j7;
    196 	var j8;
    197 	var j9;
    198 	var o;
    199 
    200 	// Note on variable naming convention: s#, dx#, dy#, i#, j# where # corresponds to the loop number, with `0` being the innermost loop...
    201 
    202 	// Initialize and unpack block data:
    203 	o = init( x, y );
    204 	sh = o.sh;
    205 	sx = o.sx;
    206 	sy = o.sy;
    207 	bsize = o.bsize;
    208 
    209 	// Cache the indices of the first indexed elements in the respective ndarrays...
    210 	ox = x.offset;
    211 	oy = y.offset;
    212 
    213 	// Cache references to the input and output ndarray buffers...
    214 	xbuf = x.data;
    215 	ybuf = y.data;
    216 
    217 	// Cache offset increments for the innermost loop...
    218 	dx0 = sx[0];
    219 	dy0 = sy[0];
    220 
    221 	// Cache accessors:
    222 	get = x.getter;
    223 	set = y.setter;
    224 
    225 	// Iterate over blocks...
    226 	for ( j9 = sh[9]; j9 > 0; ) {
    227 		if ( j9 < bsize ) {
    228 			s9 = j9;
    229 			j9 = 0;
    230 		} else {
    231 			s9 = bsize;
    232 			j9 -= bsize;
    233 		}
    234 		ox9 = ox + ( j9*sx[9] );
    235 		oy9 = oy + ( j9*sy[9] );
    236 		for ( j8 = sh[8]; j8 > 0; ) {
    237 			if ( j8 < bsize ) {
    238 				s8 = j8;
    239 				j8 = 0;
    240 			} else {
    241 				s8 = bsize;
    242 				j8 -= bsize;
    243 			}
    244 			dx9 = sx[9] - ( s8*sx[8] );
    245 			dy9 = sy[9] - ( s8*sy[8] );
    246 			ox8 = ox9 + ( j8*sx[8] );
    247 			oy8 = oy9 + ( j8*sy[8] );
    248 			for ( j7 = sh[7]; j7 > 0; ) {
    249 				if ( j7 < bsize ) {
    250 					s7 = j7;
    251 					j7 = 0;
    252 				} else {
    253 					s7 = bsize;
    254 					j7 -= bsize;
    255 				}
    256 				dx8 = sx[8] - ( s7*sx[7] );
    257 				dy8 = sy[8] - ( s7*sy[7] );
    258 				ox7 = ox8 + ( j7*sx[7] );
    259 				oy7 = oy8 + ( j7*sy[7] );
    260 				for ( j6 = sh[6]; j6 > 0; ) {
    261 					if ( j6 < bsize ) {
    262 						s6 = j6;
    263 						j6 = 0;
    264 					} else {
    265 						s6 = bsize;
    266 						j6 -= bsize;
    267 					}
    268 					dx7 = sx[7] - ( s6*sx[6] );
    269 					dy7 = sy[7] - ( s6*sy[6] );
    270 					ox6 = ox7 + ( j6*sx[6] );
    271 					oy6 = oy7 + ( j6*sy[6] );
    272 					for ( j5 = sh[5]; j5 > 0; ) {
    273 						if ( j5 < bsize ) {
    274 							s5 = j5;
    275 							j5 = 0;
    276 						} else {
    277 							s5 = bsize;
    278 							j5 -= bsize;
    279 						}
    280 						dx6 = sx[6] - ( s5*sx[5] );
    281 						dy6 = sy[6] - ( s5*sy[5] );
    282 						ox5 = ox6 + ( j5*sx[5] );
    283 						oy5 = oy6 + ( j5*sy[5] );
    284 						for ( j4 = sh[4]; j4 > 0; ) {
    285 							if ( j4 < bsize ) {
    286 								s4 = j4;
    287 								j4 = 0;
    288 							} else {
    289 								s4 = bsize;
    290 								j4 -= bsize;
    291 							}
    292 							dx5 = sx[5] - ( s4*sx[4] );
    293 							dy5 = sy[5] - ( s4*sy[4] );
    294 							ox4 = ox5 + ( j4*sx[4] );
    295 							oy4 = oy5 + ( j4*sy[4] );
    296 							for ( j3 = sh[3]; j3 > 0; ) {
    297 								if ( j3 < bsize ) {
    298 									s3 = j3;
    299 									j3 = 0;
    300 								} else {
    301 									s3 = bsize;
    302 									j3 -= bsize;
    303 								}
    304 								dx4 = sx[4] - ( s3*sx[3] );
    305 								dy4 = sy[4] - ( s3*sy[3] );
    306 								ox3 = ox4 + ( j3*sx[3] );
    307 								oy3 = oy4 + ( j3*sy[3] );
    308 								for ( j2 = sh[2]; j2 > 0; ) {
    309 									if ( j2 < bsize ) {
    310 										s2 = j2;
    311 										j2 = 0;
    312 									} else {
    313 										s2 = bsize;
    314 										j2 -= bsize;
    315 									}
    316 									dx3 = sx[3] - ( s2*sx[2] );
    317 									dy3 = sy[3] - ( s2*sy[2] );
    318 									ox2 = ox3 + ( j2*sx[2] );
    319 									oy2 = oy3 + ( j2*sy[2] );
    320 									for ( j1 = sh[1]; j1 > 0; ) {
    321 										if ( j1 < bsize ) {
    322 											s1 = j1;
    323 											j1 = 0;
    324 										} else {
    325 											s1 = bsize;
    326 											j1 -= bsize;
    327 										}
    328 										dx2 = sx[2] - ( s1*sx[1] );
    329 										dy2 = sy[2] - ( s1*sy[1] );
    330 										ox1 = ox2 + ( j1*sx[1] );
    331 										oy1 = oy2 + ( j1*sy[1] );
    332 										for ( j0 = sh[0]; j0 > 0; ) {
    333 											if ( j0 < bsize ) {
    334 												s0 = j0;
    335 												j0 = 0;
    336 											} else {
    337 												s0 = bsize;
    338 												j0 -= bsize;
    339 											}
    340 											// Compute index offsets for the first input and output ndarray elements in the current block...
    341 											ix = ox1 + ( j0*sx[0] );
    342 											iy = oy1 + ( j0*sy[0] );
    343 
    344 											// Compute loop offset increments...
    345 											dx1 = sx[1] - ( s0*sx[0] );
    346 											dy1 = sy[1] - ( s0*sy[0] );
    347 
    348 											// Iterate over the ndarray dimensions...
    349 											for ( i9 = 0; i9 < s9; i9++ ) {
    350 												for ( i8 = 0; i8 < s8; i8++ ) {
    351 													for ( i7 = 0; i7 < s7; i7++ ) {
    352 														for ( i6 = 0; i6 < s6; i6++ ) {
    353 															for ( i5 = 0; i5 < s5; i5++ ) {
    354 																for ( i4 = 0; i4 < s4; i4++ ) {
    355 																	for ( i3 = 0; i3 < s3; i3++ ) {
    356 																		for ( i2 = 0; i2 < s2; i2++ ) {
    357 																			for ( i1 = 0; i1 < s1; i1++ ) {
    358 																				for ( i0 = 0; i0 < s0; i0++ ) {
    359 																					set( ybuf, iy, fcn( get( xbuf, ix ) ) );
    360 																					ix += dx0;
    361 																					iy += dy0;
    362 																				}
    363 																				ix += dx1;
    364 																				iy += dy1;
    365 																			}
    366 																			ix += dx2;
    367 																			iy += dy2;
    368 																		}
    369 																		ix += dx3;
    370 																		iy += dy3;
    371 																	}
    372 																	ix += dx4;
    373 																	iy += dy4;
    374 																}
    375 																ix += dx5;
    376 																iy += dy5;
    377 															}
    378 															ix += dx6;
    379 															iy += dy6;
    380 														}
    381 														ix += dx7;
    382 														iy += dy7;
    383 													}
    384 													ix += dx8;
    385 													iy += dy8;
    386 												}
    387 												ix += dx9;
    388 												iy += dy9;
    389 											}
    390 										}
    391 									}
    392 								}
    393 							}
    394 						}
    395 					}
    396 				}
    397 			}
    398 		}
    399 	}
    400 }
    401 
    402 
    403 // EXPORTS //
    404 
    405 module.exports = blockedunary10d;