time-to-botec

Benchmark sampling in different programming languages
Log | Files | Refs | README

9d_blocked.js (8153B)


      1 /**
      2 * @license Apache-2.0
      3 *
      4 * Copyright (c) 2021 The Stdlib Authors.
      5 *
      6 * Licensed under the Apache License, Version 2.0 (the "License");
      7 * you may not use this file except in compliance with the License.
      8 * You may obtain a copy of the License at
      9 *
     10 *    http://www.apache.org/licenses/LICENSE-2.0
     11 *
     12 * Unless required by applicable law or agreed to in writing, software
     13 * distributed under the License is distributed on an "AS IS" BASIS,
     14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 * See the License for the specific language governing permissions and
     16 * limitations under the License.
     17 */
     18 
     19 /* eslint-disable max-depth, max-len */
     20 
     21 'use strict';
     22 
     23 // MODULES //
     24 
     25 var init = require( './init.js' );
     26 
     27 
     28 // MAIN //
     29 
     30 /**
     31 * Applies a unary callback to elements in a nine-dimensional input ndarray and assigns results to elements in an equivalently shaped output ndarray via loop blocking.
     32 *
     33 * @private
     34 * @param {Object} x - object containing input ndarray meta data
     35 * @param {string} x.dtype - data type
     36 * @param {Collection} x.data - data buffer
     37 * @param {NonNegativeIntegerArray} x.shape - dimensions
     38 * @param {IntegerArray} x.strides - stride lengths
     39 * @param {NonNegativeInteger} x.offset - index offset
     40 * @param {string} x.order - specifies whether `x` is row-major (C-style) or column-major (Fortran-style)
     41 * @param {Object} y - object containing output ndarray meta data
     42 * @param {string} y.dtype - data type
     43 * @param {Collection} y.data - data buffer
     44 * @param {NonNegativeIntegerArray} y.shape - dimensions
     45 * @param {IntegerArray} y.strides - stride lengths
     46 * @param {NonNegativeInteger} y.offset - index offset
     47 * @param {string} y.order - specifies whether `y` is row-major (C-style) or column-major (Fortran-style)
     48 * @param {Callback} fcn - unary callback
     49 *
     50 * @example
     51 * var Float64Array = require( '@stdlib/array/float64' );
     52 *
     53 * function scale( x ) {
     54 *     return x * 10.0;
     55 * }
     56 *
     57 * // Create data buffers:
     58 * var xbuf = new Float64Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 ] );
     59 * var ybuf = new Float64Array( 6 );
     60 *
     61 * // Define the shape of the input and output arrays:
     62 * var shape = [ 1, 1, 1, 1, 1, 1, 3, 1, 2 ];
     63 *
     64 * // Define the array strides:
     65 * var sx = [ 12, 12, 12, 12, 12, 12, 4, 4, 1 ];
     66 * var sy = [ 6, 6, 6, 6, 6, 6, 2, 2, 1 ];
     67 *
     68 * // Define the index offsets:
     69 * var ox = 1;
     70 * var oy = 0;
     71 *
     72 * // Create the input and output ndarray-like objects:
     73 * var x = {
     74 *     'dtype': 'float64',
     75 *     'data': xbuf,
     76 *     'shape': shape,
     77 *     'strides': sx,
     78 *     'offset': ox,
     79 *     'order': 'row-major'
     80 * };
     81 * var y = {
     82 *     'dtype': 'float64',
     83 *     'data': ybuf,
     84 *     'shape': shape,
     85 *     'strides': sy,
     86 *     'offset': oy,
     87 *     'order': 'row-major'
     88 * };
     89 *
     90 * // Apply the unary function:
     91 * blockedunary9d( x, y, scale );
     92 *
     93 * console.log( y.data );
     94 * // => <Float64Array>[ 20.0, 30.0, 60.0, 70.0, 100.0, 110.0 ]
     95 */
     96 function blockedunary9d( x, y, fcn ) { // eslint-disable-line max-statements, max-lines-per-function
     97 	var bsize;
     98 	var xbuf;
     99 	var ybuf;
    100 	var dx0;
    101 	var dx1;
    102 	var dx2;
    103 	var dx3;
    104 	var dx4;
    105 	var dx5;
    106 	var dx6;
    107 	var dx7;
    108 	var dx8;
    109 	var dy0;
    110 	var dy1;
    111 	var dy2;
    112 	var dy3;
    113 	var dy4;
    114 	var dy5;
    115 	var dy6;
    116 	var dy7;
    117 	var dy8;
    118 	var ox1;
    119 	var ox2;
    120 	var ox3;
    121 	var ox4;
    122 	var ox5;
    123 	var ox6;
    124 	var ox7;
    125 	var ox8;
    126 	var oy1;
    127 	var oy2;
    128 	var oy3;
    129 	var oy4;
    130 	var oy5;
    131 	var oy6;
    132 	var oy7;
    133 	var oy8;
    134 	var sh;
    135 	var s0;
    136 	var s1;
    137 	var s2;
    138 	var s3;
    139 	var s4;
    140 	var s5;
    141 	var s6;
    142 	var s7;
    143 	var s8;
    144 	var sx;
    145 	var sy;
    146 	var ox;
    147 	var oy;
    148 	var ix;
    149 	var iy;
    150 	var i0;
    151 	var i1;
    152 	var i2;
    153 	var i3;
    154 	var i4;
    155 	var i5;
    156 	var i6;
    157 	var i7;
    158 	var i8;
    159 	var j0;
    160 	var j1;
    161 	var j2;
    162 	var j3;
    163 	var j4;
    164 	var j5;
    165 	var j6;
    166 	var j7;
    167 	var j8;
    168 	var o;
    169 
    170 	// Note on variable naming convention: s#, dx#, dy#, i#, j# where # corresponds to the loop number, with `0` being the innermost loop...
    171 
    172 	// Initialize and unpack block data:
    173 	o = init( x, y );
    174 	sh = o.sh;
    175 	sx = o.sx;
    176 	sy = o.sy;
    177 	bsize = o.bsize;
    178 
    179 	// Cache the indices of the first indexed elements in the respective ndarrays...
    180 	ox = x.offset;
    181 	oy = y.offset;
    182 
    183 	// Cache references to the input and output ndarray buffers...
    184 	xbuf = x.data;
    185 	ybuf = y.data;
    186 
    187 	// Cache offset increments for the innermost loop...
    188 	dx0 = sx[0];
    189 	dy0 = sy[0];
    190 
    191 	// Iterate over blocks...
    192 	for ( j8 = sh[8]; j8 > 0; ) {
    193 		if ( j8 < bsize ) {
    194 			s8 = j8;
    195 			j8 = 0;
    196 		} else {
    197 			s8 = bsize;
    198 			j8 -= bsize;
    199 		}
    200 		ox8 = ox + ( j8*sx[8] );
    201 		oy8 = oy + ( j8*sy[8] );
    202 		for ( j7 = sh[7]; j7 > 0; ) {
    203 			if ( j7 < bsize ) {
    204 				s7 = j7;
    205 				j7 = 0;
    206 			} else {
    207 				s7 = bsize;
    208 				j7 -= bsize;
    209 			}
    210 			dx8 = sx[8] - ( s7*sx[7] );
    211 			dy8 = sy[8] - ( s7*sy[7] );
    212 			ox7 = ox8 + ( j7*sx[7] );
    213 			oy7 = oy8 + ( j7*sy[7] );
    214 			for ( j6 = sh[6]; j6 > 0; ) {
    215 				if ( j6 < bsize ) {
    216 					s6 = j6;
    217 					j6 = 0;
    218 				} else {
    219 					s6 = bsize;
    220 					j6 -= bsize;
    221 				}
    222 				dx7 = sx[7] - ( s6*sx[6] );
    223 				dy7 = sy[7] - ( s6*sy[6] );
    224 				ox6 = ox7 + ( j6*sx[6] );
    225 				oy6 = oy7 + ( j6*sy[6] );
    226 				for ( j5 = sh[5]; j5 > 0; ) {
    227 					if ( j5 < bsize ) {
    228 						s5 = j5;
    229 						j5 = 0;
    230 					} else {
    231 						s5 = bsize;
    232 						j5 -= bsize;
    233 					}
    234 					dx6 = sx[6] - ( s5*sx[5] );
    235 					dy6 = sy[6] - ( s5*sy[5] );
    236 					ox5 = ox6 + ( j5*sx[5] );
    237 					oy5 = oy6 + ( j5*sy[5] );
    238 					for ( j4 = sh[4]; j4 > 0; ) {
    239 						if ( j4 < bsize ) {
    240 							s4 = j4;
    241 							j4 = 0;
    242 						} else {
    243 							s4 = bsize;
    244 							j4 -= bsize;
    245 						}
    246 						dx5 = sx[5] - ( s4*sx[4] );
    247 						dy5 = sy[5] - ( s4*sy[4] );
    248 						ox4 = ox5 + ( j4*sx[4] );
    249 						oy4 = oy5 + ( j4*sy[4] );
    250 						for ( j3 = sh[3]; j3 > 0; ) {
    251 							if ( j3 < bsize ) {
    252 								s3 = j3;
    253 								j3 = 0;
    254 							} else {
    255 								s3 = bsize;
    256 								j3 -= bsize;
    257 							}
    258 							dx4 = sx[4] - ( s3*sx[3] );
    259 							dy4 = sy[4] - ( s3*sy[3] );
    260 							ox3 = ox4 + ( j3*sx[3] );
    261 							oy3 = oy4 + ( j3*sy[3] );
    262 							for ( j2 = sh[2]; j2 > 0; ) {
    263 								if ( j2 < bsize ) {
    264 									s2 = j2;
    265 									j2 = 0;
    266 								} else {
    267 									s2 = bsize;
    268 									j2 -= bsize;
    269 								}
    270 								dx3 = sx[3] - ( s2*sx[2] );
    271 								dy3 = sy[3] - ( s2*sy[2] );
    272 								ox2 = ox3 + ( j2*sx[2] );
    273 								oy2 = oy3 + ( j2*sy[2] );
    274 								for ( j1 = sh[1]; j1 > 0; ) {
    275 									if ( j1 < bsize ) {
    276 										s1 = j1;
    277 										j1 = 0;
    278 									} else {
    279 										s1 = bsize;
    280 										j1 -= bsize;
    281 									}
    282 									dx2 = sx[2] - ( s1*sx[1] );
    283 									dy2 = sy[2] - ( s1*sy[1] );
    284 									ox1 = ox2 + ( j1*sx[1] );
    285 									oy1 = oy2 + ( j1*sy[1] );
    286 									for ( j0 = sh[0]; j0 > 0; ) {
    287 										if ( j0 < bsize ) {
    288 											s0 = j0;
    289 											j0 = 0;
    290 										} else {
    291 											s0 = bsize;
    292 											j0 -= bsize;
    293 										}
    294 										// Compute index offsets for the first input and output ndarray elements in the current block...
    295 										ix = ox1 + ( j0*sx[0] );
    296 										iy = oy1 + ( j0*sy[0] );
    297 
    298 										// Compute loop offset increments...
    299 										dx1 = sx[1] - ( s0*sx[0] );
    300 										dy1 = sy[1] - ( s0*sy[0] );
    301 
    302 										// Iterate over the ndarray dimensions...
    303 										for ( i8 = 0; i8 < s8; i8++ ) {
    304 											for ( i7 = 0; i7 < s7; i7++ ) {
    305 												for ( i6 = 0; i6 < s6; i6++ ) {
    306 													for ( i5 = 0; i5 < s5; i5++ ) {
    307 														for ( i4 = 0; i4 < s4; i4++ ) {
    308 															for ( i3 = 0; i3 < s3; i3++ ) {
    309 																for ( i2 = 0; i2 < s2; i2++ ) {
    310 																	for ( i1 = 0; i1 < s1; i1++ ) {
    311 																		for ( i0 = 0; i0 < s0; i0++ ) {
    312 																			ybuf[ iy ] = fcn( xbuf[ ix ] );
    313 																			ix += dx0;
    314 																			iy += dy0;
    315 																		}
    316 																		ix += dx1;
    317 																		iy += dy1;
    318 																	}
    319 																	ix += dx2;
    320 																	iy += dy2;
    321 																}
    322 																ix += dx3;
    323 																iy += dy3;
    324 															}
    325 															ix += dx4;
    326 															iy += dy4;
    327 														}
    328 														ix += dx5;
    329 														iy += dy5;
    330 													}
    331 													ix += dx6;
    332 													iy += dy6;
    333 												}
    334 												ix += dx7;
    335 												iy += dy7;
    336 											}
    337 											ix += dx8;
    338 											iy += dy8;
    339 										}
    340 									}
    341 								}
    342 							}
    343 						}
    344 					}
    345 				}
    346 			}
    347 		}
    348 	}
    349 }
    350 
    351 
    352 // EXPORTS //
    353 
    354 module.exports = blockedunary9d;