10d_blocked.js (8769B)
/**
* @license Apache-2.0
*
* Copyright (c) 2021 The Stdlib Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/* eslint-disable max-depth, max-len */

'use strict';

// MODULES //

var init = require( './init.js' );


// MAIN //

/**
* Invokes a unary callback for every element of a ten-dimensional input ndarray and stores each result in the matching element of an identically shaped output ndarray, visiting the elements one block (tile) at a time.
*
* @private
* @param {Object} x - input ndarray meta data
* @param {string} x.dtype - data type
* @param {Collection} x.data - data buffer
* @param {NonNegativeIntegerArray} x.shape - dimensions
* @param {IntegerArray} x.strides - stride lengths
* @param {NonNegativeInteger} x.offset - index offset
* @param {string} x.order - memory layout of `x` (row-major/C-style or column-major/Fortran-style)
* @param {Object} y - output ndarray meta data
* @param {string} y.dtype - data type
* @param {Collection} y.data - data buffer
* @param {NonNegativeIntegerArray} y.shape - dimensions
* @param {IntegerArray} y.strides - stride lengths
* @param {NonNegativeInteger} y.offset - index offset
* @param {string} y.order - memory layout of `y` (row-major/C-style or column-major/Fortran-style)
* @param {Callback} fcn - unary callback
* @returns {void}
*
* @example
* var Float64Array = require( '@stdlib/array/float64' );
*
* function scale( x ) {
*     return x * 10.0;
* }
*
* // Create data buffers:
* var xbuf = new Float64Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 ] );
* var ybuf = new Float64Array( 6 );
*
* // Define the shape of the input and output arrays:
* var shape = [ 1, 1, 1, 1, 1, 1, 1, 3, 1, 2 ];
*
* // Define the array strides:
* var sx = [ 12, 12, 12, 12, 12, 12, 12, 4, 4, 1 ];
* var sy = [ 6, 6, 6, 6, 6, 6, 6, 2, 2, 1 ];
*
* // Define the index offsets:
* var ox = 1;
* var oy = 0;
*
* // Create the input and output ndarray-like objects:
* var x = {
*     'dtype': 'float64',
*     'data': xbuf,
*     'shape': shape,
*     'strides': sx,
*     'offset': ox,
*     'order': 'row-major'
* };
* var y = {
*     'dtype': 'float64',
*     'data': ybuf,
*     'shape': shape,
*     'strides': sy,
*     'offset': oy,
*     'order': 'row-major'
* };
*
* // Apply the unary function:
* blockedunary10d( x, y, scale );
*
* console.log( y.data );
* // => <Float64Array>[ 20.0, 30.0, 60.0, 70.0, 100.0, 110.0 ]
*/
function blockedunary10d( x, y, fcn ) { // eslint-disable-line max-statements, max-lines-per-function
	var blockSize;
	var xdata;
	var ydata;
	var dims;
	var xoff;
	var yoff;
	var stx;
	var sty;
	var blk;
	var px;
	var py;
	var incx0;
	var incx1;
	var incx2;
	var incx3;
	var incx4;
	var incx5;
	var incx6;
	var incx7;
	var incx8;
	var incx9;
	var incy0;
	var incy1;
	var incy2;
	var incy3;
	var incy4;
	var incy5;
	var incy6;
	var incy7;
	var incy8;
	var incy9;
	var bx1;
	var bx2;
	var bx3;
	var bx4;
	var bx5;
	var bx6;
	var bx7;
	var bx8;
	var bx9;
	var by1;
	var by2;
	var by3;
	var by4;
	var by5;
	var by6;
	var by7;
	var by8;
	var by9;
	var n0;
	var n1;
	var n2;
	var n3;
	var n4;
	var n5;
	var n6;
	var n7;
	var n8;
	var n9;
	var r0;
	var r1;
	var r2;
	var r3;
	var r4;
	var r5;
	var r6;
	var r7;
	var r8;
	var r9;
	var k0;
	var k1;
	var k2;
	var k3;
	var k4;
	var k5;
	var k6;
	var k7;
	var k8;
	var k9;

	// Naming convention: the trailing digit is the loop level, where `0` is the innermost loop. For a level `#`: `r#` is the extent still to be tiled, `n#` is the extent of the current block, `bx#`/`by#` are the block base offsets, `incx#`/`incy#` are the pointer adjustments applied after the level's inner loop completes, and `k#` is the element counter within the block.

	// Obtain the iteration shape, strides, and block size from the initialization helper:
	blk = init( x, y );
	dims = blk.sh;
	stx = blk.sx;
	sty = blk.sy;
	blockSize = blk.bsize;

	// Offsets of the first indexed elements:
	xoff = x.offset;
	yoff = y.offset;

	// Underlying data buffers:
	xdata = x.data;
	ydata = y.data;

	// Pointer step for the innermost loop:
	incx0 = stx[ 0 ];
	incy0 = sty[ 0 ];

	// Walk the blocks; each level consumes its extent from the end toward zero in chunks of at most `blockSize`...
	r9 = dims[ 9 ];
	while ( r9 > 0 ) {
		n9 = ( r9 < blockSize ) ? r9 : blockSize;
		r9 -= n9;
		bx9 = xoff + ( r9*stx[ 9 ] );
		by9 = yoff + ( r9*sty[ 9 ] );
		r8 = dims[ 8 ];
		while ( r8 > 0 ) {
			n8 = ( r8 < blockSize ) ? r8 : blockSize;
			r8 -= n8;
			bx8 = bx9 + ( r8*stx[ 8 ] );
			by8 = by9 + ( r8*sty[ 8 ] );
			incx9 = stx[ 9 ] - ( n8*stx[ 8 ] );
			incy9 = sty[ 9 ] - ( n8*sty[ 8 ] );
			r7 = dims[ 7 ];
			while ( r7 > 0 ) {
				n7 = ( r7 < blockSize ) ? r7 : blockSize;
				r7 -= n7;
				bx7 = bx8 + ( r7*stx[ 7 ] );
				by7 = by8 + ( r7*sty[ 7 ] );
				incx8 = stx[ 8 ] - ( n7*stx[ 7 ] );
				incy8 = sty[ 8 ] - ( n7*sty[ 7 ] );
				r6 = dims[ 6 ];
				while ( r6 > 0 ) {
					n6 = ( r6 < blockSize ) ? r6 : blockSize;
					r6 -= n6;
					bx6 = bx7 + ( r6*stx[ 6 ] );
					by6 = by7 + ( r6*sty[ 6 ] );
					incx7 = stx[ 7 ] - ( n6*stx[ 6 ] );
					incy7 = sty[ 7 ] - ( n6*sty[ 6 ] );
					r5 = dims[ 5 ];
					while ( r5 > 0 ) {
						n5 = ( r5 < blockSize ) ? r5 : blockSize;
						r5 -= n5;
						bx5 = bx6 + ( r5*stx[ 5 ] );
						by5 = by6 + ( r5*sty[ 5 ] );
						incx6 = stx[ 6 ] - ( n5*stx[ 5 ] );
						incy6 = sty[ 6 ] - ( n5*sty[ 5 ] );
						r4 = dims[ 4 ];
						while ( r4 > 0 ) {
							n4 = ( r4 < blockSize ) ? r4 : blockSize;
							r4 -= n4;
							bx4 = bx5 + ( r4*stx[ 4 ] );
							by4 = by5 + ( r4*sty[ 4 ] );
							incx5 = stx[ 5 ] - ( n4*stx[ 4 ] );
							incy5 = sty[ 5 ] - ( n4*sty[ 4 ] );
							r3 = dims[ 3 ];
							while ( r3 > 0 ) {
								n3 = ( r3 < blockSize ) ? r3 : blockSize;
								r3 -= n3;
								bx3 = bx4 + ( r3*stx[ 3 ] );
								by3 = by4 + ( r3*sty[ 3 ] );
								incx4 = stx[ 4 ] - ( n3*stx[ 3 ] );
								incy4 = sty[ 4 ] - ( n3*sty[ 3 ] );
								r2 = dims[ 2 ];
								while ( r2 > 0 ) {
									n2 = ( r2 < blockSize ) ? r2 : blockSize;
									r2 -= n2;
									bx2 = bx3 + ( r2*stx[ 2 ] );
									by2 = by3 + ( r2*sty[ 2 ] );
									incx3 = stx[ 3 ] - ( n2*stx[ 2 ] );
									incy3 = sty[ 3 ] - ( n2*sty[ 2 ] );
									r1 = dims[ 1 ];
									while ( r1 > 0 ) {
										n1 = ( r1 < blockSize ) ? r1 : blockSize;
										r1 -= n1;
										bx1 = bx2 + ( r1*stx[ 1 ] );
										by1 = by2 + ( r1*sty[ 1 ] );
										incx2 = stx[ 2 ] - ( n1*stx[ 1 ] );
										incy2 = sty[ 2 ] - ( n1*sty[ 1 ] );
										r0 = dims[ 0 ];
										while ( r0 > 0 ) {
											n0 = ( r0 < blockSize ) ? r0 : blockSize;
											r0 -= n0;

											// Point at the first input and output elements of the current block...
											px = bx1 + ( r0*stx[ 0 ] );
											py = by1 + ( r0*sty[ 0 ] );

											// Adjustment applied after each innermost run to move to the next level-1 position...
											incx1 = stx[ 1 ] - ( n0*stx[ 0 ] );
											incy1 = sty[ 1 ] - ( n0*sty[ 0 ] );

											// Visit every element within the current block...
											for ( k9 = 0; k9 < n9; k9++ ) {
												for ( k8 = 0; k8 < n8; k8++ ) {
													for ( k7 = 0; k7 < n7; k7++ ) {
														for ( k6 = 0; k6 < n6; k6++ ) {
															for ( k5 = 0; k5 < n5; k5++ ) {
																for ( k4 = 0; k4 < n4; k4++ ) {
																	for ( k3 = 0; k3 < n3; k3++ ) {
																		for ( k2 = 0; k2 < n2; k2++ ) {
																			for ( k1 = 0; k1 < n1; k1++ ) {
																				for ( k0 = 0; k0 < n0; k0++ ) {
																					ydata[ py ] = fcn( xdata[ px ] );
																					px += incx0;
																					py += incy0;
																				}
																				px += incx1;
																				py += incy1;
																			}
																			px += incx2;
																			py += incy2;
																		}
																		px += incx3;
																		py += incy3;
																	}
																	px += incx4;
																	py += incy4;
																}
																px += incx5;
																py += incy5;
															}
															px += incx6;
															py += incy6;
														}
														px += incx7;
														py += incy7;
													}
													px += incx8;
													py += incy8;
												}
												px += incx9;
												py += incy9;
											}
										}
									}
								}
							}
						}
					}
				}
			}
		}
	}
}


// EXPORTS //

module.exports = blockedunary10d;