dispatch.c (12405B)
1 /** 2 * @license Apache-2.0 3 * 4 * Copyright (c) 2021 The Stdlib Authors. 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 #include "stdlib/ndarray/base/unary/dispatch_object.h" 20 #include "stdlib/ndarray/base/unary/typedefs.h" 21 #include "stdlib/ndarray/base/iteration_order.h" 22 #include "stdlib/ndarray/base/bytes_per_element.h" 23 #include "stdlib/ndarray/ctor.h" 24 #include <stdint.h> 25 #include <stddef.h> 26 27 /** 28 * Applies a unary callback to an n-dimensional input ndarray having `ndims-1` singleton dimensions and assigns results to elements in an output ndarray having the same shape. 29 * 30 * ## Notes 31 * 32 * - If able to successfully apply a unary callback, the function returns `0`; otherwise, the function returns an error code. 33 * 34 * @private 35 * @param f unary ndarray function 36 * @param x1 input ndarray 37 * @param x2 output ndarray 38 * @param i index of the non-singleton dimension 39 * @param fcn callback 40 * @return status code 41 */ 42 static int8_t stdlib_ndarray_unary_1d_squeeze( const ndarrayUnaryFcn f, struct ndarray *x1, struct ndarray *x2, const int64_t i, void *fcn ) { 43 int64_t sh[] = { stdlib_ndarray_shape( x1 )[ i ] }; 44 45 // Shallow copy and reshape the arrays... 46 int64_t sx1[] = { stdlib_ndarray_strides( x1 )[ i ] }; 47 struct ndarray *x1c = stdlib_ndarray_allocate( 48 stdlib_ndarray_dtype( x1 ), 49 stdlib_ndarray_data( x1 ), 50 1, 51 sh, 52 sx1, 53 stdlib_ndarray_offset( x1 ), 54 stdlib_ndarray_order( x1 ), 55 stdlib_ndarray_index_mode( x1 ), 56 stdlib_ndarray_nsubmodes( x1 ), 57 stdlib_ndarray_submodes( x1 ) 58 ); 59 if ( x1c == NULL ) { 60 return -1; 61 } 62 int64_t sx2[] = { stdlib_ndarray_strides( x2 )[ i ] }; 63 struct ndarray *x2c = stdlib_ndarray_allocate( 64 stdlib_ndarray_dtype( x2 ), 65 stdlib_ndarray_data( x2 ), 66 1, 67 sh, 68 sx2, 69 stdlib_ndarray_offset( x2 ), 70 stdlib_ndarray_order( x2 ), 71 stdlib_ndarray_index_mode( x2 ), 72 stdlib_ndarray_nsubmodes( x2 ), 73 stdlib_ndarray_submodes( x2 ) 74 ); 75 if ( x2c == NULL ) { 76 stdlib_ndarray_free( x1c ); 77 return -1; 78 } 79 // Apply the callback: 80 struct ndarray *arrays[] = { x1c, x2c }; 81 int8_t status = f( arrays, fcn ); 82 83 // Free allocated memory: 84 stdlib_ndarray_free( x1c ); 85 stdlib_ndarray_free( x2c ); 86 87 return status; 88 } 89 90 /** 91 * Applies a unary callback to a flattened n-dimensional input ndarray and assigns results to elements in a flattened n-dimensional output ndarray. 92 * 93 * ## Notes 94 * 95 * - If able to successfully apply a unary callback, the function returns `0`; otherwise, the function returns an error code. 96 * 97 * @private 98 * @param f unary ndarray function 99 * @param N number of elements 100 * @param x1 input ndarray 101 * @param s1 input ndarray stride length 102 * @param x2 output ndarray 103 * @param s2 output ndarray stride length 104 * @param fcn callback 105 * @return status code 106 */ 107 static int8_t stdlib_ndarray_unary_1d_flatten( const ndarrayUnaryFcn f, const int64_t N, struct ndarray *x1, const int64_t s1, struct ndarray *x2, const int64_t s2, void *fcn ) { 108 // Define the (flattened) strided array shape: 109 int64_t sh[] = { N }; 110 111 // Shallow copy and reshape the arrays... 112 int64_t sx1[] = { s2 }; 113 struct ndarray *x1c = stdlib_ndarray_allocate( 114 stdlib_ndarray_dtype( x1 ), 115 stdlib_ndarray_data( x1 ), 116 1, 117 sh, 118 sx1, 119 stdlib_ndarray_offset( x1 ), 120 stdlib_ndarray_order( x1 ), 121 stdlib_ndarray_index_mode( x1 ), 122 stdlib_ndarray_nsubmodes( x1 ), 123 stdlib_ndarray_submodes( x1 ) 124 ); 125 if ( x1c == NULL ) { 126 return -1; 127 } 128 int64_t sx2[] = { s2 }; 129 struct ndarray *x2c = stdlib_ndarray_allocate( 130 stdlib_ndarray_dtype( x2 ), 131 stdlib_ndarray_data( x2 ), 132 1, 133 sh, 134 sx2, 135 stdlib_ndarray_offset( x2 ), 136 stdlib_ndarray_order( x2 ), 137 stdlib_ndarray_index_mode( x2 ), 138 stdlib_ndarray_nsubmodes( x2 ), 139 stdlib_ndarray_submodes( x2 ) 140 ); 141 if ( x2c == NULL ) { 142 stdlib_ndarray_free( x1c ); 143 return -1; 144 } 145 // Apply the callback: 146 struct ndarray *arrays[] = { x1c, x2c }; 147 int8_t status = f( arrays, fcn ); 148 149 // Free allocated memory: 150 stdlib_ndarray_free( x1c ); 151 stdlib_ndarray_free( x2c ); 152 153 return status; 154 } 155 156 /** 157 * Dispatches to a unary ndarray function according to the dimensionality of provided ndarray arguments. 158 * 159 * ## Notes 160 * 161 * - If able to successfully dispatch, the function returns `0`; otherwise, the function returns an error code. 162 * 163 * @param obj object comprised of dispatch tables containing unary ndarray functions 164 * @param arrays array whose first element is a pointer to an input ndarray and whose last element is a pointer to an output ndarray 165 * @param fcn callback 166 * @return status code 167 * 168 * @example 169 * #include "stdlib/ndarray/base/unary/dispatch.h" 170 * #include "stdlib/ndarray/base/unary/dispatch_object.h" 171 * #include "stdlib/ndarray/base/unary/typedefs.h" 172 * #include "stdlib/ndarray/base/unary/b_b.h" 173 * #include "stdlib/ndarray/ctor.h" 174 * #include <stdint.h> 175 * #include <stdlib.h> 176 * #include <stdio.h> 177 * 178 * // Define a list of unary ndarray functions: 179 * ndarrayUnaryFcn functions[] = { 180 * stdlib_ndarray_b_b_0d, 181 * stdlib_ndarray_b_b_1d, 182 * stdlib_ndarray_b_b_2d, 183 * stdlib_ndarray_b_b_3d, 184 * stdlib_ndarray_b_b_4d, 185 * stdlib_ndarray_b_b_5d, 186 * stdlib_ndarray_b_b_6d, 187 * stdlib_ndarray_b_b_7d, 188 * stdlib_ndarray_b_b_8d, 189 * stdlib_ndarray_b_b_9d, 190 * stdlib_ndarray_b_b_10d 191 * stdlib_ndarray_b_b_nd 192 * }; 193 * 194 * // Define a list of unary ndarray functions using loop blocking... 195 * ndarrayUnaryFcn blocked_functions[] = { 196 * stdlib_ndarray_b_b_2d_blocked, 197 * stdlib_ndarray_b_b_3d_blocked, 198 * stdlib_ndarray_b_b_4d_blocked, 199 * stdlib_ndarray_b_b_5d_blocked, 200 * stdlib_ndarray_b_b_6d_blocked, 201 * stdlib_ndarray_b_b_7d_blocked, 202 * stdlib_ndarray_b_b_8d_blocked, 203 * stdlib_ndarray_b_b_9d_blocked, 204 * stdlib_ndarray_b_b_10d_blocked 205 * }; 206 * 207 * // Create a unary function dispatch object: 208 * struct ndarrayUnaryDispatchObject obj = { 209 * // Array containing unary ndarray functions: 210 * unary, 211 * 212 * // Number of unary ndarray functions: 213 * 12, 214 * 215 * // Array containing unary ndarray functions using loop blocking: 216 * blocked_unary, 217 * 218 * // Number of unary ndarray functions using loop blocking: 219 * 9 220 * } 221 * 222 * // ... 223 * 224 * // Create ndarrays... 225 * struct ndarray *x = stdlib_ndarray_allocate( ... ); 226 * if ( x == NULL ) { 227 * fprintf( stderr, "Error allocating memory.\n" ); 228 * exit( EXIT_FAILURE ); 229 * } 230 * 231 * struct ndarray *y = stdlib_ndarray_allocate( ... ); 232 * if ( y == NULL ) { 233 * fprintf( stderr, "Error allocating memory.\n" ); 234 * exit( EXIT_FAILURE ); 235 * } 236 * 237 * // ... 238 * 239 * // Define a callback: 240 * uint8_t scale( const uint8_t x ) { 241 * return x + 10; 242 * } 243 * 244 * // Apply the callback: 245 * struct ndarray *arrays[] = { x, y }; 246 * int8_t status = stdlib_ndarray_b_b( arrays, (void *)scale ); 247 * if ( status != 0 ) { 248 * fprintf( stderr, "Error during computation.\n" ); 249 * exit( EXIT_FAILURE ); 250 * } 251 */ 252 int8_t stdlib_ndarray_unary_dispatch( const struct ndarrayUnaryDispatchObject *obj, struct ndarray *arrays[], void *fcn ) { 253 struct ndarray *x1; 254 struct ndarray *x2; 255 int8_t status; 256 int64_t ndims; 257 int64_t mab1; 258 int64_t mab2; 259 int64_t mib1; 260 int64_t mib2; 261 int64_t *sh1; 262 int64_t *sh2; 263 int64_t *s1; 264 int64_t *s2; 265 int64_t len; 266 int64_t bp1; 267 int64_t bp2; 268 int8_t io1; 269 int8_t io2; 270 int64_t ns; 271 int64_t s; 272 int64_t d; 273 int64_t i; 274 275 // Unpack the arrays: 276 x1 = arrays[ 0 ]; 277 x2 = arrays[ 1 ]; 278 279 // Verify that the input and output arrays have the same number of dimensions... 280 ndims = stdlib_ndarray_ndims( x1 ); 281 if ( ndims != stdlib_ndarray_ndims( x2 ) ) { 282 return -1; // TODO: consider standardized error codes 283 } 284 // Determine whether we can avoid iteration altogether... 285 if ( ndims == 0 ) { 286 obj->functions[ 0 ]( arrays, fcn ); 287 return 0; 288 } 289 sh1 = stdlib_ndarray_shape( x1 ); 290 sh2 = stdlib_ndarray_shape( x2 ); 291 292 // Verify that the input and output arrays have the same dimensions... 293 len = 1; // number of elements 294 ns = 0; // number of singleton dimensions 295 for ( i = 0; i < ndims; i++ ) { 296 d = sh1[ i ]; 297 if ( d != sh2[ i ] ) { 298 return -1; // TODO: consider standardized error codes 299 } 300 // Note that, if one of the dimensions is `0`, the length will be `0`... 301 len *= d; 302 303 // Check whether the current dimension is a singleton dimension... 304 if ( d == 1 ) { 305 ns += 1; 306 } 307 } 308 // Check whether we were provided empty ndarrays... 309 if ( len == 0 ) { 310 return 0; 311 } 312 // Determine whether the ndarrays are one-dimensional and thus readily translate to one-dimensional strided arrays... 313 if ( ndims == 1 ) { 314 obj->functions[ 1 ]( arrays, fcn ); 315 return 0; 316 } 317 // Determine whether the ndarrays have only **one** non-singleton dimension (e.g., ndims=4, shape=[10,1,1,1]) so that we can treat the ndarrays as being equivalent to one-dimensional strided arrays... 318 if ( ns == ndims-1 ) { 319 // Get the index of the non-singleton dimension... 320 for ( i = 0; i < ndims; i++ ) { 321 if ( sh1[ i ] != 1 ) { 322 break; 323 } 324 } 325 // Remove the singleton dimensions and apply the unary function... 326 status = stdlib_ndarray_unary_1d_squeeze( obj->functions[ 1 ], x1, x2, i, fcn ); 327 if ( status == 0 ) { 328 return 0; 329 } 330 // If we failed, this is probably due to failed memory allocation, so fall through and try again... 331 } 332 s1 = stdlib_ndarray_strides( x1 ); 333 s2 = stdlib_ndarray_strides( x2 ); 334 io1 = stdlib_ndarray_iteration_order( ndims, s1 ); // +/-1 335 io2 = stdlib_ndarray_iteration_order( ndims, s2 ); // +/-1 336 337 // Determine whether we can avoid blocked iteration... 338 if ( io1 != 0 && io2 != 0 && stdlib_ndarray_order( x1 ) == stdlib_ndarray_order( x2 )) { 339 // Determine the minimum and maximum linear byte indices which are accessible by the array views: 340 mib1 = stdlib_ndarray_offset( x1 ); // byte offset 341 mab1 = mib1; 342 mib2 = stdlib_ndarray_offset( x2 ); // byte offset 343 mab2 = mib2; 344 for ( i = 0; i < ndims; i++ ) { 345 s = s1[ i ]; // units: bytes 346 if ( s > 0 ) { 347 mab1 += s * ( sh1[i]-1 ); 348 } else if ( s < 0 ) { 349 mib1 += s * ( sh1[i]-1 ); // decrements 350 } 351 s = s2[ i ]; 352 if ( s > 0 ) { 353 mab2 += s * ( sh2[i]-1 ); 354 } else if ( s < 0 ) { 355 mib2 += s * ( sh2[i]-1 ); // decrements 356 } 357 } 358 bp1 = stdlib_ndarray_bytes_per_element( stdlib_ndarray_dtype( x1 ) ); 359 bp2 = stdlib_ndarray_bytes_per_element( stdlib_ndarray_dtype( x2 ) ); 360 361 // Determine whether we can ignore shape (and strides) and treat the ndarrays as linear one-dimensional strided arrays... 362 if ( ( len*bp1 ) == ( mab1-mib1+bp1 ) && ( len*bp2 ) == ( mab2-mib2+bp2 ) ) { 363 // Note: the above is equivalent to @stdlib/ndarray/base/assert/is-contiguous, but in-lined so we can retain computed values... 364 status = stdlib_ndarray_unary_1d_flatten( obj->functions[ 1 ], len, x1, io1*bp1, x2, io2*bp2, fcn ); 365 if ( status == 0 ) { 366 return 0; 367 } 368 // If we failed, this is probably due to failed memory allocation, so fall through and try again... 369 } 370 // At least one ndarray is non-contiguous, so we cannot directly use one-dimensional array functionality... 371 372 // Determine whether we can use simple nested loops... 373 if ( ndims < (obj->nfunctions) ) { 374 // So long as iteration for each respective array always moves in the same direction (i.e., no mixed sign strides), we can leverage cache-optimal (i.e., normal) nested loops without resorting to blocked iteration... 375 obj->functions[ ndims ]( arrays, fcn ); 376 return 0; 377 } 378 // Fall-through to blocked iteration... 379 } 380 // At this point, we're either dealing with non-contiguous n-dimensional arrays, high dimensional n-dimensional arrays, and/or arrays having differing memory layouts, so our only hope is that we can still perform blocked iteration... 381 382 // Determine whether we can perform blocked iteration... 383 if ( ndims <= (obj->nblockedfunctions)+1 ) { 384 obj->blocked_functions[ ndims-2 ]( arrays, fcn ); 385 return 0; 386 } 387 // Fall-through to linear view iteration without regard for how data is stored in memory (i.e., take the slow path)... 388 obj->functions[ (obj->nfunctions)-1 ]( arrays, fcn ); 389 390 return 0; 391 }