time-to-botec

Benchmark sampling in different programming languages
Log | Files | Refs | README

dispatch.c (12405B)


      1 /**
      2 * @license Apache-2.0
      3 *
      4 * Copyright (c) 2021 The Stdlib Authors.
      5 *
      6 * Licensed under the Apache License, Version 2.0 (the "License");
      7 * you may not use this file except in compliance with the License.
      8 * You may obtain a copy of the License at
      9 *
     10 *    http://www.apache.org/licenses/LICENSE-2.0
     11 *
     12 * Unless required by applicable law or agreed to in writing, software
     13 * distributed under the License is distributed on an "AS IS" BASIS,
     14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 * See the License for the specific language governing permissions and
     16 * limitations under the License.
     17 */
     18 
     19 #include "stdlib/ndarray/base/unary/dispatch_object.h"
     20 #include "stdlib/ndarray/base/unary/typedefs.h"
     21 #include "stdlib/ndarray/base/iteration_order.h"
     22 #include "stdlib/ndarray/base/bytes_per_element.h"
     23 #include "stdlib/ndarray/ctor.h"
     24 #include <stdint.h>
     25 #include <stddef.h>
     26 
     27 /**
     28 * Applies a unary callback to an n-dimensional input ndarray having `ndims-1` singleton dimensions and assigns results to elements in an output ndarray having the same shape.
     29 *
     30 * ## Notes
     31 *
     32 * -   If able to successfully apply a unary callback, the function returns `0`; otherwise, the function returns an error code.
     33 *
     34 * @private
     35 * @param f     unary ndarray function
     36 * @param x1    input ndarray
     37 * @param x2    output ndarray
     38 * @param i     index of the non-singleton dimension
     39 * @param fcn   callback
     40 * @return      status code
     41 */
     42 static int8_t stdlib_ndarray_unary_1d_squeeze( const ndarrayUnaryFcn f, struct ndarray *x1, struct ndarray *x2, const int64_t i, void *fcn ) {
     43 	int64_t sh[] = { stdlib_ndarray_shape( x1 )[ i ] };
     44 
     45 	// Shallow copy and reshape the arrays...
     46 	int64_t sx1[] = { stdlib_ndarray_strides( x1 )[ i ] };
     47 	struct ndarray *x1c = stdlib_ndarray_allocate(
     48 		stdlib_ndarray_dtype( x1 ),
     49 		stdlib_ndarray_data( x1 ),
     50 		1,
     51 		sh,
     52 		sx1,
     53 		stdlib_ndarray_offset( x1 ),
     54 		stdlib_ndarray_order( x1 ),
     55 		stdlib_ndarray_index_mode( x1 ),
     56 		stdlib_ndarray_nsubmodes( x1 ),
     57 		stdlib_ndarray_submodes( x1 )
     58 	);
     59 	if ( x1c == NULL ) {
     60 		return -1;
     61 	}
     62 	int64_t sx2[] = { stdlib_ndarray_strides( x2 )[ i ] };
     63 	struct ndarray *x2c = stdlib_ndarray_allocate(
     64 		stdlib_ndarray_dtype( x2 ),
     65 		stdlib_ndarray_data( x2 ),
     66 		1,
     67 		sh,
     68 		sx2,
     69 		stdlib_ndarray_offset( x2 ),
     70 		stdlib_ndarray_order( x2 ),
     71 		stdlib_ndarray_index_mode( x2 ),
     72 		stdlib_ndarray_nsubmodes( x2 ),
     73 		stdlib_ndarray_submodes( x2 )
     74 	);
     75 	if ( x2c == NULL ) {
     76 		stdlib_ndarray_free( x1c );
     77 		return -1;
     78 	}
     79 	// Apply the callback:
     80 	struct ndarray *arrays[] = { x1c, x2c };
     81 	int8_t status = f( arrays, fcn );
     82 
     83 	// Free allocated memory:
     84 	stdlib_ndarray_free( x1c );
     85 	stdlib_ndarray_free( x2c );
     86 
     87 	return status;
     88 }
     89 
     90 /**
     91 * Applies a unary callback to a flattened n-dimensional input ndarray and assigns results to elements in a flattened n-dimensional output ndarray.
     92 *
     93 * ## Notes
     94 *
     95 * -   If able to successfully apply a unary callback, the function returns `0`; otherwise, the function returns an error code.
     96 *
     97 * @private
     98 * @param f     unary ndarray function
     99 * @param N     number of elements
    100 * @param x1    input ndarray
    101 * @param s1    input ndarray stride length
    102 * @param x2    output ndarray
    103 * @param s2    output ndarray stride length
    104 * @param fcn   callback
    105 * @return      status code
    106 */
    107 static int8_t stdlib_ndarray_unary_1d_flatten( const ndarrayUnaryFcn f, const int64_t N, struct ndarray *x1, const int64_t s1, struct ndarray *x2, const int64_t s2, void *fcn ) {
    108 	// Define the (flattened) strided array shape:
    109 	int64_t sh[] = { N };
    110 
    111 	// Shallow copy and reshape the arrays...
    112 	int64_t sx1[] = { s2 };
    113 	struct ndarray *x1c = stdlib_ndarray_allocate(
    114 		stdlib_ndarray_dtype( x1 ),
    115 		stdlib_ndarray_data( x1 ),
    116 		1,
    117 		sh,
    118 		sx1,
    119 		stdlib_ndarray_offset( x1 ),
    120 		stdlib_ndarray_order( x1 ),
    121 		stdlib_ndarray_index_mode( x1 ),
    122 		stdlib_ndarray_nsubmodes( x1 ),
    123 		stdlib_ndarray_submodes( x1 )
    124 	);
    125 	if ( x1c == NULL ) {
    126 		return -1;
    127 	}
    128 	int64_t sx2[] = { s2 };
    129 	struct ndarray *x2c = stdlib_ndarray_allocate(
    130 		stdlib_ndarray_dtype( x2 ),
    131 		stdlib_ndarray_data( x2 ),
    132 		1,
    133 		sh,
    134 		sx2,
    135 		stdlib_ndarray_offset( x2 ),
    136 		stdlib_ndarray_order( x2 ),
    137 		stdlib_ndarray_index_mode( x2 ),
    138 		stdlib_ndarray_nsubmodes( x2 ),
    139 		stdlib_ndarray_submodes( x2 )
    140 	);
    141 	if ( x2c == NULL ) {
    142 		stdlib_ndarray_free( x1c );
    143 		return -1;
    144 	}
    145 	// Apply the callback:
    146 	struct ndarray *arrays[] = { x1c, x2c };
    147 	int8_t status = f( arrays, fcn );
    148 
    149 	// Free allocated memory:
    150 	stdlib_ndarray_free( x1c );
    151 	stdlib_ndarray_free( x2c );
    152 
    153 	return status;
    154 }
    155 
    156 /**
    157 * Dispatches to a unary ndarray function according to the dimensionality of provided ndarray arguments.
    158 *
    159 * ## Notes
    160 *
    161 * -   If able to successfully dispatch, the function returns `0`; otherwise, the function returns an error code.
    162 *
    163 * @param obj      object comprised of dispatch tables containing unary ndarray functions
    164 * @param arrays   array whose first element is a pointer to an input ndarray and whose last element is a pointer to an output ndarray
    165 * @param fcn      callback
    166 * @return         status code
    167 *
    168 * @example
    169 * #include "stdlib/ndarray/base/unary/dispatch.h"
    170 * #include "stdlib/ndarray/base/unary/dispatch_object.h"
    171 * #include "stdlib/ndarray/base/unary/typedefs.h"
    172 * #include "stdlib/ndarray/base/unary/b_b.h"
    173 * #include "stdlib/ndarray/ctor.h"
    174 * #include <stdint.h>
    175 * #include <stdlib.h>
    176 * #include <stdio.h>
    177 *
    178 * // Define a list of unary ndarray functions:
    179 * ndarrayUnaryFcn functions[] = {
    180 *     stdlib_ndarray_b_b_0d,
    181 *     stdlib_ndarray_b_b_1d,
    182 *     stdlib_ndarray_b_b_2d,
    183 *     stdlib_ndarray_b_b_3d,
    184 *     stdlib_ndarray_b_b_4d,
    185 *     stdlib_ndarray_b_b_5d,
    186 *     stdlib_ndarray_b_b_6d,
    187 *     stdlib_ndarray_b_b_7d,
    188 *     stdlib_ndarray_b_b_8d,
    189 *     stdlib_ndarray_b_b_9d,
    190 *     stdlib_ndarray_b_b_10d
    191 *     stdlib_ndarray_b_b_nd
    192 * };
    193 *
    194 * // Define a list of unary ndarray functions using loop blocking...
    195 * ndarrayUnaryFcn blocked_functions[] = {
    196 *     stdlib_ndarray_b_b_2d_blocked,
    197 *     stdlib_ndarray_b_b_3d_blocked,
    198 *     stdlib_ndarray_b_b_4d_blocked,
    199 *     stdlib_ndarray_b_b_5d_blocked,
    200 *     stdlib_ndarray_b_b_6d_blocked,
    201 *     stdlib_ndarray_b_b_7d_blocked,
    202 *     stdlib_ndarray_b_b_8d_blocked,
    203 *     stdlib_ndarray_b_b_9d_blocked,
    204 *     stdlib_ndarray_b_b_10d_blocked
    205 * };
    206 *
    207 * // Create a unary function dispatch object:
    208 * struct ndarrayUnaryDispatchObject obj = {
    209 *     // Array containing unary ndarray functions:
    210 *     unary,
    211 *
    212 *     // Number of unary ndarray functions:
    213 *     12,
    214 *
    215 *     // Array containing unary ndarray functions using loop blocking:
    216 *     blocked_unary,
    217 *
    218 *     // Number of unary ndarray functions using loop blocking:
    219 *     9
    220 * }
    221 *
    222 * // ...
    223 *
    224 * // Create ndarrays...
    225 * struct ndarray *x = stdlib_ndarray_allocate( ... );
    226 * if ( x == NULL ) {
    227 *     fprintf( stderr, "Error allocating memory.\n" );
    228 *     exit( EXIT_FAILURE );
    229 * }
    230 *
    231 * struct ndarray *y = stdlib_ndarray_allocate( ... );
    232 * if ( y == NULL ) {
    233 *     fprintf( stderr, "Error allocating memory.\n" );
    234 *     exit( EXIT_FAILURE );
    235 * }
    236 *
    237 * // ...
    238 *
    239 * // Define a callback:
    240 * uint8_t scale( const uint8_t x ) {
    241 *     return x + 10;
    242 * }
    243 *
    244 * // Apply the callback:
    245 * struct ndarray *arrays[] = { x, y };
    246 * int8_t status = stdlib_ndarray_b_b( arrays, (void *)scale );
    247 * if ( status != 0 ) {
    248 *     fprintf( stderr, "Error during computation.\n" );
    249 *     exit( EXIT_FAILURE );
    250 * }
    251 */
    252 int8_t stdlib_ndarray_unary_dispatch( const struct ndarrayUnaryDispatchObject *obj, struct ndarray *arrays[], void *fcn ) {
    253 	struct ndarray *x1;
    254 	struct ndarray *x2;
    255 	int8_t status;
    256 	int64_t ndims;
    257 	int64_t mab1;
    258 	int64_t mab2;
    259 	int64_t mib1;
    260 	int64_t mib2;
    261 	int64_t *sh1;
    262 	int64_t *sh2;
    263 	int64_t *s1;
    264 	int64_t *s2;
    265 	int64_t len;
    266 	int64_t bp1;
    267 	int64_t bp2;
    268 	int8_t io1;
    269 	int8_t io2;
    270 	int64_t ns;
    271 	int64_t s;
    272 	int64_t d;
    273 	int64_t i;
    274 
    275 	// Unpack the arrays:
    276 	x1 = arrays[ 0 ];
    277 	x2 = arrays[ 1 ];
    278 
    279 	// Verify that the input and output arrays have the same number of dimensions...
    280 	ndims = stdlib_ndarray_ndims( x1 );
    281 	if ( ndims != stdlib_ndarray_ndims( x2 ) ) {
    282 		return -1; // TODO: consider standardized error codes
    283 	}
    284 	// Determine whether we can avoid iteration altogether...
    285 	if ( ndims == 0 ) {
    286 		obj->functions[ 0 ]( arrays, fcn );
    287 		return 0;
    288 	}
    289 	sh1 = stdlib_ndarray_shape( x1 );
    290 	sh2 = stdlib_ndarray_shape( x2 );
    291 
    292 	// Verify that the input and output arrays have the same dimensions...
    293 	len = 1; // number of elements
    294 	ns = 0;  // number of singleton dimensions
    295 	for ( i = 0; i < ndims; i++ ) {
    296 		d = sh1[ i ];
    297 		if ( d != sh2[ i ] ) {
    298 			return -1; // TODO: consider standardized error codes
    299 		}
    300 		// Note that, if one of the dimensions is `0`, the length will be `0`...
    301 		len *= d;
    302 
    303 		// Check whether the current dimension is a singleton dimension...
    304 		if ( d == 1 ) {
    305 			ns += 1;
    306 		}
    307 	}
    308 	// Check whether we were provided empty ndarrays...
    309 	if ( len == 0 ) {
    310 		return 0;
    311 	}
    312 	// Determine whether the ndarrays are one-dimensional and thus readily translate to one-dimensional strided arrays...
    313 	if ( ndims == 1 ) {
    314 		obj->functions[ 1 ]( arrays, fcn );
    315 		return 0;
    316 	}
    317 	// Determine whether the ndarrays have only **one** non-singleton dimension (e.g., ndims=4, shape=[10,1,1,1]) so that we can treat the ndarrays as being equivalent to one-dimensional strided arrays...
    318 	if ( ns == ndims-1 ) {
    319 		// Get the index of the non-singleton dimension...
    320 		for ( i = 0; i < ndims; i++ ) {
    321 			if ( sh1[ i ] != 1 ) {
    322 				break;
    323 			}
    324 		}
    325 		// Remove the singleton dimensions and apply the unary function...
    326 		status = stdlib_ndarray_unary_1d_squeeze( obj->functions[ 1 ], x1, x2, i, fcn );
    327 		if ( status == 0 ) {
    328 			return 0;
    329 		}
    330 		// If we failed, this is probably due to failed memory allocation, so fall through and try again...
    331 	}
    332 	s1 = stdlib_ndarray_strides( x1 );
    333 	s2 = stdlib_ndarray_strides( x2 );
    334 	io1 = stdlib_ndarray_iteration_order( ndims, s1 ); // +/-1
    335 	io2 = stdlib_ndarray_iteration_order( ndims, s2 ); // +/-1
    336 
    337 	// Determine whether we can avoid blocked iteration...
    338 	if ( io1 != 0 && io2 != 0 && stdlib_ndarray_order( x1 ) == stdlib_ndarray_order( x2 )) {
    339 		// Determine the minimum and maximum linear byte indices which are accessible by the array views:
    340 		mib1 = stdlib_ndarray_offset( x1 ); // byte offset
    341 		mab1 = mib1;
    342 		mib2 = stdlib_ndarray_offset( x2 ); // byte offset
    343 		mab2 = mib2;
    344 		for ( i = 0; i < ndims; i++ ) {
    345 			s = s1[ i ]; // units: bytes
    346 			if ( s > 0 ) {
    347 				mab1 += s * ( sh1[i]-1 );
    348 			} else if ( s < 0 ) {
    349 				mib1 += s * ( sh1[i]-1 ); // decrements
    350 			}
    351 			s = s2[ i ];
    352 			if ( s > 0 ) {
    353 				mab2 += s * ( sh2[i]-1 );
    354 			} else if ( s < 0 ) {
    355 				mib2 += s * ( sh2[i]-1 ); // decrements
    356 			}
    357 		}
    358 		bp1 = stdlib_ndarray_bytes_per_element( stdlib_ndarray_dtype( x1 ) );
    359 		bp2 = stdlib_ndarray_bytes_per_element( stdlib_ndarray_dtype( x2 ) );
    360 
    361 		// Determine whether we can ignore shape (and strides) and treat the ndarrays as linear one-dimensional strided arrays...
    362 		if ( ( len*bp1 ) == ( mab1-mib1+bp1 ) && ( len*bp2 ) == ( mab2-mib2+bp2 ) ) {
    363 			// Note: the above is equivalent to @stdlib/ndarray/base/assert/is-contiguous, but in-lined so we can retain computed values...
    364 			status = stdlib_ndarray_unary_1d_flatten( obj->functions[ 1 ], len, x1, io1*bp1, x2, io2*bp2, fcn );
    365 			if ( status == 0 ) {
    366 				return 0;
    367 			}
    368 			// If we failed, this is probably due to failed memory allocation, so fall through and try again...
    369 		}
    370 		// At least one ndarray is non-contiguous, so we cannot directly use one-dimensional array functionality...
    371 
    372 		// Determine whether we can use simple nested loops...
    373 		if ( ndims < (obj->nfunctions) ) {
    374 			// So long as iteration for each respective array always moves in the same direction (i.e., no mixed sign strides), we can leverage cache-optimal (i.e., normal) nested loops without resorting to blocked iteration...
    375 			obj->functions[ ndims ]( arrays, fcn );
    376 			return 0;
    377 		}
    378 		// Fall-through to blocked iteration...
    379 	}
    380 	// At this point, we're either dealing with non-contiguous n-dimensional arrays, high dimensional n-dimensional arrays, and/or arrays having differing memory layouts, so our only hope is that we can still perform blocked iteration...
    381 
    382 	// Determine whether we can perform blocked iteration...
    383 	if ( ndims <= (obj->nblockedfunctions)+1 ) {
    384 		obj->blocked_functions[ ndims-2 ]( arrays, fcn );
    385 		return 0;
    386 	}
    387 	// Fall-through to linear view iteration without regard for how data is stored in memory (i.e., take the slow path)...
    388 	obj->functions[ (obj->nfunctions)-1 ]( arrays, fcn );
    389 
    390 	return 0;
    391 }