daxpy.c - time-to-botec - Benchmark sampling in different programming languages

daxpy.c (1902B)
      1 /**
      2 * @license Apache-2.0
      3 *
      4 * Copyright (c) 2018 The Stdlib Authors.
      5 *
      6 * Licensed under the Apache License, Version 2.0 (the "License");
      7 * you may not use this file except in compliance with the License.
      8 * You may obtain a copy of the License at
      9 *
     10 *    http://www.apache.org/licenses/LICENSE-2.0
     11 *
     12 * Unless required by applicable law or agreed to in writing, software
     13 * distributed under the License is distributed on an "AS IS" BASIS,
     14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 * See the License for the specific language governing permissions and
     16 * limitations under the License.
     17 */
     18 
     19 #include "stdlib/blas/base/daxpy.h"
     20 
     21 /**
     22 * Multiplies a vector `X` by a constant and adds the result to `Y`.
     23 *
     24 * @param N        number of elements
     25 * @param alpha    scalar
     26 * @param X        input array
     27 * @param strideX  X stride length
     28 * @param Y        destination array
     29 * @param strideY  Y stride length
     30 */
     31 void c_daxpy( const int N, const double alpha, const double *X, const int strideX, double *Y, const int strideY ) {
     32 	int ix;
     33 	int iy;
     34 	int i;
     35 	int m;
     36 
     37 	if ( N <= 0 ) {
     38 		return;
     39 	}
     40 	// If `alpha` is `0`, then `y` is unchanged...
     41 	if ( alpha == 0.0 ) {
     42 		return;
     43 	}
     44 	// If both strides are equal to `1`, use unrolled loops...
     45 	if ( strideX == 1 && strideY == 1 ) {
     46 		m = N % 4;
     47 
     48 		// If we have a remainder, do a clean-up loop...
     49 		if ( m > 0 ) {
     50 			for ( i = 0; i < m; i++ ) {
     51 				Y[ i ] += alpha * X[ i ];
     52 			}
     53 			if ( N < 4 ) {
     54 				return;
     55 			}
     56 		}
     57 		for ( i = m; i < N; i += 4 ) {
     58 			Y[ i ] += alpha * X[ i ];
     59 			Y[ i+1 ] += alpha * X[ i+1 ];
     60 			Y[ i+2 ] += alpha * X[ i+2 ];
     61 			Y[ i+3 ] += alpha * X[ i+3 ];
     62 		}
     63 		return;
     64 	}
     65 	if ( strideX < 0 ) {
     66 		ix = (1-N) * strideX;
     67 	} else {
     68 		ix = 0;
     69 	}
     70 	if ( strideY < 0 ) {
     71 		iy = (1-N) * strideY;
     72 	} else {
     73 		iy = 0;
     74 	}
     75 	for ( i = 0; i < N; i++ ) {
     76 		Y[ iy ] += alpha * X[ ix ];
     77 		ix += strideX;
     78 		iy += strideY;
     79 	}
     80 	return;
     81 }
	time-to-botec Benchmark sampling in different programming languages
	Log \| Files \| Refs \| README