time-to-botec

Benchmark sampling in different programming languages
Log | Files | Refs | README

sdsdot.c (2328B)


      1 /**
      2 * @license Apache-2.0
      3 *
      4 * Copyright (c) 2020 The Stdlib Authors.
      5 *
      6 * Licensed under the Apache License, Version 2.0 (the "License");
      7 * you may not use this file except in compliance with the License.
      8 * You may obtain a copy of the License at
      9 *
     10 *    http://www.apache.org/licenses/LICENSE-2.0
     11 *
     12 * Unless required by applicable law or agreed to in writing, software
     13 * distributed under the License is distributed on an "AS IS" BASIS,
     14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 * See the License for the specific language governing permissions and
     16 * limitations under the License.
     17 */
     18 
     19 /**
     20  * Compute the dot product of two single-precision floating-point vectors with extended accumulation.
     21  *
     22  * @see <a href="http://www.netlib.org/lapack/explore-html/df/d28/group__single__blas__level1.html">sdsdot</a>
     23  */
     24 #include "stdlib/blas/base/sdsdot.h"
     25 
     26 /**
     27 * Computes the dot product of two single-precision floating-point vectors with extended accumulation.
     28 *
     29 * @param N        number of values over which to compute the dot product
     30 * @param scalar   scalar constant added to the dot product
     31 * @param X        first array
     32 * @param strideX  X stride length
     33 * @param Y        second array
     34 * @param strideY  Y stride length
     35 * @returns        the dot product of X and Y
     36 */
     37 float c_sdsdot( const int N, const float scalar, const float *X, const int strideX, const float *Y, const int strideY ) {
     38 	double dot;
     39 	int ix;
     40 	int iy;
     41 	int m;
     42 	int i;
     43 
     44 	dot = (double)scalar;
     45 	if ( N <= 0 ) {
     46 		return dot;
     47 	}
     48 	// If both strides are equal to `1`, use unrolled loops...
     49 	if ( strideX == 1 && strideY == 1 ) {
     50 		m = N % 5;
     51 
     52 		// If we have a remainder, do a clean-up loop...
     53 		if ( m > 0 ) {
     54 			for ( i = 0; i < m; i++ ) {
     55 				dot += (double)X[ i ] * (double)Y[ i ];
     56 			}
     57 		}
     58 		if ( N < 5 ) {
     59 			return dot;
     60 		}
     61 		for ( i = m; i < N; i += 5 ) {
     62 			dot += ( (double)X[i]*(double)Y[i] ) + ( (double)X[i+1]*(double)Y[i+1]) + ( (double)X[i+2]*(double)Y[i+2] ) + ( (double)X[i+3]*(double)Y[i+3] ) + ( (double)X[i+4]*(double)Y[i+4] );
     63 		}
     64 		return dot;
     65 	}
     66 	if ( strideX < 0 ) {
     67 		ix = (1-N) * strideX;
     68 	} else {
     69 		ix = 0;
     70 	}
     71 	if ( strideY < 0 ) {
     72 		iy = (1-N) * strideY;
     73 	} else {
     74 		iy = 0;
     75 	}
     76 	for ( i = 0; i < N; i++ ) {
     77 		dot += (double)X[ ix ] * (double)Y[ iy ];
     78 		ix += strideX;
     79 		iy += strideY;
     80 	}
     81 	return dot;
     82 }
     83