sdsdot.c (2328B)
1 /** 2 * @license Apache-2.0 3 * 4 * Copyright (c) 2020 The Stdlib Authors. 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 /** 20 * Compute the dot product of two single-precision floating-point vectors with extended accumulation. 21 * 22 * @see <a href="http://www.netlib.org/lapack/expolore-html/df/d28/group__single__blas__level1.html">sdsdot</a> 23 */ 24 #include "stdlib/blas/base/sdsdot.h" 25 26 /** 27 * Computes the dot product of two single-precision floating-point vectors with extended accumulation. 28 * 29 * @param N number of values over which to compute the dot product 30 * @param scalar scalar constant added to the dot product 31 * @param X first array 32 * @param strideX X stride length 33 * @param Y second array 34 * @param strideY Y stride length 35 * @returns the dot product of X and Y 36 */ 37 float c_sdsdot( const int N, const float scalar, const float *X, const int strideX, const float *Y, const int strideY ) { 38 double dot; 39 int ix; 40 int iy; 41 int m; 42 int i; 43 44 dot = (double)scalar; 45 if ( N <= 0 ) { 46 return dot; 47 } 48 // If both strides are equal to `1`, use unrolled loops... 49 if ( strideX == 1 && strideY == 1 ) { 50 m = N % 5; 51 52 // If we have a remainder, do a clean-up loop... 53 if ( m > 0 ) { 54 for ( i = 0; i < m; i++ ) { 55 dot += (double)X[ i ] * (double)Y[ i ]; 56 } 57 } 58 if ( N < 5 ) { 59 return dot; 60 } 61 for ( i = m; i < N; i += 5 ) { 62 dot += ( (double)X[i]*(double)Y[i] ) + ( (double)X[i+1]*(double)Y[i+1]) + ( (double)X[i+2]*(double)Y[i+2] ) + ( (double)X[i+3]*(double)Y[i+3] ) + ( (double)X[i+4]*(double)Y[i+4] ); 63 } 64 return dot; 65 } 66 if ( strideX < 0 ) { 67 ix = (1-N) * strideX; 68 } else { 69 ix = 0; 70 } 71 if ( strideY < 0 ) { 72 iy = (1-N) * strideY; 73 } else { 74 iy = 0; 75 } 76 for ( i = 0; i < N; i++ ) { 77 dot += (double)X[ ix ] * (double)Y[ iy ]; 78 ix += strideX; 79 iy += strideY; 80 } 81 return dot; 82 } 83