dcusumpw.c (2457B)
1 /** 2 * @license Apache-2.0 3 * 4 * Copyright (c) 2020 The Stdlib Authors. 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 #include "stdlib/blas/ext/base/dcusumpw.h" 20 #include <stdint.h> 21 22 /** 23 * Computes the cumulative sum of double-precision floating-point strided array elements using pairwise summation. 24 * 25 * ## Method 26 * 27 * - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`. 28 * 29 * ## References 30 * 31 * - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050). 32 * 33 * @param N number of indexed elements 34 * @param sum initial sum 35 * @param X input array 36 * @param strideX X stride length 37 * @param Y output array 38 * @param strideY Y stride length 39 */ 40 void stdlib_strided_dcusumpw( const int64_t N, const double sum, const double *X, const int64_t strideX, double *Y, const int64_t strideY ) { 41 double *xp1; 42 double *xp2; 43 double *yp1; 44 double *yp2; 45 int64_t ix; 46 int64_t iy; 47 int64_t i; 48 int64_t n; 49 double s; 50 51 if ( N <= 0 ) { 52 return; 53 } 54 if ( strideX < 0 ) { 55 ix = (1-N) * strideX; 56 } else { 57 ix = 0; 58 } 59 if ( strideY < 0 ) { 60 iy = (1-N) * strideY; 61 } else { 62 iy = 0; 63 } 64 // Blocksize for pairwise summation... 65 if ( N <= 128 ) { 66 s = 0.0; 67 for ( i = 0; i < N; i++ ) { 68 s += X[ ix ]; 69 Y[ iy ] = sum + s; 70 ix += strideX; 71 iy += strideY; 72 } 73 return; 74 } 75 n = N / 2; 76 if ( strideX < 0 ) { 77 xp1 = (double *)X + ( (n-N)*strideX ); 78 xp2 = (double *)X; 79 } else { 80 xp1 = (double *)X; 81 xp2 = (double *)X + ( n*strideX ); 82 } 83 if ( strideY < 0 ) { 84 yp1 = Y + ( (n-N)*strideY ); 85 yp2 = Y; 86 } else { 87 yp1 = Y; 88 yp2 = Y + ( n*strideY ); 89 } 90 stdlib_strided_dcusumpw( n, sum, xp1, strideX, yp1, strideY ); 91 iy += (n-1) * strideY; 92 stdlib_strided_dcusumpw( N-n, Y[ iy ], xp2, strideX, yp2, strideY ); 93 return; 94 }