cbrt.c - time-to-botec - Benchmark sampling in different programming languages

cbrt.c (6215B)
      1 /**
      2 * @license Apache-2.0
      3 *
      4 * Copyright (c) 2020 The Stdlib Authors.
      5 *
      6 * Licensed under the Apache License, Version 2.0 (the "License");
      7 * you may not use this file except in compliance with the License.
      8 * You may obtain a copy of the License at
      9 *
     10 *    http://www.apache.org/licenses/LICENSE-2.0
     11 *
     12 * Unless required by applicable law or agreed to in writing, software
     13 * distributed under the License is distributed on an "AS IS" BASIS,
     14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 * See the License for the specific language governing permissions and
     16 * limitations under the License.
     17 */
     18 
     19 #include "stdlib/math/base/special/cbrt.h"
     20 #include "stdlib/math/base/assert/is_nan.h"
     21 #include "stdlib/math/base/assert/is_infinite.h"
     22 #include "stdlib/number/float64/base/get_high_word.h"
     23 #include "stdlib/number/float64/base/set_high_word.h"
     24 #include "stdlib/number/float64/base/from_words.h"
     25 #include <stdint.h>
     26 
     27 // 0x80000000 = 2147483648 => 1 00000000000 00000000000000000000
     28 static const uint32_t SIGN_MASK = 2147483648;
     29 
     30 // 0x7fffffff = 2147483647 => 0 11111111111 11111111111111111111
     31 static const uint32_t ABS_MASK = 2147483647;
     32 
     33 // 11111111111111111111111111111111 11000000000000000000000000000000
     34 static const uint64_t MASK = 0xffffffffc0000000ULL;
     35 
     36 // 2**54
     37 static const double TWO_54 = 18014398509481984.0;
     38 
     39 // B1 = (1023-1023/3-0.03306235651)*2**20
     40 static const uint32_t B1 = 715094163;
     41 
     42 // B2 = (1023-1023/3-54/3-0.03306235651)*2**20
     43 static const uint32_t B2 = 696219795;
     44 
     45 // 0x00100000 = 1048576 => 0 00000000001 00000000000000000000
     46 static const uint32_t FLOAT64_SMALLEST_NORMAL_HIGH_WORD = 1048576;
     47 
     48 // |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]).
     49 static const double P0 = 1.87595182427177009643;   // 0x3ffe03e6, 0x0f61e692
     50 static const double P1 = -1.88497979543377169875;  // 0xbffe28e0, 0x92f02420
     51 static const double P2 = 1.621429720105354466140;  // 0x3ff9f160, 0x4a49d6c2
     52 static const double P3 = -0.758397934778766047437; // 0xbfe844cb, 0xbee751d9
     53 static const double P4 = 0.145996192886612446982;  // 0x3fc2b000, 0xd4e4edd7
     54 
     55 /**
     56 * Evaluates a polynomial.
     57 *
     58 * ## Notes
     59 *
     60 * -   The implementation uses [Horner's rule][horners-method] for efficient computation.
     61 *
     62 * [horners-method]: https://en.wikipedia.org/wiki/Horner%27s_method
     63 *
     64 * @param x    value at which to evaluate the polynomial
     65 * @returns    evaluated polynomial
     66 */
     67 static double polval( const double x ) {
     68 	if ( x == 0.0 ) {
     69 		return P0;
     70 	}
     71 	return P0 + (x * (P1 + (x * (P2 + (x * (P3 + (x * P4)))))));
     72 }
     73 
     74 /**
     75 * Computes the cube root of a double-precision floating-point number.
     76 *
     77 * ## Method
     78 *
     79 * 1.  Rough cube root to \\( 5 \\) bits:
     80 *
     81 *     ```tex
     82 *     \sqrt\[3\]{2^e (1+m)} \approx 2^(e/3) \biggl(1 + \frac{(e \mathrm{mod}\ 3) + m}{3}\biggr)
     83 *     ```
     84 *
     85 *     where \\( e \\) is a nonnegative integer, \\( m \\) is real and in \\( \[0, 1) \\), and \\( / \\) and \\( \mathrm{mod} \\) are integer division and modulus with rounding toward \\( -\infty \\).
     86 *
     87 *     The RHS is always greater than or equal to the LHS and has a maximum relative error of about \\( 1 \\) in \\( 16 \\).
     88 *
     89 *     Adding a bias of \\( -0.03306235651 \\) to the \\( (e \mathrm{mod} 3+ m )/ 3 \\) term reduces the error to about \\( 1 \\) in \\( 32 \\).
     90 *
     91 *     With the IEEE floating point representation, for finite positive normal values, ordinary integer division of the value in bits magically gives almost exactly the RHS of the above provided we first subtract the exponent bias (\\( 1023 \\) for doubles) and later add it back.
     92 *
     93 *     We do the subtraction virtually to keep \\( e \geq 0 \\) so that ordinary integer division rounds toward \\( -\infty \\); this is also efficient.
     94 *
     95 * 2.  New cube root to \\( 23 \\) bits:
     96 *
     97 *     ```tex
     98 *     \sqrt[3]{x} = t \cdot \sqrt\[3\]{x/t^3} \approx t \mathrm{P}(t^3/x)
     99 *     ```
    100 *
    101 *     where \\( \mathrm{P}(r) \\) is a polynomial of degree \\( 4 \\) that approximates \\( 1 / \sqrt\[3\]{r} \\) to within \\( 2^{-23.5} \\) when \\( |r - 1| < 1/10 \\).
    102 *
    103 *     The rough approximation has produced \\( t \\) such than \\( |t/sqrt\[3\]{x} - 1| \lesssim 1/32 \\), and cubing this gives us bounds for \\( r = t^3/x \\).
    104 *
    105 * 3.  Round \\( t \\) away from \\( 0 \\) to \\( 23 \\) bits (sloppily except for ensuring that the result is larger in magnitude than \\( \sqrt\[3\]{x} \\) but not much more than \\( 2 \\) 23-bit ulps larger).
    106 *
    107 *     With rounding toward zero, the error bound would be \\( \approx 5/6 \\) instead of \\( \approx 4/6 \\).
    108 *
    109 *     With a maximum error of \\( 2 \\) 23-bit ulps in the rounded \\( t \\), the infinite-precision error in the Newton approximation barely affects the third digit in the final error \\( 0.667 \\); the error in the rounded \\( t \\) can be up to about \\( 3 \\) 23-bit ulps before the final error is larger than \\( 0.667 \\) ulps.
    110 *
    111 * 4.  Perform one step of a Newton iteration to get \\( 53 \\) bits with an error of \\( < 0.667 \\) ulps.
    112 *
    113 * @param x       number
    114 * @return        cube root
    115 *
    116 * @example
    117 * double y = stdlib_base_cbrt( 27.0 );
    118 * // returns 3.0
    119 */
    120 double stdlib_base_cbrt( const double x ) {
    121 	uint32_t sgn;
    122 	uint32_t hx;
    123 	uint32_t hw;
    124 	double t;
    125 	double r;
    126 	double s;
    127 	double w;
    128 
    129 	union {
    130 		double value;
    131 		uint64_t bits;
    132 	} u;
    133 
    134 	if (
    135 		x == 0.0 || // handles +-0
    136 		stdlib_base_is_nan( x ) ||
    137 		stdlib_base_is_infinite( x )
    138 	) {
    139 		return x;
    140 	}
    141 	stdlib_base_float64_get_high_word( x, &hx );
    142 	sgn = hx & SIGN_MASK;
    143 	hx &= ABS_MASK;
    144 
    145 	// Rough cbrt...
    146 	if ( hx < FLOAT64_SMALLEST_NORMAL_HIGH_WORD ) {
    147 		t = TWO_54 * x;
    148 		stdlib_base_float64_get_high_word( t, &hw );
    149 		hw = ( (hw&ABS_MASK)/3 ) + B2;
    150 		stdlib_base_float64_from_words( sgn|hw, 0, &t );
    151 	} else {
    152 		t = 0.0;
    153 		hw = ( hx/3 ) + B1;
    154 		stdlib_base_float64_set_high_word( sgn|hw, &t );
    155 	}
    156 	// New cbrt...
    157 	r = ( t*t ) * ( t/x );
    158 	t *= polval( r );
    159 
    160 	// Round `t` away from `0` to `23` bits...
    161 	u.value = t;
    162 	u.bits = (u.bits+0x80000000)&MASK;
    163 	t = u.value;
    164 
    165 	// Newton iteration...
    166 	s = t * t;             // `t*t` is exact
    167 	r = x / s;             // error `<= 0.5` ulps; `|r| < |t|`
    168 	w = t + t;             // `t+t` is exact
    169 	r = ( r-t ) / ( w+r ); // `r-t` is exact; `w+r ~= 3*t`
    170 	t += t * r;            // error `<= 0.5 + 0.5/3 + eps`
    171 
    172 	return t;
    173 }
	time-to-botec Benchmark sampling in different programming languages
	Log \| Files \| Refs \| README