cbrt.c (6215B)
1 /** 2 * @license Apache-2.0 3 * 4 * Copyright (c) 2020 The Stdlib Authors. 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 #include "stdlib/math/base/special/cbrt.h" 20 #include "stdlib/math/base/assert/is_nan.h" 21 #include "stdlib/math/base/assert/is_infinite.h" 22 #include "stdlib/number/float64/base/get_high_word.h" 23 #include "stdlib/number/float64/base/set_high_word.h" 24 #include "stdlib/number/float64/base/from_words.h" 25 #include <stdint.h> 26 27 // 0x80000000 = 2147483648 => 1 00000000000 00000000000000000000 28 static const uint32_t SIGN_MASK = 2147483648; 29 30 // 0x7fffffff = 2147483647 => 0 11111111111 11111111111111111111 31 static const uint32_t ABS_MASK = 2147483647; 32 33 // 11111111111111111111111111111111 11000000000000000000000000000000 34 static const uint64_t MASK = 0xffffffffc0000000ULL; 35 36 // 2**54 37 static const double TWO_54 = 18014398509481984.0; 38 39 // B1 = (1023-1023/3-0.03306235651)*2**20 40 static const uint32_t B1 = 715094163; 41 42 // B2 = (1023-1023/3-54/3-0.03306235651)*2**20 43 static const uint32_t B2 = 696219795; 44 45 // 0x00100000 = 1048576 => 0 00000000001 00000000000000000000 46 static const uint32_t FLOAT64_SMALLEST_NORMAL_HIGH_WORD = 1048576; 47 48 // |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). 49 static const double P0 = 1.87595182427177009643; // 0x3ffe03e6, 0x0f61e692 50 static const double P1 = -1.88497979543377169875; // 0xbffe28e0, 0x92f02420 51 static const double P2 = 1.621429720105354466140; // 0x3ff9f160, 0x4a49d6c2 52 static const double P3 = -0.758397934778766047437; // 0xbfe844cb, 0xbee751d9 53 static const double P4 = 0.145996192886612446982; // 0x3fc2b000, 0xd4e4edd7 54 55 /** 56 * Evaluates a polynomial. 57 * 58 * ## Notes 59 * 60 * - The implementation uses [Horner's rule][horners-method] for efficient computation. 61 * 62 * [horners-method]: https://en.wikipedia.org/wiki/Horner%27s_method 63 * 64 * @param x value at which to evaluate the polynomial 65 * @returns evaluated polynomial 66 */ 67 static double polval( const double x ) { 68 if ( x == 0.0 ) { 69 return P0; 70 } 71 return P0 + (x * (P1 + (x * (P2 + (x * (P3 + (x * P4))))))); 72 } 73 74 /** 75 * Computes the cube root of a double-precision floating-point number. 76 * 77 * ## Method 78 * 79 * 1. Rough cube root to \\( 5 \\) bits: 80 * 81 * ```tex 82 * \sqrt\[3\]{2^e (1+m)} \approx 2^(e/3) \biggl(1 + \frac{(e \mathrm{mod}\ 3) + m}{3}\biggr) 83 * ``` 84 * 85 * where \\( e \\) is a nonnegative integer, \\( m \\) is real and in \\( \[0, 1) \\), and \\( / \\) and \\( \mathrm{mod} \\) are integer division and modulus with rounding toward \\( -\infty \\). 86 * 87 * The RHS is always greater than or equal to the LHS and has a maximum relative error of about \\( 1 \\) in \\( 16 \\). 88 * 89 * Adding a bias of \\( -0.03306235651 \\) to the \\( (e \mathrm{mod} 3+ m )/ 3 \\) term reduces the error to about \\( 1 \\) in \\( 32 \\). 90 * 91 * With the IEEE floating point representation, for finite positive normal values, ordinary integer division of the value in bits magically gives almost exactly the RHS of the above provided we first subtract the exponent bias (\\( 1023 \\) for doubles) and later add it back. 92 * 93 * We do the subtraction virtually to keep \\( e \geq 0 \\) so that ordinary integer division rounds toward \\( -\infty \\); this is also efficient. 94 * 95 * 2. New cube root to \\( 23 \\) bits: 96 * 97 * ```tex 98 * \sqrt[3]{x} = t \cdot \sqrt\[3\]{x/t^3} \approx t \mathrm{P}(t^3/x) 99 * ``` 100 * 101 * where \\( \mathrm{P}(r) \\) is a polynomial of degree \\( 4 \\) that approximates \\( 1 / \sqrt\[3\]{r} \\) to within \\( 2^{-23.5} \\) when \\( |r - 1| < 1/10 \\). 102 * 103 * The rough approximation has produced \\( t \\) such than \\( |t/sqrt\[3\]{x} - 1| \lesssim 1/32 \\), and cubing this gives us bounds for \\( r = t^3/x \\). 104 * 105 * 3. Round \\( t \\) away from \\( 0 \\) to \\( 23 \\) bits (sloppily except for ensuring that the result is larger in magnitude than \\( \sqrt\[3\]{x} \\) but not much more than \\( 2 \\) 23-bit ulps larger). 106 * 107 * With rounding toward zero, the error bound would be \\( \approx 5/6 \\) instead of \\( \approx 4/6 \\). 108 * 109 * With a maximum error of \\( 2 \\) 23-bit ulps in the rounded \\( t \\), the infinite-precision error in the Newton approximation barely affects the third digit in the final error \\( 0.667 \\); the error in the rounded \\( t \\) can be up to about \\( 3 \\) 23-bit ulps before the final error is larger than \\( 0.667 \\) ulps. 110 * 111 * 4. Perform one step of a Newton iteration to get \\( 53 \\) bits with an error of \\( < 0.667 \\) ulps. 112 * 113 * @param x number 114 * @return cube root 115 * 116 * @example 117 * double y = stdlib_base_cbrt( 27.0 ); 118 * // returns 3.0 119 */ 120 double stdlib_base_cbrt( const double x ) { 121 uint32_t sgn; 122 uint32_t hx; 123 uint32_t hw; 124 double t; 125 double r; 126 double s; 127 double w; 128 129 union { 130 double value; 131 uint64_t bits; 132 } u; 133 134 if ( 135 x == 0.0 || // handles +-0 136 stdlib_base_is_nan( x ) || 137 stdlib_base_is_infinite( x ) 138 ) { 139 return x; 140 } 141 stdlib_base_float64_get_high_word( x, &hx ); 142 sgn = hx & SIGN_MASK; 143 hx &= ABS_MASK; 144 145 // Rough cbrt... 146 if ( hx < FLOAT64_SMALLEST_NORMAL_HIGH_WORD ) { 147 t = TWO_54 * x; 148 stdlib_base_float64_get_high_word( t, &hw ); 149 hw = ( (hw&ABS_MASK)/3 ) + B2; 150 stdlib_base_float64_from_words( sgn|hw, 0, &t ); 151 } else { 152 t = 0.0; 153 hw = ( hx/3 ) + B1; 154 stdlib_base_float64_set_high_word( sgn|hw, &t ); 155 } 156 // New cbrt... 157 r = ( t*t ) * ( t/x ); 158 t *= polval( r ); 159 160 // Round `t` away from `0` to `23` bits... 161 u.value = t; 162 u.bits = (u.bits+0x80000000)&MASK; 163 t = u.value; 164 165 // Newton iteration... 166 s = t * t; // `t*t` is exact 167 r = x / s; // error `<= 0.5` ulps; `|r| < |t|` 168 w = t + t; // `t+t` is exact 169 r = ( r-t ) / ( w+r ); // `r-t` is exact; `w+r ~= 3*t` 170 t += t * r; // error `<= 0.5 + 0.5/3 + eps` 171 172 return t; 173 }