//===-- Implementation of cbrt function -----------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "src/math/cbrt.h" #include "hdr/fenv_macros.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" #include "src/__support/FPUtil/double_double.h" #include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/common.h" #include "src/__support/integer_literals.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0) #define LIBC_MATH_CBRT_SKIP_ACCURATE_PASS #endif namespace LIBC_NAMESPACE_DECL { DoubleDouble; Float128; namespace { // Initial approximation of x^(-2/3) for 1 <= x < 2. // Polynomial generated by Sollya with: // > P = fpminimax(x^(-2/3), 7, [|D...|], [1, 2]); // > dirtyinfnorm(P/x^(-2/3) - 1, [1, 2]); // 0x1.28...p-21 double intial_approximation(double x) { … } // Get the error term for Newton iteration: // h(x) = x^3 * a^2 - 1, #ifdef LIBC_TARGET_CPU_HAS_FMA double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) { … } #else double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) { DoubleDouble x_3_a_sq = fputil::quick_mult(a_sq, x_3); return (x_3_a_sq.hi - 1.0) + x_3_a_sq.lo; } #endif } // anonymous namespace // Correctly rounded cbrt algorithm: // // === Step 1 - Range reduction === // For x = (-1)^s * 2^e * (1.m), we get 2 reduced arguments x_r and a as: // x_r = 1.m // a = (-1)^s * 2^(e % 3) * (1.m) // Then cbrt(x) = x^(1/3) can be computed as: // x^(1/3) = 2^(e / 3) * a^(1/3). // // In order to avoid division, we compute a^(-2/3) using Newton method and then // multiply the results by a: // a^(1/3) = a * a^(-2/3). // // === Step 2 - First approximation to a^(-2/3) === // First, we use a degree-7 minimax polynomial generated by Sollya to // approximate x_r^(-2/3) for 1 <= x_r < 2. // p = P(x_r) ~ x_r^(-2/3), // with relative errors bounded by: // | p / x_r^(-2/3) - 1 | < 1.16 * 2^-21. // // Then we multiply with 2^(e % 3) from a small lookup table to get: // x_0 = 2^(-2*(e % 3)/3) * p // ~ 2^(-2*(e % 3)/3) * x_r^(-2/3) // = a^(-2/3) // With relative errors: // | x_0 / a^(-2/3) - 1 | < 1.16 * 2^-21. // This step is done in double precision. // // === Step 3 - First Newton iteration === // We follow the method described in: // Sibidanov, A. and Zimmermann, P., "Correctly rounded cubic root evaluation // in double precision", https://core-math.gitlabpages.inria.fr/cbrt64.pdf // to derive multiplicative Newton iterations as below: // Let x_n be the nth approximation to a^(-2/3). Define the n^th error as: // h_n = x_n^3 * a^2 - 1 // Then: // a^(-2/3) = x_n / (1 + h_n)^(1/3) // = x_n * (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3 + ...) // using the Taylor series expansion of (1 + h_n)^(-1/3). // // Apply to x_0 above: // h_0 = x_0^3 * a^2 - 1 // = a^2 * (x_0 - a^(-2/3)) * (x_0^2 + x_0 * a^(-2/3) + a^(-4/3)), // it's bounded by: // |h_0| < 4 * 3 * 1.16 * 2^-21 * 4 < 2^-17. // So in the first iteration step, we use: // x_1 = x_0 * (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3) // Its relative error is bounded by: // | x_1 / a^(-2/3) - 1 | < 35/242 * |h_0|^4 < 2^-70. // Then we perform Ziv's rounding test and check if the answer is exact. // This step is done in double-double precision. // // === Step 4 - Second Newton iteration === // If the Ziv's rounding test from the previous step fails, we define the error // term: // h_1 = x_1^3 * a^2 - 1, // And perform another iteration: // x_2 = x_1 * (1 - h_1 / 3) // with the relative errors exceed the precision of double-double. // We then check the Ziv's accuracy test with relative errors < 2^-102 to // compensate for rounding errors. // // === Step 5 - Final iteration === // If the Ziv's accuracy test from the previous step fails, we perform another // iteration in 128-bit precision and check for exact outputs. // // TODO: It is possible to replace this costly computation step with special // exceptional handling, similar to what was done in the CORE-MATH project: // https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/cbrt/cbrt.c LLVM_LIBC_FUNCTION(double, cbrt, (double x)) { … } } // namespace LIBC_NAMESPACE_DECL