cbrt.cpp | Explore in Territory

//===-- Implementation of cbrt function -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/math/cbrt.h"
#include "hdr/fenv_macros.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/double_double.h"
#include "src/__support/FPUtil/dyadic_float.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/common.h"
#include "src/__support/integer_literals.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY

#if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0)
#define LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
#endif

namespace LIBC_NAMESPACE_DECL {

DoubleDouble;
Float128;

namespace {

// Initial approximation of x^(-2/3) for 1 <= x < 2.
// Polynomial generated by Sollya with:
// > P = fpminimax(x^(-2/3), 7, [|D...|], [1, 2]);
// > dirtyinfnorm(P/x^(-2/3) - 1, [1, 2]);
// 0x1.28...p-21
double intial_approximation(double x) { … }

// Get the error term for Newton iteration:
//   h(x) = x^3 * a^2 - 1,
#ifdef LIBC_TARGET_CPU_HAS_FMA
double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) { … }
#else
double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) {
  DoubleDouble x_3_a_sq = fputil::quick_mult(a_sq, x_3);
  return (x_3_a_sq.hi - 1.0) + x_3_a_sq.lo;
}
#endif

} // anonymous namespace

// Correctly rounded cbrt algorithm:
//
// === Step 1 - Range reduction ===
// For x = (-1)^s * 2^e * (1.m), we get 2 reduced arguments x_r and a as:
//   x_r = 1.m
//   a   = (-1)^s * 2^(e % 3) * (1.m)
// Then cbrt(x) = x^(1/3) can be computed as:
//   x^(1/3) = 2^(e / 3) * a^(1/3).
//
// In order to avoid division, we compute a^(-2/3) using Newton method and then
// multiply the results by a:
//   a^(1/3) = a * a^(-2/3).
//
// === Step 2 - First approximation to a^(-2/3) ===
// First, we use a degree-7 minimax polynomial generated by Sollya to
// approximate x_r^(-2/3) for 1 <= x_r < 2.
//   p = P(x_r) ~ x_r^(-2/3),
// with relative errors bounded by:
//   | p / x_r^(-2/3) - 1 | < 1.16 * 2^-21.
//
// Then we multiply with 2^(e % 3) from a small lookup table to get:
//   x_0 = 2^(-2*(e % 3)/3) * p
//       ~ 2^(-2*(e % 3)/3) * x_r^(-2/3)
//       = a^(-2/3)
// With relative errors:
//   | x_0 / a^(-2/3) - 1 | < 1.16 * 2^-21.
// This step is done in double precision.
//
// === Step 3 - First Newton iteration ===
// We follow the method described in:
//   Sibidanov, A. and Zimmermann, P., "Correctly rounded cubic root evaluation
//   in double precision", https://core-math.gitlabpages.inria.fr/cbrt64.pdf
// to derive multiplicative Newton iterations as below:
// Let x_n be the nth approximation to a^(-2/3).  Define the n^th error as:
//   h_n = x_n^3 * a^2 - 1
// Then:
//   a^(-2/3) = x_n / (1 + h_n)^(1/3)
//            = x_n * (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3 + ...)
// using the Taylor series expansion of (1 + h_n)^(-1/3).
//
// Apply to x_0 above:
//   h_0 = x_0^3 * a^2 - 1
//       = a^2 * (x_0 - a^(-2/3)) * (x_0^2 + x_0 * a^(-2/3) + a^(-4/3)),
// it's bounded by:
//   |h_0| < 4 * 3 * 1.16 * 2^-21 * 4 < 2^-17.
// So in the first iteration step, we use:
//   x_1 = x_0 * (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3)
// Its relative error is bounded by:
//   | x_1 / a^(-2/3) - 1 | < 35/242 * |h_0|^4 < 2^-70.
// Then we perform Ziv's rounding test and check if the answer is exact.
// This step is done in double-double precision.
//
// === Step 4 - Second Newton iteration ===
// If the Ziv's rounding test from the previous step fails, we define the error
// term:
//   h_1 = x_1^3 * a^2 - 1,
// And perform another iteration:
//   x_2 = x_1 * (1 - h_1 / 3)
// with the relative errors exceed the precision of double-double.
// We then check the Ziv's accuracy test with relative errors < 2^-102 to
// compensate for rounding errors.
//
// === Step 5 - Final iteration ===
// If the Ziv's accuracy test from the previous step fails, we perform another
// iteration in 128-bit precision and check for exact outputs.
//
// TODO: It is possible to replace this costly computation step with special
// exceptional handling, similar to what was done in the CORE-MATH project:
// https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/cbrt/cbrt.c

LLVM_LIBC_FUNCTION(double, cbrt, (double x)) { … }

} // namespace LIBC_NAMESPACE_DECL
llvm/libc/src/math/generic/cbrt.cpp