range_reduction_double_common.h | Explore in Territory

//===-- Range reduction for double precision sin/cos/tan -*- C++ --------*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H
#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H

#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/double_double.h"
#include "src/__support/FPUtil/dyadic_float.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/nearest_integer.h"
#include "src/__support/common.h"
#include "src/__support/integer_literals.h"
#include "src/__support/macros/config.h"

#ifdef LIBC_TARGET_CPU_HAS_FMA
#include "range_reduction_double_fma.h"

// With FMA, we limit the maxmimum exponent to be 2^16, so that the error bound
// from the fma::range_reduction_small is bounded by 2^-88 instead of 2^-72.
#define FAST_PASS_EXPONENT …
using LIBC_NAMESPACE::fma::ONE_TWENTY_EIGHT_OVER_PI;
using LIBC_NAMESPACE::fma::range_reduction_small;
using LIBC_NAMESPACE::fma::SIN_K_PI_OVER_128;

LIBC_INLINE constexpr bool NO_FMA = false;
#else
#include "range_reduction_double_nofma.h"

FAST_PASS_EXPONENT;
ONE_TWENTY_EIGHT_OVER_PI;
range_reduction_small;
SIN_K_PI_OVER_128;

LIBC_INLINE constexpr bool NO_FMA = …;
#endif // LIBC_TARGET_CPU_HAS_FMA

namespace LIBC_NAMESPACE_DECL {

namespace generic {

DoubleDouble;
Float128;

LIBC_INLINE constexpr Float128 PI_OVER_128_F128 = …;

// Note: The look-up tables ONE_TWENTY_EIGHT_OVER_PI is selected to be either
// from fma:: or nofma:: namespace.

// For large range |x| >= 2^32, we use the exponent of x to find 3 double-chunks
// of 128/pi c_hi, c_mid, c_lo such that:
//   1) ulp(round(x * c_hi, D, RN)) >= 256,
//   2) If x * c_hi = ph_hi + ph_lo and x * c_mid = pm_hi + pm_lo, then
//        min(ulp(ph_lo), ulp(pm_hi)) >= 2^-53.
//   3) ulp(round(x * c_lo, D, RN)) <= 2^-7x.
// This will allow us to do quick computations as:
//   (x * 256/pi) ~ x * (c_hi + c_mid + c_lo)    (mod 256)
//                ~ ph_lo + pm_hi + pm_lo + (x * c_lo)
// Then,
//   round(x * 128/pi) = round(ph_lo + pm_hi)    (mod 256)
// And the high part of fractional part of (x * 128/pi) can simply be:
//   {x * 128/pi}_hi = {ph_lo + pm_hi}.
// To prevent overflow when x is very large, we simply scale up
// (c_hi, c_mid, c_lo) by a fixed power of 2 (based on the index) and scale down
// x by the same amount.

template <bool NO_FMA> struct LargeRangeReduction { … };

LIBC_INLINE Float128 range_reduction_small_f128(double x) { … }

LIBC_INLINE constexpr Float128 SIN_K_PI_OVER_128_F128[65] = …;

} // namespace generic

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H
llvm/libc/src/math/generic/range_reduction_double_common.h