str_to_float.h | Explore in Territory

//===-- String to float conversion utils ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_FLOAT_H
#define LLVM_LIBC_SRC___SUPPORT_STR_TO_FLOAT_H

#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/limits.h"
#include "src/__support/CPP/optional.h"
#include "src/__support/CPP/string_view.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/common.h"
#include "src/__support/ctype_utils.h"
#include "src/__support/detailed_powers_of_ten.h"
#include "src/__support/high_precision_decimal.h"
#include "src/__support/macros/config.h"
#include "src/__support/str_to_integer.h"
#include "src/__support/str_to_num_result.h"
#include "src/__support/uint128.h"
#include "src/errno/libc_errno.h" // For ERANGE

#include <stdint.h>

namespace LIBC_NAMESPACE_DECL {
namespace internal {

template <class T> struct ExpandedFloat { … };

template <class T> struct FloatConvertReturn { … };

LIBC_INLINE uint64_t low64(const UInt128 &num) { … }

LIBC_INLINE uint64_t high64(const UInt128 &num) { … }

template <class T> LIBC_INLINE void set_implicit_bit(fputil::FPBits<T> &) { … }

#if defined(LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80)
template <>
LIBC_INLINE void
set_implicit_bit<long double>(fputil::FPBits<long double> &result) { … }
#endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80

// This Eisel-Lemire implementation is based on the algorithm described in the
// paper Number Parsing at a Gigabyte per Second, Software: Practice and
// Experience 51 (8), 2021 (https://arxiv.org/abs/2101.11408), as well as the
// description by Nigel Tao
// (https://nigeltao.github.io/blog/2020/eisel-lemire.html) and the golang
// implementation, also by Nigel Tao
// (https://github.com/golang/go/blob/release-branch.go1.16/src/strconv/eisel_lemire.go#L25)
// for some optimizations as well as handling 32 bit floats.
template <class T>
LIBC_INLINE cpp::optional<ExpandedFloat<T>>
eisel_lemire(ExpandedFloat<T> init_num,
             RoundDirection round = RoundDirection::Nearest) { … }

#if !defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64)
template <>
LIBC_INLINE cpp::optional<ExpandedFloat<long double>>
eisel_lemire<long double>(ExpandedFloat<long double> init_num,
                          RoundDirection round) { … }
#endif // !defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64)

// The nth item in POWERS_OF_TWO represents the greatest power of two less than
// 10^n. This tells us how much we can safely shift without overshooting.
constexpr uint8_t POWERS_OF_TWO[19] = …;
constexpr int32_t NUM_POWERS_OF_TWO = …;

// Takes a mantissa and base 10 exponent and converts it into its closest
// floating point type T equivalent. This is the fallback algorithm used when
// the Eisel-Lemire algorithm fails, it's slower but more accurate. It's based
// on the Simple Decimal Conversion algorithm by Nigel Tao, described at this
// link: https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html
template <class T>
LIBC_INLINE FloatConvertReturn<T> simple_decimal_conversion(
    const char *__restrict numStart,
    const size_t num_len = cpp::numeric_limits<size_t>::max(),
    RoundDirection round = RoundDirection::Nearest) { … }

// This class is used for templating the constants for Clinger's Fast Path,
// described as a method of approximation in
// Clinger WD. How to Read Floating Point Numbers Accurately. SIGPLAN Not 1990
// Jun;25(6):92–101. https://doi.org/10.1145/93548.93557.
// As well as the additions by Gay that extend the useful range by the number of
// exact digits stored by the float type, described in
// Gay DM, Correctly rounded binary-decimal and decimal-binary conversions;
// 1990. AT&T Bell Laboratories Numerical Analysis Manuscript 90-10.
template <class T> class ClingerConsts;

template <> class ClingerConsts<float> { … };

template <> class ClingerConsts<double> { … };

#if defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64)
template <> class ClingerConsts<long double> {
public:
  static constexpr long double POWERS_OF_TEN_ARRAY[] = {
      1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
      1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
  static constexpr int32_t EXACT_POWERS_OF_TEN =
      ClingerConsts<double>::EXACT_POWERS_OF_TEN;
  static constexpr int32_t DIGITS_IN_MANTISSA =
      ClingerConsts<double>::DIGITS_IN_MANTISSA;
  static constexpr long double MAX_EXACT_INT =
      ClingerConsts<double>::MAX_EXACT_INT;
};
#elif defined(LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80)
template <> class ClingerConsts<long double> { … };
#elif defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT128)
template <> class ClingerConsts<long double> {
public:
  static constexpr long double POWERS_OF_TEN_ARRAY[] = {
      1e0L,  1e1L,  1e2L,  1e3L,  1e4L,  1e5L,  1e6L,  1e7L,  1e8L,  1e9L,
      1e10L, 1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L,
      1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L, 1e28L, 1e29L,
      1e30L, 1e31L, 1e32L, 1e33L, 1e34L, 1e35L, 1e36L, 1e37L, 1e38L, 1e39L,
      1e40L, 1e41L, 1e42L, 1e43L, 1e44L, 1e45L, 1e46L, 1e47L, 1e48L};
  static constexpr int32_t EXACT_POWERS_OF_TEN = 48;
  static constexpr int32_t DIGITS_IN_MANTISSA = 33;
  static constexpr long double MAX_EXACT_INT =
      10384593717069655257060992658440191.0L;
};
#else
#error "Unknown long double type"
#endif

// Take an exact mantissa and exponent and attempt to convert it using only
// exact floating point arithmetic. This only handles numbers with low
// exponents, but handles them quickly. This is an implementation of Clinger's
// Fast Path, as described above.
template <class T>
LIBC_INLINE cpp::optional<ExpandedFloat<T>>
clinger_fast_path(ExpandedFloat<T> init_num,
                  RoundDirection round = RoundDirection::Nearest) { … }

// The upper bound is the highest base-10 exponent that could possibly give a
// non-inf result for this size of float. The value is
// log10(2^(exponent bias)).
// The generic approximation uses the fact that log10(2^x) ~= x/3
template <typename T> LIBC_INLINE constexpr int32_t get_upper_bound() { … }

template <> LIBC_INLINE constexpr int32_t get_upper_bound<float>() { … }

template <> LIBC_INLINE constexpr int32_t get_upper_bound<double>() { … }

// The lower bound is the largest negative base-10 exponent that could possibly
// give a non-zero result for this size of float. The value is
// log10(2^(exponent bias + final mantissa width + intermediate mantissa width))
// The intermediate mantissa is the integer that's been parsed from the string,
// and the final mantissa is the fractional part of the output number. A very
// low base 10 exponent with a very high intermediate mantissa can cancel each
// other out, and subnormal numbers allow for the result to be at the very low
// end of the final mantissa.
template <typename T> LIBC_INLINE constexpr int32_t get_lower_bound() { … }

template <> LIBC_INLINE constexpr int32_t get_lower_bound<float>() { … }

template <> LIBC_INLINE constexpr int32_t get_lower_bound<double>() { … }

// Takes a mantissa and base 10 exponent and converts it into its closest
// floating point type T equivalient. First we try the Eisel-Lemire algorithm,
// then if that fails then we fall back to a more accurate algorithm for
// accuracy. The resulting mantissa and exponent are placed in outputMantissa
// and outputExp2.
template <class T>
LIBC_INLINE FloatConvertReturn<T> decimal_exp_to_float(
    ExpandedFloat<T> init_num, bool truncated, RoundDirection round,
    const char *__restrict numStart,
    const size_t num_len = cpp::numeric_limits<size_t>::max()) { … }

// Takes a mantissa and base 2 exponent and converts it into its closest
// floating point type T equivalient. Since the exponent is already in the right
// form, this is mostly just shifting and rounding. This is used for hexadecimal
// numbers since a base 16 exponent multiplied by 4 is the base 2 exponent.
template <class T>
LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
                                                      bool truncated,
                                                      RoundDirection round) { … }

// checks if the next 4 characters of the string pointer are the start of a
// hexadecimal floating point number. Does not advance the string pointer.
LIBC_INLINE bool is_float_hex_start(const char *__restrict src,
                                    const char decimalPoint) { … }

// Takes the start of a string representing a decimal float, as well as the
// local decimalPoint. It returns if it suceeded in parsing any digits, and if
// the return value is true then the outputs are pointer to the end of the
// number, and the mantissa and exponent for the closest float T representation.
// If the return value is false, then it is assumed that there is no number
// here.
template <class T>
LIBC_INLINE StrToNumResult<ExpandedFloat<T>>
decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT,
                        RoundDirection round) { … }

// Takes the start of a string representing a hexadecimal float, as well as the
// local decimal point. It returns if it suceeded in parsing any digits, and if
// the return value is true then the outputs are pointer to the end of the
// number, and the mantissa and exponent for the closest float T representation.
// If the return value is false, then it is assumed that there is no number
// here.
template <class T>
LIBC_INLINE StrToNumResult<ExpandedFloat<T>>
hexadecimal_string_to_float(const char *__restrict src,
                            const char DECIMAL_POINT, RoundDirection round) { … }

template <class T>
LIBC_INLINE typename fputil::FPBits<T>::StorageType
nan_mantissa_from_ncharseq(const cpp::string_view ncharseq) { … }

// Takes a pointer to a string and a pointer to a string pointer. This function
// is used as the backend for all of the string to float functions.
// TODO: Add src_len member to match strtointeger.
// TODO: Next, move from char* and length to string_view
template <class T>
LIBC_INLINE StrToNumResult<T> strtofloatingpoint(const char *__restrict src) { … }

template <class T> LIBC_INLINE StrToNumResult<T> strtonan(const char *arg) { … }

} // namespace internal
} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC___SUPPORT_STR_TO_FLOAT_H
llvm/libc/src/__support/str_to_float.h