float_to_string.h | Explore in Territory

//===-- Utilities to convert floating point values to string ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H
#define LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H

#include <stdint.h>

#include "src/__support/CPP/limits.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/dyadic_float.h"
#include "src/__support/big_int.h"
#include "src/__support/common.h"
#include "src/__support/libc_assert.h"
#include "src/__support/macros/attributes.h"
#include "src/__support/macros/config.h"
#include "src/__support/sign.h"

// This file has 5 compile-time flags to allow the user to configure the float
// to string behavior. These were used to explore tradeoffs during the design
// phase, and can still be used to gain specific properties. Unless you
// specifically know what you're doing, you should leave all these flags off.

// LIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD
//  This flag disables the separate long double conversion implementation. It is
//  not based on the Ryu algorithm, instead generating the digits by
//  multiplying/dividing the written-out number by 10^9 to get blocks. It's
//  significantly faster than INT_CALC, only about 10x slower than MEGA_TABLE,
//  and is small in binary size. Its downside is that it always calculates all
//  of the digits above the decimal point, making it inefficient for %e calls
//  with large exponents. This specialization overrides other flags, so this
//  flag must be set for other flags to effect the long double behavior.

// LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE
//  The Mega Table is ~5 megabytes when compiled. It lists the constants needed
//  to perform the Ryu Printf algorithm (described below) for all long double
//  values. This makes it extremely fast for both doubles and long doubles, in
//  exchange for large binary size.

// LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT
//  Dyadic floats are software floating point numbers, and their accuracy can be
//  as high as necessary. This option uses 256 bit dyadic floats to calculate
//  the table values that Ryu Printf needs. This is reasonably fast and very
//  small compared to the Mega Table, but the 256 bit floats only give accurate
//  results for the first ~50 digits of the output. In practice this shouldn't
//  be a problem since long doubles are only accurate for ~35 digits, but the
//  trailing values all being 0s may cause brittle tests to fail.

// LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC
//  Integer Calculation uses wide integers to do the calculations for the Ryu
//  Printf table, which is just as accurate as the Mega Table without requiring
//  as much code size. These integers can be very large (~32KB at max, though
//  always on the stack) to handle the edges of the long double range. They are
//  also very slow, taking multiple seconds on a powerful CPU to calculate the
//  values at the end of the range. If no flag is set, this is used for long
//  doubles, the flag only changes the double behavior.

// LIBC_COPT_FLOAT_TO_STR_NO_TABLE
//  This flag doesn't change the actual calculation method, instead it is used
//  to disable the normal Ryu Printf table for configurations that don't use any
//  table at all.

// Default Config:
//  If no flags are set, doubles use the normal (and much more reasonably sized)
//  Ryu Printf table and long doubles use their specialized implementation. This
//  provides good performance and binary size.

#ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE
#include "src/__support/ryu_long_double_constants.h"
#elif !defined(LIBC_COPT_FLOAT_TO_STR_NO_TABLE)
#include "src/__support/ryu_constants.h"
#else
constexpr size_t IDX_SIZE = 1;
constexpr size_t MID_INT_SIZE = 192;
#endif

// This implementation is based on the Ryu Printf algorithm by Ulf Adams:
// Ulf Adams. 2019. Ryū revisited: printf floating point conversion.
// Proc. ACM Program. Lang. 3, OOPSLA, Article 169 (October 2019), 23 pages.
// https://doi.org/10.1145/3360595

// This version is modified to require significantly less memory (it doesn't use
// a large buffer to store the result).

// The general concept of this algorithm is as follows:
// We want to calculate a 9 digit segment of a floating point number using this
// formula: floor((mantissa * 2^exponent)/10^i) % 10^9.
// To do so normally would involve large integers (~1000 bits for doubles), so
// we use a shortcut. We can avoid calculating 2^exponent / 10^i by using a
// lookup table. The resulting intermediate value needs to be about 192 bits to
// store the result with enough precision. Since this is all being done with
// integers for appropriate precision, we would run into a problem if
// i > exponent since then 2^exponent / 10^i would be less than 1. To correct
// for this, the actual calculation done is 2^(exponent + c) / 10^i, and then
// when multiplying by the mantissa we reverse this by dividing by 2^c, like so:
// floor((mantissa * table[exponent][i])/(2^c)) % 10^9.
// This gives a 9 digit value, which is small enough to fit in a 32 bit integer,
// and that integer is converted into a string as normal, and called a block. In
// this implementation, the most recent block is buffered, so that if rounding
// is necessary the block can be adjusted before being written to the output.
// Any block that is all 9s adds one to the max block counter and doesn't clear
// the buffer because they can cause the block above them to be rounded up.

namespace LIBC_NAMESPACE_DECL {

BlockInt;
constexpr uint32_t BLOCK_SIZE = …;
constexpr uint64_t EXP5_9 = …;
constexpr uint64_t EXP10_9 = …;

FPBits;

// Larger numbers prefer a slightly larger constant than is used for the smaller
// numbers.
constexpr size_t CALC_SHIFT_CONST = …;

namespace internal {

// Returns floor(log_10(2^e)); requires 0 <= e <= 42039.
LIBC_INLINE constexpr uint32_t log10_pow2(uint64_t e) { … }

// Same as above, but with different constants.
LIBC_INLINE constexpr uint32_t log2_pow5(uint64_t e) { … }

// Returns 1 + floor(log_10(2^e). This could technically be off by 1 if any
// power of 2 was also a power of 10, but since that doesn't exist this is
// always accurate. This is used to calculate the maximum number of base-10
// digits a given e-bit number could have.
LIBC_INLINE constexpr uint32_t ceil_log10_pow2(uint32_t e) { … }

LIBC_INLINE constexpr uint32_t div_ceil(uint32_t num, uint32_t denom) { … }

// Returns the maximum number of 9 digit blocks a number described by the given
// index (which is ceil(exponent/16)) and mantissa width could need.
LIBC_INLINE constexpr uint32_t length_for_num(uint32_t idx,
                                              uint32_t mantissa_width) { … }

// The formula for the table when i is positive (or zero) is as follows:
// floor(10^(-9i) * 2^(e + c_1) + 1) % (10^9 * 2^c_1)
// Rewritten slightly we get:
// floor(5^(-9i) * 2^(e + c_1 - 9i) + 1) % (10^9 * 2^c_1)

// TODO: Fix long doubles (needs bigger table or alternate algorithm.)
// Currently the table values are generated, which is very slow.
template <size_t INT_SIZE>
LIBC_INLINE constexpr UInt<MID_INT_SIZE> get_table_positive(int exponent,
                                                            size_t i) { … }

template <size_t INT_SIZE>
LIBC_INLINE UInt<MID_INT_SIZE> get_table_positive_df(int exponent, size_t i) { … }

// The formula for the table when i is negative (or zero) is as follows:
// floor(10^(-9i) * 2^(c_0 - e)) % (10^9 * 2^c_0)
// Since we know i is always negative, we just take it as unsigned and treat it
// as negative. We do the same with exponent, while they're both always negative
// in theory, in practice they're converted to positive for simpler
// calculations.
// The formula being used looks more like this:
// floor(10^(9*(-i)) * 2^(c_0 + (-e))) % (10^9 * 2^c_0)
template <size_t INT_SIZE>
LIBC_INLINE UInt<MID_INT_SIZE> get_table_negative(int exponent, size_t i) { … }

template <size_t INT_SIZE>
LIBC_INLINE UInt<MID_INT_SIZE> get_table_negative_df(int exponent, size_t i) { … }

LIBC_INLINE uint32_t fast_uint_mod_1e9(const UInt<MID_INT_SIZE> &val) { … }

LIBC_INLINE uint32_t mul_shift_mod_1e9(const FPBits::StorageType mantissa,
                                       const UInt<MID_INT_SIZE> &large,
                                       const int32_t shift_amount) { … }

} // namespace internal

// Convert floating point values to their string representation.
// Because the result may not fit in a reasonably sized array, the caller must
// request blocks of digits and convert them from integers to strings themself.
// Blocks contain the most digits that can be stored in an BlockInt. This is 9
// digits for a 32 bit int and 18 digits for a 64 bit int.
// The intended use pattern is to create a FloatToString object of the
// appropriate type, then call get_positive_blocks to get an approximate number
// of blocks there are before the decimal point. Now the client code can start
// calling get_positive_block in a loop from the number of positive blocks to
// zero. This will give all digits before the decimal point. Then the user can
// start calling get_negative_block in a loop from 0 until the number of digits
// they need is reached. As an optimization, the client can use
// zero_blocks_after_point to find the number of blocks that are guaranteed to
// be zero after the decimal point and before the non-zero digits. Additionally,
// is_lowest_block will return if the current block is the lowest non-zero
// block.
template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
class FloatToString { … };

#if !defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64) &&                             \
    !defined(LIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD)
// --------------------------- LONG DOUBLE FUNCTIONS ---------------------------

// this algorithm will work exactly the same for 80 bit and 128 bit long
// doubles. They have the same max exponent, but even if they didn't the
// constants should be calculated to be correct for any provided floating point
// type.

template <> class FloatToString<long double> { … };

#endif // !LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64 &&
       // !LIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H
llvm/libc/src/__support/float_to_string.h