charconv.cc | Explore in Territory

// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/strings/charconv.h"

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <system_error>  // NOLINT(build/c++11)

#include "absl/base/casts.h"
#include "absl/base/config.h"
#include "absl/base/nullability.h"
#include "absl/numeric/bits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/internal/charconv_bigint.h"
#include "absl/strings/internal/charconv_parse.h"

// The macro ABSL_BIT_PACK_FLOATS is defined on x86-64, where IEEE floating
// point numbers have the same endianness in memory as a bitfield struct
// containing the corresponding parts.
//
// When set, we replace calls to ldexp() with manual bit packing, which is
// faster and is unaffected by floating point environment.
#ifdef ABSL_BIT_PACK_FLOATS
#error ABSL_BIT_PACK_FLOATS cannot be directly set
#elif defined(__x86_64__) || defined(_M_X64)
#define ABSL_BIT_PACK_FLOATS …
#endif

// A note about subnormals:
//
// The code below talks about "normals" and "subnormals".  A normal IEEE float
// has a fixed-width mantissa and power of two exponent.  For example, a normal
// `double` has a 53-bit mantissa.  Because the high bit is always 1, it is not
// stored in the representation.  The implicit bit buys an extra bit of
// resolution in the datatype.
//
// The downside of this scheme is that there is a large gap between DBL_MIN and
// zero.  (Large, at least, relative to the different between DBL_MIN and the
// next representable number).  This gap is softened by the "subnormal" numbers,
// which have the same power-of-two exponent as DBL_MIN, but no implicit 53rd
// bit.  An all-bits-zero exponent in the encoding represents subnormals.  (Zero
// is represented as a subnormal with an all-bits-zero mantissa.)
//
// The code below, in calculations, represents the mantissa as a uint64_t.  The
// end result normally has the 53rd bit set.  It represents subnormals by using
// narrower mantissas.

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {

template <typename FloatType>
struct FloatTraits;

template <>
struct FloatTraits<double> { … };

// Specialization of floating point traits for the `float` type.  See the
// FloatTraits<double> specialization above for meaning of each of the following
// members and methods.
template <>
struct FloatTraits<float> { … };

// Decimal-to-binary conversions require coercing powers of 10 into a mantissa
// and a power of 2.  The two helper functions Power10Mantissa(n) and
// Power10Exponent(n) perform this task.  Together, these represent a hand-
// rolled floating point value which is equal to or just less than 10**n.
//
// The return values satisfy two range guarantees:
//
//   Power10Mantissa(n) * 2**Power10Exponent(n) <= 10**n
//     < (Power10Mantissa(n) + 1) * 2**Power10Exponent(n)
//
//   2**63 <= Power10Mantissa(n) < 2**64.
//
// See the "Table of powers of 10" comment below for a "1e60" example.
//
// Lookups into the power-of-10 table must first check the Power10Overflow() and
// Power10Underflow() functions, to avoid out-of-bounds table access.
//
// Indexes into these tables are biased by -kPower10TableMinInclusive. Valid
// indexes range from kPower10TableMinInclusive to kPower10TableMaxExclusive.
extern const uint64_t kPower10MantissaHighTable[];  // High 64 of 128 bits.
extern const uint64_t kPower10MantissaLowTable[];   // Low  64 of 128 bits.

// The smallest (inclusive) allowed value for use with the Power10Mantissa()
// and Power10Exponent() functions below.  (If a smaller exponent is needed in
// calculations, the end result is guaranteed to underflow.)
constexpr int kPower10TableMinInclusive = …;

// The largest (exclusive) allowed value for use with the Power10Mantissa() and
// Power10Exponent() functions below.  (If a larger-or-equal exponent is needed
// in calculations, the end result is guaranteed to overflow.)
constexpr int kPower10TableMaxExclusive = …;

uint64_t Power10Mantissa(int n) { … }

int Power10Exponent(int n) { … }

// Returns true if n is large enough that 10**n always results in an IEEE
// overflow.
bool Power10Overflow(int n) { … }

// Returns true if n is small enough that 10**n times a ParsedFloat mantissa
// always results in an IEEE underflow.
bool Power10Underflow(int n) { … }

// Returns true if Power10Mantissa(n) * 2**Power10Exponent(n) is exactly equal
// to 10**n numerically.  Put another way, this returns true if there is no
// truncation error in Power10Mantissa(n).
bool Power10Exact(int n) { … }

// Sentinel exponent values for representing numbers too large or too close to
// zero to represent in a double.
constexpr int kOverflow = …;
constexpr int kUnderflow = …;

// Struct representing the calculated conversion result of a positive (nonzero)
// floating point number.
//
// The calculated number is mantissa * 2**exponent (mantissa is treated as an
// integer.)  `mantissa` is chosen to be the correct width for the IEEE float
// representation being calculated.  (`mantissa` will always have the same bit
// width for normal values, and narrower bit widths for subnormals.)
//
// If the result of conversion was an underflow or overflow, exponent is set
// to kUnderflow or kOverflow.
struct CalculatedFloat { … };

// Returns the bit width of the given uint128.  (Equivalently, returns 128
// minus the number of leading zero bits.)
int BitWidth(uint128 value) { … }

// Calculates how far to the right a mantissa needs to be shifted to create a
// properly adjusted mantissa for an IEEE floating point number.
//
// `mantissa_width` is the bit width of the mantissa to be shifted, and
// `binary_exponent` is the exponent of the number before the shift.
//
// This accounts for subnormal values, and will return a larger-than-normal
// shift if binary_exponent would otherwise be too low.
template <typename FloatType>
int NormalizedShiftSize(int mantissa_width, int binary_exponent) { … }

// Right shifts a uint128 so that it has the requested bit width.  (The
// resulting value will have 128 - bit_width leading zeroes.)  The initial
// `value` must be wider than the requested bit width.
//
// Returns the number of bits shifted.
int TruncateToBitWidth(int bit_width, absl::Nonnull<uint128*> value) { … }

// Checks if the given ParsedFloat represents one of the edge cases that are
// not dependent on number base: zero, infinity, or NaN.  If so, sets *value
// the appropriate double, and returns true.
template <typename FloatType>
bool HandleEdgeCase(const strings_internal::ParsedFloat& input, bool negative,
                    absl::Nonnull<FloatType*> value) { … }

// Given a CalculatedFloat result of a from_chars conversion, generate the
// correct output values.
//
// CalculatedFloat can represent an underflow or overflow, in which case the
// error code in *result is set.  Otherwise, the calculated floating point
// number is stored in *value.
template <typename FloatType>
void EncodeResult(const CalculatedFloat& calculated, bool negative,
                  absl::Nonnull<absl::from_chars_result*> result,
                  absl::Nonnull<FloatType*> value) { … }

// Returns the given uint128 shifted to the right by `shift` bits, and rounds
// the remaining bits using round_to_nearest logic.  The value is returned as a
// uint64_t, since this is the type used by this library for storing calculated
// floating point mantissas.
//
// It is expected that the width of the input value shifted by `shift` will
// be the correct bit-width for the target mantissa, which is strictly narrower
// than a uint64_t.
//
// If `input_exact` is false, then a nonzero error epsilon is assumed.  For
// rounding purposes, the true value being rounded is strictly greater than the
// input value.  The error may represent a single lost carry bit.
//
// When input_exact, shifted bits of the form 1000000... represent a tie, which
// is broken by rounding to even -- the rounding direction is chosen so the low
// bit of the returned value is 0.
//
// When !input_exact, shifted bits of the form 10000000... represent a value
// strictly greater than one half (due to the error epsilon), and so ties are
// always broken by rounding up.
//
// When !input_exact, shifted bits of the form 01111111... are uncertain;
// the true value may or may not be greater than 10000000..., due to the
// possible lost carry bit.  The correct rounding direction is unknown.  In this
// case, the result is rounded down, and `output_exact` is set to false.
//
// Zero and negative values of `shift` are accepted, in which case the word is
// shifted left, as necessary.
uint64_t ShiftRightAndRound(uint128 value, int shift, bool input_exact,
                            absl::Nonnull<bool*> output_exact) { … }

// Checks if a floating point guess needs to be rounded up, using high precision
// math.
//
// `guess_mantissa` and `guess_exponent` represent a candidate guess for the
// number represented by `parsed_decimal`.
//
// The exact number represented by `parsed_decimal` must lie between the two
// numbers:
//   A = `guess_mantissa * 2**guess_exponent`
//   B = `(guess_mantissa + 1) * 2**guess_exponent`
//
// This function returns false if `A` is the better guess, and true if `B` is
// the better guess, with rounding ties broken by rounding to even.
bool MustRoundUp(uint64_t guess_mantissa, int guess_exponent,
                 const strings_internal::ParsedFloat& parsed_decimal) { … }

// Constructs a CalculatedFloat from a given mantissa and exponent, but
// with the following normalizations applied:
//
// If rounding has caused mantissa to increase just past the allowed bit
// width, shift and adjust exponent.
//
// If exponent is too high, sets kOverflow.
//
// If mantissa is zero (representing a non-zero value not representable, even
// as a subnormal), sets kUnderflow.
template <typename FloatType>
CalculatedFloat CalculatedFloatFromRawValues(uint64_t mantissa, int exponent) { … }

template <typename FloatType>
CalculatedFloat CalculateFromParsedHexadecimal(
    const strings_internal::ParsedFloat& parsed_hex) { … }

template <typename FloatType>
CalculatedFloat CalculateFromParsedDecimal(
    const strings_internal::ParsedFloat& parsed_decimal) { … }

// As discussed in https://nigeltao.github.io/blog/2020/eisel-lemire.html the
// primary goal of the Eisel-Lemire algorithm is speed, for 99+% of the cases,
// not 100% coverage. As long as Eisel-Lemire doesn’t claim false positives,
// the combined approach (falling back to an alternative implementation when
// this function returns false) is both fast and correct.
template <typename FloatType>
bool EiselLemire(const strings_internal::ParsedFloat& input, bool negative,
                 absl::Nonnull<FloatType*> value,
                 absl::Nonnull<std::errc*> ec) { … }

template <typename FloatType>
from_chars_result FromCharsImpl(absl::Nonnull<const char*> first,
                                absl::Nonnull<const char*> last,
                                FloatType& value, chars_format fmt_flags) { … }
}  // namespace

from_chars_result from_chars(absl::Nonnull<const char*> first,
                             absl::Nonnull<const char*> last, double& value,
                             chars_format fmt) { … }

from_chars_result from_chars(absl::Nonnull<const char*> first,
                             absl::Nonnull<const char*> last, float& value,
                             chars_format fmt) { … }

namespace {

// Table of powers of 10, from kPower10TableMinInclusive to
// kPower10TableMaxExclusive.
//
// kPower10MantissaHighTable[i - kPower10TableMinInclusive] stores the 64-bit
// mantissa. The high bit is always on.
//
// kPower10MantissaLowTable extends that 64-bit mantissa to 128 bits.
//
// Power10Exponent(i) calculates the power-of-two exponent.
//
// For a number i, this gives the unique mantissaHigh and exponent such that
// (mantissaHigh * 2**exponent) <= 10**i < ((mantissaHigh + 1) * 2**exponent).
//
// For example, Python can confirm that the exact hexadecimal value of 1e60 is:
//    >>> a = 1000000000000000000000000000000000000000000000000000000000000
//    >>> hex(a)
//    '0x9f4f2726179a224501d762422c946590d91000000000000000'
// Adding underscores at every 8th hex digit shows 50 hex digits:
//    '0x9f4f2726_179a2245_01d76242_2c946590_d9100000_00000000_00'.
// In this case, the high bit of the first hex digit, 9, is coincidentally set,
// so we do not have to do further shifting to deduce the 128-bit mantissa:
//   - kPower10MantissaHighTable[60 - kP10TMI] = 0x9f4f2726179a2245U
//   - kPower10MantissaLowTable[ 60 - kP10TMI] = 0x01d762422c946590U
// where kP10TMI is kPower10TableMinInclusive. The low 18 of those 50 hex
// digits are truncated.
//
// 50 hex digits (with the high bit set) is 200 bits and mantissaHigh holds 64
// bits, so Power10Exponent(60) = 200 - 64 = 136. Again, Python can confirm:
//    >>> b = 0x9f4f2726179a2245
//    >>> ((b+0)<<136) <= a
//    True
//    >>> ((b+1)<<136) <= a
//    False
//
// The tables were generated by
// https://github.com/google/wuffs/blob/315b2e52625ebd7b02d8fac13e3cd85ea374fb80/script/print-mpb-powers-of-10.go
// after re-formatting its output into two arrays of N uint64_t values (instead
// of an N element array of uint64_t pairs).

const uint64_t kPower10MantissaHighTable[] = …;

const uint64_t kPower10MantissaLowTable[] = …;

}  // namespace
ABSL_NAMESPACE_END
}  // namespace absl
chromium/third_party/abseil-cpp/absl/strings/charconv.cc