// Copyright 2018 The Abseil Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "absl/strings/charconv.h" #include <algorithm> #include <cassert> #include <cstddef> #include <cstdint> #include <limits> #include <system_error> // NOLINT(build/c++11) #include "absl/base/casts.h" #include "absl/base/config.h" #include "absl/base/nullability.h" #include "absl/numeric/bits.h" #include "absl/numeric/int128.h" #include "absl/strings/internal/charconv_bigint.h" #include "absl/strings/internal/charconv_parse.h" // The macro ABSL_BIT_PACK_FLOATS is defined on x86-64, where IEEE floating // point numbers have the same endianness in memory as a bitfield struct // containing the corresponding parts. // // When set, we replace calls to ldexp() with manual bit packing, which is // faster and is unaffected by floating point environment. #ifdef ABSL_BIT_PACK_FLOATS #error ABSL_BIT_PACK_FLOATS cannot be directly set #elif defined(__x86_64__) || defined(_M_X64) #define ABSL_BIT_PACK_FLOATS … #endif // A note about subnormals: // // The code below talks about "normals" and "subnormals". A normal IEEE float // has a fixed-width mantissa and power of two exponent. For example, a normal // `double` has a 53-bit mantissa. Because the high bit is always 1, it is not // stored in the representation. The implicit bit buys an extra bit of // resolution in the datatype. // // The downside of this scheme is that there is a large gap between DBL_MIN and // zero. (Large, at least, relative to the different between DBL_MIN and the // next representable number). This gap is softened by the "subnormal" numbers, // which have the same power-of-two exponent as DBL_MIN, but no implicit 53rd // bit. An all-bits-zero exponent in the encoding represents subnormals. (Zero // is represented as a subnormal with an all-bits-zero mantissa.) // // The code below, in calculations, represents the mantissa as a uint64_t. The // end result normally has the 53rd bit set. It represents subnormals by using // narrower mantissas. namespace absl { ABSL_NAMESPACE_BEGIN namespace { template <typename FloatType> struct FloatTraits; template <> struct FloatTraits<double> { … }; // Specialization of floating point traits for the `float` type. See the // FloatTraits<double> specialization above for meaning of each of the following // members and methods. template <> struct FloatTraits<float> { … }; // Decimal-to-binary conversions require coercing powers of 10 into a mantissa // and a power of 2. The two helper functions Power10Mantissa(n) and // Power10Exponent(n) perform this task. Together, these represent a hand- // rolled floating point value which is equal to or just less than 10**n. // // The return values satisfy two range guarantees: // // Power10Mantissa(n) * 2**Power10Exponent(n) <= 10**n // < (Power10Mantissa(n) + 1) * 2**Power10Exponent(n) // // 2**63 <= Power10Mantissa(n) < 2**64. // // See the "Table of powers of 10" comment below for a "1e60" example. // // Lookups into the power-of-10 table must first check the Power10Overflow() and // Power10Underflow() functions, to avoid out-of-bounds table access. // // Indexes into these tables are biased by -kPower10TableMinInclusive. Valid // indexes range from kPower10TableMinInclusive to kPower10TableMaxExclusive. extern const uint64_t kPower10MantissaHighTable[]; // High 64 of 128 bits. extern const uint64_t kPower10MantissaLowTable[]; // Low 64 of 128 bits. // The smallest (inclusive) allowed value for use with the Power10Mantissa() // and Power10Exponent() functions below. (If a smaller exponent is needed in // calculations, the end result is guaranteed to underflow.) constexpr int kPower10TableMinInclusive = …; // The largest (exclusive) allowed value for use with the Power10Mantissa() and // Power10Exponent() functions below. (If a larger-or-equal exponent is needed // in calculations, the end result is guaranteed to overflow.) constexpr int kPower10TableMaxExclusive = …; uint64_t Power10Mantissa(int n) { … } int Power10Exponent(int n) { … } // Returns true if n is large enough that 10**n always results in an IEEE // overflow. bool Power10Overflow(int n) { … } // Returns true if n is small enough that 10**n times a ParsedFloat mantissa // always results in an IEEE underflow. bool Power10Underflow(int n) { … } // Returns true if Power10Mantissa(n) * 2**Power10Exponent(n) is exactly equal // to 10**n numerically. Put another way, this returns true if there is no // truncation error in Power10Mantissa(n). bool Power10Exact(int n) { … } // Sentinel exponent values for representing numbers too large or too close to // zero to represent in a double. constexpr int kOverflow = …; constexpr int kUnderflow = …; // Struct representing the calculated conversion result of a positive (nonzero) // floating point number. // // The calculated number is mantissa * 2**exponent (mantissa is treated as an // integer.) `mantissa` is chosen to be the correct width for the IEEE float // representation being calculated. (`mantissa` will always have the same bit // width for normal values, and narrower bit widths for subnormals.) // // If the result of conversion was an underflow or overflow, exponent is set // to kUnderflow or kOverflow. struct CalculatedFloat { … }; // Returns the bit width of the given uint128. (Equivalently, returns 128 // minus the number of leading zero bits.) int BitWidth(uint128 value) { … } // Calculates how far to the right a mantissa needs to be shifted to create a // properly adjusted mantissa for an IEEE floating point number. // // `mantissa_width` is the bit width of the mantissa to be shifted, and // `binary_exponent` is the exponent of the number before the shift. // // This accounts for subnormal values, and will return a larger-than-normal // shift if binary_exponent would otherwise be too low. template <typename FloatType> int NormalizedShiftSize(int mantissa_width, int binary_exponent) { … } // Right shifts a uint128 so that it has the requested bit width. (The // resulting value will have 128 - bit_width leading zeroes.) The initial // `value` must be wider than the requested bit width. // // Returns the number of bits shifted. int TruncateToBitWidth(int bit_width, absl::Nonnull<uint128*> value) { … } // Checks if the given ParsedFloat represents one of the edge cases that are // not dependent on number base: zero, infinity, or NaN. If so, sets *value // the appropriate double, and returns true. template <typename FloatType> bool HandleEdgeCase(const strings_internal::ParsedFloat& input, bool negative, absl::Nonnull<FloatType*> value) { … } // Given a CalculatedFloat result of a from_chars conversion, generate the // correct output values. // // CalculatedFloat can represent an underflow or overflow, in which case the // error code in *result is set. Otherwise, the calculated floating point // number is stored in *value. template <typename FloatType> void EncodeResult(const CalculatedFloat& calculated, bool negative, absl::Nonnull<absl::from_chars_result*> result, absl::Nonnull<FloatType*> value) { … } // Returns the given uint128 shifted to the right by `shift` bits, and rounds // the remaining bits using round_to_nearest logic. The value is returned as a // uint64_t, since this is the type used by this library for storing calculated // floating point mantissas. // // It is expected that the width of the input value shifted by `shift` will // be the correct bit-width for the target mantissa, which is strictly narrower // than a uint64_t. // // If `input_exact` is false, then a nonzero error epsilon is assumed. For // rounding purposes, the true value being rounded is strictly greater than the // input value. The error may represent a single lost carry bit. // // When input_exact, shifted bits of the form 1000000... represent a tie, which // is broken by rounding to even -- the rounding direction is chosen so the low // bit of the returned value is 0. // // When !input_exact, shifted bits of the form 10000000... represent a value // strictly greater than one half (due to the error epsilon), and so ties are // always broken by rounding up. // // When !input_exact, shifted bits of the form 01111111... are uncertain; // the true value may or may not be greater than 10000000..., due to the // possible lost carry bit. The correct rounding direction is unknown. In this // case, the result is rounded down, and `output_exact` is set to false. // // Zero and negative values of `shift` are accepted, in which case the word is // shifted left, as necessary. uint64_t ShiftRightAndRound(uint128 value, int shift, bool input_exact, absl::Nonnull<bool*> output_exact) { … } // Checks if a floating point guess needs to be rounded up, using high precision // math. // // `guess_mantissa` and `guess_exponent` represent a candidate guess for the // number represented by `parsed_decimal`. // // The exact number represented by `parsed_decimal` must lie between the two // numbers: // A = `guess_mantissa * 2**guess_exponent` // B = `(guess_mantissa + 1) * 2**guess_exponent` // // This function returns false if `A` is the better guess, and true if `B` is // the better guess, with rounding ties broken by rounding to even. bool MustRoundUp(uint64_t guess_mantissa, int guess_exponent, const strings_internal::ParsedFloat& parsed_decimal) { … } // Constructs a CalculatedFloat from a given mantissa and exponent, but // with the following normalizations applied: // // If rounding has caused mantissa to increase just past the allowed bit // width, shift and adjust exponent. // // If exponent is too high, sets kOverflow. // // If mantissa is zero (representing a non-zero value not representable, even // as a subnormal), sets kUnderflow. template <typename FloatType> CalculatedFloat CalculatedFloatFromRawValues(uint64_t mantissa, int exponent) { … } template <typename FloatType> CalculatedFloat CalculateFromParsedHexadecimal( const strings_internal::ParsedFloat& parsed_hex) { … } template <typename FloatType> CalculatedFloat CalculateFromParsedDecimal( const strings_internal::ParsedFloat& parsed_decimal) { … } // As discussed in https://nigeltao.github.io/blog/2020/eisel-lemire.html the // primary goal of the Eisel-Lemire algorithm is speed, for 99+% of the cases, // not 100% coverage. As long as Eisel-Lemire doesn’t claim false positives, // the combined approach (falling back to an alternative implementation when // this function returns false) is both fast and correct. template <typename FloatType> bool EiselLemire(const strings_internal::ParsedFloat& input, bool negative, absl::Nonnull<FloatType*> value, absl::Nonnull<std::errc*> ec) { … } template <typename FloatType> from_chars_result FromCharsImpl(absl::Nonnull<const char*> first, absl::Nonnull<const char*> last, FloatType& value, chars_format fmt_flags) { … } } // namespace from_chars_result from_chars(absl::Nonnull<const char*> first, absl::Nonnull<const char*> last, double& value, chars_format fmt) { … } from_chars_result from_chars(absl::Nonnull<const char*> first, absl::Nonnull<const char*> last, float& value, chars_format fmt) { … } namespace { // Table of powers of 10, from kPower10TableMinInclusive to // kPower10TableMaxExclusive. // // kPower10MantissaHighTable[i - kPower10TableMinInclusive] stores the 64-bit // mantissa. The high bit is always on. // // kPower10MantissaLowTable extends that 64-bit mantissa to 128 bits. // // Power10Exponent(i) calculates the power-of-two exponent. // // For a number i, this gives the unique mantissaHigh and exponent such that // (mantissaHigh * 2**exponent) <= 10**i < ((mantissaHigh + 1) * 2**exponent). // // For example, Python can confirm that the exact hexadecimal value of 1e60 is: // >>> a = 1000000000000000000000000000000000000000000000000000000000000 // >>> hex(a) // '0x9f4f2726179a224501d762422c946590d91000000000000000' // Adding underscores at every 8th hex digit shows 50 hex digits: // '0x9f4f2726_179a2245_01d76242_2c946590_d9100000_00000000_00'. // In this case, the high bit of the first hex digit, 9, is coincidentally set, // so we do not have to do further shifting to deduce the 128-bit mantissa: // - kPower10MantissaHighTable[60 - kP10TMI] = 0x9f4f2726179a2245U // - kPower10MantissaLowTable[ 60 - kP10TMI] = 0x01d762422c946590U // where kP10TMI is kPower10TableMinInclusive. The low 18 of those 50 hex // digits are truncated. // // 50 hex digits (with the high bit set) is 200 bits and mantissaHigh holds 64 // bits, so Power10Exponent(60) = 200 - 64 = 136. Again, Python can confirm: // >>> b = 0x9f4f2726179a2245 // >>> ((b+0)<<136) <= a // True // >>> ((b+1)<<136) <= a // False // // The tables were generated by // https://github.com/google/wuffs/blob/315b2e52625ebd7b02d8fac13e3cd85ea374fb80/script/print-mpb-powers-of-10.go // after re-formatting its output into two arrays of N uint64_t values (instead // of an N element array of uint64_t pairs). const uint64_t kPower10MantissaHighTable[] = …; const uint64_t kPower10MantissaLowTable[] = …; } // namespace ABSL_NAMESPACE_END } // namespace absl