// Copyright 2018 The Abseil Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "absl/strings/internal/charconv_parse.h" #include "absl/strings/charconv.h" #include <cassert> #include <cstdint> #include <limits> #include "absl/strings/internal/memutil.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace { // ParseFloat<10> will read the first 19 significant digits of the mantissa. // This number was chosen for multiple reasons. // // (a) First, for whatever integer type we choose to represent the mantissa, we // want to choose the largest possible number of decimal digits for that integer // type. We are using uint64_t, which can express any 19-digit unsigned // integer. // // (b) Second, we need to parse enough digits that the binary value of any // mantissa we capture has more bits of resolution than the mantissa // representation in the target float. Our algorithm requires at least 3 bits // of headway, but 19 decimal digits give a little more than that. // // The following static assertions verify the above comments: constexpr int kDecimalMantissaDigitsMax = …; static_assert …; // IEEE doubles, which we assume in Abseil, have 53 binary bits of mantissa. static_assert …; static_assert …; static_assert …; // The lowest valued 19-digit decimal mantissa we can read still contains // sufficient information to reconstruct a binary mantissa. static_assert …; // ParseFloat<16> will read the first 15 significant digits of the mantissa. 
// // Because a base-16-to-base-2 conversion can be done exactly, we do not need // to maximize the number of scanned hex digits to improve our conversion. What // is required is to scan two more bits than the mantissa can represent, so that // we always round correctly. // // (One extra bit does not suffice to perform correct rounding, since a number // exactly halfway between two representable floats has unique rounding rules, // so we need to differentiate between a "halfway between" number and a "closer // to the larger value" number.) constexpr int kHexadecimalMantissaDigitsMax = …; // The minimum number of significant bits that will be read from // kHexadecimalMantissaDigitsMax hex digits. We must subtract by three, since // the most significant digit can be a "1", which only contributes a single // significant bit. constexpr int kGuaranteedHexadecimalMantissaBitPrecision = …; static_assert …; // We also impose a limit on the number of significant digits we will read from // an exponent, to avoid having to deal with integer overflow. We use 9 for // this purpose. // // If we read a 9 digit exponent, the end result of the conversion will // necessarily be infinity or zero, depending on the sign of the exponent. // Therefore we can just drop extra digits on the floor without any extra // logic. constexpr int kDecimalExponentDigitsMax = …; static_assert …; // To avoid incredibly large inputs causing integer overflow for our exponent, // we impose an arbitrary but very large limit on the number of significant // digits we will accept. The implementation refuses to match a string with // more consecutive significant mantissa digits than this. constexpr int kDecimalDigitLimit = …; // Corresponding limit for hexadecimal digit inputs. This is one fourth the // amount of kDecimalDigitLimit, since each dropped hexadecimal digit requires // a binary exponent adjustment of 4. 
constexpr int kHexadecimalDigitLimit = …;

// The largest exponent we can read is 999999999 (per
// kDecimalExponentDigitsMax), and the largest exponent adjustment we can get
// from dropped mantissa digits is 2 * kDecimalDigitLimit, and the sum of these
// comfortably fits in an integer.
//
// We count kDecimalDigitLimit twice because there are independent limits for
// numbers before and after the decimal point.  (In the case where there are no
// significant digits before the decimal point, there are independent limits for
// post-decimal-point leading zeroes and for significant digits.)
static_assert …;
static_assert …;

// Returns true if the provided bitfield allows parsing an exponent value
// (e.g., "1.5e100").
bool AllowExponent(chars_format flags) { … }

// Returns true if the provided bitfield requires an exponent value be present.
bool RequireExponent(chars_format flags) { … }

// 256-entry lookup table, one entry per possible byte value.
// NOTE(review): the initializer is not visible here; going by the name, this
// presumably maps a character to its digit value -- confirm against the table
// contents and the ToDigit<> implementations.
const int8_t kAsciiToInt[256] = …;

// Returns true if `ch` is a digit in the given base.
template <int base>
bool IsDigit(char ch);

// Converts a valid `ch` to its digit value in the given base.
template <int base>
unsigned ToDigit(char ch);

// Returns true if `ch` is the exponent delimiter for the given base.
template <int base>
bool IsExponentCharacter(char ch);

// Returns the maximum number of significant digits we will read for a float
// in the given base.
template <int base>
constexpr int MantissaDigitsMax();

// Returns the largest consecutive run of digits we will accept when parsing a
// number in the given base.
template <int base>
constexpr int DigitLimit();

// Returns the amount the exponent must be adjusted by for each dropped digit.
// (For decimal this is 1, since the digits are in base 10 and the exponent base
// is also 10, but for hexadecimal this is 4, since the digits are base 16 but
// the exponent base is 2.)
template <int base>
constexpr int DigitMagnitude();

// Explicit specializations of the per-base helper templates for base 10
// (decimal) and base 16 (hexadecimal).
template <>
bool IsDigit<10>(char ch) { … }
template <>
bool IsDigit<16>(char ch) { … }

template <>
unsigned ToDigit<10>(char ch) { … }
template <>
unsigned ToDigit<16>(char ch) { … }

template <>
bool IsExponentCharacter<10>(char ch) { … }

template <>
bool IsExponentCharacter<16>(char ch) { … }

template <>
constexpr int MantissaDigitsMax<10>() { … }
template <>
constexpr int MantissaDigitsMax<16>() { … }

template <>
constexpr int DigitLimit<10>() { … }
template <>
constexpr int DigitLimit<16>() { … }

template <>
constexpr int DigitMagnitude<10>() { … }
template <>
constexpr int DigitMagnitude<16>() { … }

// Reads digits in the given base from [begin, end) into *out.  Returns the
// number of digits consumed.
//
// NOTE(review): this comment previously said "decimal digits"; since the
// function is templated on `base`, it presumably accepts any digit valid in
// that base -- confirm against the body.
//
// After max_digits has been read, keeps consuming characters, but no longer
// adjusts *out.  If a nonzero digit is dropped this way, *dropped_nonzero_digit
// is set; otherwise, it is left unmodified.
//
// If no digits are matched, returns 0 and leaves *out unchanged.
//
// ConsumeDigits does not protect against overflow on *out; max_digits must
// be chosen with respect to type T to avoid the possibility of overflow.
template <int base, typename T>
int ConsumeDigits(const char* begin, const char* end, int max_digits, T* out,
                  bool* dropped_nonzero_digit) { … }

// Returns true if `v` is one of the chars allowed inside parentheses following
// a NaN.
bool IsNanChar(char v) { … }

// Checks the range [begin, end) for a strtod()-formatted infinity or NaN.  If
// one is found, sets `out` appropriately and returns true.
bool ParseInfinityOrNan(const char* begin, const char* end,
                        strings_internal::ParsedFloat* out) { … }
}  // namespace

namespace strings_internal {

// Definition of ParseFloat<base>: takes the character range [begin, end) and
// the chars_format flags, and returns the result as a ParsedFloat.
// NOTE(review): the full parsing contract (accepted syntax, meaning of each
// ParsedFloat field) is not visible in this file view; presumably it is
// documented alongside the declaration in charconv_parse.h -- confirm.
template <int base>
strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end,
                                         chars_format format_flags) { … }

// Explicit instantiation definitions of ParseFloat for base 10 and base 16.
template ParsedFloat ParseFloat<10>(const char* begin, const char* end,
                                    chars_format format_flags);
template ParsedFloat ParseFloat<16>(const char* begin, const char* end,
                                    chars_format format_flags);

}  // namespace strings_internal
ABSL_NAMESPACE_END
}  // namespace absl