charconv_parse.cc | Explore in Territory

// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/strings/internal/charconv_parse.h"
#include "absl/strings/charconv.h"

#include <cassert>
#include <cstdint>
#include <limits>

#include "absl/strings/internal/memutil.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {

// ParseFloat<10> will read the first 19 significant digits of the mantissa.
// This number was chosen for multiple reasons.
//
// (a) First, for whatever integer type we choose to represent the mantissa, we
// want to choose the largest possible number of decimal digits for that integer
// type.  We are using uint64_t, which can express any 19-digit unsigned
// integer.
//
// (b) Second, we need to parse enough digits that the binary value of any
// mantissa we capture has more bits of resolution than the mantissa
// representation in the target float.  Our algorithm requires at least 3 bits
// of headway, but 19 decimal digits give a little more than that.
//
// The following static assertions verify the above comments:
constexpr int kDecimalMantissaDigitsMax = …;

static_assert …;

// IEEE doubles, which we assume in Abseil, have 53 binary bits of mantissa.
static_assert …;
static_assert …;
static_assert …;

// The lowest valued 19-digit decimal mantissa we can read still contains
// sufficient information to reconstruct a binary mantissa.
static_assert …;

// ParseFloat<16> will read the first 15 significant digits of the mantissa.
//
// Because a base-16-to-base-2 conversion can be done exactly, we do not need
// to maximize the number of scanned hex digits to improve our conversion.  What
// is required is to scan two more bits than the mantissa can represent, so that
// we always round correctly.
//
// (One extra bit does not suffice to perform correct rounding, since a number
// exactly halfway between two representable floats has unique rounding rules,
// so we need to differentiate between a "halfway between" number and a "closer
// to the larger value" number.)
constexpr int kHexadecimalMantissaDigitsMax = …;

// The minimum number of significant bits that will be read from
// kHexadecimalMantissaDigitsMax hex digits.  We must subtract by three, since
// the most significant digit can be a "1", which only contributes a single
// significant bit.
constexpr int kGuaranteedHexadecimalMantissaBitPrecision = …;

static_assert …;

// We also impose a limit on the number of significant digits we will read from
// an exponent, to avoid having to deal with integer overflow.  We use 9 for
// this purpose.
//
// If we read a 9 digit exponent, the end result of the conversion will
// necessarily be infinity or zero, depending on the sign of the exponent.
// Therefore we can just drop extra digits on the floor without any extra
// logic.
constexpr int kDecimalExponentDigitsMax = …;
static_assert …;

// To avoid incredibly large inputs causing integer overflow for our exponent,
// we impose an arbitrary but very large limit on the number of significant
// digits we will accept.  The implementation refuses to match a string with
// more consecutive significant mantissa digits than this.
constexpr int kDecimalDigitLimit = …;

// Corresponding limit for hexadecimal digit inputs.  This is one fourth the
// amount of kDecimalDigitLimit, since each dropped hexadecimal digit requires
// a binary exponent adjustment of 4.
constexpr int kHexadecimalDigitLimit = …;

// The largest exponent we can read is 999999999 (per
// kDecimalExponentDigitsMax), and the largest exponent adjustment we can get
// from dropped mantissa digits is 2 * kDecimalDigitLimit, and the sum of these
// comfortably fits in an integer.
//
// We count kDecimalDigitLimit twice because there are independent limits for
// numbers before and after the decimal point.  (In the case where there are no
// significant digits before the decimal point, there are independent limits for
// post-decimal-point leading zeroes and for significant digits.)
static_assert …;
static_assert …;

// Returns true if the provided bitfield allows parsing an exponent value
// (e.g., "1.5e100").
bool AllowExponent(chars_format flags) { … }

// Returns true if the provided bitfield requires an exponent value be present.
bool RequireExponent(chars_format flags) { … }

const int8_t kAsciiToInt[256] = …;

// Returns true if `ch` is a digit in the given base
template <int base>
bool IsDigit(char ch);

// Converts a valid `ch` to its digit value in the given base.
template <int base>
unsigned ToDigit(char ch);

// Returns true if `ch` is the exponent delimiter for the given base.
template <int base>
bool IsExponentCharacter(char ch);

// Returns the maximum number of significant digits we will read for a float
// in the given base.
template <int base>
constexpr int MantissaDigitsMax();

// Returns the largest consecutive run of digits we will accept when parsing a
// number in the given base.
template <int base>
constexpr int DigitLimit();

// Returns the amount the exponent must be adjusted by for each dropped digit.
// (For decimal this is 1, since the digits are in base 10 and the exponent base
// is also 10, but for hexadecimal this is 4, since the digits are base 16 but
// the exponent base is 2.)
template <int base>
constexpr int DigitMagnitude();

template <>
bool IsDigit<10>(char ch) { … }
template <>
bool IsDigit<16>(char ch) { … }

template <>
unsigned ToDigit<10>(char ch) { … }
template <>
unsigned ToDigit<16>(char ch) { … }

template <>
bool IsExponentCharacter<10>(char ch) { … }

template <>
bool IsExponentCharacter<16>(char ch) { … }

template <>
constexpr int MantissaDigitsMax<10>() { … }
template <>
constexpr int MantissaDigitsMax<16>() { … }

template <>
constexpr int DigitLimit<10>() { … }
template <>
constexpr int DigitLimit<16>() { … }

template <>
constexpr int DigitMagnitude<10>() { … }
template <>
constexpr int DigitMagnitude<16>() { … }

// Reads decimal digits from [begin, end) into *out.  Returns the number of
// digits consumed.
//
// After max_digits has been read, keeps consuming characters, but no longer
// adjusts *out.  If a nonzero digit is dropped this way, *dropped_nonzero_digit
// is set; otherwise, it is left unmodified.
//
// If no digits are matched, returns 0 and leaves *out unchanged.
//
// ConsumeDigits does not protect against overflow on *out; max_digits must
// be chosen with respect to type T to avoid the possibility of overflow.
template <int base, typename T>
int ConsumeDigits(const char* begin, const char* end, int max_digits, T* out,
                  bool* dropped_nonzero_digit) { … }

// Returns true if `v` is one of the chars allowed inside parentheses following
// a NaN.
bool IsNanChar(char v) { … }

// Checks the range [begin, end) for a strtod()-formatted infinity or NaN.  If
// one is found, sets `out` appropriately and returns true.
bool ParseInfinityOrNan(const char* begin, const char* end,
                        strings_internal::ParsedFloat* out) { … }
}  // namespace

namespace strings_internal {

template <int base>
strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end,
                                         chars_format format_flags) { … }

template ParsedFloat ParseFloat<10>(const char* begin, const char* end,
                                    chars_format format_flags);
template ParsedFloat ParseFloat<16>(const char* begin, const char* end,
                                    chars_format format_flags);

}  // namespace strings_internal
ABSL_NAMESPACE_END
}  // namespace absl
chromium/third_party/abseil-cpp/absl/strings/internal/charconv_parse.cc