decode_rust_punycode.cc | Explore in Territory

// Copyright 2024 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/debugging/internal/decode_rust_punycode.h"

#include <cstddef>
#include <cstdint>
#include <cstring>

#include "absl/base/config.h"
#include "absl/base/nullability.h"
#include "absl/debugging/internal/bounded_utf8_length_sequence.h"
#include "absl/debugging/internal/utf8_for_code_point.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {

namespace {

// Decoding Punycode requires repeated random-access insertion into a stream of
// variable-length UTF-8 code-point encodings.  We need this to be tolerably
// fast (no N^2 slowdown for unfortunate inputs), and we can't allocate any data
// structures on the heap (async-signal-safety).
//
// It is pragmatic to impose a moderately low limit on the identifier length and
// bail out if we ever hit it.  Then BoundedUtf8LengthSequence efficiently
// determines where to insert the next code point, and memmove efficiently makes
// room for it.
//
// The chosen limit is a round number several times larger than identifiers
// expected in practice, yet still small enough that a memmove of this many
// UTF-8 characters is not much more expensive than the division and modulus
// operations that Punycode decoding requires.
constexpr uint32_t kMaxChars = …;

// Constants from RFC 3492 section 5.
constexpr uint32_t kBase = …, kTMin = …, kTMax = …, kSkew = …, kDamp = …;

constexpr uint32_t kMaxCodePoint = …;

// Overflow threshold in DecodeRustPunycode's inner loop; see comments there.
constexpr uint32_t kMaxI = …;

// If punycode_begin .. punycode_end begins with a prefix matching the regular
// expression [0-9a-zA-Z_]+_, removes that prefix, copies all but the final
// underscore into out_begin .. out_end, sets num_ascii_chars to the number of
// bytes copied, and returns true.  (A prefix of this sort represents the
// nonempty subsequence of ASCII characters in the corresponding plaintext.)
//
// If punycode_begin .. punycode_end does not contain an underscore, sets
// num_ascii_chars to zero and returns true.  (The encoding of a plaintext
// without any ASCII characters does not carry such a prefix.)
//
// Returns false and zeroes num_ascii_chars on failure (either parse error or
// not enough space in the output buffer).
bool ConsumeOptionalAsciiPrefix(const char*& punycode_begin,
                                const char* const punycode_end,
                                char* const out_begin,
                                char* const out_end,
                                uint32_t& num_ascii_chars) { … }

// Returns the value of `c` as a base-36 digit according to RFC 3492 section 5,
// or -1 if `c` is not such a digit.
int DigitValue(char c) { … }

// Consumes the next delta encoding from punycode_begin .. punycode_end,
// updating i accordingly.  Returns true on success.  Returns false on parse
// failure or arithmetic overflow.
bool ScanNextDelta(const char*& punycode_begin, const char* const punycode_end,
                   uint32_t bias, uint32_t& i) { … }

}  // namespace

absl::Nullable<char*> DecodeRustPunycode(DecodeRustPunycodeOptions options) { … }

}  // namespace debugging_internal
ABSL_NAMESPACE_END
}  // namespace absl
chromium/third_party/abseil-cpp/absl/debugging/internal/decode_rust_punycode.cc