// Copyright 2024 The Abseil Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "absl/debugging/internal/decode_rust_punycode.h" #include <cstddef> #include <cstdint> #include <cstring> #include "absl/base/config.h" #include "absl/base/nullability.h" #include "absl/debugging/internal/bounded_utf8_length_sequence.h" #include "absl/debugging/internal/utf8_for_code_point.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace debugging_internal { namespace { // Decoding Punycode requires repeated random-access insertion into a stream of // variable-length UTF-8 code-point encodings. We need this to be tolerably // fast (no N^2 slowdown for unfortunate inputs), and we can't allocate any data // structures on the heap (async-signal-safety). // // It is pragmatic to impose a moderately low limit on the identifier length and // bail out if we ever hit it. Then BoundedUtf8LengthSequence efficiently // determines where to insert the next code point, and memmove efficiently makes // room for it. // // The chosen limit is a round number several times larger than identifiers // expected in practice, yet still small enough that a memmove of this many // UTF-8 characters is not much more expensive than the division and modulus // operations that Punycode decoding requires. constexpr uint32_t kMaxChars = …; // Constants from RFC 3492 section 5. 
constexpr uint32_t kBase = …, kTMin = …, kTMax = …, kSkew = …, kDamp = …; constexpr uint32_t kMaxCodePoint = …; // Overflow threshold in DecodeRustPunycode's inner loop; see comments there. constexpr uint32_t kMaxI = …; // If punycode_begin .. punycode_end begins with a prefix matching the regular // expression [0-9a-zA-Z_]+_, removes that prefix, copies all but the final // underscore into out_begin .. out_end, sets num_ascii_chars to the number of // bytes copied, and returns true. (A prefix of this sort represents the // nonempty subsequence of ASCII characters in the corresponding plaintext.) // // If punycode_begin .. punycode_end does not contain an underscore, sets // num_ascii_chars to zero and returns true. (The encoding of a plaintext // without any ASCII characters does not carry such a prefix.) // // Returns false and zeroes num_ascii_chars on failure (either parse error or // not enough space in the output buffer). bool ConsumeOptionalAsciiPrefix(const char*& punycode_begin, const char* const punycode_end, char* const out_begin, char* const out_end, uint32_t& num_ascii_chars) { … } // Returns the value of `c` as a base-36 digit according to RFC 3492 section 5, // or -1 if `c` is not such a digit. int DigitValue(char c) { … } // Consumes the next delta encoding from punycode_begin .. punycode_end, // updating i accordingly. Returns true on success. Returns false on parse // failure or arithmetic overflow. // // punycode_begin and i are taken by reference and serve as in/out parameters; // bias is taken by value, so the caller's copy is never modified here. bool ScanNextDelta(const char*& punycode_begin, const char* const punycode_end, uint32_t bias, uint32_t& i) { … } } // namespace // Defined outside the anonymous namespace, so unlike the helpers above it is // visible to other translation units. Takes all of its inputs bundled in // `options` and returns a pointer that may be null (absl::Nullable), so callers // must check the result before dereferencing it. // // NOTE(review): the body is not visible in this view. Presumably the full // contract, including what a null return signifies, is documented with the // declaration in absl/debugging/internal/decode_rust_punycode.h; confirm there. absl::Nullable<char*> DecodeRustPunycode(DecodeRustPunycodeOptions options) { … } } // namespace debugging_internal ABSL_NAMESPACE_END } // namespace absl