chromium/url/url_canon_host.cc

// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/350788890): Remove this and spanify to fix the errors.
#pragma allow_unsafe_buffers
#endif

#include "base/check.h"
#include "base/cpu_reduction_experiment.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
#include "url/url_features.h"

namespace url {

namespace {

// This table lists the canonical version of all characters we allow in the
// input, with 0 indicating it is disallowed. We use the magic kEsc value to
// indicate that this character should be escaped. At present, ' ' (SPACE) and
// '*' (asterisk) are still non-compliant with the URL Standard. See
// https://crbug.com/1416013 for details.
//
// The table has 0x80 entries; presumably it is indexed by the 7-bit ASCII code
// of the input character, so callers must not index it with values >= 0x80.
//
// NOTE(review): the initializers of both kEsc and kHostCharLookup are missing
// as shown here; confirm they are present in the real file.
const unsigned char kEsc =;
// clang-format off
const unsigned char kHostCharLookup[0x80] =;
// clang-format on

// Marker for a forbidden host code point as defined by the URL Standard:
// https://url.spec.whatwg.org/#forbidden-host-code-point
//
// NOTE(review): presumably a bit flag stored in the entries of
// kHostCharacterTable; the initializer is missing as shown here -- confirm.
const uint8_t kForbiddenHost =;

// TODO(crbug.com/40063064): Merge other lookup tables into this table. That can
// probably be done after https://crbug.com/1416013 is resolved.
//
// This table is currently only used for an opaque-host in non-special URLs.
//
// The table has 128 entries; presumably one per 7-bit ASCII code point, with
// each entry holding flags such as kForbiddenHost.
//
// NOTE(review): the initializer is missing as shown here -- confirm it is
// present in the real file.
const uint8_t kHostCharacterTable[128] =;
// clang-format on

// Reports whether |ch| is a forbidden host code point
// (https://url.spec.whatwg.org/#forbidden-host-code-point).
//
// NOTE(review): presumably this looks |ch| up in kHostCharacterTable and tests
// the kForbiddenHost flag. |ch| is a uint8_t (0..255) while the table has only
// 128 entries, so any such lookup needs a range check first -- confirm.
// NOTE(review): the body is empty as shown here, so no value is returned;
// confirm the implementation is present in the real file.
bool IsForbiddenHostCodePoint(uint8_t ch) {}

// RFC1034 maximum FQDN length, in characters.
//
// NOTE(review): the initializer is missing as shown here; the comment on
// kMaxHostBufferLength refers to a 253 character limit -- confirm the value.
constexpr size_t kMaxHostLength =;

// Upper bound on the host length that is accepted for processing.
//
// Generous padding to account for the fact that UTS#46 normalization can cause
// a long string to actually shrink and fit within the 253 character RFC1034
// FQDN length limit. Note that this can still be too short for pathological
// cases: An arbitrary number of characters (e.g. U+00AD SOFT HYPHEN) can be
// removed from the input by UTS#46 processing. However, this should be
// sufficient for all normally-encountered, non-abusive hostname strings.
//
// NOTE(review): the initializer is missing as shown here; presumably it is
// derived from kMaxHostLength -- confirm.
constexpr size_t kMaxHostBufferLength =;

// Size of the temporary host buffers used during canonicalization.
//
// NOTE(review): StackBuffer / StackBufferW are presumably aliases for
// stack-allocated 8-bit and 16-bit canon output buffers of kTempHostBufferLen
// elements. The declarations and the initializer are truncated as shown here
// -- confirm against the real file.
constexpr size_t kTempHostBufferLen =;
StackBuffer;
StackBufferW;

// Scans a host name and fills in the output flags according to what we find.
// |has_non_ascii| will be true if there are any non-7-bit characters, and
// |has_escaped| will be true if there is a percent sign.
//
// |spec| is the string containing the host and |host| identifies the host
// component within it. CHAR is the character type of |spec|; UCHAR is
// presumably its unsigned counterpart, used for range comparisons.
//
// NOTE(review): the body is empty as shown here, so neither flag is written;
// confirm the implementation is present in the real file.
template<typename CHAR, typename UCHAR>
void ScanHostname(const CHAR* spec,
                  const Component& host,
                  bool* has_non_ascii,
                  bool* has_escaped) {}

// Canonicalizes a host name that is entirely 8-bit characters (even though
// the type holding them may be 16 bits). Escaped characters will be unescaped.
// Non-7-bit characters (for example, UTF-8) will be passed unchanged.
//
// The |*has_non_ascii| flag will be true if there are non-7-bit characters in
// the output.
//
// This function is used in two situations:
//
//  * When the caller knows there are no non-ASCII or percent-escaped
//    characters. This is what DoHost does. The result will be a completely
//    canonicalized host since we know nothing weird can happen (escaped
//    characters could be unescaped to non-7-bit, so they have to be treated
//    with suspicion at this point). It does not use the |has_non_ascii| flag.
//
//  * When the caller has an 8-bit string that may need unescaping.
//    DoComplexHost calls us in this situation to do unescaping and validation.
//    After this, it may do other IDN operations depending on the value of the
//    |*has_non_ascii| flag.
//
// |host| points at the first character of the host and |host_len| is the
// number of characters to process; the result is appended to |output|.
//
// The return value indicates if the output is a potentially valid host name.
//
// NOTE(review): the body is empty as shown here, so no value is returned;
// confirm the implementation is present in the real file.
template <typename INCHAR, typename OUTCHAR>
bool DoSimpleHost(const INCHAR* host,
                  size_t host_len,
                  CanonOutputT<OUTCHAR>* output,
                  bool* has_non_ascii) {}

// Canonicalizes a host that requires IDN conversion. Returns true on success.
//
// |src| is the UTF-16 host of |src_len| code units; the result is written to
// |output|.
//
// NOTE(review): the body is empty as shown here, so no value is returned;
// confirm the implementation is present in the real file.
bool DoIDNHost(const char16_t* src, size_t src_len, CanonOutput* output) {}

// Converts an 8-bit host to its ASCII version: this converts the UTF-8 input
// to UTF-16. The has_escaped flag should be set if the input string requires
// unescaping.
//
// |host| / |host_len| describe the input, |has_non_ascii| tells whether the
// input contains non-7-bit characters, and the result is written to |output|.
// Presumably returns true when the host could be converted -- confirm.
//
// NOTE(review): the body is empty as shown here, so no value is returned;
// confirm the implementation is present in the real file.
bool DoComplexHost(const char* host,
                   size_t host_len,
                   bool has_non_ascii,
                   bool has_escaped,
                   CanonOutput* output) {}

// Converts a UTF-16 host to its ASCII version. The input is already in the
// form the backend expects, so we just pass it through. The has_escaped flag
// should be set if the input string requires unescaping.
//
// |host| / |host_len| describe the input, |has_non_ascii| tells whether the
// input contains non-7-bit characters, and the result is written to |output|.
// Presumably returns true when the host could be converted -- confirm.
//
// NOTE(review): the body is empty as shown here, so no value is returned;
// confirm the implementation is present in the real file.
bool DoComplexHost(const char16_t* host,
                   size_t host_len,
                   bool has_non_ascii,
                   bool has_escaped,
                   CanonOutput* output) {}

// Shared implementation behind the CanonicalizeHostSubstring overloads
// (presumably; the call sites are not visible here). |host| identifies the
// range of |spec| to canonicalize and the result is written to |output|.
// Presumably returns true when the substring is a potentially valid part of a
// host name -- confirm.
//
// NOTE(review): the body is empty as shown here, so no value is returned;
// confirm the implementation is present in the real file.
template <typename CHAR, typename UCHAR>
bool DoHostSubstring(const CHAR* spec,
                     const Component& host,
                     CanonOutput* output) {}

// Canonicalizes |host| as an opaque host and writes the result to |output|.
//
// NOTE(review): presumably this is the path for hosts of non-special URLs
// and relies on kHostCharacterTable, whose comment says it is only used for
// an opaque-host in non-special URLs -- confirm. The return value presumably
// reports whether the host is valid.
// NOTE(review): the body is empty as shown here, so no value is returned;
// confirm the implementation is present in the real file.
template <typename CharT>
bool DoOpaqueHost(const std::basic_string_view<CharT> host,
                  CanonOutput& output) {}

// Common host canonicalization routine, parameterized on the character type
// of |spec| and on |canon_mode|. |host| identifies the host component inside
// |spec|; the canonical form goes to |output| and details about the result go
// to |host_info|.
//
// NOTE(review): presumably the *Verbose entry points below dispatch here with
// the matching CanonMode -- the call sites are not visible; confirm.
// NOTE(review): the body is empty as shown here, so |output| and |host_info|
// are left untouched; confirm the implementation is present in the real file.
template <typename CHAR, typename UCHAR, CanonMode canon_mode>
void DoHost(const CHAR* spec,
            const Component& host,
            CanonOutput& output,
            CanonHostInfo& host_info) {}

}  // namespace

// 8-bit overload. Canonicalizes the |host| component of |spec| into |output|.
//
// NOTE(review): presumably |out_host| receives the range written to |output|
// and the return value reports whether the host is valid; see url_canon.h for
// the contract -- confirm.
// NOTE(review): the body is empty as shown here, so no value is returned;
// confirm the implementation is present in the real file.
bool CanonicalizeHost(const char* spec,
                      const Component& host,
                      CanonOutput* output,
                      Component* out_host) {}

// char16_t overload. Canonicalizes the |host| component of |spec| into
// |output|.
//
// NOTE(review): presumably |out_host| receives the range written to |output|
// and the return value reports whether the host is valid; see url_canon.h for
// the contract -- confirm.
// NOTE(review): the body is empty as shown here, so no value is returned;
// confirm the implementation is present in the real file.
bool CanonicalizeHost(const char16_t* spec,
                      const Component& host,
                      CanonOutput* output,
                      Component* out_host) {}

// 8-bit overload. Canonicalizes the |host| component of |spec| into |output|
// using the rules for special URLs (per the function name).
//
// NOTE(review): presumably |out_host| receives the range written to |output|
// and the return value reports whether the host is valid -- confirm.
// NOTE(review): the body is empty as shown here, so no value is returned;
// confirm the implementation is present in the real file.
bool CanonicalizeSpecialHost(const char* spec,
                             const Component& host,
                             CanonOutput& output,
                             Component& out_host) {}

// char16_t overload. Canonicalizes the |host| component of |spec| into
// |output| using the rules for special URLs (per the function name).
//
// NOTE(review): presumably |out_host| receives the range written to |output|
// and the return value reports whether the host is valid -- confirm.
// NOTE(review): the body is empty as shown here, so no value is returned;
// confirm the implementation is present in the real file.
bool CanonicalizeSpecialHost(const char16_t* spec,
                             const Component& host,
                             CanonOutput& output,
                             Component& out_host) {}

// 8-bit overload. Canonicalizes the |host| component of |spec| into |output|
// using the rules for non-special URLs (per the function name).
//
// NOTE(review): presumably |out_host| receives the range written to |output|
// and the return value reports whether the host is valid -- confirm.
// NOTE(review): the body is empty as shown here, so no value is returned;
// confirm the implementation is present in the real file.
bool CanonicalizeNonSpecialHost(const char* spec,
                                const Component& host,
                                CanonOutput& output,
                                Component& out_host) {}

// char16_t overload. Canonicalizes the |host| component of |spec| into
// |output| using the rules for non-special URLs (per the function name).
//
// NOTE(review): presumably |out_host| receives the range written to |output|
// and the return value reports whether the host is valid -- confirm.
// NOTE(review): the body is empty as shown here, so no value is returned;
// confirm the implementation is present in the real file.
bool CanonicalizeNonSpecialHost(const char16_t* spec,
                                const Component& host,
                                CanonOutput& output,
                                Component& out_host) {}

// 8-bit overload. Canonicalizes the |host| component of |spec| into |output|
// and reports details about the result through |host_info| instead of a
// boolean return value.
//
// NOTE(review): the exact fields written to |host_info| are not visible here;
// see the CanonHostInfo declaration -- confirm.
// NOTE(review): the body is empty as shown here, so |output| and |host_info|
// are left untouched; confirm the implementation is present in the real file.
void CanonicalizeHostVerbose(const char* spec,
                             const Component& host,
                             CanonOutput* output,
                             CanonHostInfo* host_info) {}

// char16_t overload. Canonicalizes the |host| component of |spec| into
// |output| and reports details about the result through |host_info| instead
// of a boolean return value.
//
// NOTE(review): the exact fields written to |host_info| are not visible here;
// see the CanonHostInfo declaration -- confirm.
// NOTE(review): the body is empty as shown here, so |output| and |host_info|
// are left untouched; confirm the implementation is present in the real file.
void CanonicalizeHostVerbose(const char16_t* spec,
                             const Component& host,
                             CanonOutput* output,
                             CanonHostInfo* host_info) {}

// 8-bit overload. Canonicalizes the |host| component of |spec| into |output|
// using the rules for special URLs (per the function name) and reports
// details about the result through |host_info|.
//
// NOTE(review): presumably forwards to DoHost with the special-URL CanonMode
// -- the call is not visible here; confirm.
// NOTE(review): the body is empty as shown here, so |output| and |host_info|
// are left untouched; confirm the implementation is present in the real file.
void CanonicalizeSpecialHostVerbose(const char* spec,
                                    const Component& host,
                                    CanonOutput& output,
                                    CanonHostInfo& host_info) {}

// char16_t overload. Canonicalizes the |host| component of |spec| into
// |output| using the rules for special URLs (per the function name) and
// reports details about the result through |host_info|.
//
// NOTE(review): presumably forwards to DoHost with the special-URL CanonMode
// -- the call is not visible here; confirm.
// NOTE(review): the body is empty as shown here, so |output| and |host_info|
// are left untouched; confirm the implementation is present in the real file.
void CanonicalizeSpecialHostVerbose(const char16_t* spec,
                                    const Component& host,
                                    CanonOutput& output,
                                    CanonHostInfo& host_info) {}

// 8-bit overload. Canonicalizes the range of |spec| identified by |host|,
// treating it as a piece of a host name rather than a complete host (per the
// function name), and writes the result to |output|.
//
// NOTE(review): presumably forwards to DoHostSubstring and returns whether
// the substring is acceptable -- the call is not visible here; confirm.
// NOTE(review): the body is empty as shown here, so no value is returned;
// confirm the implementation is present in the real file.
bool CanonicalizeHostSubstring(const char* spec,
                               const Component& host,
                               CanonOutput* output) {}

// char16_t overload. Canonicalizes the range of |spec| identified by |host|,
// treating it as a piece of a host name rather than a complete host (per the
// function name), and writes the result to |output|.
//
// NOTE(review): presumably forwards to DoHostSubstring and returns whether
// the substring is acceptable -- the call is not visible here; confirm.
// NOTE(review): the body is empty as shown here, so no value is returned;
// confirm the implementation is present in the real file.
bool CanonicalizeHostSubstring(const char16_t* spec,
                               const Component& host,
                               CanonOutput* output) {}

// 8-bit overload. Canonicalizes the |host| component of |spec| into |output|
// using the rules for non-special URLs (per the function name) and reports
// details about the result through |host_info|.
//
// NOTE(review): presumably forwards to DoHost with the non-special CanonMode
// -- the call is not visible here; confirm.
// NOTE(review): the body is empty as shown here, so |output| and |host_info|
// are left untouched; confirm the implementation is present in the real file.
void CanonicalizeNonSpecialHostVerbose(const char* spec,
                                       const Component& host,
                                       CanonOutput& output,
                                       CanonHostInfo& host_info) {}

// char16_t overload. Canonicalizes the |host| component of |spec| into
// |output| using the rules for non-special URLs (per the function name) and
// reports details about the result through |host_info|.
//
// NOTE(review): presumably forwards to DoHost with the non-special CanonMode
// -- the call is not visible here; confirm.
// NOTE(review): the body is empty as shown here, so |output| and |host_info|
// are left untouched; confirm the implementation is present in the real file.
void CanonicalizeNonSpecialHostVerbose(const char16_t* spec,
                                       const Component& host,
                                       CanonOutput& output,
                                       CanonHostInfo& host_info) {}

}  // namespace url