// Copyright 2013 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifdef UNSAFE_BUFFERS_BUILD // TODO(crbug.com/350788890): Remove this and spanify to fix the errors. #pragma allow_unsafe_buffers #endif #include <limits.h> #include <optional> #include "base/check.h" #include "base/check_op.h" #include "url/url_canon.h" #include "url/url_canon_internal.h" #include "url/url_features.h" #include "url/url_parse_internal.h" namespace url { namespace { enum CharacterFlags { … }; // Each entry of this table holds one of the above flag values. Note some flags are more // than one bit because they also turn on the "special" flag. Special is the // only flag that may be combined with others. // // This table was originally designed to match exactly what IE did with the // characters; that behavior, however, doesn't comply with the URL Standard as // of Dec 2023. See https://crbug.com/1509295. // // Dot is even more special, and the escaped version is handled specially by // IsDot. Therefore, we don't need the "escape" flag. We just need the "special" // bit. // // clang-format off const unsigned char kPathCharLookup[0x100] = …; // clang-format on enum DotDisposition { … }; // When the path resolver finds a dot, this function is called with the // character following that dot to see what it is. The return value // indicates what type this dot is (see above). This code handles the case // where the dot is at the end of the input. // // |*consumed_len| will contain the number of characters in the input that // express what we found. // // If the input is "../foo", |after_dot| = 1, |end| = 6, and // at the end, |*consumed_len| = 2 for the "./" this function consumed. The // original dot length should be handled by the caller. template <typename CHAR> DotDisposition ClassifyAfterDot(const CHAR* spec, size_t after_dot, size_t end, size_t* consumed_len) { … } // Rewinds the output to the previous slash. 
It is assumed that the output // ends with a slash and this doesn't count (we call this when we are // appending directory paths, so the previous path component has an ending // slash). // // This will stop at the first slash (assumed to be at position // |path_begin_in_output|) and will not go any higher than that. Some web pages // do ".." too many times, so we need to handle that brokenness. // // It searches for a literal slash rather than including a backslash as well // because it is run only on the canonical output. // // The output is guaranteed to end in a slash when this function completes. void BackUpToPreviousSlash(size_t path_begin_in_output, CanonOutput* output) { … } // Canonicalizes and appends the given path to the output. It assumes that if // the input path starts with a slash, it should be copied to the output. // // If there are already path components (this mode is used when appending // relative paths for resolving), it assumes that the output already has // a trailing slash and that if the input begins with a slash, it should be // copied to the output. // // We do not collapse multiple slashes in a row to a single slash. It seems // no web browsers do this, and we don't want incompatibilities, even though // it would be correct for most systems. template <typename CHAR, typename UCHAR> bool DoPartialPathInternal(const CHAR* spec, const Component& path, size_t path_begin_in_output, CanonMode canon_mode, CanonOutput* output) { … } // Performs the same logic as in DoPartialPathInternal(), but updates the // publicly exposed CanonOutput structure similar to DoPath(). Returns // true if successful. 
template <typename CHAR, typename UCHAR> bool DoPartialPath(const CHAR* spec, const Component& path, CanonOutput* output, Component* out_path) { … } /* Takes the same arguments as DoPartialPath() plus a |canon_mode|. NOTE(review): body not visible here; presumably canonicalizes a complete path and returns true on success -- confirm. */ template <typename CHAR, typename UCHAR> bool DoPath(const CHAR* spec, const Component& path, CanonMode canon_mode, CanonOutput* output, Component* out_path) { … } } // namespace /* CanonicalizePath() overloads, declared outside the anonymous namespace, for char and char16_t input, with and without an explicit |canon_mode|. NOTE(review): presumably thin wrappers over DoPath() -- confirm. */ bool CanonicalizePath(const char* spec, const Component& path, CanonMode canon_mode, CanonOutput* output, Component* out_path) { … } bool CanonicalizePath(const char16_t* spec, const Component& path, CanonMode canon_mode, CanonOutput* output, Component* out_path) { … } bool CanonicalizePath(const char* spec, const Component& path, CanonOutput* output, Component* out_path) { … } bool CanonicalizePath(const char16_t* spec, const Component& path, CanonOutput* output, Component* out_path) { … } /* CanonicalizePartialPath() overloads for char and char16_t input; same parameter list as DoPartialPath(). NOTE(review): presumably forward to it -- confirm. */ bool CanonicalizePartialPath(const char* spec, const Component& path, CanonOutput* output, Component* out_path) { … } bool CanonicalizePartialPath(const char16_t* spec, const Component& path, CanonOutput* output, Component* out_path) { … } /* Overloads for char and char16_t input that take |path_begin_in_output| and |canon_mode|, the same parameter list as DoPartialPathInternal(). NOTE(review): presumably forward to it -- confirm. */ bool CanonicalizePartialPathInternal(const char* spec, const Component& path, size_t path_begin_in_output, CanonMode canon_mode, CanonOutput* output) { … } bool CanonicalizePartialPathInternal(const char16_t* spec, const Component& path, size_t path_begin_in_output, CanonMode canon_mode, CanonOutput* output) { … } } // namespace url