// Copyright 2013 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifdef UNSAFE_BUFFERS_BUILD // TODO(crbug.com/40285824): Remove this and convert code to safer constructs. #pragma allow_unsafe_buffers #endif // This is a copy of url/url_canon.h circa 2023. It should be used only by // components/feedback/redaction_tool/. // We need a copy because the components/feedback/redaction_tool source code is // shared into ChromeOS and needs to have no dependencies outside of base/. #ifndef COMPONENTS_FEEDBACK_REDACTION_TOOL_URL_CANON_H_ #define COMPONENTS_FEEDBACK_REDACTION_TOOL_URL_CANON_H_ #include <stdlib.h> #include <string.h> #include "base/memory/raw_ptr.h" #include "base/memory/raw_ptr_exclusion.h" #include "base/numerics/clamped_math.h" #include "components/feedback/redaction_tool/url_parse.h" namespace redaction_internal { // Canonicalizer output ------------------------------------------------------- // Base class for the canonicalizer output, this maintains a buffer and // supports simple resizing and append operations on it. // // It is VERY IMPORTANT that no virtual function calls be made on the common // code path. We only have two virtual function calls, the destructor and a // resize function that is called when the existing buffer is not big enough. // The derived class is then in charge of setting up our buffer which we will // manage. template <typename T> class CanonOutputT { … }; // Simple implementation of the CanonOutput using new[]. This class // also supports a static buffer so if it is allocated on the stack, most // URLs can be canonicalized with no heap allocations. template <typename T, int fixed_capacity = 1024> class RawCanonOutputT : public CanonOutputT<T> { … }; // Normally, all canonicalization output is in narrow characters. We support // the templates so it can also be used internally if a wide buffer is // required. 
CanonOutput; CanonOutputW; template <int fixed_capacity> class RawCanonOutput : public RawCanonOutputT<char, fixed_capacity> { … }; template <int fixed_capacity> class RawCanonOutputW : public RawCanonOutputT<char16_t, fixed_capacity> { … }; // Character set converter ---------------------------------------------------- // // Converts query strings into a custom encoding. The embedder can supply an // implementation of this class to interface with their own character set // conversion libraries. // // Embedders will want to see the unit test for the ICU version. class CharsetConverter { … }; // Schemes -------------------------------------------------------------------- // Types of a scheme representing the requirements on the data represented by // the authority component of a URL with the scheme. enum SchemeType { … }; // This structure holds detailed state exported from the IP/Host canonicalizers. // Additional fields may be added as callers require them. struct CanonHostInfo { … }; // Part replacer -------------------------------------------------------------- // Internal structure used for storing separate strings for each component. // The basic canonicalization functions use this structure internally so that // component replacement (different strings for different components) can be // treated on the same code path as regular canonicalization (the same string // for each component). // // A Parsed structure usually goes along with this. Those components identify // offsets within these strings, so that they can all be in the same string, // or spread arbitrarily across different ones. // // This structure does not own any data. It is the caller's responsibility to // ensure that the data the pointers point to stays in scope and is not // modified. template <typename CHAR> struct URLComponentSource { … }; // This structure encapsulates information on modifying a URL. Each component // may either be left unchanged, replaced, or deleted. 
// // By default, each component is unchanged. For those components that should be // modified, call either Set* or Clear* to modify it. // // The string passed to Set* functions DOES NOT GET COPIED AND MUST BE KEPT // IN SCOPE BY THE CALLER for as long as this object exists! // // Prefer the 8-bit replacement version if possible since it is more efficient. template <typename CHAR> class Replacements { … }; } // namespace redaction_internal #endif // COMPONENTS_FEEDBACK_REDACTION_TOOL_URL_CANON_H_