chromium/components/url_formatter/url_formatter.cc

// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#include "components/url_formatter/url_formatter.h"

#include <algorithm>
#include <ostream>
#include <string_view>
#include <utility>
#include <vector>

#include "base/lazy_instance.h"
#include "base/memory/raw_ptr.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/strcat.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_offset_string_conversions.h"
#include "base/strings/utf_string_conversions.h"
#include "base/threading/thread_local_storage.h"
#include "build/build_config.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "third_party/icu/source/common/unicode/uidna.h"
#include "third_party/icu/source/common/unicode/utypes.h"
#include "url/gurl.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_constants.h"
#include "url/url_util.h"

namespace url_formatter {

namespace {

// Prefix string constants, each paired with a length constant.
// NOTE(review): presumably the host prefixes removed by StripWWW(),
// StripWWWFromHostComponent() and StripMobilePrefix() — confirm.
const char kWww[] =;
constexpr size_t kWwwLength =;
const char kMobilePrefix[] =;
constexpr size_t kMobilePrefixLength =;

// Forward declaration; the definition appears later in this file.
IDNConversionResult IDNToUnicodeWithAdjustments(
    std::string_view host,
    base::OffsetAdjuster::Adjustments* adjustments);

// Result of converting a single IDN component (i.e. label) to Unicode. This is
// the return type of IDNToUnicodeOneComponent().
struct ComponentResult {};

// Forward declaration; the definition (with the full contract) appears later
// in this file.
ComponentResult IDNToUnicodeOneComponent(
    std::u16string_view comp,
    std::string_view top_level_domain,
    std::u16string_view top_level_domain_unicode,
    bool ignore_spoof_check_results,
    std::u16string* out);

// Base class for the |transform| argument of AppendFormattedComponent().
// HostComponentTransform and NonHostComponentTransform derive from it.
class AppendComponentTransform {};

// AppendComponentTransform subclass.
// NOTE(review): presumably the transform applied to a URL's host component
// (IDN-to-Unicode conversion) — confirm.
class HostComponentTransform : public AppendComponentTransform {};

// AppendComponentTransform subclass.
// NOTE(review): presumably the transform applied to every URL component other
// than the host — confirm.
class NonHostComponentTransform : public AppendComponentTransform {};

// Transforms the portion of |spec| covered by |original_component| according to
// |transform| and appends the result to |output|.
//
// |output_component|: if non-null, its start and length are set to the
//     transformed component's new start and length.
// |adjustments|: if non-null, adjustments (if any) are appended that reflect
//     the transformation the original component underwent to become the
//     transformed value appended to |output|.
void AppendFormattedComponent(const std::string& spec,
                              const url::Component& original_component,
                              const AppendComponentTransform& transform,
                              std::u16string* output,
                              url::Component* output_component,
                              base::OffsetAdjuster::Adjustments* adjustments) {}

// If |component| is valid, shifts its begin by |delta|. |delta| is a signed
// offset, so the shift may go in either direction.
void AdjustComponent(int delta, url::Component* component) {}

// Adjusts every component of |parsed| by |delta|, with the sole exception of
// the scheme.
// NOTE(review): presumably each component is shifted via AdjustComponent() —
// confirm.
void AdjustAllComponentsButScheme(int delta, url::Parsed* parsed) {}

// Helper for FormatUrlWithOffsets().
// NOTE(review): this helper's parameter list (ending in |adjustments|) matches
// FormatUrlWithAdjustments() rather than FormatUrlWithOffsets(); confirm which
// function actually calls it and update the line above if it is stale.
std::u16string FormatViewSourceUrl(
    const GURL& url,
    FormatUrlTypes format_types,
    base::UnescapeRule::Type unescape_rules,
    url::Parsed* new_parsed,
    size_t* prefix_end,
    base::OffsetAdjuster::Adjustments* adjustments) {}

// File-local IDNSpoofChecker held in a leaky LazyInstance.
base::LazyInstance<IDNSpoofChecker>::Leaky g_idn_spoof_checker =;

// Computes the top level domain from |host|. |top_level_domain_unicode| will
// contain the Unicode version of |top_level_domain|.
// |top_level_domain_unicode| can remain empty if the TLD is not well-formed
// punycode.
void GetTopLevelDomain(std::string_view host,
                       std::string_view* top_level_domain,
                       std::u16string* top_level_domain_unicode) {}

// Takes the same |host| and |adjustments| as IDNToUnicodeWithAdjustments() and
// UnsafeIDNToUnicodeWithAdjustments(), plus |ignore_spoof_check_results|.
// NOTE(review): presumably the shared implementation behind those two
// functions, with |ignore_spoof_check_results| passed on to
// IDNToUnicodeOneComponent() for each label — confirm.
IDNConversionResult IDNToUnicodeWithAdjustmentsImpl(
    std::string_view host,
    base::OffsetAdjuster::Adjustments* adjustments,
    bool ignore_spoof_check_results) {}

// NOTE(review): presumably the spoof-checked counterpart of
// UnsafeIDNToUnicodeWithAdjustments() — confirm.
//
// TODO(brettw): We may want to skip this step in the case of file URLs to
// allow unicode UNC hostnames regardless of encodings.
IDNConversionResult IDNToUnicodeWithAdjustments(
    std::string_view host,
    base::OffsetAdjuster::Adjustments* adjustments) {}

// Same signature as IDNToUnicodeWithAdjustments().
// NOTE(review): presumably converts |host| to Unicode while ignoring spoof
// check results (hence "Unsafe") — confirm before using the output for
// display.
IDNConversionResult UnsafeIDNToUnicodeWithAdjustments(
    std::string_view host,
    base::OffsetAdjuster::Adjustments* adjustments) {}

// Checks whether the given Unicode host component (|label|) is safe to display
// to the user and reports the outcome as an IDNSpoofChecker::Result. Note that
// this function does not deal with pure ASCII domain labels at all even though
// it's possible to make up look-alike labels with ASCII characters alone.
IDNSpoofChecker::Result SpoofCheckIDNComponent(
    std::u16string_view label,
    std::string_view top_level_domain,
    std::u16string_view top_level_domain_unicode) {}

// A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to
// a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().
//
// We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with the
// backward compatibility in mind. What it does:
//
// 1. Use the up-to-date Unicode data.
// 2. Define a case folding/mapping with the up-to-date Unicode data as in
//    IDNA 2003.
// 3. Use transitional mechanism for 4 deviation characters (sharp-s,
//    final sigma, ZWJ and ZWNJ) for now.
// 4. Continue to allow symbols and punctuation.
// 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDi rules.
// 6. Do not apply STD3 rules.
// 7. Do not allow unassigned code points.
//
// It also closely matches what IE 10 does except for the BiDi check (
// http://goo.gl/3XBhqw ).
// See http://unicode.org/reports/tr46/ and references therein for more
// details.
struct UIDNAWrapper {};

// File-local UIDNAWrapper held in a leaky LazyInstance.
base::LazyInstance<UIDNAWrapper>::Leaky g_uidna =;

// Converts one component (label) of a host (between dots) to Unicode if safe.
// If |ignore_spoof_check_results| is true and input is valid unicode, ignores
// spoof check results and always converts the input to unicode. The result will
// be APPENDED to |out| and will be the same as the input if it is not IDN in
// ACE/punycode or the IDN is unsafe to display. The outcome is reported through
// the returned ComponentResult.
// NOTE(review): this comment previously described a bool return value and a
// |has_idn_component| out-parameter, neither of which exists in the current
// signature. Presumably ComponentResult now carries both "conversion was made"
// and "input has IDN" — confirm against the struct's fields.
ComponentResult IDNToUnicodeOneComponent(
    std::u16string_view comp,
    std::string_view top_level_domain,
    std::u16string_view top_level_domain_unicode,
    bool ignore_spoof_check_results,
    std::u16string* out) {}

// Returns true iff URL-parsing |spec| would reveal that it has the
// "view-source" scheme, and that parsing |spec| minus that scheme also has
// the "view-source" scheme.
bool HasTwoViewSourceSchemes(std::string_view spec) {}

}  // namespace

// FormatUrlType constants.
// NOTE(review): presumably these are the values accepted (and combined) in the
// |format_types| argument of the FormatUrl*() functions — confirm against the
// declarations in url_formatter.h.
const FormatUrlType kFormatUrlOmitNothing =;
const FormatUrlType kFormatUrlOmitUsernamePassword =;
const FormatUrlType kFormatUrlOmitHTTP =;
const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname =;
const FormatUrlType kFormatUrlOmitHTTPS =;
const FormatUrlType kFormatUrlOmitTrivialSubdomains =;
const FormatUrlType kFormatUrlTrimAfterHost =;
const FormatUrlType kFormatUrlOmitFileScheme =;
const FormatUrlType kFormatUrlOmitMailToScheme =;
const FormatUrlType kFormatUrlOmitMobilePrefix =;

const FormatUrlType kFormatUrlOmitDefaults =;

// Member of the FormatUrl*() family that takes a single offset pointer,
// |offset_for_adjustment|, where FormatUrlWithOffsets() takes a vector of
// offsets and FormatUrlWithAdjustments() takes an Adjustments list.
// NOTE(review): presumably |*offset_for_adjustment| is rewritten to the
// matching position in the returned string — confirm against url_formatter.h.
std::u16string FormatUrl(const GURL& url,
                         FormatUrlTypes format_types,
                         base::UnescapeRule::Type unescape_rules,
                         url::Parsed* new_parsed,
                         size_t* prefix_end,
                         size_t* offset_for_adjustment) {}

// Member of the FormatUrl*() family that takes a vector of offsets,
// |offsets_for_adjustment|.
// NOTE(review): presumably each offset is rewritten to the matching position in
// the returned string — confirm against url_formatter.h.
std::u16string FormatUrlWithOffsets(
    const GURL& url,
    FormatUrlTypes format_types,
    base::UnescapeRule::Type unescape_rules,
    url::Parsed* new_parsed,
    size_t* prefix_end,
    std::vector<size_t>* offsets_for_adjustment) {}

// Member of the FormatUrl*() family that takes a
// base::OffsetAdjuster::Adjustments list, |adjustments|.
// NOTE(review): presumably |adjustments| receives the offset changes made while
// turning |url|'s spec into the returned string — confirm against
// url_formatter.h.
std::u16string FormatUrlWithAdjustments(
    const GURL& url,
    FormatUrlTypes format_types,
    base::UnescapeRule::Type unescape_rules,
    url::Parsed* new_parsed,
    size_t* prefix_end,
    base::OffsetAdjuster::Adjustments* adjustments) {}

// NOTE(review): presumably reports whether the trailing slash of |url| may be
// dropped when formatting (cf. kFormatUrlOmitTrailingSlashOnBareHostname) —
// confirm against url_formatter.h.
bool CanStripTrailingSlash(const GURL& url) {}

// NOTE(review): presumably appends the formatted host of |url| to |output| —
// confirm against url_formatter.h.
void AppendFormattedHost(const GURL& url, std::u16string* output) {}

// Takes only |host| and returns a full IDNConversionResult.
// NOTE(review): presumably a wrapper around UnsafeIDNToUnicodeWithAdjustments()
// that does not collect adjustments — confirm.
IDNConversionResult UnsafeIDNToUnicodeWithDetails(std::string_view host) {}

// Takes only |host| and returns a UTF-16 string.
// NOTE(review): presumably the string result of IDNToUnicodeWithAdjustments()
// with no adjustments collected — confirm.
std::u16string IDNToUnicode(std::string_view host) {}

// NOTE(review): presumably returns |text| with a leading kWww prefix removed,
// or |text| unchanged when the prefix is absent — confirm.
std::string StripWWW(const std::string& text) {}

// NOTE(review): presumably narrows |host| (a component of |url|) so that it no
// longer covers a leading kWww prefix — confirm.
void StripWWWFromHostComponent(const std::string& url, url::Component* host) {}

// NOTE(review): presumably returns |text| with a leading kMobilePrefix removed,
// or |text| unchanged when the prefix is absent — confirm.
std::string StripMobilePrefix(const std::string& text) {}

// NOTE(review): presumably computes the confusability skeletons of |host|,
// delegating to the IDN spoof checker — confirm.
Skeletons GetSkeletons(const std::u16string& host) {}

// NOTE(review): presumably looks up |skeleton| (of kind |type|) among the
// known top domains and returns the matching TopDomainEntry — confirm,
// including what is returned when there is no match.
TopDomainEntry LookupSkeletonInTopDomains(const std::string& skeleton,
                                          const SkeletonType type) {}

// NOTE(review): presumably returns |host| with diacritics removed when the
// spoof checker decides removal applies, and |host| unchanged otherwise —
// confirm.
std::u16string MaybeRemoveDiacritics(const std::u16string& host) {}

// NOTE(review): presumably reports which IDNA 2008 deviation character (sharp-s,
// final sigma, ZWJ or ZWNJ), if any, occurs in |hostname| — confirm.
IDNA2008DeviationCharacter GetDeviationCharacter(std::u16string_view hostname) {}

}  // namespace url_formatter