chromium/components/url_formatter/spoof_checks/skeleton_generator.cc

// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#include "components/url_formatter/spoof_checks/skeleton_generator.h"

#include <ostream>
#include <queue>
#include <string_view>

#include "base/i18n/unicodestring.h"
#include "base/memory/ptr_util.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "third_party/icu/source/i18n/unicode/regex.h"
#include "third_party/icu/source/i18n/unicode/translit.h"
#include "third_party/icu/source/i18n/unicode/uspoof.h"

namespace {

// NOTE(review): this declaration is incomplete in this view (a bare
// identifier with no `using` or type). Presumably it is a type alias for the
// entries of a work queue (<queue> is included above) -- confirm against the
// full file.
QueueItem;

// NOTE(review): the initializers of the four constants below are not visible
// in this view; their values must be confirmed against the full file.

// Maximum length of a hostname whose supplemental hostnames we'll calculate.
// For hostnames longer than this length, the supplemental hostnames will be
// empty.
const size_t kMaxHostnameLengthToComputeSupplementalHostnames =;

// Maximum number of supplemental hostnames to generate for a given input.
// If this number is too high, we may end up DOSing the browser process.
// If it's too low, we may not be able to cover some lookalike URLs.
const size_t kMaxSupplementalHostnames =;

// Maximum number of characters with multiple skeletons in a hostname (i.e.
// interesting characters). The number of interesting characters directly
// affects how many supplemental hostnames are generated. Assuming an
// interesting character has 3 skeletons (1 original skeleton, 2 supplemental
// skeletons), this will generate pow(3, kMaxCharactersWithMultipleSkeletons)
// supplemental hostnames, so we cap it.
// If a hostname has too many interesting characters, it's unlikely to be a
// convincing spoof.
const size_t kMaxCharactersWithMultipleSkeletons =;

// Limit the number of maximum supplemental skeletons for a given character to a
// reasonable number. This can be adjusted in the future as needed.
const size_t kMaxSupplementalSkeletonsPerCharacter =;

}  // namespace

// Constructs a generator from a pointer to an ICU spoof checker
// (USpoofChecker, declared in unicode/uspoof.h).
// NOTE(review): the member-initializer list is not visible in this view, so
// how `checker` is stored or used, and whether the generator takes ownership
// of it, cannot be confirmed here.
SkeletonGenerator::SkeletonGenerator(const USpoofChecker* checker)
    :{}

// Compiler-generated destructor, defined out of line in this translation unit.
SkeletonGenerator::~SkeletonGenerator() = default;

// In-place overload for ICU strings: `hostname` is taken by non-const
// reference and nothing is returned, so any result is delivered by mutating
// the argument.
// NOTE(review): the body is not visible in this view. Presumably diacritics
// are stripped only when some condition holds (hence "Maybe") -- confirm
// against the header.
void SkeletonGenerator::MaybeRemoveDiacritics(icu::UnicodeString& hostname) {}

// Value-returning overload: takes the hostname as a UTF-16 view and returns a
// new std::u16string, leaving the caller's data untouched.
// NOTE(review): the body is not visible in this view. Presumably this wraps
// the icu::UnicodeString overload above -- confirm against the full file.
std::u16string SkeletonGenerator::MaybeRemoveDiacritics(
    std::u16string_view hostname) {}

// Const predicate on `input_hostname`.
// NOTE(review): the body is not visible in this view. Judging by the name, it
// decides whether supplemental hostnames should be computed for a hostname
// that still carries diacritics -- confirm the exact criteria against the full
// file.
bool SkeletonGenerator::ShouldComputeSupplementalHostnamesWithDiacritics(
    std::u16string_view input_hostname) const {}

// Returns a `Skeletons` value for `input_hostname`. This is a non-const member
// function, so it may modify generator state.
// NOTE(review): the body is not visible in this view. Presumably the result
// holds the confusability skeletons computed for the hostname and its
// supplemental variants -- confirm against the full file.
Skeletons SkeletonGenerator::GetSkeletons(std::u16string_view input_hostname) {}

// Const predicate on a single `label`, passed as an ICU string.
// NOTE(review): the body is not visible in this view. Presumably `label` is
// one dot-separated component of a hostname and the result says whether its
// diacritics should be stripped -- confirm against the full file.
bool SkeletonGenerator::ShouldRemoveDiacriticsFromLabel(
    const icu::UnicodeString& label) const {}

// Takes a host string, a source character, a mapped character and a pointer to
// a `Skeletons` collection. Nothing is returned, so any result is delivered
// through `skeletons`.
// NOTE(review): the body is not visible in this view. Presumably `src_char`
// and `mapped_char` are Unicode code points, and the function adds to
// `skeletons` the skeleton of `host` with `src_char` replaced by
// `mapped_char` -- confirm against the full file.
void SkeletonGenerator::AddSkeletonMapping(const icu::UnicodeString& host,
                                           int32_t src_char,
                                           int32_t mapped_char,
                                           Skeletons* skeletons) {}

// Returns a set of UTF-16 hostnames derived from `input`, given a limit
// `max_alternatives` and a `SkeletonMap`. Marked static (see below), so it does
// not operate on a particular generator instance.
// NOTE(review): the body is not visible in this view. Presumably `mapping`
// supplies the alternative skeletons per character and at most
// `max_alternatives` hostnames are produced -- confirm against the full file.
// static
base::flat_set<std::u16string> SkeletonGenerator::GenerateSupplementalHostnames(
    std::u16string_view input,
    size_t max_alternatives,
    const SkeletonMap& mapping) {}