// chromium/chrome/browser/ash/input_method/diacritics_insensitive_string_comparator.cc

// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/ash/input_method/diacritics_insensitive_string_comparator.h"

#include "base/check_op.h"
#include "base/memory/ptr_util.h"
#include "third_party/icu/source/common/unicode/unistr.h"

namespace ash {
namespace input_method {

// Builds the ICU transliterator stored in |diacritics_stripper_| from a fixed,
// hand-written rule set.
DiacriticsInsensitiveStringComparator::DiacriticsInsensitiveStringComparator() {
  // The rule set starts with the ::NFC transform and then maps each listed
  // accented letter to its plain counterpart (the ligatures become two
  // letters). Upper-case letters map to upper-case replacements.
  //
  // Intentionally only covering Latin-script accented letters likely found in
  // French, Spanish, Dutch, Swedish, Norwegian, Danish, and Catalan.
  const icu::UnicodeString stripping_rules =
      icu::UnicodeString::fromUTF8("::NFC; "
                                   "[ á à â ä ā å ] > a; "
                                   "[ Á À Â Ä Ā Å ] > A; "
                                   "[ é è ê ë ē   ] > e; "
                                   "[ É È Ê Ë Ē   ] > E; "
                                   "[ í ì î ï ī   ] > i; "
                                   "[ Í Ì Î Ï Ī   ] > I; "
                                   "[ ó ò ô ö ō ø ] > o; "
                                   "[ Ó Ò Ô Ö Ō Ø ] > O; "
                                   "[ ú ù û ü ū   ] > u; "
                                   "[ Ú Ù Û Ü Ū   ] > U; "
                                   "[ ý ỳ ŷ ÿ ȳ   ] > y; "
                                   "[ Ý Ỳ Ŷ Ÿ Ȳ   ] > Y; "
                                   "ç > c; ñ > n; æ > ae; œ > oe; "
                                   "Ç > C; Ñ > N; Æ > AE; Œ > OE; ");
  const icu::UnicodeString transliterator_id =
      UNICODE_STRING_SIMPLE("DiacriticStripper");

  // Out-parameters filled in by ICU while it parses the rules.
  UParseError rule_parse_error;
  UErrorCode status = U_ZERO_ERROR;

  diacritics_stripper_ = base::WrapUnique(icu::Transliterator::createFromRules(
      transliterator_id, stripping_rules, UTRANS_FORWARD, rule_parse_error,
      status));

  // The rules above are a fixed literal, so anything other than
  // U_ZERO_ERROR here indicates a programming error.
  DCHECK_EQ(status, U_ZERO_ERROR);
}

// No explicit teardown is performed here.
DiacriticsInsensitiveStringComparator::
    ~DiacriticsInsensitiveStringComparator() = default;

bool DiacriticsInsensitiveStringComparator::Equal(
    const std::u16string& a,
    const std::u16string& b) const {
  icu::UnicodeString unicode_str_a(a.c_str(), a.length());
  icu::UnicodeString unicode_str_b(b.c_str(), b.length());

  diacritics_stripper_->transliterate(unicode_str_a);
  diacritics_stripper_->transliterate(unicode_str_b);

  return unicode_str_a.compare(unicode_str_b) == 0;
}

}  // namespace input_method
}  // namespace ash