// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/ash/input_method/diacritics_insensitive_string_comparator.h"
#include "base/check_op.h"
#include "base/memory/ptr_util.h"
#include "third_party/icu/source/common/unicode/unistr.h"
namespace ash {
namespace input_method {
DiacriticsInsensitiveStringComparator::DiacriticsInsensitiveStringComparator() {
  // Rule set for the transliterator built below: the leading "::NFC" rule is
  // followed by rules that rewrite each listed letter to a plain ASCII
  // replacement (ligatures become two letters). The set is intentionally
  // limited to Latin-script accented letters likely found in French, Spanish,
  // Dutch, Swedish, Norwegian, Danish, and Catalan.
  const icu::UnicodeString stripping_rules =
      icu::UnicodeString::fromUTF8("::NFC; "
                                   "[ á à â ä ā å ] > a; "
                                   "[ Á À Â Ä Ā Å ] > A; "
                                   "[ é è ê ë ē ] > e; "
                                   "[ É È Ê Ë Ē ] > E; "
                                   "[ í ì î ï ī ] > i; "
                                   "[ Í Ì Î Ï Ī ] > I; "
                                   "[ ó ò ô ö ō ø ] > o; "
                                   "[ Ó Ò Ô Ö Ō Ø ] > O; "
                                   "[ ú ù û ü ū ] > u; "
                                   "[ Ú Ù Û Ü Ū ] > U; "
                                   "[ ý ỳ ŷ ÿ ȳ ] > y; "
                                   "[ Ý Ỳ Ŷ Ÿ Ȳ ] > Y; "
                                   "ç > c; ñ > n; æ > ae; œ > oe; "
                                   "Ç > C; Ñ > N; Æ > AE; Œ > OE; ");
  const icu::UnicodeString transliterator_id =
      UNICODE_STRING_SIMPLE("DiacriticStripper");

  // Out-parameters filled in by ICU while it parses the rules.
  UParseError rule_parse_error;
  UErrorCode icu_status = U_ZERO_ERROR;

  // createFromRules() hands back a raw pointer; wrap it so the member owns it.
  diacritics_stripper_ = base::WrapUnique(icu::Transliterator::createFromRules(
      transliterator_id, stripping_rules, UTRANS_FORWARD, rule_parse_error,
      icu_status));

  // The rules are a compile-time constant, so anything other than a clean
  // status is treated as a programming error.
  DCHECK_EQ(icu_status, U_ZERO_ERROR);
}
// Nothing to release by hand; members clean up after themselves.
DiacriticsInsensitiveStringComparator::
    ~DiacriticsInsensitiveStringComparator() = default;
bool DiacriticsInsensitiveStringComparator::Equal(
const std::u16string& a,
const std::u16string& b) const {
icu::UnicodeString unicode_str_a(a.c_str(), a.length());
icu::UnicodeString unicode_str_b(b.c_str(), b.length());
diacritics_stripper_->transliterate(unicode_str_a);
diacritics_stripper_->transliterate(unicode_str_b);
return unicode_str_a.compare(unicode_str_b) == 0;
}
} // namespace input_method
} // namespace ash