llvm/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14, c++17
// UNSUPPORTED: libcpp-has-no-unicode
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME

// <format>

// Tests the implementation of the extended grapheme cluster boundaries per
// https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
//
// The tests are based on the test data provided by Unicode
// https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt

#include <cassert>
#include <format>
#include <functional>
#include <numeric>

#include "extended_grapheme_cluster.h"

// Validates whether the number of code points in our "database" matches with
// the number in the Unicode. The assumption is when the number of items per
// property matches the code points themselves also match.
namespace {
namespace cluster = std::__extended_grapheme_custer_property_boundary;
constexpr int count_entries(cluster::__property property) {
  return std::transform_reduce(
      std::begin(cluster::__entries), std::end(cluster::__entries), 0, std::plus{}, [property](auto entry) {
        if (static_cast<cluster::__property>(entry & 0xf) != property)
          return 0;

        return 1 + static_cast<int>((entry >> 4) & 0x7f);
      });
}

static_assert(count_entries(cluster::__property::__Prepend) == 27);
static_assert(count_entries(cluster::__property::__CR) == 1);
static_assert(count_entries(cluster::__property::__LF) == 1);
static_assert(count_entries(cluster::__property::__Control) == 3893);
static_assert(count_entries(cluster::__property::__Extend) == 2130);
static_assert(count_entries(cluster::__property::__Regional_Indicator) == 26);
static_assert(count_entries(cluster::__property::__SpacingMark) == 395);
static_assert(count_entries(cluster::__property::__L) == 125);
static_assert(count_entries(cluster::__property::__V) == 95);
static_assert(count_entries(cluster::__property::__T) == 137);
static_assert(count_entries(cluster::__property::__LV) == 399);
static_assert(count_entries(cluster::__property::__LVT) == 10773);
static_assert(count_entries(cluster::__property::__ZWJ) == 1);
static_assert(count_entries(cluster::__property::__Extended_Pictographic) == 3537);

namespace inCB = std::__indic_conjunct_break;
constexpr int count_entries(inCB::__property property) {
  return std::transform_reduce(
      std::begin(inCB::__entries), std::end(inCB::__entries), 0, std::plus{}, [property](auto entry) {
        if (static_cast<inCB::__property>(entry & 0b11) != property)
          return 0;

        return 1 + static_cast<int>((entry >> 2) & 0b1'1111'1111);
      });
}

static_assert(count_entries(inCB::__property::__Linker) == 6);
static_assert(count_entries(inCB::__property::__Consonant) == 240);
static_assert(count_entries(inCB::__property::__Extend) == 884);

} // namespace

template <class Data>
constexpr void test(const Data& data) {
  for (const auto& d : data) {
    assert(d.code_points.size() == d.breaks.size());

    std::__unicode::__extended_grapheme_cluster_view view{d.input.begin(), d.input.end()};
    for (std::size_t i = 0; i < d.breaks.size(); ++i) {
      auto r = view.__consume();
      assert(r.__code_point_ == d.code_points[i]);
      assert(r.__last_ == d.input.begin() + d.breaks[i]);
    }
  }
}

constexpr bool test() {
  test(data_utf8);

#ifndef TEST_HAS_NO_WIDE_CHARACTERS
  if constexpr (sizeof(wchar_t) == 2)
    test(data_utf16);
  else
    test(data_utf32);
#endif

  return true;
}

int main(int, char**) {
  test();
  // static_assert(test());

  return 0;
}