llvm/libcxx/test/libcxx/utilities/format/format.string/format.string.std/escaped_output.pass.cpp

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME

// <format>

// Tests the properties of the Unicode escaped output table.
// The libc++ algorithm has size and speed optimizations based on the properties
// of Unicode. This means updating the Unicode tables is likely to break this
// test. The test acts as an assert: when it fails, validate whether the
// assumptions of the size and speed optimizations are still valid.

#include <algorithm>
#include <cassert>
#include <format>
#include <functional>
#include <numeric>

// Contains the entries for [format.string.escaped]/2.2.1.2.1
//   CE is a Unicode encoding and C corresponds to a UCS scalar value whose
//   Unicode property General_Category has a value in the groups Separator (Z)
//   or Other (C), as described by table 12 of UAX #44
//
// Separator (Z) consists of General_Category
// - Zs Space_Separator,
// - Zl Line_Separator,
// - Zp Paragraph_Separator.
//
// Other (C) consists of General_Category
// - Cc Control,
// - Cf Format,
// - Cs Surrogate,
// - Co Private_Use,
// - Cn Unassigned.
//
// Expected number of code points in each Separator (Z) sub-category, and
// their sum. These are hard-coded expectations used to compute the expected
// size of the escaped output table.
inline constexpr int Zs = 17;
inline constexpr int Zl = 1;
inline constexpr int Zp = 1;
inline constexpr int Z  = Zs + Zl + Zp;

// Expected number of code points in each Other (C) sub-category, and their
// sum. Cn changes whenever code points get assigned, so it is the value most
// likely to need updating.
// NOTE(review): Cn = 824'718 appears to correspond to Unicode 15.1 -- confirm
// against the Unicode version the table was generated from.
inline constexpr int Cc = 65;
inline constexpr int Cf = 170;
inline constexpr int Cs = 2'048;
inline constexpr int Co = 137'468;
inline constexpr int Cn = 824'718;
inline constexpr int C  = Cc + Cf + Cs + Co + Cn;

// This is the final part of the Unicode properties table:
//
// 31350..323AF  ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
// 323B0..E0000  ; Cn # [711761] <reserved-323B0>..<reserved-E0000>
// E0001         ; Cf #       LANGUAGE TAG
// E0002..E001F  ; Cn #  [30] <reserved-E0002>..<reserved-E001F>
// E0020..E007F  ; Cf #  [96] TAG SPACE..CANCEL TAG
// E0080..E00FF  ; Cn # [128] <reserved-E0080>..<reserved-E00FF>
// E0100..E01EF  ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
// E01F0..EFFFF  ; Cn # [65040] <reserved-E01F0>..<noncharacter-EFFFF>
// F0000..FFFFD  ; Co # [65534] <private-use-F0000>..<private-use-FFFFD>
// FFFFE..FFFFF  ; Cn #   [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
// 100000..10FFFD; Co # [65534] <private-use-100000>..<private-use-10FFFD>
// 10FFFE..10FFFF; Cn #   [2] <noncharacter-10FFFE>..<noncharacter-10FFFF>
//
// It can be observed all entries in the range 323B0..10FFFF are in the
// categories Cf, Co, Cn, except a small range with the property Mn.
// In order to reduce the size of the table only the entries in the range
// [0000, 323B0) are stored in the table. The entries in the range
// [323B0, 10FFFF] use a hand-crafted algorithm.
//
// This means a number of entries are omitted
// Size of the inclusive range 323B0..10FFFF, minus the 240 code points in
// E0100..E01EF: those have the property Mn, so they are not part of Z or C.
inline constexpr int excluded = ((0x10FFFF - 0x323B0) + 1) - 240;

// Expected number of code points covered by the stored table: every code
// point in the groups Z and C, except the ones omitted from the table.
inline constexpr int entries = Z + C - excluded;

// Adds up, over every element of the stored table, one plus the value held in
// the element's low 14 bits (presumably the size of the range the element
// describes, minus one). The total has to equal the expected number of code
// points computed from the General_Category counts.
static constexpr int count_entries() {
  int total = 0;
  for (const auto entry : std::__escaped_output_table::__entries)
    total += 1 + static_cast<int>(entry & 0x3fffu);
  return total;
}
static_assert(count_entries() == entries);

// Checks __needs_escape for every code point from 31350 up to and including
// 10FFFF. Each range gets its own loop so a failing assert identifies the
// range by its line.
int main(int, char**) {
  // 31350..323AF  ; Lo # [4192] -- not escaped.
  for (char32_t cp = 0x31350; cp < 0x323B0; ++cp)
    assert(!std::__escaped_output_table::__needs_escape(cp));

  // 323B0..E00FF ; C -- escaped.
  for (char32_t cp = 0x323B0; cp < 0xE0100; ++cp)
    assert(std::__escaped_output_table::__needs_escape(cp));

  // E0100..E01EF  ; Mn # [240] -- not escaped.
  for (char32_t cp = 0xE0100; cp < 0xE01F0; ++cp)
    assert(!std::__escaped_output_table::__needs_escape(cp));

  // E01F0..10FFFF; C -- escaped.
  for (char32_t cp = 0xE01F0; cp < 0x110000; ++cp)
    assert(std::__escaped_output_table::__needs_escape(cp));

  return 0;
}