llvm/libcxx/test/libcxx/input.output/iostream.format/print.fun/transcoding.pass.cpp

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
// UNSUPPORTED: no-filesystem
// UNSUPPORTED: libcpp-has-no-unicode
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=2000000

// <print>

// Tests the transcoding of UTF-8 to UTF-16/32.
// UTF-16 is used on Windows to write to the Unicode API.
// UTF-32 is used to test the Windows behaviour on Linux using 32-bit wchar_t.

#include <algorithm>
#include <array>
#include <cassert>
#include <print>
#include <string_view>

#include "test_macros.h"
#include "make_string.h"

#define SV(S) MAKE_STRING_VIEW(CharT, S)

// Transcodes the UTF-8 text in `input` to CharT code units and checks that
// exactly `expected` was written.
//
// The output goes into a fixed-size scratch array pre-filled with '*'. After
// the comparison, the rest of the array is inspected to make sure nothing was
// written at or beyond the position the transcoder returned.
template <class CharT>
constexpr void test(std::basic_string_view<CharT> expected, std::string_view input) {
  constexpr std::size_t capacity = 1024;
  assert(expected.size() < capacity);

  std::array<CharT, capacity> scratch;
  std::ranges::fill(scratch, CharT('*'));

  const auto written_end = std::__unicode::__transcode(input.begin(), input.end(), scratch.begin());
  assert(std::basic_string_view<CharT>(scratch.begin(), written_end) == expected);

  // Every element behind the output must still hold the fill character.
  assert(std::all_of(written_end, scratch.end(), [](CharT c) { return c == CharT('*'); }));
}

// Runs all transcoding test vectors for a single output character type.
//
// Well-formed input must come out as the same text in CharT's encoding;
// ill-formed input must produce U+FFFD REPLACEMENT CHARACTER in the output.
template <class CharT>
constexpr void test() {
  // *** Test valid UTF-8 ***
  // TEST passes the same literal twice: SV() makes the expected CharT string
  // view from it, and the plain narrow literal is the input.
#define TEST(S) test(SV(S), S)
  TEST("hello world");
  // copied from benchmarks/std_format_spec_string_unicode.bench.cpp
  TEST("Lorem ipsum dolor sit amet, ne sensibus evertitur aliquando his. Iuvaret fabulas qui ex.");
  TEST("Lōrem ipsūm dolor sīt æmeÞ, ea vel nostrud feuġǣit, muciūs tēmporiȝusrefērrēnÞur no mel.");
  TEST("Лорем ипсум долор сит амет, еу диам тамяуам принципес вис, еяуидем цонцептам диспутандо");
  TEST("入ト年媛ろ舗学ラロ準募ケカ社金スノ屋検れう策他セヲシ引口ぎ集7独ぱクふ出車ぽでぱ円輪ルノ受打わ。");
  TEST("\U0001f636\u200d\U0001f32b\ufe0f");
#undef TEST

  // *** Test invalid UTF-8 ***
  // A two-byte lead byte with nothing after it.
  test(SV("\ufffd"), "\xc3");
  // A two-byte lead byte followed by '(' (0x28), which is not a continuation
  // byte; the '(' itself is still expected in the output.
  test(SV("\ufffd("), "\xc3\x28");

  // Surrogate range: the first and last code point of the high and of the low
  // surrogates, each written as a three-byte sequence.
  test(SV("\ufffd"), "\xed\xa0\x80"); // U+D800
  test(SV("\ufffd"), "\xed\xaf\xbf"); // U+DBFF
  test(SV("\ufffd"), "\xed\xb0\x80"); // U+DC00
  test(SV("\ufffd"), "\xed\xbf\xbf"); // U+DFFF

  // Beyond valid values: four-byte sequences whose value exceeds U+10FFFF.
  test(SV("\ufffd"), "\xf4\x90\x80\x80"); // U+110000
  test(SV("\ufffd"), "\xf4\xbf\xbf\xbf"); // U+13FFFF, the largest value with lead byte 0xF4

  // Validates http://unicode.org/review/pr-121.html option 3: each of the six
  // ill-formed code units between 'a' and 'b' gets its own U+FFFD.
  test(SV("\u0061\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u0062"), "\x61\xF1\x80\x80\xE1\x80\xC2\x62");
}

// Runs the test vectors for every output character type. Returns true so the
// call can also be used as the operand of a static_assert.
constexpr bool test() {
  test<char16_t>();
  test<char32_t>();
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
  test<wchar_t>();
#endif

  return true;
}

int main(int, char**) {
  // Evaluate the whole suite during constant evaluation...
  static_assert(test());
  // ...and run the same suite again at run time.
  test();

  return 0;
}