chromium/third_party/dawn/src/tint/utils/text/unicode.h

// Copyright 2022 The Dawn & Tint Authors
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
//    contributors may be used to endorse or promote products derived from
//    this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef SRC_TINT_UTILS_TEXT_UNICODE_H_
#define SRC_TINT_UTILS_TEXT_UNICODE_H_

#include <cstddef>
#include <cstdint>
#include <string_view>
#include <utility>

namespace tint {

/// CodePoint is a unicode code point.
/// It is the value type produced by the utf8 / utf16 Decode() functions and consumed by the
/// corresponding Encode() functions declared in this header.
/// NOTE(review): as declared here the struct has no members, so how the numeric code point value
/// is stored or accessed is not visible from this declaration - confirm against the full
/// definition.
struct CodePoint {};

namespace utf8 {

/// Decodes the first code point in the utf8 string.
/// @param ptr the pointer to the first byte (code unit) of the utf8 sequence
/// @param len the maximum number of code units (uint8_t) to read, starting at @p ptr
/// @returns a pair of CodePoint and width in code units (uint8_t).
///          If the next code point cannot be decoded then returns [0,0], so callers can test the
///          returned width against 0 to detect the failure.
std::pair<CodePoint, size_t> Decode(const uint8_t* ptr, size_t len);

/// Decodes the first code point in the utf8 string.
/// Overload that takes the utf8 sequence as a string view instead of a pointer and length.
/// @param utf8_string the string view that contains the utf8 sequence
/// @returns a pair of CodePoint and width in code units (uint8_t).
///          If the next code point cannot be decoded, then returns [0,0], so callers can test the
///          returned width against 0 to detect the failure.
std::pair<CodePoint, size_t> Decode(std::string_view utf8_string);

/// Encodes a code point to the utf8 string buffer or queries the number of code units used to
/// encode the code point.
/// @param code_point the code point to encode.
/// @param ptr the pointer to the utf8 string buffer to write to, or nullptr to only query the
/// number of code units that would have been written had a buffer been provided.
/// @returns the number of code units written / would be written (at most 4).
/// NOTE(review): no buffer length is passed, so a non-null @p ptr presumably has to provide room
/// for the number of code units returned (up to 4) - confirm against the implementation.
size_t Encode(CodePoint code_point, uint8_t* ptr);

/// Tests whether a utf-8 string consists solely of ASCII code points.
/// The single (unnamed) string view parameter is the utf-8 string to test.
/// @returns true if all the utf-8 code points in the string are ASCII
/// (code-points 0x00..0x7f).
/// NOTE(review): the result for an empty string is not stated here - confirm against the
/// implementation.
bool IsASCII(std::string_view);

}  // namespace utf8

namespace utf16 {

/// Decodes the first code point in the utf16 string.
/// @param ptr the pointer to the first code unit (16-bit integer) of the utf16 sequence
/// @param len the maximum number of code units to read, starting at @p ptr
/// @returns a pair of CodePoint and width in code units (16-bit integers).
///          If the next code point cannot be decoded then returns [0,0], so callers can test the
///          returned width against 0 to detect the failure.
std::pair<CodePoint, size_t> Decode(const uint16_t* ptr, size_t len);

/// Decodes the first code point in the utf16 string.
/// Overload that takes the utf16 sequence as a string view instead of a pointer and length.
/// @param utf16_string the string view that contains the utf16 sequence
/// @returns a pair of CodePoint and width in code units (16-bit integers).
///          If the next code point cannot be decoded then returns [0,0].
/// NOTE(review): std::string_view is a view over char elements, while the width is reported in
/// 16-bit code units. How the view's bytes map to code units (byte order, alignment, an odd number
/// of bytes) is not visible from this declaration - confirm against the implementation.
std::pair<CodePoint, size_t> Decode(std::string_view utf16_string);

/// Encodes a code point to the utf16 string buffer or queries the number of code units used to
/// encode the code point.
/// @param code_point the code point to encode.
/// @param ptr the pointer to the utf16 string buffer to write to, or nullptr to only query the
/// number of code units that would have been written had a buffer been provided.
/// @returns the number of code units written / would be written (at most 2).
/// NOTE(review): no buffer length is passed, so a non-null @p ptr presumably has to provide room
/// for the number of code units returned (up to 2) - confirm against the implementation.
size_t Encode(CodePoint code_point, uint16_t* ptr);

}  // namespace utf16

}  // namespace tint

#endif  // SRC_TINT_UTILS_TEXT_UNICODE_H_