// Copyright 2022 The Dawn & Tint Authors
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
//    contributors may be used to endorse or promote products derived from
//    this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef SRC_TINT_UTILS_TEXT_UNICODE_H_
#define SRC_TINT_UTILS_TEXT_UNICODE_H_

#include <cstddef>
#include <cstdint>
#include <string_view>
#include <utility>

namespace tint {

/// CodePoint is a unicode code point.
struct CodePoint { … };

/// Helpers for decoding, encoding and inspecting UTF-8 text, where a code unit is a uint8_t.
namespace utf8 {

/// Decodes the first code point in the utf8 string.
/// @param ptr the pointer to the first byte of the utf8 sequence
/// @param len the maximum number of uint8_t to read
/// @returns a pair of CodePoint and width in code units (uint8_t).
///          If the next code point cannot be decoded then returns [0,0], i.e. a zero code point
///          paired with a width of zero.
std::pair<CodePoint, size_t> Decode(const uint8_t* ptr, size_t len);

/// Decodes the first code point in the utf8 string.
/// @param utf8_string the string view that contains the utf8 sequence
/// @returns a pair of CodePoint and width in code units (uint8_t).
///          If the next code point cannot be decoded, then returns [0,0], i.e. a zero code point
///          paired with a width of zero.
std::pair<CodePoint, size_t> Decode(std::string_view utf8_string);

/// Encodes a code point to the utf8 string buffer or queries the number of code units used to
/// encode the code point.
/// @param code_point the code point to encode.
/// @param ptr the pointer to the utf8 string buffer to write to, or nullptr to only query the
///        number of code units that encoding @p code_point would produce.
/// @returns the number of code units written / would be written (at most 4).
size_t Encode(CodePoint code_point, uint8_t* ptr);

/// Tests whether a utf-8 string consists solely of ASCII characters.
/// The (unnamed) string view argument is the utf-8 string to examine.
/// @returns true if all the utf-8 code points in the string are ASCII
///          (code-points 0x00..0x7f).
bool IsASCII(std::string_view);

}  // namespace utf8

/// Helpers for decoding and encoding UTF-16 text, where a code unit is a 16-bit integer.
namespace utf16 {

/// Decodes the first code point in the utf16 string.
/// @param ptr the pointer to the first code unit of the utf16 sequence
/// @param len the maximum number of code units to read
/// @returns a pair of CodePoint and width in code units (16-bit integers).
///          If the next code point cannot be decoded then returns [0,0], i.e. a zero code point
///          paired with a width of zero.
std::pair<CodePoint, size_t> Decode(const uint16_t* ptr, size_t len);

/// Decodes the first code point in the utf16 string.
/// @param utf16_string the string view that contains the utf16 sequence
/// @returns a pair of CodePoint and width in code units (16-bit integers).
///          If the next code point cannot be decoded then returns [0,0], i.e. a zero code point
///          paired with a width of zero.
/// NOTE(review): std::string_view is a view over char, so the utf16 data is passed as raw bytes.
/// How those bytes are grouped into 16-bit code units (byte order, handling of an odd trailing
/// byte) is not visible from this header - confirm against the implementation.
std::pair<CodePoint, size_t> Decode(std::string_view utf16_string);

/// Encodes a code point to the utf16 string buffer or queries the number of code units used to
/// encode the code point.
/// @param code_point the code point to encode.
/// @param ptr the pointer to the utf16 string buffer to write to, or nullptr to only query the
///        number of code units that encoding @p code_point would produce.
/// @returns the number of code units written / would be written (at most 2).
size_t Encode(CodePoint code_point, uint16_t* ptr);

}  // namespace utf16

}  // namespace tint

#endif  // SRC_TINT_UTILS_TEXT_UNICODE_H_