// Copyright 2019 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "src/inspector/v8-string-conversions.h" #include <limits> #include <vector> #include "src/base/logging.h" namespace v8_inspector { namespace { UChar; UChar32; bool isASCII(UChar c) { … } const UChar replacementCharacter = …; inline int inlineUTF8SequenceLengthNonASCII(char b0) { … } inline int inlineUTF8SequenceLength(char b0) { … } // Once the bits are split out into bytes of UTF-8, this is a mask OR-ed // into the first byte, depending on how many bytes follow. There are // as many entries in this table as there are UTF-8 sequence types. // (I.e., a one-byte sequence, a two-byte sequence, etc.). Remember that sequences // for *legal* UTF-8 will be 4 or fewer bytes total. static const unsigned char firstByteMark[7] = …; enum ConversionResult { … }; ConversionResult convertUTF16ToUTF8(const UChar** sourceStart, const UChar* sourceEnd, char** targetStart, char* targetEnd, bool strict) { … } /** * Is this code point a BMP code point (U+0000..U+ffff)? * @param c 32-bit code point * @return TRUE or FALSE * @stable ICU 2.8 */ #define U_IS_BMP(c) … /** * Is this code point a supplementary code point (U+010000..U+10FFFF)? * @param c 32-bit code point * @return TRUE or FALSE * @stable ICU 2.8 */ #define U_IS_SUPPLEMENTARY(c) … /** * Is this code point a surrogate (U+d800..U+dfff)? * @param c 32-bit code point * @return TRUE or FALSE * @stable ICU 2.4 */ #define U_IS_SURROGATE(c) … /** * Get the lead surrogate (0xD800..0xDBFF) for a * supplementary code point (0x010000..0x10FFFF). * @param supplementary 32-bit code point (U+010000..U+10FFFF) * @return lead surrogate (U+D800..U+DBFF) for supplementary * @stable ICU 2.4 */ #define U16_LEAD(supplementary) … /** * Get the trail surrogate (0xDC00..0xDFFF) for a * supplementary code point (0x010000..0x10FFFF). 
* @param supplementary 32-bit code point (U+010000..U+10FFFF) * @return trail surrogate (U+DC00..U+DFFF) for supplementary * @stable ICU 2.4 */ #define U16_TRAIL(supplementary) … // This must be called with the length pre-determined by the first byte. // If presented with a length > 4, this returns false. The Unicode // definition of UTF-8 goes up to 4-byte sequences. static bool isLegalUTF8(const unsigned char* source, int length) { … } // Magic values subtracted from a buffer value during UTF-8 conversion. // This table contains as many values as there might be trailing bytes // in a UTF-8 sequence. static const UChar32 offsetsFromUTF8[6] = …; static inline UChar32 readUTF8Sequence(const char*& sequence, size_t length) { … } ConversionResult convertUTF8ToUTF16(const char** sourceStart, const char* sourceEnd, UChar** targetStart, UChar* targetEnd, bool* sourceAllASCII, bool strict) { … } // Helper to write a three-byte UTF-8 code point to the buffer; the caller must // check that room is available. static inline void putUTF8Triple(char*& buffer, UChar ch) { … } } // namespace std::string UTF16ToUTF8(const UChar* stringStart, size_t length) { … } std::basic_string<UChar> UTF8ToUTF16(const char* stringStart, size_t length) { … } } // namespace v8_inspector