v8-string-conversions.cc | Explore in Territory

// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/inspector/v8-string-conversions.h"

#include <limits>
#include <vector>

#include "src/base/logging.h"

namespace v8_inspector {
namespace {
UChar;
UChar32;

bool isASCII(UChar c) { … }

const UChar replacementCharacter = …;

inline int inlineUTF8SequenceLengthNonASCII(char b0) { … }

inline int inlineUTF8SequenceLength(char b0) { … }

// Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
// into the first byte, depending on how many bytes follow.  There are
// as many entries in this table as there are UTF-8 sequence types.
// (I.e., one byte sequence, two byte... etc.). Remember that sequences
// for *legal* UTF-8 will be 4 or fewer bytes total.
static const unsigned char firstByteMark[7] = …;

enum ConversionResult { … };

ConversionResult convertUTF16ToUTF8(const UChar** sourceStart,
                                    const UChar* sourceEnd, char** targetStart,
                                    char* targetEnd, bool strict) { … }

/**
 * Is this code point a BMP code point (U+0000..U+ffff)?
 * @param c 32-bit code point
 * @return TRUE or FALSE
 * @stable ICU 2.8
 */
#define U_IS_BMP(c) …

/**
 * Is this code point a supplementary code point (U+010000..U+10FFFF)?
 * @param c 32-bit code point
 * @return TRUE or FALSE
 * @stable ICU 2.8
 */
#define U_IS_SUPPLEMENTARY(c) …

/**
 * Is this code point a surrogate (U+d800..U+dfff)?
 * @param c 32-bit code point
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U_IS_SURROGATE(c) …

/**
 * Get the lead surrogate (0xD800..0xDBFF) for a
 * supplementary code point (0x010000..0x10FFFF).
 * @param supplementary 32-bit code point (U+010000..U+10FFFF)
 * @return lead surrogate (U+D800..U+DBFF) for supplementary
 * @stable ICU 2.4
 */
#define U16_LEAD(supplementary) …

/**
 * Get the trail surrogate (0xDC00..0xDFFF) for a
 * supplementary code point (0x010000..0x10FFFF).
 * @param supplementary 32-bit code point (U+010000..U+10FFFF)
 * @return trail surrogate (U+DC00..U+DFFF) for supplementary
 * @stable ICU 2.4
 */
#define U16_TRAIL(supplementary) …

// This must be called with the length pre-determined by the first byte.
// If presented with a length > 4, this returns false.  The Unicode
// definition of UTF-8 goes up to 4-byte sequences.
static bool isLegalUTF8(const unsigned char* source, int length) { … }

// Magic values subtracted from a buffer value during UTF8 conversion.
// This table contains as many values as there might be trailing bytes
// in a UTF-8 sequence.
static const UChar32 offsetsFromUTF8[6] = …;

static inline UChar32 readUTF8Sequence(const char*& sequence, size_t length) { … }

ConversionResult convertUTF8ToUTF16(const char** sourceStart,
                                    const char* sourceEnd, UChar** targetStart,
                                    UChar* targetEnd, bool* sourceAllASCII,
                                    bool strict) { … }

// Helper to write a three-byte UTF-8 code point to the buffer, caller must
// check room is available.
static inline void putUTF8Triple(char*& buffer, UChar ch) { … }
}  // namespace

std::string UTF16ToUTF8(const UChar* stringStart, size_t length) { … }

std::basic_string<UChar> UTF8ToUTF16(const char* stringStart, size_t length) { … }

}  // namespace v8_inspector
chromium/v8/src/inspector/v8-string-conversions.cc