json_escaping.cc | Explore in Territory

// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <google/protobuf/util/internal/json_escaping.h>

#include <cstdint>

#include <google/protobuf/stubs/logging.h>
#include <google/protobuf/stubs/common.h>

namespace google {
namespace protobuf {
namespace util {
namespace converter {

namespace {

// Array of hex characters for conversion to hex.
static const char kHex[] = …;

// Characters 0x00 to 0x9f are very commonly used, so we provide a special
// table lookup.
//
// For unicode code point ch < 0xa0:
// kCommonEscapes[ch] is the escaped string of ch, if escaping is needed;
//                    or an empty string, if escaping is not needed.
static const char kCommonEscapes[160][7] = …;

// Determines if the given char value is a unicode surrogate code unit (either
// high-surrogate or low-surrogate).
inline bool IsSurrogate(uint32_t c) { … }

// Returns true if the given unicode code point cp is a valid
// unicode code point (i.e. in the range 0 <= cp <= kMaxCodePoint).
inline bool IsValidCodePoint(uint32_t cp) { … }

// Returns the low surrogate for the given unicode code point. The result is
// meaningless if the given code point is not a supplementary character.
inline uint16_t ToLowSurrogate(uint32_t cp) { … }

// Returns the high surrogate for the given unicode code point. The result is
// meaningless if the given code point is not a supplementary character.
inline uint16_t ToHighSurrogate(uint32_t cp) { … }

// Input str is encoded in UTF-8. A unicode code point could be encoded in
// UTF-8 using anywhere from 1 to 4 characters, and it could span multiple
// reads of the ByteSource.
//
// This function reads the next unicode code point from the input (str) at
// the given position (index), taking into account any left-over partial
// code point from the previous iteration (cp), together with the number
// of characters left to read to complete this code point (num_left).
//
// This function assumes that the input (str) is valid at the given position
// (index). In order words, at least one character could be read successfully.
//
// The code point read (partial or complete) is stored in (cp). Upon return,
// (num_left) stores the number of characters that has yet to be read in
// order to complete the current unicode code point. If the read is complete,
// then (num_left) is 0. Also, (num_read) is the number of characters read.
//
// Returns false if we encounter an invalid UTF-8 string. Returns true
// otherwise, including the case when we reach the end of the input (str)
// before a complete unicode code point is read.
bool ReadCodePoint(StringPiece str, int index, uint32_t* cp,
                   int* num_left, int* num_read) { … }

// Stores the 16-bit unicode code point as its hexadecimal digits in buffer
// and returns a StringPiece that points to this buffer. The input buffer needs
// to be at least 6 bytes long.
StringPiece ToHex(uint16_t cp, char* buffer) { … }

// Stores the 32-bit unicode code point as its hexadecimal digits in buffer
// and returns a StringPiece that points to this buffer. The input buffer needs
// to be at least 12 bytes long.
StringPiece ToSurrogateHex(uint32_t cp, char* buffer) { … }

// If the given unicode code point needs escaping, then returns the
// escaped form. The returned StringPiece either points to statically
// pre-allocated char[] or to the given buffer. The input buffer needs
// to be at least 12 bytes long.
//
// If the given unicode code point does not need escaping, an empty
// StringPiece is returned.
StringPiece EscapeCodePoint(uint32_t cp, char* buffer) { … }

// Tries to escape the given code point first. If the given code point
// does not need to be escaped, but force_output is true, then render
// the given multi-byte code point in UTF8 in the buffer and returns it.
StringPiece EscapeCodePoint(uint32_t cp, char* buffer,
                                  bool force_output) { … }

}  // namespace

void JsonEscaping::Escape(strings::ByteSource* input,
                          strings::ByteSink* output) { … }

void JsonEscaping::Escape(StringPiece input, strings::ByteSink* output) { … }

}  // namespace converter
}  // namespace util
}  // namespace protobuf
}  // namespace google
chromium/third_party/protobuf/src/google/protobuf/util/internal/json_escaping.cc