// Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. // https://developers.google.com/protocol-buffers/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <google/protobuf/util/internal/json_escaping.h> #include <cstdint> #include <google/protobuf/stubs/logging.h> #include <google/protobuf/stubs/common.h> namespace google { namespace protobuf { namespace util { namespace converter { namespace { // Array of hex characters for conversion to hex. 
static const char kHex[] = …; // Characters 0x00 to 0x9f are very commonly used, so we provide a special // table lookup. // // For unicode code point ch < 0xa0: // kCommonEscapes[ch] is the escaped string of ch, if escaping is needed; // or an empty string, if escaping is not needed. static const char kCommonEscapes[160][7] = …; // Determines if the given char value is a unicode surrogate code unit (either // high-surrogate or low-surrogate). inline bool IsSurrogate(uint32_t c) { … } // Returns true if the given unicode code point cp is a valid // unicode code point (i.e. in the range 0 <= cp <= kMaxCodePoint). inline bool IsValidCodePoint(uint32_t cp) { … } // Returns the low surrogate for the given unicode code point. The result is // meaningless if the given code point is not a supplementary character. inline uint16_t ToLowSurrogate(uint32_t cp) { … } // Returns the high surrogate for the given unicode code point. The result is // meaningless if the given code point is not a supplementary character. inline uint16_t ToHighSurrogate(uint32_t cp) { … } // Input str is encoded in UTF-8. A unicode code point could be encoded in // UTF-8 using anywhere from 1 to 4 characters, and it could span multiple // reads of the ByteSource. // // This function reads the next unicode code point from the input (str) at // the given position (index), taking into account any left-over partial // code point from the previous iteration (cp), together with the number // of characters left to read to complete this code point (num_left). // // This function assumes that the input (str) is valid at the given position // (index). In other words, at least one character could be read successfully. // // The code point read (partial or complete) is stored in (cp). Upon return, // (num_left) stores the number of characters that have yet to be read in // order to complete the current unicode code point. If the read is complete, // then (num_left) is 0. 
Also, (num_read) is the number of characters read. // // Returns false if we encounter an invalid UTF-8 string. Returns true // otherwise, including the case when we reach the end of the input (str) // before a complete unicode code point is read. bool ReadCodePoint(StringPiece str, int index, uint32_t* cp, int* num_left, int* num_read) { … } // Stores the 16-bit unicode code point as its hexadecimal digits in buffer // and returns a StringPiece that points to this buffer. The input buffer needs // to be at least 6 bytes long. StringPiece ToHex(uint16_t cp, char* buffer) { … } // Stores the 32-bit unicode code point as its hexadecimal digits in buffer // and returns a StringPiece that points to this buffer. The input buffer needs // to be at least 12 bytes long. StringPiece ToSurrogateHex(uint32_t cp, char* buffer) { … } // If the given unicode code point needs escaping, then returns the // escaped form. The returned StringPiece either points to statically // pre-allocated char[] or to the given buffer. The input buffer needs // to be at least 12 bytes long. // // If the given unicode code point does not need escaping, an empty // StringPiece is returned. StringPiece EscapeCodePoint(uint32_t cp, char* buffer) { … } // Tries to escape the given code point first. If the given code point // does not need to be escaped, but force_output is true, then renders // the given multi-byte code point in UTF-8 in the buffer and returns it. StringPiece EscapeCodePoint(uint32_t cp, char* buffer, bool force_output) { … } } // namespace void JsonEscaping::Escape(strings::ByteSource* input, strings::ByteSink* output) { … } void JsonEscaping::Escape(StringPiece input, strings::ByteSink* output) { … } } // namespace converter } // namespace util } // namespace protobuf } // namespace google