// Copyright 2019 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef V8_CRDTP_CBOR_H_ #define V8_CRDTP_CBOR_H_ #include <cstddef> #include <cstdint> #include <memory> #include <string> #include <vector> #include "export.h" #include "parser_handler.h" #include "span.h" namespace v8_crdtp { namespace cbor { // The binary encoding for the inspector protocol follows the CBOR specification // (RFC 7049). Additional constraints: // - Only indefinite length maps and arrays are supported. // - Maps and arrays are wrapped with an envelope, that is, a // CBOR tag with value 24 followed by a byte string specifying // the byte length of the enclosed map / array. The byte string // must use a 32 bit wide length. // - At the top level, a message must be an indefinite length map // wrapped by an envelope. // - Maximal size for messages is 2^32 (4 GB). // - For scalars, we support only the int32_t range, encoded as // UNSIGNED/NEGATIVE (major types 0 / 1). // - UTF16 strings, including with unbalanced surrogate pairs, are encoded // as CBOR BYTE_STRING (major type 2). For such strings, the number of // bytes encoded must be even. // - UTF8 strings (major type 3) are supported. // - 7 bit US-ASCII strings must always be encoded as UTF8 strings, never // as UTF16 strings. // - Arbitrary byte arrays, in the inspector protocol called 'binary', // are encoded as BYTE_STRING (major type 2), prefixed with a byte // indicating base64 when rendered as JSON. // ============================================================================= // Detecting CBOR content // ============================================================================= // Checks whether |msg| is a cbor message. bool IsCBORMessage(span<uint8_t> msg); // Performs a leightweight check of |msg|. // Disallows: // - Empty message // - Not starting with the two bytes 0xd8, 0x5a // - Empty envelope (all length bytes are 0) // - Not starting with a map after the envelope stanza // DevTools messages should pass this check. Status CheckCBORMessage(span<uint8_t> msg); // ============================================================================= // Encoding individual CBOR items // ============================================================================= // Some constants for CBOR tokens that only take a single byte on the wire. uint8_t EncodeTrue(); uint8_t EncodeFalse(); uint8_t EncodeNull(); uint8_t EncodeIndefiniteLengthArrayStart(); uint8_t EncodeIndefiniteLengthMapStart(); uint8_t EncodeStop(); // Encodes |value| as |UNSIGNED| (major type 0) iff >= 0, or |NEGATIVE| // (major type 1) iff < 0. void EncodeInt32(int32_t value, std::vector<uint8_t>* out); // Encodes a UTF16 string as a BYTE_STRING (major type 2). Each utf16 // character in |in| is emitted with most significant byte first, // appending to |out|. void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out); // Encodes a UTF8 string |in| as STRING (major type 3). void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out); // Encodes the given |latin1| string as STRING8. // If any non-ASCII character is present, it will be represented // as a 2 byte UTF8 sequence. void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out); // Encodes the given |utf16| string as STRING8 if it's entirely US-ASCII. // Otherwise, encodes as STRING16. void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out); // Encodes arbitrary binary data in |in| as a BYTE_STRING (major type 2) with // definitive length, prefixed with tag 22 indicating expected conversion to // base64 (see RFC 7049, Table 3 and Section 2.4.4.2). void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out); // Encodes / decodes a double as Major type 7 (SIMPLE_VALUE), // with additional info = 27, followed by 8 bytes in big endian. void EncodeDouble(double value, std::vector<uint8_t>* out); // ============================================================================= // cbor::EnvelopeEncoder - for wrapping submessages // ============================================================================= // An envelope indicates the byte length of a wrapped item. // We use this for maps and array, which allows the decoder // to skip such (nested) values whole sale. // It's implemented as a CBOR tag (major type 6) with additional // info = 24, followed by a byte string with a 32 bit length value; // so the maximal structure that we can wrap is 2^32 bits long. // See also: https://tools.ietf.org/html/rfc7049#section-2.4.4.1 class EnvelopeEncoder { … }; class EnvelopeHeader { … }; // ============================================================================= // cbor::NewCBOREncoder - for encoding from a streaming parser // ============================================================================= // This can be used to convert to CBOR, by passing the return value to a parser // that drives it. The handler will encode into |out|, and iff an error occurs // it will set |status| to an error and clear |out|. Otherwise, |status.ok()| // will be |true|. std::unique_ptr<ParserHandler> NewCBOREncoder(std::vector<uint8_t>* out, Status* status); // ============================================================================= // cbor::CBORTokenizer - for parsing individual CBOR items // ============================================================================= // Tags for the tokens within a CBOR message that CBORTokenizer understands. // Note that this is not the same terminology as the CBOR spec (RFC 7049), // but rather, our adaptation. For instance, we lump unsigned and signed // major type into INT32 here (and disallow values outside the int32_t range). enum class CBORTokenTag { … }; // The major types from RFC 7049 Section 2.1. enum class MajorType { … }; // CBORTokenizer segments a CBOR message, presenting the tokens therein as // numbers, strings, etc. This is not a complete CBOR parser, but makes it much // easier to implement one (e.g. ParseCBOR, above). It can also be used to parse // messages partially. class CBORTokenizer { … }; // ============================================================================= // cbor::ParseCBOR - for receiving streaming parser events for CBOR messages // ============================================================================= // Parses a CBOR encoded message from |bytes|, sending events to // |out|. If an error occurs, sends |out->HandleError|, and parsing stops. // The client is responsible for discarding the already received information in // that case. void ParseCBOR(span<uint8_t> bytes, ParserHandler* out); // ============================================================================= // cbor::AppendString8EntryToMap - for limited in-place editing of messages // ============================================================================= // Modifies the |cbor| message by appending a new key/value entry at the end // of the map. Patches up the envelope size; Status.ok() iff successful. // If not successful, |cbor| may be corrupted after this call. Status AppendString8EntryToCBORMap(span<uint8_t> string8_key, span<uint8_t> string8_value, std::vector<uint8_t>* cbor); namespace internals { // Exposed only for writing tests. size_t ReadTokenStart(span<uint8_t> bytes, cbor::MajorType* type, uint64_t* value); void WriteTokenStart(cbor::MajorType type, uint64_t value, std::vector<uint8_t>* encoded); } // namespace internals } // namespace cbor } // namespace v8_crdtp #endif // V8_CRDTP_CBOR_H_