// Copyright 2019 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "cbor.h" #include <algorithm> #include <cassert> #include <cmath> #include <cstring> #include <limits> #include <stack> namespace crdtp { namespace cbor { namespace { // Indicates the number of bits the "initial byte" needs to be shifted to the // right after applying |kMajorTypeMask| to produce the major type in the // lowermost bits. static constexpr uint8_t kMajorTypeBitShift = …; // Mask selecting the low-order 5 bits of the "initial byte", which is where // the additional information is encoded. static constexpr uint8_t kAdditionalInformationMask = …; // Mask selecting the high-order 3 bits of the "initial byte", which indicates // the major type of the encoded value. static constexpr uint8_t kMajorTypeMask = …; // Indicates the integer is in the following byte. static constexpr uint8_t kAdditionalInformation1Byte = …; // Indicates the integer is in the next 2 bytes. static constexpr uint8_t kAdditionalInformation2Bytes = …; // Indicates the integer is in the next 4 bytes. static constexpr uint8_t kAdditionalInformation4Bytes = …; // Indicates the integer is in the next 8 bytes. static constexpr uint8_t kAdditionalInformation8Bytes = …; // Encodes the initial byte, consisting of the |type| in the first 3 bits // followed by 5 bits of |additional_info|. constexpr uint8_t EncodeInitialByte(MajorType type, uint8_t additional_info) { … } // TAG 24 indicates that what follows is a byte string which is // encoded in CBOR format. We use this as a wrapper for // maps and arrays, allowing us to skip them, because the // byte string carries its size (byte length). 
// https://tools.ietf.org/html/rfc7049#section-2.4.4.1 static constexpr uint8_t kInitialByteForEnvelope = …; // The standalone byte for "envelope" tag, to follow kInitialByteForEnvelope // in the correct implementation, as it is above in-tag value max (which is // also, confusingly, 24). See EnvelopeHeader::Parse() for more. static constexpr uint8_t kCBOREnvelopeTag = …; // The initial byte for a byte string with fewer than 2^32 bytes // of payload. This is used for envelope encoding, even if // the byte string is shorter. static constexpr uint8_t kInitialByteFor32BitLengthByteString = …; // See RFC 7049 Section 2.2.1, indefinite length arrays / maps have additional // info = 31. static constexpr uint8_t kInitialByteIndefiniteLengthArray = …; static constexpr uint8_t kInitialByteIndefiniteLengthMap = …; // See RFC 7049 Section 2.3, Table 1; this is used for finishing indefinite // length maps / arrays. static constexpr uint8_t kStopByte = …; // See RFC 7049 Section 2.3, Table 2. static constexpr uint8_t kEncodedTrue = …; static constexpr uint8_t kEncodedFalse = …; static constexpr uint8_t kEncodedNull = …; static constexpr uint8_t kInitialByteForDouble = …; // See RFC 7049 Table 3 and Section 2.4.4.2. This is used as a prefix for // arbitrary binary data encoded as BYTE_STRING. static constexpr uint8_t kExpectedConversionToBase64Tag = …; // Writes the bytes for |v| to |out|, starting with the most significant byte. // See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html template <typename T> void WriteBytesMostSignificantByteFirst(T v, std::vector<uint8_t>* out) { … } // Reads sizeof(T) bytes from |in| to extract a value of type T // (e.g. uint64_t, uint32_t, ...), most significant byte first.
// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html template <typename T> T ReadBytesMostSignificantByteFirst(span<uint8_t> in) { … } } // namespace namespace internals { // Reads the start of a token with definite size from |bytes|. // |type| is the major type as specified in RFC 7049 Section 2.1. // |value| is the payload (e.g. for MajorType::UNSIGNED) or is the size // (e.g. for BYTE_STRING). // If successful, returns the number of bytes read. Otherwise returns 0. size_t ReadTokenStart(span<uint8_t> bytes, MajorType* type, uint64_t* value) { … } // Writes the start of a token with |type|. The |value| may indicate the size, // or it may be the payload if the value is an unsigned integer. void WriteTokenStart(MajorType type, uint64_t value, std::vector<uint8_t>* encoded) { … } } // namespace internals // ============================================================================= // Detecting CBOR content // ============================================================================= bool IsCBORMessage(span<uint8_t> msg) { … } Status CheckCBORMessage(span<uint8_t> msg) { … } // ============================================================================= // Encoding individual CBOR items // ============================================================================= uint8_t EncodeTrue() { … } uint8_t EncodeFalse() { … } uint8_t EncodeNull() { … } uint8_t EncodeIndefiniteLengthArrayStart() { … } uint8_t EncodeIndefiniteLengthMapStart() { … } uint8_t EncodeStop() { … } void EncodeInt32(int32_t value, std::vector<uint8_t>* out) { … } void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out) { … } void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out) { … } void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out) { … } void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out) { … } void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out) { … } // A double is encoded with a specific
initial byte // (kInitialByteForDouble) plus the 64 bits of payload for its value. constexpr size_t kEncodedDoubleSize = …; void EncodeDouble(double value, std::vector<uint8_t>* out) { … } // ============================================================================= // cbor::EnvelopeEncoder - for wrapping submessages // ============================================================================= void EnvelopeEncoder::EncodeStart(std::vector<uint8_t>* out) { … } bool EnvelopeEncoder::EncodeStop(std::vector<uint8_t>* out) { … } // static StatusOr<EnvelopeHeader> EnvelopeHeader::Parse(span<uint8_t> in) { … } // static StatusOr<EnvelopeHeader> EnvelopeHeader::ParseFromFragment(span<uint8_t> in) { … } // ============================================================================= // cbor::NewCBOREncoder - for encoding from a streaming parser // ============================================================================= namespace { class CBOREncoder : public ParserHandler { … }; } // namespace std::unique_ptr<ParserHandler> NewCBOREncoder(std::vector<uint8_t>* out, Status* status) { … } // ============================================================================= // cbor::CBORTokenizer - for parsing individual CBOR items // ============================================================================= CBORTokenizer::CBORTokenizer(span<uint8_t> bytes) : … { … } CBORTokenizer::~CBORTokenizer() { … } CBORTokenTag CBORTokenizer::TokenTag() const { … } void CBORTokenizer::Next() { … } void CBORTokenizer::EnterEnvelope() { … } Status CBORTokenizer::Status() const { … } // The following accessor functions ::GetInt32, ::GetDouble, // ::GetString8, ::GetString16WireRep, ::GetBinary, ::GetEnvelopeContents // assume that a particular token was recognized in ::ReadNextToken. // That's where all the error checking is done. By design, // the accessors (assuming the token was recognized) never produce // an error. 
int32_t CBORTokenizer::GetInt32() const { … } double CBORTokenizer::GetDouble() const { … } span<uint8_t> CBORTokenizer::GetString8() const { … } span<uint8_t> CBORTokenizer::GetString16WireRep() const { … } span<uint8_t> CBORTokenizer::GetBinary() const { … } span<uint8_t> CBORTokenizer::GetEnvelope() const { … } span<uint8_t> CBORTokenizer::GetEnvelopeContents() const { … } const EnvelopeHeader& CBORTokenizer::GetEnvelopeHeader() const { … } // All error checking happens in ::ReadNextToken, so that the accessors // can avoid having to carry an error return value. // // With respect to checking the encoded lengths of strings, arrays, etc: // On the wire, CBOR uses 1,2,4, and 8 byte unsigned integers, so // we initially read them as uint64_t, usually into token_start_internal_value_. // // However, since these containers have a representation on the machine, // we need to do corresponding size computations on the input byte array, // output span (e.g. the payload for a string), etc., and size_t is // machine specific (in practice either 32 bit or 64 bit). // // Further, we must avoid overflowing size_t. Therefore, we use this // kMaxValidLength constant to: // - Reject values that are larger than the architecture specific // max size_t (differs between 32 bit and 64 bit arch). // - Reserve at least one bit so that we can check against overflows // when adding lengths (array / string length / etc.); we do this by // ensuring that the inputs to an addition are <= kMaxValidLength, // and then checking whether the sum went past it. 
// // See also // https://chromium.googlesource.com/chromium/src/+/main/docs/security/integer-semantics.md static const uint64_t kMaxValidLength = …; void CBORTokenizer::ReadNextToken() { … } void CBORTokenizer::SetToken(CBORTokenTag token_tag, size_t token_byte_length) { … } void CBORTokenizer::SetError(Error error) { … } // ============================================================================= // cbor::ParseCBOR - for receiving streaming parser events for CBOR messages // ============================================================================= namespace { // When parsing CBOR, we limit recursion depth for objects and arrays // to this constant. static constexpr int kStackLimit = …; // Below are three parsing routines for CBOR, which cover enough // to roundtrip JSON messages. bool ParseMap(int32_t stack_depth, CBORTokenizer* tokenizer, ParserHandler* out); bool ParseArray(int32_t stack_depth, CBORTokenizer* tokenizer, ParserHandler* out); bool ParseValue(int32_t stack_depth, CBORTokenizer* tokenizer, ParserHandler* out); void ParseUTF16String(CBORTokenizer* tokenizer, ParserHandler* out) { … } bool ParseUTF8String(CBORTokenizer* tokenizer, ParserHandler* out) { … } bool ParseEnvelope(int32_t stack_depth, CBORTokenizer* tokenizer, ParserHandler* out) { … } bool ParseValue(int32_t stack_depth, CBORTokenizer* tokenizer, ParserHandler* out) { … } // |bytes| must start with the indefinite length array byte, so basically, // ParseArray may only be called after an indefinite length array has been // detected. bool ParseArray(int32_t stack_depth, CBORTokenizer* tokenizer, ParserHandler* out) { … } // |bytes| must start with the indefinite length map byte, so basically, // ParseMap may only be called after an indefinite length map has been // detected.
bool ParseMap(int32_t stack_depth, CBORTokenizer* tokenizer, ParserHandler* out) { … } } // namespace void ParseCBOR(span<uint8_t> bytes, ParserHandler* out) { … } // ============================================================================= // cbor::AppendString8EntryToCBORMap - for limited in-place editing of messages // ============================================================================= Status AppendString8EntryToCBORMap(span<uint8_t> string8_key, span<uint8_t> string8_value, std::vector<uint8_t>* cbor) { … } } // namespace cbor } // namespace crdtp