// Copyright 2017 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifdef UNSAFE_BUFFERS_BUILD // TODO(crbug.com/40285824): Remove this and convert code to safer constructs. #pragma allow_unsafe_buffers #endif #ifndef COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_ #define COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_ #include <stddef.h> #include <stdint.h> #include <optional> #include <string> #include "base/format_macros.h" #include "base/numerics/safe_conversions.h" #include "base/strings/stringprintf.h" #include "components/zucchini/buffer_view.h" #include "components/zucchini/typed_value.h" namespace zucchini { // offset_t is used to describe an offset in an image. // Files bigger than 4GB are not supported. offset_t; // Divide by 2 since label marking uses the most significant bit. constexpr offset_t kOffsetBound = …; // Use 0xFFFFFFF*E*, since 0xFFFFFFF*F* is a sentinel value for Dex references. constexpr offset_t kInvalidOffset = …; // key_t is used to identify an offset in a table. key_t; enum Bitness : uint8_t { … }; inline uint32_t WidthOf(Bitness bitness) { … } // Used to uniquely identify a reference type. // Strongly typed objects are used to avoid ambiguities with PoolTag. struct TypeTag : public TypedValue<TypeTag, uint8_t> { … }; // Used to uniquely identify a pool. struct PoolTag : public TypedValue<PoolTag, uint8_t> { … }; constexpr TypeTag kNoTypeTag(0xFF); // Typically used to identify raw data. constexpr PoolTag kNoPoolTag(0xFF); // Specification of references in an image file. struct ReferenceTypeTraits { … }; // There is no need to store |type| because references of the same type are // always aggregated into the same container, and so during iteration we'd have // |type| already. struct Reference { … }; inline bool operator==(const Reference& a, const Reference& b) { … } // Interface for extracting References through member function GetNext(). 
// This is used by Disassemblers to extract references from an image file. // Typically, a Reader lazily extracts values and does not hold any storage. class ReferenceReader { … }; // Interface for writing References through member function // PutNext(reference). This is used by Disassemblers to write new References // in the image file. class ReferenceWriter { … }; // Reference encoding may be quite complex in some architectures (e.g., ARM), // requiring bit-level manipulation. In general, bits in a reference body fall // under 2 categories: // * Operation bits: Instruction op code, conditionals, or structural data. // * Payload bits: Actual target data of the reference. These may be absolute, // or be displacements relative to instruction pointer / program counter. // During patch application, // Old reference bytes = {old operation, old payload}, // is transformed to // New reference bytes = {new operation, new payload}. // New image bytes are written by three sources: // (1) Direct copy from old image to new image for matched blocks. // (2) Bytewise diff correction. // (3) Dedicated reference target correction. // // For references whose operation and payload bits are stored in easily // separable bytes (e.g., rel32 reference in X86), (2) can exclude payload bits. // So during patch application, (1) naively copies everything, (2) fixes // operation bytes only, and (3) fixes payload bytes only. // // For architectures with references whose operation and payload bits may mix // within shared bytes (e.g., ARM rel32), a dilemma arises: // * (2) cannot ignore shared bytes, since otherwise new operation bits would // not properly transfer. // * Having (2) always overwrite these bytes would reduce the benefits of // reference correction, since references are likely to change. 
// // Our solution applies a hybrid approach: For each matching old / new reference // pair, define: // Mixed reference bytes = {new operation, old payload}, // // During patch generation, we compute bytewise correction from old reference // bytes to the mixed reference bytes. So during patch application, (2) only // corrects operation bit changes (and skips if they don't change), and (3) // overwrites old payload bits to new payload bits. // Interface for mixed reference byte generation. This base class // serves as a stub. Architectures whose references store operation bits and // payload bits in shared bytes (e.g., ARM rel32) should override this. class ReferenceMixer { … }; // An Equivalence is a block of length |length| that approximately matches in // |old_image| at an offset of |src_offset| and in |new_image| at an offset of // |dst_offset|. struct Equivalence { … }; inline bool operator==(const Equivalence& a, const Equivalence& b) { … } // Same as Equivalence, but with a similarity score. This is only used when // generating the patch. struct EquivalenceCandidate { … }; template <size_t N> inline constexpr uint32_t ExeTypeToUint32(const char (&exe_type)[N]) { … } // Enumerations for supported executables. Values in this enum must be distinct. // Once present, values should never be altered or removed to ensure backwards // compatibility and patch type collision avoidance. enum ExecutableType : uint32_t { … }; constexpr ExecutableType CastToExecutableType(uint32_t possible_exe_type) { … } inline std::string CastExecutableTypeToString(ExecutableType exe_type) { … } // A region in an image with associated executable type |exe_type|. If // |exe_type == kExeTypeNoOp|, then the Element represents a region of raw data. struct Element : public BufferRegion { … }; // A matched pair of Elements. struct ElementMatch { … }; } // namespace zucchini #endif // COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_