image_utils.h | Explore in Territory

// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#ifndef COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
#define COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_

#include <stddef.h>
#include <stdint.h>

#include <optional>
#include <string>

#include "base/format_macros.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/stringprintf.h"
#include "components/zucchini/buffer_view.h"
#include "components/zucchini/typed_value.h"

namespace zucchini {

// offset_t is used to describe an offset in an image.
// Files bigger than 4GB are not supported.
// NOTE(review): as written, the next line names offset_t without a `using` /
// `typedef` keyword or an underlying type -- confirm the full alias
// declaration.
offset_t;
// Divide by 2 since label marking uses the most significant bit.
constexpr offset_t kOffsetBound = …;
// Use 0xFFFFFFF*E*, since 0xFFFFFFF*F* is a sentinel value for Dex references.
constexpr offset_t kInvalidOffset = …;

// key_t is used to identify an offset in a table.
// NOTE(review): same concern as offset_t above -- the line below carries no
// `using` / `typedef` keyword or underlying type; confirm the full alias.
key_t;

// Unscoped enumeration whose underlying type is uint8_t.
// NOTE(review): presumably distinguishes 32-bit from 64-bit images -- confirm
// against the enumerator list.
enum Bitness : uint8_t { … };

// Returns the width associated with |bitness| as a uint32_t.
// NOTE(review): the unit of the result (bytes vs. bits) is not evident from
// the signature -- confirm before relying on it.
inline uint32_t WidthOf(Bitness bitness) { … }

// Used to uniquely identify a reference type.
// Strongly typed objects are used to avoid ambiguities with PoolTag.
// The wrapped value is a uint8_t (see the TypedValue template arguments).
struct TypeTag : public TypedValue<TypeTag, uint8_t> { … };

// Used to uniquely identify a pool.
struct PoolTag : public TypedValue<PoolTag, uint8_t> { … };

// Tag constants; both are constructed from the value 0xFF.
constexpr TypeTag kNoTypeTag(0xFF);  // Typically used to identify raw data.
// NOTE(review): presumably the PoolTag counterpart of kNoTypeTag, i.e. "no
// pool" -- confirm against callers.
constexpr PoolTag kNoPoolTag(0xFF);

// Specification of references in an image file.
struct ReferenceTypeTraits { … };

// There is no need to store |type| because references of the same type are
// always aggregated into the same container, and so during iteration we'd have
// |type| already.
struct Reference { … };

// Equality comparison for two Reference values.
// NOTE(review): which members take part in the comparison should be confirmed
// against the Reference definition.
inline bool operator==(const Reference& a, const Reference& b) { … }

// Interface for extracting References through member function GetNext().
// This is used by Disassemblers to extract references from an image file.
// Typically, a Reader lazily extracts values and does not hold any storage.
class ReferenceReader { … };

// Interface for writing References through member function
// PutNext(reference). This is used by Disassemblers to write new References
// in the image file.
class ReferenceWriter { … };

// References encoding may be quite complex in some architectures (e.g., ARM),
// requiring bit-level manipulation. In general, bits in a reference body fall
// under 2 categories:
// * Operation bits: Instruction op code, conditionals, or structural data.
// * Payload bits: Actual target data of the reference. These may be absolute,
//   or be displacements relative to instruction pointer / program counter.
// During patch application,
//   Old reference bytes = {old operation, old payload},
// is transformed to
//   New reference bytes = {new operation, new payload}.
// New image bytes are written by three sources:
//   (1) Direct copy from old image to new image for matched blocks.
//   (2) Bytewise diff correction.
//   (3) Dedicated reference target correction.
//
// For references whose operation and payload bits are stored in easily
// separable bytes (e.g., rel32 reference in X86), (2) can exclude payload bits.
// So during patch application, (1) naively copies everything, (2) fixes
// operation bytes only, and (3) fixes payload bytes only.
//
// For architectures with references whose operation and payload bits may mix
// within shared bytes (e.g., ARM rel32), a dilemma arises:
// * (2) cannot ignore shared bytes, since otherwise new operation bits would
//   not properly transfer.
// * Having (2) always overwrite these bytes would reduce the benefits of
//   reference correction, since references are likely to change.
//
// Our solution applies a hybrid approach: For each matching old / new reference
// pair, define:
//   Mixed reference bytes = {new operation, old payload},
//
// During patch generation, we compute bytewise correction from old reference
// bytes to the mixed reference bytes. So during patch application, (2) only
// corrects operation bit changes (and skips if they don't change), and (3)
// overwrites old payload bits to new payload bits.

// Interface for mixed reference byte generation. This base class
// serves as a stub. Architectures in which a reference's operation bits and
// payload bits can share common bytes (e.g., ARM rel32) should override this.
class ReferenceMixer { … };

// An Equivalence is a block of length |length| that approximately matches in
// |old_image| at an offset of |src_offset| and in |new_image| at an offset of
// |dst_offset|.
struct Equivalence { … };

// Equality comparison for two Equivalence values.
// NOTE(review): which members take part in the comparison should be confirmed
// against the Equivalence definition.
inline bool operator==(const Equivalence& a, const Equivalence& b) { … }

// Same as Equivalence, but with a similarity score. This is only used when
// generating the patch.
struct EquivalenceCandidate { … };

// Derives a uint32_t from the character array |exe_type| of N elements (for a
// string literal argument, N includes the terminating NUL). Declared
// constexpr, so it can be evaluated at compile time.
// NOTE(review): presumably packs the characters of |exe_type| into the
// integer -- confirm the byte order and any constraint on N.
template <size_t N>
inline constexpr uint32_t ExeTypeToUint32(const char (&exe_type)[N]) { … }

// Enumerations for supported executables. Values in this enum must be distinct.
// Once present, values should never be altered or removed to ensure backwards
// compatibility and patch type collision avoidance.
enum ExecutableType : uint32_t { … };

// Converts the raw value |possible_exe_type| into an ExecutableType. Declared
// constexpr, so it can be used in constant expressions.
// NOTE(review): how values that match no known enumerator are handled is not
// evident from the signature -- confirm.
constexpr ExecutableType CastToExecutableType(uint32_t possible_exe_type) { … }

// Returns a std::string representation of |exe_type|.
// NOTE(review): the exact output format is not evident from the signature --
// confirm before parsing or persisting the result.
inline std::string CastExecutableTypeToString(ExecutableType exe_type) { … }

// A region in an image with associated executable type |exe_type|. If
// |exe_type == kExeTypeNoOp|, then the Element represents a region of raw data.
struct Element : public BufferRegion { … };

// A matched pair of Elements.
struct ElementMatch { … };

}  // namespace zucchini

#endif  // COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
chromium/components/zucchini/image_utils.h