chromium/components/zucchini/arm_utils.h

// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_ZUCCHINI_ARM_UTILS_H_
#define COMPONENTS_ZUCCHINI_ARM_UTILS_H_

#include <stddef.h>
#include <stdint.h>

#include "base/check_op.h"
#include "components/zucchini/address_translator.h"
#include "components/zucchini/buffer_view.h"

namespace zucchini {

// References:
// * AArch32 (32-bit ARM, AKA ARM32):
//     https://static.docs.arm.com/ddi0406/c/DDI0406C_C_arm_architecture_reference_manual.pdf
// * AArch64 (64-bit ARM):
//     https://static.docs.arm.com/ddi0487/da/DDI0487D_a_armv8_arm.pdf

// Definitions (used in Zucchini):
// * |instr_rva|: Instruction RVA: The RVA where an instruction is located. In
//   ARM mode and for AArch64 this is 4-byte aligned; in THUMB2 mode this is
//   2-byte aligned.
// * |code|: Instruction code: ARM instruction code as seen in manual. In ARM
//   mode and for AArch64, this is a 32-bit int. In THUMB2 mode, this may be a
//   16-bit or 32-bit int.
// * |disp|: Displacement: For branch instructions (e.g.: B, BL, BLX, and
//   conditional varieties) this is the value encoded in instruction bytes.
// * PC: Program Counter: In ARM mode this is |instr_rva + 8|; in THUMB2 mode
//   this is |instr_rva + 4|; for AArch64 this is |instr_rva|.
// * |target_rva|: Target RVA: The RVA targeted by a branch instruction.
//
// These are related by:
//   |code| = Fetch(image data at offset(|instr_rva|)).
//   |disp| = Decode(|code|).
//   PC = |instr_rva| + {8 in ARM mode, 4 in THUMB2 mode, 0 for AArch64}.
//   |target_rva| = PC + |disp|  (but see "BLX complication" below).
//
// Example 1 (ARM mode):
//   00103050: 00 01 02 EA    B     00183458
//   |instr_rva| = 0x00103050  (4-byte aligned).
//   |code| = 0xEA020100  (little endian fetched from data).
//   |disp| = 0x00080400  (decoded from |code| with A24 -> B encoding A1).
//   PC = |instr_rva| + 8 = 0x00103058  (ARM mode).
//   |target_rva| = PC + |disp| = 0x00183458.
//
// Example 2 (THUMB2 mode):
//   001030A2: 00 F0 01 FA    BL    001034A8
//   |instr_rva| = 0x001030A2  (2-byte aligned).
//   |code| = 0xF000FA01  (special THUMB2 mode data fetch).
//   |disp| = 0x00000402  (decoded from |code| with T24 -> BL encoding T1).
//   PC = |instr_rva| + 4 = 0x001030A6  (THUMB2 mode).
//   |target_rva| = PC + |disp| = 0x001034A8.
//
// Example 3 (AArch64):
//   0000000000305070: 03 02 01 14    B     000000000034587C
//   |instr_rva| = 0x00305070  (4-byte aligned, assumed to fit in 32-bit).
//   |code| = 0x14010203  (little endian fetched from data).
//   |disp| = 0x0004080C  (decoded from |code| with Immd -> B).
//   PC = |instr_rva| = 0x00305070  (AArch64).
//   |target_rva| = PC + |disp| = 0x0034587C.

// BLX complication: BLX transits between ARM mode and THUMB2 mode, and branches
// to an address. Therefore |instr_rva| must align by the "old" mode, and
// |target_rva| must align by the "new" mode. In particular:
// * BLX encoding A2 (ARM -> THUMB2): |instr_rva| is 4-byte aligned with
//   PC = |instr_rva| + 8; |target_rva| is 2-byte aligned, and so |disp| is
//   2-byte aligned.
// * BLX encoding T2 (THUMB2 -> ARM): |instr_rva| is 2-byte aligned with
//   PC = |instr_rva| + 4; |target_rva| is 4-byte aligned. Complication: BLX
//   encoding T2 stores a bit |H| that corresponds to "2" in binary, but |H|
//   must be set to 0. Thus the encoded value is effectively 4-byte aligned. So
//   when computing |target_rva| by adding PC (2-byte aligned) to the stored
//   value (4-byte aligned), the result must be rounded down to the nearest
//   4-byte aligned address.
// The last situation creates ambiguity in how |disp| is defined! Alternatives:
// (1) |disp| := |target_rva| - PC: So |code| <-> |disp| for BLX encoding T2,
//     requires |instr_rva| % 4 to be determined, and adjustments made.
// (2) |disp| := Value stored in |code|: So |disp| <-> |target_rva| for BLX
//     encoding T2 requires adjustment: |disp| -> |target_rva| needs to round
//     down, whereas |target_rva| -> |disp| needs to round up.
// We adopt (2) to simplify |code| <-> |disp|, since that gets used.

// Type for |disp|. Signed, since a branch may target a lower address than PC
// (negative displacement). 32 bits suffice: displacements are assumed to fit
// in 32-bit even for AArch64.
using arm_disp_t = int32_t;

// Alignment requirement for |target_rva|, useful for |disp| <-> |target_rva|
// (also requires |instr_rva|). Alignment is determined by parsing |code| in
// *Decode() functions. kArmAlignFail is also defined to indicate parse failure.
// Alignments can be 2 or 4. These values are also used in the enum, so
// |x % align| can be computed as |x & (align - 1)|.
enum ArmAlign : uint32_t {
  // Parse failure. Not a valid alignment, so it must not be used as |align|
  // in the masking expression above.
  kArmAlignFail = 0U,
  // |target_rva| is 2-byte aligned.
  kArmAlign2 = 2U,
  // |target_rva| is 4-byte aligned.
  kArmAlign4 = 4U,
};

// Traits for rel32 address types (technically rel64 for AArch64 -- but we
// assume values are small enough), which form collections of strategies to
// process each rel32 address type.
//
// Template parameters:
// * |ENUM_ADDR_TYPE|, |ADDR_TYPE|: An identifier type, and the specific value
//   of that type that a given instantiation is bound to.
// * |CODE_T|: The type returned by |FETCH| and consumed by |STORE|, |DECODE|
//   and |READ|; presumably the integer type holding |code|.
// * |FETCH|: Takes (ConstBufferView, offset_t), returns |CODE_T|.
// * |STORE|: Takes (MutableBufferView, offset_t, |CODE_T|), returns nothing.
// * |DECODE|: Takes (|CODE_T|, arm_disp_t*), returns ArmAlign.
// * |ENCODE|: Takes (arm_disp_t, |CODE_T|*), returns bool.
// * |READ|: Takes (rva_t, |CODE_T|, rva_t*), returns bool.
// * |WRITE|: Takes (rva_t, rva_t, |CODE_T|*), returns bool.
// NOTE(review): Going by the names and the definitions at the top of this file,
// these presumably map to: FETCH / STORE = image data <-> |code|; DECODE /
// ENCODE = |code| <-> |disp|; READ / WRITE = (|instr_rva|, |code|) <->
// |target_rva|, with pointer parameters as outputs and bool as success --
// confirm against the translator implementations.
template <typename ENUM_ADDR_TYPE,
          ENUM_ADDR_TYPE ADDR_TYPE,
          typename CODE_T,
          CODE_T (*FETCH)(ConstBufferView, offset_t),
          void (*STORE)(MutableBufferView, offset_t, CODE_T),
          ArmAlign (*DECODE)(CODE_T, arm_disp_t*),
          bool (*ENCODE)(arm_disp_t, CODE_T*),
          bool (*READ)(rva_t, CODE_T, rva_t*),
          bool (*WRITE)(rva_t, rva_t, CODE_T*)>
class ArmAddrTraits {};

// Given THUMB2 instruction |code16|, returns 2 if it's from a 16-bit THUMB2
// instruction, or 4 if it's from a 32-bit THUMB2 instruction.
// |code16| is the first (or only) halfword of the instruction. It starts a
// 32-bit instruction iff its top 5 bits are 0b11101, 0b11110, or 0b11111.
inline int GetThumb2InstructionSize(uint16_t code16) {
  // 0b11101 is the smallest of the three prefixes, and 0b11111 is the largest
  // 5-bit value, so a single comparison covers all three.
  const unsigned top5 = static_cast<unsigned>(code16) >> 11;
  return top5 >= 0x1DU ? 4 : 2;
}

// A translator for ARM mode and THUMB2 mode with static functions that
// translate among |code|, |disp|, and |target_rva|.
// Per the definitions at the top of this file, PC is |instr_rva| + 8 in ARM
// mode and |instr_rva| + 4 in THUMB2 mode, and |disp| for BLX encoding T2 is
// the value stored in |code| (see "BLX complication").
// NOTE(review): No members are visible in this view of the class; presumably
// its static functions are what get supplied to ArmAddrTraits -- confirm.
class AArch32Rel32Translator {};

// Translator for AArch64, which is simpler than 32-bit ARM. Although pointers
// are 64-bit, displacements are within 32-bit.
// Per the definitions at the top of this file, |instr_rva| is 4-byte aligned,
// |code| is a 32-bit int, and PC is |instr_rva| itself.
// NOTE(review): No members are visible in this view of the class; presumably
// its static functions are what get supplied to ArmAddrTraits -- confirm.
class AArch64Rel32Translator {};

}  // namespace zucchini

#endif  // COMPONENTS_ZUCCHINI_ARM_UTILS_H_