llvm/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp

//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler.
// It contains code to translate the data produced by the decoder into
//  MCInsts.
//
//
// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
// 64-bit X86 instruction sets.  The main decode sequence for an assembly
// instruction in this disassembler is:
//
// 1. Read the prefix bytes and determine the attributes of the instruction.
//    These attributes, recorded in enum attributeBits
//    (X86DisassemblerDecoderCommon.h), form a bitmask.  The table CONTEXTS_SYM
//    provides a mapping from bitmasks to contexts, which are represented by
//    enum InstructionContext (ibid.).
//
// 2. Read the opcode, and determine what kind of opcode it is.  The
//    disassembler distinguishes four kinds of opcodes, which are enumerated in
//    OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
//    (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
//    (0x0f 0x3a 0xnn).  Mandatory prefixes are treated as part of the context.
//
// 3. Depending on the opcode type, look in one of four ClassDecision structures
//    (X86DisassemblerDecoderCommon.h).  Use the opcode class to determine which
//    OpcodeDecision (ibid.) to look the opcode in.  Look up the opcode, to get
//    a ModRMDecision (ibid.).
//
// 4. Some instructions, such as escape opcodes or extended opcodes, or even
//    instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
//    ModR/M byte to complete decode.  The ModRMDecision's type is an entry from
//    ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
//    ModR/M byte is required and how to interpret it.
//
// 5. After resolving the ModRMDecision, the disassembler has a unique ID
//    of type InstrUID (X86DisassemblerDecoderCommon.h).  Looking this ID up in
//    INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
//    meanings of its operands.
//
// 6. For each operand, its encoding is an entry from OperandEncoding
//    (X86DisassemblerDecoderCommon.h) and its type is an entry from
//    OperandType (ibid.).  The encoding indicates how to read it from the
//    instruction; the type indicates how to interpret the value once it has
//    been read.  For example, a register operand could be stored in the R/M
//    field of the ModR/M byte, the REG field of the ModR/M byte, or added to
//    the main opcode.  This is orthogonal from its meaning (an GPR or an XMM
//    register, for instance).  Given this information, the operands can be
//    extracted and interpreted.
//
// 7. As the last step, the disassembler translates the instruction information
//    and operands into a format understandable by the client - in this case, an
//    MCInst for use by the MC infrastructure.
//
// The disassembler is broken broadly into two parts: the table emitter that
// emits the instruction decode tables discussed above during compilation, and
// the disassembler itself.  The table emitter is documented in more detail in
// utils/TableGen/X86DisassemblerEmitter.h.
//
// X86Disassembler.cpp contains the code responsible for step 7, and for
//   invoking the decoder to execute steps 1-6.
// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
//   table emitter and the disassembler.
// X86DisassemblerDecoder.h contains the public interface of the decoder,
//   factored out into C for possible use by other projects.
// X86DisassemblerDecoder.c contains the source code of the decoder, which is
//   responsible for steps 1-6.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86DisassemblerDecoder.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

usingnamespacellvm;
usingnamespacellvm::X86Disassembler;

#define DEBUG_TYPE

#define debug(s)

// Specifies whether a ModR/M byte is needed and (if so) which
// instruction each possible value of the ModR/M byte corresponds to.  Once
// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {};

// Specifies which set of ModR/M->instruction tables to look at
// given a particular opcode.
struct OpcodeDecision {};

// Specifies which opcode->instruction tables to look at given
// a particular context (set of attributes).  Since there are many possible
// contexts, the decoder first uses CONTEXTS_SYM to determine which context
// applies given a specific set of attributes.  Hence there are only IC_max
// entries in this table, rather than 2^(ATTR_max).
struct ContextDecision {};

#include "X86GenDisassemblerTables.inc"

static InstrUID decode(OpcodeType type, InstructionContext insnContext,
                       uint8_t opcode, uint8_t modRM) {}

static bool peek(struct InternalInstruction *insn, uint8_t &byte) {}

template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {}

static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {}

static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) {}

// Consumes all of an instruction's prefix bytes, and marks the
// instruction as having them.  Also sets the instruction's default operand,
// address, and other relevant data sizes to report operands correctly.
//
// insn must not be empty.
static int readPrefixes(struct InternalInstruction *insn) {}

// Consumes the SIB byte to determine addressing information.
static int readSIB(struct InternalInstruction *insn) {}

static int readDisplacement(struct InternalInstruction *insn) {}

// Consumes all addressing information (ModR/M byte, SIB byte, and displacement.
static int readModRM(struct InternalInstruction *insn) {}

#define GENERIC_FIXUP_FUNC(name, base, prefix)

// Consult an operand type to determine the meaning of the reg or R/M field. If
// the operand is an XMM operand, for example, an operand would be XMM0 instead
// of AX, which readModRM() would otherwise misinterpret it as.
//
// @param insn  - The instruction containing the operand.
// @param type  - The operand type.
// @param index - The existing value of the field as reported by readModRM().
// @param valid - The address of a uint8_t.  The target is set to 1 if the
//                field is valid for the register class; 0 if not.
// @return      - The proper value.
GENERIC_FIXUP_FUNC()
GENERIC_FIXUP_FUNC()

// Consult an operand specifier to determine which of the fixup*Value functions
// to use in correcting readModRM()'ss interpretation.
//
// @param insn  - See fixup*Value().
// @param op    - The operand specifier.
// @return      - 0 if fixup was successful; -1 if the register returned was
//                invalid for its class.
static int fixupReg(struct InternalInstruction *insn,
                    const struct OperandSpecifier *op) {}

// Read the opcode (except the ModR/M byte in the case of extended or escape
// opcodes).
static bool readOpcode(struct InternalInstruction *insn) {}

// Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit).
static bool is16BitEquivalent(const char *orig, const char *equiv) {}

// Determine whether this instruction is a 64-bit instruction.
static bool is64Bit(const char *name) {}

// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
// for extended and escape opcodes, and using a supplied attribute mask.
static int getInstructionIDWithAttrMask(uint16_t *instructionID,
                                        struct InternalInstruction *insn,
                                        uint16_t attrMask) {}

static bool isCCMPOrCTEST(InternalInstruction *insn) {}

static bool isNF(InternalInstruction *insn) {}

// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
// for extended and escape opcodes. Determines the attributes and context for
// the instruction before doing so.
static int getInstructionID(struct InternalInstruction *insn,
                            const MCInstrInfo *mii) {}

// Read an operand from the opcode field of an instruction and interprets it
// appropriately given the operand width. Handles AddRegFrm instructions.
//
// @param insn  - the instruction whose opcode field is to be read.
// @param size  - The width (in bytes) of the register being specified.
//                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
//                RAX.
// @return      - 0 on success; nonzero otherwise.
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {}

// Consume an immediate operand from an instruction, given the desired operand
// size.
//
// @param insn  - The instruction whose operand is to be read.
// @param size  - The width (in bytes) of the operand.
// @return      - 0 if the immediate was successfully consumed; nonzero
//                otherwise.
static int readImmediate(struct InternalInstruction *insn, uint8_t size) {}

// Consume vvvv from an instruction if it has a VEX prefix.
static int readVVVV(struct InternalInstruction *insn) {}

// Read an mask register from the opcode field of an instruction.
//
// @param insn    - The instruction whose opcode field is to be read.
// @return        - 0 on success; nonzero otherwise.
static int readMaskRegister(struct InternalInstruction *insn) {}

// Consults the specifier for an instruction and consumes all
// operands for that instruction, interpreting them as it goes.
static int readOperands(struct InternalInstruction *insn) {}

namespace llvm {

// Fill-ins to make the compiler happy. These constants are never actually
// assigned; they are just filler to make an automatically-generated switch
// statement work.
namespace X86 {
  enum {};
} // namespace X86

} // namespace llvm

static bool translateInstruction(MCInst &target,
                                InternalInstruction &source,
                                const MCDisassembler *Dis);

namespace {

/// Generic disassembler for all X86 platforms. All each platform class should
/// have to do is subclass the constructor, and provide a different
/// disassemblerMode value.
class X86GenericDisassembler : public MCDisassembler {};

} // namespace

X86GenericDisassembler::X86GenericDisassembler(
                                         const MCSubtargetInfo &STI,
                                         MCContext &Ctx,
                                         std::unique_ptr<const MCInstrInfo> MII)
  :{}

MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
    MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
    raw_ostream &CStream) const {}

//
// Private code that translates from struct InternalInstructions to MCInsts.
//

/// translateRegister - Translates an internal register to the appropriate LLVM
///   register, and appends it as an operand to an MCInst.
///
/// @param mcInst     - The MCInst to append to.
/// @param reg        - The Reg to append.
static void translateRegister(MCInst &mcInst, Reg reg) {}

static const uint8_t segmentRegnums[SEG_OVERRIDE_max] =;

/// translateSrcIndex   - Appends a source index operand to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The internal instruction.
static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {}

/// translateDstIndex   - Appends a destination index operand to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The internal instruction.

static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {}

/// translateImmediate  - Appends an immediate operand to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param immediate    - The immediate value to append.
/// @param operand      - The operand, as stored in the descriptor table.
/// @param insn         - The internal instruction.
static void translateImmediate(MCInst &mcInst, uint64_t immediate,
                               const OperandSpecifier &operand,
                               InternalInstruction &insn,
                               const MCDisassembler *Dis) {}

/// translateRMRegister - Translates a register stored in the R/M field of the
///   ModR/M byte to its LLVM equivalent and appends it to an MCInst.
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The internal instruction to extract the R/M field
///                       from.
/// @return             - 0 on success; -1 otherwise
static bool translateRMRegister(MCInst &mcInst,
                                InternalInstruction &insn) {}

/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
///   fields of an internal instruction (and possibly its SIB byte) to a memory
///   operand in LLVM's format, and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
///                       from.
/// @param ForceSIB     - The instruction must use SIB.
/// @return             - 0 on success; nonzero otherwise
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
                              const MCDisassembler *Dis,
                              bool ForceSIB = false) {}

/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
///   byte of an instruction to LLVM form, and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param operand      - The operand, as stored in the descriptor table.
/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
///                       from.
/// @return             - 0 on success; nonzero otherwise
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
                        InternalInstruction &insn, const MCDisassembler *Dis) {}

/// translateFPRegister - Translates a stack position on the FPU stack to its
///   LLVM form, and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param stackPos     - The stack position to translate.
static void translateFPRegister(MCInst &mcInst,
                                uint8_t stackPos) {}

/// translateMaskRegister - Translates a 3-bit mask register number to
///   LLVM form, and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param maskRegNum   - Number of mask register from 0 to 7.
/// @return             - false on success; true otherwise.
static bool translateMaskRegister(MCInst &mcInst,
                                uint8_t maskRegNum) {}

/// translateOperand - Translates an operand stored in an internal instruction
///   to LLVM's format and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param operand      - The operand, as stored in the descriptor table.
/// @param insn         - The internal instruction.
/// @return             - false on success; true otherwise.
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
                             InternalInstruction &insn,
                             const MCDisassembler *Dis) {}

/// translateInstruction - Translates an internal instruction and all its
///   operands to an MCInst.
///
/// @param mcInst       - The MCInst to populate with the instruction's data.
/// @param insn         - The internal instruction.
/// @return             - false on success; true otherwise.
static bool translateInstruction(MCInst &mcInst,
                                InternalInstruction &insn,
                                const MCDisassembler *Dis) {}

static MCDisassembler *createX86Disassembler(const Target &T,
                                             const MCSubtargetInfo &STI,
                                             MCContext &Ctx) {}

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler() {}