//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is part of the X86 Disassembler. // It contains code to translate the data produced by the decoder into // MCInsts. // // // The X86 disassembler is a table-driven disassembler for the 16-, 32-, and // 64-bit X86 instruction sets. The main decode sequence for an assembly // instruction in this disassembler is: // // 1. Read the prefix bytes and determine the attributes of the instruction. // These attributes, recorded in enum attributeBits // (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM // provides a mapping from bitmasks to contexts, which are represented by // enum InstructionContext (ibid.). // // 2. Read the opcode, and determine what kind of opcode it is. The // disassembler distinguishes four kinds of opcodes, which are enumerated in // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte // (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context. // // 3. Depending on the opcode type, look in one of four ClassDecision structures // (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which // OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode, to get // a ModRMDecision (ibid.). // // 4. Some instructions, such as escape opcodes or extended opcodes, or even // instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the // ModR/M byte to complete decode. The ModRMDecision's type is an entry from // ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the // ModR/M byte is required and how to interpret it. 
// // 5. After resolving the ModRMDecision, the disassembler has a unique ID // of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in // INSTRUCTIONS_SYM yields the name of the instruction and the encodings and // meanings of its operands. // // 6. For each operand, its encoding is an entry from OperandEncoding // (X86DisassemblerDecoderCommon.h) and its type is an entry from // OperandType (ibid.). The encoding indicates how to read it from the // instruction; the type indicates how to interpret the value once it has // been read. For example, a register operand could be stored in the R/M // field of the ModR/M byte, the REG field of the ModR/M byte, or added to // the main opcode. This is orthogonal to its meaning (a GPR or an XMM // register, for instance). Given this information, the operands can be // extracted and interpreted. // // 7. As the last step, the disassembler translates the instruction information // and operands into a format understandable by the client - in this case, an // MCInst for use by the MC infrastructure. // // The disassembler is broken broadly into two parts: the table emitter that // emits the instruction decode tables discussed above during compilation, and // the disassembler itself. The table emitter is documented in more detail in // utils/TableGen/X86DisassemblerEmitter.h. // // X86Disassembler.cpp contains the code responsible for step 7, and for // invoking the decoder to execute steps 1-6. // X86DisassemblerDecoderCommon.h contains the definitions needed by both the // table emitter and the disassembler. // X86DisassemblerDecoder.h contains the public interface of the decoder, // factored out into C for possible use by other projects. // X86DisassemblerDecoder.c contains the source code of the decoder, which is // responsible for steps 1-6. 
// //===----------------------------------------------------------------------===// #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "TargetInfo/X86TargetInfo.h" #include "X86DisassemblerDecoder.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" usingnamespacellvm; usingnamespacellvm::X86Disassembler; #define DEBUG_TYPE … #define debug(s) … // Specifies whether a ModR/M byte is needed and (if so) which // instruction each possible value of the ModR/M byte corresponds to. Once // this information is known, we have narrowed down to a single instruction. struct ModRMDecision { … }; // Specifies which set of ModR/M->instruction tables to look at // given a particular opcode. struct OpcodeDecision { … }; // Specifies which opcode->instruction tables to look at given // a particular context (set of attributes). Since there are many possible // contexts, the decoder first uses CONTEXTS_SYM to determine which context // applies given a specific set of attributes. Hence there are only IC_max // entries in this table, rather than 2^(ATTR_max). struct ContextDecision { … }; #include "X86GenDisassemblerTables.inc" static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM) { … } static bool peek(struct InternalInstruction *insn, uint8_t &byte) { … } template <typename T> static bool consume(InternalInstruction *insn, T &ptr) { … } static bool isREX(struct InternalInstruction *insn, uint8_t prefix) { … } static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) { … } // Consumes all of an instruction's prefix bytes, and marks the // instruction as having them. 
Also sets the instruction's default operand, // address, and other relevant data sizes to report operands correctly. // // insn must not be empty. static int readPrefixes(struct InternalInstruction *insn) { … } // Consumes the SIB byte to determine addressing information. static int readSIB(struct InternalInstruction *insn) { … } static int readDisplacement(struct InternalInstruction *insn) { … } // Consumes all addressing information (ModR/M byte, SIB byte, and displacement). static int readModRM(struct InternalInstruction *insn) { … } #define GENERIC_FIXUP_FUNC(name, base, prefix) … // Consult an operand type to determine the meaning of the reg or R/M field. If // the operand is an XMM operand, for example, an operand would be XMM0 instead // of AX, which is how readModRM() would otherwise interpret it. // // @param insn - The instruction containing the operand. // @param type - The operand type. // @param index - The existing value of the field as reported by readModRM(). // @param valid - The address of a uint8_t. The target is set to 1 if the // field is valid for the register class; 0 if not. // @return - The proper value. GENERIC_FIXUP_FUNC(…) GENERIC_FIXUP_FUNC(…) // Consult an operand specifier to determine which of the fixup*Value functions // to use in correcting readModRM()'s interpretation. // // @param insn - See fixup*Value(). // @param op - The operand specifier. // @return - 0 if fixup was successful; -1 if the register returned was // invalid for its class. static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op) { … } // Read the opcode (except the ModR/M byte in the case of extended or escape // opcodes). static bool readOpcode(struct InternalInstruction *insn) { … } // Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit). static bool is16BitEquivalent(const char *orig, const char *equiv) { … } // Determine whether this instruction is a 64-bit instruction. 
static bool is64Bit(const char *name) { … } // Determine the ID of an instruction, consuming the ModR/M byte as appropriate // for extended and escape opcodes, and using a supplied attribute mask. static int getInstructionIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask) { … } static bool isCCMPOrCTEST(InternalInstruction *insn) { … } static bool isNF(InternalInstruction *insn) { … } // Determine the ID of an instruction, consuming the ModR/M byte as appropriate // for extended and escape opcodes. Determines the attributes and context for // the instruction before doing so. static int getInstructionID(struct InternalInstruction *insn, const MCInstrInfo *mii) { … } // Read an operand from the opcode field of an instruction and interpret it // appropriately given the operand width. Handles AddRegFrm instructions. // // @param insn - The instruction whose opcode field is to be read. // @param size - The width (in bytes) of the register being specified. // 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means // RAX. // @return - 0 on success; nonzero otherwise. static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) { … } // Consume an immediate operand from an instruction, given the desired operand // size. // // @param insn - The instruction whose operand is to be read. // @param size - The width (in bytes) of the operand. // @return - 0 if the immediate was successfully consumed; nonzero // otherwise. static int readImmediate(struct InternalInstruction *insn, uint8_t size) { … } // Consume vvvv from an instruction if it has a VEX prefix. static int readVVVV(struct InternalInstruction *insn) { … } // Read a mask register from the opcode field of an instruction. // // @param insn - The instruction whose opcode field is to be read. // @return - 0 on success; nonzero otherwise. 
static int readMaskRegister(struct InternalInstruction *insn) { … } // Consults the specifier for an instruction and consumes all // operands for that instruction, interpreting them as it goes. static int readOperands(struct InternalInstruction *insn) { … } namespace llvm { // Fill-ins to make the compiler happy. These constants are never actually // assigned; they are just filler to make an automatically-generated switch // statement work. namespace X86 { enum { … }; } // namespace X86 } // namespace llvm static bool translateInstruction(MCInst &target, InternalInstruction &source, const MCDisassembler *Dis); namespace { /// Generic disassembler for all X86 platforms. All that each platform class should /// have to do is subclass the constructor, and provide a different /// disassemblerMode value. class X86GenericDisassembler : public MCDisassembler { … }; } // namespace X86GenericDisassembler::X86GenericDisassembler( const MCSubtargetInfo &STI, MCContext &Ctx, std::unique_ptr<const MCInstrInfo> MII) : … { … } MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction( MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, raw_ostream &CStream) const { … } // // Private code that translates from struct InternalInstructions to MCInsts. // /// translateRegister - Translates an internal register to the appropriate LLVM /// register, and appends it as an operand to an MCInst. /// /// @param mcInst - The MCInst to append to. /// @param reg - The Reg to append. static void translateRegister(MCInst &mcInst, Reg reg) { … } static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = …; /// translateSrcIndex - Appends a source index operand to an MCInst. /// /// @param mcInst - The MCInst to append to. /// @param insn - The internal instruction. static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { … } /// translateDstIndex - Appends a destination index operand to an MCInst. /// /// @param mcInst - The MCInst to append to. 
/// @param insn - The internal instruction. static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { … } /// translateImmediate - Appends an immediate operand to an MCInst. /// /// @param mcInst - The MCInst to append to. /// @param immediate - The immediate value to append. /// @param operand - The operand, as stored in the descriptor table. /// @param insn - The internal instruction. static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis) { … } /// translateRMRegister - Translates a register stored in the R/M field of the /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. /// @param mcInst - The MCInst to append to. /// @param insn - The internal instruction to extract the R/M field /// from. /// @return - false on success; true otherwise. static bool translateRMRegister(MCInst &mcInst, InternalInstruction &insn) { … } /// translateRMMemory - Translates a memory operand stored in the Mod and R/M /// fields of an internal instruction (and possibly its SIB byte) to a memory /// operand in LLVM's format, and appends it to an MCInst. /// /// @param mcInst - The MCInst to append to. /// @param insn - The instruction to extract Mod, R/M, and SIB fields /// from. /// @param ForceSIB - The instruction must use SIB. /// @return - false on success; true otherwise. static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, const MCDisassembler *Dis, bool ForceSIB = false) { … } /// translateRM - Translates an operand stored in the R/M (and possibly SIB) /// byte of an instruction to LLVM form, and appends it to an MCInst. /// /// @param mcInst - The MCInst to append to. /// @param operand - The operand, as stored in the descriptor table. /// @param insn - The instruction to extract Mod, R/M, and SIB fields /// from. 
/// @return - false on success; true otherwise. static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis) { … } /// translateFPRegister - Translates a stack position on the FPU stack to its /// LLVM form, and appends it to an MCInst. /// /// @param mcInst - The MCInst to append to. /// @param stackPos - The stack position to translate. static void translateFPRegister(MCInst &mcInst, uint8_t stackPos) { … } /// translateMaskRegister - Translates a 3-bit mask register number to /// LLVM form, and appends it to an MCInst. /// /// @param mcInst - The MCInst to append to. /// @param maskRegNum - Number of mask register from 0 to 7. /// @return - false on success; true otherwise. static bool translateMaskRegister(MCInst &mcInst, uint8_t maskRegNum) { … } /// translateOperand - Translates an operand stored in an internal instruction /// to LLVM's format and appends it to an MCInst. /// /// @param mcInst - The MCInst to append to. /// @param operand - The operand, as stored in the descriptor table. /// @param insn - The internal instruction. /// @return - false on success; true otherwise. static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis) { … } /// translateInstruction - Translates an internal instruction and all its /// operands to an MCInst. /// /// @param mcInst - The MCInst to populate with the instruction's data. /// @param insn - The internal instruction. /// @return - false on success; true otherwise. static bool translateInstruction(MCInst &mcInst, InternalInstruction &insn, const MCDisassembler *Dis) { … } static MCDisassembler *createX86Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx) { … } extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler() { … }