assemble.c | Explore in Territory

/* ----------------------------------------------------------------------- *
 *
 *   Copyright 1996-2020 The NASM Authors - All Rights Reserved
 *   See the file AUTHORS included with the NASM distribution for
 *   the specific copyright holders.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following
 *   conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials provided
 *     with the distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ----------------------------------------------------------------------- */

/*
 * assemble.c   code generation for the Netwide Assembler
 *
 * Bytecode specification
 * ----------------------
 *
 *
 * Codes            Mnemonic        Explanation
 *
 * \0                                       terminates the code. (Unless it's a literal of course.)
 * \1..\4                                   that many literal bytes follow in the code stream
 * \5                                       add 4 to the primary operand number (b, low octdigit)
 * \6                                       add 4 to the secondary operand number (a, middle octdigit)
 * \7                                       add 4 to both the primary and the secondary operand number
 * \10..\13                                 a literal byte follows in the code stream, to be added
 *                                          to the register value of operand 0..3
 * \14..\17                                 the position of index register operand in MIB (BND insns)
 * \20..\23         ib                      a byte immediate operand, from operand 0..3
 * \24..\27         ib,u                    a zero-extended byte immediate operand, from operand 0..3
 * \30..\33         iw                      a word immediate operand, from operand 0..3
 * \34..\37         iwd                     select between \3[0-3] and \4[0-3] depending on 16/32 bit
 *                                          assembly mode or the operand-size override on the operand
 * \40..\43         id                      a long immediate operand, from operand 0..3
 * \44..\47         iwdq                    select between \3[0-3], \4[0-3] and \5[4-7]
 *                                          depending on the address size of the instruction.
 * \50..\53         rel8                    a byte relative operand, from operand 0..3
 * \54..\57         iq                      a qword immediate operand, from operand 0..3
 * \60..\63         rel16                   a word relative operand, from operand 0..3
 * \64..\67         rel                     select between \6[0-3] and \7[0-3] depending on 16/32 bit
 *                                          assembly mode or the operand-size override on the operand
 * \70..\73         rel32                   a long relative operand, from operand 0..3
 * \74..\77         seg                     a word constant, from the _segment_ part of operand 0..3
 * \1ab                                     a ModRM, calculated on EA in operand a, with the spare
 *                                          field the register value of operand b.
 * \172\ab                                  the register number from operand a in bits 7..4, with
 *                                          the 4-bit immediate from operand b in bits 3..0.
 * \173\xab                                 the register number from operand a in bits 7..4, with
 *                                          the value b in bits 3..0.
 * \174..\177                               the register number from operand 0..3 in bits 7..4, and
 *                                          an arbitrary value in bits 3..0 (assembled as zero.)
 * \2ab                                     a ModRM, calculated on EA in operand a, with the spare
 *                                          field equal to digit b.
 *
 * \240..\243                               this instruction uses EVEX rather than REX or VEX/XOP, with the
 *                                          V field taken from operand 0..3.
 * \250                                     this instruction uses EVEX rather than REX or VEX/XOP, with the
 *                                          V field set to 1111b.
 *
 * EVEX prefixes are followed by the sequence:
 * \cm\wlp\tup    where cm is:
 *                  cc 00m mmm
 *                  c = 2 for EVEX and mmmm is the M field (EVEX.P0[3:0])
 *                and wlp is:
 *                  00 wwl lpp
 *                  [l0]  ll = 0 (.128, .lz)
 *                  [l1]  ll = 1 (.256)
 *                  [l2]  ll = 2 (.512)
 *                  [lig] ll = 3 for EVEX.L'L don't care (always assembled as 0)
 *
 *                  [w0]  ww = 0 for W = 0
 *                  [w1]  ww = 1 for W = 1
 *                  [wig] ww = 2 for W don't care (always assembled as 0)
 *                  [ww]  ww = 3 for W used as REX.W
 *
 *                  [p0]  pp = 0 for no prefix
 *                  [66]  pp = 1 for legacy prefix 66
 *                  [f3]  pp = 2
 *                  [f2]  pp = 3
 *
 *                tup is tuple type for Disp8*N from %tuple_codes in insns.pl
 *                    (compressed displacement encoding)
 *
 * \254..\257       id,s                        a signed 32-bit operand to be extended to 64 bits.
 * \260..\263                                   this instruction uses VEX/XOP rather than REX, with the
 *                                              V field taken from operand 0..3.
 * \270                                         this instruction uses VEX/XOP rather than REX, with the
 *                                              V field set to 1111b.
 *
 * VEX/XOP prefixes are followed by the sequence:
 * \tmm\wlp        where mm is the M field; and wlp is:
 *                 00 wwl lpp
 *                 [l0]  ll = 0 for L = 0 (.128, .lz)
 *                 [l1]  ll = 1 for L = 1 (.256)
 *                 [lig] ll = 2 for L don't care (always assembled as 0)
 *
 *                 [w0]  ww = 0 for W = 0
 *                 [w1]  ww = 1 for W = 1
 *                 [wig] ww = 2 for W don't care (always assembled as 0)
 *                 [ww]  ww = 3 for W used as REX.W
 *
 * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 *
 * \271             hlexr                       instruction takes XRELEASE (F3) with or without lock
 * \272             hlenl                       instruction takes XACQUIRE/XRELEASE with or without lock
 * \273             hle                         instruction takes XACQUIRE/XRELEASE with lock only
 * \274..\277       ib,s                        a byte immediate operand, from operand 0..3, sign-extended
 *                                              to the operand size (if o16/o32/o64 present) or the bit size
 * \310             a16                         indicates fixed 16-bit address size, i.e. optional 0x67.
 * \311             a32                         indicates fixed 32-bit address size, i.e. optional 0x67.
 * \312             adf                         (disassembler only) invalid with non-default address size.
 * \313             a64                         indicates fixed 64-bit address size, 0x67 invalid.
 * \314             norexb                      (disassembler only) invalid with REX.B
 * \315             norexx                      (disassembler only) invalid with REX.X
 * \316             norexr                      (disassembler only) invalid with REX.R
 * \317             norexw                      (disassembler only) invalid with REX.W
 * \320             o16                         indicates fixed 16-bit operand size, i.e. optional 0x66.
 * \321             o32                         indicates fixed 32-bit operand size, i.e. optional 0x66.
 * \322             odf                         indicates that this instruction is only valid when the
 *                                              operand size is the default (instruction to disassembler,
 *                                              generates no code in the assembler)
 * \323             o64nw                       indicates fixed 64-bit operand size, REX on extensions only.
 * \324             o64                         indicates 64-bit operand size requiring REX prefix.
 * \325             nohi                        instruction which always uses spl/bpl/sil/dil
 * \326             nof3                        instruction not valid with 0xF3 REP prefix.  Hint for
 *                                              disassembler only; for SSE instructions.
 * \330                                         a literal byte follows in the code stream, to be added
 *                                              to the condition code value of the instruction.
 * \331             norep                       instruction not valid with REP prefix.  Hint for
 *                                              disassembler only; for SSE instructions.
 * \332             f2i                         REP prefix (0xF2 byte) used as opcode extension.
 * \333             f3i                         REP prefix (0xF3 byte) used as opcode extension.
 * \334             rex.l                       LOCK prefix used as REX.R (used in non-64-bit mode)
 * \335             repe                        disassemble a rep (0xF3 byte) prefix as repe not rep.
 * \336             mustrep                     force a REP(E) prefix (0xF3) even if not specified.
 * \337             mustrepne                   force a REPNE prefix (0xF2) even if not specified.
 *                                              \336-\337 are still listed as prefixes in the disassembler.
 * \340             resb                        reserve <operand 0> bytes of uninitialized storage.
 *                                              Operand 0 had better be a segmentless constant.
 * \341             wait                        this instruction needs a WAIT "prefix"
 * \360             np                          no SSE prefix (== \364\331)
 * \361                                         66 SSE prefix (== \366\331)
 * \364             !osp                        operand-size prefix (0x66) not permitted
 * \365             !asp                        address-size prefix (0x67) not permitted
 * \366                                         operand-size prefix (0x66) used as opcode extension
 * \367                                         address-size prefix (0x67) used as opcode extension
 * \370,\371        jcc8                        match only if operand 0 meets byte jump criteria.
 *                  jmp8                        370 is used for Jcc, 371 is used for JMP.
 * \373             jlen                        assemble 0x03 if bits==16, 0x05 if bits==32;
 *                                              used for conditional jump over longer jump
 * \374             vsibx|vm32x|vm64x           this instruction takes an XMM VSIB memory EA
 * \375             vsiby|vm32y|vm64y           this instruction takes a YMM VSIB memory EA
 * \376             vsibz|vm32z|vm64z           this instruction takes a ZMM VSIB memory EA
 */

#include "compiler.h"


#include "nasm.h"
#include "nasmlib.h"
#include "error.h"
#include "assemble.h"
#include "insns.h"
#include "tables.h"
#include "disp8.h"
#include "listing.h"

enum match_result { … };

ea;

#define GEN_SIB(scale, index, base) …

#define GEN_MODRM(mod, reg, rm) …

/*
 * Forward declarations for the file-local (static) helpers below.
 * Each of these is defined further down in this file; declaring them
 * here lets earlier functions call them regardless of definition order.
 */
static int64_t calcsize(int32_t, int64_t, int, insn *,
                        const struct itemplate *);
static int emit_prefix(struct out_data *data, const int bits, insn *ins);
static void gencode(struct out_data *data, insn *ins);
static enum match_result find_match(const struct itemplate **tempp,
                                    insn *instruction,
                                    int32_t segment, int64_t offset, int bits);
static enum match_result matches(const struct itemplate *, insn *, int bits);
static opflags_t regflag(const operand *);
static int32_t regval(const operand *);
static int rexflags(int, opflags_t, int);
static int op_rexflags(const operand *, int);
static int op_evexflags(const operand *, int, uint8_t);
static void add_asp(insn *, int);

static enum ea_type process_ea(operand *, ea *, int, int,
                               opflags_t, insn *, const char **);

static inline bool absolute_op(const struct operand *o)
{ … }

static int has_prefix(insn * ins, enum prefix_pos pos, int prefix)
{ … }

static void assert_no_prefix(insn * ins, enum prefix_pos pos)
{ … }

static const char *size_name(int size)
{ … }

static void warn_overflow(int size)
{ … }

static void warn_overflow_const(int64_t data, int size)
{ … }

static void warn_overflow_out(int64_t data, int size, enum out_sign sign)
{ … }

/*
 * This routine wraps the real output format's output routine,
 * in order to pass a copy of the data off to the listing file
 * generator at the same time, flatten unnecessary relocations,
 * and verify backend compatibility.
 */
/*
 * This warning is currently issued by backends, but in the future
 * this code should be centralized.
 *
 *!zeroing [on] RESx in initialized section becomes zero
 *!  a \c{RESx} directive was used in a section which contains
 *!  initialized data, and the output format does not support
 *!  this. Instead, this will be replaced with explicit zero
 *!  content, which may produce a large output file.
 */
static void out(struct out_data *data)
{ … }

static inline void out_rawdata(struct out_data *data, const void *rawdata,
                               size_t size)
{ … }

static void out_rawbyte(struct out_data *data, uint8_t byte)
{ … }

static inline void out_reserve(struct out_data *data, uint64_t size)
{ … }

static void out_segment(struct out_data *data, const struct operand *opx)
{ … }

static void out_imm(struct out_data *data, const struct operand *opx,
                    int size, enum out_sign sign)
{ … }

static void out_reladdr(struct out_data *data, const struct operand *opx,
                        int size)
{ … }

static bool jmp_match(int32_t segment, int64_t offset, int bits,
                      insn * ins, const struct itemplate *temp)
{ … }

static inline int64_t merge_resb(insn *ins, int64_t isize)
{ … }

/* This must handle non-power-of-2 alignment values */
static inline size_t pad_bytes(size_t len, size_t align)
{ … }

static void out_eops(struct out_data *data, const extop *e)
{ … }

/* This is totally just a wild guess what is reasonable... */
#define INCBIN_MAX_BUF …

int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction)
{ … }

static int32_t eops_typeinfo(const extop *e)
{ … }

static inline void debug_set_db_type(insn *instruction)
{ … }

static void debug_set_type(insn *instruction)
{ … }


/* Process an EQU directive */
static void define_equ(insn * instruction)
{ … }

static int64_t len_extops(const extop *e)
{ … }

int64_t insn_size(int32_t segment, int64_t offset, int bits, insn *instruction)
{ … }

static void bad_hle_warn(const insn * ins, uint8_t hleok)
{ … }

/* Common construct */
#define case3(x) …
#define case4(x) …

static int64_t calcsize(int32_t segment, int64_t offset, int bits,
                        insn * ins, const struct itemplate *temp)
{ … }

static inline void emit_rex(struct out_data *data, insn *ins)
{ … }

static int emit_prefix(struct out_data *data, const int bits, insn *ins)
{ … }

static void gencode(struct out_data *data, insn *ins)
{ … }

static opflags_t regflag(const operand * o)
{ … }

static int32_t regval(const operand * o)
{ … }

static int op_rexflags(const operand * o, int mask)
{ … }

static int rexflags(int val, opflags_t flags, int mask)
{ … }

static int evexflags(int val, decoflags_t deco,
                     int mask, uint8_t byte)
{ … }

static int op_evexflags(const operand * o, int mask, uint8_t byte)
{ … }

static enum match_result find_match(const struct itemplate **tempp,
                                    insn *instruction,
                                    int32_t segment, int64_t offset, int bits)
{ … }

static uint8_t get_broadcast_num(opflags_t opflags, opflags_t brsize)
{ … }

static enum match_result matches(const struct itemplate *itemp,
                                 insn *instruction, int bits)
{ … }

/*
 * Check if ModR/M.mod should/can be 01.
 * - EAF_BYTEOFFS is set
 * - offset can fit in a byte when EVEX is not used
 * - offset can be compressed when EVEX is used
 */
#define IS_MOD_01() …

static enum ea_type process_ea(operand *input, ea *output, int bits,
                               int rfield, opflags_t rflags, insn *ins,
                               const char **errmsg)
{ … }

static void add_asp(insn *ins, int addrbits)
{ … }
chromium/third_party/nasm/asm/assemble.c