mirror of
https://github.com/php/php-src.git
synced 2026-03-29 11:42:17 +02:00
Indirect Branch Tracking (IBT) is part of Intel's Control-Flow Enforcement Technology (CET). IBT is hardware based, forward edge Control-Flow-Integrity mechanism where any indirect CALL/JMP must target an ENDBR instruction or suffer #CP. This commit adds IBT support for JIT: 1. Add endbr32/64 instruction in Dynasm. 2. Insert endbr32/64 in indirect branch target for jitted code. gcc support CET since v8.1 and set it to default since gcc 11. With this commit, endbr is inserted in jitted code if PHP is compiled with "gcc -fcf-protection=full/branch". Signed-off-by: Chen, Hu <hu1.chen@intel.com>
2391 lines
74 KiB
Lua
2391 lines
74 KiB
Lua
------------------------------------------------------------------------------
|
|
-- DynASM x86/x64 module.
|
|
--
|
|
-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
|
|
-- See dynasm.lua for full copyright notice.
|
|
------------------------------------------------------------------------------
|
|
|
|
local x64 = x64
|
|
|
|
-- Module information:
|
|
local _info = {
|
|
arch = x64 and "x64" or "x86",
|
|
description = "DynASM x86/x64 module",
|
|
version = "1.5.0",
|
|
vernum = 10500,
|
|
release = "2021-05-02",
|
|
author = "Mike Pall",
|
|
license = "MIT",
|
|
}
|
|
|
|
-- Exported glue functions for the arch-specific module.
|
|
local _M = { _info = _info }
|
|
|
|
-- Cache library functions.
|
|
local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
|
|
local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatable
|
|
local _s = string
|
|
local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
|
|
local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
|
|
local concat, sort, remove = table.concat, table.sort, table.remove
|
|
local bit = bit or require("bit")
|
|
local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
|
|
|
|
-- Inherited tables and callbacks.
|
|
local g_opt, g_arch
|
|
local wline, werror, wfatal, wwarn
|
|
|
|
-- Action name list.
|
|
-- CHECK: Keep this in sync with the C code!
|
|
local action_names = {
|
|
-- int arg, 1 buffer pos:
|
|
"DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
|
|
-- action arg (1 byte), int arg, 1 buffer pos (reg/num):
|
|
"VREG", "SPACE",
|
|
-- ptrdiff_t arg, 1 buffer pos (address): !x64
|
|
"SETLABEL", "REL_A",
|
|
-- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
|
|
"REL_LG", "REL_PC",
|
|
-- action arg (1 byte) or int arg, 1 buffer pos (link):
|
|
"IMM_LG", "IMM_PC",
|
|
-- action arg (1 byte) or int arg, 1 buffer pos (offset):
|
|
"LABEL_LG", "LABEL_PC",
|
|
-- action arg (1 byte), 1 buffer pos (offset):
|
|
"ALIGN",
|
|
-- action args (2 bytes), no buffer pos.
|
|
"EXTERN",
|
|
-- action arg (1 byte), no buffer pos.
|
|
"ESC",
|
|
-- no action arg, no buffer pos.
|
|
"MARK",
|
|
-- action arg (1 byte), no buffer pos, terminal action:
|
|
"SECTION",
|
|
-- no args, no buffer pos, terminal action:
|
|
"STOP"
|
|
}
|
|
|
|
-- Maximum number of section buffer positions for dasm_put().
|
|
-- CHECK: Keep this in sync with the C code!
|
|
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
|
|
|
|
-- Action name -> action number (dynamically generated below).
|
|
local map_action = {}
|
|
-- First action number. Everything below does not need to be escaped.
|
|
local actfirst = 256-#action_names
|
|
|
|
-- Action list buffer and string (only used to remove dupes).
|
|
local actlist = {}
|
|
local actstr = ""
|
|
|
|
-- Argument list for next dasm_put(). Start with offset 0 into action list.
|
|
local actargs = { 0 }
|
|
|
|
-- Current number of section buffer positions for dasm_put().
|
|
local secpos = 1
|
|
|
|
-- VREG kind encodings, pre-shifted by 5 bits.
|
|
local map_vreg = {
|
|
["modrm.rm.m"] = 0x00,
|
|
["modrm.rm.r"] = 0x20,
|
|
["opcode"] = 0x20,
|
|
["sib.base"] = 0x20,
|
|
["sib.index"] = 0x40,
|
|
["modrm.reg"] = 0x80,
|
|
["vex.v"] = 0xa0,
|
|
["imm.hi"] = 0xc0,
|
|
}
|
|
|
|
-- Current number of VREG actions contributing to REX/VEX shrinkage.
|
|
local vreg_shrink_count = 0
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Compute action numbers for action names.
|
|
for n,name in ipairs(action_names) do
|
|
local num = actfirst + n - 1
|
|
map_action[name] = num
|
|
end
|
|
|
|
-- Dump action names and numbers.
|
|
local function dumpactions(out)
|
|
out:write("DynASM encoding engine action codes:\n")
|
|
for n,name in ipairs(action_names) do
|
|
local num = map_action[name]
|
|
out:write(format(" %-10s %02X %d\n", name, num, num))
|
|
end
|
|
out:write("\n")
|
|
end
|
|
|
|
-- Write action list buffer as a huge static C array.
|
|
local function writeactions(out, name)
|
|
local nn = #actlist
|
|
local last = actlist[nn] or 255
|
|
actlist[nn] = nil -- Remove last byte.
|
|
if nn == 0 then nn = 1 end
|
|
out:write("static const unsigned char ", name, "[", nn, "] = {\n")
|
|
local s = " "
|
|
for n,b in ipairs(actlist) do
|
|
s = s..b..","
|
|
if #s >= 75 then
|
|
assert(out:write(s, "\n"))
|
|
s = " "
|
|
end
|
|
end
|
|
out:write(s, last, "\n};\n\n") -- Add last byte back.
|
|
end
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Add byte to action list.
|
|
local function wputxb(n)
|
|
assert(n >= 0 and n <= 255 and n % 1 == 0, "byte out of range")
|
|
actlist[#actlist+1] = n
|
|
end
|
|
|
|
-- Add action to list with optional arg. Advance buffer pos, too.
|
|
local function waction(action, a, num)
|
|
wputxb(assert(map_action[action], "bad action name `"..action.."'"))
|
|
if a then actargs[#actargs+1] = a end
|
|
if a or num then secpos = secpos + (num or 1) end
|
|
end
|
|
|
|
-- Optionally add a VREG action.
|
|
local function wvreg(kind, vreg, psz, sk, defer)
|
|
if not vreg then return end
|
|
waction("VREG", vreg)
|
|
local b = assert(map_vreg[kind], "bad vreg kind `"..vreg.."'")
|
|
if b < (sk or 0) then
|
|
vreg_shrink_count = vreg_shrink_count + 1
|
|
end
|
|
if not defer then
|
|
b = b + vreg_shrink_count * 8
|
|
vreg_shrink_count = 0
|
|
end
|
|
wputxb(b + (psz or 0))
|
|
end
|
|
|
|
-- Add call to embedded DynASM C code.
|
|
local function wcall(func, args)
|
|
wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
|
|
end
|
|
|
|
-- Delete duplicate action list chunks. A tad slow, but so what.
|
|
local function dedupechunk(offset)
|
|
local al, as = actlist, actstr
|
|
local chunk = char(unpack(al, offset+1, #al))
|
|
local orig = find(as, chunk, 1, true)
|
|
if orig then
|
|
actargs[1] = orig-1 -- Replace with original offset.
|
|
for i=offset+1,#al do al[i] = nil end -- Kill dupe.
|
|
else
|
|
actstr = as..chunk
|
|
end
|
|
end
|
|
|
|
-- Flush action list (intervening C code or buffer pos overflow).
|
|
local function wflush(term)
|
|
local offset = actargs[1]
|
|
if #actlist == offset then return end -- Nothing to flush.
|
|
if not term then waction("STOP") end -- Terminate action list.
|
|
dedupechunk(offset)
|
|
wcall("put", actargs) -- Add call to dasm_put().
|
|
actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
|
|
secpos = 1 -- The actionlist offset occupies a buffer position, too.
|
|
end
|
|
|
|
-- Put escaped byte.
|
|
local function wputb(n)
|
|
if n >= actfirst then waction("ESC") end -- Need to escape byte.
|
|
wputxb(n)
|
|
end
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Global label name -> global label number. With auto assignment on 1st use.
|
|
local next_global = 10
|
|
local map_global = setmetatable({}, { __index = function(t, name)
|
|
if not match(name, "^[%a_][%w_@]*$") then werror("bad global label") end
|
|
local n = next_global
|
|
if n > 246 then werror("too many global labels") end
|
|
next_global = n + 1
|
|
t[name] = n
|
|
return n
|
|
end})
|
|
|
|
-- Dump global labels.
|
|
local function dumpglobals(out, lvl)
|
|
local t = {}
|
|
for name, n in pairs(map_global) do t[n] = name end
|
|
out:write("Global labels:\n")
|
|
for i=10,next_global-1 do
|
|
out:write(format(" %s\n", t[i]))
|
|
end
|
|
out:write("\n")
|
|
end
|
|
|
|
-- Write global label enum.
|
|
local function writeglobals(out, prefix)
|
|
local t = {}
|
|
for name, n in pairs(map_global) do t[n] = name end
|
|
out:write("enum {\n")
|
|
for i=10,next_global-1 do
|
|
out:write(" ", prefix, gsub(t[i], "@.*", ""), ",\n")
|
|
end
|
|
out:write(" ", prefix, "_MAX\n};\n")
|
|
end
|
|
|
|
-- Write global label names.
|
|
local function writeglobalnames(out, name)
|
|
local t = {}
|
|
for name, n in pairs(map_global) do t[n] = name end
|
|
out:write("static const char *const ", name, "[] = {\n")
|
|
for i=10,next_global-1 do
|
|
out:write(" \"", t[i], "\",\n")
|
|
end
|
|
out:write(" (const char *)0\n};\n")
|
|
end
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Extern label name -> extern label number. With auto assignment on 1st use.
|
|
local next_extern = -1
|
|
local map_extern = setmetatable({}, { __index = function(t, name)
|
|
-- No restrictions on the name for now.
|
|
local n = next_extern
|
|
if n < -256 then werror("too many extern labels") end
|
|
next_extern = n - 1
|
|
t[name] = n
|
|
return n
|
|
end})
|
|
|
|
-- Dump extern labels.
|
|
local function dumpexterns(out, lvl)
|
|
local t = {}
|
|
for name, n in pairs(map_extern) do t[-n] = name end
|
|
out:write("Extern labels:\n")
|
|
for i=1,-next_extern-1 do
|
|
out:write(format(" %s\n", t[i]))
|
|
end
|
|
out:write("\n")
|
|
end
|
|
|
|
-- Write extern label names.
|
|
local function writeexternnames(out, name)
|
|
local t = {}
|
|
for name, n in pairs(map_extern) do t[-n] = name end
|
|
out:write("static const char *const ", name, "[] = {\n")
|
|
for i=1,-next_extern-1 do
|
|
out:write(" \"", t[i], "\",\n")
|
|
end
|
|
out:write(" (const char *)0\n};\n")
|
|
end
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Arch-specific maps.
|
|
local map_archdef = {} -- Ext. register name -> int. name.
|
|
local map_reg_rev = {} -- Int. register name -> ext. name.
|
|
local map_reg_num = {} -- Int. register name -> register number.
|
|
local map_reg_opsize = {} -- Int. register name -> operand size.
|
|
local map_reg_valid_base = {} -- Int. register name -> valid base register?
|
|
local map_reg_valid_index = {} -- Int. register name -> valid index register?
|
|
local map_reg_needrex = {} -- Int. register name -> need rex vs. no rex.
|
|
local reg_list = {} -- Canonical list of int. register names.
|
|
|
|
local map_type = {} -- Type name -> { ctype, reg }
|
|
local ctypenum = 0 -- Type number (for _PTx macros).
|
|
|
|
local addrsize = x64 and "q" or "d" -- Size for address operands.
|
|
|
|
-- Helper functions to fill register maps.
|
|
local function mkrmap(sz, cl, names)
|
|
local cname = format("@%s", sz)
|
|
reg_list[#reg_list+1] = cname
|
|
map_archdef[cl] = cname
|
|
map_reg_rev[cname] = cl
|
|
map_reg_num[cname] = -1
|
|
map_reg_opsize[cname] = sz
|
|
if sz == addrsize or sz == "d" then
|
|
map_reg_valid_base[cname] = true
|
|
map_reg_valid_index[cname] = true
|
|
end
|
|
if names then
|
|
for n,name in ipairs(names) do
|
|
local iname = format("@%s%x", sz, n-1)
|
|
reg_list[#reg_list+1] = iname
|
|
map_archdef[name] = iname
|
|
map_reg_rev[iname] = name
|
|
map_reg_num[iname] = n-1
|
|
map_reg_opsize[iname] = sz
|
|
if sz == "b" and n > 4 then map_reg_needrex[iname] = false end
|
|
if sz == addrsize or sz == "d" then
|
|
map_reg_valid_base[iname] = true
|
|
map_reg_valid_index[iname] = true
|
|
end
|
|
end
|
|
end
|
|
for i=0,(x64 and sz ~= "f") and 15 or 7 do
|
|
local needrex = sz == "b" and i > 3
|
|
local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
|
|
if needrex then map_reg_needrex[iname] = true end
|
|
local name
|
|
if sz == "o" or sz == "y" then name = format("%s%d", cl, i)
|
|
elseif sz == "f" then name = format("st%d", i)
|
|
else name = format("r%d%s", i, sz == addrsize and "" or sz) end
|
|
map_archdef[name] = iname
|
|
if not map_reg_rev[iname] then
|
|
reg_list[#reg_list+1] = iname
|
|
map_reg_rev[iname] = name
|
|
map_reg_num[iname] = i
|
|
map_reg_opsize[iname] = sz
|
|
if sz == addrsize or sz == "d" then
|
|
map_reg_valid_base[iname] = true
|
|
map_reg_valid_index[iname] = true
|
|
end
|
|
end
|
|
end
|
|
reg_list[#reg_list+1] = ""
|
|
end
|
|
|
|
-- Integer registers (qword, dword, word and byte sized).
|
|
if x64 then
|
|
mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"})
|
|
end
|
|
mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"})
|
|
mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
|
|
mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
|
|
map_reg_valid_index[map_archdef.esp] = false
|
|
if x64 then map_reg_valid_index[map_archdef.rsp] = false end
|
|
if x64 then map_reg_needrex[map_archdef.Rb] = true end
|
|
map_archdef["Ra"] = "@"..addrsize
|
|
|
|
-- FP registers (internally tword sized, but use "f" as operand size).
|
|
mkrmap("f", "Rf")
|
|
|
|
-- SSE registers (oword sized, but qword and dword accessible).
|
|
mkrmap("o", "xmm")
|
|
|
|
-- AVX registers (yword sized, but oword, qword and dword accessible).
|
|
mkrmap("y", "ymm")
|
|
|
|
-- Operand size prefixes to codes.
|
|
local map_opsize = {
|
|
byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
|
|
tword = "t", aword = addrsize,
|
|
}
|
|
|
|
-- Operand size code to number.
|
|
local map_opsizenum = {
|
|
b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
|
|
}
|
|
|
|
-- Operand size code to name.
|
|
local map_opsizename = {
|
|
b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
|
|
t = "tword", f = "fpword",
|
|
}
|
|
|
|
-- Valid index register scale factors.
|
|
local map_xsc = {
|
|
["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3,
|
|
}
|
|
|
|
-- Condition codes.
|
|
local map_cc = {
|
|
o = 0, no = 1, b = 2, nb = 3, e = 4, ne = 5, be = 6, nbe = 7,
|
|
s = 8, ns = 9, p = 10, np = 11, l = 12, nl = 13, le = 14, nle = 15,
|
|
c = 2, nae = 2, nc = 3, ae = 3, z = 4, nz = 5, na = 6, a = 7,
|
|
pe = 10, po = 11, nge = 12, ge = 13, ng = 14, g = 15,
|
|
}
|
|
|
|
|
|
-- Reverse defines for registers.
|
|
function _M.revdef(s)
|
|
return gsub(s, "@%w+", map_reg_rev)
|
|
end
|
|
|
|
-- Dump register names and numbers
|
|
local function dumpregs(out)
|
|
out:write("Register names, sizes and internal numbers:\n")
|
|
for _,reg in ipairs(reg_list) do
|
|
if reg == "" then
|
|
out:write("\n")
|
|
else
|
|
local name = map_reg_rev[reg]
|
|
local num = map_reg_num[reg]
|
|
local opsize = map_opsizename[map_reg_opsize[reg]]
|
|
out:write(format(" %-5s %-8s %s\n", name, opsize,
|
|
num < 0 and "(variable)" or num))
|
|
end
|
|
end
|
|
end
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC).
|
|
local function wputlabel(aprefix, imm, num)
|
|
if type(imm) == "number" then
|
|
if imm < 0 then
|
|
waction("EXTERN")
|
|
wputxb(aprefix == "IMM_" and 0 or 1)
|
|
imm = -imm-1
|
|
else
|
|
waction(aprefix.."LG", nil, num);
|
|
end
|
|
wputxb(imm)
|
|
else
|
|
waction(aprefix.."PC", imm, num)
|
|
end
|
|
end
|
|
|
|
-- Put signed byte or arg.
|
|
local function wputsbarg(n)
|
|
if type(n) == "number" then
|
|
if n < -128 or n > 127 then
|
|
werror("signed immediate byte out of range")
|
|
end
|
|
if n < 0 then n = n + 256 end
|
|
wputb(n)
|
|
else waction("IMM_S", n) end
|
|
end
|
|
|
|
-- Put unsigned byte or arg.
|
|
local function wputbarg(n)
|
|
if type(n) == "number" then
|
|
if n < 0 or n > 255 then
|
|
werror("unsigned immediate byte out of range")
|
|
end
|
|
wputb(n)
|
|
else waction("IMM_B", n) end
|
|
end
|
|
|
|
-- Put unsigned word or arg.
|
|
local function wputwarg(n)
|
|
if type(n) == "number" then
|
|
if shr(n, 16) ~= 0 then
|
|
werror("unsigned immediate word out of range")
|
|
end
|
|
wputb(band(n, 255)); wputb(shr(n, 8));
|
|
else waction("IMM_W", n) end
|
|
end
|
|
|
|
-- Put signed or unsigned dword or arg.
|
|
local function wputdarg(n)
|
|
local tn = type(n)
|
|
if tn == "number" then
|
|
wputb(band(n, 255))
|
|
wputb(band(shr(n, 8), 255))
|
|
wputb(band(shr(n, 16), 255))
|
|
wputb(shr(n, 24))
|
|
elseif tn == "table" then
|
|
wputlabel("IMM_", n[1], 1)
|
|
else
|
|
waction("IMM_D", n)
|
|
end
|
|
end
|
|
|
|
-- Put signed or unsigned qword or arg.
|
|
local function wputqarg(n)
|
|
local tn = type(n)
|
|
if tn == "number" then -- This is only used for numbers from -2^31..2^32-1.
|
|
wputb(band(n, 255))
|
|
wputb(band(shr(n, 8), 255))
|
|
wputb(band(shr(n, 16), 255))
|
|
wputb(shr(n, 24))
|
|
local sign = n < 0 and 255 or 0
|
|
wputb(sign); wputb(sign); wputb(sign); wputb(sign)
|
|
else
|
|
waction("IMM_D", format("(unsigned int)(%s)", n))
|
|
waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n))
|
|
end
|
|
end
|
|
|
|
-- Put operand-size dependent number or arg (defaults to dword).
|
|
local function wputszarg(sz, n)
|
|
if not sz or sz == "d" or sz == "q" then wputdarg(n)
|
|
elseif sz == "w" then wputwarg(n)
|
|
elseif sz == "b" then wputbarg(n)
|
|
elseif sz == "s" then wputsbarg(n)
|
|
else werror("bad operand size") end
|
|
end
|
|
|
|
-- Put multi-byte opcode with operand-size dependent modifications.
|
|
local function wputop(sz, op, rex, vex, vregr, vregxb)
|
|
local psz, sk = 0, nil
|
|
if vex then
|
|
local tail
|
|
if vex.m == 1 and band(rex, 11) == 0 then
|
|
if x64 and vregxb then
|
|
sk = map_vreg["modrm.reg"]
|
|
else
|
|
wputb(0xc5)
|
|
tail = shl(bxor(band(rex, 4), 4), 5)
|
|
psz = 3
|
|
end
|
|
end
|
|
if not tail then
|
|
wputb(0xc4)
|
|
wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
|
|
tail = shl(band(rex, 8), 4)
|
|
psz = 4
|
|
end
|
|
local reg, vreg = 0, nil
|
|
if vex.v then
|
|
reg = vex.v.reg
|
|
if not reg then werror("bad vex operand") end
|
|
if reg < 0 then reg = 0; vreg = vex.v.vreg end
|
|
end
|
|
if sz == "y" or vex.l then tail = tail + 4 end
|
|
wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
|
|
wvreg("vex.v", vreg)
|
|
rex = 0
|
|
if op >= 256 then werror("bad vex opcode") end
|
|
else
|
|
if rex ~= 0 then
|
|
if not x64 then werror("bad operand size") end
|
|
elseif (vregr or vregxb) and x64 then
|
|
rex = 0x10
|
|
sk = map_vreg["vex.v"]
|
|
end
|
|
end
|
|
local r
|
|
if sz == "w" then wputb(102) end
|
|
-- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
|
|
if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
|
|
if op >= 16777216 then wputb(shr(op, 24)); op = band(op, 0xffffff) end
|
|
if op >= 65536 then
|
|
if rex ~= 0 then
|
|
local opc3 = band(op, 0xffff00)
|
|
if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
|
|
wputb(64 + band(rex, 15)); rex = 0; psz = 2
|
|
end
|
|
end
|
|
wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
|
|
end
|
|
if op >= 256 then
|
|
local b = shr(op, 8)
|
|
if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
|
|
wputb(b); op = band(op, 255); psz = psz + 1
|
|
end
|
|
if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
|
|
if sz == "b" then op = op - 1 end
|
|
wputb(op)
|
|
return psz, sk
|
|
end
|
|
|
|
-- Put ModRM or SIB formatted byte.
|
|
local function wputmodrm(m, s, rm, vs, vrm)
|
|
assert(m < 4 and s < 16 and rm < 16, "bad modrm operands")
|
|
wputb(shl(m, 6) + shl(band(s, 7), 3) + band(rm, 7))
|
|
end
|
|
|
|
-- Put ModRM/SIB plus optional displacement.
|
|
local function wputmrmsib(t, imark, s, vsreg, psz, sk)
|
|
local vreg, vxreg
|
|
local reg, xreg = t.reg, t.xreg
|
|
if reg and reg < 0 then reg = 0; vreg = t.vreg end
|
|
if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end
|
|
if s < 0 then s = 0 end
|
|
|
|
-- Register mode.
|
|
if sub(t.mode, 1, 1) == "r" then
|
|
wputmodrm(3, s, reg)
|
|
wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
|
|
wvreg("modrm.rm.r", vreg, psz+1, sk)
|
|
return
|
|
end
|
|
|
|
local disp = t.disp
|
|
local tdisp = type(disp)
|
|
-- No base register?
|
|
if not reg then
|
|
local riprel = false
|
|
if xreg then
|
|
-- Indexed mode with index register only.
|
|
-- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
|
|
wputmodrm(0, s, 4)
|
|
if imark == "I" then waction("MARK") end
|
|
wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
|
|
wputmodrm(t.xsc, xreg, 5)
|
|
wvreg("sib.index", vxreg, psz+2, sk)
|
|
else
|
|
-- Pure 32 bit displacement.
|
|
if x64 and tdisp ~= "table" then
|
|
wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
|
|
wvreg("modrm.reg", vsreg, psz+1, sk)
|
|
if imark == "I" then waction("MARK") end
|
|
wputmodrm(0, 4, 5)
|
|
else
|
|
riprel = x64
|
|
wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
|
|
wvreg("modrm.reg", vsreg, psz+1, sk)
|
|
if imark == "I" then waction("MARK") end
|
|
end
|
|
end
|
|
if riprel then -- Emit rip-relative displacement.
|
|
if match("UWSiI", imark) then
|
|
werror("NYI: rip-relative displacement followed by immediate")
|
|
end
|
|
-- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
|
|
wputlabel("REL_", disp[1], 2)
|
|
else
|
|
wputdarg(disp)
|
|
end
|
|
return
|
|
end
|
|
|
|
local m
|
|
if tdisp == "number" then -- Check displacement size at assembly time.
|
|
if disp == 0 and band(reg, 7) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
|
|
if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
|
|
elseif disp >= -128 and disp <= 127 then m = 1
|
|
else m = 2 end
|
|
elseif tdisp == "table" then
|
|
m = 2
|
|
end
|
|
|
|
-- Index register present or esp as base register: need SIB encoding.
|
|
if xreg or band(reg, 7) == 4 then
|
|
wputmodrm(m or 2, s, 4) -- ModRM.
|
|
if m == nil or imark == "I" then waction("MARK") end
|
|
wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
|
|
wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
|
|
wvreg("sib.index", vxreg, psz+2, sk, vreg)
|
|
wvreg("sib.base", vreg, psz+2, sk)
|
|
else
|
|
wputmodrm(m or 2, s, reg) -- ModRM.
|
|
if (imark == "I" and (m == 1 or m == 2)) or
|
|
(m == nil and (vsreg or vreg)) then waction("MARK") end
|
|
wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
|
|
wvreg("modrm.rm.m", vreg, psz+1, sk)
|
|
end
|
|
|
|
-- Put displacement.
|
|
if m == 1 then wputsbarg(disp)
|
|
elseif m == 2 then wputdarg(disp)
|
|
elseif m == nil then waction("DISP", disp) end
|
|
end
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Return human-readable operand mode string.
|
|
local function opmodestr(op, args)
|
|
local m = {}
|
|
for i=1,#args do
|
|
local a = args[i]
|
|
m[#m+1] = sub(a.mode, 1, 1)..(a.opsize or "?")
|
|
end
|
|
return op.." "..concat(m, ",")
|
|
end
|
|
|
|
-- Convert number to valid integer or nil.
|
|
local function toint(expr, isqword)
|
|
local n = tonumber(expr)
|
|
if n then
|
|
if n % 1 ~= 0 then
|
|
werror("not an integer number `"..expr.."'")
|
|
elseif isqword then
|
|
if n < -2147483648 or n > 2147483647 then
|
|
n = nil -- Handle it as an expression to avoid precision loss.
|
|
end
|
|
elseif n < -2147483648 or n > 4294967295 then
|
|
werror("bad integer number `"..expr.."'")
|
|
end
|
|
return n
|
|
end
|
|
end
|
|
|
|
-- Parse immediate expression.
|
|
local function immexpr(expr)
|
|
-- &expr (pointer)
|
|
if sub(expr, 1, 1) == "&" then
|
|
return "iPJ", format("(ptrdiff_t)(%s)", sub(expr,2))
|
|
end
|
|
|
|
local prefix = sub(expr, 1, 2)
|
|
-- =>expr (pc label reference)
|
|
if prefix == "=>" then
|
|
return "iJ", sub(expr, 3)
|
|
end
|
|
-- ->name (global label reference)
|
|
if prefix == "->" then
|
|
return "iJ", map_global[sub(expr, 3)]
|
|
end
|
|
|
|
-- [<>][1-9] (local label reference)
|
|
local dir, lnum = match(expr, "^([<>])([1-9])$")
|
|
if dir then -- Fwd: 247-255, Bkwd: 1-9.
|
|
return "iJ", lnum + (dir == ">" and 246 or 0)
|
|
end
|
|
|
|
local extname = match(expr, "^extern%s+(%S+)$")
|
|
if extname then
|
|
return "iJ", map_extern[extname]
|
|
end
|
|
|
|
-- expr (interpreted as immediate)
|
|
return "iI", expr
|
|
end
|
|
|
|
-- Parse displacement expression: +-num, +-expr, +-opsize*num
|
|
local function dispexpr(expr)
|
|
local disp = expr == "" and 0 or toint(expr)
|
|
if disp then return disp end
|
|
local c, dispt = match(expr, "^([+-])%s*(.+)$")
|
|
if c == "+" then
|
|
expr = dispt
|
|
elseif not c then
|
|
werror("bad displacement expression `"..expr.."'")
|
|
end
|
|
local opsize, tailops = match(dispt, "^(%w+)%s*%*%s*(.+)$")
|
|
local ops, imm = map_opsize[opsize], toint(tailops)
|
|
if ops and imm then
|
|
if c == "-" then imm = -imm end
|
|
return imm*map_opsizenum[ops]
|
|
end
|
|
local mode, iexpr = immexpr(dispt)
|
|
if mode == "iJ" then
|
|
if c == "-" then werror("cannot invert label reference") end
|
|
return { iexpr }
|
|
end
|
|
return expr -- Need to return original signed expression.
|
|
end
|
|
|
|
-- Parse register or type expression.
|
|
local function rtexpr(expr)
|
|
if not expr then return end
|
|
local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$")
|
|
local tp = map_type[tname or expr]
|
|
if tp then
|
|
local reg = ovreg or tp.reg
|
|
local rnum = map_reg_num[reg]
|
|
if not rnum then
|
|
werror("type `"..(tname or expr).."' needs a register override")
|
|
end
|
|
if not map_reg_valid_base[reg] then
|
|
werror("bad base register override `"..(map_reg_rev[reg] or reg).."'")
|
|
end
|
|
return reg, rnum, tp
|
|
end
|
|
return expr, map_reg_num[expr]
|
|
end
|
|
|
|
-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
|
|
local function parseoperand(param, isqword)
|
|
local t = {}
|
|
|
|
local expr = param
|
|
local opsize, tailops = match(param, "^(%w+)%s*(.+)$")
|
|
if opsize then
|
|
t.opsize = map_opsize[opsize]
|
|
if t.opsize then expr = tailops end
|
|
end
|
|
|
|
local br = match(expr, "^%[%s*(.-)%s*%]$")
|
|
repeat
|
|
if br then
|
|
t.mode = "xm"
|
|
|
|
-- [disp]
|
|
t.disp = toint(br)
|
|
if t.disp then
|
|
t.mode = x64 and "xm" or "xmO"
|
|
break
|
|
end
|
|
|
|
-- [reg...]
|
|
local tp
|
|
local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$")
|
|
reg, t.reg, tp = rtexpr(reg)
|
|
if not t.reg then
|
|
-- [expr]
|
|
t.mode = x64 and "xm" or "xmO"
|
|
t.disp = dispexpr("+"..br)
|
|
break
|
|
end
|
|
|
|
if t.reg == -1 then
|
|
t.vreg, tailr = match(tailr, "^(%b())(.*)$")
|
|
if not t.vreg then werror("bad variable register expression") end
|
|
end
|
|
|
|
-- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
|
|
local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$")
|
|
if xsc then
|
|
if not map_reg_valid_index[reg] then
|
|
werror("bad index register `"..map_reg_rev[reg].."'")
|
|
end
|
|
t.xsc = map_xsc[xsc]
|
|
t.xreg = t.reg
|
|
t.vxreg = t.vreg
|
|
t.reg = nil
|
|
t.vreg = nil
|
|
t.disp = dispexpr(tailsc)
|
|
break
|
|
end
|
|
if not map_reg_valid_base[reg] then
|
|
werror("bad base register `"..map_reg_rev[reg].."'")
|
|
end
|
|
|
|
-- [reg] or [reg+-disp]
|
|
t.disp = toint(tailr) or (tailr == "" and 0)
|
|
if t.disp then break end
|
|
|
|
-- [reg+xreg...]
|
|
local xreg, tailx = match(tailr, "^%+%s*([@%w_:]+)%s*(.*)$")
|
|
xreg, t.xreg, tp = rtexpr(xreg)
|
|
if not t.xreg then
|
|
-- [reg+-expr]
|
|
t.disp = dispexpr(tailr)
|
|
break
|
|
end
|
|
if not map_reg_valid_index[xreg] then
|
|
werror("bad index register `"..map_reg_rev[xreg].."'")
|
|
end
|
|
|
|
if t.xreg == -1 then
|
|
t.vxreg, tailx = match(tailx, "^(%b())(.*)$")
|
|
if not t.vxreg then werror("bad variable register expression") end
|
|
end
|
|
|
|
-- [reg+xreg*xsc...]
|
|
local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$")
|
|
if xsc then
|
|
t.xsc = map_xsc[xsc]
|
|
tailx = tailsc
|
|
end
|
|
|
|
-- [...] or [...+-disp] or [...+-expr]
|
|
t.disp = dispexpr(tailx)
|
|
else
|
|
-- imm or opsize*imm
|
|
local imm = toint(expr, isqword)
|
|
if not imm and sub(expr, 1, 1) == "*" and t.opsize then
|
|
imm = toint(sub(expr, 2))
|
|
if imm then
|
|
imm = imm * map_opsizenum[t.opsize]
|
|
t.opsize = nil
|
|
end
|
|
end
|
|
if imm then
|
|
if t.opsize then werror("bad operand size override") end
|
|
local m = "i"
|
|
if imm == 1 then m = m.."1" end
|
|
if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end
|
|
if imm >= -128 and imm <= 127 then m = m.."S" end
|
|
t.imm = imm
|
|
t.mode = m
|
|
break
|
|
end
|
|
|
|
local tp
|
|
local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$")
|
|
reg, t.reg, tp = rtexpr(reg)
|
|
if t.reg then
|
|
if t.reg == -1 then
|
|
t.vreg, tailr = match(tailr, "^(%b())(.*)$")
|
|
if not t.vreg then werror("bad variable register expression") end
|
|
end
|
|
-- reg
|
|
if tailr == "" then
|
|
if t.opsize then werror("bad operand size override") end
|
|
t.opsize = map_reg_opsize[reg]
|
|
if t.opsize == "f" then
|
|
t.mode = t.reg == 0 and "fF" or "f"
|
|
else
|
|
if reg == "@w4" or (x64 and reg == "@d4") then
|
|
wwarn("bad idea, try again with `"..(x64 and "rsp'" or "esp'"))
|
|
end
|
|
t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm")
|
|
end
|
|
t.needrex = map_reg_needrex[reg]
|
|
break
|
|
end
|
|
|
|
-- type[idx], type[idx].field, type->field -> [reg+offset_expr]
|
|
if not tp then werror("bad operand `"..param.."'") end
|
|
t.mode = "xm"
|
|
t.disp = format(tp.ctypefmt, tailr)
|
|
else
|
|
t.mode, t.imm = immexpr(expr)
|
|
if sub(t.mode, -1) == "J" then
|
|
if t.opsize and t.opsize ~= addrsize then
|
|
werror("bad operand size override")
|
|
end
|
|
t.opsize = addrsize
|
|
end
|
|
end
|
|
end
|
|
until true
|
|
return t
|
|
end
|
|
|
|
------------------------------------------------------------------------------
|
|
-- x86 Template String Description
|
|
-- ===============================
|
|
--
|
|
-- Each template string is a list of [match:]pattern pairs,
|
|
-- separated by "|". The first match wins. No match means a
|
|
-- bad or unsupported combination of operand modes or sizes.
|
|
--
|
|
-- The match part and the ":" is omitted if the operation has
|
|
-- no operands. Otherwise the first N characters are matched
|
|
-- against the mode strings of each of the N operands.
|
|
--
|
|
-- The mode string for each operand type is (see parseoperand()):
|
|
-- Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
|
|
-- FP register: "f", +"F" for st0
|
|
-- Index operand: "xm", +"O" for [disp] (pure offset)
|
|
-- Immediate: "i", +"S" for signed 8 bit, +"1" for 1,
|
|
-- +"I" for arg, +"P" for pointer
|
|
-- Any: +"J" for valid jump targets
|
|
--
|
|
-- So a match character "m" (mixed) matches both an integer register
|
|
-- and an index operand (to be encoded with the ModRM/SIB scheme).
|
|
-- But "r" matches only a register and "x" only an index operand
|
|
-- (e.g. for FP memory access operations).
|
|
--
|
|
-- The operand size match string starts right after the mode match
|
|
-- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty.
|
|
-- The effective data size of the operation is matched against this list.
|
|
--
|
|
-- If only the regular "b", "w", "d", "q", "t" operand sizes are
|
|
-- present, then all operands must be the same size. Unspecified sizes
|
|
-- are ignored, but at least one operand must have a size or the pattern
|
|
-- won't match (use the "byte", "word", "dword", "qword", "tword"
|
|
-- operand size overrides. E.g.: mov dword [eax], 1).
|
|
--
|
|
-- If the list has a "1" or "2" prefix, the operand size is taken
|
|
-- from the respective operand and any other operand sizes are ignored.
|
|
-- If the list contains only ".", all operand sizes are ignored.
|
|
-- If the list has a "/" prefix, the concatenated (mixed) operand sizes
|
|
-- are compared to the match.
|
|
--
|
|
-- E.g. "rrdw" matches for either two dword registers or two word
|
|
-- registers. "Fx2dq" matches an st0 operand plus an index operand
|
|
-- pointing to a dword (float) or qword (double).
|
|
--
|
|
-- Every character after the ":" is part of the pattern string:
|
|
-- Hex chars are accumulated to form the opcode (left to right).
|
|
-- "n" disables the standard opcode mods
|
|
-- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
|
|
-- "X" Force REX.W.
|
|
-- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode.
|
|
-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
|
|
-- The spare 3 bits are either filled with the last hex digit or
|
|
-- the result from a previous "r"/"R". The opcode is restored.
|
|
-- "u" Use VEX encoding, vvvv unused.
|
|
-- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is
|
|
-- removed from the list used by future characters).
|
|
-- "w" Use VEX encoding, vvvv from 3rd operand.
|
|
-- "L" Force VEX.L
|
|
--
|
|
-- All of the following characters force a flush of the opcode:
|
|
-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
|
|
-- "s" stores a 4 bit immediate from the last register operand,
|
|
-- followed by 4 zero bits.
|
|
-- "S" stores a signed 8 bit immediate from the last operand.
|
|
-- "U" stores an unsigned 8 bit immediate from the last operand.
|
|
-- "W" stores an unsigned 16 bit immediate from the last operand.
|
|
-- "i" stores an operand sized immediate from the last operand.
|
|
-- "I" dito, but generates an action code to optionally modify
|
|
-- the opcode (+2) for a signed 8 bit immediate.
|
|
-- "J" generates one of the REL action codes from the last operand.
|
|
--
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Template strings for x86 instructions. Ordered by first opcode byte.
|
|
-- Unimplemented opcodes (deliberate omissions) are marked with *.
|
|
local map_op = {
|
|
-- 00-05: add...
|
|
-- 06: *push es
|
|
-- 07: *pop es
|
|
-- 08-0D: or...
|
|
-- 0E: *push cs
|
|
-- 0F: two byte opcode prefix
|
|
-- 10-15: adc...
|
|
-- 16: *push ss
|
|
-- 17: *pop ss
|
|
-- 18-1D: sbb...
|
|
-- 1E: *push ds
|
|
-- 1F: *pop ds
|
|
-- 20-25: and...
|
|
es_0 = "26",
|
|
-- 27: *daa
|
|
-- 28-2D: sub...
|
|
cs_0 = "2E",
|
|
-- 2F: *das
|
|
-- 30-35: xor...
|
|
ss_0 = "36",
|
|
-- 37: *aaa
|
|
-- 38-3D: cmp...
|
|
ds_0 = "3E",
|
|
-- 3F: *aas
|
|
inc_1 = x64 and "m:FF0m" or "rdw:40r|m:FF0m",
|
|
dec_1 = x64 and "m:FF1m" or "rdw:48r|m:FF1m",
|
|
push_1 = (x64 and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or
|
|
"rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i",
|
|
pop_1 = x64 and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m",
|
|
-- 60: *pusha, *pushad, *pushaw
|
|
-- 61: *popa, *popad, *popaw
|
|
-- 62: *bound rdw,x
|
|
-- 63: x86: *arpl mw,rw
|
|
movsxd_2 = x64 and "rm/qd:63rM",
|
|
fs_0 = "64",
|
|
gs_0 = "65",
|
|
o16_0 = "66",
|
|
a16_0 = not x64 and "67" or nil,
|
|
a32_0 = x64 and "67",
|
|
-- 68: push idw
|
|
-- 69: imul rdw,mdw,idw
|
|
-- 6A: push ib
|
|
-- 6B: imul rdw,mdw,S
|
|
-- 6C: *insb
|
|
-- 6D: *insd, *insw
|
|
-- 6E: *outsb
|
|
-- 6F: *outsd, *outsw
|
|
-- 70-7F: jcc lb
|
|
-- 80: add... mb,i
|
|
-- 81: add... mdw,i
|
|
-- 82: *undefined
|
|
-- 83: add... mdw,S
|
|
test_2 = "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
|
|
-- 86: xchg rb,mb
|
|
-- 87: xchg rdw,mdw
|
|
-- 88: mov mb,r
|
|
-- 89: mov mdw,r
|
|
-- 8A: mov r,mb
|
|
-- 8B: mov r,mdw
|
|
-- 8C: *mov mdw,seg
|
|
lea_2 = "rx1dq:8DrM",
|
|
-- 8E: *mov seg,mdw
|
|
-- 8F: pop mdw
|
|
nop_0 = "90",
|
|
xchg_2 = "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
|
|
cbw_0 = "6698",
|
|
cwde_0 = "98",
|
|
cdqe_0 = "4898",
|
|
cwd_0 = "6699",
|
|
cdq_0 = "99",
|
|
cqo_0 = "4899",
|
|
-- 9A: *call iw:idw
|
|
wait_0 = "9B",
|
|
fwait_0 = "9B",
|
|
pushf_0 = "9C",
|
|
pushfd_0 = not x64 and "9C",
|
|
pushfq_0 = x64 and "9C",
|
|
popf_0 = "9D",
|
|
popfd_0 = not x64 and "9D",
|
|
popfq_0 = x64 and "9D",
|
|
sahf_0 = "9E",
|
|
lahf_0 = "9F",
|
|
mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
|
|
movsb_0 = "A4",
|
|
movsw_0 = "66A5",
|
|
movsd_0 = "A5",
|
|
cmpsb_0 = "A6",
|
|
cmpsw_0 = "66A7",
|
|
cmpsd_0 = "A7",
|
|
-- A8: test Rb,i
|
|
-- A9: test Rdw,i
|
|
stosb_0 = "AA",
|
|
stosw_0 = "66AB",
|
|
stosd_0 = "AB",
|
|
lodsb_0 = "AC",
|
|
lodsw_0 = "66AD",
|
|
lodsd_0 = "AD",
|
|
scasb_0 = "AE",
|
|
scasw_0 = "66AF",
|
|
scasd_0 = "AF",
|
|
-- B0-B7: mov rb,i
|
|
-- B8-BF: mov rdw,i
|
|
-- C0: rol... mb,i
|
|
-- C1: rol... mdw,i
|
|
ret_1 = "i.:nC2W",
|
|
ret_0 = "C3",
|
|
-- C4: *les rdw,mq
|
|
-- C5: *lds rdw,mq
|
|
-- C6: mov mb,i
|
|
-- C7: mov mdw,i
|
|
-- C8: *enter iw,ib
|
|
leave_0 = "C9",
|
|
-- CA: *retf iw
|
|
-- CB: *retf
|
|
int3_0 = "CC",
|
|
int_1 = "i.:nCDU",
|
|
into_0 = "CE",
|
|
-- CF: *iret
|
|
-- D0: rol... mb,1
|
|
-- D1: rol... mdw,1
|
|
-- D2: rol... mb,cl
|
|
-- D3: rol... mb,cl
|
|
-- D4: *aam ib
|
|
-- D5: *aad ib
|
|
-- D6: *salc
|
|
-- D7: *xlat
|
|
-- D8-DF: floating point ops
|
|
-- E0: *loopne
|
|
-- E1: *loope
|
|
-- E2: *loop
|
|
-- E3: *jcxz, *jecxz
|
|
-- E4: *in Rb,ib
|
|
-- E5: *in Rdw,ib
|
|
-- E6: *out ib,Rb
|
|
-- E7: *out ib,Rdw
|
|
call_1 = x64 and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J",
|
|
jmp_1 = x64 and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB
|
|
-- EA: *jmp iw:idw
|
|
-- EB: jmp ib
|
|
-- EC: *in Rb,dx
|
|
-- ED: *in Rdw,dx
|
|
-- EE: *out dx,Rb
|
|
-- EF: *out dx,Rdw
|
|
lock_0 = "F0",
|
|
int1_0 = "F1",
|
|
repne_0 = "F2",
|
|
repnz_0 = "F2",
|
|
rep_0 = "F3",
|
|
repe_0 = "F3",
|
|
repz_0 = "F3",
|
|
endbr32_0 = "F30F1EFB",
|
|
endbr64_0 = "F30F1EFA",
|
|
-- F4: *hlt
|
|
cmc_0 = "F5",
|
|
-- F6: test... mb,i; div... mb
|
|
-- F7: test... mdw,i; div... mdw
|
|
clc_0 = "F8",
|
|
stc_0 = "F9",
|
|
-- FA: *cli
|
|
cld_0 = "FC",
|
|
std_0 = "FD",
|
|
-- FE: inc... mb
|
|
-- FF: inc... mdw
|
|
|
|
-- misc ops
|
|
not_1 = "m:F72m",
|
|
neg_1 = "m:F73m",
|
|
mul_1 = "m:F74m",
|
|
imul_1 = "m:F75m",
|
|
div_1 = "m:F76m",
|
|
idiv_1 = "m:F77m",
|
|
|
|
imul_2 = "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
|
|
imul_3 = "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",
|
|
|
|
movzx_2 = "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
|
|
movsx_2 = "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",
|
|
|
|
bswap_1 = "rqd:0FC8r",
|
|
bsf_2 = "rmqdw:0FBCrM",
|
|
bsr_2 = "rmqdw:0FBDrM",
|
|
bt_2 = "mrqdw:0FA3Rm|miqdw:0FBA4mU",
|
|
btc_2 = "mrqdw:0FBBRm|miqdw:0FBA7mU",
|
|
btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU",
|
|
bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU",
|
|
|
|
shld_3 = "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
|
|
shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",
|
|
|
|
rdtsc_0 = "0F31", -- P1+
|
|
rdpmc_0 = "0F33", -- P6+
|
|
cpuid_0 = "0FA2", -- P1+
|
|
|
|
-- floating point ops
|
|
fst_1 = "ff:DDD0r|xd:D92m|xq:nDD2m",
|
|
fstp_1 = "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
|
|
fld_1 = "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",
|
|
|
|
fpop_0 = "DDD8", -- Alias for fstp st0.
|
|
|
|
fist_1 = "xw:nDF2m|xd:DB2m",
|
|
fistp_1 = "xw:nDF3m|xd:DB3m|xq:nDF7m",
|
|
fild_1 = "xw:nDF0m|xd:DB0m|xq:nDF5m",
|
|
|
|
fxch_0 = "D9C9",
|
|
fxch_1 = "ff:D9C8r",
|
|
fxch_2 = "fFf:D9C8r|Fff:D9C8R",
|
|
|
|
fucom_1 = "ff:DDE0r",
|
|
fucom_2 = "Fff:DDE0R",
|
|
fucomp_1 = "ff:DDE8r",
|
|
fucomp_2 = "Fff:DDE8R",
|
|
fucomi_1 = "ff:DBE8r", -- P6+
|
|
fucomi_2 = "Fff:DBE8R", -- P6+
|
|
fucomip_1 = "ff:DFE8r", -- P6+
|
|
fucomip_2 = "Fff:DFE8R", -- P6+
|
|
fcomi_1 = "ff:DBF0r", -- P6+
|
|
fcomi_2 = "Fff:DBF0R", -- P6+
|
|
fcomip_1 = "ff:DFF0r", -- P6+
|
|
fcomip_2 = "Fff:DFF0R", -- P6+
|
|
fucompp_0 = "DAE9",
|
|
fcompp_0 = "DED9",
|
|
|
|
fldenv_1 = "x.:D94m",
|
|
fnstenv_1 = "x.:D96m",
|
|
fstenv_1 = "x.:9BD96m",
|
|
fldcw_1 = "xw:nD95m",
|
|
fstcw_1 = "xw:n9BD97m",
|
|
fnstcw_1 = "xw:nD97m",
|
|
fstsw_1 = "Rw:n9BDFE0|xw:n9BDD7m",
|
|
fnstsw_1 = "Rw:nDFE0|xw:nDD7m",
|
|
fclex_0 = "9BDBE2",
|
|
fnclex_0 = "DBE2",
|
|
|
|
fnop_0 = "D9D0",
|
|
-- D9D1-D9DF: unassigned
|
|
|
|
fchs_0 = "D9E0",
|
|
fabs_0 = "D9E1",
|
|
-- D9E2: unassigned
|
|
-- D9E3: unassigned
|
|
ftst_0 = "D9E4",
|
|
fxam_0 = "D9E5",
|
|
-- D9E6: unassigned
|
|
-- D9E7: unassigned
|
|
fld1_0 = "D9E8",
|
|
fldl2t_0 = "D9E9",
|
|
fldl2e_0 = "D9EA",
|
|
fldpi_0 = "D9EB",
|
|
fldlg2_0 = "D9EC",
|
|
fldln2_0 = "D9ED",
|
|
fldz_0 = "D9EE",
|
|
-- D9EF: unassigned
|
|
|
|
f2xm1_0 = "D9F0",
|
|
fyl2x_0 = "D9F1",
|
|
fptan_0 = "D9F2",
|
|
fpatan_0 = "D9F3",
|
|
fxtract_0 = "D9F4",
|
|
fprem1_0 = "D9F5",
|
|
fdecstp_0 = "D9F6",
|
|
fincstp_0 = "D9F7",
|
|
fprem_0 = "D9F8",
|
|
fyl2xp1_0 = "D9F9",
|
|
fsqrt_0 = "D9FA",
|
|
fsincos_0 = "D9FB",
|
|
frndint_0 = "D9FC",
|
|
fscale_0 = "D9FD",
|
|
fsin_0 = "D9FE",
|
|
fcos_0 = "D9FF",
|
|
|
|
-- SSE, SSE2
|
|
andnpd_2 = "rmo:660F55rM",
|
|
andnps_2 = "rmo:0F55rM",
|
|
andpd_2 = "rmo:660F54rM",
|
|
andps_2 = "rmo:0F54rM",
|
|
clflush_1 = "x.:0FAE7m",
|
|
cmppd_3 = "rmio:660FC2rMU",
|
|
cmpps_3 = "rmio:0FC2rMU",
|
|
cmpsd_3 = "rrio:F20FC2rMU|rxi/oq:",
|
|
cmpss_3 = "rrio:F30FC2rMU|rxi/od:",
|
|
comisd_2 = "rro:660F2FrM|rx/oq:",
|
|
comiss_2 = "rro:0F2FrM|rx/od:",
|
|
cvtdq2pd_2 = "rro:F30FE6rM|rx/oq:",
|
|
cvtdq2ps_2 = "rmo:0F5BrM",
|
|
cvtpd2dq_2 = "rmo:F20FE6rM",
|
|
cvtpd2ps_2 = "rmo:660F5ArM",
|
|
cvtpi2pd_2 = "rx/oq:660F2ArM",
|
|
cvtpi2ps_2 = "rx/oq:0F2ArM",
|
|
cvtps2dq_2 = "rmo:660F5BrM",
|
|
cvtps2pd_2 = "rro:0F5ArM|rx/oq:",
|
|
cvtsd2si_2 = "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:",
|
|
cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:",
|
|
cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
|
|
cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
|
|
cvtss2sd_2 = "rro:F30F5ArM|rx/od:",
|
|
cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
|
|
cvttpd2dq_2 = "rmo:660FE6rM",
|
|
cvttps2dq_2 = "rmo:F30F5BrM",
|
|
cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
|
|
cvttss2si_2 = "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
|
|
fxsave_1 = "x.:0FAE0m",
|
|
fxrstor_1 = "x.:0FAE1m",
|
|
ldmxcsr_1 = "xd:0FAE2m",
|
|
lfence_0 = "0FAEE8",
|
|
maskmovdqu_2 = "rro:660FF7rM",
|
|
mfence_0 = "0FAEF0",
|
|
movapd_2 = "rmo:660F28rM|mro:660F29Rm",
|
|
movaps_2 = "rmo:0F28rM|mro:0F29Rm",
|
|
movd_2 = "rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:",
|
|
movdqa_2 = "rmo:660F6FrM|mro:660F7FRm",
|
|
movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm",
|
|
movhlps_2 = "rro:0F12rM",
|
|
movhpd_2 = "rx/oq:660F16rM|xr/qo:n660F17Rm",
|
|
movhps_2 = "rx/oq:0F16rM|xr/qo:n0F17Rm",
|
|
movlhps_2 = "rro:0F16rM",
|
|
movlpd_2 = "rx/oq:660F12rM|xr/qo:n660F13Rm",
|
|
movlps_2 = "rx/oq:0F12rM|xr/qo:n0F13Rm",
|
|
movmskpd_2 = "rr/do:660F50rM",
|
|
movmskps_2 = "rr/do:0F50rM",
|
|
movntdq_2 = "xro:660FE7Rm",
|
|
movnti_2 = "xrqd:0FC3Rm",
|
|
movntpd_2 = "xro:660F2BRm",
|
|
movntps_2 = "xro:0F2BRm",
|
|
movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
|
|
movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
|
|
movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
|
|
movupd_2 = "rmo:660F10rM|mro:660F11Rm",
|
|
movups_2 = "rmo:0F10rM|mro:0F11Rm",
|
|
orpd_2 = "rmo:660F56rM",
|
|
orps_2 = "rmo:0F56rM",
|
|
pause_0 = "F390",
|
|
pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
|
|
pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
|
|
pmovmskb_2 = "rr/do:660FD7rM",
|
|
prefetchnta_1 = "xb:n0F180m",
|
|
prefetcht0_1 = "xb:n0F181m",
|
|
prefetcht1_1 = "xb:n0F182m",
|
|
prefetcht2_1 = "xb:n0F183m",
|
|
pshufd_3 = "rmio:660F70rMU",
|
|
pshufhw_3 = "rmio:F30F70rMU",
|
|
pshuflw_3 = "rmio:F20F70rMU",
|
|
pslld_2 = "rmo:660FF2rM|rio:660F726mU",
|
|
pslldq_2 = "rio:660F737mU",
|
|
psllq_2 = "rmo:660FF3rM|rio:660F736mU",
|
|
psllw_2 = "rmo:660FF1rM|rio:660F716mU",
|
|
psrad_2 = "rmo:660FE2rM|rio:660F724mU",
|
|
psraw_2 = "rmo:660FE1rM|rio:660F714mU",
|
|
psrld_2 = "rmo:660FD2rM|rio:660F722mU",
|
|
psrldq_2 = "rio:660F733mU",
|
|
psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
|
|
psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
|
|
rcpps_2 = "rmo:0F53rM",
|
|
rcpss_2 = "rro:F30F53rM|rx/od:",
|
|
rsqrtps_2 = "rmo:0F52rM",
|
|
rsqrtss_2 = "rmo:F30F52rM",
|
|
sfence_0 = "0FAEF8",
|
|
shufpd_3 = "rmio:660FC6rMU",
|
|
shufps_3 = "rmio:0FC6rMU",
|
|
stmxcsr_1 = "xd:0FAE3m",
|
|
ucomisd_2 = "rro:660F2ErM|rx/oq:",
|
|
ucomiss_2 = "rro:0F2ErM|rx/od:",
|
|
unpckhpd_2 = "rmo:660F15rM",
|
|
unpckhps_2 = "rmo:0F15rM",
|
|
unpcklpd_2 = "rmo:660F14rM",
|
|
unpcklps_2 = "rmo:0F14rM",
|
|
xorpd_2 = "rmo:660F57rM",
|
|
xorps_2 = "rmo:0F57rM",
|
|
|
|
-- SSE3 ops
|
|
fisttp_1 = "xw:nDF1m|xd:DB1m|xq:nDD1m",
|
|
addsubpd_2 = "rmo:660FD0rM",
|
|
addsubps_2 = "rmo:F20FD0rM",
|
|
haddpd_2 = "rmo:660F7CrM",
|
|
haddps_2 = "rmo:F20F7CrM",
|
|
hsubpd_2 = "rmo:660F7DrM",
|
|
hsubps_2 = "rmo:F20F7DrM",
|
|
lddqu_2 = "rxo:F20FF0rM",
|
|
movddup_2 = "rmo:F20F12rM",
|
|
movshdup_2 = "rmo:F30F16rM",
|
|
movsldup_2 = "rmo:F30F12rM",
|
|
|
|
-- SSSE3 ops
|
|
pabsb_2 = "rmo:660F381CrM",
|
|
pabsd_2 = "rmo:660F381ErM",
|
|
pabsw_2 = "rmo:660F381DrM",
|
|
palignr_3 = "rmio:660F3A0FrMU",
|
|
phaddd_2 = "rmo:660F3802rM",
|
|
phaddsw_2 = "rmo:660F3803rM",
|
|
phaddw_2 = "rmo:660F3801rM",
|
|
phsubd_2 = "rmo:660F3806rM",
|
|
phsubsw_2 = "rmo:660F3807rM",
|
|
phsubw_2 = "rmo:660F3805rM",
|
|
pmaddubsw_2 = "rmo:660F3804rM",
|
|
pmulhrsw_2 = "rmo:660F380BrM",
|
|
pshufb_2 = "rmo:660F3800rM",
|
|
psignb_2 = "rmo:660F3808rM",
|
|
psignd_2 = "rmo:660F380ArM",
|
|
psignw_2 = "rmo:660F3809rM",
|
|
|
|
-- SSE4.1 ops
|
|
blendpd_3 = "rmio:660F3A0DrMU",
|
|
blendps_3 = "rmio:660F3A0CrMU",
|
|
blendvpd_3 = "rmRo:660F3815rM",
|
|
blendvps_3 = "rmRo:660F3814rM",
|
|
dppd_3 = "rmio:660F3A41rMU",
|
|
dpps_3 = "rmio:660F3A40rMU",
|
|
extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
|
|
insertps_3 = "rrio:660F3A41rMU|rxi/od:",
|
|
movntdqa_2 = "rxo:660F382ArM",
|
|
mpsadbw_3 = "rmio:660F3A42rMU",
|
|
packusdw_2 = "rmo:660F382BrM",
|
|
pblendvb_3 = "rmRo:660F3810rM",
|
|
pblendw_3 = "rmio:660F3A0ErMU",
|
|
pcmpeqq_2 = "rmo:660F3829rM",
|
|
pextrb_3 = "rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
|
|
pextrd_3 = "mri/do:660F3A16RmU",
|
|
pextrq_3 = "mri/qo:660F3A16RmU",
|
|
-- pextrw is SSE2, mem operand is SSE4.1 only
|
|
phminposuw_2 = "rmo:660F3841rM",
|
|
pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:",
|
|
pinsrd_3 = "rmi/od:660F3A22rMU",
|
|
pinsrq_3 = "rmi/oq:660F3A22rXMU",
|
|
pmaxsb_2 = "rmo:660F383CrM",
|
|
pmaxsd_2 = "rmo:660F383DrM",
|
|
pmaxud_2 = "rmo:660F383FrM",
|
|
pmaxuw_2 = "rmo:660F383ErM",
|
|
pminsb_2 = "rmo:660F3838rM",
|
|
pminsd_2 = "rmo:660F3839rM",
|
|
pminud_2 = "rmo:660F383BrM",
|
|
pminuw_2 = "rmo:660F383ArM",
|
|
pmovsxbd_2 = "rro:660F3821rM|rx/od:",
|
|
pmovsxbq_2 = "rro:660F3822rM|rx/ow:",
|
|
pmovsxbw_2 = "rro:660F3820rM|rx/oq:",
|
|
pmovsxdq_2 = "rro:660F3825rM|rx/oq:",
|
|
pmovsxwd_2 = "rro:660F3823rM|rx/oq:",
|
|
pmovsxwq_2 = "rro:660F3824rM|rx/od:",
|
|
pmovzxbd_2 = "rro:660F3831rM|rx/od:",
|
|
pmovzxbq_2 = "rro:660F3832rM|rx/ow:",
|
|
pmovzxbw_2 = "rro:660F3830rM|rx/oq:",
|
|
pmovzxdq_2 = "rro:660F3835rM|rx/oq:",
|
|
pmovzxwd_2 = "rro:660F3833rM|rx/oq:",
|
|
pmovzxwq_2 = "rro:660F3834rM|rx/od:",
|
|
pmuldq_2 = "rmo:660F3828rM",
|
|
pmulld_2 = "rmo:660F3840rM",
|
|
ptest_2 = "rmo:660F3817rM",
|
|
roundpd_3 = "rmio:660F3A09rMU",
|
|
roundps_3 = "rmio:660F3A08rMU",
|
|
roundsd_3 = "rrio:660F3A0BrMU|rxi/oq:",
|
|
roundss_3 = "rrio:660F3A0ArMU|rxi/od:",
|
|
|
|
-- SSE4.2 ops
|
|
crc32_2 = "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:",
|
|
pcmpestri_3 = "rmio:660F3A61rMU",
|
|
pcmpestrm_3 = "rmio:660F3A60rMU",
|
|
pcmpgtq_2 = "rmo:660F3837rM",
|
|
pcmpistri_3 = "rmio:660F3A63rMU",
|
|
pcmpistrm_3 = "rmio:660F3A62rMU",
|
|
popcnt_2 = "rmqdw:F30FB8rM",
|
|
|
|
-- SSE4a
|
|
extrq_2 = "rro:660F79rM",
|
|
extrq_3 = "riio:660F780mUU",
|
|
insertq_2 = "rro:F20F79rM",
|
|
insertq_4 = "rriio:F20F78rMUU",
|
|
lzcnt_2 = "rmqdw:F30FBDrM",
|
|
movntsd_2 = "xr/qo:nF20F2BRm",
|
|
movntss_2 = "xr/do:F30F2BRm",
|
|
-- popcnt is also in SSE4.2
|
|
|
|
-- AES-NI
|
|
aesdec_2 = "rmo:660F38DErM",
|
|
aesdeclast_2 = "rmo:660F38DFrM",
|
|
aesenc_2 = "rmo:660F38DCrM",
|
|
aesenclast_2 = "rmo:660F38DDrM",
|
|
aesimc_2 = "rmo:660F38DBrM",
|
|
aeskeygenassist_3 = "rmio:660F3ADFrMU",
|
|
pclmulqdq_3 = "rmio:660F3A44rMU",
|
|
|
|
-- AVX FP ops
|
|
vaddsubpd_3 = "rrmoy:660FVD0rM",
|
|
vaddsubps_3 = "rrmoy:F20FVD0rM",
|
|
vandpd_3 = "rrmoy:660FV54rM",
|
|
vandps_3 = "rrmoy:0FV54rM",
|
|
vandnpd_3 = "rrmoy:660FV55rM",
|
|
vandnps_3 = "rrmoy:0FV55rM",
|
|
vblendpd_4 = "rrmioy:660F3AV0DrMU",
|
|
vblendps_4 = "rrmioy:660F3AV0CrMU",
|
|
vblendvpd_4 = "rrmroy:660F3AV4BrMs",
|
|
vblendvps_4 = "rrmroy:660F3AV4ArMs",
|
|
vbroadcastf128_2 = "rx/yo:660F38u1ArM",
|
|
vcmppd_4 = "rrmioy:660FVC2rMU",
|
|
vcmpps_4 = "rrmioy:0FVC2rMU",
|
|
vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:",
|
|
vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:",
|
|
vcomisd_2 = "rro:660Fu2FrM|rx/oq:",
|
|
vcomiss_2 = "rro:0Fu2FrM|rx/od:",
|
|
vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:",
|
|
vcvtdq2ps_2 = "rmoy:0Fu5BrM",
|
|
vcvtpd2dq_2 = "rmoy:F20FuE6rM",
|
|
vcvtpd2ps_2 = "rmoy:660Fu5ArM",
|
|
vcvtps2dq_2 = "rmoy:660Fu5BrM",
|
|
vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:",
|
|
vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
|
|
vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:",
|
|
vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
|
|
vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
|
|
vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:",
|
|
vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
|
|
vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
|
|
vcvttps2dq_2 = "rmoy:F30Fu5BrM",
|
|
vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
|
|
vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
|
|
vdppd_4 = "rrmio:660F3AV41rMU",
|
|
vdpps_4 = "rrmioy:660F3AV40rMU",
|
|
vextractf128_3 = "mri/oy:660F3AuL19RmU",
|
|
vextractps_3 = "mri/do:660F3Au17RmU",
|
|
vhaddpd_3 = "rrmoy:660FV7CrM",
|
|
vhaddps_3 = "rrmoy:F20FV7CrM",
|
|
vhsubpd_3 = "rrmoy:660FV7DrM",
|
|
vhsubps_3 = "rrmoy:F20FV7DrM",
|
|
vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
|
|
vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:",
|
|
vldmxcsr_1 = "xd:0FuAE2m",
|
|
vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
|
|
vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
|
|
vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm",
|
|
vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm",
|
|
vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
|
|
vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
|
|
vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:",
|
|
vmovhlps_3 = "rrro:0FV12rM",
|
|
vmovhpd_2 = "xr/qo:660Fu17Rm",
|
|
vmovhpd_3 = "rrx/ooq:660FV16rM",
|
|
vmovhps_2 = "xr/qo:0Fu17Rm",
|
|
vmovhps_3 = "rrx/ooq:0FV16rM",
|
|
vmovlhps_3 = "rrro:0FV16rM",
|
|
vmovlpd_2 = "xr/qo:660Fu13Rm",
|
|
vmovlpd_3 = "rrx/ooq:660FV12rM",
|
|
vmovlps_2 = "xr/qo:0Fu13Rm",
|
|
vmovlps_3 = "rrx/ooq:0FV12rM",
|
|
vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM",
|
|
vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM",
|
|
vmovntpd_2 = "xroy:660Fu2BRm",
|
|
vmovntps_2 = "xroy:0Fu2BRm",
|
|
vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
|
|
vmovsd_3 = "rrro:F20FV10rM",
|
|
vmovshdup_2 = "rmoy:F30Fu16rM",
|
|
vmovsldup_2 = "rmoy:F30Fu12rM",
|
|
vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
|
|
vmovss_3 = "rrro:F30FV10rM",
|
|
vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm",
|
|
vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm",
|
|
vorpd_3 = "rrmoy:660FV56rM",
|
|
vorps_3 = "rrmoy:0FV56rM",
|
|
vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
|
|
vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
|
|
vperm2f128_4 = "rrmiy:660F3AV06rMU",
|
|
vptestpd_2 = "rmoy:660F38u0FrM",
|
|
vptestps_2 = "rmoy:660F38u0ErM",
|
|
vrcpps_2 = "rmoy:0Fu53rM",
|
|
vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
|
|
vrsqrtps_2 = "rmoy:0Fu52rM",
|
|
vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
|
|
vroundpd_3 = "rmioy:660F3Au09rMU",
|
|
vroundps_3 = "rmioy:660F3Au08rMU",
|
|
vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
|
|
vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
|
|
vshufpd_4 = "rrmioy:660FVC6rMU",
|
|
vshufps_4 = "rrmioy:0FVC6rMU",
|
|
vsqrtps_2 = "rmoy:0Fu51rM",
|
|
vsqrtss_2 = "rro:F30Fu51rM|rx/od:",
|
|
vsqrtpd_2 = "rmoy:660Fu51rM",
|
|
vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:",
|
|
vstmxcsr_1 = "xd:0FuAE3m",
|
|
vucomisd_2 = "rro:660Fu2ErM|rx/oq:",
|
|
vucomiss_2 = "rro:0Fu2ErM|rx/od:",
|
|
vunpckhpd_3 = "rrmoy:660FV15rM",
|
|
vunpckhps_3 = "rrmoy:0FV15rM",
|
|
vunpcklpd_3 = "rrmoy:660FV14rM",
|
|
vunpcklps_3 = "rrmoy:0FV14rM",
|
|
vxorpd_3 = "rrmoy:660FV57rM",
|
|
vxorps_3 = "rrmoy:0FV57rM",
|
|
vzeroall_0 = "0FuL77",
|
|
vzeroupper_0 = "0Fu77",
|
|
|
|
-- AVX2 FP ops
|
|
vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
|
|
vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
|
|
-- *vgather* (!vsib)
|
|
vpermpd_3 = "rmiy:660F3AuX01rMU",
|
|
vpermps_3 = "rrmy:660F38V16rM",
|
|
|
|
-- AVX, AVX2 integer ops
|
|
-- In general, xmm requires AVX, ymm requires AVX2.
|
|
vaesdec_3 = "rrmo:660F38VDErM",
|
|
vaesdeclast_3 = "rrmo:660F38VDFrM",
|
|
vaesenc_3 = "rrmo:660F38VDCrM",
|
|
vaesenclast_3 = "rrmo:660F38VDDrM",
|
|
vaesimc_2 = "rmo:660F38uDBrM",
|
|
vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
|
|
vlddqu_2 = "rxoy:F20FuF0rM",
|
|
vmaskmovdqu_2 = "rro:660FuF7rM",
|
|
vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm",
|
|
vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
|
|
vmovntdq_2 = "xroy:660FuE7Rm",
|
|
vmovntdqa_2 = "rxoy:660F38u2ArM",
|
|
vmpsadbw_4 = "rrmioy:660F3AV42rMU",
|
|
vpabsb_2 = "rmoy:660F38u1CrM",
|
|
vpabsd_2 = "rmoy:660F38u1ErM",
|
|
vpabsw_2 = "rmoy:660F38u1DrM",
|
|
vpackusdw_3 = "rrmoy:660F38V2BrM",
|
|
vpalignr_4 = "rrmioy:660F3AV0FrMU",
|
|
vpblendvb_4 = "rrmroy:660F3AV4CrMs",
|
|
vpblendw_4 = "rrmioy:660F3AV0ErMU",
|
|
vpclmulqdq_4 = "rrmio:660F3AV44rMU",
|
|
vpcmpeqq_3 = "rrmoy:660F38V29rM",
|
|
vpcmpestri_3 = "rmio:660F3Au61rMU",
|
|
vpcmpestrm_3 = "rmio:660F3Au60rMU",
|
|
vpcmpgtq_3 = "rrmoy:660F38V37rM",
|
|
vpcmpistri_3 = "rmio:660F3Au63rMU",
|
|
vpcmpistrm_3 = "rmio:660F3Au62rMU",
|
|
vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
|
|
vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
|
|
vpextrd_3 = "mri/do:660F3Au16RmU",
|
|
vpextrq_3 = "mri/qo:660F3Au16RmU",
|
|
vphaddw_3 = "rrmoy:660F38V01rM",
|
|
vphaddd_3 = "rrmoy:660F38V02rM",
|
|
vphaddsw_3 = "rrmoy:660F38V03rM",
|
|
vphminposuw_2 = "rmo:660F38u41rM",
|
|
vphsubw_3 = "rrmoy:660F38V05rM",
|
|
vphsubd_3 = "rrmoy:660F38V06rM",
|
|
vphsubsw_3 = "rrmoy:660F38V07rM",
|
|
vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:",
|
|
vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:",
|
|
vpinsrd_4 = "rrmi/ood:660F3AV22rMU",
|
|
vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU",
|
|
vpmaddubsw_3 = "rrmoy:660F38V04rM",
|
|
vpmaxsb_3 = "rrmoy:660F38V3CrM",
|
|
vpmaxsd_3 = "rrmoy:660F38V3DrM",
|
|
vpmaxuw_3 = "rrmoy:660F38V3ErM",
|
|
vpmaxud_3 = "rrmoy:660F38V3FrM",
|
|
vpminsb_3 = "rrmoy:660F38V38rM",
|
|
vpminsd_3 = "rrmoy:660F38V39rM",
|
|
vpminuw_3 = "rrmoy:660F38V3ArM",
|
|
vpminud_3 = "rrmoy:660F38V3BrM",
|
|
vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM",
|
|
vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:",
|
|
vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:",
|
|
vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:",
|
|
vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:",
|
|
vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:",
|
|
vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:",
|
|
vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:",
|
|
vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:",
|
|
vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:",
|
|
vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:",
|
|
vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:",
|
|
vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:",
|
|
vpmuldq_3 = "rrmoy:660F38V28rM",
|
|
vpmulhrsw_3 = "rrmoy:660F38V0BrM",
|
|
vpmulld_3 = "rrmoy:660F38V40rM",
|
|
vpshufb_3 = "rrmoy:660F38V00rM",
|
|
vpshufd_3 = "rmioy:660Fu70rMU",
|
|
vpshufhw_3 = "rmioy:F30Fu70rMU",
|
|
vpshuflw_3 = "rmioy:F20Fu70rMU",
|
|
vpsignb_3 = "rrmoy:660F38V08rM",
|
|
vpsignw_3 = "rrmoy:660F38V09rM",
|
|
vpsignd_3 = "rrmoy:660F38V0ArM",
|
|
vpslldq_3 = "rrioy:660Fv737mU",
|
|
vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU",
|
|
vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU",
|
|
vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU",
|
|
vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU",
|
|
vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU",
|
|
vpsrldq_3 = "rrioy:660Fv733mU",
|
|
vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU",
|
|
vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU",
|
|
vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU",
|
|
vptest_2 = "rmoy:660F38u17rM",
|
|
|
|
-- AVX2 integer ops
|
|
vbroadcasti128_2 = "rx/yo:660F38u5ArM",
|
|
vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
|
|
vextracti128_3 = "mri/oy:660F3AuL39RmU",
|
|
vpblendd_4 = "rrmioy:660F3AV02rMU",
|
|
vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
|
|
vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
|
|
vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
|
|
vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
|
|
vpermd_3 = "rrmy:660F38V36rM",
|
|
vpermq_3 = "rmiy:660F3AuX00rMU",
|
|
-- *vpgather* (!vsib)
|
|
vperm2i128_4 = "rrmiy:660F3AV46rMU",
|
|
vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
|
|
vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
|
|
vpsllvd_3 = "rrmoy:660F38V47rM",
|
|
vpsllvq_3 = "rrmoy:660F38VX47rM",
|
|
vpsravd_3 = "rrmoy:660F38V46rM",
|
|
vpsrlvd_3 = "rrmoy:660F38V45rM",
|
|
vpsrlvq_3 = "rrmoy:660F38VX45rM",
|
|
|
|
-- Intel ADX
|
|
adcx_2 = "rmqd:660F38F6rM",
|
|
adox_2 = "rmqd:F30F38F6rM",
|
|
|
|
-- BMI1
|
|
andn_3 = "rrmqd:0F38VF2rM",
|
|
bextr_3 = "rmrqd:0F38wF7rM",
|
|
blsi_2 = "rmqd:0F38vF33m",
|
|
blsmsk_2 = "rmqd:0F38vF32m",
|
|
blsr_2 = "rmqd:0F38vF31m",
|
|
tzcnt_2 = "rmqdw:F30FBCrM",
|
|
|
|
-- BMI2
|
|
bzhi_3 = "rmrqd:0F38wF5rM",
|
|
mulx_3 = "rrmqd:F20F38VF6rM",
|
|
pdep_3 = "rrmqd:F20F38VF5rM",
|
|
pext_3 = "rrmqd:F30F38VF5rM",
|
|
rorx_3 = "rmSqd:F20F3AuF0rMS",
|
|
sarx_3 = "rmrqd:F30F38wF7rM",
|
|
shrx_3 = "rmrqd:F20F38wF7rM",
|
|
shlx_3 = "rmrqd:660F38wF7rM",
|
|
|
|
-- FMA3
|
|
vfmaddsub132pd_3 = "rrmoy:660F38VX96rM",
|
|
vfmaddsub132ps_3 = "rrmoy:660F38V96rM",
|
|
vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM",
|
|
vfmaddsub213ps_3 = "rrmoy:660F38VA6rM",
|
|
vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM",
|
|
vfmaddsub231ps_3 = "rrmoy:660F38VB6rM",
|
|
|
|
vfmsubadd132pd_3 = "rrmoy:660F38VX97rM",
|
|
vfmsubadd132ps_3 = "rrmoy:660F38V97rM",
|
|
vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM",
|
|
vfmsubadd213ps_3 = "rrmoy:660F38VA7rM",
|
|
vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM",
|
|
vfmsubadd231ps_3 = "rrmoy:660F38VB7rM",
|
|
|
|
vfmadd132pd_3 = "rrmoy:660F38VX98rM",
|
|
vfmadd132ps_3 = "rrmoy:660F38V98rM",
|
|
vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:",
|
|
vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:",
|
|
vfmadd213pd_3 = "rrmoy:660F38VXA8rM",
|
|
vfmadd213ps_3 = "rrmoy:660F38VA8rM",
|
|
vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:",
|
|
vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:",
|
|
vfmadd231pd_3 = "rrmoy:660F38VXB8rM",
|
|
vfmadd231ps_3 = "rrmoy:660F38VB8rM",
|
|
vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:",
|
|
vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:",
|
|
|
|
vfmsub132pd_3 = "rrmoy:660F38VX9ArM",
|
|
vfmsub132ps_3 = "rrmoy:660F38V9ArM",
|
|
vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:",
|
|
vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:",
|
|
vfmsub213pd_3 = "rrmoy:660F38VXAArM",
|
|
vfmsub213ps_3 = "rrmoy:660F38VAArM",
|
|
vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:",
|
|
vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:",
|
|
vfmsub231pd_3 = "rrmoy:660F38VXBArM",
|
|
vfmsub231ps_3 = "rrmoy:660F38VBArM",
|
|
vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:",
|
|
vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:",
|
|
|
|
vfnmadd132pd_3 = "rrmoy:660F38VX9CrM",
|
|
vfnmadd132ps_3 = "rrmoy:660F38V9CrM",
|
|
vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:",
|
|
vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:",
|
|
vfnmadd213pd_3 = "rrmoy:660F38VXACrM",
|
|
vfnmadd213ps_3 = "rrmoy:660F38VACrM",
|
|
vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:",
|
|
vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:",
|
|
vfnmadd231pd_3 = "rrmoy:660F38VXBCrM",
|
|
vfnmadd231ps_3 = "rrmoy:660F38VBCrM",
|
|
vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:",
|
|
vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:",
|
|
|
|
vfnmsub132pd_3 = "rrmoy:660F38VX9ErM",
|
|
vfnmsub132ps_3 = "rrmoy:660F38V9ErM",
|
|
vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:",
|
|
vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:",
|
|
vfnmsub213pd_3 = "rrmoy:660F38VXAErM",
|
|
vfnmsub213ps_3 = "rrmoy:660F38VAErM",
|
|
vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:",
|
|
vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:",
|
|
vfnmsub231pd_3 = "rrmoy:660F38VXBErM",
|
|
vfnmsub231ps_3 = "rrmoy:660F38VBErM",
|
|
vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:",
|
|
vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:",
|
|
}
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Arithmetic ops.
|
|
for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3,
|
|
["and"] = 4, sub = 5, xor = 6, cmp = 7 } do
|
|
local n8 = shl(n, 3)
|
|
map_op[name.."_2"] = format(
|
|
"mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi",
|
|
1+n8, 3+n8, n, n, 5+n8, n)
|
|
end
|
|
|
|
-- Shift ops.
|
|
for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3,
|
|
shl = 4, shr = 5, sar = 7, sal = 4 } do
|
|
map_op[name.."_2"] = format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU", n, n, n)
|
|
end
|
|
|
|
-- Conditional ops.
|
|
for cc,n in pairs(map_cc) do
|
|
map_op["j"..cc.."_1"] = format("J.:n0F8%XJ", n) -- short: 7%X
|
|
map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n)
|
|
map_op["cmov"..cc.."_2"] = format("rmqdw:0F4%XrM", n) -- P6+
|
|
end
|
|
|
|
-- FP arithmetic ops.
|
|
for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3,
|
|
sub = 4, subr = 5, div = 6, divr = 7 } do
|
|
local nc = 0xc0 + shl(n, 3)
|
|
local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8))
|
|
local fn = "f"..name
|
|
map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n)
|
|
if n == 2 or n == 3 then
|
|
map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", nc, n, n)
|
|
else
|
|
map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM", nc, nr, n, n)
|
|
map_op[fn.."p_1"] = format("ff:DE%02Xr", nr)
|
|
map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr)
|
|
end
|
|
map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n)
|
|
end
|
|
|
|
-- FP conditional moves.
|
|
for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
|
|
local nc = 0xdac0 + shl(band(n, 3), 3) + shl(band(n, 4), 6)
|
|
map_op["fcmov"..cc.."_1"] = format("ff:%04Xr", nc) -- P6+
|
|
map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
|
|
end
|
|
|
|
-- SSE / AVX FP arithmetic ops.
|
|
for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
|
|
sub = 12, min = 13, div = 14, max = 15 } do
|
|
map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
|
|
map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
|
|
map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
|
|
map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
|
|
if n ~= 1 then
|
|
map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
|
|
map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
|
|
map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
|
|
map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
|
|
end
|
|
end
|
|
|
|
-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
|
|
for name,n in pairs{
|
|
paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
|
|
paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
|
|
packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
|
|
paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
|
|
pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
|
|
pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
|
|
pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
|
|
pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
|
|
pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
|
|
pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
|
|
psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
|
|
psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
|
|
punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
|
|
punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
|
|
punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
|
|
} do
|
|
map_op[name.."_2"] = format("rmo:660F%02XrM", n)
|
|
map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
|
|
end
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
local map_vexarg = { u = false, v = 1, V = 2, w = 3 }
|
|
|
|
-- Process pattern string.
|
|
local function dopattern(pat, args, sz, op, needrex)
|
|
local digit, addin, vex
|
|
local opcode = 0
|
|
local szov = sz
|
|
local narg = 1
|
|
local rex = 0
|
|
|
|
-- Limit number of section buffer positions used by a single dasm_put().
|
|
-- A single opcode needs a maximum of 6 positions.
|
|
if secpos+6 > maxsecpos then wflush() end
|
|
|
|
-- Process each character.
|
|
for c in gmatch(pat.."|", ".") do
|
|
if match(c, "%x") then -- Hex digit.
|
|
digit = byte(c) - 48
|
|
if digit > 48 then digit = digit - 39
|
|
elseif digit > 16 then digit = digit - 7 end
|
|
opcode = opcode*16 + digit
|
|
addin = nil
|
|
elseif c == "n" then -- Disable operand size mods for opcode.
|
|
szov = nil
|
|
elseif c == "X" then -- Force REX.W.
|
|
rex = 8
|
|
elseif c == "L" then -- Force VEX.L.
|
|
vex.l = true
|
|
elseif c == "r" then -- Merge 1st operand regno. into opcode.
|
|
addin = args[1]; opcode = opcode + (addin.reg % 8)
|
|
if narg < 2 then narg = 2 end
|
|
elseif c == "R" then -- Merge 2nd operand regno. into opcode.
|
|
addin = args[2]; opcode = opcode + (addin.reg % 8)
|
|
narg = 3
|
|
elseif c == "m" or c == "M" then -- Encode ModRM/SIB.
|
|
local s
|
|
if addin then
|
|
s = addin.reg
|
|
opcode = opcode - band(s, 7) -- Undo regno opcode merge.
|
|
else
|
|
s = band(opcode, 15) -- Undo last digit.
|
|
opcode = shr(opcode, 4)
|
|
end
|
|
local nn = c == "m" and 1 or 2
|
|
local t = args[nn]
|
|
if narg <= nn then narg = nn + 1 end
|
|
if szov == "q" and rex == 0 then rex = rex + 8 end
|
|
if t.reg and t.reg > 7 then rex = rex + 1 end
|
|
if t.xreg and t.xreg > 7 then rex = rex + 2 end
|
|
if s > 7 then rex = rex + 4 end
|
|
if needrex then rex = rex + 16 end
|
|
local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
|
|
opcode = nil
|
|
local imark = sub(pat, -1) -- Force a mark (ugly).
|
|
-- Put ModRM/SIB with regno/last digit as spare.
|
|
wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
|
|
addin = nil
|
|
elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
|
|
local b = band(opcode, 255); opcode = shr(opcode, 8)
|
|
local m = 1
|
|
if b == 0x38 then m = 2
|
|
elseif b == 0x3a then m = 3 end
|
|
if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
|
|
if b ~= 0x0f then
|
|
werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
|
|
"' in pattern `"..pat.."' for `"..op.."'")
|
|
end
|
|
local v = map_vexarg[c]
|
|
if v then v = remove(args, v) end
|
|
b = band(opcode, 255)
|
|
local p = 0
|
|
if b == 0x66 then p = 1
|
|
elseif b == 0xf3 then p = 2
|
|
elseif b == 0xf2 then p = 3 end
|
|
if p ~= 0 then opcode = shr(opcode, 8) end
|
|
if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
|
|
vex = { m = m, p = p, v = v }
|
|
else
|
|
if opcode then -- Flush opcode.
|
|
if szov == "q" and rex == 0 then rex = rex + 8 end
|
|
if needrex then rex = rex + 16 end
|
|
if addin and addin.reg == -1 then
|
|
local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
|
|
wvreg("opcode", addin.vreg, psz, sk)
|
|
else
|
|
if addin and addin.reg > 7 then rex = rex + 1 end
|
|
wputop(szov, opcode, rex, vex)
|
|
end
|
|
opcode = nil
|
|
end
|
|
if c == "|" then break end
|
|
if c == "o" then -- Offset (pure 32 bit displacement).
|
|
wputdarg(args[1].disp); if narg < 2 then narg = 2 end
|
|
elseif c == "O" then
|
|
wputdarg(args[2].disp); narg = 3
|
|
else
|
|
-- Anything else is an immediate operand.
|
|
local a = args[narg]
|
|
narg = narg + 1
|
|
local mode, imm = a.mode, a.imm
|
|
if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then
|
|
werror("bad operand size for label")
|
|
end
|
|
if c == "S" then
|
|
wputsbarg(imm)
|
|
elseif c == "U" then
|
|
wputbarg(imm)
|
|
elseif c == "W" then
|
|
wputwarg(imm)
|
|
elseif c == "i" or c == "I" then
|
|
if mode == "iJ" then
|
|
wputlabel("IMM_", imm, 1)
|
|
elseif mode == "iI" and c == "I" then
|
|
waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
|
|
else
|
|
wputszarg(sz, imm)
|
|
end
|
|
elseif c == "J" then
|
|
if mode == "iPJ" then
|
|
waction("REL_A", imm) -- !x64 (secpos)
|
|
else
|
|
wputlabel("REL_", imm, 2)
|
|
end
|
|
elseif c == "s" then
|
|
local reg = a.reg
|
|
if reg < 0 then
|
|
wputb(0)
|
|
wvreg("imm.hi", a.vreg)
|
|
else
|
|
wputb(shl(reg, 4))
|
|
end
|
|
else
|
|
werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Mapping of operand modes to short names. Suppress output with '#'.
|
|
local map_modename = {
|
|
r = "reg", R = "eax", C = "cl", x = "mem", m = "mrm", i = "imm",
|
|
f = "stx", F = "st0", J = "lbl", ["1"] = "1",
|
|
I = "#", S = "#", O = "#",
|
|
}
|
|
|
|
-- Return a table/string showing all possible operand modes.
|
|
local function templatehelp(template, nparams)
|
|
if nparams == 0 then return "" end
|
|
local t = {}
|
|
for tm in gmatch(template, "[^%|]+") do
|
|
local s = map_modename[sub(tm, 1, 1)]
|
|
s = s..gsub(sub(tm, 2, nparams), ".", function(c)
|
|
return ", "..map_modename[c]
|
|
end)
|
|
if not match(s, "#") then t[#t+1] = s end
|
|
end
|
|
return t
|
|
end
|
|
|
|
-- Match operand modes against mode match part of template.
|
|
local function matchtm(tm, args)
|
|
for i=1,#args do
|
|
if not match(args[i].mode, sub(tm, i, i)) then return end
|
|
end
|
|
return true
|
|
end
|
|
|
|
-- Handle opcodes defined with template strings.
|
|
map_op[".template__"] = function(params, template, nparams)
|
|
if not params then return templatehelp(template, nparams) end
|
|
local args = {}
|
|
|
|
-- Zero-operand opcodes have no match part.
|
|
if #params == 0 then
|
|
dopattern(template, args, "d", params.op, nil)
|
|
return
|
|
end
|
|
|
|
-- Determine common operand size (coerce undefined size) or flag as mixed.
|
|
local sz, szmix, needrex
|
|
for i,p in ipairs(params) do
|
|
args[i] = parseoperand(p)
|
|
local nsz = args[i].opsize
|
|
if nsz then
|
|
if sz and sz ~= nsz then szmix = true else sz = nsz end
|
|
end
|
|
local nrex = args[i].needrex
|
|
if nrex ~= nil then
|
|
if needrex == nil then
|
|
needrex = nrex
|
|
elseif needrex ~= nrex then
|
|
werror("bad mix of byte-addressable registers")
|
|
end
|
|
end
|
|
end
|
|
|
|
-- Try all match:pattern pairs (separated by '|').
|
|
local gotmatch, lastpat
|
|
for tm in gmatch(template, "[^%|]+") do
|
|
-- Split off size match (starts after mode match) and pattern string.
|
|
local szm, pat = match(tm, "^(.-):(.*)$", #args+1)
|
|
if pat == "" then pat = lastpat else lastpat = pat end
|
|
if matchtm(tm, args) then
|
|
local prefix = sub(szm, 1, 1)
|
|
if prefix == "/" then -- Exactly match leading operand sizes.
|
|
for i = #szm,1,-1 do
|
|
if i == 1 then
|
|
dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
|
|
return
|
|
elseif args[i-1].opsize ~= sub(szm, i, i) then
|
|
break
|
|
end
|
|
end
|
|
else -- Match common operand size.
|
|
local szp = sz
|
|
if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes.
|
|
if prefix == "1" then szp = args[1].opsize; szmix = nil
|
|
elseif prefix == "2" then szp = args[2].opsize; szmix = nil end
|
|
if not szmix and (prefix == "." or match(szm, szp or "#")) then
|
|
dopattern(pat, args, szp, params.op, needrex) -- Process pattern.
|
|
return
|
|
end
|
|
end
|
|
gotmatch = true
|
|
end
|
|
end
|
|
|
|
local msg = "bad operand mode"
|
|
if gotmatch then
|
|
if szmix then
|
|
msg = "mixed operand size"
|
|
else
|
|
msg = sz and "bad operand size" or "missing operand size"
|
|
end
|
|
end
|
|
|
|
werror(msg.." in `"..opmodestr(params.op, args).."'")
|
|
end
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- x64-specific opcode for 64 bit immediates and displacements.
|
|
if x64 then
|
|
function map_op.mov64_2(params)
|
|
if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
|
|
if secpos+2 > maxsecpos then wflush() end
|
|
local opcode, op64, sz, rex, vreg
|
|
local op64 = match(params[1], "^%[%s*(.-)%s*%]$")
|
|
if op64 then
|
|
local a = parseoperand(params[2])
|
|
if a.mode ~= "rmR" then werror("bad operand mode") end
|
|
sz = a.opsize
|
|
rex = sz == "q" and 8 or 0
|
|
opcode = 0xa3
|
|
else
|
|
op64 = match(params[2], "^%[%s*(.-)%s*%]$")
|
|
local a = parseoperand(params[1])
|
|
if op64 then
|
|
if a.mode ~= "rmR" then werror("bad operand mode") end
|
|
sz = a.opsize
|
|
rex = sz == "q" and 8 or 0
|
|
opcode = 0xa1
|
|
else
|
|
if sub(a.mode, 1, 1) ~= "r" or a.opsize ~= "q" then
|
|
werror("bad operand mode")
|
|
end
|
|
op64 = params[2]
|
|
if a.reg == -1 then
|
|
vreg = a.vreg
|
|
opcode = 0xb8
|
|
else
|
|
opcode = 0xb8 + band(a.reg, 7)
|
|
end
|
|
rex = a.reg > 7 and 9 or 8
|
|
end
|
|
end
|
|
local psz, sk = wputop(sz, opcode, rex, nil, vreg)
|
|
wvreg("opcode", vreg, psz, sk)
|
|
waction("IMM_D", format("(unsigned int)(%s)", op64))
|
|
waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
|
|
end
|
|
end
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Pseudo-opcodes for data storage.
|
|
local function op_data(params)
|
|
if not params then return "imm..." end
|
|
local sz = sub(params.op, 2, 2)
|
|
if sz == "l" then sz = "d" elseif sz == "a" then sz = addrsize end
|
|
for _,p in ipairs(params) do
|
|
local a = parseoperand(p, sz == "q")
|
|
if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
|
|
werror("bad mode or size in `"..p.."'")
|
|
end
|
|
if a.mode == "iJ" then
|
|
wputlabel("IMM_", a.imm, 1)
|
|
elseif sz == "q" then
|
|
wputqarg(a.imm)
|
|
else
|
|
wputszarg(sz, a.imm)
|
|
end
|
|
if secpos+2 > maxsecpos then wflush() end
|
|
end
|
|
end
|
|
|
|
map_op[".byte_*"] = op_data
|
|
map_op[".sbyte_*"] = op_data
|
|
map_op[".word_*"] = op_data
|
|
map_op[".dword_*"] = op_data
|
|
map_op[".qword_*"] = op_data
|
|
map_op[".aword_*"] = op_data
|
|
map_op[".long_*"] = op_data
|
|
map_op[".quad_*"] = op_data
|
|
map_op[".addr_*"] = op_data
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Pseudo-opcode to mark the position where the action list is to be emitted.
|
|
map_op[".actionlist_1"] = function(params)
|
|
if not params then return "cvar" end
|
|
local name = params[1] -- No syntax check. You get to keep the pieces.
|
|
wline(function(out) writeactions(out, name) end)
|
|
end
|
|
|
|
-- Pseudo-opcode to mark the position where the global enum is to be emitted.
|
|
map_op[".globals_1"] = function(params)
|
|
if not params then return "prefix" end
|
|
local prefix = params[1] -- No syntax check. You get to keep the pieces.
|
|
wline(function(out) writeglobals(out, prefix) end)
|
|
end
|
|
|
|
-- Pseudo-opcode to mark the position where the global names are to be emitted.
|
|
map_op[".globalnames_1"] = function(params)
|
|
if not params then return "cvar" end
|
|
local name = params[1] -- No syntax check. You get to keep the pieces.
|
|
wline(function(out) writeglobalnames(out, name) end)
|
|
end
|
|
|
|
-- Pseudo-opcode to mark the position where the extern names are to be emitted.
|
|
map_op[".externnames_1"] = function(params)
|
|
if not params then return "cvar" end
|
|
local name = params[1] -- No syntax check. You get to keep the pieces.
|
|
wline(function(out) writeexternnames(out, name) end)
|
|
end
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Label pseudo-opcode (converted from trailing colon form).
|
|
map_op[".label_2"] = function(params)
|
|
if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end
|
|
if secpos+2 > maxsecpos then wflush() end
|
|
local a = parseoperand(params[1])
|
|
local mode, imm = a.mode, a.imm
|
|
if type(imm) == "number" and (mode == "iJ" or (imm >= 1 and imm <= 9)) then
|
|
-- Local label (1: ... 9:) or global label (->global:).
|
|
waction("LABEL_LG", nil, 1)
|
|
wputxb(imm)
|
|
elseif mode == "iJ" then
|
|
-- PC label (=>pcexpr:).
|
|
waction("LABEL_PC", imm)
|
|
else
|
|
werror("bad label definition")
|
|
end
|
|
-- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
|
|
local addr = params[2]
|
|
if addr then
|
|
local a = parseoperand(addr)
|
|
if a.mode == "iPJ" then
|
|
waction("SETLABEL", a.imm)
|
|
else
|
|
werror("bad label assignment")
|
|
end
|
|
end
|
|
end
|
|
map_op[".label_1"] = map_op[".label_2"]
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Alignment pseudo-opcode.
|
|
map_op[".align_1"] = function(params)
|
|
if not params then return "numpow2" end
|
|
if secpos+1 > maxsecpos then wflush() end
|
|
local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]]
|
|
if align then
|
|
local x = align
|
|
-- Must be a power of 2 in the range (2 ... 256).
|
|
for i=1,8 do
|
|
x = x / 2
|
|
if x == 1 then
|
|
waction("ALIGN", nil, 1)
|
|
wputxb(align-1) -- Action byte is 2**n-1.
|
|
return
|
|
end
|
|
end
|
|
end
|
|
werror("bad alignment")
|
|
end
|
|
|
|
-- Spacing pseudo-opcode.
|
|
map_op[".space_2"] = function(params)
|
|
if not params then return "num [, filler]" end
|
|
if secpos+1 > maxsecpos then wflush() end
|
|
waction("SPACE", params[1])
|
|
local fill = params[2]
|
|
if fill then
|
|
fill = tonumber(fill)
|
|
if not fill or fill < 0 or fill > 255 then werror("bad filler") end
|
|
end
|
|
wputxb(fill or 0)
|
|
end
|
|
map_op[".space_1"] = map_op[".space_2"]
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Pseudo-opcode for (primitive) type definitions (map to C types).
|
|
map_op[".type_3"] = function(params, nparams)
|
|
if not params then
|
|
return nparams == 2 and "name, ctype" or "name, ctype, reg"
|
|
end
|
|
local name, ctype, reg = params[1], params[2], params[3]
|
|
if not match(name, "^[%a_][%w_]*$") then
|
|
werror("bad type name `"..name.."'")
|
|
end
|
|
local tp = map_type[name]
|
|
if tp then
|
|
werror("duplicate type `"..name.."'")
|
|
end
|
|
if reg and not map_reg_valid_base[reg] then
|
|
werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
|
|
end
|
|
-- Add #type to defines. A bit unclean to put it in map_archdef.
|
|
map_archdef["#"..name] = "sizeof("..ctype..")"
|
|
-- Add new type and emit shortcut define.
|
|
local num = ctypenum + 1
|
|
map_type[name] = {
|
|
ctype = ctype,
|
|
ctypefmt = format("Dt%X(%%s)", num),
|
|
reg = reg,
|
|
}
|
|
wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
|
|
ctypenum = num
|
|
end
|
|
map_op[".type_2"] = map_op[".type_3"]
|
|
|
|
-- Dump type definitions.
|
|
local function dumptypes(out, lvl)
|
|
local t = {}
|
|
for name in pairs(map_type) do t[#t+1] = name end
|
|
sort(t)
|
|
out:write("Type definitions:\n")
|
|
for _,name in ipairs(t) do
|
|
local tp = map_type[name]
|
|
local reg = tp.reg and map_reg_rev[tp.reg] or ""
|
|
out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
|
|
end
|
|
out:write("\n")
|
|
end
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Set the current section.
|
|
function _M.section(num)
|
|
waction("SECTION")
|
|
wputxb(num)
|
|
wflush(true) -- SECTION is a terminal action.
|
|
end
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Dump architecture description.
|
|
function _M.dumparch(out)
|
|
out:write(format("DynASM %s version %s, released %s\n\n",
|
|
_info.arch, _info.version, _info.release))
|
|
dumpregs(out)
|
|
dumpactions(out)
|
|
end
|
|
|
|
-- Dump all user defined elements.
|
|
function _M.dumpdef(out, lvl)
|
|
dumptypes(out, lvl)
|
|
dumpglobals(out, lvl)
|
|
dumpexterns(out, lvl)
|
|
end
|
|
|
|
------------------------------------------------------------------------------
|
|
|
|
-- Pass callbacks from/to the DynASM core.
|
|
function _M.passcb(wl, we, wf, ww)
|
|
wline, werror, wfatal, wwarn = wl, we, wf, ww
|
|
return wflush
|
|
end
|
|
|
|
-- Setup the arch-specific module.
|
|
function _M.setup(arch, opt)
|
|
g_arch, g_opt = arch, opt
|
|
end
|
|
|
|
-- Merge the core maps and the arch-specific maps.
|
|
function _M.mergemaps(map_coreop, map_def)
|
|
setmetatable(map_op, { __index = map_coreop })
|
|
setmetatable(map_def, { __index = map_archdef })
|
|
return map_op, map_def
|
|
end
|
|
|
|
return _M
|
|
|
|
------------------------------------------------------------------------------
|
|
|