tools/wrk/deps/luajit/dynasm/dasm_x86.lua

Summary

Maintainability
Test Coverage
------------------------------------------------------------------------------
-- DynASM x86/x64 module.
--
-- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------

local x64 = x64

-- Module information:
local _info = {
  arch =    x64 and "x64" or "x86",
  description =    "DynASM x86/x64 module",
  version =    "1.3.0",
  vernum =     10300,
  release =    "2011-05-05",
  author =    "Mike Pall",
  license =    "MIT",
}

-- Exported glue functions for the arch-specific module.
local _M = { _info = _info }

-- Cache library functions.
local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatable
local _s = string
local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
local concat, sort = table.concat, table.sort
local bit = bit or require("bit")
local band, shl, shr = bit.band, bit.lshift, bit.rshift

-- Inherited tables and callbacks.
local g_opt, g_arch
local wline, werror, wfatal, wwarn

-- Action name list.
-- CHECK: Keep this in sync with the C code!
local action_names = {
  -- int arg, 1 buffer pos:
  "DISP",  "IMM_S", "IMM_B", "IMM_W", "IMM_D",  "IMM_WB", "IMM_DB",
  -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
  "VREG", "SPACE", -- !x64: VREG support NYI.
  -- ptrdiff_t arg, 1 buffer pos (address): !x64
  "SETLABEL", "REL_A",
  -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
  "REL_LG", "REL_PC",
  -- action arg (1 byte) or int arg, 1 buffer pos (link):
  "IMM_LG", "IMM_PC",
  -- action arg (1 byte) or int arg, 1 buffer pos (offset):
  "LABEL_LG", "LABEL_PC",
  -- action arg (1 byte), 1 buffer pos (offset):
  "ALIGN",
  -- action args (2 bytes), no buffer pos.
  "EXTERN",
  -- action arg (1 byte), no buffer pos.
  "ESC",
  -- no action arg, no buffer pos.
  "MARK",
  -- action arg (1 byte), no buffer pos, terminal action:
  "SECTION",
  -- no args, no buffer pos, terminal action:
  "STOP"
}

-- Maximum number of section buffer positions for dasm_put().
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

-- Action name -> action number (dynamically generated below).
local map_action = {}
-- First action number. Everything below does not need to be escaped.
local actfirst = 256-#action_names

-- Action list buffer and string (only used to remove dupes).
local actlist = {}
local actstr = ""

-- Argument list for next dasm_put(). Start with offset 0 into action list.
local actargs = { 0 }

-- Current number of section buffer positions for dasm_put().
local secpos = 1

------------------------------------------------------------------------------

-- Compute action numbers for action names.
for n,name in ipairs(action_names) do
  local num = actfirst + n - 1
  map_action[name] = num
end

-- Dump action names and numbers.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for n,name in ipairs(action_names) do
    local num = map_action[name]
    out:write(format("  %-10s %02X  %d\n", name, num, num))
  end
  out:write("\n")
end

-- Write action list buffer as a huge static C array.
local function writeactions(out, name)
  local nn = #actlist
  local last = actlist[nn] or 255
  actlist[nn] = nil -- Remove last byte.
  if nn == 0 then nn = 1 end
  out:write("static const unsigned char ", name, "[", nn, "] = {\n")
  local s = "  "
  for n,b in ipairs(actlist) do
    s = s..b..","
    if #s >= 75 then
      assert(out:write(s, "\n"))
      s = "  "
    end
  end
  out:write(s, last, "\n};\n\n") -- Add last byte back.
end

------------------------------------------------------------------------------

-- Add byte to action list.
local function wputxb(n)
  assert(n >= 0 and n <= 255 and n % 1 == 0, "byte out of range")
  actlist[#actlist+1] = n
end

-- Add action to list with optional arg. Advance buffer pos, too.
local function waction(action, a, num)
  wputxb(assert(map_action[action], "bad action name `"..action.."'"))
  if a then actargs[#actargs+1] = a end
  if a or num then secpos = secpos + (num or 1) end
end

-- Add call to embedded DynASM C code.
local function wcall(func, args)
  wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
end

-- Delete duplicate action list chunks. A tad slow, but so what.
local function dedupechunk(offset)
  local al, as = actlist, actstr
  local chunk = char(unpack(al, offset+1, #al))
  local orig = find(as, chunk, 1, true)
  if orig then
    actargs[1] = orig-1 -- Replace with original offset.
    for i=offset+1,#al do al[i] = nil end -- Kill dupe.
  else
    actstr = as..chunk
  end
end

-- Flush action list (intervening C code or buffer pos overflow).
local function wflush(term)
  local offset = actargs[1]
  if #actlist == offset then return end -- Nothing to flush.
  if not term then waction("STOP") end -- Terminate action list.
  dedupechunk(offset)
  wcall("put", actargs) -- Add call to dasm_put().
  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
  secpos = 1 -- The actionlist offset occupies a buffer position, too.
end

-- Put escaped byte.
local function wputb(n)
  if n >= actfirst then waction("ESC") end -- Need to escape byte.
  wputxb(n)
end

------------------------------------------------------------------------------

-- Global label name -> global label number. With auto assignment on 1st use.
local next_global = 10
local map_global = setmetatable({}, { __index = function(t, name)
  if not match(name, "^[%a_][%w_@]*$") then werror("bad global label") end
  local n = next_global
  if n > 246 then werror("too many global labels") end
  next_global = n + 1
  t[name] = n
  return n
end})

-- Dump global labels.
local function dumpglobals(out, lvl)
  local t = {}
  for name, n in pairs(map_global) do t[n] = name end
  out:write("Global labels:\n")
  for i=10,next_global-1 do
    out:write(format("  %s\n", t[i]))
  end
  out:write("\n")
end

-- Write global label enum.
local function writeglobals(out, prefix)
  local t = {}
  for name, n in pairs(map_global) do t[n] = name end
  out:write("enum {\n")
  for i=10,next_global-1 do
    out:write("  ", prefix, gsub(t[i], "@.*", ""), ",\n")
  end
  out:write("  ", prefix, "_MAX\n};\n")
end

-- Write global label names.
local function writeglobalnames(out, name)
  local t = {}
  for name, n in pairs(map_global) do t[n] = name end
  out:write("static const char *const ", name, "[] = {\n")
  for i=10,next_global-1 do
    out:write("  \"", t[i], "\",\n")
  end
  out:write("  (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Extern label name -> extern label number. With auto assignment on 1st use.
local next_extern = -1
local map_extern = setmetatable({}, { __index = function(t, name)
  -- No restrictions on the name for now.
  local n = next_extern
  if n < -256 then werror("too many extern labels") end
  next_extern = n - 1
  t[name] = n
  return n
end})

-- Dump extern labels.
local function dumpexterns(out, lvl)
  local t = {}
  for name, n in pairs(map_extern) do t[-n] = name end
  out:write("Extern labels:\n")
  for i=1,-next_extern-1 do
    out:write(format("  %s\n", t[i]))
  end
  out:write("\n")
end

-- Write extern label names.
local function writeexternnames(out, name)
  local t = {}
  for name, n in pairs(map_extern) do t[-n] = name end
  out:write("static const char *const ", name, "[] = {\n")
  for i=1,-next_extern-1 do
    out:write("  \"", t[i], "\",\n")
  end
  out:write("  (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Arch-specific maps.
local map_archdef = {}        -- Ext. register name -> int. name.
local map_reg_rev = {}        -- Int. register name -> ext. name.
local map_reg_num = {}        -- Int. register name -> register number.
local map_reg_opsize = {}    -- Int. register name -> operand size.
local map_reg_valid_base = {}    -- Int. register name -> valid base register?
local map_reg_valid_index = {}    -- Int. register name -> valid index register?
local map_reg_needrex = {}    -- Int. register name -> need rex vs. no rex.
local reg_list = {}        -- Canonical list of int. register names.

local map_type = {}        -- Type name -> { ctype, reg }
local ctypenum = 0        -- Type number (for _PTx macros).

local addrsize = x64 and "q" or "d"    -- Size for address operands.

-- Helper functions to fill register maps.
local function mkrmap(sz, cl, names)
  local cname = format("@%s", sz)
  reg_list[#reg_list+1] = cname
  map_archdef[cl] = cname
  map_reg_rev[cname] = cl
  map_reg_num[cname] = -1
  map_reg_opsize[cname] = sz
  if sz == addrsize or sz == "d" then
    map_reg_valid_base[cname] = true
    map_reg_valid_index[cname] = true
  end
  if names then
    for n,name in ipairs(names) do
      local iname = format("@%s%x", sz, n-1)
      reg_list[#reg_list+1] = iname
      map_archdef[name] = iname
      map_reg_rev[iname] = name
      map_reg_num[iname] = n-1
      map_reg_opsize[iname] = sz
      if sz == "b" and n > 4 then map_reg_needrex[iname] = false end
      if sz == addrsize or sz == "d" then
    map_reg_valid_base[iname] = true
    map_reg_valid_index[iname] = true
      end
    end
  end
  for i=0,(x64 and sz ~= "f") and 15 or 7 do
    local needrex = sz == "b" and i > 3
    local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
    if needrex then map_reg_needrex[iname] = true end
    local name
    if sz == "o" then name = format("xmm%d", i)
    elseif sz == "f" then name = format("st%d", i)
    else name = format("r%d%s", i, sz == addrsize and "" or sz) end
    map_archdef[name] = iname
    if not map_reg_rev[iname] then
      reg_list[#reg_list+1] = iname
      map_reg_rev[iname] = name
      map_reg_num[iname] = i
      map_reg_opsize[iname] = sz
      if sz == addrsize or sz == "d" then
    map_reg_valid_base[iname] = true
    map_reg_valid_index[iname] = true
      end
    end
  end
  reg_list[#reg_list+1] = ""
end

-- Integer registers (qword, dword, word and byte sized).
if x64 then
  mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"})
end
mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"})
mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
map_reg_valid_index[map_archdef.esp] = false
if x64 then map_reg_valid_index[map_archdef.rsp] = false end
map_archdef["Ra"] = "@"..addrsize

-- FP registers (internally tword sized, but use "f" as operand size).
mkrmap("f", "Rf")

-- SSE registers (oword sized, but qword and dword accessible).
mkrmap("o", "xmm")

-- Operand size prefixes to codes.
local map_opsize = {
  byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t",
  aword = addrsize,
}

-- Operand size code to number.
local map_opsizenum = {
  b = 1, w = 2, d = 4, q = 8, o = 16, t = 10,
}

-- Operand size code to name.
local map_opsizename = {
  b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword",
  f = "fpword",
}

-- Valid index register scale factors.
local map_xsc = {
  ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3,
}

-- Condition codes.
local map_cc = {
  o = 0, no = 1, b = 2, nb = 3, e = 4, ne = 5, be = 6, nbe = 7,
  s = 8, ns = 9, p = 10, np = 11, l = 12, nl = 13, le = 14, nle = 15,
  c = 2, nae = 2, nc = 3, ae = 3, z = 4, nz = 5, na = 6, a = 7,
  pe = 10, po = 11, nge = 12, ge = 13, ng = 14, g = 15,
}


-- Reverse defines for registers.
function _M.revdef(s)
  return gsub(s, "@%w+", map_reg_rev)
end

-- Dump register names and numbers
local function dumpregs(out)
  out:write("Register names, sizes and internal numbers:\n")
  for _,reg in ipairs(reg_list) do
    if reg == "" then
      out:write("\n")
    else
      local name = map_reg_rev[reg]
      local num = map_reg_num[reg]
      local opsize = map_opsizename[map_reg_opsize[reg]]
      out:write(format("  %-5s %-8s %s\n", name, opsize,
               num < 0 and "(variable)" or num))
    end
  end
end

------------------------------------------------------------------------------

-- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC).
local function wputlabel(aprefix, imm, num)
  if type(imm) == "number" then
    if imm < 0 then
      waction("EXTERN")
      wputxb(aprefix == "IMM_" and 0 or 1)
      imm = -imm-1
    else
      waction(aprefix.."LG", nil, num);
    end
    wputxb(imm)
  else
    waction(aprefix.."PC", imm, num)
  end
end

-- Put signed byte or arg.
local function wputsbarg(n)
  if type(n) == "number" then
    if n < -128 or n > 127 then
      werror("signed immediate byte out of range")
    end
    if n < 0 then n = n + 256 end
    wputb(n)
  else waction("IMM_S", n) end
end

-- Put unsigned byte or arg.
local function wputbarg(n)
  if type(n) == "number" then
    if n < 0 or n > 255 then
      werror("unsigned immediate byte out of range")
    end
    wputb(n)
  else waction("IMM_B", n) end
end

-- Put unsigned word or arg.
local function wputwarg(n)
  if type(n) == "number" then
    if shr(n, 16) ~= 0 then
      werror("unsigned immediate word out of range")
    end
    wputb(band(n, 255)); wputb(shr(n, 8));
  else waction("IMM_W", n) end
end

-- Put signed or unsigned dword or arg.
local function wputdarg(n)
  local tn = type(n)
  if tn == "number" then
    wputb(band(n, 255))
    wputb(band(shr(n, 8), 255))
    wputb(band(shr(n, 16), 255))
    wputb(shr(n, 24))
  elseif tn == "table" then
    wputlabel("IMM_", n[1], 1)
  else
    waction("IMM_D", n)
  end
end

-- Put operand-size dependent number or arg (defaults to dword).
local function wputszarg(sz, n)
  if not sz or sz == "d" or sz == "q" then wputdarg(n)
  elseif sz == "w" then wputwarg(n)
  elseif sz == "b" then wputbarg(n)
  elseif sz == "s" then wputsbarg(n)
  else werror("bad operand size") end
end

-- Put multi-byte opcode with operand-size dependent modifications.
local function wputop(sz, op, rex)
  local r
  if rex ~= 0 and not x64 then werror("bad operand size") end
  if sz == "w" then wputb(102) end
  -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
  if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
  if op >= 16777216 then wputb(shr(op, 24)); op = band(op, 0xffffff) end
  if op >= 65536 then
    if rex ~= 0 then
      local opc3 = band(op, 0xffff00)
      if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
    wputb(64 + band(rex, 15)); rex = 0
      end
    end
    wputb(shr(op, 16)); op = band(op, 0xffff)
  end
  if op >= 256 then
    local b = shr(op, 8)
    if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end
    wputb(b)
    op = band(op, 255)
  end
  if rex ~= 0 then wputb(64 + band(rex, 15)) end
  if sz == "b" then op = op - 1 end
  wputb(op)
end

-- Put ModRM or SIB formatted byte.
local function wputmodrm(m, s, rm, vs, vrm)
  assert(m < 4 and s < 16 and rm < 16, "bad modrm operands")
  wputb(shl(m, 6) + shl(band(s, 7), 3) + band(rm, 7))
end

-- Put ModRM/SIB plus optional displacement.
local function wputmrmsib(t, imark, s, vsreg)
  local vreg, vxreg
  local reg, xreg = t.reg, t.xreg
  if reg and reg < 0 then reg = 0; vreg = t.vreg end
  if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end
  if s < 0 then s = 0 end

  -- Register mode.
  if sub(t.mode, 1, 1) == "r" then
    wputmodrm(3, s, reg)
    if vsreg then waction("VREG", vsreg); wputxb(2) end
    if vreg then waction("VREG", vreg); wputxb(0) end
    return
  end

  local disp = t.disp
  local tdisp = type(disp)
  -- No base register?
  if not reg then
    local riprel = false
    if xreg then
      -- Indexed mode with index register only.
      -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
      wputmodrm(0, s, 4)
      if imark == "I" then waction("MARK") end
      if vsreg then waction("VREG", vsreg); wputxb(2) end
      wputmodrm(t.xsc, xreg, 5)
      if vxreg then waction("VREG", vxreg); wputxb(3) end
    else
      -- Pure 32 bit displacement.
      if x64 and tdisp ~= "table" then
    wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
    if imark == "I" then waction("MARK") end
    wputmodrm(0, 4, 5)
      else
    riprel = x64
    wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
    if imark == "I" then waction("MARK") end
      end
      if vsreg then waction("VREG", vsreg); wputxb(2) end
    end
    if riprel then -- Emit rip-relative displacement.
      if match("UWSiI", imark) then
    werror("NYI: rip-relative displacement followed by immediate")
      end
      -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
      wputlabel("REL_", disp[1], 2)
    else
      wputdarg(disp)
    end
    return
  end

  local m
  if tdisp == "number" then -- Check displacement size at assembly time.
    if disp == 0 and band(reg, 7) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
      if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
    elseif disp >= -128 and disp <= 127 then m = 1
    else m = 2 end
  elseif tdisp == "table" then
    m = 2
  end

  -- Index register present or esp as base register: need SIB encoding.
  if xreg or band(reg, 7) == 4 then
    wputmodrm(m or 2, s, 4) -- ModRM.
    if m == nil or imark == "I" then waction("MARK") end
    if vsreg then waction("VREG", vsreg); wputxb(2) end
    wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
    if vxreg then waction("VREG", vxreg); wputxb(3) end
    if vreg then waction("VREG", vreg); wputxb(1) end
  else
    wputmodrm(m or 2, s, reg) -- ModRM.
    if (imark == "I" and (m == 1 or m == 2)) or
       (m == nil and (vsreg or vreg)) then waction("MARK") end
    if vsreg then waction("VREG", vsreg); wputxb(2) end
    if vreg then waction("VREG", vreg); wputxb(1) end
  end

  -- Put displacement.
  if m == 1 then wputsbarg(disp)
  elseif m == 2 then wputdarg(disp)
  elseif m == nil then waction("DISP", disp) end
end

------------------------------------------------------------------------------

-- Return human-readable operand mode string.
local function opmodestr(op, args)
  local m = {}
  for i=1,#args do
    local a = args[i]
    m[#m+1] = sub(a.mode, 1, 1)..(a.opsize or "?")
  end
  return op.." "..concat(m, ",")
end

-- Convert number to valid integer or nil.
local function toint(expr)
  local n = tonumber(expr)
  if n then
    if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then
      werror("bad integer number `"..expr.."'")
    end
    return n
  end
end

-- Parse immediate expression.
local function immexpr(expr)
  -- &expr (pointer)
  if sub(expr, 1, 1) == "&" then
    return "iPJ", format("(ptrdiff_t)(%s)", sub(expr,2))
  end

  local prefix = sub(expr, 1, 2)
  -- =>expr (pc label reference)
  if prefix == "=>" then
    return "iJ", sub(expr, 3)
  end
  -- ->name (global label reference)
  if prefix == "->" then
    return "iJ", map_global[sub(expr, 3)]
  end

  -- [<>][1-9] (local label reference)
  local dir, lnum = match(expr, "^([<>])([1-9])$")
  if dir then -- Fwd: 247-255, Bkwd: 1-9.
    return "iJ", lnum + (dir == ">" and 246 or 0)
  end

  local extname = match(expr, "^extern%s+(%S+)$")
  if extname then
    return "iJ", map_extern[extname]
  end

  -- expr (interpreted as immediate)
  return "iI", expr
end

-- Parse displacement expression: +-num, +-expr, +-opsize*num
local function dispexpr(expr)
  local disp = expr == "" and 0 or toint(expr)
  if disp then return disp end
  local c, dispt = match(expr, "^([+-])%s*(.+)$")
  if c == "+" then
    expr = dispt
  elseif not c then
    werror("bad displacement expression `"..expr.."'")
  end
  local opsize, tailops = match(dispt, "^(%w+)%s*%*%s*(.+)$")
  local ops, imm = map_opsize[opsize], toint(tailops)
  if ops and imm then
    if c == "-" then imm = -imm end
    return imm*map_opsizenum[ops]
  end
  local mode, iexpr = immexpr(dispt)
  if mode == "iJ" then
    if c == "-" then werror("cannot invert label reference") end
    return { iexpr }
  end
  return expr -- Need to return original signed expression.
end

-- Parse register or type expression.
local function rtexpr(expr)
  if not expr then return end
  local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$")
  local tp = map_type[tname or expr]
  if tp then
    local reg = ovreg or tp.reg
    local rnum = map_reg_num[reg]
    if not rnum then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    if not map_reg_valid_base[reg] then
      werror("bad base register override `"..(map_reg_rev[reg] or reg).."'")
    end
    return reg, rnum, tp
  end
  return expr, map_reg_num[expr]
end

-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
local function parseoperand(param)
  local t = {}

  local expr = param
  local opsize, tailops = match(param, "^(%w+)%s*(.+)$")
  if opsize then
    t.opsize = map_opsize[opsize]
    if t.opsize then expr = tailops end
  end

  local br = match(expr, "^%[%s*(.-)%s*%]$")
  repeat
    if br then
      t.mode = "xm"

      -- [disp]
      t.disp = toint(br)
      if t.disp then
    t.mode = x64 and "xm" or "xmO"
    break
      end

      -- [reg...]
      local tp
      local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$")
      reg, t.reg, tp = rtexpr(reg)
      if not t.reg then
    -- [expr]
    t.mode = x64 and "xm" or "xmO"
    t.disp = dispexpr("+"..br)
    break
      end

      if t.reg == -1 then
    t.vreg, tailr = match(tailr, "^(%b())(.*)$")
    if not t.vreg then werror("bad variable register expression") end
      end

      -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
      local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$")
      if xsc then
    if not map_reg_valid_index[reg] then
      werror("bad index register `"..map_reg_rev[reg].."'")
    end
    t.xsc = map_xsc[xsc]
    t.xreg = t.reg
    t.vxreg = t.vreg
    t.reg = nil
    t.vreg = nil
    t.disp = dispexpr(tailsc)
    break
      end
      if not map_reg_valid_base[reg] then
    werror("bad base register `"..map_reg_rev[reg].."'")
      end

      -- [reg] or [reg+-disp]
      t.disp = toint(tailr) or (tailr == "" and 0)
      if t.disp then break end

      -- [reg+xreg...]
      local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$")
      xreg, t.xreg, tp = rtexpr(xreg)
      if not t.xreg then
    -- [reg+-expr]
    t.disp = dispexpr(tailr)
    break
      end
      if not map_reg_valid_index[xreg] then
    werror("bad index register `"..map_reg_rev[xreg].."'")
      end

      if t.xreg == -1 then
    t.vxreg, tailx = match(tailx, "^(%b())(.*)$")
    if not t.vxreg then werror("bad variable register expression") end
      end

      -- [reg+xreg*xsc...]
      local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$")
      if xsc then
    t.xsc = map_xsc[xsc]
    tailx = tailsc
      end

      -- [...] or [...+-disp] or [...+-expr]
      t.disp = dispexpr(tailx)
    else
      -- imm or opsize*imm
      local imm = toint(expr)
      if not imm and sub(expr, 1, 1) == "*" and t.opsize then
    imm = toint(sub(expr, 2))
    if imm then
      imm = imm * map_opsizenum[t.opsize]
      t.opsize = nil
    end
      end
      if imm then
    if t.opsize then werror("bad operand size override") end
    local m = "i"
    if imm == 1 then m = m.."1" end
    if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end
    if imm >= -128 and imm <= 127 then m = m.."S" end
    t.imm = imm
    t.mode = m
    break
      end

      local tp
      local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$")
      reg, t.reg, tp = rtexpr(reg)
      if t.reg then
    if t.reg == -1 then
      t.vreg, tailr = match(tailr, "^(%b())(.*)$")
      if not t.vreg then werror("bad variable register expression") end
    end
    -- reg
    if tailr == "" then
      if t.opsize then werror("bad operand size override") end
      t.opsize = map_reg_opsize[reg]
      if t.opsize == "f" then
        t.mode = t.reg == 0 and "fF" or "f"
      else
        if reg == "@w4" or (x64 and reg == "@d4") then
          wwarn("bad idea, try again with `"..(x64 and "rsp'" or "esp'"))
        end
        t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm")
      end
      t.needrex = map_reg_needrex[reg]
      break
    end

    -- type[idx], type[idx].field, type->field -> [reg+offset_expr]
    if not tp then werror("bad operand `"..param.."'") end
    t.mode = "xm"
    t.disp = format(tp.ctypefmt, tailr)
      else
    t.mode, t.imm = immexpr(expr)
    if sub(t.mode, -1) == "J" then
      if t.opsize and t.opsize ~= addrsize then
        werror("bad operand size override")
      end
      t.opsize = addrsize
    end
      end
    end
  until true
  return t
end

------------------------------------------------------------------------------
-- x86 Template String Description
-- ===============================
--
-- Each template string is a list of [match:]pattern pairs,
-- separated by "|". The first match wins. No match means a
-- bad or unsupported combination of operand modes or sizes.
--
-- The match part and the ":" is omitted if the operation has
-- no operands. Otherwise the first N characters are matched
-- against the mode strings of each of the N operands.
--
-- The mode string for each operand type is (see parseoperand()):
--   Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
--   FP register:      "f",  +"F" for st0
--   Index operand:    "xm", +"O" for [disp] (pure offset)
--   Immediate:        "i",  +"S" for signed 8 bit, +"1" for 1,
--                     +"I" for arg, +"P" for pointer
--   Any:              +"J" for valid jump targets
--
-- So a match character "m" (mixed) matches both an integer register
-- and an index operand (to be encoded with the ModRM/SIB scheme).
-- But "r" matches only a register and "x" only an index operand
-- (e.g. for FP memory access operations).
--
-- The operand size match string starts right after the mode match
-- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty.
-- The effective data size of the operation is matched against this list.
--
-- If only the regular "b", "w", "d", "q", "t" operand sizes are
-- present, then all operands must be the same size. Unspecified sizes
-- are ignored, but at least one operand must have a size or the pattern
-- won't match (use the "byte", "word", "dword", "qword", "tword"
-- operand size overrides. E.g.: mov dword [eax], 1).
--
-- If the list has a "1" or "2" prefix, the operand size is taken
-- from the respective operand and any other operand sizes are ignored.
-- If the list contains only ".", all operand sizes are ignored.
-- If the list has a "/" prefix, the concatenated (mixed) operand sizes
-- are compared to the match.
--
-- E.g. "rrdw" matches for either two dword registers or two word
-- registers. "Fx2dq" matches an st0 operand plus an index operand
-- pointing to a dword (float) or qword (double).
--
-- Every character after the ":" is part of the pattern string:
--   Hex chars are accumulated to form the opcode (left to right).
--   "n"       disables the standard opcode mods
--             (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
--   "X"       Force REX.W.
--   "r"/"R"   adds the reg. number from the 1st/2nd operand to the opcode.
--   "m"/"M"   generates ModRM/SIB from the 1st/2nd operand.
--             The spare 3 bits are either filled with the last hex digit or
--             the result from a previous "r"/"R". The opcode is restored.
--
-- All of the following characters force a flush of the opcode:
--   "o"/"O"   stores a pure 32 bit disp (offset) from the 1st/2nd operand.
--   "S"       stores a signed 8 bit immediate from the last operand.
--   "U"       stores an unsigned 8 bit immediate from the last operand.
--   "W"       stores an unsigned 16 bit immediate from the last operand.
--   "i"       stores an operand sized immediate from the last operand.
--   "I"       dito, but generates an action code to optionally modify
--             the opcode (+2) for a signed 8 bit immediate.
--   "J"       generates one of the REL action codes from the last operand.
--
------------------------------------------------------------------------------

-- Template strings for x86 instructions. Ordered by first opcode byte.
-- Unimplemented opcodes (deliberate omissions) are marked with *.
local map_op = {
  -- 00-05: add...
  -- 06: *push es
  -- 07: *pop es
  -- 08-0D: or...
  -- 0E: *push cs
  -- 0F: two byte opcode prefix
  -- 10-15: adc...
  -- 16: *push ss
  -- 17: *pop ss
  -- 18-1D: sbb...
  -- 1E: *push ds
  -- 1F: *pop ds
  -- 20-25: and...
  es_0 =    "26",
  -- 27: *daa
  -- 28-2D: sub...
  cs_0 =    "2E",
  -- 2F: *das
  -- 30-35: xor...
  ss_0 =    "36",
  -- 37: *aaa
  -- 38-3D: cmp...
  ds_0 =    "3E",
  -- 3F: *aas
  inc_1 =    x64 and "m:FF0m" or "rdw:40r|m:FF0m",
  dec_1 =    x64 and "m:FF1m" or "rdw:48r|m:FF1m",
  push_1 =    (x64 and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or
             "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i",
  pop_1 =    x64 and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m",
  -- 60: *pusha, *pushad, *pushaw
  -- 61: *popa, *popad, *popaw
  -- 62: *bound rdw,x
  -- 63: x86: *arpl mw,rw
  movsxd_2 =    x64 and "rm/qd:63rM",
  fs_0 =    "64",
  gs_0 =    "65",
  o16_0 =    "66",
  a16_0 =    not x64 and "67" or nil,
  a32_0 =    x64 and "67",
  -- 68: push idw
  -- 69: imul rdw,mdw,idw
  -- 6A: push ib
  -- 6B: imul rdw,mdw,S
  -- 6C: *insb
  -- 6D: *insd, *insw
  -- 6E: *outsb
  -- 6F: *outsd, *outsw
  -- 70-7F: jcc lb
  -- 80: add... mb,i
  -- 81: add... mdw,i
  -- 82: *undefined
  -- 83: add... mdw,S
  test_2 =    "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
  -- 86: xchg rb,mb
  -- 87: xchg rdw,mdw
  -- 88: mov mb,r
  -- 89: mov mdw,r
  -- 8A: mov r,mb
  -- 8B: mov r,mdw
  -- 8C: *mov mdw,seg
  lea_2 =    "rx1dq:8DrM",
  -- 8E: *mov seg,mdw
  -- 8F: pop mdw
  nop_0 =    "90",
  xchg_2 =    "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
  cbw_0 =    "6698",
  cwde_0 =    "98",
  cdqe_0 =    "4898",
  cwd_0 =    "6699",
  cdq_0 =    "99",
  cqo_0 =    "4899",
  -- 9A: *call iw:idw
  wait_0 =    "9B",
  fwait_0 =    "9B",
  pushf_0 =    "9C",
  pushfd_0 =    not x64 and "9C",
  pushfq_0 =    x64 and "9C",
  popf_0 =    "9D",
  popfd_0 =    not x64 and "9D",
  popfq_0 =    x64 and "9D",
  sahf_0 =    "9E",
  lahf_0 =    "9F",
  mov_2 =    "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
  movsb_0 =    "A4",
  movsw_0 =    "66A5",
  movsd_0 =    "A5",
  cmpsb_0 =    "A6",
  cmpsw_0 =    "66A7",
  cmpsd_0 =    "A7",
  -- A8: test Rb,i
  -- A9: test Rdw,i
  stosb_0 =    "AA",
  stosw_0 =    "66AB",
  stosd_0 =    "AB",
  lodsb_0 =    "AC",
  lodsw_0 =    "66AD",
  lodsd_0 =    "AD",
  scasb_0 =    "AE",
  scasw_0 =    "66AF",
  scasd_0 =    "AF",
  -- B0-B7: mov rb,i
  -- B8-BF: mov rdw,i
  -- C0: rol... mb,i
  -- C1: rol... mdw,i
  ret_1 =    "i.:nC2W",
  ret_0 =    "C3",
  -- C4: *les rdw,mq
  -- C5: *lds rdw,mq
  -- C6: mov mb,i
  -- C7: mov mdw,i
  -- C8: *enter iw,ib
  leave_0 =    "C9",
  -- CA: *retf iw
  -- CB: *retf
  int3_0 =    "CC",
  int_1 =    "i.:nCDU",
  into_0 =    "CE",
  -- CF: *iret
  -- D0: rol... mb,1
  -- D1: rol... mdw,1
  -- D2: rol... mb,cl
  -- D3: rol... mb,cl
  -- D4: *aam ib
  -- D5: *aad ib
  -- D6: *salc
  -- D7: *xlat
  -- D8-DF: floating point ops
  -- E0: *loopne
  -- E1: *loope
  -- E2: *loop
  -- E3: *jcxz, *jecxz
  -- E4: *in Rb,ib
  -- E5: *in Rdw,ib
  -- E6: *out ib,Rb
  -- E7: *out ib,Rdw
  call_1 =    x64 and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J",
  jmp_1 =    x64 and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB
  -- EA: *jmp iw:idw
  -- EB: jmp ib
  -- EC: *in Rb,dx
  -- ED: *in Rdw,dx
  -- EE: *out dx,Rb
  -- EF: *out dx,Rdw
  -- F0: *lock
  int1_0 =    "F1",
  repne_0 =    "F2",
  repnz_0 =    "F2",
  rep_0 =    "F3",
  repe_0 =    "F3",
  repz_0 =    "F3",
  -- F4: *hlt
  cmc_0 =    "F5",
  -- F6: test... mb,i; div... mb
  -- F7: test... mdw,i; div... mdw
  clc_0 =    "F8",
  stc_0 =    "F9",
  -- FA: *cli
  cld_0 =    "FC",
  std_0 =    "FD",
  -- FE: inc... mb
  -- FF: inc... mdw

  -- misc ops
  not_1 =    "m:F72m",
  neg_1 =    "m:F73m",
  mul_1 =    "m:F74m",
  imul_1 =    "m:F75m",
  div_1 =    "m:F76m",
  idiv_1 =    "m:F77m",

  imul_2 =    "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
  imul_3 =    "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",

  movzx_2 =    "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
  movsx_2 =    "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",

  bswap_1 =    "rqd:0FC8r",
  bsf_2 =    "rmqdw:0FBCrM",
  bsr_2 =    "rmqdw:0FBDrM",
  bt_2 =    "mrqdw:0FA3Rm|miqdw:0FBA4mU",
  btc_2 =    "mrqdw:0FBBRm|miqdw:0FBA7mU",
  btr_2 =    "mrqdw:0FB3Rm|miqdw:0FBA6mU",
  bts_2 =    "mrqdw:0FABRm|miqdw:0FBA5mU",

  rdtsc_0 =    "0F31", -- P1+
  cpuid_0 =    "0FA2", -- P1+

  -- floating point ops
  fst_1 =    "ff:DDD0r|xd:D92m|xq:nDD2m",
  fstp_1 =    "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
  fld_1 =    "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",

  fpop_0 =    "DDD8", -- Alias for fstp st0.

  fist_1 =    "xw:nDF2m|xd:DB2m",
  fistp_1 =    "xw:nDF3m|xd:DB3m|xq:nDF7m",
  fild_1 =    "xw:nDF0m|xd:DB0m|xq:nDF5m",

  fxch_0 =    "D9C9",
  fxch_1 =    "ff:D9C8r",
  fxch_2 =    "fFf:D9C8r|Fff:D9C8R",

  fucom_1 =    "ff:DDE0r",
  fucom_2 =    "Fff:DDE0R",
  fucomp_1 =    "ff:DDE8r",
  fucomp_2 =    "Fff:DDE8R",
  fucomi_1 =    "ff:DBE8r", -- P6+
  fucomi_2 =    "Fff:DBE8R", -- P6+
  fucomip_1 =    "ff:DFE8r", -- P6+
  fucomip_2 =    "Fff:DFE8R", -- P6+
  fcomi_1 =    "ff:DBF0r", -- P6+
  fcomi_2 =    "Fff:DBF0R", -- P6+
  fcomip_1 =    "ff:DFF0r", -- P6+
  fcomip_2 =    "Fff:DFF0R", -- P6+
  fucompp_0 =    "DAE9",
  fcompp_0 =    "DED9",

  fldcw_1 =    "xw:nD95m",
  fstcw_1 =    "xw:n9BD97m",
  fnstcw_1 =    "xw:nD97m",
  fstsw_1 =    "Rw:n9BDFE0|xw:n9BDD7m",
  fnstsw_1 =    "Rw:nDFE0|xw:nDD7m",
  fclex_0 =    "9BDBE2",
  fnclex_0 =    "DBE2",

  fnop_0 =    "D9D0",
  -- D9D1-D9DF: unassigned

  fchs_0 =    "D9E0",
  fabs_0 =    "D9E1",
  -- D9E2: unassigned
  -- D9E3: unassigned
  ftst_0 =    "D9E4",
  fxam_0 =    "D9E5",
  -- D9E6: unassigned
  -- D9E7: unassigned
  fld1_0 =    "D9E8",
  fldl2t_0 =    "D9E9",
  fldl2e_0 =    "D9EA",
  fldpi_0 =    "D9EB",
  fldlg2_0 =    "D9EC",
  fldln2_0 =    "D9ED",
  fldz_0 =    "D9EE",
  -- D9EF: unassigned

  f2xm1_0 =    "D9F0",
  fyl2x_0 =    "D9F1",
  fptan_0 =    "D9F2",
  fpatan_0 =    "D9F3",
  fxtract_0 =    "D9F4",
  fprem1_0 =    "D9F5",
  fdecstp_0 =    "D9F6",
  fincstp_0 =    "D9F7",
  fprem_0 =    "D9F8",
  fyl2xp1_0 =    "D9F9",
  fsqrt_0 =    "D9FA",
  fsincos_0 =    "D9FB",
  frndint_0 =    "D9FC",
  fscale_0 =    "D9FD",
  fsin_0 =    "D9FE",
  fcos_0 =    "D9FF",

  -- SSE, SSE2
  andnpd_2 =    "rmo:660F55rM",
  andnps_2 =    "rmo:0F55rM",
  andpd_2 =    "rmo:660F54rM",
  andps_2 =    "rmo:0F54rM",
  clflush_1 =    "x.:0FAE7m",
  cmppd_3 =    "rmio:660FC2rMU",
  cmpps_3 =    "rmio:0FC2rMU",
  cmpsd_3 =    "rrio:F20FC2rMU|rxi/oq:",
  cmpss_3 =    "rrio:F30FC2rMU|rxi/od:",
  comisd_2 =    "rro:660F2FrM|rx/oq:",
  comiss_2 =    "rro:0F2FrM|rx/od:",
  cvtdq2pd_2 =    "rro:F30FE6rM|rx/oq:",
  cvtdq2ps_2 =    "rmo:0F5BrM",
  cvtpd2dq_2 =    "rmo:F20FE6rM",
  cvtpd2ps_2 =    "rmo:660F5ArM",
  cvtpi2pd_2 =    "rx/oq:660F2ArM",
  cvtpi2ps_2 =    "rx/oq:0F2ArM",
  cvtps2dq_2 =    "rmo:660F5BrM",
  cvtps2pd_2 =    "rro:0F5ArM|rx/oq:",
  cvtsd2si_2 =    "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:",
  cvtsd2ss_2 =    "rro:F20F5ArM|rx/oq:",
  cvtsi2sd_2 =    "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
  cvtsi2ss_2 =    "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
  cvtss2sd_2 =    "rro:F30F5ArM|rx/od:",
  cvtss2si_2 =    "rr/do:F20F2CrM|rr/qo:|rxd:|rx/qd:",
  cvttpd2dq_2 =    "rmo:660FE6rM",
  cvttps2dq_2 =    "rmo:F30F5BrM",
  cvttsd2si_2 =    "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
  cvttss2si_2 =    "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
  ldmxcsr_1 =    "xd:0FAE2m",
  lfence_0 =    "0FAEE8",
  maskmovdqu_2 = "rro:660FF7rM",
  mfence_0 =    "0FAEF0",
  movapd_2 =    "rmo:660F28rM|mro:660F29Rm",
  movaps_2 =    "rmo:0F28rM|mro:0F29Rm",
  movd_2 =    "rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:",
  movdqa_2 =    "rmo:660F6FrM|mro:660F7FRm",
  movdqu_2 =    "rmo:F30F6FrM|mro:F30F7FRm",
  movhlps_2 =    "rro:0F12rM",
  movhpd_2 =    "rx/oq:660F16rM|xr/qo:n660F17Rm",
  movhps_2 =    "rx/oq:0F16rM|xr/qo:n0F17Rm",
  movlhps_2 =    "rro:0F16rM",
  movlpd_2 =    "rx/oq:660F12rM|xr/qo:n660F13Rm",
  movlps_2 =    "rx/oq:0F12rM|xr/qo:n0F13Rm",
  movmskpd_2 =    "rr/do:660F50rM",
  movmskps_2 =    "rr/do:0F50rM",
  movntdq_2 =    "xro:660FE7Rm",
  movnti_2 =    "xrqd:0FC3Rm",
  movntpd_2 =    "xro:660F2BRm",
  movntps_2 =    "xro:0F2BRm",
  movq_2 =    "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
  movsd_2 =    "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
  movss_2 =    "rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
  movupd_2 =    "rmo:660F10rM|mro:660F11Rm",
  movups_2 =    "rmo:0F10rM|mro:0F11Rm",
  orpd_2 =    "rmo:660F56rM",
  orps_2 =    "rmo:0F56rM",
  packssdw_2 =    "rmo:660F6BrM",
  packsswb_2 =    "rmo:660F63rM",
  packuswb_2 =    "rmo:660F67rM",
  paddb_2 =    "rmo:660FFCrM",
  paddd_2 =    "rmo:660FFErM",
  paddq_2 =    "rmo:660FD4rM",
  paddsb_2 =    "rmo:660FECrM",
  paddsw_2 =    "rmo:660FEDrM",
  paddusb_2 =    "rmo:660FDCrM",
  paddusw_2 =    "rmo:660FDDrM",
  paddw_2 =    "rmo:660FFDrM",
  pand_2 =    "rmo:660FDBrM",
  pandn_2 =    "rmo:660FDFrM",
  pause_0 =    "F390",
  pavgb_2 =    "rmo:660FE0rM",
  pavgw_2 =    "rmo:660FE3rM",
  pcmpeqb_2 =    "rmo:660F74rM",
  pcmpeqd_2 =    "rmo:660F76rM",
  pcmpeqw_2 =    "rmo:660F75rM",
  pcmpgtb_2 =    "rmo:660F64rM",
  pcmpgtd_2 =    "rmo:660F66rM",
  pcmpgtw_2 =    "rmo:660F65rM",
  pextrw_3 =    "rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only.
  pinsrw_3 =    "rri/od:660FC4rMU|rxi/ow:",
  pmaddwd_2 =    "rmo:660FF5rM",
  pmaxsw_2 =    "rmo:660FEErM",
  pmaxub_2 =    "rmo:660FDErM",
  pminsw_2 =    "rmo:660FEArM",
  pminub_2 =    "rmo:660FDArM",
  pmovmskb_2 =    "rr/do:660FD7rM",
  pmulhuw_2 =    "rmo:660FE4rM",
  pmulhw_2 =    "rmo:660FE5rM",
  pmullw_2 =    "rmo:660FD5rM",
  pmuludq_2 =    "rmo:660FF4rM",
  por_2 =    "rmo:660FEBrM",
  prefetchnta_1 = "xb:n0F180m",
  prefetcht0_1 = "xb:n0F181m",
  prefetcht1_1 = "xb:n0F182m",
  prefetcht2_1 = "xb:n0F183m",
  psadbw_2 =    "rmo:660FF6rM",
  pshufd_3 =    "rmio:660F70rMU",
  pshufhw_3 =    "rmio:F30F70rMU",
  pshuflw_3 =    "rmio:F20F70rMU",
  pslld_2 =    "rmo:660FF2rM|rio:660F726mU",
  pslldq_2 =    "rio:660F737mU",
  psllq_2 =    "rmo:660FF3rM|rio:660F736mU",
  psllw_2 =    "rmo:660FF1rM|rio:660F716mU",
  psrad_2 =    "rmo:660FE2rM|rio:660F724mU",
  psraw_2 =    "rmo:660FE1rM|rio:660F714mU",
  psrld_2 =    "rmo:660FD2rM|rio:660F722mU",
  psrldq_2 =    "rio:660F733mU",
  psrlq_2 =    "rmo:660FD3rM|rio:660F732mU",
  psrlw_2 =    "rmo:660FD1rM|rio:660F712mU",
  psubb_2 =    "rmo:660FF8rM",
  psubd_2 =    "rmo:660FFArM",
  psubq_2 =    "rmo:660FFBrM",
  psubsb_2 =    "rmo:660FE8rM",
  psubsw_2 =    "rmo:660FE9rM",
  psubusb_2 =    "rmo:660FD8rM",
  psubusw_2 =    "rmo:660FD9rM",
  psubw_2 =    "rmo:660FF9rM",
  punpckhbw_2 =    "rmo:660F68rM",
  punpckhdq_2 =    "rmo:660F6ArM",
  punpckhqdq_2 = "rmo:660F6DrM",
  punpckhwd_2 =    "rmo:660F69rM",
  punpcklbw_2 =    "rmo:660F60rM",
  punpckldq_2 =    "rmo:660F62rM",
  punpcklqdq_2 = "rmo:660F6CrM",
  punpcklwd_2 =    "rmo:660F61rM",
  pxor_2 =    "rmo:660FEFrM",
  rcpps_2 =    "rmo:0F53rM",
  rcpss_2 =    "rro:F30F53rM|rx/od:",
  rsqrtps_2 =    "rmo:0F52rM",
  rsqrtss_2 =    "rmo:F30F52rM",
  sfence_0 =    "0FAEF8",
  shufpd_3 =    "rmio:660FC6rMU",
  shufps_3 =    "rmio:0FC6rMU",
  stmxcsr_1 =   "xd:0FAE3m",
  ucomisd_2 =    "rro:660F2ErM|rx/oq:",
  ucomiss_2 =    "rro:0F2ErM|rx/od:",
  unpckhpd_2 =    "rmo:660F15rM",
  unpckhps_2 =    "rmo:0F15rM",
  unpcklpd_2 =    "rmo:660F14rM",
  unpcklps_2 =    "rmo:0F14rM",
  xorpd_2 =    "rmo:660F57rM",
  xorps_2 =    "rmo:0F57rM",

  -- SSE3 ops
  fisttp_1 =    "xw:nDF1m|xd:DB1m|xq:nDD1m",
  addsubpd_2 =    "rmo:660FD0rM",
  addsubps_2 =    "rmo:F20FD0rM",
  haddpd_2 =    "rmo:660F7CrM",
  haddps_2 =    "rmo:F20F7CrM",
  hsubpd_2 =    "rmo:660F7DrM",
  hsubps_2 =    "rmo:F20F7DrM",
  lddqu_2 =    "rxo:F20FF0rM",
  movddup_2 =    "rmo:F20F12rM",
  movshdup_2 =    "rmo:F30F16rM",
  movsldup_2 =    "rmo:F30F12rM",

  -- SSSE3 ops
  pabsb_2 =    "rmo:660F381CrM",
  pabsd_2 =    "rmo:660F381ErM",
  pabsw_2 =    "rmo:660F381DrM",
  palignr_3 =    "rmio:660F3A0FrMU",
  phaddd_2 =    "rmo:660F3802rM",
  phaddsw_2 =    "rmo:660F3803rM",
  phaddw_2 =    "rmo:660F3801rM",
  phsubd_2 =    "rmo:660F3806rM",
  phsubsw_2 =    "rmo:660F3807rM",
  phsubw_2 =    "rmo:660F3805rM",
  pmaddubsw_2 =    "rmo:660F3804rM",
  pmulhrsw_2 =    "rmo:660F380BrM",
  pshufb_2 =    "rmo:660F3800rM",
  psignb_2 =    "rmo:660F3808rM",
  psignd_2 =    "rmo:660F380ArM",
  psignw_2 =    "rmo:660F3809rM",

  -- SSE4.1 ops
  blendpd_3 =    "rmio:660F3A0DrMU",
  blendps_3 =    "rmio:660F3A0CrMU",
  blendvpd_3 =    "rmRo:660F3815rM",
  blendvps_3 =    "rmRo:660F3814rM",
  dppd_3 =    "rmio:660F3A41rMU",
  dpps_3 =    "rmio:660F3A40rMU",
  extractps_3 =    "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
  insertps_3 =    "rrio:660F3A41rMU|rxi/od:",
  movntdqa_2 =    "rmo:660F382ArM",
  mpsadbw_3 =    "rmio:660F3A42rMU",
  packusdw_2 =    "rmo:660F382BrM",
  pblendvb_3 =    "rmRo:660F3810rM",
  pblendw_3 =    "rmio:660F3A0ErMU",
  pcmpeqq_2 =    "rmo:660F3829rM",
  pextrb_3 =    "rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
  pextrd_3 =    "mri/do:660F3A16RmU",
  pextrq_3 =    "mri/qo:660F3A16RmU",
  -- pextrw is SSE2, mem operand is SSE4.1 only
  phminposuw_2 = "rmo:660F3841rM",
  pinsrb_3 =    "rri/od:660F3A20nrMU|rxi/ob:",
  pinsrd_3 =    "rmi/od:660F3A22rMU",
  pinsrq_3 =    "rmi/oq:660F3A22rXMU",
  pmaxsb_2 =    "rmo:660F383CrM",
  pmaxsd_2 =    "rmo:660F383DrM",
  pmaxud_2 =    "rmo:660F383FrM",
  pmaxuw_2 =    "rmo:660F383ErM",
  pminsb_2 =    "rmo:660F3838rM",
  pminsd_2 =    "rmo:660F3839rM",
  pminud_2 =    "rmo:660F383BrM",
  pminuw_2 =    "rmo:660F383ArM",
  pmovsxbd_2 =    "rro:660F3821rM|rx/od:",
  pmovsxbq_2 =    "rro:660F3822rM|rx/ow:",
  pmovsxbw_2 =    "rro:660F3820rM|rx/oq:",
  pmovsxdq_2 =    "rro:660F3825rM|rx/oq:",
  pmovsxwd_2 =    "rro:660F3823rM|rx/oq:",
  pmovsxwq_2 =    "rro:660F3824rM|rx/od:",
  pmovzxbd_2 =    "rro:660F3831rM|rx/od:",
  pmovzxbq_2 =    "rro:660F3832rM|rx/ow:",
  pmovzxbw_2 =    "rro:660F3830rM|rx/oq:",
  pmovzxdq_2 =    "rro:660F3835rM|rx/oq:",
  pmovzxwd_2 =    "rro:660F3833rM|rx/oq:",
  pmovzxwq_2 =    "rro:660F3834rM|rx/od:",
  pmuldq_2 =    "rmo:660F3828rM",
  pmulld_2 =    "rmo:660F3840rM",
  ptest_2 =    "rmo:660F3817rM",
  roundpd_3 =    "rmio:660F3A09rMU",
  roundps_3 =    "rmio:660F3A08rMU",
  roundsd_3 =    "rrio:660F3A0BrMU|rxi/oq:",
  roundss_3 =    "rrio:660F3A0ArMU|rxi/od:",

  -- SSE4.2 ops
  crc32_2 =    "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:",
  pcmpestri_3 =    "rmio:660F3A61rMU",
  pcmpestrm_3 =    "rmio:660F3A60rMU",
  pcmpgtq_2 =    "rmo:660F3837rM",
  pcmpistri_3 =    "rmio:660F3A63rMU",
  pcmpistrm_3 =    "rmio:660F3A62rMU",
  popcnt_2 =    "rmqdw:F30FB8rM",

  -- SSE4a
  extrq_2 =    "rro:660F79rM",
  extrq_3 =    "riio:660F780mUU",
  insertq_2 =    "rro:F20F79rM",
  insertq_4 =    "rriio:F20F78rMUU",
  lzcnt_2 =    "rmqdw:F30FBDrM",
  movntsd_2 =    "xr/qo:nF20F2BRm",
  movntss_2 =    "xr/do:F30F2BRm",
  -- popcnt is also in SSE4.2
}

------------------------------------------------------------------------------

-- Arithmetic ops.
for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3,
             ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do
  local n8 = shl(n, 3)
  map_op[name.."_2"] = format(
    "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi",
    1+n8, 3+n8, n, n, 5+n8, n)
end

-- Shift ops.
for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3,
             shl = 4, shr = 5,          sar = 7, sal = 4 } do
  map_op[name.."_2"] = format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU", n, n, n)
end

-- Conditional ops.
for cc,n in pairs(map_cc) do
  map_op["j"..cc.."_1"] = format("J.:n0F8%XJ", n) -- short: 7%X
  map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n)
  map_op["cmov"..cc.."_2"] = format("rmqdw:0F4%XrM", n) -- P6+
end

-- FP arithmetic ops.
for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3,
             sub = 4, subr = 5, div = 6, divr = 7 } do
  local nc = 0xc0 + shl(n, 3)
  local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8))
  local fn = "f"..name
  map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n)
  if n == 2 or n == 3 then
    map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", nc, n, n)
  else
    map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM", nc, nr, n, n)
    map_op[fn.."p_1"] = format("ff:DE%02Xr", nr)
    map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr)
  end
  map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n)
end

-- FP conditional moves.
for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
  local nc = 0xdac0 + shl(band(n, 3), 3) + shl(band(n, 4), 6)
  map_op["fcmov"..cc.."_1"] = format("ff:%04Xr", nc) -- P6+
  map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
end

-- SSE FP arithmetic ops.
for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
             sub = 12, min = 13, div = 14, max = 15 } do
  map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
  map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
  map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
  map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
end

------------------------------------------------------------------------------

-- Process pattern string.
local function dopattern(pat, args, sz, op, needrex)
  local digit, addin
  local opcode = 0
  local szov = sz
  local narg = 1
  local rex = 0

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 5 positions.
  if secpos+5 > maxsecpos then wflush() end

  -- Process each character.
  for c in gmatch(pat.."|", ".") do
    if match(c, "%x") then    -- Hex digit.
      digit = byte(c) - 48
      if digit > 48 then digit = digit - 39
      elseif digit > 16 then digit = digit - 7 end
      opcode = opcode*16 + digit
      addin = nil
    elseif c == "n" then    -- Disable operand size mods for opcode.
      szov = nil
    elseif c == "X" then    -- Force REX.W.
      rex = 8
    elseif c == "r" then    -- Merge 1st operand regno. into opcode.
      addin = args[1]; opcode = opcode + (addin.reg % 8)
      if narg < 2 then narg = 2 end
    elseif c == "R" then    -- Merge 2nd operand regno. into opcode.
      addin = args[2]; opcode = opcode + (addin.reg % 8)
      narg = 3
    elseif c == "m" or c == "M" then    -- Encode ModRM/SIB.
      local s
      if addin then
    s = addin.reg
    opcode = opcode - band(s, 7)    -- Undo regno opcode merge.
      else
    s = band(opcode, 15)    -- Undo last digit.
    opcode = shr(opcode, 4)
      end
      local nn = c == "m" and 1 or 2
      local t = args[nn]
      if narg <= nn then narg = nn + 1 end
      if szov == "q" and rex == 0 then rex = rex + 8 end
      if t.reg and t.reg > 7 then rex = rex + 1 end
      if t.xreg and t.xreg > 7 then rex = rex + 2 end
      if s > 7 then rex = rex + 4 end
      if needrex then rex = rex + 16 end
      wputop(szov, opcode, rex); opcode = nil
      local imark = sub(pat, -1) -- Force a mark (ugly).
      -- Put ModRM/SIB with regno/last digit as spare.
      wputmrmsib(t, imark, s, addin and addin.vreg)
      addin = nil
    else
      if opcode then -- Flush opcode.
    if szov == "q" and rex == 0 then rex = rex + 8 end
    if needrex then rex = rex + 16 end
    if addin and addin.reg == -1 then
      wputop(szov, opcode - 7, rex)
      waction("VREG", addin.vreg); wputxb(0)
    else
      if addin and addin.reg > 7 then rex = rex + 1 end
      wputop(szov, opcode, rex)
    end
    opcode = nil
      end
      if c == "|" then break end
      if c == "o" then -- Offset (pure 32 bit displacement).
    wputdarg(args[1].disp); if narg < 2 then narg = 2 end
      elseif c == "O" then
    wputdarg(args[2].disp); narg = 3
      else
    -- Anything else is an immediate operand.
    local a = args[narg]
    narg = narg + 1
    local mode, imm = a.mode, a.imm
    if mode == "iJ" and not match("iIJ", c) then
      werror("bad operand size for label")
    end
    if c == "S" then
      wputsbarg(imm)
    elseif c == "U" then
      wputbarg(imm)
    elseif c == "W" then
      wputwarg(imm)
    elseif c == "i" or c == "I" then
      if mode == "iJ" then
        wputlabel("IMM_", imm, 1)
      elseif mode == "iI" and c == "I" then
        waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
      else
        wputszarg(sz, imm)
      end
    elseif c == "J" then
      if mode == "iPJ" then
        waction("REL_A", imm) -- !x64 (secpos)
      else
        wputlabel("REL_", imm, 2)
      end
    else
      werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
    end
      end
    end
  end
end

------------------------------------------------------------------------------

-- Mapping of operand modes to short names. Suppress output with '#'.
local map_modename = {
  r = "reg", R = "eax", C = "cl", x = "mem", m = "mrm", i = "imm",
  f = "stx", F = "st0", J = "lbl", ["1"] = "1",
  I = "#", S = "#", O = "#",
}

-- Return a table/string showing all possible operand modes.
local function templatehelp(template, nparams)
  if nparams == 0 then return "" end
  local t = {}
  for tm in gmatch(template, "[^%|]+") do
    local s = map_modename[sub(tm, 1, 1)]
    s = s..gsub(sub(tm, 2, nparams), ".", function(c)
      return ", "..map_modename[c]
    end)
    if not match(s, "#") then t[#t+1] = s end
  end
  return t
end

-- Match operand modes against mode match part of template.
local function matchtm(tm, args)
  for i=1,#args do
    if not match(args[i].mode, sub(tm, i, i)) then return end
  end
  return true
end

-- Handle opcodes defined with template strings.
map_op[".template__"] = function(params, template, nparams)
  if not params then return templatehelp(template, nparams) end
  local args = {}

  -- Zero-operand opcodes have no match part.
  if #params == 0 then
    dopattern(template, args, "d", params.op, nil)
    return
  end

  -- Determine common operand size (coerce undefined size) or flag as mixed.
  local sz, szmix, needrex
  for i,p in ipairs(params) do
    args[i] = parseoperand(p)
    local nsz = args[i].opsize
    if nsz then
      if sz and sz ~= nsz then szmix = true else sz = nsz end
    end
    local nrex = args[i].needrex
    if nrex ~= nil then
      if needrex == nil then
    needrex = nrex
      elseif needrex ~= nrex then
    werror("bad mix of byte-addressable registers")
      end
    end
  end

  -- Try all match:pattern pairs (separated by '|').
  local gotmatch, lastpat
  for tm in gmatch(template, "[^%|]+") do
    -- Split off size match (starts after mode match) and pattern string.
    local szm, pat = match(tm, "^(.-):(.*)$", #args+1)
    if pat == "" then pat = lastpat else lastpat = pat end
    if matchtm(tm, args) then
      local prefix = sub(szm, 1, 1)
      if prefix == "/" then -- Match both operand sizes.
    if args[1].opsize == sub(szm, 2, 2) and
       args[2].opsize == sub(szm, 3, 3) then
      dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
      return
    end
      else -- Match common operand size.
    local szp = sz
    if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes.
    if prefix == "1" then szp = args[1].opsize; szmix = nil
    elseif prefix == "2" then szp = args[2].opsize; szmix = nil end
    if not szmix and (prefix == "." or match(szm, szp or "#")) then
      dopattern(pat, args, szp, params.op, needrex) -- Process pattern.
      return
    end
      end
      gotmatch = true
    end
  end

  local msg = "bad operand mode"
  if gotmatch then
    if szmix then
      msg = "mixed operand size"
    else
      msg = sz and "bad operand size" or "missing operand size"
    end
  end

  werror(msg.." in `"..opmodestr(params.op, args).."'")
end

------------------------------------------------------------------------------

-- x64-specific opcode for 64 bit immediates and displacements.
if x64 then
  function map_op.mov64_2(params)
    if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
    if secpos+2 > maxsecpos then wflush() end
    local opcode, op64, sz, rex
    local op64 = match(params[1], "^%[%s*(.-)%s*%]$")
    if op64 then
      local a = parseoperand(params[2])
      if a.mode ~= "rmR" then werror("bad operand mode") end
      sz = a.opsize
      rex = sz == "q" and 8 or 0
      opcode = 0xa3
    else
      op64 = match(params[2], "^%[%s*(.-)%s*%]$")
      local a = parseoperand(params[1])
      if op64 then
    if a.mode ~= "rmR" then werror("bad operand mode") end
    sz = a.opsize
    rex = sz == "q" and 8 or 0
    opcode = 0xa1
      else
    if sub(a.mode, 1, 1) ~= "r" or a.opsize ~= "q" then
      werror("bad operand mode")
    end
    op64 = params[2]
    opcode = 0xb8 + band(a.reg, 7) -- !x64: no VREG support.
    rex = a.reg > 7 and 9 or 8
      end
    end
    wputop(sz, opcode, rex)
    waction("IMM_D", format("(unsigned int)(%s)", op64))
    waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
  end
end

------------------------------------------------------------------------------

-- Pseudo-opcodes for data storage.
local function op_data(params)
  if not params then return "imm..." end
  local sz = sub(params.op, 2, 2)
  if sz == "a" then sz = addrsize end
  for _,p in ipairs(params) do
    local a = parseoperand(p)
    if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
      werror("bad mode or size in `"..p.."'")
    end
    if a.mode == "iJ" then
      wputlabel("IMM_", a.imm, 1)
    else
      wputszarg(sz, a.imm)
    end
    if secpos+2 > maxsecpos then wflush() end
  end
end

map_op[".byte_*"] = op_data
map_op[".sbyte_*"] = op_data
map_op[".word_*"] = op_data
map_op[".dword_*"] = op_data
map_op[".aword_*"] = op_data

------------------------------------------------------------------------------

-- Pseudo-opcode to mark the position where the action list is to be emitted.
map_op[".actionlist_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeactions(out, name) end)
end

-- Pseudo-opcode to mark the position where the global enum is to be emitted.
map_op[".globals_1"] = function(params)
  if not params then return "prefix" end
  local prefix = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobals(out, prefix) end)
end

-- Pseudo-opcode to mark the position where the global names are to be emitted.
map_op[".globalnames_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobalnames(out, name) end)
end

-- Pseudo-opcode to mark the position where the extern names are to be emitted.
map_op[".externnames_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeexternnames(out, name) end)
end

------------------------------------------------------------------------------

-- Label pseudo-opcode (converted from trailing colon form).
map_op[".label_2"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr  [, addr]" end
  if secpos+2 > maxsecpos then wflush() end
  local a = parseoperand(params[1])
  local mode, imm = a.mode, a.imm
  if type(imm) == "number" and (mode == "iJ" or (imm >= 1 and imm <= 9)) then
    -- Local label (1: ... 9:) or global label (->global:).
    waction("LABEL_LG", nil, 1)
    wputxb(imm)
  elseif mode == "iJ" then
    -- PC label (=>pcexpr:).
    waction("LABEL_PC", imm)
  else
    werror("bad label definition")
  end
  -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
  local addr = params[2]
  if addr then
    local a = parseoperand(addr)
    if a.mode == "iPJ" then
      waction("SETLABEL", a.imm)
    else
      werror("bad label assignment")
    end
  end
end
map_op[".label_1"] = map_op[".label_2"]

------------------------------------------------------------------------------

-- Alignment pseudo-opcode.
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]]
  if align then
    local x = align
    -- Must be a power of 2 in the range (2 ... 256).
    for i=1,8 do
      x = x / 2
      if x == 1 then
    waction("ALIGN", nil, 1)
    wputxb(align-1) -- Action byte is 2**n-1.
    return
      end
    end
  end
  werror("bad alignment")
end

-- Spacing pseudo-opcode.
map_op[".space_2"] = function(params)
  if not params then return "num [, filler]" end
  if secpos+1 > maxsecpos then wflush() end
  waction("SPACE", params[1])
  local fill = params[2]
  if fill then
    fill = tonumber(fill)
    if not fill or fill < 0 or fill > 255 then werror("bad filler") end
  end
  wputxb(fill or 0)
end
map_op[".space_1"] = map_op[".space_2"]

------------------------------------------------------------------------------

-- Pseudo-opcode for (primitive) type definitions (map to C types).
map_op[".type_3"] = function(params, nparams)
  if not params then
    return nparams == 2 and "name, ctype" or "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  local tp = map_type[name]
  if tp then
    werror("duplicate type `"..name.."'")
  end
  if reg and not map_reg_valid_base[reg] then
    werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Add new type and emit shortcut define.
  local num = ctypenum + 1
  map_type[name] = {
    ctype = ctype,
    ctypefmt = format("Dt%X(%%s)", num),
    reg = reg,
  }
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  ctypenum = num
end
map_op[".type_2"] = map_op[".type_3"]

-- Dump type definitions.
local function dumptypes(out, lvl)
  local t = {}
  for name in pairs(map_type) do t[#t+1] = name end
  sort(t)
  out:write("Type definitions:\n")
  for _,name in ipairs(t) do
    local tp = map_type[name]
    local reg = tp.reg and map_reg_rev[tp.reg] or ""
    out:write(format("  %-20s %-20s %s\n", name, tp.ctype, reg))
  end
  out:write("\n")
end

------------------------------------------------------------------------------

-- Set the current section.
function _M.section(num)
  waction("SECTION")
  wputxb(num)
  wflush(true) -- SECTION is a terminal action.
end

------------------------------------------------------------------------------

-- Dump architecture description.
function _M.dumparch(out)
  out:write(format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release))
  dumpregs(out)
  dumpactions(out)
end

-- Dump all user defined elements.
function _M.dumpdef(out, lvl)
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end

------------------------------------------------------------------------------

-- Pass callbacks from/to the DynASM core.
function _M.passcb(wl, we, wf, ww)
  wline, werror, wfatal, wwarn = wl, we, wf, ww
  return wflush
end

-- Setup the arch-specific module.
function _M.setup(arch, opt)
  g_arch, g_opt = arch, opt
end

-- Merge the core maps and the arch-specific maps.
function _M.mergemaps(map_coreop, map_def)
  setmetatable(map_op, { __index = map_coreop })
  setmetatable(map_def, { __index = map_archdef })
  return map_op, map_def
end

return _M

------------------------------------------------------------------------------