#include "t_x86-64.h"
#include "obj.h"
#include "u_endian.h"

/** Instruction operands **
 *
 * Can be a register, a 32-bit immediate,
 * a memory reference [base + index * scale + disp],
 * or a relocatable reference to some symbol plus a displacement and maybe index*scale
 */

enum operkind { ONONE, OREG, OIMM, OMEM, OSYM, OSYMGOT };
/* sentinel register numbers meaning "no base" / "no index" in a memory operand */
enum { NOBASE = 63, NOINDEX = 63 };
/* Tagged operand: `t` is an enum operkind and selects which union members are meaningful. */
typedef struct Oper {
   uchar t;
   union {
      struct { uchar base; }; /* OMEM */
      struct { uchar cindex : 6, cshift : 2; }; /* OSYM */
   };
   union {
      struct { uchar index, shift; }; /* OMEM */
      ushort con; /* OSYM */
   };
   union {
      uchar reg; /* OREG */
      int disp; /* OMEM, OSYM */
      int imm; /* OIMM */
   };
} Oper;

/* build an Oper of kind `t`; remaining arguments are designated initializers */
#define mkoper(t, ...) ((Oper){(t), __VA_ARGS__})
/* register operand for register number R (asserted to be at most XMM15) */
#define reg2oper(R) (assert((uint)(R) <= XMM15), mkoper(OREG, .reg = (R)))

static Oper mkmemoper(Ref);

/* Operand for the result of instruction `i`: instrtab[i].reg holds register+1,
 * so a stored 0 yields an ONONE operand. */
static Oper
ioper(int i)
{
   int reg = instrtab[i].reg - 1;
   return reg < 0 ? mkoper(ONONE,) : reg2oper(reg);
}

/* frame description consulted by stackdisp() and mkmemoper() */
static struct Frame {
   bool usebp;
   int stksiz;
   int size;
   int nsave;
} frame;

/* Displacement of stack slot `i` from the frame base register
 * (RBP when frame.usebp is set, RSP otherwise; see mkmemoper).
 * NOTE(review): negative `i` presumably names slots above the frame
 * (e.g. incoming stack arguments) -- confirm against the callers. */
static int
stackdisp(int i)
{
   if (frame.usebp) {
      return i < 0 ? 8 - i : -frame.size + i;
   } else { /* RSP rel */
      return i < 0 ? frame.size - i : -frame.stksiz + i;
   }
}

/* Convert an IR reference to an operand.
 * Integer constants become OIMM (64-bit ones are asserted to fit in an int),
 * constants with cls == 0 become OSYM, RADDR/RSTACK go through mkmemoper().
 * NOTE(review): with assertions compiled out, the RXCON case falls through
 * into the RADDR/RSTACK case for any other constant class. */
static Oper
ref2oper(Ref r)
{
   switch (r.t) {
   case RTMP: return ioper(r.i);
   case RREG: return reg2oper(r.i);
   case RICON: return mkoper(OIMM, .imm = r.i);
   case RXCON:
      if (contab.p[r.i].cls == KI32)
         return mkoper(OIMM, .imm = contab.p[r.i].i);
      else if (contab.p[r.i].cls == KI64) {
         s64int i = contab.p[r.i].i;
         assert(i == (int)i);
         return mkoper(OIMM, .imm = i);
      } else if (!contab.p[r.i].cls) {
         return mkoper(OSYM, .con = r.i, .cindex = NOINDEX);
      }
      assert(0);
   case RADDR: case RSTACK:
      return mkmemoper(r);
   default: assert(0);
   }
}

/* Fold `add` into the memory operand `mem`: an immediate is added to the
 * displacement; a register fills the base slot if it is free, else the index
 * slot (asserting if both are taken). Operands of any other kind are ignored. */
static void
addmemoper(Oper *mem, Oper add)
{
   assert(mem->t == OMEM);
   if (add.t == OIMM) {
      mem->disp += add.imm;
   } else if (add.t == OREG) {
      if (mem->base == NOBASE) mem->base = add.reg;
      else if (mem->index == NOINDEX) mem->index = add.reg;
      else assert(0);
   }
}

/* helpers to convert a reference to an operand of a specific kind,
 * with assertions to make sure nothing went wrong */

/* register operand: `r` must be a register or a temporary assigned to one */
static inline Oper
mkregoper(Ref r)
{
   assert(r.t == RREG || (r.t == RTMP && ioper(r.i).t == OREG));
   return r.t == RREG ? reg2oper(r.i) : ioper(r.i);
}

/* immediate operand: `r` must be a KI32 constant */
static inline Oper
mkimmoper(Ref r)
{
   assert(iscon(r) && concls(r) == KI32);
   return mkoper(OIMM, .imm = intconval(r));
}

#define ismemref(ref) ((ref).t == RTMP && ioper((ref).i).t == OMEM)
#define isregref(ref) ((ref).t == RREG || ((ref).t == RTMP && ioper((ref).i).t == OREG))

/* register or KI32 immediate */
static inline Oper
mkimmregoper(Ref r)
{
   assert(isregref(r) || (iscon(r) && concls(r) == KI32));
   return ref2oper(r);
}

/* register or dereferenced constant (contab entry with `deref` set) */
static inline Oper
mkdatregoper(Ref r)
{
   assert(isregref(r) || (r.t == RXCON && contab.p[r.i].deref));
   return ref2oper(r);
}

/* register, small integer constant, KI32 constant or dereferenced constant */
static inline Oper
mkimmdatregoper(Ref r)
{
   assert(isregref(r) || r.t == RICON || (r.t == RXCON && (contab.p[r.i].cls == KI32 || contab.p[r.i].deref)));
   return ref2oper(r);
}

/* Build a memory (OMEM) or symbol (OSYM) operand that addresses through `r`:
 *  - RTMP:   the temporary's own OMEM operand, or [reg] if it lives in a register
 *  - RSTACK: [RBP/RSP + stackdisp(slot)]
 *  - RADDR:  the IRAddr from addrtab, turned into OSYM when base or index is an
 *            address constant, otherwise into [base + index << shift + disp]
 *  - RXCON:  symbol reference (constant class asserted to be 0)
 *  - else:   [reg] for a register reference, or an absolute [imm] address */
static Oper
mkmemoper(Ref r)
{
   if (r.t == RTMP) {
      Oper wop = ioper(r.i);
      if (wop.t == OMEM) return wop;
      assert(wop.t == OREG);
      return mkoper(OMEM, .base = wop.reg, .index = NOINDEX);
   } else if (r.t == RSTACK) {
      return mkoper(OMEM, .base = frame.usebp ? RBP : RSP, .index = NOINDEX, .disp = stackdisp(r.i));
   } else if (r.t == RADDR) {
      const IRAddr *addr = &addrtab.p[r.i];
      assert(addr->shift <= 3);
      if (isaddrcon(addr->base,0)) {
         /* symbol as base, optional scaled index register */
         return mkoper(OSYM, .con = addr->base.i,
                       .cindex = addr->index.bits ? mkregoper(addr->index).reg : NOINDEX,
                       .cshift = addr->shift,
                       .disp = addr->disp);
      } else if (isintcon(addr->base)) {
         /* integer constant as base: it becomes the displacement */
         assert(!addr->disp);
         return mkoper(OMEM, .base = NOBASE,
                       .index = addr->index.bits ? mkregoper(addr->index).reg : NOINDEX,
                       .disp = intconval(addr->base),
                       .shift = addr->shift);
      } else if (isaddrcon(addr->index,0)) {
         /* symbol in the index position: the base register takes the index slot */
         assert(!addr->shift);
         return mkoper(OSYM, .con = addr->index.i,
                       .cindex = addr->base.bits ? mkregoper(addr->base).reg : NOINDEX,
                       .disp = addr->disp);
      }
      int base = NOBASE, index = NOINDEX, disp = addr->disp;
      if (addr->base.t == RREG) base = addr->base.i;
      else if (addr->base.t == RSTACK) {
         base = frame.usebp ? RBP : RSP;
         disp += stackdisp(addr->base.i);
      }
      if (addr->index.bits) {
         assert(addr->index.t == RREG);
         index = addr->index.i;
      }
      return mkoper(OMEM, .base = base, .index = index, .disp = disp, .shift = addr->shift);
   } else if (r.t == RXCON) {
      assert(!contab.p[r.i].cls);
      return mkoper(OSYM, .con = r.i, .cindex = NOINDEX);
   } else {
      return mkoper(OMEM, .base = isregref(r) ? ref2oper(r).reg : NOBASE,
                    .index = NOINDEX,
                    .disp = isregref(r) ? 0 : mkimmoper(r).imm);
   }
}

/* Structural equality of two operands. Operands of different kinds are never
 * equal; kinds other than OREG/OIMM/OMEM/OSYM/OSYMGOT hit the assertion. */
static bool
opereql(Oper a, Oper b)
{
   if (a.t != b.t) return 0;
   switch (a.t) {
   case OREG: return a.reg == b.reg;
   case OIMM: return a.imm == b.imm;
   case OMEM: return a.base == b.base && a.index == b.index && a.shift == b.shift && a.disp == b.disp;
   case OSYM: case OSYMGOT:
      return a.cindex == b.cindex && a.cshift == b.cshift && a.con == b.con && a.disp == b.disp;
   default: assert(0);
   }
}

/** Instruction description tables **
 *
 * Each instruction is a list of descs, and the first one that matches
 * is emitted. Each entry has a size pattern field, which is a bitset
 * of the sizes (in bytes) that the entry matches, and 2 operand patterns,
 * which describe the operands that can match (for example, PRAX matches
 * a RAX register operand, PGPR matches any integer register, PI8 matches
 * an immediate operand between [-128,127]) The rest of the fields describe
 * the instruction's encoding.
* (reference: https://www.felixcloutier.com/x86/ & https://wiki.osdev.org/X86-64_Instruction_Encoding )
 */

/* operand patterns, matched against an Oper by opermatch() */
enum operpat {
   PNONE,
   PRAX,
   PRCX,
   PGPR,
   PFPR,
   P1, /* imm = 1 */
   PN1, /* imm = -1 */
   PI8,
   PU8,
   PI16,
   PU16,
   PI32,
   PU32,
   PMEM,
   PSYM,
};

/* operand encodings, dispatched on by encode() */
enum operenc {
   EN_R = 1, /* reg with /x */
   EN_RR, /* reg, reg with /r */
   EN_RRX, /* reg, reg with /r (inverted) */
   EN_MR, /* mem, reg with /r */
   EN_RM, /* reg, mem with /r */
   EN_M, /* mem */
   EN_RI8, /* reg, imm8 with /x */
   EN_RI32, /* reg, imm32 with /x */
   EN_MI8, /* mem, imm8 with /x */
   EN_MI16, /* mem, imm16 with /x */
   EN_MI32, /* mem, imm32 with /x */
   EN_O, /* reg with op + reg */
   EN_OI, /* reg, imm32 with op + reg */
   EN_I8, /* imm8 */
   EN_I32, /* imm32 */
   EN_R32, /* rel32 */
   NOPERENC,
};

typedef struct EncDesc {
   uchar psiz; /* subset of {1,2,4,8} */
   uchar ptd, pts; /* dst and src operand pattern (one enum operpat each) */
   uchar nopc; /* countof opc */
   const char opc[8]; /* opcode bytes */
   uchar operenc; /* enum operenc */
   uchar ext; /* ModR/M.reg opc extension */
   bool r8; /* uses 8bit register */
   bool norexw; /* do not use REX.W even if size is 64 bits */
} EncDesc;

/* match operand against pattern */
static inline bool
opermatch(enum operpat pat, Oper oper)
{
   switch (pat) {
   case PNONE: return !oper.t;
   case PRAX: return oper.t == OREG && oper.reg == RAX;
   case PRCX: return oper.t == OREG && oper.reg == RCX;
   case PGPR: return oper.t == OREG && oper.reg <= R15;
   case PFPR: return oper.t == OREG && oper.reg >= XMM0;
   case P1: return oper.t == OIMM && oper.imm == 1;
   case PN1: return oper.t == OIMM && oper.imm == -1;
   /* the sized immediate patterns check that the value survives truncation */
   case PI8: return oper.t == OIMM && (schar)oper.imm == oper.imm;
   case PU8: return oper.t == OIMM && (uchar)oper.imm == oper.imm;
   case PI16: return oper.t == OIMM && (short)oper.imm == oper.imm;
   case PU16: return oper.t == OIMM && (ushort)oper.imm == oper.imm;
   case PI32: return oper.t == OIMM;
   case PU32: return oper.t == OIMM && oper.imm >= 0;
   case PMEM: return in_range(oper.t, OMEM, OSYMGOT);
   case PSYM: return oper.t == OSYM || oper.t == OSYMGOT;
   }
   assert(0);
}

/* code output helpers */
/* each appends to the buffer at *pcode and advances it; they expect a
 * `uchar **pcode` in scope */
#define B(b) (*(*pcode)++ = (b))
#define D(xs, N) (memcpy(*pcode, (xs), (N)), (*pcode) += (N))
#define I16(w) (wr16le(*pcode, (w)), *pcode += 2)
#define I32(w) (wr32le(*pcode, (w)), *pcode += 4)
#define DS(S) D(S, sizeof S - 1)

/* symbol and code start used by encode() to resolve calls to `curfnsym` directly */
static internstr curfnsym;
static uchar *fnstart;

/* Given an instruction description table, find the first entry that matches
 * the operands (where dst, src are the operands in intel syntax order) and encode it */
static void
encode(uchar **pcode, const EncDesc *tab, int ntab, enum irclass k, Oper dst, Oper src)
{
   const uchar *opc;
   int nopc;
   Oper mem;
   enum reg reg;
   const EncDesc *en = NULL;

   /* first entry whose size set and both operand patterns match wins */
   for (int i = 0; i < ntab; ++i) {
      if ((tab[i].psiz & cls2siz[k]) && opermatch(tab[i].ptd, dst) && opermatch(tab[i].pts, src)) {
         en = &tab[i];
         break;
      }
   }
   assert(en && "no match for instr");
   /* reduce XMM register numbers to 0..15 */
   if (en->ptd == PFPR) dst.reg &= 15;
   if (en->pts == PFPR) src.reg &= 15;

   opc = (uchar *)en->opc;
   nopc = en->nopc;
   /* mandatory prefixes go before REX */
   if (*opc == 0x66 || *opc == 0xF2 || *opc == 0xF3)
      B(*opc++), --nopc;
   int rex = in_range(k, KI64, KPTR) << 3; /* REX.W */
   if (en->norexw) rex = 0;
   switch (en->operenc) {
   case EN_RR: /* mod = 11; reg = dst; rm = src */
      rex |= (dst.reg >> 3) << 2; /* REX.R */
      rex |= (src.reg >> 3) << 0; /* REX.B */
      if (rex) B(0x40 | rex);
      else if (en->r8 && in_range(src.reg, RSP, RDI)) {
         /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */
         B(0x40);
      }
      D(opc, nopc);
      B(0300 | (dst.reg & 7) << 3 | (src.reg & 7));
      break;
   case EN_RRX: /* mod = 11; reg = src; rm = dst */
      rex |= (src.reg >> 3) << 2; /* REX.R */
      rex |= (dst.reg >> 3) << 0; /* REX.B */
      if (rex) B(0x40 | rex);
      else if (en->r8 && in_range(dst.reg, RSP, RDI)) {
         /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */
         B(0x40);
      }
      D(opc, nopc);
      B(0300 | (src.reg & 7) << 3 | (dst.reg & 7));
      break;
   case EN_MR: mem = dst; reg = src.reg; goto Mem;
   case EN_RM: mem = src; reg = dst.reg; goto Mem;
   case EN_M: case EN_MI8: case EN_MI16: case EN_MI32:
      /* no register operand: ModR/M.reg carries the opcode extension */
      mem = dst;
      reg = en->ext;
   Mem:
      if (mem.t == OMEM) {
         if (mem.base != NOBASE) rex |= mem.base >> 3; /* REX.B */
         if (mem.index != NOINDEX) rex |= mem.index >> 3 << 1; /* REX.X */
      } else {
         if (mem.cindex != NOINDEX) rex |= mem.cindex >> 3 << 1; /* REX.X */
      }
      if (en->operenc != EN_M) rex |= (reg >> 3) << 2; /* REX.R */
      if (rex) B(0x40 | rex);
      else if (en->r8 && in_range(reg, RSP, RDI)) B(0x40);

      if (mem.t == OSYM || mem.t == OSYMGOT) {
         D(opc, nopc);
         if (mem.cindex == NOINDEX) { /* %rip(var) */
            /* size of the immediate that follows the disp32, per encoding */
            static uchar offs[NOPERENC] = { [EN_MI8] = 1, [EN_MI16] = 2, [EN_MI32] = 4 };
            uint addr;
            /* relative to the end of the instruction: skip disp32 and any immediate */
            int disp = mem.disp - 4 - offs[en->operenc];
            internstr sym = xcon2sym(mem.con);
            B(/*mod 0*/ (reg & 7) << 3 | RBP);
            if (objhassym(sym, &addr) == Stext && mem.t != OSYMGOT) {
               /* symbol already has a text address: resolve right away */
               I32(addr - (*pcode - objout.textbegin) + disp);
            } else {
               enum relockind r = REL_PCREL32;
               if (mem.t == OSYMGOT) r = rex ? REL_GOTPCRELX_REX : REL_GOTPCRELX;
               objrelocxcon(mem.con, r, Stext, *pcode - objout.textbegin, disp);
               I32(0);
            }
         } else { /* var(,%reg,shift) */
            assert(!ccopt.pic && !ccopt.pie && "cannot encode [RIP-rel + REG] for position independent");
            B(/*mod 0*/ (reg & 7) << 3 | RSP);
            B(mem.cshift << 6 | ((mem.cindex & 7) << 3) | RBP); /* SIB [index*s + disp32] */
            objrelocxcon(mem.con, REL_ABS32S, Stext, *pcode - objout.textbegin, mem.disp);
            I32(0);
         }
      } else {
         int mod;
         bool sib = 0;
         if (mem.base != NOBASE) {
            if (mem.index == NOINDEX && mem.shift == 0) sib = 0;
            else sib = 1;
            mod = !mem.disp ? 0 /* disp = 0 -> mod = 00 */
                : (schar)mem.disp == mem.disp ? 1 /* disp8 -> mod = 01 */
                : 2; /* disp32 -> mod = 10 */
            /* mod=00 with rm=101 has another meaning, so force a disp8 of 0 */
            if (mod == 0 && (mem.base == RBP || mem.base == R13)) mod = 1;
            /* rm=100 selects a SIB byte, so these bases always need one */
            if (mem.base == RSP || mem.base == R12) sib = 1;
         } else { /* [disp + (index*s)] */
            sib = 1;
            mem.base = RBP;
            mod = 0;
            assert(mem.index != RSP);
         }
         D(opc, nopc);
         B(mod << 6 | (reg & 7) << 3 | (sib ? 4 : (mem.base & 7)));
         if (sib) {
            /* index field 100 means "no index" */
            if (mem.index == NOINDEX) mem.index = RSP;
            B(mem.shift << 6 | (mem.index & 7) << 3 | (mem.base & 7));
         }
         if (mod == 1) B(mem.disp);
         else if (mod == 2 || (mod == 0 && mem.base == RBP/*RIP-rel*/) || (mod == 0 && sib && mem.base == RBP/*absolute*/)) {
            I32(mem.disp);
         }
      }
      /* trailing immediate for the mem, imm forms */
      if (en->operenc == EN_MI8) B(src.imm);
      if (en->operenc == EN_MI16) I16(src.imm);
      if (en->operenc == EN_MI32) I32(src.imm);
      break;
   case EN_R: case EN_RI32: case EN_RI8:
      rex |= (dst.reg >> 3) << 0; /* REX.B */
      if (rex) B(0x40 | rex);
      else if (en->r8 && in_range(dst.reg, RSP, RDI)) {
         /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */
         B(0x40);
      }
      D(opc, nopc);
      B(0300 | en->ext << 3 | (dst.reg & 7));
      if (en->operenc == EN_RI32) I32(src.imm);
      else if (en->operenc == EN_RI8) B(src.imm);
      break;
   case EN_O: case EN_OI:
      /* register number is added to the last opcode byte */
      rex |= (dst.reg >> 3) << 0; /* REX.B */
      if (rex) B(0x40 | rex);
      D(opc, nopc - 1);
      B(opc[nopc-1] + (dst.reg & 7));
      if (en->operenc == EN_OI) I32(src.imm);
      break;
   case EN_I8:
      if (rex) B(0x40 | rex);
      D(opc, nopc);
      B(src.imm);
      break;
   case EN_I32:
      if (rex) B(0x40 | rex);
      D(opc, nopc);
      I32(src.imm);
      break;
   case EN_R32:
      if (rex) B(0x40 | rex);
      D(opc, nopc);
      assert(dst.t == OSYM);
      internstr sym = xcon2sym(dst.con);
      uint addr;
      if (sym == curfnsym) {
         /* reference to the function being emitted */
         I32(fnstart - *pcode - 4);
      } else if (objhassym(sym, &addr) == Stext) {
         /* symbol already has a text address */
         I32(addr - (*pcode - objout.textbegin) - 4);
      } else {
         enum relockind r = (ccopt.pie|ccopt.pic) ? REL_PLT32 : REL_PCREL32;
         objrelocxcon(dst.con, r, Stext, *pcode - objout.textbegin, -4);
         I32(0);
      }
      break;
   }
}

/* define emitter X(pcode, k, oper) that encodes using the given one-operand table */
#define DEFINSTR1(X, ...) \
   static void \
   X(uchar **pcode, enum irclass k, Oper oper) \
   { \
      static const EncDesc tab[] = { __VA_ARGS__ }; \
      encode(pcode, tab, countof(tab), k, oper, mkoper(0,)); \
   }
/* define emitter X(pcode, k, dst, src) that encodes using the given two-operand table */
#define DEFINSTR2(X, ...) \
   static void \
   X(uchar **pcode, enum irclass k, Oper dst, Oper src) \
   { \
      static const EncDesc tab[] = { __VA_ARGS__ }; \
      encode(pcode, tab, countof(tab), k, dst, src); \
   }

/* expands to the `nopc, opc` pair of an EncDesc from a string literal of opcode bytes */
#define O(s) (sizeof s)-1,s
DEFINSTR2(Xmovb,
   {-1, PMEM, PGPR, O("\x88"), EN_MR, .r8=1}, /* MOV m8, r8 */
   {-1, PMEM, PI32, O("\xC6"), EN_MI8, .r8=1}, /* MOV m8, imm8 */
)
DEFINSTR2(Xmovw,
   {-1, PMEM, PGPR, O("\x66\x89"), EN_MR}, /* MOV m16, r16 */
   {-1, PMEM, PI32, O("\x66\xC7"), EN_MI16}, /* MOV m16, imm16 */
)
/* General move. The table is ordered so that each class can start matching at
 * its own offset (k2off), skipping entries meant only for other classes. */
static void
Xmov(uchar **pcode, enum irclass k, Oper dst, Oper src)
{
   static const EncDesc all[] = {
      {4  , PGPR, PI32, O("\xB8"), EN_OI}, /* MOV r32, imm */
      {4|8, PGPR, PGPR, O("\x8B"), EN_RR}, /* MOV r32/64, r32/64 */
      {4|8, PMEM, PGPR, O("\x89"), EN_MR}, /* MOV m32/64, r32/64 */
      {4|8, PGPR, PMEM, O("\x8B"), EN_RM}, /* MOV r32/64, m32/64 */
      {4|8, PMEM, PI32, O("\xC7"), EN_MI32}, /* MOV m32/64, imm */
      {  8, PGPR, PU32, O("\xB8"), EN_OI, .norexw=1}, /* MOV r64, uimm */
      {  8, PGPR, PI32, O("\xC7"), EN_RI32}, /* MOV r64, imm */
      {4  , PFPR, PFPR, O("\x0F\x28"), EN_RR}, /* MOVAPS xmm, xmm */
      {4  , PFPR, PMEM, O("\xF3\x0F\x10"), EN_RM}, /* MOVSS xmm, m32 */
      {4  , PMEM, PFPR, O("\xF3\x0F\x11"), EN_MR}, /* MOVSS m32, xmm */
      {8  , PFPR, PFPR, O("\x0F\x28"), EN_RR}, /* MOVAPS xmm, xmm */
      {8  , PFPR, PMEM, O("\xF2\x0F\x10"), EN_RM}, /* MOVSD xmm, m64 */
      {8  , PMEM, PFPR, O("\xF2\x0F\x11"), EN_MR}, /* MOVSD m64, xmm */
      {4|8, PFPR, PGPR, O("\x66\x0F\x6E"), EN_RR}, /* MOVD/Q xmm, r64/32 */
      {4|8, PGPR, PFPR, O("\x66\x0F\x7E"), EN_RRX}, /* MOVD/Q r64/32, xmm */
   };
   /* index in all[] of the first entry to consider for each class */
   static const uchar k2off[] = {
      [KI32] = 0,
      [KI64] = 1,
      [KPTR] = 1,
      [KF32] = 7,
      [KF64] = 10,
   };
   if (kisflt(k) && src.t == OIMM && src.imm == 0) {
      /* special case for storing zero float : use integer instruction with zero immediate */
      k = KI32 + (k - KF32);
   }
   encode(pcode, all + k2off[k], countof(all) - k2off[k], k, dst, src);
}
DEFINSTR2(Xmovsxl,
   {8, PGPR, PMEM, O("\x63"), EN_RM}, /* MOVSXD r64, m32 */
   {8, PGPR, PGPR, O("\x63"), EN_RR}, /* MOVSXD r64, r32 */
{4, PGPR, PMEM, O("\x8B"), EN_RM}, /* MOV r32, m32 */
   {4, PGPR, PGPR, O("\x8B"), EN_RR}, /* MOV r32, r32 */
)
/* sign/zero extending moves from 16 and 8 bit sources */
DEFINSTR2(Xmovsxw,
   {4|8, PGPR, PMEM, O("\x0F\xBF"), EN_RM}, /* MOVSX r64, m16 */
   {4|8, PGPR, PGPR, O("\x0F\xBF"), EN_RR}, /* MOVSX r64, r16 */
)
DEFINSTR2(Xmovsxb,
   {4|8, PGPR, PMEM, O("\x0F\xBE"), EN_RM}, /* MOVSX r64, m8 */
   {4|8, PGPR, PGPR, O("\x0F\xBE"), EN_RR, .r8=1}, /* MOVSX r64, r8 */
)
DEFINSTR2(Xmovzxw,
   {4|8, PGPR, PMEM, O("\x0F\xB7"), EN_RM}, /* MOVZX r64, m16 */
   {4|8, PGPR, PGPR, O("\x0F\xB7"), EN_RR}, /* MOVZX r64, r16 */
)
DEFINSTR2(Xmovzxb,
   {4|8, PGPR, PMEM, O("\x0F\xB6"), EN_RM}, /* MOVZX r64, m8 */
   {4|8, PGPR, PGPR, O("\x0F\xB6"), EN_RR, .r8=1}, /* MOVZX r64, r8 */
)
DEFINSTR2(Xmovaps,
   {-1, PMEM, PFPR, O("\x0F\x29"), EN_MR}, /* MOVAPS mem, xmm */
)
DEFINSTR2(Xxchg,
   {4|8, PGPR, PGPR, O("\x87"), EN_RR}, /* XCHG r32/64, r32/64 */
   {4|8, PGPR, PMEM, O("\x87"), EN_RM}, /* XCHG r32/64, m32/64 */
   {4|8, PMEM, PGPR, O("\x87"), EN_MR}, /* XCHG r32/64, m32/64 */
)
DEFINSTR2(Xlea,
   {4|8, PGPR, PMEM, O("\x8D"), EN_RM}, /* LEA r32/64,m32/64 */
   {  8, PGPR, PSYM, O("\x8D"), EN_RM}, /* LEA r32/64,rel32 */
)
/* integer ADD; immediates of +1/-1 are emitted as INC/DEC */
DEFINSTR2(Xadd,
   {4|8, PGPR, PGPR, O("\x03"), EN_RR}, /* ADD r32/64, r32/64 */
   {4|8, PGPR, P1,   O("\xFF"), EN_R, .ext=0}, /* INC r32/64 */
   {4|8, PGPR, PN1,  O("\xFF"), EN_R, .ext=1}, /* DEC r32/64 */
   {4|8, PGPR, PI8,  O("\x83"), EN_RI8}, /* ADD r32/64, imm8 */
   {4|8, PRAX, PI32, O("\x05"), EN_I32}, /* ADD eax/rax, imm */
   {4|8, PGPR, PI32, O("\x81"), EN_RI32}, /* ADD r32/64, imm */
   {  8, PGPR, PMEM, O("\x03"), EN_RM}, /* ADD r64, m64 */
)
DEFINSTR2(Xaddf,
   {4, PFPR, PFPR, O("\xF3\x0F\x58"), EN_RR}, /* ADDSS xmm, xmm */
   {8, PFPR, PFPR, O("\xF2\x0F\x58"), EN_RR}, /* ADDSD xmm, xmm */
   {4, PFPR, PMEM, O("\xF3\x0F\x58"), EN_RM}, /* ADDSS xmm, m32 */
   {8, PFPR, PMEM, O("\xF2\x0F\x58"), EN_RM}, /* ADDSD xmm, m64 */
)
/* integer SUB; immediates of +1/-1 are emitted as DEC/INC */
DEFINSTR2(Xsub,
   {4|8, PGPR, PGPR, O("\x2B"), EN_RR}, /* SUB r32/64, r32/64 */
   {4|8, PGPR, P1,   O("\xFF"), EN_R, .ext=1}, /* DEC r32/64 */
   {4|8, PGPR, PN1,  O("\xFF"), EN_R, .ext=0}, /* INC r32/64 */
   {4|8, PGPR, PI8,  O("\x83"), EN_RI8, .ext=5}, /* SUB r32/64, imm8 */
   {4|8, PRAX, PI32, O("\x2D"), EN_I32}, /* SUB eax/rax, imm */
   {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=5}, /* SUB r32/64, imm */
   {  8, PGPR, PMEM, O("\x2B"), EN_RM}, /* SUB r64, m64 */
)
DEFINSTR2(Xsubf,
   {4, PFPR, PFPR, O("\xF3\x0F\x5C"), EN_RR}, /* SUBSS xmm, xmm */
   {8, PFPR, PFPR, O("\xF2\x0F\x5C"), EN_RR}, /* SUBSD xmm, xmm */
   {4, PFPR, PMEM, O("\xF3\x0F\x5C"), EN_RM}, /* SUBSS xmm, m32 */
   {8, PFPR, PMEM, O("\xF2\x0F\x5C"), EN_RM}, /* SUBSD xmm, m64 */
)
DEFINSTR2(Xmulf,
   {4, PFPR, PFPR, O("\xF3\x0F\x59"), EN_RR}, /* MULSS xmm, xmm */
   {8, PFPR, PFPR, O("\xF2\x0F\x59"), EN_RR}, /* MULSD xmm, xmm */
   {4, PFPR, PMEM, O("\xF3\x0F\x59"), EN_RM}, /* MULSS xmm, m32 */
   {8, PFPR, PMEM, O("\xF2\x0F\x59"), EN_RM}, /* MULSD xmm, m64 */
)
DEFINSTR2(Xdivf,
   {4, PFPR, PFPR, O("\xF3\x0F\x5E"), EN_RR}, /* DIVSS xmm, xmm */
   {8, PFPR, PFPR, O("\xF2\x0F\x5E"), EN_RR}, /* DIVSD xmm, xmm */
   {4, PFPR, PMEM, O("\xF3\x0F\x5E"), EN_RM}, /* DIVSS xmm, m32 */
   {8, PFPR, PMEM, O("\xF2\x0F\x5E"), EN_RM}, /* DIVSD xmm, m64 */
)
DEFINSTR2(Xand,
   {4|8, PGPR, PGPR, O("\x23"), EN_RR}, /* AND r32/64, r32/64 */
   {4|8, PGPR, PI8,  O("\x83"), EN_RI8, .ext=4}, /* AND r32/64, imm8 */
   {4|8, PRAX, PI32, O("\x25"), EN_I32}, /* AND eax/rax, imm */
   {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=4}, /* AND r32/64, imm */
   {  8, PGPR, PMEM, O("\x23"), EN_RM}, /* AND r64, m64 */
)
/* inclusive OR; the xmm form must be ORPS (0F 56) -- 0F 57 is XORPS */
DEFINSTR2(Xior,
   {4|8, PGPR, PGPR, O("\x0B"), EN_RR}, /* OR r32/64, r32/64 */
   {4|8, PGPR, PI8,  O("\x83"), EN_RI8, .ext=1}, /* OR r32/64, imm8 */
   {4|8, PRAX, PI32, O("\x0D"), EN_I32}, /* OR eax/rax, imm */
   {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=1}, /* OR r32/64, imm */
   {  8, PGPR, PMEM, O("\x0B"), EN_RM}, /* OR r64, m64 */
   {4|8, PFPR, PFPR, O("\x0F\x56"), EN_RR}, /* ORPS xmm, xmm */
)
DEFINSTR2(Xxor,
   {4|8, PGPR, PGPR, O("\x33"), EN_RR}, /* XOR r32/64, r32/64 */
   {4|8, PGPR, PI8,  O("\x83"), EN_RI8, .ext=6}, /* XOR r32/64, imm8 */
   {4|8, PRAX, PI32, O("\x35"), EN_I32}, /* XOR eax/rax, imm */
   {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=6}, /* XOR r32/64, imm */
   {  8, PGPR, PMEM, O("\x33"), EN_RM}, /* XOR r64, m64 */
   {4|8, PFPR, PFPR, O("\x0F\x57"), EN_RR}, /* XORPS xmm, xmm */
   {4|8, PFPR, PMEM, O("\x0F\x57"), EN_RM}, /* XORPS xmm, m128 */
)
/* shifts: by 1, by immediate (emitted as imm8), or by CL */
DEFINSTR2(Xshl,
   {4|8, PGPR, P1,   O("\xD1"), EN_R, .ext=4}, /* SHL r32/64, 1 */
   {4|8, PGPR, PI32, O("\xC1"), EN_RI8, .ext=4}, /* SHL r32/64, imm */
   {4|8, PGPR, PRCX, O("\xD3"), EN_R, .ext=4}, /* SHL r32/64, CL */
)
DEFINSTR2(Xsar,
   {4|8, PGPR, P1,   O("\xD1"), EN_R, .ext=7}, /* SAR r32/64, 1 */
   {4|8, PGPR, PI32, O("\xC1"), EN_RI8, .ext=7}, /* SAR r32/64, imm */
   {4|8, PGPR, PRCX, O("\xD3"), EN_R, .ext=7}, /* SAR r32/64, CL */
)
DEFINSTR2(Xrolw,
   {-1, PGPR, PI8, O("\x66\xC1"), EN_RI8}, /* ROL r16, imm */
)
DEFINSTR2(Xshr,
   {4|8, PGPR, P1,   O("\xD1"), EN_R, .ext=5}, /* SHR r32/64, 1 */
   {4|8, PGPR, PI32, O("\xC1"), EN_RI8, .ext=5}, /* SHR r32/64, imm */
   {4|8, PGPR, PRCX, O("\xD3"), EN_R, .ext=5}, /* SHR r32/64, CL */
)
/* float <-> float and int <-> float conversions */
DEFINSTR2(Xcvtss2sd,
   {-1, PFPR, PFPR, O("\xF3\x0F\x5A"), EN_RR}, /* CVTSS2SD xmm, xmm */
   {-1, PFPR, PMEM, O("\xF3\x0F\x5A"), EN_RM}, /* CVTSS2SD xmm, m32/64 */
)
DEFINSTR2(Xcvtsd2ss,
   {-1, PFPR, PFPR, O("\xF2\x0F\x5A"), EN_RR}, /* CVTSD2SS xmm, xmm */
   {-1, PFPR, PMEM, O("\xF2\x0F\x5A"), EN_RM}, /* CVTSD2SS xmm, m32/64 */
)
DEFINSTR2(Xcvtsi2ss,
   {-1, PFPR, PGPR, O("\xF3\x0F\x2A"), EN_RR}, /* CVTSI2SS xmm, r32/64 */
   {-1, PFPR, PMEM, O("\xF3\x0F\x2A"), EN_RM}, /* CVTSI2SS xmm, m32/64 */
)
DEFINSTR2(Xcvtsi2sd,
   {-1, PFPR, PGPR, O("\xF2\x0F\x2A"), EN_RR}, /* CVTSI2SD xmm, r32/64 */
   {-1, PFPR, PMEM, O("\xF2\x0F\x2A"), EN_RM}, /* CVTSI2SD xmm, m32/64 */
)
DEFINSTR2(Xcvttss2si,
   {-1, PGPR, PFPR, O("\xF3\x0F\x2C"), EN_RR}, /* CVTTSS2SI r32/64, xmm */
   {-1, PGPR, PMEM, O("\xF3\x0F\x2C"), EN_RM}, /* CVTTSS2SI r32/64, m32 */
)
DEFINSTR2(Xcvttsd2si,
   {-1, PGPR, PFPR, O("\xF2\x0F\x2C"), EN_RR}, /* CVTTSD2SI r32/64, xmm */
   {-1, PGPR, PMEM, O("\xF2\x0F\x2C"), EN_RM}, /* CVTTSD2SI r32/64, m32 */
)
DEFINSTR1(Xneg,
   {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=3} /* NEG r32/64 */
)
DEFINSTR1(Xnot,
   {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=2} /* NOT r32/64 */
)
DEFINSTR1(Xidiv,
   {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=7}, /* IDIV r32/64 */
   {4|8, PMEM, 0, O("\xF7"), EN_M, .ext=7}, /* IDIV m32/64 */
)
DEFINSTR1(Xdiv,
   {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=6}, /* DIV r32/64 */
   {4|8, PMEM, 0, O("\xF7"), EN_M, .ext=6}, /* DIV m32/64 */
)
DEFINSTR1(Xbswap,
   {4|8, PGPR, 0, O("\x0F\xC8"), EN_O}, /* BSWAP r32/64 */
)
DEFINSTR1(Xcall,
   {-1, PSYM, 0, O("\xE8"), EN_R32, .norexw=1}, /* CALL rel32 */
   {-1, PGPR, 0, O("\xFF"), EN_R, .ext=2, .norexw=1}, /* CALL r64 */
   {-1, PMEM, 0, O("\xFF"), EN_M, .ext=2, .norexw=1}, /* CALL m64 */
)
/* integer CMP and float UCOMISS/UCOMISD */
DEFINSTR2(Xcmp,
   {4|8, PGPR, PGPR, O("\x3B"), EN_RR}, /* CMP r32/64, r32/64 */
   {4|8, PGPR, PI8,  O("\x83"), EN_RI8, .ext=7}, /* CMP r32/64, imm8 */
   {4|8, PRAX, PI32, O("\x3D"), EN_I32}, /* CMP eax/rax, imm */
   {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=7}, /* CMP r32/64, imm */
   {  8, PGPR, PMEM, O("\x3B"), EN_RM}, /* CMP r64, m64 */
   {4  , PFPR, PFPR, O("\x0F\x2E"), EN_RR}, /* UCOMISS xmm, xmm */
   {4  , PFPR, PMEM, O("\x0F\x2E"), EN_RM}, /* UCOMISS xmm, m32 */
   {  8, PFPR, PFPR, O("\x66\x0F\x2E"), EN_RR}, /* UCOMISD xmm, xmm */
   {  8, PFPR, PMEM, O("\x66\x0F\x2E"), EN_RM}, /* UCOMISD xmm, m64 */
)
/* NOTE(review): the two 8-bit forms below match PI8, which also admits
 * negative immediates; an 8-bit TEST ignores the upper bits that such a
 * sign-extended mask would cover in the 32/64-bit form -- confirm callers
 * only pass masks in 0..127 here. */
DEFINSTR2(Xtest,
   {4|8, PRAX, PI8,  O("\xA8"), EN_I8, .norexw=1}, /* TEST AL, imm8 */
   {4,   PRAX, PI32, O("\xA9"), EN_I32}, /* TEST EAX, imm32 */
   {  8, PRAX, PU32, O("\xA9"), EN_I32, .norexw=1}, /* TEST EAX, imm32 */
   {  8, PRAX, PI32, O("\xA9"), EN_I32}, /* TEST RAX, imm32 */
   {4|8, PGPR, PI8,  O("\xF6"), EN_RI8, .r8=1,.norexw=1}, /* TEST r8, imm8 */
   {4|8, PGPR, PI32, O("\xF7"), EN_RI32, .ext=0}, /* TEST r32/64, imm32 */
   {4|8, PGPR, PGPR, O("\x85"), EN_RR}, /* TEST r32/64, r32/64 */
   {4|8, PGPR, PMEM, O("\x85"), EN_RM}, /* TEST r32/64, m32/64 */
)
DEFINSTR2(Ximul2,
   {4|8, PGPR, PGPR, O("\x0F\xAF"), EN_RR}, /* IMUL r32/64, r32/64 */
   {4|8, PGPR, PMEM, O("\x0F\xAF"), EN_RM}, /* IMUL
r32/64, m32/64 */ ) static const EncDesc imul3_imm8tab[] = { {4|8, PGPR, PGPR, O("\x6B"), EN_RR}, /* IMUL r32/64, r32/64, (imm8) */ {4|8, PGPR, PMEM, O("\x6B"), EN_RM}, /* IMUL r32/64, m32/64, (imm8) */ }, imul3_imm32tab[] = { {4|8, PGPR, PGPR, O("\x69"), EN_RR}, /* IMUL r32/64, r32/64, (imm32) */ {4|8, PGPR, PMEM, O("\x69"), EN_RM}, /* IMUL r32/64, m32/64, (imm32) */ }; #undef O static void Ximul(uchar **pcode, enum irclass k, Oper dst, Oper s1, Oper s2) { if (opereql(dst, s1) && s2.t != OIMM) { Ximul2(pcode, k, dst, s2); return; } assert(s2.t == OIMM); if (-128 <= s2.imm && s2.imm < 128) { encode(pcode, imul3_imm8tab, countof(imul3_imm8tab), k, dst, s1); B(s2.imm); } else { encode(pcode, imul3_imm32tab, countof(imul3_imm32tab), k, dst, s1); I32(s2.imm); } } enum cc { CCO = 0x0, /* OF = 1*/ CCNO = 0x1, /* OF = 0*/ CCB = 0x2, CCC = 0x2, CCNAE = 0x2, /* below; CF = 1; not above or equal */ CCAE = 0x3, CCNB = 0x3, CCNC = 0x3, /* above or equal; not below; CF = 0 */ CCE = 0x4, CCZ = 0x4, /* equal; ZF = 1 */ CCNE = 0x5, CCNZ = 0x5, /* not equal; ZF = 0 */ CCBE = 0x6, CCNA = 0x6, /* below or equal; not above; CF=1 or ZF=1 */ CCA = 0x7, CCNBE = 0x7, /* above; not below or equal; CF=0 and ZF=0 */ CCS = 0x8, /* ZS = 1; negative */ CCNS = 0x9, /* ZS = 0; non-negative */ CCP = 0xA, CCPE = 0xA, /* PF = 1; parity even */ CCNP = 0xB, CCPO = 0xB, /* PF = 0; parity odd */ CCL = 0xC, CCNGE = 0xC, /* lower; not greater or equal; SF != OF */ CCGE = 0xD, CCNL = 0xD, /* greater or equal; not lower; SF == OF */ CCLE = 0xE, CCNG = 0xE, /* less or equal; not greater; ZF=1 or SF != OF */ CCG = 0xF, CCNLE = 0xF, /* greater; not less or equal; ZF=0 and SF = OF*/ ALWAYS, }; /* maps blk -> address when resolved; or to linked list of jump displacement * relocations */ static struct BlkAddr { bool resolved; union { uint addr; uint relreloc; }; } *blkaddr; static void Xjcc(uchar **pcode, enum cc cc, Block *dst) { int disp, insaddr = *pcode - objout.textbegin; bool rel8 = 0; if 
(blkaddr[dst->id].resolved) { disp = blkaddr[dst->id].addr - (insaddr + 2); if ((uint)(disp + 128) < 256) /* can use 1-byte displacement? */ rel8 = 1; else { /* otherwise 4-byte displacement */ disp -= 3; disp -= cc != ALWAYS; /* 'Jcc rel32' has 2 opcode bytes */ } } else { disp = blkaddr[dst->id].relreloc; blkaddr[dst->id].relreloc = insaddr + 1 + (cc != ALWAYS); } if (cc == ALWAYS) { B(rel8 ? 0xEB : 0xE9); /* JMP rel8/rel32 */ } else { assert(in_range(cc, 0, 0xF)); if (rel8) B(0x70 + cc); /* Jcc rel8 */ else B(0x0F), B(0x80 + cc); /* Jcc rel32 */ } if (rel8) B(disp); else I32(disp); } static void Xsetcc(uchar **pcode, enum cc cc, enum reg reg) { int rex = 0; assert(in_range(cc, 0x0, 0xF)); assert(in_range(reg, RAX, R15)); if (in_range(reg, RSP, RDI)) rex = 0x40; rex |= (reg >> 3); /* REX.B */ if (rex) B(rex | 0x40); B(0x0F), B(0x90+cc); /* SETcc */ B(0xC0 + (reg & 7)); /* ModR/M with mod=11, rm=reg */ } static void Xpush(uchar **pcode, enum reg reg) { if (in_range(reg, RAX, R15)) { if (reg >> 3) B(0x41); /* REX.B */ B(0x50 + (reg & 7)); /* PUSH reg */ } else { assert(in_range(reg, XMM0, XMM15)); DS("\x48\x8d\x64\x24\xF8"); /* LEA RSP, [RSP-8] */ Xmov(pcode, KF64, mkoper(OMEM, .base = RSP, .index = NOINDEX), reg2oper(reg)); /* MOVD [rsp],xmm0 */ } } static void Xpop(uchar **pcode, enum reg reg) { if (in_range(reg, RAX, R15)) { if (reg >> 3) B(0x41); /* REX.B */ B(0x58 + (reg & 7)); /* POP reg */ } else { assert(in_range(reg, XMM0, XMM15)); Xmov(pcode, KF64, reg2oper(reg), mkoper(OMEM, .base = RSP, .index = NOINDEX)); /* MOVD xmm0,[rsp] */ DS("\x48\x8d\x64\x24\x08"); /* LEA RSP, [RSP+8] */ } } /* are flags live at given instruction? */ static bool flagslivep(Block *blk, int curi) { int cmpi; /* conditional branch that references a previous comparison instruction? 
*/ if (blk->jmp.t != Jb || !blk->jmp.arg[0].bits) return 0; assert(blk->jmp.arg[0].t == RTMP); cmpi = blk->jmp.arg[0].i; for (int i = blk->ins.n - 1; i > curi; --i) { if (blk->ins.p[i] == cmpi) /* flags defined after given instruction, dead here */ return 0; } /* flags defined before given instruction, live here */ return 1; } static bool shouldusegot(int con) { IRCon *xcon = &contab.p[con]; if ((ccopt.pic || (xcon->flag & SFUNC)) && (xcon->flag & (SLOCAL|SFUNC)) != (SLOCAL|SFUNC)) return 1; if (ccopt.pic && objhassym(xcon2sym(con), NULL) != Stext) return 1; return 0; } /* Copy dst = val, with some peephole optimizations */ static void gencopy(uchar **pcode, enum irclass cls, Block *blk, int curi, Oper dst, Ref val) { assert(dst.t == OREG); if (val.bits == UNDREF.bits) { /* can be generated by ssa construction, since value is undefined no move is needed */ return; } if (val.t == RADDR) { /* this is a LEA, but maybe it can be lowered to a 2-address instruction, * which may clobber flags */ const IRAddr *addr = &addrtab.p[val.i]; if (flagslivep(blk, curi)) goto Lea; if (addr->base.t != RREG) goto Lea; if (addr->base.bits && dst.reg == mkregoper(addr->base).reg) { /* base = dst */ if (addr->index.bits && !addr->disp && !addr->shift){ /* lea Rx, [Rx + Ry] -> add Rx, Ry */ Xadd(pcode, cls, dst, mkregoper(addr->index)); return; } else if (!addr->index.bits) { if (!addr->disp) /* lea Rx, [Rx] -> mov Rx, Rx */ Xmov(pcode, cls, dst, dst); else /* lea Rx, [Rx + Imm] -> add Rx, Imm */ Xadd(pcode, cls, dst, mkoper(OIMM, .imm = addr->disp)); return; } } else if (addr->index.bits && dst.reg == mkregoper(addr->index).reg) { /* index = dst */ if (addr->base.bits && !addr->disp && !addr->shift) { /* lea Rx, [Ry + Rx] -> add Rx, Ry */ Xadd(pcode, cls, dst, mkregoper(addr->base)); return; } else if (!addr->base.bits) { if (!addr->disp && !addr->shift) /* lea Rx, [Rx] -> mov Rx, Rx */ Xmov(pcode, cls, dst, dst); else if (!addr->shift) /* lea Rx, [Rx + Imm] -> add Rx, Imm */ 
Xadd(pcode, cls, dst, mkoper(OIMM, .imm = addr->disp)); else if (!addr->disp) /* lea Rx, [Rx LSL s] -> shl Rx, s */ Xshl(pcode, cls, dst, mkoper(OIMM, .imm = addr->shift)); else goto Lea; return; } } /* normal (not 2-address) case */ Lea: if (isaddrcon(addr->base,0) && shouldusegot(addr->base.i)) { assert(!addr->disp && !addr->index.bits); val = addr->base; goto GOTLoad; } Xlea(pcode, cls, dst, ref2oper(val)); } else if (val.t == RSTACK) { Xlea(pcode, cls, dst, ref2oper(val)); } else if (val.bits == ZEROREF.bits && dst.t == OREG && (kisflt(cls) || !flagslivep(blk, curi))) { /* dst = 0 -> xor dst, dst; but only if it is ok to clobber flags */ Xxor(pcode, kisint(cls) ? KI32 : cls, dst, dst); } else if (isaddrcon(val,0)) { if (shouldusegot(val.i)) { GOTLoad: /* for mov reg, [rip(sym@GOTPCREL)] */ Xmov(pcode, cls, dst, mkoper(OSYMGOT, .con = val.i, .cindex = NOINDEX)); } else { /* for lea reg, [rip(sym)] */ Xlea(pcode, cls, dst, mkoper(OSYM, .con = val.i, .cindex = NOINDEX)); } } else if (val.t == RXCON && in_range(concls(val), KI64, KPTR)) { /* movabs */ assert(dst.t == OREG && in_range(dst.reg, RAX, R15)); B(0x48 | (dst.reg >> 3)); /* REX.W (+ REX.B) */ B(0xB8 + (dst.reg & 0x7)); /* MOVABS r64, */ wr64le(*pcode, intconval(val)); /* imm64 */ *pcode += 8; } else { Oper src = mkimmdatregoper(val); if (!opereql(dst, src)) Xmov(pcode, cls == KF64 && src.t == OREG && src.reg < XMM0 ? 
KI64 : cls, dst, src); } } static void Xvaprologue(uchar **pcode, Function *fn, Oper sav) { uint gpr0 = 0, fpr0 = 0, jmpaddr; for (int i = 0; i < fn->nabiarg; ++i) { ABIArg abi = fn->abiarg[i]; if (!abi.isstk) { if (abi.reg < XMM0) ++gpr0; else ++fpr0; } } assert(sav.t == OMEM && sav.base == RBP); /* save GPRS */ for (int r = 0; r < 6; ++r) { static const char reg[] = {RDI,RSI,RDX,RCX,R8,R9}; if (r >= gpr0) Xmov(pcode, KI64, sav, reg2oper(reg[r])); sav.disp += 8; } /* save FPRs, but only if al is non zero */ if (fpr0 < 8) { DS("\x84\xC0"); /* TEST al,al */ jmpaddr = *pcode - objout.textbegin; DS("\x74\xFE"); /* JE rel8 */ } for (int r = 0; r < 8; ++r) { if (r >= fpr0) Xmovaps(pcode, KF64, sav, reg2oper(XMM0 + r)); sav.disp += 16; } if (fpr0 < 8) {/* patch relative jump */ int off = (*pcode - objout.textbegin) - jmpaddr - 2; objout.textbegin[jmpaddr+1] = off; } } /* condition code for CMP */ static const uchar icmpop2cc[] = { [Oequ] = CCE, [Oneq] = CCNE, [Olth] = CCL, [Ogth] = CCG, [Olte] = CCLE, [Ogte] = CCGE, [Oulth] = CCB, [Ougth] = CCA, [Oulte] = CCBE, [Ougte] = CCAE, [Oand] = CCNE, [Osub] = CCNE, }, fcmpop2cc[] = { [Oequ] = CCE, [Oneq] = CCNE, [Olth] = CCB, [Ogth] = CCA, [Olte] = CCBE, [Ogte] = CCAE, }; /* condition code for TEST reg,reg (compare with zero) */ static const uchar icmpzero2cc[] = { [Oequ] = CCE, [Oulte] = CCE, [Oneq] = CCNE, [Ougth] = CCNE, [Olth] = CCS, [Ogte] = CCNS, [Olte] = CCLE, [Ogth] = CCG, [Oulth] = CCB, [Ougte] = CCAE, /* actually constants */ }; static void emitinstr(uchar **pcode, Function *fn, Block *blk, int curi, Instr *ins) { Oper dst, src; bool regzeroed; enum irclass cls = ins->cls; void (*X)(uchar **, enum irclass, Oper, Oper) = NULL; void (*X1)(uchar **, enum irclass, Oper) = NULL; switch (ins->op) { default: fatal(NULL, "x86_64: in %y; unimplemented instr '%s'", fn->name, opnames[ins->op]); case Onop: break; case Omove: dst = ref2oper(ins->l); gencopy(pcode, cls, blk, curi, dst, ins->r); break; case Ocopy: dst = 
reg2oper(ins->reg-1);
      gencopy(pcode, cls, blk, curi, dst, ins->l);
      break;

   /* stores: pick the operand class and the MOV flavour, then share one emit path */
   case Ostorei8: cls = KI32, X = Xmovb; goto Store;
   case Ostorei16: cls = KI32, X = Xmovw; goto Store;
   case Ostorei32: cls = KI32, X = Xmov; goto Store;
   case Ostorei64: cls = KI64, X = Xmov; goto Store;
   case Ostoref32: cls = KF32, X = Xmov; goto Store;
   case Ostoref64: cls = KF64, X = Xmov; goto Store;
   Store:
      src = mkimmregoper(ins->r);
      X(pcode, cls, mkmemoper(ins->l), src);
      break;

   /* extensions take a register source and jump to the emit label of the
    * load of the same width, which takes a memory source */
   case Oexts8: src = mkregoper(ins->l); goto Movsxb;
   case Oextu8: src = mkregoper(ins->l); goto Movzxb;
   case Oexts16: src = mkregoper(ins->l); goto Movsxw;
   case Oextu16: src = mkregoper(ins->l); goto Movzxw;
   case Oexts32: src = mkregoper(ins->l); goto Movsxl;
   case Oextu32: src = mkregoper(ins->l); goto Movzxl;
   case Oloads8:
      src = mkmemoper(ins->l);
   Movsxb:
      Xmovsxb(pcode, cls, reg2oper(ins->reg-1), src);
      break;
   case Oloadu8:
      src = mkmemoper(ins->l);
   Movzxb:
      Xmovzxb(pcode, cls, reg2oper(ins->reg-1), src);
      break;
   case Oloads16:
      src = mkmemoper(ins->l);
   Movsxw:
      Xmovsxw(pcode, cls, reg2oper(ins->reg-1), src);
      break;
   case Oloadu16:
      src = mkmemoper(ins->l);
   Movzxw:
      Xmovzxw(pcode, cls, reg2oper(ins->reg-1), src);
      break;
   case Oloads32:
      src = mkmemoper(ins->l);
   Movsxl:
      Xmovsxl(pcode, cls, reg2oper(ins->reg-1), src);
      break;
   case Oloadu32:
      src = mkmemoper(ins->l);
   Movzxl:
      /* a plain 32-bit MOV is emitted here; on x86-64 writing a 32-bit
       * register clears the upper half of the 64-bit register */
      Xmov(pcode, KI32, reg2oper(ins->reg-1), src);
      break;
   case Oloadf32: case Oloadf64:
      Xmov(pcode, cls, reg2oper(ins->reg-1), mkmemoper(ins->l));
      break;
   case Oloadi64:
      Xmov(pcode, KI64, reg2oper(ins->reg-1), mkmemoper(ins->l));
      break;

   /* conversions: select the emitter, then share one emit path */
   case Ocvtf32f64: X = Xcvtss2sd; goto FloatsCvt;
   case Ocvtf64f32: X = Xcvtsd2ss; goto FloatsCvt;
   case Ocvtf32s: X = Xcvttss2si; goto FloatsCvt;
   case Ocvtf64s: X = Xcvttsd2si; goto FloatsCvt;
   /* int -> float: the emitter is chosen from the result class before cls is
    * overwritten with the integer source class */
   case Ocvts32f: X = cls == KF32 ? Xcvtsi2ss : Xcvtsi2sd; cls = KI32; goto FloatsCvt;
   case Ocvts64f: X = cls == KF32 ? Xcvtsi2ss : Xcvtsi2sd; cls = KI64; goto FloatsCvt;
   FloatsCvt:
      X(pcode, cls, reg2oper(ins->reg-1), mkdatregoper(ins->l));
      break;

   case Oadd:
      dst = mkregoper(ins->l);
      if (kisflt(cls)) {
         Xaddf(pcode, cls, dst, mkimmdatregoper(ins->r));
      } else if (ins->reg-1 == dst.reg) { /* two-address add */
         src = ref2oper(ins->r);
         if (src.t == OIMM && src.imm < 0) /* ADD -imm -> SUB imm, for niceness */
            /* the negation is done in unsigned arithmetic (presumably so the
             * most negative immediate does not overflow) */
            Xsub(pcode, cls, dst, (src.imm = -(uint)src.imm, src));
         else
            Xadd(pcode, cls, dst, src);
      } else if (isregref(ins->r) && ins->reg-1 == mkregoper(ins->r).reg) {
         /* also two-address after swapping operands */
         Xadd(pcode, cls, reg2oper(ins->reg-1), mkimmdatregoper(ins->l));
      } else { /* three-address add (lea) */
         /* fold both operands into one memory operand and let LEA compute it */
         Oper mem = { OMEM, .base = NOBASE, .index = NOINDEX };
         dst = reg2oper(ins->reg-1);
         addmemoper(&mem, ref2oper(ins->l));
         addmemoper(&mem, ref2oper(ins->r));
         Xlea(pcode, cls, dst, mem);
      }
      break;
   case Osub:
      dst = mkregoper(ins->l);
      if (kisflt(cls)) {
         Xsubf(pcode, cls, dst, mkimmdatregoper(ins->r));
      } else if (!ins->reg) {
         /* no result register: emit CMP instead of SUB */
         Xcmp(pcode, cls, mkregoper(ins->l), mkimmdatregoper(ins->r));
      } else if (ins->reg-1 == dst.reg) { /* two-address */
         Xsub(pcode, cls, dst, ref2oper(ins->r));
      } else {
         /* three-address form is only emitted for a constant right operand:
          * lea dst, [l - con] */
         assert(isintcon(ins->r));
         Xlea(pcode, cls, reg2oper(ins->reg-1),
               mkoper(OMEM, .base = mkregoper(ins->l).reg, .index = NOINDEX, .disp = -intconval(ins->r)));
      }
      break;
   /* NOTE(review): in this copy of the file the text of the Oshl case is cut
    * off inside its last comment, and whatever followed it up to the start of
    * what looks like the Oand case is missing. The damaged text is left
    * untouched below; restore it from a pristine copy of the source. */
   case Oshl:
      dst = reg2oper(ins->reg-1);
      src = mkregoper(ins->l);
      if (dst.reg == src.reg)
         Xshl(pcode, cls, dst, mkimmdatregoper(ins->r));
      else {
         uint sh = ins->r.i;
         assert(ins->r.t == RICON && sh <= 3);
         if (sh == 1) /* shl x, 1 -> lea [x + x] */
            Xlea(pcode, cls, dst, mkoper(OMEM, .base = src.reg, .index = src.reg));
         else /* shl x, n -> lea [x*(1<reg) {
         Xtest(pcode, cls, mkregoper(ins->l), mkimmdatregoper(ins->r));
         break;
      }
      X = Xand; goto ALU2;
   case Oxor: X = Xxor; goto ALU2;
   case Oior: X = Xior; goto ALU2;
   ALU2:
      dst = mkregoper(ins->l);
      assert(ins->reg-1 == dst.reg);
      X(pcode, cls, dst, mkimmdatregoper(ins->r));
      break;
   case Oneg: X1 = Xneg; goto ALU1;
   case Onot: X1 = Xnot; goto ALU1;
   ALU1:
      dst = mkregoper(ins->l);
      assert(ins->reg-1 == dst.reg);
      X1(pcode, cls, dst);
      break;
   case Obswap16:
      dst = mkregoper(ins->l);
      assert(ins->reg-1 == dst.reg);
      if (dst.reg < 4) { /* AX,BX,CX,DX */
         /* XCHG rH, rL */
         /* ModRM built by hand: the low 3 bits select register number
          * 4+dst.reg, bits 3..5 select dst.reg */
         B(0x86), B(0xC4 | dst.reg | (dst.reg)<<3);
      } else {
         /* ROL r16,8 */
         Xrolw(pcode, KI32, dst, mkoper(OIMM, .imm = 8));
      }
      break;
   case Obswap32: case Obswap64:
      X1 = Xbswap;
      goto ALU1;
   case Omul:
      if (kisint(cls))
         Ximul(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->l), ref2oper(ins->r));
      else
         Xmulf(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->r));
      break;
   case Odiv:
      switch (cls) {
      default: assert(0);
      case KPTR: case KI64:
         B(0x48); /* REX.W */
         /* falls through: the prefix turns the CDQ below into CQO */
      case KI32:
         B(0x99); /* CDQ/CQO */
         /* the dividend is required to be in RAX */
         assert(mkregoper(ins->l).reg == RAX);
         Xidiv(pcode, cls, mkdatregoper(ins->r));
         break;
      case KF32: case KF64:
         Xdivf(pcode, cls, reg2oper(ins->reg-1), mkdatregoper(ins->r));
         break;
      }
      break;
   case Oudiv:
      DS("\x31\xD2"); /* XOR EDX,EDX */
      assert(mkregoper(ins->l).reg == RAX);
      Xdiv(pcode, cls, mkdatregoper(ins->r));
      break;

   /* comparisons: set flags with TEST/CMP and, when a result register is
    * assigned, materialise the outcome as 0/1 in it */
   case Oequ: case Oneq:
   case Olth: case Ogth: case Olte: case Ogte:
   case Oulth: case Ougth: case Oulte: case Ougte:
      dst = mkregoper(ins->l);
      src = ref2oper(ins->r);
      regzeroed = 0;
      if (ins->reg && dst.reg != ins->reg-1 && (src.t != OREG || src.reg != ins->reg-1)) {
         /* can zero output reg before test instruction (differs from both inputs) */
         /* XXX this doesn't check if a source operand is an addr containing the register */
         Oper dst = reg2oper(ins->reg-1); /* shadows the outer dst inside this block only */
         Xxor(pcode, KI32, dst, dst);
         regzeroed = 1;
      }
      if (kisint(ins->cls) && ins->r.bits == ZEROREF.bits)
         Xtest(pcode, cls, dst, dst);
      else
         Xcmp(pcode, cls, dst, src);
      if (ins->reg) {
         enum cc cc;
         dst = reg2oper(ins->reg-1);
         if (ins->r.bits != ZEROREF.bits) { /* CMP */
            cc = (kisint(ins->cls) ?
icmpop2cc : fcmpop2cc)[ins->op];
         } else { /* TEST r,r (CMP r, 0) */
            assert(kisint(ins->cls));
            cc = icmpzero2cc[ins->op];
         }
         if (kisflt(ins->cls)) { /* handle float unordered result */
            /* value stored when the parity flag reports an unordered compare:
             * 1 for Oneq, 0 for every other comparison */
            int unordres = ins->op == Oneq ? 1 : 0;
            int rex = 0;
            if (in_range(dst.reg, RSP, RDI)) rex = 0x40; /* bare REX prefix for the byte form of these registers */
            rex |= (dst.reg >> 3); /* REX.B */
            /* bytes skipped by the short jumps below; presumably the encoded
             * length of the SETcc emitted afterwards (3, plus 1 with REX) */
            int jpoff = 3 + (rex != 0);
            if (regzeroed && unordres == 0) {
               /* if cmp unordered, just jump over the SETcc; result reg was already zeroed */
               B(0x7A), B(jpoff); /* JP */
            } else {
               /* JNP .a
                * MOV r8, 0/1
                * JMP .b
                * .a: SETcc r8
                * .b: MOVZX r, r8
                */
               /* jpoff+1 skips the MOV (2 bytes, 3 with REX) plus the 2-byte JMP */
               B(0x7B), B(jpoff+1); /* JNP */
               if (rex) B(rex | 0x40);
               B(0xB0 + (dst.reg & 7)), B(unordres); /* MOV r8, 0/1 */
               B(0xEB), B(jpoff); /* JMP */
            }
         }
         Xsetcc(pcode, cc, dst.reg);
         /* widen the byte result unless the register was cleared beforehand */
         if (!regzeroed)
            Xmovzxb(pcode, KI32, dst, dst);
      }
      break;
   case Oswap:
      if (kisint(cls))
         Xxchg(pcode, cls, ref2oper(ins->l), mkregoper(ins->r));
      else {
         /* non-integer classes: swap with three XORs */
         Oper l = mkregoper(ins->l), r = mkregoper(ins->r);
         Xxor(pcode, cls, l, r);
         Xxor(pcode, cls, r, l);
         Xxor(pcode, cls, l, r);
      }
      break;
   case Ocall:
      Xcall(pcode, KPTR, ref2oper(ins->l));
      break;
   case Oxvaprologue:
      Xvaprologue(pcode, fn, mkmemoper(ins->l));
      break;
   }
}

/* Emit the jump(s) that terminate blk.
 *
 * blk->s1 must be set (asserted). When blk->s2 is also set the branch is
 * conditional and the condition code is derived from the instruction that
 * produced blk->jmp.arg[0]. Jumps to the block that immediately follows
 * (blk->lnext) are avoided by falling through.
 *
 * Side effect: blk->s1 and blk->s2 may be swapped in place.
 */
static void
emitbranch(uchar **pcode, Block *blk)
{
   enum cc cc = ALWAYS;

   assert(blk->s1);
   if (blk->s2) { /* conditional branch.. */
      Ref arg = blk->jmp.arg[0];
      Block *unord = NULL; /* target taken when a float compare is unordered */
      assert(arg.t == RTMP);
      Instr *ins = &instrtab[arg.i];
      if ((oiscmp(ins->op) || ins->op == Oand || ins->op == Osub)) {
         if (ins->r.bits != ZEROREF.bits) {
            /* for CMP instr */
            cc = (kisint(ins->cls) ? icmpop2cc : fcmpop2cc)[ins->op];
            /* unordered counts as "not equal": only Oneq goes to s1 */
            unord = ins->op == Oneq ?
blk->s1 : blk->s2;
         } else {
            assert(kisint(ins->cls));
            /* for TEST instr, which modifies ZF and SF and sets CF = OF = 0 */
            cc = icmpzero2cc[ins->op];
         }
      } else {
         /* implicit by ZF */
         cc = CCNZ;
      }
      if (kisflt(ins->cls)) {
         /* handle float unordered result */
         Xjcc(pcode, CCP, unord);
      }
      if (blk->s1 == blk->lnext) {
         /* if s1 is next adjacent block, swap s1,s2 and flip condition to emit a
          * single jump */
         Block *tmp = blk->s1;
         blk->s1 = blk->s2;
         blk->s2 = tmp;
         cc ^= 1; /* toggling the low bit yields the inverse condition code */
      }
   }
   /* make sure to fallthru if jumping to next adjacent block */
   if (blk->s2 || blk->s1 != blk->lnext)
      Xjcc(pcode, cc, blk->s1);
   if (blk->s2 && blk->s2 != blk->lnext)
      Xjcc(pcode, ALWAYS, blk->s2);
}

/* Push every register among R15..R12 and RBX that is marked in fn->regusage,
 * in that order. Returns the number of registers pushed. */
static int
calleesave(uchar **pcode, Function *fn)
{
   int n = 0;
   for (int r = R15; r >= R12; --r) {
      if (rstest(fn->regusage, r)) {
         Xpush(pcode, r);
         ++n;
      }
   }
   if (rstest(fn->regusage, RBX)) {
      Xpush(pcode, RBX);
      ++n;
   }
   return n;
}

/* Pop the registers pushed by calleesave, in the reverse order:
 * RBX first, then R12 up to R15. */
static void
calleerestore(uchar **pcode, Function *fn)
{
   if (rstest(fn->regusage, RBX))
      Xpop(pcode, RBX);
   for (int r = R12; r <= R15; ++r)
      if (rstest(fn->regusage, r))
         Xpop(pcode, r);
}

/* align code using NOPs */
/* Pads *pcode until its offset from objout.textbegin is a multiple of align.
 * align is used as a mask (align - 1), so it is assumed to be a power of two.
 * Each iteration emits one NOP encoding sized by the remaining gap; gaps of
 * 9 or more emit a 9-byte form and loop again.
 * NOTE(review): only gaps of 1..15 have a case, so with align > 16 a larger
 * gap would emit nothing and the loop would not make progress. The only call
 * visible here passes 16. */
static void
nops(uchar **pcode, int align)
{
   int rem;
   while ((rem = (*pcode - objout.textbegin) & (align - 1)) != 0) {
      switch (align - rem) {
      case 15: case 14: case 13: case 12: case 11: case 10:
      case 9: B(0x66); /* 0x66 prefix, then falls through to the 8-byte form */
      case 8: DS("\x0f\x1f\x84\x00\x00\x00\x00\x00"); break;
      case 7: DS("\x0f\x1f\x80\x00\x00\x00\x00"); break;
      case 6: B(0x66); /* 0x66 prefix, then falls through to the 5-byte form */
      case 5: DS("\x0f\x1f\x44\x00\x00"); break;
      case 4: DS("\x0f\x1f\x40\x00"); break;
      case 3: DS("\x0f\x1f\00"); break;
      case 2: B(0x66); /* 0x66 prefix, then falls through to the 1-byte NOP */
      case 1: B(0x90); break;
      }
   }
}

/* stack size threshold: functions whose stack is at least this large get a frame pointer */
enum { STACKREDZONE = 128 };

/* Emit the machine code of one function into objout: alignment padding,
 * prologue, every block (instructions followed by its terminator) and the
 * epilogue at returns, then register the function symbol with objdeffunc().
 * Also fills the file-level `frame` description that is read by stackdisp(). */
static void
emitbin(Function *fn)
{
   Block *blk;
   uchar **pcode = &objout.code;
   int npush = 0;

   nops(pcode, 16);
   fnstart = *pcode;
   curfnsym = fn->name;

   /** prologue **/

   /* only use frame pointer in non-leaf functions and functions with large stack frames */
   frame.usebp = 0;
   if (!fn->isleaf || fn->stksiz >= STACKREDZONE) {
      frame.usebp = 1;
      /* push rbp; mov rbp, rsp
*/ DS("\x55\x48\x89\xE5");
   }
   npush = calleesave(pcode, fn);
   /* ensure stack is 16-byte aligned */
   if (frame.usebp) {
      frame.size = fn->stksiz + npush*8;
      if ((frame.size & 0xF) != 0) {
         /* NOTE(review): with an odd npush the stack grows by 24 bytes while
          * frame.size grows by 8 -- confirm the extra 16 bytes are intended */
         if (npush&1) fn->stksiz += 16;
         fn->stksiz += 8;
         frame.size += 8;
      }
   } else {
      /* no frame pointer: only the pushed registers are counted */
      frame.size = npush*8;
   }
   frame.stksiz = fn->stksiz;
   if (frame.usebp && fn->stksiz > 0) {
      /* sub rsp, stksiz */
      if (fn->stksiz < 128) /* fits a signed 8-bit immediate */
         DS("\x48\x83\xEC"), B(fn->stksiz);
      else if (fn->stksiz == 128)
         DS("\x48\x83\xC4\x80"); /* add rsp, -128 */
      else
         DS("\x48\x81\xEC"), I32(fn->stksiz);
   }

   if (*pcode - fnstart > 6) {
      /* large prologue -> large epilogue -> transform to use single exit point */
      Block *exit = NULL;
      /* walk the block list backwards, starting from the last block; the
       * loop stops when it reaches the entry block */
      blk = fn->entry->lprev;
      do {
         if (blk->jmp.t == Jret) {
            if (!exit) {
               if (blk->ins.n == 0) {
                  /* an empty returning block can serve as the exit itself */
                  exit = blk;
                  continue;
               } else {
                  /* otherwise link a fresh returning block right after blk */
                  exit = newblk(fn);
                  exit->lnext = blk->lnext;
                  exit->lprev = blk;
                  blk->lnext = exit;
                  exit->lnext->lprev = exit;
                  exit->id = fn->nblk++;
                  exit->jmp.t = Jret;
               }
            }
            /* turn this return into a jump to the exit block */
            blk->jmp.t = Jb;
            memset(blk->jmp.arg, 0, sizeof blk->jmp.arg);
            blk->s1 = exit;
         } else if (exit) {
            /* thread jumps to the exit block */
            if (blk->s1 && !blk->s1->ins.n && blk->s1->s1 == exit && !blk->s1->s2)
               blk->s1 = exit;
            if (blk->s2 && !blk->s2->ins.n && blk->s2->s1 == exit && !blk->s2->s2)
               blk->s2 = exit;
         }
      } while ((blk = blk->lprev) != fn->entry);
   }

   /* one zero-initialised address record per block, indexed by block id */
   blkaddr = allocz(fn->passarena, fn->nblk * sizeof *blkaddr, 0);

   blk = fn->entry;
   do {
      struct BlkAddr *bb = &blkaddr[blk->id];
      uint bbaddr = *pcode - objout.textbegin;

      assert(!bb->resolved);
      /* resolve pending references to this block: each unresolved 4-byte
       * slot holds the text offset of the next pending slot, and is
       * overwritten with the displacement measured from the end of the slot */
      while (bb->relreloc) {
         uint next;
         memcpy(&next, objout.textbegin + bb->relreloc, 4);
         int disp = bbaddr - bb->relreloc - 4;
         wr32le(objout.textbegin + bb->relreloc, disp);
         bb->relreloc = next;
      }
      bb->resolved = 1;
      bb->addr = bbaddr;

      for (int i = 0; i < blk->ins.n; ++i)
         emitinstr(pcode, fn, blk, i, &instrtab[blk->ins.p[i]]);
      if (blk->jmp.t == Jret) {
         if (blk->lnext != fn->entry && blk->lnext->jmp.t == Jret && blk->lnext->ins.n == 0)
            continue; /* fallthru to next blk's RET */
         /* epilogue */
         if (fn->stksiz > 0 &&
npush > 0 && frame.usebp)
            /* release the locals so that RSP points at the saved registers */
            Xadd(pcode, KPTR, mkoper(OREG, .reg = RSP), mkoper(OIMM, .imm = fn->stksiz));
         if (npush > 0)
            calleerestore(pcode, fn);
         if (frame.usebp)
            B(0xC9); /* leave */
         B(0xC3); /* ret */
      } else if (blk->jmp.t == Jtrap) {
         DS("\x0F\x0B"); /* UD2 */
      } else
         emitbranch(pcode, blk);
   } while ((blk = blk->lnext) != fn->entry);

   /* define the function symbol: name, globl flag, start offset in the text
    * section and code size */
   objdeffunc(fn->name, fn->globl, fnstart - objout.textbegin, *pcode - fnstart);
}

/* Entry point of the x86-64 emitter for one function: rounds the stack size
 * up to a multiple of 8, reports an error through error() when it exceeds
 * 1<<24 bytes, then emits the function with emitbin(). */
void
x86_64_emit(Function *fn)
{
   fn->stksiz = alignup(fn->stksiz, 8);
   if (fn->stksiz > 1<<24)
      error(NULL, "'%s' stack frame too big", fn->name);
   emitbin(fn);
}

/* vim:set ts=3 sw=3 expandtab: */