diff options
| author | 2026-03-17 13:22:00 +0100 | |
|---|---|---|
| committer | 2026-03-17 13:22:00 +0100 | |
| commit | a8d6f8bf30c07edb775e56889f568ca20240bedf (patch) | |
| tree | b5a452b2675b2400f15013617291fe6061180bbf /src/t_x86-64_emit.c | |
| parent | 24f14b7ad1af08d872971d72ce089a529911f657 (diff) | |
REFACTOR: move sources to src/
Diffstat (limited to 'src/t_x86-64_emit.c')
| -rw-r--r-- | src/t_x86-64_emit.c | 1422 |
1 file changed, 1422 insertions, 0 deletions
diff --git a/src/t_x86-64_emit.c b/src/t_x86-64_emit.c new file mode 100644 index 0000000..d3a466b --- /dev/null +++ b/src/t_x86-64_emit.c @@ -0,0 +1,1422 @@ +#include "all.h" +#include "../obj/obj.h" +#include "../endian.h" + +/** Instruction operands ** + * + * Can be a register, a 32-bit immediate, + * a memory reference [base + index * scale + disp], + * or a relocatable reference to some symbol plus a displacement and maybe index*scale + */ +enum operkind { ONONE, OREG, OIMM, OMEM, OSYM, OSYMGOT }; +enum { NOBASE = 63, NOINDEX = 63 }; +struct oper { + uchar t; + union { + struct { uchar base; }; /* OMEM */ + struct { uchar cindex : 6, cshift : 2; }; /* OSYM */ + }; + union { + struct { uchar index, shift; }; /* OMEM */ + ushort con; /* OSYM */ + }; + union { + uchar reg; /* OREG */ + int disp; /* OMEM, OSYM */ + int imm; /* OIMM */ + }; +}; +#define mkoper(t, ...) ((struct oper){(t), __VA_ARGS__}) +#define reg2oper(R) (assert((uint)(R) <= XMM15), mkoper(OREG, .reg = (R))) + +static struct oper mkmemoper(union ref); + +static struct oper +ioper(int i) +{ + int reg = instrtab[i].reg - 1; + return reg < 0 ? 
mkoper(ONONE,) : reg2oper(reg); +} + +static struct oper +ref2oper(union ref r) +{ + switch (r.t) { + case RTMP: return ioper(r.i); + case RREG: return reg2oper(r.i); + case RICON: return mkoper(OIMM, .imm = r.i); + case RXCON: + if (contab.p[r.i].cls == KI32) + return mkoper(OIMM, .imm = contab.p[r.i].i); + else if (contab.p[r.i].cls == KI64) { + vlong i = contab.p[r.i].i; + assert(i == (int)i); + return mkoper(OIMM, .imm = i); + } else if (!contab.p[r.i].cls) { + return mkoper(OSYM, .con = r.i, .cindex = NOINDEX); + } + assert(0); + case RADDR: return mkmemoper(r); + default: assert(0); + } +} + +static void +addmemoper(struct oper *mem, struct oper add) +{ + assert(mem->t == OMEM); + if (add.t == OIMM) { + mem->disp += add.imm; + } else if (add.t == OREG) { + if (mem->base == NOBASE) + mem->base = add.reg; + else if (mem->index == NOINDEX) + mem->index = add.reg; + else + assert(0); + } +} + +/* helpers to convert a reference to an operand of a specific kind, + * with assertions to make sure nothing went wrong */ + +static inline struct oper +mkregoper(union ref r) +{ + assert(r.t == RREG || (r.t == RTMP && ioper(r.i).t == OREG)); + return r.t == RREG ? 
reg2oper(r.i) : ioper(r.i); +} + +static inline struct oper +mkimmoper(union ref r) +{ + assert(iscon(r) && concls(r) == KI32); + return mkoper(OIMM, .imm = intconval(r)); +} + +#define ismemref(ref) ((ref).t == RTMP && ioper((ref).i).t == OMEM) +#define isregref(ref) ((ref).t == RREG || ((ref).t == RTMP && ioper((ref).i).t == OREG)) + +static inline struct oper +mkimmregoper(union ref r) +{ + assert(isregref(r) || (iscon(r) && concls(r) == KI32)); + return ref2oper(r); +} + +static inline struct oper +mkdatregoper(union ref r) +{ + assert(isregref(r) || (r.t == RXCON && contab.p[r.i].deref)); + return ref2oper(r); +} + +static inline struct oper +mkimmdatregoper(union ref r) +{ + assert(isregref(r) || r.t == RICON || (r.t == RXCON && (contab.p[r.i].cls == KI32 || contab.p[r.i].deref))); + return ref2oper(r); +} + +static struct oper +mkmemoper(union ref r) +{ + if (r.t == RTMP) { + struct oper wop = ioper(r.i); + if (wop.t == OMEM) return wop; + assert(wop.t == OREG); + return mkoper(OMEM, .base = wop.reg, .index = NOINDEX); + } else if (r.t == RADDR) { + const struct addr *addr = &addrtab.p[r.i]; + assert(addr->shift <= 3); + if (isaddrcon(addr->base,0)) { + return mkoper(OSYM, .con = addr->base.i, + .cindex = addr->index.bits ? mkregoper(addr->index).reg : NOINDEX, + .cshift = addr->shift, + .disp = addr->disp); + } else if (isintcon(addr->base)) { + assert(!addr->disp); + return mkoper(OMEM, .base = NOBASE, + .index = addr->index.bits ? mkregoper(addr->index).reg : NOINDEX, + .disp = intconval(addr->base), + .shift = addr->shift); + } else if (isaddrcon(addr->index,0)) { + assert(!addr->shift); + return mkoper(OSYM, .con = addr->index.i, + .cindex = addr->base.bits ? mkregoper(addr->base).reg : NOINDEX, + .disp = addr->disp); + } + return mkoper(OMEM, .base = addr->base.bits ? mkregoper(addr->base).reg : NOBASE, + .index = addr->index.bits ? 
mkregoper(addr->index).reg : NOINDEX, + .disp = addr->disp, + .shift = addr->shift); + } else if (r.t == RXCON) { + assert(!contab.p[r.i].cls); + return mkoper(OSYM, .con = r.i, .cindex = NOINDEX); + } else { + return mkoper(OMEM, .base = isregref(r) ? ref2oper(r).reg : NOBASE, + .index = NOINDEX, + .disp = isregref(r) ? 0 : mkimmoper(r).imm); + } +} + +/** Instruction description tables ** + * + * Each instruction is a list of descs, and the first one that matches + * is emitted. Each entry has a size pattern field, which is a bitset + * of the sizes (in bytes) that the entry matches, and 2 operand patterns, + * which describe the operands that can match (for example, PRAX matches + * a RAX register operand, PGPR matches any integer register, I8 matches + * an immediate operand between [-128,127]) The rest of the fields describe + * the instruction's encoding. + * (reference: https://www.felixcloutier.com/x86/ & https://wiki.osdev.org/X86-64_Instruction_Encoding ) + */ + +enum operpat { + PNONE, + PRAX, + PRCX, + PGPR, + PFPR, + P1, /* imm = 1 */ + PN1, /* imm = -1 */ + PI8, + PU8, + PI16, + PU16, + PI32, + PU32, + PMEM, + PSYM, +}; +enum operenc { + EN_R = 1, /* reg with /r */ + EN_RR, /* reg, reg with /r */ + EN_RRX, /* reg, reg with /r (inverted) */ + EN_MR, /* mem, reg with /r */ + EN_RM, /* reg, mem with /r */ + EN_M, /* mem */ + EN_RI8, /* reg, imm8 with /0 */ + EN_RI32, /* reg, imm32 with /0 */ + EN_MI8, /* mem, imm8 with /x */ + EN_MI16, /* mem, imm16 with /x */ + EN_MI32, /* mem, imm32 with /x */ + EN_O, /* reg with op + reg */ + EN_OI, /* reg, imm32 with op + reg */ + EN_I8, /* imm8 */ + EN_I32, /* imm32 */ + EN_R32, /* rel32 */ + NOPERENC, +}; +struct desc { + uchar psiz; /* subset of {1,2,4,8} */ + uchar ptd, pts; /* bitsets of enum operpat */ + uchar nopc; /* countof opc */ + const char opc[8]; /* opcode bytes */ + uchar operenc; /* enum operenc */ + uchar ext; /* ModR/M.reg opc extension */ + bool r8; /* uses 8bit register */ + bool norexw; /* do not 
use REX.W even if size is 64 bits */ +}; + +/* match operand against pattern */ +static inline bool +opermatch(enum operpat pat, struct oper oper) +{ + switch (pat) { + case PNONE: return !oper.t; + case PRAX: return oper.t == OREG && oper.reg == RAX; + case PRCX: return oper.t == OREG && oper.reg == RCX; + case PGPR: return oper.t == OREG && oper.reg <= R15; + case PFPR: return oper.t == OREG && oper.reg >= XMM0; + case P1: return oper.t == OIMM && oper.imm == 1; + case PN1: return oper.t == OIMM && oper.imm == -1; + case PI8: return oper.t == OIMM && (schar)oper.imm == oper.imm; + case PU8: return oper.t == OIMM && (uchar)oper.imm == oper.imm; + case PI16: return oper.t == OIMM && (short)oper.imm == oper.imm; + case PU16: return oper.t == OIMM && (ushort)oper.imm == oper.imm; + case PI32: return oper.t == OIMM; + case PU32: return oper.t == OIMM && oper.imm >= 0; + case PMEM: return in_range(oper.t, OMEM, OSYMGOT); + case PSYM: return oper.t == OSYM || oper.t == OSYMGOT; + } + assert(0); +} + +/* code output helpers */ +#define B(b) (*(*pcode)++ = (b)) +#define D(xs, N) (memcpy(*pcode, (xs), (N)), (*pcode) += (N)) +#define I16(w) (wr16le(*pcode, (w)), *pcode += 2) +#define I32(w) (wr32le(*pcode, (w)), *pcode += 4) +#define DS(S) D(S, sizeof S - 1) + +static bool usebp; /* use RBP? 
*/ +static int rbpoff; +static internstr curfnsym; +static uchar *fnstart; + +/* Given an instruction description table, find the first entry that matches + * the operands (where dst, src are the operands in intel syntax order) and encode it */ +static void +encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct oper dst, struct oper src) +{ + const uchar *opc; + int nopc; + struct oper mem; + enum reg reg; + const struct desc *en = NULL; + for (int i = 0; i < ntab; ++i) { + if ((tab[i].psiz & cls2siz[k]) && opermatch(tab[i].ptd, dst) && opermatch(tab[i].pts, src)) { + en = &tab[i]; + break; + } + } + assert(en && "no match for instr"); + + if (en->ptd == PFPR) dst.reg &= 15; + if (en->pts == PFPR) src.reg &= 15; + opc = (uchar *)en->opc; + nopc = en->nopc; + /* mandatory prefixes go before REX */ + if (*opc == 0x66 || *opc == 0xF2 || *opc == 0xF3) + B(*opc++), --nopc; + int rex = in_range(k, KI64, KPTR) << 3; /* REX.W */ + if (en->norexw) rex = 0; + switch (en->operenc) { + case EN_RR: /* mod = 11; reg = dst; rm = src */ + rex |= (dst.reg >> 3) << 2; /* REX.R */ + rex |= (src.reg >> 3) << 0; /* REX.B */ + if (rex) B(0x40 | rex); + else if (en->r8 && in_range(src.reg, RSP, RDI)) { + /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */ + B(0x40); + } + D(opc, nopc); + B(0300 | (dst.reg & 7) << 3 | (src.reg & 7)); + break; + case EN_RRX: /* mod = 11; reg = src; rm = dst */ + rex |= (src.reg >> 3) << 2; /* REX.R */ + rex |= (dst.reg >> 3) << 0; /* REX.B */ + if (rex) B(0x40 | rex); + else if (en->r8 && in_range(dst.reg, RSP, RDI)) { + /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */ + B(0x40); + } + D(opc, nopc); + B(0300 | (src.reg & 7) << 3 | (dst.reg & 7)); + break; + case EN_MR: + mem = dst; + reg = src.reg; + goto Mem; + case EN_RM: + mem = src; + reg = dst.reg; + goto Mem; + case EN_M: case EN_MI8: case EN_MI16: case EN_MI32: + mem = dst; + reg = en->ext; + Mem: + if (mem.t == OMEM) { + if (mem.base != NOBASE) 
rex |= mem.base >> 3; /* REX.B */ + if (mem.index != NOINDEX) rex |= mem.index >> 3 << 1; /* REX.X */ + } else { + if (mem.cindex != NOINDEX) rex |= mem.cindex >> 3 << 1; /* REX.X */ + } + if (en->operenc != EN_M) + rex |= (reg >> 3) << 2; /* REX.R */ + if (rex) B(0x40 | rex); + else if (en->r8 && in_range(reg, RSP, RDI)) B(0x40); + + if (mem.t == OSYM || mem.t == OSYMGOT) { + D(opc, nopc); + if (mem.cindex == NOINDEX) { + /* %rip(var) */ + static uchar offs[NOPERENC] = { [EN_MI8] = 1, [EN_MI16] = 2, [EN_MI32] = 4 }; + uint addr; + int disp = mem.disp - 4 - offs[en->operenc]; + internstr sym = xcon2sym(mem.con); + B(/*mod 0*/ (reg & 7) << 3 | RBP); + if (objhassym(sym, &addr) == Stext && mem.t != OSYMGOT) { + I32(addr - (*pcode - objout.textbegin) + disp); + } else { + enum relockind r = REL_PCREL32; + if (mem.t == OSYMGOT) r = rex ? REL_GOTPCRELX_REX : REL_GOTPCRELX; + objreloc(xcon2sym(mem.con), r, Stext, *pcode - objout.textbegin, disp); + I32(0); + } + } else { + /* var(,%reg,shift) */ + assert(!ccopt.pic && !ccopt.pie && "cannot encode [RIP-rel + REG] for position independent"); + B(/*mod 0*/ (reg & 7) << 3 | RSP); + B(mem.cshift << 6 | ((mem.cindex & 7) << 3) | RBP); /* SIB [index*s + disp32] */ + objreloc(xcon2sym(mem.con), REL_ABS32S, Stext, *pcode - objout.textbegin, mem.disp); + I32(0); + } + } else { + int mod; + bool sib = 0; + if (mem.base == RBP) { + if (!usebp) { + /* if RBP isn't being set up (leaf functions with no stack allocations), + * access thru RSP (function arguments in the stack) */ + mem.base = RSP; + mem.disp -= 8; + } else if (mem.disp <= 0) { + mem.disp += rbpoff; + } + } + if (mem.base != NOBASE) { + if (mem.index == NOINDEX && mem.shift == 0) sib = 0; + else sib = 1; + mod = !mem.disp ? 0 /* disp = 0 -> mod = 00 */ + : (schar)mem.disp == mem.disp ? 
1 /* disp8 -> mod = 01 */ + : 2; /* disp32 -> mod = 10 */ + if (mod == 0 && (mem.base == RBP || mem.base == R13)) mod = 1; + if (mem.base == RSP || mem.base == R12) sib = 1; + } else { + /* [disp + (index*s)] */ + sib = 1; + mem.base = RBP; + mod = 0; + assert(mem.index != RSP); + } + D(opc, nopc); + B(mod << 6 | (reg & 7) << 3 | (sib ? 4 : (mem.base & 7))); + if (sib) { + if (mem.index == NOINDEX) mem.index = RSP; + B(mem.shift << 6 | (mem.index & 7) << 3 | (mem.base & 7)); + } + if (mod == 1) B(mem.disp); + else if (mod == 2 || (mod == 0 && mem.base == RBP/*RIP-rel*/) || (mod == 0 && sib && mem.base == RBP/*absolute*/)) { + I32(mem.disp); + } + } + if (en->operenc == EN_MI8) B(src.imm); + if (en->operenc == EN_MI16) I16(src.imm); + if (en->operenc == EN_MI32) I32(src.imm); + break; + case EN_R: case EN_RI32: case EN_RI8: + rex |= (dst.reg >> 3) << 0; /* REX.B */ + if (rex) B(0x40 | rex); + else if (en->r8 && in_range(dst.reg, RSP, RDI)) { + /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */ + B(0x40); + } + D(opc, nopc); + B(0300 | en->ext << 3 | (dst.reg & 7)); + if (en->operenc == EN_RI32) + I32(src.imm); + else if (en->operenc == EN_RI8) + B(src.imm); + break; + case EN_O: case EN_OI: + rex |= (dst.reg >> 3) << 0; /* REX.B */ + if (rex) B(0x40 | rex); + D(opc, nopc - 1); + B(opc[nopc-1] + (dst.reg & 7)); + if (en->operenc == EN_OI) I32(src.imm); + break; + case EN_I8: + if (rex) B(0x40 | rex); + D(opc, nopc); + B(src.imm); + break; + case EN_I32: + if (rex) B(0x40 | rex); + D(opc, nopc); + I32(src.imm); + break; + case EN_R32: + if (rex) B(0x40 | rex); + D(opc, nopc); + assert(dst.t == OSYM); + internstr sym = xcon2sym(dst.con); + uint addr; + if (sym == curfnsym) { + I32(fnstart - *pcode - 4); + } else if (objhassym(sym, &addr) == Stext) { + I32(addr - (*pcode - objout.textbegin) - 4); + } else { + enum relockind r = (ccopt.pie|ccopt.pic) ? 
REL_PLT32 : REL_PCREL32; + objreloc(sym, r, Stext, *pcode - objout.textbegin, -4); + I32(0); + } + break; + } +} + +#define DEFINSTR1(X, ...) \ + static void \ + X(uchar **pcode, enum irclass k, struct oper oper) \ + { \ + static const struct desc tab[] = { __VA_ARGS__ }; \ + encode(pcode, tab, countof(tab), k, oper, mkoper(0,)); \ + } + +#define DEFINSTR2(X, ...) \ + static void \ + X(uchar **pcode, enum irclass k, struct oper dst, struct oper src) \ + { \ + static const struct desc tab[] = { __VA_ARGS__ }; \ + encode(pcode, tab, countof(tab), k, dst, src); \ + } + +#define O(s) (sizeof s)-1,s +DEFINSTR2(Xmovb, + {-1, PMEM, PGPR, O("\x88"), EN_MR, .r8=1}, /* MOV m8, r8 */ + {-1, PMEM, PI32, O("\xC6"), EN_MI8, .r8=1}, /* MOV m8, imm8 */ +) +DEFINSTR2(Xmovw, + {-1, PMEM, PGPR, O("\x66\x89"), EN_MR}, /* MOV m16, r16 */ + {-1, PMEM, PI32, O("\x66\xC7"), EN_MI16}, /* MOV m16, imm16 */ +) +static void Xmov(uchar **pcode, enum irclass k, struct oper dst, struct oper src) +{ + static const struct desc all[] = { + {4 , PGPR, PI32, O("\xB8"), EN_OI}, /* MOV r32, imm */ + {4|8, PGPR, PGPR, O("\x8B"), EN_RR}, /* MOV r32/64, r32/64 */ + {4|8, PMEM, PGPR, O("\x89"), EN_MR}, /* MOV m32/64, r32/64 */ + {4|8, PGPR, PMEM, O("\x8B"), EN_RM}, /* MOV r32/64, m32/64 */ + {4|8, PMEM, PI32, O("\xC7"), EN_MI32}, /* MOV m32/64, imm */ + { 8, PGPR, PU32, O("\xB8"), EN_OI, .norexw=1}, /* MOV r64, uimm */ + { 8, PGPR, PI32, O("\xC7"), EN_RI32}, /* MOV r64, imm */ + {4 , PFPR, PFPR, O("\x0F\x28"), EN_RR}, /* MOVPS xmm, xmm */ + {4 , PFPR, PMEM, O("\xF3\x0F\x10"), EN_RM}, /* MOVSS xmm, m32 */ + {4 , PMEM, PFPR, O("\xF3\x0F\x11"), EN_MR}, /* MOVSS m32, xmm */ + {8 , PFPR, PFPR, O("\x0F\x28"), EN_RR}, /* MOVPS xmm, xmm */ + {8 , PFPR, PMEM, O("\xF2\x0F\x10"), EN_RM}, /* MOVSD xmm, m64 */ + {8 , PMEM, PFPR, O("\xF2\x0F\x11"), EN_MR}, /* MOVSS m64, xmm */ + {4|8, PFPR, PGPR, O("\x66\x0F\x6E"), EN_RR}, /* MOVD/Q xmm, r64/32 */ + {4|8, PGPR, PFPR, O("\x66\x0F\x7E"), EN_RRX}, /* MOVD/Q r64/32, xmm */ 
+ }; + static const uchar k2off[] = { + [KI32] = 0, + [KI64] = 1, [KPTR] = 1, + [KF32] = 7, + [KF64] = 10, + }; + if (kisflt(k) && src.t == OIMM && src.imm == 0) { + /* special case for storing zero float : use integer instruction with zero immediate */ + k = KI32 + (k - KF32); + } + encode(pcode, all + k2off[k], countof(all) - k2off[k], k, dst, src); +} +DEFINSTR2(Xmovsxl, + {8, PGPR, PMEM, O("\x63"), EN_RM}, /* MOVSXD r64, m32 */ + {8, PGPR, PGPR, O("\x63"), EN_RR}, /* MOVSXD r64, r32 */ + {4, PGPR, PMEM, O("\x8B"), EN_RM}, /* MOV r32, m32 */ + {4, PGPR, PGPR, O("\x8B"), EN_RR}, /* MOV r32, r32 */ +) +DEFINSTR2(Xmovsxw, + {4|8, PGPR, PMEM, O("\x0F\xBF"), EN_RM}, /* MOVSX r64, m16 */ + {4|8, PGPR, PGPR, O("\x0F\xBF"), EN_RR}, /* MOVSX r64, r16 */ +) +DEFINSTR2(Xmovsxb, + {4|8, PGPR, PMEM, O("\x0F\xBE"), EN_RM}, /* MOVSX r64, m8 */ + {4|8, PGPR, PGPR, O("\x0F\xBE"), EN_RR, .r8=1}, /* MOVSX r64, r8 */ +) +DEFINSTR2(Xmovzxw, + {4|8, PGPR, PMEM, O("\x0F\xB7"), EN_RM}, /* MOVZX r64, m16 */ + {4|8, PGPR, PGPR, O("\x0F\xB7"), EN_RR}, /* MOVZX r64, r16 */ +) +DEFINSTR2(Xmovzxb, + {4|8, PGPR, PMEM, O("\x0F\xB6"), EN_RM}, /* MOVZX r64, m8 */ + {4|8, PGPR, PGPR, O("\x0F\xB6"), EN_RR, .r8=1}, /* MOVZX r64, r8 */ +) +DEFINSTR2(Xmovaps, + {-1, PMEM, PFPR, O("\x0F\x29"), EN_MR}, /* MOVAPS mem, xmm */ +) +DEFINSTR2(Xxchg, + {4|8, PGPR, PGPR, O("\x87"), EN_RR}, /* XCHG r32/64, r32/64 */ + {4|8, PGPR, PMEM, O("\x87"), EN_RM}, /* XCHG r32/64, m32/64 */ + {4|8, PMEM, PGPR, O("\x87"), EN_MR}, /* XCHG r32/64, m32/64 */ +) +DEFINSTR2(Xlea, + {4|8, PGPR, PMEM, O("\x8D"), EN_RM}, /* LEA r32/64,m32/64 */ + { 8, PGPR, PSYM, O("\x8D"), EN_RM}, /* LEA r32/64,rel32 */ +) +DEFINSTR2(Xadd, + {4|8, PGPR, PGPR, O("\x03"), EN_RR}, /* ADD r32/64, r32/64 */ + {4|8, PGPR, P1, O("\xFF"), EN_R, .ext=0}, /* INC r32/64 */ + {4|8, PGPR, PN1, O("\xFF"), EN_R, .ext=1}, /* DEC r32/64 */ + {4|8, PGPR, PI8, O("\x83"), EN_RI8}, /* ADD r32/64, imm8 */ + {4|8, PRAX, PI32, O("\x05"), EN_I32}, /* ADD eax/rax, imm */ 
+ {4|8, PGPR, PI32, O("\x81"), EN_RI32}, /* ADD r32/64, imm */ + { 8, PGPR, PMEM, O("\x03"), EN_RM}, /* ADD r64, m64 */ +) +DEFINSTR2(Xaddf, + {4, PFPR, PFPR, O("\xF3\x0F\x58"), EN_RR}, /* ADDSS xmm, xmm */ + {8, PFPR, PFPR, O("\xF2\x0F\x58"), EN_RR}, /* ADDSD xmm, xmm */ + {4, PFPR, PMEM, O("\xF3\x0F\x58"), EN_RM}, /* ADDSS xmm, m32 */ + {8, PFPR, PMEM, O("\xF2\x0F\x58"), EN_RM}, /* ADDSD xmm, m64 */ +) +DEFINSTR2(Xsub, + {4|8, PGPR, PGPR, O("\x2B"), EN_RR}, /* SUB r32/64, r32/64 */ + {4|8, PGPR, P1, O("\xFF"), EN_R, .ext=1}, /* DEC r32/64 */ + {4|8, PGPR, PN1, O("\xFF"), EN_R, .ext=0}, /* INC r32/64 */ + {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=5}, /* SUB r32/64, imm8 */ + {4|8, PRAX, PI32, O("\x2D"), EN_I32}, /* SUB eax/rax, imm */ + {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=5}, /* SUB r32/64, imm */ + { 8, PGPR, PMEM, O("\x2B"), EN_RM}, /* SUB r64, m64 */ +) +DEFINSTR2(Xsubf, + {4, PFPR, PFPR, O("\xF3\x0F\x5C"), EN_RR}, /* SUBSS xmm, xmm */ + {8, PFPR, PFPR, O("\xF2\x0F\x5C"), EN_RR}, /* SUBSD xmm, xmm */ + {4, PFPR, PMEM, O("\xF3\x0F\x5C"), EN_RM}, /* SUBSS xmm, m32 */ + {8, PFPR, PMEM, O("\xF2\x0F\x5C"), EN_RM}, /* SUBSD xmm, m64 */ +) +DEFINSTR2(Xmulf, + {4, PFPR, PFPR, O("\xF3\x0F\x59"), EN_RR}, /* MULSS xmm, xmm */ + {8, PFPR, PFPR, O("\xF2\x0F\x59"), EN_RR}, /* MULSD xmm, xmm */ + {4, PFPR, PMEM, O("\xF3\x0F\x59"), EN_RM}, /* MULSS xmm, m32 */ + {8, PFPR, PMEM, O("\xF2\x0F\x59"), EN_RM}, /* MULSD xmm, m64 */ +) +DEFINSTR2(Xdivf, + {4, PFPR, PFPR, O("\xF3\x0F\x5E"), EN_RR}, /* DIVSS xmm, xmm */ + {8, PFPR, PFPR, O("\xF2\x0F\x5E"), EN_RR}, /* DIVSD xmm, xmm */ + {4, PFPR, PMEM, O("\xF3\x0F\x5E"), EN_RM}, /* DIVSS xmm, m32 */ + {8, PFPR, PMEM, O("\xF2\x0F\x5E"), EN_RM}, /* DIVSD xmm, m64 */ +) +DEFINSTR2(Xand, + {4|8, PGPR, PGPR, O("\x23"), EN_RR}, /* AND r32/64, r32/64 */ + {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=4}, /* AND r32/64, imm8 */ + {4|8, PRAX, PI32, O("\x25"), EN_I32}, /* AND eax/rax, imm */ + {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=4}, /* 
AND r32/64, imm */ + { 8, PGPR, PMEM, O("\x23"), EN_RM}, /* AND r64, m64 */ +) +DEFINSTR2(Xior, + {4|8, PGPR, PGPR, O("\x0B"), EN_RR}, /* OR r32/64, r32/64 */ + {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=1}, /* OR r32/64, imm8 */ + {4|8, PRAX, PI32, O("\x0D"), EN_I32}, /* OR eax/rax, imm */ + {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=1}, /* OR r32/64, imm */ + { 8, PGPR, PMEM, O("\x0B"), EN_RM}, /* OR r64, m64 */ + {4|8, PFPR, PFPR, O("\x0F\x57"), EN_RR}, /* ORPS xmm, xmm */ +) +DEFINSTR2(Xxor, + {4|8, PGPR, PGPR, O("\x33"), EN_RR}, /* XOR r32/64, r32/64 */ + {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=6}, /* XOR r32/64, imm8 */ + {4|8, PRAX, PI32, O("\x35"), EN_I32}, /* XOR eax/rax, imm */ + {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=6}, /* XOR r32/64, imm */ + { 8, PGPR, PMEM, O("\x33"), EN_RM}, /* XOR r64, m64 */ + {4|8, PFPR, PFPR, O("\x0F\x57"), EN_RR}, /* XORPS xmm, xmm */ + {4|8, PFPR, PMEM, O("\x0F\x57"), EN_RM}, /* XORPS xmm, m128 */ +) +DEFINSTR2(Xshl, + {4|8, PGPR, P1, O("\xD1"), EN_R, .ext=4}, /* SHL r32/64, 1 */ + {4|8, PGPR, PI32, O("\xC1"), EN_RI8, .ext=4}, /* SHL r32/64, imm */ + {4|8, PGPR, PRCX, O("\xD3"), EN_R, .ext=4}, /* SHL r32/64, CL */ +) +DEFINSTR2(Xsar, + {4|8, PGPR, P1, O("\xD1"), EN_R, .ext=7}, /* SAR r32/64, 1 */ + {4|8, PGPR, PI32, O("\xC1"), EN_RI8, .ext=7}, /* SAR r32/64, imm */ + {4|8, PGPR, PRCX, O("\xD3"), EN_R, .ext=7}, /* SAR r32/64, CL */ +) +DEFINSTR2(Xrolw, + {-1, PGPR, PI8, O("\x66\xC1"), EN_RI8}, /* ROL r16, imm */ +) +DEFINSTR2(Xshr, + {4|8, PGPR, P1, O("\xD1"), EN_R, .ext=5}, /* SHR r32/64, 1 */ + {4|8, PGPR, PI32, O("\xC1"), EN_RI8, .ext=5}, /* SHR r32/64, imm */ + {4|8, PGPR, PRCX, O("\xD3"), EN_R, .ext=5}, /* SHR r32/64, CL */ +) +DEFINSTR2(Xcvtss2sd, + {-1, PFPR, PFPR, O("\xF3\x0F\x5A"), EN_RR}, /* CVTSS2SD xmm, xmm */ + {-1, PFPR, PMEM, O("\xF3\x0F\x5A"), EN_RM}, /* CVTSS2SD xmm, m32/64 */ +) +DEFINSTR2(Xcvtsd2ss, + {-1, PFPR, PFPR, O("\xF2\x0F\x5A"), EN_RR}, /* CVTSD2SS xmm, xmm */ + {-1, PFPR, PMEM, O("\xF2\x0F\x5A"), 
EN_RM}, /* CVTSD2SS xmm, m32/64 */ +) +DEFINSTR2(Xcvtsi2ss, + {-1, PFPR, PGPR, O("\xF3\x0F\x2A"), EN_RR}, /* CVTSI2SS xmm, r32/64 */ + {-1, PFPR, PMEM, O("\xF3\x0F\x2A"), EN_RM}, /* CVTSI2SS xmm, m32/64 */ +) +DEFINSTR2(Xcvtsi2sd, + {-1, PFPR, PGPR, O("\xF2\x0F\x2A"), EN_RR}, /* CVTSI2SD xmm, r32/64 */ + {-1, PFPR, PMEM, O("\xF2\x0F\x2A"), EN_RM}, /* CVTSI2SD xmm, m32/64 */ +) +DEFINSTR2(Xcvttss2si, + {-1, PGPR, PFPR, O("\xF3\x0F\x2C"), EN_RR}, /* CVTTSS2SI r32/64, xmm */ + {-1, PGPR, PMEM, O("\xF3\x0F\x2C"), EN_RM}, /* CVTTSS2SI r32/64, m32 */ +) +DEFINSTR2(Xcvttsd2si, + {-1, PGPR, PFPR, O("\xF2\x0F\x2C"), EN_RR}, /* CVTTSD2SI r32/64, xmm */ + {-1, PGPR, PMEM, O("\xF2\x0F\x2C"), EN_RM}, /* CVTTSD2SI r32/64, m32 */ +) +DEFINSTR1(Xneg, + {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=3} /* NEG r32/64 */ +) +DEFINSTR1(Xnot, + {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=2} /* NOT r32/64 */ +) +DEFINSTR1(Xidiv, + {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=7}, /* IDIV r32/64 */ + {4|8, PMEM, 0, O("\xF7"), EN_M, .ext=7}, /* IDIV m32/64 */ +) +DEFINSTR1(Xdiv, + {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=6}, /* DIV r32/64 */ + {4|8, PMEM, 0, O("\xF7"), EN_M, .ext=6}, /* DIV m32/64 */ +) +DEFINSTR1(Xbswap, + {4|8, PGPR, 0, O("\x0F\xC8"), EN_O}, /* BSWAP r32/64 */ +) +DEFINSTR1(Xcall, + {-1, PSYM, 0, O("\xE8"), EN_R32, .norexw=1}, /* CALL rel32 */ + {-1, PGPR, 0, O("\xFF"), EN_R, .ext=2, .norexw=1}, /* CALL r64 */ + {-1, PMEM, 0, O("\xFF"), EN_M, .ext=2, .norexw=1}, /* CALL m64 */ +) +DEFINSTR2(Xcmp, + {4|8, PGPR, PGPR, O("\x3B"), EN_RR}, /* CMP r32/64, r32/64 */ + {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=7}, /* CMP r32/64, imm8 */ + {4|8, PRAX, PI32, O("\x3D"), EN_I32}, /* CMP eax/rax, imm */ + {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=7}, /* CMP r32/64, imm */ + { 8, PGPR, PMEM, O("\x3B"), EN_RM}, /* CMP r64, m64 */ + {4 , PFPR, PFPR, O("\x0F\x2E"), EN_RR}, /* UCOMISS xmm, xmm */ + {4 , PFPR, PMEM, O("\x0F\x2E"), EN_RM}, /* UCOMISS xmm, m32 */ + { 8, PFPR, PFPR, O("\x66\x0F\x2E"), EN_RR}, /* UCOMISD 
xmm, xmm */ + { 8, PFPR, PMEM, O("\x66\x0F\x2E"), EN_RM}, /* UCOMISD xmm, m64 */ +) +DEFINSTR2(Xtest, + {4|8, PRAX, PI8, O("\xA8"), EN_I8, .norexw=1}, /* TEST AL, imm8 */ + {4, PRAX, PI32, O("\xA9"), EN_I32}, /* TEST EAX, imm32 */ + { 8, PRAX, PU32, O("\xA9"), EN_I32, .norexw=1}, /* TEST EAX, imm32 */ + { 8, PRAX, PI32, O("\xA9"), EN_I32}, /* TEST RAX, imm32 */ + {4|8, PGPR, PI8, O("\xF6"), EN_RI8, .r8=1,.norexw=1}, /* TEST r8, imm8 */ + {4|8, PGPR, PI32, O("\xF7"), EN_RI32, .ext=0}, /* TEST r32/64, imm32 */ + {4|8, PGPR, PGPR, O("\x85"), EN_RR}, /* TEST r32/64, r32/64 */ + {4|8, PGPR, PMEM, O("\x85"), EN_RM}, /* TEST r32/64, m32/64 */ +) + +DEFINSTR2(Ximul2, + {4|8, PGPR, PGPR, O("\x0F\xAF"), EN_RR}, /* IMUL r32/64, r32/64 */ + {4|8, PGPR, PMEM, O("\x0F\xAF"), EN_RM}, /* IMUL r32/64, m32/64 */ +) +static const struct desc imul3_imm8tab[] = { + {4|8, PGPR, PGPR, O("\x6B"), EN_RR}, /* IMUL r32/64, r32/64, (imm8) */ + {4|8, PGPR, PMEM, O("\x6B"), EN_RM}, /* IMUL r32/64, m32/64, (imm8) */ +}, imul3_imm32tab[] = { + {4|8, PGPR, PGPR, O("\x69"), EN_RR}, /* IMUL r32/64, r32/64, (imm32) */ + {4|8, PGPR, PMEM, O("\x69"), EN_RM}, /* IMUL r32/64, m32/64, (imm32) */ +}; +#undef O +static void +Ximul(uchar **pcode, enum irclass k, struct oper dst, struct oper s1, struct oper s2) +{ + if (!memcmp(&dst, &s1, sizeof dst) && s2.t != OIMM) { + Ximul2(pcode, k, dst, s2); + return; + } + assert(s2.t == OIMM); + if (-128 <= s2.imm && s2.imm < 128) { + encode(pcode, imul3_imm8tab, countof(imul3_imm8tab), k, dst, s1); + B(s2.imm); + } else { + encode(pcode, imul3_imm32tab, countof(imul3_imm32tab), k, dst, s1); + I32(s2.imm); + } +} + +enum cc { + CCO = 0x0, /* OF = 1*/ + CCNO = 0x1, /* OF = 0*/ + CCB = 0x2, CCC = 0x2, CCNAE = 0x2, /* below; CF = 1; not above or equal */ + CCAE = 0x3, CCNB = 0x3, CCNC = 0x3, /* above or equal; not below; CF = 0 */ + CCE = 0x4, CCZ = 0x4, /* equal; ZF = 1 */ + CCNE = 0x5, CCNZ = 0x5, /* not equal; ZF = 0 */ + CCBE = 0x6, CCNA = 0x6, /* below or equal; not 
above; CF=1 or ZF=1 */ + CCA = 0x7, CCNBE = 0x7, /* above; not below or equal; CF=0 and ZF=0 */ + CCS = 0x8, /* ZS = 1; negative */ + CCNS = 0x9, /* ZS = 0; non-negative */ + CCP = 0xA, CCPE = 0xA, /* PF = 1; parity even */ + CCNP = 0xB, CCPO = 0xB, /* PF = 0; parity odd */ + CCL = 0xC, CCNGE = 0xC, /* lower; not greater or equal; SF != OF */ + CCGE = 0xD, CCNL = 0xD, /* greater or equal; not lower; SF == OF */ + CCLE = 0xE, CCNG = 0xE, /* less or equal; not greater; ZF=1 or SF != OF */ + CCG = 0xF, CCNLE = 0xF, /* greater; not less or equal; ZF=0 and SF = OF*/ + ALWAYS, +}; + +/* maps blk -> address when resolved; or to linked list of jump displacement + * relocations */ +static struct blkaddr { + bool resolved; + union { + uint addr; + uint relreloc; + }; +} *blkaddr; + +static void +Xjcc(uchar **pcode, enum cc cc, struct block *dst) +{ + int disp, insaddr = *pcode - objout.textbegin; + bool rel8 = 0; + + if (blkaddr[dst->id].resolved) { + disp = blkaddr[dst->id].addr - (insaddr + 2); + if ((uint)(disp + 128) < 256) /* can use 1-byte displacement? */ + rel8 = 1; + else { /* otherwise 4-byte displacement */ + disp -= 3; + disp -= cc != ALWAYS; /* 'Jcc rel32' has 2 opcode bytes */ + } + } else { + disp = blkaddr[dst->id].relreloc; + blkaddr[dst->id].relreloc = insaddr + 1 + (cc != ALWAYS); + } + if (cc == ALWAYS) { + B(rel8 ? 
0xEB : 0xE9); /* JMP rel8/rel32 */ + } else { + assert(in_range(cc, 0, 0xF)); + if (rel8) B(0x70 + cc); /* Jcc rel8 */ + else B(0x0F), B(0x80 + cc); /* Jcc rel32 */ + } + if (rel8) B(disp); else I32(disp); +} + +static void +Xsetcc(uchar **pcode, enum cc cc, enum reg reg) +{ + int rex = 0; + assert(in_range(cc, 0x0, 0xF)); + assert(in_range(reg, RAX, R15)); + + if (in_range(reg, RSP, RDI)) rex = 0x40; + rex |= (reg >> 3); /* REX.B */ + if (rex) B(rex | 0x40); + B(0x0F), B(0x90+cc); /* SETcc */ + B(0xC0 + (reg & 7)); /* ModR/M with mod=11, rm=reg */ +} + +static void +Xpush(uchar **pcode, enum reg reg) +{ + if (in_range(reg, RAX, R15)) { + if (reg >> 3) B(0x41); /* REX.B */ + B(0x50 + (reg & 7)); /* PUSH reg */ + } else { + assert(in_range(reg, XMM0, XMM15)); + DS("\x48\x8d\x64\x24\xF8"); /* LEA RSP, [RSP-8] */ + Xmov(pcode, KF64, mkoper(OMEM, .base = RSP, .index = NOINDEX), reg2oper(reg)); /* MOVD [rsp],xmm0 */ + } +} + +static void +Xpop(uchar **pcode, enum reg reg) +{ + if (in_range(reg, RAX, R15)) { + if (reg >> 3) B(0x41); /* REX.B */ + B(0x58 + (reg & 7)); /* POP reg */ + } else { + assert(in_range(reg, XMM0, XMM15)); + Xmov(pcode, KF64, reg2oper(reg), mkoper(OMEM, .base = RSP, .index = NOINDEX)); /* MOVD xmm0,[rsp] */ + DS("\x48\x8d\x64\x24\x08"); /* LEA RSP, [RSP+8] */ + } +} + +/* are flags live at given instruction? */ +static bool +flagslivep(struct block *blk, int curi) +{ + int cmpi; + /* conditional branch that references a previous comparison instruction? 
*/ + if (blk->jmp.t != Jb || !blk->jmp.arg[0].bits) + return 0; + assert(blk->jmp.arg[0].t == RTMP); + cmpi = blk->jmp.arg[0].i; + for (int i = blk->ins.n - 1; i > curi; --i) { + if (blk->ins.p[i] == cmpi) + /* flags defined after given instruction, dead here */ + return 0; + } + /* flags defined before given instruction, live here */ + return 1; +} + +/* Copy dst = val, with some peephole optimizations */ +static void +gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct oper dst, union ref val) +{ + assert(dst.t == OREG); + if (val.bits == UNDREF.bits) { + /* can be generated by ssa construction, since value is undefined no move is needed */ + return; + } + if (val.t == RADDR) { + /* this is a LEA, but maybe it can be lowered to a 2-address instruction, + * which may clobber flags */ + const struct addr *addr = &addrtab.p[val.i]; + if (flagslivep(blk, curi)) goto Lea; + if (addr->base.t != RREG) goto Lea; + if (addr->base.bits && dst.reg == mkregoper(addr->base).reg) { /* base = dst */ + if (addr->index.bits && !addr->disp && !addr->shift){ + /* lea Rx, [Rx + Ry] -> add Rx, Ry */ + Xadd(pcode, cls, dst, mkregoper(addr->index)); + return; + } else if (!addr->index.bits) { + if (!addr->disp) /* lea Rx, [Rx] -> mov Rx, Rx */ + Xmov(pcode, cls, dst, dst); + else /* lea Rx, [Rx + Imm] -> add Rx, Imm */ + Xadd(pcode, cls, dst, mkoper(OIMM, .imm = addr->disp)); + return; + } + } else if (addr->index.bits && dst.reg == mkregoper(addr->index).reg) { /* index = dst */ + if (addr->base.bits && !addr->disp && !addr->shift) { + /* lea Rx, [Ry + Rx] -> add Rx, Ry */ + Xadd(pcode, cls, dst, mkregoper(addr->base)); + return; + } else if (!addr->base.bits) { + if (!addr->disp && !addr->shift) /* lea Rx, [Rx] -> mov Rx, Rx */ + Xmov(pcode, cls, dst, dst); + else if (!addr->shift) /* lea Rx, [Rx + Imm] -> add Rx, Imm */ + Xadd(pcode, cls, dst, mkoper(OIMM, .imm = addr->disp)); + else if (!addr->disp) /* lea Rx, [Rx LSL s] -> shl Rx, s */ + Xshl(pcode, cls, 
/* NOTE(review): this chunk begins mid-function — the lines down to the first
 * lone "}" complete the copy/move emitter ("gencopy", called from emitinstr
 * below) whose head lies above this view; the structure of the leading
 * fragment is reconstructed from the visible braces and must be confirmed
 * against the full file. */
            dst, mkoper(OIMM, .imm = addr->shift));
         else
            goto Lea;
         return;
         }
      }
      /* normal (not 2-address) case */
   Lea:
      /* PIC (or function symbol) that is not local: address must come from the
       * GOT; otherwise a plain rip-relative LEA suffices */
      if (isaddrcon(addr->base,0) && (ccopt.pic || (contab.p[addr->base.i].flag & SFUNC))
          && !(contab.p[addr->base.i].flag & SLOCAL)) {
         assert(!addr->disp && !addr->index.bits);
         val = addr->base;
         goto GOTLoad;
      }
      Xlea(pcode, cls, dst, ref2oper(val));
   } else if (val.bits == ZEROREF.bits && dst.t == OREG && (kisflt(cls) || !flagslivep(blk, curi))) {
      /* dst = 0 -> xor dst, dst; but only if it is ok to clobber flags */
      Xxor(pcode, kisint(cls) ? KI32 : cls, dst, dst);
   } else if (isaddrcon(val,0)) {
      if ((ccopt.pic || (contab.p[val.i].flag & SFUNC)) && (contab.p[val.i].flag & (SLOCAL|SFUNC)) != (SLOCAL|SFUNC)) {
   GOTLoad:
         /* for mov reg, [rip(sym@GOTPCREL)] */
         Xmov(pcode, cls, dst, mkoper(OSYMGOT, .con = val.i, .cindex = NOINDEX));
      } else {
         /* for lea reg, [rip(sym)] */
         Xlea(pcode, cls, dst, mkoper(OSYM, .con = val.i, .cindex = NOINDEX));
      }
   } else if (val.t == RXCON && in_range(concls(val), KI64, KPTR)) {
      /* movabs: 64-bit constant that did not fit a 32-bit immediate */
      assert(dst.t == OREG && in_range(dst.reg, RAX, R15));
      B(0x48 | (dst.reg >> 3)); /* REX.W (+ REX.B) */
      B(0xB8 + (dst.reg & 0x7)); /* MOVABS r64, */
      wr64le(*pcode, intconval(val)); /* imm64 */
      *pcode += 8;
   } else {
      struct oper src = mkimmdatregoper(val);
      /* skip the mov entirely when src and dst are the identical operand */
      if (memcmp(&dst, &src, sizeof dst) != 0)
         Xmov(pcode, cls == KF64 && src.t == OREG && src.reg < XMM0 ? KI64 : cls, dst, src);
   }
}

/* Emit the va_start register save area fill for a variadic function.
 * `sav` is the (rbp-relative, asserted below) memory operand of the save
 * area: 6 slots of 8 bytes for the GPR arguments RDI..R9 followed by
 * 8 slots of 16 bytes for XMM0..XMM7.  Registers already consumed by named
 * arguments (counted via fn->abiarg) are skipped.  The XMM spills are
 * guarded by TEST al,al — presumably the SysV convention where AL carries
 * the number of vector registers used by the call (TODO confirm against the
 * call-site emission). */
static void
Xvaprologue(uchar **pcode, struct function *fn, struct oper sav)
{
   uint gpr0 = 0, fpr0 = 0, jmpaddr;
   /* count how many argument registers are taken by named parameters */
   for (int i = 0; i < fn->nabiarg; ++i) {
      struct abiarg abi = fn->abiarg[i];
      if (!abi.isstk) {
         if (abi.reg < XMM0) ++gpr0;
         else ++fpr0;
      }
   }
   assert(sav.t == OMEM && sav.base == RBP);
   /* save GPRS */
   for (int r = 0; r < 6; ++r) {
      static const char reg[] = {RDI,RSI,RDX,RCX,R8,R9};
      if (r >= gpr0)
         Xmov(pcode, KI64, sav, reg2oper(reg[r]));
      sav.disp += 8;
   }

   /* save FPRs, but only if al is non zero */
   if (fpr0 < 8) {
      DS("\x84\xC0"); /* TEST al,al */
      jmpaddr = *pcode - objout.textbegin; /* remember JE location for patching */
      DS("\x74\xFE"); /* JE rel8 (placeholder offset, patched below) */
   }
   for (int r = 0; r < 8; ++r) {
      if (r >= fpr0)
         Xmovaps(pcode, KF64, sav, reg2oper(XMM0 + r));
      sav.disp += 16;
   }
   if (fpr0 < 8) {/* patch relative jump */
      int off = (*pcode - objout.textbegin) - jmpaddr - 2;
      objout.textbegin[jmpaddr+1] = off;
   }
}

/* condition code for CMP */
static const uchar icmpop2cc[] = {
   [Oequ] = CCE, [Oneq] = CCNE,
   [Olth] = CCL, [Ogth] = CCG, [Olte] = CCLE, [Ogte] = CCGE,
   [Oulth] = CCB, [Ougth] = CCA, [Oulte] = CCBE, [Ougte] = CCAE,
   [Oand] = CCNE, [Osub] = CCNE,
}, fcmpop2cc[] = {
   /* float compares use the unsigned codes (SSE compares set CF/ZF only) */
   [Oequ] = CCE, [Oneq] = CCNE,
   [Olth] = CCB, [Ogth] = CCA, [Olte] = CCBE, [Ogte] = CCAE,
};
/* condition code for TEST reg,reg (compare with zero) */
static const uchar icmpzero2cc[] = {
   [Oequ] = CCE, [Oulte] = CCE,
   [Oneq] = CCNE, [Ougth] = CCNE,
   [Olth] = CCS, [Ogte] = CCNS,
   [Olte] = CCLE, [Ogth] = CCG,
   [Oulth] = CCB, [Ougte] = CCAE, /* actually constants */
};

/* Emit the machine encoding of a single IR instruction `ins` (the i-th
 * instruction of `blk`) into *pcode.  Convention used throughout: ins->reg
 * holds the allocated output register plus one, so ins->reg == 0 means "no
 * result register" (e.g. a compare whose flags feed a branch) and ins->reg-1
 * is the register number otherwise.  `cls` starts as the instruction's IR
 * class but is locally overridden where the encoding needs a different width
 * (stores, conversions). */
static void
emitinstr(uchar **pcode, struct function *fn, struct block *blk, int curi, struct instr *ins)
{
   struct oper dst, src;
   bool regzeroed;
   enum irclass cls = ins->cls;
   /* X/X1 select a shared emitter for the grouped cases below */
   void (*X)(uchar **, enum irclass, struct oper, struct oper) = NULL;
   void (*X1)(uchar **, enum irclass, struct oper) = NULL;

   switch (ins->op) {
   default:
      fatal(NULL, "x86_64: in %y; unimplemented instr '%s'", fn->name, opnames[ins->op]);
   case Onop: break;
   case Omove:
      dst = ref2oper(ins->l);
      gencopy(pcode, cls, blk, curi, dst, ins->r);
      break;
   case Ocopy:
      dst = reg2oper(ins->reg-1);
      gencopy(pcode, cls, blk, curi, dst, ins->l);
      break;
   case Ostorei8: cls = KI32, X = Xmovb; goto Store;
   case Ostorei16: cls = KI32, X = Xmovw; goto Store;
   case Ostorei32: cls = KI32, X = Xmov; goto Store;
   case Ostorei64: cls = KI64, X = Xmov; goto Store;
   case Ostoref32: cls = KF32, X = Xmov; goto Store;
   case Ostoref64: cls = KF64, X = Xmov; goto Store;
   Store:
      src = mkimmregoper(ins->r);
      X(pcode, cls, mkmemoper(ins->l), src);
      break;
   /* sign/zero extensions share their emitters with the narrow loads */
   case Oexts8: src = mkregoper(ins->l); goto Movsxb;
   case Oextu8: src = mkregoper(ins->l); goto Movzxb;
   case Oexts16: src = mkregoper(ins->l); goto Movsxw;
   case Oextu16: src = mkregoper(ins->l); goto Movzxw;
   case Oexts32: src = mkregoper(ins->l); goto Movsxl;
   case Oextu32: src = mkregoper(ins->l); goto Movzxl;
   case Oloads8: src = mkmemoper(ins->l); Movsxb: Xmovsxb(pcode, cls, reg2oper(ins->reg-1), src); break;
   case Oloadu8: src = mkmemoper(ins->l); Movzxb: Xmovzxb(pcode, cls, reg2oper(ins->reg-1), src); break;
   case Oloads16: src = mkmemoper(ins->l); Movsxw: Xmovsxw(pcode, cls, reg2oper(ins->reg-1), src); break;
   case Oloadu16: src = mkmemoper(ins->l); Movzxw: Xmovzxw(pcode, cls, reg2oper(ins->reg-1), src); break;
   case Oloads32: src = mkmemoper(ins->l); Movsxl: Xmovsxl(pcode, cls, reg2oper(ins->reg-1), src); break;
   /* 32-bit MOV implicitly zero-extends to 64 bits, so Oextu32 needs no MOVZX */
   case Oloadu32: src = mkmemoper(ins->l); Movzxl: Xmov(pcode, KI32, reg2oper(ins->reg-1), src); break;
   case Oloadf32: case Oloadf64: Xmov(pcode, cls, reg2oper(ins->reg-1), mkmemoper(ins->l)); break;
   case Oloadi64: Xmov(pcode, KI64, reg2oper(ins->reg-1), mkmemoper(ins->l)); break;
   case Ocvtf32f64: X = Xcvtss2sd; goto FloatsCvt;
   case Ocvtf64f32: X = Xcvtsd2ss; goto FloatsCvt;
   case Ocvtf32s: X = Xcvttss2si; goto FloatsCvt;
   case Ocvtf64s: X = Xcvttsd2si; goto FloatsCvt;
   /* int->float: cls selects the destination float width; the emitted
    * CVTSI2Sx then needs the *source* integer width, hence the reassignment */
   case Ocvts32f: X = cls == KF32 ? Xcvtsi2ss : Xcvtsi2sd; cls = KI32; goto FloatsCvt;
   case Ocvts64f: X = cls == KF32 ? Xcvtsi2ss : Xcvtsi2sd; cls = KI64; goto FloatsCvt;
   FloatsCvt:
      X(pcode, cls, reg2oper(ins->reg-1), mkdatregoper(ins->l));
      break;
   case Oadd:
      dst = mkregoper(ins->l);
      if (kisflt(cls)) {
         Xaddf(pcode, cls, dst, mkimmdatregoper(ins->r));
      } else if (ins->reg-1 == dst.reg) { /* two-address add */
         src = ref2oper(ins->r);
         if (src.t == OIMM && src.imm < 0) /* ADD -imm -> SUB imm, for niceness */
            Xsub(pcode, cls, dst, (src.imm = -(uint)src.imm, src));
         else
            Xadd(pcode, cls, dst, src);
      } else if (isregref(ins->r) && ins->reg-1 == mkregoper(ins->r).reg) {
         /* also two-address after swapping operands */
         Xadd(pcode, cls, reg2oper(ins->reg-1), mkimmdatregoper(ins->l));
      } else { /* three-address add (lea) */
         struct oper mem = { OMEM, .base = NOBASE, .index = NOINDEX };
         dst = reg2oper(ins->reg-1);
         addmemoper(&mem, ref2oper(ins->l));
         addmemoper(&mem, ref2oper(ins->r));
         Xlea(pcode, cls, dst, mem);
      }
      break;
   case Osub:
      dst = mkregoper(ins->l);
      if (kisflt(cls)) {
         Xsubf(pcode, cls, dst, mkimmdatregoper(ins->r));
      } else if (!ins->reg) {
         /* no result register: only the flags are wanted -> CMP */
         Xcmp(pcode, cls, mkregoper(ins->l), mkimmdatregoper(ins->r));
      } else if (ins->reg-1 == dst.reg) { /* two-address */
         Xsub(pcode, cls, dst, ref2oper(ins->r));
      } else {
         /* three-address subtract of a constant -> LEA with negative disp */
         assert(isintcon(ins->r));
         Xlea(pcode, cls, reg2oper(ins->reg-1),
            mkoper(OMEM, .base = mkregoper(ins->l).reg, .index = NOINDEX, .disp = -intconval(ins->r)));
      }
      break;
   case Oshl:
      dst = reg2oper(ins->reg-1);
      src = mkregoper(ins->l);
      if (dst.reg == src.reg)
         Xshl(pcode, cls, dst, mkimmdatregoper(ins->r));
      else {
         /* three-address shift only exists as LEA scaling, so shift <= 3 */
         uint sh = ins->r.i;
         assert(ins->r.t == RICON && sh <= 3);
         if (sh == 1) /* shl x, 1 -> lea [x + x] */
            Xlea(pcode, cls, dst, mkoper(OMEM, .base = src.reg, .index = src.reg));
         else /* shl x, n -> lea [x*(1<<n)+0x0] */
            Xlea(pcode, cls, dst, mkoper(OMEM, .base = NOBASE, .index = src.reg, .shift = sh));
      }
      break;
   case Osar: X = Xsar; goto ALU2;
   case Oslr: X = Xshr; goto ALU2;
   case Oand:
      if (!ins->reg) {
         /* flags-only AND -> TEST */
         Xtest(pcode, cls, mkregoper(ins->l), mkimmdatregoper(ins->r));
         break;
      }
      X = Xand;
      goto ALU2;
   case Oxor: X = Xxor; goto ALU2;
   case Oior: X = Xior; goto ALU2;
   ALU2:
      /* two-address ALU ops require dst == left operand register */
      dst = mkregoper(ins->l);
      assert(ins->reg-1 == dst.reg);
      X(pcode, cls, dst, mkimmdatregoper(ins->r));
      break;
   case Oneg: X1 = Xneg; goto ALU1;
   case Onot: X1 = Xnot; goto ALU1;
   ALU1:
      dst = mkregoper(ins->l);
      assert(ins->reg-1 == dst.reg);
      X1(pcode, cls, dst);
      break;
   case Obswap16:
      dst = mkregoper(ins->l);
      assert(ins->reg-1 == dst.reg);
      if (dst.reg < 4) { /* AX,BX,CX,DX */
         /* XCHG rH, rL */
         B(0x86), B(0xC4 | dst.reg | (dst.reg)<<3);
      } else {
         /* ROL r16,8 */
         Xrolw(pcode, KI32, dst, mkoper(OIMM, .imm = 8));
      }
      break;
   case Obswap32: case Obswap64: X1 = Xbswap; goto ALU1;
   case Omul:
      if (kisint(cls))
         Ximul(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->l), ref2oper(ins->r));
      else
         Xmulf(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->r));
      break;
   case Odiv:
      switch (cls) {
      default: assert(0);
      /* KPTR/KI64 fall through: REX.W turns CDQ into CQO */
      case KPTR:
      case KI64: B(0x48); /* REX.W */
      case KI32: B(0x99); /* CDQ/CQO: sign-extend RAX into RDX before IDIV */
         assert(mkregoper(ins->l).reg == RAX);
         Xidiv(pcode, cls, mkdatregoper(ins->r));
         break;
      case KF32: case KF64:
         Xdivf(pcode, cls, reg2oper(ins->reg-1), mkdatregoper(ins->r));
         break;
      }
      break;
   case Oudiv:
      DS("\x31\xD2"); /* XOR EDX,EDX: zero the high half before DIV */
      assert(mkregoper(ins->l).reg == RAX);
      Xdiv(pcode, cls, mkdatregoper(ins->r));
      break;
   case Oequ: case Oneq:
   case Olth: case Ogth: case Olte: case Ogte:
   case Oulth: case Ougth: case Oulte: case Ougte:
      dst = mkregoper(ins->l);
      src = ref2oper(ins->r);
      regzeroed = 0;
      if (ins->reg && dst.reg != ins->reg-1 && (src.t != OREG || src.reg != ins->reg-1)) {
         /* can zero output reg before test instruction (differs from both inputs) */
         /* XXX this doesn't check if a source operand is an addr containing the register */
         struct oper dst = reg2oper(ins->reg-1);
         Xxor(pcode, KI32, dst, dst);
         regzeroed = 1;
      }
      if (kisint(ins->cls) && ins->r.bits == ZEROREF.bits)
         Xtest(pcode, cls, dst, dst);
      else
         Xcmp(pcode, cls, dst, src);
      if (ins->reg) {
         /* materialize the flag result into a register via SETcc */
         enum cc cc;
         dst = reg2oper(ins->reg-1);
         if (ins->r.bits != ZEROREF.bits) { /* CMP */
            cc = (kisint(ins->cls) ? icmpop2cc : fcmpop2cc)[ins->op];
         } else { /* TEST r,r (CMP r, 0) */
            assert(kisint(ins->cls));
            cc = icmpzero2cc[ins->op];
         }
         if (kisflt(ins->cls)) { /* handle float unordered result */
            /* unordered (NaN) sets PF; every op except != must yield 0,
             * != must yield 1 */
            int unordres = ins->op == Oneq ? 1 : 0;
            int rex = 0;
            /* SPL/BPL/SIL/DIL need an (empty) REX to address their low byte */
            if (in_range(dst.reg, RSP, RDI)) rex = 0x40;
            rex |= (dst.reg >> 3); /* REX.B */
            int jpoff = 3 + (rex != 0); /* length of the SETcc being skipped */
            if (regzeroed && unordres == 0) {
               /* if cmp unordered, just jump over the SETcc; result reg was already zeroed */
               B(0x7A), B(jpoff); /* JP <off> */
            } else {
               /* JNP .a
                * MOV r8, 0/1
                * JMP .b
                * .a: SETcc r8
                * .b: MOVZX r, r8
                */
               B(0x7B), B(jpoff+1); /* JNP <off> */
               if (rex) B(rex | 0x40);
               B(0xB0 + (dst.reg & 7)), B(unordres); /* MOV r8, 0/1 */
               B(0xEB), B(jpoff); /* JMP <off> */
            }
         }
         Xsetcc(pcode, cc, dst.reg);
         if (!regzeroed)
            Xmovzxb(pcode, KI32, dst, dst);
      }
      break;
   case Oswap:
      if (kisint(cls))
         Xxchg(pcode, cls, ref2oper(ins->l), mkregoper(ins->r));
      else {
         /* no float XCHG: swap via triple XOR */
         struct oper l = mkregoper(ins->l), r = mkregoper(ins->r);
         Xxor(pcode, cls, l, r);
         Xxor(pcode, cls, r, l);
         Xxor(pcode, cls, l, r);
      }
      break;
   case Ocall:
      Xcall(pcode, KPTR, ref2oper(ins->l));
      break;
   case Oxvaprologue:
      Xvaprologue(pcode, fn, mkmemoper(ins->l));
      break;
   }
}

/* Emit the terminating branch(es) of `blk`.  For a two-way branch the
 * condition code is recovered from the flag-setting instruction referenced by
 * blk->jmp.arg[0]; s1/s2 may be swapped (and the condition inverted) so that
 * a jump to the immediately following block becomes a fallthrough. */
static void
emitbranch(uchar **pcode, struct block *blk)
{
   enum cc cc = ALWAYS;
   assert(blk->s1);
   if (blk->s2) {
      /* conditional branch.. */
      union ref arg = blk->jmp.arg[0];
      struct block *unord = NULL;
      assert(arg.t == RTMP);
      struct instr *ins = &instrtab[arg.i];
      if ((oiscmp(ins->op) || ins->op == Oand || ins->op == Osub)) {
         if (ins->r.bits != ZEROREF.bits) {
            /* for CMP instr */
            cc = (kisint(ins->cls) ? icmpop2cc : fcmpop2cc)[ins->op];
            /* which successor an unordered (NaN) float compare goes to */
            unord = ins->op == Oneq ? blk->s1 : blk->s2;
         } else {
            assert(kisint(ins->cls));
            /* for TEST instr, which modifies ZF and SF and sets CF = OF = 0 */
            cc = icmpzero2cc[ins->op];
         }
      } else {
         /* implicit by ZF */
         cc = CCNZ;
      }
      if (kisflt(ins->cls)) {
         /* handle float unordered result */
         Xjcc(pcode, CCP, unord);
      }
      if (blk->s1 == blk->lnext) {
         /* if s1 is next adjacent block, swap s1,s2 and flip condition to emit a
          * single jump */
         struct block *tmp = blk->s1;
         blk->s1 = blk->s2;
         blk->s2 = tmp;
         cc ^= 1; /* condition codes come in true/false pairs differing in bit 0 */
      }
   }
   /* make sure to fallthru if jumping to next adjacent block */
   if (blk->s2 || blk->s1 != blk->lnext)
      Xjcc(pcode, cc, blk->s1);
   if (blk->s2 && blk->s2 != blk->lnext)
      Xjcc(pcode, ALWAYS, blk->s2);
}

/* Push the callee-saved registers (RBX, R12-R15) that `fn` actually uses,
 * counting each push into *npush.  Returns whether anything was pushed. */
static bool
calleesave(int *npush, uchar **pcode, struct function *fn)
{
   bool any = 0;
   if (rstest(fn->regusage, RBX)) {
      Xpush(pcode, RBX);
      ++*npush;
      any = 1;
   }
   for (int r = R12; r <= R15; ++r)
      if (rstest(fn->regusage, r)) {
         Xpush(pcode, r);
         ++*npush;
         any = 1;
      }
   return any;
}

/* Pop the callee-saved registers in exact reverse order of calleesave(). */
static void
calleerestore(uchar **pcode, struct function *fn)
{
   for (int r = R15; r >= R12; --r)
      if (rstest(fn->regusage, r))
         Xpop(pcode, r);
   if (rstest(fn->regusage, RBX)) Xpop(pcode, RBX);
}

/* align code using NOPs */
static void
nops(uchar **pcode, int align)
{
   int rem;
   while ((rem = (*pcode - objout.textbegin) & (align - 1)) != 0) {
      /* cases fall through deliberately: a 0x66 prefix lengthens the
       * following multi-byte NOP by one; gaps larger than 9 take several
       * loop iterations */
      switch (align - rem) {
      case 15: case 14: case 13: case 12: case 11: case 10:
      case 9: B(0x66);
      case 8: DS("\x0f\x1f\x84\x00\x00\x00\x00\x00"); break;
      case 7: DS("\x0f\x1f\x80\x00\x00\x00\x00"); break;
      case 6: B(0x66);
      case 5: DS("\x0f\x1f\x44\x00\x00"); break;
      case 4: DS("\x0f\x1f\x40\x00"); break;
      case 3: DS("\x0f\x1f\00"); break;
      case 2: B(0x66);
      case 1: B(0x90); break;
      }
   }
}

/* Emit the complete machine code of `fn`: 16-byte alignment, prologue
 * (frame pointer, callee saves, stack adjustment), every block's
 * instructions and branches, and the epilogue(s); finally registers the
 * function with the object writer.  Relies on file-scope state set here:
 * fnstart, curfnsym, usebp, rbpoff, blkaddr. */
static void
emitbin(struct function *fn)
{
   struct block *blk;
   uchar **pcode = &objout.code;
   int npush = 0;
   bool saverestore;

   nops(pcode, 16);
   fnstart = *pcode;
   curfnsym = fn->name;

   /** prologue **/

   /* only use frame pointer in non-leaf functions and functions that use the stack */
   usebp = 0;
   if (!fn->isleaf || fn->stksiz) {
      usebp = 1;
      /* push rbp; mov rbp, rsp */
      DS("\x55\x48\x89\xE5");
   }
   saverestore = calleesave(&npush, pcode, fn);
   if (usebp) rbpoff = -npush*8;

   /* ensure stack is 16-byte aligned for function calls */
   if (!fn->isleaf && ((fn->stksiz + npush*8) & 0xF) != 0) {
      assert(usebp);
      if ((rbpoff & 0xF) == 0) {
         rbpoff -= 16;
         fn->stksiz += 24;
      } else {
         rbpoff -= 8;
         fn->stksiz += 8;
      }
   }

   if (fn->stksiz != 0) {
      /* sub rsp, <stack size> */
      if (fn->stksiz < 128)
         DS("\x48\x83\xEC"), B(fn->stksiz); /* imm8 form */
      else if (fn->stksiz == 128)
         DS("\x48\x83\xC4\x80"); /* add rsp, -128 (still fits imm8) */
      else
         DS("\x48\x81\xEC"), I32(fn->stksiz); /* imm32 form */
   }

   if (*pcode - fnstart > 6) {
      /* large prologue -> large epilogue -> transform to use single exit point */
      struct block *exit = NULL;
      blk = fn->entry->lprev;
      do {
         if (blk->jmp.t == Jret) {
            if (!exit) {
               if (blk->ins.n == 0) {
                  /* an empty returning block can serve as the shared exit */
                  exit = blk;
                  continue;
               } else {
                  /* otherwise splice a fresh exit block after this one */
                  exit = newblk(fn);
                  exit->lnext = blk->lnext;
                  exit->lprev = blk;
                  blk->lnext = exit;
                  exit->lnext->lprev = exit;
                  exit->id = fn->nblk++;
                  exit->jmp.t = Jret;
               }
            }
            blk->jmp.t = Jb;
            memset(blk->jmp.arg, 0, sizeof blk->jmp.arg);
            blk->s1 = exit;
         } else if (exit) {
            /* thread jumps to the exit block */
            if (blk->s1 && !blk->s1->ins.n && blk->s1->s1 == exit && !blk->s1->s2) blk->s1 = exit;
            if (blk->s2 && !blk->s2->ins.n && blk->s2->s1 == exit && !blk->s2->s2) blk->s2 = exit;
         }
      } while ((blk = blk->lprev) != fn->entry);
   }

   blkaddr = allocz(fn->passarena, fn->nblk * sizeof *blkaddr, 0);

   blk = fn->entry;
   do {
      struct blkaddr *bb = &blkaddr[blk->id];
      uint bbaddr = *pcode - objout.textbegin;
      assert(!bb->resolved);
      /* back-patch the chain of forward jumps recorded for this block:
       * each pending rel32 slot holds the offset of the next one */
      while (bb->relreloc) {
         uint next;
         memcpy(&next, objout.textbegin + bb->relreloc, 4);
         int disp = bbaddr - bb->relreloc - 4;
         wr32le(objout.textbegin + bb->relreloc, disp);
         bb->relreloc = next;
      }
      bb->resolved = 1;
      bb->addr = bbaddr;

      for (int i = 0; i < blk->ins.n; ++i)
         emitinstr(pcode, fn, blk, i, &instrtab[blk->ins.p[i]]);

      if (blk->jmp.t == Jret) {
         if (blk->lnext != fn->entry && blk->lnext->jmp.t == Jret && blk->lnext->ins.n == 0)
            continue; /* fallthru to next blk's RET */
         /* epilogue */
         if (fn->stksiz && (saverestore || !usebp))
            Xadd(pcode, KPTR, mkoper(OREG, .reg = RSP), mkoper(OIMM, .imm = fn->stksiz));
         if (saverestore)
            calleerestore(pcode, fn);
         if (usebp) B(0xC9); /* leave */
         B(0xC3); /* ret */
      } else if (blk->jmp.t == Jtrap) {
         DS("\x0F\x0B"); /* UD2 */
      } else emitbranch(pcode, blk);
   } while ((blk = blk->lnext) != fn->entry);
   objdeffunc(fn->name, fn->globl, fnstart - objout.textbegin, *pcode - fnstart);
}

/* Backend entry point: round the frame up to 8 bytes, reject absurd frames
 * (> 16 MiB), and emit the function's code. */
void
x86_64_emit(struct function *fn)
{
   fn->stksiz = alignup(fn->stksiz, 8);
   if (fn->stksiz > 1<<24) error(NULL, "'%s' stack frame too big", fn->name);
   emitbin(fn);
}

/* vim:set ts=3 sw=3 expandtab: */