diff options
Diffstat (limited to 'x86_64')
| -rw-r--r-- | x86_64/all.h | 18 | ||||
| -rw-r--r-- | x86_64/emit.c | 1388 | ||||
| -rw-r--r-- | x86_64/isel.c | 660 | ||||
| -rw-r--r-- | x86_64/sysv.c | 313 |
4 files changed, 2379 insertions, 0 deletions
diff --git a/x86_64/all.h b/x86_64/all.h new file mode 100644 index 0000000..c0c38ff --- /dev/null +++ b/x86_64/all.h @@ -0,0 +1,18 @@ +#include "../ir/ir.h" + +#define LIST_REGS(_) \ + _(RAX) _(RCX) _(RDX) _(RBX) _(RSP) _(RBP) _(RSI) _(RDI) \ + _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ + _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) \ + _(XMM8) _(XMM9) _(XMM10) _(XMM11) _(XMM12) _(XMM13) _(XMM14) _(XMM15) + +enum reg { +#define R(r) r, + LIST_REGS(R) +#undef R +}; + +void x86_64_isel(struct function *); +void x86_64_emit(struct function *); + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/x86_64/emit.c b/x86_64/emit.c new file mode 100644 index 0000000..4a7d287 --- /dev/null +++ b/x86_64/emit.c @@ -0,0 +1,1388 @@ +#include "all.h" +#include "../obj/obj.h" +#include "../endian.h" + +/** Instruction operands ** + * + * Can be a register, a 32-bit immediate, + * a memory reference [base + index * scale + disp], + * or a relocatable reference to some symbol plus a displacement and maybe index*scale + */ +enum operkind { ONONE, OREG, OIMM, OMEM, OSYM }; +enum { NOBASE = 63, NOINDEX = 63 }; +struct oper { + uchar t; + union { + struct { uchar base; }; /* OMEM */ + struct { uchar cindex : 6, cshift : 2; }; /* OSYM */ + }; + union { + struct { uchar index, shift; }; /* OMEM */ + ushort con; /* OSYM */ + }; + union { + uchar reg; /* OREG */ + int disp; /* OMEM, OSYM */ + int imm; /* OIMM */ + }; +}; +#define mkoper(t, ...) ((struct oper){(t), __VA_ARGS__}) +#define reg2oper(R) (assert((uint)(R) <= XMM15), mkoper(OREG, .reg = (R))) + +static struct oper mkmemoper(union ref); + +static struct oper +ioper(int i) +{ + int reg = instrtab[i].reg - 1; + return reg < 0 ? 
mkoper(ONONE,) : reg2oper(reg); +} + +static struct oper +ref2oper(union ref r) +{ + switch (r.t) { + case RTMP: return ioper(r.i); + case RREG: return reg2oper(r.i); + case RICON: return mkoper(OIMM, .imm = r.i); + case RXCON: + if (conht[r.i].cls == KI32) + return mkoper(OIMM, .imm = conht[r.i].i); + else if (conht[r.i].cls == KI64) { + vlong i = conht[r.i].i; + assert(i == (int)i); + return mkoper(OIMM, .imm = i); + } else if (!conht[r.i].cls) { + return mkoper(OSYM, .con = r.i, .cindex = NOINDEX); + } + assert(0); + case RADDR: return mkmemoper(r); + default: assert(0); + } +} + +static void +addmemoper(struct oper *mem, struct oper add) +{ + assert(mem->t == OMEM); + if (add.t == OIMM) { + mem->disp += add.imm; + } else if (add.t == OREG) { + if (mem->base == NOBASE) + mem->base = add.reg; + else if (mem->index == NOINDEX) + mem->index = add.reg; + else + assert(0); + } +} + +/* helpers to convert a reference to an operand of a specific kind, + * with assertions to make sure nothing went wrong */ + +static inline struct oper +mkregoper(union ref r) +{ + assert(r.t == RREG || (r.t == RTMP && ioper(r.i).t == OREG)); + return r.t == RREG ? 
reg2oper(r.i) : ioper(r.i); +} + +static inline struct oper +mkimmoper(union ref r) +{ + assert(iscon(r) && concls(r) == KI32); + return mkoper(OIMM, .imm = intconval(r)); +} + +#define ismemref(ref) ((ref).t == RTMP && ioper((ref).i).t == OMEM) +#define isregref(ref) ((ref).t == RREG || ((ref).t == RTMP && ioper((ref).i).t == OREG)) + +static inline struct oper +mkimmregoper(union ref r) +{ + assert(isregref(r) || (iscon(r) && concls(r) == KI32)); + return ref2oper(r); +} + +static inline struct oper +mkdatregoper(union ref r) +{ + assert(isregref(r) || (r.t == RXCON && conht[r.i].deref)); + return ref2oper(r); +} + +static inline struct oper +mkimmdatregoper(union ref r) +{ + assert(isregref(r) || r.t == RICON || (r.t == RXCON && (conht[r.i].cls == KI32 || conht[r.i].deref))); + return ref2oper(r); +} + +static int rbpoff; + +static struct oper +mkmemoper(union ref r) +{ + if (r.t == RTMP) { + struct oper wop = ioper(r.i); + if (wop.t == OMEM) return wop; + assert(wop.t == OREG); + return mkoper(OMEM, .base = wop.reg, .index = NOINDEX); + } else if (r.t == RADDR) { + const struct addr *addr = &addrht[r.i]; + struct oper mem; + + assert(addr->shift <= 3); + if (addr->base.t == RTMP && ioper(addr->base.i).t == OMEM) { + mem = ioper(addr->base.i); + if (addr->index.bits) addmemoper(&mem, mkregoper(addr->index)); + assert(!mem.shift); + mem.shift = addr->shift; + addmemoper(&mem, mkoper(OIMM, .imm = addr->disp)); + return mem; + } + if (isaddrcon(addr->base,0)) { + return mkoper(OSYM, .con = addr->base.i, + .cindex = addr->index.bits ? mkregoper(addr->index).reg : NOINDEX, + .cshift = addr->shift, + .disp = addr->disp); + } else if (isintcon(addr->base)) { + assert(!addr->disp); + return mkoper(OMEM, .base = NOBASE, + .index = addr->index.bits ? 
mkregoper(addr->index).reg : NOINDEX, + .disp = intconval(addr->base), + .shift = addr->shift); + } else if (isaddrcon(addr->index,0)) { + assert(!addr->shift); + return mkoper(OSYM, .con = addr->index.i, + .cindex = addr->base.bits ? mkregoper(addr->base).reg : NOINDEX, + .disp = addr->disp); + } + return mkoper(OMEM, .base = addr->base.bits ? mkregoper(addr->base).reg : NOBASE, + .index = addr->index.bits ? mkregoper(addr->index).reg : NOINDEX, + .disp = addr->disp, + .shift = addr->shift); + } else if (r.t == RXCON) { + assert(!conht[r.i].cls); + return mkoper(OSYM, .con = r.i, .cindex = NOINDEX); + } else { + return mkoper(OMEM, .base = isregref(r) ? ref2oper(r).reg : NOBASE, + .index = NOINDEX, + .disp = isregref(r) ? 0 : mkimmoper(r).imm); + } +} + +/** Instruction description tables ** + * + * Each instruction is a list of descs, and the first one that matches + * is emitted. Each entry has a size pattern field, which is a bitset + * of the sizes (in bytes) that the entry matches, and 2 operand patterns, + * which describe the operands that can match (for example, PRAX matches + * a RAX register operand, PGPR matches any integer register, I8 matches + * an immediate operand between [-128,127]) The rest of the fields describe + * the instruction's encoding. 
+ * (reference: https://www.felixcloutier.com/x86/ & https://wiki.osdev.org/X86-64_Instruction_Encoding ) + */ + +enum operpat { + PNONE, + PRAX, + PRCX, + PGPR, + PFPR, + P1, /* imm = 1 */ + PN1, /* imm = -1 */ + PI8, + PU8, + PI16, + PU16, + PI32, + PU32, + PMEM, + PSYM, +}; +enum operenc { + EN_R = 1, /* reg with /r */ + EN_RR, /* reg, reg with /r */ + EN_RRX, /* reg, reg with /r (inverted) */ + EN_MR, /* mem, reg with /r */ + EN_RM, /* reg, mem with /r */ + EN_M, /* mem */ + EN_RI8, /* reg, imm8 with /0 */ + EN_RI32, /* reg, imm32 with /0 */ + EN_MI8, /* mem, imm8 with /x */ + EN_MI16, /* mem, imm16 with /x */ + EN_MI32, /* mem, imm32 with /x */ + EN_OI, /* reg, imm32 with op + reg */ + EN_I8, /* imm8 */ + EN_I32, /* imm32 */ + EN_R32, /* rel32 */ + NOPERENC, +}; +struct desc { + uchar psiz; /* subset of {1,2,4,8} */ + uchar ptd, pts; /* bitsets of enum operpat */ + uchar nopc; /* countof opc */ + const char opc[8]; /* opcode bytes */ + uchar operenc; /* enum operenc */ + uchar ext; /* ModR/M.reg opc extension */ + bool r8; /* uses 8bit register */ + bool norexw; /* do not use REX.W even if size is 64 bits */ +}; + +/* match operand against pattern */ +static inline bool +opermatch(enum operpat pat, struct oper oper) +{ + switch (pat) { + case PNONE: return !oper.t; + case PRAX: return oper.t == OREG && oper.reg == RAX; + case PRCX: return oper.t == OREG && oper.reg == RCX; + case PGPR: return oper.t == OREG && oper.reg <= R15; + case PFPR: return oper.t == OREG && oper.reg >= XMM0; + case P1: return oper.t == OIMM && oper.imm == 1; + case PN1: return oper.t == OIMM && oper.imm == -1; + case PI8: return oper.t == OIMM && (schar)oper.imm == oper.imm; + case PU8: return oper.t == OIMM && (uchar)oper.imm == oper.imm; + case PI16: return oper.t == OIMM && (short)oper.imm == oper.imm; + case PU16: return oper.t == OIMM && (ushort)oper.imm == oper.imm; + case PI32: return oper.t == OIMM; + case PU32: return oper.t == OIMM && oper.imm >= 0; + case PMEM: return 
in_range(oper.t, OMEM, OSYM); + case PSYM: return oper.t == OSYM; + } + assert(0); +} + +/* code output helpers */ +#define B(b) (*(*pcode)++ = (b)) +#define D(xs, N) (memcpy(*pcode, (xs), (N)), (*pcode) += (N)) +#define I16(w) (wr16le(*pcode, (w)), *pcode += 2) +#define I32(w) (wr32le(*pcode, (w)), *pcode += 4) +#define DS(S) D(S, sizeof S - 1) + +static bool usebp; /* use RBP? */ +static const char *curfnsym; +static uchar *fnstart; + +/* Given an instruction description table, find the first entry that matches + * the operands (where dst, src are the operands in intel syntax order) and encode it */ +static void +encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct oper dst, struct oper src) +{ + const uchar *opc; + int nopc; + struct oper mem; + enum reg reg; + const struct desc *en = NULL; + for (int i = 0; i < ntab; ++i) { + if ((tab[i].psiz & cls2siz[k]) && opermatch(tab[i].ptd, dst) && opermatch(tab[i].pts, src)) { + en = &tab[i]; + break; + } + } + assert(en && "no match for instr"); + + if (en->ptd == PFPR) dst.reg &= 15; + if (en->pts == PFPR) src.reg &= 15; + opc = (uchar *)en->opc; + nopc = en->nopc; + /* mandatory prefixes go before REX */ + if (*opc == 0x66 || *opc == 0xF2 || *opc == 0xF3) + B(*opc++), --nopc; + int rex = in_range(k, KI64, KPTR) << 3; /* REX.W */ + if (en->norexw) rex = 0; + switch (en->operenc) { + case EN_RR: /* mod = 11; reg = dst; rm = src */ + rex |= (dst.reg >> 3) << 2; /* REX.R */ + rex |= (src.reg >> 3) << 0; /* REX.B */ + if (rex) B(0x40 | rex); + else if (en->r8 && in_range(src.reg, RSP, RDI)) { + /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */ + B(0x40); + } + D(opc, nopc); + B(0300 | (dst.reg & 7) << 3 | (src.reg & 7)); + break; + case EN_RRX: /* mod = 11; reg = src; rm = dst */ + rex |= (src.reg >> 3) << 2; /* REX.R */ + rex |= (dst.reg >> 3) << 0; /* REX.B */ + if (rex) B(0x40 | rex); + else if (en->r8 && in_range(dst.reg, RSP, RDI)) { + /* /r8 needs REX to encode SP,BP,SI,DI 
(otherwise -> AH..BH) */ + B(0x40); + } + D(opc, nopc); + B(0300 | (src.reg & 7) << 3 | (dst.reg & 7)); + break; + case EN_MR: + mem = dst; + reg = src.reg; + goto Mem; + case EN_RM: + mem = src; + reg = dst.reg; + goto Mem; + case EN_M: case EN_MI8: case EN_MI16: case EN_MI32: + mem = dst; + reg = en->ext; + Mem: + if (mem.t == OMEM) { + if (mem.base != NOBASE) rex |= mem.base >> 3; /* REX.B */ + if (mem.index != NOINDEX) rex |= mem.index >> 3 << 1; /* REX.X */ + } else { + if (mem.cindex != NOINDEX) rex |= mem.cindex >> 3 << 1; /* REX.X */ + } + if (en->operenc != EN_M) + rex |= (reg >> 3) << 2; /* REX.R */ + if (rex) B(0x40 | rex); + else if (en->r8 && in_range(reg, RSP, RDI)) B(0x40); + + if (mem.t == OSYM) { + D(opc, nopc); + if (mem.cindex == NOINDEX) { + /* %rip(var) */ + static uchar offs[NOPERENC] = { [EN_MI8] = 1, [EN_MI16] = 2, [EN_MI32] = 4 }; + enum relockind r = + (!conht[mem.con].deref && ccopt.pic) ? (rex ? REL_GOTPCRELX : REL_GOTPCRELX_REX) + : REL_PCREL32; + int off = -4 - offs[en->operenc]; + B(/*mod 0*/ (reg & 7) << 3 | RBP); + objreloc(xcon2sym(mem.con), r, Stext, *pcode - objout.textbegin, mem.disp + off); + } else { + /* var(,%reg,shift) */ + assert(!ccopt.pic && !ccopt.pie && "cannot encode [RIP-rel + REG] for position independent"); + B(/*mod 0*/ (reg & 7) << 3 | RSP); + B(mem.cshift << 6 | mem.cindex << 3 | RBP); /* SIB [index*s + disp32] */ + objreloc(xcon2sym(mem.con), REL_ABS32S, Stext, *pcode - objout.textbegin, mem.disp); + } + I32(0); + } else { + int mod; + bool sib = 0; + if (mem.base == RBP) { + if (!usebp) { + /* if RBP isn't being set up (leaf functions with no stack allocations), + * access thru RSP (function arguments in the stack) */ + mem.base = RSP; + mem.disp -= 8; + } else if (mem.disp <= 0) { + mem.disp += rbpoff; + } + } + if (mem.base != NOBASE) { + if (mem.index == NOINDEX && mem.shift == 0) sib = 0; + else sib = 1; + mod = !mem.disp ? 0 /* disp = 0 -> mod = 00 */ + : (uint)(mem.disp + 128) < 256 ? 
1 /* disp8 -> mod = 01 */ + : 2; /* disp32 -> mod = 10 */ + if (mod == 0 && (mem.base == RBP || mem.base == R13)) mod = 1; + if (mem.base == RSP || mem.base == R12) sib = 1; + } else { + /* [disp + (index*s)] */ + sib = 1; + mem.base = RBP; + mod = 0; + assert(mem.index != RSP); + } + D(opc, nopc); + B(mod << 6 | (reg & 7) << 3 | (sib ? 4 : (mem.base & 7))); + if (sib) { + if (mem.index == NOINDEX) mem.index = RSP; + B(mem.shift << 6 | (mem.index & 7) << 3 | (mem.base & 7)); + } + if (mod == 1) B(mem.disp); + else if (mod == 2 || (mod == 0 && mem.base == RBP/*RIP-rel*/) || (mod == 0 && sib && mem.base == RBP/*absolute*/)) { + I32(mem.disp); + } + } + if (en->operenc == EN_MI8) B(src.imm); + if (en->operenc == EN_MI16) I16(src.imm); + if (en->operenc == EN_MI32) I32(src.imm); + break; + case EN_R: case EN_RI32: case EN_RI8: + rex |= (dst.reg >> 3) << 0; /* REX.B */ + if (rex) B(0x40 | rex); + else if (en->r8 && in_range(dst.reg, RSP, RDI)) { + /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */ + B(0x40); + } + D(opc, nopc); + B(0300 | en->ext << 3 | (dst.reg & 7)); + if (en->operenc == EN_RI32) + I32(src.imm); + else if (en->operenc == EN_RI8) + B(src.imm); + break; + case EN_OI: + rex |= (dst.reg >> 3) << 0; /* REX.B */ + if (rex) B(0x40 | rex); + B(*opc++ + (dst.reg & 7)); + D(opc, nopc - 1); + I32(src.imm); + break; + case EN_I8: + if (rex) B(0x40 | rex); + D(opc, nopc); + B(src.imm); + break; + case EN_I32: + if (rex) B(0x40 | rex); + D(opc, nopc); + I32(src.imm); + break; + case EN_R32: + if (rex) B(0x40 | rex); + D(opc, nopc); + assert(dst.t == OSYM); + const char *sym = xcon2sym(dst.con); + if (sym != curfnsym) { + enum relockind r = (ccopt.pie|ccopt.pic) ? REL_PLT32 : REL_PCREL32; + objreloc(sym, r, Stext, *pcode - objout.textbegin, -4); + I32(0); + } else { + /* self-recursive call */ + I32(fnstart - *pcode - 4); + } + break; + } +} + +#define DEFINSTR1(X, ...) 
\ + static void \ + X(uchar **pcode, enum irclass k, struct oper oper) \ + { \ + static const struct desc tab[] = { __VA_ARGS__ }; \ + encode(pcode, tab, countof(tab), k, oper, mkoper(0,)); \ + } + +#define DEFINSTR2(X, ...) \ + static void \ + X(uchar **pcode, enum irclass k, struct oper dst, struct oper src) \ + { \ + static const struct desc tab[] = { __VA_ARGS__ }; \ + encode(pcode, tab, countof(tab), k, dst, src); \ + } + +#define O(s) (sizeof s)-1,s +DEFINSTR2(Xmovb, + {-1, PMEM, PGPR, O("\x88"), EN_MR, .r8=1}, /* MOV m8, r8 */ + {-1, PMEM, PI8, O("\xC6"), EN_MI8, .r8=1}, /* MOV m8, imm8 */ + {-1, PMEM, PU8, O("\xC6"), EN_MI8, .r8=1}, /* MOV m8, imm8 */ +) +DEFINSTR2(Xmovw, + {-1, PMEM, PGPR, O("\x66\x89"), EN_MR}, /* MOV m16, r16 */ + {-1, PMEM, PI16, O("\x66\xC7"), EN_MI16}, /* MOV m16, imm16 */ + {-1, PMEM, PU16, O("\x66\xC7"), EN_MI16}, /* MOV m16, imm16 */ +) +static void Xmov(uchar **pcode, enum irclass k, struct oper dst, struct oper src) +{ + static const struct desc all[] = { + {4 , PGPR, PI32, O("\xB8"), EN_OI}, /* MOV r32, imm */ + {4|8, PGPR, PGPR, O("\x8B"), EN_RR}, /* MOV r32/64, r32/64 */ + {4|8, PMEM, PGPR, O("\x89"), EN_MR}, /* MOV m32/64, r32/64 */ + {4|8, PGPR, PMEM, O("\x8B"), EN_RM}, /* MOV r32/64, m32/64 */ + {4|8, PMEM, PI32, O("\xC7"), EN_MI32}, /* MOV m32/64, imm */ + { 8, PGPR, PU32, O("\xB8"), EN_OI, .norexw=1}, /* MOV r64, uimm */ + { 8, PGPR, PI32, O("\xC7"), EN_RI32}, /* MOV r64, imm */ + {4 , PFPR, PFPR, O("\x0F\x28"), EN_RR}, /* MOVPS xmm, xmm */ + {4 , PFPR, PMEM, O("\xF3\x0F\x10"), EN_RM}, /* MOVSS xmm, m32 */ + {4 , PMEM, PFPR, O("\xF3\x0F\x11"), EN_MR}, /* MOVSS m32, xmm */ + {8 , PFPR, PFPR, O("\x0F\x28"), EN_RR}, /* MOVPS xmm, xmm */ + {8 , PFPR, PMEM, O("\xF2\x0F\x10"), EN_RM}, /* MOVSD xmm, m64 */ + {8 , PMEM, PFPR, O("\xF2\x0F\x11"), EN_MR}, /* MOVSS m64, xmm */ + {4|8, PFPR, PGPR, O("\x66\x0F\x6E"), EN_RR}, /* MOVD/Q xmm, r64/32 */ + {4|8, PGPR, PFPR, O("\x66\x0F\x7E"), EN_RRX}, /* MOVD/Q r64/32, xmm */ + }; + static 
const uchar k2off[] = { + [KI32] = 0, + [KI64] = 1, [KPTR] = 1, + [KF32] = 7, + [KF64] = 10, + }; + encode(pcode, all + k2off[k], countof(all) - k2off[k], k, dst, src); +} +DEFINSTR2(Xmovsxl, + {8, PGPR, PMEM, O("\x63"), EN_RM}, /* MOVSXD r64, m32 */ + {8, PGPR, PGPR, O("\x63"), EN_RR}, /* MOVSXD r64, r32 */ + {4, PGPR, PMEM, O("\x8B"), EN_RM}, /* MOV r32, m32 */ + {4, PGPR, PGPR, O("\x8B"), EN_RR}, /* MOV r32, r32 */ +) +DEFINSTR2(Xmovsxw, + {4|8, PGPR, PMEM, O("\x0F\xBF"), EN_RM}, /* MOVSX r64, m16 */ + {4|8, PGPR, PGPR, O("\x0F\xBF"), EN_RR}, /* MOVSX r64, r16 */ +) +DEFINSTR2(Xmovsxb, + {4|8, PGPR, PMEM, O("\x0F\xBE"), EN_RM}, /* MOVSX r64, m8 */ + {4|8, PGPR, PGPR, O("\x0F\xBE"), EN_RR, .r8=1}, /* MOVSX r64, r8 */ +) +DEFINSTR2(Xmovzxw, + {4|8, PGPR, PMEM, O("\x0F\xB7"), EN_RM}, /* MOVZX r64, m16 */ + {4|8, PGPR, PGPR, O("\x0F\xB7"), EN_RR}, /* MOVZX r64, r16 */ +) +DEFINSTR2(Xmovzxb, + {4|8, PGPR, PMEM, O("\x0F\xB6"), EN_RM}, /* MOVZX r64, m8 */ + {4|8, PGPR, PGPR, O("\x0F\xB6"), EN_RR, .r8=1}, /* MOVZX r64, r8 */ +) +DEFINSTR2(Xmovaps, + {-1, PMEM, PFPR, O("\x0F\x29"), EN_MR}, /* MOVAPS mem, xmm */ +) +DEFINSTR2(Xxchg, + {4|8, PGPR, PGPR, O("\x87"), EN_RR}, /* XCHG r32/64, r32/64 */ + {4|8, PGPR, PMEM, O("\x87"), EN_RM}, /* XCHG r32/64, m32/64 */ + {4|8, PMEM, PGPR, O("\x87"), EN_MR}, /* XCHG r32/64, m32/64 */ +) +DEFINSTR2(Xlea, + {4|8, PGPR, PMEM, O("\x8D"), EN_RM}, /* LEA r32/64,m32/64 */ + { 8, PGPR, PSYM, O("\x8D"), EN_RM}, /* LEA rel32 */ +) +DEFINSTR2(Xadd, + {4|8, PGPR, PGPR, O("\x03"), EN_RR}, /* ADD r32/64, r32/64 */ + {4|8, PGPR, P1, O("\xFF"), EN_R, .ext=0}, /* INC r32/64 */ + {4|8, PGPR, PN1, O("\xFF"), EN_R, .ext=1}, /* DEC r32/64 */ + {4|8, PGPR, PI8, O("\x83"), EN_RI8}, /* ADD r32/64, imm8 */ + {4|8, PRAX, PI32, O("\x05"), EN_I32}, /* ADD eax/rax, imm */ + {4|8, PGPR, PI32, O("\x81"), EN_RI32}, /* ADD r32/64, imm */ + { 8, PGPR, PMEM, O("\x03"), EN_RM}, /* ADD r64, m64 */ +) +DEFINSTR2(Xaddf, + {4, PFPR, PFPR, O("\xF3\x0F\x58"), EN_RR}, /* 
ADDSS xmm, xmm */ + {8, PFPR, PFPR, O("\xF2\x0F\x58"), EN_RR}, /* ADDSD xmm, xmm */ + {4, PFPR, PMEM, O("\xF3\x0F\x58"), EN_RM}, /* ADDSS xmm, m32 */ + {8, PFPR, PMEM, O("\xF2\x0F\x58"), EN_RM}, /* ADDSD xmm, m64 */ +) +DEFINSTR2(Xsub, + {4|8, PGPR, PGPR, O("\x2B"), EN_RR}, /* SUB r32/64, r32/64 */ + {4|8, PGPR, P1, O("\xFF"), EN_R, .ext=1}, /* DEC r32/64 */ + {4|8, PGPR, PN1, O("\xFF"), EN_R, .ext=0}, /* INC r32/64 */ + {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=5}, /* SUB r32/64, imm8 */ + {4|8, PRAX, PI32, O("\x2D"), EN_I32}, /* SUB eax/rax, imm */ + {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=5}, /* SUB r32/64, imm */ + { 8, PGPR, PMEM, O("\x2B"), EN_RM}, /* SUB r64, m64 */ +) +DEFINSTR2(Xsubf, + {4, PFPR, PFPR, O("\xF3\x0F\x5C"), EN_RR}, /* SUBSS xmm, xmm */ + {8, PFPR, PFPR, O("\xF2\x0F\x5C"), EN_RR}, /* SUBSD xmm, xmm */ + {4, PFPR, PMEM, O("\xF3\x0F\x5C"), EN_RM}, /* SUBSS xmm, m32 */ + {8, PFPR, PMEM, O("\xF2\x0F\x5C"), EN_RM}, /* SUBSD xmm, m64 */ +) +DEFINSTR2(Xmulf, + {4, PFPR, PFPR, O("\xF3\x0F\x59"), EN_RR}, /* MULSS xmm, xmm */ + {8, PFPR, PFPR, O("\xF2\x0F\x59"), EN_RR}, /* MULSD xmm, xmm */ + {4, PFPR, PMEM, O("\xF3\x0F\x59"), EN_RM}, /* MULSS xmm, m32 */ + {8, PFPR, PMEM, O("\xF2\x0F\x59"), EN_RM}, /* MULSD xmm, m64 */ +) +DEFINSTR2(Xdivf, + {4, PFPR, PFPR, O("\xF3\x0F\x5E"), EN_RR}, /* DIVSS xmm, xmm */ + {8, PFPR, PFPR, O("\xF2\x0F\x5E"), EN_RR}, /* DIVSD xmm, xmm */ + {4, PFPR, PMEM, O("\xF3\x0F\x5E"), EN_RM}, /* DIVSS xmm, m32 */ + {8, PFPR, PMEM, O("\xF2\x0F\x5E"), EN_RM}, /* DIVSD xmm, m64 */ +) +DEFINSTR2(Xand, + {4|8, PGPR, PGPR, O("\x23"), EN_RR}, /* AND r32/64, r32/64 */ + {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=4}, /* AND r32/64, imm8 */ + {4|8, PRAX, PI32, O("\x25"), EN_I32}, /* AND eax/rax, imm */ + {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=4}, /* AND r32/64, imm */ + { 8, PGPR, PMEM, O("\x23"), EN_RM}, /* AND r64, m64 */ +) +DEFINSTR2(Xior, + {4|8, PGPR, PGPR, O("\x0B"), EN_RR}, /* OR r32/64, r32/64 */ + {4|8, PGPR, PI8, O("\x83"), 
EN_RI8, .ext=1}, /* OR r32/64, imm8 */ + {4|8, PRAX, PI32, O("\x0D"), EN_I32}, /* OR eax/rax, imm */ + {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=1}, /* OR r32/64, imm */ + { 8, PGPR, PMEM, O("\x0B"), EN_RM}, /* OR r64, m64 */ + {4|8, PFPR, PFPR, O("\x0F\x57"), EN_RR}, /* ORPS xmm, xmm */ +) +DEFINSTR2(Xxor, + {4|8, PGPR, PGPR, O("\x33"), EN_RR}, /* XOR r32/64, r32/64 */ + {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=6}, /* XOR r32/64, imm8 */ + {4|8, PRAX, PI32, O("\x35"), EN_I32}, /* XOR eax/rax, imm */ + {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=6}, /* XOR r32/64, imm */ + { 8, PGPR, PMEM, O("\x33"), EN_RM}, /* XOR r64, m64 */ + {4|8, PFPR, PFPR, O("\x0F\x57"), EN_RR}, /* XORPS xmm, xmm */ + {4|8, PFPR, PMEM, O("\x0F\x57"), EN_RM}, /* XORPS xmm, m128 */ +) +DEFINSTR2(Xshl, + {4|8, PGPR, P1, O("\xD1"), EN_R, .ext=4}, /* SHL r32/64, 1 */ + {4|8, PGPR, PI32, O("\xC1"), EN_RI8, .ext=4}, /* SHL r32/64, imm */ + {4|8, PGPR, PRCX, O("\xD3"), EN_R, .ext=4}, /* SHL r32/64, CL */ +) +DEFINSTR2(Xsar, + {4|8, PGPR, P1, O("\xD1"), EN_R, .ext=7}, /* SAR r32/64, 1 */ + {4|8, PGPR, PI32, O("\xC1"), EN_RI8, .ext=7}, /* SAR r32/64, imm */ + {4|8, PGPR, PRCX, O("\xD3"), EN_R, .ext=7}, /* SAR r32/64, CL */ +) +DEFINSTR2(Xshr, + {4|8, PGPR, P1, O("\xD1"), EN_R, .ext=5}, /* SHR r32/64, 1 */ + {4|8, PGPR, PI32, O("\xC1"), EN_RI8, .ext=5}, /* SHR r32/64, imm */ + {4|8, PGPR, PRCX, O("\xD3"), EN_R, .ext=5}, /* SHR r32/64, CL */ +) +DEFINSTR2(Xcvtss2sd, + {-1, PFPR, PFPR, O("\xF3\x0F\x5A"), EN_RR}, /* CVTSS2SD xmm, xmm */ + {-1, PFPR, PMEM, O("\xF3\x0F\x5A"), EN_RM}, /* CVTSS2SD xmm, m32/64 */ +) +DEFINSTR2(Xcvtsd2ss, + {-1, PFPR, PFPR, O("\xF2\x0F\x5A"), EN_RR}, /* CVTSD2SS xmm, xmm */ + {-1, PFPR, PMEM, O("\xF2\x0F\x5A"), EN_RM}, /* CVTSD2SS xmm, m32/64 */ +) +DEFINSTR2(Xcvtsi2ss, + {-1, PFPR, PGPR, O("\xF3\x0F\x2A"), EN_RR}, /* CVTSI2SS xmm, r32/64 */ + {-1, PFPR, PMEM, O("\xF3\x0F\x2A"), EN_RM}, /* CVTSI2SS xmm, m32/64 */ +) +DEFINSTR2(Xcvtsi2sd, + {-1, PFPR, PGPR, O("\xF2\x0F\x2A"), 
EN_RR}, /* CVTSI2SD xmm, r32/64 */ + {-1, PFPR, PMEM, O("\xF2\x0F\x2A"), EN_RM}, /* CVTSI2SD xmm, m32/64 */ +) +DEFINSTR2(Xcvttss2si, + {-1, PGPR, PFPR, O("\xF3\x0F\x2C"), EN_RR}, /* CVTTSS2SI r32/64, xmm */ + {-1, PGPR, PMEM, O("\xF3\x0F\x2C"), EN_RM}, /* CVTTSS2SI r32/64, m32 */ +) +DEFINSTR2(Xcvttsd2si, + {-1, PGPR, PFPR, O("\xF2\x0F\x2C"), EN_RR}, /* CVTTSD2SI r32/64, xmm */ + {-1, PGPR, PMEM, O("\xF2\x0F\x2C"), EN_RM}, /* CVTTSD2SI r32/64, m32 */ +) +DEFINSTR1(Xneg, + {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=3} /* NEG r32/64 */ +) +DEFINSTR1(Xnot, + {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=2} /* NOT r32/64 */ +) +DEFINSTR1(Xidiv, + {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=7}, /* IDIV r32/64 */ + {4|8, PMEM, 0, O("\xF7"), EN_M, .ext=7}, /* IDIV m32/64 */ +) +DEFINSTR1(Xdiv, + {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=6}, /* DIV r32/64 */ + {4|8, PMEM, 0, O("\xF7"), EN_M, .ext=6}, /* DIV m32/64 */ +) +DEFINSTR1(Xcall, + {-1, PSYM, 0, O("\xE8"), EN_R32, .norexw=1}, /* CALL rel32 */ + {-1, PGPR, 0, O("\xFF"), EN_R, .ext=2, .norexw=1}, /* CALL r64 */ + {-1, PMEM, 0, O("\xFF"), EN_M, .ext=2, .norexw=1}, /* CALL m64 */ +) +DEFINSTR2(Xcmp, + {4|8, PGPR, PGPR, O("\x3B"), EN_RR}, /* CMP r32/64, r32/64 */ + {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=7}, /* CMP r32/64, imm8 */ + {4|8, PRAX, PI32, O("\x3D"), EN_I32}, /* CMP eax/rax, imm */ + {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=7}, /* CMP r32/64, imm */ + { 8, PGPR, PMEM, O("\x3B"), EN_RM}, /* CMP r64, m64 */ + {4 , PFPR, PFPR, O("\x0F\x2E"), EN_RR}, /* UCOMISS xmm, xmm */ + {4 , PFPR, PMEM, O("\x0F\x2E"), EN_RM}, /* UCOMISS xmm, m32 */ + { 8, PFPR, PFPR, O("\x66\x0F\x2E"), EN_RR}, /* UCOMISD xmm, xmm */ + { 8, PFPR, PMEM, O("\x66\x0F\x2E"), EN_RM}, /* UCOMISD xmm, m64 */ +) +DEFINSTR2(Xtest, + {4|8, PRAX, PI8, O("\xA8"), EN_I8}, /* TEST AL, imm8 */ + {4, PRAX, PI32, O("\xA9"), EN_I32}, /* TEST EAX, imm32 */ + { 8, PRAX, PU32, O("\xA9"), EN_I32}, /* TEST EAX, imm32 */ + { 8, PRAX, PI32, O("\xA9"), EN_I32}, /* TEST RAX, imm32 */ + {4|8, 
PGPR, PI8, O("\xF6"), EN_RI8, .r8=1,.norexw=1}, /* TEST r8, imm8 */ + {4|8, PGPR, PI32, O("\xF7"), EN_RI32, .ext=0}, /* TEST r32/64, imm32 */ + {4|8, PGPR, PGPR, O("\x85"), EN_RR}, /* TEST r32/64, r32/64 */ + {4|8, PGPR, PMEM, O("\x85"), EN_RM}, /* TEST r32/64, m32/64 */ +) + +DEFINSTR2(Ximul2, + {4|8, PGPR, PGPR, O("\x0F\xAF"), EN_RR}, /* IMUL r32/64, r32/64 */ + {4|8, PGPR, PMEM, O("\x0F\xAF"), EN_RM}, /* IMUL r32/64, m32/64 */ +) +static const struct desc imul3_imm8tab[] = { + {4|8, PGPR, PGPR, O("\x6B"), EN_RR}, /* IMUL r32/64, r32/64, (imm8) */ + {4|8, PGPR, PMEM, O("\x6B"), EN_RM}, /* IMUL r32/64, m32/64, (imm8) */ +}, imul3_imm32tab[] = { + {4|8, PGPR, PGPR, O("\x69"), EN_RR}, /* IMUL r32/64, r32/64, (imm32) */ + {4|8, PGPR, PMEM, O("\x69"), EN_RM}, /* IMUL r32/64, m32/64, (imm32) */ +}; +#undef O +static void +Ximul(uchar **pcode, enum irclass k, struct oper dst, struct oper s1, struct oper s2) +{ + if (!memcmp(&dst, &s1, sizeof dst) && s2.t != OIMM) { + Ximul2(pcode, k, dst, s2); + return; + } + assert(s2.t == OIMM); + if ((uint)(s2.imm + 128) < 256) { + encode(pcode, imul3_imm8tab, countof(imul3_imm8tab), k, dst, s1); + B(s2.imm); + } else { + encode(pcode, imul3_imm32tab, countof(imul3_imm32tab), k, dst, s1); + I32(s2.imm); + } +} + +enum cc { + CCO = 0x0, /* OF = 1*/ + CCNO = 0x1, /* OF = 0*/ + CCB = 0x2, CCC = 0x2, CCNAE = 0x2, /* below; CF = 1; not above or equal */ + CCAE = 0x3, CCNB = 0x3, CCNC = 0x3, /* above or equal; not below; CF = 0 */ + CCE = 0x4, CCZ = 0x4, /* equal; ZF = 1 */ + CCNE = 0x5, CCNZ = 0x5, /* not equal; ZF = 0 */ + CCBE = 0x6, CCNA = 0x6, /* below or equal; not above; CF=1 or ZF=1 */ + CCA = 0x7, CCNBE = 0x7, /* above; not below or equal; CF=0 and ZF=0 */ + CCS = 0x8, /* ZS = 1; negative */ + CCNS = 0x9, /* ZS = 0; non-negative */ + CCP = 0xA, CCPE = 0xA, /* PF = 1; parity even */ + CCNP = 0xB, CCPO = 0xB, /* PF = 0; parity odd */ + CCL = 0xC, CCNGE = 0xC, /* lower; not greater or equal; SF != OF */ + CCGE = 0xD, CCNL = 0xD, /* 
greater or equal; not lower; SF == OF */ + CCLE = 0xE, CCNG = 0xE, /* less or equal; not greater; ZF=1 or SF != OF */ + CCG = 0xF, CCNLE = 0xF, /* greater; not less or equal; ZF=0 and SF = OF*/ + ALWAYS, +}; + +/* maps blk -> address when resolved; or to linked list of jump displacement + * relocations */ +static struct blkaddr { + bool resolved; + union { + uint addr; + uint relreloc; + }; +} *blkaddr; +static uint nblkaddr; + +static void +Xjcc(uchar **pcode, enum cc cc, struct block *dst) +{ + int disp, insaddr = *pcode - objout.textbegin; + bool rel8 = 0; + + if (blkaddr[dst->id].resolved) { + disp = blkaddr[dst->id].addr - (insaddr + 2); + if ((uint)(disp + 128) < 256) /* can use 1-byte displacement? */ + rel8 = 1; + else { /* otherwise 4-byte displacement */ + disp -= 3; + disp -= cc != ALWAYS; /* 'Jcc rel32' has 2 opcode bytes */ + } + } else { + disp = blkaddr[dst->id].relreloc; + blkaddr[dst->id].relreloc = insaddr + 1 + (cc != ALWAYS); + } + if (cc == ALWAYS) { + B(rel8 ? 0xEB : 0xE9); /* JMP rel8/rel32 */ + } else { + assert(in_range(cc, 0, 0xF)); + if (rel8) B(0x70 + cc); /* Jcc rel8 */ + else B(0x0F), B(0x80 + cc); /* Jcc rel32 */ + } + if (rel8) B(disp); else I32(disp); +} + +static void +Xsetcc(uchar **pcode, enum cc cc, enum reg reg) +{ + int rex = 0; + assert(in_range(cc, 0x0, 0xF)); + assert(in_range(reg, RAX, R15)); + + if (in_range(reg, RSP, RDI)) rex = 0x40; + rex |= (reg >> 3); /* REX.B */ + if (rex) B(rex | 0x40); + B(0x0F), B(0x90+cc); /* SETcc */ + B(0xC0 + (reg & 7)); /* ModR/M with mod=11, rm=reg */ +} + +static void +Xpush(uchar **pcode, enum reg reg) +{ + if (in_range(reg, RAX, R15)) { + if (reg >> 3) B(0x41); /* REX.B */ + B(0x50 + (reg & 7)); /* PUSH reg */ + } else { + assert(in_range(reg, XMM0, XMM15)); + DS("\x48\x8d\x64\x24\xF8"); /* LEA RSP, [RSP-8] */ + Xmov(pcode, KF64, mkoper(OMEM, .base = RSP, .index = NOINDEX), reg2oper(reg)); /* MOVD [rsp],xmm0 */ + } +} + +static void +Xpop(uchar **pcode, enum reg reg) +{ + if 
(in_range(reg, RAX, R15)) { + if (reg >> 3) B(0x41); /* REX.B */ + B(0x58 + (reg & 7)); /* POP reg */ + } else { + assert(in_range(reg, XMM0, XMM15)); + Xmov(pcode, KF64, reg2oper(reg), mkoper(OMEM, .base = RSP, .index = NOINDEX)); /* MOVD xmm0,[rsp] */ + DS("\x48\x8d\x64\x24\x08"); /* LEA RSP, [RSP+8] */ + } +} + +/* are flags live at given instruction? */ +static bool +flagslivep(struct block *blk, int curi) +{ + int cmpi; + /* conditional branch that references a previous comparison instruction? */ + if (blk->jmp.t != Jb || !blk->jmp.arg[0].bits) + return 0; + assert(blk->jmp.arg[0].t == RTMP); + cmpi = blk->jmp.arg[1].i; + for (int i = blk->ins.n - 1; i > curi; --i) { + if (blk->ins.p[i] == cmpi) + /* flags defined after given instruction, dead here */ + return 0; + } + /* flags defined before given instruction, live here */ + return 1; +} + +/* Copy dst = val, with some peephole optimizations */ +static void +gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct oper dst, union ref val) +{ + assert(dst.t == OREG); + if (val.bits == UNDREF.bits) { + /* can be generated by ssa construction, since value is undefined no move is needed */ + return; + } + if (val.t == RADDR) { + /* this is a LEA, but maybe it can be lowered to a 2-address instruction, + * which may clobber flags */ + const struct addr *addr = &addrht[val.i]; + if (flagslivep(blk, curi)) goto Lea; + if (addr->base.t != RREG) goto Lea; + if (addr->base.bits && dst.reg == mkregoper(addr->base).reg) { /* base = dst */ + if (addr->index.bits && !addr->disp && !addr->shift){ + /* lea Rx, [Rx + Ry] -> add Rx, Ry */ + Xadd(pcode, cls, dst, mkregoper(addr->index)); + return; + } else if (!addr->index.bits) { + if (!addr->disp) /* lea Rx, [Rx] -> mov Rx, Rx */ + Xmov(pcode, cls, dst, dst); + else /* lea Rx, [Rx + Imm] -> add Rx, Imm */ + Xadd(pcode, cls, dst, mkoper(OIMM, .imm = addr->disp)); + return; + } + } else if (addr->index.bits && dst.reg == mkregoper(addr->index).reg) { /* 
index = dst */ + if (addr->base.bits && !addr->disp && !addr->shift) { + /* lea Rx, [Ry + Rx] -> add Rx, Ry */ + Xadd(pcode, cls, dst, mkregoper(addr->base)); + return; + } else if (!addr->base.bits) { + if (!addr->disp && !addr->shift) /* lea Rx, [Rx] -> mov Rx, Rx */ + Xmov(pcode, cls, dst, dst); + else if (!addr->shift) /* lea Rx, [Rx + Imm] -> add Rx, Imm */ + Xadd(pcode, cls, dst, mkoper(OIMM, .imm = addr->disp)); + else if (!addr->disp) /* lea Rx, [Rx LSL s] -> shl Rx, s */ + Xshl(pcode, cls, dst, mkoper(OIMM, .imm = addr->shift)); + else + goto Lea; + return; + } + } + /* normal (not 2-address) case */ + Lea: + if (isaddrcon(addr->base,0) && ccopt.pic) { + assert(!addr->disp && !addr->index.bits); + val = addr->base; + goto GOTLoad; + } + Xlea(pcode, cls, dst, ref2oper(val)); + } else if (val.bits == ZEROREF.bits && dst.t == OREG && (kisflt(cls) || !flagslivep(blk, curi))) { + /* dst = 0 -> xor dst, dst; but only if it is ok to clobber flags */ + Xxor(pcode, kisint(cls) ? KI32 : cls, dst, dst); + } else if (isaddrcon(val,0)) { + if (ccopt.pic) GOTLoad: + /* for mov reg, [rip(sym@GOTPCREL)] */ + Xmov(pcode, cls, dst, mkoper(OSYM, .con = val.i, .cindex = NOINDEX)); + else + /* for lea reg, [rip(sym)] */ + Xlea(pcode, cls, dst, mkoper(OSYM, .con = val.i, .cindex = NOINDEX)); + } else if (val.t == RXCON && in_range(concls(val), KI64, KPTR)) { + /* movabs */ + assert(dst.t == OREG && in_range(dst.reg, RAX, R15)); + B(0x48 | (dst.reg >> 3)); /* REX.W (+ REX.B) */ + B(0xB8 + (dst.reg & 0x7)); /* MOVABS r64, */ + wr64le(*pcode, intconval(val)); /* imm64 */ + *pcode += 8; + } else { + struct oper src = mkimmdatregoper(val); + if (memcmp(&dst, &src, sizeof dst) != 0) + Xmov(pcode, cls == KF64 && src.t == OREG && src.reg < XMM0 ? 
KI64 : cls, dst, src); + } +} + +static void +Xvaprologue(uchar **pcode, struct function *fn, struct oper sav) +{ + uint gpr0 = 0, fpr0 = 0, jmpaddr; + for (int i = 0; i < fn->nabiarg; ++i) { + struct abiarg abi = fn->abiarg[i]; + if (!abi.isstk) { + if (abi.reg < XMM0) ++gpr0; + else ++fpr0; + } + } + assert(sav.t == OMEM && sav.base == RBP); + /* save GPRS */ + for (int r = 0; r < 6; ++r) { + static const char reg[] = {RDI,RSI,RDX,RCX,R8,R9}; + if (r >= gpr0) + Xmov(pcode, KI64, sav, reg2oper(reg[r])); + sav.disp += 8; + } + + /* save FPRs, but only if al is non zero */ + if (fpr0 < 8) { + DS("\x84\xC0"); /* TEST al,al */ + jmpaddr = *pcode - objout.textbegin; + DS("\x74\xFE"); /* JE rel8 */ + } + for (int r = 0; r < 8; ++r) { + if (r >= fpr0) + Xmovaps(pcode, KF64, sav, reg2oper(XMM0 + r)); + sav.disp += 16; + } + if (fpr0 < 8) {/* patch relative jump */ + int off = (*pcode - objout.textbegin) - jmpaddr - 2; + objout.textbegin[jmpaddr+1] = off; + } +} + +/* condition code for CMP */ +static const uchar icmpop2cc[] = { + [Oequ] = CCE, [Oneq] = CCNE, + [Olth] = CCL, [Ogth] = CCG, [Olte] = CCLE, [Ogte] = CCGE, + [Oulth] = CCB, [Ougth] = CCA, [Oulte] = CCBE, [Ougte] = CCAE, + [Oand] = CCNE, [Osub] = CCNE, +}, fcmpop2cc[] = { + [Oequ] = CCE, [Oneq] = CCNE, + [Olth] = CCB, [Ogth] = CCA, [Olte] = CCBE, [Ogte] = CCAE, +}; +/* condition code for TEST reg,reg (compare with zero) */ +static const uchar icmpzero2cc[] = { + [Oequ] = CCE, [Oulte] = CCE, + [Oneq] = CCNE, [Ougth] = CCNE, + [Olth] = CCS, [Ogte] = CCNS, + [Olte] = CCLE, [Ogth] = CCG, + [Oulth] = CCB, [Ougte] = CCAE, /* actually constants */ +}; + +static void +emitinstr(uchar **pcode, struct function *fn, struct block *blk, int curi, struct instr *ins) +{ + struct oper dst, src; + bool regzeroed; + enum irclass cls = ins->cls; + void (*X)(uchar **, enum irclass, struct oper, struct oper) = NULL; + void (*X1)(uchar **, enum irclass, struct oper) = NULL; + + switch (ins->op) { + default: + fatal(NULL, "x86_64: in 
%y; unimplemented instr '%s'", fn->name, opnames[ins->op]); + case Onop: break; + case Ostore8: cls = KI32, X = Xmovb; goto Store; + case Ostore16: cls = KI32, X = Xmovw; goto Store; + case Ostore32: cls = KI32, X = Xmov; goto Store; + case Ostore64: cls = KI64, X = Xmov; + Store: + src = mkimmregoper(ins->r); + if (cls == KI32 && src.t == OREG && src.reg >= XMM0) cls = KF32; + if (cls == KI64 && src.t == OREG && src.reg >= XMM0) cls = KF64; + X(pcode, cls, mkmemoper(ins->l), src); + break; + case Oexts8: src = mkregoper(ins->l); goto Movsxb; + case Oextu8: src = mkregoper(ins->l); goto Movzxb; + case Oexts16: src = mkregoper(ins->l); goto Movsxw; + case Oextu16: src = mkregoper(ins->l); goto Movzxw; + case Oexts32: src = mkregoper(ins->l); goto Movsxl; + case Oextu32: src = mkregoper(ins->l); goto Movzxl; + case Oloads8: src = mkmemoper(ins->l); Movsxb: Xmovsxb(pcode, cls, reg2oper(ins->reg-1), src); break; + case Oloadu8: src = mkmemoper(ins->l); Movzxb: Xmovzxb(pcode, cls, reg2oper(ins->reg-1), src); break; + case Oloads16: src = mkmemoper(ins->l); Movsxw: Xmovsxw(pcode, cls, reg2oper(ins->reg-1), src); break; + case Oloadu16: src = mkmemoper(ins->l); Movzxw: Xmovzxw(pcode, cls, reg2oper(ins->reg-1), src); break; + case Oloads32: src = mkmemoper(ins->l); Movsxl: Xmovsxl(pcode, cls, reg2oper(ins->reg-1), src); break; + case Oloadu32: src = mkmemoper(ins->l); Movzxl: Xmov(pcode, KI32, reg2oper(ins->reg-1), src); break; + case Oloadf32: case Oloadf64: Xmov(pcode, cls, reg2oper(ins->reg-1), mkmemoper(ins->l)); break; + case Oloadi64: Xmov(pcode, KI64, reg2oper(ins->reg-1), mkmemoper(ins->l)); break; + case Ocvtf32f64: X = Xcvtss2sd; goto FloatsCvt; + case Ocvtf64f32: X = Xcvtsd2ss; goto FloatsCvt; + case Ocvtf32s: X = Xcvttss2si; goto FloatsCvt; + case Ocvtf64s: X = Xcvttsd2si; goto FloatsCvt; + case Ocvts32f: X = cls == KF32 ? Xcvtsi2ss : Xcvtsi2sd; cls = KI32; goto FloatsCvt; + case Ocvts64f: X = cls == KF32 ? 
Xcvtsi2ss : Xcvtsi2sd; cls = KI64; goto FloatsCvt; + FloatsCvt: + X(pcode, cls, reg2oper(ins->reg-1), mkdatregoper(ins->l)); + break; + case Oadd: + dst = mkregoper(ins->l); + if (kisflt(cls)) { + Xaddf(pcode, cls, dst, mkimmdatregoper(ins->r)); + } else if (ins->reg-1 == dst.reg) { /* two-address add */ + src = ref2oper(ins->r); + if (src.t == OIMM && src.imm < 0) /* ADD -imm -> SUB imm, for niceness */ + Xsub(pcode, cls, dst, (src.imm = -src.imm, src)); + else + Xadd(pcode, cls, dst, src); + } else if (isregref(ins->r) && ins->reg-1 == mkregoper(ins->r).reg) { + /* also two-address after swapping operands */ + Xadd(pcode, cls, reg2oper(ins->reg-1), mkimmdatregoper(ins->l)); + } else { /* three-address add (lea) */ + struct oper mem = { OMEM, .base = NOBASE, .index = NOINDEX }; + dst = reg2oper(ins->reg-1); + addmemoper(&mem, ref2oper(ins->l)); + addmemoper(&mem, ref2oper(ins->r)); + Xlea(pcode, cls, dst, mem); + } + break; + case Osub: + dst = mkregoper(ins->l); + if (kisflt(cls)) { + Xsubf(pcode, cls, dst, mkimmdatregoper(ins->r)); + } else if (ins->reg-1 == dst.reg) { /* two-address */ + Xsub(pcode, cls, dst, ref2oper(ins->r)); + } else { + assert(isintcon(ins->r)); + Xlea(pcode, cls, reg2oper(ins->reg-1), + mkoper(OMEM, .base = mkregoper(ins->l).reg, .index = NOINDEX, .disp = -intconval(ins->r))); + } + break; + case Oshl: X = Xshl; goto ALU2; + case Osar: X = Xsar; goto ALU2; + case Oslr: X = Xshr; goto ALU2; + case Oand: + if (!ins->reg) { + Xtest(pcode, cls, mkregoper(ins->l), mkimmdatregoper(ins->r)); + break; + } + X = Xand; + goto ALU2; + case Oxor: X = Xxor; goto ALU2; + case Oior: X = Xior; goto ALU2; + ALU2: + dst = mkregoper(ins->l); + assert(ins->reg-1 == dst.reg); + X(pcode, cls, dst, mkimmdatregoper(ins->r)); + break; + case Oneg: X1 = Xneg; goto ALU1; + case Onot: X1 = Xnot; goto ALU1; + ALU1: + dst = mkregoper(ins->l); + assert(ins->reg-1 == dst.reg); + X1(pcode, cls, dst); + break; + case Omul: + if (kisint(cls)) + Ximul(pcode, cls, 
reg2oper(ins->reg-1), ref2oper(ins->l), ref2oper(ins->r)); + else + Xmulf(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->r)); + break; + case Odiv: + switch (cls) { + default: assert(0); + case KPTR: + case KI64: B(0x48); /* REX.W */ + case KI32: B(0x99); /* CDQ/CQO */ + assert(mkregoper(ins->l).reg == RAX); + Xidiv(pcode, cls, mkdatregoper(ins->r)); + break; + case KF32: case KF64: + Xdivf(pcode, cls, reg2oper(ins->reg-1), mkdatregoper(ins->r)); + break; + } + break; + case Oudiv: + DS("\x31\xD2"); /* XOR EDX,EDX */ + assert(mkregoper(ins->l).reg == RAX); + Xdiv(pcode, cls, mkdatregoper(ins->r)); + break; + case Oequ: case Oneq: + case Olth: case Ogth: case Olte: case Ogte: + case Oulth: case Ougth: case Oulte: case Ougte: + dst = mkregoper(ins->l); + src = ref2oper(ins->r); + regzeroed = 0; + if (ins->reg && dst.reg != ins->reg-1 && (src.t != OREG || src.reg != ins->reg-1)) { + /* can zero output reg before test instruction (differs from both inputs) */ + /* XXX this doesn't check if a source operand is an addr containing the register */ + struct oper dst = reg2oper(ins->reg-1); + Xxor(pcode, KI32, dst, dst); + regzeroed = 1; + } + if (kisint(ins->cls) && ins->r.bits == ZEROREF.bits) + Xtest(pcode, cls, dst, dst); + else + Xcmp(pcode, cls, dst, src); + if (ins->reg) { + enum cc cc; + dst = reg2oper(ins->reg-1); + if (ins->r.bits != ZEROREF.bits) { /* CMP */ + cc = (kisint(ins->cls) ? icmpop2cc : fcmpop2cc)[ins->op]; + } else { /* TEST r,r (CMP r, 0) */ + assert(kisint(ins->cls)); + cc = icmpzero2cc[ins->op]; + } + if (kisflt(ins->cls)) { /* handle float unordered result */ + int unordres = ins->op == Oneq ? 
1 : 0; + int rex = 0; + if (in_range(dst.reg, RSP, RDI)) rex = 0x40; + rex |= (dst.reg >> 3); /* REX.B */ + int jpoff = 3 + (rex != 0); + if (regzeroed && unordres == 0) { + /* if cmp unordered, just jump over the SETcc; result reg was already zeroed */ + B(0x7A), B(jpoff); /* JP <off> */ + } else { + /* JNP .a + * MOV r8, 0/1 + * JMP .b + * .a: SETcc r8 + * .b: MOVZX r, r8 + */ + B(0x7B), B(jpoff+1); /* JNP <off> */ + if (rex) B(rex | 0x40); + B(0xB0 + (dst.reg & 7)), B(unordres); /* MOV r8, 0/1 */ + B(0xEB), B(jpoff); /* JMP <off> */ + } + } + Xsetcc(pcode, cc, dst.reg); + if (!regzeroed) + Xmovzxb(pcode, KI32, dst, dst); + } + break; + case Omove: + dst = ref2oper(ins->l); + gencopy(pcode, cls, blk, curi, dst, ins->r); + break; + case Ocopy: + dst = reg2oper(ins->reg-1); + gencopy(pcode, cls, blk, curi, dst, ins->l); + break; + case Oswap: + if (kisint(cls)) + Xxchg(pcode, cls, ref2oper(ins->l), mkregoper(ins->r)); + else { + struct oper l = mkregoper(ins->l), r = mkregoper(ins->r); + Xxor(pcode, cls, l, r); + Xxor(pcode, cls, r, l); + Xxor(pcode, cls, l, r); + } + break; + case Oxsave: + Xpush(pcode, mkregoper(ins->l).reg); + break; + case Oxrestore: + Xpop(pcode, mkregoper(ins->l).reg); + break; + case Ocall: + if (calltab.p[ins->r.i].vararg >= 0) { + struct call *call = &calltab.p[ins->r.i]; + /* variadic functions need the caller to write num of args in sse regs to %al */ + int n = 0; + for (int i = 0; i < call->narg; ++i) + if (!call->abiarg[i].isstk && call->abiarg[i].reg >= XMM0) + ++n; + if (!n) DS("\x31\xC0"); /* XOR EAX, EAX */ + else B(0xB0), B(n); /* MOV AL, n */ + } + Xcall(pcode, KPTR, ref2oper(ins->l)); + break; + case Oxvaprologue: + Xvaprologue(pcode, fn, mkmemoper(ins->l)); + break; + } +} + +static void +emitbranch(uchar **pcode, struct block *blk) +{ + enum cc cc = ALWAYS; + assert(blk->s1); + if (blk->s2) { + /* conditional branch.. 
*/ + union ref arg = blk->jmp.arg[0]; + struct block *unord = NULL; + assert(arg.t == RTMP); + struct instr *ins = &instrtab[arg.i]; + if ((oiscmp(ins->op) || ins->op == Oand || ins->op == Osub)) { + if (ins->r.bits != ZEROREF.bits) { + /* for CMP instr */ + cc = (kisint(ins->cls) ? icmpop2cc : fcmpop2cc)[ins->op]; + unord = ins->op == Oneq ? blk->s1 : blk->s2; + } else { + assert(kisint(ins->cls)); + /* for TEST instr, which modifies ZF and SF and sets CF = OF = 0 */ + cc = icmpzero2cc[ins->op]; + } + } else { + /* implicit by ZF */ + cc = CCNZ; + } + if (kisflt(ins->cls)) { + /* handle float unordered result */ + Xjcc(pcode, CCP, unord); + } + if (blk->s1 == blk->lnext) { + /* if s1 is next adjacent block, swap s1,s2 and flip condition to emit a + * single jump */ + struct block *tmp = blk->s1; + blk->s1 = blk->s2; + blk->s2 = tmp; + cc ^= 1; + } + } + /* make sure to fallthru if jumping to next adjacent block */ + if (blk->s2 || blk->s1 != blk->lnext) + Xjcc(pcode, cc, blk->s1); + if (blk->s2 && blk->s2 != blk->lnext) + Xjcc(pcode, ALWAYS, blk->s2); +} + +static bool +calleesave(int *npush, uchar **pcode, struct function *fn) +{ + bool any = 0; + if (rstest(fn->regusage, RBX)) { + Xpush(pcode, RBX); + ++*npush; + any = 1; + } + for (int r = R12; r <= R15; ++r) + if (rstest(fn->regusage, r)) { + Xpush(pcode, r); + ++*npush; + any = 1; + } + return any; +} + +static void +calleerestore(uchar **pcode, struct function *fn) +{ + for (int r = R15; r >= R12; --r) + if (rstest(fn->regusage, r)) + Xpop(pcode, r); + if (rstest(fn->regusage, RBX)) Xpop(pcode, RBX); +} + +/* align code using NOPs */ +static void +nops(uchar **pcode, int align) +{ + int rem; + while ((rem = (*pcode - objout.textbegin) & (align - 1)) != 0) { + switch (align - rem) { + case 15: case 14: case 13: case 12: case 11: case 10: + case 9: B(0x66); + case 8: DS("\x0f\x1f\x84\x00\x00\x00\x00\x00"); break; + case 7: DS("\x0f\x1f\x80\x00\x00\x00\x00"); break; + case 6: B(0x66); + case 5: 
DS("\x0f\x1f\x44\x00\x00"); break; + case 4: DS("\x0f\x1f\x40\x00"); break; + case 3: DS("\x0f\x1f\00"); break; + case 2: B(0x66); + case 1: B(0x90); break; + } + } +} + +static void +emitbin(struct function *fn) +{ + struct block *blk; + uchar **pcode = &objout.code; + int npush = 0; + uint epilogueaddr = 0; + bool saverestore; + + if (nblkaddr < fn->nblk) { + blkaddr = xrealloc(blkaddr, fn->nblk * sizeof *blkaddr); + nblkaddr = fn->nblk; + } + memset(blkaddr, 0, nblkaddr * sizeof *blkaddr); + + nops(pcode, 16); + fnstart = *pcode; + curfnsym = fn->name; + + /** prologue **/ + + /* only use frame pointer in non-leaf functions and functions that use the stack */ + usebp = 0; + if (!fn->isleaf || fn->stksiz) { + usebp = 1; + /* push rbp; mov rbp, rsp */ + DS("\x55\x48\x89\xE5"); + } + saverestore = calleesave(&npush, pcode, fn); + if (usebp) rbpoff = -npush*8; + + /* ensure stack is 16-byte aligned for function calls */ + if (!fn->isleaf && ((fn->stksiz + npush*8) & 0xF) != 0) { + assert(usebp); + if ((rbpoff & 0xF) == 0) { + rbpoff -= 16; + fn->stksiz += 24; + } else { + rbpoff -= 8; + fn->stksiz += 8; + } + } + + if (fn->stksiz != 0) { + /* sub rsp, <stack size> */ + if (fn->stksiz < 128) + DS("\x48\x83\xEC"), B(fn->stksiz); + else if (fn->stksiz == 128) + DS("\x48\x83\xC4\x80"); /* add rsp, -128 */ + else + DS("\x48\x81\xEC"), I32(fn->stksiz); + } + + blk = fn->entry; + do { + struct blkaddr *bb = &blkaddr[blk->id]; + uint bbaddr = *pcode - objout.textbegin; + assert(!bb->resolved); + while (bb->relreloc) { + uint next; + int disp = bbaddr - bb->relreloc - 4; + + memcpy(&next, objout.textbegin + bb->relreloc, 4); + wr32le(objout.textbegin + bb->relreloc, disp); + bb->relreloc = next; + } + bb->resolved = 1; + bb->addr = bbaddr; + + for (int i = 0; i < blk->ins.n; ++i) { + emitinstr(pcode, fn, blk, i, &instrtab[blk->ins.p[i]]); + } + if (blk->jmp.t == Jret) { + /* epilogue */ + uint here = *pcode - fnstart; + if (epilogueaddr) { + int disp = epilogueaddr - (here + 
2); + if ((uint)(disp + 128) < 256) {/* can use 1-byte displacement? */ + B(0xEB), B(disp); /* JMP rel8 */ + } else { + B(0xE9), I32(disp - 3); /* JMP rel32 */ + } + } else { + if (fn->stksiz && (saverestore || !usebp)) + Xadd(pcode, KPTR, mkoper(OREG, .reg = RSP), mkoper(OIMM, .imm = fn->stksiz)); + if (saverestore) { + epilogueaddr = here; + calleerestore(pcode, fn); + } + if (usebp) B(0xC9); /* leave */ + B(0xC3); /* ret */ + } + } else if (blk->jmp.t == Jtrap) { + DS("\x0F\x0B"); /* UD2 */ + } else emitbranch(pcode, blk); + } while ((blk = blk->lnext) != fn->entry); + objdeffunc(fn->name, fn->globl, fnstart - objout.textbegin, *pcode - fnstart); +} + +void +x86_64_emit(struct function *fn) +{ + fn->stksiz = alignup(fn->stksiz, 8); + if (fn->stksiz > 1<<24) error(NULL, "'%s' stack frame too big", fn->name); + emitbin(fn); +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/x86_64/isel.c b/x86_64/isel.c new file mode 100644 index 0000000..5d373f3 --- /dev/null +++ b/x86_64/isel.c @@ -0,0 +1,660 @@ +#include "all.h" +#include "../endian.h" + +enum flag { + ZF = 1 << 0, + SF = 1 << 1, + CF = 1 << 2, + OF = 1 << 3, + CLOBF = 1 << 4, +}; + +/* flags modified by each integer op */ +static const uchar opflags[NOPER] = { + [Oneg] = ZF|CLOBF, + [Oadd] = ZF|CLOBF, + [Osub] = ZF|CLOBF, + [Omul] = CLOBF, + [Odiv] = CLOBF, + [Oudiv] = CLOBF, + [Orem] = CLOBF, + [Ourem] = CLOBF, + [Oand] = ZF|CLOBF, + [Oior] = ZF|CLOBF, + [Oxor] = ZF|CLOBF, + [Oshl] = ZF|CLOBF, + [Osar] = ZF|CLOBF, + [Oslr] = ZF|CLOBF, + [Oequ] = ZF|CLOBF, + [Oneq] = ZF|CLOBF, + [Olth] = ZF|CLOBF, + [Ogth] = ZF|CLOBF, + [Olte] = ZF|CLOBF, + [Ogte] = ZF|CLOBF, + [Oulth] = ZF|CLOBF, + [Ougth] = ZF|CLOBF, + [Oulte] = ZF|CLOBF, + [Ougte] = ZF|CLOBF, + [Ocall] = CLOBF, +}; + +static int iflagsrc = -1; + +static void +picfixsym(union ref *r, struct block *blk, int *curi) +{ + if (!ccopt.pic || !isaddrcon(*r,0)) return; + *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, .l = *r)); +} + +/* map alloca tmp -> 
stack frame displacement (0 if not alloca) */ +static ushort *stkslots; +static uint nstkslots; + +#define isstkslot(r) ((r).t == RTMP && (r).i < nstkslots && stkslots[(r).i]) + +static void +fixarg(union ref *r, struct instr *ins, struct block *blk, int *curi) +{ + int sh; + enum op op = ins ? ins->op : 0; + + if (r->t == RXCON) { + struct xcon *con = &conht[r->i]; + if (in_range(op, Oshl, Oslr) && r == &ins->r) { + sh = con->i; + goto ShiftImm; + } else if (in_range(op, Oadd, Osub) && con->i == 2147483648 && r == &ins->r) { + /* add X, INT32MAX+1 -> sub X, INT32MIN */ + ins->op = Oadd + (op == Oadd); + *r = mkintcon(KI32, -2147483648); + } else if (kisflt(con->cls) && con->i == 0) { + /* copy of positive float zero -> regular zero, that emit() will turn into xor x,x */ + if (in_range(op, Ocopy, Omove) || op == Ophi) + *r = ZEROREF; + else + *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, con->cls, ZEROREF)); + } else if (con->cls >= KI64) { + /* float immediates & 64bit immediates are loaded from memory */ + uchar data[8]; + uint ksiz = cls2siz[con->cls]; + union type ctype; + /* can't use memory arg in rhs if lhs is memory */ + bool docopy = &ins->l != r && (oisstore(ins->op) || ins->l.t == RADDR); + if (con->cls <= KPTR && in_range(ins->op, Ocopy, Omove)) /* in this case we can use movabs */ + return; + else if (!docopy || con->cls >= KF32) { + if (con->cls != KF32) { + wr64le(data, con->i); + ctype = mktype(con->cls == KF64 ? 
TYDOUBLE : TYVLONG); + } else { + union { float f; int i; } pun = { con->f }; + wr32le(data, pun.i); + ctype = mktype(TYFLOAT); + } + *r = mkdatref(NULL, ctype, ksiz, /*align*/ksiz, data, ksiz, /*deref*/1); + } + if (docopy) + *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, con->cls, *r)); + } else if (ins->op != Omove && con->issym && r == &ins->r) { + *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, mkaddr((struct addr){*r}))); + } else if (in_range(op, Odiv, Ourem) && kisint(ins->cls)) + goto DivImm; + } else if (r->t == RICON && in_range(op, Odiv, Ourem) && kisint(ins->cls) && r == &ins->r) { + DivImm: /* there is no division by immediate, must be copied to a register */ + *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, ins->cls, *r)); + } else if (r->t == RICON && in_range(op, Oshl, Oslr) && r == &ins->r) { + sh = r->i; + ShiftImm: /* shift immediate is always 8bit */ + *r = mkref(RICON, sh & 255); + } else if (isstkslot(*r)) { + struct instr adr = mkinstr(Oadd, KPTR, mkref(RREG, RBP), mkintcon(KI32, -stkslots[r->i])); + if (in_range(op, Ocopy, Omove)) + *ins = adr; + else + *r = insertinstr(blk, (*curi)++, adr); + } + picfixsym(r, blk, curi); +} + +#define isimm32(r) (iscon(r) && concls(r) == KI32) + +static void +selcall(struct function *fn, struct instr *ins, struct block *blk, int *curi) +{ + const struct call *call = &calltab.p[ins->r.i]; + int iarg = *curi - 1; + enum irclass cls; + uint argstksiz = alignup(call->argstksiz, 16); + + for (int i = call->narg - 1; i >= 0; --i) { + struct abiarg abi = call->abiarg[i]; + struct instr *arg; + for (;; --iarg) { + assert(iarg >= 0 && i >= 0 && "arg?"); + if ((arg = &instrtab[blk->ins.p[iarg]])->op == Oarg) + break; + } + + if (!abi.isstk) { + assert(!abi.ty.isagg); + *arg = mkinstr(Omove, call->abiarg[i].ty.cls, mkref(RREG, abi.reg), arg->r); + } else { + union ref adr = mkaddr((struct addr){mkref(RREG, RSP), .disp = abi.stk}); + int iargsave = iarg; + if (!abi.ty.isagg) { /* scalar arg in stack */ + *arg = 
mkinstr(Ostore8+ilog2(cls2siz[abi.ty.cls]), 0, adr, arg->r); + if (isaddrcon(arg->r,1) || arg->r.t == RADDR) + arg->r = insertinstr(blk, iarg++, mkinstr(Ocopy, abi.ty.cls, arg->r)); + else + fixarg(&ins->r, ins, blk, &iarg); + } else { /* aggregate arg in stack, callee stack frame destination address */ + *arg = mkinstr(Ocopy, KPTR, adr); + } + *curi += iarg - iargsave; + } + } + if (call->argstksiz) { + union ref disp = mkref(RICON, argstksiz); + insertinstr(blk, iarg--, (struct instr){Osub, KPTR, .keep=1, .reg = RSP+1, .l=mkref(RREG,RSP), disp}); + ++*curi; + insertinstr(blk, *curi+1, (struct instr){Oadd, KPTR, .keep=1, .reg = RSP+1, .l=mkref(RREG,RSP), disp}); + } + if (isimm32(ins->l)) + ins->l = mkaddr((struct addr){.base = ins->l}); + else if (isintcon(ins->l)) + ins->l = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, ins->l)); + + if (call->vararg >= 0 && ins->l.t == RTMP) { + /* variadic calls write number of sse regs used to AL, so mark it as clobbered such that + * the function pointer of an indirect calls does not get allocated to RAX by regalloc */ + insertinstr(blk, (*curi)++, mkinstr(Omove, KPTR, mkref(RREG, RAX), mkref(RREG, RAX))); + } + cls = ins->cls; + ins->cls = 0; + if (cls) { + /* duplicate to reuse same TMP ref */ + insertinstr(blk, (*curi)++, *ins); + *ins = mkinstr(Ocopy, cls, mkref(RREG, call->abiret[0].reg)); + for (int i = 1; i <= 2; ++i) { + if (*curi + i >= blk->ins.n) break; + if (instrtab[blk->ins.p[*curi + i]].op == Ocall2r) { + ins = &instrtab[blk->ins.p[*curi += i]]; + *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, call->abiret[1].reg)); + break; + } + } + } +} + +static bool +aimm(struct addr *addr, int disp) +{ + vlong a = addr->disp; + a += disp; + if ((int)a == a) { + addr->disp = a; + return 1; + } + return 0; +} + +static bool +acon(struct addr *addr, union ref r) +{ + vlong a = addr->disp; + assert(isintcon(r)); + a += intconval(r); + if ((int)a == a) { + addr->disp = a; + return 1; + } + return 0; +} + +static bool 
+ascale(struct addr *addr, union ref a, union ref b) +{ + if (b.t != RICON) return 0; + if (addr->index.bits) return 0; + if ((unsigned)b.i > 3) return 0; + if (a.t == RREG) { + Scaled: + addr->index = a; + addr->shift = b.i; + return 1; + } else if (a.t == RTMP) { + struct instr *ins = &instrtab[a.i]; + /* factor out shifted immediate from 'shl {add %x, imm}, s' */ + /* XXX maybe we shouldn't do this here because it should be done by a generic + * arithemetic optimization pass ? */ + if (ins->op == Oadd && (ins->l.t == RREG || ins->l.t == RTMP) && isintcon(ins->r)) { + vlong a = ((vlong) addr->disp + intconval(ins->r)) * (1 << b.i); + if (a != (int) a) return 0; + addr->disp = a; + addr->index = ins->l; + addr->shift = b.i; + return 1; + } else { + goto Scaled; + } + } + return 0; +} + +static bool +aadd(struct addr *addr, struct block *blk, int *curi, union ref r) +{ + if (isstkslot(r)) { + if (addr->base.bits || !aimm(addr, -stkslots[r.i])) goto Ref; + addr->base = mkref(RREG, RBP); + } else if (r.t == RTMP) { + struct instr *ins = &instrtab[r.i]; + if (ins->op == Oadd) { + if (!aadd(addr, blk, curi, ins->l)) goto Ref; + if (!aadd(addr, blk, curi, ins->r)) goto Ref; + ins->skip = 1; + } else if (ins->op == Oshl) { + if (!ascale(addr, ins->l, ins->r)) goto Ref; + ins->skip = 1; + } else if (ins->op == Ocopy && ins->l.t == RADDR) { + struct addr save = *addr, *addr2 = &addrht[ins->l.i]; + if ((!addr2->base.bits || aadd(addr, blk, curi, addr2->base)) + && aimm(addr, addr2->disp) + && (!addr2->index.bits || ascale(addr, addr2->index, mkref(RICON, addr2->shift)))) + { + ins->skip = 1; + } else { + *addr = save; + goto Ref; + } + } else if (ins->op == Ocopy) { + if (!aadd(addr, blk, curi, ins->l)) goto Ref; + ins->skip = 1; + } else goto Ref; + } else if (isnumcon(r)) { + return acon(addr, r); + } else if (isaddrcon(r,1)) { + if (!addr->base.bits && !isaddrcon(addr->index,1)) addr->base = r; + else return 0; + } else if (r.t == RREG) { + /* temporaries are single 
assignment, but register aren't, so they can't be * + * safely hoisted into an address value, unless they have global lifetime */ + if (!rstest(mctarg->rglob, r.i)) return 0; + Ref: + if (isstkslot(r) && (addr->base.bits || addr->index.bits)) { + r = insertinstr(blk, (*curi)++, mkinstr(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, -stkslots[r.i]))); + } + if (!addr->base.bits) addr->base = r; + else if (!addr->index.bits) addr->index = r; + else return 0; + } else return 0; + return 1; +} + +static bool +fuseaddr(union ref *r, struct block *blk, int *curi) +{ + struct addr addr = { 0 }; + + if (isaddrcon(*r,1)) return 1; + if (r->t == RADDR) { + const struct addr *a0 = &addrht[r->i]; + if (aadd(&addr, blk, curi, a0->base) + && (!addr.index.bits || ascale(&addr, a0->index, mkref(RICON, a0->shift))) + && aadd(&addr, blk, curi, mkintcon(KPTR, a0->disp))) { + *r = mkaddr(addr); + } + return 1; + } + if (r->t != RTMP) return 0; + if (!aadd(&addr, blk, curi, *r)) return 0; + + if (isaddrcon(addr.base,0) && (ccopt.pic || (ccopt.pie && addr.index.bits))) { + /* pic needs to load from GOT */ + /* pie cannot encode RIP-relative address with index register */ + /* first load symbol address into a temp register */ + union ref temp = mkaddr((struct addr){.base = addr.base, .disp = ccopt.pic ? 0 : addr.disp}); + addr.base = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, .l = temp)); + if (!ccopt.pic) addr.disp = 0; + } + + if (!addr.base.bits) { + /* absolute int address in disp */ + if (addr.index.bits) return 0; + addr.base = mkintcon(KPTR, addr.disp); + addr.disp = 0; + } + + *r = mkaddr(addr); + return 1; +} + +/* is add instruction with this arg a candidate to transform into efective addr? 
*/ +static bool +addarg4addrp(union ref r) +{ + struct instr *ins; + if (r.t == RXCON && !conht[r.i].cls && !conht[r.i].deref) return 1; /* sym or dat ref */ + if (r.t != RTMP) return 0; + if (isstkslot(r)) return 1; + ins = &instrtab[r.i]; + return ins->op == Oshl || (ins->op == Ocopy && ins->l.t == RADDR) || ins->op == Oadd; +} + +static void +loadstoreaddr(struct block *blk, union ref *r, int *curi) +{ + if (isimm32(*r)) { + *r = mkaddr((struct addr){.base = *r}); + } else if (isaddrcon(*r, 0)) { + picfixsym(r, blk, curi); + } else if (r->t == RTMP) { + if (addarg4addrp(*r)) fuseaddr(r, blk, curi); + } else if (r->t != RREG) { + *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, *r)); + } +} + +static bool +arithfold(struct instr *ins) +{ + if (isnumcon(ins->l) && (!ins->r.t || isnumcon(ins->r))) { + union ref r; + bool ok = ins->r.t ? foldbinop(&r, ins->op, ins->cls, ins->l, ins->r) : foldunop(&r, ins->op, ins->cls, ins->l); + assert(ok && "fold?"); + *ins = mkinstr(Ocopy, insrescls(*ins), r); + return 1; + } + return 0; +} + +static void +sel(struct function *fn, struct instr *ins, struct block *blk, int *curi) +{ + uint siz, alignlog2; + int t = ins - instrtab; + struct instr temp = {0}; + enum op op = ins->op; + + if (oisarith(ins->op) && arithfold(ins)) { + fixarg(&ins->l, ins, blk, curi); + return; + } + + switch (op) { + default: assert(0); + case Onop: break; + case Oalloca1: case Oalloca2: case Oalloca4: case Oalloca8: case Oalloca16: + alignlog2 = ins->op - Oalloca1; + assert(ins->l.i > 0); + siz = ins->l.i << alignlog2; + fn->stksiz += siz; + fn->stksiz = alignup(fn->stksiz, 1 << alignlog2); + if (fn->stksiz > (1<<16)-1) error(NULL, "'%s' stack frame too big", fn->name); + stkslots[t] = fn->stksiz; + *ins = mkinstr(Onop,0,); + break; + case Oparam: + assert(ins->l.t == RICON && ins->l.i < fn->nabiarg); + if (!fn->abiarg[ins->l.i].isstk) + *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, fn->abiarg[ins->l.i].reg)); + else /* stack */ + *ins = 
mkinstr(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, 16+fn->abiarg[ins->l.i].stk)); + break; + case Oarg: + fixarg(&ins->r, ins, blk, curi); + break; + case Ocall: + selcall(fn, ins, blk, curi); + break; + case Ocall2r: assert(0); + case Ointrin: + break; + case Oshl: case Osar: case Oslr: + if (!iscon(ins->r)) { + /* shift amount register is always CL */ + insertinstr(blk, (*curi)++, mkinstr(Omove, KI32, mkref(RREG, RCX), ins->r)); + ins->r = mkref(RREG, RCX); + } + goto ALU; + case Oequ: case Oneq: + case Olth: case Ogth: case Olte: case Ogte: + case Oulth: case Ougth: case Oulte: case Ougte: + if (iscon(ins->l)) { + /* lth imm, x -> gth x, imm */ + if (!in_range(ins->op, Oequ, Oneq)) + ins->op = ((op - Olth) ^ 1) + Olth; + rswap(ins->l, ins->r); + } + if (ins->l.t != RTMP && ins->l.t != RREG) + ins->l = insertinstr(blk, (*curi)++, mkinstr(Ocopy, ins->cls, ins->l)); + else + fixarg(&ins->l, ins, blk, curi); + fixarg(&ins->r, ins, blk, curi); + break; + case Odiv: case Oudiv: case Orem: case Ourem: + if (kisflt(ins->cls)) goto ALU; + /* TODO fuse div/rem pair */ + + /* (I)DIV dividend is always in RDX:RAX, output also in those regs */ + insertinstr(blk, (*curi)++, mkinstr(Omove, ins->cls, mkref(RREG, RAX), ins->l)); + /* mark RDX as clobbered. sign/zero-extending RAX into RDX is handled in emit() */ + insertinstr(blk, (*curi)++, mkinstr(Omove, ins->cls, mkref(RREG, RDX), mkref(RREG, RDX))); + fixarg(&ins->r, ins, blk, curi); /* make sure rhs is memory or reg */ + ins->l = mkref(RREG, RAX); + ins->keep = 1; + if (op == Orem) ins->op = Odiv; + else if (op == Ourem) ins->op = Oudiv; + insertinstr(blk, (*curi)++, *ins); /* duplicate ins to reuse tmp ref */ + *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, op < Orem ? RAX : RDX)); /* get output */ + temp = mkinstr(Ocopy, ins->cls, mkref(RREG, op < Orem ? 
RDX : RAX)); /* clobber other reg*/ + insertinstr(blk, ++(*curi), temp); + /* swap instrs so that clobber goes first */ + t = blk->ins.p[*curi - 1]; + blk->ins.p[*curi - 1] = blk->ins.p[*curi - 0]; + blk->ins.p[*curi - 0] = t; + break; + case Osub: + if (isintcon(ins->l)) { + /* sub imm, x -> sub x, imm; neg x */ + fixarg(&ins->l, ins, blk, curi); + ins->inplace = 1; + struct instr sub = *ins; + rswap(sub.l, sub.r); + ins->op = op = Oneg; + ins->l = insertinstr(blk, (*curi)++, sub); + ins->r = NOREF; + goto ALU; + } else if (kisint(ins->cls) && isintcon(ins->r)) { + ins->op = op = Oadd; + ins->r = mkintcon(concls(ins->r), -intconval(ins->r)); + } else { + goto ALU; + } + /* fallthru */ + case Oadd: + if (kisint(ins->cls)) { + if ((addarg4addrp(ins->l) || addarg4addrp(ins->r))) { + temp.op = Ocopy; + temp.cls = ins->cls; + temp.l = mkref(RTMP, t); + if (fuseaddr(&temp.l, blk, curi)) { + *ins = temp; + break; + } + } + } + /* fallthru */ + case Omul: + case Oand: case Oxor: case Oior: + /* commutative ops */ + if (iscon(ins->l)) + rswap(ins->l, ins->r); + goto ALU; + case Oneg: + if (kisflt(ins->cls)) { + /* flip sign bit with XORPS/D */ + static const uvlong sd[2] = {0x8000000000000000,0x8000000000000000}; + static const uint sf[4] = {0x80000000,80000000,0x80000000,80000000}; + ins->op = Oxor; + ins->r = mkdatref(NULL, mktype(ins->cls == KF32 ? TYFLOAT : TYDOUBLE), /*siz*/16, + /*align*/16, ins->cls == KF32 ? 
(void *)sf : sd, /*siz*/16, /*deref*/1); + } + /* fallthru */ + case Onot: + ALU: + if (!(op == Oadd && kisint(ins->cls))) /* 3-address add is lea */ + if (!(op == Omul && kisint(ins->cls) && isimm32(ins->r))) /* for (I)MUL r,r/m,imm */ + ins->inplace = 1; + if (iscon(ins->l)) { + fixarg(&ins->l, ins, blk, curi); + ins->l = insertinstr(blk, (*curi)++, mkinstr(Ocopy, ins->cls, ins->l)); + } + if (ins->r.bits) + case Omove: + fixarg(&ins->r, ins, blk, curi); + if (op == Oadd && isaddrcon(ins->r,1)) /* no 3-address add if rhs is mem */ + ins->inplace = 1; + break; + case Oloads8: case Oloadu8: case Oloads16: case Oloadu16: + case Oloads32: case Oloadu32: case Oloadi64: case Oloadf32: case Oloadf64: + loadstoreaddr(blk, &ins->l, curi); + break; + case Ostore8: case Ostore16: case Ostore32: case Ostore64: + loadstoreaddr(blk, &ins->l, curi); + if (isaddrcon(ins->r,1) || ins->r.t == RADDR) + ins->r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, ins->r)); + else + fixarg(&ins->r, ins, blk, curi); + break; + case Ocvtu32f: + fixarg(&ins->l, ins, blk, curi); + ins->l = insertinstr(blk, (*curi)++, mkinstr(Oextu32, KI64, ins->l)); + ins->op = Ocvts64f; + break; + case Ocvtf32u: case Ocvtf64u: + fixarg(&ins->l, ins, blk, curi); + if (ins->cls == KI32) { + ins->l = insertinstr(blk, (*curi)++, mkinstr(ins->op == Ocvtf32u ? 
Ocvtf32s : Ocvtf64s, KI64, ins->l));
+         ins->op = Oextu32;
+      } else assert(!"nyi flt -> u64");
+      break;
+   case Ocvtf32f64: case Ocvtf64f32: case Ocvtf32s: case Ocvtf64s: case Ocvts32f: case Ocvts64f:
+   case Ocvtu64f:
+   case Oexts8: case Oextu8: case Oexts16: case Oextu16: case Oexts32: case Oextu32:
+      /* constant-fold a conversion/extension of a numeric constant into a plain copy */
+      if (isnumcon(ins->l)) {
+         union ref it;
+         bool ok = foldunop(&it, ins->op, ins->cls, ins->l);
+         assert(ok);
+         ins->op = Ocopy;
+         ins->l = it;
+         break;
+      }
+      /* fallthrough: non-constant operand only needs its argument fixed up */
+   case Ocopy:
+      fixarg(&ins->l, ins, blk, curi);
+      break;
+   case Oxvaprologue:
+      fuseaddr(&ins->l, blk, curi);
+      assert(ins->l.t == RADDR);
+      /* !this must be the first instruction */
+      assert(*curi == 1);
+      assert(blk == fn->entry);
+      /* swap with the alloca in slot 0 so the prologue comes first */
+      t = blk->ins.p[0];
+      blk->ins.p[0] = blk->ins.p[1];
+      blk->ins.p[1] = t;
+      break;
+   }
+}
+
+/* Lower the block terminator.  For a conditional branch (Jb), get the branch
+ * condition into the CPU flags: reuse the flags of the instruction that last
+ * set them (iflagsrc) when possible, otherwise insert an explicit
+ * compare-against-zero (Oneq with ZEROREF).  For a return (Jret), move the
+ * return value(s) into the ABI return registers. */
+static void
+seljmp(struct function *fn, struct block *blk)
+{
+   if (blk->jmp.t == Jb && blk->jmp.arg[0].bits) {
+      int curi = blk->ins.n;
+      fixarg(&blk->jmp.arg[0], NULL, blk, &curi);
+      union ref c = blk->jmp.arg[0];
+      if (c.t != RTMP) {
+         /* condition is a constant/register: materialize it as a temporary
+          * (copy instruction) and run selection on that copy */
+         enum irclass cls = c.t == RICON ? KI32 : c.t == RXCON && conht[c.i].cls ? conht[c.i].cls : KPTR;
+         int curi = blk->ins.n;
+
+         c = insertinstr(blk, blk->ins.n, mkinstr(Ocopy, cls, c));
+         sel(fn, &instrtab[c.i], blk, &curi);
+      }
+      if (iflagsrc == c.i /* test cmp */
+            && (oiscmp(instrtab[c.i].op) || instrtab[c.i].op == Oand || instrtab[c.i].op == Osub)) {
+         /* the condition's flags are still live in EFLAGS: keep that
+          * instruction and branch on its result directly */
+         instrtab[c.i].keep = 1;
+      } else {
+         if (!(opflags[instrtab[c.i].op] & ZF) || blk->ins.n == 0 || c.i != blk->ins.p[blk->ins.n - 1]) {
+            /* no usable flags: insert a compare of the condition with zero */
+            struct instr *ins;
+            int curi = blk->ins.n;
+            blk->jmp.arg[0] = insertinstr(blk, blk->ins.n, mkinstr(Oneq, instrtab[c.i].cls, c, ZEROREF));
+            ins = &instrtab[blk->jmp.arg[0].i];
+            if (kisflt(ins->cls)) {
+               /* NOTE(review): for float classes the zero is materialized
+                * via a copy — presumably no immediate form; confirm in emit */
+               ins->r = insertinstr(blk, curi, mkinstr(Ocopy, ins->cls, ZEROREF));
+            }
+            ins->keep = 1;
+         } else if (instrtab[c.i].op == Oadd) {
+            /* prevent a 3-address add whose flag results are used from becoming a LEA */
+            instrtab[c.i].inplace = 1;
+         }
+      }
+   } else if (blk->jmp.t == Jret) {
+      if (blk->jmp.arg[0].bits) {
+         /* move the (up to two) return values into the ABI return registers */
+         int curi;
+         union ref r = mkref(RREG, fn->abiret[0].reg);
+         struct instr *ins = &instrtab[insertinstr(blk, blk->ins.n, mkinstr(Omove, fn->abiret[0].ty.cls, r , blk->jmp.arg[0])).i];
+         curi = blk->ins.n;
+         fixarg(&ins->r, ins, blk, &curi);
+         blk->jmp.arg[0] = r;
+         if (blk->jmp.arg[1].bits) {
+            r = mkref(RREG, fn->abiret[1].reg);
+            ins = &instrtab[insertinstr(blk, blk->ins.n, mkinstr(Omove, fn->abiret[1].ty.cls, r, blk->jmp.arg[1])).i];
+            curi = blk->ins.n;
+            fixarg(&ins->r, ins, blk, &curi);
+            blk->jmp.arg[1] = r;
+         }
+      }
+   }
+}
+
+/* Instruction-selection entry point: walk every block in layout order,
+ * fix up phi arguments in each predecessor, select every instruction,
+ * then lower the terminator.  iflagsrc tracks the most recent integer
+ * instruction whose ZF result is still valid (reset on flag clobbers)
+ * so seljmp can reuse it instead of inserting a compare. */
+void
+x86_64_isel(struct function *fn)
+{
+   extern int ninstr;
+   struct block *blk = fn->entry;
+
+   fn->stksiz = 0;
+   stkslots = xcalloc((nstkslots = ninstr) * sizeof *stkslots);
+   do {
+      int i;
+      for (i = 0; i < blk->phi.n; ++i) {
+         struct instr *ins = &instrtab[blk->phi.p[i]];
+         union ref *phi = phitab.p[ins->l.i];
+         for (int i = 0; i < blk->npred; ++i) {
+            /* phi arguments are fixed up at the end of each predecessor */
+            int curi = blkpred(blk, i)->ins.n;
+            fixarg(&phi[i], ins, blkpred(blk, i), &curi);
+         }
+      }
+      iflagsrc = -1;
+      for (i = 0; i < blk->ins.n; ++i) {
+         struct instr *ins = &instrtab[blk->ins.p[i]];
+         sel(fn, ins, blk, &i);
+         if (ins->op < countof(opflags) && kisint(insrescls(*ins))) {
+            if (opflags[ins->op] & ZF) iflagsrc = ins - instrtab;
+            else if (opflags[ins->op] & CLOBF) iflagsrc = -1;
+         }
+      }
+      seljmp(fn, blk);
+   } while ((blk = blk->lnext) != fn->entry);
+   free(stkslots);
+
+   if (ccopt.dbg.i) {
+      bfmt(ccopt.dbgout, "<< After isel >>\n");
+      irdump(fn);
+   }
+
+   fn->prop = 0;
+}
+
+/* vim:set ts=3 sw=3 expandtab: */
diff --git a/x86_64/sysv.c b/x86_64/sysv.c
new file mode 100644
index 0000000..32cc9e5
--- /dev/null
+++ b/x86_64/sysv.c
@@ -0,0 +1,313 @@
+#include "all.h"
+
+static int classify(uchar cls[2], const struct typedata *td, uint off);
+
+/* Merge one scalar field at byte offset 'off' into the two-eightbyte
+ * classification: cls[0] covers bytes 0..7, cls[1] bytes 8..15.
+ * Floats classify as SSE, integers and pointers as INTEGER. */
+static void
+clsscalar(uchar cls[2], uint off, union type ty)
+{
+   enum irclass k = type2cls[scalartypet(ty)];
+   uchar *fcls = &cls[off/8];
+   if (isflt(ty)) { /* SSE */
+      if (!*fcls || (*fcls == KF32 && k > *fcls))
+         *fcls = k;
+   } else { /* INTEGER */
+      assert(isint(ty) || ty.t == TYPTR);
+      if (cls2siz[*fcls] < cls2siz[k])
+         *fcls = k == KPTR ? KI64 : k;
+   }
+   /* a field starting in the upper half of its eightbyte forces the
+    * eightbyte's class to full 8-byte width */
+   if (off % 8 >= 4 && cls2siz[*fcls] < 8)
+      *fcls = kisint(*fcls) ?
KI64 : KF64;
+}
+
+/* Classify every element of array type 'ty' starting at byte offset 'off'.
+ * Returns the number of eightbytes used (1 or 2), or 0 for MEMORY
+ * (clearing both classes). */
+static int
+classifyarr(uchar cls[2], union type ty, uint off)
+{
+   union type chld = typechild(ty);
+   uint n = typearrlen(ty), siz = typesize(chld);
+   assert(n > 0);
+   for (uint i = 0; i < n; ++i) {
+      uint offx = off + i * siz;
+      if (isagg(chld)) {
+         if (!classify(cls, &typedata[chld.dat], offx))
+            return cls[0] = cls[1] = 0;
+      } else if (chld.t == TYARRAY) {
+         if (!classifyarr(cls, chld, offx))
+            return cls[0] = cls[1] = 0;
+      } else {
+         clsscalar(cls, offx, chld);
+      }
+   }
+   return !!cls[0] + !!cls[1];
+}
+
+/* SysV classification of an aggregate: merge every field (recursing into
+ * nested aggregates and arrays) into the two eightbyte classes.  Returns
+ * the number of eightbytes used, or 0 when the aggregate is MEMORY
+ * (larger than 16 bytes, or containing an unaligned field). */
+static int
+classify(uchar cls[2], const struct typedata *td, uint off)
+{
+   uint siz = alignup(td->siz, 4);
+   if (siz > 16) /* MEMORY */
+      return 0;
+   for (int i = 0; i < td->nmemb; ++i) {
+      struct fielddata *fld = &td->fld[i].f;
+      uint align = typealign(fld->t);
+      if (alignup(fld->off, align) != fld->off) /* unaligned field -> MEMORY */
+         return cls[0] = cls[1] = 0;
+      if (isagg(fld->t)) {
+         if (!classify(cls, &typedata[fld->t.dat], off + fld->off))
+            return cls[0] = cls[1] = 0;
+      } else if (fld->t.t == TYARRAY) {
+         /* incomplete (flexible) array members contribute nothing */
+         if (isincomplete(fld->t)) continue;
+         if (!classifyarr(cls, fld->t, off + fld->off))
+            return cls[0] = cls[1] = 0;
+      } else {
+         clsscalar(cls, fld->off + off, fld->t);
+      }
+   }
+   return !!cls[0] + !!cls[1];
+}
+
+/* Assign the passing location for one argument of type 'typ'.
+ * On success r[0..1] get register numbers; for MEMORY r[0] gets the stack
+ * offset.  *ni/*nf/*ns count consumed integer registers, SSE registers and
+ * stack bytes across a whole call.  Returns the number of registers used;
+ * 0 means the argument is passed on the stack (cls[] cleared). */
+static int
+abiarg(short r[2], uchar cls[2], uchar *r2off, int *ni, int *nf, int *ns, union irtype typ)
+{
+   static const uchar intregs[] = { RDI, RSI, RDX, RCX, R8, R9 };
+   enum { NINT = countof(intregs), NFLT = 8 };
+   int ret, ni_save, nf_save;
+
+   if (!typ.isagg) {
+      /* scalar: one SSE or INTEGER register, else the stack */
+      if (kisflt(cls[0] = typ.cls) && *nf < NFLT) {
+         r[0] = XMM0 + (*nf)++;
+      } else if (kisint(cls[0]) && *ni < NINT) {
+         r[0] = intregs[(*ni)++];
+      } else {
+         r[0] = *ns;
+         *ns += 8;
+         return 0; /* MEMORY */
+      }
+      return 1;
+   }
+   cls[0] = cls[1] = 0;
+   ret = classify(cls, &typedata[typ.dat], 0);
+   if (!ret) { /*MEMORY*/
+      r[0] = *ns;
+      *ns = alignup(*ns + typedata[typ.dat].siz, 8);
+      return 0;
+   }
+   assert(ret <= 2);
+   ni_save = *ni, nf_save = *nf;
+   *r2off = 8; /* second eightbyte lives 8 bytes into the aggregate */
+   for (int i = 0; i < ret; ++i) {
+      assert(cls[i]);
+      if (kisflt(cls[i]) && *nf < NFLT)
+         r[i] = XMM0 + (*nf)++;
+      else if (kisint(cls[i]) && *ni < NINT)
+         r[i] = intregs[(*ni)++];
+      else { /* MEMORY */
+         /* not enough registers for the whole aggregate: roll back the
+          * register counters and pass it on the stack instead */
+         *ni = ni_save, *nf = nf_save;
+         r[0] = *ns;
+         *ns = alignup(*ns + typedata[typ.dat].siz, 8);
+         r[1] = -1;
+         return cls[0] = cls[1] = 0;
+      }
+   }
+   return ret;
+}
+
+/* Assign the return-value registers for type 'typ': RAX/RDX for INTEGER
+ * eightbytes, XMM0/XMM1 for SSE.  For a MEMORY return, RDI carries the
+ * hidden result-location argument and *ni is bumped to reserve it. */
+static int
+abiret(short r[2], uchar cls[2], uchar *r2off, int *ni, union irtype typ)
+{
+   int ret;
+
+   if (!typ.isagg) {
+      r[0] = kisflt(cls[0] = typ.cls) ? XMM0 : RAX;
+      return 1;
+   }
+
+   cls[0] = cls[1] = 0;
+   ret = classify(cls, &typedata[typ.dat], 0);
+   if (!ret) { /* MEMORY */
+      assert(*ni == 0);
+      r[0] = RAX; /* on return should contain result location address */
+      r[1] = RDI; /* register for caller-owned result location argument */
+      ++*ni;
+      return 0;
+   }
+   assert(ret <= 2);
+   *r2off = 8;
+   /* note: the loop's ni/nf deliberately shadow the parameter; return
+    * register assignment always starts from RAX/XMM0 */
+   for (int i = 0, ni = 0, nf = 0; i < ret; ++i) {
+      assert(cls[i]);
+      if (kisflt(cls[i])) /* SSE (XMM0, XMM1) */
+         r[i] = XMM0 + nf++;
+      else if (kisint(cls[i])) /* INTEGER (RAX, RDX) */
+         r[i] = ni++ == 0 ? RAX : RDX;
+      else assert(0);
+   }
+   return ret;
+}
+
+/* Layout of va_list:
+ * struct {
+ *    ( 0) unsigned int gp_offset;
+ *    ( 4) unsigned int fp_offset;
+ *    ( 8) void *overflow_arg_area;
+ *    (16) void *reg_save_area;
+ * }
+ * Layout of register save area (align 16):
+ *    reg  off
+ *    rdi  0
+ *    rsi  8
+ *    rdx  16
+ *    rcx  24
+ *    r8   32
+ *    r9   40
+ *    xmm0 48
+ *    xmm1 64
+ *    ...
+ * in x86_64/emit xvaprologue generates the code to save the registers to a stack slot
+ * there only needs to be one xvaprologue if there's any vastart instrs, and it has to be
+ * at the beginning of the function (before IR generated by regalloc can touch any registers)
+ * then vastart can initialize va_list.reg_save_area with a pointer to that
+ */
+
+/* Expand the Ovastart instruction at blk->ins.p[*curi] into the stores that
+ * initialize the four va_list fields.  The shared 192-byte register save
+ * area (alloca + Oxvaprologue at the top of the entry block) is created on
+ * first use and reused by subsequent va_starts. */
+static void
+vastart(struct function *fn, struct block *blk, int *curi)
+{
+   union ref rsave; /* register save area */
+   int gpr0 = 0, fpr0 = 0, stk0 = 0;
+   struct instr *ins = &instrtab[blk->ins.p[*curi]];
+   union ref ap = ins->l, src, dst;
+   assert(ins->op == Ovastart);
+   /* add xvaprologue if not there yet, which must be the first
+    * real instruction in the function (following alloca) */
+   if (fn->entry->ins.n > 1 && instrtab[fn->entry->ins.p[1]].op == Oxvaprologue) {
+      rsave = mkref(RTMP, fn->entry->ins.p[0]); /* alloca instruction */
+      assert(instrtab[rsave.i].op == Oalloca16);
+   } else {
+      rsave = insertinstr(fn->entry, 0, mkalloca(192, 16));
+      insertinstr(fn->entry, 1, mkinstr(Oxvaprologue, 0, rsave, .keep=1));
+   }
+   /* find first unnamed gpr and fpr */
+   for (int i = 0; i < fn->nabiarg; ++i) {
+      struct abiarg abi = fn->abiarg[i];
+      if (!abi.isstk){
+         if (abi.reg < XMM0) ++gpr0;
+         else ++fpr0;
+      } else {
+         stk0 = abi.stk+8;
+      }
+   }
+   /* set ap->reg_save_area: reuse the vastart slot as the ap+16 address */
+   *ins = mkinstr(Oadd, KPTR, ap, mkref(RICON, 16));
+   dst = mkref(RTMP, ins - instrtab);
+   int i = *curi + 1;
+   insertinstr(blk, i++, mkinstr(Ostore64, 0, dst, rsave));
+   /* set ap->overflow_arg_area: first named stack argument past rbp+16 */
+   src = insertinstr(blk, i++, mkinstr(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, 16+stk0)));
+   dst = insertinstr(blk, i++, mkinstr(Oadd, KPTR, ap, mkref(RICON, 8)));
+   insertinstr(blk, i++, mkinstr(Ostore64, 0, dst, src));
+   /* set ap->gp_offset */
+   insertinstr(blk, i++, mkinstr(Ostore32, 0, ap, mkref(RICON, gpr0*8)));
+   /* set ap->fp_offset: xmm saves start at 48 with 16-byte stride */
+   dst = insertinstr(blk, i++, mkinstr(Oadd, KPTR, ap, mkref(RICON, 4)));
+   insertinstr(blk, i++, mkinstr(Ostore32, 0, dst, mkref(RICON, 6*8 + fpr0*16)));
+   *curi = i-1;
+}
+
+/* Expand an Ovaarg instruction: branch on whether the argument would still
+ * fit in the register save area, compute the address in either the save
+ * area or the overflow area, and merge the two addresses with a phi in a
+ * new merge block.  The original instruction slot is reused for the final
+ * load (or an alloca for aggregates). */
+static void
+vaarg(struct function *fn, struct block *blk, int *curi)
+{
+   short r[2];
+   uchar cls[2];
+   union ref tmp;
+   int ni = 0, nf = 0, ns = 0;
+   uchar r2off;
+   int var = blk->ins.p[*curi];
+   union ref ap = instrtab[var].l;
+   union irtype ty = ref2type(instrtab[var].r);
+
+   assert(instrtab[var].op == Ovaarg);
+   blk->ins.p[*curi] = newinstr(blk, (struct instr){Onop});
+
+   /* run the argument classification with fresh counters to learn where an
+    * argument of this type would have been passed */
+   int ret = abiarg(r, cls, &r2off, &ni, &nf, &ns, ty);
+
+   if (ret == 2) assert(!"nyi");
+   else if (ret == 1) {
+      struct block *merge;
+      union ref phi, phiargs[2];
+      /* int: l->gp_offset < 48 - num_gp * 8 */
+      /* sse: l->fp_offset < 304 - num_gp * 16 (why 304? ... 176) */
+      tmp = ni ? ap : insertinstr(blk, (*curi)++, mkinstr(Oadd, KPTR, ap, mkref(RICON, 4)));
+      tmp = insertinstr(blk, (*curi)++, mkinstr(Oloadu32, KI32, tmp));
+      tmp = insertinstr(blk, (*curi)++, mkinstr(Oulte, KI32, tmp, mkref(RICON, ni ? 48 - ni*8 : 176 - nf*16)));
+      merge = blksplitafter(fn, blk, *curi);
+      blk->jmp.t = 0;
+      useblk(fn, blk);
+      putcondbranch(fn, tmp, newblk(fn), newblk(fn));
+      useblk(fn, blk->s1);
+      {
+         /* phi0: &l->reg_save_area[l->gp/fp_offset] */
+         union ref sav = addinstr(fn, mkinstr(Oloadi64, KPTR, irbinop(fn, Oadd, KPTR, ap, mkref(RICON, 16))));
+         union ref roff = addinstr(fn, mkinstr(Oloadu32, KI32, irbinop(fn, Oadd, KPTR, ap, mkref(RICON, ni ? 0 : 4))));
+         phiargs[0] = irbinop(fn, Oadd, KPTR, sav, roff);
+         /* l->gp/fp_offset += num_gp/fp * 8(16) */
+         roff = irbinop(fn, Oadd, KI32, roff, mkref(RICON, ni ? ni * 8 : nf * 16));
+         addinstr(fn, mkinstr(Ostore32, 0, irbinop(fn, Oadd, KPTR, ap, mkref(RICON, ni ? 0 : 4)), roff));
+         assert(merge->npred == 1);
+         blkpred(merge, 0) = blk->s1;
+         blk->s1->jmp.t = Jb;
+         blk->s1->s1 = merge;
+      }
+      useblk(fn, blk->s2);
+      {
+         /* phi1: l->overflow_arg_area */
+         union ref adr = irbinop(fn, Oadd, KPTR, ap, mkref(RICON, 8));
+         union ref ovf = addinstr(fn, mkinstr(Oloadi64, KPTR, adr));
+         /* align no-op */
+
+         phiargs[1] = ovf;
+         /* update l->overflow_arg_area += size */
+         int siz = 8;
+         addinstr(fn, mkinstr(Ostore64, 0, adr, irbinop(fn, Oadd, KPTR, ovf, mkref(RICON, siz))));
+         putbranch(fn, merge);
+      }
+      assert(merge->npred == 2);
+      /* prepend the reused vaarg slot to the merge block's instructions */
+      vpush(&merge->ins, 0);
+      memmove(merge->ins.p+1, merge->ins.p, (merge->ins.n-1)*sizeof *merge->ins.p);
+      merge->ins.p[0] = var;
+      phi = insertphi(merge, KPTR);
+      memcpy(phitab.p[instrtab[phi.i].l.i], phiargs, sizeof phiargs);
+      if (!ty.isagg) {
+         /* scalar: the reused slot becomes the load through the phi address */
+         instrtab[var] = mkinstr(cls[0] == KI32 ? Oloads32 : Oloadi64, cls[0], phi);
+      } else {
+         /* aggregate: the reused slot becomes an alloca holding a copy */
+         instrtab[var] = mkalloca(8, 8);
+         tmp = insertinstr(merge, 1, mkinstr(Oloadi64, KI64, phi));
+         insertinstr(merge, 2, mkinstr(Ostore64, 0, mkref(RTMP, var), tmp));
+      }
+      fn->prop &= ~FNUSE;
+   } else {
+      assert(!"nyi");
+   }
+}
+
+/* register names, generated from LIST_REGS so they stay in enum order */
+static const char x86_64_rnames[][6] = {
+#define R(r) #r,
+   LIST_REGS(R)
+#undef R
+};
+
+/* SysV x86_64 machine-target description wired into the common backend */
+const struct mctarg t_x86_64_sysv = {
+   .gpr0 = RAX, .ngpr = R15 - RAX + 1,
+   .bpr = RBP,
+   .gprscratch = R11, .fprscratch = XMM15,
+   .fpr0 = XMM0, .nfpr = XMM15 - XMM0 + 1,
+   .rcallee = 1<<RBX | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15,
+   .rglob = 1<<RSP | 1<<RBP,
+   .rnames = x86_64_rnames,
+   .objkind = OBJELF,
+   .abiret = abiret,
+   .abiarg = abiarg,
+   .vastart = vastart,
+   .vaarg = vaarg,
+   .isel = x86_64_isel,
+   .emit = x86_64_emit
+};
+
+/* vim:set ts=3 sw=3 expandtab: */ |