From 24bcc929477751b056e81e7772dc2bb3d11ce4a5 Mon Sep 17 00:00:00 2001 From: lemon Date: Fri, 12 Dec 2025 17:40:35 +0100 Subject: s/amd64/x86_64/ --- amd64/all.h | 18 - amd64/emit.c | 1388 ---------------------------------------------------------- amd64/isel.c | 660 ---------------------------- amd64/sysv.c | 313 ------------- 4 files changed, 2379 deletions(-) delete mode 100644 amd64/all.h delete mode 100644 amd64/emit.c delete mode 100644 amd64/isel.c delete mode 100644 amd64/sysv.c (limited to 'amd64') diff --git a/amd64/all.h b/amd64/all.h deleted file mode 100644 index 992d47e..0000000 --- a/amd64/all.h +++ /dev/null @@ -1,18 +0,0 @@ -#include "../ir/ir.h" - -#define LIST_REGS(_) \ - _(RAX) _(RCX) _(RDX) _(RBX) _(RSP) _(RBP) _(RSI) _(RDI) \ - _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ - _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) \ - _(XMM8) _(XMM9) _(XMM10) _(XMM11) _(XMM12) _(XMM13) _(XMM14) _(XMM15) - -enum reg { -#define R(r) r, - LIST_REGS(R) -#undef R -}; - -void amd64_isel(struct function *); -void amd64_emit(struct function *); - -/* vim:set ts=3 sw=3 expandtab: */ diff --git a/amd64/emit.c b/amd64/emit.c deleted file mode 100644 index 6121f5e..0000000 --- a/amd64/emit.c +++ /dev/null @@ -1,1388 +0,0 @@ -#include "all.h" -#include "../obj/obj.h" -#include "../endian.h" - -/** Instruction operands ** - * - * Can be a register, a 32-bit immediate, - * a memory reference [base + index * scale + disp], - * or a relocatable reference to some symbol plus a displacement and maybe index*scale - */ -enum operkind { ONONE, OREG, OIMM, OMEM, OSYM }; -enum { NOBASE = 63, NOINDEX = 63 }; -struct oper { - uchar t; - union { - struct { uchar base; }; /* OMEM */ - struct { uchar cindex : 6, cshift : 2; }; /* OSYM */ - }; - union { - struct { uchar index, shift; }; /* OMEM */ - ushort con; /* OSYM */ - }; - union { - uchar reg; /* OREG */ - int disp; /* OMEM, OSYM */ - int imm; /* OIMM */ - }; -}; -#define mkoper(t, ...) ((struct oper){(t), __VA_ARGS__}) -#define reg2oper(R) (assert((uint)(R) <= XMM15), mkoper(OREG, .reg = (R))) - -static struct oper mkmemoper(union ref); - -static struct oper -ioper(int i) -{ - int reg = instrtab[i].reg - 1; - return reg < 0 ? mkoper(ONONE,) : reg2oper(reg); -} - -static struct oper -ref2oper(union ref r) -{ - switch (r.t) { - case RTMP: return ioper(r.i); - case RREG: return reg2oper(r.i); - case RICON: return mkoper(OIMM, .imm = r.i); - case RXCON: - if (conht[r.i].cls == KI32) - return mkoper(OIMM, .imm = conht[r.i].i); - else if (conht[r.i].cls == KI64) { - vlong i = conht[r.i].i; - assert(i == (int)i); - return mkoper(OIMM, .imm = i); - } else if (!conht[r.i].cls) { - return mkoper(OSYM, .con = r.i, .cindex = NOINDEX); - } - assert(0); - case RADDR: return mkmemoper(r); - default: assert(0); - } -} - -static void -addmemoper(struct oper *mem, struct oper add) -{ - assert(mem->t == OMEM); - if (add.t == OIMM) { - mem->disp += add.imm; - } else if (add.t == OREG) { - if (mem->base == NOBASE) - mem->base = add.reg; - else if (mem->index == NOINDEX) - mem->index = add.reg; - else - assert(0); - } -} - -/* helpers to convert a reference to an operand of a specific kind, - * with assertions to make sure nothing went wrong */ - -static inline struct oper -mkregoper(union ref r) -{ - assert(r.t == RREG || (r.t == RTMP && ioper(r.i).t == OREG)); - return r.t == RREG ? reg2oper(r.i) : ioper(r.i); -} - -static inline struct oper -mkimmoper(union ref r) -{ - assert(iscon(r) && concls(r) == KI32); - return mkoper(OIMM, .imm = intconval(r)); -} - -#define ismemref(ref) ((ref).t == RTMP && ioper((ref).i).t == OMEM) -#define isregref(ref) ((ref).t == RREG || ((ref).t == RTMP && ioper((ref).i).t == OREG)) - -static inline struct oper -mkimmregoper(union ref r) -{ - assert(isregref(r) || (iscon(r) && concls(r) == KI32)); - return ref2oper(r); -} - -static inline struct oper -mkdatregoper(union ref r) -{ - assert(isregref(r) || (r.t == RXCON && conht[r.i].deref)); - return ref2oper(r); -} - -static inline struct oper -mkimmdatregoper(union ref r) -{ - assert(isregref(r) || r.t == RICON || (r.t == RXCON && (conht[r.i].cls == KI32 || conht[r.i].deref))); - return ref2oper(r); -} - -static int rbpoff; - -static struct oper -mkmemoper(union ref r) -{ - if (r.t == RTMP) { - struct oper wop = ioper(r.i); - if (wop.t == OMEM) return wop; - assert(wop.t == OREG); - return mkoper(OMEM, .base = wop.reg, .index = NOINDEX); - } else if (r.t == RADDR) { - const struct addr *addr = &addrht[r.i]; - struct oper mem; - - assert(addr->shift <= 3); - if (addr->base.t == RTMP && ioper(addr->base.i).t == OMEM) { - mem = ioper(addr->base.i); - if (addr->index.bits) addmemoper(&mem, mkregoper(addr->index)); - assert(!mem.shift); - mem.shift = addr->shift; - addmemoper(&mem, mkoper(OIMM, .imm = addr->disp)); - return mem; - } - if (isaddrcon(addr->base,0)) { - return mkoper(OSYM, .con = addr->base.i, - .cindex = addr->index.bits ? mkregoper(addr->index).reg : NOINDEX, - .cshift = addr->shift, - .disp = addr->disp); - } else if (isintcon(addr->base)) { - assert(!addr->disp); - return mkoper(OMEM, .base = NOBASE, - .index = addr->index.bits ? mkregoper(addr->index).reg : NOINDEX, - .disp = intconval(addr->base), - .shift = addr->shift); - } else if (isaddrcon(addr->index,0)) { - assert(!addr->shift); - return mkoper(OSYM, .con = addr->index.i, - .cindex = addr->base.bits ? mkregoper(addr->base).reg : NOINDEX, - .disp = addr->disp); - } - return mkoper(OMEM, .base = addr->base.bits ? mkregoper(addr->base).reg : NOBASE, - .index = addr->index.bits ? mkregoper(addr->index).reg : NOINDEX, - .disp = addr->disp, - .shift = addr->shift); - } else if (r.t == RXCON) { - assert(!conht[r.i].cls); - return mkoper(OSYM, .con = r.i, .cindex = NOINDEX); - } else { - return mkoper(OMEM, .base = isregref(r) ? ref2oper(r).reg : NOBASE, - .index = NOINDEX, - .disp = isregref(r) ? 0 : mkimmoper(r).imm); - } -} - -/** Instruction description tables ** - * - * Each instruction is a list of descs, and the first one that matches - * is emitted. Each entry has a size pattern field, which is a bitset - * of the sizes (in bytes) that the entry matches, and 2 operand patterns, - * which describe the operands that can match (for example, PRAX matches - * a RAX register operand, PGPR matches any integer register, I8 matches - * an immediate operand between [-128,127]) The rest of the fields describe - * the instruction's encoding. - * (reference: https://www.felixcloutier.com/x86/ & https://wiki.osdev.org/X86-64_Instruction_Encoding ) - */ - -enum operpat { - PNONE, - PRAX, - PRCX, - PGPR, - PFPR, - P1, /* imm = 1 */ - PN1, /* imm = -1 */ - PI8, - PU8, - PI16, - PU16, - PI32, - PU32, - PMEM, - PSYM, -}; -enum operenc { - EN_R = 1, /* reg with /r */ - EN_RR, /* reg, reg with /r */ - EN_RRX, /* reg, reg with /r (inverted) */ - EN_MR, /* mem, reg with /r */ - EN_RM, /* reg, mem with /r */ - EN_M, /* mem */ - EN_RI8, /* reg, imm8 with /0 */ - EN_RI32, /* reg, imm32 with /0 */ - EN_MI8, /* mem, imm8 with /x */ - EN_MI16, /* mem, imm16 with /x */ - EN_MI32, /* mem, imm32 with /x */ - EN_OI, /* reg, imm32 with op + reg */ - EN_I8, /* imm8 */ - EN_I32, /* imm32 */ - EN_R32, /* rel32 */ - NOPERENC, -}; -struct desc { - uchar psiz; /* subset of {1,2,4,8} */ - uchar ptd, pts; /* bitsets of enum operpat */ - uchar nopc; /* countof opc */ - const char opc[8]; /* opcode bytes */ - uchar operenc; /* enum operenc */ - uchar ext; /* ModR/M.reg opc extension */ - bool r8; /* uses 8bit register */ - bool norexw; /* do not use REX.W even if size is 64 bits */ -}; - -/* match operand against pattern */ -static inline bool -opermatch(enum operpat pat, struct oper oper) -{ - switch (pat) { - case PNONE: return !oper.t; - case PRAX: return oper.t == OREG && oper.reg == RAX; - case PRCX: return oper.t == OREG && oper.reg == RCX; - case PGPR: return oper.t == OREG && oper.reg <= R15; - case PFPR: return oper.t == OREG && oper.reg >= XMM0; - case P1: return oper.t == OIMM && oper.imm == 1; - case PN1: return oper.t == OIMM && oper.imm == -1; - case PI8: return oper.t == OIMM && (schar)oper.imm == oper.imm; - case PU8: return oper.t == OIMM && (uchar)oper.imm == oper.imm; - case PI16: return oper.t == OIMM && (short)oper.imm == oper.imm; - case PU16: return oper.t == OIMM && (ushort)oper.imm == oper.imm; - case PI32: return oper.t == OIMM; - case PU32: return oper.t == OIMM && oper.imm >= 0; - case PMEM: return in_range(oper.t, OMEM, OSYM); - case PSYM: return oper.t == OSYM; - } - assert(0); -} - -/* code output helpers */ -#define B(b) (*(*pcode)++ = (b)) -#define D(xs, N) (memcpy(*pcode, (xs), (N)), (*pcode) += (N)) -#define I16(w) (wr16le(*pcode, (w)), *pcode += 2) -#define I32(w) (wr32le(*pcode, (w)), *pcode += 4) -#define DS(S) D(S, sizeof S - 1) - -static bool usebp; /* use RBP? */ -static const char *curfnsym; -static uchar *fnstart; - -/* Given an instruction description table, find the first entry that matches - * the operands (where dst, src are the operands in intel syntax order) and encode it */ -static void -encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct oper dst, struct oper src) -{ - const uchar *opc; - int nopc; - struct oper mem; - enum reg reg; - const struct desc *en = NULL; - for (int i = 0; i < ntab; ++i) { - if ((tab[i].psiz & cls2siz[k]) && opermatch(tab[i].ptd, dst) && opermatch(tab[i].pts, src)) { - en = &tab[i]; - break; - } - } - assert(en && "no match for instr"); - - if (en->ptd == PFPR) dst.reg &= 15; - if (en->pts == PFPR) src.reg &= 15; - opc = (uchar *)en->opc; - nopc = en->nopc; - /* mandatory prefixes go before REX */ - if (*opc == 0x66 || *opc == 0xF2 || *opc == 0xF3) - B(*opc++), --nopc; - int rex = in_range(k, KI64, KPTR) << 3; /* REX.W */ - if (en->norexw) rex = 0; - switch (en->operenc) { - case EN_RR: /* mod = 11; reg = dst; rm = src */ - rex |= (dst.reg >> 3) << 2; /* REX.R */ - rex |= (src.reg >> 3) << 0; /* REX.B */ - if (rex) B(0x40 | rex); - else if (en->r8 && in_range(src.reg, RSP, RDI)) { - /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */ - B(0x40); - } - D(opc, nopc); - B(0300 | (dst.reg & 7) << 3 | (src.reg & 7)); - break; - case EN_RRX: /* mod = 11; reg = src; rm = dst */ - rex |= (src.reg >> 3) << 2; /* REX.R */ - rex |= (dst.reg >> 3) << 0; /* REX.B */ - if (rex) B(0x40 | rex); - else if (en->r8 && in_range(dst.reg, RSP, RDI)) { - /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */ - B(0x40); - } - D(opc, nopc); - B(0300 | (src.reg & 7) << 3 | (dst.reg & 7)); - break; - case EN_MR: - mem = dst; - reg = src.reg; - goto Mem; - case EN_RM: - mem = src; - reg = dst.reg; - goto Mem; - case EN_M: case EN_MI8: case EN_MI16: case EN_MI32: - mem = dst; - reg = en->ext; - Mem: - if (mem.t == OMEM) { - if (mem.base != NOBASE) rex |= mem.base >> 3; /* REX.B */ - if (mem.index != NOINDEX) rex |= mem.index >> 3 << 1; /* REX.X */ - } else { - if (mem.cindex != NOINDEX) rex |= mem.cindex >> 3 << 1; /* REX.X */ - } - if (en->operenc != EN_M) - rex |= (reg >> 3) << 2; /* REX.R */ - if (rex) B(0x40 | rex); - else if (en->r8 && in_range(reg, RSP, RDI)) B(0x40); - - if (mem.t == OSYM) { - D(opc, nopc); - if (mem.cindex == NOINDEX) { - /* %rip(var) */ - static uchar offs[NOPERENC] = { [EN_MI8] = 1, [EN_MI16] = 2, [EN_MI32] = 4 }; - enum relockind r = - (!conht[mem.con].deref && ccopt.pic) ? (rex ? REL_GOTPCRELX : REL_GOTPCRELX_REX) - : REL_PCREL32; - int off = -4 - offs[en->operenc]; - B(/*mod 0*/ (reg & 7) << 3 | RBP); - objreloc(xcon2sym(mem.con), r, Stext, *pcode - objout.textbegin, mem.disp + off); - } else { - /* var(,%reg,shift) */ - assert(!ccopt.pic && !ccopt.pie && "cannot encode [RIP-rel + REG] for position independent"); - B(/*mod 0*/ (reg & 7) << 3 | RSP); - B(mem.cshift << 6 | mem.cindex << 3 | RBP); /* SIB [index*s + disp32] */ - objreloc(xcon2sym(mem.con), REL_ABS32S, Stext, *pcode - objout.textbegin, mem.disp); - } - I32(0); - } else { - int mod; - bool sib = 0; - if (mem.base == RBP) { - if (!usebp) { - /* if RBP isn't being set up (leaf functions with no stack allocations), - * access thru RSP (function arguments in the stack) */ - mem.base = RSP; - mem.disp -= 8; - } else if (mem.disp <= 0) { - mem.disp += rbpoff; - } - } - if (mem.base != NOBASE) { - if (mem.index == NOINDEX && mem.shift == 0) sib = 0; - else sib = 1; - mod = !mem.disp ? 0 /* disp = 0 -> mod = 00 */ - : (uint)(mem.disp + 128) < 256 ? 1 /* disp8 -> mod = 01 */ - : 2; /* disp32 -> mod = 10 */ - if (mod == 0 && (mem.base == RBP || mem.base == R13)) mod = 1; - if (mem.base == RSP || mem.base == R12) sib = 1; - } else { - /* [disp + (index*s)] */ - sib = 1; - mem.base = RBP; - mod = 0; - assert(mem.index != RSP); - } - D(opc, nopc); - B(mod << 6 | (reg & 7) << 3 | (sib ? 4 : (mem.base & 7))); - if (sib) { - if (mem.index == NOINDEX) mem.index = RSP; - B(mem.shift << 6 | (mem.index & 7) << 3 | (mem.base & 7)); - } - if (mod == 1) B(mem.disp); - else if (mod == 2 || (mod == 0 && mem.base == RBP/*RIP-rel*/) || (mod == 0 && sib && mem.base == RBP/*absolute*/)) { - I32(mem.disp); - } - } - if (en->operenc == EN_MI8) B(src.imm); - if (en->operenc == EN_MI16) I16(src.imm); - if (en->operenc == EN_MI32) I32(src.imm); - break; - case EN_R: case EN_RI32: case EN_RI8: - rex |= (dst.reg >> 3) << 0; /* REX.B */ - if (rex) B(0x40 | rex); - else if (en->r8 && in_range(dst.reg, RSP, RDI)) { - /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */ - B(0x40); - } - D(opc, nopc); - B(0300 | en->ext << 3 | (dst.reg & 7)); - if (en->operenc == EN_RI32) - I32(src.imm); - else if (en->operenc == EN_RI8) - B(src.imm); - break; - case EN_OI: - rex |= (dst.reg >> 3) << 0; /* REX.B */ - if (rex) B(0x40 | rex); - B(*opc++ + (dst.reg & 7)); - D(opc, nopc - 1); - I32(src.imm); - break; - case EN_I8: - if (rex) B(0x40 | rex); - D(opc, nopc); - B(src.imm); - break; - case EN_I32: - if (rex) B(0x40 | rex); - D(opc, nopc); - I32(src.imm); - break; - case EN_R32: - if (rex) B(0x40 | rex); - D(opc, nopc); - assert(dst.t == OSYM); - const char *sym = xcon2sym(dst.con); - if (sym != curfnsym) { - enum relockind r = (ccopt.pie|ccopt.pic) ? REL_PLT32 : REL_PCREL32; - objreloc(sym, r, Stext, *pcode - objout.textbegin, -4); - I32(0); - } else { - /* self-recursive call */ - I32(fnstart - *pcode - 4); - } - break; - } -} - -#define DEFINSTR1(X, ...) \ - static void \ - X(uchar **pcode, enum irclass k, struct oper oper) \ - { \ - static const struct desc tab[] = { __VA_ARGS__ }; \ - encode(pcode, tab, countof(tab), k, oper, mkoper(0,)); \ - } - -#define DEFINSTR2(X, ...) \ - static void \ - X(uchar **pcode, enum irclass k, struct oper dst, struct oper src) \ - { \ - static const struct desc tab[] = { __VA_ARGS__ }; \ - encode(pcode, tab, countof(tab), k, dst, src); \ - } - -#define O(s) (sizeof s)-1,s -DEFINSTR2(Xmovb, - {-1, PMEM, PGPR, O("\x88"), EN_MR, .r8=1}, /* MOV m8, r8 */ - {-1, PMEM, PI8, O("\xC6"), EN_MI8, .r8=1}, /* MOV m8, imm8 */ - {-1, PMEM, PU8, O("\xC6"), EN_MI8, .r8=1}, /* MOV m8, imm8 */ -) -DEFINSTR2(Xmovw, - {-1, PMEM, PGPR, O("\x66\x89"), EN_MR}, /* MOV m16, r16 */ - {-1, PMEM, PI16, O("\x66\xC7"), EN_MI16}, /* MOV m16, imm16 */ - {-1, PMEM, PU16, O("\x66\xC7"), EN_MI16}, /* MOV m16, imm16 */ -) -static void Xmov(uchar **pcode, enum irclass k, struct oper dst, struct oper src) -{ - static const struct desc all[] = { - {4 , PGPR, PI32, O("\xB8"), EN_OI}, /* MOV r32, imm */ - {4|8, PGPR, PGPR, O("\x8B"), EN_RR}, /* MOV r32/64, r32/64 */ - {4|8, PMEM, PGPR, O("\x89"), EN_MR}, /* MOV m32/64, r32/64 */ - {4|8, PGPR, PMEM, O("\x8B"), EN_RM}, /* MOV r32/64, m32/64 */ - {4|8, PMEM, PI32, O("\xC7"), EN_MI32}, /* MOV m32/64, imm */ - { 8, PGPR, PU32, O("\xB8"), EN_OI, .norexw=1}, /* MOV r64, uimm */ - { 8, PGPR, PI32, O("\xC7"), EN_RI32}, /* MOV r64, imm */ - {4 , PFPR, PFPR, O("\x0F\x28"), EN_RR}, /* MOVPS xmm, xmm */ - {4 , PFPR, PMEM, O("\xF3\x0F\x10"), EN_RM}, /* MOVSS xmm, m32 */ - {4 , PMEM, PFPR, O("\xF3\x0F\x11"), EN_MR}, /* MOVSS m32, xmm */ - {8 , PFPR, PFPR, O("\x0F\x28"), EN_RR}, /* MOVPS xmm, xmm */ - {8 , PFPR, PMEM, O("\xF2\x0F\x10"), EN_RM}, /* MOVSD xmm, m64 */ - {8 , PMEM, PFPR, O("\xF2\x0F\x11"), EN_MR}, /* MOVSS m64, xmm */ - {4|8, PFPR, PGPR, O("\x66\x0F\x6E"), EN_RR}, /* MOVD/Q xmm, r64/32 */ - {4|8, PGPR, PFPR, O("\x66\x0F\x7E"), EN_RRX}, /* MOVD/Q r64/32, xmm */ - }; - static const uchar k2off[] = { - [KI32] = 0, - [KI64] = 1, [KPTR] = 1, - [KF32] = 7, - [KF64] = 10, - }; - encode(pcode, all + k2off[k], countof(all) - k2off[k], k, dst, src); -} -DEFINSTR2(Xmovsxl, - {8, PGPR, PMEM, O("\x63"), EN_RM}, /* MOVSXD r64, m32 */ - {8, PGPR, PGPR, O("\x63"), EN_RR}, /* MOVSXD r64, r32 */ - {4, PGPR, PMEM, O("\x8B"), EN_RM}, /* MOV r32, m32 */ - {4, PGPR, PGPR, O("\x8B"), EN_RR}, /* MOV r32, r32 */ -) -DEFINSTR2(Xmovsxw, - {4|8, PGPR, PMEM, O("\x0F\xBF"), EN_RM}, /* MOVSX r64, m16 */ - {4|8, PGPR, PGPR, O("\x0F\xBF"), EN_RR}, /* MOVSX r64, r16 */ -) -DEFINSTR2(Xmovsxb, - {4|8, PGPR, PMEM, O("\x0F\xBE"), EN_RM}, /* MOVSX r64, m8 */ - {4|8, PGPR, PGPR, O("\x0F\xBE"), EN_RR, .r8=1}, /* MOVSX r64, r8 */ -) -DEFINSTR2(Xmovzxw, - {4|8, PGPR, PMEM, O("\x0F\xB7"), EN_RM}, /* MOVZX r64, m16 */ - {4|8, PGPR, PGPR, O("\x0F\xB7"), EN_RR}, /* MOVZX r64, r16 */ -) -DEFINSTR2(Xmovzxb, - {4|8, PGPR, PMEM, O("\x0F\xB6"), EN_RM}, /* MOVZX r64, m8 */ - {4|8, PGPR, PGPR, O("\x0F\xB6"), EN_RR, .r8=1}, /* MOVZX r64, r8 */ -) -DEFINSTR2(Xmovaps, - {-1, PMEM, PFPR, O("\x0F\x29"), EN_MR}, /* MOVAPS mem, xmm */ -) -DEFINSTR2(Xxchg, - {4|8, PGPR, PGPR, O("\x87"), EN_RR}, /* XCHG r32/64, r32/64 */ - {4|8, PGPR, PMEM, O("\x87"), EN_RM}, /* XCHG r32/64, m32/64 */ - {4|8, PMEM, PGPR, O("\x87"), EN_MR}, /* XCHG r32/64, m32/64 */ -) -DEFINSTR2(Xlea, - {4|8, PGPR, PMEM, O("\x8D"), EN_RM}, /* LEA r32/64,m32/64 */ - { 8, PGPR, PSYM, O("\x8D"), EN_RM}, /* LEA rel32 */ -) -DEFINSTR2(Xadd, - {4|8, PGPR, PGPR, O("\x03"), EN_RR}, /* ADD r32/64, r32/64 */ - {4|8, PGPR, P1, O("\xFF"), EN_R, .ext=0}, /* INC r32/64 */ - {4|8, PGPR, PN1, O("\xFF"), EN_R, .ext=1}, /* DEC r32/64 */ - {4|8, PGPR, PI8, O("\x83"), EN_RI8}, /* ADD r32/64, imm8 */ - {4|8, PRAX, PI32, O("\x05"), EN_I32}, /* ADD eax/rax, imm */ - {4|8, PGPR, PI32, O("\x81"), EN_RI32}, /* ADD r32/64, imm */ - { 8, PGPR, PMEM, O("\x03"), EN_RM}, /* ADD r64, m64 */ -) -DEFINSTR2(Xaddf, - {4, PFPR, PFPR, O("\xF3\x0F\x58"), EN_RR}, /* ADDSS xmm, xmm */ - {8, PFPR, PFPR, O("\xF2\x0F\x58"), EN_RR}, /* ADDSD xmm, xmm */ - {4, PFPR, PMEM, O("\xF3\x0F\x58"), EN_RM}, /* ADDSS xmm, m32 */ - {8, PFPR, PMEM, O("\xF2\x0F\x58"), EN_RM}, /* ADDSD xmm, m64 */ -) -DEFINSTR2(Xsub, - {4|8, PGPR, PGPR, O("\x2B"), EN_RR}, /* SUB r32/64, r32/64 */ - {4|8, PGPR, P1, O("\xFF"), EN_R, .ext=1}, /* DEC r32/64 */ - {4|8, PGPR, PN1, O("\xFF"), EN_R, .ext=0}, /* INC r32/64 */ - {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=5}, /* SUB r32/64, imm8 */ - {4|8, PRAX, PI32, O("\x2D"), EN_I32}, /* SUB eax/rax, imm */ - {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=5}, /* SUB r32/64, imm */ - { 8, PGPR, PMEM, O("\x2B"), EN_RM}, /* SUB r64, m64 */ -) -DEFINSTR2(Xsubf, - {4, PFPR, PFPR, O("\xF3\x0F\x5C"), EN_RR}, /* SUBSS xmm, xmm */ - {8, PFPR, PFPR, O("\xF2\x0F\x5C"), EN_RR}, /* SUBSD xmm, xmm */ - {4, PFPR, PMEM, O("\xF3\x0F\x5C"), EN_RM}, /* SUBSS xmm, m32 */ - {8, PFPR, PMEM, O("\xF2\x0F\x5C"), EN_RM}, /* SUBSD xmm, m64 */ -) -DEFINSTR2(Xmulf, - {4, PFPR, PFPR, O("\xF3\x0F\x59"), EN_RR}, /* MULSS xmm, xmm */ - {8, PFPR, PFPR, O("\xF2\x0F\x59"), EN_RR}, /* MULSD xmm, xmm */ - {4, PFPR, PMEM, O("\xF3\x0F\x59"), EN_RM}, /* MULSS xmm, m32 */ - {8, PFPR, PMEM, O("\xF2\x0F\x59"), EN_RM}, /* MULSD xmm, m64 */ -) -DEFINSTR2(Xdivf, - {4, PFPR, PFPR, O("\xF3\x0F\x5E"), EN_RR}, /* DIVSS xmm, xmm */ - {8, PFPR, PFPR, O("\xF2\x0F\x5E"), EN_RR}, /* DIVSD xmm, xmm */ - {4, PFPR, PMEM, O("\xF3\x0F\x5E"), EN_RM}, /* DIVSS xmm, m32 */ - {8, PFPR, PMEM, O("\xF2\x0F\x5E"), EN_RM}, /* DIVSD xmm, m64 */ -) -DEFINSTR2(Xand, - {4|8, PGPR, PGPR, O("\x23"), EN_RR}, /* AND r32/64, r32/64 */ - {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=4}, /* AND r32/64, imm8 */ - {4|8, PRAX, PI32, O("\x25"), EN_I32}, /* AND eax/rax, imm */ - {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=4}, /* AND r32/64, imm */ - { 8, PGPR, PMEM, O("\x23"), EN_RM}, /* AND r64, m64 */ -) -DEFINSTR2(Xior, - {4|8, PGPR, PGPR, O("\x0B"), EN_RR}, /* OR r32/64, r32/64 */ - {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=1}, /* OR r32/64, imm8 */ - {4|8, PRAX, PI32, O("\x0D"), EN_I32}, /* OR eax/rax, imm */ - {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=1}, /* OR r32/64, imm */ - { 8, PGPR, PMEM, O("\x0B"), EN_RM}, /* OR r64, m64 */ - {4|8, PFPR, PFPR, O("\x0F\x57"), EN_RR}, /* ORPS xmm, xmm */ -) -DEFINSTR2(Xxor, - {4|8, PGPR, PGPR, O("\x33"), EN_RR}, /* XOR r32/64, r32/64 */ - {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=6}, /* XOR r32/64, imm8 */ - {4|8, PRAX, PI32, O("\x35"), EN_I32}, /* XOR eax/rax, imm */ - {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=6}, /* XOR r32/64, imm */ - { 8, PGPR, PMEM, O("\x33"), EN_RM}, /* XOR r64, m64 */ - {4|8, PFPR, PFPR, O("\x0F\x57"), EN_RR}, /* XORPS xmm, xmm */ - {4|8, PFPR, PMEM, O("\x0F\x57"), EN_RM}, /* XORPS xmm, m128 */ -) -DEFINSTR2(Xshl, - {4|8, PGPR, P1, O("\xD1"), EN_R, .ext=4}, /* SHL r32/64, 1 */ - {4|8, PGPR, PI32, O("\xC1"), EN_RI8, .ext=4}, /* SHL r32/64, imm */ - {4|8, PGPR, PRCX, O("\xD3"), EN_R, .ext=4}, /* SHL r32/64, CL */ -) -DEFINSTR2(Xsar, - {4|8, PGPR, P1, O("\xD1"), EN_R, .ext=7}, /* SAR r32/64, 1 */ - {4|8, PGPR, PI32, O("\xC1"), EN_RI8, .ext=7}, /* SAR r32/64, imm */ - {4|8, PGPR, PRCX, O("\xD3"), EN_R, .ext=7}, /* SAR r32/64, CL */ -) -DEFINSTR2(Xshr, - {4|8, PGPR, P1, O("\xD1"), EN_R, .ext=5}, /* SHR r32/64, 1 */ - {4|8, PGPR, PI32, O("\xC1"), EN_RI8, .ext=5}, /* SHR r32/64, imm */ - {4|8, PGPR, PRCX, O("\xD3"), EN_R, .ext=5}, /* SHR r32/64, CL */ -) -DEFINSTR2(Xcvtss2sd, - {-1, PFPR, PFPR, O("\xF3\x0F\x5A"), EN_RR}, /* CVTSS2SD xmm, xmm */ - {-1, PFPR, PMEM, O("\xF3\x0F\x5A"), EN_RM}, /* CVTSS2SD xmm, m32/64 */ -) -DEFINSTR2(Xcvtsd2ss, - {-1, PFPR, PFPR, O("\xF2\x0F\x5A"), EN_RR}, /* CVTSD2SS xmm, xmm */ - {-1, PFPR, PMEM, O("\xF2\x0F\x5A"), EN_RM}, /* CVTSD2SS xmm, m32/64 */ -) -DEFINSTR2(Xcvtsi2ss, - {-1, PFPR, PGPR, O("\xF3\x0F\x2A"), EN_RR}, /* CVTSI2SS xmm, r32/64 */ - {-1, PFPR, PMEM, O("\xF3\x0F\x2A"), EN_RM}, /* CVTSI2SS xmm, m32/64 */ -) -DEFINSTR2(Xcvtsi2sd, - {-1, PFPR, PGPR, O("\xF2\x0F\x2A"), EN_RR}, /* CVTSI2SD xmm, r32/64 */ - {-1, PFPR, PMEM, O("\xF2\x0F\x2A"), EN_RM}, /* CVTSI2SD xmm, m32/64 */ -) -DEFINSTR2(Xcvttss2si, - {-1, PGPR, PFPR, O("\xF3\x0F\x2C"), EN_RR}, /* CVTTSS2SI r32/64, xmm */ - {-1, PGPR, PMEM, O("\xF3\x0F\x2C"), EN_RM}, /* CVTTSS2SI r32/64, m32 */ -) -DEFINSTR2(Xcvttsd2si, - {-1, PGPR, PFPR, O("\xF2\x0F\x2C"), EN_RR}, /* CVTTSD2SI r32/64, xmm */ - {-1, PGPR, PMEM, O("\xF2\x0F\x2C"), EN_RM}, /* CVTTSD2SI r32/64, m32 */ -) -DEFINSTR1(Xneg, - {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=3} /* NEG r32/64 */ -) -DEFINSTR1(Xnot, - {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=2} /* NOT r32/64 */ -) -DEFINSTR1(Xidiv, - {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=7}, /* IDIV r32/64 */ - {4|8, PMEM, 0, O("\xF7"), EN_M, .ext=7}, /* IDIV m32/64 */ -) -DEFINSTR1(Xdiv, - {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=6}, /* DIV r32/64 */ - {4|8, PMEM, 0, O("\xF7"), EN_M, .ext=6}, /* DIV m32/64 */ -) -DEFINSTR1(Xcall, - {-1, PSYM, 0, O("\xE8"), EN_R32, .norexw=1}, /* CALL rel32 */ - {-1, PGPR, 0, O("\xFF"), EN_R, .ext=2, .norexw=1}, /* CALL r64 */ - {-1, PMEM, 0, O("\xFF"), EN_M, .ext=2, .norexw=1}, /* CALL m64 */ -) -DEFINSTR2(Xcmp, - {4|8, PGPR, PGPR, O("\x3B"), EN_RR}, /* CMP r32/64, r32/64 */ - {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=7}, /* CMP r32/64, imm8 */ - {4|8, PRAX, PI32, O("\x3D"), EN_I32}, /* CMP eax/rax, imm */ - {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=7}, /* CMP r32/64, imm */ - { 8, PGPR, PMEM, O("\x3B"), EN_RM}, /* CMP r64, m64 */ - {4 , PFPR, PFPR, O("\x0F\x2E"), EN_RR}, /* UCOMISS xmm, xmm */ - {4 , PFPR, PMEM, O("\x0F\x2E"), EN_RM}, /* UCOMISS xmm, m32 */ - { 8, PFPR, PFPR, O("\x66\x0F\x2E"), EN_RR}, /* UCOMISD xmm, xmm */ - { 8, PFPR, PMEM, O("\x66\x0F\x2E"), EN_RM}, /* UCOMISD xmm, m64 */ -) -DEFINSTR2(Xtest, - {4|8, PRAX, PI8, O("\xA8"), EN_I8}, /* TEST AL, imm8 */ - {4, PRAX, PI32, O("\xA9"), EN_I32}, /* TEST EAX, imm32 */ - { 8, PRAX, PU32, O("\xA9"), EN_I32}, /* TEST EAX, imm32 */ - { 8, PRAX, PI32, O("\xA9"), EN_I32}, /* TEST RAX, imm32 */ - {4|8, PGPR, PI8, O("\xF6"), EN_RI8, .r8=1,.norexw=1}, /* TEST r8, imm8 */ - {4|8, PGPR, PI32, O("\xF7"), EN_RI32, .ext=0}, /* TEST r32/64, imm32 */ - {4|8, PGPR, PGPR, O("\x85"), EN_RR}, /* TEST r32/64, r32/64 */ - {4|8, PGPR, PMEM, O("\x85"), EN_RM}, /* TEST r32/64, m32/64 */ -) - -DEFINSTR2(Ximul2, - {4|8, PGPR, PGPR, O("\x0F\xAF"), EN_RR}, /* IMUL r32/64, r32/64 */ - {4|8, PGPR, PMEM, O("\x0F\xAF"), EN_RM}, /* IMUL r32/64, m32/64 */ -) -static const struct desc imul3_imm8tab[] = { - {4|8, PGPR, PGPR, O("\x6B"), EN_RR}, /* IMUL r32/64, r32/64, (imm8) */ - {4|8, PGPR, PMEM, O("\x6B"), EN_RM}, /* IMUL r32/64, m32/64, (imm8) */ -}, imul3_imm32tab[] = { - {4|8, PGPR, PGPR, O("\x69"), EN_RR}, /* IMUL r32/64, r32/64, (imm32) */ - {4|8, PGPR, PMEM, O("\x69"), EN_RM}, /* IMUL r32/64, m32/64, (imm32) */ -}; -#undef O -static void -Ximul(uchar **pcode, enum irclass k, struct oper dst, struct oper s1, struct oper s2) -{ - if (!memcmp(&dst, &s1, sizeof dst) && s2.t != OIMM) { - Ximul2(pcode, k, dst, s2); - return; - } - assert(s2.t == OIMM); - if ((uint)(s2.imm + 128) < 256) { - encode(pcode, imul3_imm8tab, countof(imul3_imm8tab), k, dst, s1); - B(s2.imm); - } else { - encode(pcode, imul3_imm32tab, countof(imul3_imm32tab), k, dst, s1); - I32(s2.imm); - } -} - -enum cc { - CCO = 0x0, /* OF = 1*/ - CCNO = 0x1, /* OF = 0*/ - CCB = 0x2, CCC = 0x2, CCNAE = 0x2, /* below; CF = 1; not above or equal */ - CCAE = 0x3, CCNB = 0x3, CCNC = 0x3, /* above or equal; not below; CF = 0 */ - CCE = 0x4, CCZ = 0x4, /* equal; ZF = 1 */ - CCNE = 0x5, CCNZ = 0x5, /* not equal; ZF = 0 */ - CCBE = 0x6, CCNA = 0x6, /* below or equal; not above; CF=1 or ZF=1 */ - CCA = 0x7, CCNBE = 0x7, /* above; not below or equal; CF=0 and ZF=0 */ - CCS = 0x8, /* ZS = 1; negative */ - CCNS = 0x9, /* ZS = 0; non-negative */ - CCP = 0xA, CCPE = 0xA, /* PF = 1; parity even */ - CCNP = 0xB, CCPO = 0xB, /* PF = 0; parity odd */ - CCL = 0xC, CCNGE = 0xC, /* lower; not greater or equal; SF != OF */ - CCGE = 0xD, CCNL = 0xD, /* greater or equal; not lower; SF == OF */ - CCLE = 0xE, CCNG = 0xE, /* less or equal; not greater; ZF=1 or SF != OF */ - CCG = 0xF, CCNLE = 0xF, /* greater; not less or equal; ZF=0 and SF = OF*/ - ALWAYS, -}; - -/* maps blk -> address when resolved; or to linked list of jump displacement - * relocations */ -static struct blkaddr { - bool resolved; - union { - uint addr; - uint relreloc; - }; -} *blkaddr; -static uint nblkaddr; - -static void -Xjcc(uchar **pcode, enum cc cc, struct block *dst) -{ - int disp, insaddr = *pcode - objout.textbegin; - bool rel8 = 0; - - if (blkaddr[dst->id].resolved) { - disp = blkaddr[dst->id].addr - (insaddr + 2); - if ((uint)(disp + 128) < 256) /* can use 1-byte displacement? */ - rel8 = 1; - else { /* otherwise 4-byte displacement */ - disp -= 3; - disp -= cc != ALWAYS; /* 'Jcc rel32' has 2 opcode bytes */ - } - } else { - disp = blkaddr[dst->id].relreloc; - blkaddr[dst->id].relreloc = insaddr + 1 + (cc != ALWAYS); - } - if (cc == ALWAYS) { - B(rel8 ? 0xEB : 0xE9); /* JMP rel8/rel32 */ - } else { - assert(in_range(cc, 0, 0xF)); - if (rel8) B(0x70 + cc); /* Jcc rel8 */ - else B(0x0F), B(0x80 + cc); /* Jcc rel32 */ - } - if (rel8) B(disp); else I32(disp); -} - -static void -Xsetcc(uchar **pcode, enum cc cc, enum reg reg) -{ - int rex = 0; - assert(in_range(cc, 0x0, 0xF)); - assert(in_range(reg, RAX, R15)); - - if (in_range(reg, RSP, RDI)) rex = 0x40; - rex |= (reg >> 3); /* REX.B */ - if (rex) B(rex | 0x40); - B(0x0F), B(0x90+cc); /* SETcc */ - B(0xC0 + (reg & 7)); /* ModR/M with mod=11, rm=reg */ -} - -static void -Xpush(uchar **pcode, enum reg reg) -{ - if (in_range(reg, RAX, R15)) { - if (reg >> 3) B(0x41); /* REX.B */ - B(0x50 + (reg & 7)); /* PUSH reg */ - } else { - assert(in_range(reg, XMM0, XMM15)); - DS("\x48\x8d\x64\x24\xF8"); /* LEA RSP, [RSP-8] */ - Xmov(pcode, KF64, mkoper(OMEM, .base = RSP, .index = NOINDEX), reg2oper(reg)); /* MOVD [rsp],xmm0 */ - } -} - -static void -Xpop(uchar **pcode, enum reg reg) -{ - if (in_range(reg, RAX, R15)) { - if (reg >> 3) B(0x41); /* REX.B */ - B(0x58 + (reg & 7)); /* POP reg */ - } else { - assert(in_range(reg, XMM0, XMM15)); - Xmov(pcode, KF64, reg2oper(reg), mkoper(OMEM, .base = RSP, .index = NOINDEX)); /* MOVD xmm0,[rsp] */ - DS("\x48\x8d\x64\x24\x08"); /* LEA RSP, [RSP+8] */ - } -} - -/* are flags live at given instruction? */ -static bool -flagslivep(struct block *blk, int curi) -{ - int cmpi; - /* conditional branch that references a previous comparison instruction? */ - if (blk->jmp.t != Jb || !blk->jmp.arg[0].bits) - return 0; - assert(blk->jmp.arg[0].t == RTMP); - cmpi = blk->jmp.arg[1].i; - for (int i = blk->ins.n - 1; i > curi; --i) { - if (blk->ins.p[i] == cmpi) - /* flags defined after given instruction, dead here */ - return 0; - } - /* flags defined before given instruction, live here */ - return 1; -} - -/* Copy dst = val, with some peephole optimizations */ -static void -gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct oper dst, union ref val) -{ - assert(dst.t == OREG); - if (val.bits == UNDREF.bits) { - /* can be generated by ssa construction, since value is undefined no move is needed */ - return; - } - if (val.t == RADDR) { - /* this is a LEA, but maybe it can be lowered to a 2-address instruction, - * which may clobber flags */ - const struct addr *addr = &addrht[val.i]; - if (flagslivep(blk, curi)) goto Lea; - if (addr->base.t != RREG) goto Lea; - if (addr->base.bits && dst.reg == mkregoper(addr->base).reg) { /* base = dst */ - if (addr->index.bits && !addr->disp && !addr->shift){ - /* lea Rx, [Rx + Ry] -> add Rx, Ry */ - Xadd(pcode, cls, dst, mkregoper(addr->index)); - return; - } else if (!addr->index.bits) { - if (!addr->disp) /* lea Rx, [Rx] -> mov Rx, Rx */ - Xmov(pcode, cls, dst, dst); - else /* lea Rx, [Rx + Imm] -> add Rx, Imm */ - Xadd(pcode, cls, dst, mkoper(OIMM, .imm = addr->disp)); - return; - } - } else if (addr->index.bits && dst.reg == mkregoper(addr->index).reg) { /* index = dst */ - if (addr->base.bits && !addr->disp && !addr->shift) { - /* lea Rx, [Ry + Rx] -> add Rx, Ry */ - Xadd(pcode, cls, dst, mkregoper(addr->base)); - return; - } else if (!addr->base.bits) { - if (!addr->disp && !addr->shift) /* lea Rx, [Rx] -> mov Rx, Rx */ - Xmov(pcode, cls, dst, dst); - else if (!addr->shift) /* lea Rx, [Rx + Imm] -> add Rx, Imm */ - Xadd(pcode, cls, dst, mkoper(OIMM, .imm = addr->disp)); - else if (!addr->disp) /* lea Rx, [Rx LSL s] -> shl Rx, s */ - Xshl(pcode, cls, dst, mkoper(OIMM, .imm = addr->shift)); - else - goto Lea; - return; - } - } - /* normal (not 2-address) case */ - Lea: - if (isaddrcon(addr->base,0) && ccopt.pic) { - assert(!addr->disp && !addr->index.bits); - val = addr->base; - goto GOTLoad; - } - Xlea(pcode, cls, dst, ref2oper(val)); - } else if (val.bits == ZEROREF.bits && dst.t == OREG && (kisflt(cls) || !flagslivep(blk, curi))) { - /* dst = 0 -> xor dst, dst; but only if it is ok to clobber flags */ - Xxor(pcode, kisint(cls) ? KI32 : cls, dst, dst); - } else if (isaddrcon(val,0)) { - if (ccopt.pic) GOTLoad: - /* for mov reg, [rip(sym@GOTPCREL)] */ - Xmov(pcode, cls, dst, mkoper(OSYM, .con = val.i, .cindex = NOINDEX)); - else - /* for lea reg, [rip(sym)] */ - Xlea(pcode, cls, dst, mkoper(OSYM, .con = val.i, .cindex = NOINDEX)); - } else if (val.t == RXCON && in_range(concls(val), KI64, KPTR)) { - /* movabs */ - assert(dst.t == OREG && in_range(dst.reg, RAX, R15)); - B(0x48 | (dst.reg >> 3)); /* REX.W (+ REX.B) */ - B(0xB8 + (dst.reg & 0x7)); /* MOVABS r64, */ - wr64le(*pcode, intconval(val)); /* imm64 */ - *pcode += 8; - } else { - struct oper src = mkimmdatregoper(val); - if (memcmp(&dst, &src, sizeof dst) != 0) - Xmov(pcode, cls == KF64 && src.t == OREG && src.reg < XMM0 ? KI64 : cls, dst, src); - } -} - -static void -Xvaprologue(uchar **pcode, struct function *fn, struct oper sav) -{ - uint gpr0 = 0, fpr0 = 0, jmpaddr; - for (int i = 0; i < fn->nabiarg; ++i) { - struct abiarg abi = fn->abiarg[i]; - if (!abi.isstk) { - if (abi.reg < XMM0) ++gpr0; - else ++fpr0; - } - } - assert(sav.t == OMEM && sav.base == RBP); - /* save GPRS */ - for (int r = 0; r < 6; ++r) { - static const char reg[] = {RDI,RSI,RDX,RCX,R8,R9}; - if (r >= gpr0) - Xmov(pcode, KI64, sav, reg2oper(reg[r])); - sav.disp += 8; - } - - /* save FPRs, but only if al is non zero */ - if (fpr0 < 8) { - DS("\x84\xC0"); /* TEST al,al */ - jmpaddr = *pcode - objout.textbegin; - DS("\x74\xFE"); /* JE rel8 */ - } - for (int r = 0; r < 8; ++r) { - if (r >= fpr0) - Xmovaps(pcode, KF64, sav, reg2oper(XMM0 + r)); - sav.disp += 16; - } - if (fpr0 < 8) {/* patch relative jump */ - int off = (*pcode - objout.textbegin) - jmpaddr - 2; - objout.textbegin[jmpaddr+1] = off; - } -} - -/* condition code for CMP */ -static const uchar icmpop2cc[] = { - [Oequ] = CCE, [Oneq] = CCNE, - [Olth] = CCL, [Ogth] = CCG, [Olte] = CCLE, [Ogte] = CCGE, - [Oulth] = CCB, [Ougth] = CCA, [Oulte] = CCBE, [Ougte] = CCAE, - [Oand] = CCNE, [Osub] = CCNE, -}, fcmpop2cc[] = { - [Oequ] = CCE, [Oneq] = CCNE, - [Olth] = CCB, [Ogth] = CCA, [Olte] = CCBE, [Ogte] = CCAE, -}; -/* condition code for TEST reg,reg (compare with zero) */ -static const uchar icmpzero2cc[] = { - [Oequ] = CCE, [Oulte] = CCE, - [Oneq] = CCNE, [Ougth] = CCNE, - [Olth] = CCS, [Ogte] = CCNS, - [Olte] = CCLE, [Ogth] = CCG, - [Oulth] = CCB, [Ougte] = CCAE, /* actually constants */ -}; - -static void -emitinstr(uchar **pcode, struct function *fn, struct block *blk, int curi, struct instr *ins) -{ - struct oper dst, src; - bool regzeroed; - enum irclass cls = ins->cls; - void (*X)(uchar **, enum irclass, struct oper, struct oper) = NULL; - void (*X1)(uchar **, enum irclass, struct oper) = NULL; - - switch (ins->op) { - default: - fatal(NULL, "amd64: in %y; unimplemented instr '%s'", fn->name, opnames[ins->op]); - case Onop: break; - case Ostore8: cls = KI32, X = Xmovb; goto Store; - case Ostore16: cls = KI32, X = Xmovw; goto Store; - case Ostore32: cls = KI32, X = Xmov; goto Store; - case Ostore64: cls = KI64, X = Xmov; - Store: - src = mkimmregoper(ins->r); - if (cls == KI32 && src.t == OREG && src.reg >= XMM0) cls = KF32; - if (cls == KI64 && src.t == OREG && src.reg >= XMM0) cls = KF64; - X(pcode, cls, mkmemoper(ins->l), src); - break; - case Oexts8: src = mkregoper(ins->l); goto Movsxb; - case Oextu8: src = mkregoper(ins->l); goto Movzxb; - case Oexts16: src = mkregoper(ins->l); goto Movsxw; - case Oextu16: src = mkregoper(ins->l); goto Movzxw; - case Oexts32: src = mkregoper(ins->l); goto Movsxl; - case Oextu32: src = mkregoper(ins->l); goto Movzxl; - case Oloads8: src = mkmemoper(ins->l); Movsxb: Xmovsxb(pcode, cls, reg2oper(ins->reg-1), src); break; - case Oloadu8: src = mkmemoper(ins->l); Movzxb: Xmovzxb(pcode, cls, reg2oper(ins->reg-1), src); break; - case Oloads16: src = mkmemoper(ins->l); Movsxw: Xmovsxw(pcode, cls, reg2oper(ins->reg-1), src); break; - case Oloadu16: src = mkmemoper(ins->l); Movzxw: Xmovzxw(pcode, cls, reg2oper(ins->reg-1), src); break; - case Oloads32: src = mkmemoper(ins->l); Movsxl: Xmovsxl(pcode, cls, reg2oper(ins->reg-1), src); break; - case Oloadu32: src = mkmemoper(ins->l); Movzxl: Xmov(pcode, KI32, reg2oper(ins->reg-1), src); break; - case Oloadf32: case Oloadf64: Xmov(pcode, cls, reg2oper(ins->reg-1), mkmemoper(ins->l)); break; - case Oloadi64: Xmov(pcode, KI64, reg2oper(ins->reg-1), mkmemoper(ins->l)); break; - case Ocvtf32f64: X = Xcvtss2sd; goto FloatsCvt; - case Ocvtf64f32: X = Xcvtsd2ss; goto FloatsCvt; - case Ocvtf32s: X = Xcvttss2si; goto FloatsCvt; - case Ocvtf64s: X = Xcvttsd2si; goto FloatsCvt; - case Ocvts32f: X = cls == KF32 ? Xcvtsi2ss : Xcvtsi2sd; cls = KI32; goto FloatsCvt; - case Ocvts64f: X = cls == KF32 ? Xcvtsi2ss : Xcvtsi2sd; cls = KI64; goto FloatsCvt; - FloatsCvt: - X(pcode, cls, reg2oper(ins->reg-1), mkdatregoper(ins->l)); - break; - case Oadd: - dst = mkregoper(ins->l); - if (kisflt(cls)) { - Xaddf(pcode, cls, dst, mkimmdatregoper(ins->r)); - } else if (ins->reg-1 == dst.reg) { /* two-address add */ - src = ref2oper(ins->r); - if (src.t == OIMM && src.imm < 0) /* ADD -imm -> SUB imm, for niceness */ - Xsub(pcode, cls, dst, (src.imm = -src.imm, src)); - else - Xadd(pcode, cls, dst, src); - } else if (isregref(ins->r) && ins->reg-1 == mkregoper(ins->r).reg) { - /* also two-address after swapping operands */ - Xadd(pcode, cls, reg2oper(ins->reg-1), mkimmdatregoper(ins->l)); - } else { /* three-address add (lea) */ - struct oper mem = { OMEM, .base = NOBASE, .index = NOINDEX }; - dst = reg2oper(ins->reg-1); - addmemoper(&mem, ref2oper(ins->l)); - addmemoper(&mem, ref2oper(ins->r)); - Xlea(pcode, cls, dst, mem); - } - break; - case Osub: - dst = mkregoper(ins->l); - if (kisflt(cls)) { - Xsubf(pcode, cls, dst, mkimmdatregoper(ins->r)); - } else if (ins->reg-1 == dst.reg) { /* two-address */ - Xsub(pcode, cls, dst, ref2oper(ins->r)); - } else { - assert(isintcon(ins->r)); - Xlea(pcode, cls, reg2oper(ins->reg-1), - mkoper(OMEM, .base = mkregoper(ins->l).reg, .index = NOINDEX, .disp = -intconval(ins->r))); - } - break; - case Oshl: X = Xshl; goto ALU2; - case Osar: X = Xsar; goto ALU2; - case Oslr: X = Xshr; goto ALU2; - case Oand: - if (!ins->reg) { - Xtest(pcode, cls, mkregoper(ins->l), mkimmdatregoper(ins->r)); - break; - } - X = Xand; - goto ALU2; - case Oxor: X = Xxor; goto ALU2; - case Oior: X = Xior; goto ALU2; - ALU2: - dst = mkregoper(ins->l); - assert(ins->reg-1 == dst.reg); - X(pcode, cls, dst, mkimmdatregoper(ins->r)); - break; - case Oneg: X1 = Xneg; goto ALU1; - case Onot: X1 = Xnot; goto ALU1; - ALU1: - dst = mkregoper(ins->l); - assert(ins->reg-1 == dst.reg); - X1(pcode, cls, dst); - break; - case Omul: - if (kisint(cls)) - Ximul(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->l), ref2oper(ins->r)); - else - Xmulf(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->r)); - break; - case Odiv: - switch (cls) { - default: assert(0); - case KPTR: - case KI64: B(0x48); /* REX.W */ - case KI32: B(0x99); /* CDQ/CQO */ - assert(mkregoper(ins->l).reg == RAX); - Xidiv(pcode, cls, mkdatregoper(ins->r)); - break; - case KF32: case KF64: - Xdivf(pcode, cls, reg2oper(ins->reg-1), mkdatregoper(ins->r)); - break; - } - break; - case Oudiv: - DS("\x31\xD2"); /* XOR EDX,EDX */ - assert(mkregoper(ins->l).reg == RAX); - Xdiv(pcode, cls, mkdatregoper(ins->r)); - break; - case Oequ: case Oneq: - case Olth: case Ogth: case Olte: case Ogte: - case Oulth: case Ougth: case Oulte: case Ougte: - dst = mkregoper(ins->l); - src = ref2oper(ins->r); - regzeroed = 0; - if (ins->reg && dst.reg != ins->reg-1 && (src.t != OREG || src.reg != ins->reg-1)) { - /* can zero output reg before test instruction (differs from both inputs) */ - /* XXX this doesn't check if a source operand is an addr containing the register */ - struct oper dst = reg2oper(ins->reg-1); - Xxor(pcode, KI32, dst, dst); - regzeroed = 1; - } - if (kisint(ins->cls) && ins->r.bits == ZEROREF.bits) - Xtest(pcode, cls, dst, dst); - else - Xcmp(pcode, cls, dst, src); - if (ins->reg) { - enum cc cc; - dst = reg2oper(ins->reg-1); - if (ins->r.bits != ZEROREF.bits) { /* CMP */ - cc = (kisint(ins->cls) ? icmpop2cc : fcmpop2cc)[ins->op]; - } else { /* TEST r,r (CMP r, 0) */ - assert(kisint(ins->cls)); - cc = icmpzero2cc[ins->op]; - } - if (kisflt(ins->cls)) { /* handle float unordered result */ - int unordres = ins->op == Oneq ? 1 : 0; - int rex = 0; - if (in_range(dst.reg, RSP, RDI)) rex = 0x40; - rex |= (dst.reg >> 3); /* REX.B */ - int jpoff = 3 + (rex != 0); - if (regzeroed && unordres == 0) { - /* if cmp unordered, just jump over the SETcc; result reg was already zeroed */ - B(0x7A), B(jpoff); /* JP */ - } else { - /* JNP .a - * MOV r8, 0/1 - * JMP .b - * .a: SETcc r8 - * .b: MOVZX r, r8 - */ - B(0x7B), B(jpoff+1); /* JNP */ - if (rex) B(rex | 0x40); - B(0xB0 + (dst.reg & 7)), B(unordres); /* MOV r8, 0/1 */ - B(0xEB), B(jpoff); /* JMP */ - } - } - Xsetcc(pcode, cc, dst.reg); - if (!regzeroed) - Xmovzxb(pcode, KI32, dst, dst); - } - break; - case Omove: - dst = ref2oper(ins->l); - gencopy(pcode, cls, blk, curi, dst, ins->r); - break; - case Ocopy: - dst = reg2oper(ins->reg-1); - gencopy(pcode, cls, blk, curi, dst, ins->l); - break; - case Oswap: - if (kisint(cls)) - Xxchg(pcode, cls, ref2oper(ins->l), mkregoper(ins->r)); - else { - struct oper l = mkregoper(ins->l), r = mkregoper(ins->r); - Xxor(pcode, cls, l, r); - Xxor(pcode, cls, r, l); - Xxor(pcode, cls, l, r); - } - break; - case Oxsave: - Xpush(pcode, mkregoper(ins->l).reg); - break; - case Oxrestore: - Xpop(pcode, mkregoper(ins->l).reg); - break; - case Ocall: - if (calltab.p[ins->r.i].vararg >= 0) { - struct call *call = &calltab.p[ins->r.i]; - /* variadic functions need the caller to write num of args in sse regs to %al */ - int n = 0; - for (int i = 0; i < call->narg; ++i) - if (!call->abiarg[i].isstk && call->abiarg[i].reg >= XMM0) - ++n; - if (!n) DS("\x31\xC0"); /* XOR EAX, EAX */ - else B(0xB0), B(n); /* MOV AL, n */ - } - Xcall(pcode, KPTR, ref2oper(ins->l)); - break; - case Oxvaprologue: - Xvaprologue(pcode, fn, mkmemoper(ins->l)); - break; - } -} - -static void -emitbranch(uchar **pcode, struct block *blk) -{ - enum cc cc = ALWAYS; - assert(blk->s1); - if (blk->s2) { - /* conditional branch.. */ - union ref arg = blk->jmp.arg[0]; - struct block *unord = NULL; - assert(arg.t == RTMP); - struct instr *ins = &instrtab[arg.i]; - if ((oiscmp(ins->op) || ins->op == Oand || ins->op == Osub)) { - if (ins->r.bits != ZEROREF.bits) { - /* for CMP instr */ - cc = (kisint(ins->cls) ? icmpop2cc : fcmpop2cc)[ins->op]; - unord = ins->op == Oneq ? blk->s1 : blk->s2; - } else { - assert(kisint(ins->cls)); - /* for TEST instr, which modifies ZF and SF and sets CF = OF = 0 */ - cc = icmpzero2cc[ins->op]; - } - } else { - /* implicit by ZF */ - cc = CCNZ; - } - if (kisflt(ins->cls)) { - /* handle float unordered result */ - Xjcc(pcode, CCP, unord); - } - if (blk->s1 == blk->lnext) { - /* if s1 is next adjacent block, swap s1,s2 and flip condition to emit a - * single jump */ - struct block *tmp = blk->s1; - blk->s1 = blk->s2; - blk->s2 = tmp; - cc ^= 1; - } - } - /* make sure to fallthru if jumping to next adjacent block */ - if (blk->s2 || blk->s1 != blk->lnext) - Xjcc(pcode, cc, blk->s1); - if (blk->s2 && blk->s2 != blk->lnext) - Xjcc(pcode, ALWAYS, blk->s2); -} - -static bool -calleesave(int *npush, uchar **pcode, struct function *fn) -{ - bool any = 0; - if (rstest(fn->regusage, RBX)) { - Xpush(pcode, RBX); - ++*npush; - any = 1; - } - for (int r = R12; r <= R15; ++r) - if (rstest(fn->regusage, r)) { - Xpush(pcode, r); - ++*npush; - any = 1; - } - return any; -} - -static void -calleerestore(uchar **pcode, struct function *fn) -{ - for (int r = R15; r >= R12; --r) - if (rstest(fn->regusage, r)) - Xpop(pcode, r); - if (rstest(fn->regusage, RBX)) Xpop(pcode, RBX); -} - -/* align code using NOPs */ -static void -nops(uchar **pcode, int align) -{ - int rem; - while ((rem = (*pcode - objout.textbegin) & (align - 1)) != 0) { - switch (align - rem) { - case 15: case 14: case 13: case 12: case 11: case 10: - case 9: B(0x66); - case 8: DS("\x0f\x1f\x84\x00\x00\x00\x00\x00"); break; - case 7: DS("\x0f\x1f\x80\x00\x00\x00\x00"); break; - case 6: B(0x66); - case 5: DS("\x0f\x1f\x44\x00\x00"); break; - case 4: DS("\x0f\x1f\x40\x00"); break; - case 3: DS("\x0f\x1f\00"); break; - case 2: B(0x66); - case 1: B(0x90); break; - } - } -} - -static void -emitbin(struct function *fn) -{ - struct block *blk; - uchar **pcode = &objout.code; - int npush = 0; - uint epilogueaddr = 0; - bool saverestore; - - if (nblkaddr < fn->nblk) { - blkaddr = xrealloc(blkaddr, fn->nblk * sizeof *blkaddr); - nblkaddr = fn->nblk; - } - memset(blkaddr, 0, nblkaddr * sizeof *blkaddr); - - nops(pcode, 16); - fnstart = *pcode; - curfnsym = fn->name; - - /** prologue **/ - - /* only use frame pointer in non-leaf functions and functions that use the stack */ - usebp = 0; - if (!fn->isleaf || fn->stksiz) { - usebp = 1; - /* push rbp; mov rbp, rsp */ - DS("\x55\x48\x89\xE5"); - } - saverestore = calleesave(&npush, pcode, fn); - if (usebp) rbpoff = -npush*8; - - /* ensure stack is 16-byte aligned for function calls */ - if (!fn->isleaf && ((fn->stksiz + npush*8) & 0xF) != 0) { - assert(usebp); - if ((rbpoff & 0xF) == 0) { - rbpoff -= 16; - fn->stksiz += 24; - } else { - rbpoff -= 8; - fn->stksiz += 8; - } - } - - if (fn->stksiz != 0) { - /* sub rsp, */ - if (fn->stksiz < 128) - DS("\x48\x83\xEC"), B(fn->stksiz); - else if (fn->stksiz == 128) - DS("\x48\x83\xC4\x80"); /* add rsp, -128 */ - else - DS("\x48\x81\xEC"), I32(fn->stksiz); - } - - blk = fn->entry; - do { - struct blkaddr *bb = &blkaddr[blk->id]; - uint bbaddr = *pcode - objout.textbegin; - assert(!bb->resolved); - while (bb->relreloc) { - uint next; - int disp = bbaddr - bb->relreloc - 4; - - memcpy(&next, objout.textbegin + bb->relreloc, 4); - wr32le(objout.textbegin + bb->relreloc, disp); - bb->relreloc = next; - } - bb->resolved = 1; - bb->addr = bbaddr; - - for (int i = 0; i < blk->ins.n; ++i) { - emitinstr(pcode, fn, blk, i, &instrtab[blk->ins.p[i]]); - } - if (blk->jmp.t == Jret) { - /* epilogue */ - uint here = *pcode - fnstart; - if (epilogueaddr) { - int disp = epilogueaddr - (here + 2); - if ((uint)(disp + 128) < 256) {/* can use 1-byte displacement? */ - B(0xEB), B(disp); /* JMP rel8 */ - } else { - B(0xE9), I32(disp - 3); /* JMP rel32 */ - } - } else { - if (fn->stksiz && (saverestore || !usebp)) - Xadd(pcode, KPTR, mkoper(OREG, .reg = RSP), mkoper(OIMM, .imm = fn->stksiz)); - if (saverestore) { - epilogueaddr = here; - calleerestore(pcode, fn); - } - if (usebp) B(0xC9); /* leave */ - B(0xC3); /* ret */ - } - } else if (blk->jmp.t == Jtrap) { - DS("\x0F\x0B"); /* UD2 */ - } else emitbranch(pcode, blk); - } while ((blk = blk->lnext) != fn->entry); - objdeffunc(fn->name, fn->globl, fnstart - objout.textbegin, *pcode - fnstart); -} - -void -amd64_emit(struct function *fn) -{ - fn->stksiz = alignup(fn->stksiz, 8); - if (fn->stksiz > 1<<24) error(NULL, "'%s' stack frame too big", fn->name); - emitbin(fn); -} - -/* vim:set ts=3 sw=3 expandtab: */ diff --git a/amd64/isel.c b/amd64/isel.c deleted file mode 100644 index a0c913c..0000000 --- a/amd64/isel.c +++ /dev/null @@ -1,660 +0,0 @@ -#include "all.h" -#include "../endian.h" - -enum flag { - ZF = 1 << 0, - SF = 1 << 1, - CF = 1 << 2, - OF = 1 << 3, - CLOBF = 1 << 4, -}; - -/* flags modified by each integer op */ -static const uchar opflags[NOPER] = { - [Oneg] = ZF|CLOBF, - [Oadd] = ZF|CLOBF, - [Osub] = ZF|CLOBF, - [Omul] = CLOBF, - [Odiv] = CLOBF, - [Oudiv] = CLOBF, - [Orem] = CLOBF, - [Ourem] = CLOBF, - [Oand] = ZF|CLOBF, - [Oior] = ZF|CLOBF, - [Oxor] = ZF|CLOBF, - [Oshl] = ZF|CLOBF, - [Osar] = ZF|CLOBF, - [Oslr] = ZF|CLOBF, - [Oequ] = ZF|CLOBF, - [Oneq] = ZF|CLOBF, - [Olth] = ZF|CLOBF, - [Ogth] = ZF|CLOBF, - [Olte] = ZF|CLOBF, - [Ogte] = ZF|CLOBF, - [Oulth] = ZF|CLOBF, - [Ougth] = ZF|CLOBF, - [Oulte] = ZF|CLOBF, - [Ougte] = ZF|CLOBF, - [Ocall] = CLOBF, -}; - -static int iflagsrc = -1; - -static void -picfixsym(union ref *r, struct block *blk, int *curi) -{ - if (!ccopt.pic || !isaddrcon(*r,0)) return; - *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, .l = *r)); -} - -/* map alloca tmp -> stack frame displacement (0 if not alloca) */ -static ushort *stkslots; -static uint nstkslots; - -#define isstkslot(r) ((r).t == RTMP && (r).i < nstkslots && stkslots[(r).i]) - -static void -fixarg(union ref *r, struct instr *ins, struct block *blk, int *curi) -{ - int sh; - enum op op = ins ? ins->op : 0; - - if (r->t == RXCON) { - struct xcon *con = &conht[r->i]; - if (in_range(op, Oshl, Oslr) && r == &ins->r) { - sh = con->i; - goto ShiftImm; - } else if (in_range(op, Oadd, Osub) && con->i == 2147483648 && r == &ins->r) { - /* add X, INT32MAX+1 -> sub X, INT32MIN */ - ins->op = Oadd + (op == Oadd); - *r = mkintcon(KI32, -2147483648); - } else if (kisflt(con->cls) && con->i == 0) { - /* copy of positive float zero -> regular zero, that emit() will turn into xor x,x */ - if (in_range(op, Ocopy, Omove) || op == Ophi) - *r = ZEROREF; - else - *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, con->cls, ZEROREF)); - } else if (con->cls >= KI64) { - /* float immediates & 64bit immediates are loaded from memory */ - uchar data[8]; - uint ksiz = cls2siz[con->cls]; - union type ctype; - /* can't use memory arg in rhs if lhs is memory */ - bool docopy = &ins->l != r && (oisstore(ins->op) || ins->l.t == RADDR); - if (con->cls <= KPTR && in_range(ins->op, Ocopy, Omove)) /* in this case we can use movabs */ - return; - else if (!docopy || con->cls >= KF32) { - if (con->cls != KF32) { - wr64le(data, con->i); - ctype = mktype(con->cls == KF64 ? TYDOUBLE : TYVLONG); - } else { - union { float f; int i; } pun = { con->f }; - wr32le(data, pun.i); - ctype = mktype(TYFLOAT); - } - *r = mkdatref(NULL, ctype, ksiz, /*align*/ksiz, data, ksiz, /*deref*/1); - } - if (docopy) - *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, con->cls, *r)); - } else if (ins->op != Omove && con->issym && r == &ins->r) { - *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, mkaddr((struct addr){*r}))); - } else if (in_range(op, Odiv, Ourem) && kisint(ins->cls)) - goto DivImm; - } else if (r->t == RICON && in_range(op, Odiv, Ourem) && kisint(ins->cls) && r == &ins->r) { - DivImm: /* there is no division by immediate, must be copied to a register */ - *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, ins->cls, *r)); - } else if (r->t == RICON && in_range(op, Oshl, Oslr) && r == &ins->r) { - sh = r->i; - ShiftImm: /* shift immediate is always 8bit */ - *r = mkref(RICON, sh & 255); - } else if (isstkslot(*r)) { - struct instr adr = mkinstr(Oadd, KPTR, mkref(RREG, RBP), mkintcon(KI32, -stkslots[r->i])); - if (in_range(op, Ocopy, Omove)) - *ins = adr; - else - *r = insertinstr(blk, (*curi)++, adr); - } - picfixsym(r, blk, curi); -} - -#define isimm32(r) (iscon(r) && concls(r) == KI32) - -static void -selcall(struct function *fn, struct instr *ins, struct block *blk, int *curi) -{ - const struct call *call = &calltab.p[ins->r.i]; - int iarg = *curi - 1; - enum irclass cls; - uint argstksiz = alignup(call->argstksiz, 16); - - for (int i = call->narg - 1; i >= 0; --i) { - struct abiarg abi = call->abiarg[i]; - struct instr *arg; - for (;; --iarg) { - assert(iarg >= 0 && i >= 0 && "arg?"); - if ((arg = &instrtab[blk->ins.p[iarg]])->op == Oarg) - break; - } - - if (!abi.isstk) { - assert(!abi.ty.isagg); - *arg = mkinstr(Omove, call->abiarg[i].ty.cls, mkref(RREG, abi.reg), arg->r); - } else { - union ref adr = mkaddr((struct addr){mkref(RREG, RSP), .disp = abi.stk}); - int iargsave = iarg; - if (!abi.ty.isagg) { /* scalar arg in stack */ - *arg = mkinstr(Ostore8+ilog2(cls2siz[abi.ty.cls]), 0, adr, arg->r); - if (isaddrcon(arg->r,1) || arg->r.t == RADDR) - arg->r = insertinstr(blk, iarg++, mkinstr(Ocopy, abi.ty.cls, arg->r)); - else - fixarg(&ins->r, ins, blk, &iarg); - } else { /* aggregate arg in stack, callee stack frame destination address */ - *arg = mkinstr(Ocopy, KPTR, adr); - } - *curi += iarg - iargsave; - } - } - if (call->argstksiz) { - union ref disp = mkref(RICON, argstksiz); - insertinstr(blk, iarg--, (struct instr){Osub, KPTR, .keep=1, .reg = RSP+1, .l=mkref(RREG,RSP), disp}); - ++*curi; - insertinstr(blk, *curi+1, (struct instr){Oadd, KPTR, .keep=1, .reg = RSP+1, .l=mkref(RREG,RSP), disp}); - } - if (isimm32(ins->l)) - ins->l = mkaddr((struct addr){.base = ins->l}); - else if (isintcon(ins->l)) - ins->l = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, ins->l)); - - if (call->vararg >= 0 && ins->l.t == RTMP) { - /* variadic calls write number of sse regs used to AL, so mark it as clobbered such that - * the function pointer of an indirect calls does not get allocated to RAX by regalloc */ - insertinstr(blk, (*curi)++, mkinstr(Omove, KPTR, mkref(RREG, RAX), mkref(RREG, RAX))); - } - cls = ins->cls; - ins->cls = 0; - if (cls) { - /* duplicate to reuse same TMP ref */ - insertinstr(blk, (*curi)++, *ins); - *ins = mkinstr(Ocopy, cls, mkref(RREG, call->abiret[0].reg)); - for (int i = 1; i <= 2; ++i) { - if (*curi + i >= blk->ins.n) break; - if (instrtab[blk->ins.p[*curi + i]].op == Ocall2r) { - ins = &instrtab[blk->ins.p[*curi += i]]; - *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, call->abiret[1].reg)); - break; - } - } - } -} - -static bool -aimm(struct addr *addr, int disp) -{ - vlong a = addr->disp; - a += disp; - if ((int)a == a) { - addr->disp = a; - return 1; - } - return 0; -} - -static bool -acon(struct addr *addr, union ref r) -{ - vlong a = addr->disp; - assert(isintcon(r)); - a += intconval(r); - if ((int)a == a) { - addr->disp = a; - return 1; - } - return 0; -} - -static bool -ascale(struct addr *addr, union ref a, union ref b) -{ - if (b.t != RICON) return 0; - if (addr->index.bits) return 0; - if ((unsigned)b.i > 3) return 0; - if (a.t == RREG) { - Scaled: - addr->index = a; - addr->shift = b.i; - return 1; - } else if (a.t == RTMP) { - struct instr *ins = &instrtab[a.i]; - /* factor out shifted immediate from 'shl {add %x, imm}, s' */ - /* XXX maybe we shouldn't do this here because it should be done by a generic - * arithemetic optimization pass ? */ - if (ins->op == Oadd && (ins->l.t == RREG || ins->l.t == RTMP) && isintcon(ins->r)) { - vlong a = ((vlong) addr->disp + intconval(ins->r)) * (1 << b.i); - if (a != (int) a) return 0; - addr->disp = a; - addr->index = ins->l; - addr->shift = b.i; - return 1; - } else { - goto Scaled; - } - } - return 0; -} - -static bool -aadd(struct addr *addr, struct block *blk, int *curi, union ref r) -{ - if (isstkslot(r)) { - if (addr->base.bits || !aimm(addr, -stkslots[r.i])) goto Ref; - addr->base = mkref(RREG, RBP); - } else if (r.t == RTMP) { - struct instr *ins = &instrtab[r.i]; - if (ins->op == Oadd) { - if (!aadd(addr, blk, curi, ins->l)) goto Ref; - if (!aadd(addr, blk, curi, ins->r)) goto Ref; - ins->skip = 1; - } else if (ins->op == Oshl) { - if (!ascale(addr, ins->l, ins->r)) goto Ref; - ins->skip = 1; - } else if (ins->op == Ocopy && ins->l.t == RADDR) { - struct addr save = *addr, *addr2 = &addrht[ins->l.i]; - if ((!addr2->base.bits || aadd(addr, blk, curi, addr2->base)) - && aimm(addr, addr2->disp) - && (!addr2->index.bits || ascale(addr, addr2->index, mkref(RICON, addr2->shift)))) - { - ins->skip = 1; - } else { - *addr = save; - goto Ref; - } - } else if (ins->op == Ocopy) { - if (!aadd(addr, blk, curi, ins->l)) goto Ref; - ins->skip = 1; - } else goto Ref; - } else if (isnumcon(r)) { - return acon(addr, r); - } else if (isaddrcon(r,1)) { - if (!addr->base.bits && !isaddrcon(addr->index,1)) addr->base = r; - else return 0; - } else if (r.t == RREG) { - /* temporaries are single assignment, but register aren't, so they can't be * - * safely hoisted into an address value, unless they have global lifetime */ - if (!rstest(mctarg->rglob, r.i)) return 0; - Ref: - if (isstkslot(r) && (addr->base.bits || addr->index.bits)) { - r = insertinstr(blk, (*curi)++, mkinstr(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, -stkslots[r.i]))); - } - if (!addr->base.bits) addr->base = r; - else if (!addr->index.bits) addr->index = r; - else return 0; - } else return 0; - return 1; -} - -static bool -fuseaddr(union ref *r, struct block *blk, int *curi) -{ - struct addr addr = { 0 }; - - if (isaddrcon(*r,1)) return 1; - if (r->t == RADDR) { - const struct addr *a0 = &addrht[r->i]; - if (aadd(&addr, blk, curi, a0->base) - && (!addr.index.bits || ascale(&addr, a0->index, mkref(RICON, a0->shift))) - && aadd(&addr, blk, curi, mkintcon(KPTR, a0->disp))) { - *r = mkaddr(addr); - } - return 1; - } - if (r->t != RTMP) return 0; - if (!aadd(&addr, blk, curi, *r)) return 0; - - if (isaddrcon(addr.base,0) && (ccopt.pic || (ccopt.pie && addr.index.bits))) { - /* pic needs to load from GOT */ - /* pie cannot encode RIP-relative address with index register */ - /* first load symbol address into a temp register */ - union ref temp = mkaddr((struct addr){.base = addr.base, .disp = ccopt.pic ? 0 : addr.disp}); - addr.base = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, .l = temp)); - if (!ccopt.pic) addr.disp = 0; - } - - if (!addr.base.bits) { - /* absolute int address in disp */ - if (addr.index.bits) return 0; - addr.base = mkintcon(KPTR, addr.disp); - addr.disp = 0; - } - - *r = mkaddr(addr); - return 1; -} - -/* is add instruction with this arg a candidate to transform into efective addr? */ -static bool -addarg4addrp(union ref r) -{ - struct instr *ins; - if (r.t == RXCON && !conht[r.i].cls && !conht[r.i].deref) return 1; /* sym or dat ref */ - if (r.t != RTMP) return 0; - if (isstkslot(r)) return 1; - ins = &instrtab[r.i]; - return ins->op == Oshl || (ins->op == Ocopy && ins->l.t == RADDR) || ins->op == Oadd; -} - -static void -loadstoreaddr(struct block *blk, union ref *r, int *curi) -{ - if (isimm32(*r)) { - *r = mkaddr((struct addr){.base = *r}); - } else if (isaddrcon(*r, 0)) { - picfixsym(r, blk, curi); - } else if (r->t == RTMP) { - if (addarg4addrp(*r)) fuseaddr(r, blk, curi); - } else if (r->t != RREG) { - *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, *r)); - } -} - -static bool -arithfold(struct instr *ins) -{ - if (isnumcon(ins->l) && (!ins->r.t || isnumcon(ins->r))) { - union ref r; - bool ok = ins->r.t ? foldbinop(&r, ins->op, ins->cls, ins->l, ins->r) : foldunop(&r, ins->op, ins->cls, ins->l); - assert(ok && "fold?"); - *ins = mkinstr(Ocopy, insrescls(*ins), r); - return 1; - } - return 0; -} - -static void -sel(struct function *fn, struct instr *ins, struct block *blk, int *curi) -{ - uint siz, alignlog2; - int t = ins - instrtab; - struct instr temp = {0}; - enum op op = ins->op; - - if (oisarith(ins->op) && arithfold(ins)) { - fixarg(&ins->l, ins, blk, curi); - return; - } - - switch (op) { - default: assert(0); - case Onop: break; - case Oalloca1: case Oalloca2: case Oalloca4: case Oalloca8: case Oalloca16: - alignlog2 = ins->op - Oalloca1; - assert(ins->l.i > 0); - siz = ins->l.i << alignlog2; - fn->stksiz += siz; - fn->stksiz = alignup(fn->stksiz, 1 << alignlog2); - if (fn->stksiz > (1<<16)-1) error(NULL, "'%s' stack frame too big", fn->name); - stkslots[t] = fn->stksiz; - *ins = mkinstr(Onop,0,); - break; - case Oparam: - assert(ins->l.t == RICON && ins->l.i < fn->nabiarg); - if (!fn->abiarg[ins->l.i].isstk) - *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, fn->abiarg[ins->l.i].reg)); - else /* stack */ - *ins = mkinstr(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, 16+fn->abiarg[ins->l.i].stk)); - break; - case Oarg: - fixarg(&ins->r, ins, blk, curi); - break; - case Ocall: - selcall(fn, ins, blk, curi); - break; - case Ocall2r: assert(0); - case Ointrin: - break; - case Oshl: case Osar: case Oslr: - if (!iscon(ins->r)) { - /* shift amount register is always CL */ - insertinstr(blk, (*curi)++, mkinstr(Omove, KI32, mkref(RREG, RCX), ins->r)); - ins->r = mkref(RREG, RCX); - } - goto ALU; - case Oequ: case Oneq: - case Olth: case Ogth: case Olte: case Ogte: - case Oulth: case Ougth: case Oulte: case Ougte: - if (iscon(ins->l)) { - /* lth imm, x -> gth x, imm */ - if (!in_range(ins->op, Oequ, Oneq)) - ins->op = ((op - Olth) ^ 1) + Olth; - rswap(ins->l, ins->r); - } - if (ins->l.t != RTMP && ins->l.t != RREG) - ins->l = insertinstr(blk, (*curi)++, mkinstr(Ocopy, ins->cls, ins->l)); - else - fixarg(&ins->l, ins, blk, curi); - fixarg(&ins->r, ins, blk, curi); - break; - case Odiv: case Oudiv: case Orem: case Ourem: - if (kisflt(ins->cls)) goto ALU; - /* TODO fuse div/rem pair */ - - /* (I)DIV dividend is always in RDX:RAX, output also in those regs */ - insertinstr(blk, (*curi)++, mkinstr(Omove, ins->cls, mkref(RREG, RAX), ins->l)); - /* mark RDX as clobbered. sign/zero-extending RAX into RDX is handled in emit() */ - insertinstr(blk, (*curi)++, mkinstr(Omove, ins->cls, mkref(RREG, RDX), mkref(RREG, RDX))); - fixarg(&ins->r, ins, blk, curi); /* make sure rhs is memory or reg */ - ins->l = mkref(RREG, RAX); - ins->keep = 1; - if (op == Orem) ins->op = Odiv; - else if (op == Ourem) ins->op = Oudiv; - insertinstr(blk, (*curi)++, *ins); /* duplicate ins to reuse tmp ref */ - *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, op < Orem ? RAX : RDX)); /* get output */ - temp = mkinstr(Ocopy, ins->cls, mkref(RREG, op < Orem ? RDX : RAX)); /* clobber other reg*/ - insertinstr(blk, ++(*curi), temp); - /* swap instrs so that clobber goes first */ - t = blk->ins.p[*curi - 1]; - blk->ins.p[*curi - 1] = blk->ins.p[*curi - 0]; - blk->ins.p[*curi - 0] = t; - break; - case Osub: - if (isintcon(ins->l)) { - /* sub imm, x -> sub x, imm; neg x */ - fixarg(&ins->l, ins, blk, curi); - ins->inplace = 1; - struct instr sub = *ins; - rswap(sub.l, sub.r); - ins->op = op = Oneg; - ins->l = insertinstr(blk, (*curi)++, sub); - ins->r = NOREF; - goto ALU; - } else if (kisint(ins->cls) && isintcon(ins->r)) { - ins->op = op = Oadd; - ins->r = mkintcon(concls(ins->r), -intconval(ins->r)); - } else { - goto ALU; - } - /* fallthru */ - case Oadd: - if (kisint(ins->cls)) { - if ((addarg4addrp(ins->l) || addarg4addrp(ins->r))) { - temp.op = Ocopy; - temp.cls = ins->cls; - temp.l = mkref(RTMP, t); - if (fuseaddr(&temp.l, blk, curi)) { - *ins = temp; - break; - } - } - } - /* fallthru */ - case Omul: - case Oand: case Oxor: case Oior: - /* commutative ops */ - if (iscon(ins->l)) - rswap(ins->l, ins->r); - goto ALU; - case Oneg: - if (kisflt(ins->cls)) { - /* flip sign bit with XORPS/D */ - static const uvlong sd[2] = {0x8000000000000000,0x8000000000000000}; - static const uint sf[4] = {0x80000000,80000000,0x80000000,80000000}; - ins->op = Oxor; - ins->r = mkdatref(NULL, mktype(ins->cls == KF32 ? TYFLOAT : TYDOUBLE), /*siz*/16, - /*align*/16, ins->cls == KF32 ? (void *)sf : sd, /*siz*/16, /*deref*/1); - } - /* fallthru */ - case Onot: - ALU: - if (!(op == Oadd && kisint(ins->cls))) /* 3-address add is lea */ - if (!(op == Omul && kisint(ins->cls) && isimm32(ins->r))) /* for (I)MUL r,r/m,imm */ - ins->inplace = 1; - if (iscon(ins->l)) { - fixarg(&ins->l, ins, blk, curi); - ins->l = insertinstr(blk, (*curi)++, mkinstr(Ocopy, ins->cls, ins->l)); - } - if (ins->r.bits) - case Omove: - fixarg(&ins->r, ins, blk, curi); - if (op == Oadd && isaddrcon(ins->r,1)) /* no 3-address add if rhs is mem */ - ins->inplace = 1; - break; - case Oloads8: case Oloadu8: case Oloads16: case Oloadu16: - case Oloads32: case Oloadu32: case Oloadi64: case Oloadf32: case Oloadf64: - loadstoreaddr(blk, &ins->l, curi); - break; - case Ostore8: case Ostore16: case Ostore32: case Ostore64: - loadstoreaddr(blk, &ins->l, curi); - if (isaddrcon(ins->r,1) || ins->r.t == RADDR) - ins->r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, ins->r)); - else - fixarg(&ins->r, ins, blk, curi); - break; - case Ocvtu32f: - fixarg(&ins->l, ins, blk, curi); - ins->l = insertinstr(blk, (*curi)++, mkinstr(Oextu32, KI64, ins->l)); - ins->op = Ocvts64f; - break; - case Ocvtf32u: case Ocvtf64u: - fixarg(&ins->l, ins, blk, curi); - if (ins->cls == KI32) { - ins->l = insertinstr(blk, (*curi)++, mkinstr(ins->op == Ocvtf32u ? Ocvtf32s : Ocvtf64s, KI64, ins->l)); - ins->op = Oextu32; - } else assert(!"nyi flt -> u64"); - break; - case Ocvtf32f64: case Ocvtf64f32: case Ocvtf32s: case Ocvtf64s: case Ocvts32f: case Ocvts64f: - case Ocvtu64f: - case Oexts8: case Oextu8: case Oexts16: case Oextu16: case Oexts32: case Oextu32: - if (isnumcon(ins->l)) { - union ref it; - bool ok = foldunop(&it, ins->op, ins->cls, ins->l); - assert(ok); - ins->op = Ocopy; - ins->l = it; - break; - } - case Ocopy: - fixarg(&ins->l, ins, blk, curi); - break; - case Oxvaprologue: - fuseaddr(&ins->l, blk, curi); - assert(ins->l.t == RADDR); - /* !this must be the first instruction */ - assert(*curi == 1); - assert(blk == fn->entry); - t = blk->ins.p[0]; - blk->ins.p[0] = blk->ins.p[1]; - blk->ins.p[1] = t; - break; - } -} - -static void -seljmp(struct function *fn, struct block *blk) -{ - if (blk->jmp.t == Jb && blk->jmp.arg[0].bits) { - int curi = blk->ins.n; - fixarg(&blk->jmp.arg[0], NULL, blk, &curi); - union ref c = blk->jmp.arg[0]; - if (c.t != RTMP) { - enum irclass cls = c.t == RICON ? KI32 : c.t == RXCON && conht[c.i].cls ? conht[c.i].cls : KPTR; - int curi = blk->ins.n; - - c = insertinstr(blk, blk->ins.n, mkinstr(Ocopy, cls, c)); - sel(fn, &instrtab[c.i], blk, &curi); - } - if (iflagsrc == c.i /* test cmp */ - && (oiscmp(instrtab[c.i].op) || instrtab[c.i].op == Oand || instrtab[c.i].op == Osub)) { - instrtab[c.i].keep = 1; - } else { - if (!(opflags[instrtab[c.i].op] & ZF) || blk->ins.n == 0 || c.i != blk->ins.p[blk->ins.n - 1]) { - struct instr *ins; - int curi = blk->ins.n; - blk->jmp.arg[0] = insertinstr(blk, blk->ins.n, mkinstr(Oneq, instrtab[c.i].cls, c, ZEROREF)); - ins = &instrtab[blk->jmp.arg[0].i]; - if (kisflt(ins->cls)) { - ins->r = insertinstr(blk, curi, mkinstr(Ocopy, ins->cls, ZEROREF)); - } - ins->keep = 1; - } else if (instrtab[c.i].op == Oadd) { - /* prevent a 3-address add whose flag results are used from becoming a LEA */ - instrtab[c.i].inplace = 1; - } - } - } else if (blk->jmp.t == Jret) { - if (blk->jmp.arg[0].bits) { - int curi; - union ref r = mkref(RREG, fn->abiret[0].reg); - struct instr *ins = &instrtab[insertinstr(blk, blk->ins.n, mkinstr(Omove, fn->abiret[0].ty.cls, r , blk->jmp.arg[0])).i]; - curi = blk->ins.n; - fixarg(&ins->r, ins, blk, &curi); - blk->jmp.arg[0] = r; - if (blk->jmp.arg[1].bits) { - r = mkref(RREG, fn->abiret[1].reg); - ins = &instrtab[insertinstr(blk, blk->ins.n, mkinstr(Omove, fn->abiret[1].ty.cls, r, blk->jmp.arg[1])).i]; - curi = blk->ins.n; - fixarg(&ins->r, ins, blk, &curi); - blk->jmp.arg[1] = r; - } - } - } -} - -void -amd64_isel(struct function *fn) -{ - extern int ninstr; - struct block *blk = fn->entry; - - fn->stksiz = 0; - stkslots = xcalloc((nstkslots = ninstr) * sizeof *stkslots); - do { - int i; - for (i = 0; i < blk->phi.n; ++i) { - struct instr *ins = &instrtab[blk->phi.p[i]]; - union ref *phi = phitab.p[ins->l.i]; - for (int i = 0; i < blk->npred; ++i) { - int curi = blkpred(blk, i)->ins.n; - fixarg(&phi[i], ins, blkpred(blk, i), &curi); - } - } - iflagsrc = -1; - for (i = 0; i < blk->ins.n; ++i) { - struct instr *ins = &instrtab[blk->ins.p[i]]; - sel(fn, ins, blk, &i); - if (ins->op < countof(opflags) && kisint(insrescls(*ins))) { - if (opflags[ins->op] & ZF) iflagsrc = ins - instrtab; - else if (opflags[ins->op] & CLOBF) iflagsrc = -1; - } - } - seljmp(fn, blk); - } while ((blk = blk->lnext) != fn->entry); - free(stkslots); - - if (ccopt.dbg.i) { - bfmt(ccopt.dbgout, "<< After isel >>\n"); - irdump(fn); - } - - fn->prop = 0; -} - -/* vim:set ts=3 sw=3 expandtab: */ diff --git a/amd64/sysv.c b/amd64/sysv.c deleted file mode 100644 index 486c0c0..0000000 --- a/amd64/sysv.c +++ /dev/null @@ -1,313 +0,0 @@ -#include "all.h" - -static int classify(uchar cls[2], const struct typedata *td, uint off); - -static void -clsscalar(uchar cls[2], uint off, union type ty) -{ - enum irclass k = type2cls[scalartypet(ty)]; - uchar *fcls = &cls[off/8]; - if (isflt(ty)) { /* SSE */ - if (!*fcls || (*fcls == KF32 && k > *fcls)) - *fcls = k; - } else { /* INTEGER */ - assert(isint(ty) || ty.t == TYPTR); - if (cls2siz[*fcls] < cls2siz[k]) - *fcls = k == KPTR ? KI64 : k; - } - if (off % 8 >= 4 && cls2siz[*fcls] < 8) - *fcls = kisint(*fcls) ? KI64 : KF64; -} - -static int -classifyarr(uchar cls[2], union type ty, uint off) -{ - union type chld = typechild(ty); - uint n = typearrlen(ty), siz = typesize(chld); - assert(n > 0); - for (uint i = 0; i < n; ++i) { - uint offx = off + i * siz; - if (isagg(chld)) { - if (!classify(cls, &typedata[chld.dat], offx)) - return cls[0] = cls[1] = 0; - } else if (chld.t == TYARRAY) { - if (!classifyarr(cls, chld, offx)) - return cls[0] = cls[1] = 0; - } else { - clsscalar(cls, offx, chld); - } - } - return !!cls[0] + !!cls[1]; -} - -static int -classify(uchar cls[2], const struct typedata *td, uint off) -{ - uint siz = alignup(td->siz, 4); - if (siz > 16) /* MEMORY */ - return 0; - for (int i = 0; i < td->nmemb; ++i) { - struct fielddata *fld = &td->fld[i].f; - uint align = typealign(fld->t); - if (alignup(fld->off, align) != fld->off) /* unaligned field -> MEMORY */ - return cls[0] = cls[1] = 0; - if (isagg(fld->t)) { - if (!classify(cls, &typedata[fld->t.dat], off + fld->off)) - return cls[0] = cls[1] = 0; - } else if (fld->t.t == TYARRAY) { - if (isincomplete(fld->t)) continue; - if (!classifyarr(cls, fld->t, off + fld->off)) - return cls[0] = cls[1] = 0; - } else { - clsscalar(cls, fld->off + off, fld->t); - } - } - return !!cls[0] + !!cls[1]; -} - -static int -abiarg(short r[2], uchar cls[2], uchar *r2off, int *ni, int *nf, int *ns, union irtype typ) -{ - static const uchar intregs[] = { RDI, RSI, RDX, RCX, R8, R9 }; - enum { NINT = countof(intregs), NFLT = 8 }; - int ret, ni_save, nf_save; - - if (!typ.isagg) { - if (kisflt(cls[0] = typ.cls) && *nf < NFLT) { - r[0] = XMM0 + (*nf)++; - } else if (kisint(cls[0]) && *ni < NINT) { - r[0] = intregs[(*ni)++]; - } else { - r[0] = *ns; - *ns += 8; - return 0; /* MEMORY */ - } - return 1; - } - cls[0] = cls[1] = 0; - ret = classify(cls, &typedata[typ.dat], 0); - if (!ret) { /*MEMORY*/ - r[0] = *ns; - *ns = alignup(*ns + typedata[typ.dat].siz, 8); - return 0; - } - assert(ret <= 2); - ni_save = *ni, nf_save = *nf; - *r2off = 8; - for (int i = 0; i < ret; ++i) { - assert(cls[i]); - if (kisflt(cls[i]) && *nf < NFLT) - r[i] = XMM0 + (*nf)++; - else if (kisint(cls[i]) && *ni < NINT) - r[i] = intregs[(*ni)++]; - else { /* MEMORY */ - *ni = ni_save, *nf = nf_save; - r[0] = *ns; - *ns = alignup(*ns + typedata[typ.dat].siz, 8); - r[1] = -1; - return cls[0] = cls[1] = 0; - } - } - return ret; -} - -static int -abiret(short r[2], uchar cls[2], uchar *r2off, int *ni, union irtype typ) -{ - int ret; - - if (!typ.isagg) { - r[0] = kisflt(cls[0] = typ.cls) ? XMM0 : RAX; - return 1; - } - - cls[0] = cls[1] = 0; - ret = classify(cls, &typedata[typ.dat], 0); - if (!ret) { /* MEMORY */ - assert(*ni == 0); - r[0] = RAX; /* on return should contain result location address */ - r[1] = RDI; /* register for caller-owned result location argument */ - ++*ni; - return 0; - } - assert(ret <= 2); - *r2off = 8; - for (int i = 0, ni = 0, nf = 0; i < ret; ++i) { - assert(cls[i]); - if (kisflt(cls[i])) /* SSE (XMM0, XMM1) */ - r[i] = XMM0 + nf++; - else if (kisint(cls[i])) /* INTEGER (RAX, RDX) */ - r[i] = ni++ == 0 ? RAX : RDX; - else assert(0); - } - return ret; -} - -/* Layout of va_list: - * struct { - * ( 0) unsigned int gp_offset; - * ( 4) unsigned int fp_offset; - * ( 8) void *overflow_arg_area; - * (16) void *reg_save_area; - * } - * Layout of register save area (align 16): - * reg off - * rdi 0 - * rsi 8 - * rdx 16 - * rcx 24 - * r8 32 - * r9 40 - * xmm0 48 - * xmm1 64 - * ... - * in amd64/emit xvaprologue generates the code to save the registers to a stack slot - * there only needs to be one xvaprologue if there's any vastart instrs, and it has to be - * at the beginning of the function (before IR generated by regalloc can touch any registers) - * then vastart can initialize va_list.reg_save_area with a pointer to that - */ - -static void -vastart(struct function *fn, struct block *blk, int *curi) -{ - union ref rsave; /* register save area */ - int gpr0 = 0, fpr0 = 0, stk0 = 0; - struct instr *ins = &instrtab[blk->ins.p[*curi]]; - union ref ap = ins->l, src, dst; - assert(ins->op == Ovastart); - /* add xvaprologue if not there yet, which must be the first - * real instruction in the function (following alloca) */ - if (fn->entry->ins.n > 1 && instrtab[fn->entry->ins.p[1]].op == Oxvaprologue) { - rsave = mkref(RTMP, fn->entry->ins.p[0]); /* alloca instruction */ - assert(instrtab[rsave.i].op == Oalloca16); - } else { - rsave = insertinstr(fn->entry, 0, mkalloca(192, 16)); - insertinstr(fn->entry, 1, mkinstr(Oxvaprologue, 0, rsave, .keep=1)); - } - /* find first unnamed gpr and fpr */ - for (int i = 0; i < fn->nabiarg; ++i) { - struct abiarg abi = fn->abiarg[i]; - if (!abi.isstk){ - if (abi.reg < XMM0) ++gpr0; - else ++fpr0; - } else { - stk0 = abi.stk+8; - } - } - /* set ap->reg_save_area */ - *ins = mkinstr(Oadd, KPTR, ap, mkref(RICON, 16)); - dst = mkref(RTMP, ins - instrtab); - int i = *curi + 1; - insertinstr(blk, i++, mkinstr(Ostore64, 0, dst, rsave)); - /* set ap->overflow_arg_area */ - src = insertinstr(blk, i++, mkinstr(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, 16+stk0))); - dst = insertinstr(blk, i++, mkinstr(Oadd, KPTR, ap, mkref(RICON, 8))); - insertinstr(blk, i++, mkinstr(Ostore64, 0, dst, src)); - /* set ap->gp_offset */ - insertinstr(blk, i++, mkinstr(Ostore32, 0, ap, mkref(RICON, gpr0*8))); - /* set ap->fp_offset */ - dst = insertinstr(blk, i++, mkinstr(Oadd, KPTR, ap, mkref(RICON, 4))); - insertinstr(blk, i++, mkinstr(Ostore32, 0, dst, mkref(RICON, 6*8 + fpr0*16))); - *curi = i-1; -} - -static void -vaarg(struct function *fn, struct block *blk, int *curi) -{ - short r[2]; - uchar cls[2]; - union ref tmp; - int ni = 0, nf = 0, ns = 0; - uchar r2off; - int var = blk->ins.p[*curi]; - union ref ap = instrtab[var].l; - union irtype ty = ref2type(instrtab[var].r); - - assert(instrtab[var].op == Ovaarg); - blk->ins.p[*curi] = newinstr(blk, (struct instr){Onop}); - - int ret = abiarg(r, cls, &r2off, &ni, &nf, &ns, ty); - - if (ret == 2) assert(!"nyi"); - else if (ret == 1) { - struct block *merge; - union ref phi, phiargs[2]; - /* int: l->gp_offset < 48 - num_gp * 8 */ - /* sse: l->fp_offset < 304 - num_gp * 16 (why 304? ... 176) */ - tmp = ni ? ap : insertinstr(blk, (*curi)++, mkinstr(Oadd, KPTR, ap, mkref(RICON, 4))); - tmp = insertinstr(blk, (*curi)++, mkinstr(Oloadu32, KI32, tmp)); - tmp = insertinstr(blk, (*curi)++, mkinstr(Oulte, KI32, tmp, mkref(RICON, ni ? 48 - ni*8 : 176 - nf*16))); - merge = blksplitafter(fn, blk, *curi); - blk->jmp.t = 0; - useblk(fn, blk); - putcondbranch(fn, tmp, newblk(fn), newblk(fn)); - useblk(fn, blk->s1); - { - /* phi0: &l->reg_save_area[l->gp/fp_offset] */ - union ref sav = addinstr(fn, mkinstr(Oloadi64, KPTR, irbinop(fn, Oadd, KPTR, ap, mkref(RICON, 16)))); - union ref roff = addinstr(fn, mkinstr(Oloadu32, KI32, irbinop(fn, Oadd, KPTR, ap, mkref(RICON, ni ? 0 : 4)))); - phiargs[0] = irbinop(fn, Oadd, KPTR, sav, roff); - /* l->gp/fp_offset += num_gp/fp * 8(16) */ - roff = irbinop(fn, Oadd, KI32, roff, mkref(RICON, ni ? ni * 8 : nf * 16)); - addinstr(fn, mkinstr(Ostore32, 0, irbinop(fn, Oadd, KPTR, ap, mkref(RICON, ni ? 0 : 4)), roff)); - assert(merge->npred == 1); - blkpred(merge, 0) = blk->s1; - blk->s1->jmp.t = Jb; - blk->s1->s1 = merge; - } - useblk(fn, blk->s2); - { - /* phi1: l->overflow_arg_area */ - union ref adr = irbinop(fn, Oadd, KPTR, ap, mkref(RICON, 8)); - union ref ovf = addinstr(fn, mkinstr(Oloadi64, KPTR, adr)); - /* align no-op */ - - phiargs[1] = ovf; - /* update l->overflow_arg_area += size */ - int siz = 8; - addinstr(fn, mkinstr(Ostore64, 0, adr, irbinop(fn, Oadd, KPTR, ovf, mkref(RICON, siz)))); - putbranch(fn, merge); - } - assert(merge->npred == 2); - vpush(&merge->ins, 0); - memmove(merge->ins.p+1, merge->ins.p, (merge->ins.n-1)*sizeof *merge->ins.p); - merge->ins.p[0] = var; - phi = insertphi(merge, KPTR); - memcpy(phitab.p[instrtab[phi.i].l.i], phiargs, sizeof phiargs); - if (!ty.isagg) { - instrtab[var] = mkinstr(cls[0] == KI32 ? Oloads32 : Oloadi64, cls[0], phi); - } else { - instrtab[var] = mkalloca(8, 8); - tmp = insertinstr(merge, 1, mkinstr(Oloadi64, KI64, phi)); - insertinstr(merge, 2, mkinstr(Ostore64, 0, mkref(RTMP, var), tmp)); - } - fn->prop &= ~FNUSE; - } else { - assert(!"nyi"); - } -} - -static const char amd64_rnames[][6] = { -#define R(r) #r, - LIST_REGS(R) -#undef R -}; - -const struct mctarg t_amd64_sysv = { - .gpr0 = RAX, .ngpr = R15 - RAX + 1, - .bpr = RBP, - .gprscratch = R11, .fprscratch = XMM15, - .fpr0 = XMM0, .nfpr = XMM15 - XMM0 + 1, - .rcallee = 1<