aboutsummaryrefslogtreecommitdiffhomepage
path: root/amd64
diff options
context:
space:
mode:
Diffstat (limited to 'amd64')
-rw-r--r--amd64/all.h18
-rw-r--r--amd64/emit.c1388
-rw-r--r--amd64/isel.c660
-rw-r--r--amd64/sysv.c313
4 files changed, 0 insertions, 2379 deletions
diff --git a/amd64/all.h b/amd64/all.h
deleted file mode 100644
index 992d47e..0000000
--- a/amd64/all.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#include "../ir/ir.h"
-
-/* X-macro listing every machine register. The order is significant: it
- * fixes the values of `enum reg` below, so the 16 integer registers come
- * first (RAX = 0 .. R15 = 15) and the 16 SSE registers follow
- * (XMM0 = 16 .. XMM15 = 31). */
-#define LIST_REGS(_) \
- _(RAX) _(RCX) _(RDX) _(RBX) _(RSP) _(RBP) _(RSI) _(RDI) \
- _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \
- _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) \
- _(XMM8) _(XMM9) _(XMM10) _(XMM11) _(XMM12) _(XMM13) _(XMM14) _(XMM15)
-
-/* Register identifiers, generated from LIST_REGS (one enumerator per entry). */
-enum reg {
-#define R(r) r,
- LIST_REGS(R)
-#undef R
-};
-
-void amd64_isel(struct function *);
-void amd64_emit(struct function *);
-
-/* vim:set ts=3 sw=3 expandtab: */
diff --git a/amd64/emit.c b/amd64/emit.c
deleted file mode 100644
index 6121f5e..0000000
--- a/amd64/emit.c
+++ /dev/null
@@ -1,1388 +0,0 @@
-#include "all.h"
-#include "../obj/obj.h"
-#include "../endian.h"
-
-/** Instruction operands **
- *
- * Can be a register, a 32-bit immediate,
- * a memory reference [base + index * scale + disp],
- * or a relocatable reference to some symbol plus a displacement and maybe index*scale
- */
-/* Operand kinds; ONONE (0) marks an absent operand. */
-enum operkind { ONONE, OREG, OIMM, OMEM, OSYM };
-/* Sentinels for "no base" / "no index" register; 63 still fits in the
- * 6-bit `cindex` field used by OSYM operands. */
-enum { NOBASE = 63, NOINDEX = 63 };
-/* One operand. `t` is an enum operkind and selects which member of each
- * anonymous union is meaningful (see the per-member comments). */
-struct oper {
- uchar t;
- union {
- struct { uchar base; }; /* OMEM */
- struct { uchar cindex : 6, cshift : 2; }; /* OSYM */
- };
- union {
- struct { uchar index, shift; }; /* OMEM */
- ushort con; /* OSYM */
- };
- union {
- uchar reg; /* OREG */
- int disp; /* OMEM, OSYM */
- int imm; /* OIMM */
- };
-};
-/* Build an operand as a compound literal: the kind first, then optional
- * designated initializers for the remaining fields. */
-#define mkoper(t, ...) ((struct oper){(t), __VA_ARGS__})
-/* Register operand for R; asserts that R is a valid enum reg value. */
-#define reg2oper(R) (assert((uint)(R) <= XMM15), mkoper(OREG, .reg = (R)))
-
-static struct oper mkmemoper(union ref);
-
-/* Operand holding the result of instruction i: instrtab[i].reg stores the
- * register number biased by one, so 0 yields an ONONE operand. */
-static struct oper
-ioper(int i)
-{
-   int biased = instrtab[i].reg;
-
-   if (biased - 1 < 0)
-      return mkoper(ONONE,);
-   return reg2oper(biased - 1);
-}
-
-/* Translate an IR reference into an instruction operand:
- *  RTMP  -> operand assigned to the defining instruction
- *  RREG  -> that register
- *  RICON -> immediate holding the inline constant
- *  RXCON -> immediate for 32/64-bit integer constants (a 64-bit value must
- *           fit in an int), symbol operand for class-less constants
- *  RADDR -> memory operand built by mkmemoper */
-static struct oper
-ref2oper(union ref r)
-{
-   switch (r.t) {
-   case RTMP:
-      return ioper(r.i);
-   case RREG:
-      return reg2oper(r.i);
-   case RICON:
-      return mkoper(OIMM, .imm = r.i);
-   case RXCON:
-      if (conht[r.i].cls == KI32) {
-         return mkoper(OIMM, .imm = conht[r.i].i);
-      }
-      if (conht[r.i].cls == KI64) {
-         vlong wide = conht[r.i].i;
-         assert(wide == (int)wide);
-         return mkoper(OIMM, .imm = wide);
-      }
-      if (!conht[r.i].cls) {
-         return mkoper(OSYM, .con = r.i, .cindex = NOINDEX);
-      }
-      assert(0);
-      /* only reachable with assertions disabled: falls into RADDR */
-   case RADDR:
-      return mkmemoper(r);
-   default:
-      assert(0);
-   }
-}
-
-/* Fold `add` into the memory operand *mem: an immediate is added to the
- * displacement, a register takes the first free slot (base, then index).
- * Operands of any other kind are ignored. */
-static void
-addmemoper(struct oper *mem, struct oper add)
-{
-   assert(mem->t == OMEM);
-   switch (add.t) {
-   case OIMM:
-      mem->disp += add.imm;
-      break;
-   case OREG:
-      if (mem->base == NOBASE)
-         mem->base = add.reg;
-      else if (mem->index == NOINDEX)
-         mem->index = add.reg;
-      else
-         assert(0); /* both register slots already taken */
-      break;
-   default:
-      break;
-   }
-}
-
-/* helpers to convert a reference to an operand of a specific kind,
- * with assertions to make sure nothing went wrong */
-
-/* Register operand for r; r must be a register reference or a temporary
- * whose instruction was assigned a register. */
-static inline struct oper
-mkregoper(union ref r)
-{
-   if (r.t == RREG)
-      return reg2oper(r.i);
-   assert(r.t == RTMP && ioper(r.i).t == OREG);
-   return ioper(r.i);
-}
-
-/* Immediate operand for r; r must be a constant of class KI32. */
-static inline struct oper
-mkimmoper(union ref r)
-{
-   struct oper imm = { .t = OIMM };
-
-   assert(iscon(r));
-   assert(concls(r) == KI32);
-   imm.imm = intconval(r);
-   return imm;
-}
-
-/* ismemref: ref is a temporary whose operand is a memory reference.
- * isregref: ref is a register, or a temporary whose operand is a register.
- * Both macros evaluate their argument more than once. */
-#define ismemref(ref) ((ref).t == RTMP && ioper((ref).i).t == OMEM)
-#define isregref(ref) ((ref).t == RREG || ((ref).t == RTMP && ioper((ref).i).t == OREG))
-
-/* Operand for r, which must be a register or a KI32 constant. */
-static inline struct oper
-mkimmregoper(union ref r)
-{
-   if (!isregref(r))
-      assert(iscon(r) && concls(r) == KI32);
-   return ref2oper(r);
-}
-
-/* Operand for r, which must be a register or an extended constant whose
- * `deref` flag is set. */
-static inline struct oper
-mkdatregoper(union ref r)
-{
-   if (!isregref(r))
-      assert(r.t == RXCON && conht[r.i].deref);
-   return ref2oper(r);
-}
-
-/* Operand for r, which must be a register, an inline integer constant, or
- * an extended constant that is either KI32 or has its `deref` flag set. */
-static inline struct oper
-mkimmdatregoper(union ref r)
-{
-   if (!isregref(r) && r.t != RICON)
-      assert(r.t == RXCON && (conht[r.i].cls == KI32 || conht[r.i].deref));
-   return ref2oper(r);
-}
-
-static int rbpoff;
-
-/* Build a memory operand (OMEM or OSYM) from a reference:
- *  RTMP   the temporary's own operand if it already is OMEM, else [reg]
- *  RADDR  the base/index/shift/disp address stored in addrht
- *  RXCON  a symbol reference (the constant must have no class)
- *  other  [reg] for a register reference, else an absolute [imm32] */
-static struct oper
-mkmemoper(union ref r)
-{
- if (r.t == RTMP) {
- struct oper wop = ioper(r.i);
- if (wop.t == OMEM) return wop;
- assert(wop.t == OREG);
- return mkoper(OMEM, .base = wop.reg, .index = NOINDEX);
- } else if (r.t == RADDR) {
- const struct addr *addr = &addrht[r.i];
- struct oper mem;
-
- assert(addr->shift <= 3);
- /* base is itself a memory operand: extend it with the index and disp */
- if (addr->base.t == RTMP && ioper(addr->base.i).t == OMEM) {
- mem = ioper(addr->base.i);
- if (addr->index.bits) addmemoper(&mem, mkregoper(addr->index));
- assert(!mem.shift);
- mem.shift = addr->shift;
- addmemoper(&mem, mkoper(OIMM, .imm = addr->disp));
- return mem;
- }
- /* NOTE(review): isaddrcon(x,0) presumably tests for a symbol address
- * constant; it is defined outside this file */
- if (isaddrcon(addr->base,0)) {
- /* symbol base: symbol + optional index*scale + disp */
- return mkoper(OSYM, .con = addr->base.i,
- .cindex = addr->index.bits ? mkregoper(addr->index).reg : NOINDEX,
- .cshift = addr->shift,
- .disp = addr->disp);
- } else if (isintcon(addr->base)) {
- /* integer constant base: it becomes the displacement, no base register */
- assert(!addr->disp);
- return mkoper(OMEM, .base = NOBASE,
- .index = addr->index.bits ? mkregoper(addr->index).reg : NOINDEX,
- .disp = intconval(addr->base),
- .shift = addr->shift);
- } else if (isaddrcon(addr->index,0)) {
- /* symbol in the index slot: the base register (if any) is used as
- * the unscaled index of the symbol operand */
- assert(!addr->shift);
- return mkoper(OSYM, .con = addr->index.i,
- .cindex = addr->base.bits ? mkregoper(addr->base).reg : NOINDEX,
- .disp = addr->disp);
- }
- /* plain [base + index*scale + disp] */
- return mkoper(OMEM, .base = addr->base.bits ? mkregoper(addr->base).reg : NOBASE,
- .index = addr->index.bits ? mkregoper(addr->index).reg : NOINDEX,
- .disp = addr->disp,
- .shift = addr->shift);
- } else if (r.t == RXCON) {
- assert(!conht[r.i].cls);
- return mkoper(OSYM, .con = r.i, .cindex = NOINDEX);
- } else {
- return mkoper(OMEM, .base = isregref(r) ? ref2oper(r).reg : NOBASE,
- .index = NOINDEX,
- .disp = isregref(r) ? 0 : mkimmoper(r).imm);
- }
-}
-
-/** Instruction description tables **
- *
- * Each instruction is a list of descs, and the first one that matches
- * is emitted. Each entry has a size pattern field, which is a bitset
- * of the sizes (in bytes) that the entry matches, and 2 operand patterns,
- * which describe the operands that can match (for example, PRAX matches
- * a RAX register operand, PGPR matches any integer register, I8 matches
- * an immediate operand between [-128,127]) The rest of the fields describe
- * the instruction's encoding.
- * (reference: https://www.felixcloutier.com/x86/ & https://wiki.osdev.org/X86-64_Instruction_Encoding )
- */
-
-/* Operand patterns: each value names one class of operand that a
- * description entry accepts (matched by opermatch). */
-enum operpat {
- PNONE, /* no operand */
- PRAX, /* the RAX register */
- PRCX, /* the RCX register */
- PGPR, /* any integer register */
- PFPR, /* any XMM register */
- P1, /* imm = 1 */
- PN1, /* imm = -1 */
- PI8, /* immediate that fits a signed 8-bit value */
- PU8, /* immediate that fits an unsigned 8-bit value */
- PI16, /* immediate that fits a signed 16-bit value */
- PU16, /* immediate that fits an unsigned 16-bit value */
- PI32, /* any immediate */
- PU32, /* non-negative immediate */
- PMEM, /* memory or symbol operand */
- PSYM, /* symbol operand */
-};
-/* How the operands are laid out in the encoded instruction. */
-enum operenc {
- EN_R = 1, /* reg in ModR/M.rm, opcode extension (/ext) in ModR/M.reg */
- EN_RR, /* reg, reg with /r */
- EN_RRX, /* reg, reg with /r (inverted) */
- EN_MR, /* mem, reg with /r */
- EN_RM, /* reg, mem with /r */
- EN_M, /* mem */
- EN_RI8, /* reg, imm8 with /ext */
- EN_RI32, /* reg, imm32 with /ext */
- EN_MI8, /* mem, imm8 with /x */
- EN_MI16, /* mem, imm16 with /x */
- EN_MI32, /* mem, imm32 with /x */
- EN_OI, /* reg, imm32 with op + reg */
- EN_I8, /* imm8 */
- EN_I32, /* imm32 */
- EN_R32, /* rel32 */
- NOPERENC,
-};
-/* One candidate encoding of an instruction. */
-struct desc {
- uchar psiz; /* subset of {1,2,4,8} */
- uchar ptd, pts; /* enum operpat for the dst and src operand (one pattern each) */
- uchar nopc; /* countof opc */
- const char opc[8]; /* opcode bytes */
- uchar operenc; /* enum operenc */
- uchar ext; /* ModR/M.reg opc extension */
- bool r8; /* uses 8bit register */
- bool norexw; /* do not use REX.W even if size is 64 bits */
-};
-
-/* match operand against pattern */
-/* Does operand `oper` satisfy pattern `pat`? */
-static inline bool
-opermatch(enum operpat pat, struct oper oper)
-{
-   switch (pat) {
-   /* absent / memory-like operands */
-   case PNONE:
-      return oper.t == ONONE;
-   case PMEM:
-      return oper.t == OMEM || oper.t == OSYM;
-   case PSYM:
-      return oper.t == OSYM;
-
-   /* registers */
-   case PRAX:
-      return oper.t == OREG && oper.reg == RAX;
-   case PRCX:
-      return oper.t == OREG && oper.reg == RCX;
-   case PGPR:
-      return oper.t == OREG && oper.reg <= R15;
-   case PFPR:
-      return oper.t == OREG && oper.reg >= XMM0;
-
-   /* immediates: exact values, then range checks */
-   case P1:
-      return oper.t == OIMM && oper.imm == 1;
-   case PN1:
-      return oper.t == OIMM && oper.imm == -1;
-   case PI8:
-      return oper.t == OIMM && oper.imm >= -128 && oper.imm <= 127;
-   case PU8:
-      return oper.t == OIMM && oper.imm >= 0 && oper.imm <= 255;
-   case PI16:
-      return oper.t == OIMM && oper.imm >= -32768 && oper.imm <= 32767;
-   case PU16:
-      return oper.t == OIMM && oper.imm >= 0 && oper.imm <= 65535;
-   case PI32:
-      return oper.t == OIMM;
-   case PU32:
-      return oper.t == OIMM && oper.imm >= 0;
-   }
-   assert(0);
-}
-
-/* code output helpers */
-/* All of these write at *pcode and advance it; they expect a variable
- * `uchar **pcode` to be in scope at the point of use.
- *  B    one byte
- *  D    N bytes copied from xs
- *  I16  16-bit little-endian value
- *  I32  32-bit little-endian value
- *  DS   the bytes of a string literal, without its terminating NUL */
-#define B(b) (*(*pcode)++ = (b))
-#define D(xs, N) (memcpy(*pcode, (xs), (N)), (*pcode) += (N))
-#define I16(w) (wr16le(*pcode, (w)), *pcode += 2)
-#define I32(w) (wr32le(*pcode, (w)), *pcode += 4)
-#define DS(S) D(S, sizeof S - 1)
-
-static bool usebp; /* use RBP? */
-static const char *curfnsym; /* compared against call targets to detect self-recursive calls */
-static uchar *fnstart; /* target address used for self-recursive calls */
-
-/* Given an instruction description table, find the first entry that matches
- * the operands (where dst, src are the operands in intel syntax order) and
- * encode it at *pcode, advancing *pcode past the emitted bytes.
- *
- *  pcode     in/out cursor into the code buffer
- *  tab,ntab  candidate descriptions, tried in order
- *  k         IR class of the operation; selects the operand size and REX.W
- *  dst,src   operands; pass an ONONE operand for an unused slot
- *
- * Asserts when no description matches. */
-static void
-encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct oper dst, struct oper src)
-{
-   const uchar *opc;
-   int nopc;
-   struct oper mem;
-   enum reg reg;
-   const struct desc *en = NULL;
-   for (int i = 0; i < ntab; ++i) {
-      if ((tab[i].psiz & cls2siz[k]) && opermatch(tab[i].ptd, dst) && opermatch(tab[i].pts, src)) {
-         en = &tab[i];
-         break;
-      }
-   }
-   assert(en && "no match for instr");
-
-   /* XMM registers are numbered 16..31; reduce them to the 0..15 hardware number */
-   if (en->ptd == PFPR) dst.reg &= 15;
-   if (en->pts == PFPR) src.reg &= 15;
-   opc = (uchar *)en->opc;
-   nopc = en->nopc;
-   /* mandatory prefixes go before REX */
-   if (*opc == 0x66 || *opc == 0xF2 || *opc == 0xF3)
-      B(*opc++), --nopc;
-   int rex = in_range(k, KI64, KPTR) << 3; /* REX.W */
-   if (en->norexw) rex = 0;
-   switch (en->operenc) {
-   case EN_RR: /* mod = 11; reg = dst; rm = src */
-      rex |= (dst.reg >> 3) << 2; /* REX.R */
-      rex |= (src.reg >> 3) << 0; /* REX.B */
-      if (rex) B(0x40 | rex);
-      else if (en->r8 && in_range(src.reg, RSP, RDI)) {
-         /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */
-         B(0x40);
-      }
-      D(opc, nopc);
-      B(0300 | (dst.reg & 7) << 3 | (src.reg & 7));
-      break;
-   case EN_RRX: /* mod = 11; reg = src; rm = dst */
-      rex |= (src.reg >> 3) << 2; /* REX.R */
-      rex |= (dst.reg >> 3) << 0; /* REX.B */
-      if (rex) B(0x40 | rex);
-      else if (en->r8 && in_range(dst.reg, RSP, RDI)) {
-         /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */
-         B(0x40);
-      }
-      D(opc, nopc);
-      B(0300 | (src.reg & 7) << 3 | (dst.reg & 7));
-      break;
-   case EN_MR:
-      mem = dst;
-      reg = src.reg;
-      goto Mem;
-   case EN_RM:
-      mem = src;
-      reg = dst.reg;
-      goto Mem;
-   case EN_M: case EN_MI8: case EN_MI16: case EN_MI32:
-      mem = dst;
-      reg = en->ext; /* ModR/M.reg carries the opcode extension */
-   Mem:
-      if (mem.t == OMEM) {
-         if (mem.base != NOBASE) rex |= mem.base >> 3; /* REX.B */
-         if (mem.index != NOINDEX) rex |= mem.index >> 3 << 1; /* REX.X */
-      } else {
-         if (mem.cindex != NOINDEX) rex |= mem.cindex >> 3 << 1; /* REX.X */
-      }
-      if (en->operenc != EN_M)
-         rex |= (reg >> 3) << 2; /* REX.R */
-      if (rex) B(0x40 | rex);
-      else if (en->r8 && in_range(reg, RSP, RDI)) B(0x40);
-
-      if (mem.t == OSYM) {
-         D(opc, nopc);
-         if (mem.cindex == NOINDEX) {
-            /* %rip(var) */
-            /* size of the immediate that follows the disp32; the PC-relative
-             * addend must account for it */
-            static uchar offs[NOPERENC] = { [EN_MI8] = 1, [EN_MI16] = 2, [EN_MI32] = 4 };
-            /* the _REX relocation variant is for instructions that carry a
-             * REX prefix, the plain one for instructions without */
-            enum relockind r =
-               (!conht[mem.con].deref && ccopt.pic) ? (rex ? REL_GOTPCRELX_REX : REL_GOTPCRELX)
-                                                    : REL_PCREL32;
-            int off = -4 - offs[en->operenc];
-            B(/*mod 0*/ (reg & 7) << 3 | RBP);
-            objreloc(xcon2sym(mem.con), r, Stext, *pcode - objout.textbegin, mem.disp + off);
-         } else {
-            /* var(,%reg,shift) */
-            assert(!ccopt.pic && !ccopt.pie && "cannot encode [RIP-rel + REG] for position independent");
-            B(/*mod 0*/ (reg & 7) << 3 | RSP);
-            /* SIB [index*s + disp32]; only the low 3 bits of the index go
-             * here, the 4th bit was already placed in REX.X */
-            B(mem.cshift << 6 | (mem.cindex & 7) << 3 | RBP);
-            objreloc(xcon2sym(mem.con), REL_ABS32S, Stext, *pcode - objout.textbegin, mem.disp);
-         }
-         I32(0);
-      } else {
-         int mod;
-         bool sib = 0;
-         if (mem.base == RBP) {
-            if (!usebp) {
-               /* if RBP isn't being set up (leaf functions with no stack allocations),
-                * access thru RSP (function arguments in the stack) */
-               mem.base = RSP;
-               mem.disp -= 8;
-            } else if (mem.disp <= 0) {
-               mem.disp += rbpoff;
-            }
-         }
-         if (mem.base != NOBASE) {
-            if (mem.index == NOINDEX && mem.shift == 0) sib = 0;
-            else sib = 1;
-            mod = !mem.disp ? 0 /* disp = 0 -> mod = 00 */
-                : (uint)(mem.disp + 128) < 256 ? 1 /* disp8 -> mod = 01 */
-                : 2; /* disp32 -> mod = 10 */
-            /* mod = 00 with rm = 101 means disp32-only, so [RBP]/[R13] need a disp8 of 0 */
-            if (mod == 0 && (mem.base == RBP || mem.base == R13)) mod = 1;
-            /* rm = 100 selects a SIB byte, so RSP/R12 as base always need one */
-            if (mem.base == RSP || mem.base == R12) sib = 1;
-         } else {
-            /* [disp + (index*s)] */
-            sib = 1;
-            mem.base = RBP;
-            mod = 0;
-            assert(mem.index != RSP);
-         }
-         D(opc, nopc);
-         B(mod << 6 | (reg & 7) << 3 | (sib ? 4 : (mem.base & 7)));
-         if (sib) {
-            if (mem.index == NOINDEX) mem.index = RSP; /* index = 100 encodes "none" */
-            B(mem.shift << 6 | (mem.index & 7) << 3 | (mem.base & 7));
-         }
-         if (mod == 1) B(mem.disp);
-         else if (mod == 2 || (mod == 0 && mem.base == RBP/*RIP-rel*/) || (mod == 0 && sib && mem.base == RBP/*absolute*/)) {
-            I32(mem.disp);
-         }
-      }
-      if (en->operenc == EN_MI8) B(src.imm);
-      if (en->operenc == EN_MI16) I16(src.imm);
-      if (en->operenc == EN_MI32) I32(src.imm);
-      break;
-   case EN_R: case EN_RI32: case EN_RI8:
-      rex |= (dst.reg >> 3) << 0; /* REX.B */
-      if (rex) B(0x40 | rex);
-      else if (en->r8 && in_range(dst.reg, RSP, RDI)) {
-         /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */
-         B(0x40);
-      }
-      D(opc, nopc);
-      B(0300 | en->ext << 3 | (dst.reg & 7));
-      if (en->operenc == EN_RI32)
-         I32(src.imm);
-      else if (en->operenc == EN_RI8)
-         B(src.imm);
-      break;
-   case EN_OI:
-      rex |= (dst.reg >> 3) << 0; /* REX.B */
-      if (rex) B(0x40 | rex);
-      B(*opc++ + (dst.reg & 7)); /* register number is added to the first opcode byte */
-      D(opc, nopc - 1);
-      I32(src.imm);
-      break;
-   case EN_I8:
-      if (rex) B(0x40 | rex);
-      D(opc, nopc);
-      B(src.imm);
-      break;
-   case EN_I32:
-      if (rex) B(0x40 | rex);
-      D(opc, nopc);
-      I32(src.imm);
-      break;
-   case EN_R32:
-      if (rex) B(0x40 | rex);
-      D(opc, nopc);
-      assert(dst.t == OSYM);
-      const char *sym = xcon2sym(dst.con);
-      if (sym != curfnsym) {
-         enum relockind r = (ccopt.pie|ccopt.pic) ? REL_PLT32 : REL_PCREL32;
-         objreloc(sym, r, Stext, *pcode - objout.textbegin, -4);
-         I32(0);
-      } else {
-         /* self-recursive call */
-         I32(fnstart - *pcode - 4);
-      }
-      break;
-   }
-}
-
-/* Define a one-operand emitter X(pcode, k, oper): the variadic arguments
- * are the struct desc entries of its table; the src operand passed to
- * encode() is an operand of kind 0 (ONONE). */
-#define DEFINSTR1(X, ...) \
- static void \
- X(uchar **pcode, enum irclass k, struct oper oper) \
- { \
- static const struct desc tab[] = { __VA_ARGS__ }; \
- encode(pcode, tab, countof(tab), k, oper, mkoper(0,)); \
- }
-
-/* Define a two-operand emitter X(pcode, k, dst, src) over the given table. */
-#define DEFINSTR2(X, ...) \
- static void \
- X(uchar **pcode, enum irclass k, struct oper dst, struct oper src) \
- { \
- static const struct desc tab[] = { __VA_ARGS__ }; \
- encode(pcode, tab, countof(tab), k, dst, src); \
- }
-
-/* O("bytes") expands to the two initializers `nopc, opc` of a struct desc:
- * the byte count (string length without the NUL) and the bytes themselves. */
-#define O(s) (sizeof s)-1,s
-/* 8-bit store. psiz is -1 (all bits set), so the entries match every class. */
-DEFINSTR2(Xmovb,
- {-1, PMEM, PGPR, O("\x88"), EN_MR, .r8=1}, /* MOV m8, r8 */
- {-1, PMEM, PI8, O("\xC6"), EN_MI8, .r8=1}, /* MOV m8, imm8 */
- {-1, PMEM, PU8, O("\xC6"), EN_MI8, .r8=1}, /* MOV m8, imm8 */
-)
-/* 16-bit store; 0x66 is the operand-size prefix. */
-DEFINSTR2(Xmovw,
- {-1, PMEM, PGPR, O("\x66\x89"), EN_MR}, /* MOV m16, r16 */
- {-1, PMEM, PI16, O("\x66\xC7"), EN_MI16}, /* MOV m16, imm16 */
- {-1, PMEM, PU16, O("\x66\xC7"), EN_MI16}, /* MOV m16, imm16 */
-)
-/* 32/64-bit and scalar floating-point move. All encodings live in one
- * table; k2off gives, per class, the index of the first entry worth
- * trying, and encode() searches from there to the end of the table. */
-static void Xmov(uchar **pcode, enum irclass k, struct oper dst, struct oper src)
-{
- static const struct desc all[] = {
- {4 , PGPR, PI32, O("\xB8"), EN_OI}, /* MOV r32, imm */
- {4|8, PGPR, PGPR, O("\x8B"), EN_RR}, /* MOV r32/64, r32/64 */
- {4|8, PMEM, PGPR, O("\x89"), EN_MR}, /* MOV m32/64, r32/64 */
- {4|8, PGPR, PMEM, O("\x8B"), EN_RM}, /* MOV r32/64, m32/64 */
- {4|8, PMEM, PI32, O("\xC7"), EN_MI32}, /* MOV m32/64, imm */
- { 8, PGPR, PU32, O("\xB8"), EN_OI, .norexw=1}, /* MOV r64, uimm */
- { 8, PGPR, PI32, O("\xC7"), EN_RI32}, /* MOV r64, imm */
- {4 , PFPR, PFPR, O("\x0F\x28"), EN_RR}, /* MOVAPS xmm, xmm */
- {4 , PFPR, PMEM, O("\xF3\x0F\x10"), EN_RM}, /* MOVSS xmm, m32 */
- {4 , PMEM, PFPR, O("\xF3\x0F\x11"), EN_MR}, /* MOVSS m32, xmm */
- {8 , PFPR, PFPR, O("\x0F\x28"), EN_RR}, /* MOVAPS xmm, xmm */
- {8 , PFPR, PMEM, O("\xF2\x0F\x10"), EN_RM}, /* MOVSD xmm, m64 */
- {8 , PMEM, PFPR, O("\xF2\x0F\x11"), EN_MR}, /* MOVSD m64, xmm */
- {4|8, PFPR, PGPR, O("\x66\x0F\x6E"), EN_RR}, /* MOVD/Q xmm, r64/32 */
- {4|8, PGPR, PFPR, O("\x66\x0F\x7E"), EN_RRX}, /* MOVD/Q r64/32, xmm */
- };
- /* index into all[] of the first candidate for each class */
- static const uchar k2off[] = {
- [KI32] = 0,
- [KI64] = 1, [KPTR] = 1,
- [KF32] = 7,
- [KF64] = 10,
- };
- encode(pcode, all + k2off[k], countof(all) - k2off[k], k, dst, src);
-}
-/* Sign/zero extending loads and moves. The first field of each entry is
- * the set of result sizes (in bytes) the entry applies to. */
-DEFINSTR2(Xmovsxl,
- {8, PGPR, PMEM, O("\x63"), EN_RM}, /* MOVSXD r64, m32 */
- {8, PGPR, PGPR, O("\x63"), EN_RR}, /* MOVSXD r64, r32 */
- {4, PGPR, PMEM, O("\x8B"), EN_RM}, /* MOV r32, m32 */
- {4, PGPR, PGPR, O("\x8B"), EN_RR}, /* MOV r32, r32 */
-)
-DEFINSTR2(Xmovsxw,
- {4|8, PGPR, PMEM, O("\x0F\xBF"), EN_RM}, /* MOVSX r64, m16 */
- {4|8, PGPR, PGPR, O("\x0F\xBF"), EN_RR}, /* MOVSX r64, r16 */
-)
-DEFINSTR2(Xmovsxb,
- {4|8, PGPR, PMEM, O("\x0F\xBE"), EN_RM}, /* MOVSX r64, m8 */
- {4|8, PGPR, PGPR, O("\x0F\xBE"), EN_RR, .r8=1}, /* MOVSX r64, r8 */
-)
-DEFINSTR2(Xmovzxw,
- {4|8, PGPR, PMEM, O("\x0F\xB7"), EN_RM}, /* MOVZX r64, m16 */
- {4|8, PGPR, PGPR, O("\x0F\xB7"), EN_RR}, /* MOVZX r64, r16 */
-)
-DEFINSTR2(Xmovzxb,
- {4|8, PGPR, PMEM, O("\x0F\xB6"), EN_RM}, /* MOVZX r64, m8 */
- {4|8, PGPR, PGPR, O("\x0F\xB6"), EN_RR, .r8=1}, /* MOVZX r64, r8 */
-)
-DEFINSTR2(Xmovaps,
- {-1, PMEM, PFPR, O("\x0F\x29"), EN_MR}, /* MOVAPS mem, xmm */
-)
-DEFINSTR2(Xxchg,
- {4|8, PGPR, PGPR, O("\x87"), EN_RR}, /* XCHG r32/64, r32/64 */
- {4|8, PGPR, PMEM, O("\x87"), EN_RM}, /* XCHG r32/64, m32/64 */
- {4|8, PMEM, PGPR, O("\x87"), EN_MR}, /* XCHG m32/64, r32/64 */
-)
-/* NOTE(review): PMEM also matches symbol operands (see opermatch), so for
- * 8-byte results the first entry is selected before the PSYM one. */
-DEFINSTR2(Xlea,
- {4|8, PGPR, PMEM, O("\x8D"), EN_RM}, /* LEA r32/64,m32/64 */
- { 8, PGPR, PSYM, O("\x8D"), EN_RM}, /* LEA rel32 */
-)
-/* Integer and scalar floating-point arithmetic. Entries are tried in
- * order, so the short forms (INC/DEC, imm8) come before the general ones. */
-DEFINSTR2(Xadd,
- {4|8, PGPR, PGPR, O("\x03"), EN_RR}, /* ADD r32/64, r32/64 */
- {4|8, PGPR, P1, O("\xFF"), EN_R, .ext=0}, /* INC r32/64 */
- {4|8, PGPR, PN1, O("\xFF"), EN_R, .ext=1}, /* DEC r32/64 */
- {4|8, PGPR, PI8, O("\x83"), EN_RI8}, /* ADD r32/64, imm8 */
- {4|8, PRAX, PI32, O("\x05"), EN_I32}, /* ADD eax/rax, imm */
- {4|8, PGPR, PI32, O("\x81"), EN_RI32}, /* ADD r32/64, imm */
- { 8, PGPR, PMEM, O("\x03"), EN_RM}, /* ADD r64, m64 */
-)
-DEFINSTR2(Xaddf,
- {4, PFPR, PFPR, O("\xF3\x0F\x58"), EN_RR}, /* ADDSS xmm, xmm */
- {8, PFPR, PFPR, O("\xF2\x0F\x58"), EN_RR}, /* ADDSD xmm, xmm */
- {4, PFPR, PMEM, O("\xF3\x0F\x58"), EN_RM}, /* ADDSS xmm, m32 */
- {8, PFPR, PMEM, O("\xF2\x0F\x58"), EN_RM}, /* ADDSD xmm, m64 */
-)
-DEFINSTR2(Xsub,
- {4|8, PGPR, PGPR, O("\x2B"), EN_RR}, /* SUB r32/64, r32/64 */
- {4|8, PGPR, P1, O("\xFF"), EN_R, .ext=1}, /* DEC r32/64 */
- {4|8, PGPR, PN1, O("\xFF"), EN_R, .ext=0}, /* INC r32/64 */
- {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=5}, /* SUB r32/64, imm8 */
- {4|8, PRAX, PI32, O("\x2D"), EN_I32}, /* SUB eax/rax, imm */
- {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=5}, /* SUB r32/64, imm */
- { 8, PGPR, PMEM, O("\x2B"), EN_RM}, /* SUB r64, m64 */
-)
-DEFINSTR2(Xsubf,
- {4, PFPR, PFPR, O("\xF3\x0F\x5C"), EN_RR}, /* SUBSS xmm, xmm */
- {8, PFPR, PFPR, O("\xF2\x0F\x5C"), EN_RR}, /* SUBSD xmm, xmm */
- {4, PFPR, PMEM, O("\xF3\x0F\x5C"), EN_RM}, /* SUBSS xmm, m32 */
- {8, PFPR, PMEM, O("\xF2\x0F\x5C"), EN_RM}, /* SUBSD xmm, m64 */
-)
-DEFINSTR2(Xmulf,
- {4, PFPR, PFPR, O("\xF3\x0F\x59"), EN_RR}, /* MULSS xmm, xmm */
- {8, PFPR, PFPR, O("\xF2\x0F\x59"), EN_RR}, /* MULSD xmm, xmm */
- {4, PFPR, PMEM, O("\xF3\x0F\x59"), EN_RM}, /* MULSS xmm, m32 */
- {8, PFPR, PMEM, O("\xF2\x0F\x59"), EN_RM}, /* MULSD xmm, m64 */
-)
-DEFINSTR2(Xdivf,
- {4, PFPR, PFPR, O("\xF3\x0F\x5E"), EN_RR}, /* DIVSS xmm, xmm */
- {8, PFPR, PFPR, O("\xF2\x0F\x5E"), EN_RR}, /* DIVSD xmm, xmm */
- {4, PFPR, PMEM, O("\xF3\x0F\x5E"), EN_RM}, /* DIVSS xmm, m32 */
- {8, PFPR, PMEM, O("\xF2\x0F\x5E"), EN_RM}, /* DIVSD xmm, m64 */
-)
-DEFINSTR2(Xand,
- {4|8, PGPR, PGPR, O("\x23"), EN_RR}, /* AND r32/64, r32/64 */
- {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=4}, /* AND r32/64, imm8 */
- {4|8, PRAX, PI32, O("\x25"), EN_I32}, /* AND eax/rax, imm */
- {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=4}, /* AND r32/64, imm */
- { 8, PGPR, PMEM, O("\x23"), EN_RM}, /* AND r64, m64 */
-)
-/* Bitwise inclusive OR: integer forms on general-purpose registers and
- * ORPS on XMM registers. Entries are tried in order. */
-DEFINSTR2(Xior,
-   {4|8, PGPR, PGPR, O("\x0B"), EN_RR}, /* OR r32/64, r32/64 */
-   {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=1}, /* OR r32/64, imm8 */
-   {4|8, PRAX, PI32, O("\x0D"), EN_I32}, /* OR eax/rax, imm */
-   {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=1}, /* OR r32/64, imm */
-   { 8, PGPR, PMEM, O("\x0B"), EN_RM}, /* OR r64, m64 */
-   /* ORPS is 0F 56 /r; 0F 57 is XORPS and would compute an exclusive or */
-   {4|8, PFPR, PFPR, O("\x0F\x56"), EN_RR}, /* ORPS xmm, xmm */
-)
-/* Bitwise XOR on general-purpose and XMM registers. */
-DEFINSTR2(Xxor,
- {4|8, PGPR, PGPR, O("\x33"), EN_RR}, /* XOR r32/64, r32/64 */
- {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=6}, /* XOR r32/64, imm8 */
- {4|8, PRAX, PI32, O("\x35"), EN_I32}, /* XOR eax/rax, imm */
- {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=6}, /* XOR r32/64, imm */
- { 8, PGPR, PMEM, O("\x33"), EN_RM}, /* XOR r64, m64 */
- {4|8, PFPR, PFPR, O("\x0F\x57"), EN_RR}, /* XORPS xmm, xmm */
- {4|8, PFPR, PMEM, O("\x0F\x57"), EN_RM}, /* XORPS xmm, m128 */
-)
-/* Shifts. The immediate form is encoded with EN_RI8, so only the low byte
- * of the count is emitted even though the pattern accepts any immediate. */
-DEFINSTR2(Xshl,
- {4|8, PGPR, P1, O("\xD1"), EN_R, .ext=4}, /* SHL r32/64, 1 */
- {4|8, PGPR, PI32, O("\xC1"), EN_RI8, .ext=4}, /* SHL r32/64, imm */
- {4|8, PGPR, PRCX, O("\xD3"), EN_R, .ext=4}, /* SHL r32/64, CL */
-)
-DEFINSTR2(Xsar,
- {4|8, PGPR, P1, O("\xD1"), EN_R, .ext=7}, /* SAR r32/64, 1 */
- {4|8, PGPR, PI32, O("\xC1"), EN_RI8, .ext=7}, /* SAR r32/64, imm */
- {4|8, PGPR, PRCX, O("\xD3"), EN_R, .ext=7}, /* SAR r32/64, CL */
-)
-DEFINSTR2(Xshr,
- {4|8, PGPR, P1, O("\xD1"), EN_R, .ext=5}, /* SHR r32/64, 1 */
- {4|8, PGPR, PI32, O("\xC1"), EN_RI8, .ext=5}, /* SHR r32/64, imm */
- {4|8, PGPR, PRCX, O("\xD3"), EN_R, .ext=5}, /* SHR r32/64, CL */
-)
-/* Floating-point / integer conversions. psiz is -1, so the class passed
- * by the caller only influences REX.W (see encode). */
-DEFINSTR2(Xcvtss2sd,
- {-1, PFPR, PFPR, O("\xF3\x0F\x5A"), EN_RR}, /* CVTSS2SD xmm, xmm */
- {-1, PFPR, PMEM, O("\xF3\x0F\x5A"), EN_RM}, /* CVTSS2SD xmm, m32 */
-)
-DEFINSTR2(Xcvtsd2ss,
- {-1, PFPR, PFPR, O("\xF2\x0F\x5A"), EN_RR}, /* CVTSD2SS xmm, xmm */
- {-1, PFPR, PMEM, O("\xF2\x0F\x5A"), EN_RM}, /* CVTSD2SS xmm, m64 */
-)
-DEFINSTR2(Xcvtsi2ss,
- {-1, PFPR, PGPR, O("\xF3\x0F\x2A"), EN_RR}, /* CVTSI2SS xmm, r32/64 */
- {-1, PFPR, PMEM, O("\xF3\x0F\x2A"), EN_RM}, /* CVTSI2SS xmm, m32/64 */
-)
-DEFINSTR2(Xcvtsi2sd,
- {-1, PFPR, PGPR, O("\xF2\x0F\x2A"), EN_RR}, /* CVTSI2SD xmm, r32/64 */
- {-1, PFPR, PMEM, O("\xF2\x0F\x2A"), EN_RM}, /* CVTSI2SD xmm, m32/64 */
-)
-DEFINSTR2(Xcvttss2si,
- {-1, PGPR, PFPR, O("\xF3\x0F\x2C"), EN_RR}, /* CVTTSS2SI r32/64, xmm */
- {-1, PGPR, PMEM, O("\xF3\x0F\x2C"), EN_RM}, /* CVTTSS2SI r32/64, m32 */
-)
-DEFINSTR2(Xcvttsd2si,
- {-1, PGPR, PFPR, O("\xF2\x0F\x2C"), EN_RR}, /* CVTTSD2SI r32/64, xmm */
- {-1, PGPR, PMEM, O("\xF2\x0F\x2C"), EN_RM}, /* CVTTSD2SI r32/64, m64 */
-)
-/* One-operand instructions; the operation is selected by the ModR/M
- * opcode extension (.ext). */
-DEFINSTR1(Xneg,
- {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=3} /* NEG r32/64 */
-)
-DEFINSTR1(Xnot,
- {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=2} /* NOT r32/64 */
-)
-DEFINSTR1(Xidiv,
- {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=7}, /* IDIV r32/64 */
- {4|8, PMEM, 0, O("\xF7"), EN_M, .ext=7}, /* IDIV m32/64 */
-)
-DEFINSTR1(Xdiv,
- {4|8, PGPR, 0, O("\xF7"), EN_R, .ext=6}, /* DIV r32/64 */
- {4|8, PMEM, 0, O("\xF7"), EN_M, .ext=6}, /* DIV m32/64 */
-)
-DEFINSTR1(Xcall,
- {-1, PSYM, 0, O("\xE8"), EN_R32, .norexw=1}, /* CALL rel32 */
- {-1, PGPR, 0, O("\xFF"), EN_R, .ext=2, .norexw=1}, /* CALL r64 */
- {-1, PMEM, 0, O("\xFF"), EN_M, .ext=2, .norexw=1}, /* CALL m64 */
-)
-/* Integer compare (CMP) and unordered floating-point compare (UCOMISS/SD). */
-DEFINSTR2(Xcmp,
- {4|8, PGPR, PGPR, O("\x3B"), EN_RR}, /* CMP r32/64, r32/64 */
- {4|8, PGPR, PI8, O("\x83"), EN_RI8, .ext=7}, /* CMP r32/64, imm8 */
- {4|8, PRAX, PI32, O("\x3D"), EN_I32}, /* CMP eax/rax, imm */
- {4|8, PGPR, PI32, O("\x81"), EN_RI32, .ext=7}, /* CMP r32/64, imm */
- { 8, PGPR, PMEM, O("\x3B"), EN_RM}, /* CMP r64, m64 */
- {4 , PFPR, PFPR, O("\x0F\x2E"), EN_RR}, /* UCOMISS xmm, xmm */
- {4 , PFPR, PMEM, O("\x0F\x2E"), EN_RM}, /* UCOMISS xmm, m32 */
- { 8, PFPR, PFPR, O("\x66\x0F\x2E"), EN_RR}, /* UCOMISD xmm, xmm */
- { 8, PFPR, PMEM, O("\x66\x0F\x2E"), EN_RM}, /* UCOMISD xmm, m64 */
-)
-/* NOTE(review): the imm8 entries test only the low byte of the register.
- * PI8 also accepts negative immediates, whose sign-extended value has bits
- * set above bit 7 -- confirm that callers never pass such a mask for a
- * 32/64-bit test. */
-DEFINSTR2(Xtest,
- {4|8, PRAX, PI8, O("\xA8"), EN_I8}, /* TEST AL, imm8 */
- {4, PRAX, PI32, O("\xA9"), EN_I32}, /* TEST EAX, imm32 */
- { 8, PRAX, PU32, O("\xA9"), EN_I32}, /* TEST EAX, imm32 */
- { 8, PRAX, PI32, O("\xA9"), EN_I32}, /* TEST RAX, imm32 */
- {4|8, PGPR, PI8, O("\xF6"), EN_RI8, .r8=1,.norexw=1}, /* TEST r8, imm8 */
- {4|8, PGPR, PI32, O("\xF7"), EN_RI32, .ext=0}, /* TEST r32/64, imm32 */
- {4|8, PGPR, PGPR, O("\x85"), EN_RR}, /* TEST r32/64, r32/64 */
- {4|8, PGPR, PMEM, O("\x85"), EN_RM}, /* TEST r32/64, m32/64 */
-)
-
-/* Two-operand multiply: dst *= src. */
-DEFINSTR2(Ximul2,
- {4|8, PGPR, PGPR, O("\x0F\xAF"), EN_RR}, /* IMUL r32/64, r32/64 */
- {4|8, PGPR, PMEM, O("\x0F\xAF"), EN_RM}, /* IMUL r32/64, m32/64 */
-)
-/* Three-operand multiply tables. They describe only the opcode and the
- * two register/memory operands; Ximul appends the immediate itself. */
-static const struct desc imul3_imm8tab[] = {
- {4|8, PGPR, PGPR, O("\x6B"), EN_RR}, /* IMUL r32/64, r32/64, (imm8) */
- {4|8, PGPR, PMEM, O("\x6B"), EN_RM}, /* IMUL r32/64, m32/64, (imm8) */
-}, imul3_imm32tab[] = {
- {4|8, PGPR, PGPR, O("\x69"), EN_RR}, /* IMUL r32/64, r32/64, (imm32) */
- {4|8, PGPR, PMEM, O("\x69"), EN_RM}, /* IMUL r32/64, m32/64, (imm32) */
-};
-#undef O
-/* Integer multiply dst = s1 * s2.
- * When dst and s1 are the same operand and s2 is not an immediate, the
- * two-operand form is used. Otherwise s2 must be an immediate and the
- * three-operand form is emitted, with an 8-bit immediate when it fits. */
-static void
-Ximul(uchar **pcode, enum irclass k, struct oper dst, struct oper s1, struct oper s2)
-{
-   bool sameoper = memcmp(&dst, &s1, sizeof dst) == 0;
-
-   if (sameoper && s2.t != OIMM) {
-      Ximul2(pcode, k, dst, s2);
-      return;
-   }
-
-   assert(s2.t == OIMM);
-   bool fits8 = (uint)(s2.imm + 128) < 256;
-   const struct desc *tab = fits8 ? imul3_imm8tab : imul3_imm32tab;
-   int ntab = fits8 ? countof(imul3_imm8tab) : countof(imul3_imm32tab);
-
-   encode(pcode, tab, ntab, k, dst, s1);
-   /* the tables do not cover the immediate; append it by hand */
-   if (fits8)
-      B(s2.imm);
-   else
-      I32(s2.imm);
-}
-
-/* x86 condition codes. The values 0x0..0xF are added to the Jcc/SETcc
- * opcode bytes (see Xjcc, Xsetcc); ALWAYS is a pseudo-code that Xjcc
- * turns into an unconditional JMP. */
-enum cc {
- CCO = 0x0, /* OF = 1*/
- CCNO = 0x1, /* OF = 0*/
- CCB = 0x2, CCC = 0x2, CCNAE = 0x2, /* below; CF = 1; not above or equal */
- CCAE = 0x3, CCNB = 0x3, CCNC = 0x3, /* above or equal; not below; CF = 0 */
- CCE = 0x4, CCZ = 0x4, /* equal; ZF = 1 */
- CCNE = 0x5, CCNZ = 0x5, /* not equal; ZF = 0 */
- CCBE = 0x6, CCNA = 0x6, /* below or equal; not above; CF=1 or ZF=1 */
- CCA = 0x7, CCNBE = 0x7, /* above; not below or equal; CF=0 and ZF=0 */
- CCS = 0x8, /* SF = 1; negative */
- CCNS = 0x9, /* SF = 0; non-negative */
- CCP = 0xA, CCPE = 0xA, /* PF = 1; parity even */
- CCNP = 0xB, CCPO = 0xB, /* PF = 0; parity odd */
- CCL = 0xC, CCNGE = 0xC, /* less; not greater or equal; SF != OF */
- CCGE = 0xD, CCNL = 0xD, /* greater or equal; not less; SF == OF */
- CCLE = 0xE, CCNG = 0xE, /* less or equal; not greater; ZF=1 or SF != OF */
- CCG = 0xF, CCNLE = 0xF, /* greater; not less or equal; ZF=0 and SF = OF*/
- ALWAYS,
-};
-
-/* maps blk -> address when resolved; or to linked list of jump displacement
- * relocations */
-/* Indexed by block id. While a block is unresolved, `relreloc` is the text
- * offset of the most recently emitted jump displacement that targets it;
- * that displacement field in turn stores the previous value of `relreloc`
- * (see Xjcc), forming the list. */
-static struct blkaddr {
- bool resolved;
- union {
- uint addr; /* valid when resolved */
- uint relreloc; /* valid when not resolved */
- };
-} *blkaddr;
-static uint nblkaddr;
-
-/* Emit a jump to block dst: JMP when cc is ALWAYS, Jcc otherwise.
- * A resolved target gets the short (rel8) form when the distance allows it.
- * For an unresolved target the rel32 form is emitted, its displacement
- * field temporarily holding the previous head of the block's pending-jump
- * list while the list head is updated to point at this field. */
-static void
-Xjcc(uchar **pcode, enum cc cc, struct block *dst)
-{
-   struct blkaddr *target = &blkaddr[dst->id];
-   int here = *pcode - objout.textbegin;
-   int opclen = cc == ALWAYS ? 1 : 2; /* opcode bytes of the rel32 form */
-   bool isshort = 0;
-   int disp;
-
-   if (!target->resolved) {
-      disp = target->relreloc;
-      target->relreloc = here + opclen;
-   } else {
-      /* displacement as seen from the end of a 2-byte short jump */
-      disp = target->addr - (here + 2);
-      if ((uint)(disp + 128) < 256)
-         isshort = 1;
-      else
-         disp -= 2 + opclen; /* long form is opclen + 4 bytes instead of 2 */
-   }
-
-   if (cc != ALWAYS) {
-      assert(in_range(cc, 0, 0xF));
-      if (isshort) B(0x70 + cc); /* Jcc rel8 */
-      else B(0x0F), B(0x80 + cc); /* Jcc rel32 */
-   } else {
-      B(isshort ? 0xEB : 0xE9); /* JMP rel8/rel32 */
-   }
-
-   if (isshort)
-      B(disp);
-   else
-      I32(disp);
-}
-
-/* Emit SETcc on the low byte of integer register reg. */
-static void
-Xsetcc(uchar **pcode, enum cc cc, enum reg reg)
-{
-   assert(in_range(cc, 0x0, 0xF));
-   assert(in_range(reg, RAX, R15));
-
-   /* A REX prefix is needed for R8..R15 (REX.B) and also for RSP..RDI,
-    * whose byte registers are only addressable with a REX prefix present */
-   bool needrex = in_range(reg, RSP, RDI) || (reg >> 3) != 0;
-   if (needrex)
-      B(0x40 | (reg >> 3));
-   B(0x0F), B(0x90+cc); /* SETcc */
-   B(0xC0 + (reg & 7)); /* ModR/M with mod=11, rm=reg */
-}
-
-/* Push a register. Integer registers use PUSH; XMM registers are stored
- * as 8 bytes after lowering RSP with LEA. */
-static void
-Xpush(uchar **pcode, enum reg reg)
-{
-   if (!in_range(reg, RAX, R15)) {
-      assert(in_range(reg, XMM0, XMM15));
-      DS("\x48\x8d\x64\x24\xF8"); /* LEA RSP, [RSP-8] */
-      /* MOVSD [rsp], xmm */
-      Xmov(pcode, KF64, mkoper(OMEM, .base = RSP, .index = NOINDEX), reg2oper(reg));
-      return;
-   }
-   if (reg >> 3)
-      B(0x41); /* REX.B */
-   B(0x50 + (reg & 7)); /* PUSH reg */
-}
-
-/* Pop a register; the inverse of Xpush. XMM registers are loaded as
- * 8 bytes from [RSP] before RSP is raised with LEA. */
-static void
-Xpop(uchar **pcode, enum reg reg)
-{
-   if (!in_range(reg, RAX, R15)) {
-      assert(in_range(reg, XMM0, XMM15));
-      /* MOVSD xmm, [rsp] */
-      Xmov(pcode, KF64, reg2oper(reg), mkoper(OMEM, .base = RSP, .index = NOINDEX));
-      DS("\x48\x8d\x64\x24\x08"); /* LEA RSP, [RSP+8] */
-      return;
-   }
-   if (reg >> 3)
-      B(0x41); /* REX.B */
-   B(0x58 + (reg & 7)); /* POP reg */
-}
-
-/* are flags live at given instruction?
- *
- * Flags are considered live at index curi when the block ends in a
- * conditional branch whose condition is a comparison instruction that is
- * not found among the instructions after curi. Returns 0 for blocks
- * without such a branch. */
-static bool
-flagslivep(struct block *blk, int curi)
-{
-   int cmpi;
-   /* conditional branch that references a previous comparison instruction? */
-   if (blk->jmp.t != Jb || !blk->jmp.arg[0].bits)
-      return 0;
-   assert(blk->jmp.arg[0].t == RTMP);
-   /* the branch condition is arg[0], the operand validated just above */
-   cmpi = blk->jmp.arg[0].i;
-   for (int i = blk->ins.n - 1; i > curi; --i) {
-      if (blk->ins.p[i] == cmpi)
-         /* flags defined after given instruction, dead here */
-         return 0;
-   }
-   /* flags defined before given instruction, live here */
-   return 1;
-}
-
-/* Copy dst = val, with some peephole optimizations */
-/*  pcode     in/out cursor into the code buffer
- *  cls       class of the value being copied
- *  blk,curi  block and instruction index of the copy, used to decide
- *            whether the flags may be clobbered (see flagslivep)
- *  dst       destination; must be a register operand
- *  val       source reference */
-static void
-gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct oper dst, union ref val)
-{
- assert(dst.t == OREG);
- if (val.bits == UNDREF.bits) {
- /* can be generated by ssa construction, since value is undefined no move is needed */
- return;
- }
- if (val.t == RADDR) {
- /* this is a LEA, but maybe it can be lowered to a 2-address instruction,
- * which may clobber flags */
- const struct addr *addr = &addrht[val.i];
- /* LEA does not modify flags, so keep it whenever they are live */
- if (flagslivep(blk, curi)) goto Lea;
- if (addr->base.t != RREG) goto Lea;
- if (addr->base.bits && dst.reg == mkregoper(addr->base).reg) { /* base = dst */
- if (addr->index.bits && !addr->disp && !addr->shift){
- /* lea Rx, [Rx + Ry] -> add Rx, Ry */
- Xadd(pcode, cls, dst, mkregoper(addr->index));
- return;
- } else if (!addr->index.bits) {
- if (!addr->disp) /* lea Rx, [Rx] -> mov Rx, Rx */
- Xmov(pcode, cls, dst, dst);
- else /* lea Rx, [Rx + Imm] -> add Rx, Imm */
- Xadd(pcode, cls, dst, mkoper(OIMM, .imm = addr->disp));
- return;
- }
- } else if (addr->index.bits && dst.reg == mkregoper(addr->index).reg) { /* index = dst */
- if (addr->base.bits && !addr->disp && !addr->shift) {
- /* lea Rx, [Ry + Rx] -> add Rx, Ry */
- Xadd(pcode, cls, dst, mkregoper(addr->base));
- return;
- } else if (!addr->base.bits) {
- if (!addr->disp && !addr->shift) /* lea Rx, [Rx] -> mov Rx, Rx */
- Xmov(pcode, cls, dst, dst);
- else if (!addr->shift) /* lea Rx, [Rx + Imm] -> add Rx, Imm */
- Xadd(pcode, cls, dst, mkoper(OIMM, .imm = addr->disp));
- else if (!addr->disp) /* lea Rx, [Rx LSL s] -> shl Rx, s */
- Xshl(pcode, cls, dst, mkoper(OIMM, .imm = addr->shift));
- else
- goto Lea;
- return;
- }
- }
- /* normal (not 2-address) case */
- Lea:
- /* with -pic a symbol address is loaded through the GOT instead */
- if (isaddrcon(addr->base,0) && ccopt.pic) {
- assert(!addr->disp && !addr->index.bits);
- val = addr->base;
- goto GOTLoad;
- }
- Xlea(pcode, cls, dst, ref2oper(val));
- } else if (val.bits == ZEROREF.bits && dst.t == OREG && (kisflt(cls) || !flagslivep(blk, curi))) {
- /* dst = 0 -> xor dst, dst; but only if it is ok to clobber flags */
- Xxor(pcode, kisint(cls) ? KI32 : cls, dst, dst);
- } else if (isaddrcon(val,0)) {
- /* the GOTLoad label sits inside the `if` so that the RADDR case above
- * can jump straight to the GOT load */
- if (ccopt.pic) GOTLoad:
- /* for mov reg, [rip(sym@GOTPCREL)] */
- Xmov(pcode, cls, dst, mkoper(OSYM, .con = val.i, .cindex = NOINDEX));
- else
- /* for lea reg, [rip(sym)] */
- Xlea(pcode, cls, dst, mkoper(OSYM, .con = val.i, .cindex = NOINDEX));
- } else if (val.t == RXCON && in_range(concls(val), KI64, KPTR)) {
- /* movabs */
- assert(dst.t == OREG && in_range(dst.reg, RAX, R15));
- B(0x48 | (dst.reg >> 3)); /* REX.W (+ REX.B) */
- B(0xB8 + (dst.reg & 0x7)); /* MOVABS r64, */
- wr64le(*pcode, intconval(val)); /* imm64 */
- *pcode += 8;
- } else {
- struct oper src = mkimmdatregoper(val);
- /* skip the move when source and destination are the same operand;
- * a KF64 copy from an integer register is emitted with class KI64 */
- if (memcmp(&dst, &src, sizeof dst) != 0)
- Xmov(pcode, cls == KF64 && src.t == OREG && src.reg < XMM0 ? KI64 : cls, dst, src);
- }
-}
-
-/* Emit the register save sequence for a variadic function.
- * Starting at the RBP-relative operand `sav`, six 8-byte slots are laid out
- * for the integer argument registers followed by eight 16-byte slots for
- * XMM0..XMM7. Registers already taken by the function's own register
- * arguments (counted from fn->abiarg) keep their slot but are not stored.
- * The XMM stores are skipped at run time when AL is zero. */
-static void
-Xvaprologue(uchar **pcode, struct function *fn, struct oper sav)
-{
- uint gpr0 = 0, fpr0 = 0, jmpaddr;
- /* count the integer / XMM registers used by the declared arguments */
- for (int i = 0; i < fn->nabiarg; ++i) {
- struct abiarg abi = fn->abiarg[i];
- if (!abi.isstk) {
- if (abi.reg < XMM0) ++gpr0;
- else ++fpr0;
- }
- }
- assert(sav.t == OMEM && sav.base == RBP);
- /* save GPRS */
- for (int r = 0; r < 6; ++r) {
- static const char reg[] = {RDI,RSI,RDX,RCX,R8,R9};
- if (r >= gpr0)
- Xmov(pcode, KI64, sav, reg2oper(reg[r]));
- sav.disp += 8;
- }
-
- /* save FPRs, but only if al is non zero */
- if (fpr0 < 8) {
- DS("\x84\xC0"); /* TEST al,al */
- jmpaddr = *pcode - objout.textbegin;
- DS("\x74\xFE"); /* JE rel8 */ /* placeholder displacement, patched below */
- }
- for (int r = 0; r < 8; ++r) {
- if (r >= fpr0)
- Xmovaps(pcode, KF64, sav, reg2oper(XMM0 + r));
- sav.disp += 16;
- }
- if (fpr0 < 8) {/* patch relative jump */
- /* rel8 is relative to the end of the 2-byte JE */
- int off = (*pcode - objout.textbegin) - jmpaddr - 2;
- objout.textbegin[jmpaddr+1] = off;
- }
-}
-
-/* condition code for CMP */
-/* Both tables are indexed by IR opcode. icmpop2cc is for integer compares
- * (it also has entries for Oand and Osub, both mapping to CCNE);
- * fcmpop2cc is for float compares and uses the below/above codes. */
-static const uchar icmpop2cc[] = {
- [Oequ] = CCE, [Oneq] = CCNE,
- [Olth] = CCL, [Ogth] = CCG, [Olte] = CCLE, [Ogte] = CCGE,
- [Oulth] = CCB, [Ougth] = CCA, [Oulte] = CCBE, [Ougte] = CCAE,
- [Oand] = CCNE, [Osub] = CCNE,
-}, fcmpop2cc[] = {
- [Oequ] = CCE, [Oneq] = CCNE,
- [Olth] = CCB, [Ogth] = CCA, [Olte] = CCBE, [Ogte] = CCAE,
-};
-/* condition code for TEST reg,reg (compare with zero) */
-/* Indexed by IR opcode; used when the right operand of the compare is ZEROREF. */
-static const uchar icmpzero2cc[] = {
- [Oequ] = CCE, [Oulte] = CCE,
- [Oneq] = CCNE, [Ougth] = CCNE,
- [Olth] = CCS, [Ogte] = CCNS,
- [Olte] = CCLE, [Ogth] = CCG,
- [Oulth] = CCB, [Ougte] = CCAE, /* actually constants */
-};
-
-/* Encode one selected IR instruction as machine code at *pcode.
- * `blk` and `curi` locate the instruction inside its block and are passed on
- * to gencopy. `ins->reg`, when non-zero, is the output register number plus
- * one. Opcodes without a case here are a fatal error. */
-static void
-emitinstr(uchar **pcode, struct function *fn, struct block *blk, int curi, struct instr *ins)
-{
- struct oper dst, src;
- bool regzeroed;
- enum irclass cls = ins->cls;
- void (*X)(uchar **, enum irclass, struct oper, struct oper) = NULL;
- void (*X1)(uchar **, enum irclass, struct oper) = NULL;
-
- switch (ins->op) {
- default:
- fatal(NULL, "amd64: in %y; unimplemented instr '%s'", fn->name, opnames[ins->op]);
- case Onop: break;
- case Ostore8: cls = KI32, X = Xmovb; goto Store;
- case Ostore16: cls = KI32, X = Xmovw; goto Store;
- case Ostore32: cls = KI32, X = Xmov; goto Store;
- case Ostore64: cls = KI64, X = Xmov;
- Store:
- src = mkimmregoper(ins->r);
- /* a store whose source is an XMM register uses the float class of the same width */
- if (cls == KI32 && src.t == OREG && src.reg >= XMM0) cls = KF32;
- if (cls == KI64 && src.t == OREG && src.reg >= XMM0) cls = KF64;
- X(pcode, cls, mkmemoper(ins->l), src);
- break;
- /* extensions share the emitting statements of the corresponding loads below */
- case Oexts8: src = mkregoper(ins->l); goto Movsxb;
- case Oextu8: src = mkregoper(ins->l); goto Movzxb;
- case Oexts16: src = mkregoper(ins->l); goto Movsxw;
- case Oextu16: src = mkregoper(ins->l); goto Movzxw;
- case Oexts32: src = mkregoper(ins->l); goto Movsxl;
- case Oextu32: src = mkregoper(ins->l); goto Movzxl;
- case Oloads8: src = mkmemoper(ins->l); Movsxb: Xmovsxb(pcode, cls, reg2oper(ins->reg-1), src); break;
- case Oloadu8: src = mkmemoper(ins->l); Movzxb: Xmovzxb(pcode, cls, reg2oper(ins->reg-1), src); break;
- case Oloads16: src = mkmemoper(ins->l); Movsxw: Xmovsxw(pcode, cls, reg2oper(ins->reg-1), src); break;
- case Oloadu16: src = mkmemoper(ins->l); Movzxw: Xmovzxw(pcode, cls, reg2oper(ins->reg-1), src); break;
- case Oloads32: src = mkmemoper(ins->l); Movsxl: Xmovsxl(pcode, cls, reg2oper(ins->reg-1), src); break;
- case Oloadu32: src = mkmemoper(ins->l); Movzxl: Xmov(pcode, KI32, reg2oper(ins->reg-1), src); break;
- case Oloadf32: case Oloadf64: Xmov(pcode, cls, reg2oper(ins->reg-1), mkmemoper(ins->l)); break;
- case Oloadi64: Xmov(pcode, KI64, reg2oper(ins->reg-1), mkmemoper(ins->l)); break;
- case Ocvtf32f64: X = Xcvtss2sd; goto FloatsCvt;
- case Ocvtf64f32: X = Xcvtsd2ss; goto FloatsCvt;
- case Ocvtf32s: X = Xcvttss2si; goto FloatsCvt;
- case Ocvtf64s: X = Xcvttsd2si; goto FloatsCvt;
- case Ocvts32f: X = cls == KF32 ? Xcvtsi2ss : Xcvtsi2sd; cls = KI32; goto FloatsCvt;
- case Ocvts64f: X = cls == KF32 ? Xcvtsi2ss : Xcvtsi2sd; cls = KI64; goto FloatsCvt;
- FloatsCvt:
- X(pcode, cls, reg2oper(ins->reg-1), mkdatregoper(ins->l));
- break;
- case Oadd:
- dst = mkregoper(ins->l);
- if (kisflt(cls)) {
- Xaddf(pcode, cls, dst, mkimmdatregoper(ins->r));
- } else if (ins->reg-1 == dst.reg) { /* two-address add */
- src = ref2oper(ins->r);
- /* ADD -imm -> SUB imm, for niceness; INT_MIN is excluded because it has
-  * no positive counterpart (negating it overflows) */
- if (src.t == OIMM && src.imm < 0 && src.imm != -2147483647 - 1)
- Xsub(pcode, cls, dst, (src.imm = -src.imm, src));
- else
- Xadd(pcode, cls, dst, src);
- } else if (isregref(ins->r) && ins->reg-1 == mkregoper(ins->r).reg) {
- /* also two-address after swapping operands */
- Xadd(pcode, cls, reg2oper(ins->reg-1), mkimmdatregoper(ins->l));
- } else { /* three-address add (lea) */
- struct oper mem = { OMEM, .base = NOBASE, .index = NOINDEX };
- dst = reg2oper(ins->reg-1);
- addmemoper(&mem, ref2oper(ins->l));
- addmemoper(&mem, ref2oper(ins->r));
- Xlea(pcode, cls, dst, mem);
- }
- break;
- case Osub:
- dst = mkregoper(ins->l);
- if (kisflt(cls)) {
- Xsubf(pcode, cls, dst, mkimmdatregoper(ins->r));
- } else if (ins->reg-1 == dst.reg) { /* two-address */
- Xsub(pcode, cls, dst, ref2oper(ins->r));
- } else {
- /* three-address subtract of a constant: lea dst, [l - imm] */
- assert(isintcon(ins->r));
- Xlea(pcode, cls, reg2oper(ins->reg-1),
- mkoper(OMEM, .base = mkregoper(ins->l).reg, .index = NOINDEX, .disp = -intconval(ins->r)));
- }
- break;
- case Oshl: X = Xshl; goto ALU2;
- case Osar: X = Xsar; goto ALU2;
- case Oslr: X = Xshr; goto ALU2;
- case Oand:
- if (!ins->reg) {
- /* no output register: only the flags are wanted */
- Xtest(pcode, cls, mkregoper(ins->l), mkimmdatregoper(ins->r));
- break;
- }
- X = Xand;
- goto ALU2;
- case Oxor: X = Xxor; goto ALU2;
- case Oior: X = Xior; goto ALU2;
- ALU2:
- dst = mkregoper(ins->l);
- assert(ins->reg-1 == dst.reg);
- X(pcode, cls, dst, mkimmdatregoper(ins->r));
- break;
- case Oneg: X1 = Xneg; goto ALU1;
- case Onot: X1 = Xnot; goto ALU1;
- ALU1:
- dst = mkregoper(ins->l);
- assert(ins->reg-1 == dst.reg);
- X1(pcode, cls, dst);
- break;
- case Omul:
- if (kisint(cls))
- Ximul(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->l), ref2oper(ins->r));
- else
- Xmulf(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->r));
- break;
- case Odiv:
- switch (cls) {
- default: assert(0);
- case KPTR:
- case KI64: B(0x48); /* REX.W */
- /* fallthru: with the REX.W prefix the 0x99 below encodes CQO */
- case KI32: B(0x99); /* CDQ/CQO */
- assert(mkregoper(ins->l).reg == RAX);
- Xidiv(pcode, cls, mkdatregoper(ins->r));
- break;
- case KF32: case KF64:
- Xdivf(pcode, cls, reg2oper(ins->reg-1), mkdatregoper(ins->r));
- break;
- }
- break;
- case Oudiv:
- DS("\x31\xD2"); /* XOR EDX,EDX */
- assert(mkregoper(ins->l).reg == RAX);
- Xdiv(pcode, cls, mkdatregoper(ins->r));
- break;
- case Oequ: case Oneq:
- case Olth: case Ogth: case Olte: case Ogte:
- case Oulth: case Ougth: case Oulte: case Ougte:
- dst = mkregoper(ins->l);
- src = ref2oper(ins->r);
- regzeroed = 0;
- if (ins->reg && dst.reg != ins->reg-1) {
- /* can zero output reg before test instruction only if it differs from both
-  * inputs, including a register used to address a memory source operand */
- int out = ins->reg-1;
- bool srcuses = (src.t == OREG && src.reg == out)
- || (src.t == OMEM && (src.base == out || src.index == out))
- || (src.t == OSYM && src.cindex == out);
- if (!srcuses) {
- struct oper zdst = reg2oper(out);
- Xxor(pcode, KI32, zdst, zdst);
- regzeroed = 1;
- }
- }
- if (kisint(ins->cls) && ins->r.bits == ZEROREF.bits)
- Xtest(pcode, cls, dst, dst);
- else
- Xcmp(pcode, cls, dst, src);
- if (ins->reg) {
- /* materialize the boolean result in the output register */
- enum cc cc;
- dst = reg2oper(ins->reg-1);
- if (ins->r.bits != ZEROREF.bits) { /* CMP */
- cc = (kisint(ins->cls) ? icmpop2cc : fcmpop2cc)[ins->op];
- } else { /* TEST r,r (CMP r, 0) */
- assert(kisint(ins->cls));
- cc = icmpzero2cc[ins->op];
- }
- if (kisflt(ins->cls)) { /* handle float unordered result */
- int unordres = ins->op == Oneq ? 1 : 0;
- int rex = 0;
- if (in_range(dst.reg, RSP, RDI)) rex = 0x40;
- rex |= (dst.reg >> 3); /* REX.B */
- /* byte length of the SETcc emitted below: 3, plus 1 with a REX prefix */
- int jpoff = 3 + (rex != 0);
- if (regzeroed && unordres == 0) {
- /* if cmp unordered, just jump over the SETcc; result reg was already zeroed */
- B(0x7A), B(jpoff); /* JP <off> */
- } else {
- /* JNP .a
- * MOV r8, 0/1
- * JMP .b
- * .a: SETcc r8
- * .b: MOVZX r, r8
- */
- B(0x7B), B(jpoff+1); /* JNP <off> */
- if (rex) B(rex | 0x40);
- B(0xB0 + (dst.reg & 7)), B(unordres); /* MOV r8, 0/1 */
- B(0xEB), B(jpoff); /* JMP <off> */
- }
- }
- Xsetcc(pcode, cc, dst.reg);
- if (!regzeroed)
- Xmovzxb(pcode, KI32, dst, dst);
- }
- break;
- case Omove:
- dst = ref2oper(ins->l);
- gencopy(pcode, cls, blk, curi, dst, ins->r);
- break;
- case Ocopy:
- dst = reg2oper(ins->reg-1);
- gencopy(pcode, cls, blk, curi, dst, ins->l);
- break;
- case Oswap:
- if (kisint(cls))
- Xxchg(pcode, cls, ref2oper(ins->l), mkregoper(ins->r));
- else {
- /* float registers: swap with three XORs */
- struct oper l = mkregoper(ins->l), r = mkregoper(ins->r);
- Xxor(pcode, cls, l, r);
- Xxor(pcode, cls, r, l);
- Xxor(pcode, cls, l, r);
- }
- break;
- case Oxsave:
- Xpush(pcode, mkregoper(ins->l).reg);
- break;
- case Oxrestore:
- Xpop(pcode, mkregoper(ins->l).reg);
- break;
- case Ocall:
- if (calltab.p[ins->r.i].vararg >= 0) {
- struct call *call = &calltab.p[ins->r.i];
- /* variadic functions need the caller to write num of args in sse regs to %al */
- int n = 0;
- for (int i = 0; i < call->narg; ++i)
- if (!call->abiarg[i].isstk && call->abiarg[i].reg >= XMM0)
- ++n;
- if (!n) DS("\x31\xC0"); /* XOR EAX, EAX */
- else B(0xB0), B(n); /* MOV AL, n */
- }
- Xcall(pcode, KPTR, ref2oper(ins->l));
- break;
- case Oxvaprologue:
- Xvaprologue(pcode, fn, mkmemoper(ins->l));
- break;
- }
-}
-
-/* Emit the jump(s) that end `blk`. A block with one successor gets an
- * unconditional jump unless that successor is the next block in layout order.
- * A block with two successors gets a conditional jump to s1 followed, when
- * needed, by an unconditional jump to s2. May swap blk->s1 and blk->s2. */
-static void
-emitbranch(uchar **pcode, struct block *blk)
-{
- enum cc cc = ALWAYS;
- assert(blk->s1);
- if (blk->s2) {
- /* conditional branch.. */
- union ref arg = blk->jmp.arg[0];
- struct block *unord = NULL;
- assert(arg.t == RTMP);
- struct instr *ins = &instrtab[arg.i];
- if ((oiscmp(ins->op) || ins->op == Oand || ins->op == Osub)) {
- if (ins->r.bits != ZEROREF.bits) {
- /* for CMP instr */
- cc = (kisint(ins->cls) ? icmpop2cc : fcmpop2cc)[ins->op];
- /* successor taken on an unordered float compare: the "true" one only for Oneq */
- unord = ins->op == Oneq ? blk->s1 : blk->s2;
- } else {
- assert(kisint(ins->cls));
- /* for TEST instr, which modifies ZF and SF and sets CF = OF = 0 */
- cc = icmpzero2cc[ins->op];
- }
- } else {
- /* implicit by ZF */
- cc = CCNZ;
- }
- if (kisflt(ins->cls)) {
- /* handle float unordered result */
- Xjcc(pcode, CCP, unord);
- }
- if (blk->s1 == blk->lnext) {
- /* if s1 is next adjacent block, swap s1,s2 and flip condition to emit a
- * single jump */
- struct block *tmp = blk->s1;
- blk->s1 = blk->s2;
- blk->s2 = tmp;
- /* flipping the low bit selects the opposite condition */
- cc ^= 1;
- }
- }
- /* make sure to fallthru if jumping to next adjacent block */
- if (blk->s2 || blk->s1 != blk->lnext)
- Xjcc(pcode, cc, blk->s1);
- if (blk->s2 && blk->s2 != blk->lnext)
- Xjcc(pcode, ALWAYS, blk->s2);
-}
-
-/* Push each callee-saved register (RBX, then R12..R15) that the function's
- * register usage set contains. Adds the number of pushes to *npush and
- * returns whether anything was pushed. */
-static bool
-calleesave(int *npush, uchar **pcode, struct function *fn)
-{
-   static const uchar saved[] = { RBX, R12, R13, R14, R15 };
-   int pushed = 0;
-
-   for (int k = 0; k < (int)(sizeof saved / sizeof saved[0]); ++k) {
-      if (!rstest(fn->regusage, saved[k]))
-         continue;
-      Xpush(pcode, saved[k]);
-      ++pushed;
-   }
-   *npush += pushed;
-   return pushed != 0;
-}
-
-/* Pop the callee-saved registers the function uses, in the reverse of the
- * order calleesave pushes them: R15 down to R12, then RBX. */
-static void
-calleerestore(uchar **pcode, struct function *fn)
-{
-   static const uchar restored[] = { R15, R14, R13, R12, RBX };
-
-   for (int k = 0; k < (int)(sizeof restored / sizeof restored[0]); ++k) {
-      if (rstest(fn->regusage, restored[k]))
-         Xpop(pcode, restored[k]);
-   }
-}
-
-/* align code using NOPs */
-/* Pads the text section until the current offset is a multiple of `align`.
- * The loop test masks with (align - 1), so `align` is presumably expected to
- * be a power of two. Each iteration emits one NOP of up to 9 bytes. */
-static void
-nops(uchar **pcode, int align)
-{
- int rem;
- while ((rem = (*pcode - objout.textbegin) & (align - 1)) != 0) {
- switch (align - rem) {
- /* 9 or more bytes missing: emit 0x66 plus the 8-byte form; the loop pads the rest */
- case 15: case 14: case 13: case 12: case 11: case 10:
- case 9: B(0x66);
- case 8: DS("\x0f\x1f\x84\x00\x00\x00\x00\x00"); break;
- case 7: DS("\x0f\x1f\x80\x00\x00\x00\x00"); break;
- case 6: B(0x66);
- case 5: DS("\x0f\x1f\x44\x00\x00"); break;
- case 4: DS("\x0f\x1f\x40\x00"); break;
- case 3: DS("\x0f\x1f\00"); break;
- case 2: B(0x66);
- case 1: B(0x90); break;
- }
- }
-}
-
-/* Emit the machine code of one function into the object's text section:
- * alignment padding, prologue, every block in layout order (resolving jumps
- * that were waiting for the block's address), and the epilogue(s). Finally
- * registers the function symbol with its offset and size. */
-static void
-emitbin(struct function *fn)
-{
- struct block *blk;
- uchar **pcode = &objout.code;
- int npush = 0;
- uint epilogueaddr = 0;
- bool saverestore;
-
- /* grow the per-block address table if needed, then clear it */
- if (nblkaddr < fn->nblk) {
- blkaddr = xrealloc(blkaddr, fn->nblk * sizeof *blkaddr);
- nblkaddr = fn->nblk;
- }
- memset(blkaddr, 0, nblkaddr * sizeof *blkaddr);
-
- nops(pcode, 16);
- fnstart = *pcode;
- curfnsym = fn->name;
-
- /** prologue **/
-
- /* only use frame pointer in non-leaf functions and functions that use the stack */
- usebp = 0;
- if (!fn->isleaf || fn->stksiz) {
- usebp = 1;
- /* push rbp; mov rbp, rsp */
- DS("\x55\x48\x89\xE5");
- }
- saverestore = calleesave(&npush, pcode, fn);
- if (usebp) rbpoff = -npush*8;
-
- /* ensure stack is 16-byte aligned for function calls */
- if (!fn->isleaf && ((fn->stksiz + npush*8) & 0xF) != 0) {
- assert(usebp);
- if ((rbpoff & 0xF) == 0) {
- rbpoff -= 16;
- fn->stksiz += 24;
- } else {
- rbpoff -= 8;
- fn->stksiz += 8;
- }
- }
-
- if (fn->stksiz != 0) {
- /* sub rsp, <stack size> */
- /* sizes below 128 fit the sign-extended imm8 form */
- if (fn->stksiz < 128)
- DS("\x48\x83\xEC"), B(fn->stksiz);
- else if (fn->stksiz == 128)
- DS("\x48\x83\xC4\x80"); /* add rsp, -128 */
- else
- DS("\x48\x81\xEC"), I32(fn->stksiz);
- }
-
- blk = fn->entry;
- do {
- struct blkaddr *bb = &blkaddr[blk->id];
- uint bbaddr = *pcode - objout.textbegin;
- assert(!bb->resolved);
- /* patch the pending 32-bit relative references to this block; each
-  * unpatched slot holds the text offset of the next pending slot */
- while (bb->relreloc) {
- uint next;
- int disp = bbaddr - bb->relreloc - 4;
-
- memcpy(&next, objout.textbegin + bb->relreloc, 4);
- wr32le(objout.textbegin + bb->relreloc, disp);
- bb->relreloc = next;
- }
- bb->resolved = 1;
- bb->addr = bbaddr;
-
- for (int i = 0; i < blk->ins.n; ++i) {
- emitinstr(pcode, fn, blk, i, &instrtab[blk->ins.p[i]]);
- }
- if (blk->jmp.t == Jret) {
- /* epilogue */
- uint here = *pcode - fnstart;
- if (epilogueaddr) {
- /* an epilogue was already emitted: jump back to it */
- int disp = epilogueaddr - (here + 2);
- if ((uint)(disp + 128) < 256) {/* can use 1-byte displacement? */
- B(0xEB), B(disp); /* JMP rel8 */
- } else {
- /* the rel32 form is 3 bytes longer than the rel8 form */
- B(0xE9), I32(disp - 3); /* JMP rel32 */
- }
- } else {
- if (fn->stksiz && (saverestore || !usebp))
- Xadd(pcode, KPTR, mkoper(OREG, .reg = RSP), mkoper(OIMM, .imm = fn->stksiz));
- if (saverestore) {
- /* only an epilogue that restores registers is recorded for reuse */
- epilogueaddr = here;
- calleerestore(pcode, fn);
- }
- if (usebp) B(0xC9); /* leave */
- B(0xC3); /* ret */
- }
- } else if (blk->jmp.t == Jtrap) {
- DS("\x0F\x0B"); /* UD2 */
- } else emitbranch(pcode, blk);
- } while ((blk = blk->lnext) != fn->entry);
- objdeffunc(fn->name, fn->globl, fnstart - objout.textbegin, *pcode - fnstart);
-}
-
-/* Entry point of the amd64 emitter: rounds the function's frame size up to a
- * multiple of 8, reports an error if it exceeds 2^24 bytes, then emits the
- * function's code. */
-void
-amd64_emit(struct function *fn)
-{
-   enum { MAXFRAME = 1<<24 };
-
-   fn->stksiz = alignup(fn->stksiz, 8);
-   if (fn->stksiz > MAXFRAME)
-      error(NULL, "'%s' stack frame too big", fn->name);
-   emitbin(fn);
-}
-
-/* vim:set ts=3 sw=3 expandtab: */
diff --git a/amd64/isel.c b/amd64/isel.c
deleted file mode 100644
index a0c913c..0000000
--- a/amd64/isel.c
+++ /dev/null
@@ -1,660 +0,0 @@
-#include "all.h"
-#include "../endian.h"
-
-/* Bit flags used in opflags[] to describe how an operation affects the CPU flags.
- * CLOBF marks an operation as modifying the flags at all. */
-enum flag {
- ZF = 1 << 0,
- SF = 1 << 1,
- CF = 1 << 2,
- OF = 1 << 3,
- CLOBF = 1 << 4,
-};
-
-/* flags modified by each integer op */
-/* Indexed by IR opcode. An entry with ZF makes the instruction the current
- * flag source for a following branch; an entry with only CLOBF invalidates
- * the current flag source (see the selection loop in amd64_isel). */
-static const uchar opflags[NOPER] = {
- [Oneg] = ZF|CLOBF,
- [Oadd] = ZF|CLOBF,
- [Osub] = ZF|CLOBF,
- [Omul] = CLOBF,
- [Odiv] = CLOBF,
- [Oudiv] = CLOBF,
- [Orem] = CLOBF,
- [Ourem] = CLOBF,
- [Oand] = ZF|CLOBF,
- [Oior] = ZF|CLOBF,
- [Oxor] = ZF|CLOBF,
- [Oshl] = ZF|CLOBF,
- [Osar] = ZF|CLOBF,
- [Oslr] = ZF|CLOBF,
- [Oequ] = ZF|CLOBF,
- [Oneq] = ZF|CLOBF,
- [Olth] = ZF|CLOBF,
- [Ogth] = ZF|CLOBF,
- [Olte] = ZF|CLOBF,
- [Ogte] = ZF|CLOBF,
- [Oulth] = ZF|CLOBF,
- [Ougth] = ZF|CLOBF,
- [Oulte] = ZF|CLOBF,
- [Ougte] = ZF|CLOBF,
- [Ocall] = CLOBF,
-};
-
-/* instrtab index of the instruction whose result last set the flags in the
- * block being selected, or -1 when there is none */
-static int iflagsrc = -1;
-
-/* When generating position-independent code, replace a symbol address
- * constant in *r by the result of a pointer-class copy inserted at *curi
- * (which is advanced past the new instruction). Otherwise does nothing. */
-static void
-picfixsym(union ref *r, struct block *blk, int *curi)
-{
-   if (ccopt.pic && isaddrcon(*r,0)) {
-      int at = (*curi)++;
-      *r = insertinstr(blk, at, mkinstr(Ocopy, KPTR, .l = *r));
-   }
-}
-
-/* map alloca tmp -> stack frame displacement (0 if not alloca) */
-/* Allocated in amd64_isel with one entry per instruction and freed at its end. */
-static ushort *stkslots;
-static uint nstkslots;
-
-/* true if ref `r` is a temporary that has a stack slot assigned in stkslots */
-#define isstkslot(r) ((r).t == RTMP && (r).i < nstkslots && stkslots[(r).i])
-
-/* Rewrite the operand *r of `ins` so that it can be encoded by the emitter,
- * inserting helper instructions into `blk` at *curi when needed (*curi is
- * advanced past each inserted instruction).
- * `ins` is NULL when the operand is a block's jump argument; in that case no
- * branch below may read through `ins`. */
-static void
-fixarg(union ref *r, struct instr *ins, struct block *blk, int *curi)
-{
- int sh;
- enum op op = ins ? ins->op : 0;
-
- if (r->t == RXCON) {
- struct xcon *con = &conht[r->i];
- if (in_range(op, Oshl, Oslr) && r == &ins->r) {
- sh = con->i;
- goto ShiftImm;
- } else if (in_range(op, Oadd, Osub) && con->i == 2147483648 && r == &ins->r) {
- /* add X, INT32MAX+1 -> sub X, INT32MIN */
- ins->op = Oadd + (op == Oadd);
- *r = mkintcon(KI32, -2147483648);
- } else if (kisflt(con->cls) && con->i == 0) {
- /* copy of positive float zero -> regular zero, that emit() will turn into xor x,x */
- if (in_range(op, Ocopy, Omove) || op == Ophi)
- *r = ZEROREF;
- else
- *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, con->cls, ZEROREF));
- } else if (!ins) {
- /* jump argument: every remaining case reads `ins`, so leave the constant
-  * as it is; seljmp copies non-temporary jump arguments into a temporary */
- } else if (con->cls >= KI64) {
- /* float immediates & 64bit immediates are loaded from memory */
- uchar data[8];
- uint ksiz = cls2siz[con->cls];
- union type ctype;
- /* can't use memory arg in rhs if lhs is memory */
- bool docopy = &ins->l != r && (oisstore(ins->op) || ins->l.t == RADDR);
- if (con->cls <= KPTR && in_range(ins->op, Ocopy, Omove)) /* in this case we can use movabs */
- return;
- else if (!docopy || con->cls >= KF32) {
- if (con->cls != KF32) {
- wr64le(data, con->i);
- ctype = mktype(con->cls == KF64 ? TYDOUBLE : TYVLONG);
- } else {
- /* reinterpret the float's bits as an integer to serialize it */
- union { float f; int i; } pun = { con->f };
- wr32le(data, pun.i);
- ctype = mktype(TYFLOAT);
- }
- *r = mkdatref(NULL, ctype, ksiz, /*align*/ksiz, data, ksiz, /*deref*/1);
- }
- if (docopy)
- *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, con->cls, *r));
- } else if (ins->op != Omove && con->issym && r == &ins->r) {
- *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, mkaddr((struct addr){*r})));
- } else if (in_range(op, Odiv, Ourem) && kisint(ins->cls))
- goto DivImm;
- } else if (r->t == RICON && in_range(op, Odiv, Ourem) && kisint(ins->cls) && r == &ins->r) {
- DivImm: /* there is no division by immediate, must be copied to a register */
- *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, ins->cls, *r));
- } else if (r->t == RICON && in_range(op, Oshl, Oslr) && r == &ins->r) {
- sh = r->i;
- ShiftImm: /* shift immediate is always 8bit */
- *r = mkref(RICON, sh & 255);
- } else if (isstkslot(*r)) {
- /* a stack slot's address is RBP minus its displacement */
- struct instr adr = mkinstr(Oadd, KPTR, mkref(RREG, RBP), mkintcon(KI32, -stkslots[r->i]));
- if (in_range(op, Ocopy, Omove))
- *ins = adr;
- else
- *r = insertinstr(blk, (*curi)++, adr);
- }
- picfixsym(r, blk, curi);
-}
-
-/* true if ref `r` is a constant whose class is KI32 */
-#define isimm32(r) (iscon(r) && concls(r) == KI32)
-
-/* Lower a call: turn the preceding Oarg instructions into register moves or
- * stack stores according to the call's ABI assignment, bracket the call with
- * the RSP adjustment for stack arguments, and read the return value(s) from
- * the ABI return registers. *curi is kept pointing at the call as
- * instructions are inserted. */
-static void
-selcall(struct function *fn, struct instr *ins, struct block *blk, int *curi)
-{
- const struct call *call = &calltab.p[ins->r.i];
- int iarg = *curi - 1;
- enum irclass cls;
- uint argstksiz = alignup(call->argstksiz, 16);
-
- /* walk the arguments last to first, scanning backwards for each Oarg */
- for (int i = call->narg - 1; i >= 0; --i) {
- struct abiarg abi = call->abiarg[i];
- struct instr *arg;
- for (;; --iarg) {
- assert(iarg >= 0 && i >= 0 && "arg?");
- if ((arg = &instrtab[blk->ins.p[iarg]])->op == Oarg)
- break;
- }
-
- if (!abi.isstk) {
- assert(!abi.ty.isagg);
- *arg = mkinstr(Omove, call->abiarg[i].ty.cls, mkref(RREG, abi.reg), arg->r);
- } else {
- union ref adr = mkaddr((struct addr){mkref(RREG, RSP), .disp = abi.stk});
- int iargsave = iarg;
- if (!abi.ty.isagg) { /* scalar arg in stack */
- *arg = mkinstr(Ostore8+ilog2(cls2siz[abi.ty.cls]), 0, adr, arg->r);
- if (isaddrcon(arg->r,1) || arg->r.t == RADDR)
- arg->r = insertinstr(blk, iarg++, mkinstr(Ocopy, abi.ty.cls, arg->r));
- else
- /* NOTE(review): this passes the call's own operand rather than the
-  * store's; presumably &arg->r and arg were intended - confirm */
- fixarg(&ins->r, ins, blk, &iarg);
- } else { /* aggregate arg in stack, callee stack frame destination address */
- *arg = mkinstr(Ocopy, KPTR, adr);
- }
- /* account for instructions inserted before the call */
- *curi += iarg - iargsave;
- }
- }
- if (call->argstksiz) {
- /* sub rsp before the first argument instruction, add rsp back after the call */
- union ref disp = mkref(RICON, argstksiz);
- insertinstr(blk, iarg--, (struct instr){Osub, KPTR, .keep=1, .reg = RSP+1, .l=mkref(RREG,RSP), disp});
- ++*curi;
- insertinstr(blk, *curi+1, (struct instr){Oadd, KPTR, .keep=1, .reg = RSP+1, .l=mkref(RREG,RSP), disp});
- }
- if (isimm32(ins->l))
- ins->l = mkaddr((struct addr){.base = ins->l});
- else if (isintcon(ins->l))
- ins->l = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, ins->l));
-
- if (call->vararg >= 0 && ins->l.t == RTMP) {
- /* variadic calls write number of sse regs used to AL, so mark it as clobbered such that
- * the function pointer of an indirect calls does not get allocated to RAX by regalloc */
- insertinstr(blk, (*curi)++, mkinstr(Omove, KPTR, mkref(RREG, RAX), mkref(RREG, RAX)));
- }
- cls = ins->cls;
- ins->cls = 0;
- if (cls) {
- /* duplicate to reuse same TMP ref */
- insertinstr(blk, (*curi)++, *ins);
- *ins = mkinstr(Ocopy, cls, mkref(RREG, call->abiret[0].reg));
- /* look at the next two instructions for the second-return-value marker */
- for (int i = 1; i <= 2; ++i) {
- if (*curi + i >= blk->ins.n) break;
- if (instrtab[blk->ins.p[*curi + i]].op == Ocall2r) {
- ins = &instrtab[blk->ins.p[*curi += i]];
- *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, call->abiret[1].reg));
- break;
- }
- }
- }
-}
-
-/* Add `disp` to the address's displacement. Returns 1 on success; returns 0
- * and leaves the address untouched if the sum does not fit in an int. */
-static bool
-aimm(struct addr *addr, int disp)
-{
-   vlong sum = (vlong)addr->disp + disp;
-
-   if (sum != (int)sum)
-      return 0;
-   addr->disp = sum;
-   return 1;
-}
-
-/* Add the integer constant `r` to the address's displacement. Returns 1 on
- * success; returns 0 and leaves the address untouched if the sum does not
- * fit in an int. */
-static bool
-acon(struct addr *addr, union ref r)
-{
-   vlong sum;
-
-   assert(isintcon(r));
-   sum = (vlong)addr->disp + intconval(r);
-   if (sum != (int)sum)
-      return 0;
-   addr->disp = sum;
-   return 1;
-}
-
-/* Try to make `a << b` the scaled index of the address.
- * Fails (returns 0, address untouched) if `b` is not an immediate in 0..3,
- * if the address already has an index, or if `a` is neither a register nor
- * a temporary. Returns 1 on success. */
-static bool
-ascale(struct addr *addr, union ref a, union ref b)
-{
-   if (b.t != RICON) return 0;
-   if (addr->index.bits) return 0;
-   if ((unsigned)b.i > 3) return 0;
-   if (a.t == RTMP) {
-      struct instr *ins = &instrtab[a.i];
-      /* factor out shifted immediate from 'shl {add %x, imm}, s' */
-      /* XXX maybe we shouldn't do this here because it should be done by a generic
-       * arithmetic optimization pass ? */
-      if (ins->op == Oadd && (ins->l.t == RREG || ins->l.t == RTMP) && isintcon(ins->r)) {
-         vlong imm = intconval(ins->r), disp;
-         /* keep the scaled product well inside vlong range */
-         if (imm != (int) imm) return 0;
-         /* (x + imm) << s == (x << s) + (imm << s): only the immediate is
-          * scaled, the displacement already accumulated is not */
-         disp = (vlong) addr->disp + imm * ((vlong) 1 << b.i);
-         if (disp != (int) disp) return 0;
-         addr->disp = disp;
-         a = ins->l;
-      }
-   } else if (a.t != RREG) {
-      return 0;
-   }
-   addr->index = a;
-   addr->shift = b.i;
-   return 1;
-}
-
-/* Try to add the value of ref `r` to the address being built in `addr`
- * (base + index << shift + disp), recursing through the adds, shifts and
- * copies that define temporaries. Instructions absorbed into the address are
- * marked with `skip`. Returns 0 if `r` cannot be added. */
-static bool
-aadd(struct addr *addr, struct block *blk, int *curi, union ref r)
-{
- if (isstkslot(r)) {
- /* stack slot: RBP as base with the slot's displacement subtracted */
- if (addr->base.bits || !aimm(addr, -stkslots[r.i])) goto Ref;
- addr->base = mkref(RREG, RBP);
- } else if (r.t == RTMP) {
- struct instr *ins = &instrtab[r.i];
- /* whenever the defining instruction cannot be absorbed, fall back to
-  * using the temporary itself as base or index (label Ref) */
- if (ins->op == Oadd) {
- if (!aadd(addr, blk, curi, ins->l)) goto Ref;
- if (!aadd(addr, blk, curi, ins->r)) goto Ref;
- ins->skip = 1;
- } else if (ins->op == Oshl) {
- if (!ascale(addr, ins->l, ins->r)) goto Ref;
- ins->skip = 1;
- } else if (ins->op == Ocopy && ins->l.t == RADDR) {
- /* merge an already-built address; restore `addr` if any part does not fit */
- struct addr save = *addr, *addr2 = &addrht[ins->l.i];
- if ((!addr2->base.bits || aadd(addr, blk, curi, addr2->base))
- && aimm(addr, addr2->disp)
- && (!addr2->index.bits || ascale(addr, addr2->index, mkref(RICON, addr2->shift))))
- {
- ins->skip = 1;
- } else {
- *addr = save;
- goto Ref;
- }
- } else if (ins->op == Ocopy) {
- if (!aadd(addr, blk, curi, ins->l)) goto Ref;
- ins->skip = 1;
- } else goto Ref;
- } else if (isnumcon(r)) {
- return acon(addr, r);
- } else if (isaddrcon(r,1)) {
- /* a symbol address can only be the base */
- if (!addr->base.bits && !isaddrcon(addr->index,1)) addr->base = r;
- else return 0;
- } else if (r.t == RREG) {
- /* temporaries are single assignment, but register aren't, so they can't be *
- * safely hoisted into an address value, unless they have global lifetime */
- if (!rstest(mctarg->rglob, r.i)) return 0;
- Ref:
- /* a stack slot that cannot serve as RBP+disp is first computed into a temporary */
- if (isstkslot(r) && (addr->base.bits || addr->index.bits)) {
- r = insertinstr(blk, (*curi)++, mkinstr(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, -stkslots[r.i])));
- }
- if (!addr->base.bits) addr->base = r;
- else if (!addr->index.bits) addr->index = r;
- else return 0;
- } else return 0;
- return 1;
-}
-
-/* Try to turn *r into a single address operand by folding the computation
- * that defines it into base + index << shift + disp.
- * Returns 1 if *r is usable as an address afterwards (possibly unchanged),
- * 0 if *r is not a temporary/address or could not be fused. */
-static bool
-fuseaddr(union ref *r, struct block *blk, int *curi)
-{
- struct addr addr = { 0 };
-
- if (isaddrcon(*r,1)) return 1;
- if (r->t == RADDR) {
- /* refine an existing address: fuse its base, then re-add its own index
-  * (if it has one) and displacement; on any failure *r is left as it was */
- const struct addr *a0 = &addrht[r->i];
- if (aadd(&addr, blk, curi, a0->base)
- && (!a0->index.bits || ascale(&addr, a0->index, mkref(RICON, a0->shift)))
- && aadd(&addr, blk, curi, mkintcon(KPTR, a0->disp))) {
- *r = mkaddr(addr);
- }
- return 1;
- }
- if (r->t != RTMP) return 0;
- if (!aadd(&addr, blk, curi, *r)) return 0;
-
- if (isaddrcon(addr.base,0) && (ccopt.pic || (ccopt.pie && addr.index.bits))) {
- /* pic needs to load from GOT */
- /* pie cannot encode RIP-relative address with index register */
- /* first load symbol address into a temp register */
- union ref temp = mkaddr((struct addr){.base = addr.base, .disp = ccopt.pic ? 0 : addr.disp});
- addr.base = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, .l = temp));
- /* without pic the displacement was folded into the loaded address */
- if (!ccopt.pic) addr.disp = 0;
- }
-
- if (!addr.base.bits) {
- /* absolute int address in disp */
- if (addr.index.bits) return 0;
- addr.base = mkintcon(KPTR, addr.disp);
- addr.disp = 0;
- }
-
- *r = mkaddr(addr);
- return 1;
-}
-
-/* Is an add instruction with this argument a candidate for being turned into
- * an effective address? True for symbol/data references, stack slots, and
- * temporaries defined by a shift, an add, or a copy of an address. */
-static bool
-addarg4addrp(union ref r)
-{
-   if (r.t == RXCON)
-      return !conht[r.i].cls && !conht[r.i].deref; /* sym or dat ref */
-   if (r.t != RTMP)
-      return 0;
-   if (isstkslot(r))
-      return 1;
-   switch (instrtab[r.i].op) {
-   case Oshl:
-   case Oadd:
-      return 1;
-   case Ocopy:
-      return instrtab[r.i].l.t == RADDR;
-   default:
-      return 0;
-   }
-}
-
-/* Legalize the address operand *r of a load or store: a 32-bit immediate
- * becomes an absolute address, a symbol gets the PIC fix-up, a temporary is
- * fused into an address when it is a candidate, a register is kept, and
- * anything else is first copied into a temporary. */
-static void
-loadstoreaddr(struct block *blk, union ref *r, int *curi)
-{
-   if (isimm32(*r)) {
-      *r = mkaddr((struct addr){.base = *r});
-      return;
-   }
-   if (isaddrcon(*r, 0)) {
-      picfixsym(r, blk, curi);
-      return;
-   }
-   if (r->t == RTMP) {
-      if (addarg4addrp(*r))
-         fuseaddr(r, blk, curi);
-      return;
-   }
-   if (r->t == RREG)
-      return;
-   *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, *r));
-}
-
-/* Constant-fold an arithmetic instruction whose operands are all numeric
- * constants, replacing it by a copy of the folded value. Returns 1 if the
- * instruction was folded, 0 if it was left alone. */
-static bool
-arithfold(struct instr *ins)
-{
-   union ref folded;
-   bool ok;
-
-   if (!isnumcon(ins->l))
-      return 0;
-   if (ins->r.t && !isnumcon(ins->r))
-      return 0;
-   if (ins->r.t)
-      ok = foldbinop(&folded, ins->op, ins->cls, ins->l, ins->r);
-   else
-      ok = foldunop(&folded, ins->op, ins->cls, ins->l);
-   assert(ok && "fold?");
-   *ins = mkinstr(Ocopy, insrescls(*ins), folded);
-   return 1;
-}
-
-/* Select one instruction: rewrite `ins` (at index *curi of `blk`) into a form
- * the amd64 emitter accepts, inserting helper instructions around it as
- * needed. *curi is updated so that it keeps designating the instruction the
- * caller should continue after. */
-static void
-sel(struct function *fn, struct instr *ins, struct block *blk, int *curi)
-{
- uint siz, alignlog2;
- int t = ins - instrtab;
- struct instr temp = {0};
- enum op op = ins->op;
-
- /* arithmetic on constants only is folded into a copy */
- if (oisarith(ins->op) && arithfold(ins)) {
- fixarg(&ins->l, ins, blk, curi);
- return;
- }
-
- switch (op) {
- default: assert(0);
- case Onop: break;
- case Oalloca1: case Oalloca2: case Oalloca4: case Oalloca8: case Oalloca16:
- /* reserve a frame slot and record its displacement; the alloca itself becomes a nop */
- alignlog2 = ins->op - Oalloca1;
- assert(ins->l.i > 0);
- siz = ins->l.i << alignlog2;
- fn->stksiz += siz;
- fn->stksiz = alignup(fn->stksiz, 1 << alignlog2);
- /* displacements are stored in ushort stkslots[] */
- if (fn->stksiz > (1<<16)-1) error(NULL, "'%s' stack frame too big", fn->name);
- stkslots[t] = fn->stksiz;
- *ins = mkinstr(Onop,0,);
- break;
- case Oparam:
- assert(ins->l.t == RICON && ins->l.i < fn->nabiarg);
- if (!fn->abiarg[ins->l.i].isstk)
- *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, fn->abiarg[ins->l.i].reg));
- else /* stack */
- *ins = mkinstr(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, 16+fn->abiarg[ins->l.i].stk));
- break;
- case Oarg:
- fixarg(&ins->r, ins, blk, curi);
- break;
- case Ocall:
- selcall(fn, ins, blk, curi);
- break;
- case Ocall2r: assert(0);
- case Ointrin:
- break;
- case Oshl: case Osar: case Oslr:
- if (!iscon(ins->r)) {
- /* shift amount register is always CL */
- insertinstr(blk, (*curi)++, mkinstr(Omove, KI32, mkref(RREG, RCX), ins->r));
- ins->r = mkref(RREG, RCX);
- }
- goto ALU;
- case Oequ: case Oneq:
- case Olth: case Ogth: case Olte: case Ogte:
- case Oulth: case Ougth: case Oulte: case Ougte:
- if (iscon(ins->l)) {
- /* lth imm, x -> gth x, imm */
- if (!in_range(ins->op, Oequ, Oneq))
- ins->op = ((op - Olth) ^ 1) + Olth;
- rswap(ins->l, ins->r);
- }
- if (ins->l.t != RTMP && ins->l.t != RREG)
- ins->l = insertinstr(blk, (*curi)++, mkinstr(Ocopy, ins->cls, ins->l));
- else
- fixarg(&ins->l, ins, blk, curi);
- fixarg(&ins->r, ins, blk, curi);
- break;
- case Odiv: case Oudiv: case Orem: case Ourem:
- if (kisflt(ins->cls)) goto ALU;
- /* TODO fuse div/rem pair */
-
- /* (I)DIV dividend is always in RDX:RAX, output also in those regs */
- insertinstr(blk, (*curi)++, mkinstr(Omove, ins->cls, mkref(RREG, RAX), ins->l));
- /* mark RDX as clobbered. sign/zero-extending RAX into RDX is handled in emit() */
- insertinstr(blk, (*curi)++, mkinstr(Omove, ins->cls, mkref(RREG, RDX), mkref(RREG, RDX)));
- fixarg(&ins->r, ins, blk, curi); /* make sure rhs is memory or reg */
- ins->l = mkref(RREG, RAX);
- ins->keep = 1;
- if (op == Orem) ins->op = Odiv;
- else if (op == Ourem) ins->op = Oudiv;
- insertinstr(blk, (*curi)++, *ins); /* duplicate ins to reuse tmp ref */
- *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, op < Orem ? RAX : RDX)); /* get output */
- temp = mkinstr(Ocopy, ins->cls, mkref(RREG, op < Orem ? RDX : RAX)); /* clobber other reg*/
- insertinstr(blk, ++(*curi), temp);
- /* swap instrs so that clobber goes first */
- t = blk->ins.p[*curi - 1];
- blk->ins.p[*curi - 1] = blk->ins.p[*curi - 0];
- blk->ins.p[*curi - 0] = t;
- break;
- case Osub:
- if (isintcon(ins->l)) {
- /* sub imm, x -> sub x, imm; neg x */
- fixarg(&ins->l, ins, blk, curi);
- ins->inplace = 1;
- struct instr sub = *ins;
- rswap(sub.l, sub.r);
- ins->op = op = Oneg;
- ins->l = insertinstr(blk, (*curi)++, sub);
- ins->r = NOREF;
- goto ALU;
- } else if (kisint(ins->cls) && isintcon(ins->r)) {
- /* sub x, imm -> add x, -imm */
- ins->op = op = Oadd;
- ins->r = mkintcon(concls(ins->r), -intconval(ins->r));
- } else {
- goto ALU;
- }
- /* fallthru */
- case Oadd:
- if (kisint(ins->cls)) {
- if ((addarg4addrp(ins->l) || addarg4addrp(ins->r))) {
- /* try to express the whole add as a copy of one fused address */
- temp.op = Ocopy;
- temp.cls = ins->cls;
- temp.l = mkref(RTMP, t);
- if (fuseaddr(&temp.l, blk, curi)) {
- *ins = temp;
- break;
- }
- }
- }
- /* fallthru */
- case Omul:
- case Oand: case Oxor: case Oior:
- /* commutative ops */
- if (iscon(ins->l))
- rswap(ins->l, ins->r);
- goto ALU;
- case Oneg:
- if (kisflt(ins->cls)) {
- /* flip sign bit with XORPS/D */
- static const uvlong sd[2] = {0x8000000000000000,0x8000000000000000};
- /* every lane carries the same sign-bit mask */
- static const uint sf[4] = {0x80000000,0x80000000,0x80000000,0x80000000};
- ins->op = Oxor;
- ins->r = mkdatref(NULL, mktype(ins->cls == KF32 ? TYFLOAT : TYDOUBLE), /*siz*/16,
- /*align*/16, ins->cls == KF32 ? (void *)sf : sd, /*siz*/16, /*deref*/1);
- }
- /* fallthru */
- case Onot:
- ALU:
- if (!(op == Oadd && kisint(ins->cls))) /* 3-address add is lea */
- if (!(op == Omul && kisint(ins->cls) && isimm32(ins->r))) /* for (I)MUL r,r/m,imm */
- ins->inplace = 1;
- if (iscon(ins->l)) {
- fixarg(&ins->l, ins, blk, curi);
- ins->l = insertinstr(blk, (*curi)++, mkinstr(Ocopy, ins->cls, ins->l));
- }
- /* the Omove label sits inside the if: a move always has its rhs fixed */
- if (ins->r.bits)
- case Omove:
- fixarg(&ins->r, ins, blk, curi);
- if (op == Oadd && isaddrcon(ins->r,1)) /* no 3-address add if rhs is mem */
- ins->inplace = 1;
- break;
- case Oloads8: case Oloadu8: case Oloads16: case Oloadu16:
- case Oloads32: case Oloadu32: case Oloadi64: case Oloadf32: case Oloadf64:
- loadstoreaddr(blk, &ins->l, curi);
- break;
- case Ostore8: case Ostore16: case Ostore32: case Ostore64:
- loadstoreaddr(blk, &ins->l, curi);
- /* an address used as the stored value must first be computed into a temporary */
- if (isaddrcon(ins->r,1) || ins->r.t == RADDR)
- ins->r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, ins->r));
- else
- fixarg(&ins->r, ins, blk, curi);
- break;
- case Ocvtu32f:
- /* zero-extend to 64 bits, then convert as a signed 64-bit integer */
- fixarg(&ins->l, ins, blk, curi);
- ins->l = insertinstr(blk, (*curi)++, mkinstr(Oextu32, KI64, ins->l));
- ins->op = Ocvts64f;
- break;
- case Ocvtf32u: case Ocvtf64u:
- fixarg(&ins->l, ins, blk, curi);
- if (ins->cls == KI32) {
- /* convert to a signed 64-bit integer, then keep the low 32 bits */
- ins->l = insertinstr(blk, (*curi)++, mkinstr(ins->op == Ocvtf32u ? Ocvtf32s : Ocvtf64s, KI64, ins->l));
- ins->op = Oextu32;
- } else assert(!"nyi flt -> u64");
- break;
- case Ocvtf32f64: case Ocvtf64f32: case Ocvtf32s: case Ocvtf64s: case Ocvts32f: case Ocvts64f:
- case Ocvtu64f:
- case Oexts8: case Oextu8: case Oexts16: case Oextu16: case Oexts32: case Oextu32:
- if (isnumcon(ins->l)) {
- /* conversion of a constant: fold it into a copy */
- union ref it;
- bool ok = foldunop(&it, ins->op, ins->cls, ins->l);
- assert(ok);
- ins->op = Ocopy;
- ins->l = it;
- break;
- }
- /* fallthru */
- case Ocopy:
- fixarg(&ins->l, ins, blk, curi);
- break;
- case Oxvaprologue:
- fuseaddr(&ins->l, blk, curi);
- assert(ins->l.t == RADDR);
- /* !this must be the first instruction */
- assert(*curi == 1);
- assert(blk == fn->entry);
- t = blk->ins.p[0];
- blk->ins.p[0] = blk->ins.p[1];
- blk->ins.p[1] = t;
- break;
- }
-}
-
-/* Select the terminator of `blk`. For a conditional branch, makes sure the
- * condition is a temporary whose defining instruction leaves usable flags,
- * inserting an explicit compare against zero otherwise. For a return, moves
- * the return value(s) into the ABI return registers. */
-static void
-seljmp(struct function *fn, struct block *blk)
-{
- if (blk->jmp.t == Jb && blk->jmp.arg[0].bits) {
- int curi = blk->ins.n;
- fixarg(&blk->jmp.arg[0], NULL, blk, &curi);
- union ref c = blk->jmp.arg[0];
- if (c.t != RTMP) {
- /* the condition is not a temporary: copy it into one at the end of the block */
- enum irclass cls = c.t == RICON ? KI32 : c.t == RXCON && conht[c.i].cls ? conht[c.i].cls : KPTR;
- int curi = blk->ins.n;
-
- c = insertinstr(blk, blk->ins.n, mkinstr(Ocopy, cls, c));
- sel(fn, &instrtab[c.i], blk, &curi);
- }
- if (iflagsrc == c.i /* test cmp */
- && (oiscmp(instrtab[c.i].op) || instrtab[c.i].op == Oand || instrtab[c.i].op == Osub)) {
- /* the flags still come from this compare: branch on it directly */
- instrtab[c.i].keep = 1;
- } else {
- /* the flags are usable only if the condition's instruction sets ZF and
-  * is the last instruction of the block */
- if (!(opflags[instrtab[c.i].op] & ZF) || blk->ins.n == 0 || c.i != blk->ins.p[blk->ins.n - 1]) {
- struct instr *ins;
- int curi = blk->ins.n;
- blk->jmp.arg[0] = insertinstr(blk, blk->ins.n, mkinstr(Oneq, instrtab[c.i].cls, c, ZEROREF));
- ins = &instrtab[blk->jmp.arg[0].i];
- if (kisflt(ins->cls)) {
- /* float compare: the zero is copied into a temporary placed before the compare */
- ins->r = insertinstr(blk, curi, mkinstr(Ocopy, ins->cls, ZEROREF));
- }
- ins->keep = 1;
- } else if (instrtab[c.i].op == Oadd) {
- /* prevent a 3-address add whose flag results are used from becoming a LEA */
- instrtab[c.i].inplace = 1;
- }
- }
- } else if (blk->jmp.t == Jret) {
- if (blk->jmp.arg[0].bits) {
- /* move each returned value into its ABI register and return that register */
- int curi;
- union ref r = mkref(RREG, fn->abiret[0].reg);
- struct instr *ins = &instrtab[insertinstr(blk, blk->ins.n, mkinstr(Omove, fn->abiret[0].ty.cls, r , blk->jmp.arg[0])).i];
- curi = blk->ins.n;
- fixarg(&ins->r, ins, blk, &curi);
- blk->jmp.arg[0] = r;
- if (blk->jmp.arg[1].bits) {
- r = mkref(RREG, fn->abiret[1].reg);
- ins = &instrtab[insertinstr(blk, blk->ins.n, mkinstr(Omove, fn->abiret[1].ty.cls, r, blk->jmp.arg[1])).i];
- curi = blk->ins.n;
- fixarg(&ins->r, ins, blk, &curi);
- blk->jmp.arg[1] = r;
- }
- }
- }
-}
-
-/* Instruction selection for amd64. Resets the frame size, then for every
- * block in layout order legalizes the phi arguments, selects each
- * instruction while tracking which one last set the flags, and selects the
- * terminator. Dumps the IR afterwards when debugging is enabled and clears
- * fn->prop. */
-void
-amd64_isel(struct function *fn)
-{
-   extern int ninstr;
-   struct block *blk;
-
-   fn->stksiz = 0;
-   nstkslots = ninstr;
-   stkslots = xcalloc(nstkslots * sizeof *stkslots);
-
-   blk = fn->entry;
-   do {
-      /* phi arguments are fixed at the end of the matching predecessor */
-      for (int p = 0; p < blk->phi.n; ++p) {
-         struct instr *phiins = &instrtab[blk->phi.p[p]];
-         union ref *args = phitab.p[phiins->l.i];
-         for (int k = 0; k < blk->npred; ++k) {
-            int at = blkpred(blk, k)->ins.n;
-            fixarg(&args[k], phiins, blkpred(blk, k), &at);
-         }
-      }
-
-      iflagsrc = -1;
-      for (int i = 0; i < blk->ins.n; ++i) {
-         struct instr *ins = &instrtab[blk->ins.p[i]];
-         sel(fn, ins, blk, &i);
-         if (ins->op >= countof(opflags) || !kisint(insrescls(*ins)))
-            continue;
-         /* remember the instruction that set the flags, or forget it on a clobber */
-         if (opflags[ins->op] & ZF)
-            iflagsrc = ins - instrtab;
-         else if (opflags[ins->op] & CLOBF)
-            iflagsrc = -1;
-      }
-      seljmp(fn, blk);
-      blk = blk->lnext;
-   } while (blk != fn->entry);
-   free(stkslots);
-
-   if (ccopt.dbg.i) {
-      bfmt(ccopt.dbgout, "<< After isel >>\n");
-      irdump(fn);
-   }
-
-   fn->prop = 0;
-}
-
-/* vim:set ts=3 sw=3 expandtab: */
diff --git a/amd64/sysv.c b/amd64/sysv.c
deleted file mode 100644
index 486c0c0..0000000
--- a/amd64/sysv.c
+++ /dev/null
@@ -1,313 +0,0 @@
-#include "all.h"
-
-static int classify(uchar cls[2], const struct typedata *td, uint off);
-
-static void /* Fold the class of one scalar of type ty at byte offset off into its eightbyte slot cls[off/8] */
-clsscalar(uchar cls[2], uint off, union type ty)
-{
- enum irclass k = type2cls[scalartypet(ty)];
- uchar *fcls = &cls[off/8]; /* slot of the eightbyte that contains off */
- if (isflt(ty)) { /* SSE */
- if (!*fcls || (*fcls == KF32 && k > *fcls)) /* only fill an empty slot, or replace KF32 by a greater class */
- *fcls = k;
- } else { /* INTEGER */
- assert(isint(ty) || ty.t == TYPTR);
- if (cls2siz[*fcls] < cls2siz[k]) /* take over the slot when k is wider than the class already there */
- *fcls = k == KPTR ? KI64 : k; /* pointers are recorded as KI64 */
- }
- if (off % 8 >= 4 && cls2siz[*fcls] < 8) /* scalar lies in the upper half of the eightbyte, so the slot must be an 8-byte class */
- *fcls = kisint(*fcls) ? KI64 : KF64;
-}
-
-static int /* Classify every element of array type ty starting at byte offset off; returns the number of eightbytes that got a class, 0 meaning MEMORY */
-classifyarr(uchar cls[2], union type ty, uint off)
-{
- union type chld = typechild(ty);
- uint n = typearrlen(ty), siz = typesize(chld);
- assert(n > 0);
- for (uint i = 0; i < n; ++i) {
- uint offx = off + i * siz; /* byte offset of element i */
- if (isagg(chld)) {
- if (!classify(cls, &typedata[chld.dat], offx))
- return cls[0] = cls[1] = 0; /* clears both classes and returns 0 */
- } else if (chld.t == TYARRAY) {
- if (!classifyarr(cls, chld, offx)) /* nested array: recurse per element */
- return cls[0] = cls[1] = 0;
- } else {
- clsscalar(cls, offx, chld);
- }
- }
- return !!cls[0] + !!cls[1]; /* count of slots that are non-zero */
-}
-
-static int /* Classify aggregate td located at byte offset off into cls[]; returns the number of eightbytes that got a class, 0 meaning MEMORY */
-classify(uchar cls[2], const struct typedata *td, uint off)
-{
- uint siz = alignup(td->siz, 4);
- if (siz > 16) /* MEMORY */
- return 0;
- for (int i = 0; i < td->nmemb; ++i) {
- struct fielddata *fld = &td->fld[i].f;
- uint align = typealign(fld->t);
- if (alignup(fld->off, align) != fld->off) /* unaligned field -> MEMORY */
- return cls[0] = cls[1] = 0;
- if (isagg(fld->t)) {
- if (!classify(cls, &typedata[fld->t.dat], off + fld->off)) /* nested aggregate: recurse at its absolute offset */
- return cls[0] = cls[1] = 0;
- } else if (fld->t.t == TYARRAY) {
- if (isincomplete(fld->t)) continue; /* incomplete array field (presumably a flexible array member) contributes nothing */
- if (!classifyarr(cls, fld->t, off + fld->off))
- return cls[0] = cls[1] = 0;
- } else {
- clsscalar(cls, fld->off + off, fld->t);
- }
- }
- return !!cls[0] + !!cls[1]; /* count of slots that are non-zero */
-}
-
-static int /* Place one argument of type typ: fills r[] and cls[] and advances *ni (int regs), *nf (float regs), *ns (stack bytes); returns the number of registers used, 0 when passed on the stack */
-abiarg(short r[2], uchar cls[2], uchar *r2off, int *ni, int *nf, int *ns, union irtype typ)
-{
- static const uchar intregs[] = { RDI, RSI, RDX, RCX, R8, R9 };
- enum { NINT = countof(intregs), NFLT = 8 }; /* six integer registers from the table above, eight float registers counted from XMM0 */
- int ret, ni_save, nf_save;
-
- if (!typ.isagg) {
- if (kisflt(cls[0] = typ.cls) && *nf < NFLT) {
- r[0] = XMM0 + (*nf)++;
- } else if (kisint(cls[0]) && *ni < NINT) {
- r[0] = intregs[(*ni)++];
- } else {
- r[0] = *ns; /* r[0] carries the stack offset instead of a register */
- *ns += 8;
- return 0; /* MEMORY */
- }
- return 1;
- }
- cls[0] = cls[1] = 0;
- ret = classify(cls, &typedata[typ.dat], 0);
- if (!ret) { /*MEMORY*/
- r[0] = *ns;
- *ns = alignup(*ns + typedata[typ.dat].siz, 8); /* reserve the aggregate size, rounded up to 8 */
- return 0;
- }
- assert(ret <= 2);
- ni_save = *ni, nf_save = *nf; /* snapshot so a partial register assignment can be undone */
- *r2off = 8; /* presumably the byte offset of the part held in r[1] - TODO confirm against callers */
- for (int i = 0; i < ret; ++i) {
- assert(cls[i]);
- if (kisflt(cls[i]) && *nf < NFLT)
- r[i] = XMM0 + (*nf)++;
- else if (kisint(cls[i]) && *ni < NINT)
- r[i] = intregs[(*ni)++];
- else { /* MEMORY */
- *ni = ni_save, *nf = nf_save; /* out of registers: roll back and put the whole aggregate on the stack */
- r[0] = *ns;
- *ns = alignup(*ns + typedata[typ.dat].siz, 8);
- r[1] = -1;
- return cls[0] = cls[1] = 0;
- }
- }
- return ret;
-}
-
-static int /* Choose the return location for typ: returns the number of registers written to r[], 0 when the value is returned in memory */
-abiret(short r[2], uchar cls[2], uchar *r2off, int *ni, union irtype typ)
-{
- int ret;
-
- if (!typ.isagg) {
- r[0] = kisflt(cls[0] = typ.cls) ? XMM0 : RAX;
- return 1;
- }
-
- cls[0] = cls[1] = 0;
- ret = classify(cls, &typedata[typ.dat], 0);
- if (!ret) { /* MEMORY */
- assert(*ni == 0);
- r[0] = RAX; /* on return should contain result location address */
- r[1] = RDI; /* register for caller-owned result location argument */
- ++*ni; /* the result pointer takes one integer argument register */
- return 0;
- }
- assert(ret <= 2);
- *r2off = 8;
- for (int i = 0, ni = 0, nf = 0; i < ret; ++i) { /* NOTE: ni and nf here are loop-local counters; ni shadows the pointer parameter */
- assert(cls[i]);
- if (kisflt(cls[i])) /* SSE (XMM0, XMM1) */
- r[i] = XMM0 + nf++;
- else if (kisint(cls[i])) /* INTEGER (RAX, RDX) */
- r[i] = ni++ == 0 ? RAX : RDX;
- else assert(0);
- }
- return ret;
-}
-
-/* Layout of va_list:
- * struct {
- * ( 0) unsigned int gp_offset;
- * ( 4) unsigned int fp_offset;
- * ( 8) void *overflow_arg_area;
- * (16) void *reg_save_area;
- * }
- * Layout of register save area (align 16):
- * reg off
- * rdi 0
- * rsi 8
- * rdx 16
- * rcx 24
- * r8 32
- * r9 40
- * xmm0 48
- * xmm1 64
- * ...
- * in amd64/emit xvaprologue generates the code to save the registers to a stack slot
- * there only needs to be one xvaprologue if there's any vastart instrs, and it has to be
- * at the beginning of the function (before IR generated by regalloc can touch any registers)
- * then vastart can initialize va_list.reg_save_area with a pointer to that
- */
-
-static void /* Lower the Ovastart at blk->ins.p[*curi] into stores that initialise the four va_list fields behind ins->l */
-vastart(struct function *fn, struct block *blk, int *curi)
-{
- union ref rsave; /* register save area */
- int gpr0 = 0, fpr0 = 0, stk0 = 0;
- struct instr *ins = &instrtab[blk->ins.p[*curi]]; /* NOTE(review): taken before the insertinstr calls below; verify instrtab cannot move */
- union ref ap = ins->l, src, dst;
- assert(ins->op == Ovastart);
- /* add xvaprologue if not there yet, which must be the first
- * real instruction in the function (following alloca) */
- if (fn->entry->ins.n > 1 && instrtab[fn->entry->ins.p[1]].op == Oxvaprologue) {
- rsave = mkref(RTMP, fn->entry->ins.p[0]); /* alloca instruction */
- assert(instrtab[rsave.i].op == Oalloca16);
- } else {
- rsave = insertinstr(fn->entry, 0, mkalloca(192, 16)); /* 192 bytes, 16-aligned; the documented layout needs 6*8 + 8*16 = 176 */
- insertinstr(fn->entry, 1, mkinstr(Oxvaprologue, 0, rsave, .keep=1));
- }
- /* find first unnamed gpr and fpr */
- for (int i = 0; i < fn->nabiarg; ++i) {
- struct abiarg abi = fn->abiarg[i];
- if (!abi.isstk){
- if (abi.reg < XMM0) ++gpr0; /* registers below XMM0 in enum reg are the general purpose ones */
- else ++fpr0;
- } else {
- stk0 = abi.stk+8; /* ends up 8 bytes past the offset of the last stack argument seen */
- }
- }
- /* set ap->reg_save_area */
- *ins = mkinstr(Oadd, KPTR, ap, mkref(RICON, 16)); /* the vastart instruction itself is overwritten with ap + 16 */
- dst = mkref(RTMP, ins - instrtab);
- int i = *curi + 1; /* following instructions are inserted right after it */
- insertinstr(blk, i++, mkinstr(Ostore64, 0, dst, rsave));
- /* set ap->overflow_arg_area */
- src = insertinstr(blk, i++, mkinstr(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, 16+stk0))); /* the 16 presumably skips saved rbp and return address - TODO confirm */
- dst = insertinstr(blk, i++, mkinstr(Oadd, KPTR, ap, mkref(RICON, 8)));
- insertinstr(blk, i++, mkinstr(Ostore64, 0, dst, src));
- /* set ap->gp_offset */
- insertinstr(blk, i++, mkinstr(Ostore32, 0, ap, mkref(RICON, gpr0*8)));
- /* set ap->fp_offset */
- dst = insertinstr(blk, i++, mkinstr(Oadd, KPTR, ap, mkref(RICON, 4)));
- insertinstr(blk, i++, mkinstr(Ostore32, 0, dst, mkref(RICON, 6*8 + fpr0*16))); /* float slots start after the six 8-byte integer slots */
- *curi = i-1; /* leave *curi on the last instruction inserted */
-}
-
-static void /* Lower the Ovaarg at blk->ins.p[*curi]: branch on the va_list offset, take the argument address from the register save area or the overflow area, and load through a phi in a merge block */
-vaarg(struct function *fn, struct block *blk, int *curi)
-{
- short r[2];
- uchar cls[2];
- union ref tmp;
- int ni = 0, nf = 0, ns = 0;
- uchar r2off;
- int var = blk->ins.p[*curi];
- union ref ap = instrtab[var].l;
- union irtype ty = ref2type(instrtab[var].r);
-
- assert(instrtab[var].op == Ovaarg);
- blk->ins.p[*curi] = newinstr(blk, (struct instr){Onop}); /* a nop takes the old position; var is put at the head of the merge block below */
-
- int ret = abiarg(r, cls, &r2off, &ni, &nf, &ns, ty); /* counters start at zero, so this only reveals class and register count of ty */
-
- if (ret == 2) assert(!"nyi"); /* values needing two registers are not implemented */
- else if (ret == 1) {
- struct block *merge;
- union ref phi, phiargs[2];
- /* int: l->gp_offset < 48 - num_gp * 8 */
- /* sse: l->fp_offset < 304 - num_gp * 16 (why 304? ... 176) */
- tmp = ni ? ap : insertinstr(blk, (*curi)++, mkinstr(Oadd, KPTR, ap, mkref(RICON, 4))); /* ni != 0 means integer class: use gp_offset at ap+0, otherwise fp_offset at ap+4 */
- tmp = insertinstr(blk, (*curi)++, mkinstr(Oloadu32, KI32, tmp));
- tmp = insertinstr(blk, (*curi)++, mkinstr(Oulte, KI32, tmp, mkref(RICON, ni ? 48 - ni*8 : 176 - nf*16))); /* Oulte is presumably unsigned <=: true while a save-area slot is left */
- merge = blksplitafter(fn, blk, *curi);
- blk->jmp.t = 0;
- useblk(fn, blk);
- putcondbranch(fn, tmp, newblk(fn), newblk(fn)); /* two fresh blocks: s1 is filled as the register path, s2 as the overflow path */
- useblk(fn, blk->s1);
- {
- /* phi0: &l->reg_save_area[l->gp/fp_offset] */
- union ref sav = addinstr(fn, mkinstr(Oloadi64, KPTR, irbinop(fn, Oadd, KPTR, ap, mkref(RICON, 16))));
- union ref roff = addinstr(fn, mkinstr(Oloadu32, KI32, irbinop(fn, Oadd, KPTR, ap, mkref(RICON, ni ? 0 : 4))));
- phiargs[0] = irbinop(fn, Oadd, KPTR, sav, roff);
- /* l->gp/fp_offset += num_gp/fp * 8(16) */
- roff = irbinop(fn, Oadd, KI32, roff, mkref(RICON, ni ? ni * 8 : nf * 16));
- addinstr(fn, mkinstr(Ostore32, 0, irbinop(fn, Oadd, KPTR, ap, mkref(RICON, ni ? 0 : 4)), roff));
- assert(merge->npred == 1);
- blkpred(merge, 0) = blk->s1; /* the single predecessor entry of merge is pointed at the register path */
- blk->s1->jmp.t = Jb;
- blk->s1->s1 = merge;
- }
- useblk(fn, blk->s2);
- {
- /* phi1: l->overflow_arg_area */
- union ref adr = irbinop(fn, Oadd, KPTR, ap, mkref(RICON, 8));
- union ref ovf = addinstr(fn, mkinstr(Oloadi64, KPTR, adr));
- /* align no-op */
-
- phiargs[1] = ovf;
- /* update l->overflow_arg_area += size */
- int siz = 8; /* this path always advances by one 8-byte slot */
- addinstr(fn, mkinstr(Ostore64, 0, adr, irbinop(fn, Oadd, KPTR, ovf, mkref(RICON, siz))));
- putbranch(fn, merge);
- }
- assert(merge->npred == 2);
- vpush(&merge->ins, 0); /* grow merge->ins by one, then shift everything up to free index 0 */
- memmove(merge->ins.p+1, merge->ins.p, (merge->ins.n-1)*sizeof *merge->ins.p);
- merge->ins.p[0] = var;
- phi = insertphi(merge, KPTR);
- memcpy(phitab.p[instrtab[phi.i].l.i], phiargs, sizeof phiargs); /* phi argument 0 = register path address, 1 = overflow path address */
- if (!ty.isagg) {
- instrtab[var] = mkinstr(cls[0] == KI32 ? Oloads32 : Oloadi64, cls[0], phi); /* var becomes a load of the argument through the phi address */
- } else {
- instrtab[var] = mkalloca(8, 8); /* aggregate: var becomes an 8-byte slot that receives a copy of the argument */
- tmp = insertinstr(merge, 1, mkinstr(Oloadi64, KI64, phi));
- insertinstr(merge, 2, mkinstr(Ostore64, 0, mkref(RTMP, var), tmp));
- }
- fn->prop &= ~FNUSE; /* clears the FNUSE property bit, presumably because use info is stale after these edits - TODO confirm */
- } else {
- assert(!"nyi"); /* arguments passed in memory (abiarg returned 0) are not implemented */
- }
-}
-
-static const char amd64_rnames[][6] = { /* register name strings generated from LIST_REGS, in the same order as enum reg */
-#define R(r) #r,
- LIST_REGS(R)
-#undef R
-};
-
-const struct mctarg t_amd64_sysv = { /* target description for amd64 System V: register sets plus the ABI, varargs, isel and emit hooks defined for this target */
- .gpr0 = RAX, .ngpr = R15 - RAX + 1, /* general purpose registers RAX through R15 */
- .bpr = RBP,
- .gprscratch = R11, .fprscratch = XMM15,
- .fpr0 = XMM0, .nfpr = XMM15 - XMM0 + 1, /* float registers XMM0 through XMM15 */
- .rcallee = 1<<RBX | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15, /* bit mask indexed by enum reg; presumably the callee-saved set */
- .rglob = 1<<RSP | 1<<RBP, /* bit mask of RSP and RBP; presumably registers never handed to the allocator - TODO confirm */
- .rnames = amd64_rnames,
- .objkind = OBJELF,
- .abiret = abiret,
- .abiarg = abiarg,
- .vastart = vastart,
- .vaarg = vaarg,
- .isel = amd64_isel,
- .emit = amd64_emit
-};
-
-/* vim:set ts=3 sw=3 expandtab: */