aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/t_aarch64_isel.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/t_aarch64_isel.c')
-rw-r--r--src/t_aarch64_isel.c515
1 files changed, 515 insertions, 0 deletions
diff --git a/src/t_aarch64_isel.c b/src/t_aarch64_isel.c
new file mode 100644
index 0000000..398ea28
--- /dev/null
+++ b/src/t_aarch64_isel.c
@@ -0,0 +1,515 @@
+#include "all.h"
+
+#define isimm32(r) (iscon(r) && concls(r) == KI32)
+
+static inline uint
+clz(uvlong x)
+{
+#if HAS_BUILTIN(clzll)
+ return __builtin_clzll(x);
+#else
+ int i = 0;
+ for (uvlong mask = BIT(63);; ++i, mask >>= 1)
+ if (x & mask)
+ break;
+ return i;
+#endif
+}
+
+/* Encode logical immediate */
+bool
+aarch64_logimm(uint *enc, enum irclass k, uvlong x)
+{
+ /* https://github.com/v8/v8/blob/927ccc6076e25a614787c7011315468e40fe39a4/src/codegen/arm64/assembler-arm64.cc#L4409 */
+ if (k == KI32) x = (uint)x | x << 32;
+ bool neg;
+ if ((neg = x & 1)) x = ~x;
+ if (x == 0) return 0;
+ uvlong a = x & (~x + 1),
+ xa = x + a,
+ b = xa & (~xa + 1),
+ xa_b = xa - b,
+ c = xa_b & (~xa_b + 1),
+ mask;
+ uint clza = clz(a),
+ d, outn;
+ if (c != 0) {
+ d = clza - clz(c);
+ mask = BIT(d) - 1;
+ outn = 0;
+ } else {
+ assert(a != 0);
+ d = 64;
+ mask = ~0ull;
+ outn = 1;
+ }
+ if (!ispo2(d)) return 0;
+ if (((b - a) & ~mask) != 0) return 0;
+ static const uvlong M[] = {
+ 0x0000000000000001, 0x0000000100000001, 0x0001000100010001,
+ 0x0101010101010101, 0x1111111111111111, 0x5555555555555555,
+ };
+ int i = clz(d) - 57;
+ assert((uint)i < countof(M));
+ uvlong m = M[i];
+ uvlong y = (b - a) * m;
+ if (y != x) return 0;
+ if (enc) {
+ int clzb = b == 0 ? -1 : clz(b),
+ s = clza - clzb, r;
+ if (neg) {
+ s = d - s;
+ r = (clzb + 1) & (d - 1);
+ } else {
+ r = (clza + 1) & (d - 1);
+ }
+ *enc = outn<<12 | r<<6 | (((-d * 2) | (s - 1)) & 0x3F);
+ }
+ return 1;
+}
+
+
+static void fixarg(union ref *r, struct instr *ins, struct block *blk, int *curi);
+static void
+regarg(union ref *r, enum irclass k, struct block *blk, int *curi)
+{
+ if (r->t != RTMP) {
+ *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, k, *r));
+ if (kisflt(k) || instrtab[r->i].l.t == RSTACK) {
+ int iprev = *curi-1;
+ fixarg(&instrtab[r->i].l, &instrtab[r->i], blk, &iprev);
+ *curi = iprev+1;
+ }
+ }
+}
+
+static void
+fixarg(union ref *r, struct instr *ins, struct block *blk, int *curi)
+{
+ enum op op = ins ? ins->op : 0;
+ if (isintcon(*r)) {
+ vlong x = intconval(*r);
+ switch (op) {
+ case Ocopy: return;
+ default:
+ if (oiscmp(op)) {
+ case Oadd: case Osub:
+ /* imm12 (lsl 12) */
+ if ((x &~ 0xFFF) == 0 || (x &~ 0xFFF000) == 0) return;
+ break;
+ case Oshl: case Osar: case Oslr:
+ if ((uvlong)x < (ins->cls == KI32 ? 32 : 64)) return;
+ break;
+ case Oand: case Oior: case Oxor:
+ if (aarch64_logimm(NULL, ins->cls, x)) return;
+ break;
+ }
+ }
+ goto Reg;
+ } else if (isfltcon(*r)) {
+ enum irclass k = concls(*r), ki = KI32 + k-KF32;
+ if (contab.p[r->i].f != 0.0) {
+ union {
+ vlong i64;
+ int i32;
+ float f32;
+ double f64;
+ } pun;
+ vlong i;
+ if (k == KF32) {
+ pun.f32 = contab.p[r->i].f;
+ i = pun.i32;
+ } else {
+ pun.f64 = contab.p[r->i].f;
+ i = pun.i64;
+ }
+ union ref gpr = insertinstr(blk, (*curi)++, mkinstr(Ocopy, ki, mkintcon(ki, i)));
+ *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, k, gpr));
+ } else if (oiscmp(op)) {
+ return;
+ } else {
+ *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, k, *r));
+ }
+ } else if (r->t == RSTACK) {
+ struct instr adr = mkinstr(Osub, KPTR, mkref(RREG, FP), mkintcon(KI32, r->i));
+ if (op == Ocopy)
+ *ins = adr;
+ else
+ *r = insertinstr(blk, (*curi)++, adr);
+ } else if (r->t != RTMP) Reg: {
+ regarg(r, r->t == RTMP ? instrtab[r->i].cls : ins->cls ? ins->cls : KI32, blk, curi);
+ }
+}
+
+static bool
+arithfold(struct instr *ins)
+{
+ if (isnumcon(ins->l) && (!ins->r.t || isnumcon(ins->r))) {
+ union ref r;
+ bool ok = ins->r.t ? foldbinop(&r, ins->op, ins->cls, ins->l, ins->r) : foldunop(&r, ins->op, ins->cls, ins->l);
+ assert(ok && "fold?");
+ *ins = mkinstr(Ocopy, insrescls(*ins), r);
+ return 1;
+ }
+ return 0;
+}
+
+static void
+selcall(struct function *fn, struct instr *ins, struct block *blk, int *curi)
+{
+ const struct call *call = &calltab.p[ins->r.i];
+ int iarg = *curi - 1;
+ enum irclass cls;
+ uint argstksiz = alignup(call->argstksiz, 16);
+
+ for (int i = call->narg - 1; i >= 0; --i) {
+ struct abiarg abi = call->abiarg[i];
+ struct instr *arg;
+ for (;; --iarg) {
+ assert(iarg >= 0 && i >= 0 && "arg?");
+ if ((arg = &instrtab[blk->ins.p[iarg]])->op == Oarg)
+ break;
+ }
+
+ if (!abi.isstk) {
+ assert(!abi.ty.isagg);
+ *arg = mkinstr(Omove, call->abiarg[i].ty.cls, mkref(RREG, abi.reg), arg->r);
+ } else {
+ union ref adr = mkaddr((struct addr){mkref(RREG, SP), .disp = abi.stk});
+ int iargsave = iarg;
+ if (!abi.ty.isagg) { /* scalar arg in stack */
+ *arg = mkinstr(cls2store[abi.ty.cls], 0, adr, arg->r);
+ if (isaddrcon(arg->r,1) || arg->r.t == RADDR)
+ arg->r = insertinstr(blk, iarg++, mkinstr(Ocopy, abi.ty.cls, arg->r));
+ else
+ fixarg(&ins->r, ins, blk, &iarg);
+ } else { /* aggregate arg in stack, callee stack frame destination address */
+ *arg = mkinstr(Ocopy, KPTR, adr);
+ }
+ *curi += iarg - iargsave;
+ }
+ }
+ if (call->argstksiz) {
+ union ref disp = mkref(RICON, argstksiz);
+ insertinstr(blk, iarg--, (struct instr){Osub, KPTR, .keep=1, .reg = SP+1, .l=mkref(RREG,SP), disp});
+ ++*curi;
+ insertinstr(blk, *curi+1, (struct instr){Oadd, KPTR, .keep=1, .reg = SP+1, .l=mkref(RREG,SP), disp});
+ }
+ if (isimm32(ins->l))
+ ins->l = mkaddr((struct addr){.base = ins->l});
+ else if (isintcon(ins->l))
+ ins->l = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, ins->l));
+
+ cls = ins->cls;
+ ins->cls = 0;
+ if (cls) {
+ /* duplicate to reuse same TMP ref */
+ insertinstr(blk, (*curi)++, *ins);
+ *ins = mkinstr(Ocopy, cls, mkref(RREG, call->abiret[0].reg));
+ for (int i = 1; i <= 2; ++i) {
+ if (*curi + i >= blk->ins.n) break;
+ if (instrtab[blk->ins.p[*curi + i]].op == Ocall2r) {
+ ins = &instrtab[blk->ins.p[*curi += i]];
+ *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, call->abiret[1].reg));
+ break;
+ }
+ }
+ }
+}
+
+static bool
+aimm(struct addr *addr, int disp)
+{
+ if (addr->index.bits) return 0;
+ vlong a = addr->disp;
+ a += disp;
+ if ((int)a == a) {
+ addr->disp = a;
+ return 1;
+ }
+ return 0;
+}
+
+static bool
+ascale(struct addr *addr, union ref a, union ref b, uint siz/*1,2,4,8*/)
+{
+ if (b.t != RICON) return 0;
+ if (addr->index.bits || (addr->disp && !isaddrcon(addr->base,1))) return 0;
+ if ((unsigned)b.i > 3 || 1<<b.i != siz) return 0;
+ if (a.t == RREG || a.t == RTMP) {
+ addr->index = a;
+ addr->shift = b.i;
+ return 1;
+ }
+ return 0;
+}
+
+static bool
+aadd(struct addr *addr, struct block *blk, int *curi, union ref r, uint siz/*1,2,4,8*/)
+{
+ if (r.t == RSTACK) {
+ if (addr->base.bits || addr->index.bits || !aimm(addr, -r.i)) goto Ref;
+ addr->base = mkref(RREG, FP);
+ } else if (r.t == RTMP) {
+ struct instr *ins = &instrtab[r.i];
+ if (ins->op == Oadd) {
+ if (!aadd(addr, blk, curi, ins->l, siz)) goto Ref;
+ if (!aadd(addr, blk, curi, ins->r, siz)) goto Ref;
+ ins->skip = 1;
+ } else if (ins->op == Osub) {
+ if (!aadd(addr, blk, curi, ins->l, siz)) goto Ref;
+ if (!isintcon(ins->r)) goto Ref;
+ if (!aimm(addr, -intconval(ins->r))) goto Ref;
+ ins->skip = 1;
+ } else if (ins->op == Oshl) {
+ if (!ascale(addr, ins->l, ins->r, siz)) goto Ref;
+ ins->skip = 1;
+ } else if (ins->op == Ocopy) {
+ if (!aadd(addr, blk, curi, ins->l, siz)) goto Ref;
+ ins->skip = 1;
+ } else goto Ref;
+ } else if (isnumcon(r)) {
+ assert(isintcon(r));
+ return aimm(addr, intconval(r));
+ } else if (isaddrcon(r,1)) {
+ if (!addr->base.bits && !isaddrcon(addr->index,1)) addr->base = r;
+ else return 0;
+ } else if (r.t == RREG) {
+ /* temporaries are single assignment, but register aren't, so they can't be *
+ * safely hoisted into an address value, unless they have global lifetime */
+ if (!rstest(mctarg->rglob, r.i)) return 0;
+ Ref:
+ if (r.t == RSTACK && (addr->base.bits || addr->index.bits)) {
+ r = insertinstr(blk, (*curi)++, mkinstr(Oadd, KPTR, mkref(RREG, FP), mkref(RICON, -r.i)));
+ }
+ if (!addr->base.bits) addr->base = r;
+ else if (!addr->index.bits) addr->index = r;
+ else return 0;
+ } else return 0;
+ return 1;
+}
+
+static bool
+fuseaddr(union ref *r, struct block *blk, int *curi, uint siz/*1,2,4,8*/)
+{
+ struct addr addr = {0};
+
+ if (isaddrcon(*r,1)) return 1;
+
+ if (r->t != RSTACK && r->t != RTMP) return 0;
+ if (!aadd(&addr, blk, curi, *r, siz)) return 0;
+ if (!(addr.disp >= -256 && addr.disp < 256) /* for 9-bit signed unscaled offset */
+ && !(!(addr.disp & (siz-1)) && (uvlong)addr.disp < (1<<12)*siz)) /* 12-bit unsigned scaled offset */
+ return 0;
+ if (isaddrcon(addr.base,0) && (!(contab.p[addr.base.i].flag & SLOCAL) || addr.index.bits)) {
+ /* first load symbol address into a temp register */
+ if (addr.disp && (ccopt.pic || (contab.p[addr.base.i].flag & SFUNC)) && !addr.index.bits) {
+ addr.base = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, .l = addr.base));
+ } else {
+ addr.base = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR,
+ mkaddr((struct addr){addr.base, .disp = addr.disp})));
+ addr.disp = 0;
+ }
+ }
+ *r = mkaddr(addr);
+ return 1;
+}
+
+static const uchar loadsz[] = {
+ [Oloads8 - Oloads8] = 1, [Oloadu8 - Oloads8] = 1,
+ [Oloads16 - Oloads8] = 2, [Oloadu16 - Oloads8] = 2,
+ [Oloads32 - Oloads8] = 4, [Oloadu32 - Oloads8] = 4,
+ [Oloadi64 - Oloads8] = 8,
+ [Oloadf32 - Oloads8] = 4,
+ [Oloadf64 - Oloads8] = 8,
+};
+static const uchar storesz[] = {
+ [Ostorei8 - Ostorei8] = 1,
+ [Ostorei16 - Ostorei8] = 2,
+ [Ostorei32 - Ostorei8] = 4,
+ [Ostorei64 - Ostorei8] = 8,
+ [Ostoref32 - Ostorei8] = 4,
+ [Ostoref64 - Ostorei8] = 8,
+};
+static void
+loadstoreaddr(struct block *blk, union ref *r, int *curi, enum op op)
+{
+ uint siz = oisload(op) ? loadsz[op-Oloads8] : storesz[op-Ostorei8];
+ if (isimm32(*r)) {
+ *r = mkaddr((struct addr){.base = *r});
+ } else if (isaddrcon(*r, 0)) {
+ bool pcrelok = in_range(op, Oloads32, Oloadi64); /* LDR-LDRSW have PC-relative literal form */
+ if (!pcrelok || !(contab.p[r->i].flag & SLOCAL))
+ regarg(r, KPTR, blk, curi);
+ } else if (r->t == RTMP || r->t == RSTACK) {
+ fuseaddr(r, blk, curi, siz);
+ } else if (r->t != RREG) {
+ *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, *r));
+ }
+}
+
+static void
+sel(struct function *fn, struct instr *ins, struct block *blk, int *curi)
+{
+ enum op op = ins->op;
+ enum irclass cls;
+
+ if (oisarith(ins->op) && arithfold(ins)) {
+ fixarg(&ins->l, ins, blk, curi);
+ return;
+ }
+
+ switch (op) {
+ //default: assert(0);
+ case Onop: break;
+ case Oalloca1: case Oalloca2: case Oalloca4: case Oalloca8: case Oalloca16:
+ assert(!"unlowered alloca");
+ break;
+ case Ocopy:
+ fixarg(&ins->l, ins, blk, curi);
+ break;
+ case Oparam:
+ assert(ins->l.t == RICON && ins->l.i < fn->nabiarg);
+ if (!fn->abiarg[ins->l.i].isstk)
+ *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, fn->abiarg[ins->l.i].reg));
+ else /* stack */
+ *ins = mkinstr(Oadd, KPTR, mkref(RREG, FP), mkref(RICON, 16+fn->abiarg[ins->l.i].stk));
+ break;
+ case Oneg: case Onot:
+ case Ocvtf32s: case Ocvtf32u:
+ case Ocvtf32f64: case Ocvtf64s:
+ case Ocvtf64u: case Ocvtf64f32:
+ case Ocvts32f: case Ocvtu32f:
+ case Ocvts64f: case Ocvtu64f:
+ case Oexts8: case Oextu8:
+ case Oexts16: case Oextu16:
+ case Oexts32:
+ regarg(&ins->l, ins->cls, blk, curi);
+ break;
+ case Oextu32:
+ regarg(&ins->l, ins->cls, blk, curi);
+ ins->op = Ocopy;
+ break;
+ case Oadd:
+ if (isnumcon(ins->l)) {
+ /* swap to have const in rhs */
+ union ref tmp = ins->l;
+ ins->l = ins->r;
+ ins->r = tmp;
+ }
+ case Osub:
+ if (ins->r.t == RICON && ins->r.i < 0) {
+ op = ins->op ^= 1;
+ ins->r.i = -ins->r.i;
+ }
+ if (!(isaddrcon(ins->l,0) && (contab.p[ins->l.i].flag & SLOCAL)))
+ regarg(&ins->l, ins->cls, blk, curi);
+ fixarg(&ins->r, ins, blk, curi);
+ break;
+ case Oand: case Oior: case Oxor:
+ if (isnumcon(ins->l)) {
+ /* swap to have const in rhs */
+ union ref tmp = ins->l;
+ ins->l = ins->r;
+ ins->r = tmp;
+ }
+ case Oshl: case Osar: case Oslr:
+ case Oequ: case Oneq:
+ case Olth: case Ogth: case Olte: case Ogte:
+ case Oulth: case Ougth: case Oulte: case Ougte:
+ case Omove:
+ regarg(&ins->l, ins->cls, blk, curi);
+ fixarg(&ins->r, ins, blk, curi);
+ break;
+ case Omul: case Odiv: case Oudiv: case Ourem:
+ regarg(&ins->l, ins->cls, blk, curi);
+ regarg(&ins->r, ins->cls, blk, curi);
+ break;
+ case Oarg:
+ fixarg(&ins->r, ins, blk, curi);
+ break;
+ case Ocall:
+ selcall(fn, ins, blk, curi);
+ break;
+ case Oloads8: case Oloadu8: case Oloads16: case Oloadu16:
+ case Oloads32: case Oloadu32: case Oloadi64: case Oloadf32: case Oloadf64:
+ loadstoreaddr(blk, &ins->l, curi, op);
+ break;
+ case Ostorei8: case Ostorei16: case Ostorei32: cls = KI32; goto Store;
+ case Ostorei64: cls = KI64; goto Store;
+ case Ostoref32: cls = KF32; goto Store;
+ case Ostoref64: cls = KF64; Store:
+ loadstoreaddr(blk, &ins->l, curi, op);
+ regarg(&ins->r, cls, blk, curi);
+ break;
+ }
+}
+
+static void
+seljmp(struct function *fn, struct block *blk)
+{
+ if (blk->jmp.t == Jb && blk->jmp.arg[0].bits) {
+ int curi = blk->ins.n;
+ fixarg(&blk->jmp.arg[0], NULL, blk, &curi);
+ union ref c = blk->jmp.arg[0];
+ if (c.t != RTMP) {
+ enum irclass cls = c.t == RICON ? KI32 : c.t == RXCON && contab.p[c.i].cls ? contab.p[c.i].cls : KPTR;
+ int curi = blk->ins.n;
+
+ c = insertinstr(blk, blk->ins.n, mkinstr(Ocopy, cls, c));
+ sel(fn, &instrtab[c.i], blk, &curi);
+ }
+ if (!oiscmp(instrtab[c.i].op)) {
+ enum irclass k = insrescls(instrtab[c.i]);
+ blk->jmp.arg[0] = insertinstr(blk, blk->ins.n, mkinstr(Oneq, k, c, kisint(k) ? ZEROREF : mkfltcon(k, 0)));
+ struct instr *ins = &instrtab[blk->jmp.arg[0].i];
+ ins->keep = 1;
+ } else {
+ instrtab[c.i].keep = 1;
+ }
+ } else if (blk->jmp.t == Jret) {
+ if (blk->jmp.arg[0].bits) {
+ union ref r = mkref(RREG, fn->abiret[0].reg);
+ struct instr *ins = &instrtab[insertinstr(blk, blk->ins.n, mkinstr(Omove, fn->abiret[0].ty.cls, r, blk->jmp.arg[0])).i];
+ int curi = blk->ins.n-1;
+ fixarg(&ins->r, ins, blk, &curi);
+ blk->jmp.arg[0] = r;
+ if (blk->jmp.arg[1].bits) {
+ r = mkref(RREG, fn->abiret[1].reg);
+ ins = &instrtab[insertinstr(blk, blk->ins.n, mkinstr(Omove, fn->abiret[1].ty.cls, r, blk->jmp.arg[1])).i];
+ }
+ }
+ }
+}
+
+void
+aarch64_isel(struct function *fn)
+{
+ struct block *blk = fn->entry;
+
+ do {
+ int i;
+ for (i = 0; i < blk->phi.n; ++i) {
+ struct instr *ins = &instrtab[blk->phi.p[i]];
+ union ref *phi = phitab.p[ins->l.i];
+ for (int i = 0; i < blk->npred; ++i) {
+ int curi = blkpred(blk, i)->ins.n;
+ fixarg(&phi[i], ins, blkpred(blk, i), &curi);
+ }
+ }
+ for (i = 0; i < blk->ins.n; ++i) {
+ struct instr *ins = &instrtab[blk->ins.p[i]];
+ sel(fn, ins, blk, &i);
+ }
+ seljmp(fn, blk);
+ } while ((blk = blk->lnext) != fn->entry);
+
+ if (ccopt.dbg.i) {
+ bfmt(ccopt.dbgout, "<< After isel >>\n");
+ irdump(fn);
+ }
+
+ fn->prop = 0;
+}
+
+/* vim:set ts=3 sw=3 expandtab: */