diff options
| author | 2026-03-17 13:22:00 +0100 | |
|---|---|---|
| committer | 2026-03-17 13:22:00 +0100 | |
| commit | a8d6f8bf30c07edb775e56889f568ca20240bedf (patch) | |
| tree | b5a452b2675b2400f15013617291fe6061180bbf /src/t_aarch64_isel.c | |
| parent | 24f14b7ad1af08d872971d72ce089a529911f657 (diff) | |
REFACTOR: move sources to src/
Diffstat (limited to 'src/t_aarch64_isel.c')
| -rw-r--r-- | src/t_aarch64_isel.c | 515 |
1 files changed, 515 insertions, 0 deletions
diff --git a/src/t_aarch64_isel.c b/src/t_aarch64_isel.c new file mode 100644 index 0000000..398ea28 --- /dev/null +++ b/src/t_aarch64_isel.c @@ -0,0 +1,515 @@ +#include "all.h" + +#define isimm32(r) (iscon(r) && concls(r) == KI32) + +static inline uint +clz(uvlong x) +{ +#if HAS_BUILTIN(clzll) + return __builtin_clzll(x); +#else + int i = 0; + for (uvlong mask = BIT(63);; ++i, mask >>= 1) + if (x & mask) + break; + return i; +#endif +} + +/* Encode logical immediate */ +bool +aarch64_logimm(uint *enc, enum irclass k, uvlong x) +{ + /* https://github.com/v8/v8/blob/927ccc6076e25a614787c7011315468e40fe39a4/src/codegen/arm64/assembler-arm64.cc#L4409 */ + if (k == KI32) x = (uint)x | x << 32; + bool neg; + if ((neg = x & 1)) x = ~x; + if (x == 0) return 0; + uvlong a = x & (~x + 1), + xa = x + a, + b = xa & (~xa + 1), + xa_b = xa - b, + c = xa_b & (~xa_b + 1), + mask; + uint clza = clz(a), + d, outn; + if (c != 0) { + d = clza - clz(c); + mask = BIT(d) - 1; + outn = 0; + } else { + assert(a != 0); + d = 64; + mask = ~0ull; + outn = 1; + } + if (!ispo2(d)) return 0; + if (((b - a) & ~mask) != 0) return 0; + static const uvlong M[] = { + 0x0000000000000001, 0x0000000100000001, 0x0001000100010001, + 0x0101010101010101, 0x1111111111111111, 0x5555555555555555, + }; + int i = clz(d) - 57; + assert((uint)i < countof(M)); + uvlong m = M[i]; + uvlong y = (b - a) * m; + if (y != x) return 0; + if (enc) { + int clzb = b == 0 ? -1 : clz(b), + s = clza - clzb, r; + if (neg) { + s = d - s; + r = (clzb + 1) & (d - 1); + } else { + r = (clza + 1) & (d - 1); + } + *enc = outn<<12 | r<<6 | (((-d * 2) | (s - 1)) & 0x3F); + } + return 1; +} + + +static void fixarg(union ref *r, struct instr *ins, struct block *blk, int *curi); +static void +regarg(union ref *r, enum irclass k, struct block *blk, int *curi) +{ + if (r->t != RTMP) { + *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, k, *r)); + if (kisflt(k) || instrtab[r->i].l.t == RSTACK) { + int iprev = *curi-1; + fixarg(&instrtab[r->i].l, &instrtab[r->i], blk, &iprev); + *curi = iprev+1; + } + } +} + +static void +fixarg(union ref *r, struct instr *ins, struct block *blk, int *curi) +{ + enum op op = ins ? ins->op : 0; + if (isintcon(*r)) { + vlong x = intconval(*r); + switch (op) { + case Ocopy: return; + default: + if (oiscmp(op)) { + case Oadd: case Osub: + /* imm12 (lsl 12) */ + if ((x &~ 0xFFF) == 0 || (x &~ 0xFFF000) == 0) return; + break; + case Oshl: case Osar: case Oslr: + if ((uvlong)x < (ins->cls == KI32 ? 32 : 64)) return; + break; + case Oand: case Oior: case Oxor: + if (aarch64_logimm(NULL, ins->cls, x)) return; + break; + } + } + goto Reg; + } else if (isfltcon(*r)) { + enum irclass k = concls(*r), ki = KI32 + k-KF32; + if (contab.p[r->i].f != 0.0) { + union { + vlong i64; + int i32; + float f32; + double f64; + } pun; + vlong i; + if (k == KF32) { + pun.f32 = contab.p[r->i].f; + i = pun.i32; + } else { + pun.f64 = contab.p[r->i].f; + i = pun.i64; + } + union ref gpr = insertinstr(blk, (*curi)++, mkinstr(Ocopy, ki, mkintcon(ki, i))); + *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, k, gpr)); + } else if (oiscmp(op)) { + return; + } else { + *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, k, *r)); + } + } else if (r->t == RSTACK) { + struct instr adr = mkinstr(Osub, KPTR, mkref(RREG, FP), mkintcon(KI32, r->i)); + if (op == Ocopy) + *ins = adr; + else + *r = insertinstr(blk, (*curi)++, adr); + } else if (r->t != RTMP) Reg: { + regarg(r, r->t == RTMP ? instrtab[r->i].cls : ins->cls ? ins->cls : KI32, blk, curi); + } +} + +static bool +arithfold(struct instr *ins) +{ + if (isnumcon(ins->l) && (!ins->r.t || isnumcon(ins->r))) { + union ref r; + bool ok = ins->r.t ? foldbinop(&r, ins->op, ins->cls, ins->l, ins->r) : foldunop(&r, ins->op, ins->cls, ins->l); + assert(ok && "fold?"); + *ins = mkinstr(Ocopy, insrescls(*ins), r); + return 1; + } + return 0; +} + +static void +selcall(struct function *fn, struct instr *ins, struct block *blk, int *curi) +{ + const struct call *call = &calltab.p[ins->r.i]; + int iarg = *curi - 1; + enum irclass cls; + uint argstksiz = alignup(call->argstksiz, 16); + + for (int i = call->narg - 1; i >= 0; --i) { + struct abiarg abi = call->abiarg[i]; + struct instr *arg; + for (;; --iarg) { + assert(iarg >= 0 && i >= 0 && "arg?"); + if ((arg = &instrtab[blk->ins.p[iarg]])->op == Oarg) + break; + } + + if (!abi.isstk) { + assert(!abi.ty.isagg); + *arg = mkinstr(Omove, call->abiarg[i].ty.cls, mkref(RREG, abi.reg), arg->r); + } else { + union ref adr = mkaddr((struct addr){mkref(RREG, SP), .disp = abi.stk}); + int iargsave = iarg; + if (!abi.ty.isagg) { /* scalar arg in stack */ + *arg = mkinstr(cls2store[abi.ty.cls], 0, adr, arg->r); + if (isaddrcon(arg->r,1) || arg->r.t == RADDR) + arg->r = insertinstr(blk, iarg++, mkinstr(Ocopy, abi.ty.cls, arg->r)); + else + fixarg(&ins->r, ins, blk, &iarg); + } else { /* aggregate arg in stack, callee stack frame destination address */ + *arg = mkinstr(Ocopy, KPTR, adr); + } + *curi += iarg - iargsave; + } + } + if (call->argstksiz) { + union ref disp = mkref(RICON, argstksiz); + insertinstr(blk, iarg--, (struct instr){Osub, KPTR, .keep=1, .reg = SP+1, .l=mkref(RREG,SP), disp}); + ++*curi; + insertinstr(blk, *curi+1, (struct instr){Oadd, KPTR, .keep=1, .reg = SP+1, .l=mkref(RREG,SP), disp}); + } + if (isimm32(ins->l)) + ins->l = mkaddr((struct addr){.base = ins->l}); + else if (isintcon(ins->l)) + ins->l = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, ins->l)); + + cls = ins->cls; + ins->cls = 0; + if (cls) { + /* duplicate to reuse same TMP ref */ + insertinstr(blk, (*curi)++, *ins); + *ins = mkinstr(Ocopy, cls, mkref(RREG, call->abiret[0].reg)); + for (int i = 1; i <= 2; ++i) { + if (*curi + i >= blk->ins.n) break; + if (instrtab[blk->ins.p[*curi + i]].op == Ocall2r) { + ins = &instrtab[blk->ins.p[*curi += i]]; + *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, call->abiret[1].reg)); + break; + } + } + } +} + +static bool +aimm(struct addr *addr, int disp) +{ + if (addr->index.bits) return 0; + vlong a = addr->disp; + a += disp; + if ((int)a == a) { + addr->disp = a; + return 1; + } + return 0; +} + +static bool +ascale(struct addr *addr, union ref a, union ref b, uint siz/*1,2,4,8*/) +{ + if (b.t != RICON) return 0; + if (addr->index.bits || (addr->disp && !isaddrcon(addr->base,1))) return 0; + if ((unsigned)b.i > 3 || 1<<b.i != siz) return 0; + if (a.t == RREG || a.t == RTMP) { + addr->index = a; + addr->shift = b.i; + return 1; + } + return 0; +} + +static bool +aadd(struct addr *addr, struct block *blk, int *curi, union ref r, uint siz/*1,2,4,8*/) +{ + if (r.t == RSTACK) { + if (addr->base.bits || addr->index.bits || !aimm(addr, -r.i)) goto Ref; + addr->base = mkref(RREG, FP); + } else if (r.t == RTMP) { + struct instr *ins = &instrtab[r.i]; + if (ins->op == Oadd) { + if (!aadd(addr, blk, curi, ins->l, siz)) goto Ref; + if (!aadd(addr, blk, curi, ins->r, siz)) goto Ref; + ins->skip = 1; + } else if (ins->op == Osub) { + if (!aadd(addr, blk, curi, ins->l, siz)) goto Ref; + if (!isintcon(ins->r)) goto Ref; + if (!aimm(addr, -intconval(ins->r))) goto Ref; + ins->skip = 1; + } else if (ins->op == Oshl) { + if (!ascale(addr, ins->l, ins->r, siz)) goto Ref; + ins->skip = 1; + } else if (ins->op == Ocopy) { + if (!aadd(addr, blk, curi, ins->l, siz)) goto Ref; + ins->skip = 1; + } else goto Ref; + } else if (isnumcon(r)) { + assert(isintcon(r)); + return aimm(addr, intconval(r)); + } else if (isaddrcon(r,1)) { + if (!addr->base.bits && !isaddrcon(addr->index,1)) addr->base = r; + else return 0; + } else if (r.t == RREG) { + /* temporaries are single assignment, but register aren't, so they can't be * + * safely hoisted into an address value, unless they have global lifetime */ + if (!rstest(mctarg->rglob, r.i)) return 0; + Ref: + if (r.t == RSTACK && (addr->base.bits || addr->index.bits)) { + r = insertinstr(blk, (*curi)++, mkinstr(Oadd, KPTR, mkref(RREG, FP), mkref(RICON, -r.i))); + } + if (!addr->base.bits) addr->base = r; + else if (!addr->index.bits) addr->index = r; + else return 0; + } else return 0; + return 1; +} + +static bool +fuseaddr(union ref *r, struct block *blk, int *curi, uint siz/*1,2,4,8*/) +{ + struct addr addr = {0}; + + if (isaddrcon(*r,1)) return 1; + + if (r->t != RSTACK && r->t != RTMP) return 0; + if (!aadd(&addr, blk, curi, *r, siz)) return 0; + if (!(addr.disp >= -256 && addr.disp < 256) /* for 9-bit signed unscaled offset */ + && !(!(addr.disp & (siz-1)) && (uvlong)addr.disp < (1<<12)*siz)) /* 12-bit unsigned scaled offset */ + return 0; + if (isaddrcon(addr.base,0) && (!(contab.p[addr.base.i].flag & SLOCAL) || addr.index.bits)) { + /* first load symbol address into a temp register */ + if (addr.disp && (ccopt.pic || (contab.p[addr.base.i].flag & SFUNC)) && !addr.index.bits) { + addr.base = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, .l = addr.base)); + } else { + addr.base = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, + mkaddr((struct addr){addr.base, .disp = addr.disp}))); + addr.disp = 0; + } + } + *r = mkaddr(addr); + return 1; +} + +static const uchar loadsz[] = { + [Oloads8 - Oloads8] = 1, [Oloadu8 - Oloads8] = 1, + [Oloads16 - Oloads8] = 2, [Oloadu16 - Oloads8] = 2, + [Oloads32 - Oloads8] = 4, [Oloadu32 - Oloads8] = 4, + [Oloadi64 - Oloads8] = 8, + [Oloadf32 - Oloads8] = 4, + [Oloadf64 - Oloads8] = 8, +}; +static const uchar storesz[] = { + [Ostorei8 - Ostorei8] = 1, + [Ostorei16 - Ostorei8] = 2, + [Ostorei32 - Ostorei8] = 4, + [Ostorei64 - Ostorei8] = 8, + [Ostoref32 - Ostorei8] = 4, + [Ostoref64 - Ostorei8] = 8, +}; +static void +loadstoreaddr(struct block *blk, union ref *r, int *curi, enum op op) +{ + uint siz = oisload(op) ? loadsz[op-Oloads8] : storesz[op-Ostorei8]; + if (isimm32(*r)) { + *r = mkaddr((struct addr){.base = *r}); + } else if (isaddrcon(*r, 0)) { + bool pcrelok = in_range(op, Oloads32, Oloadi64); /* LDR-LDRSW have PC-relative literal form */ + if (!pcrelok || !(contab.p[r->i].flag & SLOCAL)) + regarg(r, KPTR, blk, curi); + } else if (r->t == RTMP || r->t == RSTACK) { + fuseaddr(r, blk, curi, siz); + } else if (r->t != RREG) { + *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, *r)); + } +} + +static void +sel(struct function *fn, struct instr *ins, struct block *blk, int *curi) +{ + enum op op = ins->op; + enum irclass cls; + + if (oisarith(ins->op) && arithfold(ins)) { + fixarg(&ins->l, ins, blk, curi); + return; + } + + switch (op) { + //default: assert(0); + case Onop: break; + case Oalloca1: case Oalloca2: case Oalloca4: case Oalloca8: case Oalloca16: + assert(!"unlowered alloca"); + break; + case Ocopy: + fixarg(&ins->l, ins, blk, curi); + break; + case Oparam: + assert(ins->l.t == RICON && ins->l.i < fn->nabiarg); + if (!fn->abiarg[ins->l.i].isstk) + *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, fn->abiarg[ins->l.i].reg)); + else /* stack */ + *ins = mkinstr(Oadd, KPTR, mkref(RREG, FP), mkref(RICON, 16+fn->abiarg[ins->l.i].stk)); + break; + case Oneg: case Onot: + case Ocvtf32s: case Ocvtf32u: + case Ocvtf32f64: case Ocvtf64s: + case Ocvtf64u: case Ocvtf64f32: + case Ocvts32f: case Ocvtu32f: + case Ocvts64f: case Ocvtu64f: + case Oexts8: case Oextu8: + case Oexts16: case Oextu16: + case Oexts32: + regarg(&ins->l, ins->cls, blk, curi); + break; + case Oextu32: + regarg(&ins->l, ins->cls, blk, curi); + ins->op = Ocopy; + break; + case Oadd: + if (isnumcon(ins->l)) { + /* swap to have const in rhs */ + union ref tmp = ins->l; + ins->l = ins->r; + ins->r = tmp; + } + case Osub: + if (ins->r.t == RICON && ins->r.i < 0) { + op = ins->op ^= 1; + ins->r.i = -ins->r.i; + } + if (!(isaddrcon(ins->l,0) && (contab.p[ins->l.i].flag & SLOCAL))) + regarg(&ins->l, ins->cls, blk, curi); + fixarg(&ins->r, ins, blk, curi); + break; + case Oand: case Oior: case Oxor: + if (isnumcon(ins->l)) { + /* swap to have const in rhs */ + union ref tmp = ins->l; + ins->l = ins->r; + ins->r = tmp; + } + case Oshl: case Osar: case Oslr: + case Oequ: case Oneq: + case Olth: case Ogth: case Olte: case Ogte: + case Oulth: case Ougth: case Oulte: case Ougte: + case Omove: + regarg(&ins->l, ins->cls, blk, curi); + fixarg(&ins->r, ins, blk, curi); + break; + case Omul: case Odiv: case Oudiv: case Ourem: + regarg(&ins->l, ins->cls, blk, curi); + regarg(&ins->r, ins->cls, blk, curi); + break; + case Oarg: + fixarg(&ins->r, ins, blk, curi); + break; + case Ocall: + selcall(fn, ins, blk, curi); + break; + case Oloads8: case Oloadu8: case Oloads16: case Oloadu16: + case Oloads32: case Oloadu32: case Oloadi64: case Oloadf32: case Oloadf64: + loadstoreaddr(blk, &ins->l, curi, op); + break; + case Ostorei8: case Ostorei16: case Ostorei32: cls = KI32; goto Store; + case Ostorei64: cls = KI64; goto Store; + case Ostoref32: cls = KF32; goto Store; + case Ostoref64: cls = KF64; Store: + loadstoreaddr(blk, &ins->l, curi, op); + regarg(&ins->r, cls, blk, curi); + break; + } +} + +static void +seljmp(struct function *fn, struct block *blk) +{ + if (blk->jmp.t == Jb && blk->jmp.arg[0].bits) { + int curi = blk->ins.n; + fixarg(&blk->jmp.arg[0], NULL, blk, &curi); + union ref c = blk->jmp.arg[0]; + if (c.t != RTMP) { + enum irclass cls = c.t == RICON ? KI32 : c.t == RXCON && contab.p[c.i].cls ? contab.p[c.i].cls : KPTR; + int curi = blk->ins.n; + + c = insertinstr(blk, blk->ins.n, mkinstr(Ocopy, cls, c)); + sel(fn, &instrtab[c.i], blk, &curi); + } + if (!oiscmp(instrtab[c.i].op)) { + enum irclass k = insrescls(instrtab[c.i]); + blk->jmp.arg[0] = insertinstr(blk, blk->ins.n, mkinstr(Oneq, k, c, kisint(k) ? ZEROREF : mkfltcon(k, 0))); + struct instr *ins = &instrtab[blk->jmp.arg[0].i]; + ins->keep = 1; + } else { + instrtab[c.i].keep = 1; + } + } else if (blk->jmp.t == Jret) { + if (blk->jmp.arg[0].bits) { + union ref r = mkref(RREG, fn->abiret[0].reg); + struct instr *ins = &instrtab[insertinstr(blk, blk->ins.n, mkinstr(Omove, fn->abiret[0].ty.cls, r, blk->jmp.arg[0])).i]; + int curi = blk->ins.n-1; + fixarg(&ins->r, ins, blk, &curi); + blk->jmp.arg[0] = r; + if (blk->jmp.arg[1].bits) { + r = mkref(RREG, fn->abiret[1].reg); + ins = &instrtab[insertinstr(blk, blk->ins.n, mkinstr(Omove, fn->abiret[1].ty.cls, r, blk->jmp.arg[1])).i]; + } + } + } +} + +void +aarch64_isel(struct function *fn) +{ + struct block *blk = fn->entry; + + do { + int i; + for (i = 0; i < blk->phi.n; ++i) { + struct instr *ins = &instrtab[blk->phi.p[i]]; + union ref *phi = phitab.p[ins->l.i]; + for (int i = 0; i < blk->npred; ++i) { + int curi = blkpred(blk, i)->ins.n; + fixarg(&phi[i], ins, blkpred(blk, i), &curi); + } + } + for (i = 0; i < blk->ins.n; ++i) { + struct instr *ins = &instrtab[blk->ins.p[i]]; + sel(fn, ins, blk, &i); + } + seljmp(fn, blk); + } while ((blk = blk->lnext) != fn->entry); + + if (ccopt.dbg.i) { + bfmt(ccopt.dbgout, "<< After isel >>\n"); + irdump(fn); + } + + fn->prop = 0; +} + +/* vim:set ts=3 sw=3 expandtab: */ |