From dea8fd171acb54b6d9685422d5e391fb55074008 Mon Sep 17 00:00:00 2001 From: lemon Date: Sun, 19 Oct 2025 08:09:09 +0200 Subject: Organize source files into directories --- ir/abi0.c | 393 +++++++++++++++++++ ir/cfg.c | 43 +++ ir/dump.c | 255 ++++++++++++ ir/intrin.c | 77 ++++ ir/intrin.def | 2 + ir/ir.c | 618 +++++++++++++++++++++++++++++ ir/ir.h | 280 ++++++++++++++ ir/op.def | 76 ++++ ir/optmem.c | 285 ++++++++++++++ ir/regalloc.c | 1195 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ir/ssa.c | 58 +++ 11 files changed, 3282 insertions(+) create mode 100644 ir/abi0.c create mode 100644 ir/cfg.c create mode 100644 ir/dump.c create mode 100644 ir/intrin.c create mode 100644 ir/intrin.def create mode 100644 ir/ir.c create mode 100644 ir/ir.h create mode 100644 ir/op.def create mode 100644 ir/optmem.c create mode 100644 ir/regalloc.c create mode 100644 ir/ssa.c (limited to 'ir') diff --git a/ir/abi0.c b/ir/abi0.c new file mode 100644 index 0000000..82d10a5 --- /dev/null +++ b/ir/abi0.c @@ -0,0 +1,393 @@ +#include "ir.h" + +/** This pass adds in ABI arguments/returns register mappings + ** and lowers aggregate params/args/returns into scalars + ** + ** invariant: all `call` instructions when doing this pass shall be preceded by + ** exactly narg `arg` instructions with no other instructions in between + **/ + +struct abiargsvec { vec_of(struct abiarg); }; + +static int +abiret(struct abiarg abiret[2], struct abiargsvec *abiargs, int *ni, union irtype retty) +{ + short r[2]; + uchar cls[2]; + int retreg = 0; + if (retty.isagg) { + retreg = mctarg->abiret(r, cls, ni, retty); + if (!retreg) { + vpush(abiargs, ((struct abiarg) { cls2type(KPTR), .reg = r[1] })); + if (r[0] == -1) { + memset(abiret, 0, 2*sizeof *abiret); + } else { + abiret[0].ty = cls2type(KPTR); + abiret[0].reg = r[0]; + } + } + } else if (retty.cls) { + retreg = mctarg->abiret(r, cls, ni, retty); + assert(retreg == 1); + } + for (int i = 0; i < retreg; ++i) { + abiret[i].ty = cls2type(cls[i]); 
+ abiret[i].reg = r[i]; + } + return retreg; +} + +static int +abiarg(struct abiargsvec *abiargs, int *ni, int *nf, int *ns, union irtype ty) +{ + short r[2]; + uchar cls[2]; + int ret = mctarg->abiarg(r, cls, ni, nf, ns, ty); + if (!ret) { /* in stack */ + vpush(abiargs, ((struct abiarg) { ty, .stk = r[0] })); + } else if (ret == 1 && ty.isagg && cls[0] == KPTR) { /* aggregate by pointer */ + vpush(abiargs, ((struct abiarg) { cls2type(cls[0]), .reg = r[0] })); + } else { /* by regs */ + vpush(abiargs, ((struct abiarg) { cls2type(cls[0]), .reg = r[0] })); + if (ret == 2) + vpush(abiargs, ((struct abiarg) { cls2type(cls[1]), .reg = r[1] })); + } + return ret; +} + +static struct instr +copyparam(struct function *fn, int *curi, int param, struct abiarg abi) +{ + struct instr par = mkinstr(Oparam, abi.ty.cls, mkref(RICON, param), mktyperef(abi.ty)); + if (abi.reg >= 0) { /* reg */ + assert(!abi.ty.isagg); + return par; + } else if (!abi.ty.isagg) { /* scalar in stack */ + enum op ld; + par.cls = KPTR; + if (abi.ty.cls == KPTR) abi.ty.cls = siz2intcls[cls2siz[abi.ty.cls]]; + switch (abi.ty.cls) { + default: assert(0); + case KI4: ld = Oloadu4; break; + case KI8: ld = Oloadi8; break; + case KF4: ld = Oloadf4; break; + case KF8: ld = Oloadf8; break; + } + return mkinstr(ld, abi.ty.cls, insertinstr(fn->entry, (*curi)++, par)); + } else { /* aggregate in stack */ + par.cls = KPTR; + return par; + } +} + +static void +patchparam(struct function *fn, int *curi, int *param, int tydat, int nabi, struct abiarg abi[2]) +{ + struct block *blk = fn->entry; + assert(in_range(nabi,1,2)); + + for (; *curi < blk->ins.n; ++*curi) { + struct instr *ins = &instrtab[blk->ins.p[*curi]]; + if (ins->op != Oparam) continue; + assert(ins->r.t == RTYPE + && ins->r.i == (tydat < 0 ? 
abi[0].ty : (union irtype){.isagg=1, .dat=tydat}).bits); + if (abi[0].ty.isagg || tydat < 0) { + /* aggregate in stack or scalar, just copy */ + assert(nabi == 1); + *ins = copyparam(fn, curi, *param, abi[0]); + } else { /* aggregate in registers, materialize */ + union ref alloc, r[2]; + struct instr st; + const struct typedata *td; + uint nalloc; + + assert(tydat >= 0); + td = &typedata[tydat]; + assert(td->siz <= 16 && td->align <= 16); + nalloc = td->align == 16 ? 1 : td->siz/8 + (td->siz%8 != 0); + *ins = mkinstr(Oalloca8 + (td->align==16), KPTR, mkref(RICON, nalloc)); + alloc = mkref(RTMP, ins - instrtab); + r[0] = insertinstr(blk, ++*curi, copyparam(fn, NULL, *param, abi[0])); + if (nabi > 1) + r[1] = insertinstr(blk, ++*curi, copyparam(fn, NULL, ++*param, abi[1])); + /* transform + * %x = copy %p + * into + * %x = alloca... + * store* %x, %a + * store* %x + N, %b + */ + st = mkinstr(Ostore1 + ilog2(cls2siz[abi[0].ty.cls]), 0, alloc, r[0]); + insertinstr(blk, ++*curi, st); + if (nabi > 1) { + struct instr tmp = mkinstr(Oadd, KPTR, alloc, mkref(RICON, cls2siz[abi[0].ty.cls])); + st = mkinstr(Ostore1 + ilog2(cls2siz[abi[1].ty.cls]), 0, insertinstr(blk, ++*curi, tmp), r[1]); + insertinstr(blk, ++*curi, st); + } + } + ++*param; + ++*curi; + break; + } +} + +static int +patcharg(struct block *blk, int *icall, struct call *call, + int argidx, int nabi, struct abiarg abi[2]) +{ + int arginst = *icall - (call->narg - argidx); + struct instr *arg = &instrtab[blk->ins.p[arginst]]; + assert(arg->op == Oarg && arg->l.t == RTYPE); + if (ref2type(arg->l).isagg) { + /* aggregate argument */ + if (abi[0].ty.isagg /* aggregate in stack */ + || abi[0].ty.cls == KPTR) /* aggregate by pointer */ + { + return 1; + } else { /* aggregate in registers */ + union ref src = arg->r; + /* deconstruct into + * %a = load* %x + * (%b = load* %x + N) + */ + delinstr(blk, arginst); + for (int i = 0; i < nabi; ++i) { + /* XXX this can generate unaligned loads */ + struct instr ins = {0}; + 
union ref temp; + switch (ins.cls = abi[i].ty.cls) { + default: assert(0); + case KI4: ins.op = Oloadu4; break; + case KI8: ins.op = Oloadi8; break; + case KF4: ins.op = Oloadf4; break; + case KF8: ins.op = Oloadf8; break; + } + if (i == 0) + ins.l = src; + else + ins.l = insertinstr(blk, arginst++, + mkinstr(Oadd, KPTR, src, + mkref(RICON, cls2siz[abi[0].ty.cls]))); + temp = insertinstr(blk, arginst++, ins); + insertinstr(blk, arginst++, mkarginstr(abi[i].ty, temp)); + } + *icall = arginst + (call->narg - argidx - 1); + return nabi; + } + } else /* normal scalar argument */ + return 1; +} + +static struct abiarg abiargsbuf[32]; + +void +abi0_call(struct function *fn, struct instr *ins, struct block *blk, int *curi) +{ + union ref retmem; + struct abiargsvec abiargs = {VINIT(abiargsbuf, arraylength(abiargsbuf))}; + bool sretarghidden = 0; + int ni, nf, ns, vararg, nret = 0; + struct call *call = &calltab.p[ins->r.i]; + + vararg = call->vararg; + vinit(&abiargs, abiargsbuf, arraylength(abiargsbuf)); + ni = nf = ns = 0; + assert(!ins->cls == !call->ret.bits); + nret = abiret(call->abiret, &abiargs, &ni, call->ret); + if (call->ret.isagg) { /* adjust struct return */ + union irtype retty = call->ret; + struct typedata *td = &typedata[retty.dat]; + struct instr alloca = mkalloca(td->siz, td->align); + int ialloca; + + sretarghidden = ni == 0; + /* swap alloca and call temps so users of original call point to alloca */ + retmem = insertinstr(blk, ialloca = (*curi)++ - call->narg, *ins); + *ins = alloca; + blk->ins.p[ialloca] = ins - instrtab; + blk->ins.p[*curi] = retmem.i; + ins = &instrtab[retmem.i]; + retmem.i = blk->ins.p[ialloca]; + + if (!nret) /* hidden pointer argument */ + insertinstr(blk, (*curi)++ - call->narg, + mkinstr(Oarg, 0, mktyperef((union irtype){.cls=KPTR}), retmem)); + } + + /* adjust args */ + for (int i = 0, i2 = ni + sretarghidden; i < call->narg; ++i) { + int arginst = *curi - (call->narg - i); + struct instr *arg = 
&instrtab[blk->ins.p[arginst]]; + union irtype pty = ref2type(arg->l); + int first = abiargs.n; + int ret = abiarg(&abiargs, &ni, &nf, &ns, pty); + ret = patcharg(blk, curi, call, i, ret, &abiargs.p[first]); + if (call->vararg == i) vararg = i2; + i2 += ret; + } + /* adjust return */ + if (call->ret.isagg) { + ins->cls = 0; + if (!nret) { /* hidden pointer argument */ + ins->cls = 0; + if (call->abiret[0].reg >= 0) { + /* the result location pointer is also returned by the callee, e.g. in x86 */ + ins->cls = KPTR; + ++nret; + /* even if this is not used, the register copy + * must be emitted for the register allocator to know */ + } + } else { /* aggregate returned in regs */ + union ref r[2]; + struct instr ret2; + assert(in_range(nret, 1, 2)); + ins->cls = call->abiret[0].ty.cls; + r[0] = mkref(RTMP, ins - instrtab); + if (nret == 2) { + ret2 = mkinstr(Ocall2r, call->abiret[1].ty.cls, r[0]); + r[1] = insertinstr(blk, ++*curi, ret2); + } + for (int i = 0; i < nret; ++i) { + struct instr store = {0}; + /* XXX this can generate unaligned stores */ + switch (call->abiret[i].ty.cls) { + default: assert(0); + case KF4: case KI4: store.op = Ostore4; break; + case KI8: case KF8: store.op = Ostore8; break; + } + if (i == 0) { + store.l = retmem; + } else { + union ref off = mkref(RICON, cls2siz[call->abiret[0].ty.cls]); + struct instr addr = mkinstr(Oadd, KPTR, retmem, off); + store.l = insertinstr(blk, ++*curi, addr); + } + store.r = r[i]; + insertinstr(blk, ++*curi, store); + } + } + } + + if (call->ret.isagg) call->ret = (union irtype){0}; + call->vararg = vararg; + call->abiarg = alloccopy(fn->arena, abiargs.p, abiargs.n * sizeof(struct abiarg), 0); + call->narg = abiargs.n; + vfree(&abiargs); +} + +void +abi0(struct function *fn) +{ + uint nparam = typedata[fn->fnty.dat].nmemb; + const union type *paramty = typedata[fn->fnty.dat].param; + struct abiargsvec abiargs = {VINIT(abiargsbuf, arraylength(abiargsbuf))}; + int rvovar = -1; + int ni = 0, nf = 0, ns = 0, istart 
= 0; + struct block *blk; + union ref sret = {0}; + + if (fn->retty.t == TYVOID) { + fn->nabiret = 0; + } else { + fn->nabiret = abiret(fn->abiret, &abiargs, &ni, mkirtype(fn->retty)); + if (!fn->nabiret && isagg(fn->retty)) { /* ret agg by hidden pointer */ + struct instr param = copyparam(fn, NULL, 0, abiargs.p[0]); + sret = insertinstr(fn->entry, 0, param); + ++istart; + /* increment real param ordinals */ + for (int i = 1; i < fn->entry->ins.n; ++i) { + struct instr *ins = &instrtab[fn->entry->ins.p[i]]; + if (ins->op == Oparam) ++ins->l.i; + } + } + } + + /* adjust params */ + for (int i = 0, param = abiargs.n; i < nparam; ++i) { + union irtype pty = mkirtype(paramty[i]); + int first = abiargs.n; + int ret = abiarg(&abiargs, &ni, &nf, &ns, pty); + patchparam(fn, &istart, ¶m, pty.isagg ? pty.dat : -1, ret+!ret, &abiargs.p[first]); + } + fn->abiarg = alloccopy(fn->arena, abiargs.p, abiargs.n * sizeof *abiargs.p, 0); + fn->nabiarg = abiargs.n; + vfree(&abiargs); + + if (!fn->nabiret && isagg(fn->retty)) { + /* for structures returned by hidden pointer argument, + * if all return instrs return local var X, make X point to the result location, + * (return value optimization (RVO)) */ + blk = fn->entry; + do { + union ref arg = blk->jmp.arg[0]; + if (blk->jmp.t != Jret) continue; + if (!arg.bits) continue; + if (arg.t != RTMP || !oisalloca(instrtab[arg.i].op)) { + rvovar = -1; + break; + } + if (rvovar == -1) { + rvovar = arg.i; + } else if (arg.i != rvovar) { + rvovar = -1; + break; + } + } while ((blk = blk->lnext) != fn->entry); + if (rvovar != -1) + instrtab[rvovar] = mkinstr(Ocopy, KPTR, sret); + } + + blk = fn->entry->lnext; + do { + /* adjust calls */ + for (int iinstr = 0; iinstr < blk->ins.n; ++iinstr) { + struct instr *ins = &instrtab[blk->ins.p[iinstr]]; + if (ins->op != Ocall) continue; + abi0_call(fn, ins, blk, &iinstr); + } + + /* adjust returns */ + if (isagg(fn->retty) && blk->jmp.t == Jret && blk->jmp.arg[0].bits) { + assert(!blk->jmp.arg[1].bits); 
+ if (fn->nabiret) { /* aggregate return in register(s) */ + union ref src = blk->jmp.arg[0]; + for (int i = 0; i < fn->nabiret; ++i) { + /* XXX this can generate unaligned loads */ + struct instr ins = {0}; + switch (ins.cls = fn->abiret[i].ty.cls) { + default: assert(0); + case KI4: ins.op = Oloadu4; break; + case KI8: ins.op = Oloadi8; break; + case KF4: ins.op = Oloadf4; break; + case KF8: ins.op = Oloadf8; break; + } + if (i == 0) + ins.l = src; + else + ins.l = insertinstr(blk, blk->ins.n, + mkinstr(Oadd, KPTR, src, + mkref(RICON, cls2siz[fn->abiret[0].ty.cls]))); + blk->jmp.arg[i] = insertinstr(blk, blk->ins.n, ins); + } + } else { + /* aggregate return (arg[0] is pointer to return value) */ + if (rvovar == -1) { + /* blit %sret, %arg */ + union irtype typ = mkirtype(fn->retty); + insertinstr(blk, blk->ins.n, mkarginstr(typ, sret)); + insertinstr(blk, blk->ins.n, mkarginstr(typ, blk->jmp.arg[0])); + insertinstr(blk, blk->ins.n, mkintrin(INstructcopy, 0, 2)); + } else assert(blk->jmp.arg[0].bits == mkref(RTMP, rvovar).bits); + if (fn->abiret[0].ty.cls) blk->jmp.arg[0] = rvovar == -1 ? 
sret : mkref(RTMP, rvovar); + else memset(blk->jmp.arg, 0, sizeof blk->jmp.arg); + } + } + } while ((blk = blk->lnext) != fn->entry); + + if (ccopt.dbg.a) { + efmt("<< After abi0 >>\n"); + irdump(fn); + } +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/ir/cfg.c b/ir/cfg.c new file mode 100644 index 0000000..d9aa409 --- /dev/null +++ b/ir/cfg.c @@ -0,0 +1,43 @@ +#include "ir.h" + +static void +porec(struct block ***rpo, struct block *b) +{ + if (wasvisited(b)) return; + markvisited(b); + if (b->s2) porec(rpo, b->s2); + if (b->s1) porec(rpo, b->s1); + *--*rpo = b; +} + +void +sortrpo(struct function *fn) +{ + static struct block **rpobuf; + struct block **rpoend, **rpo; + int i, ndead; + + xbgrow(&rpobuf, fn->nblk); + rpo = rpoend = rpobuf + fn->nblk, + + startbbvisit(); + fn->entry->id = 0; + porec(&rpo, fn->entry); + ndead = rpo - rpobuf; + for (struct block *blk = fn->entry; ndead > 0; blk = blk->lnext) { + if (!wasvisited(blk)) { + blk->lnext = blk->lprev = NULL; + freeblk(fn, blk); + --ndead; + } + } + for (i = 1, ++rpo; rpo < rpoend; ++rpo, ++i) { + rpo[-1]->lnext = rpo[0]; + rpo[0]->lprev = rpo[-1]; + rpo[0]->id = i; + } + fn->entry->lprev = rpo[-1]; + rpo[-1]->lnext = fn->entry; +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/ir/dump.c b/ir/dump.c new file mode 100644 index 0000000..ddde7a3 --- /dev/null +++ b/ir/dump.c @@ -0,0 +1,255 @@ +#include "ir.h" +#include "../obj/obj.h" + +static int nextdat; + +static void +pridat(const struct irdat *dat) +{ + uchar *p = (dat->section == Sdata ? objout.data.p : objout.rodata.p) + dat->off; + enum { + MINZERO = 4, + MAXLINE = 60, + }; + int npri = 0; + int strbegin = 0, nstr = 0; + assert(dat->section == Sdata || dat->section == Srodata); + efmt("%s %y(align %d, size %d):\n\t", dat->section == Sdata ? 
"data" : "rodata", dat->name, dat->align, dat->siz); + for (int i = 0; i < dat->siz; ++i) { + int c = p[i]; + if (npri > MAXLINE) { + npri = 0; + efmt("\n\t"); + } + if (aisprint(c)) { + if (!nstr++) strbegin = i; + } else { + if (nstr) { + npri += efmt("asc %'S,", p+strbegin, nstr); + nstr = 0; + efmt("b "); + } + npri += efmt("%d,", c); + } + } + if (nstr) npri += efmt("asc %'S,", p+strbegin, nstr); + efmt("\n"); +} + +static const char *clsname[] = { + "?", "i4", "i8", "ptr", "f4", "f8" +}; + +static void +prityp(union irtype typ) +{ + if (!typ.isagg) + efmt(clsname[typ.cls]); + else { + const struct typedata *td = &typedata[typ.dat]; + const char *tag = td->t == TYSTRUCT ? "struct" : "union"; + if (ttypenames[td->id]) + efmt("%s.%s.%d", tag, ttypenames[td->id], td->id); + else + efmt("%s.%d", tag, td->id); + } +} + +static const char *intrinname[] = { + "?\??", +#define _(b,...) #b, +#include "intrin.def" +#undef _ +}; + +static void +dumpref(enum op o, union ref ref) +{ + struct xcon *con; + switch (ref.t) { + case RXXX: + if (ref.bits == UNDREF.bits) + efmt("undef"); + else + efmt("??"); + break; + case RTMP: + efmt("%%%d", ref.i); + if (instrtab[ref.i].reg) + efmt("(%s)", mctarg->rnames[instrtab[ref.i].reg-1]); + break; + case RREG: + efmt("%s", mctarg->rnames[ref.i]); + break; + case RICON: + if (o == Ointrin) efmt("\"%s\"", intrinname[ref.i]); + else efmt("%d", ref.i); + break; + case RXCON: + con = &conht[ref.i]; + if (con->deref) efmt("*["); + if (con->issym || con->isdat) efmt("$%y", xcon2sym(ref.i)); + else switch (con->cls) { + case KI4: efmt("%d", (int)con->i); break; + case KI8: efmt("%ld", con->i); break; + case KPTR: efmt("%'lx", con->i); break; + case KF4: efmt("%fs", con->f); break; + case KF8: efmt("%fd", con->f); break; + default: assert(0); + } + if (con->deref) efmt("]"); + break; + case RTYPE: + prityp(ref2type(ref)); + break; + case RADDR: + { + const struct addr *addr = &addrht[ref.i]; + bool k = 0; + efmt("addr ["); + if ((k = 
addr->base.bits)) dumpref(0, addr->base); + if (addr->index.bits) { + if (k) efmt(" + "); + dumpref(0, addr->index); + if (addr->shift) + efmt(" * %d", 1<shift); + k = 1; + } + if (k && addr->disp) { + efmt(" %c %d", "-+"[addr->disp > 0], addr->disp < 0 ? -addr->disp : addr->disp); + } + assert(k); + efmt("]"); + } + break; + default: assert(!"ref"); + } +} + +static void +dumpcall(struct call *call) +{ + if (call->ret.isagg) { + efmt("sret "); + prityp(call->ret); + efmt(", "); + } + if (call->vararg < 0) { + efmt("#%d", call->narg); + } else { + assert(call->vararg <= call->narg); + efmt("#%d, ... #%d", call->vararg, call->narg - call->vararg); + } +} + +static void +dumpinst(const struct instr *ins) +{ + int i; + if (ins->op == Omove) { + efmt("move %s ", clsname[ins->cls]); + } else { + enum irclass cls = insrescls(*ins); + if (ins->reg) { + if (cls) + efmt("%s ", clsname[cls]); + efmt("(%%%d)%s = ", ins - instrtab, mctarg->rnames[ins->reg - 1]); + } else if (cls) { + efmt("%s %%%d", clsname[cls], ins - instrtab); + efmt(" = "); + } + efmt("%s ", opnames[ins->op]); + if (oiscmp(ins->op)) + efmt("%s ", clsname[ins->cls]); + } + for (i = 0; i < opnarg[ins->op]; ++i) { + if (i) efmt(", "); + if (i == 1 && (ins->op == Ocall || ins->op == Ointrin)) { + dumpcall(&calltab.p[ins->r.i]); + } else { + dumpref(ins->op, (&ins->l)[i]); + } + } + efmt("\n"); +} + +static void +dumpblk(struct function *fn, struct block *blk) +{ + static const char *jnames[] = { 0, "b", "ret" }; + int i; + efmt(" @%d:\n", blk->id); + for (i = 0; i < blk->phi.n; ++i) { + struct instr *phi = &instrtab[blk->phi.p[i]]; + union ref *refs = phitab.p[phi->l.i]; + if (i == 0) efmt("%-4d", blk->inumstart); + else efmt(" |> "); + efmt(" %s ", clsname[phi->cls]); + if (!phi->reg) efmt("%%%d = %s ", blk->phi.p[i], opnames[phi->op]); + else efmt("(%%%d)%s = %s ", phi - instrtab, mctarg->rnames[phi->reg-1], opnames[phi->op]); + for (int i = 0; i < blk->npred; ++i) { + if (i) efmt(", "); + efmt("@%d ", 
blkpred(blk, i)->id); + dumpref(0, refs[i]); + } + efmt("\n"); + } + for (i = 0; i < blk->ins.n; ++i) { + efmt("%-4d ", blk->inumstart + 1 + i); + dumpinst(&instrtab[blk->ins.p[i]]); + } + efmt("%-4d %s ", blk->inumstart + 1 + i, jnames[blk->jmp.t]); + if (blk->jmp.arg[0].bits && !fn->nabiret && isagg(fn->retty)) { + /* un-lowered struct return */ + dumpref(0, mktyperef(mkirtype(fn->retty))); + efmt(" "); + } + for (i = 0; i < 2; ++i) { + if (!blk->jmp.arg[i].bits) break; + if (i > 0) efmt(", "); + dumpref(0, blk->jmp.arg[i]); + } + if (i && blk->s1) efmt(", "); + if (blk->s1 && blk->s2) efmt("@%d, @%d", blk->s1->id, blk->s2->id); + else if (blk->s1) efmt("@%d", blk->s1->id); + efmt("\n"); +} + +void +irdump(struct function *fn) +{ + struct block *blk; + + /* print datas that have never been printed before */ + while (nextdat < dattab.n) pridat(&dattab.p[nextdat++]); + + efmt("function %s : %ty\n", fn->name, fn->fnty); + if (fn->abiarg || fn->nabiret) { + efmt("abi: ("); + for (int i = 0; i < fn->nabiarg; ++i) { + if (i > 0) efmt(", "); + if (fn->abiarg[i].reg >= 0) { + efmt("%s", mctarg->rnames[fn->abiarg[i].reg]); + } else { + prityp(fn->abiarg[i].ty); + efmt(" "); + } + } + efmt(")"); + if (fn->retty.t != TYVOID) { + efmt(" -> %s", mctarg->rnames[fn->abiret[0].reg]); + if (fn->nabiret > 1) + efmt(", %s", mctarg->rnames[fn->abiret[1].reg]); + } + efmt("\n"); + } + numberinstrs(fn); + blk = fn->entry; + do { + dumpblk(fn, blk); + assert(blk->lnext != NULL); + } while ((blk = blk->lnext) != fn->entry); + efmt("\n"); +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/ir/intrin.c b/ir/intrin.c new file mode 100644 index 0000000..9bb0bcb --- /dev/null +++ b/ir/intrin.c @@ -0,0 +1,77 @@ +#include "ir.h" + +struct arg { union ref *arg, *ty; }; + +static int +intrin(struct block *blk, int *curi, enum intrin in, struct arg *args, int narg, union irtype ret) +{ + struct instr *this = &instrtab[blk->ins.p[*curi]]; + const struct typedata *td; + union irtype ty; + uint 
ncopy, step; + + switch (in) { + case 0: assert(0); + case INstructcopy: + assert(narg == 2 && args[0].ty->bits == args[1].ty->bits); + ty = ref2type(*args[0].ty); + assert(ty.isagg); + td = &typedata[ty.cls]; + step = td->align <= 8 ? td->align : 8; + ncopy = td->siz / step; + if (ncopy > 4) { + enum irclass cls = siz2intcls[cls2siz[KPTR]]; + /* memcpy */ + *args[1].ty = *args[0].ty = mktyperef(cls2type(KPTR)); + insertinstr(blk, (*curi)++, mkarginstr(cls2type(cls), mkintcon(cls, td->siz))); + *this = mkinstr(Ocall, 0, mksymref("memcpy"), this->r); + calltab.p[this->r.i].narg = 3; + calltab.p[this->r.i].ret = cls2type(0); + return 0; + } else { + delinstr(blk, (*curi)--); + for (int off = 0; off < td->siz; off += step) { + union ref psrc = *args[1].arg, pdst = *args[0].arg, src; + if (off) { + pdst = insertinstr(blk, ++*curi, mkinstr(Oadd, KPTR, *args[0].arg, mkref(RICON, off))); + psrc = insertinstr(blk, ++*curi, mkinstr(Oadd, KPTR, *args[1].arg, mkref(RICON, off))); + } + src = insertinstr(blk, ++*curi, mkinstr(Oloads1 + 2*ilog2(step), step < 8 ? 
KI4 : KI8, psrc)); + insertinstr(blk, ++*curi, mkinstr(Ostore1 + ilog2(step), 0, pdst, src)); + } + return 1; + } + } + assert(0); +} + +void +lowerintrin(struct function *fn) +{ + struct block *blk = fn->entry; + static struct arg argsbuf[64]; + vec_of(struct arg) args = VINIT(argsbuf, arraylength(argsbuf)); + + do { + for (int i = 0; i < blk->ins.n; ++i) { + struct instr *ins = &instrtab[blk->ins.p[i]]; + if (ins->op == Oarg) + vpush(&args, ((struct arg){ &ins->r, &ins->l })); + else if (ins->op == Ocall) + vinit(&args, argsbuf, arraylength(argsbuf)); + else if (ins->op == Ointrin) { + int arg0 = i - args.n; + assert(calltab.p[ins->r.i].narg == args.n); + if (intrin(blk, &i, ins->l.i, args.p, args.n, calltab.p[ins->r.i].ret)) + for (int j = args.n; j > 0; --j, --i) + delinstr(blk, arg0); + else + abi0_call(fn, ins, blk, &i); + vinit(&args, argsbuf, arraylength(argsbuf)); + } + } + assert(args.n == 0); + } while ((blk = blk->lnext) != fn->entry); +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/ir/intrin.def b/ir/intrin.def new file mode 100644 index 0000000..2ccc3b5 --- /dev/null +++ b/ir/intrin.def @@ -0,0 +1,2 @@ +/* NAME NARG */ +_(structcopy, 2) diff --git a/ir/ir.c b/ir/ir.c new file mode 100644 index 0000000..0132a97 --- /dev/null +++ b/ir/ir.c @@ -0,0 +1,618 @@ +#include "ir.h" +#include "../obj/obj.h" + +uchar type2cls[NTYPETAG]; +uchar cls2siz[KF8+1]; +const uchar siz2intcls[] = { [1] = KI4, [2] = KI4, [4] = KI4, [8] = KI8 }; + +const char *opnames[] = { + "?\??", +#define _(o,...) 
#o, +#include "op.def" +#undef _ +}; + +const uchar opnarg[] = { + 0, +#define _(o,n) n, +#include "op.def" +#undef _ +}; + +struct instr instrtab[MAXINSTR]; +struct use *instruse[MAXINSTR]; +short instrnuse[MAXINSTR]; +static struct use instrusebuf[MAXINSTR][2]; +int ninstr; +static int instrfreelist; +struct calltab calltab; +struct phitab phitab; +struct dattab dattab; +struct addr addrht[1 << 12]; +static int naddrht; +struct xcon conht[1 << 12]; +static int nconht; +int visitmark; + +void +irinit(struct function *fn) +{ + static struct call callsbuf[64]; + static union ref *phisbuf[64]; + static struct irdat datsbuf[64]; + + ninstr = 0; + instrfreelist = -1; + vinit(&calltab, callsbuf, arraylength(callsbuf)); + for (int i = 0; i < phitab.n; ++i) xbfree(phitab.p[i]); + vinit(&phitab, phisbuf, arraylength(phisbuf)); + if (!dattab.p) vinit(&dattab, datsbuf, arraylength(datsbuf)); + if (naddrht >= arraylength(addrht)/2) + memset(addrht, naddrht = 0, sizeof addrht); + if (nconht >= arraylength(conht)/2) + memset(conht, nconht = 0, sizeof conht); + if (!type2cls[TYINT]) { + for (int i = TYBOOL; i <= TYUVLONG; ++i) { + int siz = targ_primsizes[i]; + type2cls[i] = siz < 8 ? 
KI4 : KI8; + } + type2cls[TYFLOAT] = KF4; + type2cls[TYDOUBLE] = KF8; + type2cls[TYLDOUBLE] = KF8; + type2cls[TYPTR] = KPTR; + type2cls[TYARRAY] = KPTR; + cls2siz[KI4] = cls2siz[KF4] = 4; + cls2siz[KI8] = cls2siz[KF8] = 8; + cls2siz[KPTR] = targ_primsizes[TYPTR]; + } + fn->entry = fn->curblk = allocz(fn->arena, sizeof(struct block), 0); + fn->nblk = 1; + fn->entry->lprev = fn->entry->lnext = fn->entry; +} + +static int +addaddr(const struct addr *addr) +{ + uint h = hashb(0, addr, sizeof *addr); + uint i = h, n = arraylength(addrht); + for (;; ++i) { + i &= arraylength(addrht) - 1; + if (!addrht[i].base.bits && !addrht[i].index.bits) { + addrht[i] = *addr; + ++naddrht; + return i; + } else if (!memcmp(&addrht[i], addr, sizeof *addr)) { + return i; + } + assert(--n > 0 && "addrht full"); + } +} + +static int +addcon(const struct xcon *con) +{ + uint h = hashb(0, con, sizeof *con); + uint i = h, n = arraylength(conht); + assert(con->issym || con->isdat || con->cls); + for (;; ++i) { + i &= arraylength(conht) - 1; + if (!conht[i].issym && !conht[i].cls) { + conht[i] = *con; + ++nconht; + return i; + } else if (!memcmp(&conht[i], con, sizeof *con)) { + return i; + } + assert(--n > 0 && "conht full"); + } +} + +union irtype +mkirtype(union type t) +{ + if (t.t == TYVOID || isscalar(t)) + return (union irtype) { .cls = type2cls[scalartypet(t)] }; + assert(isagg(t)); + return (union irtype) { .isagg = 1, .dat = t.dat }; +} + +union ref +mkintcon(enum irclass k, vlong i) +{ + if (i < 1l << 28 && i >= -(1l << 28)) { + return mkref(RICON, i); + } else { + struct xcon con = { .cls = k, .i = i }; + if (cls2siz[k] == 4) /* check upper half is zero or -1 */ + assert(in_range((i >> 32) + 1, 0, 1)); + return mkref(RXCON, addcon(&con)); + } +} + +union ref +mkfltcon(enum irclass k, double f) +{ + struct xcon con = { .cls = k, .f = k == KF4 ? 
(float) f : f }; + return mkref(RXCON, addcon(&con)); +} + +union ref +mksymref(const char *s) +{ + struct xcon con = { .issym = 1, .sym = s }; + return mkref(RXCON, addcon(&con)); +} + +union ref +mkdatref(const char *name, uint siz, uint align, const void *bytes, uint n, bool deref) +{ + struct irdat dat = { .align = align, .siz = siz, .name = name, .section = Srodata }; + + assert(n <= siz && siz && align); + if (!name) { + extern const char *intern(const char *); + char buf[32]; + struct wbuf wbuf = MEMBUF(buf, sizeof buf); + + bfmt(&wbuf, ".L.%d", dattab.n); + ioputc(&wbuf, 0); + assert(!wbuf.err); + dat.name = name = intern(buf); + } + dat.off = objnewdat(name, dat.section, 0, siz, align); + memcpy(objout.rodata.p+dat.off, bytes, n); + memset(objout.rodata.p+dat.off+n, 0, siz - n); + vpush(&dattab, dat); + return mkref(RXCON, addcon(&(struct xcon){.isdat = 1, .deref = deref, .dat = dattab.n - 1})); +} + +const char * +xcon2sym(int ref) +{ + struct xcon con = conht[ref]; + assert(con.issym ^ con.isdat); + return con.issym ? 
con.sym : dattab.p[con.dat].name; +} + +struct instr +mkalloca(uint siz, uint align) +{ + struct instr ins = { .cls = KPTR }; + assert(ispo2(align) && align <= 16); + ins.op = Oalloca1 + ilog2(align); + ins.l = mkref(RICON, siz/align + (siz%align != 0)); + return ins; +} + +union ref +mkcallarg(union irtype ret, uint narg, int vararg) +{ + struct call call = { .ret=ret, .narg=narg, .vararg=vararg }; + assert((long) vararg <= narg); + vpush(&calltab, call); + return mkref(RXXX, calltab.n-1); +} + +union ref +mkaddr(struct addr addr) +{ + return mkref(RADDR, addaddr(&addr)); +} + +void +addpred(struct block *blk, struct block *p) +{ + if (blk->npred == 0) { + blk->_pred0 = p; + ++blk->npred; + return; + } + if (blk->npred == 1) { + struct block *p0 = blk->_pred0; + blk->_pred = 0; + xbgrow(&blk->_pred, 4); + *blk->_pred = p0; + } + xbpush(&blk->_pred, &blk->npred, p); +} + +static void +delpred(struct block *blk, struct block *p) +{ + for (int i = 0; i < blk->npred; ++i) { + if (blkpred(blk, i) == p) { + for (int k = i; k < blk->npred - 1; ++k) { + blkpred(blk, k) = blkpred(blk, k + 1); + } + --blk->npred; + return; + } + } + assert(0&&"blk not in p"); +} + +struct block * +newblk(struct function *fn) +{ + struct block *blk = alloc(fn->arena, sizeof(struct block), 0); + memset(blk, 0, sizeof *blk); + blk->id = -1; + return blk; +} + +void +freeblk(struct function *fn, struct block *blk) +{ + if (blk->npred > 1) + xbfree(blk->_pred); + vfree(&blk->phi); + vfree(&blk->ins); + if (blk->lnext) blk->lnext->lprev = blk->lprev; + if (blk->lprev) blk->lprev->lnext = blk->lnext; + blk->id = 1u<<31; + --fn->nblk; +} + +struct block * +insertblk(struct function *fn, struct block *pred, struct block *subst) +{ + struct block *new = newblk(fn); + struct block **s = pred->s1 == subst ? 
&pred->s1 : &pred->s2; + assert(*s == subst); + new->lnext = pred->lnext; + new->lprev = pred; + pred->lnext->lprev = new; + pred->lnext = new; + *s = new; + new->jmp.t = Jb; + new->s1 = subst; + addpred(new, pred); + for (int i = 0; i < subst->npred; ++i) { + if (blkpred(subst, i) == pred) { + blkpred(subst, i) = new; + ++fn->nblk; + return new; + } + } + assert(0); +} + + +static int +newinstr(void) +{ + int t; + if (instrfreelist != -1) { + t = instrfreelist; + memcpy(&instrfreelist, &instrtab[t], sizeof(int)); + } else { + assert(ninstr < arraylength(instrtab)); + t = ninstr++; + } + if (instruse[t] != instrusebuf[t]) + xbfree(instruse[t]); + instruse[t] = instrusebuf[t]; + instrnuse[t] = 0; + return t; +} + +void +adduse(struct block *ublk, int ui, union ref r) { + struct use user = { ublk, ui }; + + if (r.t != RTMP) return; + if (instrnuse[r.i] < arraylength(instrusebuf[r.i])) { + instruse[r.i][instrnuse[r.i]++] = user; + } else if (instrnuse[r.i] == arraylength(*instrusebuf)) { + struct use *use = NULL; + xbgrow(&use, arraylength(*instrusebuf) + 2); + memcpy(use, instruse[r.i], sizeof(*instrusebuf)); + use[instrnuse[r.i]++] = user; + instruse[r.i] = use; + } else { + xbpush(&instruse[r.i], &instrnuse[r.i], user); + } +} + +bool +deluse(struct block *ublk, int ui, union ref r) { + if (r.t != RTMP) return 0; + + for (int i = 0; i < instrnuse[r.i]; ++i) { + if (instruse[r.i][i].blk == ublk && instruse[r.i][i].u == ui) { + for (int k = i; k < instrnuse[r.i] - 1; ++k) { + instruse[r.i][k] = instruse[r.i][k+1]; + } + goto Shrink; + } + } + return 0; + +Shrink: + if (instrnuse[r.i] == arraylength(*instrusebuf) + 1) { + struct use *use = instruse[r.i]; + instruse[r.i] = instrusebuf[r.i]; + memcpy(instruse[r.i], use, sizeof *instrusebuf); + } + --instrnuse[r.i]; + return 1; +} + +union ref +insertinstr(struct block *blk, int idx, struct instr ins) +{ + int new = newinstr(); + + instrtab[new] = ins; + if (idx == blk->ins.n) vpush(&blk->ins, new); + else { + assert(idx 
>= 0 && idx < blk->ins.n); + vpush_((void **)&blk->ins.p, &blk->ins._cap, &blk->ins.n, sizeof *blk->ins.p); + vresize(&blk->ins, blk->ins.n); + for (int i = blk->ins.n++; i > idx; --i) + blk->ins.p[i] = blk->ins.p[i - 1]; + blk->ins.p[idx] = new; + } + adduse(blk, new, ins.l); + adduse(blk, new, ins.r); + return mkref(RTMP, new); +} + +union ref +insertphi(struct block *blk, enum irclass cls) +{ + int new = newinstr(); + union ref *refs = NULL; + assert(blk->npred > 0); + xbgrow(&refs, blk->npred); + memset(refs, 0, blk->npred * sizeof *refs); + vpush(&phitab, refs); + instrtab[new] = mkinstr(Ophi, cls, mkref(RXXX, phitab.n - 1)); + vpush(&blk->phi, new); + return mkref(RTMP, new); +} + +void +numberinstrs(struct function *fn) +{ + struct block *blk = fn->entry; + int start = 0; + do { + blk->inumstart = start; + start += blk->ins.n+2; + } while ((blk = blk->lnext) != fn->entry); +} + +/* require use */ +void +replcuses(union ref from, union ref to) +{ + assert(from.t == RTMP); + for (int i = 0; i < instrnuse[from.i]; ++i) { + struct use use = instruse[from.i][i]; + union ref *u; + int n, j; + if (use.u == from.i) continue; + if (use.u == USERJUMP) { + u = &use.blk->jmp.arg[0]; + n = 2; + } else if (instrtab[use.u].op == Ophi) { + u = phitab.p[instrtab[use.u].l.i]; + n = use.blk->npred; + } else { + u = &instrtab[use.u].l; + n = 2; + } + + for (j = 0; j < n; ++j) { + if (u[j].bits == from.bits) { + u[j].bits = to.bits; + adduse(use.blk, use.u, to); + --i; + break; + } + } + } +} + +void +deluses(int ins) +{ + if (instruse[ins] != instrusebuf[ins]) { + xbfree(instruse[ins]); + instruse[ins] = instrusebuf[ins]; + } + instrnuse[ins] = 0; +} + +void +delinstr(struct block *blk, int idx) +{ + int t = blk->ins.p[idx]; + assert(idx >= 0 && idx < blk->ins.n); + memcpy(&instrtab[t], &instrfreelist, sizeof(int)); + instrfreelist = t; + deluses(t); + for (int i = idx; i < blk->ins.n - 1; ++i) + blk->ins.p[i] = blk->ins.p[i + 1]; + --blk->ins.n; +} + +void +delphi(struct block 
*blk, int idx) +{ + int t = blk->phi.p[idx]; + assert(idx >= 0 && idx < blk->phi.n); + memcpy(&instrtab[t], &instrfreelist, sizeof(int)); + instrfreelist = t; + deluses(t); + for (int i = idx; i < blk->phi.n - 1; ++i) + blk->phi.p[i] = blk->phi.p[i + 1]; + --blk->phi.n; +} + +void +delnops(struct block *blk) +{ + int i, n, t; + /* delete trailing nops */ + while (blk->ins.n > 0 && instrtab[t = blk->ins.p[blk->ins.n - 1]].op == Onop) { + --blk->ins.n; + deluses(t); + memcpy(&instrtab[t], &instrfreelist, sizeof(int)); + instrfreelist = t; + } + /* delete rest of nops */ + for (i = blk->ins.n - 2, n = 0; i >= 0; --i) { + if (instrtab[t = blk->ins.p[i]].op == Onop) { + deluses(t); + memcpy(&instrtab[t], &instrfreelist, sizeof(int)); + instrfreelist = t; + ++n; + } else if (n) { + memmove(blk->ins.p+i+1, blk->ins.p+i+1+n, (blk->ins.n - n - i - 1)*sizeof *blk->ins.p); + blk->ins.n -= n; + n = 0; + } + } + if (n) + memmove(blk->ins.p, blk->ins.p + n, (blk->ins.n -= n)*sizeof *blk->ins.p); +} + +void +fillblkids(struct function *fn) +{ + int i = 0; + struct block *blk = fn->entry; + do blk->id = i++; while ((blk = blk->lnext) != fn->entry); +} + +/** IR builders **/ + +void +useblk(struct function *fn, struct block *blk) +{ + extern int nerror; + if (fn->curblk && nerror == 0) assert(fn->curblk->jmp.t && "never finished block"); + if (blk) assert(!blk->jmp.t && "reusing built block"); + if (!blk->lprev) { /* initialize */ + blk->lnext = fn->entry; + blk->lprev = fn->entry->lprev; + blk->lprev->lnext = blk; + blk->id = blk->lprev->id + 1; + ++fn->nblk; + fn->entry->lprev = blk; + } + fn->curblk = blk; +} + +union ref +addinstr(struct function *fn, struct instr ins) +{ + int new = newinstr(); + assert(fn->curblk != NULL); + instrtab[new] = ins; + adduse(fn->curblk, new, ins.l); + adduse(fn->curblk, new, ins.r); + vpush(&fn->curblk->ins, new); + return mkref(RTMP, new); +} + +union ref +addphi(struct function *fn, enum irclass cls, union ref *r) +{ + int new; + struct instr 
ins = { Ophi, cls }; + union ref *refs = NULL; + + xbgrow(&refs, fn->curblk->npred); + memcpy(refs, r, fn->curblk->npred * sizeof *r); + vpush(&phitab, refs); + ins.l = mkref(RXXX, phitab.n-1); + + assert(fn->curblk != NULL); + assert(fn->curblk->ins.n == 0); + new = newinstr(); + instrtab[new] = ins; + for (int i = 0; i < fn->curblk->npred; ++i) { + adduse(fn->curblk, new, r[i]); + } + vpush(&fn->curblk->phi, new); + return mkref(RTMP, new); +} + +#define putjump(fn, j, arg0, arg1, T, F) \ + fn->curblk->jmp.t = j; \ + fn->curblk->jmp.arg[0] = arg0; \ + fn->curblk->jmp.arg[1] = arg1; \ + fn->curblk->s1 = T; \ + fn->curblk->s2 = F; \ + fn->curblk = NULL; + +void +putbranch(struct function *fn, struct block *blk) +{ + assert(fn->curblk && blk); + addpred(blk, fn->curblk); + putjump(fn, Jb, NOREF, NOREF, blk, NULL); +} + +void +putcondbranch(struct function *fn, union ref arg, struct block *t, struct block *f) +{ + assert(fn->curblk && t && f); + adduse(fn->curblk, USERJUMP, arg); + addpred(t, fn->curblk); + addpred(f, fn->curblk); + putjump(fn, Jb, arg, NOREF, t, f); +} + +void +putreturn(struct function *fn, union ref r0, union ref r1) +{ + adduse(fn->curblk, USERJUMP, r0); + adduse(fn->curblk, USERJUMP, r1); + putjump(fn, Jret, r0, r1, NULL, NULL); +} + +#undef putjump + +/** Misc **/ + +static void +freefn(struct function *fn) +{ + struct block *blk = fn->entry; + do { + if (blk->npred > 1) xbfree(blk->_pred); + vfree(&blk->phi); + vfree(&blk->ins); + } while ((blk = blk->lnext) != fn->entry); +} + +void +irfini(struct function *fn) +{ + extern int nerror; + if (nerror) { + freefn(fn); + return; + } + + abi0(fn); + lowerintrin(fn); + mem2reg(fn); + copyopt(fn); + if (ccopt.dbg.o) { + efmt("<< Before isel >>\n"); + irdump(fn); + } + mctarg->isel(fn); + regalloc(fn); + if (!ccopt.dbg.any) + mctarg->emit(fn); + + freefn(fn); +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/ir/ir.h b/ir/ir.h new file mode 100644 index 0000000..2c8a55d --- /dev/null +++ b/ir/ir.h 
@@ -0,0 +1,280 @@ +#include "../common.h" + +enum irclass { + KXXX, + KI4, KI8, KPTR, + KF4, KF8, +}; + +#define kisint(k) in_range((k), KI4, KPTR) +#define kisflt(k) in_range((k), KF4, KF8) + +union irtype { + struct { ushort _ : 1, cls : 15; }; + struct { ushort isagg : 1, dat : 15; }; + ushort bits; +}; + +struct irdat { + uchar align : 6, globl : 1; + uchar section; + uint siz; + uint off; + const char *name; +}; + +struct xcon { + bool issym, isdat, deref; + uchar cls; + union { + const char *sym; + int dat; + vlong i; + double f; + }; +}; + +struct abiarg { + union irtype ty; + union { + short reg; /* >= 0 */ + short stk; /* < 0 */ + }; +}; + +struct call { + union irtype ret; + ushort narg; + short vararg; /* first variadic arg or -1 */ + struct abiarg *abiarg; + struct abiarg abiret[2]; +}; + +enum refkind { + RXXX, /* used for empty ref (zeros), undef, and the special args of call,phi,etc */ + RTMP, /* reference to another instruction's result */ + RREG, /* machine register */ + RICON, /* small integer constants */ + RXCON, /* other constants (incl. external symbols) */ + RADDR, /* target-specific addressing mode */ + RTYPE, /* irtype */ +}; + +union ref { + struct { unsigned t : 3; signed i : 29; }; + uint bits; +}; + +struct addr { + union ref base, index; + int shift, disp; +}; + +#define insrescls(ins) (oiscmp((ins).op) ? KI4 : (ins).cls) +#define NOREF ((union ref) {0}) +#define UNDREF ((union ref) {{ 0, -1 }}) +#define ZEROREF ((union ref) {{ RICON, 0 }}) +#define mkref(t, x) ((union ref) {{ (t), (x) }}) +#define mktyperef(t) ((union ref) {{ RTYPE, (t).bits }}) +#define ref2type(r) ((union irtype) {.bits = (r).i}) + +enum op { + Oxxx, +#define _(o,...) 
O##o, +#include "op.def" +#undef _ +}; + +#define oiscmp(o) in_range(o, Oequ, Ougte) +#define oisalloca(o) in_range(o, Oalloca1, Oalloca16) +#define oisstore(o) in_range(o, Ostore1, Ostore8) +#define oisload(o) in_range(o, Oloads1, Oloadf8) +extern const char *opnames[]; +extern const uchar opnarg[]; + +enum intrin { + INxxx, +#define _(b,...) IN##b, +#include "intrin.def" +#undef _ +}; + +struct instr { + uchar op, + cls; /* operation data class; also result class except for cmp ops (always i4) */ + uchar skip : 1, /* ignore during codegen: forms part of one machine instruction */ + keep : 1; /* for codegen, keep instr even if result seems unused */ + uchar inplace : 1; /* set (by isel) for instructions which modify its first arg in place */ + uchar reg; /* 0 -> no reg; else reg + 1 */ + union ref l, r; /* args */ +}; + +enum jumpkind { JXXX, Jb, Jret, }; + +struct block { + int id; + int npred; + int visit; + int inumstart; + union { + struct block *_pred0; + struct block **_pred; + }; + struct block *lprev, *lnext; + struct block *s1, *s2; + vec_of(ushort) phi; + vec_of(ushort) ins; + struct { uchar t; union ref arg[2]; } jmp; +}; + +#define blkpred(blk, i) 0[(blk)->npred < 2 ? 
&(blk)->_pred0 : &(blk)->_pred[i]] + +enum { USERJUMP = 0xFFFF }; +struct use { struct block *blk; ushort u; }; + +enum { MAXREGS = 64 }; + +struct function { + struct arena **arena; + const char *name; + struct block *entry, *curblk; + struct use *use; + short *nuse; + union type fnty, retty; + struct abiarg *abiarg, abiret[2]; + uint nblk; + int stksiz; + ushort nabiarg, nabiret; + bool globl; + bool isleaf; + regset regusage; +}; + +enum objkind { OBJELF }; +enum mcisa { ISamd64 }; + +struct mctarg { + short gpr0, /* first gpr */ + ngpr, /* gpr count */ + bpr, /* frame/base pointer reg */ + gprscratch, fprscratch, /* scratch registers for regalloc */ + fpr0, /* first fpr */ + nfpr; /* fpr count */ + regset rcallee, /* callee-saved */ + rglob; /* globally live (never used for regalloc) */ + const char (*rnames)[6]; + enum objkind objkind; + enum mcisa isa; + /* abiret: lower return type: + * scalar/small struct -> returns number of regs (1..2), + * r & cls filled with reg and irclass of each scalar return + * big struct -> returns 0, is passed via hidden pointer argument, + * r[0] contains register for returning said pointer or -1, + * r[1] contains register for hidden pointer argument, + * *ni is set to 1 if said register is the first ABI integer argument + */ + int (*abiret)(short r[2], uchar cls[2], int *ni, union irtype); + /* abiarg: lower argument type: + * scalar/small struct -> returns number of regs (1..2), + * r & cls filled with reg and irclass of each scalar arg + * if reg == -1 -> stack + * big struct -> returns 0, + * if passed in stack cls[0] == 0, r[0] == negative SP offset + * if passed by pointer cls[0] == KPTR, r[0] contains integer register + * or negative SP offset if stack + */ + int (*abiarg)(short r[2], uchar cls[2], int *ni, int *nf, int *ns, union irtype); + + void (*isel)(struct function *); + void (*emit)(struct function *); +}; + +enum { MAXINSTR = 1<<14 }; + +/** ir.c **/ +extern uchar type2cls[]; +extern uchar cls2siz[]; +extern 
const uchar siz2intcls[]; +extern struct instr instrtab[]; +extern struct use *instruse[]; +extern short instrnuse[]; +extern struct xcon conht[]; +extern struct calltab {vec_of(struct call);} calltab; +extern struct phitab {vec_of(union ref *);} phitab; +extern struct dattab {vec_of(struct irdat);} dattab; +extern struct addr addrht[]; +extern int visitmark; +#define mkinstr(O, C, ...) ((struct instr) { .op = (O), .cls = (C), .reg=0, __VA_ARGS__ }) +#define mkarginstr(ty, x) mkinstr(Oarg, 0, mktyperef(ty), (x)) +void irinit(struct function *); +void irfini(struct function *); +#define cls2type(k) ((union irtype){.cls=(k)}) +union irtype mkirtype(union type); +union ref mkintcon(enum irclass, vlong); +union ref mkfltcon(enum irclass, double); +#define iscon(r) in_range((r).t, RICON, RXCON) +#define concls(r) ((r).t == RICON ? KI4 : conht[(r).i].cls) +#define isintcon(r) (iscon(r) && kisint(concls(r))) +#define isfltcon(r) ((r).t == RXCON && kisflt(conht[(r).i].cls)) +#define isnumcon(r) ((r).t == RICON || ((r).t == RXCON && conht[(r).i].cls)) +#define isaddrcon(r) ((r).t == RXCON && !conht[(r).i].cls && !conht[(r).i].deref) +#define intconval(r) ((r).t == RICON ? 
(r).i : conht[(r).i].i) +#define fltconval(r) (conht[(r).i].f) +union ref mksymref(const char *); +union ref mkdatref(const char *name, uint siz, uint align, const void *, uint n, bool deref); +const char *xcon2sym(int ref); +struct instr mkalloca(uint siz, uint align); +union ref mkcallarg(union irtype ret, uint narg, int vararg); +#define mkintrin(B, C, N) mkinstr(Ointrin, C, {.t=RICON,B}, mkcallarg((union irtype){{0}},N,-1)) +union ref mkaddr(struct addr); +void addpred(struct block *blk, struct block *p); + +struct block *newblk(struct function *); +void freeblk(struct function *, struct block *); +struct block *insertblk(struct function *, struct block *pred, struct block *subst); +void adduse(struct block *ublk, int ui, union ref r); +union ref insertinstr(struct block *, int idx, struct instr); +union ref insertphi(struct block *, enum irclass cls); +void replcuses(union ref from, union ref to); +void deluses(int ins); +void delinstr(struct block *, int idx); +void delphi(struct block *, int idx); +void delnops(struct block *blk); +void fillblkids(struct function *); +#define startbbvisit() (void)(++visitmark) +#define wasvisited(blk) ((blk)->visit == visitmark) +#define markvisited(blk) ((blk)->visit = visitmark) +void numberinstrs(struct function *); + +/* IR builder functions */ +union ref addinstr(struct function *, struct instr); +union ref addphi(struct function *, enum irclass, union ref []); +void useblk(struct function *, struct block *); +void putbranch(struct function *, struct block *); +void putcondbranch(struct function *, union ref arg, struct block *t, struct block *f); +void putreturn(struct function *, union ref r0, union ref r1); + +/** irdump.c **/ +void irdump(struct function *); + +/** ssa.c **/ +void filluses(struct function *); +void copyopt(struct function *); + +/** cfg.c **/ +void sortrpo(struct function *fn); + +/** abi0.c **/ +void abi0(struct function *); +void abi0_call(struct function *, struct instr *, struct block *blk, int 
*curi); + +/** optmem.c **/ +void mem2reg(struct function *); + +/** intrin.c **/ +void lowerintrin(struct function *); + +/** regalloc.c **/ +void regalloc(struct function *); + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/ir/op.def b/ir/op.def new file mode 100644 index 0000000..9e18fe7 --- /dev/null +++ b/ir/op.def @@ -0,0 +1,76 @@ +/* OP NARG */ +_(nop, 0) +_(copy, 1) +_(move, 2) +_(neg, 1) +_(not, 1) +_(cvtf4s, 1) +_(cvtf4u, 1) +_(cvtf4f8, 1) +_(cvtf8s, 1) +_(cvtf8u, 1) +_(cvtf8f4, 1) +_(cvts4f, 1) +_(cvtu4f, 1) +_(cvts8f, 1) +_(cvtu8f, 1) +_(exts1, 1) +_(extu1, 1) +_(exts2, 1) +_(extu2, 1) +_(exts4, 1) +_(extu4, 1) +_(add, 2) +_(sub, 2) +_(mul, 2) +_(umul, 2) +_(div, 2) +_(udiv, 2) +_(rem, 2) +_(urem, 2) +_(and, 2) +_(ior, 2) +_(xor, 2) +_(shl, 2) +_(sar, 2) +_(slr, 2) +_(equ, 2) +_(neq, 2) +_(lth, 2) +_(gth, 2) +_(lte, 2) +_(gte, 2) +_(ulth, 2) +_(ugth, 2) +_(ulte, 2) +_(ugte, 2) +_(alloca1, 1) +_(alloca2, 1) +_(alloca4, 1) +_(alloca8, 1) +_(alloca16, 1) +_(loads1, 1) +_(loadu1, 1) +_(loads2, 1) +_(loadu2, 1) +_(loads4, 1) +_(loadu4, 1) +_(loadi8, 1) +_(loadf4, 1) +_(loadf8, 1) +_(store1, 2) +_(store2, 2) +_(store4, 2) +_(store8, 2) +_(param, 2) +_(arg, 2) +_(call, 2) +_(call2r, 1) +_(intrin, 2) +_(phi, 1) +_(swap, 2) +/* machine-specific instructions */ +_(xinc, 1) +_(xdec, 1) +_(xsave, 1) +_(xrestore, 1) diff --git a/ir/optmem.c b/ir/optmem.c new file mode 100644 index 0000000..1c137b7 --- /dev/null +++ b/ir/optmem.c @@ -0,0 +1,285 @@ +#include "ir.h" + +static const uchar loadszcls[] = { + [Oloads1 - Oloads1] = 1|KI4<<4, [Oloadu1 - Oloads1] = 1|KI4<<4, + [Oloads2 - Oloads1] = 2|KI4<<4, [Oloadu2 - Oloads1] = 2|KI4<<4, + [Oloads4 - Oloads1] = 4|KI4<<4, [Oloadu4 - Oloads1] = 4|KI4<<4, + [Oloadi8 - Oloads1] = 8|KI8<<4, + [Oloadf4 - Oloads1] = 4|KF4<<4, + [Oloadf8 - Oloads1] = 8|KF8<<4, +}; +static const uchar load2ext[] = { + [Oloads1 - Oloads1] = Oexts1, [Oloadu1 - Oloads1] = Oextu1, + [Oloads2 - Oloads1] = Oexts2, [Oloadu2 - Oloads1] = Oextu2, + 
[Oloads4 - Oloads1] = Oexts4, [Oloadu4 - Oloads1] = Oextu4, + [Oloadi8 - Oloads1] = Ocopy, +}; +#define loadsz(o) (loadszcls[(o) - Oloads1] & 0xF) +#define loadcls(o) (loadszcls[(o) - Oloads1] >> 4) +#define load2ext(o) (load2ext[(o) - Oloads1]) +#define storesz(o) (1 << ((o) - Ostore1)) + +/* Implements algorithm in 'Simple and Efficient Construction of Static Single Assignment' (Braun et al) */ + +struct pendingphi { ushort var, phi; }; +struct ssabuilder { + vec_of(struct pendingphi) *pendingphis; /* map of block to list of incomplete phis in block */ + imap_of(union ref *) curdefs; /* map of var to (map of block to def of var) */ + struct bitset *sealed; + int lastvisit; + int nblk; +}; + +static union ref readvar(struct ssabuilder *, int var, enum irclass, struct block *); + +static union ref +deltrivialphis(struct ssabuilder *sb, struct block *blk, union ref phiref) +{ + union ref **pcurdefs; + int var; + struct use *use, *uend; + union ref *args = phitab.p[instrtab[phiref.i].l.i]; + union ref same = {0}; + + assert(instrtab[phiref.i].op == Ophi); + + for (int i = 0; i < blk->npred; ++i) { + if (args[i].bits == same.bits || args[i].bits == phiref.bits) { + continue; /* unique value or self-reference */ + } if (same.bits != 0) { + return phiref; /* non-trivial */ + } + same = args[i]; + } + + if (same.bits == 0) + same = UNDREF; /* the phi is unreachable or in the start block */ + + /* replace uses */ + replcuses(phiref, same); + imap_each(&sb->curdefs, var, pcurdefs) { + (void)var; + for (int i = blk->id; i < sb->nblk; ++i) { + if ((*pcurdefs)[i].bits == phiref.bits) + (*pcurdefs)[i] = same; + } + } + + /* stops infinite recursion and marks phi for removal */ + instrtab[phiref.i].op = Onop; + + /* recursively try to remove all phi users as they might have become trivial */ +Redo: + for (use = instruse[phiref.i], uend = use + instrnuse[phiref.i]; use < uend; ++use) { + if (use->u != USERJUMP && instrtab[use->u].op == Ophi && use->u != phiref.i) { + union ref 
it = mkref(RTMP, use->u); + union ref vphi2 = deltrivialphis(sb, use->blk, it); + if (vphi2.bits != it.bits) { + /* deletion happened so phiref use may have changed */ + if (same.bits == it.bits) same.bits = vphi2.bits; + goto Redo; + } + } + } + deluses(phiref.i); + + return same; +} + +static union ref +addphiargs(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk, union ref phiref) +{ + union ref *args = phitab.p[instrtab[phiref.i].l.i]; + for (int i = 0; i < blk->npred; ++i) { + struct block *pred = blkpred(blk, i); + args[i] = readvar(sb, var, cls, pred); + adduse(blk, phiref.i, args[i]); + } + return deltrivialphis(sb, blk, phiref); +} + +static void +writevar(struct ssabuilder *sb, int var, struct block *blk, union ref val) +{ + union ref **pcurdefs; + if (!(pcurdefs = imap_get(&sb->curdefs, var))) { + pcurdefs = imap_set(&sb->curdefs, var, xcalloc(sb->nblk * sizeof(union ref))); + } + if (val.t == RTMP) assert(instrtab[val.i].op != Onop); + (*pcurdefs)[blk->id] = val; +} + +static union ref +readvarrec(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk) +{ + union ref val; + assert(blk->npred > 0); + if (!bstest(sb->sealed, blk->id)) { /* unsealed block */ + val = insertphi(blk, cls); + vpush(&sb->pendingphis[blk->id], ((struct pendingphi) { var, val.i })); + } else if (blk->npred == 1) { + val = readvar(sb, var, cls, blkpred(blk, 0)); + } else { + val = insertphi(blk, cls); + writevar(sb, var, blk, val); + val = addphiargs(sb, var, cls, blk, val); + } + writevar(sb, var, blk, val); + return val; +} + +static union ref +readvar(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk) +{ + union ref **pcurdefs; + if ((pcurdefs = imap_get(&sb->curdefs, var)) && (*pcurdefs)[blk->id].bits) + return (*pcurdefs)[blk->id]; + if (blk->npred == 0) /* entry block, var is read before being written to */ + return UNDREF; + return readvarrec(sb, var, cls, blk); +} + +static bool +trysealrec(struct ssabuilder *sb, struct 
block *blk) +{ + vec_of(struct pendingphi) *pending; + +Recur: + if (bstest(sb->sealed, blk->id)) return 1; + if (blk->id > sb->lastvisit) return 0; + markvisited(blk); + for (int i = 0; i < blk->npred; ++i) { + struct block *p = blkpred(blk, i); + if (wasvisited(p)) continue; + if (!trysealrec(sb, p)) return 0; + } + + bsset(sb->sealed, blk->id); + pending = (void *) &sb->pendingphis[blk->id]; + for (int i = 0; i < pending->n; ++i) { + addphiargs(sb, pending->p[i].var, instrtab[pending->p[i].phi].cls, blk, + mkref(RTMP, pending->p[i].phi)); + } + vfree(pending); + if (blk->s1 && !blk->s2) { + blk = blk->s1; + goto Recur; + } else if (blk->s1 && blk->s2) { + trysealrec(sb, blk->s1); + blk = blk->s2; + goto Recur; + } + return 1; +} + +static void +tryseal(struct ssabuilder *sb, struct block *blk) +{ + startbbvisit(); + trysealrec(sb, blk); +} + +/* require use, blkid; keeps use */ +void +mem2reg(struct function *fn) +{ + static struct bitset ssealed[4]; + struct ssabuilder sb = { .nblk = fn->nblk }; + struct block *blk; + + sb.pendingphis = xcalloc(fn->nblk * sizeof *sb.pendingphis); + if (fn->nblk <= 64 * arraylength(ssealed)) { + sb.sealed = ssealed; + memset(ssealed, 0, BSSIZE(fn->nblk) * sizeof *ssealed); + } else { + sb.sealed = xcalloc(BSSIZE(fn->nblk) * sizeof *sb.sealed); + } + + sortrpo(fn); + blk = fn->entry; + + do { + for (int i = 0; i < blk->ins.n; ++i) { + struct use *use, *uend; + enum irclass k = 0; + int sz = 0, ndef = 0; + enum op ext = Ocopy; + int var = blk->ins.p[i]; + struct instr *ins = &instrtab[var]; + + if (!oisalloca(ins->op)) continue; + uend = instruse[var] + instrnuse[var]; + for (use = instruse[var]; use < uend; ++use) { + struct instr *m; + + if (use->u == USERJUMP) goto Next; + m = &instrtab[use->u]; + if (oisload(m->op) && (!sz || sz == loadsz(m->op))) { + sz = loadsz(m->op); + k = loadcls(m->op); + if (sz < 4) ext = load2ext(m->op); + continue; + } + if (oisstore(m->op) && m->l.bits == mkref(RTMP, var).bits + && (!sz || sz == 
storesz(m->op))) { + sz = storesz(m->op); + ++ndef; + continue; + } + goto Next; + } + if (!ndef) /* slot is read from but never written to */ + goto Next; + + for (use = instruse[var]; use < uend; ++use) { + struct instr *m = &instrtab[use->u]; + + if (oisstore(m->op)) { + writevar(&sb, var, use->blk, m->r); + *m = mkinstr(Onop,0,); + } else if (oisload(m->op)) { + union ref val = readvar(&sb, var, k = m->cls, use->blk); + if (!val.bits) { /* var is used uninitialized */ + /* TODO emit diagnostic */ + /* load some garbage */ + *m = mkinstr(kisflt(k) ? Oloadf4 + (k==KF8) : Oloads1+ilog2(sz)*2, + k, mkref(RREG, mctarg->bpr)); + } else { + adduse(use->blk, use->u, val); + *m = mkinstr(ext, k, val); + } + } + } + /* remove alloca */ + delinstr(blk, i--); + + Next:; + } + + assert(sb.lastvisit == blk->id); + ++sb.lastvisit; + tryseal(&sb, blk); + } while ((blk = blk->lnext) != fn->entry); + + do { + /* remove phis marked for deletion */ + for (int i = 0; i < blk->phi.n; ++i) + if (instrtab[blk->phi.p[i]].op == Onop) + delphi(blk, i--); + } while ((blk = blk->lnext) != fn->entry); + + if (sb.sealed != ssealed) free(sb.sealed); + free(sb.pendingphis); + + for (int i = 0; i < sb.curdefs.mb.N; ++i) + if (bstest(sb.curdefs.mb.bs, i)) + free(sb.curdefs.v[i]); + imap_free(&sb.curdefs); + if (ccopt.dbg.m) { + efmt("<< After mem2reg >>\n"); + irdump(fn); + } +} + + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/ir/regalloc.c b/ir/regalloc.c new file mode 100644 index 0000000..64dcfac --- /dev/null +++ b/ir/regalloc.c @@ -0,0 +1,1195 @@ +#include "ir.h" + +/** Implements linear scan register allocation **/ + +#if 1 +#define DBG(...) if(ccopt.dbg.r) efmt(__VA_ARGS__) +#else +#define DBG(...) 
((void)0) +#endif + +/* The algorithm used here to introduce phis for temporaries whose definitions + * appear later than some of its uses is very similar to that in mem2reg() */ + +static int livelastblk; +struct pendingphi { ushort var, phi; }; +static vec_of(struct pendingphi) *pendingphis; +static int npendingphi; +static ushort **curdefs; +static union ref readvar(struct bitset *defined, enum irclass cls, int var, struct block *blk); + +static void +fillphi(struct bitset *defined, union ref phi, enum irclass cls, int var, struct block *blk) +{ + union ref *args = phitab.p[instrtab[phi.i].l.i]; + assert(blk->npred > 0); + for (int i = 0; i < blk->npred; ++i) + args[i] = readvar(defined, cls, var, blkpred(blk, i)); /* i-th phi arg = reaching def in i-th predecessor, not blk itself (cf. addphiargs in optmem.c) */ +} + +static union ref +readvar(struct bitset *defined, enum irclass cls, int var, struct block *blk) +{ + union ref val; + + if (bstest(defined, var)) return mkref(RTMP, var); + assert(cls && "?"); + + /* memoed definition */ + if (xbcap(curdefs) > blk->id && xbcap(curdefs[blk->id]) > var && curdefs[blk->id][var]) + return mkref(RTMP, curdefs[blk->id][var]); + + xbgrowz(&curdefs, blk->id + 1); + if (blk->id > livelastblk) { + ++npendingphi; + val = insertphi(blk, cls); + xbgrowz(&pendingphis, blk->id + 1); + vpush(&pendingphis[blk->id], ((struct pendingphi) { var, val.i })); + } else if (blk->npred == 1) { + val = readvar(defined, cls, var, blkpred(blk, 0)); + } else { + val = insertphi(blk, cls); + fillphi(defined, val, cls, var, blk); + } + xbgrowz(&curdefs[blk->id], var + 1); + assert(val.i > 0); + curdefs[blk->id][var] = val.i; + return val; +} + +static void +liveuse(struct bitset *defined, struct instr *ins, union ref *r, struct block *blk) +{ + int var; + if (r->t == RADDR) { + liveuse(defined, ins, &addrht[r->i].base, blk); + liveuse(defined, ins, &addrht[r->i].index, blk); + return; + } else if (r->t != RTMP) return; + var = r->i; + if (bstest(defined, var)) return; + + *r = readvar(defined, insrescls(instrtab[r->i]), var, blk); +} + +/* regalloc()
assumes every use of a temporary is visited before its definition + * so this function fixes cases where that would not apply by introducing phi functions */ +static void +fixlive(struct function *fn) +{ + extern int ninstr; + struct block *blk = fn->entry; + struct bitset definedbuf[4] = {0}; + struct bitset *defined = definedbuf; + + if (BSSIZE(ninstr) >= arraylength(definedbuf)) + defined = xcalloc(sizeof *defined * BSSIZE(ninstr)); + npendingphi = 0; + + do { + for (int i = 0; i < blk->phi.n; ++i) { + int var = blk->phi.p[i]; + bsset(defined, var); + } + + for (int i = 0; i < blk->ins.n; ++i) { + int var = blk->ins.p[i]; + struct instr *ins = &instrtab[var]; + if (ins->l.t) liveuse(defined, ins, &ins->l, blk); + if (ins->r.t) liveuse(defined, ins, &ins->r, blk); + bsset(defined, var); + } + } while ((blk = blk->lnext) != fn->entry); + + do { + vec_of(struct pendingphi) *pphi; + + if (!npendingphi) break; + if (xbcap(pendingphis) <= blk->id) break; + + pphi = (void *)&pendingphis[blk->id]; + npendingphi -= pphi->n; + for (int i = 0; i < pphi->n; ++i) { + fillphi(defined, mkref(RTMP, pphi->p[i].phi), instrtab[pphi->p[i].phi].cls, pphi->p[i].var, blk); + } + vfree(pphi); + } while ((blk = blk->lnext) != fn->entry); + + if (ccopt.dbg.l) { + DBG("<< After liveness fixup >>\n"); + irdump(fn); + } + if (defined != definedbuf) free(defined); +} + +static regset gpregset, fpregset; + +#define isfpr(reg) in_range((reg), mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1) +#define isgpr(reg) in_range((reg), mctarg->gpr0, mctarg->gpr0 + mctarg->nfpr - 1) + +/* an allocated physical register or stack slot */ +enum { ADEAD, AREG, ASTACK }; +struct alloc { ushort t : 2, a : 14; }; +#define afree() ((struct alloc) { ADEAD }) +#define areg(r) ((struct alloc) { AREG, (r) }) +#define astack(s) ((struct alloc) { ASTACK, (s) }) + +enum { MAXSPILL = 512 }; + +/* half-closed instr range [from, to) */ +struct range { ushort from, to; }; + +/* a temporary's lifetime interval */ +struct 
interval { + struct interval *next; /* for linked list of active,inactive,handled sets in linear scan */ + struct alloc alloc; + schar rhint : 7, /* register hint */ + fpr : 1; /* needs float register? */ + + /* sorted ranges array */ + uchar nrange; + union { + struct range _inl[2]; + struct range *_dyn; + }; +}; + +struct intervals { + int count; /* number of actual intervals */ + struct interval *temps; /* map of tmp -> interval */ + struct fixinterval { + struct fixinterval *next; + regset rs; + struct range range; + } *fixed; /* linked list of fixed intervals, always sorted */ +}; + +struct rega { + struct function *fn; + struct arena **arena; + regset free; /* free registers */ + struct bitset freestk[BSSIZE(MAXSPILL)]; /* free stack slots */ + int maxstk, /* highest stack slot used */ + stktop; + struct intervals intervals; +}; + +/* materialization of stack slot references is deferred until the end because + * the offset from base pointer depends on how many slots we end up allocating */ +static vec_of(union ref *) stkslotrefs; + +static void +addstkslotref(int instr, uint off) +{ + union ref *ref = &instrtab[instr].l; + *ref = mkref(RICON, off); + vpush(&stkslotrefs, ref); +} + +static struct alloc +allocstk(struct rega *ra) +{ + int s = -1; + + for (int i = 0; i < BSSIZE(MAXSPILL); ++i) { + if (ra->freestk[i].u != 0) { + s = i*64 + lowestsetbit(ra->freestk[i].u); + break; + } + } + if (s != -1) { + bsclr(ra->freestk, s); + if (ra->stktop < s) ra->stktop = s+1; + } else { + s = ra->stktop++; + } + if (ra->maxstk < s+1) ra->maxstk = s+1; + //imap_get(&ra->intervals.temps, var)->alloc = astack(s); + return astack(s); +} + +static void +freestk(struct rega *ra, int slot) +{ + DBG("FREE stk %d\n",slot); + if (slot < MAXSPILL) + bsset(ra->freestk, slot); + else if (slot == ra->stktop - 1) + --ra->stktop; +} + +/* Parallel moves algorithm from QBE: https://c9x.me/git/qbe.git/tree/rega.c?id=e493a7f23352f51acc0a1e12284ab19d7894488a#n201 */ + +#define mkmove(k, rd, 
rs) mkinstr(Omove, k, mkref(RREG, rd), mkref(RREG, rs)) + +enum pmstat { PMTOMOVE, PMMOVING, PMDONE }; +static struct pmove { + uchar k; + uchar stat; + struct alloc dst, src; +} pmove[MAXREGS]; +static int npmove; + +static void +pmadd(enum irclass k, struct alloc dst, struct alloc src) +{ + if (!memcmp(&dst, &src, sizeof dst)) return; + assert(npmove < MAXREGS); + pmove[npmove++] = (struct pmove) { k, PMTOMOVE, dst, src }; +} + +static void +emitmove(enum irclass k, struct alloc dst, struct alloc src, struct block *blk, int curi) +{ + struct instr mv = {.keep = 1}; + if (dst.t == AREG && src.t == AREG) { + insertinstr(blk, curi, mkmove(k, dst.a, src.a)); + } else if (dst.t == ASTACK && src.t == AREG) { + mv = mkinstr(Ostore1+ilog2(cls2siz[k]), 0, .r = mkref(RREG, src.a)); + addstkslotref(insertinstr(blk, curi, mv).i, dst.a*8); + } else if (dst.t == AREG && src.t == ASTACK) { + switch (mv.cls = k) { + default: assert(0); + case KI4: mv.op = Oloads4; break; + case KI8: mv.op = Oloadi8; break; + case KPTR: mv.op = targ_64bit ? 
Oloadi8 : Oloads4; break; + case KF4: mv.op = Oloadf4; break; + case KF8: mv.op = Oloadf8; break; + } + mv.reg = dst.a+1; + addstkslotref(insertinstr(blk, curi, mv).i, src.a*8); + } else assert(0); +} + +static int +pmrec(int i, struct block *blk, int curi, enum irclass *k) +{ + int j, c; + + if (!memcmp(&pmove[i].dst, &pmove[i].src, sizeof pmove->dst)) { + pmove[i].stat = PMDONE; + return -1; + } + + /* widen when necessary */ + assert(kisint(pmove[i].k) == kisint(*k)); + if (cls2siz[pmove[i].k] > cls2siz[*k]) + *k = pmove[i].k; + + for (j = 0; j < npmove; ++j) { + if (!memcmp(&pmove[j].dst, &pmove[i].src, sizeof pmove->dst)) { + break; + } + } + if (j == npmove) goto Done; + switch (pmove[j].stat) { + default: assert(0); + case PMMOVING: + c = j; + Swap: + assert(pmove[i].src.t == AREG && pmove[i].dst.t == AREG); + insertinstr(blk, curi, + mkinstr(Oswap, *k, mkref(RREG, pmove[i].dst.a), mkref(RREG, pmove[i].src.a), .keep = 1)); + break; + case PMTOMOVE: + pmove[i].stat = PMMOVING; + c = pmrec(j, blk, curi, k); + if (c == i) { + c = -1; + break; + } else if (c != -1) { + goto Swap; + } + /* fallthru */ + case PMDONE: + Done: + c = -1; + emitmove(*k, pmove[i].dst, pmove[i].src, blk, curi); + break; + } + + pmove[i].stat = PMDONE; + return c; +} + +static void +emitpm(struct block *blk) +{ + int curi = blk->ins.n; + for (int i = 0; i < npmove; ++i) { + if (pmove[i].stat == PMTOMOVE) { + pmrec(i, blk, curi, &(enum irclass) { pmove[i].k }); + } + } +} + +/* remove phis by inserting parallel moves */ +static void +lowerphis(struct rega *ra, struct block *blk, struct block *suc) +{ + int predno; + struct block *n = NULL; + + if (!blk->s2) n = blk; + + for (predno = 0; predno < suc->npred; ++predno) + if (blkpred(suc, predno) == blk) + break; + assert(predno < suc->npred); + + npmove = 0; + /* ensure phi args go to the same slot as phi with parallel copies */ + for (int i = 0; i < suc->phi.n; ++i) { + struct instr *phi = &instrtab[suc->phi.p[i]]; + union ref *arg = 
&phitab.p[phi->l.i][predno]; + struct alloc from, to; + + if (arg->t == RREG) continue; + assert(arg->t == RTMP); + DBG("resolve phi @%d, @%d, %%%d <- %%%d\n", blk->id, suc->id, phi - instrtab, arg->i); + if (instrtab[arg->i].reg) { + from = areg(instrtab[arg->i].reg - 1); + DBG(" it had R%d\n", from.a); + } else { + from = ra->intervals.temps[arg->i].alloc; + assert(from.t != ADEAD); + DBG(" found %c%d\n", " RS"[from.t], from.a); + if (from.t == AREG) + instrtab[arg->i].reg = from.a+1; + } + if (phi->reg) { + to = areg(phi->reg - 1); + DBG(" phi had R%d\n", to.a); + } else { + to = ra->intervals.temps[phi - instrtab].alloc; + assert(to.t != ADEAD); + DBG(" found phi %c%d\n", " RS"[to.t], to.a); + if (to.t == AREG) + phi->reg = to.a+1; + } + DBG(" > phi move %c%d -> %c%d\n", " RS"[from.t], from.a, " RS"[to.t], to.a); + if (!n) n = insertblk(ra->fn, blk, suc); + pmadd(phi->cls, to, from); + } + if (n) emitpm(n); +} + +/* generate copies for phi operands to transform into conventional-SSA */ +static void +fixcssa(struct function *fn) +{ + struct block *blk = fn->entry; + do { + if (!blk->phi.n) continue; + for (int p = 0; p < blk->npred; ++p) { + struct block *n, *pred = blkpred(blk, p); + if (!pred->s2) { + /* pred only has 1 successor (blk), so insert move directly in it */ + n = pred; + } else { + n = insertblk(fn, pred, blk); + assert(n->jmp.t == Jb && n->s1 == blk); + } + for (int i = 0; i < blk->phi.n; ++i) { + int phi = blk->phi.p[i]; + union ref *args = phitab.p[instrtab[phi].l.i]; + args[p] = insertinstr(n, n->ins.n, mkinstr(Ocopy, instrtab[phi].cls, args[p])); + } + } + } while ((blk = blk->lnext) != fn->entry); +} + +static inline bool +rangeoverlap(struct range a, struct range b) +{ + return a.from < b.to && b.from < a.to; +} + +static void +pushrange(struct interval *lv, struct range r) +{ + if (lv->nrange < 2) lv->_inl[lv->nrange++] = r; + else if (lv->nrange > 2) xbpush(&lv->_dyn, &lv->nrange, r); + else { + struct range *d = NULL; + xbgrow(&d, 4); + 
memcpy(d, lv->_inl, 2*sizeof *d); + d[lv->nrange++] = r; + lv->_dyn = d; + } +} +#define itrange(lv, i) ((lv)->nrange <= 2 ? (lv)->_inl : (lv)->_dyn)[i] + +static bool +intervaloverlap(struct interval *a, struct interval *b) +{ + for (int i = 0, j = 0; i < a->nrange && j < b->nrange; ) { + struct range r1 = itrange(a, i), r2 = itrange(b, j); + if (rangeoverlap(r1, r2)) return 1; + if (r1.to <= r2.from) ++i; + else ++j; + } + return 0; +} + +static bool +intervaldef(struct intervals *intervals, int t, struct block *blk, int pos, int reghint) +{ + struct interval *it = &intervals->temps[t]; + if (it->nrange) { + assert(itrange(it, 0).from <= pos); + itrange(it, 0).from = pos; + return 1; + } + return 0; +} + +static void +addrange(struct intervals *intervals, int t, struct range new, int reghint) +{ + struct interval *it = &intervals->temps[t]; + struct range *fst; + int n; + + if (!it->nrange) { + ++intervals->count; + it->rhint = reghint; + it->fpr = kisflt(insrescls(instrtab[t])); + pushrange(it, new); + return; + } + + fst = &itrange(it, 0); + /* fully covered by first range? */ + if (fst->from <= new.from && fst->to >= new.to) return; + /* overlaps with first range ? */ + if (fst->from <= new.to && new.to < fst->to) { + fst->from = new.from; + } else { + /* put new range at the start */ + pushrange(it, new); + memmove(&itrange(it, 1), &itrange(it, 0), sizeof(struct range) * (it->nrange - 1)); + itrange(it, 0) = new; + } + + /* new range might cover existing ranges (loop header lives), + * check and succesively merge */ + fst = &itrange(it, 0); + n = 0; + for (int i = 1; i < it->nrange; ++i) { + struct range other = itrange(it, i); + if (fst->to >= other.from) { + fst->to = fst->to > other.to ? 
fst->to : other.to; + ++n; + } else break; + } + + if (n > 0) { + for (int i = 1; i + n < it->nrange; ++i) + itrange(it, i) = itrange(it, i+n); + if (it->nrange > 2 && it->nrange - n <= 2) { + struct range *dyn = it->_dyn; + memcpy(it->_inl, dyn, (it->nrange - n) * sizeof *dyn); + xbfree(dyn); + } + it->nrange -= n; + } +} + +static void +usereg(struct rega *ra, int reg, struct block *blk, int pos) +{ + struct fixinterval *fxit; + if (rstest(mctarg->rglob, reg)) return; /* regalloc never allocates globally live regs, so don't need intervals for those */ + fxit = alloc(ra->arena, sizeof *fxit, 0); + fxit->next = ra->intervals.fixed; + fxit->range = (struct range) {blk->inumstart, pos}; + fxit->rs = 1<intervals.fixed = fxit; +} + +static void +defreg(struct rega *ra, int reg, int pos) { + if (rstest(mctarg->rglob, reg)) return; + for (struct fixinterval *fxit = ra->intervals.fixed; fxit; fxit = fxit->next) { + if (fxit->rs == 1<range.from <= pos); + fxit->range.from = pos; + // DBG(">>>REG %s range @%d: %d-%d\n", mctarg->rnames[reg], fxit->range.from.blk, fxit->range.from.ins, fxit->range.to.ins); + return; + } + } + assert(0&&"def reg not used"); +} + +/* lifetime interval construction: https://c9x.me/compile/bib/Wimmer10a.pdf */ +static void +buildintervals(struct rega *ra) +{ + extern int ninstr; + struct block *blk, *last; + struct bitset **livein = alloc(ra->arena, ra->fn->nblk * sizeof *livein, 0); + size_t bssize = BSSIZE(ninstr); + for (int i = 0; i < ra->fn->nblk; ++i) + livein[i] = allocz(ra->arena, bssize * sizeof *livein[i], 0); + ra->intervals.temps = allocz(ra->arena, ninstr * sizeof *ra->intervals.temps, 0); + + numberinstrs(ra->fn); + /* visit blocks in reverse, to build lifetime intervals */ + blk = last = ra->fn->entry->lprev; + do { + struct instr *ins = NULL; + struct bitset *live = livein[blk->id]; + struct block *suc = blk->s1; + // DBG("--- @%d ---\n",blk->id); + + /* live = union of successor.liveIn for each successor of b */ + if (blk->s1) 
bsunion(live, livein[blk->s1->id], bssize); + if (blk->s2) bsunion(live, livein[blk->s2->id], bssize); + + /* for each phi function phi of successors of b do + * live.add(phi.inputOf(b)) + */ + if (suc) do { + int predno; + for (predno = 0; blkpred(suc, predno) != blk; ++predno) ; + for (int i = 0; i < suc->phi.n; ++i) { + struct instr *phi = &instrtab[suc->phi.p[i]]; + union ref *arg = &phitab.p[phi->l.i][predno]; + assert(arg->t == RTMP); + // DBG("from phi set live %%%d\n", arg->i); + bsset(live, arg->i); + } + } while (suc != blk->s2 && (suc = blk->s2)); + + /* for each opd in live do + * intervals[opd].addRange(b.from, b.to) + */ + for (uint i = 0; bsiter(&i, live, bssize); ++i) { + // DBG("itretave %%%d\n",i ); + addrange(&ra->intervals, i, (struct range){blk->inumstart, blk->inumstart + blk->ins.n + 2}, -1); + } + + /* for each operation op of b in reverse order do */ + union ref queue[8] = { blk->jmp.arg[0], blk->jmp.arg[1] }; + goto Branchopd; + for (int curi, pos ; curi >= 0; --curi) { + int out = blk->ins.p[curi], reghint; + ins = &instrtab[out]; + pos = blk->inumstart + 1 + curi; + /* for each output operand opd of op do + * intervals[opd].setFrom(op.id) + * live.remove(opd) + */ + reghint = ins && ins->op == Ocopy && ins->l.t == RREG ? 
ins->l.i : -1; + if (!intervaldef(&ra->intervals, out, blk, pos, reghint)) { + if (insrescls(*ins) && ins->op != Omove && !ins->keep && !(ins->op == Ocopy && ins->l.t == RREG)) { + /* dead */ + *ins = mkinstr(Onop,0,); + } + } + bsclr(live, out); + + /* gather fixed intervals */ + if (ins->op == Omove) { + assert(ins->l.t == RREG); + defreg(ra, ins->l.i, pos); + } else if (ins->op == Ocall) { + struct call *call = &calltab.p[ins->r.i]; + regset rclob = (gpregset | fpregset) &~ (mctarg->rglob | mctarg->rcallee); + ra->fn->isleaf = 0; + + for (int i = 0; i < 2; ++i) { + if (call->abiret[i].ty.bits) { + int reg = call->abiret[i].reg; + rsclr(&rclob, reg); + defreg(ra, reg, pos); + } + } + if (rclob) { + struct fixinterval *fxit = alloc(ra->arena, sizeof *fxit, 0); + fxit->next = ra->intervals.fixed; + fxit->range = (struct range) {pos, pos}; + fxit->rs = rclob; + ra->intervals.fixed = fxit; + } + for (int j = call->narg - 1; j >= 0; --j) { + int reg = call->abiarg[j].reg; + if (reg >= 0) { + usereg(ra, reg, blk, pos); + } + } + } + + + /* for each input operand opd of op do + * intervals[opd].addRange(b.from, op.id) + * live.add(opd) + */ + reghint = (ins && ins->op == Omove && ins->l.t == RREG) ? ins->l.i : -1; + queue[0] = ins->r, queue[1] = ins->l; + if (0) { + Branchopd: + reghint = -1; + curi = blk->ins.n; + pos = blk->inumstart + blk->ins.n + 1; + } + for (int nqueue = ins && ins->op == Omove ? 
1 : 2; nqueue > 0;) { + union ref r = queue[--nqueue]; + + /* do not allocate a reg for a cmp op used as branch argument, since it's a pseudo op */ + if (curi == blk->ins.n && blk->jmp.t == Jb && r.t == RTMP && instrtab[r.i].keep) + continue; + + if (r.t == RTMP) { + addrange(&ra->intervals, r.i, (struct range){blk->inumstart, pos}, reghint); + bsset(live, r.i); + } else if (r.t == RREG) { + usereg(ra, r.i, blk, pos); + } else if (r.t == RADDR) { + reghint = -1; + queue[nqueue++] = addrht[r.i].base; + queue[nqueue++] = addrht[r.i].index; + } + } + } + + /* for each phi function phi of b do + * live.remove(phi.output) + */ + for (int i = 0; i < blk->phi.n; ++i) + bsclr(live, blk->phi.p[i]); + + /* if b is loop header then + * loopEnd = last block of the loop starting at b + * for each opd in live do + * &ra->intervals[opd].addRange(b.from, loopEnd.to) + */ + struct block *loopend = NULL; + for (int i = 0; i < blk->npred; ++i) { + struct block *pred = blkpred(blk, i); + if (pred->id > blk->id) + loopend = loopend && loopend->id > pred->id ? loopend : pred; + } + if (loopend) { + // DBG("i'm loop header - @%d (to @%d)\n", blk->id, loopend->id); + for (uint opd = 0; bsiter(&opd, live, bssize); ++opd) { + // DBG(" i have live %%%d\n", opd); + addrange(&ra->intervals, opd, (struct range){blk->inumstart, loopend->inumstart + loopend->ins.n+1}, -1); + /* struct interval *lv = imap_get(&ra->intervals.temps, opd); + for (int i = 0; i < lv->n; ++i) { + struct range r = itrange(lv, i); + // DBG(" @%d:%d - @%d:%d\n", r.from.blk, r.from.ins, r.to.blk, r.to.ins); + } */ + } + } + } while ((blk = blk->lprev) != last); + + for (int var = 0; var < ninstr; ++var) { + struct interval *it = &ra->intervals.temps[var]; + if (!it->nrange) continue; + DBG("lifetime of %%%d: ", var); + for (int i = 0; i < it->nrange; ++i) { + struct range r = itrange(it, i); + DBG("[%d,%d)%s", r.from, r.to, i < it->nrange-1 ? 
", " : ""); + } + DBG("\n"); + } + for (struct fixinterval *fx = ra->intervals.fixed; fx; fx = fx->next) { + DBG("fixed {"); + for (int r = 0; rsiter(&r, fx->rs); ++r) + DBG("%s,", mctarg->rnames[r]); + DBG("}: [%d,%d)\n", fx->range.from, fx->range.to); + } +} + +static bool +itcontainspos(const struct interval *it, int pos) +{ + for (int i = 0; i < it->nrange; ++i) { + struct range r = itrange(it, i); + if (r.from > pos) return 0; + if (pos < r.to) return 1; + } + return 0; +} + +/* quicksort */ +static void +sortintervals(struct interval **xs, int lo, int hi) +{ + assert(lo >= 0 && hi >= 0); + while (lo < hi) { + /* partition */ + int i = lo - 1, p = hi + 1, + pivot = itrange(xs[lo], 0).from; + for (;;) { + struct interval *tmp; + do ++i; while (itrange(xs[i], 0).from < pivot); + do --p; while (itrange(xs[p], 0).from > pivot); + if (i >= p) break; + /* swap */ + tmp = xs[i]; + xs[i] = xs[p]; + xs[p] = tmp; + } + /* recur */ + if (p + 1 >= hi) { + hi = p; + } else { + if (lo < p) + sortintervals(xs, lo, p); + lo = p + 1; + } + } +} + +static void +linearscan(struct rega *ra) +{ + struct intervals *intervals = &ra->intervals; + int nunhandled = 0; + struct interval **unhandled = NULL; + struct interval *active = NULL, *inactive = NULL, *handled = NULL; + + if (!intervals->count) return; + /* sort intervals */ + { + extern int ninstr; + unhandled = alloc(ra->arena, sizeof *unhandled * intervals->count, 0); + + for (int i = 0; i < ninstr; ++i) { + if (!intervals->temps[i].nrange) continue; + unhandled[nunhandled++] = &intervals->temps[i]; + } + assert(nunhandled == intervals->count); + sortintervals(unhandled, 0, nunhandled-1); + } + + /* LINEAR SCAN */ + for (struct interval **pcurrent = unhandled; nunhandled-- > 0; ++pcurrent) { + struct interval *current = *pcurrent; + int pos = itrange(current, 0).from; + + /* Expire old intervals */ + + /* check for intervals in active that are handled or inactive */ + for (struct interval **lnk = &active, *it = *lnk, *next; 
(next = it?it->next:0), it; it = next) { + //DBG("< im active %%%d\n", intervals->temps.mb.k[it - intervals->temps.v]); + /* ends before position? */ + if (itrange(it, it->nrange-1).to <= pos) { + /* move from active to handled */ + *lnk = next; + it->next = handled; + handled = it; + if (it->alloc.t == AREG) { + ra->free |= 1<alloc.a; + //DBG(" unblock %s\n", mctarg->rnames[it->reg]); + } else if (it->alloc.t == ASTACK) { + freestk(ra, it->alloc.a); + } + } else + /* it does not cover position? */ + if (!itcontainspos(it, pos)) { + /* move from active to inactive */ + *lnk = next; + it->next = inactive; + inactive = it; + if (it->alloc.t == AREG) { + ra->free |= 1<alloc.a; + DBG(" >> %%%zd unblock %s\n", it-ra->intervals.temps, mctarg->rnames[it->alloc.a]); + } + } else lnk = &it->next; + } + + /* check for intervals in inactive that are handled or active */ + for (struct interval **lnk = &inactive, *it = *lnk, *next; (next = it?it->next:0), it; it = next) { + //DBG("< im inactive %%%d\n", intervals->temps.mb.k[it - intervals->temps.v]); + /* ends before position? */ + if (itrange(it, it->nrange-1).to <= pos) { + /* move from inactive to handled */ + *lnk = next; + it->next = handled; + handled = it; + if (it->alloc.t == ASTACK) { + freestk(ra, it->alloc.a); + } + } else + /* it covers position? */ + if (itcontainspos(it, pos)) { + /* move from inactive to active */ + *lnk = next; + it->next = active; + active = it; + if (it->alloc.t == AREG) { + assert(rstest(ra->free, it->alloc.a)); + ra->free &= ~(1<alloc.a); + DBG(" << %%%zd reblock %s\n", it-ra->intervals.temps, mctarg->rnames[it->alloc.a]); + } + } else lnk = &it->next; + } + + /* find a register for current */ + { + int this = current - intervals->temps; + regset free = ra->free & (current->fpr ? 
fpregset : gpregset); + struct instr *ins = &instrtab[this]; + int reg = 0; + int end = itrange(current, current->nrange-1).to; + + /* exclude regs from overlapping fixed intervals */ + for (struct fixinterval *fxit = intervals->fixed; fxit; fxit = fxit->next) { + if (fxit->range.to <= pos) { + intervals->fixed = fxit->next; + continue; + } else if (fxit->range.from >= end) { + break; + } + + for (int i = 0; i < current->nrange; ++i) { + if (rangeoverlap(fxit->range, itrange(current, i))) { + free &=~ fxit->rs; + } + } + } + /* exclude regs from overlapping inactive intervals */ + for (struct interval *it = inactive; it; it = it->next) { + if (it->alloc.t == AREG && intervaloverlap(it, current)) { + rsclr(&free, it->alloc.a); + } + } + /* for 2-address instrs, exclude reg from 2nd arg */ + if (ins->inplace && opnarg[ins->op] == 2) { + int xreg; + if (ins->r.t == RREG) rsclr(&free, ins->r.i); + else if (ins->r.t == RTMP && (xreg = instrtab[ins->r.i].reg)) + rsclr(&free, xreg-1); + } + + if (!free) { + /* spill */ + current->alloc = allocstk(ra); + DBG("%%%d got stk%d\n", this, current->alloc.a); + /* move current to active */ + current->next = active; + active = current; + continue; + } + + /* have free regs, try to use hint */ + if (current->rhint >= 0) + DBG("have hint %s for %%%zd\n", + mctarg->rnames[current->rhint], current - intervals->temps); + if (current->rhint >= 0 && rstest(free, current->rhint)) { + DBG(" (used hint)\n"); + reg = current->rhint; + goto GotReg; + } else { + /* for two-address instructions, try to use the reg of left arg */ + if (ins->op != Ophi && (opnarg[ins->op] == 1 || (opnarg[ins->op] == 2 && ins->inplace))) { + DBG(" %%%d try %d,%d\n", this, ins->l.t,ins->l.i); + if (ins->l.t == RREG && rstest(free, reg = ins->l.i)) + goto GotReg; + if (ins->l.t == RTMP) + if ((reg = instrtab[ins->l.i].reg-1) >= 0) + if (rstest(free, reg)) + goto GotReg; + /* for phi, try to use reg of any arg */ + } else if (ins->op == Ophi) { + union ref *arg = 
phitab.p[ins->l.i]; + for (int i = 0; i < xbcap(arg); ++i) { + if (arg->t == RREG && rstest(free, reg = arg->i)) goto GotReg; + if (arg->t == RTMP) + if ((reg = instrtab[arg->i].reg-1) >= 0) + if (rstest(free, reg)) + goto GotReg; + } + } + + /* prefer caller-saved registers */ + if (free &~ mctarg->rcallee) free &=~ mctarg->rcallee; + + for (reg = 0; !rstest(free, reg); ++reg); + } + GotReg: + current->alloc = areg(reg); + ins->reg = reg + 1; + DBG("%%%d got %s\n", this, mctarg->rnames[reg]); + rsclr(&ra->free, reg); + rsset(&ra->fn->regusage, reg); + + //if current has a register assigned then add current to active + current->next = active; + active = current; + } + } +} + +/* replace temps with physical regs, add loads & stores for spilled temps */ +static bool +devirt(struct rega *ra, struct block *blk) +{ + bool allnops = 1; + struct function *fn = ra->fn; + for (int curi = 0; curi < blk->ins.n; ++curi) { + int temp = blk->ins.p[curi]; + struct instr *ins = &instrtab[temp]; + struct interval *it; + struct alloc *alloc; + union ref *argref[4]; + int nargref = 0; + int nspill = 0; + + /* devirtualize ref args */ + for (int i = 0; i < 2; ++i) { + union ref *r = &i[&ins->l]; + if (r->t == RADDR) { + /* XXX mutating hashed addr.. should be fine though (because + * new RADDRs shouldn't be created after regalloc) + * maybe hashing them in the first place is unnecessary */ + struct addr *a = &addrht[r->i]; + argref[nargref++] = &a->base; + argref[nargref++] = &a->index; + } else { + argref[nargref++] = r; + } + } + for (int i = 0; i < nargref; ++i) { + static uchar cls2load[] = { + [KI4] = Oloads4, [KI8] = Oloadi8, [KF4] = Oloadf4, [KF8] = Oloadf8, [KPTR] = 0 + }; + cls2load[KPTR] = targ_64bit ? Oloadi8 : Oloads4; + union ref *r = argref[i]; + int tr; + if (r->t == RTMP) { + alloc = (it = &ra->intervals.temps[r->i]) && it->nrange ? 
&it->alloc : NULL; + if (alloc->t == ASTACK && ins->op == Omove) { + /* move [reg], [stk] -> [reg] = load [stk] */ + assert(r == &ins->r && ins->l.t == RREG); + ins->reg = ins->l.i+1; + ins->op = cls2load[ins->cls]; + ins->r = NOREF; + addstkslotref(temp, alloc->a*8); + } else if (alloc->t == ASTACK && ins->op == Ocopy) { + /* [reg] = copy [stk] -> [reg] = load [stk] */ + assert(r == &ins->l && ins->reg); + ins->op = cls2load[ins->cls]; + addstkslotref(temp, alloc->a*8); + } else if (alloc->t == ASTACK) { + /* ref was spilled, gen load to scratch register and use it */ + struct instr ld = {.cls = insrescls(instrtab[r->i])}; + int reg = kisint(ld.cls) ? mctarg->gprscratch : mctarg->fprscratch; + bool dosave; + /* pick scratch register, or any register that doesn't conflict with this instr's srcs/dst */ + if (nspill > 0) { + for (reg = kisflt(ld.cls) ? mctarg->fpr0 : mctarg->gpr0;; ++reg) { + if (reg == ins->reg-1) continue; + for (int j = 0; j < i; ++j) + if (argref[j]->t == RREG && argref[j]->i == reg) continue; + break; + } + /* if not the designated scratch register, we need to save+restore */ + if ((dosave = rstest(fn->regusage, reg) || rstest(mctarg->rcallee, reg))) { + insertinstr(blk, curi++, mkinstr(Oxsave, 0, .l = mkref(RREG, reg))); + } + } + ld.reg = reg+1; + ld.op = cls2load[ld.cls]; + addstkslotref(insertinstr(blk, curi++, ld).i, alloc->a*8); + *r = mkref(RREG, reg); + if (nspill > 0 && dosave) { + insertinstr(blk, curi+1, mkinstr(Oxrestore, 0, .l = mkref(RREG, reg))); + } + ++nspill; + } else if ((tr = instrtab[r->i].reg)) { + assert(alloc && alloc->t == AREG && alloc->a == tr-1); + *r = mkref(RREG, tr-1); + } + } + } + if (nspill > 0) assert(ins->op != Ocall); + + /* devirtualize destination */ + alloc = temp < ra->intervals.count && (it = &ra->intervals.temps[temp]) && it->nrange ? 
&it->alloc : NULL; + if (alloc && alloc->t == ASTACK) { + int store = Ostore1 + ilog2(cls2siz[insrescls(*ins)]); + /* t was spilled, gen store */ + if (ins->op == Ocopy) { + ins->op = store; + ins->r = ins->l; + addstkslotref(temp, alloc->a*8); + } else { + int reg = kisint(insrescls(*ins)) ? mctarg->gprscratch : mctarg->fprscratch; + assert(nspill == 0); + ins->reg = reg+1; + addstkslotref( + insertinstr(blk, ++curi, mkinstr(store, 0, .r = mkref(RREG, reg))).i, + alloc->a*8); + } + } else if (!ins->reg && insrescls(*ins) && ins->op != Omove && !ins->keep) { + /* dead */ + Nop: + ins->op = Onop; + } else if (ins->op == Omove && ins->r.t == RREG && ins->l.i == ins->r.i) { + /* move r1,r2 / r1=r2 */ + goto Nop; + } else if (ins->op == Ocopy && ins->l.t == RREG && ins->reg-1 == ins->l.i) { + /* r1 = copy r2 / r1=r2 */ + goto Nop; + } else if (ins->inplace && ins->l.t == RREG && ins->reg && ins->reg-1 != ins->l.i) { + /* fixup in-place (two-address) instructions */ + allnops = 0; + insertinstr(blk, curi++, mkmove(ins->cls, ins->reg-1, ins->l.i)); + ins->l.i = ins->reg-1; + } else if (ins->op != Onop) allnops = 0; + } + + return allnops; +} + +static void +fini(struct rega *ra) +{ + int id = 0; + struct function *fn = ra->fn; + struct block *blk = fn->entry; + + do { + bool allnops; + + blk->id = id++; + allnops = devirt(ra, blk); + + /* remove no-op blocks */ + if (allnops && !blk->s2 && blk->npred > 0) { + bool delet = 1; + for (int i = 0; i < blk->npred; ++i) { + struct block *p = blkpred(blk, i); + if (p->s2 && !blk->s1) + delet = 0; + } + for (int i = 0; i < blk->npred; ++i) { + struct block *p = blkpred(blk, i); + if (!p->s2 && !blk->s1) { + /* simplify: + * + * @p: + * ... + * b @blk + * @blk: + * NOP + * ret + */ + assert(p->s1 == blk); + p->jmp.t = Jret; + p->s1 = NULL; + } else if (blk->s1) { + /* simplify: + * + * @p: + * ... 
+ * b %x, @blk, @other + * @blk: + * NOP + * b @next + */ + struct block *next = blk->s1; + if (p->s1 == blk) p->s1 = next; + else if (p->s2 == blk) p->s2 = next; + else continue; + for (int i = 0; i < next->npred; ++i) { + if (blkpred(next, i) == blk) { + blkpred(next, i) = p; + goto NextPred; + } + } + addpred(next, p); + } + NextPred:; + } + if (delet) { + freeblk(fn, blk); + --id; + } + } + } while ((blk = blk->lnext) != fn->entry); +} + +void +regalloc(struct function *fn) +{ + static union ref *stkslotrefsbuf[64]; + struct rega ra = {fn, .arena = fn->arena}; + struct block *blk, *last; + + /* setup */ + if (!fpregset || !gpregset) { + for (int r = 0; r < MAXREGS; ++r) { + if (isfpr(r)) + rsset(&fpregset, r); + else if (isgpr(r)) + rsset(&gpregset, r); + } + } + ra.free = (gpregset | fpregset) &~ (mctarg->rglob | (1ull<gprscratch) | (1ull<fprscratch)); + memset(ra.freestk, 0xFF, sizeof ra.freestk); + fn->regusage = 0; + fn->stksiz = alignup(fn->stksiz, 8); + fn->isleaf = 1; + vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf)); + + /* put into reverse post order */ + sortrpo(fn); + + /* fix liveness ranges */ + fixlive(fn); + + /* transform into CSSA */ + fixcssa(fn); + + fillblkids(fn); + + if (ccopt.dbg.r) { + efmt("<< Before linear scan >>\n"); + irdump(fn); + } + + /* linear scan: build lifetime intervals */ + buildintervals(&ra); + + /* linear scan: assign physical registers and stack slots */ + linearscan(&ra); + + /* get out of SSA */ + blk = last = fn->entry->lprev; + do { + if (blk->id < 0) continue; + for (int i = 0; i < blk->npred; ++i) { + lowerphis(&ra, blkpred(blk, i), blk); + } + vfree(&blk->phi); + } while ((blk = blk->lprev) != last); + + /* devirtualize & final cleanup */ + fini(&ra); + + for (struct interval *it = ra.intervals.temps; ra.intervals.count > 0; ++it) { + if (it->nrange > 2) xbfree(it->_dyn); + if (it->nrange > 0) --ra.intervals.count; + } + + fn->stksiz += ra.maxstk*8; + if (fn->stksiz > 1<<24) error(NULL, "'%s' 
stack frame too big", fn->name); + while (stkslotrefs.n) { + union ref *adr = stkslotrefs.p[--stkslotrefs.n]; + *adr = mkaddr((struct addr) { .base = mkref(RREG, mctarg->bpr), .disp = -fn->stksiz + adr->i }); + } + vfree(&stkslotrefs); + + if (ccopt.dbg.r) { + DBG("<< After regalloc >>\n"); + irdump(fn); + } +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/ir/ssa.c b/ir/ssa.c new file mode 100644 index 0000000..9b89878 --- /dev/null +++ b/ir/ssa.c @@ -0,0 +1,58 @@ +#include "ir.h" + +/* require use, keeps use */ +void +copyopt(struct function *fn) +{ + struct block *blk = fn->entry; + + do { + for (int i = 0; i < blk->ins.n; ++i) { + union ref var = mkref(RTMP, blk->ins.p[i]); + struct instr *ins = &instrtab[var.i]; + enum irclass k; + + if (ins->op == Ocopy) { + union ref arg = ins->l; + if (arg.t == RTMP) k = insrescls(instrtab[arg.i]); + else if (arg.t == RICON) k = cls2siz[ins->cls] == 4 ? KI4 : KI8; + else if (arg.t == RXCON) k = isnumcon(arg) ? conht[arg.i].cls : KPTR; + else continue; + if (ins->cls != k) continue; + + replcuses(var, arg); + *ins = mkinstr(Onop,0,); + deluses(var.i); + } + } + delnops(blk); + } while ((blk = blk->lnext) != fn->entry); +} + +void +filluses(struct function *fn) +{ + extern int ninstr; + struct block *blk = fn->entry; + + for (int i = 0; i < ninstr; ++i) + deluses(i); + + do { + for (int i = 0; i < blk->phi.n; ++i) { + int ins = blk->phi.p[i]; + union ref *phi = phitab.p[instrtab[ins].l.i]; + for (int i = 0; i < blk->npred; ++i) + adduse(blk, ins, phi[i]); + } + for (int i = 0; i < blk->ins.n; ++i) { + int ins = blk->ins.p[i]; + adduse(blk, ins, instrtab[ins].l); + adduse(blk, ins, instrtab[ins].r); + } + adduse(blk, USERJUMP, blk->jmp.arg[0]); + adduse(blk, USERJUMP, blk->jmp.arg[1]); + } while ((blk = blk->lnext) != fn->entry); +} + +/* vim:set ts=3 sw=3 expandtab: */ -- cgit v1.2.3