Diffstat (limited to 'ir')
-rw-r--r--  ir/abi0.c      393
-rw-r--r--  ir/cfg.c        43
-rw-r--r--  ir/dump.c      255
-rw-r--r--  ir/intrin.c     77
-rw-r--r--  ir/intrin.def    2
-rw-r--r--  ir/ir.c        618
-rw-r--r--  ir/ir.h        280
-rw-r--r--  ir/op.def       76
-rw-r--r--  ir/optmem.c    285
-rw-r--r--  ir/regalloc.c 1195
-rw-r--r--  ir/ssa.c        58
11 files changed, 3282 insertions, 0 deletions
diff --git a/ir/abi0.c b/ir/abi0.c
new file mode 100644
index 0000000..82d10a5
--- /dev/null
+++ b/ir/abi0.c
@@ -0,0 +1,393 @@
+#include "ir.h"
+
+/** This pass adds the ABI register mappings for arguments/returns
+ ** and lowers aggregate params/args/returns into scalars.
+ **
+ ** Invariant: when this pass runs, every `call` instruction must be
+ ** immediately preceded by exactly narg `arg` instructions, with no
+ ** other instructions in between.
+ **/
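+
+/* Illustrative shape of the invariant (hypothetical IR, roughly in the dump
+ * syntax used by irdump()):
+ *    arg i4, %1
+ *    arg struct.s.3, %2
+ *    i4 %3 = call $f, #2
+ * i.e. the call's narg=2 `arg` instructions sit immediately before it. */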
+
+struct abiargsvec { vec_of(struct abiarg); };
+
+static int
+abiret(struct abiarg abiret[2], struct abiargsvec *abiargs, int *ni, union irtype retty)
+{
+ short r[2];
+ uchar cls[2];
+ int retreg = 0;
+ if (retty.isagg) {
+ retreg = mctarg->abiret(r, cls, ni, retty);
+ if (!retreg) {
+ vpush(abiargs, ((struct abiarg) { cls2type(KPTR), .reg = r[1] }));
+ if (r[0] == -1) {
+ memset(abiret, 0, 2*sizeof *abiret);
+ } else {
+ abiret[0].ty = cls2type(KPTR);
+ abiret[0].reg = r[0];
+ }
+ }
+ } else if (retty.cls) {
+ retreg = mctarg->abiret(r, cls, ni, retty);
+ assert(retreg == 1);
+ }
+ for (int i = 0; i < retreg; ++i) {
+ abiret[i].ty = cls2type(cls[i]);
+ abiret[i].reg = r[i];
+ }
+ return retreg;
+}
+
+static int
+abiarg(struct abiargsvec *abiargs, int *ni, int *nf, int *ns, union irtype ty)
+{
+ short r[2];
+ uchar cls[2];
+ int ret = mctarg->abiarg(r, cls, ni, nf, ns, ty);
+ if (!ret) { /* in stack */
+ vpush(abiargs, ((struct abiarg) { ty, .stk = r[0] }));
+ } else if (ret == 1 && ty.isagg && cls[0] == KPTR) { /* aggregate by pointer */
+ vpush(abiargs, ((struct abiarg) { cls2type(cls[0]), .reg = r[0] }));
+ } else { /* by regs */
+ vpush(abiargs, ((struct abiarg) { cls2type(cls[0]), .reg = r[0] }));
+ if (ret == 2)
+ vpush(abiargs, ((struct abiarg) { cls2type(cls[1]), .reg = r[1] }));
+ }
+ return ret;
+}
+
+static struct instr
+copyparam(struct function *fn, int *curi, int param, struct abiarg abi)
+{
+ struct instr par = mkinstr(Oparam, abi.ty.cls, mkref(RICON, param), mktyperef(abi.ty));
+ if (abi.reg >= 0) { /* reg */
+ assert(!abi.ty.isagg);
+ return par;
+ } else if (!abi.ty.isagg) { /* scalar in stack */
+ enum op ld;
+ par.cls = KPTR;
+ if (abi.ty.cls == KPTR) abi.ty.cls = siz2intcls[cls2siz[abi.ty.cls]];
+ switch (abi.ty.cls) {
+ default: assert(0);
+ case KI4: ld = Oloadu4; break;
+ case KI8: ld = Oloadi8; break;
+ case KF4: ld = Oloadf4; break;
+ case KF8: ld = Oloadf8; break;
+ }
+ return mkinstr(ld, abi.ty.cls, insertinstr(fn->entry, (*curi)++, par));
+ } else { /* aggregate in stack */
+ par.cls = KPTR;
+ return par;
+ }
+}
+
+static void
+patchparam(struct function *fn, int *curi, int *param, int tydat, int nabi, struct abiarg abi[2])
+{
+ struct block *blk = fn->entry;
+ assert(in_range(nabi,1,2));
+
+ for (; *curi < blk->ins.n; ++*curi) {
+ struct instr *ins = &instrtab[blk->ins.p[*curi]];
+ if (ins->op != Oparam) continue;
+ assert(ins->r.t == RTYPE
+ && ins->r.i == (tydat < 0 ? abi[0].ty : (union irtype){.isagg=1, .dat=tydat}).bits);
+ if (abi[0].ty.isagg || tydat < 0) {
+ /* aggregate in stack or scalar, just copy */
+ assert(nabi == 1);
+ *ins = copyparam(fn, curi, *param, abi[0]);
+ } else { /* aggregate in registers, materialize */
+ union ref alloc, r[2];
+ struct instr st;
+ const struct typedata *td;
+ uint nalloc;
+
+ assert(tydat >= 0);
+ td = &typedata[tydat];
+ assert(td->siz <= 16 && td->align <= 16);
+ nalloc = td->align == 16 ? 1 : td->siz/8 + (td->siz%8 != 0);
+ *ins = mkinstr(Oalloca8 + (td->align==16), KPTR, mkref(RICON, nalloc));
+ alloc = mkref(RTMP, ins - instrtab);
+ r[0] = insertinstr(blk, ++*curi, copyparam(fn, NULL, *param, abi[0]));
+ if (nabi > 1)
+ r[1] = insertinstr(blk, ++*curi, copyparam(fn, NULL, ++*param, abi[1]));
+ /* transform
+ * %x = copy %p
+ * into
+ * %x = alloca...
+ * store* %x, %a
+ * store* %x + N, %b
+ */
+ st = mkinstr(Ostore1 + ilog2(cls2siz[abi[0].ty.cls]), 0, alloc, r[0]);
+ insertinstr(blk, ++*curi, st);
+ if (nabi > 1) {
+ struct instr tmp = mkinstr(Oadd, KPTR, alloc, mkref(RICON, cls2siz[abi[0].ty.cls]));
+ st = mkinstr(Ostore1 + ilog2(cls2siz[abi[1].ty.cls]), 0, insertinstr(blk, ++*curi, tmp), r[1]);
+ insertinstr(blk, ++*curi, st);
+ }
+ }
+ ++*param;
+ ++*curi;
+ break;
+ }
+}
+
+static int
+patcharg(struct block *blk, int *icall, struct call *call,
+ int argidx, int nabi, struct abiarg abi[2])
+{
+ int arginst = *icall - (call->narg - argidx);
+ struct instr *arg = &instrtab[blk->ins.p[arginst]];
+ assert(arg->op == Oarg && arg->l.t == RTYPE);
+ if (ref2type(arg->l).isagg) {
+ /* aggregate argument */
+ if (abi[0].ty.isagg /* aggregate in stack */
+ || abi[0].ty.cls == KPTR) /* aggregate by pointer */
+ {
+ return 1;
+ } else { /* aggregate in registers */
+ union ref src = arg->r;
+ /* deconstruct into
+ * %a = load* %x
+ * (%b = load* %x + N)
+ */
+ delinstr(blk, arginst);
+ for (int i = 0; i < nabi; ++i) {
+ /* XXX this can generate unaligned loads */
+ struct instr ins = {0};
+ union ref temp;
+ switch (ins.cls = abi[i].ty.cls) {
+ default: assert(0);
+ case KI4: ins.op = Oloadu4; break;
+ case KI8: ins.op = Oloadi8; break;
+ case KF4: ins.op = Oloadf4; break;
+ case KF8: ins.op = Oloadf8; break;
+ }
+ if (i == 0)
+ ins.l = src;
+ else
+ ins.l = insertinstr(blk, arginst++,
+ mkinstr(Oadd, KPTR, src,
+ mkref(RICON, cls2siz[abi[0].ty.cls])));
+ temp = insertinstr(blk, arginst++, ins);
+ insertinstr(blk, arginst++, mkarginstr(abi[i].ty, temp));
+ }
+ *icall = arginst + (call->narg - argidx - 1);
+ return nabi;
+ }
+ } else /* normal scalar argument */
+ return 1;
+}
+
+static struct abiarg abiargsbuf[32];
+
+void
+abi0_call(struct function *fn, struct instr *ins, struct block *blk, int *curi)
+{
+ union ref retmem;
+ struct abiargsvec abiargs = {VINIT(abiargsbuf, arraylength(abiargsbuf))};
+ bool sretarghidden = 0;
+ int ni, nf, ns, vararg, nret = 0;
+ struct call *call = &calltab.p[ins->r.i];
+
+ vararg = call->vararg;
+ vinit(&abiargs, abiargsbuf, arraylength(abiargsbuf));
+ ni = nf = ns = 0;
+ assert(!ins->cls == !call->ret.bits);
+ nret = abiret(call->abiret, &abiargs, &ni, call->ret);
+ if (call->ret.isagg) { /* adjust struct return */
+ union irtype retty = call->ret;
+ struct typedata *td = &typedata[retty.dat];
+ struct instr alloca = mkalloca(td->siz, td->align);
+ int ialloca;
+
+ sretarghidden = ni == 0;
+ /* swap alloca and call temps so users of original call point to alloca */
+ retmem = insertinstr(blk, ialloca = (*curi)++ - call->narg, *ins);
+ *ins = alloca;
+ blk->ins.p[ialloca] = ins - instrtab;
+ blk->ins.p[*curi] = retmem.i;
+ ins = &instrtab[retmem.i];
+ retmem.i = blk->ins.p[ialloca];
+
+ if (!nret) /* hidden pointer argument */
+ insertinstr(blk, (*curi)++ - call->narg,
+ mkinstr(Oarg, 0, mktyperef((union irtype){.cls=KPTR}), retmem));
+ }
+
+ /* adjust args */
+ for (int i = 0, i2 = ni + sretarghidden; i < call->narg; ++i) {
+ int arginst = *curi - (call->narg - i);
+ struct instr *arg = &instrtab[blk->ins.p[arginst]];
+ union irtype pty = ref2type(arg->l);
+ int first = abiargs.n;
+ int ret = abiarg(&abiargs, &ni, &nf, &ns, pty);
+ ret = patcharg(blk, curi, call, i, ret, &abiargs.p[first]);
+ if (call->vararg == i) vararg = i2;
+ i2 += ret;
+ }
+ /* adjust return */
+ if (call->ret.isagg) {
+ ins->cls = 0;
+ if (!nret) { /* hidden pointer argument */
+ ins->cls = 0;
+ if (call->abiret[0].reg >= 0) {
+ /* the result location pointer is also returned by the callee, e.g. in x86 */
+ ins->cls = KPTR;
+ ++nret;
+ /* even if this is not used, the register copy
+ * must be emitted for the register allocator to know */
+ }
+ } else { /* aggregate returned in regs */
+ union ref r[2];
+ struct instr ret2;
+ assert(in_range(nret, 1, 2));
+ ins->cls = call->abiret[0].ty.cls;
+ r[0] = mkref(RTMP, ins - instrtab);
+ if (nret == 2) {
+ ret2 = mkinstr(Ocall2r, call->abiret[1].ty.cls, r[0]);
+ r[1] = insertinstr(blk, ++*curi, ret2);
+ }
+ for (int i = 0; i < nret; ++i) {
+ struct instr store = {0};
+ /* XXX this can generate unaligned stores */
+ switch (call->abiret[i].ty.cls) {
+ default: assert(0);
+ case KF4: case KI4: store.op = Ostore4; break;
+ case KI8: case KF8: store.op = Ostore8; break;
+ }
+ if (i == 0) {
+ store.l = retmem;
+ } else {
+ union ref off = mkref(RICON, cls2siz[call->abiret[0].ty.cls]);
+ struct instr addr = mkinstr(Oadd, KPTR, retmem, off);
+ store.l = insertinstr(blk, ++*curi, addr);
+ }
+ store.r = r[i];
+ insertinstr(blk, ++*curi, store);
+ }
+ }
+ }
+
+ if (call->ret.isagg) call->ret = (union irtype){0};
+ call->vararg = vararg;
+ call->abiarg = alloccopy(fn->arena, abiargs.p, abiargs.n * sizeof(struct abiarg), 0);
+ call->narg = abiargs.n;
+ vfree(&abiargs);
+}
+
+void
+abi0(struct function *fn)
+{
+ uint nparam = typedata[fn->fnty.dat].nmemb;
+ const union type *paramty = typedata[fn->fnty.dat].param;
+ struct abiargsvec abiargs = {VINIT(abiargsbuf, arraylength(abiargsbuf))};
+ int rvovar = -1;
+ int ni = 0, nf = 0, ns = 0, istart = 0;
+ struct block *blk;
+ union ref sret = {0};
+
+ if (fn->retty.t == TYVOID) {
+ fn->nabiret = 0;
+ } else {
+ fn->nabiret = abiret(fn->abiret, &abiargs, &ni, mkirtype(fn->retty));
+ if (!fn->nabiret && isagg(fn->retty)) { /* ret agg by hidden pointer */
+ struct instr param = copyparam(fn, NULL, 0, abiargs.p[0]);
+ sret = insertinstr(fn->entry, 0, param);
+ ++istart;
+ /* increment real param ordinals */
+ for (int i = 1; i < fn->entry->ins.n; ++i) {
+ struct instr *ins = &instrtab[fn->entry->ins.p[i]];
+ if (ins->op == Oparam) ++ins->l.i;
+ }
+ }
+ }
+
+ /* adjust params */
+ for (int i = 0, param = abiargs.n; i < nparam; ++i) {
+ union irtype pty = mkirtype(paramty[i]);
+ int first = abiargs.n;
+ int ret = abiarg(&abiargs, &ni, &nf, &ns, pty);
+ patchparam(fn, &istart, &param, pty.isagg ? pty.dat : -1, ret+!ret, &abiargs.p[first]);
+ }
+ fn->abiarg = alloccopy(fn->arena, abiargs.p, abiargs.n * sizeof *abiargs.p, 0);
+ fn->nabiarg = abiargs.n;
+ vfree(&abiargs);
+
+ if (!fn->nabiret && isagg(fn->retty)) {
+ /* for structures returned via a hidden pointer argument:
+ * if every return instr returns the same local var X, make X point directly
+ * to the result location (return value optimization, RVO) */
+ blk = fn->entry;
+ do {
+ union ref arg = blk->jmp.arg[0];
+ if (blk->jmp.t != Jret) continue;
+ if (!arg.bits) continue;
+ if (arg.t != RTMP || !oisalloca(instrtab[arg.i].op)) {
+ rvovar = -1;
+ break;
+ }
+ if (rvovar == -1) {
+ rvovar = arg.i;
+ } else if (arg.i != rvovar) {
+ rvovar = -1;
+ break;
+ }
+ } while ((blk = blk->lnext) != fn->entry);
+ if (rvovar != -1)
+ instrtab[rvovar] = mkinstr(Ocopy, KPTR, sret);
+ }
+
+ blk = fn->entry->lnext;
+ do {
+ /* adjust calls */
+ for (int iinstr = 0; iinstr < blk->ins.n; ++iinstr) {
+ struct instr *ins = &instrtab[blk->ins.p[iinstr]];
+ if (ins->op != Ocall) continue;
+ abi0_call(fn, ins, blk, &iinstr);
+ }
+
+ /* adjust returns */
+ if (isagg(fn->retty) && blk->jmp.t == Jret && blk->jmp.arg[0].bits) {
+ assert(!blk->jmp.arg[1].bits);
+ if (fn->nabiret) { /* aggregate return in register(s) */
+ union ref src = blk->jmp.arg[0];
+ for (int i = 0; i < fn->nabiret; ++i) {
+ /* XXX this can generate unaligned loads */
+ struct instr ins = {0};
+ switch (ins.cls = fn->abiret[i].ty.cls) {
+ default: assert(0);
+ case KI4: ins.op = Oloadu4; break;
+ case KI8: ins.op = Oloadi8; break;
+ case KF4: ins.op = Oloadf4; break;
+ case KF8: ins.op = Oloadf8; break;
+ }
+ if (i == 0)
+ ins.l = src;
+ else
+ ins.l = insertinstr(blk, blk->ins.n,
+ mkinstr(Oadd, KPTR, src,
+ mkref(RICON, cls2siz[fn->abiret[0].ty.cls])));
+ blk->jmp.arg[i] = insertinstr(blk, blk->ins.n, ins);
+ }
+ } else {
+ /* aggregate return (arg[0] is pointer to return value) */
+ if (rvovar == -1) {
+ /* blit %sret, %arg */
+ union irtype typ = mkirtype(fn->retty);
+ insertinstr(blk, blk->ins.n, mkarginstr(typ, sret));
+ insertinstr(blk, blk->ins.n, mkarginstr(typ, blk->jmp.arg[0]));
+ insertinstr(blk, blk->ins.n, mkintrin(INstructcopy, 0, 2));
+ } else assert(blk->jmp.arg[0].bits == mkref(RTMP, rvovar).bits);
+ if (fn->abiret[0].ty.cls) blk->jmp.arg[0] = rvovar == -1 ? sret : mkref(RTMP, rvovar);
+ else memset(blk->jmp.arg, 0, sizeof blk->jmp.arg);
+ }
+ }
+ } while ((blk = blk->lnext) != fn->entry);
+
+ if (ccopt.dbg.a) {
+ efmt("<< After abi0 >>\n");
+ irdump(fn);
+ }
+}
+
+/* vim:set ts=3 sw=3 expandtab: */
diff --git a/ir/cfg.c b/ir/cfg.c
new file mode 100644
index 0000000..d9aa409
--- /dev/null
+++ b/ir/cfg.c
@@ -0,0 +1,43 @@
+#include "ir.h"
+
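+/* porec fills the rpo buffer from the end (note the *--*rpo push), so once the
+ * recursion finishes the buffer reads in reverse postorder starting at the
+ * entry block; sortrpo() then relinks the block list in that order and frees
+ * any block the walk never reached. */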
+static void
+porec(struct block ***rpo, struct block *b)
+{
+ if (wasvisited(b)) return;
+ markvisited(b);
+ if (b->s2) porec(rpo, b->s2);
+ if (b->s1) porec(rpo, b->s1);
+ *--*rpo = b;
+}
+
+void
+sortrpo(struct function *fn)
+{
+ static struct block **rpobuf;
+ struct block **rpoend, **rpo;
+ int i, ndead;
+
+ xbgrow(&rpobuf, fn->nblk);
+ rpo = rpoend = rpobuf + fn->nblk;
+
+ startbbvisit();
+ fn->entry->id = 0;
+ porec(&rpo, fn->entry);
+ ndead = rpo - rpobuf;
+ for (struct block *blk = fn->entry; ndead > 0; blk = blk->lnext) {
+ if (!wasvisited(blk)) {
+ blk->lnext = blk->lprev = NULL;
+ freeblk(fn, blk);
+ --ndead;
+ }
+ }
+ for (i = 1, ++rpo; rpo < rpoend; ++rpo, ++i) {
+ rpo[-1]->lnext = rpo[0];
+ rpo[0]->lprev = rpo[-1];
+ rpo[0]->id = i;
+ }
+ fn->entry->lprev = rpo[-1];
+ rpo[-1]->lnext = fn->entry;
+}
+
+/* vim:set ts=3 sw=3 expandtab: */
diff --git a/ir/dump.c b/ir/dump.c
new file mode 100644
index 0000000..ddde7a3
--- /dev/null
+++ b/ir/dump.c
@@ -0,0 +1,255 @@
+#include "ir.h"
+#include "../obj/obj.h"
+
+static int nextdat;
+
+static void
+pridat(const struct irdat *dat)
+{
+ uchar *p = (dat->section == Sdata ? objout.data.p : objout.rodata.p) + dat->off;
+ enum {
+ MINZERO = 4,
+ MAXLINE = 60,
+ };
+ int npri = 0;
+ int strbegin = 0, nstr = 0;
+ assert(dat->section == Sdata || dat->section == Srodata);
+ efmt("%s %y(align %d, size %d):\n\t", dat->section == Sdata ? "data" : "rodata", dat->name, dat->align, dat->siz);
+ for (int i = 0; i < dat->siz; ++i) {
+ int c = p[i];
+ if (npri > MAXLINE) {
+ npri = 0;
+ efmt("\n\t");
+ }
+ if (aisprint(c)) {
+ if (!nstr++) strbegin = i;
+ } else {
+ if (nstr) {
+ npri += efmt("asc %'S,", p+strbegin, nstr);
+ nstr = 0;
+ efmt("b ");
+ }
+ npri += efmt("%d,", c);
+ }
+ }
+ if (nstr) npri += efmt("asc %'S,", p+strbegin, nstr);
+ efmt("\n");
+}
+
+static const char *clsname[] = {
+ "?", "i4", "i8", "ptr", "f4", "f8"
+};
+
+static void
+prityp(union irtype typ)
+{
+ if (!typ.isagg)
+ efmt(clsname[typ.cls]);
+ else {
+ const struct typedata *td = &typedata[typ.dat];
+ const char *tag = td->t == TYSTRUCT ? "struct" : "union";
+ if (ttypenames[td->id])
+ efmt("%s.%s.%d", tag, ttypenames[td->id], td->id);
+ else
+ efmt("%s.%d", tag, td->id);
+ }
+}
+
+static const char *intrinname[] = {
+ "?\??",
+#define _(b,...) #b,
+#include "intrin.def"
+#undef _
+};
+
+static void
+dumpref(enum op o, union ref ref)
+{
+ struct xcon *con;
+ switch (ref.t) {
+ case RXXX:
+ if (ref.bits == UNDREF.bits)
+ efmt("undef");
+ else
+ efmt("??");
+ break;
+ case RTMP:
+ efmt("%%%d", ref.i);
+ if (instrtab[ref.i].reg)
+ efmt("(%s)", mctarg->rnames[instrtab[ref.i].reg-1]);
+ break;
+ case RREG:
+ efmt("%s", mctarg->rnames[ref.i]);
+ break;
+ case RICON:
+ if (o == Ointrin) efmt("\"%s\"", intrinname[ref.i]);
+ else efmt("%d", ref.i);
+ break;
+ case RXCON:
+ con = &conht[ref.i];
+ if (con->deref) efmt("*[");
+ if (con->issym || con->isdat) efmt("$%y", xcon2sym(ref.i));
+ else switch (con->cls) {
+ case KI4: efmt("%d", (int)con->i); break;
+ case KI8: efmt("%ld", con->i); break;
+ case KPTR: efmt("%'lx", con->i); break;
+ case KF4: efmt("%fs", con->f); break;
+ case KF8: efmt("%fd", con->f); break;
+ default: assert(0);
+ }
+ if (con->deref) efmt("]");
+ break;
+ case RTYPE:
+ prityp(ref2type(ref));
+ break;
+ case RADDR:
+ {
+ const struct addr *addr = &addrht[ref.i];
+ bool k = 0;
+ efmt("addr [");
+ if ((k = addr->base.bits)) dumpref(0, addr->base);
+ if (addr->index.bits) {
+ if (k) efmt(" + ");
+ dumpref(0, addr->index);
+ if (addr->shift)
+ efmt(" * %d", 1<<addr->shift);
+ k = 1;
+ }
+ if (k && addr->disp) {
+ efmt(" %c %d", "-+"[addr->disp > 0], addr->disp < 0 ? -addr->disp : addr->disp);
+ }
+ assert(k);
+ efmt("]");
+ }
+ break;
+ default: assert(!"ref");
+ }
+}
+
+static void
+dumpcall(struct call *call)
+{
+ if (call->ret.isagg) {
+ efmt("sret ");
+ prityp(call->ret);
+ efmt(", ");
+ }
+ if (call->vararg < 0) {
+ efmt("#%d", call->narg);
+ } else {
+ assert(call->vararg <= call->narg);
+ efmt("#%d, ... #%d", call->vararg, call->narg - call->vararg);
+ }
+}
+
+static void
+dumpinst(const struct instr *ins)
+{
+ int i;
+ if (ins->op == Omove) {
+ efmt("move %s ", clsname[ins->cls]);
+ } else {
+ enum irclass cls = insrescls(*ins);
+ if (ins->reg) {
+ if (cls)
+ efmt("%s ", clsname[cls]);
+ efmt("(%%%d)%s = ", ins - instrtab, mctarg->rnames[ins->reg - 1]);
+ } else if (cls) {
+ efmt("%s %%%d", clsname[cls], ins - instrtab);
+ efmt(" = ");
+ }
+ efmt("%s ", opnames[ins->op]);
+ if (oiscmp(ins->op))
+ efmt("%s ", clsname[ins->cls]);
+ }
+ for (i = 0; i < opnarg[ins->op]; ++i) {
+ if (i) efmt(", ");
+ if (i == 1 && (ins->op == Ocall || ins->op == Ointrin)) {
+ dumpcall(&calltab.p[ins->r.i]);
+ } else {
+ dumpref(ins->op, (&ins->l)[i]);
+ }
+ }
+ efmt("\n");
+}
+
+static void
+dumpblk(struct function *fn, struct block *blk)
+{
+ static const char *jnames[] = { 0, "b", "ret" };
+ int i;
+ efmt(" @%d:\n", blk->id);
+ for (i = 0; i < blk->phi.n; ++i) {
+ struct instr *phi = &instrtab[blk->phi.p[i]];
+ union ref *refs = phitab.p[phi->l.i];
+ if (i == 0) efmt("%-4d", blk->inumstart);
+ else efmt(" |> ");
+ efmt(" %s ", clsname[phi->cls]);
+ if (!phi->reg) efmt("%%%d = %s ", blk->phi.p[i], opnames[phi->op]);
+ else efmt("(%%%d)%s = %s ", phi - instrtab, mctarg->rnames[phi->reg-1], opnames[phi->op]);
+ for (int i = 0; i < blk->npred; ++i) {
+ if (i) efmt(", ");
+ efmt("@%d ", blkpred(blk, i)->id);
+ dumpref(0, refs[i]);
+ }
+ efmt("\n");
+ }
+ for (i = 0; i < blk->ins.n; ++i) {
+ efmt("%-4d ", blk->inumstart + 1 + i);
+ dumpinst(&instrtab[blk->ins.p[i]]);
+ }
+ efmt("%-4d %s ", blk->inumstart + 1 + i, jnames[blk->jmp.t]);
+ if (blk->jmp.arg[0].bits && !fn->nabiret && isagg(fn->retty)) {
+ /* un-lowered struct return */
+ dumpref(0, mktyperef(mkirtype(fn->retty)));
+ efmt(" ");
+ }
+ for (i = 0; i < 2; ++i) {
+ if (!blk->jmp.arg[i].bits) break;
+ if (i > 0) efmt(", ");
+ dumpref(0, blk->jmp.arg[i]);
+ }
+ if (i && blk->s1) efmt(", ");
+ if (blk->s1 && blk->s2) efmt("@%d, @%d", blk->s1->id, blk->s2->id);
+ else if (blk->s1) efmt("@%d", blk->s1->id);
+ efmt("\n");
+}
+
+void
+irdump(struct function *fn)
+{
+ struct block *blk;
+
+ /* print data entries that have not been printed yet */
+ while (nextdat < dattab.n) pridat(&dattab.p[nextdat++]);
+
+ efmt("function %s : %ty\n", fn->name, fn->fnty);
+ if (fn->abiarg || fn->nabiret) {
+ efmt("abi: (");
+ for (int i = 0; i < fn->nabiarg; ++i) {
+ if (i > 0) efmt(", ");
+ if (fn->abiarg[i].reg >= 0) {
+ efmt("%s", mctarg->rnames[fn->abiarg[i].reg]);
+ } else {
+ prityp(fn->abiarg[i].ty);
+ efmt(" <stk>");
+ }
+ }
+ efmt(")");
+ if (fn->retty.t != TYVOID) {
+ efmt(" -> %s", mctarg->rnames[fn->abiret[0].reg]);
+ if (fn->nabiret > 1)
+ efmt(", %s", mctarg->rnames[fn->abiret[1].reg]);
+ }
+ efmt("\n");
+ }
+ numberinstrs(fn);
+ blk = fn->entry;
+ do {
+ dumpblk(fn, blk);
+ assert(blk->lnext != NULL);
+ } while ((blk = blk->lnext) != fn->entry);
+ efmt("\n");
+}
+
+/* vim:set ts=3 sw=3 expandtab: */
diff --git a/ir/intrin.c b/ir/intrin.c
new file mode 100644
index 0000000..9bb0bcb
--- /dev/null
+++ b/ir/intrin.c
@@ -0,0 +1,77 @@
+#include "ir.h"
+
+struct arg { union ref *arg, *ty; };
+
+static int
+intrin(struct block *blk, int *curi, enum intrin in, struct arg *args, int narg, union irtype ret)
+{
+ struct instr *this = &instrtab[blk->ins.p[*curi]];
+ const struct typedata *td;
+ union irtype ty;
+ uint ncopy, step;
+
+ switch (in) {
+ case 0: assert(0);
+ case INstructcopy:
+ assert(narg == 2 && args[0].ty->bits == args[1].ty->bits);
+ ty = ref2type(*args[0].ty);
+ assert(ty.isagg);
+ td = &typedata[ty.cls];
+ step = td->align <= 8 ? td->align : 8;
+ ncopy = td->siz / step;
+ if (ncopy > 4) {
+ enum irclass cls = siz2intcls[cls2siz[KPTR]];
+ /* memcpy */
+ *args[1].ty = *args[0].ty = mktyperef(cls2type(KPTR));
+ insertinstr(blk, (*curi)++, mkarginstr(cls2type(cls), mkintcon(cls, td->siz)));
+ *this = mkinstr(Ocall, 0, mksymref("memcpy"), this->r);
+ calltab.p[this->r.i].narg = 3;
+ calltab.p[this->r.i].ret = cls2type(0);
+ return 0;
+ } else {
+ delinstr(blk, (*curi)--);
+ for (int off = 0; off < td->siz; off += step) {
+ union ref psrc = *args[1].arg, pdst = *args[0].arg, src;
+ if (off) {
+ pdst = insertinstr(blk, ++*curi, mkinstr(Oadd, KPTR, *args[0].arg, mkref(RICON, off)));
+ psrc = insertinstr(blk, ++*curi, mkinstr(Oadd, KPTR, *args[1].arg, mkref(RICON, off)));
+ }
+ src = insertinstr(blk, ++*curi, mkinstr(Oloads1 + 2*ilog2(step), step < 8 ? KI4 : KI8, psrc));
+ insertinstr(blk, ++*curi, mkinstr(Ostore1 + ilog2(step), 0, pdst, src));
+ }
+ return 1;
+ }
+ }
+ assert(0);
+}
+
+void
+lowerintrin(struct function *fn)
+{
+ struct block *blk = fn->entry;
+ static struct arg argsbuf[64];
+ vec_of(struct arg) args = VINIT(argsbuf, arraylength(argsbuf));
+
+ do {
+ for (int i = 0; i < blk->ins.n; ++i) {
+ struct instr *ins = &instrtab[blk->ins.p[i]];
+ if (ins->op == Oarg)
+ vpush(&args, ((struct arg){ &ins->r, &ins->l }));
+ else if (ins->op == Ocall)
+ vinit(&args, argsbuf, arraylength(argsbuf));
+ else if (ins->op == Ointrin) {
+ int arg0 = i - args.n;
+ assert(calltab.p[ins->r.i].narg == args.n);
+ if (intrin(blk, &i, ins->l.i, args.p, args.n, calltab.p[ins->r.i].ret))
+ for (int j = args.n; j > 0; --j, --i)
+ delinstr(blk, arg0);
+ else
+ abi0_call(fn, ins, blk, &i);
+ vinit(&args, argsbuf, arraylength(argsbuf));
+ }
+ }
+ assert(args.n == 0);
+ } while ((blk = blk->lnext) != fn->entry);
+}
+
+/* vim:set ts=3 sw=3 expandtab: */
diff --git a/ir/intrin.def b/ir/intrin.def
new file mode 100644
index 0000000..2ccc3b5
--- /dev/null
+++ b/ir/intrin.def
@@ -0,0 +1,2 @@
+/* NAME NARG */
+_(structcopy, 2)
diff --git a/ir/ir.c b/ir/ir.c
new file mode 100644
index 0000000..0132a97
--- /dev/null
+++ b/ir/ir.c
@@ -0,0 +1,618 @@
+#include "ir.h"
+#include "../obj/obj.h"
+
+uchar type2cls[NTYPETAG];
+uchar cls2siz[KF8+1];
+const uchar siz2intcls[] = { [1] = KI4, [2] = KI4, [4] = KI4, [8] = KI8 };
+
+const char *opnames[] = {
+ "?\??",
+#define _(o,...) #o,
+#include "op.def"
+#undef _
+};
+
+const uchar opnarg[] = {
+ 0,
+#define _(o,n) n,
+#include "op.def"
+#undef _
+};
+
+struct instr instrtab[MAXINSTR];
+struct use *instruse[MAXINSTR];
+short instrnuse[MAXINSTR];
+static struct use instrusebuf[MAXINSTR][2];
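+/* each temporary's use list starts out in its fixed two-entry instrusebuf row;
+ * adduse() moves it to a heap-allocated list only once a third user appears,
+ * and deluse() moves it back when the list shrinks to two entries again. */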
+int ninstr;
+static int instrfreelist;
+struct calltab calltab;
+struct phitab phitab;
+struct dattab dattab;
+struct addr addrht[1 << 12];
+static int naddrht;
+struct xcon conht[1 << 12];
+static int nconht;
+int visitmark;
+
+void
+irinit(struct function *fn)
+{
+ static struct call callsbuf[64];
+ static union ref *phisbuf[64];
+ static struct irdat datsbuf[64];
+
+ ninstr = 0;
+ instrfreelist = -1;
+ vinit(&calltab, callsbuf, arraylength(callsbuf));
+ for (int i = 0; i < phitab.n; ++i) xbfree(phitab.p[i]);
+ vinit(&phitab, phisbuf, arraylength(phisbuf));
+ if (!dattab.p) vinit(&dattab, datsbuf, arraylength(datsbuf));
+ if (naddrht >= arraylength(addrht)/2)
+ memset(addrht, naddrht = 0, sizeof addrht);
+ if (nconht >= arraylength(conht)/2)
+ memset(conht, nconht = 0, sizeof conht);
+ if (!type2cls[TYINT]) {
+ for (int i = TYBOOL; i <= TYUVLONG; ++i) {
+ int siz = targ_primsizes[i];
+ type2cls[i] = siz < 8 ? KI4 : KI8;
+ }
+ type2cls[TYFLOAT] = KF4;
+ type2cls[TYDOUBLE] = KF8;
+ type2cls[TYLDOUBLE] = KF8;
+ type2cls[TYPTR] = KPTR;
+ type2cls[TYARRAY] = KPTR;
+ cls2siz[KI4] = cls2siz[KF4] = 4;
+ cls2siz[KI8] = cls2siz[KF8] = 8;
+ cls2siz[KPTR] = targ_primsizes[TYPTR];
+ }
+ fn->entry = fn->curblk = allocz(fn->arena, sizeof(struct block), 0);
+ fn->nblk = 1;
+ fn->entry->lprev = fn->entry->lnext = fn->entry;
+}
+
+static int
+addaddr(const struct addr *addr)
+{
+ uint h = hashb(0, addr, sizeof *addr);
+ uint i = h, n = arraylength(addrht);
+ for (;; ++i) {
+ i &= arraylength(addrht) - 1;
+ if (!addrht[i].base.bits && !addrht[i].index.bits) {
+ addrht[i] = *addr;
+ ++naddrht;
+ return i;
+ } else if (!memcmp(&addrht[i], addr, sizeof *addr)) {
+ return i;
+ }
+ assert(--n > 0 && "addrht full");
+ }
+}
+
+static int
+addcon(const struct xcon *con)
+{
+ uint h = hashb(0, con, sizeof *con);
+ uint i = h, n = arraylength(conht);
+ assert(con->issym || con->isdat || con->cls);
+ for (;; ++i) {
+ i &= arraylength(conht) - 1;
+ if (!conht[i].issym && !conht[i].cls) {
+ conht[i] = *con;
+ ++nconht;
+ return i;
+ } else if (!memcmp(&conht[i], con, sizeof *con)) {
+ return i;
+ }
+ assert(--n > 0 && "conht full");
+ }
+}
+
+union irtype
+mkirtype(union type t)
+{
+ if (t.t == TYVOID || isscalar(t))
+ return (union irtype) { .cls = type2cls[scalartypet(t)] };
+ assert(isagg(t));
+ return (union irtype) { .isagg = 1, .dat = t.dat };
+}
+
+union ref
+mkintcon(enum irclass k, vlong i)
+{
+ if (i < 1l << 28 && i >= -(1l << 28)) {
+ return mkref(RICON, i);
+ } else {
+ struct xcon con = { .cls = k, .i = i };
+ if (cls2siz[k] == 4) /* check upper half is zero or -1 */
+ assert(in_range((i >> 32) + 1, 0, 1));
+ return mkref(RXCON, addcon(&con));
+ }
+}
+
+union ref
+mkfltcon(enum irclass k, double f)
+{
+ struct xcon con = { .cls = k, .f = k == KF4 ? (float) f : f };
+ return mkref(RXCON, addcon(&con));
+}
+
+union ref
+mksymref(const char *s)
+{
+ struct xcon con = { .issym = 1, .sym = s };
+ return mkref(RXCON, addcon(&con));
+}
+
+union ref
+mkdatref(const char *name, uint siz, uint align, const void *bytes, uint n, bool deref)
+{
+ struct irdat dat = { .align = align, .siz = siz, .name = name, .section = Srodata };
+
+ assert(n <= siz && siz && align);
+ if (!name) {
+ extern const char *intern(const char *);
+ char buf[32];
+ struct wbuf wbuf = MEMBUF(buf, sizeof buf);
+
+ bfmt(&wbuf, ".L.%d", dattab.n);
+ ioputc(&wbuf, 0);
+ assert(!wbuf.err);
+ dat.name = name = intern(buf);
+ }
+ dat.off = objnewdat(name, dat.section, 0, siz, align);
+ memcpy(objout.rodata.p+dat.off, bytes, n);
+ memset(objout.rodata.p+dat.off+n, 0, siz - n);
+ vpush(&dattab, dat);
+ return mkref(RXCON, addcon(&(struct xcon){.isdat = 1, .deref = deref, .dat = dattab.n - 1}));
+}
+
+const char *
+xcon2sym(int ref)
+{
+ struct xcon con = conht[ref];
+ assert(con.issym ^ con.isdat);
+ return con.issym ? con.sym : dattab.p[con.dat].name;
+}
+
+struct instr
+mkalloca(uint siz, uint align)
+{
+ struct instr ins = { .cls = KPTR };
+ assert(ispo2(align) && align <= 16);
+ ins.op = Oalloca1 + ilog2(align);
+ ins.l = mkref(RICON, siz/align + (siz%align != 0));
+ return ins;
+}
+
+union ref
+mkcallarg(union irtype ret, uint narg, int vararg)
+{
+ struct call call = { .ret=ret, .narg=narg, .vararg=vararg };
+ assert((long) vararg <= narg);
+ vpush(&calltab, call);
+ return mkref(RXXX, calltab.n-1);
+}
+
+union ref
+mkaddr(struct addr addr)
+{
+ return mkref(RADDR, addaddr(&addr));
+}
+
+void
+addpred(struct block *blk, struct block *p)
+{
+ if (blk->npred == 0) {
+ blk->_pred0 = p;
+ ++blk->npred;
+ return;
+ }
+ if (blk->npred == 1) {
+ struct block *p0 = blk->_pred0;
+ blk->_pred = 0;
+ xbgrow(&blk->_pred, 4);
+ *blk->_pred = p0;
+ }
+ xbpush(&blk->_pred, &blk->npred, p);
+}
+
+static void
+delpred(struct block *blk, struct block *p)
+{
+ for (int i = 0; i < blk->npred; ++i) {
+ if (blkpred(blk, i) == p) {
+ for (int k = i; k < blk->npred - 1; ++k) {
+ blkpred(blk, k) = blkpred(blk, k + 1);
+ }
+ --blk->npred;
+ return;
+ }
+ }
+ assert(0&&"blk not in p");
+}
+
+struct block *
+newblk(struct function *fn)
+{
+ struct block *blk = alloc(fn->arena, sizeof(struct block), 0);
+ memset(blk, 0, sizeof *blk);
+ blk->id = -1;
+ return blk;
+}
+
+void
+freeblk(struct function *fn, struct block *blk)
+{
+ if (blk->npred > 1)
+ xbfree(blk->_pred);
+ vfree(&blk->phi);
+ vfree(&blk->ins);
+ if (blk->lnext) blk->lnext->lprev = blk->lprev;
+ if (blk->lprev) blk->lprev->lnext = blk->lnext;
+ blk->id = 1u<<31;
+ --fn->nblk;
+}
+
+struct block *
+insertblk(struct function *fn, struct block *pred, struct block *subst)
+{
+ struct block *new = newblk(fn);
+ struct block **s = pred->s1 == subst ? &pred->s1 : &pred->s2;
+ assert(*s == subst);
+ new->lnext = pred->lnext;
+ new->lprev = pred;
+ pred->lnext->lprev = new;
+ pred->lnext = new;
+ *s = new;
+ new->jmp.t = Jb;
+ new->s1 = subst;
+ addpred(new, pred);
+ for (int i = 0; i < subst->npred; ++i) {
+ if (blkpred(subst, i) == pred) {
+ blkpred(subst, i) = new;
+ ++fn->nblk;
+ return new;
+ }
+ }
+ assert(0);
+}
+
+
+static int
+newinstr(void)
+{
+ int t;
+ if (instrfreelist != -1) {
+ t = instrfreelist;
+ memcpy(&instrfreelist, &instrtab[t], sizeof(int));
+ } else {
+ assert(ninstr < arraylength(instrtab));
+ t = ninstr++;
+ }
+ if (instruse[t] != instrusebuf[t])
+ xbfree(instruse[t]);
+ instruse[t] = instrusebuf[t];
+ instrnuse[t] = 0;
+ return t;
+}
+
+void
+adduse(struct block *ublk, int ui, union ref r) {
+ struct use user = { ublk, ui };
+
+ if (r.t != RTMP) return;
+ if (instrnuse[r.i] < arraylength(instrusebuf[r.i])) {
+ instruse[r.i][instrnuse[r.i]++] = user;
+ } else if (instrnuse[r.i] == arraylength(*instrusebuf)) {
+ struct use *use = NULL;
+ xbgrow(&use, arraylength(*instrusebuf) + 2);
+ memcpy(use, instruse[r.i], sizeof(*instrusebuf));
+ use[instrnuse[r.i]++] = user;
+ instruse[r.i] = use;
+ } else {
+ xbpush(&instruse[r.i], &instrnuse[r.i], user);
+ }
+}
+
+bool
+deluse(struct block *ublk, int ui, union ref r) {
+ if (r.t != RTMP) return 0;
+
+ for (int i = 0; i < instrnuse[r.i]; ++i) {
+ if (instruse[r.i][i].blk == ublk && instruse[r.i][i].u == ui) {
+ for (int k = i; k < instrnuse[r.i] - 1; ++k) {
+ instruse[r.i][k] = instruse[r.i][k+1];
+ }
+ goto Shrink;
+ }
+ }
+ return 0;
+
+Shrink:
+ if (instrnuse[r.i] == arraylength(*instrusebuf) + 1) {
+ struct use *use = instruse[r.i];
+ instruse[r.i] = instrusebuf[r.i];
+ memcpy(instruse[r.i], use, sizeof *instrusebuf);
+ }
+ --instrnuse[r.i];
+ return 1;
+}
+
+union ref
+insertinstr(struct block *blk, int idx, struct instr ins)
+{
+ int new = newinstr();
+
+ instrtab[new] = ins;
+ if (idx == blk->ins.n) vpush(&blk->ins, new);
+ else {
+ assert(idx >= 0 && idx < blk->ins.n);
+ vpush_((void **)&blk->ins.p, &blk->ins._cap, &blk->ins.n, sizeof *blk->ins.p);
+ vresize(&blk->ins, blk->ins.n);
+ for (int i = blk->ins.n++; i > idx; --i)
+ blk->ins.p[i] = blk->ins.p[i - 1];
+ blk->ins.p[idx] = new;
+ }
+ adduse(blk, new, ins.l);
+ adduse(blk, new, ins.r);
+ return mkref(RTMP, new);
+}
+
+union ref
+insertphi(struct block *blk, enum irclass cls)
+{
+ int new = newinstr();
+ union ref *refs = NULL;
+ assert(blk->npred > 0);
+ xbgrow(&refs, blk->npred);
+ memset(refs, 0, blk->npred * sizeof *refs);
+ vpush(&phitab, refs);
+ instrtab[new] = mkinstr(Ophi, cls, mkref(RXXX, phitab.n - 1));
+ vpush(&blk->phi, new);
+ return mkref(RTMP, new);
+}
+
+void
+numberinstrs(struct function *fn)
+{
+ struct block *blk = fn->entry;
+ int start = 0;
+ do {
+ blk->inumstart = start;
+ start += blk->ins.n+2;
+ } while ((blk = blk->lnext) != fn->entry);
+}
+
+/* require use */
+void
+replcuses(union ref from, union ref to)
+{
+ assert(from.t == RTMP);
+ for (int i = 0; i < instrnuse[from.i]; ++i) {
+ struct use use = instruse[from.i][i];
+ union ref *u;
+ int n, j;
+ if (use.u == from.i) continue;
+ if (use.u == USERJUMP) {
+ u = &use.blk->jmp.arg[0];
+ n = 2;
+ } else if (instrtab[use.u].op == Ophi) {
+ u = phitab.p[instrtab[use.u].l.i];
+ n = use.blk->npred;
+ } else {
+ u = &instrtab[use.u].l;
+ n = 2;
+ }
+
+ for (j = 0; j < n; ++j) {
+ if (u[j].bits == from.bits) {
+ u[j].bits = to.bits;
+ adduse(use.blk, use.u, to);
+ --i;
+ break;
+ }
+ }
+ }
+}
+
+void
+deluses(int ins)
+{
+ if (instruse[ins] != instrusebuf[ins]) {
+ xbfree(instruse[ins]);
+ instruse[ins] = instrusebuf[ins];
+ }
+ instrnuse[ins] = 0;
+}
+
+void
+delinstr(struct block *blk, int idx)
+{
+ int t = blk->ins.p[idx];
+ assert(idx >= 0 && idx < blk->ins.n);
+ memcpy(&instrtab[t], &instrfreelist, sizeof(int));
+ instrfreelist = t;
+ deluses(t);
+ for (int i = idx; i < blk->ins.n - 1; ++i)
+ blk->ins.p[i] = blk->ins.p[i + 1];
+ --blk->ins.n;
+}
+
+void
+delphi(struct block *blk, int idx)
+{
+ int t = blk->phi.p[idx];
+ assert(idx >= 0 && idx < blk->phi.n);
+ memcpy(&instrtab[t], &instrfreelist, sizeof(int));
+ instrfreelist = t;
+ deluses(t);
+ for (int i = idx; i < blk->phi.n - 1; ++i)
+ blk->phi.p[i] = blk->phi.p[i + 1];
+ --blk->phi.n;
+}
+
+void
+delnops(struct block *blk)
+{
+ int i, n, t;
+ /* delete trailing nops */
+ while (blk->ins.n > 0 && instrtab[t = blk->ins.p[blk->ins.n - 1]].op == Onop) {
+ --blk->ins.n;
+ deluses(t);
+ memcpy(&instrtab[t], &instrfreelist, sizeof(int));
+ instrfreelist = t;
+ }
+ /* delete rest of nops */
+ for (i = blk->ins.n - 2, n = 0; i >= 0; --i) {
+ if (instrtab[t = blk->ins.p[i]].op == Onop) {
+ deluses(t);
+ memcpy(&instrtab[t], &instrfreelist, sizeof(int));
+ instrfreelist = t;
+ ++n;
+ } else if (n) {
+ memmove(blk->ins.p+i+1, blk->ins.p+i+1+n, (blk->ins.n - n - i - 1)*sizeof *blk->ins.p);
+ blk->ins.n -= n;
+ n = 0;
+ }
+ }
+ if (n)
+ memmove(blk->ins.p, blk->ins.p + n, (blk->ins.n -= n)*sizeof *blk->ins.p);
+}
+
+void
+fillblkids(struct function *fn)
+{
+ int i = 0;
+ struct block *blk = fn->entry;
+ do blk->id = i++; while ((blk = blk->lnext) != fn->entry);
+}
+
+/** IR builders **/
+
+void
+useblk(struct function *fn, struct block *blk)
+{
+ extern int nerror;
+ if (fn->curblk && nerror == 0) assert(fn->curblk->jmp.t && "never finished block");
+ if (blk) assert(!blk->jmp.t && "reusing built block");
+ if (!blk->lprev) { /* initialize */
+ blk->lnext = fn->entry;
+ blk->lprev = fn->entry->lprev;
+ blk->lprev->lnext = blk;
+ blk->id = blk->lprev->id + 1;
+ ++fn->nblk;
+ fn->entry->lprev = blk;
+ }
+ fn->curblk = blk;
+}
+
+union ref
+addinstr(struct function *fn, struct instr ins)
+{
+ int new = newinstr();
+ assert(fn->curblk != NULL);
+ instrtab[new] = ins;
+ adduse(fn->curblk, new, ins.l);
+ adduse(fn->curblk, new, ins.r);
+ vpush(&fn->curblk->ins, new);
+ return mkref(RTMP, new);
+}
+
+union ref
+addphi(struct function *fn, enum irclass cls, union ref *r)
+{
+ int new;
+ struct instr ins = { Ophi, cls };
+ union ref *refs = NULL;
+
+ xbgrow(&refs, fn->curblk->npred);
+ memcpy(refs, r, fn->curblk->npred * sizeof *r);
+ vpush(&phitab, refs);
+ ins.l = mkref(RXXX, phitab.n-1);
+
+ assert(fn->curblk != NULL);
+ assert(fn->curblk->ins.n == 0);
+ new = newinstr();
+ instrtab[new] = ins;
+ for (int i = 0; i < fn->curblk->npred; ++i) {
+ adduse(fn->curblk, new, r[i]);
+ }
+ vpush(&fn->curblk->phi, new);
+ return mkref(RTMP, new);
+}
+
+#define putjump(fn, j, arg0, arg1, T, F) \
+ fn->curblk->jmp.t = j; \
+ fn->curblk->jmp.arg[0] = arg0; \
+ fn->curblk->jmp.arg[1] = arg1; \
+ fn->curblk->s1 = T; \
+ fn->curblk->s2 = F; \
+ fn->curblk = NULL;
+
+void
+putbranch(struct function *fn, struct block *blk)
+{
+ assert(fn->curblk && blk);
+ addpred(blk, fn->curblk);
+ putjump(fn, Jb, NOREF, NOREF, blk, NULL);
+}
+
+void
+putcondbranch(struct function *fn, union ref arg, struct block *t, struct block *f)
+{
+ assert(fn->curblk && t && f);
+ adduse(fn->curblk, USERJUMP, arg);
+ addpred(t, fn->curblk);
+ addpred(f, fn->curblk);
+ putjump(fn, Jb, arg, NOREF, t, f);
+}
+
+void
+putreturn(struct function *fn, union ref r0, union ref r1)
+{
+ adduse(fn->curblk, USERJUMP, r0);
+ adduse(fn->curblk, USERJUMP, r1);
+ putjump(fn, Jret, r0, r1, NULL, NULL);
+}
+
+#undef putjump
+
+/** Misc **/
+
+static void
+freefn(struct function *fn)
+{
+ struct block *blk = fn->entry;
+ do {
+ if (blk->npred > 1) xbfree(blk->_pred);
+ vfree(&blk->phi);
+ vfree(&blk->ins);
+ } while ((blk = blk->lnext) != fn->entry);
+}
+
+void
+irfini(struct function *fn)
+{
+ extern int nerror;
+ if (nerror) {
+ freefn(fn);
+ return;
+ }
+
+ abi0(fn);
+ lowerintrin(fn);
+ mem2reg(fn);
+ copyopt(fn);
+ if (ccopt.dbg.o) {
+ efmt("<< Before isel >>\n");
+ irdump(fn);
+ }
+ mctarg->isel(fn);
+ regalloc(fn);
+ if (!ccopt.dbg.any)
+ mctarg->emit(fn);
+
+ freefn(fn);
+}
+
+/* vim:set ts=3 sw=3 expandtab: */
diff --git a/ir/ir.h b/ir/ir.h
new file mode 100644
index 0000000..2c8a55d
--- /dev/null
+++ b/ir/ir.h
@@ -0,0 +1,280 @@
+#include "../common.h"
+
+enum irclass {
+ KXXX,
+ KI4, KI8, KPTR,
+ KF4, KF8,
+};
+
+#define kisint(k) in_range((k), KI4, KPTR)
+#define kisflt(k) in_range((k), KF4, KF8)
+
+union irtype {
+ struct { ushort _ : 1, cls : 15; };
+ struct { ushort isagg : 1, dat : 15; };
+ ushort bits;
+};
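+
+/* e.g. (union irtype){.cls = KI8} describes a scalar 8-byte integer, while
+ * (union irtype){.isagg = 1, .dat = n} describes the aggregate laid out by
+ * typedata[n]; cls and dat alias the same 15 bits (cf. mkirtype()). */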
+
+struct irdat {
+ uchar align : 6, globl : 1;
+ uchar section;
+ uint siz;
+ uint off;
+ const char *name;
+};
+
+struct xcon {
+ bool issym, isdat, deref;
+ uchar cls;
+ union {
+ const char *sym;
+ int dat;
+ vlong i;
+ double f;
+ };
+};
+
+struct abiarg {
+ union irtype ty;
+ union {
+ short reg; /* >= 0 */
+ short stk; /* < 0 */
+ };
+};
+
+struct call {
+ union irtype ret;
+ ushort narg;
+ short vararg; /* first variadic arg or -1 */
+ struct abiarg *abiarg;
+ struct abiarg abiret[2];
+};
+
+enum refkind {
+ RXXX, /* used for empty ref (zeros), undef, and the special args of call,phi,etc */
+ RTMP, /* reference to another instruction's result */
+ RREG, /* machine register */
+ RICON, /* small integer constants */
+ RXCON, /* other constants (incl. external symbols) */
+ RADDR, /* target-specific addressing mode */
+ RTYPE, /* irtype */
+};
+
+union ref {
+ struct { unsigned t : 3; signed i : 29; };
+ uint bits;
+};
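+
+/* e.g. mkref(RTMP, 7) refers to the result of instrtab[7], mkref(RICON, 12)
+ * is the small integer constant 12, and mkref(RREG, r) names machine
+ * register r directly. */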
+
+struct addr {
+ union ref base, index;
+ int shift, disp;
+};
+
+#define insrescls(ins) (oiscmp((ins).op) ? KI4 : (ins).cls)
+#define NOREF ((union ref) {0})
+#define UNDREF ((union ref) {{ 0, -1 }})
+#define ZEROREF ((union ref) {{ RICON, 0 }})
+#define mkref(t, x) ((union ref) {{ (t), (x) }})
+#define mktyperef(t) ((union ref) {{ RTYPE, (t).bits }})
+#define ref2type(r) ((union irtype) {.bits = (r).i})
+
+enum op {
+ Oxxx,
+#define _(o,...) O##o,
+#include "op.def"
+#undef _
+};
+
+#define oiscmp(o) in_range(o, Oequ, Ougte)
+#define oisalloca(o) in_range(o, Oalloca1, Oalloca16)
+#define oisstore(o) in_range(o, Ostore1, Ostore8)
+#define oisload(o) in_range(o, Oloads1, Oloadf8)
+extern const char *opnames[];
+extern const uchar opnarg[];
+
+enum intrin {
+ INxxx,
+#define _(b,...) IN##b,
+#include "intrin.def"
+#undef _
+};
+
+struct instr {
+ uchar op,
+ cls; /* operation data class; also result class except for cmp ops (always i4) */
+ uchar skip : 1, /* ignore during codegen: forms part of one machine instruction */
+ keep : 1; /* for codegen, keep instr even if result seems unused */
+ uchar inplace : 1; /* set (by isel) for instructions that modify their first arg in place */
+ uchar reg; /* 0 -> no reg; else reg + 1 */
+ union ref l, r; /* args */
+};
+
+enum jumpkind { JXXX, Jb, Jret, };
+
+struct block {
+ int id;
+ int npred;
+ int visit;
+ int inumstart;
+ union {
+ struct block *_pred0;
+ struct block **_pred;
+ };
+ struct block *lprev, *lnext;
+ struct block *s1, *s2;
+ vec_of(ushort) phi;
+ vec_of(ushort) ins;
+ struct { uchar t; union ref arg[2]; } jmp;
+};
+
+#define blkpred(blk, i) 0[(blk)->npred < 2 ? &(blk)->_pred0 : &(blk)->_pred[i]]
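+/* a block with fewer than two predecessors stores its (only) predecessor
+ * inline in _pred0; with two or more, _pred points to a heap array (see
+ * addpred()); blkpred() hides the distinction. */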
+
+enum { USERJUMP = 0xFFFF };
+struct use { struct block *blk; ushort u; };
+
+enum { MAXREGS = 64 };
+
+struct function {
+ struct arena **arena;
+ const char *name;
+ struct block *entry, *curblk;
+ struct use *use;
+ short *nuse;
+ union type fnty, retty;
+ struct abiarg *abiarg, abiret[2];
+ uint nblk;
+ int stksiz;
+ ushort nabiarg, nabiret;
+ bool globl;
+ bool isleaf;
+ regset regusage;
+};
+
+enum objkind { OBJELF };
+enum mcisa { ISamd64 };
+
+struct mctarg {
+ short gpr0, /* first gpr */
+ ngpr, /* gpr count */
+ bpr, /* frame/base pointer reg */
+ gprscratch, fprscratch, /* scratch registers for regalloc */
+ fpr0, /* first fpr */
+ nfpr; /* fpr count */
+ regset rcallee, /* callee-saved */
+ rglob; /* globally live (never used for regalloc) */
+ const char (*rnames)[6];
+ enum objkind objkind;
+ enum mcisa isa;
+ /* abiret: lower return type:
+ * scalar/small struct -> returns number of regs (1..2),
+ * r & cls filled with reg and irclass of each scalar return
+ * big struct -> returns 0, is passed via hidden pointer argument,
+ * r[0] contains register for returning said pointer or -1,
+ * r[1] contains register for hidden pointer argument,
+ * *ni is set to 1 if said register is the first ABI integer argument
+ */
+ int (*abiret)(short r[2], uchar cls[2], int *ni, union irtype);
+ /* abiarg: lower argument type:
+ * scalar/small struct -> returns number of regs (1..2),
+ * r & cls filled with reg and irclass of each scalar arg
+ * if reg == -1 -> stack
+ * big struct -> returns 0,
+ * if passed in stack cls[0] == 0, r[0] == negative SP offset
+ * if passed by pointer cls[0] == KPTR, r[0] contains integer register
+ * or negative SP offset if stack
+ */
+ int (*abiarg)(short r[2], uchar cls[2], int *ni, int *nf, int *ns, union irtype);
+
+ void (*isel)(struct function *);
+ void (*emit)(struct function *);
+};
+
+enum { MAXINSTR = 1<<14 };
+
+/** ir.c **/
+extern uchar type2cls[];
+extern uchar cls2siz[];
+extern const uchar siz2intcls[];
+extern struct instr instrtab[];
+extern struct use *instruse[];
+extern short instrnuse[];
+extern struct xcon conht[];
+extern struct calltab {vec_of(struct call);} calltab;
+extern struct phitab {vec_of(union ref *);} phitab;
+extern struct dattab {vec_of(struct irdat);} dattab;
+extern struct addr addrht[];
+extern int visitmark;
+#define mkinstr(O, C, ...) ((struct instr) { .op = (O), .cls = (C), .reg=0, __VA_ARGS__ })
+#define mkarginstr(ty, x) mkinstr(Oarg, 0, mktyperef(ty), (x))
+void irinit(struct function *);
+void irfini(struct function *);
+#define cls2type(k) ((union irtype){.cls=(k)})
+union irtype mkirtype(union type);
+union ref mkintcon(enum irclass, vlong);
+union ref mkfltcon(enum irclass, double);
+#define iscon(r) in_range((r).t, RICON, RXCON)
+#define concls(r) ((r).t == RICON ? KI4 : conht[(r).i].cls)
+#define isintcon(r) (iscon(r) && kisint(concls(r)))
+#define isfltcon(r) ((r).t == RXCON && kisflt(conht[(r).i].cls))
+#define isnumcon(r) ((r).t == RICON || ((r).t == RXCON && conht[(r).i].cls))
+#define isaddrcon(r) ((r).t == RXCON && !conht[(r).i].cls && !conht[(r).i].deref)
+#define intconval(r) ((r).t == RICON ? (r).i : conht[(r).i].i)
+#define fltconval(r) (conht[(r).i].f)
+union ref mksymref(const char *);
+union ref mkdatref(const char *name, uint siz, uint align, const void *, uint n, bool deref);
+const char *xcon2sym(int ref);
+struct instr mkalloca(uint siz, uint align);
+union ref mkcallarg(union irtype ret, uint narg, int vararg);
+#define mkintrin(B, C, N) mkinstr(Ointrin, C, {.t=RICON,B}, mkcallarg((union irtype){{0}},N,-1))
+union ref mkaddr(struct addr);
+void addpred(struct block *blk, struct block *p);
+
+struct block *newblk(struct function *);
+void freeblk(struct function *, struct block *);
+struct block *insertblk(struct function *, struct block *pred, struct block *subst);
+void adduse(struct block *ublk, int ui, union ref r);
+union ref insertinstr(struct block *, int idx, struct instr);
+union ref insertphi(struct block *, enum irclass cls);
+void replcuses(union ref from, union ref to);
+void deluses(int ins);
+void delinstr(struct block *, int idx);
+void delphi(struct block *, int idx);
+void delnops(struct block *blk);
+void fillblkids(struct function *);
+#define startbbvisit() (void)(++visitmark)
+#define wasvisited(blk) ((blk)->visit == visitmark)
+#define markvisited(blk) ((blk)->visit = visitmark)
+void numberinstrs(struct function *);
+
+/* IR builder functions */
+union ref addinstr(struct function *, struct instr);
+union ref addphi(struct function *, enum irclass, union ref []);
+void useblk(struct function *, struct block *);
+void putbranch(struct function *, struct block *);
+void putcondbranch(struct function *, union ref arg, struct block *t, struct block *f);
+void putreturn(struct function *, union ref r0, union ref r1);
+
+/** irdump.c **/
+void irdump(struct function *);
+
+/** ssa.c **/
+void filluses(struct function *);
+void copyopt(struct function *);
+
+/** cfg.c **/
+void sortrpo(struct function *fn);
+
+/** abi0.c **/
+void abi0(struct function *);
+void abi0_call(struct function *, struct instr *, struct block *blk, int *curi);
+
+/** optmem.c **/
+void mem2reg(struct function *);
+
+/** intrin.c **/
+void lowerintrin(struct function *);
+
+/** regalloc.c **/
+void regalloc(struct function *);
+
+/* vim:set ts=3 sw=3 expandtab: */
diff --git a/ir/op.def b/ir/op.def
new file mode 100644
index 0000000..9e18fe7
--- /dev/null
+++ b/ir/op.def
@@ -0,0 +1,76 @@
+/* OP NARG */
+_(nop, 0)
+_(copy, 1)
+_(move, 2)
+_(neg, 1)
+_(not, 1)
+_(cvtf4s, 1)
+_(cvtf4u, 1)
+_(cvtf4f8, 1)
+_(cvtf8s, 1)
+_(cvtf8u, 1)
+_(cvtf8f4, 1)
+_(cvts4f, 1)
+_(cvtu4f, 1)
+_(cvts8f, 1)
+_(cvtu8f, 1)
+_(exts1, 1)
+_(extu1, 1)
+_(exts2, 1)
+_(extu2, 1)
+_(exts4, 1)
+_(extu4, 1)
+_(add, 2)
+_(sub, 2)
+_(mul, 2)
+_(umul, 2)
+_(div, 2)
+_(udiv, 2)
+_(rem, 2)
+_(urem, 2)
+_(and, 2)
+_(ior, 2)
+_(xor, 2)
+_(shl, 2)
+_(sar, 2)
+_(slr, 2)
+_(equ, 2)
+_(neq, 2)
+_(lth, 2)
+_(gth, 2)
+_(lte, 2)
+_(gte, 2)
+_(ulth, 2)
+_(ugth, 2)
+_(ulte, 2)
+_(ugte, 2)
+_(alloca1, 1)
+_(alloca2, 1)
+_(alloca4, 1)
+_(alloca8, 1)
+_(alloca16, 1)
+_(loads1, 1)
+_(loadu1, 1)
+_(loads2, 1)
+_(loadu2, 1)
+_(loads4, 1)
+_(loadu4, 1)
+_(loadi8, 1)
+_(loadf4, 1)
+_(loadf8, 1)
+_(store1, 2)
+_(store2, 2)
+_(store4, 2)
+_(store8, 2)
+_(param, 2)
+_(arg, 2)
+_(call, 2)
+_(call2r, 1)
+_(intrin, 2)
+_(phi, 1)
+_(swap, 2)
+/* machine-specific instructions */
+_(xinc, 1)
+_(xdec, 1)
+_(xsave, 1)
+_(xrestore, 1)
diff --git a/ir/optmem.c b/ir/optmem.c
new file mode 100644
index 0000000..1c137b7
--- /dev/null
+++ b/ir/optmem.c
@@ -0,0 +1,285 @@
+#include "ir.h"
+
+static const uchar loadszcls[] = {
+ [Oloads1 - Oloads1] = 1|KI4<<4, [Oloadu1 - Oloads1] = 1|KI4<<4,
+ [Oloads2 - Oloads1] = 2|KI4<<4, [Oloadu2 - Oloads1] = 2|KI4<<4,
+ [Oloads4 - Oloads1] = 4|KI4<<4, [Oloadu4 - Oloads1] = 4|KI4<<4,
+ [Oloadi8 - Oloads1] = 8|KI8<<4,
+ [Oloadf4 - Oloads1] = 4|KF4<<4,
+ [Oloadf8 - Oloads1] = 8|KF8<<4,
+};
+static const uchar load2ext[] = {
+ [Oloads1 - Oloads1] = Oexts1, [Oloadu1 - Oloads1] = Oextu1,
+ [Oloads2 - Oloads1] = Oexts2, [Oloadu2 - Oloads1] = Oextu2,
+ [Oloads4 - Oloads1] = Oexts4, [Oloadu4 - Oloads1] = Oextu4,
+ [Oloadi8 - Oloads1] = Ocopy,
+};
+#define loadsz(o) (loadszcls[(o) - Oloads1] & 0xF)
+#define loadcls(o) (loadszcls[(o) - Oloads1] >> 4)
+#define load2ext(o) (load2ext[(o) - Oloads1])
+#define storesz(o) (1 << ((o) - Ostore1))
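+/* loadszcls packs each load op's access size in the low nibble and its result
+ * class in the high nibble (e.g. Oloadf8 -> 8|KF8<<4), decoded by loadsz()
+ * and loadcls() above; load2ext maps sub-word loads to the matching extension
+ * op used once the memory access is removed. */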
+
+/* Implements the algorithm from 'Simple and Efficient Construction of Static Single Assignment Form' (Braun et al.) */
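+
+/* Sketch of the rewrite (illustrative, not actual dump output):
+ *    ptr %1 = alloca8 1      ; promotable local slot
+ *    store8 %1, %2           ; becomes a nop; %2 recorded as the slot's value
+ *    i8 %3 = loadi8 %1       ; becomes i8 %3 = copy %2, or a phi when the
+ *                            ; reaching definition depends on the predecessor
+ * allocas whose every use is such a compatible load/store are then deleted. */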
+
+struct pendingphi { ushort var, phi; };
+struct ssabuilder {
+ vec_of(struct pendingphi) *pendingphis; /* map of block to list of incomplete phis in block */
+ imap_of(union ref *) curdefs; /* map of var to (map of block to def of var) */
+ struct bitset *sealed;
+ int lastvisit;
+ int nblk;
+};
+
+static union ref readvar(struct ssabuilder *, int var, enum irclass, struct block *);
+
+static union ref
+deltrivialphis(struct ssabuilder *sb, struct block *blk, union ref phiref)
+{
+ union ref **pcurdefs;
+ int var;
+ struct use *use, *uend;
+ union ref *args = phitab.p[instrtab[phiref.i].l.i];
+ union ref same = {0};
+
+ assert(instrtab[phiref.i].op == Ophi);
+
+ for (int i = 0; i < blk->npred; ++i) {
+ if (args[i].bits == same.bits || args[i].bits == phiref.bits) {
+ continue; /* unique value or self-reference */
+ } if (same.bits != 0) {
+ return phiref; /* non-trivial */
+ }
+ same = args[i];
+ }
+
+ if (same.bits == 0)
+ same = UNDREF; /* the phi is unreachable or in the start block */
+
+ /* replace uses */
+ replcuses(phiref, same);
+ imap_each(&sb->curdefs, var, pcurdefs) {
+ (void)var;
+ for (int i = blk->id; i < sb->nblk; ++i) {
+ if ((*pcurdefs)[i].bits == phiref.bits)
+ (*pcurdefs)[i] = same;
+ }
+ }
+
+ /* stops infinite recursion and marks phi for removal */
+ instrtab[phiref.i].op = Onop;
+
+ /* recursively try to remove all phi users as they might have become trivial */
+Redo:
+ for (use = instruse[phiref.i], uend = use + instrnuse[phiref.i]; use < uend; ++use) {
+ if (use->u != USERJUMP && instrtab[use->u].op == Ophi && use->u != phiref.i) {
+ union ref it = mkref(RTMP, use->u);
+ union ref vphi2 = deltrivialphis(sb, use->blk, it);
+ if (vphi2.bits != it.bits) {
+ /* deletion happened so phiref use may have changed */
+ if (same.bits == it.bits) same.bits = vphi2.bits;
+ goto Redo;
+ }
+ }
+ }
+ deluses(phiref.i);
+
+ return same;
+}
+
+static union ref
+addphiargs(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk, union ref phiref)
+{
+ union ref *args = phitab.p[instrtab[phiref.i].l.i];
+ for (int i = 0; i < blk->npred; ++i) {
+ struct block *pred = blkpred(blk, i);
+ args[i] = readvar(sb, var, cls, pred);
+ adduse(blk, phiref.i, args[i]);
+ }
+ return deltrivialphis(sb, blk, phiref);
+}
+
+static void
+writevar(struct ssabuilder *sb, int var, struct block *blk, union ref val)
+{
+ union ref **pcurdefs;
+ if (!(pcurdefs = imap_get(&sb->curdefs, var))) {
+ pcurdefs = imap_set(&sb->curdefs, var, xcalloc(sb->nblk * sizeof(union ref)));
+ }
+ if (val.t == RTMP) assert(instrtab[val.i].op != Onop);
+ (*pcurdefs)[blk->id] = val;
+}
+
+static union ref
+readvarrec(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk)
+{
+ union ref val;
+ assert(blk->npred > 0);
+ if (!bstest(sb->sealed, blk->id)) { /* unsealed block */
+ val = insertphi(blk, cls);
+ vpush(&sb->pendingphis[blk->id], ((struct pendingphi) { var, val.i }));
+ } else if (blk->npred == 1) {
+ val = readvar(sb, var, cls, blkpred(blk, 0));
+ } else {
+ val = insertphi(blk, cls);
+ writevar(sb, var, blk, val);
+ val = addphiargs(sb, var, cls, blk, val);
+ }
+ writevar(sb, var, blk, val);
+ return val;
+}
+
+static union ref
+readvar(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk)
+{
+ union ref **pcurdefs;
+ if ((pcurdefs = imap_get(&sb->curdefs, var)) && (*pcurdefs)[blk->id].bits)
+ return (*pcurdefs)[blk->id];
+ if (blk->npred == 0) /* entry block, var is read before being written to */
+ return UNDREF;
+ return readvarrec(sb, var, cls, blk);
+}
+
+static bool
+trysealrec(struct ssabuilder *sb, struct block *blk)
+{
+ vec_of(struct pendingphi) *pending;
+
+Recur:
+ if (bstest(sb->sealed, blk->id)) return 1;
+ if (blk->id > sb->lastvisit) return 0;
+ markvisited(blk);
+ for (int i = 0; i < blk->npred; ++i) {
+ struct block *p = blkpred(blk, i);
+ if (wasvisited(p)) continue;
+ if (!trysealrec(sb, p)) return 0;
+ }
+
+ bsset(sb->sealed, blk->id);
+ pending = (void *) &sb->pendingphis[blk->id];
+ for (int i = 0; i < pending->n; ++i) {
+ addphiargs(sb, pending->p[i].var, instrtab[pending->p[i].phi].cls, blk,
+ mkref(RTMP, pending->p[i].phi));
+ }
+ vfree(pending);
+ if (blk->s1 && !blk->s2) {
+ blk = blk->s1;
+ goto Recur;
+ } else if (blk->s1 && blk->s2) {
+ trysealrec(sb, blk->s1);
+ blk = blk->s2;
+ goto Recur;
+ }
+ return 1;
+}
+
+static void
+tryseal(struct ssabuilder *sb, struct block *blk)
+{
+ startbbvisit();
+ trysealrec(sb, blk);
+}
+
+/* require use, blkid; keeps use */
+void
+mem2reg(struct function *fn)
+{
+ static struct bitset ssealed[4];
+ struct ssabuilder sb = { .nblk = fn->nblk };
+ struct block *blk;
+
+ sb.pendingphis = xcalloc(fn->nblk * sizeof *sb.pendingphis);
+ if (fn->nblk <= 64 * arraylength(ssealed)) {
+ sb.sealed = ssealed;
+ memset(ssealed, 0, BSSIZE(fn->nblk) * sizeof *ssealed);
+ } else {
+ sb.sealed = xcalloc(BSSIZE(fn->nblk) * sizeof *sb.sealed);
+ }
+
+ sortrpo(fn);
+ blk = fn->entry;
+
+ do {
+ for (int i = 0; i < blk->ins.n; ++i) {
+ struct use *use, *uend;
+ enum irclass k = 0;
+ int sz = 0, ndef = 0;
+ enum op ext = Ocopy;
+ int var = blk->ins.p[i];
+ struct instr *ins = &instrtab[var];
+
+ if (!oisalloca(ins->op)) continue;
+ uend = instruse[var] + instrnuse[var];
+ for (use = instruse[var]; use < uend; ++use) {
+ struct instr *m;
+
+ if (use->u == USERJUMP) goto Next;
+ m = &instrtab[use->u];
+ if (oisload(m->op) && (!sz || sz == loadsz(m->op))) {
+ sz = loadsz(m->op);
+ k = loadcls(m->op);
+ if (sz < 4) ext = load2ext(m->op);
+ continue;
+ }
+ if (oisstore(m->op) && m->l.bits == mkref(RTMP, var).bits
+ && (!sz || sz == storesz(m->op))) {
+ sz = storesz(m->op);
+ ++ndef;
+ continue;
+ }
+ goto Next;
+ }
+ if (!ndef) /* slot is read from but never written to */
+ goto Next;
+
+ for (use = instruse[var]; use < uend; ++use) {
+ struct instr *m = &instrtab[use->u];
+
+ if (oisstore(m->op)) {
+ writevar(&sb, var, use->blk, m->r);
+ *m = mkinstr(Onop,0,);
+ } else if (oisload(m->op)) {
+ union ref val = readvar(&sb, var, k = m->cls, use->blk);
+ if (!val.bits) { /* var is used uninitialized */
+ /* TODO emit diagnostic */
+ /* load some garbage */
+ *m = mkinstr(kisflt(k) ? Oloadf4 + (k==KF8) : Oloads1+ilog2(sz)*2,
+ k, mkref(RREG, mctarg->bpr));
+ } else {
+ adduse(use->blk, use->u, val);
+ *m = mkinstr(ext, k, val);
+ }
+ }
+ }
+ /* remove alloca */
+ delinstr(blk, i--);
+
+ Next:;
+ }
+
+ assert(sb.lastvisit == blk->id);
+ ++sb.lastvisit;
+ tryseal(&sb, blk);
+ } while ((blk = blk->lnext) != fn->entry);
+
+ do {
+ /* remove phis marked for deletion */
+ for (int i = 0; i < blk->phi.n; ++i)
+ if (instrtab[blk->phi.p[i]].op == Onop)
+ delphi(blk, i--);
+ } while ((blk = blk->lnext) != fn->entry);
+
+ if (sb.sealed != ssealed) free(sb.sealed);
+ free(sb.pendingphis);
+
+ for (int i = 0; i < sb.curdefs.mb.N; ++i)
+ if (bstest(sb.curdefs.mb.bs, i))
+ free(sb.curdefs.v[i]);
+ imap_free(&sb.curdefs);
+ if (ccopt.dbg.m) {
+ efmt("<< After mem2reg >>\n");
+ irdump(fn);
+ }
+}
+
+
+/* vim:set ts=3 sw=3 expandtab: */
diff --git a/ir/regalloc.c b/ir/regalloc.c
new file mode 100644
index 0000000..64dcfac
--- /dev/null
+++ b/ir/regalloc.c
@@ -0,0 +1,1195 @@
+#include "ir.h"
+
+/** Implements linear scan register allocation **/
+
+#if 1
+#define DBG(...) if(ccopt.dbg.r) efmt(__VA_ARGS__)
+#else
+#define DBG(...) ((void)0)
+#endif
+
+/* The algorithm used here to introduce phis for temporaries whose definitions
+ * appear later than some of their uses is very similar to that in mem2reg() */
+
+static int livelastblk;
+struct pendingphi { ushort var, phi; };
+static vec_of(struct pendingphi) *pendingphis;
+static int npendingphi;
+static ushort **curdefs;
+static union ref readvar(struct bitset *defined, enum irclass cls, int var, struct block *blk);
+
+static void
+fillphi(struct bitset *defined, union ref phi, enum irclass cls, int var, struct block *blk)
+{
+ union ref *args = phitab.p[instrtab[phi.i].l.i];
+ assert(blk->npred > 0);
+ for (int i = 0; i < blk->npred; ++i)
+ args[i] = readvar(defined, cls, var, blk);
+}
+
+static union ref
+readvar(struct bitset *defined, enum irclass cls, int var, struct block *blk)
+{
+ union ref val;
+
+ if (bstest(defined, var)) return mkref(RTMP, var);
+ assert(cls && "?");
+
+ /* memoed definition */
+ if (xbcap(curdefs) > blk->id && xbcap(curdefs[blk->id]) > var && curdefs[blk->id][var])
+ return mkref(RTMP, curdefs[blk->id][var]);
+
+ xbgrowz(&curdefs, blk->id + 1);
+ if (blk->id > livelastblk) {
+ ++npendingphi;
+ val = insertphi(blk, cls);
+ xbgrowz(&pendingphis, blk->id + 1);
+ vpush(&pendingphis[blk->id], ((struct pendingphi) { var, val.i }));
+ } else if (blk->npred == 1) {
+ val = readvar(defined, cls, var, blkpred(blk, 0));
+ } else {
+ val = insertphi(blk, cls);
+ fillphi(defined, val, cls, var, blk);
+ }
+ xbgrowz(&curdefs[blk->id], var + 1);
+ assert(val.i > 0);
+ curdefs[blk->id][var] = val.i;
+ return val;
+}
+
+static void
+liveuse(struct bitset *defined, struct instr *ins, union ref *r, struct block *blk)
+{
+ int var;
+ if (r->t == RADDR) {
+ liveuse(defined, ins, &addrht[r->i].base, blk);
+ liveuse(defined, ins, &addrht[r->i].index, blk);
+ return;
+ } else if (r->t != RTMP) return;
+ var = r->i;
+ if (bstest(defined, var)) return;
+
+ *r = readvar(defined, insrescls(instrtab[r->i]), var, blk);
+}
+
+/* regalloc() assumes every use of a temporary is visited before its definition,
+ * so this function introduces phi functions to fix the cases where that does not hold */
+static void
+fixlive(struct function *fn)
+{
+ extern int ninstr;
+ struct block *blk = fn->entry;
+ struct bitset definedbuf[4] = {0};
+ struct bitset *defined = definedbuf;
+
+ if (BSSIZE(ninstr) >= arraylength(definedbuf))
+ defined = xcalloc(sizeof *defined * BSSIZE(ninstr));
+ npendingphi = 0;
+
+ do {
+ for (int i = 0; i < blk->phi.n; ++i) {
+ int var = blk->phi.p[i];
+ bsset(defined, var);
+ }
+
+ for (int i = 0; i < blk->ins.n; ++i) {
+ int var = blk->ins.p[i];
+ struct instr *ins = &instrtab[var];
+ if (ins->l.t) liveuse(defined, ins, &ins->l, blk);
+ if (ins->r.t) liveuse(defined, ins, &ins->r, blk);
+ bsset(defined, var);
+ }
+ } while ((blk = blk->lnext) != fn->entry);
+
+ do {
+ vec_of(struct pendingphi) *pphi;
+
+ if (!npendingphi) break;
+ if (xbcap(pendingphis) <= blk->id) break;
+
+ pphi = (void *)&pendingphis[blk->id];
+ npendingphi -= pphi->n;
+ for (int i = 0; i < pphi->n; ++i) {
+ fillphi(defined, mkref(RTMP, pphi->p[i].phi), instrtab[pphi->p[i].phi].cls, pphi->p[i].var, blk);
+ }
+ vfree(pphi);
+ } while ((blk = blk->lnext) != fn->entry);
+
+ if (ccopt.dbg.l) {
+ DBG("<< After liveness fixup >>\n");
+ irdump(fn);
+ }
+ if (defined != definedbuf) free(defined);
+}
+
+static regset gpregset, fpregset;
+
+#define isfpr(reg) in_range((reg), mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1)
+#define isgpr(reg) in_range((reg), mctarg->gpr0, mctarg->gpr0 + mctarg->nfpr - 1)
+
+/* an allocated physical register or stack slot */
+enum { ADEAD, AREG, ASTACK };
+struct alloc { ushort t : 2, a : 14; };
+#define afree() ((struct alloc) { ADEAD })
+#define areg(r) ((struct alloc) { AREG, (r) })
+#define astack(s) ((struct alloc) { ASTACK, (s) })
+
+enum { MAXSPILL = 512 };
+
+/* half-closed instr range [from, to) */
+struct range { ushort from, to; };
+
+/* a temporary's lifetime interval */
+struct interval {
+ struct interval *next; /* for the linked lists of the active/inactive/handled sets in linear scan */
+ struct alloc alloc;
+ schar rhint : 7, /* register hint */
+ fpr : 1; /* needs float register? */
+
+ /* sorted ranges array */
+ uchar nrange;
+ union {
+ struct range _inl[2];
+ struct range *_dyn;
+ };
+};
+
+struct intervals {
+ int count; /* number of actual intervals */
+ struct interval *temps; /* map of tmp -> interval */
+ struct fixinterval {
+ struct fixinterval *next;
+ regset rs;
+ struct range range;
+ } *fixed; /* linked list of fixed intervals, always sorted */
+};
+
+struct rega {
+ struct function *fn;
+ struct arena **arena;
+ regset free; /* free registers */
+ struct bitset freestk[BSSIZE(MAXSPILL)]; /* free stack slots */
+ int maxstk, /* highest stack slot used */
+ stktop;
+ struct intervals intervals;
+};
+
+/* materialization of stack slot references is deferred until the end because
+ * the offset from the base pointer depends on how many slots we end up allocating */
+static vec_of(union ref *) stkslotrefs;
+
+static void
+addstkslotref(int instr, uint off)
+{
+ union ref *ref = &instrtab[instr].l;
+ *ref = mkref(RICON, off);
+ vpush(&stkslotrefs, ref);
+}
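+
+/* Sketch of the two phases (opcode and offsets are illustrative): while
+ * allocating, a spilled access is emitted with a bare byte offset,
+ *
+ *    store ..., (RICON 16)       slot 2, 2*8 = 16
+ *
+ * and the patched ref is remembered in stkslotrefs; once regalloc() knows the
+ * final frame size it rewrites every remembered ref into a base-pointer
+ * address, roughly [bpr - stksiz + 16]. */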
+
+static struct alloc
+allocstk(struct rega *ra)
+{
+ int s = -1;
+
+ for (int i = 0; i < BSSIZE(MAXSPILL); ++i) {
+ if (ra->freestk[i].u != 0) {
+ s = i*64 + lowestsetbit(ra->freestk[i].u);
+ break;
+ }
+ }
+ if (s != -1) {
+ bsclr(ra->freestk, s);
+ if (ra->stktop < s) ra->stktop = s+1;
+ } else {
+ s = ra->stktop++;
+ }
+ if (ra->maxstk < s+1) ra->maxstk = s+1;
+ //imap_get(&ra->intervals.temps, var)->alloc = astack(s);
+ return astack(s);
+}
+
+static void
+freestk(struct rega *ra, int slot)
+{
+ DBG("FREE stk %d\n",slot);
+ if (slot < MAXSPILL)
+ bsset(ra->freestk, slot);
+ else if (slot == ra->stktop - 1)
+ --ra->stktop;
+}
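+
+/* Slot reuse sketch (slot numbers invented): allocstk() hands out the lowest
+ * slot still set in freestk, so a sequence alloc->0, alloc->1, freestk(0),
+ * alloc->0 reuses slot 0; maxstk keeps the high-water mark that later sizes
+ * the frame in regalloc(). */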
+
+/* Parallel moves algorithm from QBE: https://c9x.me/git/qbe.git/tree/rega.c?id=e493a7f23352f51acc0a1e12284ab19d7894488a#n201 */
+
+#define mkmove(k, rd, rs) mkinstr(Omove, k, mkref(RREG, rd), mkref(RREG, rs))
+
+enum pmstat { PMTOMOVE, PMMOVING, PMDONE };
+static struct pmove {
+ uchar k;
+ uchar stat;
+ struct alloc dst, src;
+} pmove[MAXREGS];
+static int npmove;
+
+static void
+pmadd(enum irclass k, struct alloc dst, struct alloc src)
+{
+ if (!memcmp(&dst, &src, sizeof dst)) return;
+ assert(npmove < MAXREGS);
+ pmove[npmove++] = (struct pmove) { k, PMTOMOVE, dst, src };
+}
+
+static void
+emitmove(enum irclass k, struct alloc dst, struct alloc src, struct block *blk, int curi)
+{
+ struct instr mv = {.keep = 1};
+ if (dst.t == AREG && src.t == AREG) {
+ insertinstr(blk, curi, mkmove(k, dst.a, src.a));
+ } else if (dst.t == ASTACK && src.t == AREG) {
+ mv = mkinstr(Ostore1+ilog2(cls2siz[k]), 0, .r = mkref(RREG, src.a));
+ addstkslotref(insertinstr(blk, curi, mv).i, dst.a*8);
+ } else if (dst.t == AREG && src.t == ASTACK) {
+ switch (mv.cls = k) {
+ default: assert(0);
+ case KI4: mv.op = Oloads4; break;
+ case KI8: mv.op = Oloadi8; break;
+ case KPTR: mv.op = targ_64bit ? Oloadi8 : Oloads4; break;
+ case KF4: mv.op = Oloadf4; break;
+ case KF8: mv.op = Oloadf8; break;
+ }
+ mv.reg = dst.a+1;
+ addstkslotref(insertinstr(blk, curi, mv).i, src.a*8);
+ } else assert(0);
+}
+
+static int
+pmrec(int i, struct block *blk, int curi, enum irclass *k)
+{
+ int j, c;
+
+ if (!memcmp(&pmove[i].dst, &pmove[i].src, sizeof pmove->dst)) {
+ pmove[i].stat = PMDONE;
+ return -1;
+ }
+
+ /* widen when necessary */
+ assert(kisint(pmove[i].k) == kisint(*k));
+ if (cls2siz[pmove[i].k] > cls2siz[*k])
+ *k = pmove[i].k;
+
+ for (j = 0; j < npmove; ++j) {
+ if (!memcmp(&pmove[j].dst, &pmove[i].src, sizeof pmove->dst)) {
+ break;
+ }
+ }
+ if (j == npmove) goto Done;
+ switch (pmove[j].stat) {
+ default: assert(0);
+ case PMMOVING:
+ c = j;
+ Swap:
+ assert(pmove[i].src.t == AREG && pmove[i].dst.t == AREG);
+ insertinstr(blk, curi,
+ mkinstr(Oswap, *k, mkref(RREG, pmove[i].dst.a), mkref(RREG, pmove[i].src.a), .keep = 1));
+ break;
+ case PMTOMOVE:
+ pmove[i].stat = PMMOVING;
+ c = pmrec(j, blk, curi, k);
+ if (c == i) {
+ c = -1;
+ break;
+ } else if (c != -1) {
+ goto Swap;
+ }
+ /* fallthru */
+ case PMDONE:
+ Done:
+ c = -1;
+ emitmove(*k, pmove[i].dst, pmove[i].src, blk, curi);
+ break;
+ }
+
+ pmove[i].stat = PMDONE;
+ return c;
+}
+
+static void
+emitpm(struct block *blk)
+{
+ int curi = blk->ins.n;
+ for (int i = 0; i < npmove; ++i) {
+ if (pmove[i].stat == PMTOMOVE) {
+ pmrec(i, blk, curi, &(enum irclass) { pmove[i].k });
+ }
+ }
+}
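+
+/* Two illustrative cases (registers are placeholders): for a chain such as
+ * {R1 <- R2, R2 <- R3}, pmrec() orders the emitted code so that R1 <- R2 ends
+ * up before R2 <- R3, i.e. a register is read before it is overwritten; for a
+ * cycle such as {R1 <- R2, R2 <- R1}, the PMMOVING marker detects the loop and
+ * a single Oswap is emitted instead of going through a scratch register. */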
+
+/* remove phis by inserting parallel moves */
+static void
+lowerphis(struct rega *ra, struct block *blk, struct block *suc)
+{
+ int predno;
+ struct block *n = NULL;
+
+ if (!blk->s2) n = blk;
+
+ for (predno = 0; predno < suc->npred; ++predno)
+ if (blkpred(suc, predno) == blk)
+ break;
+ assert(predno < suc->npred);
+
+ npmove = 0;
+ /* ensure each phi argument ends up in the same register/slot as its phi, using parallel copies */
+ for (int i = 0; i < suc->phi.n; ++i) {
+ struct instr *phi = &instrtab[suc->phi.p[i]];
+ union ref *arg = &phitab.p[phi->l.i][predno];
+ struct alloc from, to;
+
+ if (arg->t == RREG) continue;
+ assert(arg->t == RTMP);
+ DBG("resolve phi @%d, @%d, %%%d <- %%%d\n", blk->id, suc->id, phi - instrtab, arg->i);
+ if (instrtab[arg->i].reg) {
+ from = areg(instrtab[arg->i].reg - 1);
+ DBG(" it had R%d\n", from.a);
+ } else {
+ from = ra->intervals.temps[arg->i].alloc;
+ assert(from.t != ADEAD);
+ DBG(" found %c%d\n", " RS"[from.t], from.a);
+ if (from.t == AREG)
+ instrtab[arg->i].reg = from.a+1;
+ }
+ if (phi->reg) {
+ to = areg(phi->reg - 1);
+ DBG(" phi had R%d\n", to.a);
+ } else {
+ to = ra->intervals.temps[phi - instrtab].alloc;
+ assert(to.t != ADEAD);
+ DBG(" found phi %c%d\n", " RS"[to.t], to.a);
+ if (to.t == AREG)
+ phi->reg = to.a+1;
+ }
+ DBG(" > phi move %c%d -> %c%d\n", " RS"[from.t], from.a, " RS"[to.t], to.a);
+ if (!n) n = insertblk(ra->fn, blk, suc);
+ pmadd(phi->cls, to, from);
+ }
+ if (n) emitpm(n);
+}
+
+/* generate copies for phi operands to transform the program into conventional SSA (CSSA) */
+static void
+fixcssa(struct function *fn)
+{
+ struct block *blk = fn->entry;
+ do {
+ if (!blk->phi.n) continue;
+ for (int p = 0; p < blk->npred; ++p) {
+ struct block *n, *pred = blkpred(blk, p);
+ if (!pred->s2) {
+ /* pred only has 1 successor (blk), so insert move directly in it */
+ n = pred;
+ } else {
+ n = insertblk(fn, pred, blk);
+ assert(n->jmp.t == Jb && n->s1 == blk);
+ }
+ for (int i = 0; i < blk->phi.n; ++i) {
+ int phi = blk->phi.p[i];
+ union ref *args = phitab.p[instrtab[phi].l.i];
+ args[p] = insertinstr(n, n->ins.n, mkinstr(Ocopy, instrtab[phi].cls, args[p]));
+ }
+ }
+ } while ((blk = blk->lnext) != fn->entry);
+}
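+
+/* Illustrative before/after for fixcssa() (hypothetical IR; temp numbers are
+ * invented): a phi such as
+ *
+ *    @3: %8 = phi @1 %2, @2 %5
+ *
+ * gets a copy per incoming edge, placed in the predecessor itself or in a
+ * block inserted on a critical edge:
+ *
+ *    @1: ... %10 = copy %2
+ *    @2: ... %11 = copy %5
+ *    @3: %8 = phi @1 %10, @2 %11
+ *
+ * so each phi argument has a short, per-edge lifetime when lowerphis() turns
+ * the phi into parallel moves. */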
+
+static inline bool
+rangeoverlap(struct range a, struct range b)
+{
+ return a.from < b.to && b.from < a.to;
+}
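+
+/* rangeoverlap() on made-up values: [2,5) and [5,9) are only adjacent, so they
+ * do not overlap; [2,6) and [5,9) do. */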
+
+static void
+pushrange(struct interval *lv, struct range r)
+{
+ if (lv->nrange < 2) lv->_inl[lv->nrange++] = r;
+ else if (lv->nrange > 2) xbpush(&lv->_dyn, &lv->nrange, r);
+ else {
+ struct range *d = NULL;
+ xbgrow(&d, 4);
+ memcpy(d, lv->_inl, 2*sizeof *d);
+ d[lv->nrange++] = r;
+ lv->_dyn = d;
+ }
+}
+#define itrange(lv, i) ((lv)->nrange <= 2 ? (lv)->_inl : (lv)->_dyn)[i]
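+
+/* Storage sketch for the ranges above: the first two ranges live inline in
+ * _inl; the third push copies them into a freshly grown _dyn array and appends
+ * there, so itrange() only has to choose between _inl and _dyn on nrange. */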
+
+static bool
+intervaloverlap(struct interval *a, struct interval *b)
+{
+ for (int i = 0, j = 0; i < a->nrange && j < b->nrange; ) {
+ struct range r1 = itrange(a, i), r2 = itrange(b, j);
+ if (rangeoverlap(r1, r2)) return 1;
+ if (r1.to <= r2.from) ++i;
+ else ++j;
+ }
+ return 0;
+}
+
+static bool
+intervaldef(struct intervals *intervals, int t, struct block *blk, int pos, int reghint)
+{
+ struct interval *it = &intervals->temps[t];
+ if (it->nrange) {
+ assert(itrange(it, 0).from <= pos);
+ itrange(it, 0).from = pos;
+ return 1;
+ }
+ return 0;
+}
+
+static void
+addrange(struct intervals *intervals, int t, struct range new, int reghint)
+{
+ struct interval *it = &intervals->temps[t];
+ struct range *fst;
+ int n;
+
+ if (!it->nrange) {
+ ++intervals->count;
+ it->rhint = reghint;
+ it->fpr = kisflt(insrescls(instrtab[t]));
+ pushrange(it, new);
+ return;
+ }
+
+ fst = &itrange(it, 0);
+ /* fully covered by first range? */
+ if (fst->from <= new.from && fst->to >= new.to) return;
+ /* overlaps with first range ? */
+ if (fst->from <= new.to && new.to < fst->to) {
+ fst->from = new.from;
+ } else {
+ /* put new range at the start */
+ pushrange(it, new);
+ memmove(&itrange(it, 1), &itrange(it, 0), sizeof(struct range) * (it->nrange - 1));
+ itrange(it, 0) = new;
+ }
+
+ /* new range might cover existing ranges (loop header lives),
+ * check and successively merge */
+ fst = &itrange(it, 0);
+ n = 0;
+ for (int i = 1; i < it->nrange; ++i) {
+ struct range other = itrange(it, i);
+ if (fst->to >= other.from) {
+ fst->to = fst->to > other.to ? fst->to : other.to;
+ ++n;
+ } else break;
+ }
+
+ if (n > 0) {
+ for (int i = 1; i + n < it->nrange; ++i)
+ itrange(it, i) = itrange(it, i+n);
+ if (it->nrange > 2 && it->nrange - n <= 2) {
+ struct range *dyn = it->_dyn;
+ memcpy(it->_inl, dyn, (it->nrange - n) * sizeof *dyn);
+ xbfree(dyn);
+ }
+ it->nrange -= n;
+ }
+}
+
+static void
+usereg(struct rega *ra, int reg, struct block *blk, int pos)
+{
+ struct fixinterval *fxit;
+ if (rstest(mctarg->rglob, reg)) return; /* regalloc never allocates globally live regs, so we don't need intervals for them */
+ fxit = alloc(ra->arena, sizeof *fxit, 0);
+ fxit->next = ra->intervals.fixed;
+ fxit->range = (struct range) {blk->inumstart, pos};
+ fxit->rs = 1<<reg;
+ ra->intervals.fixed = fxit;
+}
+
+static void
+defreg(struct rega *ra, int reg, int pos) {
+ if (rstest(mctarg->rglob, reg)) return;
+ for (struct fixinterval *fxit = ra->intervals.fixed; fxit; fxit = fxit->next) {
+ if (fxit->rs == 1<<reg) {
+ assert(fxit->range.from <= pos);
+ fxit->range.from = pos;
+ // DBG(">>>REG %s range @%d: %d-%d\n", mctarg->rnames[reg], fxit->range.from.blk, fxit->range.from.ins, fxit->range.to.ins);
+ return;
+ }
+ }
+ assert(0&&"def reg not used");
+}
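+
+/* Sketch of how usereg() and defreg() combine (positions and the register name
+ * are invented): for an ABI register R used to pass a call argument,
+ *
+ *    10: move R, %4     defreg() trims the fixed range's start to 10
+ *    ...
+ *    14: call f         usereg() opened the range [blkstart, 14) here
+ *
+ * ends up blocking R only over [10, 14); buildintervals() walks the block in
+ * reverse, so the use at the call is seen first and the defining move last. */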
+
+/* lifetime interval construction: https://c9x.me/compile/bib/Wimmer10a.pdf */
+static void
+buildintervals(struct rega *ra)
+{
+ extern int ninstr;
+ struct block *blk, *last;
+ struct bitset **livein = alloc(ra->arena, ra->fn->nblk * sizeof *livein, 0);
+ size_t bssize = BSSIZE(ninstr);
+ for (int i = 0; i < ra->fn->nblk; ++i)
+ livein[i] = allocz(ra->arena, bssize * sizeof *livein[i], 0);
+ ra->intervals.temps = allocz(ra->arena, ninstr * sizeof *ra->intervals.temps, 0);
+
+ numberinstrs(ra->fn);
+ /* visit blocks in reverse, to build lifetime intervals */
+ blk = last = ra->fn->entry->lprev;
+ do {
+ struct instr *ins = NULL;
+ struct bitset *live = livein[blk->id];
+ struct block *suc = blk->s1;
+ // DBG("--- @%d ---\n",blk->id);
+
+ /* live = union of successor.liveIn for each successor of b */
+ if (blk->s1) bsunion(live, livein[blk->s1->id], bssize);
+ if (blk->s2) bsunion(live, livein[blk->s2->id], bssize);
+
+ /* for each phi function phi of successors of b do
+ * live.add(phi.inputOf(b))
+ */
+ if (suc) do {
+ int predno;
+ for (predno = 0; blkpred(suc, predno) != blk; ++predno) ;
+ for (int i = 0; i < suc->phi.n; ++i) {
+ struct instr *phi = &instrtab[suc->phi.p[i]];
+ union ref *arg = &phitab.p[phi->l.i][predno];
+ assert(arg->t == RTMP);
+ // DBG("from phi set live %%%d\n", arg->i);
+ bsset(live, arg->i);
+ }
+ } while (suc != blk->s2 && (suc = blk->s2));
+
+ /* for each opd in live do
+ * intervals[opd].addRange(b.from, b.to)
+ */
+ for (uint i = 0; bsiter(&i, live, bssize); ++i) {
+ // DBG("itretave %%%d\n",i );
+ addrange(&ra->intervals, i, (struct range){blk->inumstart, blk->inumstart + blk->ins.n + 2}, -1);
+ }
+
+ /* for each operation op of b in reverse order do */
+ /* the jump's operands are handled first: the queue is pre-seeded here and we
+ * enter the loop body at Branchopd, then the instructions are walked in reverse */
+ union ref queue[8] = { blk->jmp.arg[0], blk->jmp.arg[1] };
+ goto Branchopd;
+ for (int curi, pos ; curi >= 0; --curi) {
+ int out = blk->ins.p[curi], reghint;
+ ins = &instrtab[out];
+ pos = blk->inumstart + 1 + curi;
+ /* for each output operand opd of op do
+ * intervals[opd].setFrom(op.id)
+ * live.remove(opd)
+ */
+ reghint = ins && ins->op == Ocopy && ins->l.t == RREG ? ins->l.i : -1;
+ if (!intervaldef(&ra->intervals, out, blk, pos, reghint)) {
+ if (insrescls(*ins) && ins->op != Omove && !ins->keep && !(ins->op == Ocopy && ins->l.t == RREG)) {
+ /* dead */
+ *ins = mkinstr(Onop,0,);
+ }
+ }
+ bsclr(live, out);
+
+ /* gather fixed intervals */
+ if (ins->op == Omove) {
+ assert(ins->l.t == RREG);
+ defreg(ra, ins->l.i, pos);
+ } else if (ins->op == Ocall) {
+ struct call *call = &calltab.p[ins->r.i];
+ regset rclob = (gpregset | fpregset) &~ (mctarg->rglob | mctarg->rcallee);
+ ra->fn->isleaf = 0;
+
+ for (int i = 0; i < 2; ++i) {
+ if (call->abiret[i].ty.bits) {
+ int reg = call->abiret[i].reg;
+ rsclr(&rclob, reg);
+ defreg(ra, reg, pos);
+ }
+ }
+ if (rclob) {
+ struct fixinterval *fxit = alloc(ra->arena, sizeof *fxit, 0);
+ fxit->next = ra->intervals.fixed;
+ fxit->range = (struct range) {pos, pos};
+ fxit->rs = rclob;
+ ra->intervals.fixed = fxit;
+ }
+ for (int j = call->narg - 1; j >= 0; --j) {
+ int reg = call->abiarg[j].reg;
+ if (reg >= 0) {
+ usereg(ra, reg, blk, pos);
+ }
+ }
+ }
+
+
+ /* for each input operand opd of op do
+ * intervals[opd].addRange(b.from, op.id)
+ * live.add(opd)
+ */
+ reghint = (ins && ins->op == Omove && ins->l.t == RREG) ? ins->l.i : -1;
+ queue[0] = ins->r, queue[1] = ins->l;
+ if (0) {
+ Branchopd:
+ reghint = -1;
+ curi = blk->ins.n;
+ pos = blk->inumstart + blk->ins.n + 1;
+ }
+ for (int nqueue = ins && ins->op == Omove ? 1 : 2; nqueue > 0;) {
+ union ref r = queue[--nqueue];
+
+ /* do not allocate a reg for a cmp op used as branch argument, since it's a pseudo op */
+ if (curi == blk->ins.n && blk->jmp.t == Jb && r.t == RTMP && instrtab[r.i].keep)
+ continue;
+
+ if (r.t == RTMP) {
+ addrange(&ra->intervals, r.i, (struct range){blk->inumstart, pos}, reghint);
+ bsset(live, r.i);
+ } else if (r.t == RREG) {
+ usereg(ra, r.i, blk, pos);
+ } else if (r.t == RADDR) {
+ reghint = -1;
+ queue[nqueue++] = addrht[r.i].base;
+ queue[nqueue++] = addrht[r.i].index;
+ }
+ }
+ }
+
+ /* for each phi function phi of b do
+ * live.remove(phi.output)
+ */
+ for (int i = 0; i < blk->phi.n; ++i)
+ bsclr(live, blk->phi.p[i]);
+
+ /* if b is loop header then
+ * loopEnd = last block of the loop starting at b
+ * for each opd in live do
+ * intervals[opd].addRange(b.from, loopEnd.to)
+ */
+ struct block *loopend = NULL;
+ for (int i = 0; i < blk->npred; ++i) {
+ struct block *pred = blkpred(blk, i);
+ if (pred->id > blk->id)
+ loopend = loopend && loopend->id > pred->id ? loopend : pred;
+ }
+ if (loopend) {
+ // DBG("i'm loop header - @%d (to @%d)\n", blk->id, loopend->id);
+ for (uint opd = 0; bsiter(&opd, live, bssize); ++opd) {
+ // DBG(" i have live %%%d\n", opd);
+ addrange(&ra->intervals, opd, (struct range){blk->inumstart, loopend->inumstart + loopend->ins.n+1}, -1);
+ /* struct interval *lv = imap_get(&ra->intervals.temps, opd);
+ for (int i = 0; i < lv->n; ++i) {
+ struct range r = itrange(lv, i);
+ // DBG(" @%d:%d - @%d:%d\n", r.from.blk, r.from.ins, r.to.blk, r.to.ins);
+ } */
+ }
+ }
+ } while ((blk = blk->lprev) != last);
+
+ for (int var = 0; var < ninstr; ++var) {
+ struct interval *it = &ra->intervals.temps[var];
+ if (!it->nrange) continue;
+ DBG("lifetime of %%%d: ", var);
+ for (int i = 0; i < it->nrange; ++i) {
+ struct range r = itrange(it, i);
+ DBG("[%d,%d)%s", r.from, r.to, i < it->nrange-1 ? ", " : "");
+ }
+ DBG("\n");
+ }
+ for (struct fixinterval *fx = ra->intervals.fixed; fx; fx = fx->next) {
+ DBG("fixed {");
+ for (int r = 0; rsiter(&r, fx->rs); ++r)
+ DBG("%s,", mctarg->rnames[r]);
+ DBG("}: [%d,%d)\n", fx->range.from, fx->range.to);
+ }
+}
+
+static bool
+itcontainspos(const struct interval *it, int pos)
+{
+ for (int i = 0; i < it->nrange; ++i) {
+ struct range r = itrange(it, i);
+ if (r.from > pos) return 0;
+ if (pos < r.to) return 1;
+ }
+ return 0;
+}
+
+/* quicksort by the start of each interval's first range (Hoare partition) */
+static void
+sortintervals(struct interval **xs, int lo, int hi)
+{
+ assert(lo >= 0 && hi >= 0);
+ while (lo < hi) {
+ /* partition */
+ int i = lo - 1, p = hi + 1,
+ pivot = itrange(xs[lo], 0).from;
+ for (;;) {
+ struct interval *tmp;
+ do ++i; while (itrange(xs[i], 0).from < pivot);
+ do --p; while (itrange(xs[p], 0).from > pivot);
+ if (i >= p) break;
+ /* swap */
+ tmp = xs[i];
+ xs[i] = xs[p];
+ xs[p] = tmp;
+ }
+ /* recur */
+ if (p + 1 >= hi) {
+ hi = p;
+ } else {
+ if (lo < p)
+ sortintervals(xs, lo, p);
+ lo = p + 1;
+ }
+ }
+}
+
+static void
+linearscan(struct rega *ra)
+{
+ struct intervals *intervals = &ra->intervals;
+ int nunhandled = 0;
+ struct interval **unhandled = NULL;
+ struct interval *active = NULL, *inactive = NULL, *handled = NULL;
+
+ if (!intervals->count) return;
+ /* sort intervals */
+ {
+ extern int ninstr;
+ unhandled = alloc(ra->arena, sizeof *unhandled * intervals->count, 0);
+
+ for (int i = 0; i < ninstr; ++i) {
+ if (!intervals->temps[i].nrange) continue;
+ unhandled[nunhandled++] = &intervals->temps[i];
+ }
+ assert(nunhandled == intervals->count);
+ sortintervals(unhandled, 0, nunhandled-1);
+ }
+
+ /* LINEAR SCAN */
+ for (struct interval **pcurrent = unhandled; nunhandled-- > 0; ++pcurrent) {
+ struct interval *current = *pcurrent;
+ int pos = itrange(current, 0).from;
+
+ /* Expire old intervals */
+
+ /* check for intervals in active that are handled or inactive */
+ for (struct interval **lnk = &active, *it = *lnk, *next; (next = it?it->next:0), it; it = next) {
+ //DBG("< im active %%%d\n", intervals->temps.mb.k[it - intervals->temps.v]);
+ /* ends before position? */
+ if (itrange(it, it->nrange-1).to <= pos) {
+ /* move from active to handled */
+ *lnk = next;
+ it->next = handled;
+ handled = it;
+ if (it->alloc.t == AREG) {
+ rsset(&ra->free, it->alloc.a);
+ //DBG(" unblock %s\n", mctarg->rnames[it->reg]);
+ } else if (it->alloc.t == ASTACK) {
+ freestk(ra, it->alloc.a);
+ }
+ } else
+ /* it does not cover position? */
+ if (!itcontainspos(it, pos)) {
+ /* move from active to inactive */
+ *lnk = next;
+ it->next = inactive;
+ inactive = it;
+ if (it->alloc.t == AREG) {
+ rsset(&ra->free, it->alloc.a);
+ DBG(" >> %%%zd unblock %s\n", it-ra->intervals.temps, mctarg->rnames[it->alloc.a]);
+ }
+ } else lnk = &it->next;
+ }
+
+ /* check for intervals in inactive that are handled or active */
+ for (struct interval **lnk = &inactive, *it = *lnk, *next; (next = it?it->next:0), it; it = next) {
+ //DBG("< im inactive %%%d\n", intervals->temps.mb.k[it - intervals->temps.v]);
+ /* ends before position? */
+ if (itrange(it, it->nrange-1).to <= pos) {
+ /* move from inactive to handled */
+ *lnk = next;
+ it->next = handled;
+ handled = it;
+ if (it->alloc.t == ASTACK) {
+ freestk(ra, it->alloc.a);
+ }
+ } else
+ /* it covers position? */
+ if (itcontainspos(it, pos)) {
+ /* move from inactive to active */
+ *lnk = next;
+ it->next = active;
+ active = it;
+ if (it->alloc.t == AREG) {
+ assert(rstest(ra->free, it->alloc.a));
+ rsclr(&ra->free, it->alloc.a);
+ DBG(" << %%%zd reblock %s\n", it-ra->intervals.temps, mctarg->rnames[it->alloc.a]);
+ }
+ } else lnk = &it->next;
+ }
+
+ /* find a register for current */
+ {
+ int this = current - intervals->temps;
+ regset free = ra->free & (current->fpr ? fpregset : gpregset);
+ struct instr *ins = &instrtab[this];
+ int reg = 0;
+ int end = itrange(current, current->nrange-1).to;
+
+ /* exclude regs from overlapping fixed intervals */
+ for (struct fixinterval *fxit = intervals->fixed; fxit; fxit = fxit->next) {
+ if (fxit->range.to <= pos) {
+ intervals->fixed = fxit->next;
+ continue;
+ } else if (fxit->range.from >= end) {
+ break;
+ }
+
+ for (int i = 0; i < current->nrange; ++i) {
+ if (rangeoverlap(fxit->range, itrange(current, i))) {
+ free &=~ fxit->rs;
+ }
+ }
+ }
+ /* exclude regs from overlapping inactive intervals */
+ for (struct interval *it = inactive; it; it = it->next) {
+ if (it->alloc.t == AREG && intervaloverlap(it, current)) {
+ rsclr(&free, it->alloc.a);
+ }
+ }
+ /* for 2-address instrs, exclude reg from 2nd arg */
+ if (ins->inplace && opnarg[ins->op] == 2) {
+ int xreg;
+ if (ins->r.t == RREG) rsclr(&free, ins->r.i);
+ else if (ins->r.t == RTMP && (xreg = instrtab[ins->r.i].reg))
+ rsclr(&free, xreg-1);
+ }
+
+ if (!free) {
+ /* spill */
+ current->alloc = allocstk(ra);
+ DBG("%%%d got stk%d\n", this, current->alloc.a);
+ /* move current to active */
+ current->next = active;
+ active = current;
+ continue;
+ }
+
+ /* have free regs, try to use hint */
+ if (current->rhint >= 0)
+ DBG("have hint %s for %%%zd\n",
+ mctarg->rnames[current->rhint], current - intervals->temps);
+ if (current->rhint >= 0 && rstest(free, current->rhint)) {
+ DBG(" (used hint)\n");
+ reg = current->rhint;
+ goto GotReg;
+ } else {
+ /* for two-address instructions, try to use the reg of left arg */
+ if (ins->op != Ophi && (opnarg[ins->op] == 1 || (opnarg[ins->op] == 2 && ins->inplace))) {
+ DBG(" %%%d try %d,%d\n", this, ins->l.t,ins->l.i);
+ if (ins->l.t == RREG && rstest(free, reg = ins->l.i))
+ goto GotReg;
+ if (ins->l.t == RTMP)
+ if ((reg = instrtab[ins->l.i].reg-1) >= 0)
+ if (rstest(free, reg))
+ goto GotReg;
+ /* for phi, try to use reg of any arg */
+ } else if (ins->op == Ophi) {
+ union ref *arg = phitab.p[ins->l.i];
+ for (int i = 0; i < xbcap(arg); ++i) {
+ if (arg[i].t == RREG && rstest(free, reg = arg[i].i)) goto GotReg;
+ if (arg[i].t == RTMP)
+ if ((reg = instrtab[arg[i].i].reg-1) >= 0)
+ if (rstest(free, reg))
+ goto GotReg;
+ }
+ }
+
+ /* prefer caller-saved registers */
+ if (free &~ mctarg->rcallee) free &=~ mctarg->rcallee;
+
+ for (reg = 0; !rstest(free, reg); ++reg);
+ }
+ GotReg:
+ current->alloc = areg(reg);
+ ins->reg = reg + 1;
+ DBG("%%%d got %s\n", this, mctarg->rnames[reg]);
+ rsclr(&ra->free, reg);
+ rsset(&ra->fn->regusage, reg);
+
+ //if current has a register assigned then add current to active
+ current->next = active;
+ active = current;
+ }
+ }
+}
+
+/* replace temps with physical regs, add loads & stores for spilled temps */
+static bool
+devirt(struct rega *ra, struct block *blk)
+{
+ bool allnops = 1;
+ struct function *fn = ra->fn;
+ for (int curi = 0; curi < blk->ins.n; ++curi) {
+ int temp = blk->ins.p[curi];
+ struct instr *ins = &instrtab[temp];
+ struct interval *it;
+ struct alloc *alloc;
+ union ref *argref[4];
+ int nargref = 0;
+ int nspill = 0;
+
+ /* devirtualize ref args */
+ for (int i = 0; i < 2; ++i) {
+ union ref *r = &i[&ins->l];
+ if (r->t == RADDR) {
+ /* XXX this mutates a hashed addr; it should be fine, though, because
+ * no new RADDRs are created after regalloc
+ * (maybe hashing them in the first place is unnecessary) */
+ struct addr *a = &addrht[r->i];
+ argref[nargref++] = &a->base;
+ argref[nargref++] = &a->index;
+ } else {
+ argref[nargref++] = r;
+ }
+ }
+ for (int i = 0; i < nargref; ++i) {
+ static uchar cls2load[] = {
+ [KI4] = Oloads4, [KI8] = Oloadi8, [KF4] = Oloadf4, [KF8] = Oloadf8, [KPTR] = 0
+ };
+ cls2load[KPTR] = targ_64bit ? Oloadi8 : Oloads4;
+ union ref *r = argref[i];
+ int tr;
+ if (r->t == RTMP) {
+ alloc = (it = &ra->intervals.temps[r->i]) && it->nrange ? &it->alloc : NULL;
+ if (alloc->t == ASTACK && ins->op == Omove) {
+ /* move [reg], [stk] -> [reg] = load [stk] */
+ assert(r == &ins->r && ins->l.t == RREG);
+ ins->reg = ins->l.i+1;
+ ins->op = cls2load[ins->cls];
+ ins->r = NOREF;
+ addstkslotref(temp, alloc->a*8);
+ } else if (alloc->t == ASTACK && ins->op == Ocopy) {
+ /* [reg] = copy [stk] -> [reg] = load [stk] */
+ assert(r == &ins->l && ins->reg);
+ ins->op = cls2load[ins->cls];
+ addstkslotref(temp, alloc->a*8);
+ } else if (alloc->t == ASTACK) {
+ /* ref was spilled, gen load to scratch register and use it */
+ struct instr ld = {.cls = insrescls(instrtab[r->i])};
+ int reg = kisint(ld.cls) ? mctarg->gprscratch : mctarg->fprscratch;
+ bool dosave;
+ /* pick scratch register, or any register that doesn't conflict with this instr's srcs/dst */
+ if (nspill > 0) {
+ for (reg = kisflt(ld.cls) ? mctarg->fpr0 : mctarg->gpr0;; ++reg) {
+ if (reg == ins->reg-1) continue;
+ for (int j = 0; j < i; ++j)
+ if (argref[j]->t == RREG && argref[j]->i == reg) continue;
+ break;
+ }
+ /* if not the designated scratch register, we need to save+restore */
+ if ((dosave = rstest(fn->regusage, reg) || rstest(mctarg->rcallee, reg))) {
+ insertinstr(blk, curi++, mkinstr(Oxsave, 0, .l = mkref(RREG, reg)));
+ }
+ }
+ ld.reg = reg+1;
+ ld.op = cls2load[ld.cls];
+ addstkslotref(insertinstr(blk, curi++, ld).i, alloc->a*8);
+ *r = mkref(RREG, reg);
+ if (nspill > 0 && dosave) {
+ insertinstr(blk, curi+1, mkinstr(Oxrestore, 0, .l = mkref(RREG, reg)));
+ }
+ ++nspill;
+ } else if ((tr = instrtab[r->i].reg)) {
+ assert(alloc && alloc->t == AREG && alloc->a == tr-1);
+ *r = mkref(RREG, tr-1);
+ }
+ }
+ }
+ if (nspill > 0) assert(ins->op != Ocall);
+
+ /* devirtualize destination */
+ alloc = temp < ra->intervals.count && (it = &ra->intervals.temps[temp]) && it->nrange ? &it->alloc : NULL;
+ if (alloc && alloc->t == ASTACK) {
+ int store = Ostore1 + ilog2(cls2siz[insrescls(*ins)]);
+ /* t was spilled, gen store */
+ if (ins->op == Ocopy) {
+ ins->op = store;
+ ins->r = ins->l;
+ addstkslotref(temp, alloc->a*8);
+ } else {
+ int reg = kisint(insrescls(*ins)) ? mctarg->gprscratch : mctarg->fprscratch;
+ assert(nspill == 0);
+ ins->reg = reg+1;
+ addstkslotref(
+ insertinstr(blk, ++curi, mkinstr(store, 0, .r = mkref(RREG, reg))).i,
+ alloc->a*8);
+ }
+ } else if (!ins->reg && insrescls(*ins) && ins->op != Omove && !ins->keep) {
+ /* dead */
+ Nop:
+ ins->op = Onop;
+ } else if (ins->op == Omove && ins->r.t == RREG && ins->l.i == ins->r.i) {
+ /* move r1,r2 / r1=r2 */
+ goto Nop;
+ } else if (ins->op == Ocopy && ins->l.t == RREG && ins->reg-1 == ins->l.i) {
+ /* r1 = copy r2 / r1=r2 */
+ goto Nop;
+ } else if (ins->inplace && ins->l.t == RREG && ins->reg && ins->reg-1 != ins->l.i) {
+ /* fixup in-place (two-address) instructions */
+ allnops = 0;
+ insertinstr(blk, curi++, mkmove(ins->cls, ins->reg-1, ins->l.i));
+ ins->l.i = ins->reg-1;
+ } else if (ins->op != Onop) allnops = 0;
+ }
+
+ return allnops;
+}
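+
+/* Spill rewriting in devirt(), sketched with a made-up temp and slot: if %7
+ * was assigned stack slot 3, a use such as
+ *
+ *    %9 = add %7, 1
+ *
+ * becomes a load of slot 3 into the scratch register followed by the add
+ * reading that register; a spilled destination turns an Ocopy into a plain
+ * store, and any other op writes the scratch register and stores it right
+ * after. */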
+
+static void
+fini(struct rega *ra)
+{
+ int id = 0;
+ struct function *fn = ra->fn;
+ struct block *blk = fn->entry;
+
+ do {
+ bool allnops;
+
+ blk->id = id++;
+ allnops = devirt(ra, blk);
+
+ /* remove no-op blocks */
+ if (allnops && !blk->s2 && blk->npred > 0) {
+ bool delet = 1;
+ for (int i = 0; i < blk->npred; ++i) {
+ struct block *p = blkpred(blk, i);
+ if (p->s2 && !blk->s1)
+ delet = 0;
+ }
+ for (int i = 0; i < blk->npred; ++i) {
+ struct block *p = blkpred(blk, i);
+ if (!p->s2 && !blk->s1) {
+ /* simplify:
+ *
+ * @p:
+ * ...
+ * b @blk
+ * @blk:
+ * NOP
+ * ret
+ */
+ assert(p->s1 == blk);
+ p->jmp.t = Jret;
+ p->s1 = NULL;
+ } else if (blk->s1) {
+ /* simplify:
+ *
+ * @p:
+ * ...
+ * b %x, @blk, @other
+ * @blk:
+ * NOP
+ * b @next
+ */
+ struct block *next = blk->s1;
+ if (p->s1 == blk) p->s1 = next;
+ else if (p->s2 == blk) p->s2 = next;
+ else continue;
+ for (int i = 0; i < next->npred; ++i) {
+ if (blkpred(next, i) == blk) {
+ blkpred(next, i) = p;
+ goto NextPred;
+ }
+ }
+ addpred(next, p);
+ }
+ NextPred:;
+ }
+ if (delet) {
+ freeblk(fn, blk);
+ --id;
+ }
+ }
+ } while ((blk = blk->lnext) != fn->entry);
+}
+
+void
+regalloc(struct function *fn)
+{
+ static union ref *stkslotrefsbuf[64];
+ struct rega ra = {fn, .arena = fn->arena};
+ struct block *blk, *last;
+
+ /* setup */
+ if (!fpregset || !gpregset) {
+ for (int r = 0; r < MAXREGS; ++r) {
+ if (isfpr(r))
+ rsset(&fpregset, r);
+ else if (isgpr(r))
+ rsset(&gpregset, r);
+ }
+ }
+ ra.free = (gpregset | fpregset) &~ (mctarg->rglob | (1ull<<mctarg->gprscratch) | (1ull<<mctarg->fprscratch));
+ memset(ra.freestk, 0xFF, sizeof ra.freestk);
+ fn->regusage = 0;
+ fn->stksiz = alignup(fn->stksiz, 8);
+ fn->isleaf = 1;
+ vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf));
+
+ /* put into reverse post order */
+ sortrpo(fn);
+
+ /* fix liveness ranges */
+ fixlive(fn);
+
+ /* transform into CSSA */
+ fixcssa(fn);
+
+ fillblkids(fn);
+
+ if (ccopt.dbg.r) {
+ efmt("<< Before linear scan >>\n");
+ irdump(fn);
+ }
+
+ /* linear scan: build lifetime intervals */
+ buildintervals(&ra);
+
+ /* linear scan: assign physical registers and stack slots */
+ linearscan(&ra);
+
+ /* get out of SSA */
+ blk = last = fn->entry->lprev;
+ do {
+ if (blk->id < 0) continue;
+ for (int i = 0; i < blk->npred; ++i) {
+ lowerphis(&ra, blkpred(blk, i), blk);
+ }
+ vfree(&blk->phi);
+ } while ((blk = blk->lprev) != last);
+
+ /* devirtualize & final cleanup */
+ fini(&ra);
+
+ for (struct interval *it = ra.intervals.temps; ra.intervals.count > 0; ++it) {
+ if (it->nrange > 2) xbfree(it->_dyn);
+ if (it->nrange > 0) --ra.intervals.count;
+ }
+
+ fn->stksiz += ra.maxstk*8;
+ if (fn->stksiz > 1<<24) error(NULL, "'%s' stack frame too big", fn->name);
+ while (stkslotrefs.n) {
+ union ref *adr = stkslotrefs.p[--stkslotrefs.n];
+ *adr = mkaddr((struct addr) { .base = mkref(RREG, mctarg->bpr), .disp = -fn->stksiz + adr->i });
+ }
+ vfree(&stkslotrefs);
+
+ if (ccopt.dbg.r) {
+ DBG("<< After regalloc >>\n");
+ irdump(fn);
+ }
+}
+
+/* vim:set ts=3 sw=3 expandtab: */
diff --git a/ir/ssa.c b/ir/ssa.c
new file mode 100644
index 0000000..9b89878
--- /dev/null
+++ b/ir/ssa.c
@@ -0,0 +1,58 @@
+#include "ir.h"
+
+/* requires use info to be filled (see filluses), and keeps it up to date */
+void
+copyopt(struct function *fn)
+{
+ struct block *blk = fn->entry;
+
+ do {
+ for (int i = 0; i < blk->ins.n; ++i) {
+ union ref var = mkref(RTMP, blk->ins.p[i]);
+ struct instr *ins = &instrtab[var.i];
+ enum irclass k;
+
+ if (ins->op == Ocopy) {
+ union ref arg = ins->l;
+ if (arg.t == RTMP) k = insrescls(instrtab[arg.i]);
+ else if (arg.t == RICON) k = cls2siz[ins->cls] == 4 ? KI4 : KI8;
+ else if (arg.t == RXCON) k = isnumcon(arg) ? conht[arg.i].cls : KPTR;
+ else continue;
+ if (ins->cls != k) continue;
+
+ replcuses(var, arg);
+ *ins = mkinstr(Onop,0,);
+ deluses(var.i);
+ }
+ }
+ delnops(blk);
+ } while ((blk = blk->lnext) != fn->entry);
+}
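+
+/* Illustrative run of copyopt() (temp numbers invented): given
+ *
+ *    %2 = copy %1
+ *    %3 = add %2, 7
+ *
+ * every use of %2 is rewritten to %1 via replcuses(), the copy becomes a nop,
+ * and delnops() drops it, leaving
+ *
+ *    %3 = add %1, 7
+ */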
+
+void
+filluses(struct function *fn)
+{
+ extern int ninstr;
+ struct block *blk = fn->entry;
+
+ for (int i = 0; i < ninstr; ++i)
+ deluses(i);
+
+ do {
+ for (int i = 0; i < blk->phi.n; ++i) {
+ int ins = blk->phi.p[i];
+ union ref *phi = phitab.p[instrtab[ins].l.i];
+ for (int i = 0; i < blk->npred; ++i)
+ adduse(blk, ins, phi[i]);
+ }
+ for (int i = 0; i < blk->ins.n; ++i) {
+ int ins = blk->ins.p[i];
+ adduse(blk, ins, instrtab[ins].l);
+ adduse(blk, ins, instrtab[ins].r);
+ }
+ adduse(blk, USERJUMP, blk->jmp.arg[0]);
+ adduse(blk, USERJUMP, blk->jmp.arg[1]);
+ } while ((blk = blk->lnext) != fn->entry);
+}
+
+/* vim:set ts=3 sw=3 expandtab: */