aboutsummaryrefslogtreecommitdiffhomepage
path: root/ir/mem2reg.c
diff options
context:
space:
mode:
Diffstat (limited to 'ir/mem2reg.c')
-rw-r--r--ir/mem2reg.c345
1 files changed, 345 insertions, 0 deletions
diff --git a/ir/mem2reg.c b/ir/mem2reg.c
new file mode 100644
index 0000000..f9ad245
--- /dev/null
+++ b/ir/mem2reg.c
@@ -0,0 +1,345 @@
+#include "ir.h"
+#include <stdlib.h> /* qsort */
+
+/* Per-load-op lookup packed into one byte: low nibble = access size in
+ * bytes, high nibble = ir class of the loaded value.  Indexed by
+ * (op - Oloads8), so the load opcodes are assumed to form a contiguous
+ * enum range starting at Oloads8 — TODO confirm against ir.h. */
+static const uchar loadszcls[] = {
+ [Oloads8 - Oloads8] = 1|KI32<<4, [Oloadu8 - Oloads8] = 1|KI32<<4,
+ [Oloads16 - Oloads8] = 2|KI32<<4, [Oloadu16 - Oloads8] = 2|KI32<<4,
+ [Oloads32 - Oloads8] = 4|KI32<<4, [Oloadu32 - Oloads8] = 4|KI32<<4,
+ [Oloadi64 - Oloads8] = 8|KI64<<4,
+ [Oloadf32 - Oloads8] = 4|KF32<<4,
+ [Oloadf64 - Oloads8] = 8|KF64<<4,
+};
+/* Extension op that reproduces a load's sign/zero-extension once the
+ * memory slot has been promoted to a register; full-width integer loads
+ * degrade to a plain copy.  No entries for float loads (they never
+ * extend; mem2reg only consults this table for sub-word sizes). */
+static const uchar load2ext[] = {
+ [Oloads8 - Oloads8] = Oexts8, [Oloadu8 - Oloads8] = Oextu8,
+ [Oloads16 - Oloads8] = Oexts16, [Oloadu16 - Oloads8] = Oextu16,
+ [Oloads32 - Oloads8] = Oexts32, [Oloadu32 - Oloads8] = Oextu32,
+ [Oloadi64 - Oloads8] = Ocopy,
+};
+#define loadsz(o) (loadszcls[(o) - Oloads8] & 0xF) /* access size in bytes */
+#define loadcls(o) (loadszcls[(o) - Oloads8] >> 4) /* ir class of the result */
+/* The function-like macro deliberately shares its name with the array
+ * above; the expansion uses load2ext[...] (no parenthesis follows), so
+ * the preprocessor does not re-expand it. */
+#define load2ext(o) (load2ext[(o) - Oloads8])
+/* Store size in bytes: assumes Ostore8..Ostore64 are consecutive opcodes
+ * covering sizes 1,2,4,8 — TODO confirm against ir.h. */
+#define storesz(o) (1 << ((o) - Ostore8))
+
+/* Implements algorithm in 'Simple and Efficient Construction of Static Single Assignment' (Braun et al) */
+
+/* State for one run of Braun et al's SSA construction over a function.
+ * A "variable" here is the instruction index of a promotable alloca. */
+struct ssabuilder {
+ imap_of(union ref *) curdefs; /* map of var to (map of block to def of var) */
+ struct bitset *sealed, /* set of sealed blocks */
+ *marked; /* blocks marked, for 'Marker Algorithm' in the paper */
+ int lastvisit; /* id of the next block to be filled by mem2reg's RPO walk;
+ * blocks with id > lastvisit are not filled yet */
+ int nblk; /* block count; length of each curdefs value array */
+};
+
+static union ref readvar(struct ssabuilder *, int var, enum irclass, struct block *);
+
+/* tryRemoveTrivialPhi (Braun et al): a phi is trivial when its arguments
+ * name at most one value besides the phi itself.  If 'phiref' is trivial,
+ * replace all of its uses with that value, scrub it out of the cached
+ * per-block defs of 'var', mark it Onop for later removal, and recursively
+ * retry phi users that may have become trivial in turn.  Returns the
+ * replacement value, or 'phiref' unchanged when the phi is non-trivial. */
+static union ref
+deltrivialphis(struct ssabuilder *sb, int var, struct block *blk, union ref phiref)
+{
+ assert(instrtab[phiref.i].op == Ophi);
+ union ref *args = phitab.p[instrtab[phiref.i].l.i];
+ union ref same = {0};
+ for (int i = 0; i < blk->npred; ++i) {
+ if (args[i].bits == same.bits || args[i].bits == phiref.bits) {
+ continue; /* unique value or self-reference */
+ } if (same.bits != 0) {
+ return phiref; /* non-trivial */
+ }
+ same = args[i];
+ }
+
+ if (same.bits == 0)
+ same = UNDREF; /* the phi is unreachable or in the start block */
+
+ /* replace uses */
+ replcuses(phiref, same);
+ union ref **pcurdefs = imap_get(&sb->curdefs, var);
+ assert (pcurdefs);
+ /* NOTE(review): only blocks with id >= blk->id are scanned —
+ * presumably RPO numbering guarantees no earlier block can have this
+ * phi cached as its def; confirm */
+ for (int i = blk->id; i < sb->nblk; ++i) {
+ if ((*pcurdefs)[i].bits == phiref.bits)
+ (*pcurdefs)[i] = same;
+ }
+
+ /* stops infinite recursion and marks phi for removal */
+ instrtab[phiref.i].op = Onop;
+
+ /* recursively try to remove all phi users as they might have become trivial */
+Redo:
+ for (struct use *use = instruse[phiref.i], *uend = use + instrnuse[phiref.i]; use < uend; ++use) {
+ if (use->u != USERJUMP && instrtab[use->u].op == Ophi && use->u != phiref.i) {
+ union ref it = mkref(RTMP, use->u);
+ union ref vphi2 = deltrivialphis(sb, var, use->blk, it);
+ if (vphi2.bits != it.bits) {
+ same = vphi2;
+ /* deletion happened so phiref use may have changed */
+ goto Redo;
+ }
+ }
+ }
+ deluses(phiref.i);
+
+ return same;
+}
+
+/* addPhiOperands (Braun et al): fill in the phi's one-argument-per-
+ * predecessor array by reading 'var' at the end of each predecessor and
+ * registering the use, then try to delete the phi if it is trivial. */
+static union ref
+addphiargs(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk, union ref phiref)
+{
+ union ref *args = phitab.p[instrtab[phiref.i].l.i];
+ for (int i = 0; i < blk->npred; ++i) {
+ struct block *pred = blkpred(blk, i);
+ args[i] = readvar(sb, var, cls, pred);
+ adduse(blk, phiref.i, args[i]);
+ }
+ instrtab[phiref.i].r.bits = 0; /* clear the RPENDINGPHI marker, if set */
+ return deltrivialphis(sb, var, blk, phiref);
+}
+
+/* writeVariable (Braun et al): record 'val' as the current definition of
+ * 'var' at the end of 'blk'.  The per-block def array for a variable is
+ * allocated lazily and zero-filled, so unread entries have bits == 0. */
+static void
+writevar(struct ssabuilder *sb, int var, struct block *blk, union ref val)
+{
+   union ref **defs = imap_get(&sb->curdefs, var);
+   if (defs == NULL)
+      defs = imap_set(&sb->curdefs, var, xcalloc(sb->nblk * sizeof(union ref)));
+   if (val.t == RTMP)
+      assert(instrtab[val.i].op != Onop); /* never cache a deleted instr */
+   (*defs)[blk->id] = val;
+}
+
+/* Sentinel stored in a pending phi's .r field while its block is still
+ * unsealed; .i carries the variable so trysealrec can complete the phi. */
+enum { RPENDINGPHI = 7 };
+/* readVariableRecursive (Braun et al): compute the value of 'var' at the
+ * end of 'blk' when no def is cached for the block.  Unsealed blocks get
+ * a pending phi; single-predecessor blocks forward the query; joins use
+ * the marker optimization below and only materialize a phi on demand. */
+static union ref
+readvarrec(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk)
+{
+ union ref val;
+ assert(blk->npred > 0);
+ if (!bstest(sb->sealed, blk->id)) { /* unsealed block */
+ /* add pending phi */
+ val = insertphi(blk, cls);
+ instrtab[val.i].r = mkref(RPENDINGPHI, var);
+ } else if (blk->npred == 1) { /* trivial case when block has a single predecessor */
+ val = readvar(sb, var, cls, blkpred(blk, 0));
+ } else if (!bstest(sb->marked, blk->id)) {
+ /* Marker Algorithm to optimize the common case where a control flow join doesn't
+ * need a phi function for the variable: mark the block in case we recursively reach
+ * it again, collect defs from predecessors, only create a phi if they differ.
+ * This avoids creating temporary trivial phi functions in acyclic data flow
+ */
+ bsset(sb->marked, blk->id);
+ for (int i = 0; i < blk->npred; ++i) {
+ struct block *pred = blkpred(blk, i);
+ union ref it = readvar(sb, var, cls, pred);
+ /* the AddPhi branch clears the mark, so a cleared mark here means
+ * the recursion re-entered this block and created its phi */
+ if (!bstest(sb->marked, blk->id)) {
+ /* recursion reached this blk again, use its phi */
+ union ref **pcurdefs = imap_get(&sb->curdefs, var);
+ /* must have called writevar */
+ assert(*pcurdefs && (*pcurdefs)[blk->id].bits);
+ return (*pcurdefs)[blk->id];
+ }
+ if (i == 0) val = it;
+ else if (it.bits != val.bits) /* found a different definition, must add phi */
+ goto AddPhi;
+ }
+ bsclr(sb->marked, blk->id);
+ } else { /* reached block while recursing */
+ AddPhi:
+ /* write the phi before adding its args so recursive reads of 'var'
+ * in this block resolve to the phi instead of recursing forever */
+ val = insertphi(blk, cls);
+ writevar(sb, var, blk, val);
+ val = addphiargs(sb, var, cls, blk, val);
+ bsclr(sb->marked, blk->id);
+ return val;
+ }
+ writevar(sb, var, blk, val);
+ return val;
+}
+
+/* readVariable (Braun et al): value of 'var' at the end of 'blk'.  Fast
+ * path is a cached per-block def; a read in the entry block with no prior
+ * write yields UNDREF (use of an uninitialized slot); anything else falls
+ * back to the recursive lookup. */
+static union ref
+readvar(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk)
+{
+   union ref **defs = imap_get(&sb->curdefs, var);
+   if (defs != NULL) {
+      union ref cached = (*defs)[blk->id];
+      if (cached.bits)
+         return cached;
+   }
+   if (blk->npred == 0)
+      return UNDREF; /* entry block, var is read before being written to */
+   return readvarrec(sb, var, cls, blk);
+}
+
+/* Seal 'blk' once all of its predecessors have been filled, complete any
+ * phis left pending while it was open, then walk on to its successors
+ * (tail-looping on the last one via 'goto Recur' to bound recursion
+ * depth).  Returns 1 if blk is sealed on exit.  Blocks with
+ * id > lastvisit have not been filled by mem2reg's RPO loop yet, so
+ * sealing them must wait. */
+static bool
+trysealrec(struct ssabuilder *sb, struct block *blk)
+{
+Recur:
+ if (bstest(sb->sealed, blk->id)) return 1;
+ if (blk->id > sb->lastvisit) return 0;
+ markvisited(blk);
+ for (int i = 0; i < blk->npred; ++i) {
+ struct block *p = blkpred(blk, i);
+ /* NOTE(review): a pred already visited in this walk is accepted even
+ * if unfilled — presumably this handles back-edges; confirm */
+ if (wasvisited(p)) continue;
+ if (p->id > sb->lastvisit) return 0; /* predecessor not filled yet */
+ }
+
+ bsset(sb->sealed, blk->id);
+ /* the block is now sealed: give pending phis their arguments */
+ for (int i = 0; i < blk->phi.n; ++i) {
+ struct instr *ins = &instrtab[blk->phi.p[i]];
+ if (ins->r.t == RPENDINGPHI)
+ addphiargs(sb, ins->r.i, ins->cls, blk, mkref(RTMP, blk->phi.p[i]));
+ }
+ if (blk->s1 && !blk->s2) {
+ blk = blk->s1;
+ goto Recur;
+ } else if (blk->s1 && blk->s2) {
+ trysealrec(sb, blk->s1);
+ blk = blk->s2;
+ goto Recur;
+ }
+ return 1;
+}
+
+/* Entry point for sealing: reset the block-visit marks (startbbvisit
+ * presumably restarts the epoch consulted by wasvisited — confirm in
+ * ir.h), then try to seal 'blk' and, transitively, its successors. */
+static void
+tryseal(struct ssabuilder *sb, struct block *blk)
+{
+ startbbvisit();
+ trysealrec(sb, blk);
+}
+
+/* Position of instruction 'ins' inside 'blk' for use-sorting: -1 if it
+ * is one of the block's phis (phis sort before all body instructions),
+ * otherwise its index in the instruction list.  'ins' must belong to
+ * 'blk'; anything else is a caller bug. */
+static int
+blkfindins(struct block *blk, int ins)
+{
+   for (int i = 0; i < blk->phi.n; ++i)
+      if (blk->phi.p[i] == ins) return -1;
+   for (int i = 0; i < blk->ins.n; ++i)
+      if (blk->ins.p[i] == ins) return i;
+   assert(0 && "ins not in blk");
+   /* Not reached; without it, an NDEBUG build falls off the end of a
+    * non-void function and cmpuse consumes an indeterminate value (UB). */
+   return -1;
+}
+
+/* qsort comparator ordering uses by program position: first by block id,
+ * then a block's jump use (USERJUMP) after all of its instruction uses,
+ * then by the use's index within the block (phi uses rank as -1, i.e.
+ * ahead of every body instruction). */
+static int
+cmpuse(const void *a, const void *b)
+{
+   const struct use *x = a, *y = b;
+
+   if (x->blk != y->blk)
+      return x->blk->id - y->blk->id;
+   if (x->u == USERJUMP)
+      return y->u == USERJUMP ? 0 : 1;
+   if (y->u == USERJUMP)
+      return -1;
+   return blkfindins(x->blk, x->u) - blkfindins(x->blk, y->u);
+}
+
+/* Promote memory slots (allocas) whose only uses are same-sized loads and
+ * stores into SSA temporaries, inserting phis with Braun et al's
+ * algorithm: walk blocks in reverse post-order, turn each store into a
+ * per-block def and each load into a copy/extension of the reaching def,
+ * and seal blocks as soon as all their predecessors are filled.
+ *
+ * Fix vs. previous revision: the extension op was computed once per slot
+ * during the promotability scan (last load scanned won) and shared by
+ * every load at rewrite time; mixed signed/unsigned loads of one size got
+ * the wrong extension, and 'ext = Ocopy' after constant-folding one load
+ * clobbered the extension of every later load.  The extension is now
+ * derived per load from its own opcode. */
+void
+mem2reg(struct function *fn)
+{
+   static struct bitset bsbuf[2][4];
+   struct ssabuilder sb = { .nblk = fn->nblk };
+   struct block *blk;
+
+   FREQUIRE(FNUSE);
+
+   /* small functions reuse the static bitset buffers, big ones allocate */
+   if (fn->nblk <= BSNBIT * countof(bsbuf[0])) {
+      sb.sealed = bsbuf[0];
+      sb.marked = bsbuf[1];
+      memset(bsbuf[0], 0, BSSIZE(fn->nblk) * sizeof *bsbuf[0]);
+      memset(bsbuf[1], 0, BSSIZE(fn->nblk) * sizeof *bsbuf[1]);
+   } else {
+      sb.sealed = xcalloc(BSSIZE(fn->nblk) * sizeof *sb.sealed);
+      sb.marked = xcalloc(BSSIZE(fn->nblk) * sizeof *sb.marked);
+   }
+
+   sortrpo(fn);
+   blk = fn->entry;
+
+   do {
+      for (int i = 0; i < blk->ins.n; ++i) {
+         struct use *use, *uend;
+         enum irclass k = 0;
+         int sz = 0, ndef = 0;
+         int var = blk->ins.p[i];
+         struct instr *ins = &instrtab[var];
+
+         if (!oisalloca(ins->op)) continue;
+         /* A slot is promotable iff every use is a load from it or a
+          * store *to* it (the address must not escape, e.g. as a stored
+          * value or a jump argument) and all accesses share one size. */
+         uend = instruse[var] + instrnuse[var];
+         for (use = instruse[var]; use < uend; ++use) {
+            struct instr *m;
+
+            if (use->u == USERJUMP) goto Next;
+            m = &instrtab[use->u];
+            if (oisload(m->op) && (!sz || sz == loadsz(m->op))) {
+               sz = loadsz(m->op);
+               k = loadcls(m->op);
+               continue;
+            }
+            if (oisstore(m->op) && m->l.bits == mkref(RTMP, var).bits
+                && (!sz || sz == storesz(m->op))) {
+               sz = storesz(m->op);
+               ++ndef;
+               continue;
+            }
+            goto Next;
+         }
+         if (!ndef) /* slot is read from but never written to */
+            goto Next;
+
+         /* Order uses by program position so each store is recorded
+          * before the loads it reaches within the same block. */
+         qsort(instruse[var], instrnuse[var], sizeof *instruse[var], cmpuse);
+         for (use = instruse[var]; use < uend; ++use) {
+            struct instr *m = &instrtab[use->u];
+
+            if (oisstore(m->op)) {
+               writevar(&sb, var, use->blk, m->r);
+               *m = mkinstr(Onop,0,);
+            } else if (oisload(m->op)) {
+               /* extension is per load: signed and unsigned loads of the
+                * same size may coexist on one slot */
+               enum op ext = sz < 4 ? load2ext(m->op) : Ocopy;
+               union ref val = readvar(&sb, var, k = m->cls, use->blk);
+               if (!val.bits) { /* var is used uninitialized */
+                  /* TODO emit diagnostic */
+                  /* load some garbage */
+                  *m = mkinstr(kisflt(k) ? Oloadf32 + (k==KF64) : Oloads8+ilog2(sz)*2,
+                               k, mkref(RREG, mctarg->bpr));
+               } else {
+                  adduse(use->blk, use->u, val);
+                  /* fold the extension into integer-constant defs */
+                  if (isintcon(val) && ext != Ocopy) {
+                     vlong x = intconval(val);
+                     switch (ext) {
+                     case Oexts8: x = (schar)x; break;
+                     case Oextu8: x = (uchar)x; break;
+                     case Oexts16: x = (short)x; break;
+                     case Oextu16: x = (ushort)x; break;
+                     case Oexts32: x = (int)x; break;
+                     case Oextu32: x = (uint)x; break;
+                     default: assert(0);
+                     }
+                     val = mkintcon(k, x);
+                     ext = Ocopy;
+                  }
+                  *m = mkinstr(ext, k, val);
+               }
+            }
+         }
+         /* remove alloca */
+         delinstr(blk, i--);
+
+      Next:;
+      }
+
+      /* this block is now filled; seal it and any successors whose
+       * predecessors are all filled, completing their pending phis */
+      assert(sb.lastvisit == blk->id);
+      ++sb.lastvisit;
+      tryseal(&sb, blk);
+   } while ((blk = blk->lnext) != fn->entry);
+
+   do {
+      /* remove phis marked for deletion */
+      for (int i = 0; i < blk->phi.n; ++i)
+         if (instrtab[blk->phi.p[i]].op == Onop)
+            delphi(blk, i--);
+   } while ((blk = blk->lnext) != fn->entry);
+
+   if (sb.sealed != bsbuf[0]) {
+      free(sb.sealed);
+      free(sb.marked);
+   }
+
+   /* free the lazily-allocated per-variable def arrays, then the map */
+   for (int i = 0; i < sb.curdefs.mb.N; ++i)
+      if (bstest(sb.curdefs.mb.bs, i))
+         free(sb.curdefs.v[i]);
+   imap_free(&sb.curdefs);
+   if (ccopt.dbg.m) {
+      bfmt(ccopt.dbgout, "<< After mem2reg >>\n");
+      irdump(fn);
+   }
+}
+
+
+/* vim:set ts=3 sw=3 expandtab: */