path: root/src/ir_mem2reg.c
author	lemon <lsof@mailbox.org>	2026-03-17 13:22:00 +0100
committer	lemon <lsof@mailbox.org>	2026-03-17 13:22:00 +0100
commit	a8d6f8bf30c07edb775e56889f568ca20240bedf (patch)
tree	b5a452b2675b2400f15013617291fe6061180bbf /src/ir_mem2reg.c
parent	24f14b7ad1af08d872971d72ce089a529911f657 (diff)
REFACTOR: move sources to src/
Diffstat (limited to 'src/ir_mem2reg.c')
-rw-r--r--	src/ir_mem2reg.c	317
1 file changed, 317 insertions, 0 deletions
diff --git a/src/ir_mem2reg.c b/src/ir_mem2reg.c
new file mode 100644
index 0000000..7a5874c
--- /dev/null
+++ b/src/ir_mem2reg.c
@@ -0,0 +1,317 @@
+#include "ir.h"
+#include <stdlib.h> /* qsort */
+
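+/* Lookup tables indexed by (op - Oloads8) resp. (op - Ostorei8):
+ * loadszcls packs a load's access size in bytes into the low nibble and
+ * its result class into the high nibble (unpacked by the loadsz() and
+ * loadcls() macros below); load2ext maps a narrow load to the extension
+ * op used when the load is rewritten to read a register value.
+ */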
+static const uchar loadszcls[] = {
+ [Oloads8 - Oloads8] = 1|KI32<<4, [Oloadu8 - Oloads8] = 1|KI32<<4,
+ [Oloads16 - Oloads8] = 2|KI32<<4, [Oloadu16 - Oloads8] = 2|KI32<<4,
+ [Oloads32 - Oloads8] = 4|KI32<<4, [Oloadu32 - Oloads8] = 4|KI32<<4,
+ [Oloadi64 - Oloads8] = 8|KI64<<4,
+ [Oloadf32 - Oloads8] = 4|KF32<<4,
+ [Oloadf64 - Oloads8] = 8|KF64<<4,
+};
+static const uchar load2ext[] = {
+ [Oloads8 - Oloads8] = Oexts8, [Oloadu8 - Oloads8] = Oextu8,
+ [Oloads16 - Oloads8] = Oexts16, [Oloadu16 - Oloads8] = Oextu16,
+ [Oloads32 - Oloads8] = Oexts32, [Oloadu32 - Oloads8] = Oextu32,
+ [Oloadi64 - Oloads8] = Ocopy,
+};
+static const uchar storesz[] = {
+ [Ostorei8 - Ostorei8] = 1,
+ [Ostorei16 - Ostorei8] = 2,
+ [Ostorei32 - Ostorei8] = 4,
+ [Ostorei64 - Ostorei8] = 8,
+ [Ostoref32 - Ostorei8] = 4,
+ [Ostoref64 - Ostorei8] = 8,
+};
+#define loadsz(o) (loadszcls[(o) - Oloads8] & 0xF)
+#define loadcls(o) (loadszcls[(o) - Oloads8] >> 4)
+#define load2ext(o) (load2ext[(o) - Oloads8])
+#define storesz(o) (storesz[(o) - Ostorei8])
+
+/* Implements the algorithm from 'Simple and Efficient Construction of Static Single Assignment Form' (Braun et al., CC 2013) */
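+/* Overview: readvar() returns the current definition of a variable in a
+ * block, recursing into predecessors and inserting phi functions at
+ * joins when needed. Phis in blocks whose predecessor set is not yet
+ * complete (unsealed blocks) stay pending until tryseal() fills in
+ * their arguments; trivial phis are removed on the fly.
+ */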
+
+struct ssabuilder {
+ struct arena **arena;
+ imap_of(union ref *) curdefs; /* var -> per-block array (indexed by block id) of the current def of var */
+ struct bitset *sealed, /* set of sealed blocks */
+ *marked; /* blocks marked, for 'Marker Algorithm' in the paper */
+ int lastvisit;
+ int nblk;
+};
+
+static union ref readvar(struct ssabuilder *, int var, enum irclass, struct block *);
+
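+/* A phi is trivial when all of its arguments are the same value or the
+ * phi itself, e.g. v2 = phi(v1, v1, v2): every use of v2 can be replaced
+ * by v1. Removing one trivial phi may make phis that used it trivial in
+ * turn, hence the recursive pass over the uses below.
+ */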
+static union ref
+deltrivialphis(struct ssabuilder *sb, int var, struct block *blk, union ref phiref)
+{
+ assert(instrtab[phiref.i].op == Ophi);
+ union ref *args = phitab.p[instrtab[phiref.i].l.i];
+ union ref same = {0};
+ for (int i = 0; i < blk->npred; ++i) {
+ if (args[i].bits == same.bits || args[i].bits == phiref.bits) {
+ continue; /* unique value or self-reference */
+ }
+ if (same.bits != 0) {
+ return phiref; /* found a second distinct value: non-trivial */
+ }
+ same = args[i];
+ }
+
+ if (same.bits == 0)
+ same = UNDREF; /* the phi is unreachable or in the start block */
+
+ /* replace uses */
+ replcuses(phiref, same);
+ union ref **pcurdefs = imap_get(&sb->curdefs, var);
+ assert (pcurdefs);
+ for (int i = blk->id; i < sb->nblk; ++i) {
+ if ((*pcurdefs)[i].bits == phiref.bits)
+ (*pcurdefs)[i] = same;
+ }
+
+ /* stops infinite recursion and marks phi for removal */
+ instrtab[phiref.i].op = Onop;
+
+ /* recursively try to remove all phi users as they might have become trivial */
+Redo:
+ for (struct use *use = instruse[phiref.i]; use; use = use->next) {
+ if (use->u != USERJUMP && instrtab[use->u].op == Ophi && use->u != phiref.i) {
+ union ref it = mkref(RTMP, use->u);
+ union ref vphi2 = deltrivialphis(sb, var, use->blk, it);
+ if (vphi2.bits != it.bits) {
+ same = vphi2;
+ /* deletion happened so phiref use may have changed */
+ goto Redo;
+ }
+ }
+ }
+ deluses(phiref.i);
+
+ return same;
+}
+
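+/* Fill in one argument per predecessor for a phi created for var, clear
+ * the pending-phi marker in r, and check whether the now-complete phi
+ * turned out to be trivial. */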
+static union ref
+addphiargs(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk, union ref phiref)
+{
+ union ref *args = phitab.p[instrtab[phiref.i].l.i];
+ for (int i = 0; i < blk->npred; ++i) {
+ struct block *pred = blkpred(blk, i);
+ args[i] = readvar(sb, var, cls, pred);
+ adduse(blk, phiref.i, args[i]);
+ }
+ instrtab[phiref.i].r.bits = 0;
+ return deltrivialphis(sb, var, blk, phiref);
+}
+
+static void
+writevar(struct ssabuilder *sb, int var, struct block *blk, union ref val)
+{
+ union ref **pcurdefs;
+ if (!(pcurdefs = imap_get(&sb->curdefs, var))) {
+ pcurdefs = imap_set(&sb->curdefs, var, allocz(sb->arena, sb->nblk * sizeof(union ref), 0));
+ }
+ if (val.t == RTMP) assert(instrtab[val.i].op != Onop);
+ (*pcurdefs)[blk->id] = val;
+}
+
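+/* A phi in an unsealed block cannot know its arguments yet; its r field
+ * temporarily holds RPENDINGPHI and the variable id so that trysealrec()
+ * can complete it via addphiargs() once all predecessors are known. */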
+enum { RPENDINGPHI = 7 };
+static union ref
+readvarrec(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk)
+{
+ union ref val;
+ assert(blk->npred > 0);
+ if (!bstest(sb->sealed, blk->id)) { /* unsealed block */
+ /* add pending phi */
+ val = insertphi(blk, cls);
+ instrtab[val.i].r = mkref(RPENDINGPHI, var);
+ } else if (blk->npred == 1) { /* trivial case when block has a single predecessor */
+ val = readvar(sb, var, cls, blkpred(blk, 0));
+ } else if (!bstest(sb->marked, blk->id)) {
+ /* Marker Algorithm to optimize the common case where a control-flow join doesn't
+ * need a phi function for the variable: mark the block in case we recursively reach
+ * it again, collect defs from the predecessors, and only create a phi if they differ.
+ * This avoids creating temporary trivial phi functions in acyclic data flow.
+ */
+ bsset(sb->marked, blk->id);
+ for (int i = 0; i < blk->npred; ++i) {
+ struct block *pred = blkpred(blk, i);
+ union ref it = readvar(sb, var, cls, pred);
+ if (!bstest(sb->marked, blk->id)) {
+ /* recursion reached this blk again, use its phi */
+ union ref **pcurdefs = imap_get(&sb->curdefs, var);
+ /* must have called writevar */
+ assert(*pcurdefs && (*pcurdefs)[blk->id].bits);
+ return (*pcurdefs)[blk->id];
+ }
+ if (i == 0) val = it;
+ else if (it.bits != val.bits) /* found a different definition, must add phi */
+ goto AddPhi;
+ }
+ bsclr(sb->marked, blk->id);
+ } else { /* reached block while recursing */
+ AddPhi:
+ val = insertphi(blk, cls);
+ writevar(sb, var, blk, val);
+ val = addphiargs(sb, var, cls, blk, val);
+ bsclr(sb->marked, blk->id);
+ return val;
+ }
+ writevar(sb, var, blk, val);
+ return val;
+}
+
+static union ref
+readvar(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk)
+{
+ union ref **pcurdefs;
+ if ((pcurdefs = imap_get(&sb->curdefs, var)) && (*pcurdefs)[blk->id].bits)
+ return (*pcurdefs)[blk->id];
+ if (blk->npred == 0) /* entry block, var is read before being written to */
+ return UNDREF;
+ return readvarrec(sb, var, cls, blk);
+}
+
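+/* A block may be sealed once all of its predecessors have been visited
+ * by the RPO walk in mem2reg() (tracked via lastvisit). Sealing fills in
+ * the arguments of the block's pending phis, then the chain of
+ * successors is tried in turn. */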
+static bool
+trysealrec(struct ssabuilder *sb, struct block *blk)
+{
+Recur:
+ if (bstest(sb->sealed, blk->id)) return 1;
+ if (blk->id > sb->lastvisit) return 0;
+ markvisited(blk);
+ for (int i = 0; i < blk->npred; ++i) {
+ struct block *p = blkpred(blk, i);
+ if (wasvisited(p)) continue;
+ if (p->id > sb->lastvisit) return 0;
+ }
+
+ bsset(sb->sealed, blk->id);
+ for (int i = 0; i < blk->phi.n; ++i) {
+ struct instr *ins = &instrtab[blk->phi.p[i]];
+ if (ins->r.t == RPENDINGPHI)
+ addphiargs(sb, ins->r.i, ins->cls, blk, mkref(RTMP, blk->phi.p[i]));
+ }
+ if (blk->s1 && !blk->s2) {
+ blk = blk->s1;
+ goto Recur;
+ } else if (blk->s1 && blk->s2) {
+ trysealrec(sb, blk->s1);
+ blk = blk->s2;
+ goto Recur;
+ }
+ return 1;
+}
+
+static void
+tryseal(struct ssabuilder *sb, struct block *blk)
+{
+ startbbvisit();
+ trysealrec(sb, blk);
+}
+
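+/* Position of instruction ins within blk, used for ordering uses; phis
+ * return -1 so they sort before ordinary instructions. */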
+static int
+blkfindins(struct block *blk, int ins)
+{
+ for (int i = 0; i < blk->phi.n; ++i)
+ if (blk->phi.p[i] == ins) return -1;
+ for (int i = 0; i < blk->ins.n; ++i)
+ if (blk->ins.p[i] == ins) return i;
+ assert(0 && "ins not in blk");
+ return -1; /* not reached; avoids falling off a non-void function with NDEBUG */
+}
+
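+/* qsort comparator: sort uses by descending block id, then by descending
+ * position within the block, so that iterating the sorted array
+ * backwards (as mem2reg() does) yields program order. */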
+static int
+rcmpuse(const void *a, const void *b)
+{
+ /* postorder sort */
+ const struct use *ua = *(struct use **)a, *ub = *(struct use **)b;
+ struct block *blk = ua->blk;
+ if (ua->blk != ub->blk) return ub->blk->id - ua->blk->id;
+ assert(ua->u != USERJUMP && ub->u != USERJUMP);
+ return blkfindins(blk, ub->u) - blkfindins(blk, ua->u);
+}
+
+void
+mem2reg(struct function *fn)
+{
+ struct ssabuilder sb = { fn->passarena, .nblk = fn->nblk };
+
+ FREQUIRE(FNUSE);
+
+ sb.sealed = allocz(sb.arena, BSSIZE(fn->nblk) * sizeof *sb.sealed, 0);
+ sb.marked = allocz(sb.arena, BSSIZE(fn->nblk) * sizeof *sb.marked, 0);
+ sortrpo(fn);
+
+ struct block *blk = fn->entry;
+ do {
+ for (int i = 0; i < blk->ins.n; ++i) {
+ enum irclass k = 0;
+ int sz = 0;
+ enum op ext = Ocopy;
+ int var = blk->ins.p[i];
+ struct instr *ins = &instrtab[var];
+ struct use *use;
+
+ /* find allocas only used in loads/stores of uniform size */
+ if (!oisalloca(ins->op) || !(use = instruse[var])) continue;
+ struct use *usesbuf[64];
+ vec_of(struct use *) uses = VINIT(usesbuf, countof(usesbuf));
+ do {
+ if (use->u == USERJUMP) goto Skip;
+ struct instr *m = &instrtab[use->u];
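+ /* a load can only use the alloca as its address; a store qualifies
+ * only when the alloca is its address operand (l), since an alloca
+ * appearing as the stored value (r) escapes */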
+ if (oisload(m->op) && (!sz || sz == loadsz(m->op))) {
+ sz = loadsz(m->op);
+ k = loadcls(m->op);
+ if (sz < 4) ext = load2ext(m->op);
+ } else if (oisstore(m->op) && m->l.bits == mkref(RTMP, var).bits
+ && (!sz || sz == storesz(m->op))) {
+ sz = storesz(m->op);
+ } else goto Skip;
+ vpush(&uses, use);
+ } while ((use = use->next));
+
+ /* visit uses in program (RPO) order so a store's writevar() happens before readvar() of any later load */
+ qsort(uses.p, uses.n, sizeof *uses.p, rcmpuse);
+ for (int j = uses.n-1; j >= 0; --j) {
+ use = uses.p[j];
+ ins = &instrtab[use->u];
+
+ if (oisstore(ins->op)) {
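+ /* the stored value becomes the block's current def of var and
+ * the store itself is dropped */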
+ writevar(&sb, var, use->blk, ins->r);
+ *ins = mkinstr(Onop,0,);
+ } else if (oisload(ins->op)) {
+ union ref val = readvar(&sb, var, k = ins->cls, use->blk);
+ adduse(use->blk, use->u, val);
+ if (ext != Ocopy && isintcon(val)) { /* fold constant int extension */
+ val = irunop(NULL, ext, k, val);
+ assert(val.bits);
+ ext = Ocopy;
+ }
+ *ins = mkinstr(ext, k, val);
+ }
+ }
+ /* remove alloca */
+ delinstr(blk, i--);
+
+ Skip:
+ vfree(&uses);
+ }
+
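+ /* the main loop visits blocks in reverse postorder (sortrpo above);
+ * lastvisit tracks that frontier so trysealrec() can tell which
+ * predecessors have already been processed */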
+ assert(sb.lastvisit == blk->id);
+ ++sb.lastvisit;
+ tryseal(&sb, blk);
+ } while ((blk = blk->lnext) != fn->entry);
+
+ do {
+ /* remove phis marked for deletion */
+ for (int i = 0; i < blk->phi.n; ++i)
+ if (instrtab[blk->phi.p[i]].op == Onop)
+ delphi(blk, i--);
+ } while ((blk = blk->lnext) != fn->entry);
+
+ imap_free(&sb.curdefs);
+ if (ccopt.dbg.m) {
+ bfmt(ccopt.dbgout, "<< After mem2reg >>\n");
+ irdump(fn);
+ }
+}
+
+
+/* vim:set ts=3 sw=3 expandtab: */