aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--common.h9
-rw-r--r--ir.c1
-rw-r--r--ir.h5
-rw-r--r--irdump.c5
-rw-r--r--main.c1
-rw-r--r--optmem.c60
-rw-r--r--regalloc.c186
7 files changed, 206 insertions, 61 deletions
diff --git a/common.h b/common.h
index a47837a..252cfa4 100644
--- a/common.h
+++ b/common.h
@@ -367,6 +367,12 @@ xbgrow_(void **p, size_t n)
#define xbgrow(p, n) xbgrow_((void **)(p), (n) * sizeof**(p))
#define xbpush(p, n, x) (xbgrow(p, (*(n) + 1)), (*(p))[(*(n))++] = (x))
#define xbfree(p) ((p) ? free(&xbcap_(p)) : (void)0)
+#define xbcap(p) ((p) ? xbcap_(p) / sizeof*(p) : 0)
+#define xbgrowz(p, n) do { \
+ size_t tmp = *(p) ? xbcap_(*(p)) : 0; \
+ xbgrow(p, n); \
+ memset((char*)*(p)+tmp, 0, xbcap_(*(p))-tmp); \
+} while (0)
struct arena *newarena(uint chunksiz);
void *alloc(struct arena **, uint siz, uint align);
@@ -431,6 +437,9 @@ int pmap_set_(struct pmapbase *, void **v, uint vsiz, const void *k);
#define pmap_get(m, k) (((m)->tmp = pmap_get_(&(m)->mb, k)) < 0 ? NULL : &(m)->v[(m)->tmp])
#define pmap_set(m, k, x) ((m)->tmp = pmap_set_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, k), \
(m)->v[(m)->tmp] = (x))
+#define pmap_each(m,kx,pvx) \
+ for (int _i = 0; _i < (m)->mb.N && ((kx) = (m)->mb.k[_i], (pvx) = &(m)->v[_i], 1); ++_i) \
+ if (kx)
static inline bool
bstest(const struct bitset *bs, uint i)
diff --git a/ir.c b/ir.c
index 69cb1fa..79f9ebd 100644
--- a/ir.c
+++ b/ir.c
@@ -25,6 +25,7 @@ struct addr addrht[1 << 12];
static int naddrht;
struct xcon conht[1 << 12];
static int nconht;
+int visitmark;
void
irinit(struct function *fn)
diff --git a/ir.h b/ir.h
index 85500d1..6507fe0 100644
--- a/ir.h
+++ b/ir.h
@@ -121,6 +121,7 @@ enum jumpkind { JXXX, Jb, Jret, };
struct block {
int id;
int npred;
+ int visit;
union {
struct block *_pred0;
struct block **_pred;
@@ -206,6 +207,7 @@ extern struct calltab {vec_of(struct call);} calltab;
extern struct phitab {vec_of(union ref *);} phitab;
extern struct dattab {vec_of(struct irdat);} dattab;
extern struct addr addrht[];
+extern int visitmark;
#define mkinstr(O, C, ...) ((struct instr) { .op = (O), .cls = (C), .reg=0, __VA_ARGS__ })
#define mkarginstr(ty, x) mkinstr(Oarg, 0, mktyperef(ty), (x))
void irinit(struct function *);
@@ -240,6 +242,9 @@ void deluses(int ins);
void delinstr(struct block *, int idx);
void delphi(struct block *, int idx);
void fillblkids(struct function *);
+#define startbbvisit() (void)(++visitmark)
+#define wasvisited(blk) ((blk)->visit == visitmark)
+#define markvisited(blk) ((blk)->visit = visitmark)
/* IR builder functions */
union ref addinstr(struct function *, struct instr);
diff --git a/irdump.c b/irdump.c
index 3d95843..21b5f09 100644
--- a/irdump.c
+++ b/irdump.c
@@ -196,10 +196,9 @@ dumpblk(struct function *fn, struct block *blk)
for (i = 0; i < blk->phi.n; ++i) {
struct instr *phi = &instrtab[blk->phi.p[i]];
union ref *refs = phitab.p[phi->l.i];
- assert(phi->op == Ophi);
efmt(" %s ", clsname[phi->cls]);
- if (!phi->reg) efmt("%%%d = phi ", blk->phi.p[i]);
- else efmt("(%%%d)%s = phi ", phi - instrtab, mctarg->rnames[phi->reg-1]);
+ if (!phi->reg) efmt("%%%d = %s ", blk->phi.p[i], opnames[phi->op]);
+ else efmt("(%%%d)%s = %s ", phi - instrtab, mctarg->rnames[phi->reg-1], opnames[phi->op]);
for (int i = 0; i < blk->npred; ++i) {
if (i) efmt(", ");
efmt("@%d ", blkpred(blk, i)->id);
diff --git a/main.c b/main.c
index 6f9e0dc..35d25ac 100644
--- a/main.c
+++ b/main.c
@@ -116,6 +116,7 @@ optparse(char **args)
case 'p': ccopt.dbg.p = 1; break;
case 'a': ccopt.dbg.a = 1; break;
case 'i': ccopt.dbg.i = 1; break;
+ case 'l': ccopt.dbg.l = 1; break;
case 'r': ccopt.dbg.r = 1; break;
case 'm': ccopt.dbg.m = 1; break;
default: warn(NULL, "-d: invalid debug flag %'c", *arg);
diff --git a/optmem.c b/optmem.c
index 70737ec..4dc6b5b 100644
--- a/optmem.c
+++ b/optmem.c
@@ -32,11 +32,16 @@ struct ssabuilder {
static union ref readvar(struct ssabuilder *, int var, enum irclass, struct block *);
static union ref
-deltrivialphis(struct block *blk, union ref phiref)
+deltrivialphis(struct ssabuilder *sb, struct block *blk, union ref phiref)
{
+ union ref **pcurdefs;
+ int var;
struct use *use, *uend;
union ref *args = phitab.p[instrtab[phiref.i].l.i];
union ref same = {0};
+
+ assert(instrtab[phiref.i].op == Ophi);
+
for (int i = 0; i < blk->npred; ++i) {
if (args[i].bits == same.bits || args[i].bits == phiref.bits)
continue; /* unique value or self-reference */
@@ -50,21 +55,28 @@ deltrivialphis(struct block *blk, union ref phiref)
/* replace uses */
replcuses(phiref, same);
+ imap_each(&sb->curdefs, var, pcurdefs) {
+ (void)var;
+ if ((*pcurdefs)[blk->id].bits == phiref.bits)
+ (*pcurdefs)[blk->id] = same;
+ }
+ /* stops infinite recursion and marks phi for removal */
instrtab[phiref.i].op = Onop;
/* recursively try to remove all phi users as they might have become trivial */
- for (use = instruse[phiref.i], uend = use + instrnuse[phiref.i]; use < uend; ++use)
- if (use->u != USERJUMP && instrtab[use->u].op == Ophi)
- deltrivialphis(use->blk, mkref(RTMP, use->u));
-
- /* remove phi */
- for (int i = 0; i < blk->phi.n; ++i) {
- if (blk->phi.p[i] == phiref.i) {
- delphi(blk, i);
- break;
+Redo:
+ for (use = instruse[phiref.i], uend = use + instrnuse[phiref.i]; use < uend; ++use) {
+ if (use->u != USERJUMP && instrtab[use->u].op == Ophi && use->u != phiref.i) {
+ union ref it = mkref(RTMP, use->u);
+ union ref vphi2 = deltrivialphis(sb, use->blk, it);
+ if (vphi2.bits != it.bits) {
+ /* deletion happened so phiref use may have changed */
+ goto Redo;
+ }
}
}
+
return same;
}
@@ -77,7 +89,7 @@ addphiargs(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk,
args[i] = readvar(sb, var, cls, pred);
adduse(blk, phiref.i, args[i]);
}
- return deltrivialphis(blk, phiref);
+ return deltrivialphis(sb, blk, phiref);
}
static void
@@ -90,24 +102,12 @@ writevar(struct ssabuilder *sb, int var, struct block *blk, union ref val)
(*pcurdefs)[blk->id] = val;
}
-static bool
-issealed(struct ssabuilder *sb, struct block *blk)
-{
- /* consider block sealed if it has been visited or if no backbranches flow to it */
- if (blk->id < sb->lastvisit) return 1;
- for (int i = 0; i < blk->npred; ++i)
- if (blkpred(blk, i)->id > blk->id)
- return 0;
- return 1;
-}
-
-
static union ref
readvarrec(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk)
{
union ref val;
assert(blk->npred > 0);
- if (!issealed(sb, blk)) {
+ if (blk->id > sb->lastvisit) { /* unsealed block */
val = insertphi(blk, cls);
vpush(&sb->pendingphis[blk->id], ((struct pendingphi) { var, val.i }));
} else if (blk->npred == 1) {
@@ -132,7 +132,7 @@ readvar(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk)
return readvarrec(sb, var, cls, blk);
}
-/* require use, blkid */
+/* require use, blkid; keeps use */
void
mem2reg(struct function *fn)
{
@@ -201,17 +201,23 @@ mem2reg(struct function *fn)
Next:;
}
- /* seal block */
assert(sb.lastvisit == blk->id);
- ++sb.lastvisit;
pending = (void *) &sb.pendingphis[blk->id];
for (int i = 0; i < pending->n; ++i) {
addphiargs(&sb, pending->p[i].var, instrtab[pending->p[i].phi].cls, blk,
mkref(RTMP, pending->p[i].phi));
}
+ ++sb.lastvisit;
vfree(pending);
} while ((blk = blk->lnext) != fn->entry);
+ do {
+ /* remove phis marked for deletion */
+ for (int i = 0; i < blk->phi.n; ++i)
+ if (instrtab[blk->phi.p[i]].op == Onop)
+ delphi(blk, i--);
+ } while ((blk = blk->lnext) != fn->entry);
+
free(sb.pendingphis);
for (int i = 0; i < sb.curdefs.mb.N; ++i)
diff --git a/regalloc.c b/regalloc.c
index 1e781f4..9d84e1e 100644
--- a/regalloc.c
+++ b/regalloc.c
@@ -1,4 +1,3 @@
-#include "common.h"
#include "ir.h"
/* Implements reverse linear register allocation, quite ad-hoc right now */
@@ -43,7 +42,7 @@ addstkslotref(union ref inst)
static int allocreg(struct rega *ra, enum irclass cls, union ref ref, uvlong excl);
#if 0
-#define DBG efmt
+#define DBG(...) if(ccopt.dbg.r) efmt(__VA_ARGS__)
static void
dumpallocs(const char *s, struct rmap *m)
{
@@ -77,7 +76,7 @@ allocstk(struct rega *ra, int var)
int s = -1;
for (int i = 0; i < BSSIZE(MAXSPILL); ++i) {
- if (~ra->m.freestk[i].u == 0) continue;
+ if (ra->m.freestk[i].u == 0) continue;
s = i*64 + lowestsetbit(ra->m.freestk[i].u);
}
if (s != -1) {
@@ -94,6 +93,7 @@ allocstk(struct rega *ra, int var)
static void
freestk(struct rega *ra, int slot)
{
+ DBG("FREE stk %d\n",slot);
if (slot < MAXSPILL)
bsset(ra->m.freestk, slot);
else if (slot == ra->stktop - 1)
@@ -117,7 +117,7 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi)
assert(!rstest(ra->m.free, reg));
assert(ra->m.regs[reg] == var);
ins->reg = reg+1;
- } else if (alloc->t == ASTACK) {
+ } else if (alloc->t == ASTACK && ins->op != Ophi) {
/* unspill, insert 'store [slot], reg' */
struct alloc astk = *alloc;
struct instr store;
@@ -141,7 +141,7 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi)
def(ra, ins, blk, curi); /* and free the reg again */
return;
}
- if (alloc) *alloc = afree();
+ if (alloc && (alloc->t != ASTACK || ins->op != Ophi)) *alloc = afree();
} else if (ins->op == Omove) {
assert(ins->l.t == RREG); /* move to a register */
assert(rstest(ra->m.blocked, ins->l.i));
@@ -253,7 +253,7 @@ forcetake(struct rega *ra, int reg, union ref ref, struct block *blk, int curi)
insertinstr(blk, ++curi, mkmove(insrescls(instrtab[var]), reg, rename));
ra->m.regs[rename] = var;
globusage = rsset(globusage, rename);
- *alloc = areg(rename);
+ (void)imap_set(&ra->m.allocs, var, areg(rename));
ra->m.free = rsset(ra->m.free, reg);
} else {
spill(ra, reg, blk, curi);
@@ -405,7 +405,7 @@ emitmove(enum irclass k, struct alloc dst, struct alloc src, struct block *blk,
if (dst.t == AREG && src.t == AREG) {
insertinstr(blk, curi, mkmove(k, dst.a, src.a));
} else if (dst.t == ASTACK && src.t == AREG) {
- mv = mkinstr(Ostore1+ilog2(cls2siz[k]), 0, mkref(RICON, dst.a), mkref(RREG, src.a));
+ mv = mkinstr(Ostore1+ilog2(cls2siz[k]), 0, mkref(RICON, dst.a*8), mkref(RREG, src.a));
addstkslotref(insertinstr(blk, curi, mv));
} else if (dst.t == AREG && src.t == ASTACK) {
switch (mv.cls = k) {
@@ -416,7 +416,7 @@ emitmove(enum irclass k, struct alloc dst, struct alloc src, struct block *blk,
case KF4: mv.op = Oloadf4; break;
case KF8: mv.op = Oloadf8; break;
}
- mv.l = mkref(RICON, dst.a);
+ mv.l = mkref(RICON, dst.a*8);
mv.reg = src.a;
addstkslotref(insertinstr(blk, curi, mv));
}
@@ -490,6 +490,7 @@ lowerphis(struct function *fn, struct bbrm *bbrm, struct block *blk, struct bloc
int predno;
struct alloc *alloc;
struct rmap *out = &bbrm[blk->id].out;
+ struct rmap *in = &bbrm[suc->id].in;
struct block *n = NULL;
if (!blk->s2) n = blk;
@@ -500,36 +501,40 @@ lowerphis(struct function *fn, struct bbrm *bbrm, struct block *blk, struct bloc
assert(predno < suc->npred);
npmove = 0;
- /* ensure phi args go to the same reg as phi with parallel copies */
+ /* ensure phi args go to the same slot as phi with parallel copies */
for (int i = 0; i < suc->phi.n; ++i) {
struct instr *phi = &instrtab[suc->phi.p[i]];
union ref *arg = &phitab.p[phi->l.i][predno];
- int regto, regfrom;
+ struct alloc from, to;
if (arg->t == RREG) continue;
assert(arg->t == RTMP);
DBG("resolve phi @%d, @%d, %%%d <- %%%d\n", blk->id, suc->id, phi - instrtab, arg->i);
if (instrtab[arg->i].reg) {
- regfrom = instrtab[arg->i].reg - 1;
- DBG(" it had R%d\n", regfrom);
+ from = areg(instrtab[arg->i].reg - 1);
+ DBG(" it had R%d\n", from.a);
} else {
alloc = imap_get(&out->allocs, arg->i);
- assert(alloc && alloc->t == AREG);
- regfrom = alloc->a;
- DBG(" found R%d\n", regfrom);
- instrtab[arg->i].reg = regfrom+1;
+ assert(alloc && alloc->t != ADEAD);
+ from = *alloc;
+ DBG(" found %c%d\n", " RS"[from.t], from.a);
+ if (from.t == AREG)
+ instrtab[arg->i].reg = from.a+1;
}
if (phi->reg) {
- regto = phi->reg - 1;
+ to = areg(phi->reg - 1);
+ DBG(" phi had R%d\n", to.a);
} else {
- alloc = imap_get(&out->allocs, phi - instrtab);
- assert(alloc && alloc->t == AREG);
- regto = alloc->a;
- phi->reg = regto+1;
+ alloc = imap_get(&in->allocs, phi - instrtab);
+ assert(alloc && alloc->t != ADEAD);
+ DBG(" found phi %c%d\n", " RS"[from.t], from.a);
+ to = *alloc;
+ if (to.t == AREG)
+ phi->reg = to.a+1;
}
- DBG(" > phi move %c%d -> %c%d\n", " RS"[AREG], regfrom, " RS"[AREG], regto);
+ DBG(" > phi move %c%d -> %c%d\n", " RS"[from.t], from.a, " RS"[to.t], to.a);
if (!n) n = insertblk(fn, blk, suc);
- pmadd(phi->cls, areg(regto), areg(regfrom));
+ pmadd(phi->cls, to, from);
}
if (n) emitpm(n);
}
@@ -540,7 +545,6 @@ resolve(struct function *fn, struct bbrm *bbrm, struct block *blk, struct block
ushort var;
struct alloc *alloc, *alloc2;
struct rmap *out = &bbrm[blk->id].out, *in = &bbrm[suc->id].in;
- struct rmap *in2 = &bbrm[blk->id].in;
struct block *n = NULL;
DBG("resolve(@%d, @%d)\n",blk->id, suc->id);
@@ -559,8 +563,7 @@ resolve(struct function *fn, struct bbrm *bbrm, struct block *blk, struct block
}
pmadd(insrescls(instrtab[var]), *alloc, *alloc2);
}
- (void)imap_set(&in2->allocs, var, *alloc);
- if (!instrtab[var].reg)
+ if (!instrtab[var].reg && alloc->t == AREG)
instrtab[var].reg = alloc->a + 1;
}
if (n) emitpm(n);
@@ -569,6 +572,8 @@ resolve(struct function *fn, struct bbrm *bbrm, struct block *blk, struct block
dumpallocs("out", out);
}
+static void fixlive(struct function *fn);
+
void
regalloc(struct function *fn)
{
@@ -596,6 +601,9 @@ regalloc(struct function *fn)
fn->isleaf = 1;
vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf));
+ /* fix liveness ranges */
+ fixlive(fn);
+
/* generate copies for phi operands to transform into CSSA */
blk = fn->entry;
do {
@@ -618,7 +626,7 @@ regalloc(struct function *fn)
} while ((blk = blk->lnext) != fn->entry);
fillblkids(fn);
bbrm = xcalloc(sizeof *bbrm * (nbbrm = fn->nblk));
-
+
/* visit blocks in reverse, allocating registers in a greedy manner */
blk = last;
do {
@@ -688,9 +696,9 @@ regalloc(struct function *fn)
do {
if (blk->id < 0) continue;
for (int i = 0; i < blk->npred; ++i) {
- if (blkpred(blk, i)->id < 0) continue;
lowerphis(fn, bbrm, blkpred(blk, i), blk);
}
+ vfree(&blk->phi);
} while ((blk = blk->lprev) != last);
/* final cleanup */
@@ -759,9 +767,6 @@ regalloc(struct function *fn)
} while ((blk = blk->lnext) != fn->entry);
fn->nblk = id;
- blk = fn->entry;
- do vfree(&blk->phi); while ((blk = blk->lnext) != fn->entry);
-
fn->stksiz += ra.maxstk*8;
if (fn->stksiz > 1<<24) error(NULL, "'%s' stack frame too big", fn->name);
while (stkslotrefs.n) {
@@ -781,4 +786,123 @@ regalloc(struct function *fn)
fn->regusage = globusage;
}
+
+static int livelastblk;
+struct pendingphi { ushort var, phi; };
+static vec_of(struct pendingphi) *pendingphis;
+static int npendingphi;
+static ushort **curdefs;
+static union ref readvar(struct bitset *defined, enum irclass cls, int var, struct block *blk);
+
+/* The algorithm used here to introduce phis for temporaries whose definitions
+ * appear later than some of its uses is very similar to that in mem2reg() */
+
+static void
+fillphi(struct bitset *defined, union ref phi, enum irclass cls, int var, struct block *blk)
+{
+ union ref *args = phitab.p[instrtab[phi.i].l.i];
+ assert(blk->npred > 0);
+ for (int i = 0; i < blk->npred; ++i)
+ args[i] = readvar(defined, cls, var, blk);
+}
+
+static union ref
+readvar(struct bitset *defined, enum irclass cls, int var, struct block *blk)
+{
+ union ref val;
+
+ if (bstest(defined, var)) return mkref(RTMP, var);
+
+ /* memoed definition */
+ if (xbcap(curdefs) > blk->id && xbcap(curdefs[blk->id]) > var && curdefs[blk->id][var])
+ return mkref(RTMP, curdefs[blk->id][var]);
+
+ xbgrowz(&curdefs, blk->id + 1);
+ if (blk->id > livelastblk) {
+ ++npendingphi;
+ val = insertphi(blk, cls);
+ xbgrowz(&pendingphis, blk->id + 1);
+ vpush(&pendingphis[blk->id], ((struct pendingphi) { var, val.i }));
+ } else if (blk->npred == 1) {
+ val = readvar(defined, cls, var, blkpred(blk, 0));
+ } else {
+ val = insertphi(blk, cls);
+ fillphi(defined, val, cls, var, blk);
+ }
+ xbgrowz(&curdefs[blk->id], var + 1);
+ assert(val.i > 0);
+ curdefs[blk->id][var] = val.i;
+ return val;
+}
+
+static void
+liveuse(struct bitset *defined, struct instr *ins, union ref *r, struct block *blk)
+{
+ int var;
+ if (r->t == RADDR) {
+ liveuse(defined, ins, &addrht[r->i].base, blk);
+ liveuse(defined, ins, &addrht[r->i].index, blk);
+ return;
+ } else if (r->t != RTMP) return;
+ var = r->i;
+ if (bstest(defined, var)) return;
+
+ *r = readvar(defined, insrescls(instrtab[r->i]), var, blk);
+}
+
+/* regalloc() assumes every use of a temporary is visited before its definition
+ * so this function fixes cases where that would not apply by introducing phi functions */
+static void
+fixlive(struct function *fn)
+{
+ extern int ninstr;
+ struct block *blk = fn->entry;
+ struct bitset definedbuf[4] = {0};
+ struct bitset sealedbuf[2] = {0};
+ struct bitset *defined = definedbuf;
+ struct bitset *sealed = sealedbuf;
+
+ if (ninstr >= sizeof(definedbuf)*8)
+ defined = xcalloc(sizeof *defined * BSSIZE(ninstr));
+ if (fn->nblk >= sizeof(sealedbuf)*8)
+ sealed = xcalloc(sizeof *sealed * BSSIZE(fn->nblk));
+ npendingphi = 0;
+
+ do {
+ for (int i = 0; i < blk->phi.n; ++i) {
+ int var = blk->phi.p[i];
+ bsset(defined, var);
+ }
+
+ for (int i = 0; i < blk->ins.n; ++i) {
+ int var = blk->ins.p[i];
+ struct instr *ins = &instrtab[var];
+ if (ins->l.t) liveuse(defined, ins, &ins->l, blk);
+ if (ins->r.t) liveuse(defined, ins, &ins->r, blk);
+ bsset(defined, var);
+ }
+ } while ((blk = blk->lnext) != fn->entry);
+
+ do {
+ vec_of(struct pendingphi) *pphi;
+
+ if (!npendingphi) break;
+ if (xbcap(pendingphis) <= blk->id) break;
+
+ pphi = (void *)&pendingphis[blk->id];
+ if (pphi->n) --npendingphi;
+ for (int i = 0; i < pphi->n; ++i) {
+ fillphi(defined, mkref(RTMP, pphi->p[i].phi), instrtab[pphi->p[i].phi].cls, pphi->p[i].var, blk);
+ }
+ vfree(pphi);
+ } while ((blk = blk->lnext) != fn->entry);
+
+ if (ccopt.dbg.l) {
+ efmt("<< After liveness fixup >>\n");
+ irdump(fn);
+ }
+ if (defined != definedbuf) free(defined);
+ if (sealed != sealedbuf) free(sealed);
+}
+
/* vim:set ts=3 sw=3 expandtab: */