aboutsummaryrefslogtreecommitdiffhomepage
path: root/regalloc.c
diff options
context:
space:
mode:
authorlemon <lsof@mailbox.org>2025-09-14 11:06:06 +0200
committerlemon <lsof@mailbox.org>2025-09-14 11:07:11 +0200
commit92c6943fa81145050b083348831a03154be2210c (patch)
treec15a17756c04f87ed1ad00ca510273cc348590af /regalloc.c
parenta95e385217841da91c3e44674dbaa95fb613a153 (diff)
regalloc.c cleanup
Diffstat (limited to 'regalloc.c')
-rw-r--r--regalloc.c391
1 files changed, 204 insertions, 187 deletions
diff --git a/regalloc.c b/regalloc.c
index b7516d0..7242732 100644
--- a/regalloc.c
+++ b/regalloc.c
@@ -3,7 +3,6 @@
/* Implements linear scan register allocation */
static regset gpregset, fpregset;
-static regset globusage;
#define isfpr(reg) in_range((reg), mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1)
#define isgpr(reg) in_range((reg), mctarg->gpr0, mctarg->gpr0 + mctarg->nfpr - 1)
@@ -198,6 +197,7 @@ emitpm(struct block *blk)
}
}
+/* remove phis by inserting parallel moves */
static void
lowerphis(struct rega *ra, struct block *blk, struct block *suc)
{
@@ -249,12 +249,12 @@ lowerphis(struct rega *ra, struct block *blk, struct block *suc)
}
static void
-rporec(struct block ***rpo, struct block *b)
+porec(struct block ***rpo, struct block *b)
{
if (wasvisited(b)) return;
markvisited(b);
- if (b->s2) rporec(rpo, b->s2);
- if (b->s1) rporec(rpo, b->s1);
+ if (b->s2) porec(rpo, b->s2);
+ if (b->s1) porec(rpo, b->s1);
*--*rpo = b;
}
@@ -271,8 +271,8 @@ sortrpo(struct function *fn)
startbbvisit();
fn->entry->id = 0;
markvisited(fn->entry);
- if (fn->entry->s1) rporec(&rpo, fn->entry->s1);
- if (fn->entry->s2) rporec(&rpo, fn->entry->s2);
+ if (fn->entry->s1) porec(&rpo, fn->entry->s1);
+ if (fn->entry->s2) porec(&rpo, fn->entry->s2);
*--rpo = fn->entry;
ndead = rpo - rpobuf;
for (i = 1, ++rpo; rpo < rpoend; ++rpo, ++i) {
@@ -536,7 +536,7 @@ buildintervals(struct rega *ra)
*ins = mkinstr(Onop,0,);
}
}
- bsclr(live, out);
+ bsclr(live, out);
if (ins->op == Omove) {
assert(ins->l.t == RREG);
defreg(ra, ins->l.i, pos);
@@ -619,7 +619,7 @@ buildintervals(struct rega *ra)
addrange(&ra->intervals, opd, (struct range){blk->inumstart, loopend->inumstart + loopend->ins.n+1}, -1);
/* struct interval *lv = imap_get(&ra->intervals.temps, opd);
for (int i = 0; i < lv->n; ++i) {
- struct range r = itrange(lv, i);
+ struct range r = itrange(lv, i);
// DBG(" @%d:%d - @%d:%d\n", r.from.blk, r.from.ins, r.to.blk, r.to.ins);
} */
}
@@ -632,7 +632,7 @@ buildintervals(struct rega *ra)
imap_each(&ra->intervals.temps, var, lv) {
DBG("lifetime of %%%d: ", var);
for (int i = 0; i < lv->nrange; ++i) {
- struct range r = itrange(lv, i);
+ struct range r = itrange(lv, i);
DBG("[%d,%d)%s", r.from, r.to, i < lv->nrange-1 ? ", " : "");
}
DBG("\n");
@@ -657,7 +657,7 @@ itcontainspos(const struct interval *it, int pos)
{
for (int i = 0; i < it->nrange; ++i) {
struct range r = itrange(it, i);
- if (r.from > pos) return 0;
+ if (r.from > pos) return 0;
if (pos < r.to) return 1;
}
return 0;
@@ -840,7 +840,7 @@ linearscan(struct rega *ra)
ins->reg = reg + 1;
DBG("%%%d got %s\n", this, mctarg->rnames[reg]);
ra->free = rsclr(ra->free, reg);
- globusage = rsset(globusage, reg);
+ ra->fn->regusage = rsset(ra->fn->regusage, reg);
//if current has a register assigned then add current to active
current->next = active;
@@ -849,187 +849,148 @@ linearscan(struct rega *ra)
}
}
-void
-regalloc(struct function *fn)
+/* replace temps with physical regs, add loads & stores for spilled temps */
+static bool
+devirt(struct rega *ra, struct block *blk)
{
- static union ref *stkslotrefsbuf[64];
- int id;
- static union { char m[sizeof(struct arena) + (1<<10)]; struct arena *_align; } amem;
- struct rega ra = {fn, .arena = (void *)amem.m};
- struct block *blk, *last;
- memset(ra.arena, 0, sizeof *ra.arena);
- ra.arena->cap = sizeof amem.m - sizeof(struct arena);
-
- /* setup */
- if (!fpregset || !gpregset) {
- for (int i = 0; i < MAXREGS; ++i) {
- if (isfpr(i))
- fpregset = rsset(fpregset, i);
- else if (isgpr(i))
- gpregset = rsset(gpregset, i);
+ bool allnops = 1;
+ struct function *fn = ra->fn;
+ for (int curi = 0; curi < blk->ins.n; ++curi) {
+ int temp = blk->ins.p[curi];
+ struct instr *ins = &instrtab[temp];
+ struct interval *it;
+ struct alloc *alloc;
+ union ref *argref[4];
+ int nargref = 0;
+ int nspill = 0;
+
+ /* devirtualize ref args */
+ for (int i = 0; i < 2; ++i) {
+ union ref *r = &i[&ins->l];
+ if (r->t == RADDR) {
+ /* XXX mutating hashed addr.. should be fine though (because
+ * new RADDRs shouldn't be created after regalloc)
+ * maybe hashing them in the first place is unnecessary */
+ struct addr *a = &addrht[r->i];
+ argref[nargref++] = &a->base;
+ argref[nargref++] = &a->index;
+ } else {
+ argref[nargref++] = r;
+ }
}
+ for (int i = 0; i < nargref; ++i) {
+ union ref *r = argref[i];
+ int tr;
+ if (r->t == RTMP) {
+ static uchar cls2load[] = {
+ [KI4] = Oloads4, [KI8] = Oloadi8, [KF4] = Oloadf4, [KF8] = Oloadf8, [KPTR] = 0
+ };
+ cls2load[KPTR] = targ_64bit ? Oloadi8 : Oloads4;
+
+ alloc = (it = imap_get(&ra->intervals.temps, r->i)) ? &it->alloc : NULL;
+ if (alloc->t == ASTACK && ins->op == Omove) {
+ /* move [reg], [stk] -> [reg] = load [stk] */
+ assert(r == &ins->r && ins->l.t == RREG);
+ ins->reg = ins->l.i+1;
+ ins->op = cls2load[ins->cls];
+ ins->r = NOREF;
+ addstkslotref(temp, alloc->a*8);
+ } else if (alloc->t == ASTACK && ins->op == Ocopy) {
+ /* [reg] = copy [stk] -> [reg] = load [stk] */
+ assert(r == &ins->l && ins->reg);
+ ins->op = cls2load[ins->cls];
+ addstkslotref(temp, alloc->a*8);
+ } else if (alloc->t == ASTACK) {
+ /* ref was spilled, gen load to scratch register and use it */
+ struct instr ld = {.cls = insrescls(instrtab[r->i])};
+ int reg = kisint(ld.cls) ? mctarg->gprscratch : mctarg->fprscratch;
+ bool dosave;
+ /* pick scratch register, or any register that doesn't conflict with this instr's srcs/dst */
+ if (nspill > 0) {
+ for (reg = kisflt(ld.cls) ? mctarg->fpr0 : mctarg->gpr0;; ++reg) {
+ if (reg == ins->reg-1) continue;
+ for (int j = 0; j < i; ++j)
+ if (argref[j]->t == RREG && argref[j]->i == reg) continue;
+ break;
+ }
+ /* if not the designated scratch register, we need to save+restore */
+ if ((dosave = rstest(fn->regusage, reg) || rstest(mctarg->rcallee, reg))) {
+ insertinstr(blk, curi++, mkinstr(Oxsave, 0, .l = mkref(RREG, reg)));
+ }
+ }
+ ld.reg = reg+1;
+ switch (ld.cls) {
+ default: assert(0);
+ case KI4: ld.op = Oloads4; break;
+ case KI8: ld.op = Oloadi8; break;
+ case KPTR: ld.op = targ_64bit ? Oloadi8 : Oloads4; break;
+ case KF4: ld.op = Oloadf4; break;
+ case KF8: ld.op = Oloadf8; break;
+ }
+ addstkslotref(insertinstr(blk, curi++, ld).i, alloc->a*8);
+ *r = mkref(RREG, reg);
+ if (nspill > 0 && dosave) {
+ insertinstr(blk, curi+1, mkinstr(Oxrestore, 0, .l = mkref(RREG, reg)));
+ }
+ ++nspill;
+ } else if ((tr = instrtab[r->i].reg)) {
+ assert(alloc && alloc->t == AREG && alloc->a == tr-1);
+ *r = mkref(RREG, tr-1);
+ }
+ }
+ }
+ if (nspill > 0) assert(ins->op != Ocall);
+
+ /* devirtualize destination */
+ alloc = (it = imap_get(&ra->intervals.temps, temp)) ? &it->alloc : NULL;
+ if (alloc && alloc->t == ASTACK) {
+ int store = Ostore1 + ilog2(cls2siz[insrescls(*ins)]);
+ /* t was spilled, gen store */
+ if (ins->op == Ocopy) {
+ ins->op = store;
+ ins->r = ins->l;
+ addstkslotref(temp, alloc->a*8);
+ } else {
+ int reg = kisint(insrescls(*ins)) ? mctarg->gprscratch : mctarg->fprscratch;
+ assert(nspill == 0);
+ ins->reg = reg+1;
+ addstkslotref(
+ insertinstr(blk, ++curi, mkinstr(store, 0, .r = mkref(RREG, reg))).i,
+ alloc->a*8);
+ }
+ } else if (!ins->reg && insrescls(*ins) && ins->op != Omove && !ins->keep) {
+ /* dead */
+ Nop:
+ ins->op = Onop;
+ } else if (ins->op == Omove && ins->r.t == RREG && ins->l.i == ins->r.i) {
+ /* move r1,r2 / r1=r2 */
+ goto Nop;
+ } else if (ins->op == Ocopy && ins->l.t == RREG && ins->reg-1 == ins->l.i) {
+ /* r1 = copy r2 / r1=r2 */
+ goto Nop;
+ } else if (ins->inplace && ins->l.t == RREG && ins->reg && ins->reg-1 != ins->l.i) {
+ /* fixup in-place (two-address) instructions */
+ allnops = 0;
+ insertinstr(blk, curi++, mkmove(ins->cls, ins->reg-1, ins->l.i));
+ ins->l.i = ins->reg-1;
+ } else if (ins->op != Onop) allnops = 0;
}
- ra.free = rsclr(rsclr(rsminus(rsunion(gpregset, fpregset), mctarg->rglob), mctarg->gprscratch), mctarg->fprscratch);
- memset(ra.freestk, 0xFF, sizeof ra.freestk);
- globusage = 0;
- fn->stksiz = alignup(fn->stksiz, 8);
- fn->isleaf = 1;
- vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf));
-
- /* put into reverse post order */
- sortrpo(fn);
-
- /* fix liveness ranges */
- fixlive(fn);
-
- /* transform into CSSA */
- fixcssa(fn);
-
- fillblkids(fn);
-
- if (ccopt.dbg.r) {
- efmt("<< Before linear scan >>\n");
- irdump(fn);
- }
-
- /* linear scan: build lifetime intervals */
- buildintervals(&ra);
- /* linear scan */
- linearscan(&ra);
-
- /* resolve */
+ return allnops;
+}
- /* parallel copies for each phi */
- blk = last = fn->entry->lprev;
- do {
- if (blk->id < 0) continue;
- for (int i = 0; i < blk->npred; ++i) {
- lowerphis(&ra, blkpred(blk, i), blk);
- }
- vfree(&blk->phi);
- } while ((blk = blk->lprev) != last);
+static void
+fini(struct rega *ra)
+{
+ int id = 0;
+ struct function *fn = ra->fn;
+ struct block *blk = fn->entry;
- /* final cleanup */
- id = 0;
- blk = fn->entry;
do {
- bool allnops = 1;
+ bool allnops;
blk->id = id++;
- for (int curi = 0; curi < blk->ins.n; ++curi) {
- int t = blk->ins.p[curi];
- struct instr *ins = &instrtab[t];
- struct interval *it;
- struct alloc *alloc;
- union ref *targs[4];
- int ntargs = 0;
- int nspill = 0;
-
- /* devirtualize ref args */
- for (int i = 0; i < 2; ++i) {
- union ref *r = &i[&ins->l];
- if (r->t == RADDR) {
- /* XXX mutating hashed addr.. should be fine though (because
- * new RADDR shouldn't be created after regalloc)
- * maybe hashing them in the first place is unnecessary */
- struct addr *a = &addrht[r->i];
- targs[ntargs++] = &a->base;
- targs[ntargs++] = &a->index;
- } else {
- targs[ntargs++] = r;
- }
- }
- for (int i = 0; i < ntargs; ++i) {
- union ref *r = targs[i];
- int tr;
- if (r->t == RTMP) {
- static uchar cls2load[] = {[KI4] = Oloads4, [KI8] = Oloadi8, [KF4] = Oloadf4, [KF8] = Oloadf8, [KPTR] = 0};
- cls2load[KPTR] = targ_64bit ? Oloadi8 : Oloads4;
-
- alloc = (it = imap_get(&ra.intervals.temps, r->i)) ? &it->alloc : NULL;
- if (alloc->t == ASTACK && ins->op == Omove) {
- assert(r == &ins->r && ins->l.t == RREG);
- ins->reg = ins->l.i+1;
- ins->op = cls2load[ins->cls];
- ins->r = NOREF;
- addstkslotref(t, alloc->a*8);
- } else if (alloc->t == ASTACK && ins->op == Ocopy) {
- assert(r == &ins->l && ins->reg);
- ins->op = cls2load[ins->cls];
- addstkslotref(t, alloc->a*8);
- } else if (alloc->t == ASTACK) {
- /* ref was spilled, gen load */
- struct instr ld = {.cls = insrescls(instrtab[r->i])};
- int reg = kisint(ld.cls) ? mctarg->gprscratch : mctarg->fprscratch;
- bool dosave;
- /* pick scratch register, or any register that doesn't conflict with this instr's srcs/dst */
- if (nspill > 0) {
- for (reg = kisflt(ld.cls) ? mctarg->fpr0 : mctarg->gpr0;; ++reg) {
- if (reg == ins->reg-1) continue;
- for (int j = 0; j < i; ++j)
- if (targs[j]->t == RREG && targs[j]->i == reg) continue;
- break;
- }
- /* if not the designated scratch register, we need to save+restore */
- if ((dosave = rstest(fn->regusage, reg) || rstest(mctarg->rcallee, reg))) {
- insertinstr(blk, curi++, mkinstr(Oxsave, 0, .l = mkref(RREG, reg)));
- }
- }
- ld.reg = reg+1;
- switch (ld.cls) {
- default: assert(0);
- case KI4: ld.op = Oloads4; break;
- case KI8: ld.op = Oloadi8; break;
- case KPTR: ld.op = targ_64bit ? Oloadi8 : Oloads4; break;
- case KF4: ld.op = Oloadf4; break;
- case KF8: ld.op = Oloadf8; break;
- }
- addstkslotref(insertinstr(blk, curi++, ld).i, alloc->a*8);
- *r = mkref(RREG, reg);
- if (nspill > 0 && dosave) {
- insertinstr(blk, curi+1, mkinstr(Oxrestore, 0, .l = mkref(RREG, reg)));
- }
- ++nspill;
- } else if ((tr = instrtab[r->i].reg)) {
- *r = mkref(RREG, tr-1);
- }
- }
- }
- if (nspill > 0) assert(ins->op != Ocall);
-
- /* devirtualize destination */
- alloc = (it = imap_get(&ra.intervals.temps, t)) ? &it->alloc : NULL;
- if (alloc && alloc->t == ASTACK) {
- int store = Ostore1 + ilog2(cls2siz[insrescls(*ins)]);
- /* t was spilled, gen store */
- if (ins->op == Ocopy) {
- ins->op = store;
- ins->r = ins->l;
- addstkslotref(t, alloc->a*8);
- } else {
- int reg = kisint(insrescls(*ins)) ? mctarg->gprscratch : mctarg->fprscratch;
- assert(nspill == 0);
- ins->reg = reg+1;
- addstkslotref(
- insertinstr(blk, ++curi, mkinstr(store, 0, .r = mkref(RREG, reg))).i,
- alloc->a*8);
- }
- } else if (!ins->reg && insrescls(*ins) && ins->op != Omove && !ins->keep) {
- /* dead */
- Nop:
- ins->op = Onop;
- } else if (ins->op == Omove && ins->r.t == RREG && ins->l.i == ins->r.i) {
- goto Nop;
- } else if (ins->op == Ocopy && ins->l.t == RREG && ins->reg-1 == ins->l.i) {
- goto Nop;
- } else if (ins->inplace && ins->l.t == RREG && ins->reg && ins->reg-1 != ins->l.i) {
- /* fixup in-place (two-address) instructions */
- allnops = 0;
- insertinstr(blk, curi++, mkmove(ins->cls, ins->reg-1, ins->l.i));
- ins->l.i = ins->reg-1;
- } else if (ins->op != Onop) allnops = 0;
- }
+ allnops = devirt(ra, blk);
/* remove no-op blocks */
if (allnops && !blk->s2 && blk->npred > 0) {
@@ -1084,17 +1045,75 @@ regalloc(struct function *fn)
}
}
} while ((blk = blk->lnext) != fn->entry);
+}
+
+void
+regalloc(struct function *fn)
+{
+ static union ref *stkslotrefsbuf[64];
+ struct rega ra = {fn, .arena = fn->arena};
+ struct block *blk, *last;
+
+ /* setup */
+ if (!fpregset || !gpregset) {
+ for (int i = 0; i < MAXREGS; ++i) {
+ if (isfpr(i))
+ fpregset = rsset(fpregset, i);
+ else if (isgpr(i))
+ gpregset = rsset(gpregset, i);
+ }
+ }
+ ra.free = rsclr(rsclr(rsminus(rsunion(gpregset, fpregset), mctarg->rglob), mctarg->gprscratch), mctarg->fprscratch);
+ memset(ra.freestk, 0xFF, sizeof ra.freestk);
+ fn->regusage = 0;
+ fn->stksiz = alignup(fn->stksiz, 8);
+ fn->isleaf = 1;
+ vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf));
+
+ /* put into reverse post order */
+ sortrpo(fn);
+
+ /* fix liveness ranges */
+ fixlive(fn);
+
+ /* transform into CSSA */
+ fixcssa(fn);
+
+ fillblkids(fn);
+
+ if (ccopt.dbg.r) {
+ efmt("<< Before linear scan >>\n");
+ irdump(fn);
+ }
+
+ /* linear scan: build lifetime intervals */
+ buildintervals(&ra);
+
+ /* linear scan: assign physical registers and stack slots */
+ linearscan(&ra);
+
+ /* get out of SSA */
+ blk = last = fn->entry->lprev;
+ do {
+ if (blk->id < 0) continue;
+ for (int i = 0; i < blk->npred; ++i) {
+ lowerphis(&ra, blkpred(blk, i), blk);
+ }
+ vfree(&blk->phi);
+ } while ((blk = blk->lprev) != last);
+
+ /* devirtualize & final cleanup */
+ fini(&ra);
{ int var;
struct interval *lv;
imap_each(&ra.intervals.temps, var, lv) {
- // instrtab[var].reg = lv->reg+1;
+ (void)var;
if (lv->nrange > 2) xbfree(lv->_dyn);
}
imap_free(&ra.intervals.temps);
}
-
fn->stksiz += ra.maxstk*8;
if (fn->stksiz > 1<<24) error(NULL, "'%s' stack frame too big", fn->name);
while (stkslotrefs.n) {
@@ -1106,10 +1125,8 @@ regalloc(struct function *fn)
DBG("<< After regalloc >>\n");
irdump(fn);
}
- fn->regusage = globusage;
}
-
static int livelastblk;
struct pendingphi { ushort var, phi; };
static vec_of(struct pendingphi) *pendingphis;