diff options
| author | 2023-06-24 18:47:05 +0200 | |
|---|---|---|
| committer | 2023-06-24 18:47:05 +0200 | |
| commit | 19bbdfa3c7ae05f4694ce5e434d9855c6f2c3682 (patch) | |
| tree | 700ca75e92f443fcb3fed30b1078b8aedde979f9 /regalloc.c | |
| parent | d313c6e49bfb32ae24745e90eebe833da20efa1a (diff) | |
backend: fix regalloc to work with more complex dataflow
basically an allocation map at the beginning (in) and end (out) of each
block is kept and after the first allocation pass another pass is run to
resolve allocation conflicts between each edge, plus another pass to
finish lowering phi functions.
also introduced `regset` and plenty of other miscellaneous fixes
Diffstat (limited to 'regalloc.c')
| -rw-r--r-- | regalloc.c | 746 |
1 files changed, 556 insertions, 190 deletions
@@ -1,9 +1,12 @@ +#include "common.h" #include "ir.h" -static struct bitset globusage[1]; -static struct bitset floatregs[1]; +/* Implements reverse linear register allocation, quite ad-hoc right now */ -#define isfpr(reg) bstest(floatregs, (reg)) +static regset gpregset, fpregset; +static regset globusage; + +#define isfpr(reg) in_range((reg), mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1) #define isgpr(reg) (!isfpr(reg)) enum { ADEAD, AREG, ASTACK } ; @@ -12,40 +15,91 @@ struct alloc { ushort t : 2, a : 14; }; #define areg(r) ((struct alloc) { AREG, (r) }) #define astack(s) ((struct alloc) { ASTACK, (s) }) +enum { MAXSPILL = 128 }; + +struct rmap { + regset free; /* free registers */ + regset blocked; /* registers allocated to themselves (from e.g. isel reg constraints, abi) */ + ushort regs[MAXREGS]; /* map reg -> temp holding reg */ + struct bitset freestk[BSSIZE(MAXSPILL)]; /* free stack slots */ + imap_of(struct alloc) allocs; /* map tmpidx -> allocation */ +}; + struct rega { - union ref regs[MAXREGS]; /* map reg -> value holding reg */ - struct alloc *allocs; /* map tmpidx -> allocation */ - vec_of(ushort) freestk; /* available stack slots */ - int nfreegpr, nfreefpr; + struct function *fn; + struct rmap m; int maxstk, stktop; }; static vec_of(union ref *) stkslotrefs; -static void +static union ref addstkslotref(union ref inst) { vpush(&stkslotrefs, &instrtab[inst.i].l); + return inst; } static int allocreg(struct rega *ra, enum irclass cls, union ref ref, uvlong excl); #if 0 #define DBG efmt +static void +dumpallocs(const char *s, struct rmap *m) +{ + int var; + struct alloc *alloc; + + DBG("%s map [", s); + imap_each(&m->allocs, var, alloc) { + if (!alloc->t) continue; + DBG(" %%%d -> %c%d,", var, "XRS"[alloc->t], alloc->a); + } + DBG("]\n"); +} #else #define DBG(...) ((void)0) +#define dumpallocs(...) 
#endif static void freereg(struct rega *ra, int r) { - ra->regs[r] = NOREF; - if (isfpr(r)) ++ra->nfreefpr; - else ++ra->nfreegpr; + ra->m.free = rsset(ra->m.free, r); + ra->m.blocked = rsclr(ra->m.blocked, r); } static void take(struct rega *ra, int reg, union ref ref); +static int +allocstk(struct rega *ra, int var) +{ + int s = -1; + + for (int i = 0; i < BSSIZE(MAXSPILL); ++i) { + if (~ra->m.freestk[i].u == 0) continue; + s = i*64 + lowestsetbit(ra->m.freestk[i].u); + } + if (s != -1) { + bsclr(ra->m.freestk, s); + if (ra->stktop < s) ra->stktop = s+1; + } else { + s = ra->stktop++; + } + if (ra->maxstk < s+1) ra->maxstk = s+1; + (void)imap_set(&ra->m.allocs, var, astack(s)); + return s; +} + +static void +freestk(struct rega *ra, int slot) +{ + if (slot < MAXSPILL) + bsset(ra->m.freestk, slot); + else if (slot == ra->stktop - 1) + --ra->stktop; +} + static void def(struct rega *ra, struct instr *ins, struct block *blk, int curi) { @@ -54,14 +108,16 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi) if (ins->op != Omove && ins->op != Ocall) { var = ins - instrtab; DBG("def %%%d\n",var); - alloc = &ra->allocs[var]; - if (alloc->t == ADEAD) { + alloc = imap_get(&ra->m.allocs, var); + if (!alloc || alloc->t == ADEAD) { return; } else if (alloc->t == AREG) { reg = alloc->a; DBG("-- free %s for %%%d\n", mctarg->rnames[alloc->a], var); - assert(ra->regs[reg].bits == mkref(RTMP, var).bits); - } else if (alloc->t == ASTACK) { + assert(!rstest(ra->m.free, reg)); + assert(ra->m.regs[reg] == var); + ins->reg = reg+1; + } else if (alloc->t == ASTACK) { /* unspill, insert 'store [slot], reg' */ struct alloc astk = *alloc; struct instr store; @@ -69,26 +125,27 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi) if ((ins->op == Ocopy || ins->inplace) && ins->l.t == RREG) { int hint = ins->l.i; - if (!ra->regs[hint].bits) { + if (rstest(ra->m.free, hint)) { take(ra, reg = hint, mkref(RTMP, var)); - assert(ra->regs[reg].bits == 
mkref(RTMP, var).bits); + assert(ra->m.regs[reg] == var && !rstest(ra->m.free, hint)); } } if (reg < 0) reg = allocreg(ra, insrescls(*ins), mkref(RTMP, var), 0); store = mkinstr(Ostore1 + ilog2(cls2siz[insrescls(*ins)]), 0, mkref(RICON, astk.a*8), mkref(RREG, reg)); - DBG("-- unspill %%%d s%d -> %s\n", var, astk.a, mctarg->rnames[ins->reg+1]); + DBG("-- unspill %%%d s%d -> %s\n", var, astk.a, mctarg->rnames[reg]); addstkslotref(insertinstr(blk, ++curi, store)); - vpush(&ra->freestk, astk.a); + freestk(ra, astk.a); ins->reg = reg+1; def(ra, ins, blk, curi); /* and free the reg again */ return; } - *alloc = afree(); + if (alloc) *alloc = afree(); } else if (ins->op == Omove) { + assert(ins->l.t == RREG); /* move to a register */ + assert(rstest(ra->m.blocked, ins->l.i)); reg = ins->l.i; - assert(ins->l.t == RREG); DBG("-- free %s\n", mctarg->rnames[ins->l.i]); } else { struct call *call = &calltab.p[ins->r.i]; @@ -104,68 +161,58 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi) static void take(struct rega *ra, int reg, union ref ref) { DBG("-- take %s for %c%d\n", mctarg->rnames[reg], "R%"[ref.t==RTMP], ref.i); - assert(!ra->regs[reg].bits && "taken"); - if (ref.t == RTMP) - ra->allocs[ref.i] = areg(reg); - ra->regs[reg] = ref; - assert(ra->regs[reg].bits); - bsset(globusage, reg); - if (isfpr(reg)) --ra->nfreefpr; - else --ra->nfreegpr; + assert(rstest(ra->m.free, reg) && "taken"); + if (ref.t == RTMP) { + (void)imap_set(&ra->m.allocs, ref.i, areg(reg)); + ra->m.regs[reg] = ref.i; + } else { + ra->m.blocked = rsset(ra->m.blocked, reg); + } + ra->m.free = rsclr(ra->m.free, reg); + globusage = rsset(globusage, reg); } static int allocreg(struct rega *ra, enum irclass cls, union ref ref, uvlong excl) { - int r0, rend, reg; + regset rs; + int reg; assert(cls); if (kisint(cls)) { - r0 = mctarg->gpr0; - rend = mctarg->gpr0 + mctarg->ngpr; + rs = gpregset; } else if (kisflt(cls)) { - r0 = mctarg->fpr0; - rend = mctarg->fpr0 + mctarg->nfpr; + rs = 
fpregset; } else assert(0); - for (reg = r0; reg < rend; ++reg) { - if (bstest(mctarg->rglob, reg)) continue; - if (!(excl >> reg & 1) && !ra->regs[reg].bits) { - take(ra, reg, ref); - return reg; - } - } - assert(!"no more reg"); -} - -static int -allocstk(struct rega *ra, int var) -{ - int s; - - if (ra->freestk.n) { - s = ra->freestk.p[--ra->freestk.n]; - ra->allocs[var] = astack(s); - } else { - if (ra->stktop+1 >= ra->maxstk) ra->maxstk = ra->stktop+1; - s = ra->stktop++; - } - ra->allocs[var] = astack(s); - return s; + rs = rsminus(rs, mctarg->rglob); + rs = rsand(rs, ra->m.free); + rs = rsminus(rs, excl); + + assert(rs != 0 && "no more reg"); + if (rsminus(rs, mctarg->rcallee) != 0) + reg = lowestsetbit(rsminus(rs, mctarg->rcallee)); + else + reg = lowestsetbit(rs); + take(ra, reg, ref); + return reg; } static void spill(struct rega *ra, int reg, struct block *blk, int curi) { int var, s; struct instr load; + struct alloc *alloc; - if (!ra->regs[reg].bits) return; - var = ra->regs[reg].i; - assert(ra->regs[reg].bits == RTMP && *(ushort *)&ra->allocs[var] == *(ushort *)&areg(reg)); + if (rstest(ra->m.free, reg)) return; + var = ra->m.regs[reg]; + assert(!rstest(ra->m.blocked, reg)); + assert((alloc = imap_get(&ra->m.allocs, var)) && !memcmp(alloc, &areg(reg), sizeof *alloc)); s = allocstk(ra, var); DBG("-- spill %%%d %s -> s%d\n", var, mctarg->rnames[reg], s); instrtab[var].reg = 0; /* insert 'reg = load [slot]' */ - load = mkinstr(Oloads1 + 2*ilog2(cls2siz[insrescls(instrtab[var])]), insrescls(instrtab[var]), mkref(RICON, s*8)); + load = mkinstr(Oloads1 + 2*ilog2(cls2siz[insrescls(instrtab[var])]), + insrescls(instrtab[var]), mkref(RICON, s*8)); load.reg = reg+1; addstkslotref(insertinstr(blk, ++curi, load)); freereg(ra, reg); @@ -178,34 +225,39 @@ forcetake(struct rega *ra, int reg, union ref ref, struct block *blk, int curi) int var; struct alloc *alloc; - if (ra->regs[reg].bits == ref.bits) return; - if (!ra->regs[reg].bits) { + if (ref.t == RREG && 
rstest(ra->m.blocked, ref.i)) { + assert(!rstest(ra->m.free, reg)); + return; + } + if (ref.t == RTMP && !rstest(ra->m.free, reg) && ra->m.regs[reg] == ref.i) return; + if (rstest(ra->m.free, reg)) { take(ra, reg, ref); return; } - assert(ra->regs[reg].bits == RTMP); - var = ra->regs[reg].i; - alloc = &ra->allocs[var]; - assert(alloc->a == reg); + assert(!rstest(ra->m.blocked, ref.i)); + var = ra->m.regs[reg]; + alloc = imap_get(&ra->m.allocs, var); + assert(alloc && alloc->a == reg); *alloc = afree(); /* try to move temp to another register */ - if (isgpr(reg) ? ra->nfreegpr > 0 : ra->nfreefpr > 0) { + if (rsand(ra->m.free, isgpr(reg) ? gpregset : fpregset) != 0) { /* the register of the current instruction (if any) was already free'd (by def), so * we need to explictly exclude it from the pool of rename registers * e.g.: given 'R0 = copy R1'; if R1 => %x, we need to prevent renaming %x => R0 */ - uvlong excl = instrtab[blk->ins.p[curi]].reg; - int rename = allocreg(ra, isgpr(reg) ? KI4 : KF4, ra->regs[reg], excl ? 1ull<<(excl-1) : 0); + regset excl = instrtab[blk->ins.p[curi]].reg; + int rename = allocreg(ra, isgpr(reg) ? KI4 : KF4, mkref(RTMP, ra->m.regs[reg]), + excl ? rsset(0, excl-1) : 0); if (ccopt.dbg.r)DBG("-- rename %%%d %s -> %s\n", var, mctarg->rnames[reg], mctarg->rnames[rename]); /* introduce move from rename -> original (since we allocate backwards) */ insertinstr(blk, ++curi, mkmove(insrescls(instrtab[var]), reg, rename)); - instrtab[var].reg = rename+1; - ra->regs[rename] = mkref(RTMP, var); - bsset(globusage, rename); + ra->m.regs[rename] = var; + globusage = rsset(globusage, rename); *alloc = areg(rename); - ra->regs[reg].bits = 0; + ra->m.free = rsset(ra->m.free, reg); } else { spill(ra, reg, blk, curi); + ra->m.free = rsset(ra->m.free, reg); } take(ra, reg, ref); } @@ -216,6 +268,8 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re { struct instr *ins; uvlong excl = other.t == RREG ? 
1ull<<other.i : 0; + struct alloc *alloc; + int reg; if (ref->t == RADDR) { struct addr addr = addrht[ref->i]; @@ -224,167 +278,475 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re *ref = mkaddr(addr); return; } else if (ref->t == RREG) { - assert(!(excl >> hint & 1)); + assert(hint<0 || !rstest(excl, hint)); forcetake(ra, ref->i, *ref, blk, curi); } if (ref->t != RTMP) return; ins = &instrtab[ref->i]; if (!ins->cls) return; - if (!ins->reg) { + if (!(alloc = imap_get(&ra->m.allocs, ref->i)) || alloc->t != AREG) { int s = -1; - if (ra->allocs[ref->i].t == ASTACK) - s = ra->allocs[ref->i].a; + if (alloc && alloc->t == ASTACK) { + /* ensure spill isn't overwritten by dest + * e.g. in R0 = add %s, 7 => can't spill %s to R0 */ + struct instr *use = &instrtab[blk->ins.p[curi]]; + s = alloc->a; + if (use->reg) excl = rsset(excl, use->reg-1); + } assert(ins->op != Ocall); if (ins->r.t == RREG && ins->inplace) excl |= 1ull<<ins->r.i; - if ((hint == -1 || ra->regs[hint].bits) && ins->op == Ocopy && ins->l.t == RREG) + if ((hint == -1 || !rstest(ra->m.free, hint)) && ins->op == Ocopy && ins->l.t == RREG) /* for '%x = copy Rx', hint %x to use Rx */ hint = ins->l.i; - if (hint != -1 && !(excl >> hint & 1) && !ra->regs[hint].bits) { + + if (hint != -1 && !rstest(excl, hint) && rstest(ra->m.free, hint)) { take(ra, hint, *ref); - ins->reg = hint + 1; + reg = hint; } else { - ins->reg = allocreg(ra, insrescls(*ins), *ref, excl) + 1; + reg = allocreg(ra, insrescls(*ins), *ref, excl); } + if (s >= 0) { /* unspill, insert 'store [slot], reg' */ - DBG("-- unspill %%%d s%d -> %s\n", ref->i, s, mctarg->rnames[ins->reg-1]); + DBG("-- unspill %%%d s%d -> %s\n", ref->i, s, mctarg->rnames[reg]); struct instr store = mkinstr(Ostore1 + ilog2(cls2siz[insrescls(*ins)]), 0, mkref(RICON, s*8), - mkref(RREG, ins->reg-1)); + mkref(RREG, reg)); addstkslotref(insertinstr(blk, ++curi, store)); - vpush(&ra->freestk, s); + freestk(ra, s); } + } else { + reg = alloc->a; 
} - /* do not patch ref if it's cond branch arg, emit() wants to know what instr it is */ + /* do not patch ref if it's used in a phi + * or if it's cond branch arg, emit() wants to know what instr it is */ + if (op != Ophi) if (ref != blk->jmp.arg || blk->jmp.t != Jb) - *ref = mkref(RREG, ins->reg-1); + *ref = mkref(RREG, reg); +} + +static void +doins(struct rega *ra, struct block *blk, struct instr *ins, int curi) +{ + int hint0 = -1, hint1 = -1; + if (ins->op != Ocopy && !imap_get(&ra->m.allocs, ins - instrtab) && ins->skip) { /* unused */ + *ins = mkinstr(Onop, 0,); + return; + } + def(ra, ins, blk, curi); + if (ins->op != Ocall) { + if (ins->op == Ocopy || ins->inplace) hint0 = ins->reg - 1; + if (ins->op == Omove) { + if (ins->l.t == RREG) hint1 = ins->l.i; + /* MOV Rx,Rx is used by isel to indicate a clobber, + * so it should be a def point for Rx but not a use point */ + if (ins->r.bits != ins->l.bits) + use(ra, blk, curi, ins->op, hint1, &ins->r, NOREF); + } else { + if (ins->l.bits) use(ra, blk, curi, ins->op, hint0, &ins->l, ins->r); + if (ins->r.bits) use(ra, blk, curi, ins->op, hint1, &ins->r, NOREF); + } + } else { + struct call *call = &calltab.p[ins->r.i]; + regset rspill = rsminus(rsunion(gpregset, fpregset), rsunion(mctarg->rglob, mctarg->rcallee)); + + dumpallocs("CALL", &ra->m); + if (call->abiret[0].ty.bits) rspill = rsclr(rspill, call->abiret[0].reg); + if (call->abiret[1].ty.bits) rspill = rsclr(rspill, call->abiret[1].reg); + for (int r = 0; rsiter(&r, rspill); ++r) { + spill(ra, r, blk, curi); + } + for (int j = 0; j < call->narg; ++j) { + short reg = call->abiarg[j].reg; + if (reg >= 0) { + forcetake(ra, reg, mkref(RREG, reg), blk, curi); + } + } + + use(ra, blk, curi, ins->op, hint0, &ins->l, NOREF); + } +} + +enum pmstat { PMTOMOVE, PMMOVING, PMDONE }; +static struct pmove { + uchar k; + uchar stat; + struct alloc dst, src; +} pmove[MAXREGS]; +static int npmove; + +static void +pmadd(enum irclass k, struct alloc dst, struct alloc src) 
+{ + assert(npmove < MAXREGS); + pmove[npmove++] = (struct pmove) { k, PMTOMOVE, dst, src }; +} + +static void +emitmove(enum irclass k, struct alloc dst, struct alloc src, struct block *blk, int curi) +{ + struct instr mv; + if (dst.t == AREG && src.t == AREG) { + insertinstr(blk, curi, mkmove(k, dst.a, src.a)); + } else if (dst.t == ASTACK && src.t == AREG) { + mv = mkinstr(Ostore1+ilog2(cls2siz[k]), 0, mkref(RICON, dst.a), mkref(RREG, src.a)); + addstkslotref(insertinstr(blk, curi, mv)); + } else if (dst.t == AREG && src.t == ASTACK) { + switch (mv.cls = k) { + default: assert(0); + case KI4: mv.op = Oloads4; break; + case KI8: mv.op = Oloadi8; break; + case KPTR: mv.op = targ_64bit ? Oloadi8 : Oloads4; break; + case KF4: mv.op = Oloadf4; break; + case KF8: mv.op = Oloadf8; break; + } + mv.l = mkref(RICON, dst.a); + mv.reg = src.a; + addstkslotref(insertinstr(blk, curi, mv)); + } +} + +static int +pmrec(int i, struct block *blk, int curi, enum irclass *k) +{ + int j, c; + + if (!memcmp(&pmove[i].dst, &pmove[i].src, sizeof pmove->dst)) { + pmove[i].stat = PMDONE; + return -1; + } + + /* widen when necessary */ + assert(kisint(pmove[i].k) == kisint(*k)); + if (cls2siz[pmove[i].k] > cls2siz[*k]) + *k = pmove[i].k; + + for (j = 0; j < npmove; ++j) + if (!memcmp(&pmove[j].dst, &pmove[i].src, sizeof pmove->dst)) { + break; + } + if (j == npmove) goto Done; + switch (pmove[j].stat) { + default: assert(0); + case PMMOVING: + c = j; + Swap: + assert(pmove[i].src.t == AREG && pmove[i].dst.t == AREG); + insertinstr(blk, curi, + mkinstr(Oswap, *k, mkref(RREG, pmove[i].dst.a), mkref(RREG, pmove[i].src.a))); + break; + case PMTOMOVE: + pmove[i].stat = PMMOVING; + c = pmrec(j, blk, curi, k); + if (c == i) { + c = -1; + break; + } else if (c != -1) { + goto Swap; + } + /* fallthru */ + case PMDONE: + Done: + c = -1; + emitmove(*k, pmove[i].dst, pmove[i].src, blk, curi); + break; + } + + pmove[i].stat = PMDONE; + return c; +} + +static void +emitpm(struct block *blk) +{ + int 
curi = blk->ins.n; + for (int i = 0; i < npmove; ++i) + if (pmove[i].stat == PMTOMOVE) { + pmrec(i, blk, curi, &(enum irclass) { pmove[i].k }); + } +} + +struct bbrm { struct rmap in, out; }; + +static void +lowerphis(struct function *fn, struct bbrm *bbrm, struct block *blk, struct block *suc) +{ + int predno; + struct alloc *alloc; + struct rmap *out = &bbrm[blk->id].out; + struct block *n = NULL; + + if (!blk->s2) n = blk; + + for (predno = 0; predno < suc->npred; ++predno) + if (blkpred(suc, predno) == blk) + break; + assert(predno < suc->npred); + + npmove = 0; + /* ensure phi args go to the same reg as phi with parallel copies */ + for (int i = 0; i < suc->phi.n; ++i) { + struct instr *phi = &instrtab[suc->phi.p[i]]; + union ref *arg = &phitab.p[phi->l.i][predno]; + int regto, regfrom; + + if (arg->t == RREG) continue; + assert(arg->t == RTMP); + DBG("resolve phi @%d, @%d, %%%d <- %%%d\n", blk->id, suc->id, phi - instrtab, arg->i); + if (instrtab[arg->i].reg) { + regfrom = instrtab[arg->i].reg - 1; + DBG(" it had R%d\n", regfrom); + } else { + alloc = imap_get(&out->allocs, arg->i); + assert(alloc && alloc->t == AREG); + regfrom = alloc->a; + DBG(" found R%d\n", regfrom); + instrtab[arg->i].reg = regfrom+1; + } + if (phi->reg) { + regto = phi->reg - 1; + } else { + alloc = imap_get(&out->allocs, phi - instrtab); + assert(alloc && alloc->t == AREG); + regto = alloc->a; + phi->reg = regto+1; + } + DBG(" > phi move %c%d -> %c%d\n", " RS"[AREG], regfrom, " RS"[AREG], regto); + if (!n) n = insertblk(fn, blk, suc); + pmadd(phi->cls, areg(regto), areg(regfrom)); + } + if (n) emitpm(n); +} + +static void +resolve(struct function *fn, struct bbrm *bbrm, struct block *blk, struct block *suc) +{ + ushort var; + struct alloc *alloc, *alloc2; + struct rmap *out = &bbrm[blk->id].out, *in = &bbrm[suc->id].in; + struct rmap *in2 = &bbrm[blk->id].in; + struct block *n = NULL; + + DBG("resolve(@%d, @%d)\n",blk->id, suc->id); + + npmove = 0; + if (!blk->s2) n = blk; + 
imap_each(&in->allocs, var, alloc) { + if (alloc->t == ADEAD) continue; + if ((alloc2 = imap_get(&out->allocs, var)) && alloc2->t != ADEAD + && memcmp(alloc2, alloc, sizeof *alloc) != 0) { + DBG("resolve @%d, @%d, %%%d\n", blk->id, suc->id, var); + DBG(" > move %c%d -> %c%d\n", " RS"[alloc2->t], alloc2->a, " RS"[alloc->t], alloc->a); + if (!n) { + n = insertblk(fn, blk, suc); + n->id = blk->id; /* use same bbrm */ + } + pmadd(insrescls(instrtab[var]), *alloc, *alloc2); + } + (void)imap_set(&in2->allocs, var, *alloc); + if (!instrtab[var].reg) + instrtab[var].reg = alloc->a + 1; + } + if (n) emitpm(n); + + dumpallocs("in", in); + dumpallocs("out", out); } void regalloc(struct function *fn) { - extern int ninstr; - struct instr *ins; + int id; struct block *last = fn->entry->lprev, *blk; static union ref *stkslotrefsbuf[64]; - struct rega ra = {0}; - - fn->isleaf = 1; - - vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf)); - ra.allocs = xcalloc((ninstr*2 < MAXINSTR ? ninstr*2 : MAXINSTR) * sizeof(struct alloc)); - ra.nfreegpr = mctarg->ngpr - popcnt(mctarg->rglob->u); - ra.nfreefpr = mctarg->nfpr; - for (int i = 0; i < MAXREGS; ++i) { - if (in_range(i, mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1)) - bsset(floatregs, i); - if (bstest(mctarg->rglob, i)) - ra.regs[i] = mkref(RREG, i); + struct bbrm *bbrm; + int nbbrm; + struct rega ra = {fn}; + + /* setup */ + if (!fpregset || !gpregset) { + for (int i = 0; i < MAXREGS; ++i) { + if (in_range(i, mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1)) + fpregset = rsset(fpregset, i); + else if (in_range(i, mctarg->gpr0, mctarg->gpr0 + mctarg->ngpr - 1)) + gpregset = rsset(gpregset, i); + } } - bszero(globusage, 1); + ra.m.blocked = mctarg->rglob; + ra.m.free = rsminus(rsunion(gpregset, fpregset), ra.m.blocked); + memset(ra.m.freestk, 0xFF, sizeof ra.m.freestk); + globusage = 0; fn->stksiz = alignup(fn->stksiz, 8); + fn->isleaf = 1; + vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf)); - /* a dumb 
linear register allocator that visits instructions physically backwards - * starting at the end of the function, when encountering a use of a new - * temporary, it allocates a register for it. when encountering the definition - * of a temporary, it frees up its register - */ + /* generate copies for phi operands */ + blk = fn->entry; + do { + if (!blk->phi.n) continue; + for (int p = 0; p < blk->npred; ++p) { + struct block *n, *pred = blkpred(blk, p); + if (!pred->s2) { + /* pred only has 1 successor (blk), so insert move directly in it */ + n = pred; + } else { + n = insertblk(fn, pred, blk); + assert(n->jmp.t == Jb && n->s1 == blk); + } + for (int i = 0; i < blk->phi.n; ++i) { + int phi = blk->phi.p[i]; + union ref *args = phitab.p[instrtab[phi].l.i]; + args[p] = insertinstr(n, n->ins.n, mkinstr(Ocopy, instrtab[phi].cls, args[p])); + } + } + } while ((blk = blk->lnext) != fn->entry); + fillblkids(fn); + bbrm = xcalloc(sizeof *bbrm * (nbbrm = fn->nblk)); + + /* visit blocks in reverse, allocating registers in a greedy manner */ blk = last; do { + int curi; + DBG("do @%d\n", blk->id); + memcpy(bbrm[blk->id].out.regs, ra.m.regs, sizeof ra.m.regs); + imap_copy(&bbrm[blk->id].out.allocs, &ra.m.allocs); + dumpallocs("out", &ra.m); + + if (blk->s1 && blk->s1->phi.n) { + /* if the block is a predecessor to some phis, introduce use points + * for the corresponding arguments to each phi */ + struct block *s = blk->s1; + int predno = 0; + for (; predno < s->npred; ++predno) + if (blkpred(s, predno) == blk) + break; + assert(predno < s->npred); + for (int i = s->phi.n - 1; i >= 0; --i) { + struct instr *phi = &instrtab[s->phi.p[i]]; + union ref *arg = &phitab.p[phi->l.i][predno];; + use(&ra, blk, blk->ins.n, Ophi, phi->reg-1, arg, NOREF); + } + } + + curi = blk->ins.n - 1; for (int i = 0; i < 2; ++i) { if (!blk->jmp.arg[i].bits) break; /* do not allocate a reg for a cmp op used a branch argument, since it's a pseudo op */ if (blk->jmp.t == Jb && blk->jmp.arg[i].t == RTMP && 
oiscmp(instrtab[blk->jmp.arg[i].i].op)) break; - use(&ra, blk, blk->ins.n-1, (blk->jmp.t != Jb) - 1, + use(&ra, blk, curi, (blk->jmp.t != Jb) - 1, blk->jmp.t == Jret ? fn->abiret[i].reg : -1, &blk->jmp.arg[i], blk->jmp.arg[!i]); } - for (int i = blk->ins.n - 1; i >= 0; --i) { - int hint0 = -1, hint1 = -1; - ins = &instrtab[blk->ins.p[i]]; - if (ins->op != Ocopy && !ra.allocs[ins - instrtab].t && ins->skip) { /* unused */ - *ins = mkinstr(Onop, 0,); - continue; - } - def(&ra, ins, blk, i); - if (ins->op != Ocall) { - if (ins->op == Ocopy || ins->inplace) hint0 = ins->reg - 1; - if (ins->op == Omove) { - if (ins->l.t == RREG) hint1 = ins->l.i; - /* MOV Rx,Rx is used by isel to indicate a clobber, - * so it should be a def point for Rx but not a use point */ - if (ins->r.bits != ins->l.bits) - use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF); - } else { - if (ins->r.t == RREG) { - use(&ra, blk, i, ins->op, hint0, &ins->r, NOREF); - use(&ra, blk, i, ins->op, hint0, &ins->l, ins->r); - } else { - if (ins->l.bits) use(&ra, blk, i, ins->op, hint0, &ins->l, ins->r); - if (ins->r.bits) use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF); - } - } - } else { - struct call *call = &calltab.p[ins->r.i]; - struct bitset rspill[1] = {0}; - - fn->isleaf = 0; - - for (int r = mctarg->gpr0; r < mctarg->gpr0 + mctarg->ngpr; ++r) - if (!bstest(mctarg->rglob, r) && !bstest(mctarg->rcallee, r)) - bsset(rspill, r); - for (int r = mctarg->fpr0; r < mctarg->fpr0 + mctarg->nfpr; ++r) - if (!bstest(mctarg->rglob, r) && !bstest(mctarg->rcallee, r)) - bsset(rspill, r); - - if (call->abiret[0].ty.bits) bsclr(rspill, call->abiret[0].reg); - if (call->abiret[1].ty.bits) bsclr(rspill, call->abiret[1].reg); - for (uint r = 0; bsiter(&r, rspill, 1); ++r) { - spill(&ra, r, blk, i); - } - for (int j = 0; j < call->narg; ++j) { - short reg = call->abiarg[j].reg; - if (reg >= 0) { - forcetake(&ra, reg, mkref(RREG, reg), blk, i); - } - } - - use(&ra, blk, i, ins->op, hint0, &ins->l, NOREF); - } - if 
(ins->op == Ocopy && !ins->reg) - *ins = mkinstr(Onop, 0,); - if (ins->inplace && ins->reg) { - assert(ins->l.t == RREG); - if (ins->reg-1 != ins->l.i) { - /* an in-place operation where the destination does not - * match the first operand, so we need to add a move */ - insertinstr(blk, i, mkmove(insrescls(*ins), ins->reg-1, ins->l.i)); - ins->l.i = ins->reg-1; - } - } + for (; curi >= 0; --curi) { + struct instr *ins = &instrtab[blk->ins.p[curi]]; + if (ins->op == Ocall) fn->isleaf = 0; + doins(&ra, blk, ins, curi); } + for (int i = blk->phi.n - 1; i >= 0; --i) { - union ref *phi; - ins = &instrtab[blk->phi.p[i]]; - assert(ins->op == Ophi); - phi = phitab.p[ins->l.i]; - if (ins->reg) { - /* introduce necessary moves in each pred, - * XXX this doesn't work for backwards branches */ - for (int i = 0; i < blk->npred; ++i) { - struct instr mov = mkinstr(Omove, insrescls(*ins), mkref(RREG, ins->reg-1), phi[i]); - insertinstr(blkpred(blk, i), blkpred(blk, i)->ins.n, mov); - } - } - def(&ra, ins, NULL, 0); + struct instr *phi = &instrtab[blk->phi.p[i]]; + def(&ra, phi, blk, -1); + } + + memcpy(bbrm[blk->id].in.regs, ra.m.regs, sizeof ra.m.regs); + imap_copy(&bbrm[blk->id].in.allocs, &ra.m.allocs);; + } while ((blk = blk->lprev) != last); + + if (1&&ccopt.dbg.r) { + efmt("<< regalloc before resolve\n"); + irdump(fn); + } + + /* resolve allocation mismatches across each edge */ + blk = last = fn->entry->lprev; + do { + if (blk->id < 0) continue; + if (blk->s1) resolve(fn, bbrm, blk, blk->s1); + if (blk->s2) resolve(fn, bbrm, blk, blk->s2); + } while ((blk = blk->lprev) != last); + + /* parallel copies for each phi */ + blk = last = fn->entry->lprev; + do { + if (blk->id < 0) continue; + for (int i = 0; i < blk->npred; ++i) { + if (blkpred(blk, i)->id < 0) continue; + lowerphis(fn, bbrm, blkpred(blk, i), blk); } } while ((blk = blk->lprev) != last); - do vfree(&blk->phi); while ((blk = blk->lprev) != last); + /* final cleanup */ + id = 0; + blk = fn->entry; + do { + bool 
allnops = 1; + blk->id = id++; + for (int i = 0; i < blk->ins.n; ++i) { + struct instr *ins = &instrtab[blk->ins.p[i]]; + if (!ins->reg && insrescls(*ins) && ins->op != Omove && !oiscmp(ins->op)) { + /* dead */ + Nop: + *ins = mkinstr(Onop,0,); + } else if (ins->op == Omove && ins->r.t == RREG && ins->l.i == ins->r.i) { + goto Nop; + } else if (ins->op == Ocopy && ins->l.t == RREG && ins->reg-1 == ins->l.i) { + goto Nop; + } else if (ins->inplace && ins->l.t == RREG && ins->reg-1 != ins->l.i) { + /* fixup in-place (two-address) instructions */ + allnops = 0; + insertinstr(blk, i++, mkmove(ins->cls, ins->reg-1, ins->l.i)); + ins->l.i = ins->reg-1; + } else if (ins->op != Onop) allnops = 0; + } + + /* remove no-op blocks with only 1 pred + 1 succ OR 1 pred w/ 1 succ + 0 succ*/ + if (allnops && blk->npred == 1 && !blk->s2) { + struct block *p = blkpred(blk, 0); + if (!p->s2 && !blk->s1) { + /* simplify: + * + * @p: + * ... + * b @blk + * @blk: + * NOP + * ret + */ + assert(p->s1 == blk); + p->jmp.t = Jret; + p->s1 = NULL; + DelBlk: + vfree(&blk->phi); + vfree(&blk->ins); + blk->lnext->lprev = blk->lprev; + blk->lprev->lnext = blk->lnext; + --id; + } else if (blk->s1) { + /* simplify: + * + * @p: + * ... 
+ * b %x, @blk, @other + * @blk: + * NOP + * b @next + */ + struct block *next = blk->s1; + if (p->s1 == blk) p->s1 = next; + else if (p->s2 == blk) p->s2 = next; + else continue; + goto DelBlk; + } + } + } while ((blk = blk->lnext) != fn->entry); + fn->nblk = id; + + blk = fn->entry; + do vfree(&blk->phi); while ((blk = blk->lnext) != fn->entry); fn->stksiz += ra.maxstk*8; if (fn->stksiz > 1<<24) error(NULL, "'%s' stack frame too big", fn->name); @@ -396,9 +758,13 @@ regalloc(struct function *fn) efmt("<< After regalloc >>\n"); irdump(fn); } - bscopy(fn->regusage, globusage, 1); - free(ra.allocs); - vfree(&ra.freestk); + for (int i = 0; i < nbbrm; ++i) { + imap_free(&bbrm[i].in.allocs); + imap_free(&bbrm[i].out.allocs); + } + imap_free(&ra.m.allocs); + free(bbrm); + fn->regusage = globusage; } /* vim:set ts=3 sw=3 expandtab: */ |