diff options
Diffstat (limited to 'regalloc.c')
| -rw-r--r-- | regalloc.c | 746 |
1 files changed, 556 insertions, 190 deletions
@@ -1,9 +1,12 @@ +#include "common.h" #include "ir.h" -static struct bitset globusage[1]; -static struct bitset floatregs[1]; +/* Implements reverse linear register allocation, quite ad-hoc right now */ -#define isfpr(reg) bstest(floatregs, (reg)) +static regset gpregset, fpregset; +static regset globusage; + +#define isfpr(reg) in_range((reg), mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1) #define isgpr(reg) (!isfpr(reg)) enum { ADEAD, AREG, ASTACK } ; @@ -12,40 +15,91 @@ struct alloc { ushort t : 2, a : 14; }; #define areg(r) ((struct alloc) { AREG, (r) }) #define astack(s) ((struct alloc) { ASTACK, (s) }) +enum { MAXSPILL = 128 }; + +struct rmap { + regset free; /* free registers */ + regset blocked; /* registers allocated to themselves (from e.g. isel reg constraints, abi) */ + ushort regs[MAXREGS]; /* map reg -> temp holding reg */ + struct bitset freestk[BSSIZE(MAXSPILL)]; /* free stack slots */ + imap_of(struct alloc) allocs; /* map tmpidx -> allocation */ +}; + struct rega { - union ref regs[MAXREGS]; /* map reg -> value holding reg */ - struct alloc *allocs; /* map tmpidx -> allocation */ - vec_of(ushort) freestk; /* available stack slots */ - int nfreegpr, nfreefpr; + struct function *fn; + struct rmap m; int maxstk, stktop; }; static vec_of(union ref *) stkslotrefs; -static void +static union ref addstkslotref(union ref inst) { vpush(&stkslotrefs, &instrtab[inst.i].l); + return inst; } static int allocreg(struct rega *ra, enum irclass cls, union ref ref, uvlong excl); #if 0 #define DBG efmt +static void +dumpallocs(const char *s, struct rmap *m) +{ + int var; + struct alloc *alloc; + + DBG("%s map [", s); + imap_each(&m->allocs, var, alloc) { + if (!alloc->t) continue; + DBG(" %%%d -> %c%d,", var, "XRS"[alloc->t], alloc->a); + } + DBG("]\n"); +} #else #define DBG(...) ((void)0) +#define dumpallocs(...) #endif static void freereg(struct rega *ra, int r) { - ra->regs[r] = NOREF; - if (isfpr(r)) ++ra->nfreefpr; - else ++ra->nfreegpr; + ra->m.free = rsset(ra->m.free, r); + ra->m.blocked = rsclr(ra->m.blocked, r); } static void take(struct rega *ra, int reg, union ref ref); +static int +allocstk(struct rega *ra, int var) +{ + int s = -1; + + for (int i = 0; i < BSSIZE(MAXSPILL); ++i) { + if (~ra->m.freestk[i].u == 0) continue; + s = i*64 + lowestsetbit(ra->m.freestk[i].u); + } + if (s != -1) { + bsclr(ra->m.freestk, s); + if (ra->stktop < s) ra->stktop = s+1; + } else { + s = ra->stktop++; + } + if (ra->maxstk < s+1) ra->maxstk = s+1; + (void)imap_set(&ra->m.allocs, var, astack(s)); + return s; +} + +static void +freestk(struct rega *ra, int slot) +{ + if (slot < MAXSPILL) + bsset(ra->m.freestk, slot); + else if (slot == ra->stktop - 1) + --ra->stktop; +} + static void def(struct rega *ra, struct instr *ins, struct block *blk, int curi) { @@ -54,14 +108,16 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi) if (ins->op != Omove && ins->op != Ocall) { var = ins - instrtab; DBG("def %%%d\n",var); - alloc = &ra->allocs[var]; - if (alloc->t == ADEAD) { + alloc = imap_get(&ra->m.allocs, var); + if (!alloc || alloc->t == ADEAD) { return; } else if (alloc->t == AREG) { reg = alloc->a; DBG("-- free %s for %%%d\n", mctarg->rnames[alloc->a], var); - assert(ra->regs[reg].bits == mkref(RTMP, var).bits); - } else if (alloc->t == ASTACK) { + assert(!rstest(ra->m.free, reg)); + assert(ra->m.regs[reg] == var); + ins->reg = reg+1; + } else if (alloc->t == ASTACK) { /* unspill, insert 'store [slot], reg' */ struct alloc astk = *alloc; struct instr store; @@ -69,26 +125,27 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi) if ((ins->op == Ocopy || ins->inplace) && ins->l.t == RREG) { int hint = ins->l.i; - if (!ra->regs[hint].bits) { + if (rstest(ra->m.free, hint)) { take(ra, reg = hint, mkref(RTMP, var)); - assert(ra->regs[reg].bits == mkref(RTMP, var).bits); + assert(ra->m.regs[reg] == var && !rstest(ra->m.free, hint)); } } if (reg < 0) reg = allocreg(ra, insrescls(*ins), mkref(RTMP, var), 0); store = mkinstr(Ostore1 + ilog2(cls2siz[insrescls(*ins)]), 0, mkref(RICON, astk.a*8), mkref(RREG, reg)); - DBG("-- unspill %%%d s%d -> %s\n", var, astk.a, mctarg->rnames[ins->reg+1]); + DBG("-- unspill %%%d s%d -> %s\n", var, astk.a, mctarg->rnames[reg]); addstkslotref(insertinstr(blk, ++curi, store)); - vpush(&ra->freestk, astk.a); + freestk(ra, astk.a); ins->reg = reg+1; def(ra, ins, blk, curi); /* and free the reg again */ return; } - *alloc = afree(); + if (alloc) *alloc = afree(); } else if (ins->op == Omove) { + assert(ins->l.t == RREG); /* move to a register */ + assert(rstest(ra->m.blocked, ins->l.i)); reg = ins->l.i; - assert(ins->l.t == RREG); DBG("-- free %s\n", mctarg->rnames[ins->l.i]); } else { struct call *call = &calltab.p[ins->r.i]; @@ -104,68 +161,58 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi) static void take(struct rega *ra, int reg, union ref ref) { DBG("-- take %s for %c%d\n", mctarg->rnames[reg], "R%"[ref.t==RTMP], ref.i); - assert(!ra->regs[reg].bits && "taken"); - if (ref.t == RTMP) - ra->allocs[ref.i] = areg(reg); - ra->regs[reg] = ref; - assert(ra->regs[reg].bits); - bsset(globusage, reg); - if (isfpr(reg)) --ra->nfreefpr; - else --ra->nfreegpr; + assert(rstest(ra->m.free, reg) && "taken"); + if (ref.t == RTMP) { + (void)imap_set(&ra->m.allocs, ref.i, areg(reg)); + ra->m.regs[reg] = ref.i; + } else { + ra->m.blocked = rsset(ra->m.blocked, reg); + } + ra->m.free = rsclr(ra->m.free, reg); + globusage = rsset(globusage, reg); } static int allocreg(struct rega *ra, enum irclass cls, union ref ref, uvlong excl) { - int r0, rend, reg; + regset rs; + int reg; assert(cls); if (kisint(cls)) { - r0 = mctarg->gpr0; - rend = mctarg->gpr0 + mctarg->ngpr; + rs = gpregset; } else if (kisflt(cls)) { - r0 = mctarg->fpr0; - rend = mctarg->fpr0 + mctarg->nfpr; + rs = fpregset; } else assert(0); - for (reg = r0; reg < rend; ++reg) { - if (bstest(mctarg->rglob, reg)) continue; - if (!(excl >> reg & 1) && !ra->regs[reg].bits) { - take(ra, reg, ref); - return reg; - } - } - assert(!"no more reg"); -} - -static int -allocstk(struct rega *ra, int var) -{ - int s; - - if (ra->freestk.n) { - s = ra->freestk.p[--ra->freestk.n]; - ra->allocs[var] = astack(s); - } else { - if (ra->stktop+1 >= ra->maxstk) ra->maxstk = ra->stktop+1; - s = ra->stktop++; - } - ra->allocs[var] = astack(s); - return s; + rs = rsminus(rs, mctarg->rglob); + rs = rsand(rs, ra->m.free); + rs = rsminus(rs, excl); + + assert(rs != 0 && "no more reg"); + if (rsminus(rs, mctarg->rcallee) != 0) + reg = lowestsetbit(rsminus(rs, mctarg->rcallee)); + else + reg = lowestsetbit(rs); + take(ra, reg, ref); + return reg; } static void spill(struct rega *ra, int reg, struct block *blk, int curi) { int var, s; struct instr load; + struct alloc *alloc; - if (!ra->regs[reg].bits) return; - var = ra->regs[reg].i; - assert(ra->regs[reg].bits == RTMP && *(ushort *)&ra->allocs[var] == *(ushort *)&areg(reg)); + if (rstest(ra->m.free, reg)) return; + var = ra->m.regs[reg]; + assert(!rstest(ra->m.blocked, reg)); + assert((alloc = imap_get(&ra->m.allocs, var)) && !memcmp(alloc, &areg(reg), sizeof *alloc)); s = allocstk(ra, var); DBG("-- spill %%%d %s -> s%d\n", var, mctarg->rnames[reg], s); instrtab[var].reg = 0; /* insert 'reg = load [slot]' */ - load = mkinstr(Oloads1 + 2*ilog2(cls2siz[insrescls(instrtab[var])]), insrescls(instrtab[var]), mkref(RICON, s*8)); + load = mkinstr(Oloads1 + 2*ilog2(cls2siz[insrescls(instrtab[var])]), + insrescls(instrtab[var]), mkref(RICON, s*8)); load.reg = reg+1; addstkslotref(insertinstr(blk, ++curi, load)); freereg(ra, reg); @@ -178,34 +225,39 @@ forcetake(struct rega *ra, int reg, union ref ref, struct block *blk, int curi) int var; struct alloc *alloc; - if (ra->regs[reg].bits == ref.bits) return; - if (!ra->regs[reg].bits) { + if (ref.t == RREG && rstest(ra->m.blocked, ref.i)) { + assert(!rstest(ra->m.free, reg)); + return; + } + if (ref.t == RTMP && !rstest(ra->m.free, reg) && ra->m.regs[reg] == ref.i) return; + if (rstest(ra->m.free, reg)) { take(ra, reg, ref); return; } - assert(ra->regs[reg].bits == RTMP); - var = ra->regs[reg].i; - alloc = &ra->allocs[var]; - assert(alloc->a == reg); + assert(!rstest(ra->m.blocked, ref.i)); + var = ra->m.regs[reg]; + alloc = imap_get(&ra->m.allocs, var); + assert(alloc && alloc->a == reg); *alloc = afree(); /* try to move temp to another register */ - if (isgpr(reg) ? ra->nfreegpr > 0 : ra->nfreefpr > 0) { + if (rsand(ra->m.free, isgpr(reg) ? gpregset : fpregset) != 0) { /* the register of the current instruction (if any) was already free'd (by def), so * we need to explictly exclude it from the pool of rename registers * e.g.: given 'R0 = copy R1'; if R1 => %x, we need to prevent renaming %x => R0 */ - uvlong excl = instrtab[blk->ins.p[curi]].reg; - int rename = allocreg(ra, isgpr(reg) ? KI4 : KF4, ra->regs[reg], excl ? 1ull<<(excl-1) : 0); + regset excl = instrtab[blk->ins.p[curi]].reg; + int rename = allocreg(ra, isgpr(reg) ? KI4 : KF4, mkref(RTMP, ra->m.regs[reg]), + excl ? rsset(0, excl-1) : 0); if (ccopt.dbg.r)DBG("-- rename %%%d %s -> %s\n", var, mctarg->rnames[reg], mctarg->rnames[rename]); /* introduce move from rename -> original (since we allocate backwards) */ insertinstr(blk, ++curi, mkmove(insrescls(instrtab[var]), reg, rename)); - instrtab[var].reg = rename+1; - ra->regs[rename] = mkref(RTMP, var); - bsset(globusage, rename); + ra->m.regs[rename] = var; + globusage = rsset(globusage, rename); *alloc = areg(rename); - ra->regs[reg].bits = 0; + ra->m.free = rsset(ra->m.free, reg); } else { spill(ra, reg, blk, curi); + ra->m.free = rsset(ra->m.free, reg); } take(ra, reg, ref); } @@ -216,6 +268,8 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re { struct instr *ins; uvlong excl = other.t == RREG ? 1ull<<other.i : 0; + struct alloc *alloc; + int reg; if (ref->t == RADDR) { struct addr addr = addrht[ref->i]; @@ -224,167 +278,475 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re *ref = mkaddr(addr); return; } else if (ref->t == RREG) { - assert(!(excl >> hint & 1)); + assert(hint<0 || !rstest(excl, hint)); forcetake(ra, ref->i, *ref, blk, curi); } if (ref->t != RTMP) return; ins = &instrtab[ref->i]; if (!ins->cls) return; - if (!ins->reg) { + if (!(alloc = imap_get(&ra->m.allocs, ref->i)) || alloc->t != AREG) { int s = -1; - if (ra->allocs[ref->i].t == ASTACK) - s = ra->allocs[ref->i].a; + if (alloc && alloc->t == ASTACK) { + /* ensure spill isn't overwritten by dest + * e.g. in R0 = add %s, 7 => can't spill %s to R0 */ + struct instr *use = &instrtab[blk->ins.p[curi]]; + s = alloc->a; + if (use->reg) excl = rsset(excl, use->reg-1); + } assert(ins->op != Ocall); if (ins->r.t == RREG && ins->inplace) excl |= 1ull<<ins->r.i; - if ((hint == -1 || ra->regs[hint].bits) && ins->op == Ocopy && ins->l.t == RREG) + if ((hint == -1 || !rstest(ra->m.free, hint)) && ins->op == Ocopy && ins->l.t == RREG) /* for '%x = copy Rx', hint %x to use Rx */ hint = ins->l.i; - if (hint != -1 && !(excl >> hint & 1) && !ra->regs[hint].bits) { + + if (hint != -1 && !rstest(excl, hint) && rstest(ra->m.free, hint)) { take(ra, hint, *ref); - ins->reg = hint + 1; + reg = hint; } else { - ins->reg = allocreg(ra, insrescls(*ins), *ref, excl) + 1; + reg = allocreg(ra, insrescls(*ins), *ref, excl); } + if (s >= 0) { /* unspill, insert 'store [slot], reg' */ - DBG("-- unspill %%%d s%d -> %s\n", ref->i, s, mctarg->rnames[ins->reg-1]); + DBG("-- unspill %%%d s%d -> %s\n", ref->i, s, mctarg->rnames[reg]); struct instr store = mkinstr(Ostore1 + ilog2(cls2siz[insrescls(*ins)]), 0, mkref(RICON, s*8), - mkref(RREG, ins->reg-1)); + mkref(RREG, reg)); addstkslotref(insertinstr(blk, ++curi, store)); - vpush(&ra->freestk, s); + freestk(ra, s); } + } else { + reg = alloc->a; } - /* do not patch ref if it's cond branch arg, emit() wants to know what instr it is */ + /* do not patch ref if it's used in a phi + * or if it's cond branch arg, emit() wants to know what instr it is */ + if (op != Ophi) if (ref != blk->jmp.arg || blk->jmp.t != Jb) - *ref = mkref(RREG, ins->reg-1); + *ref = mkref(RREG, reg); +} + +static void +doins(struct rega *ra, struct block *blk, struct instr *ins, int curi) +{ + int hint0 = -1, hint1 = -1; + if (ins->op != Ocopy && !imap_get(&ra->m.allocs, ins - instrtab) && ins->skip) { /* unused */ + *ins = mkinstr(Onop, 0,); + return; + } + def(ra, ins, blk, curi); + if (ins->op != Ocall) { + if (ins->op == Ocopy || ins->inplace) hint0 = ins->reg - 1; + if (ins->op == Omove) { + if (ins->l.t == RREG) hint1 = ins->l.i; + /* MOV Rx,Rx is used by isel to indicate a clobber, + * so it should be a def point for Rx but not a use point */ + if (ins->r.bits != ins->l.bits) + use(ra, blk, curi, ins->op, hint1, &ins->r, NOREF); + } else { + if (ins->l.bits) use(ra, blk, curi, ins->op, hint0, &ins->l, ins->r); + if (ins->r.bits) use(ra, blk, curi, ins->op, hint1, &ins->r, NOREF); + } + } else { + struct call *call = &calltab.p[ins->r.i]; + regset rspill = rsminus(rsunion(gpregset, fpregset), rsunion(mctarg->rglob, mctarg->rcallee)); + + dumpallocs("CALL", &ra->m); + if (call->abiret[0].ty.bits) rspill = rsclr(rspill, call->abiret[0].reg); + if (call->abiret[1].ty.bits) rspill = rsclr(rspill, call->abiret[1].reg); + for (int r = 0; rsiter(&r, rspill); ++r) { + spill(ra, r, blk, curi); + } + for (int j = 0; j < call->narg; ++j) { + short reg = call->abiarg[j].reg; + if (reg >= 0) { + forcetake(ra, reg, mkref(RREG, reg), blk, curi); + } + } + + use(ra, blk, curi, ins->op, hint0, &ins->l, NOREF); + } +} + +enum pmstat { PMTOMOVE, PMMOVING, PMDONE }; +static struct pmove { + uchar k; + uchar stat; + struct alloc dst, src; +} pmove[MAXREGS]; +static int npmove; + +static void +pmadd(enum irclass k, struct alloc dst, struct alloc src) +{ + assert(npmove < MAXREGS); + pmove[npmove++] = (struct pmove) { k, PMTOMOVE, dst, src }; +} + +static void +emitmove(enum irclass k, struct alloc dst, struct alloc src, struct block *blk, int curi) +{ + struct instr mv; + if (dst.t == AREG && src.t == AREG) { + insertinstr(blk, curi, mkmove(k, dst.a, src.a)); + } else if (dst.t == ASTACK && src.t == AREG) { + mv = mkinstr(Ostore1+ilog2(cls2siz[k]), 0, mkref(RICON, dst.a), mkref(RREG, src.a)); + addstkslotref(insertinstr(blk, curi, mv)); + } else if (dst.t == AREG && src.t == ASTACK) { + switch (mv.cls = k) { + default: assert(0); + case KI4: mv.op = Oloads4; break; + case KI8: mv.op = Oloadi8; break; + case KPTR: mv.op = targ_64bit ? Oloadi8 : Oloads4; break; + case KF4: mv.op = Oloadf4; break; + case KF8: mv.op = Oloadf8; break; + } + mv.l = mkref(RICON, dst.a); + mv.reg = src.a; + addstkslotref(insertinstr(blk, curi, mv)); + } +} + +static int +pmrec(int i, struct block *blk, int curi, enum irclass *k) +{ + int j, c; + + if (!memcmp(&pmove[i].dst, &pmove[i].src, sizeof pmove->dst)) { + pmove[i].stat = PMDONE; + return -1; + } + + /* widen when necessary */ + assert(kisint(pmove[i].k) == kisint(*k)); + if (cls2siz[pmove[i].k] > cls2siz[*k]) + *k = pmove[i].k; + + for (j = 0; j < npmove; ++j) + if (!memcmp(&pmove[j].dst, &pmove[i].src, sizeof pmove->dst)) { + break; + } + if (j == npmove) goto Done; + switch (pmove[j].stat) { + default: assert(0); + case PMMOVING: + c = j; + Swap: + assert(pmove[i].src.t == AREG && pmove[i].dst.t == AREG); + insertinstr(blk, curi, + mkinstr(Oswap, *k, mkref(RREG, pmove[i].dst.a), mkref(RREG, pmove[i].src.a))); + break; + case PMTOMOVE: + pmove[i].stat = PMMOVING; + c = pmrec(j, blk, curi, k); + if (c == i) { + c = -1; + break; + } else if (c != -1) { + goto Swap; + } + /* fallthru */ + case PMDONE: + Done: + c = -1; + emitmove(*k, pmove[i].dst, pmove[i].src, blk, curi); + break; + } + + pmove[i].stat = PMDONE; + return c; +} + +static void +emitpm(struct block *blk) +{ + int curi = blk->ins.n; + for (int i = 0; i < npmove; ++i) + if (pmove[i].stat == PMTOMOVE) { + pmrec(i, blk, curi, &(enum irclass) { pmove[i].k }); + } +} + +struct bbrm { struct rmap in, out; }; + +static void +lowerphis(struct function *fn, struct bbrm *bbrm, struct block *blk, struct block *suc) +{ + int predno; + struct alloc *alloc; + struct rmap *out = &bbrm[blk->id].out; + struct block *n = NULL; + + if (!blk->s2) n = blk; + + for (predno = 0; predno < suc->npred; ++predno) + if (blkpred(suc, predno) == blk) + break; + assert(predno < suc->npred); + + npmove = 0; + /* ensure phi args go to the same reg as phi with parallel copies */ + for (int i = 0; i < suc->phi.n; ++i) { + struct instr *phi = &instrtab[suc->phi.p[i]]; + union ref *arg = &phitab.p[phi->l.i][predno]; + int regto, regfrom; + + if (arg->t == RREG) continue; + assert(arg->t == RTMP); + DBG("resolve phi @%d, @%d, %%%d <- %%%d\n", blk->id, suc->id, phi - instrtab, arg->i); + if (instrtab[arg->i].reg) { + regfrom = instrtab[arg->i].reg - 1; + DBG(" it had R%d\n", regfrom); + } else { + alloc = imap_get(&out->allocs, arg->i); + assert(alloc && alloc->t == AREG); + regfrom = alloc->a; + DBG(" found R%d\n", regfrom); + instrtab[arg->i].reg = regfrom+1; + } + if (phi->reg) { + regto = phi->reg - 1; + } else { + alloc = imap_get(&out->allocs, phi - instrtab); + assert(alloc && alloc->t == AREG); + regto = alloc->a; + phi->reg = regto+1; + } + DBG(" > phi move %c%d -> %c%d\n", " RS"[AREG], regfrom, " RS"[AREG], regto); + if (!n) n = insertblk(fn, blk, suc); + pmadd(phi->cls, areg(regto), areg(regfrom)); + } + if (n) emitpm(n); +} + +static void +resolve(struct function *fn, struct bbrm *bbrm, struct block *blk, struct block *suc) +{ + ushort var; + struct alloc *alloc, *alloc2; + struct rmap *out = &bbrm[blk->id].out, *in = &bbrm[suc->id].in; + struct rmap *in2 = &bbrm[blk->id].in; + struct block *n = NULL; + + DBG("resolve(@%d, @%d)\n",blk->id, suc->id); + + npmove = 0; + if (!blk->s2) n = blk; + imap_each(&in->allocs, var, alloc) { + if (alloc->t == ADEAD) continue; + if ((alloc2 = imap_get(&out->allocs, var)) && alloc2->t != ADEAD + && memcmp(alloc2, alloc, sizeof *alloc) != 0) { + DBG("resolve @%d, @%d, %%%d\n", blk->id, suc->id, var); + DBG(" > move %c%d -> %c%d\n", " RS"[alloc2->t], alloc2->a, " RS"[alloc->t], alloc->a); + if (!n) { + n = insertblk(fn, blk, suc); + n->id = blk->id; /* use same bbrm */ + } + pmadd(insrescls(instrtab[var]), *alloc, *alloc2); + } + (void)imap_set(&in2->allocs, var, *alloc); + if (!instrtab[var].reg) + instrtab[var].reg = alloc->a + 1; + } + if (n) emitpm(n); + + dumpallocs("in", in); + dumpallocs("out", out); } void regalloc(struct function *fn) { - extern int ninstr; - struct instr *ins; + int id; struct block *last = fn->entry->lprev, *blk; static union ref *stkslotrefsbuf[64]; - struct rega ra = {0}; - - fn->isleaf = 1; - - vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf)); - ra.allocs = xcalloc((ninstr*2 < MAXINSTR ? ninstr*2 : MAXINSTR) * sizeof(struct alloc)); - ra.nfreegpr = mctarg->ngpr - popcnt(mctarg->rglob->u); - ra.nfreefpr = mctarg->nfpr; - for (int i = 0; i < MAXREGS; ++i) { - if (in_range(i, mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1)) - bsset(floatregs, i); - if (bstest(mctarg->rglob, i)) - ra.regs[i] = mkref(RREG, i); + struct bbrm *bbrm; + int nbbrm; + struct rega ra = {fn}; + + /* setup */ + if (!fpregset || !gpregset) { + for (int i = 0; i < MAXREGS; ++i) { + if (in_range(i, mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1)) + fpregset = rsset(fpregset, i); + else if (in_range(i, mctarg->gpr0, mctarg->gpr0 + mctarg->ngpr - 1)) + gpregset = rsset(gpregset, i); + } } - bszero(globusage, 1); + ra.m.blocked = mctarg->rglob; + ra.m.free = rsminus(rsunion(gpregset, fpregset), ra.m.blocked); + memset(ra.m.freestk, 0xFF, sizeof ra.m.freestk); + globusage = 0; fn->stksiz = alignup(fn->stksiz, 8); + fn->isleaf = 1; + vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf)); - /* a dumb linear register allocator that visits instructions physically backwards - * starting at the end of the function, when encountering a use of a new - * temporary, it allocates a register for it. when encountering the definition - * of a temporary, it frees up its register - */ + /* generate copies for phi operands */ + blk = fn->entry; + do { + if (!blk->phi.n) continue; + for (int p = 0; p < blk->npred; ++p) { + struct block *n, *pred = blkpred(blk, p); + if (!pred->s2) { + /* pred only has 1 successor (blk), so insert move directly in it */ + n = pred; + } else { + n = insertblk(fn, pred, blk); + assert(n->jmp.t == Jb && n->s1 == blk); + } + for (int i = 0; i < blk->phi.n; ++i) { + int phi = blk->phi.p[i]; + union ref *args = phitab.p[instrtab[phi].l.i]; + args[p] = insertinstr(n, n->ins.n, mkinstr(Ocopy, instrtab[phi].cls, args[p])); + } + } + } while ((blk = blk->lnext) != fn->entry); + fillblkids(fn); + bbrm = xcalloc(sizeof *bbrm * (nbbrm = fn->nblk)); + + /* visit blocks in reverse, allocating registers in a greedy manner */ blk = last; do { + int curi; + DBG("do @%d\n", blk->id); + memcpy(bbrm[blk->id].out.regs, ra.m.regs, sizeof ra.m.regs); + imap_copy(&bbrm[blk->id].out.allocs, &ra.m.allocs); + dumpallocs("out", &ra.m); + + if (blk->s1 && blk->s1->phi.n) { + /* if the block is a predecessor to some phis, introduce use points + * for the corresponding arguments to each phi */ + struct block *s = blk->s1; + int predno = 0; + for (; predno < s->npred; ++predno) + if (blkpred(s, predno) == blk) + break; + assert(predno < s->npred); + for (int i = s->phi.n - 1; i >= 0; --i) { + struct instr *phi = &instrtab[s->phi.p[i]]; + union ref *arg = &phitab.p[phi->l.i][predno];; + use(&ra, blk, blk->ins.n, Ophi, phi->reg-1, arg, NOREF); + } + } + + curi = blk->ins.n - 1; for (int i = 0; i < 2; ++i) { if (!blk->jmp.arg[i].bits) break; /* do not allocate a reg for a cmp op used a branch argument, since it's a pseudo op */ if (blk->jmp.t == Jb && blk->jmp.arg[i].t == RTMP && oiscmp(instrtab[blk->jmp.arg[i].i].op)) break; - use(&ra, blk, blk->ins.n-1, (blk->jmp.t != Jb) - 1, + use(&ra, blk, curi, (blk->jmp.t != Jb) - 1, blk->jmp.t == Jret ? fn->abiret[i].reg : -1, &blk->jmp.arg[i], blk->jmp.arg[!i]); } - for (int i = blk->ins.n - 1; i >= 0; --i) { - int hint0 = -1, hint1 = -1; - ins = &instrtab[blk->ins.p[i]]; - if (ins->op != Ocopy && !ra.allocs[ins - instrtab].t && ins->skip) { /* unused */ - *ins = mkinstr(Onop, 0,); - continue; - } - def(&ra, ins, blk, i); - if (ins->op != Ocall) { - if (ins->op == Ocopy || ins->inplace) hint0 = ins->reg - 1; - if (ins->op == Omove) { - if (ins->l.t == RREG) hint1 = ins->l.i; - /* MOV Rx,Rx is used by isel to indicate a clobber, - * so it should be a def point for Rx but not a use point */ - if (ins->r.bits != ins->l.bits) - use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF); - } else { - if (ins->r.t == RREG) { - use(&ra, blk, i, ins->op, hint0, &ins->r, NOREF); - use(&ra, blk, i, ins->op, hint0, &ins->l, ins->r); - } else { - if (ins->l.bits) use(&ra, blk, i, ins->op, hint0, &ins->l, ins->r); - if (ins->r.bits) use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF); - } - } - } else { - struct call *call = &calltab.p[ins->r.i]; - struct bitset rspill[1] = {0}; - - fn->isleaf = 0; - - for (int r = mctarg->gpr0; r < mctarg->gpr0 + mctarg->ngpr; ++r) - if (!bstest(mctarg->rglob, r) && !bstest(mctarg->rcallee, r)) - bsset(rspill, r); - for (int r = mctarg->fpr0; r < mctarg->fpr0 + mctarg->nfpr; ++r) - if (!bstest(mctarg->rglob, r) && !bstest(mctarg->rcallee, r)) - bsset(rspill, r); - - if (call->abiret[0].ty.bits) bsclr(rspill, call->abiret[0].reg); - if (call->abiret[1].ty.bits) bsclr(rspill, call->abiret[1].reg); - for (uint r = 0; bsiter(&r, rspill, 1); ++r) { - spill(&ra, r, blk, i); - } - for (int j = 0; j < call->narg; ++j) { - short reg = call->abiarg[j].reg; - if (reg >= 0) { - forcetake(&ra, reg, mkref(RREG, reg), blk, i); - } - } - - use(&ra, blk, i, ins->op, hint0, &ins->l, NOREF); - } - if (ins->op == Ocopy && !ins->reg) - *ins = mkinstr(Onop, 0,); - if (ins->inplace && ins->reg) { - assert(ins->l.t == RREG); - if (ins->reg-1 != ins->l.i) { - /* an in-place operation where the destination does not - * match the first operand, so we need to add a move */ - insertinstr(blk, i, mkmove(insrescls(*ins), ins->reg-1, ins->l.i)); - ins->l.i = ins->reg-1; - } - } + for (; curi >= 0; --curi) { + struct instr *ins = &instrtab[blk->ins.p[curi]]; + if (ins->op == Ocall) fn->isleaf = 0; + doins(&ra, blk, ins, curi); } + for (int i = blk->phi.n - 1; i >= 0; --i) { - union ref *phi; - ins = &instrtab[blk->phi.p[i]]; - assert(ins->op == Ophi); - phi = phitab.p[ins->l.i]; - if (ins->reg) { - /* introduce necessary moves in each pred, - * XXX this doesn't work for backwards branches */ - for (int i = 0; i < blk->npred; ++i) { - struct instr mov = mkinstr(Omove, insrescls(*ins), mkref(RREG, ins->reg-1), phi[i]); - insertinstr(blkpred(blk, i), blkpred(blk, i)->ins.n, mov); - } - } - def(&ra, ins, NULL, 0); + struct instr *phi = &instrtab[blk->phi.p[i]]; + def(&ra, phi, blk, -1); + } + + memcpy(bbrm[blk->id].in.regs, ra.m.regs, sizeof ra.m.regs); + imap_copy(&bbrm[blk->id].in.allocs, &ra.m.allocs);; + } while ((blk = blk->lprev) != last); + + if (1&&ccopt.dbg.r) { + efmt("<< regalloc before resolve\n"); + irdump(fn); + } + + /* resolve allocation mismatches across each edge */ + blk = last = fn->entry->lprev; + do { + if (blk->id < 0) continue; + if (blk->s1) resolve(fn, bbrm, blk, blk->s1); + if (blk->s2) resolve(fn, bbrm, blk, blk->s2); + } while ((blk = blk->lprev) != last); + + /* parallel copies for each phi */ + blk = last = fn->entry->lprev; + do { + if (blk->id < 0) continue; + for (int i = 0; i < blk->npred; ++i) { + if (blkpred(blk, i)->id < 0) continue; + lowerphis(fn, bbrm, blkpred(blk, i), blk); } } while ((blk = blk->lprev) != last); - do vfree(&blk->phi); while ((blk = blk->lprev) != last); + /* final cleanup */ + id = 0; + blk = fn->entry; + do { + bool allnops = 1; + blk->id = id++; + for (int i = 0; i < blk->ins.n; ++i) { + struct instr *ins = &instrtab[blk->ins.p[i]]; + if (!ins->reg && insrescls(*ins) && ins->op != Omove && !oiscmp(ins->op)) { + /* dead */ + Nop: + *ins = mkinstr(Onop,0,); + } else if (ins->op == Omove && ins->r.t == RREG && ins->l.i == ins->r.i) { + goto Nop; + } else if (ins->op == Ocopy && ins->l.t == RREG && ins->reg-1 == ins->l.i) { + goto Nop; + } else if (ins->inplace && ins->l.t == RREG && ins->reg-1 != ins->l.i) { + /* fixup in-place (two-address) instructions */ + allnops = 0; + insertinstr(blk, i++, mkmove(ins->cls, ins->reg-1, ins->l.i)); + ins->l.i = ins->reg-1; + } else if (ins->op != Onop) allnops = 0; + } + + /* remove no-op blocks with only 1 pred + 1 succ OR 1 pred w/ 1 succ + 0 succ*/ + if (allnops && blk->npred == 1 && !blk->s2) { + struct block *p = blkpred(blk, 0); + if (!p->s2 && !blk->s1) { + /* simplify: + * + * @p: + * ... + * b @blk + * @blk: + * NOP + * ret + */ + assert(p->s1 == blk); + p->jmp.t = Jret; + p->s1 = NULL; + DelBlk: + vfree(&blk->phi); + vfree(&blk->ins); + blk->lnext->lprev = blk->lprev; + blk->lprev->lnext = blk->lnext; + --id; + } else if (blk->s1) { + /* simplify: + * + * @p: + * ... + * b %x, @blk, @other + * @blk: + * NOP + * b @next + */ + struct block *next = blk->s1; + if (p->s1 == blk) p->s1 = next; + else if (p->s2 == blk) p->s2 = next; + else continue; + goto DelBlk; + } + } + } while ((blk = blk->lnext) != fn->entry); + fn->nblk = id; + + blk = fn->entry; + do vfree(&blk->phi); while ((blk = blk->lnext) != fn->entry); fn->stksiz += ra.maxstk*8; if (fn->stksiz > 1<<24) error(NULL, "'%s' stack frame too big", fn->name); @@ -396,9 +758,13 @@ regalloc(struct function *fn) efmt("<< After regalloc >>\n"); irdump(fn); } - bscopy(fn->regusage, globusage, 1); - free(ra.allocs); - vfree(&ra.freestk); + for (int i = 0; i < nbbrm; ++i) { + imap_free(&bbrm[i].in.allocs); + imap_free(&bbrm[i].out.allocs); + } + imap_free(&ra.m.allocs); + free(bbrm); + fn->regusage = globusage; } /* vim:set ts=3 sw=3 expandtab: */ |