diff options
| author | 2023-06-24 18:47:05 +0200 | |
|---|---|---|
| committer | 2023-06-24 18:47:05 +0200 | |
| commit | 19bbdfa3c7ae05f4694ce5e434d9855c6f2c3682 (patch) | |
| tree | 700ca75e92f443fcb3fed30b1078b8aedde979f9 | |
| parent | d313c6e49bfb32ae24745e90eebe833da20efa1a (diff) | |
backend: fix regalloc to work with more complex dataflow
basically an allocation map at the beginning (in) and end (out) of each
block is kept and after the first allocation pass another pass is run to
resolve allocation conflicts between each edge, plus another pass to
finish lowering phi functions.
also introduced `regset` and plenty of other miscellaneous fixes
| -rw-r--r-- | amd64/emit.c | 26 | ||||
| -rw-r--r-- | amd64/isel.c | 2 | ||||
| -rw-r--r-- | amd64/sysv.c | 4 | ||||
| -rw-r--r-- | common.h | 42 | ||||
| -rw-r--r-- | ir.c | 70 | ||||
| -rw-r--r-- | ir.h | 15 | ||||
| -rw-r--r-- | irdump.c | 11 | ||||
| -rw-r--r-- | op.def | 1 | ||||
| -rw-r--r-- | optmem.c | 23 | ||||
| -rw-r--r-- | regalloc.c | 746 | ||||
| -rw-r--r-- | ssa.c | 23 | ||||
| -rw-r--r-- | test/fib.c | 10 |
12 files changed, 748 insertions, 225 deletions
diff --git a/amd64/emit.c b/amd64/emit.c index f354262..5b3a298 100644 --- a/amd64/emit.c +++ b/amd64/emit.c @@ -162,6 +162,7 @@ enum operpat { enum operenc { EN_R = 1, /* reg with /r */ EN_RR, /* reg, reg with /r */ + EN_RRX, /* reg, reg with /r (inverted) */ EN_MR, /* mem, reg with /r */ EN_RM, /* reg, mem with /r */ EN_M, /* mem */ @@ -249,6 +250,17 @@ encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct o D(opc, nopc); B(0300 | (dst.reg & 7) << 3 | (src.reg & 7)); break; + case EN_RRX: /* mod = 11; reg = src; rm = dst */ + rex |= (src.reg >> 3) << 2; /* REX.R */ + rex |= (dst.reg >> 3) << 0; /* REX.B */ + if (rex) B(0x40 | rex); + else if (en->r8 && in_range(src.reg, RSP, RDI)) { + /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */ + B(0x40); + } + D(opc, nopc); + B(0300 | (src.reg & 7) << 3 | (dst.reg & 7)); + break; case EN_MR: mem = dst; reg = src.reg; @@ -370,6 +382,8 @@ static void Xmov(uchar **pcode, enum irclass k, struct oper dst, struct oper src {8, PFPR, PFPR, "\xF2\x0F\x10", EN_RR}, /* MOVSD xmm, xmm */ {8, PFPR, PMEM, "\xF2\x0F\x10", EN_RM}, /* MOVSD xmm, m64 */ {8, PMEM, PFPR, "\xF2\x0F\x11", EN_MR}, /* MOVSS m64, xmm */ + {4|8, PGPR, PFPR, "\x66\x0F\x6E", EN_RRX}, /* MOVD/Q r64/32, xmm */ + {4|8, PFPR, PGPR, "\x66\x0F\x6E", EN_RR}, /* MOVD/Q xmm, r64/32 */ }; static const uchar k2off[] = { [KI4] = 0, @@ -583,6 +597,10 @@ static void gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct oper dst, union ref val) { assert(dst.t == OREG); + if (val.bits == UNDREF.bits) { + /* can be generated by ssa construction, since value is undefined no move is needed */ + return; + } if (val.t == RADDR) { /* this is a LEA, but maybe it can be lowered to a 2-address instruction, * which may clobber flags */ @@ -824,12 +842,12 @@ emitbranch(uchar **pcode, struct block *blk) static void calleesave(int *npush, uchar **pcode, struct function *fn) { - if (bstest(fn->regusage, RBX)) { + if 
(rstest(fn->regusage, RBX)) { Xpush(pcode, RBX); ++*npush; } for (int r = R12; r <= R15; ++r) - if (bstest(fn->regusage, r)) { + if (rstest(fn->regusage, r)) { Xpush(pcode, r); ++*npush; } @@ -839,9 +857,9 @@ static void calleerestore(uchar **pcode, struct function *fn) { for (int r = R15; r >= R12; --r) - if (bstest(fn->regusage, r)) + if (rstest(fn->regusage, r)) Xpop(pcode, r); - if (bstest(fn->regusage, RBX)) Xpop(pcode, RBX); + if (rstest(fn->regusage, RBX)) Xpop(pcode, RBX); } /* align code using NOPs */ diff --git a/amd64/isel.c b/amd64/isel.c index cb87b7d..07115ac 100644 --- a/amd64/isel.c +++ b/amd64/isel.c @@ -187,7 +187,7 @@ aadd(struct addr *addr, union ref r) } else if (r.t == RREG) { /* temporaries are single assignment, but register aren't, so they can't be * * safely hoisted into an address value, unless they have global lifetime */ - if (!bstest(mctarg->rglob, r.i)) return 0; + if (!rstest(mctarg->rglob, r.i)) return 0; Ref: if (!addr->base.bits) addr->base = r; else if (!addr->index.bits) addr->index = r; diff --git a/amd64/sysv.c b/amd64/sysv.c index 9c7bc15..6c5b67c 100644 --- a/amd64/sysv.c +++ b/amd64/sysv.c @@ -141,8 +141,8 @@ const struct mctarg t_amd64_sysv = { .gpr0 = RAX, .ngpr = R15 - RAX + 1, .bpr = RBP, .fpr0 = XMM0, .nfpr = XMM15 - XMM0 + 1, - .rcallee = {{1<<RBX | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15}}, - .rglob = {{1<<RSP | 1<<RBP}}, + .rcallee = 1<<RBX | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15, + .rglob = 1<<RSP | 1<<RBP, .rnames = amd64_rnames, .objkind = OBJELF, .isa = ISamd64, @@ -87,6 +87,18 @@ ilog2(uint x) { /* assumes x is a power of 2 */ return n; #endif } +static inline uint +lowestsetbit(uvlong x) +{ +#if HAS_BUILTIN(ctz) + return __builtin_ctzll(x); +#else + int i = 0; + for (uvlong mask = 1;; ++i, mask <<= 1) + if (x & mask) + return i; +#endif +} #define aisprint(c) in_range(c, ' ', '~') #define aisdigit(c) in_range(c, '0', '9') @@ -388,12 +400,24 @@ void imap_init_(struct imapbase *, void **v, uint vsiz, uint N); int 
imap_get_(struct imapbase *, short k); int imap_set_(struct imapbase *, void **v, uint vsiz, short k); #define imap_free(m) (free((m)->mb.k), memset((m), 0, sizeof *(m))) -#define imap_init(m, N) (imap_free(m), imap_init_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, (N)) +#define imap_init(m, N) (imap_free(m), imap_init_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, (N))) #define imap_clear(m) ((m)->mb.bs ? bszero((m)->mb.bs, BSSIZE((m)->mb.N)) : (void)0, \ (m)->mb.n = 0) #define imap_get(m, k) (((m)->tmp = imap_get_(&(m)->mb, k)) < 0 ? NULL : &(m)->v[(m)->tmp]) #define imap_set(m, k, x) ((m)->tmp = imap_set_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, k), \ (m)->v[(m)->tmp] = (x), &(m)->v[(m)->tmp]) +#define imap_each(m,kx,pvx) \ + for (int _i = 0; _i < (m)->mb.N && ((kx) = (m)->mb.k[_i], (pvx) = &(m)->v[_i], 1); ++_i) \ + if (bstest((m)->mb.bs, _i)) +#define imap_copy(dst,src) do { \ + size_t N = (src)->mb.N; \ + if (!N) break; \ + (dst)->mb.n = (src)->mb.n; \ + imap_init((dst), N); \ + memcpy((dst)->mb.k, (src)->mb.k, \ + N*(sizeof*(src)->mb.k + sizeof*(src)->v) + BSSIZE(N)*sizeof(struct bitset)); \ +} while (0) + struct pmapbase { void **k; uint n, N; }; /* map of non-null ptr -> T */ @@ -445,6 +469,22 @@ bsiter(uint *i, struct bitset bs[/*siz*/], uint siz) return 0; } +typedef uvlong regset; +#define rsset(S, r) ((S) | 1ull << (r)) +#define rsclr(S, r) ((S) & ~(1ull << (r))) +#define rstest(S, r) ((S) >> (r) & 1) +#define rsminus(A, B) ((A) & ~(B)) +#define rsand(A, B) ((A) & (B)) +#define rsunion(A, B) ((A) | (B)) +static inline bool +rsiter(int *i, uvlong rs) +{ + uvlong mask = -(1ull << *i); + if ((rs & mask) == 0) return 0; + *i = lowestsetbit(rs & mask); + return 1; +} + /********/ /** IO **/ /********/ @@ -222,7 +222,7 @@ mkaddr(struct addr addr) return mkref(RADDR, addaddr(&addr)); } -static inline void +void addpred(struct block *blk, struct block *p) { if (blk->npred == 0) { @@ -239,6 +239,46 @@ addpred(struct block *blk, struct block *p) 
xbpush(&blk->_pred, &blk->npred, p); } +static void +delpred(struct block *blk, struct block *p) +{ + for (int i = 0; i < blk->npred; ++i) { + if (blkpred(blk, i) == p) { + for (int k = i; k < blk->npred - 1; ++k) { + blkpred(blk, k) = blkpred(blk, k + 1); + } + --blk->npred; + return; + } + } + assert(0&&"blk not in p"); +} + +struct block * +insertblk(struct function *fn, struct block *pred, struct block *subst) +{ + struct block *new = newblk(fn); + struct block **s = pred->s1 == subst ? &pred->s1 : &pred->s2; + assert(*s == subst); + new->lnext = pred->lnext; + new->lprev = pred; + pred->lnext->lprev = new; + pred->lnext = new; + *s = new; + new->jmp.t = Jb; + new->s1 = subst; + addpred(new, pred); + for (int i = 0; i < subst->npred; ++i) { + if (blkpred(subst, i) == pred) { + blkpred(subst, i) = new; + ++fn->nblk; + return new; + } + } + assert(0); +} + + static int newinstr(void) { @@ -400,6 +440,14 @@ delphi(struct block *blk, int idx) --blk->phi.n; } +void +fillblkids(struct function *fn) +{ + int i = 0; + struct block *blk = fn->entry; + do blk->id = i++; while ((blk = blk->lnext) != fn->entry); +} + /** IR builders **/ struct block * @@ -515,16 +563,20 @@ void irfini(struct function *fn) { extern int nerror; - if (!nerror) { - abi0(fn); - mem2reg(fn); - lowerintrin(fn); - mctarg->isel(fn); - regalloc(fn); - if (!ccopt.dbg.any) - mctarg->emit(fn); + if (nerror) { + freefn(fn); + return; } + abi0(fn); + lowerintrin(fn); + mem2reg(fn); + copyopt(fn); + mctarg->isel(fn); + regalloc(fn); + if (!ccopt.dbg.any) + mctarg->emit(fn); + freefn(fn); } @@ -131,6 +131,8 @@ struct block { struct { uchar t; union ref arg[2]; } jmp; }; +#define blkpred(blk, i) 0[(blk)->npred < 2 ? 
&(blk)->_pred0 : &(blk)->_pred[i]] + enum { USERJUMP = 0xFFFF }; struct use { struct block *blk; ushort u; }; @@ -149,7 +151,7 @@ struct function { ushort nabiarg, nabiret; bool globl; bool isleaf; - struct bitset regusage[1]; + regset regusage; }; enum objkind { OBJELF }; @@ -161,8 +163,8 @@ struct mctarg { bpr, /* frame/base pointer reg */ fpr0, /* first fpr */ nfpr; /* fpr count */ - struct bitset rcallee[1], /* callee-saved */ - rglob[1]; /* globally live (never used for regalloc) */ + regset rcallee, /* callee-saved */ + rglob; /* globally live (never used for regalloc) */ const char (*rnames)[6]; enum objkind objkind; enum mcisa isa; @@ -226,6 +228,9 @@ void conputdat(struct irdat *, uint off, enum typetag t, const void *dat); union ref mkcallarg(union irtype ret, uint narg, int vararg); #define mkintrin(B, C, N) mkinstr(Ointrin, C, {.t=RICON,B}, mkcallarg((union irtype){{0}},N,-1)) union ref mkaddr(struct addr); +void addpred(struct block *blk, struct block *p); + +struct block *insertblk(struct function *, struct block *pred, struct block *subst); void adduse(struct block *ublk, int ui, union ref r); union ref insertinstr(struct block *, int idx, struct instr); union ref insertphi(struct block *, enum irclass cls); @@ -233,7 +238,7 @@ void replcuses(union ref from, union ref to); void deluses(int ins); void delinstr(struct block *, int idx); void delphi(struct block *, int idx); -#define blkpred(blk, i) 0[(blk)->npred < 2 ? 
&(blk)->_pred0 : &(blk)->_pred[i]] +void fillblkids(struct function *); /* IR builder functions */ union ref addinstr(struct function *, struct instr); @@ -247,7 +252,7 @@ void putreturn(struct function *, union ref r0, union ref r1); void irdump(struct function *); void filluses(struct function *); -void freeuses(struct use *, int n); +void copyopt(struct function *); void abi0(struct function *); void abi0_call(struct function *, struct instr *, struct block *blk, int *curi); @@ -80,10 +80,9 @@ dumpref(enum op o, union ref ref) efmt("??"); break; case RTMP: + efmt("%%%d", ref.i); if (instrtab[ref.i].reg) - efmt("%s", mctarg->rnames[instrtab[ref.i].reg - 1]); - else - efmt("%%%d", ref.i); + efmt("(%s)", mctarg->rnames[instrtab[ref.i].reg-1]); break; case RREG: efmt("%s", mctarg->rnames[ref.i]); @@ -168,7 +167,7 @@ dumpinst(const struct instr *ins) if (ins->reg) { if (cls) efmt("%s ", clsname[cls]); - efmt("%s = ", mctarg->rnames[ins->reg - 1]); + efmt("(%%%d)%s = ", ins - instrtab, mctarg->rnames[ins->reg - 1]); } else if (cls) { efmt("%s %%%d", clsname[cls], ins - instrtab); efmt(" = "); @@ -198,7 +197,9 @@ dumpblk(struct function *fn, struct block *blk) struct instr *phi = &instrtab[blk->phi.p[i]]; union ref *refs = phitab.p[phi->l.i]; assert(phi->op == Ophi); - efmt(" %s %%%d = phi ", clsname[phi->cls], blk->phi.p[i]); + efmt(" %s ", clsname[phi->cls]); + if (!phi->reg) efmt("%%%d = phi ", blk->phi.p[i]); + else efmt("(%%%d)%s = phi ", phi - instrtab, mctarg->rnames[phi->reg-1]); for (int i = 0; i < blk->npred; ++i) { if (i) efmt(", "); efmt("@%d ", blkpred(blk, i)->id); @@ -68,6 +68,7 @@ _(call, 2) _(call2r, 1) _(intrin, 2) _(phi, 1) +_(swap, 2) /* machine-specific instructions */ _(xinc, 1) _(xdec, 1) @@ -25,7 +25,7 @@ struct pendingphi { ushort var, phi; }; struct ssabuilder { vec_of(struct pendingphi) *pendingphis; /* map of block to list of incomplete phis in block */ imap_of(union ref *) curdefs; /* map of var to (map of block to def of var) */ - int 
nsealed; /* num of sealed blocks */ + int lastvisit; int nblk; }; @@ -90,13 +90,24 @@ writevar(struct ssabuilder *sb, int var, struct block *blk, union ref val) (*pcurdefs)[blk->id] = val; } +static bool +issealed(struct ssabuilder *sb, struct block *blk) +{ + /* consider block sealed if it has been visited or if no backbranches flow to it */ + if (blk->id < sb->lastvisit) return 1; + for (int i = 0; i < blk->npred; ++i) + if (blkpred(blk, i)->id > blk->id) + return 0; + return 1; +} + + static union ref readvarrec(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk) { union ref val; assert(blk->npred > 0); - if (blk->id >= sb->nsealed) { - /* unsealed block */ + if (!issealed(sb, blk)) { val = insertphi(blk, cls); vpush(&sb->pendingphis[blk->id], ((struct pendingphi) { var, val.i })); } else if (blk->npred == 1) { @@ -171,7 +182,7 @@ mem2reg(struct function *fn) writevar(&sb, var, use->blk, m->r); *m = mkinstr(Onop,0,); } else if (oisload(m->op)) { - union ref val = readvar(&sb, var, k, use->blk); + union ref val = readvar(&sb, var, k = m->cls, use->blk); if (!val.bits) { /* var is used uninitialized */ /* TODO emit diagnostic */ /* load some garbage */ @@ -190,8 +201,8 @@ mem2reg(struct function *fn) } /* seal block */ - assert(sb.nsealed == blk->id); - ++sb.nsealed; + assert(sb.lastvisit == blk->id); + ++sb.lastvisit; pending = (void *) &sb.pendingphis[blk->id]; for (int i = 0; i < pending->n; ++i) { addphiargs(&sb, pending->p[i].var, instrtab[pending->p[i].phi].cls, blk, @@ -1,9 +1,12 @@ +#include "common.h" #include "ir.h" -static struct bitset globusage[1]; -static struct bitset floatregs[1]; +/* Implements reverse linear register allocation, quite ad-hoc right now */ -#define isfpr(reg) bstest(floatregs, (reg)) +static regset gpregset, fpregset; +static regset globusage; + +#define isfpr(reg) in_range((reg), mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1) #define isgpr(reg) (!isfpr(reg)) enum { ADEAD, AREG, ASTACK } ; @@ -12,40 +15,91 @@ 
struct alloc { ushort t : 2, a : 14; }; #define areg(r) ((struct alloc) { AREG, (r) }) #define astack(s) ((struct alloc) { ASTACK, (s) }) +enum { MAXSPILL = 128 }; + +struct rmap { + regset free; /* free registers */ + regset blocked; /* registers allocated to themselves (from e.g. isel reg constraints, abi) */ + ushort regs[MAXREGS]; /* map reg -> temp holding reg */ + struct bitset freestk[BSSIZE(MAXSPILL)]; /* free stack slots */ + imap_of(struct alloc) allocs; /* map tmpidx -> allocation */ +}; + struct rega { - union ref regs[MAXREGS]; /* map reg -> value holding reg */ - struct alloc *allocs; /* map tmpidx -> allocation */ - vec_of(ushort) freestk; /* available stack slots */ - int nfreegpr, nfreefpr; + struct function *fn; + struct rmap m; int maxstk, stktop; }; static vec_of(union ref *) stkslotrefs; -static void +static union ref addstkslotref(union ref inst) { vpush(&stkslotrefs, &instrtab[inst.i].l); + return inst; } static int allocreg(struct rega *ra, enum irclass cls, union ref ref, uvlong excl); #if 0 #define DBG efmt +static void +dumpallocs(const char *s, struct rmap *m) +{ + int var; + struct alloc *alloc; + + DBG("%s map [", s); + imap_each(&m->allocs, var, alloc) { + if (!alloc->t) continue; + DBG(" %%%d -> %c%d,", var, "XRS"[alloc->t], alloc->a); + } + DBG("]\n"); +} #else #define DBG(...) ((void)0) +#define dumpallocs(...) 
#endif static void freereg(struct rega *ra, int r) { - ra->regs[r] = NOREF; - if (isfpr(r)) ++ra->nfreefpr; - else ++ra->nfreegpr; + ra->m.free = rsset(ra->m.free, r); + ra->m.blocked = rsclr(ra->m.blocked, r); } static void take(struct rega *ra, int reg, union ref ref); +static int +allocstk(struct rega *ra, int var) +{ + int s = -1; + + for (int i = 0; i < BSSIZE(MAXSPILL); ++i) { + if (~ra->m.freestk[i].u == 0) continue; + s = i*64 + lowestsetbit(ra->m.freestk[i].u); + } + if (s != -1) { + bsclr(ra->m.freestk, s); + if (ra->stktop < s) ra->stktop = s+1; + } else { + s = ra->stktop++; + } + if (ra->maxstk < s+1) ra->maxstk = s+1; + (void)imap_set(&ra->m.allocs, var, astack(s)); + return s; +} + +static void +freestk(struct rega *ra, int slot) +{ + if (slot < MAXSPILL) + bsset(ra->m.freestk, slot); + else if (slot == ra->stktop - 1) + --ra->stktop; +} + static void def(struct rega *ra, struct instr *ins, struct block *blk, int curi) { @@ -54,14 +108,16 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi) if (ins->op != Omove && ins->op != Ocall) { var = ins - instrtab; DBG("def %%%d\n",var); - alloc = &ra->allocs[var]; - if (alloc->t == ADEAD) { + alloc = imap_get(&ra->m.allocs, var); + if (!alloc || alloc->t == ADEAD) { return; } else if (alloc->t == AREG) { reg = alloc->a; DBG("-- free %s for %%%d\n", mctarg->rnames[alloc->a], var); - assert(ra->regs[reg].bits == mkref(RTMP, var).bits); - } else if (alloc->t == ASTACK) { + assert(!rstest(ra->m.free, reg)); + assert(ra->m.regs[reg] == var); + ins->reg = reg+1; + } else if (alloc->t == ASTACK) { /* unspill, insert 'store [slot], reg' */ struct alloc astk = *alloc; struct instr store; @@ -69,26 +125,27 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi) if ((ins->op == Ocopy || ins->inplace) && ins->l.t == RREG) { int hint = ins->l.i; - if (!ra->regs[hint].bits) { + if (rstest(ra->m.free, hint)) { take(ra, reg = hint, mkref(RTMP, var)); - assert(ra->regs[reg].bits == 
mkref(RTMP, var).bits); + assert(ra->m.regs[reg] == var && !rstest(ra->m.free, hint)); } } if (reg < 0) reg = allocreg(ra, insrescls(*ins), mkref(RTMP, var), 0); store = mkinstr(Ostore1 + ilog2(cls2siz[insrescls(*ins)]), 0, mkref(RICON, astk.a*8), mkref(RREG, reg)); - DBG("-- unspill %%%d s%d -> %s\n", var, astk.a, mctarg->rnames[ins->reg+1]); + DBG("-- unspill %%%d s%d -> %s\n", var, astk.a, mctarg->rnames[reg]); addstkslotref(insertinstr(blk, ++curi, store)); - vpush(&ra->freestk, astk.a); + freestk(ra, astk.a); ins->reg = reg+1; def(ra, ins, blk, curi); /* and free the reg again */ return; } - *alloc = afree(); + if (alloc) *alloc = afree(); } else if (ins->op == Omove) { + assert(ins->l.t == RREG); /* move to a register */ + assert(rstest(ra->m.blocked, ins->l.i)); reg = ins->l.i; - assert(ins->l.t == RREG); DBG("-- free %s\n", mctarg->rnames[ins->l.i]); } else { struct call *call = &calltab.p[ins->r.i]; @@ -104,68 +161,58 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi) static void take(struct rega *ra, int reg, union ref ref) { DBG("-- take %s for %c%d\n", mctarg->rnames[reg], "R%"[ref.t==RTMP], ref.i); - assert(!ra->regs[reg].bits && "taken"); - if (ref.t == RTMP) - ra->allocs[ref.i] = areg(reg); - ra->regs[reg] = ref; - assert(ra->regs[reg].bits); - bsset(globusage, reg); - if (isfpr(reg)) --ra->nfreefpr; - else --ra->nfreegpr; + assert(rstest(ra->m.free, reg) && "taken"); + if (ref.t == RTMP) { + (void)imap_set(&ra->m.allocs, ref.i, areg(reg)); + ra->m.regs[reg] = ref.i; + } else { + ra->m.blocked = rsset(ra->m.blocked, reg); + } + ra->m.free = rsclr(ra->m.free, reg); + globusage = rsset(globusage, reg); } static int allocreg(struct rega *ra, enum irclass cls, union ref ref, uvlong excl) { - int r0, rend, reg; + regset rs; + int reg; assert(cls); if (kisint(cls)) { - r0 = mctarg->gpr0; - rend = mctarg->gpr0 + mctarg->ngpr; + rs = gpregset; } else if (kisflt(cls)) { - r0 = mctarg->fpr0; - rend = mctarg->fpr0 + mctarg->nfpr; + rs = 
fpregset; } else assert(0); - for (reg = r0; reg < rend; ++reg) { - if (bstest(mctarg->rglob, reg)) continue; - if (!(excl >> reg & 1) && !ra->regs[reg].bits) { - take(ra, reg, ref); - return reg; - } - } - assert(!"no more reg"); -} - -static int -allocstk(struct rega *ra, int var) -{ - int s; - - if (ra->freestk.n) { - s = ra->freestk.p[--ra->freestk.n]; - ra->allocs[var] = astack(s); - } else { - if (ra->stktop+1 >= ra->maxstk) ra->maxstk = ra->stktop+1; - s = ra->stktop++; - } - ra->allocs[var] = astack(s); - return s; + rs = rsminus(rs, mctarg->rglob); + rs = rsand(rs, ra->m.free); + rs = rsminus(rs, excl); + + assert(rs != 0 && "no more reg"); + if (rsminus(rs, mctarg->rcallee) != 0) + reg = lowestsetbit(rsminus(rs, mctarg->rcallee)); + else + reg = lowestsetbit(rs); + take(ra, reg, ref); + return reg; } static void spill(struct rega *ra, int reg, struct block *blk, int curi) { int var, s; struct instr load; + struct alloc *alloc; - if (!ra->regs[reg].bits) return; - var = ra->regs[reg].i; - assert(ra->regs[reg].bits == RTMP && *(ushort *)&ra->allocs[var] == *(ushort *)&areg(reg)); + if (rstest(ra->m.free, reg)) return; + var = ra->m.regs[reg]; + assert(!rstest(ra->m.blocked, reg)); + assert((alloc = imap_get(&ra->m.allocs, var)) && !memcmp(alloc, &areg(reg), sizeof *alloc)); s = allocstk(ra, var); DBG("-- spill %%%d %s -> s%d\n", var, mctarg->rnames[reg], s); instrtab[var].reg = 0; /* insert 'reg = load [slot]' */ - load = mkinstr(Oloads1 + 2*ilog2(cls2siz[insrescls(instrtab[var])]), insrescls(instrtab[var]), mkref(RICON, s*8)); + load = mkinstr(Oloads1 + 2*ilog2(cls2siz[insrescls(instrtab[var])]), + insrescls(instrtab[var]), mkref(RICON, s*8)); load.reg = reg+1; addstkslotref(insertinstr(blk, ++curi, load)); freereg(ra, reg); @@ -178,34 +225,39 @@ forcetake(struct rega *ra, int reg, union ref ref, struct block *blk, int curi) int var; struct alloc *alloc; - if (ra->regs[reg].bits == ref.bits) return; - if (!ra->regs[reg].bits) { + if (ref.t == RREG && 
rstest(ra->m.blocked, ref.i)) { + assert(!rstest(ra->m.free, reg)); + return; + } + if (ref.t == RTMP && !rstest(ra->m.free, reg) && ra->m.regs[reg] == ref.i) return; + if (rstest(ra->m.free, reg)) { take(ra, reg, ref); return; } - assert(ra->regs[reg].bits == RTMP); - var = ra->regs[reg].i; - alloc = &ra->allocs[var]; - assert(alloc->a == reg); + assert(!rstest(ra->m.blocked, ref.i)); + var = ra->m.regs[reg]; + alloc = imap_get(&ra->m.allocs, var); + assert(alloc && alloc->a == reg); *alloc = afree(); /* try to move temp to another register */ - if (isgpr(reg) ? ra->nfreegpr > 0 : ra->nfreefpr > 0) { + if (rsand(ra->m.free, isgpr(reg) ? gpregset : fpregset) != 0) { /* the register of the current instruction (if any) was already free'd (by def), so * we need to explictly exclude it from the pool of rename registers * e.g.: given 'R0 = copy R1'; if R1 => %x, we need to prevent renaming %x => R0 */ - uvlong excl = instrtab[blk->ins.p[curi]].reg; - int rename = allocreg(ra, isgpr(reg) ? KI4 : KF4, ra->regs[reg], excl ? 1ull<<(excl-1) : 0); + regset excl = instrtab[blk->ins.p[curi]].reg; + int rename = allocreg(ra, isgpr(reg) ? KI4 : KF4, mkref(RTMP, ra->m.regs[reg]), + excl ? rsset(0, excl-1) : 0); if (ccopt.dbg.r)DBG("-- rename %%%d %s -> %s\n", var, mctarg->rnames[reg], mctarg->rnames[rename]); /* introduce move from rename -> original (since we allocate backwards) */ insertinstr(blk, ++curi, mkmove(insrescls(instrtab[var]), reg, rename)); - instrtab[var].reg = rename+1; - ra->regs[rename] = mkref(RTMP, var); - bsset(globusage, rename); + ra->m.regs[rename] = var; + globusage = rsset(globusage, rename); *alloc = areg(rename); - ra->regs[reg].bits = 0; + ra->m.free = rsset(ra->m.free, reg); } else { spill(ra, reg, blk, curi); + ra->m.free = rsset(ra->m.free, reg); } take(ra, reg, ref); } @@ -216,6 +268,8 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re { struct instr *ins; uvlong excl = other.t == RREG ? 
1ull<<other.i : 0; + struct alloc *alloc; + int reg; if (ref->t == RADDR) { struct addr addr = addrht[ref->i]; @@ -224,167 +278,475 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re *ref = mkaddr(addr); return; } else if (ref->t == RREG) { - assert(!(excl >> hint & 1)); + assert(hint<0 || !rstest(excl, hint)); forcetake(ra, ref->i, *ref, blk, curi); } if (ref->t != RTMP) return; ins = &instrtab[ref->i]; if (!ins->cls) return; - if (!ins->reg) { + if (!(alloc = imap_get(&ra->m.allocs, ref->i)) || alloc->t != AREG) { int s = -1; - if (ra->allocs[ref->i].t == ASTACK) - s = ra->allocs[ref->i].a; + if (alloc && alloc->t == ASTACK) { + /* ensure spill isn't overwritten by dest + * e.g. in R0 = add %s, 7 => can't spill %s to R0 */ + struct instr *use = &instrtab[blk->ins.p[curi]]; + s = alloc->a; + if (use->reg) excl = rsset(excl, use->reg-1); + } assert(ins->op != Ocall); if (ins->r.t == RREG && ins->inplace) excl |= 1ull<<ins->r.i; - if ((hint == -1 || ra->regs[hint].bits) && ins->op == Ocopy && ins->l.t == RREG) + if ((hint == -1 || !rstest(ra->m.free, hint)) && ins->op == Ocopy && ins->l.t == RREG) /* for '%x = copy Rx', hint %x to use Rx */ hint = ins->l.i; - if (hint != -1 && !(excl >> hint & 1) && !ra->regs[hint].bits) { + + if (hint != -1 && !rstest(excl, hint) && rstest(ra->m.free, hint)) { take(ra, hint, *ref); - ins->reg = hint + 1; + reg = hint; } else { - ins->reg = allocreg(ra, insrescls(*ins), *ref, excl) + 1; + reg = allocreg(ra, insrescls(*ins), *ref, excl); } + if (s >= 0) { /* unspill, insert 'store [slot], reg' */ - DBG("-- unspill %%%d s%d -> %s\n", ref->i, s, mctarg->rnames[ins->reg-1]); + DBG("-- unspill %%%d s%d -> %s\n", ref->i, s, mctarg->rnames[reg]); struct instr store = mkinstr(Ostore1 + ilog2(cls2siz[insrescls(*ins)]), 0, mkref(RICON, s*8), - mkref(RREG, ins->reg-1)); + mkref(RREG, reg)); addstkslotref(insertinstr(blk, ++curi, store)); - vpush(&ra->freestk, s); + freestk(ra, s); } + } else { + reg = alloc->a; 
} - /* do not patch ref if it's cond branch arg, emit() wants to know what instr it is */ + /* do not patch ref if it's used in a phi + * or if it's cond branch arg, emit() wants to know what instr it is */ + if (op != Ophi) if (ref != blk->jmp.arg || blk->jmp.t != Jb) - *ref = mkref(RREG, ins->reg-1); + *ref = mkref(RREG, reg); +} + +static void +doins(struct rega *ra, struct block *blk, struct instr *ins, int curi) +{ + int hint0 = -1, hint1 = -1; + if (ins->op != Ocopy && !imap_get(&ra->m.allocs, ins - instrtab) && ins->skip) { /* unused */ + *ins = mkinstr(Onop, 0,); + return; + } + def(ra, ins, blk, curi); + if (ins->op != Ocall) { + if (ins->op == Ocopy || ins->inplace) hint0 = ins->reg - 1; + if (ins->op == Omove) { + if (ins->l.t == RREG) hint1 = ins->l.i; + /* MOV Rx,Rx is used by isel to indicate a clobber, + * so it should be a def point for Rx but not a use point */ + if (ins->r.bits != ins->l.bits) + use(ra, blk, curi, ins->op, hint1, &ins->r, NOREF); + } else { + if (ins->l.bits) use(ra, blk, curi, ins->op, hint0, &ins->l, ins->r); + if (ins->r.bits) use(ra, blk, curi, ins->op, hint1, &ins->r, NOREF); + } + } else { + struct call *call = &calltab.p[ins->r.i]; + regset rspill = rsminus(rsunion(gpregset, fpregset), rsunion(mctarg->rglob, mctarg->rcallee)); + + dumpallocs("CALL", &ra->m); + if (call->abiret[0].ty.bits) rspill = rsclr(rspill, call->abiret[0].reg); + if (call->abiret[1].ty.bits) rspill = rsclr(rspill, call->abiret[1].reg); + for (int r = 0; rsiter(&r, rspill); ++r) { + spill(ra, r, blk, curi); + } + for (int j = 0; j < call->narg; ++j) { + short reg = call->abiarg[j].reg; + if (reg >= 0) { + forcetake(ra, reg, mkref(RREG, reg), blk, curi); + } + } + + use(ra, blk, curi, ins->op, hint0, &ins->l, NOREF); + } +} + +enum pmstat { PMTOMOVE, PMMOVING, PMDONE }; +static struct pmove { + uchar k; + uchar stat; + struct alloc dst, src; +} pmove[MAXREGS]; +static int npmove; + +static void +pmadd(enum irclass k, struct alloc dst, struct alloc src) 
+{ + assert(npmove < MAXREGS); + pmove[npmove++] = (struct pmove) { k, PMTOMOVE, dst, src }; +} + +static void +emitmove(enum irclass k, struct alloc dst, struct alloc src, struct block *blk, int curi) +{ + struct instr mv; + if (dst.t == AREG && src.t == AREG) { + insertinstr(blk, curi, mkmove(k, dst.a, src.a)); + } else if (dst.t == ASTACK && src.t == AREG) { + mv = mkinstr(Ostore1+ilog2(cls2siz[k]), 0, mkref(RICON, dst.a), mkref(RREG, src.a)); + addstkslotref(insertinstr(blk, curi, mv)); + } else if (dst.t == AREG && src.t == ASTACK) { + switch (mv.cls = k) { + default: assert(0); + case KI4: mv.op = Oloads4; break; + case KI8: mv.op = Oloadi8; break; + case KPTR: mv.op = targ_64bit ? Oloadi8 : Oloads4; break; + case KF4: mv.op = Oloadf4; break; + case KF8: mv.op = Oloadf8; break; + } + mv.l = mkref(RICON, dst.a); + mv.reg = src.a; + addstkslotref(insertinstr(blk, curi, mv)); + } +} + +static int +pmrec(int i, struct block *blk, int curi, enum irclass *k) +{ + int j, c; + + if (!memcmp(&pmove[i].dst, &pmove[i].src, sizeof pmove->dst)) { + pmove[i].stat = PMDONE; + return -1; + } + + /* widen when necessary */ + assert(kisint(pmove[i].k) == kisint(*k)); + if (cls2siz[pmove[i].k] > cls2siz[*k]) + *k = pmove[i].k; + + for (j = 0; j < npmove; ++j) + if (!memcmp(&pmove[j].dst, &pmove[i].src, sizeof pmove->dst)) { + break; + } + if (j == npmove) goto Done; + switch (pmove[j].stat) { + default: assert(0); + case PMMOVING: + c = j; + Swap: + assert(pmove[i].src.t == AREG && pmove[i].dst.t == AREG); + insertinstr(blk, curi, + mkinstr(Oswap, *k, mkref(RREG, pmove[i].dst.a), mkref(RREG, pmove[i].src.a))); + break; + case PMTOMOVE: + pmove[i].stat = PMMOVING; + c = pmrec(j, blk, curi, k); + if (c == i) { + c = -1; + break; + } else if (c != -1) { + goto Swap; + } + /* fallthru */ + case PMDONE: + Done: + c = -1; + emitmove(*k, pmove[i].dst, pmove[i].src, blk, curi); + break; + } + + pmove[i].stat = PMDONE; + return c; +} + +static void +emitpm(struct block *blk) +{ + int 
curi = blk->ins.n; + for (int i = 0; i < npmove; ++i) + if (pmove[i].stat == PMTOMOVE) { + pmrec(i, blk, curi, &(enum irclass) { pmove[i].k }); + } +} + +struct bbrm { struct rmap in, out; }; + +static void +lowerphis(struct function *fn, struct bbrm *bbrm, struct block *blk, struct block *suc) +{ + int predno; + struct alloc *alloc; + struct rmap *out = &bbrm[blk->id].out; + struct block *n = NULL; + + if (!blk->s2) n = blk; + + for (predno = 0; predno < suc->npred; ++predno) + if (blkpred(suc, predno) == blk) + break; + assert(predno < suc->npred); + + npmove = 0; + /* ensure phi args go to the same reg as phi with parallel copies */ + for (int i = 0; i < suc->phi.n; ++i) { + struct instr *phi = &instrtab[suc->phi.p[i]]; + union ref *arg = &phitab.p[phi->l.i][predno]; + int regto, regfrom; + + if (arg->t == RREG) continue; + assert(arg->t == RTMP); + DBG("resolve phi @%d, @%d, %%%d <- %%%d\n", blk->id, suc->id, phi - instrtab, arg->i); + if (instrtab[arg->i].reg) { + regfrom = instrtab[arg->i].reg - 1; + DBG(" it had R%d\n", regfrom); + } else { + alloc = imap_get(&out->allocs, arg->i); + assert(alloc && alloc->t == AREG); + regfrom = alloc->a; + DBG(" found R%d\n", regfrom); + instrtab[arg->i].reg = regfrom+1; + } + if (phi->reg) { + regto = phi->reg - 1; + } else { + alloc = imap_get(&out->allocs, phi - instrtab); + assert(alloc && alloc->t == AREG); + regto = alloc->a; + phi->reg = regto+1; + } + DBG(" > phi move %c%d -> %c%d\n", " RS"[AREG], regfrom, " RS"[AREG], regto); + if (!n) n = insertblk(fn, blk, suc); + pmadd(phi->cls, areg(regto), areg(regfrom)); + } + if (n) emitpm(n); +} + +static void +resolve(struct function *fn, struct bbrm *bbrm, struct block *blk, struct block *suc) +{ + ushort var; + struct alloc *alloc, *alloc2; + struct rmap *out = &bbrm[blk->id].out, *in = &bbrm[suc->id].in; + struct rmap *in2 = &bbrm[blk->id].in; + struct block *n = NULL; + + DBG("resolve(@%d, @%d)\n",blk->id, suc->id); + + npmove = 0; + if (!blk->s2) n = blk; + 
imap_each(&in->allocs, var, alloc) { + if (alloc->t == ADEAD) continue; + if ((alloc2 = imap_get(&out->allocs, var)) && alloc2->t != ADEAD + && memcmp(alloc2, alloc, sizeof *alloc) != 0) { + DBG("resolve @%d, @%d, %%%d\n", blk->id, suc->id, var); + DBG(" > move %c%d -> %c%d\n", " RS"[alloc2->t], alloc2->a, " RS"[alloc->t], alloc->a); + if (!n) { + n = insertblk(fn, blk, suc); + n->id = blk->id; /* use same bbrm */ + } + pmadd(insrescls(instrtab[var]), *alloc, *alloc2); + } + (void)imap_set(&in2->allocs, var, *alloc); + if (!instrtab[var].reg) + instrtab[var].reg = alloc->a + 1; + } + if (n) emitpm(n); + + dumpallocs("in", in); + dumpallocs("out", out); } void regalloc(struct function *fn) { - extern int ninstr; - struct instr *ins; + int id; struct block *last = fn->entry->lprev, *blk; static union ref *stkslotrefsbuf[64]; - struct rega ra = {0}; - - fn->isleaf = 1; - - vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf)); - ra.allocs = xcalloc((ninstr*2 < MAXINSTR ? ninstr*2 : MAXINSTR) * sizeof(struct alloc)); - ra.nfreegpr = mctarg->ngpr - popcnt(mctarg->rglob->u); - ra.nfreefpr = mctarg->nfpr; - for (int i = 0; i < MAXREGS; ++i) { - if (in_range(i, mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1)) - bsset(floatregs, i); - if (bstest(mctarg->rglob, i)) - ra.regs[i] = mkref(RREG, i); + struct bbrm *bbrm; + int nbbrm; + struct rega ra = {fn}; + + /* setup */ + if (!fpregset || !gpregset) { + for (int i = 0; i < MAXREGS; ++i) { + if (in_range(i, mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1)) + fpregset = rsset(fpregset, i); + else if (in_range(i, mctarg->gpr0, mctarg->gpr0 + mctarg->ngpr - 1)) + gpregset = rsset(gpregset, i); + } } - bszero(globusage, 1); + ra.m.blocked = mctarg->rglob; + ra.m.free = rsminus(rsunion(gpregset, fpregset), ra.m.blocked); + memset(ra.m.freestk, 0xFF, sizeof ra.m.freestk); + globusage = 0; fn->stksiz = alignup(fn->stksiz, 8); + fn->isleaf = 1; + vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf)); - /* a dumb 
linear register allocator that visits instructions physically backwards - * starting at the end of the function, when encountering a use of a new - * temporary, it allocates a register for it. when encountering the definition - * of a temporary, it frees up its register - */ + /* generate copies for phi operands */ + blk = fn->entry; + do { + if (!blk->phi.n) continue; + for (int p = 0; p < blk->npred; ++p) { + struct block *n, *pred = blkpred(blk, p); + if (!pred->s2) { + /* pred only has 1 successor (blk), so insert move directly in it */ + n = pred; + } else { + n = insertblk(fn, pred, blk); + assert(n->jmp.t == Jb && n->s1 == blk); + } + for (int i = 0; i < blk->phi.n; ++i) { + int phi = blk->phi.p[i]; + union ref *args = phitab.p[instrtab[phi].l.i]; + args[p] = insertinstr(n, n->ins.n, mkinstr(Ocopy, instrtab[phi].cls, args[p])); + } + } + } while ((blk = blk->lnext) != fn->entry); + fillblkids(fn); + bbrm = xcalloc(sizeof *bbrm * (nbbrm = fn->nblk)); + + /* visit blocks in reverse, allocating registers in a greedy manner */ blk = last; do { + int curi; + DBG("do @%d\n", blk->id); + memcpy(bbrm[blk->id].out.regs, ra.m.regs, sizeof ra.m.regs); + imap_copy(&bbrm[blk->id].out.allocs, &ra.m.allocs); + dumpallocs("out", &ra.m); + + if (blk->s1 && blk->s1->phi.n) { + /* if the block is a predecessor to some phis, introduce use points + * for the corresponding arguments to each phi */ + struct block *s = blk->s1; + int predno = 0; + for (; predno < s->npred; ++predno) + if (blkpred(s, predno) == blk) + break; + assert(predno < s->npred); + for (int i = s->phi.n - 1; i >= 0; --i) { + struct instr *phi = &instrtab[s->phi.p[i]]; + union ref *arg = &phitab.p[phi->l.i][predno];; + use(&ra, blk, blk->ins.n, Ophi, phi->reg-1, arg, NOREF); + } + } + + curi = blk->ins.n - 1; for (int i = 0; i < 2; ++i) { if (!blk->jmp.arg[i].bits) break; /* do not allocate a reg for a cmp op used a branch argument, since it's a pseudo op */ if (blk->jmp.t == Jb && blk->jmp.arg[i].t == RTMP && 
oiscmp(instrtab[blk->jmp.arg[i].i].op)) break; - use(&ra, blk, blk->ins.n-1, (blk->jmp.t != Jb) - 1, + use(&ra, blk, curi, (blk->jmp.t != Jb) - 1, blk->jmp.t == Jret ? fn->abiret[i].reg : -1, &blk->jmp.arg[i], blk->jmp.arg[!i]); } - for (int i = blk->ins.n - 1; i >= 0; --i) { - int hint0 = -1, hint1 = -1; - ins = &instrtab[blk->ins.p[i]]; - if (ins->op != Ocopy && !ra.allocs[ins - instrtab].t && ins->skip) { /* unused */ - *ins = mkinstr(Onop, 0,); - continue; - } - def(&ra, ins, blk, i); - if (ins->op != Ocall) { - if (ins->op == Ocopy || ins->inplace) hint0 = ins->reg - 1; - if (ins->op == Omove) { - if (ins->l.t == RREG) hint1 = ins->l.i; - /* MOV Rx,Rx is used by isel to indicate a clobber, - * so it should be a def point for Rx but not a use point */ - if (ins->r.bits != ins->l.bits) - use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF); - } else { - if (ins->r.t == RREG) { - use(&ra, blk, i, ins->op, hint0, &ins->r, NOREF); - use(&ra, blk, i, ins->op, hint0, &ins->l, ins->r); - } else { - if (ins->l.bits) use(&ra, blk, i, ins->op, hint0, &ins->l, ins->r); - if (ins->r.bits) use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF); - } - } - } else { - struct call *call = &calltab.p[ins->r.i]; - struct bitset rspill[1] = {0}; - - fn->isleaf = 0; - - for (int r = mctarg->gpr0; r < mctarg->gpr0 + mctarg->ngpr; ++r) - if (!bstest(mctarg->rglob, r) && !bstest(mctarg->rcallee, r)) - bsset(rspill, r); - for (int r = mctarg->fpr0; r < mctarg->fpr0 + mctarg->nfpr; ++r) - if (!bstest(mctarg->rglob, r) && !bstest(mctarg->rcallee, r)) - bsset(rspill, r); - - if (call->abiret[0].ty.bits) bsclr(rspill, call->abiret[0].reg); - if (call->abiret[1].ty.bits) bsclr(rspill, call->abiret[1].reg); - for (uint r = 0; bsiter(&r, rspill, 1); ++r) { - spill(&ra, r, blk, i); - } - for (int j = 0; j < call->narg; ++j) { - short reg = call->abiarg[j].reg; - if (reg >= 0) { - forcetake(&ra, reg, mkref(RREG, reg), blk, i); - } - } - - use(&ra, blk, i, ins->op, hint0, &ins->l, NOREF); - } - if 
(ins->op == Ocopy && !ins->reg) - *ins = mkinstr(Onop, 0,); - if (ins->inplace && ins->reg) { - assert(ins->l.t == RREG); - if (ins->reg-1 != ins->l.i) { - /* an in-place operation where the destination does not - * match the first operand, so we need to add a move */ - insertinstr(blk, i, mkmove(insrescls(*ins), ins->reg-1, ins->l.i)); - ins->l.i = ins->reg-1; - } - } + for (; curi >= 0; --curi) { + struct instr *ins = &instrtab[blk->ins.p[curi]]; + if (ins->op == Ocall) fn->isleaf = 0; + doins(&ra, blk, ins, curi); } + for (int i = blk->phi.n - 1; i >= 0; --i) { - union ref *phi; - ins = &instrtab[blk->phi.p[i]]; - assert(ins->op == Ophi); - phi = phitab.p[ins->l.i]; - if (ins->reg) { - /* introduce necessary moves in each pred, - * XXX this doesn't work for backwards branches */ - for (int i = 0; i < blk->npred; ++i) { - struct instr mov = mkinstr(Omove, insrescls(*ins), mkref(RREG, ins->reg-1), phi[i]); - insertinstr(blkpred(blk, i), blkpred(blk, i)->ins.n, mov); - } - } - def(&ra, ins, NULL, 0); + struct instr *phi = &instrtab[blk->phi.p[i]]; + def(&ra, phi, blk, -1); + } + + memcpy(bbrm[blk->id].in.regs, ra.m.regs, sizeof ra.m.regs); + imap_copy(&bbrm[blk->id].in.allocs, &ra.m.allocs);; + } while ((blk = blk->lprev) != last); + + if (1&&ccopt.dbg.r) { + efmt("<< regalloc before resolve\n"); + irdump(fn); + } + + /* resolve allocation mismatches across each edge */ + blk = last = fn->entry->lprev; + do { + if (blk->id < 0) continue; + if (blk->s1) resolve(fn, bbrm, blk, blk->s1); + if (blk->s2) resolve(fn, bbrm, blk, blk->s2); + } while ((blk = blk->lprev) != last); + + /* parallel copies for each phi */ + blk = last = fn->entry->lprev; + do { + if (blk->id < 0) continue; + for (int i = 0; i < blk->npred; ++i) { + if (blkpred(blk, i)->id < 0) continue; + lowerphis(fn, bbrm, blkpred(blk, i), blk); } } while ((blk = blk->lprev) != last); - do vfree(&blk->phi); while ((blk = blk->lprev) != last); + /* final cleanup */ + id = 0; + blk = fn->entry; + do { + bool 
allnops = 1; + blk->id = id++; + for (int i = 0; i < blk->ins.n; ++i) { + struct instr *ins = &instrtab[blk->ins.p[i]]; + if (!ins->reg && insrescls(*ins) && ins->op != Omove && !oiscmp(ins->op)) { + /* dead */ + Nop: + *ins = mkinstr(Onop,0,); + } else if (ins->op == Omove && ins->r.t == RREG && ins->l.i == ins->r.i) { + goto Nop; + } else if (ins->op == Ocopy && ins->l.t == RREG && ins->reg-1 == ins->l.i) { + goto Nop; + } else if (ins->inplace && ins->l.t == RREG && ins->reg-1 != ins->l.i) { + /* fixup in-place (two-address) instructions */ + allnops = 0; + insertinstr(blk, i++, mkmove(ins->cls, ins->reg-1, ins->l.i)); + ins->l.i = ins->reg-1; + } else if (ins->op != Onop) allnops = 0; + } + + /* remove no-op blocks with only 1 pred + 1 succ OR 1 pred w/ 1 succ + 0 succ*/ + if (allnops && blk->npred == 1 && !blk->s2) { + struct block *p = blkpred(blk, 0); + if (!p->s2 && !blk->s1) { + /* simplify: + * + * @p: + * ... + * b @blk + * @blk: + * NOP + * ret + */ + assert(p->s1 == blk); + p->jmp.t = Jret; + p->s1 = NULL; + DelBlk: + vfree(&blk->phi); + vfree(&blk->ins); + blk->lnext->lprev = blk->lprev; + blk->lprev->lnext = blk->lnext; + --id; + } else if (blk->s1) { + /* simplify: + * + * @p: + * ... 
+ * b %x, @blk, @other + * @blk: + * NOP + * b @next + */ + struct block *next = blk->s1; + if (p->s1 == blk) p->s1 = next; + else if (p->s2 == blk) p->s2 = next; + else continue; + goto DelBlk; + } + } + } while ((blk = blk->lnext) != fn->entry); + fn->nblk = id; + + blk = fn->entry; + do vfree(&blk->phi); while ((blk = blk->lnext) != fn->entry); fn->stksiz += ra.maxstk*8; if (fn->stksiz > 1<<24) error(NULL, "'%s' stack frame too big", fn->name); @@ -396,9 +758,13 @@ regalloc(struct function *fn) efmt("<< After regalloc >>\n"); irdump(fn); } - bscopy(fn->regusage, globusage, 1); - free(ra.allocs); - vfree(&ra.freestk); + for (int i = 0; i < nbbrm; ++i) { + imap_free(&bbrm[i].in.allocs); + imap_free(&bbrm[i].out.allocs); + } + imap_free(&ra.m.allocs); + free(bbrm); + fn->regusage = globusage; } /* vim:set ts=3 sw=3 expandtab: */ @@ -1,6 +1,29 @@ #include "common.h" #include "ir.h" +/* require use, keeps use */ +void +copyopt(struct function *fn) +{ + struct block *blk = fn->entry; + + do { + for (int i = 0; i < blk->ins.n; ++i) { + struct use *use, *uend; + union ref var = mkref(RTMP, blk->ins.p[i]); + struct instr *ins = &instrtab[var.i]; + + if (ins->op == Ocopy) { + assert(ins->l.t != RREG); + + replcuses(var, ins->l); + *ins = mkinstr(Onop,0,); + deluses(var.i); + } + } + } while ((blk = blk->lnext) != fn->entry); +} + void filluses(struct function *fn) { @@ -1,6 +1,6 @@ unsigned fib(unsigned x) { unsigned r = 0, q = 1; - while (x--) { + while (x-- > 1) { unsigned s = r + q; r = q; q = s; @@ -8,12 +8,18 @@ unsigned fib(unsigned x) { return q; } +unsigned fibr(unsigned x) { + if (x < 2) return x; + return fibr(x-1) + fibr(x-2); +} + int atoi(const char *); int printf(const char *, ...); int main(int argc, char **argv) { unsigned n = argv[1] ? atoi(argv[1]) : 10; - printf("fib(%u) = %u\n", n, fib(n)); + printf("fib(%u) = %u\n", n, fib(n)); + printf("fibr(%u) = %u\n", n, fibr(n)); } /* vim:set ts=3 sw=3 expandtab: */ |