diff options
| -rw-r--r-- | amd64/emit.c | 26 | ||||
| -rw-r--r-- | amd64/isel.c | 2 | ||||
| -rw-r--r-- | amd64/sysv.c | 4 | ||||
| -rw-r--r-- | common.h | 42 | ||||
| -rw-r--r-- | ir.c | 70 | ||||
| -rw-r--r-- | ir.h | 15 | ||||
| -rw-r--r-- | irdump.c | 11 | ||||
| -rw-r--r-- | op.def | 1 | ||||
| -rw-r--r-- | optmem.c | 23 | ||||
| -rw-r--r-- | regalloc.c | 746 | ||||
| -rw-r--r-- | ssa.c | 23 | ||||
| -rw-r--r-- | test/fib.c | 10 |
12 files changed, 748 insertions, 225 deletions
diff --git a/amd64/emit.c b/amd64/emit.c index f354262..5b3a298 100644 --- a/amd64/emit.c +++ b/amd64/emit.c @@ -162,6 +162,7 @@ enum operpat { enum operenc { EN_R = 1, /* reg with /r */ EN_RR, /* reg, reg with /r */ + EN_RRX, /* reg, reg with /r (inverted) */ EN_MR, /* mem, reg with /r */ EN_RM, /* reg, mem with /r */ EN_M, /* mem */ @@ -249,6 +250,17 @@ encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct o D(opc, nopc); B(0300 | (dst.reg & 7) << 3 | (src.reg & 7)); break; + case EN_RRX: /* mod = 11; reg = src; rm = dst */ + rex |= (src.reg >> 3) << 2; /* REX.R */ + rex |= (dst.reg >> 3) << 0; /* REX.B */ + if (rex) B(0x40 | rex); + else if (en->r8 && in_range(src.reg, RSP, RDI)) { + /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */ + B(0x40); + } + D(opc, nopc); + B(0300 | (src.reg & 7) << 3 | (dst.reg & 7)); + break; case EN_MR: mem = dst; reg = src.reg; @@ -370,6 +382,8 @@ static void Xmov(uchar **pcode, enum irclass k, struct oper dst, struct oper src {8, PFPR, PFPR, "\xF2\x0F\x10", EN_RR}, /* MOVSD xmm, xmm */ {8, PFPR, PMEM, "\xF2\x0F\x10", EN_RM}, /* MOVSD xmm, m64 */ {8, PMEM, PFPR, "\xF2\x0F\x11", EN_MR}, /* MOVSS m64, xmm */ + {4|8, PGPR, PFPR, "\x66\x0F\x6E", EN_RRX}, /* MOVD/Q r64/32, xmm */ + {4|8, PFPR, PGPR, "\x66\x0F\x6E", EN_RR}, /* MOVD/Q xmm, r64/32 */ }; static const uchar k2off[] = { [KI4] = 0, @@ -583,6 +597,10 @@ static void gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct oper dst, union ref val) { assert(dst.t == OREG); + if (val.bits == UNDREF.bits) { + /* can be generated by ssa construction, since value is undefined no move is needed */ + return; + } if (val.t == RADDR) { /* this is a LEA, but maybe it can be lowered to a 2-address instruction, * which may clobber flags */ @@ -824,12 +842,12 @@ emitbranch(uchar **pcode, struct block *blk) static void calleesave(int *npush, uchar **pcode, struct function *fn) { - if (bstest(fn->regusage, RBX)) { + if (rstest(fn->regusage, RBX)) { Xpush(pcode, RBX); ++*npush; } for (int r = R12; r <= R15; ++r) - if (bstest(fn->regusage, r)) { + if (rstest(fn->regusage, r)) { Xpush(pcode, r); ++*npush; } @@ -839,9 +857,9 @@ static void calleerestore(uchar **pcode, struct function *fn) { for (int r = R15; r >= R12; --r) - if (bstest(fn->regusage, r)) + if (rstest(fn->regusage, r)) Xpop(pcode, r); - if (bstest(fn->regusage, RBX)) Xpop(pcode, RBX); + if (rstest(fn->regusage, RBX)) Xpop(pcode, RBX); } /* align code using NOPs */ diff --git a/amd64/isel.c b/amd64/isel.c index cb87b7d..07115ac 100644 --- a/amd64/isel.c +++ b/amd64/isel.c @@ -187,7 +187,7 @@ aadd(struct addr *addr, union ref r) } else if (r.t == RREG) { /* temporaries are single assignment, but register aren't, so they can't be * * safely hoisted into an address value, unless they have global lifetime */ - if (!bstest(mctarg->rglob, r.i)) return 0; + if (!rstest(mctarg->rglob, r.i)) return 0; Ref: if (!addr->base.bits) addr->base = r; else if (!addr->index.bits) addr->index = r; diff --git a/amd64/sysv.c b/amd64/sysv.c index 9c7bc15..6c5b67c 100644 --- a/amd64/sysv.c +++ b/amd64/sysv.c @@ -141,8 +141,8 @@ const struct mctarg t_amd64_sysv = { .gpr0 = RAX, .ngpr = R15 - RAX + 1, .bpr = RBP, .fpr0 = XMM0, .nfpr = XMM15 - XMM0 + 1, - .rcallee = {{1<<RBX | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15}}, - .rglob = {{1<<RSP | 1<<RBP}}, + .rcallee = 1<<RBX | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15, + .rglob = 1<<RSP | 1<<RBP, .rnames = amd64_rnames, .objkind = OBJELF, .isa = ISamd64, @@ -87,6 +87,18 @@ ilog2(uint x) { /* assumes x is a power of 2 */ return n; #endif } +static inline uint +lowestsetbit(uvlong x) +{ +#if HAS_BUILTIN(ctz) + return __builtin_ctzll(x); +#else + int i = 0; + for (uvlong mask = 1;; ++i, mask <<= 1) + if (x & mask) + return i; +#endif +} #define aisprint(c) in_range(c, ' ', '~') #define aisdigit(c) in_range(c, '0', '9') @@ -388,12 +400,24 @@ void imap_init_(struct imapbase *, void **v, uint vsiz, uint N); int imap_get_(struct imapbase *, short k); int imap_set_(struct imapbase *, void **v, uint vsiz, short k); #define imap_free(m) (free((m)->mb.k), memset((m), 0, sizeof *(m))) -#define imap_init(m, N) (imap_free(m), imap_init_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, (N)) +#define imap_init(m, N) (imap_free(m), imap_init_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, (N))) #define imap_clear(m) ((m)->mb.bs ? bszero((m)->mb.bs, BSSIZE((m)->mb.N)) : (void)0, \ (m)->mb.n = 0) #define imap_get(m, k) (((m)->tmp = imap_get_(&(m)->mb, k)) < 0 ? NULL : &(m)->v[(m)->tmp]) #define imap_set(m, k, x) ((m)->tmp = imap_set_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, k), \ (m)->v[(m)->tmp] = (x), &(m)->v[(m)->tmp]) +#define imap_each(m,kx,pvx) \ + for (int _i = 0; _i < (m)->mb.N && ((kx) = (m)->mb.k[_i], (pvx) = &(m)->v[_i], 1); ++_i) \ + if (bstest((m)->mb.bs, _i)) +#define imap_copy(dst,src) do { \ + size_t N = (src)->mb.N; \ + if (!N) break; \ + (dst)->mb.n = (src)->mb.n; \ + imap_init((dst), N); \ + memcpy((dst)->mb.k, (src)->mb.k, \ + N*(sizeof*(src)->mb.k + sizeof*(src)->v) + BSSIZE(N)*sizeof(struct bitset)); \ +} while (0) + struct pmapbase { void **k; uint n, N; }; /* map of non-null ptr -> T */ @@ -445,6 +469,22 @@ bsiter(uint *i, struct bitset bs[/*siz*/], uint siz) return 0; } +typedef uvlong regset; +#define rsset(S, r) ((S) | 1ull << (r)) +#define rsclr(S, r) ((S) & ~(1ull << (r))) +#define rstest(S, r) ((S) >> (r) & 1) +#define rsminus(A, B) ((A) & ~(B)) +#define rsand(A, B) ((A) & (B)) +#define rsunion(A, B) ((A) | (B)) +static inline bool +rsiter(int *i, uvlong rs) +{ + uvlong mask = -(1ull << *i); + if ((rs & mask) == 0) return 0; + *i = lowestsetbit(rs & mask); + return 1; +} + /********/ /** IO **/ /********/ @@ -222,7 +222,7 @@ mkaddr(struct addr addr) return mkref(RADDR, addaddr(&addr)); } -static inline void +void addpred(struct block *blk, struct block *p) { if (blk->npred == 0) { @@ -239,6 +239,46 @@ addpred(struct block *blk, struct block *p) xbpush(&blk->_pred, &blk->npred, p); } +static void +delpred(struct block *blk, struct block *p) +{ + for (int i = 0; i < blk->npred; ++i) { + if (blkpred(blk, i) == p) { + for (int k = i; k < blk->npred - 1; ++k) { + blkpred(blk, k) = blkpred(blk, k + 1); + } + --blk->npred; + return; + } + } + assert(0&&"blk not in p"); +} + +struct block * +insertblk(struct function *fn, struct block *pred, struct block *subst) +{ + struct block *new = newblk(fn); + struct block **s = pred->s1 == subst ? &pred->s1 : &pred->s2; + assert(*s == subst); + new->lnext = pred->lnext; + new->lprev = pred; + pred->lnext->lprev = new; + pred->lnext = new; + *s = new; + new->jmp.t = Jb; + new->s1 = subst; + addpred(new, pred); + for (int i = 0; i < subst->npred; ++i) { + if (blkpred(subst, i) == pred) { + blkpred(subst, i) = new; + ++fn->nblk; + return new; + } + } + assert(0); +} + + static int newinstr(void) { @@ -400,6 +440,14 @@ delphi(struct block *blk, int idx) --blk->phi.n; } +void +fillblkids(struct function *fn) +{ + int i = 0; + struct block *blk = fn->entry; + do blk->id = i++; while ((blk = blk->lnext) != fn->entry); +} + /** IR builders **/ struct block * @@ -515,16 +563,20 @@ void irfini(struct function *fn) { extern int nerror; - if (!nerror) { - abi0(fn); - mem2reg(fn); - lowerintrin(fn); - mctarg->isel(fn); - regalloc(fn); - if (!ccopt.dbg.any) - mctarg->emit(fn); + if (nerror) { + freefn(fn); + return; } + abi0(fn); + lowerintrin(fn); + mem2reg(fn); + copyopt(fn); + mctarg->isel(fn); + regalloc(fn); + if (!ccopt.dbg.any) + mctarg->emit(fn); + freefn(fn); } @@ -131,6 +131,8 @@ struct block { struct { uchar t; union ref arg[2]; } jmp; }; +#define blkpred(blk, i) 0[(blk)->npred < 2 ? &(blk)->_pred0 : &(blk)->_pred[i]] + enum { USERJUMP = 0xFFFF }; struct use { struct block *blk; ushort u; }; @@ -149,7 +151,7 @@ struct function { ushort nabiarg, nabiret; bool globl; bool isleaf; - struct bitset regusage[1]; + regset regusage; }; enum objkind { OBJELF }; @@ -161,8 +163,8 @@ struct mctarg { bpr, /* frame/base pointer reg */ fpr0, /* first fpr */ nfpr; /* fpr count */ - struct bitset rcallee[1], /* callee-saved */ - rglob[1]; /* globally live (never used for regalloc) */ + regset rcallee, /* callee-saved */ + rglob; /* globally live (never used for regalloc) */ const char (*rnames)[6]; enum objkind objkind; enum mcisa isa; @@ -226,6 +228,9 @@ void conputdat(struct irdat *, uint off, enum typetag t, const void *dat); union ref mkcallarg(union irtype ret, uint narg, int vararg); #define mkintrin(B, C, N) mkinstr(Ointrin, C, {.t=RICON,B}, mkcallarg((union irtype){{0}},N,-1)) union ref mkaddr(struct addr); +void addpred(struct block *blk, struct block *p); + +struct block *insertblk(struct function *, struct block *pred, struct block *subst); void adduse(struct block *ublk, int ui, union ref r); union ref insertinstr(struct block *, int idx, struct instr); union ref insertphi(struct block *, enum irclass cls); @@ -233,7 +238,7 @@ void replcuses(union ref from, union ref to); void deluses(int ins); void delinstr(struct block *, int idx); void delphi(struct block *, int idx); -#define blkpred(blk, i) 0[(blk)->npred < 2 ? &(blk)->_pred0 : &(blk)->_pred[i]] +void fillblkids(struct function *); /* IR builder functions */ union ref addinstr(struct function *, struct instr); @@ -247,7 +252,7 @@ void putreturn(struct function *, union ref r0, union ref r1); void irdump(struct function *); void filluses(struct function *); -void freeuses(struct use *, int n); +void copyopt(struct function *); void abi0(struct function *); void abi0_call(struct function *, struct instr *, struct block *blk, int *curi); @@ -80,10 +80,9 @@ dumpref(enum op o, union ref ref) efmt("??"); break; case RTMP: + efmt("%%%d", ref.i); if (instrtab[ref.i].reg) - efmt("%s", mctarg->rnames[instrtab[ref.i].reg - 1]); - else - efmt("%%%d", ref.i); + efmt("(%s)", mctarg->rnames[instrtab[ref.i].reg-1]); break; case RREG: efmt("%s", mctarg->rnames[ref.i]); @@ -168,7 +167,7 @@ dumpinst(const struct instr *ins) if (ins->reg) { if (cls) efmt("%s ", clsname[cls]); - efmt("%s = ", mctarg->rnames[ins->reg - 1]); + efmt("(%%%d)%s = ", ins - instrtab, mctarg->rnames[ins->reg - 1]); } else if (cls) { efmt("%s %%%d", clsname[cls], ins - instrtab); efmt(" = "); @@ -198,7 +197,9 @@ dumpblk(struct function *fn, struct block *blk) struct instr *phi = &instrtab[blk->phi.p[i]]; union ref *refs = phitab.p[phi->l.i]; assert(phi->op == Ophi); - efmt(" %s %%%d = phi ", clsname[phi->cls], blk->phi.p[i]); + efmt(" %s ", clsname[phi->cls]); + if (!phi->reg) efmt("%%%d = phi ", blk->phi.p[i]); + else efmt("(%%%d)%s = phi ", phi - instrtab, mctarg->rnames[phi->reg-1]); for (int i = 0; i < blk->npred; ++i) { if (i) efmt(", "); efmt("@%d ", blkpred(blk, i)->id); @@ -68,6 +68,7 @@ _(call, 2) _(call2r, 1) _(intrin, 2) _(phi, 1) +_(swap, 2) /* machine-specific instructions */ _(xinc, 1) _(xdec, 1) @@ -25,7 +25,7 @@ struct pendingphi { ushort var, phi; }; struct ssabuilder { vec_of(struct pendingphi) *pendingphis; /* map of block to list of incomplete phis in block */ imap_of(union ref *) curdefs; /* map of var to (map of block to def of var) */ - int nsealed; /* num of sealed blocks */ + int lastvisit; int nblk; }; @@ -90,13 +90,24 @@ writevar(struct ssabuilder *sb, int var, struct block *blk, union ref val) (*pcurdefs)[blk->id] = val; } +static bool +issealed(struct ssabuilder *sb, struct block *blk) +{ + /* consider block sealed if it has been visited or if no backbranches flow to it */ + if (blk->id < sb->lastvisit) return 1; + for (int i = 0; i < blk->npred; ++i) + if (blkpred(blk, i)->id > blk->id) + return 0; + return 1; +} + + static union ref readvarrec(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk) { union ref val; assert(blk->npred > 0); - if (blk->id >= sb->nsealed) { - /* unsealed block */ + if (!issealed(sb, blk)) { val = insertphi(blk, cls); vpush(&sb->pendingphis[blk->id], ((struct pendingphi) { var, val.i })); } else if (blk->npred == 1) { @@ -171,7 +182,7 @@ mem2reg(struct function *fn) writevar(&sb, var, use->blk, m->r); *m = mkinstr(Onop,0,); } else if (oisload(m->op)) { - union ref val = readvar(&sb, var, k, use->blk); + union ref val = readvar(&sb, var, k = m->cls, use->blk); if (!val.bits) { /* var is used uninitialized */ /* TODO emit diagnostic */ /* load some garbage */ @@ -190,8 +201,8 @@ mem2reg(struct function *fn) } /* seal block */ - assert(sb.nsealed == blk->id); - ++sb.nsealed; + assert(sb.lastvisit == blk->id); + ++sb.lastvisit; pending = (void *) &sb.pendingphis[blk->id]; for (int i = 0; i < pending->n; ++i) { addphiargs(&sb, pending->p[i].var, instrtab[pending->p[i].phi].cls, blk, @@ -1,9 +1,12 @@ +#include "common.h" #include "ir.h" -static struct bitset globusage[1]; -static struct bitset floatregs[1]; +/* Implements reverse linear register allocation, quite ad-hoc right now */ -#define isfpr(reg) bstest(floatregs, (reg)) +static regset gpregset, fpregset; +static regset globusage; + +#define isfpr(reg) in_range((reg), mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1) #define isgpr(reg) (!isfpr(reg)) enum { ADEAD, AREG, ASTACK } ; @@ -12,40 +15,91 @@ struct alloc { ushort t : 2, a : 14; }; #define areg(r) ((struct alloc) { AREG, (r) }) #define astack(s) ((struct alloc) { ASTACK, (s) }) +enum { MAXSPILL = 128 }; + +struct rmap { + regset free; /* free registers */ + regset blocked; /* registers allocated to themselves (from e.g. isel reg constraints, abi) */ + ushort regs[MAXREGS]; /* map reg -> temp holding reg */ + struct bitset freestk[BSSIZE(MAXSPILL)]; /* free stack slots */ + imap_of(struct alloc) allocs; /* map tmpidx -> allocation */ +}; + struct rega { - union ref regs[MAXREGS]; /* map reg -> value holding reg */ - struct alloc *allocs; /* map tmpidx -> allocation */ - vec_of(ushort) freestk; /* available stack slots */ - int nfreegpr, nfreefpr; + struct function *fn; + struct rmap m; int maxstk, stktop; }; static vec_of(union ref *) stkslotrefs; -static void +static union ref addstkslotref(union ref inst) { vpush(&stkslotrefs, &instrtab[inst.i].l); + return inst; } static int allocreg(struct rega *ra, enum irclass cls, union ref ref, uvlong excl); #if 0 #define DBG efmt +static void +dumpallocs(const char *s, struct rmap *m) +{ + int var; + struct alloc *alloc; + + DBG("%s map [", s); + imap_each(&m->allocs, var, alloc) { + if (!alloc->t) continue; + DBG(" %%%d -> %c%d,", var, "XRS"[alloc->t], alloc->a); + } + DBG("]\n"); +} #else #define DBG(...) ((void)0) +#define dumpallocs(...) #endif static void freereg(struct rega *ra, int r) { - ra->regs[r] = NOREF; - if (isfpr(r)) ++ra->nfreefpr; - else ++ra->nfreegpr; + ra->m.free = rsset(ra->m.free, r); + ra->m.blocked = rsclr(ra->m.blocked, r); } static void take(struct rega *ra, int reg, union ref ref); +static int +allocstk(struct rega *ra, int var) +{ + int s = -1; + + for (int i = 0; i < BSSIZE(MAXSPILL); ++i) { + if (~ra->m.freestk[i].u == 0) continue; + s = i*64 + lowestsetbit(ra->m.freestk[i].u); + } + if (s != -1) { + bsclr(ra->m.freestk, s); + if (ra->stktop < s) ra->stktop = s+1; + } else { + s = ra->stktop++; + } + if (ra->maxstk < s+1) ra->maxstk = s+1; + (void)imap_set(&ra->m.allocs, var, astack(s)); + return s; +} + +static void +freestk(struct rega *ra, int slot) +{ + if (slot < MAXSPILL) + bsset(ra->m.freestk, slot); + else if (slot == ra->stktop - 1) + --ra->stktop; +} + static void def(struct rega *ra, struct instr *ins, struct block *blk, int curi) { @@ -54,14 +108,16 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi) if (ins->op != Omove && ins->op != Ocall) { var = ins - instrtab; DBG("def %%%d\n",var); - alloc = &ra->allocs[var]; - if (alloc->t == ADEAD) { + alloc = imap_get(&ra->m.allocs, var); + if (!alloc || alloc->t == ADEAD) { return; } else if (alloc->t == AREG) { reg = alloc->a; DBG("-- free %s for %%%d\n", mctarg->rnames[alloc->a], var); - assert(ra->regs[reg].bits == mkref(RTMP, var).bits); - } else if (alloc->t == ASTACK) { + assert(!rstest(ra->m.free, reg)); + assert(ra->m.regs[reg] == var); + ins->reg = reg+1; + } else if (alloc->t == ASTACK) { /* unspill, insert 'store [slot], reg' */ struct alloc astk = *alloc; struct instr store; @@ -69,26 +125,27 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi) if ((ins->op == Ocopy || ins->inplace) && ins->l.t == RREG) { int hint = ins->l.i; - if (!ra->regs[hint].bits) { + if (rstest(ra->m.free, hint)) { take(ra, reg = hint, mkref(RTMP, var)); - assert(ra->regs[reg].bits == mkref(RTMP, var).bits); + assert(ra->m.regs[reg] == var && !rstest(ra->m.free, hint)); } } if (reg < 0) reg = allocreg(ra, insrescls(*ins), mkref(RTMP, var), 0); store = mkinstr(Ostore1 + ilog2(cls2siz[insrescls(*ins)]), 0, mkref(RICON, astk.a*8), mkref(RREG, reg)); - DBG("-- unspill %%%d s%d -> %s\n", var, astk.a, mctarg->rnames[ins->reg+1]); + DBG("-- unspill %%%d s%d -> %s\n", var, astk.a, mctarg->rnames[reg]); addstkslotref(insertinstr(blk, ++curi, store)); - vpush(&ra->freestk, astk.a); + freestk(ra, astk.a); ins->reg = reg+1; def(ra, ins, blk, curi); /* and free the reg again */ return; } - *alloc = afree(); + if (alloc) *alloc = afree(); } else if (ins->op == Omove) { + assert(ins->l.t == RREG); /* move to a register */ + assert(rstest(ra->m.blocked, ins->l.i)); reg = ins->l.i; - assert(ins->l.t == RREG); DBG("-- free %s\n", mctarg->rnames[ins->l.i]); } else { struct call *call = &calltab.p[ins->r.i]; @@ -104,68 +161,58 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi) static void take(struct rega *ra, int reg, union ref ref) { DBG("-- take %s for %c%d\n", mctarg->rnames[reg], "R%"[ref.t==RTMP], ref.i); - assert(!ra->regs[reg].bits && "taken"); - if (ref.t == RTMP) - ra->allocs[ref.i] = areg(reg); - ra->regs[reg] = ref; - assert(ra->regs[reg].bits); - bsset(globusage, reg); - if (isfpr(reg)) --ra->nfreefpr; - else --ra->nfreegpr; + assert(rstest(ra->m.free, reg) && "taken"); + if (ref.t == RTMP) { + (void)imap_set(&ra->m.allocs, ref.i, areg(reg)); + ra->m.regs[reg] = ref.i; + } else { + ra->m.blocked = rsset(ra->m.blocked, reg); + } + ra->m.free = rsclr(ra->m.free, reg); + globusage = rsset(globusage, reg); } static int allocreg(struct rega *ra, enum irclass cls, union ref ref, uvlong excl) { - int r0, rend, reg; + regset rs; + int reg; assert(cls); if (kisint(cls)) { - r0 = mctarg->gpr0; - rend = mctarg->gpr0 + mctarg->ngpr; + rs = gpregset; } else if (kisflt(cls)) { - r0 = mctarg->fpr0; - rend = mctarg->fpr0 + mctarg->nfpr; + rs = fpregset; } else assert(0); - for (reg = r0; reg < rend; ++reg) { - if (bstest(mctarg->rglob, reg)) continue; - if (!(excl >> reg & 1) && !ra->regs[reg].bits) { - take(ra, reg, ref); - return reg; - } - } - assert(!"no more reg"); -} - -static int -allocstk(struct rega *ra, int var) -{ - int s; - - if (ra->freestk.n) { - s = ra->freestk.p[--ra->freestk.n]; - ra->allocs[var] = astack(s); - } else { - if (ra->stktop+1 >= ra->maxstk) ra->maxstk = ra->stktop+1; - s = ra->stktop++; - } - ra->allocs[var] = astack(s); - return s; + rs = rsminus(rs, mctarg->rglob); + rs = rsand(rs, ra->m.free); + rs = rsminus(rs, excl); + + assert(rs != 0 && "no more reg"); + if (rsminus(rs, mctarg->rcallee) != 0) + reg = lowestsetbit(rsminus(rs, mctarg->rcallee)); + else + reg = lowestsetbit(rs); + take(ra, reg, ref); + return reg; } static void spill(struct rega *ra, int reg, struct block *blk, int curi) { int var, s; struct instr load; + struct alloc *alloc; - if (!ra->regs[reg].bits) return; - var = ra->regs[reg].i; - assert(ra->regs[reg].bits == RTMP && *(ushort *)&ra->allocs[var] == *(ushort *)&areg(reg)); + if (rstest(ra->m.free, reg)) return; + var = ra->m.regs[reg]; + assert(!rstest(ra->m.blocked, reg)); + assert((alloc = imap_get(&ra->m.allocs, var)) && !memcmp(alloc, &areg(reg), sizeof *alloc)); s = allocstk(ra, var); DBG("-- spill %%%d %s -> s%d\n", var, mctarg->rnames[reg], s); instrtab[var].reg = 0; /* insert 'reg = load [slot]' */ - load = mkinstr(Oloads1 + 2*ilog2(cls2siz[insrescls(instrtab[var])]), insrescls(instrtab[var]), mkref(RICON, s*8)); + load = mkinstr(Oloads1 + 2*ilog2(cls2siz[insrescls(instrtab[var])]), + insrescls(instrtab[var]), mkref(RICON, s*8)); load.reg = reg+1; addstkslotref(insertinstr(blk, ++curi, load)); freereg(ra, reg); @@ -178,34 +225,39 @@ forcetake(struct rega *ra, int reg, union ref ref, struct block *blk, int curi) int var; struct alloc *alloc; - if (ra->regs[reg].bits == ref.bits) return; - if (!ra->regs[reg].bits) { + if (ref.t == RREG && rstest(ra->m.blocked, ref.i)) { + assert(!rstest(ra->m.free, reg)); + return; + } + if (ref.t == RTMP && !rstest(ra->m.free, reg) && ra->m.regs[reg] == ref.i) return; + if (rstest(ra->m.free, reg)) { take(ra, reg, ref); return; } - assert(ra->regs[reg].bits == RTMP); - var = ra->regs[reg].i; - alloc = &ra->allocs[var]; - assert(alloc->a == reg); + assert(!rstest(ra->m.blocked, ref.i)); + var = ra->m.regs[reg]; + alloc = imap_get(&ra->m.allocs, var); + assert(alloc && alloc->a == reg); *alloc = afree(); /* try to move temp to another register */ - if (isgpr(reg) ? ra->nfreegpr > 0 : ra->nfreefpr > 0) { + if (rsand(ra->m.free, isgpr(reg) ? gpregset : fpregset) != 0) { /* the register of the current instruction (if any) was already free'd (by def), so * we need to explictly exclude it from the pool of rename registers * e.g.: given 'R0 = copy R1'; if R1 => %x, we need to prevent renaming %x => R0 */ - uvlong excl = instrtab[blk->ins.p[curi]].reg; - int rename = allocreg(ra, isgpr(reg) ? KI4 : KF4, ra->regs[reg], excl ? 1ull<<(excl-1) : 0); + regset excl = instrtab[blk->ins.p[curi]].reg; + int rename = allocreg(ra, isgpr(reg) ? KI4 : KF4, mkref(RTMP, ra->m.regs[reg]), + excl ? rsset(0, excl-1) : 0); if (ccopt.dbg.r)DBG("-- rename %%%d %s -> %s\n", var, mctarg->rnames[reg], mctarg->rnames[rename]); /* introduce move from rename -> original (since we allocate backwards) */ insertinstr(blk, ++curi, mkmove(insrescls(instrtab[var]), reg, rename)); - instrtab[var].reg = rename+1; - ra->regs[rename] = mkref(RTMP, var); - bsset(globusage, rename); + ra->m.regs[rename] = var; + globusage = rsset(globusage, rename); *alloc = areg(rename); - ra->regs[reg].bits = 0; + ra->m.free = rsset(ra->m.free, reg); } else { spill(ra, reg, blk, curi); + ra->m.free = rsset(ra->m.free, reg); } take(ra, reg, ref); } @@ -216,6 +268,8 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re { struct instr *ins; uvlong excl = other.t == RREG ? 1ull<<other.i : 0; + struct alloc *alloc; + int reg; if (ref->t == RADDR) { struct addr addr = addrht[ref->i]; @@ -224,167 +278,475 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re *ref = mkaddr(addr); return; } else if (ref->t == RREG) { - assert(!(excl >> hint & 1)); + assert(hint<0 || !rstest(excl, hint)); forcetake(ra, ref->i, *ref, blk, curi); } if (ref->t != RTMP) return; ins = &instrtab[ref->i]; if (!ins->cls) return; - if (!ins->reg) { + if (!(alloc = imap_get(&ra->m.allocs, ref->i)) || alloc->t != AREG) { int s = -1; - if (ra->allocs[ref->i].t == ASTACK) - s = ra->allocs[ref->i].a; + if (alloc && alloc->t == ASTACK) { + /* ensure spill isn't overwritten by dest + * e.g. in R0 = add %s, 7 => can't spill %s to R0 */ + struct instr *use = &instrtab[blk->ins.p[curi]]; + s = alloc->a; + if (use->reg) excl = rsset(excl, use->reg-1); + } assert(ins->op != Ocall); if (ins->r.t == RREG && ins->inplace) excl |= 1ull<<ins->r.i; - if ((hint == -1 || ra->regs[hint].bits) && ins->op == Ocopy && ins->l.t == RREG) + if ((hint == -1 || !rstest(ra->m.free, hint)) && ins->op == Ocopy && ins->l.t == RREG) /* for '%x = copy Rx', hint %x to use Rx */ hint = ins->l.i; - if (hint != -1 && !(excl >> hint & 1) && !ra->regs[hint].bits) { + + if (hint != -1 && !rstest(excl, hint) && rstest(ra->m.free, hint)) { take(ra, hint, *ref); - ins->reg = hint + 1; + reg = hint; } else { - ins->reg = allocreg(ra, insrescls(*ins), *ref, excl) + 1; + reg = allocreg(ra, insrescls(*ins), *ref, excl); } + if (s >= 0) { /* unspill, insert 'store [slot], reg' */ - DBG("-- unspill %%%d s%d -> %s\n", ref->i, s, mctarg->rnames[ins->reg-1]); + DBG("-- unspill %%%d s%d -> %s\n", ref->i, s, mctarg->rnames[reg]); struct instr store = mkinstr(Ostore1 + ilog2(cls2siz[insrescls(*ins)]), 0, mkref(RICON, s*8), - mkref(RREG, ins->reg-1)); + mkref(RREG, reg)); addstkslotref(insertinstr(blk, ++curi, store)); - vpush(&ra->freestk, s); + freestk(ra, s); } + } else { + reg = alloc->a; } - /* do not patch ref if it's cond branch arg, emit() wants to know what instr it is */ + /* do not patch ref if it's used in a phi + * or if it's cond branch arg, emit() wants to know what instr it is */ + if (op != Ophi) if (ref != blk->jmp.arg || blk->jmp.t != Jb) - *ref = mkref(RREG, ins->reg-1); + *ref = mkref(RREG, reg); +} + +static void +doins(struct rega *ra, struct block *blk, struct instr *ins, int curi) +{ + int hint0 = -1, hint1 = -1; + if (ins->op != Ocopy && !imap_get(&ra->m.allocs, ins - instrtab) && ins->skip) { /* unused */ + *ins = mkinstr(Onop, 0,); + return; + } + def(ra, ins, blk, curi); + if (ins->op != Ocall) { + if (ins->op == Ocopy || ins->inplace) hint0 = ins->reg - 1; + if (ins->op == Omove) { + if (ins->l.t == RREG) hint1 = ins->l.i; + /* MOV Rx,Rx is used by isel to indicate a clobber, + * so it should be a def point for Rx but not a use point */ + if (ins->r.bits != ins->l.bits) + use(ra, blk, curi, ins->op, hint1, &ins->r, NOREF); + } else { + if (ins->l.bits) use(ra, blk, curi, ins->op, hint0, &ins->l, ins->r); + if (ins->r.bits) use(ra, blk, curi, ins->op, hint1, &ins->r, NOREF); + } + } else { + struct call *call = &calltab.p[ins->r.i]; + regset rspill = rsminus(rsunion(gpregset, fpregset), rsunion(mctarg->rglob, mctarg->rcallee)); + + dumpallocs("CALL", &ra->m); + if (call->abiret[0].ty.bits) rspill = rsclr(rspill, call->abiret[0].reg); + if (call->abiret[1].ty.bits) rspill = rsclr(rspill, call->abiret[1].reg); + for (int r = 0; rsiter(&r, rspill); ++r) { + spill(ra, r, blk, curi); + } + for (int j = 0; j < call->narg; ++j) { + short reg = call->abiarg[j].reg; + if (reg >= 0) { + forcetake(ra, reg, mkref(RREG, reg), blk, curi); + } + } + + use(ra, blk, curi, ins->op, hint0, &ins->l, NOREF); + } +} + +enum pmstat { PMTOMOVE, PMMOVING, PMDONE }; +static struct pmove { + uchar k; + uchar stat; + struct alloc dst, src; +} pmove[MAXREGS]; +static int npmove; + +static void +pmadd(enum irclass k, struct alloc dst, struct alloc src) +{ + assert(npmove < MAXREGS); + pmove[npmove++] = (struct pmove) { k, PMTOMOVE, dst, src }; +} + +static void +emitmove(enum irclass k, struct alloc dst, struct alloc src, struct block *blk, int curi) +{ + struct instr mv; + if (dst.t == AREG && src.t == AREG) { + insertinstr(blk, curi, mkmove(k, dst.a, src.a)); + } else if (dst.t == ASTACK && src.t == AREG) { + mv = mkinstr(Ostore1+ilog2(cls2siz[k]), 0, mkref(RICON, dst.a), mkref(RREG, src.a)); + addstkslotref(insertinstr(blk, curi, mv)); + } else if (dst.t == AREG && src.t == ASTACK) { + switch (mv.cls = k) { + default: assert(0); + case KI4: mv.op = Oloads4; break; + case KI8: mv.op = Oloadi8; break; + case KPTR: mv.op = targ_64bit ? Oloadi8 : Oloads4; break; + case KF4: mv.op = Oloadf4; break; + case KF8: mv.op = Oloadf8; break; + } + mv.l = mkref(RICON, dst.a); + mv.reg = src.a; + addstkslotref(insertinstr(blk, curi, mv)); + } +} + +static int +pmrec(int i, struct block *blk, int curi, enum irclass *k) +{ + int j, c; + + if (!memcmp(&pmove[i].dst, &pmove[i].src, sizeof pmove->dst)) { + pmove[i].stat = PMDONE; + return -1; + } + + /* widen when necessary */ + assert(kisint(pmove[i].k) == kisint(*k)); + if (cls2siz[pmove[i].k] > cls2siz[*k]) + *k = pmove[i].k; + + for (j = 0; j < npmove; ++j) + if (!memcmp(&pmove[j].dst, &pmove[i].src, sizeof pmove->dst)) { + break; + } + if (j == npmove) goto Done; + switch (pmove[j].stat) { + default: assert(0); + case PMMOVING: + c = j; + Swap: + assert(pmove[i].src.t == AREG && pmove[i].dst.t == AREG); + insertinstr(blk, curi, + mkinstr(Oswap, *k, mkref(RREG, pmove[i].dst.a), mkref(RREG, pmove[i].src.a))); + break; + case PMTOMOVE: + pmove[i].stat = PMMOVING; + c = pmrec(j, blk, curi, k); + if (c == i) { + c = -1; + break; + } else if (c != -1) { + goto Swap; + } + /* fallthru */ + case PMDONE: + Done: + c = -1; + emitmove(*k, pmove[i].dst, pmove[i].src, blk, curi); + break; + } + + pmove[i].stat = PMDONE; + return c; +} + +static void +emitpm(struct block *blk) +{ + int curi = blk->ins.n; + for (int i = 0; i < npmove; ++i) + if (pmove[i].stat == PMTOMOVE) { + pmrec(i, blk, curi, &(enum irclass) { pmove[i].k }); + } +} + +struct bbrm { struct rmap in, out; }; + +static void +lowerphis(struct function *fn, struct bbrm *bbrm, struct block *blk, struct block *suc) +{ + int predno; + struct alloc *alloc; + struct rmap *out = &bbrm[blk->id].out; + struct block *n = NULL; + + if (!blk->s2) n = blk; + + for (predno = 0; predno < suc->npred; ++predno) + if (blkpred(suc, predno) == blk) + break; + assert(predno < suc->npred); + + npmove = 0; + /* ensure phi args go to the same reg as phi with parallel copies */ + for (int i = 0; i < suc->phi.n; ++i) { + struct instr *phi = &instrtab[suc->phi.p[i]]; + union ref *arg = &phitab.p[phi->l.i][predno]; + int regto, regfrom; + + if (arg->t == RREG) continue; + assert(arg->t == RTMP); + DBG("resolve phi @%d, @%d, %%%d <- %%%d\n", blk->id, suc->id, phi - instrtab, arg->i); + if (instrtab[arg->i].reg) { + regfrom = instrtab[arg->i].reg - 1; + DBG(" it had R%d\n", regfrom); + } else { + alloc = imap_get(&out->allocs, arg->i); + assert(alloc && alloc->t == AREG); + regfrom = alloc->a; + DBG(" found R%d\n", regfrom); + instrtab[arg->i].reg = regfrom+1; + } + if (phi->reg) { + regto = phi->reg - 1; + } else { + alloc = imap_get(&out->allocs, phi - instrtab); + assert(alloc && alloc->t == AREG); + regto = alloc->a; + phi->reg = regto+1; + } + DBG(" > phi move %c%d -> %c%d\n", " RS"[AREG], regfrom, " RS"[AREG], regto); + if (!n) n = insertblk(fn, blk, suc); + pmadd(phi->cls, areg(regto), areg(regfrom)); + } + if (n) emitpm(n); +} + +static void +resolve(struct function *fn, struct bbrm *bbrm, struct block *blk, struct block *suc) +{ + ushort var; + struct alloc *alloc, *alloc2; + struct rmap *out = &bbrm[blk->id].out, *in = &bbrm[suc->id].in; + struct rmap *in2 = &bbrm[blk->id].in; + struct block *n = NULL; + + DBG("resolve(@%d, @%d)\n",blk->id, suc->id); + + npmove = 0; + if (!blk->s2) n = blk; + imap_each(&in->allocs, var, alloc) { + if (alloc->t == ADEAD) continue; + if ((alloc2 = imap_get(&out->allocs, var)) && alloc2->t != ADEAD + && memcmp(alloc2, alloc, sizeof *alloc) != 0) { + DBG("resolve @%d, @%d, %%%d\n", blk->id, suc->id, var); + DBG(" > move %c%d -> %c%d\n", " RS"[alloc2->t], alloc2->a, " RS"[alloc->t], alloc->a); + if (!n) { + n = insertblk(fn, blk, suc); + n->id = blk->id; /* use same bbrm */ + } + pmadd(insrescls(instrtab[var]), *alloc, *alloc2); + } + (void)imap_set(&in2->allocs, var, *alloc); + if (!instrtab[var].reg) + instrtab[var].reg = alloc->a + 1; + } + if (n) emitpm(n); + + dumpallocs("in", in); + dumpallocs("out", out); } void regalloc(struct function *fn) { - extern int ninstr; - struct instr *ins; + int id; struct block *last = fn->entry->lprev, *blk; static union ref *stkslotrefsbuf[64]; - struct rega ra = {0}; - - fn->isleaf = 1; - - vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf)); - ra.allocs = xcalloc((ninstr*2 < MAXINSTR ? ninstr*2 : MAXINSTR) * sizeof(struct alloc)); - ra.nfreegpr = mctarg->ngpr - popcnt(mctarg->rglob->u); - ra.nfreefpr = mctarg->nfpr; - for (int i = 0; i < MAXREGS; ++i) { - if (in_range(i, mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1)) - bsset(floatregs, i); - if (bstest(mctarg->rglob, i)) - ra.regs[i] = mkref(RREG, i); + struct bbrm *bbrm; + int nbbrm; + struct rega ra = {fn}; + + /* setup */ + if (!fpregset || !gpregset) { + for (int i = 0; i < MAXREGS; ++i) { + if (in_range(i, mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1)) + fpregset = rsset(fpregset, i); + else if (in_range(i, mctarg->gpr0, mctarg->gpr0 + mctarg->ngpr - 1)) + gpregset = rsset(gpregset, i); + } } - bszero(globusage, 1); + ra.m.blocked = mctarg->rglob; + ra.m.free = rsminus(rsunion(gpregset, fpregset), ra.m.blocked); + memset(ra.m.freestk, 0xFF, sizeof ra.m.freestk); + globusage = 0; fn->stksiz = alignup(fn->stksiz, 8); + fn->isleaf = 1; + vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf)); - /* a dumb linear register allocator that visits instructions physically backwards - * starting at the end of the function, when encountering a use of a new - * temporary, it allocates a register for it. when encountering the definition - * of a temporary, it frees up its register - */ + /* generate copies for phi operands */ + blk = fn->entry; + do { + if (!blk->phi.n) continue; + for (int p = 0; p < blk->npred; ++p) { + struct block *n, *pred = blkpred(blk, p); + if (!pred->s2) { + /* pred only has 1 successor (blk), so insert move directly in it */ + n = pred; + } else { + n = insertblk(fn, pred, blk); + assert(n->jmp.t == Jb && n->s1 == blk); + } + for (int i = 0; i < blk->phi.n; ++i) { + int phi = blk->phi.p[i]; + union ref *args = phitab.p[instrtab[phi].l.i]; + args[p] = insertinstr(n, n->ins.n, mkinstr(Ocopy, instrtab[phi].cls, args[p])); + } + } + } while ((blk = blk->lnext) != fn->entry); + fillblkids(fn); + bbrm = xcalloc(sizeof *bbrm * (nbbrm = fn->nblk)); + + /* visit blocks in reverse, allocating registers in a greedy manner */ blk = last; do { + int curi; + DBG("do @%d\n", blk->id); + memcpy(bbrm[blk->id].out.regs, ra.m.regs, sizeof ra.m.regs); + imap_copy(&bbrm[blk->id].out.allocs, &ra.m.allocs); + dumpallocs("out", &ra.m); + + if (blk->s1 && blk->s1->phi.n) { + /* if the block is a predecessor to some phis, introduce use points + * for the corresponding arguments to each phi */ + struct block *s = blk->s1; + int predno = 0; + for (; predno < s->npred; ++predno) + if (blkpred(s, predno) == blk) + break; + assert(predno < s->npred); + for (int i = s->phi.n - 1; i >= 0; --i) { + struct instr *phi = &instrtab[s->phi.p[i]]; + union ref *arg = &phitab.p[phi->l.i][predno];; + use(&ra, blk, blk->ins.n, Ophi, phi->reg-1, arg, NOREF); + } + } + + curi = blk->ins.n - 1; for (int i = 0; i < 2; ++i) { if (!blk->jmp.arg[i].bits) break; /* do not allocate a reg for a cmp op used a branch argument, since it's a pseudo op */ if (blk->jmp.t == Jb && blk->jmp.arg[i].t == RTMP && oiscmp(instrtab[blk->jmp.arg[i].i].op)) break; - use(&ra, blk, blk->ins.n-1, (blk->jmp.t != Jb) - 1, + use(&ra, blk, curi, (blk->jmp.t != Jb) - 1, blk->jmp.t == Jret ? fn->abiret[i].reg : -1, &blk->jmp.arg[i], blk->jmp.arg[!i]); } - for (int i = blk->ins.n - 1; i >= 0; --i) { - int hint0 = -1, hint1 = -1; - ins = &instrtab[blk->ins.p[i]]; - if (ins->op != Ocopy && !ra.allocs[ins - instrtab].t && ins->skip) { /* unused */ - *ins = mkinstr(Onop, 0,); - continue; - } - def(&ra, ins, blk, i); - if (ins->op != Ocall) { - if (ins->op == Ocopy || ins->inplace) hint0 = ins->reg - 1; - if (ins->op == Omove) { - if (ins->l.t == RREG) hint1 = ins->l.i; - /* MOV Rx,Rx is used by isel to indicate a clobber, - * so it should be a def point for Rx but not a use point */ - if (ins->r.bits != ins->l.bits) - use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF); - } else { - if (ins->r.t == RREG) { - use(&ra, blk, i, ins->op, hint0, &ins->r, NOREF); - use(&ra, blk, i, ins->op, hint0, &ins->l, ins->r); - } else { - if (ins->l.bits) use(&ra, blk, i, ins->op, hint0, &ins->l, ins->r); - if (ins->r.bits) use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF); - } - } - } else { - struct call *call = &calltab.p[ins->r.i]; - struct bitset rspill[1] = {0}; - - fn->isleaf = 0; - - for (int r = mctarg->gpr0; r < mctarg->gpr0 + mctarg->ngpr; ++r) - if (!bstest(mctarg->rglob, r) && !bstest(mctarg->rcallee, r)) - bsset(rspill, r); - for (int r = mctarg->fpr0; r < mctarg->fpr0 + mctarg->nfpr; ++r) - if (!bstest(mctarg->rglob, r) && !bstest(mctarg->rcallee, r)) - bsset(rspill, r); - - if (call->abiret[0].ty.bits) bsclr(rspill, call->abiret[0].reg); - if (call->abiret[1].ty.bits) bsclr(rspill, call->abiret[1].reg); - for (uint r = 0; bsiter(&r, rspill, 1); ++r) { - spill(&ra, r, blk, i); - } - for (int j = 0; j < call->narg; ++j) { - short reg = call->abiarg[j].reg; - if (reg >= 0) { - forcetake(&ra, reg, mkref(RREG, reg), blk, i); - } - } - - use(&ra, blk, i, ins->op, hint0, &ins->l, NOREF); - } - if (ins->op == Ocopy && !ins->reg) - *ins = mkinstr(Onop, 0,); - if (ins->inplace && ins->reg) { - assert(ins->l.t == RREG); - if (ins->reg-1 != ins->l.i) { - /* an in-place operation where the destination does not - * match the first operand, so we need to add a move */ - insertinstr(blk, i, mkmove(insrescls(*ins), ins->reg-1, ins->l.i)); - ins->l.i = ins->reg-1; - } - } + for (; curi >= 0; --curi) { + struct instr *ins = &instrtab[blk->ins.p[curi]]; + if (ins->op == Ocall) fn->isleaf = 0; + doins(&ra, blk, ins, curi); } + for (int i = blk->phi.n - 1; i >= 0; --i) { - union ref *phi; - ins = &instrtab[blk->phi.p[i]]; - assert(ins->op == Ophi); - phi = phitab.p[ins->l.i]; - if (ins->reg) { - /* introduce necessary moves in each pred, - * XXX this doesn't work for backwards branches */ - for (int i = 0; i < blk->npred; ++i) { - struct instr mov = mkinstr(Omove, insrescls(*ins), mkref(RREG, ins->reg-1), phi[i]); - insertinstr(blkpred(blk, i), blkpred(blk, i)->ins.n, mov); - } - } - def(&ra, ins, NULL, 0); + struct instr *phi = &instrtab[blk->phi.p[i]]; + def(&ra, phi, blk, -1); + } + + memcpy(bbrm[blk->id].in.regs, ra.m.regs, sizeof ra.m.regs); + imap_copy(&bbrm[blk->id].in.allocs, &ra.m.allocs);; + } while ((blk = blk->lprev) != last); + + if (1&&ccopt.dbg.r) { + efmt("<< regalloc before resolve\n"); + irdump(fn); + } + + /* resolve allocation mismatches across each edge */ + blk = last = fn->entry->lprev; + do { + if (blk->id < 0) continue; + if (blk->s1) resolve(fn, bbrm, blk, blk->s1); + if (blk->s2) resolve(fn, bbrm, blk, blk->s2); + } while ((blk = blk->lprev) != last); + + /* parallel copies for each phi */ + blk = last = fn->entry->lprev; + do { + if (blk->id < 0) continue; + for (int i = 0; i < blk->npred; ++i) { + if (blkpred(blk, i)->id < 0) continue; + lowerphis(fn, bbrm, blkpred(blk, i), blk); } } while ((blk = blk->lprev) != last); - do vfree(&blk->phi); while ((blk = blk->lprev) != last); + /* final cleanup */ + id = 0; + blk = fn->entry; + do { + bool allnops = 1; + blk->id = id++; + for (int i = 0; i < blk->ins.n; ++i) { + struct instr *ins = &instrtab[blk->ins.p[i]]; + if (!ins->reg && insrescls(*ins) && ins->op != Omove && !oiscmp(ins->op)) { + /* dead */ + Nop: + *ins = mkinstr(Onop,0,); + } else if (ins->op == Omove && ins->r.t == RREG && ins->l.i == ins->r.i) { + goto Nop; + } else if (ins->op == Ocopy && ins->l.t == RREG && ins->reg-1 == ins->l.i) { + goto Nop; + } else if (ins->inplace && ins->l.t == RREG && ins->reg-1 != ins->l.i) { + /* fixup in-place (two-address) instructions */ + allnops = 0; + insertinstr(blk, i++, mkmove(ins->cls, ins->reg-1, ins->l.i)); + ins->l.i = ins->reg-1; + } else if (ins->op != Onop) allnops = 0; + } + + /* remove no-op blocks with only 1 pred + 1 succ OR 1 pred w/ 1 succ + 0 succ*/ + if (allnops && blk->npred == 1 && !blk->s2) { + struct block *p = blkpred(blk, 0); + if (!p->s2 && !blk->s1) { + /* simplify: + * + * @p: + * ... + * b @blk + * @blk: + * NOP + * ret + */ + assert(p->s1 == blk); + p->jmp.t = Jret; + p->s1 = NULL; + DelBlk: + vfree(&blk->phi); + vfree(&blk->ins); + blk->lnext->lprev = blk->lprev; + blk->lprev->lnext = blk->lnext; + --id; + } else if (blk->s1) { + /* simplify: + * + * @p: + * ... + * b %x, @blk, @other + * @blk: + * NOP + * b @next + */ + struct block *next = blk->s1; + if (p->s1 == blk) p->s1 = next; + else if (p->s2 == blk) p->s2 = next; + else continue; + goto DelBlk; + } + } + } while ((blk = blk->lnext) != fn->entry); + fn->nblk = id; + + blk = fn->entry; + do vfree(&blk->phi); while ((blk = blk->lnext) != fn->entry); fn->stksiz += ra.maxstk*8; if (fn->stksiz > 1<<24) error(NULL, "'%s' stack frame too big", fn->name); @@ -396,9 +758,13 @@ regalloc(struct function *fn) efmt("<< After regalloc >>\n"); irdump(fn); } - bscopy(fn->regusage, globusage, 1); - free(ra.allocs); - vfree(&ra.freestk); + for (int i = 0; i < nbbrm; ++i) { + imap_free(&bbrm[i].in.allocs); + imap_free(&bbrm[i].out.allocs); + } + imap_free(&ra.m.allocs); + free(bbrm); + fn->regusage = globusage; } /* vim:set ts=3 sw=3 expandtab: */ @@ -1,6 +1,29 @@ #include "common.h" #include "ir.h" +/* require use, keeps use */ +void +copyopt(struct function *fn) +{ + struct block *blk = fn->entry; + + do { + for (int i = 0; i < blk->ins.n; ++i) { + struct use *use, *uend; + union ref var = mkref(RTMP, blk->ins.p[i]); + struct instr *ins = &instrtab[var.i]; + + if (ins->op == Ocopy) { + assert(ins->l.t != RREG); + + replcuses(var, ins->l); + *ins = mkinstr(Onop,0,); + deluses(var.i); + } + } + } while ((blk = blk->lnext) != fn->entry); +} + void filluses(struct function *fn) { @@ -1,6 +1,6 @@ unsigned fib(unsigned x) { unsigned r = 0, q = 1; - while (x--) { + while (x-- > 1) { unsigned s = r + q; r = q; q = s; @@ -8,12 +8,18 @@ unsigned fib(unsigned x) { return q; } +unsigned fibr(unsigned x) { + if (x < 2) return x; + return fibr(x-1) + fibr(x-2); +} + int atoi(const char *); int printf(const char *, ...); int main(int argc, char **argv) { unsigned n = argv[1] ? atoi(argv[1]) : 10; - printf("fib(%u) = %u\n", n, fib(n)); + printf("fib(%u) = %u\n", n, fib(n)); + printf("fibr(%u) = %u\n", n, fibr(n)); } /* vim:set ts=3 sw=3 expandtab: */ |