diff options
| author | 2025-09-08 22:05:33 +0200 | |
|---|---|---|
| committer | 2025-09-08 22:05:33 +0200 | |
| commit | e043811980db560fc2507bb53b644e54c80527dc (patch) | |
| tree | 6ea563d81c9d3767f439e361fc2c884cf4f9b64d | |
| parent | 36b5b19bf183cb01525201ccbddd6afa692f21bb (diff) | |
regalloc: start implementing linear scan
| -rw-r--r-- | common.h | 11 | ||||
| -rw-r--r-- | ir.c | 22 | ||||
| -rw-r--r-- | ir.h | 5 | ||||
| -rw-r--r-- | irdump.c | 16 | ||||
| -rw-r--r-- | main.c | 22 | ||||
| -rw-r--r-- | mem.c | 13 | ||||
| -rw-r--r-- | optmem.c | 7 | ||||
| -rw-r--r-- | regalloc.c | 1029 | ||||
| -rw-r--r-- | test/collatz.c | 29 | ||||
| -rw-r--r-- | test/fact.c | 13 | ||||
| -rw-r--r-- | test/fib.c | 2 | ||||
| -rw-r--r-- | test/sort.c | 28 |
12 files changed, 729 insertions, 468 deletions
@@ -411,8 +411,8 @@ int imap_set_(struct imapbase *, void **v, uint vsiz, short k); #define imap_clear(m) ((m)->mb.bs ? bszero((m)->mb.bs, BSSIZE((m)->mb.N)) : (void)0, \ (m)->mb.n = 0) #define imap_get(m, k) ((m)->tmp = imap_get_(&(m)->mb, k), (m)->tmp < 0 ? NULL : &(m)->v[(m)->tmp]) -#define imap_set(m, k, x) ((m)->tmp = imap_set_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, k), \ - (m)->v[(m)->tmp] = (x), &(m)->v[(m)->tmp]) +#define imap_set(m, k, ...) ((m)->tmp = imap_set_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, k), \ + (m)->v[(m)->tmp] = (__VA_ARGS__), &(m)->v[(m)->tmp]) #define imap_each(m,kx,pvx) \ for (int _i = 0; _i < (m)->mb.N && ((kx) = (m)->mb.k[_i], (pvx) = &(m)->v[_i], 1); ++_i) \ if (bstest((m)->mb.bs, _i)) @@ -471,6 +471,12 @@ bscopy(struct bitset dst[/*siz*/], const struct bitset src[/*siz*/], uint siz) while (siz--) dst++->u = src++->u; } +static inline void +bsunion(struct bitset dst[/*siz*/], const struct bitset src[/*siz*/], uint siz) +{ + while (siz--) dst++->u |= src++->u; +} + static inline bool bsiter(uint *i, struct bitset bs[/*siz*/], uint siz) { @@ -478,6 +484,7 @@ bsiter(uint *i, struct bitset bs[/*siz*/], uint siz) if (bstest(bs, *i)) return 1; return 0; } +#define bs_each(T, var, bs, siz) for (T (var) = 0; bsiter(&(var), (bs), (siz)); ++(var)) typedef uvlong regset; #define rsset(S, r) ((S) | 1ull << (r)) @@ -1,5 +1,4 @@ #include "ir.h" -#include "endian.h" #include "obj.h" uchar type2cls[NTYPETAG]; @@ -13,6 +12,13 @@ const char *opnames[] = { #undef _ }; +const uchar opnarg[] = { + 0, +#define _(o,n) n, +#include "op.def" +#undef _ +}; + struct instr instrtab[MAXINSTR]; struct use *instruse[MAXINSTR]; short instrnuse[MAXINSTR]; @@ -362,6 +368,17 @@ insertphi(struct block *blk, enum irclass cls) return mkref(RTMP, new); } +void +numberinstrs(struct function *fn) +{ + struct block *blk = fn->entry; + int start = 0; + do { + blk->inumstart = start; + start += blk->ins.n+2; + } while ((blk = blk->lnext) != fn->entry); +} + /* require use */ void replcuses(union ref from, union ref to) @@ -387,7 +404,6 @@ replcuses(union ref from, union ref to) if (u[j].bits == from.bits) { u[j].bits = to.bits; adduse(use.blk, use.u, to); - deluse(use.blk, use.u, from); --i; break; } @@ -411,7 +427,7 @@ delinstr(struct block *blk, int idx) memcpy(&instrtab[t], &instrfreelist, sizeof(int)); instrfreelist = t; deluses(t); - for (int i = idx; i < blk->ins.n; ++i) + for (int i = idx; i < blk->ins.n-1; ++i) blk->ins.p[i] = blk->ins.p[i + 1]; --blk->ins.n; } @@ -90,6 +90,7 @@ enum op { #define oisstore(o) in_range(o, Ostore1, Ostore8) #define oisload(o) in_range(o, Oloads1, Oloadf8) extern const char *opnames[]; +extern const uchar opnarg[]; enum intrin { INxxx, @@ -105,7 +106,7 @@ struct instr { keep : 1; /* for codegen, keep instr even if result seems unused */ uchar inplace : 1; /* set (by isel) for instructions which modify its first arg in place */ uchar reg; /* 0 -> no reg; else reg + 1 */ - union ref l, r; + union ref l, r; /* args */ }; enum jumpkind { JXXX, Jb, Jret, }; @@ -114,6 +115,7 @@ struct block { int id; int npred; int visit; + int inumstart; union { struct block *_pred0; struct block **_pred; @@ -239,6 +241,7 @@ void fillblkids(struct function *); #define startbbvisit() (void)(++visitmark) #define wasvisited(blk) ((blk)->visit == visitmark) #define markvisited(blk) ((blk)->visit = visitmark) +void numberinstrs(struct function *); /* IR builder functions */ union ref addinstr(struct function *, struct instr); @@ -142,18 +142,10 @@ dumpcall(struct call *call) } } -static const uchar opnarg[] = { - 0, -#define _(o,n) n, -#include "op.def" -#undef _ -}; - static void dumpinst(const struct instr *ins) { int i; - efmt(" "); if (ins->op == Omove) { efmt("move %s ", clsname[ins->cls]); } else { @@ -190,7 +182,9 @@ dumpblk(struct function *fn, struct block *blk) for (i = 0; i < blk->phi.n; ++i) { struct instr *phi = &instrtab[blk->phi.p[i]]; union ref *refs = phitab.p[phi->l.i]; - efmt(" %s ", clsname[phi->cls]); + if (i == 0) efmt("%-4d", blk->inumstart); + else efmt(" |> "); + efmt(" %s ", clsname[phi->cls]); if (!phi->reg) efmt("%%%d = %s ", blk->phi.p[i], opnames[phi->op]); else efmt("(%%%d)%s = %s ", phi - instrtab, mctarg->rnames[phi->reg-1], opnames[phi->op]); for (int i = 0; i < blk->npred; ++i) { @@ -201,9 +195,10 @@ dumpblk(struct function *fn, struct block *blk) efmt("\n"); } for (i = 0; i < blk->ins.n; ++i) { + efmt("%-4d ", blk->inumstart + 1 + i); dumpinst(&instrtab[blk->ins.p[i]]); } - efmt(" %s ", jnames[blk->jmp.t]); + efmt("%-4d %s ", blk->inumstart + 1 + i, jnames[blk->jmp.t]); if (blk->jmp.arg[0].bits && !fn->nabiret && isagg(fn->retty)) { /* un-lowered struct return */ dumpref(0, mktyperef(mkirtype(fn->retty))); @@ -248,6 +243,7 @@ irdump(struct function *fn) } efmt("\n"); } + numberinstrs(fn); blk = fn->entry; do { dumpblk(fn, blk); @@ -86,6 +86,8 @@ static struct task { bool verbose; } task; +static void prihelp(void); + static void optparse(char **args) { @@ -101,7 +103,10 @@ optparse(char **args) ft = IFTauto; continue; } - if ((x = optval(arg, "std"))) { + if (!strcmp(arg, "help") || !strcmp(arg, "h") || !strcmp(arg, "-help")) { + prihelp(); + exit(0); + } else if ((x = optval(arg, "std"))) { if (!strcmp(x, "c89") || !strcmp(x, "c90")) ccopt.cstd = STDC89; else if (!strcmp(x, "c99")) ccopt.cstd = STDC99; else if (!strcmp(x, "c11")) ccopt.cstd = STDC11; @@ -266,6 +271,21 @@ dolink(void) return WEXITSTATUS(wstat); } +static void +prihelp(void) +{ + pfmt("Usage: antcc [options] file...\n\n" + "Options:\n" + " -help \tPrint this help message\n" + " -std=<..> \tSet C standard\n" + " -pedantic \tWarnings for strict standards compliance\n" + " -d{pailrm} \tDebug options\n" + " -o <file> \tPlace the output into <file>\n" + " -v \tVerbose output\n" + " -c \tEmit object file but do not link\n" + ); +} + static int driver(void) { @@ -74,16 +74,9 @@ vpushn_(void **p, int *pcap, uint *pn, uint siz, const void *dat, uint ndat) void vresize_(void **p, int *pcap, uint *pn, uint siz, uint N) { - if (N <= *pn) { - } else if (*pcap > 0 && *pcap < N) { - void *old = *p; - *p = xrealloc(NULL, -(*pcap = -(N * siz))); - if (old) memcpy(*p, old, *pcap * siz); - } else if (*pcap <= 0 && -*pcap < N) { - *pcap = *pcap ? *pcap : -1; - do *pcap *= 2; while (-*pcap < N); - *p = xrealloc(*p, -*pcap * siz); - memset((char *)*p + *pn*siz, 0, (N - *pn) * siz); + while ((*pcap < 0 ? -*pcap : *pcap) < N) { + vpush_(p, pcap, pn, siz); + *pn = *pcap < 0 ? -*pcap : *pcap; } *pn = N; } @@ -19,7 +19,7 @@ static const uchar load2ext[] = { #define load2ext(o) (load2ext[(o) - Oloads1]) #define storesz(o) (1 << ((o) - Ostore1)) -/* Implements algorithm in 'Simple and Efficient Construction of Static Single' (Braun et al) */ +/* Implements algorithm in 'Simple and Efficient Construction of Static Single Assignment' (Braun et al) */ struct pendingphi { ushort var, phi; }; struct ssabuilder { @@ -43,6 +43,9 @@ deltrivialphis(struct ssabuilder *sb, struct block *blk, union ref phiref) assert(instrtab[phiref.i].op == Ophi); + if (phiref.i == 4) + efmt(""); + for (int i = 0; i < blk->npred; ++i) { if (args[i].bits == same.bits || args[i].bits == phiref.bits) continue; /* unique value or self-reference */ @@ -79,6 +82,7 @@ Redo: } } } + deluses(phiref.i); return same; } @@ -185,7 +189,6 @@ mem2reg(struct function *fn) } do { - for (int i = 0; i < blk->ins.n; ++i) { struct use *use, *uend; enum irclass k = 0; @@ -1,6 +1,6 @@ #include "ir.h" -/* Implements reverse linear register allocation, quite ad-hoc right now */ +/* Implements linear scan register allocation */ static regset gpregset, fpregset; static regset globusage; @@ -24,10 +24,32 @@ struct rmap { imap_of(struct alloc) allocs; /* map tmpidx -> allocation */ }; +struct range { ushort from, to; }; +struct interval { + struct interval *next; + short reg : 8, rhint : 8, fpr : 1; + short n; + union { + struct range _inl[2]; + struct range *_dyn; + }; +}; +struct lifetimes { + imap_of(struct interval) temps; + struct interval *unhandled, *active, *inactive, *handled; + struct fixinterval { + struct fixinterval *next; + struct range range; + regset rs; + } *fixed; +}; + struct rega { struct function *fn; + struct arena *arena; struct rmap m; int maxstk, stktop; + struct lifetimes intervals; }; static vec_of(union ref *) stkslotrefs; @@ -39,7 +61,7 @@ addstkslotref(union ref inst) return inst; } -static int allocreg(struct rega *ra, enum irclass cls, union ref ref, uvlong excl); +#define mkmove(k, rd, rs) mkinstr(Omove, k, mkref(RREG, rd), mkref(RREG, rs)) #if 1 #define DBG(...) if(ccopt.dbg.r) efmt(__VA_ARGS__) @@ -61,15 +83,6 @@ dumpallocs(const char *s, struct rmap *m) #define dumpallocs(...) #endif -static void -freereg(struct rega *ra, int r) -{ - ra->m.free = rsset(ra->m.free, r); - ra->m.blocked = rsclr(ra->m.blocked, r); -} - -static void take(struct rega *ra, int reg, union ref ref); - static int allocstk(struct rega *ra, int var) { @@ -100,293 +113,6 @@ freestk(struct rega *ra, int slot) --ra->stktop; } -static void -def(struct rega *ra, struct instr *ins, struct block *blk, int curi) -{ - int reg = -1, var; - struct alloc *alloc; - if (ins->op != Omove && ins->op != Ocall) { - var = ins - instrtab; - DBG("def %%%d\n",var); - alloc = imap_get(&ra->m.allocs, var); - if (!alloc || alloc->t == ADEAD) { - return; - } else if (alloc->t == AREG) { - reg = alloc->a; - DBG("-- free %s for %%%d\n", mctarg->rnames[alloc->a], var); - assert(!rstest(ra->m.free, reg)); - assert(ra->m.regs[reg] == var); - ins->reg = reg+1; - } else if (alloc->t == ASTACK && ins->op != Ophi) { - /* unspill, insert 'store [slot], reg' */ - struct alloc astk = *alloc; - struct instr store; - int reg = -1; - - if ((ins->op == Ocopy || ins->inplace) && ins->l.t == RREG) { - int hint = ins->l.i; - if (rstest(ra->m.free, hint) && kisflt(insrescls(*ins)) == isfpr(hint)) { - take(ra, reg = hint, mkref(RTMP, var)); - assert(ra->m.regs[reg] == var && !rstest(ra->m.free, hint)); - } - } - if (reg < 0) - reg = allocreg(ra, insrescls(*ins), mkref(RTMP, var), 0); - store = mkinstr(Ostore1 + ilog2(cls2siz[insrescls(*ins)]), 0, - mkref(RICON, astk.a*8), mkref(RREG, reg)); - DBG("-- unspill %%%d s%d -> %s\n", var, astk.a, mctarg->rnames[reg]); - addstkslotref(insertinstr(blk, ++curi, store)); - freestk(ra, astk.a); - ins->reg = reg+1; - def(ra, ins, blk, curi); /* and free the reg again */ - return; - } - if (alloc && (alloc->t != ASTACK || ins->op != Ophi)) *alloc = afree(); - } else if (ins->op == Omove) { - assert(ins->l.t == RREG); /* move to a register */ - assert(rstest(ra->m.blocked, ins->l.i)); - reg = ins->l.i; - DBG("-- free %s\n", mctarg->rnames[ins->l.i]); - } else { - struct call *call = &calltab.p[ins->r.i]; - for (int i = 0; i < 2; ++i) { - if (!call->abiret[i].ty.cls) break; - freereg(ra, call->abiret[i].reg); - } - return; - } - if (reg != -1) freereg(ra, reg); -} - -static void -take(struct rega *ra, int reg, union ref ref) { - DBG("-- take %s for %c%d\n", mctarg->rnames[reg], "R%"[ref.t==RTMP], ref.i); - assert(rstest(ra->m.free, reg) && "taken"); - if (ref.t == RTMP) { - (void)imap_set(&ra->m.allocs, ref.i, areg(reg)); - ra->m.regs[reg] = ref.i; - } else { - ra->m.blocked = rsset(ra->m.blocked, reg); - } - ra->m.free = rsclr(ra->m.free, reg); - globusage = rsset(globusage, reg); -} - -static int -allocreg(struct rega *ra, enum irclass cls, union ref ref, uvlong excl) -{ - regset rs; - int reg; - - assert(cls); - if (kisint(cls)) { - rs = gpregset; - } else if (kisflt(cls)) { - rs = fpregset; - } else assert(0); - rs = rsminus(rs, mctarg->rglob); - rs = rsand(rs, ra->m.free); - rs = rsminus(rs, excl); - - assert(rs != 0 && "no more reg"); - if (rsminus(rs, mctarg->rcallee) != 0) - reg = lowestsetbit(rsminus(rs, mctarg->rcallee)); - else - reg = lowestsetbit(rs); - take(ra, reg, ref); - return reg; -} - -static void -spill(struct rega *ra, int reg, struct block *blk, int curi) { - int var, s; - struct instr load; - struct alloc *alloc; - - if (rstest(ra->m.free, reg)) return; - var = ra->m.regs[reg]; - assert(!rstest(ra->m.blocked, reg)); - assert((alloc = imap_get(&ra->m.allocs, var)) && !memcmp(alloc, &areg(reg), sizeof *alloc)); - s = allocstk(ra, var); - DBG("-- spill %%%d %s -> s%d\n", var, mctarg->rnames[reg], s); - instrtab[var].reg = 0; - /* insert 'reg = load [slot]' */ - load = mkinstr(Oloads1 + 2*ilog2(cls2siz[insrescls(instrtab[var])]), - insrescls(instrtab[var]), mkref(RICON, s*8)); - load.reg = reg+1; - addstkslotref(insertinstr(blk, ++curi, load)); - freereg(ra, reg); -} - -#define mkmove(k, rd, rs) mkinstr(Omove, k, mkref(RREG, rd), mkref(RREG, rs)) - -static void -forcetake(struct rega *ra, int reg, union ref ref, struct block *blk, int curi) { - int var; - struct alloc *alloc; - - if (ref.t == RREG && rstest(ra->m.blocked, ref.i)) { - assert(!rstest(ra->m.free, reg)); - return; - } - if (ref.t == RTMP && !rstest(ra->m.free, reg) && ra->m.regs[reg] == ref.i) return; - if (rstest(ra->m.free, reg)) { - take(ra, reg, ref); - return; - } - assert(!rstest(ra->m.blocked, ref.i)); - var = ra->m.regs[reg]; - alloc = imap_get(&ra->m.allocs, var); - assert(alloc && alloc->a == reg); - *alloc = afree(); - /* try to move temp to another register */ - if (rsand(ra->m.free, isgpr(reg) ? gpregset : fpregset) != 0) { - /* the register of the current instruction (if any) was already free'd (by def), so - * we need to explictly exclude it from the pool of rename registers - * e.g.: given 'R0 = copy R1'; if R1 => %x, we need to prevent renaming %x => R0 - */ - regset excl = instrtab[blk->ins.p[curi]].reg; - int rename = allocreg(ra, isgpr(reg) ? KI4 : KF4, mkref(RTMP, ra->m.regs[reg]), - excl ? rsset(0, excl-1) : 0); - if (ccopt.dbg.r)DBG("-- rename %%%d %s -> %s\n", var, mctarg->rnames[reg], mctarg->rnames[rename]); - /* introduce move from rename -> original (since we allocate backwards) */ - insertinstr(blk, ++curi, mkmove(insrescls(instrtab[var]), reg, rename)); - ra->m.regs[rename] = var; - globusage = rsset(globusage, rename); - (void)imap_set(&ra->m.allocs, var, areg(rename)); - ra->m.free = rsset(ra->m.free, reg); - } else { - spill(ra, reg, blk, curi); - ra->m.free = rsset(ra->m.free, reg); - } - take(ra, reg, ref); -} - -/* mark a use for *ref, possibly allocating a register for it, considering it won't clash with `other` */ -static void -use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union ref *ref, union ref other) -{ - struct instr *iuse, *ins; - uvlong excl = other.t == RREG ? 1ull<<other.i : 0; - struct alloc *alloc; - int reg; - - if (ref->t == RADDR) { - struct addr addr = addrht[ref->i]; - if (addr.base.bits) use(ra, blk, curi, 0, hint, &addr.base, addr.index); - if (addr.index.bits) use(ra, blk, curi, 0, hint, &addr.index, NOREF); - *ref = mkaddr(addr); - return; - } else if (ref->t == RREG) { - int reg = ref->i; - assert(hint<0 || !rstest(excl, hint)); - if (op == -Jret) { - /* since return marks an exit block, if any temporary is occuping a - * return register we mark it as dead since it cannot be live after - * the return, so can't rename it like forcetake would try to do */ - if (!rstest(ra->m.free, reg)) { - (void)imap_set(&ra->m.allocs, ra->m.regs[reg], afree()); - ra->m.free = rsset(ra->m.free, reg); - } - } - forcetake(ra, reg, *ref, blk, curi); - return; - } - if (ref->t != RTMP) return; - - iuse = curi < blk->ins.n ? &instrtab[blk->ins.p[curi]] : NULL; - ins = &instrtab[ref->i]; - if (!ins->cls) return; - if (!(alloc = imap_get(&ra->m.allocs, ref->i)) || alloc->t != AREG) { - int s = -1; - if (alloc && alloc->t == ASTACK) { - /* ensure spill isn't overwritten by dest - * e.g. in R0 = add %s, 7 => can't spill %s to R0 */ - s = alloc->a; - if (iuse && iuse->reg) excl = rsset(excl, iuse->reg-1); - } else if (iuse && iuse->inplace && iuse->reg && ref == &iuse->r - && iuse->l.bits != mkref(RREG, iuse->reg-1).bits) { - /* ensure in an inplace operation rhs reg cannot overlap dest reg - * e.g. in R0 = sub R1, %x => cannot allocate %x to R0 - * FIXME in commutative ops this is fine if we swap the operands */ - excl = rsset(excl, iuse->reg-1); - } - - assert(ins->op != Ocall); - if (ins->r.t == RREG && ins->inplace) excl |= 1ull<<ins->r.i; - if ((hint == -1 || !rstest(ra->m.free, hint)) && ins->op == Ocopy && ins->l.t == RREG) - /* for '%x = copy Rx', hint %x to use Rx */ - hint = ins->l.i; - - if (hint != -1 && !rstest(excl, hint) && rstest(ra->m.free, hint) - && kisflt(insrescls(*ins)) == isfpr(hint)) { - take(ra, hint, *ref); - reg = hint; - } else { - reg = allocreg(ra, insrescls(*ins), *ref, excl); - } - - if (s >= 0) { - /* unspill, insert 'store [slot], reg' */ - DBG("-- unspill %%%d s%d -> %s\n", ref->i, s, mctarg->rnames[reg]); - struct instr store = mkinstr(Ostore1 + ilog2(cls2siz[insrescls(*ins)]), 0, - mkref(RICON, s*8), - mkref(RREG, reg)); - addstkslotref(insertinstr(blk, ++curi, store)); - freestk(ra, s); - } - } else { - reg = alloc->a; - } - /* do not patch ref if it's used in a phi - * or if it's cond branch arg, emit() wants to know what instr it is */ - if (op != Ophi) - if (ref != blk->jmp.arg || blk->jmp.t != Jb) - *ref = mkref(RREG, reg); -} - -static void -doins(struct rega *ra, struct block *blk, struct instr *ins, int curi) -{ - int hint0 = -1, hint1 = -1; - if (ins->op != Ocopy && !imap_get(&ra->m.allocs, ins - instrtab) && ins->skip) { /* unused */ - *ins = mkinstr(Onop, 0,); - return; - } - def(ra, ins, blk, curi); - if (ins->op != Ocall) { - if (ins->op == Ocopy || ins->inplace) hint0 = ins->reg - 1; - if (ins->op == Omove) { - if (ins->l.t == RREG) hint1 = ins->l.i; - /* MOV Rx,Rx is used by isel to indicate a clobber, - * so it should be a def point for Rx but not a use point */ - if (ins->r.bits != ins->l.bits) - use(ra, blk, curi, ins->op, hint1, &ins->r, NOREF); - } else { - if (ins->l.bits) use(ra, blk, curi, ins->op, hint0, &ins->l, ins->r); - if (ins->r.bits) use(ra, blk, curi, ins->op, hint1, &ins->r, NOREF); - } - } else { - struct call *call = &calltab.p[ins->r.i]; - regset rspill = rsminus(rsunion(gpregset, fpregset), rsunion(mctarg->rglob, mctarg->rcallee)); - - dumpallocs("CALL", &ra->m); - if (call->abiret[0].ty.bits) rspill = rsclr(rspill, call->abiret[0].reg); - if (call->abiret[1].ty.bits) rspill = rsclr(rspill, call->abiret[1].reg); - for (int r = 0; rsiter(&r, rspill); ++r) { - spill(ra, r, blk, curi); - } - for (int j = 0; j < call->narg; ++j) { - short reg = call->abiarg[j].reg; - if (reg >= 0) { - forcetake(ra, reg, mkref(RREG, reg), blk, curi); - } - } - - use(ra, blk, curi, ins->op, hint0, &ins->l, NOREF); - } -} - /* Parallel moves algorithm from QBE: https://c9x.me/git/qbe.git/tree/rega.c?id=e493a7f23352f51acc0a1e12284ab19d7894488a#n201 */ enum pmstat { PMTOMOVE, PMMOVING, PMDONE }; @@ -400,6 +126,7 @@ static int npmove; static void pmadd(enum irclass k, struct alloc dst, struct alloc src) { + if (!memcmp(&dst, &src, sizeof dst)) return; assert(npmove < MAXREGS); pmove[npmove++] = (struct pmove) { k, PMTOMOVE, dst, src }; } @@ -407,7 +134,7 @@ pmadd(enum irclass k, struct alloc dst, struct alloc src) static void emitmove(enum irclass k, struct alloc dst, struct alloc src, struct block *blk, int curi) { - struct instr mv = {0}; + struct instr mv = {.keep = 1}; if (dst.t == AREG && src.t == AREG) { insertinstr(blk, curi, mkmove(k, dst.a, src.a)); } else if (dst.t == ASTACK && src.t == AREG) { @@ -443,10 +170,11 @@ pmrec(int i, struct block *blk, int curi, enum irclass *k) if (cls2siz[pmove[i].k] > cls2siz[*k]) *k = pmove[i].k; - for (j = 0; j < npmove; ++j) + for (j = 0; j < npmove; ++j) { if (!memcmp(&pmove[j].dst, &pmove[i].src, sizeof pmove->dst)) { break; } + } if (j == npmove) goto Done; switch (pmove[j].stat) { default: assert(0); @@ -455,7 +183,7 @@ pmrec(int i, struct block *blk, int curi, enum irclass *k) Swap: assert(pmove[i].src.t == AREG && pmove[i].dst.t == AREG); insertinstr(blk, curi, - mkinstr(Oswap, *k, mkref(RREG, pmove[i].dst.a), mkref(RREG, pmove[i].src.a))); + mkinstr(Oswap, *k, mkref(RREG, pmove[i].dst.a), mkref(RREG, pmove[i].src.a), .keep = 1)); break; case PMTOMOVE: pmove[i].stat = PMMOVING; @@ -482,21 +210,18 @@ static void emitpm(struct block *blk) { int curi = blk->ins.n; - for (int i = 0; i < npmove; ++i) + for (int i = 0; i < npmove; ++i) { if (pmove[i].stat == PMTOMOVE) { pmrec(i, blk, curi, &(enum irclass) { pmove[i].k }); } + } } -struct bbrm { struct rmap in, out; }; - static void -lowerphis(struct function *fn, struct bbrm *bbrm, struct block *blk, struct block *suc) +lowerphis(struct function *fn, struct block *blk, struct block *suc) { int predno; struct alloc *alloc; - struct rmap *out = &bbrm[blk->id].out; - struct rmap *in = &bbrm[suc->id].in; struct block *n = NULL; if (!blk->s2) n = blk; @@ -520,7 +245,7 @@ lowerphis(struct function *fn, struct bbrm *bbrm, struct block *blk, struct bloc from = areg(instrtab[arg->i].reg - 1); DBG(" it had R%d\n", from.a); } else { - alloc = imap_get(&out->allocs, arg->i); + // alloc = imap_get(&out->allocs, arg->i); assert(alloc && alloc->t != ADEAD); from = *alloc; DBG(" found %c%d\n", " RS"[from.t], from.a); @@ -531,7 +256,7 @@ lowerphis(struct function *fn, struct bbrm *bbrm, struct block *blk, struct bloc to = areg(phi->reg - 1); DBG(" phi had R%d\n", to.a); } else { - alloc = imap_get(&in->allocs, phi - instrtab); + // alloc = imap_get(&in->allocs, phi - instrtab); assert(alloc && alloc->t != ADEAD); DBG(" found phi %c%d\n", " RS"[from.t], from.a); to = *alloc; @@ -546,40 +271,6 @@ lowerphis(struct function *fn, struct bbrm *bbrm, struct block *blk, struct bloc } static void -resolve(struct function *fn, struct bbrm *bbrm, struct block *blk, struct block *suc) -{ - ushort var; - struct alloc *alloc, *alloc2; - struct rmap *out = &bbrm[blk->id].out, *in = &bbrm[suc->id].in; - struct block *n = NULL; - - DBG("resolve(@%d, @%d)\n",blk->id, suc->id); - - npmove = 0; - if (!blk->s2) n = blk; - imap_each(&in->allocs, var, alloc) { - if (alloc->t == ADEAD) continue; - if ((alloc2 = imap_get(&out->allocs, var)) && alloc2->t != ADEAD - && memcmp(alloc2, alloc, sizeof *alloc) != 0) { - DBG("resolve @%d, @%d, %%%d\n", blk->id, suc->id, var); - DBG(" > move %c%d -> %c%d\n", " RS"[alloc2->t], alloc2->a, " RS"[alloc->t], alloc->a); - if (!n) { - n = insertblk(fn, blk, suc); - n->id = blk->id; /* use same bbrm */ - } - pmadd(insrescls(instrtab[var]), *alloc, *alloc2); - } - if (!instrtab[var].reg && alloc->t == AREG) { - instrtab[var].reg = alloc->a + 1; - } - } - if (n) emitpm(n); - - dumpallocs("in", in); - dumpallocs("out", out); -} - -static void rporec(struct block ***rpo, struct block *b) { if (wasvisited(b)) return; @@ -621,41 +312,11 @@ sortrpo(struct function *fn) static void fixlive(struct function *fn); -void -regalloc(struct function *fn) +/* generate copies for phi operands to transform into conventional-SSA */ +static void +fixcssa(struct function *fn) { - int id; - struct block *last = fn->entry->lprev, *blk; - static union ref *stkslotrefsbuf[64]; - struct bbrm *bbrm; - int nbbrm; - struct rega ra = {fn}; - - /* setup */ - if (!fpregset || !gpregset) { - for (int i = 0; i < MAXREGS; ++i) { - if (isfpr(i)) - fpregset = rsset(fpregset, i); - else if (isgpr(i)) - gpregset = rsset(gpregset, i); - } - } - ra.m.blocked = mctarg->rglob; - ra.m.free = rsminus(rsunion(gpregset, fpregset), ra.m.blocked); - memset(ra.m.freestk, 0xFF, sizeof ra.m.freestk); - globusage = 0; - fn->stksiz = alignup(fn->stksiz, 8); - fn->isleaf = 1; - vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf)); - - /* put into reverse post order */ - sortrpo(fn); - - /* fix liveness ranges */ - fixlive(fn); - - /* generate copies for phi operands to transform into CSSA */ - blk = fn->entry; + struct block *blk = fn->entry; do { if (!blk->phi.n) continue; for (int p = 0; p < blk->npred; ++p) { @@ -674,79 +335,566 @@ regalloc(struct function *fn) } } } while ((blk = blk->lnext) != fn->entry); - fillblkids(fn); - bbrm = xcalloc(sizeof *bbrm * (nbbrm = fn->nblk)); +} + +static bool +rangeoverlap(struct range a, struct range b) +{ + return (a.from < b.from && b.from < a.to) + || (b.from < a.from && a.from < b.to); +} + +static bool +rangeadj(struct range a, struct range b) +{ + return a.to+1 >= b.from; +} + +static void +pushrange(struct interval *lv, struct range r) +{ + if (lv->n < 2) lv->_inl[lv->n++] = r; + else if (lv->n > 2) xbpush(&lv->_dyn, &lv->n, r); + else { + struct range *d = NULL; + xbgrow(&d, 4); + memcpy(d, lv->_inl, 2*sizeof *d); + d[lv->n++] = r; + lv->_dyn = d; + } +} +#define itrange(lv, i) ((lv)->n <= 2 ? (lv)->_inl : (lv)->_dyn)[i] + +static void +addrange0(struct interval *it, struct range new) +{ + int dst = it->n; + + if (dst == 0) { Push: pushrange(it, new); return; } + /* start from the end, find place by order */ + for (int i = dst - 1; i >= 0; --i) { + struct range range = itrange(it, i); + + /* fully contained? */ + if (range.from <= new.from && new.to <= range.to) + return; + if (range.from > new.from) + dst = i; + } + if (dst == it->n) goto Push; + if (rangeoverlap(new, itrange(it, dst)) || rangeadj(new, itrange(it, dst))) { + struct range old = itrange(it, dst), + *merge = &itrange(it, dst); + if (new.from < old.from) merge->from = new.from; + if (new.to > old.to) merge->to = new.to; + } else { + /* insert at dst */ + pushrange(it, new); + for (int j = it->n-1; j > dst; --j) + itrange(it, j) = itrange(it, j-1); + itrange(it, dst) = new; + } + /* more merges? */ + for (struct range *last = &itrange(it, it->n-2); + it->n > 1 && (rangeoverlap(last[0], last[1]) || rangeadj(last[0], last[1])); + --last) + { + if (last[1].from < last[0].from) + last[0].from = last[1].from; + if (last[1].to > last[0].to) + last[0].to = last[1].to; + + if (--it->n == 2) { + struct range *tmp = it->_dyn; + memcpy(it->_inl, tmp, 2*sizeof*tmp); + xbfree(it->_dyn); + } + } +} + +static void +addrange(struct lifetimes *intervals, int t, struct range range, int reghint) +{ + struct interval *it; + it = imap_get(&intervals->temps, t); + if (!it) { + it = imap_set(&intervals->temps, t, + (struct interval){ .reg = -1, .rhint = reghint, .fpr = kisflt(insrescls(instrtab[t])) }); + } + addrange0(it, range); +} + +static bool +intervaldef(struct lifetimes *intervals, int t, struct block *blk, int pos, int reghint) +{ + struct interval *it = imap_get(&intervals->temps, t); + if (it) { + assert(it->n > 0); + + if (itrange(it, 0).from <= pos) /* shorten */ + itrange(it, 0).from = pos; + else + addrange0(it, (struct range){pos, blk->inumstart + blk->ins.n+1}); + if (it->rhint < 0) it->rhint = reghint; + } + return it != NULL; +} + +static void +priliveset(struct bitset *s, size_t siz) +{ + int k = 0; + DBG("{"); + for (uint i = 0; bsiter(&i, s, siz); ++i) { + if (k++) DBG(","); + DBG("%%%d",i); + } + DBG("}\n"); +} + - /* visit blocks in reverse, allocating registers in a greedy manner */ - blk = last; +static void +usereg(struct rega *ra, int reg, struct block *blk, int pos) +{ + struct fixinterval *fxit = alloc(&ra->arena, sizeof *fxit, 0); + fxit->next = ra->intervals.fixed; + fxit->range = (struct range) {blk->inumstart, pos}; + fxit->rs = 1<<reg; + ra->intervals.fixed = fxit; +} + +static void +defreg(struct rega *ra, int reg, int pos) { + for (struct fixinterval *fxit = ra->intervals.fixed; fxit; fxit = fxit->next) { + if (fxit->rs == 1<<reg) { + assert(fxit->range.from <= pos); + fxit->range.from = pos; + // DBG(">>>REG %s range @%d: %d-%d\n", mctarg->rnames[reg], fxit->range.from.blk, fxit->range.from.ins, fxit->range.to.ins); + break; + } + } +} + +static void +buildintervals(struct rega *ra) +{ + extern int ninstr; + struct block *blk, *last; + struct bitset **livein = xcalloc(ra->fn->nblk * sizeof *livein); + + for (int i = 0; i < ra->fn->nblk; ++i) + livein[i] = xcalloc(BSSIZE(ninstr) * sizeof *livein[i]); + + numberinstrs(ra->fn); + /* visit blocks in reverse, to build lifetime intervals */ + blk = last = ra->fn->entry->lprev; do { - int curi; - DBG("do @%d\n", blk->id); - memcpy(bbrm[blk->id].out.regs, ra.m.regs, sizeof ra.m.regs); - imap_copy(&bbrm[blk->id].out.allocs, &ra.m.allocs); - dumpallocs("out", &ra.m); - - if (blk->s1 && blk->s1->phi.n) { - /* if the block is a predecessor to some phis, introduce use points - * for the corresponding arguments to each phi */ - struct block *s = blk->s1; - int predno = 0; - for (; predno < s->npred; ++predno) - if (blkpred(s, predno) == blk) - break; - assert(predno < s->npred); - for (int i = s->phi.n - 1; i >= 0; --i) { - struct instr *phi = &instrtab[s->phi.p[i]]; - union ref *arg = &phitab.p[phi->l.i][predno];; - use(&ra, blk, blk->ins.n, Ophi, phi->reg-1, arg, NOREF); + struct instr *ins = NULL; + struct bitset *live = livein[blk->id]; + size_t bssize = BSSIZE(ninstr); + struct block *suc = blk->s1; + // DBG("--- @%d ---\n",blk->id); + + /* live = union of successor.liveIn for each successor of b */ + if (blk->s1) bsunion(live, livein[blk->s1->id], bssize); + if (blk->s2) bsunion(live, livein[blk->s2->id], bssize); + // DBG("live in: "), priliveset(live, bssize); + + /* for each phi function phi of successors of b do + * live.add(phi.inputOf(b)) + */ + if (suc) do { + int predno; + for (predno = 0; blkpred(suc, predno) != blk; ++predno) ; + for (int i = 0; i < suc->phi.n; ++i) { + struct instr *phi = &instrtab[suc->phi.p[i]]; + union ref *arg = &phitab.p[phi->l.i][predno]; + assert(arg->t == RTMP); + // DBG("from phi set live %%%d\n", arg->i); + bsset(live, arg->i); } + } while (suc != blk->s2 && (suc = blk->s2)); + + /* for each opd in live do + * intervals[opd].addRange(b.from, b.to) + */ + for (uint i = 0; bsiter(&i, live, bssize); ++i) { + // DBG("itretave %%%d\n",i ); + addrange(&ra->intervals, i, (struct range){blk->inumstart, blk->inumstart + blk->ins.n + 2}, -1); } - curi = blk->ins.n - 1; - for (int i = 0; i < 2; ++i) { - if (!blk->jmp.arg[i].bits) break; - /* do not allocate a reg for a cmp op used a branch argument, since it's a pseudo op */ - if (blk->jmp.t == Jb && blk->jmp.arg[i].t == RTMP && instrtab[blk->jmp.arg[i].i].keep) - break; - use(&ra, blk, curi, -blk->jmp.t, - blk->jmp.t == Jret ? fn->abiret[i].reg : -1, - &blk->jmp.arg[i], blk->jmp.arg[!i]); + /* for each operation op of b in reverse order do */ + union ref queue[8] = { blk->jmp.arg[0], blk->jmp.arg[1] }; + goto Branchopd; + for (int curi, pos ; curi >= 0; --curi) { + int out = blk->ins.p[curi], reghint; + ins = &instrtab[out]; + pos = blk->inumstart + 1 + curi; + /* for each output operand opd of op do + * intervals[opd].setFrom(op.id) + * live.remove(opd) + * + * for each input operand opd of op do + * intervals[opd].addRange(b.from, op.id) + * live.add(opd) + */ + + reghint = ins && ins->op == Ocopy && ins->l.t == RREG ? ins->l.i : -1; + if (!intervaldef(&ra->intervals, out, blk, pos, reghint)) { + if (insrescls(*ins) && ins->op != Omove && !ins->keep && !(ins->op == Ocopy && ins->l.t == RREG)) { + /* dead */ + *ins = mkinstr(Onop,0,); + } + } + bsclr(live, out); + if (ins->op == Omove) { + assert(ins->l.t == RREG); + defreg(ra, ins->l.i, pos); + } else if (ins->op == Ocall) { + struct call *call = &calltab.p[ins->r.i]; + regset rclob = rsminus(rsunion(gpregset, fpregset), rsunion(mctarg->rglob, mctarg->rcallee)); + ra->fn->isleaf = 0; + + for (int i = 0; i < 2; ++i) { + if (call->abiret[i].ty.bits) { + int reg = call->abiret[i].reg; + rclob = rsclr(rclob, reg); + defreg(ra, reg, pos); + } + } + for (int j = 0; j < call->narg; ++j) { + int reg = call->abiarg[j].reg; + if (reg >= 0) { + rclob = rsclr(rclob, reg); + usereg(ra, reg, blk, pos); + } + } + if (rclob) { + struct fixinterval *fxit = alloc(&ra->arena, sizeof *fxit, 0); + fxit->next = ra->intervals.fixed; + fxit->range = (struct range) {pos, pos}; + fxit->rs = rclob; + ra->intervals.fixed = fxit; + /*DBG("CLOBBER @%d:%d {", blk->id, curi); + for (int reg = 0; rsiter(®, rclob); ++reg) { + DBG("%s,",mctarg->rnames[reg]); + } + DBG("}\n");*/ + } + } + + reghint = (ins && ins->op == Omove && ins->l.t == RREG) ? ins->l.i : -1; + queue[0] = ins->r, queue[1] = ins->l; + if (0) { + Branchopd: + reghint = -1; + curi = blk->ins.n; + pos = blk->inumstart + blk->ins.n + 1; + } + for (int nqueue = ins && ins->op == Omove ? 1 : 2; nqueue > 0;) { + union ref r = queue[--nqueue]; + + /* do not allocate a reg for a cmp op used as branch argument, since it's a pseudo op */ + if (curi == blk->ins.n && blk->jmp.t == Jb && r.t == RTMP && instrtab[r.i].keep) + continue; + + if (r.t == RTMP) { + addrange(&ra->intervals, r.i, (struct range){blk->inumstart, pos}, reghint); + bsset(live, r.i); + } else if (r.t == RREG) { + usereg(ra, r.i, blk, pos); + } else if (r.t == RADDR) { + reghint = -1; + queue[nqueue++] = addrht[r.i].base; + queue[nqueue++] = addrht[r.i].index; + } + } } - for (; curi >= 0; --curi) { - struct instr *ins = &instrtab[blk->ins.p[curi]]; - if (ins->op == Ocall) fn->isleaf = 0; - doins(&ra, blk, ins, curi); + /* for each phi function phi of b do + * live.remove(phi.output) + */ + for (int i = 0; i < blk->phi.n; ++i) + bsclr(live, blk->phi.p[i]); + + /* if b is loop header then + * loopEnd = last block of the loop starting at b + * for each opd in live do + * &ra->intervals[opd].addRange(b.from, loopEnd.to) + */ + struct block *loopend = NULL; + for (int i = 0; i < blk->npred; ++i) { + struct block *pred = blkpred(blk, i); + if (pred->id > blk->id) + loopend = loopend && loopend->id > pred->id ? loopend : pred; } + if (loopend) { + // DBG("i'm loop header - @%d (to @%d)\n", blk->id, loopend->id); + for (uint opd = 0; bsiter(&opd, live, bssize); ++opd) { + // DBG(" i have live %%%d\n", opd); + addrange(&ra->intervals, opd, (struct range){blk->inumstart, loopend->inumstart + loopend->ins.n+1}, -1); + /* struct interval *lv = imap_get(&ra->intervals.temps, opd); + for (int i = 0; i < lv->n; ++i) { + struct range r = itrange(lv, i); + // DBG(" @%d:%d - @%d:%d\n", r.from.blk, r.from.ins, r.to.blk, r.to.ins); + } */ + } + } + // DBG("live out: "), priliveset(live, bssize); + } while ((blk = blk->lprev) != last); - for (int i = blk->phi.n - 1; i >= 0; --i) { - struct instr *phi = &instrtab[blk->phi.p[i]]; - def(&ra, phi, blk, -1); + for (int i = 0; i < ra->fn->nblk; ++i) free(livein[i]); + free(livein); + + int var; + struct interval *lv; + imap_each(&ra->intervals.temps, var, lv) { + DBG("lifetime of %%%d: [ ", var); + for (int i = 0; i < lv->n; ++i) { + struct range r = itrange(lv, i); + DBG("%d-%d, ", r.from, r.to); } + DBG("]\n"); + } + for (struct fixinterval *fx = ra->intervals.fixed; fx; fx = fx->next) { + DBG("fixed {"); + for (int r = 0; rsiter(&r, fx->rs); ++r) + DBG("%s,", mctarg->rnames[r]); + DBG("}: [%d-%d]\n", fx->range.from, fx->range.to); + } +} - memcpy(bbrm[blk->id].in.regs, ra.m.regs, sizeof ra.m.regs); - imap_copy(&bbrm[blk->id].in.allocs, &ra.m.allocs);; - } while ((blk = blk->lprev) != last); +static int +cmppinterval(void *a, void *b) +{ + struct interval **la = a, **lb = b; + return (int)itrange(*la, 0).from - (int)itrange(*lb, 0).from; +} - if (1&&ccopt.dbg.r) { - efmt("<< regalloc before resolve\n"); +static bool +itcontainspos(const struct interval *it, int pos) +{ + for (int i = 0; i < it->n; ++i) { + struct range r = itrange(it, i); + if (r.from > pos) return 0; + if (pos < r.to) return 1; + } + return 0; +} + +static void +linearscan(struct rega *ra) +{ + struct lifetimes *intervals = &ra->intervals; + /* sort intervals */ + { + extern void qsort(void *p, size_t n, size_t size, int (*)(void *, void *)); + struct interval **unhandled = xcalloc(sizeof *unhandled * intervals->temps.mb.n); + int i = 0, var; + struct interval *lv; + + imap_each(&intervals->temps, var, lv) { + unhandled[i++] = lv; + } + if (!i) { + free(unhandled); + return; + } + qsort(unhandled, i, sizeof *unhandled, cmppinterval); + intervals->unhandled = NULL; + while (i-- > 0) { + unhandled[i]->next = intervals->unhandled; + intervals->unhandled = unhandled[i]; + } + free(unhandled); + } + + /* LINEAR SCAN */ + for (struct interval *current, *next; (current = intervals->unhandled); intervals->unhandled = next) { + int pos = itrange(current, 0).from; + next = current->next; + /* Expire old intervals */ + + /* check for intervals in active that are handled or inactive */ + for (struct interval **lnk = &intervals->active, *it = *lnk, *next; (next = it?it->next:0), it; it = next) { + /* ends before position? */ + DBG("< im active %%%d\n", intervals->temps.mb.k[it - intervals->temps.v]); + if (itrange(it, it->n-1).to <= pos) { + /* move from active to handled */ + *lnk = next; + it->next = intervals->handled; + intervals->handled = it; + if (it->reg >= 0) { + ra->m.free |= 1<<it->reg; + DBG(" unblock %s\n", mctarg->rnames[it->reg]); + } + } else + /* it does not cover position? */ + if (!itcontainspos(it, pos)) { + /* move from active to inactive */ + *lnk = next; + it->next = intervals->inactive; + intervals->inactive = it; + if (it->reg >= 0) { + ra->m.free |= 1<<it->reg; + DBG(" unblock %s\n", mctarg->rnames[it->reg]); + } + } else lnk = &it->next; + } + + /* check for intervals in inactive that are handled or active */ + for (struct interval **lnk = &intervals->inactive, *it = *lnk, *next; (next = it?it->next:0), it; it = next) { + DBG("< im inactive %%%d\n", intervals->temps.mb.k[it - intervals->temps.v]); + /* ends before position? */ + if (itrange(it, it->n-1).to <= pos) { + /* move from inactive to handled */ + *lnk = next; + it->next = intervals->handled; + intervals->handled = it; + } else + /* it covers position? */ + if (itcontainspos(it, pos)) { + /* move from inactive to active */ + *lnk = next; + it->next = intervals->active; + intervals->active = it; + if (it->reg >= 0) { + assert(rstest(ra->m.free, it->reg)); + ra->m.free &= ~(1<<it->reg); + DBG("reblock %s\n", mctarg->rnames[it->reg]); + } + } else lnk = &it->next; + } + + /* find a register for current */ + { + int this = intervals->temps.mb.k[current - intervals->temps.v]; + regset free = rsminus(ra->m.free, current->fpr ? gpregset : fpregset); + struct instr *ins = &instrtab[this]; + int reg = 0; + for (struct fixinterval *fxit = intervals->fixed; fxit; fxit = fxit->next) { +// if (rangeoverlap(it->range, (struct range) {pos, itrange(current, current->n-1).to})) { + // if (fxit->range.to >= pos && fxit->range.to < itrange(current, current->n-1).from) { + if (rangeoverlap(fxit->range, (struct range) {pos, itrange(current, current->n-1).to}) + && fxit->range.from != itrange(current, current->n-1).to) { + free = rsminus(free, fxit->rs); + } + } + if (ins->inplace && opnarg[ins->op] == 2) { + int xreg; + if (ins->r.t == RREG) free = rsclr(free, ins->r.i); + else if (ins->r.t == RTMP && (xreg = instrtab[ins->r.i].reg)) + free = rsclr(free, xreg-1); + } + assert(free); + if (current->rhint>=0) + DBG("have hint %s for %%%d\n", mctarg->rnames[current->rhint], intervals->temps.mb.k[current - intervals->temps.v]); + + if (current->rhint >= 0 && rstest(free, current->rhint)) { + DBG(" (used hint)\n"); + reg = current->rhint; + } else { + if (ins->op != Ophi && (opnarg[ins->op] == 1 || (opnarg[ins->op] == 2 && ins->inplace))) { + DBG(" %%%d try %d,%d\n", this, ins->l.t,ins->l.i); + if (ins->l.t == RREG && rstest(free, reg = ins->l.i)) + goto GotReg; + if (ins->l.t == RTMP) + if ((reg = instrtab[ins->l.i].reg-1) >= 0) + if (rstest(free, reg)) + goto GotReg; + } else if (ins->op == Ophi) { + union ref *arg = phitab.p[ins->l.i]; + for (int i = 0; i < xbcap(arg); ++i) { + if (arg->t == RREG && rstest(free, reg = arg->i)) goto GotReg; + if (arg->t == RTMP) + if ((reg = instrtab[arg->i].reg-1) >= 0) + if (rstest(free, reg)) + goto GotReg; + } + } + while (!rstest(free, reg)) ++reg; + } + GotReg: + current->reg = reg; + ins->reg = reg + 1; + DBG("%%%d got %s\n", this, mctarg->rnames[reg]); + ra->m.free = rsclr(ra->m.free, reg); + globusage = rsset(globusage, reg); + + //if current has a register assigned then add current to active + current->next = intervals->active; + intervals->active = current; + } + } + + { int var; + struct interval *lv; + imap_each(&intervals->temps, var, lv) { + // instrtab[var].reg = lv->reg+1; + if (lv->n > 2) xbfree(lv->_dyn); + } + imap_free(&intervals->temps); + } +} + +void +regalloc(struct function *fn) +{ + static union ref *stkslotrefsbuf[64]; + int id; + static union { char m[sizeof(struct arena) + (1<<10)]; struct arena *_align; } amem; + struct rega ra = {fn, .arena = (void *)amem.m}; + struct block *blk, *last; + memset(ra.arena, 0, sizeof *ra.arena); + ra.arena->cap = sizeof amem.m; + + /* setup */ + if (!fpregset || !gpregset) { + for (int i = 0; i < MAXREGS; ++i) { + if (isfpr(i)) + fpregset = rsset(fpregset, i); + else if (isgpr(i)) + gpregset = rsset(gpregset, i); + } + } + ra.m.blocked = mctarg->rglob; + ra.m.free = rsminus(rsunion(gpregset, fpregset), ra.m.blocked); + memset(ra.m.freestk, 0xFF, sizeof ra.m.freestk); + globusage = 0; + fn->stksiz = alignup(fn->stksiz, 8); + fn->isleaf = 1; + vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf)); + + /* put into reverse post order */ + sortrpo(fn); + + /* fix liveness ranges */ + fixlive(fn); + + /* transform into CSSA */ + fixcssa(fn); + + fillblkids(fn); + + if (ccopt.dbg.r) { + DBG("<< Before linear scan >>\n"); irdump(fn); } - /* resolve allocation mismatches across each edge */ - blk = last = fn->entry->lprev; - do { - if (blk->id < 0) continue; - if (blk->s1) resolve(fn, bbrm, blk, blk->s1); - if (blk->s2) resolve(fn, bbrm, blk, blk->s2); - } while ((blk = blk->lprev) != last); + /* linear scan: build lifetime intervals */ + buildintervals(&ra); + + /* linear scan */ + linearscan(&ra); + + /* resolve */ /* parallel copies for each phi */ blk = last = fn->entry->lprev; do { if (blk->id < 0) continue; for (int i = 0; i < blk->npred; ++i) { - lowerphis(fn, bbrm, blkpred(blk, i), blk); + lowerphis(fn, blkpred(blk, i), blk); } vfree(&blk->phi); } while ((blk = blk->lprev) != last); @@ -758,11 +906,19 @@ regalloc(struct function *fn) bool allnops = 1; blk->id = id++; for (int i = 0; i < blk->ins.n; ++i) { - struct instr *ins = &instrtab[blk->ins.p[i]]; + int t = blk->ins.p[i]; + struct instr *ins = &instrtab[t]; + + for (int i = 0; i < 2; ++i) { + union ref *r = &i[&ins->l]; + int tr; + if (r->t == RTMP && (tr = instrtab[r->i].reg)) *r = mkref(RREG, tr-1); + } + if (!ins->reg && insrescls(*ins) && ins->op != Omove && !ins->keep) { /* dead */ Nop: - *ins = mkinstr(Onop,0,); + ins->op = Onop; } else if (ins->op == Omove && ins->r.t == RREG && ins->l.i == ins->r.i) { goto Nop; } else if (ins->op == Ocopy && ins->l.t == RREG && ins->reg-1 == ins->l.i) { @@ -825,17 +981,14 @@ regalloc(struct function *fn) union ref *adr = stkslotrefs.p[--stkslotrefs.n]; *adr = mkaddr((struct addr) { .base = mkref(RREG, mctarg->bpr), .disp = -fn->stksiz + adr->i }); } + if (ccopt.dbg.r) { - efmt("<< After regalloc >>\n"); + DBG("<< After regalloc >>\n"); irdump(fn); } - for (int i = 0; i < nbbrm; ++i) { - imap_free(&bbrm[i].in.allocs); - imap_free(&bbrm[i].out.allocs); - } imap_free(&ra.m.allocs); - free(bbrm); fn->regusage = globusage; + freearena(ra.arena); } @@ -946,7 +1099,7 @@ fixlive(struct function *fn) } while ((blk = blk->lnext) != fn->entry); if (ccopt.dbg.l) { - efmt("<< After liveness fixup >>\n"); + DBG("<< After liveness fixup >>\n"); irdump(fn); } if (defined != definedbuf) free(defined); diff --git a/test/collatz.c b/test/collatz.c new file mode 100644 index 0000000..9793594 --- /dev/null +++ b/test/collatz.c @@ -0,0 +1,29 @@ +int printf(const char *, ...); +void *malloc(unsigned long); +int main() { + int *mem = malloc(sizeof(int) * 4000); + + int cmax = 0; + for (int nv = 1; nv < 1000; ++nv) { + int n = nv, c = 0; + while (n != 1) { + if (n < nv) { + c += mem[n]; + break; + } + if ((n & 1) != 0) { + n = (3 * n) + 1; + } else { + n /= 2; + //n >>= 1; + } + ++c; + } + mem[nv] = c; + if (c > cmax) { + cmax = c; + } + } + printf("should print 178: %d\n", cmax); +} + diff --git a/test/fact.c b/test/fact.c new file mode 100644 index 0000000..1147d8f --- /dev/null +++ b/test/fact.c @@ -0,0 +1,13 @@ +int +fact(int x) +{ + int y = 1; + while (x >= 1) { + y *= x; + x -= 1; + } + return y; +} + +extern int printf(); +int main() { printf("6! = %d\n", fact(6)); } @@ -29,8 +29,8 @@ int printf(const char *, ...); int main(int argc, char **argv) { unsigned n = argv[1] ? atoi(argv[1]) : 10; printf("fib(%u) = %u\n", n, fib(n)); - printf("fibr(%u) = %u\n", n, fibr(n)); printf("fibf(%u) = %g\n", n, fibf(n)); + printf("fibr(%u) = %u\n", n, fibr(n)); } /* vim:set ts=3 sw=3 expandtab: */ diff --git a/test/sort.c b/test/sort.c new file mode 100644 index 0000000..af58917 --- /dev/null +++ b/test/sort.c @@ -0,0 +1,28 @@ +typedef unsigned long size_t; + +int printf(const char *, ...); +void *calloc(size_t, size_t); +void qsort(void *, size_t nmemb, size_t size, int (const void *, const void *)); +int atoi(const char *); + +int +icmp(const void *a, const void *b) +{ + int l = *(int *)a, r = *(int *)b; + return (l > r) - (l < r); +} + +int +main(int argc, char **argv) +{ + int N = argc - 1; + int *xs = calloc(N, sizeof *xs); + + for (int i = 0; i < N; ++i) + xs[i] = atoi(argv[i+1]); + qsort(xs, N, sizeof *xs, icmp); + for (int i = 0; i < N; ++i) + printf("%d, ", xs[i]); + printf("\n"); + return N; +} |