about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r--amd64/emit.c26
-rw-r--r--amd64/isel.c2
-rw-r--r--amd64/sysv.c4
-rw-r--r--common.h42
-rw-r--r--ir.c70
-rw-r--r--ir.h15
-rw-r--r--irdump.c11
-rw-r--r--op.def1
-rw-r--r--optmem.c23
-rw-r--r--regalloc.c746
-rw-r--r--ssa.c23
-rw-r--r--test/fib.c10
12 files changed, 748 insertions, 225 deletions
diff --git a/amd64/emit.c b/amd64/emit.c
index f354262..5b3a298 100644
--- a/amd64/emit.c
+++ b/amd64/emit.c
@@ -162,6 +162,7 @@ enum operpat {
enum operenc {
EN_R = 1, /* reg with /r */
EN_RR, /* reg, reg with /r */
+ EN_RRX, /* reg, reg with /r (inverted) */
EN_MR, /* mem, reg with /r */
EN_RM, /* reg, mem with /r */
EN_M, /* mem */
@@ -249,6 +250,17 @@ encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct o
D(opc, nopc);
B(0300 | (dst.reg & 7) << 3 | (src.reg & 7));
break;
+ case EN_RRX: /* mod = 11; reg = src; rm = dst */
+ rex |= (src.reg >> 3) << 2; /* REX.R */
+ rex |= (dst.reg >> 3) << 0; /* REX.B */
+ if (rex) B(0x40 | rex);
+ else if (en->r8 && in_range(src.reg, RSP, RDI)) {
+ /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */
+ B(0x40);
+ }
+ D(opc, nopc);
+ B(0300 | (src.reg & 7) << 3 | (dst.reg & 7));
+ break;
case EN_MR:
mem = dst;
reg = src.reg;
@@ -370,6 +382,8 @@ static void Xmov(uchar **pcode, enum irclass k, struct oper dst, struct oper src
{8, PFPR, PFPR, "\xF2\x0F\x10", EN_RR}, /* MOVSD xmm, xmm */
{8, PFPR, PMEM, "\xF2\x0F\x10", EN_RM}, /* MOVSD xmm, m64 */
{8, PMEM, PFPR, "\xF2\x0F\x11", EN_MR}, /* MOVSD m64, xmm */
+ {4|8, PGPR, PFPR, "\x66\x0F\x6E", EN_RRX}, /* MOVD/Q r64/32, xmm -- NOTE(review): 66 0F 6E encodes MOVD/Q xmm<-r/m; the xmm->gpr direction is 66 0F 7E, so this opcode looks wrong even with the inverted EN_RRX ModRM -- verify against the SDM */
+ {4|8, PFPR, PGPR, "\x66\x0F\x6E", EN_RR}, /* MOVD/Q xmm, r64/32 */
};
static const uchar k2off[] = {
[KI4] = 0,
@@ -583,6 +597,10 @@ static void
gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct oper dst, union ref val)
{
assert(dst.t == OREG);
+ if (val.bits == UNDREF.bits) {
+ /* can be generated by SSA construction; since the value is undefined, no move is needed */
+ return;
+ }
if (val.t == RADDR) {
/* this is a LEA, but maybe it can be lowered to a 2-address instruction,
* which may clobber flags */
@@ -824,12 +842,12 @@ emitbranch(uchar **pcode, struct block *blk)
static void
calleesave(int *npush, uchar **pcode, struct function *fn)
{
- if (bstest(fn->regusage, RBX)) {
+ if (rstest(fn->regusage, RBX)) {
Xpush(pcode, RBX);
++*npush;
}
for (int r = R12; r <= R15; ++r)
- if (bstest(fn->regusage, r)) {
+ if (rstest(fn->regusage, r)) {
Xpush(pcode, r);
++*npush;
}
@@ -839,9 +857,9 @@ static void
calleerestore(uchar **pcode, struct function *fn)
{
for (int r = R15; r >= R12; --r)
- if (bstest(fn->regusage, r))
+ if (rstest(fn->regusage, r))
Xpop(pcode, r);
- if (bstest(fn->regusage, RBX)) Xpop(pcode, RBX);
+ if (rstest(fn->regusage, RBX)) Xpop(pcode, RBX);
}
/* align code using NOPs */
diff --git a/amd64/isel.c b/amd64/isel.c
index cb87b7d..07115ac 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -187,7 +187,7 @@ aadd(struct addr *addr, union ref r)
} else if (r.t == RREG) {
/* temporaries are single assignment, but registers aren't, so they can't be *
* safely hoisted into an address value, unless they have global lifetime */
- if (!bstest(mctarg->rglob, r.i)) return 0;
+ if (!rstest(mctarg->rglob, r.i)) return 0;
Ref:
if (!addr->base.bits) addr->base = r;
else if (!addr->index.bits) addr->index = r;
diff --git a/amd64/sysv.c b/amd64/sysv.c
index 9c7bc15..6c5b67c 100644
--- a/amd64/sysv.c
+++ b/amd64/sysv.c
@@ -141,8 +141,8 @@ const struct mctarg t_amd64_sysv = {
.gpr0 = RAX, .ngpr = R15 - RAX + 1,
.bpr = RBP,
.fpr0 = XMM0, .nfpr = XMM15 - XMM0 + 1,
- .rcallee = {{1<<RBX | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15}},
- .rglob = {{1<<RSP | 1<<RBP}},
+ .rcallee = 1<<RBX | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15,
+ .rglob = 1<<RSP | 1<<RBP,
.rnames = amd64_rnames,
.objkind = OBJELF,
.isa = ISamd64,
diff --git a/common.h b/common.h
index 319903e..449c777 100644
--- a/common.h
+++ b/common.h
@@ -87,6 +87,18 @@ ilog2(uint x) { /* assumes x is a power of 2 */
return n;
#endif
}
+static inline uint
+lowestsetbit(uvlong x)
+{
+#if HAS_BUILTIN(ctz)
+ return __builtin_ctzll(x);
+#else
+ int i = 0;
+ for (uvlong mask = 1;; ++i, mask <<= 1)
+ if (x & mask)
+ return i;
+#endif
+}
#define aisprint(c) in_range(c, ' ', '~')
#define aisdigit(c) in_range(c, '0', '9')
@@ -388,12 +400,24 @@ void imap_init_(struct imapbase *, void **v, uint vsiz, uint N);
int imap_get_(struct imapbase *, short k);
int imap_set_(struct imapbase *, void **v, uint vsiz, short k);
#define imap_free(m) (free((m)->mb.k), memset((m), 0, sizeof *(m)))
-#define imap_init(m, N) (imap_free(m), imap_init_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, (N))
+#define imap_init(m, N) (imap_free(m), imap_init_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, (N)))
#define imap_clear(m) ((m)->mb.bs ? bszero((m)->mb.bs, BSSIZE((m)->mb.N)) : (void)0, \
(m)->mb.n = 0)
#define imap_get(m, k) (((m)->tmp = imap_get_(&(m)->mb, k)) < 0 ? NULL : &(m)->v[(m)->tmp])
#define imap_set(m, k, x) ((m)->tmp = imap_set_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, k), \
(m)->v[(m)->tmp] = (x), &(m)->v[(m)->tmp])
+#define imap_each(m,kx,pvx) \
+ for (int _i = 0; _i < (m)->mb.N && ((kx) = (m)->mb.k[_i], (pvx) = &(m)->v[_i], 1); ++_i) \
+ if (bstest((m)->mb.bs, _i))
+#define imap_copy(dst,src) do { \
+ size_t N = (src)->mb.N; \
+ if (!N) break; \
+ (dst)->mb.n = (src)->mb.n; /* NOTE(review): imap_init below expands to imap_free, which memsets *dst and clobbers this n -- should this assignment come after imap_init? verify */ \
+ imap_init((dst), N); \
+ memcpy((dst)->mb.k, (src)->mb.k, \
+ N*(sizeof*(src)->mb.k + sizeof*(src)->v) + BSSIZE(N)*sizeof(struct bitset)); \
+} while (0)
+
struct pmapbase { void **k; uint n, N; };
/* map of non-null ptr -> T */
@@ -445,6 +469,22 @@ bsiter(uint *i, struct bitset bs[/*siz*/], uint siz)
return 0;
}
+typedef uvlong regset;
+#define rsset(S, r) ((S) | 1ull << (r))
+#define rsclr(S, r) ((S) & ~(1ull << (r)))
+#define rstest(S, r) ((S) >> (r) & 1)
+#define rsminus(A, B) ((A) & ~(B))
+#define rsand(A, B) ((A) & (B))
+#define rsunion(A, B) ((A) | (B))
+static inline bool
+rsiter(int *i, uvlong rs)
+{
+ uvlong mask = -(1ull << *i); /* NOTE(review): UB if *i == 64, which the `for (r=0; rsiter(&r,..); ++r)` caller pattern can reach after iterating bit 63 -- verify *i is bounded below 64 */
+ if ((rs & mask) == 0) return 0;
+ *i = lowestsetbit(rs & mask);
+ return 1;
+}
+
/********/
/** IO **/
/********/
diff --git a/ir.c b/ir.c
index 8fb6fed..b39cae3 100644
--- a/ir.c
+++ b/ir.c
@@ -222,7 +222,7 @@ mkaddr(struct addr addr)
return mkref(RADDR, addaddr(&addr));
}
-static inline void
+void
addpred(struct block *blk, struct block *p)
{
if (blk->npred == 0) {
@@ -239,6 +239,46 @@ addpred(struct block *blk, struct block *p)
xbpush(&blk->_pred, &blk->npred, p);
}
+static void
+delpred(struct block *blk, struct block *p)
+{
+ for (int i = 0; i < blk->npred; ++i) {
+ if (blkpred(blk, i) == p) {
+ for (int k = i; k < blk->npred - 1; ++k) {
+ blkpred(blk, k) = blkpred(blk, k + 1);
+ }
+ --blk->npred;
+ return;
+ }
+ }
+ assert(0&&"blk not in p");
+}
+
+struct block *
+insertblk(struct function *fn, struct block *pred, struct block *subst)
+{
+ struct block *new = newblk(fn);
+ struct block **s = pred->s1 == subst ? &pred->s1 : &pred->s2;
+ assert(*s == subst);
+ new->lnext = pred->lnext;
+ new->lprev = pred;
+ pred->lnext->lprev = new;
+ pred->lnext = new;
+ *s = new;
+ new->jmp.t = Jb;
+ new->s1 = subst;
+ addpred(new, pred);
+ for (int i = 0; i < subst->npred; ++i) {
+ if (blkpred(subst, i) == pred) {
+ blkpred(subst, i) = new;
+ ++fn->nblk;
+ return new;
+ }
+ }
+ assert(0);
+}
+
+
static int
newinstr(void)
{
@@ -400,6 +440,14 @@ delphi(struct block *blk, int idx)
--blk->phi.n;
}
+void
+fillblkids(struct function *fn)
+{
+ int i = 0;
+ struct block *blk = fn->entry;
+ do blk->id = i++; while ((blk = blk->lnext) != fn->entry);
+}
+
/** IR builders **/
struct block *
@@ -515,16 +563,20 @@ void
irfini(struct function *fn)
{
extern int nerror;
- if (!nerror) {
- abi0(fn);
- mem2reg(fn);
- lowerintrin(fn);
- mctarg->isel(fn);
- regalloc(fn);
- if (!ccopt.dbg.any)
- mctarg->emit(fn);
+ if (nerror) {
+ freefn(fn);
+ return;
}
+ abi0(fn);
+ lowerintrin(fn);
+ mem2reg(fn);
+ copyopt(fn);
+ mctarg->isel(fn);
+ regalloc(fn);
+ if (!ccopt.dbg.any)
+ mctarg->emit(fn);
+
freefn(fn);
}
diff --git a/ir.h b/ir.h
index 0ebe68b..60a2ad8 100644
--- a/ir.h
+++ b/ir.h
@@ -131,6 +131,8 @@ struct block {
struct { uchar t; union ref arg[2]; } jmp;
};
+#define blkpred(blk, i) 0[(blk)->npred < 2 ? &(blk)->_pred0 : &(blk)->_pred[i]]
+
enum { USERJUMP = 0xFFFF };
struct use { struct block *blk; ushort u; };
@@ -149,7 +151,7 @@ struct function {
ushort nabiarg, nabiret;
bool globl;
bool isleaf;
- struct bitset regusage[1];
+ regset regusage;
};
enum objkind { OBJELF };
@@ -161,8 +163,8 @@ struct mctarg {
bpr, /* frame/base pointer reg */
fpr0, /* first fpr */
nfpr; /* fpr count */
- struct bitset rcallee[1], /* callee-saved */
- rglob[1]; /* globally live (never used for regalloc) */
+ regset rcallee, /* callee-saved */
+ rglob; /* globally live (never used for regalloc) */
const char (*rnames)[6];
enum objkind objkind;
enum mcisa isa;
@@ -226,6 +228,9 @@ void conputdat(struct irdat *, uint off, enum typetag t, const void *dat);
union ref mkcallarg(union irtype ret, uint narg, int vararg);
#define mkintrin(B, C, N) mkinstr(Ointrin, C, {.t=RICON,B}, mkcallarg((union irtype){{0}},N,-1))
union ref mkaddr(struct addr);
+void addpred(struct block *blk, struct block *p);
+
+struct block *insertblk(struct function *, struct block *pred, struct block *subst);
void adduse(struct block *ublk, int ui, union ref r);
union ref insertinstr(struct block *, int idx, struct instr);
union ref insertphi(struct block *, enum irclass cls);
@@ -233,7 +238,7 @@ void replcuses(union ref from, union ref to);
void deluses(int ins);
void delinstr(struct block *, int idx);
void delphi(struct block *, int idx);
-#define blkpred(blk, i) 0[(blk)->npred < 2 ? &(blk)->_pred0 : &(blk)->_pred[i]]
+void fillblkids(struct function *);
/* IR builder functions */
union ref addinstr(struct function *, struct instr);
@@ -247,7 +252,7 @@ void putreturn(struct function *, union ref r0, union ref r1);
void irdump(struct function *);
void filluses(struct function *);
-void freeuses(struct use *, int n);
+void copyopt(struct function *);
void abi0(struct function *);
void abi0_call(struct function *, struct instr *, struct block *blk, int *curi);
diff --git a/irdump.c b/irdump.c
index 9eda415..3d95843 100644
--- a/irdump.c
+++ b/irdump.c
@@ -80,10 +80,9 @@ dumpref(enum op o, union ref ref)
efmt("??");
break;
case RTMP:
+ efmt("%%%d", ref.i);
if (instrtab[ref.i].reg)
- efmt("%s", mctarg->rnames[instrtab[ref.i].reg - 1]);
- else
- efmt("%%%d", ref.i);
+ efmt("(%s)", mctarg->rnames[instrtab[ref.i].reg-1]);
break;
case RREG:
efmt("%s", mctarg->rnames[ref.i]);
@@ -168,7 +167,7 @@ dumpinst(const struct instr *ins)
if (ins->reg) {
if (cls)
efmt("%s ", clsname[cls]);
- efmt("%s = ", mctarg->rnames[ins->reg - 1]);
+ efmt("(%%%d)%s = ", ins - instrtab, mctarg->rnames[ins->reg - 1]);
} else if (cls) {
efmt("%s %%%d", clsname[cls], ins - instrtab);
efmt(" = ");
@@ -198,7 +197,9 @@ dumpblk(struct function *fn, struct block *blk)
struct instr *phi = &instrtab[blk->phi.p[i]];
union ref *refs = phitab.p[phi->l.i];
assert(phi->op == Ophi);
- efmt(" %s %%%d = phi ", clsname[phi->cls], blk->phi.p[i]);
+ efmt(" %s ", clsname[phi->cls]);
+ if (!phi->reg) efmt("%%%d = phi ", blk->phi.p[i]);
+ else efmt("(%%%d)%s = phi ", phi - instrtab, mctarg->rnames[phi->reg-1]);
for (int i = 0; i < blk->npred; ++i) {
if (i) efmt(", ");
efmt("@%d ", blkpred(blk, i)->id);
diff --git a/op.def b/op.def
index 6ff6882..7c5d3ea 100644
--- a/op.def
+++ b/op.def
@@ -68,6 +68,7 @@ _(call, 2)
_(call2r, 1)
_(intrin, 2)
_(phi, 1)
+_(swap, 2)
/* machine-specific instructions */
_(xinc, 1)
_(xdec, 1)
diff --git a/optmem.c b/optmem.c
index 39719bb..2e7c7e5 100644
--- a/optmem.c
+++ b/optmem.c
@@ -25,7 +25,7 @@ struct pendingphi { ushort var, phi; };
struct ssabuilder {
vec_of(struct pendingphi) *pendingphis; /* map of block to list of incomplete phis in block */
imap_of(union ref *) curdefs; /* map of var to (map of block to def of var) */
- int nsealed; /* num of sealed blocks */
+ int lastvisit;
int nblk;
};
@@ -90,13 +90,24 @@ writevar(struct ssabuilder *sb, int var, struct block *blk, union ref val)
(*pcurdefs)[blk->id] = val;
}
+static bool
+issealed(struct ssabuilder *sb, struct block *blk)
+{
+ /* consider block sealed if it has been visited or if no backbranches flow to it */
+ if (blk->id < sb->lastvisit) return 1;
+ for (int i = 0; i < blk->npred; ++i)
+ if (blkpred(blk, i)->id > blk->id)
+ return 0;
+ return 1;
+}
+
+
static union ref
readvarrec(struct ssabuilder *sb, int var, enum irclass cls, struct block *blk)
{
union ref val;
assert(blk->npred > 0);
- if (blk->id >= sb->nsealed) {
- /* unsealed block */
+ if (!issealed(sb, blk)) {
val = insertphi(blk, cls);
vpush(&sb->pendingphis[blk->id], ((struct pendingphi) { var, val.i }));
} else if (blk->npred == 1) {
@@ -171,7 +182,7 @@ mem2reg(struct function *fn)
writevar(&sb, var, use->blk, m->r);
*m = mkinstr(Onop,0,);
} else if (oisload(m->op)) {
- union ref val = readvar(&sb, var, k, use->blk);
+ union ref val = readvar(&sb, var, k = m->cls, use->blk);
if (!val.bits) { /* var is used uninitialized */
/* TODO emit diagnostic */
/* load some garbage */
@@ -190,8 +201,8 @@ mem2reg(struct function *fn)
}
/* seal block */
- assert(sb.nsealed == blk->id);
- ++sb.nsealed;
+ assert(sb.lastvisit == blk->id);
+ ++sb.lastvisit;
pending = (void *) &sb.pendingphis[blk->id];
for (int i = 0; i < pending->n; ++i) {
addphiargs(&sb, pending->p[i].var, instrtab[pending->p[i].phi].cls, blk,
diff --git a/regalloc.c b/regalloc.c
index 7202e68..a49330d 100644
--- a/regalloc.c
+++ b/regalloc.c
@@ -1,9 +1,12 @@
+#include "common.h"
#include "ir.h"
-static struct bitset globusage[1];
-static struct bitset floatregs[1];
+/* Implements reverse linear register allocation, quite ad-hoc right now */
-#define isfpr(reg) bstest(floatregs, (reg))
+static regset gpregset, fpregset;
+static regset globusage;
+
+#define isfpr(reg) in_range((reg), mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1)
#define isgpr(reg) (!isfpr(reg))
enum { ADEAD, AREG, ASTACK } ;
@@ -12,40 +15,91 @@ struct alloc { ushort t : 2, a : 14; };
#define areg(r) ((struct alloc) { AREG, (r) })
#define astack(s) ((struct alloc) { ASTACK, (s) })
+enum { MAXSPILL = 128 };
+
+struct rmap {
+ regset free; /* free registers */
+ regset blocked; /* registers allocated to themselves (from e.g. isel reg constraints, abi) */
+ ushort regs[MAXREGS]; /* map reg -> temp holding reg */
+ struct bitset freestk[BSSIZE(MAXSPILL)]; /* free stack slots */
+ imap_of(struct alloc) allocs; /* map tmpidx -> allocation */
+};
+
struct rega {
- union ref regs[MAXREGS]; /* map reg -> value holding reg */
- struct alloc *allocs; /* map tmpidx -> allocation */
- vec_of(ushort) freestk; /* available stack slots */
- int nfreegpr, nfreefpr;
+ struct function *fn;
+ struct rmap m;
int maxstk, stktop;
};
static vec_of(union ref *) stkslotrefs;
-static void
+static union ref
addstkslotref(union ref inst)
{
vpush(&stkslotrefs, &instrtab[inst.i].l);
+ return inst;
}
static int allocreg(struct rega *ra, enum irclass cls, union ref ref, uvlong excl);
#if 0
#define DBG efmt
+static void
+dumpallocs(const char *s, struct rmap *m)
+{
+ int var;
+ struct alloc *alloc;
+
+ DBG("%s map [", s);
+ imap_each(&m->allocs, var, alloc) {
+ if (!alloc->t) continue;
+ DBG(" %%%d -> %c%d,", var, "XRS"[alloc->t], alloc->a);
+ }
+ DBG("]\n");
+}
#else
#define DBG(...) ((void)0)
+#define dumpallocs(...)
#endif
static void
freereg(struct rega *ra, int r)
{
- ra->regs[r] = NOREF;
- if (isfpr(r)) ++ra->nfreefpr;
- else ++ra->nfreegpr;
+ ra->m.free = rsset(ra->m.free, r);
+ ra->m.blocked = rsclr(ra->m.blocked, r);
}
static void take(struct rega *ra, int reg, union ref ref);
+static int
+allocstk(struct rega *ra, int var)
+{
+ int s = -1;
+
+ for (int i = 0; i < BSSIZE(MAXSPILL); ++i) {
+ if (~ra->m.freestk[i].u == 0) continue;
+ s = i*64 + lowestsetbit(ra->m.freestk[i].u);
+ }
+ if (s != -1) {
+ bsclr(ra->m.freestk, s);
+ if (ra->stktop < s) ra->stktop = s+1;
+ } else {
+ s = ra->stktop++;
+ }
+ if (ra->maxstk < s+1) ra->maxstk = s+1;
+ (void)imap_set(&ra->m.allocs, var, astack(s));
+ return s;
+}
+
+static void
+freestk(struct rega *ra, int slot)
+{
+ if (slot < MAXSPILL)
+ bsset(ra->m.freestk, slot);
+ else if (slot == ra->stktop - 1)
+ --ra->stktop;
+}
+
static void
def(struct rega *ra, struct instr *ins, struct block *blk, int curi)
{
@@ -54,14 +108,16 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi)
if (ins->op != Omove && ins->op != Ocall) {
var = ins - instrtab;
DBG("def %%%d\n",var);
- alloc = &ra->allocs[var];
- if (alloc->t == ADEAD) {
+ alloc = imap_get(&ra->m.allocs, var);
+ if (!alloc || alloc->t == ADEAD) {
return;
} else if (alloc->t == AREG) {
reg = alloc->a;
DBG("-- free %s for %%%d\n", mctarg->rnames[alloc->a], var);
- assert(ra->regs[reg].bits == mkref(RTMP, var).bits);
- } else if (alloc->t == ASTACK) {
+ assert(!rstest(ra->m.free, reg));
+ assert(ra->m.regs[reg] == var);
+ ins->reg = reg+1;
+ } else if (alloc->t == ASTACK) {
/* unspill, insert 'store [slot], reg' */
struct alloc astk = *alloc;
struct instr store;
@@ -69,26 +125,27 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi)
if ((ins->op == Ocopy || ins->inplace) && ins->l.t == RREG) {
int hint = ins->l.i;
- if (!ra->regs[hint].bits) {
+ if (rstest(ra->m.free, hint)) {
take(ra, reg = hint, mkref(RTMP, var));
- assert(ra->regs[reg].bits == mkref(RTMP, var).bits);
+ assert(ra->m.regs[reg] == var && !rstest(ra->m.free, hint));
}
}
if (reg < 0)
reg = allocreg(ra, insrescls(*ins), mkref(RTMP, var), 0);
store = mkinstr(Ostore1 + ilog2(cls2siz[insrescls(*ins)]), 0,
mkref(RICON, astk.a*8), mkref(RREG, reg));
- DBG("-- unspill %%%d s%d -> %s\n", var, astk.a, mctarg->rnames[ins->reg+1]);
+ DBG("-- unspill %%%d s%d -> %s\n", var, astk.a, mctarg->rnames[reg]);
addstkslotref(insertinstr(blk, ++curi, store));
- vpush(&ra->freestk, astk.a);
+ freestk(ra, astk.a);
ins->reg = reg+1;
def(ra, ins, blk, curi); /* and free the reg again */
return;
}
- *alloc = afree();
+ if (alloc) *alloc = afree();
} else if (ins->op == Omove) {
+ assert(ins->l.t == RREG); /* move to a register */
+ assert(rstest(ra->m.blocked, ins->l.i));
reg = ins->l.i;
- assert(ins->l.t == RREG);
DBG("-- free %s\n", mctarg->rnames[ins->l.i]);
} else {
struct call *call = &calltab.p[ins->r.i];
@@ -104,68 +161,58 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi)
static void
take(struct rega *ra, int reg, union ref ref) {
DBG("-- take %s for %c%d\n", mctarg->rnames[reg], "R%"[ref.t==RTMP], ref.i);
- assert(!ra->regs[reg].bits && "taken");
- if (ref.t == RTMP)
- ra->allocs[ref.i] = areg(reg);
- ra->regs[reg] = ref;
- assert(ra->regs[reg].bits);
- bsset(globusage, reg);
- if (isfpr(reg)) --ra->nfreefpr;
- else --ra->nfreegpr;
+ assert(rstest(ra->m.free, reg) && "taken");
+ if (ref.t == RTMP) {
+ (void)imap_set(&ra->m.allocs, ref.i, areg(reg));
+ ra->m.regs[reg] = ref.i;
+ } else {
+ ra->m.blocked = rsset(ra->m.blocked, reg);
+ }
+ ra->m.free = rsclr(ra->m.free, reg);
+ globusage = rsset(globusage, reg);
}
static int
allocreg(struct rega *ra, enum irclass cls, union ref ref, uvlong excl)
{
- int r0, rend, reg;
+ regset rs;
+ int reg;
assert(cls);
if (kisint(cls)) {
- r0 = mctarg->gpr0;
- rend = mctarg->gpr0 + mctarg->ngpr;
+ rs = gpregset;
} else if (kisflt(cls)) {
- r0 = mctarg->fpr0;
- rend = mctarg->fpr0 + mctarg->nfpr;
+ rs = fpregset;
} else assert(0);
- for (reg = r0; reg < rend; ++reg) {
- if (bstest(mctarg->rglob, reg)) continue;
- if (!(excl >> reg & 1) && !ra->regs[reg].bits) {
- take(ra, reg, ref);
- return reg;
- }
- }
- assert(!"no more reg");
-}
-
-static int
-allocstk(struct rega *ra, int var)
-{
- int s;
-
- if (ra->freestk.n) {
- s = ra->freestk.p[--ra->freestk.n];
- ra->allocs[var] = astack(s);
- } else {
- if (ra->stktop+1 >= ra->maxstk) ra->maxstk = ra->stktop+1;
- s = ra->stktop++;
- }
- ra->allocs[var] = astack(s);
- return s;
+ rs = rsminus(rs, mctarg->rglob);
+ rs = rsand(rs, ra->m.free);
+ rs = rsminus(rs, excl);
+
+ assert(rs != 0 && "no more reg");
+ if (rsminus(rs, mctarg->rcallee) != 0)
+ reg = lowestsetbit(rsminus(rs, mctarg->rcallee));
+ else
+ reg = lowestsetbit(rs);
+ take(ra, reg, ref);
+ return reg;
}
static void
spill(struct rega *ra, int reg, struct block *blk, int curi) {
int var, s;
struct instr load;
+ struct alloc *alloc;
- if (!ra->regs[reg].bits) return;
- var = ra->regs[reg].i;
- assert(ra->regs[reg].bits == RTMP && *(ushort *)&ra->allocs[var] == *(ushort *)&areg(reg));
+ if (rstest(ra->m.free, reg)) return;
+ var = ra->m.regs[reg];
+ assert(!rstest(ra->m.blocked, reg));
+ assert((alloc = imap_get(&ra->m.allocs, var)) && !memcmp(alloc, &areg(reg), sizeof *alloc));
s = allocstk(ra, var);
DBG("-- spill %%%d %s -> s%d\n", var, mctarg->rnames[reg], s);
instrtab[var].reg = 0;
/* insert 'reg = load [slot]' */
- load = mkinstr(Oloads1 + 2*ilog2(cls2siz[insrescls(instrtab[var])]), insrescls(instrtab[var]), mkref(RICON, s*8));
+ load = mkinstr(Oloads1 + 2*ilog2(cls2siz[insrescls(instrtab[var])]),
+ insrescls(instrtab[var]), mkref(RICON, s*8));
load.reg = reg+1;
addstkslotref(insertinstr(blk, ++curi, load));
freereg(ra, reg);
@@ -178,34 +225,39 @@ forcetake(struct rega *ra, int reg, union ref ref, struct block *blk, int curi)
int var;
struct alloc *alloc;
- if (ra->regs[reg].bits == ref.bits) return;
- if (!ra->regs[reg].bits) {
+ if (ref.t == RREG && rstest(ra->m.blocked, ref.i)) {
+ assert(!rstest(ra->m.free, reg));
+ return;
+ }
+ if (ref.t == RTMP && !rstest(ra->m.free, reg) && ra->m.regs[reg] == ref.i) return;
+ if (rstest(ra->m.free, reg)) {
take(ra, reg, ref);
return;
}
- assert(ra->regs[reg].bits == RTMP);
- var = ra->regs[reg].i;
- alloc = &ra->allocs[var];
- assert(alloc->a == reg);
+ assert(!rstest(ra->m.blocked, ref.i));
+ var = ra->m.regs[reg];
+ alloc = imap_get(&ra->m.allocs, var);
+ assert(alloc && alloc->a == reg);
*alloc = afree();
/* try to move temp to another register */
- if (isgpr(reg) ? ra->nfreegpr > 0 : ra->nfreefpr > 0) {
+ if (rsand(ra->m.free, isgpr(reg) ? gpregset : fpregset) != 0) {
/* the register of the current instruction (if any) was already free'd (by def), so
* we need to explicitly exclude it from the pool of rename registers
* e.g.: given 'R0 = copy R1'; if R1 => %x, we need to prevent renaming %x => R0
*/
- uvlong excl = instrtab[blk->ins.p[curi]].reg;
- int rename = allocreg(ra, isgpr(reg) ? KI4 : KF4, ra->regs[reg], excl ? 1ull<<(excl-1) : 0);
+ regset excl = instrtab[blk->ins.p[curi]].reg;
+ int rename = allocreg(ra, isgpr(reg) ? KI4 : KF4, mkref(RTMP, ra->m.regs[reg]),
+ excl ? rsset(0, excl-1) : 0);
if (ccopt.dbg.r)DBG("-- rename %%%d %s -> %s\n", var, mctarg->rnames[reg], mctarg->rnames[rename]);
/* introduce move from rename -> original (since we allocate backwards) */
insertinstr(blk, ++curi, mkmove(insrescls(instrtab[var]), reg, rename));
- instrtab[var].reg = rename+1;
- ra->regs[rename] = mkref(RTMP, var);
- bsset(globusage, rename);
+ ra->m.regs[rename] = var;
+ globusage = rsset(globusage, rename);
*alloc = areg(rename);
- ra->regs[reg].bits = 0;
+ ra->m.free = rsset(ra->m.free, reg);
} else {
spill(ra, reg, blk, curi);
+ ra->m.free = rsset(ra->m.free, reg);
}
take(ra, reg, ref);
}
@@ -216,6 +268,8 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re
{
struct instr *ins;
uvlong excl = other.t == RREG ? 1ull<<other.i : 0;
+ struct alloc *alloc;
+ int reg;
if (ref->t == RADDR) {
struct addr addr = addrht[ref->i];
@@ -224,167 +278,475 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re
*ref = mkaddr(addr);
return;
} else if (ref->t == RREG) {
- assert(!(excl >> hint & 1));
+ assert(hint<0 || !rstest(excl, hint));
forcetake(ra, ref->i, *ref, blk, curi);
}
if (ref->t != RTMP) return;
ins = &instrtab[ref->i];
if (!ins->cls) return;
- if (!ins->reg) {
+ if (!(alloc = imap_get(&ra->m.allocs, ref->i)) || alloc->t != AREG) {
int s = -1;
- if (ra->allocs[ref->i].t == ASTACK)
- s = ra->allocs[ref->i].a;
+ if (alloc && alloc->t == ASTACK) {
+ /* ensure spill isn't overwritten by dest
+ * e.g. in R0 = add %s, 7 => can't spill %s to R0 */
+ struct instr *use = &instrtab[blk->ins.p[curi]];
+ s = alloc->a;
+ if (use->reg) excl = rsset(excl, use->reg-1);
+ }
assert(ins->op != Ocall);
if (ins->r.t == RREG && ins->inplace) excl |= 1ull<<ins->r.i;
- if ((hint == -1 || ra->regs[hint].bits) && ins->op == Ocopy && ins->l.t == RREG)
+ if ((hint == -1 || !rstest(ra->m.free, hint)) && ins->op == Ocopy && ins->l.t == RREG)
/* for '%x = copy Rx', hint %x to use Rx */
hint = ins->l.i;
- if (hint != -1 && !(excl >> hint & 1) && !ra->regs[hint].bits) {
+
+ if (hint != -1 && !rstest(excl, hint) && rstest(ra->m.free, hint)) {
take(ra, hint, *ref);
- ins->reg = hint + 1;
+ reg = hint;
} else {
- ins->reg = allocreg(ra, insrescls(*ins), *ref, excl) + 1;
+ reg = allocreg(ra, insrescls(*ins), *ref, excl);
}
+
if (s >= 0) {
/* unspill, insert 'store [slot], reg' */
- DBG("-- unspill %%%d s%d -> %s\n", ref->i, s, mctarg->rnames[ins->reg-1]);
+ DBG("-- unspill %%%d s%d -> %s\n", ref->i, s, mctarg->rnames[reg]);
struct instr store = mkinstr(Ostore1 + ilog2(cls2siz[insrescls(*ins)]), 0,
mkref(RICON, s*8),
- mkref(RREG, ins->reg-1));
+ mkref(RREG, reg));
addstkslotref(insertinstr(blk, ++curi, store));
- vpush(&ra->freestk, s);
+ freestk(ra, s);
}
+ } else {
+ reg = alloc->a;
}
- /* do not patch ref if it's cond branch arg, emit() wants to know what instr it is */
+ /* do not patch ref if it's used in a phi
+ * or if it's cond branch arg, emit() wants to know what instr it is */
+ if (op != Ophi)
if (ref != blk->jmp.arg || blk->jmp.t != Jb)
- *ref = mkref(RREG, ins->reg-1);
+ *ref = mkref(RREG, reg);
+}
+
+static void
+doins(struct rega *ra, struct block *blk, struct instr *ins, int curi)
+{
+ int hint0 = -1, hint1 = -1;
+ if (ins->op != Ocopy && !imap_get(&ra->m.allocs, ins - instrtab) && ins->skip) { /* unused */
+ *ins = mkinstr(Onop, 0,);
+ return;
+ }
+ def(ra, ins, blk, curi);
+ if (ins->op != Ocall) {
+ if (ins->op == Ocopy || ins->inplace) hint0 = ins->reg - 1;
+ if (ins->op == Omove) {
+ if (ins->l.t == RREG) hint1 = ins->l.i;
+ /* MOV Rx,Rx is used by isel to indicate a clobber,
+ * so it should be a def point for Rx but not a use point */
+ if (ins->r.bits != ins->l.bits)
+ use(ra, blk, curi, ins->op, hint1, &ins->r, NOREF);
+ } else {
+ if (ins->l.bits) use(ra, blk, curi, ins->op, hint0, &ins->l, ins->r);
+ if (ins->r.bits) use(ra, blk, curi, ins->op, hint1, &ins->r, NOREF);
+ }
+ } else {
+ struct call *call = &calltab.p[ins->r.i];
+ regset rspill = rsminus(rsunion(gpregset, fpregset), rsunion(mctarg->rglob, mctarg->rcallee));
+
+ dumpallocs("CALL", &ra->m);
+ if (call->abiret[0].ty.bits) rspill = rsclr(rspill, call->abiret[0].reg);
+ if (call->abiret[1].ty.bits) rspill = rsclr(rspill, call->abiret[1].reg);
+ for (int r = 0; rsiter(&r, rspill); ++r) {
+ spill(ra, r, blk, curi);
+ }
+ for (int j = 0; j < call->narg; ++j) {
+ short reg = call->abiarg[j].reg;
+ if (reg >= 0) {
+ forcetake(ra, reg, mkref(RREG, reg), blk, curi);
+ }
+ }
+
+ use(ra, blk, curi, ins->op, hint0, &ins->l, NOREF);
+ }
+}
+
+enum pmstat { PMTOMOVE, PMMOVING, PMDONE };
+static struct pmove {
+ uchar k;
+ uchar stat;
+ struct alloc dst, src;
+} pmove[MAXREGS];
+static int npmove;
+
+static void
+pmadd(enum irclass k, struct alloc dst, struct alloc src)
+{
+ assert(npmove < MAXREGS);
+ pmove[npmove++] = (struct pmove) { k, PMTOMOVE, dst, src };
+}
+
+static void
+emitmove(enum irclass k, struct alloc dst, struct alloc src, struct block *blk, int curi)
+{
+ struct instr mv;
+ if (dst.t == AREG && src.t == AREG) {
+ insertinstr(blk, curi, mkmove(k, dst.a, src.a));
+ } else if (dst.t == ASTACK && src.t == AREG) {
+ mv = mkinstr(Ostore1+ilog2(cls2siz[k]), 0, mkref(RICON, dst.a), mkref(RREG, src.a)); /* NOTE(review): every other spill site scales the slot for the RICON offset (s*8, astk.a*8); dst.a here is unscaled -- verify */
+ addstkslotref(insertinstr(blk, curi, mv));
+ } else if (dst.t == AREG && src.t == ASTACK) {
+ switch (mv.cls = k) {
+ default: assert(0);
+ case KI4: mv.op = Oloads4; break;
+ case KI8: mv.op = Oloadi8; break;
+ case KPTR: mv.op = targ_64bit ? Oloadi8 : Oloads4; break;
+ case KF4: mv.op = Oloadf4; break;
+ case KF8: mv.op = Oloadf8; break;
+ }
+ mv.l = mkref(RICON, dst.a); /* NOTE(review): for the reg<-stack case the load offset should come from src (the stack slot, scaled by 8 as at other spill sites), not dst -- verify */
+ mv.reg = src.a; /* NOTE(review): .reg should hold the destination register, and the convention elsewhere is reg+1; src/dst look swapped here -- verify */
+ addstkslotref(insertinstr(blk, curi, mv));
+ }
+}
+
+static int
+pmrec(int i, struct block *blk, int curi, enum irclass *k)
+{
+ int j, c;
+
+ if (!memcmp(&pmove[i].dst, &pmove[i].src, sizeof pmove->dst)) {
+ pmove[i].stat = PMDONE;
+ return -1;
+ }
+
+ /* widen when necessary */
+ assert(kisint(pmove[i].k) == kisint(*k));
+ if (cls2siz[pmove[i].k] > cls2siz[*k])
+ *k = pmove[i].k;
+
+ for (j = 0; j < npmove; ++j)
+ if (!memcmp(&pmove[j].dst, &pmove[i].src, sizeof pmove->dst)) {
+ break;
+ }
+ if (j == npmove) goto Done;
+ switch (pmove[j].stat) {
+ default: assert(0);
+ case PMMOVING:
+ c = j;
+ Swap:
+ assert(pmove[i].src.t == AREG && pmove[i].dst.t == AREG);
+ insertinstr(blk, curi,
+ mkinstr(Oswap, *k, mkref(RREG, pmove[i].dst.a), mkref(RREG, pmove[i].src.a)));
+ break;
+ case PMTOMOVE:
+ pmove[i].stat = PMMOVING;
+ c = pmrec(j, blk, curi, k);
+ if (c == i) {
+ c = -1;
+ break;
+ } else if (c != -1) {
+ goto Swap;
+ }
+ /* fallthru */
+ case PMDONE:
+ Done:
+ c = -1;
+ emitmove(*k, pmove[i].dst, pmove[i].src, blk, curi);
+ break;
+ }
+
+ pmove[i].stat = PMDONE;
+ return c;
+}
+
+static void
+emitpm(struct block *blk)
+{
+ int curi = blk->ins.n;
+ for (int i = 0; i < npmove; ++i)
+ if (pmove[i].stat == PMTOMOVE) {
+ pmrec(i, blk, curi, &(enum irclass) { pmove[i].k });
+ }
+}
+
+struct bbrm { struct rmap in, out; };
+
+static void
+lowerphis(struct function *fn, struct bbrm *bbrm, struct block *blk, struct block *suc)
+{
+ int predno;
+ struct alloc *alloc;
+ struct rmap *out = &bbrm[blk->id].out;
+ struct block *n = NULL;
+
+ if (!blk->s2) n = blk;
+
+ for (predno = 0; predno < suc->npred; ++predno)
+ if (blkpred(suc, predno) == blk)
+ break;
+ assert(predno < suc->npred);
+
+ npmove = 0;
+ /* ensure phi args go to the same reg as phi with parallel copies */
+ for (int i = 0; i < suc->phi.n; ++i) {
+ struct instr *phi = &instrtab[suc->phi.p[i]];
+ union ref *arg = &phitab.p[phi->l.i][predno];
+ int regto, regfrom;
+
+ if (arg->t == RREG) continue;
+ assert(arg->t == RTMP);
+ DBG("resolve phi @%d, @%d, %%%d <- %%%d\n", blk->id, suc->id, phi - instrtab, arg->i);
+ if (instrtab[arg->i].reg) {
+ regfrom = instrtab[arg->i].reg - 1;
+ DBG(" it had R%d\n", regfrom);
+ } else {
+ alloc = imap_get(&out->allocs, arg->i);
+ assert(alloc && alloc->t == AREG);
+ regfrom = alloc->a;
+ DBG(" found R%d\n", regfrom);
+ instrtab[arg->i].reg = regfrom+1;
+ }
+ if (phi->reg) {
+ regto = phi->reg - 1;
+ } else {
+ alloc = imap_get(&out->allocs, phi - instrtab);
+ assert(alloc && alloc->t == AREG);
+ regto = alloc->a;
+ phi->reg = regto+1;
+ }
+ DBG(" > phi move %c%d -> %c%d\n", " RS"[AREG], regfrom, " RS"[AREG], regto);
+ if (!n) n = insertblk(fn, blk, suc);
+ pmadd(phi->cls, areg(regto), areg(regfrom));
+ }
+ if (n) emitpm(n);
+}
+
+static void
+resolve(struct function *fn, struct bbrm *bbrm, struct block *blk, struct block *suc)
+{
+ ushort var;
+ struct alloc *alloc, *alloc2;
+ struct rmap *out = &bbrm[blk->id].out, *in = &bbrm[suc->id].in;
+ struct rmap *in2 = &bbrm[blk->id].in;
+ struct block *n = NULL;
+
+ DBG("resolve(@%d, @%d)\n",blk->id, suc->id);
+
+ npmove = 0;
+ if (!blk->s2) n = blk;
+ imap_each(&in->allocs, var, alloc) {
+ if (alloc->t == ADEAD) continue;
+ if ((alloc2 = imap_get(&out->allocs, var)) && alloc2->t != ADEAD
+ && memcmp(alloc2, alloc, sizeof *alloc) != 0) {
+ DBG("resolve @%d, @%d, %%%d\n", blk->id, suc->id, var);
+ DBG(" > move %c%d -> %c%d\n", " RS"[alloc2->t], alloc2->a, " RS"[alloc->t], alloc->a);
+ if (!n) {
+ n = insertblk(fn, blk, suc);
+ n->id = blk->id; /* use same bbrm */
+ }
+ pmadd(insrescls(instrtab[var]), *alloc, *alloc2);
+ }
+ (void)imap_set(&in2->allocs, var, *alloc);
+ if (!instrtab[var].reg)
+ instrtab[var].reg = alloc->a + 1;
+ }
+ if (n) emitpm(n);
+
+ dumpallocs("in", in);
+ dumpallocs("out", out);
}
void
regalloc(struct function *fn)
{
- extern int ninstr;
- struct instr *ins;
+ int id;
struct block *last = fn->entry->lprev, *blk;
static union ref *stkslotrefsbuf[64];
- struct rega ra = {0};
-
- fn->isleaf = 1;
-
- vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf));
- ra.allocs = xcalloc((ninstr*2 < MAXINSTR ? ninstr*2 : MAXINSTR) * sizeof(struct alloc));
- ra.nfreegpr = mctarg->ngpr - popcnt(mctarg->rglob->u);
- ra.nfreefpr = mctarg->nfpr;
- for (int i = 0; i < MAXREGS; ++i) {
- if (in_range(i, mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1))
- bsset(floatregs, i);
- if (bstest(mctarg->rglob, i))
- ra.regs[i] = mkref(RREG, i);
+ struct bbrm *bbrm;
+ int nbbrm;
+ struct rega ra = {fn};
+
+ /* setup */
+ if (!fpregset || !gpregset) {
+ for (int i = 0; i < MAXREGS; ++i) {
+ if (in_range(i, mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1))
+ fpregset = rsset(fpregset, i);
+ else if (in_range(i, mctarg->gpr0, mctarg->gpr0 + mctarg->ngpr - 1))
+ gpregset = rsset(gpregset, i);
+ }
}
- bszero(globusage, 1);
+ ra.m.blocked = mctarg->rglob;
+ ra.m.free = rsminus(rsunion(gpregset, fpregset), ra.m.blocked);
+ memset(ra.m.freestk, 0xFF, sizeof ra.m.freestk);
+ globusage = 0;
fn->stksiz = alignup(fn->stksiz, 8);
+ fn->isleaf = 1;
+ vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf));
- /* a dumb linear register allocator that visits instructions physically backwards
- * starting at the end of the function, when encountering a use of a new
- * temporary, it allocates a register for it. when encountering the definition
- * of a temporary, it frees up its register
- */
+ /* generate copies for phi operands */
+ blk = fn->entry;
+ do {
+ if (!blk->phi.n) continue;
+ for (int p = 0; p < blk->npred; ++p) {
+ struct block *n, *pred = blkpred(blk, p);
+ if (!pred->s2) {
+ /* pred only has 1 successor (blk), so insert move directly in it */
+ n = pred;
+ } else {
+ n = insertblk(fn, pred, blk);
+ assert(n->jmp.t == Jb && n->s1 == blk);
+ }
+ for (int i = 0; i < blk->phi.n; ++i) {
+ int phi = blk->phi.p[i];
+ union ref *args = phitab.p[instrtab[phi].l.i];
+ args[p] = insertinstr(n, n->ins.n, mkinstr(Ocopy, instrtab[phi].cls, args[p]));
+ }
+ }
+ } while ((blk = blk->lnext) != fn->entry);
+ fillblkids(fn);
+ bbrm = xcalloc(sizeof *bbrm * (nbbrm = fn->nblk));
+
+ /* visit blocks in reverse, allocating registers in a greedy manner */
blk = last;
do {
+ int curi;
+ DBG("do @%d\n", blk->id);
+ memcpy(bbrm[blk->id].out.regs, ra.m.regs, sizeof ra.m.regs);
+ imap_copy(&bbrm[blk->id].out.allocs, &ra.m.allocs);
+ dumpallocs("out", &ra.m);
+
+ if (blk->s1 && blk->s1->phi.n) {
+ /* if the block is a predecessor to some phis, introduce use points
+ * for the corresponding arguments to each phi */
+ struct block *s = blk->s1;
+ int predno = 0;
+ for (; predno < s->npred; ++predno)
+ if (blkpred(s, predno) == blk)
+ break;
+ assert(predno < s->npred);
+ for (int i = s->phi.n - 1; i >= 0; --i) {
+ struct instr *phi = &instrtab[s->phi.p[i]];
+         union ref *arg = &phitab.p[phi->l.i][predno];
+ use(&ra, blk, blk->ins.n, Ophi, phi->reg-1, arg, NOREF);
+ }
+ }
+
+ curi = blk->ins.n - 1;
for (int i = 0; i < 2; ++i) {
if (!blk->jmp.arg[i].bits) break;
/* do not allocate a reg for a cmp op used a branch argument, since it's a pseudo op */
if (blk->jmp.t == Jb && blk->jmp.arg[i].t == RTMP && oiscmp(instrtab[blk->jmp.arg[i].i].op))
break;
- use(&ra, blk, blk->ins.n-1, (blk->jmp.t != Jb) - 1,
+ use(&ra, blk, curi, (blk->jmp.t != Jb) - 1,
blk->jmp.t == Jret ? fn->abiret[i].reg : -1,
&blk->jmp.arg[i], blk->jmp.arg[!i]);
}
- for (int i = blk->ins.n - 1; i >= 0; --i) {
- int hint0 = -1, hint1 = -1;
- ins = &instrtab[blk->ins.p[i]];
- if (ins->op != Ocopy && !ra.allocs[ins - instrtab].t && ins->skip) { /* unused */
- *ins = mkinstr(Onop, 0,);
- continue;
- }
- def(&ra, ins, blk, i);
- if (ins->op != Ocall) {
- if (ins->op == Ocopy || ins->inplace) hint0 = ins->reg - 1;
- if (ins->op == Omove) {
- if (ins->l.t == RREG) hint1 = ins->l.i;
- /* MOV Rx,Rx is used by isel to indicate a clobber,
- * so it should be a def point for Rx but not a use point */
- if (ins->r.bits != ins->l.bits)
- use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF);
- } else {
- if (ins->r.t == RREG) {
- use(&ra, blk, i, ins->op, hint0, &ins->r, NOREF);
- use(&ra, blk, i, ins->op, hint0, &ins->l, ins->r);
- } else {
- if (ins->l.bits) use(&ra, blk, i, ins->op, hint0, &ins->l, ins->r);
- if (ins->r.bits) use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF);
- }
- }
- } else {
- struct call *call = &calltab.p[ins->r.i];
- struct bitset rspill[1] = {0};
-
- fn->isleaf = 0;
-
- for (int r = mctarg->gpr0; r < mctarg->gpr0 + mctarg->ngpr; ++r)
- if (!bstest(mctarg->rglob, r) && !bstest(mctarg->rcallee, r))
- bsset(rspill, r);
- for (int r = mctarg->fpr0; r < mctarg->fpr0 + mctarg->nfpr; ++r)
- if (!bstest(mctarg->rglob, r) && !bstest(mctarg->rcallee, r))
- bsset(rspill, r);
-
- if (call->abiret[0].ty.bits) bsclr(rspill, call->abiret[0].reg);
- if (call->abiret[1].ty.bits) bsclr(rspill, call->abiret[1].reg);
- for (uint r = 0; bsiter(&r, rspill, 1); ++r) {
- spill(&ra, r, blk, i);
- }
- for (int j = 0; j < call->narg; ++j) {
- short reg = call->abiarg[j].reg;
- if (reg >= 0) {
- forcetake(&ra, reg, mkref(RREG, reg), blk, i);
- }
- }
-
- use(&ra, blk, i, ins->op, hint0, &ins->l, NOREF);
- }
- if (ins->op == Ocopy && !ins->reg)
- *ins = mkinstr(Onop, 0,);
- if (ins->inplace && ins->reg) {
- assert(ins->l.t == RREG);
- if (ins->reg-1 != ins->l.i) {
- /* an in-place operation where the destination does not
- * match the first operand, so we need to add a move */
- insertinstr(blk, i, mkmove(insrescls(*ins), ins->reg-1, ins->l.i));
- ins->l.i = ins->reg-1;
- }
- }
+ for (; curi >= 0; --curi) {
+ struct instr *ins = &instrtab[blk->ins.p[curi]];
+ if (ins->op == Ocall) fn->isleaf = 0;
+ doins(&ra, blk, ins, curi);
}
+
for (int i = blk->phi.n - 1; i >= 0; --i) {
- union ref *phi;
- ins = &instrtab[blk->phi.p[i]];
- assert(ins->op == Ophi);
- phi = phitab.p[ins->l.i];
- if (ins->reg) {
- /* introduce necessary moves in each pred,
- * XXX this doesn't work for backwards branches */
- for (int i = 0; i < blk->npred; ++i) {
- struct instr mov = mkinstr(Omove, insrescls(*ins), mkref(RREG, ins->reg-1), phi[i]);
- insertinstr(blkpred(blk, i), blkpred(blk, i)->ins.n, mov);
- }
- }
- def(&ra, ins, NULL, 0);
+ struct instr *phi = &instrtab[blk->phi.p[i]];
+ def(&ra, phi, blk, -1);
+ }
+
+ memcpy(bbrm[blk->id].in.regs, ra.m.regs, sizeof ra.m.regs);
+   imap_copy(&bbrm[blk->id].in.allocs, &ra.m.allocs);
+ } while ((blk = blk->lprev) != last);
+
+ if (ccopt.dbg.r) {
+ efmt("<< regalloc before resolve\n");
+ irdump(fn);
+ }
+
+ /* resolve allocation mismatches across each edge */
+ blk = last = fn->entry->lprev;
+ do {
+ if (blk->id < 0) continue;
+ if (blk->s1) resolve(fn, bbrm, blk, blk->s1);
+ if (blk->s2) resolve(fn, bbrm, blk, blk->s2);
+ } while ((blk = blk->lprev) != last);
+
+ /* parallel copies for each phi */
+ blk = last = fn->entry->lprev;
+ do {
+ if (blk->id < 0) continue;
+ for (int i = 0; i < blk->npred; ++i) {
+ if (blkpred(blk, i)->id < 0) continue;
+ lowerphis(fn, bbrm, blkpred(blk, i), blk);
}
} while ((blk = blk->lprev) != last);
- do vfree(&blk->phi); while ((blk = blk->lprev) != last);
+ /* final cleanup */
+ id = 0;
+ blk = fn->entry;
+ do {
+ bool allnops = 1;
+ blk->id = id++;
+ for (int i = 0; i < blk->ins.n; ++i) {
+ struct instr *ins = &instrtab[blk->ins.p[i]];
+ if (!ins->reg && insrescls(*ins) && ins->op != Omove && !oiscmp(ins->op)) {
+ /* dead */
+ Nop:
+ *ins = mkinstr(Onop,0,);
+ } else if (ins->op == Omove && ins->r.t == RREG && ins->l.i == ins->r.i) {
+ goto Nop;
+ } else if (ins->op == Ocopy && ins->l.t == RREG && ins->reg-1 == ins->l.i) {
+ goto Nop;
+ } else if (ins->inplace && ins->l.t == RREG && ins->reg-1 != ins->l.i) {
+ /* fixup in-place (two-address) instructions */
+ allnops = 0;
+ insertinstr(blk, i++, mkmove(ins->cls, ins->reg-1, ins->l.i));
+ ins->l.i = ins->reg-1;
+ } else if (ins->op != Onop) allnops = 0;
+ }
+
+ /* remove no-op blocks with only 1 pred + 1 succ OR 1 pred w/ 1 succ + 0 succ*/
+ if (allnops && blk->npred == 1 && !blk->s2) {
+ struct block *p = blkpred(blk, 0);
+ if (!p->s2 && !blk->s1) {
+ /* simplify:
+ *
+ * @p:
+ * ...
+ * b @blk
+ * @blk:
+ * NOP
+ * ret
+ */
+ assert(p->s1 == blk);
+ p->jmp.t = Jret;
+ p->s1 = NULL;
+ DelBlk:
+ vfree(&blk->phi);
+ vfree(&blk->ins);
+ blk->lnext->lprev = blk->lprev;
+ blk->lprev->lnext = blk->lnext;
+ --id;
+ } else if (blk->s1) {
+ /* simplify:
+ *
+ * @p:
+ * ...
+ * b %x, @blk, @other
+ * @blk:
+ * NOP
+ * b @next
+ */
+ struct block *next = blk->s1;
+ if (p->s1 == blk) p->s1 = next;
+ else if (p->s2 == blk) p->s2 = next;
+ else continue;
+ goto DelBlk;
+ }
+ }
+ } while ((blk = blk->lnext) != fn->entry);
+ fn->nblk = id;
+
+ blk = fn->entry;
+ do vfree(&blk->phi); while ((blk = blk->lnext) != fn->entry);
fn->stksiz += ra.maxstk*8;
if (fn->stksiz > 1<<24) error(NULL, "'%s' stack frame too big", fn->name);
@@ -396,9 +758,13 @@ regalloc(struct function *fn)
efmt("<< After regalloc >>\n");
irdump(fn);
}
- bscopy(fn->regusage, globusage, 1);
- free(ra.allocs);
- vfree(&ra.freestk);
+ for (int i = 0; i < nbbrm; ++i) {
+ imap_free(&bbrm[i].in.allocs);
+ imap_free(&bbrm[i].out.allocs);
+ }
+ imap_free(&ra.m.allocs);
+ free(bbrm);
+ fn->regusage = globusage;
}
/* vim:set ts=3 sw=3 expandtab: */
diff --git a/ssa.c b/ssa.c
index 4ce8774..4a4ed4d 100644
--- a/ssa.c
+++ b/ssa.c
@@ -1,6 +1,29 @@
#include "common.h"
#include "ir.h"
+/* require use, keeps use */
+void
+copyopt(struct function *fn)
+{
+ struct block *blk = fn->entry;
+
+ do {
+ for (int i = 0; i < blk->ins.n; ++i) {
+ struct use *use, *uend;
+ union ref var = mkref(RTMP, blk->ins.p[i]);
+ struct instr *ins = &instrtab[var.i];
+
+ if (ins->op == Ocopy) {
+ assert(ins->l.t != RREG);
+
+ replcuses(var, ins->l);
+ *ins = mkinstr(Onop,0,);
+ deluses(var.i);
+ }
+ }
+ } while ((blk = blk->lnext) != fn->entry);
+}
+
void
filluses(struct function *fn)
{
diff --git a/test/fib.c b/test/fib.c
index 6254a5f..eadce03 100644
--- a/test/fib.c
+++ b/test/fib.c
@@ -1,6 +1,6 @@
unsigned fib(unsigned x) {
unsigned r = 0, q = 1;
- while (x--) {
+ while (x-- > 1) {
unsigned s = r + q;
r = q;
q = s;
@@ -8,12 +8,18 @@ unsigned fib(unsigned x) {
return q;
}
+unsigned fibr(unsigned x) {
+ if (x < 2) return x;
+ return fibr(x-1) + fibr(x-2);
+}
+
int atoi(const char *);
int printf(const char *, ...);
int main(int argc, char **argv) {
unsigned n = argv[1] ? atoi(argv[1]) : 10;
- printf("fib(%u) = %u\n", n, fib(n));
+ printf("fib(%u) = %u\n", n, fib(n));
+ printf("fibr(%u) = %u\n", n, fibr(n));
}
/* vim:set ts=3 sw=3 expandtab: */