aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorlemon <lsof@mailbox.org>2023-06-14 15:05:43 +0200
committerlemon <lsof@mailbox.org>2023-06-14 15:05:43 +0200
commit782d4e9df0363ca9f64d8b92a3d6952d552f13a5 (patch)
tree8c71e107a8c0b54f53c2c94d053ad4cc1b7a441d
parent8d8cf6584bf4081b54cd91fcaa42578cbd794440 (diff)
add spilling for function calls, misc fixes
-rw-r--r--abi0.c7
-rw-r--r--amd64/emit.c24
-rw-r--r--amd64/isel.c18
-rw-r--r--common.h6
-rw-r--r--regalloc.c162
-rw-r--r--test/test.c4
6 files changed, 180 insertions, 41 deletions
diff --git a/abi0.c b/abi0.c
index e523a7d..19a5dff 100644
--- a/abi0.c
+++ b/abi0.c
@@ -227,8 +227,13 @@ abi0_call(struct function *fn, struct instr *ins, struct block *blk, int *curi)
if (call->ret.isagg) {
replref(fn, blk, (*curi), mkref(RTMP, ins - instrtab), retmem);
if (!nret) { /* hidden pointer argument */
- if (call->abiret[0].reg >= 0)
+ if (call->abiret[0].reg >= 0) {
+ /* the result location pointer is also returned by the callee, like in x86 */
++nret;
+ insertinstr(blk, ++*curi, mkinstr(Ocopy, KPTR, mkref(RREG, call->abiret[0].reg)));
+ /* even if this is not used it the register copy
+ * must be emitted for the register allocator to know */
+ }
} else { /* aggregate returned in regs */
union ref r[2];
struct instr ins;
diff --git a/amd64/emit.c b/amd64/emit.c
index 4f1a448..499b928 100644
--- a/amd64/emit.c
+++ b/amd64/emit.c
@@ -49,8 +49,12 @@ addmemoper(struct oper *mem, struct oper add)
if (add.t == OIMM) {
mem->disp += add.imm;
} else if (add.t == OREG) {
- assert(mem->index == NOINDEX);
- mem->index = add.reg;
+ if (mem->base == NOBASE)
+ mem->base = add.reg;
+ else if (mem->index == NOINDEX)
+ mem->index = add.reg;
+ else
+ assert(0);
}
}
@@ -146,6 +150,7 @@ enum operpat {
P1, /* imm = 1 */
PI8,
PI32,
+ PU32,
PMEM,
PSYM,
};
@@ -168,6 +173,7 @@ struct desc {
uchar operenc; /* enum operenc */
uchar ext; /* ModR/M.reg opc extension */
bool r8 : 1; /* uses 8bit register */
+ bool norexw : 1; /* do not use REX.W even if size is 64 bits */
};
/* match operand against pattern */
@@ -183,6 +189,7 @@ opermatch(enum operpat pat, struct oper oper)
case P1: return oper.t == OIMM && oper.imm == 1;
case PI8: return oper.t == OIMM && (uint)(oper.imm+128) < 256;
case PI32: return oper.t == OIMM;
+ case PU32: return oper.t == OIMM && oper.imm >= 0;
case PMEM: return in_range(oper.t, OMEM, OCONR);
case PSYM: return oper.t == OCONR;
}
@@ -219,6 +226,7 @@ encode(uchar **pcode, const struct desc *tab, int ntab, uint siz, struct oper ds
if (*opc == 0x66 || *opc == 0xF2 || *opc == 0xF3)
B(*opc++), --nopc;
rex = -(en->ptd != PFPR && en->pts != PFPR) & (siz == 8) << 3; /* REX.W */
+ if (en->norexw) rex = 0;
switch (en->operenc) {
case EN_RR: /* mod = 11; reg = dst; rm = src */
rex |= (dst.reg >> 3) << 2; /* REX.R */
@@ -325,7 +333,9 @@ DEFINSTR2(Xmov,
{4|8, PGPR, PGPR, "\x8B", EN_RR}, /* MOV r32/64, r32/64 */
{4|8, PMEM, PGPR, "\x89", EN_MR}, /* MOV m32/64, r32/64 */
{4|8, PGPR, PMEM, "\x8B", EN_RM}, /* MOV r32/64, m32/64 */
- {4|8, PGPR, PI32, "\xB8", EN_OI}, /* MOV r32/64, imm */
+ {4 , PGPR, PI32, "\xB8", EN_OI}, /* MOV r32, imm */
+ { 8, PGPR, PU32, "\xB8", EN_OI, .norexw=1}, /* MOV r64, uimm */
+ { 8, PGPR, PI32, "\xC7", EN_RI32}, /* MOV r64, imm */
{4, PFPR, PFPR, "\xF3\x0F\x10", EN_RR}, /* MOVSS xmm, xmm */
{8, PFPR, PFPR, "\xF2\x0F\x10", EN_RR}, /* MOVSD xmm, xmm */
{4, PFPR, PMEM, "\xF3\x0F\x10", EN_RM}, /* MOVSS xmm, m32 */
@@ -511,10 +521,10 @@ emitinstr(uchar **pcode, struct function *fn, struct block *blk, int ii, struct
if (ins->reg-1 == dst.reg) { /* two-address add */
Xadd(pcode, ksiz, dst, mkimmdatregoper(ins->r));
} else { /* three-address add (lea) */
- struct oper mem = { OMEM, .index = NOINDEX };
+ struct oper mem = { OMEM, .base = NOBASE, .index = NOINDEX };
dst = reg2oper(ins->reg-1);
- if (isregref(ins->r)) mem.base = mkregoper(ins->r).reg;
- else mem.disp = mkimmoper(ins->r).imm;
+ addmemoper(&mem, ref2oper(ins->l));
+ addmemoper(&mem, ref2oper(ins->r));
Xlea(pcode, ksiz, dst, mem);
}
break;
@@ -632,6 +642,8 @@ emitbin(struct function *fn)
void
amd64_emit(struct function *fn)
{
+ fn->stksiz = alignup(fn->stksiz, 16);
+ if (fn->stksiz > 1<<24) error(NULL, "'%s' stack frame too big", fn->name);
emitbin(fn);
}
diff --git a/amd64/isel.c b/amd64/isel.c
index 6d4c87e..ff0968c 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -71,21 +71,21 @@ ascale(struct addr *addr, union ref a, union ref b)
}
static bool
-aadd(struct addr *addr, union ref r, bool rec)
+aadd(struct addr *addr, union ref r)
{
if (r.t == RTMP) {
struct instr *ins = &instrtab[r.i];
if (ins->op == Oadd) {
- if (!aadd(addr, ins->l, rec)) return 0;
- if (!aadd(addr, ins->r, rec)) return 0;
+ if (!aadd(addr, ins->l)) goto Ref;
+ if (!aadd(addr, ins->r)) goto Ref;
ins->skip = 1;
} else if (ins->op == Oshl) {
- if (!ascale(addr, ins->l, ins->r)) return 0;
+ if (!ascale(addr, ins->l, ins->r)) goto Ref;
ins->skip = 1;
- } else if (!rec && ins->op == Ocopy && ins->l.t == RMORE) {
+ } else if (ins->op == Ocopy && ins->l.t == RMORE) {
struct addr save = *addr, *addr2 = &addrht[ins->l.i];
- if ((!addr2->base.t || aadd(addr, addr2->base, 1))
+ if ((!addr2->base.t || aadd(addr, addr2->base))
&& acon(addr, mkintcon(KI4, addr2->disp))
&& (!addr2->index.t || ascale(addr, addr2->index, mkref(RICON, addr2->shift))))
{
@@ -94,6 +94,9 @@ aadd(struct addr *addr, union ref r, bool rec)
*addr = save;
goto Ref;
}
+ } else if (ins->op == Ocopy) {
+ if (!aadd(addr, ins->l)) goto Ref;
+ ins->skip = 1;
} else goto Ref;
} else if (iscon(r)) {
return acon(addr, r);
@@ -117,7 +120,7 @@ fuseaddr(struct function *fn, union ref *r)
if (r->t == RMORE) return 1;
if (r->t != RTMP) return 0;
- if (!aadd(&addr, *r, 0)) return 0;
+ if (!aadd(&addr, *r)) return 0;
*r = mkaddr(addr);
return 1;
@@ -272,7 +275,6 @@ amd64_isel(struct function *fn)
sel(fn, &instrtab[blk->ins.p[i]], blk, &i);
}
} while ((blk = blk->lnext) != fn->entry);
- fn->stksiz = alignup(fn->stksiz, 16);
if (ccopt.dbg.i) {
efmt("<< After isel >>\n");
diff --git a/common.h b/common.h
index 3628c89..a486426 100644
--- a/common.h
+++ b/common.h
@@ -62,7 +62,7 @@ popcnt(uvlong x) {
return __builtin_popcountll(x);
#else
uint n = 0;
- while (x) x >>= 1, ++n;
+ while (x) n += x&1, x >>= 1;
return n;
#endif
}
@@ -375,13 +375,13 @@ bstest(const struct bitset *bs, uint i)
static inline void
bsset(struct bitset *bs, uint i)
{
- bs[i / BSNBIT].u |= 1 << i % BSNBIT;
+ bs[i / BSNBIT].u |= 1ull << i % BSNBIT;
}
static inline void
bsclr(struct bitset *bs, uint i)
{
- bs[i / BSNBIT].u &= ~(1 << i % BSNBIT);
+ bs[i / BSNBIT].u &= ~(1ull << i % BSNBIT);
}
static inline void
diff --git a/regalloc.c b/regalloc.c
index b90940f..40963ea 100644
--- a/regalloc.c
+++ b/regalloc.c
@@ -16,30 +16,69 @@ struct alloc { ushort t : 2, a : 14; };
struct rega {
union ref regs[MAXREGS]; /* map reg -> value holding reg */
struct alloc *allocs; /* map tmpidx -> allocation */
+ vec_of(ushort) freestk; /* available stack slots */
int nfreegpr, nfreefpr;
+ int maxstk, stktop;
};
+static vec_of(union ref *) stkslotrefs;
+
+static void
+addstkslotref(union ref inst)
+{
+ vpush(&stkslotrefs, &instrtab[inst.i].l);
+}
+
+static int allocreg(struct rega *ra, enum irclass cls, union ref ref, int excl);
+
+#if 0
+#define DBG efmt
+#else
+#define DBG(...) ((void)0)
+#endif
+
static void
-def(struct rega *ra, struct instr *ins)
+def(struct rega *ra, struct instr *ins, struct block *blk, int curi)
{
int reg = -1, var;
struct alloc *alloc;
- if (ins->op != Omove) {
+ if (ins->op != Omove && ins->op != Ocall) {
var = ins - instrtab;
- // efmt("def %%%d\n",var);
+ DBG("def %%%d\n",var);
alloc = &ra->allocs[var];
if (alloc->t == ADEAD) {
return;
} else if (alloc->t == AREG) {
reg = alloc->a;
- // efmt("-- free %s for %%%d\n", mctarg->rnames[alloc->a], var);
+ DBG("-- free %s for %%%d\n", mctarg->rnames[alloc->a], var);
assert(ra->regs[reg].bits == mkref(RTMP, var).bits);
- } else assert(0);
+ } else if (alloc->t == ASTACK) {
+ /* unspill, insert 'store [slot], reg' */
+ int reg = allocreg(ra, ins->cls, mkref(RTMP, var), -1);
+ struct instr store = mkinstr(Ostore1 + ilog2(cls2siz[ins->cls]), 0,
+ mkref(RICON, alloc->a*8), mkref(RREG, reg));
+ DBG("-- unspill %%%d s%d -> %s\n", var, alloc->a, mctarg->rnames[ins->reg+1]);
+ addstkslotref(insertinstr(blk, ++curi, store));
+ vpush(&ra->freestk, alloc->a);
+ ins->reg = reg+1;
+ def(ra, ins, blk, curi); /* and free the reg again */
+ return;
+ }
*alloc = afree();
- } else {
+ } else if (ins->op == Omove) {
reg = ins->l.i;
assert(ins->l.t == RREG);
- // efmt("-- free %s\n", mctarg->rnames[ins->l.i]);
+ DBG("-- free %s\n", mctarg->rnames[ins->l.i]);
+ } else {
+ struct call *call = &calltab.p[ins->r.i];
+ for (int i = 0; i < 2; ++i) {
+ if (!call->abiret[0].ty.cls) break;
+ reg = call->abiret[0].reg;
+ ra->regs[reg] = NOREF;
+ if (isfpr(reg)) ++ra->nfreefpr;
+ else ++ra->nfreegpr;
+ return;
+ }
}
if (reg != -1) {
ra->regs[reg] = NOREF;
@@ -50,18 +89,19 @@ def(struct rega *ra, struct instr *ins)
static void
take(struct rega *ra, int reg, union ref ref) {
- // efmt("-- take %s for %d %d\n", mctarg->rnames[reg], ref.t, ref.i);
+ DBG("-- take %s for %c%d\n", mctarg->rnames[reg], "R%"[ref.t==RTMP], ref.i);
assert(!ra->regs[reg].t && "taken");
if (ref.t == RTMP)
ra->allocs[ref.i] = areg(reg);
ra->regs[reg] = ref;
+ assert(ra->regs[reg].bits);
bsset(globusage, reg);
if (isfpr(reg)) --ra->nfreefpr;
else --ra->nfreegpr;
}
static int
-nextreg(struct rega *ra, enum irclass cls, union ref ref, int excl)
+allocreg(struct rega *ra, enum irclass cls, union ref ref, int excl)
{
int r0, rend, reg;
@@ -83,6 +123,43 @@ nextreg(struct rega *ra, enum irclass cls, union ref ref, int excl)
assert(!"no more reg");
}
+static int
+allocstk(struct rega *ra, int var)
+{
+ int s;
+
+ if (ra->freestk.n) {
+ s = ra->freestk.p[--ra->freestk.n];
+ ra->allocs[var] = astack(s);
+ } else {
+ if (ra->stktop+1 >= ra->maxstk) ra->maxstk = ra->stktop+1;
+ s = ra->stktop++;
+ }
+ ra->allocs[var] = astack(s);
+ return s;
+}
+
+static void
+spill(struct rega *ra, int reg, struct block *blk, int curi) {
+ int var, s;
+ struct instr load;
+
+ if (!ra->regs[reg].t) return;
+ var = ra->regs[reg].i;
+ assert(ra->regs[reg].t == RTMP && *(ushort *)&ra->allocs[var] == *(ushort *)&areg(reg));
+ ra->regs[reg] = NOREF;
+ s = allocstk(ra, var);
+ DBG("-- spill %%%d %s -> s%d\n", var, mctarg->rnames[reg], s);
+ instrtab[var].reg = 0;
+ /* insert 'reg = load [slot]' */
+ load = mkinstr(Oloads1 + 2*ilog2(cls2siz[instrtab[var].cls]), instrtab[var].cls, mkref(RICON, s*8));
+ load.reg = reg+1;
+ addstkslotref(insertinstr(blk, ++curi, load));
+ if (isfpr(reg)) --ra->nfreefpr;
+ else --ra->nfreegpr;
+
+}
+
#define mkmove(k, rd, rs) mkinstr(Omove, k, mkref(RREG, rd), mkref(RREG, rs))
static void
@@ -107,8 +184,8 @@ forcetake(struct rega *ra, int reg, union ref ref, struct block *blk, int curi)
* e.g.: given 'R0 = copy R1'; if R1 => %x, we need to prevent renaming %x => R0
*/
int excl = instrtab[blk->ins.p[curi]].reg-1;
- int rename = nextreg(ra, isgpr(reg) ? KI4 : KF4, ra->regs[reg], excl);
- if (ccopt.dbg.r) efmt("-- rename %%%d %s -> %s\n", var, mctarg->rnames[reg], mctarg->rnames[rename]);
+ int rename = allocreg(ra, isgpr(reg) ? KI4 : KF4, ra->regs[reg], excl);
+ if (ccopt.dbg.r)DBG("-- rename %%%d %s -> %s\n", var, mctarg->rnames[reg], mctarg->rnames[rename]);
/* introduce move from rename -> original (since we allocate backwards) */
insertinstr(blk, ++curi, mkmove(instrtab[var].cls, reg, rename));
instrtab[var].reg = rename+1;
@@ -117,7 +194,7 @@ forcetake(struct rega *ra, int reg, union ref ref, struct block *blk, int curi)
*alloc = areg(rename);
ra->regs[reg].bits = 0;
} else {
- assert(!"spill");
+ spill(ra, reg, blk, curi);
}
take(ra, reg, ref);
}
@@ -136,6 +213,7 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re
*ref = mkaddr(addr);
return;
} else if (ref->t == RREG) {
+ assert(ref->i != excl);
forcetake(ra, ref->i, *ref, blk, curi);
}
if (ref->t != RTMP) return;
@@ -144,11 +222,10 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re
if (oisalloca(ins->op)) return;
if (!ins->cls) return;
if (!ins->reg) {
- if (op == -1) /* cond branch */
- if (oiscmp(ins->op) && ref->i == blk->ins.p[blk->ins.n-1])
- /* result of comparison instr is only used to conditionally branch,
- * doesn't usually need a reg (this should be handled by isel) */
- return;
+ int s = -1;
+ if (ra->allocs[ref->i].t == ASTACK)
+ s = ra->allocs[ref->i].a;
+
assert(ins->op != Ocall);
if (hint == -1 && ins->op == Ocopy && ins->l.t == RREG) /* for '%x = copy Rx', hint %x to use Rx */
hint = ins->l.i;
@@ -156,7 +233,16 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re
take(ra, hint, *ref);
ins->reg = hint + 1;
} else {
- ins->reg = nextreg(ra, ins->cls, *ref, excl) + 1;
+ ins->reg = allocreg(ra, ins->cls, *ref, excl) + 1;
+ }
+ if (s >= 0) {
+ /* unspill, insert 'store [slot], reg' */
+ DBG("-- unspill %%%d s%d -> %s\n", ref->i, s, mctarg->rnames[ins->reg-1]);
+ struct instr store = mkinstr(Ostore1 + ilog2(cls2siz[ins->cls]), 0,
+ mkref(RICON, s*8),
+ mkref(RREG, ins->reg-1));
+ addstkslotref(insertinstr(blk, ++curi, store));
+ vpush(&ra->freestk, s);
}
}
*ref = mkref(RREG, ins->reg-1);
@@ -168,13 +254,18 @@ regalloc(struct function *fn)
extern int ninstr;
struct instr *ins;
struct block *last = fn->entry->lprev, *blk;
- struct rega ra = {.allocs = xcalloc(ninstr * sizeof(struct alloc))};
+ static union ref *stkslotrefsbuf[64];
+ struct rega ra = {0};
+
+ vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf));
+ ra.allocs = xcalloc(ninstr * sizeof(struct alloc));
ra.nfreegpr = mctarg->ngpr - popcnt(mctarg->rglob->u);
ra.nfreefpr = mctarg->nfpr;
for (int i = 0; i < MAXREGS; ++i)
if (in_range(i, mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1))
bsset(floatregs, i);
bszero(globusage, 1);
+ fn->stksiz = alignup(fn->stksiz, 8);
/* a dumb linear register allocator that visits instructions physically backwards
* starting at the end of the function, when encountering a use of a new
@@ -193,11 +284,11 @@ regalloc(struct function *fn)
for (int i = blk->ins.n - 1; i >= 0; --i) {
int hint0 = -1, hint1 = -1;
ins = &instrtab[blk->ins.p[i]];
- if (!ins->reg && ins->skip) { /* unused */
+ if (ins->op != Ocopy && !ra.allocs[ins - instrtab].t && ins->skip) { /* unused */
*ins = mkinstr(Onop, 0,);
continue;
}
- def(&ra, ins);
+ def(&ra, ins, blk, i);
if (ins->op != Ocall) {
if (ins->op == Ocopy) hint0 = ins->reg - 1;
if (ins->op == Omove) {
@@ -212,6 +303,27 @@ regalloc(struct function *fn)
}
} else {
struct call *call = &calltab.p[ins->r.i];
+ struct bitset rspill[1] = {0};
+
+ for (int r = mctarg->gpr0; r < mctarg->gpr0 + mctarg->ngpr; ++r)
+ if (!bstest(mctarg->rglob, r) && !bstest(mctarg->rcallee, r))
+ bsset(rspill, r);
+ for (int r = mctarg->fpr0; r < mctarg->fpr0 + mctarg->nfpr; ++r)
+ if (!bstest(mctarg->rglob, r) && !bstest(mctarg->rcallee, r))
+ bsset(rspill, r);
+
+ if (call->abiret[0].ty.bits) bsclr(rspill, call->abiret[0].reg);
+ if (call->abiret[1].ty.bits) bsclr(rspill, call->abiret[1].reg);
+ for (uint r = 0; bsiter(&r, rspill, 1); ++r) {
+ spill(&ra, r, blk, i);
+ }
+ for (int j = 0; j < call->narg; ++j) {
+ short reg = call->abiargregs[j];
+ if (reg >= 0) {
+ forcetake(&ra, reg, mkref(RREG, reg), blk, i);
+ }
+ }
+
use(&ra, blk, i, ins->op, hint0, &ins->l, NOREF);
}
if (ins->op == Ocopy && !ins->reg)
@@ -239,12 +351,18 @@ regalloc(struct function *fn)
insertinstr(phi->blk[i], phi->blk[i]->ins.n, mov);
}
}
- def(&ra, ins);
+ def(&ra, ins, NULL, 0);
}
} while ((blk = blk->lprev) != last);
do vfree(&blk->phi); while ((blk = blk->lprev) != last);
+ fn->stksiz += ra.maxstk*8;
+ if (fn->stksiz > 1<<24) error(NULL, "'%s' stack frame too big", fn->name);
+ while (stkslotrefs.n) {
+ union ref *adr = stkslotrefs.p[--stkslotrefs.n];
+ *adr = mkaddr((struct addr) { .base = mkref(RREG, mctarg->bpr), .disp = -fn->stksiz + adr->i });
+ }
if (ccopt.dbg.r) {
efmt("<< After regalloc >>\n");
irdump(fn);
diff --git a/test/test.c b/test/test.c
index e429104..c44d1a0 100644
--- a/test/test.c
+++ b/test/test.c
@@ -47,16 +47,18 @@ struct quad quad(long x, long y, long z, long w) {
void silly(struct pair *p, struct quad *q)
{
- *p = pair(1,2);
+ *p = pair(-1,2);
*q = quad(1,2,3,4);
}
+#if 0
int test2(struct big *b) {
struct big s = *b;
extern int h(int,float, struct big, float);
s.x[5] += 2;
return h(0, -.5f, s, 0);
}
+#endif
struct f2 { float f,g; };
struct f2 f2test(struct f2 *r) {