diff options
| -rw-r--r-- | abi0.c | 7 | ||||
| -rw-r--r-- | amd64/emit.c | 24 | ||||
| -rw-r--r-- | amd64/isel.c | 18 | ||||
| -rw-r--r-- | common.h | 6 | ||||
| -rw-r--r-- | regalloc.c | 162 | ||||
| -rw-r--r-- | test/test.c | 4 |
6 files changed, 180 insertions, 41 deletions
@@ -227,8 +227,13 @@ abi0_call(struct function *fn, struct instr *ins, struct block *blk, int *curi) if (call->ret.isagg) { replref(fn, blk, (*curi), mkref(RTMP, ins - instrtab), retmem); if (!nret) { /* hidden pointer argument */ - if (call->abiret[0].reg >= 0) + if (call->abiret[0].reg >= 0) { + /* the result location pointer is also returned by the callee, like in x86 */ ++nret; + insertinstr(blk, ++*curi, mkinstr(Ocopy, KPTR, mkref(RREG, call->abiret[0].reg))); + /* even if this is not used, the register copy + * must be emitted for the register allocator to know */ + } } else { /* aggregate returned in regs */ union ref r[2]; struct instr ins; diff --git a/amd64/emit.c b/amd64/emit.c index 4f1a448..499b928 100644 --- a/amd64/emit.c +++ b/amd64/emit.c @@ -49,8 +49,12 @@ addmemoper(struct oper *mem, struct oper add) if (add.t == OIMM) { mem->disp += add.imm; } else if (add.t == OREG) { - assert(mem->index == NOINDEX); - mem->index = add.reg; + if (mem->base == NOBASE) + mem->base = add.reg; + else if (mem->index == NOINDEX) + mem->index = add.reg; + else + assert(0); } } @@ -146,6 +150,7 @@ enum operpat { P1, /* imm = 1 */ PI8, PI32, + PU32, PMEM, PSYM, }; @@ -168,6 +173,7 @@ struct desc { uchar operenc; /* enum operenc */ uchar ext; /* ModR/M.reg opc extension */ bool r8 : 1; /* uses 8bit register */ + bool norexw : 1; /* do not use REX.W even if size is 64 bits */ }; /* match operand against pattern */ @@ -183,6 +189,7 @@ opermatch(enum operpat pat, struct oper oper) case P1: return oper.t == OIMM && oper.imm == 1; case PI8: return oper.t == OIMM && (uint)(oper.imm+128) < 256; case PI32: return oper.t == OIMM; + case PU32: return oper.t == OIMM && oper.imm >= 0; case PMEM: return in_range(oper.t, OMEM, OCONR); case PSYM: return oper.t == OCONR; } @@ -219,6 +226,7 @@ encode(uchar **pcode, const struct desc *tab, int ntab, uint siz, struct oper ds if (*opc == 0x66 || *opc == 0xF2 || *opc == 0xF3) B(*opc++), --nopc; rex = -(en->ptd != PFPR && en->pts != PFPR) & 
(siz == 8) << 3; /* REX.W */ + if (en->norexw) rex = 0; switch (en->operenc) { case EN_RR: /* mod = 11; reg = dst; rm = src */ rex |= (dst.reg >> 3) << 2; /* REX.R */ @@ -325,7 +333,9 @@ DEFINSTR2(Xmov, {4|8, PGPR, PGPR, "\x8B", EN_RR}, /* MOV r32/64, r32/64 */ {4|8, PMEM, PGPR, "\x89", EN_MR}, /* MOV m32/64, r32/64 */ {4|8, PGPR, PMEM, "\x8B", EN_RM}, /* MOV r32/64, m32/64 */ - {4|8, PGPR, PI32, "\xB8", EN_OI}, /* MOV r32/64, imm */ + {4 , PGPR, PI32, "\xB8", EN_OI}, /* MOV r32, imm */ + { 8, PGPR, PU32, "\xB8", EN_OI, .norexw=1}, /* MOV r64, uimm */ + { 8, PGPR, PI32, "\xC7", EN_RI32}, /* MOV r64, imm */ {4, PFPR, PFPR, "\xF3\x0F\x10", EN_RR}, /* MOVSS xmm, xmm */ {8, PFPR, PFPR, "\xF2\x0F\x10", EN_RR}, /* MOVSD xmm, xmm */ {4, PFPR, PMEM, "\xF3\x0F\x10", EN_RM}, /* MOVSS xmm, m32 */ @@ -511,10 +521,10 @@ emitinstr(uchar **pcode, struct function *fn, struct block *blk, int ii, struct if (ins->reg-1 == dst.reg) { /* two-address add */ Xadd(pcode, ksiz, dst, mkimmdatregoper(ins->r)); } else { /* three-address add (lea) */ - struct oper mem = { OMEM, .index = NOINDEX }; + struct oper mem = { OMEM, .base = NOBASE, .index = NOINDEX }; dst = reg2oper(ins->reg-1); - if (isregref(ins->r)) mem.base = mkregoper(ins->r).reg; - else mem.disp = mkimmoper(ins->r).imm; + addmemoper(&mem, ref2oper(ins->l)); + addmemoper(&mem, ref2oper(ins->r)); Xlea(pcode, ksiz, dst, mem); } break; @@ -632,6 +642,8 @@ emitbin(struct function *fn) void amd64_emit(struct function *fn) { + fn->stksiz = alignup(fn->stksiz, 16); + if (fn->stksiz > 1<<24) error(NULL, "'%s' stack frame too big", fn->name); emitbin(fn); } diff --git a/amd64/isel.c b/amd64/isel.c index 6d4c87e..ff0968c 100644 --- a/amd64/isel.c +++ b/amd64/isel.c @@ -71,21 +71,21 @@ ascale(struct addr *addr, union ref a, union ref b) } static bool -aadd(struct addr *addr, union ref r, bool rec) +aadd(struct addr *addr, union ref r) { if (r.t == RTMP) { struct instr *ins = &instrtab[r.i]; if (ins->op == Oadd) { - if (!aadd(addr, ins->l, 
rec)) return 0; - if (!aadd(addr, ins->r, rec)) return 0; + if (!aadd(addr, ins->l)) goto Ref; + if (!aadd(addr, ins->r)) goto Ref; ins->skip = 1; } else if (ins->op == Oshl) { - if (!ascale(addr, ins->l, ins->r)) return 0; + if (!ascale(addr, ins->l, ins->r)) goto Ref; ins->skip = 1; - } else if (!rec && ins->op == Ocopy && ins->l.t == RMORE) { + } else if (ins->op == Ocopy && ins->l.t == RMORE) { struct addr save = *addr, *addr2 = &addrht[ins->l.i]; - if ((!addr2->base.t || aadd(addr, addr2->base, 1)) + if ((!addr2->base.t || aadd(addr, addr2->base)) && acon(addr, mkintcon(KI4, addr2->disp)) && (!addr2->index.t || ascale(addr, addr2->index, mkref(RICON, addr2->shift)))) { @@ -94,6 +94,9 @@ aadd(struct addr *addr, union ref r, bool rec) *addr = save; goto Ref; } + } else if (ins->op == Ocopy) { + if (!aadd(addr, ins->l)) goto Ref; + ins->skip = 1; } else goto Ref; } else if (iscon(r)) { return acon(addr, r); @@ -117,7 +120,7 @@ fuseaddr(struct function *fn, union ref *r) if (r->t == RMORE) return 1; if (r->t != RTMP) return 0; - if (!aadd(&addr, *r, 0)) return 0; + if (!aadd(&addr, *r)) return 0; *r = mkaddr(addr); return 1; @@ -272,7 +275,6 @@ amd64_isel(struct function *fn) sel(fn, &instrtab[blk->ins.p[i]], blk, &i); } } while ((blk = blk->lnext) != fn->entry); - fn->stksiz = alignup(fn->stksiz, 16); if (ccopt.dbg.i) { efmt("<< After isel >>\n"); @@ -62,7 +62,7 @@ popcnt(uvlong x) { return __builtin_popcountll(x); #else uint n = 0; - while (x) x >>= 1, ++n; + while (x) n += x&1, x >>= 1; return n; #endif } @@ -375,13 +375,13 @@ bstest(const struct bitset *bs, uint i) static inline void bsset(struct bitset *bs, uint i) { - bs[i / BSNBIT].u |= 1 << i % BSNBIT; + bs[i / BSNBIT].u |= 1ull << i % BSNBIT; } static inline void bsclr(struct bitset *bs, uint i) { - bs[i / BSNBIT].u &= ~(1 << i % BSNBIT); + bs[i / BSNBIT].u &= ~(1ull << i % BSNBIT); } static inline void @@ -16,30 +16,69 @@ struct alloc { ushort t : 2, a : 14; }; struct rega { union ref regs[MAXREGS]; /* 
map reg -> value holding reg */ struct alloc *allocs; /* map tmpidx -> allocation */ + vec_of(ushort) freestk; /* available stack slots */ int nfreegpr, nfreefpr; + int maxstk, stktop; }; +static vec_of(union ref *) stkslotrefs; + +static void +addstkslotref(union ref inst) +{ + vpush(&stkslotrefs, &instrtab[inst.i].l); +} + +static int allocreg(struct rega *ra, enum irclass cls, union ref ref, int excl); + +#if 0 +#define DBG efmt +#else +#define DBG(...) ((void)0) +#endif + static void -def(struct rega *ra, struct instr *ins) +def(struct rega *ra, struct instr *ins, struct block *blk, int curi) { int reg = -1, var; struct alloc *alloc; - if (ins->op != Omove) { + if (ins->op != Omove && ins->op != Ocall) { var = ins - instrtab; - // efmt("def %%%d\n",var); + DBG("def %%%d\n",var); alloc = &ra->allocs[var]; if (alloc->t == ADEAD) { return; } else if (alloc->t == AREG) { reg = alloc->a; - // efmt("-- free %s for %%%d\n", mctarg->rnames[alloc->a], var); + DBG("-- free %s for %%%d\n", mctarg->rnames[alloc->a], var); assert(ra->regs[reg].bits == mkref(RTMP, var).bits); - } else assert(0); + } else if (alloc->t == ASTACK) { + /* unspill, insert 'store [slot], reg' */ + int reg = allocreg(ra, ins->cls, mkref(RTMP, var), -1); + struct instr store = mkinstr(Ostore1 + ilog2(cls2siz[ins->cls]), 0, + mkref(RICON, alloc->a*8), mkref(RREG, reg)); + DBG("-- unspill %%%d s%d -> %s\n", var, alloc->a, mctarg->rnames[ins->reg+1]); + addstkslotref(insertinstr(blk, ++curi, store)); + vpush(&ra->freestk, alloc->a); + ins->reg = reg+1; + def(ra, ins, blk, curi); /* and free the reg again */ + return; + } *alloc = afree(); - } else { + } else if (ins->op == Omove) { reg = ins->l.i; assert(ins->l.t == RREG); - // efmt("-- free %s\n", mctarg->rnames[ins->l.i]); + DBG("-- free %s\n", mctarg->rnames[ins->l.i]); + } else { + struct call *call = &calltab.p[ins->r.i]; + for (int i = 0; i < 2; ++i) { + if (!call->abiret[0].ty.cls) break; + reg = call->abiret[0].reg; + ra->regs[reg] = NOREF; + 
if (isfpr(reg)) ++ra->nfreefpr; + else ++ra->nfreegpr; + return; + } } if (reg != -1) { ra->regs[reg] = NOREF; @@ -50,18 +89,19 @@ def(struct rega *ra, struct instr *ins) static void take(struct rega *ra, int reg, union ref ref) { - // efmt("-- take %s for %d %d\n", mctarg->rnames[reg], ref.t, ref.i); + DBG("-- take %s for %c%d\n", mctarg->rnames[reg], "R%"[ref.t==RTMP], ref.i); assert(!ra->regs[reg].t && "taken"); if (ref.t == RTMP) ra->allocs[ref.i] = areg(reg); ra->regs[reg] = ref; + assert(ra->regs[reg].bits); bsset(globusage, reg); if (isfpr(reg)) --ra->nfreefpr; else --ra->nfreegpr; } static int -nextreg(struct rega *ra, enum irclass cls, union ref ref, int excl) +allocreg(struct rega *ra, enum irclass cls, union ref ref, int excl) { int r0, rend, reg; @@ -83,6 +123,43 @@ nextreg(struct rega *ra, enum irclass cls, union ref ref, int excl) assert(!"no more reg"); } +static int +allocstk(struct rega *ra, int var) +{ + int s; + + if (ra->freestk.n) { + s = ra->freestk.p[--ra->freestk.n]; + ra->allocs[var] = astack(s); + } else { + if (ra->stktop+1 >= ra->maxstk) ra->maxstk = ra->stktop+1; + s = ra->stktop++; + } + ra->allocs[var] = astack(s); + return s; +} + +static void +spill(struct rega *ra, int reg, struct block *blk, int curi) { + int var, s; + struct instr load; + + if (!ra->regs[reg].t) return; + var = ra->regs[reg].i; + assert(ra->regs[reg].t == RTMP && *(ushort *)&ra->allocs[var] == *(ushort *)&areg(reg)); + ra->regs[reg] = NOREF; + s = allocstk(ra, var); + DBG("-- spill %%%d %s -> s%d\n", var, mctarg->rnames[reg], s); + instrtab[var].reg = 0; + /* insert 'reg = load [slot]' */ + load = mkinstr(Oloads1 + 2*ilog2(cls2siz[instrtab[var].cls]), instrtab[var].cls, mkref(RICON, s*8)); + load.reg = reg+1; + addstkslotref(insertinstr(blk, ++curi, load)); + if (isfpr(reg)) --ra->nfreefpr; + else --ra->nfreegpr; + +} + #define mkmove(k, rd, rs) mkinstr(Omove, k, mkref(RREG, rd), mkref(RREG, rs)) static void @@ -107,8 +184,8 @@ forcetake(struct rega *ra, int reg, 
union ref ref, struct block *blk, int curi) * e.g.: given 'R0 = copy R1'; if R1 => %x, we need to prevent renaming %x => R0 */ int excl = instrtab[blk->ins.p[curi]].reg-1; - int rename = nextreg(ra, isgpr(reg) ? KI4 : KF4, ra->regs[reg], excl); - if (ccopt.dbg.r) efmt("-- rename %%%d %s -> %s\n", var, mctarg->rnames[reg], mctarg->rnames[rename]); + int rename = allocreg(ra, isgpr(reg) ? KI4 : KF4, ra->regs[reg], excl); + if (ccopt.dbg.r)DBG("-- rename %%%d %s -> %s\n", var, mctarg->rnames[reg], mctarg->rnames[rename]); /* introduce move from rename -> original (since we allocate backwards) */ insertinstr(blk, ++curi, mkmove(instrtab[var].cls, reg, rename)); instrtab[var].reg = rename+1; @@ -117,7 +194,7 @@ forcetake(struct rega *ra, int reg, union ref ref, struct block *blk, int curi) *alloc = areg(rename); ra->regs[reg].bits = 0; } else { - assert(!"spill"); + spill(ra, reg, blk, curi); } take(ra, reg, ref); } @@ -136,6 +213,7 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re *ref = mkaddr(addr); return; } else if (ref->t == RREG) { + assert(ref->i != excl); forcetake(ra, ref->i, *ref, blk, curi); } if (ref->t != RTMP) return; @@ -144,11 +222,10 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re if (oisalloca(ins->op)) return; if (!ins->cls) return; if (!ins->reg) { - if (op == -1) /* cond branch */ - if (oiscmp(ins->op) && ref->i == blk->ins.p[blk->ins.n-1]) - /* result of comparison instr is only used to conditionally branch, - * doesn't usually need a reg (this should be handled by isel) */ - return; + int s = -1; + if (ra->allocs[ref->i].t == ASTACK) + s = ra->allocs[ref->i].a; + assert(ins->op != Ocall); if (hint == -1 && ins->op == Ocopy && ins->l.t == RREG) /* for '%x = copy Rx', hint %x to use Rx */ hint = ins->l.i; @@ -156,7 +233,16 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re take(ra, hint, *ref); ins->reg = hint + 1; } else { - ins->reg = 
nextreg(ra, ins->cls, *ref, excl) + 1; + ins->reg = allocreg(ra, ins->cls, *ref, excl) + 1; + } + if (s >= 0) { + /* unspill, insert 'store [slot], reg' */ + DBG("-- unspill %%%d s%d -> %s\n", ref->i, s, mctarg->rnames[ins->reg-1]); + struct instr store = mkinstr(Ostore1 + ilog2(cls2siz[ins->cls]), 0, + mkref(RICON, s*8), + mkref(RREG, ins->reg-1)); + addstkslotref(insertinstr(blk, ++curi, store)); + vpush(&ra->freestk, s); } } *ref = mkref(RREG, ins->reg-1); @@ -168,13 +254,18 @@ regalloc(struct function *fn) extern int ninstr; struct instr *ins; struct block *last = fn->entry->lprev, *blk; - struct rega ra = {.allocs = xcalloc(ninstr * sizeof(struct alloc))}; + static union ref *stkslotrefsbuf[64]; + struct rega ra = {0}; + + vinit(&stkslotrefs, stkslotrefsbuf, arraylength(stkslotrefsbuf)); + ra.allocs = xcalloc(ninstr * sizeof(struct alloc)); ra.nfreegpr = mctarg->ngpr - popcnt(mctarg->rglob->u); ra.nfreefpr = mctarg->nfpr; for (int i = 0; i < MAXREGS; ++i) if (in_range(i, mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1)) bsset(floatregs, i); bszero(globusage, 1); + fn->stksiz = alignup(fn->stksiz, 8); /* a dumb linear register allocator that visits instructions physically backwards * starting at the end of the function, when encountering a use of a new @@ -193,11 +284,11 @@ regalloc(struct function *fn) for (int i = blk->ins.n - 1; i >= 0; --i) { int hint0 = -1, hint1 = -1; ins = &instrtab[blk->ins.p[i]]; - if (!ins->reg && ins->skip) { /* unused */ + if (ins->op != Ocopy && !ra.allocs[ins - instrtab].t && ins->skip) { /* unused */ *ins = mkinstr(Onop, 0,); continue; } - def(&ra, ins); + def(&ra, ins, blk, i); if (ins->op != Ocall) { if (ins->op == Ocopy) hint0 = ins->reg - 1; if (ins->op == Omove) { @@ -212,6 +303,27 @@ regalloc(struct function *fn) } } else { struct call *call = &calltab.p[ins->r.i]; + struct bitset rspill[1] = {0}; + + for (int r = mctarg->gpr0; r < mctarg->gpr0 + mctarg->ngpr; ++r) + if (!bstest(mctarg->rglob, r) && !bstest(mctarg->rcallee, 
r)) + bsset(rspill, r); + for (int r = mctarg->fpr0; r < mctarg->fpr0 + mctarg->nfpr; ++r) + if (!bstest(mctarg->rglob, r) && !bstest(mctarg->rcallee, r)) + bsset(rspill, r); + + if (call->abiret[0].ty.bits) bsclr(rspill, call->abiret[0].reg); + if (call->abiret[1].ty.bits) bsclr(rspill, call->abiret[1].reg); + for (uint r = 0; bsiter(&r, rspill, 1); ++r) { + spill(&ra, r, blk, i); + } + for (int j = 0; j < call->narg; ++j) { + short reg = call->abiargregs[j]; + if (reg >= 0) { + forcetake(&ra, reg, mkref(RREG, reg), blk, i); + } + } + use(&ra, blk, i, ins->op, hint0, &ins->l, NOREF); } if (ins->op == Ocopy && !ins->reg) @@ -239,12 +351,18 @@ regalloc(struct function *fn) insertinstr(phi->blk[i], phi->blk[i]->ins.n, mov); } } - def(&ra, ins); + def(&ra, ins, NULL, 0); } } while ((blk = blk->lprev) != last); do vfree(&blk->phi); while ((blk = blk->lprev) != last); + fn->stksiz += ra.maxstk*8; + if (fn->stksiz > 1<<24) error(NULL, "'%s' stack frame too big", fn->name); + while (stkslotrefs.n) { + union ref *adr = stkslotrefs.p[--stkslotrefs.n]; + *adr = mkaddr((struct addr) { .base = mkref(RREG, mctarg->bpr), .disp = -fn->stksiz + adr->i }); + } if (ccopt.dbg.r) { efmt("<< After regalloc >>\n"); irdump(fn); diff --git a/test/test.c b/test/test.c index e429104..c44d1a0 100644 --- a/test/test.c +++ b/test/test.c @@ -47,16 +47,18 @@ struct quad quad(long x, long y, long z, long w) { void silly(struct pair *p, struct quad *q) { - *p = pair(1,2); + *p = pair(-1,2); *q = quad(1,2,3,4); } +#if 0 int test2(struct big *b) { struct big s = *b; extern int h(int,float, struct big, float); s.x[5] += 2; return h(0, -.5f, s, 0); } +#endif struct f2 { float f,g; }; struct f2 f2test(struct f2 *r) { |