diff options
| author | 2023-06-20 19:11:15 +0200 | |
|---|---|---|
| committer | 2023-06-20 19:11:15 +0200 | |
| commit | 8cea6c2e91641b06921b4e358c73c60981ba366d (patch) | |
| tree | 060198058427b9272f2167abd5b36580cd917ef7 | |
| parent | 3abdb713474bd282b9ce322abf7ec3609af2eb12 (diff) | |
add basic mem2reg
promotes uniform stack slots to temporaries
currently only for immutable variables, next thing to implement is ssa
construction
| -rw-r--r-- | Makefile | 3 | ||||
| -rw-r--r-- | common.h | 1 | ||||
| -rw-r--r-- | ir.c | 1 | ||||
| -rw-r--r-- | ir.h | 12 | ||||
| -rw-r--r-- | main.c | 1 | ||||
| -rw-r--r-- | optmem.c | 99 | ||||
| -rw-r--r-- | regalloc.c | 29 | ||||
| -rw-r--r-- | ssa.c | 89 |
8 files changed, 221 insertions, 14 deletions
@@ -1,4 +1,5 @@ -SRC=main.c io.c mem.c c.c lex.c type.c targ.c eval.c ir.c irdump.c intrin.c abi0.c regalloc.c amd64/sysv.c amd64/isel.c amd64/emit.c obj.c elf.c +SRC=main.c io.c mem.c c.c lex.c type.c targ.c eval.c ir.c irdump.c ssa.c intrin.c abi0.c \ + optmem.c regalloc.c amd64/sysv.c amd64/isel.c amd64/emit.c obj.c elf.c CFLAGS=-Wall -std=c11 -pedantic OBJ=$(patsubst %.c,obj/%.o,$(SRC)) DEP=$(OBJ:.o=.d) @@ -114,6 +114,7 @@ struct option { struct { bool p : 1, /* after parsing */ a : 1, /* after abi0 */ + m : 1, /* after mem */ i : 1, /* after isel */ r : 1; /* after regalloc */ }; @@ -413,6 +413,7 @@ irfini(struct function *fn) extern int nerror; if (!nerror) { abi0(fn); + mem2reg(fn); lowerintrin(fn); mctarg->isel(fn); regalloc(fn); @@ -113,6 +113,12 @@ struct instr { union ref l, r; }; +struct use { bool isjmp; union { ushort ins, blk; }; }; +struct uses { + int nuse; + struct use *use, _use0[2]; +}; + enum jumpkind { JXXX, Jb, Jret, }; @@ -235,9 +241,13 @@ void replref(struct function *, struct block *, int, union ref from, union ref t void irdump(struct function *); -void lowerintrin(struct function *); +struct uses *ssauses(struct function *); +void freeuses(struct uses *, int n); + void abi0(struct function *); void abi0_call(struct function *, struct instr *, struct block *blk, int *curi); +void mem2reg(struct function *); +void lowerintrin(struct function *); void regalloc(struct function *); /* vim:set ts=3 sw=3 expandtab: */ @@ -117,6 +117,7 @@ optparse(char **args) case 'a': ccopt.dbg.a = 1; break; case 'i': ccopt.dbg.i = 1; break; case 'r': ccopt.dbg.r = 1; break; + case 'm': ccopt.dbg.m = 1; break; default: warn(NULL, "-d: invalid debug flag %'c", *arg); } } else if (*arg == 'o') { diff --git a/optmem.c b/optmem.c new file mode 100644 index 0000000..ba81cd2 --- /dev/null +++ b/optmem.c @@ -0,0 +1,99 @@ +#include "ir.h" + +static const uchar loadszcls[] = { + [Oloads1 - Oloads1] = 1|KI4<<4, [Oloadu1 - Oloads1] = 1|KI4<<4, + [Oloads2 - Oloads1] = 2|KI4<<4, [Oloadu2 - Oloads1] = 2|KI4<<4, + [Oloads4 - Oloads1] = 4|KI4<<4, [Oloadu4 - Oloads1] = 4|KI4<<4, + [Oloadi8 - Oloads1] = 8|KI8<<4, + [Oloadf4 - Oloads1] = 4|KF4<<4, + [Oloadf8 - Oloads1] = 8|KF8<<4, +}; +static const uchar load2ext[] = { + [Oloads1 - Oloads1] = Oexts1, [Oloadu1 - Oloads1] = Oextu1, + [Oloads2 - Oloads1] = Oexts2, [Oloadu2 - Oloads1] = Oextu2, + [Oloads4 - Oloads1] = Oexts4, [Oloadu4 - Oloads1] = Oextu4, + [Oloadi8 - Oloads1] = Ocopy, +}; +#define loadsz(o) (loadszcls[(o) - Oloads1] & 0xF) +#define loadcls(o) (loadszcls[(o) - Oloads1] >> 4) +#define load2ext(o) (load2ext[(o) - Oloads1]) +#define storesz(o) (1 << ((o) - Ostore1)) + +void +mem2reg(struct function *fn) +{ + extern int ninstr; + struct uses *uses = ssauses(fn); + struct block *blk = fn->entry; + + do { + for (int i = 0; i < blk->ins.n; ++i) { + struct use *use, *uend; + union ref var; + enum irclass k = 0; + int sz = 0, ndef = 0; + enum op ext; + int t = blk->ins.p[i]; + struct instr *ins = &instrtab[t]; + + if (!oisalloca(ins->op)) continue; + for (use = uses[t].use, uend = &uses[t].use[uses[t].nuse]; use < uend; ++use) { + struct instr *m; + + if (use->isjmp) goto Next; + m = &instrtab[use->ins]; + if (oisload(m->op) && (!sz || sz == loadsz(m->op))) { + sz = loadsz(m->op); + k = loadcls(m->op); + if (sz < 4) ext = load2ext(m->op); + continue; + } + if (oisstore(m->op) && m->l.bits == mkref(RTMP, t).bits + && (!sz || sz == storesz(m->op))) { + sz = storesz(m->op); + ++ndef; + continue; + } + goto Next; + } + if (!ndef) /* slot is read from but never written to */ + goto Next; + + if (ndef>1) /* TODO phi construction */ + goto Next; + + /* remove alloca */ + *ins = mkinstr(Onop, 0,); + + var.t = 0; + for (use = uses[t].use; use < uend; ++use) { + struct instr *m = &instrtab[use->ins]; + + if (oisstore(m->op)) { + if (sz < 4) { + *m = mkinstr(ext, KI4, m->r); + var = mkref(RTMP, use->ins); + } else { + var = m->r; + *m = mkinstr(Onop,0,); + } + } else if (oisload(m->op)) { + if (!var.t) /* use before def */ + goto Next; + *m = mkinstr(Ocopy, k, var); + } + } + + Next:; + } + } while ((blk = blk->lnext) != fn->entry); + + freeuses(uses, ninstr); + if (ccopt.dbg.m) { + efmt("<< After mem2reg >>\n"); + irdump(fn); + } +} + + +/* vim:set ts=3 sw=3 expandtab: */ @@ -28,7 +28,7 @@ addstkslotref(union ref inst) vpush(&stkslotrefs, &instrtab[inst.i].l); } -static int allocreg(struct rega *ra, enum irclass cls, union ref ref, int excl); +static int allocreg(struct rega *ra, enum irclass cls, union ref ref, uvlong excl); #if 0 #define DBG efmt @@ -75,7 +75,7 @@ def(struct rega *ra, struct instr *ins, struct block *blk, int curi) } } if (reg < 0) - reg = allocreg(ra, insrescls(*ins), mkref(RTMP, var), -1); + reg = allocreg(ra, insrescls(*ins), mkref(RTMP, var), 0); store = mkinstr(Ostore1 + ilog2(cls2siz[insrescls(*ins)]), 0, mkref(RICON, astk.a*8), mkref(RREG, reg)); DBG("-- unspill %%%d s%d -> %s\n", var, astk.a, mctarg->rnames[ins->reg+1]); @@ -115,7 +115,7 @@ take(struct rega *ra, int reg, union ref ref) { } static int -allocreg(struct rega *ra, enum irclass cls, union ref ref, int excl) +allocreg(struct rega *ra, enum irclass cls, union ref ref, uvlong excl) { int r0, rend, reg; @@ -129,7 +129,7 @@ allocreg(struct rega *ra, enum irclass cls, union ref ref, int excl) } else assert(0); for (reg = r0; reg < rend; ++reg) { if (bstest(mctarg->rglob, reg)) continue; - if (reg != excl && !ra->regs[reg].t) { + if (!(excl >> reg & 1) && !ra->regs[reg].t) { take(ra, reg, ref); return reg; } @@ -194,8 +194,8 @@ forcetake(struct rega *ra, int reg, union ref ref, struct block *blk, int curi) * we need to explictly exclude it from the pool of rename registers * e.g.: given 'R0 = copy R1'; if R1 => %x, we need to prevent renaming %x => R0 */ - int excl = instrtab[blk->ins.p[curi]].reg-1; - int rename = allocreg(ra, isgpr(reg) ? KI4 : KF4, ra->regs[reg], excl); + uvlong excl = instrtab[blk->ins.p[curi]].reg; + int rename = allocreg(ra, isgpr(reg) ? KI4 : KF4, ra->regs[reg], excl ? 1ull<<(excl-1) : 0); if (ccopt.dbg.r)DBG("-- rename %%%d %s -> %s\n", var, mctarg->rnames[reg], mctarg->rnames[rename]); /* introduce move from rename -> original (since we allocate backwards) */ insertinstr(blk, ++curi, mkmove(insrescls(instrtab[var]), reg, rename)); @@ -215,7 +215,7 @@ static void use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union ref *ref, union ref other) { struct instr *ins; - int excl = other.t == RREG ? other.i : -1; + uvlong excl = other.t == RREG ? 1ull<<other.i : 0; if (ref->t == RMORE) { struct addr addr = addrht[ref->i]; @@ -224,13 +224,12 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re *ref = mkaddr(addr); return; } else if (ref->t == RREG) { - assert(ref->i != excl); + assert(!(excl >> hint & 1)); forcetake(ra, ref->i, *ref, blk, curi); } if (ref->t != RTMP) return; ins = &instrtab[ref->i]; - if (oisalloca(ins->op)) return; if (!ins->cls) return; if (!ins->reg) { int s = -1; @@ -238,9 +237,10 @@ use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union re s = ra->allocs[ref->i].a; assert(ins->op != Ocall); + if (ins->r.t == RREG && ins->inplace) excl |= 1ull<<ins->r.i; if (hint == -1 && ins->op == Ocopy && ins->l.t == RREG) /* for '%x = copy Rx', hint %x to use Rx */ hint = ins->l.i; - if (hint != -1 && hint != excl && !ra->regs[hint].t) { + if (hint != -1 && !(excl >> hint & 1) && !ra->regs[hint].t) { take(ra, hint, *ref); ins->reg = hint + 1; } else { @@ -317,8 +317,13 @@ regalloc(struct function *fn) if (ins->r.bits != ins->l.bits) use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF); } else { - if (ins->l.t) use(&ra, blk, i, ins->op, hint0, &ins->l, ins->r); - if (ins->r.t) use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF); + if (ins->r.t == RREG) { + use(&ra, blk, i, ins->op, hint0, &ins->r, NOREF); + use(&ra, blk, i, ins->op, hint0, &ins->l, ins->r); + } else { + if (ins->l.t) use(&ra, blk, i, ins->op, hint0, &ins->l, ins->r); + if (ins->r.t) use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF); + } } } else { struct call *call = &calltab.p[ins->r.i]; @@ -0,0 +1,89 @@ +#include "ir.h" +#include <stdint.h> + +struct uses * +ssauses(struct function *fn) +{ + extern int ninstr; + struct uses *uses = xcalloc(ninstr * sizeof *uses); + vec_of(struct use) *vuse = xcalloc(ninstr * sizeof *vuse), useacc = {0}; + struct block *blk = fn->entry; + + #define USE(r, isjmp, x) do { \ + struct use u0, u = {isjmp, {x}}; \ + if ((r).t == RTMP) { \ + if (vuse[(r).i].n == 0){ \ + memcpy(&vuse[(r).i].p, &u, sizeof u); \ + ++vuse[(r).i].n; \ + } else { \ + if (vuse[(r).i].n == 1) { \ + memcpy(&u0, &vuse[(r).i].p, sizeof u0); \ + vuse[(r).i].n = 0; \ + vuse[(r).i].p = 0; \ + vpush(&vuse[(r).i], u0); \ + } \ + vpush(&vuse[(r).i], u); \ + } \ + } \ + } while (0) + + do { + for (int i = 0; i < blk->phi.n; ++i) { + int ins = blk->phi.p[i]; + struct phi *phi = &phitab.p[instrtab[ins].l.i]; + for (int i = 0; i < phi->n; ++i) { + USE(phi->ref[i], 0, ins); + } + } + for (int i = 0; i < blk->ins.n; ++i) { + int ins = blk->ins.p[i]; + assert(instrtab[ins].l.t != RMORE); + if (instrtab[ins].op != Ocall) assert(instrtab[ins].l.t != RMORE); + USE(instrtab[ins].l, 0, ins); + USE(instrtab[ins].r, 0, ins); + } + USE(blk->jmp.arg[0], 1, blk->id); + USE(blk->jmp.arg[1], 1, blk->id); + } while ((blk = blk->lnext) != fn->entry); + for (int i = 0; i < ninstr; ++i) { + int nuse = uses[i].nuse = vuse[i].n; + if (!nuse) continue; + if (nuse == 1) { + uses[i].use = uses[i]._use0; + memcpy(uses[i].use, &vuse[i].p, sizeof *uses[i].use); + } else if (nuse < arraylength(uses[i]._use0)) { + uses[i].use = uses[i]._use0; + memcpy(uses[i].use, vuse[i].p, nuse * sizeof(struct use)); + vfree(&vuse[i]); + } else { + intptr_t o; + o = useacc.n; + memcpy(&uses[i].use, &o, sizeof o); + vpushn(&useacc, vuse[i].p, nuse); + vfree(&vuse[i]); + } + } + for (int i = 0; i < ninstr; ++i) { + if (uses[i].use != uses[i]._use0) { + intptr_t o; + memcpy(&o, &uses[i].use, sizeof o); + uses[i].use = useacc.p + o; + } + } + free(vuse); + return uses; +} + +void +freeuses(struct uses *u, int n) +{ + for (int i = 0; i < n; ++i) { + if (u[i].use != u[i]._use0) { + free(u[i].use); + break; + } + } + free(u); +} + +/* vim:set ts=3 sw=3 expandtab: */ |