From 984b74da895d7155f68434be9cc9b6d49a77abec Mon Sep 17 00:00:00 2001 From: lemon Date: Mon, 12 Jun 2023 12:07:17 +0200 Subject: register renaming and such --- Makefile | 2 +- abi0.c | 4 +- amd64/emit.c | 30 +++++++++- amd64/isel.c | 41 +++++++++++--- amd64/sysv.c | 2 +- common.h | 23 ++++++++ ir.h | 2 +- mem.c | 141 ++++++++++++++++++++++++++++++++------------- regalloc.c | 182 +++++++++++++++++++++++++++++++++++++++++++++++------------ test/test3.c | 16 +++++- 10 files changed, 351 insertions(+), 92 deletions(-) diff --git a/Makefile b/Makefile index 6ce0ce7..ef6dcab 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ OBJ=$(patsubst %.c,obj/%.o,$(SRC)) DEP=$(OBJ:.o=.d) OUT=cchomp -all: CFLAGS += -g -Og +all: CFLAGS += -g all: $(OUT) opt: CFLAGS += -O2 diff --git a/abi0.c b/abi0.c index 0e8b551..907ac12 100644 --- a/abi0.c +++ b/abi0.c @@ -71,10 +71,10 @@ copyparam(struct abiarg abi) case KF4: ld = Oloadf4; break; case KF8: ld = Oloadf8; break; } - vpush(&addrtab, ((struct addr) {.base = mkref(RREG, mctarg->spr), .disp = abi.reg})); + vpush(&addrtab, ((struct addr) {.base = mkref(RREG, mctarg->fpr), .disp = abi.stk})); return mkinstr(ld, abi.ty.cls, mkref(RMORE, addrtab.n - 1)); } else { /* aggregate in stack */ - vpush(&addrtab, ((struct addr) {.base = mkref(RREG, mctarg->spr), .disp = abi.reg})); + vpush(&addrtab, ((struct addr) {.base = mkref(RREG, mctarg->fpr), .disp = abi.stk})); return mkinstr(Ocopy, KPTR, mkref(RMORE, addrtab.n - 1)); } } diff --git a/amd64/emit.c b/amd64/emit.c index 160c207..cf8d325 100644 --- a/amd64/emit.c +++ b/amd64/emit.c @@ -58,6 +58,7 @@ addmemoper(struct oper mem, struct oper add) enum operpat { PNONE, PRAX, + PRCX, PGPR, PFPR, P1, /* imm = 1 */ @@ -93,6 +94,7 @@ opermatch(enum operpat pat, struct oper oper) switch (pat) { case PNONE: return !oper.t; case PRAX: return oper.t == OREG && oper.reg == RAX; + case PRCX: return oper.t == OREG && oper.reg == RCX; case PGPR: return oper.t == OREG && oper.reg <= R15; case PFPR: return oper.t == OREG && oper.reg >= XMM0; case P1: return oper.t == OIMM && oper.imm == 1; @@ -240,7 +242,7 @@ DEFINSTR2(Xmov, {4, PMEM, PFPR, "\xF3\x0F\x10", EN_MR}, /* MOVSS m32, xmm */ {8, PFPR, PFPR, "\xF2\x0F\x10", EN_RR}, /* MOVSD xmm, xmm */ {8, PFPR, PMEM, "\xF2\x0F\x10", EN_RM}, /* MOVSD xmm, m64 */ - {8, PMEM, PFPR, "\xF2\x0F\x10", EN_MR}, /* MOVSS m64, xmm */ + {8, PMEM, PFPR, "\xF2\x0F\x11", EN_MR}, /* MOVSS m64, xmm */ ) DEFINSTR2(Xmovsx4, {8, PGPR, PMEM, "\x63", EN_RM}, /* MOVSXD r64, m32 */ @@ -279,8 +281,13 @@ DEFINSTR2(Xadd, {8, PFPR, PMEM, "\xF2\x0F\x58", EN_RM}, /* ADDSD xmm, m64 */ ) DEFINSTR2(Xshl, - {4|8, PGPR, P1, "\xD1", EN_R, .ext=4}, /* SHL r32/64, 1 */ - {4|8, PGPR, PI32, "\xC1", EN_RI8, .ext=4}, /* SHL r32/64, imm */ + {4|8, PGPR, P1, "\xD1", EN_R, .ext=4}, /* SHL r32/64, 1 */ + {4|8, PGPR, PI32, "\xC1", EN_RI8, .ext=4}, /* SHL r32/64, imm */ + {4|8, PGPR, PRCX, "\xD3", EN_R, .ext=4}, /* SHL r32/64, CL */ +) +DEFINSTR1(Xidiv, + {4|8, PGPR, 0, "\xF7", EN_R, .ext=7}, /* IDIV r32/64 */ + {4|8, PMEM, 0, "\xF7", EN_M, .ext=7}, /* IDIV m32/64 */ ) DEFINSTR1(Xcall, {-1, PSYM, 0, "\xE8", EN_R32}, /* CALL rel32 */ @@ -328,6 +335,13 @@ mkimmregoper(union ref r) return ref2oper(r); } +static inline struct oper +mkdatregoper(union ref r) +{ + assert(isregref(r) || (r.t == RXCON && conht[r.i].deref)); + return ref2oper(r); +} + static inline struct oper mkimmdatregoper(union ref r) { @@ -454,6 +468,16 @@ emitinstr(uchar **pcode, uint *stktop, struct function *fn, struct block *blk, i assert(dst.t == OREG && ins->reg-1 == dst.reg); X(pcode, ksiz, dst, mkimmdatregoper(ins->r)); break; + case Odiv: case Orem: + switch (ins->cls) { + case KI8: B(0x48); /* REX.W */ + case KI4: B(0x99); /* CDQ/CQO */ + assert(mkregoper(ins->l).reg == RAX); + Xidiv(pcode, ksiz, mkdatregoper(ins->r)); + break; + case KF4: case KF8: assert(0); + } + break; case Omove: dst = ref2oper(ins->l); gencopy(pcode, ins->cls, dst, ins->r); diff --git a/amd64/isel.c b/amd64/isel.c index f99c8a2..e6cc692 100644 --- a/amd64/isel.c +++ b/amd64/isel.c @@ -6,14 +6,16 @@ static void fixarg(struct function *fn, union ref *r, struct instr *ins, struct block *blk, int *curi) { int sh; + enum op op = ins->op; + if (r->t == RXCON) { struct xcon *con = &conht[r->i]; - if (in_range(ins->op, Oshl, Oslr)) { + if (in_range(op, Oshl, Oslr)) { sh = con->cls == KI4 ? con->i4 : con->i8; goto ShiftImm; - } else if (in_range(ins->op, Oadd, Osub) && con->i8 == 2147483648) { + } else if (in_range(op, Oadd, Osub) && con->i8 == 2147483648) { /* add X, INT32MAX+1 -> sub X, INT32MIN */ - ins->op = Oadd + (ins->op == Oadd); + ins->op = Oadd + (op == Oadd); *r = mkintcon(fn, KI4, -2147483648); } else if (con->cls && (kisflt(con->cls) || con->cls == KI8)) { /* float immediates & >32b immediates are loaded from memory */ @@ -22,8 +24,12 @@ fixarg(struct function *fn, union ref *r, struct instr *ins, struct block *blk, if (con->cls == KI4 || con->cls == KF4) wr32le(data, con->i4); else wr64le(data, con->i8); *r = mkdatref(fn, siz, /*align*/siz, data, siz, /*deref*/1); - } - } else if (in_range(ins->op, Oshl, Oslr) && r->t == RICON) { + } else if (in_range(op, Odiv, Ourem) && kisint(ins->cls)) + goto DivImm; + } else if (r->t == RICON && in_range(op, Odiv, Ourem) && kisint(ins->cls)) { + DivImm: /* there is no division by immediate, must be copied to a register */ + *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, ins->cls, *r)); + } else if (r->t == RICON && in_range(op, Oshl, Oslr)) { sh = r->i; ShiftImm: /* shift immediate is always 8bit */ *r = mkref(RICON, sh & 255); @@ -131,8 +137,9 @@ static void sel(struct function *fn, struct instr *ins, struct block *blk, int *curi) { struct instr temp = {0}; + enum op op = ins->op; - switch (ins->op) { + switch (op) { case Oshl: case Osar: case Oslr: if (!iscon(ins->r)) { /* shift amount register is always CL */ @@ -144,10 +151,25 @@ sel(struct function *fn, struct instr *ins, struct block *blk, int *curi) case Oulth: case Ougth: case Oulte: case Ougte: if (iscon(ins->l)) { /* lth imm, x -> gth x, imm */ - ins->op = ((ins->op - Olth) ^ 1) + Olth; + ins->op = ((op - Olth) ^ 1) + Olth; rswap(ins->l, ins->r); } goto ALU; + case Odiv: case Oudiv: case Orem: case Ourem: + if (kisflt(ins->cls)) goto ALU; + /* TODO fuse div/rem pair */ + + /* (I)DIV dividend is always in RDX:RAX, output also in those regs */ + insertinstr(blk, (*curi)++, mkinstr(Omove, ins->cls, mkref(RREG, RAX), ins->l)); + /* mark RDX as clobbered. sign/zero-extending RAX into RDX is handled in emit() */ + insertinstr(blk, (*curi)++, mkinstr(Omove, ins->cls, mkref(RREG, RDX), mkref(RREG, RDX))); + fixarg(fn, &ins->r, ins, blk, curi); /* make sure rhs is memory or reg */ + ins->l = mkref(RREG, RAX); + insertinstr(blk, (*curi)++, *ins); /* duplicate ins to reuse tmp ref */ + *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, op < Orem ? RAX : RDX)); /* get output */ + temp = mkinstr(Ocopy, ins->cls, mkref(RREG, op < Orem ? RDX : RAX)); /* clobber other reg*/ + insertinstr(blk, ++(*curi), temp); + break; case Osub: if (iscon(ins->l)) { /* sub imm, x -> sub x, imm; neg x */ @@ -168,6 +190,7 @@ sel(struct function *fn, struct instr *ins, struct block *blk, int *curi) break; } } + /* fallthru */ case Omul: case Oumul: case Oand: case Oxor: case Oior: case Oequ: case Oneq: @@ -177,8 +200,8 @@ sel(struct function *fn, struct instr *ins, struct block *blk, int *curi) case Oneg: case Onot: case Oexts1: case Oextu1: case Oexts2: case Oextu2: case Oexts4: case Oextu4: ALU: - if (!(ins->op == Oadd && kisint(ins->cls))) /* 3-address add is lea */ - if (!(in_range(ins->op, Omul, Oumul) && kisint(ins->cls) && isimm32(ins->r))) /* for (I)MUL r,r/m,imm */ + if (!(op == Oadd && kisint(ins->cls))) /* 3-address add is lea */ + if (!(in_range(op, Omul, Oumul) && kisint(ins->cls) && isimm32(ins->r))) /* for (I)MUL r,r/m,imm */ ins->inplace = 1; if (ins->l.t != RTMP && ins->l.t != RREG) ins->l = insertinstr(blk, (*curi)++, mkinstr(Ocopy, ins->cls, ins->l)); diff --git a/amd64/sysv.c b/amd64/sysv.c index 1128cc2..7d391dc 100644 --- a/amd64/sysv.c +++ b/amd64/sysv.c @@ -139,7 +139,7 @@ const char amd64_rnames[][6] = { const struct mctarg t_amd64_sysv = { .gpr0 = RAX, .ngpr = R15 - RAX + 1, - .spr = RSP, + .fpr = RBP, .fpr0 = XMM0, .nfpr = XMM15 - XMM0 + 1, .rcallee = {{1< T */ +#define imap_of(T) struct { T *v; int tmp; struct imapbase mb; } +void imap_init_(struct imapbase *, void **v, uint vsiz, uint N); +int imap_get_(struct imapbase *, short k); +int imap_set_(struct imapbase *, void **v, uint vsiz, short k); +#define imap_free(m) (free((m)->mb.k), memset((m), 0, sizeof *(m))) +#define imap_init(m, N) (imap_free(m), imap_init_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, (N)) +#define imap_get(m, k) (((m)->tmp = imap_get_(&(m)->mb, k)) < 0 ? NULL : &(m)->v[(m)->tmp]) +#define imap_set(m, k, x) ((m)->tmp = imap_set_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, k), \ + (m)->v[(m)->tmp] = (x)) + +struct pmapbase { void **k; uint n, N; }; +/* map of non-null ptr -> T */ +#define pmap_of(T) struct { T *v; int tmp; struct imapbase mb; } +void pmap_init_(struct pmapbase *, void **v, uint vsiz, uint N); +int pmap_get_(struct pmapbase *, void *k); +int pmap_set_(struct pmapbase *, void **v, uint vsiz, void *k); +#define pmap_free(m) (free((m)->mb.k), memset((m), 0, sizeof *(m))) +#define pmap_init(m, N) (pmap_free(m), pmap_init_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, (N)) +#define pmap_get(m, k) (((m)->tmp = pmap_get_(&(m)->mb, k)) < 0 ? NULL : &(m)->v[(m)->tmp]) +#define pmap_set(m, k, x) ((m)->v[pmap_set_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, k)] = (x)) + static inline bool bstest(const struct bitset *bs, uint i) { diff --git a/ir.h b/ir.h index 1c8468b..3561397 100644 --- a/ir.h +++ b/ir.h @@ -142,7 +142,7 @@ struct function { struct mctarg { short gpr0, /* first gpr */ ngpr, /* gpr count */ - spr, /* stack pointer reg */ + fpr, /* frame pointer reg */ fpr0, /* first fpr */ nfpr; /* fpr count */ struct bitset rcallee[1], /* callee-saved */ diff --git a/mem.c b/mem.c index 3a22a35..ea35e41 100644 --- a/mem.c +++ b/mem.c @@ -126,50 +126,46 @@ freearena(struct arena *ar) } } -#if 0 - -int -map_get_(struct mapbase *m, int k) -{ - if (!m->N) return 0; - for (int i = k;; ++i) { - bool notempty = bstest(m->bs, k); - i &= m->N - 1; - if (notempty && m->k[i] == k) - return i; - if (!notempty) - return -1; - } -} - - void -map_init_(struct mapbase *m, void **v, uint vsiz, uint N) +imap_init_(struct imapbase *m, void **v, uint vsiz, uint N) { - uint sizk = N*sizeof(int), + uint sizk = N*sizeof*m->k, sizv = N*vsiz, - sizbs = BSCOUNT(N)*sizeof(struct bitset); + sizbs = BSSIZE(N)*sizeof(struct bitset); - assert(N && (N & (N - 1)) == 0); - m->k = xcalloc(sizk + sizv + sizbs, "map_rehash"); + assert(ispo2(N)); + m->k = xcalloc(sizk + sizv + sizbs, "imap_init"); *v = (char *)m->k + sizk; m->bs = (struct bitset *)((char *)*v + sizv); m->N = N; } +int +imap_get_(struct imapbase *m, short k) +{ + if (!m->N) return -1; + for (int i = k;; ++i) { + bool notempty = bstest(m->bs, i &= m->N - 1); + if (bstest(m->bs, i) && m->k[i] == k) return i; + if (!notempty) return -1; + } +} + static void -map_rehash(struct mapbase *m, void **v, uint vsiz) +imap_rehash(struct imapbase *m, void **v, uint vsiz) { - int *newk, i, k, j; + short *newk, k; + int i, j; void *newv; struct bitset *newbs; - uint N2 = m->N << 1, - sizk = N2*sizeof(int), + uint N2 = m->N ? m->N << 1 : 16, + sizk = N2*sizeof*m->k, sizv = N2*vsiz, - sizbs = BSCOUNT(N2)*sizeof(struct bitset); + sizbs = BSSIZE(N2)*sizeof(struct bitset); assert(N2); - newk = xrealloc(NULL, sizk + sizv + sizbs, "map_rehash"); + newk = xcalloc(sizk + sizv + sizbs, "imap_rehash"); + memset(newk, 0, sizk + sizv + sizbs); newv = (char *)newk + sizk; newbs = (struct bitset *)((char *)newv + sizv); for (i = 0; i < m->N; ++i) { @@ -178,8 +174,8 @@ map_rehash(struct mapbase *m, void **v, uint vsiz) j = k = m->k[i]; for (;; ++j) { j &= N2 - 1; - if (!bstest(newbs, i)) { - bsset(newbs, i); + if (!bstest(newbs, j)) { + bsset(newbs, j); m->k[j] = k; memcpy((char *)newv + j*vsiz, (char *)*v + i*vsiz, vsiz); break; @@ -187,8 +183,6 @@ map_rehash(struct mapbase *m, void **v, uint vsiz) } } free(m->k); - free(*v); - free(m->bs); m->k = newk; *v = newv; m->bs = newbs; @@ -196,16 +190,14 @@ map_rehash(struct mapbase *m, void **v, uint vsiz) } int -map_set_(struct mapbase *m, void **v, uint vsiz, int k) +imap_set_(struct imapbase *m, void **v, uint vsiz, short k) { - if (!m->N) return 0; if (m->n >= m->N >> 1) { - map_rehash(m, v, vsiz); + imap_rehash(m, v, vsiz); assert(m->n < m->N); } for (int i = k;; ++i) { - bool notempty = bstest(m->bs, k); - i &= m->N - 1; + bool notempty = bstest(m->bs, i &= m->N - 1); if (notempty && m->k[i] == k) return i; if (!notempty) { @@ -217,6 +209,79 @@ map_set_(struct mapbase *m, void **v, uint vsiz, int k) } } -#endif +void +pmap_init_(struct pmapbase *m, void **v, uint vsiz, uint N) +{ + uint sizk = N*sizeof(int), + sizv = N*vsiz; + + assert(ispo2(N)); + m->k = xcalloc(sizk + sizv, "pmap_init"); + *v = (char *)m->k + sizk; + m->N = N; +} + +int pmap_get_(struct pmapbase *m, void *k) +{ + assert(k && "null key"); + if (!m->N) return -1; + for (int i = ptrhash(k);; ++i) { + i &= m->N - 1; + if (m->k[i] == k) return i; + if (!m->k[i]) return -1; + } +} + +static void +pmap_rehash(struct pmapbase *m, void **v, uint vsiz) +{ + void **newk; + int i, j; + void *k; + void *newv; + uint N2 = m->N << 1, + sizk = N2*sizeof*m->k, + sizv = N2*vsiz; + + assert(N2); + newk = xcalloc(sizk + sizv, "pmap_rehash"); + newv = (char *)newk + sizk; + for (i = 0; i < m->N; ++i) { + if (!m->k[i]) + continue; + j = ptrhash(k = m->k[i]); + for (;; ++j) { + j &= N2 - 1; + if (!newk[j]) { + newk[j] = k; + memcpy((char *)newv + j*vsiz, (char *)*v + i*vsiz, vsiz); + break; + } + } + } + free(m->k); + m->k = newk; + *v = newv; + m->N = N2; +} + +int pmap_set_(struct pmapbase *m, void **v, uint vsiz, void *k) +{ + assert(k && "null key"); + if (m->n >= m->N >> 1) { + pmap_rehash(m, v, vsiz); + assert(m->n < m->N); + } + for (int i = ptrhash(k);; ++i) { + i &= m->N - 1; + if (m->k[i] == k) + return i; + if (!m->k[i]) { + m->k[i] = k; + ++m->n; + return i; + } + } +} /* vim:set ts=3 sw=3 expandtab: */ diff --git a/regalloc.c b/regalloc.c index f887c59..0527228 100644 --- a/regalloc.c +++ b/regalloc.c @@ -1,25 +1,68 @@ +#include "common.h" #include "ir.h" static struct bitset globusage[1]; -static struct bitset taken[1]; +static struct bitset floatregs[1]; + +#define isfpr(reg) bstest(floatregs, (reg)) +#define isgpr(reg) (!isfpr(reg)) + +enum { ADEAD, AREG, ASTACK } ; +struct alloc { ushort t : 2, a : 14; }; +#define afree() ((struct alloc) { ADEAD }) +#define areg(r) ((struct alloc) { AREG, (r) }) +#define astack(s) ((struct alloc) { ASTACK, (s) }) + +struct rega { + union ref regs[MAXREGS]; /* map reg -> value holding reg */ + imap_of(struct alloc) allocs; /* map tmpidx -> allocation */ + int nfreegpr, nfreefpr; +}; static void -def(struct instr *ins) +def(struct rega *ra, struct instr *ins) { - if (ins->reg) - bsclr(taken, ins->reg - 1); + int reg = -1, var; + struct alloc *alloc; + if (ins->op != Omove) { + var = ins - instrtab; + // efmt("def %%%d\n",var); + if ((alloc = imap_get(&ra->allocs, var))) { + if (alloc->t == AREG) { + reg = alloc->a; + // efmt("-- free %s for %%%d\n", mctarg->rnames[alloc->a], var); + assert(ra->regs[reg].bits == mkref(RTMP, var).bits); + } else assert(0); + *alloc = afree(); + } + } else { + reg = ins->l.i; + assert(ins->l.t == RREG); + // efmt("-- free %s\n", mctarg->rnames[ins->l.i]); + } + if (reg != -1) { + ra->regs[reg] = NOREF; + if (isfpr(reg)) ++ra->nfreefpr; + else ++ra->nfreegpr; + } } static void -take(int r) { - bsset(taken, r); - bsset(globusage, r); +take(struct rega *ra, int reg, union ref ref) { + // efmt("-- take %s for %d %d\n", mctarg->rnames[reg], ref.t, ref.i); + assert(!ra->regs[reg].t && "taken"); + if (ref.t == RTMP) + imap_set(&ra->allocs, ref.i, areg(reg)); + ra->regs[reg] = ref; + bsset(globusage, reg); + if (isfpr(reg)) --ra->nfreefpr; + else --ra->nfreegpr; } static int -nextreg(enum irclass cls) +nextreg(struct rega *ra, enum irclass cls, union ref ref, int excl) { - int r0, rend, i; + int r0, rend, reg; assert(cls); if (kisint(cls)) { @@ -29,31 +72,77 @@ nextreg(enum irclass cls) r0 = mctarg->fpr0; rend = mctarg->fpr0 + mctarg->nfpr; } else assert(0); - for (i = r0; i < rend; ++i) { - if (!bstest(taken, i)) { - take(i); - return i; + for (reg = r0; reg < rend; ++reg) { + if (bstest(mctarg->rglob, reg)) continue; + if (reg != excl && !ra->regs[reg].t) { + take(ra, reg, ref); + return reg; } } assert(!"no more reg"); } +#define mkmove(k, rd, rs) mkinstr(Omove, k, mkref(RREG, rd), mkref(RREG, rs)) + +static void +forcetake(struct rega *ra, int reg, union ref ref, struct block *blk, int curi) { + int var; + struct alloc *alloc; + + if (ra->regs[reg].bits == ref.bits) return; + if (!ra->regs[reg].t) { + take(ra, reg, ref); + return; + } + assert(ra->regs[reg].t == RTMP); + var = ra->regs[reg].i; + alloc = imap_get(&ra->allocs, var); + assert(alloc && alloc->a == reg); + *alloc = afree(); + /* try to move temp to another register */ + if (isgpr(reg) ? ra->nfreegpr > 0 : ra->nfreefpr > 0) { + /* the register of the current instruction (if any) was already free'd (by def), so + * we need to explictly exclude it from the pool of rename registers + * e.g.: given 'R0 = copy R1'; if R1 => %x, we need to prevent renaming %x => R0 + */ + int excl = instrtab[blk->ins.p[curi]].reg-1; + int rename = nextreg(ra, isgpr(reg) ? KI4 : KF4, ra->regs[reg], excl); + if (ccopt.dbg.r) efmt("-- rename %%%d %s -> %s\n", var, mctarg->rnames[reg], mctarg->rnames[rename]); + /* introduce move from rename -> original (since we allocate backwards) */ + insertinstr(blk, ++curi, mkmove(instrtab[var].cls, reg, rename)); + instrtab[var].reg = rename+1; + ra->regs[rename] = mkref(RTMP, var); + bsset(globusage, rename); + imap_set(&ra->allocs, var, areg(rename)); + ra->regs[reg].bits = 0; + } else { + assert(!"spill"); + } + take(ra, reg, ref); +} + +/* mark a use for *ref, possibly allocating a register for it, considering it won't clash with `other` */ static void -use(struct block *blk, enum op op, int hint, union ref *ref) +use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union ref *ref, union ref other) { struct instr *ins; + int excl = other.t == RREG ? other.i : -1; + if (ref->t == RMORE) { if (op == Ophi) { struct phi *phi = &phitab.p[ref->i]; for (int i = 0; i < phi->n; ++i) - use(blk, 0, hint, &phi->ref[i]); + use(ra, blk, curi, 0, hint, &phi->ref[i], NOREF); } else { struct addr *addr = &addrtab.p[ref->i]; - if (addr->base.t) use(blk, 0, hint, &addr->base); - if (addr->index.t) use(blk, 0, hint, &addr->index); + if (addr->base.t) use(ra, blk, curi, 0, hint, &addr->base, addr->index); + if (addr->index.t) use(ra, blk, curi, 0, hint, &addr->index, NOREF); } return; - } else if (ref->t != RTMP) return; + } else if (ref->t == RREG) { + forcetake(ra, ref->i, *ref, blk, curi); + } + if (ref->t != RTMP) return; ins = &instrtab[ref->i]; if (oisalloca(ins->op)) return; @@ -62,14 +151,16 @@ use(struct block *blk, enum op op, int hint, union ref *ref) if (op == -1) /* cond branch */ if (oiscmp(ins->op) && ref->i == blk->ins.p[blk->ins.n-1]) /* result of comparison instr is only used to conditionally branch, - * doesn't usually need a reg (handled by isel) */ + * doesn't usually need a reg (this should be handled by isel) */ return; assert(ins->op != Ocall); - if (hint != -1 && !bstest(taken, hint)) { - take(hint); + if (hint == -1 && ins->op == Ocopy && ins->l.t == RREG) /* for '%x = copy Rx', hint %x to use Rx */ + hint = ins->l.i; + if (hint != -1 && hint != excl && !ra->regs[hint].t) { + take(ra, hint, *ref); ins->reg = hint + 1; } else { - ins->reg = nextreg(ins->cls) + 1; + ins->reg = nextreg(ra, ins->cls, *ref, excl) + 1; } *ref = mkref(RREG, ins->reg-1); } @@ -80,6 +171,12 @@ regalloc(struct function *fn) { struct instr *ins; struct block *last = fn->entry->lprev, *blk; + struct rega ra = {0}; + ra.nfreegpr = mctarg->ngpr - popcnt(mctarg->rglob->u); + ra.nfreefpr = mctarg->fpr; + for (int i = 0; i < MAXREGS; ++i) + if (in_range(i, mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1)) + bsset(floatregs, i); /* a dumb linear register allocator that visits instructions physically backwards * starting at the end of the function, when encountering a use of a new @@ -90,9 +187,9 @@ regalloc(struct function *fn) do { for (int i = 0; i < 2; ++i) { if (!blk->jmp.arg[i].t) break; - use(blk, (blk->jmp.t == Jb) - 1, + use(&ra, blk, blk->ins.n-1, (blk->jmp.t == Jb) - 1, blk->jmp.t == Jret ? fn->abiret[i].reg : -1, - &blk->jmp.arg[i]); + &blk->jmp.arg[i], blk->jmp.arg[!i]); } for (struct block *s = blk->s1; s; s = blk->s2) { /* introduce necessary moves for successors' phis */ @@ -105,8 +202,7 @@ regalloc(struct function *fn) int reg = ins->reg - 1; assert(reg >= 0); if (src.t != RTMP || instrtab[src.i].reg-1 != reg) /* not in the right register already */ - insertinstr(blk, blk->ins.n, - (struct instr){Ocopy, ins->cls, reg+1, .l = src}); + insertinstr(blk, blk->ins.n, mkinstr(Omove, ins->cls, mkref(RREG, reg), src)); break; } } @@ -120,23 +216,39 @@ regalloc(struct function *fn) *ins = mkinstr(Onop, 0,); continue; } - def(ins); - if (ins->op == Omove) { - use(blk, ins->op, ins->l.i, &ins->r); - } else if (ins->op != Ocall) { + def(&ra, ins); + if (ins->op != Ocall) { if (ins->op == Ocopy) hint0 = ins->reg - 1; - if (ins->l.t) use(blk, ins->op, hint0, &ins->l); - if (ins->r.t) use(blk, ins->op, hint1, &ins->r); + if (ins->op == Omove) { + hint1 = ins->l.i; + /* MOV Rx,Rx is used by isel to indicate a clobber, + * so it should be a def point for Rx but not a use point */ + if (ins->r.bits != ins->l.bits) + use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF); + } else { + if (ins->l.t) use(&ra, blk, i, ins->op, hint0, &ins->l, ins->r); + if (ins->r.t) use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF); + } } else { struct call *call = &calltab.p[ins->r.i]; - use(blk, ins->op, hint0, &ins->l); + use(&ra, blk, i, ins->op, hint0, &ins->l, NOREF); + } + if (ins->op == Ocopy && !ins->reg) + *ins = mkinstr(Onop, 0,); + if (ins->inplace && ins->reg) { + assert(ins->l.t == RREG); + if (ins->reg-1 != ins->l.i) { + /* an in-place operation where the destination does not + * match the first operand, so we need to add a move */ + insertinstr(blk, i, mkmove(ins->cls, ins->reg-1, ins->l.i)); + } } } for (int i = blk->phi.n - 1; i >= 0; --i) { ins = &instrtab[blk->phi.p[i]]; assert(ins->op == Ophi); - def(ins); - use(blk, Ophi, ins->reg - 1, &ins->l); + def(&ra, ins); + use(&ra, blk, 0, Ophi, ins->reg - 1, &ins->l, NOREF); } } while ((blk = blk->lprev) != last); diff --git a/test/test3.c b/test/test3.c index 35e5e8b..bf53396 100644 --- a/test/test3.c +++ b/test/test3.c @@ -8,11 +8,22 @@ int t(unsigned short *p, short i) { return p[i]; } -#if 1 +int shcl(int a, int b) { + return a << (b+1); +} + +struct p { long x,y; }; +struct p divsh(int a) { + struct p p; + p.x = a << (a / 5); + p.y = a; + return p; +} + +#if 0 long test(long x) { return x + (long)"abc"; } -#endif double ff(double x, double y) { @@ -26,3 +37,4 @@ void testss() { long fma(long x, long y) { return x + (y <<1) - 2147483648;} +#endif -- cgit v1.2.3