From f06da11d8524a9eb7fe984171d4462cef8eac2e6 Mon Sep 17 00:00:00 2001 From: lemon Date: Sat, 28 Feb 2026 20:38:55 +0100 Subject: ir: make address ref hash table resizable Would hit the limit on very large functions (thanks csmith). --- aarch64/emit.c | 4 ++-- ir/dump.c | 2 +- ir/ir.c | 43 +++++++++++++++++++++++++++++-------------- ir/ir.h | 2 +- ir/regalloc.c | 17 ++++++++++++----- x86_64/emit.c | 4 ++-- x86_64/isel.c | 4 ++-- 7 files changed, 49 insertions(+), 27 deletions(-) diff --git a/aarch64/emit.c b/aarch64/emit.c index fdcb866..9fdcd83 100644 --- a/aarch64/emit.c +++ b/aarch64/emit.c @@ -54,7 +54,7 @@ mkmemoper(uint msiz, union ref r) } else if (isaddrcon(r,1)) { return mkoper(OSYM, .con = r.i,); } else if (r.t == RADDR) { - const struct addr *addr = &addrht[r.i]; + const struct addr *addr = &addrtab.p[r.i]; assert(addr->shift <= 3 && (!addr->disp || !addr->index.bits)); if (isaddrcon(addr->base,0)) { assert(!addr->index.bits); @@ -552,7 +552,7 @@ gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct ope else if (src.t == OIMM && src.imm == 0) Xfmov(pcode, cls, dst, REGZR); else assert(0); - } else if (isaddrcon(val,0) || (val.t == RADDR && isaddrcon(addrht[val.i].base,0))) { + } else if (isaddrcon(val,0) || (val.t == RADDR && isaddrcon(addrtab.p[val.i].base,0))) { if ((ccopt.pic || (contab.p[val.i].flag & SFUNC)) && !(contab.p[val.i].flag & SLOCAL)) { Xadrp(pcode, KPTR, dst, src); Xadd(pcode, KPTR, dst, dst, src); diff --git a/ir/dump.c b/ir/dump.c index bb6a2fd..35b39de 100644 --- a/ir/dump.c +++ b/ir/dump.c @@ -137,7 +137,7 @@ dumpref(enum op o, union ref ref) break; case RADDR: { - const struct addr *addr = &addrht[ref.i]; + const struct addr *addr = &addrtab.p[ref.i]; bool k = 0; bfmt(out, "addr ["); if ((k = addr->base.bits)) dumpref(0, addr->base); diff --git a/ir/ir.c b/ir/ir.c index 136935f..bb75d4b 100644 --- a/ir/ir.c +++ b/ir/ir.c @@ -36,7 +36,8 @@ struct calltab calltab; struct phitab phitab; struct dattab dattab; struct contab contab; -struct addr addrht[1 << 12]; +struct addrtab addrtab; +static ushort *addrht; static int naddrht; int visitmark; @@ -47,6 +48,7 @@ irinit(struct function *fn) static union ref *phisbuf[64]; static struct irdat datsbuf[64]; static struct xcon consbuf[64]; + static struct addr addrsbuf[64]; assert(fn->arena && !fn->passarena); @@ -59,8 +61,11 @@ irinit(struct function *fn) vinit(&phitab, phisbuf, countof(phisbuf)); vinit(&contab, consbuf, countof(consbuf)); if (!dattab.p) vinit(&dattab, datsbuf, countof(datsbuf)); - if (naddrht >= countof(addrht)/2) - memset(addrht, naddrht = 0, sizeof addrht); + naddrht = 128; + if (!addrht) xbgrowz(&addrht, naddrht); + else if (addrtab.n) memset(addrht, 0, xbcap(addrht) * sizeof *addrht); + vinit(&addrtab, addrsbuf, countof(addrsbuf)); + if (!type2cls[TYINT]) { for (int i = TYBOOL; i <= TYUVLONG; ++i) { int siz = targ_primsizes[i]; @@ -84,18 +89,28 @@ irinit(struct function *fn) static int newaddr(const struct addr *addr) { - uint h = hashb(0, addr, sizeof *addr); - uint i = h, n = countof(addrht); - for (;; ++i) { - i &= countof(addrht) - 1; - if (!addrht[i].base.bits && !addrht[i].index.bits) { - addrht[i] = *addr; - ++naddrht; - return i; - } else if (!memcmp(&addrht[i], addr, sizeof *addr)) { - return i; + if (addrtab.n >= naddrht/4*3 /*75% load factor */) { + xbgrowz(&addrht, naddrht*2); + memset(addrht, 0, naddrht * sizeof *addrht); + naddrht *= 2; + for (int i = 0; i < addrtab.n; ++i) { /* rehash */ + const struct addr *addr = &addrtab.p[i]; + for (uint h = hashb(0, addr, sizeof *addr), j = h;; ++j) { + if (!addrht[j &= naddrht - 1]) { + addrht[j] = i+1; + break; + } + } + } + } + for (uint h = hashb(0, addr, sizeof *addr), i = h;; ++i) { + i &= naddrht - 1; + if (!addrht[i]) { + vpush(&addrtab, *addr); + return (addrht[i] = addrtab.n) - 1; + } else if (!memcmp(&addrtab.p[addrht[i]-1], addr, sizeof *addr)) { + return addrht[i] - 1; } - assert(--n > 0 && "addrht full"); } } diff --git a/ir/ir.h b/ir/ir.h index c7abaa7..39fa116 100644 --- a/ir/ir.h +++ b/ir/ir.h @@ -245,7 +245,7 @@ extern struct calltab {vec_of(struct call);} calltab; extern struct phitab {vec_of(union ref *);} phitab; extern struct dattab {vec_of(struct irdat);} dattab; extern struct contab {vec_of(struct xcon);} contab; -extern struct addr addrht[]; +extern struct addrtab {vec_of(struct addr);} addrtab; extern int visitmark; #define mkinstr(O, C, ...) ((struct instr) { .op = (O), .cls = (C), .reg=0, __VA_ARGS__ }) #define mkarginstr(ty, x) mkinstr(Oarg, 0, mktyperef(ty), (x)) diff --git a/ir/regalloc.c b/ir/regalloc.c index aa18e10..cdbd7af 100644 --- a/ir/regalloc.c +++ b/ir/regalloc.c @@ -62,8 +62,8 @@ liveuse(struct bitset *defined, struct instr *ins, union ref *r, struct block *b { int var; if (r->t == RADDR) { - liveuse(defined, ins, &addrht[r->i].base, blk); - liveuse(defined, ins, &addrht[r->i].index, blk); + liveuse(defined, ins, &addrtab.p[r->i].base, blk); + liveuse(defined, ins, &addrtab.p[r->i].index, blk); return; } else if (r->t != RTMP) return; var = r->i; @@ -736,8 +736,8 @@ buildintervals(struct rega *ra) usereg(ra, r.i, blk, pos); } else if (r.t == RADDR) { reghint = -1; - queue[nqueue++] = addrht[r.i].base; - queue[nqueue++] = addrht[r.i].index; + queue[nqueue++] = addrtab.p[r.i].base; + queue[nqueue++] = addrtab.p[r.i].index; } } } @@ -970,10 +970,17 @@ linearscan(struct rega *ra) avail &= ~excl; if (!avail) { + /* XXX heuristically pick a better interval to spill */ /* spill current */ current->alloc = allocstk(ra); DBG("%%%d got stk%d\n", this, current->alloc.a); /* move current to active */ + /* XXX spilled intervals are being put in active so their stack + * slots can be freed when expiring old intervals but it turns the + * linear scan algorithmic complexity closer to O(n^2), so is a + * performance downgrade. in the referenced paper, they are moved + * to handled. this should be fixed by doing stack slot allocation + * separately */ current->next = *active; *active = current; continue; @@ -1071,7 +1078,7 @@ devirt(struct rega *ra, struct block *blk) for (int i = 0; i < 2; ++i) { union ref *r = &i[&ins->l]; if (r->t == RADDR) { - struct addr *a = &addrht[r->i]; + struct addr *a = &addrtab.p[r->i]; ++naddr; newaddr = *a; argref[nargref++] = &newaddr.base; diff --git a/x86_64/emit.c b/x86_64/emit.c index e54b7dd..5dbbed1 100644 --- a/x86_64/emit.c +++ b/x86_64/emit.c @@ -127,7 +127,7 @@ mkmemoper(union ref r) assert(wop.t == OREG); return mkoper(OMEM, .base = wop.reg, .index = NOINDEX); } else if (r.t == RADDR) { - const struct addr *addr = &addrht[r.i]; + const struct addr *addr = &addrtab.p[r.i]; assert(addr->shift <= 3); if (isaddrcon(addr->base,0)) { return mkoper(OSYM, .con = addr->base.i, @@ -835,7 +835,7 @@ gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct ope if (val.t == RADDR) { /* this is a LEA, but maybe it can be lowered to a 2-address instruction, * which may clobber flags */ - const struct addr *addr = &addrht[val.i]; + const struct addr *addr = &addrtab.p[val.i]; if (flagslivep(blk, curi)) goto Lea; if (addr->base.t != RREG) goto Lea; if (addr->base.bits && dst.reg == mkregoper(addr->base).reg) { /* base = dst */ diff --git a/x86_64/isel.c b/x86_64/isel.c index 637df7e..fffc4c9 100644 --- a/x86_64/isel.c +++ b/x86_64/isel.c @@ -248,7 +248,7 @@ aadd(struct addr *addr, struct block *blk, int *curi, union ref r) if (!ascale(addr, ins->l, ins->r)) goto Ref; ins->skip = 1; } else if (ins->op == Ocopy && ins->l.t == RADDR) { - struct addr save = *addr, *addr2 = &addrht[ins->l.i]; + struct addr save = *addr, *addr2 = &addrtab.p[ins->l.i]; if ((!addr2->base.bits || aadd(addr, blk, curi, addr2->base)) && aimm(addr, addr2->disp) && (!addr2->index.bits || ascale(addr, addr2->index, mkref(RICON, addr2->shift)))) @@ -290,7 +290,7 @@ fuseaddr(union ref *r, struct block *blk, int *curi) if (isaddrcon(*r,1)) return 1; if (r->t == RADDR) { - const struct addr *a0 = &addrht[r->i]; + const struct addr *a0 = &addrtab.p[r->i]; if (aadd(&addr, blk, curi, a0->base) && (!addr.index.bits || ascale(&addr, a0->index, mkref(RICON, a0->shift))) && aadd(&addr, blk, curi, mkintcon(KPTR, a0->disp))) { -- cgit v1.2.3