diff options
| author | 2026-02-28 20:38:55 +0100 | |
|---|---|---|
| committer | 2026-02-28 20:38:55 +0100 | |
| commit | f06da11d8524a9eb7fe984171d4462cef8eac2e6 (patch) | |
| tree | 0050bede4ec0e9939e4a5f98be089a539d5810d2 /ir | |
| parent | 3d1efdcc77de5ce8f3279bd22b0a510a699229ea (diff) | |
ir: make address ref hash table resizable
Would hit the limit on very large functions (thanks csmith).
Diffstat (limited to 'ir')
| -rw-r--r-- | ir/dump.c | 2 |
| -rw-r--r-- | ir/ir.c | 43 |
| -rw-r--r-- | ir/ir.h | 2 |
| -rw-r--r-- | ir/regalloc.c | 17 |
4 files changed, 43 insertions, 21 deletions
@@ -137,7 +137,7 @@ dumpref(enum op o, union ref ref) break; case RADDR: { - const struct addr *addr = &addrht[ref.i]; + const struct addr *addr = &addrtab.p[ref.i]; bool k = 0; bfmt(out, "addr ["); if ((k = addr->base.bits)) dumpref(0, addr->base); @@ -36,7 +36,8 @@ struct calltab calltab; struct phitab phitab; struct dattab dattab; struct contab contab; -struct addr addrht[1 << 12]; +struct addrtab addrtab; +static ushort *addrht; static int naddrht; int visitmark; @@ -47,6 +48,7 @@ irinit(struct function *fn) static union ref *phisbuf[64]; static struct irdat datsbuf[64]; static struct xcon consbuf[64]; + static struct addr addrsbuf[64]; assert(fn->arena && !fn->passarena); @@ -59,8 +61,11 @@ irinit(struct function *fn) vinit(&phitab, phisbuf, countof(phisbuf)); vinit(&contab, consbuf, countof(consbuf)); if (!dattab.p) vinit(&dattab, datsbuf, countof(datsbuf)); - if (naddrht >= countof(addrht)/2) - memset(addrht, naddrht = 0, sizeof addrht); + naddrht = 128; + if (!addrht) xbgrowz(&addrht, naddrht); + else if (addrtab.n) memset(addrht, 0, xbcap(addrht) * sizeof *addrht); + vinit(&addrtab, addrsbuf, countof(addrsbuf)); + if (!type2cls[TYINT]) { for (int i = TYBOOL; i <= TYUVLONG; ++i) { int siz = targ_primsizes[i]; @@ -84,18 +89,28 @@ irinit(struct function *fn) static int newaddr(const struct addr *addr) { - uint h = hashb(0, addr, sizeof *addr); - uint i = h, n = countof(addrht); - for (;; ++i) { - i &= countof(addrht) - 1; - if (!addrht[i].base.bits && !addrht[i].index.bits) { - addrht[i] = *addr; - ++naddrht; - return i; - } else if (!memcmp(&addrht[i], addr, sizeof *addr)) { - return i; + if (addrtab.n >= naddrht/4*3 /*75% load factor */) { + xbgrowz(&addrht, naddrht*2); + memset(addrht, 0, naddrht * sizeof *addrht); + naddrht *= 2; + for (int i = 0; i < addrtab.n; ++i) { /* rehash */ + const struct addr *addr = &addrtab.p[i]; + for (uint h = hashb(0, addr, sizeof *addr), j = h;; ++j) { + if (!addrht[j &= naddrht - 1]) { + addrht[j] = i+1; + break; + } + } + 
} + } + for (uint h = hashb(0, addr, sizeof *addr), i = h;; ++i) { + i &= naddrht - 1; + if (!addrht[i]) { + vpush(&addrtab, *addr); + return (addrht[i] = addrtab.n) - 1; + } else if (!memcmp(&addrtab.p[addrht[i]-1], addr, sizeof *addr)) { + return addrht[i] - 1; } - assert(--n > 0 && "addrht full"); } } @@ -245,7 +245,7 @@ extern struct calltab {vec_of(struct call);} calltab; extern struct phitab {vec_of(union ref *);} phitab; extern struct dattab {vec_of(struct irdat);} dattab; extern struct contab {vec_of(struct xcon);} contab; -extern struct addr addrht[]; +extern struct addrtab {vec_of(struct addr);} addrtab; extern int visitmark; #define mkinstr(O, C, ...) ((struct instr) { .op = (O), .cls = (C), .reg=0, __VA_ARGS__ }) #define mkarginstr(ty, x) mkinstr(Oarg, 0, mktyperef(ty), (x)) diff --git a/ir/regalloc.c b/ir/regalloc.c index aa18e10..cdbd7af 100644 --- a/ir/regalloc.c +++ b/ir/regalloc.c @@ -62,8 +62,8 @@ liveuse(struct bitset *defined, struct instr *ins, union ref *r, struct block *b { int var; if (r->t == RADDR) { - liveuse(defined, ins, &addrht[r->i].base, blk); - liveuse(defined, ins, &addrht[r->i].index, blk); + liveuse(defined, ins, &addrtab.p[r->i].base, blk); + liveuse(defined, ins, &addrtab.p[r->i].index, blk); return; } else if (r->t != RTMP) return; var = r->i; @@ -736,8 +736,8 @@ buildintervals(struct rega *ra) usereg(ra, r.i, blk, pos); } else if (r.t == RADDR) { reghint = -1; - queue[nqueue++] = addrht[r.i].base; - queue[nqueue++] = addrht[r.i].index; + queue[nqueue++] = addrtab.p[r.i].base; + queue[nqueue++] = addrtab.p[r.i].index; } } } @@ -970,10 +970,17 @@ linearscan(struct rega *ra) avail &= ~excl; if (!avail) { + /* XXX heuristically pick a better interval to spill */ /* spill current */ current->alloc = allocstk(ra); DBG("%%%d got stk%d\n", this, current->alloc.a); /* move current to active */ + /* XXX spilled intervals are being put in active so their stack + * slots can be freed when expiring old intervals but it turns the + * 
+			 * linear scan algorithmic complexity closer to O(n^2), so is a
+			 * performance downgrade. in the referenced paper, they are moved
+			 * to handled. this should be fixed by doing stack slot allocation
+			 * separately */
 			current->next = *active;
 			*active = current;
 			continue;
 		}
@@ -1071,7 +1078,7 @@ devirt(struct rega *ra, struct block *blk)
 	for (int i = 0; i < 2; ++i) {
 		union ref *r = &i[&ins->l];
 		if (r->t == RADDR) {
-			struct addr *a = &addrht[r->i];
+			struct addr *a = &addrtab.p[r->i];
 			++naddr;
 			newaddr = *a;
 			argref[nargref++] = &newaddr.base;