aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorlemon <lsof@mailbox.org>2023-06-12 12:07:17 +0200
committerlemon <lsof@mailbox.org>2023-06-12 12:10:47 +0200
commit984b74da895d7155f68434be9cc9b6d49a77abec (patch)
treef8748466b274abd172a1491a6b45cb8dcbfe7d32
parent1139df03b0edbf08deb9aa26ade3776be3c1e180 (diff)
register renaming and such
-rw-r--r--Makefile2
-rw-r--r--abi0.c4
-rw-r--r--amd64/emit.c30
-rw-r--r--amd64/isel.c41
-rw-r--r--amd64/sysv.c2
-rw-r--r--common.h23
-rw-r--r--ir.h2
-rw-r--r--mem.c141
-rw-r--r--regalloc.c182
-rw-r--r--test/test3.c16
10 files changed, 351 insertions, 92 deletions
diff --git a/Makefile b/Makefile
index 6ce0ce7..ef6dcab 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@ OBJ=$(patsubst %.c,obj/%.o,$(SRC))
DEP=$(OBJ:.o=.d)
OUT=cchomp
-all: CFLAGS += -g -Og
+all: CFLAGS += -g
all: $(OUT)
opt: CFLAGS += -O2
diff --git a/abi0.c b/abi0.c
index 0e8b551..907ac12 100644
--- a/abi0.c
+++ b/abi0.c
@@ -71,10 +71,10 @@ copyparam(struct abiarg abi)
case KF4: ld = Oloadf4; break;
case KF8: ld = Oloadf8; break;
}
- vpush(&addrtab, ((struct addr) {.base = mkref(RREG, mctarg->spr), .disp = abi.reg}));
+ vpush(&addrtab, ((struct addr) {.base = mkref(RREG, mctarg->fpr), .disp = abi.stk}));
return mkinstr(ld, abi.ty.cls, mkref(RMORE, addrtab.n - 1));
} else { /* aggregate in stack */
- vpush(&addrtab, ((struct addr) {.base = mkref(RREG, mctarg->spr), .disp = abi.reg}));
+ vpush(&addrtab, ((struct addr) {.base = mkref(RREG, mctarg->fpr), .disp = abi.stk}));
return mkinstr(Ocopy, KPTR, mkref(RMORE, addrtab.n - 1));
}
}
diff --git a/amd64/emit.c b/amd64/emit.c
index 160c207..cf8d325 100644
--- a/amd64/emit.c
+++ b/amd64/emit.c
@@ -58,6 +58,7 @@ addmemoper(struct oper mem, struct oper add)
enum operpat {
PNONE,
PRAX,
+ PRCX,
PGPR,
PFPR,
P1, /* imm = 1 */
@@ -93,6 +94,7 @@ opermatch(enum operpat pat, struct oper oper)
switch (pat) {
case PNONE: return !oper.t;
case PRAX: return oper.t == OREG && oper.reg == RAX;
+ case PRCX: return oper.t == OREG && oper.reg == RCX;
case PGPR: return oper.t == OREG && oper.reg <= R15;
case PFPR: return oper.t == OREG && oper.reg >= XMM0;
case P1: return oper.t == OIMM && oper.imm == 1;
@@ -240,7 +242,7 @@ DEFINSTR2(Xmov,
{4, PMEM, PFPR, "\xF3\x0F\x10", EN_MR}, /* MOVSS m32, xmm */
{8, PFPR, PFPR, "\xF2\x0F\x10", EN_RR}, /* MOVSD xmm, xmm */
{8, PFPR, PMEM, "\xF2\x0F\x10", EN_RM}, /* MOVSD xmm, m64 */
- {8, PMEM, PFPR, "\xF2\x0F\x10", EN_MR}, /* MOVSS m64, xmm */
+ {8, PMEM, PFPR, "\xF2\x0F\x11", EN_MR}, /* MOVSS m64, xmm */
)
DEFINSTR2(Xmovsx4,
{8, PGPR, PMEM, "\x63", EN_RM}, /* MOVSXD r64, m32 */
@@ -279,8 +281,13 @@ DEFINSTR2(Xadd,
{8, PFPR, PMEM, "\xF2\x0F\x58", EN_RM}, /* ADDSD xmm, m64 */
)
DEFINSTR2(Xshl,
- {4|8, PGPR, P1, "\xD1", EN_R, .ext=4}, /* SHL r32/64, 1 */
- {4|8, PGPR, PI32, "\xC1", EN_RI8, .ext=4}, /* SHL r32/64, imm */
+ {4|8, PGPR, P1, "\xD1", EN_R, .ext=4}, /* SHL r32/64, 1 */
+ {4|8, PGPR, PI32, "\xC1", EN_RI8, .ext=4}, /* SHL r32/64, imm */
+ {4|8, PGPR, PRCX, "\xD3", EN_R, .ext=4}, /* SHL r32/64, CL */
+)
+DEFINSTR1(Xidiv,
+ {4|8, PGPR, 0, "\xF7", EN_R, .ext=7}, /* IDIV r32/64 */
+ {4|8, PMEM, 0, "\xF7", EN_M, .ext=7}, /* IDIV m32/64 */
)
DEFINSTR1(Xcall,
{-1, PSYM, 0, "\xE8", EN_R32}, /* CALL rel32 */
@@ -329,6 +336,13 @@ mkimmregoper(union ref r)
}
static inline struct oper
+mkdatregoper(union ref r)
+{
+ assert(isregref(r) || (r.t == RXCON && conht[r.i].deref));
+ return ref2oper(r);
+}
+
+static inline struct oper
mkimmdatregoper(union ref r)
{
assert(isregref(r) || r.t == RICON || (r.t == RXCON && (conht[r.i].cls == KI4 || conht[r.i].deref)));
@@ -454,6 +468,16 @@ emitinstr(uchar **pcode, uint *stktop, struct function *fn, struct block *blk, i
assert(dst.t == OREG && ins->reg-1 == dst.reg);
X(pcode, ksiz, dst, mkimmdatregoper(ins->r));
break;
+ case Odiv: case Orem:
+ switch (ins->cls) {
+ case KI8: B(0x48); /* REX.W */
+ case KI4: B(0x99); /* CDQ/CQO */
+ assert(mkregoper(ins->l).reg == RAX);
+ Xidiv(pcode, ksiz, mkdatregoper(ins->r));
+ break;
+ case KF4: case KF8: assert(0);
+ }
+ break;
case Omove:
dst = ref2oper(ins->l);
gencopy(pcode, ins->cls, dst, ins->r);
diff --git a/amd64/isel.c b/amd64/isel.c
index f99c8a2..e6cc692 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -6,14 +6,16 @@ static void
fixarg(struct function *fn, union ref *r, struct instr *ins, struct block *blk, int *curi)
{
int sh;
+ enum op op = ins->op;
+
if (r->t == RXCON) {
struct xcon *con = &conht[r->i];
- if (in_range(ins->op, Oshl, Oslr)) {
+ if (in_range(op, Oshl, Oslr)) {
sh = con->cls == KI4 ? con->i4 : con->i8;
goto ShiftImm;
- } else if (in_range(ins->op, Oadd, Osub) && con->i8 == 2147483648) {
+ } else if (in_range(op, Oadd, Osub) && con->i8 == 2147483648) {
/* add X, INT32MAX+1 -> sub X, INT32MIN */
- ins->op = Oadd + (ins->op == Oadd);
+ ins->op = Oadd + (op == Oadd);
*r = mkintcon(fn, KI4, -2147483648);
} else if (con->cls && (kisflt(con->cls) || con->cls == KI8)) {
/* float immediates & >32b immediates are loaded from memory */
@@ -22,8 +24,12 @@ fixarg(struct function *fn, union ref *r, struct instr *ins, struct block *blk,
if (con->cls == KI4 || con->cls == KF4) wr32le(data, con->i4);
else wr64le(data, con->i8);
*r = mkdatref(fn, siz, /*align*/siz, data, siz, /*deref*/1);
- }
- } else if (in_range(ins->op, Oshl, Oslr) && r->t == RICON) {
+ } else if (in_range(op, Odiv, Ourem) && kisint(ins->cls))
+ goto DivImm;
+ } else if (r->t == RICON && in_range(op, Odiv, Ourem) && kisint(ins->cls)) {
+ DivImm: /* there is no division by immediate, must be copied to a register */
+ *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, ins->cls, *r));
+ } else if (r->t == RICON && in_range(op, Oshl, Oslr)) {
sh = r->i;
ShiftImm: /* shift immediate is always 8bit */
*r = mkref(RICON, sh & 255);
@@ -131,8 +137,9 @@ static void
sel(struct function *fn, struct instr *ins, struct block *blk, int *curi)
{
struct instr temp = {0};
+ enum op op = ins->op;
- switch (ins->op) {
+ switch (op) {
case Oshl: case Osar: case Oslr:
if (!iscon(ins->r)) {
/* shift amount register is always CL */
@@ -144,10 +151,25 @@ sel(struct function *fn, struct instr *ins, struct block *blk, int *curi)
case Oulth: case Ougth: case Oulte: case Ougte:
if (iscon(ins->l)) {
/* lth imm, x -> gth x, imm */
- ins->op = ((ins->op - Olth) ^ 1) + Olth;
+ ins->op = ((op - Olth) ^ 1) + Olth;
rswap(ins->l, ins->r);
}
goto ALU;
+ case Odiv: case Oudiv: case Orem: case Ourem:
+ if (kisflt(ins->cls)) goto ALU;
+ /* TODO fuse div/rem pair */
+
+ /* (I)DIV dividend is always in RDX:RAX, output also in those regs */
+ insertinstr(blk, (*curi)++, mkinstr(Omove, ins->cls, mkref(RREG, RAX), ins->l));
+ /* mark RDX as clobbered. sign/zero-extending RAX into RDX is handled in emit() */
+ insertinstr(blk, (*curi)++, mkinstr(Omove, ins->cls, mkref(RREG, RDX), mkref(RREG, RDX)));
+ fixarg(fn, &ins->r, ins, blk, curi); /* make sure rhs is memory or reg */
+ ins->l = mkref(RREG, RAX);
+ insertinstr(blk, (*curi)++, *ins); /* duplicate ins to reuse tmp ref */
+ *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, op < Orem ? RAX : RDX)); /* get output */
+ temp = mkinstr(Ocopy, ins->cls, mkref(RREG, op < Orem ? RDX : RAX)); /* clobber other reg*/
+ insertinstr(blk, ++(*curi), temp);
+ break;
case Osub:
if (iscon(ins->l)) {
/* sub imm, x -> sub x, imm; neg x */
@@ -168,6 +190,7 @@ sel(struct function *fn, struct instr *ins, struct block *blk, int *curi)
break;
}
}
+ /* fallthru */
case Omul: case Oumul:
case Oand: case Oxor: case Oior:
case Oequ: case Oneq:
@@ -177,8 +200,8 @@ sel(struct function *fn, struct instr *ins, struct block *blk, int *curi)
case Oneg: case Onot:
case Oexts1: case Oextu1: case Oexts2: case Oextu2: case Oexts4: case Oextu4:
ALU:
- if (!(ins->op == Oadd && kisint(ins->cls))) /* 3-address add is lea */
- if (!(in_range(ins->op, Omul, Oumul) && kisint(ins->cls) && isimm32(ins->r))) /* for (I)MUL r,r/m,imm */
+ if (!(op == Oadd && kisint(ins->cls))) /* 3-address add is lea */
+ if (!(in_range(op, Omul, Oumul) && kisint(ins->cls) && isimm32(ins->r))) /* for (I)MUL r,r/m,imm */
ins->inplace = 1;
if (ins->l.t != RTMP && ins->l.t != RREG)
ins->l = insertinstr(blk, (*curi)++, mkinstr(Ocopy, ins->cls, ins->l));
diff --git a/amd64/sysv.c b/amd64/sysv.c
index 1128cc2..7d391dc 100644
--- a/amd64/sysv.c
+++ b/amd64/sysv.c
@@ -139,7 +139,7 @@ const char amd64_rnames[][6] = {
const struct mctarg t_amd64_sysv = {
.gpr0 = RAX, .ngpr = R15 - RAX + 1,
- .spr = RSP,
+ .fpr = RBP,
.fpr0 = XMM0, .nfpr = XMM15 - XMM0 + 1,
.rcallee = {{1<<RBX | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15}},
.rglob = {{1<<RSP | 1<<RBP}},
diff --git a/common.h b/common.h
index e31d48b..ee6dee6 100644
--- a/common.h
+++ b/common.h
@@ -339,6 +339,29 @@ struct bitset { uvlong u; };
enum { BSNBIT = 8 * sizeof(uvlong) };
#define BSSIZE(nbit) ((nbit) / BSNBIT + ((nbit) % BSNBIT != 0))
+struct imapbase { short *k; struct bitset *bs; uint n, N; };
+/* map of short -> T */
+#define imap_of(T) struct { T *v; int tmp; struct imapbase mb; }
+void imap_init_(struct imapbase *, void **v, uint vsiz, uint N);
+int imap_get_(struct imapbase *, short k);
+int imap_set_(struct imapbase *, void **v, uint vsiz, short k);
+#define imap_free(m) (free((m)->mb.k), memset((m), 0, sizeof *(m)))
+#define imap_init(m, N) (imap_free(m), imap_init_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, (N))
+#define imap_get(m, k) (((m)->tmp = imap_get_(&(m)->mb, k)) < 0 ? NULL : &(m)->v[(m)->tmp])
+#define imap_set(m, k, x) ((m)->tmp = imap_set_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, k), \
+ (m)->v[(m)->tmp] = (x))
+
+struct pmapbase { void **k; uint n, N; };
+/* map of non-null ptr -> T */
+#define pmap_of(T) struct { T *v; int tmp; struct imapbase mb; }
+void pmap_init_(struct pmapbase *, void **v, uint vsiz, uint N);
+int pmap_get_(struct pmapbase *, void *k);
+int pmap_set_(struct pmapbase *, void **v, uint vsiz, void *k);
+#define pmap_free(m) (free((m)->mb.k), memset((m), 0, sizeof *(m)))
+#define pmap_init(m, N) (pmap_free(m), pmap_init_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, (N))
+#define pmap_get(m, k) (((m)->tmp = pmap_get_(&(m)->mb, k)) < 0 ? NULL : &(m)->v[(m)->tmp])
+#define pmap_set(m, k, x) ((m)->v[pmap_set_(&(m)->mb, (void **)&(m)->v, sizeof*(m)->v, k)] = (x))
+
static inline bool
bstest(const struct bitset *bs, uint i)
{
diff --git a/ir.h b/ir.h
index 1c8468b..3561397 100644
--- a/ir.h
+++ b/ir.h
@@ -142,7 +142,7 @@ struct function {
struct mctarg {
short gpr0, /* first gpr */
ngpr, /* gpr count */
- spr, /* stack pointer reg */
+ fpr, /* frame pointer reg */
fpr0, /* first fpr */
nfpr; /* fpr count */
struct bitset rcallee[1], /* callee-saved */
diff --git a/mem.c b/mem.c
index 3a22a35..ea35e41 100644
--- a/mem.c
+++ b/mem.c
@@ -126,50 +126,46 @@ freearena(struct arena *ar)
}
}
-#if 0
-
-int
-map_get_(struct mapbase *m, int k)
-{
- if (!m->N) return 0;
- for (int i = k;; ++i) {
- bool notempty = bstest(m->bs, k);
- i &= m->N - 1;
- if (notempty && m->k[i] == k)
- return i;
- if (!notempty)
- return -1;
- }
-}
-
-
void
-map_init_(struct mapbase *m, void **v, uint vsiz, uint N)
+imap_init_(struct imapbase *m, void **v, uint vsiz, uint N)
{
- uint sizk = N*sizeof(int),
+ uint sizk = N*sizeof*m->k,
sizv = N*vsiz,
- sizbs = BSCOUNT(N)*sizeof(struct bitset);
+ sizbs = BSSIZE(N)*sizeof(struct bitset);
- assert(N && (N & (N - 1)) == 0);
- m->k = xcalloc(sizk + sizv + sizbs, "map_rehash");
+ assert(ispo2(N));
+ m->k = xcalloc(sizk + sizv + sizbs, "imap_init");
*v = (char *)m->k + sizk;
m->bs = (struct bitset *)((char *)*v + sizv);
m->N = N;
}
+int
+imap_get_(struct imapbase *m, short k)
+{
+ if (!m->N) return -1;
+ for (int i = k;; ++i) {
+ bool notempty = bstest(m->bs, i &= m->N - 1);
+ if (bstest(m->bs, i) && m->k[i] == k) return i;
+ if (!notempty) return -1;
+ }
+}
+
static void
-map_rehash(struct mapbase *m, void **v, uint vsiz)
+imap_rehash(struct imapbase *m, void **v, uint vsiz)
{
- int *newk, i, k, j;
+ short *newk, k;
+ int i, j;
void *newv;
struct bitset *newbs;
- uint N2 = m->N << 1,
- sizk = N2*sizeof(int),
+ uint N2 = m->N ? m->N << 1 : 16,
+ sizk = N2*sizeof*m->k,
sizv = N2*vsiz,
- sizbs = BSCOUNT(N2)*sizeof(struct bitset);
+ sizbs = BSSIZE(N2)*sizeof(struct bitset);
assert(N2);
- newk = xrealloc(NULL, sizk + sizv + sizbs, "map_rehash");
+ newk = xcalloc(sizk + sizv + sizbs, "imap_rehash");
+ memset(newk, 0, sizk + sizv + sizbs);
newv = (char *)newk + sizk;
newbs = (struct bitset *)((char *)newv + sizv);
for (i = 0; i < m->N; ++i) {
@@ -178,8 +174,8 @@ map_rehash(struct mapbase *m, void **v, uint vsiz)
j = k = m->k[i];
for (;; ++j) {
j &= N2 - 1;
- if (!bstest(newbs, i)) {
- bsset(newbs, i);
+ if (!bstest(newbs, j)) {
+ bsset(newbs, j);
m->k[j] = k;
memcpy((char *)newv + j*vsiz, (char *)*v + i*vsiz, vsiz);
break;
@@ -187,8 +183,6 @@ map_rehash(struct mapbase *m, void **v, uint vsiz)
}
}
free(m->k);
- free(*v);
- free(m->bs);
m->k = newk;
*v = newv;
m->bs = newbs;
@@ -196,16 +190,14 @@ map_rehash(struct mapbase *m, void **v, uint vsiz)
}
int
-map_set_(struct mapbase *m, void **v, uint vsiz, int k)
+imap_set_(struct imapbase *m, void **v, uint vsiz, short k)
{
- if (!m->N) return 0;
if (m->n >= m->N >> 1) {
- map_rehash(m, v, vsiz);
+ imap_rehash(m, v, vsiz);
assert(m->n < m->N);
}
for (int i = k;; ++i) {
- bool notempty = bstest(m->bs, k);
- i &= m->N - 1;
+ bool notempty = bstest(m->bs, i &= m->N - 1);
if (notempty && m->k[i] == k)
return i;
if (!notempty) {
@@ -217,6 +209,79 @@ map_set_(struct mapbase *m, void **v, uint vsiz, int k)
}
}
-#endif
+void
+pmap_init_(struct pmapbase *m, void **v, uint vsiz, uint N)
+{
+ uint sizk = N*sizeof(int),
+ sizv = N*vsiz;
+
+ assert(ispo2(N));
+ m->k = xcalloc(sizk + sizv, "pmap_init");
+ *v = (char *)m->k + sizk;
+ m->N = N;
+}
+
+int pmap_get_(struct pmapbase *m, void *k)
+{
+ assert(k && "null key");
+ if (!m->N) return -1;
+ for (int i = ptrhash(k);; ++i) {
+ i &= m->N - 1;
+ if (m->k[i] == k) return i;
+ if (!m->k[i]) return -1;
+ }
+}
+
+static void
+pmap_rehash(struct pmapbase *m, void **v, uint vsiz)
+{
+ void **newk;
+ int i, j;
+ void *k;
+ void *newv;
+ uint N2 = m->N << 1,
+ sizk = N2*sizeof*m->k,
+ sizv = N2*vsiz;
+
+ assert(N2);
+ newk = xcalloc(sizk + sizv, "pmap_rehash");
+ newv = (char *)newk + sizk;
+ for (i = 0; i < m->N; ++i) {
+ if (!m->k[i])
+ continue;
+ j = ptrhash(k = m->k[i]);
+ for (;; ++j) {
+ j &= N2 - 1;
+ if (!newk[j]) {
+ newk[j] = k;
+ memcpy((char *)newv + j*vsiz, (char *)*v + i*vsiz, vsiz);
+ break;
+ }
+ }
+ }
+ free(m->k);
+ m->k = newk;
+ *v = newv;
+ m->N = N2;
+}
+
+int pmap_set_(struct pmapbase *m, void **v, uint vsiz, void *k)
+{
+ assert(k && "null key");
+ if (m->n >= m->N >> 1) {
+ pmap_rehash(m, v, vsiz);
+ assert(m->n < m->N);
+ }
+ for (int i = ptrhash(k);; ++i) {
+ i &= m->N - 1;
+ if (m->k[i] == k)
+ return i;
+ if (!m->k[i]) {
+ m->k[i] = k;
+ ++m->n;
+ return i;
+ }
+ }
+}
/* vim:set ts=3 sw=3 expandtab: */
diff --git a/regalloc.c b/regalloc.c
index f887c59..0527228 100644
--- a/regalloc.c
+++ b/regalloc.c
@@ -1,25 +1,68 @@
+#include "common.h"
#include "ir.h"
static struct bitset globusage[1];
-static struct bitset taken[1];
+static struct bitset floatregs[1];
+
+#define isfpr(reg) bstest(floatregs, (reg))
+#define isgpr(reg) (!isfpr(reg))
+
+enum { ADEAD, AREG, ASTACK } ;
+struct alloc { ushort t : 2, a : 14; };
+#define afree() ((struct alloc) { ADEAD })
+#define areg(r) ((struct alloc) { AREG, (r) })
+#define astack(s) ((struct alloc) { ASTACK, (s) })
+
+struct rega {
+ union ref regs[MAXREGS]; /* map reg -> value holding reg */
+ imap_of(struct alloc) allocs; /* map tmpidx -> allocation */
+ int nfreegpr, nfreefpr;
+};
static void
-def(struct instr *ins)
+def(struct rega *ra, struct instr *ins)
{
- if (ins->reg)
- bsclr(taken, ins->reg - 1);
+ int reg = -1, var;
+ struct alloc *alloc;
+ if (ins->op != Omove) {
+ var = ins - instrtab;
+ // efmt("def %%%d\n",var);
+ if ((alloc = imap_get(&ra->allocs, var))) {
+ if (alloc->t == AREG) {
+ reg = alloc->a;
+ // efmt("-- free %s for %%%d\n", mctarg->rnames[alloc->a], var);
+ assert(ra->regs[reg].bits == mkref(RTMP, var).bits);
+ } else assert(0);
+ *alloc = afree();
+ }
+ } else {
+ reg = ins->l.i;
+ assert(ins->l.t == RREG);
+ // efmt("-- free %s\n", mctarg->rnames[ins->l.i]);
+ }
+ if (reg != -1) {
+ ra->regs[reg] = NOREF;
+ if (isfpr(reg)) ++ra->nfreefpr;
+ else ++ra->nfreegpr;
+ }
}
static void
-take(int r) {
- bsset(taken, r);
- bsset(globusage, r);
+take(struct rega *ra, int reg, union ref ref) {
+ // efmt("-- take %s for %d %d\n", mctarg->rnames[reg], ref.t, ref.i);
+ assert(!ra->regs[reg].t && "taken");
+ if (ref.t == RTMP)
+ imap_set(&ra->allocs, ref.i, areg(reg));
+ ra->regs[reg] = ref;
+ bsset(globusage, reg);
+ if (isfpr(reg)) --ra->nfreefpr;
+ else --ra->nfreegpr;
}
static int
-nextreg(enum irclass cls)
+nextreg(struct rega *ra, enum irclass cls, union ref ref, int excl)
{
- int r0, rend, i;
+ int r0, rend, reg;
assert(cls);
if (kisint(cls)) {
@@ -29,31 +72,77 @@ nextreg(enum irclass cls)
r0 = mctarg->fpr0;
rend = mctarg->fpr0 + mctarg->nfpr;
} else assert(0);
- for (i = r0; i < rend; ++i) {
- if (!bstest(taken, i)) {
- take(i);
- return i;
+ for (reg = r0; reg < rend; ++reg) {
+ if (bstest(mctarg->rglob, reg)) continue;
+ if (reg != excl && !ra->regs[reg].t) {
+ take(ra, reg, ref);
+ return reg;
}
}
assert(!"no more reg");
}
+#define mkmove(k, rd, rs) mkinstr(Omove, k, mkref(RREG, rd), mkref(RREG, rs))
+
+static void
+forcetake(struct rega *ra, int reg, union ref ref, struct block *blk, int curi) {
+ int var;
+ struct alloc *alloc;
+
+ if (ra->regs[reg].bits == ref.bits) return;
+ if (!ra->regs[reg].t) {
+ take(ra, reg, ref);
+ return;
+ }
+ assert(ra->regs[reg].t == RTMP);
+ var = ra->regs[reg].i;
+ alloc = imap_get(&ra->allocs, var);
+ assert(alloc && alloc->a == reg);
+ *alloc = afree();
+ /* try to move temp to another register */
+ if (isgpr(reg) ? ra->nfreegpr > 0 : ra->nfreefpr > 0) {
+ /* the register of the current instruction (if any) was already free'd (by def), so
+ * we need to explictly exclude it from the pool of rename registers
+ * e.g.: given 'R0 = copy R1'; if R1 => %x, we need to prevent renaming %x => R0
+ */
+ int excl = instrtab[blk->ins.p[curi]].reg-1;
+ int rename = nextreg(ra, isgpr(reg) ? KI4 : KF4, ra->regs[reg], excl);
+ if (ccopt.dbg.r) efmt("-- rename %%%d %s -> %s\n", var, mctarg->rnames[reg], mctarg->rnames[rename]);
+ /* introduce move from rename -> original (since we allocate backwards) */
+ insertinstr(blk, ++curi, mkmove(instrtab[var].cls, reg, rename));
+ instrtab[var].reg = rename+1;
+ ra->regs[rename] = mkref(RTMP, var);
+ bsset(globusage, rename);
+ imap_set(&ra->allocs, var, areg(rename));
+ ra->regs[reg].bits = 0;
+ } else {
+ assert(!"spill");
+ }
+ take(ra, reg, ref);
+}
+
+/* mark a use for *ref, possibly allocating a register for it, considering it won't clash with `other` */
static void
-use(struct block *blk, enum op op, int hint, union ref *ref)
+use(struct rega *ra, struct block *blk, int curi, enum op op, int hint, union ref *ref, union ref other)
{
struct instr *ins;
+ int excl = other.t == RREG ? other.i : -1;
+
if (ref->t == RMORE) {
if (op == Ophi) {
struct phi *phi = &phitab.p[ref->i];
for (int i = 0; i < phi->n; ++i)
- use(blk, 0, hint, &phi->ref[i]);
+ use(ra, blk, curi, 0, hint, &phi->ref[i], NOREF);
} else {
struct addr *addr = &addrtab.p[ref->i];
- if (addr->base.t) use(blk, 0, hint, &addr->base);
- if (addr->index.t) use(blk, 0, hint, &addr->index);
+ if (addr->base.t) use(ra, blk, curi, 0, hint, &addr->base, addr->index);
+ if (addr->index.t) use(ra, blk, curi, 0, hint, &addr->index, NOREF);
}
return;
- } else if (ref->t != RTMP) return;
+ } else if (ref->t == RREG) {
+ forcetake(ra, ref->i, *ref, blk, curi);
+ }
+ if (ref->t != RTMP) return;
ins = &instrtab[ref->i];
if (oisalloca(ins->op)) return;
@@ -62,14 +151,16 @@ use(struct block *blk, enum op op, int hint, union ref *ref)
if (op == -1) /* cond branch */
if (oiscmp(ins->op) && ref->i == blk->ins.p[blk->ins.n-1])
/* result of comparison instr is only used to conditionally branch,
- * doesn't usually need a reg (handled by isel) */
+ * doesn't usually need a reg (this should be handled by isel) */
return;
assert(ins->op != Ocall);
- if (hint != -1 && !bstest(taken, hint)) {
- take(hint);
+ if (hint == -1 && ins->op == Ocopy && ins->l.t == RREG) /* for '%x = copy Rx', hint %x to use Rx */
+ hint = ins->l.i;
+ if (hint != -1 && hint != excl && !ra->regs[hint].t) {
+ take(ra, hint, *ref);
ins->reg = hint + 1;
} else {
- ins->reg = nextreg(ins->cls) + 1;
+ ins->reg = nextreg(ra, ins->cls, *ref, excl) + 1;
}
*ref = mkref(RREG, ins->reg-1);
}
@@ -80,6 +171,12 @@ regalloc(struct function *fn)
{
struct instr *ins;
struct block *last = fn->entry->lprev, *blk;
+ struct rega ra = {0};
+ ra.nfreegpr = mctarg->ngpr - popcnt(mctarg->rglob->u);
+ ra.nfreefpr = mctarg->fpr;
+ for (int i = 0; i < MAXREGS; ++i)
+ if (in_range(i, mctarg->fpr0, mctarg->fpr0 + mctarg->nfpr - 1))
+ bsset(floatregs, i);
/* a dumb linear register allocator that visits instructions physically backwards
* starting at the end of the function, when encountering a use of a new
@@ -90,9 +187,9 @@ regalloc(struct function *fn)
do {
for (int i = 0; i < 2; ++i) {
if (!blk->jmp.arg[i].t) break;
- use(blk, (blk->jmp.t == Jb) - 1,
+ use(&ra, blk, blk->ins.n-1, (blk->jmp.t == Jb) - 1,
blk->jmp.t == Jret ? fn->abiret[i].reg : -1,
- &blk->jmp.arg[i]);
+ &blk->jmp.arg[i], blk->jmp.arg[!i]);
}
for (struct block *s = blk->s1; s; s = blk->s2) {
/* introduce necessary moves for successors' phis */
@@ -105,8 +202,7 @@ regalloc(struct function *fn)
int reg = ins->reg - 1;
assert(reg >= 0);
if (src.t != RTMP || instrtab[src.i].reg-1 != reg) /* not in the right register already */
- insertinstr(blk, blk->ins.n,
- (struct instr){Ocopy, ins->cls, reg+1, .l = src});
+ insertinstr(blk, blk->ins.n, mkinstr(Omove, ins->cls, mkref(RREG, reg), src));
break;
}
}
@@ -120,23 +216,39 @@ regalloc(struct function *fn)
*ins = mkinstr(Onop, 0,);
continue;
}
- def(ins);
- if (ins->op == Omove) {
- use(blk, ins->op, ins->l.i, &ins->r);
- } else if (ins->op != Ocall) {
+ def(&ra, ins);
+ if (ins->op != Ocall) {
if (ins->op == Ocopy) hint0 = ins->reg - 1;
- if (ins->l.t) use(blk, ins->op, hint0, &ins->l);
- if (ins->r.t) use(blk, ins->op, hint1, &ins->r);
+ if (ins->op == Omove) {
+ hint1 = ins->l.i;
+ /* MOV Rx,Rx is used by isel to indicate a clobber,
+ * so it should be a def point for Rx but not a use point */
+ if (ins->r.bits != ins->l.bits)
+ use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF);
+ } else {
+ if (ins->l.t) use(&ra, blk, i, ins->op, hint0, &ins->l, ins->r);
+ if (ins->r.t) use(&ra, blk, i, ins->op, hint1, &ins->r, NOREF);
+ }
} else {
struct call *call = &calltab.p[ins->r.i];
- use(blk, ins->op, hint0, &ins->l);
+ use(&ra, blk, i, ins->op, hint0, &ins->l, NOREF);
+ }
+ if (ins->op == Ocopy && !ins->reg)
+ *ins = mkinstr(Onop, 0,);
+ if (ins->inplace && ins->reg) {
+ assert(ins->l.t == RREG);
+ if (ins->reg-1 != ins->l.i) {
+ /* an in-place operation where the destination does not
+ * match the first operand, so we need to add a move */
+ insertinstr(blk, i, mkmove(ins->cls, ins->reg-1, ins->l.i));
+ }
}
}
for (int i = blk->phi.n - 1; i >= 0; --i) {
ins = &instrtab[blk->phi.p[i]];
assert(ins->op == Ophi);
- def(ins);
- use(blk, Ophi, ins->reg - 1, &ins->l);
+ def(&ra, ins);
+ use(&ra, blk, 0, Ophi, ins->reg - 1, &ins->l, NOREF);
}
} while ((blk = blk->lprev) != last);
diff --git a/test/test3.c b/test/test3.c
index 35e5e8b..bf53396 100644
--- a/test/test3.c
+++ b/test/test3.c
@@ -8,11 +8,22 @@ int t(unsigned short *p, short i) {
return p[i];
}
-#if 1
+int shcl(int a, int b) {
+ return a << (b+1);
+}
+
+struct p { long x,y; };
+struct p divsh(int a) {
+ struct p p;
+ p.x = a << (a / 5);
+ p.y = a;
+ return p;
+}
+
+#if 0
long test(long x) {
return x + (long)"abc";
}
-#endif
double ff(double x, double y)
{
@@ -26,3 +37,4 @@ void testss() {
long fma(long x, long y) {
return x + (y <<1) - 2147483648;}
+#endif