From 19bbdfa3c7ae05f4694ce5e434d9855c6f2c3682 Mon Sep 17 00:00:00 2001 From: lemon Date: Sat, 24 Jun 2023 18:47:05 +0200 Subject: backend: fix regalloc to work with more complex dataflow basically an allocation map at the beginning (in) and end (out) of each block is kept and after the first allocation pass another pass is run to resolve allocation conflicts along each edge, plus another pass to finish lowering phi functions. also introduced `regset` and plenty of other miscellaneous fixes --- amd64/emit.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'amd64/emit.c') diff --git a/amd64/emit.c b/amd64/emit.c index f354262..5b3a298 100644 --- a/amd64/emit.c +++ b/amd64/emit.c @@ -162,6 +162,7 @@ enum operpat { enum operenc { EN_R = 1, /* reg with /r */ EN_RR, /* reg, reg with /r */ + EN_RRX, /* reg, reg with /r (inverted) */ EN_MR, /* mem, reg with /r */ EN_RM, /* reg, mem with /r */ EN_M, /* mem */ @@ -249,6 +250,17 @@ encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct o D(opc, nopc); B(0300 | (dst.reg & 7) << 3 | (src.reg & 7)); break; + case EN_RRX: /* mod = 11; reg = src; rm = dst */ + rex |= (src.reg >> 3) << 2; /* REX.R */ + rex |= (dst.reg >> 3) << 0; /* REX.B */ + if (rex) B(0x40 | rex); + else if (en->r8 && in_range(src.reg, RSP, RDI)) { + /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */ + B(0x40); + } + D(opc, nopc); + B(0300 | (src.reg & 7) << 3 | (dst.reg & 7)); + break; case EN_MR: mem = dst; reg = src.reg; @@ -370,6 +382,8 @@ static void Xmov(uchar **pcode, enum irclass k, struct oper dst, struct oper src {8, PFPR, PFPR, "\xF2\x0F\x10", EN_RR}, /* MOVSD xmm, xmm */ {8, PFPR, PMEM, "\xF2\x0F\x10", EN_RM}, /* MOVSD xmm, m64 */ {8, PMEM, PFPR, "\xF2\x0F\x11", EN_MR}, /* MOVSS m64, xmm */ + {4|8, PGPR, PFPR, "\x66\x0F\x6E", EN_RRX}, /* MOVD/Q r64/32, xmm */ + {4|8, PFPR, PGPR, "\x66\x0F\x6E", EN_RR}, /* MOVD/Q xmm, r64/32 */ }; static const uchar k2off[] = { [KI4] = 
0, @@ -583,6 +597,10 @@ static void gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct oper dst, union ref val) { assert(dst.t == OREG); + if (val.bits == UNDREF.bits) { + /* can be generated by ssa construction, since value is undefined no move is needed */ + return; + } if (val.t == RADDR) { /* this is a LEA, but maybe it can be lowered to a 2-address instruction, * which may clobber flags */ @@ -824,12 +842,12 @@ emitbranch(uchar **pcode, struct block *blk) static void calleesave(int *npush, uchar **pcode, struct function *fn) { - if (bstest(fn->regusage, RBX)) { + if (rstest(fn->regusage, RBX)) { Xpush(pcode, RBX); ++*npush; } for (int r = R12; r <= R15; ++r) - if (bstest(fn->regusage, r)) { + if (rstest(fn->regusage, r)) { Xpush(pcode, r); ++*npush; } @@ -839,9 +857,9 @@ static void calleerestore(uchar **pcode, struct function *fn) { for (int r = R15; r >= R12; --r) - if (bstest(fn->regusage, r)) + if (rstest(fn->regusage, r)) Xpop(pcode, r); - if (bstest(fn->regusage, RBX)) Xpop(pcode, RBX); + if (rstest(fn->regusage, RBX)) Xpop(pcode, RBX); } /* align code using NOPs */ -- cgit v1.2.3