diff options
| author | 2023-06-24 18:47:05 +0200 | |
|---|---|---|
| committer | 2023-06-24 18:47:05 +0200 | |
| commit | 19bbdfa3c7ae05f4694ce5e434d9855c6f2c3682 (patch) | |
| tree | 700ca75e92f443fcb3fed30b1078b8aedde979f9 /amd64/emit.c | |
| parent | d313c6e49bfb32ae24745e90eebe833da20efa1a (diff) | |
backend: fix regalloc to work with more complex dataflow
basically an allocation map at the beginning (in) and end (out) of each
block is kept, and after the first allocation pass another pass is run to
resolve allocation conflicts across each edge, plus another pass to
finish lowering phi functions.
also introduced `regset` and made plenty of other miscellaneous fixes
Diffstat (limited to 'amd64/emit.c')
| -rw-r--r-- | amd64/emit.c | 26 |
1 files changed, 22 insertions, 4 deletions
diff --git a/amd64/emit.c b/amd64/emit.c index f354262..5b3a298 100644 --- a/amd64/emit.c +++ b/amd64/emit.c @@ -162,6 +162,7 @@ enum operpat { enum operenc { EN_R = 1, /* reg with /r */ EN_RR, /* reg, reg with /r */ + EN_RRX, /* reg, reg with /r (inverted) */ EN_MR, /* mem, reg with /r */ EN_RM, /* reg, mem with /r */ EN_M, /* mem */ @@ -249,6 +250,17 @@ encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct o D(opc, nopc); B(0300 | (dst.reg & 7) << 3 | (src.reg & 7)); break; + case EN_RRX: /* mod = 11; reg = src; rm = dst */ + rex |= (src.reg >> 3) << 2; /* REX.R */ + rex |= (dst.reg >> 3) << 0; /* REX.B */ + if (rex) B(0x40 | rex); + else if (en->r8 && in_range(src.reg, RSP, RDI)) { + /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */ + B(0x40); + } + D(opc, nopc); + B(0300 | (src.reg & 7) << 3 | (dst.reg & 7)); + break; case EN_MR: mem = dst; reg = src.reg; @@ -370,6 +382,8 @@ static void Xmov(uchar **pcode, enum irclass k, struct oper dst, struct oper src {8, PFPR, PFPR, "\xF2\x0F\x10", EN_RR}, /* MOVSD xmm, xmm */ {8, PFPR, PMEM, "\xF2\x0F\x10", EN_RM}, /* MOVSD xmm, m64 */ {8, PMEM, PFPR, "\xF2\x0F\x11", EN_MR}, /* MOVSS m64, xmm */ + {4|8, PGPR, PFPR, "\x66\x0F\x6E", EN_RRX}, /* MOVD/Q r64/32, xmm */ + {4|8, PFPR, PGPR, "\x66\x0F\x6E", EN_RR}, /* MOVD/Q xmm, r64/32 */ }; static const uchar k2off[] = { [KI4] = 0, @@ -583,6 +597,10 @@ static void gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct oper dst, union ref val) { assert(dst.t == OREG); + if (val.bits == UNDREF.bits) { + /* can be generated by ssa construction, since value is undefined no move is needed */ + return; + } if (val.t == RADDR) { /* this is a LEA, but maybe it can be lowered to a 2-address instruction, * which may clobber flags */ @@ -824,12 +842,12 @@ emitbranch(uchar **pcode, struct block *blk) static void calleesave(int *npush, uchar **pcode, struct function *fn) { - if (bstest(fn->regusage, RBX)) { + if 
(rstest(fn->regusage, RBX)) { Xpush(pcode, RBX); ++*npush; } for (int r = R12; r <= R15; ++r) - if (bstest(fn->regusage, r)) { + if (rstest(fn->regusage, r)) { Xpush(pcode, r); ++*npush; } @@ -839,9 +857,9 @@ static void calleerestore(uchar **pcode, struct function *fn) { for (int r = R15; r >= R12; --r) - if (bstest(fn->regusage, r)) + if (rstest(fn->regusage, r)) Xpop(pcode, r); - if (bstest(fn->regusage, RBX)) Xpop(pcode, RBX); + if (rstest(fn->regusage, RBX)) Xpop(pcode, RBX); } /* align code using NOPs */ |