diff options
| author | 2023-06-24 18:47:05 +0200 | |
|---|---|---|
| committer | 2023-06-24 18:47:05 +0200 | |
| commit | 19bbdfa3c7ae05f4694ce5e434d9855c6f2c3682 (patch) | |
| tree | 700ca75e92f443fcb3fed30b1078b8aedde979f9 /amd64 | |
| parent | d313c6e49bfb32ae24745e90eebe833da20efa1a (diff) | |
backend: fix regalloc to work with more complex dataflow
basically, an allocation map is kept at the beginning (in) and end (out)
of each block; after the first allocation pass, another pass is run to
resolve allocation conflicts across each edge, plus another pass to
finish lowering phi functions.
also introduced `regset`, along with plenty of other miscellaneous fixes
Diffstat (limited to 'amd64')
| -rw-r--r-- | amd64/emit.c | 26 | ||||
| -rw-r--r-- | amd64/isel.c | 2 | ||||
| -rw-r--r-- | amd64/sysv.c | 4 |
3 files changed, 25 insertions, 7 deletions
diff --git a/amd64/emit.c b/amd64/emit.c index f354262..5b3a298 100644 --- a/amd64/emit.c +++ b/amd64/emit.c @@ -162,6 +162,7 @@ enum operpat { enum operenc { EN_R = 1, /* reg with /r */ EN_RR, /* reg, reg with /r */ + EN_RRX, /* reg, reg with /r (inverted) */ EN_MR, /* mem, reg with /r */ EN_RM, /* reg, mem with /r */ EN_M, /* mem */ @@ -249,6 +250,17 @@ encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct o D(opc, nopc); B(0300 | (dst.reg & 7) << 3 | (src.reg & 7)); break; + case EN_RRX: /* mod = 11; reg = src; rm = dst */ + rex |= (src.reg >> 3) << 2; /* REX.R */ + rex |= (dst.reg >> 3) << 0; /* REX.B */ + if (rex) B(0x40 | rex); + else if (en->r8 && in_range(src.reg, RSP, RDI)) { + /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */ + B(0x40); + } + D(opc, nopc); + B(0300 | (src.reg & 7) << 3 | (dst.reg & 7)); + break; case EN_MR: mem = dst; reg = src.reg; @@ -370,6 +382,8 @@ static void Xmov(uchar **pcode, enum irclass k, struct oper dst, struct oper src {8, PFPR, PFPR, "\xF2\x0F\x10", EN_RR}, /* MOVSD xmm, xmm */ {8, PFPR, PMEM, "\xF2\x0F\x10", EN_RM}, /* MOVSD xmm, m64 */ {8, PMEM, PFPR, "\xF2\x0F\x11", EN_MR}, /* MOVSS m64, xmm */ + {4|8, PGPR, PFPR, "\x66\x0F\x6E", EN_RRX}, /* MOVD/Q r64/32, xmm */ + {4|8, PFPR, PGPR, "\x66\x0F\x6E", EN_RR}, /* MOVD/Q xmm, r64/32 */ }; static const uchar k2off[] = { [KI4] = 0, @@ -583,6 +597,10 @@ static void gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct oper dst, union ref val) { assert(dst.t == OREG); + if (val.bits == UNDREF.bits) { + /* can be generated by ssa construction, since value is undefined no move is needed */ + return; + } if (val.t == RADDR) { /* this is a LEA, but maybe it can be lowered to a 2-address instruction, * which may clobber flags */ @@ -824,12 +842,12 @@ emitbranch(uchar **pcode, struct block *blk) static void calleesave(int *npush, uchar **pcode, struct function *fn) { - if (bstest(fn->regusage, RBX)) { + if 
(rstest(fn->regusage, RBX)) { Xpush(pcode, RBX); ++*npush; } for (int r = R12; r <= R15; ++r) - if (bstest(fn->regusage, r)) { + if (rstest(fn->regusage, r)) { Xpush(pcode, r); ++*npush; } @@ -839,9 +857,9 @@ static void calleerestore(uchar **pcode, struct function *fn) { for (int r = R15; r >= R12; --r) - if (bstest(fn->regusage, r)) + if (rstest(fn->regusage, r)) Xpop(pcode, r); - if (bstest(fn->regusage, RBX)) Xpop(pcode, RBX); + if (rstest(fn->regusage, RBX)) Xpop(pcode, RBX); } /* align code using NOPs */ diff --git a/amd64/isel.c b/amd64/isel.c index cb87b7d..07115ac 100644 --- a/amd64/isel.c +++ b/amd64/isel.c @@ -187,7 +187,7 @@ aadd(struct addr *addr, union ref r) } else if (r.t == RREG) { /* temporaries are single assignment, but register aren't, so they can't be * * safely hoisted into an address value, unless they have global lifetime */ - if (!bstest(mctarg->rglob, r.i)) return 0; + if (!rstest(mctarg->rglob, r.i)) return 0; Ref: if (!addr->base.bits) addr->base = r; else if (!addr->index.bits) addr->index = r; diff --git a/amd64/sysv.c b/amd64/sysv.c index 9c7bc15..6c5b67c 100644 --- a/amd64/sysv.c +++ b/amd64/sysv.c @@ -141,8 +141,8 @@ const struct mctarg t_amd64_sysv = { .gpr0 = RAX, .ngpr = R15 - RAX + 1, .bpr = RBP, .fpr0 = XMM0, .nfpr = XMM15 - XMM0 + 1, - .rcallee = {{1<<RBX | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15}}, - .rglob = {{1<<RSP | 1<<RBP}}, + .rcallee = 1<<RBX | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15, + .rglob = 1<<RSP | 1<<RBP, .rnames = amd64_rnames, .objkind = OBJELF, .isa = ISamd64, |