aboutsummaryrefslogtreecommitdiffhomepage
path: root/amd64/emit.c
diff options
context:
space:
mode:
authorlemon <lsof@mailbox.org>2023-06-24 18:47:05 +0200
committerlemon <lsof@mailbox.org>2023-06-24 18:47:05 +0200
commit19bbdfa3c7ae05f4694ce5e434d9855c6f2c3682 (patch)
tree700ca75e92f443fcb3fed30b1078b8aedde979f9 /amd64/emit.c
parentd313c6e49bfb32ae24745e90eebe833da20efa1a (diff)
backend: fix regalloc to work with more complex dataflow
Basically, an allocation map at the beginning (in) and end (out) of each block is kept, and after the first allocation pass another pass is run to resolve allocation conflicts across each edge, plus another pass to finish lowering phi functions. Also introduced `regset` and plenty of other miscellaneous fixes.
Diffstat (limited to 'amd64/emit.c')
-rw-r--r--amd64/emit.c26
1 files changed, 22 insertions, 4 deletions
diff --git a/amd64/emit.c b/amd64/emit.c
index f354262..5b3a298 100644
--- a/amd64/emit.c
+++ b/amd64/emit.c
@@ -162,6 +162,7 @@ enum operpat {
enum operenc {
EN_R = 1, /* reg with /r */
EN_RR, /* reg, reg with /r */
+ EN_RRX, /* reg, reg with /r (inverted) */
EN_MR, /* mem, reg with /r */
EN_RM, /* reg, mem with /r */
EN_M, /* mem */
@@ -249,6 +250,17 @@ encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct o
D(opc, nopc);
B(0300 | (dst.reg & 7) << 3 | (src.reg & 7));
break;
+ case EN_RRX: /* mod = 11; reg = src; rm = dst */
+ rex |= (src.reg >> 3) << 2; /* REX.R */
+ rex |= (dst.reg >> 3) << 0; /* REX.B */
+ if (rex) B(0x40 | rex);
+ else if (en->r8 && in_range(src.reg, RSP, RDI)) {
+ /* /r8 needs REX to encode SP,BP,SI,DI (otherwise -> AH..BH) */
+ B(0x40);
+ }
+ D(opc, nopc);
+ B(0300 | (src.reg & 7) << 3 | (dst.reg & 7));
+ break;
case EN_MR:
mem = dst;
reg = src.reg;
@@ -370,6 +382,8 @@ static void Xmov(uchar **pcode, enum irclass k, struct oper dst, struct oper src
{8, PFPR, PFPR, "\xF2\x0F\x10", EN_RR}, /* MOVSD xmm, xmm */
{8, PFPR, PMEM, "\xF2\x0F\x10", EN_RM}, /* MOVSD xmm, m64 */
{8, PMEM, PFPR, "\xF2\x0F\x11", EN_MR}, /* MOVSD m64, xmm */
+ {4|8, PGPR, PFPR, "\x66\x0F\x6E", EN_RRX}, /* MOVD/Q r64/32, xmm */
+ {4|8, PFPR, PGPR, "\x66\x0F\x6E", EN_RR}, /* MOVD/Q xmm, r64/32 */
};
static const uchar k2off[] = {
[KI4] = 0,
@@ -583,6 +597,10 @@ static void
gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct oper dst, union ref val)
{
assert(dst.t == OREG);
+ if (val.bits == UNDREF.bits) {
+ /* can be generated by ssa construction, since value is undefined no move is needed */
+ return;
+ }
if (val.t == RADDR) {
/* this is a LEA, but maybe it can be lowered to a 2-address instruction,
* which may clobber flags */
@@ -824,12 +842,12 @@ emitbranch(uchar **pcode, struct block *blk)
static void
calleesave(int *npush, uchar **pcode, struct function *fn)
{
- if (bstest(fn->regusage, RBX)) {
+ if (rstest(fn->regusage, RBX)) {
Xpush(pcode, RBX);
++*npush;
}
for (int r = R12; r <= R15; ++r)
- if (bstest(fn->regusage, r)) {
+ if (rstest(fn->regusage, r)) {
Xpush(pcode, r);
++*npush;
}
@@ -839,9 +857,9 @@ static void
calleerestore(uchar **pcode, struct function *fn)
{
for (int r = R15; r >= R12; --r)
- if (bstest(fn->regusage, r))
+ if (rstest(fn->regusage, r))
Xpop(pcode, r);
- if (bstest(fn->regusage, RBX)) Xpop(pcode, RBX);
+ if (rstest(fn->regusage, RBX)) Xpop(pcode, RBX);
}
/* align code using NOPs */