aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r-- src/ir_abi0.c 19
-rw-r--r-- src/ir_dump.c 2
-rw-r--r-- src/ir_regalloc.c 4
-rw-r--r-- src/ir_stack.c 2
-rw-r--r-- src/t_aarch64_aapcs.c 12
-rw-r--r-- src/t_aarch64_emit.c 120
-rw-r--r-- src/t_aarch64_isel.c 41
-rw-r--r-- src/t_x86-64_emit.c 111
-rw-r--r-- src/t_x86-64_isel.c 30
-rw-r--r-- src/t_x86-64_sysv.c 2
10 files changed, 204 insertions, 139 deletions
diff --git a/src/ir_abi0.c b/src/ir_abi0.c
index dd8bc40..b8ae90f 100644
--- a/src/ir_abi0.c
+++ b/src/ir_abi0.c
@@ -91,18 +91,18 @@ static void
patchparam(Function *fn, int *curi, int *param, int tydat, int nabi, ABIArg abi[2], uchar r2off)
{
Block *blk = fn->entry;
- assert(in_range(nabi,1,2));
for (; *curi < blk->ins.n; ++*curi) {
Instr *ins = &instrtab[blk->ins.p[*curi]];
if (ins->op != Oparam) continue;
assert(ins->r.t == RTYPE
&& ins->r.i == (tydat < 0 ? abi[0].ty : (IRType){.isagg=1, .dat=tydat}).bits);
- if (abi[0].ty.isagg || tydat < 0) {
+ if (abi[0].ty.isagg || tydat < 0 || abi[0].ty.bits == cls2type(KPTR).bits) {
/* aggregate in stack or scalar, just copy */
- assert(nabi == 1);
+ assert(nabi < 2);
*ins = copyparam(fn, curi, *param, abi[0]);
} else { /* aggregate in registers, materialize */
+ assert(nabi >= 1);
Ref alloc, r[2];
Instr st;
const TypeData *td;
@@ -120,13 +120,13 @@ patchparam(Function *fn, int *curi, int *param, int tydat, int nabi, ABIArg abi[
if (nabi > 1)
r[1] = insertinstr(blk, ++*curi, copyparam(fn, NULL, ++*param, abi[1]));
/* transform
- * %x = copy %p
+ * %x = param %p
* into
* %x = alloca...
* store* %x, %a
* store* %x + N, %b
*/
- st = mkinstr2(cls2store[abi[0].ty.cls], 0, alloc, r[0]);
+ st = mkinstr2(cls2store[abi[0].ty.isagg ? KPTR : abi[0].ty.cls], 0, alloc, r[0]);
insertinstr(blk, ++*curi, st);
if (nabi > 1) {
Instr tmp = mkinstr2(Oadd, KPTR, alloc, mkref(RICON, r2off));
@@ -174,7 +174,6 @@ load2regs(Ref out[2], IRType typ, Ref src, int nabi, ABIArg abi[2], uchar r2off,
ins.l = insertinstr(blk, (*curi)++, adr);
}
temp = insertinstr(blk, (*curi)++, ins);
- //insertinstr(blk, (*curi)++, mkarginstr(abi[i].ty, temp));
out[i] = temp;
}
} else {
@@ -200,7 +199,6 @@ load2regs(Ref out[2], IRType typ, Ref src, int nabi, ABIArg abi[2], uchar r2off,
reg = temp;
}
}
- //insertinstr(blk, arginst++, mkarginstr(abi[i].ty, reg));
out[i] = reg;
}
}
@@ -215,6 +213,7 @@ patcharg(Block *blk, int *icall, IRCall *call,
assert(arg->op == Oarg && arg->l.t == RTYPE);
if (ref2type(arg->l).isagg) { /* aggregate argument */
if (abi[0].ty.isagg) { /* aggregate in stack */
+ assert(nabi == 0);
/* XXX do this better.. */
/* ptr %dst = arg <stk dst> */
/* (blit %dst, %src) */
@@ -236,9 +235,12 @@ patcharg(Block *blk, int *icall, IRCall *call,
*icall = arginst + (call->narg - argidx);
return 1;
} else if (abi[0].ty.cls == KPTR) { /* aggregate by pointer */
+ /* XXX make a copy */
+ assert(nabi == -1 || nabi == 1);
arg->cls = KPTR;
return 1;
} else { /* aggregate in registers */
+ assert(nabi > 0);
Ref r[2];
IRType typ = ref2type(arg->l);
delinstr(blk, arginst);
@@ -249,6 +251,7 @@ patcharg(Block *blk, int *icall, IRCall *call,
return nabi;
}
} else { /* normal scalar argument */
+ assert(nabi >= 0);
return 1;
}
}
@@ -383,7 +386,7 @@ abi0(Function *fn)
int first = abiargs.n;
uchar r2off;
int ret = abiarg(&abiargs, &r2off, &ni, &nf, &ns, pty);
- patchparam(fn, &istart, &param, pty.isagg ? pty.dat : -1, ret+!ret, &abiargs.p[first], r2off);
+ patchparam(fn, &istart, &param, pty.isagg ? pty.dat : -1, ret, &abiargs.p[first], r2off);
}
fn->abiarg = alloccopy(fn->arena, abiargs.p, abiargs.n * sizeof *abiargs.p, 0);
fn->nabiarg = abiargs.n;
diff --git a/src/ir_dump.c b/src/ir_dump.c
index 4c18a70..b66fd95 100644
--- a/src/ir_dump.c
+++ b/src/ir_dump.c
@@ -168,7 +168,7 @@ dumpref(enum op o, Ref ref)
}
break;
case RSTACK:
- bfmt(out, "[stack %d]", ref.i);
+ bfmt(out, "stack(%d)", ref.i);
break;
default: assert(!"ref");
}
diff --git a/src/ir_regalloc.c b/src/ir_regalloc.c
index 31f03c2..26b90a6 100644
--- a/src/ir_regalloc.c
+++ b/src/ir_regalloc.c
@@ -111,9 +111,7 @@ typedef struct RegAlloc {
stktop;
} RegAlloc;
-#define stkslotref(fn, off) \
- mkaddr((IRAddr){.base = mkref(RREG, mctarg->bpr), \
- .disp = -(fn)->stksiz - 8 - (off)})
+#define stkslotref(fn, off) mkref(RSTACK, (fn)->stksiz + (off))
/* Parallel moves algorithm from QBE
* <https://c9x.me/git/qbe.git/tree/rega.c?id=e493a7f23352f51acc0a1e12284ab19d7894488a#n201> */
diff --git a/src/ir_stack.c b/src/ir_stack.c
index ff49805..a9acc61 100644
--- a/src/ir_stack.c
+++ b/src/ir_stack.c
@@ -19,7 +19,7 @@ lowerstack(Function *fn)
fn->stksiz = alignup(fn->stksiz, 1 << alignlog2);
if (fn->stksiz > (1<<20)-1) error(NULL, "'%s' stack frame too big", fn->name);
*ins = mkinstr0(Onop,0);
- replcuses(mkref(RTMP, t), mkref(RSTACK, fn->stksiz));
+ replcuses(mkref(RTMP, t), mkref(RSTACK, fn->stksiz-siz));
}
}
} while ((blk = blk->lnext) != fn->entry);
diff --git a/src/t_aarch64_aapcs.c b/src/t_aarch64_aapcs.c
index a321f5b..1cf3a61 100644
--- a/src/t_aarch64_aapcs.c
+++ b/src/t_aarch64_aapcs.c
@@ -93,7 +93,7 @@ abiarg(short r[2], uchar cls[2], uchar *r2off, int *ni, int *nf, int *ns, IRType
if (n <= NFLT - *nf) {
for (int i = 0; i < n; ++i) {
r[i] = V(0) + *nf + i;
- cls[i] = type2cls[k];
+ cls[i] = k;
}
*nf += n;
} else { /* stack */
@@ -121,17 +121,17 @@ abiarg(short r[2], uchar cls[2], uchar *r2off, int *ni, int *nf, int *ns, IRType
}
static int
-abiret(short r[2], uchar cls[2], uchar *r2off, int *ni, IRType typ)
+abiret(short r[2], uchar cls[2], uchar *r2off, int *_ni, IRType typ)
{
if (!typ.isagg) {
r[0] = kisflt(cls[0] = typ.cls) ? V(0) : R0;
return 1;
}
- int nf = 0, ns = 0;
- int ret = abiarg(r, cls, r2off, ni, &nf, &ns, typ);
- if (ret) return ret;
+ int ni = 0, nf = 0, ns = 0;
+ int ret = abiarg(r, cls, r2off, &ni, &nf, &ns, typ);
+ if (ret && cls[0] != KPTR) /* in regs */
+ return ret;
/* caller-allocated result address in x8 */
- assert(*ni == 0);
r[0] = -1;
r[1] = R(8);
return 0;
diff --git a/src/t_aarch64_emit.c b/src/t_aarch64_emit.c
index 2f80b3a..799b388 100644
--- a/src/t_aarch64_emit.c
+++ b/src/t_aarch64_emit.c
@@ -51,6 +51,24 @@ static inline bool usegot(int c)
&& (con->flag & (SLOCAL|SFUNC)) != (SLOCAL|SFUNC);
}
+typedef struct Frame {
+ regset save;
+ struct RPair { uchar a,b; } pairs[10];
+ uchar single[3];
+ uint nfpairs, ngpairs;
+ bool usefp;
+ int size;
+} Frame;
+
+static Frame frame;
+
+static int
+stackdisp(int i)
+{
+ return i < 0 ? frame.size - i - 8 /* arg */
+ : frame.size - i + 16*frame.usefp;
+}
+
static Oper
mkmemoper(uint msiz, Ref r)
{
@@ -59,6 +77,9 @@ mkmemoper(uint msiz, Ref r)
return mkoper(OMEM, .m = {AIMMIDX, .base = instrtab[r.i].reg-1});
} else if (r.t == RREG) {
return mkoper(OMEM, .m = {AIMMIDX, .base = r.i});
+ } else if (r.t == RSTACK) {
+ int disp = stackdisp(r.i);
+ return mkoper(OMEM, .m = {AIMMIDX, .base = frame.usefp ? FP : SP, .disp = disp});
} else if (isaddrcon(r,1)) {
return mkoper(OSYM + usegot(r.i), .con = r.i,);
} else if (r.t == RADDR) {
@@ -68,10 +89,19 @@ mkmemoper(uint msiz, Ref r)
assert(!addr->index.bits);
return mkoper(OSYM + usegot(addr->base.i), .con = addr->base.i, .cdisp = addr->disp);
}
- assert(addr->base.t == RREG);
if (!addr->index.bits) {
- return mkoper(OMEM, .m = {.mode = AIMMIDX, .base = addr->base.i, .disp = addr->disp});
+ int base, disp;
+ if (addr->base.t == RREG) {
+ base = addr->base.i;
+ disp = 0;
+ } else if (addr->base.t == RSTACK) {
+ base = frame.usefp ? FP : SP;
+ disp = stackdisp(addr->base.i);
+ } else assert(0);
+ disp += addr->disp;
+ return mkoper(OMEM, .m = {.mode = AIMMIDX, .base = base, .disp = disp});
} else {
+ assert(addr->base.t == RREG);
assert(addr->index.t == RREG);
assert(addr->shift == 0 || 1<<addr->shift == msiz);
return mkoper(OMEM, .m = {
@@ -100,7 +130,7 @@ ref2oper(Ref r)
assert(contab.p[r.i].f == 0.0);
return mkoper(OIMM, .imm = 0);
} else if (!contab.p[r.i].cls) {
- case RADDR:
+ case RSTACK: case RADDR:
return mkmemoper(0, r);
}
assert(0);
@@ -209,8 +239,6 @@ opermatch(enum operpat pat, enum irclass k, Oper o)
static uchar *fnstart;
static internstr curfnsym;
-static bool usefp;
-static int rbpoff;
/* Given an instruction description table, find the first entry that matches
* the operands and encode it. */
@@ -582,6 +610,9 @@ gencopy(uchar **pcode, enum irclass cls, Block *blk, int curi, Oper dst, Ref val
}
}
return;
+ } else if (val.t == RSTACK) {
+ Xadd(pcode, cls, dst, reg2oper(FP), mkoper(OIMM, .imm = stackdisp(val.i)));
+ return;
}
src = ref2oper(val);
if (opermatch(PGPRZ, cls, src) && kisint(cls)) {
@@ -895,20 +926,12 @@ emitinstr(uchar **pcode, Function *fn, Block *blk, int curi, Instr *ins)
}
}
-typedef struct Frame {
- regset save;
- struct RPair { uchar a,b; } pairs[10];
- uchar single[2];
- uint nfpairs, ngpairs;
-} Frame;
-
static void
prologue(uchar **pcode, Frame *frame, Function *fn)
{
*frame = (Frame){0};
- regset save = frame->save = (fn->regusage & mctarg->rcallee) | (usefp * BIT(FP));
+ regset save = frame->save = fn->regusage & mctarg->rcallee;
if (save) {
- save = rsset(&frame->save, LR);
int prev = 0;
struct RPair *p = frame->pairs;
for (uint reg = V(8); reg <= V(15); ++reg) {
@@ -919,21 +942,16 @@ prologue(uchar **pcode, Frame *frame, Function *fn)
prev = 0;
} else prev = reg;
}
- uint ngpr = popcnt(save & (BIT(32)-1));
+ uint ngpr = popcnt(save & (BIT(30)-1));
if (prev) {
+ frame->single[0] = prev;
if (ngpr & 1) {
- frame->single[0] = prev;
frame->single[1] = prev = lowestsetbit(save);
rsclr(&save, prev);
- } else {
- *p++ = (struct RPair) {prev, V(0)};
- ++frame->nfpairs;
}
prev = 0;
- } else if (ngpr & 1) {
- prev = 0x100;
}
- for (uint reg = R(19); reg <= LR; ++reg) {
+ for (uint reg = R(19); reg < FP; ++reg) {
if (!rstest(save, reg)) continue;
if (prev) {
*p++ = (struct RPair) {prev, reg};
@@ -941,39 +959,64 @@ prologue(uchar **pcode, Frame *frame, Function *fn)
prev = 0;
} else prev = reg;
}
- assert(!prev);
+ if (prev) frame->single[2] = prev;
p = frame->pairs;
Oper adr = mkoper(OMEM, .m = {.mode = APREIDX, .base = SP, .disp = -16});
- for (int i = 0; i < frame->nfpairs; ++i, ++p)
+ for (int i = 0; i < frame->nfpairs; ++i, ++p) {
Xfstp(pcode, KF64, reg2oper(p->a), reg2oper(p->b), adr);
+ frame->size += 16;
+ }
adr.m.disp = -8;
- if (frame->single[0]) Xfstr(pcode, KF64, reg2oper(frame->single[0]), adr);
- if (frame->single[1]) Xstr(pcode, KPTR, reg2oper(frame->single[1]), adr);
+ int rx;
+ if ((rx = frame->single[0])) {
+ Xfstr(pcode, KF64, reg2oper(rx), adr);
+ frame->size += 8;
+ }
+ if ((rx = frame->single[1])) {
+ Xstr(pcode, KPTR, reg2oper(rx), adr);
+ frame->size += 8;
+ }
adr.m.disp = -16;
- for (int i = 0; i < frame->ngpairs; ++i, ++p)
+ for (int i = 0; i < frame->ngpairs; ++i, ++p) {
Xstp(pcode, KPTR, reg2oper(p->a), reg2oper(p->b), adr);
+ frame->size += 16;
+ }
+ adr.m.disp = -8;
+ if ((rx = frame->single[2])) {
+ Xstr(pcode, KPTR, reg2oper(rx), adr);
+ frame->size += 8;
+ }
}
-
- if (usefp) /* MOV x29, sp */
- Xadd(pcode, KPTR, reg2oper(FP), reg2oper(SP), mkoper(OIMM,));
-
/* ensure stack is 16-byte aligned for function calls */
- if (!fn->isleaf && ((fn->stksiz) & 0xF) != 0) {
- assert(usefp);
- rbpoff -= 8;
+ if (!fn->isleaf && ((fn->stksiz + frame->size) & 0xF) != 0) {
fn->stksiz += 8;
}
- if (fn->stksiz) Xsub(pcode, KPTR, reg2oper(SP), reg2oper(SP), mkoper(OIMM, .imm = fn->stksiz));
+ frame->size += fn->stksiz;
+ if ((frame->usefp = !fn->isleaf)) {
+ frame->size += 16;
+ Oper adr = mkoper(OMEM, .m = {.mode = APREIDX, .base = SP, .disp = -16 - fn->stksiz});
+ Xstp(pcode, KPTR, reg2oper(FP), reg2oper(LR), adr);
+ Xadd(pcode, KPTR, reg2oper(R(29)), reg2oper(SP), mkoper(OIMM, {0})); /* MOV x29,sp */
+ } else if (fn->stksiz) {
+ Xsub(pcode, KPTR, reg2oper(SP), reg2oper(SP), mkoper(OIMM, .imm = fn->stksiz));
+ }
}
static void
epilogue(uchar **pcode, Function *fn, Frame *frame)
{
- if (fn->stksiz) Xadd(pcode, KPTR, reg2oper(SP), reg2oper(SP), mkoper(OIMM, .imm = fn->stksiz));
+ Oper adr = mkoper(OMEM, .m = {.mode = APOSTIDX, .base = SP, .disp = 16+fn->stksiz});
+ if (frame->usefp) {
+ Xldp(pcode, KPTR, reg2oper(FP), reg2oper(LR), adr);
+ } else if (fn->stksiz) {
+ Xadd(pcode, KPTR, reg2oper(SP), reg2oper(SP), mkoper(OIMM, .imm = fn->stksiz));
+ }
if (frame->save) {
struct RPair *p = frame->pairs + frame->nfpairs + frame->ngpairs - 1;
- Oper adr = mkoper(OMEM, .m = {.mode = APOSTIDX, .base = SP, .disp = 16});
+ adr.m.disp = 8;
+ if (frame->single[2]) Xldr(pcode, KF64, reg2oper(frame->single[2]), adr);
+ adr.m.disp = 16;
for (int i = 0; i < frame->ngpairs; ++i, --p)
Xldp(pcode, KPTR, reg2oper(p->a), reg2oper(p->b), adr);
adr.m.disp = 8;
@@ -997,9 +1040,6 @@ emitbin(Function *fn)
/** prologue **/
- /* only use frame pointer in non-leaf functions and functions that use the stack */
- usefp = !fn->isleaf || fn->stksiz;
- Frame frame;
prologue(pcode, &frame, fn);
if (*pcode - fnstart > 8) {
diff --git a/src/t_aarch64_isel.c b/src/t_aarch64_isel.c
index 4490831..58d9377 100644
--- a/src/t_aarch64_isel.c
+++ b/src/t_aarch64_isel.c
@@ -73,7 +73,7 @@ static void fixarg(Ref *r, Instr *ins, Block *blk, int *curi);
static void
regarg(Ref *r, enum irclass k, Block *blk, int *curi)
{
- if (r->t != RTMP) {
+ if (r->t != RTMP && r->t != RREG) {
*r = insertinstr(blk, (*curi)++, mkinstr1(Ocopy, k, *r));
if (kisflt(k) || instrtab[r->i].l.t == RSTACK) {
int iprev = *curi-1;
@@ -90,7 +90,12 @@ fixarg(Ref *r, Instr *ins, Block *blk, int *curi)
if (isintcon(*r)) {
s64int x = intconval(*r);
switch (op) {
- case Ocopy: return;
+ case Ocopy: case Omove:
+ if (kisint(ins->cls))
+ return;
+ case Oarg:
+ if (ref2type(ins->l).isagg || !kisflt(ref2type(ins->l).cls))
+ return;
default:
if (oiscmp(op)) {
case Oadd: case Osub:
@@ -108,7 +113,8 @@ fixarg(Ref *r, Instr *ins, Block *blk, int *curi)
goto Reg;
} else if (isfltcon(*r)) {
enum irclass k = concls(*r), ki = KI32 + k-KF32;
- if (contab.p[r->i].f != 0.0) {
+ /* allow positive zero (copy from rzr) */
+ if (contab.p[r->i].i != 0) {
union {
s64int i64;
int i32;
@@ -125,17 +131,17 @@ fixarg(Ref *r, Instr *ins, Block *blk, int *curi)
}
Ref gpr = insertinstr(blk, (*curi)++, mkinstr1(Ocopy, ki, mkintcon(ki, i)));
*r = insertinstr(blk, (*curi)++, mkinstr1(Ocopy, k, gpr));
- } else if (oiscmp(op)) {
+ } else if (oiscmp(op) || ((op == Ocopy || op == Omove) && kisflt(ins->cls))) {
+ return;
+ } else if (op == Oarg && !ref2type(ins->l).isagg && kisflt(ref2type(ins->l).cls)) {
return;
} else {
*r = insertinstr(blk, (*curi)++, mkinstr1(Ocopy, k, *r));
}
} else if (r->t == RSTACK) {
- Instr adr = mkinstr2(Osub, KPTR, mkref(RREG, FP), mkintcon(KI32, r->i));
- if (op == Ocopy)
- *ins = adr;
- else
- *r = insertinstr(blk, (*curi)++, adr);
+ if (op == Ocopy || op == Omove || op == Ophi || op == Oarg)
+ return;
+ goto Reg;
} else if (r->t != RTMP) Reg: {
enum irclass k;
if (r->t == RTMP) k = insrescls(instrtab[r->i]);
@@ -183,14 +189,14 @@ selcall(Function *fn, Instr *ins, Block *blk, int *curi)
assert(!abi.ty.isagg);
*arg = mkinstr2(Omove, call->abiarg[i].ty.cls, mkref(RREG, abi.reg), arg->r);
} else {
- Ref adr = mkaddr((IRAddr){mkref(RREG, SP), .disp = abi.stk});
+ Ref adr = mkaddr((IRAddr){.base = mkref(RREG, SP), .disp = abi.stk});
int iargsave = iarg;
if (!abi.ty.isagg) { /* scalar arg in stack */
*arg = mkinstr2(cls2store[abi.ty.cls], 0, adr, arg->r);
if (isaddrcon(arg->r,1) || arg->r.t == RADDR)
arg->r = insertinstr(blk, iarg++, mkinstr1(Ocopy, abi.ty.cls, arg->r));
else
- fixarg(&ins->r, ins, blk, &iarg);
+ fixarg(&arg->r, arg, blk, &iarg);
} else { /* aggregate arg in stack, callee stack frame destination address */
*arg = mkinstr1(Ocopy, KPTR, adr);
}
@@ -256,8 +262,8 @@ static bool
aadd(IRAddr *addr, Block *blk, int *curi, Ref r, uint siz/*1,2,4,8*/)
{
if (r.t == RSTACK) {
- if (addr->base.bits || addr->index.bits || !aimm(addr, -r.i)) goto Ref;
- addr->base = mkref(RREG, FP);
+ if (addr->base.bits) goto Ref;
+ addr->base = r;
} else if (r.t == RTMP) {
Instr *ins = &instrtab[r.i];
if (ins->op == Oadd) {
@@ -288,7 +294,7 @@ aadd(IRAddr *addr, Block *blk, int *curi, Ref r, uint siz/*1,2,4,8*/)
if (!rstest(mctarg->rglob, r.i)) return 0;
Ref:
if (r.t == RSTACK && (addr->base.bits || addr->index.bits)) {
- r = insertinstr(blk, (*curi)++, mkinstr2(Oadd, KPTR, mkref(RREG, FP), mkref(RICON, -r.i)));
+ r = insertinstr(blk, (*curi)++, mkinstr1(Ocopy, KPTR, r));
}
if (!addr->base.bits) addr->base = r;
else if (!addr->index.bits) addr->index = r;
@@ -349,7 +355,8 @@ loadstoreaddr(Block *blk, Ref *r, int *curi, enum op op)
} else if (isaddrcon(*r, 0)) {
if (!pcrelok || !(contab.p[r->i].flag & SLOCAL))
regarg(r, KPTR, blk, curi);
- } else if (r->t == RTMP || r->t == RSTACK) {
+ } else if (r->t == RSTACK) {
+ } else if (r->t == RTMP) {
Ref b;
if (fuseaddr(r, blk, curi, siz)
&& isaddrcon(b = addrtab.p[r->i].base,0)
@@ -386,7 +393,7 @@ sel(Function *fn, Instr *ins, Block *blk, int *curi)
if (!fn->abiarg[ins->l.i].isstk)
*ins = mkinstr1(Ocopy, ins->cls, mkref(RREG, fn->abiarg[ins->l.i].reg));
else /* stack */
- *ins = mkinstr2(Oadd, KPTR, mkref(RREG, FP), mkref(RICON, 16+fn->abiarg[ins->l.i].stk));
+ *ins = mkinstr1(Ocopy, KPTR, mkref(RSTACK, -fn->abiarg[ins->l.i].stk-8));
break;
case Oneg: case Onot:
case Ocvtf32s: case Ocvtf32u:
@@ -442,8 +449,8 @@ sel(Function *fn, Instr *ins, Block *blk, int *curi)
case Oequ: case Oneq:
case Olth: case Ogth: case Olte: case Ogte:
case Oulth: case Ougth: case Oulte: case Ougte:
- case Omove:
regarg(&ins->l, ins->cls, blk, curi);
+ case Omove:
fixarg(&ins->r, ins, blk, curi);
break;
case Omul: case Odiv: case Oudiv:
diff --git a/src/t_x86-64_emit.c b/src/t_x86-64_emit.c
index 39dbf3f..c2da048 100644
--- a/src/t_x86-64_emit.c
+++ b/src/t_x86-64_emit.c
@@ -38,6 +38,25 @@ ioper(int i)
return reg < 0 ? mkoper(ONONE,) : reg2oper(reg);
}
+static struct Frame {
+ bool usebp;
+ int stksiz;
+ int size;
+ int nsave;
+} frame;
+
+static int
+stackdisp(int i)
+{
+ if (frame.usebp) {
+ return i < 0 ? 8 - i
+ : -frame.size + i;
+ } else { /* RSP rel */
+ return i < 0 ? frame.size - i
+ : -frame.stksiz + i;
+ }
+}
+
static Oper
ref2oper(Ref r)
{
@@ -56,7 +75,7 @@ ref2oper(Ref r)
return mkoper(OSYM, .con = r.i, .cindex = NOINDEX);
}
assert(0);
- case RADDR: return mkmemoper(r);
+ case RADDR: case RSTACK: return mkmemoper(r);
default: assert(0);
}
}
@@ -126,6 +145,8 @@ mkmemoper(Ref r)
if (wop.t == OMEM) return wop;
assert(wop.t == OREG);
return mkoper(OMEM, .base = wop.reg, .index = NOINDEX);
+ } else if (r.t == RSTACK) {
+ return mkoper(OMEM, .base = frame.usebp ? RBP : RSP, .index = NOINDEX, .disp = stackdisp(r.i));
} else if (r.t == RADDR) {
const IRAddr *addr = &addrtab.p[r.i];
assert(addr->shift <= 3);
@@ -146,9 +167,19 @@ mkmemoper(Ref r)
.cindex = addr->base.bits ? mkregoper(addr->base).reg : NOINDEX,
.disp = addr->disp);
}
- return mkoper(OMEM, .base = addr->base.bits ? mkregoper(addr->base).reg : NOBASE,
- .index = addr->index.bits ? mkregoper(addr->index).reg : NOINDEX,
- .disp = addr->disp,
+ int base = NOBASE, index = NOINDEX, disp = addr->disp;
+ if (addr->base.t == RREG) base = addr->base.i;
+ else if (addr->base.t == RSTACK) {
+ base = frame.usebp ? RBP : RSP;
+ disp += stackdisp(addr->base.i);
+ }
+ if (addr->index.bits) {
+ assert(addr->index.t == RREG);
+ index = addr->index.i;
+ }
+ return mkoper(OMEM, .base = base,
+ .index = index,
+ .disp = disp,
.shift = addr->shift);
} else if (r.t == RXCON) {
assert(!contab.p[r.i].cls);
@@ -266,8 +297,6 @@ opermatch(enum operpat pat, Oper oper)
#define I32(w) (wr32le(*pcode, (w)), *pcode += 4)
#define DS(S) D(S, sizeof S - 1)
-static bool usebp; /* use RBP? */
-static int rbpoff;
static internstr curfnsym;
static uchar *fnstart;
@@ -372,17 +401,7 @@ encode(uchar **pcode, const EncDesc *tab, int ntab, enum irclass k, Oper dst, Op
} else {
int mod;
bool sib = 0;
- if (mem.base == RBP) {
- if (!usebp) {
- mem.base = RSP;
- if (mem.disp > 0) {
- /* function stack parameters */
- mem.disp -= 8;
- }
- } else if (mem.disp <= 0) {
- mem.disp += rbpoff;
- }
- }
+
if (mem.base != NOBASE) {
if (mem.index == NOINDEX && mem.shift == 0) sib = 0;
else sib = 1;
@@ -904,6 +923,8 @@ gencopy(uchar **pcode, enum irclass cls, Block *blk, int curi, Oper dst, Ref val
goto GOTLoad;
}
Xlea(pcode, cls, dst, ref2oper(val));
+ } else if (val.t == RSTACK) {
+ Xlea(pcode, cls, dst, ref2oper(val));
} else if (val.bits == ZEROREF.bits && dst.t == OREG && (kisflt(cls) || !flagslivep(blk, curi))) {
/* dst = 0 -> xor dst, dst; but only if it is ok to clobber flags */
Xxor(pcode, kisint(cls) ? KI32 : cls, dst, dst);
@@ -1266,31 +1287,30 @@ emitbranch(uchar **pcode, Block *blk)
Xjcc(pcode, ALWAYS, blk->s2);
}
-static bool
-calleesave(int *npush, uchar **pcode, Function *fn)
+static int
+calleesave(uchar **pcode, Function *fn)
{
- bool any = 0;
- if (rstest(fn->regusage, RBX)) {
- Xpush(pcode, RBX);
- ++*npush;
- any = 1;
- }
- for (int r = R12; r <= R15; ++r)
+ int n = 0;
+ for (int r = R15; r >= R12; --r) {
if (rstest(fn->regusage, r)) {
Xpush(pcode, r);
- ++*npush;
- any = 1;
+ ++n;
}
- return any;
+ }
+ if (rstest(fn->regusage, RBX)) {
+ Xpush(pcode, RBX);
+ ++n;
+ }
+ return n;
}
static void
calleerestore(uchar **pcode, Function *fn)
{
- for (int r = R15; r >= R12; --r)
+ if (rstest(fn->regusage, RBX)) Xpop(pcode, RBX);
+ for (int r = R12; r <= R15; ++r)
if (rstest(fn->regusage, r))
Xpop(pcode, r);
- if (rstest(fn->regusage, RBX)) Xpop(pcode, RBX);
}
/* align code using NOPs */
@@ -1331,28 +1351,27 @@ emitbin(Function *fn)
/** prologue **/
/* only use frame pointer in non-leaf functions and functions with large stack frames */
- usebp = 0;
+ frame.usebp = 0;
if (!fn->isleaf || fn->stksiz >= STACKREDZONE) {
- usebp = 1;
+ frame.usebp = 1;
/* push rbp; mov rbp, rsp */
DS("\x55\x48\x89\xE5");
}
- saverestore = calleesave(&npush, pcode, fn);
- if (usebp) rbpoff = -npush*8;
+ saverestore = npush = calleesave(pcode, fn);
+ npush += !frame.usebp;
- /* ensure stack is 16-byte aligned for function calls */
- if (!fn->isleaf && ((fn->stksiz + npush*8) & 0xF) != 0) {
- assert(usebp);
- if ((rbpoff & 0xF) == 0) {
- rbpoff -= 16;
- fn->stksiz += 24;
- } else {
- rbpoff -= 8;
+ /* ensure stack is 16-byte aligned */
+ if (frame.usebp) {
+ frame.size = fn->stksiz + npush*8;
+ if ((frame.size & 0xF) != 0) {
+ if (npush&1) fn->stksiz += 16;
fn->stksiz += 8;
+ frame.size += 8;
}
}
+ frame.stksiz = fn->stksiz;
- if (usebp && fn->stksiz > 0) {
+ if (frame.usebp && fn->stksiz > 0) {
/* sub rsp, <stack size> */
if (fn->stksiz < 128)
DS("\x48\x83\xEC"), B(fn->stksiz);
@@ -1417,11 +1436,11 @@ emitbin(Function *fn)
if (blk->lnext != fn->entry && blk->lnext->jmp.t == Jret && blk->lnext->ins.n == 0)
continue; /* fallthru to next blk's RET */
/* epilogue */
- if (fn->stksiz && saverestore)
+ if (fn->stksiz && saverestore && frame.usebp)
Xadd(pcode, KPTR, mkoper(OREG, .reg = RSP), mkoper(OIMM, .imm = fn->stksiz));
if (saverestore)
calleerestore(pcode, fn);
- if (usebp) B(0xC9); /* leave */
+ if (frame.usebp) B(0xC9); /* leave */
B(0xC3); /* ret */
} else if (blk->jmp.t == Jtrap) {
DS("\x0F\x0B"); /* UD2 */
diff --git a/src/t_x86-64_isel.c b/src/t_x86-64_isel.c
index be2f2c7..0e3c55d 100644
--- a/src/t_x86-64_isel.c
+++ b/src/t_x86-64_isel.c
@@ -110,11 +110,8 @@ Begin:
ShiftImm: /* shift immediate is always 8bit */
*r = mkref(RICON, sh & 255);
} else if (r->t == RSTACK) {
- Instr adr = mkinstr2(Oadd, KPTR, mkref(RREG, RBP), mkintcon(KI32, -r->i));
- if (op == Ocopy)
- *ins = adr;
- else
- *r = insertinstr(blk, (*curi)++, adr);
+ if (!(oisloadstore(op) && r == &ins->l) && !in_range(op, Ocopy, Omove) && op != Ophi)
+ *r = inscopy(blk, curi, KPTR, *r);
} else if (r->bits == UNDREF.bits && ins && !in_range(op, Ocopy, Omove) && op != Ophi) {
*r = inscopy(blk, curi, ins->cls, *r);
}
@@ -150,7 +147,7 @@ selcall(Function *fn, Instr *ins, Block *blk, int *curi)
int iargsave = iarg;
if (!abi.ty.isagg) { /* scalar arg in stack */
*arg = mkinstr2(cls2store[abi.ty.cls], 0, adr, arg->r);
- if (isaddrcon(arg->r,1) || arg->r.t == RADDR)
+ if (isaddrcon(arg->r,1) || arg->r.t == RADDR || arg->r.t == RSTACK)
arg->r = insertinstr(blk, iarg++, mkinstr1(Ocopy, abi.ty.cls, arg->r));
else
fixarg(&ins->r, ins, blk, &iarg);
@@ -239,11 +236,9 @@ static bool
aadd(IRAddr *out, Block *blk, int *curi, Ref r, bool recurring)
{
if (r.t == RSTACK) {
- if (out->base.bits || !aimm(out, -r.i)) {
- r = insertinstr(blk, (*curi)++, mkinstr2(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, -r.i)));
+ if (out->base.bits)
goto Ref;
- }
- out->base = mkref(RREG, RBP);
+ out->base = r;
} else if (r.t == RTMP) {
Instr *ins = &instrtab[r.i];
IRAddr adr = {0};
@@ -300,7 +295,7 @@ fuseaddr(Ref *r, Block *blk, int *curi)
{
IRAddr addr = { 0 };
- if (isaddrcon(*r,1)) return 1;
+ if (isaddrcon(*r,1) || r->t == RSTACK) return 1;
if (!aadd(&addr, blk, curi, *r, 0)) return 0;
if (isaddrcon(addr.base,0) && (ccopt.pic || (ccopt.pie && addr.index.bits) || (contab.p[addr.base.i].flag & SFUNC))) {
@@ -386,7 +381,7 @@ sel(Function *fn, Instr *ins, Block *blk, int *curi)
if (!fn->abiarg[ins->l.i].isstk)
*ins = mkinstr1(Ocopy, ins->cls, mkref(RREG, fn->abiarg[ins->l.i].reg));
else /* stack */
- *ins = mkinstr2(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, 16+fn->abiarg[ins->l.i].stk));
+ *ins = mkinstr1(Ocopy, KPTR, mkref(RSTACK, -fn->abiarg[ins->l.i].stk-8));
break;
case Oarg:
fixarg(&ins->r, ins, blk, curi);
@@ -412,11 +407,14 @@ sel(Function *fn, Instr *ins, Block *blk, int *curi)
ins->op = ((op - Olth) ^ 1) + Olth;
rswap(ins->l, ins->r);
}
- if (ins->l.t != RTMP && ins->l.t != RREG && ins->l.t != RSTACK)
+ if (ins->l.t != RTMP && ins->l.t != RREG)
ins->l = inscopy(blk, curi, ins->cls, ins->l);
else
fixarg(&ins->l, ins, blk, curi);
- fixarg(&ins->r, ins, blk, curi);
+ if (ins->r.t == RSTACK)
+ ins->r = inscopy(blk, curi, ins->cls, ins->r);
+ else
+ fixarg(&ins->r, ins, blk, curi);
break;
case Odiv: case Oudiv: case Orem: case Ourem:
if (kisflt(ins->cls)) goto ALU;
@@ -505,7 +503,7 @@ sel(Function *fn, Instr *ins, Block *blk, int *curi)
break;
case Ostorei8: case Ostorei16: case Ostorei32: case Ostorei64: case Ostoref32: case Ostoref64:
loadstoreaddr(blk, &ins->l, curi);
- if (isaddrcon(ins->r,1) || ins->r.t == RADDR)
+ if (isaddrcon(ins->r,1) || ins->r.t == RADDR || ins->r.t == RSTACK)
ins->r = insertinstr(blk, (*curi)++, mkinstr1(Ocopy, KPTR, ins->r));
else
fixarg(&ins->r, ins, blk, curi);
@@ -554,7 +552,7 @@ sel(Function *fn, Instr *ins, Block *blk, int *curi)
break;
case Oxvaprologue:
fuseaddr(&ins->l, blk, curi);
- assert(ins->l.t == RADDR);
+ assert(ins->l.t == RSTACK);
/* !this must be the first instruction */
assert(*curi == 1);
assert(blk == fn->entry);
diff --git a/src/t_x86-64_sysv.c b/src/t_x86-64_sysv.c
index 96f40d6..2404a86 100644
--- a/src/t_x86-64_sysv.c
+++ b/src/t_x86-64_sysv.c
@@ -200,7 +200,7 @@ vastart(Function *fn, Block *blk, int *curi)
int i = *curi + 1;
insertinstr(blk, i++, mkinstr2(Ostorei64, 0, dst, rsave));
/* set ap->overflow_arg_area */
- src = insertinstr(blk, i++, mkinstr2(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, 16+stk0)));
+ src = insertinstr(blk, i++, mkinstr1(Ocopy, KPTR, mkref(RSTACK, -stk0-8)));
dst = insertinstr(blk, i++, mkinstr2(Oadd, KPTR, ap, mkref(RICON, 8)));
insertinstr(blk, i++, mkinstr2(Ostorei64, 0, dst, src));
/* set ap->gp_offset */