diff options
| -rw-r--r-- | src/ir_abi0.c | 19 | ||||
| -rw-r--r-- | src/ir_dump.c | 2 | ||||
| -rw-r--r-- | src/ir_regalloc.c | 4 | ||||
| -rw-r--r-- | src/ir_stack.c | 2 | ||||
| -rw-r--r-- | src/t_aarch64_aapcs.c | 12 | ||||
| -rw-r--r-- | src/t_aarch64_emit.c | 120 | ||||
| -rw-r--r-- | src/t_aarch64_isel.c | 41 | ||||
| -rw-r--r-- | src/t_x86-64_emit.c | 111 | ||||
| -rw-r--r-- | src/t_x86-64_isel.c | 30 | ||||
| -rw-r--r-- | src/t_x86-64_sysv.c | 2 |
10 files changed, 204 insertions, 139 deletions
diff --git a/src/ir_abi0.c b/src/ir_abi0.c index dd8bc40..b8ae90f 100644 --- a/src/ir_abi0.c +++ b/src/ir_abi0.c @@ -91,18 +91,18 @@ static void patchparam(Function *fn, int *curi, int *param, int tydat, int nabi, ABIArg abi[2], uchar r2off) { Block *blk = fn->entry; - assert(in_range(nabi,1,2)); for (; *curi < blk->ins.n; ++*curi) { Instr *ins = &instrtab[blk->ins.p[*curi]]; if (ins->op != Oparam) continue; assert(ins->r.t == RTYPE && ins->r.i == (tydat < 0 ? abi[0].ty : (IRType){.isagg=1, .dat=tydat}).bits); - if (abi[0].ty.isagg || tydat < 0) { + if (abi[0].ty.isagg || tydat < 0 || abi[0].ty.bits == cls2type(KPTR).bits) { /* aggregate in stack or scalar, just copy */ - assert(nabi == 1); + assert(nabi < 2); *ins = copyparam(fn, curi, *param, abi[0]); } else { /* aggregate in registers, materialize */ + assert(nabi >= 1); Ref alloc, r[2]; Instr st; const TypeData *td; @@ -120,13 +120,13 @@ patchparam(Function *fn, int *curi, int *param, int tydat, int nabi, ABIArg abi[ if (nabi > 1) r[1] = insertinstr(blk, ++*curi, copyparam(fn, NULL, ++*param, abi[1])); /* transform - * %x = copy %p + * %x = param %p * into * %x = alloca... * store* %x, %a * store* %x + N, %b */ - st = mkinstr2(cls2store[abi[0].ty.cls], 0, alloc, r[0]); + st = mkinstr2(cls2store[abi[0].ty.isagg ? KPTR : abi[0].ty.cls], 0, alloc, r[0]); insertinstr(blk, ++*curi, st); if (nabi > 1) { Instr tmp = mkinstr2(Oadd, KPTR, alloc, mkref(RICON, r2off)); @@ -174,7 +174,6 @@ load2regs(Ref out[2], IRType typ, Ref src, int nabi, ABIArg abi[2], uchar r2off, ins.l = insertinstr(blk, (*curi)++, adr); } temp = insertinstr(blk, (*curi)++, ins); - //insertinstr(blk, (*curi)++, mkarginstr(abi[i].ty, temp)); out[i] = temp; } } else { @@ -200,7 +199,6 @@ load2regs(Ref out[2], IRType typ, Ref src, int nabi, ABIArg abi[2], uchar r2off, reg = temp; } } - //insertinstr(blk, arginst++, mkarginstr(abi[i].ty, reg)); out[i] = reg; } } @@ -215,6 +213,7 @@ patcharg(Block *blk, int *icall, IRCall *call, assert(arg->op == Oarg && arg->l.t == RTYPE); if (ref2type(arg->l).isagg) { /* aggregate argument */ if (abi[0].ty.isagg) { /* aggregate in stack */ + assert(nabi == 0); /* XXX do this better.. */ /* ptr %dst = arg <stk dst> */ /* (blit %dst, %src) */ @@ -236,9 +235,12 @@ patcharg(Block *blk, int *icall, IRCall *call, *icall = arginst + (call->narg - argidx); return 1; } else if (abi[0].ty.cls == KPTR) { /* aggregate by pointer */ + /* XXX make a copy */ + assert(nabi == -1 || nabi == 1); arg->cls = KPTR; return 1; } else { /* aggregate in registers */ + assert(nabi > 0); Ref r[2]; IRType typ = ref2type(arg->l); delinstr(blk, arginst); @@ -249,6 +251,7 @@ patcharg(Block *blk, int *icall, IRCall *call, return nabi; } } else { /* normal scalar argument */ + assert(nabi >= 0); return 1; } } @@ -383,7 +386,7 @@ abi0(Function *fn) int first = abiargs.n; uchar r2off; int ret = abiarg(&abiargs, &r2off, &ni, &nf, &ns, pty); - patchparam(fn, &istart, ¶m, pty.isagg ? pty.dat : -1, ret+!ret, &abiargs.p[first], r2off); + patchparam(fn, &istart, ¶m, pty.isagg ? pty.dat : -1, ret, &abiargs.p[first], r2off); } fn->abiarg = alloccopy(fn->arena, abiargs.p, abiargs.n * sizeof *abiargs.p, 0); fn->nabiarg = abiargs.n; diff --git a/src/ir_dump.c b/src/ir_dump.c index 4c18a70..b66fd95 100644 --- a/src/ir_dump.c +++ b/src/ir_dump.c @@ -168,7 +168,7 @@ dumpref(enum op o, Ref ref) } break; case RSTACK: - bfmt(out, "[stack %d]", ref.i); + bfmt(out, "stack(%d)", ref.i); break; default: assert(!"ref"); } diff --git a/src/ir_regalloc.c b/src/ir_regalloc.c index 31f03c2..26b90a6 100644 --- a/src/ir_regalloc.c +++ b/src/ir_regalloc.c @@ -111,9 +111,7 @@ typedef struct RegAlloc { stktop; } RegAlloc; -#define stkslotref(fn, off) \ - mkaddr((IRAddr){.base = mkref(RREG, mctarg->bpr), \ - .disp = -(fn)->stksiz - 8 - (off)}) +#define stkslotref(fn, off) mkref(RSTACK, (fn)->stksiz + (off)) /* Parallel moves algorithm from QBE * <https://c9x.me/git/qbe.git/tree/rega.c?id=e493a7f23352f51acc0a1e12284ab19d7894488a#n201> */ diff --git a/src/ir_stack.c b/src/ir_stack.c index ff49805..a9acc61 100644 --- a/src/ir_stack.c +++ b/src/ir_stack.c @@ -19,7 +19,7 @@ lowerstack(Function *fn) fn->stksiz = alignup(fn->stksiz, 1 << alignlog2); if (fn->stksiz > (1<<20)-1) error(NULL, "'%s' stack frame too big", fn->name); *ins = mkinstr0(Onop,0); - replcuses(mkref(RTMP, t), mkref(RSTACK, fn->stksiz)); + replcuses(mkref(RTMP, t), mkref(RSTACK, fn->stksiz-siz)); } } } while ((blk = blk->lnext) != fn->entry); diff --git a/src/t_aarch64_aapcs.c b/src/t_aarch64_aapcs.c index a321f5b..1cf3a61 100644 --- a/src/t_aarch64_aapcs.c +++ b/src/t_aarch64_aapcs.c @@ -93,7 +93,7 @@ abiarg(short r[2], uchar cls[2], uchar *r2off, int *ni, int *nf, int *ns, IRType if (n <= NFLT - *nf) { for (int i = 0; i < n; ++i) { r[i] = V(0) + *nf + i; - cls[i] = type2cls[k]; + cls[i] = k; } *nf += n; } else { /* stack */ @@ -121,17 +121,17 @@ abiarg(short r[2], uchar cls[2], uchar *r2off, int *ni, int *nf, int *ns, IRType } static int -abiret(short r[2], uchar cls[2], uchar *r2off, int *ni, IRType typ) +abiret(short r[2], uchar cls[2], uchar *r2off, int *_ni, IRType typ) { if (!typ.isagg) { r[0] = kisflt(cls[0] = typ.cls) ? V(0) : R0; return 1; } - int nf = 0, ns = 0; - int ret = abiarg(r, cls, r2off, ni, &nf, &ns, typ); - if (ret) return ret; + int ni = 0, nf = 0, ns = 0; + int ret = abiarg(r, cls, r2off, &ni, &nf, &ns, typ); + if (ret && cls[0] != KPTR) /* in regs */ + return ret; /* caller-allocated result address in x8 */ - assert(*ni == 0); r[0] = -1; r[1] = R(8); return 0; diff --git a/src/t_aarch64_emit.c b/src/t_aarch64_emit.c index 2f80b3a..799b388 100644 --- a/src/t_aarch64_emit.c +++ b/src/t_aarch64_emit.c @@ -51,6 +51,24 @@ static inline bool usegot(int c) && (con->flag & (SLOCAL|SFUNC)) != (SLOCAL|SFUNC); } +typedef struct Frame { + regset save; + struct RPair { uchar a,b; } pairs[10]; + uchar single[3]; + uint nfpairs, ngpairs; + bool usefp; + int size; +} Frame; + +static Frame frame; + +static int +stackdisp(int i) +{ + return i < 0 ? frame.size - i - 8 /* arg */ + : frame.size - i + 16*frame.usefp; +} + static Oper mkmemoper(uint msiz, Ref r) { @@ -59,6 +77,9 @@ mkmemoper(uint msiz, Ref r) return mkoper(OMEM, .m = {AIMMIDX, .base = instrtab[r.i].reg-1}); } else if (r.t == RREG) { return mkoper(OMEM, .m = {AIMMIDX, .base = r.i}); + } else if (r.t == RSTACK) { + int disp = stackdisp(r.i); + return mkoper(OMEM, .m = {AIMMIDX, .base = frame.usefp ? FP : SP, .disp = disp}); } else if (isaddrcon(r,1)) { return mkoper(OSYM + usegot(r.i), .con = r.i,); } else if (r.t == RADDR) { @@ -68,10 +89,19 @@ mkmemoper(uint msiz, Ref r) assert(!addr->index.bits); return mkoper(OSYM + usegot(addr->base.i), .con = addr->base.i, .cdisp = addr->disp); } - assert(addr->base.t == RREG); if (!addr->index.bits) { - return mkoper(OMEM, .m = {.mode = AIMMIDX, .base = addr->base.i, .disp = addr->disp}); + int base, disp; + if (addr->base.t == RREG) { + base = addr->base.i; + disp = 0; + } else if (addr->base.t == RSTACK) { + base = frame.usefp ? FP : SP; + disp = stackdisp(addr->base.i); + } else assert(0); + disp += addr->disp; + return mkoper(OMEM, .m = {.mode = AIMMIDX, .base = base, .disp = disp}); } else { + assert(addr->base.t == RREG); assert(addr->index.t == RREG); assert(addr->shift == 0 || 1<<addr->shift == msiz); return mkoper(OMEM, .m = { @@ -100,7 +130,7 @@ ref2oper(Ref r) assert(contab.p[r.i].f == 0.0); return mkoper(OIMM, .imm = 0); } else if (!contab.p[r.i].cls) { - case RADDR: + case RSTACK: case RADDR: return mkmemoper(0, r); } assert(0); @@ -209,8 +239,6 @@ opermatch(enum operpat pat, enum irclass k, Oper o) static uchar *fnstart; static internstr curfnsym; -static bool usefp; -static int rbpoff; /* Given an instruction description table, find the first entry that matches * the operands and encode it. */ @@ -582,6 +610,9 @@ gencopy(uchar **pcode, enum irclass cls, Block *blk, int curi, Oper dst, Ref val } } return; + } else if (val.t == RSTACK) { + Xadd(pcode, cls, dst, reg2oper(FP), mkoper(OIMM, .imm = stackdisp(val.i))); + return; } src = ref2oper(val); if (opermatch(PGPRZ, cls, src) && kisint(cls)) { @@ -895,20 +926,12 @@ emitinstr(uchar **pcode, Function *fn, Block *blk, int curi, Instr *ins) } } -typedef struct Frame { - regset save; - struct RPair { uchar a,b; } pairs[10]; - uchar single[2]; - uint nfpairs, ngpairs; -} Frame; - static void prologue(uchar **pcode, Frame *frame, Function *fn) { *frame = (Frame){0}; - regset save = frame->save = (fn->regusage & mctarg->rcallee) | (usefp * BIT(FP)); + regset save = frame->save = fn->regusage & mctarg->rcallee; if (save) { - save = rsset(&frame->save, LR); int prev = 0; struct RPair *p = frame->pairs; for (uint reg = V(8); reg <= V(15); ++reg) { @@ -919,21 +942,16 @@ prologue(uchar **pcode, Frame *frame, Function *fn) prev = 0; } else prev = reg; } - uint ngpr = popcnt(save & (BIT(32)-1)); + uint ngpr = popcnt(save & (BIT(30)-1)); if (prev) { + frame->single[0] = prev; if (ngpr & 1) { - frame->single[0] = prev; frame->single[1] = prev = lowestsetbit(save); rsclr(&save, prev); - } else { - *p++ = (struct RPair) {prev, V(0)}; - ++frame->nfpairs; } prev = 0; - } else if (ngpr & 1) { - prev = 0x100; } - for (uint reg = R(19); reg <= LR; ++reg) { + for (uint reg = R(19); reg < FP; ++reg) { if (!rstest(save, reg)) continue; if (prev) { *p++ = (struct RPair) {prev, reg}; @@ -941,39 +959,64 @@ prologue(uchar **pcode, Frame *frame, Function *fn) prev = 0; } else prev = reg; } - assert(!prev); + if (prev) frame->single[2] = prev; p = frame->pairs; Oper adr = mkoper(OMEM, .m = {.mode = APREIDX, .base = SP, .disp = -16}); - for (int i = 0; i < frame->nfpairs; ++i, ++p) + for (int i = 0; i < frame->nfpairs; ++i, ++p) { Xfstp(pcode, KF64, reg2oper(p->a), reg2oper(p->b), adr); + frame->size += 16; + } adr.m.disp = -8; - if (frame->single[0]) Xfstr(pcode, KF64, reg2oper(frame->single[0]), adr); - if (frame->single[1]) Xstr(pcode, KPTR, reg2oper(frame->single[1]), adr); + int rx; + if ((rx = frame->single[0])) { + Xfstr(pcode, KF64, reg2oper(rx), adr); + frame->size += 8; + } + if ((rx = frame->single[1])) { + Xstr(pcode, KPTR, reg2oper(rx), adr); + frame->size += 8; + } adr.m.disp = -16; - for (int i = 0; i < frame->ngpairs; ++i, ++p) + for (int i = 0; i < frame->ngpairs; ++i, ++p) { Xstp(pcode, KPTR, reg2oper(p->a), reg2oper(p->b), adr); + frame->size += 16; + } + adr.m.disp = -8; + if ((rx = frame->single[2])) { + Xstr(pcode, KPTR, reg2oper(rx), adr); + frame->size += 8; + } } - - if (usefp) /* MOV x29, sp */ - Xadd(pcode, KPTR, reg2oper(FP), reg2oper(SP), mkoper(OIMM,)); - /* ensure stack is 16-byte aligned for function calls */ - if (!fn->isleaf && ((fn->stksiz) & 0xF) != 0) { - assert(usefp); - rbpoff -= 8; + if (!fn->isleaf && ((fn->stksiz + frame->size) & 0xF) != 0) { fn->stksiz += 8; } - if (fn->stksiz) Xsub(pcode, KPTR, reg2oper(SP), reg2oper(SP), mkoper(OIMM, .imm = fn->stksiz)); + frame->size += fn->stksiz; + if ((frame->usefp = !fn->isleaf)) { + frame->size += 16; + Oper adr = mkoper(OMEM, .m = {.mode = APREIDX, .base = SP, .disp = -16 - fn->stksiz}); + Xstp(pcode, KPTR, reg2oper(FP), reg2oper(LR), adr); + Xadd(pcode, KPTR, reg2oper(R(29)), reg2oper(SP), mkoper(OIMM, {0})); /* MOV x29,sp */ + } else if (fn->stksiz) { + Xsub(pcode, KPTR, reg2oper(SP), reg2oper(SP), mkoper(OIMM, .imm = fn->stksiz)); + } } static void epilogue(uchar **pcode, Function *fn, Frame *frame) { - if (fn->stksiz) Xadd(pcode, KPTR, reg2oper(SP), reg2oper(SP), mkoper(OIMM, .imm = fn->stksiz)); + Oper adr = mkoper(OMEM, .m = {.mode = APOSTIDX, .base = SP, .disp = 16+fn->stksiz}); + if (frame->usefp) { + Xldp(pcode, KPTR, reg2oper(FP), reg2oper(LR), adr); + } else if (fn->stksiz) { + Xadd(pcode, KPTR, reg2oper(SP), reg2oper(SP), mkoper(OIMM, .imm = fn->stksiz)); + } if (frame->save) { struct RPair *p = frame->pairs + frame->nfpairs + frame->ngpairs - 1; - Oper adr = mkoper(OMEM, .m = {.mode = APOSTIDX, .base = SP, .disp = 16}); + adr.m.disp = 8; + if (frame->single[2]) Xldr(pcode, KF64, reg2oper(frame->single[2]), adr); + adr.m.disp = 16; for (int i = 0; i < frame->ngpairs; ++i, --p) Xldp(pcode, KPTR, reg2oper(p->a), reg2oper(p->b), adr); adr.m.disp = 8; @@ -997,9 +1040,6 @@ emitbin(Function *fn) /** prologue **/ - /* only use frame pointer in non-leaf functions and functions that use the stack */ - usefp = !fn->isleaf || fn->stksiz; - Frame frame; prologue(pcode, &frame, fn); if (*pcode - fnstart > 8) { diff --git a/src/t_aarch64_isel.c b/src/t_aarch64_isel.c index 4490831..58d9377 100644 --- a/src/t_aarch64_isel.c +++ b/src/t_aarch64_isel.c @@ -73,7 +73,7 @@ static void fixarg(Ref *r, Instr *ins, Block *blk, int *curi); static void regarg(Ref *r, enum irclass k, Block *blk, int *curi) { - if (r->t != RTMP) { + if (r->t != RTMP && r->t != RREG) { *r = insertinstr(blk, (*curi)++, mkinstr1(Ocopy, k, *r)); if (kisflt(k) || instrtab[r->i].l.t == RSTACK) { int iprev = *curi-1; @@ -90,7 +90,12 @@ fixarg(Ref *r, Instr *ins, Block *blk, int *curi) if (isintcon(*r)) { s64int x = intconval(*r); switch (op) { - case Ocopy: return; + case Ocopy: case Omove: + if (kisint(ins->cls)) + return; + case Oarg: + if (ref2type(ins->l).isagg || !kisflt(ref2type(ins->l).cls)) + return; default: if (oiscmp(op)) { case Oadd: case Osub: @@ -108,7 +113,8 @@ fixarg(Ref *r, Instr *ins, Block *blk, int *curi) goto Reg; } else if (isfltcon(*r)) { enum irclass k = concls(*r), ki = KI32 + k-KF32; - if (contab.p[r->i].f != 0.0) { + /* allow positive zero (copy from rzr) */ + if (contab.p[r->i].i != 0) { union { s64int i64; int i32; @@ -125,17 +131,17 @@ fixarg(Ref *r, Instr *ins, Block *blk, int *curi) } Ref gpr = insertinstr(blk, (*curi)++, mkinstr1(Ocopy, ki, mkintcon(ki, i))); *r = insertinstr(blk, (*curi)++, mkinstr1(Ocopy, k, gpr)); - } else if (oiscmp(op)) { + } else if (oiscmp(op) || ((op == Ocopy || op == Omove) && kisflt(ins->cls))) { + return; + } else if (op == Oarg && !ref2type(ins->l).isagg && kisflt(ref2type(ins->l).cls)) { return; } else { *r = insertinstr(blk, (*curi)++, mkinstr1(Ocopy, k, *r)); } } else if (r->t == RSTACK) { - Instr adr = mkinstr2(Osub, KPTR, mkref(RREG, FP), mkintcon(KI32, r->i)); - if (op == Ocopy) - *ins = adr; - else - *r = insertinstr(blk, (*curi)++, adr); + if (op == Ocopy || op == Omove || op == Ophi || op == Oarg) + return; + goto Reg; } else if (r->t != RTMP) Reg: { enum irclass k; if (r->t == RTMP) k = insrescls(instrtab[r->i]); @@ -183,14 +189,14 @@ selcall(Function *fn, Instr *ins, Block *blk, int *curi) assert(!abi.ty.isagg); *arg = mkinstr2(Omove, call->abiarg[i].ty.cls, mkref(RREG, abi.reg), arg->r); } else { - Ref adr = mkaddr((IRAddr){mkref(RREG, SP), .disp = abi.stk}); + Ref adr = mkaddr((IRAddr){.base = mkref(RREG, SP), .disp = abi.stk}); int iargsave = iarg; if (!abi.ty.isagg) { /* scalar arg in stack */ *arg = mkinstr2(cls2store[abi.ty.cls], 0, adr, arg->r); if (isaddrcon(arg->r,1) || arg->r.t == RADDR) arg->r = insertinstr(blk, iarg++, mkinstr1(Ocopy, abi.ty.cls, arg->r)); else - fixarg(&ins->r, ins, blk, &iarg); + fixarg(&arg->r, arg, blk, &iarg); } else { /* aggregate arg in stack, callee stack frame destination address */ *arg = mkinstr1(Ocopy, KPTR, adr); } @@ -256,8 +262,8 @@ static bool aadd(IRAddr *addr, Block *blk, int *curi, Ref r, uint siz/*1,2,4,8*/) { if (r.t == RSTACK) { - if (addr->base.bits || addr->index.bits || !aimm(addr, -r.i)) goto Ref; - addr->base = mkref(RREG, FP); + if (addr->base.bits) goto Ref; + addr->base = r; } else if (r.t == RTMP) { Instr *ins = &instrtab[r.i]; if (ins->op == Oadd) { @@ -288,7 +294,7 @@ aadd(IRAddr *addr, Block *blk, int *curi, Ref r, uint siz/*1,2,4,8*/) if (!rstest(mctarg->rglob, r.i)) return 0; Ref: if (r.t == RSTACK && (addr->base.bits || addr->index.bits)) { - r = insertinstr(blk, (*curi)++, mkinstr2(Oadd, KPTR, mkref(RREG, FP), mkref(RICON, -r.i))); + r = insertinstr(blk, (*curi)++, mkinstr1(Ocopy, KPTR, r)); } if (!addr->base.bits) addr->base = r; else if (!addr->index.bits) addr->index = r; @@ -349,7 +355,8 @@ loadstoreaddr(Block *blk, Ref *r, int *curi, enum op op) } else if (isaddrcon(*r, 0)) { if (!pcrelok || !(contab.p[r->i].flag & SLOCAL)) regarg(r, KPTR, blk, curi); - } else if (r->t == RTMP || r->t == RSTACK) { + } else if (r->t == RSTACK) { + } else if (r->t == RTMP) { Ref b; if (fuseaddr(r, blk, curi, siz) && isaddrcon(b = addrtab.p[r->i].base,0) @@ -386,7 +393,7 @@ sel(Function *fn, Instr *ins, Block *blk, int *curi) if (!fn->abiarg[ins->l.i].isstk) *ins = mkinstr1(Ocopy, ins->cls, mkref(RREG, fn->abiarg[ins->l.i].reg)); else /* stack */ - *ins = mkinstr2(Oadd, KPTR, mkref(RREG, FP), mkref(RICON, 16+fn->abiarg[ins->l.i].stk)); + *ins = mkinstr1(Ocopy, KPTR, mkref(RSTACK, -fn->abiarg[ins->l.i].stk-8)); break; case Oneg: case Onot: case Ocvtf32s: case Ocvtf32u: @@ -442,8 +449,8 @@ sel(Function *fn, Instr *ins, Block *blk, int *curi) case Oequ: case Oneq: case Olth: case Ogth: case Olte: case Ogte: case Oulth: case Ougth: case Oulte: case Ougte: - case Omove: regarg(&ins->l, ins->cls, blk, curi); + case Omove: fixarg(&ins->r, ins, blk, curi); break; case Omul: case Odiv: case Oudiv: diff --git a/src/t_x86-64_emit.c b/src/t_x86-64_emit.c index 39dbf3f..c2da048 100644 --- a/src/t_x86-64_emit.c +++ b/src/t_x86-64_emit.c @@ -38,6 +38,25 @@ ioper(int i) return reg < 0 ? mkoper(ONONE,) : reg2oper(reg); } +static struct Frame { + bool usebp; + int stksiz; + int size; + int nsave; +} frame; + +static int +stackdisp(int i) +{ + if (frame.usebp) { + return i < 0 ? 8 - i + : -frame.size + i; + } else { /* RSP rel */ + return i < 0 ? frame.size - i + : -frame.stksiz + i; + } +} + static Oper ref2oper(Ref r) { @@ -56,7 +75,7 @@ ref2oper(Ref r) return mkoper(OSYM, .con = r.i, .cindex = NOINDEX); } assert(0); - case RADDR: return mkmemoper(r); + case RADDR: case RSTACK: return mkmemoper(r); default: assert(0); } } @@ -126,6 +145,8 @@ mkmemoper(Ref r) if (wop.t == OMEM) return wop; assert(wop.t == OREG); return mkoper(OMEM, .base = wop.reg, .index = NOINDEX); + } else if (r.t == RSTACK) { + return mkoper(OMEM, .base = frame.usebp ? RBP : RSP, .index = NOINDEX, .disp = stackdisp(r.i)); } else if (r.t == RADDR) { const IRAddr *addr = &addrtab.p[r.i]; assert(addr->shift <= 3); @@ -146,9 +167,19 @@ mkmemoper(Ref r) .cindex = addr->base.bits ? mkregoper(addr->base).reg : NOINDEX, .disp = addr->disp); } - return mkoper(OMEM, .base = addr->base.bits ? mkregoper(addr->base).reg : NOBASE, - .index = addr->index.bits ? mkregoper(addr->index).reg : NOINDEX, - .disp = addr->disp, + int base = NOBASE, index = NOINDEX, disp = addr->disp; + if (addr->base.t == RREG) base = addr->base.i; + else if (addr->base.t == RSTACK) { + base = frame.usebp ? RBP : RSP; + disp += stackdisp(addr->base.i); + } + if (addr->index.bits) { + assert(addr->index.t == RREG); + index = addr->index.i; + } + return mkoper(OMEM, .base = base, + .index = index, + .disp = disp, .shift = addr->shift); } else if (r.t == RXCON) { assert(!contab.p[r.i].cls); @@ -266,8 +297,6 @@ opermatch(enum operpat pat, Oper oper) #define I32(w) (wr32le(*pcode, (w)), *pcode += 4) #define DS(S) D(S, sizeof S - 1) -static bool usebp; /* use RBP? */ -static int rbpoff; static internstr curfnsym; static uchar *fnstart; @@ -372,17 +401,7 @@ encode(uchar **pcode, const EncDesc *tab, int ntab, enum irclass k, Oper dst, Op } else { int mod; bool sib = 0; - if (mem.base == RBP) { - if (!usebp) { - mem.base = RSP; - if (mem.disp > 0) { - /* function stack parameters */ - mem.disp -= 8; - } - } else if (mem.disp <= 0) { - mem.disp += rbpoff; - } - } + if (mem.base != NOBASE) { if (mem.index == NOINDEX && mem.shift == 0) sib = 0; else sib = 1; @@ -904,6 +923,8 @@ gencopy(uchar **pcode, enum irclass cls, Block *blk, int curi, Oper dst, Ref val goto GOTLoad; } Xlea(pcode, cls, dst, ref2oper(val)); + } else if (val.t == RSTACK) { + Xlea(pcode, cls, dst, ref2oper(val)); } else if (val.bits == ZEROREF.bits && dst.t == OREG && (kisflt(cls) || !flagslivep(blk, curi))) { /* dst = 0 -> xor dst, dst; but only if it is ok to clobber flags */ Xxor(pcode, kisint(cls) ? KI32 : cls, dst, dst); @@ -1266,31 +1287,30 @@ emitbranch(uchar **pcode, Block *blk) Xjcc(pcode, ALWAYS, blk->s2); } -static bool -calleesave(int *npush, uchar **pcode, Function *fn) +static int +calleesave(uchar **pcode, Function *fn) { - bool any = 0; - if (rstest(fn->regusage, RBX)) { - Xpush(pcode, RBX); - ++*npush; - any = 1; - } - for (int r = R12; r <= R15; ++r) + int n = 0; + for (int r = R15; r >= R12; --r) { if (rstest(fn->regusage, r)) { Xpush(pcode, r); - ++*npush; - any = 1; + ++n; } - return any; + } + if (rstest(fn->regusage, RBX)) { + Xpush(pcode, RBX); + ++n; + } + return n; } static void calleerestore(uchar **pcode, Function *fn) { - for (int r = R15; r >= R12; --r) + if (rstest(fn->regusage, RBX)) Xpop(pcode, RBX); + for (int r = R12; r <= R15; ++r) if (rstest(fn->regusage, r)) Xpop(pcode, r); - if (rstest(fn->regusage, RBX)) Xpop(pcode, RBX); } /* align code using NOPs */ @@ -1331,28 +1351,27 @@ emitbin(Function *fn) /** prologue **/ /* only use frame pointer in non-leaf functions and functions with large stack frames */ - usebp = 0; + frame.usebp = 0; if (!fn->isleaf || fn->stksiz >= STACKREDZONE) { - usebp = 1; + frame.usebp = 1; /* push rbp; mov rbp, rsp */ DS("\x55\x48\x89\xE5"); } - saverestore = calleesave(&npush, pcode, fn); - if (usebp) rbpoff = -npush*8; + saverestore = npush = calleesave(pcode, fn); + npush += !frame.usebp; - /* ensure stack is 16-byte aligned for function calls */ - if (!fn->isleaf && ((fn->stksiz + npush*8) & 0xF) != 0) { - assert(usebp); - if ((rbpoff & 0xF) == 0) { - rbpoff -= 16; - fn->stksiz += 24; - } else { - rbpoff -= 8; + /* ensure stack is 16-byte aligned */ + if (frame.usebp) { + frame.size = fn->stksiz + npush*8; + if ((frame.size & 0xF) != 0) { + if (npush&1) fn->stksiz += 16; fn->stksiz += 8; + frame.size += 8; } } + frame.stksiz = fn->stksiz; - if (usebp && fn->stksiz > 0) { + if (frame.usebp && fn->stksiz > 0) { /* sub rsp, <stack size> */ if (fn->stksiz < 128) DS("\x48\x83\xEC"), B(fn->stksiz); @@ -1417,11 +1436,11 @@ emitbin(Function *fn) if (blk->lnext != fn->entry && blk->lnext->jmp.t == Jret && blk->lnext->ins.n == 0) continue; /* fallthru to next blk's RET */ /* epilogue */ - if (fn->stksiz && saverestore) + if (fn->stksiz && saverestore && frame.usebp) Xadd(pcode, KPTR, mkoper(OREG, .reg = RSP), mkoper(OIMM, .imm = fn->stksiz)); if (saverestore) calleerestore(pcode, fn); - if (usebp) B(0xC9); /* leave */ + if (frame.usebp) B(0xC9); /* leave */ B(0xC3); /* ret */ } else if (blk->jmp.t == Jtrap) { DS("\x0F\x0B"); /* UD2 */ diff --git a/src/t_x86-64_isel.c b/src/t_x86-64_isel.c index be2f2c7..0e3c55d 100644 --- a/src/t_x86-64_isel.c +++ b/src/t_x86-64_isel.c @@ -110,11 +110,8 @@ Begin: ShiftImm: /* shift immediate is always 8bit */ *r = mkref(RICON, sh & 255); } else if (r->t == RSTACK) { - Instr adr = mkinstr2(Oadd, KPTR, mkref(RREG, RBP), mkintcon(KI32, -r->i)); - if (op == Ocopy) - *ins = adr; - else - *r = insertinstr(blk, (*curi)++, adr); + if (!(oisloadstore(op) && r == &ins->l) && !in_range(op, Ocopy, Omove) && op != Ophi) + *r = inscopy(blk, curi, KPTR, *r); } else if (r->bits == UNDREF.bits && ins && !in_range(op, Ocopy, Omove) && op != Ophi) { *r = inscopy(blk, curi, ins->cls, *r); } @@ -150,7 +147,7 @@ selcall(Function *fn, Instr *ins, Block *blk, int *curi) int iargsave = iarg; if (!abi.ty.isagg) { /* scalar arg in stack */ *arg = mkinstr2(cls2store[abi.ty.cls], 0, adr, arg->r); - if (isaddrcon(arg->r,1) || arg->r.t == RADDR) + if (isaddrcon(arg->r,1) || arg->r.t == RADDR || arg->r.t == RSTACK) arg->r = insertinstr(blk, iarg++, mkinstr1(Ocopy, abi.ty.cls, arg->r)); else fixarg(&ins->r, ins, blk, &iarg); @@ -239,11 +236,9 @@ static bool aadd(IRAddr *out, Block *blk, int *curi, Ref r, bool recurring) { if (r.t == RSTACK) { - if (out->base.bits || !aimm(out, -r.i)) { - r = insertinstr(blk, (*curi)++, mkinstr2(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, -r.i))); + if (out->base.bits) goto Ref; - } - out->base = mkref(RREG, RBP); + out->base = r; } else if (r.t == RTMP) { Instr *ins = &instrtab[r.i]; IRAddr adr = {0}; @@ -300,7 +295,7 @@ fuseaddr(Ref *r, Block *blk, int *curi) { IRAddr addr = { 0 }; - if (isaddrcon(*r,1)) return 1; + if (isaddrcon(*r,1) || r->t == RSTACK) return 1; if (!aadd(&addr, blk, curi, *r, 0)) return 0; if (isaddrcon(addr.base,0) && (ccopt.pic || (ccopt.pie && addr.index.bits) || (contab.p[addr.base.i].flag & SFUNC))) { @@ -386,7 +381,7 @@ sel(Function *fn, Instr *ins, Block *blk, int *curi) if (!fn->abiarg[ins->l.i].isstk) *ins = mkinstr1(Ocopy, ins->cls, mkref(RREG, fn->abiarg[ins->l.i].reg)); else /* stack */ - *ins = mkinstr2(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, 16+fn->abiarg[ins->l.i].stk)); + *ins = mkinstr1(Ocopy, KPTR, mkref(RSTACK, -fn->abiarg[ins->l.i].stk-8)); break; case Oarg: fixarg(&ins->r, ins, blk, curi); @@ -412,11 +407,14 @@ sel(Function *fn, Instr *ins, Block *blk, int *curi) ins->op = ((op - Olth) ^ 1) + Olth; rswap(ins->l, ins->r); } - if (ins->l.t != RTMP && ins->l.t != RREG && ins->l.t != RSTACK) + if (ins->l.t != RTMP && ins->l.t != RREG) ins->l = inscopy(blk, curi, ins->cls, ins->l); else fixarg(&ins->l, ins, blk, curi); - fixarg(&ins->r, ins, blk, curi); + if (ins->r.t == RSTACK) + ins->r = inscopy(blk, curi, ins->cls, ins->r); + else + fixarg(&ins->r, ins, blk, curi); break; case Odiv: case Oudiv: case Orem: case Ourem: if (kisflt(ins->cls)) goto ALU; @@ -505,7 +503,7 @@ sel(Function *fn, Instr *ins, Block *blk, int *curi) break; case Ostorei8: case Ostorei16: case Ostorei32: case Ostorei64: case Ostoref32: case Ostoref64: loadstoreaddr(blk, &ins->l, curi); - if (isaddrcon(ins->r,1) || ins->r.t == RADDR) + if (isaddrcon(ins->r,1) || ins->r.t == RADDR || ins->r.t == RSTACK) ins->r = insertinstr(blk, (*curi)++, mkinstr1(Ocopy, KPTR, ins->r)); else fixarg(&ins->r, ins, blk, curi); @@ -554,7 +552,7 @@ sel(Function *fn, Instr *ins, Block *blk, int *curi) break; case Oxvaprologue: fuseaddr(&ins->l, blk, curi); - assert(ins->l.t == RADDR); + assert(ins->l.t == RSTACK); /* !this must be the first instruction */ assert(*curi == 1); assert(blk == fn->entry); diff --git a/src/t_x86-64_sysv.c b/src/t_x86-64_sysv.c index 96f40d6..2404a86 100644 --- a/src/t_x86-64_sysv.c +++ b/src/t_x86-64_sysv.c @@ -200,7 +200,7 @@ vastart(Function *fn, Block *blk, int *curi) int i = *curi + 1; insertinstr(blk, i++, mkinstr2(Ostorei64, 0, dst, rsave)); /* set ap->overflow_arg_area */ - src = insertinstr(blk, i++, mkinstr2(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, 16+stk0))); + src = insertinstr(blk, i++, mkinstr1(Ocopy, KPTR, mkref(RSTACK, -stk0-8))); dst = insertinstr(blk, i++, mkinstr2(Oadd, KPTR, ap, mkref(RICON, 8))); insertinstr(blk, i++, mkinstr2(Ostorei64, 0, dst, src)); /* set ap->gp_offset */ |