aboutsummaryrefslogtreecommitdiffhomepage
path: root/amd64
diff options
context:
space:
mode:
Diffstat (limited to 'amd64')
-rw-r--r--amd64/emit.c49
-rw-r--r--amd64/isel.c28
-rw-r--r--amd64/sysv.c120
3 files changed, 186 insertions, 11 deletions
diff --git a/amd64/emit.c b/amd64/emit.c
index e098a81..30c0b99 100644
--- a/amd64/emit.c
+++ b/amd64/emit.c
@@ -518,6 +518,9 @@ DEFINSTR2(Xmovzxb,
{4|8, PGPR, PMEM, "\x0F\xB6", EN_RM}, /* MOVZX r64, m8 */
{4|8, PGPR, PGPR, "\x0F\xB6", EN_RR, .r8=1}, /* MOVZX r64, r8 */
)
+DEFINSTR2(Xmovaps,
+ {-1, PMEM, PFPR, "\x0F\x29", EN_MR}, /* MOVAPS m128, xmm: store form only (used by Xvaprologue to spill XMM arg registers); the memory operand must be 16-byte aligned */
+)
DEFINSTR2(Xxchg,
{4|8, PGPR, PGPR, "\x87", EN_RR}, /* XCHG r32/64, r32/64 */
//{4|8, PGPR, PMEM, "\x87", EN_RM}, /* XCHG r32/64, m32/64 */
@@ -884,6 +887,43 @@ gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct ope
}
}
+/*
+ * Emit the prologue of a variadic function: spill every argument register
+ * that is not taken by a named parameter into the register save area `sav`
+ * (an RBP-relative memory operand).
+ *
+ * Save area layout: six 8-byte GPR slots (RDI,RSI,RDX,RCX,R8,R9) followed by
+ * eight 16-byte XMM slots (XMM0..XMM7). Slots of named parameters are
+ * skipped but still occupy their place, so offsets stay fixed.
+ */
+static void
+Xvaprologue(uchar **pcode, struct function *fn, struct oper sav)
+{
+    static const char gprargs[] = {RDI,RSI,RDX,RCX,R8,R9};
+    uint namedgpr = 0, namedfpr = 0, jepos;
+    int k;
+
+    /* count the argument registers consumed by named parameters */
+    for (k = 0; k < fn->nabiarg; ++k) {
+        const struct abiarg *a = &fn->abiarg[k];
+        if (a->isstk)
+            continue;
+        if (a->reg >= XMM0)
+            ++namedfpr;
+        else
+            ++namedgpr;
+    }
+    assert(sav.t == OMEM && sav.base == RBP);
+
+    /* GPR slots: only the unnamed ones are stored */
+    for (k = 0; k < 6; ++k, sav.disp += 8) {
+        if (k >= namedgpr)
+            Xmov(pcode, KI8, sav, reg2oper(gprargs[k]));
+    }
+
+    /* XMM slots are stored only when the caller set AL != 0;
+     * otherwise a short JE skips over all the MOVAPS stores */
+    const int savefp = namedfpr < 8;
+    if (savefp) {
+        DS("\x84\xC0"); /* TEST al,al */
+        jepos = *pcode - objout.textbegin;
+        DS("\x74\xFE"); /* JE rel8 (displacement patched below) */
+    }
+    for (k = 0; k < 8; ++k, sav.disp += 16) {
+        if (k >= namedfpr)
+            Xmovaps(pcode, KF8, sav, reg2oper(XMM0 + k));
+    }
+    if (savefp) {
+        /* rel8 is relative to the end of the 2-byte JE */
+        int rel = (*pcode - objout.textbegin) - jepos - 2;
+        objout.textbegin[jepos+1] = rel;
+    }
+}
+
+
/* condition code for CMP */
static const uchar icmpop2cc[] = {
[Oequ] = CCE, [Oneq] = CCNE,
@@ -1085,14 +1125,17 @@ emitinstr(uchar **pcode, struct function *fn, struct block *blk, int curi, struc
/* variadic functions need the caller to write num of args in sse regs to %al */
int n = 0;
for (int i = 0; i < call->narg; ++i)
- n += call->abiarg[i].reg >= XMM0;
+ if (!call->abiarg[i].isstk && call->abiarg[i].reg >= XMM0)
+ ++n;
if (!n) DS("\x31\xC0"); /* XOR EAX, EAX */
else B(0xB0), B(n); /* MOV AL, n */
}
Xcall(pcode, KPTR, ref2oper(ins->l));
break;
+ case Oxvaprologue:
+ Xvaprologue(pcode, fn, mkmemoper(ins->l));
+ break;
}
- // if (ins->reg) ioper(ins - instrtab) = reg2oper(ins->reg-1);
}
static void
@@ -1218,8 +1261,8 @@ emitbin(struct function *fn)
if (!fn->isleaf && ((fn->stksiz + npush*8) & 0xF) != 0x8) {
assert(usebp);
fn->stksiz += 8;
- rbpoff -= 8;
}
+ rbpoff = alignup(rbpoff, 16);
if (fn->stksiz != 0) {
/* sub rsp, <stack size> */
diff --git a/amd64/isel.c b/amd64/isel.c
index 23645bb..f8adb9a 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -118,11 +118,11 @@ selcall(struct function *fn, struct instr *ins, struct block *blk, int *curi)
}
assert(!abi.ty.isagg);
- if (abi.reg >= 0) {
+ if (!abi.isstk) {
assert(!abi.ty.isagg);
*arg = mkinstr(Omove, call->abiarg[i].ty.cls, mkref(RREG, abi.reg), arg->r);
} else {
- union ref adr = mkaddr((struct addr){mkref(RREG, RSP), .disp = argstksiz+abi.stk});
+ union ref adr = mkaddr((struct addr){mkref(RREG, RSP), .disp = abi.stk});
*arg = mkinstr(Ostore1+ilog2(cls2siz[abi.ty.cls]), 0, adr, arg->r);
}
}
@@ -241,8 +241,16 @@ fuseaddr(union ref *r, struct block *blk, int *curi)
{
struct addr addr = { 0 };
- if (r->t == RADDR) return 1;
if (isaddrcon(*r)) return 1;
+ if (r->t == RADDR) {
+ const struct addr *a0 = &addrht[r->i];
+ if (aadd(&addr, a0->base)
+ && (!addr.index.bits || ascale(&addr, a0->index, mkref(RICON, a0->shift)))
+ && aadd(&addr, mkintcon(KPTR, a0->disp))) {
+ *r = mkaddr(addr);
+ }
+ return 1;
+ }
if (r->t != RTMP) return 0;
if (!aadd(&addr, *r)) return 0;
@@ -311,10 +319,10 @@ sel(struct function *fn, struct instr *ins, struct block *blk, int *curi)
break;
case Oparam:
assert(ins->l.t == RICON && ins->l.i < fn->nabiarg);
- if (fn->abiarg[ins->l.i].reg >= 0)
+ if (!fn->abiarg[ins->l.i].isstk)
*ins = mkinstr(Ocopy, ins->cls, mkref(RREG, fn->abiarg[ins->l.i].reg));
else /* stack */
- *ins = mkinstr(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, -fn->abiarg[ins->l.i].stk));
+ *ins = mkinstr(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, 16+fn->abiarg[ins->l.i].stk));
break;
case Oarg:
fixarg(&ins->r, ins, blk, curi);
@@ -457,6 +465,16 @@ sel(struct function *fn, struct instr *ins, struct block *blk, int *curi)
case Ocopy:
fixarg(&ins->l, ins, blk, curi);
break;
+ case Oxvaprologue:
+ fuseaddr(&ins->l, blk, curi);
+ assert(ins->l.t == RADDR);
+ /* !this must be the first instruction */
+ assert(*curi == 1);
+ assert(blk == fn->entry);
+ t = blk->ins.p[0];
+ blk->ins.p[0] = blk->ins.p[1];
+ blk->ins.p[1] = t;
+ break;
}
}
diff --git a/amd64/sysv.c b/amd64/sysv.c
index 334be26..af0ade0 100644
--- a/amd64/sysv.c
+++ b/amd64/sysv.c
@@ -77,7 +77,7 @@ abiarg(short r[2], uchar cls[2], int *ni, int *nf, int *ns, union irtype typ)
} else if (*ni < NINT) {
r[0] = intregs[(*ni)++];
} else {
- r[0] = -*ns - 16;
+ r[0] = *ns;
*ns += 8;
return 0; /* MEMORY */
}
@@ -86,7 +86,7 @@ abiarg(short r[2], uchar cls[2], int *ni, int *nf, int *ns, union irtype typ)
cls[0] = cls[1] = 0;
ret = classify(cls, &typedata[typ.dat], 0);
if (!ret) { /*MEMORY*/
- r[0] = -*ns - 16;
+ r[0] = *ns;
*ns = alignup(*ns + typedata[typ.dat].siz, 8);
return 0;
}
@@ -100,7 +100,7 @@ abiarg(short r[2], uchar cls[2], int *ni, int *nf, int *ns, union irtype typ)
r[i] = intregs[(*ni)++];
else { /* MEMORY */
*ni = ni_save, *nf = nf_save;
- r[0] = -*ns - 16;
+ r[0] = *ns;
*ns = alignup(*ns + typedata[typ.dat].siz, 8);
r[1] = -1;
return cls[0] = cls[1] = 0;
@@ -140,6 +140,118 @@ abiret(short r[2], uchar cls[2], int *ni, union irtype typ)
return ret;
}
+/*
+ * Lower the Ovastart instruction at blk->ins.p[*curi] into plain IR that
+ * fills in the va_list pointed to by the instruction's left operand:
+ *
+ *    ap+0   gp_offset          (4 bytes)  = 8 * named GPR args
+ *    ap+4   fp_offset          (4 bytes)  = 48 + 16 * named FPR args
+ *    ap+8   overflow_arg_area  (8 bytes)  = first unnamed stack argument
+ *    ap+16  reg_save_area      (8 bytes)  = area filled by Oxvaprologue
+ *
+ * The first call in a function inserts mkalloca(192, 16) plus an
+ * Oxvaprologue instruction at the top of the entry block; later calls
+ * reuse that save area. On return *curi is the index just past the
+ * instructions inserted into blk.
+ */
+static void
+vastart(struct function *fn, struct block *blk, int *curi)
+{
+    union ref rsave; /* register save area */
+    int gpr0 = 0, fpr0 = 0, stk0 = 0;
+    int prologue = -1;
+    struct instr *ins = &instrtab[blk->ins.p[*curi]];
+    union ref ap = ins->l, src, dst;
+    assert(ins->op == Ovastart);
+    /* Add xvaprologue if not there yet. It is inserted below as
+     * [alloca, xvaprologue], so it sits in slot 1 until isel swaps it to
+     * the front; accept either slot. */
+    for (int k = 0; k < 2 && k < fn->entry->ins.n; ++k) {
+        if (instrtab[fn->entry->ins.p[k]].op == Oxvaprologue) {
+            prologue = fn->entry->ins.p[k];
+            break;
+        }
+    }
+    if (prologue >= 0) {
+        /* the prologue's operand is the save area, not the prologue itself */
+        rsave = instrtab[prologue].l;
+    } else {
+        /* NOTE(review): when blk == fn->entry these two insertions shift
+         * blk->ins.p by two while *curi is left untouched -- confirm the
+         * caller/insertinstr accounts for that. */
+        rsave = insertinstr(fn->entry, 0, mkalloca(192, 16));
+        insertinstr(fn->entry, 1, mkinstr(Oxvaprologue, 0, rsave, .keep=1));
+    }
+    /* find first unnamed gpr and fpr */
+    for (int i = 0; i < fn->nabiarg; ++i) {
+        struct abiarg abi = fn->abiarg[i];
+        if (!abi.isstk){
+            if (abi.reg < XMM0) ++gpr0;
+            else ++fpr0;
+        } else {
+            /* NOTE(review): assumes the last named stack argument takes
+             * one 8-byte slot; larger aggregates passed in memory would
+             * need their aligned size here -- confirm. */
+            stk0 = abi.stk+8;
+        }
+    }
+    /* set ap->reg_save_area */
+    *ins = mkinstr(Oadd, KPTR, ap, mkref(RICON, 16));
+    dst = mkref(RTMP, ins - instrtab);
+    int i = *curi + 1;
+    insertinstr(blk, i++, mkinstr(Ostore8, 0, dst, rsave));
+    /* set ap->overflow_arg_area (16 skips the saved rbp and return address) */
+    src = insertinstr(blk, i++, mkinstr(Oadd, KPTR, mkref(RREG, RBP), mkref(RICON, 16+stk0)));
+    dst = insertinstr(blk, i++, mkinstr(Oadd, KPTR, ap, mkref(RICON, 8)));
+    insertinstr(blk, i++, mkinstr(Ostore8, 0, dst, src));
+    /* set ap->gp_offset */
+    insertinstr(blk, i++, mkinstr(Ostore4, 0, ap, mkref(RICON, gpr0*8)));
+    /* set ap->fp_offset: XMM slots follow the 6 GPR slots and are 16 bytes
+     * each, matching the stride Xvaprologue stores them with */
+    dst = insertinstr(blk, i++, mkinstr(Oadd, KPTR, ap, mkref(RICON, 4)));
+    insertinstr(blk, i++, mkinstr(Ostore4, 0, dst, mkref(RICON, 6*8 + fpr0*16)));
+    *curi = i;
+}
+
+
+/*
+ * Lower the Ovaarg instruction at blk->ins.p[*curi] (left operand: pointer
+ * to the va_list, right operand: the requested type).
+ *
+ * The original slot is replaced by a nop and the type is classified with
+ * abiarg() as if it were the first argument. Only a value that fits one
+ * integer register is implemented: the code branches on gp_offset, takes
+ * the argument address either from the register save area or from the
+ * overflow area, merges both addresses with a phi at the top of the
+ * continuation block, and turns the Ovaarg instruction into a load through
+ * that phi. Two-register values, memory-class values and float-class
+ * values currently hit an assertion (not yet implemented).
+ */
+static void
+vaarg(struct function *fn, struct block *blk, int *curi)
+{
+ short r[2];
+ uchar cls[2];
+ union ref tmp;
+ int ni = 0, nf = 0, ns = 0;
+ int var = blk->ins.p[*curi];
+ union ref ap = instrtab[var].l;
+ union irtype ty = ref2type(instrtab[var].r);
+
+ assert(instrtab[var].op == Ovaarg);
+ /* detach the vaarg instruction from its slot; `var` is re-inserted at the head of the merge block below */
+ blk->ins.p[*curi] = newinstr(blk, (struct instr){Onop});
+
+ /* classify with fresh counters: ni/nf then tell how many GPRs/FPRs the value needs */
+ int ret = abiarg(r, cls, &ni, &nf, &ns, ty);
+
+ if (ret == 2) assert(!"nyi");
+ else if (ret == 1) {
+ struct block *merge;
+ union ref phi, phiargs[2];
+ if (ni) {
+ /* l->gp_offset <= 48 - num_gp * 8 (Oulte): the value still fits in the saved GPRs */
+ tmp = insertinstr(blk, (*curi)++, mkinstr(Oloadu4, KI4, ap));
+ tmp = insertinstr(blk, (*curi)++, mkinstr(Oulte, KI4, tmp, mkref(RICON, 48 - ni*8)));
+ /* everything after the comparison becomes the merge block */
+ merge = blksplitafter(fn, blk, *curi);
+ blk->jmp.t = 0;
+ useblk(fn, blk);
+ putcondbranch(fn, tmp, newblk(fn), newblk(fn));
+ useblk(fn, blk->s1);
+ {
+ /* phi0: &l->reg_save_area[l->gp_offset] */
+ union ref sav = addinstr(fn, mkinstr(Oloadi8, KPTR, irbinop(fn, Oadd, KPTR, ap, mkref(RICON, 16))));
+ union ref gpoff = addinstr(fn, mkinstr(Oloadu4, KI4, ap));
+ phiargs[0] = irbinop(fn, Oadd, KPTR, sav, gpoff);
+ /* l->gp_offset += num_gp * 8 */
+ gpoff = irbinop(fn, Oadd, KI4, gpoff, mkref(RICON, ni * 8));
+ addinstr(fn, mkinstr(Ostore4, 0, ap, gpoff));
+ /* hand-wire this block as merge's predecessor 0 (the slot previously held by the split) */
+ assert(merge->npred == 1);
+ blkpred(merge, 0) = blk->s1;
+ blk->s1->jmp.t = Jb;
+ blk->s1->s1 = merge;
+ }
+ useblk(fn, blk->s2);
+ {
+ /* phi1: l->overflow_arg_area */
+ union ref adr = irbinop(fn, Oadd, KPTR, ap, mkref(RICON, 8));
+ union ref ovf = addinstr(fn, mkinstr(Oloadi8, KPTR, adr));
+ /* align no-op */
+
+ phiargs[1] = ovf;
+ /* update l->overflow_arg_area += num_gp*8 */
+ int siz = 8;
+ addinstr(fn, mkinstr(Ostore8, 0, adr, irbinop(fn, Oadd, KPTR, ovf, mkref(RICON, siz))));
+ putbranch(fn, merge);
+ }
+ assert(merge->npred == 2);
+ /* make room at the front of merge and put the original vaarg instruction there */
+ vpush(&merge->ins, 0);
+ memmove(merge->ins.p+1, merge->ins.p, (merge->ins.n-1)*sizeof *merge->ins.p);
+ merge->ins.p[0] = var;
+ phi = insertphi(merge, KPTR);
+ memcpy(phitab.p[instrtab[phi.i].l.i], phiargs, sizeof phiargs);
+ /* the vaarg instruction becomes a load of the value through the merged address */
+ instrtab[var] = mkinstr(cls[0] == KI4 ? Oloads4 : Oloadi8, cls[0], phi);
+ } else {
+ /* float-class va_arg: not implemented, always asserts */
+ assert(0&&nf);
+ }
+ } else {
+ /* memory-class (stack only) va_arg: not implemented */
+ assert(!"nyi");
+ }
+}
+
+
static const char amd64_rnames[][6] = {
#define R(r) #r,
LIST_REGS(R)
@@ -158,6 +270,8 @@ const struct mctarg t_amd64_sysv = {
.isa = ISamd64,
.abiret = abiret,
.abiarg = abiarg,
+ .vastart = vastart,
+ .vaarg = vaarg,
.isel = amd64_isel,
.emit = amd64_emit
};