#include "t_x86-64.h"

static int classify(uchar cls[2], const TypeData *td, uint off);

/* Merge the class of one scalar field into the eightbyte it lives in.
 *
 * cls: the two per-eightbyte classes of the aggregate being classified
 *      (0 means "nothing seen yet"); cls[off/8] is updated in place.
 * off: byte offset of the field from the start of the outermost aggregate.
 * ty:  the field type; complex types are classified through their
 *      equivalent struct (complex2struct).
 *
 * Merging follows the System V rule that INTEGER wins over SSE when both
 * kinds of field share an eightbyte, while keeping the slot class wide
 * enough to cover every byte seen so far.
 */
static void clsscalar(uchar cls[2], uint off, Type ty)
{
    if (iscomplex(ty)) {
        classify(cls, &typedata[complex2struct(ty).dat], off);
        return;
    }

    enum irclass k = type2cls[scalartypet(ty)];
    uchar *fcls = &cls[off/8];

    if (isflt(ty)) { /* SSE */
        if (!*fcls || (*fcls == KF32 && k > *fcls)) {
            *fcls = k;
        } else if (kisint(*fcls) && cls2siz[*fcls] < cls2siz[k]) {
            /* The slot is already INTEGER and stays INTEGER, but it has to
             * cover the wider float overlapping it (e.g. union {int; double}). */
            *fcls = cls2siz[k] < 8 ? KI32 : KI64;
        }
    } else { /* INTEGER */
        assert(isint(ty) || ty.t == TYPTR);
        if (*fcls && kisflt(*fcls)) {
            /* INTEGER wins over SSE within one eightbyte (e.g. struct
             * {float; int} or union {double; long}); keep the width. */
            *fcls = cls2siz[*fcls] < 8 ? KI32 : KI64;
        }
        if (cls2siz[*fcls] < cls2siz[k])
            *fcls = k == KPTR ? KI64 : k;
    }

    /* A field in the upper half of the eightbyte needs the full 8 bytes. */
    if (off % 8 >= 4 && cls2siz[*fcls] < 8)
        *fcls = kisint(*fcls) ? KI64 : KF64;
}

/* Classify every element of the array type `ty`, which starts `off` bytes
 * into the outermost aggregate. Elements may be aggregates, nested arrays
 * or scalars.
 *
 * Returns 0 (with both classes cleared) if any element forces MEMORY,
 * otherwise the number of eightbytes that have a class.
 */
static int classifyarr(uchar cls[2], Type ty, uint off)
{
    Type chld = typechild(ty);
    uint n = typearrlen(ty), siz = typesize(chld);

    assert(n > 0);
    for (uint i = 0; i < n; ++i) {
        uint offx = off + i * siz;

        if (isagg(chld)) {
            if (!classify(cls, &typedata[chld.dat], offx))
                return cls[0] = cls[1] = 0;
        } else if (chld.t == TYARRAY) {
            if (!classifyarr(cls, chld, offx))
                return cls[0] = cls[1] = 0;
        } else {
            clsscalar(cls, offx, chld);
        }
    }
    return !!cls[0] + !!cls[1];
}

/* XXX types with alignment >= 16 */
/* Classify the aggregate described by `td`, located `off` bytes into the
 * outermost aggregate, accumulating per-eightbyte classes into `cls`.
 *
 * Returns 0 for MEMORY class: the size (rounded up to 4) exceeds 16 bytes,
 * a field is not aligned to its type's alignment, or a nested
 * aggregate/array is MEMORY. Otherwise returns the number of eightbytes
 * that received a class (count of non-zero entries in cls).
 * Incomplete (flexible) array members are skipped.
 */
static int classify(uchar cls[2], const TypeData *td, uint off)
{
    uint siz = alignup(td->siz, 4);

    if (siz > 16) /* MEMORY */
        return 0;
    assert(isaggt(td->t));
    for (int i = 0; i < td->nmemb; ++i) {
        FieldData *fld = &td->fld[i].f;
        uint align = typealign(fld->t);

        if (alignup(fld->off, align) != fld->off) /* unaligned field -> MEMORY */
            return cls[0] = cls[1] = 0;
        if (isagg(fld->t)) {
            if (!classify(cls, &typedata[fld->t.dat], off + fld->off))
                return cls[0] = cls[1] = 0;
        } else if (fld->t.t == TYARRAY) {
            if (isincomplete(fld->t))
                continue;
            if (!classifyarr(cls, fld->t, off + fld->off))
                return cls[0] = cls[1] = 0;
        } else {
            clsscalar(cls, fld->off + off, fld->t);
        }
    }
    return !!cls[0] + !!cls[1];
}

/* Assign the location of one call argument of type `typ`.
 *
 * r:     out; r[i] is the register for part i, or for MEMORY r[0] is the
 *        stack offset of the argument.
 * cls:   out; class of each part (cleared when an aggregate goes to MEMORY).
 * r2off: out; set to 8 for aggregates passed in registers.
 * ni/nf: in/out; number of integer / SSE argument registers used so far.
 * ns:    in/out; number of stack bytes used so far.
 *
 * Returns the number of registers used (1 or 2), or 0 if the argument is
 * passed in memory. An aggregate is passed entirely in registers or
 * entirely in memory: if the registers run out part-way, the counters are
 * rolled back and the whole argument goes on the stack.
 */
static int abiarg(short r[2], uchar cls[2], uchar *r2off, int *ni, int *nf, int *ns, IRType typ)
{
    static const uchar intregs[] = { RDI, RSI, RDX, RCX, R8, R9 };
    enum { NINT = countof(intregs), NFLT = 8 };

    if (!typ.isagg) {
        if (kisflt(cls[0] = typ.cls) && *nf < NFLT) {
            r[0] = XMM0 + (*nf)++;
        } else if (kisint(cls[0]) && *ni < NINT) {
            r[0] = intregs[(*ni)++];
        } else {
            r[0] = *ns;
            *ns += 8;
            return 0; /* MEMORY */
        }
        return 1;
    }

    cls[0] = cls[1] = 0;
    int ret = classify(cls, &typedata[typ.dat], 0);
    if (!ret) { /* MEMORY */
        r[0] = *ns;
        *ns = alignup(*ns + typedata[typ.dat].siz, 8);
        return 0;
    }
    assert(ret <= 2);

    int ni_save = *ni, nf_save = *nf;
    *r2off = 8;
    for (int i = 0; i < ret; ++i) {
        assert(cls[i]);
        if (kisflt(cls[i]) && *nf < NFLT)
            r[i] = XMM0 + (*nf)++;
        else if (kisint(cls[i]) && *ni < NINT)
            r[i] = intregs[(*ni)++];
        else { /* MEMORY */
            /* out of registers: undo any partial assignment */
            *ni = ni_save, *nf = nf_save;
            r[0] = *ns;
            *ns = alignup(*ns + typedata[typ.dat].siz, 8);
            r[1] = -1;
            return cls[0] = cls[1] = 0;
        }
    }
    return ret;
}

/* Assign the location of a return value of type `typ`.
 *
 * r:     out; registers holding the result parts. For MEMORY, r[0] is RAX
 *        and r[1] is RDI (see below).
 * cls:   out; class of each part.
 * r2off: out; set to 8 for aggregates returned in registers.
 * ni:    in/out; integer argument register count. It must be 0 on entry
 *        when the result is MEMORY, and is incremented because RDI is
 *        then taken by the result-location argument.
 *
 * Returns the number of registers used (1 or 2), or 0 for MEMORY.
 */
static int abiret(short r[2], uchar cls[2], uchar *r2off, int *ni, IRType typ)
{
    if (!typ.isagg) {
        r[0] = kisflt(cls[0] = typ.cls) ? XMM0 : RAX;
        return 1;
    }

    cls[0] = cls[1] = 0;
    int ret = classify(cls, &typedata[typ.dat], 0);
    if (!ret) { /* MEMORY */
        assert(*ni == 0);
        r[0] = RAX; /* on return should contain result location address */
        r[1] = RDI; /* register for caller-owned result location argument */
        ++*ni;
        return 0;
    }
    assert(ret <= 2);

    *r2off = 8;
    /* local counters of return registers used; distinct from the `ni` parameter */
    for (int i = 0, iused = 0, fused = 0; i < ret; ++i) {
        assert(cls[i]);
        if (kisflt(cls[i])) /* SSE (XMM0, XMM1) */
            r[i] = XMM0 + fused++;
        else if (kisint(cls[i])) /* INTEGER (RAX, RDX) */
            r[i] = iused++ == 0 ? RAX : RDX;
        else
            assert(0);
    }
    return ret;
}

/* Layout of va_list:
 * struct {
 *    ( 0) unsigned int gp_offset;
 *    ( 4) unsigned int fp_offset;
 *    ( 8) void *overflow_arg_area;
 *    (16) void *reg_save_area;
 * }
 * Layout of register save area (align 16):
 *    reg    off
 *    rdi    0
 *    rsi    8
 *    rdx    16
 *    rcx    24
 *    r8     32
 *    r9     40
 *    xmm0   48
 *    xmm1   64
 *    ...
* in x86_64/emit xvaprologue generates the code to save the registers to a stack slot
 * there only needs to be one xvaprologue if there's any vastart instrs, and it has to be
 * at the beginning of the function (before IR generated by regalloc can touch any registers)
 * then vastart can initialize va_list.reg_save_area with a pointer to that */

/* Lower the Ovastart instruction at blk->ins.p[*curi] into stores that
 * initialize the four va_list fields addressed by the instruction's left
 * operand.
 *
 * fn:   function being lowered; its entry block receives the register save
 *       area alloca and the Oxvaprologue instruction if not already present.
 * blk:  block containing the vastart instruction.
 * curi: in/out index into blk->ins; on return it is the index of the last
 *       instruction inserted here.
 *
 * NOTE(review): `ins` points into instrtab and is written through after
 * insertinstr() calls on the entry block -- confirm instrtab cannot be
 * reallocated by those calls.
 * NOTE(review): if blk is fn->entry, the two instructions inserted at its
 * front shift the position of the vastart instruction, while *curi is used
 * unchanged below -- confirm vastart never appears in the entry block.
 */
static void vastart(Function *fn, Block *blk, int *curi)
{
    Ref rsave; /* register save area */
    int gpr0 = 0, fpr0 = 0, stk0 = 0;
    Instr *ins = &instrtab[blk->ins.p[*curi]];
    Ref ap = ins->l, src, dst;

    assert(ins->op == Ovastart);
    /* add xvaprologue if not there yet, which must be the first
     * real instruction in the function (following alloca) */
    if (fn->entry->ins.n > 1 && instrtab[fn->entry->ins.p[1]].op == Oxvaprologue) {
        rsave = mkref(RTMP, fn->entry->ins.p[0]); /* alloca instruction */
        assert(instrtab[rsave.i].op == Oalloca16);
    } else {
        /* the layout above ends at 48 + 8*16 = 176 bytes; 192 are allocated
         * here (presumably some slack -- TODO confirm) */
        rsave = insertinstr(fn->entry, 0, mkalloca(192, 16));
        insertinstr(fn->entry, 1, (Instr){Oxvaprologue, 0, .keep=1, .l=rsave});
    }

    /* find first unnamed gpr and fpr */
    /* counts the named arguments passed in integer / SSE registers, and
     * remembers the stack offset just past the last stack-passed one.
     * NOTE(review): abiarg advances the stack offset by the full aligned
     * size of a MEMORY-class aggregate, but only 8 is added here -- confirm
     * this is right for aggregates larger than 8 bytes. */
    for (int i = 0; i < fn->nabiarg; ++i) {
        ABIArg abi = fn->abiarg[i];
        if (!abi.isstk){
            if (abi.reg < XMM0) ++gpr0;
            else ++fpr0;
        } else {
            stk0 = abi.stk+8;
        }
    }

    /* set ap->reg_save_area */
    /* the vastart instruction itself is overwritten with the computation of
     * &ap->reg_save_area (ap + 16); the store follows it */
    *ins = mkinstr2(Oadd, KPTR, ap, mkref(RICON, 16));
    dst = mkref(RTMP, ins - instrtab);
    int i = *curi + 1;
    insertinstr(blk, i++, mkinstr2(Ostorei64, 0, dst, rsave));

    /* set ap->overflow_arg_area */
    /* NOTE(review): the meaning of the negative RSTACK offset is defined
     * outside this file section -- presumably it addresses the first
     * unnamed stack argument; confirm. */
    src = insertinstr(blk, i++, mkinstr1(Ocopy, KPTR, mkref(RSTACK, -stk0-8)));
    dst = insertinstr(blk, i++, mkinstr2(Oadd, KPTR, ap, mkref(RICON, 8)));
    insertinstr(blk, i++, mkinstr2(Ostorei64, 0, dst, src));

    /* set ap->gp_offset */
    /* gp_offset is at offset 0, so ap is used directly as the address */
    insertinstr(blk, i++, mkinstr2(Ostorei32, 0, ap, mkref(RICON, gpr0*8)));

    /* set ap->fp_offset */
    /* SSE registers start at offset 6*8 = 48 in the save area, 16 bytes each */
    dst = insertinstr(blk, i++, mkinstr2(Oadd, KPTR, ap, mkref(RICON, 4)));
    insertinstr(blk, i++, mkinstr2(Ostorei32, 0, dst, mkref(RICON, 6*8 + fpr0*16)));

    *curi = i-1;
}

/* Lower the Ovaarg instruction at blk->ins.p[*curi]: its left operand is the
 * va_list pointer and its right operand encodes the requested type.
 *
 * Only arguments that abiarg() places in a single register are handled;
 * two-register aggregates (ret == 2) and MEMORY-class arguments (ret == 0)
 * hit an assert ("nyi").
 *
 * The generated control flow is:
 *   blk:   compare gp_offset/fp_offset against the end of its save region
 *   s1:    take the address inside reg_save_area, bump the offset
 *   s2:    take overflow_arg_area, bump it by 8
 *   merge: phi of both addresses, then load the value (or copy 8 bytes into
 *          a fresh 8-byte alloca for an aggregate)
 * The original vaarg instruction id (`var`) is reused for the final
 * load/alloca and moved to the front of the merge block.
 */
static void vaarg(Function *fn, Block *blk, int *curi)
{
    short r[2];
    uchar cls[2];
    Ref tmp;
    int ni = 0, nf = 0, ns = 0;
    uchar r2off;
    int var = blk->ins.p[*curi];
    Ref ap = instrtab[var].l;
    IRType ty = ref2type(instrtab[var].r);

    assert(instrtab[var].op == Ovaarg);
    /* the slot of the vaarg instruction in blk becomes a nop; `var` is
     * re-inserted into the merge block below */
    blk->ins.p[*curi] = newinstr(blk, (Instr){Onop});
    /* classify with fresh counters: afterwards ni/nf hold the number of
     * integer/SSE registers this one argument needs */
    int ret = abiarg(r, cls, &r2off, &ni, &nf, &ns, ty);
    if (ret == 2) assert(!"nyi");
    else if (ret == 1) {
        Block *merge;
        Ref phi, phiargs[2];

        /* int: l->gp_offset <= 48 - num_gp * 8 */
        /* sse: l->fp_offset <= 176 - num_fp * 16
         * (176 = 48 + 8*16 is the end of the register save area laid out
         * above) */
        tmp = ni ? ap : insertinstr(blk, (*curi)++, mkinstr2(Oadd, KPTR, ap, mkref(RICON, 4)));
        tmp = insertinstr(blk, (*curi)++, mkinstr1(Oloadu32, KI32, tmp));
        tmp = insertinstr(blk, (*curi)++, mkinstr2(Oulte, KI32, tmp, mkref(RICON, ni ? 48 - ni*8 : 176 - nf*16)));
        /* everything after the nop goes to `merge`; blk then ends in a
         * conditional branch to two new blocks */
        merge = blksplitafter(fn, blk, *curi);
        blk->jmp.t = 0;
        useblk(fn, blk);
        putcondbranch(fn, tmp, newblk(fn), newblk(fn));

        useblk(fn, blk->s1);
        {
            /* phi0: &l->reg_save_area[l->gp/fp_offset] */
            Ref sav = addinstr(fn, mkinstr1(Oloadi64, KPTR, irbinop(fn, Oadd, KPTR, ap, mkref(RICON, 16))));
            Ref roff = addinstr(fn, mkinstr1(Oloadu32, KI32, irbinop(fn, Oadd, KPTR, ap, mkref(RICON, ni ? 0 : 4))));
            phiargs[0] = irbinop(fn, Oadd, KPTR, sav, roff);
            /* l->gp/fp_offset += num_gp/fp * 8(16) */
            roff = irbinop(fn, Oadd, KI32, roff, mkref(RICON, ni ? ni * 8 : nf * 16));
            addinstr(fn, mkinstr2(Ostorei32, 0, irbinop(fn, Oadd, KPTR, ap, mkref(RICON, ni ? 0 : 4)), roff));
            /* retarget merge's single existing predecessor slot to this
             * block and jump to merge */
            assert(merge->npred == 1);
            blkpred(merge, 0) = blk->s1;
            blk->s1->jmp.t = Jb;
            blk->s1->s1 = merge;
        }

        useblk(fn, blk->s2);
        {
            /* phi1: l->overflow_arg_area */
            Ref adr = irbinop(fn, Oadd, KPTR, ap, mkref(RICON, 8));
            Ref ovf = addinstr(fn, mkinstr1(Oloadi64, KPTR, adr));
            /* align no-op */
            phiargs[1] = ovf;
            /* update l->overflow_arg_area += size */
            /* a single-register argument always occupies one 8-byte slot */
            int siz = 8;
            addinstr(fn, mkinstr2(Ostorei64, 0, adr, irbinop(fn, Oadd, KPTR, ovf, mkref(RICON, siz))));
            putbranch(fn, merge);
        }

        assert(merge->npred == 2);
        /* prepend `var` to merge's instruction list */
        vpush(&merge->ins, 0);
        memmove(merge->ins.p+1, merge->ins.p, (merge->ins.n-1)*sizeof *merge->ins.p);
        merge->ins.p[0] = var;
        phi = insertphi(merge, KPTR);
        memcpy(phitab.p[instrtab[phi.i].l.i], phiargs, sizeof phiargs);
        if (!ty.isagg) {
            /* scalar: the result is a load of the selected class from the
             * chosen address */
            instrtab[var] = mkinstr1(cls2load[cls[0]], cls[0], phi);
        } else {
            /* aggregate: the result is an 8-byte stack slot holding a copy
             * of the 8 bytes at the chosen address */
            instrtab[var] = mkalloca(8, 8);
            tmp = insertinstr(merge, 1, mkinstr1(Oloadi64, KI64, phi));
            insertinstr(merge, 2, mkinstr2(Ostorei64, 0, mkref(RTMP, var), tmp));
        }
        /* clears the FNUSE property bit (presumably marks use information
         * as stale -- confirm against its definition) */
        fn->prop &= ~FNUSE;
    } else {
        assert(!"nyi");
    }
}

/* Register names, generated by stringizing each entry of LIST_REGS. */
static const char x86_64_rnames[][6] = {
#define R(r) #r,
    LIST_REGS(R)
#undef R
};

const MCTarg t_x86_64_sysv = { .gpr0 = RAX, .ngpr = R15 - RAX + 1, .gprscratch = R11, .fprscratch = XMM15, .fpr0 = XMM0, .nfpr = XMM15 - XMM0 + 1, .rcallee = 1<