aboutsummaryrefslogtreecommitdiffhomepage
path: root/amd64/emit.c
diff options
context:
space:
mode:
Diffstat (limited to 'amd64/emit.c')
-rw-r--r--amd64/emit.c243
1 files changed, 229 insertions, 14 deletions
diff --git a/amd64/emit.c b/amd64/emit.c
index 955d59d..e9561d3 100644
--- a/amd64/emit.c
+++ b/amd64/emit.c
@@ -168,7 +168,7 @@ enum operenc {
EN_RI8, /* reg, imm8 with /0 */
EN_RI32, /* reg, imm32 with /0 */
EN_MI8, /* mem, imm8 with /x */
- EN_MI16, /* mem, imm16 with /x */
+ EN_MI16, /* mem, imm16 with /x */
EN_MI32, /* mem, imm32 with /x */
EN_OI, /* reg, imm32 with op + reg */
EN_I32, /* imm32 */
@@ -361,6 +361,7 @@ static void Xmov(uchar **pcode, enum irclass k, struct oper dst, struct oper src
{4|8, PGPR, PGPR, "\x8B", EN_RR}, /* MOV r32/64, r32/64 */
{4|8, PMEM, PGPR, "\x89", EN_MR}, /* MOV m32/64, r32/64 */
{4|8, PGPR, PMEM, "\x8B", EN_RM}, /* MOV r32/64, m32/64 */
+ {4|8, PMEM, PI32, "\xC7", EN_MI32}, /* MOV m32/64, imm */
{ 8, PGPR, PU32, "\xB8", EN_OI, .norexw=1}, /* MOV r64, uimm */
{ 8, PGPR, PI32, "\xC7", EN_RI32}, /* MOV r64, imm */
{4, PFPR, PFPR, "\xF3\x0F\x10", EN_RR}, /* MOVSS xmm, xmm */
@@ -373,8 +374,8 @@ static void Xmov(uchar **pcode, enum irclass k, struct oper dst, struct oper src
static const uchar k2off[] = {
[KI4] = 0,
[KI8] = 1, [KPTR] = 1,
- [KF4] = 6,
- [KF8] = 9,
+ [KF4] = 7,
+ [KF8] = 10,
};
encode(pcode, all + k2off[k], arraylength(all) - k2off[k], k, dst, src);
}
@@ -457,6 +458,89 @@ DEFINSTR1(Xcall,
{-1, PGPR, 0, "\xFF", EN_R, .ext=2, .norexw=1}, /* CALL r64 */
{-1, PMEM, 0, "\xFF", EN_M, .ext=2, .norexw=1}, /* CALL m64 */
)
+/* CMP encodings. Each row lists the accepted operand sizes, the destination
+ * and source operand patterns, the opcode bytes and the operand encoding;
+ * .ext=7 is the /7 opcode extension that selects CMP inside the 0x83 and 0x81
+ * immediate groups.
+ * NOTE(review): the short imm8 and eax/rax forms come before the general imm32
+ * form, presumably because encode() takes the first matching row -- confirm.
+ * NOTE(review): the memory-source row only accepts size 8; there is no
+ * r32, m32 row -- confirm whether that is intentional. */
+DEFINSTR2(Xcmp,
+ {4|8, PGPR, PGPR, "\x3B", EN_RR}, /* CMP r32/64, r32/64 */
+ {4|8, PGPR, PI8, "\x83", EN_RI8, .ext=7}, /* CMP r32/64, imm8 */
+ {4|8, PRAX, PI32, "\x3D", EN_I32}, /* CMP eax/rax, imm */
+ {4|8, PGPR, PI32, "\x81", EN_RI32, .ext=7}, /* CMP r32/64, imm */
+ { 8, PGPR, PMEM, "\x3B", EN_RM}, /* CMP r64, m64 */
+)
+/* TEST encodings. Only the register/register form is provided; emitinstr uses
+ * it as `TEST r, r` to compare a register against zero. */
+DEFINSTR2(Xtest,
+ {4|8, PGPR, PGPR, "\x85", EN_RR}, /* TEST r32/64, r32/64 */
+)
+
+/* x86 condition codes. Values 0x0-0xF are the 4-bit encodings that get added
+ * to the Jcc (0x70, 0x0F 0x80) and SETcc (0x0F 0x90) base opcodes. Codes come
+ * in pairs whose lowest bit negates the condition, which is what lets
+ * emitbranch invert a branch with `cc ^= 1`.
+ * ALWAYS (0x10) is not a hardware code: Xjcc treats it as a request for an
+ * unconditional JMP. */
+enum cc {
+ CCO = 0x0, /* OF = 1 */
+ CCNO = 0x1, /* OF = 0 */
+ CCB = 0x2, CCC = 0x2, CCNAE = 0x2, /* below; CF = 1; not above or equal */
+ CCAE = 0x3, CCNB = 0x3, CCNC = 0x3, /* above or equal; not below; CF = 0 */
+ CCE = 0x4, CCZ = 0x4, /* equal; ZF = 1 */
+ CCNE = 0x5, CCNZ = 0x5, /* not equal; ZF = 0 */
+ CCBE = 0x6, CCNA = 0x6, /* below or equal; not above; CF=1 or ZF=1 */
+ CCA = 0x7, CCNBE = 0x7, /* above; not below or equal; CF=0 and ZF=0 */
+ CCS = 0x8, /* SF = 1; negative */
+ CCNS = 0x9, /* SF = 0; non-negative */
+ CCP = 0xA, CCPE = 0xA, /* PF = 1; parity even */
+ CCNP = 0xB, CCPO = 0xB, /* PF = 0; parity odd */
+ CCL = 0xC, CCNGE = 0xC, /* less; not greater or equal; SF != OF */
+ CCGE = 0xD, CCNL = 0xD, /* greater or equal; not less; SF == OF */
+ CCLE = 0xE, CCNG = 0xE, /* less or equal; not greater; ZF=1 or SF != OF */
+ CCG = 0xF, CCNLE = 0xF, /* greater; not less or equal; ZF=0 and SF == OF */
+ ALWAYS, /* unconditional; only meaningful to Xjcc */
+};
+
+/* maps blk -> address when resolved; or to linked list of jump displacement
+ * relocations.
+ * The table is indexed by block id. emitbin zeroes it at the start of every
+ * function and marks an entry resolved when it starts emitting that block. */
+static struct blkaddr {
+ /* set once the block's code offset is known */
+ bool resolved;
+ union {
+ /* resolved: offset of the block's first byte from objout.textbegin */
+ uint addr;
+ /* unresolved: text offset of the most recently emitted 32-bit
+  * displacement field that targets this block. The 4 bytes stored at
+  * that offset hold the offset of the previous such field, forming a
+  * chain that ends at 0. emitbin walks the chain and patches each
+  * field with the real displacement. */
+ uint relreloc;
+ };
+} *blkaddr;
+/* number of entries allocated in blkaddr; grown by emitbin to fn->nblk */
+static uint nblkaddr;
+
+/* Emit a jump to block `dst`: an unconditional JMP when cc == ALWAYS,
+ * otherwise the Jcc for condition `cc`.
+ *
+ * If the target's address is already known, the shortest encoding that
+ * reaches it is chosen (rel8 if the displacement fits in a signed byte, else
+ * rel32). If it is not known yet, a rel32 form is emitted whose displacement
+ * field temporarily stores the previous head of the target's relocation
+ * chain, and this field becomes the new head; emitbin patches it later. */
+static void
+Xjcc(uchar **pcode, enum cc cc, struct block *dst)
+{
+	struct blkaddr *target = &blkaddr[dst->id];
+	int here = *pcode - objout.textbegin;
+	bool conditional = cc != ALWAYS;
+	bool shortform = 0;
+	int disp;
+
+	if (!target->resolved) {
+		/* forward reference: link this displacement field into the chain.
+		 * The field follows 1 opcode byte for JMP, 2 for 'Jcc rel32' */
+		disp = target->relreloc;
+		target->relreloc = here + 1 + conditional;
+	} else {
+		/* displacement is relative to the end of the instruction; start by
+		 * assuming the 2-byte rel8 form */
+		disp = target->addr - (here + 2);
+		if ((uint)(disp + 128) < 256) {
+			shortform = 1;
+		} else {
+			/* rel32 form is 3 bytes longer, plus one more opcode byte
+			 * for 'Jcc rel32' */
+			disp -= 3;
+			disp -= conditional;
+		}
+	}
+
+	if (!conditional) {
+		B(shortform ? 0xEB : 0xE9); /* JMP rel8/rel32 */
+	} else {
+		assert(in_range(cc, 0, 0xF));
+		if (shortform) B(0x70 + cc); /* Jcc rel8 */
+		else B(0x0F), B(0x80 + cc); /* Jcc rel32 */
+	}
+	if (shortform) B(disp); else I32(disp);
+}
+
+/* Emit `SETcc r8`: write 1 to the low byte of register `reg` if condition `cc`
+ * holds, 0 otherwise. Only the low 8 bits are written, so emitinstr either
+ * zeroes the register beforehand or zero-extends it afterwards.
+ *
+ * cc must be a real condition code (0x0..0xF), not ALWAYS. */
+static void
+Xsetcc(uchar **pcode, enum cc cc, int reg)
+{
+	int rex = 0;
+
+	assert(in_range(cc, 0x0, 0xF));
+
+	/* a REX prefix (even one with no bits set) is needed to address
+	 * SPL/BPL/SIL/DIL; without it the same encodings select AH/CH/DH/BH */
+	if (in_range(reg, RSP, RDI)) rex = 0x40;
+	/* R8..R15 need REX.B. The 0x40 marker has to be included as well:
+	 * OR-ing only the B bit into 0 would emit byte 0x01, which is not a REX
+	 * prefix but the start of an unrelated instruction */
+	if (reg >> 3) rex = 0x40 | (reg >> 3);
+	if (rex) B(rex);
+	B(0x0F), B(0x90+cc); /* SETcc r/m8 */
+	B(0xC0 + (reg & 7)); /* ModR/M with mod=11, rm=reg */
+}
static void
Xpush(uchar **pcode, enum reg reg)
@@ -474,13 +558,33 @@ Xpop(uchar **pcode, enum reg reg)
B(0x58 + (reg & 7));
}
+/* Are the CPU flags live at instruction index `curi` of `blk`?
+ *
+ * Flags count as live when the block ends in a conditional branch (Jb) whose
+ * condition operand names a comparison instruction, and that comparison is
+ * not found among the instructions after `curi`. In that case an instruction
+ * emitted at `curi` must not clobber the flags the branch will read.
+ * Returns 0 when the flags may be clobbered freely.
+ *
+ * NOTE(review): when the branch has no condition operand, emitbranch still
+ * branches on ZF, yet this function reports the flags as dead -- confirm that
+ * nothing flag-clobbering can be emitted after the flag-setting instruction
+ * in that case. */
+static bool
+flagslivep(struct block *blk, int curi)
+{
+	int cmpi;
+
+	/* conditional branch that references a previous comparison instruction? */
+	if (blk->jmp.t != Jb || !blk->jmp.arg[0].t)
+		return 0;
+	assert(blk->jmp.arg[0].t == RTMP);
+	/* the comparison is the branch's condition operand, arg[0] -- the same
+	 * operand that was just checked and that emitbranch reads */
+	cmpi = blk->jmp.arg[0].i;
+	for (int i = blk->ins.n - 1; i > curi; --i) {
+		if (blk->ins.p[i] == cmpi)
+			/* flags defined after given instruction, dead here */
+			return 0;
+	}
+	/* flags defined before given instruction, live here */
+	return 1;
+}
+
/* Copy dst = val, with some peephole optimizations */
static void
-gencopy(uchar **pcode, enum irclass cls, struct oper dst, union ref val)
+gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct oper dst, union ref val)
{
assert(dst.t == OREG);
- if (val.t == RMORE) {
- /* this is a LEA, but maybe it can be lowered to a 2-address instruction */
+ if (val.t == RMORE && !flagslivep(blk, curi)) {
+ /* this is a LEA, but maybe it can be lowered to a 2-address instruction,
+ * which may clobber flags */
const struct addr *addr = &addrht[val.i];
if (addr->base.t && dst.reg == mkregoper(addr->base).reg) { /* base = dst */
if (addr->index.t && !addr->disp && !addr->shift){
@@ -514,9 +618,9 @@ gencopy(uchar **pcode, enum irclass cls, struct oper dst, union ref val)
/* normal (not 2-address) case */
Lea:
Xlea(pcode, cls, dst, ref2oper(val));
- } else if (val.t == RICON && val.i == 0 && dst.t == OREG) {
- /* dst = 0 -> xor dst, dst */
- Xxor(pcode, cls, dst, dst);
+ } else if (val.bits == ZEROREF.bits && dst.t == OREG && !flagslivep(blk, curi)) {
+ /* dst = 0 -> xor dst, dst; but only if it is ok to clobber flags */
+ Xxor(pcode, kisint(cls) ? KI4 : cls, dst, dst);
} else if (val.t == RXCON && conht[val.i].isdat && !conht[val.i].deref) {
Xlea(pcode, cls, dst, mkoper(OCONR, .con = val.i));
} else {
@@ -526,16 +630,33 @@ gencopy(uchar **pcode, enum irclass cls, struct oper dst, union ref val)
}
}
+/* Condition code to test after `CMP l, r` for each integer comparison opcode.
+ * Signed comparisons use the less/greater family (SF/OF based); unsigned
+ * comparisons use the below/above family (CF based). */
+static const uchar icmpop2cc[] = {
+	[Oequ] = CCE, [Oneq] = CCNE,
+	[Olth] = CCL, [Ogth] = CCG, [Olte] = CCLE, [Ogte] = CCGE,
+	/* unsigned >= is "above or equal" (CF = 0), not the signed CCGE */
+	[Oulth] = CCB, [Ougth] = CCA, [Oulte] = CCBE, [Ougte] = CCAE,
+};
+/* condition code for TEST reg,reg (compare with zero).
+ * TEST r,r leaves ZF and SF describing r and clears CF and OF, so:
+ *  - unsigned <= 0 is the same as == 0, and unsigned > 0 the same as != 0;
+ *  - signed < 0 and >= 0 depend on the sign flag alone;
+ *  - signed <= 0 and > 0 can keep the CMP codes, since OF is 0;
+ *  - unsigned < 0 is never true (CCB needs CF=1) and unsigned >= 0 is always
+ *    true (CCAE needs CF=0). */
+static const uchar icmpzero2cc[] = {
+ [Oequ] = CCE, [Oulte] = CCE,
+ [Oneq] = CCNE, [Ougth] = CCNE,
+ [Olth] = CCS, [Ogte] = CCNS,
+ [Olte] = CCLE, [Ogth] = CCG,
+ [Oulth] = CCB, [Ougte] = CCAE, /* actually constants */
+};
+
static void
-emitinstr(uchar **pcode, struct function *fn, struct block *blk, int ii, struct instr *ins)
+emitinstr(uchar **pcode, struct function *fn, struct block *blk, int curi, struct instr *ins)
{
struct oper dst, src;
+ bool regzeroed;
enum irclass cls = ins->cls;
void (*X)(uchar **, enum irclass, struct oper, struct oper) = NULL;
void (*X1)(uchar **, enum irclass, struct oper) = NULL;
switch (ins->op) {
- default: assert(!"nyi ins");
+ default:
+ fatal(NULL, "amd64: in %y; unimplemented instr '%s'", fn->name, opnames[ins->op]);
case Onop: break;
case Ostore1: cls = KI4, X = Xmovb; goto Store;
case Ostore2: cls = KI4, X = Xmovw; goto Store;
@@ -577,6 +698,7 @@ emitinstr(uchar **pcode, struct function *fn, struct block *blk, int ii, struct
break;
case Osub: X = kisint(cls) ? Xsub : Xsubf; goto ALU2;
case Oshl: X = Xshl; goto ALU2;
+ case Oxor: X = Xxor; goto ALU2;
ALU2:
dst = mkregoper(ins->l);
assert(ins->reg-1 == dst.reg);
@@ -601,13 +723,44 @@ emitinstr(uchar **pcode, struct function *fn, struct block *blk, int ii, struct
case KF4: case KF8: assert(!"nyi");
}
break;
+ case Oequ: case Oneq:
+ case Olth: case Ogth: case Olte: case Ogte:
+ case Oulth: case Ougth: case Oulte: case Ougte:
+ dst = mkregoper(ins->l);
+ /* TODO handle float cmps */
+ src = mkimmdatregoper(ins->r);
+ regzeroed = 0;
+ if (ins->reg && dst.reg != ins->reg-1 && (src.t != OREG || src.reg != ins->reg-1)) {
+ /* can zero output reg before test instruction (differs from both inputs) */
+ /* XXX this doesn't check if a source operand is an addr containing the register */
+ struct oper dst = reg2oper(ins->reg-1);
+ Xxor(pcode, KI4, dst, dst);
+ regzeroed = 1;
+ }
+ if (ins->r.bits != ZEROREF.bits)
+ Xcmp(pcode, cls, dst, src);
+ else
+ Xtest(pcode, cls, dst, dst);
+ if (ins->reg) {
+ enum cc cc;
+ dst = reg2oper(ins->reg-1);
+ if (ins->r.bits != ZEROREF.bits) { /* CMP */
+ cc = icmpop2cc[ins->op];
+ } else { /* TEST r,r (CMP r, 0) */
+ cc = icmpzero2cc[ins->op];
+ }
+ Xsetcc(pcode, cc, dst.reg);
+ if (!regzeroed)
+ Xmovzxb(pcode, KI4, dst, dst);
+ }
+ break;
case Omove:
dst = ref2oper(ins->l);
- gencopy(pcode, cls, dst, ins->r);
+ gencopy(pcode, cls, blk, curi, dst, ins->r);
break;
case Ocopy:
dst = reg2oper(ins->reg-1);
- gencopy(pcode, cls, dst, ins->l);
+ gencopy(pcode, cls, blk, curi, dst, ins->l);
break;
case Ocall:
Xcall(pcode, KPTR, ref2oper(ins->l));
@@ -617,6 +770,47 @@ emitinstr(uchar **pcode, struct function *fn, struct block *blk, int ii, struct
}
static void
+emitbranch(uchar **pcode, struct block *blk)
+{
+ enum cc cc = ALWAYS;
+ assert(blk->s1);
+ if (blk->s2) {
+ /* conditional branch.. */
+ union ref arg = blk->jmp.arg[0];
+
+ if (!arg.t) /* implicit by ZF */
+ cc = CCNZ;
+ else {
+ struct instr *ins;
+ assert(arg.t == RTMP);
+ ins = &instrtab[arg.i];
+ assert(oiscmp(ins->op));
+ /* TODO handle float cmps */
+ if (ins->r.bits != ZEROREF.bits) {
+ /* for CMP instr */
+ cc = icmpop2cc[ins->op];
+ } else {
+ /* for TEST instr, which modifies ZF and SF and sets CF = OF = 0 */
+ cc = icmpzero2cc[ins->op];
+ }
+ }
+ if (blk->s1 == blk->lnext) {
+ /* if s1 is next adjacent block, swap s1,s2 and flip condition to emit a
+ * single jump */
+ struct block *tmp = blk->s1;
+ blk->s1 = blk->s2;
+ blk->s2 = tmp;
+ cc ^= 1;
+ }
+ }
+ /* make sure to fallthru if jumping to next adjacent block */
+ if (blk->s2 || blk->s1 != blk->lnext)
+ Xjcc(pcode, cc, blk->s1);
+ if (blk->s2 && blk->s2 != blk->lnext)
+ Xjcc(pcode, ALWAYS, blk->s2);
+}
+
+static void
calleesave(uchar **pcode, struct function *fn)
{
if (bstest(fn->regusage, RBX)) Xpush(pcode, RBX);
@@ -662,6 +856,13 @@ emitbin(struct function *fn)
uchar **pcode = &objout.code;
uchar *start;
+
+ if (nblkaddr < fn->nblk) {
+ blkaddr = xrealloc(blkaddr, fn->nblk * sizeof *blkaddr);
+ nblkaddr = fn->nblk;
+ }
+ memset(blkaddr, 0, nblkaddr * sizeof *blkaddr);
+
aligncode(pcode, 16);
start = *pcode;
@@ -682,6 +883,20 @@ emitbin(struct function *fn)
blk = fn->entry;
do {
+ struct blkaddr *bb = &blkaddr[blk->id];
+ uint bbaddr = *pcode - objout.textbegin;
+ assert(!bb->resolved);
+ while (bb->relreloc) {
+ uint next;
+ int disp = bbaddr - bb->relreloc - 4;
+
+ memcpy(&next, objout.textbegin + bb->relreloc, 4);
+ wr32le(objout.textbegin + bb->relreloc, disp);
+ bb->relreloc = next;
+ }
+ bb->resolved = 1;
+ bb->addr = bbaddr;
+
for (int i = 0; i < blk->ins.n; ++i) {
emitinstr(pcode, fn, blk, i, &instrtab[blk->ins.p[i]]);
}
@@ -690,7 +905,7 @@ emitbin(struct function *fn)
calleerestore(pcode, fn);
if (fn->stksiz) B(0xC9); /* leave */
B(0xC3); /* ret */
- }
+ } else emitbranch(pcode, blk);
} while ((blk = blk->lnext) != fn->entry);
objdeffunc(fn->name, fn->globl, start - objout.textbegin, *pcode - start);
}