aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Makefile1
-rw-r--r--aarch64/aapcs.c77
-rw-r--r--aarch64/all.h16
-rw-r--r--aarch64/emit.c672
-rw-r--r--aarch64/isel.c440
-rw-r--r--c/lex.c4
-rw-r--r--common.h8
-rw-r--r--obj/elf.c11
-rw-r--r--obj/obj.h1
-rw-r--r--targ.c23
-rw-r--r--x86_64/isel.c16
11 files changed, 1245 insertions, 24 deletions
diff --git a/Makefile b/Makefile
index 9818c08..726f121 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,7 @@
SRC=main.c io.c mem.c c/c.c c/lex.c c/eval.c c/builtin.c type.c targ.c \
ir/ir.c ir/builder.c ir/fold.c ir/dump.c ir/ssa.c ir/cfg.c ir/intrin.c ir/abi0.c ir/mem2reg.c ir/regalloc.c ir/simpl.c ir/stack.c \
x86_64/sysv.c x86_64/isel.c x86_64/emit.c \
+ aarch64/aapcs.c aarch64/isel.c aarch64/emit.c \
obj/obj.c obj/elf.c \
embedfilesdir.c
CFLAGS=-Wall -std=c11 -pedantic
diff --git a/aarch64/aapcs.c b/aarch64/aapcs.c
new file mode 100644
index 0000000..fc08da1
--- /dev/null
+++ b/aarch64/aapcs.c
@@ -0,0 +1,77 @@
+#include "all.h"
+
+/* Assign one argument of type `typ` to its location.
+ *
+ * Scalars take the next free FP register (V0-V7) or integer register
+ * (R0-R7); once the matching bank is used up they go to the stack.
+ *   r[0], cls[0]  receive the register (or stack offset) and the ir class
+ *   *ni, *nf      number of integer / float registers handed out so far
+ *   *ns           running stack offset, advanced by 8 per stack argument
+ *   r2off         unused here
+ * Returns 1 when the argument lives in a register, 0 when it is passed in
+ * memory.  Aggregates are not implemented yet.
+ */
+static int
+abiarg(short r[2], uchar cls[2], uchar *r2off, int *ni, int *nf, int *ns, union irtype typ)
+{
+   enum { NINT = 8, NFLT = 8 };
+
+   if (typ.isagg) {
+      assert(!"nyi");
+      return 0; /* keep the result defined when asserts are compiled out */
+   }
+
+   cls[0] = typ.cls;
+   if (kisflt(cls[0]) && *nf < NFLT) {
+      r[0] = V(0) + (*nf)++;
+      return 1;
+   }
+   if (kisint(cls[0]) && *ni < NINT) {
+      r[0] = R0 + (*ni)++;
+      return 1;
+   }
+   r[0] = *ns;
+   *ns += 8;
+   return 0; /* MEMORY */
+}
+
+/* Assign the return value of type `typ` to its location.
+ * Scalars come back in V0 (floating point) or R0 (anything else) and the
+ * function returns 1.  Aggregates are classified through abiarg(); a
+ * non-zero result from it is returned unchanged, otherwise r[0] = -1,
+ * r[1] = R8 (caller-allocated result address) and 0 is returned. */
+static int
+abiret(short r[2], uchar cls[2], uchar *r2off, int *ni, union irtype typ)
+{
+   int nfpr = 0, nstk = 0, inregs;
+
+   if (!typ.isagg) {
+      cls[0] = typ.cls;
+      r[0] = kisflt(cls[0]) ? V(0) : R0;
+      return 1;
+   }
+
+   inregs = abiarg(r, cls, r2off, ni, &nfpr, &nstk, typ);
+   if (inregs != 0)
+      return inregs;
+
+   /* caller-allocated result address in x8 */
+   assert(*ni == 0);
+   r[0] = -1;
+   r[1] = R(8);
+   return 0;
+}
+
+static void /* va_start hook of the target table; not implemented yet, so
+             * reaching it trips the "nyi" assertion. All parameters unused. */
+vastart(struct function *fn, struct block *blk, int *curi)
+{
+ assert(!"nyi");
+}
+
+static void /* va_arg hook of the target table; not implemented yet, so
+             * reaching it trips the "nyi" assertion. All parameters unused. */
+vaarg(struct function *fn, struct block *blk, int *curi)
+{
+ assert(!"nyi");
+}
+
+static const char aarch64_rnames[][6] = { /* printable register names in enum reg order: R0-R28, FP, LR, SP, then V0-V31 */
+ "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9","R10","R11","R12","R13","R14","R15",
+ "R16","R17","R18","R19","R20","R21","R22","R23","R24","R25","R26","R27","R28", "FP", "LR", "SP",
+ "V0", "V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9","V10","V11","V12","V13","V14","V15",
+ "V16","V17","V18","V19","V20","V21","V22","V23","V24","V25","V26","V27","V28","V29","V30","V31",
+};
+
+const struct mctarg t_aarch64_aapcs = { /* target description wiring this file's ABI hooks to the aarch64 isel/emit passes */
+ .gpr0 = R0, .ngpr = 31, /* R0..R30; SP (31) is not counted */
+ .bpr = FP,
+ .gprscratch = R(16), .fprscratch = V(31),
+ .fpr0 = V0, .nfpr = 32,
+ .rcallee = BIT(R(19)) | BIT(R(20)) | BIT(R(21)) | BIT(R(22)) | BIT(R(23)) /* R19-R28 and V8-V15 */
+ | BIT(R(24)) | BIT(R(25)) | BIT(R(26)) | BIT(R(27)) | BIT(R(28))
+ | BIT( V(8)) | BIT( V(9)) | BIT(V(10)) | BIT(V(11)) | BIT(V(12))
+ | BIT(V(13)) | BIT(V(14)) | BIT(V(15)),
+ .rglob = BIT(FP) | BIT(LR) | BIT(SP),
+ .rnames = aarch64_rnames,
+ .objkind = OBJELF,
+ .abiret = abiret,
+ .abiarg = abiarg,
+ .vastart = vastart, /* stub: asserts "nyi" */
+ .vaarg = vaarg, /* stub: asserts "nyi" */
+ .isel = aarch64_isel,
+ .emit = aarch64_emit,
+};
+
+/* vim:set ts=3 sw=3 expandtab: */
diff --git a/aarch64/all.h b/aarch64/all.h
new file mode 100644
index 0000000..828909e
--- /dev/null
+++ b/aarch64/all.h
@@ -0,0 +1,16 @@
+#include "../ir/ir.h"
+
+/* Machine register numbering: general-purpose registers occupy 0-31
+ * (FP = R29, LR = R30, SP = 31) and the FP/SIMD registers V0-V31 follow
+ * directly after.  R(n)/V(n) parenthesize their argument so expressions
+ * such as V(i << 1) expand as intended. */
+enum reg {
+   R0 = 0,
+#define R(n) (R0+(n))
+   FP = R(29), LR = R(30), SP = R(31),
+   V0,
+#define V(n) (V0+(n))
+};
+
+bool aarch64_logimm(uint *enc, enum irclass, uvlong x); /* is x a valid logical immediate? if so and enc != NULL, stores the encoding in *enc */
+void aarch64_isel(struct function *); /* instruction selection entry point (mctarg.isel) */
+void aarch64_emit(struct function *); /* machine code emission entry point (mctarg.emit) */
+
+/* vim:set ts=3 sw=3 expandtab: */
+
diff --git a/aarch64/emit.c b/aarch64/emit.c
new file mode 100644
index 0000000..a0a7ca6
--- /dev/null
+++ b/aarch64/emit.c
@@ -0,0 +1,672 @@
+#include "all.h"
+#include "../obj/obj.h"
+#include "../endian.h"
+
+/* References: https://weinholt.se/articles/arm-a64-instruction-set/
+ * ARM ARM https://developer.arm.com/documentation/ddi0628/aa/?lang=en
+ */
+
+enum operkind { ONONE, OREGZR, OREG, OIMM, OMEM, OSYM }; /* discriminator stored in struct oper.t */
+enum shiftkind { SLSL, SLSR, SASR, SROR }; /* shift kind of a shifted-register operand (oper.shft) */
+enum addrmode { AIMMIDX, AREGIDX, APREIDX, APOSTIDX }; /* immediate offset, register offset, pre-indexed, post-indexed */
+enum addrregext { XUXTW = 2, XLSL = 3, XSXTW = 6, XSXTX = 7 }; /* values are placed as-is in bits 13-15 of register-offset loads/stores */
+struct oper { /* one instruction operand; `t` selects which union member is valid */
+ uchar t; /* enum operkind */
+ union {
+ struct { /* OREG (opt. shifted) */
+ uchar reg; /* enum reg */
+ uchar shft : 2, /* enum shiftkind */
+ shamt : 6; /* shift amount */
+ };
+ struct { /* OMEM */
+ uchar mode : 3; /* enum addrmode */
+ uchar base : 5; /* reg */
+ union {
+ struct { /* used with AREGIDX */
+ uchar index : 5; /* reg */
+ uchar ext : 3; /* enum addrregext */
+ uchar shamt; /* 0 or 1: goes into the S bit of the encoding */
+ };
+ short disp; /* byte displacement for the immediate/pre/post modes */
+ };
+ } m;
+ vlong imm; uvlong uimm; /* OIMM */
+ struct { /* OSYM */
+ ushort con; /* constant-table index of the symbol */
+ int cdisp; /* displacement added to the symbol */
+ };
+ };
+};
+
+#define REGZR ((struct oper){OREGZR, .reg=31}) /* zero-register operand (register number 31) */
+#define mkoper(t, ...) ((struct oper){(t), __VA_ARGS__}) /* operand literal of kind t with designated fields */
+#define reg2oper(r) (assert((uint)(r) <= V(31)), mkoper(OREG, .reg = (r))) /* plain register operand; asserts r is a valid enum reg */
+
+/* Turn the address reference `r` of a `msiz`-bit access into an operand.
+ *   RTMP / RREG -> [base] with a zero displacement
+ *   RADDR       -> symbol+disp when the base is an address constant,
+ *                  [base, #disp] without an index, else [base, index{, lsl}]
+ * Any other reference kind is not implemented. */
+static struct oper
+mkmemoper(uint msiz, union ref r)
+{
+   if (r.t == RTMP) {
+      assert(in_range(instrtab[r.i].reg-1, R0, SP));
+      return mkoper(OMEM, .m = {.mode = AIMMIDX, .base = instrtab[r.i].reg-1});
+   }
+   if (r.t == RREG)
+      return mkoper(OMEM, .m = {.mode = AIMMIDX, .base = r.i});
+   if (r.t == RADDR) {
+      const struct addr *a = &addrht[r.i];
+
+      assert(a->shift <= 3 && (!a->disp || !a->index.bits));
+      if (isaddrcon(a->base,0)) {
+         assert(!a->index.bits);
+         return mkoper(OSYM, .con = a->base.i, .cdisp = a->disp);
+      }
+      assert(a->base.t == RREG);
+      if (!a->index.bits)
+         return mkoper(OMEM, .m = {.mode = AIMMIDX, .base = a->base.i, .disp = a->disp});
+
+      /* register offset: a shift is only usable when it scales by the access size */
+      assert(a->index.t == RREG);
+      assert(a->shift == 0 || 8<<a->shift == msiz);
+      return mkoper(OMEM, .m = {
+         .mode = AREGIDX,
+         .base = a->base.i,
+         .index = a->index.i,
+         .ext = XLSL,
+         .shamt = a->shift ? 1 : 0,
+      });
+   }
+   assert(!"nyi");
+}
+
+/* Translate an IR reference into an instruction operand.
+ *   RTMP  -> the register assigned to the temporary (instr.reg stores the
+ *            register number + 1), or ONONE when none is assigned
+ *   RREG  -> that register
+ *   RICON -> immediate
+ *   RXCON -> immediate for KI32/KI64 constants, symbol when the constant
+ *            has no class
+ * Any other reference is a bug and asserts.
+ */
+static struct oper
+ref2oper(union ref r)
+{
+   switch (r.t) {
+   case RTMP:
+      if (!instrtab[r.i].reg)
+         return mkoper(ONONE,);
+      return reg2oper(instrtab[r.i].reg-1);
+   case RREG:
+      return reg2oper(r.i);
+   case RICON:
+      return mkoper(OIMM, .imm = r.i);
+   case RXCON:
+      if (conht[r.i].cls == KI32 || conht[r.i].cls == KI64)
+         return mkoper(OIMM, .imm = conht[r.i].i);
+      if (!conht[r.i].cls)
+         return mkoper(OSYM, .con = r.i);
+      break;
+   default:
+      break;
+   }
+   assert(0);
+   return mkoper(ONONE,); /* only reached when asserts are compiled out */
+}
+
+enum operpat { /* operand patterns an instruction description can require; checked by opermatch() */
+ PNONE, /* operand absent */
+ PGPRZ, /* R0-R30,ZR */
+ PGPRSP, /* R0-R30,SP */
+ PSP, /* SP */
+ PGPRZSHFT, /* R0-30,ZR SHFT #n */
+ PFPR, /* V0 - V31 */
+ PZERO, /* zero immediate */
+ PU6, /* 6-bit uimm */
+ PU12SL12, /* 12 bit uimm, optionally left shifted by 12 */
+ PU16SL16, /* 16 bit uimm, left shift by 0/16/32/48 */
+ PLOGIMM, /* immediate for logical instrs */
+ PMEMAIMM, /* addr 12bit immediate byte offset */
+ PMEMAIMMH, /* addr 12bit immediate halfword offset (multiple of 2) */
+ PMEMAIMMW, /* addr 12bit immediate word offset (multiple of 4) */
+ PMEMAIMMX, /* addr 12bit immediate doubleword offset (multiple of 8) */
+ PMEMPREPOST, /* addr signed 9bit immediate byte offset */
+ PMEMAREG, /* addr reg offset, optionally left shifted */
+ PMEMAXREG, /* addr extended reg offset */
+ PSYM, /* symbol */
+};
+enum operenc { /* how encode() packs the operands into the instruction word */
+ EN_ADDSUBEXT3R, /* add/sub-ext-reg */
+ EN_ADDSUBSHFT3R, /* add/sub-shift-reg */
+ EN_LOGSHFT3R, /* logical/shifted-reg */
+ EN_ARITH2R, /* data-processing/1src */
+ EN_ARITH3R, /* data-processing/2src */
+ EN_ARITH4R, /* data-processing/3src */
+ EN_ADDSUBIMM, /* add/subtract-imm */
+ EN_LOGIMM, /* logical-imm */
+ EN_MOVEIMM, /* move/wide-imm */
+ EN_MEMAIMM, /* load/store/unsigned-imm */
+ EN_MEMAIMMH, /* load/store/unsigned-imm (halfword) */
+ EN_MEMAIMMW, /* load/store/unsigned-imm (word) */
+ EN_MEMAIMMX, /* load/store/unsigned-imm (doubleword) */
+ EN_MEMAPREPOST, /* load/store/pre/postidx-imm */
+ EN_MEMAREG, /* load/store/reg-offset */
+ EN_MEMPPREPOST, /* load/store-pair/pre/postidx-imm */
+};
+struct desc { /* one encodable form of an instruction; tables of these are searched by encode() */
+ uchar psiz; /* subset of {4,8} */
+ uchar pt[3]; /* enum operpat per operand, up to 3 operands (opermatch() takes each as a single pattern value) */
+ uint opc; /* base instruction word the operand fields are OR-ed into */
+ uchar operenc; /* enum operenc */
+};
+
+/* Match operand `o` against pattern `pat` for an instruction of class `k`.
+ * Returns true when the operand fits a slot of that kind.  Patterns without
+ * a case below (PMEMAXREG, PSYM) are not implemented and assert.
+ *
+ * The immediate masks are written as 64-bit constants on purpose:
+ * ~0xFFFF0000 is evaluated in 32-bit unsigned arithmetic and would only
+ * test the low 16 bits. */
+static inline bool
+opermatch(enum operpat pat, enum irclass k, struct oper o)
+{
+   switch (pat) {
+   case PNONE: return !o.t;
+   case PGPRZ:
+      return o.t == OREGZR || (o.t == OREG && in_range(o.reg, R0, R(30)) && !o.shamt);
+   case PGPRSP:
+      return o.t == OREG && in_range(o.reg, R0, R(31)) && !o.shamt;
+   case PGPRZSHFT:
+      return o.t == OREGZR || (o.t == OREG && in_range(o.reg, R0, R(30)));
+   case PSP: return o.t == OREG && o.reg == SP;
+   case PFPR: return o.t == OREG && in_range(o.reg, V0, V(31));
+   case PZERO: return o.t == OIMM && o.imm == 0;
+   case PU6: /* 0..63, compared at full width so large values cannot alias */
+      return o.t == OIMM && (uvlong)o.imm < 64;
+   case PU12SL12:
+      return o.t == OIMM
+          && ((o.imm &~ 0xFFFull) == 0 || (o.imm &~ 0xFFF000ull) == 0);
+   case PU16SL16:
+      return o.t == OIMM
+          && ((o.imm &~ 0xFFFFull) == 0 || (o.imm &~ (0xFFFFull<<16)) == 0
+           || (o.imm &~ (0xFFFFull<<32)) == 0 || (o.imm &~ (0xFFFFull<<48)) == 0);
+   case PLOGIMM: return o.t == OIMM && aarch64_logimm(NULL, k, o.imm);
+   case PMEMAIMM: /* negative displacements become huge after the cast and fail */
+      return o.t == OMEM && o.m.mode == AIMMIDX && (uint)o.m.disp < (1<<12);
+   case PMEMAIMMH:
+      return o.t == OMEM && o.m.mode == AIMMIDX && (uint)o.m.disp < (1<<13) && !(o.m.disp % 2);
+   case PMEMAIMMW:
+      return o.t == OMEM && o.m.mode == AIMMIDX && (uint)o.m.disp < (1<<14) && !(o.m.disp % 4);
+   case PMEMAIMMX:
+      return o.t == OMEM && o.m.mode == AIMMIDX && (uint)o.m.disp < (1<<15) && !(o.m.disp % 8);
+   case PMEMAREG:
+      return o.t == OMEM && o.m.mode == AREGIDX;
+   case PMEMPREPOST:
+      return o.t == OMEM && (o.m.mode == APREIDX || o.m.mode == APOSTIDX
+            || (o.m.mode == AIMMIDX && o.m.disp >= -256 && o.m.disp < 256));
+   default:
+      break;
+   }
+   assert(0);
+   return 0; /* only reached when asserts are compiled out */
+}
+
+/* code output helpers */
+#define W32(w) (wr32targ(*pcode, (w)), *pcode += 4) /* store one 32-bit word at *pcode via wr32targ, then advance by 4; needs `pcode` in scope */
+
+static uchar *fnstart; /* code position where the current function starts (set by emitbin) */
+static internstr curfnsym; /* name of the function being emitted (set by emitbin) */
+static bool usebp; /* set by emitbin; makes calleesave/calleerestore include FP */
+static int rbpoff; /* NOTE(review): only touched by the alignment fix-up in emitbin, never initialised per function here -- confirm intent */
+
+/* Given an instruction description table, find the first entry that matches
+ * the operand size and all three operand patterns, then encode it and append
+ * the word to *pcode.  o[0] always lands in the lowest register field. */
+static void
+encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct oper o[3])
+{
+ const struct desc *en = NULL;
+ for (int i = 0; i < ntab; ++i) {
+ if (!(tab[i].psiz & cls2siz[k])) continue; /* entry not valid for this operand size */
+ for (int j = 0; j < 3; ++j)
+ if (!opermatch(tab[i].pt[j], k, o[j]))
+ goto Skip;
+ en = &tab[i];
+ break;
+ Skip:;
+ }
+ assert(en && "no match for instr");
+
+ uint sf = cls2siz[k] >> 3; /* 1 for 8-byte classes, 0 for 4-byte ones */
+ uint ins = en->opc, sh, nimmrs;
+ switch (en->operenc) {
+ default: assert(!"nyi enc"); /* NOTE(review): falls into the next case when asserts are disabled */
+ case EN_ADDSUBSHFT3R:
+ case EN_LOGSHFT3R:
+ ins |= sf<<31 | o[2].shft<<22 | o[2].reg<<16 | o[2].shamt<<10 | o[1].reg<<5 | o[0].reg;
+ break;
+ case EN_ADDSUBIMM:
+ sh = o[2].imm > 0xFFF; /* value only fits as imm12 << 12 */
+ ins |= sf<<31 | sh<<22 | (o[2].uimm >> 12*sh)<<10 | o[1].reg<<5 | o[0].reg;
+ break;
+ case EN_LOGIMM:
+ assert(aarch64_logimm(&nimmrs, k, o[2].uimm)); /* NOTE(review): nimmrs is only computed inside the assert */
+ ins |= sf<<31 | nimmrs<<10 | o[1].reg<<5 | o[0].reg;
+ break;
+ case EN_MOVEIMM:
+ sh = o[1].imm ? lowestsetbit(o[1].imm) / 16 : 0; /* index of the 16-bit chunk holding the value */
+ ins |= sf<<31 | sh<<21 | (o[1].uimm >> 16*sh)<<5 | o[0].reg;
+ break;
+ case EN_MEMAIMM: AImm:
+ ins |= o[1].m.disp<<10 | o[1].m.base<<5 | o[0].reg;
+ break;
+ case EN_MEMAIMMH: o[1].m.disp >>= 1; goto AImm; /* offset is stored scaled by the access size */
+ case EN_MEMAIMMW: o[1].m.disp >>= 2; goto AImm;
+ case EN_MEMAIMMX: o[1].m.disp >>= 3; goto AImm;
+ case EN_MEMAPREPOST:
+ ins |= (o[1].m.disp&0x1FF)<<12 | o[1].m.base<<5 | o[0].reg;
+ if (o[1].m.mode == APREIDX) ins |= 3<<10;
+ else if (o[1].m.mode == APOSTIDX) ins |= 1<<10; /* AIMMIDX leaves bits 10-11 clear */
+ break;
+ case EN_MEMAREG:
+ assert(o[1].m.shamt <= 1);
+ ins |= o[1].m.index<<16 | o[1].m.ext<<13 | o[1].m.shamt<<12 | o[1].m.base<<5 | o[0].reg;
+ break;
+ case EN_MEMPPREPOST:
+ assert(o[2].m.disp % 8 == 0);
+ ins |= (o[2].m.disp/8&0x7F)<<15 | o[1].reg<<10 | o[2].m.base<<5 | o[0].reg; /* 7-bit offset in units of 8 bytes */
+ if (o[2].m.mode == APREIDX) ins |= 3<<23;
+ else if (o[2].m.mode == APOSTIDX) ins |= 1<<23;
+ else ins |= 2<<23; /* plain offset addressing, no writeback */
+ break;
+ }
+ W32(ins);
+}
+#define DEFINSTR1(X, ...) \
+ static void \
+ X(uchar **pcode, enum irclass k, struct oper a) \
+ { \
+ static const struct desc tab[] = { __VA_ARGS__ }; \
+ encode(pcode, tab, countof(tab), k, ((struct oper [3]){a})); \
+ } /* DEFINSTR1: defines emitter X taking one operand; the variadic part is its struct desc table; unused operand slots are zero (ONONE) */
+
+#define DEFINSTR2(X, ...) \
+ static void \
+ X(uchar **pcode, enum irclass k, struct oper op1, struct oper op2) \
+ { \
+ static const struct desc tab[] = { __VA_ARGS__ }; \
+ encode(pcode, tab, countof(tab), k, ((struct oper [3]){op1,op2})); \
+ } /* DEFINSTR2: same, two operands */
+#define DEFINSTR3(X, ...) \
+ static void \
+ X(uchar **pcode, enum irclass k, struct oper op1, struct oper op2, struct oper op3) \
+ { \
+ static const struct desc tab[] = { __VA_ARGS__ }; \
+ encode(pcode, tab, countof(tab), k, ((struct oper [3]){op1,op2,op3})); \
+ } /* DEFINSTR3: same, three operands */
+
+DEFINSTR3(Xadd, /* rows are {sizes, {operand patterns}, base opcode, encoding}; the first matching row wins */
+ {4|8, {PGPRSP, PGPRSP, PU12SL12}, 0x11000000, EN_ADDSUBIMM}, /* ADD (immediate) */
+ {4|8, {PGPRZ, PGPRZ, PGPRZSHFT}, 0x0B000000, EN_ADDSUBSHFT3R}, /* ADD (shifted register) */
+)
+DEFINSTR3(Xsub, /* same layout as Xadd */
+ {4|8, {PGPRSP, PGPRSP, PU12SL12}, 0x51000000, EN_ADDSUBIMM}, /* SUB (immediate) */
+ {4|8, {PGPRZ, PGPRZ, PGPRZSHFT}, 0x4B000000, EN_ADDSUBSHFT3R}, /* SUB (shifted register) */
+)
+
+DEFINSTR3(Xand, /* logical ops: immediate form first (destination may be SP), then shifted register */
+ {4|8, {PGPRSP, PGPRZ, PLOGIMM}, 0x12000000, EN_LOGIMM}, /* AND (immediate) */
+ {4|8, {PGPRZ, PGPRZ, PGPRZSHFT}, 0x0A000000, EN_LOGSHFT3R}, /* AND (shifted register) */
+)
+DEFINSTR3(Xorr, /* also used by gencopy() to implement register and logical-immediate moves */
+ {4|8, {PGPRSP, PGPRZ, PLOGIMM}, 0x32000000, EN_LOGIMM}, /* ORR (immediate) */
+ {4|8, {PGPRZ, PGPRZ, PGPRZSHFT}, 0x2A000000, EN_LOGSHFT3R}, /* ORR (shifted register) */
+)
+DEFINSTR3(Xeor,
+ {4|8, {PGPRSP, PGPRZ, PLOGIMM}, 0x52000000, EN_LOGIMM}, /* EOR (immediate) */
+ {4|8, {PGPRZ, PGPRZ, PGPRZSHFT}, 0x4A000000, EN_LOGSHFT3R}, /* EOR (shifted register) */
+)
+
+/* Emit UBFM rd, rn, #immr, #imms.  Any class other than KI32 selects the
+ * 64-bit form, which sets both bit 31 and bit 22 of the instruction. */
+static void
+Xubfm(uchar **pcode, enum irclass k, struct oper rd, struct oper rn, uint immr, uint imms)
+{
+   const uint wide = (k == KI32) ? 0 : 1;
+   const uint width = wide ? 64 : 32;
+   uint word = 0x53000000;
+
+   assert(opermatch(PGPRZ, k, rd) && opermatch(PGPRZ, k, rn) && immr < width && imms < width);
+   word |= wide<<31 | wide<<22;
+   word |= immr<<16 | imms<<10;
+   word |= rn.reg<<5 | rd.reg;
+   W32(word);
+}
+/* Emit SBFM rd, rn, #immr, #imms.  Any class other than KI32 selects the
+ * 64-bit form, which sets both bit 31 and bit 22 of the instruction. */
+static void
+Xsbfm(uchar **pcode, enum irclass k, struct oper rd, struct oper rn, uint immr, uint imms)
+{
+   const uint wide = (k == KI32) ? 0 : 1;
+   const uint width = wide ? 64 : 32;
+   uint word = 0x13000000;
+
+   assert(opermatch(PGPRZ, k, rd) && opermatch(PGPRZ, k, rn) && immr < width && imms < width);
+   word |= wide<<31 | wide<<22;
+   word |= immr<<16 | imms<<10;
+   word |= rn.reg<<5 | rd.reg;
+   W32(word);
+}
+
+DEFINSTR2(Xmovz, {4|8, {PGPRZ, PU16SL16}, 0x52800000, EN_MOVEIMM}, /* MOVZ */) /* the immediate is passed already shifted into place; encode() derives the chunk index */
+DEFINSTR2(Xmovn, {4|8, {PGPRZ, PU16SL16}, 0x12800000, EN_MOVEIMM}, /* MOVN */)
+DEFINSTR2(Xmovk, {4|8, {PGPRZ, PU16SL16}, 0x72800000, EN_MOVEIMM}, /* MOVK */)
+DEFINSTR2(Xldr, /* loads: operand 1 is the destination register, operand 2 the address; rows split by size where the opcode differs */
+ {4, {PGPRZ, PMEMAIMMW}, 0xB9400000, EN_MEMAIMMW}, /* LDR (immediate) */
+ {8, {PGPRZ, PMEMAIMMX}, 0xF9400000, EN_MEMAIMMX},
+ {4, {PGPRZ, PMEMAREG}, 0xB8600800, EN_MEMAREG}, /* LDR (register) */
+ {8, {PGPRZ, PMEMAREG}, 0xF8600800, EN_MEMAREG},
+ {4, {PGPRZ, PMEMPREPOST}, 0xB8400000, EN_MEMAPREPOST}, /* LDR (immediate, (pre/postinc)) */
+ {8, {PGPRZ, PMEMPREPOST}, 0xF8400000, EN_MEMAPREPOST},
+)
+DEFINSTR2(Xldrsw, /* sign-extending 32-bit load; only a 64-bit destination form exists in this table */
+ {8, {PGPRZ, PMEMAIMMW}, 0xB9800000, EN_MEMAIMMW}, /* LDRSW (immediate) */
+// {8, {PGPRZ, PMEMAREG}, 0xB8A00800, EN_MEMAREG}, /* LDRSW (register) */
+ {8, {PGPRZ, PMEMPREPOST}, 0xB8800000, EN_MEMAPREPOST}, /* LDRSW (immediate, (pre/postinc)) */
+)
+DEFINSTR2(Xldrh,
+ {4|8, {PGPRZ, PMEMAIMMH}, 0x79400000, EN_MEMAIMMH}, /* LDRH (immediate) */
+ {4|8, {PGPRZ, PMEMAREG}, 0x78600800, EN_MEMAREG}, /* LDRH (register) */
+ {4|8, {PGPRZ, PMEMPREPOST}, 0x78400000, EN_MEMAPREPOST}, /* LDRH (immediate, (pre/postinc)) */
+)
+DEFINSTR2(Xldrsh,
+ {4, {PGPRZ, PMEMAIMMH}, 0x79C00000, EN_MEMAIMMH}, /* LDRSH (immediate) */
+ {8, {PGPRZ, PMEMAIMMH}, 0x79800000, EN_MEMAIMMH},
+ {4, {PGPRZ, PMEMAREG}, 0x78E00800, EN_MEMAREG}, /* LDRSH (register) */
+ {8, {PGPRZ, PMEMAREG}, 0x78A00800, EN_MEMAREG},
+ {4, {PGPRZ, PMEMPREPOST}, 0x78C00000, EN_MEMAPREPOST}, /* LDRSH (immediate, (pre/postinc)) */
+ {8, {PGPRZ, PMEMPREPOST}, 0x78800000, EN_MEMAPREPOST},
+)
+DEFINSTR2(Xldrb,
+ {4|8, {PGPRZ, PMEMAIMM}, 0x39400000, EN_MEMAIMM}, /* LDRB (immediate) */
+ {4|8, {PGPRZ, PMEMAREG}, 0x38600800, EN_MEMAREG}, /* LDRB (register) */
+ {4|8, {PGPRZ, PMEMPREPOST}, 0x38400000, EN_MEMAPREPOST}, /* LDRB (immediate, (pre/postinc)) */
+)
+DEFINSTR2(Xldrsb,
+ {4, {PGPRZ, PMEMAIMM}, 0x39C00000, EN_MEMAIMM}, /* LDRSB (immediate) */
+ {8, {PGPRZ, PMEMAIMM}, 0x39800000, EN_MEMAIMM},
+ {4, {PGPRZ, PMEMAREG}, 0x38E00800, EN_MEMAREG}, /* LDRSB (register) */
+ {8, {PGPRZ, PMEMAREG}, 0x38A00800, EN_MEMAREG},
+ {4, {PGPRZ, PMEMPREPOST}, 0x38C00000, EN_MEMAPREPOST}, /* LDRSB (immediate, (pre/postinc)) */
+ {8, {PGPRZ, PMEMPREPOST}, 0x38800000, EN_MEMAPREPOST},
+)
+DEFINSTR2(Xstr, /* stores: operand 1 is the register being stored, operand 2 the address */
+ {4, {PGPRZ, PMEMAIMMW}, 0xB9000000, EN_MEMAIMMW}, /* STR (immediate) */
+ {8, {PGPRZ, PMEMAIMMX}, 0xF9000000, EN_MEMAIMMX},
+ {4, {PGPRZ, PMEMAREG}, 0xB8200800, EN_MEMAREG}, /* STR (register) */
+ {8, {PGPRZ, PMEMAREG}, 0xF8200800, EN_MEMAREG},
+ {4, {PGPRZ, PMEMPREPOST}, 0xB8000000, EN_MEMAPREPOST}, /* STR (immediate, (pre/postinc)) */
+ {8, {PGPRZ, PMEMPREPOST}, 0xF8000000, EN_MEMAPREPOST},
+)
+DEFINSTR2(Xstrh,
+ {4|8, {PGPRZ, PMEMAIMMH}, 0x79000000, EN_MEMAIMMH}, /* STRH (immediate) */
+ {4|8, {PGPRZ, PMEMAREG}, 0x78200800, EN_MEMAREG}, /* STRH (register) */
+ {4|8, {PGPRZ, PMEMPREPOST}, 0x78000000, EN_MEMAPREPOST}, /* STRH (immediate, (pre/postinc)) */
+)
+DEFINSTR2(Xstrb,
+ {4|8, {PGPRZ, PMEMAIMM}, 0x39000000, EN_MEMAIMM}, /* STRB (immediate) */
+ {4|8, {PGPRZ, PMEMAREG}, 0x38200800, EN_MEMAREG}, /* STRB (register) */
+ {4|8, {PGPRZ, PMEMPREPOST}, 0x38000000, EN_MEMAPREPOST}, /* STRB (immediate, (pre/postinc)) */
+)
+DEFINSTR3(Xldp, /* 64-bit register pair load; encode() fills in bits 23-24 from the addressing mode */
+ {8, {PGPRZ, PGPRZ, PMEMPREPOST}, 0xA8400000, EN_MEMPPREPOST} /* LDP (immediate, (pre/postinc)) */
+)
+DEFINSTR3(Xstp, /* 64-bit register pair store; operand 1 goes to the lower address */
+ {8, {PGPRZ, PGPRZ, PMEMPREPOST}, 0xA8000000, EN_MEMPPREPOST} /* STP (immediate, (pre/postinc)) */
+)
+/* Emit a call to `f`.
+ *   OSYM -> BL with a REL_CALL26 relocation recorded against the symbol
+ *   else -> BLR through the general-purpose register holding the target
+ *           (previously this path validated the operand but emitted
+ *           nothing, silently dropping the call)
+ */
+static void
+Xcall(uchar **pcode, struct oper f)
+{
+   if (f.t == OSYM) {
+      objreloc(xcon2sym(f.con), REL_CALL26, Stext, *pcode - objout.textbegin, 0);
+      W32(0x94000000); /* BL <rel26> */
+      return;
+   }
+   assert(opermatch(PGPRZ, KPTR, f));
+   W32(0xD63F0000 | f.reg<<5); /* BLR Xn */
+}
+
+/* Load `val` into the register operand `dst`.
+ * Integer constants use, in order of preference: a single MOVN, an ORR
+ * with a logical immediate, or MOVZ followed by one MOVK per remaining
+ * non-zero 16-bit chunk.  Register and temporary sources become
+ * ORR dst, zr, src.  Every other combination asserts.
+ * `blk` and `curi` are unused. */
+static void
+gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct oper dst, union ref val)
+{
+   const bool toreg = dst.t == OREG;
+
+   if (kisint(cls) && toreg && isintcon(val)) {
+      /* MOV r, #imm */
+      uvlong bits = intconval(val);
+      int lo;
+
+      if (~bits <= 0xFFFF) {
+         /* immediate can be encoded with 1 MOVN instruction */
+         Xmovn(pcode, cls, dst, mkoper(OIMM, .imm = ~bits));
+         return;
+      }
+      if (bits > 0xFFFF && aarch64_logimm(NULL, cls, bits)) {
+         /* can be encoded as a logical immediate */
+         Xorr(pcode, cls, dst, REGZR, mkoper(OIMM, .uimm = bits));
+         return;
+      }
+
+      /* generate MOV (+ MOVKs) */
+      if (cls == KI32) bits = (uint)bits;
+      for (lo = 0; lo < 48; lo += 16)
+         if (bits >> lo & 0xFFFF)
+            break;
+      Xmovz(pcode, cls, dst, mkoper(OIMM, .imm = bits & (0xFFFFull << lo)));
+      for (int sh = lo + 16; sh <= 48; sh += 16) {
+         uvlong chunk = bits & (0xFFFFull << sh);
+         if (chunk)
+            Xmovk(pcode, cls, dst, mkoper(OIMM, .imm = chunk));
+      }
+      return;
+   }
+   if (toreg && (val.t == RREG || val.t == RTMP)) {
+      Xorr(pcode, cls, dst, REGZR, ref2oper(val)); /* MOV Rd, Rn ==> ORR Rd, zr, Rn */
+      return;
+   }
+   assert(0);
+}
+
+/* maps blk -> address when resolved; or to linked list of jump displacement
+ * relocations. Indexed by block id; allocated zeroed per function in emitbin. */
+static struct blkaddr {
+ bool resolved; /* set once the block's code position is known */
+ union {
+ uint addr; /* valid when resolved: offset from objout.textbegin */
+ uint relreloc; /* valid before: head of the pending relocation list, 0 when empty */
+ };
+} *blkaddr;
+
+static void /* Emit machine code for the single IR instruction `ins`. Ops without a
+             * case below hit the "nyi" assertion. instr.reg holds the destination
+             * register number + 1, hence the recurring `ins->reg-1`. */
+emitinstr(uchar **pcode, struct function *fn, struct block *blk, int curi, struct instr *ins)
+{
+ struct oper dst, o1, o2; /* o1 and o2 are unused so far */
+ enum irclass cls = ins->cls;
+ void (*X3)(uchar **, enum irclass, struct oper, struct oper, struct oper) = NULL; /* emitter picked by the ALU cases */
+ void (*X2)(uchar **, enum irclass, struct oper, struct oper) = NULL; /* emitter picked by the load/store cases */
+
+ switch (ins->op) {
+ default: assert(!"nyi");
+ case Onop: break;
+ case Omove: /* explicit destination in ins->l, source in ins->r */
+ dst = ref2oper(ins->l);
+ gencopy(pcode, cls, blk, curi, dst, ins->r);
+ break;
+ case Oextu32: cls = KI32; /* emitted as a 32-bit copy */
+ /* fallthru */
+ case Ocopy:
+ dst = reg2oper(ins->reg-1);
+ gencopy(pcode, cls, blk, curi, dst, ins->l);
+ break;
+ case Oneg: /* NEG Rd, Rn ==> SUB Rd, zr, Rn */
+ Xsub(pcode, cls, reg2oper(ins->reg-1), REGZR, ref2oper(ins->l));
+ break;
+ case Oexts8: case Oexts16: case Oexts32: /* SXTB/H/W Rd, Rn ==> SBFM Rd, Rn, #0, #7/15/31 */
+ Xsbfm(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->l), 0, (8<<(ins->op-Oexts8)/2)-1); /* presumably relies on the ext ops being declared in signed/unsigned pairs of growing width -- confirm against enum op */
+ break;
+ case Oextu8: case Oextu16: /* UXTB/H Rd, Rn ==> UBFM Rd, Rn, #0, #7/15 */
+ Xubfm(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->l), 0, (8<<(ins->op-Oexts8)/2)-1);
+ break;
+ case Oadd: dst = reg2oper(ins->reg-1); X3 = Xadd; goto ALU3;
+ case Osub: dst = reg2oper(ins->reg-1); X3 = Xsub; goto ALU3;
+ case Oand: dst = reg2oper(ins->reg-1); X3 = Xand; goto ALU3;
+ case Oior: dst = reg2oper(ins->reg-1); X3 = Xorr; goto ALU3;
+ case Oxor: dst = reg2oper(ins->reg-1); X3 = Xeor; goto ALU3;
+ ALU3:
+ X3(pcode, cls, dst, ref2oper(ins->l), ref2oper(ins->r));
+ break;
+ case Oshl: /* LSL Rd, Rn, #s ==> UBFM Rd, Rn, #(nbit-s), #(nbit-s-1) */
+ if (ins->r.t == RICON) {
+ uint nbit = cls == KI32 ? 32 : 64, s = ins->r.i & nbit-1;
+ assert(s > 0);
+ Xubfm(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->l), nbit-s, nbit-s-1);
+ } else assert(!"nyi lslv");
+ break;
+ case Oslr: /* LSR Rd, Rn, #s ==> UBFM Rd, Rn, #s, #(nbit-1) */
+ if (ins->r.t == RICON) {
+ uint nbit = cls == KI32 ? 32 : 64, s = ins->r.i & nbit-1;
+ assert(s > 0);
+ Xubfm(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->l), s, nbit-1);
+ } else assert(!"nyi lsrv");
+ break;
+ case Osar: /* ASR Rd, Rn, #s ==> SBFM Rd, Rn, #s, #(nbit-1) */
+ if (ins->r.t == RICON) {
+ uint nbit = cls == KI32 ? 32 : 64, s = ins->r.i & nbit-1;
+ assert(s > 0);
+ Xsbfm(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->l), s, nbit-1);
+ } else assert(!"nyi lsrv"); /* NOTE(review): message says lsrv in the arithmetic-shift case */
+ break;
+ case Oloadu8: X2 = Xldrb; goto Load;
+ case Oloads8: X2 = Xldrsb; goto Load;
+ case Oloadu16: X2 = Xldrh; goto Load;
+ case Oloads16: X2 = Xldrsh; goto Load;
+ case Oloads32:
+ if (cls != KI32) { /* only a wider destination needs the sign-extending load */
+ X2 = Xldrsw;
+ goto Load;
+ }
+ /* fallthru */
+ case Oloadu32:
+ cls = KI32;
+ /* fallthru */
+ case Oloadi64: X2 = Xldr;
+ Load:
+ X2(pcode, cls, reg2oper(ins->reg-1), mkmemoper(8<<(ins->op - Oloads8)/2, ins->l)); /* first argument is the access width in bits */
+ break;
+ case Ostore8: cls = KI32; X2 = Xstrb; goto Store;
+ case Ostore16: cls = KI32; X2 = Xstrh; goto Store;
+ case Ostore32: cls = KI32; X2 = Xstr; goto Store;
+ case Ostore64: cls = KI64; X2 = Xstr;
+ Store:
+ X2(pcode, cls, ref2oper(ins->r), mkmemoper(8<<(ins->op-Ostore8), ins->l)); /* value in ins->r, address in ins->l */
+ break;
+ case Ocall:
+ Xcall(pcode, ref2oper(ins->l));
+ break;
+ }
+}
+
+/* Push the registers this function must preserve: the used callee-saved
+ * registers, FP when `usebp` is set and LR for non-leaf functions.  Only
+ * R19..LR are scanned.  Registers are pushed in ascending order, two per
+ * STP with a 16-byte pre-decrement; a leftover register is paired with
+ * the zero register.  *npush grows by 2 for every STP emitted.
+ * Returns false when there is nothing to preserve. */
+static bool
+calleesave(int *npush, uchar **pcode, struct function *fn)
+{
+   regset live = fn->regusage & mctarg->rcallee;
+   uint pending = 0; /* first register of a pair still waiting for its partner */
+
+   if (usebp) live |= BIT(FP);
+   if (!fn->isleaf) live |= BIT(LR);
+   if (!live)
+      return 0;
+
+   for (uint reg = R(19); reg <= LR; ++reg) {
+      if (!rstest(live, reg))
+         continue;
+      if (!pending) {
+         pending = reg;
+         continue;
+      }
+      *npush += 2;
+      Xstp(pcode, KPTR, reg2oper(pending), reg2oper(reg),
+           mkoper(OMEM, .m = {.mode = APREIDX, .base = SP, .disp = -16}));
+      pending = 0;
+   }
+   if (pending) {
+      Xstp(pcode, KPTR, reg2oper(pending), REGZR,
+           mkoper(OMEM, .m = {.mode = APREIDX, .base = SP, .disp = -16}));
+      *npush += 2;
+   }
+   return 1;
+}
+
+/* Pop the registers pushed by calleesave().
+ *
+ * calleesave() pushes R19..LR in ascending order, two per STP, and pairs a
+ * leftover register with XZR in the last (topmost) slot, register first.
+ * The restore therefore walks the registers in descending order and, when
+ * the count is odd, first pops that lone register together with a
+ * discarded zero word before undoing the full pairs.  Popping the pairs
+ * first, or loading the lone slot as {xzr, reg}, would hand registers the
+ * wrong stack words.
+ */
+static void
+calleerestore(uchar **pcode, struct function *fn)
+{
+   regset usage = (fn->regusage & mctarg->rcallee) | (usebp * BIT(FP)) | (!fn->isleaf * BIT(LR));
+   if (!usage) return;
+
+   uint nsaved = 0;
+   for (uint reg = R(19); reg <= LR; ++reg)
+      if (rstest(usage, reg))
+         ++nsaved;
+
+   bool lone = nsaved & 1; /* topmost slot holds {highest register, zero} */
+   int prev = 0;           /* higher-numbered half of the pair being formed */
+   for (uint reg = LR; reg >= R(19); --reg) {
+      if (!rstest(usage, reg)) continue;
+      if (lone) {
+         Xldp(pcode, KPTR, reg2oper(reg), REGZR,
+              mkoper(OMEM, .m = {.mode = APOSTIDX, .base = SP, .disp = 16}));
+         lone = 0;
+      } else if (prev) {
+         Xldp(pcode, KPTR, reg2oper(reg), reg2oper(prev),
+              mkoper(OMEM, .m = {.mode = APOSTIDX, .base = SP, .disp = 16}));
+         prev = 0;
+      } else prev = reg;
+   }
+   assert(!prev); /* an even number of registers remains after the lone pop */
+}
+
+static void /* Emit the whole function: prologue (register saves), every block's
+             * instructions in layout order, an epilogue at each return, then
+             * register the function symbol with the object writer. */
+emitbin(struct function *fn)
+{
+ struct block *blk;
+ uchar **pcode = &objout.code;
+ int npush = 0; /* 8-byte slots pushed by calleesave */
+ bool saverestore;
+
+ fnstart = *pcode;
+ curfnsym = fn->name;
+
+ /** prologue **/
+
+ /* only use frame pointer in non-leaf functions and functions that use the stack */
+ usebp = 0;
+ if (!fn->isleaf || fn->stksiz) {
+ usebp = 1;
+ }
+ saverestore = calleesave(&npush, pcode, fn);
+
+ /* ensure stack is 16-byte aligned for function calls */
+ if (!fn->isleaf && ((fn->stksiz + npush*8) & 0xF) != 0) {
+ assert(usebp);
+ if ((rbpoff & 0xF) == 0) { /* NOTE(review): rbpoff is never set up for this target in the visible code -- confirm */
+ rbpoff -= 16;
+ fn->stksiz += 24;
+ } else {
+ rbpoff -= 8;
+ fn->stksiz += 8;
+ }
+ }
+
+ if (fn->stksiz != 0) { /* NOTE(review): empty -- no stack frame is allocated yet */
+ }
+
+ if (*pcode - fnstart > 6) { /* NOTE(review): byte threshold; with 4-byte instructions this means two or more prologue instructions -- confirm intent */
+ /* large prologue -> large epilogue -> transform to use single exit point */
+ struct block *exit = NULL;
+ blk = fn->entry->lprev;
+ do {
+ if (blk->jmp.t == Jret) {
+ if (!exit) {
+ if (blk->ins.n == 0) { /* an empty returning block can serve as the exit itself */
+ exit = blk;
+ continue;
+ } else { /* otherwise link a fresh returning block right after this one */
+ exit = newblk(fn);
+ exit->lnext = blk->lnext;
+ exit->lprev = blk;
+ blk->lnext = exit;
+ exit->lnext->lprev = exit;
+ exit->id = fn->nblk++;
+ exit->jmp.t = Jret;
+ }
+ }
+ blk->jmp.t = Jb; /* turn the return into a branch to the shared exit */
+ memset(blk->jmp.arg, 0, sizeof blk->jmp.arg);
+ blk->s1 = exit;
+ } else if (exit) {
+ /* thread jumps to the exit block */
+ if (blk->s1 && !blk->s1->ins.n && blk->s1->s1 == exit && !blk->s1->s2) blk->s1 = exit;
+ if (blk->s2 && !blk->s2->ins.n && blk->s2->s1 == exit && !blk->s2->s2) blk->s2 = exit;
+ }
+ } while ((blk = blk->lprev) != fn->entry);
+ }
+
+ blkaddr = allocz(fn->passarena, fn->nblk * sizeof *blkaddr, 0);
+
+ blk = fn->entry;
+ do {
+ struct blkaddr *bb = &blkaddr[blk->id];
+ uint bbaddr = *pcode - objout.textbegin;
+ assert(!bb->resolved);
+ while (bb->relreloc) { /* NOTE(review): `next` is read uninitialised below while the patching code is commented out; harmless only as long as nothing records relocations */
+ uint next;
+ int disp = bbaddr - bb->relreloc - 4;
+
+ //memcpy(&next, objout.textbegin + bb->relreloc, 4);
+ //wr32le(objout.textbegin + bb->relreloc, disp);
+ bb->relreloc = next;
+ }
+ bb->resolved = 1;
+ bb->addr = bbaddr;
+
+ for (int i = 0; i < blk->ins.n; ++i) {
+ emitinstr(pcode, fn, blk, i, &instrtab[blk->ins.p[i]]);
+ }
+ if (blk->jmp.t == Jret) {
+ /* epilogue */
+ if (saverestore)
+ calleerestore(pcode, fn);
+ W32(0xD65F03C0); /* RET */
+ } else if (blk->jmp.t == Jtrap) {
+ W32(0xD4200020); /* BRK #0x1 */
+ } else ;//emitbranch(pcode, blk);
+ } while ((blk = blk->lnext) != fn->entry);
+ objdeffunc(fn->name, fn->globl, fnstart - objout.textbegin, *pcode - fnstart);
+}
+
+/* Emission entry point: round the frame size up to a multiple of 8, report
+ * frames larger than 1<<24 bytes, then emit the function. */
+void
+aarch64_emit(struct function *fn)
+{
+   enum { MAXFRAME = 1<<24 };
+
+   fn->stksiz = alignup(fn->stksiz, 8);
+   if (fn->stksiz > MAXFRAME)
+      error(NULL, "'%s' stack frame too big", fn->name);
+   emitbin(fn);
+}
+
+/* vim:set ts=3 sw=3 expandtab: */
diff --git a/aarch64/isel.c b/aarch64/isel.c
new file mode 100644
index 0000000..a61fa21
--- /dev/null
+++ b/aarch64/isel.c
@@ -0,0 +1,440 @@
+#include "all.h"
+
+/* map alloca tmp -> stack frame displacement (0 if not alloca) */
+static ushort *stkslots; /* indexed by temporary number */
+static uint nstkslots; /* number of entries in stkslots */
+
+#define isstkslot(r) ((r).t == RTMP && (r).i < nstkslots && stkslots[(r).i]) /* is r a temporary with a stack slot? evaluates r several times */
+#define isimm32(r) (iscon(r) && concls(r) == KI32) /* is r a constant of class KI32? */
+
+/* When compiling position-independent code, replace an address-constant
+ * operand by a pointer-class copy inserted before the current instruction
+ * (advancing *curi past it).  Otherwise *r is left untouched. */
+static void
+picfixsym(union ref *r, struct block *blk, int *curi)
+{
+   if (ccopt.pic && isaddrcon(*r,0)) {
+      struct instr cp = mkinstr(Ocopy, KPTR, .l = *r);
+      *r = insertinstr(blk, (*curi)++, cp);
+   }
+}
+
+/* Count the leading zero bits of the 64-bit value `x`.
+ * Returns 64 for x == 0: the compiler builtin is undefined for that input
+ * and the portable loop would never find a set bit. */
+static inline uint
+clz(uvlong x)
+{
+   if (!x) return 64;
+#if HAS_BUILTIN(clzll)
+   return __builtin_clzll(x);
+#else
+   uint n = 0;
+   for (uvlong mask = BIT(63); !(x & mask); mask >>= 1)
+      ++n;
+   return n;
+#endif
+}
+
+/* Encode logical immediate.
+ * Returns true when x (for class k) is a repeating pattern of one rotated run
+ * of set bits, i.e. usable as an AND/ORR/EOR immediate. When enc is non-NULL
+ * the fields are stored as N<<12 | immr<<6 | imms, ready to shift left by 10. */
+bool
+aarch64_logimm(uint *enc, enum irclass k, uvlong x)
+{
+ /* https://github.com/v8/v8/blob/927ccc6076e25a614787c7011315468e40fe39a4/src/codegen/arm64/assembler-arm64.cc#L4409 */
+ if (k == KI32) x = (uint)x | x << 32; /* replicate the low word so 32-bit values are handled as 64-bit patterns */
+ bool neg;
+ if ((neg = x & 1)) x = ~x; /* work on the form whose lowest bit is clear */
+ if (x == 0) return 0; /* all zeros / all ones are not encodable */
+ uvlong a = x & (~x + 1), /* lowest set bit of x */
+ xa = x + a,
+ b = xa & (~xa + 1), /* lowest set bit after the first run was carried away */
+ xa_b = xa - b,
+ c = xa_b & (~xa_b + 1), /* lowest set bit of the next run, 0 if there is none */
+ mask;
+ uint clza = clz(a),
+ d, outn;
+ if (c != 0) {
+ d = clza - clz(c); /* distance between the starts of two runs = repeat width */
+ mask = BIT(d) - 1;
+ outn = 0;
+ } else {
+ assert(a != 0);
+ d = 64; /* a single run: the pattern is the whole 64-bit word */
+ mask = ~0ull;
+ outn = 1;
+ }
+ if (!ispo2(d)) return 0;
+ if (((b - a) & ~mask) != 0) return 0; /* the run must fit inside one repeat unit */
+ static const uvlong M[] = { /* multipliers that replicate a d-bit unit across 64 bits, for d = 64, 32, 16, 8, 4, 2 */
+ 0x0000000000000001, 0x0000000100000001, 0x0001000100010001,
+ 0x0101010101010101, 0x1111111111111111, 0x5555555555555555,
+ };
+ int i = clz(d) - 57;
+ assert((uint)i < countof(M));
+ uvlong m = M[i];
+ uvlong y = (b - a) * m;
+ if (y != x) return 0; /* x is not an exact repetition of the unit */
+ if (enc) {
+ int clzb = b == 0 ? -1 : clz(b),
+ s = clza - clzb, /* length of the run */
+ r;
+ if (neg) { /* undo the inversion done on entry */
+ s = d - s;
+ r = (clzb + 1) & (d - 1);
+ } else {
+ r = (clza + 1) & (d - 1);
+ }
+ *enc = outn<<12 | r<<6 | (((-d * 2) | (s - 1)) & 0x3F);
+ }
+ return 1;
+
+}
+
+static void /* Legalize operand *r of `ins`: integer constants that the operation can
+             * take as an immediate are kept, stack-slot temporaries become FP-relative
+             * address computations, and other non-temporaries are copied into a new
+             * temporary inserted before the instruction (advancing *curi). */
+fixarg(union ref *r, struct instr *ins, struct block *blk, int *curi)
+{
+ enum op op = ins ? ins->op : 0; /* NOTE(review): ins is null-checked here but dereferenced unconditionally below */
+ if (isintcon(ins->r)) { /* NOTE(review): tests ins->r rather than *r -- confirm this is intended when r points at the left operand */
+ vlong x = intconval(ins->r);
+ switch (op) {
+ default:
+ if (oiscmp(op)) { /* comparisons share the add/sub immediate rule; the case labels below sit inside this if */
+ case Oadd: case Osub:
+ /* imm12 (lsl 12) */
+ if ((x &~ 0xFFF) == 0 || (x &~ 0xFFF000) == 0) return;
+ break;
+ case Oshl: case Osar: case Oslr:
+ if ((uvlong)x < (ins->cls == KI32 ? 32 : 64)) return; /* shift amount within the register width */
+ break;
+ case Oand: case Oior: case Oxor:
+ if (aarch64_logimm(NULL, ins->cls, x)) return;
+ break;
+ }
+ }
+ goto Copy; /* constant not encodable: materialize it in a temporary */
+ } else if (isstkslot(*r)) {
+ struct instr adr = mkinstr(Oadd, KPTR, mkref(RREG, FP), mkintcon(KI32, -stkslots[r->i]));
+ if (ins && ins->op == Ocopy)
+ *ins = adr; /* a copy of a stack slot simply becomes the address computation */
+ else
+ *r = insertinstr(blk, (*curi)++, adr);
+ } else if (r->t != RTMP) Copy: {
+ *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, r->t == RTMP ? instrtab[r->i].cls : ins->cls ? ins->cls : KI32, *r));
+ }
+}
+
+/* Constant-fold `ins` when its left operand is a numeric constant and its
+ * right operand is either absent or a numeric constant as well.  On success
+ * the instruction is replaced by a copy of the folded value and 1 is
+ * returned; otherwise the instruction is left alone and 0 is returned. */
+static bool
+arithfold(struct instr *ins)
+{
+   union ref res;
+   bool ok;
+
+   if (!isnumcon(ins->l))
+      return 0;
+   if (ins->r.t && !isnumcon(ins->r))
+      return 0;
+
+   if (ins->r.t)
+      ok = foldbinop(&res, ins->op, ins->cls, ins->l, ins->r);
+   else
+      ok = foldunop(&res, ins->op, ins->cls, ins->l);
+   assert(ok && "fold?");
+   *ins = mkinstr(Ocopy, insrescls(*ins), res);
+   return 1;
+}
+
+static void /* Lower a call: rewrite the preceding Oarg instructions into register
+             * moves or stack stores per the call's ABI assignment, bracket the call
+             * with SP adjustments when stack arguments exist, and turn the call's
+             * result(s) into copies from the return registers. */
+selcall(struct function *fn, struct instr *ins, struct block *blk, int *curi)
+{
+ const struct call *call = &calltab.p[ins->r.i];
+ int iarg = *curi - 1; /* scans backwards from the call looking for Oarg instructions */
+ enum irclass cls;
+ uint argstksiz = alignup(call->argstksiz, 16);
+
+ for (int i = call->narg - 1; i >= 0; --i) { /* last argument first, matching the backwards scan */
+ struct abiarg abi = call->abiarg[i];
+ struct instr *arg;
+ for (;; --iarg) {
+ assert(iarg >= 0 && i >= 0 && "arg?");
+ if ((arg = &instrtab[blk->ins.p[iarg]])->op == Oarg)
+ break;
+ }
+
+ if (!abi.isstk) {
+ assert(!abi.ty.isagg);
+ *arg = mkinstr(Omove, call->abiarg[i].ty.cls, mkref(RREG, abi.reg), arg->r);
+ } else {
+ union ref adr = mkaddr((struct addr){mkref(RREG, SP), .disp = abi.stk});
+ int iargsave = iarg;
+ if (!abi.ty.isagg) { /* scalar arg in stack */
+ *arg = mkinstr(Ostore8+ilog2(cls2siz[abi.ty.cls]), 0, adr, arg->r);
+ if (isaddrcon(arg->r,1) || arg->r.t == RADDR)
+ arg->r = insertinstr(blk, iarg++, mkinstr(Ocopy, abi.ty.cls, arg->r));
+ else
+ fixarg(&ins->r, ins, blk, &iarg); /* NOTE(review): passes the call instruction and its operand, not arg/&arg->r -- looks unintended, confirm */
+ } else { /* aggregate arg in stack, callee stack frame destination address */
+ *arg = mkinstr(Ocopy, KPTR, adr);
+ }
+ *curi += iarg - iargsave; /* account for instructions inserted before the call */
+ }
+ }
+ if (call->argstksiz) {
+ union ref disp = mkref(RICON, argstksiz);
+ insertinstr(blk, iarg--, (struct instr){Osub, KPTR, .keep=1, .reg = SP+1, .l=mkref(RREG,SP), disp}); /* reserve the argument area before the first argument */
+ ++*curi;
+ insertinstr(blk, *curi+1, (struct instr){Oadd, KPTR, .keep=1, .reg = SP+1, .l=mkref(RREG,SP), disp}); /* release it after the call */
+ }
+ if (isimm32(ins->l))
+ ins->l = mkaddr((struct addr){.base = ins->l});
+ else if (isintcon(ins->l))
+ ins->l = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, ins->l));
+
+ cls = ins->cls;
+ ins->cls = 0;
+ if (cls) {
+ /* duplicate to reuse same TMP ref */
+ insertinstr(blk, (*curi)++, *ins);
+ *ins = mkinstr(Ocopy, cls, mkref(RREG, call->abiret[0].reg));
+ for (int i = 1; i <= 2; ++i) { /* look at the next two instructions for the second-result marker */
+ if (*curi + i >= blk->ins.n) break;
+ if (instrtab[blk->ins.p[*curi + i]].op == Ocall2r) {
+ ins = &instrtab[blk->ins.p[*curi += i]];
+ *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, call->abiret[1].reg));
+ break;
+ }
+ }
+ }
+}
+
+/* Fold the constant `disp` into the address's displacement.
+ * Fails (returns 0, address untouched) when the address already has an
+ * index or when the sum no longer fits in an int. */
+static bool
+aimm(struct addr *addr, int disp)
+{
+   vlong sum;
+
+   if (addr->index.bits)
+      return 0;
+   sum = addr->disp;
+   sum += disp;
+   if ((int)sum != sum)
+      return 0;
+   addr->disp = sum;
+   return 1;
+}
+
+/* Try to use `a << b` as the scaled index of `addr`.
+ * Succeeds only when b is an immediate in 0..3 whose scale equals the
+ * access size, the address has neither index nor displacement yet, and a
+ * is a register or temporary. */
+static bool
+ascale(struct addr *addr, union ref a, union ref b, uint siz/*1,2,4,8*/)
+{
+   const bool regish = a.t == RREG || a.t == RTMP;
+
+   if (b.t != RICON)
+      return 0;
+   if (addr->index.bits || addr->disp)
+      return 0;
+   if ((unsigned)b.i > 3 || 1<<b.i != siz)
+      return 0;
+   if (!regish)
+      return 0;
+
+   addr->index = a;
+   addr->shift = b.i;
+   return 1;
+}
+
+static bool /* aadd: accumulate the value referenced by `r` into the address being built in `addr`.
+ * For an RTMP ref it looks at the defining instruction: Oadd recurses into both
+ * operands, Osub recurses into the left operand and folds a negated integer
+ * constant via aimm, Oshl goes through ascale, Ocopy recurses into its source;
+ * each instruction absorbed this way gets skip = 1. Integer constants are folded
+ * via aimm, address constants may become the base, and everything that cannot
+ * be absorbed goes to the Ref path, which places r itself in the base or index
+ * slot. May insert an Oadd before *curi (advancing *curi) to materialize a
+ * stack slot address. Returns 1 if r was added to addr, 0 otherwise. */
+aadd(struct addr *addr, struct block *blk, int *curi, union ref r, uint siz/*1,2,4,8*/)
+{
+ if (r.t == RSTACK) {
+ if (addr->base.bits || addr->index.bits || !aimm(addr, -r.i)) goto Ref; /* can't express as FP + disp here */
+ addr->base = mkref(RREG, FP); /* stack slot becomes FP-relative with displacement -r.i */
+ } else if (r.t == RTMP) {
+ struct instr *ins = &instrtab[r.i]; /* instruction defining this temporary */
+ if (ins->op == Oadd) {
+ if (!aadd(addr, blk, curi, ins->l, siz)) goto Ref;
+ if (!aadd(addr, blk, curi, ins->r, siz)) goto Ref;
+ ins->skip = 1;
+ } else if (ins->op == Osub) {
+ if (!aadd(addr, blk, curi, ins->l, siz)) goto Ref;
+ if (!isintcon(ins->r)) goto Ref; /* only subtraction of an integer constant can be folded */
+ if (!aimm(addr, -intconval(ins->r))) goto Ref;
+ ins->skip = 1;
+ } else if (ins->op == Oshl) {
+ if (!ascale(addr, ins->l, ins->r, siz)) goto Ref;
+ ins->skip = 1;
+ } else if (ins->op == Ocopy) {
+ if (!aadd(addr, blk, curi, ins->l, siz)) goto Ref;
+ ins->skip = 1;
+ } else goto Ref;
+ } else if (isnumcon(r)) {
+ assert(isintcon(r));
+ return aimm(addr, intconval(r));
+ } else if (isaddrcon(r,1)) {
+ if (!addr->base.bits && !isaddrcon(addr->index,1)) addr->base = r; /* address constant can only go in a free base slot */
+ else return 0;
+ } else if (r.t == RREG) {
+ /* temporaries are single assignment, but registers aren't, so they can't be *
+ * safely hoisted into an address value, unless they have global lifetime */
+ if (!rstest(mctarg->rglob, r.i)) return 0;
+ Ref: /* also reached by goto from the RSTACK and RTMP branches above */
+ if (r.t == RSTACK && (addr->base.bits || addr->index.bits)) {
+ r = insertinstr(blk, (*curi)++, mkinstr(Oadd, KPTR, mkref(RREG, FP), mkref(RICON, -r.i))); /* compute FP - r.i into a new temp and use that instead */
+ }
+ if (!addr->base.bits) addr->base = r;
+ else if (!addr->index.bits) addr->index = r;
+ else return 0; /* both base and index slots are already occupied */
+ } else return 0;
+ return 1;
+}
+
+static bool /* fuseaddr: try to replace *r with a single address operand for an access of `siz` bytes.
+ * Returns 1 immediately if *r already satisfies isaddrcon(*r,1). Otherwise *r
+ * must be an RSTACK or RTMP ref; the address is assembled with aadd and, if the
+ * resulting displacement passes the range checks at the end, *r is overwritten
+ * with mkaddr(addr) and 1 is returned. Returns 0 in all other cases.
+ * NOTE(review): when 0 is returned after aadd has run, any skip flags it set
+ * and any instructions it (or this function) inserted are left in place;
+ * confirm that this is intended. */
+fuseaddr(union ref *r, struct block *blk, int *curi, uint siz/*1,2,4,8*/)
+{
+ struct addr addr = {0};
+
+ if (isaddrcon(*r,1)) return 1;
+
+ if (r->t != RSTACK && r->t != RTMP) return 0;
+ if (!aadd(&addr, blk, curi, *r, siz)) return 0;
+ if (isaddrcon(addr.base,0) && (ccopt.pic || (ccopt.pie && addr.index.bits) || (conht[addr.base.i].flag & SFUNC))) {
+ /* pic needs to load from GOT */
+ /* pie cannot encode RIP-relative address with index register */
+ /* NOTE(review): "RIP-relative" is x86-64 wording, presumably carried over from x86_64/isel.c; confirm the AArch64 equivalent */
+ /* first load symbol address into a temp register */
+ union ref temp = mkaddr((struct addr){.base = addr.base, .disp = ccopt.pic ? 0 : addr.disp}); /* without pic the displacement is folded into the symbol address */
+ addr.base = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, .l = temp));
+ if (!ccopt.pic) addr.disp = 0; /* already accounted for in temp */
+ }
+ if (!(addr.disp >= -256 && addr.disp < 256) /* for 9-bit signed unscaled offset */
+ && !(!(addr.disp & (siz-1)) && (uvlong)addr.disp < (1<<12)*siz)) /* 12-bit unsigned scaled offset */
+ return 0; /* displacement fits neither offset form */
+ *r = mkaddr(addr);
+ return 1;
+}
+
+
+static void /* loadstoreaddr: normalize the address operand *r of a load or store of `siz` bytes.
+ * A ref accepted by isimm32 is wrapped with mkaddr as a base; a ref accepted by
+ * isaddrcon(*r, 0) is handed to picfixsym; RTMP and RSTACK refs go through
+ * fuseaddr (its result is ignored, so *r stays as it was if fusion fails);
+ * any remaining ref that is not RREG is copied into a new KPTR temporary
+ * inserted before *curi, and *curi is advanced. RREG refs are left untouched. */
+loadstoreaddr(struct block *blk, union ref *r, int *curi, uint siz)
+{
+ if (isimm32(*r)) {
+ *r = mkaddr((struct addr){.base = *r});
+ } else if (isaddrcon(*r, 0)) {
+ picfixsym(r, blk, curi);
+ } else if (r->t == RTMP || r->t == RSTACK) {
+ fuseaddr(r, blk, curi, siz); /* best effort: return value deliberately not checked here */
+ } else if (r->t != RREG) {
+ *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, *r));
+ }
+}
+
+static void /* sel: instruction selection for one instruction `ins` of block `blk`.
+ * `curi` points at the caller's index of ins within blk; helpers called from
+ * here may insert instructions and advance *curi. Ops without a case in the
+ * switch below are left unchanged (there is no active default). */
+sel(struct function *fn, struct instr *ins, struct block *blk, int *curi)
+{
+ uint siz, alignlog2;
+ int t = ins - instrtab; /* index of ins in instrtab, used to key stkslots */
+ struct instr temp = {0}; /* NOTE(review): not referenced anywhere in this function */
+ enum op op = ins->op;
+
+ if (oisarith(ins->op) && arithfold(ins)) { /* arithfold reported success: only the left operand is fixed up */
+ fixarg(&ins->l, ins, blk, curi);
+ return;
+ }
+
+ switch (op) {
+ //default: assert(0);
+ case Onop: break;
+ case Oalloca1: case Oalloca2: case Oalloca4: case Oalloca8: case Oalloca16:
+ alignlog2 = ins->op - Oalloca1; /* relies on the Oalloca* ops being consecutive, in increasing alignment order */
+ assert(ins->l.i > 0);
+ siz = ins->l.i << alignlog2; /* element count times alignment */
+ fn->stksiz += siz;
+ fn->stksiz = alignup(fn->stksiz, 1 << alignlog2);
+ if (fn->stksiz > (1<<16)-1) error(NULL, "'%s' stack frame too big", fn->name);
+ stkslots[t] = fn->stksiz; /* remember the frame size at which this slot ends */
+ *ins = mkinstr(Onop,0,); /* the alloca itself emits nothing */
+ break;
+ case Oparam:
+ assert(ins->l.t == RICON && ins->l.i < fn->nabiarg);
+ if (!fn->abiarg[ins->l.i].isstk)
+ *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, fn->abiarg[ins->l.i].reg)); /* register-passed: copy out of the ABI register */
+ else /* stack */
+ *ins = mkinstr(Oadd, KPTR, mkref(RREG, FP), mkref(RICON, 16+fn->abiarg[ins->l.i].stk)); /* address is FP + 16 + stack offset; the 16 presumably skips the saved FP/LR pair, TODO confirm */
+ break;
+ case Oadd: case Osub:
+ if (ins->r.t == RICON && ins->r.i < 0) { /* negative immediate: flip the op and negate the constant */
+ op = ins->op ^= 1; /* presumably Oadd and Osub differ only in bit 0, TODO confirm against the op enum */
+ ins->r.i = -ins->r.i;
+ }
+ fixarg(&ins->l, ins, blk, curi);
+ fixarg(&ins->r, ins, blk, curi);
+ break;
+ case Oand: case Oior: case Oxor:
+ case Oshl: case Osar: case Oslr:
+ fixarg(&ins->r, ins, blk, curi); /* only the right operand is fixed up for these ops */
+ break;
+ case Oarg:
+ fixarg(&ins->r, ins, blk, curi);
+ break;
+ case Ocall:
+ selcall(fn, ins, blk, curi);
+ break;
+ case Oloads8: case Oloadu8: case Oloads16: case Oloadu16:
+ case Oloads32: case Oloadu32: case Oloadi64:
+ loadstoreaddr(blk, &ins->l, curi, 1<<((op - Oloads8)/2)); /* access size derived from the op's position: signed/unsigned pairs share a size */
+ break;
+ case Ostore8: case Ostore16: case Ostore32: case Ostore64:
+ loadstoreaddr(blk, &ins->l, curi, 1<<(op - Ostore8)); /* access size derived from the op's offset from Ostore8 */
+ fixarg(&ins->r, ins, blk, curi);
+ break;
+ }
+}
+
+static void /* seljmp: lower the terminator of `blk`.
+ * For a Jb jump with a condition argument, the condition is fixed up, turned
+ * into a temporary if it is not one already, and, when that temporary is not
+ * defined by a comparison, replaced by an appended Oneq against ZEROREF.
+ * For a Jret jump with a value, Omove instructions into the function's ABI
+ * return registers are appended. Other jump kinds are left unchanged. */
+seljmp(struct function *fn, struct block *blk)
+{
+ if (blk->jmp.t == Jb && blk->jmp.arg[0].bits) {
+ int curi = blk->ins.n; /* insertion point: end of the block */
+ fixarg(&blk->jmp.arg[0], NULL, blk, &curi);
+ union ref c = blk->jmp.arg[0];
+ if (c.t != RTMP) {
+ enum irclass cls = c.t == RICON ? KI32 : c.t == RXCON && conht[c.i].cls ? conht[c.i].cls : KPTR;
+ int curi = blk->ins.n; /* shadows the outer curi; taken before the insertion below */
+
+ c = insertinstr(blk, blk->ins.n, mkinstr(Ocopy, cls, c)); /* materialize the condition as a temporary */
+ sel(fn, &instrtab[c.i], blk, &curi);
+ }
+ if (!oiscmp(instrtab[c.i].op)) {
+ struct instr *ins;
+ int curi = blk->ins.n; /* shadows the outer curi; this is the index the Oneq below is inserted at */
+ blk->jmp.arg[0] = insertinstr(blk, blk->ins.n, mkinstr(Oneq, insrescls(instrtab[c.i]), c, ZEROREF)); /* branch on c != 0 */
+ ins = &instrtab[blk->jmp.arg[0].i];
+ if (kisflt(ins->cls)) {
+ ins->r = insertinstr(blk, curi, mkinstr(Ocopy, ins->cls, ZEROREF)); /* float compare: zero goes through a copy inserted at the Oneq's index, presumably placing it just before the Oneq */
+ }
+ ins->keep = 1;
+ }
+ } else if (blk->jmp.t == Jret) {
+ if (blk->jmp.arg[0].bits) {
+ int curi;
+ union ref r = mkref(RREG, fn->abiret[0].reg);
+ struct instr *ins = &instrtab[insertinstr(blk, blk->ins.n, mkinstr(Omove, fn->abiret[0].ty.cls, r, blk->jmp.arg[0])).i];
+ curi = blk->ins.n-1; /* index of the Omove just appended */
+ fixarg(&ins->r, ins, blk, &curi);
+ blk->jmp.arg[0] = r; /* the return now refers to the ABI register */
+ if (blk->jmp.arg[1].bits) {
+ r = mkref(RREG, fn->abiret[1].reg);
+ ins = &instrtab[insertinstr(blk, blk->ins.n, mkinstr(Omove, fn->abiret[1].ty.cls, r, blk->jmp.arg[1])).i]; /* NOTE(review): unlike the first value, no fixarg is applied and jmp.arg[1] is not rewritten to r; confirm whether that is intended */
+ }
+ }
+ }
+}
+
+void /* aarch64_isel: run instruction selection over every block of `fn`.
+ * Resets fn->stksiz, allocates a zeroed stkslots table with one entry per
+ * instruction (ninstr) from fn->passarena, then for each block fixes up phi
+ * arguments, runs sel on every instruction and seljmp on the terminator.
+ * Dumps the IR afterwards when ccopt.dbg.i is set, and finally clears fn->prop. */
+aarch64_isel(struct function *fn)
+{
+ extern int ninstr;
+ struct block *blk = fn->entry;
+
+ fn->stksiz = 0;
+ stkslots = allocz(fn->passarena, (nstkslots = ninstr) * sizeof *stkslots, 0);
+ do {
+ int i;
+ for (i = 0; i < blk->phi.n; ++i) {
+ struct instr *ins = &instrtab[blk->phi.p[i]];
+ union ref *phi = phitab.p[ins->l.i];
+ for (int i = 0; i < blk->npred; ++i) { /* inner i shadows the outer one: here it indexes predecessors */
+ int curi = blkpred(blk, i)->ins.n; /* fix-up code goes at the end of the predecessor block */
+ fixarg(&phi[i], ins, blkpred(blk, i), &curi);
+ }
+ }
+ for (i = 0; i < blk->ins.n; ++i) {
+ struct instr *ins = &instrtab[blk->ins.p[i]];
+ sel(fn, ins, blk, &i); /* sel may advance i past instructions it inserts */
+ }
+ seljmp(fn, blk);
+ } while ((blk = blk->lnext) != fn->entry); /* the lnext chain is walked until it returns to the entry block */
+
+ if (ccopt.dbg.i) {
+ bfmt(ccopt.dbgout, "<< After isel >>\n");
+ irdump(fn);
+ }
+
+ fn->prop = 0;
+}
+
+/* vim:set ts=3 sw=3 expandtab: */
diff --git a/c/lex.c b/c/lex.c
index d6aa95a..63b066b 100644
--- a/c/lex.c
+++ b/c/lex.c
@@ -2065,6 +2065,10 @@ addpredefmacros(struct arena **tmparena)
putdef1("__x86_64__");
putdef1("__x86_64");
break;
+ case ISaarch64:
+ putdef1("__aarch64");
+ putdef1("__aarch64__");
+ break;
}
if (target.os != OSunknown) putdef1("__STDC_HOSTED__");
diff --git a/common.h b/common.h
index 9804447..ac4bc88 100644
--- a/common.h
+++ b/common.h
@@ -73,7 +73,7 @@ ptrhash(const void *p) {
static inline uint
popcnt(uvlong x) {
#if HAS_BUILTIN(popcountll)
- return __builtin_popcountll(x);
+ return x ? __builtin_popcountll(x) : 0;
#else
uint n = 0;
while (x) n += x&1, x >>= 1;
@@ -86,7 +86,7 @@ ispo2(uvlong x) {
}
static inline uint
ilog2(uvlong x) { /* assumes x is a power of 2 */
-#if HAS_BUILTIN(ctz)
+#if HAS_BUILTIN(ctzll)
return __builtin_ctzll(x);
#else
uint n = 0;
@@ -97,7 +97,7 @@ ilog2(uvlong x) { /* assumes x is a power of 2 */
static inline uint
lowestsetbit(uvlong x)
{
-#if HAS_BUILTIN(ctz)
+#if HAS_BUILTIN(ctzll)
return __builtin_ctzll(x);
#else
int i = 0;
@@ -163,7 +163,7 @@ extern struct inclpaths {
/**********/
struct targtriple {
- enum mcarch { ISxxx, ISx86_64 } arch;
+ enum mcarch { ISxxx, ISx86_64, ISaarch64 } arch;
enum mcos { OSunknown, OSlinux } os;
enum mcabi { ABInone, ABIgnu, ABImusl } abi;
};
diff --git a/obj/elf.c b/obj/elf.c
index 512e710..e39a8d8 100644
--- a/obj/elf.c
+++ b/obj/elf.c
@@ -49,6 +49,7 @@ elfinit(void)
switch (target.arch) {
default: assert(!"arch?");
case ISx86_64: hdr.h32.machine = EM_X86_64; break;
+ case ISaarch64: hdr.h32.machine = EM_ARM64; break;
}
hdr.h32.version = ELFVERSION;
if (targ_64bit) {
@@ -142,6 +143,14 @@ static const ushort relktab[][NRELOCKIND] = {
[REL_PLT32] = 4, /* R_X86_64_PLT32 */
[REL_GOTPCRELX] = 41, /* R_X86_64_GOTPCRELX */
[REL_GOTPCRELX_REX] = 42, /* R_X86_64_REX_GOTPCRELX */
+ },
+ [ISaarch64] = {
+ [REL_ABS64] = 257, /* R_AARCH64_ABS64 */
+ [REL_ABS32] = 258, /* R_AARCH64_ABS32 */
+ [REL_ABS32S] = 258, /* R_AARCH64_ABS32 (AArch64 defines no separate ABS32S) */
+ [REL_PCREL32] = 261, /* R_AARCH64_PREL32 */
+ [REL_PLT32] = 314, /* R_AARCH64_PLT32 */
+ [REL_CALL26] = 283, /* R_AARCH64_CALL26 */
}
};
@@ -363,7 +372,7 @@ wordalign(struct wbuf *out, int align)
while (off++ & (align - 1)) ioputc(out, 0);
}
-static const bool userelatab[] = { [ISx86_64] = 1 };
+static const bool userelatab[] = { [ISx86_64] = 1, [ISaarch64] = 1 };
void
elffini(struct wbuf *out)
diff --git a/obj/obj.h b/obj/obj.h
index 77da99a..985e583 100644
--- a/obj/obj.h
+++ b/obj/obj.h
@@ -17,6 +17,7 @@ enum relockind {
REL_PLT32,
REL_GOTPCRELX,
REL_GOTPCRELX_REX,
+ REL_CALL26,
NRELOCKIND,
};
enum section { Snone, Stext, Srodata, Sdata, Sbss };
diff --git a/targ.c b/targ.c
index c715ed7..515bda3 100644
--- a/targ.c
+++ b/targ.c
@@ -1,7 +1,7 @@
#include "common.h"
#include "type.h"
-extern const struct mctarg t_x86_64_sysv;
+extern const struct mctarg t_x86_64_sysv, t_aarch64_aapcs;
static const struct targ {
struct { enum mcarch arch; uint oss, abis; };
struct { uchar longsize, vlongsize, ptrsize, valistsize; };
@@ -10,7 +10,8 @@ static const struct targ {
uchar sizetype, ptrdifftype, wchartype;
const struct mctarg *mctarg;
} targs[] = {
- { {ISx86_64, -1, 1<<ABIgnu | 1<<ABImusl}, {8,8,8,24}, {8,8,8,8}, 1, TYULONG, TYLONG, TYINT, &t_x86_64_sysv },
+ { {ISx86_64, -1, 1<<ABIgnu | 1<<ABImusl}, {8,8,8,24}, {8,8,8,8}, 1, TYULONG, TYLONG, TYINT, &t_x86_64_sysv },
+ { {ISaarch64, -1, 1<<ABIgnu | 1<<ABImusl}, {8,8,8,32}, {8,8,8,8}, 0, TYULONG, TYLONG, TYUINT, &t_aarch64_aapcs },
};
struct targtriple target;
@@ -37,9 +38,11 @@ matchstr(const char **s, const char *pat)
static bool
parsetriple(struct targtriple *trg, const char *str)
{
- if (matchstr(&str, "x86_64-")) {
+ if (matchstr(&str, "x86_64-"))
trg->arch = ISx86_64;
- } else return 0;
+ else if (matchstr(&str, "aarch64-") || matchstr(&str, "arm64-"))
+ trg->arch = ISaarch64;
+ else return 0;
if (matchstr(&str, "unknown-") || matchstr(&str, "pc-")) {}
@@ -64,7 +67,7 @@ parsetriple(struct targtriple *trg, const char *str)
void
targ_init(const char *starg)
{
- const struct targ *t = &targs[0];
+ const struct targ *t = NULL;
uchar *sizes = targ_primsizes, *align = targ_primalign;
if (!starg) {
@@ -75,6 +78,16 @@ targ_init(const char *starg)
fatal(NULL, "unrecognized target: %s", starg);
}
+ for (size_t i = 0; i < countof(targs); ++i) {
+ if (targs[i].arch == target.arch)
+ if (targs[i].oss & (1 << target.os))
+ if (targs[i].abis & (1 << target.abi)) {
+ t = &targs[i];
+ break;
+ }
+ }
+ if (!t) fatal(NULL, "unsupported target: %s", starg ? starg : "(host)");
+
sizes[TYBOOL] = sizes[TYCHAR] = sizes[TYSCHAR] = sizes[TYUCHAR] = 1;
sizes[TYSHORT] = sizes[TYUSHORT] = 2;
sizes[TYUINT] = sizes[TYINT] = 4;
diff --git a/x86_64/isel.c b/x86_64/isel.c
index 40d8db4..98d66a1 100644
--- a/x86_64/isel.c
+++ b/x86_64/isel.c
@@ -195,19 +195,6 @@ aimm(struct addr *addr, int disp)
}
static bool
-acon(struct addr *addr, union ref r)
-{
- vlong a = addr->disp;
- assert(isintcon(r));
- a += intconval(r);
- if ((int)a == a) {
- addr->disp = a;
- return 1;
- }
- return 0;
-}
-
-static bool
ascale(struct addr *addr, union ref a, union ref b)
{
if (b.t != RICON) return 0;
@@ -268,7 +255,8 @@ aadd(struct addr *addr, struct block *blk, int *curi, union ref r)
ins->skip = 1;
} else goto Ref;
} else if (isnumcon(r)) {
- return acon(addr, r);
+ assert(isintcon(r));
+ return aimm(addr, intconval(r));
} else if (isaddrcon(r,1)) {
if (!addr->base.bits && !isaddrcon(addr->index,1)) addr->base = r;
else return 0;