From 17b4861e53fd5be2107f3b7fd8bf77f3d2cc15da Mon Sep 17 00:00:00 2001 From: lemon Date: Sun, 28 Dec 2025 19:02:39 +0100 Subject: backend: start implementing aarch64 --- Makefile | 1 + aarch64/aapcs.c | 77 +++++++ aarch64/all.h | 16 ++ aarch64/emit.c | 672 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ aarch64/isel.c | 440 +++++++++++++++++++++++++++++++++++++ c/lex.c | 4 + common.h | 8 +- obj/elf.c | 11 +- obj/obj.h | 1 + targ.c | 23 +- x86_64/isel.c | 16 +- 11 files changed, 1245 insertions(+), 24 deletions(-) create mode 100644 aarch64/aapcs.c create mode 100644 aarch64/all.h create mode 100644 aarch64/emit.c create mode 100644 aarch64/isel.c diff --git a/Makefile b/Makefile index 9818c08..726f121 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,7 @@ SRC=main.c io.c mem.c c/c.c c/lex.c c/eval.c c/builtin.c type.c targ.c \ ir/ir.c ir/builder.c ir/fold.c ir/dump.c ir/ssa.c ir/cfg.c ir/intrin.c ir/abi0.c ir/mem2reg.c ir/regalloc.c ir/simpl.c ir/stack.c \ x86_64/sysv.c x86_64/isel.c x86_64/emit.c \ + aarch64/aapcs.c aarch64/isel.c aarch64/emit.c \ obj/obj.c obj/elf.c \ embedfilesdir.c CFLAGS=-Wall -std=c11 -pedantic diff --git a/aarch64/aapcs.c b/aarch64/aapcs.c new file mode 100644 index 0000000..fc08da1 --- /dev/null +++ b/aarch64/aapcs.c @@ -0,0 +1,77 @@ +#include "all.h" + +static int +abiarg(short r[2], uchar cls[2], uchar *r2off, int *ni, int *nf, int *ns, union irtype typ) +{ + enum { NINT = 8, NFLT = 8 }; + if (!typ.isagg) { + if (kisflt(cls[0] = typ.cls) && *nf < 8) { + r[0] = V(0) + (*nf)++; + } else if (kisint(cls[0]) && *ni < NINT) { + r[0] = R0 + (*ni)++; + } else { + r[0] = *ns; + *ns += 8; + return 0; /* MEMORY */ + } + return 1; + } else assert(!"nyi"); +} + +static int +abiret(short r[2], uchar cls[2], uchar *r2off, int *ni, union irtype typ) +{ + if (!typ.isagg) { + r[0] = kisflt(cls[0] = typ.cls) ? 
V(0) : R0; + return 1; + } + int nf = 0, ns = 0; + int ret = abiarg(r, cls, r2off, ni, &nf, &ns, typ); + if (ret) return ret; + /* caller-allocated result address in x8 */ + assert(*ni == 0); + r[0] = -1; + r[1] = R(8); + return 0; +} + +static void +vastart(struct function *fn, struct block *blk, int *curi) +{ + assert(!"nyi"); +} + +static void +vaarg(struct function *fn, struct block *blk, int *curi) +{ + assert(!"nyi"); +} + +static const char aarch64_rnames[][6] = { + "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9","R10","R11","R12","R13","R14","R15", + "R16","R17","R18","R19","R20","R21","R22","R23","R24","R25","R26","R27","R28", "FP", "LR", "SP", + "V0", "V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9","V10","V11","V12","V13","V14","V15", + "V16","V17","V18","V19","V20","V21","V22","V23","V24","V25","V26","V27","V28","V29","V30","V31", +}; + +const struct mctarg t_aarch64_aapcs = { + .gpr0 = R0, .ngpr = 31, + .bpr = FP, + .gprscratch = R(16), .fprscratch = V(31), + .fpr0 = V0, .nfpr = 32, + .rcallee = BIT(R(19)) | BIT(R(20)) | BIT(R(21)) | BIT(R(22)) | BIT(R(23)) + | BIT(R(24)) | BIT(R(25)) | BIT(R(26)) | BIT(R(27)) | BIT(R(28)) + | BIT( V(8)) | BIT( V(9)) | BIT(V(10)) | BIT(V(11)) | BIT(V(12)) + | BIT(V(13)) | BIT(V(14)) | BIT(V(15)), + .rglob = BIT(FP) | BIT(LR) | BIT(SP), + .rnames = aarch64_rnames, + .objkind = OBJELF, + .abiret = abiret, + .abiarg = abiarg, + .vastart = vastart, + .vaarg = vaarg, + .isel = aarch64_isel, + .emit = aarch64_emit, +}; + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/aarch64/all.h b/aarch64/all.h new file mode 100644 index 0000000..828909e --- /dev/null +++ b/aarch64/all.h @@ -0,0 +1,16 @@ +#include "../ir/ir.h" + +enum reg { + R0 = 0, +#define R(n) (R0+n) + FP = R(29), LR = R(30), SP = R(31), + V0, +#define V(n) (V0+n) +}; + +bool aarch64_logimm(uint *enc, enum irclass, uvlong x); +void aarch64_isel(struct function *); +void aarch64_emit(struct function *); + +/* vim:set ts=3 sw=3 expandtab: */ + diff --git 
a/aarch64/emit.c b/aarch64/emit.c new file mode 100644 index 0000000..a0a7ca6 --- /dev/null +++ b/aarch64/emit.c @@ -0,0 +1,672 @@ +#include "all.h" +#include "../obj/obj.h" +#include "../endian.h" + +/* References: https://weinholt.se/articles/arm-a64-instruction-set/ + * ARM ARM https://developer.arm.com/documentation/ddi0628/aa/?lang=en + */ + +enum operkind { ONONE, OREGZR, OREG, OIMM, OMEM, OSYM }; +enum shiftkind { SLSL, SLSR, SASR, SROR }; +enum addrmode { AIMMIDX, AREGIDX, APREIDX, APOSTIDX }; +enum addrregext { XUXTW = 2, XLSL = 3, XSXTW = 6, XSXTX = 7 }; +struct oper { + uchar t; + union { + struct { /* OREG (opt. shifted) */ + uchar reg; + uchar shft : 2, /* enum shiftkind */ + shamt : 6; + }; + struct { /* OMEM */ + uchar mode : 3; /* enum addrmode */ + uchar base : 5; /* reg */ + union { + struct { + uchar index : 5; /* reg */ + uchar ext : 3; /* enum addrregext */ + uchar shamt; + }; + short disp; + }; + } m; + vlong imm; uvlong uimm; /* OIMM */ + struct { /* OSYM */ + ushort con; + int cdisp; + }; + }; +}; + +#define REGZR ((struct oper){OREGZR, .reg=31}) +#define mkoper(t, ...) 
((struct oper){(t), __VA_ARGS__}) +#define reg2oper(r) (assert((uint)(r) <= V(31)), mkoper(OREG, .reg = (r))) + +/* Turn an address-like ref (RTMP, RREG or RADDR) into an OMEM operand, or into an OSYM operand + * when the address base is a symbol constant. msiz is the access size as the callers pass it + * (8 << log2(bytes)); it is only used to check that a scaled index shifts by the access size. */ +static struct oper +mkmemoper(uint msiz, union ref r) +{ + if (r.t == RTMP) { + assert(in_range(instrtab[r.i].reg-1, R0, SP)); + return mkoper(OMEM, .m = {AIMMIDX, .base = instrtab[r.i].reg-1}); + } else if (r.t == RREG) { + return mkoper(OMEM, .m = {AIMMIDX, .base = r.i}); + } else if (r.t == RADDR) { + const struct addr *addr = &addrht[r.i]; + assert(addr->shift <= 3 && (!addr->disp || !addr->index.bits)); + if (isaddrcon(addr->base,0)) { + assert(!addr->index.bits); + return mkoper(OSYM, .con = addr->base.i, .cdisp = addr->disp); + } + assert(addr->base.t == RREG); + if (!addr->index.bits) { + return mkoper(OMEM, .m = {.mode = AIMMIDX, .base = addr->base.i, .disp = addr->disp}); + } else { + assert(addr->index.t == RREG); + /* the index is either unscaled or scaled by exactly the access size */ + assert(addr->shift == 0 || 8<<addr->shift == msiz); + return mkoper(OMEM, .m = { + .mode = AREGIDX, + .base = addr->base.i, + .index = addr->index.i, + .ext = XLSL, + .shamt = !!addr->shift, + }); + } + } + assert(!"nyi"); +} + +static struct oper +ref2oper(union ref r) +{ + switch (r.t) { + case RTMP: return instrtab[r.i].reg ? 
reg2oper(instrtab[r.i].reg-1) : mkoper(ONONE,); /* .reg is stored biased by one (hence the -1); zero yields an empty operand */ + case RREG: return reg2oper(r.i); + case RICON: return mkoper(OIMM, .imm = r.i); + case RXCON: + if (conht[r.i].cls == KI32) + return mkoper(OIMM, .imm = conht[r.i].i); + else if (conht[r.i].cls == KI64) { + vlong i = conht[r.i].i; + return mkoper(OIMM, .imm = i); + } else if (!conht[r.i].cls) { + return mkoper(OSYM, .con = r.i); + } + assert(0); + //case RADDR: return mkmemoper(r); + default: assert(0); + } +} + +enum operpat { + PNONE, + PGPRZ, /* R0-R30,ZR */ + PGPRSP, /* R0-R30,SP */ + PSP, /* SP */ + PGPRZSHFT, /* R0-30,ZR SHFT #n */ + PFPR, /* V0 - V31 */ + PZERO, /* zero immediate */ + PU6, /* 6-bit uimm */ + PU12SL12, /* 12 bit uimm, optionally left shifted by 12 */ + PU16SL16, /* 16 bit uimm, left shift by 0/16/32/48 */ + PLOGIMM, /* immediate for logical instrs */ + PMEMAIMM, /* addr 12bit immediate byte offset */ + PMEMAIMMH, /* addr 12bit immediate halfword offset (multiple of 2) */ + PMEMAIMMW, /* addr 12bit immediate word offset (multiple of 4) */ + PMEMAIMMX, /* addr 12bit immediate doubleword offset (multiple of 8) */ + PMEMPREPOST, /* addr signed 9bit immediate byte offset */ + PMEMAREG, /* addr reg offset, optionally left shifted */ + PMEMAXREG, /* addr extended reg offset */ + PSYM, /* symbol */ +}; +enum operenc { + EN_ADDSUBEXT3R, /* add/sub-ext-reg */ + EN_ADDSUBSHFT3R, /* add/sub-shift-reg */ + EN_LOGSHFT3R, /* logical/shifted-reg */ + EN_ARITH2R, /* data-processing/1src */ + EN_ARITH3R, /* data-processing/2src */ + EN_ARITH4R, /* data-processing/3src */ + EN_ADDSUBIMM, /* add/subtract-imm */ + EN_LOGIMM, /* logical-imm */ + EN_MOVEIMM, /* move/wide-imm */ + EN_MEMAIMM, /* load/store/unsigned-imm */ + EN_MEMAIMMH, /* load/store/unsigned-imm (halfword) */ + EN_MEMAIMMW, /* load/store/unsigned-imm (word) */ + EN_MEMAIMMX, /* load/store/unsigned-imm (doubleword) */ + EN_MEMAPREPOST, /* load/store/pre/postidx-imm */ + EN_MEMAREG, /* load/store/reg-offset */ + EN_MEMPPREPOST, /* 
load/store-pair/pre/postidx-imm */ +}; +struct desc { + uchar psiz; /* subset of {4,8} */ + uchar pt[3]; /* bitsets of enum operpat, up to 3 operands */ + uint opc; + uchar operenc; /* enum operenc */ +}; + +/* Match operand o against pattern pat. k is only consulted for PLOGIMM, where it is forwarded + * to aarch64_logimm. Patterns without a case here (PMEMAXREG, PSYM) fall through to the assert. */ +static inline bool +opermatch(enum operpat pat, enum irclass k, struct oper o) +{ + switch (pat) { + case PNONE: return !o.t; + case PGPRZ: + return o.t == OREGZR || (o.t == OREG && in_range(o.reg, R0, R(30)) && !o.shamt); + case PGPRSP: + return o.t == OREG && in_range(o.reg, R0, R(31)) && !o.shamt; + case PGPRZSHFT: + return o.t == OREGZR || (o.t == OREG && in_range(o.reg, R0, R(30))); + case PSP: return o.t == OREG && o.reg == SP; + case PFPR: return o.t == OREG && in_range(o.reg, V0, V(31)); + case PZERO: return o.t == OIMM && o.imm == 0; + case PU6: return o.t == OIMM && (uvlong)o.imm < 64; /* 0..63, compared at full width so high bits are not truncated away */ + case PU12SL12: + return o.t == OIMM && ((o.imm &~ 0xFFF) == 0 || (o.imm &~ 0xFFF000) == 0); + case PU16SL16: + /* 0xFFFF0000 needs the ull suffix: as a 32-bit unsigned constant its complement would zero-extend to 0xFFFF */ + return o.t == OIMM + && ((o.imm &~ 0xFFFF) == 0 || (o.imm &~ 0xFFFF0000ull) == 0 + || (o.imm &~ (0xFFFFull<<32)) == 0 || (o.imm &~ (0xFFFFull<<48)) == 0); + case PLOGIMM: return o.t == OIMM && aarch64_logimm(NULL, k, o.imm); + case PMEMAIMM: + return o.t == OMEM && o.m.mode == AIMMIDX && (uint)o.m.disp < (1<<12); + case PMEMAIMMH: + return o.t == OMEM && o.m.mode == AIMMIDX && (uint)o.m.disp < (1<<13) && !(o.m.disp % 2); + case PMEMAIMMW: + return o.t == OMEM && o.m.mode == AIMMIDX && (uint)o.m.disp < (1<<14) && !(o.m.disp % 4); + case PMEMAIMMX: + return o.t == OMEM && o.m.mode == AIMMIDX && (uint)o.m.disp < (1<<15) && !(o.m.disp % 8); + case PMEMAREG: + return o.t == OMEM && o.m.mode == AREGIDX; + case PMEMPREPOST: + return o.t == OMEM && (o.m.mode == APREIDX || o.m.mode == APOSTIDX + || (o.m.mode == AIMMIDX && o.m.disp >= -256 && o.m.disp < 256)); + } + assert(0); +} + +/* code output helpers */ +#define W32(w) (wr32targ(*pcode, (w)), *pcode += 4) + +static uchar *fnstart; +static internstr curfnsym; +static bool usebp; 
+static int rbpoff; + +/* Given an instruction description table, find the first entry that matches + * the operands (where dst, src are the operands in intel syntax order) and encode it */ +static void +encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct oper o[3]) +{ + const struct desc *en = NULL; + for (int i = 0; i < ntab; ++i) { + if (!(tab[i].psiz & cls2siz[k])) continue; + for (int j = 0; j < 3; ++j) + if (!opermatch(tab[i].pt[j], k, o[j])) + goto Skip; + en = &tab[i]; + break; + Skip:; + } + assert(en && "no match for instr"); + + uint sf = cls2siz[k] >> 3; + uint ins = en->opc, sh, nimmrs; + switch (en->operenc) { + default: assert(!"nyi enc"); + case EN_ADDSUBSHFT3R: + case EN_LOGSHFT3R: + ins |= sf<<31 | o[2].shft<<22 | o[2].reg<<16 | o[2].shamt<<10 | o[1].reg<<5 | o[0].reg; + break; + case EN_ADDSUBIMM: + sh = o[2].imm > 0xFFF; + ins |= sf<<31 | sh<<22 | (o[2].uimm >> 12*sh)<<10 | o[1].reg<<5 | o[0].reg; + break; + case EN_LOGIMM: + assert(aarch64_logimm(&nimmrs, k, o[2].uimm)); + ins |= sf<<31 | nimmrs<<10 | o[1].reg<<5 | o[0].reg; + break; + case EN_MOVEIMM: + sh = o[1].imm ? 
lowestsetbit(o[1].imm) / 16 : 0; + ins |= sf<<31 | sh<<21 | (o[1].uimm >> 16*sh)<<5 | o[0].reg; + break; + case EN_MEMAIMM: AImm: + ins |= o[1].m.disp<<10 | o[1].m.base<<5 | o[0].reg; + break; + case EN_MEMAIMMH: o[1].m.disp >>= 1; goto AImm; + case EN_MEMAIMMW: o[1].m.disp >>= 2; goto AImm; + case EN_MEMAIMMX: o[1].m.disp >>= 3; goto AImm; + case EN_MEMAPREPOST: + ins |= (o[1].m.disp&0x1FF)<<12 | o[1].m.base<<5 | o[0].reg; + if (o[1].m.mode == APREIDX) ins |= 3<<10; + else if (o[1].m.mode == APOSTIDX) ins |= 1<<10; + break; + case EN_MEMAREG: + assert(o[1].m.shamt <= 1); + ins |= o[1].m.index<<16 | o[1].m.ext<<13 | o[1].m.shamt<<12 | o[1].m.base<<5 | o[0].reg; + break; + case EN_MEMPPREPOST: + assert(o[2].m.disp % 8 == 0); + ins |= (o[2].m.disp/8&0x7F)<<15 | o[1].reg<<10 | o[2].m.base<<5 | o[0].reg; + if (o[2].m.mode == APREIDX) ins |= 3<<23; + else if (o[2].m.mode == APOSTIDX) ins |= 1<<23; + else ins |= 2<<23; + break; + } + W32(ins); +} +#define DEFINSTR1(X, ...) \ + static void \ + X(uchar **pcode, enum irclass k, struct oper a) \ + { \ + static const struct desc tab[] = { __VA_ARGS__ }; \ + encode(pcode, tab, countof(tab), k, ((struct oper [3]){a})); \ + } + +#define DEFINSTR2(X, ...) \ + static void \ + X(uchar **pcode, enum irclass k, struct oper op1, struct oper op2) \ + { \ + static const struct desc tab[] = { __VA_ARGS__ }; \ + encode(pcode, tab, countof(tab), k, ((struct oper [3]){op1,op2})); \ + } +#define DEFINSTR3(X, ...) 
\ + static void \ + X(uchar **pcode, enum irclass k, struct oper op1, struct oper op2, struct oper op3) \ + { \ + static const struct desc tab[] = { __VA_ARGS__ }; \ + encode(pcode, tab, countof(tab), k, ((struct oper [3]){op1,op2,op3})); \ + } + +DEFINSTR3(Xadd, + {4|8, {PGPRSP, PGPRSP, PU12SL12}, 0x11000000, EN_ADDSUBIMM}, /* ADD (immediate) */ + {4|8, {PGPRZ, PGPRZ, PGPRZSHFT}, 0x0B000000, EN_ADDSUBSHFT3R}, /* ADD (shifted register) */ +) +DEFINSTR3(Xsub, + {4|8, {PGPRSP, PGPRSP, PU12SL12}, 0x51000000, EN_ADDSUBIMM}, /* SUB (immediate) */ + {4|8, {PGPRZ, PGPRZ, PGPRZSHFT}, 0x4B000000, EN_ADDSUBSHFT3R}, /* SUB (shifted register) */ +) + +DEFINSTR3(Xand, + {4|8, {PGPRSP, PGPRZ, PLOGIMM}, 0x12000000, EN_LOGIMM}, /* AND (immediate) */ + {4|8, {PGPRZ, PGPRZ, PGPRZSHFT}, 0x0A000000, EN_LOGSHFT3R}, /* AND (shifted register) */ +) +DEFINSTR3(Xorr, + {4|8, {PGPRSP, PGPRZ, PLOGIMM}, 0x32000000, EN_LOGIMM}, /* ORR (immediate) */ + {4|8, {PGPRZ, PGPRZ, PGPRZSHFT}, 0x2A000000, EN_LOGSHFT3R}, /* ORR (shifted register) */ +) +DEFINSTR3(Xeor, + {4|8, {PGPRSP, PGPRZ, PLOGIMM}, 0x52000000, EN_LOGIMM}, /* EOR (immediate) */ + {4|8, {PGPRZ, PGPRZ, PGPRZSHFT}, 0x4A000000, EN_LOGSHFT3R}, /* EOR (shifted register) */ +) + +static void +Xubfm(uchar **pcode, enum irclass k, struct oper rd, struct oper rn, uint immr, uint imms) +{ + uint x = k != KI32; + uint nbit = x ? 64 : 32; + assert(opermatch(PGPRZ, k, rd) && opermatch(PGPRZ, k, rn) && immr < nbit && imms < nbit); + W32(x<<31 | 0x53000000 | x<<22 | immr<<16 | imms<<10 | rn.reg<<5 | rd.reg); +} +static void +Xsbfm(uchar **pcode, enum irclass k, struct oper rd, struct oper rn, uint immr, uint imms) +{ + uint x = k != KI32; + uint nbit = x ? 
64 : 32; + assert(opermatch(PGPRZ, k, rd) && opermatch(PGPRZ, k, rn) && immr < nbit && imms < nbit); + W32(x<<31 | 0x13000000 | x<<22 | immr<<16 | imms<<10 | rn.reg<<5 | rd.reg); +} + +DEFINSTR2(Xmovz, {4|8, {PGPRZ, PU16SL16}, 0x52800000, EN_MOVEIMM}, /* MOVZ */) +DEFINSTR2(Xmovn, {4|8, {PGPRZ, PU16SL16}, 0x12800000, EN_MOVEIMM}, /* MOVN */) +DEFINSTR2(Xmovk, {4|8, {PGPRZ, PU16SL16}, 0x72800000, EN_MOVEIMM}, /* MOVK */) +DEFINSTR2(Xldr, + {4, {PGPRZ, PMEMAIMMW}, 0xB9400000, EN_MEMAIMMW}, /* LDR (immediate) */ + {8, {PGPRZ, PMEMAIMMX}, 0xF9400000, EN_MEMAIMMX}, + {4, {PGPRZ, PMEMAREG}, 0xB8600800, EN_MEMAREG}, /* LDR (register) */ + {8, {PGPRZ, PMEMAREG}, 0xF8600800, EN_MEMAREG}, + {4, {PGPRZ, PMEMPREPOST}, 0xB8400000, EN_MEMAPREPOST}, /* LDR (immediate, (pre/postinc)) */ + {8, {PGPRZ, PMEMPREPOST}, 0xF8400000, EN_MEMAPREPOST}, +) +DEFINSTR2(Xldrsw, + {8, {PGPRZ, PMEMAIMMW}, 0xB9800000, EN_MEMAIMMW}, /* LDRSW (immediate) */ +// {8, {PGPRZ, PMEMAREG}, 0xB8A00800, EN_MEMAREG}, /* LDRSW (register) */ + {8, {PGPRZ, PMEMPREPOST}, 0xB8800000, EN_MEMAPREPOST}, /* LDRSW (immediate, (pre/postinc)) */ +) +DEFINSTR2(Xldrh, + {4|8, {PGPRZ, PMEMAIMMH}, 0x79400000, EN_MEMAIMMH}, /* LDRH (immediate) */ + {4|8, {PGPRZ, PMEMAREG}, 0x78600800, EN_MEMAREG}, /* LDRH (register) */ + {4|8, {PGPRZ, PMEMPREPOST}, 0x78400000, EN_MEMAPREPOST}, /* LDRH (immediate, (pre/postinc)) */ +) +DEFINSTR2(Xldrsh, + {4, {PGPRZ, PMEMAIMMH}, 0x79C00000, EN_MEMAIMMH}, /* LDRSH (immediate) */ + {8, {PGPRZ, PMEMAIMMH}, 0x79800000, EN_MEMAIMMH}, + {4, {PGPRZ, PMEMAREG}, 0x78E00800, EN_MEMAREG}, /* LDRSH (register) */ + {8, {PGPRZ, PMEMAREG}, 0x78A00800, EN_MEMAREG}, + {4, {PGPRZ, PMEMPREPOST}, 0x78C00000, EN_MEMAPREPOST}, /* LDRSH (immediate, (pre/postinc)) */ + {8, {PGPRZ, PMEMPREPOST}, 0x78800000, EN_MEMAPREPOST}, +) +DEFINSTR2(Xldrb, + {4|8, {PGPRZ, PMEMAIMM}, 0x39400000, EN_MEMAIMM}, /* LDRB (immediate) */ + {4|8, {PGPRZ, PMEMAREG}, 0x38600800, EN_MEMAREG}, /* LDRB (register) */ + {4|8, {PGPRZ, 
PMEMPREPOST}, 0x38400000, EN_MEMAPREPOST}, /* LDRB (immediate, (pre/postinc)) */ +) +DEFINSTR2(Xldrsb, + {4, {PGPRZ, PMEMAIMM}, 0x39C00000, EN_MEMAIMM}, /* LDRSB (immediate) */ + {8, {PGPRZ, PMEMAIMM}, 0x39800000, EN_MEMAIMM}, + {4, {PGPRZ, PMEMAREG}, 0x38E00800, EN_MEMAREG}, /* LDRSB (register) */ + {8, {PGPRZ, PMEMAREG}, 0x38A00800, EN_MEMAREG}, + {4, {PGPRZ, PMEMPREPOST}, 0x38C00000, EN_MEMAPREPOST}, /* LDRSB (immediate, (pre/postinc)) */ + {8, {PGPRZ, PMEMPREPOST}, 0x38800000, EN_MEMAPREPOST}, +) +DEFINSTR2(Xstr, + {4, {PGPRZ, PMEMAIMMW}, 0xB9000000, EN_MEMAIMMW}, /* STR (immediate) */ + {8, {PGPRZ, PMEMAIMMX}, 0xF9000000, EN_MEMAIMMX}, + {4, {PGPRZ, PMEMAREG}, 0xB8200800, EN_MEMAREG}, /* STR (register) */ + {8, {PGPRZ, PMEMAREG}, 0xF8200800, EN_MEMAREG}, + {4, {PGPRZ, PMEMPREPOST}, 0xB8000000, EN_MEMAPREPOST}, /* STR (immediate, (pre/postinc)) */ + {8, {PGPRZ, PMEMPREPOST}, 0xF8000000, EN_MEMAPREPOST}, +) +DEFINSTR2(Xstrh, + {4|8, {PGPRZ, PMEMAIMMH}, 0x79000000, EN_MEMAIMMH}, /* STRH (immediate) */ + {4|8, {PGPRZ, PMEMAREG}, 0x78200800, EN_MEMAREG}, /* STRH (register) */ + {4|8, {PGPRZ, PMEMPREPOST}, 0x78000000, EN_MEMAPREPOST}, /* STRH (immediate, (pre/postinc)) */ +) +DEFINSTR2(Xstrb, + {4|8, {PGPRZ, PMEMAIMM}, 0x39000000, EN_MEMAIMM}, /* STRB (immediate) */ + {4|8, {PGPRZ, PMEMAREG}, 0x38200800, EN_MEMAREG}, /* STRB (register) */ + {4|8, {PGPRZ, PMEMPREPOST}, 0x38000000, EN_MEMAPREPOST}, /* STRB (immediate, (pre/postinc)) */ +) +DEFINSTR3(Xldp, + {8, {PGPRZ, PGPRZ, PMEMPREPOST}, 0xA8400000, EN_MEMPPREPOST} /* LDP (immediate, (pre/postinc)) */ +) +DEFINSTR3(Xstp, + {8, {PGPRZ, PGPRZ, PMEMPREPOST}, 0xA8000000, EN_MEMPPREPOST} /* STP (immediate, (pre/postinc)) */ +) +/* Emit a call: BL with a REL_CALL26 relocation for a symbol operand, BLR for a register operand. */ +static void +Xcall(uchar **pcode, struct oper f) +{ + if (f.t == OSYM) { + objreloc(xcon2sym(f.con), REL_CALL26, Stext, *pcode - objout.textbegin, 0); + W32(0x94000000); /* BL */ + } else { + assert(opermatch(PGPRZ, KPTR, f)); + W32(0xD63F0000 | f.reg<<5); /* BLR Xn */ + } +} + +static void +gencopy(uchar **pcode, enum irclass cls, struct 
block *blk, int curi, struct oper dst, union ref val) +{ + if (kisint(cls) && dst.t == OREG && isintcon(val)) { + /* MOV r, #imm */ + uvlong u = intconval(val); + if (~u <= 0xFFFF) { + /* immediate can be encoded with 1 MOVN instruction */ + Xmovn(pcode, cls, dst, mkoper(OIMM, .imm = ~u)); + } else if (u > 0xFFFF && aarch64_logimm(NULL, cls, u)) { + /* can be encoded as a logical immediate */ + Xorr(pcode, cls, dst, REGZR, mkoper(OIMM, .uimm = u)); + } else { + /* generate MOV (+ MOVKs) */ + if (cls == KI32) u = (uint)u; + int s = 0; + while (s < 48 && (u >> s & 0xFFFF) == 0) s += 16; + Xmovz(pcode, cls, dst, mkoper(OIMM, .imm = u & (0xFFFFull << s))); + for (s += 16; s <= 48; s += 16) { + if ((u >> s) & 0xFFFF) + Xmovk(pcode, cls, dst, mkoper(OIMM, .imm = u & (0xFFFFull << s))); + } + } + } else if (dst.t == OREG && (val.t == RREG || val.t == RTMP)) { + Xorr(pcode, cls, dst, REGZR, ref2oper(val)); /* MOV Rd, Rn ==> ORR Rd, zr, Rn */ + } else assert(0); +} + +/* maps blk -> address when resolved; or to linked list of jump displacement + * relocations */ +static struct blkaddr { + bool resolved; + union { + uint addr; + uint relreloc; + }; +} *blkaddr; + +static void +emitinstr(uchar **pcode, struct function *fn, struct block *blk, int curi, struct instr *ins) +{ + struct oper dst, o1, o2; + enum irclass cls = ins->cls; + void (*X3)(uchar **, enum irclass, struct oper, struct oper, struct oper) = NULL; + void (*X2)(uchar **, enum irclass, struct oper, struct oper) = NULL; + + switch (ins->op) { + default: assert(!"nyi"); + case Onop: break; + case Omove: + dst = ref2oper(ins->l); + gencopy(pcode, cls, blk, curi, dst, ins->r); + break; + case Oextu32: cls = KI32; + /* fallthru */ + case Ocopy: + dst = reg2oper(ins->reg-1); + gencopy(pcode, cls, blk, curi, dst, ins->l); + break; + case Oneg: /* NEG Rd, Rn ==> SUB Rd, zr, Rn */ + Xsub(pcode, cls, reg2oper(ins->reg-1), REGZR, ref2oper(ins->l)); + break; + case Oexts8: case Oexts16: case Oexts32: /* SXTB/H/W Rd, Rn ==> 
SBFM Rd, Rn, #0, #7/15/31 */ + Xsbfm(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->l), 0, (8<<(ins->op-Oexts8)/2)-1); + break; + case Oextu8: case Oextu16: /* UXTB/H Rd, Rn ==> UBFM Rd, Rn, #0, #7/15 */ + Xubfm(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->l), 0, (8<<(ins->op-Oexts8)/2)-1); + break; + case Oadd: dst = reg2oper(ins->reg-1); X3 = Xadd; goto ALU3; + case Osub: dst = reg2oper(ins->reg-1); X3 = Xsub; goto ALU3; + case Oand: dst = reg2oper(ins->reg-1); X3 = Xand; goto ALU3; + case Oior: dst = reg2oper(ins->reg-1); X3 = Xorr; goto ALU3; + case Oxor: dst = reg2oper(ins->reg-1); X3 = Xeor; goto ALU3; + ALU3: + X3(pcode, cls, dst, ref2oper(ins->l), ref2oper(ins->r)); + break; + case Oshl: + if (ins->r.t == RICON) { + uint nbit = cls == KI32 ? 32 : 64, s = ins->r.i & nbit-1; + assert(s > 0); + Xubfm(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->l), nbit-s, nbit-s-1); + } else assert(!"nyi lslv"); + break; + case Oslr: + if (ins->r.t == RICON) { + uint nbit = cls == KI32 ? 32 : 64, s = ins->r.i & nbit-1; + assert(s > 0); + Xubfm(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->l), s, nbit-1); + } else assert(!"nyi lsrv"); + break; + case Osar: + if (ins->r.t == RICON) { + uint nbit = cls == KI32 ? 
32 : 64, s = ins->r.i & nbit-1; + assert(s > 0); + Xsbfm(pcode, cls, reg2oper(ins->reg-1), ref2oper(ins->l), s, nbit-1); + } else assert(!"nyi lsrv"); + break; + case Oloadu8: X2 = Xldrb; goto Load; + case Oloads8: X2 = Xldrsb; goto Load; + case Oloadu16: X2 = Xldrh; goto Load; + case Oloads16: X2 = Xldrsh; goto Load; + case Oloads32: + if (cls != KI32) { + X2 = Xldrsw; + goto Load; + } + /* fallthru */ + case Oloadu32: + cls = KI32; + /* fallthru */ + case Oloadi64: X2 = Xldr; + Load: + X2(pcode, cls, reg2oper(ins->reg-1), mkmemoper(8<<(ins->op - Oloads8)/2, ins->l)); + break; + case Ostore8: cls = KI32; X2 = Xstrb; goto Store; + case Ostore16: cls = KI32; X2 = Xstrh; goto Store; + case Ostore32: cls = KI32; X2 = Xstr; goto Store; + case Ostore64: cls = KI64; X2 = Xstr; + Store: + X2(pcode, cls, ref2oper(ins->r), mkmemoper(8<<(ins->op-Ostore8), ins->l)); + break; + case Ocall: + Xcall(pcode, ref2oper(ins->l)); + break; + } +} + +/* Push the GPRs in R(19)..LR that are in the save set (used callee-saved registers, FP when usebp, + * LR in non-leaf functions) with pre-indexed STP, 16 bytes per push, and add the number of 8-byte + * slots pushed to *npush. The pairing has to mirror calleerestore, which pops (reg, next-higher reg) + * pairs walking down from LR and finally pops a leftover lowest register from the upper slot + * (LDP zr, reg). So with an odd count the lowest register is pushed first, with zr in the lower + * slot. Returns 0 when the save set is empty, 1 otherwise. */ +static bool +calleesave(int *npush, uchar **pcode, struct function *fn) +{ + regset usage = (fn->regusage & mctarg->rcallee) | (usebp * BIT(FP)) | (!fn->isleaf * BIT(LR)); + if (!usage) return 0; + int prev = 0, nsave = 0; + for (uint reg = R(19); reg <= LR; ++reg) + if (rstest(usage, reg)) ++nsave; + for (uint reg = R(19); reg <= LR; ++reg) { + if (!rstest(usage, reg)) continue; + if (nsave & 1) { + /* unpaired register: same slot layout as the final LDP zr, reg in calleerestore */ + Xstp(pcode, KPTR, REGZR, reg2oper(reg), + mkoper(OMEM, .m = {.mode = APREIDX, .base = SP, .disp = -16})); + *npush += 2; + --nsave; /* the remaining count is even, so everything else pairs up */ + } else if (prev) { + *npush += 2; + Xstp(pcode, KPTR, reg2oper(prev), reg2oper(reg), + mkoper(OMEM, .m = {.mode = APREIDX, .base = SP, .disp = -16})); + prev = 0; + } else prev = reg; + } + return 1; +} + +static void +calleerestore(uchar **pcode, struct function *fn) +{ + regset usage = (fn->regusage & mctarg->rcallee) | (usebp * BIT(FP)) | (!fn->isleaf * BIT(LR)); + if (!usage) return; + int prev = 0; + for (uint reg = LR; reg >= R(19); --reg) { + if (!rstest(usage, reg)) continue; + if (prev) { + Xldp(pcode, KPTR, reg2oper(reg), reg2oper(prev), + mkoper(OMEM, .m = {.mode = APOSTIDX, .base = SP, .disp = 16})); + 
prev = 0; + } else prev = reg; + } + if (prev) { + Xldp(pcode, KPTR, REGZR, reg2oper(prev), + mkoper(OMEM, .m = {.mode = APOSTIDX, .base = SP, .disp = 16})); + } +} + +static void +emitbin(struct function *fn) +{ + struct block *blk; + uchar **pcode = &objout.code; + int npush = 0; + bool saverestore; + + fnstart = *pcode; + curfnsym = fn->name; + + /** prologue **/ + + /* only use frame pointer in non-leaf functions and functions that use the stack */ + usebp = 0; + if (!fn->isleaf || fn->stksiz) { + usebp = 1; + } + saverestore = calleesave(&npush, pcode, fn); + + /* ensure stack is 16-byte aligned for function calls */ + if (!fn->isleaf && ((fn->stksiz + npush*8) & 0xF) != 0) { + assert(usebp); + if ((rbpoff & 0xF) == 0) { + rbpoff -= 16; + fn->stksiz += 24; + } else { + rbpoff -= 8; + fn->stksiz += 8; + } + } + + if (fn->stksiz != 0) { + } + + if (*pcode - fnstart > 6) { + /* largue prologue -> largue epilogue -> transform to use single exit point */ + struct block *exit = NULL; + blk = fn->entry->lprev; + do { + if (blk->jmp.t == Jret) { + if (!exit) { + if (blk->ins.n == 0) { + exit = blk; + continue; + } else { + exit = newblk(fn); + exit->lnext = blk->lnext; + exit->lprev = blk; + blk->lnext = exit; + exit->lnext->lprev = exit; + exit->id = fn->nblk++; + exit->jmp.t = Jret; + } + } + blk->jmp.t = Jb; + memset(blk->jmp.arg, 0, sizeof blk->jmp.arg); + blk->s1 = exit; + } else if (exit) { + /* thread jumps to the exit block */ + if (blk->s1 && !blk->s1->ins.n && blk->s1->s1 == exit && !blk->s1->s2) blk->s1 = exit; + if (blk->s2 && !blk->s2->ins.n && blk->s2->s1 == exit && !blk->s2->s2) blk->s2 = exit; + } + } while ((blk = blk->lprev) != fn->entry); + } + + blkaddr = allocz(fn->passarena, fn->nblk * sizeof *blkaddr, 0); + + blk = fn->entry; + do { + struct blkaddr *bb = &blkaddr[blk->id]; + uint bbaddr = *pcode - objout.textbegin; + assert(!bb->resolved); + while (bb->relreloc) { + uint next; + int disp = bbaddr - bb->relreloc - 4; + + //memcpy(&next, 
objout.textbegin + bb->relreloc, 4); + //wr32le(objout.textbegin + bb->relreloc, disp); + bb->relreloc = next; + } + bb->resolved = 1; + bb->addr = bbaddr; + + for (int i = 0; i < blk->ins.n; ++i) { + emitinstr(pcode, fn, blk, i, &instrtab[blk->ins.p[i]]); + } + if (blk->jmp.t == Jret) { + /* epilogue */ + if (saverestore) + calleerestore(pcode, fn); + W32(0xD65F03C0); /* RET */ + } else if (blk->jmp.t == Jtrap) { + W32(0xD4200020); /* BRK #0x1 */ + } else ;//emitbranch(pcode, blk); + } while ((blk = blk->lnext) != fn->entry); + objdeffunc(fn->name, fn->globl, fnstart - objout.textbegin, *pcode - fnstart); +} + +void +aarch64_emit(struct function *fn) +{ + fn->stksiz = alignup(fn->stksiz, 8); + if (fn->stksiz > 1<<24) error(NULL, "'%s' stack frame too big", fn->name); + emitbin(fn); +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/aarch64/isel.c b/aarch64/isel.c new file mode 100644 index 0000000..a61fa21 --- /dev/null +++ b/aarch64/isel.c @@ -0,0 +1,440 @@ +#include "all.h" + +/* map alloca tmp -> stack frame displacement (0 if not alloca) */ +static ushort *stkslots; +static uint nstkslots; + +#define isstkslot(r) ((r).t == RTMP && (r).i < nstkslots && stkslots[(r).i]) +#define isimm32(r) (iscon(r) && concls(r) == KI32) + +static void +picfixsym(union ref *r, struct block *blk, int *curi) +{ + if (!ccopt.pic || !isaddrcon(*r,0)) return; + *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, .l = *r)); +} + +static inline uint +clz(uvlong x) +{ +#if HAS_BUILTIN(clzll) + return __builtin_clzll(x); +#else + int i = 0; + for (uvlong mask = BIT(63);; ++i, mask >>= 1) + if (x & mask) + break; + return i; +#endif +} + +/* Encode logical immediate */ +bool +aarch64_logimm(uint *enc, enum irclass k, uvlong x) +{ + /* https://github.com/v8/v8/blob/927ccc6076e25a614787c7011315468e40fe39a4/src/codegen/arm64/assembler-arm64.cc#L4409 */ + if (k == KI32) x = (uint)x | x << 32; + bool neg; + if ((neg = x & 1)) x = ~x; + if (x == 0) return 0; + uvlong a = x & (~x + 1), + xa 
= x + a, + b = xa & (~xa + 1), + xa_b = xa - b, + c = xa_b & (~xa_b + 1), + mask; + uint clza = clz(a), + d, outn; + if (c != 0) { + d = clza - clz(c); + mask = BIT(d) - 1; + outn = 0; + } else { + assert(a != 0); + d = 64; + mask = ~0ull; + outn = 1; + } + if (!ispo2(d)) return 0; + if (((b - a) & ~mask) != 0) return 0; + static const uvlong M[] = { + 0x0000000000000001, 0x0000000100000001, 0x0001000100010001, + 0x0101010101010101, 0x1111111111111111, 0x5555555555555555, + }; + int i = clz(d) - 57; + assert((uint)i < countof(M)); + uvlong m = M[i]; + uvlong y = (b - a) * m; + if (y != x) return 0; + if (enc) { + int clzb = b == 0 ? -1 : clz(b), + s = clza - clzb, + r; + if (neg) { + s = d - s; + r = (clzb + 1) & (d - 1); + } else { + r = (clza + 1) & (d - 1); + } + *enc = outn<<12 | r<<6 | (((-d * 2) | (s - 1)) & 0x3F); + } + return 1; + +} + +/* Legalize operand *r. ins is the instruction owning the operand and may be NULL (seljmp passes + * NULL for a jump argument). If ins->r is an integer constant that the opcode can take as an + * immediate, nothing is changed; a stack slot becomes an FP-relative add; any other operand that + * is not a temporary is copied into a new temporary inserted at *curi. */ +static void +fixarg(union ref *r, struct instr *ins, struct block *blk, int *curi) +{ + enum op op = ins ? ins->op : 0; + /* NOTE(review): this tests ins->r even when r points at ins->l - confirm that is intended */ + if (ins && isintcon(ins->r)) { + vlong x = intconval(ins->r); + switch (op) { + default: + if (oiscmp(op)) { + case Oadd: case Osub: + /* imm12 (lsl 12) */ + if ((x &~ 0xFFF) == 0 || (x &~ 0xFFF000) == 0) return; + break; + case Oshl: case Osar: case Oslr: + if ((uvlong)x < (ins->cls == KI32 ? 32 : 64)) return; + break; + case Oand: case Oior: case Oxor: + if (aarch64_logimm(NULL, ins->cls, x)) return; + break; + } + } + goto Copy; + } else if (isstkslot(*r)) { + struct instr adr = mkinstr(Oadd, KPTR, mkref(RREG, FP), mkintcon(KI32, -stkslots[r->i])); + if (ins && ins->op == Ocopy) + *ins = adr; + else + *r = insertinstr(blk, (*curi)++, adr); + } else if (r->t != RTMP) Copy: { + /* without an owning instruction (or class) the copy defaults to KI32 */ + *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, r->t == RTMP ? instrtab[r->i].cls : (ins && ins->cls) ? ins->cls : KI32, *r)); + } +} + +static bool +arithfold(struct instr *ins) +{ + if (isnumcon(ins->l) && (!ins->r.t || isnumcon(ins->r))) { + union ref r; + bool ok = ins->r.t ? 
foldbinop(&r, ins->op, ins->cls, ins->l, ins->r) : foldunop(&r, ins->op, ins->cls, ins->l); + assert(ok && "fold?"); + *ins = mkinstr(Ocopy, insrescls(*ins), r); + return 1; + } + return 0; +} + +static void +selcall(struct function *fn, struct instr *ins, struct block *blk, int *curi) +{ + const struct call *call = &calltab.p[ins->r.i]; + int iarg = *curi - 1; + enum irclass cls; + uint argstksiz = alignup(call->argstksiz, 16); + + for (int i = call->narg - 1; i >= 0; --i) { + struct abiarg abi = call->abiarg[i]; + struct instr *arg; + for (;; --iarg) { + assert(iarg >= 0 && i >= 0 && "arg?"); + if ((arg = &instrtab[blk->ins.p[iarg]])->op == Oarg) + break; + } + + if (!abi.isstk) { + assert(!abi.ty.isagg); + *arg = mkinstr(Omove, call->abiarg[i].ty.cls, mkref(RREG, abi.reg), arg->r); + } else { + union ref adr = mkaddr((struct addr){mkref(RREG, SP), .disp = abi.stk}); + int iargsave = iarg; + if (!abi.ty.isagg) { /* scalar arg in stack */ + *arg = mkinstr(Ostore8+ilog2(cls2siz[abi.ty.cls]), 0, adr, arg->r); + if (isaddrcon(arg->r,1) || arg->r.t == RADDR) + arg->r = insertinstr(blk, iarg++, mkinstr(Ocopy, abi.ty.cls, arg->r)); + else + fixarg(&ins->r, ins, blk, &iarg); + } else { /* aggregate arg in stack, callee stack frame destination address */ + *arg = mkinstr(Ocopy, KPTR, adr); + } + *curi += iarg - iargsave; + } + } + if (call->argstksiz) { + union ref disp = mkref(RICON, argstksiz); + insertinstr(blk, iarg--, (struct instr){Osub, KPTR, .keep=1, .reg = SP+1, .l=mkref(RREG,SP), disp}); + ++*curi; + insertinstr(blk, *curi+1, (struct instr){Oadd, KPTR, .keep=1, .reg = SP+1, .l=mkref(RREG,SP), disp}); + } + if (isimm32(ins->l)) + ins->l = mkaddr((struct addr){.base = ins->l}); + else if (isintcon(ins->l)) + ins->l = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, ins->l)); + + cls = ins->cls; + ins->cls = 0; + if (cls) { + /* duplicate to reuse same TMP ref */ + insertinstr(blk, (*curi)++, *ins); + *ins = mkinstr(Ocopy, cls, mkref(RREG, call->abiret[0].reg)); + 
for (int i = 1; i <= 2; ++i) { + if (*curi + i >= blk->ins.n) break; + if (instrtab[blk->ins.p[*curi + i]].op == Ocall2r) { + ins = &instrtab[blk->ins.p[*curi += i]]; + *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, call->abiret[1].reg)); + break; + } + } + } +} + +static bool +aimm(struct addr *addr, int disp) /* add disp to addr->disp; returns 0 and leaves addr untouched when an index is already set or the sum does not fit in an int */ +{ + if (addr->index.bits) return 0; + vlong a = addr->disp; + a += disp; + if ((int)a == a) { + addr->disp = a; + return 1; + } + return 0; +} + +static bool +ascale(struct addr *addr, union ref a, union ref b, uint siz/*1,2,4,8*/) /* NOTE(review): the third check below is cut off mid-expression in this copy of the patch; restore it from the repository before relying on this function */ +{ + if (b.t != RICON) return 0; + if (addr->index.bits || addr->disp) return 0; + if ((unsigned)b.i > 3 || 1<index = a; + addr->shift = b.i; + return 1; + } + return 0; +} + +static bool +aadd(struct addr *addr, struct block *blk, int *curi, union ref r, uint siz/*1,2,4,8*/) /* fold the value r into addr as base, index or displacement, recursing through add/sub/shl/copy temporaries and marking the folded instructions skip; returns 0 when r cannot be added */ +{ + if (r.t == RSTACK) { + if (addr->base.bits || addr->index.bits || !aimm(addr, -r.i)) goto Ref; + addr->base = mkref(RREG, FP); + } else if (r.t == RTMP) { + struct instr *ins = &instrtab[r.i]; + if (ins->op == Oadd) { + if (!aadd(addr, blk, curi, ins->l, siz)) goto Ref; + if (!aadd(addr, blk, curi, ins->r, siz)) goto Ref; + ins->skip = 1; + } else if (ins->op == Osub) { + if (!aadd(addr, blk, curi, ins->l, siz)) goto Ref; + if (!isintcon(ins->r)) goto Ref; + if (!aimm(addr, -intconval(ins->r))) goto Ref; + ins->skip = 1; + } else if (ins->op == Oshl) { + if (!ascale(addr, ins->l, ins->r, siz)) goto Ref; + ins->skip = 1; + } else if (ins->op == Ocopy) { + if (!aadd(addr, blk, curi, ins->l, siz)) goto Ref; + ins->skip = 1; + } else goto Ref; + } else if (isnumcon(r)) { + assert(isintcon(r)); + return aimm(addr, intconval(r)); + } else if (isaddrcon(r,1)) { + if (!addr->base.bits && !isaddrcon(addr->index,1)) addr->base = r; + else return 0; + } else if (r.t == RREG) { + /* temporaries are single assignment, but registers aren't, so they can't be * + * safely hoisted into an address value, unless they have global lifetime */ + if (!rstest(mctarg->rglob, r.i)) 
return 0; + Ref: + if (r.t == RSTACK && (addr->base.bits || addr->index.bits)) { + r = insertinstr(blk, (*curi)++, mkinstr(Oadd, KPTR, mkref(RREG, FP), mkref(RICON, -r.i))); + } + if (!addr->base.bits) addr->base = r; + else if (!addr->index.bits) addr->index = r; + else return 0; + } else return 0; + return 1; +} + +static bool +fuseaddr(union ref *r, struct block *blk, int *curi, uint siz/*1,2,4,8*/) /* try to replace *r with one address operand built by aadd; returns 1 when *r already is an address constant or was rewritten, 0 otherwise */ +{ + struct addr addr = {0}; + + if (isaddrcon(*r,1)) return 1; + + if (r->t != RSTACK && r->t != RTMP) return 0; + if (!aadd(&addr, blk, curi, *r, siz)) return 0; + if (isaddrcon(addr.base,0) && (ccopt.pic || (ccopt.pie && addr.index.bits) || (conht[addr.base.i].flag & SFUNC))) { + /* pic needs to load from GOT */ + /* pie cannot encode PC-relative address with index register */ + /* first load symbol address into a temp register */ + union ref temp = mkaddr((struct addr){.base = addr.base, .disp = ccopt.pic ? 0 : addr.disp}); + addr.base = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, .l = temp)); + if (!ccopt.pic) addr.disp = 0; + } + if (!(addr.disp >= -256 && addr.disp < 256) /* for 9-bit signed unscaled offset */ + && !(!(addr.disp & (siz-1)) && (uvlong)addr.disp < (1<<12)*siz)) /* 12-bit unsigned scaled offset */ + return 0; + *r = mkaddr(addr); + return 1; +} + + +static void +loadstoreaddr(struct block *blk, union ref *r, int *curi, uint siz) /* put the address operand *r of a siz-byte load/store into a usable form; anything that is not an immediate, symbol, temporary, stack slot or register is first copied into a new instruction */ +{ + if (isimm32(*r)) { + *r = mkaddr((struct addr){.base = *r}); + } else if (isaddrcon(*r, 0)) { + picfixsym(r, blk, curi); + } else if (r->t == RTMP || r->t == RSTACK) { + fuseaddr(r, blk, curi, siz); + } else if (r->t != RREG) { + *r = insertinstr(blk, (*curi)++, mkinstr(Ocopy, KPTR, *r)); + } +} + +static void +sel(struct function *fn, struct instr *ins, struct block *blk, int *curi) /* rewrite a single instruction in place according to its opcode */ +{ + uint siz, alignlog2; + int t = ins - instrtab; + struct instr temp = {0}; + enum op op = ins->op; + + if (oisarith(ins->op) && arithfold(ins)) { + fixarg(&ins->l, ins, blk, curi); + return; + } + + switch (op) { + //default: 
assert(0); + case Onop: break; + case Oalloca1: case Oalloca2: case Oalloca4: case Oalloca8: case Oalloca16: + alignlog2 = ins->op - Oalloca1; + assert(ins->l.i > 0); + siz = ins->l.i << alignlog2; + fn->stksiz += siz; + fn->stksiz = alignup(fn->stksiz, 1 << alignlog2); + if (fn->stksiz > (1<<16)-1) error(NULL, "'%s' stack frame too big", fn->name); + stkslots[t] = fn->stksiz; + *ins = mkinstr(Onop,0,); + break; + case Oparam: + assert(ins->l.t == RICON && ins->l.i < fn->nabiarg); + if (!fn->abiarg[ins->l.i].isstk) + *ins = mkinstr(Ocopy, ins->cls, mkref(RREG, fn->abiarg[ins->l.i].reg)); + else /* stack: address is FP + 16 + the stack offset of the argument (the 16 presumably skips the saved FP/LR pair; confirm against the prologue in emit.c) */ + *ins = mkinstr(Oadd, KPTR, mkref(RREG, FP), mkref(RICON, 16+fn->abiarg[ins->l.i].stk)); + break; + case Oadd: case Osub: + if (ins->r.t == RICON && ins->r.i < 0) { /* negative immediate: toggle the low opcode bit (presumably swapping Oadd and Osub) and negate the constant */ + op = ins->op ^= 1; + ins->r.i = -ins->r.i; + } + fixarg(&ins->l, ins, blk, curi); + fixarg(&ins->r, ins, blk, curi); + break; + case Oand: case Oior: case Oxor: + case Oshl: case Osar: case Oslr: + fixarg(&ins->r, ins, blk, curi); + break; + case Oarg: + fixarg(&ins->r, ins, blk, curi); + break; + case Ocall: + selcall(fn, ins, blk, curi); + break; + case Oloads8: case Oloadu8: case Oloads16: case Oloadu16: + case Oloads32: case Oloadu32: case Oloadi64: + loadstoreaddr(blk, &ins->l, curi, 1<<((op - Oloads8)/2)); + break; + case Ostore8: case Ostore16: case Ostore32: case Ostore64: + loadstoreaddr(blk, &ins->l, curi, 1<<(op - Ostore8)); + fixarg(&ins->r, ins, blk, curi); + break; + } +} + +static void +seljmp(struct function *fn, struct block *blk) /* fix up the block terminator: conditional branches get a compare against zero when their argument is not already a comparison, returns get moves into the ABI return registers */ +{ + if (blk->jmp.t == Jb && blk->jmp.arg[0].bits) { + int curi = blk->ins.n; + fixarg(&blk->jmp.arg[0], NULL, blk, &curi); + union ref c = blk->jmp.arg[0]; + if (c.t != RTMP) { + enum irclass cls = c.t == RICON ? KI32 : c.t == RXCON && conht[c.i].cls ? 
conht[c.i].cls : KPTR; + int curi = blk->ins.n; + + c = insertinstr(blk, blk->ins.n, mkinstr(Ocopy, cls, c)); + sel(fn, &instrtab[c.i], blk, &curi); + } + if (!oiscmp(instrtab[c.i].op)) { + struct instr *ins; + int curi = blk->ins.n; + blk->jmp.arg[0] = insertinstr(blk, blk->ins.n, mkinstr(Oneq, insrescls(instrtab[c.i]), c, ZEROREF)); + ins = &instrtab[blk->jmp.arg[0].i]; + if (kisflt(ins->cls)) { + ins->r = insertinstr(blk, curi, mkinstr(Ocopy, ins->cls, ZEROREF)); + } + ins->keep = 1; + } + } else if (blk->jmp.t == Jret) { + if (blk->jmp.arg[0].bits) { + int curi; + union ref r = mkref(RREG, fn->abiret[0].reg); + struct instr *ins = &instrtab[insertinstr(blk, blk->ins.n, mkinstr(Omove, fn->abiret[0].ty.cls, r, blk->jmp.arg[0])).i]; + curi = blk->ins.n-1; + fixarg(&ins->r, ins, blk, &curi); + blk->jmp.arg[0] = r; + if (blk->jmp.arg[1].bits) { + r = mkref(RREG, fn->abiret[1].reg); + ins = &instrtab[insertinstr(blk, blk->ins.n, mkinstr(Omove, fn->abiret[1].ty.cls, r, blk->jmp.arg[1])).i]; + } + } + } +} + +void +aarch64_isel(struct function *fn) /* instruction selection entry point: resets the frame size and slot table, then processes phi arguments, instructions and the terminator of every block */ +{ + extern int ninstr; + struct block *blk = fn->entry; + + fn->stksiz = 0; + stkslots = allocz(fn->passarena, (nstkslots = ninstr) * sizeof *stkslots, 0); + do { + int i; + for (i = 0; i < blk->phi.n; ++i) { + struct instr *ins = &instrtab[blk->phi.p[i]]; + union ref *phi = phitab.p[ins->l.i]; + for (int i = 0; i < blk->npred; ++i) { + int curi = blkpred(blk, i)->ins.n; + fixarg(&phi[i], ins, blkpred(blk, i), &curi); + } + } + for (i = 0; i < blk->ins.n; ++i) { + struct instr *ins = &instrtab[blk->ins.p[i]]; + sel(fn, ins, blk, &i); + } + seljmp(fn, blk); + } while ((blk = blk->lnext) != fn->entry); + + if (ccopt.dbg.i) { + bfmt(ccopt.dbgout, "<< After isel >>\n"); + irdump(fn); + } + + fn->prop = 0; +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/c/lex.c b/c/lex.c index d6aa95a..63b066b 100644 --- a/c/lex.c +++ b/c/lex.c @@ -2065,6 +2065,10 @@ addpredefmacros(struct arena **tmparena) putdef1("__x86_64__"); 
putdef1("__x86_64"); break; + case ISaarch64: + putdef1("__aarch64"); + putdef1("__aarch64__"); + break; } if (target.os != OSunknown) putdef1("__STDC_HOSTED__"); diff --git a/common.h b/common.h index 9804447..ac4bc88 100644 --- a/common.h +++ b/common.h @@ -73,7 +73,7 @@ ptrhash(const void *p) { static inline uint popcnt(uvlong x) { #if HAS_BUILTIN(popcountll) - return __builtin_popcountll(x); + return x ? __builtin_popcountll(x) : 0; #else uint n = 0; while (x) n += x&1, x >>= 1; @@ -86,7 +86,7 @@ ispo2(uvlong x) { } static inline uint ilog2(uvlong x) { /* assumes x is a power of 2 */ -#if HAS_BUILTIN(ctz) +#if HAS_BUILTIN(ctzll) return __builtin_ctzll(x); #else uint n = 0; @@ -97,7 +97,7 @@ ilog2(uvlong x) { /* assumes x is a power of 2 */ static inline uint lowestsetbit(uvlong x) { -#if HAS_BUILTIN(ctz) +#if HAS_BUILTIN(ctzll) return __builtin_ctzll(x); #else int i = 0; @@ -163,7 +163,7 @@ extern struct inclpaths { /**********/ struct targtriple { - enum mcarch { ISxxx, ISx86_64 } arch; + enum mcarch { ISxxx, ISx86_64, ISaarch64 } arch; enum mcos { OSunknown, OSlinux } os; enum mcabi { ABInone, ABIgnu, ABImusl } abi; }; diff --git a/obj/elf.c b/obj/elf.c index 512e710..e39a8d8 100644 --- a/obj/elf.c +++ b/obj/elf.c @@ -49,6 +49,7 @@ elfinit(void) switch (target.arch) { default: assert(!"arch?"); case ISx86_64: hdr.h32.machine = EM_X86_64; break; + case ISaarch64: hdr.h32.machine = EM_ARM64; break; } hdr.h32.version = ELFVERSION; if (targ_64bit) { @@ -142,6 +143,14 @@ static const ushort relktab[][NRELOCKIND] = { [REL_PLT32] = 4, /* R_X86_64_PLT32 */ [REL_GOTPCRELX] = 41, /* R_X86_64_GOTPCRELX */ [REL_GOTPCRELX_REX] = 42, /* R_X86_64_REX_GOTPCRELX */ + }, + [ISaarch64] = { + [REL_ABS64] = 257, /* R_AARCH64_ABS64 */ + [REL_ABS32] = 258, /* R_AARCH64_ABS32 */ + [REL_ABS32S] = 258, /* R_AARCH64_ABS32 (AArch64 defines no separate ABS32S relocation) */ + [REL_PCREL32] = 261, /* R_AARCH64_PREL32 */ + [REL_PLT32] = 314, /* R_AARCH64_PLT32 */ + [REL_CALL26] = 283, /* R_AARCH64_CALL26 */ } }; @@ -363,7 +372,7 @@ 
wordalign(struct wbuf *out, int align) while (off++ & (align - 1)) ioputc(out, 0); } -static const bool userelatab[] = { [ISx86_64] = 1 }; +static const bool userelatab[] = { [ISx86_64] = 1, [ISaarch64] = 1 }; void elffini(struct wbuf *out) diff --git a/obj/obj.h b/obj/obj.h index 77da99a..985e583 100644 --- a/obj/obj.h +++ b/obj/obj.h @@ -17,6 +17,7 @@ enum relockind { REL_PLT32, REL_GOTPCRELX, REL_GOTPCRELX_REX, + REL_CALL26, NRELOCKIND, }; enum section { Snone, Stext, Srodata, Sdata, Sbss }; diff --git a/targ.c b/targ.c index c715ed7..515bda3 100644 --- a/targ.c +++ b/targ.c @@ -1,7 +1,7 @@ #include "common.h" #include "type.h" -extern const struct mctarg t_x86_64_sysv; +extern const struct mctarg t_x86_64_sysv, t_aarch64_aapcs; static const struct targ { struct { enum mcarch arch; uint oss, abis; }; struct { uchar longsize, vlongsize, ptrsize, valistsize; }; @@ -10,7 +10,8 @@ static const struct targ { uchar sizetype, ptrdifftype, wchartype; const struct mctarg *mctarg; } targs[] = { - { {ISx86_64, -1, 1<arch = ISx86_64; - } else return 0; + else if (matchstr(&str, "aarch64-") || matchstr(&str, "arm64-")) + trg->arch = ISaarch64; + else return 0; if (matchstr(&str, "unknown-") || matchstr(&str, "pc-")) {} @@ -64,7 +67,7 @@ parsetriple(struct targtriple *trg, const char *str) void targ_init(const char *starg) { - const struct targ *t = &targs[0]; + const struct targ *t = NULL; uchar *sizes = targ_primsizes, *align = targ_primalign; if (!starg) { @@ -75,6 +78,16 @@ targ_init(const char *starg) fatal(NULL, "unrecognized target: %s", starg); } + for (size_t i = 0; i < countof(targs); ++i) { + if (targs[i].arch == target.arch) + if (targs[i].oss & (1 << target.os)) + if (targs[i].abis & (1 << target.abi)) { + t = &targs[i]; + break; + } + } + if (!t) fatal(NULL, "unsupported target: %s", starg ? 
starg : "(host)"); + sizes[TYBOOL] = sizes[TYCHAR] = sizes[TYSCHAR] = sizes[TYUCHAR] = 1; sizes[TYSHORT] = sizes[TYUSHORT] = 2; sizes[TYUINT] = sizes[TYINT] = 4; diff --git a/x86_64/isel.c b/x86_64/isel.c index 40d8db4..98d66a1 100644 --- a/x86_64/isel.c +++ b/x86_64/isel.c @@ -194,19 +194,6 @@ aimm(struct addr *addr, int disp) return 0; } -static bool -acon(struct addr *addr, union ref r) -{ - vlong a = addr->disp; - assert(isintcon(r)); - a += intconval(r); - if ((int)a == a) { - addr->disp = a; - return 1; - } - return 0; -} - static bool ascale(struct addr *addr, union ref a, union ref b) { @@ -268,7 +255,8 @@ aadd(struct addr *addr, struct block *blk, int *curi, union ref r) ins->skip = 1; } else goto Ref; } else if (isnumcon(r)) { - return acon(addr, r); + assert(isintcon(r)); + return aimm(addr, intconval(r)); } else if (isaddrcon(r,1)) { if (!addr->base.bits && !isaddrcon(addr->index,1)) addr->base = r; else return 0; -- cgit v1.2.3