diff options
| -rw-r--r-- | src/a_main.c | 2 | ||||
| -rw-r--r-- | src/antcc.h | 2 | ||||
| -rw-r--r-- | src/c.c | 17 | ||||
| -rw-r--r-- | src/ir.c | 11 | ||||
| -rw-r--r-- | src/ir.h | 5 | ||||
| -rw-r--r-- | src/ir_inliner.c | 106 | ||||
| -rw-r--r-- | src/obj.c | 16 | ||||
| -rw-r--r-- | src/obj.h | 5 | ||||
| -rw-r--r-- | src/t_aarch64_emit.c | 10 | ||||
| -rw-r--r-- | src/t_x86-64_emit.c | 6 |
10 files changed, 139 insertions, 41 deletions
diff --git a/src/a_main.c b/src/a_main.c index 309fe23..56a38d2 100644 --- a/src/a_main.c +++ b/src/a_main.c @@ -688,7 +688,7 @@ cc1(const char *out, const char *in) if (task.verbose) efmt("cc1(/*out*/ %'s, /*in*/ %'s)\n", out, in); if (!ccopt.dbg.any && !task.syntaxonly) objini(in, out); ccomp(in); - if (!ccopt.dbg.any && !task.syntaxonly && !nerror) objfini(); + if (!task.syntaxonly && !nerror) objfini(!ccopt.dbg.any); return !!nerror; } diff --git a/src/antcc.h b/src/antcc.h index fcaf02a..b467156 100644 --- a/src/antcc.h +++ b/src/antcc.h @@ -291,7 +291,7 @@ extern char pmap_tombstone_[]; #define pmap_del(m, k) pmap_del_(&(m)->mb, k) #define pmap_each(m,kx,pvx) \ for (size_t _i = 0; _i < (m)->mb.N && ((kx) = (m)->mb.k[_i], (pvx) = &(m)->v[_i], 1); ++_i) \ - if (kx && kx != pmap_tombstone_) + if (kx && kx != (void*)pmap_tombstone_) /********/ /** IO **/ @@ -1387,6 +1387,7 @@ typedef struct InitParser { internstr sym; s64int addend; uint off; + uchar flag; } *drel; }; }; @@ -1428,14 +1429,17 @@ dumpini(InitParser *ip) #endif static s64int /* -> returns addend */ -expr2reloc(internstr *psym, const Expr *ex) +expr2reloc(internstr *psym, enum symflags *sf, const Expr *ex) { if (ex->t == ESSYMREF) { *psym = ex->ssym.sym; + *sf = (SLOCAL &- ex->ssym.local) | (SFUNC &- ex->ssym.func); return ex->ssym.off; } else if (ex->t == ESTRLIT || ex->t == EINIT) { if (ex->t == ESTRLIT) assert(ex->ty.t == TYARRAY); - *psym = xcon2sym(expraddr(NULL, ex).i); + Ref r = expraddr(NULL, ex); + *psym = xcon2sym(r.i); + *sf = contab.p[r.i].flag; return 0; } fatal(&ex->span, "internal bug: non static reloc?"); @@ -1506,10 +1510,11 @@ iniwrite(CComp *cm, InitParser *ip, uint off, uint bitsiz, uint bitoff, Type ty, memcpy(p, ex->s.p, n); } else { internstr sym; - s64int addend = expr2reloc(&sym, ex); + enum symflags sf; + s64int addend = expr2reloc(&sym, &sf, ex); if (!ip->dyn) { assert(ip->sec != Srodata || rodatarelocok()); - objreloc(sym, targ_64bit ? REL_ABS64 : REL_ABS32, + objreloc(sym, sf, targ_64bit ? REL_ABS64 : REL_ABS32, ip->sec, ip->off + off, addend); } else { InitReloc *rel = alloc(ip->arena, sizeof *rel, 0); @@ -1517,6 +1522,7 @@ iniwrite(CComp *cm, InitParser *ip, uint off, uint bitsiz, uint bitoff, Type ty, rel->sym = sym; rel->off = off; rel->addend = addend; + rel->flag = sf; ip->drel = rel; } } @@ -1886,7 +1892,8 @@ initializer(CComp *cm, Type *ty, enum evalmode ev, bool globl, memcpy(p + off, ip->ddat.p, ip->ddat.n); memset(p + off + ip->ddat.n, 0, siz - ip->ddat.n); for (InitReloc *rel = ip->drel; rel; rel = rel->link) { - objreloc(rel->sym, targ_64bit ? REL_ABS64 : REL_ABS32, sec, off + rel->off, rel->addend); + objreloc(rel->sym, rel->flag, targ_64bit ? REL_ABS64 : REL_ABS32, + sec, off + rel->off, rel->addend); } } vfree(&ip->ddat); @@ -688,8 +688,16 @@ irfini(Function *fn) freearena(fn->passarena); } if (maybeinlinee(fn)) { - // goto Fin; XXX do this by having inline function rematerialization when symbol is actually referenced + freearena(fn->passarena); + return; } + + irfini_end(fn); +} + +void +irfini_end(Function *fn) +{ lowerstack(fn); freearena(fn->passarena); if (ccopt.dbg.o) { @@ -702,7 +710,6 @@ irfini(Function *fn) if (objout.code) mctarg->emit(fn); -//Fin: freearena(fn->passarena); freefn(fn); } @@ -28,7 +28,7 @@ enum symflags { typedef struct IRCon { bool issym, isdat, deref; uchar cls; - uchar flag; + uchar flag; /* enum symflags */ union { internstr sym; int dat; @@ -262,6 +262,7 @@ extern int visitmark; #define mkarginstr(ty, x) mkinstr2(Oarg, 0, mktyperef(ty), (x)) void irinit(Function *); void irfini(Function *); +void irfini_end(Function *); #define cls2type(k) ((IRType){.cls=(k)}) IRType mkirtype(Type); Ref newxcon(const IRCon *); @@ -279,6 +280,7 @@ Ref mksymref(internstr, enum symflags); Ref mkdatref(internstr sym, Type ctype, uint siz, uint align, const void *, uint n, bool deref, bool funclocal); internstr xcon2sym(int ref); +#define objrelocxcon(xc, ...) objreloc(xcon2sym(xc), contab.p[xc].flag, __VA_ARGS__) Instr mkalloca(uint siz, uint align); Ref mkcallarg(IRType ret, uint narg, int vararg); #define mkintrin(B, C, N) mkinstr2(Ointrin, C, mkref(RICON, B), mkcallarg((IRType){{0}},N,-1)) @@ -350,6 +352,7 @@ void cselim(Function *); /** inliner.c **/ bool maybeinlinee(Function *); void doinline(Function *); +void emitxinlfns(bool all); /** intrin.c **/ void lowerintrin(Function *); diff --git a/src/ir_inliner.c b/src/ir_inliner.c index c1c36e2..685815f 100644 --- a/src/ir_inliner.c +++ b/src/ir_inliner.c @@ -1,7 +1,9 @@ #include "ir.h" +#include "obj.h" typedef struct SavedFunc { - uint ninstrtab; + bool emitted; + uint ninstrtab, ncontab, ncalltab, nphitab; Instr *instrtab; IRCon *contab; IRCall *calltab; @@ -17,16 +19,16 @@ enum { MAX_INLINED_FN_NINS = 50, MAX_INLINED_FN_NBLK = 16, }; static pmap_of(SavedFunc *) savedfns; +static Arena *savearena; bool maybeinlinee(Function *fn) { - static Arena *savearena; extern int ninstrtab, nfreeinstr; // TODO better heuristics if (ccopt.o < OPT1) return 0; - if (!fn->inlin && ccopt.o < OPT2) return 0; + if (!(fn->inlin || (!fn->globl && ccopt.o >= OPT2))) return 0; if (ninstrtab - nfreeinstr > MAX_INLINED_FN_NINS) return 0; if (fn->nblk > MAX_INLINED_FN_NBLK) return 0; for (int i = 0; i < fn->nabiarg; ++i) { @@ -50,20 +52,14 @@ maybeinlinee(Function *fn) if (fn->abiarg) sv->abiarg = alloccopy(&savearena, fn->abiarg, sizeof *sv->abiarg * fn->nabiarg, 0); sv->nabiarg = fn->nabiarg; - if ((sv->nabiret = fn->nabiret) > 0) - memcpy(sv->abiret, fn->abiret, sizeof sv->abiret); + sv->nabiret = fn->nabiret; + memcpy(sv->abiret, fn->abiret, sizeof sv->abiret); Block *bmap[MAX_INLINED_FN_NBLK]; Block *b = fn->entry; int id = 0; do { b->id = id++; Block *q = alloccopy(&savearena, b, sizeof *b, 0); - if (q->phi.n) - q->phi.p = alloccopy(&savearena, q->phi.p, sizeof *q->phi.p * q->phi.n, 0); - if (q->ins.n) - q->ins.p = alloccopy(&savearena, q->ins.p, sizeof *q->ins.p * q->ins.n, 0); - if (q->npred > 1) - q->_pred = alloccopy(&savearena, q->_pred, sizeof *q->_pred * q->npred, 0); q->lprev = NULL; q->idom = NULL; bmap[b->id] = q; @@ -81,8 +77,8 @@ maybeinlinee(Function *fn) } while ((b = b->lnext)); sv->instrtab = alloccopy(&savearena, instrtab, sizeof *instrtab * (sv->ninstrtab = ninstrtab), 0); - sv->contab = alloccopy(&savearena, contab.p, sizeof *contab.p * contab.n, 0); - if (calltab.n) { + sv->contab = alloccopy(&savearena, contab.p, sizeof *contab.p * (sv->ncontab = contab.n), 0); + if ((sv->ncalltab = calltab.n)) { sv->calltab = alloccopy(&savearena, calltab.p, sizeof *calltab.p * calltab.n, 0); for (int i = 0; i < calltab.n; ++i) { if (sv->calltab[i].abiarg) @@ -90,11 +86,9 @@ maybeinlinee(Function *fn) sv->calltab[i].narg * sizeof *sv->calltab[i].abiarg, 0); } } - if (phitab.n) { - sv->phitab = alloc(&savearena, sizeof *phitab.p * phitab.n, 0); - for (int i = 0; i < phitab.n; ++i) { - sv->phitab[i] = alloccopy(&savearena, phitab.p[i], sizeof *phitab.p[i] * xbcap(phitab.p[i]), 0); - } + if ((sv->nphitab = phitab.n)) { + sv->phitab = alloccopy(&savearena, phitab.p, sizeof *phitab.p * phitab.n, 0); + phitab.n = 0; } pmap_set(&savedfns, fn->name, sv); return 1; @@ -180,7 +174,7 @@ inlcall(Function *fn, Block *blk, int curi, SavedFunc *sv) for (int i = 0; i < b->phi.n; ++i) { int t = b->phi.p[i]; Ref *refs = NULL, - *src = sv->phitab[sv->instrtab[t].l.i]; + *src = sv->phitab[sv->instrtab[t].l.i]; xbgrow(&refs, b->npred); for (int i = 0; i < b->npred; ++i) refs[i] = mapref(instrmap, sv, src[i]); @@ -304,4 +298,78 @@ doinline(Function *fn) } while ((b = b->lnext) != fn->entry); } +static Function +rematerialize(Arena **arena, internstr name, SavedFunc *sv) +{ + Function fn = { arena, .name = name, .globl = 0/*always localG*/, .fnty = sv->fnty, + .retty = sv->retty, .abiarg = sv->abiarg, .nabiarg = sv->nabiarg, + .abiret = {sv->abiret[0], sv->abiret[1]}, .nabiret = sv->nabiret, + }; + irinit(&fn); + extern int ninstrtab; + ninstrtab = sv->ninstrtab; + memcpy(instrtab, sv->instrtab, ninstrtab * sizeof *instrtab); + vpushn(&calltab, sv->calltab, sv->ncalltab); + vpushn(&phitab, sv->phitab, sv->nphitab); + vpushn(&contab, sv->contab, sv->ncontab); + + fn.nblk = 0; + struct Block *last = fn.entry = NULL; + for (struct Block *b = sv->entry, *next; b; b = next) { + next = b->lnext; + if (last) { + b->lprev = last; + last->lnext = b; + } else { + fn.entry = b; + b->lprev = b; + } + last = b; + if (!next) { + fn.entry->lprev = b; + b->lnext = fn.entry; + } + ++fn.nblk; + } + fn.entry->lprev->lnext = fn.entry; + memset(instruse, 0, sizeof *instruse * ninstrtab); + filluses(&fn); + + return fn; +} + +void +emitxinlfns(bool all) +{ + enum { N = 1 << 12 }; + static union { char m[sizeof(Arena) + N]; Arena *_align; } amem[2]; + Arena *arena = (void *)amem[0].m, *passarena = (void *)amem[1].m; + arena->cap = N; + passarena->cap = N; + + /* looping until fixpoint because emitting functions might generate + * references to other stashed functions, which might have already been + * visited, but they need to be visited them again */ + for (bool change = 1; change;) { + change = 0; + SavedFunc **psv, *sv; + internstr name; + pmap_each(&savedfns, name, psv) { + sv = *psv; + if (!sv->emitted && (all || fnisneeded(name))) { + sv->emitted = 1; + Function fn = rematerialize(&arena, name, sv); + fn.passarena = &passarena; + if (ccopt.dbg.y) { + bfmt(ccopt.dbgout, "<< Rematerialize inlinee >>\n"); + irdump(&fn); + } + irfini_end(&fn); + change = 1; + freearena(&arena); + } + } + } +} + /* vim:set ts=3 sw=3 expandtab: */ @@ -89,9 +89,13 @@ objnewdat(internstr name, enum section sec, bool globl, uint siz, uint align) return off; } +static pmap_of(uchar) needed_fns; + void -objreloc(internstr sym, enum relockind reloc, enum section section, uint off, s64int addend) +objreloc(internstr sym, int symflags, enum relockind reloc, enum section section, uint off, s64int addend) { + if ((symflags & (SLOCAL|SFUNC)) == (SLOCAL|SFUNC)) + pmap_set(&needed_fns, sym, 1); switch (mctarg->objkind) { case OBJELF: elfreloc(sym, reloc, section, off, addend); @@ -99,9 +103,17 @@ objreloc(internstr sym, enum relockind reloc, enum section section, uint off, s6 } } +bool +fnisneeded(internstr name) +{ + return pmap_get(&needed_fns, name) != NULL; +} + void -objfini(void) +objfini(bool emit) { + emitxinlfns(/*all*/!emit); + if (!emit) return; static char buf[1<<12]; WriteBuf out = FDBUF(buf, sizeof buf, open(objout.outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666)); if (out.fd < 0) fatal(NULL, "could not open %'s for writing: %s", objout.outfile, strerror(errno)); @@ -33,7 +33,8 @@ void objini(const char *infile, const char *outfile); void objdeffunc(internstr nam, bool globl, uint off, uint siz); enum section objhassym(internstr name, uint *off); uint objnewdat(internstr name, enum section, bool globl, uint siz, uint align); -void objreloc(internstr sym, enum relockind, enum section, uint off, s64int addend); -void objfini(void); +void objreloc(internstr sym, /*enum symflags*/int, enum relockind, enum section, uint off, s64int addend); +void objfini(bool emit); +bool fnisneeded(internstr); /* vim:set ts=3 sw=3 expandtab: */ diff --git a/src/t_aarch64_emit.c b/src/t_aarch64_emit.c index 52429a3..f413f98 100644 --- a/src/t_aarch64_emit.c +++ b/src/t_aarch64_emit.c @@ -269,19 +269,19 @@ encode(uchar **pcode, const EncDesc *tab, int ntab, enum irclass k, Oper o[3]) break; case EN_ADRSYMLO21: ins |= o[0].reg; - objreloc(xcon2sym(o[1].con), REL_ADR_PREL_LO21, Stext, *pcode - objout.textbegin, o[1].cdisp); + objrelocxcon(o[1].con, REL_ADR_PREL_LO21, Stext, *pcode - objout.textbegin, o[1].cdisp); break; case EN_ADRSYMPGHI21: ins |= o[0].reg; - objreloc(xcon2sym(o[1].con), REL_ADR_PREL_PG_HI21, Stext, *pcode - objout.textbegin, o[1].cdisp); + objrelocxcon(o[1].con, REL_ADR_PREL_PG_HI21, Stext, *pcode - objout.textbegin, o[1].cdisp); break; case EN_ADDSYMLO12: ins |= sf<<31 | o[1].reg<<5 | o[0].reg; - objreloc(xcon2sym(o[2].con), REL_ADD_ABS_LO12_NC, Stext, *pcode - objout.textbegin, o[1].cdisp); + objrelocxcon(o[2].con, REL_ADD_ABS_LO12_NC, Stext, *pcode - objout.textbegin, o[1].cdisp); break; case EN_LDSYMLO19: ins |= o[0].reg; - objreloc(xcon2sym(o[1].con), REL_LD_PREL_LO19, Stext, *pcode - objout.textbegin, o[1].cdisp); + objrelocxcon(o[1].con, REL_LD_PREL_LO19, Stext, *pcode - objout.textbegin, o[1].cdisp); break; case EN_FP2R: ins |= sf<<22 | (o[1].reg&31)<<5 | (o[0].reg&31); @@ -494,7 +494,7 @@ static void Xcall(uchar **pcode, Oper dst) { if (dst.t == OSYM) { - objreloc(xcon2sym(dst.con), REL_CALL26, Stext, *pcode - objout.textbegin, 0); + objrelocxcon(dst.con, REL_CALL26, Stext, *pcode - objout.textbegin, 0); W32(0x94000000); /* BL <rel26> */ } else { assert(opermatch(PGPRZ, KPTR, dst)); diff --git a/src/t_x86-64_emit.c b/src/t_x86-64_emit.c index 9224d4b..0da27d8 100644 --- a/src/t_x86-64_emit.c +++ b/src/t_x86-64_emit.c @@ -358,7 +358,7 @@ encode(uchar **pcode, const EncDesc *tab, int ntab, enum irclass k, Oper dst, Op } else { enum relockind r = REL_PCREL32; if (mem.t == OSYMGOT) r = rex ? REL_GOTPCRELX_REX : REL_GOTPCRELX; - objreloc(xcon2sym(mem.con), r, Stext, *pcode - objout.textbegin, disp); + objrelocxcon(mem.con, r, Stext, *pcode - objout.textbegin, disp); I32(0); } } else { @@ -366,7 +366,7 @@ encode(uchar **pcode, const EncDesc *tab, int ntab, enum irclass k, Oper dst, Op assert(!ccopt.pic && !ccopt.pie && "cannot encode [RIP-rel + REG] for position independent"); B(/*mod 0*/ (reg & 7) << 3 | RSP); B(mem.cshift << 6 | ((mem.cindex & 7) << 3) | RBP); /* SIB [index*s + disp32] */ - objreloc(xcon2sym(mem.con), REL_ABS32S, Stext, *pcode - objout.textbegin, mem.disp); + objrelocxcon(mem.con, REL_ABS32S, Stext, *pcode - objout.textbegin, mem.disp); I32(0); } } else { @@ -455,7 +455,7 @@ encode(uchar **pcode, const EncDesc *tab, int ntab, enum irclass k, Oper dst, Op I32(addr - (*pcode - objout.textbegin) - 4); } else { enum relockind r = (ccopt.pie|ccopt.pic) ? REL_PLT32 : REL_PCREL32; - objreloc(sym, r, Stext, *pcode - objout.textbegin, -4); + objrelocxcon(dst.con, r, Stext, *pcode - objout.textbegin, -4); I32(0); } break; |