diff options
| author | 2026-03-23 23:38:53 +0100 | |
|---|---|---|
| committer | 2026-03-23 23:43:52 +0100 | |
| commit | 62d995124c0cc2eaeec79e18edc3e044f3e524c9 (patch) | |
| tree | 4c79a80efca09f8050109aa0440ec75351d72c17 /src | |
| parent | 8630aeb8b43c507cd00f5b091ddcee4def464f4d (diff) | |
IR: emit inline function standalone bodies lazily
If a function is stashed for inlining and is then either inlined at all
of its call sites or never used, it never ends up in the object file. If
any symbol reference to it is emitted, it must be de-inlined
(rematerialized); this is done near the end, just before the actual
object file is emitted.
Diffstat (limited to 'src')
| -rw-r--r-- | src/a_main.c | 2 | ||||
| -rw-r--r-- | src/antcc.h | 2 | ||||
| -rw-r--r-- | src/c.c | 17 | ||||
| -rw-r--r-- | src/ir.c | 11 | ||||
| -rw-r--r-- | src/ir.h | 5 | ||||
| -rw-r--r-- | src/ir_inliner.c | 106 | ||||
| -rw-r--r-- | src/obj.c | 16 | ||||
| -rw-r--r-- | src/obj.h | 5 | ||||
| -rw-r--r-- | src/t_aarch64_emit.c | 10 | ||||
| -rw-r--r-- | src/t_x86-64_emit.c | 6 |
10 files changed, 139 insertions, 41 deletions
diff --git a/src/a_main.c b/src/a_main.c index 309fe23..56a38d2 100644 --- a/src/a_main.c +++ b/src/a_main.c @@ -688,7 +688,7 @@ cc1(const char *out, const char *in) if (task.verbose) efmt("cc1(/*out*/ %'s, /*in*/ %'s)\n", out, in); if (!ccopt.dbg.any && !task.syntaxonly) objini(in, out); ccomp(in); - if (!ccopt.dbg.any && !task.syntaxonly && !nerror) objfini(); + if (!task.syntaxonly && !nerror) objfini(!ccopt.dbg.any); return !!nerror; } diff --git a/src/antcc.h b/src/antcc.h index fcaf02a..b467156 100644 --- a/src/antcc.h +++ b/src/antcc.h @@ -291,7 +291,7 @@ extern char pmap_tombstone_[]; #define pmap_del(m, k) pmap_del_(&(m)->mb, k) #define pmap_each(m,kx,pvx) \ for (size_t _i = 0; _i < (m)->mb.N && ((kx) = (m)->mb.k[_i], (pvx) = &(m)->v[_i], 1); ++_i) \ - if (kx && kx != pmap_tombstone_) + if (kx && kx != (void*)pmap_tombstone_) /********/ /** IO **/ @@ -1387,6 +1387,7 @@ typedef struct InitParser { internstr sym; s64int addend; uint off; + uchar flag; } *drel; }; }; @@ -1428,14 +1429,17 @@ dumpini(InitParser *ip) #endif static s64int /* -> returns addend */ -expr2reloc(internstr *psym, const Expr *ex) +expr2reloc(internstr *psym, enum symflags *sf, const Expr *ex) { if (ex->t == ESSYMREF) { *psym = ex->ssym.sym; + *sf = (SLOCAL &- ex->ssym.local) | (SFUNC &- ex->ssym.func); return ex->ssym.off; } else if (ex->t == ESTRLIT || ex->t == EINIT) { if (ex->t == ESTRLIT) assert(ex->ty.t == TYARRAY); - *psym = xcon2sym(expraddr(NULL, ex).i); + Ref r = expraddr(NULL, ex); + *psym = xcon2sym(r.i); + *sf = contab.p[r.i].flag; return 0; } fatal(&ex->span, "internal bug: non static reloc?"); @@ -1506,10 +1510,11 @@ iniwrite(CComp *cm, InitParser *ip, uint off, uint bitsiz, uint bitoff, Type ty, memcpy(p, ex->s.p, n); } else { internstr sym; - s64int addend = expr2reloc(&sym, ex); + enum symflags sf; + s64int addend = expr2reloc(&sym, &sf, ex); if (!ip->dyn) { assert(ip->sec != Srodata || rodatarelocok()); - objreloc(sym, targ_64bit ? 
REL_ABS64 : REL_ABS32, + objreloc(sym, sf, targ_64bit ? REL_ABS64 : REL_ABS32, ip->sec, ip->off + off, addend); } else { InitReloc *rel = alloc(ip->arena, sizeof *rel, 0); @@ -1517,6 +1522,7 @@ iniwrite(CComp *cm, InitParser *ip, uint off, uint bitsiz, uint bitoff, Type ty, rel->sym = sym; rel->off = off; rel->addend = addend; + rel->flag = sf; ip->drel = rel; } } @@ -1886,7 +1892,8 @@ initializer(CComp *cm, Type *ty, enum evalmode ev, bool globl, memcpy(p + off, ip->ddat.p, ip->ddat.n); memset(p + off + ip->ddat.n, 0, siz - ip->ddat.n); for (InitReloc *rel = ip->drel; rel; rel = rel->link) { - objreloc(rel->sym, targ_64bit ? REL_ABS64 : REL_ABS32, sec, off + rel->off, rel->addend); + objreloc(rel->sym, rel->flag, targ_64bit ? REL_ABS64 : REL_ABS32, + sec, off + rel->off, rel->addend); } } vfree(&ip->ddat); @@ -688,8 +688,16 @@ irfini(Function *fn) freearena(fn->passarena); } if (maybeinlinee(fn)) { - // goto Fin; XXX do this by having inline function rematerialization when symbol is actually referenced + freearena(fn->passarena); + return; } + + irfini_end(fn); +} + +void +irfini_end(Function *fn) +{ lowerstack(fn); freearena(fn->passarena); if (ccopt.dbg.o) { @@ -702,7 +710,6 @@ irfini(Function *fn) if (objout.code) mctarg->emit(fn); -//Fin: freearena(fn->passarena); freefn(fn); } @@ -28,7 +28,7 @@ enum symflags { typedef struct IRCon { bool issym, isdat, deref; uchar cls; - uchar flag; + uchar flag; /* enum symflags */ union { internstr sym; int dat; @@ -262,6 +262,7 @@ extern int visitmark; #define mkarginstr(ty, x) mkinstr2(Oarg, 0, mktyperef(ty), (x)) void irinit(Function *); void irfini(Function *); +void irfini_end(Function *); #define cls2type(k) ((IRType){.cls=(k)}) IRType mkirtype(Type); Ref newxcon(const IRCon *); @@ -279,6 +280,7 @@ Ref mksymref(internstr, enum symflags); Ref mkdatref(internstr sym, Type ctype, uint siz, uint align, const void *, uint n, bool deref, bool funclocal); internstr xcon2sym(int ref); +#define objrelocxcon(xc, ...) 
objreloc(xcon2sym(xc), contab.p[xc].flag, __VA_ARGS__) Instr mkalloca(uint siz, uint align); Ref mkcallarg(IRType ret, uint narg, int vararg); #define mkintrin(B, C, N) mkinstr2(Ointrin, C, mkref(RICON, B), mkcallarg((IRType){{0}},N,-1)) @@ -350,6 +352,7 @@ void cselim(Function *); /** inliner.c **/ bool maybeinlinee(Function *); void doinline(Function *); +void emitxinlfns(bool all); /** intrin.c **/ void lowerintrin(Function *); diff --git a/src/ir_inliner.c b/src/ir_inliner.c index c1c36e2..685815f 100644 --- a/src/ir_inliner.c +++ b/src/ir_inliner.c @@ -1,7 +1,9 @@ #include "ir.h" +#include "obj.h" typedef struct SavedFunc { - uint ninstrtab; + bool emitted; + uint ninstrtab, ncontab, ncalltab, nphitab; Instr *instrtab; IRCon *contab; IRCall *calltab; @@ -17,16 +19,16 @@ enum { MAX_INLINED_FN_NINS = 50, MAX_INLINED_FN_NBLK = 16, }; static pmap_of(SavedFunc *) savedfns; +static Arena *savearena; bool maybeinlinee(Function *fn) { - static Arena *savearena; extern int ninstrtab, nfreeinstr; // TODO better heuristics if (ccopt.o < OPT1) return 0; - if (!fn->inlin && ccopt.o < OPT2) return 0; + if (!(fn->inlin || (!fn->globl && ccopt.o >= OPT2))) return 0; if (ninstrtab - nfreeinstr > MAX_INLINED_FN_NINS) return 0; if (fn->nblk > MAX_INLINED_FN_NBLK) return 0; for (int i = 0; i < fn->nabiarg; ++i) { @@ -50,20 +52,14 @@ maybeinlinee(Function *fn) if (fn->abiarg) sv->abiarg = alloccopy(&savearena, fn->abiarg, sizeof *sv->abiarg * fn->nabiarg, 0); sv->nabiarg = fn->nabiarg; - if ((sv->nabiret = fn->nabiret) > 0) - memcpy(sv->abiret, fn->abiret, sizeof sv->abiret); + sv->nabiret = fn->nabiret; + memcpy(sv->abiret, fn->abiret, sizeof sv->abiret); Block *bmap[MAX_INLINED_FN_NBLK]; Block *b = fn->entry; int id = 0; do { b->id = id++; Block *q = alloccopy(&savearena, b, sizeof *b, 0); - if (q->phi.n) - q->phi.p = alloccopy(&savearena, q->phi.p, sizeof *q->phi.p * q->phi.n, 0); - if (q->ins.n) - q->ins.p = alloccopy(&savearena, q->ins.p, sizeof *q->ins.p * q->ins.n, 0); - if 
(q->npred > 1) - q->_pred = alloccopy(&savearena, q->_pred, sizeof *q->_pred * q->npred, 0); q->lprev = NULL; q->idom = NULL; bmap[b->id] = q; @@ -81,8 +77,8 @@ maybeinlinee(Function *fn) } while ((b = b->lnext)); sv->instrtab = alloccopy(&savearena, instrtab, sizeof *instrtab * (sv->ninstrtab = ninstrtab), 0); - sv->contab = alloccopy(&savearena, contab.p, sizeof *contab.p * contab.n, 0); - if (calltab.n) { + sv->contab = alloccopy(&savearena, contab.p, sizeof *contab.p * (sv->ncontab = contab.n), 0); + if ((sv->ncalltab = calltab.n)) { sv->calltab = alloccopy(&savearena, calltab.p, sizeof *calltab.p * calltab.n, 0); for (int i = 0; i < calltab.n; ++i) { if (sv->calltab[i].abiarg) @@ -90,11 +86,9 @@ maybeinlinee(Function *fn) sv->calltab[i].narg * sizeof *sv->calltab[i].abiarg, 0); } } - if (phitab.n) { - sv->phitab = alloc(&savearena, sizeof *phitab.p * phitab.n, 0); - for (int i = 0; i < phitab.n; ++i) { - sv->phitab[i] = alloccopy(&savearena, phitab.p[i], sizeof *phitab.p[i] * xbcap(phitab.p[i]), 0); - } + if ((sv->nphitab = phitab.n)) { + sv->phitab = alloccopy(&savearena, phitab.p, sizeof *phitab.p * phitab.n, 0); + phitab.n = 0; } pmap_set(&savedfns, fn->name, sv); return 1; @@ -180,7 +174,7 @@ inlcall(Function *fn, Block *blk, int curi, SavedFunc *sv) for (int i = 0; i < b->phi.n; ++i) { int t = b->phi.p[i]; Ref *refs = NULL, - *src = sv->phitab[sv->instrtab[t].l.i]; + *src = sv->phitab[sv->instrtab[t].l.i]; xbgrow(&refs, b->npred); for (int i = 0; i < b->npred; ++i) refs[i] = mapref(instrmap, sv, src[i]); @@ -304,4 +298,78 @@ doinline(Function *fn) } while ((b = b->lnext) != fn->entry); } +static Function +rematerialize(Arena **arena, internstr name, SavedFunc *sv) +{ + Function fn = { arena, .name = name, .globl = 0/*always localG*/, .fnty = sv->fnty, + .retty = sv->retty, .abiarg = sv->abiarg, .nabiarg = sv->nabiarg, + .abiret = {sv->abiret[0], sv->abiret[1]}, .nabiret = sv->nabiret, + }; + irinit(&fn); + extern int ninstrtab; + ninstrtab = 
sv->ninstrtab; + memcpy(instrtab, sv->instrtab, ninstrtab * sizeof *instrtab); + vpushn(&calltab, sv->calltab, sv->ncalltab); + vpushn(&phitab, sv->phitab, sv->nphitab); + vpushn(&contab, sv->contab, sv->ncontab); + + fn.nblk = 0; + struct Block *last = fn.entry = NULL; + for (struct Block *b = sv->entry, *next; b; b = next) { + next = b->lnext; + if (last) { + b->lprev = last; + last->lnext = b; + } else { + fn.entry = b; + b->lprev = b; + } + last = b; + if (!next) { + fn.entry->lprev = b; + b->lnext = fn.entry; + } + ++fn.nblk; + } + fn.entry->lprev->lnext = fn.entry; + memset(instruse, 0, sizeof *instruse * ninstrtab); + filluses(&fn); + + return fn; +} + +void +emitxinlfns(bool all) +{ + enum { N = 1 << 12 }; + static union { char m[sizeof(Arena) + N]; Arena *_align; } amem[2]; + Arena *arena = (void *)amem[0].m, *passarena = (void *)amem[1].m; + arena->cap = N; + passarena->cap = N; + + /* looping until fixpoint because emitting functions might generate + * references to other stashed functions, which might have already been + * visited, but they need to be visited them again */ + for (bool change = 1; change;) { + change = 0; + SavedFunc **psv, *sv; + internstr name; + pmap_each(&savedfns, name, psv) { + sv = *psv; + if (!sv->emitted && (all || fnisneeded(name))) { + sv->emitted = 1; + Function fn = rematerialize(&arena, name, sv); + fn.passarena = &passarena; + if (ccopt.dbg.y) { + bfmt(ccopt.dbgout, "<< Rematerialize inlinee >>\n"); + irdump(&fn); + } + irfini_end(&fn); + change = 1; + freearena(&arena); + } + } + } +} + /* vim:set ts=3 sw=3 expandtab: */ @@ -89,9 +89,13 @@ objnewdat(internstr name, enum section sec, bool globl, uint siz, uint align) return off; } +static pmap_of(uchar) needed_fns; + void -objreloc(internstr sym, enum relockind reloc, enum section section, uint off, s64int addend) +objreloc(internstr sym, int symflags, enum relockind reloc, enum section section, uint off, s64int addend) { + if ((symflags & (SLOCAL|SFUNC)) == 
(SLOCAL|SFUNC)) + pmap_set(&needed_fns, sym, 1); switch (mctarg->objkind) { case OBJELF: elfreloc(sym, reloc, section, off, addend); @@ -99,9 +103,17 @@ objreloc(internstr sym, enum relockind reloc, enum section section, uint off, s6 } } +bool +fnisneeded(internstr name) +{ + return pmap_get(&needed_fns, name) != NULL; +} + void -objfini(void) +objfini(bool emit) { + emitxinlfns(/*all*/!emit); + if (!emit) return; static char buf[1<<12]; WriteBuf out = FDBUF(buf, sizeof buf, open(objout.outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666)); if (out.fd < 0) fatal(NULL, "could not open %'s for writing: %s", objout.outfile, strerror(errno)); @@ -33,7 +33,8 @@ void objini(const char *infile, const char *outfile); void objdeffunc(internstr nam, bool globl, uint off, uint siz); enum section objhassym(internstr name, uint *off); uint objnewdat(internstr name, enum section, bool globl, uint siz, uint align); -void objreloc(internstr sym, enum relockind, enum section, uint off, s64int addend); -void objfini(void); +void objreloc(internstr sym, /*enum symflags*/int, enum relockind, enum section, uint off, s64int addend); +void objfini(bool emit); +bool fnisneeded(internstr); /* vim:set ts=3 sw=3 expandtab: */ diff --git a/src/t_aarch64_emit.c b/src/t_aarch64_emit.c index 52429a3..f413f98 100644 --- a/src/t_aarch64_emit.c +++ b/src/t_aarch64_emit.c @@ -269,19 +269,19 @@ encode(uchar **pcode, const EncDesc *tab, int ntab, enum irclass k, Oper o[3]) break; case EN_ADRSYMLO21: ins |= o[0].reg; - objreloc(xcon2sym(o[1].con), REL_ADR_PREL_LO21, Stext, *pcode - objout.textbegin, o[1].cdisp); + objrelocxcon(o[1].con, REL_ADR_PREL_LO21, Stext, *pcode - objout.textbegin, o[1].cdisp); break; case EN_ADRSYMPGHI21: ins |= o[0].reg; - objreloc(xcon2sym(o[1].con), REL_ADR_PREL_PG_HI21, Stext, *pcode - objout.textbegin, o[1].cdisp); + objrelocxcon(o[1].con, REL_ADR_PREL_PG_HI21, Stext, *pcode - objout.textbegin, o[1].cdisp); break; case EN_ADDSYMLO12: ins |= sf<<31 | o[1].reg<<5 | o[0].reg; - 
objreloc(xcon2sym(o[2].con), REL_ADD_ABS_LO12_NC, Stext, *pcode - objout.textbegin, o[1].cdisp); + objrelocxcon(o[2].con, REL_ADD_ABS_LO12_NC, Stext, *pcode - objout.textbegin, o[1].cdisp); break; case EN_LDSYMLO19: ins |= o[0].reg; - objreloc(xcon2sym(o[1].con), REL_LD_PREL_LO19, Stext, *pcode - objout.textbegin, o[1].cdisp); + objrelocxcon(o[1].con, REL_LD_PREL_LO19, Stext, *pcode - objout.textbegin, o[1].cdisp); break; case EN_FP2R: ins |= sf<<22 | (o[1].reg&31)<<5 | (o[0].reg&31); @@ -494,7 +494,7 @@ static void Xcall(uchar **pcode, Oper dst) { if (dst.t == OSYM) { - objreloc(xcon2sym(dst.con), REL_CALL26, Stext, *pcode - objout.textbegin, 0); + objrelocxcon(dst.con, REL_CALL26, Stext, *pcode - objout.textbegin, 0); W32(0x94000000); /* BL <rel26> */ } else { assert(opermatch(PGPRZ, KPTR, dst)); diff --git a/src/t_x86-64_emit.c b/src/t_x86-64_emit.c index 9224d4b..0da27d8 100644 --- a/src/t_x86-64_emit.c +++ b/src/t_x86-64_emit.c @@ -358,7 +358,7 @@ encode(uchar **pcode, const EncDesc *tab, int ntab, enum irclass k, Oper dst, Op } else { enum relockind r = REL_PCREL32; if (mem.t == OSYMGOT) r = rex ? 
REL_GOTPCRELX_REX : REL_GOTPCRELX; - objreloc(xcon2sym(mem.con), r, Stext, *pcode - objout.textbegin, disp); + objrelocxcon(mem.con, r, Stext, *pcode - objout.textbegin, disp); I32(0); } } else { @@ -366,7 +366,7 @@ encode(uchar **pcode, const EncDesc *tab, int ntab, enum irclass k, Oper dst, Op assert(!ccopt.pic && !ccopt.pie && "cannot encode [RIP-rel + REG] for position independent"); B(/*mod 0*/ (reg & 7) << 3 | RSP); B(mem.cshift << 6 | ((mem.cindex & 7) << 3) | RBP); /* SIB [index*s + disp32] */ - objreloc(xcon2sym(mem.con), REL_ABS32S, Stext, *pcode - objout.textbegin, mem.disp); + objrelocxcon(mem.con, REL_ABS32S, Stext, *pcode - objout.textbegin, mem.disp); I32(0); } } else { @@ -455,7 +455,7 @@ encode(uchar **pcode, const EncDesc *tab, int ntab, enum irclass k, Oper dst, Op I32(addr - (*pcode - objout.textbegin) - 4); } else { enum relockind r = (ccopt.pie|ccopt.pic) ? REL_PLT32 : REL_PCREL32; - objreloc(sym, r, Stext, *pcode - objout.textbegin, -4); + objrelocxcon(dst.con, r, Stext, *pcode - objout.textbegin, -4); I32(0); } break; |