From d8b4e87af669c2b260686a5db67f7f02b4c164d9 Mon Sep 17 00:00:00 2001 From: lemon Date: Sun, 14 Dec 2025 12:15:59 +0100 Subject: various relocation-related optimizations With 59ca5a8db, querying whether a symbol is defined is cheap. If we're compiling code that calls foo() and we defined foo() in this compilation unit, we already know its offset within the .text section, so use it instead of emitting a relocation for the linker to handle. Also, put small literal data in the .text section instead of .rodata. This seems to improve performance (cache locality?), and as a bonus, it will be good for aarch64's instruction encoding with smallish PC-relative offsets. --- c/c.c | 6 +++--- ir/ir.c | 10 ++++++---- obj/elf.c | 29 +++++++++++++++-------------- obj/obj.c | 42 ++++++++++++++++++++++++------------------ obj/obj.h | 2 +- x86_64/emit.c | 45 +++++++++++++++++++++++++++++---------------- 6 files changed, 78 insertions(+), 56 deletions(-) diff --git a/c/c.c b/c/c.c index 5ff2041..4c4868b 100644 --- a/c/c.c +++ b/c/c.c @@ -4308,8 +4308,8 @@ function(struct comp *cm, struct function *fn, const char **pnames, const struct if (!ifunc) ifunc = intern("__func__"); union type ty = mkarrtype(mktype(TYCHAR), QCONST, strlen(fn->name) + 1); const char *sym = mkhiddensym(fn->name, ifunc, 1); - uint off = objnewdat(sym, Srodata, 0, typesize(ty), typealign(ty)); - uchar *p = objout.rodata.p + off; + uint off = objnewdat(sym, Stext, 0, typesize(ty), typealign(ty)); + uchar *p = objout.textbegin + off; memcpy(p, fn->name, typearrlen(ty)-1); putdecl(cm, &(struct decl) { .ty = ty, .qual = QCONST, @@ -4418,7 +4418,7 @@ docomp(struct comp *cm) pdecl(&st, cm); } else if (decl.ty.t != TYFUNC && decl.scls != SCTYPEDEF && (decl.scls != SCEXTERN || noscls)) { /* tentative definitions */ - if (!objhassym(decl.sym)) { + if (!objhassym(decl.sym, NULL)) { uint size = typesize(d->ty); if (isincomplete(d->ty)) { if (d->ty.t == TYARRAY) { diff --git a/ir/ir.c b/ir/ir.c index 3befc18..5bc94aa 100644 --- 
a/ir/ir.c +++ b/ir/ir.c @@ -147,7 +147,8 @@ mksymref(const char *s, bool isfunc) union ref mkdatref(const char *name, union type ctype, uint siz, uint align, const void *bytes, uint n, bool deref) { - struct irdat dat = { .ctype = ctype, .align = align, .siz = siz, .name = name, .section = Srodata }; + struct irdat dat = { .ctype = ctype, .align = align, .siz = siz, .name = name }; + dat.section = align >= 4 && align <= targ_primsizes[TYPTR] && siz <= 16 ? Stext : Srodata; assert(n <= siz && siz && align); if (!name) { @@ -155,14 +156,15 @@ mkdatref(const char *name, union type ctype, uint siz, uint align, const void *b char buf[32]; struct wbuf wbuf = MEMBUF(buf, sizeof buf); - bfmt(&wbuf, ".L.%d", dattab.n); + bfmt(&wbuf, ".L%c.%d", dat.section == Stext ? 'L' : 'D', dattab.n); ioputc(&wbuf, 0); assert(!wbuf.err); dat.name = name = intern(buf); } dat.off = objnewdat(name, dat.section, 0, siz, align); - memcpy(objout.rodata.p+dat.off, bytes, n); - memset(objout.rodata.p+dat.off+n, 0, siz - n); + uchar *p = (dat.section == Stext ? 
objout.textbegin : objout.rodata.p) + dat.off; + if (n) memcpy(p, bytes, n); + if (dat.section != Stext) memset(p+n, 0, siz - n); vpush(&dattab, dat); return mkref(RXCON, addcon(&(struct xcon){.isdat = 1, .deref = deref, .dat = dattab.n - 1})); } diff --git a/obj/elf.c b/obj/elf.c index e20da99..521a159 100644 --- a/obj/elf.c +++ b/obj/elf.c @@ -91,16 +91,23 @@ enum { BSS_SHNDX = 4, }; +static const char sect2ndx[] = { + [Snone] = SHN_UND, + [Stext] = TEXT_SHNDX, [Srodata] = RODATA_SHNDX, + [Sdata] = DATA_SHNDX, [Sbss] = BSS_SHNDX, +}, shndx2sect[] = { + [SHN_UND] = Snone, + [TEXT_SHNDX] = Stext, [RODATA_SHNDX] = Srodata, + [DATA_SHNDX] = Sdata, [BSS_SHNDX] = Sbss, +}; + enum section -elfhassym(const char *nam) +elfhassym(const char *nam, uint *value) { struct sym *sym = findsym(nam); - if (sym) switch (sym->shndx) { - case SHN_UND: return Snone; - case TEXT_SHNDX: return Stext; - case RODATA_SHNDX: return Srodata; - case DATA_SHNDX: return Sdata; - case BSS_SHNDX: return Sbss; + if (sym) { + if (value) *value = sym->value; + return shndx2sect[sym->shndx]; } return Snone; } @@ -115,13 +122,7 @@ elfaddsym(const char *nam, int info, enum section sect, uvlong value, uvlong siz } sym->bind = info >> 4; sym->type = info & 0xF; - switch (sect) { - case Snone: sym->shndx = SHN_UND; break; - case Stext: sym->shndx = TEXT_SHNDX; break; - case Srodata: sym->shndx = RODATA_SHNDX; break; - case Sdata: sym->shndx = DATA_SHNDX; break; - case Sbss: sym->shndx = BSS_SHNDX; break; - } + sym->shndx = sect2ndx[sect]; sym->value = value; sym->size = size; if (sym == &sym0) { diff --git a/obj/obj.c b/obj/obj.c index fa62e9e..2d8b497 100644 --- a/obj/obj.c +++ b/obj/obj.c @@ -6,7 +6,7 @@ void elfinit(void); -enum section elfhassym(const char *); +enum section elfhassym(const char *, uint *value); void elfaddsym(const char *, int info, enum section, uvlong value, uvlong size); void elfreloc(const char *sym, enum relockind, enum section, uint off, vlong addend); void elffini(struct wbuf 
*); @@ -40,38 +40,44 @@ objdeffunc(const char *nam, bool globl, uint off, uint siz) } enum section -objhassym(const char *name) +objhassym(const char *name, uint *off) { - return elfhassym(name); + return elfhassym(name, off); } uint objnewdat(const char *name, enum section sec, bool globl, uint siz, uint align) { + struct objfile *o = &objout; uint off; - assert(siz && align && ispo2(align)); - switch (sec) { default: assert(0); + case Stext: + assert(align <= targ_primsizes[TYPTR]); + assert(o->textend - siz > o->code); + while ((o->code - o->textbegin) & (align - 1)) ++o->code; + off = o->code - o->textbegin; + o->code += siz; + break; case Srodata: - if (align > objout.rodataalign) objout.rodataalign = align; - while (objout.rodata.n & (align - 1)) vpush(&objout.rodata, 0); - off = objout.rodata.n; - vresize(&objout.rodata, objout.rodata.n + siz); - memset(objout.rodata.p+off, 0, siz); + if (align > o->rodataalign) o->rodataalign = align; + while (o->rodata.n & (align - 1)) vpush(&o->rodata, 0); + off = o->rodata.n; + vresize(&o->rodata, o->rodata.n + siz); + memset(o->rodata.p+off, 0, siz); break; case Sdata: - if (align > objout.dataalign) objout.dataalign = align; - while (objout.data.n & (align - 1)) vpush(&objout.data, 0); - off = objout.data.n; - vresize(&objout.data, objout.data.n + siz); - memset(objout.data.p+off, 0, siz); + if (align > o->dataalign) o->dataalign = align; + while (o->data.n & (align - 1)) vpush(&o->data, 0); + off = o->data.n; + vresize(&o->data, o->data.n + siz); + memset(o->data.p+off, 0, siz); break; case Sbss: - if (align > objout.bssalign) objout.bssalign = align; - off = alignup(objout.nbss, align); - objout.nbss = off + siz; + if (align > o->bssalign) o->bssalign = align; + off = alignup(o->nbss, align); + o->nbss = off + siz; break; } diff --git a/obj/obj.h b/obj/obj.h index 8e9dea8..59769ff 100644 --- a/obj/obj.h +++ b/obj/obj.h @@ -23,7 +23,7 @@ enum section { Snone, Stext, Srodata, Sdata, Sbss }; void objini(const char 
*infile, const char *outfile); void objdeffunc(const char *nam, bool globl, uint off, uint siz); -enum section objhassym(const char *name); +enum section objhassym(const char *name, uint *off); uint objnewdat(const char *name, enum section, bool globl, uint siz, uint align); void objreloc(const char *sym, enum relockind, enum section, uint off, vlong addend); void objfini(void); diff --git a/x86_64/emit.c b/x86_64/emit.c index 2f7db11..6e832a4 100644 --- a/x86_64/emit.c +++ b/x86_64/emit.c @@ -118,8 +118,6 @@ mkimmdatregoper(union ref r) return ref2oper(r); } -static int rbpoff; - static struct oper mkmemoper(union ref r) { @@ -252,6 +250,7 @@ opermatch(enum operpat pat, struct oper oper) #define DS(S) D(S, sizeof S - 1) static bool usebp; /* use RBP? */ +static int rbpoff; static const char *curfnsym; static uchar *fnstart; @@ -333,21 +332,29 @@ encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct o if (mem.cindex == NOINDEX) { /* %rip(var) */ static uchar offs[NOPERENC] = { [EN_MI8] = 1, [EN_MI16] = 2, [EN_MI32] = 4 }; - enum relockind r; - if ((!conht[mem.con].deref && ccopt.pic) || conht[mem.con].isfunc) - r = (rex ? REL_GOTPCRELX_REX : REL_GOTPCRELX); - else - r = REL_PCREL32; + uint addr; + int disp = mem.disp - 4 - offs[en->operenc]; + const char *sym = xcon2sym(mem.con); B(/*mod 0*/ (reg & 7) << 3 | RBP); - objreloc(xcon2sym(mem.con), r, Stext, *pcode - objout.textbegin, mem.disp - 4 - offs[en->operenc]); + if (objhassym(sym, &addr) == Stext) { + I32(addr - (*pcode - objout.textbegin) + disp); + } else { + enum relockind r; + if ((!conht[mem.con].deref && ccopt.pic) || conht[mem.con].isfunc) + r = (rex ? 
REL_GOTPCRELX_REX : REL_GOTPCRELX); + else + r = REL_PCREL32; + objreloc(xcon2sym(mem.con), r, Stext, *pcode - objout.textbegin, disp); + I32(0); + } } else { /* var(,%reg,shift) */ assert(!ccopt.pic && !ccopt.pie && "cannot encode [RIP-rel + REG] for position independent"); B(/*mod 0*/ (reg & 7) << 3 | RSP); B(mem.cshift << 6 | ((mem.cindex & 7) << 3) | RBP); /* SIB [index*s + disp32] */ objreloc(xcon2sym(mem.con), REL_ABS32S, Stext, *pcode - objout.textbegin, mem.disp); + I32(0); } - I32(0); } else { int mod; bool sib = 0; @@ -427,13 +434,15 @@ encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct o D(opc, nopc); assert(dst.t == OSYM); const char *sym = xcon2sym(dst.con); - if (sym != curfnsym) { + uint addr; + if (sym == curfnsym) { + I32(fnstart - *pcode - 4); + } else if (objhassym(sym, &addr) == Stext) { + I32(addr - (*pcode - objout.textbegin) - 4); + } else { enum relockind r = (ccopt.pie|ccopt.pic) ? REL_PLT32 : REL_PCREL32; objreloc(sym, r, Stext, *pcode - objout.textbegin, -4); I32(0); - } else { - /* self-recursive call */ - I32(fnstart - *pcode - 4); } break; } @@ -864,7 +873,8 @@ gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct ope } /* normal (not 2-address) case */ Lea: - if (isaddrcon(addr->base,0) && (ccopt.pic || conht[addr->base.i].isfunc)) { + if (isaddrcon(addr->base,0) && (ccopt.pic || conht[addr->base.i].isfunc) + && !objhassym(xcon2sym(addr->base.i), NULL)) { assert(!addr->disp && !addr->index.bits); val = addr->base; goto GOTLoad; @@ -874,12 +884,15 @@ gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct ope /* dst = 0 -> xor dst, dst; but only if it is ok to clobber flags */ Xxor(pcode, kisint(cls) ? 
KI32 : cls, dst, dst); } else if (isaddrcon(val,0)) { - if (ccopt.pic || conht[val.i].isfunc) GOTLoad: + if ((ccopt.pic || conht[val.i].isfunc) + && !objhassym(xcon2sym(val.i), NULL)) { + GOTLoad: /* for mov reg, [rip(sym@GOTPCREL)] */ Xmov(pcode, cls, dst, mkoper(OSYM, .con = val.i, .cindex = NOINDEX)); - else + } else { /* for lea reg, [rip(sym)] */ Xlea(pcode, cls, dst, mkoper(OSYM, .con = val.i, .cindex = NOINDEX)); + } } else if (val.t == RXCON && in_range(concls(val), KI64, KPTR)) { /* movabs */ assert(dst.t == OREG && in_range(dst.reg, RAX, R15)); -- cgit v1.2.3