diff options
| author | 2025-12-14 12:15:59 +0100 | |
|---|---|---|
| committer | 2025-12-14 12:15:59 +0100 | |
| commit | d8b4e87af669c2b260686a5db67f7f02b4c164d9 (patch) | |
| tree | c46f03cc43462953dfb82df935bb7e2adcb098e4 | |
| parent | 59ca5a8db396e3e9c395793d49e1cab05d2d3261 (diff) | |
various relocation-related optimizations
With 59ca5a8db, querying whether a symbol is defined is cheap. If we're
compiling code that calls foo() and foo() is defined in this compilation
unit, we already know its offset within the .text section, so use it
instead of emitting a relocation for the linker to handle. Also, put
small literal data in the .text section instead of .rodata. This seems
to improve performance (cache locality?), and as a bonus, it will be
good for aarch64's instruction encoding, which has smallish PC-relative offsets.
| -rw-r--r-- | c/c.c | 6 | ||||
| -rw-r--r-- | ir/ir.c | 10 | ||||
| -rw-r--r-- | obj/elf.c | 29 | ||||
| -rw-r--r-- | obj/obj.c | 42 | ||||
| -rw-r--r-- | obj/obj.h | 2 | ||||
| -rw-r--r-- | x86_64/emit.c | 45 |
6 files changed, 78 insertions, 56 deletions
@@ -4308,8 +4308,8 @@ function(struct comp *cm, struct function *fn, const char **pnames, const struct if (!ifunc) ifunc = intern("__func__"); union type ty = mkarrtype(mktype(TYCHAR), QCONST, strlen(fn->name) + 1); const char *sym = mkhiddensym(fn->name, ifunc, 1); - uint off = objnewdat(sym, Srodata, 0, typesize(ty), typealign(ty)); - uchar *p = objout.rodata.p + off; + uint off = objnewdat(sym, Stext, 0, typesize(ty), typealign(ty)); + uchar *p = objout.textbegin + off; memcpy(p, fn->name, typearrlen(ty)-1); putdecl(cm, &(struct decl) { .ty = ty, .qual = QCONST, @@ -4418,7 +4418,7 @@ docomp(struct comp *cm) pdecl(&st, cm); } else if (decl.ty.t != TYFUNC && decl.scls != SCTYPEDEF && (decl.scls != SCEXTERN || noscls)) { /* tentative definitions */ - if (!objhassym(decl.sym)) { + if (!objhassym(decl.sym, NULL)) { uint size = typesize(d->ty); if (isincomplete(d->ty)) { if (d->ty.t == TYARRAY) { @@ -147,7 +147,8 @@ mksymref(const char *s, bool isfunc) union ref mkdatref(const char *name, union type ctype, uint siz, uint align, const void *bytes, uint n, bool deref) { - struct irdat dat = { .ctype = ctype, .align = align, .siz = siz, .name = name, .section = Srodata }; + struct irdat dat = { .ctype = ctype, .align = align, .siz = siz, .name = name }; + dat.section = align >= 4 && align <= targ_primsizes[TYPTR] && siz <= 16 ? Stext : Srodata; assert(n <= siz && siz && align); if (!name) { @@ -155,14 +156,15 @@ mkdatref(const char *name, union type ctype, uint siz, uint align, const void *b char buf[32]; struct wbuf wbuf = MEMBUF(buf, sizeof buf); - bfmt(&wbuf, ".L.%d", dattab.n); + bfmt(&wbuf, ".L%c.%d", dat.section == Stext ? 'L' : 'D', dattab.n); ioputc(&wbuf, 0); assert(!wbuf.err); dat.name = name = intern(buf); } dat.off = objnewdat(name, dat.section, 0, siz, align); - memcpy(objout.rodata.p+dat.off, bytes, n); - memset(objout.rodata.p+dat.off+n, 0, siz - n); + uchar *p = (dat.section == Stext ? 
objout.textbegin : objout.rodata.p) + dat.off; + if (n) memcpy(p, bytes, n); + if (dat.section != Stext) memset(p+n, 0, siz - n); vpush(&dattab, dat); return mkref(RXCON, addcon(&(struct xcon){.isdat = 1, .deref = deref, .dat = dattab.n - 1})); } @@ -91,16 +91,23 @@ enum { BSS_SHNDX = 4, }; +static const char sect2ndx[] = { + [Snone] = SHN_UND, + [Stext] = TEXT_SHNDX, [Srodata] = RODATA_SHNDX, + [Sdata] = DATA_SHNDX, [Sbss] = BSS_SHNDX, +}, shndx2sect[] = { + [SHN_UND] = Snone, + [TEXT_SHNDX] = Stext, [RODATA_SHNDX] = Srodata, + [DATA_SHNDX] = Sdata, [BSS_SHNDX] = Sbss, +}; + enum section -elfhassym(const char *nam) +elfhassym(const char *nam, uint *value) { struct sym *sym = findsym(nam); - if (sym) switch (sym->shndx) { - case SHN_UND: return Snone; - case TEXT_SHNDX: return Stext; - case RODATA_SHNDX: return Srodata; - case DATA_SHNDX: return Sdata; - case BSS_SHNDX: return Sbss; + if (sym) { + if (value) *value = sym->value; + return shndx2sect[sym->shndx]; } return Snone; } @@ -115,13 +122,7 @@ elfaddsym(const char *nam, int info, enum section sect, uvlong value, uvlong siz } sym->bind = info >> 4; sym->type = info & 0xF; - switch (sect) { - case Snone: sym->shndx = SHN_UND; break; - case Stext: sym->shndx = TEXT_SHNDX; break; - case Srodata: sym->shndx = RODATA_SHNDX; break; - case Sdata: sym->shndx = DATA_SHNDX; break; - case Sbss: sym->shndx = BSS_SHNDX; break; - } + sym->shndx = sect2ndx[sect]; sym->value = value; sym->size = size; if (sym == &sym0) { @@ -6,7 +6,7 @@ void elfinit(void); -enum section elfhassym(const char *); +enum section elfhassym(const char *, uint *value); void elfaddsym(const char *, int info, enum section, uvlong value, uvlong size); void elfreloc(const char *sym, enum relockind, enum section, uint off, vlong addend); void elffini(struct wbuf *); @@ -40,38 +40,44 @@ objdeffunc(const char *nam, bool globl, uint off, uint siz) } enum section -objhassym(const char *name) +objhassym(const char *name, uint *off) { - return elfhassym(name); 
+ return elfhassym(name, off); } uint objnewdat(const char *name, enum section sec, bool globl, uint siz, uint align) { + struct objfile *o = &objout; uint off; - assert(siz && align && ispo2(align)); - switch (sec) { default: assert(0); + case Stext: + assert(align <= targ_primsizes[TYPTR]); + assert(o->textend - siz > o->code); + while ((o->code - o->textbegin) & (align - 1)) ++o->code; + off = o->code - o->textbegin; + o->code += siz; + break; case Srodata: - if (align > objout.rodataalign) objout.rodataalign = align; - while (objout.rodata.n & (align - 1)) vpush(&objout.rodata, 0); - off = objout.rodata.n; - vresize(&objout.rodata, objout.rodata.n + siz); - memset(objout.rodata.p+off, 0, siz); + if (align > o->rodataalign) o->rodataalign = align; + while (o->rodata.n & (align - 1)) vpush(&o->rodata, 0); + off = o->rodata.n; + vresize(&o->rodata, o->rodata.n + siz); + memset(o->rodata.p+off, 0, siz); break; case Sdata: - if (align > objout.dataalign) objout.dataalign = align; - while (objout.data.n & (align - 1)) vpush(&objout.data, 0); - off = objout.data.n; - vresize(&objout.data, objout.data.n + siz); - memset(objout.data.p+off, 0, siz); + if (align > o->dataalign) o->dataalign = align; + while (o->data.n & (align - 1)) vpush(&o->data, 0); + off = o->data.n; + vresize(&o->data, o->data.n + siz); + memset(o->data.p+off, 0, siz); break; case Sbss: - if (align > objout.bssalign) objout.bssalign = align; - off = alignup(objout.nbss, align); - objout.nbss = off + siz; + if (align > o->bssalign) o->bssalign = align; + off = alignup(o->nbss, align); + o->nbss = off + siz; break; } @@ -23,7 +23,7 @@ enum section { Snone, Stext, Srodata, Sdata, Sbss }; void objini(const char *infile, const char *outfile); void objdeffunc(const char *nam, bool globl, uint off, uint siz); -enum section objhassym(const char *name); +enum section objhassym(const char *name, uint *off); uint objnewdat(const char *name, enum section, bool globl, uint siz, uint align); void objreloc(const 
char *sym, enum relockind, enum section, uint off, vlong addend); void objfini(void); diff --git a/x86_64/emit.c b/x86_64/emit.c index 2f7db11..6e832a4 100644 --- a/x86_64/emit.c +++ b/x86_64/emit.c @@ -118,8 +118,6 @@ mkimmdatregoper(union ref r) return ref2oper(r); } -static int rbpoff; - static struct oper mkmemoper(union ref r) { @@ -252,6 +250,7 @@ opermatch(enum operpat pat, struct oper oper) #define DS(S) D(S, sizeof S - 1) static bool usebp; /* use RBP? */ +static int rbpoff; static const char *curfnsym; static uchar *fnstart; @@ -333,21 +332,29 @@ encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct o if (mem.cindex == NOINDEX) { /* %rip(var) */ static uchar offs[NOPERENC] = { [EN_MI8] = 1, [EN_MI16] = 2, [EN_MI32] = 4 }; - enum relockind r; - if ((!conht[mem.con].deref && ccopt.pic) || conht[mem.con].isfunc) - r = (rex ? REL_GOTPCRELX_REX : REL_GOTPCRELX); - else - r = REL_PCREL32; + uint addr; + int disp = mem.disp - 4 - offs[en->operenc]; + const char *sym = xcon2sym(mem.con); B(/*mod 0*/ (reg & 7) << 3 | RBP); - objreloc(xcon2sym(mem.con), r, Stext, *pcode - objout.textbegin, mem.disp - 4 - offs[en->operenc]); + if (objhassym(sym, &addr) == Stext) { + I32(addr - (*pcode - objout.textbegin) + disp); + } else { + enum relockind r; + if ((!conht[mem.con].deref && ccopt.pic) || conht[mem.con].isfunc) + r = (rex ? 
REL_GOTPCRELX_REX : REL_GOTPCRELX); + else + r = REL_PCREL32; + objreloc(xcon2sym(mem.con), r, Stext, *pcode - objout.textbegin, disp); + I32(0); + } } else { /* var(,%reg,shift) */ assert(!ccopt.pic && !ccopt.pie && "cannot encode [RIP-rel + REG] for position independent"); B(/*mod 0*/ (reg & 7) << 3 | RSP); B(mem.cshift << 6 | ((mem.cindex & 7) << 3) | RBP); /* SIB [index*s + disp32] */ objreloc(xcon2sym(mem.con), REL_ABS32S, Stext, *pcode - objout.textbegin, mem.disp); + I32(0); } - I32(0); } else { int mod; bool sib = 0; @@ -427,13 +434,15 @@ encode(uchar **pcode, const struct desc *tab, int ntab, enum irclass k, struct o D(opc, nopc); assert(dst.t == OSYM); const char *sym = xcon2sym(dst.con); - if (sym != curfnsym) { + uint addr; + if (sym == curfnsym) { + I32(fnstart - *pcode - 4); + } else if (objhassym(sym, &addr) == Stext) { + I32(addr - (*pcode - objout.textbegin) - 4); + } else { enum relockind r = (ccopt.pie|ccopt.pic) ? REL_PLT32 : REL_PCREL32; objreloc(sym, r, Stext, *pcode - objout.textbegin, -4); I32(0); - } else { - /* self-recursive call */ - I32(fnstart - *pcode - 4); } break; } @@ -864,7 +873,8 @@ gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct ope } /* normal (not 2-address) case */ Lea: - if (isaddrcon(addr->base,0) && (ccopt.pic || conht[addr->base.i].isfunc)) { + if (isaddrcon(addr->base,0) && (ccopt.pic || conht[addr->base.i].isfunc) + && !objhassym(xcon2sym(addr->base.i), NULL)) { assert(!addr->disp && !addr->index.bits); val = addr->base; goto GOTLoad; @@ -874,12 +884,15 @@ gencopy(uchar **pcode, enum irclass cls, struct block *blk, int curi, struct ope /* dst = 0 -> xor dst, dst; but only if it is ok to clobber flags */ Xxor(pcode, kisint(cls) ? 
KI32 : cls, dst, dst); } else if (isaddrcon(val,0)) { - if (ccopt.pic || conht[val.i].isfunc) GOTLoad: + if ((ccopt.pic || conht[val.i].isfunc) + && !objhassym(xcon2sym(val.i), NULL)) { + GOTLoad: /* for mov reg, [rip(sym@GOTPCREL)] */ Xmov(pcode, cls, dst, mkoper(OSYM, .con = val.i, .cindex = NOINDEX)); - else + } else { /* for lea reg, [rip(sym)] */ Xlea(pcode, cls, dst, mkoper(OSYM, .con = val.i, .cindex = NOINDEX)); + } } else if (val.t == RXCON && in_range(concls(val), KI64, KPTR)) { /* movabs */ assert(dst.t == OREG && in_range(dst.reg, RAX, R15)); |