From a8d6f8bf30c07edb775e56889f568ca20240bedf Mon Sep 17 00:00:00 2001 From: lemon Date: Tue, 17 Mar 2026 13:22:00 +0100 Subject: REFACTOR: move sources to src/ --- c/lex.c | 2496 --------------------------------------------------------------- 1 file changed, 2496 deletions(-) delete mode 100644 c/lex.c (limited to 'c/lex.c') diff --git a/c/lex.c b/c/lex.c deleted file mode 100644 index c196a21..0000000 --- a/c/lex.c +++ /dev/null @@ -1,2496 +0,0 @@ -#include "lex.h" -#include "../version.h" -#include -#include - -/* fill internal circular character buffer with input after translation phase 1 & 2 - * (trigraph substitution and backslash-newline deletion */ -static void -fillchrbuf(struct lexer *lx) -{ - const uchar *p = lx->dat + lx->idx; - int i = lx->chrbuf0, idx = lx->idx; - int rem = countof(lx->chrbuf) - i; - assert(rem >= 0); - if (rem > 0) { - memmove(lx->chrbuf, lx->chrbuf+i, rem * sizeof *lx->chrbuf); - memmove(lx->chridxbuf, lx->chridxbuf+i, rem * sizeof *lx->chridxbuf); - } - lx->chrbuf0 = 0; - i = rem; - - for (; i < countof(lx->chrbuf); ++i) { - uchar c; - /* skip backslash-newline* */ - for (;;) { - if (p[0] == '\\') { - if (p[1] == '\n') { - idx += 2; - p += 2; - } else if (p[1] == '\r' && p[2] == '\n') { - idx += 3; - p += 3; - } else break; - } else if (ccopt.trigraph && !memcmp(p, "\?\?/\n", 4)) { - idx += 4; - p += 4; - } else if (ccopt.trigraph && !memcmp(p, "\?\?/\r\n", 5)) { - idx += 5; - p += 5; - } else break; - addfileline(lx->fileid, idx); - } - - if (idx >= lx->ndat) { - c = 0; - } else if (ccopt.trigraph && ((p[0] == '?') & (p[1] == '?'))) { - switch (p[2]) { - case '=': c = '#'; break; - case '(': c = '['; break; - case ')': c = ']'; break; - case '!': c = '|'; break; - case '<': c = '{'; break; - case '>': c = '}'; break; - case '-': c = '~'; break; - case '/': c = '\\'; break; - case '\'': c = '^'; break; - default: goto NoTrigraph; - } - p += 3; - idx += 3; - } else { - NoTrigraph: - ++idx; - if ((c = *p++) == '\n') - addfileline(lx->fileid, idx); - } - lx->chrbuf[i] = c; - lx->chridxbuf[i] = idx; - } - lx->idx = idx; -} - -static uchar -next(struct lexer *lx) -{ - if (lx->chrbuf0 >= countof(lx->chrbuf)) - fillchrbuf(lx); - lx->chridx = lx->chridxbuf[lx->chrbuf0]; - uchar c = lx->chrbuf[lx->chrbuf0]; - lx->eof = lx->chridx >= lx->ndat; - ++lx->chrbuf0; - return c; -} - -static uchar -peek(struct lexer *lx, int off) -{ - assert(off < countof(lx->chrbuf)); - if (lx->chrbuf0 + off >= countof(lx->chrbuf)) - fillchrbuf(lx); - return lx->chrbuf[lx->chrbuf0 + off]; -} - -static bool -match(struct lexer *lx, uchar c) -{ - if (!lx->eof && peek(lx, 0) == c) { - next(lx); - return 1; - } - return 0; -} - -static bool -aissep(int c) { - static const bool tab[] = { - ['('] = 1, [')'] = 1, ['['] = 1, [']'] = 1, - ['{'] = 1, ['}'] = 1, ['.'] = 1, [','] = 1, - [';'] = 1, ['?'] = 1, ['+'] = 1, ['-'] = 1, - ['*'] = 1, ['/'] = 1, ['&'] = 1, ['|'] = 1, - ['^'] = 1, ['~'] = 1, ['='] = 1, ['\''] = 1, - ['"'] = 1, ['<'] = 1, ['>'] = 1, [':'] = 1, - ['@'] = 1, ['#'] = 1, ['%'] = 1, ['\\'] = 1, - ['`'] = 1, ['!'] = 1, - }; - if (!aisprint(c) || aisspace(c)) - return 1; - return (uint)c < sizeof(tab) && tab[c]; -} - -enum typetag -parsenumlit(uvlong *outi, double *outf, const struct token *tk, bool ispp) -{ - if (tk->t == TKCHRLIT) { - uvlong n = 0; - if (!tk->wide) { - for (int i = 0; i < tk->len; ++i) - n = n << 8 | (uchar)tk->s[i]; - } else if (tk->wide == 1) { - n = tk->ws16[0]; - } else { - assert(tk->wide == 2); - n = tk->ws32[0]; - } - if (outi) *outi = n; - return TYINT; - } else if (memchr(tk->s, '.', tk->len)) { - extern double strtod(const char *, char **); - double f; - char buf[80], *suffix; - Float: /* float literal */ - assert(tk->len < sizeof buf - 1 && "numlit too big"); - memcpy(buf, tk->s, tk->len); - buf[tk->len] = 0; - f = strtod(buf, &suffix); - if (suffix == buf) - return 0; - if (!*suffix) { - if (outf) *outf = f; - return TYDOUBLE; - } else if ((suffix[0]|0x20) == 'f' && !suffix[1]) { - if (outf) *outf = f; - return TYFLOAT; - } else if ((suffix[0]|0x20) == 'l' && !suffix[1]) { - if (outf) *outf = f; - return TYLDOUBLE; - } - return 0; - } else { /* int literal */ - static uvlong max4typ[TYUVLONG-TYINT+1]; - uvlong n = 0; - int base = 10, nsx; - bool dec, u = 0, longlongok = ccopt.cstd >= STDC99 || !ccopt.pedant; - enum typetag ty = 0; - const char *sx; /*suffix*/ - char c; - - if (!max4typ[0]) - for (ty = TYINT; ty <= TYUVLONG; ++ty) - max4typ[ty-TYINT] = ((1ull << (8*targ_primsizes[ty]-1))-1) << isunsignedt(ty) | 1; - - sx = tk->s; - if (tk->len > 2 && sx[0] == '0') { - if ((sx[1]|32) == 'x') sx += 2, base = 16; /* 0x.. */ - else if ((sx[1]|32) == 'b') sx += 2, base = 2; /* 0b.. */ - else base = 8; /* 0.. */ - } - for (; sx < tk->s + tk->len; ++sx) { - if (base < 16) { - if (!in_range(c = *sx, '0', '0'+base-1)) break; - n = n*base + c - '0'; - } else { - if (in_range(c = *sx, '0', '9')) n = n*base + c - '0'; - else if (in_range(c|32, 'a', 'f')) n = n*base + 0xa + (c|32) - 'a'; - else break; - } - } - dec = base == 10; - nsx = tk->len - (sx - tk->s); - - if (nsx == 0) /* '' */ {} - else if ((sx[0]|32) == 'u') { - u = 1; - if (nsx == 1) /* 'u' */ {} - else if ((sx[1]|32) == 'l') { - if (nsx == 2) /* 'ul' */ goto L; - if (sx[1] == sx[2] && nsx == 3) /* 'ull' */ goto LL; - return 0; - } else return 0; - } else if ((sx[0]|32) == 'l') { - if (nsx == 1) /* 'l' */ goto L; - if ((sx[1]|32) == 'u' && nsx == 2) /* 'lu' */ { u=1; goto L; } - if (sx[1] == sx[0]) { - if (nsx == 2) /* 'll' */ goto LL; - if ((sx[2]|32) == 'u' && nsx == 3) /* 'llu' */ { u=1; goto LL; } - } - return 0; - } else if ((sx[0]|32) == 'e' || (sx[0]|32) == 'p') - goto Float; - else return 0; - -#define I(T) if (n <= max4typ[T - TYINT]) { ty = T; goto Ok; } - I(TYINT) - if (u || !dec) I(TYUINT) - L: - I(TYLONG) - if (u || !dec || !longlongok) I(TYULONG) - if (longlongok) { - LL: - I(TYVLONG) - if (u || !dec) I(TYUVLONG) - } - if (ispp) { ty = TYUVLONG; goto Ok; } -#undef I - /* too big */ - if (outi) *outi = n; - return 0; - Ok: - if (u && issignedt(ty)) ++ty; /* make unsigned */ - if (outi) *outi = n; - if (ispp) { - if (u) return TYUVLONG; - else if (n <= max4typ[TYVLONG-TYINT]) return TYVLONG; - } - if (ty >= TYVLONG && !longlongok) - warn(&tk->span, "'long long' in %M is an extension"); - return ty; - } -} - -static void -readstrchrlit(struct lexer *lx, struct token *tk, char delim, int wide) -{ - int c, i; - uchar tmp[200]; - vec_of(uchar) b = VINIT(tmp, sizeof tmp); - struct span span = {0}; - uint n, beginoff, idx; - beginoff = idx = lx->chridx; - - while ((c = next(lx)) != delim) { - static uint wmax[] = {0xFF, 0xFFFF, 0xFFFFFFFFu}; - if (c == '\n' || c == TKEOF) { - Noterm: - span.sl = (struct span0) { idx, lx->chridx - idx, lx->fileid }; - error(&span, "missing terminating %c character", delim); - break; - } else if (c == '\\') { - span.sl = (struct span0) { idx, lx->chridx - idx, lx->fileid }; - switch (c = next(lx)) { - case '\n': case TKEOF: - goto Noterm; - case '\'': c = '\''; break; - case '\\': c = '\\'; break; - case '"': c = '"'; break; - case '?': c = '?'; break; - case 'a': c = '\a'; break; - case 'b': c = '\b'; break; - case 'f': c = '\f'; break; - case 'n': c = '\n'; break; - case 'r': c = '\r'; break; - case 't': c = '\t'; break; - case 'v': c = '\v'; break; - case 'x': case 'X': /* hex */ - n = 0; - if (!aisxdigit(peek(lx, 0))) goto Badescseq; - do { - c = next(lx); - if (c-'0' < 10) n = n<<4 | (c-'0'); - else n = n<<4 | (10 + (c|0x20)-'a'); - } while (aisxdigit(peek(lx, 0))); - if (n > wmax[wide]) { - span.sl.len = lx->chridx - span.sl.off; - error(&span, "hex escape sequence out of range"); - } - c = n; - break; - default: - if (aisodigit(c)) { /* octal */ - n = c-'0'; - for (i = 2; i--;) { - if (!aisodigit(peek(lx, 0))) break; - n = n<<3 | ((c = next(lx))-'0'); - } - if (n > wmax[wide]) { - span.sl.len = lx->chridx - span.sl.off; - error(&span, "octal escape sequence out of range"); - } - c = n; - break; - } - Badescseq: - span.sl.len = lx->chridx - span.sl.off; - error(&span, "invalid escape sequence"); - } - } - if (!wide || c <= 0xFF) { - vpush(&b, c); - } else { - /* XXX this doesn't work for non-utf sequences, UTF-16 surrogates, etc - * the source utf8 -> utf16/32 conversion should be done on the fly, then - * these can also be appended directly, rather than doing the conversion at the end */ - char p[4]; - int n = utf8enc(p, c); - vpushn(&b, p, n); - } - idx = lx->chridx;; - } - if (delim == '"') { - tk->t = TKSTRLIT; - tk->len = b.n; - if ((tk->wide = wide)) { - tk->litlit = 0; - if (wide == 1) - tk->ws16 = utf8to16(&tk->len, lx->tmparena, b.p, b.n); - else - tk->ws32 = utf8to32(&tk->len, lx->tmparena, b.p, b.n); - } else if (lx->chridx - beginoff == tk->len + 1) { - tk->litlit = 1; - tk->s = (char *)&lx->dat[beginoff]; - } else { - tk->litlit = 0; - vpush(&b, 0); - tk->s = alloccopy(lx->tmparena, b.p, b.n, 1); - } - } else { - if (b.n == 0) { - span.sl = (struct span0) { idx, lx->chridx - idx, lx->fileid }; - error(&span, "empty character literal"); - } else if (b.n > targ_primsizes[TYINT]) { - span.sl = (struct span0) { idx, lx->chridx - idx, lx->fileid }; - error(&span, "multicharacter literal too long"); - } - tk->t = TKCHRLIT; - tk->len = b.n; - if ((tk->wide = wide)) { - tk->litlit = 0; - if (wide == 1) - tk->ws16 = utf8to16(&tk->len, lx->tmparena, b.p, b.n); - else - tk->ws32 = utf8to32(&tk->len, lx->tmparena, b.p, b.n); - } else if (lx->chridx - beginoff == tk->len + 1) { - tk->litlit = 1; - tk->s = (char *)&lx->dat[beginoff]; - } else { - tk->litlit = 0; - tk->s = alloccopy(lx->tmparena, b.p, tk->len, 1); - } - } - vfree(&b); -} - -/* for #include directive, read "header" or
*/ -static void -readheadername(struct lexer *lx, struct token *tk, char delim) -{ - int c; - uchar tmp[200]; - vec_of(uchar) b = VINIT(tmp, sizeof tmp); - struct span span = {0}; - uint beginoff, idx; - beginoff = idx = lx->chridx; - - while ((c = next(lx)) != delim) { - if (c == '\n' || lx->eof) { - span.sl = (struct span0) { idx, lx->chridx - idx, lx->fileid }; - error(&span, "missing terminating %c character", delim); - break; - } - vpush(&b, c); - idx = lx->chridx;; - } - tk->t = delim == '"' ? TKPPHDRQ : TKPPHDRH; - tk->len = b.n; - if (lx->chridx - beginoff == tk->len + 1) { - tk->litlit = 1; - tk->s = (char *)&lx->dat[beginoff]; - } else { - tk->litlit = 0; - vpush(&b, 0); - tk->s = alloccopy(lx->tmparena, b.p, b.n, 1); - } - vfree(&b); -} - -/* matches " | | '.' | ([eEpP][+-])" */ -static bool -isppnum(char prev, char c) -{ - if (!aissep(c) || c == '.') - return 1; - if (c == '+' || c == '-') - return (prev|0x20) == 'e' || (prev|0x20) == 'p'; - return 0; -} - -enum { MAXLITLEN = 256 }; /* maximum length of num literals and identifiers */ -static int -lex0(struct lexer *lx, struct token *tk, bool includeheader) -{ - int idx,q; - bool space = 0; -Begin: - idx = lx->chridx; - if (lx->chrbuf0+4 >= countof(lx->chrbuf)) - fillchrbuf(lx); - lx->chridx = lx->chridxbuf[lx->chrbuf0]; - uchar *p = &lx->chrbuf[lx->chrbuf0++], - c = p[0]; - switch (c) { - -#define RET(t_) do { tk->t = (t_); goto End; } while (0) -#define TK2(c2,t) if (p[1] == c2) { \ - lx->chridx = lx->chridxbuf[lx->chrbuf0]; \ - ++lx->chrbuf0; \ - RET(t); \ - } -#define TK3(c2,c3,t) if (p[1] == c2 && p[2] == c3) { \ - lx->chridx = lx->chridxbuf[++lx->chrbuf0]; \ - ++lx->chrbuf0; \ - RET(t); \ - } - - case ' ': case '\t': case '\f': case '\v': case '\r': - space = 1; - goto Begin; - break; - case '(': case ')': case ',': case ':': - case ';': case '?': case '[': case ']': - case '{': case '}': case '~': case '$': - case '@': case '`': case '\\': case '\n': - RET(c); - case '!': - TK2('=', TKNEQ); - RET(c); - case '#': - TK2('#', TKPPCAT); - RET(c); - case '+': - TK2('+', TKINC); - TK2('=', TKSETADD); - RET(c); - case '-': - TK2('-', TKDEC); - TK2('=', TKSETSUB); - TK2('>', TKARROW); - RET(c); - case '*': - TK2('=', TKSETMUL); - RET(c); - case '/': - TK2('=', TKSETDIV); - if (match(lx, '/')) { - /* // single line comment */ - for (;;) { - do { - if (lx->chrbuf[lx->chrbuf0] == '\n') { - lx->chridx = lx->chridxbuf[lx->chrbuf0++]; - lx->eof = lx->chridx >= lx->ndat; - RET('\n'); - } else if (lx->eof) RET(TKEOF); - } while (++lx->chrbuf0 < countof(lx->chrbuf)); - fillchrbuf(lx); - lx->chridx = lx->chridxbuf[lx->chrbuf0]; - lx->eof = lx->chridx >= lx->ndat; - } - } else if (match(lx, '*')) { - // /* multi line comment */ - if (lx->chrbuf0+1 >= countof(lx->chrbuf)) fillchrbuf(lx); - for (;;) { - do { - if (lx->chrbuf[lx->chrbuf0] == '*' && lx->chrbuf[lx->chrbuf0+1] == '/') { - lx->chridx = lx->chridxbuf[lx->chrbuf0+1]; - lx->chrbuf0 += 2; - lx->eof = lx->chridx >= lx->ndat; - space = 1; - goto Begin; - } - } while (++lx->chrbuf0+1 < countof(lx->chrbuf)); - fillchrbuf(lx); - lx->chridx = lx->chridxbuf[lx->chrbuf0]; - if ((lx->eof = (lx->chridx >= lx->ndat))) { - struct span span = {{ idx, lx->chridx - idx, lx->fileid }}; - fatal(&span, "unterminated comment"); - } - } - } - RET(c); - case '%': - TK2('=', TKSETREM); - RET(c); - case '^': - TK2('=', TKSETXOR); - RET(c); - case '=': - TK2('=', TKEQU); - RET(c); - case '<': - if (includeheader) { - readheadername(lx, tk, '>'); - goto End; - } - TK2('=', TKLTE); - TK3('<','=', TKSETSHL) - TK2('<', TKSHL); - RET(c); - case '>': - TK2('=', TKGTE); - TK3('>','=', TKSETSHR) - TK2('>', TKSHR); - RET(c); - case '&': - TK2('&', TKLOGAND); - TK2('=', TKSETAND); - RET(c); - case '|': - TK2('|', TKLOGIOR); - TK2('=', TKSETIOR); - RET(c); - case '"': - if (includeheader) { - readheadername(lx, tk, '"'); - } else { - case '\'': - tk->wideuni = 0; - readstrchrlit(lx, tk, c, 0); - } - goto End; - case '.': - TK3('.','.',TKDOTS) - if (aisdigit(p[1])) goto Numlit; - RET(c); - case 'L': - if (match(lx, (q = '\'')) || match(lx, (q = '"'))) { - tk->wideuni = 0; - readstrchrlit(lx, tk, q, /* wide */ targ_primsizes[targ_wchartype] == 2 ? 1 : 2); - goto End; - } - /* fallthru */ - default: - if (aisdigit(c)) Numlit: { - --lx->chrbuf0; - if (lx->chrbuf0 + MAXLITLEN >= countof(lx->chrbuf)) - fillchrbuf(lx); - int n = 1; - uchar *p = &lx->chrbuf[lx->chrbuf0]; - for (; isppnum(p[n-1], p[n]); ++n) { - if (n >= MAXLITLEN) { - lx->chridx = lx->chridxbuf[lx->chrbuf0+n-1]; - TooLong: - fatal(&(struct span) {{ idx, lx->chridx - idx, lx->fileid }}, - "token is too long"); - } - } - tk->len = n; - lx->chridx = lx->chridxbuf[(lx->chrbuf0 += n) - 1]; - if (n == lx->chridx - idx) { - tk->litlit = 1; - tk->s = (char *)&lx->dat[idx]; - } else { - tk->litlit = 0; - tk->s = alloccopy(lx->tmparena, p, n, 1); - } - RET(TKNUMLIT); - } else if (c == '_' || aisalpha(c)) { - --lx->chrbuf0; - if (lx->chrbuf0 + MAXLITLEN >= countof(lx->chrbuf)) - fillchrbuf(lx); - uchar *p = &lx->chrbuf[lx->chrbuf0]; - int n = 1; - for (; !aissep(p[n]); ++n) { - if (n >= MAXLITLEN) { - lx->chridx = lx->chridxbuf[lx->chrbuf0+n-1]; - goto TooLong; - } - } - tk->blue = 0; - tk->len = n; - tk->name = intern_((char *)p, n); - lx->chridx = lx->chridxbuf[(lx->chrbuf0 += n) - 1]; - RET(TKIDENT); - } - /* fallthru */ - case 0: if (lx->idx >= lx->ndat) RET(TKEOF); -#undef TK2 - } - fatal(&(struct span) {{ idx, lx->chridx - idx, lx->fileid }}, - "unexpected character %'c at %d (%d)", c, idx, lx->idx); -End: - tk->space = space; - tk->span.sl.file = lx->fileid; - tk->span.sl.off = idx; - tk->span.sl.len = lx->chridx - idx; - tk->span.ex = tk->span.sl; - return tk->t; -#undef RET -} - -/****************/ -/* PREPROCESSOR */ -/****************/ - -static bool -tokequ(const struct token *a, const struct token *b) -{ - if (a->t != b->t) return 0; - if (a->t == TKNUMLIT || a->t == TKSTRLIT || a->t == TKCHRLIT) { - if (a->len != b->len) return 0; - return !memcmp(a->s, b->s, a->len); - } else if (a->t == TKIDENT) { - return a->name == b->name; - } else if (a->t == TKPPMACARG || a->t == TKPPMACSTR) { - return a->argidx == b->argidx; - } - return 1; -} - -static vec_of(struct token) mtoksbuf, /* buffers for macro replacement list tokens */ - mdyntoksbuf; /* for function-like macros after parameter substitution */ - -struct macro { - internstr *param; - struct span0 span; - uchar nparam; - bool predef : 1, - special : 1, - fnlike : 1, - variadic : 1; - short id; - union { - void (*handler)(struct lexer *, struct token *); - struct rlist { - uint off; /* mtoksbuf[] */ - int n; - } rl; - const struct token *single; /* predef */ - void (*handlerfn)(struct lexer *, struct token *ret, const struct token *arg, int narg); - }; -}; - -static bool -macroequ(const struct macro *a, const struct macro *b) -{ - if (a->special != b->special) return 0; - if (a->fnlike != b->fnlike || a->variadic != b->variadic) return 0; - if (a->fnlike) { - if (a->nparam != b->nparam) return 0; - for (int i = 0; i < a->nparam; ++i) - if (a->param[i] != b->param[i]) - return 0; - } - if (a->special) return a->handler == b->handler; - if (a->rl.n != b->rl.n) return 0; - const struct token *tka = &mtoksbuf.p[a->rl.off], *tkb = &mtoksbuf.p[b->rl.off]; - for (int i = 0; i < a->rl.n; ++i) { - if (!tokequ(&tka[i], &tkb[i])) - return 0; - if (i > 0 && tka[i].space != tkb[i].space) - return 0; - } - return 1; -} - -static void -freemac(struct macro *mac) -{ - if (mac->special) return; - free(mac->param); -} - -static pmap_of(struct macro) macroht; - -static void -putmac(internstr name, struct macro *mac) -{ - static short id; - if (!macroht.v) pmap_init(¯oht, 1<<10); - struct macro *slot = pmap_get(¯oht, name); - mac->id = id++; - if (slot) { - if (!macroequ(slot, mac)) { - if (slot->predef) - warn(&(struct span){mac->span}, "redefining builtin macro"); - else { - warn(&(struct span){mac->span}, "redefining macro"); - note(&(struct span){slot->span}, "previous definition:"); - } - freemac(slot); - *slot = *mac; - } else { - freemac(mac); - } - } else { - pmap_set(¯oht, name, *mac); - } -} - -static void -delmac(internstr name) -{ - struct macro *slot = pmap_get(¯oht, name); - if (!slot) return; - freemac(slot); - pmap_del(¯oht, name); -} - -static inline internstr -macname(struct macro *mac) -{ - return macroht.mb.k[mac - macroht.v]; -} - -static inline struct macro * -findmac(internstr name) -{ - return pmap_get(¯oht, name); -} - -static void popmac(struct lexer *, bool all); - -static struct macrostack { - struct { - union { - uint off; /* mtoksbuf[]/mdyntoksbuf[] */ - const struct token *p; - }; - int n; - } rl; - struct span0 exspan; - int idx; - short macid; /* -1 for argument undergoing expansion */ - bool space : 1, stop : 1, dyn; -} mstk[1200]; - -static void NORETURN -lxfatal(struct lexer *lx, const struct span *span, const char *fmt, ...) -{ - if (fmt) { - va_list ap; - va_start(ap, fmt); - vdiag(span, DGERROR, fmt, ap); - va_end(ap); - } - int n = lx->macstk ? lx->macstk - mstk : 0, i = 0; - for (struct macrostack *l = lx->macstk; l && l > mstk; --l, ++i) { - if (i < 4 || i > n - 5) { - note(&(struct span){l->exspan}, "expanded from here"); - } else if (i == 5) { - efmt(" (...) \n"); - } - } - for (struct lexer *sv = lx->save; sv; sv = sv->save) { - int line; - const char *f = getfilepos(&line, NULL, sv->fileid, sv->chridx-2); - note(NULL, "in file included from %s:%d", f, line); - } - if (!fmt || span) efmt("Aborting due to previous error.\n"); - exit(1); -} - -static void -ppskipline(struct lexer *lx) -{ - while (lx->macstk) popmac(lx, 1); - for (int c; (c = peek(lx, 0)) != '\n' && !lx->eof; next(lx)) { - if (c == '/' && peek(lx, 1) == '*') { /* comment */ - next(lx), next(lx); - bool done = 0; - while (!((c = peek(lx, 0)) == '*' && peek(lx, 1) == '/')) { - if (lx->eof) { - struct span span = {{ lx->idx, lx->chridx - lx->idx, lx->fileid }}; - lxfatal(lx, &span, "unterminated comment"); - } - done = c == '\n'; - next(lx); - } - next(lx); - if (done) return; - } - } -} - -#define isppident(tk) in_range((tk).t, TKIDENT, TKWEND_) - -static bool -tokpaste(struct lexer *lx, struct token *dst, const struct token *l, const struct token *r) -{ - int t; - if (isppident(*l) && (isppident(*r) || r->t == TKNUMLIT)) { - /* foo ## bar ; foo ## 123 */ - t = TKIDENT; - } else if (l->t == TKNUMLIT && (isppident(*r) || r->t == TKNUMLIT)) { - /* 0x ## abc ; 213 ## 456 */ - t = TKNUMLIT; - } else if (l->t && !r->t) { - if (dst) *dst = *l; - return 1; - } else if (!l->t && r->t) { - if (dst) *dst = *r; - return 1; - } else { - static const struct { char s[2]; char t; } tab[] = { - {"==", TKEQU}, {"!=", TKNEQ}, {"<=", TKLTE}, {">=", TKGTE}, - {">>", TKSHR}, {"<<", TKSHL}, {"++", TKINC}, {"--", TKDEC}, - {"->", TKARROW}, {"##", TKPPCAT}, {"&&", TKLOGAND}, {"||", TKLOGIOR}, - {"+=", TKSETADD}, {"-=", TKSETSUB}, {"*=", TKSETMUL}, {"/=", TKSETDIV}, - {"%=", TKSETREM}, {"|=", TKSETIOR}, {"^=", TKSETXOR}, {"&=", TKSETAND}, - {{TKSHL,'='}, TKSETSHL}, {{TKSHR,'='}, TKSETSHR} - }; - for (int i = 0; i < countof(tab); ++i) { - if (tab[i].s[0] == l->t && tab[i].s[1] == r->t) { - if (dst) dst->t = tab[i].t; - return 1; - } - } - - if (dst) { - error(&l->span, "pasting %'tk and %'tk does not form a valid preprocessing token", l, r); - note(&r->span, "right-hand side"); - } - return 0; - } - - if (!dst) return 1; - char buf[200]; - memset(dst, 0, sizeof *dst); - dst->span = l->span; - if (dst->span.ex.file == r->span.ex.file && dst->span.ex.off < r->span.ex.off) - joinspan(&dst->span.ex, r->span.ex); - dst->t = t; - dst->len = l->len + r->len; - char *s = (isppident(*dst) && dst->len + 1 < sizeof buf) ? buf : alloc(lx->tmparena, dst->len + 1, 1); - memcpy(s, l->s, l->len); - memcpy(s + l->len, r->s, r->len); - s[dst->len] = 0; - dst->space = l->space; - if (isppident(*dst)) { - dst->blue = 0; - dst->name = intern(s); - } else { - dst->s = s; - } - return 1; -} - -enum { MAXMACROARGS = 128 }; - -static void -ppdefine(struct lexer *lx) -{ - struct token tk0, tk; - internstr mname; - struct macro mac = {0}; - struct bitset usedparams[BSSIZE(MAXMACROARGS)] = {0}; - - lex0(lx, &tk0, 0); - if (tk0.t != TKIDENT) { - error(&tk0.span, "macro name missing"); - ppskipline(lx); - return; - } - mname = tk0.name; - mac.span = tk0.span.sl; - - if (match(lx, '(')) { - /* gather params for function-like macro */ - vec_of(internstr) params = {0}; - vinit(¶ms, NULL, 4); - mac.fnlike = 1; - while (lex0(lx, &tk, 0) != ')') { - if (mac.variadic) { - error(&tk.span, "expected `)' after `...'"); - if (tk.t == TKEOF || tk.t == '\n') return; - break; - } - if (params.n > 0) { - if (tk.t == TKDOTS) { /* GNU extension 'args...' */ - mac.variadic = 1; - continue; - } if (tk.t != ',') { - error(&tk.span, "expected `,' or `)'"); - if (tk.t == TKEOF || tk.t == '\n') return; - break; - } - lex0(lx, &tk, 0); - } - if (tk.t == TKIDENT) - vpush(¶ms, tk.name); - else if (tk.t == TKDOTS) { - mac.variadic = 1; - vpush(¶ms, intern("__VA_ARGS__")); - } else { - error(&tk.span, "expected parameter name or `)'"); - if (tk.t == TKEOF || tk.t == '\n') return; - break; - } - } - if (!params.n) vfree(¶ms); - mac.param = params.p; - mac.nparam = params.n; - } - - /* gather replacement list */ - mac.rl.off = mtoksbuf.n; - for (int n = 0; lex0(lx, &tk, 0) != '\n' && tk.t != TKEOF;) { - if (n == 0 && !tk.space) - warn(&tk.span, "no whitespace after macro name"); - struct token *prev = n ? &mtoksbuf.p[mtoksbuf.n-1] : NULL; - if (mac.fnlike && tk.t == TKIDENT) { - for (int i = 0; i < mac.nparam; ++i) { - if (tk.name == mac.param[i]) { - bsset(usedparams, i); - tk.argidx = i; - if (prev && prev->t == '#') { - tk.t = TKPPMACSTR; - *prev = tk; - goto Next; - } else { - tk.t = TKPPMACARG; - break; - } - } - } - } - if (n > 1 && prev->t == TKPPCAT) { - struct token new; - if (prev[-1].t != TKPPMACARG && tk.t != TKPPMACARG - && tokpaste(lx, &new, &prev[-1], &tk)) - { - /* trivial concatenations */ - prev[-1] = new; - --mtoksbuf.n; - --n; - continue; - } - } - if (in_range(tk.t, TKNUMLIT, TKSTRLIT) && !tk.litlit) - tk.s = alloccopy(&globarena, tk.s, tk.len << tk.wide, 1); - vpush(&mtoksbuf, tk); - ++n; - Next:; - } - mac.rl.n = mtoksbuf.n - mac.rl.off; - /* mark unused params as such by nulling out param name, - * this way they aren't expanded when unused in the macro body */ - for (uint i = 0; bsiterzr(&i, usedparams, countof(usedparams)) && i < mac.nparam; ++i) { - mac.param[i] = NULL; - } - putmac(mname, &mac); -} - -static void -expecteol(struct lexer *lx, const char *ppname) -{ - struct token tk; - assert(!lx->macstk); - if (lex0(lx, &tk, 0) != '\n' && tk.t != TKEOF) { - (ccopt.pedant ? error : warn)(&tk.span, "extra tokens after #%s", ppname); - ppskipline(lx); - } -} -static void -ppundef(struct lexer *lx) -{ - struct token tk; - - lex0(lx, &tk, 0); - if (tk.t != TKIDENT) { - error(&tk.span, "macro name missing"); - ppskipline(lx); - return; - } - expecteol(lx, "undef"); - delmac(tk.name); -} - -static void -pushmacstk(struct lexer *lx, const struct span *span, const struct macrostack *m) -{ - struct macrostack *l = lx->macstk; - if (!l) l = mstk; - else if ((++l == mstk+countof(mstk))) lxfatal(lx, span, "macro expansion depth limit reached"); - *l = *m; - l->idx = 0; - l->exspan = span->ex; - lx->macstk = l; -} - -static void -popmac(struct lexer *lx, bool all) -{ - struct macrostack *stk; - - assert(stk = lx->macstk); - do { - if (stk->dyn) - mdyntoksbuf.n -= stk->rl.n; - if (lx->macstk == mstk) lx->macstk = NULL; - else --lx->macstk; - if (!all) break; - } while ((stk = lx->macstk) && stk->idx >= stk->rl.n && !stk->stop); -} - - -static inline const struct token * -stkgetrl(struct macrostack *s) -{ - if (s->macid < 0) return s->rl.p; - return (s->dyn ? mdyntoksbuf.p : mtoksbuf.p) + s->rl.off; -} - -static void expandfnmacro(struct lexer *lx, struct span *span, internstr mname, struct macro *mac); - -static enum expandres { EXPNONE, EXPINL, EXPSTACK } -tryexpand(struct lexer *lx, struct token *tk) -{ - struct span span = tk->span; - struct macro *mac = NULL; - internstr mname = tk->name; - - if (tk->t != TKIDENT || tk->blue || !(mac = findmac(mname))) - return EXPNONE; - - /* prevent infinite recursion */ - for (struct macrostack *l = lx->macstk; l && l+1 > mstk; --l) { - if (l->macid == mac->id) { - tk->blue = 1; - return EXPNONE; - } - } - - struct macrostack *stkprev = lx->macstk; - if (mac->special && !mac->fnlike) { - mac->handler(lx, tk); - return EXPINL; - } else if (mac->fnlike) { - /* look if there is a '(' token ahead, expand if so */ - struct macrostack *s = lx->macstk; - if (s && s->idx >= s->rl.n && !s->stop) { - popmac(lx, 1); - s = lx->macstk; - } - if (!s) { /* top-level context: looking ahead in file data */ - struct token tk; - int t; - for (;;) { /* skip whitespace and comments */ - if (aisspace(t = peek(lx, 0))) next(lx); - else if (t == '/') { - int idx = lx->chridx; - switch (peek(lx, 1)) { - case '/': - while (!lx->eof && next(lx) != '\n') ; - continue; - case '*': - next(lx), next(lx); - while (peek(lx, 0) != '*' || peek(lx, 1) != '/') { - if (lx->eof) { - struct span span = {{ idx, lx->chridx - idx, lx->fileid }}; - lxfatal(lx, &span, "unterminated comment"); - } - next(lx); - } - next(lx), next(lx); - continue; - } - break; - } else break; - } - if (t != '(') return 0; - lex0(lx, &tk, 0); - } else { /* expansion context: look ahead in macro stack */ - if (s->idx >= s->rl.n || stkgetrl(s)[s->idx].t != '(') return 0; - ++s->idx; - } - expandfnmacro(lx, &span, mname, mac); - } else if (mac->predef && mac->single) { - struct span span = tk->span; - *tk = *mac->single; - tk->span = span; - return EXPINL; - } else if (mac->rl.n) { - pushmacstk(lx, &span, &(struct macrostack){ - .rl = { .off = mac->rl.off, .n = mac->rl.n }, - .macid = mac->id, - .space = tk->space, - }); - } - if (lx->macstk != stkprev) { - lx->macstk->space = tk->space; - } - return EXPSTACK; -} - -static bool -advancemacstk(struct lexer *lx, struct token *tk) -{ - struct macrostack *s = lx->macstk; - assert(s != NULL); - if (s->idx >= s->rl.n) { - if (s->stop) { - tk->t = TKEOF; - return 1; - } - popmac(lx, 1); - return 0; - } - *tk = stkgetrl(s)[s->idx]; - if (s->idx == 0) { - /* the first token of the replaced expansion gets its space from the - * context in which it is expanded */ - tk->space = s->space; - } - ++s->idx; - assert(tk->t && tk->t != TKEOF); - tk->span.ex = s->exspan; - return tryexpand(lx, tk) != EXPSTACK; -} - -static void -expandfnmacro(struct lexer *lx, struct span *span, internstr mname, struct macro *mac) -{ - struct token _argsbuf[30]; - vec_of(struct token) argsbuf = VINIT(_argsbuf, countof(_argsbuf)); /* buffer for argument tokens */ - struct span excessspan; - int cur, len, i, bal, narg; - struct token tk; - bool toomany = 0; - struct argtks { - int idx, n; /* slices of argsbuf */ - int idx2, n2; - ushort nfirstx, /* for concatenation to work properly with expanded arguments, */ - nlastx; /* length of expanded first and last tokens of the unexpanded argument */ - } _args0[4], - *args = mac->nparam < countof(_args0) ? _args0 : alloc(lx->tmparena, sizeof *args * mac->nparam, 0); - - cur = i = bal = len = narg = 0; - for (struct macrostack *s = lx->macstk;;) { - if (!s) { - bool nl = 0; - for (;; nl = 1) { - lex0(lx, &tk, 0); - if (tk.t != '\n') break; - } - tk.space |= nl; - } - else { - tk = s->idx < s->rl.n ? stkgetrl(s)[s->idx++] : (struct token){TKEOF}; - } - if (((tk.t == ')' && bal == 0) || tk.t == TKEOF)) break; - if (tk.t == ',' && bal == 0) { - ++narg; - if (i == mac->nparam-1 && !mac->variadic) { - excessspan = tk.span; - toomany = 1; - } else if (i < mac->nparam - mac->variadic) { - assert(i < MAXMACROARGS); - args[i].idx = cur; - args[i].n = len; - cur = argsbuf.n; - len = 0; - ++i; - } else if (mac->variadic) { - vpush(&argsbuf, tk); - ++len; - } - } else if (!toomany) { - if (tk.t == '(') ++bal; - else if (tk.t == ')') --bal; - vpush(&argsbuf, tk); - ++len; - } - } - - if (tk.t == TKEOF) { - joinspan(&span->ex, tk.span.ex); - lxfatal(lx, span, "unterminated function-like macro invocation"); - } else if (i < mac->nparam) { - ++narg; - args[i].idx = cur; - args[i].n = len; - cur = argsbuf.n; - len = 0; - ++i; - } - joinspan(&span->ex, tk.span.ex); - int expargs0 = argsbuf.n; - for (int i = 0; i < mac->nparam; ++i) { - struct argtks *arg = &args[i]; - if (i >= narg) { - memset(arg, 0, sizeof *arg); - } else if (!mac->param || (mac->param[i] && arg->n > 0)) { - /* expand args used in the macro body */ - pushmacstk(lx, &tk.span, &(struct macrostack) { - .rl = { .p = argsbuf.p + arg->idx, .n = arg->n }, - .macid = -1, - .stop = 1, - }); - struct macrostack *l = lx->macstk; - arg->idx2 = argsbuf.n; - arg->nfirstx = arg->nlastx = 1; - int ilastx = -1; - for (bool pad = 0;;) { - struct macrostack *sprev = lx->macstk; - if (!advancemacstk(lx, &tk)) { - pad |= tk.space && lx->macstk == sprev; /* preserve whitespace empty macro */ - if (lx->macstk == l && l->idx == 1) - arg->nfirstx = argsbuf.n - arg->idx2; - if (lx->macstk == l+1 && lx->macstk->idx == 0 && l->idx == l->rl.n) - ilastx = argsbuf.n - arg->idx2; - continue; - } - if (tk.t == TKEOF) break; - size_t off = l->rl.p - argsbuf.p; - tk.space |= pad; - vpush(&argsbuf, tk); - l->rl.p = argsbuf.p + off; - pad = 0; - } - arg->n2 = argsbuf.n - arg->idx2; - arg->nlastx = ilastx < 0 ? 1 : args->n2 - ilastx; - assert(lx->macstk == l); - popmac(lx, 0); - } else { - memset(arg, 0, sizeof *arg); - } - } - if (narg < mac->nparam - mac->variadic) { - warn(span, "macro `%s' passed %d arguments, but takes %d", mname, narg, mac->nparam); - } else if (toomany) { - joinspan(&excessspan.ex, tk.span.ex); - warn(&excessspan, "macro `%s' passed %d arguments, but takes just %d", mname, narg, mac->nparam); - } - if (mac->special) { - mac->handlerfn(lx, &tk, argsbuf.p+expargs0, argsbuf.n-expargs0); - vpush(&mdyntoksbuf, tk); - pushmacstk(lx, span, &(struct macrostack){ - .rl = { .off = mdyntoksbuf.n-1, .n = 1 }, - .dyn = 1, - .macid = mac->id, - }); - } else if (mac->nparam > 0) { /* make new rlist with args replaced */ - bool vaoptskip = 0, spacepad = 0; - int vaoptbal = 0; - uint off = mdyntoksbuf.n; - for (int i = 0; i < mac->rl.n; ++i) { - struct argtks *arg; - const struct token *tki = &mtoksbuf.p[mac->rl.off+i]; - if (vaoptskip) { - assert(vaoptbal > 0); - if (tki->t == '(') ++vaoptbal; - else if (tki->t == ')') { - if (--vaoptbal == 0) vaoptskip = 0; - } - continue; - } - if (tki->t == TKPPCAT && i > 0 && i < mac->rl.n-1) { /* concatenation */ - const struct token *lhs = tki-1, - *rhs = tki+1; - bool space = lhs->space | spacepad; - if (lhs->t == ',' && mac->variadic - && rhs->t == TKPPMACARG && rhs->argidx == mac->nparam-1) { - /* handle GNU extension: ', ## __VA_ARGS__' */ - arg = &args[rhs->argidx]; - if (narg < mac->nparam) { /* no vaargs -> skip comma */ - assert(arg->n == 0); - --mdyntoksbuf.n; - } else { /* otherwise put comma and substitute vaargs */ - vpushn(&mdyntoksbuf, argsbuf.p+arg->idx2, arg->n2); - mdyntoksbuf.p[mdyntoksbuf.n - arg->n2].space |= rhs->space | tk.space; - } - ++i; /* we already handled rhs (__VA_ARGS__) */ - continue; - } - if (i > 2 && tki[-2].t == TKPPCAT) { - /* handles chained concatenations: xyz ## arg ## c - * lhs ^ rhs */ - lhs = (off < mdyntoksbuf.n) ? &mdyntoksbuf.p[--mdyntoksbuf.n] : NULL; - } else if (lhs->t == TKPPMACARG) { - arg = &args[lhs->argidx]; - lhs = arg->n ? &argsbuf.p[arg->idx + arg->n-1] : NULL; - if (lhs && arg->n > 1) space |= lhs->space; - } else { - --mdyntoksbuf.n; - } - if (rhs->t == TKPPMACARG) { - arg = &args[rhs->argidx]; - rhs = arg->n ? &argsbuf.p[arg->idx] : NULL; - } else { - ++i; - } - if (!lhs && !rhs) continue; - spacepad = 0; - if (!lhs) vpush(&mdyntoksbuf, *rhs); - else if (!rhs) vpush(&mdyntoksbuf, *lhs); - else { - struct token new; - if (tokpaste(lx, &new, lhs, rhs)) { - new.span.sl = tki->span.sl; - } - vpush(&mdyntoksbuf, new); - } - mdyntoksbuf.p[mdyntoksbuf.n-1].space = space; - } else if (tki->t != TKPPMACARG && tki->t != TKPPMACSTR) { /* regular token */ - if (tki->t == TKIDENT && mac->variadic) { - /* handle GNUC __VA_OPT__(...) */ - static internstr istr_vaopt; - if (!istr_vaopt) istr_vaopt = intern("__VA_OPT__"); - if (tki->name == istr_vaopt && i+2 < mac->rl.n && tki[1].t == '(') { - vaoptbal = 1; - vaoptskip = args[mac->nparam-1].n == 0; - ++i; /* skip open paren */ - continue; - } - } - if (vaoptbal) { - if (tki->t == '(') ++vaoptbal; - else if (tki->t == ')') { - /* skip closing paren of __VA_OPT__ invocation */ - if (--vaoptbal == 0) continue; - } - } - vpush(&mdyntoksbuf, *tki); - mdyntoksbuf.p[mdyntoksbuf.n-1].space |= spacepad; - spacepad = 0; - } else if (tki->t == TKPPMACARG) { - arg = &args[tki->argidx]; - if (arg->n == 0) { - spacepad = 1; - continue; - } - struct token *rl = argsbuf.p + arg->idx2; - int n = arg->n2; - bool skipfirst = 0; - if (i > 0 && tki[-1].t == TKPPCAT) { - /* skip first unexpanded token, was pasted */ - rl += arg->nfirstx; - n -= arg->nfirstx; - skipfirst = 1; - } - if (i < mac->rl.n-2 && tki[1].t == TKPPCAT) { - /* skip last unexpanded token, will be pasted */ - n -= arg->nlastx; - } - if (n > 0) { - vpushn(&mdyntoksbuf, rl, n); - if (!skipfirst) - /* the first token of the expanded body gets its space from the replacement list */ - mdyntoksbuf.p[mdyntoksbuf.n - n].space = tki->space | spacepad; - } - spacepad = 0; - } else { /* PPMACSTR */ - char tmp[200]; - struct wbuf buf = MEMBUF(tmp, sizeof tmp); - int n = 0; - - arg = &args[tki->argidx]; - // XXX this is wrong bc the string literal produced should be re-parsed later - // i.e. stringifying the token sequence '\n' should ultimately produce a - // string with an actual newline, not {'\\','n'} - Redo: - for (int i = 0; i < arg->n; ++i) { - struct token *tk = &argsbuf.p[arg->idx + i]; - if (i > 0 && tk->space) - n += bfmt(&buf, " "); - n += bfmt(&buf, "%tk", tk); - } - ioputc(&buf, 0); - if (buf.err) { - struct wbuf new = MEMBUF(alloc(lx->tmparena, n+1, 1), n+1); - assert(buf.buf == tmp); - memcpy(&buf, &new, sizeof buf); - goto Redo; - } - vpush(&mdyntoksbuf, ((struct token) { - .t = TKSTRLIT, - .wide = 0, - .space = tki->space | spacepad, - .s = buf.buf != tmp ? buf.buf : alloccopy(lx->tmparena, buf.buf, buf.len, 1), - .len = buf.len-1, - })); - spacepad = 0; - } - } - uint n = mdyntoksbuf.n - off; - - if (n) { - pushmacstk(lx, span, &(struct macrostack){ - .rl = { .off = off, .n = n }, - .macid = mac->id, - .dyn = 1, - }); - } - } else if (mac->rl.n) { - pushmacstk(lx, span, &(struct macrostack){ - .rl = { .off = mac->rl.off, .n = mac->rl.n }, - .macid = mac->id, - }); - } - vfree(&argsbuf); -} - -static struct token epeektk; -static int -elex(struct lexer *lx, struct token *tk) -{ - assert(tk); - if (epeektk.t) { - int tt = epeektk.t; - if (tk) *tk = epeektk; - epeektk.t = 0; - return tt; - } - if (lx->macstk) { - if (!advancemacstk(lx, tk)) - return elex(lx, tk); - return tk->t; - } - - lex0(lx, tk, 0); - return tk->t; -} - -static int -epeek(struct lexer *lx, struct token *tk) -{ - if (!epeektk.t) elex(lx, &epeektk); - if (tk) *tk = epeektk; - return epeektk.t; -} - -static int -tkprec(int tt) -{ - static const char tab[] = { - ['*'] = 12, ['/'] = 12, ['%'] = 12, - ['+'] = 11, ['-'] = 11, - [TKSHL] = 10, [TKSHR] = 10, - ['<'] = 9, ['>'] = 9, [TKLTE] = 9, [TKGTE] = 9, - [TKEQU] = 8, [TKNEQ] = 8, - ['&'] = 7, - ['^'] = 6, - ['|'] = 5, - [TKLOGAND] = 4, - [TKLOGIOR] = 3, - ['?'] = 2, - }; - if ((uint)tt < countof(tab)) - return tab[tt] - 1; - return -1; -} - -static vlong -expr(struct lexer *lx, bool *pu, int prec, bool ignore) -{ - struct token tk; - enum typetag ty; - char unops[16]; - int nunop = 0; - vlong x, y; - bool xu = 0, yu; /* x unsigned?; y unsigned? */ - -Unary: - elex(lx, &tk); -Switch: - switch (tk.t) { - case '-': case '~': case '!': - unops[nunop++] = tk.t; - if (nunop >= countof(unops)) { - x = expr(lx, &xu, 999, ignore); - break; - } - /* fallthru */ - case '+': goto Unary; - case '(': - x = expr(lx, &xu, 1, ignore); - if (elex(lx, &tk) != ')') { - error(&tk.span, "expected ')'"); - goto Err; - } - break; - case TKNUMLIT: - case TKCHRLIT: - ty = parsenumlit((uvlong *)&x, NULL, &tk, 1); - if (!ty) { - error(&tk.span, "bad number literal"); - goto Err; - } else if (isfltt(ty)) { - error(&tk.span, "float literal in preprocessor expresion"); - goto Err; - } - xu = isunsignedt(ty); - break; - default: - if (tk.t == TKIDENT) { - xu = 0; - if (!strcmp(tk.s, "defined")) { - /* 'defined' ppident */ - bool paren = 0; - lex0(lx, &tk, 0); - if ((paren = tk.t == '(')) lex0(lx, &tk, 0); - if (!isppident(tk)) { - error(&tk.span, "expected macro name"); - goto Err; - } - if (paren && lex0(lx, &tk, 0) != ')') { - error(&tk.span, "expected `)'"); - goto Err; - } - x = findmac(tk.name) != NULL; - } else { - switch (tryexpand(lx, &tk)) { - case EXPSTACK: goto Unary; - case EXPINL: goto Switch; - case EXPNONE: x = 0; break; /* non defined pp name -> 0 */ - } - } - break; - } - error(&tk.span, "expected preprocessor integer expression (near %'tk)", &tk); - goto Err; - } - - while (nunop > 0) switch (unops[--nunop]) { - case '-': x = -(uvlong)x; break; - case '~': x = ~x; break; - case '!': x = !x; break; - default: assert(0); - } - - for (int opprec; (opprec = tkprec(epeek(lx, &tk))) >= prec;) { - elex(lx, &tk); - if (tk.t == TKLOGAND) { - x = !!x & !!expr(lx, &yu, opprec+1, ignore || !x); - xu = 0; - } else if (tk.t == TKLOGIOR) { - x = !!x | !!expr(lx, &yu, opprec+1, ignore || x); - xu = 0; - } else if (tk.t == '?') { - struct span span = tk.span; - vlong m = expr(lx, &xu, 1, ignore || !x); - if (elex(lx, &tk) != ':') { - error(&tk.span, "expected ':'"); - note(&span, "to match conditional expression here"); - goto Err; - } - y = expr(lx, &yu, 1, ignore || x); - x = x ? m : y; - xu |= yu; - } else { - y = expr(lx, &yu, opprec + 1, ignore); - bool u = xu | yu; - switch ((int) tk.t) { - case '+': x += (uvlong) y; break; - case '-': x -= (uvlong) y; break; - case '*': x = u ? (uvlong) x * y : x * y; break; - case '&': x &= y; break; - case '^': x ^= y; break; - case '|': x |= y; break; - case '/': if (y) x = u ? (uvlong) x / y : x / y; - else if (ignore) x = 0; - else goto Div0; - break; - case '%': if (y) x = u ? (uvlong) x % y : x % y; - else if (ignore) x = 0; - else Div0: error(&tk.span, "division by zero"); - break; - case TKSHL: if ((uvlong)y < 64) x <<= y; - else if (ignore) x = 0; - else goto BadShift; - break; - u = xu; - case TKSHR: if ((uvlong)y < 64) x = u ? (uvlong) x >> y : x >> y; - else if (ignore) x = 0; - else BadShift: error(&tk.span, "bad shift by %ld", y); - u = xu; - break; - case '<': x = u ? (uvlong) x < y : x < y; u = 0; break; - case '>': x = u ? (uvlong) x > y : x > y; u = 0; break; - case TKLTE: x = u ? (uvlong) x <= y : x <= y; u = 0; break; - case TKGTE: x = u ? (uvlong) x >= y : x >= y; u = 0; break; - case TKEQU: x = x == y; u = 0; break; - case TKNEQ: x = x != y; u = 0; break; - default: assert(0); - } - xu = u; - } - } - if (!prec) { /* not a sub expr */ - if (elex(lx, &tk) != '\n' && tk.t != TKEOF) { - error(&tk.span, "extra tokens after preprocessor expression"); - ppskipline(lx); - } - } - if (pu) *pu = xu; - return x; - -Err: - ppskipline(lx); - if (pu) *pu = xu; - return 0; -} - -enum { - PPCNDFALSE, /* the condition was zero, skip until #else/#elif */ - PPCNDTRUE, /* the condition was non-zero, emit until #else/#elif */ - PPCNDTAKEN /* some branch was already taken, skip until #else */ -}; -static struct ppcnd { - struct span0 ifspan; - int filedepth; - uchar cnd; - bool elsep; -} ppcndstk[32]; -static int nppcnd; - -static int includedepth; - -static void -ppif(struct lexer *lx, const struct span *span) -{ - vlong v = expr(lx, NULL, 0, 0); - assert(nppcnd < countof(ppcndstk) && "too many nested #if"); - ppcndstk[nppcnd].ifspan = span->sl; - ppcndstk[nppcnd].filedepth = includedepth; - ppcndstk[nppcnd].cnd = v ? PPCNDTRUE : PPCNDFALSE; - ppcndstk[nppcnd++].elsep = 0; -} - -static void -ppifxdef(struct lexer *lx, bool defp, const struct span *span) -{ - struct token tk; - - lex0(lx, &tk, 0); - if (tk.t != TKIDENT) { - error(&tk.span, "macro name missing"); - ppskipline(lx); - return; - } - expecteol(lx, defp ? "ifdef" : "ifndef"); - if (!defp && lx->firstdirective) lx->inclguard = tk.name; - assert(nppcnd < countof(ppcndstk) && "too many nested #if"); - ppcndstk[nppcnd].ifspan = span->sl; - ppcndstk[nppcnd].filedepth = includedepth; - ppcndstk[nppcnd].cnd = (findmac(tk.name) == NULL) ^ defp ? PPCNDTRUE : PPCNDFALSE; - ppcndstk[nppcnd++].elsep = 0; -} - -static void -ppelif(struct lexer *lx, const struct span *span) -{ - vlong v; - struct ppcnd *cnd; - - if (!nppcnd) { - error(span, "#elif without matching #if"); - ppif(lx, span); - return; - } - v = expr(lx, NULL, 0, 0); - cnd = &ppcndstk[nppcnd-1]; - if (cnd->elsep) { - error(span, "#elif after #else"); - return; - } - switch (cnd->cnd) { - case PPCNDTRUE: cnd->cnd = PPCNDTAKEN; break; - case PPCNDFALSE: cnd->cnd = v ? PPCNDTRUE : PPCNDFALSE; break; - } -} -static void -ppelifxdef(struct lexer *lx, bool defp, const struct span *span) -{ - struct token tk; - struct ppcnd *cnd; - - if (!nppcnd) { - error(span, "#elif%sdef without matching #if", &"n"[defp]); - ppif(lx, span); - return; - } - cnd = &ppcndstk[nppcnd-1]; - if (cnd->elsep) { - error(span, "#elif%sdef after #else", &"n"[defp]); - return; - } - lex0(lx, &tk, 0); - if (tk.t != TKIDENT) { - error(&tk.span, "macro name missing"); - ppskipline(lx); - return; - } - expecteol(lx, defp ? "elifdef" : "elifndef"); - switch (cnd->cnd) { - case PPCNDTRUE: cnd->cnd = PPCNDTAKEN; break; - case PPCNDFALSE: cnd->cnd = (findmac(tk.name) == NULL) ^ defp ? PPCNDTRUE : PPCNDFALSE; break; - case PPCNDTAKEN: assert(0); - } -} - -static void -ppendif(struct lexer *lx, const struct span *span) -{ - expecteol(lx, "endif"); - if (!nppcnd) { - error(span, "#endif without matching #if"); - return; - } - --nppcnd; -} - -static void -ppelse(struct lexer *lx, const struct span *span) -{ - struct ppcnd *cnd; - expecteol(lx, "else"); - if (!nppcnd) { - error(span, "#else without matching #if"); - return; - } - cnd = &ppcndstk[nppcnd-1]; - if (cnd->elsep) - error(span, "#else after #else"); - switch (cnd->cnd) { - case PPCNDFALSE: cnd->cnd = PPCNDTRUE; break; - case PPCNDTRUE: cnd->cnd = PPCNDTAKEN; break; - } - cnd->elsep = 1; -} - -enum { MAXINCLUDE = 200 }; -static bool -tryincludepath(struct lexer *lx, const struct span *span, char *path) -{ - struct lexer new; - const char *err; - switch (initlexer(&new, &err, path)) { - default: assert(0); - case LXERR: return 0; - case LXFILESEEN: - xbfree(path); - /* fallthru */ - case LXOK: - new.save = xmalloc(sizeof *new.save); - lx->inclnerror = nerror; - lx->inclnwarn = nwarn; - memcpy(new.save, lx, sizeof *lx); - *lx = new; - - if (++includedepth == MAXINCLUDE) - lxfatal(lx, span, "Maximum nested include depth of %d reached", includedepth); - break; - case LXFILESKIP: - xbfree(path); - break; - } - return 1; -} - -static bool -doinclude(struct lexer *lx, const struct span *span, bool quote, const char *str, size_t slen) -{ - char *path = NULL; - const char *base, *end; - if (quote) { - if (str[0] == '/') { - /* try absolute path */ - xbgrow(&path, slen + 1); - memcpy(path, str, slen); - path[slen] = 0; - if (tryincludepath(lx, span, path)) return 1; - goto NotFound; - } - - /* try relative to current file's directory */ - base = getfilename(lx->fileid, 0); - for (end = base; *end != 0; ++end) {} - for (--end; *end != '/' && end != base; --end) {} - if (*end == '/') ++end; - xbgrow(&path, end - base + slen + 1); - memcpy(path, base, end - base); - memcpy(path + (end - base), str, slen); - path[end - base + slen] = 0; - if (tryincludepath(lx, span, path)) return 1; - } - /* try system paths. order: - * 1. -iquote - * 2. -I - * 3. -isystem - * 4. embedded include files - * 5. standard system includes - * 6. -idirafter - */ - for (int i = quote ? CINCL_iquote : CINCL_I; i < countof(cinclpaths); ++i) { - for (struct inclpath *p = cinclpaths[i].list; p; p = p->next) { - if (i == CINCLsys) { - /* try embedded files pseudo-path */ - xbgrow(&path, slen + 3); - path[0] = '@', path[1] = ':'; - memcpy(path+2, str, slen); - path[slen+2] = 0; - if (tryincludepath(lx, span, path)) return 1; - } - int ndir = strlen(p->path); - xbgrow(&path, ndir + slen + 2); - memcpy(path, p->path, ndir); - path[ndir++] = '/'; - memcpy(path + ndir, str, slen); - path[ndir + slen] = 0; - if (tryincludepath(lx, span, path)) return 1; - } - } -NotFound: - error(span, "file not found: %'S", str, slen); - xbfree(path); - return 0; -} - -static bool -ppinclude(struct lexer *lx, const struct span *span0) -{ - struct token tk; - struct span span = *span0; - - if (in_range(lex0(lx, &tk, 1), TKPPHDRH, TKPPHDRQ)) { - expecteol(lx, "include"); - joinspan(&span.ex, tk.span.ex); - return doinclude(lx, &span, tk.t == TKPPHDRQ, tk.s, tk.len); - } else if (tk.t == '\n' || tk.t == TKEOF) { - goto BadSyntax; - } else { - /* '#include pp-tokens' - * gather and expand pp-tokens */ - struct token tksbuf[8]; - vec_of(struct token) tks = VINIT(tksbuf, countof(tksbuf)); - for (;;) { - if (!lx->macstk) { - if (tryexpand(lx, &tk) == EXPSTACK) continue; - vpush(&tks, tk); - } else if (advancemacstk(lx, &tk)) { - vpush(&tks, tk); - continue; - } - if (lex0(lx, &tk, 0) == '\n' || tk.t == TKEOF) break; - } - if (tks.n >= 1 && tks.p[0].t == TKSTRLIT) { /* "header.h" */ - if (tks.n > 1) - (ccopt.pedant ? error : warn)(&tks.p[1].span, "extra tokens after #include"); - joinspan(&span.ex, tks.p[0].span.ex); - return doinclude(lx, &span, 1, tks.p[0].s, tks.p[0].len); - } else if (tks.n > 2 && tks.p[0].t == '<' && tks.p[tks.n-1].t == '>') { /* */ - /* this is multiple tokens, concatenate them together */ - char buf[4096]; - struct wbuf wbuf = MEMBUF(buf, sizeof buf); - for (int i = 1; i < tks.n-1; ++i) { - struct token *tk = &tks.p[i]; - bfmt(&wbuf, &" %tk"[!tk->space], tk); - } - joinspan(&span.ex, tks.p[tks.n-1].span.ex); - if (wbuf.err) error(&span, "path too long"); - else { - return doinclude(lx, &span, 0, buf, wbuf.len); - } - } else { - BadSyntax: - error(&tk.span, "expected \"header\" or
"); - ppskipline(lx); - } - vfree(&tks); - } - return 1; -} - -static void -ppline(struct lexer *lx, struct token *tk0) -{ - struct token tk, tks[2]; - int ntk = 0; - struct span span = tk0->span; - bool ext = 0; - if (tk0->t == TKNUMLIT) { /* handles GNU-style post preprocessing directive '# n ...' */ - tks[ntk++] = *tk0; - ext = 1; - } - while (ntk < 2) { - if (lx->macstk && advancemacstk(lx, &tk)) { - tks[ntk++] = tk; - if (lx->macstk->idx >= lx->macstk->rl.n) popmac(lx, 1); - } else if (!lx->macstk && (lex0(lx, &tk, 0) == '\n' || tk.t == TKEOF)) { - break; - } else if (tk.t == TKIDENT && tryexpand(lx, &tk) == EXPSTACK) { - continue; - } else { - tks[ntk++] = tk; - } - } - uvlong lineno = 0; - char *file = NULL; - if (ntk > 0 && tks[0].t == TKNUMLIT) { - if (!parsenumlit(&lineno, NULL, &tks[0], 1) || (lineno == 0 && !ext)) - goto BadNum; - if (lineno >= 1<<(32-SPANFILEBITS)) { - warn(&tks[0].span, "ignoring #line number that is too big"); - lineno = 0; - goto Err; - } - } else { - BadNum: - error(ntk ? &tks[0].span : &span, "#line requires a positive integer argument"); - Err: - if (lx->macstk || (tk.t != '\n' && tk.t != TKEOF)) ppskipline(lx); - return; - } - if (ntk > 1) { - if (tks[1].t == TKSTRLIT && !tks[1].wide) { - file = alloc(&globarena, tks[1].len+1, 0); - memcpy(file, tks[1].s, tks[1].len); - file[tks[1].len] = 0; - } else { - error(&tks[1].span, "invalid filename for #line directive"); - } - } - if (lineno) setfileline(lx->fileid, lx->chridx, lineno, file); - if (lx->macstk) { - span.sl.off = span.ex.off = lx->chridx; - span.sl.len = span.ex.len = 1; - ppskipline(lx); - if (!ext) - (ccopt.pedant ? error : warn)(&span, "extra tokens after #line"); - } else if (tk.t != '\n' && tk.t != TKEOF) { - if (ext) ppskipline(lx); - else expecteol(lx, "line"); - } -} - -static void -pppragma(struct lexer *lx, const struct span *span0) -{ - struct token tk; - struct span span = *span0; - if (lex0(lx, &tk, 0) == TKIDENT && !strcmp(tk.s, "once")) { - markfileonce(lx->fileid, NULL); - } else { - joinspan(&span.ex, tk.span.ex); - warn(&span, "unknown pragma ignored"); - ppskipline(lx); - return; - } - expecteol(lx, "pragma"); -} - -static void -ppdiag(struct lexer *lx, const struct span *span0, bool err) -{ - const uchar *p = getfile(lx->fileid)->p; - uint off = lx->chridx, end; - ppskipline(lx); - end = lx->chridx; - while (off < end && aisspace(p[off])) ++off; - (err ? error : warn)(span0, "%S", p + off, end - off); -} - -enum directive { - PPXXX, - /* !sorted */ - PPDEFINE, - PPELIF, - PPELIFDEF, - PPELIFNDEF, - PPELSE, - PPENDIF, - PPERROR, - PPIF, - PPIFDEF, - PPIFNDEF, - PPINCLUDE, - PPLINE, - PPPRAGMA, - PPUNDEF, - PPWARNING, -}; - -static enum directive -findppcmd(const struct token *tk) -{ - static const char *tab[] = { - /* !sorted */ - "define", - "elif", - "elifdef", - "elifndef", - "else", - "endif", - "error", - "if", - "ifdef", - "ifndef", - "include", - "line", - "pragma", - "undef", - "warning", - }; - int l = 0, h = countof(tab) - 1, i, cmp; - const char *s = tk->s; - - if (tk->t == TKWif) return PPIF; - if (tk->t == TKWelse) return PPELSE; - /* binary search over sorted array */ - while (l <= h) { - i = (l + h) / 2; - cmp = strcmp(tab[i], s); - if (cmp < 0) l = i + 1; - else if (cmp > 0) h = i - 1; - else return i + 1; - } - return PPXXX; -} - -static void -identkeyword(struct token *tk) -{ -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wmissing-braces" -#endif - static const struct { - const char *s; - struct kw { uchar t, cstd : 4, ext : 1; } kw; - const char *alias[2]; - } kwtab[] = { -#define _(kw, cstd, ...) { #kw, {TKW##kw, cstd}, __VA_ARGS__ }, -#include "keywords.def" -#undef _ - }; -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif - static pmap_of(struct kw) kwmap; - if (!kwmap.v) { - pmap_init(&kwmap, 128); - for (int i = 0; i < countof(kwtab); ++i) { - struct kw kw = kwtab[i].kw; - /* allow future keywords but only if they begin with _ */ - if (kw.cstd <= ccopt.cstd || kwtab[i].s[0] == '_') { - kw.ext = kw.cstd > ccopt.cstd; - pmap_set(&kwmap, intern(kwtab[i].s), kw); - } - for (const char *const *palias = kwtab[i].alias, *const *end = palias+2; - palias != end && *palias; ++palias) - { - pmap_set(&kwmap, intern(*palias), kw); - } - } - } - struct kw *kw = pmap_get(&kwmap, tk->name); - if (kw) { - tk->t = kw->t; - tk->extwarn = kw->ext; - } -} - -int -lex(struct lexer *lx, struct token *tk_) -{ - struct token tkx[1], *tk; - int t; - -Begin: - assert(tk_ != &lx->peektok); - tk = tk_ ? tk_ : tkx; - if (lx->peektok.t) { - *tk = lx->peektok; - memset(&lx->peektok, 0, sizeof lx->peektok); - return tk->t; - } - - if (lx->macstk) { - if (!advancemacstk(lx, tk)) - goto Begin; - if (tk->t == TKIDENT) identkeyword(tk); - return tk->t; - } - bool linebegin = 1, - skip = nppcnd ? ppcndstk[nppcnd-1].cnd != PPCNDTRUE : 0, - inclerror = 0; /* set when #include header file not found: process other directives then abort */ - enum directive lastcmd = 0; - for (;;) { - while ((t = lex0(lx, tk, 0)) == '\n') linebegin = 1; - if (t == '#' && linebegin) { - if (lex0(lx, tk, 0) == '\n') { } - else if (tk->t == TKNUMLIT || tk->t == TKIDENT) { - lastcmd = tk->t == TKNUMLIT ? PPLINE : findppcmd(tk); - if (nppcnd == lx->nppcnd0) lx->inclguard = NULL; - if (!skip) { - switch (lastcmd) { - case PPXXX: goto BadPP; - case PPDEFINE: ppdefine(lx); break; - case PPUNDEF: ppundef(lx); break; - case PPIF: ppif(lx, &tk->span); break; - case PPIFDEF: ppifxdef(lx, 1, &tk->span); break; - case PPIFNDEF: ppifxdef(lx, 0, &tk->span); break; - case PPELIF: ppelif(lx, &tk->span); break; - case PPELIFDEF: ppelifxdef(lx, 1, &tk->span); break; - case PPELIFNDEF: ppelifxdef(lx, 0, &tk->span); break; - case PPELSE: ppelse(lx, &tk->span); break; - case PPENDIF: ppendif(lx, &tk->span); break; - case PPLINE: ppline(lx, tk); break; - case PPPRAGMA: pppragma(lx, &tk->span); break; - case PPWARNING: ppdiag(lx, &tk->span, 0); break; - case PPERROR: ppdiag(lx, &tk->span, 1); break; - case PPINCLUDE: inclerror |= !ppinclude(lx, &tk->span); break; - default: assert(0&&"nyi"); - } - } else { - switch (lastcmd) { - case PPIF: /* increment nesting level */ - case PPIFDEF: - case PPIFNDEF: - assert(nppcnd < countof(ppcndstk) && "too many nested #if"); - ppcndstk[nppcnd].ifspan = tk->span.sl; - ppcndstk[nppcnd].cnd = PPCNDTAKEN; - ppcndstk[nppcnd++].elsep = 0; - break; - case PPELIF: ppelif(lx, &tk->span); break; - case PPELIFDEF: ppelifxdef(lx, 1, &tk->span); break; - case PPELIFNDEF: ppelifxdef(lx, 0, &tk->span); break; - case PPELSE: ppelse(lx, &tk->span); break; - case PPENDIF: ppendif(lx, &tk->span); break; - default: ppskipline(lx); break; - } - } - if (lastcmd != PPINCLUDE) - lx->firstdirective = 0; - skip = nppcnd ? ppcndstk[nppcnd-1].cnd != PPCNDTRUE : 0; - } else { - if (!skip) { - BadPP: - error(&tk->span, "invalid preprocessor directive"); - } - ppskipline(lx); - } - linebegin = 1; - } else { - lx->firstdirective = 0; - linebegin = 0; - if (skip && t != TKEOF) - continue; - if (tryexpand(lx, tk) == EXPSTACK) - goto Begin; - if (t == TKEOF && nppcnd && ppcndstk[nppcnd-1].filedepth == includedepth) { - struct span span = { ppcndstk[nppcnd-1].ifspan }; - error(&span, "#if is not matched by #endif"); - } - if (t == TKEOF && lx->save) { - /* end of #include'd file, restore previous state */ - if (lastcmd == PPENDIF && lx->inclguard) { - markfileonce(lx->fileid, lx->inclguard); - } - struct lexer *sv = lx->save; - if (sv->inclnerror != nerror || sv->inclnwarn != nwarn) { - int line; - const char *f = getfilepos(&line, NULL, sv->fileid, sv->chridx-2); - note(NULL, "in file included from %s:%d", f, line); - } - memcpy(lx, sv, sizeof *lx); - free(sv); - --includedepth; - linebegin = 1; - lx->firstdirective = 0; - } else if (t == TKEOF && inclerror) { - break; - } else { - if (nppcnd == lx->nppcnd0) lx->inclguard = NULL; - if (t == TKIDENT) identkeyword(tk); - if (!inclerror) return tk->t; - } - } - } - assert(inclerror); - efmt("Aborting due to previous error(s).\n"); - exit(1); - assert(0); -} - -int -lexpeek(struct lexer *lx, struct token *tk_) -{ - struct token tkx[1], *tk; - uint t; - - tk = tk_ ? tk_ : tkx; - if ((t = lx->peektok.t)) { - *tk = lx->peektok; - return t; - } - t = lex(lx, tk); - lx->peektok = *tk; - return t; -} - -/* Predefined/builtin macros */ - -static vec_of(uchar) ppcmdline; - -void -cpppredef(bool undef, const char *cmd) -{ - const char *sep = strchr(cmd, '='), *body = sep ? sep+1 : "1"; - uint namelen = sep ? sep - cmd : strlen(cmd); - char line[1024]; - struct wbuf wbuf = MEMBUF(line, sizeof line); - if (!ppcmdline.p) vinit(&ppcmdline, NULL, 1<<10); - int n; - if (undef) - n = bfmt(&wbuf, "#undef %S\n", cmd, namelen); - else - n = bfmt(&wbuf, "#define %S %s\n", cmd, namelen, body); - assert(n <= sizeof line); - vpushn(&ppcmdline, line, n); -} - -static void -mac__file__(struct lexer *lx, struct token *tk) -{ - tk->t = TKSTRLIT; - tk->s = getfilename(lx->fileid, lx->chridx); - tk->wide = 0; - tk->len = strlen(tk->s); -} - -static void -mac__line__(struct lexer *lx, struct token *tk) -{ - char buf[20]; - int line; - struct wbuf wbuf = MEMBUF(buf, sizeof buf); - getfilepos(&line, NULL, lx->fileid, lx->chridx); - bfmt(&wbuf, "%d", line), buf[wbuf.len++] = 0; - tk->t = TKNUMLIT; - tk->s = alloccopy(lx->tmparena, buf, wbuf.len, 1); - tk->len = wbuf.len-1; -} - -#include - -static void -mac__date__(struct lexer *lx, struct token *tk) -{ - char buf[20]; - struct wbuf wbuf = MEMBUF(buf, sizeof buf); - time_t tm = time(NULL); - struct tm *ts = localtime(&tm); - tk->t = TKSTRLIT; - tk->wide = 0; - tk->len = 11; - if (ts) { - bfmt(&wbuf, "%S %2d %4d%c", - &"JanFebMarAprMayJunJulAugSepOctNovDec"[ts->tm_mon*3], 3, - ts->tm_mday, 1900+ts->tm_year, 0); - assert(wbuf.len == 11+1); - tk->s = alloccopy(lx->tmparena, buf, wbuf.len, 1); - } else { - tk->s = "\?\?\? \?\? \?\?\?\?"; - } -} - -static void -mac__time__(struct lexer *lx, struct token *tk) -{ - char buf[20]; - struct wbuf wbuf = MEMBUF(buf, sizeof buf); - time_t tm = time(NULL); - struct tm *ts = localtime(&tm); - tk->t = TKSTRLIT; - tk->wide = 0; - tk->len = 8; - if (ts) { - bfmt(&wbuf, "%.2d:%.2d:%.2d%c", ts->tm_hour, ts->tm_min, ts->tm_sec, 0); - tk->s = alloccopy(lx->tmparena, buf, wbuf.len, 1); - assert(wbuf.len == 8+1); - } else { - tk->s = "\?\?:\?\?:\?\?"; - } -} - -static void -mac__counter__(struct lexer *lx, struct token *tk) -{ - char buf[20]; - struct wbuf wbuf = MEMBUF(buf, sizeof buf); - static int counter; - bfmt(&wbuf, "%d", counter++), buf[wbuf.len++] = 0; - tk->t = TKNUMLIT; - tk->s = alloccopy(lx->tmparena, buf, wbuf.len, 1); - tk->len = wbuf.len-1; -} - -static void -mac__has_builtin(struct lexer *lx, struct token *tk, const struct token *args, int narg) -{ - extern bool hasbuiltin(const char *, uint n); - bool has = 0; - tk->t = TKNUMLIT, tk->len = 1; - if (narg >= 1) { - if (args[0].t == TKIDENT) - has = hasbuiltin(args[0].s, args[0].len); - else if (in_range(args[0].t, TKWBEGIN_, TKWEND_)) - has = args[0].len >= sizeof "__builtin_" && !memcmp(args[0].s, "__builtin_", 10); - else goto Bad; - if (narg != 1) - error(&args[1].span, "expected `)' after '%tk'", &args[0]); - } else Bad: { - error(narg ? &args[0].span : &tk->span, "'__has_builtin' requires an identifier"); - } - tk->s = &"01"[has]; -} - - -static void -putdef1(const char *name) -{ - static const struct token tok_1 = { TKNUMLIT, .s = "1", .len = 1, .litlit = 1 }; - putmac(intern(name), &(struct macro) { - .predef = 1, - .single = &tok_1 - }); -} - -static void -putdefs1(const char *s) -{ - for (; *s; s += strlen(s) + 1) putdef1(s); -} - -static void -addpredefmacros(struct arena **tmparena) -{ - static struct token tok_stdc = {TKNUMLIT}, - tok_major = {TKNUMLIT, .s = XSTR(ANTCC_VERSION_MAJOR), - .len = sizeof XSTR(ANTCC_VERSION_MAJOR) - 1}, - tok_minor = {TKNUMLIT, .s = XSTR(ANTCC_VERSION_MINOR), - .len = sizeof XSTR(ANTCC_VERSION_MINOR) - 1}, - tok_patch = {TKNUMLIT, .s = XSTR(ANTCC_VERSION_PATCH), - .len = sizeof XSTR(ANTCC_VERSION_PATCH) - 1}; - static struct { const char *name; struct macro m; } macs[] = { - { "__FILE__", { .predef = 1, .special = 1, .handler = mac__file__ }}, - { "__LINE__", { .predef = 1, .special = 1, .handler = mac__line__ }}, - { "__DATE__", { .predef = 1, .special = 1, .handler = mac__date__ }}, - { "__TIME__", { .predef = 1, .special = 1, .handler = mac__time__ }}, - { "__COUNTER__", { .predef = 1, .special = 1, .handler = mac__counter__ }}, - { "__has_builtin", { .predef = 1, .nparam = 1, .fnlike = 1, .special = 1, .handlerfn = mac__has_builtin }}, - { "__STDC_VERSION__", { .predef = 1, .single = &tok_stdc }}, - { "__antcc_major__", { .predef = 1, .single = &tok_major }}, - { "__antcc_minor__", { .predef = 1, .single = &tok_minor }}, - { "__antcc_patch__", { .predef = 1, .single = &tok_patch }}, - { "__extension__", { .predef = 1, .single = NULL }}, - }; - static const char - cpredefs[] = - "__antcc__\0__STDC__\0__STDC_NO_ATOMICS__\0__STDC_NO_COMPLEX__\0__STDC_NO_THREADS__\0__STDC_NO_VLA__\0", - *ospredefs[] = { - [OSlinux] = "__linux\0__linux__\0linux\0unix\0__unix\0__unix__\0" - }, *archpredefs[] = { - [ISx86_64] = "__x86_64__\0__x86_64\0", - [ISaarch64] = "__aarch64__\0__aarch64\0", - }, cstdver[][8] = { - [STDC89] = "199409L", - [STDC99] = "199901L", - [STDC11] = "201112L", - [STDC23] = "202311L", - }; - - tok_stdc.s = cstdver[ccopt.cstd]; - tok_stdc.len = 7; - - for (int i = 0; i < countof(macs); ++i) - putmac(intern(macs[i].name), &macs[i].m); - putdefs1(cpredefs); - if (target.os != OSunknown) putdef1("__STDC_HOSTED__"); - putdefs1(ospredefs[target.os]); - putdefs1(archpredefs[target.arch]); - - if (ppcmdline.n) { - struct memfile *f; - struct lexer lx[1] = {0}; - lx->fileid = getpredeffile(&f, ""); - assert(!f->p); - lx->ndat = f->n = ppcmdline.n; - vpushn(&ppcmdline, "\0\0\0\0\0\0", 6); - lx->dat = f->p = ppcmdline.p; - lx->tmparena = tmparena; - lx->chrbuf0 = countof(lx->chrbuf); - lx->firstdirective = 1; - while (lex(lx, NULL) != TKEOF) ; - } -} - -enum initlexer -initlexer(struct lexer *lx, const char **err, const char *file) -{ - enum { NARENA = 1<<12 }; - static union { char m[sizeof(struct arena) + NARENA]; struct arena *_align; } amem; - static struct arena *tmparena = (void *)amem.m; - - if (!tmparena->cap) tmparena->cap = NARENA; - if (!mtoksbuf.p) vinit(&mtoksbuf, NULL, 1024); - if (!mdyntoksbuf.p) vinit(&mdyntoksbuf, NULL, 256); - if (!macroht.v) addpredefmacros(&tmparena); - - struct memfile *f; - int fileid = openfile(err, &f, file); - if (fileid < 0) - return LXERR; - internstr guard; - if (isfileseen(fileid) && isoncefile(fileid, &guard) && (!guard || findmac(guard))) { - //efmt("skipping %s .. guard %s\n", file, guard ? guard : ""); - return LXFILESKIP; - } - memset(lx, 0, sizeof *lx); - lx->fileid = fileid; - markfileseen(fileid); - - lx->dat = f->p; - lx->ndat = f->n; - lx->tmparena = &tmparena; - lx->chrbuf0 = countof(lx->chrbuf); - lx->firstdirective = 1; - lx->nppcnd0 = nppcnd; - return getfilename(fileid, 0) != file ? LXFILESEEN : LXOK; -} - -/* callback to let lexer release temp memory for arena allocated token data */ -void -lexerfreetemps(struct lexer *lx) -{ - if (!lx->macstk) { - /* some of the tokens could be somewhere in the macro stack */ - freearena(lx->tmparena); - } -} - -void -lexerdump(struct lexer *lx, struct wbuf *out) -{ - struct token prev = {0}, tok; - int file = lx->fileid, line = 1, col = 1; - const char *lastfile = getfilename(file, 0); - bfmt(out, "# %d %'s\n", 1, lastfile); - while (lex(lx, &tok) != TKEOF) { - int tkline, tkcol; - const char *fname = getfilepos(&tkline, &tkcol, tok.span.ex.file, tok.span.ex.off); - if (tok.span.ex.file != file || fname != lastfile) { - file = tok.span.ex.file; - bfmt(out, "\n# %d %'s\n", tkline, fname); - col = 1; - lexerfreetemps(lx); - lastfile = fname; - } else if (line < tkline && tkline - line < 5) { - do - ioputc(out, '\n'); - while (++line != tkline); - col = 1; - } else if (line != tkline) { - bfmt(out, "\n# %d\n", tkline); - line = tkline; - col = 1; - lexerfreetemps(lx); - } else if (prev.t && (tok.space || tokpaste(lx, NULL, &prev, &tok))) { - /* preserve whitespace & paste avoidance */ - ioputc(out, ' '); - ++col; - } - if (col == 1) - for (; col < tkcol; ++col) - ioputc(out, ' '); - line = tkline; - bfmt(out, "%tk", &tok); - col += tok.span.ex.len; - prev = tok; - } - bfmt(out, "\n"); - ioflush(out); -} - -/* vim:set ts=3 sw=3 expandtab: */ -- cgit v1.2.3