aboutsummaryrefslogtreecommitdiffhomepage
path: root/lex.c
diff options
context:
space:
mode:
authorlemon <lsof@mailbox.org>2025-10-19 08:09:09 +0200
committerlemon <lsof@mailbox.org>2025-10-19 08:09:09 +0200
commitdea8fd171acb54b6d9685422d5e391fb55074008 (patch)
tree2c149892f35c5183c9b2a1da4ab437228dc432ef /lex.c
parent3437945692f2b87883a4f066473c9deed50f25f5 (diff)
Organize source files into directories
Diffstat (limited to 'lex.c')
-rw-r--r--lex.c1977
1 files changed, 0 insertions, 1977 deletions
diff --git a/lex.c b/lex.c
deleted file mode 100644
index 951bb5a..0000000
--- a/lex.c
+++ /dev/null
@@ -1,1977 +0,0 @@
-#include "lex.h"
-#include <string.h>
-
-/* Return the canonical copy of the NUL-terminated string `s`.
- * Equal strings (per strcmp) always yield the same pointer, so interned
- * strings can be compared by address.  Strings are kept in a fixed
- * 4096-slot open-addressing table with linear probing; new strings are
- * copied with alloccopy() into `arena`, which initially points at the
- * static buffer `amem`. */
-const char *
-intern(const char *s)
-{
-    static const char *ht[1<<12];
-    /* NOTE(review): `_a` is never read; presumably it only forces pointer
-     * alignment of `m` -- confirm */
-    static struct { char m[sizeof(struct arena) + (1<<10)]; struct arena *_a; } amem;
-    static struct arena *arena;
-    uint i, n = arraylength(ht);
-
-    if (!arena) arena = (void *)amem.m, arena->cap = 1<<10;
-
-    for (i = hashs(0, s);; ++i) {
-        i &= arraylength(ht) - 1;
-        if (!ht[i]) {
-            return ht[i] = alloccopy(&arena, s, strlen(s)+1, 1);
-        } else if (!strcmp(s, ht[i])) {
-            return ht[i];
-        }
-        /* update the probe counter outside assert(): an expression inside
-         * assert() is not evaluated at all when NDEBUG is defined */
-        --n;
-        assert(n > 0 && "intern full");
-    }
-}
-
-/* Classify the NUL-terminated identifier text `s` of length `len` and fill
- * in `tk`: either a keyword token (t, s and len set from the keyword table)
- * or a TKIDENT whose `s` is the interned name.
- * Returns 0 only for a keyword that begins with `_` and belongs to a newer
- * C standard than ccopt.cstd (the caller may warn that it is an
- * extension); returns 1 in every other case. */
-static bool
-identkeyword(struct token *tk, const char *s, int len)
-{
-    static const struct { const char *s; enum toktag t; enum cstd cstd; } kwtab[] = {
-#define _(kw, cstd) { #kw, TKW##kw, cstd },
-#include "keywords.def"
-#undef _
-    };
-    int l = 0, h = arraylength(kwtab) - 1, i, cmp;
-
-    /* text longer than TKWMAXLEN_ cannot be a keyword */
-    if (len > TKWMAXLEN_) goto ident;
-    /* binary search over sorted array */
-    while (l <= h) {
-        i = (l + h) / 2;
-        cmp = strcmp(kwtab[i].s, s);
-        if (cmp < 0) l = i + 1;
-        else if (cmp > 0) h = i - 1;
-        else if (kwtab[i].cstd <= ccopt.cstd || kwtab[i].s[0] == '_') {
-            /* allow future keywords but only if they begin with _ */
-            tk->t = kwtab[i].t;
-            tk->s = kwtab[i].s;
-            /* keywords need a valid length too: tokpaste() reads ->len of
-             * every isppident() token, and that range includes keywords */
-            tk->len = len;
-            return kwtab[i].cstd <= ccopt.cstd;
-        } else break;
-    }
-ident:
-    tk->t = TKIDENT;
-    tk->s = intern(s);
-    tk->len = len;
-    return 1;
-}
-
-/* fill internal circular character buffer with input after translation phase 1 & 2
- * (trigraph substitution and backslash-newline deletion).
- * Every buffered character is stored together with the input offset just
- * past it (chridxbuf), and each newline / line continuation is reported to
- * addfileline().  Once the input is exhausted the remaining slots are
- * filled with TKEOF. */
-static void
-fillchrbuf(struct lexer *lx)
-{
-    bool trigraph = ccopt.trigraph;
-    const uchar *p = lx->dat + lx->idx;
-    /* Start writing after the characters that are still unread: peek() calls
-     * this function while the buffer is non-empty (e.g. peek(lx, 1) with one
-     * character left), and starting at chrbuf0 would overwrite them. */
-    int i = lx->chrbuf0 + lx->nchrbuf, idx = lx->idx, c;
-
-    while (lx->nchrbuf < arraylength(lx->chrbuf)) {
-        int n;
-        /* NOTE(review): memcmp reads up to 4 bytes at p before the end-of-data
-         * check below; this assumes lx->dat is padded past ndat -- confirm */
-        while (!memcmp(p, "\\\n", n = 2) || (trigraph && !memcmp(p, "\?\?/\n", n = 4))) {
-            idx += n;
-            p += n;
-            addfileline(lx->fileid, idx);
-        }
-        if (idx >= lx->ndat)
-            c = TKEOF;
-        else if (trigraph && ((p[0] == '?') & (p[1] == '?'))) {
-            switch (p[2]) {
-            case '=': c = '#'; break;
-            case '(': c = '['; break;
-            case ')': c = ']'; break;
-            case '!': c = '|'; break;
-            case '<': c = '{'; break;
-            case '>': c = '}'; break;
-            case '-': c = '~'; break;
-            case '/': c = '\\'; break;
-            case '\'': c = '^'; break;
-            default: goto NoTrigraph;
-            }
-            p += 3;
-            idx += 3;
-        } else {
-        NoTrigraph:
-            ++idx;
-            if ((c = *p++) == '\n')
-                addfileline(lx->fileid, idx);
-        }
-        lx->chrbuf[i % arraylength(lx->chrbuf)] = c;
-        lx->chridxbuf[i % arraylength(lx->chrbuf)] = idx;
-        ++lx->nchrbuf;
-        ++i;
-    }
-    lx->idx = idx;
-}
-
-/* Consume and return the next translated character, refilling the buffer
- * first when it is empty.  Also records in lx->chridx the input offset
- * stored with that character and in lx->eof whether it was TKEOF. */
-static int
-next(struct lexer *lx)
-{
-    int slot, ch;
-
-    if (lx->nchrbuf == 0)
-        fillchrbuf(lx);
-    slot = lx->chrbuf0;
-    ch = lx->chrbuf[slot];
-    lx->chridx = lx->chridxbuf[slot];
-    lx->eof = (ch == TKEOF);
-    /* advance the read position of the circular buffer */
-    lx->chrbuf0 = (slot + 1) % arraylength(lx->chrbuf);
-    lx->nchrbuf -= 1;
-    return ch;
-}
-
-/* Return the character `off` positions ahead of the read position without
- * consuming anything; the buffer is refilled when it holds fewer than
- * off+1 characters.  `off` must be smaller than the buffer size. */
-static int
-peek(struct lexer *lx, int off)
-{
-    int needed = off + 1;
-
-    assert(off < arraylength(lx->chrbuf));
-    if (lx->nchrbuf < needed)
-        fillchrbuf(lx);
-    return lx->chrbuf[(lx->chrbuf0 + off) % arraylength(lx->chrbuf)];
-}
-
-/* If end of input has not been reached and the next character equals `c`,
- * consume it and return 1; otherwise leave the input alone and return 0. */
-static bool
-match(struct lexer *lx, int c)
-{
-    if (lx->eof || peek(lx, 0) != c)
-        return 0;
-    next(lx);
-    return 1;
-}
-
-/* Does `c` end an identifier / pp-number?  True for non-printable
- * characters, whitespace and the punctuation characters listed below. */
-static bool
-aissep(int c) {
-    /* punctuation separators, in ASCII order */
-    static const bool tab[] = {
-        ['!'] = 1, ['"'] = 1, ['#'] = 1, ['%'] = 1, ['&'] = 1,
-        ['\''] = 1, ['('] = 1, [')'] = 1, ['*'] = 1, ['+'] = 1,
-        [','] = 1, ['-'] = 1, ['.'] = 1, ['/'] = 1, [':'] = 1,
-        [';'] = 1, ['<'] = 1, ['='] = 1, ['>'] = 1, ['?'] = 1,
-        ['@'] = 1, ['['] = 1, ['\\'] = 1, [']'] = 1, ['^'] = 1,
-        ['`'] = 1, ['{'] = 1, ['|'] = 1, ['}'] = 1, ['~'] = 1,
-    };
-    bool sep = !aisprint(c) || aisspace(c);
-
-    if (!sep && (uint)c < sizeof(tab))
-        sep = tab[c];
-    return sep;
-}
-
-/* Parse the spelling of a TKNUMLIT or TKCHRLIT token.
- *
- * Returns the type tag of the literal, or 0 when the literal is malformed
- * or is an integer too large for every candidate type (in the latter case
- * *outi is still written).  The value is stored through `outi` (integer and
- * character literals) or `outf` (floating literals) when that pointer is
- * non-NULL.
- *
- * `ispp` selects preprocessor evaluation: values that fit in long long are
- * reported as TYVLONG, unsigned-suffixed ones as TYUVLONG, and an integer
- * that fits no other type is accepted as TYUVLONG instead of failing. */
-enum typetag
-parsenumlit(uvlong *outi, double *outf, const struct token *tk, bool ispp)
-{
-    if (tk->t == TKCHRLIT) {
-        uvlong n = 0;
-        if (!tk->wide) {
-            /* multi-character constant: pack the bytes big-endian */
-            for (int i = 0; i < tk->len; ++i)
-                n = n << 8 | (uchar)tk->s[i];
-        } else if (tk->wide == 1) {
-            n = tk->ws16[0];
-        } else {
-            assert(tk->wide == 2);
-            n = tk->ws32[0];
-        }
-        if (outi) *outi = n;
-        return TYINT;
-    } else if (memchr(tk->s, '.', tk->len)) {
-        extern double strtod(const char *, char **);
-        double f;
-        char buf[80], *suffix;
-    Float: /* float literal */
-        /* the token text is not necessarily NUL-terminated: copy it first */
-        assert(tk->len < sizeof buf - 1 && "numlit too big");
-        memcpy(buf, tk->s, tk->len);
-        buf[tk->len] = 0;
-        f = strtod(buf, &suffix);
-        if (suffix == buf)
-            return 0;
-        if (!*suffix) {
-            if (outf) *outf = f;
-            return TYDOUBLE;
-        } else if ((suffix[0]|0x20) == 'f' && !suffix[1]) {
-            if (outf) *outf = f;
-            return TYFLOAT;
-        } else if ((suffix[0]|0x20) == 'l' && !suffix[1]) {
-            if (outf) *outf = f;
-            return TYLDOUBLE;
-        }
-        return 0;
-    } else { /* int literal */
-        /* largest value of each integer type TYINT..TYUVLONG, computed once */
-        static uvlong max4typ[TYUVLONG-TYINT+1];
-        uvlong n = 0;
-        int base = 10, nsx;
-        bool dec, u = 0, longlongok = ccopt.cstd >= STDC99 || !ccopt.pedant;
-        enum typetag ty = 0;
-        const char *sx; /*suffix*/
-        char c;
-
-        if (!max4typ[0])
-            for (ty = TYINT; ty <= TYUVLONG; ++ty)
-                max4typ[ty-TYINT] = ((1ull << (8*targ_primsizes[ty]-1))-1) << isunsignedt(ty) | 1;
-
-        sx = tk->s;
-        /* Any multi-digit literal starting with 0 is octal unless a 0x/0b
-         * prefix is followed by at least one more character; this also makes
-         * two-character spellings such as "08" go through the octal digit
-         * check instead of being read as decimal. */
-        if (tk->len > 1 && sx[0] == '0') {
-            if (tk->len > 2 && (sx[1]|32) == 'x') sx += 2, base = 16; /* 0x.. */
-            else if (tk->len > 2 && (sx[1]|32) == 'b') sx += 2, base = 2; /* 0b.. */
-            else base = 8; /* 0.. */
-        }
-        for (; sx < tk->s + tk->len; ++sx) {
-            if (base < 16) {
-                if (!in_range(c = *sx, '0', '0'+base-1)) break;
-                n = n * base + c - '0';
-            } else {
-                int digit;
-                if (in_range(c = *sx, '0', '9')) digit = c - '0';
-                else if (in_range(c|32, 'a', 'f')) digit = 0xa + (c|32) - 'a';
-                else break;
-                /* scale only once the digit is known to be valid, so that
-                 * the first suffix character does not shift the value */
-                n = n * base + digit;
-            }
-        }
-        dec = base == 10;
-        nsx = tk->len - (sx - tk->s); /* number of suffix characters left */
-
-        if (nsx == 0) /* '' */ {}
-        else if ((sx[0]|32) == 'u') {
-            u = 1;
-            if (nsx == 1) /* 'u' */ {}
-            else if ((sx[1]|32) == 'l') {
-                if (nsx == 2) /* 'ul' */ goto L;
-                if (sx[1] == sx[2] && nsx == 3) /* 'ull' */ goto LL;
-                return 0;
-            } else return 0;
-        } else if ((sx[0]|32) == 'l') {
-            if (nsx == 1) /* 'l' */ goto L;
-            if ((sx[1]|32) == 'u' && nsx == 2) /* 'lu' */ { u=1; goto L; }
-            if (sx[1] == sx[0]) {
-                if (nsx == 2) /* 'll' */ goto LL;
-                if ((sx[2]|32) == 'u' && nsx == 3) /* 'llu' */ { u=1; goto LL; }
-            }
-            return 0;
-        } else if ((sx[0]|32) == 'e' || (sx[0]|32) == 'p')
-            goto Float; /* exponent without a '.', e.g. 1e5 or 0x1p3 */
-        else return 0;
-
-        /* pick the first candidate type that can hold n; the L / LL labels
-         * let a suffix skip the smaller candidates */
-#define I(T) if (n <= max4typ[T - TYINT]) { ty = T; goto Ok; }
-        I(TYINT)
-        if (u || !dec) I(TYUINT)
-    L:
-        I(TYLONG)
-        if (u || !dec || !longlongok) I(TYULONG)
-        if (longlongok) {
-    LL:
-            I(TYVLONG)
-            if (u || !dec) I(TYUVLONG)
-        }
-        if (ispp) { ty = TYUVLONG; goto Ok; }
-#undef I
-        /* too big */
-        if (outi) *outi = n;
-        return 0;
-    Ok:
-        if (u && issignedt(ty)) ++ty; /* make unsigned */
-        if (outi) *outi = n;
-        if (ispp) {
-            if (u) return TYUVLONG;
-            else if (n <= max4typ[TYVLONG-TYINT]) return TYVLONG;
-        }
-        if (ty >= TYVLONG && !longlongok)
-            warn(&tk->span, "'long long' in %M is an extension");
-        return ty;
-    }
-}
-
-/* Read the body of a string (delim == '"') or character (delim == '\'')
- * literal up to the closing `delim`, decoding escape sequences, and fill in
- * `tk` as TKSTRLIT or TKCHRLIT.  `wide` is stored in tk->wide; when it is
- * non-zero the decoded bytes are converted with utf8to16 (wide == 1) or
- * utf8to32 (otherwise).  For narrow literals tk->litlit is set when the
- * consumed input is exactly one character longer than the decoded text, in
- * which case tk->s points straight into lx->dat; otherwise the decoded
- * bytes are copied into lx->tmparena.  Errors are reported with error() and
- * scanning stops at a newline or end of input. */
-static void
-readstrchrlit(struct lexer *lx, struct token *tk, char delim, int wide)
-{
- int c, i;
- uchar tmp[80];
- vec_of(uchar) b = VINIT(tmp, sizeof tmp);
- struct span span = {0};
- uint n, beginoff, idx;
- /* beginoff: offset where the literal body starts;
-  * idx: offset where the element currently being read starts */
- beginoff = idx = lx->chridx;
-
- while ((c = next(lx)) != delim) {
- if (c == '\n' || c == TKEOF) {
- Noterm:
- span.sl = (struct span0) { idx, lx->chridx - idx, lx->fileid };
- error(&span, "missing terminating %c character", delim);
- break;
- } else if (c == '\\') {
- /* span of the escape sequence, extended below for error messages */
- span.sl = (struct span0) { idx, lx->chridx - idx, lx->fileid };
- switch (c = next(lx)) {
- case '\n': case TKEOF:
- goto Noterm;
- case '\'': c = '\''; break;
- case '\\': c = '\\'; break;
- case '"': c = '"'; break;
- case '?': c = '?'; break;
- case 'a': c = '\a'; break;
- case 'b': c = '\b'; break;
- case 'f': c = '\f'; break;
- case 'n': c = '\n'; break;
- case 'r': c = '\r'; break;
- case 't': c = '\t'; break;
- case 'v': c = '\v'; break;
- case 'x': case 'X': /* hex */
- n = 0;
- if (!aisxdigit(peek(lx, 0))) goto Badescseq;
- /* consume every following hex digit */
- do {
- c = next(lx);
- if (c-'0' < 10) n = n<<4 | (c-'0');
- else n = n<<4 | (10 + (c|0x20)-'a');
- } while (aisxdigit(peek(lx, 0)));
- if (n > 0xFF) {
- span.sl.len = lx->chridx - span.sl.off;
- error(&span, "hex escape sequence out of range");
- }
- c = n & 0xFF;
- break;
- default:
- if (aisodigit(c)) { /* octal */
- n = c-'0';
- /* at most two more octal digits */
- for (i = 2; i--;) {
- if (!aisodigit(peek(lx, 0))) break;
- n = n<<3 | ((c = next(lx))-'0');
- }
- if (n > 0377) {
- span.sl.len = lx->chridx - span.sl.off;
- error(&span, "octal escape sequence out of range");
- }
- c = n;
- break;
- }
- Badescseq:
- /* after the error, the character following the backslash is kept as is */
- span.sl.len = lx->chridx - span.sl.off;
- error(&span, "invalid escape sequence");
- }
- }
- vpush(&b, c);
- idx = lx->chridx;;
- }
- if (delim == '"') {
- tk->t = TKSTRLIT;
- tk->len = b.n;
- if ((tk->wide = wide)) {
- tk->litlit = 0;
- if (wide == 1)
- tk->ws16 = utf8to16(&tk->len, lx->tmparena, b.p, b.n);
- else
- tk->ws32 = utf8to32(&tk->len, lx->tmparena, b.p, b.n);
- } else if (lx->chridx - beginoff == tk->len + 1) {
- /* consumed length == decoded length + closing delimiter */
- tk->litlit = 1;
- tk->s = (char *)&lx->dat[beginoff];
- } else {
- tk->litlit = 0;
- vpush(&b, 0);
- tk->s = alloccopy(lx->tmparena, b.p, b.n, 1);
- }
- } else {
- if (b.n == 0) {
- span.sl = (struct span0) { idx, lx->chridx - idx, lx->fileid };
- error(&span, "empty character literal");
- } else if (b.n > targ_primsizes[TYINT]) {
- span.sl = (struct span0) { idx, lx->chridx - idx, lx->fileid };
- error(&span, "multicharacter literal too long");
- }
- tk->t = TKCHRLIT;
- tk->len = b.n;
- if ((tk->wide = wide)) {
- tk->litlit = 0;
- if (wide == 1)
- tk->ws16 = utf8to16(&tk->len, lx->tmparena, b.p, b.n);
- else
- tk->ws32 = utf8to32(&tk->len, lx->tmparena, b.p, b.n);
- } else if (lx->chridx - beginoff == tk->len + 1) {
- tk->litlit = 1;
- tk->s = (char *)&lx->dat[beginoff];
- } else {
- /* unlike the string case, no terminating NUL is appended here */
- tk->litlit = 0;
- tk->s = alloccopy(lx->tmparena, b.p, tk->len, 1);
- }
- }
- vfree(&b);
-}
-
-/* Read the remainder of an #include header name, "header" or <header>, up
- * to the closing `delim`, without any escape processing.  The token kind is
- * TKPPHDRQ for a quoted name and TKPPHDRH otherwise.  The name is
- * referenced in place in lx->dat (litlit) when the consumed input is
- * exactly the name plus the delimiter, and copied NUL-terminated into
- * lx->tmparena otherwise. */
-static void
-readheadername(struct lexer *lx, struct token *tk, char delim)
-{
-    uchar stackbuf[80];
-    vec_of(uchar) name = VINIT(stackbuf, sizeof stackbuf);
-    struct span span = {0};
-    uint start, prev;
-    int ch;
-
-    start = prev = lx->chridx;
-    for (;;) {
-        ch = next(lx);
-        if (ch == delim)
-            break;
-        if (ch == '\n' || ch == TKEOF) {
-            span.sl = (struct span0) { prev, lx->chridx - prev, lx->fileid };
-            error(&span, "missing terminating %c character", delim);
-            break;
-        }
-        vpush(&name, ch);
-        prev = lx->chridx;
-    }
-
-    if (delim == '"')
-        tk->t = TKPPHDRQ;
-    else
-        tk->t = TKPPHDRH;
-    tk->len = name.n;
-    if (lx->chridx - start != tk->len + 1) {
-        tk->litlit = 0;
-        vpush(&name, 0);
-        tk->s = alloccopy(lx->tmparena, name.p, name.n, 1);
-    } else {
-        tk->litlit = 1;
-        tk->s = (char *)&lx->dat[start];
-    }
-    vfree(&name);
-}
-
-/* Can `c` continue a pp-number whose previous character was `prev`?
- * matches "<digit> | <identifier-nondigit> | '.' | ([eEpP][+-])" */
-static bool
-isppnum(char prev, char c)
-{
-    switch (c) {
-    case '.':
-        return 1;
-    case '+': case '-':
-        /* a sign is only part of the number right after an exponent marker */
-        return (prev|0x20) == 'e' || (prev|0x20) == 'p';
-    default:
-        return !aissep(c);
-    }
-}
-
-/* special mode to parse header path for #include: while this is set, lex0()
- * reads a token starting with `<` or `"` through readheadername() and then
- * clears the flag again */
-static bool lexingheadername = 0;
-
-/* Lex one raw token from the character stream into `tk` and return its
- * kind (tk->t).  No macro expansion or directive handling happens here.
- * Spaces, tabs, carriage returns and comments are skipped; a newline is
- * returned as the token '\n' and end of input as TKEOF.  The token span is
- * set to the consumed source range.  An unterminated block comment or an
- * unexpected character is reported with fatal(). */
-static int
-lex0(struct lexer *lx, struct token *tk)
-{
-    int idx, c, q;
-
-#define RET(t_) do { tk->t = (t_); goto End; } while (0)
-
-Begin:
-    idx = lx->chridx; /* offset where this token starts */
-    switch (c = next(lx)) {
-    case ' ': case '\r': case '\t':
-        goto Begin;
-        break;
-    case '(': case ')': case ',': case ':':
-    case ';': case '?': case '[': case ']':
-    case '{': case '}': case '~': case '$':
-    case '@': case '`': case '\\': case TKEOF: case '\n':
-        RET(c);
-    case '!':
-        if (match(lx, '=')) RET(TKNEQ);
-        RET(c);
-    case '#':
-        if (match(lx, '#')) RET(TKPPCAT);
-        RET(c);
-    case '+':
-        if (match(lx, '+')) RET(TKINC);
-        if (match(lx, '=')) RET(TKSETADD);
-        RET(c);
-    case '-':
-        if (match(lx, '-')) RET(TKDEC);
-        if (match(lx, '=')) RET(TKSETSUB);
-        if (match(lx, '>')) RET(TKARROW);
-        RET(c);
-    case '*':
-        if (match(lx, '=')) RET(TKSETMUL);
-        RET(c);
-    case '/':
-        if (match(lx, '=')) RET(TKSETDIV);
-        if (match(lx, '/')) {
-            /* // comment: skip up to, but not including, the newline.  The
-             * newline must still be returned as a token because directive
-             * parsing (e.g. ppdefine) uses it to find the end of the line. */
-            while (peek(lx, 0) != '\n' && peek(lx, 0) != TKEOF)
-                next(lx);
-            goto Begin;
-        }
-        if (match(lx, '*')) {
-            /* comment */
-            while (peek(lx, 0) != '*' || peek(lx, 1) != '/') {
-                if (next(lx) == TKEOF) {
-                    struct span span = {{ idx, lx->chridx - idx, lx->fileid }};
-                    fatal(&span, "unterminated multiline comment");
-                }
-            }
-            next(lx), next(lx);
-            goto Begin;
-        }
-        RET(c);
-    case '%':
-        if (match(lx, '=')) RET(TKSETREM);
-        RET(c);
-    case '^':
-        if (match(lx, '=')) RET(TKSETXOR);
-        RET(c);
-    case '=':
-        if (match(lx, '=')) RET(TKEQU);
-        RET(c);
-    case '<':
-        if (lexingheadername) {
-            readheadername(lx, tk, '>');
-            lexingheadername = 0;
-            goto End;
-        }
-        if (match(lx, '=')) RET(TKLTE);
-        if (match(lx, '<')) RET(match(lx, '=') ? TKSETSHL : TKSHL);
-        RET(c);
-    case '>':
-        if (match(lx, '=')) RET(TKGTE);
-        if (match(lx, '>')) RET(match(lx, '=') ? TKSETSHR : TKSHR);
-        RET(c);
-    case '&':
-        if (match(lx, '&')) RET(TKLOGAND);
-        if (match(lx, '=')) RET(TKSETAND);
-        RET(c);
-    case '|':
-        if (match(lx, '|')) RET(TKLOGIOR);
-        if (match(lx, '=')) RET(TKSETIOR);
-        RET(c);
-    case '"':
-        if (lexingheadername) {
-            readheadername(lx, tk, '"');
-            lexingheadername = 0;
-        } else {
-    case '\'':
-            /* shared by string and character literals; c is the delimiter */
-            tk->wideuni = 0;
-            readstrchrlit(lx, tk, c, 0);
-        }
-        goto End;
-    case '.':
-        if (peek(lx, 0) == '.' && peek(lx, 1) == '.') {
-            next(lx), next(lx);
-            RET(TKDOTS);
-        } else if (aisdigit(peek(lx, 0))) {
-            goto Numlit;
-        }
-        RET(c);
-    case 'L':
-        /* L'..' / L".." wide literal; otherwise an ordinary identifier */
-        if (match(lx, (q = '\'')) || match(lx, (q = '"'))) {
-            tk->wideuni = 0;
-            readstrchrlit(lx, tk, q, /* wide */ targ_primsizes[targ_wchartype] == 2 ? 1 : 2);
-            goto End;
-        }
-        /* fallthru */
-    default:
-        if (aisdigit(c)) Numlit: {
-            /* pp-number: the spelling is kept as text, see parsenumlit() */
-            char tmp[70];
-            int n = 0;
-            tmp[n++] = c;
-            while (isppnum(tmp[n-1], peek(lx, 0))) {
-                assert(n < arraylength(tmp)-1 && "too big");
-                tmp[n++] = next(lx);
-            }
-            tmp[n] = 0;
-            tk->len = n;
-            /* reference the source text directly unless translation
-             * (trigraphs, line splices) made it differ from the input */
-            if (n == lx->chridx - idx) tk->s = (char *)&lx->dat[idx];
-            else {
-                tk->s = alloccopy(lx->tmparena, tmp, n, 1);
-            }
-            RET(TKNUMLIT);
-        } else if (c == '_' || aisalpha(c)) {
-            char tmp[70];
-            int n = 0;
-            tmp[n++] = c;
-            while (!aissep(c = peek(lx, 0))) {
-                assert(n < arraylength(tmp)-1 && "too big");
-                tmp[n++] = next(lx);
-            }
-            tmp[n] = 0;
-            if (!identkeyword(tk, tmp, n) && ccopt.pedant)
-                warn(&(struct span) {{ idx, lx->chridx - idx, lx->fileid }},
-                     "%'tk in %M is an extension", tk);
-            goto End;
-        }
-        /* fallthru */
-    case 0: if (lx->idx >= lx->ndat) RET(TKEOF);
-    }
-    fatal(&(struct span) {{ idx, lx->chridx - idx, lx->fileid }},
-          "unexpected character %'c at %d", c, idx);
-End:
-    tk->span.sl.file = lx->fileid;
-    tk->span.sl.off = idx;
-    tk->span.sl.len = lx->chridx - idx;
-    tk->span.ex = tk->span.sl;
-    return tk->t;
-#undef RET
-}
-
-/****************/
-/* PREPROCESSOR */
-/****************/
-
-/* One preprocessor macro definition, stored in the `macros` vector. */
-struct macro {
- const char *name; /* interned. NULL for tombstone */
- const char **param; /* parameter names, compared by pointer */
- struct span0 span; /* source location of the macro name in its definition */
- uchar nparam; /* number of entries in param */
- bool predefined, /* redefinition is reported as "redefining builtin macro" */
- special, /* expansion is produced by `handler` instead of `rlist` */
- fnlike, /* function-like macro */
- variadic; /* parameter list ends in `...` */
- union {
- /* special macros: called by tryexpand() with the token to rewrite */
- void (*handler)(struct lexer *, struct token *);
- /* ordinary macros: replacement list of n tokens */
- struct rlist {
- const struct token *tk;
- int n;
- } rlist;
- };
-};
-
-/* true when token kind lies in TKIDENT..TKWEND_ (identifiers and keywords) */
-#define isppident(tk) (in_range((tk).t, TKIDENT, TKWEND_))
-
-/* all macro definitions, including tombstones left behind by delmac() */
-static vec_of(struct macro) macros;
-/* open-addressing hash table keyed by the macro name pointer; each entry is
- * an index into `macros` plus one, 0 meaning an empty slot */
-static ushort macroht[1<<12];
-
-/* Are two tokens equal for the purpose of comparing macro definitions?
- * Literals compare by spelling, identifiers by name pointer, macro
- * argument references by argument index, everything else by kind alone. */
-static bool
-tokequ(const struct token *a, const struct token *b)
-{
-    if (a->t != b->t)
-        return 0;
-    switch (a->t) {
-    case TKNUMLIT: case TKSTRLIT: case TKCHRLIT:
-        if (a->len != b->len)
-            return 0;
-        return !memcmp(a->s, b->s, a->len);
-    case TKIDENT:
-        return a->s == b->s;
-    case TKPPMACARG: case TKPPMACSTR:
-        return a->argidx == b->argidx;
-    default:
-        return 1;
-    }
-}
-
-/* whitespace separating tokens?  Tokens from different files always count
- * as separated; otherwise they are separated unless `r` starts exactly
- * where `l` ends. */
-static bool
-wsseparated(const struct token *l, const struct token *r)
-{
-    bool samefile = l->span.sl.file == r->span.sl.file;
-
-    return !samefile || l->span.sl.off + l->span.sl.len != r->span.sl.off;
-}
-
-/* Are two macro definitions identical?  Compares name, kind flags,
- * parameter names, and either the handler (special macros) or the
- * replacement lists token by token, including whether consecutive tokens
- * are separated by whitespace. */
-static bool
-macroequ(const struct macro *a, const struct macro *b)
-{
-    const struct token *ta, *tb;
-    int k;
-
-    if (a->name != b->name || a->special != b->special)
-        return 0;
-    if (a->fnlike != b->fnlike || a->variadic != b->variadic)
-        return 0;
-    if (a->fnlike) {
-        if (a->nparam != b->nparam)
-            return 0;
-        for (k = 0; k < a->nparam; ++k) {
-            if (a->param[k] != b->param[k])
-                return 0;
-        }
-    }
-    if (a->special)
-        return a->handler == b->handler;
-
-    if (a->rlist.n != b->rlist.n)
-        return 0;
-    ta = a->rlist.tk;
-    tb = b->rlist.tk;
-    for (k = 0; k < a->rlist.n; ++k) {
-        if (!tokequ(&ta[k], &tb[k]))
-            return 0;
-        if (k > 0 && wsseparated(&ta[k-1], &ta[k]) != wsseparated(&tb[k-1], &tb[k]))
-            return 0;
-    }
-    return 1;
-}
-
-/* Release the parameter array and replacement list owned by a macro.
- * Special macros hold a handler instead and own nothing. */
-static void
-freemac(struct macro *mac)
-{
-    if (!mac->special) {
-        free(mac->param);
-        free((void *)mac->rlist.tk);
-    }
-}
-
-/* Insert or replace the definition `*mac` (its name must be non-NULL) and
- * return the stored entry.  If a macro of the same name exists, it is
- * replaced, with a warning when the two definitions differ; an identical
- * redefinition keeps the old entry and frees the storage of `*mac`.
- * Otherwise the definition goes into the first tombstone seen on the probe
- * chain, or into a new entry of `macros`. */
-static struct macro *
-putmac(struct macro *mac)
-{
-    uint i, n = arraylength(macroht);
-    struct macro *slot, *tomb = NULL;
-
-    assert(mac->name);
-    for (i = ptrhash(mac->name);; ++i) {
-        i &= arraylength(macroht) - 1;
-        if (!macroht[i]) {
-            /* end of the probe chain: the name is not defined */
-            if (tomb) break;
-            macroht[i] = macros.n+1;
-            vpush(&macros, *mac);
-            return &macros.p[macros.n - 1];
-        }
-        slot = &macros.p[macroht[i]-1];
-        if (slot->name == mac->name) {
-            if (!macroequ(slot, mac)) {
-                if (slot->predefined)
-                    warn(&(struct span){mac->span}, "redefining builtin macro");
-                else {
-                    warn(&(struct span){mac->span}, "redefining macro");
-                    note(&(struct span){slot->span}, "previous definition:");
-                }
-                freemac(slot);
-                *slot = *mac;
-            } else {
-                freemac(mac);
-            }
-            return slot;
-        }
-        /* Only remember a tombstone here.  Reusing it immediately could
-         * leave a second, older entry for the same name further down the
-         * chain, which would become visible again after a later #undef. */
-        if (!slot->name && !tomb)
-            tomb = slot;
-        if (--n == 0) {
-            /* every slot was probed */
-            if (tomb) break;
-            assert(0 && "macro limit");
-        }
-    }
-    *tomb = *mac;
-    return tomb;
-}
-
-/* Remove the macro called `name` (an interned pointer), if defined.  Its
- * entry is zeroed and stays behind as a tombstone (name == NULL) so that
- * probe chains running through it remain intact. */
-static void
-delmac(const char *name)
-{
-    uint i, n = arraylength(macroht);
-
-    /* probe at most once per slot, like findmac(), so that the search also
-     * terminates when the table has no empty slot left */
-    for (i = ptrhash(name); n--; ++i) {
-        struct macro *slot;
-
-        i &= arraylength(macroht) - 1;
-        if (!macroht[i])
-            return;
-        slot = &macros.p[macroht[i]-1];
-        if (slot->name == name) {
-            freemac(slot);
-            memset(slot, 0, sizeof *slot);
-            return;
-        }
-    }
-}
-
-/* Look up the macro whose name pointer is `name`.  Returns NULL when the
- * probe chain ends at an empty slot or every slot has been examined. */
-static struct macro *
-findmac(const char *name)
-{
-    uint left = arraylength(macroht);
-    uint pos = ptrhash(name);
-
-    while (left-- > 0) {
-        struct macro *cand;
-
-        pos &= arraylength(macroht) - 1;
-        if (!macroht[pos])
-            break;
-        cand = &macros.p[macroht[pos]-1];
-        if (cand->name == name)
-            return cand;
-        ++pos;
-    }
-    return NULL;
-}
-
-static void popmac(struct lexer *);
-
-/* Abandon the current line: drop every pending macro expansion, then
- * discard input characters up to (not including) the next newline or end
- * of input. */
-static void
-ppskipline(struct lexer *lx)
-{
-    int ch;
-
-    while (lx->macstk)
-        popmac(lx);
-    for (;;) {
-        ch = peek(lx, 0);
-        if (ch == '\n' || ch == TKEOF)
-            break;
-        next(lx);
-    }
-}
-
-/* Paste tokens `l` and `r` (the operands of ##) into `*dst`.
- * Returns 1 on success.  Returns 0, after reporting an error, when the two
- * tokens do not form a valid preprocessing token.  An operand whose kind is
- * 0 stands for an empty argument: the result is then the other operand. */
-static bool
-tokpaste(struct lexer *lx, struct token *dst, const struct token *l, const struct token *r)
-{
- char *s;
- dst->span = l->span;
- if (dst->span.ex.file == r->span.ex.file && dst->span.ex.off < r->span.ex.off)
- joinspan(&dst->span.ex, r->span.ex);
- if (isppident(*l) && (isppident(*r) || r->t == TKNUMLIT)) {
- /* foo ## bar ; foo ## 123 */
- dst->t = TKIDENT;
- } else if (l->t == TKNUMLIT && (isppident(*r) || r->t == TKNUMLIT)) {
- /* 0x ## abc ; 213 ## 456 */
- dst->t = TKNUMLIT;
- } else if (l->t && !r->t) {
- *dst = *l;
- return 1;
- } else if (!l->t && r->t) {
- *dst = *r;
- return 1;
- } else {
- /* punctuator pairs: s[0] and s[1] are the kinds of the left and right
-  * operand, t is the kind of the pasted token */
- static const struct { char s[2]; char t; } tab[] = {
- {"==", TKEQU}, {"!=", TKNEQ}, {"<=", TKLTE}, {">=", TKGTE},
- {">>", TKSHR}, {"<<", TKSHL}, {"++", TKINC}, {"--", TKDEC},
- {"->", TKARROW}, {"##", TKPPCAT}, {"&&", TKLOGAND}, {"||", TKLOGIOR},
- {"+=", TKSETADD}, {"-=", TKSETSUB}, {"*=", TKSETMUL}, {"/=", TKSETDIV},
- {"%=", TKSETREM}, {"|=", TKSETIOR}, {"^=", TKSETXOR}, {"&=", TKSETAND},
- {{TKSHL,'='}, TKSETSHL}, {{TKSHR,'='}, TKSETSHR}
- };
- struct span span = l->span;
-
- for (int i = 0; i < arraylength(tab); ++i)
- if (tab[i].s[0] == l->t && tab[i].s[1] == r->t)
- return dst->t = tab[i].t, 1;
-
- joinspan(&span.ex, r->span.ex);
- error(&span, "pasting %'tk and %'tk does not form a valid preprocessing token", l, r);
- return 0;
- }
-
- /* shared for ident,keyword,numlit */
- /* concatenate both spellings into a fresh NUL-terminated string */
- dst->len = l->len + r->len;
- s = alloc(lx->tmparena, dst->len + 1, 1);
- memcpy(s, l->s, l->len);
- memcpy(s + l->len, r->s, r->len);
- s[l->len + r->len] = 0;
- /* a pasted identifier may turn out to be a keyword: classify it again */
- if (dst->t == TKIDENT) identkeyword(dst, s, dst->len);
- else dst->s = s;
- return 1;
-}
-
-/* Handle the rest of a #define line: read the macro name, an optional
- * parameter list (only when `(` is the very next character after the
- * name), and the replacement list up to the end of the line, then register
- * the macro with putmac().  In function-like macros, identifiers naming a
- * parameter become TKPPMACARG tokens, or TKPPMACSTR when preceded by `#`. */
-static void
-ppdefine(struct lexer *lx)
-{
- struct token tk0, tk;
- int newmacidx;
- struct macro mac = {0};
- vec_of(struct token) rlist = {0};
- vec_of(const char *) params = {0};
-
- lex0(lx, &tk0);
- if (!isppident(tk0)) {
- error(&tk0.span, "macro name missing");
- ppskipline(lx);
- return;
- }
- mac.name = tk0.s;
- mac.span = tk0.span.sl;
-
- if (match(lx, '(')) {
- /* gather params */
- mac.fnlike = 1;
- while (lex0(lx, &tk) != ')') {
- if (mac.variadic) {
- error(&tk.span, "expected `)' after `...'");
- if (tk.t == TKEOF)
- return;
- else break;
- }
- /* every parameter after the first must be preceded by a comma */
- if (params.n > 0) {
- if (tk.t != ',')
- error(&tk.span, "expected `,' or `)'");
- if (tk.t == TKEOF) return;
- lex0(lx, &tk);
- }
- if (isppident(tk))
- vpush(&params, tk.s);
- else if (tk.t == TKDOTS) {
- /* `...` is recorded as a parameter named __VA_ARGS__ */
- mac.variadic = 1;
- vpush(&params, intern("__VA_ARGS__"));
- } else {
- error(&tk.span, "expected parameter name or `)'");
- if (tk.t == TKEOF)
- return;
- }
- }
- mac.param = params.p;
- mac.nparam = params.n;
- }
-
- /* NOTE(review): assumes putmac() appends a new entry; when it reuses an
-  * existing slot this index does not match the macro's position -- confirm */
- newmacidx = macros.n;
- /* gather replacement list */
- while (lex0(lx, &tk) != '\n' && tk.t != TKEOF) {
- if (!rlist.n && !wsseparated(&tk0, &tk))
- warn(&tk.span, "no whitespace after macro name");
- if (mac.fnlike && isppident(tk)) {
- for (int i = 0; i < mac.nparam; ++i) {
- if (tk.s == mac.param[i]) {
- tk.argidx = i;
- tk.macidx = newmacidx;
- if (rlist.n > 0 && rlist.p[rlist.n - 1].t == '#') {
- /* `# param`: the stringify token replaces the `#` */
- tk.t = TKPPMACSTR;
- rlist.p[rlist.n - 1] = tk;
- goto Next;
- } else {
- tk.t = TKPPMACARG;
- break;
- }
- }
- }
- }
- if (rlist.n > 1 && rlist.p[rlist.n-1].t == TKPPCAT) {
- struct token new;
- if (rlist.p[rlist.n-2].t != TKPPMACARG && tk.t != TKPPMACARG
- && tokpaste(lx, &new, &rlist.p[rlist.n-2], &tk))
- {
- /* trivial concatenations */
- /* neither operand is a parameter: paste now and drop the `##` */
- rlist.p[rlist.n-2] = new;
- --rlist.n;
- continue;
- }
- }
- vpush(&rlist, tk);
- Next:;
- }
- mac.rlist.tk = rlist.p;
- mac.rlist.n = rlist.n;
- putmac(&mac);
-}
-
-/* Handle #undef: read the macro name and delete its definition.  When no
- * name follows, report an error and skip the rest of the line. */
-static void
-ppundef(struct lexer *lx)
-{
-    struct token name;
-
-    lex0(lx, &name);
-    if (isppident(name)) {
-        delmac(name.s);
-        return;
-    }
-    error(&name.span, "macro name missing");
-    ppskipline(lx);
-}
-
-/* kludge for proper expansion in the face of nested macros with arguments,
- * stringifying, etc: while this is set, tryexpand() expands nothing */
-static bool noexpandmac;
-
-/* One frame of the macro expansion stack (lx->macstk): a token list that
- * is being replayed.  Frames come from the fixed pool mstk[], with unused
- * frames chained on mfreelist. */
-static struct macrostack {
- struct macrostack *link; /* next frame down the stack, or next free frame */
- struct rlist rlist; /* tokens being replayed */
- struct span0 exspan; /* expansion span stamped on each replayed token */
- int idx; /* index of the next token to deliver */
- int macno:28; /* index into `macros`, or -1 when no macro is attached */
- uint prevnoexpandmac:1; /* noexpandmac at push time, restored on pop */
- uint stop:1; /* at the end deliver TKEOF instead of popping the frame */
-} mstk[64], *mfreelist;
-
-/* Push a new frame onto lx->macstk.  Only rlist, macno and stop are taken
- * from the template `m`; the frame always starts at token 0, its expansion
- * span comes from `span`, and the current noexpandmac value is saved in
- * it.  Running out of free frames is a fatal error. */
-static void
-pushmacstk(struct lexer *lx, const struct span *span, const struct macrostack *m)
-{
-    struct macrostack *frame = mfreelist;
-
-    if (!frame)
-        fatal(span, "macro depth limit reached");
-    /* unlink from the free list, then link on top of the stack */
-    mfreelist = frame->link;
-    frame->link = lx->macstk;
-
-    frame->rlist = m->rlist;
-    frame->exspan = span->ex;
-    frame->idx = 0;
-    frame->macno = m->macno;
-    frame->stop = m->stop;
-    frame->prevnoexpandmac = noexpandmac;
-
-    lx->macstk = frame;
-}
-
-/* Pop the top frame of lx->macstk (which must not be empty), and keep
- * popping frames below it that are already exhausted and are not `stop`
- * frames.  Each pop restores the noexpandmac value saved in the frame and
- * frees a token list that was built for this expansion, i.e. one that is
- * not the macro's own replacement list. */
-static void
-popmac(struct lexer *lx)
-{
-    /* fetch the frame outside assert(): with NDEBUG the assert expression
-     * is not evaluated and `stk` would be used uninitialised */
-    struct macrostack *stk = lx->macstk;
-
-    assert(stk);
-    do {
-        noexpandmac = stk->prevnoexpandmac;
-        if (stk->macno >= 0 && !macros.p[stk->macno].special
-            && stk->rlist.tk != macros.p[stk->macno].rlist.tk) {
-            free((void *)stk->rlist.tk);
-        }
-        /* return the frame to the free list */
-        lx->macstk = stk->link;
-        stk->link = mfreelist;
-        mfreelist = stk;
-    } while ((stk = lx->macstk) && stk->idx >= stk->rlist.n && !stk->stop);
-}
-
-static void expandfnmacro(struct lexer *lx, struct span *span, struct macro *mac);
-
-/* Try to macro-expand the token `*tk`.
- * Returns 0 when nothing is expanded: expansion is disabled, the token is
- * not an identifier or keyword, no macro of that name exists, or the macro
- * is already being expanded on lx->macstk.  Otherwise returns 1 after
- * pushing the replacement tokens (if any) onto lx->macstk for the caller
- * to read back. */
-static bool
-tryexpand(struct lexer *lx, struct token *tk)
-{
- static bool inimstk;
- int macidx, i;
- struct span span = tk->span;
- struct macrostack *l;
- struct macro *mac = NULL;
-
- /* on first use, chain every frame of mstk[] onto the free list */
- if (!inimstk) {
- inimstk = 1;
- for (i = 0; i < arraylength(mstk); ++i) {
- mstk[i].link = mfreelist;
- mfreelist = &mstk[i];
- }
- }
-
- if (noexpandmac || !isppident(*tk) || !(mac = findmac(tk->s)))
- return 0;
-
- macidx = mac - macros.p;
- /* prevent infinite recursion */
- for (l = lx->macstk; l; l = l->link)
- if (l->macno == macidx)
- return 0;
-
- if (mac->special) {
- /* the handler rewrites *tk; a copy of the result is replayed as a
-  * one-token list */
- mac->handler(lx, tk);
- pushmacstk(lx, &span, &(struct macrostack){
- .rlist = { alloccopy(lx->tmparena, tk, sizeof *tk, 0), 1 },
- .macno = -1,
- .idx = 0,
- });
- } else if (mac->fnlike) {
- struct token *tk_ = tk;
- struct token tk;
- /* look at the next token without expanding it */
- noexpandmac = 1;
- if (lex(lx, &tk) != '(') {
- /* cannot backtrack here, so this is a kludge to reexpand <ident> <token> */
- /* the pair is replayed under this macro's number, so the name itself
-  * is not expanded again by the recursion check above */
- struct token *tk2 = xmalloc(sizeof *tk2 * 2);
- tk2[0] = *tk_, tk2[1] = tk;
- noexpandmac = 0;
- pushmacstk(lx, &span, &(struct macrostack) {
- .rlist = { tk2, 2 },
- .exspan = span.ex,
- .macno = macidx,
- });
- return 1;
- }
-
- expandfnmacro(lx, &span, mac);
- } else if (mac->rlist.n) {
- pushmacstk(lx, &span, &(struct macrostack){
- .rlist = mac->rlist,
- .macno = macidx,
- .idx = 0,
- });
- }
- return 1;
-}
-
-/* Expand an invocation of the function-like macro `mac`; the opening `(`
- * has already been read by the caller.  Collects the argument tokens up to
- * the matching `)`, reports argument count mismatches, builds a copy of
- * the replacement list with parameters substituted (arguments are macro
- * expanded, `#` stringified, `##` operands pasted) and pushes the result
- * onto lx->macstk.  `span` is the span of the macro name; its expansion
- * part is extended to cover the whole invocation. */
-static void
-expandfnmacro(struct lexer *lx, struct span *span, struct macro *mac)
-{
-    vec_of(struct token) argsbuf = {0}, /* argument tokens pre-expansion */
-                         rlist2 = {0};  /* macro replacement list with arguments substituted */
-    /* index,n into argsbuf.  One slot per possible parameter (nparam is a
-     * uchar), zero-initialised so that an argument missing from a malformed
-     * invocation reads as empty instead of as garbage. */
-    struct argtks { int idx, n; } args[256] = {{0}};
-    struct span excessspan;
-    int cur, len, i, bal, narg;
-    struct token tk;
-    bool toomany = 0;
-
-    /* we push all arg tokens to argsbuf; each of args[i] is a slice
-     * (idx..idx+n) of that vector, kept as indices rather than pointers
-     * because argsbuf.p can move while the vector grows */
-
-    cur = i = bal = len = narg = 0;
-    while ((lex(lx, &tk) != ')' || bal != 0) && tk.t != TKEOF) {
-        if (tk.t == ',' && bal == 0) {
-            /* top-level comma: ends the current argument */
-            ++narg;
-            if (i == mac->nparam-1 && !mac->variadic) {
-                excessspan = tk.span;
-                toomany = 1;
-            } else if (i < mac->nparam - mac->variadic) {
-                args[i].idx = cur;
-                args[i].n = len;
-                cur = argsbuf.n;
-                len = 0;
-                ++i;
-            } else if (mac->variadic) {
-                /* commas inside the variadic part belong to the argument */
-                vpush(&argsbuf, tk);
-                ++len;
-            }
-        } else if (!toomany) {
-            if (tk.t == '(' || tk.t == '[') ++bal;
-            else if (tk.t == ')' || tk.t == ']') --bal;
-            vpush(&argsbuf, tk);
-            ++len;
-        }
-    }
-    noexpandmac = 0;
-    if (tk.t == TKEOF)
-        error(span, "unterminated function-like macro invocation");
-    else if (i < mac->nparam) {
-        /* close the last argument */
-        ++narg;
-        args[i].idx = cur;
-        args[i].n = len;
-        cur = argsbuf.n;
-        len = 0;
-        ++i;
-    }
-    joinspan(&span->ex, tk.span.ex);
-    if (narg < mac->nparam)
-        error(span, "macro `%s' passed %d arguments, but takes %d", mac->name, narg, mac->nparam);
-    else if (toomany) {
-        joinspan(&excessspan.ex, tk.span.ex);
-        error(&excessspan, "macro `%s' passed %d arguments, but takes just %d", mac->name, narg, mac->nparam);
-    }
-
-    /* make new rlist with args replaced */
-    if (mac->nparam) {
-        struct token lhsargforpaste;
-        bool lhsargpaste = 0, rhsargpaste = 0;
-        for (int i = 0; i < mac->rlist.n; ++i) {
-            struct argtks *arg;
-            tk = mac->rlist.tk[i];
-            if (tk.t == TKPPCAT) {
-                if (i > 0 && i < mac->rlist.n-1) {
-                    const struct token *lhs = &mac->rlist.tk[i-1], *rhs = &mac->rlist.tk[i+1];
-                    struct token new;
-                    if (lhs->t != TKPPMACARG && rhs->t != TKPPMACARG) {
-                        /* trivial case should have been handled when defining */
-                        assert(0 && "## ?");
-                    } else if (rhs->t != TKPPMACARG) {
-                        /* arg ## token: paste the saved last argument token */
-                        assert(lhsargpaste);
-                        if (tokpaste(lx, &new, &lhsargforpaste, rhs)) {
-                            vpush(&rlist2, new);
-                            ++i;
-                            continue;
-                        }
-                        lhsargpaste = 0;
-                    } else {
-                        /* ... ## arg: pasted once the argument is substituted */
-                        if (lhs->t != TKPPMACARG) {
-                            --rlist2.n;
-                            lhsargforpaste = *lhs;
-                        }
-                        rhsargpaste = 1;
-                        continue;
-                    }
-                }
-            }
-            if (tk.t != TKPPMACARG && tk.t != TKPPMACSTR) {
-                vpush(&rlist2, tk);
-                continue;
-            }
-
-            arg = &args[tk.argidx];
-            if (tk.t == TKPPMACARG) {
-                struct macrostack *l;
-                lhsargpaste = i < mac->rlist.n-1 && mac->rlist.tk[i+1].t == TKPPCAT;
-                if (arg->n == 0) {
-                    /* empty argument: an operand of kind 0 for tokpaste() */
-                    if (lhsargpaste) {
-                        lhsargforpaste.t = 0;
-                        lhsargforpaste.span = tk.span;
-                    }
-                    if (rhsargpaste) {
-                        rhsargpaste = 0;
-                        vpush(&rlist2, lhsargforpaste);
-                    }
-                    continue;
-                }
-                /* replay the argument through lex() so that it gets macro
-                 * expanded; the `stop` frame yields TKEOF at its end.  When
-                 * the argument is the left operand of ##, its last token is
-                 * held back for the paste. */
-                pushmacstk(lx, &tk.span, &(struct macrostack) {
-                    .rlist = {argsbuf.p + arg->idx, arg->n - lhsargpaste},
-                    .macno = -1,
-                    .idx = 0,
-                    .stop = 1,
-                });
-                l = lx->macstk;
-                if (rhsargpaste) {
-                    struct token new;
-                    rhsargpaste = 0;
-                    if (tokpaste(lx, &new, &lhsargforpaste, &l->rlist.tk[0])) {
-                        l->idx = 1;
-                        vpush(&rlist2, new);
-                    }
-                }
-                while (lex(lx, &tk) != TKEOF)
-                    vpush(&rlist2, tk);
-                assert(lx->macstk == l);
-                popmac(lx);
-                if (lhsargpaste)
-                    lhsargforpaste = argsbuf.p[arg->idx + arg->n-1];
-            } else { /* PPMACSTR */
-                char tmp[100];
-                struct wbuf buf = MEMBUF(tmp, sizeof tmp);
-                int n = 0;
-
-                // XXX this is wrong bc the string literal produced should be re-parsed later
-                // i.e. stringifying the token sequence '\n' should ultimately produce a
-                // string with an actual newline, not {'\\','n'}
-            Redo:
-                for (int i = 0; i < arg->n; ++i) {
-                    struct token *tk = &argsbuf.p[arg->idx + i];
-                    if (i > 0 && wsseparated(tk-1, tk))
-                        n += bfmt(&buf, " ");
-                    n += bfmt(&buf, "%tk", tk);
-                }
-                ioputc(&buf, 0);
-                if (buf.err) {
-                    /* did not fit in tmp: format again into an arena buffer
-                     * of the size measured by the first pass */
-                    struct wbuf new = MEMBUF(alloc(lx->tmparena, n+1, 1), n+1);
-                    assert(buf.buf == tmp);
-                    memcpy(&buf, &new, sizeof buf);
-                    goto Redo;
-                }
-                tk.t = TKSTRLIT;
-                tk.wide = 0;
-                tk.s = buf.buf != tmp ? buf.buf : alloccopy(lx->tmparena, buf.buf, buf.len, 1);
-                tk.len = buf.len-1;
-                vpush(&rlist2, tk);
-            }
-        }
-
-        if (rlist2.n) {
-            pushmacstk(lx, span, &(struct macrostack){
-                .rlist = { rlist2.p, rlist2.n },
-                .macno = mac - macros.p,
-            });
-        }
-    } else if (mac->rlist.n) {
-        /* no parameters: the replacement list can be replayed unchanged */
-        pushmacstk(lx, span, &(struct macrostack){
-            .rlist = mac->rlist,
-            .macno = mac - macros.p,
-        });
-    }
-    vfree(&argsbuf);
-}
-
-/* Deliver the next token of the top macro stack frame into `*tk`.
- * Returns non-zero when `*tk` holds a token for the caller: either TKEOF
- * at the end of a `stop` frame, or a token that was not macro-expanded.
- * Returns 0 when the caller has to ask again, because an exhausted frame
- * was popped or the token started a nested expansion. */
-static bool
-advancemacro(struct lexer *lx, struct token *tk)
-{
-    struct macrostack *top;
-
-    assert(lx->macstk);
-    top = lx->macstk;
-    if (top->idx == top->rlist.n) {
-        if (top->stop)
-            return tk->t = TKEOF;
-        popmac(lx);
-        return 0;
-    }
-    *tk = top->rlist.tk[top->idx++];
-    assert(tk->t);
-    tk->span.ex = top->exspan;
-    return tryexpand(lx, tk) ? 0 : tk->t;
-}
-
-/* one-token lookahead for elex()/epeek(); t == 0 means it is empty */
-static struct token epeektk;
-/* Read the next token for expression evaluation into `*tk` and return its
- * kind.  The sources, in order of priority: the lookahead token saved by
- * epeek(), the macro expansion stack, and finally the raw lexer. */
-static int
-elex(struct lexer *lx, struct token *tk)
-{
-    for (;;) {
-        assert(tk);
-        if (epeektk.t) {
-            int kind = epeektk.t;
-            if (tk) *tk = epeektk;
-            epeektk.t = 0;
-            return kind;
-        }
-        if (!lx->macstk) {
-            lex0(lx, tk);
-            return tk->t;
-        }
-        if (advancemacro(lx, tk))
-            return tk->t;
-        /* nothing delivered yet: start over */
-    }
-}
-
-/* Return the kind of the next token without consuming it, reading it into
- * the lookahead slot first if necessary.  The token itself is copied to
- * `*tk` when `tk` is non-NULL. */
-static int
-epeek(struct lexer *lx, struct token *tk)
-{
-    if (epeektk.t == 0)
-        elex(lx, &epeektk);
-    if (tk != NULL)
-        *tk = epeektk;
-    return epeektk.t;
-}
-
-/* Precedence of token kind `tt` as a binary (or `?`) operator in an
- * expression: the table value minus one, so a larger result binds tighter.
- * Returns -1 for any token kind that is not listed. */
-static int
-tkprec(int tt)
-{
-    /* listed from loosest to tightest binding */
-    static const char tab[] = {
-        ['?'] = 2,
-        [TKLOGIOR] = 3,
-        [TKLOGAND] = 4,
-        ['|'] = 5,
-        ['^'] = 6,
-        ['&'] = 7,
-        [TKEQU] = 8, [TKNEQ] = 8,
-        ['<'] = 9, ['>'] = 9, [TKLTE] = 9, [TKGTE] = 9,
-        [TKSHL] = 10, [TKSHR] = 10,
-        ['+'] = 11, ['-'] = 11,
-        ['*'] = 12, ['/'] = 12, ['%'] = 12,
-    };
-
-    return (uint)tt < arraylength(tab) ? tab[tt] - 1 : -1;
-}
-
-/* Evaluate a preprocessor (#if/#elif) integer expression by precedence
- * climbing.
- *
- *  lx    lexer to read tokens from (through elex/epeek, lex0 for `defined')
- *  pu    if non-NULL, receives whether the result is of unsigned type
- *  prec  lowest binary-operator precedence (see tkprec) to accept; 0 marks
- *        the top-level call, which also diagnoses trailing tokens on the line
- *
- * Returns the value of the expression. On a syntax error a diagnostic is
- * emitted, the rest of the line is skipped and 0 is returned.
- *
- * Both operands of every binary operator and both arms of ?: are always
- * evaluated, so e.g. "division by zero" is also reported for operands that
- * C would leave unevaluated. */
-static vlong
-expr(struct lexer *lx, bool *pu, int prec)
-{
-   vlong x, y;
-   struct token tk;
-   enum typetag ty;
-   int opprec;
-   char unops[16];
-   int nunop = 0;
-   bool xu = 0, yu; /* x unsigned?; y unsigned? */
-
-Unary:
-   switch (elex(lx, &tk)) {
-   case '-': case '~': case '!':
-      /* stack prefix operators; they are applied once the operand is known */
-      unops[nunop++] = tk.t;
-      if (nunop >= arraylength(unops)) {
-         /* stack full: let a recursive call parse the remaining unary chain
-          * (999 is above every binary precedence) */
-         x = expr(lx, &xu, 999);
-         break;
-      }
-      /* fallthru */
-   case '+': goto Unary;
-   case '(':
-      x = expr(lx, &xu, 1);
-      if (elex(lx, &tk) != ')') {
-         error(&tk.span, "expected ')'");
-         goto Err;
-      }
-      break;
-   case TKNUMLIT:
-   case TKCHRLIT:
-      ty = parsenumlit((uvlong *)&x, NULL, &tk, 1);
-      if (!ty) {
-         error(&tk.span, "bad number literal");
-         goto Err;
-      } else if (isfltt(ty)) {
-         error(&tk.span, "float literal in preprocessor expression");
-         goto Err;
-      }
-      xu = isunsignedt(ty);
-      break;
-   default:
-      if (isppident(tk)) {
-         xu = 0;
-         if (!strcmp(tk.s, "defined")) {
-            /* 'defined' ppident; its operand is read with lex0 so that it
-             * is not macro-expanded */
-            const char *name;
-            bool paren;
-
-            lex0(lx, &tk);
-            paren = tk.t == '(';
-            if (paren) lex0(lx, &tk);
-            if (tk.t != TKIDENT && !in_range(tk.t, TKWBEGIN_, TKWEND_)) {
-               error(&tk.span, "expected macro name");
-               goto Err;
-            }
-            /* keep the name: lexing the ')' below reuses tk */
-            name = tk.s;
-            if (paren && lex0(lx, &tk) != ')') {
-               error(&tk.span, "expected `)'");
-               goto Err;
-            }
-            x = findmac(name) != NULL;
-         } else {
-            if (tryexpand(lx, &tk))
-               goto Unary;
-            /* non defined pp name -> 0 */
-            x = 0;
-         }
-         break;
-      }
-      error(&tk.span, "expected preprocessor integer expression");
-      goto Err;
-   }
-
-   /* apply the stacked prefix operators, innermost first */
-   while (nunop > 0)
-      switch (unops[--nunop]) {
-      case '-': x = -(uvlong)x; break;
-      case '~': x = ~x; break;
-      case '!': x = !x; break;
-      default: assert(0);
-      }
-
-   while ((opprec = tkprec(epeek(lx, &tk))) >= prec) {
-      elex(lx, &tk);
-      if (tk.t != '?') {
-         bool u;
-         y = expr(lx, &yu, opprec + 1);
-         u = xu | yu;
-         switch ((int) tk.t) {
-         case '+': x += (uvlong) y; break;
-         case '-': x -= (uvlong) y; break;
-         /* multiply as unsigned: same result bits, no signed-overflow UB */
-         case '*': x = (uvlong) x * (uvlong) y; break;
-         case '&': x &= y; break;
-         case '^': x ^= y; break;
-         case '|': x |= y; break;
-         case '/':
-         case '%':
-            if (!y) {
-               error(&tk.span, "division by zero");
-            } else if (u) {
-               x = tk.t == '/' ? (uvlong) x / (uvlong) y : (uvlong) x % (uvlong) y;
-            } else if (y == -1) {
-               /* the most negative value divided by -1 overflows (and traps
-                * on some hosts); negate with wraparound instead */
-               x = tk.t == '/' ? -(uvlong) x : 0;
-            } else {
-               x = tk.t == '/' ? x / y : x % y;
-            }
-            break;
-         case TKSHL:
-         case TKSHR:
-            if ((uvlong)y >= 64)
-               error(&tk.span, "bad shift by %ld", y);
-            else if (tk.t == TKSHL)
-               x = (uvlong) x << y; /* unsigned: left-shifting a negative value is UB */
-            else
-               x = u ? (uvlong) x >> y : x >> y;
-            break;
-         case '<': x = u ? (uvlong) x < y : x < y; goto BoolRes;
-         case '>': x = u ? (uvlong) x > y : x > y; goto BoolRes;
-         case TKLTE: x = u ? (uvlong) x <= y : x <= y; goto BoolRes;
-         case TKGTE: x = u ? (uvlong) x >= y : x >= y; goto BoolRes;
-         case TKEQU: x = x == y; goto BoolRes;
-         case TKNEQ: x = x != y; goto BoolRes;
-         case TKLOGAND: x = x && y; goto BoolRes;
-         case TKLOGIOR: x = x || y;
-         BoolRes: /* comparison and logical results are signed */
-            u = 0;
-            break;
-         default: assert(0);
-         }
-         xu = u;
-      } else {
-         /* x ? m : y -- the result is unsigned if either arm is */
-         struct span span = tk.span;
-         vlong m = expr(lx, &xu, 1);
-         if (elex(lx, &tk) != ':') {
-            error(&tk.span, "expected ':'");
-            note(&span, "to match conditional expression here");
-            goto Err;
-         }
-         y = expr(lx, &yu, 1);
-         x = x ? m : y;
-         xu |= yu;
-      }
-   }
-   if (!prec) /* not a sub expr */
-      if (elex(lx, &tk) != '\n' && tk.t != TKEOF) {
-         error(&tk.span, "garbage after preprocessor expression");
-         ppskipline(lx);
-      }
-   if (pu) *pu = xu;
-   return x;
-
-Err:
-   ppskipline(lx);
-   if (pu) *pu = xu;
-   return 0;
-}
-
-/* state of one #if ... #endif chain */
-enum {
-   PPCNDFALSE = 0, /* no group taken yet: skip until #else/#elif */
-   PPCNDTRUE = 1,  /* current group is live: emit until #else/#elif */
-   PPCNDTAKEN = 2  /* a group was already taken: skip the remaining ones */
-};
-
-struct ppcnd {
-   struct span0 ifspan; /* where the chain was opened, for diagnostics */
-   int filedepth;       /* value of includedepth when it was opened */
-   uchar cnd;           /* one of PPCND* */
-   bool elsep;          /* #else already seen? */
-};
-
-/* stack of currently open conditionals and its fill level */
-static struct ppcnd ppcndstk[32];
-static int nppcnd;
-
-/* number of #include'd files currently being lexed */
-static int includedepth;
-
-/* Handle `#if': evaluate the controlling expression on the rest of the line
- * and open a new conditional whose state reflects the result.
- * span is the location of the directive name, kept for later diagnostics.
- * Exceeding the nesting limit is reported with fatal() instead of relying on
- * an assert, so the stack cannot be overrun when asserts are compiled out. */
-static void
-ppif(struct lexer *lx, const struct span *span)
-{
-   struct ppcnd *cnd;
-   vlong v = expr(lx, NULL, 0);
-
-   if (nppcnd >= (int)arraylength(ppcndstk)) {
-      fatal(span, "too many nested #if");
-      return;
-   }
-   cnd = &ppcndstk[nppcnd++];
-   cnd->ifspan = span->sl;
-   cnd->filedepth = includedepth;
-   cnd->cnd = v ? PPCNDTRUE : PPCNDFALSE;
-   cnd->elsep = 0;
-}
-
-/* Handle `#ifdef' (defp != 0) and `#ifndef' (defp == 0): open a new
- * conditional that is live when the named macro is, respectively is not,
- * defined. span is the location of the directive name.
- *
- * If the macro name is missing an error is reported and a false conditional
- * is still opened, so that the matching #else/#endif stay balanced. */
-static void
-ppifxdef(struct lexer *lx, bool defp, const struct span *span)
-{
-   struct token tk;
-   struct ppcnd *cnd;
-   bool live = 0;
-
-   lex0(lx, &tk);
-   if (isppident(tk)) {
-      live = (findmac(tk.s) == NULL) ^ defp;
-   } else {
-      error(&tk.span, "macro name missing");
-      /* only skip when the end of the line was not consumed already */
-      if (tk.t != '\n' && tk.t != TKEOF)
-         ppskipline(lx);
-   }
-
-   if (nppcnd >= (int)arraylength(ppcndstk)) {
-      fatal(span, "too many nested #if");
-      return;
-   }
-   cnd = &ppcndstk[nppcnd++];
-   cnd->ifspan = span->sl;
-   cnd->filedepth = includedepth;
-   cnd->cnd = live ? PPCNDTRUE : PPCNDFALSE;
-   cnd->elsep = 0;
-}
-
-/* Handle `#elif'. span is the location of the directive name.
- *
- * The controlling expression is evaluated only while no earlier group of the
- * chain has been taken; otherwise the rest of the line is ignored, so that an
- * expression in a group that is skipped anyway cannot produce diagnostics.
- * An #elif without an open conditional is diagnosed and then treated as #if. */
-static void
-ppelif(struct lexer *lx, const struct span *span)
-{
-   struct ppcnd *cnd;
-
-   if (!nppcnd) {
-      error(span, "#elif without matching #if");
-      ppif(lx, span);
-      return;
-   }
-   cnd = &ppcndstk[nppcnd-1];
-   if (cnd->elsep) {
-      error(span, "#elif after #else");
-      ppskipline(lx);
-      return;
-   }
-   switch (cnd->cnd) {
-   case PPCNDFALSE:
-      cnd->cnd = expr(lx, NULL, 0) ? PPCNDTRUE : PPCNDFALSE;
-      break;
-   case PPCNDTRUE:
-      /* the live group ends here; everything up to #endif is skipped */
-      cnd->cnd = PPCNDTAKEN;
-      /* fallthru */
-   default:
-      ppskipline(lx);
-      break;
-   }
-}
-/* Handle `#elifdef' (defp != 0) and `#elifndef' (defp == 0).
- * span is the location of the directive name.
- *
- * Without an open conditional the directive is diagnosed and then treated as
- * the corresponding #ifdef/#ifndef. When an earlier group of the chain was
- * already taken the macro name is not examined and the line is ignored. */
-static void
-ppelifxdef(struct lexer *lx, bool defp, const struct span *span)
-{
-   struct token tk;
-   struct ppcnd *cnd;
-
-   if (!nppcnd) {
-      /* &"n"[defp] is "n" for #elifndef and "" for #elifdef */
-      error(span, "#elif%sdef without matching #if", &"n"[defp]);
-      ppifxdef(lx, defp, span);
-      return;
-   }
-   cnd = &ppcndstk[nppcnd-1];
-   if (cnd->elsep) {
-      error(span, "#elif%sdef after #else", &"n"[defp]);
-      ppskipline(lx);
-      return;
-   }
-   if (cnd->cnd != PPCNDFALSE) {
-      /* a live group ends here, or one was taken earlier: skip from now on */
-      if (cnd->cnd == PPCNDTRUE)
-         cnd->cnd = PPCNDTAKEN;
-      ppskipline(lx);
-      return;
-   }
-   lex0(lx, &tk);
-   if (!isppident(tk)) {
-      error(&tk.span, "macro name missing");
-      /* only skip when the end of the line was not consumed already */
-      if (tk.t != '\n' && tk.t != TKEOF)
-         ppskipline(lx);
-      return;
-   }
-   cnd->cnd = (findmac(tk.s) == NULL) ^ defp ? PPCNDTRUE : PPCNDFALSE;
-}
-
-/* Handle `#endif': complain about extra tokens on the line, then close the
- * innermost open conditional (or report that there is none). */
-static void
-ppendif(struct lexer *lx, const struct span *span)
-{
-   struct token tk;
-   int t = lex0(lx, &tk);
-
-   if (t != '\n' && tk.t != TKEOF) {
-      error(&tk.span, "garbage after #endif");
-      ppskipline(lx);
-   }
-   if (nppcnd)
-      nppcnd -= 1;
-   else
-      error(span, "#endif without matching #if");
-}
-
-/* Handle `#else': complain about extra tokens on the line, then flip the
- * innermost conditional. A not-yet-taken chain becomes live, a live one
- * becomes taken; a second #else is diagnosed but processed the same way. */
-static void
-ppelse(struct lexer *lx, const struct span *span)
-{
-   struct token tk;
-   struct ppcnd *top;
-   int t = lex0(lx, &tk);
-
-   if (t != '\n' && tk.t != TKEOF) {
-      error(&tk.span, "garbage after #else");
-      ppskipline(lx);
-   }
-   if (nppcnd == 0) {
-      error(span, "#else without matching #if");
-      return;
-   }
-
-   top = &ppcndstk[nppcnd - 1];
-   if (top->elsep)
-      error(span, "#else after #else");
-   if (top->cnd == PPCNDFALSE)
-      top->cnd = PPCNDTRUE;
-   else if (top->cnd == PPCNDTRUE)
-      top->cnd = PPCNDTAKEN;
-   top->elsep = 1;
-}
-
-/* deepest allowed nesting of #include */
-enum { MAXINCLUDE = 200 };
-
-/* Try to start lexing the file at path in place of the current one.
- * Returns 0 if the file could not be opened. Returns 1 otherwise: either *lx
- * now lexes the new file, with the previous lexer state saved in lx->save,
- * or initlexer() reported LXFILESEEN and *lx is left untouched.
- * span is used to report exceeding MAXINCLUDE. */
-static bool
-tryinclude(struct lexer *lx, const struct span *span, const char *path)
-{
-   struct lexer incl;
-   const char *err;
-   enum initlexer status = initlexer(&incl, &err, path);
-
-   if (status == LXFILESEEN)
-      return 1;
-   if (status != LXOK) {
-      assert(status == LXERR);
-      return 0;
-   }
-
-   /* stash the including lexer so it can be restored at end of file */
-   incl.save = xmalloc(sizeof *incl.save);
-   memcpy(incl.save, lx, sizeof *lx);
-   *lx = incl;
-
-   if (++includedepth == MAXINCLUDE)
-      fatal(span, "Maximum nested include depth of %d reached", includedepth);
-   return 1;
-}
-
-/* Handle `#include'. span0 is the location of the directive name.
- *
- * Lookup order: a quoted name starting with '/' is tried verbatim and nothing
- * else; any other quoted name is first tried relative to the directory of the
- * including file. After that the directories in cinclpaths are tried in list
- * order and finally the "@:" pseudo-path. Failure to find the file is fatal. */
-static void
-ppinclude(struct lexer *lx, const struct span *span0)
-{
-   struct token tk;
-   struct span span = *span0;
-   char *path = NULL;
-
-   lexingheadername = 1;
-   if (!in_range(lex0(lx, &tk), TKPPHDRH, TKPPHDRQ)) {
-      error(&tk.span, "garbage after #include");
-      ppskipline(lx);
-      return;
-   }
-   joinspan(&span.ex, tk.span.ex);
-
-   if (tk.t == TKPPHDRQ && tk.s[0] == '/') {
-      /* absolute path */
-      xbgrow(&path, tk.len + 1);
-      memcpy(path, tk.s, tk.len);
-      path[tk.len] = 0;
-      if (tryinclude(lx, &span, path)) return;
-      goto NotFound;
-   }
-   if (tk.t == TKPPHDRQ) {
-      /* relative path: directory part of the current file name + header name */
-      const char *base = getfilename(lx->fileid);
-      const char *slash = strrchr(base, '/');
-      int ndir = slash ? (int)(slash - base) + 1 : 0;
-
-      xbgrow(&path, ndir + tk.len + 1);
-      memcpy(path, base, ndir);
-      memcpy(path + ndir, tk.s, tk.len);
-      path[ndir + tk.len] = 0;
-      if (tryinclude(lx, &span, path)) return;
-   }
-
-   /* system paths: "<dir>/<name>" */
-   for (struct inclpaths *dir = cinclpaths; dir; dir = dir->next) {
-      int ndir = strlen(dir->path);
-
-      xbgrow(&path, ndir + tk.len + 2);
-      memcpy(path, dir->path, ndir);
-      path[ndir++] = '/';
-      memcpy(path + ndir, tk.s, tk.len);
-      path[ndir + tk.len] = 0;
-      if (tryinclude(lx, &span, path)) return;
-   }
-
-   /* embedded files pseudo-path: "@:<name>" */
-   xbgrow(&path, tk.len + 3);
-   path[0] = '@';
-   path[1] = ':';
-   memcpy(path + 2, tk.s, tk.len);
-   path[tk.len + 2] = 0;
-   if (tryinclude(lx, &span, path)) return;
-
-NotFound:
-   fatal(&tk.span, "file not found: %'S", tk.s, tk.len);
-}
-
-/* Handle `#pragma'. Only `#pragma once' is understood; any other pragma is
- * ignored with a warning. span0 is the location of the directive name. */
-static void
-pppragma(struct lexer *lx, const struct span *span0)
-{
-   struct token tk;
-   struct span span = *span0;
-
-   if (lex0(lx, &tk) != TKIDENT || strcmp(tk.s, "once") != 0) {
-      joinspan(&span.ex, tk.span.ex);
-      warn(&span, "unknown pragma ignored");
-      ppskipline(lx);
-      return;
-   }
-   markfileonce(lx->fileid);
-
-   /* nothing else may follow on the line */
-   if (lex0(lx, &tk) != '\n' && tk.t != TKEOF) {
-      warn(&tk.span, "garbage after pragma ignored");
-      ppskipline(lx);
-   }
-}
-
-enum directive { /* preprocessor directive ids as returned by findppcmd() */
- PPXXX, /* the name is not a known directive */
- /* !sorted */
- PPDEFINE, /* from here on: 1 + index of the name in findppcmd()'s table, */
- PPELIF, /* so both lists must stay in the same order */
- PPELIFDEF,
- PPELIFNDEF,
- PPELSE,
- PPENDIF,
- PPERROR,
- PPIF,
- PPIFDEF,
- PPIFNDEF,
- PPINCLUDE,
- PPLINE,
- PPPRAGMA,
- PPUNDEF,
- PPWARNING,
-};
-
-/* Map the token following `#' to its directive id; PPXXX if the name is not
- * a directive. `if' and `else' are recognized by their keyword token type,
- * everything else by the token's spelling. */
-static enum directive
-findppcmd(const struct token *tk)
-{
-   /* slot i holds the name of directive id i + 1 */
-   static const char *names[] = {
-      /* !sorted */
-      "define",
-      "elif",
-      "elifdef",
-      "elifndef",
-      "else",
-      "endif",
-      "error",
-      "if",
-      "ifdef",
-      "ifndef",
-      "include",
-      "line",
-      "pragma",
-      "undef",
-      "warning",
-   };
-   int lo, hi;
-
-   if (tk->t == TKWif) return PPIF;
-   if (tk->t == TKWelse) return PPELSE;
-
-   /* binary search over the sorted names */
-   lo = 0;
-   hi = arraylength(names) - 1;
-   while (lo <= hi) {
-      int mid = (lo + hi) / 2;
-      int cmp = strcmp(names[mid], tk->s);
-
-      if (cmp == 0)
-         return mid + 1;
-      if (cmp < 0)
-         lo = mid + 1;
-      else
-         hi = mid - 1;
-   }
-   return PPXXX;
-}
-
-/* Produce the next fully preprocessed token.
- *
- *  lx   lexer state; replaced in place when an #include is entered or left
- *  tk_  receives the token; may be NULL when only the type is wanted, but
- *       must not be &lx->peektok
- *
- * Returns the token type (TKEOF at the end of the outermost file).
- *
- * Sources, in order: the token stored by lexpeek(), the macro expansion
- * stack, then the raw lexer. On the raw path directives are executed, tokens
- * inside inactive conditional groups are dropped and macros are expanded.
- * While noexpandmac is set, `#' is not treated as starting a directive and
- * conditional skipping is not started. */
-int
-lex(struct lexer *lx, struct token *tk_)
-{
-   struct token tkx[1], *tk;
-   int t;
-   bool linebegin, skip;
-
-   assert(tk_ != &lx->peektok);
-   tk = tk_ ? tk_ : tkx;
-   if (lx->peektok.t) {
-      *tk = lx->peektok;
-      memset(&lx->peektok, 0, sizeof lx->peektok);
-      return tk->t;
-   }
-
-   if (lx->macstk) {
-      if (!advancemacro(lx, tk))
-         return lex(lx, tk_);
-      return tk->t;
-   }
-
-   /* skip: are we inside a conditional group that must not be emitted? */
-   skip = !noexpandmac && nppcnd ? ppcndstk[nppcnd-1].cnd != PPCNDTRUE : 0;
-   for (linebegin = 1;;) {
-      while ((t = lex0(lx, tk)) == '\n') linebegin = 1;
-      if (t == '#' && linebegin && !noexpandmac) {
-         if (lex0(lx, tk) == '\n') { /* null directive */ }
-         else if (isppident(*tk)) {
-            if (!skip) {
-               switch (findppcmd(tk)) {
-               case PPXXX: goto BadPP;
-               case PPDEFINE: ppdefine(lx); break;
-               case PPUNDEF: ppundef(lx); break;
-               case PPIF: ppif(lx, &tk->span); break;
-               case PPIFDEF: ppifxdef(lx, 1, &tk->span); break;
-               case PPIFNDEF: ppifxdef(lx, 0, &tk->span); break;
-               case PPELIF: ppelif(lx, &tk->span); break;
-               case PPELIFDEF: ppelifxdef(lx, 1, &tk->span); break;
-               case PPELIFNDEF: ppelifxdef(lx, 0, &tk->span); break;
-               case PPELSE: ppelse(lx, &tk->span); break;
-               case PPENDIF: ppendif(lx, &tk->span); break;
-               case PPINCLUDE: ppinclude(lx, &tk->span); break;
-               case PPPRAGMA: pppragma(lx, &tk->span); break;
-               /* The next three are not implemented beyond a bare diagnostic;
-                * the rest of the line is dropped so that the directive's
-                * operands are not handed out as ordinary tokens. */
-               case PPLINE:
-                  ppskipline(lx);
-                  break;
-               case PPWARNING:
-                  warn(&tk->span, "#warning directive encountered");
-                  ppskipline(lx);
-                  break;
-               case PPERROR:
-                  error(&tk->span, "#error directive encountered");
-                  ppskipline(lx);
-                  break;
-               default: assert(0&&"nyi");
-               }
-            } else {
-               switch (findppcmd(tk)) {
-               case PPIF: /* increment nesting level */
-               case PPIFDEF:
-               case PPIFNDEF:
-                  if (nppcnd >= (int)arraylength(ppcndstk)) {
-                     fatal(&tk->span, "too many nested #if");
-                     break;
-                  }
-                  /* opened inside a skipped group: none of its groups can
-                   * become live, so it starts out as "taken" */
-                  ppcndstk[nppcnd].ifspan = tk->span.sl;
-                  /* record the depth so the end-of-file check below does
-                   * not read a stale value from an earlier entry */
-                  ppcndstk[nppcnd].filedepth = includedepth;
-                  ppcndstk[nppcnd].cnd = PPCNDTAKEN;
-                  ppcndstk[nppcnd++].elsep = 0;
-                  break;
-               case PPELIF: ppelif(lx, &tk->span); break;
-               case PPELIFDEF: ppelifxdef(lx, 1, &tk->span); break;
-               case PPELIFNDEF: ppelifxdef(lx, 0, &tk->span); break;
-               case PPELSE: ppelse(lx, &tk->span); break;
-               case PPENDIF: ppendif(lx, &tk->span); break;
-               default: ppskipline(lx); break;
-               }
-            }
-            /* the directive may have changed the innermost conditional */
-            skip = nppcnd ? ppcndstk[nppcnd-1].cnd != PPCNDTRUE : 0;
-         } else {
-            if (!skip) {
-            BadPP:
-               error(&tk->span, "invalid preprocessor directive");
-            }
-            ppskipline(lx);
-         }
-         linebegin = 1;
-      } else {
-         linebegin = 0;
-         if (skip && tk->t != TKEOF) continue;
-         if (tryexpand(lx, tk))
-            return lex(lx, tk_);
-         if (t == TKEOF && nppcnd && ppcndstk[nppcnd-1].filedepth == includedepth) {
-            struct span span = { ppcndstk[nppcnd-1].ifspan };
-            error(&span, "#if is not matched by #endif");
-         }
-         if (t == TKEOF && lx->save) {
-            /* end of #include'd file, restore previous state */
-            struct lexer *sv = lx->save;
-            memcpy(lx, lx->save, sizeof *lx);
-            free(sv);
-            --includedepth;
-         } else {
-            return t;
-         }
-      }
-   }
-   assert(0);
-}
-
-/* Return the type of the next token without consuming it. The token is kept
- * in lx->peektok for the following lex() call; if tk_ is non-NULL it also
- * receives a copy. */
-int
-lexpeek(struct lexer *lx, struct token *tk_)
-{
-   struct token scratch[1];
-   struct token *out = tk_ ? tk_ : scratch;
-   uint t = lx->peektok.t;
-
-   if (t) {
-      /* already peeked: hand out the stored token again */
-      *out = lx->peektok;
-      return t;
-   }
-   t = lex(lx, out);
-   lx->peektok = *out;
-   return t;
-}
-
-/* __FILE__: turn *tk into a narrow string literal holding the name of the
- * file lx is currently reading. */
-static void
-mac__file__handler(struct lexer *lx, struct token *tk)
-{
-   const char *name = getfilename(lx->fileid);
-
-   tk->t = TKSTRLIT;
-   tk->wide = 0;
-   tk->s = name;
-   tk->len = strlen(tk->s);
-}
-
-/* __LINE__: turn *tk into a number literal spelling the line of the lexer's
- * current position (lx->chridx) in decimal. The text is copied into the
- * lexer's temporary arena. */
-static void
-mac__line__handler(struct lexer *lx, struct token *tk)
-{
-   char digits[40];
-   struct wbuf wbuf = MEMBUF(digits, sizeof digits);
-   int line;
-
-   getfilepos(&line, NULL, lx->fileid, lx->chridx);
-   bfmt(&wbuf, "%d", line);
-   digits[wbuf.len++] = 0; /* terminate; the NUL is copied along */
-
-   tk->t = TKNUMLIT;
-   tk->s = alloccopy(lx->tmparena, digits, wbuf.len, 1);
-   tk->len = strlen(tk->s);
-}
-
-#include <time.h>
-
-/* __DATE__: turn *tk into an 11 character string literal of the form
- * "Mmm dd yyyy" built from the current local time, or question marks in the
- * same layout if the local time is unavailable. */
-static void
-mac__date__handler(struct lexer *lx, struct token *tk)
-{
-   static const char months[] = "JanFebMarAprMayJunJulAugSepOctNovDec";
-   char text[20];
-   struct wbuf wbuf = MEMBUF(text, sizeof text);
-   time_t now = time(NULL);
-   struct tm *ts = localtime(&now);
-
-   tk->t = TKSTRLIT;
-   tk->wide = 0;
-   tk->len = 11;
-   if (!ts) {
-      tk->s = "\?\?\? \?\? \?\?\?\?";
-      return;
-   }
-   /* the trailing %c with 0 writes the terminating NUL into the buffer */
-   bfmt(&wbuf, "%S %2d %4d%c",
-        months + ts->tm_mon*3, 3,
-        ts->tm_mday, 1900+ts->tm_year, 0);
-   assert(wbuf.len == 11+1);
-   tk->s = alloccopy(lx->tmparena, text, wbuf.len, 1);
-}
-
-
-/* __TIME__: turn *tk into an 8 character string literal of the form
- * "hh:mm:ss" built from the current local time, or question marks in the
- * same layout if the local time is unavailable. */
-static void
-mac__time__handler(struct lexer *lx, struct token *tk)
-{
-   char text[20];
-   struct wbuf wbuf = MEMBUF(text, sizeof text);
-   time_t now = time(NULL);
-   struct tm *ts = localtime(&now);
-
-   tk->t = TKSTRLIT;
-   tk->wide = 0;
-   tk->len = 8;
-   if (!ts) {
-      tk->s = "\?\?:\?\?:\?\?";
-      return;
-   }
-   /* the trailing %c with 0 writes the terminating NUL into the buffer */
-   bfmt(&wbuf, "%.2d:%.2d:%.2d%c", ts->tm_hour, ts->tm_min, ts->tm_sec, 0);
-   tk->s = alloccopy(lx->tmparena, text, wbuf.len, 1);
-   assert(wbuf.len == 8+1);
-}
-
-/* Register the predefined macros: the special ones, which are produced by a
- * handler, and __STDC__, __STDC_VERSION__ and __STDC_HOSTED__. The value of
- * __STDC_VERSION__ is selected by ccopt.cstd. */
-static void
-addpredefmacros(void)
-{
-   static const struct token tok_1 = { TKNUMLIT, .s = "1", .len = 1 };
-   static struct token tok_ver = { TKNUMLIT };
-   static struct macro macs[] = {
-      { "__FILE__", .predefined = 1, .special = 1, .handler = mac__file__handler },
-      { "__LINE__", .predefined = 1, .special = 1, .handler = mac__line__handler },
-      { "__DATE__", .predefined = 1, .special = 1, .handler = mac__date__handler },
-      { "__TIME__", .predefined = 1, .special = 1, .handler = mac__time__handler },
-      { "__STDC__", .predefined = 1, .rlist = { &tok_1, 1 } },
-      { "__STDC_VERSION__", .predefined = 1, .rlist = { &tok_ver, 1 } },
-      { "__STDC_HOSTED__", .predefined = 1, .rlist = { &tok_1, 1 } },
-   };
-   const char *ver = NULL;
-
-   switch (ccopt.cstd) {
-   default:
-      assert(0);
-      /* fallthru */
-   case STDC89: ver = "199409L"; break;
-   case STDC99: ver = "199901L"; break;
-   case STDC11: ver = "201112L"; break;
-   case STDC23: ver = "202311L"; break;
-   }
-   tok_ver.s = ver;
-   tok_ver.len = 7;
-
-   /* macro names are interned before the macros are registered */
-   for (struct macro *m = macs; m != macs + arraylength(macs); ++m) {
-      m->name = intern(m->name);
-      putmac(m);
-   }
-}
-
-/* Set up *lx to lex the file named file.
- *
- * Returns LXERR if the file cannot be opened (err is passed on to
- * openfile()), LXFILESEEN if the file is marked "once" and was seen before,
- * and LXOK on success. *lx is only written in the LXOK case.
- * The first call also registers the predefined macros. All lexers share one
- * statically allocated temporary arena. */
-enum initlexer
-initlexer(struct lexer *lx, const char **err, const char *file)
-{
-   enum { NARENA = 1<<12 };
-   static union { char m[sizeof(struct arena) + NARENA]; struct arena *_align; } amem;
-   static struct arena *tmparena = (void *)amem.m;
-   struct memfile *f;
-   int fileid;
-
-   if (macros.n == 0)
-      addpredefmacros();
-   if (tmparena->cap == 0)
-      tmparena->cap = NARENA;
-
-   fileid = openfile(err, &f, file);
-   if (fileid < 0)
-      return LXERR;
-   if (isoncefile(fileid) && isfileseen(fileid))
-      return LXFILESEEN;
-
-   memset(lx, 0, sizeof *lx);
-   lx->fileid = fileid;
-   lx->dat = f->p;
-   lx->ndat = f->n;
-   lx->tmparena = &tmparena;
-   markfileseen(fileid);
-   return LXOK;
-}
-
-/* Callback that lets the lexer release the temporary arena holding
- * arena-allocated token data. Nothing is released while a macro expansion is
- * in progress, because some of those tokens could still be somewhere in the
- * macro stack. */
-void
-lexerfreetemps(struct lexer *lx)
-{
-   if (lx->macstk)
-      return;
-   freearena(lx->tmparena);
-}
-
-/* Write the preprocessed token stream of lx to out, roughly keeping the
- * source layout. A "# <line> <file>" marker is written at the start and
- * whenever the file changes, a "# <line>" marker when the line changes other
- * than by a forward step of fewer than 5 lines; such small forward steps are
- * reproduced as newlines. The first token of an output line is indented to
- * its source column. */
-void
-lexerdump(struct lexer *lx, struct wbuf *out)
-{
-   struct token prev = {0}, tok;
-   int curfile = lx->fileid;
-   int curline = 1, curcol = 1;
-
-   bfmt(out, "# %d %'s\n", 1, getfilename(curfile));
-   while (lex(lx, &tok) != TKEOF) {
-      int tkline, tkcol;
-
-      getfilepos(&tkline, &tkcol, tok.span.ex.file, tok.span.ex.off);
-      if (tok.span.ex.file != curfile) {
-         /* entered or left a file */
-         curfile = tok.span.ex.file;
-         bfmt(out, "\n# %d %'s\n", tkline, getfilename(curfile));
-         curcol = 1;
-         lexerfreetemps(lx);
-      } else if (curline < tkline && tkline - curline < 5) {
-         /* small gap: emit the newlines themselves */
-         while (curline < tkline) {
-            ioputc(out, '\n');
-            ++curline;
-         }
-         curcol = 1;
-      } else if (curline != tkline) {
-         bfmt(out, "\n# %d\n", tkline);
-         curline = tkline;
-         curcol = 1;
-         lexerfreetemps(lx);
-      } else if (prev.t && wsseparated(&prev, &tok)) {
-         ioputc(out, ' ');
-         ++curcol;
-      }
-
-      if (curcol == 1) {
-         /* first token on the output line: pad up to its column */
-         while (curcol < tkcol) {
-            ioputc(out, ' ');
-            ++curcol;
-         }
-      }
-      curline = tkline;
-      bfmt(out, "%tk", &tok);
-      curcol += tok.span.ex.len;
-      prev = tok;
-   }
-   bfmt(out, "\n");
-   ioflush(out);
-}
-
-/* vim:set ts=3 sw=3 expandtab: */