From 013e4d624873cd47cc5ef2b801e13e9b669c7ae1 Mon Sep 17 00:00:00 2001 From: lemon Date: Mon, 19 Jun 2023 12:28:21 +0200 Subject: frontend: rename parser -> lexer and document c.c --- c.c | 94 ++++++++++------- lex.c | 380 +++++++++++++++++++++++++++++++++--------------------------------- lex.h | 10 +- 3 files changed, 251 insertions(+), 233 deletions(-) diff --git a/c.c b/c.c index 38c8890..fdcaa48 100644 --- a/c.c +++ b/c.c @@ -2,8 +2,9 @@ #include "lex.h" #include "ir.h" +/** C compiler state **/ struct comp { - struct parser pr; + struct lexer lx; struct env *env; struct arena *fnarena, *exarena; struct span fnblkspan; @@ -11,9 +12,9 @@ struct comp { struct block *loopbreak, *loopcont; }; -#define peek(Cm,Tk) lexpeek(&(Cm)->pr,Tk) -#define lex(Cm,Tk) lex(&(Cm)->pr,Tk) - +/** Parsing helper functions **/ +#define peek(Cm,Tk) lexpeek(&(Cm)->lx,Tk) +#define lex(Cm,Tk) lex(&(Cm)->lx,Tk) static bool match(struct comp *cm, struct token *tk, enum toktag t) { @@ -23,7 +24,6 @@ match(struct comp *cm, struct token *tk, enum toktag t) } return 0; } - static bool expect(struct comp *cm, enum toktag t, const char *s) { @@ -36,7 +36,6 @@ expect(struct comp *cm, enum toktag t, const char *s) } return 1; } - static struct token expectdie(struct comp *cm, enum toktag t, const char *s) { @@ -46,13 +45,9 @@ expectdie(struct comp *cm, enum toktag t, const char *s) return tk; } -enum declkind { - DTOPLEVEL, - DFUNCPARAM, - DFUNCVAR, - DFIELD, - DCASTEXPR, -}; +/**************************************/ +/* Data structures for C declarations */ +/**************************************/ enum storageclass { SCNONE, @@ -77,20 +72,41 @@ struct decl { }; }; +enum declkind { + DTOPLEVEL, + DFUNCPARAM, + DFUNCVAR, + DFIELD, + DCASTEXPR, +}; + +/* Since a declaration can have multiple declarators, and we need to process + * each one individually, the declaration parser is a state machine + * (conceptually a generator coroutine); the state is zero-initialized (except + * for the .kind field), each call to pdecl yields the next individual decl, + * st.more indicates whether there are more decls left to parse (the coroutine + * has yielded), or this declaration list is done (the coroutine has finalized) + */ struct declstate { enum declkind kind; union type base; enum storageclass scls; enum qualifier qual; uint align; - bool more, varini, funcdef, tagdecl; - const char **pnames; - struct span *pspans; + bool more, /* caller should keep calling pdecl to get next decl */ + varini, /* caller should parse an initializer ('=' ) and + call pdecl() to advance state before checking .more */ + funcdef, /* caller should parse an func definition ('{' '}'). + the declaration list is finished. */ + tagdecl; + const char **pnames; /* param names for function definition */ + struct span *pspans; /* param spans ditto */ }; - static struct decl pdecl(struct declstate *st, struct comp *cm); + static struct decl *finddecl(struct comp *cm, const char *name); +/* next token starts a decl? */ static bool isdecltok(struct comp *cm) { @@ -111,13 +127,13 @@ isdecltok(struct comp *cm) } -/*******/ -/* ENV */ -/*******/ +/**********************************/ +/* Environment (scope) management */ +/**********************************/ static struct decl envdeclsbuf[1<<10]; static vec_of(struct decl) envdecls = VINIT(envdeclsbuf, arraylength(envdeclsbuf)); -struct tagged { +struct tagged { /* a tagged type declaration */ union type ty; struct span span; }; @@ -186,6 +202,7 @@ envaddtagged(struct env *env, union type ty, const struct span *span) return &envtagged.p[envtagged.n - 1]; } +/* like enviterdecl */ static inline bool envitertagged(struct tagged **l, struct env *env) { @@ -276,13 +293,9 @@ deftagged(struct comp *cm, struct span *span, enum typetag tt, const char *name, return envaddtagged(cm->env, ty.t ? ty : mktagtype(name, &td), span)->ty; } -/*******************/ -/*** EXPRESSIONS ***/ -/*******************/ - -/**********************/ -/* EXPR TYPE CHECKING */ -/**********************/ +/*********************/ +/* Expr Typechecking */ +/*********************/ #define iszero(ex) ((ex).t == ENUMLIT && (ex).u == 0) @@ -567,7 +580,7 @@ bintypecheck(const struct span *span, enum toktag tt, struct expr *lhs, struct e } /****************/ -/* EXPR PARSING */ +/* Expr Parsing */ /****************/ #define mkexpr(t_,span_,ty_,...) ((struct expr){.t=(t_), .ty=(ty_), .span=(span_), __VA_ARGS__}) @@ -921,7 +934,8 @@ Postfix: lex(cm, &tk); ek = bintab[tk.t].t; if (ek != ECOND) { - bool leftassoc = (bintab[tk.t].k & BCSET) == 0; /* only the assignment operators are right-associative */ + /* only the assignment operators are right-associative */ + bool leftassoc = (bintab[tk.t].k & BCSET) == 0; /* ex OP rhs */ span.sl = tk.span.sl; span.ex = ex.span.ex; @@ -971,9 +985,9 @@ commaexpr(struct comp *cm) return exprparse(cm, 1); } -/*********/ -/* -> IR */ -/*********/ +/*****************/ +/* IR Generation */ +/*****************/ static union ref expraddr(struct function *, const struct expr *); static union ref compileexpr(struct function *, const struct expr *, bool discard); @@ -1591,6 +1605,10 @@ compileexpr(struct function *fn, const struct expr *ex, bool discard) } } +/************************************/ +/* Statements parsing & compilation */ +/************************************/ + static void stmtterm(struct comp *cm) { @@ -1976,9 +1994,9 @@ function(struct comp *cm, struct function *fn, const char **pnames, const struct } } -/********/ -/* DECL */ -/********/ +/*****************/ +/* Decls Parsing */ +/*****************/ static union type buildagg(struct comp *cm, enum typetag tt, const char *name, int id) @@ -2699,7 +2717,7 @@ docomp(struct comp *cm) while (peek(cm, tk) != TKEOF) { struct expr ini; - struct declstate st = { DTOPLEVEL, }; + struct declstate st = { DTOPLEVEL }; do { int nerr = nerror; struct decl decl = pdecl(&st, cm); @@ -2749,7 +2767,7 @@ ccomp(const char *file) static union { char m[sizeof(struct arena) + N]; struct arena *_align; } amem[2]; struct comp cm = {0}; - initparser(&cm.pr, file, &cm.exarena); + initlexer(&cm.lx, file, &cm.exarena); cm.fnarena = (void *)amem[0].m; cm.fnarena->cap = N; cm.exarena = (void *)amem[1].m; diff --git a/lex.c b/lex.c index b63666d..d13a549 100644 --- a/lex.c +++ b/lex.c @@ -54,72 +54,72 @@ ident: } static int -next0(struct parser *pr) +next0(struct lexer *lx) { bool trigraph = ccopt.trigraph; int n, c; - while (!memcmp(pr->dat+pr->idx, "\\\n", n = 2) - || (trigraph && !memcmp(pr->dat+pr->idx, "\?\?/\n", n = 4))) { - pr->idx += n; - addfileline(pr->fileid, pr->idx); + while (!memcmp(lx->dat+lx->idx, "\\\n", n = 2) + || (trigraph && !memcmp(lx->dat+lx->idx, "\?\?/\n", n = 4))) { + lx->idx += n; + addfileline(lx->fileid, lx->idx); } - if (pr->idx >= pr->ndat) + if (lx->idx >= lx->ndat) return TKEOF; - if (trigraph && !memcmp(pr->dat+pr->idx, "??", 2)) { - switch (pr->dat[pr->idx+2]) { - case '=': pr->idx += 3; return '#'; - case '(': pr->idx += 3; return '['; - case ')': pr->idx += 3; return ']'; - case '!': pr->idx += 3; return '|'; - case '<': pr->idx += 3; return '{'; - case '>': pr->idx += 3; return '}'; - case '-': pr->idx += 3; return '~'; - case '/': pr->idx += 3; return '\\'; - case '\'': pr->idx += 3; return '^'; + if (trigraph && !memcmp(lx->dat+lx->idx, "??", 2)) { + switch (lx->dat[lx->idx+2]) { + case '=': lx->idx += 3; return '#'; + case '(': lx->idx += 3; return '['; + case ')': lx->idx += 3; return ']'; + case '!': lx->idx += 3; return '|'; + case '<': lx->idx += 3; return '{'; + case '>': lx->idx += 3; return '}'; + case '-': lx->idx += 3; return '~'; + case '/': lx->idx += 3; return '\\'; + case '\'': lx->idx += 3; return '^'; } } - if ((c = pr->dat[pr->idx++]) == '\n') { - addfileline(pr->fileid, pr->idx); + if ((c = lx->dat[lx->idx++]) == '\n') { + addfileline(lx->fileid, lx->idx); } return c; } static int -next(struct parser *pr) +next(struct lexer *lx) { int c; - if (pr->npeekchr) { - int c = pr->peekchr[0]; - pr->chridx = pr->peekcidx[0]; - memmove(pr->peekchr, pr->peekchr + 1, --pr->npeekchr * sizeof *pr->peekchr); - memmove(pr->peekcidx, pr->peekcidx + 1, pr->npeekchr * sizeof *pr->peekcidx); - pr->eof = c == TKEOF; + if (lx->npeekchr) { + int c = lx->peekchr[0]; + lx->chridx = lx->peekcidx[0]; + memmove(lx->peekchr, lx->peekchr + 1, --lx->npeekchr * sizeof *lx->peekchr); + memmove(lx->peekcidx, lx->peekcidx + 1, lx->npeekchr * sizeof *lx->peekcidx); + lx->eof = c == TKEOF; return c; } - c = next0(pr); - pr->eof = c == TKEOF; - pr->chridx = pr->idx; + c = next0(lx); + lx->eof = c == TKEOF; + lx->chridx = lx->idx; return c; } static int -peek(struct parser *pr, int off) +peek(struct lexer *lx, int off) { - assert(off < arraylength(pr->peekchr)); - while (pr->npeekchr < off+1) { - pr->peekchr[pr->npeekchr] = next0(pr); - pr->peekcidx[pr->npeekchr++] = pr->idx; + assert(off < arraylength(lx->peekchr)); + while (lx->npeekchr < off+1) { + lx->peekchr[lx->npeekchr] = next0(lx); + lx->peekcidx[lx->npeekchr++] = lx->idx; } - return pr->peekchr[off]; + return lx->peekchr[off]; } static bool -match(struct parser *pr, int c) +match(struct lexer *lx, int c) { - if (!pr->eof && peek(pr, 0) == c) { - next(pr); + if (!lx->eof && peek(lx, 0) == c) { + next(lx); return 1; } return 0; @@ -255,24 +255,24 @@ parsenumlit(uvlong *outi, double *outf, const struct token *tk, bool ispp) } static void -readstrchrlit(struct parser *pr, struct token *tk, char delim) +readstrchrlit(struct lexer *lx, struct token *tk, char delim) { int c, i; uchar tmp[80]; vec_of(uchar) b = VINIT(tmp, sizeof tmp); struct span span = {0}; uint n, beginoff, idx; - beginoff = idx = pr->chridx; + beginoff = idx = lx->chridx; - while ((c = next(pr)) != delim) { + while ((c = next(lx)) != delim) { if (c == '\n' || c == TKEOF) { Noterm: - span.sl = (struct span0) { idx, pr->chridx - idx, pr->fileid }; + span.sl = (struct span0) { idx, lx->chridx - idx, lx->fileid }; error(&span, "missing terminating %c character", delim); break; } else if (c == '\\') { - span.sl = (struct span0) { idx, pr->chridx - idx, pr->fileid }; - switch (c = next(pr)) { + span.sl = (struct span0) { idx, lx->chridx - idx, lx->fileid }; + switch (c = next(lx)) { case '\n': case TKEOF: goto Noterm; case '\'': c = '\''; break; @@ -288,14 +288,14 @@ readstrchrlit(struct parser *pr, struct token *tk, char delim) case 'v': c = '\v'; break; case 'x': case 'X': /* hex */ n = 0; - if (!aisxdigit(peek(pr, 0))) goto Badescseq; + if (!aisxdigit(peek(lx, 0))) goto Badescseq; do { - c = next(pr); + c = next(lx); if (c-'0' < 10) n = n<<4 | (c-'0'); else n = n<<4 | (10 + (c|0x20)-'a'); - } while (aisxdigit(peek(pr, 0))); + } while (aisxdigit(peek(lx, 0))); if (n > 0xFF) { - span.sl.len = pr->chridx - span.sl.off; + span.sl.len = lx->chridx - span.sl.off; error(&span, "hex escape sequence out of range"); } c = n & 0xFF; @@ -304,52 +304,52 @@ readstrchrlit(struct parser *pr, struct token *tk, char delim) if (aisodigit(c)) { /* octal */ n = c-'0'; for (i = 2; i--;) { - if (!aisodigit(peek(pr, 0))) break; - n = n<<3 | ((c = next(pr))-'0'); + if (!aisodigit(peek(lx, 0))) break; + n = n<<3 | ((c = next(lx))-'0'); } if (n > 0377) { - span.sl.len = pr->chridx - span.sl.off; + span.sl.len = lx->chridx - span.sl.off; error(&span, "octal escape sequence out of range"); } c = n; break; } Badescseq: - span.sl.len = pr->chridx - span.sl.off; + span.sl.len = lx->chridx - span.sl.off; error(&span, "invalid escape sequence"); } } vpush(&b, c); - idx = pr->chridx;; + idx = lx->chridx;; } if (delim == '"') { tk->t = TKSTRLIT; tk->len = b.n; - if (pr->chridx - beginoff == tk->len + 1) { + if (lx->chridx - beginoff == tk->len + 1) { tk->litlit = 1; - tk->s = (char *)&pr->dat[beginoff]; + tk->s = (char *)&lx->dat[beginoff]; } else { tk->litlit = 0; vpush(&b, 0); - tk->s = alloc(pr->tmparena, b.n, 1); + tk->s = alloc(lx->tmparena, b.n, 1); memcpy((char *)tk->s, b.p, b.n); } } else { if (b.n == 0) { - span.sl = (struct span0) { idx, pr->chridx - idx, pr->fileid }; + span.sl = (struct span0) { idx, lx->chridx - idx, lx->fileid }; error(&span, "empty character literal"); } else if (b.n > targ_primsizes[TYINT]) { - span.sl = (struct span0) { idx, pr->chridx - idx, pr->fileid }; + span.sl = (struct span0) { idx, lx->chridx - idx, lx->fileid }; error(&span, "multicharacter literal too long"); } tk->t = TKCHRLIT; tk->len = b.n; - if (pr->chridx - beginoff == tk->len + 1) { + if (lx->chridx - beginoff == tk->len + 1) { tk->litlit = 1; - tk->s = (char *)&pr->dat[beginoff]; + tk->s = (char *)&lx->dat[beginoff]; } else { tk->litlit = 0; - tk->s = alloc(pr->tmparena, tk->len, 1); + tk->s = alloc(lx->tmparena, tk->len, 1); memcpy((char *)tk->s, b.p, tk->len); } } @@ -367,15 +367,15 @@ isppnum(char prev, char c) } static int -lex0(struct parser *pr, struct token *tk) +lex0(struct lexer *lx, struct token *tk) { int idx, c; #define RET(t_) do { tk->t = (t_); goto End; } while (0) Begin: - idx = pr->chridx; - switch (c = next(pr)) { + idx = lx->chridx; + switch (c = next(lx)) { case ' ': case '\r': case '\t': goto Begin; break; @@ -385,77 +385,77 @@ Begin: case '@': case '`': case '\\': case TKEOF: case '\n': RET(c); case '!': - if (match(pr, '=')) RET(TKNEQ); + if (match(lx, '=')) RET(TKNEQ); RET(c); case '#': - if (match(pr, '#')) RET(TKPPCAT); + if (match(lx, '#')) RET(TKPPCAT); RET(c); case '+': - if (match(pr, '+')) RET(TKINC); - if (match(pr, '=')) RET(TKSETADD); + if (match(lx, '+')) RET(TKINC); + if (match(lx, '=')) RET(TKSETADD); RET(c); case '-': - if (match(pr, '-')) RET(TKDEC); - if (match(pr, '=')) RET(TKSETSUB); - if (match(pr, '>')) RET(TKARROW); + if (match(lx, '-')) RET(TKDEC); + if (match(lx, '=')) RET(TKSETSUB); + if (match(lx, '>')) RET(TKARROW); RET(c); case '*': - if (match(pr, '=')) RET(TKSETMUL); + if (match(lx, '=')) RET(TKSETMUL); RET(c); case '/': - if (match(pr, '=')) RET(TKSETDIV); - if (match(pr, '/')) { + if (match(lx, '=')) RET(TKSETDIV); + if (match(lx, '/')) { /* // comment */ - while (!pr->eof && !match(pr, '\n')) - next(pr); + while (!lx->eof && !match(lx, '\n')) + next(lx); goto Begin; } - if (match(pr, '*')) { + if (match(lx, '*')) { /* comment */ - while (peek(pr, 0) != '*' || peek(pr, 1) != '/') { - if (next(pr) == TKEOF) { - struct span span = {{ idx, pr->chridx - idx, pr->fileid }}; + while (peek(lx, 0) != '*' || peek(lx, 1) != '/') { + if (next(lx) == TKEOF) { + struct span span = {{ idx, lx->chridx - idx, lx->fileid }}; fatal(&span, "unterminated multiline comment"); } } - next(pr), next(pr); + next(lx), next(lx); goto Begin; } RET(c); case '%': - if (match(pr, '=')) RET(TKSETREM); + if (match(lx, '=')) RET(TKSETREM); RET(c); case '^': - if (match(pr, '=')) RET(TKSETXOR); + if (match(lx, '=')) RET(TKSETXOR); RET(c); case '=': - if (match(pr, '=')) RET(TKEQU); + if (match(lx, '=')) RET(TKEQU); RET(c); case '<': - if (match(pr, '=')) RET(TKLTE); - if (match(pr, '<')) RET(match(pr, '=') ? TKSETSHL : TKSHL); + if (match(lx, '=')) RET(TKLTE); + if (match(lx, '<')) RET(match(lx, '=') ? TKSETSHL : TKSHL); RET(c); case '>': - if (match(pr, '=')) RET(TKGTE); - if (match(pr, '>')) RET(match(pr, '=') ? TKSETSHR : TKSHR); + if (match(lx, '=')) RET(TKGTE); + if (match(lx, '>')) RET(match(lx, '=') ? TKSETSHR : TKSHR); RET(c); case '&': - if (match(pr, '&')) RET(TKLOGAND); - if (match(pr, '=')) RET(TKSETAND); + if (match(lx, '&')) RET(TKLOGAND); + if (match(lx, '=')) RET(TKSETAND); RET(c); case '|': - if (match(pr, '|')) RET(TKLOGIOR); - if (match(pr, '=')) RET(TKSETIOR); + if (match(lx, '|')) RET(TKLOGIOR); + if (match(lx, '=')) RET(TKSETIOR); RET(c); case '\'': case '"': - readstrchrlit(pr, tk, c); + readstrchrlit(lx, tk, c); goto End; case '.': - if (peek(pr, 0) == '.' && peek(pr, 1) == '.') { - next(pr), next(pr); + if (peek(lx, 0) == '.' && peek(lx, 1) == '.') { + next(lx), next(lx); RET(TKDOTS); - } else if (aisdigit(peek(pr, 0))) { + } else if (aisdigit(peek(lx, 0))) { goto Numlit; } RET(c); @@ -464,15 +464,15 @@ Begin: char tmp[70]; int n = 0; tmp[n++] = c; - while (isppnum(tmp[n-1], peek(pr, 0))) { + while (isppnum(tmp[n-1], peek(lx, 0))) { assert(n < arraylength(tmp)-1 && "too big"); - tmp[n++] = next(pr); + tmp[n++] = next(lx); } tmp[n] = 0; tk->len = n; - if (n == pr->chridx - idx) tk->s = (char *)&pr->dat[idx]; + if (n == lx->chridx - idx) tk->s = (char *)&lx->dat[idx]; else { - tk->s = alloc(pr->tmparena, n, 1); + tk->s = alloc(lx->tmparena, n, 1); memcpy((char *)tk->s, tmp, n); } RET(TKNUMLIT); @@ -480,23 +480,23 @@ Begin: char tmp[70]; int n = 0; tmp[n++] = c; - while (!aissep(c = peek(pr, 0))) { + while (!aissep(c = peek(lx, 0))) { assert(n < arraylength(tmp)-1 && "too big"); - tmp[n++] = next(pr); + tmp[n++] = next(lx); } tmp[n] = 0; if (!identkeyword(tk, tmp, n)) - warn(&(struct span) {{ idx, pr->chridx - idx, pr->fileid }}, + warn(&(struct span) {{ idx, lx->chridx - idx, lx->fileid }}, "%'tk in %M is an extension", tk); goto End; } } - fatal(&(struct span) {{ idx, pr->chridx - idx, pr->fileid }}, + fatal(&(struct span) {{ idx, lx->chridx - idx, lx->fileid }}, "unexpected character %'c at %d", c, idx); End: - tk->span.sl.file = pr->fileid; + tk->span.sl.file = lx->fileid; tk->span.sl.off = idx; - tk->span.sl.len = pr->chridx - idx; + tk->span.sl.len = lx->chridx - idx; tk->span.ex = tk->span.sl; return tk->t; #undef RET @@ -607,33 +607,33 @@ putmac(struct macro *mac) } static void -ppskipline(struct parser *pr) +ppskipline(struct lexer *lx) { - while (peek(pr, 0) != '\n' && peek(pr, 0) != TKEOF) - next(pr); + while (peek(lx, 0) != '\n' && peek(lx, 0) != TKEOF) + next(lx); } static void -ppdefine(struct parser *pr) +ppdefine(struct lexer *lx) { struct token tk0, tk; struct macro mac = {0}; vec_of(struct token) rlist = {0}; - lex0(pr, &tk0); + lex0(lx, &tk0); if (!isppident(tk0)) { error(&tk0.span, "macro name missing"); - ppskipline(pr); + ppskipline(lx); return; } mac.name = tk0.s; mac.span = tk0.span.sl; - if (peek(pr, 0) == '(') { + if (peek(lx, 0) == '(') { mac.fnlike = 1; } - while (lex0(pr, &tk) != '\n' && tk.t != TKEOF) { + while (lex0(lx, &tk) != '\n' && tk.t != TKEOF) { if (!wsseparated(&tk0, &tk)) warn(&tk.span, "no whitespace after macro name"); vpush(&rlist, tk); @@ -645,7 +645,7 @@ ppdefine(struct parser *pr) static struct token epeektk; static int -elex(struct parser *pr, struct token *tk) +elex(struct lexer *lx, struct token *tk) { if (epeektk.t) { int tt = epeektk.t; @@ -653,13 +653,13 @@ elex(struct parser *pr, struct token *tk) epeektk.t = 0; return tt; } - return lex0(pr, tk); + return lex0(lx, tk); } static int -epeek(struct parser *pr, struct token *tk) +epeek(struct lexer *lx, struct token *tk) { - if (!epeektk.t) elex(pr, &epeektk); + if (!epeektk.t) elex(lx, &epeektk); if (tk) *tk = epeektk; return epeektk.t; } @@ -686,7 +686,7 @@ tkprec(int tt) } static vlong -expr(struct parser *pr, bool *pu, int prec) +expr(struct lexer *lx, bool *pu, int prec) { vlong x, y; struct token tk; @@ -697,18 +697,18 @@ expr(struct parser *pr, bool *pu, int prec) bool xu = 0, yu; /* x unsigned?; y unsigned? */ Unary: - switch (elex(pr, &tk)) { + switch (elex(lx, &tk)) { case '-': case '~': case '!': unops[nunop++] = tk.t; if (nunop >= arraylength(unops)) { - x = expr(pr, &xu, 999); + x = expr(lx, &xu, 999); break; } /* fallthru */ case '+': goto Unary; case '(': - x = expr(pr, &xu, 1); - if (elex(pr, &tk) != ')') { + x = expr(lx, &xu, 1); + if (elex(lx, &tk) != ')') { error(&tk.span, "expected ')'"); goto Err; } @@ -744,11 +744,11 @@ Unary: default: assert(0); } - while ((opprec = tkprec(epeek(pr, &tk))) >= prec) { - elex(pr, &tk); + while ((opprec = tkprec(epeek(lx, &tk))) >= prec) { + elex(lx, &tk); if (tk.t != '?') { bool u; - y = expr(pr, &yu, opprec + 1); + y = expr(lx, &yu, opprec + 1); u = xu | yu; switch ((int) tk.t) { case '+': x += (uvlong) y; break; @@ -782,28 +782,28 @@ Unary: xu = u; } else { struct span span = tk.span; - vlong m = expr(pr, &xu, 1); - if (elex(pr, &tk) != ':') { + vlong m = expr(lx, &xu, 1); + if (elex(lx, &tk) != ':') { error(&tk.span, "expected ':'"); note(&span, "to match conditional expression here"); goto Err; } - y = expr(pr, &yu, 1); + y = expr(lx, &yu, 1); efmt("%ld ? %ld : %ld\n", x, m, y); x = x ? m : y; xu |= yu; } } if (!prec) /* not a sub expr */ - if (elex(pr, &tk) != '\n' && tk.t != TKEOF) { + if (elex(lx, &tk) != '\n' && tk.t != TKEOF) { error(&tk.span, "garbage after preprocessor expression"); - ppskipline(pr); + ppskipline(lx); } if (pu) *pu = xu; return x; Err: - ppskipline(pr); + ppskipline(lx); if (pu) *pu = xu; return 0; } @@ -821,9 +821,9 @@ static struct ppcnd { static int nppcnd; static void -ppif(struct parser *pr, const struct span *span) +ppif(struct lexer *lx, const struct span *span) { - vlong v = expr(pr, NULL, 0); + vlong v = expr(lx, NULL, 0); assert(nppcnd < arraylength(ppcndstk) && "too many nested #if"); ppcndstk[nppcnd].ifspan = span->sl; ppcndstk[nppcnd].cnd = v ? PPCNDTRUE : PPCNDFALSE; @@ -831,17 +831,17 @@ ppif(struct parser *pr, const struct span *span) } static void -ppelif(struct parser *pr, const struct span *span) +ppelif(struct lexer *lx, const struct span *span) { vlong v; struct ppcnd *cnd; if (!nppcnd) { error(span, "#elif without matching #if"); - ppif(pr, span); + ppif(lx, span); return; } - v = expr(pr, NULL, 0); + v = expr(lx, NULL, 0); cnd = &ppcndstk[nppcnd-1]; if (cnd->elsep) { error(span, "#elif after #else"); @@ -855,12 +855,12 @@ ppelif(struct parser *pr, const struct span *span) } static void -ppendif(struct parser *pr, const struct span *span) +ppendif(struct lexer *lx, const struct span *span) { struct token tk; - if (lex0(pr, &tk) != '\n' && tk.t != TKEOF) { + if (lex0(lx, &tk) != '\n' && tk.t != TKEOF) { error(&tk.span, "garbage after #endif"); - ppskipline(pr); + ppskipline(lx); } if (!nppcnd) { error(span, "#endif without matching #if"); @@ -870,13 +870,13 @@ ppendif(struct parser *pr, const struct span *span) } static void -ppelse(struct parser *pr, const struct span *span) +ppelse(struct lexer *lx, const struct span *span) { struct token tk; struct ppcnd *cnd; - if (lex0(pr, &tk) != '\n' && tk.t != TKEOF) { + if (lex0(lx, &tk) != '\n' && tk.t != TKEOF) { error(&tk.span, "garbage after #else"); - ppskipline(pr); + ppskipline(lx); } if (!nppcnd) { error(span, "#else without matching #if"); @@ -894,7 +894,7 @@ ppelse(struct parser *pr, const struct span *span) static struct macrostack mstk[64], *mfreelist; static bool -tryexpand(struct parser *pr, const struct token *tk) +tryexpand(struct lexer *lx, const struct token *tk) { static bool inimstk; struct macro *mac; @@ -914,7 +914,7 @@ tryexpand(struct parser *pr, const struct token *tk) macidx = mac - macros.p; /* prevent infinite recursion */ - for (l = pr->macstk; l; l = l->link) + for (l = lx->macstk; l; l = l->link) if (l->mac == macidx) return 0; @@ -924,26 +924,26 @@ tryexpand(struct parser *pr, const struct token *tk) if (!(l = mfreelist)) fatal(&tk->span, "macro depth limit reached"); l = mfreelist; mfreelist = l->link; - l->link = pr->macstk; + l->link = lx->macstk; l->mac = macidx; l->idx = 0; l->exspan = tk->span.ex; - pr->macstk = l; + lx->macstk = l; } return 1; } static void -popmac(struct parser *pr) +popmac(struct lexer *lx) { struct macrostack *stk; - assert(stk = pr->macstk); + assert(stk = lx->macstk); do { - pr->macstk = stk->link; + lx->macstk = stk->link; stk->link = mfreelist; mfreelist = stk; - } while ((stk = pr->macstk) && stk->idx >= macros.p[stk->mac].rlist.n); + } while ((stk = lx->macstk) && stk->idx >= macros.p[stk->mac].rlist.n); } enum directive { @@ -1004,47 +1004,47 @@ findppcmd(const struct token *tk) } int -lex(struct parser *pr, struct token *tk_) +lex(struct lexer *lx, struct token *tk_) { struct token tkx[1], *tk; int t; bool linebegin, skip; - assert(tk_ != &pr->peektok); + assert(tk_ != &lx->peektok); tk = tk_ ? tk_ : tkx; - if (pr->peektok.t) { - *tk = pr->peektok; - memset(&pr->peektok, 0, sizeof pr->peektok); + if (lx->peektok.t) { + *tk = lx->peektok; + memset(&lx->peektok, 0, sizeof lx->peektok); return tk->t; } - if (pr->macstk) { - struct macro *mac = ¯os.p[pr->macstk->mac]; + if (lx->macstk) { + struct macro *mac = ¯os.p[lx->macstk->mac]; struct rlist rl = mac->rlist; - *tk = rl.tk[pr->macstk->idx++]; + *tk = rl.tk[lx->macstk->idx++]; assert(tk->t); - tk->span.ex = pr->macstk->exspan; - if (tryexpand(pr, tk)) - return lex(pr, tk_); - if (pr->macstk->idx == rl.n) - popmac(pr); + tk->span.ex = lx->macstk->exspan; + if (tryexpand(lx, tk)) + return lex(lx, tk_); + if (lx->macstk->idx == rl.n) + popmac(lx); return tk->t; } skip = nppcnd ? ppcndstk[nppcnd-1].cnd != PPCNDTRUE : 0; for (linebegin = 0;;) { - while ((t = lex0(pr, tk)) == '\n') linebegin = 1; + while ((t = lex0(lx, tk)) == '\n') linebegin = 1; if (t == '#' && linebegin) { - if (lex0(pr, tk) == '\n') { } + if (lex0(lx, tk) == '\n') { } else if (isppident(*tk)) { if (!skip) { switch (findppcmd(tk)) { case PPXXX: goto BadPP; - case PPDEFINE: ppdefine(pr); break; - case PPIF: ppif(pr, &tk->span); break; - case PPELIF: ppelif(pr, &tk->span); break; - case PPENDIF: ppendif(pr, &tk->span); break; - case PPELSE: ppelse(pr, &tk->span); break; + case PPDEFINE: ppdefine(lx); break; + case PPIF: ppif(lx, &tk->span); break; + case PPELIF: ppelif(lx, &tk->span); break; + case PPENDIF: ppendif(lx, &tk->span); break; + case PPELSE: ppelse(lx, &tk->span); break; default: assert(0&&"nyi"); } } else { @@ -1055,10 +1055,10 @@ lex(struct parser *pr, struct token *tk_) ppcndstk[nppcnd].cnd = PPCNDTAKEN; ppcndstk[nppcnd++].elsep = 0; break; - case PPELIF: ppelif(pr, &tk->span); break; - case PPENDIF: ppendif(pr, &tk->span); break; - case PPELSE: ppelse(pr, &tk->span); break; - default: ppskipline(pr); break; + case PPELIF: ppelif(lx, &tk->span); break; + case PPENDIF: ppendif(lx, &tk->span); break; + case PPELSE: ppelse(lx, &tk->span); break; + default: ppskipline(lx); break; } } skip = nppcnd ? ppcndstk[nppcnd-1].cnd != PPCNDTRUE : 0; @@ -1067,13 +1067,13 @@ lex(struct parser *pr, struct token *tk_) BadPP: error(&tk->span, "invalid preprocessor directive"); } - ppskipline(pr); + ppskipline(lx); } } else { linebegin = 0; if (skip && tk->t != TKEOF) continue; - if (tryexpand(pr, tk)) - return lex(pr, tk_); + if (tryexpand(lx, tk)) + return lex(lx, tk_); if (t == TKEOF && nppcnd) { struct span span = { ppcndstk[nppcnd-1].ifspan }; error(&span, "#if is not matched by #endif"); @@ -1085,34 +1085,34 @@ lex(struct parser *pr, struct token *tk_) } int -lexpeek(struct parser *pr, struct token *tk_) +lexpeek(struct lexer *lx, struct token *tk_) { struct token tkx[1], *tk; uint t; tk = tk_ ? tk_ : tkx; - if ((t = pr->peektok.t)) { - *tk = pr->peektok; + if ((t = lx->peektok.t)) { + *tk = lx->peektok; return t; } - t = lex(pr, tk); - pr->peektok = *tk; + t = lex(lx, tk); + lx->peektok = *tk; return t; } void -initparser(struct parser *pr, const char *file, struct arena **tmparena) +initlexer(struct lexer *lx, const char *file, struct arena **tmparena) { const char *error; struct memfile *f; - memset(pr, 0, sizeof *pr); - pr->fileid = openfile(&error, &f, file); - if (pr->fileid < 0) + memset(lx, 0, sizeof *lx); + lx->fileid = openfile(&error, &f, file); + if (lx->fileid < 0) fatal(NULL, "Cannot open %'s: %s", file, error); - pr->dat = f->p; - pr->ndat = f->n; - pr->tmparena = tmparena; + lx->dat = f->p; + lx->ndat = f->n; + lx->tmparena = tmparena; } diff --git a/lex.h b/lex.h index 29b40bb..a8e972f 100644 --- a/lex.h +++ b/lex.h @@ -85,8 +85,8 @@ struct macrostack { }; extern int nerror; -struct parser { - struct parser *save; +struct lexer { + struct lexer *save; short fileid; const uchar *dat; uint ndat; @@ -101,9 +101,9 @@ struct parser { }; const char *intern(const char *); -int lex(struct parser *, struct token *); -int lexpeek(struct parser *, struct token *); +int lex(struct lexer *, struct token *); +int lexpeek(struct lexer *, struct token *); enum typetag parsenumlit(uvlong *, double *, const struct token *, bool ispp); -void initparser(struct parser *, const char *file, struct arena **); +void initlexer(struct lexer *, const char *file, struct arena **); /* vim:set ts=3 sw=3 expandtab: */ -- cgit v1.2.3