diff options
Diffstat (limited to 'bootstrap/parse.c')
| -rw-r--r-- | bootstrap/parse.c | 1733 |
1 files changed, 1733 insertions, 0 deletions
diff --git a/bootstrap/parse.c b/bootstrap/parse.c new file mode 100644 index 0000000..96a185c --- /dev/null +++ b/bootstrap/parse.c @@ -0,0 +1,1733 @@ +#include "all.h" + +/***********/ +/** Lexer **/ +/***********/ + +static u8 +chr(struct parser *P) { + int c; + if (P->peekchr >= 0) { + c = P->peekchr; + P->peekchr = -1; + } else { + c = fgetc(P->fp); + } + P->curspan.idx++; + P->curspan.col++; + if (c == '\n') { + P->curspan.col = 1; + P->curspan.line++; + } + if (c == EOF || !c) { + P->eof = 1; + return 0; + } + return c; +} + +static u8 +chrpeek(struct parser *P) { + int c; + if (P->peekchr >= 0) + return P->peekchr; + c = fgetc(P->fp); + return P->peekchr = c == EOF ? 0 : (u8)c; +} + +static bool +chrmatch(struct parser *P, u8 c) { + if (chrpeek(P) == c) { + chr(P); + return 1; + } + return 0; +} + +static bool +aisspace(char c) { + switch (c) + case ' ': case '\t': case '\n': + case '\r': case '\v': case '\f': + return 1; + return 0; +} + +static bool +aisdigit(char c) { + return c >= '0' && c <= '9'; +} + +static bool +aisxdigit(char c) { + return (c >= '0' && c <= '9') + || (c >= 'A' && c <= 'F') + || (c >= 'a' && c <= 'f') ; +} + +static bool +aisalpha(char c) { + return (c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z'); +} + +static bool +aissep(char c) { + if (aisspace(c)) + return 1; + switch (c) + case '(': case ')': case '[': case ']': + case '{': case '}': case '.': case ',': + case ';': case '?': case '+': case '-': + case '*': case '/': case '&': case '|': + case '^': case '~': case '=': case '\'': + case '"': case '<': case '>': + return 1; + return 0; +} + +static void +eatspaces(struct parser *P) { + for (;; chr(P)) { + if (aisspace(chrpeek(P))) + continue; + else if (chrpeek(P) == '!') { + while (chrpeek(P) != '\n' && chrpeek(P) != 0) + chr(P); + } else { + break; + } + } +} + +static int +readtilsep(struct parser *P, char *buf, int n, bool dot) { + int i = 0; + u8 c; + while (!aissep((c = chrpeek(P))) || (dot && c == '.')) { + chr(P); + if (i >= n - 1) + return -1; + buf[i++] = c; + } + buf[i++] = 0; + return i; +} + +const char *keyword2str[] = { +#define KWSTR(kw) #kw, + LIST_KEYWORDS(KWSTR) +#undef KWSTR +}; +// #define NUM_KEYWORDS (sizeof keyword2str / sizeof *keyword2str) + +static int +str2keyword(const char *s) { + int i = 0, j = NUM_KEYWORDS - 1; + while (i <= j) { + int k = (j + i) / 2; + int cmp = strcmp(keyword2str[k], s); + if (!cmp) { + return k; + } else if (cmp < 0) { + i = k + 1; + } else { + j = k - 1; + } + } + return -1; +} + +static bool +str2number(struct tok *res, const char *s) { + char c; + u64 acc = 0; + double accf = 0.0; + double fmul = 0.1; + int base = 10; + bool flt = 0; + int nused = 0; + + for (int i = 0; (c = s[i]); ++i) { + extern int tolower(int); + + if (i == 0 && c == '0') + --nused; + + if (i == 1 && tolower(c) == 'x') { + base = 16; + continue; + } + if (!flt && c == '.' && base == 10) { + flt = 1; + accf = acc; + continue; + } + + if (nused > 0 && c == '_') + continue; + + if (base == 16 && !aisxdigit(c)) + return 0; + else if (base != 16 && (c < '0' || c > '0' + base - 1)) + return 0; + + ++nused; + if (flt) { + accf = accf + (c - '0') * fmul; + fmul *= 0.1; + } else { + c = tolower(c); + acc = acc * base + (c <= '9' ? c - '0' : c - 'a' + 10); + } + } + memset(res, 0, sizeof *res); + if (flt) { + res->t = TKflolit; + res->flit.f = accf; + } else { + res->t = TKintlit; + res->ilit.i = acc; + } + return 1; +} + +static size_t +readstrlit(struct parser *P, u8 delim, char *s, size_t n) { + u8 c; + size_t i = 0; + while ((c = chr(P)) != delim) { + if (!c || c == '\n') + fatal(P, P->tokspan, "unterminated %s literal", + delim == '"' ? "string" : "character"); + if (i == n - 1) + assert(0 && "too long string/charliteral"); + + if (c != '\\') + s[i++] = c; + else switch ((c = chr(P))) { + case 0: case '\n': + fatal(P, P->tokspan, "unterminated %s literal", + delim == '"' ? "string" : "character"); + case '\'': s[i++] = '\''; break; + case '"': s[i++] = '"'; break; + case 'n': s[i++] = '\n'; break; + case 'r': s[i++] = '\r'; break; + case 't': s[i++] = '\t'; break; + case 'v': s[i++] = '\v'; break; + case 'f': s[i++] = '\f'; break; + case '0': s[i++] = '\0'; break; + default: + fatal(P, P->tokspan, "unknown escape sequence '\\%c'", c); + } + } + s[i++] = 0; + return i - 1; +} + +#define MAX_MACROEXPAND_ITER 100 + +// guard used at macro arg expansion to avoid it expanding into itself, e.g.: +// defmacro foo(x,y) [(x+y)] +// let x, y ...; +// foo(x,y) +// would cause infinite recursion otherwise +static bool +ismacrodupe(struct parser *P, struct toktree toks) { + for (struct expan *ep = P->curexpan; ep; ep = ep->prev) { + if (ep->toks.d == toks.d) + return 1; + } + return 0; +} + +static struct tok +lex(struct parser *P) { + u8 c; + struct tok tok; + + if (P->have_peektok) { + tok = P->peektok; + P->have_peektok = 0; + return tok; + } + + if (P->expanno > MAX_MACROEXPAND_ITER) { + fatal(P, P->curspan, + "maximum number of nested macro expansions reached (infinite recursion?)"); + } + + if (P->curexpan) { + struct expan *ep = P->curexpan; + if (ep->idx == ep->toks.n) { + // end of macro expansion + --P->expanno; + free(ep->args.d); + P->curexpan = ep->prev; + free(ep); + return lex(P); + } + tok = ep->toks.d[ep->idx++]; + // expand macro arg? + if (tok.t == TKident) { + for (struct expan *ep = P->curexpan; ep; ep = ep->prev) { + for (int i = 0; i < ep->args.n; ++i) { + if (!strcmp(tok.str, ep->args.d[i].name)) { + if (!ismacrodupe(P, ep->args.d[i].toks)) { + ++P->expanno; + P->curexpan = memcpy(xmalloc(sizeof *ep), &(struct expan) { + P->curexpan, {0}, ep->args.d[i].toks, + }, sizeof *ep); + return lex(P); + } else + return tok; + } + } + } + } + return tok; + } + + eatspaces(P); + tok.span = P->tokspan = P->curspan; + if (aisdigit((c = chrpeek(P)))) { + char s[80]; + + if (readtilsep(P, s, sizeof s, 1) < 0) + fatal(P, P->tokspan, "number literal too long"); + if (!str2number(&tok, s)) + fatal(P, P->tokspan, "invalid number literal"); + tok.span = P->tokspan; + return tok; + } else if (aisalpha(c) || c == '_' || c == '#' || c == '$') { + char s[100]; + int t; + + if (readtilsep(P, s, sizeof s, 0) < 0) + fatal(P, P->tokspan, "identifier too long"); + if ((t = str2keyword(s)) >= 0) { + tok.t = t; + } else if (!strcmp(s, "#")) { + tok.t = '#'; + } else if (!strcmp(s, "#t") || !strcmp(s, "#f")) { + tok.t = TKboolit; + tok.boolit = s[1] == 't'; + } else if (!strcmp(s, "#null")) { + tok.t = TKnullit; + } else if (c == '#') { + fatal(P, P->tokspan, "identifier cannot start with '#'"); + } else if (c == '$') { + tok.t = TKgensym; + tok.str = xstrdup(s + 1); + } else { + tok.t = TKident; + tok.str = xstrdup(s); + } + return tok; + } else if (c == '\'' || c == '"') { + u8 delim = c; + char s[1000]; + size_t n; + chr(P); + tok.span = P->tokspan = P->curspan; + n = readstrlit(P, delim, s, sizeof s); + if (delim == '"') { + tok.t = TKstrlit; + tok.str = strcpy(xmalloc(n + 1), s); + tok.strlen = n; + } else { + if (n == 0) + fatal(P, P->tokspan, "empty char literal"); + if (n > 8) + fatal(P, P->tokspan, "too long multichar literal '%s'", s); + tok.t = TKchrlit; + tok.ilit.i = 0; + tok.ilit.ty = NULL; + for (int j = 0; j < n; ++j) + tok.ilit.i = (tok.ilit.i << 8) | (u8)s[j]; + } + return tok; + } + switch ((c = chr(P))) { + case '(': case ')': case '[': case ']': + case '{': case '}': case ',': + case ';': case '?': case '~': + tok.t = c; + return tok; + case '.': + if (chrmatch(P, '.')) { + if (chr(P) != '.') + fatal(P, P->tokspan, "invalid token '..'"); + tok.t = '...'; + } else { + tok.t = c; + } + return tok; + case '*': + if (chrmatch(P, '=')) tok.t = '*='; + else tok.t = '*'; + return tok; + case '/': + if (chrmatch(P, '=')) tok.t = '/='; + else tok.t = '/'; + return tok; + case '%': + if (chrmatch(P, '=')) tok.t = '%='; + else tok.t = '%'; + return tok; + case '+': + if (chrmatch(P, '=')) tok.t = '+='; + else if (chrmatch(P, '+')) tok.t = '++'; + else tok.t = '+'; + return tok; + case '-': + if (chrmatch(P, '=')) tok.t = '-='; + else if (chrmatch(P, '-')) tok.t = '--'; + else tok.t = '-'; + return tok; + case '&': + if (chrmatch(P, '=')) tok.t = '&='; + else tok.t = '&'; + return tok; + case '|': + if (chrmatch(P, '=')) tok.t = '|='; + else tok.t = '|'; + return tok; + case '^': + if (chrmatch(P, '=')) tok.t = '^='; + else tok.t = '^'; + return tok; + case ':': + if (chrmatch(P, '=')) tok.t = ':='; + else tok.t = ':'; + return tok; + case '=': + if (chrmatch(P, '=')) tok.t = '=='; + else tok.t = '='; + return tok; + case '!': + if (chrmatch(P, '=')) tok.t = '!='; + else tok.t = '!'; + return tok; + case '<': + if (chrmatch(P, '=')) tok.t = '<='; + else if (chrmatch(P, '<')) { + if (chrmatch(P, '=')) tok.t = '<<='; + else tok.t = '<<'; + } else tok.t = '<'; + return tok; + case '>': + if (chrmatch(P, '=')) tok.t = '>='; + else if (chrmatch(P, '>')) { + if (chrmatch(P, '=')) tok.t = '>>='; + else tok.t = '>>'; + } else tok.t = '>'; + return tok; + case 0: + tok.t = TKeof; + return tok; + } + fatal(P, P->tokspan, "unexpected character '%c'", c); +} + +static struct tok +lexpeek(struct parser *P) { + struct tok tok; + + if (P->have_peektok) + return P->peektok; + tok = lex(P); + P->peektok = tok; + P->have_peektok = 1; + return tok; +} + +static bool +lexmatch(struct parser *P, struct tok *tokp, int t) { + struct tok tok = lexpeek(P); + + if (tokp) + *tokp = tok; + if (tok.t == t) { + lex(P); + return 1; + } + return 0; +} + +static struct tok +lexexpects(struct parser *P, int t, const char *what) { + struct tok tok; + if (!lexmatch(P, &tok, t)) + fatal(P, tok.span, "expected %s (near %s)", + what ? what : tokt2str(t), tok2str(tok)); + return tok; +} +#define lexexpect(P, t) lexexpects(P, t, NULL) + +/************/ +/** Parser **/ +/************/ + +static void +pushenv(struct parser *P, struct env *env) { + assert(P->curenv == env->parent); + P->curenv = env; +} + +static void +popenv(struct parser *P) { + P->curenv = P->curenv->parent; + assert(P->curenv); +} + +static void +putdecl(struct parser *P, struct span espan, const struct decl *decl) { + if (envfind(P->primenv, decl->name)) + fatal(P, espan, "cannot shadow primitive `%s'", decl->name); + if (!envput(P->curenv, decl)) + fatal(P, decl->span, + "cannot shadow earlier incompatible declaration of `%s'", decl->name); +} + +static const struct decl * +finddecl(struct parser *P, const char *name) { + const struct decl *decl; + if ((decl = envfind(P->primenv, name))) + return decl; + return envfind(P->curenv, name); +} + +static struct expr parseexpr(struct parser *P); + +static const struct type * +constify(const struct type *ty) { + struct type ty2 = *ty; + if (ty->konst) + return ty; + if (ty2.t != TYfn) + ty2.konst = 1; + return interntype(ty2); +} + +static const struct type * +unconstify(const struct type *ty) { + struct type ty2 = *ty; + if (!ty->konst) + return ty; + ty2.konst = 0; + return interntype(ty2); +} + +static const struct type * +constifychild(const struct type *ty) { + struct type ty2 = *ty; + const struct type *child = constify(ty->child); + if (child == ty->child) + return ty; + ty2.child = child; + return interntype(ty2); +} + + +static const struct type * typeof2(const struct type *, const struct type *); + +static const struct type * +parsetype(struct parser *P) { + struct tok tok; + if (lexmatch(P, &tok, '*')) { + return interntype((struct type) { + TYptr, + g_targ.ptrsize, + .child = parsetype(P) + }); + } else if (lexmatch(P, &tok, '[')) { + i64 length = -1; + if (!lexmatch(P, &tok, ']')) { + length = lexexpect(P, TKintlit).ilit.i; + assert(length >= 0); + lexexpect(P, ']'); + } + return interntype((struct type) { + TYarr, + g_targ.ptrsize, + .child = parsetype(P), + .length = length + }); + } else if (lexmatch(P, &tok, TKkw_const)) { + return constify(parsetype(P)); + } else if (lexmatch(P, &tok, TKkw_typeof)) { + const struct type *ty = NULL, *ty2; + lexexpect(P, '('); + do { + ty2 = parseexpr(P).ty; + ty = ty ? typeof2(ty, ty2) : ty2; + if (!ty) + fatal(P, tok.span, "incompatible types in typeof(...)"); + if (!lexmatch(P, &tok, ',')) { + lexexpect(P, ')'); + break; + } + } while (!lexmatch(P, &tok, ')')); + return ty; + } else if (lexmatch(P, &tok, TKident)) { + const struct decl *decl = finddecl(P, tok.str); + if (!decl) + fatal(P, P->tokspan, "%s is not defined", tok2str(tok)); + if (decl->t != Dtype) + fatal(P, P->tokspan, "%s is not a type", tok2str(tok)); + return decl->ty; + } + fatal(P, P->tokspan, "expected type (near %s)", tok2str(tok)); +} + +static const struct type * +ilittype(struct parser *P, u64 i) { + if (i <= INT_MAX) + return ty_int; + if (i <= INT32_MAX) + return ty_i32; + if (i <= INT64_MAX) + return ty_i64; + return ty_u64; +} + +static int +numtype2rank(const struct type *a) { + if (a->t == TYint) { + if (a->size < g_targ.intsize || a == ty_int) + return 0; + if (a == ty_uint) return 1; + if (a == ty_i32) return 2; + if (a == ty_u32) return 3; + if (a == ty_i64) return 4; + if (a == ty_u64) return 5; + } + if (a == ty_f32) return 6; + if (a == ty_f64) return 7; + return -1; +} + +static const struct type * +rank2numtype(int r) { + static const struct type **types[] = { + &ty_int, &ty_uint, &ty_i32, &ty_u32, + &ty_i64, &ty_u64, &ty_f32, &ty_f64, + }; + assert(r >= 0 && r < ARRAY_LENGTH(types)); + return *types[r]; +} + +static const struct type * +numpromote(const struct type *ty) { + int r = numtype2rank(ty); + if (r < 0) + return NULL; + return rank2numtype(r); +} + +static bool +isnumtype(const struct type *a) { + return a->t == TYint || a->t == TYfloat; +} + +static const struct type * +arraydecay(const struct type *ty) { + struct type ty2 = *ty; + + if (ty->t != TYarr) + return ty; + + ty2.t = TYptr; + return interntype(ty2); +} + +// peer type resolution +static const struct type * +typeof2(const struct type *a, const struct type *b) { + if (isnumtype(a) && isnumtype(b)) { + int ra = numtype2rank(a), rb = numtype2rank(b); + return rank2numtype(MAX(ra, rb)); + } + if (a == b) + return a; + if (a->t == TYptr && b->t == TYarr) + b = arraydecay(b); + if (a->t == TYarr && b->t == TYptr) + a = arraydecay(a); + if (a->t == TYptr && b->t == TYptr) { + bool akonst = a->child->konst, + bkonst = b->child->konst; + const struct type *uac = unconstify(a->child), + *ubc = unconstify(b->child); + if (uac == ubc) + return akonst ? a : b; + if (uac == ty_void) { + if (bkonst && !akonst) + return constifychild(a); + return a; + } + if (ubc == ty_void) { + if (akonst && !bkonst) + return constifychild(b); + return b; + } + if (uac == ty_u8) { + if (bkonst && !akonst) + return constifychild(a); + return a; + } + if (ubc == ty_u8) { + if (akonst && !bkonst) + return constifychild(b); + return b; + } + + } + return NULL; +} + +static bool +islvalue(const struct expr *ex) { + if (ex->t == Ename) + return 1; + if (ex->t == Eprefix && ex->unop.op == '*') + return 1; + return 0; +} + +/** Expression parsing **/ + +static struct expr * +exprdup(struct expr ex) { + return memcpy(xmalloc(sizeof ex), &ex, sizeof ex); +} + + +static struct blockstmt parseblock0(struct parser *P); +static struct stmt parseblock(struct parser *P); +static void parseexpandmacro(struct parser *P, const struct macro *macro); + +static struct expr +pexprimary(struct parser *P) { + struct expr ex = {0}; + struct tok tok; + + if (lexmatch(P, &tok, TKintlit) || lexmatch(P, &tok, TKchrlit)) { + ex.t = Eintlit; + ex.span = tok.span; + ex.i = tok.ilit.i; + ex.ty = tok.ilit.ty ? tok.ilit.ty : ilittype(P, ex.i); + } else if (lexmatch(P, &tok, TKflolit)) { + ex.t = Eflolit; + ex.span = tok.span; + ex.f = tok.flit.f; + ex.ty = tok.flit.ty ? tok.flit.ty : ty_f64; + } else if (lexmatch(P, &tok, TKstrlit)) { + ex.t = Estrlit; + ex.span = tok.span; + ex.strlit.d = tok.str; + ex.strlit.n = tok.strlen; + ex.ty = interntype((struct type) { + TYarr, tok.strlen + 1, 1, 0, + .child = constify(ty_u8), .length = tok.strlen + 1 + }); + } else if (lexmatch(P, &tok, TKboolit)) { + ex.t = Eboolit; + ex.span = tok.span; + ex.i = tok.boolit; + ex.ty = ty_bool; + } else if (lexmatch(P, &tok, TKnullit)) { + ex.t = Enullit; + ex.span = tok.span; + ex.i = tok.boolit; + ex.ty = interntype((struct type) { + TYptr, g_targ.ptrsize, .child = ty_void + }); + } else if (lexmatch(P, &tok, TKident)) { + const struct decl *decl; + + decl = finddecl(P, tok.str); + if (!decl) + fatal(P, tok.span, "%s is not defined", tok2str(tok)); + if (decl->t == Dtype) { + assert(0); + } else if (decl->t == Dmacro) { + parseexpandmacro(P, &decl->macro); + return parseexpr(P); + } else { + ex.t = Ename; + ex.span = tok.span; + ex.ref = decl; + if (decl->t == Dvar) { + if (decl->var.fnid != P->curfn->id) + fatal(P, tok.span, + "cannot access local variable `%s' belonging to outer function", + tok.str); + ex.ty = decl->var.ty; + } else if (decl->t == Dfn) { + ex.ty = decl->fn.selfty; + } else assert(0); + } + } else if (lexmatch(P, &tok, '(')) { + ex = parseexpr(P); + lexexpect(P, ')'); + } else { + fatal(P, tok.span, "expected expression (near %s)", tok2str(tok)); + } + return ex; +} + +static struct expr +pexpostfix(struct parser *P) { + struct expr ex = pexprimary(P); + struct tok tok; + + for (;;) if (lexmatch(P, &tok, '(')) { + vec_t(struct expr) args = {0}; + int i = 0, n = ex.ty->fn.params.n; + + if (ex.ty->t != TYfn) + fatal(P, ex.span, "callee is not a function"); + while (!lexmatch(P, NULL, ')')) { + struct expr arg = parseexpr(P); + + if (i == n && ! ex.ty->fn.variadic) + fatal(P, arg.span, "too many args for call"); + if (i < n && !typeof2(arg.ty, ex.ty->fn.params.d[i++])) + fatal(P, arg.span, "call argument #%d type mismatch", i); + + vec_push(&args, arg); + if (!lexmatch(P, NULL, ',')) { + lexexpect(P, ')'); + break; + } + } + ex.call.callee = exprdup(ex); + ex.t = Ecall; + ex.span = tok.span; + ex.ty = ex.call.callee->ty->fn.retty; + vec_slice_cpy(&ex.call.args, &args); + } else if (lexmatch(P, &tok, '[')) { + struct expr lhs = ex, + rhs = parseexpr(P); + lexexpect(P, ']'); + if (lhs.ty->t != TYarr && lhs.ty->t != TYptr) + fatal(P, lhs.span, "indexee is not array or pointer type"); + if (rhs.ty->t != TYint) + fatal(P, lhs.span, "index expression type is not integral"); + ex.t = Eindex; + ex.span = tok.span; + ex.ty = lhs.ty->child; + ex.index.lhs = exprdup(lhs); + ex.index.rhs = exprdup(rhs); + } else if (lexmatch(P, &tok, '++') || lexmatch(P, &tok, '--')) { + if (!isnumtype(ex.ty)) + fatal(P, ex.span, "invalid operand to unary operator %s: not numeric", + tok2str(tok)); + if (!islvalue(&ex)) + fatal(P, ex.span, "left operand to postfix %s operator is not lvalue", + tok2str(tok)); + if (ex.ty->konst) + fatal(P, ex.span, "left operand to postfix %s operator is const", + tok2str(tok)); + ex = (struct expr) { + Epostfix, tok.span, ex.ty, .unop = { + tok.t, exprdup(ex) + } + }; + } else { + break; + } + + return ex; +} + +static struct expr +pexprefix(struct parser *P) { + struct tok tok; + + if (lexmatch(P, &tok, '-') || lexmatch(P, &tok, '~')) { + struct expr ex = pexprefix(P); + const struct type *ty = numpromote(ex.ty); + + if (!ty) + fatal(P, ex.span, "invalid operand to unary operator %s: not numeric", + tok2str(tok)); + if (ty->t != TYint && tok.t == '~') + fatal(P, ex.span, "invalid operand to unary operator %s: not integral", + tok2str(tok)); + return (struct expr) { + Eprefix, tok.span, ty, .unop = { + tok.t, exprdup(ex) + } + }; + } else if (lexmatch(P, &tok, '++') || lexmatch(P, &tok, '--')) { + struct expr ex = pexprefix(P); + + if (!isnumtype(ex.ty)) + fatal(P, ex.span, "invalid operand to unary operator %s: not numeric", + tok2str(tok)); + if (!islvalue(&ex)) + fatal(P, ex.span, "left operand to prefix %s operator is not lvalue", + tok2str(tok)); + if (ex.ty->konst) + fatal(P, ex.span, "left operand to prefix %s operator is const", + tok2str(tok)); + return (struct expr) { + Eprefix, tok.span, ex.ty, .unop = { + tok.t, exprdup(ex) + } + }; + } else if (lexmatch(P, &tok, TKkw_not)) { + struct expr ex = pexprefix(P); + + if (ex.ty->t != TYbool) + fatal(P, ex.span, "invalid operand to unary operator %s: not boolean", + tok2str(tok)); + return (struct expr) { + Eprefix, tok.span, ty_bool, .unop = { + 'not', exprdup(ex) + } + }; + } else if (lexmatch(P, &tok, '*')) { + struct expr ex = pexprefix(P); + if (ex.ty->t != TYptr) + fatal(P, ex.span, "invalid operand type to dereference, not pointer"); + if (!completetype(ex.ty->child)) + fatal(P, ex.span, "invalid operand type to dereference, incomplete"); + return (struct expr) { + Eprefix, tok.span, ex.ty->child, .unop = { + '*', exprdup(ex) + } + }; + } else if (lexmatch(P, &tok, '&')) { + struct expr ex = pexprefix(P); + struct type ty2 = { TYptr, g_targ.ptrsize, .child = ex.ty}; + if (!islvalue(&ex)) + fatal(P, ex.span, "invalid operand type to address-of, not an lvalue"); + return (struct expr) { + Eprefix, tok.span, interntype(ty2), .unop = { + '&', exprdup(ex) + } + }; + } + + return pexpostfix(P); +} + +static bool +peeksbitarithop(struct parser *P, struct tok* tokp) { + struct tok tok = lexpeek(P); + + if (tokp) + *tokp = tok; + switch (tok.t) { + case '+': case '-': case '*': case '/': case '%': + return 1; + case '&': case '|': case '^': case '<<': case '>>': + return 2; + } + return 0; +} + +static struct expr +pexbitarith(struct parser *P) { + struct expr ex; + struct tok tok; + int tokt; + int oret; + + ex = pexprefix(P); + if (!(oret = peeksbitarithop(P, &tok))) + return ex; + tokt = tok.t; + + while (lexmatch(P, &tok, tokt)) { + struct expr rhs = pexprefix(P); + const struct type *ty = typeof2(ex.ty, rhs.ty); + if (!ty) { + if ((tokt == '+' || tokt == '-') + && ((ex.ty->t == TYptr && rhs.ty->t == TYint) + || (ex.ty->t == TYint && rhs.ty->t == TYptr))) + { + ty = ex.ty->t == TYptr ? ex.ty : rhs.ty; + } else { + goto err; + } + } else if (tokt == '-' && ex.ty->t == TYptr && rhs.ty->t == TYptr) { + ty = ty_isize; + } else if (!isnumtype(ty)) { + err: + fatal(P, tok.span, "invalid operands to binary operator %s", + tokt2str(tokt)); + } + if (oret == 2 && ty->t == TYfloat) + fatal(P, tok.span, "invalid operands to bitwise operator: not integral", + tokt2str(tokt)); + ex = (struct expr) { + Ebinop, tok.span, ty, .binop = { + tokt, exprdup(ex), exprdup(rhs) + } + }; + + } + + return ex; +} + +static bool +matchcmpop(struct parser *P, struct tok *tokp) { + struct tok tok = lexpeek(P); + + if (tokp) + *tokp = tok; + switch (tok.t) { + case '==': case '<=': case '>=': + case '<': case '>': + lex(P); + return 1; + } + return 0; +} + +static struct expr +pexcmp(struct parser *P) { + struct expr ex; + struct tok tok; + + ex = pexbitarith(P); + if (matchcmpop(P, &tok)) { + struct expr rhs = pexbitarith(P); + if (!typeof2(ex.ty, rhs.ty)) + fatal(P, tok.span, "incompatible operands to binary operator %s", + tok2str(tok)); + if (tok.t != '==' && !isnumtype(typeof2(ex.ty, rhs.ty))) + fatal(P, tok.span, "invalid operands to relational operator %s: not numeric", + tok2str(tok)); + ex = (struct expr) { + Ebinop, tok.span, ty_bool, .binop = { + tok.t, exprdup(ex), exprdup(rhs) + } + }; + } + return ex; +} + +static struct expr +pexlog(struct parser *P) { + struct expr ex; + struct tok tok; + int tokt; + + ex = pexcmp(P); + tok = lexpeek(P); + tokt = tok.t; + if (tokt != TKkw_and && tokt != TKkw_or) + return ex; + + while (lexmatch(P, &tok, tokt)) { + struct expr rhs = pexcmp(P); + if (ex.ty->t != TYbool || rhs.ty->t != TYbool) + fatal(P, tok.span, "invalid operands to binary operator %s: not boolean", + tokt2str(tok.t)); + ex = (struct expr) { + Ebinop, tok.span, ty_bool, .binop = { + tokt == TKkw_and ? 'and' : 'or', exprdup(ex), exprdup(rhs) + } + }; + + } + + return ex; +} + +static struct expr +pexcond(struct parser *P) { + struct expr ex; + struct tok tok; + + ex = pexlog(P); + if (lexmatch(P, &tok, '?')) { + struct expr ex2 = parseexpr(P); + struct expr ex3; + const struct type *ty; + + lexexpect(P, ':'); + ex3 = pexcond(P); + if (!(ty = typeof2(ex2.ty, ex3.ty))) + fatal(P, tok.span, "conditional operator branches have incompatible types"); + ex = (struct expr) { + Econd, tok.span, ty, .cond = { + exprdup(ex), exprdup(ex2), exprdup(ex3) + } + }; + } + return ex; +} + +static bool +matchassignop(struct parser *P, struct tok *tokp) { + struct tok tok = lexpeek(P); + if (tokp) + *tokp = tok; + switch (tok.t) { + case '=': + lex(P); + return 1; + case '+=': case '-=': case '*=': case '/=': case '%=': + lex(P); + return 2; + case '&=': case '|=': case '^=': case '<<=': case '>>=': + lex(P); + return 3; + } + return 0; +} + +static struct expr +pexassign(struct parser *P) { + struct expr ex; + struct tok tok; + int oret; + + ex = pexcond(P); + if ((oret = matchassignop(P, &tok))) { + struct expr rhs = pexcond(P); + if (!islvalue(&ex)) + fatal(P, ex.span, + "left operand to assignment operator %s is not an lvalue", + tok2str(tok)); + if (!typeof2(ex.ty, rhs.ty)) + fatal(P, ex.span, + "operands to assignment operator %s have incompatible types", + tok2str(tok)); + if (ex.ty->konst) + fatal(P, ex.span, + "left operand to assignment operator %s is const", + tok2str(tok)); + ex = (struct expr) { + Ebinop, tok.span, ex.ty, .binop = { + tok.t, exprdup(ex), exprdup(rhs) + } + }; + } + + return ex; +} + +static struct expr +parseexpr(struct parser *P) { + return pexassign(P); +} + +/** Statements and declarations parsing **/ + +static struct stmt * +stmtdup(struct stmt st) { + return memcpy(xmalloc(sizeof st), &st, sizeof st); +} + +static struct stmt +pstlet(struct parser *P) { + struct stmt st = {0}; + struct tok tok; + const char *name; + const struct type *ty = NULL; + struct expr *ini = NULL; + bool konst = 0; + + if (lexmatch(P, NULL, TKkw_const)) + konst = 1; + + tok = lexexpect(P, TKident); + name = tok.str; + + if (lexmatch(P, NULL, '=')) { + ini = exprdup(parseexpr(P)); + ty = ini->ty; + } else { + ty = parsetype(P); + lexexpect(P, '='); + ini = exprdup(parseexpr(P)); + } + if (!typeof2(ty, ini->ty)) + fatal(P, tok.span, "incompatible initializer type"); + + if (!completetype(ty)) + fatal(P, tok.span, "let `%s': variable type is incomplete", name); + + if (konst) + ty = constify(ty); + + assert(ty); + st.t = Sdecl; + st.decl = (struct decl) { + Dvar, name, .var = { + .ty = ty, + .ini = ini, + .fnid = P->curfn->id, + } + }; + putdecl(P, tok.span, &st.decl); + return st; +} + +static struct stmt +pstifelse(struct parser *P) { + struct stmt st = {Sifelse}; + + st.ifelse.test = parseexpr(P); + if (st.ifelse.test.ty->t != TYbool && st.ifelse.test.ty->t != TYptr) + fatal(P, st.ifelse.test.span, + "if statement condition must be bool or pointer"); + lexexpect(P, '{'); + st.ifelse.t = parseblock(P).block; + if (lexmatch(P, NULL, TKkw_else)) { + if (lexmatch(P, NULL, TKkw_if)) + st.ifelse.f = stmtdup(pstifelse(P)); + else { + lexexpect(P, '{'); + st.ifelse.f = stmtdup(parseblock(P)); + } + } + + return st; +} + +static struct stmt +pstfor(struct parser *P) { + struct stmt st = {Sfor}; + struct tok tok; + + if (lexmatch(P, NULL, TKkw_let)) { + st.loop.ini = stmtdup(pstlet(P)); + lexexpect(P, ';'); + } else if (lexmatch(P, NULL, ';')) { + // pass + } else { + st.loop.ini = stmtdup((struct stmt) { + Sexpr, .expr = parseexpr(P) + }); + lexexpect(P, ';'); + } + + if (lexmatch(P, &tok, ';')) { + // for (...; ; ...;) -> for (...; #t; ...;) + st.loop.test = (struct expr) { Eboolit, tok.span, ty_bool, .i = 1 }; + } else { + st.loop.test = parseexpr(P); + lexexpect(P, ';'); + } + + if (st.loop.test.ty->t != TYbool && st.loop.test.ty->t != TYptr) + fatal(P, st.loop.test.span, + "for statement condition must be bool or pointer"); + + if (!lexmatch(P, &tok, '{')) { + st.loop.next = exprdup(parseexpr(P)); + lexexpect(P, '{'); + } + st.loop.body = parseblock(P).block; + return st; +} + +static struct stmt parsestmt(struct parser *P); + +static struct stmt +pstiswitch(struct parser *P, const struct expr *test) { + struct stmt st = {Siswitch}; + struct tok tok; + vec_t(struct iswitchcase) cs = {0}; + struct blockstmt *f = NULL; + + lexexpect(P, '{'); + while (!lexmatch(P, &tok, '}')) { + struct iswitchcase c; + vec_t(struct expr) es = {0}; + + lexexpect(P, TKkw_case); + + if (lexmatch(P, &tok, TKkw_else)) { + if (f) + fatal(P, tok.span, "duplicate 'case else' block"); + f = malloc(sizeof *f); + *f = parseblock0(P); + } else { + do { + struct expr ex = parseexpr(P); + assert(ex.ty->t == TYint); + vec_push(&es, ex); + if (!lexmatch(P, &tok, ',')) { + lexexpect(P, TKkw_do); + break; + } + } while (!lexmatch(P, &tok, TKkw_do)) ; + vec_slice_cpy(&c.es, &es); + c.t = parseblock0(P); + vec_push(&cs, c); + } + } + + st.iswitch.test = *test; + vec_slice_cpy(&st.iswitch.cs, &cs); + st.iswitch.f = f; + return st; +} + +static struct stmt +pstswitch(struct parser *P) { + struct tok tok; + struct expr test; + + if (!lexmatch(P, &tok, '{')) { + test = parseexpr(P); + if (test.ty->t == TYint) + return pstiswitch(P, &test); + else + assert(0 && "NYI"); + } else { + assert(0 && "NYI"); + } +} + +static void parsedecl(struct decl *, struct parser *, bool toplevel); + +static bool +isdecltokt(int tokt) { + switch (tokt) + case TKkw_extern: case TKkw_fn: case TKkw_typedef: case TKkw_defmacro: + return 1; + return 0; +} + +static void +mergetoktrees(struct toktree *dst, int n) { + size_t total = 0; + for (int i = 0; i < n; ++i) { + total += dst[i].n; + if (i > 0) + ++total; + } + dst->d = xrealloc(dst->d, total * sizeof (struct tok)); + for (int i = 1; i < n; ++i) { + dst->d[dst->n++] = (struct tok) { ',' }; + memcpy(dst->d + dst->n, dst[i].d, dst[i].n * sizeof (struct tok)); + dst->n += dst[i].n; + } + assert(dst->n == total); +} + +static void +parseexpandmacro(struct parser *P, const struct macro *macro) { + vec_t(struct toktree) args = {0}; + struct tok tok; + struct expan expan = {0}; + struct expanarg *expanargs; + struct macrocase c; + + expan.span = lexpeek(P).span; + lexexpect(P, '('); + while (!lexmatch(P, &tok, ')')) { + int pabal = 0, // ( ) parens balance + bkbal = 0, // [ ] brackets .. + bcbal = 0; // { } braces .. + struct span espan = tok.span; + vec_t(struct tok) toks = {0}; + struct toktree ttoks; + + for (;;) { + tok = lex(P); + switch (tok.t) { + case '[': ++bkbal; break; + case ']': --bkbal; break; + case '{': ++bcbal; break; + case '}': --bcbal; break; + case '(': ++pabal; break; + case ')': + if (--pabal >= 0) + break; + goto arg_done; + case ',': + if (!pabal && !bkbal && !bcbal) + goto arg_done; + break; + case TKeof: + fatal(P, espan, "unterminated macro invokation"); + } + vec_push(&toks, tok); + } + arg_done: + + vec_slice_cpy(&ttoks, &toks); + vec_push(&args, ttoks); + + if (tok.t != ',') { + assert(tok.t == ')'); + break; + } + } + + for (int i = 0; i < macro->cs.n; ++i) { + c = macro->cs.d[i]; + if (!c.variadic && args.length == c.params.n) { + goto ok; + } else if (c.variadic && args.length >= c.params.n - 1) { + int n = args.length - (c.params.n - 1); + if (n == 0) { + vec_push(&args, (struct toktree){0}); + } else { + mergetoktrees(args.data + c.params.n - 1, n); + } + goto ok; + } + } + fatal(P, tok.span, "no match for invoking macro `%s' with %d arguments", + macro->name, args.length); +ok: + expanargs = xcalloc(c.params.n, sizeof *expanargs); + for (int i = 0; i < c.params.n; ++i) { + expanargs[i].name = c.params.d[i]; + expanargs[i].toks = args.data[i]; + } + expan.prev = P->curexpan; + expan.name = macro->name; + expan.args.d = expanargs; + expan.args.n = c.params.n; + expan.toks = c.body; + ++P->expanno; + P->curexpan = memcpy(xmalloc(sizeof expan), &expan, sizeof expan); +} + +static struct stmt +parsestmt(struct parser *P) { + struct stmt st = {0}; + struct tok tok; + + if (lexmatch(P, NULL, '{')) { + return parseblock(P); + } else if (lexmatch(P, NULL, TKkw_let)) { + st = pstlet(P); + lexexpect(P, ';'); + } else if (lexmatch(P, NULL, TKkw_if)) { + st = pstifelse(P); + } else if (lexmatch(P, NULL, TKkw_while)) { + st.t = Swhile; + st.loop.test = parseexpr(P); + if (st.loop.test.ty->t != TYbool && st.loop.test.ty->t != TYptr) + fatal(P, st.loop.test.span, + "while statement condition must be bool or pointer"); + lexexpect(P, '{'); + st.loop.body = parseblock(P).block; + } else if (lexmatch(P, NULL, TKkw_for)) { + st = pstfor(P); + } else if (lexmatch(P, NULL, TKkw_switch)) { + st = pstswitch(P); + } else if (lexmatch(P, &tok, TKkw_return)) { + st.t = Sreturn; + if (!lexmatch(P, NULL, ';')) { + st.retex = exprdup(parseexpr(P)); + lexexpect(P, ';'); + if (P->curfn->retty == ty_void) + fatal(P, st.retex->span, + "return statement in void function must not return a value"); + } else if (P->curfn->retty != ty_void) { + fatal(P, tok.span, + "return statement in non-void function must return a value"); + } + } else if (isdecltokt((tok = lexpeek(P)).t)) { + st.t = Sdecl; + parsedecl(&st.decl, P, 0); + if (st.decl.t == Dfn && !st.decl.externp && !st.decl.fn.body) + fatal(P, tok.span, + "cannot forward-declare non-external function inside another function"); + + if (st.decl.t == Dfn && st.decl.externp && st.decl.fn.body) + fatal(P, tok.span, + "cannot define external function inside another function"); + } else { + struct expr ex; + + if ((tok = lexpeek(P)).t == TKident) { + const struct decl *decl; + if ((decl = finddecl(P, tok.str)) && decl->t == Dmacro) { + lex(P); + parseexpandmacro(P, &decl->macro); + return parsestmt(P); + } + } + + ex = parseexpr(P); + + lexexpect(P, ';'); + st.t = Sexpr; + st.expr = ex; + } + return st; +} + +static struct blockstmt +parseblock0(struct parser *P) { + vec_t(struct stmt) stmts = {0}; + struct blockstmt block = {0}; + int tokt; + + block.env.parent = P->curenv; + pushenv(P, &block.env); + while ((tokt = lexpeek(P).t) != '}' && tokt != TKkw_case && tokt != ')') { + if (lexmatch(P, NULL, ';')) + continue; + vec_push(&stmts, parsestmt(P)); + } + + vec_slice_cpy(&block.stmts, &stmts); + + popenv(P); + return block; +} + +static struct stmt +parseblock(struct parser *P) { + struct stmt st = { Sblock }; + st.block = parseblock0(P); + lexexpect(P, '}'); + return st; +} + +static const struct type * +fntype(const struct fn *fn) { + struct type ty = { TYfn, 0, g_targ.sizesize }; + vec_t(const struct type *) params = {0}; + ty.fn.retty = fn->retty; + for (int i = 0; i < fn->params.n; ++i) + vec_push(¶ms, fn->params.d[i].ty); + vec_slice_cpy(&ty.fn.params, ¶ms); + ty.fn.variadic = fn->variadic; + return interntype(ty); +} + +static void +parsefn(struct decl *decl, struct parser *P) { + vec_t(struct fnparam) params = {0}; + struct tok tok; + struct fn *fn = &decl->fn; + const char *name = fn->name; + memset(fn, 0, sizeof *fn); + fn->name = name; + + lexexpect(P, '('); + while (!lexmatch(P, NULL, ')')) { + struct fnparam param; + if (lexmatch(P, NULL, '...')) { + fn->variadic = 1; + } else { + param.name = lexexpects(P, TKident, "parameter name").str; + param.ty = parsetype(P); + vec_push(¶ms, param); + } + if (!lexmatch(P, NULL, ',')) { + lexexpect(P, ')'); + break; + } else if (fn->variadic) { + lexexpect(P, ')'); + break; + } + } + vec_slice_cpy(&fn->params, ¶ms); + fn->retty = unconstify(parsetype(P)); + fn->selfty = fntype(fn); + if (!lexmatch(P, &tok, ';')) { + struct env *env = xcalloc(1, sizeof *env); + static int id; + fn->id = id++; + lexexpect(P, '{'); + env->parent = P->curenv; + + WITH_TMPCHANGE(struct fn *, P->curfn, fn) { + pushenv(P, env); + putdecl(P, tok.span, &(struct decl) { + Dfn, fn->name, .fn = *fn, ._cname = decl->_cname + }); + for (int i = 0; i < params.length; ++i) { + struct decl decl = { + Dvar, params.data[i].name, .var = { + .ty = params.data[i].ty, + .fnid = fn->id, + } + }; + putdecl(P, tok.span, &decl); + } + fn->body = xcalloc(1, sizeof *fn->body); + *fn->body = parseblock(P); + popenv(P); + } + } +} + +static struct macrocase +parsemacrocase(struct parser *P) { + struct macrocase c = {0}; + vec_t(const char *) params = {0}; + vec_t(struct tok) body = {0}; + struct tok tok; + int pabal = 0, // ( ) parens balance + bkbal = 1, // [ ] brackets .. + bcbal = 0; // { } braces .. + struct span espan; + static int gensymid; + struct gensyms { + struct gensyms *next; + const char *name; + struct tok tok; + } *gensyms = NULL; + + lexexpect(P, '('); + while (!lexmatch(P, &tok, ')')) { + if (lexmatch(P, &tok, '...')) { + vec_push(¶ms, lexexpects(P, TKident, "parameter name").str); + c.variadic = 1; + lexmatch(P, &tok, ','); + lexexpect(P, ')'); + break; + } else { + vec_push(¶ms, lexexpects(P, TKident, "parameter name").str); + if (!lexmatch(P, &tok, ',')) { + lexexpect(P, ')'); + break; + } + } + } + + espan = tok.span; + + lexexpect(P, '['); + while (bkbal || pabal || bcbal) { + tok = lex(P); + switch (tok.t) { + case '(': ++pabal; break; + case ')': --pabal; break; + case '[': ++bkbal; break; + case ']': --bkbal; break; + case '{': ++bcbal; break; + case '}': --bcbal; break; + case TKeof: + fatal(P, espan, "unterminated macro definition"); + } + if (tok.t == TKgensym) { + struct gensyms *gs; + for (gs = gensyms; gs; gs = gs->next) { + if (!strcmp(gs->name, tok.str)) { + tok = gs->tok; + goto found; + } + } + gs = xcalloc(1, sizeof *gs); + gs->name = tok.str; + gs->tok.t = TKident; + gs->tok.span = tok.span; + gs->tok.str = xmalloc(strlen(tok.str) + 24); + sprintf((char *)gs->tok.str, "__fG%s_%d", tok.str, gensymid++); + tok = gs->tok; + gs->next = gensyms; + gensyms = gs; + } + found: + vec_push(&body, tok); + } + (void)vec_pop(&body); + + vec_slice_cpy(&c.params, ¶ms); + vec_slice_cpy(&c.body, &body); + return c; +} + +static struct macro +parsemacro(struct parser *P) { + struct tok tok; + struct macro macro; + vec_t(struct macrocase) cs = {0}; + + if (lexpeek(P).t == '(') { + vec_push(&cs, parsemacrocase(P)); + } else { + lexexpect(P, '{'); + while (!lexmatch(P, &tok, '}')) { + vec_push(&cs, parsemacrocase(P)); + if (!lexmatch(P, &tok, ',')) { + lexexpect(P, '}'); + break; + } + } + } + + if (cs.length == 0) + fatal(P, tok.span, "macro must have at least one case"); + + vec_slice_cpy(¯o.cs, &cs); + return macro; +} + +static void +parsedecl(struct decl *decl, struct parser *P, bool toplevel) { + struct tok tok = { .span = P->tokspan }; + bool externp = 0; + memset(decl, 0, sizeof *decl); + + if (lexmatch(P, &tok, TKkw_extern)) { + externp = 1; + } + + if (lexmatch(P, &tok, TKkw_fn)) { + const char *name = lexexpects(P, TKident, "function name").str; + + decl->t = Dfn; + decl->name = decl->fn.name = name; + decl->_cname = xcalloc(1, sizeof (char *)); + parsefn(decl, P); + decl->externp = externp; + } else if (lexmatch(P, &tok, TKkw_typedef)) { + decl->t = Dtype; + decl->name = lexexpects(P, TKident, "typedef name").str; + decl->ty = parsetype(P); + decl->_cname = xcalloc(1, sizeof(char *)); + lexexpect(P, ';'); + } else if (lexmatch(P, &tok, TKkw_defmacro)) { + const char *name = lexexpects(P, TKident, "macro name").str; + decl->t = Dmacro; + decl->name = name; + decl->macro = parsemacro(P); + decl->macro.name = name; + + } else { + fatal(P, tok.span, "expected declaration (near %s)", + tok2str(tok)); + } + + decl->span = tok.span; + putdecl(P, tok.span, decl); +} + +void +parse(struct transunit *tu, struct parser *P) { + vec_t(struct decl) decls = {0}; + int i = 0; + while (!P->eof && lexpeek(P).t != TKeof) { + vec_push(&decls, (struct decl){0}); + parsedecl(&decls.data[i++], P, 1); + } + vec_slice_cpy(&tu->decls, &decls); +} + +void +initparser(struct parser *P, const char *fname) { + assert(NUM_LEXTOKENS - 1 < '!'); + memset(P, 0, sizeof *P); + P->curfile = fname; + if (!(P->fp = fopen(fname, "r"))) + perror(fname), exit(1); + P->tokspan = P->curspan = (struct span) { addfilepath(fname), 0, 1, 1 }; + P->peekchr = -1; + P->curenv = P->tlenv = xcalloc(1, sizeof *P->tlenv); + P->primenv = xcalloc(1, sizeof *P->primenv); + putprimtypes(P->primenv); +} |