#include "all.h" /***********/ /** Lexer **/ /***********/ static u8 chr(struct parser *P) { int c; if (P->peekchr >= 0) { c = P->peekchr; P->peekchr = -1; } else { c = fgetc(P->fp); } P->curspan.idx++; P->curspan.col++; if (c == '\n') { P->curspan.col = 1; P->curspan.line++; } if (c == EOF || !c) { P->eof = 1; return 0; } return c; } static u8 chrpeek(struct parser *P) { int c; if (P->peekchr >= 0) return P->peekchr; c = fgetc(P->fp); return P->peekchr = c == EOF ? 0 : (u8)c; } static bool chrmatch(struct parser *P, u8 c) { if (chrpeek(P) == c) { chr(P); return 1; } return 0; } static bool aisspace(char c) { switch (c) case ' ': case '\t': case '\n': case '\r': case '\v': case '\f': return 1; return 0; } static bool aisdigit(char c) { return c >= '0' && c <= '9'; } static bool aisxdigit(char c) { return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f') ; } static bool aisalpha(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } static bool aissep(char c) { if (aisspace(c)) return 1; switch (c) case '(': case ')': case '[': case ']': case '{': case '}': case '.': case ',': case ';': case '?': case '+': case '-': case '*': case '/': case '&': case '|': case '^': case '~': case '=': case '\'': case '"': case '<': case '>': return 1; return 0; } static void eatspaces(struct parser *P) { for (;; chr(P)) { if (aisspace(chrpeek(P))) continue; else if (chrpeek(P) == '!') { while (chrpeek(P) != '\n' && chrpeek(P) != 0) chr(P); } else { break; } } } static int readtilsep(struct parser *P, char *buf, int n, bool dot) { int i = 0; u8 c; while (!aissep((c = chrpeek(P))) || (dot && c == '.')) { chr(P); if (i >= n - 1) return -1; buf[i++] = c; } buf[i++] = 0; return i; } const char *keyword2str[] = { #define KWSTR(kw) #kw, LIST_KEYWORDS(KWSTR) #undef KWSTR }; // #define NUM_KEYWORDS (sizeof keyword2str / sizeof *keyword2str) static int str2keyword(const char *s) { int i = 0, j = NUM_KEYWORDS - 1; while (i <= j) { int k = (j + i) / 2; int cmp = strcmp(keyword2str[k], s); if (!cmp) { return k; } else if (cmp < 0) { i = k + 1; } else { j = k - 1; } } return -1; } static bool str2number(struct tok *res, const char *s) { char c; u64 acc = 0; double accf = 0.0; double fmul = 0.1; int base = 10; bool flt = 0; int nused = 0; for (int i = 0; (c = s[i]); ++i) { extern int tolower(int); if (i == 0 && c == '0') --nused; if (i == 1 && tolower(c) == 'x') { base = 16; continue; } if (!flt && c == '.' && base == 10) { flt = 1; accf = acc; continue; } if (nused > 0 && c == '_') continue; if (base == 16 && !aisxdigit(c)) return 0; else if (base != 16 && (c < '0' || c > '0' + base - 1)) return 0; ++nused; if (flt) { accf = accf + (c - '0') * fmul; fmul *= 0.1; } else { c = tolower(c); acc = acc * base + (c <= '9' ? c - '0' : c - 'a' + 10); } } memset(res, 0, sizeof *res); if (flt) { res->t = TKflolit; res->flit.f = accf; } else { res->t = TKintlit; res->ilit.i = acc; } return 1; } static size_t readstrlit(struct parser *P, u8 delim, char *s, size_t n) { u8 c; size_t i = 0; while ((c = chr(P)) != delim) { if (!c || c == '\n') fatal(P, P->tokspan, "unterminated %s literal", delim == '"' ? "string" : "character"); if (i == n - 1) assert(0 && "too long string/charliteral"); if (c != '\\') s[i++] = c; else switch ((c = chr(P))) { case 0: case '\n': fatal(P, P->tokspan, "unterminated %s literal", delim == '"' ? "string" : "character"); case '\'': s[i++] = '\''; break; case '"': s[i++] = '"'; break; case 'n': s[i++] = '\n'; break; case 'r': s[i++] = '\r'; break; case 't': s[i++] = '\t'; break; case 'v': s[i++] = '\v'; break; case 'f': s[i++] = '\f'; break; case '0': s[i++] = '\0'; break; default: fatal(P, P->tokspan, "unknown escape sequence '\\%c'", c); } } s[i++] = 0; return i - 1; } #define MAX_MACROEXPAND_ITER 100 /* // guard used at macro arg expansion to avoid it expanding into itself, e.g.: // defmacro foo(x,y) [(x+y)] // let x, y ...; // foo(x,y) // would cause infinite recursion otherwise static bool ismacrodupe(struct parser *P, struct toktree toks) { for (struct expan *ep = P->curexpan; ep; ep = ep->prev) { if (ep->toks.d == toks.d) return 1; } return 0; } */ static struct tok lex(struct parser *P) { u8 c; struct tok tok; if (P->have_peektok) { tok = P->peektok; P->have_peektok = 0; return tok; } if (P->expanno > MAX_MACROEXPAND_ITER) { fatal(P, P->curspan, "maximum number of nested macro expansions reached (infinite recursion?)"); } if (P->curexpan) { struct expan *ep = P->curexpan; if (ep->idx == ep->toks.n) { // end of macro expansion --P->expanno; free(ep->args.d); P->curexpan = ep->prev; free(ep); return lex(P); } tok = ep->toks.d[ep->idx++]; // expand macro arg? if (tok.t == TKmacident) { for (struct expan *ep = P->curexpan; ep; ep = ep->prev) { for (int i = 0; i < ep->args.n; ++i) { if (!strcmp(tok.str, ep->args.d[i].name)) { ++P->expanno; P->curexpan = memcpy(xmalloc(sizeof *ep), &(struct expan) { P->curexpan, {0}, ep->args.d[i].toks, }, sizeof *ep); return lex(P); } } } } return tok; } eatspaces(P); tok.span = P->tokspan = P->curspan; if (aisdigit((c = chrpeek(P)))) { char s[80]; if (readtilsep(P, s, sizeof s, 1) < 0) fatal(P, P->tokspan, "number literal too long"); if (!str2number(&tok, s)) fatal(P, P->tokspan, "invalid number literal"); tok.span = P->tokspan; return tok; } else if (aisalpha(c) || c == '_' || c == '#' || c == '$') { char s[100]; int t; if (readtilsep(P, s, sizeof s, 0) < 0) fatal(P, P->tokspan, "identifier too long"); if ((t = str2keyword(s)) >= 0) { tok.t = t; } else if (!strcmp(s, "#")) { tok.t = '#'; } else if (!strcmp(s, "#t") || !strcmp(s, "#f")) { tok.t = TKboolit; tok.boolit = s[1] == 't'; } else if (!strcmp(s, "#null")) { tok.t = TKnullit; } else if (!strcmp(s, "##")) { tok.t = '##'; } else if (c == '#') { fatal(P, P->tokspan, "identifier cannot start with '#'"); } else if (c == '$') { tok.t = TKgensym; tok.str = xstrdup(s + 1); } else { tok.t = TKident; tok.str = xstrdup(s); } return tok; } else if (c == '\'' || c == '"') { u8 delim = c; char s[1000]; size_t n; chr(P); tok.span = P->tokspan = P->curspan; n = readstrlit(P, delim, s, sizeof s); if (delim == '"') { tok.t = TKstrlit; tok.str = memcpy(xmalloc(n + 1), s, n + 1); tok.strlen = n; } else { if (n == 0) fatal(P, P->tokspan, "empty char literal"); if (n > 8) fatal(P, P->tokspan, "too long multichar literal '%s'", s); tok.t = TKchrlit; tok.ilit.i = 0; tok.ilit.ty = NULL; for (int j = 0; j < n; ++j) tok.ilit.i = (tok.ilit.i << 8) | (u8)s[j]; } return tok; } switch ((c = chr(P))) { case '(': case ')': case '[': case ']': case '{': case '}': case ',': case ';': case '?': case '~': tok.t = c; return tok; case '.': if (chrmatch(P, '.')) { if (chr(P) != '.') fatal(P, P->tokspan, "invalid token '..'"); tok.t = '...'; } else { tok.t = c; } return tok; case '*': if (chrmatch(P, '=')) tok.t = '*='; else tok.t = '*'; return tok; case '/': if (chrmatch(P, '=')) tok.t = '/='; else tok.t = '/'; return tok; case '%': if (chrmatch(P, '=')) tok.t = '%='; else tok.t = '%'; return tok; case '+': if (chrmatch(P, '=')) tok.t = '+='; else if (chrmatch(P, '+')) tok.t = '++'; else tok.t = '+'; return tok; case '-': if (chrmatch(P, '=')) tok.t = '-='; else if (chrmatch(P, '-')) tok.t = '--'; else tok.t = '-'; return tok; case '&': if (chrmatch(P, '=')) tok.t = '&='; else tok.t = '&'; return tok; case '|': if (chrmatch(P, '=')) tok.t = '|='; else tok.t = '|'; return tok; case '^': if (chrmatch(P, '=')) tok.t = '^='; else tok.t = '^'; return tok; case ':': if (chrmatch(P, '=')) tok.t = ':='; else tok.t = ':'; return tok; case '=': if (chrmatch(P, '=')) tok.t = '=='; else tok.t = '='; return tok; case '!': if (chrmatch(P, '=')) tok.t = '!='; else tok.t = '!'; return tok; case '<': if (chrmatch(P, '=')) tok.t = '<='; else if (chrmatch(P, '<')) { if (chrmatch(P, '=')) tok.t = '<<='; else tok.t = '<<'; } else tok.t = '<'; return tok; case '>': if (chrmatch(P, '=')) tok.t = '>='; else if (chrmatch(P, '>')) { if (chrmatch(P, '=')) tok.t = '>>='; else tok.t = '>>'; } else tok.t = '>'; return tok; case 0: tok.t = TKeof; return tok; } fatal(P, P->tokspan, "unexpected character '%c'", c); } static struct tok lexpeek(struct parser *P) { struct tok tok; if (P->have_peektok) return P->peektok; tok = lex(P); P->peektok = tok; P->have_peektok = 1; return tok; } static bool lexmatch(struct parser *P, struct tok *tokp, int t) { struct tok tok = lexpeek(P); if (tokp) *tokp = tok; if (tok.t == t) { lex(P); return 1; } return 0; } static struct tok lexexpects(struct parser *P, int t, const char *what) { struct tok tok; if (!lexmatch(P, &tok, t)) fatal(P, tok.span, "expected %s (near %s)", what ? what : tokt2str(t), tok2str(tok)); return tok; } #define lexexpect(P, t) lexexpects(P, t, NULL) /************/ /** Parser **/ /************/ static void pushenv(struct parser *P, struct env *env) { assert(P->curenv == env->parent); P->curenv = env; } static void popenv(struct parser *P) { P->curenv = P->curenv->parent; assert(P->curenv); } static void putdecl(struct parser *P, struct span espan, const struct decl *decl) { if (envfind(P->primenv, decl->name)) fatal(P, espan, "cannot shadow primitive `%s'", decl->name); if (!envput(P->curenv, decl)) fatal(P, decl->span, "cannot shadow earlier incompatible declaration of `%s'", decl->name); } static const struct decl * finddecl(struct parser *P, const char *name) { const struct decl *decl; if ((decl = envfind(P->primenv, name))) return decl; return envfind(P->curenv, name); } static struct expr parseexpr(struct parser *P); static const struct type * constify(const struct type *ty) { struct type ty2 = *ty; if (ty->konst) return ty; if (ty2.t != TYfn) ty2.konst = 1; return interntype(ty2); } static const struct type * unconstify(const struct type *ty) { struct type ty2 = *ty; if (!ty->konst) return ty; ty2.konst = 0; return interntype(ty2); } static const struct type * constifychild(const struct type *ty) { struct type ty2 = *ty; const struct type *child = constify(ty->child); if (child == ty->child) return ty; ty2.child = child; return interntype(ty2); } static const struct type * typeof2(const struct type *, const struct type *); static const struct type * parsetype(struct parser *P) { struct tok tok; if (lexmatch(P, &tok, '*')) { return interntype((struct type) { TYptr, g_targ.ptrsize, .child = parsetype(P) }); } else if (lexmatch(P, &tok, '[')) { i64 length = -1; if (!lexmatch(P, &tok, ']')) { length = lexexpect(P, TKintlit).ilit.i; assert(length >= 0); lexexpect(P, ']'); } return interntype((struct type) { TYarr, g_targ.ptrsize, .child = parsetype(P), .length = length }); } else if (lexmatch(P, &tok, TKkw_const)) { return constify(parsetype(P)); } else if (lexmatch(P, &tok, TKkw_typeof)) { const struct type *ty = NULL, *ty2; lexexpect(P, '('); do { ty2 = parseexpr(P).ty; ty = ty ? typeof2(ty, ty2) : ty2; if (!ty) fatal(P, tok.span, "incompatible types in typeof(...)"); if (!lexmatch(P, &tok, ',')) { lexexpect(P, ')'); break; } } while (!lexmatch(P, &tok, ')')); return ty; } else if (lexmatch(P, &tok, TKident)) { const struct decl *decl = finddecl(P, tok.str); if (!decl) fatal(P, P->tokspan, "%s is not defined", tok2str(tok)); if (decl->t != Dtype) fatal(P, P->tokspan, "%s is not a type", tok2str(tok)); return decl->ty; } fatal(P, P->tokspan, "expected type (near %s)", tok2str(tok)); } static const struct type * ilittype(struct parser *P, u64 i) { if (i <= INT_MAX) return ty_int; if (i <= INT32_MAX) return ty_i32; if (i <= INT64_MAX) return ty_i64; return ty_u64; } static int numtype2rank(const struct type *a) { if (a->t == TYint) { if (a->size < g_targ.intsize || a == ty_int) return 0; if (a == ty_uint) return 1; if (a == ty_i32) return 2; if (a == ty_u32) return 3; if (a == ty_i64) return 4; if (a == ty_u64) return 5; } if (a == ty_f32) return 6; if (a == ty_f64) return 7; return -1; } static const struct type * rank2numtype(int r) { static const struct type **types[] = { &ty_int, &ty_uint, &ty_i32, &ty_u32, &ty_i64, &ty_u64, &ty_f32, &ty_f64, }; assert(r >= 0 && r < ARRAY_LENGTH(types)); return *types[r]; } static const struct type * numpromote(const struct type *ty) { int r = numtype2rank(ty); if (r < 0) return NULL; return rank2numtype(r); } static bool isnumtype(const struct type *a) { return a->t == TYint || a->t == TYfloat; } static const struct type * arraydecay(const struct type *ty) { struct type ty2 = *ty; if (ty->t != TYarr) return ty; ty2.t = TYptr; return interntype(ty2); } // peer type resolution static const struct type * typeof2(const struct type *a, const struct type *b) { if (isnumtype(a) && isnumtype(b)) { int ra = numtype2rank(a), rb = numtype2rank(b); return rank2numtype(MAX(ra, rb)); } if (a == b) return a; if (unconstify(a) == b) return a; if (a == unconstify(b)) return b; if (a->t == TYarr && b->t == TYarr) a = arraydecay(a); if (a->t == TYptr && b->t == TYarr) b = arraydecay(b); if (a->t == TYarr && b->t == TYptr) a = arraydecay(a); if (a->t == TYptr && b->t == TYptr) { bool akonst = a->child->konst, bkonst = b->child->konst; const struct type *uac = unconstify(a->child), *ubc = unconstify(b->child); if (uac == ubc) return akonst ? a : b; if (uac == ty_void) { if (bkonst && !akonst) return constifychild(a); return a; } if (ubc == ty_void) { if (akonst && !bkonst) return constifychild(b); return b; } if (uac == ty_u8) { if (bkonst && !akonst) return constifychild(a); return a; } if (ubc == ty_u8) { if (akonst && !bkonst) return constifychild(b); return b; } } return NULL; } static bool islvalue(const struct expr *ex) { if (ex->t == Ename) return 1; if (ex->t == Eprefix && ex->unop.op == '*') return 1; return 0; } /** Expression parsing **/ static struct expr * exprdup(struct expr ex) { return memcpy(xmalloc(sizeof ex), &ex, sizeof ex); } static struct blockstmt parseblock0(struct parser *P); static struct stmt parseblock(struct parser *P); static void parseexpandmacro(struct parser *P, const struct macro *macro); static const struct type * mkarraytype(const struct type *child, size_t n) { return interntype((struct type) { TYarr, n * child->size, 1, 0, .child = child, .length = n }); } static struct expr pexprimary(struct parser *P) { struct expr ex = {0}; struct tok tok; if (lexmatch(P, &tok, TKintlit) || lexmatch(P, &tok, TKchrlit)) { ex.t = Eintlit; ex.span = tok.span; ex.i = tok.ilit.i; ex.ty = tok.ilit.ty ? tok.ilit.ty : ilittype(P, ex.i); } else if (lexmatch(P, &tok, TKflolit)) { ex.t = Eflolit; ex.span = tok.span; ex.f = tok.flit.f; ex.ty = tok.flit.ty ? tok.flit.ty : ty_f64; } else if (lexmatch(P, &tok, TKstrlit)) { ex.t = Estrlit; ex.span = tok.span; ex.strlit.d = tok.str; ex.strlit.n = tok.strlen; while (lexmatch(P, &tok, TKstrlit)) { ex.strlit.d = xrealloc((char *)ex.strlit.d, ex.strlit.n + tok.strlen + 1); memcpy((char *)ex.strlit.d + ex.strlit.n, tok.str, tok.strlen + 1); free((char *)tok.str); ex.strlit.n += tok.strlen; } ex.ty = mkarraytype(constify(ty_u8), ex.strlit.n + 1); } else if (lexmatch(P, &tok, TKboolit)) { ex.t = Eboolit; ex.span = tok.span; ex.i = tok.boolit; ex.ty = ty_bool; } else if (lexmatch(P, &tok, TKnullit)) { ex.t = Enullit; ex.span = tok.span; ex.i = tok.boolit; ex.ty = interntype((struct type) { TYptr, g_targ.ptrsize, .child = ty_void }); } else if (lexmatch(P, &tok, TKident)) { const struct decl *decl; decl = finddecl(P, tok.str); if (!decl) fatal(P, tok.span, "%s is not defined", tok2str(tok)); if (decl->t == Dtype) { assert(0); } else if (decl->t == Dmacro) { parseexpandmacro(P, &decl->macro); return parseexpr(P); } else { ex.t = Ename; ex.span = tok.span; ex.ref = decl; if (decl->t == Dlet) { if (decl->var.fnid != P->curfn->id) fatal(P, tok.span, "cannot access local variable `%s' belonging to outer function", tok.str); ex.ty = decl->var.ty; } else if (decl->t == Dstatic) { ex.ty = decl->var.ty; } else if (decl->t == Dfn) { ex.ty = decl->fn.selfty; } else assert(0); } } else if (lexmatch(P, &tok, '(')) { if (lexpeek(P).t == TKkw_do) { struct blockstmt block; lex(P); block = parseblock0(P); ex.t = Eblock; ex.block = block; ex.ty = ty_void; if (ex.block.stmts.n > 0) { struct stmt *last = &block.stmts.d[block.stmts.n - 1]; if (last->t == Sexpr) ex.ty = last->expr.ty; } } else { ex = parseexpr(P); } lexexpect(P, ')'); } else { fatal(P, tok.span, "expected expression (near %s)", tok2str(tok)); } return ex; } static struct expr pexpostfix(struct parser *P) { struct expr ex = pexprimary(P); struct tok tok; for (;;) if (lexmatch(P, &tok, '(')) { vec_t(struct expr) args = {0}; int i = 0, n = ex.ty->fn.params.n; if (ex.ty->t != TYfn) fatal(P, ex.span, "callee is not a function"); while (!lexmatch(P, NULL, ')')) { struct expr arg = parseexpr(P); if (i == n && ! ex.ty->fn.variadic) fatal(P, arg.span, "too many args for call"); if (i < n && !typeof2(arg.ty, ex.ty->fn.params.d[i++])) fatal(P, arg.span, "call argument #%d type mismatch", i); vec_push(&args, arg); if (!lexmatch(P, NULL, ',')) { lexexpect(P, ')'); break; } } ex.call.callee = exprdup(ex); ex.t = Ecall; ex.span = tok.span; ex.ty = ex.call.callee->ty->fn.retty; vec_slice_cpy(&ex.call.args, &args); } else if (lexmatch(P, &tok, '[')) { struct expr lhs = ex, rhs = parseexpr(P); lexexpect(P, ']'); if (lhs.ty->t != TYarr && lhs.ty->t != TYptr) fatal(P, lhs.span, "indexee is not array or pointer type"); if (rhs.ty->t != TYint) fatal(P, lhs.span, "index expression type is not integral"); ex.t = Eindex; ex.span = tok.span; ex.ty = lhs.ty->child; ex.index.lhs = exprdup(lhs); ex.index.rhs = exprdup(rhs); } else if (lexmatch(P, &tok, '++') || lexmatch(P, &tok, '--')) { if (!isnumtype(ex.ty)) fatal(P, ex.span, "invalid operand to unary operator %s: not numeric", tok2str(tok)); if (!islvalue(&ex)) fatal(P, ex.span, "left operand to postfix %s operator is not lvalue", tok2str(tok)); if (ex.ty->konst) fatal(P, ex.span, "left operand to postfix %s operator is const", tok2str(tok)); ex = (struct expr) { Epostfix, tok.span, ex.ty, .unop = { tok.t, exprdup(ex) } }; } else { break; } return ex; } static struct expr pexprefix(struct parser *P) { struct tok tok; if (lexmatch(P, &tok, '-') || lexmatch(P, &tok, '~')) { struct expr ex = pexprefix(P); const struct type *ty = numpromote(ex.ty); if (!ty) fatal(P, ex.span, "invalid operand to unary operator %s: not numeric", tok2str(tok)); if (ty->t != TYint && tok.t == '~') fatal(P, ex.span, "invalid operand to unary operator %s: not integral", tok2str(tok)); return (struct expr) { Eprefix, tok.span, ty, .unop = { tok.t, exprdup(ex) } }; } else if (lexmatch(P, &tok, '++') || lexmatch(P, &tok, '--')) { struct expr ex = pexprefix(P); if (!isnumtype(ex.ty)) fatal(P, ex.span, "invalid operand to unary operator %s: not numeric", tok2str(tok)); if (!islvalue(&ex)) fatal(P, ex.span, "left operand to prefix %s operator is not lvalue", tok2str(tok)); if (ex.ty->konst) fatal(P, ex.span, "left operand to prefix %s operator is const", tok2str(tok)); return (struct expr) { Eprefix, tok.span, ex.ty, .unop = { tok.t, exprdup(ex) } }; } else if (lexmatch(P, &tok, TKkw_not)) { struct expr ex = pexprefix(P); if (ex.ty->t != TYbool) fatal(P, ex.span, "invalid operand to unary operator %s: not boolean", tok2str(tok)); return (struct expr) { Eprefix, tok.span, ty_bool, .unop = { 'not', exprdup(ex) } }; } else if (lexmatch(P, &tok, '*')) { struct expr ex = pexprefix(P); if (ex.ty->t != TYptr) fatal(P, ex.span, "invalid operand type to dereference, not pointer"); if (!completetype(ex.ty->child) && ex.ty->child->t != TYfn) fatal(P, ex.span, "invalid operand type to dereference, incomplete"); return (struct expr) { Eprefix, tok.span, ex.ty->child, .unop = { '*', exprdup(ex) } }; } else if (lexmatch(P, &tok, '&')) { struct expr ex = pexprefix(P); struct type ty2 = { TYptr, g_targ.ptrsize, .child = ex.ty}; if (!islvalue(&ex) && !(ex.t == Ename && ex.ref->t == Dfn)) fatal(P, ex.span, "invalid operand type to address-of, not an lvalue"); return (struct expr) { Eprefix, tok.span, interntype(ty2), .unop = { '&', exprdup(ex) } }; } return pexpostfix(P); } static bool peeksbitarithop(struct parser *P, struct tok* tokp) { struct tok tok = lexpeek(P); if (tokp) *tokp = tok; switch (tok.t) { case '+': case '-': case '*': case '/': case '%': return 1; case '&': case '|': case '^': case '<<': case '>>': return 2; case '##': return 3; } return 0; } static struct expr pexbitarith(struct parser *P) { struct expr ex; struct tok tok; int tokt; int oret; ex = pexprefix(P); if (!(oret = peeksbitarithop(P, &tok))) return ex; tokt = tok.t; while (lexmatch(P, &tok, tokt)) { struct expr rhs = pexprefix(P); const struct type *ty = typeof2(ex.ty, rhs.ty); if (!ty) { if ((tokt == '+' || tokt == '-') && ((ex.ty->t == TYptr && rhs.ty->t == TYint) || (ex.ty->t == TYint && rhs.ty->t == TYptr))) { ty = ex.ty->t == TYptr ? ex.ty : rhs.ty; } else { goto err; } } else if (tokt == '-' && ex.ty->t == TYptr && rhs.ty->t == TYptr) { ty = ty_isize; } else if (tokt != '##' && !isnumtype(ty)) { err: fatal(P, tok.span, "invalid operands to binary operator %s", tokt2str(tokt)); } if (oret == 2 && ty->t == TYfloat) fatal(P, tok.span, "invalid operands to bitwise operator: not integral", tokt2str(tokt)); if (tokt != '##') { ex = (struct expr) { Ebinop, tok.span, ty, .binop = { tokt, exprdup(ex), exprdup(rhs) } }; } else { char *s; size_t n = ex.strlit.n + rhs.strlit.n; /// TODO fold if (ex.t != Estrlit || rhs.t != Estrlit) fatal(P, tok.span, "operands to `##' operator are not string literals"); s = xmalloc(n + 1); memcpy(s, ex.strlit.d, ex.strlit.n); memcpy(s + ex.strlit.n, rhs.strlit.d, rhs.strlit.n + 1); ex = (struct expr) { Estrlit, tok.span, mkarraytype(constify(ty_u8), n + 1), .strlit.d = s, .strlit.n = n, }; } } return ex; } static bool matchcmpop(struct parser *P, struct tok *tokp) { struct tok tok = lexpeek(P); if (tokp) *tokp = tok; switch (tok.t) { case '==': case '<=': case '>=': case '<': case '>': lex(P); return 1; } return 0; } static struct expr pexcmp(struct parser *P) { struct expr ex; struct tok tok; ex = pexbitarith(P); if (matchcmpop(P, &tok)) { struct expr rhs = pexbitarith(P); if (!typeof2(ex.ty, rhs.ty)) fatal(P, tok.span, "incompatible operands to binary operator %s", tok2str(tok)); if (tok.t != '==' && !isnumtype(typeof2(ex.ty, rhs.ty))) fatal(P, tok.span, "invalid operands to relational operator %s: not numeric", tok2str(tok)); ex = (struct expr) { Ebinop, tok.span, ty_bool, .binop = { tok.t, exprdup(ex), exprdup(rhs) } }; } return ex; } static struct expr pexlog(struct parser *P) { struct expr ex; struct tok tok; int tokt; ex = pexcmp(P); tok = lexpeek(P); tokt = tok.t; if (tokt != TKkw_and && tokt != TKkw_or) return ex; while (lexmatch(P, &tok, tokt)) { struct expr rhs = pexcmp(P); if (ex.ty->t != TYbool || rhs.ty->t != TYbool) fatal(P, tok.span, "invalid operands to binary operator %s: not boolean", tokt2str(tok.t)); ex = (struct expr) { Ebinop, tok.span, ty_bool, .binop = { tokt == TKkw_and ? 'and' : 'or', exprdup(ex), exprdup(rhs) } }; } return ex; } static struct expr pexcond(struct parser *P) { struct expr ex; struct tok tok; ex = pexlog(P); if (lexmatch(P, &tok, '?')) { struct expr ex2 = parseexpr(P); struct expr ex3; const struct type *ty; lexexpect(P, ':'); ex3 = pexcond(P); if (!(ty = typeof2(ex2.ty, ex3.ty))) fatal(P, tok.span, "conditional operator branches have incompatible types"); ex = (struct expr) { Econd, tok.span, ty, .cond = { exprdup(ex), exprdup(ex2), exprdup(ex3) } }; } return ex; } static bool matchassignop(struct parser *P, struct tok *tokp) { struct tok tok = lexpeek(P); if (tokp) *tokp = tok; switch (tok.t) { case '=': lex(P); return 1; case '+=': case '-=': case '*=': case '/=': case '%=': lex(P); return 2; case '&=': case '|=': case '^=': case '<<=': case '>>=': lex(P); return 3; } return 0; } static struct expr pexassign(struct parser *P) { struct expr ex; struct tok tok; int oret; ex = pexcond(P); if ((oret = matchassignop(P, &tok))) { struct expr rhs = pexcond(P); if (!islvalue(&ex)) fatal(P, ex.span, "left operand to assignment operator %s is not an lvalue", tok2str(tok)); if (!typeof2(ex.ty, rhs.ty)) fatal(P, ex.span, "operands to assignment operator %s have incompatible types", tok2str(tok)); if (ex.ty->konst) fatal(P, ex.span, "left operand to assignment operator %s is const", tok2str(tok)); ex = (struct expr) { Ebinop, tok.span, ex.ty, .binop = { tok.t, exprdup(ex), exprdup(rhs) } }; } return ex; } static struct expr parseexpr(struct parser *P) { return pexassign(P); } /** Statements and declarations parsing **/ static struct stmt * stmtdup(struct stmt st) { return memcpy(xmalloc(sizeof st), &st, sizeof st); } static void parsevardecl(struct parser *P, struct decl *decl) { struct tok tok; const char *name; const struct type *ty = NULL; struct expr *ini = NULL; bool konst = 0; if (lexmatch(P, NULL, TKkw_const)) konst = 1; tok = lexexpect(P, TKident); name = tok.str; if (lexmatch(P, NULL, '=')) { ini = exprdup(parseexpr(P)); ty = ini->ty; } else { ty = parsetype(P); if (lexmatch(P, NULL, '=')) { ini = exprdup(parseexpr(P)); } else if (decl->t == Dlet) { fatal(P, tok.span, "variable must be initialized"); } } if (ini && !typeof2(ty, ini->ty)) fatal(P, tok.span, "incompatible initializer type"); if (!completetype(ty)) fatal(P, tok.span, "let `%s': variable type is incomplete", name); if (konst) ty = constify(ty); assert(ty); decl->name = name; decl->var.ty = ty; decl->var.ini = ini; decl->var.fnid = P->curfn ? P->curfn->id : -1; } static struct stmt pstlet(struct parser *P) { struct stmt st = {Sdecl}; st.decl.t = Dlet; parsevardecl(P, &st.decl); putdecl(P, st.decl.span, &st.decl); return st; } static struct stmt pstifelse(struct parser *P) { struct stmt st = {Sifelse}; st.ifelse.test = parseexpr(P); if (st.ifelse.test.ty->t != TYbool && st.ifelse.test.ty->t != TYptr) fatal(P, st.ifelse.test.span, "if statement condition must be bool or pointer"); lexexpect(P, '{'); st.ifelse.t = parseblock(P).block; if (lexmatch(P, NULL, TKkw_else)) { if (lexmatch(P, NULL, TKkw_if)) st.ifelse.f = stmtdup(pstifelse(P)); else { lexexpect(P, '{'); st.ifelse.f = stmtdup(parseblock(P)); } } return st; } static struct stmt pstfor(struct parser *P) { struct stmt st = {Sfor}; struct tok tok; if (lexmatch(P, NULL, TKkw_let)) { st.loop.ini = stmtdup(pstlet(P)); lexexpect(P, ';'); } else if (lexmatch(P, NULL, ';')) { // pass } else { st.loop.ini = stmtdup((struct stmt) { Sexpr, .expr = parseexpr(P) }); lexexpect(P, ';'); } if (lexmatch(P, &tok, ';')) { // for (...; ; ...;) -> for (...; #t; ...;) st.loop.test = (struct expr) { Eboolit, tok.span, ty_bool, .i = 1 }; } else { st.loop.test = parseexpr(P); lexexpect(P, ';'); } if (st.loop.test.ty->t != TYbool && st.loop.test.ty->t != TYptr) fatal(P, st.loop.test.span, "for statement condition must be bool or pointer"); if (!lexmatch(P, &tok, '{')) { st.loop.next = exprdup(parseexpr(P)); lexexpect(P, '{'); } st.loop.body = parseblock(P).block; return st; } static struct stmt parsestmt(struct parser *P); static struct stmt pstiswitch(struct parser *P, const struct expr *test) { struct stmt st = {Siswitch}; struct tok tok; vec_t(struct iswitchcase) cs = {0}; struct blockstmt *f = NULL; lexexpect(P, '{'); while (!lexmatch(P, &tok, '}')) { struct iswitchcase c; vec_t(struct expr) es = {0}; lexexpect(P, TKkw_case); if (lexmatch(P, &tok, TKkw_else)) { if (f) fatal(P, tok.span, "duplicate 'case else' block"); f = malloc(sizeof *f); *f = parseblock0(P); } else { do { struct expr ex = parseexpr(P); assert(ex.ty->t == TYint); vec_push(&es, ex); if (!lexmatch(P, &tok, ',')) { lexexpect(P, TKkw_do); break; } } while (!lexmatch(P, &tok, TKkw_do)) ; vec_slice_cpy(&c.es, &es); c.t = parseblock0(P); vec_push(&cs, c); } } st.iswitch.test = *test; vec_slice_cpy(&st.iswitch.cs, &cs); st.iswitch.f = f; return st; } static struct stmt pstswitch(struct parser *P) { struct tok tok; struct expr test; if (!lexmatch(P, &tok, '{')) { test = parseexpr(P); if (test.ty->t == TYint) return pstiswitch(P, &test); else assert(0 && "NYI"); } else { assert(0 && "NYI"); } } static void parsedecl(struct decl *, struct parser *, bool toplevel); static bool isdecltokt(int tokt) { switch (tokt) case TKkw_extern: case TKkw_fn: case TKkw_typedef: case TKkw_defmacro: case TKkw_static: return 1; return 0; } static void mergetoktrees(struct toktree *dst, int n) { size_t total = 0; for (int i = 0; i < n; ++i) { total += dst[i].n; if (i > 0) ++total; } dst->d = xrealloc(dst->d, total * sizeof (struct tok)); for (int i = 1; i < n; ++i) { dst->d[dst->n++] = (struct tok) { ',' }; memcpy(dst->d + dst->n, dst[i].d, dst[i].n * sizeof (struct tok)); dst->n += dst[i].n; } assert(dst->n == total); } static void parseexpandmacro(struct parser *P, const struct macro *macro) { vec_t(struct toktree) args = {0}; struct tok tok; struct expan expan = {0}; struct expanarg *expanargs; struct macrocase c; expan.span = lexpeek(P).span; lexexpect(P, '('); while (!lexmatch(P, &tok, ')')) { int pabal = 0, // ( ) parens balance bkbal = 0, // [ ] brackets .. bcbal = 0; // { } braces .. struct span espan = tok.span; vec_t(struct tok) toks = {0}; struct toktree ttoks; for (;;) { tok = lex(P); switch (tok.t) { case '[': ++bkbal; break; case ']': --bkbal; break; case '{': ++bcbal; break; case '}': --bcbal; break; case '(': ++pabal; break; case ')': if (--pabal >= 0) break; goto arg_done; case ',': if (!pabal && !bkbal && !bcbal) goto arg_done; break; case TKeof: fatal(P, espan, "unterminated macro invokation"); } vec_push(&toks, tok); } arg_done: vec_slice_cpy(&ttoks, &toks); vec_push(&args, ttoks); if (tok.t != ',') { assert(tok.t == ')'); break; } } for (int i = 0; i < macro->cs.n; ++i) { c = macro->cs.d[i]; if (!c.variadic && args.length == c.params.n) { goto ok; } else if (c.variadic && args.length >= c.params.n - 1) { int n = args.length - (c.params.n - 1); if (n == 0) { vec_push(&args, (struct toktree){0}); } else { mergetoktrees(args.data + c.params.n - 1, n); } goto ok; } } fatal(P, tok.span, "no match for invoking macro `%s' with %d arguments", macro->name, args.length); ok: expanargs = xcalloc(c.params.n, sizeof *expanargs); for (int i = 0; i < c.params.n; ++i) { expanargs[i].name = c.params.d[i]; expanargs[i].toks = args.data[i]; } expan.prev = P->curexpan; expan.name = macro->name; expan.args.d = expanargs; expan.args.n = c.params.n; expan.toks = c.body; ++P->expanno; P->curexpan = memcpy(xmalloc(sizeof expan), &expan, sizeof expan); } static struct stmt parsestmt(struct parser *P) { struct stmt st = {0}; struct tok tok; if (lexmatch(P, &tok, '{')) { st.span = tok.span; st = parseblock(P); } else if (lexmatch(P, &tok, TKkw_let)) { st = pstlet(P); st.span = tok.span; lexexpect(P, ';'); } else if (lexmatch(P, &tok, TKkw_if)) { st.span = tok.span; st = pstifelse(P); } else if (lexmatch(P, &tok, TKkw_while)) { st.t = Swhile; st.span = tok.span; st.loop.test = parseexpr(P); if (st.loop.test.ty->t != TYbool && st.loop.test.ty->t != TYptr) fatal(P, st.loop.test.span, "while statement condition must be bool or pointer"); lexexpect(P, '{'); st.loop.body = parseblock(P).block; } else if (lexmatch(P, &tok, TKkw_for)) { st.span = tok.span; st = pstfor(P); } else if (lexmatch(P, &tok, TKkw_switch)) { st = pstswitch(P); st.span = tok.span; } else if (lexmatch(P, &tok, TKkw_return)) { if (!P->curfn) fatal(P, tok.span, "return disallowed here"); st.t = Sreturn; st.span = tok.span; if (!lexmatch(P, &tok, ';')) { st.retex = exprdup(parseexpr(P)); lexexpect(P, ';'); if (!typeof2(st.retex->ty, P->curfn->retty)) fatal(P, st.retex->span, "incompatible type in return statement"); } else if (P->curfn->retty != ty_void) { fatal(P, tok.span, "return statement in non-void function must return a value"); } } else if (isdecltokt((tok = lexpeek(P)).t)) { st.t = Sdecl; st.span = tok.span; parsedecl(&st.decl, P, 0); if (st.decl.t == Dfn && !st.decl.externp && !st.decl.fn.body) fatal(P, tok.span, "cannot forward-declare non-external function inside another function"); if (st.decl.t == Dfn && st.decl.externp && st.decl.fn.body) fatal(P, tok.span, "cannot define external function inside another function"); } else { struct expr ex; st.span = tok.span; if ((tok = lexpeek(P)).t == TKident) { const struct decl *decl; if ((decl = finddecl(P, tok.str)) && decl->t == Dmacro) { lex(P); parseexpandmacro(P, &decl->macro); return parsestmt(P); } } ex = parseexpr(P); lexexpect(P, ';'); st.t = Sexpr; st.expr = ex; } return st; } static struct blockstmt parseblock0(struct parser *P) { vec_t(struct stmt) stmts = {0}; struct blockstmt block = {0}; int tokt; block.env.parent = P->curenv; pushenv(P, &block.env); while ((tokt = lexpeek(P).t) != '}' && tokt != TKkw_case && tokt != ')') { if (lexmatch(P, NULL, ';')) continue; vec_push(&stmts, parsestmt(P)); } vec_slice_cpy(&block.stmts, &stmts); popenv(P); return block; } static struct stmt parseblock(struct parser *P) { struct stmt st = { Sblock }; st.block = parseblock0(P); lexexpect(P, '}'); return st; } static const struct type * fntype(const struct fn *fn) { struct type ty = { TYfn, 0, g_targ.sizesize }; vec_t(const struct type *) params = {0}; ty.fn.retty = fn->retty; for (int i = 0; i < fn->params.n; ++i) vec_push(¶ms, fn->params.d[i].ty); vec_slice_cpy(&ty.fn.params, ¶ms); ty.fn.variadic = fn->variadic; return interntype(ty); } static void parsefn(struct decl *decl, struct parser *P) { vec_t(struct fnparam) params = {0}; struct tok tok; struct fn *fn = &decl->fn; const char *name = fn->name; memset(fn, 0, sizeof *fn); fn->name = name; lexexpect(P, '('); while (!lexmatch(P, NULL, ')')) { struct fnparam param; if (lexmatch(P, NULL, '...')) { fn->variadic = 1; } else { param.name = lexexpects(P, TKident, "parameter name").str; param.ty = parsetype(P); vec_push(¶ms, param); } if (!lexmatch(P, NULL, ',')) { lexexpect(P, ')'); break; } else if (fn->variadic) { lexexpect(P, ')'); break; } } vec_slice_cpy(&fn->params, ¶ms); fn->retty = unconstify(parsetype(P)); fn->selfty = fntype(fn); if (!lexmatch(P, &tok, ';')) { struct env *env = xcalloc(1, sizeof *env); static int id; fn->id = id++; lexexpect(P, '{'); env->parent = P->curenv; WITH_TMPCHANGE(struct fn *, P->curfn, fn) { pushenv(P, env); putdecl(P, tok.span, &(struct decl) { Dfn, fn->name, .fn = *fn, ._cname = decl->_cname }); for (int i = 0; i < params.length; ++i) { struct decl decl = { Dlet, params.data[i].name, .var = { .ty = params.data[i].ty, .fnid = fn->id, } }; putdecl(P, tok.span, &decl); } fn->body = xcalloc(1, sizeof *fn->body); *fn->body = parseblock(P); popenv(P); } } } static struct macrocase parsemacrocase(struct parser *P) { struct macrocase c = {0}; vec_t(const char *) params = {0}; vec_t(struct tok) body = {0}; struct tok tok; int pabal = 0, // ( ) parens balance bkbal = 1, // [ ] brackets .. bcbal = 0; // { } braces .. struct span espan; static int gensymid; struct gensyms { struct gensyms *next; const char *name; struct tok tok; } *gensyms = NULL; lexexpect(P, '('); while (!lexmatch(P, &tok, ')')) { if (lexmatch(P, &tok, '...')) { vec_push(¶ms, lexexpects(P, TKident, "parameter name").str); c.variadic = 1; lexmatch(P, &tok, ','); lexexpect(P, ')'); break; } else { vec_push(¶ms, lexexpects(P, TKident, "parameter name").str); if (!lexmatch(P, &tok, ',')) { lexexpect(P, ')'); break; } } } espan = tok.span; lexexpect(P, '['); while (bkbal || pabal || bcbal) { tok = lex(P); switch (tok.t) { case '(': ++pabal; break; case ')': --pabal; break; case '[': ++bkbal; break; case ']': --bkbal; break; case '{': ++bcbal; break; case '}': --bcbal; break; case TKeof: fatal(P, espan, "unterminated macro definition"); } if (tok.t == TKident) { for (int i = 0; i < params.length; ++i) if (!strcmp(params.data[i], tok.str)) tok.t = TKmacident; } else if (tok.t == TKgensym) { struct gensyms *gs; for (gs = gensyms; gs; gs = gs->next) { if (!strcmp(gs->name, tok.str)) { tok = gs->tok; goto found; } } gs = xcalloc(1, sizeof *gs); gs->name = tok.str; gs->tok.t = TKident; gs->tok.span = tok.span; gs->tok.str = xasprintf("__fG%s_%d", tok.str, gensymid++); tok = gs->tok; gs->next = gensyms; gensyms = gs; } found: vec_push(&body, tok); } (void)vec_pop(&body); for (struct gensyms *gs = gensyms, *next; gs; gs = next) { next = gs->next; free((char *)gs->name); free(gs); } vec_slice_cpy(&c.params, ¶ms); vec_slice_cpy(&c.body, &body); return c; } static struct macro parsemacro(struct parser *P) { struct tok tok; struct macro macro; vec_t(struct macrocase) cs = {0}; if (lexpeek(P).t == '(') { vec_push(&cs, parsemacrocase(P)); } else { lexexpect(P, '{'); while (!lexmatch(P, &tok, '}')) { vec_push(&cs, parsemacrocase(P)); if (!lexmatch(P, &tok, ',')) { lexexpect(P, '}'); break; } } } if (cs.length == 0) fatal(P, tok.span, "macro must have at least one case"); vec_slice_cpy(¯o.cs, &cs); return macro; } static void parsedecl(struct decl *decl, struct parser *P, bool toplevel) { struct tok tok = { .span = P->tokspan }; bool externp = 0; memset(decl, 0, sizeof *decl); if (lexmatch(P, &tok, TKkw_extern)) externp = 1; if (lexmatch(P, &tok, TKkw_fn)) { const char *name = lexexpects(P, TKident, "function name").str; decl->t = Dfn; decl->name = decl->fn.name = name; decl->_cname = xcalloc(1, sizeof (char *)); parsefn(decl, P); decl->externp = externp; } else if (lexmatch(P, &tok, TKkw_static)) { decl->t = Dstatic; decl->externp = externp; decl->_cname = xcalloc(1, sizeof (char *)); parsevardecl(P, decl); if (!toplevel && !externp && !decl->var.ini) fatal(P, decl->span, "static variable inside function cannot be forward-declared"); if (!toplevel && externp && decl->var.ini) fatal(P, decl->span, "extern static variable inside function cannot be initialized"); lexexpect(P, ';'); } else if (lexmatch(P, &tok, TKkw_typedef)) { if (externp) fatal(P, tok.span, "typedef cannot be `extern'"); decl->t = Dtype; decl->name = lexexpects(P, TKident, "typedef name").str; decl->ty = parsetype(P); decl->_cname = xcalloc(1, sizeof(char *)); lexexpect(P, ';'); } else if (lexmatch(P, &tok, TKkw_defmacro)) { if (externp) fatal(P, tok.span, "macro cannot be `extern'"); const char *name = lexexpects(P, TKident, "macro name").str; decl->t = Dmacro; decl->name = name; decl->macro = parsemacro(P); decl->macro.name = name; } else { fatal(P, tok.span, "expected declaration (near %s)", tok2str(tok)); } decl->span = tok.span; putdecl(P, tok.span, decl); } void parse(struct transunit *tu, struct parser *P) { vec_t(struct decl) decls = {0}; int i = 0; while (!P->eof && lexpeek(P).t != TKeof) { vec_push(&decls, (struct decl){0}); parsedecl(&decls.data[i++], P, 1); } vec_slice_cpy(&tu->decls, &decls); } void initparser(struct parser *P, const char *fname) { assert(NUM_LEXTOKENS - 1 < '!'); memset(P, 0, sizeof *P); P->curfile = fname; if (!(P->fp = fopen(fname, "r"))) perror(fname), exit(1); P->tokspan = P->curspan = (struct span) { addfilepath(fname), 0, 1, 1 }; P->peekchr = -1; P->curenv = P->tlenv = xcalloc(1, sizeof *P->tlenv); P->primenv = xcalloc(1, sizeof *P->primenv); putprimtypes(P->primenv); }