import "mem.hff"; import "util.hff"; import "vec.hff"; import "cffc.hff"; /////////// // Lexer // /////////// fn chr(P *Parser) int { let c int #?; switch P.peekchr { case Some pc; c = pc; P.peekchr = :None; case None; c = fgetc(P.fp); } ++P.curloc.idx; ++P.curloc.col; if c == '\n' { P.curloc.col = 1; ++P.curloc.line; } if c == EOF { P.eof = #t; } return c; } fn chrpeek(P *Parser) int { switch P.peekchr { case Some c; return c; } let c = fgetc(P.fp); if c == EOF { P.eof = #t; } P.peekchr = :Some c; return c; } fn chrmatch(P *Parser, c int) bool { if chrpeek(P) == c { chr(P); return #t; } return #f; } fn isspace(c u8) bool { switch c { case ' ', '\t', '\n', '\r', '\v', '\f'; return #t; } return #f; } fn isdigit(c u8) bool { return c >= '0' and c <= '9'; } fn isxdigit(c u8) bool { return isdigit(c) or (c >= 'A' and c <= 'F') or (c >= 'a' and c <= 'f'); } fn isalpha(c u8) bool { return (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z'); } fn issep(c u8) bool { if isspace(c) { return #t; } switch (c) { case '(', ')', '[', ']', '{', '}', '.', ',', ';', '?', '+', '-', '*', '/', '&', '|', '^', '~', '=', '\'', '"', '<', '>', ':', '@', '#', '\\', '`'; return #t; } return #f; } fn ishsep(c u8) bool { if isspace(c) { return #t; } switch (c) { case '(', ')', '[', ']', '{', '}', '.', ',', ';', '"'; return #t; } return #f; } fn readtilsep(P *Parser, buf [#]u8, dot bool) int { let i = 0, c u8 #?; while (!issep(c = chrpeek(P))) or (dot and c == '.') { chr(P); if i >= buf.#len - 1 { return -1; } buf[i++] = c; } buf[i++] = 0; return i; } fn readtilhsep(P *Parser, buf [#]u8, dot bool) int { let i = 0, c u8 #?, pred = &ishsep; while (!pred(c = chrpeek(P))) or (dot and c == '.') { chr(P); if !issep(c) { pred = &issep; } if i >= buf.#len - 1 { return -1; } buf[i++] = c; } buf[i++] = 0; return i; } fn eatspaces(P *Parser) void { for ;;chr(P) { if !isspace(chrpeek(P)) { break; } } } // !sorted extern static keyword2str [NUM_KEYWORDS]*const u8 = { "and", "as", "break", "case", "const", "continue", "def", "defmacro", "do", "else", "enum", "extern", "fn", "for", "if", "import", "let", "or", "return", "sizeof", "static", "struct", "switch", "typedef", "typeof", "union", "while", }; fn str2keyword(s *const u8) int { let i = 0zs, j = keyword2str.#len - 1; while i <= j { let k = (j + i) / 2; let cmp = strcmp(keyword2str[k], s); if cmp == 0 { return k; } else if cmp < 0 { i = k + 1; } else { j = k - 1; } } return -1; } fn xdigit2num(c u8) int { switch { case isdigit(c); return c - '0'; case c >= 'a' and c <= 'f'; return (c - 'a') + 10; case c >= 'A' and c <= 'F'; return (c - 'A') + 10; } return -1; } fn readnumber(s *const u8) Option { let c u8 #?, acc = 0u64, accf = 0.0f64, fmul = 0.1, base = 10, flt = #f, nused = 0, suffix *const u8 = #null; for let i = 0; (c = s[i]) != 0; ++i { if i == 0 and c == '0' { --nused; } if i == 1 and tolower(c) == 'x' { base = 16; continue; } if !flt and c == '.' and base == 10 { flt = #t; accf = acc; continue; } if nused > 0 and c == '_' { continue; } if (base == 16 and !isxdigit(c)) or (base != 16 and (c < '0' or c > ('0' + base) - 1)) { suffix = s + i; } ++nused; if flt { accf = accf + ((c - '0') * fmul); fmul *= 0.1; } else { acc = (acc * base) + xdigit2num(c); } } let tok = Tok {}; if flt { tok.t = :flo; tok.u.flo = accf; switch { case suffix == #null; tok.ty = ty_f64; case strcieq(suffix, "f"); tok.ty = ty_f32; case strcieq(suffix, "f32"); tok.ty = ty_f32; case strcieq(suffix, "f64"); tok.ty = ty_f64; case else; return :None; } return :Some tok; } else { tok.t = :int; tok.u.uint = acc; fn ty4intX(i u64) *const Type { if i < 1u64 << ((g_targ.intsize * 8) - 1) { return ty_int; } if i < 1u64 << 31 { return ty_i32; } if i < 1u64 << 63 { return ty_i64; } return ty_u64; } fn ty4uintX(i u64) *const Type { if i < 1u64 << (g_targ.intsize * 8) { return ty_uint; } if i < 1u64 << 32 { return ty_u32; } return ty_u64; } switch { case suffix == #null; tok.ty = ty4intX(tok.u.uint); case strcieq(suffix, "u"); tok.ty = ty4uintX(tok.u.uint); case strcieq(suffix, "u8"); tok.ty = ty_u8; case strcieq(suffix, "i8"); tok.ty = ty_i8; case strcieq(suffix, "u16"); tok.ty = ty_u16; case strcieq(suffix, "i16"); tok.ty = ty_i16; case strcieq(suffix, "u32"); tok.ty = ty_u32; case strcieq(suffix, "i32"); tok.ty = ty_i32; case strcieq(suffix, "u64"); tok.ty = ty_u64; case strcieq(suffix, "i64"); tok.ty = ty_i64; case strcieq(suffix, "z"); tok.ty = ty_usize; case strcieq(suffix, "zs"); tok.ty = ty_isize; case strcieq(suffix, "p"); tok.ty = ty_uptrint; case strcieq(suffix, "ps"); tok.ty = ty_iptrint; case else; return :None; } return :Some tok; } } fn lex(P *Parser) Tok { let c int #?; let tok Tok = {}; switch P.peektok { case Some pt; P.peektok = :None; return pt; } eatspaces(P); tok.loc = (P.tokloc = P.curloc); if isdigit(c = chrpeek(P)) { let s [80]u8 = {}; if readtilsep(P, s[0::], #t) < 0 { fatal(P, tok.loc, "bad number literal"); } switch readnumber(s) { case None; fatal(P, tok.loc, "bad number literal %qs", s); case Some tok; tok.loc = P.tokloc; return tok; } } if isalpha(c) or c == '_' or c == '$' { let s [120]u8; if readtilsep(P, s[0::], #f) < 0 { fatal(P, tok.loc, "identifier too long"); } let kw = str2keyword(s); if kw >= 0 { tok.t = kw; tok.u.ident = keyword2str[kw]; } else if (c == '$') { tok.t = :gensym; tok.u.ident = internstr(&s[1]); } else { tok.t = :ident; tok.u.ident = internstr(s); } return tok; } if c == '#' { let s [100]u8 = {}; if readtilhsep(P, s[0::], #f) < 0 { fatal(P, P.tokloc, "invalid #keyword"); } switch { case streq(s, "#") and chrpeek(P) == '{'; chr(P); let bal = 1; while bal != 0 { switch lex(P).t { case '{'; ++bal; case '}'; --bal; case :eof; fatal(P, tok.loc, "unterminated comment"); } } return lex(P); case streq(s, "#"); tok.t = '#'; case streq(s, "#t") or streq(s, "#f"); tok.t = :bool; tok.u.bool = s[1] == 't'; case streq(s, "#null"); tok.t = :null; case streq(s, "##"); tok.t = '##'; case streq(s, "#len"); tok.t = '#len'; case streq(s, "#tag"); tok.t = '#tag'; case streq(s, "#?"); tok.t = '#?'; case s[0] == '#' and s[1] == '\''; tok.t = :label; tok.u.str = spanz(internstr(s)); case else fatal(P, P.tokloc, "invalid #keyword `%s'", s); } return tok; } if c == '"' or c == '\'' { chr(P); let delim = c, str Vec = {}, c u8 #?, i = 0z; while (c = chr(P)) != delim { if c == 0 or c == '\n' { fatal(P, P.tokloc, "unterminated %s literal", delim == '"' ? "string" : "character"); } if c != '\\' { str->push(c); continue; } switch ((c = chr(P))) { case 0, '\n'; fatal(P, P.tokloc, "unterminated %s literal", delim == '"' ? "string" : "character"); case '\''; str->push('\''); case '\\'; str->push('\\'); case '"'; str->push('"'); case 'n'; str->push('\n'); case 'r'; str->push('\r'); case 't'; str->push('\t'); case 'v'; str->push('\v'); case 'f'; str->push('\f'); case '0'; str->push('\0'); case 'x'; let x0 = xdigit2num(chr(P)), x1 = xdigit2num(chr(P)); if x0 < 0 or x1 < 0 { fatal(P, P.tokloc, "not a hex byte escape sequence"); } str->push((x0 << 4) | x1); case else fatal(P, P.tokloc, "unknown escape sequence '\\%c'", c); } } if delim == '"' { tok.t = :str; tok.u.str = str->move(P.alloc); } else { tok.t = :chr; if str.len == 0 { fatal(P, P.tokloc, "empty char literal"); } else if str.len > 8 { fatal(P, P.tokloc, "too long multichar literal %qs", str.dat); } tok.u.uint = 0; vec_each(c0, i, str, tok.u.uint = (tok.u.uint << 8) | c0; ) } return tok; } switch c = chr(P) { case '(', ')', '[', ']', '{', '}', ',', ';', '?', '~'; tok.t = c; return tok; case '.'; if chrmatch(P, '.') { if chrmatch(P, '.') { tok.t = '...'; } else { tok.t = '..'; } } else { tok.t = '.'; } return tok; case '*'; if chrmatch(P, '=') { tok.t = '*='; } else { tok.t = '*'; } return tok; case '/'; if chrmatch(P, '=') { tok.t = '/='; } else if chrmatch(P, '/') { while (c = chr(P)) != 0 and c != '\n' { } return lex(P); } else { tok.t = '/'; } return tok; case '%'; if chrmatch(P, '=') { tok.t = '%='; } else { tok.t = '%'; } return tok; case '+'; if chrmatch(P, '=') { tok.t = '+='; } else if chrmatch(P, '+') { tok.t = '++'; } else { tok.t = '+'; } return tok; case '-'; if chrmatch(P, '=') { tok.t = '-='; } else if chrmatch(P, '-') { tok.t = '--'; } else if chrmatch(P, '>') { tok.t = '->'; } else { tok.t = '-'; } return tok; case '&'; if chrmatch(P, '=') { tok.t = '&='; } else { tok.t = '&'; } return tok; case '|'; if chrmatch(P, '=') { tok.t = '|='; } else { tok.t = '|'; } return tok; case '^'; if chrmatch(P, '=') { tok.t = '^='; } else { tok.t = '^'; } return tok; case ':'; if chrmatch(P, ':') { tok.t = '::'; } else { tok.t = ':'; } return tok; case '='; if chrmatch(P, '=') { tok.t = '=='; } else { tok.t = '='; } return tok; case '!'; if chrmatch(P, '=') { tok.t = '!='; } else { tok.t = '!'; } return tok; case '<'; if chrmatch(P, '=') { tok.t = '<='; } else if chrmatch(P, '<') { if chrmatch(P, '=') { tok.t = '<<='; } else { tok.t = '<<'; } } else { tok.t = '<'; } return tok; case '>'; if chrmatch(P, '=') { tok.t = '>='; } else if chrmatch(P, '>') { if chrmatch(P, '=') { tok.t = '>>='; } else { tok.t = '>>'; } } else { tok.t = '>'; } return tok; case EOF, 0; tok.t = :eof; return tok; } fatal(P, tok.loc, "stray %qc in program", c); } fn lexpeek(P *Parser) Tok { switch P.peektok { case Some t; return t; } let tok = lex(P); P.peektok = :Some tok; return tok; } fn lexmatch(P *Parser, tokp *Tok, t TokT) bool { let tok = lexpeek(P); if tokp { *tokp = tok; } if tok.t == t { lex(P); return #t; } return #f; } fn lexexpects(P *Parser, t TokT, what *const u8) Tok { let tok = Tok {}; if !lexmatch(P, &tok, t) { if what { fatal(P, tok.loc, "expected %s (near %qT)", what, tok); } else { fatal(P, tok.loc, "expected %qk (near %qT)", t, tok); } } return tok; } defmacro lexexpect(P, t) [lexexpects(P, t, #null)] //////////// // Parser // //////////// static primenv *Env = {}; fn finddecl(P *Parser, name *const u8) *Decl { if primenv == #null { primenv = mkenv(#null, P.alloc); putprimtypes(primenv); } let p = envfind(primenv, name); return p ? p : envfind(P.curenv, name); } fn putdecl(P *Parser, decl Decl) *Decl { assert(primenv != #null, "primenv"); if envfind(primenv, decl.name) { fatal(P, decl.loc, "cannot shadow primitive `%s'", decl.name); } let d0 *const Decl; let d = envput(P.curenv, decl, &d0); if d == #null { fatal(P, decl.loc, "attempt to redefine `%s'", decl.name); } return d; } /////////////////// // Types Parsing // /////////////////// fn parsetype(P *Parser) *const Type { let tok Tok = {}; switch { case lexmatch(P, &tok, :kw_const); return constify(parsetype(P)); case lexmatch(P, &tok, '*'); return mkptrtype(parsetype(P)); case lexmatch(P, &tok, :ident); let decl = finddecl(P, tok.u.ident); if decl == #null { fatal(P, tok.loc, "%qT is not defined", tok); } if decl.u.#tag != :Ty { fatal(P, tok.loc, "%qT is not a type", tok); } return decl.u.Ty; case else; fatal(P, tok.loc, "expected type (near %qT)", tok); } } ///////////////////////// // Expressions Parsing // ///////////////////////// fn exprdup(alloc *Allocator, ex Expr) *Expr { return memcpy(alloc->alloc(sizeof(ex)), &ex, sizeof(ex)); } fn islvalue(ex Expr) bool { switch ex.u { case Symbol decl; return decl.u.#tag == :Let or decl.u.#tag == :Static; case UnOp u; return u.op == :deref; } return #f; } fn parseexpr(P *Parser) Expr; fn pexprimary(P *Parser) Expr { let tok Tok = lex(P); switch tok.t { case :int, :chr; return { tok.loc, tok.ty, .u: :IntLit { tok.u.int }}; case :flo; return { tok.loc, tok.ty, .u: :FloLit tok.u.flo }; case :bool; return { tok.loc, ty_bool, .u: :BoolLit tok.u.bool }; case :null; return { tok.loc, ty_voidptr, .u: :NullLit }; case :str; let ty = mkarrtype(tok.u.str.#len + 1, #t, ty_u8); return { tok.loc, ty, .u: :StrLit tok.u.str }; case :ident; let ident = tok.u.ident; let decl = finddecl(P, ident); if decl == #null { fatal(P, tok.loc, "%qT is not defined", tok); } switch decl.u { case Fn f; return { tok.loc, f.ty, .u: :Symbol decl }; case Let var; return { tok.loc, var.ty, .u: :Symbol decl }; case Static var; return { tok.loc, var.ty, .u: :Symbol decl }; } case '('; let ex = parseexpr(P); lexexpect(P, ')'); return ex; case else; fatal(P, tok.loc, "expected expression (near %qT)", tok); } } fn pexpostfix(P *Parser) Expr { let tok Tok = {}; let ex = pexprimary(P); for ;; { switch { case lexmatch(P, &tok, '('); let lhs = exprdup(P.alloc, ex), ty = lhs.ty->is(:Ptr) ? lhs.ty.u.Ptr : lhs.ty; if !ty->is(:Fn) { fatal(P, lhs.loc, "not callable (%t)", lhs.ty); } let Fn = &ty.u.Fn; let args Vec = {}; while !lexmatch(P, #null, ')') { args->push(parseexpr(P)); if args.len > Fn.params.#len and !Fn.variadic { fatal(P, args->last().loc, "too many args (%z, expected %z)", args.len, Fn.params.#len); } if !lexmatch(P, #null, ',') { lexexpect(P, ')'); break; } } if args.len < Fn.params.#len { err(P, lhs.loc, "too few args (%z, expected %z)", args.len, Fn.params.#len); } ex = { tok.loc, .ty: Fn.ret, .u: :Call { lhs, args->move(P.alloc) }}; case else; break; } } return ex; } fn pexprefix(P *Parser) Expr { return pexpostfix(P); } fn pexbitarith(P *Parser) Expr { enum Kind { None, IntFlo, Int, StrLit } fn peeksop(P *Parser, tokp *Tok) Kind { let tok = lexpeek(P); if tokp { *tokp = tok; } switch tok.t { case '+', '-', '*', '/'; return :IntFlo; case '%', '&', '|', '^', '<<', '>>'; return :Int; case '##'; return :StrLit; } return :None; } let ex = pexprefix(P); let tok Tok = {}; let okind = peeksop(P, &tok); if okind == :None { return ex; } let tokt = tok.t; while lexmatch(P, &tok, tokt) { let rhs = pexprefix(P); let ty = typeof2(ex.ty, rhs.ty); switch { case ty == #null and (tokt == '+' or tokt == '-') and ((ex.ty->is(:Int) and rhs.ty->is(:Ptr)) or (rhs.ty->is(:Int) and ex.ty->is(:Ptr))); ty = ex.ty->is(:Ptr) ? ex.ty : rhs.ty; case ty == #null; fatal(P, tok.loc, "invalid operands %t and %t to binary operator %k", ex.ty, rhs.ty, tokt); case tokt == '-' and ex.ty->is(:Ptr) and rhs.ty->is(:Ptr); ty = ty_isize; case okind == :Int and ty->is(:Flo); err(P, tok.loc, "invalid operands %t and %t to binary operator %k", ex.ty, rhs.ty, tokt); case okind != :StrLit and !isnumtype(ty); fatal(P, tok.loc, "invalid operands %t and %t to binary operator %k", ex.ty, rhs.ty, tokt); } ex = { tok.loc, ty, .u: :BinOp { tokt, exprdup(P.alloc, ex), exprdup(P.alloc, rhs) }}; } return ex; } fn pexcmp(P *Parser) Expr { let ex = pexbitarith(P); return ex; } fn pexlog(P *Parser) Expr { let ex = pexcmp(P); return ex; } fn pexcond(P *Parser) Expr { let ex = pexlog(P); return ex; } fn pexassign(P *Parser) Expr { enum Kind { None, Set, IntFlo, Int } fn matchop(P *Parser, tokp *Tok) Kind { let tok = lexpeek(P); if tokp { *tokp = tok; } switch tok.t { case '='; lex(P); return :Set; case '+=', '-=', '*=', '/='; lex(P); return :IntFlo; case '%=', '&=', '|=', '^=', '<<=', '>>='; lex(P); return :Int; } return :None; } let tok Tok = {}; let ex = pexcond(P); let okind = matchop(P, &tok); if okind == :None { return ex; } let rhs = pexcond(P); switch { case !islvalue(ex); err(P, ex.loc, "left operand to assignment is not lvalue"); case (typeof2(ex.ty, rhs.ty) == #null) and !((ex.ty->is(:Ptr) and rhs.ty->is(:Int) and (tok.t == '+=' or tok.t == '-='))); err(P, ex.loc, "operands %t and %t to assignment operator %qT have incompatible types", ex.ty, rhs.ty, tok); case ex.ty.konst; err(P, ex.loc, "left operand to assignment is const"); } return { tok.loc, ex.ty, .u: :BinOp { tok.t, exprdup(P.alloc, ex), exprdup(P.alloc, rhs) }}; } fn parseexpr(P *Parser) Expr { return pexassign(P); } //////////////////////// // Statements Parsing // //////////////////////// typedef DeclYielder *fn(*Decl, *void) void; typedef StmtYielder *fn(Stmt, *void) void; fn parsestmts(P *Parser, yield StmtYielder, yarg *void) void { let tok Tok = {}; switch { case lexmatch(P, &tok, :kw_if); case else; let ex = parseexpr(P); lexexpect(P, ';'); return yield({ tok.loc, .u: :Expr ex }, yarg); } } fn parseblock0(P *Parser) [#]Stmt { let sts Vec = {}; let tok Tok = {}; while (tok = lexpeek(P)).t != '}' and tok.t != :kw_case and tok.t != '}' { if lexmatch(P, #null, ';') { continue; } fn pushstmt(st Stmt, arg *void) void { let sts *Vec = arg; sts->push(st); } parsestmts(P, &pushstmt, &sts); } return sts->move(P.alloc); } fn parseblock(P *Parser) [#]Stmt { let b = parseblock0(P); lexexpect(P, '}'); return b; } /////////////////// // Decls Parsing // /////////////////// fn parsefn(P *Parser, decl *Decl, externp bool, name *const u8) void { decl.name = name; decl.u = :Fn {}; let Fn = &decl.u.Fn, tok Tok = {}, paramnames Vec<*const u8> = {}, paramtys Vec<*const Type> = {}; lexexpect(P, '('); while !lexmatch(P, &tok, ')') { if lexmatch(P, #null, '...') { Fn.variadic = #t; lexmatch(P, #null, ','); lexexpect(P, ')'); break; } let name = lexexpect(P, :ident).u.ident; let type = parsetype(P); paramnames->push(name); paramtys->push(type); if !lexmatch(P, &tok, ',') { lexexpect(P, ')'); break; } } Fn.paramnames = paramnames->move(P.alloc); let retty = parsetype(P); Fn.ty = interntype({ .size: 0, .align: 1, .u: :Fn { paramtys->move(P.alloc), Fn.variadic, retty }}); static id int = 0; Fn.id = ++id; if !lexmatch(P, #null, '{') { lexexpects(P, ';', "';' or '{'"); return; } with_tmpchange(P.alloc, &Allocator { &Arena {}, &Arena:allocf }, let env = mkenv(P.curenv, P.alloc); P.curenv = env; foreach(name, i, Fn.paramnames, if name { putdecl(P, { name, tok.loc, .u: :Let { Fn.ty.u.Fn.params[i], :None, Fn.id, }}); } ) Fn.body = :Some parseblock(P); P.curenv = envparent(env); envfree(env); (as(*Arena)P.alloc.a)->destroy(); ); } fn parsedecls(P *Parser, yield DeclYielder, yarg *void) void { let tok = Tok { .loc: P.tokloc }, decl = Decl {}, externp = #f, name *const u8 = #null; if lexmatch(P, &tok, :kw_extern) { externp = #t; } switch { case lexmatch(P, &tok, :kw_fn); let name = lexmatch(P, &tok, :ident) ? tok.u.ident : #null; parsefn(P, &decl, externp, name); decl.loc = tok.loc; decl.externp = externp; case else; fatal(P, tok.loc, "expected declaration (near %qT)", tok); } yield(putdecl(P, decl), yarg); } extern fn parse(P *Parser) [#]Decl { let aralloc = Arena {}; let alloc = Allocator { &aralloc, &Arena:allocf, #null }; let decls Vec<*Decl> = {}; P.alloc = &alloc; P.curenv = mkenv(#null, P.alloc); while !P.eof { fn yield(decl *Decl, arg *void) void { let decls *Vec<*Decl> = arg; decls->push(decl); } parsedecls(P, &yield, &decls); if lexmatch(P, #null, :eof) { break; } } envfree(P.curenv); aralloc->destroy(); decls->clear(); } extern fn parser_init(P *Parser, path *const u8) void { assert(NUM_KEYWORDS - 1 < '!', "2manykw"); *P = {}; P.curfile = path; if (P.fp = fopen(path, "r")) == #null { perror(path); exit(1); } P.tokloc = (P.curloc = { addfilepath(path), 0, 1, 1 }); P.peekchr = :None; }