import "vec.hff";
import "all.hff";

///////////
// Lexer //
///////////

// Consume and return the next input character.
// A character previously buffered by chrpeek() is returned first; otherwise
// one is read from P.fp with fgetc(). The current location (idx/col/line) is
// advanced, and P.eof is set once EOF has been returned.
fn chr(P *Parser) int {
    let c int #?;
    switch P.peekchr {
    case Some pc;
        c = pc;
        P.peekchr = :None;
    case None;
        c = fgetc(P.fp);
    }
    ++P.curloc.idx;
    ++P.curloc.col;
    if c == '\n' {
        P.curloc.col = 1;
        ++P.curloc.line;
    }
    if c == EOF {
        P.eof = #t;
    }
    return c;
}

// Return the next input character without consuming it.
// The character is stored in P.peekchr so the following chr() returns it.
// P.eof is set as soon as EOF is seen here.
fn chrpeek(P *Parser) int {
    switch P.peekchr {
    case Some c;
        return c;
    }
    let c = fgetc(P.fp);
    if c == EOF {
        P.eof = #t;
    }
    P.peekchr = :Some c;
    return c;
}

// Consume the next character only if it equals `c`; report whether it did.
fn chrmatch(P *Parser, c int) bool {
    if chrpeek(P) == c {
        chr(P);
        return #t;
    }
    return #f;
}

// True for ASCII whitespace (space, \t, \n, \r, \v, \f).
fn isspace(c u8) bool {
    switch c {
    case ' ', '\t', '\n', '\r', '\v', '\f';
        return #t;
    }
    return #f;
}

// True for ASCII decimal digits.
fn isdigit(c u8) bool {
    return c >= '0' and c <= '9';
}

// True for ASCII hexadecimal digits (either case).
fn isxdigit(c u8) bool {
    return isdigit(c) or (c >= 'A' and c <= 'F') or (c >= 'a' and c <= 'f');
}

// True for ASCII letters.
fn isalpha(c u8) bool {
    return (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z');
}

// True if `c` ends an identifier/number word: whitespace or one of the
// punctuation characters listed below.
fn issep(c u8) bool {
    if isspace(c) {
        return #t;
    }
    switch (c) {
    case '(', ')', '[', ']', '{', '}', '.', ',', ';', '?', '+', '-', '*', '/', '&', '|', '^', '~', '=', '\'', '"', '<', '>', ':', '@', '#', '\\', '`';
        return #t;
    }
    return #f;
}

// Reduced separator set used while reading the leading part of a '#' word
// (see readtilhsep): whitespace, brackets, '.', ',', ';' and '"'.
fn ishsep(c u8) bool {
    if isspace(c) {
        return #t;
    }
    switch (c) {
    case '(', ')', '[', ']', '{', '}', '.', ',', ';', '"';
        return #t;
    }
    return #f;
}

// Read characters into `buf` until a separator (issep) is peeked, then
// NUL-terminate. When `dot` is set, '.' does not stop the read.
// Returns the number of bytes written including the terminator, or -1 if
// the word does not fit in `buf`.
fn readtilsep(P *Parser, buf [#]u8, dot bool) int {
    let i = 0, c u8 #?;
    // chrpeek() sets P.eof when it hits end of input; EOF narrowed to u8 is
    // not a separator, so it must be tested explicitly or the word would
    // appear to continue until the buffer overflows.
    while ((not issep(c = chrpeek(P))) or (dot and c == '.')) and (not P.eof) {
        chr(P);
        if i >= buf.#len - 1 {
            return -1;
        }
        buf[i++] = c;
    }
    buf[i++] = 0;
    return i;
}

// Like readtilsep, but starts with the reduced separator set (ishsep) and
// switches to the full set (issep) after the first character that is not a
// full separator has been read. This lets a word begin with characters such
// as '#' that would otherwise end it immediately.
fn readtilhsep(P *Parser, buf [#]u8, dot bool) int {
    let i = 0, c u8 #?, pred = &ishsep;
    // Stop at end of input as well; see readtilsep.
    while ((not pred(c = chrpeek(P))) or (dot and c == '.')) and (not P.eof) {
        chr(P);
        if not issep(c) {
            pred = &issep;
        }
        if i >= buf.#len - 1 {
            return -1;
        }
        buf[i++] = c;
    }
    buf[i++] = 0;
    return i;
}

// Skip over any run of whitespace in the input.
fn eatspaces(P *Parser) void {
    for ;;chr(P) {
        if not isspace(chrpeek(P)) {
            break;
        }
    }
}

// Keyword spellings. The table must stay sorted in strcmp() order because
// str2keyword() binary-searches it; the index of a keyword is also used as
// its token type in lex().
// !sorted
static keyword2str []*const u8 = {
    "and",
    "as",
    "break",
    "case",
    "const",
    "continue",
    "def",
    "defmacro",
    "do",
    "else",
    "enum",
    "extern",
    "fn",
    "for",
    "if",
    "import",
    "let",
    "not",
    "or",
    "return",
    "sizeof",
    "static",
    "struct",
    "switch",
    "typedef",
    "typeof",
    "union",
    "while",
};

// Binary-search keyword2str for `s`.
// Returns the keyword's index, or -1 if `s` is not a keyword.
fn str2keyword(s *const u8) int {
    // NOTE(review): if `.#len` yields an unsigned type, `j = k - 1` with
    // k == 0 would wrap instead of going negative — confirm `j` is signed.
    let i = 0, j = keyword2str.#len - 1;
    while i <= j {
        let k = (j + i) / 2;
        let cmp = strcmp(keyword2str[k], s);
        if cmp == 0 {
            return k;
        } else if cmp < 0 {
            i = k + 1;
        } else {
            j = k - 1;
        }
    }
    return -1;
}

// Convert the NUL-terminated number text `s` into an :int or :flo token.
// Accepts decimal integers, "0x"-prefixed hexadecimal integers, decimal
// floats with a single '.', and '_' digit separators after the first digit.
// Conversion stops at the first character that is not a valid digit for the
// current base; `suffix` then points at it.
fn readnumber(s *const u8) Option {
    let c u8 #?, acc = 0u64, accf = 0.0f64, fmul = 0.1, base = 10, flt = #f, nused = 0, suffix *const u8 = #null;
    for let i = 0; (c = s[i]) != 0; ++i {
        // A leading '0' is not counted as a used digit.
        if i == 0 and c == '0' {
            --nused;
        }
        // Hex prefix: only "0x"/"0X", not an 'x' after an arbitrary digit.
        if i == 1 and s[0] == '0' and tolower(c) == 'x' {
            base = 16;
            continue;
        }
        if not flt and c == '.' and base == 10 {
            flt = #t;
            accf = acc;
            continue;
        }
        if nused > 0 and c == '_' {
            continue;
        }
        if (base == 16 and not isxdigit(c)) or (base != 16 and (c < '0' or c > ('0' + base) - 1)) {
            // Start of a suffix (e.g. "u64"): stop here so the suffix
            // characters are not folded into the numeric value.
            // NOTE(review): `suffix` is recorded but not yet interpreted.
            suffix = s + i;
            break;
        }
        ++nused;
        if flt {
            accf = accf + ((c - '0') * fmul);
            fmul *= 0.1;
        } else {
            c = tolower(c);
            acc = (acc * base) + (c <= '9' ? c - '0' : (c - 'a') + 10);
        }
    }
    let tok = Tok {};
    if flt {
        tok.t = :flo;
        tok.u.flo = accf;
        return :Some tok;
    } else {
        tok.t = :int;
        tok.u.uint = acc;
        return :Some tok;
    }
    // Not reached: both branches above return.
    return :None;
}

// Produce the next token from the input.
// Returns a previously peeked token (P.peektok) if there is one. Otherwise
// skips whitespace, records the token location in P.tokloc, and recognises
// numbers, identifiers/keywords, '#' words, string and character literals,
// "//" line comments (skipped) and operators/punctuation. Returns an :eof
// token at end of input and calls fatal() on malformed input.
fn lex(P *Parser) Tok {
    let c int #?;
    let tok Tok = {};

    switch P.peektok {
    case Some pt;
        P.peektok = :None;
        return pt;
    }

    eatspaces(P);
    tok.loc = (P.tokloc = P.curloc);

    // Number literal.
    if isdigit(c = chrpeek(P)) {
        let s [80]u8 = {};
        if readtilsep(P, s[0::], #t) < 0 {
            fatal(P, tok.loc, "bad number literal");
        }
        switch readnumber(s) {
        case None;
            fatal(P, tok.loc, "bad number literal %qs", s);
        case Some tok;
            tok.loc = P.tokloc;
            return tok;
        }
    }

    // Identifier or keyword. Keywords use their table index as token type.
    if isalpha(c) or c == '_' {
        let s [120]u8;
        if readtilsep(P, s[0::], #f) < 0 {
            fatal(P, tok.loc, "identifier too long");
        }
        let kw = str2keyword(s);
        if kw >= 0 {
            tok.t = kw;
            tok.u.ident = keyword2str[kw];
        } else {
            tok.t = :ident;
            tok.u.ident = internstr(s);
        }
        return tok;
    }

    // '#' word.
    if c == '#' {
        let s [100]u8 = {};
        if readtilhsep(P, s[0::], #f) < 0 {
            fatal(P, P.tokloc, "invalid #keyword");
        }
        switch {
        case streq(s, "#");
            tok.t = '#';
        case else
            fatal(P, P.tokloc, "invalid #keyword");
        }
        // The word has already been consumed; return it here instead of
        // falling through to the operator switch, which would read and
        // return the following character's token.
        return tok;
    }

    // String ("...") or character ('...') literal.
    if c == '"' or c == '\'' {
        chr(P);
        let delim = c;
        let str Vec = {};
        let c u8 #?;
        let i = 0z;
        while (c = chr(P)) != delim {
            // `c` is a u8 here, so EOF cannot be compared directly; chr()
            // sets P.eof when it returns EOF.
            if P.eof or c == 0 or c == '\n' {
                fatal(P, P.tokloc, "unterminated %s literal", delim == '"' ? "string" : "character");
            }
            if c != '\\' {
                str->push(c);
                continue;
            }
            // Escape sequence.
            switch ((c = chr(P))) {
            case 0, '\n';
                fatal(P, P.tokloc, "unterminated %s literal", delim == '"' ? "string" : "character");
            case '\'';
                str->push('\'');
            case '\\';
                str->push('\\');
            case '"';
                str->push('"');
            case 'n';
                str->push('\n');
            case 'r';
                str->push('\r');
            case 't';
                str->push('\t');
            case 'v';
                str->push('\v');
            case 'f';
                str->push('\f');
            case '0';
                str->push('\0');
            case else
                fatal(P, P.tokloc, "unknown escape sequence '\\%c'", c);
            }
        }
        if delim == '"' {
            tok.t = :str;
            tok.u.str = str->compact();
        } else {
            tok.t = :chr;
            if str.len == 0 {
                fatal(P, P.tokloc, "empty char literal");
            } else if str.len > 8 {
                fatal(P, P.tokloc, "too long multichar literal %qs", str.dat);
            }
            // Pack up to 8 characters into one integer, first character in
            // the most significant position.
            tok.u.uint = 0;
            vec_each(c0, i, str,
                tok.u.uint = (tok.u.uint << 8) | c0;
            )
        }
        return tok;
    }

    // Operators and punctuation. Single-character tokens use the character
    // itself as the token type; multi-character ones use multichar literals.
    switch c = chr(P) {
    case '(', ')', '[', ']', '{', '}', ',', ';', '?', '~';
        tok.t = c;
        return tok;
    case '.';
        if chrmatch(P, '.') {
            if chrmatch(P, '.') {
                tok.t = '...';
            } else {
                tok.t = '..';
            }
        } else {
            tok.t = '.';
        }
        return tok;
    case '*';
        if chrmatch(P, '=') {
            tok.t = '*=';
        } else {
            tok.t = '*';
        }
        return tok;
    case '/';
        if chrmatch(P, '=') {
            tok.t = '/=';
        } else if chrmatch(P, '/') {
            // Line comment: discard through end of line. Also stop at EOF,
            // otherwise a comment on the last line without a trailing
            // newline would never terminate.
            while (c = chr(P)) != EOF and c != 0 and c != '\n' {
            }
            return lex(P);
        } else {
            tok.t = '/';
        }
        return tok;
    case '%';
        if chrmatch(P, '=') {
            tok.t = '%=';
        } else {
            tok.t = '%';
        }
        return tok;
    case '+';
        if chrmatch(P, '=') {
            tok.t = '+=';
        } else if chrmatch(P, '+') {
            tok.t = '++';
        } else {
            tok.t = '+';
        }
        return tok;
    case '-';
        if chrmatch(P, '=') {
            tok.t = '-=';
        } else if chrmatch(P, '-') {
            tok.t = '--';
        } else if chrmatch(P, '>') {
            tok.t = '->';
        } else {
            tok.t = '-';
        }
        return tok;
    case '&';
        if chrmatch(P, '=') {
            tok.t = '&=';
        } else {
            tok.t = '&';
        }
        return tok;
    case '|';
        if chrmatch(P, '=') {
            tok.t = '|=';
        } else {
            tok.t = '|';
        }
        return tok;
    case '^';
        if chrmatch(P, '=') {
            tok.t = '^=';
        } else {
            tok.t = '^';
        }
        return tok;
    case ':';
        if chrmatch(P, ':') {
            tok.t = '::';
        } else {
            tok.t = ':';
        }
        return tok;
    case '=';
        if chrmatch(P, '=') {
            tok.t = '==';
        } else {
            tok.t = '=';
        }
        return tok;
    case '!';
        if chrmatch(P, '=') {
            tok.t = '!=';
        } else {
            tok.t = '!';
        }
        return tok;
    case '<';
        if chrmatch(P, '=') {
            tok.t = '<=';
        } else if chrmatch(P, '<') {
            if chrmatch(P, '=') {
                tok.t = '<<=';
            } else {
                tok.t = '<<';
            }
        } else {
            tok.t = '<';
        }
        return tok;
    case '>';
        if chrmatch(P, '=') {
            tok.t = '>=';
        } else if chrmatch(P, '>') {
            if chrmatch(P, '=') {
                tok.t = '>>=';
            } else {
                tok.t = '>>';
            }
        } else {
            tok.t = '>';
        }
        return tok;
    case EOF, 0;
        tok.t = :eof;
        return tok;
    }

    fatal(P, tok.loc, "stray %qc in program", c);
}

// Lex the whole input and print each token with efmt() until :eof.
// NOTE(review): declared to return [#]Decl but no value is returned; this
// currently only dumps tokens — confirm the intended return value.
extern fn parse(P *Parser) [#]Decl {
    while not P.eof {
        let tok = lex(P);
        if tok.t == :eof {
            break;
        }
        efmt("* tok: %qT\n", tok);
    }
}

// Reset `*P` and open `path` for reading.
// On fopen() failure, prints the error with perror() and exits with
// status 1. Locations start at idx 0, line/col 1 in the file registered via
// addfilepath().
extern fn parser_init(P *Parser, path *const u8) void {
    // Keyword token types are table indices and single-character tokens use
    // their character code (see lex), so keyword indices must stay below
    // '!'. NUM_KEYWORDS is defined outside this file.
    assert(NUM_KEYWORDS - 1 < '!', "2manykw");
    *P = {};
    P.curfile = path;
    if (P.fp = fopen(path, "r")) == #null {
        perror(path);
        exit(1);
    }
    P.tokloc = (P.curloc = { addfilepath(path), 0, 1, 1 });
    P.peekchr = :None;
}