about summary refs log tree commit diff
path: root/src/parse.cff
diff options
context:
space:
mode:
author lemon <lsof@mailbox.org> 2022-08-12 16:43:06 +0200
committer lemon <lsof@mailbox.org> 2022-08-12 16:43:06 +0200
commit 1eb17cda6780476b166b55d0fedc3ad355969e87 (patch)
tree ccb4ffa74cc37f24e93d4325ccf395d3fe73529f /src/parse.cff
parent f14aee6184568bae34f8d8d8f9140fa760099fa5 (diff)
selfhosted lexer
Diffstat (limited to 'src/parse.cff')
-rw-r--r-- src/parse.cff 142
1 files changed, 137 insertions, 5 deletions
diff --git a/src/parse.cff b/src/parse.cff
index d672173..bcb5763 100644
--- a/src/parse.cff
+++ b/src/parse.cff
@@ -1,3 +1,4 @@
+import "vec.hff";
import "all.hff";
///////////
@@ -31,6 +32,9 @@ fn chrpeek(P *Parser) int {
case Some c; return c;
}
let c = fgetc(P.fp);
+ if c == EOF {
+ P.eof = #t;
+ }
P.peekchr = :Some c;
return c;
}
@@ -63,7 +67,7 @@ fn isxdigit(c u8) bool {
fn isalpha(c u8) bool {
return (c >= 'a' and c <= 'z')
- or (c >= 'A' and c <= 'z');
+ or (c >= 'A' and c <= 'Z');
}
fn issep(c u8) bool {
@@ -153,7 +157,7 @@ fn readnumber(s *const u8) Option<Tok> {
continue;
}
if nused > 0 and c == '_' { continue; }
- if (base == 16 and not isdigit(c))
+ if (base == 16 and not isxdigit(c))
or (base != 16 and (c < '0' or c > ('0' + base) - 1)) {
suffix = s + i;
}
@@ -197,7 +201,7 @@ fn lex(P *Parser) Tok {
if isdigit(c = chrpeek(P)) {
let s [80]u8 = {};
if readtilsep(P, s[0::], #t) < 0 {
- // fatal
+ fatal(P, tok.loc, "bad number literal");
}
switch readnumber(s) {
case None;
@@ -207,7 +211,131 @@ fn lex(P *Parser) Tok {
return tok;
}
}
- if c == EOF {
+ if isalpha(c) or c == '_' {
+ let s [120]u8;
+ if readtilsep(P, s[0::], #f) < 0 {
+ fatal(P, tok.loc, "identifier too long");
+ }
+ let kw = str2keyword(s);
+ if kw >= 0 {
+ tok.t = kw;
+ tok.u.ident = keyword2str[kw];
+ } else {
+ tok.t = :ident;
+ tok.u.ident = internstr(s);
+ }
+ return tok;
+ }
+ if c == '"' or c == '\'' {
+ chr(P);
+ let delim = c;
+ let str Vec<u8> = {};
+ let c u8 #?;
+ let i = 0z;
+ while (c = chr(P)) != delim {
+ if c == 0 or c == '\n' {
+ fatal(P, P.tokloc, "unterminated %s literal",
+ delim == '"' ? "string" : "character");
+ }
+ if c != '\\' {
+ str->push(c);
+ continue;
+ }
+ switch ((c = chr(P))) {
+ case 0, '\n';
+ fatal(P, P.tokloc, "unterminated %s literal",
+ delim == '"' ? "string" : "character");
+ case '\''; str->push('\''); case '\\'; str->push('\\');
+ case '"'; str->push('"'); case 'n'; str->push('\n');
+ case 'r'; str->push('\r'); case 't'; str->push('\t');
+ case 'v'; str->push('\v'); case 'f'; str->push('\f');
+ case '0'; str->push('\0');
+ case else
+ fatal(P, P.tokloc, "unknown escape sequence '\\%c'", c);
+ }
+ }
+
+ tok.t = :str;
+ tok.u.str = str->compact();
+ return tok;
+ }
+ switch c = chr(P) {
+ case '(', ')', '[', ']', '{',
+ '}', ',', ';', '?', '~';
+ tok.t = c;
+ return tok;
+ case '.';
+ if chrmatch(P, '.') {
+ if chrmatch(P, '.') { tok.t = '...'; }
+ else { tok.t = '..'; }
+ } else { tok.t = '.'; }
+ return tok;
+ case '*';
+ if chrmatch(P, '=') { tok.t = '*='; }
+ else { tok.t = '*'; }
+ return tok;
+ case '/';
+ if chrmatch(P, '=') { tok.t = '/='; }
+ else if chrmatch(P, '/') {
+ while (c = chr(P)) != 0 and c != '\n' { }
+ return lex(P);
+ }
+ else { tok.t = '/'; }
+ return tok;
+ case '%';
+ if chrmatch(P, '=') { tok.t = '%='; }
+ else { tok.t = '%'; }
+ return tok;
+ case '+';
+ if chrmatch(P, '=') { tok.t = '+='; }
+ else if chrmatch(P, '+') { tok.t = '++'; }
+ else { tok.t = '+'; }
+ return tok;
+ case '-';
+ if chrmatch(P, '=') { tok.t = '-='; }
+ else if chrmatch(P, '-') { tok.t = '--'; }
+ else if chrmatch(P, '>') { tok.t = '->'; }
+ else { tok.t = '-'; }
+ return tok;
+ case '&';
+ if chrmatch(P, '=') { tok.t = '&='; }
+ else { tok.t = '&'; }
+ return tok;
+ case '|';
+ if chrmatch(P, '=') { tok.t = '|='; }
+ else { tok.t = '|'; }
+ return tok;
+ case '^';
+ if chrmatch(P, '=') { tok.t = '^='; }
+ else { tok.t = '^'; }
+ return tok;
+ case ':';
+ if chrmatch(P, ':') { tok.t = '::'; }
+ else { tok.t = ':'; }
+ return tok;
+ case '=';
+ if chrmatch(P, '=') { tok.t = '=='; }
+ else { tok.t = '='; }
+ return tok;
+ case '!';
+ if chrmatch(P, '=') { tok.t = '!='; }
+ else { tok.t = '!'; }
+ return tok;
+ case '<';
+ if chrmatch(P, '=') { tok.t = '<='; }
+ else if chrmatch(P, '<') {
+ if chrmatch(P, '=') { tok.t = '<<='; }
+ else { tok.t = '<<'; }
+ } else { tok.t = '<'; }
+ return tok;
+ case '>';
+ if chrmatch(P, '=') { tok.t = '>='; }
+ else if chrmatch(P, '>') {
+ if chrmatch(P, '=') { tok.t = '>>='; }
+ else { tok.t = '>>'; }
+ } else { tok.t = '>'; }
+ return tok;
+ case EOF, 0;
tok.t = :eof;
return tok;
}
@@ -215,7 +343,11 @@ fn lex(P *Parser) Tok {
}
extern fn parse(P *Parser) [#]Decl {
- let tok = lex(P);
+ while not P.eof {
+ let tok = lex(P);
+ if tok.t == :eof { break; }
+ efmt("* tok: %qT\n", tok);
+ }
}
extern fn parser_init(P *Parser, path *const u8) void {