aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlemon <lsof@mailbox.org>2022-08-12 16:43:06 +0200
committerlemon <lsof@mailbox.org>2022-08-12 16:43:06 +0200
commit1eb17cda6780476b166b55d0fedc3ad355969e87 (patch)
treeccb4ffa74cc37f24e93d4325ccf395d3fe73529f
parentf14aee6184568bae34f8d8d8f9140fa760099fa5 (diff)
selfhosted lexer
-rw-r--r--bootstrap/all.h1
-rw-r--r--bootstrap/cgen.c8
-rw-r--r--bootstrap/parse.c9
-rw-r--r--bootstrap/types.c13
-rw-r--r--src/all.hff5
-rw-r--r--src/fmt.cff113
-rw-r--r--src/libc.hff3
-rw-r--r--src/parse.cff142
-rw-r--r--src/util.cff54
9 files changed, 306 insertions, 42 deletions
diff --git a/bootstrap/all.h b/bootstrap/all.h
index f8eb769..ceb9841 100644
--- a/bootstrap/all.h
+++ b/bootstrap/all.h
@@ -585,6 +585,7 @@ void visittypes(void (*visitor)(const struct type *, void *), void *arg);
const struct type *constify(const struct type *ty);
const struct type *unconstify(const struct type *ty);
const struct type *constifychild(const struct type *ty);
+const struct type *unconstifychild(const struct type *ty);
int numtype2rank(const struct type *a);
const struct type * rank2numtype(int r);
bool isnumtype(const struct type *a);
diff --git a/bootstrap/cgen.c b/bootstrap/cgen.c
index b0cfd92..7519982 100644
--- a/bootstrap/cgen.c
+++ b/bootstrap/cgen.c
@@ -541,7 +541,7 @@ liftdecl(struct decl *decl) {
static int id;
switch (decl->t) {
case Dfn:
- if (decl->fn.body || (decl->container && !decl->externp)) {
+ if ((decl->fn.body || (decl->container && !decl->externp)) && !*decl->_cname) {
if (decl->container)
*decl->_cname = xasprintf("__m%s_%s%d", decl->container->agg.name, decl->fn.name, decl->fn.id);
else
@@ -710,6 +710,12 @@ defctype(const struct type *ty, void *_) {
}
break;
case TYslice:
+ if (ty->child->konst) {
+ const struct type *ty2 = unconstifychild(ty);
+ defctype(ty2, NULL);
+ *cname = (char *)ty2->_cname;
+ return;
+ }
defctype(ty->child, NULL);
*cname = xasprintf("__ty%d", id++);
pri("typedef struct { %t *ptr; size_t len; } %s;\n",
diff --git a/bootstrap/parse.c b/bootstrap/parse.c
index 7c31ce1..7ff52bf 100644
--- a/bootstrap/parse.c
+++ b/bootstrap/parse.c
@@ -1178,7 +1178,7 @@ pexprimary(struct parser *P) {
ex = parsearrini(P, P->targty);
} else if (lexmatch(P, &tok, TKkw_sizeof)) {
ex.t = Eintlit;
- ex.ty = ty_usize;
+ ex.ty = ty_isize;
if (lexmatch(P, &tok, '(')) {
struct expr exp = parseexpr(P);
ex.i = exp.ty->size;
@@ -1320,13 +1320,13 @@ pexpostfix(struct parser *P) {
if (ty->t == TYarr) {
assert(ty->length >= 0);
ex.t = Eintlit;
- ex.ty = ty->konst ? constify(ty_usize) : ty_usize;
+ ex.ty = ty->konst ? constify(ty_isize) : ty_isize;
ex.span = tok.span;
ex.i = ty->length;
} else if (ty->t == TYslice) {
ex.child = exprdup(ex);
ex.t = Elen;
- ex.ty = ty->konst ? constify(ty_usize) : ty_usize;
+ ex.ty = ty->konst ? constify(ty_isize) : ty_isize;
ex.span = tok.span;
} else {
fatal(P, ex.span, "invalid operand to `.#len' (%t)", ex.ty);
@@ -1654,6 +1654,7 @@ pexcmp(struct parser *P) {
ex = pexbitarith(P);
if (P->used_targty) return ex;
if (matchcmpop(P, &tok)) {
+ P->targty = ex.ty;
struct expr rhs = pexbitarith(P);
if (!typeof2(ex.ty, rhs.ty))
fatal(P, tok.span, "incompatible operands %t and %t to binary operator %T",
@@ -1713,7 +1714,7 @@ pexcond(struct parser *P) {
struct expr ex3;
const struct type *ty;
- if (ex.ty->t != TYbool)
+ if (ex.ty->t != TYbool && ex.ty->t != TYptr)
fatal(P, ex.span, "invalid test operand %t to conditional operator", ex.ty);
lexexpect(P, ':');
diff --git a/bootstrap/types.c b/bootstrap/types.c
index 66ae840..37977ea 100644
--- a/bootstrap/types.c
+++ b/bootstrap/types.c
@@ -301,6 +301,16 @@ constifychild(const struct type *ty) {
return interntype(ty2);
}
+const struct type *
+unconstifychild(const struct type *ty) { /* ty with its child passed through unconstify() */
+	const struct type *bare = unconstify(ty->child);
+	struct type copy;
+	if (bare == ty->child) return ty; /* unconstify() left the child as it was: reuse ty */
+	copy = *ty;
+	copy.child = bare; /* the copy differs from *ty only in its child pointer */
+	return interntype(copy);
+}
+
static const struct type *
arraydecay(const struct type *ty) {
struct type ty2 = *ty;
@@ -367,7 +377,7 @@ typeof2(const struct type *a, const struct type *b) {
b = arraydecay(b);
if (a->t == TYarr && b->t == TYptr)
a = arraydecay(a);
- if (a->t == TYptr && b->t == TYptr) {
+ if (a->t == b->t && (a->t == TYptr || a->t == TYslice)) {
bool akonst = a->child->konst,
bkonst = b->child->konst;
const struct type *uac = unconstify(a->child),
@@ -396,6 +406,7 @@ typeof2(const struct type *a, const struct type *b) {
}
}
+
return NULL;
}
diff --git a/src/all.hff b/src/all.hff
index a35694b..e090570 100644
--- a/src/all.hff
+++ b/src/all.hff
@@ -81,6 +81,7 @@ struct Tok {
flo f64,
bool bool,
str [#]const u8,
+ ident *const u8,
},
}
@@ -104,15 +105,19 @@ extern fn parse(*Parser) [#]Decl;
// util.cff
extern fn xmalloc(n usize) *void;
+extern fn xcalloc(n usize, m usize) *void;
extern fn xrealloc(p *void, n usize) *void;
+extern fn xstrdup(str *const u8) *u8;
def FNV1A_INI u32 = 0x811c9dc5;
extern fn fnv1a(h u32, [#]const u8) u32;
extern fn fnv1a_s(h u32, *const u8) u32;
extern fn addfilepath(*const u8) int;
extern fn fatal(*Parser, Loc, fmt *const u8, ...) void;
+extern fn internstr(*const u8) *const u8;
// fmt.cff
extern fn vpfmt(proc *fn(u8, *void) void, parg *void, fmt *const u8, va_list) void;
+extern fn pfmt(proc *fn(u8, *void) void, parg *void, fmt *const u8, ...) void;
extern fn vefmt(fmt *const u8, ap va_list) void;
extern fn efmt(fmt *const u8, ...) void;
diff --git a/src/fmt.cff b/src/fmt.cff
index 885ea50..ecf7d74 100644
--- a/src/fmt.cff
+++ b/src/fmt.cff
@@ -3,41 +3,83 @@ import "all.hff";
extern fn vpfmt(proc *fn(u8, *void) void, parg *void, fmt *const u8, ap va_list) void {
defmacro p(x) [ proc(x, parg) ]
defmacro ps(s) [
- for let $i = 0; (s)[$i] != 0; ++$i {
- p(s[$i]);
+ for let $s *const u8 = (s); *$s != 0; ++$s {
+ p(*$s);
}
]
- let buf [100]u8 = {};
+
+    defmacro pch(ch) [ { // emit ch as-is when isprint() accepts it, else as a backslash plus three octal digits
+        extern fn isprint(int) int;
+        if isprint(ch) != 0 {
+            p(ch);
+        } else {
+            p('\\');
+            p('0' + ((ch / 8 / 8) % 8)); // most significant octal digit goes first
+            p('0' + ((ch / 8) % 8));
+            p('0' + (ch % 8));
+        }
+    } ]
+
+ static buf [100]u8 = {};
+
+ fn pritok(proc typeof(proc), parg *void, quote bool, tok *const Tok) void { // print one token through proc/parg; quote wraps names and punctuation in `...'
+ switch tok.t {
+ case :int;
+ sprintf(buf, "%lld", tok.u.int); // formatted into the shared static buf
+ ps(buf);
+ case :str;
+ pfmt(proc, parg, "%S", tok.u.str); // string tokens are handed to the %S directive
+ case :ident;
+ if quote { p('`'); }
+ ps(tok.u.ident);
+ if quote { p('\''); }
+ case else
+ if tok.t >= 0 and tok.t < NUM_KEYWORDS { // keyword range: the spelling is read from tok.u.ident
+ if quote { p('`'); }
+ ps(tok.u.ident);
+ if quote { p('\''); }
+ } else if tok.t > 0 { // any other positive tok.t is printed as its packed bytes
+ if quote { p('`'); }
+ let t = bswap32(tok.t); // byte-swap so the loop below emits the most significant byte of tok.t first
+ let i = 0; // NOTE(review): never incremented
+ while t != 0 {
+ if t & 0xFF != 0 { // presumably (t & 0xFF) != 0, i.e. zero bytes are skipped — confirm precedence
+ p(t); // proc takes a u8; presumably only the low byte is printed — confirm
+ }
+ t >>= 8;
+ }
+ buf[i] = '\0'; // NOTE(review): i is still 0 here, so this only clears buf[0]; looks like leftover code
+ if quote { p('\''); }
+ }
+ }
+ }
+
for let c u8 = *fmt; c != 0; c = *++fmt {
assert(c != 0, "?");
if c != '%' {
p(c);
if fmt[1] == 0 { break; }
- continue;
+ continue;
}
let quote = #f;
- #'fmt do {
+ for ;; {
switch (c = *++fmt) {
- case 'i';
- sprintf(buf, "%d", ap->arg(int));
- ps(buf);
case 'q';
quote = #t;
- continue #'fmt;
+ continue;
+ case 'i', 'd';
+ sprintf(buf, "%d", ap->arg(int));
+ ps(buf);
+ case 'p';
+ sprintf(buf, "%p", ap->arg(*void));
+ ps(buf);
case 'c';
let ch u32 = ap->arg(int);
if quote {
- extern fn isprint(int) int;
p('\'');
for ch = bswap32(ch); ch != 0; ch >>= 8 {
if ch & 0xFF != 0 {
- if isprint(ch) != 0 { p(ch); }
- else {
- p('\\');
- p('0' + (ch % 8));
- p('0' + ((ch / 8) % 8));
- p('0' + ((ch / 8 / 8) % 8));
- }
+ pch(ch);
}
}
p('\'');
@@ -57,38 +99,47 @@ extern fn vpfmt(proc *fn(u8, *void) void, parg *void, fmt *const u8, ap va_list)
extern fn isprint(int) int;
p('\"');
for let c u8 #?; (c = *s++) != 0; {
- if isprint(c) != 0 {
- p(c);
- } else {
- p('\\');
- p('0' + (c % 8));
- p('0' + ((c / 8) % 8));
- p('0' + ((c / 8 / 8) % 8));
- }
+ pch(c);
}
p('\"');
} else {
ps(s);
}
+ case 'S';
+ let str = ap->arg([#]const u8);
+ p('"');
+ foreach(c, i, str,
+ pch(c);
+ )
+ p('"');
+ case 'T';
+ let tok = ap->arg(Tok);
+ pritok(proc, parg, quote, &tok);
case else
- // assert(#f, "bad fmt '%c' @ %d", c, i);
+ assert(#f, "bad fmt '%c'", c);
}
- } while #f;
+ break;
+ }
}
}
+extern fn pfmt(proc *fn(u8, *void) void, parg *void, fmt *const u8, ...) void { // variadic front end: forwards to vpfmt
+    let args va_list #?; // started on fmt just below
+    args->start(fmt);
+    vpfmt(proc, parg, fmt, args);
+    args->end();
+}
+
extern fn vefmt(fmt *const u8, ap va_list) void {
- fn epri(c u8, *void) void {
+ fn eputc(c u8, *void) void {
fputc(c, stderr);
}
-
- vpfmt(&epri, #null, fmt, ap);
+ vpfmt(&eputc, #null, fmt, ap);
}
extern fn efmt(fmt *const u8, ...) void {
let ap va_list #?;
ap->start(fmt);
-
vefmt(fmt, ap);
ap->end();
}
diff --git a/src/libc.hff b/src/libc.hff
index a231614..488a495 100644
--- a/src/libc.hff
+++ b/src/libc.hff
@@ -18,12 +18,15 @@ extern fn abort() void;
extern fn exit(c int) void;
extern fn perror(s *const u8) void;
extern fn malloc(n usize) *void;
+extern fn calloc(n usize, m usize) *void;
extern fn realloc(p *void, n usize) *void;
extern fn free(p *void) void;
// string.h
extern fn strlen(s *const u8) usize;
extern fn strcmp(a *const u8, b *const u8) int;
+extern fn memcpy(*void, *const void, usize) *void;
+extern fn strcpy(*u8, *const u8) *u8;
//ctype.h
extern fn tolower(int) int;
diff --git a/src/parse.cff b/src/parse.cff
index d672173..bcb5763 100644
--- a/src/parse.cff
+++ b/src/parse.cff
@@ -1,3 +1,4 @@
+import "vec.hff";
import "all.hff";
///////////
@@ -31,6 +32,9 @@ fn chrpeek(P *Parser) int {
case Some c; return c;
}
let c = fgetc(P.fp);
+ if c == EOF {
+ P.eof = #t;
+ }
P.peekchr = :Some c;
return c;
}
@@ -63,7 +67,7 @@ fn isxdigit(c u8) bool {
fn isalpha(c u8) bool {
return (c >= 'a' and c <= 'z')
- or (c >= 'A' and c <= 'z');
+ or (c >= 'A' and c <= 'Z');
}
fn issep(c u8) bool {
@@ -153,7 +157,7 @@ fn readnumber(s *const u8) Option<Tok> {
continue;
}
if nused > 0 and c == '_' { continue; }
- if (base == 16 and not isdigit(c))
+ if (base == 16 and not isxdigit(c))
or (base != 16 and (c < '0' or c > ('0' + base) - 1)) {
suffix = s + i;
}
@@ -197,7 +201,7 @@ fn lex(P *Parser) Tok {
if isdigit(c = chrpeek(P)) {
let s [80]u8 = {};
if readtilsep(P, s[0::], #t) < 0 {
- // fatal
+ fatal(P, tok.loc, "bad number literal");
}
switch readnumber(s) {
case None;
@@ -207,7 +211,131 @@ fn lex(P *Parser) Tok {
return tok;
}
}
- if c == EOF {
+ if isalpha(c) or c == '_' {
+ let s [120]u8;
+ if readtilsep(P, s[0::], #f) < 0 {
+ fatal(P, tok.loc, "identifier too long");
+ }
+ let kw = str2keyword(s);
+ if kw >= 0 {
+ tok.t = kw;
+ tok.u.ident = keyword2str[kw];
+ } else {
+ tok.t = :ident;
+ tok.u.ident = internstr(s);
+ }
+ return tok;
+ }
+ if c == '"' or c == '\'' {
+ chr(P);
+ let delim = c;
+ let str Vec<u8> = {};
+ let c u8 #?;
+ let i = 0z;
+ while (c = chr(P)) != delim {
+ if c == 0 or c == '\n' {
+ fatal(P, P.tokloc, "unterminated %s literal",
+ delim == '"' ? "string" : "character");
+ }
+ if c != '\\' {
+ str->push(c);
+ continue;
+ }
+ switch ((c = chr(P))) {
+ case 0, '\n';
+ fatal(P, P.tokloc, "unterminated %s literal",
+ delim == '"' ? "string" : "character");
+ case '\''; str->push('\''); case '\\'; str->push('\\');
+ case '"'; str->push('"'); case 'n'; str->push('\n');
+ case 'r'; str->push('\r'); case 't'; str->push('\t');
+ case 'v'; str->push('\v'); case 'f'; str->push('\f');
+ case '0'; str->push('\0');
+ case else
+ fatal(P, P.tokloc, "unknown escape sequence '\\%c'", c);
+ }
+ }
+
+ tok.t = :str;
+ tok.u.str = str->compact();
+ return tok;
+ }
+ switch c = chr(P) {
+ case '(', ')', '[', ']', '{',
+ '}', ',', ';', '?', '~';
+ tok.t = c;
+ return tok;
+ case '.';
+ if chrmatch(P, '.') {
+ if chrmatch(P, '.') { tok.t = '...'; }
+ else { tok.t = '..'; }
+ } else { tok.t = '.'; }
+ return tok;
+ case '*';
+ if chrmatch(P, '=') { tok.t = '*='; }
+ else { tok.t = '*'; }
+ return tok;
+ case '/';
+ if chrmatch(P, '=') { tok.t = '/='; }
+ else if chrmatch(P, '/') {
+ while (c = chr(P)) != 0 and c != '\n' { }
+ return lex(P);
+ }
+ else { tok.t = '/'; }
+ return tok;
+ case '%';
+ if chrmatch(P, '=') { tok.t = '%='; }
+ else { tok.t = '%'; }
+ return tok;
+ case '+';
+ if chrmatch(P, '=') { tok.t = '+='; }
+ else if chrmatch(P, '+') { tok.t = '++'; }
+ else { tok.t = '+'; }
+ return tok;
+ case '-';
+ if chrmatch(P, '=') { tok.t = '-='; }
+ else if chrmatch(P, '-') { tok.t = '--'; }
+ else if chrmatch(P, '>') { tok.t = '->'; }
+ else { tok.t = '-'; }
+ return tok;
+ case '&';
+ if chrmatch(P, '=') { tok.t = '&='; }
+ else { tok.t = '&'; }
+ return tok;
+ case '|';
+ if chrmatch(P, '=') { tok.t = '|='; }
+ else { tok.t = '|'; }
+ return tok;
+ case '^';
+ if chrmatch(P, '=') { tok.t = '^='; }
+ else { tok.t = '^'; }
+ return tok;
+ case ':';
+ if chrmatch(P, ':') { tok.t = '::'; }
+ else { tok.t = ':'; }
+ return tok;
+ case '=';
+ if chrmatch(P, '=') { tok.t = '=='; }
+ else { tok.t = '='; }
+ return tok;
+ case '!';
+ if chrmatch(P, '=') { tok.t = '!='; }
+ else { tok.t = '!'; }
+ return tok;
+ case '<';
+ if chrmatch(P, '=') { tok.t = '<='; }
+ else if chrmatch(P, '<') {
+ if chrmatch(P, '=') { tok.t = '<<='; }
+ else { tok.t = '<<'; }
+ } else { tok.t = '<'; }
+ return tok;
+ case '>';
+ if chrmatch(P, '=') { tok.t = '>='; }
+ else if chrmatch(P, '>') {
+ if chrmatch(P, '=') { tok.t = '>>='; }
+ else { tok.t = '>>'; }
+ } else { tok.t = '>'; }
+ return tok;
+ case EOF, 0;
tok.t = :eof;
return tok;
}
@@ -215,7 +343,11 @@ fn lex(P *Parser) Tok {
}
extern fn parse(P *Parser) [#]Decl {
- let tok = lex(P);
+ while not P.eof {
+ let tok = lex(P);
+ if tok.t == :eof { break; }
+ efmt("* tok: %qT\n", tok);
+ }
}
extern fn parser_init(P *Parser, path *const u8) void {
diff --git a/src/util.cff b/src/util.cff
index c3a08cb..d1c9dc5 100644
--- a/src/util.cff
+++ b/src/util.cff
@@ -1,3 +1,4 @@
+import "vec.hff";
import "all.hff";
extern fn xmalloc(n usize) *void {
@@ -6,12 +7,23 @@ extern fn xmalloc(n usize) *void {
return p;
}
+extern fn xcalloc(n usize, m usize) *void { // calloc(n, m) that asserts the result is non-null
+    let p = calloc(n, m); // was calloc(n, n): the element size m was ignored
+    assert(p != #null, "calloc");
+    return p;
+}
+
extern fn xrealloc(p *void, n usize) *void {
let p = realloc(p, n);
assert(p != #null, "realloc");
return p;
}
+extern fn xstrdup(str *const u8) *u8 { // fresh xmalloc'd copy of str, terminator included
+    let p = xmalloc(strlen(str) + 1);
+    strcpy(p, str);
+    return p;
+}
extern fn fnv1a(h u32, d [#]const u8) u32 {
foreach(i, x, d,
@@ -65,3 +77,45 @@ extern fn fatal(P *Parser, loc Loc, fmt *const u8, ...) void {
ap->end();
exit(1);
}
+
+extern fn internstr(s *const u8) *const u8 { // returns the stored copy of s; strings that compare equal via streq share one pointer
+    static buf Vec<*const u8> = {}; // every stored copy, in insertion order
+    static set **const u8 = {}; // hash table of N slots; each used slot points at an entry of buf
+    static N int = {};
+    static count int = {};
+
+    if set == #null {
+        set = xcalloc(N = 16, sizeof(*set)); // slots hold pointers, so size them as pointers (was sizeof int)
+    }
+
+    if count == N / 2 { // half full: double the table and re-insert every stored string
+        free(set);
+        set = xcalloc(N *= 2, sizeof(*set)); // names below are distinct so the parameter s is not shadowed
+        vec_each(old, k, buf,
+            let j = fnv1a_s(FNV1A_INI, old) & (N - 1);
+            for ;; {
+                if set[j] == #null {
+                    set[j] = old;
+                    break;
+                }
+                j = (j + 1) & (N - 1);
+            }
+        )
+    }
+
+    let i0 = fnv1a_s(FNV1A_INI, s) & (N - 1);
+    let i int = i0;
+    do { // linear probe starting at the hash slot
+        if set[i] == #null { // free slot: s is new, store a private copy
+            ++count;
+            buf->push(xstrdup(s));
+            set[i] = buf.dat[buf.len - 1];
+            return set[i];
+        } else if streq(set[i], s) {
+            return set[i];
+        }
+        i = (i + 1) & (N - 1);
+    } while i != i0;
+    assert(#f, "unreachable");
+
+}