aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--io.c16
-rw-r--r--lex.c213
-rw-r--r--parse.c22
-rw-r--r--parse.h75
-rw-r--r--test.c8
-rw-r--r--type.c6
6 files changed, 200 insertions, 140 deletions
diff --git a/io.c b/io.c
index d51e28d..cc9f672 100644
--- a/io.c
+++ b/io.c
@@ -437,17 +437,21 @@ vbfmt(struct wbuf *out, const char *fmt, va_list ap)
n += bwriteS(buf, "\?\?\?");
break;
case TKNUMLIT:
- s = (const char *)(getfile(tok->span.sl.file)->p + tok->span.sl.off);
if (quote) n += bputc(buf, '`');
- for (i = tok->span.sl.len; i--; ++s)
- if (*s != '\\' && *s != '\n') n += bputc(buf, *s);
+ n += bfmt(buf, "%S", tok->s, tok->len);
if (quote) n += bputc(buf, '\'');
break;
+ case TKCHRLIT:
+ n += bputc(buf, '\'');
+ for (int i = 0; i < tok->len; ++i)
+ n += putquoted(buf, tok->s[i], '\'', i < tok->len - 1 ? tok->s[i+1] : -1);
+ n += bputc(buf, '\'');
+ break;
case TKSTRLIT:
- n += bfmt(buf, "%'S", tok->s.p, tok->s.n-1);
+ n += bfmt(buf, "%'S", tok->s, tok->len);
break;
case TKIDENT:
- n += bfmt(buf, "`%s'", tok->ident);
+ n += bfmt(buf, "`%s'", tok->s);
break;
case TKEOF:
n += bwriteS(buf, "<end-of-file>");
@@ -478,7 +482,7 @@ vbfmt(struct wbuf *out, const char *fmt, va_list ap)
default:
if (quote) n += bputc(buf, '`');
if (in_range(tok->t, TKWBEGIN_, TKWEND_)) {
- n += bfmt(buf, "%s", tok->ident);
+ n += bfmt(buf, "%s", tok->s);
} else if (aisprint(tok->t)) {
n += bputc(buf, tok->t);
} else {
diff --git a/lex.c b/lex.c
index 7ffe8e0..f82f255 100644
--- a/lex.c
+++ b/lex.c
@@ -43,13 +43,13 @@ identkeyword(struct token *tk, const char *s, int len)
else if (cmp > 0) h = i - 1;
else if (kwtab[i].cstd <= ccopt.cstd) {
tk->t = kwtab[i].t;
- tk->ident = kwtab[i].s;
+ tk->s = kwtab[i].s;
return;
} else break;
}
ident:
tk->t = TKIDENT;
- tk->ident = intern(s);
+ tk->s = intern(s);
}
static int
@@ -141,61 +141,90 @@ aissep(int c) {
return 0;
}
-static void
-strtonum(struct token *tk, const char *s)
+
+enum typetag
+parsenumlit(uvlong *outi, double *outf, const struct token *tk, bool ispp)
{
- extern uvlong strtoull(const char *, char **, int);
- extern double strtod(const char *, char **);
- char *sx; /*suffix*/
-
- tk->ty = TYXXX;
- if (strchr(s, '.')) { /* float literal */
- Float:
- tk->f = strtod(s, &sx);
- if (sx == s)
- return;
- if (!*sx)
- tk->ty = TYDOUBLE;
- else if ((sx[0]|0x20) == 'f' && !sx[1]) {
- tk->ty = TYFLOAT;
- tk->f = (float) tk->f;
- } else tk->ty = TYXXX;
+ if (tk->t == TKCHRLIT) {
+ uvlong n = 0;
+ for (int i = 0; i < tk->len; ++i)
+ n = n << 8 | (uchar)tk->s[i];
+ if (outi) *outi = n;
+ return TYINT;
+ } else if (memchr(tk->s, '.', tk->len)) {
+ extern double strtod(const char *, char **);
+ double f;
+ char buf[80], *suffix;
+ Float: /* float literal */
+ assert(tk->len < sizeof buf - 1 && "numlit too big");
+ memcpy(buf, tk->s, tk->len);
+ buf[tk->len] = 0;
+ f = strtod(buf, &suffix);
+ if (suffix == buf)
+ return 0;
+ if (!*suffix) {
+ if (outf) *outf = f;
+ return TYDOUBLE;
+ } else if ((suffix[0]|0x20) == 'f' && !suffix[1]) {
+ if (outf) *outf = f;
+ return TYFLOAT;
+ }
+ return 0;
} else { /* int literal */
static uvlong max4typ[TYUVLONG-TYINT+1];
- enum typetag t;
- bool u = 0, dec = s[0] != '0';
- bool c99 = ccopt.cstd >= STDC99;
-
- tk->u = strtoull(s, &sx, 0);
- if (sx == s)
- return;
+ uvlong n = 0;
+ int base = 10, nsx;
+ bool dec, u = 0, c99 = ccopt.cstd >= STDC99;
+ enum typetag ty = 0;
+ const char *sx; /*suffix*/
+ char c;
if (!max4typ[0])
- for (t = TYINT; t <= TYUVLONG; ++t)
- max4typ[t-TYINT] = ((1ull << (8*targ_primsizes[t]-1))-1) << isunsignedt(t) | 1;
+ for (ty = TYINT; ty <= TYUVLONG; ++ty)
+ max4typ[ty-TYINT] = ((1ull << (8*targ_primsizes[ty]-1))-1) << isunsignedt(ty) | 1;
+
+ sx = tk->s;
+ if (tk->len > 2 && sx[0] == '0') {
+ if ((sx[1]|32) == 'x') sx += 2, base = 16; /* 0x.. */
+ else if ((sx[1]|32) == 'b') sx += 2, base = 2; /* 0b.. */
+ else base = 8; /* 0.. */
+ }
+ for (; sx < tk->s + tk->len; ++sx) {
+ if (base < 16) {
+ if (!in_range(c = *sx, '0', '0'+base-1)) break;
+ n = n * base + c - '0';
+ } else {
+ n *= base;
+ if (in_range(c = *sx, '0', '9')) n += c - '0';
+ else if (in_range(c|32, 'a', 'f')) n += 0xa + (c|32) - 'a';
+ else break;
+ }
+ }
+ dec = base == 10;
+ nsx = tk->len - (sx - tk->s);
- if (!*sx) /* '' */ {}
- else if ((sx[0]|0x20) == 'u') {
+ if (nsx == 0) /* '' */ {}
+ else if ((sx[0]|32) == 'u') {
u = 1;
- if (!sx[1]) /* 'u' */ {}
- else if ((sx[1]|0x20) == 'l') {
- if (!sx[2]) /* 'ul' */ goto L;
- if (c99 && sx[1] == sx[2] && !sx[3]) /* 'ull' */ goto LL;
- return;
- } else return;
- } else if ((sx[0]|0x20) == 'l') {
- if (!sx[1]) /* 'l' */ goto L;
- if ((sx[1]|0x20) == 'u' && !sx[2]) /* 'lu' */ { u=1; goto L; }
+ if (nsx == 1) /* 'u' */ {}
+ else if ((sx[1]|32) == 'l') {
+ if (nsx == 2) /* 'ul' */ goto L;
+ if (c99 && sx[1] == sx[2] && nsx == 3) /* 'ull' */ goto LL;
+ return 0;
+ } else return 0;
+ } else if ((sx[0]|32) == 'l') {
+ if (nsx == 1) /* 'l' */ goto L;
+ if ((sx[1]|32) == 'u' && nsx == 2) /* 'lu' */ { u=1; goto L; }
if (c99 && sx[1] == sx[0]) {
- if (!sx[2]) /* 'll' */ goto LL;
- if ((sx[2]|0x20) == 'u' && !sx[3]) /* 'llu' */ { u=1; goto LL; }
+ if (nsx == 2) /* 'll' */ goto LL;
+ if ((sx[2]|32) == 'u' && nsx == 3) /* 'llu' */ { u=1; goto LL; }
}
- return;
- } else if ((sx[0]|0x20) == 'e' || (sx[0]|0x20) == 'p')
+ return 0;
+ } else if ((sx[0]|32) == 'e' || (sx[0]|32) == 'p')
goto Float;
- else return;
+ else return 0;
-#define I(T) if (tk->u <= max4typ[T - TYINT]) { t = T; goto Ok; }
+#define I(T) if (n <= max4typ[T - TYINT]) { ty = T; goto Ok; }
I(TYINT)
if (u || !dec) I(TYUINT)
L:
@@ -206,12 +235,19 @@ strtonum(struct token *tk, const char *s)
I(TYVLONG)
if (u || !dec) I(TYUVLONG)
}
+ if (ispp) { ty = TYUVLONG; goto Ok; }
#undef I
/* too big */
- return;
+ if (outi) *outi = n;
+ return 0;
Ok:
- if (u && issignedt(t)) ++t; /* make unsigned */
- tk->ty = t;
+ if (u && issignedt(ty)) ++ty; /* make unsigned */
+ if (outi) *outi = n;
+ if (ispp) {
+ if (u) return TYUVLONG;
+ else if (n <= max4typ[TYVLONG-TYINT]) return TYVLONG;
+ }
+ return ty;
}
}
@@ -222,7 +258,8 @@ readstrchrlit(struct parser *pr, struct token *tk, char delim)
uchar tmp[80];
vec_of(uchar) b = VINIT(tmp, sizeof tmp);
struct span span = {0};
- uint n, idx = pr->chridx;
+ uint n, beginoff, idx;
+ beginoff = idx = pr->chridx;
while ((c = next(pr)) != delim) {
if (c == '\n' || c == TKEOF) {
@@ -269,7 +306,7 @@ readstrchrlit(struct parser *pr, struct token *tk, char delim)
}
if (n > 0377) {
span.sl.len = pr->chridx - span.sl.off;
- error(&span, "hex escape sequence out of range");
+ error(&span, "octal escape sequence out of range");
}
c = n;
break;
@@ -283,10 +320,17 @@ readstrchrlit(struct parser *pr, struct token *tk, char delim)
idx = pr->chridx;;
}
if (delim == '"') {
- vpush(&b, 0);
tk->t = TKSTRLIT;
- tk->s.p = alloc(&pr->exarena, b.n, 1);
- memcpy(tk->s.p, b.p, tk->s.n = b.n-1);
+ tk->len = b.n;
+ if (pr->chridx - beginoff == tk->len + 1) {
+ tk->litlit = 1;
+ tk->s = (char *)&pr->dat[beginoff];
+ } else {
+ tk->litlit = 0;
+ vpush(&b, 0);
+ tk->s = alloc(&pr->exarena, b.n, 1);
+ memcpy((char *)tk->s, b.p, b.n);
+ }
} else {
if (b.n == 0) {
span.sl = (struct span0) { idx, pr->chridx - idx, pr->fileid };
@@ -295,11 +339,16 @@ readstrchrlit(struct parser *pr, struct token *tk, char delim)
span.sl = (struct span0) { idx, pr->chridx - idx, pr->fileid };
error(&span, "multicharacter literal too long");
}
- tk->t = TKNUMLIT;
- tk->ty = TYINT;
- tk->u = 0;
- for (i = 0; i < b.n; ++i)
- tk->u = tk->u<<8 | b.p[i];
+ tk->t = TKCHRLIT;
+ tk->len = b.n;
+ if (pr->chridx - beginoff == tk->len + 1) {
+ tk->litlit = 1;
+ tk->s = (char *)&pr->dat[beginoff];
+ } else {
+ tk->litlit = 0;
+ tk->s = alloc(&pr->exarena, tk->len, 1);
+ memcpy((char *)tk->s, b.p, tk->len);
+ }
}
vfree(&b);
}
@@ -414,7 +463,12 @@ Begin:
tmp[n++] = next(pr);
}
tmp[n] = 0;
- strtonum(tk, tmp);
+ tk->len = n;
+ if (n == pr->chridx - idx) tk->s = (char *)&pr->dat[idx];
+ else {
+ tk->s = alloc(&pr->exarena, n, 1);
+ memcpy((char *)tk->s, tmp, n);
+ }
RET(TKNUMLIT);
} else if (c == '_' || aisalpha(c)) {
char tmp[70];
@@ -435,9 +489,7 @@ End:
tk->span.sl.file = pr->fileid;
tk->span.sl.off = idx;
tk->span.sl.len = pr->chridx - idx;
- tk->span.ex.file = pr->fileid;
- tk->span.ex.off = idx;
- tk->span.ex.len = pr->chridx - idx;
+ tk->span.ex = tk->span.sl;
return tk->t;
#undef RET
}
@@ -478,23 +530,12 @@ freemac(struct macro *mac)
static bool
tokequ(const struct token *a, const struct token *b)
{
- char tmpbuf[100];
- struct wbuf tmp = MEMBUF(tmpbuf, sizeof tmpbuf);
if (a->t != b->t) return 0;
- if (a->t == TKNUMLIT) {
- const char *s1 = tmp.buf, *s2;
- int n1, n2;
-
- if (a->ty != b->ty) return 0;
- n1 = bfmt(&tmp, "%tk", a);
- s2 = tmp.buf + tmp.len;
- n2 = bfmt(&tmp, "%tk", b);
- if (tmp.err) return 0;
- return n1 == n2 && !memcmp(s1, s2, n1);
+ if (a->t == TKNUMLIT || a->t == TKSTRLIT || a->t == TKCHRLIT) {
+ if (a->len != b->len) return 0;
+ return !memcmp(a->s, b->s, a->len);
} else if (a->t == TKIDENT) {
- return a->ident == b->ident;
- } else if (a->t == TKSTRLIT) {
- return a->s.n == b->s.n && !memcmp(a->s.p, b->s.p, a->s.n);
+ return a->s == b->s;
}
return 1;
}
@@ -577,7 +618,7 @@ ppdefine(struct parser *pr)
ppskipline(pr);
return;
}
- mac.name = tk0.ident;
+ mac.name = tk0.s;
mac.span = tk0.span.sl;
if (peek(pr, 0) == '(') {
@@ -641,6 +682,7 @@ expr(struct parser *pr, bool *pu, int prec)
{
vlong x, y;
struct token tk;
+ enum typetag ty;
int opprec;
char unops[16];
int nunop = 0;
@@ -664,15 +706,16 @@ Unary:
}
break;
case TKNUMLIT:
- if (!tk.ty) {
+ case TKCHRLIT:
+ ty = parsenumlit((uvlong *)&x, NULL, &tk, 1);
+ if (!ty) {
error(&tk.span, "bad number literal");
goto Err;
- } else if (isfltt(tk.ty)) {
+ } else if (isfltt(ty)) {
error(&tk.span, "float literal in preprocessor expresion");
goto Err;
}
- x = tk.i;
- xu = isunsignedt(tk.ty);
+ xu = isunsignedt(ty);
break;
default:
if (in_range(tk.t, TKWBEGIN_, TKWEND_)) {
@@ -850,7 +893,7 @@ tryexpand(struct parser *pr, const struct token *tk)
struct macrostack *l;
int macidx, i;
- if (!isppident(*tk) || !(mac = findmac(tk->ident)))
+ if (!isppident(*tk) || !(mac = findmac(tk->s)))
return 0;
if (!inimstk) {
@@ -937,7 +980,7 @@ findppcmd(const struct token *tk)
"warning",
};
int l = 0, h = arraylength(tab) - 1, i, cmp;
- const char *s = tk->ident;
+ const char *s = tk->s;
if (tk->t == TKWif) return PPIF;
if (tk->t == TKWelse) return PPELSE;
diff --git a/parse.c b/parse.c
index b39e299..0de9346 100644
--- a/parse.c
+++ b/parse.c
@@ -36,7 +36,7 @@ isdecltok(struct parser *pr)
case TKWdouble:
return 1;
case TKIDENT:
- return (decl = finddecl(pr, tk.ident)) && decl->scls == SCTYPEDEF;
+ return (decl = finddecl(pr, tk.s)) && decl->scls == SCTYPEDEF;
}
return 0;
}
@@ -665,16 +665,18 @@ Unary:
/* base exprs */
case TKNUMLIT:
- if (!tk.ty)
- error(&tk.span, "invalid number literal %'tk", &tk);
- ex = mkexpr(ENUMLIT, tk.span, mktype(tk.ty ? tk.ty : TYINT), .u = tk.u);
+ case TKCHRLIT:
+ ex = mkexpr(ENUMLIT, tk.span, mktype(0), );
+ if (!(ty.t = parsenumlit(&ex.u, &ex.f, &tk, 0)))
+ error(&tk.span, "bad number literal %'tk", &tk);
+ ex.ty.t = ty.t ? ty.t : TYINT;
break;
case TKSTRLIT:
- ++tk.s.n;
- ex = mkexpr(ESTRLIT, tk.span, mkarrtype(mktype(TYCHAR), 0, tk.s.n), .s = tk.s);
+ ex = mkexpr(ESTRLIT, tk.span,
+ mkarrtype(mktype(TYCHAR), 0, tk.len+1), .s = (uchar *)tk.s);
break;
case TKIDENT:
- decl = finddecl(pr, tk.ident);
+ decl = finddecl(pr, tk.s);
if (!decl) {
error(&tk.span, "undeclared identifier %'tk", &tk);
ex = mkexpr(ESYM, tk.span, mktype(TYINT), .sym = NULL);
@@ -1605,7 +1607,7 @@ tagtype(struct parser *pr, enum toktag kind)
const char *tag = NULL;
if (match(pr, &tk, TKIDENT))
- tag = tk.ident;
+ tag = tk.s;
span = tk.span;
if (!match(pr, NULL, '{')) {
if (!tag) {
@@ -1727,7 +1729,7 @@ declspec(struct declstate *st, struct parser *pr)
joinspan(&span.ex, tk.span.ex);
goto End;
case TKIDENT:
- if (!st->base.t && !arith && (decl = finddecl(pr, tk.ident))
+ if (!st->base.t && !arith && (decl = finddecl(pr, tk.s))
&& decl->scls == SCTYPEDEF) {
st->base = decl->ty;
break;
@@ -1897,7 +1899,7 @@ decltypes(struct parser *pr, struct decllist *list, const char **name, struct sp
if (!name)
error(&tk.span, "unexpected identifier in type name");
else {
- *name = tk.ident;
+ *name = tk.s;
*span = tk.span;
}
lex(pr, &tk);
diff --git a/parse.h b/parse.h
index 520a76e..2a9b076 100644
--- a/parse.h
+++ b/parse.h
@@ -17,30 +17,31 @@ enum toktag { /* single-character tokens' tag value is the character itself */
TKEOF = -1,
TKXXX,
TKNUMLIT,
+ TKCHRLIT,
TKSTRLIT,
- TKEQU = '@',
- TKNEQ,
- TKLTE,
- TKGTE,
- TKSHR,
- TKSHL,
- TKINC,
- TKDEC,
- TKDOTS,
- TKARROW,
- TKPPCAT,
- TKLOGAND,
- TKLOGIOR,
- TKSETADD,
- TKSETSUB,
- TKSETMUL,
- TKSETDIV,
- TKSETREM,
- TKSETIOR,
- TKSETXOR,
- TKSETAND,
- TKSETSHL,
- TKSETSHR,
+ TKEQU = '@', /* == */
+ TKNEQ, /* != */
+ TKLTE, /* <= */
+ TKGTE, /* >= */
+ TKSHR, /* >> */
+ TKSHL, /* << */
+ TKINC, /* ++ */
+ TKDEC, /* -- */
+ TKDOTS, /* ... */
+ TKARROW, /* -> */
+ TKPPCAT, /* ## */
+ TKLOGAND, /* && */
+ TKLOGIOR, /* || */
+ TKSETADD, /* += */
+ TKSETSUB, /* -= */
+ TKSETMUL, /* *= */
+ TKSETDIV, /* /= */
+ TKSETREM, /* %= */
+ TKSETIOR, /* |= */
+ TKSETXOR, /* ^= */
+ TKSETAND, /* &= */
+ TKSETSHL, /* <<= */
+ TKSETSHR, /* >>= */
TKIDENT = 0x80,
#define _(kw, stdc) TKW##kw,
#include "keywords.def"
@@ -48,20 +49,27 @@ enum toktag { /* single-character tokens' tag value is the character itself */
};
struct token {
- enum toktag t;
- uchar ty; /* type tag for num lits */
+ short t; /* toktag */
+ bool litlit;
+ uint len;
struct span span;
- union {
- uvlong u;
- vlong i;
- double f;
- const char *ident;
- struct bytes s;
- };
+ const char *s;
+ /* for (multi-)character tokens s & len are unused
+ * for keywords, s is constant cstring, len = strlen(s)
+ * for idents, s is interned cstring, len = strlen(s)
+ * for strlit and chrlit:
+ * when litlit : s points to start of string within file buffer (after the ")
+ * len == span.sl.len - 2 (string data appears literally in source code)
+ * otherwise s is heap allocated buffer of len bytes
+ * for numlit:
+ * when litlit : s points to start of token within file buffer (normal case)
+ * len == span.sl.len (number literal appears literally in source code)
+ * otherwise s is heap allocated buffer of len bytes
+ */
};
struct macro {
- const char *name; /* interned from tk->ident */
+ const char *name; /* interned */
const char **param;
struct span0 span;
uchar nparam;
@@ -101,6 +109,7 @@ struct parser {
const char *intern(const char *);
int lex(struct parser *, struct token *);
int lexpeek(struct parser *, struct token *);
+enum typetag parsenumlit(uvlong *, double *, const struct token *, bool ispp);
void initparser(struct parser *, const char *file);
void parse(struct parser *);
diff --git a/test.c b/test.c
index cb81b9c..1d6c01f 100644
--- a/test.c
+++ b/test.c
@@ -1,6 +1,6 @@
/* coment */
-#if 1+1 < (-2*2)
+#if 1+1 < (-2*'a')
wawa
#elif 9<<1
#define wow 3
@@ -16,13 +16,15 @@ int add (int x, int y) {
struct foo {struct foo *foo;};
int abs(int x){
- return (x ^ x >> 31) - (x >> 31);
+ return (x ^ x >> 3\
+1) - (x >> 31);
}
int popcnt(unsigned x) {
int n = 0;
while (x) x >>= 1, n++;
- return n + sizeof &"á"[0];
+ return n + sizeof "ab\r\
+c";
}
struct f {
diff --git a/type.c b/type.c
index 3aeb8a1..5f984d6 100644
--- a/type.c
+++ b/type.c
@@ -80,7 +80,7 @@ interntd(const struct typedata *td)
if (!slot->t) {
uint nmemb;
static struct arena *datarena;
- if (!datarena) {
+ if (!datarena) {
enum { N = 1<<12 };
static union { char m[sizeof(struct arena) + N]; struct arena *_align; } amem;
datarena = (void *)amem.m, datarena->cap = N;
@@ -270,9 +270,9 @@ cvtarith(union type a, union type b)
if (issigned(a) == issigned(b)) {
/* when both are integers with same signage, choose type with greatest rank */
return a.t > b.t ? a : b;
- }
+ }
/* if the signed type can represent all values of the unsigned type,
- * choose it, otherwise choose its corresponding unsigned type */
+ * choose it, otherwise choose its corresponding unsigned type */
/* so long long + unsigned = long long;
* but long long + unsigned long = unsigned long long */
if (issigned(a)) {