From 104330a399f405b83328525bb2be55b360109b16 Mon Sep 17 00:00:00 2001 From: lemon Date: Sun, 28 May 2023 19:29:10 +0200 Subject: improve struct token --- parse.h | 75 ++++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 42 insertions(+), 33 deletions(-) (limited to 'parse.h') diff --git a/parse.h b/parse.h index 520a76e..2a9b076 100644 --- a/parse.h +++ b/parse.h @@ -17,30 +17,31 @@ enum toktag { /* single-character tokens' tag value is the character itself */ TKEOF = -1, TKXXX, TKNUMLIT, + TKCHRLIT, TKSTRLIT, - TKEQU = '@', - TKNEQ, - TKLTE, - TKGTE, - TKSHR, - TKSHL, - TKINC, - TKDEC, - TKDOTS, - TKARROW, - TKPPCAT, - TKLOGAND, - TKLOGIOR, - TKSETADD, - TKSETSUB, - TKSETMUL, - TKSETDIV, - TKSETREM, - TKSETIOR, - TKSETXOR, - TKSETAND, - TKSETSHL, - TKSETSHR, + TKEQU = '@', /* == */ + TKNEQ, /* != */ + TKLTE, /* <= */ + TKGTE, /* >= */ + TKSHR, /* >> */ + TKSHL, /* << */ + TKINC, /* ++ */ + TKDEC, /* -- */ + TKDOTS, /* ... */ + TKARROW, /* -> */ + TKPPCAT, /* ## */ + TKLOGAND, /* && */ + TKLOGIOR, /* || */ + TKSETADD, /* += */ + TKSETSUB, /* -= */ + TKSETMUL, /* *= */ + TKSETDIV, /* /= */ + TKSETREM, /* %= */ + TKSETIOR, /* |= */ + TKSETXOR, /* ^= */ + TKSETAND, /* &= */ + TKSETSHL, /* <<= */ + TKSETSHR, /* >>= */ TKIDENT = 0x80, #define _(kw, stdc) TKW##kw, #include "keywords.def" @@ -48,20 +49,27 @@ enum toktag { /* single-character tokens' tag value is the character itself */ }; struct token { - enum toktag t; - uchar ty; /* type tag for num lits */ + short t; /* toktag */ + bool litlit; + uint len; struct span span; - union { - uvlong u; - vlong i; - double f; - const char *ident; - struct bytes s; - }; + const char *s; + /* for (multi-)character tokens s & len are unused + * for keywords, s is constant cstring, len = strlen(s) + * for idents, s is interned cstring, len = strlen(s) + * for strlit and chrlit: + * when litlit : s points to start of string within file buffer (after the ") + * len == span.sl.len - 2 (string data appears literally in source code) + * otherwise s is heap allocated buffer of len bytes + * for numlit: + * when litlit : s points to start of token within file buffer (normal case) + * len == span.sl.len (number literal appears literally in source code) + * otherwise s is heap allocated buffer of len bytes + */ }; struct macro { - const char *name; /* interned from tk->ident */ + const char *name; /* interned */ const char **param; struct span0 span; uchar nparam; @@ -101,6 +109,7 @@ struct parser { const char *intern(const char *); int lex(struct parser *, struct token *); int lexpeek(struct parser *, struct token *); +enum typetag parsenumlit(uvlong *, double *, const struct token *, bool ispp); void initparser(struct parser *, const char *file); void parse(struct parser *); -- cgit v1.2.3