diff options
| author | 2023-06-19 11:56:46 +0200 | |
|---|---|---|
| committer | 2023-06-19 11:56:46 +0200 | |
| commit | b71515071d1310bbf9cd34f8997aa736ebd30099 (patch) | |
| tree | f14ac1364d508ad2f1e8cdcda59b4dc9bae96f21 /lex.h | |
| parent | 08649c95cc15b5ad99e6b8899d639f6c3b63266b (diff) | |
frontend: separate compiler & lexer
Diffstat (limited to 'lex.h')
| -rw-r--r-- | lex.h | 109 |
1 files changed, 109 insertions, 0 deletions
@@ -0,0 +1,109 @@ +#include "common.h" + +static inline bool +joinspan(struct span0 *dst, struct span0 snd) +{ + if (dst->file != snd.file) return 0; + assert(dst->off <= snd.off); + dst->len = snd.off + snd.len - dst->off; + return 1; +} + +enum toktag { /* single-character tokens' tag value is the character itself */ + TKEOF = -1, + TKXXX, + TKNUMLIT, + TKCHRLIT, + TKSTRLIT, + TKEQU = '@', /* == */ + TKNEQ, /* != */ + TKLTE, /* <= */ + TKGTE, /* >= */ + TKSHR, /* >> */ + TKSHL, /* << */ + TKINC, /* ++ */ + TKDEC, /* -- */ + TKDOTS, /* ... */ + TKARROW, /* -> */ + TKPPCAT, /* ## */ + TKLOGAND, /* && */ + TKLOGIOR, /* || */ + TKSETADD, /* += */ + TKSETSUB, /* -= */ + TKSETMUL, /* *= */ + TKSETDIV, /* /= */ + TKSETREM, /* %= */ + TKSETIOR, /* |= */ + TKSETXOR, /* ^= */ + TKSETAND, /* &= */ + TKSETSHL, /* <<= */ + TKSETSHR, /* >>= */ + TKIDENT = 0x80, +#define _(kw, stdc) TKW##kw, +#include "keywords.def" +#undef _ +}; + +struct token { + short t; /* toktag */ + bool litlit; + uint len; + struct span span; + const char *s; + /* for (multi-)character tokens s & len are unused + * for keywords, s is constant cstring, len = strlen(s) + * for idents, s is interned cstring, len = strlen(s) + * for strlit and chrlit: + * when litlit : s points to start of string within file buffer (after the ") + * len == span.sl.len - 2 (string data appears literally in source code) + * otherwise s is heap allocated buffer of len bytes + * for numlit: + * when litlit : s points to start of token within file buffer (normal case) + * len == span.sl.len (number literal appears literally in source code) + * otherwise s is heap allocated buffer of len bytes + */ +}; + +struct macro { + const char *name; /* interned */ + const char **param; + struct span0 span; + uchar nparam; + bool fnlike, variadic; + struct rlist { + struct token *tk; + int n; + } rlist; +}; + +struct macrostack { + struct macrostack *link; + struct rlist *args; + struct span0 exspan; + int mac; + int idx; +}; + +extern int nerror; +struct parser { + struct parser *save; + short fileid; + const uchar *dat; + uint ndat; + uint idx, chridx; + short peekchr[2]; + uint peekcidx[2]; + short npeekchr; + struct macrostack *macstk; + struct token peektok; + bool eof, err; + struct arena **tmparena; +}; + +const char *intern(const char *); +int lex(struct parser *, struct token *); +int lexpeek(struct parser *, struct token *); +enum typetag parsenumlit(uvlong *, double *, const struct token *, bool ispp); +void initparser(struct parser *, const char *file, struct arena **); + +/* vim:set ts=3 sw=3 expandtab: */ |