diff options
| author | 2026-03-17 13:22:00 +0100 | |
|---|---|---|
| committer | 2026-03-17 13:22:00 +0100 | |
| commit | a8d6f8bf30c07edb775e56889f568ca20240bedf (patch) | |
| tree | b5a452b2675b2400f15013617291fe6061180bbf /src/c_lex.h | |
| parent | 24f14b7ad1af08d872971d72ce089a529911f657 (diff) | |
REFACTOR: move sources to src/
Diffstat (limited to 'src/c_lex.h')
| -rw-r--r-- | src/c_lex.h | 126 |
1 files changed, 126 insertions, 0 deletions
diff --git a/src/c_lex.h b/src/c_lex.h new file mode 100644 index 0000000..e70bc78 --- /dev/null +++ b/src/c_lex.h @@ -0,0 +1,126 @@ +#include "../common.h" +#include "../type.h" + +static inline bool +joinspan(struct span0 *dst, struct span0 snd) +{ + if (dst->file != snd.file) return 0; + if (dst->off > snd.off) return 0; + dst->len = snd.off + snd.len - dst->off; + return 1; +} + +enum toktag { /* single-character tokens' tag value is the character itself */ + TKEOF = 0xFF, + TKXXX = 0, + TKNUMLIT, + TKCHRLIT, + TKSTRLIT, + TKPPHDRH, /* <hdr> (for #include) */ + TKPPHDRQ, /* "hdr" (for #include) */ + TKPPMACARG, /* macro param, in repl list */ + TKPPMACSTR, /* stringify macro param, in repl list */ + TKEQU = '@', /* == */ + TKNEQ, /* != */ + TKLTE, /* <= */ + TKGTE, /* >= */ + TKSHR, /* >> */ + TKSHL, /* << */ + TKINC, /* ++ */ + TKDEC, /* -- */ + TKDOTS, /* ... */ + TKARROW, /* -> */ + TKPPCAT, /* ## */ + TKLOGAND, /* && */ + TKLOGIOR, /* || */ + TKSETADD, /* += */ + TKSETSUB, /* -= */ + TKSETMUL, /* *= */ + TKSETDIV, /* /= */ + TKSETREM, /* %= */ + TKSETIOR, /* |= */ + TKSETXOR, /* ^= */ + TKSETAND, /* &= */ + TKSETSHL, /* <<= */ + TKSETSHR, /* >>= */ + TKIDENT = 0x80, +#define _(kw, stdc, ...) TKW##kw, +#include "keywords.def" +#undef _ + NTOKTAG, +}; +static_assert(NTOKTAG < 256); + +struct token { + uchar t; /* toktag */ + bool litlit : 1, + blue : 1, /* preprocessor token painted blue */ + extwarn : 1; /* warn this keyword token is an extension */ + uchar wide : 2, /* for CHRLIT & STRLIT; 1 -> 16bit, 2 -> 32bit */ + wideuni : 1, /* ditto, 0 -> 'L', 1 -> 'u'/'U' (C11) */ + space : 1; /* preceded by whitespace? */ + union { + uint len; + ushort argidx; + }; + struct span span; + union { + internstr name; + const char *s; + const ushort *ws16; + const uint *ws32; + }; + /* for (multi-)character tokens s & len are unused + * for keywords, s is constant cstring, len = strlen(s) + * for idents, s is interned cstring, len = strlen(s) + * for strlit and chrlit: + * when litlit : s points to start of string within file buffer (after the ") + * len == span.sl.len - 2 (string data appears literally in source code) + * otherwise s is heap allocated buffer of len bytes + * when wide, litlit = 0 and use ws16/ws32 + * for numlit: + * when litlit : s points to start of token within file buffer (normal case) + * len == span.sl.len (number literal appears literally in source code) + * otherwise s is heap allocated buffer of len bytes + * for macro arg/stringify: + * s is like keyword/ident + * argidx is index in macro param list, + * macidx is macro id of which it is a parameter + */ +}; + +extern int nerror, nwarn; +struct lexer { + struct lexer *save; + short fileid; + const uchar *dat; + uint ndat; + uint idx, chridx; + ushort chrbuf0; + struct macrostack *macstk; + struct token peektok; + bool eof, err; + struct arena **tmparena; + bool firstdirective; + short nppcnd0; + short inclnerror, inclnwarn; + internstr inclguard; + uchar chrbuf[1<<10]; + uint chridxbuf[1<<10]; +}; + +enum initlexer { + LXOK, + LXFILESEEN, + LXFILESKIP, + LXERR, +}; + +int lex(struct lexer *, struct token *); +int lexpeek(struct lexer *, struct token *); +enum typetag parsenumlit(uvlong *, double *, const struct token *, bool ispp); +enum initlexer initlexer(struct lexer *, const char **err, const char *file); +void lexerdump(struct lexer *, struct wbuf *out); +void lexerfreetemps(struct lexer *); + +/* vim:set ts=3 sw=3 expandtab: */ |