#ifndef LEX_H_INCLUDED
#define LEX_H_INCLUDED

/* Lexer interface: token tags, the token record, lexer state and entry
 * points.  The include guard is required because this header defines a
 * static inline function, enums and structs; a second inclusion in the same
 * translation unit would otherwise be a redefinition error. */

#include "../common.h"
#include "../type.h"

/* Extend *dst so that it ends where snd ends.
 *
 * Returns 1 on success.  Returns 0 and leaves *dst untouched when the two
 * spans belong to different files or when snd starts before *dst.
 *
 * NOTE(review): when snd ends before the current end of *dst (snd nested
 * inside *dst), the resulting length is smaller than before -- confirm
 * callers only ever pass a span that ends at or after *dst. */
static inline bool joinspan(struct span0 *dst, struct span0 snd)
{
   if (dst->file != snd.file || dst->off > snd.off)
      return 0;
   dst->len = snd.off + snd.len - dst->off;
   return 1;
}

/* Token tags.  Value layout:
 *   0 .. 7       TKXXX and literal / preprocessor-only tags
 *   '@' onward   multi-character operators
 *   0x80 onward  TKIDENT followed by one TKW<kw> tag per keywords.def entry
 *   0xFF         TKEOF
 */
enum toktag {
   /* single-character tokens' tag value is the character itself */
   TKEOF = 0xFF,
   TKXXX = 0,
   TKNUMLIT,
   TKCHRLIT,
   TKSTRLIT,
   TKPPHDRH,   /* <hdr> (for #include) */
   TKPPHDRQ,   /* "hdr" (for #include) */
   TKPPMACARG, /* macro param, in repl list */
   TKPPMACSTR, /* stringify macro param, in repl list */
   TKEQU = '@', /* == */
   TKNEQ,      /* != */
   TKLTE,      /* <= */
   TKGTE,      /* >= */
   TKSHR,      /* >> */
   TKSHL,      /* << */
   TKINC,      /* ++ */
   TKDEC,      /* -- */
   TKDOTS,     /* ... */
   TKARROW,    /* -> */
   TKPPCAT,    /* ## */
   TKLOGAND,   /* && */
   TKLOGIOR,   /* || */
   TKSETADD,   /* += */
   TKSETSUB,   /* -= */
   TKSETMUL,   /* *= */
   TKSETDIV,   /* /= */
   TKSETREM,   /* %= */
   TKSETIOR,   /* |= */
   TKSETXOR,   /* ^= */
   TKSETAND,   /* &= */
   TKSETSHL,   /* <<= */
   TKSETSHR,   /* >>= */
   TKIDENT = 0x80,
   /* one TKW<kw> enumerator per keywords.def entry; the stdc argument is
    * not used by this expansion */
#define _(kw, stdc) TKW##kw,
#include "keywords.def"
#undef _
   NTOKTAG,
};
/* every keyword tag must stay below 256 (and therefore below TKEOF) */
static_assert(NTOKTAG < 256);

struct token {
   uchar t; /* toktag */
   bool litlit : 1,  /* literal data is used directly from the file buffer (see below) */
        blue : 1,    /* preprocessor token painted blue */
        extwarn : 1; /* warn this keyword token is an extension */
   uchar wide : 2,   /* for CHRLIT & STRLIT; 1 -> 16bit, 2 -> 32bit */
         wideuni : 1; /* ditto, 0 -> 'L', 1 -> 'u'/'U' (C11) */
   union {
      uint len;
      ushort argidx;
   };
   struct span span;
   union {
      internstr name;
      const char *s;
      const ushort *ws16;
      const uint *ws32;
   };
   /* for (multi-)character tokens s & len are unused
    * for keywords, s is constant cstring, len = strlen(s)
    * for idents, s is interned cstring, len = strlen(s)
    * for strlit and chrlit:
    *    when litlit : s points to start of string within file buffer (after the ")
    *                  len == span.sl.len - 2 (string data appears literally in source code)
    *    otherwise s is heap allocated buffer of len bytes
    *    when wide, litlit = 0 and use ws16/ws32
    * for numlit:
    *    when litlit : s points to start of token within file buffer (normal case)
    *                  len == span.sl.len (number literal appears literally in source code)
    *    otherwise s is heap allocated buffer of len bytes
    * for macro arg/stringify:
    *    s is like keyword/ident
    *    argidx is index in macro param list,
    *    macidx is macro id of which it is a parameter
    *    NOTE(review): no macidx member is declared in this struct -- this
    *    line looks stale; confirm and drop or restore the field.
    */
};

extern int nerror;

/* Lexer state.  Field roles below are inferred from names only where marked
 * "presumably"; verify against the implementation. */
struct lexer {
   struct lexer *save;
   short fileid;
   const uchar *dat;         /* presumably the input buffer ... */
   uint ndat;                /* ... and its size -- TODO confirm */
   uint idx, chridx;
   short chrbuf[1<<10];
   uint chridxbuf[1<<10];    /* same capacity as chrbuf */
   ushort chrbuf0;
   struct macrostack *macstk;
   struct token peektok;
   bool eof, err;
   struct arena **tmparena;
   bool firstdirective;
   ushort nppcnd0;
   internstr inclguard;
};

/* result codes of initlexer() */
enum initlexer {
   LXOK,
   LXFILESEEN,
   LXFILESKIP,
   LXERR,
};

int lex(struct lexer *, struct token *);
int lexpeek(struct lexer *, struct token *);
enum typetag parsenumlit(uvlong *, double *, const struct token *, bool ispp);
enum initlexer initlexer(struct lexer *, const char **err, const char *file);
void lexerdump(struct lexer *, struct wbuf *out);
void lexerfreetemps(struct lexer *);

#endif /* LEX_H_INCLUDED */

/* vim:set ts=3 sw=3 expandtab: */