#include "common.h"

/*
 * Stretch *dst so that it ends exactly where snd ends.
 *
 * Returns 1 on success. Returns 0 and leaves *dst untouched when the two
 * spans belong to different files or when snd starts before dst.
 *
 * NOTE: the new length is always computed from the end of snd, so if snd
 * ends before the current end of dst, dst becomes shorter.
 */
static inline bool
joinspan(struct span0 *dst, struct span0 snd)
{
   bool joinable = dst->file == snd.file && dst->off <= snd.off;

   if (joinable)
      dst->len = snd.off + snd.len - dst->off;
   return joinable;
}

/*
 * Token tags.
 *
 * Single-character tokens' tag value is the character itself.
 * Multi-character punctuators are numbered upward from '@', and
 * identifiers/keywords start at 0x80; keyword tags (TKW<kw>) are generated
 * from keywords.def through the _() X-macro.
 */
enum toktag {
   TKEOF = -1,
   TKXXX,
   TKNUMLIT,
   TKCHRLIT,
   TKSTRLIT,
   TKPPHDRH,    /* header name in angle brackets (for #include) */
   TKPPHDRQ,    /* "hdr" (for #include) */
   TKPPMACARG,  /* macro param, in repl list */
   TKPPMACSTR,  /* stringify macro param, in repl list */

   TKEQU = '@', /* == */
   TKNEQ,       /* != */
   TKLTE,       /* <= */
   TKGTE,       /* >= */
   TKSHR,       /* >> */
   TKSHL,       /* << */
   TKINC,       /* ++ */
   TKDEC,       /* -- */
   TKDOTS,      /* ... */
   TKARROW,     /* -> */
   TKPPCAT,     /* ## */
   TKLOGAND,    /* && */
   TKLOGIOR,    /* || */
   TKSETADD,    /* += */
   TKSETSUB,    /* -= */
   TKSETMUL,    /* *= */
   TKSETDIV,    /* /= */
   TKSETREM,    /* %= */
   TKSETIOR,    /* |= */
   TKSETXOR,    /* ^= */
   TKSETAND,    /* &= */
   TKSETSHL,    /* <<= */
   TKSETSHR,    /* >>= */

   TKIDENT = 0x80,
#define _(kw, stdc) TKW##kw,
#include "keywords.def"
#undef _
};

/* A single lexed token; see the comment at the end for how s/len are used. */
struct token {
   short t;           /* toktag */
   bool litlit;
   uchar wide : 2;    /* for CHRLIT & STRLIT; 1 -> 16bit, 2 -> 32bit */
   uchar wideuni : 1; /* ditto, 0 -> 'L', 1 -> 'u'/'U' (C11) */
   union {
      uint len;
      struct {
         ushort macidx, argidx;
      };
   };
   struct span span;
   union {
      const char *s;
      const ushort *ws16;
      const uint *ws32;
   };
   /*
    * for (multi-)character tokens, s & len are unused
    * for keywords, s is a constant cstring, len = strlen(s)
    * for idents, s is an interned cstring, len = strlen(s)
    * for strlit and chrlit:
    *    when litlit : s points to the start of the string within the file
    *                  buffer (after the ")
    *                  len == span.sl.len - 2
    *                  (string data appears literally in source code)
    *    otherwise s is a heap allocated buffer of len bytes
    *    when wide, litlit = 0 and use ws16/ws32
    * for numlit:
    *    when litlit : s points to the start of the token within the file
    *                  buffer (normal case)
    *                  len == span.sl.len
    *                  (number literal appears literally in source code)
    *    otherwise s is a heap allocated buffer of len bytes
    * for macro arg/stringify:
    *    s is like keyword/ident
    *    argidx is the index in the macro param list,
    *    macidx is the macro id of which it is a parameter
    */
};

extern int nerror;

/*
 * Lexer state.
 * NOTE(review): field roles are not spelled out in this header; the hedged
 * remarks below are read off the names and should be confirmed against the
 * lexer implementation.
 */
struct lexer {
   struct lexer *save;      /* presumably a saved/outer lexer -- confirm */
   short fileid;
   const uchar *dat;        /* presumably the input bytes, ndat of them -- confirm */
   uint ndat;
   uint idx, chridx;
   short chrbuf[1<<10];
   uint chridxbuf[1<<10];   /* same capacity as chrbuf */
   ushort nchrbuf, chrbuf0;
   struct macrostack *macstk;
   struct token peektok;
   bool eof, err;
   struct arena **tmparena;
};

/* Result codes of initlexer(). */
enum initlexer {
   LXOK,
   LXFILESEEN,
   LXERR,
};

const char *intern(const char *);
int lex(struct lexer *, struct token *);
int lexpeek(struct lexer *, struct token *);
enum typetag parsenumlit(uvlong *, double *, const struct token *, bool ispp);
enum initlexer initlexer(struct lexer *, const char **err, const char *file);
void lexerdump(struct lexer *, struct wbuf *out);
void lexerfreetemps(struct lexer *);

/* vim:set ts=3 sw=3 expandtab: */