diff options
Diffstat (limited to 'c/lex.h')
| -rw-r--r-- | c/lex.h | 115 |
1 files changed, 115 insertions, 0 deletions
@@ -0,0 +1,115 @@ +#include "../common.h" + +static inline bool +joinspan(struct span0 *dst, struct span0 snd) +{ + if (dst->file != snd.file) return 0; + if (dst->off > snd.off) return 0; + dst->len = snd.off + snd.len - dst->off; + return 1; +} + +enum toktag { /* single-character tokens' tag value is the character itself */ + TKEOF = -1, + TKXXX, + TKNUMLIT, + TKCHRLIT, + TKSTRLIT, + TKPPHDRH, /* <hdr> (for #include) */ + TKPPHDRQ, /* "hdr" (for #include) */ + TKPPMACARG, /* macro param, in repl list */ + TKPPMACSTR, /* stringify macro param, in repl list */ + TKEQU = '@', /* == */ + TKNEQ, /* != */ + TKLTE, /* <= */ + TKGTE, /* >= */ + TKSHR, /* >> */ + TKSHL, /* << */ + TKINC, /* ++ */ + TKDEC, /* -- */ + TKDOTS, /* ... */ + TKARROW, /* -> */ + TKPPCAT, /* ## */ + TKLOGAND, /* && */ + TKLOGIOR, /* || */ + TKSETADD, /* += */ + TKSETSUB, /* -= */ + TKSETMUL, /* *= */ + TKSETDIV, /* /= */ + TKSETREM, /* %= */ + TKSETIOR, /* |= */ + TKSETXOR, /* ^= */ + TKSETAND, /* &= */ + TKSETSHL, /* <<= */ + TKSETSHR, /* >>= */ + TKIDENT = 0x80, +#define _(kw, stdc) TKW##kw, +#include "keywords.def" +#undef _ +}; + +struct token { + short t; /* toktag */ + bool litlit; + uchar wide : 2; /* for CHRLIT & STRLIT; 1 -> 16bit, 2 -> 32bit */ + uchar wideuni : 1; /* ditto, 0 -> 'L', 1 -> 'u'/'U' (C11) */ + union { + uint len; + struct { ushort macidx, argidx; }; + }; + struct span span; + union { + const char *s; + const ushort *ws16; + const uint *ws32; + }; + /* for (multi-)character tokens s & len are unused + * for keywords, s is constant cstring, len = strlen(s) + * for idents, s is interned cstring, len = strlen(s) + * for strlit and chrlit: + * when litlit : s points to start of string within file buffer (after the ") + * len == span.sl.len - 2 (string data appears literally in source code) + * otherwise s is heap allocated buffer of len bytes + * when wide, litlit = 0 and use ws16/ws32 + * for numlit: + * when litlit : s points to start of token within file buffer (normal case) + * len == span.sl.len (number literal appears literally in source code) + * otherwise s is heap allocated buffer of len bytes + * for macro arg/stringify: + * s is like keyword/ident + * argidx is index in macro param list, + * macidx is macro id of which it is a parameter + */ +}; + +extern int nerror; +struct lexer { + struct lexer *save; + short fileid; + const uchar *dat; + uint ndat; + uint idx, chridx; + short chrbuf[1<<10]; + uint chridxbuf[1<<10]; + ushort nchrbuf, chrbuf0; + struct macrostack *macstk; + struct token peektok; + bool eof, err; + struct arena **tmparena; +}; + +enum initlexer { + LXOK, + LXFILESEEN, + LXERR, +}; + +const char *intern(const char *); +int lex(struct lexer *, struct token *); +int lexpeek(struct lexer *, struct token *); +enum typetag parsenumlit(uvlong *, double *, const struct token *, bool ispp); +enum initlexer initlexer(struct lexer *, const char **err, const char *file); +void lexerdump(struct lexer *, struct wbuf *out); +void lexerfreetemps(struct lexer *); + +/* vim:set ts=3 sw=3 expandtab: */ |