aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/c_lex.h
diff options
context:
space:
mode:
authorlemon <lsof@mailbox.org>2026-03-17 13:22:00 +0100
committerlemon <lsof@mailbox.org>2026-03-17 13:22:00 +0100
commita8d6f8bf30c07edb775e56889f568ca20240bedf (patch)
treeb5a452b2675b2400f15013617291fe6061180bbf /src/c_lex.h
parent24f14b7ad1af08d872971d72ce089a529911f657 (diff)
REFACTOR: move sources to src/
Diffstat (limited to 'src/c_lex.h')
-rw-r--r--src/c_lex.h126
1 files changed, 126 insertions, 0 deletions
diff --git a/src/c_lex.h b/src/c_lex.h
new file mode 100644
index 0000000..e70bc78
--- /dev/null
+++ b/src/c_lex.h
@@ -0,0 +1,126 @@
+#include "../common.h"
+#include "../type.h"
+
+static inline bool /* extend *dst so that it ends where snd ends; returns 1 on success, 0 (leaving *dst unmodified) when the spans cannot be joined */
+joinspan(struct span0 *dst, struct span0 snd)
+{
+ if (dst->file != snd.file) return 0; /* spans from different files are never joined */
+ if (dst->off > snd.off) return 0; /* snd must not start before dst */
+ dst->len = snd.off + snd.len - dst->off; /* length now runs from dst's start to snd's end; note this shrinks dst if snd ends before dst's previous end */
+ return 1;
+}
+
+enum toktag { /* single-character tokens' tag value is the character itself */
+ TKEOF = 0xFF, /* fixed at 0xFF; the static_assert after this enum keeps every other tag strictly below it */
+ TKXXX = 0, /* numbering restarts at 0 here, so the literal/preprocessor tags below take the values 1..7 */
+ TKNUMLIT,
+ TKCHRLIT,
+ TKSTRLIT,
+ TKPPHDRH, /* <hdr> (for #include) */
+ TKPPHDRQ, /* "hdr" (for #include) */
+ TKPPMACARG, /* macro param, in repl list */
+ TKPPMACSTR, /* stringify macro param, in repl list */
+ TKEQU = '@', /* == ; the multi-character operator tags are numbered upward from '@' (0x40) through TKSETSHR (0x56) */
+ TKNEQ, /* != */
+ TKLTE, /* <= */
+ TKGTE, /* >= */
+ TKSHR, /* >> */
+ TKSHL, /* << */
+ TKINC, /* ++ */
+ TKDEC, /* -- */
+ TKDOTS, /* ... */
+ TKARROW, /* -> */
+ TKPPCAT, /* ## */
+ TKLOGAND, /* && */
+ TKLOGIOR, /* || */
+ TKSETADD, /* += */
+ TKSETSUB, /* -= */
+ TKSETMUL, /* *= */
+ TKSETDIV, /* /= */
+ TKSETREM, /* %= */
+ TKSETIOR, /* |= */
+ TKSETXOR, /* ^= */
+ TKSETAND, /* &= */
+ TKSETSHL, /* <<= */
+ TKSETSHR, /* >>= */
+ TKIDENT = 0x80, /* the TKW* tags generated from keywords.def below continue from 0x81 (presumably one entry per keyword; confirm against keywords.def) */
+#define _(kw, stdc, ...) TKW##kw,
+#include "keywords.def"
+#undef _
+ NTOKTAG, /* one past the last tag emitted by keywords.def */
+};
+static_assert(NTOKTAG < 256); /* tags must stay below 256: struct token stores the tag in its uchar member t */
+
+struct token {
+ uchar t; /* token tag: an enum toktag value */
+ bool litlit : 1, /* literal data is referenced in place in the file buffer; see the strlit/chrlit/numlit notes below */
+ blue : 1, /* preprocessor token painted blue */
+ extwarn : 1; /* warn this keyword token is an extension */
+ uchar wide : 2, /* for CHRLIT & STRLIT; 1 -> 16bit, 2 -> 32bit */
+ wideuni : 1, /* ditto, 0 -> 'L', 1 -> 'u'/'U' (C11) */
+ space : 1; /* preceded by whitespace? */
+ union { /* anonymous union: len and argidx occupy the same storage */
+ uint len;
+ ushort argidx;
+ };
+ struct span span; /* the token's span; the notes below relate len to span.sl.len */
+ union { /* anonymous union: name, s, ws16 and ws32 occupy the same storage */
+ internstr name;
+ const char *s;
+ const ushort *ws16; /* for wide literals, see `wide` above and the notes below */
+ const uint *ws32; /* for wide literals, see `wide` above and the notes below */
+ };
+ /* for (multi-)character tokens s & len are unused
+ * for keywords, s is constant cstring, len = strlen(s)
+ * for idents, s is interned cstring, len = strlen(s)
+ * for strlit and chrlit:
+ * when litlit : s points to start of string within file buffer (after the ")
+ * len == span.sl.len - 2 (string data appears literally in source code)
+ * otherwise s is heap allocated buffer of len bytes
+ * when wide, litlit = 0 and use ws16/ws32
+ * for numlit:
+ * when litlit : s points to start of token within file buffer (normal case)
+ * len == span.sl.len (number literal appears literally in source code)
+ * otherwise s is heap allocated buffer of len bytes
+ * for macro arg/stringify:
+ * s is like keyword/ident
+ * argidx is index in macro param list,
+ * macidx is macro id of which it is a parameter (NOTE(review): no macidx member is declared in this struct; confirm whether this line is stale)
+ */
+};
+
+extern int nerror, nwarn; /* declared here only; the definitions live in another file */
+struct lexer {
+ struct lexer *save; /* NOTE(review): presumably a saved/enclosing lexer state; confirm against the users of this field */
+ short fileid;
+ const uchar *dat; /* presumably the input bytes, with ndat as their count; TODO confirm */
+ uint ndat;
+ uint idx, chridx;
+ ushort chrbuf0;
+ struct macrostack *macstk;
+ struct token peektok; /* presumably the token buffered by lexpeek(); TODO confirm */
+ bool eof, err;
+ struct arena **tmparena;
+ bool firstdirective;
+ short nppcnd0;
+ short inclnerror, inclnwarn; /* same type family as the global nerror/nwarn counters; exact relationship not visible here */
+ internstr inclguard;
+ uchar chrbuf[1<<10]; /* 1024 entries */
+ uint chridxbuf[1<<10]; /* 1024 entries, same capacity as chrbuf */
+};
+
+enum initlexer { /* result codes; this is the return type of the initlexer() function declared in this header */
+ LXOK, /* first enumerator, so its value is 0 */
+ LXFILESEEN, /* NOTE(review): exact meaning is defined by initlexer()'s implementation, which is not visible here */
+ LXFILESKIP, /* NOTE(review): likewise; confirm against initlexer() */
+ LXERR, /* presumably paired with the `err` out-parameter of initlexer(); TODO confirm */
+};
+
+int lex(struct lexer *, struct token *); /* lexer entry point; presumably fills the token and returns its tag (TODO confirm the return convention) */
+int lexpeek(struct lexer *, struct token *); /* same signature as lex(); presumably does not consume the token (confirm) */
+enum typetag parsenumlit(uvlong *, double *, const struct token *, bool ispp); /* token is read-only; the uvlong and double pointers are non-const, presumably outputs for the integer/floating value */
+enum initlexer initlexer(struct lexer *, const char **err, const char *file); /* returns one of the LX* codes from enum initlexer */
+void lexerdump(struct lexer *, struct wbuf *out);
+void lexerfreetemps(struct lexer *);
+
+/* vim:set ts=3 sw=3 expandtab: */