REFACTOR: move sources to src/

author: lemon <lsof@mailbox.org> 2026-03-17 13:22:00 +0100
committer: lemon <lsof@mailbox.org> 2026-03-17 13:22:00 +0100
commit: a8d6f8bf30c07edb775e56889f568ca20240bedf (patch)
tree: b5a452b2675b2400f15013617291fe6061180bbf /src/c_lex.h
parent: 24f14b7ad1af08d872971d72ce089a529911f657 (diff)
1 files changed, 126 insertions, 0 deletions
diff --git a/src/c_lex.h b/src/c_lex.h
new file mode 100644
index 0000000..e70bc78
--- /dev/null
+++ b/src/c_lex.h
@@ -0,0 +1,126 @@
+#include "../common.h"
+#include "../type.h"
+
+/* Extend *dst so that it ends where snd ends; dst->off is kept as is.
+ * The join happens only when both spans are in the same file and snd does
+ * not start before dst. Otherwise *dst is left untouched.
+ * Returns 1 when *dst was extended, 0 when it was not. */
+static inline bool
+joinspan(struct span0 *dst, struct span0 snd)
+{
+   bool joinable = dst->file == snd.file && dst->off <= snd.off;
+   if (joinable)
+      dst->len = snd.off + snd.len - dst->off;
+   return joinable;
+}
+
+enum toktag { /* token kinds; single-character tokens' tag value is the character itself */
+   TKEOF = 0xFF, /* explicit value; the enumerators below restart from 0 */
+   TKXXX = 0, /* NOTE(review): presumably "no/invalid token" -- confirm */
+   TKNUMLIT, /* number literal */
+   TKCHRLIT, /* character literal */
+   TKSTRLIT, /* string literal */
+   TKPPHDRH, /* <hdr> (for #include) */
+   TKPPHDRQ, /* "hdr" (for #include) */
+   TKPPMACARG, /* macro param, in repl list */
+   TKPPMACSTR, /* stringify macro param, in repl list */
+   TKEQU = '@', /* == ; the multi-character operators count up from '@' */
+   TKNEQ, /* != */
+   TKLTE, /* <= */
+   TKGTE, /* >= */
+   TKSHR, /* >> */
+   TKSHL, /* << */
+   TKINC, /* ++ */
+   TKDEC, /* -- */
+   TKDOTS, /* ... */
+   TKARROW, /* -> */
+   TKPPCAT, /* ## */
+   TKLOGAND, /* && */
+   TKLOGIOR, /* || */
+   TKSETADD, /* += */
+   TKSETSUB, /* -= */
+   TKSETMUL, /* *= */
+   TKSETDIV, /* /= */
+   TKSETREM, /* %= */
+   TKSETIOR, /* |= */
+   TKSETXOR, /* ^= */
+   TKSETAND, /* &= */
+   TKSETSHL, /* <<= */
+   TKSETSHR, /* >>= */
+   TKIDENT = 0x80, /* identifier; the keyword tags generated below follow it */
+#define _(kw, stdc, ...) TKW##kw, /* each _() entry in keywords.def yields a TKW<kw> tag */
+#include "keywords.def"
+#undef _
+   NTOKTAG, /* one past the last tag generated above */
+};
+static_assert(NTOKTAG < 256); /* every tag must stay below 256; struct token stores it in a uchar */
+
+struct token { /* a token: tag, flag bits, payload length/index, source span, payload pointer */
+   uchar t; /* toktag */
+   bool litlit : 1, /* literal data is used in place from the file buffer (see below) */
+        blue : 1, /* preprocessor token painted blue */
+        extwarn : 1; /* warn this keyword token is an extension */
+   uchar wide : 2, /* for CHRLIT & STRLIT; 1 -> 16bit, 2 -> 32bit */
+         wideuni : 1, /* ditto, 0 -> 'L', 1 -> 'u'/'U' (C11) */
+         space : 1; /* preceded by whitespace? */
+   union {
+      uint len; /* length of the payload, per token kind (see below) */
+      ushort argidx; /* macro arg/stringify tokens: index in macro param list */
+   };
+   struct span span; /* span.sl.len is related to len in the notes below */
+   union { /* payload; which member applies depends on t and wide (see below) */
+      internstr name; /* shares storage with s */
+      const char *s;
+      const ushort *ws16; /* wide == 1 */
+      const uint   *ws32; /* wide == 2 */
+   };
+   /* for (multi-)character tokens s & len are unused
+    * for keywords, s is constant cstring, len = strlen(s)
+    * for idents, s is interned cstring, len = strlen(s)
+    * for strlit and chrlit:
+    *  when litlit : s points to start of string within file buffer (after the ")
+    *                len == span.sl.len - 2 (string data appears literally in source code)
+    *  otherwise s is heap allocated buffer of len bytes
+    *  when wide, litlit = 0 and use ws16/ws32
+    * for numlit:
+    *  when litlit : s points to start of token within file buffer (normal case)
+    *                len == span.sl.len (number literal appears literally in source code)
+    *  otherwise s is heap allocated buffer of len bytes
+    * for macro arg/stringify:
+    *  s is like keyword/ident
+    *  argidx is index in macro param list,
+    *  macidx is macro id of which it is a parameter (NOTE(review): no macidx field in this struct -- stale?)
+    */
+};
+
+extern int nerror, nwarn; /* declaration only; the definition is not in this header */
+struct lexer { /* lexer state; passed to lex(), lexpeek(), initlexer(), lexerdump(), lexerfreetemps() */
+   struct lexer *save; /* NOTE(review): presumably a saved outer lexer -- confirm */
+   short fileid;
+   const uchar *dat; /* NOTE(review): presumably the input bytes, ndat of them -- confirm */
+   uint ndat;
+   uint idx, chridx;
+   ushort chrbuf0;
+   struct macrostack *macstk;
+   struct token peektok; /* NOTE(review): presumably the token buffered by lexpeek() -- confirm */
+   bool eof, err;
+   struct arena **tmparena;
+   bool firstdirective;
+   short nppcnd0;
+   short inclnerror, inclnwarn; /* NOTE(review): presumably tied to the nerror/nwarn globals -- confirm */
+   internstr inclguard;
+   uchar chrbuf[1<<10]; /* 1024 entries, same count as chridxbuf */
+   uint chridxbuf[1<<10];
+};
+
+enum initlexer { /* status returned by initlexer() */
+   LXOK, /* 0 */
+   LXFILESEEN, /* NOTE(review): presumably the file was already seen -- confirm */
+   LXFILESKIP,
+   LXERR, /* NOTE(review): presumably reported via initlexer()'s err argument -- confirm */
+};
+
+int lex(struct lexer *, struct token *); /* NOTE(review): presumably reads the next token into the struct token -- confirm */
+int lexpeek(struct lexer *, struct token *); /* NOTE(review): presumably like lex() without consuming -- confirm */
+enum typetag parsenumlit(uvlong *, double *, const struct token *, bool ispp); /* the token is read-only; the uvlong/double pointers are non-const */
+enum initlexer initlexer(struct lexer *, const char **err, const char *file); /* returns an enum initlexer status */
+void lexerdump(struct lexer *, struct wbuf *out);
+void lexerfreetemps(struct lexer *);
+
+/* vim:set ts=3 sw=3 expandtab: */
author	lemon <lsof@mailbox.org>	2026-03-17 13:22:00 +0100
committer	lemon <lsof@mailbox.org>	2026-03-17 13:22:00 +0100
commit	a8d6f8bf30c07edb775e56889f568ca20240bedf (patch)
tree	b5a452b2675b2400f15013617291fe6061180bbf /src/c_lex.h
parent	24f14b7ad1af08d872971d72ce089a529911f657 (diff)