1 files changed, 115 insertions, 0 deletions
diff --git a/c/lex.h b/c/lex.h
new file mode 100644
index 0000000..4ea7327
--- /dev/null
+++ b/c/lex.h
@@ -0,0 +1,115 @@
+#include "../common.h"
+
+/* Make *dst end where snd ends; returns 0 (dst untouched) if files differ or snd starts before dst. */
+static inline bool
+joinspan(struct span0 *dst, struct span0 snd)
+{
+   bool joinable = dst->file == snd.file && dst->off <= snd.off;
+   if (joinable) dst->len = snd.off + snd.len - dst->off;
+   return joinable;
+}
+
+enum toktag { /* single-character tokens' tag value is the character itself */
+   TKEOF = -1, /* presumably end of input -- TODO confirm against lex() */
+   TKXXX, /* 0; meaning not visible in this header (placeholder/invalid tag?) */
+   TKNUMLIT, /* number literal */
+   TKCHRLIT, /* character literal */
+   TKSTRLIT, /* string literal */
+   TKPPHDRH, /* <hdr> (for #include) */
+   TKPPHDRQ, /* "hdr" (for #include) */
+   TKPPMACARG, /* macro param, in repl list */
+   TKPPMACSTR, /* stringify macro param, in repl list */
+   TKEQU = '@', /* ==; the multi-character operators below count upward from '@' */
+   TKNEQ, /* != */
+   TKLTE, /* <= */
+   TKGTE, /* >= */
+   TKSHR, /* >> */
+   TKSHL, /* << */
+   TKINC, /* ++ */
+   TKDEC, /* -- */
+   TKDOTS, /* ... */
+   TKARROW, /* -> */
+   TKPPCAT, /* ## */
+   TKLOGAND, /* && */
+   TKLOGIOR, /* || */
+   TKSETADD, /* += */
+   TKSETSUB, /* -= */
+   TKSETMUL, /* *= */
+   TKSETDIV, /* /= */
+   TKSETREM, /* %= */
+   TKSETIOR, /* |= */
+   TKSETXOR, /* ^= */
+   TKSETAND, /* &= */
+   TKSETSHL, /* <<= */
+   TKSETSHR, /* >>= */
+   TKIDENT = 0x80, /* identifier; the TKW<kw> keyword tags generated below continue from here */
+#define _(kw, stdc) TKW##kw,
+#include "keywords.def"
+#undef _
+};
+
+struct token {
+   short t; /* toktag */
+   bool litlit; /* literal text is used in place from the file buffer (see below) */
+   uchar wide : 2; /* for CHRLIT & STRLIT; 1 -> 16bit, 2 -> 32bit */
+   uchar wideuni : 1; /* ditto, 0 -> 'L', 1 -> 'u'/'U' (C11) */
+   union { /* len shares storage with macidx/argidx */
+      uint len;
+      struct { ushort macidx, argidx; };
+   };
+   struct span span; /* the token's span; span.sl.len is related to len below */
+   union { /* payload pointer; ws16/ws32 are used for wide literals (see below) */
+      const char *s;
+      const ushort *ws16;
+      const uint   *ws32;
+   };
+   /* for (multi-)character tokens s & len are unused
+    * for keywords, s is constant cstring, len = strlen(s)
+    * for idents, s is interned cstring, len = strlen(s)
+    * for strlit and chrlit:
+    *  when litlit : s points to start of string within file buffer (after the ")
+    *                len == span.sl.len - 2 (string data appears literally in source code)
+    *  otherwise s is heap allocated buffer of len bytes
+    *  when wide, litlit = 0 and use ws16/ws32
+    * for numlit:
+    *  when litlit : s points to start of token within file buffer (normal case)
+    *                len == span.sl.len (number literal appears literally in source code)
+    *  otherwise s is heap allocated buffer of len bytes
+    * for macro arg/stringify:
+    *  s is like keyword/ident
+    *  argidx is index in macro param list,
+    *  macidx is macro id of which it is a parameter
+    */
+};
+
+extern int nerror; /* presumably a global count of reported errors -- TODO confirm */
+struct lexer { /* lexer state; the field notes below are inferred from names -- TODO confirm */
+   struct lexer *save; /* presumably a saved/outer lexer state */
+   short fileid;
+   const uchar *dat; /* presumably the input bytes being lexed */
+   uint ndat; /* presumably the number of bytes in dat */
+   uint idx, chridx;
+   short chrbuf[1<<10]; /* 1024 entries */
+   uint chridxbuf[1<<10]; /* same capacity as chrbuf */
+   ushort nchrbuf, chrbuf0;
+   struct macrostack *macstk;
+   struct token peektok; /* presumably the token buffered by lexpeek() */
+   bool eof, err;
+   struct arena **tmparena; /* presumably what lexerfreetemps() releases */
+};
+
+enum initlexer { /* return type of initlexer() */
+   LXOK,
+   LXFILESEEN, /* presumably: the file was already seen and is skipped -- TODO confirm */
+   LXERR, /* presumably paired with the err out-parameter of initlexer() -- TODO confirm */
+};
+
+const char *intern(const char *); /* identifier tokens carry an interned cstring (see struct token) */
+int lex(struct lexer *, struct token *); /* NOTE(review): meaning of the int result is not visible here */
+int lexpeek(struct lexer *, struct token *); /* presumably like lex() without consuming -- TODO confirm */
+enum typetag parsenumlit(uvlong *, double *, const struct token *, bool ispp);
+enum initlexer initlexer(struct lexer *, const char **err, const char *file); /* result codes: enum initlexer */
+void lexerdump(struct lexer *, struct wbuf *out);
+void lexerfreetemps(struct lexer *); /* presumably releases the lexer's temporary allocations -- TODO confirm */
+
+/* vim:set ts=3 sw=3 expandtab: */