about summary refs log tree commit diff homepage
path: root/c/lex.h
diff options
context:
space:
mode:
Diffstat (limited to 'c/lex.h')
-rw-r--r-- c/lex.h 115
1 files changed, 115 insertions, 0 deletions
diff --git a/c/lex.h b/c/lex.h
new file mode 100644
index 0000000..4ea7327
--- /dev/null
+++ b/c/lex.h
@@ -0,0 +1,115 @@
+#include "../common.h"
+
+/* Extend *dst so that it ends where snd ends.
+ * Returns 0 and leaves *dst untouched when the two spans refer to different
+ * files or when dst starts after snd. Otherwise rewrites only dst->len to
+ * (snd.off + snd.len) - dst->off and returns 1. The new length is taken from
+ * the end of snd unconditionally, so it may be smaller than the old one when
+ * snd ends inside dst. */
+static inline bool
+joinspan(struct span0 *dst, struct span0 snd)
+{
+   if (dst->file != snd.file || dst->off > snd.off)
+      return 0;
+
+   dst->len = (snd.off + snd.len) - dst->off;
+   return 1;
+}
+
+enum toktag { /* single-character tokens' tag value is the character itself;
+ * the named tags are numbered in four runs: TKEOF at -1, TKXXX onward
+ * from 0, the multi-character punctuators sequentially from '@', and
+ * TKIDENT at 0x80 followed by the keyword tags from keywords.def */
+ TKEOF = -1, /* end of input */
+ TKXXX,
+ TKNUMLIT, /* number literal */
+ TKCHRLIT, /* character literal */
+ TKSTRLIT, /* string literal */
+ TKPPHDRH, /* <hdr> (for #include) */
+ TKPPHDRQ, /* "hdr" (for #include) */
+ TKPPMACARG, /* macro param, in repl list */
+ TKPPMACSTR, /* stringify macro param, in repl list */
+ TKEQU = '@', /* == */
+ TKNEQ, /* != */
+ TKLTE, /* <= */
+ TKGTE, /* >= */
+ TKSHR, /* >> */
+ TKSHL, /* << */
+ TKINC, /* ++ */
+ TKDEC, /* -- */
+ TKDOTS, /* ... */
+ TKARROW, /* -> */
+ TKPPCAT, /* ## */
+ TKLOGAND, /* && */
+ TKLOGIOR, /* || */
+ TKSETADD, /* += */
+ TKSETSUB, /* -= */
+ TKSETMUL, /* *= */
+ TKSETDIV, /* /= */
+ TKSETREM, /* %= */
+ TKSETIOR, /* |= */
+ TKSETXOR, /* ^= */
+ TKSETAND, /* &= */
+ TKSETSHL, /* <<= */
+ TKSETSHR, /* >>= */
+ TKIDENT = 0x80, /* identifier; the keyword tags follow: each _(kw, stdc)
+ * entry of keywords.def expands to one TKW<kw> enumerator (the stdc
+ * argument is not used by this expansion) */
+#define _(kw, stdc) TKW##kw,
+#include "keywords.def"
+#undef _
+};
+
+struct token { /* one lexed token; which fields are meaningful depends on t (see the notes at the end) */
+ short t; /* toktag */
+ bool litlit; /* for literals: s points into the file buffer rather than a heap copy (see below) */
+ uchar wide : 2; /* for CHRLIT & STRLIT; 1 -> 16bit, 2 -> 32bit */
+ uchar wideuni : 1; /* ditto, 0 -> 'L', 1 -> 'u'/'U' (C11) */
+ union { /* len shares storage with macidx/argidx, so only one of the two is valid for a given token */
+ uint len;
+ struct { ushort macidx, argidx; };
+ };
+ struct span span;
+ union { /* one pointer, typed by element width: s for bytes, ws16/ws32 for wide literals */
+ const char *s;
+ const ushort *ws16;
+ const uint *ws32;
+ };
+ /* for (multi-)character punctuation tokens, s & len are unused
+ * for keywords, s is a constant cstring, len = strlen(s)
+ * for idents, s is an interned cstring, len = strlen(s)
+ * for strlit and chrlit:
+ * when litlit : s points to the start of the string within the file buffer (after the ")
+ * len == span.sl.len - 2 (string data appears literally in source code)
+ * otherwise s is a heap allocated buffer of len bytes
+ * when wide, litlit = 0 and use ws16/ws32
+ * for numlit:
+ * when litlit : s points to the start of the token within the file buffer (normal case)
+ * len == span.sl.len (number literal appears literally in source code)
+ * otherwise s is a heap allocated buffer of len bytes
+ * for macro arg/stringify:
+ * s is like keyword/ident
+ * argidx is the index in the macro param list,
+ * macidx is the macro id of which it is a parameter
+ * (len is not available here: it overlaps macidx/argidx)
+ */
+};
+
+extern int nerror; /* defined in another translation unit; presumably the count of errors reported so far -- TODO confirm */
+struct lexer { /* lexer state; this header does not show how the fields are used, so
+ * the notes below are inferred from names and types -- confirm against the implementation */
+ struct lexer *save; /* presumably a saved/outer lexer state -- TODO confirm */
+ short fileid; /* presumably the id of the file being lexed */
+ const uchar *dat; /* presumably the input bytes (read-only through this pointer) */
+ uint ndat; /* presumably the number of bytes at dat */
+ uint idx, chridx; /* presumably positions in the input; the distinction is not visible here */
+ short chrbuf[1<<10]; /* 1024 entries, same count as chridxbuf */
+ uint chridxbuf[1<<10]; /* 1024 entries; presumably the chridx value for each chrbuf entry -- TODO confirm */
+ ushort nchrbuf, chrbuf0; /* presumably fill count and start index for chrbuf/chridxbuf */
+ struct macrostack *macstk; /* presumably the macros currently being expanded */
+ struct token peektok; /* presumably the token buffered by lexpeek() */
+ bool eof, err; /* presumably end-of-input and error flags */
+ struct arena **tmparena; /* presumably where temporaries released by lexerfreetemps() live */
+};
+
+enum initlexer { /* result codes of initlexer(); values are 0, 1, 2 in declaration order */
+ LXOK, /* presumably: lexer initialised successfully */
+ LXFILESEEN, /* presumably: the file had already been seen -- TODO confirm the exact condition */
+ LXERR, /* presumably: failure, details via initlexer()'s err out-parameter */
+};
+
+const char *intern(const char *); /* presumably returns the interned copy of a cstring (cf. ident tokens' s) */
+int lex(struct lexer *, struct token *); /* presumably reads the next token into the token argument -- return value not documented here */
+int lexpeek(struct lexer *, struct token *); /* presumably like lex() but without consuming the token -- TODO confirm */
+enum typetag parsenumlit(uvlong *, double *, const struct token *, bool ispp); /* presumably parses a numlit token into one of the two out-parameters; ispp presumably selects preprocessor rules */
+enum initlexer initlexer(struct lexer *, const char **err, const char *file); /* sets up a lexer for file; result is an enum initlexer code */
+void lexerdump(struct lexer *, struct wbuf *out); /* presumably writes the lexer's tokens/state to out -- TODO confirm */
+void lexerfreetemps(struct lexer *); /* presumably releases temporaries owned by the lexer (cf. tmparena) */
+
+/* vim:set ts=3 sw=3 expandtab: */