wide str and char literals

author: lemon <lsof@mailbox.org> 2025-10-16 17:25:02 +0200
committer: lemon <lsof@mailbox.org> 2025-10-16 17:25:02 +0200
commit: 77b13b42643991fc8c2b8942ca167eb7bf156908 (patch)
tree: f65a402832af6111c623af02cf946f7de928e223 /lex.h
parent: c19b3e277399a513c5e3a02d126ba666847566df (diff)
1 files changed, 7 insertions, 1 deletions
diff --git a/lex.h b/lex.h
index 3de4f6e..80898ce 100644
--- a/lex.h
+++ b/lex.h
@@ -51,12 +51,17 @@ enum toktag { /* single-character tokens' tag value is the character itself */
 struct token {
    short t; /* toktag */
    bool litlit;
+   uchar wide : 2; /* for CHRLIT & STRLIT; 0 -> not wide (use s), 1 -> 16bit, 2 -> 32bit */
    union {
       uint len;
       struct { ushort macidx, argidx; };
    };
    struct span span;
-   const char *s; 
+   union {
+      const char *s;
+      const ushort *ws16;
+      const uint   *ws32;
+   };
    /* for (multi-)character tokens s & len are unused
     * for keywords, s is constant cstring, len = strlen(s)
     * for idents, s is interned cstring, len = strlen(s)
@@ -64,6 +69,7 @@ struct token {
     *  when litlit : s points to start of string within file buffer (after the ")
     *                len == span.sl.len - 2 (string data appears literally in source code)
     *  otherwise s is heap allocated buffer of len bytes
+    *  when wide != 0, litlit = 0 and data is accessed via ws16 (wide == 1) or ws32 (wide == 2)
     * for numlit:
     *  when litlit : s points to start of token within file buffer (normal case)
     *                len == span.sl.len (number literal appears literally in source code)
author	lemon <lsof@mailbox.org>	2025-10-16 17:25:02 +0200
committer	lemon <lsof@mailbox.org>	2025-10-16 17:25:02 +0200
commit	77b13b42643991fc8c2b8942ca167eb7bf156908 (patch)
tree	f65a402832af6111c623af02cf946f7de928e223 /lex.h
parent	c19b3e277399a513c5e3a02d126ba666847566df (diff)