From 77b13b42643991fc8c2b8942ca167eb7bf156908 Mon Sep 17 00:00:00 2001
From: lemon <lsof@mailbox.org>
Date: Thu, 16 Oct 2025 17:25:02 +0200
Subject: wide str and char literals

---
 lex.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'lex.h')

diff --git a/lex.h b/lex.h
index 3de4f6e..80898ce 100644
--- a/lex.h
+++ b/lex.h
@@ -51,12 +51,17 @@ enum toktag { /* single-character tokens' tag value is the character itself */
 struct token {
    short t; /* toktag */
    bool litlit;
+   uchar wide : 2; /* for CHRLIT & STRLIT; 1 -> 16bit, 2 -> 32bit */
    union {
       uint len;
       struct { ushort macidx, argidx; };
    };
    struct span span;
-   const char *s; 
+   union {
+      const char *s;
+      const ushort *ws16;
+      const uint   *ws32;
+   };
    /* for (multi-)character tokens s & len are unused
     * for keywords, s is constant cstring, len = strlen(s)
     * for idents, s is interned cstring, len = strlen(s)
@@ -64,6 +69,7 @@ struct token {
     *  when litlit : s points to start of string within file buffer (after the ")
     *                len == span.sl.len - 2 (string data appears literally in source code)
     *  otherwise s is heap allocated buffer of len bytes
+    *  when wide : litlit is false; data is in ws16 (wide == 1) or ws32 (wide == 2)
     * for numlit:
     *  when litlit : s points to start of token within file buffer (normal case)
     *                len == span.sl.len (number literal appears literally in source code)
-- 
cgit v1.2.3