From 77b13b42643991fc8c2b8942ca167eb7bf156908 Mon Sep 17 00:00:00 2001 From: lemon Date: Thu, 16 Oct 2025 17:25:02 +0200 Subject: wide str and char literals --- lex.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lex.h') diff --git a/lex.h b/lex.h index 3de4f6e..80898ce 100644 --- a/lex.h +++ b/lex.h @@ -51,12 +51,17 @@ enum toktag { /* single-character tokens' tag value is the character itself */ struct token { short t; /* toktag */ bool litlit; + uchar wide : 2; /* for CHRLIT & STRLIT; 1 -> 16bit, 2 -> 32bit */ union { uint len; struct { ushort macidx, argidx; }; }; struct span span; - const char *s; + union { + const char *s; + const ushort *ws16; + const uint *ws32; + }; /* for (multi-)character tokens s & len are unused * for keywords, s is constant cstring, len = strlen(s) * for idents, s is interned cstring, len = strlen(s) @@ -64,6 +69,7 @@ struct token { * when litlit : s points to start of string within file buffer (after the ") * len == span.sl.len - 2 (string data appears literally in source code) * otherwise s is heap allocated buffer of len bytes + * when wide, litlit = 0 and use ws16/ws32 * for numlit: * when litlit : s points to start of token within file buffer (normal case) * len == span.sl.len (number literal appears literally in source code) -- cgit v1.2.3