about summary refs log tree commit diff homepage
path: root/c
diff options
context:
space:
mode:
author: lemon <lsof@mailbox.org> 2026-01-09 13:55:44 +0100
committer: lemon <lsof@mailbox.org> 2026-01-09 13:56:23 +0100
commit: 9567ba9173ed194504ac0f714c5c7cecc6d0383d (patch)
tree: 8332c32edbdaea8cc53e2fc6da8ef4efce9dd7e7 /c
parent: 5d088fa24f5e9739c3cd184f9df840e3486fcd51 (diff)
lexer: multibyte escape seqs in wide character literal
(hacky)
Diffstat (limited to 'c')
-rw-r--r-- c/lex.c 17
1 files changed, 13 insertions, 4 deletions
diff --git a/c/lex.c b/c/lex.c
index 02e9f3d..8bfee38 100644
--- a/c/lex.c
+++ b/c/lex.c
@@ -247,6 +247,7 @@ readstrchrlit(struct lexer *lx, struct token *tk, char delim, int wide)
beginoff = idx = lx->chridx;
while ((c = next(lx)) != delim) {
+ static uint wmax[] = {0xFF, 0xFFFF, 0xFFFFFFFFu};
if (c == '\n' || c == TKEOF) {
Noterm:
span.sl = (struct span0) { idx, lx->chridx - idx, lx->fileid };
@@ -276,11 +277,10 @@ readstrchrlit(struct lexer *lx, struct token *tk, char delim, int wide)
if (c-'0' < 10) n = n<<4 | (c-'0');
else n = n<<4 | (10 + (c|0x20)-'a');
} while (aisxdigit(peek(lx, 0)));
- if (n > 0xFF) {
+ if (n > wmax[wide]) {
span.sl.len = lx->chridx - span.sl.off;
error(&span, "hex escape sequence out of range");
}
- c = n & 0xFF;
break;
default:
if (aisodigit(c)) { /* octal */
@@ -289,7 +289,7 @@ readstrchrlit(struct lexer *lx, struct token *tk, char delim, int wide)
if (!aisodigit(peek(lx, 0))) break;
n = n<<3 | ((c = next(lx))-'0');
}
- if (n > 0377) {
+ if (n > wmax[wide]) {
span.sl.len = lx->chridx - span.sl.off;
error(&span, "octal escape sequence out of range");
}
@@ -301,7 +301,16 @@ readstrchrlit(struct lexer *lx, struct token *tk, char delim, int wide)
error(&span, "invalid escape sequence");
}
}
- vpush(&b, c);
+ if (!wide || c <= 0xFF) {
+ vpush(&b, c);
+ } else {
+ /* XXX this doesn't work for non-utf sequences, UTF-16 surrogates, etc
+ * the source utf8 -> utf16/32 conversion should be done on the fly, then
+ * these can also be appended directly, rather than doing the conversion at the end */
+ char p[4];
+ int n = utf8enc(p, c);
+ vpushn(&b, p, n);
+ }
idx = lx->chridx;;
}
if (delim == '"') {