aboutsummaryrefslogtreecommitdiffhomepage
path: root/io.c
diff options
context:
space:
mode:
author lemon <lsof@mailbox.org> 2025-10-16 17:25:02 +0200
committer lemon <lsof@mailbox.org> 2025-10-16 17:25:02 +0200
commit 77b13b42643991fc8c2b8942ca167eb7bf156908 (patch)
tree f65a402832af6111c623af02cf946f7de928e223 /io.c
parent c19b3e277399a513c5e3a02d126ba666847566df (diff)
wide str and char literals
Diffstat (limited to 'io.c')
-rw-r--r-- io.c 60
1 files changed, 60 insertions, 0 deletions
diff --git a/io.c b/io.c
index 2a9e876..f73add8 100644
--- a/io.c
+++ b/io.c
@@ -919,4 +919,64 @@ note(const struct span *span, const char *fmt, ...)
va_end(ap);
}
+/*** UTF util ***/
+
+/* Placeholder: not yet implemented. Judging by its name and signature this
+ * is meant to be the UTF-16 counterpart of utf8to32() -- TODO confirm.
+ *
+ * Builds with assertions enabled abort here. When assertions are compiled
+ * out (NDEBUG) the function reports an empty result (*ulen = 0, NULL)
+ * rather than falling off the end of a non-void function, which would hand
+ * the caller an indeterminate pointer and an unwritten *ulen.
+ */
+ushort *
+utf8to16(uint *ulen, struct arena **arena, const uchar *s, size_t len)
+{
+   assert(0 && "nyi");
+   *ulen = 0;
+   return NULL;
+}
+
+/* Decode `len` bytes of UTF-8 at `s` into an array of 32-bit code points.
+ *
+ * The array is obtained from `arena` through allocz() and holds one element
+ * per decoded sequence; that element count is stored in *ulen. Empty input
+ * yields NULL with *ulen set to 0.
+ *
+ * Sequence length is taken from the lead byte alone. Continuation bytes are
+ * not validated, and any byte that is not a 2-, 3- or 4-byte lead (ASCII, a
+ * stray continuation byte, 0xF8 and above) is passed through as a single
+ * code point. If the final sequence runs past the end of the input it is
+ * replaced by U+FFFD.
+ */
+uint *
+utf8to32(uint *ulen, struct arena **arena, const uchar *s, size_t len)
+{
+   uint *ret, *w;
+   size_t i, step, n = 0;
+
+   if (!len) {
+      *ulen = 0; /* never leave the caller's count unwritten */
+      return NULL;
+   }
+
+   /* Pass 1: count code points so the array can be sized exactly. */
+   for (i = 0; i < len; i += step, ++n) {
+      if ((s[i] & 0xF8) == 0xF0) step = 4;      /* 11110xxx */
+      else if ((s[i] & 0xF0) == 0xE0) step = 3; /* 1110xxxx */
+      else if ((s[i] & 0xE0) == 0xC0) step = 2; /* 110xxxxx */
+      else step = 1;
+      /* A truncated final sequence still counts as one code point (the
+       * replacement character); clamp so the index never passes `len`. */
+      if (step > len - i) step = len - i;
+   }
+
+   ret = allocz(arena, n * sizeof *ret, sizeof *ret);
+
+   /* Pass 2: decode. */
+   for (w = ret, i = 0; i < len; i += step, ++w) {
+      const uchar *p = s + i;
+
+      if ((*p & 0xF8) == 0xF0) step = 4;      /* 11110xxx */
+      else if ((*p & 0xF0) == 0xE0) step = 3; /* 1110xxxx */
+      else if ((*p & 0xE0) == 0xC0) step = 2; /* 110xxxxx */
+      else step = 1;
+
+      if (step > len - i) {
+         /* input ends in the middle of a sequence */
+         *w = 0xFFFD;
+         break;
+      }
+
+      /* The lead byte carries 3, 4 or 5 payload bits for the 4-, 3- and
+       * 2-byte forms respectively; each continuation byte carries 6. */
+      switch (step) {
+      case 4:
+         *w = (uint)(p[0] & 0x07) << 18
+            | (uint)(p[1] & 0x3F) << 12
+            | (uint)(p[2] & 0x3F) << 6
+            | (uint)(p[3] & 0x3F);
+         break;
+      case 3:
+         *w = (uint)(p[0] & 0x0F) << 12
+            | (uint)(p[1] & 0x3F) << 6
+            | (uint)(p[2] & 0x3F);
+         break;
+      case 2:
+         *w = (uint)(p[0] & 0x1F) << 6
+            | (uint)(p[1] & 0x3F);
+         break;
+      default:
+         *w = *p;
+         break;
+      }
+   }
+   *ulen = n;
+
+   return ret;
+}
+
+
/* vim:set ts=3 sw=3 expandtab: */