about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author lemon <lsof@mailbox.org> 2025-10-17 18:00:52 +0200
committer lemon <lsof@mailbox.org> 2025-10-17 18:30:36 +0200
commit ffc76d36f985817a86ff73822e0ed268226737dd (patch)
tree 45b8c70a8e07e2abb3eef9f4e0a50d672b7e40bb
parent ed58941c2c5dc62d7c5703d4be205626f9c3389b (diff)
add -E preprocessing option
-rw-r--r-- c.c 30
-rw-r--r-- common.h 3
-rw-r--r-- io.c 61
-rw-r--r-- lex.c 42
-rw-r--r-- lex.h 2
-rw-r--r-- main.c 41
6 files changed, 161 insertions, 18 deletions
diff --git a/c.c b/c.c
index 2d3b3cf..58a3147 100644
--- a/c.c
+++ b/c.c
@@ -4017,20 +4017,34 @@ docomp(struct comp *cm)
}
}
-void
-ccomp(const char *file)
+static void /* setup shared by ccomp() and cpp(): opens the lexer on file and attaches the static arenas to cm */
+initcm(struct comp *cm, const char *file)
{
enum { N = 1<<12 }; /* capacity assigned to each arena below */
static union { char m[sizeof(struct arena) + N]; struct arena *_align; } amem[2]; /* static storage: every struct comp set up here reuses the same two buffers */
- struct comp cm = {0};
- const char *err = initlexer(&cm.lx, NULL, file);
+ const char *err = initlexer(&cm->lx, NULL, file); /* a non-NULL result is used as the error text below */
if (err)
fatal(NULL, "Cannot open %'s: %s", file, err);
- cm.fnarena = (void *)amem[0].m;
- cm.fnarena->cap = N;
- cm.exarena = (void *)amem[1].m;
- cm.exarena->cap = N;
+ cm->fnarena = (void *)amem[0].m; /* fnarena is backed by amem[0] */
+ cm->fnarena->cap = N;
+ cm->exarena = (void *)amem[1].m; /* exarena is backed by amem[1] */
+ cm->exarena->cap = N;
+}
+
+void /* compiles file: builds a zeroed struct comp, initializes it with initcm() and passes it to docomp() */
+ccomp(const char *file)
+{
+ struct comp cm = {0};
+ initcm(&cm, file); /* reports a fatal error if the file cannot be opened */
docomp(&cm);
}
+void /* -E mode: initializes a struct comp for file like ccomp(), but writes the lexer's tokens to out via lexerdump() instead of compiling */
+cpp(struct wbuf *out, const char *file)
+{
+ struct comp cm = {0};
+ initcm(&cm, file); /* reports a fatal error if the file cannot be opened */
+ lexerdump(&cm.lx, out); /* lexerdump() flushes out before returning */
+}
+
/* vim:set ts=3 sw=3 expandtab: */
diff --git a/common.h b/common.h
index f2fff86..c5b7513 100644
--- a/common.h
+++ b/common.h
@@ -534,7 +534,7 @@ struct wbuf {
char *buf;
const uint cap;
uint len;
- const int fd;
+ int fd;
bool err;
};
@@ -578,6 +578,7 @@ void warn(const struct span *, const char *, ...);
void note(const struct span *, const char *, ...);
ushort *utf8to16(uint *ulen, struct arena **, const uchar *s, size_t len);
uint *utf8to32(uint *ulen, struct arena **, const uchar *s, size_t len);
+int utf8enc(char out[4], uint cp);
#endif /* COMMON_H_ */
diff --git a/io.c b/io.c
index f73add8..67b23b6 100644
--- a/io.c
+++ b/io.c
@@ -96,14 +96,14 @@ putquoted(struct wbuf *buf, uchar c, uchar qchar, int next)
case '\v': cseq = 'v'; goto Charseq;
case '\n': cseq = 'n'; goto Charseq;
default:
- if (in_range(next, '0', '7'))
+ if (!next || in_range(next, '0', '7'))
n += bfmt(buf, "%.3o", c);
else
n += bfmt(buf, "%o", c);
}
return n;
}
- if (c == '?' && next == '?') {
+ if (c == '?' && (!next || next == '?')) {
return ioputc(buf, c), ioputc(buf, '\\'), 2;
}
return ioputc(buf, c), 1;
@@ -473,13 +473,39 @@ vbfmt(struct wbuf *out, const char *fmt, va_list ap)
if (quote) n += bputc(buf, '\'');
break;
case TKCHRLIT:
+ if (tok->wide) n += bputc(buf, tok->wideuni ? tok->wide == 1 ? 'u' : 'U' : 'L');
n += bputc(buf, '\'');
- for (int i = 0; i < tok->len; ++i)
- n += putquoted(buf, tok->s[i], '\'', i < tok->len - 1 ? tok->s[i+1] : -1);
+ if (tok->wide == 0)
+ for (int i = 0; i < tok->len; ++i)
+ n += putquoted(buf, tok->s[i], '\'', i < tok->len - 1 ? tok->s[i+1] : -1);
+ else {
+ char p[4];
+ uint c = tok->wide == 1 ? tok->ws16[0] : tok->ws32[0];
+ int l = utf8enc(p, c);
+ if (l == 1)
+ n += putquoted(buf, *p, '\'', -1);
+ else
+ n += (iowrite(buf, p, l), l);
+ }
n += bputc(buf, '\'');
break;
case TKSTRLIT:
- n += bfmt(buf, "%'S", tok->s, tok->len);
+ if (tok->wide == 0)
+ n += bfmt(buf, "%'S", tok->s, tok->len);
+ else {
+ n += bputc(buf, tok->wideuni ? tok->wide == 1 ? 'u' : 'U' : 'L');
+ n += bputc(buf, '\"');
+ for (int i = 0; i < tok->len; ++i) {
+ char p[4];
+ uint c = tok->wide == 1 ? tok->ws16[i] : tok->ws32[i];
+ int l = utf8enc(p, c);
+ if (l == 1)
+ n += putquoted(buf, *p, '\"', 0);
+ else
+ n += (iowrite(buf, p, l), l);
+ }
+ n += bputc(buf, '\"');
+ }
break;
case TKPPMACSTR:
if (quote) n += bputc(buf, '`');
@@ -937,7 +963,7 @@ utf8to32(uint *ulen, struct arena **arena, const uchar *s, size_t len)
if (!len) return NULL;
- for (p = s; p < s + len; ++n) {
+ for (p = end = s; p < s + len; ++n) {
end = p;
if ((*p & 0xF8) == 0xF0) /* 11110xxx */
p += 4;
@@ -978,5 +1004,28 @@ utf8to32(uint *ulen, struct arena **arena, const uchar *s, size_t len)
return ret;
}
+int /* stores the UTF-8 encoding of cp in p and returns the number of bytes written (1 to 4); p is not NUL-terminated */
+utf8enc(char p[4], uint cp)
+{
+ if ((cp & 0xffffff80) == 0) { /* cp < 0x80: one byte, stored unchanged */
+ p[0] = cp;
+ return 1;
+ } else if ((cp & 0xfffff800) == 0) { /* cp < 0x800: two bytes, 5 + 6 payload bits */
+ p[0] = 0xC0 | (cp >> 6 & 0x1F);
+ p[1] = 0x80 | (cp & 0x3F);
+ return 2;
+ } else if ((cp & 0xffff0000) == 0) { /* cp < 0x10000: three bytes, 4 + 6 + 6 payload bits; surrogate values are not rejected */
+ p[0] = 0xE0 | (cp >> 12 & 0x0F);
+ p[1] = 0x80 | (cp >> 6 & 0x3F);
+ p[2] = 0x80 | (cp & 0x3F);
+ return 3;
+ } else { /* everything else: four bytes, 3 + 6 + 6 + 6 payload bits; no range check, bits of cp above bit 20 are dropped */
+ p[0] = 0xF0 | (cp >> 18 & 0x07);
+ p[1] = 0x80 | (cp >> 12 & 0x3F);
+ p[2] = 0x80 | (cp >> 6 & 0x3F);
+ p[3] = 0x80 | (cp & 0x3F);
+ return 4;
+ }
+}
/* vim:set ts=3 sw=3 expandtab: */
diff --git a/lex.c b/lex.c
index e668500..605b3d2 100644
--- a/lex.c
+++ b/lex.c
@@ -524,6 +524,7 @@ Begin:
lexingheadername = 0;
} else {
case '\'':
+ tk->wideuni = 0;
readstrchrlit(lx, tk, c, 0);
}
goto End;
@@ -537,6 +538,7 @@ Begin:
RET(c);
case 'L':
if (match(lx, (q = '\'')) || match(lx, (q = '"'))) {
+ tk->wideuni = 0;
readstrchrlit(lx, tk, q, /* wide */ targ_primsizes[targ_wchartype] == 2 ? 1 : 2);
goto End;
}
@@ -629,7 +631,7 @@ tokequ(const struct token *a, const struct token *b)
static bool /* whitespace separating tokens? */
wsseparated(const struct token *l, const struct token *r)
{
- assert(l->span.sl.file == r->span.sl.file);
+ if (l->span.sl.file != r->span.sl.file) return 1; /* tokens from different files are reported as separated instead of asserting */
return l->span.sl.off + l->span.sl.len != r->span.sl.off; /* separated unless r starts exactly where l ends */
}
@@ -1893,4 +1895,42 @@ lexerfreetemps(struct lexer *lx)
}
}
+void /* writes every token produced by lex() to out, with "# line file" markers and approximate source layout, then flushes out */
+lexerdump(struct lexer *lx, struct wbuf *out)
+{
+ struct token prev = {0}, tok; /* prev.t stays 0 until the first token has been written */
+ int file = lx->fileid, line = 1, col = 1; /* file and line of the last token written; col is the next output column */
+ bfmt(out, "# %d %'s\n", 1, getfilename(file)); /* initial marker for the starting file */
+ while (lex(lx, &tok) != TKEOF) {
+ int tkline, tkcol;
+ getfilepos(&tkline, &tkcol, tok.span.ex.file, tok.span.ex.off); /* tkline/tkcol: position of the token's span.ex as reported by getfilepos() */
+ if (tok.span.ex.file != file) { /* file changed: emit a marker with line number and file name */
+ file = tok.span.ex.file;
+ bfmt(out, "\n# %d %'s\n", tkline, getfilename(file));
+ col = 1;
+ } else if (line < tkline && tkline - line < 5) { /* 1 to 4 lines ahead in the same file: catch up with plain newlines */
+ do
+ ioputc(out, '\n');
+ while (++line != tkline);
+ col = 1;
+ } else if (line != tkline) { /* any other line change (backwards, or 5 or more lines ahead): emit a line-only marker */
+ bfmt(out, "\n# %d\n", tkline);
+ line = tkline;
+ col = 1;
+ } else if (prev.t && wsseparated(&prev, &tok)) { /* same line: a single space when wsseparated() reports a gap after the previous token */
+ ioputc(out, ' ');
+ ++col;
+ }
+ if (col == 1) /* at the start of an output line: indent with spaces up to the token's column */
+ for (; col < tkcol; ++col)
+ ioputc(out, ' ');
+ line = tkline;
+ bfmt(out, "%tk", &tok);
+ col += tok.span.ex.len; /* advance by the token's span length, not by the number of bytes bfmt wrote */
+ prev = tok;
+ }
+ bfmt(out, "\n"); /* final newline after the last token */
+ ioflush(out);
+}
+
/* vim:set ts=3 sw=3 expandtab: */
diff --git a/lex.h b/lex.h
index 80898ce..6adbad9 100644
--- a/lex.h
+++ b/lex.h
@@ -52,6 +52,7 @@ struct token {
short t; /* toktag */
bool litlit;
uchar wide : 2; /* for CHRLIT & STRLIT; 1 -> 16bit, 2 -> 32bit */
+ uchar wideuni : 1; /* ditto, 0 -> 'L', 1 -> 'u'/'U' (C11) */
union {
uint len;
struct { ushort macidx, argidx; };
@@ -102,6 +103,7 @@ int lex(struct lexer *, struct token *);
int lexpeek(struct lexer *, struct token *);
enum typetag parsenumlit(uvlong *, double *, const struct token *, bool ispp);
const char *initlexer(struct lexer *, const struct span *span, const char *file);
+void lexerdump(struct lexer *, struct wbuf *out);
void lexerfreetemps(struct lexer *);
/* vim:set ts=3 sw=3 expandtab: */
diff --git a/main.c b/main.c
index 1f75a50..3d7c846 100644
--- a/main.c
+++ b/main.c
@@ -84,7 +84,7 @@ withext(const char *path, const char *ext)
}
static struct task {
- enum outft { OFTexe, OFTdll, OFTobj, OFTasm } outft;
+ enum outft { OFTexe, OFTdll, OFTobj, OFTasm, OFTc } outft;
const char *out;
const char *targ;
const char *inf[64];
@@ -153,6 +153,8 @@ optparse(char **args)
task.verbose = 1;
} else if (!strcmp(arg, "c")) {
task.outft = OFTobj;
+ } else if (!strcmp(arg, "E")) {
+ task.outft = OFTc;
} else Bad: warn(NULL, "invalid option: %'s", arg-1);
}
@@ -164,9 +166,10 @@ optparse(char **args)
case OFTexe: task.out = "a.out"; break;
case OFTasm: task.out = withext(*task.inf, "s"); break;
case OFTobj: task.out = withext(*task.inf, "o"); break;
+ case OFTc: break;
}
}
- if (!in_range(task.outft, OFTexe, OFTdll) && task.ninf > 1)
+ if (!in_range(task.outft, OFTexe, OFTdll) && task.outft != OFTc && task.ninf > 1)
fatal(NULL, "too many input files");
}
@@ -309,15 +312,49 @@ prihelp(void)
);
}
+#include <fcntl.h> /* open */
+
static int
driver(void)
{
+ void cpp(struct wbuf *, const char *);
if (task.verbose)
efmt("# Target: %s\n", task.targ);
if (task.outft == OFTobj) {
assert(task.ninf == 1);
assert(*task.inft == IFTc && "nyi");
return cc1(task.out, *task.inf);
+ } else if (task.outft == OFTc) {
+ static char tmp[1<<12];
+ struct wbuf _buf = FDBUF(tmp, sizeof tmp, 1),
+ *buf = &_buf;
+ bool ok = 1;
+ if (task.out) {
+ buf->fd = open(task.out, O_CREAT | O_TRUNC | O_WRONLY, 0777);
+ if (buf->fd < 0) {
+ error(NULL, "open(%'s): %s", task.out, strerror(errno));
+ return 1;
+ }
+ }
+ for (int i = 0; i < task.ninf; ++i) {
+ pid_t p;
+ int wstat;
+
+ if ((p = fork()) < 0) {
+ error(NULL, "fork(): %s\n", strerror(errno));
+ ok = 0;
+ } else if (p == 0) {
+ cpp(buf, task.inf[i]);
+ exit(0);
+ }
+ waitpid(p, &wstat, 0);
+ if (!WIFEXITED(wstat)) ok = 0;
+ ok = ok && WEXITSTATUS(wstat) == 0;
+ }
+ if (task.out) {
+ close(buf->fd);
+ }
+ return ok ? 0 : 1;
} else if (task.outft == OFTexe || task.outft == OFTdll) {
compileobjs();
if (ccopt.dbg.any) return 0;