diff options
| field | value |
|---|---|
| author | 2025-10-17 18:00:52 +0200 |
| committer | 2025-10-17 18:30:36 +0200 |
| commit | ffc76d36f985817a86ff73822e0ed268226737dd (patch) |
| tree | 45b8c70a8e07e2abb3eef9f4e0a50d672b7e40bb |
| parent | ed58941c2c5dc62d7c5703d4be205626f9c3389b (diff) |

add -E preprocessing option

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | c.c | 30 |
| -rw-r--r-- | common.h | 3 |
| -rw-r--r-- | io.c | 61 |
| -rw-r--r-- | lex.c | 42 |
| -rw-r--r-- | lex.h | 2 |
| -rw-r--r-- | main.c | 41 |

6 files changed, 161 insertions, 18 deletions
@@ -4017,20 +4017,34 @@ docomp(struct comp *cm) } } -void -ccomp(const char *file) +static void +initcm(struct comp *cm, const char *file) { enum { N = 1<<12 }; static union { char m[sizeof(struct arena) + N]; struct arena *_align; } amem[2]; - struct comp cm = {0}; - const char *err = initlexer(&cm.lx, NULL, file); + const char *err = initlexer(&cm->lx, NULL, file); if (err) fatal(NULL, "Cannot open %'s: %s", file, err); - cm.fnarena = (void *)amem[0].m; - cm.fnarena->cap = N; - cm.exarena = (void *)amem[1].m; - cm.exarena->cap = N; + cm->fnarena = (void *)amem[0].m; + cm->fnarena->cap = N; + cm->exarena = (void *)amem[1].m; + cm->exarena->cap = N; +} + +void +ccomp(const char *file) +{ + struct comp cm = {0}; + initcm(&cm, file); docomp(&cm); } +void +cpp(struct wbuf *out, const char *file) +{ + struct comp cm = {0}; + initcm(&cm, file); + lexerdump(&cm.lx, out); +} + /* vim:set ts=3 sw=3 expandtab: */ @@ -534,7 +534,7 @@ struct wbuf { char *buf; const uint cap; uint len; - const int fd; + int fd; bool err; }; @@ -578,6 +578,7 @@ void warn(const struct span *, const char *, ...); void note(const struct span *, const char *, ...); ushort *utf8to16(uint *ulen, struct arena **, const uchar *s, size_t len); uint *utf8to32(uint *ulen, struct arena **, const uchar *s, size_t len); +int utf8enc(char out[4], uint cp); #endif /* COMMON_H_ */ @@ -96,14 +96,14 @@ putquoted(struct wbuf *buf, uchar c, uchar qchar, int next) case '\v': cseq = 'v'; goto Charseq; case '\n': cseq = 'n'; goto Charseq; default: - if (in_range(next, '0', '7')) + if (!next || in_range(next, '0', '7')) n += bfmt(buf, "%.3o", c); else n += bfmt(buf, "%o", c); } return n; } - if (c == '?' && next == '?') { + if (c == '?' && (!next || next == '?')) { return ioputc(buf, c), ioputc(buf, '\\'), 2; } return ioputc(buf, c), 1; @@ -473,13 +473,39 @@ vbfmt(struct wbuf *out, const char *fmt, va_list ap) if (quote) n += bputc(buf, '\''); break; case TKCHRLIT: + if (tok->wide) n += bputc(buf, tok->wideuni ? 
tok->wide == 1 ? 'u' : 'U' : 'L'); n += bputc(buf, '\''); - for (int i = 0; i < tok->len; ++i) - n += putquoted(buf, tok->s[i], '\'', i < tok->len - 1 ? tok->s[i+1] : -1); + if (tok->wide == 0) + for (int i = 0; i < tok->len; ++i) + n += putquoted(buf, tok->s[i], '\'', i < tok->len - 1 ? tok->s[i+1] : -1); + else { + char p[4]; + uint c = tok->wide == 1 ? tok->ws16[0] : tok->ws32[0]; + int l = utf8enc(p, c); + if (l == 1) + n += putquoted(buf, *p, '\'', -1); + else + n += (iowrite(buf, p, l), l); + } n += bputc(buf, '\''); break; case TKSTRLIT: - n += bfmt(buf, "%'S", tok->s, tok->len); + if (tok->wide == 0) + n += bfmt(buf, "%'S", tok->s, tok->len); + else { + n += bputc(buf, tok->wideuni ? tok->wide == 1 ? 'u' : 'U' : 'L'); + n += bputc(buf, '\"'); + for (int i = 0; i < tok->len; ++i) { + char p[4]; + uint c = tok->wide == 1 ? tok->ws16[i] : tok->ws32[i]; + int l = utf8enc(p, c); + if (l == 1) + n += putquoted(buf, *p, '\"', 0); + else + n += (iowrite(buf, p, l), l); + } + n += bputc(buf, '\"'); + } break; case TKPPMACSTR: if (quote) n += bputc(buf, '`'); @@ -937,7 +963,7 @@ utf8to32(uint *ulen, struct arena **arena, const uchar *s, size_t len) if (!len) return NULL; - for (p = s; p < s + len; ++n) { + for (p = end = s; p < s + len; ++n) { end = p; if ((*p & 0xF8) == 0xF0) /* 11110xxx */ p += 4; @@ -978,5 +1004,28 @@ utf8to32(uint *ulen, struct arena **arena, const uchar *s, size_t len) return ret; } +int +utf8enc(char p[4], uint cp) +{ + if ((cp & 0xffffff80) == 0) { + p[0] = cp; + return 1; + } else if ((cp & 0xfffff800) == 0) { + p[0] = 0xC0 | (cp >> 6 & 0x1F); + p[1] = 0x80 | (cp & 0x3F); + return 2; + } else if ((cp & 0xffff0000) == 0) { + p[0] = 0xE0 | (cp >> 12 & 0x0F); + p[1] = 0x80 | (cp >> 6 & 0x3F); + p[2] = 0x80 | (cp & 0x3F); + return 3; + } else { + p[0] = 0xF0 | (cp >> 18 & 0x07); + p[1] = 0x80 | (cp >> 12 & 0x3F); + p[2] = 0x80 | (cp >> 6 & 0x3F); + p[3] = 0x80 | (cp & 0x3F); + return 4; + } +} /* vim:set ts=3 sw=3 expandtab: */ @@ -524,6 +524,7 
@@ Begin: lexingheadername = 0; } else { case '\'': + tk->wideuni = 0; readstrchrlit(lx, tk, c, 0); } goto End; @@ -537,6 +538,7 @@ Begin: RET(c); case 'L': if (match(lx, (q = '\'')) || match(lx, (q = '"'))) { + tk->wideuni = 0; readstrchrlit(lx, tk, q, /* wide */ targ_primsizes[targ_wchartype] == 2 ? 1 : 2); goto End; } @@ -629,7 +631,7 @@ tokequ(const struct token *a, const struct token *b) static bool /* whitespace separating tokens? */ wsseparated(const struct token *l, const struct token *r) { - assert(l->span.sl.file == r->span.sl.file); + if (l->span.sl.file != r->span.sl.file) return 1; return l->span.sl.off + l->span.sl.len != r->span.sl.off; } @@ -1893,4 +1895,42 @@ lexerfreetemps(struct lexer *lx) } } +void +lexerdump(struct lexer *lx, struct wbuf *out) +{ + struct token prev = {0}, tok; + int file = lx->fileid, line = 1, col = 1; + bfmt(out, "# %d %'s\n", 1, getfilename(file)); + while (lex(lx, &tok) != TKEOF) { + int tkline, tkcol; + getfilepos(&tkline, &tkcol, tok.span.ex.file, tok.span.ex.off); + if (tok.span.ex.file != file) { + file = tok.span.ex.file; + bfmt(out, "\n# %d %'s\n", tkline, getfilename(file)); + col = 1; + } else if (line < tkline && tkline - line < 5) { + do + ioputc(out, '\n'); + while (++line != tkline); + col = 1; + } else if (line != tkline) { + bfmt(out, "\n# %d\n", tkline); + line = tkline; + col = 1; + } else if (prev.t && wsseparated(&prev, &tok)) { + ioputc(out, ' '); + ++col; + } + if (col == 1) + for (; col < tkcol; ++col) + ioputc(out, ' '); + line = tkline; + bfmt(out, "%tk", &tok); + col += tok.span.ex.len; + prev = tok; + } + bfmt(out, "\n"); + ioflush(out); +} + /* vim:set ts=3 sw=3 expandtab: */ @@ -52,6 +52,7 @@ struct token { short t; /* toktag */ bool litlit; uchar wide : 2; /* for CHRLIT & STRLIT; 1 -> 16bit, 2 -> 32bit */ + uchar wideuni : 1; /* ditto, 0 -> 'L', 1 -> 'u'/'U' (C11) */ union { uint len; struct { ushort macidx, argidx; }; @@ -102,6 +103,7 @@ int lex(struct lexer *, struct token *); int 
lexpeek(struct lexer *, struct token *); enum typetag parsenumlit(uvlong *, double *, const struct token *, bool ispp); const char *initlexer(struct lexer *, const struct span *span, const char *file); +void lexerdump(struct lexer *, struct wbuf *out); void lexerfreetemps(struct lexer *); /* vim:set ts=3 sw=3 expandtab: */ @@ -84,7 +84,7 @@ withext(const char *path, const char *ext) } static struct task { - enum outft { OFTexe, OFTdll, OFTobj, OFTasm } outft; + enum outft { OFTexe, OFTdll, OFTobj, OFTasm, OFTc } outft; const char *out; const char *targ; const char *inf[64]; @@ -153,6 +153,8 @@ optparse(char **args) task.verbose = 1; } else if (!strcmp(arg, "c")) { task.outft = OFTobj; + } else if (!strcmp(arg, "E")) { + task.outft = OFTc; } else Bad: warn(NULL, "invalid option: %'s", arg-1); } @@ -164,9 +166,10 @@ optparse(char **args) case OFTexe: task.out = "a.out"; break; case OFTasm: task.out = withext(*task.inf, "s"); break; case OFTobj: task.out = withext(*task.inf, "o"); break; + case OFTc: break; } } - if (!in_range(task.outft, OFTexe, OFTdll) && task.ninf > 1) + if (!in_range(task.outft, OFTexe, OFTdll) && task.outft != OFTc && task.ninf > 1) fatal(NULL, "too many input files"); } @@ -309,15 +312,49 @@ prihelp(void) ); } +#include <fcntl.h> /* open */ + static int driver(void) { + void cpp(struct wbuf *, const char *); if (task.verbose) efmt("# Target: %s\n", task.targ); if (task.outft == OFTobj) { assert(task.ninf == 1); assert(*task.inft == IFTc && "nyi"); return cc1(task.out, *task.inf); + } else if (task.outft == OFTc) { + static char tmp[1<<12]; + struct wbuf _buf = FDBUF(tmp, sizeof tmp, 1), + *buf = &_buf; + bool ok = 1; + if (task.out) { + buf->fd = open(task.out, O_CREAT | O_TRUNC | O_WRONLY, 0777); + if (buf->fd < 0) { + error(NULL, "open(%'s): %s", task.out, strerror(errno)); + return 1; + } + } + for (int i = 0; i < task.ninf; ++i) { + pid_t p; + int wstat; + + if ((p = fork()) < 0) { + error(NULL, "fork(): %s\n", strerror(errno)); + ok = 0; + 
} else if (p == 0) { + cpp(buf, task.inf[i]); + exit(0); + } + waitpid(p, &wstat, 0); + if (!WIFEXITED(wstat)) ok = 0; + ok = ok && WEXITSTATUS(wstat) == 0; + } + if (task.out) { + close(buf->fd); + } + return ok ? 0 : 1; } else if (task.outft == OFTexe || task.outft == OFTdll) { compileobjs(); if (ccopt.dbg.any) return 0; |