From 9100ed2b5dd01df8e6b766c7bc2a12c0dd44f1ff Mon Sep 17 00:00:00 2001 From: lemon Date: Wed, 10 May 2023 20:38:32 +0200 Subject: initial commit --- .gitignore | 5 + Makefile | 33 + common.h | 359 ++++++++++ eval.c | 225 +++++++ io.c | 836 +++++++++++++++++++++++ ir.c | 221 ++++++ ir.h | 117 ++++ irdump.c | 124 ++++ keywords.def | 70 ++ lex.c | 669 +++++++++++++++++++ main.c | 27 + mem.c | 117 ++++ op.def | 60 ++ parse.c | 2104 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ parse.h | 205 ++++++ targ.c | 41 ++ test.c | 39 ++ type.c | 287 ++++++++ 18 files changed, 5539 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 common.h create mode 100644 eval.c create mode 100644 io.c create mode 100644 ir.c create mode 100644 ir.h create mode 100644 irdump.c create mode 100644 keywords.def create mode 100644 lex.c create mode 100644 main.c create mode 100644 mem.c create mode 100644 op.def create mode 100644 parse.c create mode 100644 parse.h create mode 100644 targ.c create mode 100644 test.c create mode 100644 type.c diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8b0a3a4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +cchomp +obj/ +compile_commands.json +.cache/ +.gdb_history diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f9f3e12 --- /dev/null +++ b/Makefile @@ -0,0 +1,33 @@ +SRC=main.c io.c mem.c parse.c lex.c type.c targ.c eval.c ir.c irdump.c +CFLAGS=-Wall -std=c11 -pedantic +OBJ=$(patsubst %.c,obj/%.o,$(SRC)) +OUT=cchomp + +all: CFLAGS += -g -Og +all: $(OUT) + +opt: CFLAGS += -O2 +opt: $(OUT) + +dbg: CFLAGS += -g -fsanitize=address -fsanitize=undefined +dbg: $(OUT) + +$(OUT): $(OBJ) + $(CC) $(CFLAGS) -o $@ $(OBJ) + +obj/%.o: %.c common.h + @mkdir -p obj/ + $(CC) $(CFLAGS) -c -o $@ $< + +obj/main.o: parse.h +obj/parse.o: parse.h ir.h +obj/ir.o: ir.h +obj/irdump.o: ir.h op.def +obj/lex.o: parse.h +obj/eval.o: parse.h +obj/io.o: parse.h keywords.def + +clean: + $(RM) -r obj/ $(OUT) + +.PHONY: clean diff --git a/common.h b/common.h new file mode 100644 index 0000000..ab3c8ff --- /dev/null +++ b/common.h @@ -0,0 +1,359 @@ +#ifndef COMMON_H_ +#define COMMON_H_ + +#include +#include +#include + +#ifdef __clang__ /* stop linter from complaining about "unused" inline functions */ +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +#define bool _Bool +typedef unsigned char uchar; +typedef signed char schar; +typedef unsigned short ushort; +typedef unsigned long long uvlong; +typedef signed long long vlong; +typedef unsigned uint; + +#define in_range(x, Lo, Hi) ((uint) (x) - (Lo) <= (Hi) - (Lo)) /* lo <= x <= hi; lo > 0, hi > 0 */ +#define alignup(x, A) (((x) + ((A) - 1)) & -(A)) +#define arraylength(a) (sizeof(a) / sizeof 0[a]) +struct bytes { uchar *p; uint n; }; + +struct span { + struct span0 { + uint off; + uint len : 24, + file : 8; + } sl, /* original source location */ + ex; /* the location after #include/macro expansion */ +}; + +void _assertfmt(const char *file, int line, const char *func, const char *expr); +#ifdef __GNUC__ +#define assert(x) (!(x) ? _assertfmt(__FILE__,__LINE__,__func__,#x), __builtin_trap() : (void)0) +#else +#define assert(x) (void)(!(x) ? _assertfmt(__FILE__,__LINE__,__func__,#x), *(volatile int *)0 : 0) +#endif + +static inline uint +hashs(uint h, const char *s) +{ + while (*s) h = (uchar)*s++ + h*65599; + return h; +} +static inline uint +hashb(uint h, const void *d, uint n) +{ + + const uchar *b = d; + while (n--) h = *b++ + h*65599; + return h; +} +static inline uint +ptrhash(const void *p) { + return (uint)(size_t)p * 2654435761; +} +static inline uint +popcnt(uint x) { +#ifdef __GNUC__ + return __builtin_popcount(x); +#else + uint n = 0; + while (x) x >>= 1, ++n; + return n; +#endif +} + +/******************/ +/* COMPILER STATE */ +/******************/ + +enum cstd { + STDC89, + STDC99, + STDC11, + STDC23, +}; +struct option { + enum cstd cstd; + bool trigraph; + bool nocolor; +}; +extern struct option ccopt; + +/*************************/ +/** TYPE REPRESENTATION **/ +/*************************/ + +enum qualifier { + QCONST = 1<<0, + QVOLATILE = 1<<1, + QNORETURN = 1<<2, /* functions */ + QINLINE = 1<<3, /* functions */ +}; + +enum typetag { /* ordering is important here! */ + TYXXX, + TYENUM, + TYBOOL, TYCHAR, TYSCHAR, TYUCHAR, TYSHORT, TYUSHORT, TYINT, TYUINT, TYLONG, TYULONG, TYVLONG, TYUVLONG, + TYFLOAT, TYDOUBLE, TYLDOUBLE, + TYVOID, + TYVALIST, + TYPTR, TYARRAY, TYFUNC, + TYSTRUCT, TYUNION, + TYSIGNEDSET_ = 1<> (t) & 1) +#define isunsignedt(t) ((TYUNSIGNEDSET_ | !targ_charsigned << TYCHAR) >> (t) & 1) +#define isfltt(t) in_range((t), TYFLOAT, TYLDOUBLE) +#define isaritht(t) in_range((t), TYENUM, TYLDOUBLE) +#define isscalart(t) (TYSCALARSET_ >> (t) & 1) +#define isptrcvtt(t) in_range((t), TYPTR, TYFUNC) +#define isaggt(t) in_range((t), TYSTRUCT, TYUNION) +#define isprim(ty) isprimt((ty).t) +#define isint(ty) isintt((ty).t) +#define issigned(ty) issignedt((ty).t) +#define isunsigned(ty) isunsignedt((ty).t) +#define isflt(ty) isfltt((ty).t) +#define isarith(ty) isaritht((ty).t) +#define isscalar(ty) isscalart((ty).t) +#define isptrcvt(ty) isptrcvtt((ty).t) +#define isagg(ty) isaggt((ty).t) +#define mktype(...) ((union type) {{ __VA_ARGS__ }}) + +struct enumvar { + const char *name; + union { vlong i; uvlong u; }; +}; + +struct field { + const char *name; + union type t; + ushort off; + uchar bitsiz, bitoff; +}; + +struct typedata { + uchar t; + ushort id; + union { + union type child; + struct { /* aggregates */ + const uchar *quals; /* packed N x 2bit array (NULL if no member has quals) */ + union { + struct field *fld; + const union type *param; + }; + }; + struct { + uchar backing; + struct enumvar *var; + }; + }; + union { + uint arrlen; /* array */ + struct { + short nmemb; + uchar align; + union { + struct { /* function */ + bool kandr : 1, variadic : 1; + }; + struct { /* aggregate */ + bool anyconst : 1, flexi : 1; + }; + }; + }; + }; + union { + uint siz; /* aggregate & array */ + union type ret; /* function */ + }; +}; + +extern struct typedata typedata[]; +extern const char *ttypenames[/*id*/]; + +#define tdqualsiz(nmemb) ((nmemb)/4 + ((nmemb)%4 != 0)) +static inline int +tdgetqual(const uchar *pqual, int idx) +{ + return pqual ? pqual[idx/4] >> 2*(idx%4) & 3 : 0; +} +static inline void +tdsetqual(uchar *pqual, int idx, int qual) +{ + assert(pqual); + pqual[idx/4] &= ~(3 << (2*(idx%4))); + pqual[idx/4] |= (qual&3) << (2*(idx%4)); +} + +bool isincomplete(union type); +uint typesize(union type); +uint typealign(union type); +union type mkptrtype(union type, int qual); +union type mkarrtype(union type t, int qual, uint n); +union type mkfntype(union type ret, uint n, const union type *, const uchar *qual, bool kandr, bool variadic); +union type mktagtype(const char *name, struct typedata *td); +union type completetype(const char *name, int id, struct typedata *td); +union type typedecay(union type); +bool assigncompat(union type dst, union type src); +enum typetag intpromote(enum typetag); +union type cvtarith(union type a, union type b); +static inline union type +typechild(union type t) +{ + if (t.t == TYENUM) return mktype(t.backing); + if (t.flag & TFCHLDPRIM) return mktype(t.child); + if (t.flag & TFCHLDISDAT) { + union type chld = mktype(typedata[t.dat].t, .dat = t.dat); + if (chld.t == TYENUM) chld.backing = typedata[t.dat].backing; + return chld; + } + return typedata[t.dat].child; +} +static inline uint +typearrlen(union type t) +{ + return (t.flag & TFCHLDPRIM) ? t.arrlen : typedata[t.dat].arrlen; +} + +/**********/ +/* TARGET */ +/**********/ + +extern uchar targ_primsizes[]; +extern uchar targ_primalign[]; +extern enum typetag targ_sizetype; +extern bool targ_charsigned, targ_bigendian; +void targ_init(const char *targ); + +/*********/ +/** MEM **/ +/*********/ + +struct arena { + uint cap : 31, + dyn : 1; + uint n; + struct arena *prev; + uchar mem[]; +}; + +#define vec_of(T) struct { T *p; int _cap; uint n; } + +struct arena *newarena(uint chunksiz); +void *alloc(struct arena **, uint siz, uint align); +void freearena(struct arena *); +void vinit_(void **p, int *pcap, void *inlbuf, int cap, uint siz); +void vpush_(void **p, int *pcap, uint *pn, uint siz); +void *vpushn_(void **p, int *pcap, uint *pn, uint siz, const void *dat, uint ndat); +extern void free(void *); +#define VINIT(inlbuf, Cap) { (inlbuf), (Cap) } +#define vfree(v) ((v)->_cap < 0 ? free((v)->p) : (void)0, memset((v), 0, sizeof*(v))) +#define vinit(v, inlbuf, Cap) (vfree(v), vinit_((void **)&(v)->p, &(v)->_cap, inlbuf, (Cap), sizeof *(v)->p)) +#define vpush(v, x) (vpush_((void **)&(v)->p, &(v)->_cap, &(v)->n, sizeof *(v)->p), \ + (v)->p[(v)->n++] = (x)) +#define vpushn(v, xs, N) vpushn_((void **)&(v)->p, &(v)->_cap, &(v)->n, sizeof *(v)->p, xs, N) + +struct bitset { uvlong u; }; +enum { BSNBIT = 8 * sizeof(uvlong) }; +#define BSSIZE(nbit) ((nbit) / BSNBIT + ((nbit) % BSNBIT != 0)) + +static inline bool +bstest(struct bitset *bs, uint i) +{ + return bs[i / BSNBIT].u >> i % BSNBIT & 1; +} + +static inline void +bszero(struct bitset *bs, uint siz) +{ + memset(bs, 0, siz * sizeof *bs); +} + +static inline bool +bsiter(uint *i, struct bitset *bs, uint siz) +{ + for (; *i < siz*BSNBIT; ++*i) + if (bstest(bs, *i)) return 1; + return 0; +} + +/********/ +/** IO **/ +/********/ + +struct wbuf { + char *buf; + const uint cap; + uint len; + const int fd; + bool err; +}; + +/* read-only file mapping that is at least 1 page larger than the real file + * so it's legal to read a few bytes beyond it to avoid some bounds checks */ +struct memfile { + const uchar *p; + uint n; +}; + +#define MEMBUF(buf, cap) { (buf), (cap), .fd = -1 } +#define FDBUF(buf, cap, fd_) { (buf), (cap), .fd = (fd_) } +extern struct wbuf bstdout, bstderr; +void iowrite(struct wbuf *, const void *src, int n); +void ioputc(struct wbuf *, uchar); +void ioflush(struct wbuf *); +int vbfmt(struct wbuf *, const char *, va_list ap); +int bfmt(struct wbuf *, const char *, ...); +#define pfmt(...) bfmt(&bstdout, __VA_ARGS__) +#define efmt(...) bfmt(&bstderr, __VA_ARGS__) +struct memfile mapopen(const char **err, const char *path); +void mapclose(struct memfile *); +int openfile(const char **err, struct memfile **, const char *path); +const char *getfilename(int id); +struct memfile *getfile(int id); +void addfileline(int id, uint off); +void getfilepos(int *line, int *col, int id, uint off); +void closefile(int id); +void fatal(const struct span *, const char *, ...); +void error(const struct span *, const char *, ...); +void warn(const struct span *, const char *, ...); +void note(const struct span *, const char *, ...); + +#endif /* COMMON_H_ */ + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/eval.c b/eval.c new file mode 100644 index 0000000..ecfc9ed --- /dev/null +++ b/eval.c @@ -0,0 +1,225 @@ +#include "common.h" +#include "parse.h" + +static int +targ2hosttype(enum typetag t) +{ + if (isintt(t)) { + int siz = targ_primsizes[t]; + int sgn = issignedt(t); +#define U(Ty,Tag) if (!sgn & (siz == sizeof(unsigned Ty))) return Tag; +#define S(Ty,Tag) if ( sgn & (siz == sizeof(signed Ty))) return Tag; + U(char, TYUCHAR) + S(char, TYSCHAR) + U(short, TYUSHORT) + S(short, TYSHORT) + U(int, TYUINT) + S(int, TYINT) + U(long long, TYUVLONG) + S(long long, TYVLONG) +#undef U +#undef S + } else if (isfltt(t)) return t; + return 0; +} + +static bool +numcast(enum typetag t, struct expr *dst, const struct expr *src) +{ + struct expr tmp; + enum typetag td = targ2hosttype(t); + enum typetag ts = targ2hosttype(src->ty.t == TYENUM ? src->ty.backing : src->ty.t); + if (src == dst) tmp = *src, src = &tmp; + + assert(src->t == ENUMLIT); +#define TT(d,s) (td == d && ts == s) +#define TF(d) (td == d && isfltt(ts)) + if (!ts || !td) return 0; + else if (TT(TYFLOAT, TYFLOAT)) dst->f = (float) src->f; + else if (TT(TYFLOAT, TYDOUBLE)) dst->f = (float) src->f; + else if (TT(TYDOUBLE, TYFLOAT)) dst->f = src->f; + else if (TT(TYDOUBLE, TYDOUBLE)) dst->f = src->f; + else if (TT(TYFLOAT, TYUVLONG)) dst->f = (float) src->u; + else if (TT(TYDOUBLE, TYUVLONG)) dst->f = (double) src->u; + else if (td == TYFLOAT) dst->f = (float) src->i; + else if (td == TYDOUBLE) dst->f = (double) src->i; + else if (TF(TYUVLONG)) dst->u = src->f; + else if (TF(TYBOOL)) dst->i = (bool) src->f; + else if (isfltt(ts)) { dst->i = src->f; goto Narrow; } + else { + Narrow: + switch (td) { +#define I(Ty, Tag) case Tag: dst->i = (Ty) src->i; break; + I(bool, TYBOOL) + I(signed char, TYSCHAR) + I(unsigned char, TYUCHAR) + I(signed short, TYSHORT) + I(unsigned short, TYUSHORT) + I(signed int, TYINT) + I(unsigned int, TYUINT) + I(signed long long, TYVLONG) + I(unsigned long long, TYUVLONG) +#undef I + case TYFLOAT: dst->f = (float) src->f; break; + case TYDOUBLE: dst->f = src->f; break; + default: assert(0 && "bad cast?"); + } + } +#undef TT +#undef TF + + dst->t = ENUMLIT; + dst->ty = mktype(t); + return 1; +} + +static bool +unop(struct expr *ex, enum evalmode mode) +{ + struct expr *sub = ex->sub; + + if (sub->t != ENUMLIT && !eval(sub, mode)) return 0; + switch (ex->t) { + case ECAST: + case EPLUS: + break; + case ENEG: + if (isint(sub->ty)) sub->u = -sub->u; + else assert(isflt(sub->ty)), sub->f = -sub->f; + break; + case ECOMPL: + if (!isint(sub->ty)) return 0; + sub->u = ~sub->u; + break; + case ELOGNOT: + if (isint(sub->ty)) sub->u = !sub->u; + else assert(isflt(sub->ty)), sub->u = !sub->f; + break; + default: + return 0; + } + if (!numcast(ex->ty.t, ex, sub)) return 0; + return 1; +} + +static bool +binop(struct expr *ex, enum evalmode mode) +{ + union type oty; + bool flt; + bool sgn; + int t; + struct expr *lhs = &ex->sub[0], *rhs = &ex->sub[1]; + + if (!eval(lhs, mode)) return 0; + if (!eval(rhs, mode)) return 0; + if (in_range(ex->t, EADD, ESHR)) + oty = ex->ty; + else + oty = cvtarith(lhs->ty, rhs->ty); + if (!numcast(oty.t, lhs, lhs)) return 0; + if (!numcast(oty.t, rhs, rhs)) return 0; + flt = isflt(oty); + sgn = issigned(oty); + switch (ex->t) { +#define ef else if + case EADD: if (flt) lhs->f += rhs->f; + else lhs->u += rhs->u; + break; + case ESUB: if (flt) lhs->f -= rhs->f; + else lhs->u -= rhs->u; + break; + case EMUL: if (flt) lhs->f *= rhs->f; + ef (sgn) lhs->i *= rhs->i; + else lhs->u *= rhs->u; + break; + case EDIV: if (!flt && !rhs->i) return 0; + ef (flt) lhs->f /= rhs->f; + ef (sgn) lhs->i /= rhs->i; + else lhs->u /= rhs->u; + break; + case EREM: if (!rhs->i) return 0; + ef (sgn) lhs->i %= rhs->i; + else lhs->u %= rhs->u; + break; + case EBAND: lhs->u &= rhs->u; + break; + case EBIOR: lhs->u |= rhs->u; + break; + case EXOR: lhs->u ^= rhs->u; + break; + case ESHL: if (sgn && lhs->i < 0) return 0; + ef (rhs->i >= 8*targ_primsizes[oty.t]) return 0; + lhs->u <<= rhs->u; + break; + case ESHR: if (rhs->i >= 8*targ_primsizes[oty.t]) return 0; + ef (sgn) lhs->i >>= rhs->i; + else lhs->u >>= rhs->u; + break; + case ELOGAND: if (flt) t = lhs->f && rhs->f; + else t = lhs->u && rhs->u; + lhs->u = t; + break; + case ELOGIOR: if (flt) t = lhs->f || rhs->f; + else t = lhs->u || rhs->u; + lhs->u = t; + break; + case EEQU: if (flt) t = lhs->f == rhs->f; + else t = lhs->u == rhs->u; + lhs->u = t; + break; + case ENEQ: if (flt) t = lhs->f != rhs->f; + else t = lhs->u != rhs->u; + lhs->u = t; + break; + case ELTH: if (flt) t = lhs->f < rhs->f; + ef (sgn) t = lhs->i < rhs->i; + else t = lhs->u < rhs->u; + lhs->u = t; + break; + case EGTH: if (flt) t = lhs->f > rhs->f; + ef (sgn) t = lhs->i > rhs->i; + else t = lhs->u > rhs->u; + lhs->u = t; + break; + case ELTE: if (flt) t = lhs->f <= rhs->f; + ef (sgn) t = lhs->i <= rhs->i; + else t = lhs->u <= rhs->u; + lhs->u = t; + break; + case EGTE: if (flt) t = lhs->f >= rhs->f; + ef (sgn) t = lhs->i >= rhs->i; + else t = lhs->u >= rhs->u; + lhs->u = t; + break; + default: return 0; +#undef ef + } + return numcast(ex->ty.t, ex, lhs); +} + +bool +eval(struct expr *ex, enum evalmode mode) +{ + if (ex->t == ENUMLIT) { + if (mode <= EVINTCONST) return isint(ex->ty); + return 1; + } + if (isunop(ex->t)) return unop(ex, mode) && eval(ex, mode); + if (isbinop(ex->t)) return binop(ex, mode) && eval(ex, mode); + if (ex->t == ESEQ) { + if (!eval(&ex->sub[0], mode)) return 0; + *ex = ex->sub[1]; + return eval(ex, mode); + } + if (ex->t == ECOND) { + if (!eval(&ex->sub[0], mode)) return 0; + if (!eval(&ex->sub[1], mode)) return 0; + if (!eval(&ex->sub[2], mode)) return 0; + *ex = ex->sub[!ex->sub[0].u + 1]; + return eval(ex, mode); + } + return 0; +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/io.c b/io.c new file mode 100644 index 0000000..83b15b8 --- /dev/null +++ b/io.c @@ -0,0 +1,836 @@ +#include "parse.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static char stdoutbuf[1024], stderrbuf[1024]; +struct wbuf bstdout = FDBUF(stdoutbuf, sizeof stdoutbuf, 1), + bstderr = FDBUF(stderrbuf, sizeof stderrbuf, 2); + +void +iowrite(struct wbuf *buf, const void *Src, int n) +{ + const uchar *src = Src; + + while (n > 0) { + int avail = buf->cap - buf->len; + int amt = avail < n ? avail : n; + + memcpy(buf->buf + buf->len, src, amt); + n -= amt; + if ((buf->len += amt) == buf->cap) { + if (buf->fd < 0) { + buf->err = 1; + return; + } + ioflush(buf); + } + } +} + +void +ioflush(struct wbuf *buf) +{ + int i, ret; + + buf->err = 0; + if (buf->fd < 0) { + buf->len = 0; + return; + } + for (i = 0; buf->len > 0;) { + ret = write(buf->fd, buf->buf + i, buf->len); + if (ret > 0) { + buf->len -= ret; + i += ret; + } else if (errno == EAGAIN || errno == EWOULDBLOCK) { + continue; + } else { + buf->err = 1; + break; + } + } +} + +void +ioputc(struct wbuf *buf, uchar c) +{ + if (buf->len == buf->cap) { + if (buf->fd < 0) { + buf->err = 1; + return; + } + ioflush(buf); + } + buf->buf[buf->len++] = c; +} + +static int +putquoted(struct wbuf *buf, uchar c, uchar qchar, int next) +{ + if (c == qchar || c == '\\' || !aisprint(c)) { + int n = (ioputc(buf, '\\'), 1); + uchar cseq; + + switch (c) { + case '\\': + case '\'': + case '"': + cseq = c; + Charseq: + n += (ioputc(buf, cseq), 1); + break; + case '\a': cseq = 'a'; goto Charseq; + case '\b': cseq = 'b'; goto Charseq; + case '\f': cseq = 'f'; goto Charseq; + case '\r': cseq = 'r'; goto Charseq; + case '\t': cseq = 't'; goto Charseq; + case '\v': cseq = 'v'; goto Charseq; + case '\n': cseq = 'n'; goto Charseq; + default: + if (in_range(next, '0', '7')) + n += bfmt(buf, "%.3o", c); + else + n += bfmt(buf, "%o", c); + } + return n; + } + if (c == '?' && next == '?') { + return ioputc(buf, c), ioputc(buf, '\\'), 2; + } + return ioputc(buf, c), 1; +} + +static int +putuint(struct wbuf *buf, uvlong x, int base, bool lower) +{ + uchar tmp[64]; + uchar *end = tmp + sizeof(tmp); + uchar *s = end; + switch (base) { + case 2: + do *--s = '0' + x%2; while (x >>= 1); + break; + case 8: + do *--s = '0' + x%8; while (x >>= 3); + break; + case 10: + do *--s = '0' + x%10; while (x /= 10); + break; + case 16: + do *--s = "0123456789ABCDEF"[x&15] | (-lower & 0x20); while (x >>= 4); + break; + default: + assert(0&&"base"); + } + iowrite(buf, s, end - s); + return end - s; +} + +static void +fmterr(const char *fmt, ...) +{ + va_list ap; + + efmt("fmt Error: "); + va_start(ap, fmt); + vbfmt(&bstderr, fmt, ap); + va_end(ap); + ioputc(&bstderr, '\n'); + ioflush(&bstderr); + ioflush(&bstdout); + abort(); +} + +#define bwriteS(buf, S) (iowrite(buf, S, sizeof S - 1), sizeof S - 1) +#define bputc(B, C) (ioputc(B, C), 1) + +static int +priquals(struct wbuf *buf, int q) +{ + const char s[] = " const volatile", *p = s; + int m = sizeof s - 1; + if (!q) return 0; + else if (q == QCONST) m -= 9; + else if (q == QVOLATILE) p += 6, m -= 6; + else assert(q == (QCONST | QVOLATILE)); + iowrite(buf, p, m); + return m; +} +static int +pritypebefore(struct wbuf *buf, union type ty, int qual) +{ + const char *s, *s2; + union type chld; + int n; + switch (ty.t) { + case TYVOID: s = "void"; Prim: n = bfmt(buf, "%s", s); return n + priquals(buf, qual); + case TYCHAR: s = "char"; goto Prim; + case TYSCHAR: s = "signed char"; goto Prim; + case TYUCHAR: s = "unsigned char"; goto Prim; + case TYSHORT: s = "short"; goto Prim; + case TYUSHORT: s = "unsigned short"; goto Prim; + case TYINT: s = "int"; goto Prim; + case TYUINT: s = "unsigned int"; goto Prim; + case TYLONG: s = "long"; goto Prim; + case TYULONG: s = "unsigned long"; goto Prim; + case TYVLONG: s = "long long"; goto Prim; + case TYUVLONG: s = "unsigned long long"; goto Prim; + case TYFLOAT: s = "float"; goto Prim; + case TYDOUBLE: s = "double"; goto Prim; + case TYLDOUBLE:s = "long double"; goto Prim; + case TYPTR: + chld = typechild(ty); + n = pritypebefore(buf, chld, ty.flag & TFCHLDQUAL); + //if (chld.t != TYPTR) n += ioputc(buf, ' '); + if (chld.t == TYARRAY || chld.t == TYFUNC) + n += bputc(buf, '('); + n += bputc(buf, '*'); + n += priquals(buf, qual); + return n; + case TYARRAY: + return pritypebefore(buf, typechild(ty), ty.flag & TFCHLDQUAL); + case TYFUNC: + return pritypebefore(buf, typedata[ty.dat].ret, 0); + case TYSTRUCT: + s = "struct"; + Tagged: + n = bfmt(buf, "%s %s", s, (s2 = ttypenames[typedata[ty.dat].id]) ? s2 : "(anonymous)"); + return n + priquals(buf, qual); + case TYUNION: + s = "union"; + goto Tagged; + case TYENUM: + s = "enum"; + goto Tagged; + default: + return bfmt(buf, "?\?%d?",ty.t); + } +} + +static int +pritypeafter(struct wbuf *buf, union type ty, int qual) +{ + const struct typedata *td; + int n = 0; + switch (ty.t) { + case TYPTR: + if (typechild(ty).t == TYARRAY || typechild(ty).t == TYFUNC) + n += bputc(buf, ')'); + n += pritypeafter(buf, typechild(ty), ty.flag & TFCHLDQUAL); + break; + case TYARRAY: + n += bputc(buf, '['); + if (typearrlen(ty)) + n += bfmt(buf, "%u", typearrlen(ty)); + n += bputc(buf, ']'); + n += pritypeafter(buf, typechild(ty), ty.flag & TFCHLDQUAL); + break; + case TYFUNC: + td = &typedata[ty.dat]; + n += bputc(buf, '('); + for (int i = 0; i < td->nmemb; ++i) { + n += bfmt(buf, "%tq", td->param[i], tdgetqual(td->quals, i)); + if (i < td->nmemb - 1 || td->variadic) + n += bwriteS(buf, ", "); + } + if (td->variadic) n += bwriteS(buf, "..."); + else if (td->nmemb == 0 && !td->kandr) n += bwriteS(buf, "void"); + n += bwriteS(buf, ")"); + n += pritypeafter(buf, td->ret, 0); + break; + } + return n; +} + +static int +fmttype(struct wbuf *buf, union type ty, int qual) +{ + int n = pritypebefore(buf, ty, qual); + n += pritypeafter(buf, ty, qual); + return n; +} + +static int +putdouble(struct wbuf *buf, double x, vlong prec) +{ + int n = 0; + if (x < 0) { + n += bputc(buf, '-'); + x = -x; + } + + x += 0.5 / prec; // round last decimal + if (x >= (double)(-1ULL>>1)) { // out of range? + n += bwriteS(buf, "inf"); + } else { + vlong integral = x; + vlong fractional = (x - integral)*prec; + n += putuint(buf, integral, 10, 0); + n += bputc(buf, '.'); + for (vlong i = prec/10; i > 1; i /= 10) { + if (i > fractional) { + n += bputc(buf, '0'); + } + } + n += putuint(buf, fractional, 10, 0); + } + return n; +} + +int +vbfmt(struct wbuf *out, const char *fmt, va_list ap) +{ + bool quote, unsgnd, numlong, lower; + int base; + vlong i; + int pad, prec, q; + const char *s; + void *p; + struct token *tok; + union type ty; + double f; + char tmpbuf1[70], tmpbuf2[70]; + struct wbuf tmp1 = MEMBUF(tmpbuf1, sizeof tmpbuf1); + struct wbuf tmp2 = MEMBUF(tmpbuf2, sizeof tmpbuf2); + struct wbuf *buf = out; + int n = 0, prevn; + + while (*fmt) { + buf = out; + if (*fmt++ != '%') { + n += bputc(buf, fmt[-1]); + continue; + } + if (*fmt == '%') { + n += bputc(buf, *fmt++); + continue; + } + fmt += quote = *fmt == '\''; + pad = 0; + if (aisdigit(*fmt)) { /* left pad */ + for (; aisdigit(*fmt); ++fmt) + pad = pad*10 + *fmt-'0'; + if (pad) { + tmp1.len = 0; + buf = &tmp1; + } + } else if (*fmt == '-') { /* right pad */ + if (!aisdigit(*++fmt)) + fmterr("padding amount expected"); + for (; aisdigit(*fmt); ++fmt) + pad = pad*10 - (*fmt-'0'); + } + prec = -1; + if (*fmt == '.') { + if (!aisdigit(*++fmt)) + fmterr("precision expected"); + prec = 0; + for (; aisdigit(*fmt); ++fmt) + prec = prec*10 + *fmt-'0'; + } + fmt += unsgnd = *fmt == 'u'; + fmt += numlong = *fmt == 'l'; + lower = 0; + prevn = n; + switch (*fmt++) { + case 'c': /* character */ + if (quote) { + n += bputc(buf, '\''); + n += putquoted(buf, va_arg(ap, int), '\'', -1); + n += bputc(buf, '\''); + } else { + n += bputc(buf, va_arg(ap, int)); + } + break; + case 's': /* nullterminated string */ + s = va_arg(ap, const char *); + assert(s && "%s null!"); + if (quote) { + n += bputc(buf, '"'); + for (; *s; ++s) n += putquoted(buf, *s, '"', s[1]); + n += bputc(buf, '"'); + } else { + while (*s) n += bputc(buf, *s++); + } + break; + case 'S': /* string ptr + len */ + s = va_arg(ap, const char *); + i = va_arg(ap, uint); + if (quote) { + n += bputc(buf, '"'); + for (; i--; ++s) n += putquoted(buf, *s, '"', i ? s[1] : -1); + n += bputc(buf, '"'); + } else { + iowrite(buf, s, i); + n += i; + } + break; + case 'd': /* decimal */ + base = 10; + Int: + if (base != 10) unsgnd = 1; + i = numlong ? va_arg(ap, vlong) : (unsgnd ? va_arg(ap, uint) : (vlong)va_arg(ap, int)); + tmp2.len = 0; + if (quote) { + switch (base) { + case 2: n += bwriteS(buf, "0b"); break; + case 8: n += bwriteS(buf, "0"); break; + case 16: n += bwriteS(buf, "0x"); break; + } + } + if (!unsgnd && i < 0) { + n += bputc(buf, '-'); + i = -i; + } + n += putuint(prec > 0 ? &tmp2 : buf, i, base, lower); + if (prec > 0) { + int fil = prec - tmp2.len; + while (fil-- > 0) n += bputc(buf, '0'); + iowrite(buf, tmp2.buf, tmp2.len); + } + break; + case 'o': /* octal */ + base = 8; + goto Int; + case 'b': /* binary */ + base = 2; + goto Int; + case 'x': case 'X': /* hexadecimal */ + base = 16; + lower = fmt[-1] == 'x'; + goto Int; + case 'p': /* pointer */ + p = va_arg(ap, void *); + if (!p && quote) { + n += bwriteS(buf, "NULL"); + } else { + n += bwriteS(buf, "0x"); + tmp2.len = 0; + n += putuint(prec > 0 ? &tmp2 : buf, (uvlong)p, 16, 1); + if (prec > 0) { + int fil = prec - tmp2.len; + while (fil-- > 0) n += bputc(buf, '0'); + iowrite(buf, tmp2.buf, tmp2.len); + } + } + break; + case 'f': /* float */ + f = va_arg(ap, double); + n += putdouble(buf, f, prec <= 0 ? 10e6 : prec); + break; + case 't': /* token/tokentag/type */ + switch (*fmt++) { + case 'k': /* tk token */ + tok = va_arg(ap, struct token *); + Tok: + switch (tok->t) { + case TKXXX: + n += bwriteS(buf, "???"); + break; + case TKNUMLIT: + s = (const char *)(getfile(tok->span.sl.file)->p + tok->span.sl.off); + if (quote) n += bputc(buf, '`'); + for (i = tok->span.sl.len; i--; ++s) + if (*s != '\\' && *s != '\n') n += bputc(buf, *s); + if (quote) n += bputc(buf, '\''); + break; + case TKSTRLIT: + n += bfmt(buf, "%'S", tok->s.p, tok->s.n-1); + break; + case TKIDENT: + n += bfmt(buf, "`%s'", tok->ident); + break; + case TKEOF: + n += bwriteS(buf, ""); + break; + default: + if (quote) n += bputc(buf, '`'); + if (in_range(tok->t, TKWBEGIN_, TKWEND_)) { + n += bfmt(buf, "%s", tok->ident); + } else if (aisprint(tok->t)) { + n += bputc(buf, tok->t); + } else if (tok->t >> 16) { + n += bputc(buf, tok->t >> 0); + n += bputc(buf, tok->t >> 8); + n += bputc(buf, tok->t >> 16); + } else { + n += bputc(buf, tok->t >> 0); + n += bputc(buf, tok->t >> 8); + } + if (quote) n += bputc(buf, '\''); + break; + } + break; + case 't': /* tt token tag */ + tok = &(struct token) { va_arg(ap, int) }; + switch (tok->t) { + case TKNUMLIT: + n += bfmt(buf, "numeric literal"); + break; + case TKSTRLIT: + n += bfmt(buf, "string literal"); + break; + case TKIDENT: + n += bfmt(buf, "identifier"); + break; + case TKEOF: + n += bfmt(buf, ""); + break; + default: + if (tok->t >= TKWBEGIN_ && tok->t <= TKWEND_) { + static const char *tab[] = { + #define _(kw, c) #kw, + #include "keywords.def" + #undef _ + }; + n += bfmt(buf, "%s", tab[tok->t - TKWBEGIN_]); + break; + } + goto Tok; + } + break; + case 'y': /* ty type */ + ty = va_arg(ap, union type); + n += fmttype(buf, ty, 0); + break; + case 'q': /* tq qualified type */ + ty = va_arg(ap, union type); + q = va_arg(ap, int); + n += fmttype(buf, ty, q); + break; + default: + if (fmt[-1] == ' ' || !aisprint(fmt[-1])) + fmterr("expected format specifier"); + else + fmterr("unknown format specifier 't%c'", fmt[-1]); + } + break; + case 'g': /* graphics rendition (color) */ + if (!ccopt.nocolor) n += bwriteS(buf, "\033["); + while (*fmt++ != '.') { + if (ccopt.nocolor) continue; + n += bputc(buf, fmt[-1]); + } + if (!ccopt.nocolor) n += bputc(buf, 'm'); + break; + default: + if (unsgnd || numlong) { + --fmt; + base = 10; + goto Int; + } + if (fmt[-1] == ' ' || !aisprint(fmt[-1])) + fmterr("expected format specifier"); + else + fmterr("unknown format specifier '%c'", fmt[-1]); + } + if (pad > 0) { /* left pad */ + while (pad-- > buf->len) + n += bputc(out, ' '); + iowrite(out, buf->buf, buf->len); + out->err |= buf->err; + } else if (pad < 0) { /* right pad */ + int len = n - prevn; + while (pad++ < -len) + n += bputc(out, ' '); + } + } + return buf->err ? -1 : n; +} + +int +bfmt(struct wbuf *buf, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = vbfmt(buf, fmt, ap); + va_end(ap); + return ret; +} + +static uint pagesiz; + +struct memfile +mapopen(const char **err, const char *path) +{ + struct stat stat; + int fd = -1; + void *p = NULL; + struct memfile f = {0}; + uint mapsiz; + + assert("nullp" && err && path); + + if (!pagesiz) pagesiz = sysconf(_SC_PAGESIZE); + *err = NULL; + + if ((fd = open(path, O_RDONLY)) < 0) + goto Err; + if (fstat(fd, &stat) != 0) + goto Err; + + if (!S_ISREG(stat.st_mode)) { + uint cap = 0; + int ret; + + do { + enum { CHUNKSIZ = 1<<16 }; + if ((cap += CHUNKSIZ) < CHUNKSIZ) { + /* overflow */ + free(p); + goto Big; + } + if (!(f.p = p ? realloc(p, cap) : malloc(cap))) { + free(p); + goto Err; + } + p = (void *)f.p; + ret = read(fd, p, CHUNKSIZ); + if (ret >= 0) + f.n += ret; + else if (errno != EAGAIN && errno != EWOULDBLOCK) + goto Err; + } while (ret != 0); + + close(fd); + fd = -1; + mapsiz = alignup(f.n, pagesiz); + if ((f.p = mmap(NULL, mapsiz + pagesiz, PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + free(p); + goto Err; + } + memcpy((void *)f.p, p, f.n); + free(p); + mprotect((void *)f.p, mapsiz + pagesiz, PROT_READ); + return f; + } + + if (stat.st_size > UINT_MAX) { + Big: + errno = EFBIG; + goto Err; + } + mapsiz = alignup(stat.st_size, pagesiz); + if ((p = mmap(NULL, mapsiz + pagesiz, PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + goto Err; + if (mapsiz > 0 && mmap(p, mapsiz, PROT_READ, MAP_FIXED|MAP_PRIVATE, fd, 0) == MAP_FAILED) + goto Err; + + close(fd); + f.p = p; + f.n = stat.st_size; + return f; +Err: + if (fd >= 0) close(fd); + if (!*err) *err = strerror(errno); + return f; +} + +void +_assertfmt(const char *file, int line, const char *func, const char *expr) +{ + ioflush(&bstdout); + efmt("%s:%d: %s: Assertion `%s' failed.\n", file, line, func, expr); + ioflush(&bstderr); +} + +void +mapclose(struct memfile *f) +{ + assert(f->p); + munmap((void *)f->p, alignup(f->n, pagesiz) + pagesiz); + memset(f, 0, sizeof *f); +} + +static struct file { + const char *path; + struct memfile f; + vec_of(uint) lineoffs; +} files[256]; +static int nfiles; + +int +openfile(const char **err, struct memfile **pf, const char *path) +{ + struct file *f; + + assert(nfiles + 1 < arraylength(files) && "too many files"); + f = &files[nfiles]; + f->path = path; + f->f = mapopen(err, path); + if (*err) return -1; + vinit(&f->lineoffs, NULL, 50); + vpush(&f->lineoffs, 0); + *pf = &f->f; + return nfiles++; +} + +const char * +getfilename(int id) +{ + assert(id < nfiles); + return files[id].path; +} + +struct memfile * +getfile(int id) +{ + assert(id < nfiles); + return &files[id].f; +} + +void +addfileline(int id, uint off) +{ + vec_of(uint) *lineoffs; + + assert(id < nfiles); + lineoffs = (void *)&files[id].lineoffs; + if (lineoffs->n) assert(off > lineoffs->p[lineoffs->n-1]); + vpush(lineoffs, off); +} + +void +getfilepos(int *line, int *col, int id, uint off) +{ + uint *offs, n; + int l = 0, h, i = 0; + + assert(id < nfiles); + offs = files[id].lineoffs.p; + n = files[id].lineoffs.n; + h = n - 1; + + /* binary search over offsets array */ + while (l <= h) { + i = (l + h) / 2; + if (offs[i] < off) l = i + 1; + else if (offs[i] > off) h = i - 1; + else break; + } + i -= offs[i] > off; + *line = i + 1; + *col = off - offs[i] + 1; +} + +void +closefile(int id) +{ + assert(id >= 0 && id < nfiles); + mapclose(&files[id].f); +} + +enum diagkind { DGERROR, DGWARN, DGNOTE, }; + +void +vdiag(const struct span *span, enum diagkind kind, const char *fmt, va_list ap) +{ + static const char *label[] = { "error", "warning", "note" }; + static const char *color[] = { "%g1;31.", "%g1;35.", "%g1;36." }; + int line, col; + struct memfile *f; + const struct span0 *loc; + + if (span) { + loc = span->ex.len ? &span->ex : &span->sl; + f = getfile(loc->file); + getfilepos(&line, &col, loc->file, loc->off); + efmt("%s:%d:%d: ", getfilename(loc->file), line, col); + } + efmt(color[kind]); + efmt("%s: %g.", label[kind]); + vbfmt(&bstderr, fmt, ap); + efmt("\n"); + if (span) { + uint i; + int j, nmark; + char mark = '^'; + + /* find start of line */ + for (i = loc->off - 1; i + 1 > 0 && f->p[i] != '\n'; --i) ; + if (i) ++i; + + nmark = loc->len; + while (i < loc->off + loc->len) { + int end; + int curoff = efmt("%5d | ", line); + for (end = 0; f->p[i] != '\n' && i < f->n; ++i, ++end) + ioputc(&bstderr, f->p[i]); + ++i; + ioputc(&bstderr, '\n'); + + for (j = 0; j < curoff + col - 1; ++j) + ioputc(&bstderr, j == curoff-2 ? '|' : ' '); + efmt(color[kind]); + j -= curoff; + do { + ioputc(&bstderr, mark); + mark = '~'; + } while (--nmark && ++j < end); + col = 1; + ++line; + efmt("%g.\n"); + } + ioputc(&bstderr, '\n'); + } + + if (span && loc == &span->ex && span->sl.len) + if (span->ex.file != span->sl.file || !((uint) span->sl.off - span->ex.off < span->ex.len)) + note(&(struct span){ span->sl }, "expanded from here"); +} + +void +fatal(const struct span *span, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vdiag(span, DGERROR, fmt, ap); + va_end(ap); + efmt("Aborting due to previous error.\n"); + exit(1); +} + +int nerror; + +void +error(const struct span *span, const char *fmt, ...) +{ + va_list ap; + + ++nerror; + va_start(ap, fmt); + vdiag(span, DGERROR, fmt, ap); + va_end(ap); +} + +void +warn(const struct span *span, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vdiag(span, DGWARN, fmt, ap); + va_end(ap); +} + +void +note(const struct span *span, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vdiag(span, DGNOTE, fmt, ap); + va_end(ap); +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/ir.c b/ir.c new file mode 100644 index 0000000..2cb1783 --- /dev/null +++ b/ir.c @@ -0,0 +1,221 @@ +#include "ir.h" +#include "common.h" + +uchar type2cls[TYARRAY + 1]; +uchar cls2siz[KF8+1]; +const uchar siz2intcls[] = { [1] = KI4, [2] = KI4, [4] = KI4, [8] = KI8 }; + +static vec_of(struct irdat) dats; +struct instr instr[1<<14]; +static int ninstr; +vec_of(struct ircall) calls; + +void +irinit(struct function *fn) +{ + static struct ircall callsbuf[64]; + + ninstr = 0; + vinit(&calls, callsbuf, arraylength(callsbuf)); + if (!type2cls[TYINT]) { + for (int i = TYCHAR; i <= TYUVLONG; ++i) { + int siz = targ_primsizes[i]; + type2cls[i] = siz < 8 ? KI4 : KI8; + } + type2cls[TYFLOAT] = KF4; + type2cls[TYDOUBLE] = KF8; + type2cls[TYPTR] = KIP; + type2cls[TYARRAY] = KIP; + cls2siz[KI4] = cls2siz[KF4] = 4; + cls2siz[KI8] = cls2siz[KF8] = 8; + cls2siz[KIP] = targ_primsizes[TYPTR]; + } + fn->entry = fn->curblk = alloc(&fn->arena, sizeof(struct block), 0); + fn->entry->lprev = fn->entry->lnext = fn->entry; +} + +struct xcon conht[1 << 12]; + +static int +addcon(const struct xcon *con) +{ + uint h = hashb(0, con, sizeof *con); + uint i = h, n = arraylength(conht); + assert(con->issym || con->cls); + for (;; ++i) { + i &= arraylength(conht) - 1; + if (!conht[i].issym && !conht[i].cls) { + conht[i] = *con; + return i; + } else if (!memcmp(&conht[i], con, sizeof *con)) { + return i; + } + assert(--n > 0 && "conht full"); + } +} + +union irref +adddat(struct function *fn, const struct irdat *dat) +{ + assert(dats.n < 1u<<29); + vpush(&dats, *dat); + /* return mkref(RDAT, dats.n - 1); */ +} + +static void +targwrite2(uchar *d, ushort x) +{ + if (targ_bigendian) + d[0] = x >> 8, d[1] = x; + else + d[0] = x, d[1] = x >> 8; +} + +static void +targwrite4(uchar *d, uint x) +{ + if (targ_bigendian) + d[0] = x >> 24, d[1] = x >> 16, d[2] = x >> 8, d[3] = x; + else + d[0] = x, d[1] = x >> 8, d[2] = x >> 16, d[3] = x >> 24; +} + +static void +targwrite8(uchar *d, uvlong x) +{ + if (targ_bigendian) + targwrite4(d, x >> 32), targwrite4(d + 4, x); + else + targwrite4(d, x), targwrite4(d + 4, x >> 32); +} + +void +conputdat(struct irdat *dat, uint off, enum typetag t, const void *src) +{ + uint siz = targ_primsizes[t]; + bool iszero = 1; + uchar *pdat; + assert(off + siz <= dat->siz); + for (uint i = 0; i < siz; ++i) { + if (((uchar *)src) != 0) { + iszero = 0; + break; + } + } + if (iszero && (dat->siz <= 8 || dat->dat.n < off)) + return; + + if (dat->siz > 8) + while (off + siz > dat->dat.n) + vpush(&dat->dat, 0); + pdat = dat->siz <= 8 ? dat->sdat : dat->dat.p; + + switch (siz) { + case 1: pdat[off] = *(uchar *)src; break; + case 2: targwrite2(&pdat[off], *(ushort *)src); break; + case 4: targwrite4(&pdat[off], *(uint *)src); break; + case 8: targwrite8(&pdat[off], *(uvlong *)src); break; + default: assert(0); + } +} + +union irtype +mkirtype(union type t) +{ + assert(t.t != TYVOID); + if (isscalar(t)) return (union irtype) { .cls = type2cls[t.t] }; + assert(isagg(t)); + return (union irtype) { .isagg = 1, .dat = t.dat }; +} + +union irref +mkintcon(struct function *fn, enum irclass k, vlong i) +{ + if (i < 1ll << 28 && i >= -(1ll << 28)) { + return mkref(RICON, i); + } else if (k == KI4) { + struct xcon con = { 0, k, .i4 = i }; + return mkref(RXCON, addcon(&con)); + } else { + struct xcon con = { 0, k, .i8 = i }; + return mkref(RXCON, addcon(&con)); + } +} + +union irref +mkfltcon(struct function *fn, enum irclass k, double f) +{ + struct xcon con = { 0, k }; + if (k == KF4) con.fs = f; + else con.fd = f; + return mkref(RXCON, addcon(&con)); +} + +union irref +mksymref(struct function *fn, const char *s) +{ + struct xcon con = { 1, KIP, .sym = s }; + return mkref(RXCON, addcon(&con)); +} + +union irref +mkcall(struct function *fn, union type fnty, uint narg, union irref *args, union irtype *typs) +{ + const struct typedata *td = &typedata[fnty.dat]; + struct ircall call = { narg, td->variadic ? td->nmemb : -1 }; + + assert(td->variadic ? narg >= td->nmemb : narg == td->nmemb); + if (narg) { + call.args = alloc(&fn->arena, narg*sizeof *args + narg*sizeof(union irtype), 0); + call.typs = (union irtype *)((char *)call.args + narg*sizeof *args); + memcpy(call.args, args, narg*sizeof *args); + memcpy(call.typs, typs, narg*sizeof *typs); + } + vpush(&calls, call); + return mkref(RCALL, calls.n-1); +} + +union irref +addinstr(struct function *fn, struct instr ins) +{ + assert(ninstr < arraylength(instr)); + assert(fn->curblk != NULL); + instr[ninstr] = ins; + vpush(&fn->curblk->ins, ninstr); + return mkref(RTMP, ninstr++); +} + +struct block * +newblk(struct function *fn) +{ + struct block *blk = alloc(&fn->arena, sizeof(struct block), 0); + memset(blk, 0, sizeof *blk); + return blk; +} + +void +useblk(struct function *fn, struct block *blk) +{ + if (fn->curblk) assert(fn->curblk->jmp.t && "never finished block"); + if (blk) assert(!blk->jmp.t && "reusing built block"); + if (!blk->lprev) { /* initialize */ + blk->lnext = fn->entry; + blk->lprev = fn->entry->lprev; + blk->lprev->lnext = blk; + blk->id = blk->lprev->id + 1; + fn->entry->lprev = blk; + } + fn->curblk = blk; +} + +void +putjump(struct function *fn, enum jumpkind j, union irref arg, struct block *t, struct block *f) +{ + fn->curblk->jmp.t = j; + fn->curblk->jmp.arg = arg; + fn->curblk->s1 = t; + fn->curblk->s2 = f; + fn->curblk = NULL; +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/ir.h b/ir.h new file mode 100644 index 0000000..02c9a2d --- /dev/null +++ b/ir.h @@ -0,0 +1,117 @@ +#include "common.h" + +enum irclass { + KXXX, + KI4, KI8, KIP, + KF4, KF8, +}; + +#define kisint(k) in_range((k), KI4, KIP) +#define kisflt(k) in_range((k), KF4, KF8) + +union irtype { + struct { ushort isagg : 1, cls : 15; }; + struct { ushort _ : 1, dat : 15; }; + ushort bits; +}; + +struct irdat { + uchar align : 7, mut : 1; + uint siz; + union { + vec_of(uchar) dat; + uchar sdat[8]; + }; + struct symref { + struct symref *next; + const char *sym; + uint off; + vlong addend; + } *syms; +}; + +struct xcon { + bool issym; + uchar cls; + union { + const char *sym; + int i4; + vlong i8; + float fs; + double fd; + }; +}; + +struct ircall { + short narg; + short vararg; /* first variadic arg or -1 */ + union irtype *typs; + union irref *args; +}; + +enum irrefkind { + RNONE, RTMP, RARG, RICON, RXCON, RSYM, RCALL +}; + +union irref { + struct { uint t : 3, idx : 29; }; + struct { signed _0: 3, i : 29; }; /* RICON */ + uint bits; +}; + +enum op { + Oxxx, +#define _(o,...) O##o, +#include "op.def" +#undef _ +}; + +struct instr { + uchar op; + uchar cls; + union irref l; + union irref r; +}; + +enum jumpkind { + JXXX, Jb, Jbcnd, Jret, Jrets, +}; + +struct block { + int id; + struct block *s1, *s2; + struct { uchar t; union irref arg; } jmp; + vec_of(ushort) ins; + struct block *lprev, *lnext; +}; + +struct function { + struct arena *arena; + const char *name; + struct block *entry, *curblk; + union type fnty, retty, *paramty; + uint nblk; + uint nparam; + bool globl; +}; + +extern uchar type2cls[]; +extern uchar cls2siz[]; +extern const uchar siz2intcls[]; +void irinit(struct function *); +#define NOREF ((union irref) {0}) +#define mkref(t, x) ((union irref) {{ (t), (x) }}) +#define mkzerocon() ((union irref){ RICON, 0 }) +union irtype mkirtype(union type); +union irref mkintcon(struct function *, enum irclass, vlong); +union irref mkfltcon(struct function *, enum irclass, double); +union irref mksymref(struct function *, const char *); +void conputdat(struct irdat *, uint off, enum typetag t, const void *dat); +union irref mkcall(struct function *, union type fnty, uint narg, union irref *, union irtype *); +union irref addinstr(struct function *, struct instr); +struct block *newblk(struct function *); +void useblk(struct function *, struct block *); +void putjump(struct function *, enum jumpkind, union irref arg, struct block *t, struct block *f); +void irdump(struct function *, const char *fname); + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/irdump.c b/irdump.c new file mode 100644 index 0000000..b7fbcc0 --- /dev/null +++ b/irdump.c @@ -0,0 +1,124 @@ +#include "common.h" +#include "ir.h" + +extern struct xcon conht[]; +extern vec_of(struct ircall) calls; + +static const char *clsname[] = { + "?", "i4", "i8", "iP", "f4", "f8" +}; + +static void +prityp(union irtype typ) +{ + if (!typ.isagg) + efmt(clsname[typ.cls]); + else { + const struct typedata *td = &typedata[typ.dat]; + const char *tag = td->t == TYSTRUCT ? "struct" : "union"; + if (ttypenames[td->id]) + efmt("%s.%s.%d", tag, ttypenames[td->id], td->id); + else + efmt("%s.%d", tag, td->id); + } +} + +static void +dumpref(union irref ref) +{ + struct xcon *con; + struct ircall *call; + switch (ref.t) { + case RTMP: efmt("%%%d", ref.idx); break; + case RARG: efmt("%%arg%d", ref.idx); break; + case RICON: efmt("%d", ref.i); break; + case RXCON: + con = &conht[ref.idx]; + if (con->issym) efmt("$%s", con->sym); + else switch (con->cls) { + case KI4: efmt("%d", con->i4); break; + case KI8: efmt("%ld", con->i8); break; + case KIP: efmt("%'x", con->i8); break; + case KF4: efmt("%f", con->fs); break; + case KF8: efmt("%f", con->fd); break; + default: assert(0); + } + break; + case RCALL: + call = &calls.p[ref.idx]; + for (int i = 0; i < call->narg; ++i) { + if (call->vararg == i) { + if (i > 0) efmt(", "); + efmt("..., "); + } + prityp(call->typs[i]); + efmt(" "); + dumpref(call->args[i]); + } + break; + default: assert(!"ref"); + } +} + +extern struct instr instr[]; +static const char *opname[] = { + "???", +#define _(o,...) #o, +#include "op.def" +#undef _ +}; +static const uchar opnarg[] = { + 0, +#define _(o,n) n, +#include "op.def" +#undef _ +}; + +static void +dumpinst(const struct instr *ins) +{ + int i; + efmt(" "); + if (ins->cls) + efmt("%s %%%d = ", clsname[ins->cls], ins - instr); + efmt("%s ", opname[ins->op]); + for (i = 0; i < opnarg[ins->op]; ++i) { + if (i) efmt(", "); + dumpref((&ins->l)[i]); + } + efmt("\n"); +} + +static void +dumpblk(struct block *blk) +{ + static const char *jnames[] = { 0, "b", "b", "ret", "rets" }; + static const uchar jnarg[] = { 0, 0, 1, 0, 1 }; + efmt(" .L%d:\n", blk->id); + for (int i = 0; i < blk->ins.n; ++i) { + dumpinst(&instr[blk->ins.p[i]]); + } + efmt(" %s ", jnames[blk->jmp.t]); + if (jnarg[blk->jmp.t]) { + dumpref(blk->jmp.arg); + if (blk->s1) efmt(", "); + } + if (blk->s1 && blk->s2) efmt(".L%d, .L%d", blk->s1->id, blk->s2->id); + else if (blk->s1) efmt(".L%d", blk->s1->id); + efmt("\n"); +} + +void +irdump(struct function *fn, const char *fname) +{ + struct block *blk; + + efmt("function %s : %ty\n", fname, fn->fnty); + blk = fn->entry; + do { + dumpblk(blk); + blk = blk->lnext; + } while (blk != fn->entry); +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/keywords.def b/keywords.def new file mode 100644 index 0000000..e0bb92f --- /dev/null +++ b/keywords.def @@ -0,0 +1,70 @@ +/* !SORTED */ +_(_Alignas, STDC11) +_(_Alignof, STDC11) +_(_Atomic, STDC11) +_(_BitInt, STDC23) +_(_Bool, STDC99) +_(_Complex, STDC99) +_(_Decimal128, STDC23) +_(_Decimal32, STDC23) +_(_Decimal64, STDC23) +_(_Generic, STDC11) +_(_Imaginary, STDC99) +_(_Noreturn, STDC11) +_(_Static_assert, STDC11) +_(_Thread_local, STDC11) +_(alignas, STDC23) +_(alignof, STDC23) +_(auto, 0) +_(bool, STDC23) +_(break, 0) +_(case, 0) +_(char, 0) +_(const, 0) +_(constexpr, STDC23) +_(continue, 0) +_(default, 0) +_(do, 0) +_(double, 0) +_(else, 0) +_(enum, 0) +_(extern, 0) +_(false, STDC23) +_(float, 0) +_(for, 0) +_(goto, 0) +_(if, 0) +_(inline, STDC99) +_(int, 0) +_(long, 0) +_(nullptr, STDC23) +_(register, 0) +_(restrict, STDC99) +_(return, 0) +_(short, 0) +_(signed, 0) +_(sizeof, 0) +_(static, 0) +_(static_assert, STDC23) +_(struct, 0) +_(switch, 0) +_(thread_local, STDC23) +_(true, STDC23) +_(typedef, 0) +_(typeof, STDC23) +_(typeof_unqual, STDC23) +_(union, 0) +_(unsigned, 0) +_(void, 0) +_(volatile, 0) +_(while, 0) + +#ifndef TKWBEGIN_ +# define TKWBEGIN_ TKW_Alignas +#endif +#ifndef TKWEND_ +# define TKWEND_ TKWwhile +#endif +#ifndef TKWMAXLEN_ +# define TKWMAXLEN_ (sizeof "_Static_assert" - 1) +#endif diff --git a/lex.c b/lex.c new file mode 100644 index 0000000..635c838 --- /dev/null +++ b/lex.c @@ -0,0 +1,669 @@ +#include "parse.h" +#include + +static const char * +intern(const char *s) +{ + static vec_of(char) mem; + static uint ht[1<<10]; + uint h, i, n = arraylength(ht); + + if (!mem.p) vinit(&mem, NULL, 1<<10); + + i = h = hashs(0, s); + for (;; ++i) { + i &= arraylength(ht) - 1; + if (!ht[i]) { + ht[i] = mem.n+1; + return vpushn(&mem, s, strlen(s)+1); + } else if (!strcmp(s, &mem.p[ht[i]-1])) { + return &mem.p[ht[i]-1]; + } + assert(--n > 0 && "intern full"); + } +} + +static void +identkeyword(struct token *tk, const char *s, int n) +{ + static const struct { const char *s; enum toktag t; enum cstd cstd; } kwtab[] = { +#define _(kw, cstd) { #kw, TKW##kw, cstd }, +#include "keywords.def" +#undef _ + }; + int l = 0, h = arraylength(kwtab) - 1, i, cmp; + + if (n > TKWMAXLEN_) goto ident; + /* binary search over sorted array */ + while (l <= h) { + i = (l + h) / 2; + cmp = strcmp(kwtab[i].s, s); + if (cmp < 0) l = i + 1; + else if (cmp > 0) h = i - 1; + else if (kwtab[i].cstd <= ccopt.cstd) { + tk->t = kwtab[i].t; + tk->ident = kwtab[i].s; + return; + } else break; + } +ident: + tk->t = TKIDENT; + tk->ident = intern(s); +} + +static int +next0(struct parser *pr) +{ + bool trigraph = ccopt.trigraph; + int n, c; + + while (!memcmp(pr->dat+pr->idx, "\\\n", n = 2) + || (trigraph && !memcmp(pr->dat+pr->idx, "\?\?/\n", n = 4))) { + pr->idx += n; + addfileline(pr->fileid, pr->idx); + } + if (pr->idx >= pr->ndat) + return TKEOF; + if (trigraph && !memcmp(pr->dat+pr->idx, "??", 2)) { + switch (pr->dat[pr->idx+2]) { + case '=': pr->idx += 3; return '#'; + case '(': pr->idx += 3; return '['; + case ')': pr->idx += 3; return ']'; + case '!': pr->idx += 3; return '|'; + case '<': pr->idx += 3; return '{'; + case '>': pr->idx += 3; return '}'; + case '-': pr->idx += 3; return '~'; + case '/': pr->idx += 3; return '\\'; + case '\'': pr->idx += 3; return '^'; + } + } + if ((c = pr->dat[pr->idx++]) == '\n') { + addfileline(pr->fileid, pr->idx); + } + return c; +} + +static int +next(struct parser *pr) +{ + int c; + + if (pr->npeekchr) { + int c = pr->peekchr[0]; + pr->chridx = pr->peekcidx[0]; + memmove(pr->peekchr, pr->peekchr + 1, --pr->npeekchr * sizeof *pr->peekchr); + memmove(pr->peekcidx, pr->peekcidx + 1, pr->npeekchr * sizeof *pr->peekcidx); + pr->eof = c == TKEOF; + return c; + } + c = next0(pr); + pr->eof = c == TKEOF; + pr->chridx = pr->idx; + return c; +} + +static int +peek(struct parser *pr, int off) +{ + assert(off < arraylength(pr->peekchr)); + while (pr->npeekchr < off+1) { + pr->peekchr[pr->npeekchr] = next0(pr); + pr->peekcidx[pr->npeekchr++] = pr->idx; + } + return pr->peekchr[off]; +} + +static bool +match(struct parser *pr, int c) +{ + if (!pr->eof && peek(pr, 0) == c) { + next(pr); + return 1; + } + return 0; +} + +static bool +aissep(int c) { + if (!aisprint(c) || aisspace(c)) + return 1; + switch (c) + case '(': case ')': case '[': case ']': + case '{': case '}': case '.': case ',': + case ';': case '?': case '+': case '-': + case '*': case '/': case '&': case '|': + case '^': case '~': case '=': case '\'': + case '"': case '<': case '>': case ':': + case '@': case '#': case '%': case '\\': + case '`': + return 1; + return 0; +} + +static void +strtonum(struct token *tk, char *s) +{ + extern int sscanf(const char *, const char *, ...); + extern uvlong strtoull(const char *, char **, int); + char *suffix; + + tk->ty = TYXXX; + if (strchr(s, '.')) { + /* float literal */ + int n; + + if (!sscanf(s, "%lf%n", &tk->f, &n)) + return; + suffix = s + n; + tk->ty = TYDOUBLE; + } else { + tk->u = strtoull(s, &suffix, 0); + if (suffix == s) + return; + /* XXX proper int lit types */ + tk->ty = TYINT; + } + if (!*suffix) return; + + for (s = suffix; *s; ++s) + *s |= 0x20; /* make lowercase */ + if (tk->ty == TYDOUBLE) { + if (!strcmp(suffix, "f")) { + tk->ty = TYFLOAT; + tk->f = (float) tk->f; + } else tk->ty = TYXXX; + } else { + if (!strcmp(suffix, "u")) tk->ty = TYUINT; + else if (!strcmp(suffix, "ul")) tk->ty = TYULONG; + else if (!strcmp(suffix, "lu")) tk->ty = TYULONG; + else if (!strcmp(suffix, "ull")) tk->ty = TYUVLONG; + else if (!strcmp(suffix, "llu")) tk->ty = TYUVLONG; + else if (!strcmp(suffix, "ll")) tk->ty = TYVLONG; + else if (!strcmp(suffix, "l")) tk->ty = TYLONG; + else tk->ty = TYXXX; + } +} + +static void +readstrchrlit(struct parser *pr, struct token *tk, char delim) +{ + int c, i; + uchar tmp[80]; + vec_of(uchar) b = VINIT(tmp, sizeof tmp); + struct span span = {0}; + uint n, idx = pr->chridx; + + while ((c = next(pr)) != delim) { + if (c == '\n' || c == TKEOF) { + Noterm: + span.sl = (struct span0) { idx, pr->chridx - idx, pr->fileid }; + error(&span, "missing terminating %c character", delim); + break; + } else if (c == '\\') { + span.sl = (struct span0) { idx, pr->chridx - idx, pr->fileid }; + switch (c = next(pr)) { + case '\n': case TKEOF: + goto Noterm; + case '\'': c = '\''; break; + case '\\': c = '\\'; break; + case '"': c = '"'; break; + case '?': c = '?'; break; + case 'a': c = '\a'; break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'v': c = '\v'; break; + case 'x': case 'X': /* hex */ + n = 0; + if (!aisxdigit(peek(pr, 0))) goto Badescseq; + do { + c = next(pr); + if (c-'0' < 10) n = n<<4 | (c-'0'); + else n = n<<4 | (10 + (c|0x20)-'a'); + } while (aisxdigit(peek(pr, 0))); + if (n > 0xFF) { + span.sl.len = pr->chridx - span.sl.off; + error(&span, "hex escape sequence out of range"); + } + c = n & 0xFF; + break; + default: + if (aisodigit(c)) { /* octal */ + n = c-'0'; + for (i = 2; i--;) { + if (!aisodigit(peek(pr, 0))) break; + n = n<<3 | ((c = next(pr))-'0'); + } + if (n > 0377) { + span.sl.len = pr->chridx - span.sl.off; + error(&span, "hex escape sequence out of range"); + } + c = n; + break; + } + Badescseq: + span.sl.len = pr->chridx - span.sl.off; + error(&span, "invalid escape sequence"); + } + } + vpush(&b, c); + idx = pr->chridx;; + } + if (delim == '"') { + vpush(&b, 0); + tk->t = TKSTRLIT; + tk->s.p = alloc(&pr->exarena, b.n, 1); + memcpy(tk->s.p, b.p, tk->s.n = b.n); + } else { + if (b.n == 0) { + span.sl = (struct span0) { idx, pr->chridx - idx, pr->fileid }; + error(&span, "empty character literal"); + } else if (b.n > targ_primsizes[TYINT]) { + span.sl = (struct span0) { idx, pr->chridx - idx, pr->fileid }; + error(&span, "multicharacter literal too long"); + } + tk->t = TKNUMLIT; + tk->ty = TYINT; + tk->u = 0; + for (i = 0; i < b.n; ++i) + tk->u = tk->u<<8 | b.p[i]; + } + vfree(&b); +} + +static int +lex0(struct parser *pr, struct token *tk) +{ + int idx, c; + +#define RET(t_) do { tk->t = (t_); goto End; } while (0) + +Begin: + idx = pr->chridx; + switch (c = next(pr)) { + case ' ': case '\r': case '\t': + goto Begin; + break; + case '!': case '(': case ')': case ',': + case ':': case ';': case '?': case '[': + case ']': case '{': case '}': case '~': + case '$': case '@': case '`': case '\\': case TKEOF: case '\n': + RET(c); + case '#': + if (match(pr, '#')) RET(TKPPCAT); + RET(c); + case '+': + if (match(pr, '+')) RET(TKINC); + if (match(pr, '=')) RET(TKSETADD); + RET(c); + case '-': + if (match(pr, '-')) RET(TKDEC); + if (match(pr, '=')) RET(TKSETSUB); + if (match(pr, '>')) RET(TKARROW); + RET(c); + case '*': + if (match(pr, '=')) RET(TKSETMUL); + RET(c); + case '/': + if (match(pr, '=')) RET(TKSETDIV); + if (match(pr, '/')) { + /* // comment */ + while (!pr->eof && !match(pr, '\n')) + next(pr); + goto Begin; + } + if (match(pr, '*')) { + /* comment */ + while (peek(pr, 0) != '*' || peek(pr, 1) != '/') { + if (next(pr) == TKEOF) { + struct span span = {{ idx, pr->chridx - idx, pr->fileid }}; + fatal(&span, "unterminated multiline comment"); + } + } + next(pr), next(pr); + goto Begin; + } + RET(c); + case '%': + if (match(pr, '=')) RET(TKSETMOD); + RET(c); + case '^': + if (match(pr, '=')) RET(TKSETXOR); + RET(c); + case '=': + if (match(pr, '=')) RET(TKEQU); + RET(c); + case '<': + if (match(pr, '=')) RET(TKLTE); + if (match(pr, '<')) RET(match(pr, '=') ? TKSETSHL : TKSHL); + RET(c); + case '>': + if (match(pr, '=')) RET(TKGTE); + if (match(pr, '>')) RET(match(pr, '=') ? TKSETSHR : TKSHR); + RET(c); + case '&': + if (match(pr, '&')) RET(TKLOGAND); + if (match(pr, '=')) RET(TKSETAND); + RET(c); + case '|': + if (match(pr, '|')) RET(TKLOGIOR); + if (match(pr, '=')) RET(TKSETIOR); + RET(c); + case '.': + if (peek(pr, 0) == '.' && peek(pr, 1) == '.') { + next(pr), next(pr); + RET(TKDOTS); + } + RET(c); + case '\'': + case '"': + readstrchrlit(pr, tk, c); + goto End; + default: + if (aisdigit(c)) { + char tmp[70]; + int n = 0; + tmp[n++] = c; + while (!aissep(c = peek(pr, 0)) || c == '.' || ((tmp[n-1]|0x20) == 'e' && (c == '+' || c == '-'))) { + assert(n < arraylength(tmp)-1 && "too big"); + tmp[n++] = next(pr); + } + tmp[n] = 0; + strtonum(tk, tmp); + RET(TKNUMLIT); + } else if (c == '_' || aisalpha(c)) { + char tmp[70]; + int n = 0; + tmp[n++] = c; + while (!aissep(c = peek(pr, 0))) { + assert(n < arraylength(tmp)-1 && "too big"); + tmp[n++] = next(pr); + } + tmp[n] = 0; + identkeyword(tk, tmp, n); + goto End; + } + } + fatal(&(struct span) {{ idx, pr->chridx - idx, pr->fileid }}, "unexpected character %'c at %d", c, idx); +End: + tk->span.sl.file = pr->fileid; + tk->span.sl.off = idx; + tk->span.sl.len = pr->chridx - idx; + tk->span.ex.file = pr->fileid; + tk->span.ex.off = idx; + tk->span.ex.len = pr->chridx - idx; + return tk->t; +#undef RET +} + +/****************/ +/* PREPROCESSOR */ +/****************/ + +#define isppident(tk) ((tk).t == TKIDENT || in_range((tk).t, TKWBEGIN_, TKWEND_)) + +static vec_of(struct macro) macros; +static ushort macroht[1<<10]; + +static struct macro * +findmac(const char *name) +{ + uint h, i, n = arraylength(macroht); + + i = h = ptrhash(name); + for (; n--; ++i) { + i &= arraylength(macroht) - 1; + if (!macroht[i]) { + return NULL; + } else if (macros.p[macroht[i]-1].name == name) { + return ¯os.p[macroht[i]-1]; + } + } + return NULL; +} + +static void +freemac(struct macro *mac) +{ + free(mac->param); + free(mac->rlist.tk); +} + +static bool +tokequ(const struct token *a, const struct token *b) +{ + char tmpbuf[100]; + struct wbuf tmp = MEMBUF(tmpbuf, sizeof tmpbuf); + if (a->t != b->t) return 0; + if (a->t == TKNUMLIT) { + const char *s1 = tmp.buf, *s2; + int n1, n2; + + if (a->ty != b->ty) return 0; + n1 = bfmt(&tmp, "%tk", a); + s2 = tmp.buf + tmp.len; + n2 = bfmt(&tmp, "%tk", b); + if (tmp.err) return 0; + return n1 == n2 && !memcmp(s1, s2, n1); + } else if (a->t == TKIDENT) { + return a->ident == b->ident; + } else if (a->t == TKSTRLIT) { + return a->s.n == b->s.n && !memcmp(a->s.p, b->s.p, a->s.n); + } + return 1; +} + +static bool /* whitespace separating tokens? */ +wsseparated(const struct token *l, const struct token *r) +{ + assert(l->span.sl.file == r->span.sl.file); + return l->span.sl.off + l->span.sl.len < r->span.sl.off; +} + +static bool +macroequ(const struct macro *a, const struct macro *b) +{ + int i; + if (a->name != b->name) return 0; + if (a->fnlike != b->fnlike || a->variadic != b->variadic) return 0; + if (a->fnlike) { + if (a->nparam != b->nparam) return 0; + for (i = 0; i < a->nparam; ++i) + if (a->param[i] != b->param[i]) + return 0; + } + if (a->rlist.n != b->rlist.n) return 0; + for (i = 0; i < a->rlist.n; ++i) { + struct token *tka = a->rlist.tk, *tkb = b->rlist.tk; + if (!tokequ(&tka[i], &tkb[i])) + return 0; + if (i && wsseparated(&tka[i-1], &tka[i]) != wsseparated(&tkb[i-1], &tkb[i])) + return 0; + } + return 1; +} + +static struct macro * +putmac(struct macro *mac) +{ + uint h, i, n = arraylength(macroht); + struct macro *slot; + + i = h = ptrhash(mac->name); + for (;; ++i) { + i &= arraylength(macroht) - 1; + if (!macroht[i]) { + macroht[i] = macros.n+1; + vpush(¯os, *mac); + return ¯os.p[macros.n - 1]; + } else if ((slot = ¯os.p[macroht[i]-1])->name == mac->name) { + if (!macroequ(slot, mac)) { + warn(&(struct span){mac->span}, "redefining macro"); + note(&(struct span){slot->span}, "previous definition:"); + freemac(slot); + *slot = *mac; + } else { + freemac(mac); + } + return slot; + } + assert(--n && "macro limit"); + } +} + +static void +ppskipline(struct parser *pr) +{ + while (peek(pr, 0) != '\n' && peek(pr, 0 != TKEOF)) + next(pr); +} + +static void +ppdefine(struct parser *pr) +{ + struct token tk0, tk; + struct macro mac = {0}; + vec_of(struct token) rlist = {0}; + + lex0(pr, &tk0); + if (!isppident(tk0)) { + error(&tk0.span, "macro name missing"); + ppskipline(pr); + return; + } + mac.name = tk0.ident; + mac.span = tk0.span.sl; + + if (peek(pr, 0) != '(') { + mac.fnlike = 1; + } + + while (lex0(pr, &tk) != '\n' && tk.t != TKEOF) { + if (!wsseparated(&tk0, &tk)) + warn(&tk.span, "no whitespace after macro name"); + vpush(&rlist, tk); + } + mac.rlist.tk = rlist.p; + mac.rlist.n = rlist.n; + putmac(&mac); +} + +static struct macrostack mstk[64], *mfreelist; +static bool +tryexpand(struct parser *pr, const struct token *tk) +{ + static bool inimstk; + struct macro *mac; + struct macrostack *l; + int macidx; + if (!inimstk) { + inimstk = 1; + for (int i = 0; i < arraylength(mstk); ++i) { + mstk[i].link = mfreelist; + mfreelist = &mstk[i]; + } + } + + if (!isppident(*tk) || !(mac = findmac(tk->ident))) + return 0; + + macidx = mac - macros.p; + /* prevent infinite recursion */ + for (l = pr->macstk; l; l = l->link) + if (l->mac == macidx) + return 0; + + if (mac->fnlike) { + } + if (mac->rlist.n) { + if (!(l = mfreelist)) fatal(&tk->span, "macro depth limit reached"); + l = mfreelist; + mfreelist = l->link; + l->link = pr->macstk; + l->mac = macidx; + l->idx = 0; + l->exspan = tk->span.ex; + pr->macstk = l; + } + return 1; +} + +static void +popmac(struct parser *pr) +{ + struct macrostack *stk; + + assert(stk = pr->macstk); + do { + pr->macstk = stk->link; + stk->link = mfreelist; + mfreelist = stk; + } while ((stk = pr->macstk) && stk->idx >= macros.p[stk->mac].rlist.n); +} + +int +lex(struct parser *pr, struct token *tk_) +{ + struct token tkx[1], *tk; + int t; + bool linebegin = 0; + + assert(tk_ != &pr->peektok); + tk = tk_ ? tk_ : tkx; + if (pr->peektok.t) { + *tk = pr->peektok; + memset(&pr->peektok, 0, sizeof pr->peektok); + return tk->t; + } + + if (pr->macstk) { + struct macro *mac = ¯os.p[pr->macstk->mac]; + struct rlist rl = mac->rlist; + *tk = rl.tk[pr->macstk->idx++]; + assert(tk->t); + tk->span.ex = pr->macstk->exspan; + if (tryexpand(pr, tk)) + return lex(pr, tk_); + if (pr->macstk->idx == rl.n) + popmac(pr); + return tk->t; + } + + for (;;) { + while ((t = lex0(pr, tk)) == '\n') linebegin = 1; + if (t == '#' && linebegin) { + if (lex0(pr, tk) == '\n') break; + else if (tk->t == TKIDENT && !strcmp(tk->ident, "define")) + ppdefine(pr); + else { + error(&tk->span, "invalid preprocessor directive"); + ppskipline(pr); + } + } else { + if (tryexpand(pr, tk)) + return lex(pr, tk_); + return t; + } + } + assert(0); +} + +int +lexpeek(struct parser *pr, struct token *tk_) +{ + struct token tkx[1], *tk; + uint t; + + tk = tk_ ? tk_ : tkx; + if ((t = pr->peektok.t)) { + *tk = pr->peektok; + return t; + } + t = lex(pr, tk); + pr->peektok = *tk; + return t; +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/main.c b/main.c new file mode 100644 index 0000000..6dcd6ea --- /dev/null +++ b/main.c @@ -0,0 +1,27 @@ +#include "parse.h" +#include + +struct option ccopt; + +static void +flushstd(void) +{ + ioflush(&bstdout); + ioflush(&bstderr); +} + +int +main(int argc, char **argv) +{ + struct parser pr; + + atexit(flushstd); + if (argc < 2) { + efmt("usage: %s [options] \n", *argv); + return 1; + } + + targ_init("amd64-sysv"); + initparser(&pr, argv[1]); + parse(&pr); +} diff --git a/mem.c b/mem.c new file mode 100644 index 0000000..11c1c8c --- /dev/null +++ b/mem.c @@ -0,0 +1,117 @@ +#include "common.h" +#include +#include +#include + +#define ALLOCERR(f) do { \ + efmt("%s: %s\n", f, strerror(errno)); \ + ioflush(&bstdout); \ + ioflush(&bstderr); \ + abort(); \ +} while (0) + +static void * +xcalloc(size_t n, const char *f) +{ + void *p = calloc(n, 1); + if (!p) ALLOCERR(f); + return p; +} + +static void * +xrealloc(void *p, size_t n, const char *f) +{ + p = p ? realloc(p, n) : malloc(n); + if (!p) ALLOCERR(f); + return p; +} + +/* vec: when _cap < 0, buf is dynamic allocated, otherwise an user provided buf */ + +void +vinit_(void **p, int *pcap, void *inlbuf, int cap, uint siz) +{ + assert(!*p); + *pcap = cap; + if (inlbuf) { + *p = inlbuf; + } else if (cap) { + *p = xrealloc(0, cap*siz, "vinit"); + *pcap = -cap; + } +} + +void +vpush_(void **p, int *pcap, uint *pn, uint siz) +{ + if (*pn == *pcap) { /* empty or inline buffer */ + int cap = *pcap ? *pcap * 2 : 8; + *p = xrealloc(NULL, cap * siz, "vpush"); + *pcap = -cap; + } else if (*pn == -*pcap) { /* dyn buf */ + *p = xrealloc(*p, -(*pcap *= 2) * siz, "vpush"); + } + assert(-(volatile int)*pcap > *pn && "overflow"); +} + +void * +vpushn_(void **p, int *pcap, uint *pn, uint siz, const void *dat, uint ndat) +{ + void *beg; + + for (uint i = 0; i < ndat; ++i) + vpush_(p, pcap, pn, siz); + beg = (char *)*p + *pn * siz; + memcpy(beg, dat, ndat * siz); + *pn += ndat; + return beg; +} + +struct arena * +newarena(uint chunksiz) +{ + struct arena *ar = xcalloc(offsetof(struct arena, mem) + chunksiz, "newarena"); + assert(chunksiz < 1u<<31 && "toobig"); + ar->cap = chunksiz; + ar->dyn = 1; + return ar; +} + +void * +alloc(struct arena **par, uint siz, uint align) +{ + uint idx; + struct arena *new; + + if (siz > (*par)->cap) { + new = newarena(siz); + new->n = siz; + new->prev = (*par)->prev; + (*par)->prev = new; + return new->mem; + } + align = align ? align : sizeof(void *); + idx = (uchar *)alignup((uintptr_t)&(*par)->mem[(*par)->n], align) - (*par)->mem; + if ((*par)->cap - idx >= siz) { + (*par)->n = idx + siz; + return (*par)->mem + idx; + } + new = newarena((*par)->cap); + new->prev = *par; + *par = new; + new->n = siz; + return new->mem; +} + +void +freearena(struct arena *ar) +{ + struct arena *prev; + for (; ar; ar = prev) { + prev = ar->prev; + if (ar->dyn) + free(ar); + } +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/op.def b/op.def new file mode 100644 index 0000000..99df839 --- /dev/null +++ b/op.def @@ -0,0 +1,60 @@ +/* OP NARG */ +_(mov, 1) +_(neg, 1) +_(not, 1) +_(cvtf4s, 1) +_(cvtf4u, 1) +_(cvtf4f8, 1) +_(cvtf8s, 1) +_(cvtf8u, 1) +_(cvtf8f4, 1) +_(cvts4f, 1) +_(cvtu4f, 1) +_(cvts8f, 1) +_(cvtu8f, 1) +_(extu1, 1) +_(extu2, 1) +_(extu4, 1) +_(exts1, 1) +_(exts2, 1) +_(exts4, 1) +_(add, 2) +_(sub, 2) +_(mul, 2) +_(umul, 2) +_(div, 2) +_(udiv, 2) +_(rem, 2) +_(urem, 2) +_(and, 2) +_(ior, 2) +_(xor, 2) +_(shl, 2) +_(slr, 2) +_(sar, 2) +_(shr, 2) +_(equ, 2) +_(neq, 2) +_(lth, 2) +_(lte, 2) +_(ulth, 2) +_(ulte, 2) +_(alloca1, 1) +_(alloca2, 1) +_(alloca4, 1) +_(alloca8, 1) +_(alloca16, 1) +_(loads1, 1) +_(loadu1, 1) +_(loads2, 1) +_(loadu2, 1) +_(loads4, 1) +_(loadu4, 1) +_(loadf4, 1) +_(loadi8, 1) +_(loadf8, 1) +_(store1, 2) +_(store2, 2) +_(store4, 2) +_(store8, 2) +_(call, 2) diff --git a/parse.c b/parse.c new file mode 100644 index 0000000..44979d0 --- /dev/null +++ b/parse.c @@ -0,0 +1,2104 @@ +#include "parse.h" +#include "common.h" +#include "ir.h" + +static struct env toplevel; +static struct arena *tlarena; + +#define peek(Pr,Tk) lexpeek(Pr,Tk) + +void +initparser(struct parser *pr, const char *file) +{ + const char *error; + struct memfile *f; + + memset(pr, 0, sizeof *pr); + pr->fileid = openfile(&error, &f, file); + if (pr->fileid < 0) + fatal(NULL, "Cannot open %'s: %s", file, error); + pr->dat = f->p; + pr->ndat = f->n; +} + +static struct decl *finddecl(struct parser *pr, const char *name); + +static bool +isdecltok(struct parser *pr) +{ + struct decl *decl; + struct token tk; + switch (peek(pr, &tk)) { + case TKWsigned: case TKWunsigned: case TKWshort: case TKWlong: + case TKWint: case TKWchar: case TKW_Bool: case TKWauto: + case TKWstruct: case TKWunion: case TKWenum: case TKWtypedef: + case TKWextern: case TKWstatic: case TKWinline: case TKW_Noreturn: + case TKWconst: case TKWvolatile: case TKWvoid: case TKWfloat: + case TKWdouble: + return 1; + case TKIDENT: + return (decl = finddecl(pr, tk.ident)) && decl->scls == SCTYPEDEF; + } + return 0; +} + +static bool +match(struct parser *pr, struct token *tk, enum toktag t) +{ + if (peek(pr, NULL) == t) { + lex(pr, tk); + return 1; + } + return 0; +} + +static bool +expect(struct parser *pr, enum toktag t, const char *s) +{ + struct token tk; + if (!match(pr, &tk, t)) { + peek(pr, &tk); + if (aisprint(t)) tk.span.ex.len = tk.span.sl.len = 1; + error(&tk.span, "expected %'tt%s%s", t, s?" ":"",s ? s : ""); + return 0; + } + return 1; +} + +static struct token +expectdie(struct parser *pr, enum toktag t, const char *s) +{ + struct token tk; + if (!match(pr, &tk, t)) + fatal(&tk.span, "expected %'tt%s%s", t, s?" ":"",s ? s : ""); + return tk; +} + +enum declkind { + DTOPLEVEL, + DFUNCPARAM, + DFUNCVAR, + DFIELD, + DCASTEXPR, +}; + +struct declstate { + enum declkind kind; + union type base; + enum storageclass scls; + enum qualifier qual; + uint align; + bool more, varini, funcdef, tagdecl; + const char **pnames; +}; + +static struct decl pdecl(struct declstate *st, struct parser *pr); + +/*******/ +/* ENV */ +/*******/ + +static void +envdown(struct parser *pr) +{ + struct env *e = alloc(&pr->fnarena, sizeof *e, 0); + e->decls = NULL; + e->tagged = NULL; + e->up = pr->env; + pr->env = e; +} + +static void +envup(struct parser *pr) +{ + assert(pr->env->up); + pr->env = pr->env->up; +} + + +static bool +redeclarationok(const struct decl *old, const struct decl *new) +{ + if (old->scls != new->scls) return 0; + switch (old->scls) { + case SCTYPEDEF: + return old->t.bits == new->t.bits; + } + return 0; +} + +static struct decl * +putdecl(struct parser *pr, const struct decl *decl) +{ + struct decls *l; + for (l = pr->env->decls; l; l = l->prev) { + if (decl->name == l->decl.name && !redeclarationok(&l->decl, decl)) { + error(&decl->span, "incompatible redeclaration of '%s'", decl->name); + note(&l->decl.span, "previously declared here"); + } + } + l = alloc(pr->env->up ? &pr->fnarena : &tlarena, sizeof *l, 0); + l->decl = *decl; + l->prev = pr->env->decls; + pr->env->decls = l; + return &l->decl; +} + +static struct decl * +finddecl(struct parser *pr, const char *name) +{ + struct env *e; + struct decls *l; + for (e = pr->env; e; e = e->up) { + for (l = e->decls; l; l = l->prev) { + if (name == l->decl.name) { + return &l->decl; + } + } + } + return NULL; +} + +static union type +gettagged(struct parser *pr, struct span *span, enum typetag tt, const char *name, bool dodef) +{ + struct env *e; + struct tagged *l; + struct typedata td = {0}; + for (e = pr->env; e; e = e->up) { + for (l = e->tagged; l; l = l->prev) { + if (name == ttypenames[typedata[l->t.dat].id]) { + if (dodef && e != pr->env) + goto Break2; + *span = l->span; + return l->t; + } + } + } +Break2: + if (tt == TYENUM) + return mktype(0); + l = alloc(pr->env->up ? &pr->fnarena : &tlarena, sizeof *l, 0); + l->prev = pr->env->tagged; + pr->env->tagged = l; + l->span = *span; + td.t = tt; + return l->t = mktagtype(name, &td); +} + +static union type +deftagged(struct parser *pr, struct span *span, enum typetag tt, const char *name) +{ + struct tagged *l; + struct typedata td = {0}; + for (l = pr->env->tagged; l; l = l->prev) { + if (name == ttypenames[typedata[l->t.dat].id]) { + *span = l->span; + return l->t; + } + } + l = alloc(pr->env->up ? &pr->fnarena : &tlarena, sizeof *l, 0); + l->prev = pr->env->tagged; + pr->env->tagged = l; + l->span = *span; + td.t = tt; + return l->t = mktagtype(name, &td); +} + +/********/ +/* EXPR */ +/********/ + +#define iszero(ex) ((ex).t == ENUMLIT && (ex).u == 0) + +static bool +assigncheck(union type t, const struct expr *src) +{ + if (assigncompat(t, typedecay(src->ty))) return 1; + if (t.t == TYPTR && iszero(*src)) return 1; + return 0; +} + +#define mkexpr(t_,span_,ty_,...) ((struct expr){.t=(t_), .ty=(ty_), .span=(span_), __VA_ARGS__}) + +static struct expr * +exprdup(struct parser *pr, const struct expr *e) +{ + return memcpy(alloc(&pr->exarena, sizeof *e, 0), e, sizeof *e); +} +static struct expr * +exprdup2(struct parser *pr, const struct expr *e1, const struct expr *e2) +{ + struct expr *r = alloc(&pr->exarena, 2*sizeof *r, 0); + r[0] = *e1; + r[1] = *e2; + return r; +} + +static struct expr expr(struct parser *pr); +static struct expr commaexpr(struct parser *pr); + +/* TODO recursive descent is probably slow, use precedence climbing? */ + +static bool +islvalue(const struct expr *ex) +{ + if (ex->t == EGETF) return islvalue(ex->sub); + return ex->t == ESYM || ex->t == EDEREF; +} + +static union type +argpromote(union type t) +{ + if (isint(t)) t.t = intpromote(t.t); + else if (t.t == TYFLOAT) t.t = TYDOUBLE; + return t; +} + +static void +postfixops(struct parser *pr, struct expr *lhs) +{ + struct expr ex, tmp; + struct span span; + struct token tk; + union type ty; + + for (;;) + switch (peek(pr, &tk)) { + default: return; + case TKINC: + case TKDEC: + lex(pr, &tk); + if (!isscalar(lhs->ty)) + error(&lhs->span, "invalid operand to postfix %tt (%ty)", &tk, lhs->ty); + span = lhs->span; + if (!joinspan(&span.ex, tk.span.ex)) span = tk.span; + if (lhs->ty.t == TYPTR && isincomplete(typechild(lhs->ty))) + error(&span, "arithmetic on pointer to incomplete type (%ty)", lhs->ty); + else if (lhs->ty.t == TYPTR && typechild(lhs->ty).t == TYFUNC) + error(&span, "arithmetic on function pointer", &tk, lhs->ty); + *lhs = mkexpr(tk.t == TKINC ? EPOSTINC : EPOSTDEC, span, lhs->ty, .sub = exprdup(pr, lhs)); + break; + case '[': + lex(pr, NULL); + ex = commaexpr(pr); + span = lhs->span; + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, ex.span.ex) + || (peek(pr, &tk), !joinspan(&span.ex, tk.span.ex))) + span = tk.span; + expect(pr, ']', NULL); + + if (isint(lhs->ty) && isptrcvt(ex.ty)) { + /* swap idx[ptr] -> ptr[idx] */ + tmp = *lhs; + *lhs = ex; + ex = tmp; + } + + if (lhs->ty.t == TYPTR || lhs->ty.t == TYARRAY) { + if (isincomplete(ty = typechild(lhs->ty))) + error(&span, "cannot dereference pointer to incomplete type (%ty)", ty); + else if (ty.t == TYFUNC) + error(&span, "subscripted value is pointer to function"); + } else { + error(&lhs->span, "subscripted value is not pointer-convertible (%ty)", ex.ty); + ty = mktype(TYINT); + } + if (!isint(ex.ty)) + error(&ex.span, "array subscript is not integer (%ty)", ex.ty); + if (!iszero(ex)) { + ex.sub = exprdup2(pr, lhs, &ex); + ex.t = EADD; + ex.span = span; + ex.ty = typedecay(lhs->ty); + } + ex.sub = exprdup(pr, iszero(ex) ? lhs : &ex); + ex.span = span; + ex.t = EDEREF; + ex.ty = ty; + *lhs = ex; + break; + case '(': + span = lhs->span; + lex(pr, &tk); + if (lhs->ty.t == TYPTR) /* auto-deref when calling a function pointer */ + *lhs = mkexpr(EDEREF, lhs->span, typechild(lhs->ty), .sub = exprdup(pr, lhs)); + ty = lhs->ty; + if (ty.t != TYFUNC) error(&lhs->span, "calling a value of type '%ty'", lhs->ty); + { + const struct typedata *td = &typedata[ty.dat]; + struct expr argbuf[10]; + vec_of(struct expr) args = VINIT(argbuf, arraylength(argbuf)); + struct span spanbck = tk.span; + bool spanok = joinspan(&span.ex, tk.span.ex); + if (!match(pr, &tk, ')')) for (;;) { + ex = expr(pr); + spanok = spanok && joinspan(&span.ex, ex.span.ex); + if (ty.t == TYFUNC && args.n == td->nmemb && !td->variadic && !td->kandr) + error(&ex.span, "too many args to function taking %d params", td->nmemb); + if (ty.t == TYFUNC && args.n < td->nmemb && !td->kandr) { + if (!assigncheck(td->param[args.n], &ex)) + error(&ex.span, "passing arg of type '%ty' is incompatible with '%ty'", + ex.ty, td->param[args.n]); + } + vpush(&args, ex); + if (match(pr, &tk, ',')) { + spanok = spanok && joinspan(&span.ex, tk.span.ex); + } else if (expect(pr, ')', "or ',' after arg")) { + break; + } + } + if (!spanok || !joinspan(&span.ex, tk.span.ex)) span = spanbck; + ex = mkexpr(ECALL, span, ty.t == TYFUNC ? td->ret : ty, .narg = args.n, + .sub = alloc(&pr->exarena, (args.n+1)*sizeof(struct expr), 0)); + ex.sub[0] = *lhs; + memcpy(ex.sub+1, args.p, args.n*sizeof(struct expr)); + *lhs = ex; + vfree(&args); + } + break; + } +} + +static struct expr +primaryex(struct parser *pr) +{ + struct token tk; + struct decl *decl; + struct expr ex; + + switch (lex(pr, &tk)) { + case TKNUMLIT: + if (!tk.ty) + error(&tk.span, "invalid number literal %'tk", &tk); + ex = mkexpr(ENUMLIT, tk.span, mktype(tk.ty), .u = tk.u); + break; + case TKSTRLIT: + ++tk.s.n; + ex = mkexpr(ESTRLIT, tk.span, mkarrtype(mktype(TYCHAR), 0, tk.s.n), .s = tk.s); + break; + case TKIDENT: + decl = finddecl(pr, tk.ident); + if (!decl) { + error(&tk.span, "undeclared identifier %'tk", &tk); + ex = mkexpr(ESYM, tk.span, mktype(TYINT), .sym = NULL); + } else if (decl->scls == SCTYPEDEF) { + error(&tk.span, "unexpected typename %'tk (expected expression)", &tk); + ex = mkexpr(ESYM, tk.span, decl->t, .sym = NULL); + } else { + ex = mkexpr(ESYM, tk.span, decl->t, .qual = decl->qual, .sym = decl); + } + break; + default: + fatal(&tk.span, "expected expression (near %'tk)", &tk); + } + postfixops(pr, &ex); + return ex; +} + +static bool +castcheck(union type to, const struct expr *ex) +{ + union type src = ex->ty; + if (to.t == TYVOID) return 1; + if (isagg(to)) return 0; + if (to.bits == src.bits) return 1; + if (isarith(to) && isarith(src)) return 1; + if (isint(to) && isptrcvt(src)) return 1; + if (to.t == TYPTR && isint(src)) return 1; + if (to.t == TYPTR && isptrcvt(src)) return 1; + return 0; +} + +static struct expr +unaryex(struct parser *pr) +{ + enum exprkind ek; + struct token tk; + struct span span; + struct expr ex; + union type ty; + uint siz; + + switch (peek(pr, &tk)) { + default: return primaryex(pr); + case '+': + ek = EPLUS; + goto Alu; + case '-': + ek = ENEG; + goto Alu; + case '~': + ek = ECOMPL; + goto Alu; + case '!': + ek = ELOGNOT; + Alu: + lex(pr, NULL); + span = tk.span; + ex = unaryex(pr); + joinspan(&span.ex, ex.span.ex); + ty = ek == ELOGNOT ? mktype(TYINT) : cvtarith(ex.ty, ex.ty); + if (!ty.t || (ek == ECOMPL && !isint(ty))) { + error(&tk.span, "invalid operand to %'tk (%ty)", &tk, ex.ty); + ty = mktype(TYINT); + } + return mkexpr(ek, span, ty, .sub = exprdup(pr, &ex)); + case '*': + lex(pr, NULL); + span = tk.span; + ex = unaryex(pr); + joinspan(&span.ex, ex.span.ex); + if (ex.ty.t == TYPTR || ex.ty.t == TYARRAY) { + ty = typechild(ex.ty); + if (isincomplete(ty)) + error(&span, "cannot dereference pointer to incomplete type (%ty)", ty); + } else { + error(&span, "invalid operand to unary * (%ty)", ex.ty); + ty = ex.ty; + } + return mkexpr(EDEREF, span, ty, .qual = ex.ty.flag & TFCHLDQUAL, .sub = exprdup(pr, &ex)); + case '&': + lex(pr, NULL); + span = tk.span; + ex = unaryex(pr); + joinspan(&span.ex, ex.span.ex); + if (!islvalue(&ex)) + error(&span, "operand to unary & is not an lvalue"); + return mkexpr(EADDROF, span, mkptrtype(ex.ty, ex.qual), .sub = exprdup(pr, &ex)); + case '(': + lex(pr, NULL); + span = tk.span; + if (isdecltok(pr)) { /* (type)cast */ + struct declstate st = { DCASTEXPR }; + struct decl decl = pdecl(&st, pr); + ty = decl.t; + expect(pr, ')', NULL); + ex = unaryex(pr); + joinspan(&span.ex, ex.span.ex); + if (!castcheck(ty, &ex)) + error(&span, "cannot cast value of type '%ty' to '%ty'", ex.ty, ty); + return mkexpr(ECAST, span, ty, .sub = exprdup(pr, &ex)); + } else { + ex = commaexpr(pr); + expect(pr, ')', NULL); + return ex; + } + case TKWsizeof: + lex(pr, NULL); + span = tk.span; + if (match(pr, NULL, '(')) { + struct token tk2; + if (isdecltok(pr)) { /* sizeof(type) */ + struct declstate st = { DCASTEXPR }; + struct decl decl = pdecl(&st, pr); + ty = decl.t; + } else { /* sizeof(expr) */ + ex = commaexpr(pr); + ty = ex.ty; + } + peek(pr, &tk2); + expect(pr, ')', NULL); + joinspan(&span.ex, tk2.span.ex); + } else { /* sizeof expr */ + ex = unaryex(pr); + ty = ex.ty; + joinspan(&span.ex, ex.span.ex); + } + siz = typesize(ty); + if (isincomplete(ty)) + error(&span, "cannot apply sizeof to incomplete type (%ty)", ty); + else if (ty.t == TYFUNC) + error(&span, "cannot apply sizeof to function type (%ty)", ty); + return mkexpr(ENUMLIT, span, mktype(targ_sizetype), .u = siz); + } +} + +static void +bintypeerr(const struct span *span, enum toktag tt, union type lhs, union type rhs) +{ + error(span, "bad operands to %tt (%ty, %ty)", tt, lhs, rhs); +} + +static bool +relationalcheck(const struct expr *a, const struct expr *b) +{ + union type t1 = a->ty, t2 = b->ty; + if (isarith(t1) && isarith(t2)) return 1; + if (isptrcvt(t1) && isptrcvt(t2)) { + t1 = typedecay(t1); + t2 = typedecay(t2); + return t1.dat == t2.dat; + } + return 0; +} + +static struct expr +multiplicativeex(struct parser *pr) +{ + static struct expr (*const NEXT)(struct parser *pr) = unaryex; + struct token tk; + struct span span; + union type ty; + enum exprkind ek; + struct expr ex = NEXT(pr), rhs; + + for (;;) { + switch (peek(pr, &tk)) { + default: return ex; + case '*': ek = EMUL; break; + case '/': ek = EDIV; break; + case '%': ek = EREM; break; + } + lex(pr, &tk); + rhs = NEXT(pr); + span.sl = tk.span.sl; + span.ex = ex.span.ex; + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) + span.ex = tk.span.ex; + ty = cvtarith(ex.ty, rhs.ty); + if (!ty.t || (ek == EREM && isflt(ty))) { + bintypeerr(&span, tk.t, ex.ty, rhs.ty); + ty.t = TYINT; + } + ex = mkexpr(ek, span, ty, .sub = exprdup2(pr, &ex, &rhs)); + } +} + +static struct expr +additiveex(struct parser *pr) +{ + static struct expr (*const NEXT)(struct parser *pr) = multiplicativeex; + struct token tk; + struct span span; + union type ty; + struct expr ex = NEXT(pr), rhs; + + while (match(pr, &tk, '+') || match(pr, &tk, '-')) { + rhs = NEXT(pr); + ty = ex.ty; + span.sl = tk.span.sl; + span.ex = ex.span.ex; + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) + span.ex = tk.span.ex; + if (tk.t == '+' && isptrcvt(rhs.ty)) { + struct expr swaptmp = ex; + ex = rhs; + rhs = swaptmp; + ty = ex.ty; + } + if (isarith(ty) && isarith(rhs.ty)) ty = cvtarith(ty, rhs.ty); + else if (!in_range(ty.t, TYPTR, TYARRAY) || !isint(rhs.ty)) + bintypeerr(&span, tk.t, ty, rhs.ty); + else { + union type pointee = typechild(typedecay(ty)); + if (isincomplete(pointee)) + error(&span, "arithmetic on pointer to incomplete type (%ty)", ty); + else if (pointee.t == TYFUNC) + error(&span, "arithmetic on function pointer (%ty)", ex.ty); + } + ex = mkexpr(tk.t == '+' ? EADD : ESUB, span, ty, .sub = exprdup2(pr, &ex, &rhs)); + } + return ex; +} + +static struct expr +shiftex(struct parser *pr) +{ + static struct expr (*const NEXT)(struct parser *pr) = additiveex; + struct token tk; + struct span span; + union type ty; + struct expr ex = NEXT(pr), rhs; + + while (match(pr, &tk, TKSHL) || match(pr, &tk, TKSHR)) { + rhs = NEXT(pr); + ty = ex.ty; + span.sl = tk.span.sl; + span.ex = ex.span.ex; + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) + span.ex = tk.span.ex; + if (!isint(ty) || !isint(rhs.ty)) + bintypeerr(&span, tk.t, ty, rhs.ty); + ty.t = intpromote(ty.t); + ex = mkexpr(tk.t == TKSHL ? ESHL : ESHR, span, ty, .sub = exprdup2(pr, &ex, &rhs)); + } + return ex; +} + +static struct expr +relationalex(struct parser *pr) +{ + static struct expr (*const NEXT)(struct parser *pr) = shiftex; + struct token tk; + struct span span; + enum exprkind ek; + struct expr ex = NEXT(pr), rhs; + for (;;) { + switch (peek(pr, &tk)) { + default: return ex; + case '<': ek = ELTH; break; + case '>': ek = EGTH; break; + case TKLTE: ek = ELTE; break; + case TKGTE: ek = EGTE; break; + } + lex(pr, &tk); + rhs = NEXT(pr); + span.sl = tk.span.sl; + span.ex = ex.span.ex; + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) + span.ex = tk.span.ex; + if (!relationalcheck(&ex, &rhs)) + bintypeerr(&span, tk.t, ex.ty, rhs.ty); + ex = mkexpr(ek, span, mktype(TYINT), .sub = exprdup2(pr, &ex, &rhs)); + } +} + +static bool +isnullpo(const struct expr *ex) +{ + static const union type voidptr = {{ TYPTR, .flag = TFCHLDPRIM, .child = TYVOID }}; + if (ex->t == ECAST && ex->ty.bits == voidptr.bits) + ex = ex->sub; + return iszero(*ex); +} + +static bool +equalitycheck(const struct expr *a, const struct expr *b) +{ + union type t1 = a->ty, t2 = b->ty; + if (isarith(t1) && isarith(t2)) return 1; + if (isptrcvt(t1) && isptrcvt(t2)) { + t1 = typedecay(t1); + t2 = typedecay(t2); + return t1.dat == t2.dat || typechild(t1).t == TYVOID || typechild(t2).t == TYVOID; + } + if (isptrcvt(t1) && isnullpo(b)) return 1; + return isptrcvt(t2) && isnullpo(a); +} + +static struct expr +equalityex(struct parser *pr) +{ + static struct expr (*const NEXT)(struct parser *pr) = relationalex; + struct token tk; + struct span span; + struct expr ex = NEXT(pr), rhs; + + while (match(pr, &tk, TKEQU) || match(pr, &tk, TKNEQ)) { + rhs = NEXT(pr); + span.sl = tk.span.sl; + span.ex = ex.span.ex; + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) + span.ex = tk.span.ex; + if (!equalitycheck(&ex, &rhs)) + bintypeerr(&span, tk.t, ex.ty, rhs.ty); + ex = mkexpr(tk.t == TKEQU ? EEQU : ENEQ, span, + mktype(TYINT), .sub = exprdup2(pr, &ex, &rhs)); + } + return ex; +} + +#define DEFBINEX(name, Tk, E, Next) \ + static struct expr \ + bit##name##ex(struct parser *pr) \ + { \ + static struct expr (*const NEXT)(struct parser *pr) = Next; \ + struct token tk; \ + struct span span; \ + struct expr ex = NEXT(pr), rhs; \ + while (match(pr, &tk, Tk)) { \ + rhs = NEXT(pr); \ + span.sl = tk.span.sl; \ + span.ex = ex.span.ex; \ + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) \ + span.ex = tk.span.ex; \ + if (!isint(ex.ty) || !isint(rhs.ty)) \ + bintypeerr(&span, tk.t, ex.ty, rhs.ty); \ + ex = mkexpr(E, span, cvtarith(ex.ty, rhs.ty), .sub = exprdup2(pr, &ex, &rhs)); \ + } \ + return ex; \ + } +DEFBINEX(and, '&', EBAND, equalityex) +DEFBINEX(xor, '^', EXOR, bitandex) +DEFBINEX(ior, '|', EBIOR, bitxorex) +#undef DEFBINEX + +#define DEFLOGEX(name, Tk, E, Next) \ + static struct expr \ + log##name##ex(struct parser *pr) \ + { \ + static struct expr (*const NEXT)(struct parser *pr) = Next; \ + struct token tk; \ + struct span span; \ + struct expr ex = NEXT(pr), rhs; \ + while (match(pr, &tk, Tk)) { \ + rhs = NEXT(pr); \ + span.sl = tk.span.sl; \ + span.ex = ex.span.ex; \ + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) \ + span.ex = tk.span.ex; \ + if (!isscalar(ex.ty) || !isscalar(rhs.ty)) \ + bintypeerr(&span, tk.t, ex.ty, rhs.ty); \ + ex = mkexpr(E, span, mktype(TYINT), .sub = exprdup2(pr, &ex, &rhs)); \ + } \ + return ex; \ + } +DEFLOGEX(and, TKLOGAND, ELOGAND, bitiorex) +DEFLOGEX(ior, TKLOGIOR, ELOGIOR, logandex) +#undef DEFLOGEX + +static union type /* 6.5.15 Conditional operator Constraints */ +condtype(const struct expr *a, const struct expr *b) +{ + union type t1 = typedecay(a->ty), t2 = typedecay(b->ty), s1, s2; + if (isarith(t1) && isarith(t2)) return cvtarith(t1, t2); + if (t1.bits == t2.bits) return t1; + if (t1.t == TYPTR && t2.t == TYPTR) { + s1 = typechild(t1); + s2 = typechild(t2); + if (s1.bits == s2.bits || s2.t == TYVOID || s1.t == TYVOID) { + return mkptrtype(s1.t == TYVOID ? s1 : s2, (t1.flag | t2.flag) & TFCHLDQUAL); + } + } + if (t1.t == TYPTR && isnullpo(b)) return t1; + if (isnullpo(a) && t2.t == TYPTR) return t2; + return mktype(0); +} + +static struct expr +condex(struct parser *pr) +{ + static struct expr (*const NEXT)(struct parser *pr) = logiorex; + struct token tk; + struct span span; + union type ty; + struct expr ex = NEXT(pr), tr, fl, *sub; + + if (match(pr, &tk, '?')) { + if (!isscalar(ex.ty)) + error(&ex.span, "?: condition is not a scalar type (%ty)", ex.ty); + span.sl = tk.span.sl; + span.ex = ex.span.ex; + tr = condex(pr); + joinspan(&tk.span.ex, tr.span.ex); + expect(pr, ':', NULL); + fl = condex(pr); + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, tr.span.ex) + || !joinspan(&span.ex, fl.span.ex)) + span.ex = tk.span.ex; + ty = condtype(&tr, &fl); + if (!ty.t) { + error(&span, "bad operands to conditional expression (%ty, %ty)", tr.ty, fl.ty); + ty = tr.ty; + } + sub = alloc(&pr->exarena, 3*sizeof*sub, 0); + sub[0] = ex, sub[1] = tr, sub[2] = fl; + ex = mkexpr(ECOND, span, ty, .sub = sub); + } + return ex; +} + +static struct expr +assignex(struct parser *pr) +{ + static struct expr (*const NEXT)(struct parser *pr) = condex; + struct token tk; + struct span span; + union type ty, res, pointee; + enum { KANY, KADDITIVE, KARITH, KSHFT, KBIT } k; + enum exprkind ek; + struct expr ex = NEXT(pr), rhs; + + switch (peek(pr, &tk)) { + default: return ex; +#define OP(Tk, E, K) case Tk: ek = E, k = K; break; + OP('=', ESET, KANY) + OP(TKSETADD, ESETADD, KADDITIVE) + OP(TKSETSUB, ESETSUB, KADDITIVE) + OP(TKSETMUL, ESETMUL, KARITH) + OP(TKSETDIV, ESETDIV, KARITH) + OP(TKSETMOD, ESETREM, KARITH) + OP(TKSETSHL, ESETSHL, KSHFT) + OP(TKSETSHR, ESETSHR, KSHFT) + OP(TKSETAND, ESETAND, KBIT) + OP(TKSETIOR, ESETIOR, KBIT) + OP(TKSETXOR, ESETXOR, KBIT) +#undef OP + } + lex(pr, &tk); + rhs = assignex(pr); + ty = ex.ty; + span.sl = tk.span.sl; + span.ex = ex.span.ex; + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) + span.ex = tk.span.ex; + if (!islvalue(&ex)) { + error(&ex.span, "left-hand-side of assignment is not an lvalue"); + return ex; + } + if (ty.t == TYARRAY) + error(&ex.span, "cannot assign to array designator (%ty)", ex.ty); + else if (ty.t == TYFUNC) + error(&ex.span, "cannot assign to function designator (%ty)", ex.ty); + else if (isincomplete(ty)) + error(&ex.span, "cannot assign to incomplete type (%ty)", ex.ty); + else if (ex.qual & QCONST) + error(&ex.span, "cannot assign to const-qualified lvalue (%tq)", ex.ty, ex.qual); + switch (k) { + case KANY: + if (!assigncheck(ty, &rhs)) + bintypeerr(&tk.span, tk.t, ty, rhs.ty); + break; + case KADDITIVE: + if (ty.t == TYPTR) pointee = typechild(ty); + if (ty.t == TYPTR && !isincomplete(pointee) && pointee.t != TYFUNC && isint(rhs.ty)) + break; + /* fallthru */ + case KARITH: + res = cvtarith(ty, rhs.ty); + if (!res.t) + bintypeerr(&tk.span, tk.t, ty, rhs.ty); + else ty = res; + break; + case KSHFT: + if (!isint(ty) || !isint(rhs.ty)) + bintypeerr(&tk.span, tk.t, ty, rhs.ty); + ty.t = intpromote(ty.t); + break; + case KBIT: + if (!isint(ty) || !isint(rhs.ty)) + bintypeerr(&tk.span, tk.t, ty, rhs.ty); + ty = cvtarith(ty, rhs.ty); + assert(ty.t); + break; + } + return mkexpr(ek, span, ex.ty, .sub = exprdup2(pr, &ex, &rhs)); +} + +static struct expr +expr(struct parser *pr) +{ + return assignex(pr); +} + +static struct expr +commaexpr(struct parser *pr) +{ + static struct expr (*const NEXT)(struct parser *pr) = assignex; + struct span span; + struct expr ex = NEXT(pr), rhs; + struct token tk; + + while (match(pr, &tk, ',')) { + span.sl = tk.span.sl; + span.ex = ex.span.ex; + rhs = NEXT(pr); + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) + span.ex = tk.span.ex; + ex = mkexpr(ESEQ, span, rhs.ty, .sub = exprdup2(pr, &ex, &rhs)); + } + return ex; +} + +/*********/ +/* -> IR */ +/*********/ + +static union irref exprvalue(struct function *, const struct expr *); + +static union irref +expraddr(struct function *fn, const struct expr *ex) +{ + struct decl *decl; + + switch (ex->t) { + case ESYM: + decl = ex->sym; + assert(decl != NULL); + switch (decl->scls) { + case SCAUTO: case SCREGISTER: + return mkref(RTMP, decl->id); + case SCEXTERN: case SCNONE: + return mksymref(fn, decl->name); + case SCSTATIC: + assert(!"nyi"); + break; + default: + assert(0); + } + break; + case EDEREF: + return exprvalue(fn, ex->sub); + default: + assert(!"lvalue?>"); + } + +} + +static union irref +genload(struct function *fn, union type t, union irref ref) +{ + struct instr ins = {0}; + + assert(isscalar(t)); + ins.cls = type2cls[t.t]; + switch (typesize(t)) { + case 1: ins.op = issigned(t) ? Oloads1 : Oloadu1; break; + case 2: ins.op = issigned(t) ? Oloads2 : Oloadu2; break; + case 4: ins.op = isflt(t) ? Oloadf4 : issigned(t) ? Oloads4 : Oloadu4; break; + case 8: ins.op = isflt(t) ? Oloadf8 : Oloadi8; break; + default: assert(0); + } + ins.l = ref; + return addinstr(fn, ins); +} + +static union irref +genstore(struct function *fn, union type t, union irref ptr, union irref val) +{ + struct instr ins = {0}; + + assert(isscalar(t)); + switch (typesize(t)) { + case 1: ins.op = Ostore1; break; + case 2: ins.op = Ostore2; break; + case 4: ins.op = Ostore4; break; + case 8: ins.op = Ostore8; break; + default: assert(0); + } + ins.l = ptr; + ins.r = val; + return addinstr(fn, ins); +} + +static union irref +cvt(struct function *fn, enum typetag to, enum typetag from, union irref ref) +{ + enum irclass kto = type2cls[to], kfrom = type2cls[from]; + struct instr ins = {0}; + if (kto == kfrom) return ref; + if (ref.t == RICON && kto < KF4) return ref; + + ins.cls = kto; + ins.l = ref; + if (kisflt(kto) || kisflt(kfrom)) { + if (kto == KIP) kto = siz2intcls[cls2siz[kto]]; + if (kfrom == KIP) kfrom = siz2intcls[cls2siz[kfrom]]; + if (kisflt(kto) && kfrom == KI4) ins.op = issignedt(from) ? Ocvts4f : Ocvtu4f; + else if (kisflt(kto) && kfrom == KI8) ins.op = issignedt(from) ? Ocvts8f : Ocvtu8f; + else if (kto == KF8 && kfrom == KF4) ins.op = Ocvtf4f8; + else if (kto == KF4 && kfrom == KF8) ins.op = Ocvtf8f4; + else if (kfrom == KF4) ins.op = issignedt(to) ? Ocvtf4s : Ocvtf4u; + else if (kfrom == KF8) ins.op = issignedt(to) ? Ocvtf8s : Ocvtf8u; + else assert(0); + } else { + if (kfrom == KI4 && issignedt(from)) ins.op = Oexts4; + else if (kfrom == KI4) ins.op = Oextu4; + else ins.op = Omov; + } + return addinstr(fn, ins); +} + +static union irref +narrow(struct function *fn, enum irclass to, enum typetag tt, union irref ref) +{ + struct instr ins; + assert(isintt(tt)); + if (targ_primsizes[tt] >= cls2siz[to]) return ref; + ins.cls = to; + switch (targ_primsizes[tt]) { + case 1: ins.op = issignedt(tt) ? Oexts1 : Oextu1; break; + case 2: ins.op = issignedt(tt) ? Oexts2 : Oextu2; break; + case 4: ins.op = issignedt(tt) ? Oexts4 : Oextu4; break; + default: assert(0); + } + ins.l = ref; + return addinstr(fn, ins); +} + +static union irref +exprvalue(struct function *fn, const struct expr *ex) +{ + union type ty; + union irref r, q; + enum irclass cls = type2cls[ex->ty.t]; + struct instr ins = {0}; + int swp = 0; + struct expr *sub = ex->sub; + + if (ex->ty.t == TYARRAY) return expraddr(fn, ex); + switch (ex->t) { + case ENUMLIT: + if (isflt(ex->ty)) + return mkfltcon(fn, cls, ex->f); + return mkintcon(fn, cls, ex->i); + case ESYM: + return genload(fn, ex->ty, expraddr(fn, ex)); + case ECAST: + if (ex->ty.t == TYVOID) return exprvalue(fn, sub); + case EPLUS: + return cvt(fn, ex->ty.t, sub->ty.t, exprvalue(fn, sub)); + case ENEG: + ins.op = Oneg; + goto Unary; + case ECOMPL: + ins.op = Onot; + Unary: + ins.l = exprvalue(fn, sub); + ins.l = cvt(fn, ex->ty.t, sub->ty.t, ins.l); + ins.cls = cls; + return addinstr(fn, ins); + case EDEREF: + return genload(fn, ex->ty, exprvalue(fn, sub)); + case EADDROF: + return expraddr(fn, sub); + case EMUL: + ins.op = isunsigned(ex->ty) ? Oumul : Omul; + goto BinArith; + case EDIV: + ins.op = isunsigned(ex->ty) ? Oudiv : Odiv; + goto BinArith; + case EREM: + ins.op = issigned(ex->ty) ? Orem : Ourem; + goto BinArith; + case EBAND: + ins.op = Oand; + goto BinArith; + case EXOR: + ins.op = Oxor; + goto BinArith; + case EBIOR: + ins.op = Oior; + goto BinArith; + case ESHL: + ins.op = Oshl; + goto BinArith; + case ESHR: + ins.op = issigned(ex->ty) ? Osar : Oshr; + goto BinArith; + case ESUB: + ins.op = Osub; + goto BinArith; + case EADD: + ins.op = Oadd; + BinArith: + ins.l = exprvalue(fn, &sub[0]); + ins.l = cvt(fn, ex->ty.t, sub[0].ty.t, ins.l); + ins.r = exprvalue(fn, &sub[1]); + if ((ins.op != Oadd && ins.op != Osub) || cls != KIP) + ins.r = cvt(fn, ex->ty.t, sub[1].ty.t, ins.r); + else { + uint siz = typesize(typechild(ex->ty)); + enum typetag tt = intpromote(sub[1].ty.t); + struct instr scale = { Omul, type2cls[tt], ins.r }; + scale.r = mkintcon(fn, type2cls[tt], siz); + ins.r = addinstr(fn, scale); + } + ins.cls = cls; + return addinstr(fn, ins); + case EPOSTINC: + case EPOSTDEC: + ins.op = ex->t == EPOSTINC ? Oadd : Osub; + ins.cls = cls; + r = expraddr(fn, sub); + ins.l = genload(fn, sub->ty, r); + if (ex->ty.t == TYPTR) + ins.r = mkintcon(fn, KI4, typesize(typechild(ex->ty))); + else + ins.r = mkref(RICON, 1); + genstore(fn, sub->ty, r, addinstr(fn, ins)); + return ins.l; + case EEQU: + ins.op = Oequ; + goto Cmp; + case ENEQ: + ins.op = Oneq; + goto Cmp; + case ELTH: + ins.op = Olth; + goto Cmp; + case ELTE: + ins.op = Olte; + goto Cmp; + case EGTH: + ins.op = Olth; + swp = 1; + goto Cmp; + case EGTE: + ins.op = Olte; + swp = 1; + Cmp: + ty = cvtarith(sub[0].ty, sub[1].ty); + if (isunsigned(ty) && in_range(ins.op, Olth, Olte)) + ins.op += Oulth - Olth; + ins.l = exprvalue(fn, &sub[0^swp]); + ins.l = cvt(fn, ex->ty.t, ty.t, ins.l); + ins.r = exprvalue(fn, &sub[1^swp]); + ins.r = cvt(fn, ex->ty.t, ty.t, ins.r); + ins.cls = cls; + return addinstr(fn, ins); + break; + case ESET: + assert(isscalar(ex->ty)); + return genstore(fn, ex->ty, expraddr(fn, &sub[0]), exprvalue(fn, &sub[1])); + case ESETMUL: + ins.op = isunsigned(ex->ty) ? Oumul : Omul; + goto Compound; + case ESETDIV: + ins.op = isunsigned(ex->ty) ? Oudiv : Odiv; + goto Compound; + case ESETREM: + ins.op = issigned(ex->ty) ? Orem : Ourem; + goto Compound; + case ESETAND: + ins.op = Oand; + goto Compound; + case ESETXOR: + ins.op = Oxor; + goto Compound; + case ESETIOR: + ins.op = Oior; + goto Compound; + case ESETSHL: + ins.op = Oshl; + goto Compound; + case ESETSHR: + ins.op = issigned(ex->ty) ? Osar : Oshr; + goto Compound; + case ESETSUB: + ins.op = Osub; + goto Compound; + case ESETADD: + ins.op = Oadd; + Compound: + r = expraddr(fn, &sub[0]); + ty = in_range(ex->t, ESETSHL, ESETSHR) ? mktype(intpromote(ex->ty.t)) + : cvtarith(sub[0].ty, sub[1].ty); + ins.cls = cls; + ins.l = cvt(fn, ty.t, sub[0].ty.t, genload(fn, ex->ty, r)); + ins.r = exprvalue(fn, &sub[1]); + if ((ins.op != Oadd && ins.op != Osub) || cls != KIP) + ins.r = cvt(fn, ex->ty.t, sub[1].ty.t, ins.r); + else { + uint siz = typesize(typechild(ex->ty)); + enum typetag tt = intpromote(sub[1].ty.t); + struct instr scale = { Omul, type2cls[tt], ins.r }; + scale.r = mkintcon(fn, type2cls[tt], siz); + ins.r = addinstr(fn, scale); + } + q = addinstr(fn, ins); + genstore(fn, ex->ty, r, q); + return narrow(fn, cls, ex->ty.t, q); + case ECALL: + { + const struct typedata *td = &typedata[sub[0].ty.dat]; + union irref argsbuf[10]; + union irtype typbuf[10]; + vec_of(union irref) args = VINIT(argsbuf, arraylength(argsbuf)); + vec_of(union irtype) typs = VINIT(typbuf, arraylength(typbuf)); + ins.op = Ocall; + assert(isscalar(ex->ty) || ex->ty.t == TYVOID); + assert(sub[0].ty.t == TYFUNC); + ins.cls = type2cls[ex->ty.t]; + ins.l = expraddr(fn, &sub[0]); + for (int i = 0; i < ex->narg; ++i) { + struct expr *arg = &sub[i+1]; + union type ty = i < td->nmemb ? td->param[i] : argpromote(arg->ty); + vpush(&args, cvt(fn, ty.t, arg->ty.t, exprvalue(fn, arg))); + vpush(&typs, mkirtype(ty)); + } + ins.r = mkcall(fn, sub[0].ty, ex->narg, args.p, typs.p); + vfree(&args); + vfree(&typs); + return addinstr(fn, ins); + } + case ESEQ: + (void)exprvalue(fn, &sub[0]); + return exprvalue(fn, &sub[1]); + default: assert(!"nyi expr"); + } +} + +static void +stmtterm(struct parser *pr) +{ + expect(pr, ';', "to terminate previous statement"); +} + +static void block(struct parser *pr, struct function *fn); + +static bool /* return 1 if stmt is terminating (all codepaths return) */ +stmt(struct parser *pr, struct function *fn) +{ + struct block *t, *f, *end, *begin; + struct expr ex; + union irref r; + bool terminates = 0; + const bool doemit = fn->curblk; + +#define EMITS if (doemit && !nerror) + + switch (peek(pr, NULL)) { + case '{': + lex(pr, NULL); + envdown(pr); + block(pr, fn); + envup(pr); + break; + case ';': + lex(pr, NULL); + break; + case TKWif: + lex(pr, NULL); + expect(pr, '(', NULL); + ex = commaexpr(pr); + expect(pr, ')', NULL); + if (!isscalar(ex.ty)) + error(&ex.span, "'if' condition is not a scalar (%ty)", ex.ty); + t = f = end = NULL; + EMITS { + t = newblk(fn); + f = newblk(fn); + r = exprvalue(fn, &ex); + if (!isint(ex.ty)) + r = cvt(fn, TYINT, ex.ty.t, r); + EMITS { + putjump(fn, Jbcnd, r, t, f); + useblk(fn, t); + } + } + terminates = stmt(pr, fn); + if (!match(pr, NULL, TKWelse)) { + EMITS putjump(fn, Jb, NOREF, f, NULL); + end = f; + terminates = 0; + } else { + EMITS { + if (!terminates) putjump(fn, Jb, NOREF, end = newblk(fn), NULL); + useblk(fn, f); + } + terminates &= stmt(pr, fn); + EMITS { + if (fn->curblk) putjump(fn, Jb, NOREF, end, NULL); + } + } + EMITS if (!terminates) useblk(fn, end); + break; + case TKWwhile: + lex(pr, NULL); + expect(pr, '(', NULL); + ex = commaexpr(pr); + expect(pr, ')', NULL); + if (!isscalar(ex.ty)) + error(&ex.span, "'while' condition is not a scalar (%ty)", ex.ty); + t = begin = end = NULL; + EMITS { + begin = newblk(fn); + putjump(fn, Jb, NOREF, begin, NULL); + useblk(fn, begin); + r = exprvalue(fn, &ex); + if (!isint(ex.ty)) + r = cvt(fn, TYINT, ex.ty.t, r); + EMITS { + putjump(fn, Jbcnd, r, t = newblk(fn), end = newblk(fn)); + useblk(fn, t); + } + } + terminates = stmt(pr, fn); + EMITS { + if (!terminates) putjump(fn, Jb, NOREF, begin, NULL); + useblk(fn, end); + } + break; + case TKWreturn: + lex(pr, NULL); + if (fn->retty.t != TYVOID) { + ex = commaexpr(pr); + if (!assigncheck(fn->retty, &ex)) { + error(&ex.span, + "cannot return '%ty' value from function with return type '%ty'", + ex.ty, fn->retty); + } + EMITS { + r = cvt(fn, fn->retty.t, ex.ty.t, exprvalue(fn, &ex)); + putjump(fn, Jrets, r, NULL, NULL); + } + } else { + EMITS putjump(fn, Jret, NOREF, NULL, NULL); + } + stmtterm(pr); + break; + default: + ex = commaexpr(pr); + stmtterm(pr); + EMITS exprvalue(fn, &ex); + break; + } + freearena(pr->exarena); + return fn->curblk == NULL; +} + +static void +block(struct parser *pr, struct function *fn) +{ + struct token tk; + const bool doemit = fn->curblk; + + while (!match(pr, &tk, '}')) { + if (isdecltok(pr)) { /* decl */ + struct expr ini; + struct declstate st = { DFUNCVAR }; + do { + struct decl decl = pdecl(&st, pr); + enum op op; + uint siz, align, nalloc; + if (decl.name) { + static int staticid; + bool put = 0; + switch (decl.scls) { + case SCSTATIC: + decl.id = ++staticid; + break; + case SCNONE: + decl.scls = SCAUTO; + case SCAUTO: + case SCREGISTER: + switch (align = typealign(decl.t)) { + case 1: op = Oalloca1; break; + case 2: op = Oalloca2; break; + case 4: op = Oalloca4; break; + case 8: op = Oalloca8; break; + case 16: op = Oalloca16; break; + default: assert(!"align"); + } + siz = typesize(decl.t); + nalloc = siz/align + ((siz&(align-1)) != 0); + EMITS { + decl.id = addinstr(fn, + (struct instr) { op, KIP, mkintcon(fn, KI4, nalloc) }).idx; + } + if (st.varini) { + putdecl(pr, &decl); + put = 1; + ini = expr(pr); + pdecl(&st, pr); + if (!assigncheck(decl.t, &ini)) + error(&ini.span, "cannot initialize %ty with %ty", decl.t, ini.ty); + EMITS genstore(fn, decl.t, mkref(RTMP, decl.id), exprvalue(fn, &ini)); + } + break; + case SCTYPEDEF: break; + default: assert(0); + } + if (!put) putdecl(pr, &decl); + } + } while (0); + } else { + stmt(pr, fn); + } + } + pr->fnblkspan = tk.span; +} + +static void +function(struct parser *pr, struct function *fn, const char **pnames) +{ + const struct typedata *td = &typedata[fn->fnty.dat]; + const bool doemit = fn->curblk; + envdown(pr); + for (int i = 0; i < td->nmemb; ++i) { + if (pnames[i]) { + uint siz, align, nalloc; + struct decl arg = { .t = td->param[i], .qual = tdgetqual(td->quals, i), + .name = pnames[i], .scls = SCAUTO }; + enum op op; + switch (align = typealign(arg.t)) { + case 1: op = Oalloca1; break; + case 2: op = Oalloca2; break; + case 4: op = Oalloca4; break; + case 8: op = Oalloca8; break; + case 16: op = Oalloca16; break; + default: assert(!"align"); + } + siz = typesize(arg.t); + nalloc = siz/align + ((siz&(align-1)) != 0); + EMITS { + struct instr alloca = { op, KIP, mkintcon(fn, KI4, nalloc) }; + arg.id = addinstr(fn, alloca).idx; + genstore(fn, arg.t, mkref(RTMP, arg.id), mkref(RARG, i)); + } + putdecl(pr, &arg); + } /*else { + warn(NULL, "missing name of parameter #%d", i+1); + }*/ + } + block(pr, fn); + envup(pr); + if (fn->curblk) { + if (fn->retty.t != TYVOID && !nerror) { + warn(&pr->fnblkspan, "non-void function may not return a value"); + } + putjump(fn, Jret, NOREF, NULL, NULL); + } +} + +/********/ +/* DECL */ +/********/ + +static union type +buildagg(struct parser *pr, enum typetag tt, const char *name, int id) +{ + struct token tk; + union type t; + struct span flexspan; + struct field fbuf[32]; + uchar qbuf[arraylength(fbuf)/4]; + vec_of(struct field) fld = VINIT(fbuf, arraylength(fbuf)); + vec_of(uchar) qual = VINIT(qbuf, arraylength(qbuf)); + struct typedata td = {tt}; + bool isunion = tt == TYUNION; + const char *tag = isunion ? "union" : "struct"; + + while (!match(pr, &tk, '}')) { + struct declstate st = { DFIELD }; + do { + struct decl decl = pdecl(&st, pr); + if (fld.n && td.flexi) { + td.flexi = 0; + error(&flexspan, "flexible array member is not at end of struct"); + } + if (!isunion && decl.t.t == TYARRAY && !typearrlen(decl.t)) { + td.flexi = 1; + flexspan = decl.span; + } else if (isincomplete(decl.t)) { + error(&decl.span, "field has incomplete type (%ty)", decl.t); + } else if (decl.t.t == TYFUNC) { + error(&decl.span, "field has function type (%ty)", decl.t); + } + if (decl.t.t) { + uint align = typealign(decl.t); + uint siz = typesize(decl.t); + uint off = isunion ? 0 : alignup(td.siz, align); + struct field f = { decl.name, decl.t, off }; + vpush(&fld, f); + if (decl.qual) { + td.anyconst |= decl.qual & QCONST; + while (qual.n < tdqualsiz(fld.n)) vpush(&qual, 0); + tdsetqual(qual.p, fld.n-1, decl.qual); + } + if (isunion) + td.siz = td.siz < siz ? siz : td.siz; + else + td.siz = off + siz; + td.align = td.align < align ? align : td.align; + } + } while (st.more); + } + if (fld.n == 0) { + struct field dummy = { "", mktype(TYCHAR), 0 }; + error(&tk.span, "%s cannot have zero members", tag); + vpush(&fld, dummy); + td.siz = td.align = 1; + } + td.siz = alignup(td.siz, td.align); + if (qual.p) while (qual.n < tdqualsiz(fld.n)) vpush(&qual, 0); + td.quals = qual.p; + td.fld = fld.p; + td.nmemb = fld.n; + t = completetype(name, id, &td); + vfree(&fld); + vfree(&qual); + return t; +} + +static union type +buildenum(struct parser *pr, const char *name) +{ + union type t; + struct typedata td = {TYENUM}; + enum typetag backing = TYINT; + + t = mktagtype(name, &td); + t.backing = backing; + return t; +} + +static union type +tagtype(struct parser *pr, enum toktag kind) +{ + struct token tk; + union type t; + struct span span; + enum typetag tt = kind == TKWenum ? TYENUM : kind == TKWstruct ? TYSTRUCT : TYUNION; + const char *tag = NULL; + + if (match(pr, &tk, TKIDENT)) + tag = tk.ident; + span = tk.span; + if (!match(pr, NULL, '{')) { + if (!tag) { + error(&tk.span, "expected %tt name or '{'", kind); + return mktype(0); + } + t = gettagged(pr, &span, tt, tag, /* def? */ peek(pr, NULL) == ';'); + } else { + if (tt != TYENUM) { + t = deftagged(pr, &span, tt, tag); + if (t.t != tt || !isincomplete(t)) { + if (t.t != tt) + error(&tk.span, + "defining tagged type %'tk as %tt clashes with previous definition", + &tk, kind); + else + error(&tk.span, "redefinition of '%tt %s'", kind, tag); + note(&span, "previous definition:"); + } + t = buildagg(pr, tt, tag, typedata[t.dat].id); + } else { + t = buildenum(pr, tag); + } + } + + if (t.t != tt) { + error(&tk.span, "declaring tagged type %'tk as %tt clashes with previous definition", + &tk, kind); + note(&span, "previous definition:"); + } + return t; +} + +static void +declspec(struct declstate *st, struct parser *pr) +{ + struct token tk; + struct decl *decl; + enum arith { + KSIGNED = 1<<0, + KUNSIGNED = 1<<1, + KBOOL = 1<<2, + KCHAR = 1<<3, + KSHORT = 1<<4, + KLONG = 1<<5, + KLONGLONG = 1<<6, + KINT = 1<<7, + KFLOAT = 1<<8, + KDOUBLE = 1<<9, + } arith = 0; + struct span span = {0}; + + for (;;) { + peek(pr, &tk); + switch (tk.t) { + case TKWconst: + st->qual |= QCONST; + break; + case TKWvolatile: + st->qual |= QVOLATILE; + break; + case TKW_Noreturn: + st->qual |= QNORETURN; + break; + case TKWinline: + st->qual |= QINLINE; + break; + case TKWvoid: + st->base = mktype(TYVOID); + break; + case TKWsigned: + arith |= KSIGNED; + break; + case TKWunsigned: + arith |= KUNSIGNED; + break; + case TKW_Bool: + case TKWbool: + if (arith & KBOOL) goto Dup; + arith |= KBOOL; + case TKWchar: + if (arith & KCHAR) { + Dup: + error(&tk.span, "duplicate %tk specifier", &tk); + } + arith |= KCHAR; + break; + case TKWshort: + arith |= KSHORT; + break; + case TKWlong: + if ((arith & (KLONG | KLONGLONG)) == KLONG) + arith = (arith &~ KLONG) | KLONGLONG; + else if ((arith & (KLONG | KLONGLONG)) == 0) + arith |= KLONG; + else + error(&tk.span, "too long"); + break; + case TKWint: + if (arith & KINT) goto Dup; + arith |= KINT; + break; + case TKWfloat: + if (arith & KFLOAT) goto Dup; + arith |= KFLOAT; + break; + case TKWdouble: + if (arith & KDOUBLE) goto Dup; + arith |= KDOUBLE; + break; + case TKWenum: + case TKWstruct: + case TKWunion: + lex(pr, &tk); + st->base = tagtype(pr, tk.t); + st->tagdecl = 1; + if (!span.ex.len) span.ex = tk.span.ex; + joinspan(&span.ex, tk.span.ex); + goto End; + case TKIDENT: + if (!st->base.t && !arith && (decl = finddecl(pr, tk.ident)) + && decl->scls == SCTYPEDEF) { + st->base = decl->t; + break; + } + /* fallthru */ + default: + if (!span.ex.len) span.ex = tk.span.ex; + goto End; + } + if (!span.ex.len) span.ex = tk.span.ex; + joinspan(&span.ex, tk.span.ex); + lex(pr, &tk); + if (st->base.t) break; + } +End: + if (st->base.t && arith) { + /* combining arith type specifiers and other types */ + Bad: + error(&span, "invalid declaration specifier"); + } else if (!st->base.t && arith) { + enum typetag t; + ioflush(&bstderr); + if (arith == KFLOAT) + t = TYFLOAT; + else if (arith == KDOUBLE) + t = TYDOUBLE; + else if (arith == (KLONG | KDOUBLE)) + t = TYLDOUBLE; + else if (arith == KBOOL) + t = TYBOOL; + else if (arith == KCHAR) + t = TYCHAR; + else if (arith == (KSIGNED | KCHAR)) + t = TYSCHAR; + else if (arith == (KUNSIGNED | KCHAR)) + t = TYUCHAR; + else if ((arith & ~KINT & ~KSIGNED) == KSHORT) + t = TYSHORT; + else if ((arith & ~KINT) == (KUNSIGNED | KSHORT)) + t = TYUSHORT; + else if ((arith & ~KINT & ~KSIGNED) == 0) + t = TYINT; + else if ((arith & ~KINT) == KUNSIGNED) + t = TYUINT; + else if ((arith & ~KINT & ~KSIGNED) == KLONG) + t = TYLONG; + else if ((arith & ~KINT) == (KUNSIGNED | KLONG)) + t = TYULONG; + else if ((arith & ~KINT & ~KSIGNED) == KLONGLONG) + t = TYVLONG; + else if ((arith & ~KINT) == (KUNSIGNED | KLONGLONG)) + t = TYUVLONG; + else + goto Bad; + st->base = mktype(t); + } else if (!st->base.t && ccopt.cstd < STDC99) { + warn(&span, "type implicitly declared as int"); + st->base = mktype(TYINT); + } else if (!st->base.t) + fatal(&span, "expected declaration type specifier"); +} + +/* circular doubly linked list used to parse declarators */ +static struct decllist { + struct decllist *prev, *next; + uchar t; /* TYPTR, TYARRAY or TYFUNC */ + union { + uchar qual; /* TYPTR */ + uint len; /* TYARRAY */ + struct { /* TYFUNC */ + union type *param; + const char **pnames; + uchar *pqual; + short npar; + bool kandr : 1, variadic : 1; + }; + }; + struct span span; +} decltmp[64], *declfreelist; +static union type declparamtmp[16]; +static const char *declpnamestmp[16]; +static uchar declpqualtmp[tdqualsiz(16)]; + +static void +declinsert(struct decllist *list, const struct decllist *node) +{ + struct decllist *pnode = declfreelist; + if (!pnode) fatal(NULL, "too many nested declarators"); + declfreelist = declfreelist->next; + *pnode = *node; + pnode->next = list->next; + pnode->prev = list; + list->next->prev = pnode; + list->next = pnode; +} + +static int +sclass(struct parser *pr, struct span *span) +{ + struct token tk; + int sc = 0, first = 1; + for (;; lex(pr, &tk)) { + switch (peek(pr, &tk)) { + case TKWtypedef: sc |= SCTYPEDEF; break; + case TKWextern: sc |= SCEXTERN; break; + case TKWstatic: sc |= SCSTATIC; break; + case TKWauto: sc |= SCAUTO; break; + case TKWregister: sc |= SCREGISTER; break; + case TKWthread_local: + case TKW_Thread_local: + sc |= SCTHREADLOCAL; break; + default: return sc; + } + if (first) *span = tk.span; + else joinspan(&span->ex, tk.span.ex); + first = 0; + } +} + +static int +cvqual(struct parser *pr) +{ + struct token tk; + int q = 0; + while (match(pr, &tk, TKWconst) || match(pr, &tk, TKWvolatile)) + q |= tk.t == TKWconst ? QCONST : QVOLATILE; + return q; +} + +static void +decltypes(struct parser *pr, struct decllist *list, const char **name, struct span *span) { + struct token tk; + struct decllist *ptr, node; + + while (match(pr, &tk, '*')) { + node.t = TYPTR; + node.qual = cvqual(pr); + node.span = tk.span; + declinsert(list, &node); + } + ptr = list->next; + switch (peek(pr, &tk)) { + case '(': + lex(pr, &tk); + if (isdecltok(pr)) { + goto Func; + } else if (match(pr, &tk, ')')) { + /* T () is K&R func proto */ + node.span = tk.span; + node.t = TYFUNC; + node.param = NULL; + node.pqual = NULL; + node.pnames = NULL; + node.variadic = 0; + node.kandr = 1; + node.npar = 0; + declinsert(ptr->prev, &node); + break; + } else { + decltypes(pr, list, name, span); + expect(pr, ')', NULL); + } + break; + case TKIDENT: + if (!name) + error(&tk.span, "unexpected identifier in type name"); + else { + *name = tk.ident; + *span = tk.span; + } + lex(pr, &tk); + break; + default: + *span = tk.span; + if (name) + *name = NULL; + } + for (;;) { + if (match(pr, &tk, '[')) { + node.span = tk.span; + uint n = 0; + if (!match(pr, &tk, ']')) { + struct expr ex = expr(pr); + if (!eval(&ex, EVINTCONST)) { + error(&ex.span, "array length is not an integer constant"); + } else if (typesize(ex.ty) < 8 && ex.i < 0) { + error(&ex.span, "array length is negative"); + } else if (ex.u > (1ull << (8*sizeof n)) - 1) { + error(&ex.span, "array too long (%ul)", ex.u); + } else if (ex.u == 0) { + error(&ex.span, "array cannot have zero length"); + } else { + n = ex.u; + } + peek(pr, &tk); + joinspan(&node.span.ex, tk.span.ex); + expect(pr, ']', NULL); + } + node.t = TYARRAY; + node.len = n; + declinsert(ptr->prev, &node); + } else if (match(pr, &tk, '(')) Func: { + static int depth = 0; + vec_of(union type) params = {0}; + vec_of(uchar) qual = {0}; + vec_of(const char *) names = {0}; + bool anyqual = 0; + + if (depth++ == 0) { + vinit(¶ms, declparamtmp, arraylength(declparamtmp)); + vinit(&qual, declpqualtmp, arraylength(declpqualtmp)); + vinit(&names, declpnamestmp, arraylength(declpnamestmp)); + } + node.span = tk.span; + node.kandr = 0; + node.variadic = 0; + + while (!match(pr, &tk, ')')) { + struct declstate st = { DFUNCPARAM }; + struct decl decl; + if (match(pr, &tk, TKDOTS)) { + node.variadic = 1; + expect(pr, ')', NULL); + break; + } + decl = pdecl(&st, pr); + decl.t = typedecay(decl.t); + vpush(¶ms, decl.t); + vpush(&names, decl.name); + if (decl.qual) { + anyqual = 1; + while (qual.n < tdqualsiz(params.n)) vpush(&qual, 0); + tdsetqual(qual.p, params.n-1, decl.qual); + } + if (isincomplete(decl.t)) { + if (params.n > 1 || decl.t.t != TYVOID || decl.qual || decl.name) { + error(&decl.span, + "function parameter #%d has incomplete type (%tq)", + params.n, decl.t, tdgetqual(qual.p, params.n-1)); + } + } + joinspan(&node.span.ex, tk.span.ex); + if (!match(pr, &tk, ',')) { + expect(pr, ')', NULL); + break; + } + } + --depth; + node.kandr = params.n == 0 && ccopt.cstd < STDC23; + if (params.n == 1 && params.p[0].t == TYVOID && !qual.n && !names.p[0]) { /* (void) */ + vfree(¶ms); + vfree(&names); + } else if (params.n && params.p[0].t == TYVOID && !qual.n && !names.p[0]) { + error(&node.span, "function parameter #1 has incomplete type (%tq)", + params.p[0], tdgetqual(qual.p, 0)); + } + node.t = TYFUNC; + node.param = params.n ? params.p : NULL; + node.pqual = anyqual ? qual.p : NULL; + node.pnames = params.n ? names.p : NULL; + node.npar = params.n; + declinsert(ptr->prev, &node); + } else break; + } +} + +static struct decl +declarator(struct declstate *st, struct parser *pr) { + struct decl decl = { st->base, st->scls, st->qual, st->align }; + struct decllist list = { &list, &list }, *l; + static bool inidecltmp = 0; + if (!inidecltmp) { + inidecltmp = 1; + for (int i = 0; i < arraylength(decltmp); ++i) { + decltmp[i].next = declfreelist; + declfreelist = &decltmp[i]; + } + } + + decltypes(pr, &list, st->kind == DCASTEXPR ? NULL : &decl.name, &decl.span); + if (!decl.name && st->kind != DCASTEXPR && st->kind != DFUNCPARAM) { + if (list.prev == &list) lex(pr, NULL); + error(&decl.span, "expected `(', `*' or identifier"); + } + for (l = list.prev; l != &list; l = l->prev) { + switch (l->t) { + case TYPTR: + decl.t = mkptrtype(decl.t, decl.qual); + decl.qual = l->qual; + break; + case TYARRAY: + if (isincomplete(decl.t)) + error(&l->span, "array has incomplete element type (%ty)", decl.t); + else if (decl.t.t == TYFUNC) + error(&l->span, "array has element has function type (%ty)", decl.t); + decl.t = mkarrtype(decl.t, decl.qual, l->len); + decl.qual = 0; + break; + case TYFUNC: + if (decl.t.t == TYFUNC) + error(&decl.span, "function cannot return function type (%ty)", decl.t); + else if (decl.t.t == TYARRAY) + error(&decl.span, "function cannot return array type", decl.t); + else if (decl.t.t != TYVOID && isincomplete(decl.t)) + error(&decl.span, "function cannot return incomplete type (%ty)", decl.t); + decl.t = mkfntype(decl.t, l->npar, l->param, l->pqual, l->kandr, l->variadic); + if (l->param != declparamtmp) free(l->param); + if (l->pqual != declpqualtmp) free(l->pqual); + if (l->prev == &list && l->npar) { /* last */ + st->pnames = alloc(&pr->fnarena, l->npar * sizeof(char *), 0); + memcpy(st->pnames, l->pnames, l->npar * sizeof(char *)); + } + if (l->pnames != declpnamestmp) free(l->pnames); + decl.qual = 0; + break; + } + + l->next = declfreelist; + declfreelist = l; + } + + return decl; +} + +static struct decl +pdecl(struct declstate *st, struct parser *pr) { + struct token tk; + struct decl decl; + bool iniallowed = st->kind != DFIELD && st->kind != DFUNCPARAM && st->kind != DCASTEXPR; + bool first = 0; + + if (st->varini) { + memset(&decl, 0, sizeof decl); + goto AfterVarIni; + } + + if (!st->base.t) { + first = 1; + st->scls = sclass(pr, &tk.span); + if (popcnt(st->scls) > 1) + error(&tk.span, "invalid combination of storage class specifiers"); + else { + int allowed; + switch (st->kind) { + case DTOPLEVEL: allowed = SCTYPEDEF | SCEXTERN | SCSTATIC | SCTHREADLOCAL; break; + case DCASTEXPR: allowed = 0; break; + case DFIELD: allowed = 0; break; + case DFUNCPARAM: allowed = 0; break; + case DFUNCVAR: + allowed = SCTYPEDEF | SCREGISTER | SCAUTO | SCEXTERN | SCSTATIC | SCTHREADLOCAL; + break; + default: assert(0); + } + if ((st->scls & allowed) != st->scls) + error(&tk.span, "this storage class is not allowed in this context"); + st->scls &= allowed; + } + declspec(st, pr); + } + + if (first && st->tagdecl && match(pr, &tk, ';')) { + decl = (struct decl) { st->base, st->scls, st->qual, st->align }; + return decl; + } + decl = declarator(st, pr); + + if (iniallowed && match(pr, &tk, '=')) { + st->varini = 1; + return decl; + } else if (first && decl.t.t == TYFUNC && match(pr, &tk, '{')) { + st->funcdef = 1; + return decl; + } + +AfterVarIni: + st->varini = 0; + st->more = 0; + if (st->kind != DCASTEXPR && st->kind != DFUNCPARAM) { + if (match(pr, &tk, ',')) + st->more = 1; + else expect(pr, st->kind == DFUNCPARAM ? ')' : ';', "or `,'"); + } + + return decl; +} + +void +parse(struct parser *pr) +{ + struct token tk[1]; + + if (!pr->env) pr->env = &toplevel; + if (!tlarena) { + enum { N = 1<<12 }; + static union { char m[sizeof(struct arena) + N]; struct arena *_align; } amem[3]; + + tlarena = (void *)amem[0].m; + tlarena->cap = N; + pr->fnarena = (void *)amem[1].m; + pr->fnarena->cap = N; + pr->exarena = (void *)amem[2].m; + pr->exarena->cap = N; + } + while (peek(pr, tk) != TKEOF) { + struct expr ini; + struct declstate st = { DTOPLEVEL, }; + do { + int nerr = nerror; + struct decl decl = pdecl(&st, pr); + + if (nerror != nerr) { + if (st.varini) { + (void)expr(pr); + pdecl(&st, pr); + } + continue; + } + if (decl.name) efmt("%s : %tq\n", decl.name, decl.t, decl.qual); + if (st.funcdef) { + const struct typedata *td = &typedata[decl.t.dat]; + struct function fn = { pr->fnarena, decl.name, .globl = decl.scls != SCSTATIC }; + fn.fnty = decl.t; + fn.retty = td->ret; + putdecl(pr, &decl); + irinit(&fn); + function(pr, &fn, st.pnames); + if (!nerror) irdump(&fn, decl.name); + } else if (decl.name) { + putdecl(pr, &decl); + if (st.varini) { + ini = expr(pr); + pdecl(&st, pr); + if (!assigncheck(decl.t, &ini)) + error(&ini.span, "cannot initialize %ty with %ty", decl.t, ini.ty); + if (!eval(&ini, EVSTATICINI)) + error(&ini.span, "cannot evaluate expression statically"); + } + } + freearena(pr->fnarena); + freearena(pr->exarena); + } while (st.more); + } +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/parse.h b/parse.h new file mode 100644 index 0000000..732d764 --- /dev/null +++ b/parse.h @@ -0,0 +1,205 @@ +#include "common.h" + +/*************/ +/** PARSING **/ +/************/ + +static inline bool +joinspan(struct span0 *dst, struct span0 snd) +{ + if (dst->file != snd.file) return 0; + assert(dst->off <= snd.off); + dst->len = snd.off + snd.len - dst->off; + return 1; +} + +enum toktag { /* single-character tokens' tag value is the character itself */ + TKEOF = -1, + TKXXX, + TKNUMLIT, + TKSTRLIT, + TKIDENT, + TKDUMMY_ = 0x7F, +#define _(kw, stdc) TKW##kw, +#include "keywords.def" +#undef _ +#define C2(a,b) (a | b<<8) +#define C3(a,b,c) (a | b<<8 | c<<16) + TKEQU = C2('=','='), + TKNEQ = C2('!','='), + TKLTE = C2('<','='), + TKGTE = C2('>','='), + TKSHR = C2('>','>'), + TKSHL = C2('<','<'), + TKINC = C2('+','+'), + TKDEC = C2('-','-'), + TKARROW = C2('-','>'), + TKPPCAT = C2('#', '#'), + TKLOGAND = C2('&','&'), + TKLOGIOR = C2('|','|'), + TKSETADD = C2('+','='), + TKSETSUB = C2('-','='), + TKSETMUL = C2('*','='), + TKSETDIV = C2('/','='), + TKSETMOD = C2('%','='), + TKSETIOR = C2('|','='), + TKSETXOR = C2('^','='), + TKSETAND = C2('&','='), + TKSETSHR = C3('>','>','='), + TKSETSHL = C3('<','<','='), + TKDOTS = C3('.','.','.'), +#undef C2 +#undef C3 +}; + +struct token { + enum toktag t; + uchar ty; /* type tag for num lits */ + struct span span; + union { + uvlong u; + vlong i; + double f; + const char *ident; + struct bytes s; + }; +}; + +struct macro { + const char *name; /* interned from tk->ident */ + const char **param; + struct span0 span; + uchar nparam; + bool fnlike, variadic; + struct rlist { + struct token *tk; + int n; + } rlist; +}; + +struct macrostack { + struct macrostack *link; + struct rlist *args; + struct span0 exspan; + int mac; + int idx; +}; + +extern int nerror; +struct parser { + struct parser *save; + short fileid; + const uchar *dat; + uint ndat; + uint idx, chridx; + short peekchr[2]; + uint peekcidx[2]; + short npeekchr; + struct macrostack *macstk; + struct token peektok; + bool eof, err; + struct env *env; + struct arena *fnarena, *exarena; + struct span fnblkspan; +}; + +int lex(struct parser *, struct token *); +int lexpeek(struct parser *, struct token *); +void initparser(struct parser *, const char *file); +void parse(struct parser *); + +/***********/ +/** CTYPE **/ +/***********/ + +#define aisprint(c) in_range(c, ' ', '~') +#define aisdigit(c) in_range(c, '0', '9') +#define aisodigit(c) in_range(c, '0', '7') +#define aisalpha(c) in_range((c)|0x20, 'a', 'z') +static inline bool aisspace(int c) { return c == ' ' || in_range(c, '\t', '\r'); } +static inline bool aisxdigit(int c) { return aisdigit(c) || in_range(c|0x20, 'a', 'f'); } + +/************/ +/* ANALYSIS */ +/************/ + +enum exprkind { + EXXX, ENUMLIT, ESTRLIT, ESYM, EINIT, EGETF, ECALL, ECOND, + /* unary */ + EPLUS, ENEG, ECOMPL, ELOGNOT, EDEREF, EADDROF, ECAST, + EPREINC, EPOSTINC, EPREDEC, EPOSTDEC, + /* binary */ + EADD, ESUB, EMUL, EDIV, EREM, EBAND, EBIOR, EXOR, ESHL, ESHR, + ELOGAND, ELOGIOR, + EEQU, ENEQ, ELTH, EGTH, ELTE, EGTE, + ESET, ESETADD, ESETSUB, ESETMUL, ESETDIV, ESETREM, ESETAND, ESETIOR, ESETXOR, ESETSHL, ESETSHR, + ESEQ, +}; +#define isunop(t) in_range(t, EPLUS, EPOSTDEC) +#define isbinop(t) in_range(t, EADD, ESEQ) +#define isassign(t) in_range(t, ESET, ESETSHR) +#define assigntobinop(t) ((t) - ESETADD + EADD) + +struct expr { + uchar t; + uchar qual; + ushort narg; /* ECALL */ + union type ty; + struct span span; + union { + struct expr *sub; + uvlong u; vlong i; double f; /* ENUMLIT */ + struct bytes s; /* ESTRLIT */ + struct decl *sym; /* ESYM */ + struct initializer *ini; /* EINIT */ + struct { /* EGETF */ + const char *name; + uint off; + uchar bitsiz, bitoff; + } fld; + char dummy; + }; +}; + +enum storageclass { + SCNONE, + SCTYPEDEF = 1<<0, + SCEXTERN = 1<<1, + SCSTATIC = 1<<2, + SCTHREADLOCAL = 1<<3, + SCAUTO = 1<<4, + SCREGISTER = 1<<5, +}; + +struct decl { + union type t; + uchar scls; + uchar qual; + ushort align; + struct span span; + const char *name; + int id; +}; + +struct env { + struct env *up; + struct decls { + struct decls *prev; + struct decl decl; + } *decls; + struct tagged { + struct tagged *prev; + struct span span; + union type t; + } *tagged; +}; + +enum evalmode { + EVINTCONST, + EVARITH, + EVSTATICINI, +}; + +bool eval(struct expr *, enum evalmode); + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/targ.c b/targ.c new file mode 100644 index 0000000..7368fa5 --- /dev/null +++ b/targ.c @@ -0,0 +1,41 @@ +#include "common.h" + +uchar targ_primsizes[TYPTR+1]; +uchar targ_primalign[TYPTR+1]; +enum typetag targ_sizetype; +bool targ_charsigned, targ_bigendian; + +struct targ { + const char *name; + struct { uchar longsize, vlongsize, ptrsize, valistsize; }; + struct { uchar longalign, vlongalign, doublealign, ptralign; }; + bool charsigned; + uchar sizetype; +} targs[] = { + { "amd64-sysv", {8, 8, 8, 24}, {8, 8, 8, 8}, 1, TYULONG } +}; + +void +targ_init(const char *starg) +{ + struct targ *t = &targs[0]; + uchar *sizes = targ_primsizes, *align = targ_primalign; + + sizes[TYBOOL] = sizes[TYCHAR] = sizes[TYSCHAR] = sizes[TYUCHAR] = 1; + sizes[TYSHORT] = sizes[TYUSHORT] = 2; + sizes[TYUINT] = sizes[TYINT] = 4; + sizes[TYFLOAT] = 4; + sizes[TYDOUBLE] = 8; + memcpy(align, sizes, sizeof targ_primalign); + sizes[TYULONG] = sizes[TYLONG] = t->longsize; + sizes[TYUVLONG] = sizes[TYVLONG] = t->vlongsize; + sizes[TYPTR] = t->ptrsize; + sizes[TYVALIST] = t->valistsize; + align[TYULONG] = align[TYLONG] = t->longalign; + align[TYUVLONG] = align[TYVLONG] = t->vlongalign; + align[TYDOUBLE] = t->doublealign; + align[TYVALIST] = align[TYPTR] = t->ptralign; + targ_sizetype = t->sizetype; + targ_charsigned = t->charsigned; + targ_bigendian = 0; +} diff --git a/test.c b/test.c new file mode 100644 index 0000000..9eac7de --- /dev/null +++ b/test.c @@ -0,0 +1,39 @@ +/* coment */ + +int glob; + +int add (int x, int y) { + return x + y + glob; +} + +int abs(int x){ + return (x ^ x >> 31) + ((unsigned)x >> 31); +} + +int popcnt(unsigned x) { + int n = 0; + while (x) x >>= 1, n++; + return n; +} + +int foo(int x) { + int work[10]; + work[x]=x; + if (x == 0) + return 1; + else if (x == 1) + return -1; + else if (x < 0) + return x; + else + return 0; +} + +extern void printf(char *, ...); +int main() { + char x; + int k = x += 1; + return abs(-33); +} + +// diff --git a/type.c b/type.c new file mode 100644 index 0000000..c973906 --- /dev/null +++ b/type.c @@ -0,0 +1,287 @@ +#include "common.h" +#include + +struct typedata typedata[1<<13]; +const char *ttypenames[1<<10]; + +static ushort +hashtd(const struct typedata *td) +{ + uint h = td->t*33; + switch (td->t) { + case TYARRAY: + h = hashb(h, &td->arrlen, sizeof td->arrlen); + /* fallthru */ + case TYPTR: + h = hashb(h, &td->child, sizeof td->child); + break; + case TYSTRUCT: + case TYUNION: + case TYENUM: + h = hashb(h, &td->id, sizeof td->id); + break; + case TYFUNC: + h = hashb(h, &td->ret, sizeof td->ret); + h = hashb(h, &td->nmemb, sizeof td->nmemb); + for (int i = 0; i < td->nmemb; ++i) { + uchar q = tdgetqual(td->quals, i); + h = hashb(h, &td->param[i], sizeof *td->param); + h = hashb(h, &q, sizeof q); + } + break; + default: + assert(0 && "bad typedata tag"); + } + return h ^ h>>16; +} + +static bool +tdequ(const struct typedata *a, const struct typedata *b) +{ + if (a->t != b->t) return 0; + switch (a->t) { + case TYARRAY: + return a->arrlen == b->arrlen && a->child.bits == b->child.bits; + case TYPTR: + return a->child.bits == b->child.bits; + case TYSTRUCT: + case TYUNION: + case TYENUM: + return a->id == b->id; + case TYFUNC: + if (a->ret.bits != b->ret.bits) return 0; + if (a->nmemb != b->nmemb) return 0; + if (a->variadic != b->variadic) return 0; + for (int i = 0; i < a->nmemb; ++i) { + if (!a->quals != !b->quals || a->param[i].bits != b->param[i].bits) + return 0; + if (a->quals && tdgetqual(a->quals, i) != tdgetqual(b->quals, i)) + return 0; + } + return 1; + break; + default: + assert(0 && "bad typedata tag"); + } +} + +static void * +alloccopy(struct arena **arena, const void *src, uint siz, uint align) +{ + return memcpy(alloc(arena, siz, align), src, siz); +} + +static ushort +interntd(const struct typedata *td) +{ + uint h, i, n = arraylength(typedata); + for (i = h = hashtd(td); n--; ++i) { + struct typedata *slot = &typedata[i &= arraylength(typedata) - 1]; + if (!slot->t) { + uint nmemb; + static struct arena *datarena; + if (!datarena) { + enum { N = 1<<12 }; + static union { char m[sizeof(struct arena) + N]; struct arena *_align; } amem; + datarena = (void *)amem.m, datarena->cap = N; + } + + Copy: + nmemb = td->nmemb; + *slot = *td; + switch (slot->t) { + case TYENUM: + if (slot->var) + slot->var = alloccopy(&datarena, td->var, nmemb * sizeof *slot->var, 0); + break; + case TYSTRUCT: + case TYUNION: + if (slot->fld) + slot->fld = alloccopy(&datarena, td->fld, nmemb * sizeof *slot->fld, 0); + Qual: + if (slot->quals) + slot->quals = alloccopy(&datarena, td->quals, tdqualsiz(nmemb), 1); + break; + case TYFUNC: + if (slot->param) { + slot->param = alloccopy(&datarena, td->param, nmemb * sizeof *slot->param, 0); + } + goto Qual; + } + return i; + } else if (tdequ(slot, td)) { + if (td->t == TYSTRUCT || td->t == TYUNION) + goto Copy; + return i; + } + } + assert(0 && "typedata[] full"); +} + +bool +isincomplete(union type t) +{ + switch (t.t) { + case TYVOID: return 1; + case TYARRAY: return !typearrlen(t); + case TYSTRUCT: + case TYUNION: + return typedata[t.dat].nmemb == 0; + } + return 0; +} + +uint +typesize(union type t) +{ + if (isprim(t) || t.t == TYPTR) return targ_primsizes[t.t]; + switch (t.t) { + case TYENUM: + return targ_primsizes[t.backing]; + case TYARRAY: + return t.flag & TFCHLDPRIM ? targ_primsizes[t.child] * t.arrlen : typedata[t.dat].siz; + case TYSTRUCT: + case TYUNION: + return typedata[t.dat].siz; + } + return 0; +} + +uint +typealign(union type t) +{ + if (isprim(t) || t.t == TYPTR) return targ_primalign[t.t]; + switch (t.t) { + case TYENUM: + return targ_primalign[t.backing]; + case TYARRAY: + return targ_primalign[typechild(t).t]; + case TYSTRUCT: + case TYUNION: + return typedata[t.dat].align; + } + return 0; +} + +union type +mkptrtype(union type t, int qual) +{ + if (isprim(t)) + return mktype(TYPTR, .flag = TFCHLDPRIM | (qual & TFCHLDQUAL), .child = t.t); + else if (t.t == TYENUM || t.t == TYFUNC || isagg(t)) + return mktype(TYPTR, .flag = TFCHLDISDAT | (qual & TFCHLDQUAL), .dat = t.dat); + return mktype(TYPTR, .flag = qual & TFCHLDQUAL, + .dat = interntd(&(struct typedata) { TYPTR, .child = t })); +} + +union type +mkarrtype(union type t, int qual, uint n) +{ + if (isprim(t) && n < 256) + return mktype(TYARRAY, .flag = TFCHLDPRIM | (qual & TFCHLDQUAL), .child = t.t, .arrlen = n); + return mktype(TYARRAY, .flag = qual & TFCHLDQUAL, + .dat = interntd(&(struct typedata) { TYARRAY, .child = t, .arrlen = n })); +} + +union type +mkfntype(union type ret, uint n, const union type *par, const uchar *qual, bool kandr, bool variadic) +{ + struct typedata td = { TYFUNC, .ret = ret, .nmemb = n, .param = par, .quals = qual }; + td.kandr = kandr, td.variadic = variadic; + return mktype(TYFUNC, .dat = interntd(&td)); +} + + +union type +completetype(const char *name, int id, struct typedata *td) +{ + assert(td->t == TYENUM || td->t == TYSTRUCT || td->t == TYUNION); + td->id = id; + assert(id < arraylength(ttypenames) && "too many tag types"); + if (ttypenames[id]) + assert(ttypenames[id] == name && "bad redefn"); + else + ttypenames[id] = name; + return mktype(td->t, .dat = interntd(td)); +} + +union type +mktagtype(const char *name, struct typedata *td) +{ + static int id; + return completetype(name, id++, td); +} + +union type +typedecay(union type t) +{ + if (t.t == TYARRAY) + return mkptrtype(typechild(t), t.flag & TFCHLDQUAL); + if (t.t == TYFUNC) + return mkptrtype(t, 0); + return t; +} + +bool /* 6.5.16.1 Simple assignment Constraints */ +assigncompat(union type dst, union type src) +{ + union type ds, ss; + if (dst.bits == src.bits) return 1; + if (isarith(dst) && isarith(src)) return 1; + if (dst.t == TYPTR && src.t == TYPTR) { + ds = typechild(dst); + ss = typechild(src); + if (ds.bits == ss.bits || ss.t == TYVOID || ds.t == TYVOID) + return ((dst.flag & TFCHLDQUAL) & (src.flag & TFCHLDQUAL)) == (src.flag & TFCHLDQUAL); + } + if (dst.t == TYBOOL && src.t == TYPTR) + return 1; + return 0; +} + +enum typetag +intpromote(enum typetag t) +{ + static int intisshort = -1; + if (intisshort < 0) intisshort = targ_primsizes[TYINT] == targ_primsizes[TYSHORT]; + if (intisshort && t == TYUSHORT) return TYUINT; + return t < TYINT ? TYINT : t; +} + +union type /* 6.3.1.8 Usual arithmetic conversions */ +cvtarith(union type a, union type b) +{ + const union type none = {0}; + + if (!isarith(a) || !isarith(b)) return none; + if (a.t == TYENUM) a = typechild(a); + if (b.t == TYENUM) b = typechild(b); + if (isflt(a) || isflt(b)) { + /* when one type is float, choose type with greatest rank */ + /* enumeration order of type tags reflects arithmetic type rank */ + return a.t > b.t ? a : b; + } + a.t = intpromote(a.t); + b.t = intpromote(b.t); + if (a.bits == b.bits) return a; + + if (issigned(a) == issigned(b)) { + /* when both are integers with same signage, choose type with greatest rank */ + return a.t > b.t ? a : b; + } + /* if the signed type can represent all values of the unsigned type, + * choose it, otherwise choose its corresponding unsigned type */ + /* so long long + unsigned = long long; + * but long long + unsigned long = unsigned long long */ + if (issigned(a)) { + if (targ_primsizes[a.t] <= targ_primsizes[b.t]) + a.t += 1; /* make unsigned */ + return a; + } else { + if (targ_primsizes[b.t] <= targ_primsizes[a.t]) + b.t += 1; /* make unsigned */ + return b; + } +} + +/* vim:set ts=3 sw=3 expandtab: */ -- cgit v1.2.3