diff options
| -rw-r--r-- | .gitignore | 5 | ||||
| -rw-r--r-- | Makefile | 33 | ||||
| -rw-r--r-- | common.h | 359 | ||||
| -rw-r--r-- | eval.c | 225 | ||||
| -rw-r--r-- | io.c | 836 | ||||
| -rw-r--r-- | ir.c | 221 | ||||
| -rw-r--r-- | ir.h | 117 | ||||
| -rw-r--r-- | irdump.c | 124 | ||||
| -rw-r--r-- | keywords.def | 70 | ||||
| -rw-r--r-- | lex.c | 669 | ||||
| -rw-r--r-- | main.c | 27 | ||||
| -rw-r--r-- | mem.c | 117 | ||||
| -rw-r--r-- | op.def | 60 | ||||
| -rw-r--r-- | parse.c | 2104 | ||||
| -rw-r--r-- | parse.h | 205 | ||||
| -rw-r--r-- | targ.c | 41 | ||||
| -rw-r--r-- | test.c | 39 | ||||
| -rw-r--r-- | type.c | 287 |
18 files changed, 5539 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8b0a3a4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +cchomp +obj/ +compile_commands.json +.cache/ +.gdb_history diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f9f3e12 --- /dev/null +++ b/Makefile @@ -0,0 +1,33 @@ +SRC=main.c io.c mem.c parse.c lex.c type.c targ.c eval.c ir.c irdump.c +CFLAGS=-Wall -std=c11 -pedantic +OBJ=$(patsubst %.c,obj/%.o,$(SRC)) +OUT=cchomp + +all: CFLAGS += -g -Og +all: $(OUT) + +opt: CFLAGS += -O2 +opt: $(OUT) + +dbg: CFLAGS += -g -fsanitize=address -fsanitize=undefined +dbg: $(OUT) + +$(OUT): $(OBJ) + $(CC) $(CFLAGS) -o $@ $(OBJ) + +obj/%.o: %.c common.h + @mkdir -p obj/ + $(CC) $(CFLAGS) -c -o $@ $< + +obj/main.o: parse.h +obj/parse.o: parse.h ir.h +obj/ir.o: ir.h +obj/irdump.o: ir.h op.def +obj/lex.o: parse.h +obj/eval.o: parse.h +obj/io.o: parse.h keywords.def + +clean: + $(RM) -r obj/ $(OUT) + +.PHONY: clean diff --git a/common.h b/common.h new file mode 100644 index 0000000..ab3c8ff --- /dev/null +++ b/common.h @@ -0,0 +1,359 @@ +#ifndef COMMON_H_ +#define COMMON_H_ + +#include <stdarg.h> +#include <stddef.h> +#include <string.h> + +#ifdef __clang__ /* stop linter from complaining about "unused" inline functions */ +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +#define bool _Bool +typedef unsigned char uchar; +typedef signed char schar; +typedef unsigned short ushort; +typedef unsigned long long uvlong; +typedef signed long long vlong; +typedef unsigned uint; + +#define in_range(x, Lo, Hi) ((uint) (x) - (Lo) <= (Hi) - (Lo)) /* lo <= x <= hi; lo > 0, hi > 0 */ +#define alignup(x, A) (((x) + ((A) - 1)) & -(A)) +#define arraylength(a) (sizeof(a) / sizeof 0[a]) +struct bytes { uchar *p; uint n; }; + +struct span { + struct span0 { + uint off; + uint len : 24, + file : 8; + } sl, /* original source location */ + ex; /* the location after #include/macro expansion */ +}; + +void _assertfmt(const char *file, int line, const char *func, const char *expr); +#ifdef __GNUC__ +#define assert(x) (!(x) ? _assertfmt(__FILE__,__LINE__,__func__,#x), __builtin_trap() : (void)0) +#else +#define assert(x) (void)(!(x) ? _assertfmt(__FILE__,__LINE__,__func__,#x), *(volatile int *)0 : 0) +#endif + +static inline uint +hashs(uint h, const char *s) +{ + while (*s) h = (uchar)*s++ + h*65599; + return h; +} +static inline uint +hashb(uint h, const void *d, uint n) +{ + + const uchar *b = d; + while (n--) h = *b++ + h*65599; + return h; +} +static inline uint +ptrhash(const void *p) { + return (uint)(size_t)p * 2654435761; +} +static inline uint +popcnt(uint x) { +#ifdef __GNUC__ + return __builtin_popcount(x); +#else + uint n = 0; + while (x) x >>= 1, ++n; + return n; +#endif +} + +/******************/ +/* COMPILER STATE */ +/******************/ + +enum cstd { + STDC89, + STDC99, + STDC11, + STDC23, +}; +struct option { + enum cstd cstd; + bool trigraph; + bool nocolor; +}; +extern struct option ccopt; + +/*************************/ +/** TYPE REPRESENTATION **/ +/*************************/ + +enum qualifier { + QCONST = 1<<0, + QVOLATILE = 1<<1, + QNORETURN = 1<<2, /* functions */ + QINLINE = 1<<3, /* functions */ +}; + +enum typetag { /* ordering is important here! */ + TYXXX, + TYENUM, + TYBOOL, TYCHAR, TYSCHAR, TYUCHAR, TYSHORT, TYUSHORT, TYINT, TYUINT, TYLONG, TYULONG, TYVLONG, TYUVLONG, + TYFLOAT, TYDOUBLE, TYLDOUBLE, + TYVOID, + TYVALIST, + TYPTR, TYARRAY, TYFUNC, + TYSTRUCT, TYUNION, + TYSIGNEDSET_ = 1<<TYSCHAR | 1<<TYSHORT | 1<<TYINT | 1<<TYLONG | 1<<TYVLONG, + TYUNSIGNEDSET_ = 1<<TYUCHAR | 1<<TYUSHORT | 1<<TYUINT | 1<<TYULONG | 1<<TYUVLONG, + TYSCALARSET_ = ((1u << (TYLDOUBLE - TYENUM + 1)) - 1) << TYENUM | 1<<TYPTR +}; + +enum typeflagmask { + TFCHLDQUAL = 3, + TFCHLDPRIM = 1<<2, + TFCHLDISDAT = 1<<3, +}; + +union type { + struct { + uchar t; /* type tag */ + union { + uchar flag; + uchar backing; /* type tag for enum backing int */ + }; + union { + struct { + uchar child; /* prim type tag */ + uchar arrlen; /* small array */ + }; + ushort dat; /* index into typedata */ + }; + }; + uint bits; +}; + +#define isprimt(t) in_range((t), TYBOOL, TYVALIST) +#define isintt(t) in_range((t), TYENUM, TYUVLONG) +#define issignedt(t) ((TYSIGNEDSET_ | targ_charsigned << TYCHAR) >> (t) & 1) +#define isunsignedt(t) ((TYUNSIGNEDSET_ | !targ_charsigned << TYCHAR) >> (t) & 1) +#define isfltt(t) in_range((t), TYFLOAT, TYLDOUBLE) +#define isaritht(t) in_range((t), TYENUM, TYLDOUBLE) +#define isscalart(t) (TYSCALARSET_ >> (t) & 1) +#define isptrcvtt(t) in_range((t), TYPTR, TYFUNC) +#define isaggt(t) in_range((t), TYSTRUCT, TYUNION) +#define isprim(ty) isprimt((ty).t) +#define isint(ty) isintt((ty).t) +#define issigned(ty) issignedt((ty).t) +#define isunsigned(ty) isunsignedt((ty).t) +#define isflt(ty) isfltt((ty).t) +#define isarith(ty) isaritht((ty).t) +#define isscalar(ty) isscalart((ty).t) +#define isptrcvt(ty) isptrcvtt((ty).t) +#define isagg(ty) isaggt((ty).t) +#define mktype(...) ((union type) {{ __VA_ARGS__ }}) + +struct enumvar { + const char *name; + union { vlong i; uvlong u; }; +}; + +struct field { + const char *name; + union type t; + ushort off; + uchar bitsiz, bitoff; +}; + +struct typedata { + uchar t; + ushort id; + union { + union type child; + struct { /* aggregates */ + const uchar *quals; /* packed N x 2bit array (NULL if no member has quals) */ + union { + struct field *fld; + const union type *param; + }; + }; + struct { + uchar backing; + struct enumvar *var; + }; + }; + union { + uint arrlen; /* array */ + struct { + short nmemb; + uchar align; + union { + struct { /* function */ + bool kandr : 1, variadic : 1; + }; + struct { /* aggregate */ + bool anyconst : 1, flexi : 1; + }; + }; + }; + }; + union { + uint siz; /* aggregate & array */ + union type ret; /* function */ + }; +}; + +extern struct typedata typedata[]; +extern const char *ttypenames[/*id*/]; + +#define tdqualsiz(nmemb) ((nmemb)/4 + ((nmemb)%4 != 0)) +static inline int +tdgetqual(const uchar *pqual, int idx) +{ + return pqual ? pqual[idx/4] >> 2*(idx%4) & 3 : 0; +} +static inline void +tdsetqual(uchar *pqual, int idx, int qual) +{ + assert(pqual); + pqual[idx/4] &= ~(3 << (2*(idx%4))); + pqual[idx/4] |= (qual&3) << (2*(idx%4)); +} + +bool isincomplete(union type); +uint typesize(union type); +uint typealign(union type); +union type mkptrtype(union type, int qual); +union type mkarrtype(union type t, int qual, uint n); +union type mkfntype(union type ret, uint n, const union type *, const uchar *qual, bool kandr, bool variadic); +union type mktagtype(const char *name, struct typedata *td); +union type completetype(const char *name, int id, struct typedata *td); +union type typedecay(union type); +bool assigncompat(union type dst, union type src); +enum typetag intpromote(enum typetag); +union type cvtarith(union type a, union type b); +static inline union type +typechild(union type t) +{ + if (t.t == TYENUM) return mktype(t.backing); + if (t.flag & TFCHLDPRIM) return mktype(t.child); + if (t.flag & TFCHLDISDAT) { + union type chld = mktype(typedata[t.dat].t, .dat = t.dat); + if (chld.t == TYENUM) chld.backing = typedata[t.dat].backing; + return chld; + } + return typedata[t.dat].child; +} +static inline uint +typearrlen(union type t) +{ + return (t.flag & TFCHLDPRIM) ? t.arrlen : typedata[t.dat].arrlen; +} + +/**********/ +/* TARGET */ +/**********/ + +extern uchar targ_primsizes[]; +extern uchar targ_primalign[]; +extern enum typetag targ_sizetype; +extern bool targ_charsigned, targ_bigendian; +void targ_init(const char *targ); + +/*********/ +/** MEM **/ +/*********/ + +struct arena { + uint cap : 31, + dyn : 1; + uint n; + struct arena *prev; + uchar mem[]; +}; + +#define vec_of(T) struct { T *p; int _cap; uint n; } + +struct arena *newarena(uint chunksiz); +void *alloc(struct arena **, uint siz, uint align); +void freearena(struct arena *); +void vinit_(void **p, int *pcap, void *inlbuf, int cap, uint siz); +void vpush_(void **p, int *pcap, uint *pn, uint siz); +void *vpushn_(void **p, int *pcap, uint *pn, uint siz, const void *dat, uint ndat); +extern void free(void *); +#define VINIT(inlbuf, Cap) { (inlbuf), (Cap) } +#define vfree(v) ((v)->_cap < 0 ? free((v)->p) : (void)0, memset((v), 0, sizeof*(v))) +#define vinit(v, inlbuf, Cap) (vfree(v), vinit_((void **)&(v)->p, &(v)->_cap, inlbuf, (Cap), sizeof *(v)->p)) +#define vpush(v, x) (vpush_((void **)&(v)->p, &(v)->_cap, &(v)->n, sizeof *(v)->p), \ + (v)->p[(v)->n++] = (x)) +#define vpushn(v, xs, N) vpushn_((void **)&(v)->p, &(v)->_cap, &(v)->n, sizeof *(v)->p, xs, N) + +struct bitset { uvlong u; }; +enum { BSNBIT = 8 * sizeof(uvlong) }; +#define BSSIZE(nbit) ((nbit) / BSNBIT + ((nbit) % BSNBIT != 0)) + +static inline bool +bstest(struct bitset *bs, uint i) +{ + return bs[i / BSNBIT].u >> i % BSNBIT & 1; +} + +static inline void +bszero(struct bitset *bs, uint siz) +{ + memset(bs, 0, siz * sizeof *bs); +} + +static inline bool +bsiter(uint *i, struct bitset *bs, uint siz) +{ + for (; *i < siz*BSNBIT; ++*i) + if (bstest(bs, *i)) return 1; + return 0; +} + +/********/ +/** IO **/ +/********/ + +struct wbuf { + char *buf; + const uint cap; + uint len; + const int fd; + bool err; +}; + +/* read-only file mapping that is at least 1 page larger than the real file + * so it's legal to read a few bytes beyond it to avoid some bounds checks */ +struct memfile { + const uchar *p; + uint n; +}; + +#define MEMBUF(buf, cap) { (buf), (cap), .fd = -1 } +#define FDBUF(buf, cap, fd_) { (buf), (cap), .fd = (fd_) } +extern struct wbuf bstdout, bstderr; +void iowrite(struct wbuf *, const void *src, int n); +void ioputc(struct wbuf *, uchar); +void ioflush(struct wbuf *); +int vbfmt(struct wbuf *, const char *, va_list ap); +int bfmt(struct wbuf *, const char *, ...); +#define pfmt(...) bfmt(&bstdout, __VA_ARGS__) +#define efmt(...) bfmt(&bstderr, __VA_ARGS__) +struct memfile mapopen(const char **err, const char *path); +void mapclose(struct memfile *); +int openfile(const char **err, struct memfile **, const char *path); +const char *getfilename(int id); +struct memfile *getfile(int id); +void addfileline(int id, uint off); +void getfilepos(int *line, int *col, int id, uint off); +void closefile(int id); +void fatal(const struct span *, const char *, ...); +void error(const struct span *, const char *, ...); +void warn(const struct span *, const char *, ...); +void note(const struct span *, const char *, ...); + +#endif /* COMMON_H_ */ + +/* vim:set ts=3 sw=3 expandtab: */ @@ -0,0 +1,225 @@ +#include "common.h" +#include "parse.h" + +static int +targ2hosttype(enum typetag t) +{ + if (isintt(t)) { + int siz = targ_primsizes[t]; + int sgn = issignedt(t); +#define U(Ty,Tag) if (!sgn & (siz == sizeof(unsigned Ty))) return Tag; +#define S(Ty,Tag) if ( sgn & (siz == sizeof(signed Ty))) return Tag; + U(char, TYUCHAR) + S(char, TYSCHAR) + U(short, TYUSHORT) + S(short, TYSHORT) + U(int, TYUINT) + S(int, TYINT) + U(long long, TYUVLONG) + S(long long, TYVLONG) +#undef U +#undef S + } else if (isfltt(t)) return t; + return 0; +} + +static bool +numcast(enum typetag t, struct expr *dst, const struct expr *src) +{ + struct expr tmp; + enum typetag td = targ2hosttype(t); + enum typetag ts = targ2hosttype(src->ty.t == TYENUM ? src->ty.backing : src->ty.t); + if (src == dst) tmp = *src, src = &tmp; + + assert(src->t == ENUMLIT); +#define TT(d,s) (td == d && ts == s) +#define TF(d) (td == d && isfltt(ts)) + if (!ts || !td) return 0; + else if (TT(TYFLOAT, TYFLOAT)) dst->f = (float) src->f; + else if (TT(TYFLOAT, TYDOUBLE)) dst->f = (float) src->f; + else if (TT(TYDOUBLE, TYFLOAT)) dst->f = src->f; + else if (TT(TYDOUBLE, TYDOUBLE)) dst->f = src->f; + else if (TT(TYFLOAT, TYUVLONG)) dst->f = (float) src->u; + else if (TT(TYDOUBLE, TYUVLONG)) dst->f = (double) src->u; + else if (td == TYFLOAT) dst->f = (float) src->i; + else if (td == TYDOUBLE) dst->f = (double) src->i; + else if (TF(TYUVLONG)) dst->u = src->f; + else if (TF(TYBOOL)) dst->i = (bool) src->f; + else if (isfltt(ts)) { dst->i = src->f; goto Narrow; } + else { + Narrow: + switch (td) { +#define I(Ty, Tag) case Tag: dst->i = (Ty) src->i; break; + I(bool, TYBOOL) + I(signed char, TYSCHAR) + I(unsigned char, TYUCHAR) + I(signed short, TYSHORT) + I(unsigned short, TYUSHORT) + I(signed int, TYINT) + I(unsigned int, TYUINT) + I(signed long long, TYVLONG) + I(unsigned long long, TYUVLONG) +#undef I + case TYFLOAT: dst->f = (float) src->f; break; + case TYDOUBLE: dst->f = src->f; break; + default: assert(0 && "bad cast?"); + } + } +#undef TT +#undef TF + + dst->t = ENUMLIT; + dst->ty = mktype(t); + return 1; +} + +static bool +unop(struct expr *ex, enum evalmode mode) +{ + struct expr *sub = ex->sub; + + if (sub->t != ENUMLIT && !eval(sub, mode)) return 0; + switch (ex->t) { + case ECAST: + case EPLUS: + break; + case ENEG: + if (isint(sub->ty)) sub->u = -sub->u; + else assert(isflt(sub->ty)), sub->f = -sub->f; + break; + case ECOMPL: + if (!isint(sub->ty)) return 0; + sub->u = ~sub->u; + break; + case ELOGNOT: + if (isint(sub->ty)) sub->u = !sub->u; + else assert(isflt(sub->ty)), sub->u = !sub->f; + break; + default: + return 0; + } + if (!numcast(ex->ty.t, ex, sub)) return 0; + return 1; +} + +static bool +binop(struct expr *ex, enum evalmode mode) +{ + union type oty; + bool flt; + bool sgn; + int t; + struct expr *lhs = &ex->sub[0], *rhs = &ex->sub[1]; + + if (!eval(lhs, mode)) return 0; + if (!eval(rhs, mode)) return 0; + if (in_range(ex->t, EADD, ESHR)) + oty = ex->ty; + else + oty = cvtarith(lhs->ty, rhs->ty); + if (!numcast(oty.t, lhs, lhs)) return 0; + if (!numcast(oty.t, rhs, rhs)) return 0; + flt = isflt(oty); + sgn = issigned(oty); + switch (ex->t) { +#define ef else if + case EADD: if (flt) lhs->f += rhs->f; + else lhs->u += rhs->u; + break; + case ESUB: if (flt) lhs->f -= rhs->f; + else lhs->u -= rhs->u; + break; + case EMUL: if (flt) lhs->f *= rhs->f; + ef (sgn) lhs->i *= rhs->i; + else lhs->u *= rhs->u; + break; + case EDIV: if (!flt && !rhs->i) return 0; + ef (flt) lhs->f /= rhs->f; + ef (sgn) lhs->i /= rhs->i; + else lhs->u /= rhs->u; + break; + case EREM: if (!rhs->i) return 0; + ef (sgn) lhs->i %= rhs->i; + else lhs->u %= rhs->u; + break; + case EBAND: lhs->u &= rhs->u; + break; + case EBIOR: lhs->u |= rhs->u; + break; + case EXOR: lhs->u ^= rhs->u; + break; + case ESHL: if (sgn && lhs->i < 0) return 0; + ef (rhs->i >= 8*targ_primsizes[oty.t]) return 0; + lhs->u <<= rhs->u; + break; + case ESHR: if (rhs->i >= 8*targ_primsizes[oty.t]) return 0; + ef (sgn) lhs->i >>= rhs->i; + else lhs->u >>= rhs->u; + break; + case ELOGAND: if (flt) t = lhs->f && rhs->f; + else t = lhs->u && rhs->u; + lhs->u = t; + break; + case ELOGIOR: if (flt) t = lhs->f || rhs->f; + else t = lhs->u || rhs->u; + lhs->u = t; + break; + case EEQU: if (flt) t = lhs->f == rhs->f; + else t = lhs->u == rhs->u; + lhs->u = t; + break; + case ENEQ: if (flt) t = lhs->f != rhs->f; + else t = lhs->u != rhs->u; + lhs->u = t; + break; + case ELTH: if (flt) t = lhs->f < rhs->f; + ef (sgn) t = lhs->i < rhs->i; + else t = lhs->u < rhs->u; + lhs->u = t; + break; + case EGTH: if (flt) t = lhs->f > rhs->f; + ef (sgn) t = lhs->i > rhs->i; + else t = lhs->u > rhs->u; + lhs->u = t; + break; + case ELTE: if (flt) t = lhs->f <= rhs->f; + ef (sgn) t = lhs->i <= rhs->i; + else t = lhs->u <= rhs->u; + lhs->u = t; + break; + case EGTE: if (flt) t = lhs->f >= rhs->f; + ef (sgn) t = lhs->i >= rhs->i; + else t = lhs->u >= rhs->u; + lhs->u = t; + break; + default: return 0; +#undef ef + } + return numcast(ex->ty.t, ex, lhs); +} + +bool +eval(struct expr *ex, enum evalmode mode) +{ + if (ex->t == ENUMLIT) { + if (mode <= EVINTCONST) return isint(ex->ty); + return 1; + } + if (isunop(ex->t)) return unop(ex, mode) && eval(ex, mode); + if (isbinop(ex->t)) return binop(ex, mode) && eval(ex, mode); + if (ex->t == ESEQ) { + if (!eval(&ex->sub[0], mode)) return 0; + *ex = ex->sub[1]; + return eval(ex, mode); + } + if (ex->t == ECOND) { + if (!eval(&ex->sub[0], mode)) return 0; + if (!eval(&ex->sub[1], mode)) return 0; + if (!eval(&ex->sub[2], mode)) return 0; + *ex = ex->sub[!ex->sub[0].u + 1]; + return eval(ex, mode); + } + return 0; +} + +/* vim:set ts=3 sw=3 expandtab: */ @@ -0,0 +1,836 @@ +#include "parse.h" +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/sysmacros.h> +#include <unistd.h> +#include <errno.h> + +static char stdoutbuf[1024], stderrbuf[1024]; +struct wbuf bstdout = FDBUF(stdoutbuf, sizeof stdoutbuf, 1), + bstderr = FDBUF(stderrbuf, sizeof stderrbuf, 2); + +void +iowrite(struct wbuf *buf, const void *Src, int n) +{ + const uchar *src = Src; + + while (n > 0) { + int avail = buf->cap - buf->len; + int amt = avail < n ? avail : n; + + memcpy(buf->buf + buf->len, src, amt); + n -= amt; + if ((buf->len += amt) == buf->cap) { + if (buf->fd < 0) { + buf->err = 1; + return; + } + ioflush(buf); + } + } +} + +void +ioflush(struct wbuf *buf) +{ + int i, ret; + + buf->err = 0; + if (buf->fd < 0) { + buf->len = 0; + return; + } + for (i = 0; buf->len > 0;) { + ret = write(buf->fd, buf->buf + i, buf->len); + if (ret > 0) { + buf->len -= ret; + i += ret; + } else if (errno == EAGAIN || errno == EWOULDBLOCK) { + continue; + } else { + buf->err = 1; + break; + } + } +} + +void +ioputc(struct wbuf *buf, uchar c) +{ + if (buf->len == buf->cap) { + if (buf->fd < 0) { + buf->err = 1; + return; + } + ioflush(buf); + } + buf->buf[buf->len++] = c; +} + +static int +putquoted(struct wbuf *buf, uchar c, uchar qchar, int next) +{ + if (c == qchar || c == '\\' || !aisprint(c)) { + int n = (ioputc(buf, '\\'), 1); + uchar cseq; + + switch (c) { + case '\\': + case '\'': + case '"': + cseq = c; + Charseq: + n += (ioputc(buf, cseq), 1); + break; + case '\a': cseq = 'a'; goto Charseq; + case '\b': cseq = 'b'; goto Charseq; + case '\f': cseq = 'f'; goto Charseq; + case '\r': cseq = 'r'; goto Charseq; + case '\t': cseq = 't'; goto Charseq; + case '\v': cseq = 'v'; goto Charseq; + case '\n': cseq = 'n'; goto Charseq; + default: + if (in_range(next, '0', '7')) + n += bfmt(buf, "%.3o", c); + else + n += bfmt(buf, "%o", c); + } + return n; + } + if (c == '?' && next == '?') { + return ioputc(buf, c), ioputc(buf, '\\'), 2; + } + return ioputc(buf, c), 1; +} + +static int +putuint(struct wbuf *buf, uvlong x, int base, bool lower) +{ + uchar tmp[64]; + uchar *end = tmp + sizeof(tmp); + uchar *s = end; + switch (base) { + case 2: + do *--s = '0' + x%2; while (x >>= 1); + break; + case 8: + do *--s = '0' + x%8; while (x >>= 3); + break; + case 10: + do *--s = '0' + x%10; while (x /= 10); + break; + case 16: + do *--s = "0123456789ABCDEF"[x&15] | (-lower & 0x20); while (x >>= 4); + break; + default: + assert(0&&"base"); + } + iowrite(buf, s, end - s); + return end - s; +} + +static void +fmterr(const char *fmt, ...) +{ + va_list ap; + + efmt("fmt Error: "); + va_start(ap, fmt); + vbfmt(&bstderr, fmt, ap); + va_end(ap); + ioputc(&bstderr, '\n'); + ioflush(&bstderr); + ioflush(&bstdout); + abort(); +} + +#define bwriteS(buf, S) (iowrite(buf, S, sizeof S - 1), sizeof S - 1) +#define bputc(B, C) (ioputc(B, C), 1) + +static int +priquals(struct wbuf *buf, int q) +{ + const char s[] = " const volatile", *p = s; + int m = sizeof s - 1; + if (!q) return 0; + else if (q == QCONST) m -= 9; + else if (q == QVOLATILE) p += 6, m -= 6; + else assert(q == (QCONST | QVOLATILE)); + iowrite(buf, p, m); + return m; +} +static int +pritypebefore(struct wbuf *buf, union type ty, int qual) +{ + const char *s, *s2; + union type chld; + int n; + switch (ty.t) { + case TYVOID: s = "void"; Prim: n = bfmt(buf, "%s", s); return n + priquals(buf, qual); + case TYCHAR: s = "char"; goto Prim; + case TYSCHAR: s = "signed char"; goto Prim; + case TYUCHAR: s = "unsigned char"; goto Prim; + case TYSHORT: s = "short"; goto Prim; + case TYUSHORT: s = "unsigned short"; goto Prim; + case TYINT: s = "int"; goto Prim; + case TYUINT: s = "unsigned int"; goto Prim; + case TYLONG: s = "long"; goto Prim; + case TYULONG: s = "unsigned long"; goto Prim; + case TYVLONG: s = "long long"; goto Prim; + case TYUVLONG: s = "unsigned long long"; goto Prim; + case TYFLOAT: s = "float"; goto Prim; + case TYDOUBLE: s = "double"; goto Prim; + case TYLDOUBLE:s = "long double"; goto Prim; + case TYPTR: + chld = typechild(ty); + n = pritypebefore(buf, chld, ty.flag & TFCHLDQUAL); + //if (chld.t != TYPTR) n += ioputc(buf, ' '); + if (chld.t == TYARRAY || chld.t == TYFUNC) + n += bputc(buf, '('); + n += bputc(buf, '*'); + n += priquals(buf, qual); + return n; + case TYARRAY: + return pritypebefore(buf, typechild(ty), ty.flag & TFCHLDQUAL); + case TYFUNC: + return pritypebefore(buf, typedata[ty.dat].ret, 0); + case TYSTRUCT: + s = "struct"; + Tagged: + n = bfmt(buf, "%s %s", s, (s2 = ttypenames[typedata[ty.dat].id]) ? s2 : "(anonymous)"); + return n + priquals(buf, qual); + case TYUNION: + s = "union"; + goto Tagged; + case TYENUM: + s = "enum"; + goto Tagged; + default: + return bfmt(buf, "?\?%d?",ty.t); + } +} + +static int +pritypeafter(struct wbuf *buf, union type ty, int qual) +{ + const struct typedata *td; + int n = 0; + switch (ty.t) { + case TYPTR: + if (typechild(ty).t == TYARRAY || typechild(ty).t == TYFUNC) + n += bputc(buf, ')'); + n += pritypeafter(buf, typechild(ty), ty.flag & TFCHLDQUAL); + break; + case TYARRAY: + n += bputc(buf, '['); + if (typearrlen(ty)) + n += bfmt(buf, "%u", typearrlen(ty)); + n += bputc(buf, ']'); + n += pritypeafter(buf, typechild(ty), ty.flag & TFCHLDQUAL); + break; + case TYFUNC: + td = &typedata[ty.dat]; + n += bputc(buf, '('); + for (int i = 0; i < td->nmemb; ++i) { + n += bfmt(buf, "%tq", td->param[i], tdgetqual(td->quals, i)); + if (i < td->nmemb - 1 || td->variadic) + n += bwriteS(buf, ", "); + } + if (td->variadic) n += bwriteS(buf, "..."); + else if (td->nmemb == 0 && !td->kandr) n += bwriteS(buf, "void"); + n += bwriteS(buf, ")"); + n += pritypeafter(buf, td->ret, 0); + break; + } + return n; +} + +static int +fmttype(struct wbuf *buf, union type ty, int qual) +{ + int n = pritypebefore(buf, ty, qual); + n += pritypeafter(buf, ty, qual); + return n; +} + +static int +putdouble(struct wbuf *buf, double x, vlong prec) +{ + int n = 0; + if (x < 0) { + n += bputc(buf, '-'); + x = -x; + } + + x += 0.5 / prec; // round last decimal + if (x >= (double)(-1ULL>>1)) { // out of range? + n += bwriteS(buf, "inf"); + } else { + vlong integral = x; + vlong fractional = (x - integral)*prec; + n += putuint(buf, integral, 10, 0); + n += bputc(buf, '.'); + for (vlong i = prec/10; i > 1; i /= 10) { + if (i > fractional) { + n += bputc(buf, '0'); + } + } + n += putuint(buf, fractional, 10, 0); + } + return n; +} + +int +vbfmt(struct wbuf *out, const char *fmt, va_list ap) +{ + bool quote, unsgnd, numlong, lower; + int base; + vlong i; + int pad, prec, q; + const char *s; + void *p; + struct token *tok; + union type ty; + double f; + char tmpbuf1[70], tmpbuf2[70]; + struct wbuf tmp1 = MEMBUF(tmpbuf1, sizeof tmpbuf1); + struct wbuf tmp2 = MEMBUF(tmpbuf2, sizeof tmpbuf2); + struct wbuf *buf = out; + int n = 0, prevn; + + while (*fmt) { + buf = out; + if (*fmt++ != '%') { + n += bputc(buf, fmt[-1]); + continue; + } + if (*fmt == '%') { + n += bputc(buf, *fmt++); + continue; + } + fmt += quote = *fmt == '\''; + pad = 0; + if (aisdigit(*fmt)) { /* left pad */ + for (; aisdigit(*fmt); ++fmt) + pad = pad*10 + *fmt-'0'; + if (pad) { + tmp1.len = 0; + buf = &tmp1; + } + } else if (*fmt == '-') { /* right pad */ + if (!aisdigit(*++fmt)) + fmterr("padding amount expected"); + for (; aisdigit(*fmt); ++fmt) + pad = pad*10 - (*fmt-'0'); + } + prec = -1; + if (*fmt == '.') { + if (!aisdigit(*++fmt)) + fmterr("precision expected"); + prec = 0; + for (; aisdigit(*fmt); ++fmt) + prec = prec*10 + *fmt-'0'; + } + fmt += unsgnd = *fmt == 'u'; + fmt += numlong = *fmt == 'l'; + lower = 0; + prevn = n; + switch (*fmt++) { + case 'c': /* character */ + if (quote) { + n += bputc(buf, '\''); + n += putquoted(buf, va_arg(ap, int), '\'', -1); + n += bputc(buf, '\''); + } else { + n += bputc(buf, va_arg(ap, int)); + } + break; + case 's': /* nullterminated string */ + s = va_arg(ap, const char *); + assert(s && "%s null!"); + if (quote) { + n += bputc(buf, '"'); + for (; *s; ++s) n += putquoted(buf, *s, '"', s[1]); + n += bputc(buf, '"'); + } else { + while (*s) n += bputc(buf, *s++); + } + break; + case 'S': /* string ptr + len */ + s = va_arg(ap, const char *); + i = va_arg(ap, uint); + if (quote) { + n += bputc(buf, '"'); + for (; i--; ++s) n += putquoted(buf, *s, '"', i ? s[1] : -1); + n += bputc(buf, '"'); + } else { + iowrite(buf, s, i); + n += i; + } + break; + case 'd': /* decimal */ + base = 10; + Int: + if (base != 10) unsgnd = 1; + i = numlong ? va_arg(ap, vlong) : (unsgnd ? va_arg(ap, uint) : (vlong)va_arg(ap, int)); + tmp2.len = 0; + if (quote) { + switch (base) { + case 2: n += bwriteS(buf, "0b"); break; + case 8: n += bwriteS(buf, "0"); break; + case 16: n += bwriteS(buf, "0x"); break; + } + } + if (!unsgnd && i < 0) { + n += bputc(buf, '-'); + i = -i; + } + n += putuint(prec > 0 ? &tmp2 : buf, i, base, lower); + if (prec > 0) { + int fil = prec - tmp2.len; + while (fil-- > 0) n += bputc(buf, '0'); + iowrite(buf, tmp2.buf, tmp2.len); + } + break; + case 'o': /* octal */ + base = 8; + goto Int; + case 'b': /* binary */ + base = 2; + goto Int; + case 'x': case 'X': /* hexadecimal */ + base = 16; + lower = fmt[-1] == 'x'; + goto Int; + case 'p': /* pointer */ + p = va_arg(ap, void *); + if (!p && quote) { + n += bwriteS(buf, "NULL"); + } else { + n += bwriteS(buf, "0x"); + tmp2.len = 0; + n += putuint(prec > 0 ? &tmp2 : buf, (uvlong)p, 16, 1); + if (prec > 0) { + int fil = prec - tmp2.len; + while (fil-- > 0) n += bputc(buf, '0'); + iowrite(buf, tmp2.buf, tmp2.len); + } + } + break; + case 'f': /* float */ + f = va_arg(ap, double); + n += putdouble(buf, f, prec <= 0 ? 10e6 : prec); + break; + case 't': /* token/tokentag/type */ + switch (*fmt++) { + case 'k': /* tk token */ + tok = va_arg(ap, struct token *); + Tok: + switch (tok->t) { + case TKXXX: + n += bwriteS(buf, "???"); + break; + case TKNUMLIT: + s = (const char *)(getfile(tok->span.sl.file)->p + tok->span.sl.off); + if (quote) n += bputc(buf, '`'); + for (i = tok->span.sl.len; i--; ++s) + if (*s != '\\' && *s != '\n') n += bputc(buf, *s); + if (quote) n += bputc(buf, '\''); + break; + case TKSTRLIT: + n += bfmt(buf, "%'S", tok->s.p, tok->s.n-1); + break; + case TKIDENT: + n += bfmt(buf, "`%s'", tok->ident); + break; + case TKEOF: + n += bwriteS(buf, "<end-of-file>"); + break; + default: + if (quote) n += bputc(buf, '`'); + if (in_range(tok->t, TKWBEGIN_, TKWEND_)) { + n += bfmt(buf, "%s", tok->ident); + } else if (aisprint(tok->t)) { + n += bputc(buf, tok->t); + } else if (tok->t >> 16) { + n += bputc(buf, tok->t >> 0); + n += bputc(buf, tok->t >> 8); + n += bputc(buf, tok->t >> 16); + } else { + n += bputc(buf, tok->t >> 0); + n += bputc(buf, tok->t >> 8); + } + if (quote) n += bputc(buf, '\''); + break; + } + break; + case 't': /* tt token tag */ + tok = &(struct token) { va_arg(ap, int) }; + switch (tok->t) { + case TKNUMLIT: + n += bfmt(buf, "numeric literal"); + break; + case TKSTRLIT: + n += bfmt(buf, "string literal"); + break; + case TKIDENT: + n += bfmt(buf, "identifier"); + break; + case TKEOF: + n += bfmt(buf, "<end-of-file>"); + break; + default: + if (tok->t >= TKWBEGIN_ && tok->t <= TKWEND_) { + static const char *tab[] = { + #define _(kw, c) #kw, + #include "keywords.def" + #undef _ + }; + n += bfmt(buf, "%s", tab[tok->t - TKWBEGIN_]); + break; + } + goto Tok; + } + break; + case 'y': /* ty type */ + ty = va_arg(ap, union type); + n += fmttype(buf, ty, 0); + break; + case 'q': /* tq qualified type */ + ty = va_arg(ap, union type); + q = va_arg(ap, int); + n += fmttype(buf, ty, q); + break; + default: + if (fmt[-1] == ' ' || !aisprint(fmt[-1])) + fmterr("expected format specifier"); + else + fmterr("unknown format specifier 't%c'", fmt[-1]); + } + break; + case 'g': /* graphics rendition (color) */ + if (!ccopt.nocolor) n += bwriteS(buf, "\033["); + while (*fmt++ != '.') { + if (ccopt.nocolor) continue; + n += bputc(buf, fmt[-1]); + } + if (!ccopt.nocolor) n += bputc(buf, 'm'); + break; + default: + if (unsgnd || numlong) { + --fmt; + base = 10; + goto Int; + } + if (fmt[-1] == ' ' || !aisprint(fmt[-1])) + fmterr("expected format specifier"); + else + fmterr("unknown format specifier '%c'", fmt[-1]); + } + if (pad > 0) { /* left pad */ + while (pad-- > buf->len) + n += bputc(out, ' '); + iowrite(out, buf->buf, buf->len); + out->err |= buf->err; + } else if (pad < 0) { /* right pad */ + int len = n - prevn; + while (pad++ < -len) + n += bputc(out, ' '); + } + } + return buf->err ? -1 : n; +} + +int +bfmt(struct wbuf *buf, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = vbfmt(buf, fmt, ap); + va_end(ap); + return ret; +} + +static uint pagesiz; + +struct memfile +mapopen(const char **err, const char *path) +{ + struct stat stat; + int fd = -1; + void *p = NULL; + struct memfile f = {0}; + uint mapsiz; + + assert("nullp" && err && path); + + if (!pagesiz) pagesiz = sysconf(_SC_PAGESIZE); + *err = NULL; + + if ((fd = open(path, O_RDONLY)) < 0) + goto Err; + if (fstat(fd, &stat) != 0) + goto Err; + + if (!S_ISREG(stat.st_mode)) { + uint cap = 0; + int ret; + + do { + enum { CHUNKSIZ = 1<<16 }; + if ((cap += CHUNKSIZ) < CHUNKSIZ) { + /* overflow */ + free(p); + goto Big; + } + if (!(f.p = p ? realloc(p, cap) : malloc(cap))) { + free(p); + goto Err; + } + p = (void *)f.p; + ret = read(fd, p, CHUNKSIZ); + if (ret >= 0) + f.n += ret; + else if (errno != EAGAIN && errno != EWOULDBLOCK) + goto Err; + } while (ret != 0); + + close(fd); + fd = -1; + mapsiz = alignup(f.n, pagesiz); + if ((f.p = mmap(NULL, mapsiz + pagesiz, PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + free(p); + goto Err; + } + memcpy((void *)f.p, p, f.n); + free(p); + mprotect((void *)f.p, mapsiz + pagesiz, PROT_READ); + return f; + } + + if (stat.st_size > UINT_MAX) { + Big: + errno = EFBIG; + goto Err; + } + mapsiz = alignup(stat.st_size, pagesiz); + if ((p = mmap(NULL, mapsiz + pagesiz, PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + goto Err; + if (mapsiz > 0 && mmap(p, mapsiz, PROT_READ, MAP_FIXED|MAP_PRIVATE, fd, 0) == MAP_FAILED) + goto Err; + + close(fd); + f.p = p; + f.n = stat.st_size; + return f; +Err: + if (fd >= 0) close(fd); + if (!*err) *err = strerror(errno); + return f; +} + +void +_assertfmt(const char *file, int line, const char *func, const char *expr) +{ + ioflush(&bstdout); + efmt("%s:%d: %s: Assertion `%s' failed.\n", file, line, func, expr); + ioflush(&bstderr); +} + +void +mapclose(struct memfile *f) +{ + assert(f->p); + munmap((void *)f->p, alignup(f->n, pagesiz) + pagesiz); + memset(f, 0, sizeof *f); +} + +static struct file { + const char *path; + struct memfile f; + vec_of(uint) lineoffs; +} files[256]; +static int nfiles; + +int +openfile(const char **err, struct memfile **pf, const char *path) +{ + struct file *f; + + assert(nfiles + 1 < arraylength(files) && "too many files"); + f = &files[nfiles]; + f->path = path; + f->f = mapopen(err, path); + if (*err) return -1; + vinit(&f->lineoffs, NULL, 50); + vpush(&f->lineoffs, 0); + *pf = &f->f; + return nfiles++; +} + +const char * +getfilename(int id) +{ + assert(id < nfiles); + return files[id].path; +} + +struct memfile * +getfile(int id) +{ + assert(id < nfiles); + return &files[id].f; +} + +void +addfileline(int id, uint off) +{ + vec_of(uint) *lineoffs; + + assert(id < nfiles); + lineoffs = (void *)&files[id].lineoffs; + if (lineoffs->n) assert(off > lineoffs->p[lineoffs->n-1]); + vpush(lineoffs, off); +} + +void +getfilepos(int *line, int *col, int id, uint off) +{ + uint *offs, n; + int l = 0, h, i = 0; + + assert(id < nfiles); + offs = files[id].lineoffs.p; + n = files[id].lineoffs.n; + h = n - 1; + + /* binary search over offsets array */ + while (l <= h) { + i = (l + h) / 2; + if (offs[i] < off) l = i + 1; + else if (offs[i] > off) h = i - 1; + else break; + } + i -= offs[i] > off; + *line = i + 1; + *col = off - offs[i] + 1; +} + +void +closefile(int id) +{ + assert(id >= 0 && id < nfiles); + mapclose(&files[id].f); +} + +enum diagkind { DGERROR, DGWARN, DGNOTE, }; + +void +vdiag(const struct span *span, enum diagkind kind, const char *fmt, va_list ap) +{ + static const char *label[] = { "error", "warning", "note" }; + static const char *color[] = { "%g1;31.", "%g1;35.", "%g1;36." }; + int line, col; + struct memfile *f; + const struct span0 *loc; + + if (span) { + loc = span->ex.len ? &span->ex : &span->sl; + f = getfile(loc->file); + getfilepos(&line, &col, loc->file, loc->off); + efmt("%s:%d:%d: ", getfilename(loc->file), line, col); + } + efmt(color[kind]); + efmt("%s: %g.", label[kind]); + vbfmt(&bstderr, fmt, ap); + efmt("\n"); + if (span) { + uint i; + int j, nmark; + char mark = '^'; + + /* find start of line */ + for (i = loc->off - 1; i + 1 > 0 && f->p[i] != '\n'; --i) ; + if (i) ++i; + + nmark = loc->len; + while (i < loc->off + loc->len) { + int end; + int curoff = efmt("%5d | ", line); + for (end = 0; f->p[i] != '\n' && i < f->n; ++i, ++end) + ioputc(&bstderr, f->p[i]); + ++i; + ioputc(&bstderr, '\n'); + + for (j = 0; j < curoff + col - 1; ++j) + ioputc(&bstderr, j == curoff-2 ? '|' : ' '); + efmt(color[kind]); + j -= curoff; + do { + ioputc(&bstderr, mark); + mark = '~'; + } while (--nmark && ++j < end); + col = 1; + ++line; + efmt("%g.\n"); + } + ioputc(&bstderr, '\n'); + } + + if (span && loc == &span->ex && span->sl.len) + if (span->ex.file != span->sl.file || !((uint) span->sl.off - span->ex.off < span->ex.len)) + note(&(struct span){ span->sl }, "expanded from here"); +} + +void +fatal(const struct span *span, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vdiag(span, DGERROR, fmt, ap); + va_end(ap); + efmt("Aborting due to previous error.\n"); + exit(1); +} + +int nerror; + +void +error(const struct span *span, const char *fmt, ...) +{ + va_list ap; + + ++nerror; + va_start(ap, fmt); + vdiag(span, DGERROR, fmt, ap); + va_end(ap); +} + +void +warn(const struct span *span, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vdiag(span, DGWARN, fmt, ap); + va_end(ap); +} + +void +note(const struct span *span, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vdiag(span, DGNOTE, fmt, ap); + va_end(ap); +} + +/* vim:set ts=3 sw=3 expandtab: */ @@ -0,0 +1,221 @@ +#include "ir.h" +#include "common.h" + +uchar type2cls[TYARRAY + 1]; +uchar cls2siz[KF8+1]; +const uchar siz2intcls[] = { [1] = KI4, [2] = KI4, [4] = KI4, [8] = KI8 }; + +static vec_of(struct irdat) dats; +struct instr instr[1<<14]; +static int ninstr; +vec_of(struct ircall) calls; + +void +irinit(struct function *fn) +{ + static struct ircall callsbuf[64]; + + ninstr = 0; + vinit(&calls, callsbuf, arraylength(callsbuf)); + if (!type2cls[TYINT]) { + for (int i = TYCHAR; i <= TYUVLONG; ++i) { + int siz = targ_primsizes[i]; + type2cls[i] = siz < 8 ? KI4 : KI8; + } + type2cls[TYFLOAT] = KF4; + type2cls[TYDOUBLE] = KF8; + type2cls[TYPTR] = KIP; + type2cls[TYARRAY] = KIP; + cls2siz[KI4] = cls2siz[KF4] = 4; + cls2siz[KI8] = cls2siz[KF8] = 8; + cls2siz[KIP] = targ_primsizes[TYPTR]; + } + fn->entry = fn->curblk = alloc(&fn->arena, sizeof(struct block), 0); + fn->entry->lprev = fn->entry->lnext = fn->entry; +} + +struct xcon conht[1 << 12]; + +static int +addcon(const struct xcon *con) +{ + uint h = hashb(0, con, sizeof *con); + uint i = h, n = arraylength(conht); + assert(con->issym || con->cls); + for (;; ++i) { + i &= arraylength(conht) - 1; + if (!conht[i].issym && !conht[i].cls) { + conht[i] = *con; + return i; + } else if (!memcmp(&conht[i], con, sizeof *con)) { + return i; + } + assert(--n > 0 && "conht full"); + } +} + +union irref +adddat(struct function *fn, const struct irdat *dat) +{ + assert(dats.n < 1u<<29); + vpush(&dats, *dat); + /* return mkref(RDAT, dats.n - 1); */ +} + +static void +targwrite2(uchar *d, ushort x) +{ + if (targ_bigendian) + d[0] = x >> 8, d[1] = x; + else + d[0] = x, d[1] = x >> 8; +} + +static void +targwrite4(uchar *d, uint x) +{ + if (targ_bigendian) + d[0] = x >> 24, d[1] = x >> 16, d[2] = x >> 8, d[3] = x; + else + d[0] = x, d[1] = x >> 8, d[2] = x >> 16, d[3] = x >> 24; +} + +static void +targwrite8(uchar *d, uvlong x) +{ + if (targ_bigendian) + targwrite4(d, x >> 32), targwrite4(d + 4, x); + else + targwrite4(d, x), targwrite4(d + 4, x >> 32); +} + +void +conputdat(struct irdat *dat, uint off, enum typetag t, const void *src) +{ + uint siz = targ_primsizes[t]; + bool iszero = 1; + uchar *pdat; + assert(off + siz <= dat->siz); + for (uint i = 0; i < siz; ++i) { + if (((uchar *)src) != 0) { + iszero = 0; + break; + } + } + if (iszero && (dat->siz <= 8 || dat->dat.n < off)) + return; + + if (dat->siz > 8) + while (off + siz > dat->dat.n) + vpush(&dat->dat, 0); + pdat = dat->siz <= 8 ? dat->sdat : dat->dat.p; + + switch (siz) { + case 1: pdat[off] = *(uchar *)src; break; + case 2: targwrite2(&pdat[off], *(ushort *)src); break; + case 4: targwrite4(&pdat[off], *(uint *)src); break; + case 8: targwrite8(&pdat[off], *(uvlong *)src); break; + default: assert(0); + } +} + +union irtype +mkirtype(union type t) +{ + assert(t.t != TYVOID); + if (isscalar(t)) return (union irtype) { .cls = type2cls[t.t] }; + assert(isagg(t)); + return (union irtype) { .isagg = 1, .dat = t.dat }; +} + +union irref +mkintcon(struct function *fn, enum irclass k, vlong i) +{ + if (i < 1ll << 28 && i >= -(1ll << 28)) { + return mkref(RICON, i); + } else if (k == KI4) { + struct xcon con = { 0, k, .i4 = i }; + return mkref(RXCON, addcon(&con)); + } else { + struct xcon con = { 0, k, .i8 = i }; + return mkref(RXCON, addcon(&con)); + } +} + +union irref +mkfltcon(struct function *fn, enum irclass k, double f) +{ + struct xcon con = { 0, k }; + if (k == KF4) con.fs = f; + else con.fd = f; + return mkref(RXCON, addcon(&con)); +} + +union irref +mksymref(struct function *fn, const char *s) +{ + struct xcon con = { 1, KIP, .sym = s }; + return mkref(RXCON, addcon(&con)); +} + +union irref +mkcall(struct function *fn, union type fnty, uint narg, union irref *args, union irtype *typs) +{ + const struct typedata *td = &typedata[fnty.dat]; + struct ircall call = { narg, td->variadic ? td->nmemb : -1 }; + + assert(td->variadic ? narg >= td->nmemb : narg == td->nmemb); + if (narg) { + call.args = alloc(&fn->arena, narg*sizeof *args + narg*sizeof(union irtype), 0); + call.typs = (union irtype *)((char *)call.args + narg*sizeof *args); + memcpy(call.args, args, narg*sizeof *args); + memcpy(call.typs, typs, narg*sizeof *typs); + } + vpush(&calls, call); + return mkref(RCALL, calls.n-1); +} + +union irref +addinstr(struct function *fn, struct instr ins) +{ + assert(ninstr < arraylength(instr)); + assert(fn->curblk != NULL); + instr[ninstr] = ins; + vpush(&fn->curblk->ins, ninstr); + return mkref(RTMP, ninstr++); +} + +struct block * +newblk(struct function *fn) +{ + struct block *blk = alloc(&fn->arena, sizeof(struct block), 0); + memset(blk, 0, sizeof *blk); + return blk; +} + +void +useblk(struct function *fn, struct block *blk) +{ + if (fn->curblk) assert(fn->curblk->jmp.t && "never finished block"); + if (blk) assert(!blk->jmp.t && "reusing built block"); + if (!blk->lprev) { /* initialize */ + blk->lnext = fn->entry; + blk->lprev = fn->entry->lprev; + blk->lprev->lnext = blk; + blk->id = blk->lprev->id + 1; + fn->entry->lprev = blk; + } + fn->curblk = blk; +} + +void +putjump(struct function *fn, enum jumpkind j, union irref arg, struct block *t, struct block *f) +{ + fn->curblk->jmp.t = j; + fn->curblk->jmp.arg = arg; + fn->curblk->s1 = t; + fn->curblk->s2 = f; + fn->curblk = NULL; +} + +/* vim:set ts=3 sw=3 expandtab: */ @@ -0,0 +1,117 @@ +#include "common.h" + +enum irclass { + KXXX, + KI4, KI8, KIP, + KF4, KF8, +}; + +#define kisint(k) in_range((k), KI4, KIP) +#define kisflt(k) in_range((k), KF4, KF8) + +union irtype { + struct { ushort isagg : 1, cls : 15; }; + struct { ushort _ : 1, dat : 15; }; + ushort bits; +}; + +struct irdat { + uchar align : 7, mut : 1; + uint siz; + union { + vec_of(uchar) dat; + uchar sdat[8]; + }; + struct symref { + struct symref *next; + const char *sym; + uint off; + vlong addend; + } *syms; +}; + +struct xcon { + bool issym; + uchar cls; + union { + const char *sym; + int i4; + vlong i8; + float fs; + double fd; + }; +}; + +struct ircall { + short narg; + short vararg; /* first variadic arg or -1 */ + union irtype *typs; + union irref *args; +}; + +enum irrefkind { + RNONE, RTMP, RARG, RICON, RXCON, RSYM, RCALL +}; + +union irref { + struct { uint t : 3, idx : 29; }; + struct { signed _0: 3, i : 29; }; /* RICON */ + uint bits; +}; + +enum op { + Oxxx, +#define _(o,...) O##o, +#include "op.def" +#undef _ +}; + +struct instr { + uchar op; + uchar cls; + union irref l; + union irref r; +}; + +enum jumpkind { + JXXX, Jb, Jbcnd, Jret, Jrets, +}; + +struct block { + int id; + struct block *s1, *s2; + struct { uchar t; union irref arg; } jmp; + vec_of(ushort) ins; + struct block *lprev, *lnext; +}; + +struct function { + struct arena *arena; + const char *name; + struct block *entry, *curblk; + union type fnty, retty, *paramty; + uint nblk; + uint nparam; + bool globl; +}; + +extern uchar type2cls[]; +extern uchar cls2siz[]; +extern const uchar siz2intcls[]; +void irinit(struct function *); +#define NOREF ((union irref) {0}) +#define mkref(t, x) ((union irref) {{ (t), (x) }}) +#define mkzerocon() ((union irref){ RICON, 0 }) +union irtype mkirtype(union type); +union irref mkintcon(struct function *, enum irclass, vlong); +union irref mkfltcon(struct function *, enum irclass, double); +union irref mksymref(struct function *, const char *); +void conputdat(struct irdat *, uint off, enum typetag t, const void *dat); +union irref mkcall(struct function *, union type fnty, uint narg, union irref *, union irtype *); +union irref addinstr(struct function *, struct instr); +struct block *newblk(struct function *); +void useblk(struct function *, struct block *); +void putjump(struct function *, enum jumpkind, union irref arg, struct block *t, struct block *f); +void irdump(struct function *, const char *fname); + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/irdump.c b/irdump.c new file mode 100644 index 0000000..b7fbcc0 --- /dev/null +++ b/irdump.c @@ -0,0 +1,124 @@ +#include "common.h" +#include "ir.h" + +extern struct xcon conht[]; +extern vec_of(struct ircall) calls; + +static const char *clsname[] = { + "?", "i4", "i8", "iP", "f4", "f8" +}; + +static void +prityp(union irtype typ) +{ + if (!typ.isagg) + efmt(clsname[typ.cls]); + else { + const struct typedata *td = &typedata[typ.dat]; + const char *tag = td->t == TYSTRUCT ? "struct" : "union"; + if (ttypenames[td->id]) + efmt("%s.%s.%d", tag, ttypenames[td->id], td->id); + else + efmt("%s.%d", tag, td->id); + } +} + +static void +dumpref(union irref ref) +{ + struct xcon *con; + struct ircall *call; + switch (ref.t) { + case RTMP: efmt("%%%d", ref.idx); break; + case RARG: efmt("%%arg%d", ref.idx); break; + case RICON: efmt("%d", ref.i); break; + case RXCON: + con = &conht[ref.idx]; + if (con->issym) efmt("$%s", con->sym); + else switch (con->cls) { + case KI4: efmt("%d", con->i4); break; + case KI8: efmt("%ld", con->i8); break; + case KIP: efmt("%'x", con->i8); break; + case KF4: efmt("%f", con->fs); break; + case KF8: efmt("%f", con->fd); break; + default: assert(0); + } + break; + case RCALL: + call = &calls.p[ref.idx]; + for (int i = 0; i < call->narg; ++i) { + if (call->vararg == i) { + if (i > 0) efmt(", "); + efmt("..., "); + } + prityp(call->typs[i]); + efmt(" "); + dumpref(call->args[i]); + } + break; + default: assert(!"ref"); + } +} + +extern struct instr instr[]; +static const char *opname[] = { + "???", +#define _(o,...) #o, +#include "op.def" +#undef _ +}; +static const uchar opnarg[] = { + 0, +#define _(o,n) n, +#include "op.def" +#undef _ +}; + +static void +dumpinst(const struct instr *ins) +{ + int i; + efmt(" "); + if (ins->cls) + efmt("%s %%%d = ", clsname[ins->cls], ins - instr); + efmt("%s ", opname[ins->op]); + for (i = 0; i < opnarg[ins->op]; ++i) { + if (i) efmt(", "); + dumpref((&ins->l)[i]); + } + efmt("\n"); +} + +static void +dumpblk(struct block *blk) +{ + static const char *jnames[] = { 0, "b", "b", "ret", "rets" }; + static const uchar jnarg[] = { 0, 0, 1, 0, 1 }; + efmt(" .L%d:\n", blk->id); + for (int i = 0; i < blk->ins.n; ++i) { + dumpinst(&instr[blk->ins.p[i]]); + } + efmt(" %s ", jnames[blk->jmp.t]); + if (jnarg[blk->jmp.t]) { + dumpref(blk->jmp.arg); + if (blk->s1) efmt(", "); + } + if (blk->s1 && blk->s2) efmt(".L%d, .L%d", blk->s1->id, blk->s2->id); + else if (blk->s1) efmt(".L%d", blk->s1->id); + efmt("\n"); +} + +void +irdump(struct function *fn, const char *fname) +{ + struct block *blk; + + efmt("function %s : %ty\n", fname, fn->fnty); + blk = fn->entry; + do { + dumpblk(blk); + blk = blk->lnext; + } while (blk != fn->entry); +} + +/* vim:set ts=3 sw=3 expandtab: */ diff --git a/keywords.def b/keywords.def new file mode 100644 index 0000000..e0bb92f --- /dev/null +++ b/keywords.def @@ -0,0 +1,70 @@ +/* !SORTED */ +_(_Alignas, STDC11) +_(_Alignof, STDC11) +_(_Atomic, STDC11) +_(_BitInt, STDC23) +_(_Bool, STDC99) +_(_Complex, STDC99) +_(_Decimal128, STDC23) +_(_Decimal32, STDC23) +_(_Decimal64, STDC23) +_(_Generic, STDC11) +_(_Imaginary, STDC99) +_(_Noreturn, STDC11) +_(_Static_assert, STDC11) +_(_Thread_local, STDC11) +_(alignas, STDC23) +_(alignof, STDC23) +_(auto, 0) +_(bool, STDC23) +_(break, 0) +_(case, 0) +_(char, 0) +_(const, 0) +_(constexpr, STDC23) +_(continue, 0) +_(default, 0) +_(do, 0) +_(double, 0) +_(else, 0) +_(enum, 0) +_(extern, 0) +_(false, STDC23) +_(float, 0) +_(for, 0) +_(goto, 0) +_(if, 0) +_(inline, STDC99) +_(int, 0) +_(long, 0) +_(nullptr, STDC23) +_(register, 0) +_(restrict, STDC99) +_(return, 0) +_(short, 0) +_(signed, 0) +_(sizeof, 0) +_(static, 0) +_(static_assert, STDC23) +_(struct, 0) +_(switch, 0) +_(thread_local, STDC23) +_(true, STDC23) +_(typedef, 0) +_(typeof, STDC23) +_(typeof_unqual, STDC23) +_(union, 0) +_(unsigned, 0) +_(void, 0) +_(volatile, 0) +_(while, 0) + +#ifndef TKWBEGIN_ +# define TKWBEGIN_ TKW_Alignas +#endif +#ifndef TKWEND_ +# define TKWEND_ TKWwhile +#endif +#ifndef TKWMAXLEN_ +# define TKWMAXLEN_ (sizeof "_Static_assert" - 1) +#endif @@ -0,0 +1,669 @@ +#include "parse.h" +#include <string.h> + +static const char * +intern(const char *s) +{ + static vec_of(char) mem; + static uint ht[1<<10]; + uint h, i, n = arraylength(ht); + + if (!mem.p) vinit(&mem, NULL, 1<<10); + + i = h = hashs(0, s); + for (;; ++i) { + i &= arraylength(ht) - 1; + if (!ht[i]) { + ht[i] = mem.n+1; + return vpushn(&mem, s, strlen(s)+1); + } else if (!strcmp(s, &mem.p[ht[i]-1])) { + return &mem.p[ht[i]-1]; + } + assert(--n > 0 && "intern full"); + } +} + +static void +identkeyword(struct token *tk, const char *s, int n) +{ + static const struct { const char *s; enum toktag t; enum cstd cstd; } kwtab[] = { +#define _(kw, cstd) { #kw, TKW##kw, cstd }, +#include "keywords.def" +#undef _ + }; + int l = 0, h = arraylength(kwtab) - 1, i, cmp; + + if (n > TKWMAXLEN_) goto ident; + /* binary search over sorted array */ + while (l <= h) { + i = (l + h) / 2; + cmp = strcmp(kwtab[i].s, s); + if (cmp < 0) l = i + 1; + else if (cmp > 0) h = i - 1; + else if (kwtab[i].cstd <= ccopt.cstd) { + tk->t = kwtab[i].t; + tk->ident = kwtab[i].s; + return; + } else break; + } +ident: + tk->t = TKIDENT; + tk->ident = intern(s); +} + +static int +next0(struct parser *pr) +{ + bool trigraph = ccopt.trigraph; + int n, c; + + while (!memcmp(pr->dat+pr->idx, "\\\n", n = 2) + || (trigraph && !memcmp(pr->dat+pr->idx, "\?\?/\n", n = 4))) { + pr->idx += n; + addfileline(pr->fileid, pr->idx); + } + if (pr->idx >= pr->ndat) + return TKEOF; + if (trigraph && !memcmp(pr->dat+pr->idx, "??", 2)) { + switch (pr->dat[pr->idx+2]) { + case '=': pr->idx += 3; return '#'; + case '(': pr->idx += 3; return '['; + case ')': pr->idx += 3; return ']'; + case '!': pr->idx += 3; return '|'; + case '<': pr->idx += 3; return '{'; + case '>': pr->idx += 3; return '}'; + case '-': pr->idx += 3; return '~'; + case '/': pr->idx += 3; return '\\'; + case '\'': pr->idx += 3; return '^'; + } + } + if ((c = pr->dat[pr->idx++]) == '\n') { + addfileline(pr->fileid, pr->idx); + } + return c; +} + +static int +next(struct parser *pr) +{ + int c; + + if (pr->npeekchr) { + int c = pr->peekchr[0]; + pr->chridx = pr->peekcidx[0]; + memmove(pr->peekchr, pr->peekchr + 1, --pr->npeekchr * sizeof *pr->peekchr); + memmove(pr->peekcidx, pr->peekcidx + 1, pr->npeekchr * sizeof *pr->peekcidx); + pr->eof = c == TKEOF; + return c; + } + c = next0(pr); + pr->eof = c == TKEOF; + pr->chridx = pr->idx; + return c; +} + +static int +peek(struct parser *pr, int off) +{ + assert(off < arraylength(pr->peekchr)); + while (pr->npeekchr < off+1) { + pr->peekchr[pr->npeekchr] = next0(pr); + pr->peekcidx[pr->npeekchr++] = pr->idx; + } + return pr->peekchr[off]; +} + +static bool +match(struct parser *pr, int c) +{ + if (!pr->eof && peek(pr, 0) == c) { + next(pr); + return 1; + } + return 0; +} + +static bool +aissep(int c) { + if (!aisprint(c) || aisspace(c)) + return 1; + switch (c) + case '(': case ')': case '[': case ']': + case '{': case '}': case '.': case ',': + case ';': case '?': case '+': case '-': + case '*': case '/': case '&': case '|': + case '^': case '~': case '=': case '\'': + case '"': case '<': case '>': case ':': + case '@': case '#': case '%': case '\\': + case '`': + return 1; + return 0; +} + +static void +strtonum(struct token *tk, char *s) +{ + extern int sscanf(const char *, const char *, ...); + extern uvlong strtoull(const char *, char **, int); + char *suffix; + + tk->ty = TYXXX; + if (strchr(s, '.')) { + /* float literal */ + int n; + + if (!sscanf(s, "%lf%n", &tk->f, &n)) + return; + suffix = s + n; + tk->ty = TYDOUBLE; + } else { + tk->u = strtoull(s, &suffix, 0); + if (suffix == s) + return; + /* XXX proper int lit types */ + tk->ty = TYINT; + } + if (!*suffix) return; + + for (s = suffix; *s; ++s) + *s |= 0x20; /* make lowercase */ + if (tk->ty == TYDOUBLE) { + if (!strcmp(suffix, "f")) { + tk->ty = TYFLOAT; + tk->f = (float) tk->f; + } else tk->ty = TYXXX; + } else { + if (!strcmp(suffix, "u")) tk->ty = TYUINT; + else if (!strcmp(suffix, "ul")) tk->ty = TYULONG; + else if (!strcmp(suffix, "lu")) tk->ty = TYULONG; + else if (!strcmp(suffix, "ull")) tk->ty = TYUVLONG; + else if (!strcmp(suffix, "llu")) tk->ty = TYUVLONG; + else if (!strcmp(suffix, "ll")) tk->ty = TYVLONG; + else if (!strcmp(suffix, "l")) tk->ty = TYLONG; + else tk->ty = TYXXX; + } +} + +static void +readstrchrlit(struct parser *pr, struct token *tk, char delim) +{ + int c, i; + uchar tmp[80]; + vec_of(uchar) b = VINIT(tmp, sizeof tmp); + struct span span = {0}; + uint n, idx = pr->chridx; + + while ((c = next(pr)) != delim) { + if (c == '\n' || c == TKEOF) { + Noterm: + span.sl = (struct span0) { idx, pr->chridx - idx, pr->fileid }; + error(&span, "missing terminating %c character", delim); + break; + } else if (c == '\\') { + span.sl = (struct span0) { idx, pr->chridx - idx, pr->fileid }; + switch (c = next(pr)) { + case '\n': case TKEOF: + goto Noterm; + case '\'': c = '\''; break; + case '\\': c = '\\'; break; + case '"': c = '"'; break; + case '?': c = '?'; break; + case 'a': c = '\a'; break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'v': c = '\v'; break; + case 'x': case 'X': /* hex */ + n = 0; + if (!aisxdigit(peek(pr, 0))) goto Badescseq; + do { + c = next(pr); + if (c-'0' < 10) n = n<<4 | (c-'0'); + else n = n<<4 | (10 + (c|0x20)-'a'); + } while (aisxdigit(peek(pr, 0))); + if (n > 0xFF) { + span.sl.len = pr->chridx - span.sl.off; + error(&span, "hex escape sequence out of range"); + } + c = n & 0xFF; + break; + default: + if (aisodigit(c)) { /* octal */ + n = c-'0'; + for (i = 2; i--;) { + if (!aisodigit(peek(pr, 0))) break; + n = n<<3 | ((c = next(pr))-'0'); + } + if (n > 0377) { + span.sl.len = pr->chridx - span.sl.off; + error(&span, "hex escape sequence out of range"); + } + c = n; + break; + } + Badescseq: + span.sl.len = pr->chridx - span.sl.off; + error(&span, "invalid escape sequence"); + } + } + vpush(&b, c); + idx = pr->chridx;; + } + if (delim == '"') { + vpush(&b, 0); + tk->t = TKSTRLIT; + tk->s.p = alloc(&pr->exarena, b.n, 1); + memcpy(tk->s.p, b.p, tk->s.n = b.n); + } else { + if (b.n == 0) { + span.sl = (struct span0) { idx, pr->chridx - idx, pr->fileid }; + error(&span, "empty character literal"); + } else if (b.n > targ_primsizes[TYINT]) { + span.sl = (struct span0) { idx, pr->chridx - idx, pr->fileid }; + error(&span, "multicharacter literal too long"); + } + tk->t = TKNUMLIT; + tk->ty = TYINT; + tk->u = 0; + for (i = 0; i < b.n; ++i) + tk->u = tk->u<<8 | b.p[i]; + } + vfree(&b); +} + +static int +lex0(struct parser *pr, struct token *tk) +{ + int idx, c; + +#define RET(t_) do { tk->t = (t_); goto End; } while (0) + +Begin: + idx = pr->chridx; + switch (c = next(pr)) { + case ' ': case '\r': case '\t': + goto Begin; + break; + case '!': case '(': case ')': case ',': + case ':': case ';': case '?': case '[': + case ']': case '{': case '}': case '~': + case '$': case '@': case '`': case '\\': case TKEOF: case '\n': + RET(c); + case '#': + if (match(pr, '#')) RET(TKPPCAT); + RET(c); + case '+': + if (match(pr, '+')) RET(TKINC); + if (match(pr, '=')) RET(TKSETADD); + RET(c); + case '-': + if (match(pr, '-')) RET(TKDEC); + if (match(pr, '=')) RET(TKSETSUB); + if (match(pr, '>')) RET(TKARROW); + RET(c); + case '*': + if (match(pr, '=')) RET(TKSETMUL); + RET(c); + case '/': + if (match(pr, '=')) RET(TKSETDIV); + if (match(pr, '/')) { + /* // comment */ + while (!pr->eof && !match(pr, '\n')) + next(pr); + goto Begin; + } + if (match(pr, '*')) { + /* comment */ + while (peek(pr, 0) != '*' || peek(pr, 1) != '/') { + if (next(pr) == TKEOF) { + struct span span = {{ idx, pr->chridx - idx, pr->fileid }}; + fatal(&span, "unterminated multiline comment"); + } + } + next(pr), next(pr); + goto Begin; + } + RET(c); + case '%': + if (match(pr, '=')) RET(TKSETMOD); + RET(c); + case '^': + if (match(pr, '=')) RET(TKSETXOR); + RET(c); + case '=': + if (match(pr, '=')) RET(TKEQU); + RET(c); + case '<': + if (match(pr, '=')) RET(TKLTE); + if (match(pr, '<')) RET(match(pr, '=') ? TKSETSHL : TKSHL); + RET(c); + case '>': + if (match(pr, '=')) RET(TKGTE); + if (match(pr, '>')) RET(match(pr, '=') ? TKSETSHR : TKSHR); + RET(c); + case '&': + if (match(pr, '&')) RET(TKLOGAND); + if (match(pr, '=')) RET(TKSETAND); + RET(c); + case '|': + if (match(pr, '|')) RET(TKLOGIOR); + if (match(pr, '=')) RET(TKSETIOR); + RET(c); + case '.': + if (peek(pr, 0) == '.' && peek(pr, 1) == '.') { + next(pr), next(pr); + RET(TKDOTS); + } + RET(c); + case '\'': + case '"': + readstrchrlit(pr, tk, c); + goto End; + default: + if (aisdigit(c)) { + char tmp[70]; + int n = 0; + tmp[n++] = c; + while (!aissep(c = peek(pr, 0)) || c == '.' || ((tmp[n-1]|0x20) == 'e' && (c == '+' || c == '-'))) { + assert(n < arraylength(tmp)-1 && "too big"); + tmp[n++] = next(pr); + } + tmp[n] = 0; + strtonum(tk, tmp); + RET(TKNUMLIT); + } else if (c == '_' || aisalpha(c)) { + char tmp[70]; + int n = 0; + tmp[n++] = c; + while (!aissep(c = peek(pr, 0))) { + assert(n < arraylength(tmp)-1 && "too big"); + tmp[n++] = next(pr); + } + tmp[n] = 0; + identkeyword(tk, tmp, n); + goto End; + } + } + fatal(&(struct span) {{ idx, pr->chridx - idx, pr->fileid }}, "unexpected character %'c at %d", c, idx); +End: + tk->span.sl.file = pr->fileid; + tk->span.sl.off = idx; + tk->span.sl.len = pr->chridx - idx; + tk->span.ex.file = pr->fileid; + tk->span.ex.off = idx; + tk->span.ex.len = pr->chridx - idx; + return tk->t; +#undef RET +} + +/****************/ +/* PREPROCESSOR */ +/****************/ + +#define isppident(tk) ((tk).t == TKIDENT || in_range((tk).t, TKWBEGIN_, TKWEND_)) + +static vec_of(struct macro) macros; +static ushort macroht[1<<10]; + +static struct macro * +findmac(const char *name) +{ + uint h, i, n = arraylength(macroht); + + i = h = ptrhash(name); + for (; n--; ++i) { + i &= arraylength(macroht) - 1; + if (!macroht[i]) { + return NULL; + } else if (macros.p[macroht[i]-1].name == name) { + return ¯os.p[macroht[i]-1]; + } + } + return NULL; +} + +static void +freemac(struct macro *mac) +{ + free(mac->param); + free(mac->rlist.tk); +} + +static bool +tokequ(const struct token *a, const struct token *b) +{ + char tmpbuf[100]; + struct wbuf tmp = MEMBUF(tmpbuf, sizeof tmpbuf); + if (a->t != b->t) return 0; + if (a->t == TKNUMLIT) { + const char *s1 = tmp.buf, *s2; + int n1, n2; + + if (a->ty != b->ty) return 0; + n1 = bfmt(&tmp, "%tk", a); + s2 = tmp.buf + tmp.len; + n2 = bfmt(&tmp, "%tk", b); + if (tmp.err) return 0; + return n1 == n2 && !memcmp(s1, s2, n1); + } else if (a->t == TKIDENT) { + return a->ident == b->ident; + } else if (a->t == TKSTRLIT) { + return a->s.n == b->s.n && !memcmp(a->s.p, b->s.p, a->s.n); + } + return 1; +} + +static bool /* whitespace separating tokens? */ +wsseparated(const struct token *l, const struct token *r) +{ + assert(l->span.sl.file == r->span.sl.file); + return l->span.sl.off + l->span.sl.len < r->span.sl.off; +} + +static bool +macroequ(const struct macro *a, const struct macro *b) +{ + int i; + if (a->name != b->name) return 0; + if (a->fnlike != b->fnlike || a->variadic != b->variadic) return 0; + if (a->fnlike) { + if (a->nparam != b->nparam) return 0; + for (i = 0; i < a->nparam; ++i) + if (a->param[i] != b->param[i]) + return 0; + } + if (a->rlist.n != b->rlist.n) return 0; + for (i = 0; i < a->rlist.n; ++i) { + struct token *tka = a->rlist.tk, *tkb = b->rlist.tk; + if (!tokequ(&tka[i], &tkb[i])) + return 0; + if (i && wsseparated(&tka[i-1], &tka[i]) != wsseparated(&tkb[i-1], &tkb[i])) + return 0; + } + return 1; +} + +static struct macro * +putmac(struct macro *mac) +{ + uint h, i, n = arraylength(macroht); + struct macro *slot; + + i = h = ptrhash(mac->name); + for (;; ++i) { + i &= arraylength(macroht) - 1; + if (!macroht[i]) { + macroht[i] = macros.n+1; + vpush(¯os, *mac); + return ¯os.p[macros.n - 1]; + } else if ((slot = ¯os.p[macroht[i]-1])->name == mac->name) { + if (!macroequ(slot, mac)) { + warn(&(struct span){mac->span}, "redefining macro"); + note(&(struct span){slot->span}, "previous definition:"); + freemac(slot); + *slot = *mac; + } else { + freemac(mac); + } + return slot; + } + assert(--n && "macro limit"); + } +} + +static void +ppskipline(struct parser *pr) +{ + while (peek(pr, 0) != '\n' && peek(pr, 0 != TKEOF)) + next(pr); +} + +static void +ppdefine(struct parser *pr) +{ + struct token tk0, tk; + struct macro mac = {0}; + vec_of(struct token) rlist = {0}; + + lex0(pr, &tk0); + if (!isppident(tk0)) { + error(&tk0.span, "macro name missing"); + ppskipline(pr); + return; + } + mac.name = tk0.ident; + mac.span = tk0.span.sl; + + if (peek(pr, 0) != '(') { + mac.fnlike = 1; + } + + while (lex0(pr, &tk) != '\n' && tk.t != TKEOF) { + if (!wsseparated(&tk0, &tk)) + warn(&tk.span, "no whitespace after macro name"); + vpush(&rlist, tk); + } + mac.rlist.tk = rlist.p; + mac.rlist.n = rlist.n; + putmac(&mac); +} + +static struct macrostack mstk[64], *mfreelist; +static bool +tryexpand(struct parser *pr, const struct token *tk) +{ + static bool inimstk; + struct macro *mac; + struct macrostack *l; + int macidx; + if (!inimstk) { + inimstk = 1; + for (int i = 0; i < arraylength(mstk); ++i) { + mstk[i].link = mfreelist; + mfreelist = &mstk[i]; + } + } + + if (!isppident(*tk) || !(mac = findmac(tk->ident))) + return 0; + + macidx = mac - macros.p; + /* prevent infinite recursion */ + for (l = pr->macstk; l; l = l->link) + if (l->mac == macidx) + return 0; + + if (mac->fnlike) { + } + if (mac->rlist.n) { + if (!(l = mfreelist)) fatal(&tk->span, "macro depth limit reached"); + l = mfreelist; + mfreelist = l->link; + l->link = pr->macstk; + l->mac = macidx; + l->idx = 0; + l->exspan = tk->span.ex; + pr->macstk = l; + } + return 1; +} + +static void +popmac(struct parser *pr) +{ + struct macrostack *stk; + + assert(stk = pr->macstk); + do { + pr->macstk = stk->link; + stk->link = mfreelist; + mfreelist = stk; + } while ((stk = pr->macstk) && stk->idx >= macros.p[stk->mac].rlist.n); +} + +int +lex(struct parser *pr, struct token *tk_) +{ + struct token tkx[1], *tk; + int t; + bool linebegin = 0; + + assert(tk_ != &pr->peektok); + tk = tk_ ? tk_ : tkx; + if (pr->peektok.t) { + *tk = pr->peektok; + memset(&pr->peektok, 0, sizeof pr->peektok); + return tk->t; + } + + if (pr->macstk) { + struct macro *mac = ¯os.p[pr->macstk->mac]; + struct rlist rl = mac->rlist; + *tk = rl.tk[pr->macstk->idx++]; + assert(tk->t); + tk->span.ex = pr->macstk->exspan; + if (tryexpand(pr, tk)) + return lex(pr, tk_); + if (pr->macstk->idx == rl.n) + popmac(pr); + return tk->t; + } + + for (;;) { + while ((t = lex0(pr, tk)) == '\n') linebegin = 1; + if (t == '#' && linebegin) { + if (lex0(pr, tk) == '\n') break; + else if (tk->t == TKIDENT && !strcmp(tk->ident, "define")) + ppdefine(pr); + else { + error(&tk->span, "invalid preprocessor directive"); + ppskipline(pr); + } + } else { + if (tryexpand(pr, tk)) + return lex(pr, tk_); + return t; + } + } + assert(0); +} + +int +lexpeek(struct parser *pr, struct token *tk_) +{ + struct token tkx[1], *tk; + uint t; + + tk = tk_ ? tk_ : tkx; + if ((t = pr->peektok.t)) { + *tk = pr->peektok; + return t; + } + t = lex(pr, tk); + pr->peektok = *tk; + return t; +} + +/* vim:set ts=3 sw=3 expandtab: */ @@ -0,0 +1,27 @@ +#include "parse.h" +#include <stdlib.h> + +struct option ccopt; + +static void +flushstd(void) +{ + ioflush(&bstdout); + ioflush(&bstderr); +} + +int +main(int argc, char **argv) +{ + struct parser pr; + + atexit(flushstd); + if (argc < 2) { + efmt("usage: %s [options] <file>\n", *argv); + return 1; + } + + targ_init("amd64-sysv"); + initparser(&pr, argv[1]); + parse(&pr); +} @@ -0,0 +1,117 @@ +#include "common.h" +#include <stdlib.h> +#include <errno.h> +#include <stdint.h> + +#define ALLOCERR(f) do { \ + efmt("%s: %s\n", f, strerror(errno)); \ + ioflush(&bstdout); \ + ioflush(&bstderr); \ + abort(); \ +} while (0) + +static void * +xcalloc(size_t n, const char *f) +{ + void *p = calloc(n, 1); + if (!p) ALLOCERR(f); + return p; +} + +static void * +xrealloc(void *p, size_t n, const char *f) +{ + p = p ? realloc(p, n) : malloc(n); + if (!p) ALLOCERR(f); + return p; +} + +/* vec: when _cap < 0, buf is dynamic allocated, otherwise an user provided buf */ + +void +vinit_(void **p, int *pcap, void *inlbuf, int cap, uint siz) +{ + assert(!*p); + *pcap = cap; + if (inlbuf) { + *p = inlbuf; + } else if (cap) { + *p = xrealloc(0, cap*siz, "vinit"); + *pcap = -cap; + } +} + +void +vpush_(void **p, int *pcap, uint *pn, uint siz) +{ + if (*pn == *pcap) { /* empty or inline buffer */ + int cap = *pcap ? *pcap * 2 : 8; + *p = xrealloc(NULL, cap * siz, "vpush"); + *pcap = -cap; + } else if (*pn == -*pcap) { /* dyn buf */ + *p = xrealloc(*p, -(*pcap *= 2) * siz, "vpush"); + } + assert(-(volatile int)*pcap > *pn && "overflow"); +} + +void * +vpushn_(void **p, int *pcap, uint *pn, uint siz, const void *dat, uint ndat) +{ + void *beg; + + for (uint i = 0; i < ndat; ++i) + vpush_(p, pcap, pn, siz); + beg = (char *)*p + *pn * siz; + memcpy(beg, dat, ndat * siz); + *pn += ndat; + return beg; +} + +struct arena * +newarena(uint chunksiz) +{ + struct arena *ar = xcalloc(offsetof(struct arena, mem) + chunksiz, "newarena"); + assert(chunksiz < 1u<<31 && "toobig"); + ar->cap = chunksiz; + ar->dyn = 1; + return ar; +} + +void * +alloc(struct arena **par, uint siz, uint align) +{ + uint idx; + struct arena *new; + + if (siz > (*par)->cap) { + new = newarena(siz); + new->n = siz; + new->prev = (*par)->prev; + (*par)->prev = new; + return new->mem; + } + align = align ? align : sizeof(void *); + idx = (uchar *)alignup((uintptr_t)&(*par)->mem[(*par)->n], align) - (*par)->mem; + if ((*par)->cap - idx >= siz) { + (*par)->n = idx + siz; + return (*par)->mem + idx; + } + new = newarena((*par)->cap); + new->prev = *par; + *par = new; + new->n = siz; + return new->mem; +} + +void +freearena(struct arena *ar) +{ + struct arena *prev; + for (; ar; ar = prev) { + prev = ar->prev; + if (ar->dyn) + free(ar); + } +} + +/* vim:set ts=3 sw=3 expandtab: */ @@ -0,0 +1,60 @@ +/* OP NARG */ +_(mov, 1) +_(neg, 1) +_(not, 1) +_(cvtf4s, 1) +_(cvtf4u, 1) +_(cvtf4f8, 1) +_(cvtf8s, 1) +_(cvtf8u, 1) +_(cvtf8f4, 1) +_(cvts4f, 1) +_(cvtu4f, 1) +_(cvts8f, 1) +_(cvtu8f, 1) +_(extu1, 1) +_(extu2, 1) +_(extu4, 1) +_(exts1, 1) +_(exts2, 1) +_(exts4, 1) +_(add, 2) +_(sub, 2) +_(mul, 2) +_(umul, 2) +_(div, 2) +_(udiv, 2) +_(rem, 2) +_(urem, 2) +_(and, 2) +_(ior, 2) +_(xor, 2) +_(shl, 2) +_(slr, 2) +_(sar, 2) +_(shr, 2) +_(equ, 2) +_(neq, 2) +_(lth, 2) +_(lte, 2) +_(ulth, 2) +_(ulte, 2) +_(alloca1, 1) +_(alloca2, 1) +_(alloca4, 1) +_(alloca8, 1) +_(alloca16, 1) +_(loads1, 1) +_(loadu1, 1) +_(loads2, 1) +_(loadu2, 1) +_(loads4, 1) +_(loadu4, 1) +_(loadf4, 1) +_(loadi8, 1) +_(loadf8, 1) +_(store1, 2) +_(store2, 2) +_(store4, 2) +_(store8, 2) +_(call, 2) @@ -0,0 +1,2104 @@ +#include "parse.h" +#include "common.h" +#include "ir.h" + +static struct env toplevel; +static struct arena *tlarena; + +#define peek(Pr,Tk) lexpeek(Pr,Tk) + +void +initparser(struct parser *pr, const char *file) +{ + const char *error; + struct memfile *f; + + memset(pr, 0, sizeof *pr); + pr->fileid = openfile(&error, &f, file); + if (pr->fileid < 0) + fatal(NULL, "Cannot open %'s: %s", file, error); + pr->dat = f->p; + pr->ndat = f->n; +} + +static struct decl *finddecl(struct parser *pr, const char *name); + +static bool +isdecltok(struct parser *pr) +{ + struct decl *decl; + struct token tk; + switch (peek(pr, &tk)) { + case TKWsigned: case TKWunsigned: case TKWshort: case TKWlong: + case TKWint: case TKWchar: case TKW_Bool: case TKWauto: + case TKWstruct: case TKWunion: case TKWenum: case TKWtypedef: + case TKWextern: case TKWstatic: case TKWinline: case TKW_Noreturn: + case TKWconst: case TKWvolatile: case TKWvoid: case TKWfloat: + case TKWdouble: + return 1; + case TKIDENT: + return (decl = finddecl(pr, tk.ident)) && decl->scls == SCTYPEDEF; + } + return 0; +} + +static bool +match(struct parser *pr, struct token *tk, enum toktag t) +{ + if (peek(pr, NULL) == t) { + lex(pr, tk); + return 1; + } + return 0; +} + +static bool +expect(struct parser *pr, enum toktag t, const char *s) +{ + struct token tk; + if (!match(pr, &tk, t)) { + peek(pr, &tk); + if (aisprint(t)) tk.span.ex.len = tk.span.sl.len = 1; + error(&tk.span, "expected %'tt%s%s", t, s?" ":"",s ? s : ""); + return 0; + } + return 1; +} + +static struct token +expectdie(struct parser *pr, enum toktag t, const char *s) +{ + struct token tk; + if (!match(pr, &tk, t)) + fatal(&tk.span, "expected %'tt%s%s", t, s?" ":"",s ? s : ""); + return tk; +} + +enum declkind { + DTOPLEVEL, + DFUNCPARAM, + DFUNCVAR, + DFIELD, + DCASTEXPR, +}; + +struct declstate { + enum declkind kind; + union type base; + enum storageclass scls; + enum qualifier qual; + uint align; + bool more, varini, funcdef, tagdecl; + const char **pnames; +}; + +static struct decl pdecl(struct declstate *st, struct parser *pr); + +/*******/ +/* ENV */ +/*******/ + +static void +envdown(struct parser *pr) +{ + struct env *e = alloc(&pr->fnarena, sizeof *e, 0); + e->decls = NULL; + e->tagged = NULL; + e->up = pr->env; + pr->env = e; +} + +static void +envup(struct parser *pr) +{ + assert(pr->env->up); + pr->env = pr->env->up; +} + + +static bool +redeclarationok(const struct decl *old, const struct decl *new) +{ + if (old->scls != new->scls) return 0; + switch (old->scls) { + case SCTYPEDEF: + return old->t.bits == new->t.bits; + } + return 0; +} + +static struct decl * +putdecl(struct parser *pr, const struct decl *decl) +{ + struct decls *l; + for (l = pr->env->decls; l; l = l->prev) { + if (decl->name == l->decl.name && !redeclarationok(&l->decl, decl)) { + error(&decl->span, "incompatible redeclaration of '%s'", decl->name); + note(&l->decl.span, "previously declared here"); + } + } + l = alloc(pr->env->up ? &pr->fnarena : &tlarena, sizeof *l, 0); + l->decl = *decl; + l->prev = pr->env->decls; + pr->env->decls = l; + return &l->decl; +} + +static struct decl * +finddecl(struct parser *pr, const char *name) +{ + struct env *e; + struct decls *l; + for (e = pr->env; e; e = e->up) { + for (l = e->decls; l; l = l->prev) { + if (name == l->decl.name) { + return &l->decl; + } + } + } + return NULL; +} + +static union type +gettagged(struct parser *pr, struct span *span, enum typetag tt, const char *name, bool dodef) +{ + struct env *e; + struct tagged *l; + struct typedata td = {0}; + for (e = pr->env; e; e = e->up) { + for (l = e->tagged; l; l = l->prev) { + if (name == ttypenames[typedata[l->t.dat].id]) { + if (dodef && e != pr->env) + goto Break2; + *span = l->span; + return l->t; + } + } + } +Break2: + if (tt == TYENUM) + return mktype(0); + l = alloc(pr->env->up ? &pr->fnarena : &tlarena, sizeof *l, 0); + l->prev = pr->env->tagged; + pr->env->tagged = l; + l->span = *span; + td.t = tt; + return l->t = mktagtype(name, &td); +} + +static union type +deftagged(struct parser *pr, struct span *span, enum typetag tt, const char *name) +{ + struct tagged *l; + struct typedata td = {0}; + for (l = pr->env->tagged; l; l = l->prev) { + if (name == ttypenames[typedata[l->t.dat].id]) { + *span = l->span; + return l->t; + } + } + l = alloc(pr->env->up ? &pr->fnarena : &tlarena, sizeof *l, 0); + l->prev = pr->env->tagged; + pr->env->tagged = l; + l->span = *span; + td.t = tt; + return l->t = mktagtype(name, &td); +} + +/********/ +/* EXPR */ +/********/ + +#define iszero(ex) ((ex).t == ENUMLIT && (ex).u == 0) + +static bool +assigncheck(union type t, const struct expr *src) +{ + if (assigncompat(t, typedecay(src->ty))) return 1; + if (t.t == TYPTR && iszero(*src)) return 1; + return 0; +} + +#define mkexpr(t_,span_,ty_,...) ((struct expr){.t=(t_), .ty=(ty_), .span=(span_), __VA_ARGS__}) + +static struct expr * +exprdup(struct parser *pr, const struct expr *e) +{ + return memcpy(alloc(&pr->exarena, sizeof *e, 0), e, sizeof *e); +} +static struct expr * +exprdup2(struct parser *pr, const struct expr *e1, const struct expr *e2) +{ + struct expr *r = alloc(&pr->exarena, 2*sizeof *r, 0); + r[0] = *e1; + r[1] = *e2; + return r; +} + +static struct expr expr(struct parser *pr); +static struct expr commaexpr(struct parser *pr); + +/* TODO recursive descent is probably slow, use precedence climbing? */ + +static bool +islvalue(const struct expr *ex) +{ + if (ex->t == EGETF) return islvalue(ex->sub); + return ex->t == ESYM || ex->t == EDEREF; +} + +static union type +argpromote(union type t) +{ + if (isint(t)) t.t = intpromote(t.t); + else if (t.t == TYFLOAT) t.t = TYDOUBLE; + return t; +} + +static void +postfixops(struct parser *pr, struct expr *lhs) +{ + struct expr ex, tmp; + struct span span; + struct token tk; + union type ty; + + for (;;) + switch (peek(pr, &tk)) { + default: return; + case TKINC: + case TKDEC: + lex(pr, &tk); + if (!isscalar(lhs->ty)) + error(&lhs->span, "invalid operand to postfix %tt (%ty)", &tk, lhs->ty); + span = lhs->span; + if (!joinspan(&span.ex, tk.span.ex)) span = tk.span; + if (lhs->ty.t == TYPTR && isincomplete(typechild(lhs->ty))) + error(&span, "arithmetic on pointer to incomplete type (%ty)", lhs->ty); + else if (lhs->ty.t == TYPTR && typechild(lhs->ty).t == TYFUNC) + error(&span, "arithmetic on function pointer", &tk, lhs->ty); + *lhs = mkexpr(tk.t == TKINC ? EPOSTINC : EPOSTDEC, span, lhs->ty, .sub = exprdup(pr, lhs)); + break; + case '[': + lex(pr, NULL); + ex = commaexpr(pr); + span = lhs->span; + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, ex.span.ex) + || (peek(pr, &tk), !joinspan(&span.ex, tk.span.ex))) + span = tk.span; + expect(pr, ']', NULL); + + if (isint(lhs->ty) && isptrcvt(ex.ty)) { + /* swap idx[ptr] -> ptr[idx] */ + tmp = *lhs; + *lhs = ex; + ex = tmp; + } + + if (lhs->ty.t == TYPTR || lhs->ty.t == TYARRAY) { + if (isincomplete(ty = typechild(lhs->ty))) + error(&span, "cannot dereference pointer to incomplete type (%ty)", ty); + else if (ty.t == TYFUNC) + error(&span, "subscripted value is pointer to function"); + } else { + error(&lhs->span, "subscripted value is not pointer-convertible (%ty)", ex.ty); + ty = mktype(TYINT); + } + if (!isint(ex.ty)) + error(&ex.span, "array subscript is not integer (%ty)", ex.ty); + if (!iszero(ex)) { + ex.sub = exprdup2(pr, lhs, &ex); + ex.t = EADD; + ex.span = span; + ex.ty = typedecay(lhs->ty); + } + ex.sub = exprdup(pr, iszero(ex) ? lhs : &ex); + ex.span = span; + ex.t = EDEREF; + ex.ty = ty; + *lhs = ex; + break; + case '(': + span = lhs->span; + lex(pr, &tk); + if (lhs->ty.t == TYPTR) /* auto-deref when calling a function pointer */ + *lhs = mkexpr(EDEREF, lhs->span, typechild(lhs->ty), .sub = exprdup(pr, lhs)); + ty = lhs->ty; + if (ty.t != TYFUNC) error(&lhs->span, "calling a value of type '%ty'", lhs->ty); + { + const struct typedata *td = &typedata[ty.dat]; + struct expr argbuf[10]; + vec_of(struct expr) args = VINIT(argbuf, arraylength(argbuf)); + struct span spanbck = tk.span; + bool spanok = joinspan(&span.ex, tk.span.ex); + if (!match(pr, &tk, ')')) for (;;) { + ex = expr(pr); + spanok = spanok && joinspan(&span.ex, ex.span.ex); + if (ty.t == TYFUNC && args.n == td->nmemb && !td->variadic && !td->kandr) + error(&ex.span, "too many args to function taking %d params", td->nmemb); + if (ty.t == TYFUNC && args.n < td->nmemb && !td->kandr) { + if (!assigncheck(td->param[args.n], &ex)) + error(&ex.span, "passing arg of type '%ty' is incompatible with '%ty'", + ex.ty, td->param[args.n]); + } + vpush(&args, ex); + if (match(pr, &tk, ',')) { + spanok = spanok && joinspan(&span.ex, tk.span.ex); + } else if (expect(pr, ')', "or ',' after arg")) { + break; + } + } + if (!spanok || !joinspan(&span.ex, tk.span.ex)) span = spanbck; + ex = mkexpr(ECALL, span, ty.t == TYFUNC ? td->ret : ty, .narg = args.n, + .sub = alloc(&pr->exarena, (args.n+1)*sizeof(struct expr), 0)); + ex.sub[0] = *lhs; + memcpy(ex.sub+1, args.p, args.n*sizeof(struct expr)); + *lhs = ex; + vfree(&args); + } + break; + } +} + +static struct expr +primaryex(struct parser *pr) +{ + struct token tk; + struct decl *decl; + struct expr ex; + + switch (lex(pr, &tk)) { + case TKNUMLIT: + if (!tk.ty) + error(&tk.span, "invalid number literal %'tk", &tk); + ex = mkexpr(ENUMLIT, tk.span, mktype(tk.ty), .u = tk.u); + break; + case TKSTRLIT: + ++tk.s.n; + ex = mkexpr(ESTRLIT, tk.span, mkarrtype(mktype(TYCHAR), 0, tk.s.n), .s = tk.s); + break; + case TKIDENT: + decl = finddecl(pr, tk.ident); + if (!decl) { + error(&tk.span, "undeclared identifier %'tk", &tk); + ex = mkexpr(ESYM, tk.span, mktype(TYINT), .sym = NULL); + } else if (decl->scls == SCTYPEDEF) { + error(&tk.span, "unexpected typename %'tk (expected expression)", &tk); + ex = mkexpr(ESYM, tk.span, decl->t, .sym = NULL); + } else { + ex = mkexpr(ESYM, tk.span, decl->t, .qual = decl->qual, .sym = decl); + } + break; + default: + fatal(&tk.span, "expected expression (near %'tk)", &tk); + } + postfixops(pr, &ex); + return ex; +} + +static bool +castcheck(union type to, const struct expr *ex) +{ + union type src = ex->ty; + if (to.t == TYVOID) return 1; + if (isagg(to)) return 0; + if (to.bits == src.bits) return 1; + if (isarith(to) && isarith(src)) return 1; + if (isint(to) && isptrcvt(src)) return 1; + if (to.t == TYPTR && isint(src)) return 1; + if (to.t == TYPTR && isptrcvt(src)) return 1; + return 0; +} + +static struct expr +unaryex(struct parser *pr) +{ + enum exprkind ek; + struct token tk; + struct span span; + struct expr ex; + union type ty; + uint siz; + + switch (peek(pr, &tk)) { + default: return primaryex(pr); + case '+': + ek = EPLUS; + goto Alu; + case '-': + ek = ENEG; + goto Alu; + case '~': + ek = ECOMPL; + goto Alu; + case '!': + ek = ELOGNOT; + Alu: + lex(pr, NULL); + span = tk.span; + ex = unaryex(pr); + joinspan(&span.ex, ex.span.ex); + ty = ek == ELOGNOT ? mktype(TYINT) : cvtarith(ex.ty, ex.ty); + if (!ty.t || (ek == ECOMPL && !isint(ty))) { + error(&tk.span, "invalid operand to %'tk (%ty)", &tk, ex.ty); + ty = mktype(TYINT); + } + return mkexpr(ek, span, ty, .sub = exprdup(pr, &ex)); + case '*': + lex(pr, NULL); + span = tk.span; + ex = unaryex(pr); + joinspan(&span.ex, ex.span.ex); + if (ex.ty.t == TYPTR || ex.ty.t == TYARRAY) { + ty = typechild(ex.ty); + if (isincomplete(ty)) + error(&span, "cannot dereference pointer to incomplete type (%ty)", ty); + } else { + error(&span, "invalid operand to unary * (%ty)", ex.ty); + ty = ex.ty; + } + return mkexpr(EDEREF, span, ty, .qual = ex.ty.flag & TFCHLDQUAL, .sub = exprdup(pr, &ex)); + case '&': + lex(pr, NULL); + span = tk.span; + ex = unaryex(pr); + joinspan(&span.ex, ex.span.ex); + if (!islvalue(&ex)) + error(&span, "operand to unary & is not an lvalue"); + return mkexpr(EADDROF, span, mkptrtype(ex.ty, ex.qual), .sub = exprdup(pr, &ex)); + case '(': + lex(pr, NULL); + span = tk.span; + if (isdecltok(pr)) { /* (type)cast */ + struct declstate st = { DCASTEXPR }; + struct decl decl = pdecl(&st, pr); + ty = decl.t; + expect(pr, ')', NULL); + ex = unaryex(pr); + joinspan(&span.ex, ex.span.ex); + if (!castcheck(ty, &ex)) + error(&span, "cannot cast value of type '%ty' to '%ty'", ex.ty, ty); + return mkexpr(ECAST, span, ty, .sub = exprdup(pr, &ex)); + } else { + ex = commaexpr(pr); + expect(pr, ')', NULL); + return ex; + } + case TKWsizeof: + lex(pr, NULL); + span = tk.span; + if (match(pr, NULL, '(')) { + struct token tk2; + if (isdecltok(pr)) { /* sizeof(type) */ + struct declstate st = { DCASTEXPR }; + struct decl decl = pdecl(&st, pr); + ty = decl.t; + } else { /* sizeof(expr) */ + ex = commaexpr(pr); + ty = ex.ty; + } + peek(pr, &tk2); + expect(pr, ')', NULL); + joinspan(&span.ex, tk2.span.ex); + } else { /* sizeof expr */ + ex = unaryex(pr); + ty = ex.ty; + joinspan(&span.ex, ex.span.ex); + } + siz = typesize(ty); + if (isincomplete(ty)) + error(&span, "cannot apply sizeof to incomplete type (%ty)", ty); + else if (ty.t == TYFUNC) + error(&span, "cannot apply sizeof to function type (%ty)", ty); + return mkexpr(ENUMLIT, span, mktype(targ_sizetype), .u = siz); + } +} + +static void +bintypeerr(const struct span *span, enum toktag tt, union type lhs, union type rhs) +{ + error(span, "bad operands to %tt (%ty, %ty)", tt, lhs, rhs); +} + +static bool +relationalcheck(const struct expr *a, const struct expr *b) +{ + union type t1 = a->ty, t2 = b->ty; + if (isarith(t1) && isarith(t2)) return 1; + if (isptrcvt(t1) && isptrcvt(t2)) { + t1 = typedecay(t1); + t2 = typedecay(t2); + return t1.dat == t2.dat; + } + return 0; +} + +static struct expr +multiplicativeex(struct parser *pr) +{ + static struct expr (*const NEXT)(struct parser *pr) = unaryex; + struct token tk; + struct span span; + union type ty; + enum exprkind ek; + struct expr ex = NEXT(pr), rhs; + + for (;;) { + switch (peek(pr, &tk)) { + default: return ex; + case '*': ek = EMUL; break; + case '/': ek = EDIV; break; + case '%': ek = EREM; break; + } + lex(pr, &tk); + rhs = NEXT(pr); + span.sl = tk.span.sl; + span.ex = ex.span.ex; + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) + span.ex = tk.span.ex; + ty = cvtarith(ex.ty, rhs.ty); + if (!ty.t || (ek == EREM && isflt(ty))) { + bintypeerr(&span, tk.t, ex.ty, rhs.ty); + ty.t = TYINT; + } + ex = mkexpr(ek, span, ty, .sub = exprdup2(pr, &ex, &rhs)); + } +} + +static struct expr +additiveex(struct parser *pr) +{ + static struct expr (*const NEXT)(struct parser *pr) = multiplicativeex; + struct token tk; + struct span span; + union type ty; + struct expr ex = NEXT(pr), rhs; + + while (match(pr, &tk, '+') || match(pr, &tk, '-')) { + rhs = NEXT(pr); + ty = ex.ty; + span.sl = tk.span.sl; + span.ex = ex.span.ex; + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) + span.ex = tk.span.ex; + if (tk.t == '+' && isptrcvt(rhs.ty)) { + struct expr swaptmp = ex; + ex = rhs; + rhs = swaptmp; + ty = ex.ty; + } + if (isarith(ty) && isarith(rhs.ty)) ty = cvtarith(ty, rhs.ty); + else if (!in_range(ty.t, TYPTR, TYARRAY) || !isint(rhs.ty)) + bintypeerr(&span, tk.t, ty, rhs.ty); + else { + union type pointee = typechild(typedecay(ty)); + if (isincomplete(pointee)) + error(&span, "arithmetic on pointer to incomplete type (%ty)", ty); + else if (pointee.t == TYFUNC) + error(&span, "arithmetic on function pointer (%ty)", ex.ty); + } + ex = mkexpr(tk.t == '+' ? EADD : ESUB, span, ty, .sub = exprdup2(pr, &ex, &rhs)); + } + return ex; +} + +static struct expr +shiftex(struct parser *pr) +{ + static struct expr (*const NEXT)(struct parser *pr) = additiveex; + struct token tk; + struct span span; + union type ty; + struct expr ex = NEXT(pr), rhs; + + while (match(pr, &tk, TKSHL) || match(pr, &tk, TKSHR)) { + rhs = NEXT(pr); + ty = ex.ty; + span.sl = tk.span.sl; + span.ex = ex.span.ex; + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) + span.ex = tk.span.ex; + if (!isint(ty) || !isint(rhs.ty)) + bintypeerr(&span, tk.t, ty, rhs.ty); + ty.t = intpromote(ty.t); + ex = mkexpr(tk.t == TKSHL ? ESHL : ESHR, span, ty, .sub = exprdup2(pr, &ex, &rhs)); + } + return ex; +} + +static struct expr +relationalex(struct parser *pr) +{ + static struct expr (*const NEXT)(struct parser *pr) = shiftex; + struct token tk; + struct span span; + enum exprkind ek; + struct expr ex = NEXT(pr), rhs; + for (;;) { + switch (peek(pr, &tk)) { + default: return ex; + case '<': ek = ELTH; break; + case '>': ek = EGTH; break; + case TKLTE: ek = ELTE; break; + case TKGTE: ek = EGTE; break; + } + lex(pr, &tk); + rhs = NEXT(pr); + span.sl = tk.span.sl; + span.ex = ex.span.ex; + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) + span.ex = tk.span.ex; + if (!relationalcheck(&ex, &rhs)) + bintypeerr(&span, tk.t, ex.ty, rhs.ty); + ex = mkexpr(ek, span, mktype(TYINT), .sub = exprdup2(pr, &ex, &rhs)); + } +} + +static bool +isnullpo(const struct expr *ex) +{ + static const union type voidptr = {{ TYPTR, .flag = TFCHLDPRIM, .child = TYVOID }}; + if (ex->t == ECAST && ex->ty.bits == voidptr.bits) + ex = ex->sub; + return iszero(*ex); +} + +static bool +equalitycheck(const struct expr *a, const struct expr *b) +{ + union type t1 = a->ty, t2 = b->ty; + if (isarith(t1) && isarith(t2)) return 1; + if (isptrcvt(t1) && isptrcvt(t2)) { + t1 = typedecay(t1); + t2 = typedecay(t2); + return t1.dat == t2.dat || typechild(t1).t == TYVOID || typechild(t2).t == TYVOID; + } + if (isptrcvt(t1) && isnullpo(b)) return 1; + return isptrcvt(t2) && isnullpo(a); +} + +static struct expr +equalityex(struct parser *pr) +{ + static struct expr (*const NEXT)(struct parser *pr) = relationalex; + struct token tk; + struct span span; + struct expr ex = NEXT(pr), rhs; + + while (match(pr, &tk, TKEQU) || match(pr, &tk, TKNEQ)) { + rhs = NEXT(pr); + span.sl = tk.span.sl; + span.ex = ex.span.ex; + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) + span.ex = tk.span.ex; + if (!equalitycheck(&ex, &rhs)) + bintypeerr(&span, tk.t, ex.ty, rhs.ty); + ex = mkexpr(tk.t == TKEQU ? EEQU : ENEQ, span, + mktype(TYINT), .sub = exprdup2(pr, &ex, &rhs)); + } + return ex; +} + +#define DEFBINEX(name, Tk, E, Next) \ + static struct expr \ + bit##name##ex(struct parser *pr) \ + { \ + static struct expr (*const NEXT)(struct parser *pr) = Next; \ + struct token tk; \ + struct span span; \ + struct expr ex = NEXT(pr), rhs; \ + while (match(pr, &tk, Tk)) { \ + rhs = NEXT(pr); \ + span.sl = tk.span.sl; \ + span.ex = ex.span.ex; \ + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) \ + span.ex = tk.span.ex; \ + if (!isint(ex.ty) || !isint(rhs.ty)) \ + bintypeerr(&span, tk.t, ex.ty, rhs.ty); \ + ex = mkexpr(E, span, cvtarith(ex.ty, rhs.ty), .sub = exprdup2(pr, &ex, &rhs)); \ + } \ + return ex; \ + } +DEFBINEX(and, '&', EBAND, equalityex) +DEFBINEX(xor, '^', EXOR, bitandex) +DEFBINEX(ior, '|', EBIOR, bitxorex) +#undef DEFBINEX + +#define DEFLOGEX(name, Tk, E, Next) \ + static struct expr \ + log##name##ex(struct parser *pr) \ + { \ + static struct expr (*const NEXT)(struct parser *pr) = Next; \ + struct token tk; \ + struct span span; \ + struct expr ex = NEXT(pr), rhs; \ + while (match(pr, &tk, Tk)) { \ + rhs = NEXT(pr); \ + span.sl = tk.span.sl; \ + span.ex = ex.span.ex; \ + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) \ + span.ex = tk.span.ex; \ + if (!isscalar(ex.ty) || !isscalar(rhs.ty)) \ + bintypeerr(&span, tk.t, ex.ty, rhs.ty); \ + ex = mkexpr(E, span, mktype(TYINT), .sub = exprdup2(pr, &ex, &rhs)); \ + } \ + return ex; \ + } +DEFLOGEX(and, TKLOGAND, ELOGAND, bitiorex) +DEFLOGEX(ior, TKLOGIOR, ELOGIOR, logandex) +#undef DEFLOGEX + +static union type /* 6.5.15 Conditional operator Constraints */ +condtype(const struct expr *a, const struct expr *b) +{ + union type t1 = typedecay(a->ty), t2 = typedecay(b->ty), s1, s2; + if (isarith(t1) && isarith(t2)) return cvtarith(t1, t2); + if (t1.bits == t2.bits) return t1; + if (t1.t == TYPTR && t2.t == TYPTR) { + s1 = typechild(t1); + s2 = typechild(t2); + if (s1.bits == s2.bits || s2.t == TYVOID || s1.t == TYVOID) { + return mkptrtype(s1.t == TYVOID ? s1 : s2, (t1.flag | t2.flag) & TFCHLDQUAL); + } + } + if (t1.t == TYPTR && isnullpo(b)) return t1; + if (isnullpo(a) && t2.t == TYPTR) return t2; + return mktype(0); +} + +static struct expr +condex(struct parser *pr) +{ + static struct expr (*const NEXT)(struct parser *pr) = logiorex; + struct token tk; + struct span span; + union type ty; + struct expr ex = NEXT(pr), tr, fl, *sub; + + if (match(pr, &tk, '?')) { + if (!isscalar(ex.ty)) + error(&ex.span, "?: condition is not a scalar type (%ty)", ex.ty); + span.sl = tk.span.sl; + span.ex = ex.span.ex; + tr = condex(pr); + joinspan(&tk.span.ex, tr.span.ex); + expect(pr, ':', NULL); + fl = condex(pr); + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, tr.span.ex) + || !joinspan(&span.ex, fl.span.ex)) + span.ex = tk.span.ex; + ty = condtype(&tr, &fl); + if (!ty.t) { + error(&span, "bad operands to conditional expression (%ty, %ty)", tr.ty, fl.ty); + ty = tr.ty; + } + sub = alloc(&pr->exarena, 3*sizeof*sub, 0); + sub[0] = ex, sub[1] = tr, sub[2] = fl; + ex = mkexpr(ECOND, span, ty, .sub = sub); + } + return ex; +} + +static struct expr +assignex(struct parser *pr) +{ + static struct expr (*const NEXT)(struct parser *pr) = condex; + struct token tk; + struct span span; + union type ty, res, pointee; + enum { KANY, KADDITIVE, KARITH, KSHFT, KBIT } k; + enum exprkind ek; + struct expr ex = NEXT(pr), rhs; + + switch (peek(pr, &tk)) { + default: return ex; +#define OP(Tk, E, K) case Tk: ek = E, k = K; break; + OP('=', ESET, KANY) + OP(TKSETADD, ESETADD, KADDITIVE) + OP(TKSETSUB, ESETSUB, KADDITIVE) + OP(TKSETMUL, ESETMUL, KARITH) + OP(TKSETDIV, ESETDIV, KARITH) + OP(TKSETMOD, ESETREM, KARITH) + OP(TKSETSHL, ESETSHL, KSHFT) + OP(TKSETSHR, ESETSHR, KSHFT) + OP(TKSETAND, ESETAND, KBIT) + OP(TKSETIOR, ESETIOR, KBIT) + OP(TKSETXOR, ESETXOR, KBIT) +#undef OP + } + lex(pr, &tk); + rhs = assignex(pr); + ty = ex.ty; + span.sl = tk.span.sl; + span.ex = ex.span.ex; + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) + span.ex = tk.span.ex; + if (!islvalue(&ex)) { + error(&ex.span, "left-hand-side of assignment is not an lvalue"); + return ex; + } + if (ty.t == TYARRAY) + error(&ex.span, "cannot assign to array designator (%ty)", ex.ty); + else if (ty.t == TYFUNC) + error(&ex.span, "cannot assign to function designator (%ty)", ex.ty); + else if (isincomplete(ty)) + error(&ex.span, "cannot assign to incomplete type (%ty)", ex.ty); + else if (ex.qual & QCONST) + error(&ex.span, "cannot assign to const-qualified lvalue (%tq)", ex.ty, ex.qual); + switch (k) { + case KANY: + if (!assigncheck(ty, &rhs)) + bintypeerr(&tk.span, tk.t, ty, rhs.ty); + break; + case KADDITIVE: + if (ty.t == TYPTR) pointee = typechild(ty); + if (ty.t == TYPTR && !isincomplete(pointee) && pointee.t != TYFUNC && isint(rhs.ty)) + break; + /* fallthru */ + case KARITH: + res = cvtarith(ty, rhs.ty); + if (!res.t) + bintypeerr(&tk.span, tk.t, ty, rhs.ty); + else ty = res; + break; + case KSHFT: + if (!isint(ty) || !isint(rhs.ty)) + bintypeerr(&tk.span, tk.t, ty, rhs.ty); + ty.t = intpromote(ty.t); + break; + case KBIT: + if (!isint(ty) || !isint(rhs.ty)) + bintypeerr(&tk.span, tk.t, ty, rhs.ty); + ty = cvtarith(ty, rhs.ty); + assert(ty.t); + break; + } + return mkexpr(ek, span, ex.ty, .sub = exprdup2(pr, &ex, &rhs)); +} + +static struct expr +expr(struct parser *pr) +{ + return assignex(pr); +} + +static struct expr +commaexpr(struct parser *pr) +{ + static struct expr (*const NEXT)(struct parser *pr) = assignex; + struct span span; + struct expr ex = NEXT(pr), rhs; + struct token tk; + + while (match(pr, &tk, ',')) { + span.sl = tk.span.sl; + span.ex = ex.span.ex; + rhs = NEXT(pr); + if (!joinspan(&span.ex, tk.span.ex) || !joinspan(&span.ex, rhs.span.ex)) + span.ex = tk.span.ex; + ex = mkexpr(ESEQ, span, rhs.ty, .sub = exprdup2(pr, &ex, &rhs)); + } + return ex; +} + +/*********/ +/* -> IR */ +/*********/ + +static union irref exprvalue(struct function *, const struct expr *); + +static union irref +expraddr(struct function *fn, const struct expr *ex) +{ + struct decl *decl; + + switch (ex->t) { + case ESYM: + decl = ex->sym; + assert(decl != NULL); + switch (decl->scls) { + case SCAUTO: case SCREGISTER: + return mkref(RTMP, decl->id); + case SCEXTERN: case SCNONE: + return mksymref(fn, decl->name); + case SCSTATIC: + assert(!"nyi"); + break; + default: + assert(0); + } + break; + case EDEREF: + return exprvalue(fn, ex->sub); + default: + assert(!"lvalue?>"); + } + +} + +static union irref +genload(struct function *fn, union type t, union irref ref) +{ + struct instr ins = {0}; + + assert(isscalar(t)); + ins.cls = type2cls[t.t]; + switch (typesize(t)) { + case 1: ins.op = issigned(t) ? Oloads1 : Oloadu1; break; + case 2: ins.op = issigned(t) ? Oloads2 : Oloadu2; break; + case 4: ins.op = isflt(t) ? Oloadf4 : issigned(t) ? Oloads4 : Oloadu4; break; + case 8: ins.op = isflt(t) ? Oloadf8 : Oloadi8; break; + default: assert(0); + } + ins.l = ref; + return addinstr(fn, ins); +} + +static union irref +genstore(struct function *fn, union type t, union irref ptr, union irref val) +{ + struct instr ins = {0}; + + assert(isscalar(t)); + switch (typesize(t)) { + case 1: ins.op = Ostore1; break; + case 2: ins.op = Ostore2; break; + case 4: ins.op = Ostore4; break; + case 8: ins.op = Ostore8; break; + default: assert(0); + } + ins.l = ptr; + ins.r = val; + return addinstr(fn, ins); +} + +static union irref +cvt(struct function *fn, enum typetag to, enum typetag from, union irref ref) +{ + enum irclass kto = type2cls[to], kfrom = type2cls[from]; + struct instr ins = {0}; + if (kto == kfrom) return ref; + if (ref.t == RICON && kto < KF4) return ref; + + ins.cls = kto; + ins.l = ref; + if (kisflt(kto) || kisflt(kfrom)) { + if (kto == KIP) kto = siz2intcls[cls2siz[kto]]; + if (kfrom == KIP) kfrom = siz2intcls[cls2siz[kfrom]]; + if (kisflt(kto) && kfrom == KI4) ins.op = issignedt(from) ? Ocvts4f : Ocvtu4f; + else if (kisflt(kto) && kfrom == KI8) ins.op = issignedt(from) ? Ocvts8f : Ocvtu8f; + else if (kto == KF8 && kfrom == KF4) ins.op = Ocvtf4f8; + else if (kto == KF4 && kfrom == KF8) ins.op = Ocvtf8f4; + else if (kfrom == KF4) ins.op = issignedt(to) ? Ocvtf4s : Ocvtf4u; + else if (kfrom == KF8) ins.op = issignedt(to) ? Ocvtf8s : Ocvtf8u; + else assert(0); + } else { + if (kfrom == KI4 && issignedt(from)) ins.op = Oexts4; + else if (kfrom == KI4) ins.op = Oextu4; + else ins.op = Omov; + } + return addinstr(fn, ins); +} + +static union irref +narrow(struct function *fn, enum irclass to, enum typetag tt, union irref ref) +{ + struct instr ins; + assert(isintt(tt)); + if (targ_primsizes[tt] >= cls2siz[to]) return ref; + ins.cls = to; + switch (targ_primsizes[tt]) { + case 1: ins.op = issignedt(tt) ? Oexts1 : Oextu1; break; + case 2: ins.op = issignedt(tt) ? Oexts2 : Oextu2; break; + case 4: ins.op = issignedt(tt) ? Oexts4 : Oextu4; break; + default: assert(0); + } + ins.l = ref; + return addinstr(fn, ins); +} + +static union irref +exprvalue(struct function *fn, const struct expr *ex) +{ + union type ty; + union irref r, q; + enum irclass cls = type2cls[ex->ty.t]; + struct instr ins = {0}; + int swp = 0; + struct expr *sub = ex->sub; + + if (ex->ty.t == TYARRAY) return expraddr(fn, ex); + switch (ex->t) { + case ENUMLIT: + if (isflt(ex->ty)) + return mkfltcon(fn, cls, ex->f); + return mkintcon(fn, cls, ex->i); + case ESYM: + return genload(fn, ex->ty, expraddr(fn, ex)); + case ECAST: + if (ex->ty.t == TYVOID) return exprvalue(fn, sub); + case EPLUS: + return cvt(fn, ex->ty.t, sub->ty.t, exprvalue(fn, sub)); + case ENEG: + ins.op = Oneg; + goto Unary; + case ECOMPL: + ins.op = Onot; + Unary: + ins.l = exprvalue(fn, sub); + ins.l = cvt(fn, ex->ty.t, sub->ty.t, ins.l); + ins.cls = cls; + return addinstr(fn, ins); + case EDEREF: + return genload(fn, ex->ty, exprvalue(fn, sub)); + case EADDROF: + return expraddr(fn, sub); + case EMUL: + ins.op = isunsigned(ex->ty) ? Oumul : Omul; + goto BinArith; + case EDIV: + ins.op = isunsigned(ex->ty) ? Oudiv : Odiv; + goto BinArith; + case EREM: + ins.op = issigned(ex->ty) ? Orem : Ourem; + goto BinArith; + case EBAND: + ins.op = Oand; + goto BinArith; + case EXOR: + ins.op = Oxor; + goto BinArith; + case EBIOR: + ins.op = Oior; + goto BinArith; + case ESHL: + ins.op = Oshl; + goto BinArith; + case ESHR: + ins.op = issigned(ex->ty) ? Osar : Oshr; + goto BinArith; + case ESUB: + ins.op = Osub; + goto BinArith; + case EADD: + ins.op = Oadd; + BinArith: + ins.l = exprvalue(fn, &sub[0]); + ins.l = cvt(fn, ex->ty.t, sub[0].ty.t, ins.l); + ins.r = exprvalue(fn, &sub[1]); + if ((ins.op != Oadd && ins.op != Osub) || cls != KIP) + ins.r = cvt(fn, ex->ty.t, sub[1].ty.t, ins.r); + else { + uint siz = typesize(typechild(ex->ty)); + enum typetag tt = intpromote(sub[1].ty.t); + struct instr scale = { Omul, type2cls[tt], ins.r }; + scale.r = mkintcon(fn, type2cls[tt], siz); + ins.r = addinstr(fn, scale); + } + ins.cls = cls; + return addinstr(fn, ins); + case EPOSTINC: + case EPOSTDEC: + ins.op = ex->t == EPOSTINC ? Oadd : Osub; + ins.cls = cls; + r = expraddr(fn, sub); + ins.l = genload(fn, sub->ty, r); + if (ex->ty.t == TYPTR) + ins.r = mkintcon(fn, KI4, typesize(typechild(ex->ty))); + else + ins.r = mkref(RICON, 1); + genstore(fn, sub->ty, r, addinstr(fn, ins)); + return ins.l; + case EEQU: + ins.op = Oequ; + goto Cmp; + case ENEQ: + ins.op = Oneq; + goto Cmp; + case ELTH: + ins.op = Olth; + goto Cmp; + case ELTE: + ins.op = Olte; + goto Cmp; + case EGTH: + ins.op = Olth; + swp = 1; + goto Cmp; + case EGTE: + ins.op = Olte; + swp = 1; + Cmp: + ty = cvtarith(sub[0].ty, sub[1].ty); + if (isunsigned(ty) && in_range(ins.op, Olth, Olte)) + ins.op += Oulth - Olth; + ins.l = exprvalue(fn, &sub[0^swp]); + ins.l = cvt(fn, ex->ty.t, ty.t, ins.l); + ins.r = exprvalue(fn, &sub[1^swp]); + ins.r = cvt(fn, ex->ty.t, ty.t, ins.r); + ins.cls = cls; + return addinstr(fn, ins); + break; + case ESET: + assert(isscalar(ex->ty)); + return genstore(fn, ex->ty, expraddr(fn, &sub[0]), exprvalue(fn, &sub[1])); + case ESETMUL: + ins.op = isunsigned(ex->ty) ? Oumul : Omul; + goto Compound; + case ESETDIV: + ins.op = isunsigned(ex->ty) ? Oudiv : Odiv; + goto Compound; + case ESETREM: + ins.op = issigned(ex->ty) ? Orem : Ourem; + goto Compound; + case ESETAND: + ins.op = Oand; + goto Compound; + case ESETXOR: + ins.op = Oxor; + goto Compound; + case ESETIOR: + ins.op = Oior; + goto Compound; + case ESETSHL: + ins.op = Oshl; + goto Compound; + case ESETSHR: + ins.op = issigned(ex->ty) ? Osar : Oshr; + goto Compound; + case ESETSUB: + ins.op = Osub; + goto Compound; + case ESETADD: + ins.op = Oadd; + Compound: + r = expraddr(fn, &sub[0]); + ty = in_range(ex->t, ESETSHL, ESETSHR) ? mktype(intpromote(ex->ty.t)) + : cvtarith(sub[0].ty, sub[1].ty); + ins.cls = cls; + ins.l = cvt(fn, ty.t, sub[0].ty.t, genload(fn, ex->ty, r)); + ins.r = exprvalue(fn, &sub[1]); + if ((ins.op != Oadd && ins.op != Osub) || cls != KIP) + ins.r = cvt(fn, ex->ty.t, sub[1].ty.t, ins.r); + else { + uint siz = typesize(typechild(ex->ty)); + enum typetag tt = intpromote(sub[1].ty.t); + struct instr scale = { Omul, type2cls[tt], ins.r }; + scale.r = mkintcon(fn, type2cls[tt], siz); + ins.r = addinstr(fn, scale); + } + q = addinstr(fn, ins); + genstore(fn, ex->ty, r, q); + return narrow(fn, cls, ex->ty.t, q); + case ECALL: + { + const struct typedata *td = &typedata[sub[0].ty.dat]; + union irref argsbuf[10]; + union irtype typbuf[10]; + vec_of(union irref) args = VINIT(argsbuf, arraylength(argsbuf)); + vec_of(union irtype) typs = VINIT(typbuf, arraylength(typbuf)); + ins.op = Ocall; + assert(isscalar(ex->ty) || ex->ty.t == TYVOID); + assert(sub[0].ty.t == TYFUNC); + ins.cls = type2cls[ex->ty.t]; + ins.l = expraddr(fn, &sub[0]); + for (int i = 0; i < ex->narg; ++i) { + struct expr *arg = &sub[i+1]; + union type ty = i < td->nmemb ? td->param[i] : argpromote(arg->ty); + vpush(&args, cvt(fn, ty.t, arg->ty.t, exprvalue(fn, arg))); + vpush(&typs, mkirtype(ty)); + } + ins.r = mkcall(fn, sub[0].ty, ex->narg, args.p, typs.p); + vfree(&args); + vfree(&typs); + return addinstr(fn, ins); + } + case ESEQ: + (void)exprvalue(fn, &sub[0]); + return exprvalue(fn, &sub[1]); + default: assert(!"nyi expr"); + } +} + +static void +stmtterm(struct parser *pr) +{ + expect(pr, ';', "to terminate previous statement"); +} + +static void block(struct parser *pr, struct function *fn); + +static bool /* return 1 if stmt is terminating (all codepaths return) */ +stmt(struct parser *pr, struct function *fn) +{ + struct block *t, *f, *end, *begin; + struct expr ex; + union irref r; + bool terminates = 0; + const bool doemit = fn->curblk; + +#define EMITS if (doemit && !nerror) + + switch (peek(pr, NULL)) { + case '{': + lex(pr, NULL); + envdown(pr); + block(pr, fn); + envup(pr); + break; + case ';': + lex(pr, NULL); + break; + case TKWif: + lex(pr, NULL); + expect(pr, '(', NULL); + ex = commaexpr(pr); + expect(pr, ')', NULL); + if (!isscalar(ex.ty)) + error(&ex.span, "'if' condition is not a scalar (%ty)", ex.ty); + t = f = end = NULL; + EMITS { + t = newblk(fn); + f = newblk(fn); + r = exprvalue(fn, &ex); + if (!isint(ex.ty)) + r = cvt(fn, TYINT, ex.ty.t, r); + EMITS { + putjump(fn, Jbcnd, r, t, f); + useblk(fn, t); + } + } + terminates = stmt(pr, fn); + if (!match(pr, NULL, TKWelse)) { + EMITS putjump(fn, Jb, NOREF, f, NULL); + end = f; + terminates = 0; + } else { + EMITS { + if (!terminates) putjump(fn, Jb, NOREF, end = newblk(fn), NULL); + useblk(fn, f); + } + terminates &= stmt(pr, fn); + EMITS { + if (fn->curblk) putjump(fn, Jb, NOREF, end, NULL); + } + } + EMITS if (!terminates) useblk(fn, end); + break; + case TKWwhile: + lex(pr, NULL); + expect(pr, '(', NULL); + ex = commaexpr(pr); + expect(pr, ')', NULL); + if (!isscalar(ex.ty)) + error(&ex.span, "'while' condition is not a scalar (%ty)", ex.ty); + t = begin = end = NULL; + EMITS { + begin = newblk(fn); + putjump(fn, Jb, NOREF, begin, NULL); + useblk(fn, begin); + r = exprvalue(fn, &ex); + if (!isint(ex.ty)) + r = cvt(fn, TYINT, ex.ty.t, r); + EMITS { + putjump(fn, Jbcnd, r, t = newblk(fn), end = newblk(fn)); + useblk(fn, t); + } + } + terminates = stmt(pr, fn); + EMITS { + if (!terminates) putjump(fn, Jb, NOREF, begin, NULL); + useblk(fn, end); + } + break; + case TKWreturn: + lex(pr, NULL); + if (fn->retty.t != TYVOID) { + ex = commaexpr(pr); + if (!assigncheck(fn->retty, &ex)) { + error(&ex.span, + "cannot return '%ty' value from function with return type '%ty'", + ex.ty, fn->retty); + } + EMITS { + r = cvt(fn, fn->retty.t, ex.ty.t, exprvalue(fn, &ex)); + putjump(fn, Jrets, r, NULL, NULL); + } + } else { + EMITS putjump(fn, Jret, NOREF, NULL, NULL); + } + stmtterm(pr); + break; + default: + ex = commaexpr(pr); + stmtterm(pr); + EMITS exprvalue(fn, &ex); + break; + } + freearena(pr->exarena); + return fn->curblk == NULL; +} + +static void +block(struct parser *pr, struct function *fn) +{ + struct token tk; + const bool doemit = fn->curblk; + + while (!match(pr, &tk, '}')) { + if (isdecltok(pr)) { /* decl */ + struct expr ini; + struct declstate st = { DFUNCVAR }; + do { + struct decl decl = pdecl(&st, pr); + enum op op; + uint siz, align, nalloc; + if (decl.name) { + static int staticid; + bool put = 0; + switch (decl.scls) { + case SCSTATIC: + decl.id = ++staticid; + break; + case SCNONE: + decl.scls = SCAUTO; + case SCAUTO: + case SCREGISTER: + switch (align = typealign(decl.t)) { + case 1: op = Oalloca1; break; + case 2: op = Oalloca2; break; + case 4: op = Oalloca4; break; + case 8: op = Oalloca8; break; + case 16: op = Oalloca16; break; + default: assert(!"align"); + } + siz = typesize(decl.t); + nalloc = siz/align + ((siz&(align-1)) != 0); + EMITS { + decl.id = addinstr(fn, + (struct instr) { op, KIP, mkintcon(fn, KI4, nalloc) }).idx; + } + if (st.varini) { + putdecl(pr, &decl); + put = 1; + ini = expr(pr); + pdecl(&st, pr); + if (!assigncheck(decl.t, &ini)) + error(&ini.span, "cannot initialize %ty with %ty", decl.t, ini.ty); + EMITS genstore(fn, decl.t, mkref(RTMP, decl.id), exprvalue(fn, &ini)); + } + break; + case SCTYPEDEF: break; + default: assert(0); + } + if (!put) putdecl(pr, &decl); + } + } while (0); + } else { + stmt(pr, fn); + } + } + pr->fnblkspan = tk.span; +} + +static void +function(struct parser *pr, struct function *fn, const char **pnames) +{ + const struct typedata *td = &typedata[fn->fnty.dat]; + const bool doemit = fn->curblk; + envdown(pr); + for (int i = 0; i < td->nmemb; ++i) { + if (pnames[i]) { + uint siz, align, nalloc; + struct decl arg = { .t = td->param[i], .qual = tdgetqual(td->quals, i), + .name = pnames[i], .scls = SCAUTO }; + enum op op; + switch (align = typealign(arg.t)) { + case 1: op = Oalloca1; break; + case 2: op = Oalloca2; break; + case 4: op = Oalloca4; break; + case 8: op = Oalloca8; break; + case 16: op = Oalloca16; break; + default: assert(!"align"); + } + siz = typesize(arg.t); + nalloc = siz/align + ((siz&(align-1)) != 0); + EMITS { + struct instr alloca = { op, KIP, mkintcon(fn, KI4, nalloc) }; + arg.id = addinstr(fn, alloca).idx; + genstore(fn, arg.t, mkref(RTMP, arg.id), mkref(RARG, i)); + } + putdecl(pr, &arg); + } /*else { + warn(NULL, "missing name of parameter #%d", i+1); + }*/ + } + block(pr, fn); + envup(pr); + if (fn->curblk) { + if (fn->retty.t != TYVOID && !nerror) { + warn(&pr->fnblkspan, "non-void function may not return a value"); + } + putjump(fn, Jret, NOREF, NULL, NULL); + } +} + +/********/ +/* DECL */ +/********/ + +static union type +buildagg(struct parser *pr, enum typetag tt, const char *name, int id) +{ + struct token tk; + union type t; + struct span flexspan; + struct field fbuf[32]; + uchar qbuf[arraylength(fbuf)/4]; + vec_of(struct field) fld = VINIT(fbuf, arraylength(fbuf)); + vec_of(uchar) qual = VINIT(qbuf, arraylength(qbuf)); + struct typedata td = {tt}; + bool isunion = tt == TYUNION; + const char *tag = isunion ? "union" : "struct"; + + while (!match(pr, &tk, '}')) { + struct declstate st = { DFIELD }; + do { + struct decl decl = pdecl(&st, pr); + if (fld.n && td.flexi) { + td.flexi = 0; + error(&flexspan, "flexible array member is not at end of struct"); + } + if (!isunion && decl.t.t == TYARRAY && !typearrlen(decl.t)) { + td.flexi = 1; + flexspan = decl.span; + } else if (isincomplete(decl.t)) { + error(&decl.span, "field has incomplete type (%ty)", decl.t); + } else if (decl.t.t == TYFUNC) { + error(&decl.span, "field has function type (%ty)", decl.t); + } + if (decl.t.t) { + uint align = typealign(decl.t); + uint siz = typesize(decl.t); + uint off = isunion ? 0 : alignup(td.siz, align); + struct field f = { decl.name, decl.t, off }; + vpush(&fld, f); + if (decl.qual) { + td.anyconst |= decl.qual & QCONST; + while (qual.n < tdqualsiz(fld.n)) vpush(&qual, 0); + tdsetqual(qual.p, fld.n-1, decl.qual); + } + if (isunion) + td.siz = td.siz < siz ? siz : td.siz; + else + td.siz = off + siz; + td.align = td.align < align ? align : td.align; + } + } while (st.more); + } + if (fld.n == 0) { + struct field dummy = { "", mktype(TYCHAR), 0 }; + error(&tk.span, "%s cannot have zero members", tag); + vpush(&fld, dummy); + td.siz = td.align = 1; + } + td.siz = alignup(td.siz, td.align); + if (qual.p) while (qual.n < tdqualsiz(fld.n)) vpush(&qual, 0); + td.quals = qual.p; + td.fld = fld.p; + td.nmemb = fld.n; + t = completetype(name, id, &td); + vfree(&fld); + vfree(&qual); + return t; +} + +static union type +buildenum(struct parser *pr, const char *name) +{ + union type t; + struct typedata td = {TYENUM}; + enum typetag backing = TYINT; + + t = mktagtype(name, &td); + t.backing = backing; + return t; +} + +static union type +tagtype(struct parser *pr, enum toktag kind) +{ + struct token tk; + union type t; + struct span span; + enum typetag tt = kind == TKWenum ? TYENUM : kind == TKWstruct ? TYSTRUCT : TYUNION; + const char *tag = NULL; + + if (match(pr, &tk, TKIDENT)) + tag = tk.ident; + span = tk.span; + if (!match(pr, NULL, '{')) { + if (!tag) { + error(&tk.span, "expected %tt name or '{'", kind); + return mktype(0); + } + t = gettagged(pr, &span, tt, tag, /* def? */ peek(pr, NULL) == ';'); + } else { + if (tt != TYENUM) { + t = deftagged(pr, &span, tt, tag); + if (t.t != tt || !isincomplete(t)) { + if (t.t != tt) + error(&tk.span, + "defining tagged type %'tk as %tt clashes with previous definition", + &tk, kind); + else + error(&tk.span, "redefinition of '%tt %s'", kind, tag); + note(&span, "previous definition:"); + } + t = buildagg(pr, tt, tag, typedata[t.dat].id); + } else { + t = buildenum(pr, tag); + } + } + + if (t.t != tt) { + error(&tk.span, "declaring tagged type %'tk as %tt clashes with previous definition", + &tk, kind); + note(&span, "previous definition:"); + } + return t; +} + +static void +declspec(struct declstate *st, struct parser *pr) +{ + struct token tk; + struct decl *decl; + enum arith { + KSIGNED = 1<<0, + KUNSIGNED = 1<<1, + KBOOL = 1<<2, + KCHAR = 1<<3, + KSHORT = 1<<4, + KLONG = 1<<5, + KLONGLONG = 1<<6, + KINT = 1<<7, + KFLOAT = 1<<8, + KDOUBLE = 1<<9, + } arith = 0; + struct span span = {0}; + + for (;;) { + peek(pr, &tk); + switch (tk.t) { + case TKWconst: + st->qual |= QCONST; + break; + case TKWvolatile: + st->qual |= QVOLATILE; + break; + case TKW_Noreturn: + st->qual |= QNORETURN; + break; + case TKWinline: + st->qual |= QINLINE; + break; + case TKWvoid: + st->base = mktype(TYVOID); + break; + case TKWsigned: + arith |= KSIGNED; + break; + case TKWunsigned: + arith |= KUNSIGNED; + break; + case TKW_Bool: + case TKWbool: + if (arith & KBOOL) goto Dup; + arith |= KBOOL; + case TKWchar: + if (arith & KCHAR) { + Dup: + error(&tk.span, "duplicate %tk specifier", &tk); + } + arith |= KCHAR; + break; + case TKWshort: + arith |= KSHORT; + break; + case TKWlong: + if ((arith & (KLONG | KLONGLONG)) == KLONG) + arith = (arith &~ KLONG) | KLONGLONG; + else if ((arith & (KLONG | KLONGLONG)) == 0) + arith |= KLONG; + else + error(&tk.span, "too long"); + break; + case TKWint: + if (arith & KINT) goto Dup; + arith |= KINT; + break; + case TKWfloat: + if (arith & KFLOAT) goto Dup; + arith |= KFLOAT; + break; + case TKWdouble: + if (arith & KDOUBLE) goto Dup; + arith |= KDOUBLE; + break; + case TKWenum: + case TKWstruct: + case TKWunion: + lex(pr, &tk); + st->base = tagtype(pr, tk.t); + st->tagdecl = 1; + if (!span.ex.len) span.ex = tk.span.ex; + joinspan(&span.ex, tk.span.ex); + goto End; + case TKIDENT: + if (!st->base.t && !arith && (decl = finddecl(pr, tk.ident)) + && decl->scls == SCTYPEDEF) { + st->base = decl->t; + break; + } + /* fallthru */ + default: + if (!span.ex.len) span.ex = tk.span.ex; + goto End; + } + if (!span.ex.len) span.ex = tk.span.ex; + joinspan(&span.ex, tk.span.ex); + lex(pr, &tk); + if (st->base.t) break; + } +End: + if (st->base.t && arith) { + /* combining arith type specifiers and other types */ + Bad: + error(&span, "invalid declaration specifier"); + } else if (!st->base.t && arith) { + enum typetag t; + ioflush(&bstderr); + if (arith == KFLOAT) + t = TYFLOAT; + else if (arith == KDOUBLE) + t = TYDOUBLE; + else if (arith == (KLONG | KDOUBLE)) + t = TYLDOUBLE; + else if (arith == KBOOL) + t = TYBOOL; + else if (arith == KCHAR) + t = TYCHAR; + else if (arith == (KSIGNED | KCHAR)) + t = TYSCHAR; + else if (arith == (KUNSIGNED | KCHAR)) + t = TYUCHAR; + else if ((arith & ~KINT & ~KSIGNED) == KSHORT) + t = TYSHORT; + else if ((arith & ~KINT) == (KUNSIGNED | KSHORT)) + t = TYUSHORT; + else if ((arith & ~KINT & ~KSIGNED) == 0) + t = TYINT; + else if ((arith & ~KINT) == KUNSIGNED) + t = TYUINT; + else if ((arith & ~KINT & ~KSIGNED) == KLONG) + t = TYLONG; + else if ((arith & ~KINT) == (KUNSIGNED | KLONG)) + t = TYULONG; + else if ((arith & ~KINT & ~KSIGNED) == KLONGLONG) + t = TYVLONG; + else if ((arith & ~KINT) == (KUNSIGNED | KLONGLONG)) + t = TYUVLONG; + else + goto Bad; + st->base = mktype(t); + } else if (!st->base.t && ccopt.cstd < STDC99) { + warn(&span, "type implicitly declared as int"); + st->base = mktype(TYINT); + } else if (!st->base.t) + fatal(&span, "expected declaration type specifier"); +} + +/* circular doubly linked list used to parse declarators */ +static struct decllist { + struct decllist *prev, *next; + uchar t; /* TYPTR, TYARRAY or TYFUNC */ + union { + uchar qual; /* TYPTR */ + uint len; /* TYARRAY */ + struct { /* TYFUNC */ + union type *param; + const char **pnames; + uchar *pqual; + short npar; + bool kandr : 1, variadic : 1; + }; + }; + struct span span; +} decltmp[64], *declfreelist; +static union type declparamtmp[16]; +static const char *declpnamestmp[16]; +static uchar declpqualtmp[tdqualsiz(16)]; + +static void +declinsert(struct decllist *list, const struct decllist *node) +{ + struct decllist *pnode = declfreelist; + if (!pnode) fatal(NULL, "too many nested declarators"); + declfreelist = declfreelist->next; + *pnode = *node; + pnode->next = list->next; + pnode->prev = list; + list->next->prev = pnode; + list->next = pnode; +} + +static int +sclass(struct parser *pr, struct span *span) +{ + struct token tk; + int sc = 0, first = 1; + for (;; lex(pr, &tk)) { + switch (peek(pr, &tk)) { + case TKWtypedef: sc |= SCTYPEDEF; break; + case TKWextern: sc |= SCEXTERN; break; + case TKWstatic: sc |= SCSTATIC; break; + case TKWauto: sc |= SCAUTO; break; + case TKWregister: sc |= SCREGISTER; break; + case TKWthread_local: + case TKW_Thread_local: + sc |= SCTHREADLOCAL; break; + default: return sc; + } + if (first) *span = tk.span; + else joinspan(&span->ex, tk.span.ex); + first = 0; + } +} + +static int +cvqual(struct parser *pr) +{ + struct token tk; + int q = 0; + while (match(pr, &tk, TKWconst) || match(pr, &tk, TKWvolatile)) + q |= tk.t == TKWconst ? QCONST : QVOLATILE; + return q; +} + +static void +decltypes(struct parser *pr, struct decllist *list, const char **name, struct span *span) { + struct token tk; + struct decllist *ptr, node; + + while (match(pr, &tk, '*')) { + node.t = TYPTR; + node.qual = cvqual(pr); + node.span = tk.span; + declinsert(list, &node); + } + ptr = list->next; + switch (peek(pr, &tk)) { + case '(': + lex(pr, &tk); + if (isdecltok(pr)) { + goto Func; + } else if (match(pr, &tk, ')')) { + /* T () is K&R func proto */ + node.span = tk.span; + node.t = TYFUNC; + node.param = NULL; + node.pqual = NULL; + node.pnames = NULL; + node.variadic = 0; + node.kandr = 1; + node.npar = 0; + declinsert(ptr->prev, &node); + break; + } else { + decltypes(pr, list, name, span); + expect(pr, ')', NULL); + } + break; + case TKIDENT: + if (!name) + error(&tk.span, "unexpected identifier in type name"); + else { + *name = tk.ident; + *span = tk.span; + } + lex(pr, &tk); + break; + default: + *span = tk.span; + if (name) + *name = NULL; + } + for (;;) { + if (match(pr, &tk, '[')) { + node.span = tk.span; + uint n = 0; + if (!match(pr, &tk, ']')) { + struct expr ex = expr(pr); + if (!eval(&ex, EVINTCONST)) { + error(&ex.span, "array length is not an integer constant"); + } else if (typesize(ex.ty) < 8 && ex.i < 0) { + error(&ex.span, "array length is negative"); + } else if (ex.u > (1ull << (8*sizeof n)) - 1) { + error(&ex.span, "array too long (%ul)", ex.u); + } else if (ex.u == 0) { + error(&ex.span, "array cannot have zero length"); + } else { + n = ex.u; + } + peek(pr, &tk); + joinspan(&node.span.ex, tk.span.ex); + expect(pr, ']', NULL); + } + node.t = TYARRAY; + node.len = n; + declinsert(ptr->prev, &node); + } else if (match(pr, &tk, '(')) Func: { + static int depth = 0; + vec_of(union type) params = {0}; + vec_of(uchar) qual = {0}; + vec_of(const char *) names = {0}; + bool anyqual = 0; + + if (depth++ == 0) { + vinit(¶ms, declparamtmp, arraylength(declparamtmp)); + vinit(&qual, declpqualtmp, arraylength(declpqualtmp)); + vinit(&names, declpnamestmp, arraylength(declpnamestmp)); + } + node.span = tk.span; + node.kandr = 0; + node.variadic = 0; + + while (!match(pr, &tk, ')')) { + struct declstate st = { DFUNCPARAM }; + struct decl decl; + if (match(pr, &tk, TKDOTS)) { + node.variadic = 1; + expect(pr, ')', NULL); + break; + } + decl = pdecl(&st, pr); + decl.t = typedecay(decl.t); + vpush(¶ms, decl.t); + vpush(&names, decl.name); + if (decl.qual) { + anyqual = 1; + while (qual.n < tdqualsiz(params.n)) vpush(&qual, 0); + tdsetqual(qual.p, params.n-1, decl.qual); + } + if (isincomplete(decl.t)) { + if (params.n > 1 || decl.t.t != TYVOID || decl.qual || decl.name) { + error(&decl.span, + "function parameter #%d has incomplete type (%tq)", + params.n, decl.t, tdgetqual(qual.p, params.n-1)); + } + } + joinspan(&node.span.ex, tk.span.ex); + if (!match(pr, &tk, ',')) { + expect(pr, ')', NULL); + break; + } + } + --depth; + node.kandr = params.n == 0 && ccopt.cstd < STDC23; + if (params.n == 1 && params.p[0].t == TYVOID && !qual.n && !names.p[0]) { /* (void) */ + vfree(¶ms); + vfree(&names); + } else if (params.n && params.p[0].t == TYVOID && !qual.n && !names.p[0]) { + error(&node.span, "function parameter #1 has incomplete type (%tq)", + params.p[0], tdgetqual(qual.p, 0)); + } + node.t = TYFUNC; + node.param = params.n ? params.p : NULL; + node.pqual = anyqual ? qual.p : NULL; + node.pnames = params.n ? names.p : NULL; + node.npar = params.n; + declinsert(ptr->prev, &node); + } else break; + } +} + +static struct decl +declarator(struct declstate *st, struct parser *pr) { + struct decl decl = { st->base, st->scls, st->qual, st->align }; + struct decllist list = { &list, &list }, *l; + static bool inidecltmp = 0; + if (!inidecltmp) { + inidecltmp = 1; + for (int i = 0; i < arraylength(decltmp); ++i) { + decltmp[i].next = declfreelist; + declfreelist = &decltmp[i]; + } + } + + decltypes(pr, &list, st->kind == DCASTEXPR ? NULL : &decl.name, &decl.span); + if (!decl.name && st->kind != DCASTEXPR && st->kind != DFUNCPARAM) { + if (list.prev == &list) lex(pr, NULL); + error(&decl.span, "expected `(', `*' or identifier"); + } + for (l = list.prev; l != &list; l = l->prev) { + switch (l->t) { + case TYPTR: + decl.t = mkptrtype(decl.t, decl.qual); + decl.qual = l->qual; + break; + case TYARRAY: + if (isincomplete(decl.t)) + error(&l->span, "array has incomplete element type (%ty)", decl.t); + else if (decl.t.t == TYFUNC) + error(&l->span, "array has element has function type (%ty)", decl.t); + decl.t = mkarrtype(decl.t, decl.qual, l->len); + decl.qual = 0; + break; + case TYFUNC: + if (decl.t.t == TYFUNC) + error(&decl.span, "function cannot return function type (%ty)", decl.t); + else if (decl.t.t == TYARRAY) + error(&decl.span, "function cannot return array type", decl.t); + else if (decl.t.t != TYVOID && isincomplete(decl.t)) + error(&decl.span, "function cannot return incomplete type (%ty)", decl.t); + decl.t = mkfntype(decl.t, l->npar, l->param, l->pqual, l->kandr, l->variadic); + if (l->param != declparamtmp) free(l->param); + if (l->pqual != declpqualtmp) free(l->pqual); + if (l->prev == &list && l->npar) { /* last */ + st->pnames = alloc(&pr->fnarena, l->npar * sizeof(char *), 0); + memcpy(st->pnames, l->pnames, l->npar * sizeof(char *)); + } + if (l->pnames != declpnamestmp) free(l->pnames); + decl.qual = 0; + break; + } + + l->next = declfreelist; + declfreelist = l; + } + + return decl; +} + +static struct decl +pdecl(struct declstate *st, struct parser *pr) { + struct token tk; + struct decl decl; + bool iniallowed = st->kind != DFIELD && st->kind != DFUNCPARAM && st->kind != DCASTEXPR; + bool first = 0; + + if (st->varini) { + memset(&decl, 0, sizeof decl); + goto AfterVarIni; + } + + if (!st->base.t) { + first = 1; + st->scls = sclass(pr, &tk.span); + if (popcnt(st->scls) > 1) + error(&tk.span, "invalid combination of storage class specifiers"); + else { + int allowed; + switch (st->kind) { + case DTOPLEVEL: allowed = SCTYPEDEF | SCEXTERN | SCSTATIC | SCTHREADLOCAL; break; + case DCASTEXPR: allowed = 0; break; + case DFIELD: allowed = 0; break; + case DFUNCPARAM: allowed = 0; break; + case DFUNCVAR: + allowed = SCTYPEDEF | SCREGISTER | SCAUTO | SCEXTERN | SCSTATIC | SCTHREADLOCAL; + break; + default: assert(0); + } + if ((st->scls & allowed) != st->scls) + error(&tk.span, "this storage class is not allowed in this context"); + st->scls &= allowed; + } + declspec(st, pr); + } + + if (first && st->tagdecl && match(pr, &tk, ';')) { + decl = (struct decl) { st->base, st->scls, st->qual, st->align }; + return decl; + } + decl = declarator(st, pr); + + if (iniallowed && match(pr, &tk, '=')) { + st->varini = 1; + return decl; + } else if (first && decl.t.t == TYFUNC && match(pr, &tk, '{')) { + st->funcdef = 1; + return decl; + } + +AfterVarIni: + st->varini = 0; + st->more = 0; + if (st->kind != DCASTEXPR && st->kind != DFUNCPARAM) { + if (match(pr, &tk, ',')) + st->more = 1; + else expect(pr, st->kind == DFUNCPARAM ? ')' : ';', "or `,'"); + } + + return decl; +} + +void +parse(struct parser *pr) +{ + struct token tk[1]; + + if (!pr->env) pr->env = &toplevel; + if (!tlarena) { + enum { N = 1<<12 }; + static union { char m[sizeof(struct arena) + N]; struct arena *_align; } amem[3]; + + tlarena = (void *)amem[0].m; + tlarena->cap = N; + pr->fnarena = (void *)amem[1].m; + pr->fnarena->cap = N; + pr->exarena = (void *)amem[2].m; + pr->exarena->cap = N; + } + while (peek(pr, tk) != TKEOF) { + struct expr ini; + struct declstate st = { DTOPLEVEL, }; + do { + int nerr = nerror; + struct decl decl = pdecl(&st, pr); + + if (nerror != nerr) { + if (st.varini) { + (void)expr(pr); + pdecl(&st, pr); + } + continue; + } + if (decl.name) efmt("%s : %tq\n", decl.name, decl.t, decl.qual); + if (st.funcdef) { + const struct typedata *td = &typedata[decl.t.dat]; + struct function fn = { pr->fnarena, decl.name, .globl = decl.scls != SCSTATIC }; + fn.fnty = decl.t; + fn.retty = td->ret; + putdecl(pr, &decl); + irinit(&fn); + function(pr, &fn, st.pnames); + if (!nerror) irdump(&fn, decl.name); + } else if (decl.name) { + putdecl(pr, &decl); + if (st.varini) { + ini = expr(pr); + pdecl(&st, pr); + if (!assigncheck(decl.t, &ini)) + error(&ini.span, "cannot initialize %ty with %ty", decl.t, ini.ty); + if (!eval(&ini, EVSTATICINI)) + error(&ini.span, "cannot evaluate expression statically"); + } + } + freearena(pr->fnarena); + freearena(pr->exarena); + } while (st.more); + } +} + +/* vim:set ts=3 sw=3 expandtab: */ @@ -0,0 +1,205 @@ +#include "common.h" + +/*************/ +/** PARSING **/ +/************/ + +static inline bool +joinspan(struct span0 *dst, struct span0 snd) +{ + if (dst->file != snd.file) return 0; + assert(dst->off <= snd.off); + dst->len = snd.off + snd.len - dst->off; + return 1; +} + +enum toktag { /* single-character tokens' tag value is the character itself */ + TKEOF = -1, + TKXXX, + TKNUMLIT, + TKSTRLIT, + TKIDENT, + TKDUMMY_ = 0x7F, +#define _(kw, stdc) TKW##kw, +#include "keywords.def" +#undef _ +#define C2(a,b) (a | b<<8) +#define C3(a,b,c) (a | b<<8 | c<<16) + TKEQU = C2('=','='), + TKNEQ = C2('!','='), + TKLTE = C2('<','='), + TKGTE = C2('>','='), + TKSHR = C2('>','>'), + TKSHL = C2('<','<'), + TKINC = C2('+','+'), + TKDEC = C2('-','-'), + TKARROW = C2('-','>'), + TKPPCAT = C2('#', '#'), + TKLOGAND = C2('&','&'), + TKLOGIOR = C2('|','|'), + TKSETADD = C2('+','='), + TKSETSUB = C2('-','='), + TKSETMUL = C2('*','='), + TKSETDIV = C2('/','='), + TKSETMOD = C2('%','='), + TKSETIOR = C2('|','='), + TKSETXOR = C2('^','='), + TKSETAND = C2('&','='), + TKSETSHR = C3('>','>','='), + TKSETSHL = C3('<','<','='), + TKDOTS = C3('.','.','.'), +#undef C2 +#undef C3 +}; + +struct token { + enum toktag t; + uchar ty; /* type tag for num lits */ + struct span span; + union { + uvlong u; + vlong i; + double f; + const char *ident; + struct bytes s; + }; +}; + +struct macro { + const char *name; /* interned from tk->ident */ + const char **param; + struct span0 span; + uchar nparam; + bool fnlike, variadic; + struct rlist { + struct token *tk; + int n; + } rlist; +}; + +struct macrostack { + struct macrostack *link; + struct rlist *args; + struct span0 exspan; + int mac; + int idx; +}; + +extern int nerror; +struct parser { + struct parser *save; + short fileid; + const uchar *dat; + uint ndat; + uint idx, chridx; + short peekchr[2]; + uint peekcidx[2]; + short npeekchr; + struct macrostack *macstk; + struct token peektok; + bool eof, err; + struct env *env; + struct arena *fnarena, *exarena; + struct span fnblkspan; +}; + +int lex(struct parser *, struct token *); +int lexpeek(struct parser *, struct token *); +void initparser(struct parser *, const char *file); +void parse(struct parser *); + +/***********/ +/** CTYPE **/ +/***********/ + +#define aisprint(c) in_range(c, ' ', '~') +#define aisdigit(c) in_range(c, '0', '9') +#define aisodigit(c) in_range(c, '0', '7') +#define aisalpha(c) in_range((c)|0x20, 'a', 'z') +static inline bool aisspace(int c) { return c == ' ' || in_range(c, '\t', '\r'); } +static inline bool aisxdigit(int c) { return aisdigit(c) || in_range(c|0x20, 'a', 'f'); } + +/************/ +/* ANALYSIS */ +/************/ + +enum exprkind { + EXXX, ENUMLIT, ESTRLIT, ESYM, EINIT, EGETF, ECALL, ECOND, + /* unary */ + EPLUS, ENEG, ECOMPL, ELOGNOT, EDEREF, EADDROF, ECAST, + EPREINC, EPOSTINC, EPREDEC, EPOSTDEC, + /* binary */ + EADD, ESUB, EMUL, EDIV, EREM, EBAND, EBIOR, EXOR, ESHL, ESHR, + ELOGAND, ELOGIOR, + EEQU, ENEQ, ELTH, EGTH, ELTE, EGTE, + ESET, ESETADD, ESETSUB, ESETMUL, ESETDIV, ESETREM, ESETAND, ESETIOR, ESETXOR, ESETSHL, ESETSHR, + ESEQ, +}; +#define isunop(t) in_range(t, EPLUS, EPOSTDEC) +#define isbinop(t) in_range(t, EADD, ESEQ) +#define isassign(t) in_range(t, ESET, ESETSHR) +#define assigntobinop(t) ((t) - ESETADD + EADD) + +struct expr { + uchar t; + uchar qual; + ushort narg; /* ECALL */ + union type ty; + struct span span; + union { + struct expr *sub; + uvlong u; vlong i; double f; /* ENUMLIT */ + struct bytes s; /* ESTRLIT */ + struct decl *sym; /* ESYM */ + struct initializer *ini; /* EINIT */ + struct { /* EGETF */ + const char *name; + uint off; + uchar bitsiz, bitoff; + } fld; + char dummy; + }; +}; + +enum storageclass { + SCNONE, + SCTYPEDEF = 1<<0, + SCEXTERN = 1<<1, + SCSTATIC = 1<<2, + SCTHREADLOCAL = 1<<3, + SCAUTO = 1<<4, + SCREGISTER = 1<<5, +}; + +struct decl { + union type t; + uchar scls; + uchar qual; + ushort align; + struct span span; + const char *name; + int id; +}; + +struct env { + struct env *up; + struct decls { + struct decls *prev; + struct decl decl; + } *decls; + struct tagged { + struct tagged *prev; + struct span span; + union type t; + } *tagged; +}; + +enum evalmode { + EVINTCONST, + EVARITH, + EVSTATICINI, +}; + +bool eval(struct expr *, enum evalmode); + +/* vim:set ts=3 sw=3 expandtab: */ @@ -0,0 +1,41 @@ +#include "common.h" + +uchar targ_primsizes[TYPTR+1]; +uchar targ_primalign[TYPTR+1]; +enum typetag targ_sizetype; +bool targ_charsigned, targ_bigendian; + +struct targ { + const char *name; + struct { uchar longsize, vlongsize, ptrsize, valistsize; }; + struct { uchar longalign, vlongalign, doublealign, ptralign; }; + bool charsigned; + uchar sizetype; +} targs[] = { + { "amd64-sysv", {8, 8, 8, 24}, {8, 8, 8, 8}, 1, TYULONG } +}; + +void +targ_init(const char *starg) +{ + struct targ *t = &targs[0]; + uchar *sizes = targ_primsizes, *align = targ_primalign; + + sizes[TYBOOL] = sizes[TYCHAR] = sizes[TYSCHAR] = sizes[TYUCHAR] = 1; + sizes[TYSHORT] = sizes[TYUSHORT] = 2; + sizes[TYUINT] = sizes[TYINT] = 4; + sizes[TYFLOAT] = 4; + sizes[TYDOUBLE] = 8; + memcpy(align, sizes, sizeof targ_primalign); + sizes[TYULONG] = sizes[TYLONG] = t->longsize; + sizes[TYUVLONG] = sizes[TYVLONG] = t->vlongsize; + sizes[TYPTR] = t->ptrsize; + sizes[TYVALIST] = t->valistsize; + align[TYULONG] = align[TYLONG] = t->longalign; + align[TYUVLONG] = align[TYVLONG] = t->vlongalign; + align[TYDOUBLE] = t->doublealign; + align[TYVALIST] = align[TYPTR] = t->ptralign; + targ_sizetype = t->sizetype; + targ_charsigned = t->charsigned; + targ_bigendian = 0; +} @@ -0,0 +1,39 @@ +/* coment */ + +int glob; + +int add (int x, int y) { + return x + y + glob; +} + +int abs(int x){ + return (x ^ x >> 31) + ((unsigned)x >> 31); +} + +int popcnt(unsigned x) { + int n = 0; + while (x) x >>= 1, n++; + return n; +} + +int foo(int x) { + int work[10]; + work[x]=x; + if (x == 0) + return 1; + else if (x == 1) + return -1; + else if (x < 0) + return x; + else + return 0; +} + +extern void printf(char *, ...); +int main() { + char x; + int k = x += 1; + return abs(-33); +} + +// @@ -0,0 +1,287 @@ +#include "common.h" +#include <string.h> + +struct typedata typedata[1<<13]; +const char *ttypenames[1<<10]; + +static ushort +hashtd(const struct typedata *td) +{ + uint h = td->t*33; + switch (td->t) { + case TYARRAY: + h = hashb(h, &td->arrlen, sizeof td->arrlen); + /* fallthru */ + case TYPTR: + h = hashb(h, &td->child, sizeof td->child); + break; + case TYSTRUCT: + case TYUNION: + case TYENUM: + h = hashb(h, &td->id, sizeof td->id); + break; + case TYFUNC: + h = hashb(h, &td->ret, sizeof td->ret); + h = hashb(h, &td->nmemb, sizeof td->nmemb); + for (int i = 0; i < td->nmemb; ++i) { + uchar q = tdgetqual(td->quals, i); + h = hashb(h, &td->param[i], sizeof *td->param); + h = hashb(h, &q, sizeof q); + } + break; + default: + assert(0 && "bad typedata tag"); + } + return h ^ h>>16; +} + +static bool +tdequ(const struct typedata *a, const struct typedata *b) +{ + if (a->t != b->t) return 0; + switch (a->t) { + case TYARRAY: + return a->arrlen == b->arrlen && a->child.bits == b->child.bits; + case TYPTR: + return a->child.bits == b->child.bits; + case TYSTRUCT: + case TYUNION: + case TYENUM: + return a->id == b->id; + case TYFUNC: + if (a->ret.bits != b->ret.bits) return 0; + if (a->nmemb != b->nmemb) return 0; + if (a->variadic != b->variadic) return 0; + for (int i = 0; i < a->nmemb; ++i) { + if (!a->quals != !b->quals || a->param[i].bits != b->param[i].bits) + return 0; + if (a->quals && tdgetqual(a->quals, i) != tdgetqual(b->quals, i)) + return 0; + } + return 1; + break; + default: + assert(0 && "bad typedata tag"); + } +} + +static void * +alloccopy(struct arena **arena, const void *src, uint siz, uint align) +{ + return memcpy(alloc(arena, siz, align), src, siz); +} + +static ushort +interntd(const struct typedata *td) +{ + uint h, i, n = arraylength(typedata); + for (i = h = hashtd(td); n--; ++i) { + struct typedata *slot = &typedata[i &= arraylength(typedata) - 1]; + if (!slot->t) { + uint nmemb; + static struct arena *datarena; + if (!datarena) { + enum { N = 1<<12 }; + static union { char m[sizeof(struct arena) + N]; struct arena *_align; } amem; + datarena = (void *)amem.m, datarena->cap = N; + } + + Copy: + nmemb = td->nmemb; + *slot = *td; + switch (slot->t) { + case TYENUM: + if (slot->var) + slot->var = alloccopy(&datarena, td->var, nmemb * sizeof *slot->var, 0); + break; + case TYSTRUCT: + case TYUNION: + if (slot->fld) + slot->fld = alloccopy(&datarena, td->fld, nmemb * sizeof *slot->fld, 0); + Qual: + if (slot->quals) + slot->quals = alloccopy(&datarena, td->quals, tdqualsiz(nmemb), 1); + break; + case TYFUNC: + if (slot->param) { + slot->param = alloccopy(&datarena, td->param, nmemb * sizeof *slot->param, 0); + } + goto Qual; + } + return i; + } else if (tdequ(slot, td)) { + if (td->t == TYSTRUCT || td->t == TYUNION) + goto Copy; + return i; + } + } + assert(0 && "typedata[] full"); +} + +bool +isincomplete(union type t) +{ + switch (t.t) { + case TYVOID: return 1; + case TYARRAY: return !typearrlen(t); + case TYSTRUCT: + case TYUNION: + return typedata[t.dat].nmemb == 0; + } + return 0; +} + +uint +typesize(union type t) +{ + if (isprim(t) || t.t == TYPTR) return targ_primsizes[t.t]; + switch (t.t) { + case TYENUM: + return targ_primsizes[t.backing]; + case TYARRAY: + return t.flag & TFCHLDPRIM ? targ_primsizes[t.child] * t.arrlen : typedata[t.dat].siz; + case TYSTRUCT: + case TYUNION: + return typedata[t.dat].siz; + } + return 0; +} + +uint +typealign(union type t) +{ + if (isprim(t) || t.t == TYPTR) return targ_primalign[t.t]; + switch (t.t) { + case TYENUM: + return targ_primalign[t.backing]; + case TYARRAY: + return targ_primalign[typechild(t).t]; + case TYSTRUCT: + case TYUNION: + return typedata[t.dat].align; + } + return 0; +} + +union type +mkptrtype(union type t, int qual) +{ + if (isprim(t)) + return mktype(TYPTR, .flag = TFCHLDPRIM | (qual & TFCHLDQUAL), .child = t.t); + else if (t.t == TYENUM || t.t == TYFUNC || isagg(t)) + return mktype(TYPTR, .flag = TFCHLDISDAT | (qual & TFCHLDQUAL), .dat = t.dat); + return mktype(TYPTR, .flag = qual & TFCHLDQUAL, + .dat = interntd(&(struct typedata) { TYPTR, .child = t })); +} + +union type +mkarrtype(union type t, int qual, uint n) +{ + if (isprim(t) && n < 256) + return mktype(TYARRAY, .flag = TFCHLDPRIM | (qual & TFCHLDQUAL), .child = t.t, .arrlen = n); + return mktype(TYARRAY, .flag = qual & TFCHLDQUAL, + .dat = interntd(&(struct typedata) { TYARRAY, .child = t, .arrlen = n })); +} + +union type +mkfntype(union type ret, uint n, const union type *par, const uchar *qual, bool kandr, bool variadic) +{ + struct typedata td = { TYFUNC, .ret = ret, .nmemb = n, .param = par, .quals = qual }; + td.kandr = kandr, td.variadic = variadic; + return mktype(TYFUNC, .dat = interntd(&td)); +} + + +union type +completetype(const char *name, int id, struct typedata *td) +{ + assert(td->t == TYENUM || td->t == TYSTRUCT || td->t == TYUNION); + td->id = id; + assert(id < arraylength(ttypenames) && "too many tag types"); + if (ttypenames[id]) + assert(ttypenames[id] == name && "bad redefn"); + else + ttypenames[id] = name; + return mktype(td->t, .dat = interntd(td)); +} + +union type +mktagtype(const char *name, struct typedata *td) +{ + static int id; + return completetype(name, id++, td); +} + +union type +typedecay(union type t) +{ + if (t.t == TYARRAY) + return mkptrtype(typechild(t), t.flag & TFCHLDQUAL); + if (t.t == TYFUNC) + return mkptrtype(t, 0); + return t; +} + +bool /* 6.5.16.1 Simple assignment Constraints */ +assigncompat(union type dst, union type src) +{ + union type ds, ss; + if (dst.bits == src.bits) return 1; + if (isarith(dst) && isarith(src)) return 1; + if (dst.t == TYPTR && src.t == TYPTR) { + ds = typechild(dst); + ss = typechild(src); + if (ds.bits == ss.bits || ss.t == TYVOID || ds.t == TYVOID) + return ((dst.flag & TFCHLDQUAL) & (src.flag & TFCHLDQUAL)) == (src.flag & TFCHLDQUAL); + } + if (dst.t == TYBOOL && src.t == TYPTR) + return 1; + return 0; +} + +enum typetag +intpromote(enum typetag t) +{ + static int intisshort = -1; + if (intisshort < 0) intisshort = targ_primsizes[TYINT] == targ_primsizes[TYSHORT]; + if (intisshort && t == TYUSHORT) return TYUINT; + return t < TYINT ? TYINT : t; +} + +union type /* 6.3.1.8 Usual arithmetic conversions */ +cvtarith(union type a, union type b) +{ + const union type none = {0}; + + if (!isarith(a) || !isarith(b)) return none; + if (a.t == TYENUM) a = typechild(a); + if (b.t == TYENUM) b = typechild(b); + if (isflt(a) || isflt(b)) { + /* when one type is float, choose type with greatest rank */ + /* enumeration order of type tags reflects arithmetic type rank */ + return a.t > b.t ? a : b; + } + a.t = intpromote(a.t); + b.t = intpromote(b.t); + if (a.bits == b.bits) return a; + + if (issigned(a) == issigned(b)) { + /* when both are integers with same signage, choose type with greatest rank */ + return a.t > b.t ? a : b; + } + /* if the signed type can represent all values of the unsigned type, + * choose it, otherwise choose its corresponding unsigned type */ + /* so long long + unsigned = long long; + * but long long + unsigned long = unsigned long long */ + if (issigned(a)) { + if (targ_primsizes[a.t] <= targ_primsizes[b.t]) + a.t += 1; /* make unsigned */ + return a; + } else { + if (targ_primsizes[b.t] <= targ_primsizes[a.t]) + b.t += 1; /* make unsigned */ + return b; + } +} + +/* vim:set ts=3 sw=3 expandtab: */ |