1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
#include "common.h"
/* Stretch *dst so that it ends where snd ends.
 *
 * Returns 0 and leaves *dst untouched when the two spans belong to different
 * files. Otherwise dst->off is kept, dst->len is recomputed so the span ends
 * at snd.off + snd.len, and 1 is returned.
 *
 * snd must not start before dst (checked with assert). Note that the new
 * length is derived from snd's end only, so if snd ends before dst's current
 * end the span gets shorter.
 */
static inline bool
joinspan(struct span0 *dst, struct span0 snd)
{
   if (dst->file == snd.file) {
      assert(snd.off >= dst->off);
      dst->len = snd.off + snd.len - dst->off;
      return 1;
   }
   return 0;
}
/* Token tags.
 *
 * Value layout, as fixed by the enumerators below:
 *   -1              TKEOF
 *   0 .. 7          TKXXX .. TKPPMACSTR (literals and preprocessor-only tags)
 *   '@' onwards     multi-character punctuators, TKEQU .. TKSETSHR
 *                   (0x40 .. 0x56 when the execution character set is ASCII)
 *   0x80            TKIDENT
 *   after TKIDENT   one TKW<kw> enumerator per _(kw, stdc) entry in keywords.def
 *                   (the stdc argument is ignored by this expansion)
 *
 * NOTE(review): the named ranges are presumably chosen so they never collide
 * with a character that can be a single-character token -- confirm in the lexer.
 * NOTE(review): the meaning of TKXXX is not visible in this header.
 */
enum toktag { /* single-character tokens' tag value is the character itself */
TKEOF = -1,
TKXXX,
TKNUMLIT,
TKCHRLIT,
TKSTRLIT,
TKPPHDRH, /* <hdr> (for #include) */
TKPPHDRQ, /* "hdr" (for #include) */
TKPPMACARG, /* macro param, in repl list */
TKPPMACSTR, /* stringify macro param, in repl list */
TKEQU = '@', /* == */
TKNEQ, /* != */
TKLTE, /* <= */
TKGTE, /* >= */
TKSHR, /* >> */
TKSHL, /* << */
TKINC, /* ++ */
TKDEC, /* -- */
TKDOTS, /* ... */
TKARROW, /* -> */
TKPPCAT, /* ## */
TKLOGAND, /* && */
TKLOGIOR, /* || */
TKSETADD, /* += */
TKSETSUB, /* -= */
TKSETMUL, /* *= */
TKSETDIV, /* /= */
TKSETREM, /* %= */
TKSETIOR, /* |= */
TKSETXOR, /* ^= */
TKSETAND, /* &= */
TKSETSHL, /* <<= */
TKSETSHR, /* >>= */
TKIDENT = 0x80,
/* X-macro: each _(kw, stdc) line in keywords.def becomes the enumerator TKW<kw> */
#define _(kw, stdc) TKW##kw,
#include "keywords.def"
#undef _
};
/* One token. Which of s / len / argidx are meaningful depends on the tag t;
 * the rules are spelled out in the comment at the end of the struct. */
struct token {
short t; /* toktag */
bool litlit; /* literals only: set when s points into the file buffer rather than a heap copy (see below) */
union {
/* len and argidx share storage; argidx is used for macro arg/stringify tokens */
uint len, argidx;
};
struct span span; /* NOTE(review): presumably the token's extent in the source -- struct span is declared elsewhere */
const char *s;
/* for (multi-)character tokens s & len are unused
* for keywords, s is constant cstring, len = strlen(s)
* for idents, s is interned cstring, len = strlen(s)
* for strlit and chrlit:
* when litlit : s points to start of string within file buffer (after the ")
* len == span.sl.len - 2 (string data appears literally in source code)
* otherwise s is heap allocated buffer of len bytes
* for numlit:
* when litlit : s points to start of token within file buffer (normal case)
* len == span.sl.len (number literal appears literally in source code)
* otherwise s is heap allocated buffer of len bytes
* for macro arg/stringify:
* s is like keyword/ident
* argidx is index in macro param list
*/
};
/* Defined in another translation unit.
 * NOTE(review): presumably a running count of reported errors -- confirm at the definition. */
extern int nerror;
/* Lexer state.
 * NOTE(review): the field notes below are inferred from names and types only;
 * the code that reads and writes them is not in this header -- confirm there. */
struct lexer {
struct lexer *save; /* link to another lexer; presumably a saved/outer lexer state -- confirm */
short fileid; /* presumably identifies the file being lexed -- confirm */
const uchar *dat; /* presumably the input bytes -- confirm */
uint ndat; /* presumably the number of bytes in dat -- confirm */
uint idx, chridx; /* presumably positions within dat -- confirm how the two differ */
short chrbuf[1<<10]; /* 1024-entry buffer of shorts; presumably buffered characters -- confirm */
uint chridxbuf[1<<10]; /* same capacity as chrbuf; presumably the index recorded for each chrbuf entry -- confirm */
ushort nchrbuf, chrbuf0; /* presumably fill count and start position for chrbuf -- confirm */
struct macrostack *macstk; /* struct macrostack is declared elsewhere; presumably active macro expansions -- confirm */
struct token peektok; /* presumably the token held for lexpeek() -- confirm */
bool eof, err; /* presumably end-of-input and error flags -- confirm */
struct arena **tmparena; /* same type as initlexer()'s tmparena parameter; presumably stored from it -- confirm */
};
/* Lexer entry points. Only the prototypes are visible here.
 * NOTE(review): the per-function notes are inferred from names and signatures --
 * confirm against the definitions. */
/* presumably returns the interned copy of the given string (idents' token.s is described as an interned cstring) */
const char *intern(const char *);
/* presumably reads the next token into the struct token; the int result is likely its toktag -- confirm */
int lex(struct lexer *, struct token *);
/* presumably like lex() but without consuming the token (struct lexer has a peektok field) -- confirm */
int lexpeek(struct lexer *, struct token *);
/* presumably parses a TKNUMLIT token, storing an integer or floating value through the two
 * out-pointers and returning the literal's type; ispp presumably selects preprocessor rules -- confirm */
enum typetag parsenumlit(uvlong *, double *, const struct token *, bool ispp);
/* presumably initialises the lexer for the named file; role of span is not visible here -- confirm */
void initlexer(struct lexer *, const struct span *span, const char *file, struct arena **tmparena);
/* vim:set ts=3 sw=3 expandtab: */
|