/* Lexer interface: token tags, the Token and Lexer structures, and the lexing entry points. */
#pragma once
#include "antcc.h"
#include "c_type.h"
/* Stretch *dst so that it ends where snd ends.
 *
 * The join only happens when both spans belong to the same file and snd does
 * not start before dst; otherwise *dst is left untouched.
 * Returns true when *dst was updated, false when the spans cannot be joined.
 *
 * Note: the new length is always measured up to the end of snd, so if snd
 * ends before the current end of dst, dst becomes shorter.
 */
static inline bool
joinspan(Span0 *dst, Span0 snd)
{
   bool joinable = dst->file == snd.file && dst->off <= snd.off;

   if (joinable)
      dst->len = (snd.off + snd.len) - dst->off;
   return joinable;
}
/* Token tags.
 *
 * Layout of the value space, as fixed by the initializers below:
 *   0 .. 7        TKXXX and the literal / preprocessor-only tags
 *   '@' .. 0x56   multi-character operators (TKEQU .. TKSETSHR)
 *   0x80 ..       TKIDENT, followed by one TKW<kw> tag per keyword
 *   0xFF          TKEOF
 * Every other value is available for single-character tokens, whose tag is
 * the character itself.
 */
enum toktag { /* single-character tokens' tag value is the character itself */
TKEOF = 0xFF,
TKXXX = 0,
TKNUMLIT,
TKCHRLIT,
TKSTRLIT,
TKPPHDRH, /* <hdr> (for #include) */
TKPPHDRQ, /* "hdr" (for #include) */
TKPPMACARG, /* macro param, in repl list */
TKPPMACSTR, /* stringify macro param, in repl list */
TKEQU = '@', /* == */
TKNEQ, /* != */
TKLTE, /* <= */
TKGTE, /* >= */
TKSHR, /* >> */
TKSHL, /* << */
TKINC, /* ++ */
TKDEC, /* -- */
TKDOTS, /* ... */
TKARROW, /* -> */
TKPPCAT, /* ## */
TKLOGAND, /* && */
TKLOGIOR, /* || */
TKSETADD, /* += */
TKSETSUB, /* -= */
TKSETMUL, /* *= */
TKSETDIV, /* /= */
TKSETREM, /* %= */
TKSETIOR, /* |= */
TKSETXOR, /* ^= */
TKSETAND, /* &= */
TKSETSHL, /* <<= */
TKSETSHR, /* >>= */
TKIDENT = 0x80,
/* X-macro: each _(kw, stdc, ...) entry of c_keywords.def becomes a TKW<kw>
 * enumerator; the stdc and trailing arguments are ignored here. */
#define _(kw, stdc, ...) TKW##kw,
#include "c_keywords.def"
#undef _
NTOKTAG, /* one past the last keyword tag */
};
/* Tags are stored in the uchar member `t` of Token, so they must stay below 256.
 * This also means no keyword tag can reach TKEOF (0xFF). */
static_assert(NTOKTAG < 256);
/* One token as handed out by lex() / lexpeek().
 * Which members of the two anonymous unions are meaningful depends on the tag
 * in `t`; the rules are listed in the comment at the end of the struct. */
typedef struct Token {
uchar t; /* toktag */
bool litlit : 1, /* literal's data appears verbatim in the source (see below) */
blue : 1, /* preprocessor token painted blue */
extwarn : 1; /* warn this keyword token is an extension */
uchar wide : 2, /* for CHRLIT & STRLIT; 1 -> 16bit, 2 -> 32bit */
wideuni : 1, /* ditto, 0 -> 'L', 1 -> 'u'/'U' (C11) */
space : 1; /* preceded by whitespace? */
union {
uint len; /* length of the data behind s / ws16 / ws32 (see below) */
ushort argidx; /* macro arg/stringify tokens: index in macro param list */
};
Span span; /* type declared elsewhere; presumably the token's source extent */
union {
internstr name; /* presumably the interned-string view used for idents -- confirm */
const char *s; /* narrow text; ownership depends on tag and litlit (see below) */
const ushort *ws16; /* wide literal data when wide == 1 (16bit) */
const uint *ws32; /* wide literal data when wide == 2 (32bit) */
};
/* for (multi-)character tokens s & len are unused
 * for keywords, s is constant cstring, len = strlen(s)
 * for idents, s is interned cstring, len = strlen(s)
 * for strlit and chrlit:
 * when litlit : s points to start of string within file buffer (after the ")
 * len == span.sl.len - 2 (string data appears literally in source code)
 * otherwise s is heap allocated buffer of len bytes
 * when wide, litlit = 0 and use ws16/ws32
 * for numlit:
 * when litlit : s points to start of token within file buffer (normal case)
 * len == span.sl.len (number literal appears literally in source code)
 * otherwise s is heap allocated buffer of len bytes
 * for macro arg/stringify:
 * s is like keyword/ident
 * argidx is index in macro param list,
 * macidx is macro id of which it is a parameter
 * NOTE(review): no `macidx` member is declared in this struct; the line
 * above may be stale -- confirm against the preprocessor code.
 */
} Token;
/* Error and warning counters, defined elsewhere.
 * NOTE(review): presumably running totals of diagnostics emitted -- confirm. */
extern int nerror, nwarn;
/* Lexer state.
 * NOTE(review): the code that uses these members is not part of this header;
 * the per-field notes below are inferred from names and types only and should
 * be confirmed against the implementation. */
typedef struct Lexer {
struct Lexer *save; /* another Lexer; presumably a saved/outer lexer state */
short fileid; /* presumably identifies the file being lexed */
const uchar *dat; /* read-only input bytes; presumably ndat bytes long */
uint ndat;
uint idx, chridx; /* presumably positions into dat and the chrbuf arrays */
ushort chrbuf0;
struct MacroStack *macstk; /* type declared elsewhere; presumably active macro expansions */
Token peektok; /* presumably the lookahead token served by lexpeek() */
bool eof, err;
Arena **tmparena; /* presumably the arena released by lexerfreetemps() */
bool firstdirective;
short nppcnd0;
short inclnerror, inclnwarn; /* presumably nerror/nwarn as recorded for this include */
internstr inclguard; /* presumably the include-guard macro name, if any */
uchar chrbuf[1<<10]; /* 1024-entry character buffer */
uint chridxbuf[1<<10]; /* 1024 entries, same count as chrbuf */
} Lexer;
/* Result codes returned by initlexer().
 * NOTE(review): the meanings below are inferred from the enumerator names --
 * confirm against the initlexer() implementation. */
enum initlexer {
LXOK, /* presumably: lexer is ready to use */
LXFILESEEN, /* presumably: the file had already been seen */
LXFILESKIP, /* presumably: the file is to be skipped */
LXERR, /* presumably: failure, described through initlexer()'s err argument */
};
/* Lexer entry points; all are defined elsewhere.
 * NOTE(review): the descriptions below are inferred from the signatures only
 * -- confirm against the implementations. */

/* Presumably reads the next token into the given Token; the meaning of the
 * int result is not visible here. */
int lex(Lexer *, Token *);
/* Presumably like lex() but without consuming the token (see Lexer.peektok). */
int lexpeek(Lexer *, Token *);
/* Presumably evaluates a number-literal token, storing the value through the
 * u64int or double out-pointer and returning the literal's type tag; ispp
 * presumably selects preprocessor-expression rules. */
enum typetag parsenumlit(u64int *, double *, const Token *, bool ispp);
/* Sets up a Lexer for `file` and returns an enum initlexer status; `err`
 * presumably receives a message describing the failure. */
enum initlexer initlexer(Lexer *, const char **err, const char *file);
/* Presumably writes the lexer's token stream or state to `out`. */
void lexerdump(Lexer *, WriteBuf *out);
/* Presumably releases temporary allocations owned by the lexer. */
void lexerfreetemps(Lexer *);
/* vim:set ts=3 sw=3 expandtab: */