/* path: root/src/c_lex.h
 * blob: d43696743b206172de65bd4c01570d724cbe957c
 */
#pragma once

#include "antcc.h"
#include "c_type.h"

/* Stretch *dst so that it ends exactly where snd ends.
 *
 * Succeeds (returns 1) only when both spans refer to the same file and
 * snd does not start before dst; otherwise *dst is left untouched and
 * 0 is returned.
 *
 * Note that the new length is derived from snd's end alone, so if snd
 * ends before dst's current end, dst becomes shorter.
 */
static inline bool
joinspan(Span0 *dst, Span0 snd)
{
   if (dst->file == snd.file && dst->off <= snd.off) {
      dst->len = snd.off + snd.len - dst->off;
      return 1;
   }
   return 0;
}

/* Token tags; Token.t holds one of these values.
 *
 * Value space as fixed by the initializers below:
 *   0            TKXXX
 *   1..7         TKNUMLIT .. TKPPMACSTR (literals and preprocessor-only tags)
 *   '@'..        TKEQU .. TKSETSHR (multi-character punctuators;
 *                0x40..0x56 when '@' is ASCII)
 *   0x80..       TKIDENT, followed by one TKW<kw> tag per keyword
 *   0xFF         TKEOF
 * Single-character tokens have no enumerator: the character code is the tag.
 */
enum toktag { /* single-character tokens' tag value is the character itself */
   TKEOF = 0xFF,
   TKXXX = 0,
   TKNUMLIT,
   TKCHRLIT,
   TKSTRLIT,
   TKPPHDRH, /* <hdr> (for #include) */
   TKPPHDRQ, /* "hdr" (for #include) */
   TKPPMACARG, /* macro param, in repl list */
   TKPPMACSTR, /* stringify macro param, in repl list */
   TKEQU = '@', /* == */
   TKNEQ, /* != */
   TKLTE, /* <= */
   TKGTE, /* >= */
   TKSHR, /* >> */
   TKSHL, /* << */
   TKINC, /* ++ */
   TKDEC, /* -- */
   TKDOTS, /* ... */
   TKARROW, /* -> */
   TKPPCAT, /* ## */
   TKLOGAND, /* && */
   TKLOGIOR, /* || */
   TKSETADD, /* += */
   TKSETSUB, /* -= */
   TKSETMUL, /* *= */
   TKSETDIV, /* /= */
   TKSETREM, /* %= */
   TKSETIOR, /* |= */
   TKSETXOR, /* ^= */
   TKSETAND, /* &= */
   TKSETSHL, /* <<= */
   TKSETSHR, /* >>= */
   TKIDENT = 0x80,
   /* X-macro: every _(kw, stdc, ...) entry in c_keywords.def expands to one
    * TKW<kw> enumerator, numbered consecutively after TKIDENT. */
#define _(kw, stdc, ...) TKW##kw,
#include "c_keywords.def"
#undef _
   NTOKTAG, /* one past the last keyword tag */
};
/* Every tag must stay below 256: Token.t stores the tag in a uchar
 * (presumably an 8-bit unsigned char -- confirm against antcc.h). */
static_assert(NTOKTAG < 256);

/* A single lexed token.
 *
 * How the payload members (s, len, ...) are used, by token kind:
 *  - (multi-)character tokens: s and len are unused.
 *  - keywords: s is a constant C string, len = strlen(s).
 *  - identifiers: s is an interned C string, len = strlen(s).
 *  - string and character literals:
 *      litlit set   : s points at the start of the string inside the file
 *                     buffer (just after the opening quote) and
 *                     len == span.sl.len - 2, i.e. the data appears
 *                     literally in the source code.
 *      litlit clear : s is a heap-allocated buffer of len bytes.
 *      wide set     : litlit is 0 and ws16/ws32 is used instead of s.
 *  - number literals:
 *      litlit set   : s points at the start of the token inside the file
 *                     buffer and len == span.sl.len (the normal case).
 *      litlit clear : s is a heap-allocated buffer of len bytes.
 *  - macro argument / stringify tokens: s is as for keywords/identifiers
 *    and argidx is the index into the macro's parameter list.
 *    NOTE(review): a "macidx" (id of the macro owning the parameter) was
 *    documented here too, but this struct has no such member -- confirm.
 */
typedef struct Token {
   uchar t;           /* toktag */
   bool litlit : 1;   /* see the literal rules above */
   bool blue : 1;     /* preprocessor token painted blue */
   bool extwarn : 1;  /* warn this keyword token is an extension */
   uchar wide : 2;    /* for CHRLIT & STRLIT; 1 -> 16bit, 2 -> 32bit */
   uchar wideuni : 1; /* ditto, 0 -> 'L', 1 -> 'u'/'U' (C11) */
   uchar space : 1;   /* preceded by whitespace? */
   union {
      uint len;
      ushort argidx;
   };
   Span span;
   union {
      internstr name;
      const char *s;
      const ushort *ws16;
      const uint *ws32;
   };
} Token;

/* Global counters defined in another translation unit.
 * NOTE(review): presumably running totals of diagnostics -- confirm. */
extern int nerror;
extern int nwarn;
/* Lexer state.
 * NOTE(review): this header does not show how the members are used; the
 * remarks below are guesses from names and types only -- confirm against
 * the lexer implementation before relying on them.
 */
typedef struct Lexer {
   struct Lexer *save;        /* link to another Lexer (purpose: confirm) */
   short fileid;
   const uchar *dat;          /* presumably the input bytes ... */
   uint ndat;                 /* ... and their count */
   uint idx;
   uint chridx;
   ushort chrbuf0;
   struct MacroStack *macstk;
   Token peektok;             /* presumably the token buffered for lexpeek() */
   bool eof;
   bool err;
   Arena **tmparena;
   bool firstdirective;
   short nppcnd0;
   short inclnerror;
   short inclnwarn;
   internstr inclguard;
   uchar chrbuf[1 << 10];     /* 1024 entries, same count as chridxbuf */
   uint chridxbuf[1 << 10];
} Lexer;

/* Result codes returned by initlexer().
 * NOTE(review): the exact meaning of each code is defined by the
 * implementation, which is not visible from this header. */
enum initlexer {
   LXOK       = 0,
   LXFILESEEN = 1,
   LXFILESKIP = 2,
   LXERR      = 3,
};

/* Lexer entry points; all are defined outside this header.
 * NOTE(review): return-value conventions of lex()/lexpeek() and the roles
 * of parsenumlit()'s two output pointers are not visible here -- the
 * parameter names below are descriptive guesses only.
 */
int lex(Lexer *lx, Token *tok);
int lexpeek(Lexer *lx, Token *tok);
enum typetag parsenumlit(u64int *ival, double *fval, const Token *tok, bool ispp);
enum initlexer initlexer(Lexer *lx, const char **err, const char *file);
void lexerdump(Lexer *lx, WriteBuf *out);
void lexerfreetemps(Lexer *lx);

/* vim:set ts=3 sw=3 expandtab: */