From 104330a399f405b83328525bb2be55b360109b16 Mon Sep 17 00:00:00 2001
From: lemon <lsof@mailbox.org>
Date: Sun, 28 May 2023 19:29:10 +0200
Subject: improve struct token

---
 parse.h | 75 ++++++++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 42 insertions(+), 33 deletions(-)

(limited to 'parse.h')

diff --git a/parse.h b/parse.h
index 520a76e..2a9b076 100644
--- a/parse.h
+++ b/parse.h
@@ -17,30 +17,31 @@ enum toktag { /* single-character tokens' tag value is the character itself */
    TKEOF = -1,
    TKXXX,
    TKNUMLIT,
+   TKCHRLIT,
    TKSTRLIT,
-   TKEQU = '@',
-   TKNEQ,
-   TKLTE,
-   TKGTE,
-   TKSHR,
-   TKSHL,
-   TKINC,
-   TKDEC,
-   TKDOTS,
-   TKARROW,
-   TKPPCAT,
-   TKLOGAND,
-   TKLOGIOR,
-   TKSETADD,
-   TKSETSUB,
-   TKSETMUL,
-   TKSETDIV,
-   TKSETREM,
-   TKSETIOR,
-   TKSETXOR,
-   TKSETAND,
-   TKSETSHL,
-   TKSETSHR,
+   TKEQU = '@', /* == */
+   TKNEQ, /* != */
+   TKLTE, /* <= */
+   TKGTE, /* >= */
+   TKSHR, /* >> */
+   TKSHL, /* << */
+   TKINC, /* ++ */
+   TKDEC, /* -- */
+   TKDOTS, /* ... */
+   TKARROW, /* -> */
+   TKPPCAT, /* ## */
+   TKLOGAND, /* && */
+   TKLOGIOR, /* || */
+   TKSETADD, /* += */
+   TKSETSUB, /* -= */
+   TKSETMUL, /* *= */
+   TKSETDIV, /* /= */
+   TKSETREM, /* %= */
+   TKSETIOR, /* |= */
+   TKSETXOR, /* ^= */
+   TKSETAND, /* &= */
+   TKSETSHL, /* <<= */
+   TKSETSHR, /* >>= */
    TKIDENT = 0x80,
 #define _(kw, stdc) TKW##kw,
 #include "keywords.def"
@@ -48,20 +49,27 @@ enum toktag { /* single-character tokens' tag value is the character itself */
 };
 
 struct token {
-   enum toktag t;
-   uchar ty; /* type tag for num lits */
+   short t; /* an enum toktag value, held in a short */
+   bool litlit; /* str/chr/num lits: s points into the file buffer, not a heap copy */
+   uint len; /* length of s in bytes, where s is used (see below) */
    struct span span;
-   union {
-     uvlong u;
-     vlong i;
-     double f;
-     const char *ident;
-     struct bytes s;
-   };
+   const char *s; /* token text; its meaning depends on t: */
+   /* for single- and multi-character punctuation tokens, s & len are unused
+    * for keywords, s is a constant C string, len == strlen(s)
+    * for idents, s is an interned C string, len == strlen(s)
+    * for strlit and chrlit:
+    *  when litlit : s points just past the opening quote, within the file buffer
+    *                len == span.sl.len - 2 (literal data appears verbatim in source code)
+    *  otherwise s is a heap-allocated buffer of len bytes
+    * for numlit:
+    *  when litlit : s points to the start of the token within the file buffer (normal case)
+    *                len == span.sl.len (number literal appears verbatim in source code)
+    *  otherwise s is a heap-allocated buffer of len bytes
+    */
 };
 
 struct macro {
-   const char *name; /* interned from tk->ident */
+   const char *name; /* interned */
    const char **param;
    struct span0 span;
    uchar nparam;
@@ -101,6 +109,7 @@ struct parser {
 const char *intern(const char *);
 int lex(struct parser *, struct token *);
 int lexpeek(struct parser *, struct token *);
+enum typetag parsenumlit(uvlong *, double *, const struct token *, bool ispp);
 void initparser(struct parser *, const char *file);
 void parse(struct parser *);
 
-- 
cgit v1.2.3