From 2e0160015d756702a799be95db237b7a91bae400 Mon Sep 17 00:00:00 2001 From: lemon Date: Thu, 1 Sep 2022 12:09:22 +0200 Subject: check for duplicate fields faster using bloom filter --- src/parse.cff | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/parse.cff b/src/parse.cff index 14f7082..7afec96 100644 --- a/src/parse.cff +++ b/src/parse.cff @@ -725,6 +725,46 @@ fn isdecltokt(t TokT) bool { return #f; } +// A simple bloom filter for quick duplicates definitions checking for aggregates +// At least for reasonably sized ones.. +struct BloomFilt { + bits [4]u32, + + fn _hash(x u32) u32 { + // https://nullprogram.com/blog/2018/07/31/ + x ^= x >> 16; + x *= 0x7feb352dU; + x ^= x >> 15; + x *= 0x846ca68bU; + x ^= x >> 16; + return x; + } + + def NHASH = 4; + def NBITS = 8 * sizeof BloomFilt; + + fn add(this *BloomFilt, s *const u8) void { + let h u32 = as(uptrint)s; + for let i = 0; i < NHASH; ++i { + h = _hash(h); + let idx = h % NBITS; + this.bits[idx/32] |= 1u32 << (idx%32); + } + } + + fn matches(this *const BloomFilt, s *const u8) bool { + let h u32 = as(uptrint)s; + for let i = 0; i < NHASH; ++i { + h = _hash(h); + let idx = h % NBITS; + if this.bits[idx/32] & (1u64 << (idx%32)) == 0 { + return #f; + } + } + return #t; + } +} + fn parseagg(P *Parser, loc Loc, kind AggKind, name *const u8, retdecl **Decl) *const Type { let tok Tok #?; static id int = 0; @@ -758,6 +798,7 @@ fn parseagg(P *Parser, loc Loc, kind AggKind, name *const u8, retdecl **Decl) *c lexexpect(P, '{'); let size = 0z, align = 1z; let flds Vec = {}; + let bfilt BloomFilt = {}; let havedecls = #f; while !lexmatch(P, &tok, '}') { if isdecltokt(lexpeek(P).t) { @@ -780,11 +821,14 @@ fn parseagg(P *Parser, loc Loc, kind AggKind, name *const u8, retdecl **Decl) *c size = kind == :Struct ? off + type.size : MAX(size, type.size); } - vec_each (fld, _, flds) { - if fld.name == name { - err(P, tok.loc, "duplicate field %qT", &tok); + if bfilt->matches(name) { + vec_each (fld, _, flds) { + if fld.name == name { + err(P, tok.loc, "duplicate field %qT", &tok); + } } } + bfilt->add(name); flds->push({ name, type, off }); if !lexmatch(P, #null, ',') { @@ -877,6 +921,7 @@ fn parsebitfield(P *Parser, name *const u8) *const Type { lexexpect(P, '{'); static id int = {}; bitf.id = id++; + let bfilt BloomFilt = {}; let flds Vec = {}; let iota = 0u16; while !lexmatch(P, &tok, '}') { @@ -884,11 +929,14 @@ fn parsebitfield(P *Parser, name *const u8) *const Type { let off = 0u, size = 0u; let ty = bitf.intty; - vec_each (fld, _, flds) { - if fld.name == name { - err(P, tok.loc, "duplicate field %qT", &tok); + if bfilt->matches(name) { + vec_each (fld, _, flds) { + if fld.name == name { + err(P, tok.loc, "duplicate field %qT", &tok); + } } } + bfilt->add(name); if lexmatch(P, &tok, '(') { // `(offset, size)` -- cgit v1.2.3