Skip to content

Instantly share code, notes, and snippets.

@shawnl
Last active May 19, 2020 19:49
Show Gist options
  • Save shawnl/6b91e85c64ad0f7de57e6236ba9a184f to your computer and use it in GitHub Desktop.
Save shawnl/6b91e85c64ad0f7de57e6236ba9a184f to your computer and use it in GitHub Desktop.
perfect bisection (hash) generator
The proof of concept is complete, and the interpreter is less than 4KB (plus memcmp) on x86_64. Unlike gperf which has a generation worst case of n^3,
this always generates in O(n log n) time, which makes it suitable for inclusion in a compiler.
const KVs = [_]KV{
KV.init("align", .Keyword_align),
KV.init("allowzero", .Keyword_allowzero),
KV.init("and", .Keyword_and),
KV.init("anyframe", .Keyword_anyframe),
KV.init("asm", .Keyword_asm),
KV.init("async", .Keyword_async),
KV.init("await", .Keyword_await),
KV.init("break", .Keyword_break),
KV.init("callconv", .Keyword_callconv),
KV.init("catch", .Keyword_catch),
KV.init("comptime", .Keyword_comptime),
KV.init("const", .Keyword_const),
KV.init("continue", .Keyword_continue),
KV.init("defer", .Keyword_defer),
KV.init("else", .Keyword_else),
KV.init("enum", .Keyword_enum),
KV.init("errdefer", .Keyword_errdefer),
KV.init("error", .Keyword_error),
KV.init("export", .Keyword_export),
KV.init("extern", .Keyword_extern),
KV.init("false", .Keyword_false),
KV.init("fn", .Keyword_fn),
KV.init("for", .Keyword_for),
KV.init("if", .Keyword_if),
KV.init("inline", .Keyword_inline),
KV.init("noalias", .Keyword_noalias),
KV.init("noasync", .Keyword_nosuspend), // TODO: remove this
KV.init("noinline", .Keyword_noinline),
KV.init("nosuspend", .Keyword_nosuspend),
KV.init("null", .Keyword_null),
KV.init("or", .Keyword_or),
KV.init("orelse", .Keyword_orelse),
KV.init("packed", .Keyword_packed),
KV.init("pub", .Keyword_pub),
KV.init("resume", .Keyword_resume),
KV.init("return", .Keyword_return),
KV.init("linksection", .Keyword_linksection),
KV.init("struct", .Keyword_struct),
KV.init("suspend", .Keyword_suspend),
KV.init("switch", .Keyword_switch),
KV.init("test", .Keyword_test),
KV.init("threadlocal", .Keyword_threadlocal),
KV.init("true", .Keyword_true),
KV.init("try", .Keyword_try),
KV.init("undefined", .Keyword_undefined),
KV.init("union", .Keyword_union),
KV.init("unreachable", .Keyword_unreachable),
KV.init("usingnamespace", .Keyword_usingnamespace),
KV.init("var", .Keyword_var),
KV.init("volatile", .Keyword_volatile),
KV.init("while", .Keyword_while),
};
==>
0000 f8 02 00 00 01 01 02 00 00 00 00 00 00 00 00 00 ................
0010 00 00 00 00 7e d2 fd 00 00 00 00 00 00 00 00 00 ....~...........
0020 00 00 00 00 00 00 00 00 45 00 b7 00 bf 00 1e 01 ........E.......
0030 26 01 8c 01 a2 01 b4 01 c2 01 23 02 35 02 48 02 &.........#.5.H.
0040 5e 02 7d 02 a1 02 d6 02 eb 02 01 02 01 00 00 00 ^.}.............
0050 00 00 00 00 00 00 00 00 00 00 50 88 00 00 00 00 ..........P.....
0060 00 00 00 00 00 00 00 00 00 00 00 00 00 2b 00 43 .............+.C
0070 00 58 00 6a 00 04 13 00 00 00 00 c4 61 6c 69 67 .X.j........alig
0080 6e 00 c5 61 6c 6c 6f 77 7a 65 72 6f 00 04 10 00 n..allowzero....
0090 00 00 00 c6 61 6e 64 00 e3 61 6e 79 66 72 61 6d ....and..anyfram
00a0 65 00 04 0d 00 00 00 00 c7 61 73 6d 00 c8 61 73 e........asm..as
00b0 79 6e 63 00 08 c9 61 77 61 69 74 00 08 ca 62 72 ync...await...br
00c0 65 61 6b 00 01 02 01 00 00 00 00 00 00 00 00 00 eak.............
00d0 00 00 00 02 80 00 00 00 00 00 00 00 00 00 00 00 ................
00e0 00 00 00 00 00 00 00 27 00 3e 00 04 12 00 00 00 .......'.>......
00f0 00 cb 63 61 6c 6c 63 6f 6e 76 00 cc 63 61 74 63 ..callconv..catc
0100 68 00 04 1c 00 00 00 00 cd 63 6f 6d 70 74 69 6d h........comptim
0110 65 00 ce 63 6f 6e 73 74 00 cf 63 6f 6e 74 69 6e e..const..contin
0120 75 65 00 08 d0 64 65 66 65 72 00 01 02 01 00 00 ue...defer......
0130 00 00 00 00 00 00 00 00 00 00 00 50 04 01 00 00 ...........P....
0140 00 00 00 00 00 00 00 00 00 00 00 00 00 00 2b 00 ..............+.
0150 32 00 39 00 50 00 08 d1 65 6c 73 65 00 08 d2 65 2.9.P...else...e
0160 6e 75 6d 00 04 12 00 00 00 00 d3 65 72 72 64 65 num........errde
0170 66 65 72 00 d4 65 72 72 6f 72 00 04 11 00 00 00 fer..error......
0180 00 d5 65 78 70 6f 72 74 00 d6 65 78 74 65 72 6e ..export..extern
0190 00 04 11 00 00 00 00 d7 66 61 6c 73 65 00 d8 66 ........false..f
01a0 6e 00 d9 66 6f 72 00 04 0d 00 00 00 00 da 69 66 n..for........if
01b0 00 db 69 6e 6c 69 6e 65 00 08 e7 6c 69 6e 6b 73 ..inline...links
01c0 65 63 74 69 6f 6e 00 01 02 02 00 00 00 00 00 00 ection..........
01d0 00 00 00 00 00 00 02 12 08 00 00 00 00 00 00 00 ................
01e0 00 00 00 00 00 00 00 00 00 00 2b 00 43 00 4e 00 ..........+.C.N.
01f0 55 00 04 13 00 00 00 00 dc 6e 6f 61 6c 69 61 73 U........noalias
0200 00 de 6e 6f 61 73 79 6e 63 00 08 dd 6e 6f 69 6e ..noasync...noin
0210 6c 69 6e 65 00 08 df 6e 75 6c 6c 00 08 de 6e 6f line...null...no
0220 73 75 73 70 65 6e 64 00 04 0d 00 00 00 00 e0 6f suspend........o
0230 72 00 e1 6f 72 65 6c 73 65 00 04 0e 00 00 00 00 r..orelse.......
0240 e2 70 61 63 6b 65 64 00 e4 70 75 62 00 04 11 00 .packed..pub....
0250 00 00 00 e5 72 65 73 75 6d 65 00 e6 72 65 74 75 ....resume..retu
0260 72 6e 00 04 1a 00 00 00 00 e8 73 74 72 75 63 74 rn........struct
0270 00 e9 73 75 73 70 65 6e 64 00 ea 73 77 69 74 63 ..suspend..switc
0280 68 00 04 1f 00 00 00 00 eb 74 65 73 74 00 ec 74 h........test..t
0290 68 72 65 61 64 6c 6f 63 61 6c 00 ed 74 72 75 65 hreadlocal..true
02a0 00 ee 74 72 79 00 04 30 00 00 00 00 ef 75 6e 64 ..try..0.....und
02b0 65 66 69 6e 65 64 00 f0 75 6e 69 6f 6e 00 f1 75 efined..union..u
02c0 6e 72 65 61 63 68 61 62 6c 65 00 f2 75 73 69 6e nreachable..usin
02d0 67 6e 61 6d 65 73 70 61 63 65 00 04 10 00 00 00 gnamespace......
02e0 00 f3 76 61 72 00 f4 76 6f 6c 61 74 69 6c 65 00 ..var..volatile.
02f0 08 f5 77 68 69 6c 65 00 ..while.
const InterpHeader = packed struct {
length: u32,
valueSize: u8, // Width of values in StringBisect and String sections. Values cannot contain null bytes. base-128, 0x80-0xff.
};
const SectionType = enum(u8) {
MultiwayWithVerify = 1,
MultiwayWithoutVerify = 2,
StringBisect = 4,
String = 8,
};
const StringBisectHeader = packed struct {
typ: SectionType = .StringBisect,
length: u32,
zerobyte: u8 = 0,
};
const MultiwayHeader = packed struct {
typ: SectionType,
ptrSize: u8,
which: u8,
bitfield: [4]u64,
// blah....
};
fn interp_recursive(file: []const u8, search: [:0]const u8, valueSize: u8) ?u64 {
var typ: SectionType = @intToEnum(SectionType, file[0]);
{switch (typ) {
.MultiwayWithVerify => {
var hdr = @ptrCast(*align(1) const MultiwayHeader, file.ptr);
assert(hdr.typ == .MultiwayWithVerify);
var char = if (search.len < hdr.which) 0 else search[hdr.which];
var mask: [4]u64 = undefined;
if (((@as(u64, 1) << @intCast(u6, char % 64)) & hdr.bitfield[char / 64]) == 0) {
return null;
}
mask[0] = if (char >= 64) @as(u64, 0xffffffffffffffff) else @as(u64, 0xffffffffffffffff) << @intCast(u6, 63 - char);
if (char >= 128) {
mask[1] = 0xffffffffffffffff;
} else if (char < 64) {
mask[1] = 0;
} else {
mask[1] = @as(u64, 0xffffffffffffffff) >> @intCast(u6, 63 - (char - 64));
}
if (char >= 192) {
mask[2] = 0xffffffffffffffff;
} else if (char < 128) {
mask[2] = 0;
} else {
mask[2] = @as(u64, 0xffffffffffffffff) << @intCast(u6, 63 - (char - 128));
}
if (char < 192) {
mask[3] = 0;
} else {
mask[3] = @as(u64, 0xffffffffffffffff) << @intCast(u6, 63 - (char - 192));
}
var ptrWhich = @popCount(u64, mask[0] & hdr.bitfield[0]) + @popCount(u64, mask[1] & hdr.bitfield[1]) + @popCount(u64, mask[2] & hdr.bitfield[2]) + @popCount(u64, mask[3] & hdr.bitfield[3]);
var ptr = @ptrCast([*]align(1) const i16, &file.ptr[@sizeOf(MultiwayHeader)]);
var jump = @intCast(usize, ptr[ptrWhich - 1]);
//std.debug.warn("jump {}\n", .{jump});
return interp_recursive(file[jump..], search, valueSize);
},
.MultiwayWithoutVerify => unreachable,
.StringBisect => {
var hdr = @ptrCast(*align(1) const StringBisectHeader, file.ptr);
assert(hdr.length + @sizeOf(StringBisectHeader) - 1 <= file.len);
// std.debug.warn("hdr.length: {}\n",.{hdr.length});
// hexDump("bisect", file[@sizeOf(StringBisectHeader) - 1..@sizeOf(StringBisectHeader) + hdr.length - 1]);
var res = bisectWay(valueSize, file[@sizeOf(StringBisectHeader)-1..@sizeOf(StringBisectHeader)-1 + hdr.length], search);
if (res) |r| {
assert(mem.eql(u8, r[1..], search));
return @as(u64, r[0] & 0x7f);
}
return null;
},
.String => {
assert(file[0] == @enumToInt(SectionType.String));
assert(valueSize == 1);
var value = file[1] & 0b01111111;
var len = port_strlen(file[2..].ptr);
if (mem.eql(u8, file[2..2+len], search) and len == search.len) {
// std.debug.warn("got {}\n", .{search});
return @as(u64, value);
} else {
return null;
}
},
}}
}
pub fn interp_start(file: []const u8, search: [:0]const u8) ?u64 {
var hdr = @ptrCast(*align(1) const InterpHeader, file.ptr);
//assert(hdr.length == file.len);
return interp_recursive(file[@sizeOf(InterpHeader)..], search, hdr.valueSize);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment