Skip to content

Instantly share code, notes, and snippets.

View Validark's full-sized avatar
🍳

Niles Salter Validark

🍳
View GitHub Profile
@Validark
Validark / comment.md
Created March 14, 2024 00:12
PDEP & PEXT information across architectures
@Validark
Validark / swar.zig
Created March 6, 2024 19:36
Some SWAR functions for byte-level operations
fn swarGe(x: anytype, y: @TypeOf(x)) @TypeOf(x) {
const msb: @TypeOf(x) = @bitCast(@as(@Vector(@sizeOf(@TypeOf(x)), u8), @splat(0x80)));
return (((x ^ y) & x) | (~(x ^ y) & ((x | msb) -% (y & ~msb))));
}
fn swarLe(x: anytype, y: @TypeOf(x)) @TypeOf(x) {
return swarGe(y, x);
}
fn swarEq(x: anytype, y: @TypeOf(x)) @TypeOf(x) {
@Validark
Validark / yaml_predicated_prefix_sum.zig
Last active March 18, 2024 11:49
yaml predicated-prefix-sum
const std = @import("std");
export fn columnCountsVec(chunk: @Vector(16, u8)) @TypeOf(chunk) {
var mask = chunk != @as(@TypeOf(chunk), @splat('\n'));
var array_count = @select(u8, mask, @as(@TypeOf(chunk), @splat(1)), @as(@TypeOf(chunk), @splat(0)));
inline for (0..std.math.log2(@sizeOf(@TypeOf(chunk)))) |i| {
array_count = @select(u8, mask, array_count +% std.simd.shiftElementsRight(array_count, 1 << i, 0), array_count);
mask = @select(bool, mask, std.simd.shiftElementsRight(mask, 1 << i, false), @as(@TypeOf(mask), @splat(false)));
}
@Validark
Validark / pext.zig
Last active January 24, 2024 10:35
comptime pext zig
const std = @import("std");
const builtin = @import("builtin");
const HAS_FAST_PDEP_AND_PEXT = blk: {
const cpu_name = builtin.cpu.model.llvm_name orelse builtin.cpu.model.name;
break :blk builtin.cpu.arch == .x86_64 and
std.Target.x86.featureSetHas(builtin.cpu.features, .bmi2) and
// pdep is microcoded (slow) on AMD architectures before Zen 3.
!std.mem.startsWith(u8, cpu_name, "bdver") and
(!std.mem.startsWith(u8, cpu_name, "znver") or cpu_name["znver".len] >= '3');
@Validark
Validark / select.zig
Last active January 3, 2024 14:37
Select nth bit from bitstring
const std = @import("std");
const builtin = @import("builtin");
const assert = std.debug.assert;
inline fn pdep(src: u64, mask: u64) u64 {
return asm ("pdep %[mask], %[src], %[ret]"
: [ret] "=r" (-> u64),
: [src] "r" (src),
[mask] "r" (mask),
);
@Validark
Validark / pext.zig
Created November 14, 2023 13:04
Pext zig
inline fn pext(src: anytype, mask: @TypeOf(src)) @TypeOf(src) {
switch (@TypeOf(src)) {
u32, u64 => {},
else => @compileError(std.fmt.comptimePrint("pext called with a bad type: {}\n", .{@TypeOf(src)})),
}
if (@inComptime()) {
@setEvalBranchQuota(std.math.maxInt(u32));
var result: @TypeOf(src) = 0;
var m = mask;
@Validark
Validark / prefix_xor.zig
Created April 13, 2023 23:29
prefix_xor in Zig
/// Given a bitmask, will return a mask where the bits are filled in between.
/// On modern x86 and aarch64 CPU's, it should have a latency of 3 and a throughput of 1.
pub fn prefix_xor(bitmask: u64) u64 {
const impl: enum { x86, aarch64, agnostic } =
// There should be no such thing with a processor supporting avx2 but not clmul.
comptime if (builtin.cpu.arch == .x86_64 and
std.Target.x86.featureSetHas(builtin.cpu.features, .pclmul) and
std.Target.x86.featureSetHas(builtin.cpu.features, .avx2))
.x86
else if (builtin.cpu.arch == .aarch64 and std.Target.aarch64.featureSetHas(builtin.cpu.features, .aes))
@Validark
Validark / pdep.zig
Created February 22, 2023 23:00
pdep in Zig with select implementation that find the nth set bit of an integer
fn pdep(src: u64, mask: u64) u64 {
return asm ("pdep %[mask], %[src], %[ret]"
: [ret] "=r" (-> u64),
: [src] "r" (src),
[mask] "r" (mask),
);
}
fn select(x: u64, k: u6) u6 {
@setRuntimeSafety(false);
@Validark
Validark / aho-corasick.lua
Created July 18, 2021 12:31
A clean implementation of the aho-corasick algorithm taking full advantage of Lua's __index metamethod.
-- We can re-use metatables where possible
local lookupCache = {
__index = function(self, i)
local v = { __index = i }
self[i] = v
return v
end
}
local function use_aho_corasick(automaton, str)
@Validark
Validark / autocomplete.lua
Created June 27, 2021 17:57
A simple autocomplete proof of concept in Lua which binary searches a sorted array of strings. Also allows for searching for terms with a different word order than the original string (by inserting permutations into array) and permitting alternate spellings/abbreviations by permuting those as well.
-- A cool autocomplete demo
-- @author Validark
-- Strings are stored in a lexigraphically sorted array, which can be binary searched for the first and last element which matches a query
-- Please note the "groupings" functionality is an unvalidated afterthought which may or may not work properly, but overall this code has some nice gems:
-- I was originally thinking that https://github.com/evaera/Cmdr could use this to get O(log n) autocompletes (old algo uses O(n))
-- However, Cmdr is designed in such a way that it creates a new autocomplete function on-demand each time,
-- which means we'd have to sort (or at least verify the sortedness) of the most recent data each time (for changing data at least, like a Player list).
-- Sorting at run-time takes O(n*log n) time, making it hard to compete with the old algorithm where the pre-processing step just gets the data in the right format