Skip to content

Instantly share code, notes, and snippets.

@Validark
Created April 13, 2023 23:29
Show Gist options
  • Save Validark/f487e3806f255c785d45ddef89cab5dd to your computer and use it in GitHub Desktop.
Save Validark/f487e3806f255c785d45ddef89cab5dd to your computer and use it in GitHub Desktop.
prefix_xor in Zig
/// Computes the prefix XOR (running parity) of `bitmask`: bit `i` of the result
/// is the XOR of bits `0..=i` of the input.
///
/// Equivalently, the set bits of `bitmask` are treated as alternating
/// "start"/"stop" markers and every region from a start bit (inclusive) up to
/// the next stop bit (exclusive) is filled with ones, e.g.
/// `0b0100_0100` -> `0b0011_1100`. A trailing unmatched start bit fills all the
/// way up to bit 63, e.g. `1` -> all ones.
///
/// Per the original author's note, on modern x86 and aarch64 CPUs the
/// carry-less-multiply path should have a latency of 3 and a throughput of 1
/// (not verified here).
pub fn prefix_xor(bitmask: u64) u64 {
    // Choose the implementation at compile time from the target's CPU features.
    const impl: enum { x86, aarch64, agnostic } =
        // There should be no such thing as a processor supporting avx2 but not clmul.
        comptime if (builtin.cpu.arch == .x86_64 and
        std.Target.x86.featureSetHas(builtin.cpu.features, .pclmul) and
        std.Target.x86.featureSetHas(builtin.cpu.features, .avx2))
        .x86
    else if (builtin.cpu.arch == .aarch64 and std.Target.aarch64.featureSetHas(builtin.cpu.features, .aes))
        .aarch64
    else
        .agnostic;

    switch (impl) {
        .agnostic => {
            // Portable fallback: log2(64) shift/XOR doubling steps. After the
            // step that shifts by `k`, every bit holds the XOR of the `2 * k`
            // input bits ending at its own position.
            var x = bitmask;
            x ^= x << 1;
            x ^= x << 2;
            x ^= x << 4;
            x ^= x << 8;
            x ^= x << 16;
            x ^= x << 32;
            return x;
        },
        .x86, .aarch64 => {
            // adapted from zig/lib/std/crypto/ghash_polyval.zig
            //
            // A carry-less multiplication of `bitmask` by an all-ones 64-bit
            // value yields, in the low 64 bits of the product, the XOR of all
            // input bits at or below each position -- i.e. the prefix XOR.
            // Both instructions below multiply the low 64-bit lanes only.
            //
            // The operands are written as plain vector literals so that no
            // `@bitCast`/`@splat` call (whose signatures differ between Zig
            // releases) is needed.
            const all_ones: u64 = std.math.maxInt(u64);
            const x = @Vector(2, u64){ bitmask, 0 };
            const y = @Vector(2, u64){ all_ones, all_ones };

            const product: @Vector(2, u64) = switch (impl) {
                .x86 => asm (
                    \\ vpclmulqdq $0x00, %[x], %[y], %[out]
                    : [out] "=x" (-> @Vector(2, u64)),
                    : [x] "x" (x),
                      [y] "x" (y),
                ),
                .aarch64 => asm (
                    \\ pmull %[out].1q, %[x].1d, %[y].1d
                    : [out] "=w" (-> @Vector(2, u64)),
                    : [x] "w" (x),
                      [y] "w" (y),
                ),
                // Handled by the outer switch; `impl` is comptime-known.
                .agnostic => unreachable,
            };

            // Only the low half of the 128-bit product is the prefix XOR.
            return product[0];
        },
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment