Skip to content

Instantly share code, notes, and snippets.

@israellot
Created September 26, 2022 17:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save israellot/4c38ee6804ba6a36d5b59932db02ea7d to your computer and use it in GitHub Desktop.
Save israellot/4c38ee6804ba6a36d5b59932db02ea7d to your computer and use it in GitHub Desktop.
Expert Checksum with AVX2
/// <summary>
/// Accumulates a 32-bit checksum over <paramref name="arr"/> using AVX2, consuming
/// 128 bytes (four 256-bit vectors) per loop iteration.
/// </summary>
/// <param name="arr">The bytes to checksum.</param>
/// <returns>
/// The checksum as a <see cref="uint"/>. NOTE(review): the code that produces the final
/// value is elided in this snippet (see the "..." near the end), so the exact reduction
/// and tail handling cannot be confirmed from here.
/// </returns>
/// <remarks>
/// NOTE(review): no <c>Avx2.IsSupported</c> check is visible in this snippet; presumably
/// the caller guards for hardware support — confirm.
/// </remarks>
public static unsafe uint ChecksumExpertAvx2(ReadOnlySpan<byte> arr)
{
// Reference to the first byte of the span; all loads below are offsets from it.
ref byte refSpan = ref MemoryMarshal.GetReference<byte>(arr);
// Current byte offset into the span.
var z = 0;
// Scalar accumulator; not touched in the visible part of the method
// (presumably used by the elided tail/reduction code — confirm).
uint sum = 0;
// Zero-initialised accumulator viewed as eight 32-bit lanes (x XOR x == 0).
var vectorSum = Avx2.Xor(Vector256<byte>.Zero, Vector256<byte>.Zero).AsUInt32();
// Byte-shuffle control that reverses the byte order inside every 4-byte group.
// The 16-entry pattern is repeated because Avx2.Shuffle permutes each 128-bit half
// independently. After the shuffle, reinterpreting as UInt32 yields each 4-byte group
// as if it had been read in big-endian order.
var mask = Vector256.Create((byte)3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, (byte)3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
// Largest offset at which a full 128-byte chunk still fits; negative for inputs
// shorter than 128 bytes, in which case the loop body never runs.
int limit = arr.Length - 128;
while (z <= limit)
{
// Four unchecked 32-byte loads covering bytes [z, z + 128). LoadUnsafe performs no
// bounds check; the loop condition is what keeps these reads inside the span.
var v1 = Vector256.LoadUnsafe(ref Unsafe.Add(ref refSpan, z));
var v2 = Vector256.LoadUnsafe(ref Unsafe.Add(ref refSpan, z + 32));
var v3 = Vector256.LoadUnsafe(ref Unsafe.Add(ref refSpan, z + 64));
var v4 = Vector256.LoadUnsafe(ref Unsafe.Add(ref refSpan, z + 96));
// Byte-swap every 32-bit element, then treat each vector as eight UInt32 lanes.
var s1 = Avx2.Shuffle(v1, mask).AsUInt32();
var s2 = Avx2.Shuffle(v2, mask).AsUInt32();
var s3 = Avx2.Shuffle(v3, mask).AsUInt32();
var s4 = Avx2.Shuffle(v4, mask).AsUInt32();
// Pairwise adds (s1+s2) and (s3+s4) are independent of each other, then combined;
// lane-wise 32-bit addition wraps on overflow.
var s5 = Avx2.Add(Avx2.Add(s1, s2), Avx2.Add(s3, s4));
vectorSum = Avx2.Add(vectorSum, s5);
z += 128;
}
// NOTE(review): the remainder of the method is elided in the original snippet.
// Presumably it processes the bytes left after the last full 128-byte chunk, folds the
// eight lanes of vectorSum into a single value and returns it — confirm against the
// full source.
...
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment