Skip to content

Instantly share code, notes, and snippets.

@michaeljclark
Created June 12, 2024 08:47
Show Gist options
  • Save michaeljclark/72b550e076eebb3754322db137c0a821 to your computer and use it in GitHub Desktop.
Save michaeljclark/72b550e076eebb3754322db137c0a821 to your computer and use it in GitHub Desktop.
SipHash derivative using carryless multiplies instead of rotates
/*
SlurpHash 0-pre-alpha AVX implementation
(SipHash derivative using carryless multiplies instead of rotates)
------------------------------------------------------------------
Copyright (c) 2012-2016 Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
Copyright (c) 2017 Salvatore Sanfilippo <antirez@gmail.com>
Copyright (c) 2024 Michael Clark <michaeljclark@mac.com>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along
with this software. If not, see
<http://creativecommons.org/publicdomain/zero/1.0/>.
*/
// compile: gcc -O3 -DBENCH_MAIN -march=skylake-avx512 -o bench_slurphash slurphash.c
#include <stddef.h>
#include <stdint.h>
#include <immintrin.h>
/*
* SMHasher 0x9754755B
*
* [[[ Avalanche Tests ]]]
*
* 24-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.658000%
* 32-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.691333%
* 40-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.652000%
* 48-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.720667%
* 56-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.757333%
* 64-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.740667%
* 72-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.688000%
* 80-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.617333%
* 96-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.674667%
* 112-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.665333%
* 128-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.720000%
* 160-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.648667%
* 512-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.769333%
* 1024-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.736667%
*
* [[[ MomentChi2 Tests ]]]
*
* linearly increasing numbers of 32-bit, using a step of 2 ...
* Target values to approximate : 38918200.000000 - 273633.333333
*
* Popcount 1 stats : 38918583.940467 - 273647.079550
* Popcount 0 stats : 38918853.766915 - 273620.670023
* MomentChi2 for bits 1 : 0.269351
* MomentChi2 for bits 0 : 0.781011
*
* Derivative stats (transition from 2 consecutive values) :
* Popcount 1 stats : 38918796.153440 - 273631.111416
* Popcount 0 stats : 38918666.234741 - 273631.079280
* MomentChi2 for deriv b1 : 0.64941
* MomentChi2 for deriv b0 : 0.397203
*/
/*
* p0 = 0b000000000000000011100101110011111
* { 0-4,7,8,9,11,14,15,16 } [6EVE,6ODD]
*
* p1 = 0b100100000001100100000010100100000
* { 5,8,10,17,20,21,29,32 } [4EVE,4ODD]
*/
#define SLURPROUND() do { \
t0 = _mm_clmulepi64_si128(v0, p0, 0); \
t1 = _mm_clmulepi64_si128(v0, p0, 1); \
t2 = _mm_slli_si128(t1, 8); \
v0 = _mm_xor_si128(t0, t2); \
v0 = _mm_shuffle_epi32(v0, 0b00100111); /* 3,1,2,0 */ \
v1 = _mm_add_epi64(v1, v0); \
t0 = _mm_clmulepi64_si128(v1, p1, 0); \
t1 = _mm_clmulepi64_si128(v1, p1, 1); \
t2 = _mm_slli_si128(t1, 8); \
v1 = _mm_xor_si128(t0, t2); \
v1 = _mm_shuffle_epi32(v1, 0b00011011); /* 3,2,1,0 */ \
v0 = _mm_add_epi64(v0, v1); \
} while(0);
int slurphash(uint8_t *out, const uint8_t *in, size_t len, const uint8_t *k)
{
const uint64_t x[4] = {
0x736f6d6570736575ULL, 0x646f72616e646f6dULL,
0x6c7967656e657261ULL, 0x7465646279746573ULL
};
const uint64_t pf[2] = {
0b000000000000000011100101110011111,
0b100100000001100100000010100100000
};
__m128i key = _mm_loadu_si128((const __m128i *)k);
__m128i iv0 = _mm_xor_si128(key, _mm_load_si128((const __m128i *)&x[0]));
__m128i iv1 = _mm_xor_si128(key, _mm_load_si128((const __m128i *)&x[2]));
__m128i v0 = _mm_unpacklo_epi64(iv0, iv1);
__m128i v1 = _mm_unpackhi_epi64(iv0, iv1);
__m128i p0 = _mm_set_epi64x(0, pf[0]);
__m128i p1 = _mm_set_epi64x(0, pf[1]);
__m128i p, q, t0, t1, t2;
const uint8_t *end = in + (len & ~7);
const uint64_t *in64 = (const uint64_t*)in;
const uint64_t *end64 = (const uint64_t*)end;
uint64_t b = ((uint64_t)len) << 56;
for(; in64 < end64; in64++)
{
p =_mm_loadl_epi64((const __m128i*)in64);
v0 = _mm_add_epi64(p, v0);
SLURPROUND();
}
switch( len & 7 )
{
case 7: b |= ((uint64_t)end[6]) << 48;
case 6: b |= ((uint64_t)end[5]) << 40;
case 5: b |= ((uint64_t)end[4]) << 32;
case 4: b |= ((uint64_t)end[3]) << 24;
case 3: b |= ((uint64_t)end[2]) << 16;
case 2: b |= ((uint64_t)end[1]) << 8;
case 1: b |= ((uint64_t)end[0]); break;
case 0: break;
}
p = _mm_loadl_epi64((const __m128i*)&b);
v1 = _mm_xor_si128(v1, _mm_slli_si128(p, 8));
SLURPROUND();
v0 = _mm_xor_si128(v0, p);
v0 = _mm_xor_si128(v0, _mm_set_epi64x(0xFF, 0));
SLURPROUND();
SLURPROUND();
p = _mm_xor_si128(v0, v1);
q = _mm_unpackhi_epi64(p, p);
p = _mm_xor_si128(p, q);
*(uint64_t*)out = _mm_cvtsi128_si64(p);
return 0;
}
/*
//SMhasher test function
void slurphash_test(const void *input, int len, uint32_t seed, void *out)
{
unsigned char key[16];
memset(key, 0, sizeof(key));
memcpy(key, &seed, sizeof(seed));
slurphash((uint8_t*)out, (const uint8_t *)input, len, key);
}
*/
#ifdef BENCH_MAIN
#include <stdio.h>
#include <string.h>
#include <time.h>
#define buffer_size 32768
uint64_t bench_siphash(size_t count)
{
clock_t start, end;
double gbsec, ns;
uint8_t key[16], buf[buffer_size];
uint64_t hash, sum = 0;
memset(buf, 0, sizeof(buf));
start = clock();
for (size_t s = 0; s < count;) {
size_t l = count - s > buffer_size ? buffer_size : count - s;
siphash((uint8_t*)&hash, buf, l, key);
sum += hash;
s += l;
}
end = clock();
ns = (double)(end - start) / (double)CLOCKS_PER_SEC * 1e9;
gbsec = ((double)count / (double)(1<<30)) / ((double)ns / 1e9);
printf("|%-12s|%8s|%8s|%8s|\n", "algorithm", "sz_mib", "ns_byte", "gb_sec");
printf("|%-12s|%8s|%8s|%8s|\n", ":--------", "-----:", "------:", "------:");
printf("|%-12s|%8zu|%8.2f|%8.2f|\n", "slurphash", count>>20, ns/count, gbsec);
return sum;
}
int main(int argc, char **argv)
{
return (int)bench_siphash(1<<30);
}
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment