Skip to content

Instantly share code, notes, and snippets.

@barrbrain barrbrain/fmix32.c
Last active Oct 14, 2015

Embed
What would you like to do?
SSE4.2 implementation of David Barr's similarity hash
/* See https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
for original plain C/C++ implementation */
/* build with -msse4.2 */
#include <smmintrin.h>
#include <emmintrin.h>
FORCE_INLINE fmix32_m128i ( __m128i h )
{
h = _mm_xor_si128(h, _mm_srli_epi32(h, 16));
h = _mm_mullo_epi32(h, _mm_set1_epi32(0x85ebca6b));
h = _mm_xor_si128(h, _mm_srli_epi32(h, 13));
h = _mm_mullo_epi32(h, _mm_set1_epi32(0xc2b2ae35));
h = _mm_xor_si128(h, _mm_srli_epi32(h, 16));
return h;
}
/* build with -msse4.2 */
#include <smmintrin.h>
#include <emmintrin.h>
#include <stdint.h>
/* WIP: need to find my original implementation.
This is just the kernel for maximum speed.
The essence of the algorithm is:
* Simple multiplicative sliding window hash.
* Multiplier 3; 16 byte window.
* Apply finalising mix function from murmur3.
* Accumulate the minimum and maximum.
* Combine these two into a single 32-bit output.
Publication of minhash 1.0:
https://github.com/barrbrain/libgit2/commit/ad0f827f0b2182017e5421e10a65a3279e1397a9.patch
*/
void hash_4n (uint8_t *c, size_t n)
{
uint8_t *w = c - 16;
uint32_t roll;
__m128i minh, maxh;
while (n--) {
__m128i h;
uint32_t roll0, roll1, roll2;
roll0 = c[0] + 3 * (roll - 0xDAF26B * w[0]);
roll1 = c[1] + 3 * (roll0 - 0xDAF26B * w[1]);
roll2 = c[2] + 3 * (roll1 - 0xDAF26B * w[2]);
roll = c[3] + 3 * (roll2 - 0xDAF26B * w[3]);
h = _mm_set_epi32(roll0, roll1, roll2, roll);
h = fmix32_m128i(h);
minh = _mm_min_epu32(minh, h);
maxh = _mm_max_epu32(maxh, h);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.