Last active
October 14, 2015 15:03
-
-
Save barrbrain/9052705 to your computer and use it in GitHub Desktop.
SSE4.2 implementation of David Barr's similarity hash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* See https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp | |
for original plain C/C++ implementation */ | |
/* build with -msse4.2 */ | |
#include <smmintrin.h> | |
#include <emmintrin.h> | |
FORCE_INLINE fmix32_m128i ( __m128i h ) | |
{ | |
h = _mm_xor_si128(h, _mm_srli_epi32(h, 16)); | |
h = _mm_mullo_epi32(h, _mm_set1_epi32(0x85ebca6b)); | |
h = _mm_xor_si128(h, _mm_srli_epi32(h, 13)); | |
h = _mm_mullo_epi32(h, _mm_set1_epi32(0xc2b2ae35)); | |
h = _mm_xor_si128(h, _mm_srli_epi32(h, 16)); | |
return h; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* build with -msse4.2 */ | |
#include <smmintrin.h> | |
#include <emmintrin.h> | |
#include <stdint.h> | |
/* WIP: need to find my original implementation. | |
This is just the kernel for maximum speed. | |
The essence of the algorithm is: | |
* Simple multiplicative sliding window hash. | |
* Multiplier 3; 16 byte window. | |
* Apply finalising mix function from murmur3. | |
* Accumulate the minimum and maximum. | |
* Combine these two into a single 32-bit output. | |
Publication of minhash 1.0: | |
https://github.com/barrbrain/libgit2/commit/ad0f827f0b2182017e5421e10a65a3279e1397a9.patch | |
*/ | |
void hash_4n (uint8_t *c, size_t n) | |
{ | |
uint8_t *w = c - 16; | |
uint32_t roll; | |
__m128i minh, maxh; | |
while (n--) { | |
__m128i h; | |
uint32_t roll0, roll1, roll2; | |
roll0 = c[0] + 3 * (roll - 0xDAF26B * w[0]); | |
roll1 = c[1] + 3 * (roll0 - 0xDAF26B * w[1]); | |
roll2 = c[2] + 3 * (roll1 - 0xDAF26B * w[2]); | |
roll = c[3] + 3 * (roll2 - 0xDAF26B * w[3]); | |
h = _mm_set_epi32(roll0, roll1, roll2, roll); | |
h = fmix32_m128i(h); | |
minh = _mm_min_epu32(minh, h); | |
maxh = _mm_max_epu32(maxh, h); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment