barrbrain/fmix32.c

## fmix32.c
/* See https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
   for original plain C/C++ implementation */

/* build with -msse4.2 */

#include <smmintrin.h>
#include <emmintrin.h>

FORCE_INLINE fmix32_m128i ( __m128i h )
{
  h = _mm_xor_si128(h, _mm_srli_epi32(h, 16));
  h = _mm_mullo_epi32(h, _mm_set1_epi32(0x85ebca6b));
  h = _mm_xor_si128(h, _mm_srli_epi32(h, 13));
  h = _mm_mullo_epi32(h, _mm_set1_epi32(0xc2b2ae35));
  h = _mm_xor_si128(h, _mm_srli_epi32(h, 16));

  return h;
}

## simhash.c
/* build with -msse4.2 */

#include <smmintrin.h>
#include <emmintrin.h>
#include <stdint.h>

/* WIP: need to find my original implementation.
   This is just the kernel for maximum speed.
   The essence of the algorithm is:
   * Simple multiplicative sliding window hash.
   * Multiplier 3; 16 byte window.
   * Apply finalising mix function from murmur3.
   * Accumulate the minimum and maximum.
   * Combine these two into a single 32-bit output.
   Publication of minhash 1.0:
   https://github.com/barrbrain/libgit2/commit/ad0f827f0b2182017e5421e10a65a3279e1397a9.patch
   */

void hash_4n (uint8_t *c, size_t n)
{
  uint8_t *w = c - 16;
  uint32_t roll;
  __m128i minh, maxh;
  while (n--) {
    __m128i h;
    uint32_t roll0, roll1, roll2;
    roll0 = c[0] + 3 * (roll  - 0xDAF26B * w[0]);
    roll1 = c[1] + 3 * (roll0 - 0xDAF26B * w[1]);
    roll2 = c[2] + 3 * (roll1 - 0xDAF26B * w[2]);
    roll  = c[3] + 3 * (roll2 - 0xDAF26B * w[3]);
    h = _mm_set_epi32(roll0, roll1, roll2, roll);
    h = fmix32_m128i(h);
    minh = _mm_min_epu32(minh, h);
    maxh = _mm_max_epu32(maxh, h);
  }
}
	/* See https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
	for original plain C/C++ implementation */

	/* build with -msse4.2 */

	#include <smmintrin.h>
	#include <emmintrin.h>

	FORCE_INLINE fmix32_m128i ( __m128i h )
	{
	h = _mm_xor_si128(h, _mm_srli_epi32(h, 16));
	h = _mm_mullo_epi32(h, _mm_set1_epi32(0x85ebca6b));
	h = _mm_xor_si128(h, _mm_srli_epi32(h, 13));
	h = _mm_mullo_epi32(h, _mm_set1_epi32(0xc2b2ae35));
	h = _mm_xor_si128(h, _mm_srli_epi32(h, 16));

	return h;
	}