Skip to content

Instantly share code, notes, and snippets.

@laruence
Created January 16, 2018 08:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save laruence/cea019df35241e6e0d60f93793bc1789 to your computer and use it in GitHub Desktop.
Save laruence/cea019df35241e6e0d60f93793bc1789 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <string.h>
#include <nmmintrin.h>
static unsigned long zend_inline_hash_func(const char *str, long len)
{
unsigned long hash = 5381;
/* variant with the hash unrolled eight times */
for (; len >= 8; len -= 8) {
hash = ((hash << 5) + hash) + *str++;
hash = ((hash << 5) + hash) + *str++;
hash = ((hash << 5) + hash) + *str++;
hash = ((hash << 5) + hash) + *str++;
hash = ((hash << 5) + hash) + *str++;
hash = ((hash << 5) + hash) + *str++;
hash = ((hash << 5) + hash) + *str++;
hash = ((hash << 5) + hash) + *str++;
}
switch (len) {
case 7: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
case 6: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
case 5: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
case 4: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
case 3: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
case 2: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
case 1: hash = ((hash << 5) + hash) + *str++; break;
case 0: break;
}
return hash;
}
static unsigned long zend_inline_hash_func_sse4(const char *str, long len)
{
unsigned int state[16] = {5381, 5381, 5381, 5381};
const char *p = str;
const char *end = p + len;
__m128i xstate;
__m128i xp;
__m128i xpin;
__m128i xbmask;
__m128i xshuffle;
int i = 0;
unsigned long hash;
if (len > 15) {
//transfer state into register
xstate = _mm_load_si128((__m128i*)state);
//load AND mask
xbmask = _mm_set1_epi32(0x000000ff);
//load shuffle mask
xshuffle = _mm_set_epi8(
15, 11, 7, 3,
14, 10, 6, 2,
13, 9 , 5, 1,
12, 8 , 4, 0
);
for (;end - p > 15; p += 16) {
//load 16 bytes aligned
xpin = _mm_loadu_si128((__m128i*)p);
xpin = _mm_shuffle_epi8(xpin, xshuffle);
#define HX4_SSSE3_DJB2ROUND(round) \
xp = _mm_srli_epi32(xpin, 8*round); \
xp = _mm_and_si128(xp, xbmask); \
xp = _mm_add_epi32(xp, xstate); \
xstate = _mm_slli_epi32(xstate, 5); \
xstate = _mm_add_epi32(xstate, xp); \
HX4_SSSE3_DJB2ROUND(0)
HX4_SSSE3_DJB2ROUND(1)
HX4_SSSE3_DJB2ROUND(2)
HX4_SSSE3_DJB2ROUND(3)
}
_mm_store_si128((__m128i*)state, xstate);
}
i = 0;
for (; p < end; p++) {
state[i % 3] = ((state[i % 3] << 5) + state[i % 3]) + *p;
}
hash = state[3];
i = 3;
while (i--) {
hash = ((hash << 5) + hash) + state[i];
}
return hash;
}
int main() {
int i = 1000;
int count = 0;
int count2 = 0;
char conflicts[1000];
char conflicts2[1000];
FILE *fp = fopen("/dev/urandom", "r");
memset(conflicts, 0, 1000);
memset(conflicts2, 0, 1000);
while(--i>0) {
char buf[1000];
unsigned long hash;
fread(buf, (i/10), 1, fp);
hash = zend_inline_hash_func(buf, i);
conflicts[hash % 1000] = 1;
hash = zend_inline_hash_func_sse4(buf, i);
conflicts2[hash % 1000] = 1;
}
for (; i < 1000; i++) {
count += conflicts[i];
count2 += conflicts2[i];
}
printf("default: %d, sse: %d\n", count, count2);
fclose(fp);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment