Skip to content

Instantly share code, notes, and snippets.

@karthick18
Created November 13, 2011 08:42
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save karthick18/1361842 to your computer and use it in GitHub Desktop.
Save karthick18/1361842 to your computer and use it in GitHub Desktop.
memcmp with SSE2 16 byte loads and compares
/*
* 16-bit word equality compare (returns 0 when equal, -1 otherwise) using SSE2 intrinsics for faster 16 byte loads and compares.
*
* compile with:
* gcc -o sse_load_test sse_load.c -Wall -O2 -g -DTEST_SSE2
*
*/
#if defined(TEST_SSE2) && !defined(__SSE2__)
#error "no SSE2 set"
#endif
#include <emmintrin.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#ifdef TEST_SSE2
#define __MEMCMP __sse_memcmp
/*
 * Scalar equality compare of `len` 16-bit words.
 *
 * a, b - arrays holding at least `len` readable elements each
 * len  - number of elements to compare; values <= 0 compare nothing
 *
 * Returns 0 when every compared element matches, -1 on the first mismatch.
 *
 * A plain loop is used instead of an unrolled switch so that a length
 * larger than 8 is still compared rather than silently reported as equal.
 */
static int __sse_memcmp_tail(const uint16_t *a, const uint16_t *b, int len)
{
    while(len-- > 0)
    {
        if(*a++ != *b++) return -1;
    }
    return 0;
}
/*
 * Equality compare of two arrays of 16-bit words using SSE2 16 byte loads.
 *
 * a, b       - arrays to compare; each must be 2 byte aligned
 * half_words - number of 16-bit elements to compare
 *
 * Returns 0 when there is nothing to compare (half_words <= 0), when both
 * pointers are NULL, or when all elements match. Returns -1 on a mismatch,
 * when exactly one pointer is NULL, or when a pointer sits on an odd address.
 * Unlike memcmp() the result carries no ordering information.
 */
static int __sse_memcmp(const uint16_t *a, const uint16_t *b, int half_words)
{
    const uintptr_t vec_mask = sizeof(__m128i) - 1;
    const int words_per_vec = (int)(sizeof(__m128i) / sizeof(*a));
    int len = half_words;

    if(len <= 0) return 0;
    if(!a && !b) return 0;
    if(!a || !b) return -1;
    if( ((uintptr_t)a | (uintptr_t)b) & 1 ) return -1;

    /*
     * Compare word by word until `a` reaches a 16 byte boundary so that the
     * aligned load below is legal for it.
     */
    while( len && ((uintptr_t)a & vec_mask) )
    {
        if(*a++ != *b++) return -1;
        --len;
    }

    /*
     * Consume every full 8 word vector. `b` is read with an unaligned load
     * so the two buffers do not need to share the same misalignment.
     */
    while( len >= words_per_vec )
    {
        __m128i x = _mm_load_si128((const __m128i *)a);
        __m128i y = _mm_loadu_si128((const __m128i *)b);
        /*
         * _mm_cmpeq_epi16 returns 0xffff for each of the 8 half words when it matches
         */
        __m128i cmp = _mm_cmpeq_epi16(x, y);
        /*
         * _mm_movemask_epi8 creates a 16 bit mask with the MSB for each of the 16 bytes of cmp
         */
        if( _mm_movemask_epi8(cmp) != 0xffff ) return -1;
        a += words_per_vec;
        b += words_per_vec;
        len -= words_per_vec;
    }

    /* at most 7 words are left for the scalar tail */
    return __sse_memcmp_tail(a, b, len);
}
#else
#define __MEMCMP memcmp
#endif
/*
 * Fill two heap buffers of `half_words` 16-bit elements with random data and
 * compare them through __MEMCMP.
 *
 * half_words       - number of 16-bit elements in each buffer
 * randomize_factor - 0 makes the buffers identical; otherwise a[i] is copied
 *                    from b[i] only at indexes where (i & randomize_factor)
 *                    is non-zero, the rest stay independently random
 *
 * Returns whatever __MEMCMP reports for the two buffers.
 */
static int test_sse_memcmp(int half_words, int randomize_factor)
{
    uint16_t *a = malloc(half_words * sizeof(*a));
    uint16_t *b = malloc(half_words * sizeof(*b));
    int cmp;
    int i;

    assert(a && b);
    for(i = 0; i < half_words; ++i)
    {
        a[i] = (uint16_t)(random() & 0xffff);
        b[i] = (uint16_t)(random() & 0xffff);
        if(!randomize_factor || (i & randomize_factor)) a[i] = b[i];
    }
#ifdef TEST_SSE2
    /* __sse_memcmp takes the length in 16-bit elements */
    cmp = __MEMCMP(a, b, half_words);
#else
    /* memcmp takes the length in bytes, not elements */
    cmp = __MEMCMP(a, b, (size_t)half_words * sizeof(*a));
#endif
    free(a);
    free(b);
    return cmp;
}
/*
 * Test driver: sse_load_test [half_words [randomize_factor]]
 *
 * Performs one aligned SSE2 load as a sanity check, then runs
 * test_sse_memcmp() and prints its result. half_words falls back to 10 when
 * it is missing, zero, or negative; randomize_factor defaults to 0.
 */
int main(int argc, char **argv)
{
    __attribute__((aligned(16))) const uint16_t val[8] = { [ 0 ... 7] = 1 };
    union { __m128i ret; uint16_t ret_s[8];} v;
    int i;
    int randomize_factor = 0;
    int half_words = 0;

    /*
     * warm up: load a known 16 byte aligned vector and read it back
     */
    v.ret = _mm_load_si128((const __m128i*)val);
    for(i = 0; i < (int)(sizeof(val)/sizeof(val[0])); ++i)
    {
        /* report the index that failed, not the value that was loaded */
        if(v.ret_s[i] != val[i])
            printf("val at index [%d] doesn't match expected [%d]\n", i, val[i]);
    }

    if(argc > 1) half_words = atoi(argv[1]);
    if(argc > 2) randomize_factor = atoi(argv[2]);
    /* a negative count would turn into a huge allocation request */
    if(half_words <= 0) half_words = 10;

    printf("memcmp test result [%d]\n", test_sse_memcmp(half_words, randomize_factor));
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment