/*
 * Source: GitHub Gist by @JimHokanson
 * (JimHokanson/433e185ba53b41e49ce3ac804568ac1e),
 * created November 12, 2017 05:22.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <immintrin.h>
#include <x86intrin.h>
/*
 * Count the leading zero bits in the 64-bit pattern of `l`.
 *
 * Returns a value in [0, 64]; an input of 0 yields 64, matching the result
 * of the x86 LZCNT instruction. Negative inputs have their top bit set and
 * therefore yield 0.
 *
 * Implemented with the compiler builtin instead of __lzcnt64 so the function
 * neither needs -mlzcnt at build time nor depends on the CPU supporting the
 * LZCNT instruction at run time.
 */
long long lzcnt(long long l)
{
    const unsigned long long bits = (unsigned long long)l;

    /* __builtin_clzll is undefined for 0, so handle that case explicitly. */
    if (bits == 0) {
        return 64;
    }
    return __builtin_clzll(bits);
}
/*
 * Benchmark three ways of locating a single 'b' inside a 1 GB buffer that is
 * otherwise filled with 'a':
 *   - STD2: strchr()
 *   - STD1: a naive byte-by-byte loop
 *   - SIMD: a 32-byte-at-a-time AVX2 compare / movemask loop
 *
 * Each method is timed with clock() and the measurement is repeated 10 times.
 * Returns 0 on success, 1 if the buffer cannot be allocated.
 */
int main(void) {
    const size_t N = 1000000000;      /* number of searchable bytes */
    const size_t target = 500000000;  /* position of the planted 'b' */

    /* One extra byte for the terminator: strchr() requires a real C string. */
    char *str = malloc(N + 1);
    if (str == NULL) {
        fprintf(stderr, "malloc of %zu bytes failed\n", N + 1);
        return 1;
    }
    memset(str, 'a', N);
    str[N] = '\0';
    str[target] = 'b';

    for (int k = 0; k < 10; k++) {
        //=========================================
        //strchr
        //--------------------
        size_t char_index2 = 0;
        clock_t clock_begin = clock();
        const char *str2 = strchr(str, 'b');
        if (str2 != NULL) {
            char_index2 = (size_t)(str2 - str);
        }
        clock_t clock_end = clock();
        double time_spent_std2 = (double)(clock_end - clock_begin) / CLOCKS_PER_SEC;

        //=========================================
        //Manual search
        //----------------------------------------------
        //- useful if we know the character is present; this loop has no
        //  bound and relies on the 'b' planted above.
        clock_begin = clock();
        const char *s = str;
        while (*s != 'b') {
            ++s;
        }
        size_t char_index1 = (size_t)(s - str);
        clock_end = clock();
        double time_spent_std1 = (double)(clock_end - clock_begin) / CLOCKS_PER_SEC;
        //=========================================

        printf("STD1: %0.3f, I: %zu\n", time_spent_std1, char_index1);
        printf("STD2: %0.3f, I: %zu\n", time_spent_std2, char_index2);
        printf("strchr vs naive: %0.1fv\n", time_spent_std1 / time_spent_std2);

        //SIMD
        //------------------------
        //__m256i _mm256_cmpeq_epi8 (__m256i a, __m256i b)
        clock_begin = clock();
        //Notes
        //-----------
        //=> increment pointer as mm256i
        //=> the loop is bounded to whole 32-byte blocks inside the buffer so
        //   a missing character can never cause a read past the allocation
        const __m256i char_match = _mm256_set1_epi8('b');
        const __m256i *pSrc1 = (const __m256i *)str;
        const __m256i *const pEnd = pSrc1 + N / sizeof(__m256i);
        int simd_mask = 0;
        while (pSrc1 < pEnd) {
            const __m256i str_simd = _mm256_lddqu_si256(pSrc1);
            const __m256i result = _mm256_cmpeq_epi8(str_simd, char_match);
            simd_mask = _mm256_movemask_epi8(result);
            if (simd_mask != 0) {
                break;
            }
            pSrc1++;
        }

        /* Byte position of the block where the vector loop stopped. */
        const size_t simd_base = (size_t)((const char *)pSrc1 - str);
        unsigned simd_offset = 0;
        size_t simd_index = N; /* N means "not found" */
        if (simd_mask != 0) {
            /*
             * movemask puts byte 0 of the block in bit 0, so the first match
             * is the lowest set bit: count trailing zeros, not leading ones.
             */
            simd_offset = (unsigned)__builtin_ctz((unsigned)simd_mask);
            simd_index = simd_base + simd_offset;
        } else {
            /* Scan the bytes left over after the last whole 32-byte block. */
            const char *tail = memchr(str + simd_base, 'b', N - simd_base);
            if (tail != NULL) {
                simd_index = (size_t)(tail - str);
            }
        }
        clock_end = clock();
        double time_spent_simd = (double)(clock_end - clock_begin) / CLOCKS_PER_SEC;

        printf("SIMD: %0.3f\n", time_spent_simd);
        printf("mask:%d, I:%zu , off: %u\n", simd_mask, simd_index, simd_offset);
    }

    free(str);
    return 0;
}
/* GitHub page footer (not part of the program): Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment */