Created
November 12, 2017 05:22
-
-
Save JimHokanson/433e185ba53b41e49ce3ac804568ac1e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <immintrin.h>
#include <x86intrin.h>
/*
 * Count the leading zero bits of a 64-bit value.
 *
 * Returns 64 when `l` is 0 and 0 when the top (sign) bit is set, which are
 * the results the LZCNT instruction produces. Implemented with the compiler
 * builtin instead of the __lzcnt64 intrinsic so the function builds without
 * -mlzcnt and never degrades to BSR on CPUs that lack LZCNT.
 */
long long lzcnt(long long l)
{
    const unsigned long long bits = (unsigned long long)l;

    /* __builtin_clzll is undefined for 0, so handle that case explicitly. */
    if (bits == 0) {
        return 64;
    }
    return __builtin_clzll(bits);
}
/*
 * Benchmark three ways of locating a single 'b' inside a ~1 GB buffer that is
 * otherwise filled with 'a':
 *   STD2 - strchr()
 *   STD1 - a naive byte-by-byte loop
 *   SIMD - AVX2 compare + movemask over 32-byte vectors
 *
 * Each strategy is timed with clock() (processor time) and the whole
 * comparison is repeated 10 times. Requires AVX2 support at build time
 * (e.g. -mavx2) and at run time.
 *
 * Returns 0 on success, 1 if the buffer cannot be allocated.
 */
int main(void)
{
    const size_t N = (size_t)1e9;
    const size_t target_index = (size_t)5e8;
    const char target = 'b';
    const size_t vec_bytes = sizeof(__m256i);
    const size_t n_vectors = N / vec_bytes;

    char *str = malloc(N);
    if (str == NULL) {
        fprintf(stderr, "malloc(%zu) failed\n", N);
        return 1;
    }
    memset(str, 'a', N);
    str[N - 1] = '\0'; /* strchr() requires a NUL-terminated string */
    str[target_index] = target;

    for (int k = 0; k < 10; k++) {
        clock_t clock_begin;
        clock_t clock_end;

        //=========================================
        //strchr
        //--------------------
        size_t char_index2 = 0;
        clock_begin = clock();
        const char *str2 = strchr(str, target);
        if (str2 != NULL) {
            char_index2 = (size_t)(str2 - str);
        }
        clock_end = clock();
        const double time_spent_std2 =
            (double)(clock_end - clock_begin) / CLOCKS_PER_SEC;

        //=========================================
        //Manual search
        //----------------------------------------------
        //- useful if we know the character is present ...
        //  (it is: the target was stored at target_index above, so this
        //  loop always terminates inside the buffer)
        clock_begin = clock();
        const char *s = str;
        while (*s != target) {
            ++s;
        }
        const size_t char_index1 = (size_t)(s - str);
        clock_end = clock();
        const double time_spent_std1 =
            (double)(clock_end - clock_begin) / CLOCKS_PER_SEC;

        printf("STD1: %0.3f, I: %zu\n", time_spent_std1, char_index1);
        printf("STD2: %0.3f, I: %zu\n", time_spent_std2, char_index2);
        printf("strchr vs naive: %0.1fv\n", time_spent_std1 / time_spent_std2);

        //=========================================
        //SIMD
        //------------------------
        //__m256i _mm256_cmpeq_epi8 (__m256i a, __m256i b)
        clock_begin = clock();

        const __m256i char_match = _mm256_set1_epi8(target);
        const __m256i *p_vec = (const __m256i *)str;
        const __m256i *const p_end = p_vec + n_vectors;
        int simd_mask = 0;

        // Bounded by the number of whole vectors so a missing target can
        // never make the loop read past the allocation.
        while (p_vec < p_end) {
            const __m256i chunk = _mm256_lddqu_si256(p_vec);
            simd_mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, char_match));
            if (simd_mask != 0) {
                break;
            }
            p_vec++;
        }

        size_t char_index3 = N; // N means "not found"
        int lane = -1;          // byte offset of the match inside its vector
        if (simd_mask != 0) {
            // movemask puts byte 0 of the vector in bit 0, so the first
            // matching byte is the lowest set bit (trailing-zero count).
            lane = __builtin_ctz((unsigned)simd_mask);
            char_index3 = (size_t)((const char *)p_vec - str) + (size_t)lane;
        } else {
            // Scalar scan of the tail bytes that do not fill a whole vector.
            for (size_t i = n_vectors * vec_bytes; i < N; i++) {
                if (str[i] == target) {
                    char_index3 = i;
                    break;
                }
            }
        }

        clock_end = clock();
        const double time_spent_simd =
            (double)(clock_end - clock_begin) / CLOCKS_PER_SEC;

        printf("SIMD: %0.3f\n", time_spent_simd);
        printf("mask:%d, I:%zu , off: %d\n", simd_mask, char_index3, lane);
    }

    free(str);
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment