Skip to content

Instantly share code, notes, and snippets.

@hktechn0
Last active August 29, 2015 14:03
Show Gist options
  • Save hktechn0/60f050d0ba2392f2e49f to your computer and use it in GitHub Desktop.
Save hktechn0/60f050d0ba2392f2e49f to your computer and use it in GitHub Desktop.
AVX2 strlen()
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <immintrin.h>
#define LENGTH 1000000010
/*
coins syspro strlen()
http://www.coins.tsukuba.ac.jp/~syspro/2015/shui/quizzes2.html#exam207
*/
/*
 * Forward declarations of the three strlen() variants under test.
 * Each is marked noinline so the compiler emits it as a real out-of-line
 * function (presumably so the clock() measurements in main() cover an
 * actual call rather than inlined/optimized-away code -- confirm).
 */
size_t my_strlen1(const char *s)__attribute__((noinline));
size_t my_strlen2(const char *s)__attribute__((noinline));
size_t my_strlen3(const char *s)__attribute__((noinline));
/*
 * Byte-at-a-time strlen().
 *
 * s: pointer to a NUL-terminated string (must not be NULL).
 * Returns the number of bytes before the terminating '\0'.
 *
 * The length is computed from a pointer difference and returned as size_t,
 * so it does not wrap for strings of 4 GiB or more (a 32-bit index would).
 */
size_t my_strlen1(const char *s)
{
    const char *end = s;

    while (*end != '\0') {
        end++;
    }
    return (size_t)(end - s);
}
/*
 * Word-at-a-time strlen(): examines four bytes per iteration.
 *
 * s: pointer to a NUL-terminated string (must not be NULL).
 * Returns the number of bytes before the terminating '\0'.
 *
 * Layout of the scan:
 *   1. Bytes are checked one at a time until the cursor is 4-byte aligned.
 *   2. Aligned 32-bit words are loaded with memcpy() (no pointer type
 *      punning, so no strict-aliasing violation) and tested for a zero byte.
 *   3. The word that contains the terminator is re-scanned byte by byte,
 *      which keeps the result independent of host endianness.
 *
 * NOTE: step 2 may read up to three bytes past the terminator. Because the
 * load is aligned it never crosses into another page, but it is still an
 * over-read as far as the C abstract machine (and tools such as ASan) are
 * concerned.
 */
size_t my_strlen2(const char *s)
{
    const char *cur = s;
    uint32_t word;

    /* Scalar prologue: advance to a 4-byte boundary. */
    while (((uintptr_t)cur & (sizeof word - 1)) != 0) {
        if (*cur == '\0') {
            return (size_t)(cur - s);
        }
        cur++;
    }

    for (;;) {
        memcpy(&word, cur, sizeof word);
        /* Non-zero iff at least one of the four bytes in word is 0x00. */
        if (((word - 0x01010101u) & ~word & 0x80808080u) != 0) {
            break;
        }
        cur += sizeof word;
    }

    /* Locate the exact terminator inside the final word. */
    while (*cur != '\0') {
        cur++;
    }
    return (size_t)(cur - s);
}
/*
 * AVX2 strlen(): examines 32 bytes per iteration.
 *
 * s: pointer to a NUL-terminated string (must not be NULL, any alignment).
 * Returns the number of bytes before the terminating '\0'.
 *
 * Bytes are checked one at a time until the cursor reaches a 32-byte
 * boundary; from there whole 256-bit blocks are loaded with the aligned
 * load intrinsic and compared against zero. Once a block containing a zero
 * byte is found, my_strlen1() pinpoints the terminator inside that block.
 *
 * NOTE: the vector load may read up to 31 bytes past the terminator.
 * Because the load is 32-byte aligned it never crosses into another page,
 * but it is still an over-read as far as the C abstract machine is
 * concerned.
 *
 * Requires a CPU and compiler flags with AVX2 support (e.g. -mavx2).
 */
size_t my_strlen3(const char *s)
{
    const char *cur = s;
    __m256i zero, block, hit;

    /* Scalar prologue: advance to a 32-byte boundary. */
    while (((uintptr_t)cur & (sizeof(__m256i) - 1)) != 0) {
        if (*cur == '\0') {
            return (size_t)(cur - s);
        }
        cur++;
    }

    zero = _mm256_setzero_si256();
    for (;;) {
        block = _mm256_load_si256((const __m256i *)cur);
        /* Each byte of hit is 0xff where block has 0x00, else 0x00. */
        hit = _mm256_cmpeq_epi8(block, zero);
        /* testz returns 1 only when hit is all zero, i.e. no NUL here. */
        if (!_mm256_testz_si256(hit, hit)) {
            return (size_t)(cur - s) + my_strlen1(cur);
        }
        cur += sizeof(__m256i);
    }
}
/*
 * Benchmark driver.
 *
 * Loads the file "samplestring" into a zero-filled LENGTH-byte buffer, then
 * times the library strlen() and the three my_strlen*() variants on it with
 * clock(), printing "<length>, <clock ticks>" for each.
 *
 * Exits with status 1 if the buffer cannot be allocated or the file cannot
 * be opened or read.
 */
int main(void)
{
    char *str;
    FILE *fp;
    clock_t c1, c2, c3, c4, t;
    size_t l1, l2, l3, l4;

    if ((str = calloc(LENGTH, sizeof(char))) == NULL) {
        perror("calloc");
        exit(1);
    }
    if ((fp = fopen("samplestring", "r")) == NULL) {
        perror("fopen");
        free(str);
        exit(1);
    }

    /*
     * Read at most LENGTH - 1 bytes: the buffer comes zeroed from calloc(),
     * so the last byte is then guaranteed to remain a '\0' terminator even
     * if the file is LENGTH bytes or larger.
     */
    (void)fread(str, sizeof(char), (size_t)LENGTH - 1, fp);
    if (ferror(fp)) {
        perror("fread");
        fclose(fp);
        free(str);
        exit(1);
    }
    fclose(fp);

    t = clock();
    l1 = strlen(str);
    c1 = clock() - t;

    t = clock();
    l2 = my_strlen1(str);
    c2 = clock() - t;

    t = clock();
    l3 = my_strlen2(str);
    c3 = clock() - t;

    t = clock();
    l4 = my_strlen3(str);
    c4 = clock() - t;

    free(str);

    /* Lengths are size_t (%zu); clock ticks are widened to unsigned long. */
    printf("strlen(): %zu, %lu\n", l1, (unsigned long)c1);
    printf("my_strlen1(): %zu, %lu\n", l2, (unsigned long)c2);
    printf("my_strlen2(): %zu, %lu\n", l3, (unsigned long)c3);
    printf("my_strlen3(): %zu, %lu\n", l4, (unsigned long)c4);
    return 0;
}
/*
[s0911454@borage08]~$ clang -mavx2 -O3 strlen.c
[s0911454@borage08]~$ ./a.out
strlen(): 1000000000, 61636
my_strlen1(): 1000000000, 391436
my_strlen2(): 1000000000, 177409
my_strlen3(): 1000000000, 57086
[s0911454@borage08]~$ uname -a
Darwin borage08.coins.tsukuba.ac.jp 13.3.0 Darwin Kernel Version 13.3.0: Tue Jun 3 21:27:35 PDT 2014; root:xnu-2422.110.17~1/RELEASE_X86_64 x86_64 i386 iMac14,2 Darwin
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment