Last active
April 22, 2017 21:30
-
-
Save blotta/a5998b5cc0a48813f0947ef78569697d to your computer and use it in GitHub Desktop.
glibc wmemchr aarch64 optimization test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the wmemchr benchmark once per optimisation level so the variants
# can be timed side by side.  Each target produces <level>-$(BIN).
BIN=wchar-tst
SRC=${BIN}.c

# None of these targets creates a file with its own name, so mark them phony;
# otherwise a stray file called "no", "o1", "clean", ... would silently
# disable the rule.
.PHONY: all no o1 o2 o3 clean debug

all: no o1 o2 o3

# No -O flag: compiler default optimisation level.
no: ${SRC}
	gcc -o no-${BIN} ${SRC}

o1: ${SRC}
	gcc -O1 -o o1-${BIN} ${SRC}

o2: ${SRC}
	gcc -O2 -o o2-${BIN} ${SRC}

o3: ${SRC}
	gcc -O3 -o o3-${BIN} ${SRC}

# List the binaries explicitly: make runs recipes with /bin/sh, where brace
# expansion ({no,o1,...}) is not guaranteed to work.  -f keeps clean from
# failing when some of the binaries were never built.
clean:
	rm -f no-${BIN} o1-${BIN} o2-${BIN} o3-${BIN}

# Same as o2 but with debug info; note that it overwrites o2-$(BIN).
debug: ${SRC}
	gcc -g -O2 -o o2-${BIN} ${SRC}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <wchar.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <sys/time.h> | |
#include <string.h> | |
/*
 * Scalar reference implementation of wmemchr.
 *
 * Scans the first n wide characters starting at s and returns a pointer to
 * the first one equal to c, or NULL when none of them matches.  The returned
 * pointer drops the const qualifier of s, like the standard wmemchr.
 */
wchar_t *
my__wmemchr (const wchar_t *s, wchar_t c, size_t n)
{
  const wchar_t *cur = s;
  size_t left = n;

  /* Main loop: manually unrolled, four elements per iteration.  */
  for (; left >= 4; cur += 4, left -= 4)
    {
      if (cur[0] == c)
        return (wchar_t *) cur;
      if (cur[1] == c)
        return (wchar_t *) (cur + 1);
      if (cur[2] == c)
        return (wchar_t *) (cur + 2);
      if (cur[3] == c)
        return (wchar_t *) (cur + 3);
    }

  /* Tail: between zero and three elements remain; check them in order.  */
  switch (left)
    {
    case 3:
      if (*cur == c)
        return (wchar_t *) cur;
      ++cur;
      /* fallthrough */
    case 2:
      if (*cur == c)
        return (wchar_t *) cur;
      ++cur;
      /* fallthrough */
    case 1:
      if (*cur == c)
        return (wchar_t *) cur;
      break;
    default:
      break;
    }

  return NULL;
}
/*
 * wmemchr variant that tests four wide characters per iteration.
 *
 * Scans the first n wide characters starting at s and returns a pointer to
 * the first one equal to c, or NULL when none of them matches.
 *
 * On AArch64 with a 4-byte wchar_t the four-element test is done with one
 * vector compare in inline assembly; on any other target an equivalent
 * scalar test is used so the function still builds and returns the same
 * results.  Only when a group reports a match is it re-examined element by
 * element to locate the first matching position.
 */
wchar_t *
mynew__wmemchr (const wchar_t *s, wchar_t c, size_t n)
{
  while (n >= 4)
    {
      /* Non-zero when at least one of s[0..3] equals c.  An automatic
         variable (not static) keeps the function reentrant.  */
      unsigned int set;

#if defined (__aarch64__) && defined (__GNUC__) && __SIZEOF_WCHAR_T__ == 4
      __asm__ __volatile__
        (/* Broadcast c into all four 32-bit lanes of v1.  */
         "DUP v1.4s, %w[c]\n\t"
         /* Load s[0..3] into v0.  */
         "LD1 {v0.4s}, [%[s]]\n\t"
         /* A lane of v2 becomes all ones where the character matches.  */
         "CMEQ v2.4s, v0.4s, v1.4s\n\t"
         /* Reduce: the maximum lane is non-zero iff any lane matched.  */
         "UMAXV s3, v2.4s\n\t"
         /* Move the reduction result into the general register for set.  */
         "UMOV %w[set], v3.s[0]\n\t"
         : [set] "=r" (set)
         : [s] "r" (s), [c] "r" (c)
         /* v0-v3 are overwritten and memory is read through s; both must
            be declared so the compiler does not keep live values in those
            registers or move stores across the asm.  */
         : "v0", "v1", "v2", "v3", "memory");
#else
      /* Portable fallback with the same meaning as the vector test.  */
      set = (unsigned int) ((s[0] == c) | (s[1] == c)
                            | (s[2] == c) | (s[3] == c));
#endif

      if (set)
        {
          /* A match exists in this group; find the first one.  */
          if (s[0] == c)
            return (wchar_t *) s;
          if (s[1] == c)
            return (wchar_t *) &s[1];
          if (s[2] == c)
            return (wchar_t *) &s[2];
          if (s[3] == c)
            return (wchar_t *) &s[3];
        }

      s += 4;
      n -= 4;
    }

  /* Fewer than four elements remain; check them one at a time.  */
  for (; n > 0; ++s, --n)
    {
      if (*s == c)
        return (wchar_t *) s;
    }

  return NULL;
}
/* Number of wide characters in the benchmark buffer.  */
#define CHRNUM 1000000000

/* Start/end timestamps shared by the three timed searches in main.  */
struct timeval t1, t2;

/* Milliseconds elapsed from *start to *end.  Floating-point arithmetic keeps
   the sub-millisecond part that integer division would throw away.  */
static double
elapsed_ms_between (const struct timeval *start, const struct timeval *end)
{
  return (end->tv_sec - start->tv_sec) * 1000.0
         + (end->tv_usec - start->tv_usec) / 1000.0;
}

/* Print where hit lies relative to base (or that nothing was found) followed
   by the labelled elapsed time.  */
static void
report_result (const wchar_t *label, const wchar_t *base,
               const wchar_t *hit, double ms)
{
  if (hit == NULL)
    wprintf (L"\nbase: %p\tnot found\n", (void *) base);
  else
    wprintf (L"\nbase: %p\tplus %lu\twchar: %lc\n", (void *) base,
             (unsigned long) (hit - base), (wint_t) *hit);
  wprintf (L"%ls: %f ms\n", label, ms);
}

/*
 * Benchmark driver: fills a CHRNUM-element buffer with L'a', puts a single
 * L'b' in the last slot, then times the search for L'b' with the C library
 * wmemchr, my__wmemchr and mynew__wmemchr in turn.
 *
 * Returns 0 on success, EXIT_FAILURE when the buffer cannot be allocated.
 */
int
main (void)
{
  wchar_t *wcs = malloc (CHRNUM * sizeof *wcs);
  if (wcs == NULL)
    {
      /* The buffer is several gigabytes; failure here is a realistic case.  */
      fwprintf (stderr, L"failed to allocate %zu bytes\n",
                CHRNUM * sizeof *wcs);
      return EXIT_FAILURE;
    }

  /* Worst case for a forward search: the only match is the last element.  */
  for (size_t i = 0; i < CHRNUM; i++)
    wcs[i] = L'a';
  wcs[CHRNUM - 1] = L'b';

  wprintf (L"\nwchar_t size: %zu\n", sizeof (wchar_t));
  wprintf (L"%lc - %lc, %lc\n", (wint_t) wcs[0], (wint_t) wcs[CHRNUM - 2],
           (wint_t) wcs[CHRNUM - 1]);

  wchar_t wc = L'b';
  wchar_t *wcp;

  /* Find wc in wcs using the C library implementation.  */
  gettimeofday (&t1, NULL);
  wcp = wmemchr (wcs, wc, CHRNUM);
  gettimeofday (&t2, NULL);
  report_result (L"so time", wcs, wcp, elapsed_ms_between (&t1, &t2));

  /* Find wc in wcs the "old" way (scalar, unrolled).  */
  gettimeofday (&t1, NULL);
  wcp = my__wmemchr (wcs, wc, CHRNUM);
  gettimeofday (&t2, NULL);
  report_result (L"Old way time", wcs, wcp, elapsed_ms_between (&t1, &t2));

  /* Find wc in wcs the "new" way (four elements per test).  */
  gettimeofday (&t1, NULL);
  wcp = mynew__wmemchr (wcs, wc, CHRNUM);
  gettimeofday (&t2, NULL);
  report_result (L"new way time", wcs, wcp, elapsed_ms_between (&t1, &t2));

  free (wcs);
  return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment