Skip to content

Instantly share code, notes, and snippets.

@blotta
Last active April 22, 2017 21:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save blotta/a5998b5cc0a48813f0947ef78569697d to your computer and use it in GitHub Desktop.
Save blotta/a5998b5cc0a48813f0947ef78569697d to your computer and use it in GitHub Desktop.
glibc wmemchr aarch64 optimization test
# Builds the wmemchr benchmark (${SRC}) once per optimisation level so the
# timings of the different builds can be compared side by side.
BIN = wchar-tst
SRC = ${BIN}.c

# None of these targets names a file that its recipe creates; declaring them
# phony keeps a stray file called "no", "clean", ... from masking the rule.
.PHONY: all no o1 o2 o3 clean debug

all: no o1 o2 o3

# Compiler default optimisation level.
no: ${SRC}
	gcc -o no-${BIN} ${SRC}

o1: ${SRC}
	gcc -O1 -o o1-${BIN} ${SRC}

o2: ${SRC}
	gcc -O2 -o o2-${BIN} ${SRC}

o3: ${SRC}
	gcc -O3 -o o3-${BIN} ${SRC}

# Recipes run under /bin/sh, which is not guaranteed to support brace
# expansion, so every binary is listed explicitly.  -f keeps "clean"
# working when only some of the binaries have been built.
clean:
	rm -f no-${BIN} o1-${BIN} o2-${BIN} o3-${BIN}

# Same output name as the o2 build, but with debug information.
debug: ${SRC}
	gcc -g -O2 -o o2-${BIN} ${SRC}
#include <wchar.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <string.h>
/* Scalar reference search: look at the first N wide characters starting at
   S and return a pointer to the first one equal to C, or NULL when none of
   them matches.  */
wchar_t *
my__wmemchr (const wchar_t *s, wchar_t c, size_t n)
{
  const wchar_t *cur = s;
  size_t left = n;

  /* Bulk phase: four explicit comparisons per iteration (manual unroll).  */
  for (; left >= 4; cur += 4, left -= 4)
    {
      if (cur[0] == c)
        return (wchar_t *) cur;
      if (cur[1] == c)
        return (wchar_t *) (cur + 1);
      if (cur[2] == c)
        return (wchar_t *) (cur + 2);
      if (cur[3] == c)
        return (wchar_t *) (cur + 3);
    }

  /* Tail phase: between zero and three elements are left.  Enter at the
     case matching the remaining count and fall through the smaller ones.  */
  switch (left)
    {
    case 3:
      if (*cur == c)
        return (wchar_t *) cur;
      ++cur;
      /* fall through */
    case 2:
      if (*cur == c)
        return (wchar_t *) cur;
      ++cur;
      /* fall through */
    case 1:
      if (*cur == c)
        return (wchar_t *) cur;
      break;
    default:
      break;
    }

  return NULL;
}
/* SIMD-assisted search: look at the first N wide characters starting at S
   and return a pointer to the first one equal to C, or NULL when none of
   them matches.

   On AArch64 with a 32-bit wchar_t, each group of four elements is first
   tested with one vector compare; the scalar comparisons that pinpoint the
   matching element run only when that vector test reports a hit.  On any
   other target the vector pre-test is skipped and the scalar comparisons
   run for every group, so the function stays buildable and gives the same
   results everywhere.  */
wchar_t *
mynew__wmemchr (const wchar_t *s, wchar_t c, size_t n)
{
  while (n >= 4)
    {
#if defined (__aarch64__) && defined (__GNUC__) && __SIZEOF_WCHAR_T__ == 4
      /* Non-zero when at least one of s[0..3] equals c.  A plain local:
         the value is recomputed on every iteration, so static storage
         would only make the function non-reentrant.  */
      unsigned int set;

      __asm__ __volatile__
        (/* Broadcast c into all four 32-bit lanes of v1.  */
         "dup   v1.4s, %w[c]\n\t"
         /* Load s[0..3] into v0.  */
         "ld1   {v0.4s}, [%[s]]\n\t"
         /* A lane of v2 becomes all ones where the element equals c.  */
         "cmeq  v2.4s, v0.4s, v1.4s\n\t"
         /* Horizontal maximum: non-zero iff some lane matched.  */
         "umaxv s3, v2.4s\n\t"
         /* Move the reduced value into the C variable.  */
         "umov  %w[set], v3.s[0]\n\t"
         /* The output is written only by the last instruction, after both
            inputs have been consumed, so no early-clobber is needed.  */
         : [set] "=r" (set)
         : [s] "r" (s), [c] "r" (c)
         /* v0-v3 are overwritten, and the asm reads memory through s that
            is not visible to the compiler as an operand.  */
         : "v0", "v1", "v2", "v3", "memory");
#else
      /* No vector pre-test available: always do the scalar comparisons.  */
      const unsigned int set = 1;
#endif

      if (set)
        {
          /* Locate the first matching element within the group.  */
          if (s[0] == c)
            return (wchar_t *) s;
          if (s[1] == c)
            return (wchar_t *) &s[1];
          if (s[2] == c)
            return (wchar_t *) &s[2];
          if (s[3] == c)
            return (wchar_t *) &s[3];
        }

      s += 4;
      n -= 4;
    }

  /* Fewer than four elements remain; check them one at a time.  */
  if (n > 0)
    {
      if (*s == c)
        return (wchar_t *) s;
      ++s;
      --n;
    }
  if (n > 0)
    {
      if (*s == c)
        return (wchar_t *) s;
      ++s;
      --n;
    }
  if (n > 0)
    if (*s == c)
      return (wchar_t *) s;

  return NULL;
}
/* Number of wide characters in the buffer that main allocates, fills and
   then searches. */
#define CHRNUM 1000000000
/* File-scope timestamps; main stores a gettimeofday sample in t1 before
   and in t2 after each timed search call. */
struct timeval t1, t2;
/* Milliseconds between two gettimeofday samples.  The arithmetic is done
   in floating point so the sub-millisecond part is kept instead of being
   truncated by integer division. */
static double
elapsed_ms_between(const struct timeval *start, const struct timeval *end)
{
    return (double) (end->tv_sec - start->tv_sec) * 1000.0
           + (double) (end->tv_usec - start->tv_usec) / 1000.0;
}

/* Print the outcome of one search: the buffer address, the offset and
   value of the hit (or a note that nothing was found), followed by
   "<label>: <ms> ms". */
static void
report_search(const wchar_t *label, const wchar_t *base,
              const wchar_t *hit, double ms)
{
    if (hit != NULL) {
        /* %p requires a void pointer and %lc a wint_t argument. */
        wprintf(L"\nbase: %p\tplus %lu\twchar: %lc\n", (void *) base,
                (unsigned long) (hit - base), (wint_t) *hit);
    } else {
        wprintf(L"\nbase: %p\tno match\n", (void *) base);
    }
    wprintf(L"%ls: %f ms\n", label, ms);
}

/* Benchmark driver: fills a CHRNUM-element buffer with L'a', puts a single
   L'b' in the last slot, then times how long the C library's wmemchr,
   my__wmemchr and mynew__wmemchr each take to find it.
   Returns 0 on success, EXIT_FAILURE if the buffer cannot be allocated. */
int main(void)
{
    const size_t count = CHRNUM;

    wchar_t *wcs = malloc(count * sizeof *wcs);
    if (wcs == NULL) {
        /* The buffer is large; do not write through a null pointer. */
        fwprintf(stderr, L"cannot allocate %zu wide characters\n", count);
        return EXIT_FAILURE;
    }

    /* Every element is L'a' except the last, so each search has to walk
       the entire buffer before it hits. */
    for (size_t i = 0; i < count; i++)
        wcs[i] = L'a';
    wcs[count - 1] = L'b';

    /* sizeof yields a size_t, which must be printed with %zu. */
    wprintf(L"\nwchar_t size: %zu\n", sizeof(wchar_t));
    wprintf(L"%lc - %lc, %lc\n", (wint_t) wcs[0], (wint_t) wcs[count - 2],
            (wint_t) wcs[count - 1]);

    const wchar_t wc = L'b';
    wchar_t *wcp;

    /* Each search is called directly between the two timestamps so that
       only the call itself is measured. */

    /* C library implementation. */
    gettimeofday(&t1, NULL);
    wcp = wmemchr(wcs, wc, count);
    gettimeofday(&t2, NULL);
    report_search(L"so time", wcs, wcp, elapsed_ms_between(&t1, &t2));

    /* Scalar, manually unrolled implementation. */
    gettimeofday(&t1, NULL);
    wcp = my__wmemchr(wcs, wc, count);
    gettimeofday(&t2, NULL);
    report_search(L"Old way time", wcs, wcp, elapsed_ms_between(&t1, &t2));

    /* Vector-assisted implementation. */
    gettimeofday(&t1, NULL);
    wcp = mynew__wmemchr(wcs, wc, count);
    gettimeofday(&t2, NULL);
    report_search(L"new way time", wcs, wcp, elapsed_ms_between(&t1, &t2));

    free(wcs);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment