Skip to content

Instantly share code, notes, and snippets.

@munnellg
Created August 22, 2018 18:04
Show Gist options
  • Save munnellg/f655033dd97af442bc09804f07b90292 to your computer and use it in GitHub Desktop.
Save munnellg/f655033dd97af442bc09804f07b90292 to your computer and use it in GitHub Desktop.
Benchmarking glibc's strlen function
#include <math.h>
#include <time.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#ifdef __MACH__
#include <sys/time.h>
#include <mach/clock.h>
#include <mach/mach.h>
#endif
#define STR_MIN 8 // start with 8 bytes
#define STR_MAX (4 * 1024 * 1024) // work up to 4 megabytes
#define BUF_LEN (STR_MAX + 1) // size of the string buffer
#define MAX_ITR 1000 // we'll average time over 1000 iterations
static size_t
glibc_strlen (const char *str)
{
const char *char_ptr;
const unsigned long int *longword_ptr;
unsigned long int longword, himagic, lomagic;
for (char_ptr = str; ((unsigned long int) char_ptr
& (sizeof (longword) - 1)) != 0;
++char_ptr)
if (*char_ptr == '\0')
return char_ptr - str;
longword_ptr = (unsigned long int *) char_ptr;
himagic = 0x80808080L;
lomagic = 0x01010101L;
if (sizeof (longword) > 4) {
himagic = ((himagic << 16) << 16) | himagic;
lomagic = ((lomagic << 16) << 16) | lomagic;
}
if (sizeof (longword) > 8)
abort ();
for (;;) {
longword = *longword_ptr++;
if (((longword - lomagic) & ~longword & himagic) != 0) {
const char *cp = (const char *) (longword_ptr - 1);
if (cp[0] == 0)
return cp - str;
if (cp[1] == 0)
return cp - str + 1;
if (cp[2] == 0)
return cp - str + 2;
if (cp[3] == 0)
return cp - str + 3;
if (sizeof (longword) > 4) {
if (cp[4] == 0)
return cp - str + 4;
if (cp[5] == 0)
return cp - str + 5;
if (cp[6] == 0)
return cp - str + 6;
if (cp[7] == 0)
return cp - str + 7;
}
}
}
}
static size_t
simple_strlen (const char *str)
{
const char *p = str;
if (str == NULL)
return 0;
while (*p != '\0')
p++;
return p - str;
}
static double
get_time (void)
{
#ifdef _WIN32
return 0.0;
#else
struct timespec t;
static double prev_value = 0.0;
#if __MACH__
clock_serv_t cclock;
mach_timespec_t mts;
host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
int r = clock_get_time(cclock, &mts);
mach_port_deallocate(mach_task_self(), cclock);
t.tv_sec = mts.tv_sec;
t.tv_nsec = mts.tv_nsec;
#else
int r = clock_gettime(CLOCK_MONOTONIC, &t);
#endif
if (r < 0) {
fprintf(stderr, "%s\n", strerror(errno));
return prev_value;
}
double ns = t.tv_nsec;
double s = ns * 0.000000001;
time_t tts = t.tv_sec;
s += difftime(tts, 0);
prev_value = s;
return s;
#endif
}
static void
gen_unitstr (char *strbuf, size_t i)
{
char *units[] = { "B", "KB", "MB", "GB", "TB" };
int unit = log2(i) / 10;
sprintf(strbuf, "%d%s", (int) (i / pow(1024, unit)), units[unit]);
}
static void
eval_strlen (const char *name, size_t (*func)(const char *s))
{
/* Use nudge to shove buffer off the word boundary */
/* If the size of nudge is a multiple of 8 then buffer is word aligned */
struct offset { char nudge[1]; char buffer[BUF_LEN]; } offset;
char *buffer = offset.buffer;
char unitstr[6];
memset(buffer, 'x', sizeof(char) * BUF_LEN);
buffer[STR_MAX] = 0;
fprintf(stdout, "Beginning test: %s\n", name);
fprintf(stdout, "Buffer at address %p\n", buffer);
for (size_t i = STR_MIN; i <= STR_MAX; i <<= 1) {
buffer[i] = 0;
gen_unitstr(unitstr, i);
fprintf(stdout, "[len = %5s] ", unitstr);
fflush(stdout);
double start = get_time();
for (int j = 0; j < MAX_ITR; j++ ) {
/* update prevents compiler from optimising the loop away */
buffer[j % i] = rand() % 26 + 'a';
size_t res = func(buffer);
if (res != i) {
fprintf(stderr, "\nERROR: %s : ", name);
fprintf(stderr, "result %lu, expected %lu\n", res, i);
return;
}
}
double end = get_time();
fprintf(stdout, "avg %.5fs over %d iterations\n", (end - start) / MAX_ITR, MAX_ITR);
buffer[i] = 'x';
}
}
int
main ( int argc, char *argv[] )
{
srand(time(NULL));
eval_strlen("simple ", simple_strlen);
fprintf(stdout, "\n");
eval_strlen("glibc ", glibc_strlen);
fprintf(stdout, "\n");
eval_strlen("string.h", strlen);
return EXIT_SUCCESS;
}
@DBJDBJ
Copy link

DBJDBJ commented Jan 26, 2022

Benchmarking refactored

https://godbolt.org/z/xabx11EqY

  • Obviously CRT strlen() would always be the fastest one
    • there is perhaps no point in comparing to an inferior algorithm like simple_strlen()
  • Use the serious statistical benchmarking method
    • thus relative differences will stay the same, regardless of the specimen length
    • thus if using optimizations, results will be again the same, relative to each other
  • All optimizations are off, ditto there are no optimizations

Try and reuse as much as possible. Code the solution that fulfills the requirements with the minimum amount of coding possible. Benchmark as you go. Modern compilers are powerfull and unpredictable.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment