Skip to content

Instantly share code, notes, and snippets.

@aktau
Last active August 29, 2015 14:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aktau/11975b15dfb8d5504a58 to your computer and use it in GitHub Desktop.
Save aktau/11975b15dfb8d5504a58 to your computer and use it in GitHub Desktop.
neovim: strchr() test
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <limits.h>
#include <time.h>
#include <stdbool.h>
/* test if Vim was right when it implemented strchr() again as vim_strbyte() */
#define NUL '\0'
#define NL '\n'
/* define some benchmark timing code */
#ifdef __MACH__
#include <mach/mach_time.h>
double ns_conversion_factor;
double us_conversion_factor;
double ms_conversion_factor;
void timeinit() {
mach_timebase_info_data_t timebase;
mach_timebase_info(&timebase);
ns_conversion_factor = (double)timebase.numer / (double)timebase.denom;
us_conversion_factor = (double)timebase.numer / (double)timebase.denom / 1000;
ms_conversion_factor = (double)timebase.numer / (double)timebase.denom / 1000000;
}
double nsticks() {
return mach_absolute_time() * ns_conversion_factor;
}
double msticks() {
return mach_absolute_time() * ms_conversion_factor;
}
#else
void timeinit() {
/* do nothing */
}
double nsticks() {
timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return ((double)ts.tv_sec) / 1000000000 + ((double)ts.tv_nsec);
}
double msticks() {
timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return ((double)ts.tv_sec) / 1000 + ((double)ts.tv_nsec) * 1000000;
}
#endif
typedef char *(*strchrfn_t)(const char *, int);
/*
* Version of strchr() that only works for bytes and handles unsigned char
* strings with characters above 128 correctly. It also doesn't return a
* pointer to the NUL at the end of the string.
*/
char *vim_strbyte(const char *string, int c)
{
unsigned char *p = (unsigned char *) string;
while (*p != NUL) {
if (*p == c)
return (char *) p;
++p;
}
return NULL;
}
/* a wrapper around strchr that brings it more in line with vim_strbyte() */
char *akt_strbyte(const char *str, int c) {
if (c == 0) {
return NULL; /* Vim prefers returning NULL instead of the end of string */
}
if (c > (unsigned char) -1) {
/* vim_strbyte() never casts c to int, which means that if c >= 255, it
* simply never matches and will always return NULL, but not before
* running through the entire string first. We emulate this behaviour by
* just returning NULL immediately. */
return NULL;
}
return strchr(str, c);
}
/* short version of akt_strbyte() above */
char *akt_strbyte_short(const char *str, int c) {
return (c > 0 && c <= (unsigned char) -1) ? strchr(str, c) : NULL;
}
static void compar(strchrfn_t fn1, strchrfn_t fn2, char *str, int c) {
char *found1 = fn1(str, c);
char *found2 = fn2(str, c);
if (found1 != found2) {
printf("looking for %d (char: %d): %p = str[%zu]/%p = str[%zu], strlen(str) = %zu\n",
c, (int) (char) c, found1, found1 - str, found2, found2 - str, strlen(str));
assert(found1 == found2);
}
}
/* my_strchr() and check_strchr_correctness() below are part of a bugzilla
* bugreport. */
void my_strchr(strchrfn_t fn, const char *s, int c) {
char *r = fn(s, c);
printf("strchr(%d [%d]) = [%d]\n",
(int) (unsigned char) c, c,
(int) (unsigned char) (r ? *r : 0));
}
/* check if strchr is faulty, see
* https://sourceware.org/bugzilla/show_bug.cgi?id=12159 */
static void check_strchr_correctness(strchrfn_t fn) {
static const char s[] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0};
char ch = 228;
int d = 228;
my_strchr(fn, s, ch);
my_strchr(fn, s, (int) ch);
my_strchr(fn, s, (unsigned int) ch);
my_strchr(fn, s, (unsigned char) ch);
my_strchr(fn, s, d);
}
#define benchmark(fn, str, rounds, use_char) \
benchmarkfn(fn, #fn, str, rounds, use_char)
void benchmarkfn(strchrfn_t fn, const char *fnname, const char *str, size_t rounds, bool use_char) {
size_t nulls = 0;
uint64_t t1 = nsticks();
for (int i = 1; i < rounds; ++i) {
char *res = fn(str, use_char ? (unsigned char) i : i);
if (!res) {
nulls++;
}
}
uint64_t t2 = nsticks();
printf("%s: %8.2f ns per call, %s, nulls counted = %zu\n",
use_char ? "chr" : "int", (double) (t2 - t1) / (double) rounds, fnname, nulls);
}
#define CHECK_STRCHR(fn) \
do { \
printf("checking " #fn ":\n"); \
check_strchr_correctness(fn); \
} while (0)
int main() {
/* check for basic correctness */
CHECK_STRCHR(strchr);
CHECK_STRCHR(akt_strbyte);
CHECK_STRCHR(vim_strbyte);
char str[256];
/* fill up `str` with all possible bytes that fit in a char */
for (int i = 0; i < sizeof(str); ++i) {
str[i] = (sizeof(str) - 1) - i;
}
/* try to search all sorts of bytes inside of `str`, compare the output
* of two different strchr()-like functions */
for (int i = 1; i < 0x10000; ++i) {
compar(akt_strbyte_short, vim_strbyte, str, i);
}
/* test how an implementation responds to NUL */
compar(akt_strbyte, vim_strbyte, str, NUL);
/* benchmark */
timeinit();
#define ROUNDS 0x10000
/* with a short string */
printf("benchmarking finding a needle in a short string: %zu chars, %d rounds\n", strlen(str), ROUNDS);
benchmark(akt_strbyte_short, str, ROUNDS, true);
benchmark(akt_strbyte, str, ROUNDS, true);
benchmark(strchr, str, ROUNDS, true);
benchmark(vim_strbyte, str, ROUNDS, true);
benchmark(akt_strbyte_short, str, ROUNDS, false);
benchmark(akt_strbyte, str, ROUNDS, false);
benchmark(strchr, str, ROUNDS, false);
benchmark(vim_strbyte, str, ROUNDS, false);
/* with a longer string filled with random bytes */
srand(time(NULL));
size_t longstrlen = 1 * 256 * 1024;
char *longstr = malloc(longstrlen); /* several MBs of string */
for (int i = 0; i < longstrlen; ++i) {
char c = rand() % 255 + 1;
longstr[i] = c;
}
longstr[longstrlen - 1] = NUL;
printf("benchmarking finding a needle in a longer string: %zu chars\n", strlen(longstr));
benchmark(akt_strbyte_short, longstr, ROUNDS, true);
benchmark(akt_strbyte, longstr, ROUNDS, true);
benchmark(strchr, longstr, ROUNDS, true);
benchmark(vim_strbyte, longstr, ROUNDS, true);
benchmark(akt_strbyte_short, longstr, ROUNDS, false);
benchmark(akt_strbyte, longstr, ROUNDS, false);
benchmark(strchr, longstr, ROUNDS, false);
benchmark(vim_strbyte, longstr, ROUNDS, false);
memset(longstr, 12, longstrlen);
longstr[longstrlen - 1] = NUL;
printf("benchmarking finding a needle that's usually not there: %zu chars\n", strlen(longstr));
benchmark(akt_strbyte_short, longstr, ROUNDS, false);
benchmark(akt_strbyte, longstr, ROUNDS, false);
benchmark(strchr, longstr, ROUNDS, false);
benchmark(vim_strbyte, longstr, ROUNDS, false);
free(longstr);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment