Skip to content

Instantly share code, notes, and snippets.

@alk
Last active December 26, 2015 00:09
Show Gist options
  • Save alk/7061687 to your computer and use it in GitHub Desktop.
Save alk/7061687 to your computer and use it in GitHub Desktop.
// -*- mode: C++; indent-tabs-mode: t; c-basic-offset: 8 -*-
//
// -- running on self-built ICU 52
// # ./collation_test
// running memcmp("00000001", "00000023"): 5.6 ns
// running icu_cs("00000001", "00000023"): 508 ns
// running icu_d("00000001", "00000023"): 78.3 ns
// running icu_du("00000001", "00000023"): 94.5 ns
// running memcmp("00000001", "00300023"): 6.2 ns
// running icu_cs("00000001", "00300023"): 260 ns
// running icu_d("00000001", "00300023"): 47.9 ns
// running icu_du("00000001", "00300023"): 52.1 ns
// running memcmp("и00000001", "и00300023"): 6.1 ns
// running icu_cs("и00000001", "и00300023"): 392 ns
// running icu_d("и00000001", "и00300023"): 317.3 ns
// running icu_du("и00000001", "и00300023"): 349.9 ns
// running memcmp("приветæ00000001⊼", "приветæ00300023À"): 7.3 ns
// running icu_cs("приветæ00000001⊼", "приветæ00300023À"): 454 ns
// running icu_d("приветæ00000001⊼", "приветæ00300023À"): 225.6 ns
// running icu_du("приветæ00000001⊼", "приветæ00300023À"): 99.7 ns
// running memcmp("приватæ00000001⊼", "приветæ00300023À"): 7.3 ns
// running icu_cs("приватæ00000001⊼", "приветæ00300023À"): 268 ns
// running icu_d("приватæ00000001⊼", "приветæ00300023À"): 303.7 ns
// running icu_du("приватæ00000001⊼", "приветæ00300023À"): 228.3 ns
// running memcmp("прииииииватæ00000001⊼", "прииииииветæ00300023À"): 6.7 ns
// running icu_cs("прииииииватæ00000001⊼", "прииииииветæ00300023À"): 328 ns
// running icu_d("прииииииватæ00000001⊼", "прииииииветæ00300023À"): 375 ns
// running icu_du("прииииииватæ00000001⊼", "прииииииветæ00300023À"): 249.3 ns
//
// -- running on Debian's ICU 4.8
// # ./collation_test
// running memcmp("00000001", "00000023"): 5.7 ns
// running icu_cs("00000001", "00000023"): 560 ns
// running icu_d("00000001", "00000023"): 79 ns
// running memcmp("00000001", "00300023"): 6.2 ns
// running icu_cs("00000001", "00300023"): 298 ns
// running icu_d("00000001", "00300023"): 46.5 ns
// running memcmp("и00000001", "и00300023"): 6.2 ns
// running icu_cs("и00000001", "и00300023"): 434 ns
// running icu_d("и00000001", "и00300023"): 341.8 ns
// running memcmp("приветæ00000001⊼", "приветæ00300023À"): 7.3 ns
// running icu_cs("приветæ00000001⊼", "приветæ00300023À"): 512 ns
// running icu_d("приветæ00000001⊼", "приветæ00300023À"): 210.6 ns
// running memcmp("приватæ00000001⊼", "приветæ00300023À"): 7.3 ns
// running icu_cs("приватæ00000001⊼", "приветæ00300023À"): 306 ns
// running icu_d("приватæ00000001⊼", "приветæ00300023À"): 323.3 ns
// running memcmp("прииииииватæ00000001⊼", "прииииииветæ00300023À"): 6.7 ns
// running icu_cs("прииииииватæ00000001⊼", "прииииииветæ00300023À"): 378 ns
// running icu_d("прииииииватæ00000001⊼", "прииииииветæ00300023À"): 386.6 ns
#include <unicode/coll.h>
#include <unicode/ucnv_err.h>
#include <unicode/ucnv.h>
#include <unicode/uenum.h>
#include <unicode/localpointer.h>
#include <unicode/ucol.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <assert.h>
// Byte-wise comparison of the first `len` bytes of `a` and `b`.
// Hands back memcmp()'s result untouched: negative, zero or positive.
static
int do_compare_raw_inner(const char *a, const char *b, int len)
{
	const size_t nbytes = (size_t)len;
	const int order = memcmp(a, b, nbytes);

	return order;
}
// Raw byte-order comparison of two length-delimited strings.
//
// The shared prefix (min(alen, blen) bytes) is compared with
// do_compare_raw_inner(). If the prefixes differ, that result is returned;
// otherwise the result is alen - blen, so the shorter string sorts first
// and equal-length identical strings compare as 0.
int do_compare_raw(const char *a, const char *b, int alen, int blen)
{
	int common = alen;
	if (blen < common) {
		common = blen;
	}

	const int prefix_order = do_compare_raw_inner(a, b, common);
	if (prefix_order == 0) {
		// Prefixes match: only the lengths can break the tie.
		return alen - blen;
	}
	return prefix_order;
}
// Collates two UTF-8 strings by walking them with ICU UCharIterators
// (uiter_setUTF8 + ucol_strcollIter), i.e. without converting the input
// to UTF-16 up front.
//
// a, b       - UTF-8 input bytes
// alen, blen - their lengths in bytes, passed straight to uiter_setUTF8()
//
// Returns the value produced by ucol_strcollIter().
//
// The collator is opened once with ucol_open("") and cached in a
// function-local static. Any ICU failure, whether opening the collator or
// comparing, prints a diagnostic and aborts. Opening used to return -1 on
// failure, but -1 cannot be told apart from a genuine "a sorts before b"
// result, so a broken ICU setup would silently produce bogus benchmark
// numbers.
int do_compare_icu_cs(const char *a, const char *b, int alen, int blen)
{
	static UCollator* coll = NULL;
	UErrorCode status = U_ZERO_ERROR;

	if (!coll) {
		coll = ucol_open("", &status);
		if (U_FAILURE(status)) {
			fprintf(stderr, "CouchStore CollateJSON: Couldn't initialize ICU (%d)\n", (int)status);
			abort();
		}
	}

	UCharIterator iterA, iterB;
	uiter_setUTF8(&iterA, a, alen);
	uiter_setUTF8(&iterB, b, blen);

	int result = ucol_strcollIter(coll, &iterA, &iterB, &status);
	if (U_FAILURE(status)) {
		fprintf(stderr, "CouchStore CollateJSON: ICU error %d\n", (int)status);
		abort();
	}
	return result;
}
// ucol_strcollUTF8() is documented as stable since ICU 50. From ICU 49 on,
// U_ICU_VERSION_MAJOR_NUM carries the full release number (49, 50, ...),
// while the 4.x series reports 4. Testing for ">= 5" therefore also enabled
// this code on ICU 49, where the function is not available; the guard has to
// be ">= 50".
#if (U_ICU_VERSION_MAJOR_NUM >= 50)
#define ct_HAVE_DU
// Collates two UTF-8 strings directly with ucol_strcollUTF8().
//
// a, b       - UTF-8 input bytes
// alen, blen - their lengths in bytes, passed straight to ICU
//
// Returns the value produced by ucol_strcollUTF8().
//
// The collator is opened once with ucol_open("") and cached in a
// function-local static. Any ICU failure prints a diagnostic and aborts;
// returning -1 on an open failure would be indistinguishable from a real
// "less than" result.
int do_compare_icu_du(const char *a, const char *b, int alen, int blen)
{
	static UCollator* coll = NULL;
	UErrorCode status = U_ZERO_ERROR;

	if (!coll) {
		coll = ucol_open("", &status);
		if (U_FAILURE(status)) {
			fprintf(stderr, "CouchStore CollateJSON: Couldn't initialize ICU (%d)\n", (int)status);
			abort();
		}
	}

	int result = ucol_strcollUTF8(coll, a, alen, b, blen, &status);
	if (U_FAILURE(status)) {
		fprintf(stderr, "CouchStore CollateJSON: ICU error %d\n", (int)status);
		abort();
	}
	return result;
}
#endif
static
int convert_utf8_to_uchar(const char *src, UChar *dst, int len)
{
static UConverter *c;
UErrorCode status = U_ZERO_ERROR;
if (!c) {
c = ucnv_open("UTF-8", &status);
if (!c) {
fprintf(stderr, "CouchStore CollateJSON: Couldn't initialize ICU (%d)\n", (int)status);
abort();
}
}
UChar *p = dst;
const char *s = src;
while (len) {
unsigned char ch = static_cast<unsigned char>(*s);
if ((ch & 0x80)) {
goto icu_conv;
}
*p++ = static_cast<UChar>(ch);
s++;
--len;
}
return p - dst;
icu_conv:
ucnv_toUnicode(c, &p, p + len, &s, s + len, NULL, TRUE, &status);
if (U_FAILURE(status)) {
abort();
}
return p - dst;
}
// Collates two UTF-8 strings by first converting both to UTF-16 in stack
// buffers (convert_utf8_to_uchar) and then calling ucol_strcoll().
//
// a, b       - UTF-8 input bytes
// alen, blen - their lengths in bytes
//
// Returns the value produced by ucol_strcoll(), or by do_compare_icu_cs()
// when the stack buffers cannot be used.
//
// Inputs longer than the stack buffers fall back to the iterator-based
// do_compare_icu_cs(). Negative lengths take the same route instead of
// being used to size or fill a buffer. The buffers are fixed-size arrays:
// the previous `UChar ab[alen]` form is a variable-length array, which is
// not standard C++ and is undefined for a zero or negative length (e.g. an
// empty string).
//
// The collator is opened once with ucol_open("") and cached in a
// function-local static. Failure to open it prints a diagnostic and aborts;
// returning -1 would be indistinguishable from a real "less than" result.
int do_compare_icu_d(const char *a, const char *b, int alen, int blen)
{
	enum { STACK_UCHARS = 256 };
	static UCollator* coll = NULL;
	UErrorCode status = U_ZERO_ERROR;

	if (!coll) {
		coll = ucol_open("", &status);
		if (U_FAILURE(status)) {
			fprintf(stderr, "CouchStore CollateJSON: Couldn't initialize ICU (%d)\n", (int)status);
			abort();
		}
	}

	if (alen < 0 || blen < 0 || alen > STACK_UCHARS || blen > STACK_UCHARS) {
		return do_compare_icu_cs(a, b, alen, blen);
	}

	// convert_utf8_to_uchar() writes at most one UChar per input byte, so
	// STACK_UCHARS entries are enough for any input accepted above.
	UChar ab[STACK_UCHARS];
	UChar bb[STACK_UCHARS];
	alen = convert_utf8_to_uchar(a, ab, alen);
	blen = convert_utf8_to_uchar(b, bb, blen);
	return ucol_strcoll(coll, ab, alen, bb, blen);
}
// Signature of a comparison routine accepted by bench_comparison(): two byte
// strings followed by their respective lengths, returning an int.
typedef int (*compare_fn_t)(const char *, const char *, int, int);
// Times `count` back-to-back calls of fn(a, b, alen, blen) using clock().
//
// fn         - comparison routine under test
// a, b       - inputs handed to every call, with lengths alen / blen
// count      - number of calls to make
// prv        - if non-NULL, receives the bitwise OR of all call results
//
// Returns the average time per call in nanoseconds.
//
// The OR-ed result is asserted to be negative after the run, so this is
// only usable with inputs for which fn reports a negative value.
double bench_comparison(compare_fn_t fn, const char *a, const char *b, int alen, int blen, int count, int *prv)
{
	int combined = 0;

	const clock_t started = clock();
	for (int iter = 0; iter < count; ++iter) {
		combined |= fn(a, b, alen, blen);
	}
	const clock_t finished = clock();

	if (prv != NULL) {
		*prv = combined;
	}
	assert(combined < 0);

	// Same evaluation order as a single chained expression:
	// ticks -> seconds -> seconds per call -> nanoseconds per call.
	const double elapsed_ticks = (double)finished - started;
	const double elapsed_seconds = elapsed_ticks / CLOCKS_PER_SEC;
	const double seconds_per_call = elapsed_seconds / count;
	return seconds_per_call * 1E9;
}
// Benchmarks every available comparison routine on the pair (a, b) and
// prints one "running <name>(...): <t> ns" line per routine to stdout.
//
// a, b - NUL-terminated strings; their strlen() is used as the byte length.
//
// Each label is flushed before its benchmark starts: the runs take seconds,
// and without the flush the partial line stays in stdio's buffer, so nothing
// shows which routine is currently being measured. The string lengths are
// computed once instead of before every run.
void bench_run(const char *a, const char *b)
{
	const int alen = (int)strlen(a);
	const int blen = (int)strlen(b);
	double t;

	printf("running memcmp(\"%s\", \"%s\"): ", a, b);
	fflush(stdout);
	t = bench_comparison(do_compare_raw, a, b, alen, blen, 100000000, NULL);
	printf("%g ns\n", t);

	printf("running icu_cs(\"%s\", \"%s\"): ", a, b);
	fflush(stdout);
	t = bench_comparison(do_compare_icu_cs, a, b, alen, blen, 5000000, NULL);
	printf("%g ns\n", t);

	printf("running icu_d(\"%s\", \"%s\"): ", a, b);
	fflush(stdout);
	t = bench_comparison(do_compare_icu_d, a, b, alen, blen, 100000000, NULL);
	printf("%g ns\n", t);

#ifdef ct_HAVE_DU
	printf("running icu_du(\"%s\", \"%s\"): ", a, b);
	fflush(stdout);
	t = bench_comparison(do_compare_icu_du, a, b, alen, blen, 100000000, NULL);
	printf("%g ns\n", t);
#endif
}
// Runs bench_run() over a fixed list of string pairs, in order.
// In every pair the first string is the one expected to sort first.
int main(void)
{
	static const char *const inputs[][2] = {
		{"00000001", "00000023"},
		{"00000001", "00300023"},
		{"и00000001", "и00300023"},
		{"приветæ00000001⊼", "приветæ00300023À"},
		{"приватæ00000001⊼", "приветæ00300023À"},
		{"прииииииватæ00000001⊼", "прииииииветæ00300023À"},
	};
	const size_t npairs = sizeof(inputs) / sizeof(inputs[0]);

	for (size_t i = 0; i < npairs; ++i) {
		bench_run(inputs[i][0], inputs[i][1]);
	}
	return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment