Last active
December 26, 2015 00:09
-
-
Save alk/7061687 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -*- mode: C++; indent-tabs-mode: t; c-basic-offset: 8 -*- | |
// | |
// -- running against a self-built ICU 52
// # ./collation_test | |
// running memcmp("00000001", "00000023"): 5.6 ns | |
// running icu_cs("00000001", "00000023"): 508 ns | |
// running icu_d("00000001", "00000023"): 78.3 ns | |
// running icu_du("00000001", "00000023"): 94.5 ns | |
// running memcmp("00000001", "00300023"): 6.2 ns | |
// running icu_cs("00000001", "00300023"): 260 ns | |
// running icu_d("00000001", "00300023"): 47.9 ns | |
// running icu_du("00000001", "00300023"): 52.1 ns | |
// running memcmp("и00000001", "и00300023"): 6.1 ns | |
// running icu_cs("и00000001", "и00300023"): 392 ns | |
// running icu_d("и00000001", "и00300023"): 317.3 ns | |
// running icu_du("и00000001", "и00300023"): 349.9 ns | |
// running memcmp("приветæ00000001⊼", "приветæ00300023À"): 7.3 ns | |
// running icu_cs("приветæ00000001⊼", "приветæ00300023À"): 454 ns | |
// running icu_d("приветæ00000001⊼", "приветæ00300023À"): 225.6 ns | |
// running icu_du("приветæ00000001⊼", "приветæ00300023À"): 99.7 ns | |
// running memcmp("приватæ00000001⊼", "приветæ00300023À"): 7.3 ns | |
// running icu_cs("приватæ00000001⊼", "приветæ00300023À"): 268 ns | |
// running icu_d("приватæ00000001⊼", "приветæ00300023À"): 303.7 ns | |
// running icu_du("приватæ00000001⊼", "приветæ00300023À"): 228.3 ns | |
// running memcmp("прииииииватæ00000001⊼", "прииииииветæ00300023À"): 6.7 ns | |
// running icu_cs("прииииииватæ00000001⊼", "прииииииветæ00300023À"): 328 ns | |
// running icu_d("прииииииватæ00000001⊼", "прииииииветæ00300023À"): 375 ns | |
// running icu_du("прииииииватæ00000001⊼", "прииииииветæ00300023À"): 249.3 ns | |
// | |
// -- running against Debian's ICU 4.8 (no icu_du results: that path is compiled out)
// # ./collation_test | |
// running memcmp("00000001", "00000023"): 5.7 ns | |
// running icu_cs("00000001", "00000023"): 560 ns | |
// running icu_d("00000001", "00000023"): 79 ns | |
// running memcmp("00000001", "00300023"): 6.2 ns | |
// running icu_cs("00000001", "00300023"): 298 ns | |
// running icu_d("00000001", "00300023"): 46.5 ns | |
// running memcmp("и00000001", "и00300023"): 6.2 ns | |
// running icu_cs("и00000001", "и00300023"): 434 ns | |
// running icu_d("и00000001", "и00300023"): 341.8 ns | |
// running memcmp("приветæ00000001⊼", "приветæ00300023À"): 7.3 ns | |
// running icu_cs("приветæ00000001⊼", "приветæ00300023À"): 512 ns | |
// running icu_d("приветæ00000001⊼", "приветæ00300023À"): 210.6 ns | |
// running memcmp("приватæ00000001⊼", "приветæ00300023À"): 7.3 ns | |
// running icu_cs("приватæ00000001⊼", "приветæ00300023À"): 306 ns | |
// running icu_d("приватæ00000001⊼", "приветæ00300023À"): 323.3 ns | |
// running memcmp("прииииииватæ00000001⊼", "прииииииветæ00300023À"): 6.7 ns | |
// running icu_cs("прииииииватæ00000001⊼", "прииииииветæ00300023À"): 378 ns | |
// running icu_d("прииииииватæ00000001⊼", "прииииииветæ00300023À"): 386.6 ns | |
#include <unicode/coll.h> | |
#include <unicode/ucnv_err.h> | |
#include <unicode/ucnv.h> | |
#include <unicode/uenum.h> | |
#include <unicode/localpointer.h> | |
#include <unicode/ucol.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <time.h> | |
#include <assert.h> | |
// Byte-wise comparison of the first `len` bytes of `a` and `b`.
// Returns memcmp()'s result unchanged: negative, zero or positive.
static
int do_compare_raw_inner(const char *a, const char *b, int len)
{
	const size_t nbytes = (size_t)len;

	return memcmp(a, b, nbytes);
}
int do_compare_raw(const char *a, const char *b, int alen, int blen) | |
{ | |
if (alen == blen) { | |
return do_compare_raw_inner(a, b, alen); | |
} | |
int mlen = (alen < blen) ? alen : blen; | |
int rv = do_compare_raw_inner(a, b, mlen); | |
if (rv != 0) { | |
return rv; | |
} | |
return alen - blen; | |
} | |
int do_compare_icu_cs(const char *a, const char *b, int alen, int blen) | |
{ | |
static UCollator* coll = NULL; | |
UErrorCode status = U_ZERO_ERROR; | |
if (!coll) { | |
coll = ucol_open("", &status); | |
if (U_FAILURE(status)) { | |
fprintf(stderr, "CouchStore CollateJSON: Couldn't initialize ICU (%d)\n", (int)status); | |
return -1; | |
} | |
} | |
UCharIterator iterA, iterB; | |
int result; | |
uiter_setUTF8(&iterA, a, alen); | |
uiter_setUTF8(&iterB, b, blen); | |
result = ucol_strcollIter(coll, &iterA, &iterB, &status); | |
if (U_FAILURE(status)) { | |
fprintf(stderr, "CouchStore CollateJSON: ICU error %d\n", (int)status); | |
abort(); | |
} | |
return result; | |
} | |
// ucol_strcollUTF8() is only available from ICU 50 on. ICU releases were
// numbered 4.4, 4.6, 4.8 and then 49, 50, ..., so a ">= 5" test would also
// enable this code on ICU 49, where the function does not exist yet.
#if (U_ICU_VERSION_MAJOR_NUM >= 50)
#define ct_HAVE_DU
// Collates two UTF-8 strings by handing them directly to ucol_strcollUTF8().
//
// The collator for the "" locale is opened on the first call and kept in a
// function-local static for all later calls.
//
// Returns the ucol_strcollUTF8() result as an int. If the collator cannot be
// opened, a message is printed and -1 is returned; a failure reported by the
// comparison itself aborts the process.
int do_compare_icu_du(const char *a, const char *b, int alen, int blen)
{
	static UCollator *coll = NULL;
	UErrorCode status = U_ZERO_ERROR;
	if (!coll) {
		coll = ucol_open("", &status);
		if (U_FAILURE(status)) {
			fprintf(stderr, "CouchStore CollateJSON: Couldn't initialize ICU (%d)\n", (int)status);
			return -1;
		}
	}
	int result = ucol_strcollUTF8(coll, a, alen, b, blen, &status);
	if (U_FAILURE(status)) {
		fprintf(stderr, "CouchStore CollateJSON: ICU error %d\n", (int)status);
		abort();
	}
	return result;
}
#endif
static | |
int convert_utf8_to_uchar(const char *src, UChar *dst, int len) | |
{ | |
static UConverter *c; | |
UErrorCode status = U_ZERO_ERROR; | |
if (!c) { | |
c = ucnv_open("UTF-8", &status); | |
if (!c) { | |
fprintf(stderr, "CouchStore CollateJSON: Couldn't initialize ICU (%d)\n", (int)status); | |
abort(); | |
} | |
} | |
UChar *p = dst; | |
const char *s = src; | |
while (len) { | |
unsigned char ch = static_cast<unsigned char>(*s); | |
if ((ch & 0x80)) { | |
goto icu_conv; | |
} | |
*p++ = static_cast<UChar>(ch); | |
s++; | |
--len; | |
} | |
return p - dst; | |
icu_conv: | |
ucnv_toUnicode(c, &p, p + len, &s, s + len, NULL, TRUE, &status); | |
if (U_FAILURE(status)) { | |
abort(); | |
} | |
return p - dst; | |
} | |
int do_compare_icu_d(const char *a, const char *b, int alen, int blen) | |
{ | |
static UCollator* coll = NULL; | |
UErrorCode status = U_ZERO_ERROR; | |
if (!coll) { | |
coll = ucol_open("", &status); | |
if (U_FAILURE(status)) { | |
fprintf(stderr, "CouchStore CollateJSON: Couldn't initialize ICU (%d)\n", (int)status); | |
return -1; | |
} | |
} | |
if (alen > 256 || blen > 256) { | |
return do_compare_icu_cs(a, b, alen, blen); | |
} | |
UChar ab[alen]; | |
UChar bb[blen]; | |
alen = convert_utf8_to_uchar(a, ab, alen); | |
blen = convert_utf8_to_uchar(b, bb, blen); | |
return ucol_strcoll(coll, ab, alen, bb, blen); | |
} | |
// Signature shared by every comparison routine under test:
// (a, b, length of a in bytes, length of b in bytes) -> ordering.
typedef int (*compare_fn_t)(const char *, const char *, int, int);

// Calls fn(a, b, alen, blen) `count` times and returns the average CPU time
// per call in nanoseconds, measured with clock().
//
// The results of all calls are OR-ed together; the combined value is stored
// through `prv` when it is non-NULL and is asserted to be negative, so every
// benchmark input pair is expected to compare as "a before b".
double bench_comparison(compare_fn_t fn, const char *a, const char *b, int alen, int blen, int count, int *prv)
{
	int combined = 0;

	const clock_t started = clock();
	for (int left = count; left > 0; --left) {
		combined |= fn(a, b, alen, blen);
	}
	const clock_t finished = clock();

	if (prv != NULL) {
		*prv = combined;
	}
	assert(combined < 0);

	// clock ticks -> seconds -> seconds per call -> nanoseconds per call
	const double elapsed_sec = ((double)finished - started) / CLOCKS_PER_SEC;
	return elapsed_sec / count * 1E9;
}
void bench_run(const char *a, const char *b) | |
{ | |
double t; | |
printf("running memcmp(\"%s\", \"%s\"): ", a, b); | |
t = bench_comparison(do_compare_raw, a, b, strlen(a), strlen(b), 100000000, NULL); | |
printf("%g ns\n", t); | |
printf("running icu_cs(\"%s\", \"%s\"): ", a, b); | |
t = bench_comparison(do_compare_icu_cs, a, b, strlen(a), strlen(b), 5000000, NULL); | |
printf("%g ns\n", t); | |
printf("running icu_d(\"%s\", \"%s\"): ", a, b); | |
t = bench_comparison(do_compare_icu_d, a, b, strlen(a), strlen(b), 100000000, NULL); | |
printf("%g ns\n", t); | |
#ifdef ct_HAVE_DU | |
printf("running icu_du(\"%s\", \"%s\"): ", a, b); | |
t = bench_comparison(do_compare_icu_du, a, b, strlen(a), strlen(b), 100000000, NULL); | |
printf("%g ns\n", t); | |
#endif | |
} | |
int main(void) | |
{ | |
bench_run("00000001", "00000023"); | |
bench_run("00000001", "00300023"); | |
bench_run("и00000001", "и00300023"); | |
bench_run("приветæ00000001⊼", "приветæ00300023À"); | |
bench_run("приватæ00000001⊼", "приветæ00300023À"); | |
bench_run("прииииииватæ00000001⊼", "прииииииветæ00300023À"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment