Last active
October 19, 2015 07:39
-
-
Save benwills/63895e02a8755311f6a8 to your computer and use it in GitHub Desktop.
Performance benchmark of a computed goto (lookup table) vs a switch statement in C.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <inttypes.h> | |
#include <sys/time.h> | |
#include <time.h> | |
#ifdef __MACH__ | |
#include <mach/clock.h> | |
#include <mach/mach.h> | |
#endif | |
/* | |
this benchmarks switch statements vs computed gotos. | |
it's often said that a compiler will convert a | |
switch statement to a lookup table. | |
with computed gotos, we have one way of implementing | |
this in our code directly. | |
in my tests, switch statements end up being about 25% faster. | |
gcc5.1, -O3 | |
results, early 2011 mbp. 100,000,000 iterations. | |
seven runs. throwing out fastest and slowest for each | |
ret_goto : 00h 00m 01s 867006000ns | |
ret_goto : 00h 00m 01s 888378000ns | |
ret_goto : 00h 00m 01s 910496000ns | |
ret_goto : 00h 00m 01s 917300000ns | |
ret_goto : 00h 00m 01s 925422000ns | |
ret_switch: 00h 00m 01s 294920000ns | |
ret_switch: 00h 00m 01s 325172000ns | |
ret_switch: 00h 00m 01s 339150000ns | |
ret_switch: 00h 00m 01s 340093000ns | |
ret_switch: 00h 00m 01s 346443000ns | |
* same performance difference if i swap the order of | |
which function loop runs first. | |
i haven't looked at the assembly, but guessing the compiler | |
implements some form of optimization that is different | |
than a computed goto...which i believe is essentially the | |
kind of lookup table "many folks" say a compiler might | |
implement to replace a switch statement. | |
all the fancy business about randomizing numbers and adding | |
large primes together is to ensure the compiler does | |
not optimize away the operations. | |
*/ | |
//------------------------------------------------------------------------------ | |
// nanosecond interval "timer" | |
uint64_t | |
tmr_ns_now(void) | |
{ | |
struct timespec ts = {0}; | |
//---------------------------------- | |
// https://gist.github.com/jbenet/1087739 | |
#ifdef __MACH__ // osx | |
clock_serv_t cclock; | |
mach_timespec_t mts; | |
host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); | |
clock_get_time(cclock, &mts); | |
mach_port_deallocate(mach_task_self(), cclock); | |
ts.tv_sec = mts.tv_sec; | |
ts.tv_nsec = mts.tv_nsec; | |
#else | |
clock_gettime(CLOCK_REALTIME, &ts); | |
#endif | |
uint64_t ret = 0; | |
ret += (uint64_t)ts.tv_sec; | |
ret *= 1000ULL * 1000ULL * 1000ULL; | |
ret += (uint64_t)ts.tv_nsec; | |
return ret; | |
} | |
//-------------------------------------- | |
void | |
tmr_ns_dsp(uint64_t tmr) | |
{ | |
uint64_t nn = 1000ULL * 1000ULL * 1000ULL; | |
uint64_t ns = tmr % nn; | |
uint64_t sc = (tmr / nn) % 60; | |
uint64_t mn = ((tmr / nn) / 60) % 60; | |
uint64_t hr = (((tmr / nn) / 60) / 60) % 60; | |
printf("%02"PRIu64"h ", hr); | |
printf("%02"PRIu64"m ", mn); | |
printf("%02"PRIu64"s ", sc); | |
printf("%09"PRIu64"ns", ns); | |
} | |
//------------------------------------------------------------------------------ | |
uint64_t rng_64x = 0; | |
uint64_t rng_64y = 0; | |
//-------------------------------------- | |
void | |
rng_u64_ini(void) | |
{ | |
uint64_t x = tmr_ns_now(); | |
uint64_t y = x ^ 0xFFFFFFFFFFFFFFFF; | |
// last prime before 2^55 | |
x ^= x * 36028797018963913ULL; | |
// last prime before 2^63 | |
y ^= y * 9223372036854775783ULL; | |
// ... thanks mersenne | |
rng_64x = x; | |
rng_64y = y; | |
} | |
//-------------------------------------- | |
uint64_t | |
rng_u64_gen(void) | |
{ | |
// XSadd, Saito and Matsumoto | |
uint64_t x = rng_64x; | |
uint64_t const y = rng_64y; | |
rng_64x = y; | |
x ^= x << 23; | |
x ^= x >> 17; | |
x ^= y ^ (y >> 26); | |
rng_64y = x; | |
return x + y; | |
} | |
//------------------------------------------------------------------------------ | |
uint64_t | |
tst_goto() | |
{ | |
uint64_t num = rng_u64_gen(); | |
num %= 16; | |
void *jmp_tbl[] = { | |
&&L_00, &&L_01, &&L_02, &&L_03, | |
&&L_04, &&L_05, &&L_06, &&L_07, | |
&&L_08, &&L_09, &&L_10, &&L_11, | |
&&L_12, &&L_13, &&L_14, &&L_15, | |
}; | |
goto *jmp_tbl[num]; | |
L_00: num = 18413381962852322687ULL + rng_u64_gen(); goto RET; | |
L_01: num = 17806508382742283977ULL + rng_u64_gen(); goto RET; | |
L_02: num = 17076670450730705909ULL + rng_u64_gen(); goto RET; | |
L_03: num = 16237183119943284463ULL + rng_u64_gen(); goto RET; | |
L_04: num = 15578794349642244839ULL + rng_u64_gen(); goto RET; | |
L_05: num = 14902457677632526457ULL + rng_u64_gen(); goto RET; | |
L_06: num = 14132584040985449731ULL + rng_u64_gen(); goto RET; | |
L_07: num = 13027813273630283239ULL + rng_u64_gen(); goto RET; | |
L_08: num = 12683218250036823889ULL + rng_u64_gen(); goto RET; | |
L_09: num = 11743514028444804211ULL + rng_u64_gen(); goto RET; | |
L_10: num = 11149616652624904919ULL + rng_u64_gen(); goto RET; | |
L_11: num = 10363976020133498597ULL + rng_u64_gen(); goto RET; | |
L_12: num = 10040706912106088837ULL + rng_u64_gen(); goto RET; | |
L_13: num = 12258751936488380971ULL + rng_u64_gen(); goto RET; | |
L_14: num = 13588946698766785117ULL + rng_u64_gen(); goto RET; | |
L_15: num = 16869227750867815237ULL + rng_u64_gen(); goto RET; | |
RET: | |
num *= rng_u64_gen(); | |
return num; | |
} | |
//------------------------------------------------------------------------------ | |
uint64_t | |
tst_switch() | |
{ | |
uint64_t num = rng_u64_gen(); | |
num %= 16; | |
switch(num) | |
{ | |
case 0: num = 18413381962852322687ULL + rng_u64_gen(); break; | |
case 1: num = 17806508382742283977ULL + rng_u64_gen(); break; | |
case 2: num = 17076670450730705909ULL + rng_u64_gen(); break; | |
case 3: num = 16237183119943284463ULL + rng_u64_gen(); break; | |
case 4: num = 15578794349642244839ULL + rng_u64_gen(); break; | |
case 5: num = 14902457677632526457ULL + rng_u64_gen(); break; | |
case 6: num = 14132584040985449731ULL + rng_u64_gen(); break; | |
case 7: num = 13027813273630283239ULL + rng_u64_gen(); break; | |
case 8: num = 12683218250036823889ULL + rng_u64_gen(); break; | |
case 9: num = 11743514028444804211ULL + rng_u64_gen(); break; | |
case 10: num = 11149616652624904919ULL + rng_u64_gen(); break; | |
case 11: num = 10363976020133498597ULL + rng_u64_gen(); break; | |
case 12: num = 10040706912106088837ULL + rng_u64_gen(); break; | |
case 13: num = 12258751936488380971ULL + rng_u64_gen(); break; | |
case 14: num = 13588946698766785117ULL + rng_u64_gen(); break; | |
case 15: num = 16869227750867815237ULL + rng_u64_gen(); break; | |
} | |
num *= rng_u64_gen(); | |
return num; | |
} | |
//------------------------------------------------------------------------------ | |
int | |
main() | |
{ | |
uint64_t itr = 100 * 1000 * 1000; | |
uint64_t tmr = 0; | |
uint64_t ret_goto = 0; | |
uint64_t ret_switch = 0; | |
rng_u64_ini(); | |
printf("\n\n"); | |
tmr = tmr_ns_now(); | |
for (uint64_t i = itr; i; --i) | |
ret_switch += tst_switch(); | |
tmr = tmr_ns_now() - tmr; | |
printf("ret_switch: %"PRIu64"\n", ret_switch); | |
tmr_ns_dsp(tmr); | |
printf("\n\n"); | |
tmr = tmr_ns_now(); | |
for (uint64_t i = itr; i; --i) | |
ret_goto += tst_goto(); | |
tmr = tmr_ns_now() - tmr; | |
printf("ret_goto: %"PRIu64"\n", ret_goto); | |
tmr_ns_dsp(tmr); | |
printf("\n\n"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment