Skip to content

Instantly share code, notes, and snippets.

@benwills
Last active October 19, 2015 07:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benwills/63895e02a8755311f6a8 to your computer and use it in GitHub Desktop.
Save benwills/63895e02a8755311f6a8 to your computer and use it in GitHub Desktop.
Performance benchmark of a computed goto (lookup table) vs a switch statement in C.
#include <stdio.h>
#include <inttypes.h>
#include <sys/time.h>
#include <time.h>
#ifdef __MACH__
#include <mach/clock.h>
#include <mach/mach.h>
#endif
/*
this benchmarks switch statements vs computed gotos.
it's often said that a compiler will convert a
switch statement to a lookup table.
with computed gotos, we have one way of implementing
this in our code directly.
in my tests, switch statements end up being about 25% faster.
gcc5.1, -O3
results, early 2011 mbp. 100,000,000 iterations.
seven runs. throwing out fastest and slowest for each
ret_goto : 00h 00m 01s 867006000ns
ret_goto : 00h 00m 01s 888378000ns
ret_goto : 00h 00m 01s 910496000ns
ret_goto : 00h 00m 01s 917300000ns
ret_goto : 00h 00m 01s 925422000ns
ret_switch: 00h 00m 01s 294920000ns
ret_switch: 00h 00m 01s 325172000ns
ret_switch: 00h 00m 01s 339150000ns
ret_switch: 00h 00m 01s 340093000ns
ret_switch: 00h 00m 01s 346443000ns
* same performance difference if i swap the order of
which function loop runs first.
i haven't looked at the assembly, but guessing the compiler
implements some form of optimization that is different
than a computed goto...which i believe is essentially the
kind of lookup table "many folks" say a compiler might
implement to replace a switch statement.
all the fancy business about randomizing numbers and adding
large primes together is to ensure the compiler does
not optimize away the operations.
*/
//------------------------------------------------------------------------------
// nanosecond interval "timer"
uint64_t
tmr_ns_now(void)
{
struct timespec ts = {0};
//----------------------------------
// https://gist.github.com/jbenet/1087739
#ifdef __MACH__ // osx
clock_serv_t cclock;
mach_timespec_t mts;
host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
clock_get_time(cclock, &mts);
mach_port_deallocate(mach_task_self(), cclock);
ts.tv_sec = mts.tv_sec;
ts.tv_nsec = mts.tv_nsec;
#else
clock_gettime(CLOCK_REALTIME, &ts);
#endif
uint64_t ret = 0;
ret += (uint64_t)ts.tv_sec;
ret *= 1000ULL * 1000ULL * 1000ULL;
ret += (uint64_t)ts.tv_nsec;
return ret;
}
//--------------------------------------
void
tmr_ns_dsp(uint64_t tmr)
{
uint64_t nn = 1000ULL * 1000ULL * 1000ULL;
uint64_t ns = tmr % nn;
uint64_t sc = (tmr / nn) % 60;
uint64_t mn = ((tmr / nn) / 60) % 60;
uint64_t hr = (((tmr / nn) / 60) / 60) % 60;
printf("%02"PRIu64"h ", hr);
printf("%02"PRIu64"m ", mn);
printf("%02"PRIu64"s ", sc);
printf("%09"PRIu64"ns", ns);
}
//------------------------------------------------------------------------------
uint64_t rng_64x = 0;
uint64_t rng_64y = 0;
//--------------------------------------
void
rng_u64_ini(void)
{
uint64_t x = tmr_ns_now();
uint64_t y = x ^ 0xFFFFFFFFFFFFFFFF;
// last prime before 2^55
x ^= x * 36028797018963913ULL;
// last prime before 2^63
y ^= y * 9223372036854775783ULL;
// ... thanks mersenne
rng_64x = x;
rng_64y = y;
}
//--------------------------------------
uint64_t
rng_u64_gen(void)
{
// XSadd, Saito and Matsumoto
uint64_t x = rng_64x;
uint64_t const y = rng_64y;
rng_64x = y;
x ^= x << 23;
x ^= x >> 17;
x ^= y ^ (y >> 26);
rng_64y = x;
return x + y;
}
//------------------------------------------------------------------------------
uint64_t
tst_goto()
{
uint64_t num = rng_u64_gen();
num %= 16;
void *jmp_tbl[] = {
&&L_00, &&L_01, &&L_02, &&L_03,
&&L_04, &&L_05, &&L_06, &&L_07,
&&L_08, &&L_09, &&L_10, &&L_11,
&&L_12, &&L_13, &&L_14, &&L_15,
};
goto *jmp_tbl[num];
L_00: num = 18413381962852322687ULL + rng_u64_gen(); goto RET;
L_01: num = 17806508382742283977ULL + rng_u64_gen(); goto RET;
L_02: num = 17076670450730705909ULL + rng_u64_gen(); goto RET;
L_03: num = 16237183119943284463ULL + rng_u64_gen(); goto RET;
L_04: num = 15578794349642244839ULL + rng_u64_gen(); goto RET;
L_05: num = 14902457677632526457ULL + rng_u64_gen(); goto RET;
L_06: num = 14132584040985449731ULL + rng_u64_gen(); goto RET;
L_07: num = 13027813273630283239ULL + rng_u64_gen(); goto RET;
L_08: num = 12683218250036823889ULL + rng_u64_gen(); goto RET;
L_09: num = 11743514028444804211ULL + rng_u64_gen(); goto RET;
L_10: num = 11149616652624904919ULL + rng_u64_gen(); goto RET;
L_11: num = 10363976020133498597ULL + rng_u64_gen(); goto RET;
L_12: num = 10040706912106088837ULL + rng_u64_gen(); goto RET;
L_13: num = 12258751936488380971ULL + rng_u64_gen(); goto RET;
L_14: num = 13588946698766785117ULL + rng_u64_gen(); goto RET;
L_15: num = 16869227750867815237ULL + rng_u64_gen(); goto RET;
RET:
num *= rng_u64_gen();
return num;
}
//------------------------------------------------------------------------------
uint64_t
tst_switch()
{
uint64_t num = rng_u64_gen();
num %= 16;
switch(num)
{
case 0: num = 18413381962852322687ULL + rng_u64_gen(); break;
case 1: num = 17806508382742283977ULL + rng_u64_gen(); break;
case 2: num = 17076670450730705909ULL + rng_u64_gen(); break;
case 3: num = 16237183119943284463ULL + rng_u64_gen(); break;
case 4: num = 15578794349642244839ULL + rng_u64_gen(); break;
case 5: num = 14902457677632526457ULL + rng_u64_gen(); break;
case 6: num = 14132584040985449731ULL + rng_u64_gen(); break;
case 7: num = 13027813273630283239ULL + rng_u64_gen(); break;
case 8: num = 12683218250036823889ULL + rng_u64_gen(); break;
case 9: num = 11743514028444804211ULL + rng_u64_gen(); break;
case 10: num = 11149616652624904919ULL + rng_u64_gen(); break;
case 11: num = 10363976020133498597ULL + rng_u64_gen(); break;
case 12: num = 10040706912106088837ULL + rng_u64_gen(); break;
case 13: num = 12258751936488380971ULL + rng_u64_gen(); break;
case 14: num = 13588946698766785117ULL + rng_u64_gen(); break;
case 15: num = 16869227750867815237ULL + rng_u64_gen(); break;
}
num *= rng_u64_gen();
return num;
}
//------------------------------------------------------------------------------
int
main()
{
uint64_t itr = 100 * 1000 * 1000;
uint64_t tmr = 0;
uint64_t ret_goto = 0;
uint64_t ret_switch = 0;
rng_u64_ini();
printf("\n\n");
tmr = tmr_ns_now();
for (uint64_t i = itr; i; --i)
ret_switch += tst_switch();
tmr = tmr_ns_now() - tmr;
printf("ret_switch: %"PRIu64"\n", ret_switch);
tmr_ns_dsp(tmr);
printf("\n\n");
tmr = tmr_ns_now();
for (uint64_t i = itr; i; --i)
ret_goto += tst_goto();
tmr = tmr_ns_now() - tmr;
printf("ret_goto: %"PRIu64"\n", ret_goto);
tmr_ns_dsp(tmr);
printf("\n\n");
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment