Last active
January 15, 2016 15:11
-
-
Save aktau/9f52f812200d8d69a5d1 to your computer and use it in GitHub Desktop.
libuv: ghetto benchmark uv_hrtime implementations on OSX (a link for QPC on Windows, still handy: http://technet.microsoft.com/it-it/sysinternals/dn553408(v=vs.110).aspx)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <inttypes.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <mach/mach_time.h>
/*
 * Inlining controls for the benchmark.
 *
 * With -DDISABLE_INLINE: MAYBE_INLINE marks a function noinline and
 * FORCE_INLINE expands to nothing.
 * Without it: MAYBE_INLINE expands to nothing and FORCE_INLINE requests
 * always_inline.
 *
 * Both are written after the declarator of a prototype, e.g.
 *   uint64_t f(void) MAYBE_INLINE;
 */
#ifdef DISABLE_INLINE
#define MAYBE_INLINE __attribute__((noinline))
#define FORCE_INLINE
#else
#define MAYBE_INLINE
#define FORCE_INLINE __attribute__((always_inline))
#endif

/*
 * Branch hints: !!(x) normalises the condition to 0 or 1, and
 * __builtin_expect tells the compiler which of the two to expect.
 */
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
uint64_t uv_hrtime(void) MAYBE_INLINE; | |
uint64_t uv_hrtime(void) { | |
mach_timebase_info_data_t info; | |
if (mach_timebase_info(&info) != KERN_SUCCESS) | |
abort(); | |
return mach_absolute_time() * info.numer / info.denom; | |
} | |
uint64_t uv_hrtime_non_threadsafe(void) MAYBE_INLINE; | |
uint64_t uv_hrtime_non_threadsafe(void) { | |
static mach_timebase_info_data_t info = {0}; | |
if (info.denom == 0 && mach_timebase_info(&info) != KERN_SUCCESS) | |
abort(); | |
return mach_absolute_time() * info.numer / info.denom; | |
} | |
static pthread_mutex_t mu = PTHREAD_MUTEX_INITIALIZER; | |
uint64_t uv_hrtime_mutex(void) MAYBE_INLINE; | |
uint64_t uv_hrtime_mutex(void) { | |
static mach_timebase_info_data_t info = {0}; | |
pthread_mutex_lock(&mu); | |
if (info.denom == 0 && mach_timebase_info(&info) != KERN_SUCCESS) | |
abort(); | |
pthread_mutex_unlock(&mu); | |
return mach_absolute_time() * info.numer / info.denom; | |
} | |
static mach_timebase_info_data_t once_info; | |
static void init_info_once(void) { | |
if (mach_timebase_info(&once_info) != KERN_SUCCESS) | |
abort(); | |
} | |
static pthread_once_t once = PTHREAD_ONCE_INIT; | |
uint64_t MAYBE_INLINE uv_hrtime_once(void) { | |
pthread_once(&once, init_info_once); | |
return mach_absolute_time() * once_info.numer / once_info.denom; | |
} | |
#define ACCESS_ONCE(type, var) \ | |
(*(volatile type*) &(var)) | |
uint64_t uv_hrtime_fedor(void) MAYBE_INLINE; | |
uint64_t uv_hrtime_fedor(void) { | |
static mach_timebase_info_data_t info; | |
if (ACCESS_ONCE(uint32_t, info.numer) == 0 && | |
ACCESS_ONCE(uint32_t, info.denom) == 0 && | |
mach_timebase_info(&info) != KERN_SUCCESS) | |
abort(); | |
return mach_absolute_time() * info.numer / info.denom; | |
} | |
uint64_t uv_hrtime_atomic(void) MAYBE_INLINE; | |
uint64_t uv_hrtime_atomic(void) { | |
static mach_timebase_info_data_t info = {0}; | |
/* by the time denom is not 0, the structure will have been fully | |
* updated and no more atomic accesses are necessary */ | |
if (__atomic_load_n(&info.denom, __ATOMIC_SEQ_CST) == 0) { | |
mach_timebase_info_data_t tempinfo; | |
if (mach_timebase_info(&tempinfo) != KERN_SUCCESS) { | |
abort(); | |
} | |
__atomic_store_n(&info.numer, tempinfo.numer, __ATOMIC_SEQ_CST); | |
__atomic_store_n(&info.denom, tempinfo.denom, __ATOMIC_SEQ_CST); | |
} | |
return mach_absolute_time() * info.numer / info.denom; | |
} | |
uint64_t uv_hrtime_atomic_bopt(void) MAYBE_INLINE; | |
uint64_t uv_hrtime_atomic_bopt(void) { | |
static mach_timebase_info_data_t info = {0}; | |
/* by the time denom is not 0, the structure will have been fully | |
* updated and no more atomic accesses are necessary */ | |
if (unlikely(__atomic_load_n(&info.denom, __ATOMIC_SEQ_CST) == 0)) { | |
mach_timebase_info_data_t tempinfo; | |
if (mach_timebase_info(&tempinfo) != KERN_SUCCESS) { | |
abort(); | |
} | |
__atomic_store_n(&info.numer, tempinfo.numer, __ATOMIC_SEQ_CST); | |
__atomic_store_n(&info.denom, tempinfo.denom, __ATOMIC_SEQ_CST); | |
} | |
return mach_absolute_time() * info.numer / info.denom; | |
} | |
uint64_t uv_hrtime_atomic_bopt_aligned(void) MAYBE_INLINE __attribute__((aligned(8))); | |
uint64_t uv_hrtime_atomic_bopt_aligned(void) { | |
static mach_timebase_info_data_t info __attribute__((aligned(8))) = {0}; | |
/* by the time denom is not 0, the structure will have been fully | |
* updated and no more atomic accesses are necessary */ | |
if (unlikely(__atomic_load_n(&info.denom, __ATOMIC_SEQ_CST) == 0)) { | |
mach_timebase_info_data_t tempinfo; | |
if (mach_timebase_info(&tempinfo) != KERN_SUCCESS) { | |
abort(); | |
} | |
__atomic_store_n(&info.numer, tempinfo.numer, __ATOMIC_SEQ_CST); | |
__atomic_store_n(&info.denom, tempinfo.denom, __ATOMIC_SEQ_CST); | |
} | |
return mach_absolute_time() * info.numer / info.denom; | |
} | |
// returns dummy calculated value, print it out if you don't want the | |
// compiler to be clever | |
static uint64_t loopfn(size_t it, uint64_t (*fn)(void)) FORCE_INLINE; | |
static uint64_t loopfn(size_t it, uint64_t (*fn)(void)) { | |
uint64_t ts = 0; | |
for (size_t i = 0; i < it; ++i) { | |
ts += fn(); | |
} | |
return ts; | |
} | |
struct bench_ret { | |
uint64_t avg; /* avg runtime of a call to fn */ | |
uint64_t total; /* total runtime of calling fn iteration times */ | |
}; | |
// returns the average time (in ns) per call | |
static struct bench_ret benchfn(size_t it, uint64_t (*fn)(void), struct bench_ret lb, const char *name) FORCE_INLINE; | |
static struct bench_ret benchfn(size_t it, uint64_t (*fn)(void), struct bench_ret lb, const char *name) { | |
uint64_t start = uv_hrtime(); | |
uint64_t dummy = loopfn(it, fn); | |
uint64_t total = uv_hrtime() - start; | |
uint64_t avg = total / it; | |
if (lb.total) { | |
printf("%-25s took %10llu ns, %3llu ns per call, %6.1lf%% of lower-bound, dummy %llu\n", | |
name, total, avg, ((double) total / (double) lb.total) * 100.0, dummy); | |
} | |
else { | |
printf("%-25s took %10llu ns, %3llu ns per call, dummy %llu\n", | |
name, total, avg, dummy); | |
} | |
return (struct bench_ret) {.avg = avg, .total = total }; | |
} | |
int main() { | |
size_t it = 20000000; | |
struct bench_ret lb = benchfn(it, mach_absolute_time, (struct bench_ret) {0}, "mach_absolute_time"); | |
benchfn(it, uv_hrtime_atomic_bopt, lb, "atomic + unlikely"); | |
benchfn(it, uv_hrtime_atomic_bopt_aligned, lb, "atomic + unlikely + align"); | |
benchfn(it, uv_hrtime_fedor, lb, "ACCESS_ONCE"); | |
benchfn(it, uv_hrtime_mutex, lb, "pthread mutex"); | |
benchfn(it, uv_hrtime_once, lb, "pthread once"); | |
benchfn(it, uv_hrtime, lb, "current libuv"); | |
benchfn(it, uv_hrtime_non_threadsafe, lb, "non threadsafe"); | |
benchfn(it, uv_hrtime_atomic, lb, "atomic"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment