-
-
Save heatd/49c9be23ccb1f4ad8dfeac231da2647a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Install Google Benchmark (github.com/google/benchmark) and compile with: c++ -O2 <this file> -lbenchmark
#include <cstdint> | |
#include <benchmark/benchmark.h> | |
// Short aliases for the fixed-width unsigned integer types used by the
// cmpxchg macros and by the benchmark body.
using u8 = std::uint8_t;
using u16 = std::uint16_t;
using u32 = std::uint32_t;
using u64 = std::uint64_t;
// This file unconditionally selects the 64-bit configuration, which enables
// the 8-byte (quadword) operand size below.
#define CONFIG_64BIT

// Instruction prefix that the default __cmpxchg() wrapper pastes in front of
// the cmpxchg mnemonic.
#define LOCK_PREFIX "lock;"

// Called from the `default:` branch of the size switch in __raw_cmpxchg().
// NOTE(review): no definition is visible in this file; presumably an
// unsupported operand size is meant to surface as an unresolved symbol at
// link time -- confirm before building without optimisation.
extern void __cmpxchg_wrong_size(void);

// Operand sizes in bytes, used as `case` labels when switching on the size
// of the object being exchanged.
#define __X86_CASE_B 1
#define __X86_CASE_W 2
#define __X86_CASE_L 4
#ifdef CONFIG_64BIT
#define __X86_CASE_Q 8
#else
#define __X86_CASE_Q -1 /* sizeof will never return -1 */
#endif
/*
 * Atomic compare and exchange.  Compare OLD with the value at PTR; if they
 * are identical, store NEW at PTR.  The macro evaluates to the value that
 * was at PTR before the operation, so success is detected by comparing the
 * result with OLD.
 *
 *   ptr  - pointer to the object to operate on; the result has the type
 *          of *ptr
 *   old  - expected current value
 *   new  - value to store when the comparison succeeds
 *   size - operand size in bytes; must match one of the __X86_CASE_*
 *          values, any other size reaches __cmpxchg_wrong_size()
 *   lock - string literal pasted in front of the instruction mnemonic
 *          ("" for an exchange without a lock prefix)
 *
 * Implementation notes:
 *   - Written as a GNU statement expression with x86 inline assembly.
 *   - "=a" / "0": the expected value is passed in, and the previous memory
 *     value is returned, through the accumulator register.
 *   - "+m": the target is both read and written in memory.
 *   - The "memory" clobber keeps the compiler from caching or reordering
 *     memory accesses across the instruction.
 *   - Every line of the definition must end with a backslash and nothing
 *     after it; trailing characters silently truncate the macro.
 */
#define __raw_cmpxchg(ptr, old, new, size, lock) \
({ \
    __typeof__(*(ptr)) __ret; \
    __typeof__(*(ptr)) __old = (old); \
    __typeof__(*(ptr)) __new = (new); \
    switch (size) { \
    case __X86_CASE_B: \
    { \
        volatile u8 *__ptr = (volatile u8 *)(ptr); \
        /* "q" restricts __new to a register with a byte-sized name */ \
        asm volatile(lock "cmpxchgb %2,%1" \
                     : "=a" (__ret), "+m" (*__ptr) \
                     : "q" (__new), "0" (__old) \
                     : "memory"); \
        break; \
    } \
    case __X86_CASE_W: \
    { \
        volatile u16 *__ptr = (volatile u16 *)(ptr); \
        asm volatile(lock "cmpxchgw %2,%1" \
                     : "=a" (__ret), "+m" (*__ptr) \
                     : "r" (__new), "0" (__old) \
                     : "memory"); \
        break; \
    } \
    case __X86_CASE_L: \
    { \
        volatile u32 *__ptr = (volatile u32 *)(ptr); \
        asm volatile(lock "cmpxchgl %2,%1" \
                     : "=a" (__ret), "+m" (*__ptr) \
                     : "r" (__new), "0" (__old) \
                     : "memory"); \
        break; \
    } \
    case __X86_CASE_Q: \
    { \
        volatile u64 *__ptr = (volatile u64 *)(ptr); \
        asm volatile(lock "cmpxchgq %2,%1" \
                     : "=a" (__ret), "+m" (*__ptr) \
                     : "r" (__new), "0" (__old) \
                     : "memory"); \
        break; \
    } \
    default: \
        /* unsupported operand size */ \
        __cmpxchg_wrong_size(); \
    } \
    __ret; \
})
/*
 * Convenience wrappers around __raw_cmpxchg() that differ only in the lock
 * prefix they forward.  All take (ptr, old, new, size) and evaluate to
 * whatever __raw_cmpxchg() evaluates to.
 */

/* Default flavour: the instruction is prefixed with LOCK_PREFIX. */
#define __cmpxchg(ptr, old, new, size) \
    __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)

/* Always uses a literal "lock; " prefix, regardless of LOCK_PREFIX. */
#define __sync_cmpxchg(ptr, old, new, size) \
    __raw_cmpxchg((ptr), (old), (new), (size), "lock; ")

/* No lock prefix at all: the bare cmpxchg instruction. */
#define __cmpxchg_local(ptr, old, new, size) \
    __raw_cmpxchg((ptr), (old), (new), (size), "")
// Zero-initialised, 16-byte-aligned 16 KiB scratch buffer; do_benchmark()
// reinterprets it as an array of u64 slots and operates on one slot per
// iteration.
alignas(16) char input[16 * 1024] = {};
template<int op> | |
void do_benchmark(benchmark::State& state) { | |
unsigned n = 0; | |
u64* p = reinterpret_cast<u64*>(input); | |
constexpr unsigned count = sizeof input / sizeof(u64); | |
unsigned i = 0; | |
u64 *target; | |
for(auto _ : state) { | |
u64 v{0}; | |
target = p + (i++ % count); | |
if constexpr (op == 0) | |
n += __cmpxchg_local(target, 0, 1, sizeof(u64)); | |
else if (op == 1) | |
n += __cmpxchg(target, 0, 1, sizeof(u64)); | |
else | |
{ | |
if (*(volatile u64 *) target == 0) | |
*(volatile u64*) target = 1; | |
__asm__ __volatile__("":::"memory"); | |
} | |
} | |
benchmark::DoNotOptimize(n); | |
} | |
BENCHMARK_TEMPLATE(do_benchmark, 0); | |
BENCHMARK_TEMPLATE(do_benchmark, 1); | |
BENCHMARK_TEMPLATE(do_benchmark, 2); | |
BENCHMARK_MAIN(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment