@heatd · Created August 8, 2023 14:08
// Install github.com/google/benchmark and compile with: c++ -O2 -lbenchmark -lpthread
#include <cstdint>
#include <benchmark/benchmark.h>
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
#define CONFIG_64BIT
#define LOCK_PREFIX "lock;"
extern void __cmpxchg_wrong_size(void);
#define __X86_CASE_B 1
#define __X86_CASE_W 2
#define __X86_CASE_L 4
#ifdef CONFIG_64BIT
#define __X86_CASE_Q 8
#else
#define __X86_CASE_Q -1 /* sizeof will never return -1 */
#endif
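/*
 * Note: __cmpxchg_wrong_size() is deliberately never defined. The size
 * switch below is resolved at compile time, so for any supported operand
 * size the call is dead-coded away; an unsupported size (including 8 bytes
 * on !CONFIG_64BIT, where __X86_CASE_Q is -1 and can never equal a sizeof)
 * survives to link time and fails with an undefined-reference error.
 */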
/*
 * Atomic compare and exchange. Compare OLD with MEM, if identical,
 * store NEW in MEM. Return the initial value in MEM. Success is
 * indicated by comparing RETURN with OLD.
 */
#define __raw_cmpxchg(ptr, old, new, size, lock) \
({ \
    __typeof__(*(ptr)) __ret; \
    __typeof__(*(ptr)) __old = (old); \
    __typeof__(*(ptr)) __new = (new); \
    switch (size) { \
    case __X86_CASE_B: \
    { \
        volatile u8 *__ptr = (volatile u8 *)(ptr); \
        asm volatile(lock "cmpxchgb %2,%1" \
                     : "=a" (__ret), "+m" (*__ptr) \
                     : "q" (__new), "0" (__old) \
                     : "memory"); \
        break; \
    } \
    case __X86_CASE_W: \
    { \
        volatile u16 *__ptr = (volatile u16 *)(ptr); \
        asm volatile(lock "cmpxchgw %2,%1" \
                     : "=a" (__ret), "+m" (*__ptr) \
                     : "r" (__new), "0" (__old) \
                     : "memory"); \
        break; \
    } \
    case __X86_CASE_L: \
    { \
        volatile u32 *__ptr = (volatile u32 *)(ptr); \
        asm volatile(lock "cmpxchgl %2,%1" \
                     : "=a" (__ret), "+m" (*__ptr) \
                     : "r" (__new), "0" (__old) \
                     : "memory"); \
        break; \
    } \
    case __X86_CASE_Q: \
    { \
        volatile u64 *__ptr = (volatile u64 *)(ptr); \
        asm volatile(lock "cmpxchgq %2,%1" \
                     : "=a" (__ret), "+m" (*__ptr) \
                     : "r" (__new), "0" (__old) \
                     : "memory"); \
        break; \
    } \
    default: \
        __cmpxchg_wrong_size(); \
    } \
    __ret; \
})

#define __cmpxchg(ptr, old, new, size) \
    __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)

#define __sync_cmpxchg(ptr, old, new, size) \
    __raw_cmpxchg((ptr), (old), (new), (size), "lock; ")

#define __cmpxchg_local(ptr, old, new, size) \
    __raw_cmpxchg((ptr), (old), (new), (size), "")
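/*
 * Illustrative sketch, not part of the original benchmark: the
 * "success is indicated by comparing RETURN with OLD" convention in use.
 * atomic_inc_u64 is a hypothetical helper built on the __cmpxchg macro
 * above; it retries until the exchange observes an unchanged value.
 */
static inline u64 atomic_inc_u64(u64 *p) {
    u64 old = *(volatile u64 *) p;
    for (;;) {
        u64 prev = __cmpxchg(p, old, old + 1, sizeof(u64));
        if (prev == old) // RETURN == OLD: the store happened
            return old + 1;
        old = prev;      // lost the race; retry with the observed value
    }
}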
alignas(16) char input[16 * 1024] = {};

template <int op>
void do_benchmark(benchmark::State& state) {
    unsigned n = 0;
    u64 *p = reinterpret_cast<u64 *>(input);
    constexpr unsigned count = sizeof input / sizeof(u64);
    unsigned i = 0;
    u64 *target;

    for (auto _ : state) {
        // Walk the 16 KiB buffer one u64 slot at a time. After the first
        // pass every slot already holds 1, so the compare fails and the
        // cmpxchg variants exercise the no-store path from then on.
        target = p + (i++ % count);
        if constexpr (op == 0)
            n += __cmpxchg_local(target, 0, 1, sizeof(u64));
        else if constexpr (op == 1)
            n += __cmpxchg(target, 0, 1, sizeof(u64));
        else {
            // Non-atomic baseline: plain load plus conditional store,
            // with only a compiler barrier to keep the accesses in place.
            if (*(volatile u64 *) target == 0)
                *(volatile u64 *) target = 1;
            __asm__ __volatile__("" ::: "memory");
        }
    }

    // Keep the accumulated return values live so the loop is not elided.
    benchmark::DoNotOptimize(n);
}
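// op == 0: cmpxchg with no lock prefix (__cmpxchg_local, not SMP-atomic)
// op == 1: lock-prefixed cmpxchg (__cmpxchg, fully atomic)
// op == 2: plain load + conditional store behind a compiler barrier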
BENCHMARK_TEMPLATE(do_benchmark, 0);
BENCHMARK_TEMPLATE(do_benchmark, 1);
BENCHMARK_TEMPLATE(do_benchmark, 2);
BENCHMARK_MAIN();