Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Benchmark showing how locks sharing a cache line will contend with each other
#import <Foundation/Foundation.h>
#import <time.h>
#import <os/lock.h>
// Number of times TEST executes its body inside the timed region.
#define ITERS 2000

// Converts a total elapsed time in seconds into the average number of nanoseconds per
// TEST iteration. The argument is parenthesized so that an expression argument
// (e.g. `a + b`) is converted as a whole rather than only its first operand.
#define NSEC_PER_ITER(time) (((double)(time) * (double)NSEC_PER_SEC) / (double)ITERS)

// Runs `body` ITERS times, stores the total run time in seconds in `elapsed`, and prints
// a report line.
//
// The expansion site must have these variables in scope:
//   NSDate *start            - set to the wall-clock time at which the run began
//   NSTimeInterval elapsed   - set to the duration of the run, in seconds
//   NSTimeInterval baseline  - read only; 0 means "this run is the baseline"
//
// When `baseline` is 0 the per-iteration cost is printed; otherwise the speed-up relative
// to `baseline` is printed, labelled with the C string `name`.
//
// The duration is measured with clock_gettime_nsec_np(CLOCK_UPTIME_RAW), a monotonic
// clock, so that wall-clock adjustments during the run cannot distort the result.
// `start` is still assigned (before the timed region begins) so that existing callers
// which declare or inspect it keep working.
#define TEST(body, name) do {\
    start = [NSDate date];\
    uint64_t test_startNanos_ = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);\
    for (int i = 0; i < ITERS; i++) {\
        body\
    }\
    elapsed = (double)(clock_gettime_nsec_np(CLOCK_UPTIME_RAW) - test_startNanos_) / (double)NSEC_PER_SEC;\
    if (baseline == 0) printf("Baseline of %f nanoseconds to repeatedly lock-unlock 16 striped unfair locks\n", NSEC_PER_ITER(elapsed));\
    else printf("It was %.2fx as fast to use %s \n", baseline / elapsed, (name));\
} while (0)
// Sizing of the benchmark.
//
// kCacheLineBytes is the padding granularity used to keep the "spread" locks apart. It is
// deliberately 128 rather than 64: padding to 128 bytes separates the locks on machines
// with 64-byte cache lines as well as on machines with 128-byte lines, whereas a 64-byte
// stride would leave pairs of locks sharing a line on the latter.
// NOTE(review): confirm the line size of the target machine with `sysctl hw.cachelinesize`.
enum {
    kLockCount = 16,            // number of locks exercised per layout (one per dispatch_apply iteration)
    kLockCyclesPerWorker = 1000, // lock/unlock pairs performed on each lock per run
    kCacheLineBytes = 128,
    kLockStride = (int)(kCacheLineBytes / sizeof(os_unfair_lock)), // array slots per padded lock
};

_Static_assert(kCacheLineBytes % sizeof(os_unfair_lock) == 0,
               "cache-line padding must be a whole number of lock slots");

// The character buffers sit between the lock arrays in declaration order; they are never
// read or written. Presumably they are meant to keep unrelated data away from the arrays
// under test - the toolchain is not required to preserve this order.
static char unusedBuffer1[16384] = { 1 };
// All kLockCount locks are adjacent, so they occupy as few cache lines as possible.
static os_unfair_lock packed[kLockCount] __attribute__((aligned(kCacheLineBytes))) = { OS_UNFAIR_LOCK_INIT };
static char unusedBuffer2[16384] = { 1 };
static char unusedBuffer3[16384] = { 1 };
// Only every kLockStride-th slot is used, so each used lock starts its own
// kCacheLineBytes-sized, kCacheLineBytes-aligned region.
static os_unfair_lock spread[kLockCount * kLockStride] __attribute__((aligned(kCacheLineBytes))) = { OS_UNFAIR_LOCK_INIT };
static char unusedBuffer4[16384] = { 1 };

// Runs kLockCount dispatch_apply iterations on the default-priority global queue.
// Iteration `idx` locks and unlocks locks[idx * stride] kLockCyclesPerWorker times.
// No two iterations touch the same lock, so any slowdown between layouts comes from
// where the locks sit in memory, not from lock contention itself.
static void CycleLocks(os_unfair_lock *locks, size_t stride) {
    dispatch_apply(kLockCount, dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^(size_t idx) {
        os_unfair_lock *lock = &locks[idx * stride];
        for (int i = 0; i < kLockCyclesPerWorker; i++) {
            os_unfair_lock_lock(lock);
            os_unfair_lock_unlock(lock);
        }
    });
}

// Times the packed layout as the baseline, then the spread layout, and prints how much
// faster the spread layout ran.
int main(void) {
    @autoreleasepool {
        // These three variables are read and written by the TEST macro.
        NSDate *start = nil;
        NSTimeInterval elapsed = 0;
        NSTimeInterval baseline = 0;

        // baseline is still 0 here, so TEST reports this run as the baseline.
        TEST(CycleLocks(packed, 1);, "packed");
        baseline = elapsed;

        // Same number of lock operations as above, but each lock has a padded region to
        // itself. The original author reported this as ~5.5x as fast as the packed run on
        // their laptop, despite doing "the same" work.
        TEST(CycleLocks(spread, kLockStride);, "spread locks");
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.