Benchmark showing how locks that share a cache line contend with each other (false sharing), compared with the same locks spread out one per cache line
#import <Foundation/Foundation.h>
#import <dispatch/dispatch.h>
#import <os/lock.h>
#import <stdio.h>
#import <time.h>

#define ITERS 2000
#define NSEC_PER_ITER(time) (((double)time * (double)NSEC_PER_SEC) / (double)ITERS)

#define TEST(body, name) do {\
    start = [NSDate date];\
    for (int i = 0; i < ITERS; i++) {\
        body\
    }\
    elapsed = -[start timeIntervalSinceNow];\
    if (baseline == 0) printf("Baseline of %f nanoseconds to repeatedly lock-unlock 16 striped unfair locks\n", NSEC_PER_ITER(elapsed));\
    else printf("It was %.2fx as fast to use %s\n", baseline / elapsed, name);\
} while (0)

// The unused buffers pad the lock arrays away from other globals (and from each
// other), so nothing else happens to land on the same cache lines.
static char unusedBuffer1[16384] = { 1 };
// 16 locks packed back to back: at 4 bytes each, all 16 fit in a single 64-byte cache line.
static os_unfair_lock packed[16] __attribute__((aligned(64))) = { OS_UNFAIR_LOCK_INIT };
static char unusedBuffer2[16384] = { 1 };
static char unusedBuffer3[16384] = { 1 };
// 16 locks spread out: only every 16th slot is used, so each live lock gets its own cache line.
// (Only the first element is explicitly initialized; the rest are zero, which is also the unlocked state.)
static os_unfair_lock spread[16 * 16] __attribute__((aligned(64))) = { OS_UNFAIR_LOCK_INIT };
static char unusedBuffer4[16384] = { 1 };

int main() {
    NSDate *start = nil;
    NSTimeInterval elapsed = 0;
    NSTimeInterval baseline = 0;

    void (^packedTest)(void) = ^{
        dispatch_apply(16, dispatch_get_global_queue(0, 0), ^(size_t idx) {
            for (int i = 0; i < 1000; i++) {
                os_unfair_lock_lock(&packed[idx]);
                os_unfair_lock_unlock(&packed[idx]);
            }
        });
    };
    TEST(packedTest();, "packed");
    baseline = elapsed;

    void (^spreadTest)(void) = ^{
        dispatch_apply(16, dispatch_get_global_queue(0, 0), ^(size_t idx) {
            for (int i = 0; i < 1000; i++) {
                // Only use 1 lock out of every 16 slots. 4 bytes * 16 is 64 bytes, which is the size of a cache line.
                os_unfair_lock_lock(&spread[idx * 16]);
                os_unfair_lock_unlock(&spread[idx * 16]);
            }
        });
    };
    TEST(spreadTest();, "spread locks"); // On my laptop, this is ~5.5x as fast as the previous one, despite doing "the same" work
}
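
A related trick, sketched below (not part of the benchmark above): instead of striding through a flat array, pad each lock into a cache-line-sized struct so an ordinary array of them never shares lines. PaddedLock and paddedLocks are illustrative names, and the 64-byte figure assumes the same cache-line size the benchmark assumes.

// Minimal sketch, assuming a 64-byte cache line (uses <os/lock.h>, imported above).
// Pad each lock out to a full line so adjacent array elements never share one.
typedef struct {
    os_unfair_lock lock;
    char pad[64 - sizeof(os_unfair_lock)];
} PaddedLock;

// Align the array itself so element boundaries coincide with cache-line boundaries.
// Zero-initialized storage is already the unlocked state, same as OS_UNFAIR_LOCK_INIT.
static PaddedLock paddedLocks[16] __attribute__((aligned(64)));

// Usage mirrors the spread case: take &paddedLocks[idx].lock instead of &spread[idx * 16].

Compared with the flat spread array, this keeps the indexing simple (paddedLocks[idx]) at the cost of roughly 60 wasted bytes per lock.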