Created
February 26, 2017 23:29
-
-
Save joshring/b321fa162d19f8006c0c5d5395965f0b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Spin lock using xchg. Added backoff wait to avoid concurrent lock/unlock | |
* operation. | |
* Original code copied from http://locklessinc.com/articles/locks/ | |
* | |
* | |
* Adapted from: https://github.com/cyfdecyf/spinlock/tree/54943d3b16c15701ddaccffee0ac3a124160cd4a | |
* Optimised the backoff incrments to match the intel memory model, improved performance ~30% | |
* Generally simplified and cleaned up, made into a self contained example | |
*/ | |
#include <pthread.h> | |
#include <assert.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
#include <sys/time.h> | |
// Compile read-write barrier | |
#define barrier() asm volatile("": : :"memory") | |
// Pause instruction to prevent excess processor bus usage | |
#define cpu_relax() asm volatile("pause\n": : :"memory") | |
static inline unsigned short | |
xchg_8(void *ptr, unsigned char x) { | |
__asm__ __volatile__("xchgb %0,%1" | |
:"=r" (x) | |
:"m" (*(volatile unsigned char *)ptr), "0" (x) | |
:"memory"); | |
return x; | |
} | |
#define BUSY 1 | |
typedef unsigned char spinlock; | |
static inline void spin_lock(spinlock *lock) { | |
int wait = 1; | |
while (1) | |
{ | |
if (!xchg_8(lock, BUSY)) return; | |
// Waiting here is important to conserve CPU memory bandwidth | |
for (int i = 0; i < wait; i++) | |
cpu_relax(); | |
while (*lock) | |
{ | |
// Wait ~75-300 cycles for L3 cache flush, http://stackoverflow.com/questions/4087280/approximate-cost-to-access-various-caches-and-main-memory | |
wait += 75; | |
for (int i = 0; i < wait; i++) | |
cpu_relax(); | |
} | |
} | |
} | |
static inline void spin_unlock(spinlock *lock) { | |
barrier(); | |
*lock = 0; | |
} | |
static inline int spin_trylock(spinlock *lock) { | |
return xchg_8(lock, BUSY); | |
} | |
//------------------------------------------------------------- | |
// Global variables, including the lock variable | |
const int size = 131072; | |
int32_t counter[size]; | |
spinlock sl = 0; // <<<<----- Lock variable ------- | |
int nthr = 0; | |
static struct timeval start_time; | |
static struct timeval end_time; | |
//------------------------------------------------------------- | |
// Timing, using linux internal performance counters | |
static void | |
calc_time (struct timeval *start, struct timeval *end) | |
{ | |
if (end->tv_usec < start->tv_usec) | |
{ | |
end->tv_sec -= 1; | |
end->tv_usec += 1000000; | |
} | |
assert(end->tv_sec >= start->tv_sec); | |
assert(end->tv_usec >= start->tv_usec); | |
struct timeval interval = { | |
end->tv_sec - start->tv_sec, | |
end->tv_usec - start->tv_usec | |
}; | |
printf("%lu.%lu\n", (long)interval.tv_sec, (long)interval.tv_usec); | |
} | |
//------------------------------------------------------------- | |
// Thread testing function | |
void * | |
inc_thread(void *id) | |
{ | |
long cpuNum = (long)id; | |
long n = size / nthr; | |
long start = (cpuNum * n); | |
long stop = ((1+cpuNum) * n); | |
// Start lock & unlock test. | |
for (long i = 0; i < (long)(8192); i++) { | |
spin_lock(&sl); | |
for (int j = start; j < stop; j++) | |
counter[j]++; | |
spin_unlock(&sl); | |
} | |
return NULL; | |
} | |
//------------------------------------------------------------- | |
int main(int argc, const char *argv[]) { | |
pthread_t *thr; | |
if (argc != 2) { | |
printf("Usage: %s <num of threads>\n", argv[0]); | |
exit(1); | |
} | |
nthr = atoi(argv[1]); | |
thr = calloc(sizeof(*thr), nthr); | |
gettimeofday(&start_time, NULL); | |
// Start thread dispatch | |
for (long i = 0; i < nthr; i++) { | |
if (pthread_create(&thr[i], NULL, inc_thread, (void *)i) != 0) { | |
perror("thread creating failed"); | |
} | |
} | |
// Join threads | |
for (long i = 0; i < nthr; i++) | |
pthread_join(thr[i], NULL); | |
gettimeofday(&end_time, NULL); | |
calc_time(&start_time, &end_time); | |
long start = 0; | |
long stop = size; | |
printf("\nSingle threaded benchmark:\t"); | |
gettimeofday(&start_time, NULL); | |
for (long i = 0; i < (long)(8192); i++) { | |
for (int j = start; j < stop; j++) | |
counter[j]++; | |
} | |
gettimeofday(&end_time, NULL); | |
calc_time(&start_time, &end_time); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment