Skip to content

Instantly share code, notes, and snippets.

@joshring
Created February 26, 2017 23:29
Show Gist options
  • Save joshring/b321fa162d19f8006c0c5d5395965f0b to your computer and use it in GitHub Desktop.
Save joshring/b321fa162d19f8006c0c5d5395965f0b to your computer and use it in GitHub Desktop.
/* Spin lock using xchg. Added backoff wait to avoid concurrent lock/unlock
* operation.
* Original code copied from http://locklessinc.com/articles/locks/
*
*
* Adapted from: https://github.com/cyfdecyf/spinlock/tree/54943d3b16c15701ddaccffee0ac3a124160cd4a
* Optimised the backoff incrments to match the intel memory model, improved performance ~30%
* Generally simplified and cleaned up, made into a self contained example
*/
#include <pthread.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/time.h>
// Compile read-write barrier
#define barrier() asm volatile("": : :"memory")
// Pause instruction to prevent excess processor bus usage
#define cpu_relax() asm volatile("pause\n": : :"memory")
static inline unsigned short
xchg_8(void *ptr, unsigned char x) {
__asm__ __volatile__("xchgb %0,%1"
:"=r" (x)
:"m" (*(volatile unsigned char *)ptr), "0" (x)
:"memory");
return x;
}
#define BUSY 1
typedef unsigned char spinlock;
static inline void spin_lock(spinlock *lock) {
int wait = 1;
while (1)
{
if (!xchg_8(lock, BUSY)) return;
// Waiting here is important to conserve CPU memory bandwidth
for (int i = 0; i < wait; i++)
cpu_relax();
while (*lock)
{
// Wait ~75-300 cycles for L3 cache flush, http://stackoverflow.com/questions/4087280/approximate-cost-to-access-various-caches-and-main-memory
wait += 75;
for (int i = 0; i < wait; i++)
cpu_relax();
}
}
}
static inline void spin_unlock(spinlock *lock) {
barrier();
*lock = 0;
}
static inline int spin_trylock(spinlock *lock) {
return xchg_8(lock, BUSY);
}
//-------------------------------------------------------------
// Global variables, including the lock variable
const int size = 131072;
int32_t counter[size];
spinlock sl = 0; // <<<<----- Lock variable -------
int nthr = 0;
static struct timeval start_time;
static struct timeval end_time;
//-------------------------------------------------------------
// Timing, using linux internal performance counters
static void
calc_time (struct timeval *start, struct timeval *end)
{
if (end->tv_usec < start->tv_usec)
{
end->tv_sec -= 1;
end->tv_usec += 1000000;
}
assert(end->tv_sec >= start->tv_sec);
assert(end->tv_usec >= start->tv_usec);
struct timeval interval = {
end->tv_sec - start->tv_sec,
end->tv_usec - start->tv_usec
};
printf("%lu.%lu\n", (long)interval.tv_sec, (long)interval.tv_usec);
}
//-------------------------------------------------------------
// Thread testing function
void *
inc_thread(void *id)
{
long cpuNum = (long)id;
long n = size / nthr;
long start = (cpuNum * n);
long stop = ((1+cpuNum) * n);
// Start lock & unlock test.
for (long i = 0; i < (long)(8192); i++) {
spin_lock(&sl);
for (int j = start; j < stop; j++)
counter[j]++;
spin_unlock(&sl);
}
return NULL;
}
//-------------------------------------------------------------
int main(int argc, const char *argv[]) {
pthread_t *thr;
if (argc != 2) {
printf("Usage: %s <num of threads>\n", argv[0]);
exit(1);
}
nthr = atoi(argv[1]);
thr = calloc(sizeof(*thr), nthr);
gettimeofday(&start_time, NULL);
// Start thread dispatch
for (long i = 0; i < nthr; i++) {
if (pthread_create(&thr[i], NULL, inc_thread, (void *)i) != 0) {
perror("thread creating failed");
}
}
// Join threads
for (long i = 0; i < nthr; i++)
pthread_join(thr[i], NULL);
gettimeofday(&end_time, NULL);
calc_time(&start_time, &end_time);
long start = 0;
long stop = size;
printf("\nSingle threaded benchmark:\t");
gettimeofday(&start_time, NULL);
for (long i = 0; i < (long)(8192); i++) {
for (int j = start; j < stop; j++)
counter[j]++;
}
gettimeofday(&end_time, NULL);
calc_time(&start_time, &end_time);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment