Created
March 11, 2017 12:51
-
-
Save joshring/b1b42841f14abe76999f448348256638 to your computer and use it in GitHub Desktop.
Seqlocks vs. OpenMP locks: seqlocks perform better thanks to lock-free reads and a faster write lock.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// OpenMP locks vs. a spinlock with lock-free reads, using a "holdoff" to avoid excessive CPU traffic
// With OpenMP the read is attempted while holding the lock, which is the best case; with the spinlock the read is lock-free and happens while the lock is contended
// Performance is still about 2x better for the spinlock
// compile with: gcc -O2 -fopenmp filename.c -o filename
// run with: ./filename 4 (or however many CPUs you want to use); by default it uses 4.
#include <stdio.h> | |
#include <omp.h> | |
#include <stdint.h> | |
#include <stdlib.h> | |
// Object used for the OpenMP lock benchmarks: a counter plus the OpenMP
// lock that is held around every access to it.
typedef struct {
    double     val;   // counter value, incremented and read under `lock`
    omp_lock_t lock;  // OpenMP simple lock guarding `val`
} object;
// Object used for the seqlock benchmark: a counter plus two 8-bit ticket
// counters that together act as the write lock.
typedef struct {
    double  val;      // counter value protected by the ticket counters
    uint8_t users;    // tickets handed out: incremented by a writer before it waits for its turn
    uint8_t writers;  // tickets completed: incremented by a writer on unlock; equals `users` when the lock is free
} object2;
int main(int argc, char* argv[]) | |
{ | |
int cpu = 4; | |
if( argc > 1) | |
cpu = atoi(argv[1]); | |
object a; | |
a.val = 0; | |
omp_init_lock(&a.lock); | |
unsigned iter = 30000000; | |
double temp, start, stop; | |
start = omp_get_wtime(); | |
#pragma omp parallel shared(temp) num_threads(cpu) | |
{ | |
omp_init_lock(&a.lock); // has to be initialised inside a parallel section | |
#pragma omp for | |
for(unsigned i=0; i<iter; i++) | |
{ | |
// Best case scenario where the read is inside the write lock region | |
omp_set_lock(&a.lock); | |
a.val = a.val + 1; | |
temp = a.val; | |
omp_unset_lock(&a.lock); | |
} | |
omp_destroy_lock(&a.lock); | |
} | |
stop = omp_get_wtime(); | |
printf("%f\n", temp); | |
printf("%f\n", a.val); | |
printf("openMP locked write and read in a single lock, time = %f seconds\n\n", stop-start); | |
a.val = 0; | |
omp_init_lock(&a.lock); | |
start = omp_get_wtime(); | |
#pragma omp parallel shared(temp) num_threads(cpu) | |
{ | |
omp_init_lock(&a.lock); // has to be initialised inside a parallel section | |
#pragma omp for | |
for(unsigned i=0; i<iter; i++) | |
{ | |
// Lock writing to the variable. | |
omp_set_lock(&a.lock); | |
a.val = a.val + 1; | |
omp_unset_lock(&a.lock); | |
// Separate lock for reading, simulating contention. | |
omp_set_lock(&a.lock); | |
temp = a.val; | |
omp_unset_lock(&a.lock); | |
} | |
omp_destroy_lock(&a.lock); | |
} | |
stop = omp_get_wtime(); | |
printf("%f\n", temp); | |
printf("%f\n", a.val); | |
printf("openMP locked write and read in two separate locks, time = %f seconds\n\n", stop-start); | |
object2 c; | |
c.writers = 0; | |
c.users = 0; | |
c.val = 0.0; | |
start = omp_get_wtime(); | |
#pragma omp parallel shared(temp) num_threads(cpu) | |
{ | |
unsigned wait = 16; | |
double buffer = 0; | |
#pragma omp for | |
for(unsigned i=0; i<iter; i++) | |
{ | |
//===================================================================== | |
// Writelock | |
// Increments the number of users, the original value is returned for | |
// comparison to the number of writers | |
//--------------------------------------------------------------------- | |
uint8_t users = __sync_fetch_and_add(&c.users, 1); | |
while(users != c.writers) | |
{ | |
for(int cnt=0; cnt < wait; cnt++) | |
__asm__ __volatile__("pause\n": : :"memory"); | |
} | |
//===================================================================== | |
c.val += 1; | |
//===================================================================== | |
// Write unlock | |
// Sets writers == users again, lock is free | |
//--------------------------------------------------------------------- | |
uint8_t writers = __sync_fetch_and_add(&c.writers, 1); | |
//===================================================================== | |
//===================================================================== | |
// Lock free reads | |
//--------------------------------------------------------------------- | |
uint8_t state1=0, state2=1; | |
for(;;) | |
{ | |
state1 = c.users; | |
buffer = c.val; | |
state2 = c.users; | |
if( (c.writers == c.users) ) | |
if( (state1 == state2) ) | |
{ | |
temp = buffer; | |
break; | |
} | |
// Holdoff querying the state for wait# of cycles to reduce traffic | |
// (performance critical region) | |
for (unsigned count = 0; count < wait; count++) | |
__asm__ __volatile__("pause\n": : :"memory"); | |
} | |
//===================================================================== | |
} | |
} | |
stop = omp_get_wtime(); | |
printf("temp = %f\n", temp); | |
printf("c.val = %f\n", c.val); | |
printf("Seqlock used to lock during write, with separate lock-free read %f\n", stop-start); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment