adah1972/ordering.cpp

## ordering.cpp
// Jeff Preshing's example that shows the reordering of Intel CPUs made to
// work on both Linux and macOS.
//
// Original source:
//
//   https://preshing.com/20120515/memory-reordering-caught-in-the-act/
//
// Apple semaphore change is based on the answer by dho at:
//
//   https://stackoverflow.com/questions/27736618/why-are-sem-init-sem-getvalue-sem-destroy-deprecated-on-mac-os-x-and-w
//

#include <errno.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

// Set either of these to 1 to prevent CPU reordering
#define USE_CPU_FENCE         0
#define USE_SINGLE_HW_THREAD  0  // Supported on Linux, but not Cygwin or PS3

#if USE_SINGLE_HW_THREAD
#include <sched.h>
#endif

#ifdef __APPLE__
#include <dispatch/dispatch.h>
#else
#include <semaphore.h>
#endif

struct rk_sema {
#ifdef __APPLE__
    dispatch_semaphore_t    sem;
#else
    sem_t                   sem;
#endif
};


static inline void
rk_sema_init(struct rk_sema *s, uint32_t value)
{
#ifdef __APPLE__
    dispatch_semaphore_t *sem = &s->sem;

    *sem = dispatch_semaphore_create(value);
#else
    sem_init(&s->sem, 0, value);
#endif
}

static inline void
rk_sema_wait(struct rk_sema *s)
{

#ifdef __APPLE__
    dispatch_semaphore_wait(s->sem, DISPATCH_TIME_FOREVER);
#else
    int r;

    do {
            r = sem_wait(&s->sem);
    } while (r == -1 && errno == EINTR);
#endif
}

static inline void
rk_sema_post(struct rk_sema *s)
{

#ifdef __APPLE__
    dispatch_semaphore_signal(s->sem);
#else
    sem_post(&s->sem);
#endif
}


//-------------------------------------
//  MersenneTwister
//  A thread-safe random number generator with good randomness
//  in a small number of instructions. We'll use it to introduce
//  random timing delays.
//-------------------------------------
#define MT_IA  397
#define MT_LEN 624

class MersenneTwister
{
    unsigned int m_buffer[MT_LEN];
    int m_index;

public:
    MersenneTwister(unsigned int seed);
    // Declare noinline so that the function call acts as a compiler barrier:
    unsigned int integer() __attribute__((noinline));
};

MersenneTwister::MersenneTwister(unsigned int seed)
{
    // Initialize by filling with the seed, then iterating
    // the algorithm a bunch of times to shuffle things up.
    for (int i = 0; i < MT_LEN; i++)
        m_buffer[i] = seed;
    m_index = 0;
    for (int i = 0; i < MT_LEN * 100; i++)
        integer();
}

unsigned int MersenneTwister::integer()
{
    // Indices
    int i = m_index;
    int i2 = m_index + 1; if (i2 >= MT_LEN) i2 = 0; // wrap-around
    int j = m_index + MT_IA; if (j >= MT_LEN) j -= MT_LEN; // wrap-around

    // Twist
    unsigned int s = (m_buffer[i] & 0x80000000) | (m_buffer[i2] & 0x7fffffff);
    unsigned int r = m_buffer[j] ^ (s >> 1) ^ ((s & 1) * 0x9908B0DF);
    m_buffer[m_index] = r;
    m_index = i2;

    // Swizzle
    r ^= (r >> 11);
    r ^= (r << 7) & 0x9d2c5680UL;
    r ^= (r << 15) & 0xefc60000UL;
    r ^= (r >> 18);
    return r;
}


//-------------------------------------
//  Main program, as decribed in the post
//-------------------------------------
rk_sema beginSema1;
rk_sema beginSema2;
rk_sema endSema;

int X;
int Y;
int r1, r2;

void *thread1Func(void *param)
{
    MersenneTwister random(1);
    for (;;)
    {
        rk_sema_wait(&beginSema1);  // Wait for signal
        while (random.integer() % 8 != 0) {}  // Random delay

        // ----- THE TRANSACTION! -----
        X = 1;
#if USE_CPU_FENCE
        asm volatile("mfence" ::: "memory");  // Prevent CPU reordering
#else
        asm volatile("" ::: "memory");  // Prevent compiler reordering
#endif
        r1 = Y;

        rk_sema_post(&endSema);  // Notify transaction complete
    }
    return NULL;  // Never returns
};

void *thread2Func(void *param)
{
    MersenneTwister random(2);
    for (;;)
    {
        rk_sema_wait(&beginSema2);  // Wait for signal
        while (random.integer() % 8 != 0) {}  // Random delay

        // ----- THE TRANSACTION! -----
        Y = 1;
#if USE_CPU_FENCE
        asm volatile("mfence" ::: "memory");  // Prevent CPU reordering
#else
        asm volatile("" ::: "memory");  // Prevent compiler reordering
#endif
        r2 = X;

        rk_sema_post(&endSema);  // Notify transaction complete
    }
    return NULL;  // Never returns
};

int main()
{
    // Initialize the semaphores
    rk_sema_init(&beginSema1, 0);
    rk_sema_init(&beginSema2, 0);
    rk_sema_init(&endSema, 0);

    // Spawn the threads
    pthread_t thread1, thread2;
    pthread_create(&thread1, NULL, thread1Func, NULL);
    pthread_create(&thread2, NULL, thread2Func, NULL);

#if USE_SINGLE_HW_THREAD
    // Force thread affinities to the same cpu core.
    cpu_set_t cpus;
    CPU_ZERO(&cpus);
    CPU_SET(0, &cpus);
    pthread_setaffinity_np(thread1, sizeof(cpu_set_t), &cpus);
    pthread_setaffinity_np(thread2, sizeof(cpu_set_t), &cpus);
#endif

    // Repeat the experiment ad infinitum
    int detected = 0;
    for (int iterations = 1; ; iterations++)
    {
        // Reset X and Y
        X = 0;
        Y = 0;
        // Signal both threads
        rk_sema_post(&beginSema1);
        rk_sema_post(&beginSema2);
        // Wait for both threads
        rk_sema_wait(&endSema);
        rk_sema_wait(&endSema);
        // Check if there was a simultaneous reorder
        if (r1 == 0 && r2 == 0)
        {
            detected++;
            printf("%d reorders detected after %d iterations\n", detected,
                   iterations);
        }
    }
    return 0;  // Never returns
}
	// Jeff Preshing's example that shows the reordering of Intel CPUs made to
	// work on both Linux and macOS.
	//
	// Original source:
	//
	// https://preshing.com/20120515/memory-reordering-caught-in-the-act/
	//
	// Apple semaphore change is based on the answer by dho at:
	//
	// https://stackoverflow.com/questions/27736618/why-are-sem-init-sem-getvalue-sem-destroy-deprecated-on-mac-os-x-and-w
	//

	#include <errno.h>
	#include <pthread.h>
	#include <stdint.h>
	#include <stdio.h>

	// Set either of these to 1 to prevent CPU reordering
	#define USE_CPU_FENCE 0
	#define USE_SINGLE_HW_THREAD 0 // Supported on Linux, but not Cygwin or PS3

	#if USE_SINGLE_HW_THREAD
	#include <sched.h>
	#endif

	#ifdef __APPLE__
	#include <dispatch/dispatch.h>
	#else
	#include <semaphore.h>
	#endif

	struct rk_sema {
	#ifdef __APPLE__
	dispatch_semaphore_t sem;
	#else
	sem_t sem;
	#endif
	};


	static inline void
	rk_sema_init(struct rk_sema *s, uint32_t value)
	{
	#ifdef __APPLE__
	dispatch_semaphore_t *sem = &s->sem;

	*sem = dispatch_semaphore_create(value);
	#else
	sem_init(&s->sem, 0, value);
	#endif
	}

	static inline void
	rk_sema_wait(struct rk_sema *s)
	{

	#ifdef __APPLE__
	dispatch_semaphore_wait(s->sem, DISPATCH_TIME_FOREVER);
	#else
	int r;

	do {
	r = sem_wait(&s->sem);
	} while (r == -1 && errno == EINTR);
	#endif
	}

	static inline void
	rk_sema_post(struct rk_sema *s)
	{

	#ifdef __APPLE__
	dispatch_semaphore_signal(s->sem);
	#else
	sem_post(&s->sem);
	#endif
	}


	//-------------------------------------
	// MersenneTwister
	// A thread-safe random number generator with good randomness
	// in a small number of instructions. We'll use it to introduce
	// random timing delays.
	//-------------------------------------
	#define MT_IA 397
	#define MT_LEN 624

	class MersenneTwister
	{
	unsigned int m_buffer[MT_LEN];
	int m_index;

	public:
	MersenneTwister(unsigned int seed);
	// Declare noinline so that the function call acts as a compiler barrier:
	unsigned int integer() __attribute__((noinline));
	};

	MersenneTwister::MersenneTwister(unsigned int seed)
	{
	// Initialize by filling with the seed, then iterating
	// the algorithm a bunch of times to shuffle things up.
	for (int i = 0; i < MT_LEN; i++)
	m_buffer[i] = seed;
	m_index = 0;
	for (int i = 0; i < MT_LEN * 100; i++)
	integer();
	}

	unsigned int MersenneTwister::integer()
	{
	// Indices
	int i = m_index;
	int i2 = m_index + 1; if (i2 >= MT_LEN) i2 = 0; // wrap-around
	int j = m_index + MT_IA; if (j >= MT_LEN) j -= MT_LEN; // wrap-around

	// Twist
	unsigned int s = (m_buffer[i] & 0x80000000) \| (m_buffer[i2] & 0x7fffffff);
	unsigned int r = m_buffer[j] ^ (s >> 1) ^ ((s & 1) * 0x9908B0DF);
	m_buffer[m_index] = r;
	m_index = i2;

	// Swizzle
	r ^= (r >> 11);
	r ^= (r << 7) & 0x9d2c5680UL;
	r ^= (r << 15) & 0xefc60000UL;
	r ^= (r >> 18);
	return r;
	}


	//-------------------------------------
	// Main program, as decribed in the post
	//-------------------------------------
	rk_sema beginSema1;
	rk_sema beginSema2;
	rk_sema endSema;

	int X;
	int Y;
	int r1, r2;

	void thread1Func(void param)
	{
	MersenneTwister random(1);
	for (;;)
	{
	rk_sema_wait(&beginSema1); // Wait for signal
	while (random.integer() % 8 != 0) {} // Random delay

	// ----- THE TRANSACTION! -----
	X = 1;
	#if USE_CPU_FENCE
	asm volatile("mfence" ::: "memory"); // Prevent CPU reordering
	#else
	asm volatile("" ::: "memory"); // Prevent compiler reordering
	#endif
	r1 = Y;

	rk_sema_post(&endSema); // Notify transaction complete
	}
	return NULL; // Never returns
	};

	void thread2Func(void param)
	{
	MersenneTwister random(2);
	for (;;)
	{
	rk_sema_wait(&beginSema2); // Wait for signal
	while (random.integer() % 8 != 0) {} // Random delay

	// ----- THE TRANSACTION! -----
	Y = 1;
	#if USE_CPU_FENCE
	asm volatile("mfence" ::: "memory"); // Prevent CPU reordering
	#else
	asm volatile("" ::: "memory"); // Prevent compiler reordering
	#endif
	r2 = X;

	rk_sema_post(&endSema); // Notify transaction complete
	}
	return NULL; // Never returns
	};

	int main()
	{
	// Initialize the semaphores
	rk_sema_init(&beginSema1, 0);
	rk_sema_init(&beginSema2, 0);
	rk_sema_init(&endSema, 0);

	// Spawn the threads
	pthread_t thread1, thread2;
	pthread_create(&thread1, NULL, thread1Func, NULL);
	pthread_create(&thread2, NULL, thread2Func, NULL);

	#if USE_SINGLE_HW_THREAD
	// Force thread affinities to the same cpu core.
	cpu_set_t cpus;
	CPU_ZERO(&cpus);
	CPU_SET(0, &cpus);
	pthread_setaffinity_np(thread1, sizeof(cpu_set_t), &cpus);
	pthread_setaffinity_np(thread2, sizeof(cpu_set_t), &cpus);
	#endif

	// Repeat the experiment ad infinitum
	int detected = 0;
	for (int iterations = 1; ; iterations++)
	{
	// Reset X and Y
	X = 0;
	Y = 0;
	// Signal both threads
	rk_sema_post(&beginSema1);
	rk_sema_post(&beginSema2);
	// Wait for both threads
	rk_sema_wait(&endSema);
	rk_sema_wait(&endSema);
	// Check if there was a simultaneous reorder
	if (r1 == 0 && r2 == 0)
	{
	detected++;
	printf("%d reorders detected after %d iterations\n", detected,
	iterations);
	}
	}
	return 0; // Never returns
	}