Skip to content

Instantly share code, notes, and snippets.

@brouhaha
Created December 6, 2014 03:02
Show Gist options
  • Save brouhaha/62f2178d12ec04a81078 to your computer and use it in GitHub Desktop.
Save brouhaha/62f2178d12ec04a81078 to your computer and use it in GitHub Desktop.
Test misaligned reads and writes spanning cache line boundaries
// Test misaligned reads and writes spanning cache line boundaries
// 2014-12-05 Eric Smith <spacewar@gmail.com>
// This program demonstrates that on an AMD FX-8350, and presumably
// other x86_64 processors, misaligned 64-bit reads and/or writes
// which span a cache line boundary are not atomic. For a
// "simultaneous" write and read of a misaligned value, the read may
// return a value that is partially the pre-write value, and partially
// the written value.
// At least on an AMD FX-8350, it appears that misaligned reads and writes
// that do not span cache line boundaries are atomic.
#include <inttypes.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <sysexits.h>
#include <unistd.h>
// Cache line size in bytes assumed by this test; used both as the alignment
// of the test buffer and to position p so that it crosses a line boundary.
#define CACHE_LINE_SIZE 64
// Shutdown flag: main sets it to true to make the worker loops terminate.
volatile bool stop;
// Shared 64-bit location written by producers and read by consumers.
// main points it at a deliberately misaligned address inside the test buffer.
volatile uint64_t *p;
// Writer thread body.
//
// Repeatedly stores a 64-bit value through the shared pointer p until the
// global stop flag becomes true, then reports the number of stores on stderr.
// Every stored value consists of eight identical bytes, which lets a reader
// detect a torn (partially updated) value by comparing the bytes.
//
//   arg      pointer to an int holding this thread's id (used in the report)
//   returns  NULL
void *producer(void *arg)
{
    int id = *((int *) arg);
    uint64_t count = 0;

    while (! stop)
    {
        // Replicate the low byte of the counter into all eight byte lanes.
        *p = (count & 0xff) * 0x0101010101010101ULL;
        count++;
    }

    // The PRIu64 macro already supplies the conversion specifier; the
    // original format had an extra 'd' that was printed literally.
    fprintf(stderr, "producer %d iterations: %" PRIu64 "\n", id, count);
    return NULL;
}
// Reader thread body.
//
// Loads the 64-bit value behind the shared pointer p over and over until the
// global stop flag becomes true. A value is consistent when all eight of its
// bytes equal its lowest byte; any other value is logged to stderr together
// with the iteration at which it was seen. On exit the total number of reads
// is reported.
//
//   arg      pointer to an int holding this thread's id (used in messages)
//   returns  a null pointer
void *consumer(void *arg)
{
    const int id = *((int *) arg);
    uint64_t iterations = 0;

    for (; ! stop; iterations++)
    {
        const uint64_t value = *p;
        const uint64_t expected = (value & 0xff) * 0x0101010101010101ULL;

        if (value == expected)
        {
            continue;
        }

        // Inconsistent value observed; keep running so that further
        // occurrences are logged as well.
        fprintf(stderr, "consumer %d iteration %" PRIu64 " read %" PRIx64 "\n", id, iterations, value);
    }

    fprintf(stderr, "consumer %d iterations: %" PRIu64 "\n", id, iterations);
    return 0;
}
// Capacity of the per-thread bookkeeping arrays below.
#define MAX_THREADS 100
// Number of producer and consumer threads to start; assigned in main.
int producer_count;
int consumer_count;
// Id value for each started thread; a thread receives a pointer to its slot.
int thread_arg [MAX_THREADS];
// Thread handles filled in by pthread_create.
pthread_t thread [MAX_THREADS];
// Program entry point.
//
// Allocates a cache-line-aligned buffer two lines long, points the shared
// pointer p at a misaligned 64-bit location inside it, starts the producer
// and consumer threads, lets them run for ten seconds, then asks them to
// stop and waits for each of them to finish before releasing the buffer.
//
// Command-line arguments are ignored.
//
//   returns  EX_OK on success, EX_UNAVAILABLE if the buffer could not be
//            allocated or a thread could not be created
int main(int argc, char **argv)
{
    int i;
    int thread_num = 0;      // number of threads successfully started so far
    int status = EX_OK;
    void *mem = NULL;
    uint8_t *buf;

    (void) argc;
    (void) argv;

    producer_count = 2;
    consumer_count = 2;

    // posix_memalign stores through a void **, so hand it a real void *
    // rather than a cast pointer to a uint8_t *.
    if (posix_memalign(&mem, CACHE_LINE_SIZE, 2 * CACHE_LINE_SIZE))
    {
        fprintf (stderr, "posix_memalign_failed\n");
        return EX_UNAVAILABLE;
    }
    buf = mem;

    // Construct pointer so that data value is split across cache line:
    // one byte in the first line, the remaining seven in the second.
    p = (uint64_t *)(buf + CACHE_LINE_SIZE - 1);

    // The same test can be performed with a misaligned value that does
    // not cross a cache line by replacing the above assignment to p
    // with this one. On an AMD FX-8350, testing that case reveals no
    // failures.
    // p = (uint64_t *)(buf + 1);

    stop = false;

    for (i = 0; i < producer_count; i++)
    {
        thread_arg [thread_num] = i;
        int rc = pthread_create (& thread [thread_num],
                                 NULL,
                                 producer,
                                 & thread_arg [thread_num]);
        if (rc)
        {
            fprintf (stderr, "producer pthread_create failed\n");
            status = EX_UNAVAILABLE;
            goto shutdown;
        }
        thread_num++;
    }

    for (i = 0; i < consumer_count; i++)
    {
        thread_arg [thread_num] = i;
        int rc = pthread_create (& thread [thread_num],
                                 NULL,
                                 consumer,
                                 & thread_arg [thread_num]);
        if (rc)
        {
            fprintf (stderr, "consumer pthread_create failed\n");
            status = EX_UNAVAILABLE;
            goto shutdown;
        }
        thread_num++;
    }

    // Let the threads hammer on the shared location for a while.
    sleep (10);

shutdown:
    // Ask every started thread to leave its loop and wait for it to finish,
    // so all reports are printed and nothing touches the buffer after it
    // is freed. This replaces the original fixed one-second sleep.
    stop = true;
    for (i = 0; i < thread_num; i++)
    {
        pthread_join (thread [i], NULL);
    }

    p = NULL;
    free (buf);
    return status;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment