Skip to content

Instantly share code, notes, and snippets.

@dannas
Created April 30, 2020 14:24
Show Gist options
  • Save dannas/3f8397e76894d33e45e9299d0f1c95fd to your computer and use it in GitHub Desktop.
Save dannas/3f8397e76894d33e45e9299d0f1c95fd to your computer and use it in GitHub Desktop.
Experiment with cacheline strides for prefetching
#define RUN_ME /*
gcc -O2 -Wall $0 -o "$(basename $0 .c)" && ./"$(basename $0 .c)" ; exit
*/
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <x86intrin.h>
/**
 * Read the processor time-stamp counter with the RDTSCP instruction.
 *
 * RDTSCP delivers the counter's low half in EAX and high half in EDX, and
 * additionally writes ECX; ECX is bound to a dummy output so the compiler
 * knows the register is overwritten.
 *
 * Declared `static inline` so the function has internal linkage: a plain
 * C99 `inline` definition provides no external definition and would only
 * link as long as every call happens to be inlined.
 *
 * @return the 64-bit time-stamp counter value.
 */
static inline __attribute__((always_inline)) uint64_t rdtscp(void) {
    uint32_t lo, hi, aux;
    __asm__ volatile("rdtscp" : "=a" (lo), "=d" (hi), "=c" (aux));
    (void)aux; /* only present to mark ECX as written */
    return ((uint64_t)hi << 32) | lo;
}
/**
 * Time a single one-byte load from `address`.
 *
 * The load goes through a volatile-qualified pointer so the compiler must
 * perform it even though the loaded value is discarded.
 *
 * @param address location to read one byte from.
 * @return difference between the time-stamp counter read after the load and
 *         the one read before it (includes the cost of the rdtscp calls).
 */
uint64_t probe(char *address) {
    volatile char *target = (volatile char *)address;

    uint64_t start = rdtscp();
    (void)*target;
    uint64_t stop = rdtscp();

    return stop - start;
}
/**
 * Busy-wait until at least `cycles` time-stamp-counter ticks have elapsed
 * since entry.
 *
 * @param cycles number of counter ticks to spin for.
 */
void delayloop(uint64_t cycles) {
    const uint64_t start = rdtscp();
    uint64_t elapsed;

    do {
        elapsed = rdtscp() - start;
    } while (elapsed < cycles);
}
// Number of cache lines to stride over.
int STRIDE = 64;
// Measured overhead on my laptop.
int RDTSCP_OVERHEAD = 22;

// Geometry of the probed buffer: NUM_LINES lines of CACHELINE_SIZE bytes.
enum { CACHELINE_SIZE = 64, NUM_LINES = 1024 };

// Raw probe latencies, one slot per probed line. Kept at the full width
// returned by probe(): an 8-bit slot would wrap any latency above 255.
uint64_t times[NUM_LINES];

/**
 * Flush every cache line of a line-aligned buffer, then time one load from
 * every STRIDE-th line and print each latency minus RDTSCP_OVERHEAD.
 *
 * @return 0 on success, EXIT_FAILURE if STRIDE is not positive or the
 *         buffer cannot be allocated.
 */
int main(void) {
    // A non-positive stride would never advance the measurement loop.
    if (STRIDE <= 0) {
        fprintf(stderr, "STRIDE must be positive\n");
        return EXIT_FAILURE;
    }

    char *buf = memalign(CACHELINE_SIZE, (size_t)NUM_LINES * CACHELINE_SIZE);
    if (buf == NULL) {
        perror("memalign");
        return EXIT_FAILURE;
    }

    // Write to every line first so the memory is backed before timing;
    // otherwise the first access to freshly allocated memory can include a
    // page fault and swamp the latency being measured. The volatile store
    // keeps the compiler from discarding the write.
    for (int i = 0; i < NUM_LINES; i++) {
        ((volatile char *)buf)[(size_t)i * CACHELINE_SIZE] = 0;
    }

    // Flush cachelines
    for (int i = 0; i < NUM_LINES; i++) {
        _mm_clflush(buf + (size_t)i * CACHELINE_SIZE);
    }
    // Fence so the flushes are complete before any probe load is issued.
    _mm_mfence();

    // Measure access times of lines
    int n = 0;
    for (int i = 0; i < NUM_LINES; i += STRIDE) {
        times[n++] = probe(buf + (size_t)i * CACHELINE_SIZE);
    }

    // Report times. Iterate over the number of samples actually taken so
    // the count is right even when STRIDE does not divide NUM_LINES. The
    // net value is signed because a probe may come in under the overhead.
    for (int i = 0; i < n; i++) {
        int64_t net = (int64_t)times[i] - RDTSCP_OVERHEAD;
        printf("[after flush] cycles = %" PRId64 "\n", net);
    }

    free(buf);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment