Created
April 30, 2020 14:24
-
-
Save dannas/3f8397e76894d33e45e9299d0f1c95fd to your computer and use it in GitHub Desktop.
Experiment with cacheline strides for prefetching
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define RUN_ME /* | |
gcc -O2 -Wall $0 -o "$(basename $0 .c)" && ./"$(basename $0 .c)" ; exit | |
*/ | |
#include <inttypes.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <malloc.h> | |
#include <x86intrin.h> | |
// Read the processor's time-stamp counter with the RDTSCP instruction.
//
// RDTSCP returns the 64-bit counter split across EDX:EAX and also writes
// IA32_TSC_AUX into ECX. The three outputs are captured as 32-bit values so
// the compiler knows all three registers are overwritten, and the halves are
// combined explicitly instead of relying on how a 64-bit C object is mapped
// onto a single register.
//
// `static inline` gives the function internal linkage, so the program links
// even if a compiler declines to inline it.
static inline __attribute__((always_inline)) uint64_t rdtscp(void) {
    uint32_t lo, hi, aux;
    __asm__ volatile("rdtscp" : "=a" (lo), "=d" (hi), "=c" (aux));
    (void)aux;  // only captured to mark ECX as clobbered
    return ((uint64_t)hi << 32) | lo;
}
// Time a single one-byte load from `address`.
//
// Returns the difference between the rdtscp() readings taken right before
// and right after the load. The cost of the two timestamp reads is included
// in the result; callers subtract it themselves.
uint64_t probe(char *address) {
    volatile char *target = (volatile char *)address;
    const uint64_t start = rdtscp();
    (void)*target;  // volatile access: the load is always performed
    return rdtscp() - start;
}
// Busy-wait until at least `cycles` timestamp ticks, as reported by
// rdtscp(), have elapsed since the call began.
void delayloop(uint64_t cycles) {
    const uint64_t start = rdtscp();
    for (;;) {
        if (rdtscp() - start >= cycles) {
            return;
        }
    }
}
// Number of cache lines to stride over.
int STRIDE = 64;

// Measured overhead on my laptop.
int RDTSCP_OVERHEAD = 22;

// Geometry of the probed buffer: LINE_COUNT lines of CACHELINE_SIZE bytes.
enum { CACHELINE_SIZE = 64, LINE_COUNT = 1024 };

// Raw probe results in cycles, one slot per probed line. Kept as 64-bit
// values so that a latency above 255 cycles is not truncated.
uint64_t times[LINE_COUNT];

// Flush a cache-line-aligned buffer, then time one load every STRIDE lines
// and print each latency with the timestamp overhead subtracted.
//
// Returns 0 on success, EXIT_FAILURE if STRIDE is not positive or the buffer
// cannot be allocated.
int main(void) {
    if (STRIDE <= 0) {
        fprintf(stderr, "STRIDE must be positive\n");
        return EXIT_FAILURE;
    }

    char *buf = memalign(CACHELINE_SIZE, CACHELINE_SIZE * LINE_COUNT);
    if (buf == NULL) {
        perror("memalign");
        return EXIT_FAILURE;
    }

    // Write to every line first so each page is already mapped before
    // anything is timed; otherwise the first access to a page would be
    // dominated by page-fault handling rather than by the cache miss.
    for (int i = 0; i < LINE_COUNT; i++) {
        *(volatile char *)(buf + i * CACHELINE_SIZE) = 0;
    }

    // Flush cachelines
    for (int i = 0; i < LINE_COUNT; i++) {
        _mm_clflush(buf + i * CACHELINE_SIZE);
    }
    // Make sure the flushes have completed before the timed loads start.
    _mm_mfence();

    // Measure access times of lines
    int n = 0;
    for (int i = 0; i < LINE_COUNT; i += STRIDE) {
        times[n++] = probe(buf + i * CACHELINE_SIZE);
    }

    // Report times. Iterate over the samples actually taken (n) so the
    // report stays correct when STRIDE does not divide LINE_COUNT, and clamp
    // at zero instead of wrapping if a sample is below the overhead.
    const uint64_t overhead =
        RDTSCP_OVERHEAD > 0 ? (uint64_t)RDTSCP_OVERHEAD : 0;
    for (int i = 0; i < n; i++) {
        const uint64_t net = times[i] > overhead ? times[i] - overhead : 0;
        printf("[after flush] cycles = %" PRIu64 "\n", net);
    }

    free(buf);
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment