Last active
March 3, 2024 09:01
-
-
Save thejh/39a408fbf767e28670a8088e5425b6f6 to your computer and use it in GitHub Desktop.
Testing whether misspeculated NULL dereferences cause fewer page walks when a pkey-protected page is mapped at address 0 (tested on Tiger Lake)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# perf stat -e task-clock:u -e cycles:u -e instructions:u -e branches:u -e branch-misses:u -e dTLB-loads:u -e dTLB-load-misses:u -e dtlb_load_misses.walk_active:u ./spec_test map | |
Performance counter stats for './spec_test map': | |
1,150.38 msec task-clock:u # 1.000 CPUs utilized | |
5,370,185,772 cycles:u # 4.668 GHz | |
1,331,717,669 instructions:u # 0.25 insn per cycle | |
307,326,910 branches:u # 267.153 M/sec | |
102,502,300 branch-misses:u # 33.35% of all branches | |
102,427,183 dTLB-loads # 89.038 M/sec | |
180 dTLB-load-misses # 0.00% of all dTLB cache accesses | |
9,842 dtlb_load_misses.walk_active:u # 8.555 K/sec | |
1.150692567 seconds time elapsed | |
1.150681000 seconds user | |
0.000000000 seconds sys | |
# perf stat -e task-clock:u -e cycles:u -e instructions:u -e branches:u -e branch-misses:u -e dTLB-loads:u -e dTLB-load-misses:u -e dtlb_load_misses.walk_active:u ./spec_test nomap | |
Performance counter stats for './spec_test nomap': | |
1,146.92 msec task-clock:u # 1.000 CPUs utilized | |
5,367,627,921 cycles:u # 4.680 GHz | |
1,331,715,636 instructions:u # 0.25 insn per cycle | |
307,326,565 branches:u # 267.957 M/sec | |
102,502,200 branch-misses:u # 33.35% of all branches | |
102,426,610 dTLB-loads # 89.305 M/sec | |
102,395,299 dTLB-load-misses # 99.97% of all dTLB cache accesses | |
2,850,800,022 dtlb_load_misses.walk_active:u # 2.486 G/sec | |
1.147238142 seconds time elapsed | |
1.147193000 seconds user | |
0.000000000 seconds sys | |
# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// compile with "gcc -o spec_test spec_test.c -O3 -mno-red-zone" | |
#define _GNU_SOURCE | |
#include <err.h> | |
#include <string.h> | |
#include <unistd.h> | |
#include <stdlib.h> | |
#include <sys/mman.h> | |
#define LOAD_ADDR 0 | |
__attribute__((noinline)) | |
void bench(void) { | |
for (unsigned long i=0; i<100000UL; i++) { | |
for (unsigned long j=0; j<0x10000; j += 64) { | |
asm volatile( | |
"lfence\n\t" | |
// basically retpoline | |
"call 1f\n\t" | |
// MISSPECULATION START | |
"mov (%[load_addr]), %%rax\n\t" | |
"ud2\n\t" | |
// MISSPECULATION END | |
"1:\n\t" | |
"mov $0, %%rbx\n\t" | |
"lea 2f(%%rip), %%rax\n\t" | |
// slow dependent ops. | |
// each popcnt should be 3 cycles on Tiger Lake according to | |
// <https://www.agner.org/optimize/instruction_tables.pdf>. | |
"popcnt %%rbx, %%rbx\n\t" | |
"popcnt %%rbx, %%rbx\n\t" | |
"popcnt %%rbx, %%rbx\n\t" | |
"popcnt %%rbx, %%rbx\n\t" | |
"xor %%rbx, %%rax\n\t" | |
"mov %%rax, (%%rsp)\n\t" | |
"ret\n\t" | |
"2:\n\t" | |
://out | |
://in | |
[load_addr] "r"(LOAD_ADDR) | |
://clobber | |
"rax", "rbx", "rcx", "rdx" | |
); | |
} | |
} | |
} | |
/*
 * Usage: ./spec_test <map|nomap>
 *
 *   map   - before benchmarking, create a read-only anonymous page with a
 *           NULL address argument and MAP_FIXED_NOREPLACE, touch it once,
 *           then tag it with a protection key that was allocated with
 *           access and write rights disabled.
 *   nomap - run the benchmark without creating that mapping.
 *
 * Any other invocation prints the usage string and exits with status 1.
 *
 * NOTE(review): the address returned by mmap() is only compared against
 * MAP_FAILED, not against 0; kernels that do not know MAP_FIXED_NOREPLACE
 * may place the mapping elsewhere - confirm this is acceptable.
 */
int main(int argc, char **argv) {
  int want_map = argc == 2 && strcmp(argv[1], "map") == 0;
  int want_nomap = argc == 2 && strcmp(argv[1], "nomap") == 0;

  if (!want_map && !want_nomap)
    errx(1, "usage: ./spec_test <map|nomap>");

  if (want_map) {
    int key = pkey_alloc(0, PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
    if (key == -1)
      err(1, "pkey_alloc");

    unsigned char *page = mmap(NULL, 0x1000, PROT_READ,
                               MAP_PRIVATE|MAP_FIXED_NOREPLACE|MAP_ANONYMOUS,
                               -1, 0);
    if (page == MAP_FAILED)
      err(1, "mmap");

    // read once while the page is still accessible: fault in the zeropage
    *(volatile char *)page;

    // only now attach the access-disabled key to the populated page
    if (pkey_mprotect(page, 0x1000, PROT_READ, key) != 0)
      err(1, "pkey_mprotect");
  }

  bench();
  return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment