Last active
December 27, 2021 18:48
-
-
Save edwintorok/0a458872c594f98946dc2743df56b9d5 to your computer and use it in GitHub Desktop.
mmap populate test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CPU: 12-Core AMD Ryzen 9 3900X (-MT MCP-) speed/min/max: 3800/2200/3800 MHz Kernel: 5.15.11-200.fc35.x86_64 x86_64 Up: 3h 40m | |
Mem: 3884.9/64274.8 MiB (6.0%) Storage: 24.99 TiB (7.7% used) Procs: 506 Shell: Zsh inxi: 3.3.09 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sys/mman.h> | |
/*
 * Strategies for faulting in the pages of a fresh mapping:
 *   NONE     - leave the mapping untouched
 *   MEMSET   - zero the whole mapping with memset()
 *   TOUCH4K  - write a single byte every 4096 bytes
 *   POPULATE - request MAP_POPULATE from mmap() itself
 */
enum method { NONE, MEMSET, TOUCH4K, POPULATE };

/*
 * Translate a command-line method name into its enum method value.
 *
 * s: NUL-terminated name; one of "none", "memset", "touch4k", "populate"
 *    (compared exactly, case-sensitive).
 *
 * Returns the matching enum value. An unrecognised name is reported on stderr
 * and terminates the process with exit status 5.
 */
static enum method parse_method(const char *s) {
  static const struct {
    const char *name;
    enum method value;
  } known[] = {
      {"none", NONE},
      {"memset", MEMSET},
      {"touch4k", TOUCH4K},
      {"populate", POPULATE},
  };

  for (size_t k = 0; k < sizeof known / sizeof known[0]; k++) {
    if (strcmp(s, known[k].name) == 0) {
      return known[k].value;
    }
  }

  fprintf(stderr, "Unknown populate method: %s\n", s);
  exit(5);
}
/*
 * Parse a decimal exponent and return 2 raised to that exponent.
 *
 * arg: NUL-terminated string holding the exponent in base 10.
 *
 * Returns (size_t)1 << exponent.
 *
 * Terminates the process with exit status 2 (after a message on stderr) when
 * the argument is not a complete decimal integer, is negative, or is so large
 * that the result would not fit in size_t.
 */
static size_t parse_pow2(const char *arg) {
  char *end;
  /* strtol (unlike atoi) reports where parsing stopped, so non-numeric input
   * and trailing garbage can be rejected instead of silently becoming 0. */
  long pow2 = strtol(arg, &end, 10);

  if (end == arg || *end != '\0') {
    fprintf(stderr, "Size must be a decimal integer: %s\n", arg);
    exit(2);
  }
  /* Out-of-range input makes strtol return LONG_MIN/LONG_MAX, which the two
   * range checks below reject as well. */
  if (pow2 < 0) {
    fprintf(stderr, "Size cannot be negative\n");
    exit(2);
  }
  if ((unsigned long)pow2 >= sizeof(size_t) * 8) {
    fprintf(stderr,
            "Requested size too big, it must be a power of 2 that fits within "
            "size_t: %ld\n",
            pow2);
    exit(2);
  }
  /* Shift in size_t so the operand width matches the bound checked above. */
  return (size_t)1 << pow2;
}
int main(int argc, char *argv[]) { | |
size_t length, loops, i = 0; | |
enum method populate_method; | |
int flags; | |
if (argc != 4) { | |
/* Allocating one large chunk of N G will just fail immediately, whereas | |
* N*1G would go into overcommit */ | |
fprintf(stderr, | |
"Usage: %s <chunk-size-power-of-2> <loops-power-of-2> " | |
"<populate_method=memset|touch4k|populate|none>\n", | |
argv[0]); | |
return 1; | |
} | |
length = parse_pow2(argv[1]); | |
loops = parse_pow2(argv[2]); | |
populate_method = parse_method(argv[3]); | |
flags = MAP_PRIVATE | MAP_ANONYMOUS | | |
(populate_method == POPULATE ? MAP_POPULATE : 0); | |
for (i = 0; i < loops; i++) { | |
void *m = mmap(NULL, length, PROT_READ | PROT_WRITE, flags, -1, 0); | |
if (MAP_FAILED == m) { | |
perror("mmap failed"); | |
return 4; | |
} | |
switch (populate_method) { | |
case MEMSET: | |
memset(m, 0, length); | |
break; | |
case TOUCH4K: { | |
uint8_t *x = m; | |
const uint8_t *end = x + length; | |
for (; x < end; x += 4096) { | |
*x = 0; | |
} | |
break; | |
} | |
case NONE: | |
/* none, on purpose */ | |
break; | |
case POPULATE: | |
/* all done already */ | |
break; | |
} | |
} | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
./run.sh | |
Performance counter stats for './map 32 3 none' (5 runs): | |
0.17 msec task-clock:u # 0.440 CPUs utilized ( +- 3.68% ) | |
0 context-switches:u # 0.000 /sec | |
0 cpu-migrations:u # 0.000 /sec | |
49 page-faults:u # 274.120 K/sec ( +- 0.50% ) | |
151,922 cycles:u # 0.850 GHz ( +- 2.21% ) | |
16,721 stalled-cycles-frontend:u # 10.83% frontend cycles idle ( +- 4.93% ) | |
18,151 stalled-cycles-backend:u # 11.76% backend cycles idle ( +- 0.90% ) | |
133,047 instructions:u # 0.86 insn per cycle | |
# 0.14 stalled cycles per insn ( +- 0.00% ) | |
27,942 branches:u # 156.316 M/sec ( +- 0.00% ) | |
1,957 branch-misses:u # 7.00% of all branches ( +- 0.92% ) | |
<not counted> L1-dcache-loads:u (0.00%) | |
<not counted> L1-dcache-load-misses:u (0.00%) | |
<not supported> LLC-loads:u | |
<not supported> LLC-load-misses:u | |
<not counted> L1-icache-loads:u (0.00%) | |
<not counted> L1-icache-load-misses:u (0.00%) | |
<not counted> dTLB-loads:u (0.00%) | |
<not counted> dTLB-load-misses:u (0.00%) | |
<not counted> iTLB-loads:u (0.00%) | |
<not counted> iTLB-load-misses:u (0.00%) | |
<not counted> L1-dcache-prefetches:u (0.00%) | |
<not supported> L1-dcache-prefetch-misses:u | |
0.0003871 +- 0.0000133 seconds time elapsed ( +- 3.44% ) | |
Performance counter stats for './map 32 3 memset' (5 runs): | |
7,977.87 msec task-clock:u # 1.002 CPUs utilized ( +- 0.21% ) | |
0 context-switches:u # 0.000 /sec | |
0 cpu-migrations:u # 0.000 /sec | |
8,388,658 page-faults:u # 1.054 M/sec ( +- 0.00% ) | |
4,821,856,232 cycles:u # 0.606 GHz ( +- 0.23% ) (39.98%) | |
34,973,942 stalled-cycles-frontend:u # 0.73% frontend cycles idle ( +- 0.90% ) (39.99%) | |
842,854,631 stalled-cycles-backend:u # 17.52% backend cycles idle ( +- 0.29% ) (39.98%) | |
1,887,839,036 instructions:u # 0.39 insn per cycle | |
# 0.45 stalled cycles per insn ( +- 0.03% ) (39.99%) | |
276,888,289 branches:u # 34.791 M/sec ( +- 0.04% ) (39.98%) | |
8,395,181 branch-misses:u # 3.03% of all branches ( +- 0.04% ) (40.00%) | |
1,383,400,402 L1-dcache-loads:u # 173.824 M/sec ( +- 0.27% ) (40.01%) | |
439,622,095 L1-dcache-load-misses:u # 31.58% of all L1-dcache accesses ( +- 0.43% ) (40.01%) | |
<not supported> LLC-loads:u | |
<not supported> LLC-load-misses:u | |
158,067,690 L1-icache-loads:u # 19.861 M/sec ( +- 0.21% ) (40.01%) | |
281,682 L1-icache-load-misses:u # 0.18% of all L1-icache accesses ( +- 18.12% ) (40.01%) | |
26,339,959 dTLB-loads:u # 3.310 M/sec ( +- 0.37% ) (40.01%) | |
25,192,826 dTLB-load-misses:u # 95.66% of all dTLB cache accesses ( +- 0.04% ) (40.01%) | |
1,197 iTLB-loads:u # 150.403 /sec ( +- 9.72% ) (40.01%) | |
9 iTLB-load-misses:u # 0.79% of all iTLB cache accesses ( +- 27.80% ) (40.01%) | |
6,571,588 L1-dcache-prefetches:u # 825.718 K/sec ( +- 14.52% ) (39.99%) | |
<not supported> L1-dcache-prefetch-misses:u | |
7.9593 +- 0.0167 seconds time elapsed ( +- 0.21% ) | |
Performance counter stats for './map 32 3 touch4k' (5 runs): | |
7,665.12 msec task-clock:u # 1.012 CPUs utilized ( +- 0.36% ) | |
0 context-switches:u # 0.000 /sec | |
0 cpu-migrations:u # 0.000 /sec | |
8,388,656 page-faults:u # 1.107 M/sec ( +- 0.00% ) | |
3,472,017,913 cycles:u # 0.458 GHz ( +- 0.29% ) (40.00%) | |
2,172,679,732 stalled-cycles-frontend:u # 62.36% frontend cycles idle ( +- 1.19% ) (40.00%) | |
42,090,912 stalled-cycles-backend:u # 1.21% backend cycles idle ( +- 4.86% ) (40.01%) | |
42,271,922 instructions:u # 0.01 insn per cycle | |
# 50.31 stalled cycles per insn ( +- 0.02% ) (40.01%) | |
16,862,698 branches:u # 2.226 M/sec ( +- 0.03% ) (40.01%) | |
8,406,852 branch-misses:u # 49.90% of all branches ( +- 0.04% ) (40.00%) | |
281,181,535 L1-dcache-loads:u # 37.118 M/sec ( +- 0.26% ) (40.00%) | |
6,166,522 L1-dcache-load-misses:u # 2.20% of all L1-dcache accesses ( +- 5.52% ) (40.00%) | |
<not supported> LLC-loads:u | |
<not supported> LLC-load-misses:u | |
190,211,810 L1-icache-loads:u # 25.109 M/sec ( +- 1.20% ) (40.00%) | |
82,567 L1-icache-load-misses:u # 0.04% of all L1-icache accesses ( +- 12.89% ) (40.00%) | |
52,123,183 dTLB-loads:u # 6.881 M/sec ( +- 1.82% ) (40.00%) | |
50,173,242 dTLB-load-misses:u # 100.88% of all dTLB cache accesses ( +- 1.71% ) (40.00%) | |
8,063 iTLB-loads:u # 1.064 K/sec ( +- 19.99% ) (40.00%) | |
82 iTLB-load-misses:u # 5.08% of all iTLB cache accesses ( +- 16.70% ) (39.99%) | |
170,884 L1-dcache-prefetches:u # 22.558 K/sec ( +- 7.96% ) (40.00%) | |
<not supported> L1-dcache-prefetch-misses:u | |
7.5761 +- 0.0278 seconds time elapsed ( +- 0.37% ) | |
Performance counter stats for './map 32 3 populate' (5 runs): | |
6,067.89 msec task-clock:u # 0.998 CPUs utilized ( +- 0.14% ) | |
0 context-switches:u # 0.000 /sec | |
0 cpu-migrations:u # 0.000 /sec | |
49 page-faults:u # 8.064 /sec ( +- 1.44% ) | |
695,554 cycles:u # 0.000 GHz ( +- 4.78% ) (39.96%) | |
79,679 stalled-cycles-frontend:u # 12.04% frontend cycles idle ( +- 7.03% ) (39.98%) | |
45,000 stalled-cycles-backend:u # 6.80% backend cycles idle ( +- 0.82% ) (39.99%) | |
332,305 instructions:u # 0.50 insn per cycle | |
# 0.23 stalled cycles per insn ( +- 0.17% ) (40.01%) | |
69,686 branches:u # 11.468 K/sec ( +- 0.20% ) (40.02%) | |
4,877 branch-misses:u # 7.01% of all branches ( +- 1.07% ) (40.04%) | |
2,007 L1-dcache-loads:u # 330.275 /sec ( +- 20.14% ) (40.04%) | |
312 L1-dcache-load-misses:u # 17.64% of all L1-dcache accesses ( +- 22.52% ) (40.05%) | |
<not supported> LLC-loads:u | |
<not supported> LLC-load-misses:u | |
3,165 L1-icache-loads:u # 520.838 /sec ( +- 23.08% ) (40.03%) | |
0 L1-icache-load-misses:u # 0.00% of all L1-icache accesses (40.02%) | |
19 dTLB-loads:u # 3.127 /sec ( +- 92.71% ) (40.00%) | |
12 dTLB-load-misses:u # 32.43% of all dTLB cache accesses ( +-141.44% ) (39.98%) | |
0 iTLB-loads:u # 0.000 /sec (39.97%) | |
15 iTLB-load-misses:u # 1071.43% of all iTLB cache accesses ( +- 53.27% ) (39.95%) | |
55 L1-dcache-prefetches:u # 9.051 /sec ( +- 55.84% ) (39.95%) | |
<not supported> L1-dcache-prefetch-misses:u | |
6.07737 +- 0.00877 seconds time elapsed ( +- 0.14% ) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Build the mmap benchmark and profile every page-population strategy.
set -o errexit
set -o nounset

gcc -Wall -O2 map.c -o map

# Run one strategy under perf: chunk exponent 32, loop exponent 3, 5 repeats.
profile() {
    perf stat -ddd -r 5 ./map 32 3 "$1"
}

for strategy in none memset touch4k populate; do
    profile "${strategy}"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment