Skip to content

Instantly share code, notes, and snippets.

@edwintorok
Last active December 27, 2021 18:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save edwintorok/0a458872c594f98946dc2743df56b9d5 to your computer and use it in GitHub Desktop.
Save edwintorok/0a458872c594f98946dc2743df56b9d5 to your computer and use it in GitHub Desktop.
mmap populate test
CPU: 12-Core AMD Ryzen 9 3900X (-MT MCP-) speed/min/max: 3800/2200/3800 MHz Kernel: 5.15.11-200.fc35.x86_64 x86_64 Up: 3h 40m
Mem: 3884.9/64274.8 MiB (6.0%) Storage: 24.99 TiB (7.7% used) Procs: 506 Shell: Zsh inxi: 3.3.09
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
/*
 * Strategies for touching a freshly created mapping:
 *   NONE     - leave the mapping untouched
 *   MEMSET   - zero the whole mapping with memset
 *   TOUCH4K  - write one byte every 4096 bytes
 *   POPULATE - pass MAP_POPULATE to mmap and do nothing afterwards
 */
enum method { NONE, MEMSET, TOUCH4K, POPULATE };

/*
 * Translate a command-line method name into its enum method value.
 *
 * Accepted names are "none", "memset", "touch4k" and "populate"; the match is
 * exact and case-sensitive. Any other string prints a diagnostic to stderr
 * and terminates the process with exit status 5.
 */
static enum method parse_method(const char *s) {
  static const struct {
    const char *name;
    enum method value;
  } known[] = {
      {"none", NONE},
      {"memset", MEMSET},
      {"touch4k", TOUCH4K},
      {"populate", POPULATE},
  };

  for (size_t idx = 0; idx < sizeof known / sizeof known[0]; idx++) {
    if (strcmp(s, known[idx].name) == 0) {
      return known[idx].value;
    }
  }

  fprintf(stderr, "Unknown populate method: %s\n", s);
  exit(5);
}
/*
 * Parse a base-10 exponent from arg and return 2 raised to that power.
 *
 * The entire string must be a valid integer: empty input, non-numeric input
 * and trailing characters are rejected instead of being silently read as 0
 * (which is what atoi would do). The exponent must be non-negative and
 * smaller than the bit width of size_t.
 *
 * On any invalid input a diagnostic is printed to stderr and the process
 * terminates with exit status 2.
 */
static size_t parse_pow2(const char *arg) {
  char *end;
  /* On overflow strtol clamps to LONG_MIN/LONG_MAX, both of which are caught
   * by the range checks below, so errno does not need to be inspected. */
  long pow2 = strtol(arg, &end, 10);

  if (end == arg || *end != '\0') {
    fprintf(stderr, "Invalid power of 2 exponent: %s\n", arg);
    exit(2);
  }
  if (pow2 < 0) {
    fprintf(stderr, "Size cannot be negative\n");
    exit(2);
  }
  if ((unsigned long)pow2 >= sizeof(size_t) * 8) {
    fprintf(stderr,
            "Requested size too big, it must be a power of 2 that fits within "
            "size_t: %ld\n",
            pow2);
    exit(2);
  }
  /* Shift in size_t itself; the check above keeps the shift count in range. */
  return (size_t)1 << pow2;
}
int main(int argc, char *argv[]) {
size_t length, loops, i = 0;
enum method populate_method;
int flags;
if (argc != 4) {
/* Allocating one large chunk of N G will just fail immediately, whereas
* N*1G would go into overcommit */
fprintf(stderr,
"Usage: %s <chunk-size-power-of-2> <loops-power-of-2> "
"<populate_method=memset|touch4k|populate|none>\n",
argv[0]);
return 1;
}
length = parse_pow2(argv[1]);
loops = parse_pow2(argv[2]);
populate_method = parse_method(argv[3]);
flags = MAP_PRIVATE | MAP_ANONYMOUS |
(populate_method == POPULATE ? MAP_POPULATE : 0);
for (i = 0; i < loops; i++) {
void *m = mmap(NULL, length, PROT_READ | PROT_WRITE, flags, -1, 0);
if (MAP_FAILED == m) {
perror("mmap failed");
return 4;
}
switch (populate_method) {
case MEMSET:
memset(m, 0, length);
break;
case TOUCH4K: {
uint8_t *x = m;
const uint8_t *end = x + length;
for (; x < end; x += 4096) {
*x = 0;
}
break;
}
case NONE:
/* none, on purpose */
break;
case POPULATE:
/* all done already */
break;
}
}
return 0;
}
./run.sh
Performance counter stats for './map 32 3 none' (5 runs):
0.17 msec task-clock:u # 0.440 CPUs utilized ( +- 3.68% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
49 page-faults:u # 274.120 K/sec ( +- 0.50% )
151,922 cycles:u # 0.850 GHz ( +- 2.21% )
16,721 stalled-cycles-frontend:u # 10.83% frontend cycles idle ( +- 4.93% )
18,151 stalled-cycles-backend:u # 11.76% backend cycles idle ( +- 0.90% )
133,047 instructions:u # 0.86 insn per cycle
# 0.14 stalled cycles per insn ( +- 0.00% )
27,942 branches:u # 156.316 M/sec ( +- 0.00% )
1,957 branch-misses:u # 7.00% of all branches ( +- 0.92% )
<not counted> L1-dcache-loads:u (0.00%)
<not counted> L1-dcache-load-misses:u (0.00%)
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
<not counted> L1-icache-loads:u (0.00%)
<not counted> L1-icache-load-misses:u (0.00%)
<not counted> dTLB-loads:u (0.00%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
<not counted> L1-dcache-prefetches:u (0.00%)
<not supported> L1-dcache-prefetch-misses:u
0.0003871 +- 0.0000133 seconds time elapsed ( +- 3.44% )
Performance counter stats for './map 32 3 memset' (5 runs):
7,977.87 msec task-clock:u # 1.002 CPUs utilized ( +- 0.21% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
8,388,658 page-faults:u # 1.054 M/sec ( +- 0.00% )
4,821,856,232 cycles:u # 0.606 GHz ( +- 0.23% ) (39.98%)
34,973,942 stalled-cycles-frontend:u # 0.73% frontend cycles idle ( +- 0.90% ) (39.99%)
842,854,631 stalled-cycles-backend:u # 17.52% backend cycles idle ( +- 0.29% ) (39.98%)
1,887,839,036 instructions:u # 0.39 insn per cycle
# 0.45 stalled cycles per insn ( +- 0.03% ) (39.99%)
276,888,289 branches:u # 34.791 M/sec ( +- 0.04% ) (39.98%)
8,395,181 branch-misses:u # 3.03% of all branches ( +- 0.04% ) (40.00%)
1,383,400,402 L1-dcache-loads:u # 173.824 M/sec ( +- 0.27% ) (40.01%)
439,622,095 L1-dcache-load-misses:u # 31.58% of all L1-dcache accesses ( +- 0.43% ) (40.01%)
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
158,067,690 L1-icache-loads:u # 19.861 M/sec ( +- 0.21% ) (40.01%)
281,682 L1-icache-load-misses:u # 0.18% of all L1-icache accesses ( +- 18.12% ) (40.01%)
26,339,959 dTLB-loads:u # 3.310 M/sec ( +- 0.37% ) (40.01%)
25,192,826 dTLB-load-misses:u # 95.66% of all dTLB cache accesses ( +- 0.04% ) (40.01%)
1,197 iTLB-loads:u # 150.403 /sec ( +- 9.72% ) (40.01%)
9 iTLB-load-misses:u # 0.79% of all iTLB cache accesses ( +- 27.80% ) (40.01%)
6,571,588 L1-dcache-prefetches:u # 825.718 K/sec ( +- 14.52% ) (39.99%)
<not supported> L1-dcache-prefetch-misses:u
7.9593 +- 0.0167 seconds time elapsed ( +- 0.21% )
Performance counter stats for './map 32 3 touch4k' (5 runs):
7,665.12 msec task-clock:u # 1.012 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
8,388,656 page-faults:u # 1.107 M/sec ( +- 0.00% )
3,472,017,913 cycles:u # 0.458 GHz ( +- 0.29% ) (40.00%)
2,172,679,732 stalled-cycles-frontend:u # 62.36% frontend cycles idle ( +- 1.19% ) (40.00%)
42,090,912 stalled-cycles-backend:u # 1.21% backend cycles idle ( +- 4.86% ) (40.01%)
42,271,922 instructions:u # 0.01 insn per cycle
# 50.31 stalled cycles per insn ( +- 0.02% ) (40.01%)
16,862,698 branches:u # 2.226 M/sec ( +- 0.03% ) (40.01%)
8,406,852 branch-misses:u # 49.90% of all branches ( +- 0.04% ) (40.00%)
281,181,535 L1-dcache-loads:u # 37.118 M/sec ( +- 0.26% ) (40.00%)
6,166,522 L1-dcache-load-misses:u # 2.20% of all L1-dcache accesses ( +- 5.52% ) (40.00%)
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
190,211,810 L1-icache-loads:u # 25.109 M/sec ( +- 1.20% ) (40.00%)
82,567 L1-icache-load-misses:u # 0.04% of all L1-icache accesses ( +- 12.89% ) (40.00%)
52,123,183 dTLB-loads:u # 6.881 M/sec ( +- 1.82% ) (40.00%)
50,173,242 dTLB-load-misses:u # 100.88% of all dTLB cache accesses ( +- 1.71% ) (40.00%)
8,063 iTLB-loads:u # 1.064 K/sec ( +- 19.99% ) (40.00%)
82 iTLB-load-misses:u # 5.08% of all iTLB cache accesses ( +- 16.70% ) (39.99%)
170,884 L1-dcache-prefetches:u # 22.558 K/sec ( +- 7.96% ) (40.00%)
<not supported> L1-dcache-prefetch-misses:u
7.5761 +- 0.0278 seconds time elapsed ( +- 0.37% )
Performance counter stats for './map 32 3 populate' (5 runs):
6,067.89 msec task-clock:u # 0.998 CPUs utilized ( +- 0.14% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
49 page-faults:u # 8.064 /sec ( +- 1.44% )
695,554 cycles:u # 0.000 GHz ( +- 4.78% ) (39.96%)
79,679 stalled-cycles-frontend:u # 12.04% frontend cycles idle ( +- 7.03% ) (39.98%)
45,000 stalled-cycles-backend:u # 6.80% backend cycles idle ( +- 0.82% ) (39.99%)
332,305 instructions:u # 0.50 insn per cycle
# 0.23 stalled cycles per insn ( +- 0.17% ) (40.01%)
69,686 branches:u # 11.468 K/sec ( +- 0.20% ) (40.02%)
4,877 branch-misses:u # 7.01% of all branches ( +- 1.07% ) (40.04%)
2,007 L1-dcache-loads:u # 330.275 /sec ( +- 20.14% ) (40.04%)
312 L1-dcache-load-misses:u # 17.64% of all L1-dcache accesses ( +- 22.52% ) (40.05%)
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
3,165 L1-icache-loads:u # 520.838 /sec ( +- 23.08% ) (40.03%)
0 L1-icache-load-misses:u # 0.00% of all L1-icache accesses (40.02%)
19 dTLB-loads:u # 3.127 /sec ( +- 92.71% ) (40.00%)
12 dTLB-load-misses:u # 32.43% of all dTLB cache accesses ( +-141.44% ) (39.98%)
0 iTLB-loads:u # 0.000 /sec (39.97%)
15 iTLB-load-misses:u # 1071.43% of all iTLB cache accesses ( +- 53.27% ) (39.95%)
55 L1-dcache-prefetches:u # 9.051 /sec ( +- 55.84% ) (39.95%)
<not supported> L1-dcache-prefetch-misses:u
6.07737 +- 0.00877 seconds time elapsed ( +- 0.14% )
#!/bin/sh
# Build the mmap benchmark, then collect perf counters for every populate
# method in turn.

# Abort on the first failing command and on use of an unset variable.
set -e
set -u

gcc -Wall -O2 map.c -o map

# Arguments "32 3": chunk size exponent 32, loop count exponent 3.
# perf repeats each measurement 5 times (-r 5).
for method in none memset touch4k populate
do
    perf stat -ddd -r 5 ./map 32 3 "$method"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment