Created
November 18, 2016 05:02
-
-
Save alk/f0713c2e1df7c9af20057684d5f3e39d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <sys/types.h> | |
#include <sys/mman.h> | |
#include <string.h> | |
#include <stdint.h> | |
#include <emmintrin.h> | |
/*
 * Fill the 64 bytes starting at p with the byte value c, using four
 * 16-byte non-temporal (streaming) SSE2 stores.
 *
 * p must be 16-byte aligned, as required by _mm_stream_si128.
 * Only the low 8 bits of c are used.
 */
static void setbytes(char *p, int c)
{
    const __m128i fill = _mm_set1_epi8((char)c);
    __m128i *dst = (__m128i *)p;

    /* Slots 0..3 correspond to byte offsets 0, 16, 32 and 48. */
    for (int slot = 0; slot < 4; slot++) {
        _mm_stream_si128(dst + slot, fill);
    }
}
/* Size in bytes of every mapping created by this program (2 GiB). */
#define SZ (2ULL << 30)

/*
 * Create a readable and writable mapping of SZ bytes using the given
 * mmap flags.
 *
 * The mapping is not backed by a file: fd is passed as -1, which is what
 * portable code must do for MAP_ANONYMOUS mappings (Linux ignores fd in
 * that case, other implementations may reject anything but -1). Callers
 * are therefore expected to include MAP_ANONYMOUS in flags.
 *
 * Returns the address of the new mapping. On failure, reports the error
 * with perror() and terminates the process with abort(); never returns
 * MAP_FAILED.
 */
static void *do_mmap(int flags) {
    void *rv = mmap(NULL, SZ, PROT_READ|PROT_WRITE, flags, -1, 0);
    if (rv == MAP_FAILED) {
        perror("mmap");
        abort();
    }
    return rv;
}
int main(int argc, char **argv) | |
{ | |
char *ptr; | |
int opt = (argc > 1) ? argv[1][0] & 0x0f : 0; | |
printf("opt = %d\n", opt); | |
switch (opt) { | |
case 1: | |
printf("HUGETLB + populate (just in case)\n"); | |
do_mmap(MAP_PRIVATE|MAP_ANONYMOUS|MAP_POPULATE|MAP_HUGETLB); | |
break; | |
case 2: | |
case 3: | |
case 4: | |
case 5: | |
opt -= 2; | |
ptr = do_mmap(MAP_PRIVATE|MAP_ANONYMOUS); | |
if ((opt & 1)) { | |
printf("MADV_HUGEPAGE\n"); | |
int rv = madvise(ptr, SZ, MADV_HUGEPAGE); | |
if (rv) { | |
perror("madvise"); | |
abort(); | |
} | |
} | |
if ((opt & 2)) { | |
printf("efficient touching\n"); | |
for (uintptr_t i = 0; i < SZ; i += 4096) { | |
setbytes(ptr + i, 0); | |
} | |
} else { | |
printf("memset\n"); | |
memset(ptr, 0, SZ); | |
} | |
break; | |
case 6: | |
printf("just populate\n"); | |
do_mmap(MAP_PRIVATE|MAP_ANONYMOUS|MAP_POPULATE); | |
break; | |
default: | |
abort(); | |
} | |
return 0; | |
} | |
/* | |
# (for i in `seq 1 6`; do perf stat -r5 ./populate_test $i; done) | |
opt = 1 | |
HUGETLB + populate (just in case) | |
opt = 1 | |
HUGETLB + populate (just in case) | |
opt = 1 | |
HUGETLB + populate (just in case) | |
opt = 1 | |
HUGETLB + populate (just in case) | |
opt = 1 | |
HUGETLB + populate (just in case) | |
Performance counter stats for './populate_test 1' (5 runs): | |
115.057047 task-clock (msec) # 0.997 CPUs utilized ( +- 1.97% ) | |
0 context-switches # 0.000 K/sec | |
0 cpu-migrations # 0.000 K/sec | |
49 page-faults # 0.429 K/sec ( +- 0.81% ) | |
363,227,102 cycles # 3.157 GHz ( +- 1.01% ) | |
25,183,169 instructions # 0.07 insn per cycle ( +- 0.02% ) | |
7,415,763 branches # 64.453 M/sec ( +- 0.01% ) | |
9,850 branch-misses # 0.13% of all branches ( +- 2.74% ) | |
0.115358648 seconds time elapsed ( +- 2.00% ) | |
opt = 2 | |
memset | |
opt = 2 | |
memset | |
opt = 2 | |
memset | |
opt = 2 | |
memset | |
opt = 2 | |
memset | |
Performance counter stats for './populate_test 2' (5 runs): | |
605.700727 task-clock (msec) # 0.999 CPUs utilized ( +- 0.74% ) | |
1 context-switches # 0.002 K/sec ( +- 31.62% ) | |
0 cpu-migrations # 0.000 K/sec | |
524,338 page-faults # 0.866 M/sec ( +- 0.00% ) | |
1,989,238,394 cycles # 3.284 GHz ( +- 0.19% ) | |
1,472,320,756 instructions # 0.74 insn per cycle ( +- 0.21% ) | |
274,699,776 branches # 453.524 M/sec ( +- 0.39% ) | |
490,520 branch-misses # 0.18% of all branches ( +- 0.24% ) | |
0.606006626 seconds time elapsed ( +- 0.73% ) | |
opt = 3 | |
MADV_HUGEPAGE | |
memset | |
opt = 3 | |
MADV_HUGEPAGE | |
memset | |
opt = 3 | |
MADV_HUGEPAGE | |
memset | |
opt = 3 | |
MADV_HUGEPAGE | |
memset | |
opt = 3 | |
MADV_HUGEPAGE | |
memset | |
Performance counter stats for './populate_test 3' (5 runs): | |
175.333728 task-clock (msec) # 0.998 CPUs utilized ( +- 0.62% ) | |
1 context-switches # 0.007 K/sec ( +- 48.59% ) | |
0 cpu-migrations # 0.000 K/sec | |
1,584 page-faults # 0.009 M/sec ( +- 0.02% ) | |
558,791,647 cycles # 3.187 GHz ( +- 1.04% ) | |
43,796,589 instructions # 0.08 insn per cycle ( +- 0.02% ) | |
9,866,852 branches # 56.275 M/sec ( +- 0.03% ) | |
14,870 branch-misses # 0.15% of all branches ( +- 4.56% ) | |
0.175604618 seconds time elapsed ( +- 0.63% ) | |
opt = 4 | |
efficient touching | |
opt = 4 | |
efficient touching | |
opt = 4 | |
efficient touching | |
opt = 4 | |
efficient touching | |
opt = 4 | |
efficient touching | |
Performance counter stats for './populate_test 4' (5 runs): | |
497.347276 task-clock (msec) # 0.999 CPUs utilized ( +- 1.19% ) | |
2 context-switches # 0.004 K/sec ( +- 44.07% ) | |
0 cpu-migrations # 0.000 K/sec ( +-100.00% ) | |
524,337 page-faults # 1.054 M/sec ( +- 0.00% ) | |
1,639,292,414 cycles # 3.296 GHz ( +- 0.71% ) | |
1,476,161,160 instructions # 0.90 insn per cycle ( +- 0.09% ) | |
275,197,682 branches # 553.331 M/sec ( +- 0.19% ) | |
490,947 branch-misses # 0.18% of all branches ( +- 0.33% ) | |
0.497722060 seconds time elapsed ( +- 1.19% ) | |
opt = 5 | |
MADV_HUGEPAGE | |
efficient touching | |
opt = 5 | |
MADV_HUGEPAGE | |
efficient touching | |
opt = 5 | |
MADV_HUGEPAGE | |
efficient touching | |
opt = 5 | |
MADV_HUGEPAGE | |
efficient touching | |
opt = 5 | |
MADV_HUGEPAGE | |
efficient touching | |
Performance counter stats for './populate_test 5' (5 runs): | |
124.864210 task-clock (msec) # 0.997 CPUs utilized ( +- 1.19% ) | |
2 context-switches # 0.014 K/sec ( +- 32.39% ) | |
0 cpu-migrations # 0.000 K/sec | |
1,585 page-faults # 0.013 M/sec ( +- 0.02% ) | |
404,063,340 cycles # 3.236 GHz ( +- 1.05% ) | |
47,390,410 instructions # 0.12 insn per cycle ( +- 0.01% ) | |
10,375,379 branches # 83.093 M/sec ( +- 0.02% ) | |
15,927 branch-misses # 0.15% of all branches ( +- 2.34% ) | |
0.125194075 seconds time elapsed ( +- 1.18% ) | |
opt = 6 | |
just populate | |
opt = 6 | |
just populate | |
opt = 6 | |
just populate | |
opt = 6 | |
just populate | |
opt = 6 | |
just populate | |
Performance counter stats for './populate_test 6' (5 runs): | |
316.634223 task-clock (msec) # 0.999 CPUs utilized ( +- 0.89% ) | |
2 context-switches # 0.006 K/sec ( +- 44.72% ) | |
0 cpu-migrations # 0.001 K/sec ( +-100.00% ) | |
49 page-faults # 0.155 K/sec ( +- 1.12% ) | |
1,030,379,032 cycles # 3.254 GHz ( +- 0.15% ) | |
1,583,286,269 instructions # 1.54 insn per cycle ( +- 0.00% ) | |
276,504,484 branches # 873.262 M/sec ( +- 0.00% ) | |
489,914 branch-misses # 0.18% of all branches ( +- 0.12% ) | |
0.316950401 seconds time elapsed ( +- 0.89% ) | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment