/*
 * Source: GitHub gist alk/f0713c2e1df7c9af20057684d5f3e39d by @alk,
 * created November 18, 2016 05:02.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <string.h>
#include <stdint.h>
#include <emmintrin.h>
/*
 * Writes the byte value c to the 64 bytes starting at p, using four
 * 16-byte streaming stores (_mm_stream_si128).
 *
 * p: destination; it is handed to _mm_stream_si128 as a __m128i pointer,
 *    which requires a 16-byte-aligned address.
 * c: fill value; it is converted to char before being replicated.
 */
static void setbytes(char *p, int c)
{
    const __m128i fill = _mm_set1_epi8((char)c);
    __m128i *dst = (__m128i *)p;

    /* Four 16-byte vectors cover bytes 0..63. */
    for (int lane = 0; lane < 4; lane++) {
        _mm_stream_si128(dst + lane, fill);
    }
}
/* Size in bytes of the memory region used by the benchmark: 2 GiB. */
#define SZ (2ULL * 1024 * 1024 * 1024)
/*
 * Maps SZ bytes of readable and writable memory using the given mmap flags.
 *
 * flags: MAP_* flags passed unchanged to mmap. Every caller in this file
 *        includes MAP_ANONYMOUS, so no file descriptor is supplied.
 *
 * Returns the address of the new mapping. Does not return on failure: the
 * error is reported with perror("mmap") and the process is aborted.
 */
static void *do_mmap(int flags) {
    /*
     * fd is -1 rather than 0 (stdin): the fd argument is ignored for
     * MAP_ANONYMOUS on Linux, but some implementations require -1 and
     * portable programs are expected to pass it.
     */
    void *rv = mmap(NULL, SZ, PROT_READ | PROT_WRITE, flags, -1, 0);
    if (rv == MAP_FAILED) {
        perror("mmap");
        abort();
    }
    return rv;
}
/*
 * Benchmark driver: maps SZ bytes of anonymous memory and brings it in
 * using the strategy selected by the first command-line argument:
 *
 *   1  mmap with MAP_POPULATE | MAP_HUGETLB
 *   2  plain mmap, then memset
 *   3  plain mmap, madvise(MADV_HUGEPAGE), then memset
 *   4  plain mmap, then one setbytes() call every 4096 bytes
 *   5  plain mmap, madvise(MADV_HUGEPAGE), then one setbytes() call
 *      every 4096 bytes
 *   6  mmap with MAP_POPULATE only
 *
 * Returns 0 on success. If the argument is missing or is not a decimal
 * number in 1..6, prints a usage message to stderr and returns
 * EXIT_FAILURE. A failing madvise is reported with perror and aborts.
 */
int main(int argc, char **argv)
{
    /* Distance in bytes between the addresses written in modes 4 and 5. */
    enum { TOUCH_STRIDE = 4096 };
    const char *prog = (argc > 0 && argv[0]) ? argv[0] : "populate_test";
    char *end = NULL;
    long choice = 0;

    if (argc > 1) {
        choice = strtol(argv[1], &end, 10);
    }
    /*
     * end == argv[1] means no digits were consumed; *end != '\0' means
     * trailing characters followed the number. Out-of-range values
     * (including strtol's overflow results) fail the 1..6 check.
     */
    if (argc < 2 || end == argv[1] || *end != '\0' || choice < 1 || choice > 6) {
        fprintf(stderr,
                "usage: %s <mode>\n"
                "  1  MAP_HUGETLB + MAP_POPULATE\n"
                "  2  memset\n"
                "  3  MADV_HUGEPAGE + memset\n"
                "  4  efficient touching\n"
                "  5  MADV_HUGEPAGE + efficient touching\n"
                "  6  MAP_POPULATE\n",
                prog);
        return EXIT_FAILURE;
    }

    int opt = (int)choice;
    printf("opt = %d\n", opt);

    switch (opt) {
    case 1:
        printf("HUGETLB + populate (just in case)\n");
        /* The mapping is not written by this program; the result is unused. */
        do_mmap(MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE | MAP_HUGETLB);
        break;
    case 2:
    case 3:
    case 4:
    case 5: {
        /* bit 0: apply MADV_HUGEPAGE; bit 1: setbytes loop instead of memset. */
        int mode = opt - 2;
        char *ptr = do_mmap(MAP_PRIVATE | MAP_ANONYMOUS);

        if (mode & 1) {
            printf("MADV_HUGEPAGE\n");
            if (madvise(ptr, SZ, MADV_HUGEPAGE) != 0) {
                perror("madvise");
                abort();
            }
        }
        if (mode & 2) {
            printf("efficient touching\n");
            /* One 64-byte setbytes() write per TOUCH_STRIDE bytes. */
            for (size_t off = 0; off < SZ; off += TOUCH_STRIDE) {
                setbytes(ptr + off, 0);
            }
        } else {
            printf("memset\n");
            memset(ptr, 0, SZ);
        }
        break;
    }
    case 6:
        printf("just populate\n");
        /* The mapping is not written by this program; the result is unused. */
        do_mmap(MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE);
        break;
    default:
        /* Not reachable: opt was validated to be in 1..6 above. */
        abort();
    }
    return 0;
}
/*
# (for i in `seq 1 6`; do perf stat -r5 ./populate_test $i; done)
opt = 1
HUGETLB + populate (just in case)
opt = 1
HUGETLB + populate (just in case)
opt = 1
HUGETLB + populate (just in case)
opt = 1
HUGETLB + populate (just in case)
opt = 1
HUGETLB + populate (just in case)
Performance counter stats for './populate_test 1' (5 runs):
115.057047 task-clock (msec) # 0.997 CPUs utilized ( +- 1.97% )
0 context-switches # 0.000 K/sec
0 cpu-migrations # 0.000 K/sec
49 page-faults # 0.429 K/sec ( +- 0.81% )
363,227,102 cycles # 3.157 GHz ( +- 1.01% )
25,183,169 instructions # 0.07 insn per cycle ( +- 0.02% )
7,415,763 branches # 64.453 M/sec ( +- 0.01% )
9,850 branch-misses # 0.13% of all branches ( +- 2.74% )
0.115358648 seconds time elapsed ( +- 2.00% )
opt = 2
memset
opt = 2
memset
opt = 2
memset
opt = 2
memset
opt = 2
memset
Performance counter stats for './populate_test 2' (5 runs):
605.700727 task-clock (msec) # 0.999 CPUs utilized ( +- 0.74% )
1 context-switches # 0.002 K/sec ( +- 31.62% )
0 cpu-migrations # 0.000 K/sec
524,338 page-faults # 0.866 M/sec ( +- 0.00% )
1,989,238,394 cycles # 3.284 GHz ( +- 0.19% )
1,472,320,756 instructions # 0.74 insn per cycle ( +- 0.21% )
274,699,776 branches # 453.524 M/sec ( +- 0.39% )
490,520 branch-misses # 0.18% of all branches ( +- 0.24% )
0.606006626 seconds time elapsed ( +- 0.73% )
opt = 3
MADV_HUGEPAGE
memset
opt = 3
MADV_HUGEPAGE
memset
opt = 3
MADV_HUGEPAGE
memset
opt = 3
MADV_HUGEPAGE
memset
opt = 3
MADV_HUGEPAGE
memset
Performance counter stats for './populate_test 3' (5 runs):
175.333728 task-clock (msec) # 0.998 CPUs utilized ( +- 0.62% )
1 context-switches # 0.007 K/sec ( +- 48.59% )
0 cpu-migrations # 0.000 K/sec
1,584 page-faults # 0.009 M/sec ( +- 0.02% )
558,791,647 cycles # 3.187 GHz ( +- 1.04% )
43,796,589 instructions # 0.08 insn per cycle ( +- 0.02% )
9,866,852 branches # 56.275 M/sec ( +- 0.03% )
14,870 branch-misses # 0.15% of all branches ( +- 4.56% )
0.175604618 seconds time elapsed ( +- 0.63% )
opt = 4
efficient touching
opt = 4
efficient touching
opt = 4
efficient touching
opt = 4
efficient touching
opt = 4
efficient touching
Performance counter stats for './populate_test 4' (5 runs):
497.347276 task-clock (msec) # 0.999 CPUs utilized ( +- 1.19% )
2 context-switches # 0.004 K/sec ( +- 44.07% )
0 cpu-migrations # 0.000 K/sec ( +-100.00% )
524,337 page-faults # 1.054 M/sec ( +- 0.00% )
1,639,292,414 cycles # 3.296 GHz ( +- 0.71% )
1,476,161,160 instructions # 0.90 insn per cycle ( +- 0.09% )
275,197,682 branches # 553.331 M/sec ( +- 0.19% )
490,947 branch-misses # 0.18% of all branches ( +- 0.33% )
0.497722060 seconds time elapsed ( +- 1.19% )
opt = 5
MADV_HUGEPAGE
efficient touching
opt = 5
MADV_HUGEPAGE
efficient touching
opt = 5
MADV_HUGEPAGE
efficient touching
opt = 5
MADV_HUGEPAGE
efficient touching
opt = 5
MADV_HUGEPAGE
efficient touching
Performance counter stats for './populate_test 5' (5 runs):
124.864210 task-clock (msec) # 0.997 CPUs utilized ( +- 1.19% )
2 context-switches # 0.014 K/sec ( +- 32.39% )
0 cpu-migrations # 0.000 K/sec
1,585 page-faults # 0.013 M/sec ( +- 0.02% )
404,063,340 cycles # 3.236 GHz ( +- 1.05% )
47,390,410 instructions # 0.12 insn per cycle ( +- 0.01% )
10,375,379 branches # 83.093 M/sec ( +- 0.02% )
15,927 branch-misses # 0.15% of all branches ( +- 2.34% )
0.125194075 seconds time elapsed ( +- 1.18% )
opt = 6
just populate
opt = 6
just populate
opt = 6
just populate
opt = 6
just populate
opt = 6
just populate
Performance counter stats for './populate_test 6' (5 runs):
316.634223 task-clock (msec) # 0.999 CPUs utilized ( +- 0.89% )
2 context-switches # 0.006 K/sec ( +- 44.72% )
0 cpu-migrations # 0.001 K/sec ( +-100.00% )
49 page-faults # 0.155 K/sec ( +- 1.12% )
1,030,379,032 cycles # 3.254 GHz ( +- 0.15% )
1,583,286,269 instructions # 1.54 insn per cycle ( +- 0.00% )
276,504,484 branches # 873.262 M/sec ( +- 0.00% )
489,914 branch-misses # 0.18% of all branches ( +- 0.12% )
0.316950401 seconds time elapsed ( +- 0.89% )
*/
/* Gist page footer: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment" */