Skip to content

Instantly share code, notes, and snippets.

@sandip4n
Last active July 28, 2021 09:22
Show Gist options
  • Save sandip4n/3e423db98ef58f9835fba9fabebcd7da to your computer and use it in GitHub Desktop.
Save sandip4n/3e423db98ef58f9835fba9fabebcd7da to your computer and use it in GitHub Desktop.
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Author: Sandipan Das
*/
#if !defined(__x86_64__) && !defined(__powerpc64__)
#error "unsupported architecture"
#endif
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <assert.h>
#include <getopt.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/param.h>
#include <sys/types.h>
#include <perfmon/pfmlib_perf_event.h>
struct event_data {
unsigned long last;
unsigned long total;
unsigned long min;
unsigned long max;
};
static const char *evname[] = {
"page-faults",
// "cpu-cycles",
// "instructions",
"cache-misses",
"dTLB-load-misses",
};
static const char *pgflag[] = {
"LOCKED",
"ERROR",
"REFERENCED",
"UPTODATE",
"DIRTY",
"LRU",
"ACTIVE",
"SLAB",
"WRITEBACK",
"RECLAIM",
"BUDDY",
"MMAP",
"ANON",
"SWAPCACHE",
"SWAPBACKED",
"COMPOUND-HEAD",
"COMPOUND-TAIL",
"HUGE",
"UNEVICTABLE",
"HWPOISON",
"NOPAGE",
"KSM",
"THP",
"BALLOON",
"ZERO-PAGE",
"IDLE",
"RESERVED",
"MLOCKED",
"MAPPEDTODISK",
"PRIVATE",
"PRIVATE-2",
"OWNER-PRIVATE",
"ARCH",
"UNCACHED",
"SOFTDIRTY",
};
static inline
void perf_event_reset(int fd)
{
assert(!ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP));
}
static inline
void perf_event_enable(int fd)
{
assert(!ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP));
}
static inline
void perf_event_disable(int fd)
{
assert(!ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP));
}
static inline
void perf_event_read(int fd, struct event_data *evcount)
{
assert(read(fd, &evcount->last, sizeof(unsigned long)) > 0);
evcount->total += evcount->last;
evcount->min = MIN(evcount->min, evcount->last);
evcount->max = MAX(evcount->max, evcount->last);
}
static
int perf_event_prepare(const char *name, int level, int group)
{
struct perf_event_attr attr;
int fd;
memset(&attr, 0, sizeof(struct perf_event_attr));
assert(pfm_get_perf_event_encoding(name, level, &attr,
NULL, NULL) == PFM_SUCCESS);
attr.disabled = 1;
assert((fd = perf_event_open(&attr, 0, -1, group, 0)) > 0);
return fd;
}
static inline __attribute__((always_inline))
void loadflush(volatile void *addr)
{
register volatile unsigned long tmp asm("r14");
#ifdef __x86_64__
asm volatile("mov %1, %0; clflush (%1); sfence" : "=r"(tmp) : "r"(addr) : "memory");
#elif defined(__powerpc64__)
asm volatile("ld %0, 0(%1); dcbf 0, %1; sync" : "=r"(tmp) : "r"(addr) : "memory");
#endif
}
static inline
unsigned long bits(unsigned long val, unsigned int l, unsigned int r)
{
unsigned long bits, mask;
bits = l - r + 1;
mask = (bits == (sizeof(unsigned long) * 8)) ? -1UL : ((1UL << bits) - 1);
return (val >> r) & mask;
}
static
int gethugepagesize(void)
{
int hpgsize;
FILE *fp;
fp = popen("cat /proc/meminfo | grep Hugepagesize: | awk '{ print $2}'", "r");
assert(fp && fscanf(fp, "%d", &hpgsize) > 0);
pclose(fp);
return hpgsize * 1024;
}
static
unsigned long getpagedata(const char *path, void *addr)
{
unsigned long entry, offset;
int fd;
fd = open(path, O_RDONLY);
offset = ((unsigned long) addr / getpagesize()) * 8;
assert(fd && pread(fd, &entry, sizeof(unsigned long), offset) > 0);
close(fd);
return entry;
}
static
void pageinfo(void *addr)
{
unsigned long paddr, pgmap, pgcount, pgshift, pgflags;
unsigned int i;
pgmap = getpagedata("/proc/self/pagemap", addr);
pgshift = 63 - __builtin_clzl(getpagesize());
paddr = bits(pgmap, 54, 0) << pgshift;
pgcount = getpagedata("/proc/kpagecount", (void *) paddr);
pgflags = getpagedata("/proc/kpageflags", (void *) paddr);
printf("page information for virtual addr %016p\n", addr);
printf(" page frame number (if present) = 0x%016lx\n", bits(pgmap, 54, 0));
printf(" swap type (if swapped) = 0x%016lx\n", bits(pgmap, 4, 0));
printf(" swap offset (if swapped) = 0x%016lx\n", bits(pgmap, 54, 5));
printf(" pte is soft-dirty = 0x%016lx\n", bits(pgmap, 55, 55));
printf(" page exclusively mapped = 0x%016lx\n", bits(pgmap, 56, 56));
printf(" page is file-page or shared-anon = 0x%016lx\n", bits(pgmap, 61, 61));
printf(" page swapped = 0x%016lx\n", bits(pgmap, 62, 62));
printf(" page present = 0x%016lx\n", bits(pgmap, 63, 63));
printf(" page count = 0x%016lx\n", pgcount);
printf(" page flags = 0x%016lx [", pgflags);
for (i = 0; i < sizeof(pgflag) / sizeof(char *); i++)
if (pgflags & (1UL << ((i > 25) ? i + 6 : i)))
printf(" %s", pgflag[i]);
printf(" ]\n");
}
static
void usage(const char *prog)
{
fprintf(stderr, "Usage: %s [-H] [-L] [-p <level(s)>]\n", prog);
fprintf(stderr, " -H use hugepage mapping\n");
fprintf(stderr, " -L use locked mapping\n");
fprintf(stderr, " -p <0..3> measure at one or more privilege levels\n");
fprintf(stderr, " level 0 usually corresponds to kernel level\n");
fprintf(stderr, " level 3 usually corresponds to user level (default)\n");
fprintf(stderr, " -h, -? print this help message\n");
}
int main(int argc, char **argv)
{
unsigned long pgsize, hpgsize, pgprot, claddr, clsize, clcount, mflags;
char *tokens[] = { "0", "1", "2", "3", NULL };
int *fd, nevents, plm, idx, opt, i, j;
volatile unsigned int *addr, npages;
struct event_data *evcount;
char *subopts, *value;
plm = PFM_PLM3;
mflags = MAP_PRIVATE | MAP_ANONYMOUS;
while ((opt = getopt(argc, argv, "HLp:h?")) != -1) {
switch (opt) {
case 'H':
mflags |= MAP_HUGETLB;
break;
case 'L':
mflags |= MAP_LOCKED;
break;
case 'p':
subopts = optarg;
while (*subopts != '\0') {
switch (getsubopt(&subopts, tokens, &value)) {
case 0:
plm |= PFM_PLM0;
break;
case 1:
plm |= PFM_PLM1;
break;
case 2:
plm |= PFM_PLM2;
break;
case 3:
plm |= PFM_PLM3;
break;
default:
usage(argv[0]);
exit(EXIT_FAILURE);
}
}
break;
case 'h':
case '?':
default:
usage(argv[0]);
exit(EXIT_FAILURE);
}
}
pfm_initialize();
pgsize = getpagesize();
hpgsize = gethugepagesize();
npages = hpgsize / pgsize;
pgprot = PROT_READ | PROT_WRITE;
clsize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
clcount = npages * pgsize / clsize;
nevents = sizeof(evname) / sizeof(char *);
printf("pid = %d, page size = %lu, hugepage size = %lu, "
"L1 dcache line size = %lu, events = %d\n",
getpid(), pgsize, hpgsize, clsize, nevents);
fd = (int *) malloc(nevents * sizeof(int));
evcount = (struct event_data *) malloc(nevents * sizeof(struct event_data));
assert(fd != NULL && evcount != NULL);
memset(evcount, 0, nevents * sizeof(evcount[i]));
for (i = 0; i < nevents; i++) {
fd[i] = perf_event_prepare(evname[i], plm, i > 0 ? fd[0] : -1);
evcount[i].min = ULONG_MAX;
evcount[i].max = 0;
}
addr = (unsigned int *) mmap(NULL, npages * pgsize, pgprot, mflags, -1, 0);
assert(addr != MAP_FAILED);
pageinfo((void *) addr);
for (i = 0; i < clcount; i++) {
idx = i * clsize / sizeof(unsigned int);
claddr = ((unsigned long) &addr[idx]) & ~(clsize - 1UL);
perf_event_reset(fd[0]);
perf_event_enable(fd[0]);
loadflush((volatile void *) claddr);
perf_event_disable(fd[0]);
for (j = 0; j < nevents; j++)
perf_event_read(fd[j], &evcount[j]);
}
for (j = 0; j < nevents; j++)
printf("%32s : total = %-10ld \tmin = %-10ld \tmax = %-10ld \tavg = %-10ld\n",
evname[j],
evcount[j].total,
evcount[j].min, evcount[j].max,
evcount[j].total / clcount);
munmap((void *) addr, npages * pgsize);
free(evcount);
free(fd);
return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment