Skip to content

Instantly share code, notes, and snippets.

@mmozeiko
Last active April 2, 2024 08:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
Save mmozeiko/98bb947fb5a9d5b8a695adf503308a58 to your computer and use it in GitHub Desktop.
armv8 timer & cycle counter
#pragma once
#define _GNU_SOURCE
#include <stdint.h>
#include <stdbool.h>
#include <sched.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
// fixed frequency counter, always available
// Reads the virtual counter register (CNTVCT_EL0): a fixed-frequency
// timer that is always accessible from EL0 (user space) on ARMv8.
static inline uint64_t armv8_cntvct(void)
{
    uint64_t ticks;
    __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(ticks));
    return ticks;
}
// Reads the counter frequency register (CNTFRQ_EL0): the rate, in Hz,
// at which the virtual counter returned by armv8_cntvct() ticks.
static inline uint64_t armv8_cntfrq(void)
{
    uint64_t hz;
    __asm__ __volatile__("mrs %0, cntfrq_el0" : "=r"(hz));
    return hz;
}
// performance monitor cycle counter, pinned to one core
// requires extra setup - read the comments below
// File descriptor of the perf event opened by armv8_perf_init();
// valid only between a successful init and armv8_perf_done().
static int armv8_perf_fd;
// Original CPU affinity mask, saved so armv8_perf_done() can restore it.
static cpu_set_t armv8_perf_mask;

// Opens a PERF_COUNT_HW_CPU_CYCLES perf event and pins the calling thread
// to its current core, so subsequent PMCCNTR_EL0 reads all hit one PMU.
// Returns true on success. On any failure the original CPU affinity is
// restored and false is returned; reading pmccntr_el0 after a failed init
// would raise SIGILL, so callers must check the result.
static inline bool armv8_perf_init(void)
{
    // Save the current affinity so it can be restored later (or on failure).
    if (sched_getaffinity(0, sizeof(cpu_set_t), &armv8_perf_mask) != 0)
    {
        return false;
    }

    int core = sched_getcpu();
    if (core < 0)
    {
        return false;
    }

    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(core, &set);
    if (sched_setaffinity(0, sizeof(cpu_set_t), &set) != 0)
    {
        return false;
    }

    struct perf_event_attr attr =
    {
        .size = sizeof(attr),
        .type = PERF_TYPE_HARDWARE,
        .config = PERF_COUNT_HW_CPU_CYCLES,
        .config1 = 1 | 2, // 1=64-bit counters, 2=allow user access
        .pinned = 1,
    };

    int fd = (int)syscall(__NR_perf_event_open, &attr, 0, core, -1, 0);
    if (fd < 0)
    {
        // perf not enabled in kernel, or perf requires root
        // to allow non-root access, run the following:
        // echo 1 | sudo tee /proc/sys/kernel/perf_event_paranoid
        sched_setaffinity(0, sizeof(cpu_set_t), &armv8_perf_mask); // undo pinning
        return false;
    }

    // Bit 2 of PMUSERENR_EL0 gates EL0 access to the cycle counter.
    uint64_t r;
    __asm__ __volatile__("mrs %0, pmuserenr_el0" : "=r"(r));
    if (!(r & 4))
    {
        // PMU not allowed for user-space access, to allow run this:
        // echo 1 | sudo tee /proc/sys/kernel/perf_user_access
        close(fd);
        sched_setaffinity(0, sizeof(cpu_set_t), &armv8_perf_mask); // undo pinning
        return false;
    }

    armv8_perf_fd = fd;
    return true;
}
// Tears down what armv8_perf_init() set up: restores the thread's
// original CPU affinity and closes the perf event file descriptor.
// Call only after armv8_perf_init() has returned true.
static inline void armv8_perf_done(void)
{
    sched_setaffinity(0, sizeof(cpu_set_t), &armv8_perf_mask);
    close(armv8_perf_fd);
}
// call armv8_pmccntr() only if armv8_perf_init() returned true, otherwise SIGILL will be raised
// Reads the PMU cycle counter (PMCCNTR_EL0). Only valid after
// armv8_perf_init() has returned true; otherwise the instruction
// traps and the process receives SIGILL.
static inline uint64_t armv8_pmccntr(void)
{
    uint64_t cycles;
    __asm__ __volatile__("mrs %0, pmccntr_el0" : "=r"(cycles));
    return cycles;
}
#include "armv8_tsc.h"
#include <stdio.h>
// Spins for one million iterations doing nothing; the empty volatile
// asm statement stops the compiler from deleting the loop entirely,
// so it serves as a fixed workload for the timing demo.
static void loop()
{
    int i = 0;
    while (i < 1000000)
    {
        __asm__ __volatile__("");
        i++;
    }
}
// Demo: times an empty loop with the always-available virtual counter,
// then — if perf setup succeeds — with the PMU cycle counter.
int main(void)
{
    uint64_t ticks0 = armv8_cntvct();
    loop();
    uint64_t ticks1 = armv8_cntvct();
    uint64_t freq = armv8_cntfrq();
    printf("cntvct : %zu ticks @ %zu MHz = %.2f msec\n",
        (size_t)(ticks1 - ticks0),
        (size_t)(freq / 1000000),
        (ticks1 - ticks0) * 1000.0 / freq);

    // NOTE: armv8_perf_init is prototyped (void); the original call passed
    // a stray argument, which is a constraint violation.
    if (armv8_perf_init())
    {
        uint64_t cycles0 = armv8_pmccntr();
        loop();
        uint64_t cycles1 = armv8_pmccntr();
        printf("pmccntr: %zu cycles\n", (size_t)(cycles1 - cycles0));
        armv8_perf_done();
    }
    else
    {
        printf("perf not available! not enough privileges?\n");
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment