Skip to content

Instantly share code, notes, and snippets.

@dmbfm
Last active December 13, 2021 04:07
Show Gist options
  • Save dmbfm/0c7e868624a3f8e390348b9f2cf3d7a7 to your computer and use it in GitHub Desktop.
Save dmbfm/0c7e868624a3f8e390348b9f2cf3d7a7 to your computer and use it in GitHub Desktop.
Performance counters on Apple M1 processors using Zig!
const std = @import("std");
// Sources:
// - https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/2021/03/24/m1cycles.cpp
// - https://gitea.com/matteyeux/darwin-xnu/src/branch/master/osfmk/kperf
// - https://patchwork.ffmpeg.org/project/ffmpeg/patch/20210413004523.6500-1-josh@itanimul.li/
// - https://github.com/GMUCERG/PQC_NEON/blob/main/neon/kyber/m1cycles.c
const c = @cImport({
@cInclude("pthread.h");
});
const DynLib = std.DynLib;
const TypeInfo = std.builtin.TypeInfo;
const dlopen = std.c.dlopen;
const dlsym = std.c.dlsym;
const dlclose = std.c.dlclose;
/// Counter-class indices and the single-bit masks derived from them.
/// NOTE(review): the names presumably mirror the KPC_CLASS_* constants of the
/// XNU kpc interface referenced in the sources at the top of the file — confirm.
const KpcClass = struct {
// Class indices; each one is the bit position of the matching mask below.
const Fixed = 0;
const Configurable = 1;
const Power = 2;
const RawPmu = 3;
// Masks: 1 shifted left by the class index (0x1, 0x2, 0x4, 0x8).
const FixedMask = 1 << Fixed;
const ConfigurableMask = 1 << Configurable;
const PowerMask = 1 << Power;
const RawPmuMask = 1 << RawPmu;
};
/// Event selector values that get OR-ed with a `CfgWord` mask to build the
/// entries of `kpc_config`. Only CORE_CYCLE, INST_BRANCH, SYNC_BR_ANY_MISP and
/// INST_A64 are used in this file.
/// NOTE(review): presumably Apple CPMU event numbers copied from the sources
/// listed at the top of the file — confirm against those before relying on
/// the meaning of any individual value.
const Cpmu = struct {
const NONE = 0;
const CORE_CYCLE = 0x02;
const INST_A64 = 0x8c;
const INST_BRANCH = 0x8d;
const SYNC_DC_LOAD_MISS = 0xbf;
const SYNC_DC_STORE_MISS = 0xc0;
const SYNC_DTLB_MISS = 0xc1;
const SYNC_ST_HIT_YNGR_LD = 0xc4;
const SYNC_BR_ANY_MISP = 0xcb;
const FED_IC_MISS_DEM = 0xd3;
const FED_ITLB_MISS = 0xd4;
};
/// Flag bits that are OR-ed with a `Cpmu` event value to form one config word.
/// Only EL0A64EN_MASK is used in this file (see `kpc_config`).
/// NOTE(review): the names suggest per-exception-level / per-mode enable bits
/// — confirm against the sources listed at the top of the file.
const CfgWord = struct {
const EL0A32EN_MASK = 0x10000;
const EL0A64EN_MASK = 0x20000;
const EL1EN_MASK = 0x40000;
const EL3EN_MASK = 0x80000;
// Equal to the bitwise OR of the four masks above.
const ALLMODES_MASK = 0xf0000;
};
/// Class mask passed to every kpc_* call in this file: the configurable and
/// fixed classes combined.
const KpcMask = KpcClass.ConfigurableMask | KpcClass.FixedMask;
/// Value `initRdtsc` requires `kpc_get_counter_count(KpcMask)` to return; also
/// the length of the `kpc_config` and `kpc_counters` arrays.
const CountersCount = 10;
/// Value `initRdtsc` requires `kpc_get_config_count(KpcMask)` to return.
const ConfigCount = 8;
// Function types for the symbols resolved from the kperf framework (see
// `kpc_functions_map`). The symbols are C functions looked up at runtime, so
// every type is declared with the C calling convention; without it the calls
// would go through Zig's unspecified convention, which is not guaranteed to
// match the C ABI.
//
// Parameter shapes are unchanged: buffer arguments of `kpc_set_config` and
// `kpc_get_thread_counters` are still declared as `u64`, and the call sites
// pass the buffer address via `@ptrToInt`.

/// `kpc_get_counter_count(class_mask)`: returns a count for the given classes.
const KpcGetCounterCountFn = fn (u32) callconv(.C) u32;
/// `kpc_get_config_count(class_mask)`: returns a count for the given classes.
const KpcGetConfigCountFn = fn (u32) callconv(.C) u32;
/// `kpc_get_counting()`.
const KpcGetCountingFn = fn () callconv(.C) c_int;
/// `kpc_force_all_ctrs_set(value)`; callers in this file treat non-zero as failure.
const KpcForceAllCtrsSetFn = fn (c_int) callconv(.C) c_int;
/// `kpc_set_counting(class_mask)`; callers in this file treat non-zero as failure.
const KpcSetCountingFn = fn (u32) callconv(.C) c_int;
/// `kpc_set_thread_counting(class_mask)`; callers in this file treat non-zero as failure.
const KpcSetThreadCountingFn = fn (u32) callconv(.C) c_int;
/// `kpc_set_config(class_mask, config_address)`; the second argument is the
/// address of the config-word array, passed as an integer.
const KpcSetConfigFn = fn (u32, u64) callconv(.C) c_int;
/// `kpc_get_config(class_mask, out_buffer)`.
const KpcGetConfigFn = fn (u32, *c_void) callconv(.C) c_int;
/// `kpc_set_period(class_mask, buffer)`.
const KpcSetPeriodFn = fn (u32, *c_void) callconv(.C) c_int;
/// `kpc_get_period(class_mask, out_buffer)`.
const KpcGetPeriodFn = fn (u32, *c_void) callconv(.C) c_int;
/// `kperf_sample_get(out_value)`.
const KpcPerfSampleGetFn = fn (*c_int) callconv(.C) c_int;
/// `kpc_get_thread_counters(tid, buffer_count, buffer_address)`; the last
/// argument is the address of a `u64` buffer, passed as an integer.
const KpcGetThreadCountersFn = fn (c_int, c_uint, u64) callconv(.C) c_int;
/// Table of every kperf symbol this file resolves, paired with the function
/// type it is looked up as. `KpcApi` gets one field per entry (named after the
/// symbol) and `initRdtsc` fills those fields via `DynLib.lookup`.
///
/// Each entry uses the alias declared for that symbol: `kpc_get_config_count`
/// uses `KpcGetConfigCountFn`, and `kperf_sample_get` uses `KpcPerfSampleGetFn`
/// (pointer argument) rather than the unrelated `KpcForceAllCtrsSetFn`.
const kpc_functions_map = [_]struct { name: [:0]const u8, type: type }{
    .{ .name = "kpc_get_counter_count", .type = KpcGetCounterCountFn },
    .{ .name = "kpc_get_config_count", .type = KpcGetConfigCountFn },
    .{ .name = "kpc_get_counting", .type = KpcGetCountingFn },
    .{ .name = "kpc_force_all_ctrs_set", .type = KpcForceAllCtrsSetFn },
    .{ .name = "kpc_set_counting", .type = KpcSetCountingFn },
    .{ .name = "kpc_set_thread_counting", .type = KpcSetThreadCountingFn },
    .{ .name = "kpc_set_config", .type = KpcSetConfigFn },
    .{ .name = "kpc_get_config", .type = KpcGetConfigFn },
    .{ .name = "kpc_set_period", .type = KpcSetPeriodFn },
    .{ .name = "kpc_get_period", .type = KpcGetPeriodFn },
    .{ .name = "kperf_sample_get", .type = KpcPerfSampleGetFn },
    .{ .name = "kpc_get_thread_counters", .type = KpcGetThreadCountersFn },
};
/// Config words whose address `initRdtsc` hands to `kpc_set_config`.
/// Every slot is zero except 0, 3, 4 and 5, which each combine one `Cpmu`
/// event value with `CfgWord.EL0A64EN_MASK`.
var kpc_config: [CountersCount]u64 = init: {
    var words = [_]u64{0} ** CountersCount;
    words[0] = Cpmu.CORE_CYCLE | CfgWord.EL0A64EN_MASK;
    words[3] = Cpmu.INST_BRANCH | CfgWord.EL0A64EN_MASK;
    words[4] = Cpmu.SYNC_BR_ANY_MISP | CfgWord.EL0A64EN_MASK;
    words[5] = Cpmu.INST_A64 | CfgWord.EL0A64EN_MASK;
    break :init words;
};
/// Buffer whose address `getCounters` passes to `kpc_get_thread_counters`;
/// its contents are undefined until that call has succeeded once.
var kpc_counters: [CountersCount]u64 = undefined;
// TODO: Report this bug. Declaring the API table directly as a global, i.e.
//
//     var api: KpcApi = undefined;
//
// crashes the compiler with:
//
//     Assertion failed at /Users/daniel/Sources/zig/src/stage1/analyze.cpp:8753 in resolve_llvm_types_struct. This is a bug in the Zig compiler.
//     thread 6387290 panic:
//
// Wrapping the table in the KpcApiWrapper struct avoids the crash, which is
// the only reason the wrapper exists.
const KpcApiWrapper = struct { api: KpcApi };
// Global API table. Undefined until `initRdtsc` assigns `kpc_api.api`;
// `getCounters` reads it.
var kpc_api: KpcApiWrapper = undefined;
/// Generates the KpcApi struct based on the contents of the `kpc_functions_map` array.
/// This isn't really necessary, I did this mainly to learn to generate a struct at comptime with Zig.
///
/// The resulting struct has one field per map entry: the field name is the
/// entry's `name` and the field type is the entry's `type`.
const KpcApi = kpc_api_blk: {
// Build one StructField description per entry of `kpc_functions_map`.
const fields = blk: {
const N = @typeInfo(@TypeOf(kpc_functions_map)).Array.len;
var result: [N]TypeInfo.StructField = undefined;
inline for (kpc_functions_map) |item, i| {
result[i] = .{
.name = item.name,
.field_type = item.type,
// No default: every field must be assigned explicitly (see `initRdtsc`).
.default_value = null,
.is_comptime = false,
.alignment = @alignOf(item.type),
};
}
break :blk result;
};
// Describe a plain (non-tuple, auto-layout) struct with those fields and no declarations.
const type_info = blk: {
break :blk std.builtin.TypeInfo{
.Struct = .{
.layout = .Auto,
.fields = &fields,
.decls = &[_]std.builtin.TypeInfo.Declaration{},
.is_tuple = false,
},
};
};
// Reify the description into an actual type.
break :kpc_api_blk @Type(type_info);
};
/// Opens the kperf framework, resolves every symbol listed in
/// `kpc_functions_map` into the global `kpc_api`, then checks the counter and
/// config counts and issues the kpc_* calls that configure and enable counting.
///
/// Errors:
/// - any error from `std.DynLib.open` when the framework cannot be loaded;
/// - `error.KpcSymbolNotFound` when a listed symbol is missing from the
///   library (previously this was a null-unwrap panic);
/// - `error.WrongCounterCount` / `error.WrongConfigCount` when the reported
///   counts differ from `CountersCount` / `ConfigCount`;
/// - `error.KpcSetConfigFailed`, `error.KpcForceAllCtrsSetFailed`,
///   `error.KpcSetCountingFailed`, `error.KpcSetThreadCountingFailed` when the
///   corresponding call returns non-zero.
pub fn initRdtsc() !void {
    // The handle is never closed in this function.
    // NOTE(review): presumably the library must stay loaded for as long as the
    // function pointers stored in `kpc_api` are used — confirm before adding a close.
    var kperf = try std.DynLib.open("/System/Library/PrivateFrameworks/kperf.framework/Versions/A/kperf");

    var api: KpcApi = undefined;
    inline for (kpc_functions_map) |item| {
        // A missing symbol is reported as an error instead of unwrapping null.
        @field(api, item.name) = kperf.lookup(item.type, item.name) orelse {
            std.log.err("kperf symbol not found: {s}", .{item.name});
            return error.KpcSymbolNotFound;
        };
        std.log.info("{s}: {}", .{ item.name, @field(api, item.name) });
    }
    // Publish the table before the checks below, as the original code did.
    kpc_api.api = api;

    if (api.kpc_get_counter_count(KpcMask) != CountersCount) {
        return error.WrongCounterCount;
    }
    if (api.kpc_get_config_count(KpcMask) != ConfigCount) {
        return error.WrongConfigCount;
    }
    // The config array is passed by address, encoded as an integer argument.
    if (api.kpc_set_config(KpcMask, @ptrToInt(&kpc_config[0])) != 0) {
        return error.KpcSetConfigFailed;
    }
    if (api.kpc_force_all_ctrs_set(1) != 0) {
        return error.KpcForceAllCtrsSetFailed;
    }
    if (api.kpc_set_counting(KpcMask) != 0) {
        return error.KpcSetCountingFailed;
    }
    if (api.kpc_set_thread_counting(KpcMask) != 0) {
        return error.KpcSetThreadCountingFailed;
    }
}
/// Requests the user-interactive QoS class for the calling thread, then runs
/// `initRdtsc` and forwards its result. The status of the QoS request is
/// deliberately ignored.
pub fn setupPeformanceCounters() !void {
    const qos_status = c.pthread_set_qos_class_self_np(c.QOS_CLASS_USER_INTERACTIVE, 0);
    _ = qos_status;
    return initRdtsc();
}
/// Snapshot of the four counter values returned by `getCounters`.
const PerformanceCounters = struct {
/// Read from `kpc_counters[0 + 2]`; `kpc_config[0]` selects `Cpmu.CORE_CYCLE`.
cycles: u64,
/// Read from `kpc_counters[3 + 2]`; `kpc_config[3]` selects `Cpmu.INST_BRANCH`.
branches: u64,
/// Read from `kpc_counters[4 + 2]`; `kpc_config[4]` selects `Cpmu.SYNC_BR_ANY_MISP`.
missed_branches: u64,
/// Read from `kpc_counters[5 + 2]`; `kpc_config[5]` selects `Cpmu.INST_A64`.
instructions: u64,
};
/// Asks `kpc_get_thread_counters` to fill the global `kpc_counters` buffer and
/// returns the four slots of interest as a `PerformanceCounters` value.
///
/// `kpc_api` must have been populated by `initRdtsc` beforehand.
/// Returns `error.KpcGetThreadCountersError` when the call reports non-zero.
pub fn getCounters() !PerformanceCounters {
    // Each value is read two slots past the index of its `kpc_config` entry.
    // NOTE(review): presumably the first two slots hold the fixed counters — confirm.
    const slot_offset = 2;

    const buffer_address = @ptrToInt(&kpc_counters);
    const status = kpc_api.api.kpc_get_thread_counters(0, CountersCount, buffer_address);
    if (status != 0) return error.KpcGetThreadCountersError;

    return PerformanceCounters{
        .cycles = kpc_counters[slot_offset + 0],
        .branches = kpc_counters[slot_offset + 3],
        .missed_branches = kpc_counters[slot_offset + 4],
        .instructions = kpc_counters[slot_offset + 5],
    };
}
const stdout = std.io.getStdOut().writer();
/// Demo: sets up the counters, samples them before and after a loop that sums
/// pseudo-random integers, and prints both samples plus the cycle delta.
///
/// Per the author's note, the program must be run with sudo to work.
pub fn main() !void {
    // Single source of truth for the loop bound and the per-iteration divisor.
    const iterations: usize = 100_000;

    try setupPeformanceCounters();
    const before = try getCounters();
    try stdout.print("Before: {}\n", .{before});

    // Keep the PRNG in a named variable: `random()` returns an interface that
    // points back at the PRNG state, so calling it on a temporary would leave
    // that pointer dangling.
    var prng = std.rand.DefaultPrng.init(0);
    const random = prng.random();

    // NOTE(review): `s` is never read afterwards, so an optimized build may be
    // free to drop this loop — confirm if release-mode numbers matter.
    var s: u64 = 0;
    var i: usize = 0;
    while (i < iterations) : (i += 1) {
        s += random.intRangeAtMost(u64, 0, 10000);
    }

    const after = try getCounters();
    try stdout.print("After: {}\n", .{after});

    const delta = after.cycles - before.cycles;
    // f64 keeps the ratio exact for cycle counts beyond f32's 24-bit mantissa.
    const cycles_per_iteration = @intToFloat(f64, delta) / @intToFloat(f64, iterations);
    try stdout.print("{} cycles, {d:.2} cycles/iteration\n", .{ delta, cycles_per_iteration });
}
@dmbfm
Copy link
Author

dmbfm commented Dec 13, 2021

Just a note: you must run this with sudo to work!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment