Last active
December 6, 2024 23:17
-
-
Save iximeow/62c967d2fad28d70aa333f8310bfac4c to your computer and use it in GitHub Desktop.
msr read characteristics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//! # hey! you're curious about the characteristics of reading various MSRs? me too! | |
//! | |
//! this reads a few registers (TSC, AMD RAPL core and package power) 2M times and reports a bit | |
//! about the adventure. you need root wherever you run it. runs on linux (need the `msr` module | |
//! loaded) or illumos. | |
//! | |
//! you probably want to bind this to a single CPU when run. if you don't, it might work | |
//! reasonably, or if it's task-switched between cores while running, core-specific MSRs (RAPL core | |
//! power) will have nonsense skew. | |
//! | |
//! on linux that's something like `sudo taskset -c 0 ./msr_peek` | |
//! on illumos that's something like `pfexec pbind -e 0 ./msr_peek` | |
//! | |
//! this is written to be buildable as a standalone .rs. you can build it with cargo, if you want. | |
//! but you can also `rustc -C opt-level=3 msr_peek.rs`. | |
//! | |
//! # results | |
//! | |
//! ## 3950x/linux | |
//! | |
//! [13:54:06] # iximeow:~> sudo taskset -c 0 ./msr_peek | |
//! [sudo] password for iximeow: | |
//! read 2000000 samples in 7.683337ms (3ns each) | |
//! /!\ MSR 0x000010, TSC /!\ | |
//! read 2000000 samples in 435.246707ms (217ns each) | |
//! changed by 3495367.75 per ms (total diff: 1520484945) | |
//! averaged 1.00 consecutive read per change, for 217ns per change | |
//! average change per tick: 760.24 | |
//! /!\ MSR 0xc001029a, RAPL Core Power /!\ | |
//! read 2000000 samples in 484.547539ms (242ns each) | |
//! changed by 510.72 per ms (total diff: 247189) | |
//! averaged 4123.71 consecutive reads per change, for 997766ns per change | |
//! average change per tick: 510.72 | |
//! /!\ MSR 0xc001029b, RAPL Package Power /!\ | |
//! read 2000000 samples in 558.854677ms (279ns each) | |
//! changed by 3413.75 per ms (total diff: 1904871) | |
//! averaged 3577.82 consecutive reads per change, for 997983ns per change | |
//! average change per tick: 3413.75 | |
//! | |
//! ## 7950x/illumos | |
//! | |
//! root@helios:/rpool/devel/oxide/msr_peek# pbind -e 0 ./target/release/msr_peek | |
//! read 2000000 samples in 8.852352ms (4ns each) | |
//! /!\ MSR 0x000010, TSC /!\ | |
//! read 2000000 samples in 716.010781ms (358ns each) | |
//! changed by 4499621.50 per ms (total diff: 3221729010) | |
//! averaged 1.00 consecutive read per change, for 358ns per change | |
//! average change per tick: 1610.87 | |
//! /!\ MSR 0xc001029a, RAPL Core Power /!\ | |
//! read 2000000 samples in 939.279162ms (469ns each) | |
//! changed by 10083.45 per ms (total diff: 9468358) | |
//! averaged 2127.66 consecutive reads per change, for 997563ns per change | |
//! average change per tick: 10083.45 | |
//! /!\ MSR 0xc001029b, RAPL Package Power /!\ | |
//! read 2000000 samples in 851.845292ms (425ns each) | |
//! changed by 3044.61 per ms (total diff: 2590965) | |
//! averaged 2344.67 consecutive reads per change, for 996200ns per change | |
//! average change per tick: 3041.04 | |
//! | |
//! ## 7713p/illumos | |
//! | |
//! # pbind -e 0 ./msr_peek | |
//! read 2000000 samples in 11.602298ms (5ns each) | |
//! /!\ MSR 0x000010, TSC /!\ | |
//! read 2000000 samples in 942.356169ms (471ns each) | |
//! changed by 1996969.00 per ms (total diff: 1881144860) | |
//! averaged 1.00 consecutive read per change, for 471ns per change | |
//! average change per tick: 940.57 | |
//! /!\ MSR 0xc001029a, RAPL Core Power /!\ | |
//! read 2000000 samples in 1.040690635s (520ns each) | |
//! changed by 209.42 per ms (total diff: 217794) | |
//! averaged 1924.93 consecutive reads per change, for 1000480ns per change | |
//! average change per tick: 209.82 | |
//! /!\ MSR 0xc001029b, RAPL Package Power /!\ | |
//! read 2000000 samples in 1.167764682s (583ns each) | |
//! changed by 5661.90 per ms (total diff: 6607439) | |
//! averaged 1715.27 consecutive reads per change, for 999845ns per change | |
//! average change per tick: 5671.62 | |
//! | |
//! # remarks | |
//! | |
//! TSC counts at just about the core frequency. no surprise here. TSC updates on every read, | |
//! that's minorly interesting! | |
//! | |
//! in both cases reads go through some layers of abstraction to actually issue an `rdmsr`, so the
//! floor here is not reflective of the hardware's responsiveness to `rdmsr`. it's a syscall,
//! virtual filesystem translation, indirect calls therein, an rdmsr, then any syscall exit
//! overhead w.r.t. uarch state flushing for the kernel/user boundary. a TSC read is pretty fast
//! though, and changes between that and other MSRs are probably actually due to the MSR choice.
//! | |
//! yes, on a 7950x the package power MSR is faster to read than the core power MSR. surprise to | |
//! me! | |
//! | |
//! RAPL registers update about once a millisecond. this is consistent with advice that reads of | |
//! RAPL registers must ensure that measurements are taken more than 1ms apart so as to be | |
//! meaningful. | |
//! | |
//! RAPL registers count in units of 1/65536 joules. the core being used here is under ~100% load, | |
//! so one can expect it to be in its highest power state. RAPL-reported draw on: | |
//! * 3950x: 7.79W core/52.1W package. this system was otherwise busy, other cores active, etc. | |
//! * 7950x: 153.86W core/46.4W package. this system was otherwise idle. | |
//! - note core and package power are measured in separate steps. this measurement suggests | |
//! that measuring package power momentarily interferes with turbo, and is preventing the | |
//! busy core from being kept in its highest-power state. | |
//! * 7713p: 3.2W core/86.5W package. this system was otherwise idle. | |
//! - server cores don't get clocked terribly high even under load. not too surprising here. | |
use std::fmt::Write; | |
use std::os::fd::IntoRawFd; | |
use std::os::fd::RawFd; | |
use std::time::Duration; | |
/// how many back-to-back reads to take of each register: two million.
///
/// if a single read costs a full microsecond, one pass over a register takes a couple of
/// seconds, which is still tolerable for an interactive run.
const SAMPLE_COUNT: usize = 2_000_000;
/// describes one model-specific register that this tool knows how to sample.
#[derive(Clone, Copy)]
struct MsrDesc {
    /// register number handed to `MsrCtx::rdmsr`.
    msr_nr: u64,
    /// human-readable label printed in the report header for this register.
    name: &'static str,
}
/// handle through which MSR reads are issued.
struct MsrCtx {
    /// raw descriptor of the OS device node that services MSR reads.
    msr_fd: RawFd,
}
impl MsrCtx { | |
fn new() -> Self { | |
#[cfg(target_os = "illumos")] | |
{ | |
// happens to be "self" as time of query. if we get switched to another CPU, we'll be | |
// checking someone else's MSRs with IPIs. sppoky!!! pbind to a single cpu for best | |
// results. | |
let cpu_file = std::fs::File::open("/dev/cpu/self/cpuid").expect("can open cpuid fd"); | |
return Self { | |
msr_fd: cpu_file.into_raw_fd(), | |
}; | |
}; | |
#[cfg(target_os = "linux")] | |
{ | |
// arbitrarily pick cpu 0. run on CPU 0 for best results. | |
let cpu_file = std::fs::File::open("/dev/cpu/0/msr").expect("can open msr fd"); | |
return Self { | |
msr_fd: cpu_file.into_raw_fd(), | |
}; | |
}; | |
#[cfg(not(any(target_os = "illumos", target_os = "linux")))] | |
panic!("unsupported target OS"); | |
} | |
#[inline(always)] | |
fn rdmsr(&self, msr: u64) -> u64 { | |
#[cfg(target_os = "illumos")] | |
{ | |
// trusting the system can provide an ioctl here instead of depending on libc and | |
// requiring cargo to build this file... | |
extern "C" { | |
// `request` is an `int` on illumos, `unsigned long` on linux. | |
fn ioctl( | |
fd: std::os::fd::RawFd, | |
op: std::ffi::c_int, | |
param: *mut std::ffi::c_void, | |
) -> u64; | |
} | |
const CPUID_IOC: i32 = | |
((b'c' as i32) << 24) | ((b'i' as i32) << 16) | ((b'd' as i32) << 8); | |
const CPUID_RDMSR: i32 = CPUID_IOC | 1; | |
#[repr(C)] | |
struct CpuidRdmsr { | |
msr_nr: u64, | |
msr_val: u64, | |
} | |
let mut crm: CpuidRdmsr = CpuidRdmsr { | |
msr_nr: msr, | |
msr_val: 0, | |
}; | |
let res = unsafe { | |
ioctl( | |
self.msr_fd, | |
CPUID_RDMSR, | |
&mut crm as *mut CpuidRdmsr as *mut std::ffi::c_void, | |
) | |
}; | |
assert_eq!(res, 0); | |
return crm.msr_val as u64; | |
} | |
#[cfg(target_os = "linux")] | |
{ | |
extern "C" { | |
fn pread( | |
fd: std::os::fd::RawFd, | |
buf: *mut std::ffi::c_void, | |
count: isize, | |
offset: u64, | |
) -> isize; | |
} | |
let mut v: u64 = 0; | |
let res = unsafe { | |
pread( | |
self.msr_fd, | |
&mut v as *mut u64 as *mut [u8; 8] as *mut std::ffi::c_void, | |
8, | |
msr, | |
) | |
}; | |
assert_eq!(res, 8); | |
return v; | |
} | |
#[cfg(not(any(target_os = "illumos", target_os = "linux")))] | |
panic!("unsupported target OS"); | |
} | |
} | |
const TSC: MsrDesc = MsrDesc { | |
msr_nr: 0x00000010, | |
name: "TSC", | |
}; | |
const CORE_POWER: MsrDesc = MsrDesc { | |
msr_nr: 0xC001_029A, | |
name: "RAPL Core Power", | |
}; | |
const PACKAGE_POWER: MsrDesc = MsrDesc { | |
msr_nr: 0xC001_029B, | |
name: "RAPL Package Power", | |
}; | |
fn sample_msr(msr_ctx: &MsrCtx, msr: MsrDesc, samples: usize) -> (Vec<u64>, Duration) { | |
let mut buf = Vec::with_capacity(samples); | |
let start = std::time::Instant::now(); | |
for _ in 0..samples { | |
buf.push(msr_ctx.rdmsr(msr.msr_nr)); | |
} | |
(buf, start.elapsed()) | |
} | |
fn no_op(_msr_ctx: &MsrCtx, msr: MsrDesc, samples: usize) -> (Vec<u64>, Duration) { | |
let mut buf = Vec::with_capacity(samples); | |
let start = std::time::Instant::now(); | |
for _ in 0..samples { | |
buf.push(msr.msr_nr); | |
} | |
(buf, start.elapsed()) | |
} | |
// not called by default; handy when you want to dump every sample from an MSR...
#[allow(dead_code)]
/// renders `samples` as text, one decimal value per line, each line newline-terminated.
fn stringify(samples: Vec<u64>) -> String {
    samples.iter().fold(String::new(), |mut text, value| {
        writeln!(text, "{}", value).expect("works");
        text
    })
}
fn show_stats(msr: MsrDesc, samples: Vec<u64>, duration: Duration) { | |
println!("/!\\ MSR {:#08x}, {} /!\\", msr.msr_nr, msr.name); | |
let time_per_sample = duration / samples.len() as u32; | |
println!( | |
"read {} samples in {:?} ({:?} each)", | |
samples.len(), | |
duration, | |
time_per_sample | |
); | |
let first_last_diff = samples[samples.len() - 1] - samples[0]; | |
let diff_per_ms = (first_last_diff as f32) / (duration.as_millis() as f32); | |
println!( | |
"changed by {:.2} per ms (total diff: {})", | |
diff_per_ms, first_last_diff | |
); | |
let min = samples.iter().min().expect("has a min"); | |
let max = samples.iter().max().expect("has a max"); | |
if max - min != first_last_diff { | |
eprintln!("! first was not min or last was not max (wraparound while running?)"); | |
} | |
// consecutive reads may return the same value. some MSRs note that reads must be spaced more | |
// than 1ms apart to compute us / reads_per_tick.len()ful deltas. validate that here. | |
let mut reads_per_tick = Vec::new(); | |
let mut diff_per_tick = Vec::new(); | |
let mut sample_iter = samples.iter(); | |
let mut prev = sample_iter.next().expect("has a sample"); | |
let mut reads_this_tick = 1; | |
for sample in sample_iter { | |
if sample == prev { | |
reads_this_tick += 1; | |
} else { | |
if sample > prev { | |
diff_per_tick.push((sample - prev) as f64); | |
} | |
reads_per_tick.push(reads_this_tick as f64); | |
reads_this_tick = 1; | |
} | |
prev = sample; | |
} | |
reads_per_tick.push(reads_this_tick as f64); | |
let avg_reads_per_tick = reads_per_tick.iter().sum::<f64>() / reads_per_tick.len() as f64; | |
println!( | |
"averaged {:0.2} consecutive {} per change, for {:?}ns per change", | |
avg_reads_per_tick, | |
if avg_reads_per_tick == 1.0 { | |
"read" | |
} else { | |
"reads" | |
}, | |
(time_per_sample * avg_reads_per_tick as u32).as_nanos() | |
); | |
println!( | |
"average change per tick: {:0.2}", | |
diff_per_tick.iter().sum::<f64>() / diff_per_tick.len() as f64 | |
); | |
} | |
pub fn main() { | |
let msr_ctx = MsrCtx::new(); | |
let (no_op_samples, no_op_duration) = no_op(&msr_ctx, TSC, SAMPLE_COUNT); | |
println!( | |
"read {} samples in {:?} ({:?} each)", | |
no_op_samples.len(), | |
no_op_duration, | |
no_op_duration / no_op_samples.len() as u32 | |
); | |
let (tsc_samples, tsc_duration) = sample_msr(&msr_ctx, TSC, SAMPLE_COUNT); | |
show_stats(TSC, tsc_samples, tsc_duration); | |
let (samples, duration) = sample_msr(&msr_ctx, CORE_POWER, SAMPLE_COUNT); | |
show_stats(CORE_POWER, samples, duration); | |
let (samples, duration) = sample_msr(&msr_ctx, PACKAGE_POWER, SAMPLE_COUNT); | |
show_stats(PACKAGE_POWER, samples, duration); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment