Skip to content

Instantly share code, notes, and snippets.

@lifthrasiir
Last active December 10, 2022 21:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lifthrasiir/393ffb3e9900709fa2e3ae2a540b635f to your computer and use it in GitHub Desktop.
Save lifthrasiir/393ffb3e9900709fa2e3ae2a540b635f to your computer and use it in GitHub Desktop.
Precision wall clock benchmark in Rust
target
Cargo.lock

Example Results

Linux 3.13.0 x86-64:

test bench_cgt_clock             ... [example: (1476013433, 628525902)] bench:          48 ns/iter (+/- 0)
test bench_cgt_coarse_clock      ... [example: (1476013437, 455553608)] bench:           8 ns/iter (+/- 0)
test bench_gtod_clock            ... [example: (1476013443, 428944000)] bench:          48 ns/iter (+/- 0)
test bench_vdso_cgt_clock        ... [example: (1476013446, 675103021)] bench:          47 ns/iter (+/- 0)
test bench_vdso_cgt_coarse_clock ... [example: (1476013449, 799553818)] bench:           5 ns/iter (+/- 0)
test bench_vdso_gtod_clock       ... [example: (1476013453, 169117000)] bench:          48 ns/iter (+/- 0)
test bench_x_system_time         ... [example: (1476013456, 837792207)] bench:          58 ns/iter (+/- 0)

macOS 10.11 x86-64:

test bench_gtod_clock  ... [example: (1475435769, 959170000)] bench:          52 ns/iter (+/- 21)
test bench_mach_clock  ... [example: (1475435770, 297107000)] bench:       1,056 ns/iter (+/- 358)
test bench_system_time ... [example: (1475435774, 387928000)] bench:          54 ns/iter (+/- 12)

See also:

[package]
authors = ["Kang Seonghoon <public+git@mearie.org>"]
name = "system-time-bench"
version = "0.0.0"
[lib]
path = "lib.rs"
[dependencies]
libc = "0.2.16"
[profile.bench]
opt-level = 3
lto = true
#![feature(test, asm)]
#![allow(non_camel_case_types)]
extern crate libc;
extern crate test;
use std::mem;
use std::ptr;
use std::sync::{Once, ONCE_INIT};
use std::time::{SystemTime, UNIX_EPOCH};
/// Consumes `dummy` in a way the optimizer cannot see through, so that the
/// computation producing it is not removed from a benchmark loop.
///
/// The value is passed by address as a memory (`"m"`) input operand to an
/// empty inline-assembly template; no instructions are emitted by the template itself.
pub fn black_box<T>(dummy: T) {
// we need to "use" the argument in some way LLVM can't
// introspect.
unsafe { asm!("" : : "m"(&dummy)) }
}
/// Clock based on `gettimeofday`.
/// Precision: 1us; Performance: <100ns/call
pub struct GtodClock;
impl GtodClock {
pub fn get(&self) -> (i64, u32) {
unsafe {
let mut timeval: libc::timeval = mem::uninitialized();
let ret = libc::gettimeofday(&mut timeval, ptr::null_mut());
if ret != 0 { panic!("gettimeofday failed"); }
(timeval.tv_sec as i64, timeval.tv_usec as u32 * 1000)
}
}
}
/// Clock based on `clock_gettime` in Linux.
/// Precision: ~1ns (interpolated); Performance: <100ns/call
#[cfg(target_os = "linux")]
pub struct CgtClock;
#[cfg(target_os = "linux")]
impl CgtClock {
pub fn get(&self) -> (i64, u32) {
unsafe {
let mut timespec: libc::timespec = mem::uninitialized();
let ret = libc::clock_gettime(libc::CLOCK_REALTIME, &mut timespec);
if ret != 0 { panic!("clock_gettime failed"); }
(timespec.tv_sec as i64, timespec.tv_nsec as u32)
}
}
}
/// Clock based on `clock_gettime` with a coarse request in Linux.
/// Precision: 1~10ms depending on kernel config; Performance: ~10ns/call
#[cfg(target_os = "linux")]
pub struct CgtCoarseClock;
#[cfg(target_os = "linux")]
impl CgtCoarseClock {
pub fn get(&self) -> (i64, u32) {
unsafe {
let mut timespec: libc::timespec = mem::uninitialized();
let ret = libc::clock_gettime(libc::CLOCK_REALTIME_COARSE, &mut timespec);
if ret != 0 { panic!("clock_gettime failed"); }
(timespec.tv_sec as i64, timespec.tv_nsec as u32)
}
}
}
#[cfg(target_os = "linux")]
mod vdso {
use std::mem;
use std::ptr;
use std::ffi::CStr;
use libc;
// Auxiliary-vector key handed to `getauxval` by `vdso_sym`; the value returned for it
// is treated as the address of the vDSO's ELF header.
// this is theoretically platform-dependent but glibc essentially fixes it (thank you!)
const AT_SYSINFO_EHDR: libc::c_ulong = 33;
// Pointer-sized unsigned/signed integers used for the word-sized ELF fields and for
// all address arithmetic in `vdso_sym`.
type ElfUsize = libc::uintptr_t;
type ElfIsize = libc::intptr_t;
// Expected class byte (index 4 of `e_ident`), chosen by the target's pointer width.
#[cfg(target_pointer_width = "32")] const ELFCLASS: u8 = 1;
#[cfg(target_pointer_width = "64")] const ELFCLASS: u8 = 2;
// Expected data-encoding byte (index 5 of `e_ident`), chosen by the target's endianness.
#[cfg(target_endian = "little")] const ELFDATA: u8 = 1;
#[cfg(target_endian = "big")] const ELFDATA: u8 = 2;
// The first six bytes `vdso_sym` requires at the start of the image: magic, class, encoding.
const ELFIDENT: [u8; 6] = [0x7f, b'E', b'L', b'F', ELFCLASS, ELFDATA];
// Program header types (`p_type`) that `vdso_sym` looks for.
const PT_LOAD: u32 = 1;
const PT_DYNAMIC: u32 = 2;
// Dynamic section tags (`d_tag`) that `vdso_sym` recognises; `DT_NULL` ends the scan.
const DT_NULL: ElfIsize = 0;
const DT_HASH: ElfIsize = 4;
const DT_STRTAB: ElfIsize = 5;
const DT_SYMTAB: ElfIsize = 6;
const DT_VERSYM: ElfIsize = 0x6ffffff0;
const DT_VERDEF: ElfIsize = 0x6ffffffc;
// Symbol type (low nibble of `st_info`) and bindings (high nibble) accepted by `vdso_sym`.
const STT_FUNC: u8 = 2;
const STB_GLOBAL: u8 = 1;
const STB_WEAK: u8 = 2;
// Section index of symbols that `vdso_sym` skips as undefined.
const SHN_UNDEF: u16 = 0;
// `vd_flags` bit; version definitions carrying it are never matched by `vdso_sym`.
const VER_FLG_BASE: u16 = 0x1;
/// ELF file header as read from the start of the vDSO image.
///
/// `vdso_sym` only inspects `e_ident`, `e_phoff`, `e_phentsize` and `e_phnum`; the
/// remaining fields are declared so that those sit at the right offsets.
/// NOTE(review): field order is presumably that of `Elf32_Ehdr`/`Elf64_Ehdr` in
/// `<elf.h>` -- confirm before changing anything here.
#[repr(C)]
struct ElfEhdr {
// identification bytes; the first six are compared against `ELFIDENT`
e_ident: [u8; 16],
e_type: u16,
e_machine: u16,
e_version: u32,
e_entry: ElfUsize,
// offset of the program header table, added to the image base
e_phoff: ElfIsize,
e_shoff: ElfIsize,
e_flags: u32,
e_ehsize: u16,
// stride between consecutive program headers
e_phentsize: u16,
// number of program headers to walk
e_phnum: u16,
e_shentsize: u16,
e_shnum: u16,
e_shstrndx: u16,
}
/// ELF program header entry.
///
/// `p_flags` is declared at a different position depending on the pointer width:
/// right after `p_type` on 64-bit targets, and before `p_align` on 32-bit targets.
/// `vdso_sym` reads `p_type`, `p_offset` and `p_vaddr`.
#[repr(C)]
struct ElfPhdr {
p_type: u32,
#[cfg(target_pointer_width = "64")] p_flags: u32,
p_offset: ElfIsize,
p_vaddr: ElfUsize,
p_paddr: ElfUsize,
p_filesz: ElfUsize,
p_memsz: ElfUsize,
#[cfg(target_pointer_width = "32")] p_flags: u32,
p_align: ElfUsize,
}
/// One entry of the dynamic section: a tag and a pointer-sized payload.
///
/// `vdso_sym` walks an array of these until it meets a `DT_NULL` tag.
#[repr(C)]
struct ElfDyn {
d_tag: ElfIsize,
d_ptr: ElfIsize, // overlaps with d_val, which we never use
}
/// ELF symbol table entry.
///
/// `st_value` and `st_size` are declared right after `st_name` on 32-bit targets and
/// at the end of the struct on 64-bit targets. `vdso_sym` reads `st_name` (offset into
/// the string table), `st_info` (type in the low nibble, binding in the high nibble),
/// `st_shndx` and `st_value`.
#[repr(C)]
struct ElfSym {
st_name: u32,
#[cfg(target_pointer_width = "32")] st_value: ElfUsize,
#[cfg(target_pointer_width = "32")] st_size: ElfUsize,
st_info: u8,
st_other: u8,
st_shndx: u16,
#[cfg(target_pointer_width = "64")] st_value: ElfUsize,
#[cfg(target_pointer_width = "64")] st_size: ElfUsize,
}
/// Entry of the version-symbol table; `vdso_sym` indexes it with the symbol index and
/// masks the value with `0x7fff` before comparing it to `vd_ndx`.
type ElfVersym = u16;
/// Version definition record.
///
/// `vdso_sym` reads `vd_flags`, `vd_ndx`, `vd_aux` (byte offset from this record to its
/// `ElfVerdaux`) and `vd_next` (byte offset to the next record, 0 at the end of the list).
#[repr(C)]
struct ElfVerDef {
vd_version: u16,
vd_flags: u16,
vd_ndx: u16,
vd_cnt: u16,
vd_hash: u32,
vd_aux: u32,
vd_next: u32,
}
/// Auxiliary record of a version definition.
///
/// `vdso_sym` uses `vda_name` as an offset into the string table to obtain the
/// version name; `vda_next` is not read.
#[repr(C)]
struct ElfVerdaux {
vda_name: u32,
vda_next: u32,
}
// Declared by hand rather than taken from the `libc` crate.
// NOTE(review): presumably because the pinned `libc` version does not export it -- confirm.
extern "system" {
// Looks up the auxiliary-vector entry `type_`; `vdso_sym` treats a result of 0 as
// "no vDSO available".
fn getauxval(type_: libc::c_ulong) -> libc::c_ulong;
}
// Unwraps an `Option` expression, returning `None` from the enclosing function
// when the expression evaluates to `None`.
macro_rules! try_opt {
    ($e:expr) => {
        if let Some(value) = $e { value } else { return None }
    };
}
// Write-once assignment for an `Option` variable: stores `Some(value)` when the
// variable is still `None`, and returns `None` from the enclosing function when it
// has already been filled. The value expression is only evaluated in the former case.
macro_rules! set {
    ($slot:ident <- $value:expr) => {{
        if $slot.is_none() {
            $slot = Some($value);
        } else {
            return None;
        }
    }};
}
// partially adapted from linux /Documentation/vDSO/parse_vdso.c
/// Looks up a function symbol in the vDSO image mapped into the current process.
///
/// `name` and `version` are compared byte-for-byte (without a trailing NUL) against the
/// NUL-terminated symbol name and version name stored in the image. The version is only
/// checked when the image has both a `DT_VERSYM` and a `DT_VERDEF` entry.
///
/// Returns the address `load_offset + st_value` of the first matching symbol, or `None`
/// when:
///
/// * `getauxval(AT_SYSINFO_EHDR)` yields 0, or the address does not start with `ELFIDENT`;
/// * `PT_LOAD`, `PT_DYNAMIC`, `DT_HASH`, `DT_STRTAB` or `DT_SYMTAB` is missing;
/// * any of those, or `DT_VERSYM` / `DT_VERDEF`, occurs more than once;
/// * no defined global or weak function symbol matches `name` and `version`.
///
/// NOTE(review): the function is `unsafe` and dereferences addresses derived from the
/// auxiliary vector without any bounds checking; no caller-side precondition is spelled
/// out in this file.
unsafe fn vdso_sym(name: &[u8], version: &[u8]) -> Option<*const libc::c_void> {
// the auxv is passed through the main stack, which has to be retrieved by libc.
// if vDSO is supported this particular auxv should be non-zero.
let base = getauxval(AT_SYSINFO_EHDR) as ElfIsize;
if base == 0 { return None; }
// make sure the result really points at an ELF header, just in case.
let ehdr: *const ElfEhdr = mem::transmute(base);
if (*ehdr).e_ident[..6] != ELFIDENT { return None; }
// extract the load offset and the offset to dynamic linking information.
// `set!` bails out with `None` if either segment type shows up a second time.
let mut phdrbase = base + (*ehdr).e_phoff;
let mut load_offset = None;
let mut dyn = None;
for _ in 0..(*ehdr).e_phnum {
let phdr: *const ElfPhdr = mem::transmute(phdrbase);
match (*phdr).p_type {
PT_LOAD => set!(
load_offset <- base + (*phdr).p_offset - (*phdr).p_vaddr as ElfIsize
),
PT_DYNAMIC => set!(dyn <- base + (*phdr).p_offset),
_ => {},
}
// advance by the entry size recorded in the header, not by size_of::<ElfPhdr>()
phdrbase += (*ehdr).e_phentsize as ElfIsize;
}
let load_offset = try_opt!(load_offset);
let mut dyn: *const ElfDyn = mem::transmute(try_opt!(dyn));
// parse dynamic linking information.
// every recorded pointer is rebased by `load_offset`; duplicates abort via `set!`.
let mut hash = None;
let mut symstrings = None;
let mut symtab = None;
let mut versym = None;
let mut verdef = None;
loop {
match (*dyn).d_tag {
DT_NULL => break,
DT_STRTAB => set!(symstrings <- (*dyn).d_ptr + load_offset),
DT_SYMTAB => set!(symtab <- (*dyn).d_ptr + load_offset),
DT_HASH => set!(hash <- (*dyn).d_ptr + load_offset),
DT_VERSYM => set!(versym <- (*dyn).d_ptr + load_offset),
DT_VERDEF => set!(verdef <- (*dyn).d_ptr + load_offset),
_ => {},
}
dyn = dyn.offset(1);
}
// the hash table, string table and symbol table are mandatory.
let hash: *const u32 = mem::transmute(try_opt!(hash));
let symstrings: *const libc::c_char = mem::transmute(try_opt!(symstrings));
let symtab: *const ElfSym = mem::transmute(try_opt!(symtab));
// version information is optional, but only usable when both tables are present.
let vers: Option<(*const ElfVersym, ElfIsize)> = match (versym, verdef) {
(Some(sym), Some(def)) => Some((mem::transmute(sym), def)),
(_, _) => None,
};
// traverse the symbol table to get the desired symbol with given version.
// this differs from parse_vdso.c in that we don't perform hashing;
// we are doing this only once, so we are fine as long as this is reasonably fast.
// the second word of the hash table is used as the number of symbols to scan.
let nchain = *hash.offset(1) as isize;
'restart: for chain in 0..nchain {
let sym = symtab.offset(chain);
// only pass a defined global or weak function
// (type is the low nibble of st_info, binding the high nibble)
let st_type = (*sym).st_info & 0xf;
let st_bind = (*sym).st_info >> 4;
if st_type != STT_FUNC { continue; }
if st_bind != STB_GLOBAL && st_bind != STB_WEAK { continue; }
if (*sym).st_shndx == SHN_UNDEF { continue; }
// filter by the name
let st_name = CStr::from_ptr(symstrings.offset((*sym).st_name as isize));
if st_name.to_bytes() != name { continue; }
// filter by the version if present
if let Some((versym, mut verdef)) = vers {
const MASK: u16 = 0x7fff; // strip the "hidden" bit
let ver = *versym.offset(chain) & MASK;
// walk the version definition list looking for the record whose index equals
// this symbol's version index; records flagged VER_FLG_BASE are skipped.
loop {
let def: *const ElfVerDef = mem::transmute(verdef);
if (*def).vd_flags & VER_FLG_BASE == 0 && (*def).vd_ndx & MASK == ver {
// found the record: its auxiliary entry names the version.
let verdaux: *const ElfVerdaux =
mem::transmute(verdef + (*def).vd_aux as ElfIsize);
let st_vername =
CStr::from_ptr(symstrings.offset((*verdaux).vda_name as isize));
// wrong version: move on to the next symbol.
if st_vername.to_bytes() != version { continue 'restart; }
break;
} else if (*def).vd_next == 0 {
// end of the list without a matching record: move on to the next symbol.
continue 'restart;
} else {
// vd_next is a byte offset relative to the current record.
verdef += (*def).vd_next as isize;
}
}
}
// name (and version, when checked) matched: rebase the symbol value.
return Some(mem::transmute(load_offset + (*sym).st_value as ElfIsize));
}
None
}
/// Name and version of a vDSO symbol, or `None` when no symbol is known for the
/// architecture being compiled for.
type VdsoSymbol = Option<(&'static [u8], &'static [u8])>; // name, version

// vDSO symbol providing `gettimeofday`, selected per target architecture.
//
// `target_arch` only ever takes rustc's coarse architecture names: every 32-bit x86
// target (i386/i586/i686) reports "x86", every ARM variant "arm", both MIPS byte
// orders "mips", and both 64-bit PowerPC byte orders "powerpc64". The previous lists
// used target-triple prefixes that never match, so 32-bit x86 fell through to `None`.
//
// TODO should really use cfg_if!
#[cfg(any(target_arch = "arm", target_arch = "x86", target_arch = "x86_64"))]
const VDSO_GTOD: VdsoSymbol = Some((b"__vdso_gettimeofday", b"LINUX_2.6"));
#[cfg(target_arch = "mips")]
const VDSO_GTOD: VdsoSymbol = Some((b"__kernel_gettimeofday", b"LINUX_2.6"));
#[cfg(target_arch = "aarch64")]
const VDSO_GTOD: VdsoSymbol = Some((b"__kernel_gettimeofday", b"LINUX_2.6.39"));
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
const VDSO_GTOD: VdsoSymbol = Some((b"__kernel_gettimeofday", b"LINUX_2.6.15"));
// Fallback: architectures without a known symbol report the vDSO clock as unavailable.
#[cfg(not(any(
    target_arch = "arm", target_arch = "x86", target_arch = "x86_64",
    target_arch = "mips", target_arch = "aarch64",
    target_arch = "powerpc", target_arch = "powerpc64",
)))]
const VDSO_GTOD: VdsoSymbol = None;
type VdsoGtodFunc = extern "C" fn(*mut libc::timeval, *mut libc::c_void) -> libc::c_int;
/// Clock based on the vDSO `gettimeofday` call in Linux.
/// Precision: 1us; Performance: <100ns/call, very slightly faster than syscall
pub struct VdsoGtodClock {
func: VdsoGtodFunc,
}
impl VdsoGtodClock {
pub fn new() -> Option<VdsoGtodClock> {
let (name, version) = try_opt!(VDSO_GTOD);
unsafe {
let sym = try_opt!(vdso_sym(name, version));
let func: VdsoGtodFunc = mem::transmute(sym);
Some(VdsoGtodClock { func: func })
}
}
#[inline(always)]
pub fn get(&self) -> (i64, u32) {
unsafe {
let mut timeval: libc::timeval = mem::uninitialized();
let ret = (self.func)(&mut timeval, ptr::null_mut());
if ret != 0 { panic!("gettimeofday failed"); }
(timeval.tv_sec as i64, timeval.tv_usec as u32 * 1000)
}
}
}
// vDSO symbol providing `clock_gettime`, selected per target architecture.
//
// `target_arch` only ever takes rustc's coarse architecture names: every 32-bit x86
// target (i386/i586/i686) reports "x86", every ARM variant "arm", both MIPS byte
// orders "mips", and both 64-bit PowerPC byte orders "powerpc64". The previous lists
// used target-triple prefixes that never match, so 32-bit x86 fell through to `None`.
//
// TODO should really use cfg_if!
#[cfg(any(target_arch = "arm", target_arch = "x86", target_arch = "x86_64"))]
const VDSO_CGT: VdsoSymbol = Some((b"__vdso_clock_gettime", b"LINUX_2.6"));
#[cfg(target_arch = "mips")]
const VDSO_CGT: VdsoSymbol = Some((b"__kernel_clock_gettime", b"LINUX_2.6"));
#[cfg(target_arch = "aarch64")]
const VDSO_CGT: VdsoSymbol = Some((b"__kernel_clock_gettime", b"LINUX_2.6.39"));
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
const VDSO_CGT: VdsoSymbol = Some((b"__kernel_clock_gettime", b"LINUX_2.6.15"));
// Fallback: architectures without a known symbol report the vDSO clock as unavailable.
#[cfg(not(any(
    target_arch = "arm", target_arch = "x86", target_arch = "x86_64",
    target_arch = "mips", target_arch = "aarch64",
    target_arch = "powerpc", target_arch = "powerpc64",
)))]
const VDSO_CGT: VdsoSymbol = None;
type VdsoCgtFunc = extern "C" fn(libc::clockid_t, *mut libc::timespec) -> libc::c_int;
/// Clock based on the vDSO `clock_gettime` call in Linux.
/// Precision: ~1ns (interpolated);
/// Performance: <100ns/call, very slightly faster than syscall
pub struct VdsoCgtClock {
func: VdsoCgtFunc,
}
impl VdsoCgtClock {
pub fn new() -> Option<VdsoCgtClock> {
let (name, version) = try_opt!(VDSO_CGT);
unsafe {
let sym = try_opt!(vdso_sym(name, version));
let func: VdsoCgtFunc = mem::transmute(sym);
Some(VdsoCgtClock { func: func })
}
}
#[inline(always)]
pub fn get(&self) -> (i64, u32) {
unsafe {
let mut timespec: libc::timespec = mem::uninitialized();
let ret = (self.func)(libc::CLOCK_REALTIME, &mut timespec);
if ret != 0 { panic!("clock_gettime vDSO call failed"); }
(timespec.tv_sec as i64, timespec.tv_nsec as u32)
}
}
}
/// Clock based on the vDSO `clock_gettime` call with a coarse request in Linux.
/// Precision: 1~10ms depending on kernel config;
/// Performance: ~5ns/call, slightly faster than syscall
pub struct VdsoCgtCoarseClock {
func: VdsoCgtFunc,
}
impl VdsoCgtCoarseClock {
pub fn new() -> Option<VdsoCgtCoarseClock> {
let (name, version) = try_opt!(VDSO_CGT);
unsafe {
let sym = try_opt!(vdso_sym(name, version));
let func: VdsoCgtFunc = mem::transmute(sym);
Some(VdsoCgtCoarseClock { func: func })
}
}
#[inline(always)]
pub fn get(&self) -> (i64, u32) {
unsafe {
let mut timespec: libc::timespec = mem::uninitialized();
let ret = (self.func)(libc::CLOCK_REALTIME_COARSE, &mut timespec);
if ret != 0 { panic!("clock_gettime vDSO call failed"); }
(timespec.tv_sec as i64, timespec.tv_nsec as u32)
}
}
}
}
#[cfg(target_os = "linux")]
use self::vdso::{VdsoGtodClock, VdsoCgtClock, VdsoCgtCoarseClock};
#[cfg(any(target_os = "macos", target_os = "ios"))]
mod mach {
use std::mem;
use libc;
// Hand-written aliases for the Mach types that appear in the foreign function
// signatures declared in this module. Every port-like type is an alias of
// `mach_port_t`, and the status/identifier types are aliases of `c_int`.
type mach_port_t = libc::c_uint;
type mach_port_name_t = mach_port_t;
type host_t = mach_port_t;
type clock_serv_t = mach_port_t;
type ipc_space_t = mach_port_t;
type clock_id_t = libc::c_int;
type kern_return_t = libc::c_int;
type clock_res_t = libc::c_int;
// Output structure of `clock_get_time`: whole seconds plus a sub-second part that
// `MachClock::get` reports as nanoseconds.
#[repr(C)] struct mach_timespec_t { tv_sec: libc::c_uint, tv_nsec: clock_res_t }
// Clock identifier passed to `host_get_clock_service` by `MachClock::new`.
const CALENDAR_CLOCK: clock_id_t = 1;
// Foreign Mach functions and data used by `MachClock`. The functions returning
// `kern_return_t` are treated as successful when the result is 0.
extern "system" {
// host port handed to `host_get_clock_service`
fn mach_host_self() -> mach_port_t;
// writes the port of the clock service selected by `id` into `clock_serv`
fn host_get_clock_service(host: host_t, id: clock_id_t, clock_serv: *mut clock_serv_t) -> kern_return_t;
// writes the current reading of the clock service into `cur_time`
fn clock_get_time(clock_serv: clock_serv_t, cur_time: *mut mach_timespec_t) -> kern_return_t;
// releases the port `name` held by `task`
fn mach_port_deallocate(task: ipc_space_t, name: mach_port_name_t) -> kern_return_t;
static mach_task_self_: mach_port_t; // mach_task_self()
}
/// Clock based on the Mach clock service in macOS.
/// Precision: 1us (yes, interface is misleading; see http://stackoverflow.com/a/21352348);
/// Performance: ~1000ns/call
pub struct MachClock {
clock_serv: clock_serv_t,
}
impl MachClock {
pub fn new() -> MachClock {
unsafe {
let mut clock_serv: clock_serv_t = mem::uninitialized();
let ret = host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &mut clock_serv);
if ret != 0 { panic!("host_get_clock_service failed (error {})", ret); }
MachClock { clock_serv: clock_serv }
}
}
pub fn get(&self) -> (i64, u32) {
unsafe {
let mut timespec: mach_timespec_t = mem::uninitialized();
let ret = clock_get_time(self.clock_serv, &mut timespec);
if ret != 0 { panic!("clock_get_time failed (error {})", ret); }
(timespec.tv_sec as i64, timespec.tv_nsec as u32)
}
}
}
impl Drop for MachClock {
    /// Hands the clock service port acquired in `MachClock::new` back to the task.
    fn drop(&mut self) {
        let port = self.clock_serv;
        // The status returned by the deallocation call is ignored.
        unsafe {
            let _ = mach_port_deallocate(mach_task_self_, port);
        }
    }
}
}
#[cfg(any(target_os = "macos", target_os = "ios"))]
use self::mach::MachClock;
/// Benchmarks `GtodClock::get`.
///
/// One sample reading is printed first; the `Once` guard limits that to a single
/// print per process.
#[bench]
fn bench_gtod_clock(bh: &mut test::Bencher) {
    static EXAMPLE: Once = ONCE_INIT;
    let clock = GtodClock;
    EXAMPLE.call_once(|| {
        let sample = clock.get();
        print!("[example: {:?}] ", sample);
    });
    bh.iter(|| black_box(clock.get()));
}
/// Benchmarks `CgtClock::get`.
///
/// One sample reading is printed first; the `Once` guard limits that to a single
/// print per process.
#[cfg(target_os = "linux")]
#[bench]
fn bench_cgt_clock(bh: &mut test::Bencher) {
    static EXAMPLE: Once = ONCE_INIT;
    let clock = CgtClock;
    EXAMPLE.call_once(|| {
        let sample = clock.get();
        print!("[example: {:?}] ", sample);
    });
    bh.iter(|| black_box(clock.get()));
}
/// Benchmarks `CgtCoarseClock::get`.
///
/// One sample reading is printed first; the `Once` guard limits that to a single
/// print per process.
#[cfg(target_os = "linux")]
#[bench]
fn bench_cgt_coarse_clock(bh: &mut test::Bencher) {
    static EXAMPLE: Once = ONCE_INIT;
    let clock = CgtCoarseClock;
    EXAMPLE.call_once(|| {
        let sample = clock.get();
        print!("[example: {:?}] ", sample);
    });
    bh.iter(|| black_box(clock.get()));
}
/// Benchmarks `VdsoGtodClock::get`.
///
/// Panics with "failed to initialize vDSO" when the clock cannot be constructed.
/// One sample reading is printed first; the `Once` guard limits that to a single
/// print per process.
#[cfg(target_os = "linux")]
#[bench]
fn bench_vdso_gtod_clock(bh: &mut test::Bencher) {
    static EXAMPLE: Once = ONCE_INIT;
    let clock = VdsoGtodClock::new().expect("failed to initialize vDSO");
    EXAMPLE.call_once(|| {
        let sample = clock.get();
        print!("[example: {:?}] ", sample);
    });
    bh.iter(|| black_box(clock.get()));
}
/// Benchmarks `VdsoCgtClock::get`.
///
/// Panics with "failed to initialize vDSO" when the clock cannot be constructed.
/// One sample reading is printed first; the `Once` guard limits that to a single
/// print per process.
#[cfg(target_os = "linux")]
#[bench]
fn bench_vdso_cgt_clock(bh: &mut test::Bencher) {
    static EXAMPLE: Once = ONCE_INIT;
    let clock = VdsoCgtClock::new().expect("failed to initialize vDSO");
    EXAMPLE.call_once(|| {
        let sample = clock.get();
        print!("[example: {:?}] ", sample);
    });
    bh.iter(|| black_box(clock.get()));
}
/// Benchmarks `VdsoCgtCoarseClock::get`.
///
/// Panics with "failed to initialize vDSO" when the clock cannot be constructed.
/// One sample reading is printed first; the `Once` guard limits that to a single
/// print per process.
#[cfg(target_os = "linux")]
#[bench]
fn bench_vdso_cgt_coarse_clock(bh: &mut test::Bencher) {
    static EXAMPLE: Once = ONCE_INIT;
    let clock = VdsoCgtCoarseClock::new().expect("failed to initialize vDSO");
    EXAMPLE.call_once(|| {
        let sample = clock.get();
        print!("[example: {:?}] ", sample);
    });
    bh.iter(|| black_box(clock.get()));
}
/// Benchmarks `MachClock::get`.
///
/// One sample reading is printed first; the `Once` guard limits that to a single
/// print per process.
#[cfg(any(target_os = "macos", target_os = "ios"))]
#[bench]
fn bench_mach_clock(bh: &mut test::Bencher) {
    static EXAMPLE: Once = ONCE_INIT;
    let clock = MachClock::new();
    EXAMPLE.call_once(|| {
        let sample = clock.get();
        print!("[example: {:?}] ", sample);
    });
    bh.iter(|| black_box(clock.get()));
}
/// Benchmarks `SystemTime::now()` from the standard library.
///
/// One sample reading, expressed as `(seconds, nanoseconds)` since `UNIX_EPOCH`, is
/// printed first; the `Once` guard limits that to a single print per process.
/// The timed closure measures only `SystemTime::now()`, not the conversion.
#[bench]
fn bench_x_system_time(bh: &mut test::Bencher) {
    static EXAMPLE: Once = ONCE_INIT;
    EXAMPLE.call_once(|| {
        let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
        let sample = (since_epoch.as_secs(), since_epoch.subsec_nanos());
        print!("[example: {:?}] ", sample);
    });
    bh.iter(|| black_box(SystemTime::now()));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment