Skip to content

Instantly share code, notes, and snippets.

@MaxGraey
Last active September 16, 2020 19:52
Show Gist options
  • Save MaxGraey/a826c71909353e3a28a54e8a749c06ac to your computer and use it in GitHub Desktop.
Save MaxGraey/a826c71909353e3a28a54e8a749c06ac to your computer and use it in GitHub Desktop.
Benchmark different nearest functions for Rust
[package]
name = "bench"
version = "0.1.0"
authors = ["MaxGraey <maxgraey@gmail.com>"]
[profile.bench]
codegen-units = 1
opt-level = 3
lto = true
debug = false
rpath = false
debug-assertions = false
panic = 'unwind'
[profile.release]
codegen-units = 1
opt-level = 3
lto = true
debug = true
rpath = false
debug-assertions = false
panic = 'unwind'
#![feature(stdsimd)]
#![feature(test)]
extern crate test;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
/// 2^52 — the magnitude from which every finite `f64` is already an integer.
///
/// Adding and then subtracting this value to a smaller non-negative number
/// makes the floating-point addition itself round the fraction away, using
/// the rounding mode currently in effect.
const TOINT_64: f64 = (1_u64 << (f64::MANTISSA_DIGITS - 1)) as f64;
/// Rounds `x` to the nearest integer, sending exact halves to the even
/// neighbour, using only `ceil` and `floor`.
///
/// A zero input is returned untouched so that `-0.0` keeps its sign.
pub extern "C" fn f64_nearest_original(x: f64) -> f64 {
    // Hand zeros straight back: this is what keeps the sign of `-0.0`.
    if x == 0.0 {
        return x;
    }

    let upper = x.ceil();
    let lower = x.floor();
    let dist_to_upper = (x - upper).abs();
    let dist_to_lower = (x - lower).abs();

    // The upper neighbour is strictly closer.
    if dist_to_upper < dist_to_lower {
        return upper;
    }

    // Exact tie: take the upper neighbour only when it is even, i.e. when
    // halving it leaves no fractional part.
    if dist_to_upper == dist_to_lower {
        let half = upper / 2.;
        if half.floor() == half {
            return upper;
        }
    }

    lower
}
/// Rounds `x` to the nearest integer (ties to even under the default
/// rounding mode) by adding and subtracting `TOINT_64`, choosing the order
/// of the two operations from the sign bit of `x`.
///
/// Zeros, NaN, infinities and values whose magnitude is at least 2^52 are
/// returned unchanged. When a negative input rounds to zero the result is
/// `-0.0`, so the sign of the input is always preserved.
pub extern "C" fn f64_nearest_branch(x: f64) -> f64 {
    // Zero is already an integer; returning it as-is keeps the sign of `-0.0`.
    if x == 0.0 {
        return x;
    }

    let bits = x.to_bits();
    let biased_exponent = bits >> 52 & 0x7ff;

    // A biased exponent of 0x3ff + 52 or more means |x| >= 2^52 (or x is
    // NaN / infinite): there is no fractional part left to round.
    if biased_exponent >= 0x3ff + 52 {
        return x;
    }

    let negative = bits >> 63 != 0;
    let rounded = if negative {
        x - TOINT_64 + TOINT_64
    } else {
        x + TOINT_64 - TOINT_64
    };

    // `-2^52 + 2^52` evaluates to `+0.0`, so inputs in [-0.5, 0) would lose
    // their sign here; put it back explicitly.
    if rounded == 0.0 {
        if negative {
            -0.0
        } else {
            0.0
        }
    } else {
        rounded
    }
}
/// Rounds `x` to the nearest integer by adding and subtracting `TOINT_64`
/// in a sign-dependent order, then copies the sign of `x` onto the result so
/// that values rounding to zero keep the sign of the input.
///
/// Inputs with a biased exponent of `0x3ff + 52` or more (|x| >= 2^52, NaN,
/// infinities) are returned unchanged.
pub extern "C" fn f64_nearest_copysign(x: f64) -> f64 {
    let raw = x.to_bits();
    let biased_exponent = raw >> 52 & 0x7ff;

    // Nothing to round once the exponent says the value is integral.
    if biased_exponent >= 0x3ff + 52 {
        return x;
    }

    let sign_bit_set = raw >> 63 != 0;
    let snapped = if sign_bit_set {
        x - TOINT_64 + TOINT_64
    } else {
        x + TOINT_64 - TOINT_64
    };

    // Restores the sign when `snapped` collapsed to `+0.0`.
    snapped.copysign(x)
}
/// Rounds `x` to the nearest integer by rounding its absolute value with the
/// add/subtract `TOINT_64` trick and then copying the sign of `x` back on.
///
/// Inputs with a biased exponent of `0x3ff + 52` or more (|x| >= 2^52, NaN,
/// infinities) are returned unchanged.
pub extern "C" fn f64_nearest_abs_copysign(x: f64) -> f64 {
    let biased_exponent = x.to_bits() >> 52 & 0x7ff;
    if biased_exponent >= 0x3ff + 52 {
        return x;
    }

    // Working on |x| means a single add/subtract order suffices.
    let rounded_magnitude = x.abs() + TOINT_64 - TOINT_64;
    rounded_magnitude.copysign(x)
}
/// Rounds `x` to the nearest integer like the abs/copysign variant, but
/// decides whether rounding is needed with a floating-point comparison of
/// `|x|` against 2^52 instead of inspecting the exponent bits of `x`.
///
/// NaN, infinities and values with |x| >= 2^52 are returned unchanged.
pub extern "C" fn f64_nearest_abs_copysign_without_bits(x: f64) -> f64 {
    // Biased exponent 0x3ff + 52 with an all-zero mantissa, i.e. 2^52.
    let threshold = f64::from_bits((0x3ff_u64 + 52) << 52_u64);
    let magnitude = x.abs();

    // NaN never compares below the threshold, so it is passed through too.
    if magnitude >= threshold || magnitude.is_nan() {
        return x;
    }

    let rounded_magnitude = magnitude + TOINT_64 - TOINT_64;
    rounded_magnitude.copysign(x)
}
/// A pair of `f64` values with C field layout and 16-byte alignment, so the
/// pair occupies one 16-byte-aligned, 16-byte-wide slot in memory.
#[repr(C, align(16))]
struct F64x2(f64, f64);
/// Rounds `x` to an integer with the SSE4.1 `roundsd` instruction, using the
/// rounding mode currently set in MXCSR (`_MM_FROUND_RINT`); under the
/// default mode that is round-to-nearest, ties to even.
///
/// The value stays in an XMM register for the whole operation: it is placed
/// in the low lane with `_mm_set_sd`, rounded with `_mm_round_sd`, and read
/// back with `_mm_cvtsd_f64`, so no aligned scratch memory is needed.
///
/// The CPU must support SSE4.1. This is checked only when debug assertions
/// are enabled, to keep the benchmarked path free of extra work.
pub extern "C" fn f64_nearest_sse41(x: f64) -> f64 {
    debug_assert!(is_x86_feature_detected!("sse4.1"));

    // SAFETY: the intrinsics only operate on register values, so there are no
    // memory-safety preconditions; the sole requirement is that the CPU
    // implements SSE4.1 (and SSE2), which the caller guarantees by selecting
    // this variant and which is asserted above in debug builds.
    unsafe {
        let lane = _mm_set_sd(x);
        _mm_cvtsd_f64(_mm_round_sd(lane, lane, _MM_FROUND_RINT))
    }
}
use test::{Bencher, black_box};
/// Benchmarks `f64_nearest_original` by summing its result for every
/// multiple of 0.5 from -5000.0 to 5000.0 inclusive.
#[bench]
fn nearest_original(b: &mut Bencher) {
    // The bounds and the initial sum go through `black_box` to keep them
    // opaque to the optimizer.
    let lo = black_box(-10_000);
    let hi = black_box(10_000);
    b.iter(|| {
        let total = (lo..=hi).fold(black_box(0_f64), |acc, step| {
            acc + f64_nearest_original(step as f64 * 0.5)
        });
        black_box(total)
    });
}
/// Benchmarks `f64_nearest_branch` by summing its result for every multiple
/// of 0.5 from -5000.0 to 5000.0 inclusive.
#[bench]
fn nearest_branch(b: &mut Bencher) {
    // The bounds and the initial sum go through `black_box` to keep them
    // opaque to the optimizer.
    let lo = black_box(-10_000);
    let hi = black_box(10_000);
    b.iter(|| {
        let total = (lo..=hi).fold(black_box(0_f64), |acc, step| {
            acc + f64_nearest_branch(step as f64 * 0.5)
        });
        black_box(total)
    });
}
/// Benchmarks `f64_nearest_copysign` by summing its result for every
/// multiple of 0.5 from -5000.0 to 5000.0 inclusive.
#[bench]
fn nearest_copysign(b: &mut Bencher) {
    // The bounds and the initial sum go through `black_box` to keep them
    // opaque to the optimizer.
    let lo = black_box(-10_000);
    let hi = black_box(10_000);
    b.iter(|| {
        let total = (lo..=hi).fold(black_box(0_f64), |acc, step| {
            acc + f64_nearest_copysign(step as f64 * 0.5)
        });
        black_box(total)
    });
}
/// Benchmarks `f64_nearest_abs_copysign` by summing its result for every
/// multiple of 0.5 from -5000.0 to 5000.0 inclusive.
#[bench]
fn nearest_abs_copysign(b: &mut Bencher) {
    // The bounds and the initial sum go through `black_box` to keep them
    // opaque to the optimizer.
    let lo = black_box(-10_000);
    let hi = black_box(10_000);
    b.iter(|| {
        let total = (lo..=hi).fold(black_box(0_f64), |acc, step| {
            acc + f64_nearest_abs_copysign(step as f64 * 0.5)
        });
        black_box(total)
    });
}
/// Benchmarks `f64_nearest_abs_copysign_without_bits` by summing its result
/// for every multiple of 0.5 from -5000.0 to 5000.0 inclusive.
#[bench]
fn nearest_abs_copysign_without_bits(b: &mut Bencher) {
    // The bounds and the initial sum go through `black_box` to keep them
    // opaque to the optimizer.
    let lo = black_box(-10_000);
    let hi = black_box(10_000);
    b.iter(|| {
        let total = (lo..=hi).fold(black_box(0_f64), |acc, step| {
            acc + f64_nearest_abs_copysign_without_bits(step as f64 * 0.5)
        });
        black_box(total)
    });
}
/// Benchmarks `f64_nearest_sse41` by summing its result for every multiple
/// of 0.5 from -5000.0 to 5000.0 inclusive.
#[bench]
fn nearest_sse41(b: &mut Bencher) {
    // The bounds and the initial sum go through `black_box` to keep them
    // opaque to the optimizer.
    let lo = black_box(-10_000);
    let hi = black_box(10_000);
    b.iter(|| {
        let total = (lo..=hi).fold(black_box(0_f64), |acc, step| {
            acc + f64_nearest_sse41(step as f64 * 0.5)
        });
        black_box(total)
    });
}
@MaxGraey
Copy link
Author

launch with cargo bench:

test nearest_abs_copysign              ... bench:      35,993 ns/iter (+/- 7,475)
test nearest_abs_copysign_without_bits ... bench:      37,380 ns/iter (+/- 16,714)
test nearest_branch                    ... bench:      37,300 ns/iter (+/- 7,593)
test nearest_copysign                  ... bench:      32,348 ns/iter (+/- 4,869)
test nearest_original                  ... bench:      99,693 ns/iter (+/- 16,491)
test nearest_sse41                     ... bench:      40,587 ns/iter (+/- 3,854)

launch with RUSTFLAGS='-C target-cpu=native' cargo bench:

test nearest_abs_copysign              ... bench:      29,055 ns/iter (+/- 5,596)
test nearest_abs_copysign_without_bits ... bench:      28,080 ns/iter (+/- 9,036)
test nearest_branch                    ... bench:      45,084 ns/iter (+/- 3,673)
test nearest_copysign                  ... bench:      32,565 ns/iter (+/- 4,966)
test nearest_original                  ... bench:      53,212 ns/iter (+/- 9,682)
test nearest_sse41                     ... bench:      20,099 ns/iter (+/- 2,394)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment