Created
June 1, 2018 23:31
-
-
Save thomcc/a6a0abf52de03e853dd410d744e4605b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![feature(test)] | |
#![recursion_limit = "1024"] | |
#![allow(dead_code)] | |
#![feature(stdsimd)] | |
extern crate test; | |
extern crate rand; | |
// Defines a little benchmarking DSL macro. If you've been linked here, it's safe to ignore. | |
#[macro_use] | |
mod bench_helpers { | |
use super::*; | |
pub struct BenchGroupItem<F>(usize, u64, F); | |
impl<F: Fn(usize, &mut test::Bencher)> BenchGroupItem<F> { | |
pub fn new(size: usize, tsz: usize, runner: F) -> Self { | |
BenchGroupItem(size, (size as u64) * (tsz as u64), runner) | |
} | |
} | |
impl<F: Fn(usize, &mut test::Bencher) + Send> test::TDynBenchFn for BenchGroupItem<F> { | |
fn run(&self, b: &mut test::Bencher) { | |
b.bytes = self.1; | |
(self.2)(self.0, b); | |
} | |
} | |
pub fn static_bench_desc(name: &'static str, f: fn(&mut test::Bencher)) -> test::TestDescAndFn { | |
test::TestDescAndFn { | |
desc: test::TestDesc { | |
name: test::StaticTestName(name), | |
ignore: false, | |
allow_fail: false, | |
should_panic: test::ShouldPanic::No, | |
}, | |
testfn: test::TestFn::StaticBenchFn(f) | |
} | |
} | |
pub fn dyn_bench_desc(name: String, boxed: ::std::boxed::Box<test::TDynBenchFn + 'static>) -> test::TestDescAndFn { | |
test::TestDescAndFn { | |
desc: test::TestDesc { | |
name: test::DynTestName(name), | |
ignore: false, | |
allow_fail: false, | |
should_panic: test::ShouldPanic::No, | |
}, | |
testfn: test::TestFn::DynBenchFn(boxed) | |
} | |
} | |
#[macro_export] | |
macro_rules! benchmarks { | |
(@bench_group($benches:ident) $name:ident, $tsz:expr, $sizes:expr, $sn:ident, {$($setup:tt)*}, {$(;)*}) => {}; | |
(@bench_group($benches:ident) $name:ident, $_ts:expr, $_sz:expr, $sn:ident, {$($setup:tt)*}, { | |
sizes[$t:ty] = $sizes:expr; $($rest:tt)* | |
}) => { | |
benchmarks!(@bench_group($benches) $name, ::std::mem::size_of::<$t>(), $sizes, $sn, {$($setup)*}, { $($rest)* }); | |
}; | |
(@bench_group($benches:ident) $name:ident, $tsz:expr, $sizes:expr, $sn:ident, {$($_setup:tt)*}, { | |
setup {$($setup:tt)*}; $($rest:tt)* | |
}) => { | |
benchmarks!(@bench_group($benches) $name, $tsz, $sizes, $sn, {$($setup)*}, { $($rest)* }); | |
}; | |
(@bench_group($benches:ident) $name:ident, $tsz:expr, $sizes:expr, $_sn:ident, {$($_setup:tt)*}, { | |
setup($size_name:ident) {$($setup:tt)*}; $($rest:tt)* | |
}) => { | |
benchmarks!(@bench_group($benches) $name, $tsz, $sizes, $size_name, {$($setup)*}, { $($rest)* }); | |
}; | |
(@bench_group($benches:ident) $name:ident, $tsz:expr, $sizes:expr, $szname:ident, {$($setup:tt)*}, { | |
variant $var_name:ident { $($vbody:tt)* }; | |
$($rest:tt)* | |
}) => { | |
benchmarks!(@bench_group($benches) $name, $tsz, $sizes, $szname, {$($setup)*}, { | |
cond_variant(true) $var_name { $($vbody)* }; | |
$($rest)* | |
}); | |
}; | |
(@bench_group($benches:ident) $name:ident, $tsz:expr, $sizes:expr, $szname:ident, {$($setup:tt)*}, { | |
cond_variant($cond:expr) $var_name:ident { $($vbody:tt)* }; | |
$($rest:tt)* | |
}) => { | |
if $cond { | |
fn $var_name($szname: usize, bencher: &mut test::Bencher) { | |
$($setup)*; | |
bencher.iter(|| { $($vbody)* }); | |
} | |
for &size in $sizes.iter() { | |
$benches.push($crate::bench_helpers::dyn_bench_desc( | |
format!("{}_{} for size = {}", stringify!($name), stringify!($var_name), size), | |
::std::boxed::Box::new($crate::bench_helpers::BenchGroupItem::new(size, $tsz, $var_name)) | |
)); | |
} | |
} else { | |
println!("Benchmark {}_{} skipped because the test `{}` failed", | |
stringify!($name), stringify!($var_name), | |
stringify!($cond)); | |
} | |
benchmarks!(@bench_group($benches) $name, $tsz, $sizes, $szname, {$($setup)*}, { $($rest)* }); | |
}; | |
(@inner($_benches:ident)) => {}; | |
(@inner($_benches:ident) ;) => {}; | |
(@inner($benches:ident) group $name:ident { $($inner:tt)+ }; $($rest:tt)*) => { | |
{ benchmarks!(@bench_group($benches) $name, 0, [0], _size, {}, { $($inner)+ }); }; | |
benchmarks!(@inner($benches) $($rest)*); | |
}; | |
(@inner($benches:ident) bench $name:ident { $e:expr }; $($rest:tt)*) => { | |
fn $name(bencher_: &mut test::Bencher) { bencher_.iter(|| $e); } | |
$benches.push($crate::bench_helpers::static_bench_desc(stringify!($name), $name)); | |
benchmarks!(@inner($benches) $($rest)*); | |
}; | |
(@inner($benches:ident) bench $name:ident ($id:ident) { $($body:tt)* }; $($rest:tt)*) => { | |
fn $name($id : &mut test::Bencher) { $($body)* } | |
$benches.push($crate::bench_helpers::static_bench_desc(stringify!($name), $name)); | |
benchmarks!(@inner($benches) $($rest)*); | |
}; | |
(@inner($benches:ident) fn $name:ident ($($args:tt)*) { $($body:tt)* }; $($rest:tt)*) => { | |
fn $name($($args)*) { $($body)* } | |
$benches.push($crate::bench_helpers::static_bench_desc(stringify!($name), $name)); | |
benchmarks!(@inner($benches) $($rest)*); | |
}; | |
($($benchmarks:tt)*) => { | |
fn get_benchmarks() -> Vec<test::TestDescAndFn> { | |
let mut benches = vec![]; | |
benchmarks!(@inner(benches) $($benchmarks)*); | |
benches | |
} | |
}; | |
} | |
} | |
/// Returns an empty `Vec<f32>` with room for at least `capacity` elements whose buffer starts
/// on a 32-byte boundary (as required by the aligned SSE/AVX loads).
///
/// The buffer is obtained by allocating a vector of 32-byte-aligned `[f32; 8]` chunks and
/// re-labelling that allocation as a vector of `f32`.
///
/// # Caveats
///
/// * Only the *initial* buffer is aligned: pushing more elements than the returned capacity
///   makes `Vec` reallocate with `f32`'s own alignment.
/// * NOTE(review): `Vec::from_raw_parts` documents that the element type must have the same
///   alignment the pointer was allocated with. Here the memory is allocated with alignment 32
///   and will be released with alignment 4, which is outside that contract. It is kept because
///   the function must return a plain `Vec<f32>`; replace it with a dedicated aligned buffer
///   type before using this outside a throw-away benchmark.
///
/// # Panics
///
/// Panics if the helper chunk type does not have the expected size/alignment, or if the
/// resulting buffer is not 32-byte aligned.
fn aligned_f32_vec(capacity: usize) -> Vec<f32> {
    use std::mem;

    #[repr(C, align(32))]
    struct Aligned([f32; 8]);

    assert_eq!(mem::size_of::<Aligned>(), 32);
    assert!(
        mem::align_of::<Aligned>() >= 32,
        "alignment was {}",
        mem::align_of::<Aligned>()
    );

    // `1 + capacity / 8` chunks always cover `capacity` floats (and at least one chunk is
    // allocated even for `capacity == 0`). `ManuallyDrop` hands ownership of the allocation
    // over to the `Vec<f32>` built below without running the chunk vector's destructor.
    let mut chunks = mem::ManuallyDrop::new(Vec::<Aligned>::with_capacity(1 + capacity / 8));
    let p = chunks.as_mut_ptr();
    let cap = chunks.capacity();

    // SAFETY (partial - see the NOTE above): `p` comes from a live `Vec` allocation of `cap`
    // chunks, i.e. `cap * 8` floats; the length is 0, so no uninitialised element is exposed;
    // the original vector is never dropped, so the allocation has exactly one owner.
    let floats = unsafe { Vec::from_raw_parts(p as *mut f32, 0, cap * 8) };
    assert_eq!((floats.as_ptr() as usize) & 31, 0);
    floats
}
fn rand_vec(sz: usize) -> Vec<f32> { | |
let mut res = aligned_f32_vec(sz); | |
for _ in 0..sz { | |
res.push(rand::random::<f32>() * 2.0 - 1.0); | |
} | |
assert_eq!((res.as_ptr() as usize) & 31, 0); | |
res | |
} | |
/// Dot product of `a` and `b`, written as a `fold` over the zipped iterators.
///
/// Kept out-of-line and unmangled so the generated code is easy to find and is measured on
/// its own.
///
/// # Panics
///
/// Panics if the slices differ in length.
#[no_mangle]
#[inline(never)]
pub fn dot_fold_iter(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len());
    a.iter()
        .zip(b.iter())
        .fold(0.0, |acc, (x, y)| acc + (x * y))
}
/// Dot product of `a` and `b`, written as a `for` loop over the zipped iterators.
///
/// Kept out-of-line and unmangled so the generated code is easy to find and is measured on
/// its own.
///
/// # Panics
///
/// Panics if the slices differ in length.
#[no_mangle]
#[inline(never)]
pub fn dot_iter(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len());
    let mut result = 0.0;
    for (x, y) in a.iter().zip(b.iter()) {
        result += x * y;
    }
    result
}
/// Dot product of `a` and `b`, written as an index loop with bounds checks elided through
/// `get_unchecked`.
///
/// Kept out-of-line and unmangled so the generated code is easy to find and is measured on
/// its own.
///
/// # Panics
///
/// Panics if the slices differ in length.
#[no_mangle]
#[inline(never)]
pub fn dot_slices_unchecked(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len());
    let len = a.len();
    let mut result = 0.0;
    let mut i = 0;
    while i < len {
        // SAFETY: `i < len`, and `len == a.len() == b.len()` was asserted above, so `i` is in
        // bounds for both slices.
        result += unsafe { (*a.get_unchecked(i)) * (*b.get_unchecked(i)) };
        i += 1;
    }
    result
}
/// Dot product of `a` and `b`, written as an index loop using ordinary (bounds-checked)
/// indexing.
///
/// Kept out-of-line and unmangled so the generated code is easy to find and is measured on
/// its own.
///
/// # Panics
///
/// Panics if the slices differ in length.
#[no_mangle]
#[inline(never)]
pub fn dot_slices_checked(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len());
    let len = a.len();
    let mut result = 0.0;
    let mut i = 0;
    while i < len {
        result += a[i] * b[i];
        i += 1;
    }
    result
}
/// Dot product of `a` and `b`, written as a raw-pointer walk over both slices.
///
/// Kept out-of-line and unmangled so the generated code is easy to find and is measured on
/// its own.
///
/// # Panics
///
/// Panics if the slices differ in length.
#[no_mangle]
#[inline(never)]
pub fn dot_ptr(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len());
    let mut result = 0.0;
    let mut pa = a.as_ptr();
    let mut pb = b.as_ptr();
    // SAFETY: `pa + a.len()` is the one-past-the-end pointer of `a`, which is always valid to
    // compute (it is never dereferenced).
    let end = unsafe { pa.add(a.len()) };
    while pa < end {
        // SAFETY: `pa < end` means `pa` points at an element of `a`; `pb` has advanced in
        // lock-step and `b.len() == a.len()` (asserted above), so it points at the matching
        // element of `b`. Advancing by one lands at most one past the end of each slice.
        unsafe {
            result += (*pa) * (*pb);
            pa = pa.add(1);
            pb = pb.add(1);
        }
    }
    result
}
/// Dot product of `a` and `b` using SSE: four lanes are multiplied and accumulated per
/// iteration, the lanes are summed at the end, and the (up to three) leftover elements are
/// handled by a scalar loop.
///
/// Kept out-of-line and unmangled so the generated code is easy to find and is measured on
/// its own.
///
/// # Safety
///
/// The CPU must support SSE (e.g. check `is_x86_feature_detected!("sse")` before calling).
///
/// # Panics
///
/// Panics if the slices differ in length or if either slice does not start on a 16-byte
/// boundary (the main loop uses aligned loads).
#[no_mangle]
#[inline(never)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse")]
pub unsafe fn dot_sse(a: &[f32], b: &[f32]) -> f32 {
    #[cfg(target_arch = "x86")]
    use std::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use std::arch::x86_64::*;

    assert_eq!(a.len(), b.len());
    let mut p_a = a.as_ptr();
    let mut p_b = b.as_ptr();
    assert_eq!((p_a as usize) & 15, 0, "a is not 16 byte aligned...");
    assert_eq!((p_b as usize) & 15, 0, "b is not 16 byte aligned...");

    // End of the whole slice, and end of the part that is a whole number of 4-float vectors.
    let p_trail_end = p_a.add(a.len());
    let p_vec_end = p_a.add(a.len() & !3);

    let mut sums = _mm_setzero_ps();
    while p_a < p_vec_end {
        let x = _mm_load_ps(p_a);
        let y = _mm_load_ps(p_b);
        // A fused multiply-add would fit here, but it needs the `fma` target feature, which
        // this function does not enable.
        sums = _mm_add_ps(_mm_mul_ps(x, y), sums);
        p_a = p_a.add(4);
        p_b = p_b.add(4);
    }

    let main_result = {
        let mut sum_data = [0.0f32; 4];
        // Unaligned store: a `[f32; 4]` local is only guaranteed 4-byte alignment, so the
        // aligned `_mm_store_ps` is not valid for it.
        _mm_storeu_ps(sum_data.as_mut_ptr(), sums);
        sum_data[0] + sum_data[1] + sum_data[2] + sum_data[3]
    };

    // Scalar tail for the `len % 4` remaining elements.
    let mut trail_result = 0.0;
    while p_a < p_trail_end {
        trail_result += (*p_a) * (*p_b);
        p_a = p_a.add(1);
        p_b = p_b.add(1);
    }
    main_result + trail_result
}
/// Dot product of `a` and `b` using AVX: eight lanes are multiplied and accumulated per
/// iteration, the lanes are summed at the end, and the (up to seven) leftover elements are
/// handled by a scalar loop.
///
/// Kept out-of-line and unmangled so the generated code is easy to find and is measured on
/// its own.
///
/// # Safety
///
/// The CPU must support AVX (e.g. check `is_x86_feature_detected!("avx")` before calling).
///
/// # Panics
///
/// Panics if the slices differ in length or if either slice does not start on a 32-byte
/// boundary (the main loop uses aligned loads).
#[no_mangle]
#[inline(never)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx")]
pub unsafe fn dot_avx(a: &[f32], b: &[f32]) -> f32 {
    #[cfg(target_arch = "x86")]
    use std::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use std::arch::x86_64::*;

    assert_eq!(a.len(), b.len());
    let mut p_a = a.as_ptr();
    let mut p_b = b.as_ptr();
    assert_eq!((p_a as usize) & 31, 0, "a is not 32 byte aligned...");
    assert_eq!((p_b as usize) & 31, 0, "b is not 32 byte aligned...");

    // End of the whole slice, and end of the part that is a whole number of 8-float vectors.
    let p_trail_end = p_a.add(a.len());
    let p_vec_end = p_a.add(a.len() & !7);

    let mut sums = _mm256_setzero_ps();
    while p_a < p_vec_end {
        let x = _mm256_load_ps(p_a);
        let y = _mm256_load_ps(p_b);
        // A fused multiply-add would fit here, but it needs the `fma` target feature, which
        // this function does not enable.
        sums = _mm256_add_ps(_mm256_mul_ps(x, y), sums);
        p_a = p_a.add(8);
        p_b = p_b.add(8);
    }

    // Horizontal sum of the eight lanes:
    //   1. swap the two 128-bit halves and add, so each half holds low + high;
    //   2. two pairwise horizontal adds collapse the four values of a half into every lane;
    //   3. read lane 0.
    let main_result = {
        let flipped = _mm256_permute2f128_ps(sums, sums, 1);
        let mut partial = _mm256_add_ps(flipped, sums);
        partial = _mm256_hadd_ps(partial, partial);
        partial = _mm256_hadd_ps(partial, partial);
        _mm256_cvtss_f32(partial)
    };

    // Scalar tail for the `len % 8` remaining elements.
    let mut trail_result = 0.0;
    while p_a < p_trail_end {
        trail_result += (*p_a) * (*p_b);
        p_a = p_a.add(1);
        p_b = p_b.add(1);
    }
    main_result + trail_result
}
use test::black_box; | |
benchmarks! { | |
group dot { | |
sizes[f32] = [/*3, 9, 64, 100, 256, */512, 519]; | |
setup(size) { | |
let a = rand_vec(size); | |
let b = rand_vec(size); | |
}; | |
variant ptr { | |
black_box(dot_ptr(black_box(&a), black_box(&b))) | |
}; | |
variant slice_unchecked { | |
black_box(dot_slices_unchecked(black_box(&a), black_box(&b))) | |
}; | |
variant slices_checked { | |
black_box(dot_slices_checked(black_box(&a), black_box(&b))) | |
}; | |
variant iter { | |
black_box(dot_iter(black_box(&a), black_box(&b))) | |
}; | |
variant fold_iter { | |
black_box(dot_fold_iter(black_box(&a), black_box(&b))) | |
}; | |
cond_variant(is_x86_feature_detected!("sse")) simd_sse { | |
black_box(unsafe { dot_sse(black_box(&a), black_box(&b)) }) | |
}; | |
cond_variant(is_x86_feature_detected!("avx")) simd_avx { | |
black_box(unsafe { dot_avx(black_box(&a), black_box(&b)) }) | |
}; | |
}; | |
} | |
/// Prints the target architecture followed by one `name: true/false` line for each SIMD-related
/// CPU feature of interest, as detected at run time.
///
/// Only the MMX/SSE*/AVX/AVX2/FMA families are reported. `is_x86_feature_detected!` knows many
/// more (aes, pclmulqdq, rdrand, rdseed, tsc, sha, the avx512* family, bmi1, bmi2, abm, lzcnt,
/// tbm, popcnt, fxsr, xsave, xsaveopt, xsaves, xsavec); add a line below to report one of them.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn print_feats() {
    let arch = if cfg!(target_arch = "x86") { "x86" } else { "x86_64" };
    println!("Running on: {}", arch);
    println!("mmx: {}", is_x86_feature_detected!("mmx"));
    println!("sse: {}", is_x86_feature_detected!("sse"));
    println!("sse2: {}", is_x86_feature_detected!("sse2"));
    println!("sse3: {}", is_x86_feature_detected!("sse3"));
    println!("ssse3: {}", is_x86_feature_detected!("ssse3"));
    println!("sse4.1: {}", is_x86_feature_detected!("sse4.1"));
    println!("sse4.2: {}", is_x86_feature_detected!("sse4.2"));
    println!("sse4a: {}", is_x86_feature_detected!("sse4a"));
    println!("avx: {}", is_x86_feature_detected!("avx"));
    println!("avx2: {}", is_x86_feature_detected!("avx2"));
    println!("fma: {}", is_x86_feature_detected!("fma"));
}

/// Fallback for targets that are not x86/x86_64: there is nothing to detect.
///
/// This is a separate `#[cfg]`-gated function (rather than a `cfg!` branch) because
/// `is_x86_feature_detected!` cannot be compiled at all on other architectures.
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
fn print_feats() {
    println!("not intel!");
}
fn main() { | |
let test_opts = test::TestOpts { | |
bench_benchmarks: true, | |
color: test::NeverColor, | |
// defaults... | |
list: false, | |
filter: None, | |
filter_exact: false, | |
run_ignored: false, | |
run_tests: true, | |
logfile: None, | |
nocapture: false, | |
format: test::OutputFormat::Pretty, | |
test_threads: None, | |
skip: vec![], | |
options: test::Options::new(), | |
}; | |
print_feats(); | |
test::run_tests_console(&test_opts, get_benchmarks()).unwrap(); | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment