Skip to content

Instantly share code, notes, and snippets.

@zommiommy
Last active January 23, 2022 10:29
Show Gist options
  • Save zommiommy/1e70a039a7ed8ea323995e1f88a6c500 to your computer and use it in GitHub Desktop.
Save zommiommy/1e70a039a7ed8ea323995e1f88a6c500 to your computer and use it in GitHub Desktop.
Benchmark of an AVX Dish activation vs. ReLU
#![feature(portable_simd)]
#![feature(test)]
extern crate test;
use test::{black_box, Bencher};
use std::convert::TryInto;
use std::simd::*;
use std::arch::x86_64::*;
type Reg = f32x8;
/// Computes the Dish activation on every lane of `x`:
/// `dish(x) = 0.5 * x * (1 + x / sqrt(1 + x^2))`.
///
/// The division by the square root is done by multiplying with the value
/// returned from `vec_sqrt`, which (despite its name) wraps `_mm256_rsqrt_ps`
/// and therefore yields an *approximate reciprocal* square root.
///
/// NOTE(review): because the reciprocal square root is only an approximation,
/// the result is approximate too; accuracy presumably suffers most for
/// large-magnitude negative inputs, where `1 + x * rsqrt(1 + x^2)` cancels
/// towards zero — confirm this is acceptable outside of benchmarking.
#[inline(always)]
fn dish(x: &Reg) -> Reg {
    let ones = Reg::splat(1.0);
    let half = Reg::splat(0.5);
    // 1 + x^2, the argument of the square root in the Dish formula.
    let one_plus_sq = ones + x * x;
    // x * rsqrt(1 + x^2) == x / sqrt(1 + x^2); the `1 +` belongs outside the root.
    half * x * (ones + x * vec_sqrt(one_plus_sq))
}
#[inline(always)]
fn vec_sqrt(x: Reg) -> Reg {
unsafe {
let res = _mm256_rsqrt_ps(
*((&x as *const Reg) as *const __m256)
);
*((&res as *const __m256) as *const Reg)
}
}
/// Computes ReLU on every lane of `x`: the lane-wise maximum of `x` and zero.
#[inline(always)]
fn relu(x: &Reg) -> Reg {
    (*x).max(Reg::splat(0.0))
}
fn get_test_values() -> Vec<Reg> {
const MAX: usize = 1_000_000;
let res = (0..MAX).map(|x| x as f32 - (MAX / 2) as f32).collect::<Vec<f32>>();
res.chunks(8).map(|x| {
let array: [f32; 8] = x.try_into().unwrap();
array.into()
}).collect::<Vec<Reg>>()
}
/// Benchmarks `dish` over the whole test input.
///
/// Each iteration maps every vector through `dish` and collects the results
/// into a fresh `Vec`, so the allocation is part of the measured time.
/// `b.bytes` is set to the input size in bytes so the harness reports MB/s.
#[bench]
fn bench_dish(b: &mut Bencher) {
    let inputs = black_box(get_test_values());
    let input_bytes = std::mem::size_of_val(inputs.as_slice());
    b.bytes = input_bytes as u64;
    b.iter(|| {
        let outputs: Vec<Reg> = inputs.iter().map(|v| dish(v)).collect();
        outputs
    });
}
/// Benchmarks `relu` over the whole test input.
///
/// Each iteration maps every vector through `relu` and collects the results
/// into a fresh `Vec`, so the allocation is part of the measured time.
/// `b.bytes` is set to the input size in bytes so the harness reports MB/s.
#[bench]
fn bench_relu(b: &mut Bencher) {
    let inputs = black_box(get_test_values());
    let input_bytes = std::mem::size_of_val(inputs.as_slice());
    b.bytes = input_bytes as u64;
    b.iter(|| {
        let outputs: Vec<Reg> = inputs.iter().map(|v| relu(v)).collect();
        outputs
    });
}
Results on my Ryzen 9 3900X:
test bench_dish ... bench: 1,424,578 ns/iter (+/- 10,866) = 2807 MB/s
test bench_relu ... bench: 95,921 ns/iter (+/- 11,679) = 41700 MB/s
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment