Skip to content

Instantly share code, notes, and snippets.

@jackmott
Last active August 6, 2018 17:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jackmott/7985e64591e79611ebcf6127e7bc24c5 to your computer and use it in GitHub Desktop.
Save jackmott/7985e64591e79611ebcf6127e7bc24c5 to your computer and use it in GitHub Desktop.
compiler bug?
// SSE returns the correct answer in Debug and Release
// AVX returns the correct answer in Debug but not Release
// This seems to be related to add_stuff being recursive
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::fmt::Debug;
/// Abstraction over a vector-of-`f32` backend.
///
/// Each implementor picks a concrete vector type and supplies the two
/// operations generic code needs: broadcasting a scalar and adding two vectors.
pub trait Simd {
    /// Vector type the backend operates on; must be copyable and printable
    /// with `{:?}`.
    type Vf32: Copy + Debug;

    /// Builds a backend vector from the scalar `a`.
    ///
    /// # Safety
    /// The trait itself states no contract; callers must satisfy whatever the
    /// implementing backend requires.
    unsafe fn set1_ps(a: f32) -> Self::Vf32;

    /// Combines `a` and `b` using the backend's addition.
    ///
    /// # Safety
    /// The trait itself states no contract; callers must satisfy whatever the
    /// implementing backend requires.
    unsafe fn add_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
}
/// Backend marker whose vector type is the 128-bit `__m128`.
pub struct Sse2;

impl Simd for Sse2 {
    type Vf32 = __m128;

    /// Forwards to the `_mm_add_ps` intrinsic.
    #[inline(always)]
    unsafe fn add_ps(a: __m128, b: __m128) -> __m128 {
        _mm_add_ps(a, b)
    }

    /// Forwards to the `_mm_set1_ps` intrinsic.
    #[inline(always)]
    unsafe fn set1_ps(a: f32) -> __m128 {
        _mm_set1_ps(a)
    }
}
/// Backend marker whose vector type is the 256-bit `__m256`.
pub struct Avx2;

impl Simd for Avx2 {
    type Vf32 = __m256;

    /// Forwards to the `_mm256_add_ps` intrinsic.
    ///
    /// NOTE(review): this function carries no `#[target_feature]` of its own;
    /// it presumably relies on being inlined into a caller that enables the
    /// required instruction set - confirm.
    #[inline(always)]
    unsafe fn add_ps(a: __m256, b: __m256) -> __m256 {
        _mm256_add_ps(a, b)
    }

    /// Forwards to the `_mm256_set1_ps` intrinsic.
    #[inline(always)]
    unsafe fn set1_ps(a: f32) -> __m256 {
        _mm256_set1_ps(a)
    }
}
/// Recursively accumulates `set1_ps(a) + set1_ps(2.0)` using backend `S`.
///
/// Every call contributes one such term. While `count < 3` the term is added
/// to the result of a nested call made with `count + 1`; once `count` is 3 or
/// more, the term alone is returned.
///
/// NOTE(review): the function is both recursive and `#[inline(always)]`. A
/// recursive function cannot be inlined into itself without bound, so an
/// out-of-line instance presumably remains; the file header suspects the
/// recursion is tied to the wrong AVX result in release builds.
///
/// # Safety
/// Callers must satisfy whatever `S::set1_ps` and `S::add_ps` require.
#[inline(always)]
unsafe fn add_stuff<S>(a: f32, count: i32) -> S::Vf32
where
    S: Simd,
{
    let two = S::set1_ps(2.0);
    let base = S::set1_ps(a);
    let term = S::add_ps(base, two);

    // Deepest level: nothing further to accumulate.
    if count >= 3 {
        return term;
    }

    S::add_ps(term, add_stuff::<S>(a, count + 1))
}
#[target_feature(enable = "avx2")]
unsafe fn add_stuff_avx() {
let r = add_stuff::<Avx2>(2.0, 1);
println!("avx {:?}", r);
}
#[target_feature(enable = "sse")]
unsafe fn add_stuff_sse() {
let r = add_stuff::<Sse2>(2.0, 1);
println!("sse {:?}", r);
}
/// Runs the SSE and AVX variants in turn and prints each result.
///
/// Each variant is compiled with `#[target_feature]`, so calling it on a CPU
/// without that feature is undefined behaviour. The feature is therefore
/// checked at runtime first, and a variant that cannot run is reported on
/// stderr instead of being executed.
fn main() {
    if is_x86_feature_detected!("sse") {
        // SAFETY: the runtime check above confirms the CPU supports SSE,
        // which is the only feature `add_stuff_sse` enables.
        unsafe { add_stuff_sse() };
    } else {
        eprintln!("sse: not supported by this CPU, skipping");
    }

    if is_x86_feature_detected!("avx2") {
        // SAFETY: the runtime check above confirms the CPU supports AVX2,
        // which is the feature `add_stuff_avx` enables.
        unsafe { add_stuff_avx() };
    } else {
        eprintln!("avx: AVX2 not supported by this CPU, skipping");
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment