Last active
August 6, 2018 17:37
-
-
Save jackmott/7985e64591e79611ebcf6127e7bc24c5 to your computer and use it in GitHub Desktop.
compiler bug?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// The SSE path returns the correct answer in both Debug and Release builds.
// The AVX path returns the correct answer in Debug builds but not in Release builds.
// This seems to be related to `add_stuff` being recursive.
#[cfg(target_arch = "x86")] | |
use std::arch::x86::*; | |
#[cfg(target_arch = "x86_64")] | |
use std::arch::x86_64::*; | |
use std::fmt::Debug; | |
/// Minimal abstraction over a SIMD instruction set operating on packed `f32` lanes.
///
/// Each implementor picks a concrete vector type and forwards the operations to
/// the matching CPU intrinsics, so generic code can be written once and
/// instantiated per instruction set.
pub trait Simd {
    /// The packed-`f32` vector type of this instruction set.
    type Vf32: Copy + Debug;

    /// Adds `a` and `b` lane by lane.
    ///
    /// # Safety
    ///
    /// The CPU executing this call must support the instruction set the
    /// implementor is built on.
    unsafe fn add_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;

    /// Builds a vector with every lane set to `a`.
    ///
    /// # Safety
    ///
    /// The CPU executing this call must support the instruction set the
    /// implementor is built on.
    unsafe fn set1_ps(a: f32) -> Self::Vf32;
}
pub struct Sse2; | |
impl Simd for Sse2 { | |
type Vf32 = __m128; | |
#[inline(always)] | |
unsafe fn add_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32 { | |
_mm_add_ps(a, b) | |
} | |
#[inline(always)] | |
unsafe fn set1_ps(a: f32) -> Self::Vf32 { | |
_mm_set1_ps(a) | |
} | |
} | |
pub struct Avx2; | |
impl Simd for Avx2 { | |
type Vf32 = __m256; | |
#[inline(always)] | |
unsafe fn add_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32 { | |
_mm256_add_ps(a, b) | |
} | |
#[inline(always)] | |
unsafe fn set1_ps(a: f32) -> Self::Vf32 { | |
_mm256_set1_ps(a) | |
} | |
} | |
#[inline(always)] | |
unsafe fn add_stuff<S: Simd>(a: f32, count: i32) -> S::Vf32 { | |
let b = S::set1_ps(2.0); | |
let a2 = S::set1_ps(a); | |
if count < 3 { | |
S::add_ps(S::add_ps(a2, b), add_stuff::<S>(a, count + 1)) | |
} else { | |
S::add_ps(a2, b) | |
} | |
} | |
#[target_feature(enable = "avx2")] | |
unsafe fn add_stuff_avx() { | |
let r = add_stuff::<Avx2>(2.0, 1); | |
println!("avx {:?}", r); | |
} | |
#[target_feature(enable = "sse")] | |
unsafe fn add_stuff_sse() { | |
let r = add_stuff::<Sse2>(2.0, 1); | |
println!("sse {:?}", r); | |
} | |
fn main() { | |
unsafe { | |
add_stuff_sse(); | |
add_stuff_avx(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment