Skip to content

Instantly share code, notes, and snippets.

@djg
Created September 14, 2018 02:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save djg/9ca44f6e9872dd4a457a24a61b8b63ca to your computer and use it in GitHub Desktop.
Save djg/9ca44f6e9872dd4a457a24a61b8b63ca to your computer and use it in GitHub Desktop.
SIMD in Rust is kinda gross.
// Type your code here, or load an example.
#[cfg(target_arch = "x86")]
use std::arch::x86::__m128;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::__m128;
/// A ray stored as two SSE vectors.
///
/// `hit` in this file reads lanes 0..=2 of each field as the three axes and
/// discards lane 3, so the fourth lane may hold any value.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2"))]
#[derive(Debug, Clone, Copy)]
pub struct Ray {
    /// Point the ray is measured from (subtracted from the box corners in `hit`).
    pub pt: __m128,
    /// Per-axis direction; `hit` takes its reciprocal, so zero components
    /// produce infinities there.
    pub dir: __m128,
}
/// An axis-aligned bounding box stored as two SSE vectors.
///
/// `hit` in this file reads lanes 0..=2 of each field as the three axes and
/// discards lane 3, so the fourth lane may hold any value.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2"))]
#[derive(Debug, Clone, Copy)]
pub struct Aabb {
    /// Per-axis lower bounds of the box.
    pub min: __m128,
    /// Per-axis upper bounds of the box.
    pub max: __m128,
}
/// Builds the 8-bit immediate for `_mm_shuffle_ps` from four 2-bit lane
/// selectors, listed from the highest destination lane (`$w`, lane 3) down to
/// the lowest (`$x`, lane 0) — the same argument order as C's `_MM_SHUFFLE`.
///
/// Each argument is expected to be in `0..=3`; no masking is applied.
macro_rules! _mm_shuffle {
    ($w:expr, $z:expr, $y:expr, $x:expr) => {
        ($w << 6) | ($z << 4) | ($y << 2) | $x
    };
}

/// Slab test: returns `true` when ray `r` overlaps `aabb` for some ray
/// parameter inside the caller's `t_min..t_max` window.
///
/// Lanes 0..=2 of every vector are treated as the three axes; lane 3 is
/// discarded before the reduction, so its contents never affect the result.
///
/// For each axis the entry/exit parameters are `(min - pt) / dir` and
/// `(max - pt) / dir` (swapped when the direction is negative). The largest
/// entry value (including `t_min`) is then compared with the smallest exit
/// value (including `t_max`); the function returns `true` only if the former
/// is strictly less than the latter.
///
/// A ray that is parallel to a slab *and* starts exactly on one of its planes
/// produces `0 * inf = NaN` for that axis. Such an axis is treated as imposing
/// no constraint, uniformly for all three axes.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2"))]
pub fn hit(aabb: &Aabb, r: &Ray, t_min: f32, t_max: f32) -> bool {
    #[cfg(target_arch = "x86")]
    use std::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use std::arch::x86_64::*;

    // SAFETY: every intrinsic below needs at most SSE, and this function is
    // only compiled when `target_feature = "sse2"` is statically enabled.
    // All operations work on register values; no pointers are dereferenced.
    unsafe {
        let inv_d = _mm_div_ps(_mm_set1_ps(1.), r.dir);
        let t0 = _mm_mul_ps(_mm_sub_ps(aabb.min, r.pt), inv_d);
        let t1 = _mm_mul_ps(_mm_sub_ps(aabb.max, r.pt), inv_d);

        // Where the direction is negative the ray meets `max` before `min`,
        // so exchange the two values in those lanes (branch-free select).
        let nm = _mm_cmplt_ps(inv_d, _mm_setzero_ps());
        let (t0, t1) = (
            _mm_or_ps(_mm_and_ps(t1, nm), _mm_andnot_ps(nm, t0)),
            _mm_or_ps(_mm_and_ps(t0, nm), _mm_andnot_ps(nm, t1)),
        );

        // `_mm_max_ps`/`_mm_min_ps` return their *second* operand whenever
        // either input is NaN. Clamping against -inf/+inf therefore turns a
        // NaN lane into a neutral bound and leaves every other value intact.
        // Without this, a NaN would be ignored, would discard `t_min`/`t_max`,
        // or would force a miss depending on which lane it landed in.
        let t0 = _mm_max_ps(t0, _mm_set1_ps(f32::NEG_INFINITY));
        let t1 = _mm_min_ps(t1, _mm_set1_ps(f32::INFINITY));

        // Pack as [t_min, t0[0], t0[1], t0[2]] (and likewise for the far
        // side), which drops lane 3 and folds the caller's bound into the
        // same horizontal reduction.
        let near = _mm_move_ss(
            _mm_shuffle_ps(t0, t0, _mm_shuffle!(2, 1, 0, 0)),
            _mm_set_ss(t_min),
        );
        let far = _mm_move_ss(
            _mm_shuffle_ps(t1, t1, _mm_shuffle!(2, 1, 0, 0)),
            _mm_set_ss(t_max),
        );

        // Horizontal max: fold the upper pair onto the lower pair, then fold
        // lane 1 onto lane 0.
        let t_min = {
            let v = _mm_max_ps(near, _mm_shuffle_ps(near, near, _mm_shuffle!(0, 0, 3, 2)));
            let v = _mm_max_ps(v, _mm_shuffle_ps(v, v, _mm_shuffle!(0, 0, 0, 1)));
            _mm_cvtss_f32(v)
        };
        // Horizontal min, same folding pattern.
        let t_max = {
            let v = _mm_min_ps(far, _mm_shuffle_ps(far, far, _mm_shuffle!(0, 0, 3, 2)));
            let v = _mm_min_ps(v, _mm_shuffle_ps(v, v, _mm_shuffle!(0, 0, 0, 1)));
            _mm_cvtss_f32(v)
        };

        t_min < t_max
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment