Skip to content

Instantly share code, notes, and snippets.

@mtytel
Created February 13, 2019 19:56
Show Gist options
  • Save mtytel/3de6e4ce01c38cba079daf7ea979c841 to your computer and use it in GitHub Desktop.
Save mtytel/3de6e4ce01c38cba079daf7ea979c841 to your computer and use it in GitHub Desktop.
// Four-lane cubic interpolation using SSE.
//
// For lane k (0..3), four consecutive floats ("taps") are read with an
// unaligned load starting at buffers[k] + index_k, where index_k is the low
// 16 bits of the k-th 32-bit element of `indices`. The taps are then blended
// with the cubic Lagrange basis polynomials for nodes -1, 0, 1, 2 evaluated at
// the lane's `t_to` value. At t_to == 0 the result is exactly the second tap,
// at t_to == 1 it is exactly the third tap.
//
// NOTE(review): only the low 16 bits of each index are used, so the upper half
// of every 32-bit element is ignored - presumably callers guarantee
// indices < 65536; confirm against call sites.
// NOTE(review): each lane reads 4 floats, so buffers[k] must be readable up to
// index_k + 3 - confirm the buffers are padded accordingly.
force_inline __m128 fastCubicInterpolate(const mono_float* const* buffers, __m128i indices, __m128 t_to) {
  // Gather: before the transpose, taps_k holds the four consecutive samples of lane k.
  __m128 taps0 = _mm_loadu_ps(buffers[0] + _mm_extract_epi16(indices, 0));
  __m128 taps1 = _mm_loadu_ps(buffers[1] + _mm_extract_epi16(indices, 2));
  __m128 taps2 = _mm_loadu_ps(buffers[2] + _mm_extract_epi16(indices, 4));
  __m128 taps3 = _mm_loadu_ps(buffers[3] + _mm_extract_epi16(indices, 6));

  // After the in-place 4x4 transpose, taps_j holds tap j of every lane, so each
  // register can be multiplied by a single per-lane weight vector.
  _MM_TRANSPOSE4_PS(taps0, taps1, taps2, taps3);

  // Shifted copies of the position: (t - 2), (t - 1), (t + 1).
  const __m128 t_minus_two = _mm_sub_ps(t_to, _mm_set1_ps(2.0f));
  const __m128 t_minus_one = _mm_sub_ps(t_to, _mm_set1_ps(1.0f));
  const __m128 t_plus_one = _mm_add_ps(t_to, _mm_set1_ps(1.0f));

  // Shared partial products: (t - 2)(t - 1) and t(t + 1).
  const __m128 far_product = _mm_mul_ps(t_minus_two, t_minus_one);
  const __m128 near_product = _mm_mul_ps(t_to, t_plus_one);

  // Lagrange weights, one per tap:
  //   weight0 = -t (t - 2)(t - 1) / 6
  //   weight1 =  (t + 1)(t - 2)(t - 1) / 2
  //   weight2 = -t (t + 1)(t - 2) / 2
  //   weight3 =  t (t + 1)(t - 1) / 6
  const __m128 weight0 = _mm_mul_ps(_mm_mul_ps(t_to, far_product), _mm_set1_ps(-1.0f / 6.0f));
  const __m128 weight1 = _mm_mul_ps(_mm_mul_ps(t_plus_one, far_product), _mm_set1_ps(1.0f / 2.0f));
  const __m128 weight2 = _mm_mul_ps(_mm_mul_ps(near_product, t_minus_two), _mm_set1_ps(-1.0f / 2.0f));
  const __m128 weight3 = _mm_mul_ps(_mm_mul_ps(near_product, t_minus_one), _mm_set1_ps(1.0f / 6.0f));

  // Weighted sum, accumulated left to right: ((w0*s0 + w1*s1) + w2*s2) + w3*s3.
  const __m128 sum01 = _mm_add_ps(_mm_mul_ps(weight0, taps0), _mm_mul_ps(weight1, taps1));
  const __m128 sum012 = _mm_add_ps(sum01, _mm_mul_ps(weight2, taps2));
  return _mm_add_ps(sum012, _mm_mul_ps(weight3, taps3));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment