Created
February 13, 2019 19:56
-
-
Save mtytel/3de6e4ce01c38cba079daf7ea979c841 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Cubic (4-point Lagrange) interpolation of four independent voices at once.
 *
 * For lane i, let k be the 32-bit lane i of `indices` and t be lane i of
 * `t_to`. The result lane i is
 *
 *   w0 * buffers[i][k] + w1 * buffers[i][k + 1] +
 *   w2 * buffers[i][k + 2] + w3 * buffers[i][k + 3]
 *
 * with the weights
 *
 *   w0 = -t (t - 1)(t - 2) / 6
 *   w1 =  (t + 1)(t - 1)(t - 2) / 2
 *   w2 = -(t + 1) t (t - 2) / 2
 *   w3 =  (t + 1) t (t - 1) / 6
 *
 * so t == 0 yields buffers[i][k + 1] and t == 1 yields buffers[i][k + 2].
 *
 * @param buffers Four sample buffers, one per lane. Four consecutive floats
 *                starting at buffers[i] + k are read with unaligned loads, so
 *                the caller must guarantee all of them are readable.
 * @param indices Four 32-bit sample offsets, one per lane.
 * @param t_to    Per-lane interpolation position (presumably in [0, 1] --
 *                confirm against callers).
 * @return The four interpolated samples, one per lane.
 */
force_inline __m128 fastCubicInterpolate(const mono_float* const* buffers, __m128i indices, __m128 t_to) {
  const __m128 kMultNext = _mm_set1_ps(-1.0f / 6.0f);
  const __m128 kMultTo = _mm_set1_ps(1.0f / 2.0f);
  const __m128 kMultFrom = _mm_set1_ps(-1.0f / 2.0f);
  const __m128 kMultPrev = _mm_set1_ps(1.0f / 6.0f);

  const __m128 one = _mm_set1_ps(1.0f);
  const __m128 two = _mm_set1_ps(2.0f);

  // Linear factors of the Lagrange basis polynomials: t - 2, t - 1 and t + 1.
  const __m128 t_prev = _mm_sub_ps(t_to, two);
  const __m128 t_from = _mm_sub_ps(t_to, one);
  const __m128 t_next = _mm_add_ps(t_to, one);

  // Products shared by two weights each.
  const __m128 t_prev_from = _mm_mul_ps(t_prev, t_from);
  const __m128 t_to_next = _mm_mul_ps(t_to, t_next);

  // Initialize interpolation matrix: row k holds weight w_k for every lane.
  const __m128 interpolate_row0 = _mm_mul_ps(_mm_mul_ps(t_to, t_prev_from), kMultNext);
  const __m128 interpolate_row1 = _mm_mul_ps(_mm_mul_ps(t_next, t_prev_from), kMultTo);
  const __m128 interpolate_row2 = _mm_mul_ps(_mm_mul_ps(t_to_next, t_prev), kMultFrom);
  const __m128 interpolate_row3 = _mm_mul_ps(_mm_mul_ps(t_to_next, t_from), kMultPrev);

  // Read each index as a full 32-bit lane. Extracting only the low 16-bit word
  // of a lane would wrap every offset of 65536 or more back into [0, 65535].
  const int index0 = _mm_cvtsi128_si32(indices);
  const int index1 = _mm_cvtsi128_si32(_mm_srli_si128(indices, 4));
  const int index2 = _mm_cvtsi128_si32(_mm_srli_si128(indices, 8));
  const int index3 = _mm_cvtsi128_si32(_mm_srli_si128(indices, 12));

  // Initialize value matrix: row i holds four consecutive samples of buffer i.
  __m128 value_row0 = _mm_loadu_ps(buffers[0] + index0);
  __m128 value_row1 = _mm_loadu_ps(buffers[1] + index1);
  __m128 value_row2 = _mm_loadu_ps(buffers[2] + index2);
  __m128 value_row3 = _mm_loadu_ps(buffers[3] + index3);

  // Transpose value matrix so that row k holds sample k of every buffer.
  const __m128 low0 = _mm_unpacklo_ps(value_row0, value_row1);
  const __m128 low1 = _mm_unpacklo_ps(value_row2, value_row3);
  const __m128 high0 = _mm_unpackhi_ps(value_row0, value_row1);
  const __m128 high1 = _mm_unpackhi_ps(value_row2, value_row3);
  value_row0 = _mm_movelh_ps(low0, low1);
  value_row1 = _mm_movehl_ps(low1, low0);
  value_row2 = _mm_movelh_ps(high0, high1);
  value_row3 = _mm_movehl_ps(high1, high0);

  // Collapse: per-lane dot product of the weights with the samples.
  __m128 result = _mm_mul_ps(interpolate_row0, value_row0);
  result = _mm_add_ps(result, _mm_mul_ps(interpolate_row1, value_row1));
  result = _mm_add_ps(result, _mm_mul_ps(interpolate_row2, value_row2));
  return _mm_add_ps(result, _mm_mul_ps(interpolate_row3, value_row3));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment