Skip to content

Instantly share code, notes, and snippets.

@harubaru
Last active August 7, 2018 18:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save harubaru/1981d6517e499cd5eb611055de4b712f to your computer and use it in GitHub Desktop.
Save harubaru/1981d6517e499cd5eb611055de4b712f to your computer and use it in GitHub Desktop.
a wrapper made in C for SSE
/*
 * sse.h - a wrapper made in C for SSE
 */
#ifndef INTRINSICS_H
#define INTRINSICS_H
/*
 * In C99+, a bare `inline` in a header provides no external definition,
 * so any call the compiler chooses not to inline becomes an unresolved
 * reference at link time.  `static inline` makes every translation unit
 * self-contained.  Pre-C99 compilers lack `inline`, so plain `static`
 * is used there (avoids multiple-definition errors across TUs).
 */
#if (__STDC_VERSION__ >= 199901L)
#define USE_INLINE static inline
#else
#define USE_INLINE static
#endif
#include <x86intrin.h>
/* float */
USE_INLINE float sse_fadd(__m128 a) { a = _mm_add_ps(a, _mm_movehl_ps(a, a)); a = _mm_add_ss(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1))); return _mm_cvtss_f32(a); }
USE_INLINE float sse_fsub(__m128 a) { a = _mm_sub_ps(a, _mm_movehl_ps(a, a)); a = _mm_sub_ss(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1))); return _mm_cvtss_f32(a); }
USE_INLINE float sse_fmul(__m128 a) { a = _mm_mul_ps(a, _mm_movehl_ps(a, a)); a = _mm_mul_ss(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1))); return _mm_cvtss_f32(a); }
USE_INLINE float sse_fmin(__m128 a) { a = _mm_min_ps(a, _mm_movehl_ps(a, a)); a = _mm_min_ss(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1))); return _mm_cvtss_f32(a); }
USE_INLINE float sse_fmax(__m128 a) { a = _mm_max_ps(a, _mm_movehl_ps(a, a)); a = _mm_max_ss(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1))); return _mm_cvtss_f32(a); }
USE_INLINE float vec2_fadd(float a, float b) { return sse_fadd(_mm_set_ps(a, b, 0, 0)); }
USE_INLINE float vec2_fsub(float a, float b) { return sse_fsub(_mm_set_ps(a, b, 0, 0)); }
USE_INLINE float vec2_fmul(float a, float b) { return sse_fmul(_mm_set_ps(a, b, 1, 1)); }
USE_INLINE float vec2_fmax(float a, float b) { return sse_fmax(_mm_set_ps(a, b, 1.175494351e-38F, 1.175494351e-38F)); }
USE_INLINE float vec2_fmin(float a, float b) { return sse_fmin(_mm_set_ps(a, b, 3.402823466e+38F, 3.402823466e+38F)); }
USE_INLINE float vec3_fadd(float a, float b, float c) { return sse_fadd(_mm_set_ps(a, b, c, 0)); }
USE_INLINE float vec3_fsub(float a, float b, float c) { return sse_fsub(_mm_set_ps(a, b, c, 0)); }
USE_INLINE float vec3_fmul(float a, float b, float c) { return sse_fmul(_mm_set_ps(a, b, c, 1)); }
USE_INLINE float vec3_fmax(float a, float b, float c) { return sse_fmul(_mm_set_ps(a, b, c, 1.175494351e-38F)); }
USE_INLINE float vec3_fmin(float a, float b, float c) { return sse_fmul(_mm_set_ps(a, b, c, 3.402823466e+38F)); }
USE_INLINE float vec4_fadd(float a, float b, float c, float d) { return sse_fadd(_mm_set_ps(a, b, c, d)); }
USE_INLINE float vec4_fsub(float a, float b, float c, float d) { return sse_fsub(_mm_set_ps(a, b, c, d)); }
USE_INLINE float vec4_fmul(float a, float b, float c, float d) { return sse_fmul(_mm_set_ps(a, b, c, d)); }
USE_INLINE float vec4_fmax(float a, float b, float c, float d) { return sse_fmul(_mm_set_ps(a, b, c, d)); }
USE_INLINE float vec4_fmin(float a, float b, float c, float d) { return sse_fmul(_mm_set_ps(a, b, c, d)); }
/* signed int */
USE_INLINE int sse_iadd(__m128i a) { a = _mm_add_epi32(a, _mm_srli_si128(a, 8)); a = _mm_add_epi32(a, _mm_srli_si128(a, 4)); return _mm_cvtsi128_si32(a); }
USE_INLINE int sse_isub(__m128i a) { a = _mm_sub_epi32(a, _mm_srli_si128(a, 8)); a = _mm_sub_epi32(a, _mm_srli_si128(a, 4)); return _mm_cvtsi128_si32(a); }
USE_INLINE int sse_imul(__m128i a) { a = _mm_mullo_epi32(a, _mm_srli_si128(a, 8)); a = _mm_mullo_epi32(a, _mm_srli_si128(a, 4)); return _mm_cvtsi128_si32(a); }
USE_INLINE int sse_imin(__m128i a) { a = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2))); a = _mm_min_epi16(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2))); return _mm_cvtsi128_si32(a); }
USE_INLINE int sse_imax(__m128i a) { a = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2))); a = _mm_max_epi16(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2))); return _mm_cvtsi128_si32(a); }
USE_INLINE int vec2_iadd(int a, int b) { return sse_iadd(_mm_set_epi32(a, b, 0, 0)); }
USE_INLINE int vec2_isub(int a, int b) { return sse_isub(_mm_set_epi32(a, b, 0, 0)); }
USE_INLINE int vec2_imul(int a, int b) { return sse_imul(_mm_set_epi32(a, b, 1, 1)); }
USE_INLINE int vec2_imin(int a, int b) { return sse_imin(_mm_set_epi32(a, b, 0x7FFFFFFF, 0x7FFFFFFF)); }
USE_INLINE int vec2_imax(int a, int b) { return sse_imax(_mm_set_epi32(a, b, 0x80000000, 0x80000000)); }
USE_INLINE int vec3_iadd(int a, int b, int c) { return sse_iadd(_mm_set_epi32(a, b, c, 0)); }
USE_INLINE int vec3_isub(int a, int b, int c) { return sse_isub(_mm_set_epi32(a, b, c, 0)); }
USE_INLINE int vec3_imul(int a, int b, int c) { return sse_imul(_mm_set_epi32(a, b, c, 1)); }
USE_INLINE int vec3_imin(int a, int b, int c) { return sse_imin(_mm_set_epi32(a, b, c, 0x7FFFFFFF)); }
USE_INLINE int vec3_imax(int a, int b, int c) { return sse_imax(_mm_set_epi32(a, b, c, 0x80000000)); }
USE_INLINE int vec4_iadd(int a, int b, int c, int d) { return sse_iadd(_mm_set_epi32(a, b, c, d)); }
USE_INLINE int vec4_isub(int a, int b, int c, int d) { return sse_isub(_mm_set_epi32(a, b, c, d)); }
USE_INLINE int vec4_imul(int a, int b, int c, int d) { return sse_imul(_mm_set_epi32(a, b, c, d)); }
USE_INLINE int vec4_imin(int a, int b, int c, int d) { return sse_imin(_mm_set_epi32(a, b, c, d)); }
USE_INLINE int vec4_imax(int a, int b, int c, int d) { return sse_imax(_mm_set_epi32(a, b, c, d)); }
#endif
@harubaru
Copy link
Author

harubaru commented Aug 7, 2018

Fixed a problem where it couldn't be compiled by compilers using the ANSI C standard

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment