Skip to content

Instantly share code, notes, and snippets.

@dmarion
Created March 27, 2019 08:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dmarion/2820400cf8dadab768fe9fceed8d0c0a to your computer and use it in GitHub Desktop.
Save dmarion/2820400cf8dadab768fe9fceed8d0c0a to your computer and use it in GitHub Desktop.
vppinc.h
#include <x86intrin.h>
/* Short aliases for the unsigned scalar types used as vector lanes. */
typedef unsigned long long u64;
typedef unsigned int u32;
typedef unsigned short u16;
typedef unsigned char u8;

/* Vector types are named <lane type>x<lane count>.  The helper macro
   _(n) attaches the GCC/Clang vector_size attribute, where n is the
   total vector size in bytes; it is undefined again right after the
   typedefs. */
#define _(x) __attribute__((vector_size (x)))

/* 32-byte (256-bit) vectors. */
typedef u8 u8x32 _(32);
typedef u16 u16x16 _(32);
typedef u32 u32x8 _(32);
typedef u64 u64x4 _(32);

/* 16-byte (128-bit) vectors.  The lane type has to match the name:
   declaring e.g. u16x8 with u8 lanes would silently produce a 16-lane
   byte vector instead of an 8-lane 16-bit vector. */
typedef u8 u8x16 _(16);
typedef u16 u16x8 _(16);
typedef u32 u32x4 _(16);
typedef u64 u64x2 _(16);
#undef _
/* Storage class + attribute combination asking the compiler to always
   inline the function it is applied to. */
#define static_always_inline static inline __attribute__ ((__always_inline__))
/* Temporary one-character shorthand used as the qualifier on every wrapper
   defined below; the extra `unused` attribute marks each wrapper as
   possibly unused.  The macro is #undef'd after the last wrapper. */
#define _ static_always_inline __attribute__ ((unused))
/* Load 32 bytes starting at p into a u8x32 using the unaligned-load
   intrinsic.  p is const-qualified because the load only reads memory,
   so read-only buffers can be passed without casting away const. */
_ u8x32
u8x32_load_unaligned (const void *p)
{
  return (u8x32) _mm256_loadu_si256 ((const __m256i *) p);
}
/* Load 16 bytes starting at p into a u8x16 using the unaligned-load
   intrinsic.  p is const-qualified because the load only reads memory,
   so read-only buffers can be passed without casting away const. */
_ u8x16
u8x16_load_unaligned (const void *p)
{
  return (u8x16) _mm_loadu_si128 ((const __m128i *) p);
}
/* Load 32 bytes starting at p and reinterpret them as sixteen 16-bit
   lanes, using the unaligned-load intrinsic.  p is const-qualified
   because the load only reads memory. */
_ u16x16
u16x16_load_unaligned (const void *p)
{
  return (u16x16) _mm256_loadu_si256 ((const __m256i *) p);
}
/* Load 32 bytes starting at p and reinterpret them as eight 32-bit
   lanes, using the unaligned-load intrinsic.  p is const-qualified
   because the load only reads memory. */
_ u32x8
u32x8_load_unaligned (const void *p)
{
  return (u32x8) _mm256_loadu_si256 ((const __m256i *) p);
}
/* Load 32 bytes starting at p and reinterpret them as four 64-bit
   lanes, using the unaligned-load intrinsic.  p is const-qualified
   because the load only reads memory. */
_ u64x4
u64x4_load_unaligned (const void *p)
{
  return (u64x4) _mm256_loadu_si256 ((const __m256i *) p);
}
/* Write the 32 bytes of v to memory at p using the unaligned-store
   intrinsic. */
_ void
u8x32_store_unaligned (u8x32 v, void *p)
{
  __m256i *dst = (__m256i *) p;

  _mm256_storeu_si256 (dst, (__m256i) v);
}
/* Write the 16 bytes of v to memory at p using the unaligned-store
   intrinsic. */
_ void
u8x16_store_unaligned (u8x16 v, void *p)
{
  __m128i *dst = (__m128i *) p;

  _mm_storeu_si128 (dst, (__m128i) v);
}
/* Build a u64x4 whose four lanes all hold x. */
_ u64x4
u64x4_splat (u64 x)
{
  /* The intrinsic takes a signed 64-bit argument; the bit pattern of x
     is what gets broadcast. */
  __m256i broadcast = _mm256_set1_epi64x ((long long) x);

  return (u64x4) broadcast;
}
/* Build a u8x32 whose thirty-two lanes all hold x. */
_ u8x32
u8x32_splat (u8 x)
{
  /* The intrinsic takes a plain char; the bit pattern of x is what gets
     broadcast. */
  __m256i broadcast = _mm256_set1_epi8 ((char) x);

  return (u8x32) broadcast;
}
/* Build a u8x16 whose sixteen lanes all hold x. */
_ u8x16
u8x16_splat (u8 x)
{
  /* The intrinsic takes a plain char; the bit pattern of x is what gets
     broadcast. */
  __m128i broadcast = _mm_set1_epi8 ((char) x);

  return (u8x16) broadcast;
}
/* Per-byte select between v1 and v2 under control of mask, implemented
   with _mm256_blendv_epi8: a lane is taken from v2 when the top bit of
   the corresponding mask byte is set, and from v1 otherwise. */
_ u8x32
u8x32_blend (u8x32 v1, u8x32 v2, u8x32 mask)
{
  __m256i if_clear = (__m256i) v1;
  __m256i if_set = (__m256i) v2;
  __m256i selector = (__m256i) mask;

  return (u8x32) _mm256_blendv_epi8 (if_clear, if_set, selector);
}
/* Per-byte select between v1 and v2 under control of mask, implemented
   with _mm_blendv_epi8: a lane is taken from v2 when the top bit of the
   corresponding mask byte is set, and from v1 otherwise. */
_ u8x16
u8x16_blend (u8x16 v1, u8x16 v2, u8x16 mask)
{
  __m128i if_clear = (__m128i) v1;
  __m128i if_set = (__m128i) v2;
  __m128i selector = (__m128i) mask;

  return (u8x16) _mm_blendv_epi8 (if_clear, if_set, selector);
}
/* Lane-wise unsigned comparison v1 > v2.  Each result lane is 0xff where
   the v1 lane is greater than the v2 lane and 0x00 otherwise.

   The comparison uses the vector-extension `>` operator, which compares
   according to the (unsigned) lane type.  _mm256_cmpgt_epi8 is a signed
   byte compare and would treat lanes >= 0x80 as negative, e.g. reporting
   200 > 100 as false. */
_ u8x32
u8x32_is_greater (u8x32 v1, u8x32 v2)
{
  return (u8x32) (v1 > v2);
}
/* Lane-wise unsigned comparison v1 > v2.  Each result lane is 0xff where
   the v1 lane is greater than the v2 lane and 0x00 otherwise.

   The comparison uses the vector-extension `>` operator, which compares
   according to the (unsigned) lane type.  _mm_cmpgt_epi8 is a signed
   byte compare and would treat lanes >= 0x80 as negative, e.g. reporting
   200 > 100 as false. */
_ u8x16
u8x16_is_greater (u8x16 v1, u8x16 v2)
{
  return (u8x16) (v1 > v2);
}
/* Horizontal add of 32-bit lanes via _mm256_hadd_epi32: adjacent lane
   pairs of v1 and v2 are summed.  The lane ordering of the result is
   whatever the intrinsic defines (it operates per 128-bit half). */
_ u32x8
u32x8_hadd (u32x8 v1, u32x8 v2)
{
  __m256i lhs = (__m256i) v1;
  __m256i rhs = (__m256i) v2;

  return (u32x8) _mm256_hadd_epi32 (lhs, rhs);
}
#undef _
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment