Created
March 27, 2019 08:14
-
-
Save dmarion/2820400cf8dadab768fe9fceed8d0c0a to your computer and use it in GitHub Desktop.
vppinc.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <x86intrin.h> | |
/* Short names for the unsigned scalar types used as vector lanes. */
typedef unsigned long long u64;
typedef unsigned int u32;
typedef unsigned short u16;
typedef unsigned char u8;

/* Temporary shorthand: make the preceding typedef a GCC/Clang vector type
   that is x bytes wide.  Undefined again once the typedefs are done. */
#define _(x) __attribute__((vector_size (x)))

/* 32-byte (256-bit) vectors. */
typedef u8 u8x32 _(32);
typedef u16 u16x16 _(32);
typedef u32 u32x8 _(32);
typedef u64 u64x4 _(32);

/* 16-byte (128-bit) vectors.  The lane type has to match the name:
   declaring every one of these with u8 lanes would silently make
   u16x8, u32x4 and u64x2 behave as 16 x u8 in element-wise arithmetic
   and subscripting. */
typedef u8 u8x16 _(16);
typedef u16 u16x8 _(16);
typedef u32 u32x4 _(16);
typedef u64 u64x2 _(16);

#undef _
/* Specifiers for header-only helpers: internal linkage, inline, and the
   GCC/Clang always_inline attribute. */
#define static_always_inline static inline __attribute__ ((__always_inline__))

/* Temporary shorthand that prefixes each wrapper defined after it:
   static_always_inline plus the `unused` attribute, so a translation unit
   that does not call a given wrapper is not warned about it.  Nothing may
   follow the attribute on this line: any trailing tokens would become part
   of every function declaration that uses `_`. */
#define _ static_always_inline __attribute__ ((unused))
/* Load 32 bytes from p into a u8x32 using the unaligned-load intrinsic
   _mm256_loadu_si256.  The source is only read, so it is taken as a
   pointer to const; callers passing a plain `void *` are unaffected. */
_ u8x32
u8x32_load_unaligned (const void *p)
{
  return (u8x32) _mm256_loadu_si256 ((const __m256i *) p);
}
/* Load 16 bytes from p into a u8x16 using the unaligned-load intrinsic
   _mm_loadu_si128.  The source is only read, so it is taken as a pointer
   to const; callers passing a plain `void *` are unaffected. */
_ u8x16
u8x16_load_unaligned (const void *p)
{
  return (u8x16) _mm_loadu_si128 ((const __m128i *) p);
}
/* Load 32 bytes from p and reinterpret them as sixteen u16 lanes, using
   the unaligned-load intrinsic _mm256_loadu_si256.  The source is only
   read, so it is taken as a pointer to const. */
_ u16x16
u16x16_load_unaligned (const void *p)
{
  return (u16x16) _mm256_loadu_si256 ((const __m256i *) p);
}
/* Load 32 bytes from p and reinterpret them as eight u32 lanes, using the
   unaligned-load intrinsic _mm256_loadu_si256.  The source is only read,
   so it is taken as a pointer to const. */
_ u32x8
u32x8_load_unaligned (const void *p)
{
  return (u32x8) _mm256_loadu_si256 ((const __m256i *) p);
}
/* Load 32 bytes from p and reinterpret them as four u64 lanes, using the
   unaligned-load intrinsic _mm256_loadu_si256.  The source is only read,
   so it is taken as a pointer to const. */
_ u64x4
u64x4_load_unaligned (const void *p)
{
  return (u64x4) _mm256_loadu_si256 ((const __m256i *) p);
}
/* Write the 32 bytes of v to p using the unaligned-store intrinsic
   _mm256_storeu_si256.  Note the argument order: value first, then the
   destination pointer. */
_ void
u8x32_store_unaligned (u8x32 v, void *p)
{
  _mm256_storeu_si256 ((__m256i *) p, (__m256i) v);
}
/* Write the 16 bytes of v to p using the unaligned-store intrinsic
   _mm_storeu_si128.  Note the argument order: value first, then the
   destination pointer. */
_ void
u8x16_store_unaligned (u8x16 v, void *p)
{
  _mm_storeu_si128 ((__m128i *) p, (__m128i) v);
}
/* Return a u64x4 with x copied into every lane (_mm256_set1_epi64x).
   The intrinsic takes a signed 64-bit argument; the cast spells out the
   conversion that would otherwise happen implicitly and leaves the bit
   pattern unchanged. */
_ u64x4
u64x4_splat (u64 x)
{
  return (u64x4) _mm256_set1_epi64x ((long long) x);
}
/* Return a u8x32 with x copied into every byte (_mm256_set1_epi8).
   The intrinsic takes a `char`; the cast spells out the conversion that
   would otherwise happen implicitly. */
_ u8x32
u8x32_splat (u8 x)
{
  return (u8x32) _mm256_set1_epi8 ((char) x);
}
/* Return a u8x16 with x copied into every byte (_mm_set1_epi8).
   The intrinsic takes a `char`; the cast spells out the conversion that
   would otherwise happen implicitly. */
_ u8x16
u8x16_splat (u8 x)
{
  return (u8x16) _mm_set1_epi8 ((char) x);
}
/* Byte-wise select between v1 and v2 under control of mask, via
   _mm256_blendv_epi8: for each byte position the result takes the byte
   from v2 when the most significant bit of the corresponding mask byte is
   set, and the byte from v1 otherwise. */
_ u8x32
u8x32_blend (u8x32 v1, u8x32 v2, u8x32 mask)
{
  return (u8x32) _mm256_blendv_epi8 ((__m256i) v1, (__m256i) v2,
				     (__m256i) mask);
}
/* Byte-wise select between v1 and v2 under control of mask, via
   _mm_blendv_epi8: for each byte position the result takes the byte from
   v2 when the most significant bit of the corresponding mask byte is set,
   and the byte from v1 otherwise. */
_ u8x16
u8x16_blend (u8x16 v1, u8x16 v2, u8x16 mask)
{
  return (u8x16) _mm_blendv_epi8 ((__m128i) v1, (__m128i) v2,
				  (__m128i) mask);
}
/* Per-byte "v1 > v2" mask via _mm256_cmpgt_epi8: a result byte is 0xff
   where the comparison holds and 0x00 where it does not.
   NOTE(review): the intrinsic compares bytes as SIGNED 8-bit integers, so
   lanes holding 0x80..0xff order below lanes holding 0x00..0x7f even
   though the vector type is unsigned.  Behaviour is left as is; confirm
   that callers only rely on it for values below 0x80 or really want the
   signed ordering. */
_ u8x32
u8x32_is_greater (u8x32 v1, u8x32 v2)
{
  return (u8x32) _mm256_cmpgt_epi8 ((__m256i) v1, (__m256i) v2);
}
/* Per-byte "v1 > v2" mask via _mm_cmpgt_epi8: a result byte is 0xff where
   the comparison holds and 0x00 where it does not.
   NOTE(review): the intrinsic compares bytes as SIGNED 8-bit integers, so
   lanes holding 0x80..0xff order below lanes holding 0x00..0x7f even
   though the vector type is unsigned.  Behaviour is left as is; confirm
   that callers only rely on it for values below 0x80 or really want the
   signed ordering. */
_ u8x16
u8x16_is_greater (u8x16 v1, u8x16 v2)
{
  return (u8x16) _mm_cmpgt_epi8 ((__m128i) v1, (__m128i) v2);
}
/* Horizontal add of adjacent 32-bit lane pairs via _mm256_hadd_epi32.
   The intrinsic works on each 128-bit half independently: within a half,
   the result holds the two pair sums of v1 followed by the two pair sums
   of v2.  It is therefore not a straight concatenation of "all sums of
   v1, then all sums of v2". */
_ u32x8
u32x8_hadd (u32x8 v1, u32x8 v2)
{
  return (u32x8) _mm256_hadd_epi32 ((__m256i) v1, (__m256i) v2);
}
#undef _ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment