-
-
Save Cu3PO42/d5497ec50605ca6372e788addfdb707f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*===---- immintrin.h - Intel intrinsics -----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H | |
#define __IMMINTRIN_H | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__) | |
#include <mmintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__) | |
#include <xmmintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__) | |
#include <emmintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__) | |
#include <pmmintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__) | |
#include <tmmintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || \ | |
(defined(__SSE4_2__) || defined(__SSE4_1__)) | |
#include <smmintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || \ | |
(defined(__AES__) || defined(__PCLMUL__)) | |
#include <wmmintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__) | |
#include <clflushoptintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__) | |
#include <avxintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__) | |
#include <avx2intrin.h> | |
/* The 256-bit versions of functions in f16cintrin.h.
   Intel documents these as being in immintrin.h, and
   they depend on typedefs from avxintrin.h. */
/// \brief Narrows each of the eight single-precision elements of a 256-bit
///    vector of [8 x float] to a 16-bit half-precision value and packs the
///    results into a 128-bit vector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 256-bit vector holding the 32-bit single-precision float values that
///    are to be converted to 16-bit half-precision float values.
/// \param imm
///    An immediate whose bits [2:0] select the rounding behavior: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector holding the converted 16-bit half-precision
///    float values.
#define _mm256_cvtps_ph(a, imm) \
  ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)))
/// \brief Converts a 128-bit vector containing 16-bit half-precision float | |
/// values into a 256-bit vector of [8 x float]. | |
/// | |
/// \headerfile <x86intrin.h> | |
/// | |
/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction. | |
/// | |
/// \param __a | |
/// A 128-bit vector containing 16-bit half-precision float values to be | |
/// converted to 32-bit single-precision float values. | |
/// \returns A vector of [8 x float] containing the converted 32-bit | |
/// single-precision float values. | |
static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) | |
_mm256_cvtph_ps(__m128i __a) | |
{ | |
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a); | |
} | |
#endif /* __AVX2__ */ | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) | |
#include <bmiintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__) | |
#include <bmi2intrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__) | |
#include <lzcntintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__) | |
#include <fmaintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__) | |
#include <avx512fintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__) | |
#include <avx512vlintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__) | |
#include <avx512bwintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__) | |
#include <avx512cdintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__) | |
#include <avx512dqintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || \ | |
(defined(__AVX512VL__) && defined(__AVX512BW__)) | |
#include <avx512vlbwintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || \ | |
(defined(__AVX512VL__) && defined(__AVX512CD__)) | |
#include <avx512vlcdintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || \ | |
(defined(__AVX512VL__) && defined(__AVX512DQ__)) | |
#include <avx512vldqintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__) | |
#include <avx512erintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__) | |
#include <avx512ifmaintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || \ | |
(defined(__AVX512IFMA__) && defined(__AVX512VL__)) | |
#include <avx512ifmavlintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__) | |
#include <avx512vbmiintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || \ | |
(defined(__AVX512VBMI__) && defined(__AVX512VL__)) | |
#include <avx512vbmivlintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__) | |
#include <avx512pfintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__) | |
#include <pkuintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__) | |
/// \brief Requests a 16-bit hardware random value through
///    __builtin_ia32_rdrand16_step and stores it through \a __p.
///
/// \param __p
///    Destination passed to the builtin for the generated value.
/// \returns The status value produced by the builtin. Intel documents this
///    as 1 when a random value was delivered and 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  const int __status = __builtin_ia32_rdrand16_step(__p);
  return __status;
}
/// \brief Requests a 32-bit hardware random value through
///    __builtin_ia32_rdrand32_step and stores it through \a __p.
///
/// \param __p
///    Destination passed to the builtin for the generated value.
/// \returns The status value produced by the builtin. Intel documents this
///    as 1 when a random value was delivered and 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  const int __status = __builtin_ia32_rdrand32_step(__p);
  return __status;
}
/// \brief Returns the zero-based index of the least significant set bit of
///    \a __A, computed as the count of trailing zero bits.
///
/// \param __A
///    The value to scan. The underlying __builtin_ctz has an undefined
///    result when this is 0.
/// \returns The bit index of the lowest set bit.
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bit_scan_forward(int __A)
{
  const int __trailing_zeros = __builtin_ctz(__A);
  return __trailing_zeros;
}
/// \brief Returns the zero-based index of the most significant set bit of
///    \a __A, computed as 31 minus the count of leading zero bits.
///
/// \param __A
///    The value to scan. The underlying __builtin_clz has an undefined
///    result when this is 0.
/// \returns The bit index of the highest set bit.
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bit_scan_reverse(int __A)
{
  const int __leading_zeros = __builtin_clz(__A);
  return 31 - __leading_zeros;
}
#ifdef __x86_64__ | |
/// \brief Requests a 64-bit hardware random value through
///    __builtin_ia32_rdrand64_step and stores it through \a __p.
///
/// \param __p
///    Destination passed to the builtin for the generated value.
/// \returns The status value produced by the builtin. Intel documents this
///    as 1 when a random value was delivered and 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
  const int __status = __builtin_ia32_rdrand64_step(__p);
  return __status;
}
#endif | |
#endif /* __RDRND__ */ | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__) | |
#ifdef __x86_64__ | |
/// \brief Reads the FS segment base via the 32-bit form of the RDFSBASE
///    builtin.
///
/// \returns The value produced by __builtin_ia32_rdfsbase32.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  const unsigned int __base = __builtin_ia32_rdfsbase32();
  return __base;
}
/// \brief Reads the FS segment base via the 64-bit form of the RDFSBASE
///    builtin.
///
/// \returns The value produced by __builtin_ia32_rdfsbase64.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  const unsigned long long __base = __builtin_ia32_rdfsbase64();
  return __base;
}
/// \brief Reads the GS segment base via the 32-bit form of the RDGSBASE
///    builtin.
///
/// \returns The value produced by __builtin_ia32_rdgsbase32.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  const unsigned int __base = __builtin_ia32_rdgsbase32();
  return __base;
}
/// \brief Reads the GS segment base via the 64-bit form of the RDGSBASE
///    builtin.
///
/// \returns The value produced by __builtin_ia32_rdgsbase64.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  const unsigned long long __base = __builtin_ia32_rdgsbase64();
  return __base;
}
/// \brief Writes \a __V to the FS segment base via the 32-bit form of the
///    WRFSBASE builtin.
///
/// \param __V
///    The value handed to __builtin_ia32_wrfsbase32.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  /* Plain call: ISO C forbids `return expr;` in a void function, even when
     the expression itself has type void. */
  __builtin_ia32_wrfsbase32(__V);
}
/// \brief Writes \a __V to the FS segment base via the 64-bit form of the
///    WRFSBASE builtin.
///
/// \param __V
///    The value handed to __builtin_ia32_wrfsbase64.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  /* Plain call: ISO C forbids `return expr;` in a void function, even when
     the expression itself has type void. */
  __builtin_ia32_wrfsbase64(__V);
}
/// \brief Writes \a __V to the GS segment base via the 32-bit form of the
///    WRGSBASE builtin.
///
/// \param __V
///    The value handed to __builtin_ia32_wrgsbase32.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  /* Plain call: ISO C forbids `return expr;` in a void function, even when
     the expression itself has type void. */
  __builtin_ia32_wrgsbase32(__V);
}
/// \brief Writes \a __V to the GS segment base via the 64-bit form of the
///    WRGSBASE builtin.
///
/// \param __V
///    The value handed to __builtin_ia32_wrgsbase64.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  /* Plain call: ISO C forbids `return expr;` in a void function, even when
     the expression itself has type void. */
  __builtin_ia32_wrgsbase64(__V);
}
#endif | |
#endif /* __FSGSBASE__ */ | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__) | |
#include <rtmintrin.h> | |
#include <xtestintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__) | |
#include <shaintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__) | |
#include <fxsrintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__) | |
#include <xsaveintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__) | |
#include <xsaveoptintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__) | |
#include <xsavecintrin.h> | |
#endif | |
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__) | |
#include <xsavesintrin.h> | |
#endif | |
/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
 * whereas others are available at all times. */
#include <adxintrin.h> | |
#endif /* __IMMINTRIN_H */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment