Last active
June 17, 2024 10:45
-
-
Save pps83/3210a2f980fd02bb2ba2e5a1fc4a2ef0 to your computer and use it in GitHub Desktop.
__builtin_ctz (ctzl, ctzll) and __builtin_clz (clzl, clzll) for Visual Studio
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Note: bsf/bsr are used by default.
// Compile with /arch:AVX2 so that tzcnt/lzcnt are used instead (better optimizations).
#if defined(_MSC_VER) && !defined(__clang__) | |
#include <intrin.h> | |
/*
 * MSVC replacement for the GCC/Clang builtin __builtin_ctz: returns the
 * number of trailing zero bits of a 32-bit value.
 *
 * The instruction used is chosen at compile time:
 *   - ARM targets:            _CountTrailingZeros
 *   - __AVX2__ or __BMI__:    _tzcnt_u32
 *   - everything else:        _BitScanForward
 *
 * NOTE(review): the _BitScanForward status result is ignored, so on that
 * path the returned index is presumably meaningless for x == 0 (the GCC
 * builtin is likewise undefined for 0) -- callers should pass non-zero x.
 */
static __forceinline int __builtin_ctz(unsigned x)
{
    int count;
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)
    count = (int)_CountTrailingZeros(x);
#elif defined(__AVX2__) || defined(__BMI__)
    count = (int)_tzcnt_u32(x);
#else
    unsigned long bit_index;
    _BitScanForward(&bit_index, x);
    count = (int)bit_index;
#endif
    return count;
}
/*
 * MSVC replacement for the GCC/Clang builtin __builtin_ctzll: returns the
 * number of trailing zero bits of a 64-bit value.
 *
 * The implementation is chosen at compile time:
 *   - ARM targets:                    _CountTrailingZeros64
 *   - 64-bit Windows, AVX2/BMI:       _tzcnt_u64
 *   - 64-bit Windows, otherwise:      _BitScanForward64
 *   - 32-bit x86:                     built from the 32-bit __builtin_ctz
 *
 * NOTE(review): as with the GCC builtin, callers should pass non-zero x;
 * the bit-scan paths ignore the intrinsic's status result.
 */
static __forceinline int __builtin_ctzll(unsigned long long x)
{
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)
    return (int)_CountTrailingZeros64(x);
#elif defined(_WIN64)
#if defined(__AVX2__) || defined(__BMI__)
    return (int)_tzcnt_u64(x);
#else
    unsigned long r;
    _BitScanForward64(&r, x);
    return (int)r;
#endif
#else
    /*
     * No 64-bit scan instruction on 32-bit x86: look at the low word first
     * and fall back to the high word only when the low word is empty.
     * Only the half that decides the answer is scanned, so the 32-bit
     * helper is never invoked on a zero word for a non-zero x.
     */
    const unsigned lo = (unsigned)x;
    if (lo != 0) {
        return __builtin_ctz(lo);
    }
    return __builtin_ctz((unsigned)(x >> 32)) + 32;
#endif
}
/*
 * MSVC replacement for the GCC/Clang builtin __builtin_ctzl: trailing zero
 * count for an unsigned long. Forwards to the 32-bit or the 64-bit variant
 * depending on the width of unsigned long on the target.
 */
static __forceinline int __builtin_ctzl(unsigned long x)
{
    return sizeof(unsigned long) != 8 ? __builtin_ctz((unsigned)x) : __builtin_ctzll(x);
}
/*
 * MSVC replacement for the GCC/Clang builtin __builtin_clz: returns the
 * number of leading zero bits of a 32-bit value.
 *
 * The instruction used is chosen at compile time:
 *   - ARM targets:              _CountLeadingZeros
 *   - __AVX2__ or __LZCNT__:    _lzcnt_u32
 *   - everything else:          _BitScanReverse
 *
 * NOTE(review): the _BitScanReverse status result is ignored, so on that
 * path the result is presumably meaningless for x == 0 (the GCC builtin is
 * likewise undefined for 0) -- callers should pass non-zero x.
 */
static __forceinline int __builtin_clz(unsigned x)
{
    int count;
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)
    count = (int)_CountLeadingZeros(x);
#elif defined(__AVX2__) || defined(__LZCNT__)
    count = (int)_lzcnt_u32(x);
#else
    unsigned long bit_index;
    _BitScanReverse(&bit_index, x);
    /* The scan yields the index of the highest set bit; XOR with 31 turns
       that index (0..31) into the count of zero bits above it. */
    count = (int)(bit_index ^ 31);
#endif
    return count;
}
/*
 * MSVC replacement for the GCC/Clang builtin __builtin_clzll: returns the
 * number of leading zero bits of a 64-bit value.
 *
 * The implementation is chosen at compile time:
 *   - ARM targets:                    _CountLeadingZeros64
 *   - 64-bit Windows, AVX2/LZCNT:     _lzcnt_u64
 *   - 64-bit Windows, otherwise:      _BitScanReverse64
 *   - 32-bit x86:                     built from the 32-bit __builtin_clz
 *
 * NOTE(review): as with the GCC builtin, callers should pass non-zero x;
 * the bit-scan paths ignore the intrinsic's status result.
 */
static __forceinline int __builtin_clzll(unsigned long long x)
{
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)
    return (int)_CountLeadingZeros64(x);
#elif defined(_WIN64)
#if defined(__AVX2__) || defined(__LZCNT__)
    return (int)_lzcnt_u64(x);
#else
    unsigned long r;
    _BitScanReverse64(&r, x);
    /* r is the index of the highest set bit (0..63); XOR with 63 converts
       it to the number of zero bits above that position. */
    return (int)(r ^ 63);
#endif
#else
    /*
     * No 64-bit scan instruction on 32-bit x86: look at the high word first
     * and fall back to the low word only when the high word is empty.
     * Only the half that decides the answer is scanned, so the 32-bit
     * helper is never invoked on a zero word for a non-zero x.
     */
    const unsigned hi = (unsigned)(x >> 32);
    if (hi != 0) {
        return __builtin_clz(hi);
    }
    return __builtin_clz((unsigned)x) + 32;
#endif
}
/*
 * MSVC replacement for the GCC/Clang builtin __builtin_clzl: leading zero
 * count for an unsigned long. Forwards to the 32-bit or the 64-bit variant
 * depending on the width of unsigned long on the target.
 */
static __forceinline int __builtin_clzl(unsigned long x)
{
    return sizeof(unsigned long) != 8 ? __builtin_clz((unsigned)x) : __builtin_clzll(x);
}
#endif // defined(_MSC_VER) && !defined(__clang__) |
Thanks very much, Pavel, that's perfect, you're a star! :-)
Note, I updated it to:
- check for `__AVX2__` builds to use lzcnt/tzcnt vs bsf/bsr
- add code to handle arm/arm64
- add guard for clang-cl
The code was tested to verify that it's bit exact with the code that clang/gcc emits for these builtins.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks very much, Pavel, that's perfect, you're a star! :-)
https://github.com/nickpelling/C_STD/blob/develop/THANKS.md