Skip to content

Instantly share code, notes, and snippets.

@pps83
Last active June 17, 2024 10:45
Show Gist options
  • Save pps83/3210a2f980fd02bb2ba2e5a1fc4a2ef0 to your computer and use it in GitHub Desktop.
Save pps83/3210a2f980fd02bb2ba2e5a1fc4a2ef0 to your computer and use it in GitHub Desktop.
__builtin_ctz (ctzl, ctzll) and __builtin_clz (clzl, clzll) for Visual Studio
// Note: the bsf/bsr bit-scan instructions are used by default.
// Compile with /arch:AVX2 to use tzcnt/lzcnt instead, for better optimizations.
#if defined(_MSC_VER) && !defined(__clang__)
#include <intrin.h>
// Count trailing zero bits of a 32-bit value; stands in for the GCC/Clang
// builtin of the same name.
//
// The implementation is picked at compile time:
//   - ARM targets:            _CountTrailingZeros
//   - __AVX2__ or __BMI__:    _tzcnt_u32
//   - otherwise:              _BitScanForward
//
// NOTE: the bit-scan path does not check the outcome of the scan call, so
// x == 0 gets no special handling there. Pass a non-zero x.
static __forceinline int __builtin_ctz(unsigned x)
{
#if defined(_M_ARM) || defined(_M_ARM64) || \
    defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)
    return (int)_CountTrailingZeros(x);
#elif defined(__AVX2__) || defined(__BMI__)
    return (int)_tzcnt_u32(x);
#else
    unsigned long bit_index;

    // bit_index receives the position of the lowest set bit, which is
    // exactly the number of trailing zeros.
    _BitScanForward(&bit_index, x);
    return (int)bit_index;
#endif
}
// Count trailing zero bits of a 64-bit value; stands in for the GCC/Clang
// builtin of the same name.
//
// The implementation is picked at compile time:
//   - ARM targets:                      _CountTrailingZeros64
//   - _WIN64 with __AVX2__ or __BMI__:  _tzcnt_u64
//   - _WIN64 otherwise:                 _BitScanForward64
//   - everything else:                  two 32-bit counts combined
//
// NOTE: the bit-scan path does not check the outcome of the scan call, so
// x == 0 gets no special handling there. Pass a non-zero x.
static __forceinline int __builtin_ctzll(unsigned long long x)
{
#if defined(_M_ARM) || defined(_M_ARM64) || \
    defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)
    return (int)_CountTrailingZeros64(x);
#elif defined(_WIN64)
#  if defined(__AVX2__) || defined(__BMI__)
    return (int)_tzcnt_u64(x);
#  else
    unsigned long bit_index;

    _BitScanForward64(&bit_index, x);
    return (int)bit_index;
#  endif
#else
    // No 64-bit intrinsic is used on this path: count each 32-bit half and
    // pick the low half's count whenever the low half has any bit set.
    // Both counts are computed up front; only one of them is returned.
    const unsigned low_half = (unsigned)x;
    const unsigned high_half = (unsigned)(x >> 32);
    const int low_count = __builtin_ctz(low_half);
    const int high_count = __builtin_ctz(high_half) + 32;

    if (low_half != 0) {
        return low_count;
    }
    return high_count;
#endif
}
// Count trailing zero bits of an unsigned long; stands in for the GCC/Clang
// builtin of the same name.
//
// Dispatches on the width of unsigned long: the 64-bit helper when it is
// 8 bytes wide, the 32-bit helper otherwise.
static __forceinline int __builtin_ctzl(unsigned long x)
{
    if (sizeof(x) == 8) {
        return __builtin_ctzll(x);
    }
    return __builtin_ctz((unsigned)x);
}
// Count leading zero bits of a 32-bit value; stands in for the GCC/Clang
// builtin of the same name.
//
// The implementation is picked at compile time:
//   - ARM targets:             _CountLeadingZeros
//   - __AVX2__ or __LZCNT__:   _lzcnt_u32
//   - otherwise:               _BitScanReverse
//
// NOTE: the bit-scan path does not check the outcome of the scan call, so
// x == 0 gets no special handling there. Pass a non-zero x.
static __forceinline int __builtin_clz(unsigned x)
{
#if defined(_M_ARM) || defined(_M_ARM64) || \
    defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)
    return (int)_CountLeadingZeros(x);
#elif defined(__AVX2__) || defined(__LZCNT__)
    return (int)_lzcnt_u32(x);
#else
    unsigned long top_bit;

    _BitScanReverse(&top_bit, x);
    // top_bit is the position of the highest set bit. For a position in
    // 0..31, XOR with 31 equals 31 - position, i.e. the leading-zero count.
    return (int)(top_bit ^ 31);
#endif
}
// Count leading zero bits of a 64-bit value; stands in for the GCC/Clang
// builtin of the same name.
//
// The implementation is picked at compile time:
//   - ARM targets:                        _CountLeadingZeros64
//   - _WIN64 with __AVX2__ or __LZCNT__:  _lzcnt_u64
//   - _WIN64 otherwise:                   _BitScanReverse64
//   - everything else:                    two 32-bit counts combined
//
// NOTE: the bit-scan path does not check the outcome of the scan call, so
// x == 0 gets no special handling there. Pass a non-zero x.
static __forceinline int __builtin_clzll(unsigned long long x)
{
#if defined(_M_ARM) || defined(_M_ARM64) || \
    defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)
    return (int)_CountLeadingZeros64(x);
#elif defined(_WIN64)
#  if defined(__AVX2__) || defined(__LZCNT__)
    return (int)_lzcnt_u64(x);
#  else
    unsigned long top_bit;

    _BitScanReverse64(&top_bit, x);
    // For a position in 0..63, XOR with 63 equals 63 - position, i.e. the
    // leading-zero count.
    return (int)(top_bit ^ 63);
#  endif
#else
    // No 64-bit intrinsic is used on this path: count each 32-bit half and
    // pick the high half's count whenever the high half has any bit set.
    // Both counts are computed up front; only one of them is returned.
    const unsigned low_half = (unsigned)x;
    const unsigned high_half = (unsigned)(x >> 32);
    const int low_count = __builtin_clz(low_half) + 32;
    const int high_count = __builtin_clz(high_half);

    if (high_half != 0) {
        return high_count;
    }
    return low_count;
#endif
}
// Count leading zero bits of an unsigned long; stands in for the GCC/Clang
// builtin of the same name.
//
// Dispatches on the width of unsigned long: the 64-bit helper when it is
// 8 bytes wide, the 32-bit helper otherwise.
static __forceinline int __builtin_clzl(unsigned long x)
{
    if (sizeof(x) == 8) {
        return __builtin_clzll(x);
    }
    return __builtin_clz((unsigned)x);
}
#endif // defined(_MSC_VER) && !defined(__clang__)
@nickpelling
Copy link

nickpelling commented May 6, 2024

Thanks very much, Pavel, that's perfect, you're a star! :-)

https://github.com/nickpelling/C_STD/blob/develop/THANKS.md

@pps83
Copy link
Author

pps83 commented May 7, 2024

> Thanks very much, Pavel, that's perfect, you're a star! :-)

Note, I updated it to:

  • check for __AVX2__ builds to use lzcnt/tzcnt instead of bsf/bsr
  • add code to handle arm/arm64
  • add guard for clang-cl

The code was tested to verify that its results are bit-exact with the code that clang/gcc emit for these builtins.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment