Created
June 15, 2016 13:59
-
-
Save resilar/e722d4600dbec9752771ab4c9d47044f to your computer and use it in GitHub Desktop.
de Bruijn CTZ with proper handling of 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// you faggots probably know the de bruijn trick to count trailing zeros: | |
inline int ctz32_retarded(uint32_t x) | |
{ | |
static const unsigned char debruijn_ctz32[32] = { | |
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, | |
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 | |
}; | |
return debruijn_ctz32[((x & -x) * 0x077CB531) >> 27]; | |
} | |
// explanation: | |
// | |
// * x&-x clears all bits except the least significant non-zero bit, i.e., we | |
// obtain 2^n for some 0<=n<=31 and our goal is to determine n | |
// * 0x077CB531(0) is a de bruijn sequence containing all of the 32 distinct | |
// 5-bit strings (and exactly once) | |
// * multiplication by 2^n shifts the sequence to the left by n bits | |
// * thus, the 5 most significant bits of the product are unique for each n | |
// * the debruijn_ctz32 lookup table maps the unique 5 bits to n | |
// | |
// however, this yields an incorrect result for x=0 | |
// | |
// debruijn_ctz32[((0 & -0) * 0x077CB531) >> 27] = debruin_ctz32[0] = 0 | |
// | |
// some may argue that this isn't incorrect because CTZ is undefined for 0. | |
// these people are retarded since ctz32(1)=0=ctz32(0) is just fucking stupid. | |
// | |
// so let me propose a relatively clean workaround to get ctz32(0)=32 | |
inline int ctz32(uint32_t x) | |
{ | |
static const unsigned char debruijn_ctz32[32] = { | |
31, 0, 27, 1, 28, 13, 23, 2, 29, 21, 19, 14, 24, 16, 3, 7, | |
30, 26, 12, 22, 20, 18, 15, 6, 25, 11, 17, 5, 10, 4, 9, 8 | |
}; | |
return !x + debruijn_ctz32[((x & -x) * 0x0EF96A62) >> 27]; | |
} | |
// here we use a de bruijn sequence that multiplied by 2^31 sets the 5 most | |
// significant bits to 0 so that debruijn_ctz32[0] = 31, and therefore | |
// | |
// !0 + debruijn_ctz32[((0 & -0) * 0x0EF96A62) >> 27] = 1 + 31 = 32 | |
// 64-bit versions: | |
inline int ctz64_retarded(uint64_t x) { | |
static const unsigned char debruijn_ctz64[64] = { | |
0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, | |
62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, | |
63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, | |
51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12 | |
}; | |
return debruijn_ctz64[((x & -x) * 0x022FDD63CC95386D) >> 58]; | |
} | |
inline int ctz64(uint64_t x) { | |
static const unsigned char debruijn_ctz64[64] = { | |
63, 0, 1, 52, 2, 6, 53, 26, 3, 37, 40, 7, 33, 54, 47, 27, | |
61, 4, 38, 45, 43, 41, 21, 8, 23, 34, 58, 55, 48, 17, 28, 10, | |
62, 51, 5, 25, 36, 39, 32, 46, 60, 44, 42, 20, 22, 57, 16, 9, | |
50, 24, 35, 31, 59, 19, 56, 15, 49, 30, 18, 14, 29, 13, 12, 11 | |
}; | |
return !x + debruijn_ctz64[((x & -x) * 0x045FBAC7992A70DA) >> 58]; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment