-
-
Save orlp/3551590 to your computer and use it in GitHub Desktop.
int64_t ipow(int64_t base, uint8_t exp) { | |
static const uint8_t highest_bit_set[] = { | |
0, 1, 2, 2, 3, 3, 3, 3, | |
4, 4, 4, 4, 4, 4, 4, 4, | |
5, 5, 5, 5, 5, 5, 5, 5, | |
5, 5, 5, 5, 5, 5, 5, 5, | |
6, 6, 6, 6, 6, 6, 6, 6, | |
6, 6, 6, 6, 6, 6, 6, 6, | |
6, 6, 6, 6, 6, 6, 6, 6, | |
6, 6, 6, 6, 6, 6, 6, 255, // anything past 63 is a guaranteed overflow with base > 1 | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
255, 255, 255, 255, 255, 255, 255, 255, | |
}; | |
int64_t result = 1; | |
switch (highest_bit_set[exp]) { | |
case 255: // we use 255 as an overflow marker and return 0 on overflow/underflow | |
if (base == 1) { | |
return 1; | |
} | |
if (base == -1) { | |
return 1 - 2 * (exp & 1); | |
} | |
return 0; | |
case 6: | |
if (exp & 1) result *= base; | |
exp >>= 1; | |
base *= base; | |
case 5: | |
if (exp & 1) result *= base; | |
exp >>= 1; | |
base *= base; | |
case 4: | |
if (exp & 1) result *= base; | |
exp >>= 1; | |
base *= base; | |
case 3: | |
if (exp & 1) result *= base; | |
exp >>= 1; | |
base *= base; | |
case 2: | |
if (exp & 1) result *= base; | |
exp >>= 1; | |
base *= base; | |
case 1: | |
if (exp & 1) result *= base; | |
default: | |
return result; | |
} | |
} |
your code should end the cases sooner - when exp
is down to { 0 , 1 , 2 }
Cuz from there it's very obvious what needs to be returned :::
return { <= 0 := result
1 := result * base
2 := result * base * base
}
or more succinctly ::
return ( base ** exp ) * result
Because when exp == 2
, you already know ahead of time that ( exp >>= 1 ) & 1
must be true, so why bother evaluating the obvious ?
Another speed up trick you can contemplate is that if you already know up-front that the exponent is 1 short of a power of 2, then you already know every single bit is a 1.
In that case, make it into a vanilla countdown loop for # of bits, and simply doing the exact same thing every round without bothering to either right shift or check for ( exp & 1 )
. In my own code, for this scenario, I usually make the starting point slightly lagged, so each round I simply do a compound statement of
result *= base *= base # intentionally lagged, so one must be careful regarding when you
# need to perform the extra one to sync them back up with each other
the lookup table for highest bit set can be replaced with
__builtin_clz
if it is available (gcc, clang). It will make it faster (especially if the lookup table is not in cache, which is likely in a real-world application doing other things than that) and smaller.Simply replace
highest_bit_set[exp]
with(exp>0) ? 32 - __builtin_clz(exp) : 0