fiveoutofnine/Log2.sol

## compute_lut.py
# Compute the lookup table corresponding to `MAGIC`.
MAGIC = 0x3F79C6D30BACA89
UINT64_MASK = 0xFFFFFFFFFFFFFFFF

lut_lst = [0 for _ in range(64)]
for i in range(64):
  n = 1 << i
  n |= n >> 1
  n |= n >> 2
  n |= n >> 4
  n |= n >> 8
  n |= n >> 16
  n |= n >> 32
  n |= n >> 64
  n |= n >> 128

  lut_lst[((n * MAGIC) & UINT64_MASK) >> 58] = i

# Reverse the lists so we can bitpack into 1 `uint256`.
lut_one = lut_lst[:32][::-1]
lut_two = lut_lst[32:][::-1]

# Bitpack elements in `lut_one` and `lut_two` into `LUT_ONE` and `LUT_TWO`,
# respectively. We use words of size 8 so it's slightly more efficient to access
# from later on (can use `<< 3` instead of `* 6`).
LUT_ONE, LUT_TWO = 0, 0
for e in lut_one:
  LUT_ONE <<= 8
  LUT_ONE |= e

for e in lut_two:
  LUT_TWO <<= 8
  LUT_TWO |= e

LUT_ONE, LUT_TWO = sorted((LUT_ONE, LUT_TWO))
LUT_DIFF = LUT_TWO - LUT_ONE

## Log2.sol
// SPDX-License-Identifier: MIT
pragma solidity ^0.8.14;

contract Math {
    uint256 private constant MAGIC = 0x3F79C6D30BACA89;
    uint256 private constant LUT_ONE
        = 0x3E040B2811171A2E20262C32341D3A363D0310161F2531393C02152438012300;
    uint256 private constant LUT_DIFF
        = 0x100FADEFAF10EDEF1E2EBF7E6F0F4DCE40717030E0601E2F90D090C03131422;

    function log2(uint64 _n) external pure returns (uint64 _r) {
        assembly {
            _n := or(_n, shr( 1, _n))
            _n := or(_n, shr( 2, _n))
            _n := or(_n, shr( 4, _n))
            _n := or(_n, shr( 8, _n))
            _n := or(_n, shr(16, _n))
            _n := or(_n, shr(32, _n))

            let index := shr(
                58,
                and(
                    mul(_n, MAGIC),
                    0xFFFFFFFFFFFFFFFF
                )
            )

            // Maybe unnecessary branchless optimization... (idea is to use
            // `gt(index, 31) = 1` rather than in switch statements.
            let gt31 := gt(index, 31)
            let lut := add(LUT_ONE, mul(gt31, LUT_DIFF))

            _r := and(
                shr(
                    shl(
                        3,
                        // Branchless optimization (same idea as above).
                        sub(index, mul(gt31, 32))
                    ),
                    lut
                ),
                0xFF
            )
        }
    }
}
	# Compute the lookup table corresponding to `MAGIC`.
	MAGIC = 0x3F79C6D30BACA89
	UINT64_MASK = 0xFFFFFFFFFFFFFFFF

	lut_lst = [0 for _ in range(64)]
	for i in range(64):
	n = 1 << i
	n \|= n >> 1
	n \|= n >> 2
	n \|= n >> 4
	n \|= n >> 8
	n \|= n >> 16
	n \|= n >> 32
	n \|= n >> 64
	n \|= n >> 128

	lut_lst[((n * MAGIC) & UINT64_MASK) >> 58] = i

	# Reverse the lists so we can bitpack into 1 `uint256`.
	lut_one = lut_lst[:32][::-1]
	lut_two = lut_lst[32:][::-1]

	# Bitpack elements in `lut_one` and `lut_two` into `LUT_ONE` and `LUT_TWO`,
	# respectively. We use words of size 8 so it's slightly more efficient to access
	# from later on (can use `<< 3` instead of `* 6`).
	LUT_ONE, LUT_TWO = 0, 0
	for e in lut_one:
	LUT_ONE <<= 8
	LUT_ONE \|= e

	for e in lut_two:
	LUT_TWO <<= 8
	LUT_TWO \|= e

	LUT_ONE, LUT_TWO = sorted((LUT_ONE, LUT_TWO))
	LUT_DIFF = LUT_TWO - LUT_ONE
	// SPDX-License-Identifier: MIT
	pragma solidity ^0.8.14;

	contract Math {
	uint256 private constant MAGIC = 0x3F79C6D30BACA89;
	uint256 private constant LUT_ONE
	= 0x3E040B2811171A2E20262C32341D3A363D0310161F2531393C02152438012300;
	uint256 private constant LUT_DIFF
	= 0x100FADEFAF10EDEF1E2EBF7E6F0F4DCE40717030E0601E2F90D090C03131422;

	function log2(uint64 _n) external pure returns (uint64 _r) {
	assembly {
	_n := or(_n, shr( 1, _n))
	_n := or(_n, shr( 2, _n))
	_n := or(_n, shr( 4, _n))
	_n := or(_n, shr( 8, _n))
	_n := or(_n, shr(16, _n))
	_n := or(_n, shr(32, _n))

	let index := shr(
	58,
	and(
	mul(_n, MAGIC),
	0xFFFFFFFFFFFFFFFF
	)
	)

	// Maybe unnecessary branchless optimization... (idea is to use
	// `gt(index, 31) = 1` rather than in switch statements.
	let gt31 := gt(index, 31)
	let lut := add(LUT_ONE, mul(gt31, LUT_DIFF))

	_r := and(
	shr(
	shl(
	3,
	// Branchless optimization (same idea as above).
	sub(index, mul(gt31, 32))
	),
	lut
	),
	0xFF
	)
	}
	}
	}