-
-
Save dougallj/97d8621d4542ba31e004acc8075fac14 to your computer and use it in GitHub Desktop.
AArch64 Logical Immediates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// AArch64 Logical Immediate Encoding and Decoding | |
// | |
// I hereby place this code in the public domain, as per the terms of the | |
// CC0 license: https://creativecommons.org/publicdomain/zero/1.0/ | |
#include <stdint.h> | |
#include <stdbool.h> | |
static inline int nonzeroCountTrailingZeros64(uint64_t n) { | |
return __builtin_ctzll(n); | |
} | |
static inline int countTrailingZeros64(uint64_t n) { | |
return n ? nonzeroCountTrailingZeros64(n) : 64; | |
} | |
static inline int nonzeroCountLeadingZeros64(uint64_t n) { | |
return __builtin_clzll(n); | |
} | |
static inline int nonzeroCountLeadingZeros32(uint32_t n) { | |
return __builtin_clz(n); | |
} | |
static inline uint64_t rotateRight64(uint64_t v, int n) { | |
// return __builtin_rotateright64(v, n); | |
return (v >> (n & 63)) | (v << (-n & 63)); | |
} | |
static inline uint64_t clearTrailingOnes64(uint64_t n) { | |
return n & (n + 1); | |
} | |
#define ENCODE_FAILED (-1) | |
int encodeLogicalImmediate64(uint64_t val) { | |
// Consider an ARM64 logical immediate as a pattern of "o" ones preceded | |
// by "z" more-significant zeroes, repeated to fill a 64-bit integer. | |
// o > 0, z > 0, and the size (o + z) is a power of two in [2,64]. This | |
// part of the pattern is encoded in the fields "imms" and "N". | |
// | |
// "immr" encodes a further right rotate of the repeated pattern, allowing | |
// a wide range of useful bitwise constants to be represented. | |
// | |
// (The spec describes the "immr" rotate as rotating the "o + z" bit | |
// pattern before repeating it to fill 64-bits, but, as it's a repeating | |
// pattern, rotating afterwards is equivalent.) | |
// This encoding is not allowed to represent all-zero or all-one values. | |
if (val == 0 || ~val == 0) | |
return ENCODE_FAILED; | |
// To detect an immediate that may be encoded in this scheme, we first | |
// remove the right-rotate, by rotating such that the least significant | |
// bit is a one and the most significant bit is a zero. | |
// | |
// We do this by clearing any trailing one bits, then counting the | |
// trailing zeroes. This finds an "edge", where zero goes to one. | |
// We then rotate the original value right by that amount, moving | |
// the first one to the least significant bit. | |
int rotation = countTrailingZeros64(clearTrailingOnes64(val)); | |
uint64_t normalized = rotateRight64(val, rotation & 63); | |
// Now we have normalized the value, and determined the rotation, we can | |
// determine "z" by counting the leading zeroes, and "o" by counting the | |
// trailing ones. (These will both be positive, as we already rejected 0 | |
// and ~0, and rotated the value to start with a zero and end with a one.) | |
int zeroes = nonzeroCountLeadingZeros64(normalized); | |
int ones = nonzeroCountTrailingZeros64(~normalized); | |
int size = zeroes + ones; | |
// Detect the repeating pattern (by comparing every repetition to the | |
// one next to it, using rotate). | |
if (rotateRight64(val, size & 63) != val) | |
return ENCODE_FAILED; | |
// We do not need to further validate size to ensure it is a power of two | |
// between 2 and 64. The only "minimal" patterns that can repeat to fill a | |
// 64-bit value must have a length that is a factor of 64 (i.e. it is a | |
// power of two in the range [1,64]). And our pattern cannot be of length | |
// one (as we already rejected 0 and ~0). | |
// | |
// By "minimal" patterns I refer to patterns which do not themselves | |
// contain repetitions. For example, '010101' is a non-minimal pattern of | |
// a non-power-of-two length that can pass the above rotational test. It | |
// consists of the minimal pattern '01'. All our patterns are minimal, as | |
// they contain only one contiguous run of ones separated by at least one | |
// zero. | |
// Finally, we encode the values. "rotation" is the amount we rotated | |
// right by to "undo" the right-rotate encoded in immr, so must be | |
// negated. | |
// size 2: N=0 immr=00000r imms=11110s | |
// size 4: N=0 immr=0000rr imms=1110ss | |
// size 8: N=0 immr=000rrr imms=110sss | |
// size 16: N=0 immr=00rrrr imms=10ssss | |
// size 32: N=0 immr=0rrrrr imms=0sssss | |
// size 64: N=1 immr=rrrrrr imms=ssssss | |
int immr = -rotation & (size - 1); | |
int imms = -(size << 1) | (ones - 1); | |
int N = (size >> 6); | |
return (N << 12) | (immr << 6) | (imms & 0x3f); | |
} | |
int encodeLogicalImmediate32(uint32_t val) { | |
return encodeLogicalImmediate64(((uint64_t)val << 32) | val); | |
} | |
// Decoding! | |
bool isValidLogicalImmediate64(unsigned val) { | |
unsigned N = (val >> 12) & 1; | |
unsigned imms = val & 0x3f; | |
unsigned pattern = (N << 6) | (~imms & 0x3f); | |
return (pattern & (pattern - 1)) != 0; | |
} | |
bool isValidLogicalImmediate32(unsigned val) { | |
unsigned N = (val >> 12) & 1; | |
return N == 0 && isValidLogicalImmediate64(val); | |
} | |
#define DECODE_FAILED 0 | |
// returns DECODE_FAILED (zero) if the encoding is invalid | |
uint64_t decodeLogicalImmediate64(unsigned val) { | |
// Fun way to generate the immediates with mask ^ (mask << S) | |
static const uint64_t mask_lookup[] = { | |
0xffffffffffffffff, // size = 64 | |
0x00000000ffffffff, // size = 32 | |
0x0000ffff0000ffff, // size = 16 | |
0x00ff00ff00ff00ff, // size = 8 | |
0x0f0f0f0f0f0f0f0f, // size = 4 | |
0x3333333333333333, // size = 2 | |
}; | |
unsigned N = (val >> 12) & 1; | |
int immr = (val >> 6) & 0x3f; | |
unsigned imms = val & 0x3f; | |
unsigned pattern = (N << 6) | (~imms & 0x3f); | |
if (!(pattern & (pattern - 1))) return DECODE_FAILED; | |
int leading_zeroes = nonzeroCountLeadingZeros32(pattern); | |
unsigned imms_mask = 0x7fffffff >> leading_zeroes; | |
uint64_t mask = mask_lookup[leading_zeroes - 25]; | |
unsigned S = (imms + 1) & imms_mask; | |
return rotateRight64(mask ^ (mask << S), immr); | |
} | |
uint32_t decodeLogicalImmediate32(unsigned val) { | |
unsigned N = (val >> 12) & 1; | |
if (N) return DECODE_FAILED; | |
return (uint32_t)decodeLogicalImmediate64(val); | |
} |
Nice! Thanks for letting me know :)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Just used it in the aarch64 GNU Lightning backend, see
https://git.savannah.gnu.org/cgit/lightning.git/commit/?id=c3dfbe299b3e7404c4aa520c6b3a1c9af2ead801
Original code was from 2013, with just a few common pattern, built by just looking at an early binutils implementation and never revisited that code...