Skip to content

Instantly share code, notes, and snippets.

@dougallj
Last active February 11, 2023 05:09
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dougallj/97d8621d4542ba31e004acc8075fac14 to your computer and use it in GitHub Desktop.
Save dougallj/97d8621d4542ba31e004acc8075fac14 to your computer and use it in GitHub Desktop.
AArch64 Logical Immediates
// AArch64 Logical Immediate Encoding and Decoding
//
// I hereby place this code in the public domain, as per the terms of the
// CC0 license: https://creativecommons.org/publicdomain/zero/1.0/
#include <stdint.h>
#include <stdbool.h>
static inline int nonzeroCountTrailingZeros64(uint64_t n) {
return __builtin_ctzll(n);
}
static inline int countTrailingZeros64(uint64_t n) {
return n ? nonzeroCountTrailingZeros64(n) : 64;
}
static inline int nonzeroCountLeadingZeros64(uint64_t n) {
return __builtin_clzll(n);
}
static inline int nonzeroCountLeadingZeros32(uint32_t n) {
return __builtin_clz(n);
}
static inline uint64_t rotateRight64(uint64_t v, int n) {
// return __builtin_rotateright64(v, n);
return (v >> (n & 63)) | (v << (-n & 63));
}
static inline uint64_t clearTrailingOnes64(uint64_t n) {
return n & (n + 1);
}
#define ENCODE_FAILED (-1)
int encodeLogicalImmediate64(uint64_t val) {
// Consider an ARM64 logical immediate as a pattern of "o" ones preceded
// by "z" more-significant zeroes, repeated to fill a 64-bit integer.
// o > 0, z > 0, and the size (o + z) is a power of two in [2,64]. This
// part of the pattern is encoded in the fields "imms" and "N".
//
// "immr" encodes a further right rotate of the repeated pattern, allowing
// a wide range of useful bitwise constants to be represented.
//
// (The spec describes the "immr" rotate as rotating the "o + z" bit
// pattern before repeating it to fill 64-bits, but, as it's a repeating
// pattern, rotating afterwards is equivalent.)
// This encoding is not allowed to represent all-zero or all-one values.
if (val == 0 || ~val == 0)
return ENCODE_FAILED;
// To detect an immediate that may be encoded in this scheme, we first
// remove the right-rotate, by rotating such that the least significant
// bit is a one and the most significant bit is a zero.
//
// We do this by clearing any trailing one bits, then counting the
// trailing zeroes. This finds an "edge", where zero goes to one.
// We then rotate the original value right by that amount, moving
// the first one to the least significant bit.
int rotation = countTrailingZeros64(clearTrailingOnes64(val));
uint64_t normalized = rotateRight64(val, rotation & 63);
// Now we have normalized the value, and determined the rotation, we can
// determine "z" by counting the leading zeroes, and "o" by counting the
// trailing ones. (These will both be positive, as we already rejected 0
// and ~0, and rotated the value to start with a zero and end with a one.)
int zeroes = nonzeroCountLeadingZeros64(normalized);
int ones = nonzeroCountTrailingZeros64(~normalized);
int size = zeroes + ones;
// Detect the repeating pattern (by comparing every repetition to the
// one next to it, using rotate).
if (rotateRight64(val, size & 63) != val)
return ENCODE_FAILED;
// We do not need to further validate size to ensure it is a power of two
// between 2 and 64. The only "minimal" patterns that can repeat to fill a
// 64-bit value must have a length that is a factor of 64 (i.e. it is a
// power of two in the range [1,64]). And our pattern cannot be of length
// one (as we already rejected 0 and ~0).
//
// By "minimal" patterns I refer to patterns which do not themselves
// contain repetitions. For example, '010101' is a non-minimal pattern of
// a non-power-of-two length that can pass the above rotational test. It
// consists of the minimal pattern '01'. All our patterns are minimal, as
// they contain only one contiguous run of ones separated by at least one
// zero.
// Finally, we encode the values. "rotation" is the amount we rotated
// right by to "undo" the right-rotate encoded in immr, so must be
// negated.
// size 2: N=0 immr=00000r imms=11110s
// size 4: N=0 immr=0000rr imms=1110ss
// size 8: N=0 immr=000rrr imms=110sss
// size 16: N=0 immr=00rrrr imms=10ssss
// size 32: N=0 immr=0rrrrr imms=0sssss
// size 64: N=1 immr=rrrrrr imms=ssssss
int immr = -rotation & (size - 1);
int imms = -(size << 1) | (ones - 1);
int N = (size >> 6);
return (N << 12) | (immr << 6) | (imms & 0x3f);
}
int encodeLogicalImmediate32(uint32_t val) {
return encodeLogicalImmediate64(((uint64_t)val << 32) | val);
}
// Decoding!
bool isValidLogicalImmediate64(unsigned val) {
unsigned N = (val >> 12) & 1;
unsigned imms = val & 0x3f;
unsigned pattern = (N << 6) | (~imms & 0x3f);
return (pattern & (pattern - 1)) != 0;
}
bool isValidLogicalImmediate32(unsigned val) {
unsigned N = (val >> 12) & 1;
return N == 0 && isValidLogicalImmediate64(val);
}
#define DECODE_FAILED 0
// returns DECODE_FAILED (zero) if the encoding is invalid
uint64_t decodeLogicalImmediate64(unsigned val) {
// Fun way to generate the immediates with mask ^ (mask << S)
static const uint64_t mask_lookup[] = {
0xffffffffffffffff, // size = 64
0x00000000ffffffff, // size = 32
0x0000ffff0000ffff, // size = 16
0x00ff00ff00ff00ff, // size = 8
0x0f0f0f0f0f0f0f0f, // size = 4
0x3333333333333333, // size = 2
};
unsigned N = (val >> 12) & 1;
int immr = (val >> 6) & 0x3f;
unsigned imms = val & 0x3f;
unsigned pattern = (N << 6) | (~imms & 0x3f);
if (!(pattern & (pattern - 1))) return DECODE_FAILED;
int leading_zeroes = nonzeroCountLeadingZeros32(pattern);
unsigned imms_mask = 0x7fffffff >> leading_zeroes;
uint64_t mask = mask_lookup[leading_zeroes - 25];
unsigned S = (imms + 1) & imms_mask;
return rotateRight64(mask ^ (mask << S), immr);
}
uint32_t decodeLogicalImmediate32(unsigned val) {
unsigned N = (val >> 12) & 1;
if (N) return DECODE_FAILED;
return (uint32_t)decodeLogicalImmediate64(val);
}
@pcpa
Copy link

pcpa commented Feb 11, 2023

Just used it in the aarch64 GNU Lightning backend, see
https://git.savannah.gnu.org/cgit/lightning.git/commit/?id=c3dfbe299b3e7404c4aa520c6b3a1c9af2ead801
Original code was from 2013, with just a few common pattern, built by just looking at an early binutils implementation and never revisited that code...

@dougallj
Copy link
Author

Nice! Thanks for letting me know :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment