Skip to content

Instantly share code, notes, and snippets.

@jart
Created Apr 15, 2021
Embed
What would you like to do?
/**
* Example of how AVX512's FPGA-like VPTERNLOG instruction could
* be used to implement the other instructions in the x86 ISA.
* Here we implement MMX PADDB. Due to the way we need to reshape
* the data, VPTERNLOG is probably most useful for huge datasets.
*/
/**
 * Software model of a three-input bitwise truth-table lookup.
 *
 * At every one of the 64 bit positions, the bits of a, b and c at that
 * position form a 3-bit row number (a is the high bit, b the middle bit,
 * c the low bit). The result bit at that position is the bit of t at that
 * row number.
 *
 * @param a first operand; contributes bit 2 of each row number
 * @param b second operand; contributes bit 1 of each row number
 * @param c third operand; contributes bit 0 of each row number
 * @param t truth table; only bits 0..7 are consulted
 * @return the 64 looked-up bits packed into one word
 */
uint64_t vpternlog(uint64_t a, uint64_t b, uint64_t c, uint64_t t) {
  uint64_t result = 0;
  unsigned row;
  for (row = 0; row < 8; ++row) {
    uint64_t hits;
    if (!((t >> row) & 1)) {
      continue; /* this table row outputs 0, so it contributes nothing */
    }
    /* Mask of the bit positions whose (a, b, c) bits spell out `row`. */
    hits = (row & 4 ? a : ~a) & (row & 2 ? b : ~b) & (row & 1 ? c : ~c);
    result |= hits;
  }
  return result;
}
/**
 * Adds B into A as eight packed bytes, with both operands bit-sliced.
 *
 * Element [byte * 8 + bit] of each array carries bit `bit` of byte `byte`.
 * Each byte is summed with a ripple-carry adder built from two vpternlog()
 * lookups per bit; the carry is cleared at the start of every byte, so
 * nothing propagates from one byte into the next. Because vpternlog() works
 * independently on each bit position of its operands, every bit position of
 * the array elements acts as a separate addition.
 *
 * @param A left operand, overwritten in place with the sum
 * @param B right operand, read only
 */
void paddb64(uint64_t A[64], const uint64_t B[64]) {
  /* Truth tables: three-way XOR (0b10010110) and majority (0b11101000). */
  enum { kSumTable = 0x96, kCarryTable = 0xE8 };
  int byte, bit;
  for (byte = 0; byte < 8; ++byte) {
    uint64_t *x = A + byte * 8;
    const uint64_t *y = B + byte * 8;
    uint64_t carry = 0;
    for (bit = 0; bit < 8; ++bit) {
      /* Both lookups must see the old x[bit], so store only afterwards. */
      uint64_t sum = vpternlog(x[bit], y[bit], carry, kSumTable);
      carry = vpternlog(x[bit], y[bit], carry, kCarryTable);
      x[bit] = sum;
    }
  }
}
/**
 * Adds two 64-bit words as eight independent bytes (MMX PADDB semantics).
 *
 * Each bit of a and b is spread into its own array element, the arrays are
 * summed by paddb64(), and the low bit of every resulting element is packed
 * back into a single word.
 *
 * @param a first packed-byte operand
 * @param b second packed-byte operand
 * @return packed bytes of the per-byte sums
 */
uint64_t paddb(uint64_t a, uint64_t b) {
  uint64_t lhs[64], rhs[64];
  uint64_t packed = 0;
  unsigned k;

  /* Scatter: element k holds bit k of the operand in its lowest bit. */
  for (k = 0; k < 64; ++k) {
    lhs[k] = (a >> k) & 1;
    rhs[k] = (b >> k) & 1;
  }

  paddb64(lhs, rhs);

  /* Gather: the low bit of element k becomes bit k of the result. */
  for (k = 64; k-- > 0;) {
    packed |= (lhs[k] & 1) << k;
  }
  return packed;
}
/*
 * Canonical three-input truth-table operands. Bit k of A, B and C equals
 * bit 2, bit 1 and bit 0 of k respectively, which mirrors how vpternlog()
 * assembles its table index from (a, b, c). Evaluating a boolean expression
 * over these constants therefore yields the table for that expression.
 */
#define A 0b11110000
#define B 0b11001100
#define C 0b10101010
/*
 * Checks that the table obtained by evaluating (~A & B) | C over the
 * truth-table macros makes vpternlog() compute (~a & b) | c on values
 * drawn from rand().
 */
TEST(vpternlog, test) {
int a, b, c;
a = rand(), b = rand(), c = rand();
EXPECT_EQ((~a & b) | c, vpternlog(a, b, c, (~A & B) | C));
}
/* Spot-checks paddb(): a small sum with no carries, then per-byte wraparound. */
TEST(paddb, test) {
ASSERT_EQ(4, paddb(2, 2));
/* Every byte computes 0xFF + 0x02 and keeps only its low eight bits: 0x01. */
ASSERT_EQ(0x0101010101010101, paddb(0xFFFFFFFFFFFFFFFF, 0x0202020202020202));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment