Skip to content

Instantly share code, notes, and snippets.

@amonakov
Last active September 11, 2022 18:07
Show Gist options
  • Save amonakov/66ecb476991804291de0da6747d2db23 to your computer and use it in GitHub Desktop.
Save amonakov/66ecb476991804291de0da6747d2db23 to your computer and use it in GitHub Desktop.
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
/*
 * A pair of 16-bit coordinates.
 * Member order matters: z2xy() initializes this struct positionally and the
 * SSE routines copy it to/from the low 4 bytes of a vector.
 */
struct xy {
	uint16_t x;	/* ends up in the even bits of the Morton code */
	uint16_t y;	/* ends up in the odd bits of the Morton code */
};
/*
 * Interleave the bits of xy->x and xy->y into a 32-bit Morton (Z-order) code.
 *
 * Bit i of x is placed at bit 2*i of the result and bit i of y at bit 2*i+1.
 * Both coordinates are spread simultaneously inside one 64-bit word: x lives
 * in the low 32-bit half and y in the high half.
 */
uint32_t xy2z(struct xy *xy)
{
	/* Each mask is the all-ones value divided by 0x101, 0x11, 5 and 3. */
	const uint64_t keep_bytes   = UINT64_C(0x00FF00FF00FF00FF);
	const uint64_t keep_nibbles = UINT64_C(0x0F0F0F0F0F0F0F0F);
	const uint64_t keep_pairs   = UINT64_C(0x3333333333333333);
	const uint64_t keep_even    = UINT64_C(0x5555555555555555);

	uint64_t spread = (uint64_t)xy->y << 32;
	spread |= xy->x;

	/* Repeatedly split every group in two, doubling the gap between bits. */
	spread = (spread | (spread << 8)) & keep_bytes;
	spread = (spread | (spread << 4)) & keep_nibbles;
	spread = (spread | (spread << 2)) & keep_pairs;
	spread = (spread | (spread << 1)) & keep_even;

	/*
	 * The high half (spread y) shifted right by 31 lands on the odd bits of
	 * the low half; the conversion to uint32_t drops everything above bit 31.
	 */
	return (uint32_t)(spread | (spread >> 31));
}
struct xy z2xy(uint32_t z)
{
uint64_t r = z | ((uint64_t)z << 33);
r &= ~(-1/3ull);
r += r << 1;
r &= ~(-1/5ull);
r += r << 2;
r &= ~(-1/0x11ull);
r += r << 4;
r &= ~(-1/0x101ull);
r += r << 8;
return (struct xy){ r >> 48, r >> 16 };
}
#ifdef __SSE4_2__
#include <immintrin.h>
#include <string.h>
/*
 * Morton-encode one struct xy using a carry-less multiply.
 *
 * Carry-less squaring moves bit i of its operand to bit 2*i.  The 4 bytes of
 * *xy are copied into the low 32 bits of a zeroed vector, so after squaring
 * the low 64-bit element, 32-bit lane 0 holds the spread low 16 bits of the
 * struct and lane 1 the spread high 16 bits.
 *
 * This handles a single struct per call; encoding 4 structs at a time, with
 * an additional _mm_clmulepi64_si128(r, r, 0x11) for the upper 64-bit
 * element, would be more efficient.
 *
 * NOTE(review): _mm_clmulepi64_si128 is a PCLMULQDQ intrinsic, while the
 * enclosing guard only tests __SSE4_2__ -- confirm the build enables PCLMUL.
 */
uint32_t xy2z_sse(struct xy *xy)
{
	__m128i v = _mm_setzero_si128();
	uint32_t lane[4];

	memcpy(&v, xy, sizeof *xy);
	v = _mm_clmulepi64_si128(v, v, 0);

	/* Read the vector back as four 32-bit lanes. */
	memcpy(lane, &v, sizeof lane);

	/* Lane 1 is shifted onto the odd bit positions left free by lane 0. */
	return lane[0] + (lane[1] << 1);
}
/* As above, this should be used for decoding 4 structs at a time instead. */
struct xy z2xy_sse(uint32_t z)
{
union {
__m128i m;
__v16qu qu;
__v8hu hu;
__v4su su;
} lo, hi;
__v16qu acbd = {
0, 1, 4, 5,
2, 3, 6, 7,
8, 9, 12, 13,
10, 11, 14, 15
};
__v16qu pack_lo = {
0, 2, -1, -1,
4, 6, -1, -1,
8, 10, -1, -1,
12, 14, -1, -1
};
__v16qu pack_hi = {
-1, -1, 0, 2,
-1, -1, 4, 6,
-1, -1, 8, 10,
-1, -1, 12, 14
};
lo.su = (__v4su){ z, 0, 0, 0 };
hi.su = lo.su >> 1;
// ..DdCcBbAa -> ..0d0c0b0a
lo.su &= -1/3u;
// ..0d0c0b0a -> ..?d?cdbca
lo.su |= lo.su >> 3;
// -> ..0000dbca
lo.su &= 0x0f0f0f0f;
// -> ..0000dcba
lo.m = _mm_shuffle_epi8((__m128i)acbd, lo.m);
// -> ..0000hgfehgfedcba
lo.hu |= lo.hu >> 4;
lo.m = _mm_shuffle_epi8(lo.m, (__m128i)pack_lo);
hi.su &= -1/3u;
hi.su |= hi.su >> 3;
hi.su &= 0x0f0f0f0f;
hi.m = _mm_shuffle_epi8((__m128i)acbd, hi.m);
hi.hu |= hi.hu >> 4;
hi.m = _mm_shuffle_epi8(hi.m, (__m128i)pack_hi);
lo.hu |= hi.hu;
struct xy r;
memcpy(&r, &lo, sizeof r);
return r;
}
#endif
/*
 * Round-trip driver.
 *
 * Reads pairs of hexadecimal 16-bit values from stdin until a pair fails to
 * parse.  For each pair it prints the Morton code and then the coordinates
 * decoded back from it; when __SSE4_2__ is defined the same round trip is
 * repeated with the SSE implementations.
 *
 * The <inttypes.h> macros are used so that the conversion specifiers match
 * the fixed-width types exactly rather than relying on uint16_t/uint32_t
 * being unsigned short/unsigned int.
 */
int main(void)
{
	struct xy xy;
	uint32_t z;

	while (scanf("%" SCNx16 "%" SCNx16, &xy.x, &xy.y) == 2) {
		z = xy2z(&xy);
		printf("%" PRIx32 "\n", z);
		xy = z2xy(z);
		printf("%" PRIx16 " %" PRIx16 "\n", xy.x, xy.y);
#ifdef __SSE4_2__
		z = xy2z_sse(&xy);
		printf("%" PRIx32 "\n", z);
		xy = z2xy_sse(z);
		printf("%" PRIx16 " %" PRIx16 "\n", xy.x, xy.y);
#endif
	}
	return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment