Skip to content

Instantly share code, notes, and snippets.

@ericlagergren
Created February 5, 2022 19:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ericlagergren/28f9178bff76fcc2a0c043f16656548d to your computer and use it in GitHub Desktop.
Save ericlagergren/28f9178bff76fcc2a0c043f16656548d to your computer and use it in GitHub Desktop.
POLYVAL x86_64
#include <immintrin.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
// vext8(x, y) concatenates the upper half of x with the lower half of y.
// The name and operand order follow Arm's EXT (Vd, Vn, Vm, #imm) with
// #imm = 8 bytes:
//
//              hi      lo
//   result = y[63:0] : x[127:64]
//
// It is built from two SSE2 integer operations: shift x right by 8 bytes so
// its upper half lands in the low lane, then interleave that low lane with
// the low lane of y. This avoids SSE4.1 (_mm_extract_epi64), the MMX __m64
// type and GNU statement expressions, and evaluates each argument once.
#define vext8(x, y) _mm_unpacklo_epi64(_mm_srli_si128((x), 8), (y))
// polymul multiplies two 128-bit POLYVAL field elements.
//
// acc and key each point to 16 bytes holding a polynomial over GF(2) in
// little-endian order (bit 0 of byte 0 is the coefficient of x^0). Neither
// buffer is written to, and neither needs any particular alignment.
//
// The return value is acc * key * x^-128 reduced modulo
// x^128 + x^127 + x^126 + x^121 + 1, in the same representation. The x^-128
// factor comes from the Montgomery-style reduction below.
//
// The function uses the PCLMULQDQ instruction. The target attribute lets it
// compile without a global -mpclmul flag; the caller is still responsible for
// only calling it on CPUs that support PCLMULQDQ.
#if defined(__GNUC__) || defined(__clang__)
__attribute__((target("pclmul")))
#endif
__m128i polymul(uint8_t acc[16], uint8_t key[16]) {
    const __m128i x = _mm_loadu_si128((const __m128i *)acc);
    const __m128i y = _mm_loadu_si128((const __m128i *)key);

    // Karatsuba 1: three 64x64 carry-less products.
    //   l = x.lo * y.lo
    //   h = x.hi * y.hi
    //   m = (x.lo ^ x.hi) * (y.lo ^ y.hi)
    // Shuffle 0x4e swaps the two 64-bit halves, so the low lane of
    // (v ^ swap(v)) is v.lo ^ v.hi; only that lane feeds the multiply.
    const __m128i xf = _mm_xor_si128(x, _mm_shuffle_epi32(x, 0x4e));
    const __m128i yf = _mm_xor_si128(y, _mm_shuffle_epi32(y, 0x4e));
    const __m128i m = _mm_clmulepi64_si128(xf, yf, 0x00);
    const __m128i h = _mm_clmulepi64_si128(x, y, 0x11);
    const __m128i l = _mm_clmulepi64_si128(x, y, 0x00);

    // Karatsuba 2: the middle term is m ^ h ^ l and sits at bit offset 64 of
    // the 256-bit product. Fold its low half into the top of the low 128
    // bits (x01) and its high half into the bottom of the high 128 bits
    // (x23).
    const __m128i mid = _mm_xor_si128(m, _mm_xor_si128(h, l));
    const __m128i x01 = _mm_xor_si128(l, _mm_slli_si128(mid, 8));
    const __m128i x23 = _mm_xor_si128(h, _mm_srli_si128(mid, 8));

    // Reduce: two folding steps with the upper 64 bits of the field
    // polynomial (x^63 + x^62 + x^57, i.e. 0xc2 << 56) in both lanes.
    const __m128i poly = _mm_set1_epi64x((long long)0xc200000000000000ULL);

    // a = X0 * poly, b = swap(a) ^ [X1:X0]
    const __m128i a = _mm_clmulepi64_si128(poly, x01, 0x00);
    const __m128i b = _mm_xor_si128(_mm_shuffle_epi32(a, 0x4e), x01);

    // c = b.hi * poly, result = c ^ b ^ [X3:X2]
    const __m128i c = _mm_clmulepi64_si128(poly, b, 0x11);
    return _mm_xor_si128(_mm_xor_si128(c, b), x23);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment