Skip to content

Instantly share code, notes, and snippets.

@ericlagergren
Created January 31, 2022 23:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ericlagergren/7a0af12f0d6f5e31ffffbe3e634c3944 to your computer and use it in GitHub Desktop.
Save ericlagergren/7a0af12f0d6f5e31ffffbe3e634c3944 to your computer and use it in GitHub Desktop.
POLYVAL ARMv8
#include <arm_neon.h>
#include <stdint.h>
/* 128-bit NEON register viewed as 16 bytes; the working vector type for the
 * PMULL helpers and polymul() below. */
typedef uint8x16_t u128;
/*
 * pmullq_u8 - polynomial (carry-less) multiply of the LOW 64-bit lanes.
 *
 * Emits `pmull Vd.1q, Vn.1d, Vm.1d`: the low 64 bits of `a` and of `b` are
 * multiplied as polynomials over GF(2) and the full 128-bit product is
 * returned. The upper 64 bits of both operands are ignored.
 *
 * a, b: operands (uint8x16_t, the type that u128 aliases).
 * Returns the 128-bit product as a uint8x16_t.
 *
 * This is a function rather than a statement-expression macro so that the
 * arguments are type-checked and a caller variable that happens to be named
 * like the internal temporary cannot be captured. The asm is deliberately not
 * `volatile`: the instruction has no side effects, so the compiler is free to
 * combine or drop redundant invocations.
 */
static inline uint8x16_t pmullq_u8(uint8x16_t a, uint8x16_t b) {
    uint8x16_t product;
    __asm__("pmull %0.1q, %1.1d, %2.1d"
            : "=w"(product)
            : "w"(a), "w"(b));
    return product;
}
/*
 * pmull2q_u8 - polynomial (carry-less) multiply of the HIGH 64-bit lanes.
 *
 * Emits `pmull2 Vd.1q, Vn.2d, Vm.2d`: the upper 64 bits of `a` and of `b` are
 * multiplied as polynomials over GF(2) and the full 128-bit product is
 * returned. The lower 64 bits of both operands are ignored.
 *
 * a, b: operands (uint8x16_t, the type that u128 aliases).
 * Returns the 128-bit product as a uint8x16_t.
 *
 * This is a function rather than a statement-expression macro so that the
 * arguments are type-checked and a caller variable that happens to be named
 * like the internal temporary cannot be captured. The asm is deliberately not
 * `volatile`: the instruction has no side effects, so the compiler is free to
 * combine or drop redundant invocations.
 */
static inline uint8x16_t pmull2q_u8(uint8x16_t a, uint8x16_t b) {
    uint8x16_t product;
    __asm__("pmull2 %0.1q, %1.2d, %2.2d"
            : "=w"(product)
            : "w"(a), "w"(b));
    return product;
}
/*
 * polymul - fold one 16-byte block into the accumulator.
 *
 * Computes acc = (acc XOR input) * key, where "*" is:
 *   1. a 128x128 -> 256-bit carry-less multiplication built from three 64x64
 *      PMULLs (Karatsuba), followed by
 *   2. a two-step fold of the 256-bit product back to 128 bits using the
 *      constant 0xc200000000000000.
 *
 * NOTE(review): this has the shape of the Montgomery-style reduction commonly
 * used for POLYVAL (RFC 8452) - confirm against the RFC test vectors.
 *
 * acc:   16-byte accumulator; read, then overwritten with the result.
 * key:   16-byte multiplier; read only.
 * input: 16-byte block XORed into acc before multiplying; read only.
 *
 * All three buffers are loaded in full before acc is written.
 */
void polymul(uint8_t acc[16], const uint8_t key[16], const uint8_t input[16]) {
    const u128 h = vld1q_u8(key);
    const u128 s = veorq_u8(vld1q_u8(acc), vld1q_u8(input));

    /*
     * Karatsuba operands: low lane = (high half XOR low half).
     * Only the low lane is consumed by pmullq_u8 below, so whatever ends up
     * in the high lane of these two temporaries is irrelevant.
     */
    const u128 s_sum = veorq_u8(vextq_u8(s, h, 8), s);
    const u128 h_sum = veorq_u8(vextq_u8(h, h, 8), h);

    const u128 cross = pmullq_u8(s_sum, h_sum); /* (s.lo^s.hi) * (h.lo^h.hi) */
    const u128 hi = pmull2q_u8(s, h);           /* s.hi * h.hi */
    const u128 lo = pmullq_u8(s, h);            /* s.lo * h.lo */

    /*
     * mid = cross ^ hi ^ lo ^ (lo.high, hi.low)
     * i.e. the two middle 64-bit words of the 256-bit product.
     */
    const u128 mid = veorq_u8(veorq_u8(cross, vextq_u8(lo, hi, 8)),
                              veorq_u8(hi, lo));

    /* 256-bit product split into two vectors, least significant word first:
     *   w01 = (lo.low,  mid.low)
     *   w23 = (mid.high, hi.high) */
    const u128 w23 = vextq_u8(mid, vextq_u8(hi, hi, 8), 8);
    const u128 w01 = vextq_u8(vextq_u8(lo, lo, 8), mid, 8);

    /* Fold constant replicated into both 64-bit lanes so that it can be used
     * with both the low-lane and the high-lane multiply. */
    const u128 fold =
        vreinterpretq_u8_u64(vdupq_n_u64(UINT64_C(0xc200000000000000)));

    /* First fold: multiply word 0 by the constant, swap halves, mix in w01. */
    const u128 a = pmullq_u8(fold, w01);
    const u128 b = veorq_u8(vextq_u8(a, a, 8), w01);

    /* Second fold: multiply the high lane of b by the constant. */
    const u128 c = pmull2q_u8(fold, b);

    vst1q_u8(acc, veorq_u8(veorq_u8(b, c), w23));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment