Created
May 20, 2020 11:33
-
-
Save aguinet/34be8b8378f7a259b43d033180010539 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Extracted from https://github.com/pyjamask-cipher/pyjamask-reference-implementation/blob/master/pyjamask.c | |
#include <cstdint> | |
// Rotate the 32-bit lvalue `row` right by one bit, in place.
// The body is wrapped in do { ... } while (0) and carries no trailing ';',
// so `right_rotate(x);` expands to exactly one statement and stays safe
// inside an un-braced if/else. The argument is parenthesized so that any
// lvalue expression can be passed.
#define right_rotate(row) \
    do { (row) = ((row) >> 1) | ((row) << 31); } while (0)
// 32-bit words intended to be passed as the `mat_col` argument of mat_mult().
// Only COL_M0 is referenced by the code visible in this extract.
#define COL_M0 0xA3861085
#define COL_M1 0x63417021
#define COL_M2 0x692CF280
#define COL_M3 0x48A54813
// XOR together rotated copies of `mat_col`, one per set bit of `vec`.
//
// Bit 31 of `vec` selects `mat_col` unchanged, bit 30 selects `mat_col`
// rotated right by 1, ... and bit 0 selects `mat_col` rotated right by 31.
// The selection uses an all-ones / all-zeros mask instead of a branch, so
// the loop body contains no branch that depends on `vec`.
//
// mat_col: 32-bit word that is rotated once per iteration.
// vec:     32-bit word whose bits choose which rotations are accumulated.
// returns: XOR of the selected rotations (0 when vec == 0).
static uint32_t mat_mult(uint32_t mat_col, uint32_t vec)
{
    uint32_t acc = 0;
    uint32_t column = mat_col;

    #pragma unroll
    for (int bit = 31; bit >= 0; --bit)
    {
        // 0xFFFFFFFF when the current bit of vec is 1, otherwise 0.
        const uint32_t select = -((vec >> bit) & 1);
        acc ^= select & column;

        // Rotate right by one for the next (lower) bit of vec.
        column = (column >> 1) | (column << 31);
    }

    return acc;
}
extern "C" uint32_t mat_mult_m0(uint32_t vec) | |
{ | |
return mat_mult(COL_M0, vec); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.text | |
.intel_syntax noprefix | |
.file "pyjamask.cpp" | |
# .LCPI0_0: 64 bytes of constant data, placed on a 64-byte boundary by
# .p2align 6. mat_mult_m0 reads it as the zmmword memory operand of its first
# vgf2p8affineqb, whose other source holds byte 0 of the argument (low 256
# bits) and byte 1 of the argument (high 256 bits).
# vgf2p8affineqb uses each 8-byte group of this operand as one 8x8 bit matrix
# (see the Intel SDM entry for GF2P8AFFINEQB). Only four distinct 8-byte
# patterns occur below; the high 32 bytes repeat the low 32 bytes rotated by
# one group.
.section .rodata,"a",@progbits | |
.p2align 6 # -- Begin function mat_mult_m0 | |
.LCPI0_0: | |
# --- low 256 bits (paired with argument byte 0) ---
# bytes 0-7
.byte 208 # 0xd0 | |
.byte 104 # 0x68 | |
.byte 180 # 0xb4 | |
.byte 90 # 0x5a | |
.byte 45 # 0x2d | |
.byte 22 # 0x16 | |
.byte 139 # 0x8b | |
.byte 197 # 0xc5 | |
# bytes 8-15
.byte 132 # 0x84 | |
.byte 66 # 0x42 | |
.byte 33 # 0x21 | |
.byte 16 # 0x10 | |
.byte 8 # 0x8 | |
.byte 132 # 0x84 | |
.byte 66 # 0x42 | |
.byte 161 # 0xa1 | |
# bytes 16-23
.byte 48 # 0x30 | |
.byte 24 # 0x18 | |
.byte 12 # 0xc | |
.byte 134 # 0x86 | |
.byte 67 # 0x43 | |
.byte 33 # 0x21 | |
.byte 16 # 0x10 | |
.byte 8 # 0x8 | |
# bytes 24-31
.byte 226 # 0xe2 | |
.byte 113 # 0x71 | |
.byte 56 # 0x38 | |
.byte 28 # 0x1c | |
.byte 14 # 0xe | |
.byte 135 # 0x87 | |
.byte 195 # 0xc3 | |
.byte 97 # 0x61 | |
# --- high 256 bits (paired with argument byte 1) ---
# bytes 32-39
.byte 226 # 0xe2 | |
.byte 113 # 0x71 | |
.byte 56 # 0x38 | |
.byte 28 # 0x1c | |
.byte 14 # 0xe | |
.byte 135 # 0x87 | |
.byte 195 # 0xc3 | |
.byte 97 # 0x61 | |
# bytes 40-47
.byte 208 # 0xd0 | |
.byte 104 # 0x68 | |
.byte 180 # 0xb4 | |
.byte 90 # 0x5a | |
.byte 45 # 0x2d | |
.byte 22 # 0x16 | |
.byte 139 # 0x8b | |
.byte 197 # 0xc5 | |
# bytes 48-55
.byte 132 # 0x84 | |
.byte 66 # 0x42 | |
.byte 33 # 0x21 | |
.byte 16 # 0x10 | |
.byte 8 # 0x8 | |
.byte 132 # 0x84 | |
.byte 66 # 0x42 | |
.byte 161 # 0xa1 | |
# bytes 56-63
.byte 48 # 0x30 | |
.byte 24 # 0x18 | |
.byte 12 # 0xc | |
.byte 134 # 0x86 | |
.byte 67 # 0x43 | |
.byte 33 # 0x21 | |
.byte 16 # 0x10 | |
.byte 8 # 0x8 | |
# .LCPI0_1: 64 bytes of constant data that directly follow the preceding
# 64-byte table in the same section. mat_mult_m0 reads it as the zmmword
# memory operand of its second vgf2p8affineqb, whose other source holds
# byte 2 of the argument (low 256 bits) and byte 3 of the argument (high
# 256 bits).
# vgf2p8affineqb uses each 8-byte group as one 8x8 bit matrix (see the Intel
# SDM entry for GF2P8AFFINEQB). Only four distinct 8-byte patterns occur
# below, in a different rotation for each 32-byte half.
.LCPI0_1: | |
# --- low 256 bits (paired with argument byte 2) ---
# bytes 0-7
.byte 48 # 0x30 | |
.byte 24 # 0x18 | |
.byte 12 # 0xc | |
.byte 134 # 0x86 | |
.byte 67 # 0x43 | |
.byte 33 # 0x21 | |
.byte 16 # 0x10 | |
.byte 8 # 0x8 | |
# bytes 8-15
.byte 226 # 0xe2 | |
.byte 113 # 0x71 | |
.byte 56 # 0x38 | |
.byte 28 # 0x1c | |
.byte 14 # 0xe | |
.byte 135 # 0x87 | |
.byte 195 # 0xc3 | |
.byte 97 # 0x61 | |
# bytes 16-23
.byte 208 # 0xd0 | |
.byte 104 # 0x68 | |
.byte 180 # 0xb4 | |
.byte 90 # 0x5a | |
.byte 45 # 0x2d | |
.byte 22 # 0x16 | |
.byte 139 # 0x8b | |
.byte 197 # 0xc5 | |
# bytes 24-31
.byte 132 # 0x84 | |
.byte 66 # 0x42 | |
.byte 33 # 0x21 | |
.byte 16 # 0x10 | |
.byte 8 # 0x8 | |
.byte 132 # 0x84 | |
.byte 66 # 0x42 | |
.byte 161 # 0xa1 | |
# --- high 256 bits (paired with argument byte 3) ---
# bytes 32-39
.byte 132 # 0x84 | |
.byte 66 # 0x42 | |
.byte 33 # 0x21 | |
.byte 16 # 0x10 | |
.byte 8 # 0x8 | |
.byte 132 # 0x84 | |
.byte 66 # 0x42 | |
.byte 161 # 0xa1 | |
# bytes 40-47
.byte 48 # 0x30 | |
.byte 24 # 0x18 | |
.byte 12 # 0xc | |
.byte 134 # 0x86 | |
.byte 67 # 0x43 | |
.byte 33 # 0x21 | |
.byte 16 # 0x10 | |
.byte 8 # 0x8 | |
# bytes 48-55
.byte 226 # 0xe2 | |
.byte 113 # 0x71 | |
.byte 56 # 0x38 | |
.byte 28 # 0x1c | |
.byte 14 # 0xe | |
.byte 135 # 0x87 | |
.byte 195 # 0xc3 | |
.byte 97 # 0x61 | |
# bytes 56-63
.byte 208 # 0xd0 | |
.byte 104 # 0x68 | |
.byte 180 # 0xb4 | |
.byte 90 # 0x5a | |
.byte 45 # 0x2d | |
.byte 22 # 0x16 | |
.byte 139 # 0x8b | |
.byte 197 # 0xc5 | |
# .LCPI0_2: 16-byte, 16-byte-aligned vpshufb control vector used by
# mat_mult_m0. Control byte 0 selects source byte 0 and control byte 1 selects
# source byte 8. The remaining 14 control bytes are emitted as zero; the
# compiler's own annotation at the load site marks them as undefined ("u"),
# and the code that follows only consumes the low word of each shuffle result.
.section .rodata.cst16,"aM",@progbits,16 | |
.p2align 4 | |
.LCPI0_2: | |
.byte 0 # 0x0 | |
.byte 8 # 0x8 | |
.zero 1 | |
.zero 1 | |
.zero 1 | |
.zero 1 | |
.zero 1 | |
.zero 1 | |
.zero 1 | |
.zero 1 | |
.zero 1 | |
.zero 1 | |
.zero 1 | |
.zero 1 | |
.zero 1 | |
.zero 1 | |
#-----------------------------------------------------------------------
# mat_mult_m0 -- compiler-generated body (GAS, Intel syntax)
# In:    edi = 32-bit argument
# Out:   eax = 32-bit result
# Clobb: eax, edi, zmm0-zmm2, flags
# Uses 512-bit vector registers and vgf2p8affineqb (GFNI).
#
# Outline: each of the four argument bytes is broadcast across 256 bits and
# transformed by vgf2p8affineqb with a constant table; the four 256-bit
# partial results are XORed together; bytes 0, 8, 16 and 24 of that 256-bit
# value become result bytes 0..3.
# No stack is used and no other function is called.
#-----------------------------------------------------------------------
.text | |
.globl mat_mult_m0 | |
.p2align 4, 0x90 | |
.type mat_mult_m0,@function | |
mat_mult_m0: # @mat_mult_m0 | |
.Lmat_mult_m0$local: | |
.cfi_startproc | |
# %bb.0: | |
# ymm0 = argument byte 0 replicated into all 32 bytes
vmovd xmm0, edi | |
vpbroadcastb ymm0, xmm0 | |
# ymm1 = argument byte 1 replicated; zmm0 = [ byte 1 x32 : byte 0 x32 ]
mov eax, edi | |
shr eax, 8 | |
vmovd xmm1, eax | |
vpbroadcastb ymm1, xmm1 | |
vinserti64x4 zmm0, zmm0, ymm1, 1 | |
# ymm1 = argument byte 2 replicated
mov eax, edi | |
shr eax, 16 | |
vmovd xmm1, eax | |
vpbroadcastb ymm1, xmm1 | |
# ymm2 = argument byte 3 replicated (edi is overwritten here)
shr edi, 24 | |
vmovd xmm2, edi | |
vpbroadcastb ymm2, xmm2 | |
# Transform bytes 0/1 with the matrices at .LCPI0_0; the immediate 0 means
# no constant is XORed into the transformed bytes.
vgf2p8affineqb zmm0, zmm0, zmmword ptr [rip + .LCPI0_0], 0 | |
# zmm1 = [ byte 3 x32 : byte 2 x32 ], transformed with the matrices at .LCPI0_1
vinserti64x4 zmm1, zmm1, ymm2, 1 | |
vgf2p8affineqb zmm1, zmm1, zmmword ptr [rip + .LCPI0_1], 0 | |
# XOR-reduce the four 256-bit partial results into ymm0
vpxorq zmm0, zmm0, zmm1 | |
vextracti64x4 ymm1, zmm0, 1 | |
vpxor ymm0, ymm0, ymm1 | |
# xmm1 = upper 128 bits of the reduced value; pick bytes 0 and 8 of each
# 128-bit half into the low word of that half
vextracti128 xmm1, ymm0, 1 | |
vmovdqa xmm2, xmmword ptr [rip + .LCPI0_2] # xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> | |
vpshufb xmm1, xmm1, xmm2 | |
vpshufb xmm0, xmm0, xmm2 | |
# Interleave the two low words so the low dword holds the four picked bytes
vpunpcklwd xmm0, xmm0, xmm1 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] | |
vmovd eax, xmm0 | |
# Zero the upper parts of the vector registers before returning
vzeroupper | |
ret | |
.Lfunc_end0: | |
.size mat_mult_m0, .Lfunc_end0-mat_mult_m0 | |
.cfi_endproc | |
# -- End function | |
.ident "clang version 10.0.0-4 " | |
.section ".note.GNU-stack","",@progbits |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment