Created
May 20, 2020 11:06
-
-
Save aguinet/757d2b199de5d506beab9cc0e1430a3f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <stdlib.h> | |
/*
 * crc32b - bit-at-a-time CRC-32 over a byte buffer.
 *
 * data: pointer to the first input byte; only read, never written.
 *       It is not dereferenced when len is 0.
 * len:  number of bytes to process.
 *
 * Returns the bitwise complement of the running register, which starts
 * at 0xFFFFFFFF; an empty input therefore yields 0.
 *
 * The register is shifted right one bit at a time (least-significant bit
 * first) and XORed with the constant 0xEDB88320 whenever the bit shifted
 * out was 1.
 */
uint32_t crc32b(uint8_t* data, size_t len) {
  uint32_t crc = 0xFFFFFFFF;
  for (size_t i = 0; i < len; ++i) {
    const uint8_t v = data[i];
    crc ^= v; /* fold the next input byte into the low 8 bits */
    for (size_t j = 0; j < 8; ++j) {
      const uint32_t bit = crc & 1;
      /* Unsigned negation: 0 stays 0, 1 becomes 0xFFFFFFFF. This turns the
       * conditional XOR into a branch-free AND + XOR. */
      const uint32_t mask = -bit;
      crc >>= 1;
      crc ^= (0xEDB88320 & mask);
    }
  }
  return ~crc;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Assembler set-up: start in the code section, switch GAS to Intel operand
# order without '%' register prefixes, and record the originating source file.
        .text
        .intel_syntax noprefix
        .file   "crc32.c"
# .LCPI0_0 -- 64-byte constant, 64-byte aligned, loaded into zmm0 by crc32b
# with an aligned 512-bit load and used as the matrix operand of the first
# vgf2p8affineqb.  Each 8-byte row below is one qword, i.e. one 8x8 bit matrix
# applied to the source bytes that sit in the same qword lane.
        .section        .rodata,"a",@progbits
        .p2align        6               # -- Begin function crc32b
.LCPI0_0:
        .byte   0x61, 0x30, 0x9a, 0x4d, 0x26, 0x13, 0x09, 0x04  # qword 0
        .byte   0x8c, 0x46, 0x23, 0x11, 0x08, 0x04, 0x80, 0xc2  # qword 1
        .byte   0xd8, 0x6c, 0xb4, 0xd8, 0xee, 0x77, 0x3b, 0x1d  # qword 2
        .byte   0x82, 0xc3, 0xe3, 0x71, 0xba, 0xdf, 0x6f, 0xb5  # qword 3
        .byte   0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01  # qword 4
        .byte   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  # qword 5
        .byte   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  # qword 6
        .byte   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  # qword 7
# .LCPI0_1 -- 64-byte constant loaded into zmm1 by crc32b with an aligned
# 512-bit load and used as the matrix operand of the second vgf2p8affineqb.
# It carries no alignment directive of its own: it stays 64-byte aligned only
# because it directly follows the 64-byte-aligned, 64-byte-long .LCPI0_0.
# Each 8-byte row below is one qword (one 8x8 bit matrix).
.LCPI0_1:
        .byte   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  # qword 0
        .byte   0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01  # qword 1
        .byte   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  # qword 2
        .byte   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  # qword 3
        .byte   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  # qword 4
        .byte   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  # qword 5
        .byte   0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01  # qword 6
        .byte   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  # qword 7
# .LCPI0_2 -- 16-byte vpshufb control vector loaded into xmm2 by crc32b.
# Control bytes 0 and 1 select source bytes 0 and 8; the remaining 14 control
# bytes are zero-filled padding.  The entry must stay exactly 16 bytes long to
# match the section's declared entity size.
        .section        .rodata.cst16,"aM",@progbits,16
        .p2align        4
.LCPI0_2:
        .byte   0, 8                    # result[0] = src[0], result[1] = src[8]
        .zero   14                      # remaining control bytes
# ---------------------------------------------------------------------------
# uint32_t crc32b(uint8_t *data, size_t len)
#
# In:    rdi = data, rsi = len (System V AMD64 argument order)
# Out:   eax = bitwise NOT of the final running value; 0 when len == 0
# Clobb: rcx, rdx, zmm0-zmm5, flags
# Needs: AVX-512F and GFNI (512-bit vgf2p8affineqb)
#
# Register roles inside the loop:
#   eax  = running 32-bit value (starts at 0xFFFFFFFF)
#   rcx  = index of the byte being processed
#   zmm0 = bit matrices from .LCPI0_0, zmm1 = bit matrices from .LCPI0_1
#   xmm2 = shuffle control from .LCPI0_2
#
# Each iteration processes one input byte.  The four bytes of the running
# value (the lowest one first XORed with the input byte) are each broadcast
# across four qword lanes, multiplied by a per-lane 8x8 bit matrix, and the
# products are XOR-folded so that byte k of the new value is the XOR of
# lane k's contributions from all four source bytes.
# ---------------------------------------------------------------------------
        .text
        .globl  crc32b
        .p2align        4, 0x90
        .type   crc32b,@function
crc32b:                                 # @crc32b
.Lcrc32b$local:
        .cfi_startproc
# %bb.0:                                # %entry
        test    rsi, rsi
        je      .LBB0_1                 # len == 0: skip the vector setup entirely
# %bb.4:                                # %for.body.preheader
        mov     eax, -1                 # running value = 0xFFFFFFFF
        xor     ecx, ecx                # i = 0
        # Loop-invariant constants.  The zmm loads are aligned loads and rely
        # on the 64-byte alignment of the constants.
        vmovdqa64       zmm0, zmmword ptr [rip + .LCPI0_0]
        vmovdqa64       zmm1, zmmword ptr [rip + .LCPI0_1]
        vmovdqa xmm2, xmmword ptr [rip + .LCPI0_2] # xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
        .p2align        4, 0x90
.LBB0_5:                                # %for.body
                                        # =>This Inner Loop Header: Depth=1
        # zmm3 = [ 32 x low byte of (value ^ data[i]) | 32 x byte 1 of value ]
        movzx   edx, byte ptr [rdi + rcx]
        xor     edx, eax                # only the low byte is consumed below
        vmovd   xmm3, edx
        vpbroadcastb    ymm3, xmm3
        mov     edx, eax
        shr     edx, 8
        vmovd   xmm4, edx
        vpbroadcastb    ymm4, xmm4
        vinserti64x4    zmm3, zmm3, ymm4, 1
        vgf2p8affineqb  zmm3, zmm3, zmm0, 0 # per-qword bit-matrix product, no constant term
        # zmm4 = [ 32 x byte 2 of value | 32 x byte 3 of value ]
        mov     edx, eax
        shr     edx, 16
        vmovd   xmm4, edx
        vpbroadcastb    ymm4, xmm4
        shr     eax, 24                 # eax is dead after this; rewritten by vmovd below
        vmovd   xmm5, eax
        vpbroadcastb    ymm5, xmm5
        vinserti64x4    zmm4, zmm4, ymm5, 1
        vgf2p8affineqb  zmm4, zmm4, zmm1, 0
        # XOR the four per-byte contributions together: first the two 512-bit
        # products, then the upper 256 bits onto the lower 256 bits.
        vpxorq  zmm3, zmm3, zmm4
        vextracti64x4   ymm4, zmm3, 1
        vpxor   ymm3, ymm3, ymm4
        # All bytes within one qword are equal, so byte 0 of each of the four
        # qwords is gathered to form the new 32-bit value.
        vextracti128    xmm4, ymm3, 1
        vpshufb xmm4, xmm4, xmm2        # bytes 0,1 <- qword 2, qword 3
        vpshufb xmm3, xmm3, xmm2        # bytes 0,1 <- qword 0, qword 1
        vpunpcklwd      xmm3, xmm3, xmm4 # xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
        vmovd   eax, xmm3               # new running value
        add     rcx, 1
        cmp     rsi, rcx
        jne     .LBB0_5
# %bb.2:                                # %for.cond.cleanup.loopexit
        not     eax                     # final inversion
        vzeroupper                      # clear upper vector state before returning
        ret
.LBB0_1:
        xor     eax, eax                # empty input: ~0xFFFFFFFF == 0
        ret
.Lfunc_end0:
        .size   crc32b, .Lfunc_end0-crc32b
        .cfi_endproc
                                        # -- End function
# Toolchain identification string, followed by an empty .note.GNU-stack
# section (declared without the executable flag).
        .ident  "clang version 11.0.0 (https://github.com/llvm/llvm-project e24ed205634c4e567c62b8113c32a5ef33d8dc03)"
        .section        ".note.GNU-stack","",@progbits
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment