Skip to content

Instantly share code, notes, and snippets.

@aguinet
Created May 20, 2020 11:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aguinet/757d2b199de5d506beab9cc0e1430a3f to your computer and use it in GitHub Desktop.
Save aguinet/757d2b199de5d506beab9cc0e1430a3f to your computer and use it in GitHub Desktop.
#include <stdint.h>
#include <stdlib.h>
/*
 * Bit-at-a-time CRC over a byte buffer.
 *
 * The register starts at 0xFFFFFFFF, every input byte is XORed into its low
 * eight bits, and the register is then shifted right eight times; whenever
 * the bit shifted out is 1 the reflected polynomial 0xEDB88320 is XORed in.
 * The bitwise complement of the final register is returned.
 *
 * data: bytes to checksum (never dereferenced when len is 0).
 * len:  number of bytes readable at data.
 *
 * Returns the complemented register; 0 for an empty buffer.
 */
uint32_t crc32b(uint8_t* data, size_t len) {
    uint32_t state = 0xFFFFFFFFu;
    const uint8_t* cursor = data;

    while (len-- != 0) {
        state ^= *cursor++;
        for (int step = 0; step != 8; ++step) {
            /* Feedback term is the polynomial iff the outgoing bit is set. */
            const uint32_t feedback = (state & 1u) ? 0xEDB88320u : 0u;
            state = (state >> 1) ^ feedback;
        }
    }
    return ~state;
}
.text
.intel_syntax noprefix
.file "crc32.c"
.section .rodata,"a",@progbits
.p2align 6 # -- Begin function crc32b
.LCPI0_0:
.byte 97 # 0x61
.byte 48 # 0x30
.byte 154 # 0x9a
.byte 77 # 0x4d
.byte 38 # 0x26
.byte 19 # 0x13
.byte 9 # 0x9
.byte 4 # 0x4
.byte 140 # 0x8c
.byte 70 # 0x46
.byte 35 # 0x23
.byte 17 # 0x11
.byte 8 # 0x8
.byte 4 # 0x4
.byte 128 # 0x80
.byte 194 # 0xc2
.byte 216 # 0xd8
.byte 108 # 0x6c
.byte 180 # 0xb4
.byte 216 # 0xd8
.byte 238 # 0xee
.byte 119 # 0x77
.byte 59 # 0x3b
.byte 29 # 0x1d
.byte 130 # 0x82
.byte 195 # 0xc3
.byte 227 # 0xe3
.byte 113 # 0x71
.byte 186 # 0xba
.byte 223 # 0xdf
.byte 111 # 0x6f
.byte 181 # 0xb5
.byte 128 # 0x80
.byte 64 # 0x40
.byte 32 # 0x20
.byte 16 # 0x10
.byte 8 # 0x8
.byte 4 # 0x4
.byte 2 # 0x2
.byte 1 # 0x1
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.LCPI0_1:
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 128 # 0x80
.byte 64 # 0x40
.byte 32 # 0x20
.byte 16 # 0x10
.byte 8 # 0x8
.byte 4 # 0x4
.byte 2 # 0x2
.byte 1 # 0x1
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 128 # 0x80
.byte 64 # 0x40
.byte 32 # 0x20
.byte 16 # 0x10
.byte 8 # 0x8
.byte 4 # 0x4
.byte 2 # 0x2
.byte 1 # 0x1
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
# Constant .LCPI0_2: a 16-byte, 16-byte-aligned value placed in a mergeable
# constant section (flags "aM", entity size 16). crc32b loads it into xmm2 and
# uses it as the control operand of both vpshufb instructions.
# Only the first two control bytes carry meaning: they select source bytes 0
# and 8. The remaining 14 bytes are emitted as zero; the compiler's own comment
# at the load site marks them "u", and crc32b only consumes the low two bytes
# of each shuffle result.
.section .rodata.cst16,"aM",@progbits,16
.p2align 4
.LCPI0_2:
.byte 0 # 0x0
.byte 8 # 0x8
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
# -----------------------------------------------------------------------
# Function crc32b -- compiler output for crc32.c (see the .ident line at the end).
# Presumably: uint32_t crc32b(uint8_t* data, size_t len)
#
# In:    rdi = base address of the input bytes, rsi = byte count, as read
#        below. NOTE(review): this matches the System V AMD64 argument
#        order -- confirm the target ABI.
# Out:   eax = bitwise complement of the running crc, or 0 when rsi == 0.
# Uses:  eax, rcx, edx, zmm0-zmm5, flags. No stack access.
# Needs: a CPU supporting the 512-bit vgf2p8affineqb, vinserti64x4 and
#        vextracti64x4 forms used below (AVX-512 plus GFNI).
#
# Register roles inside the loop:
#   eax  = running crc (starts at 0xFFFFFFFF)
#   rcx  = index of the current input byte
#   zmm0 = table .LCPI0_0, zmm1 = table .LCPI0_1, xmm2 = shuffle control
#
# Each iteration consumes one input byte. Assuming the 128,64,...,1 groups in
# the tables are identity matrices (per the Intel SDM -- confirm), one
# iteration computes
#   crc = M((crc ^ data[i]) & 0xFF) ^ (crc >> 8)
# where M is the byte -> 32-bit map given by groups 0-3 of .LCPI0_0.
# -----------------------------------------------------------------------
.text
.globl crc32b
.p2align 4, 0x90
.type crc32b,@function
crc32b: # @crc32b
.Lcrc32b$local:
.cfi_startproc
# %bb.0: # %entry
test rsi, rsi # len == 0 ?
je .LBB0_1 # yes: return 0 without touching any vector register
# %bb.4: # %for.body.preheader
mov eax, -1 # crc = 0xFFFFFFFF
xor ecx, ecx # i = 0
# Loop-invariant constants are loaded once, before the loop.
vmovdqa64 zmm0, zmmword ptr [rip + .LCPI0_0] # zmm0 = [97,48,154,77,38,19,9,4,140,70,35,17,8,4,128,194,216,108,180,216,238,119,59,29,130,195,227,113,186,223,111,181,128,64,32,16,8,4,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
vmovdqa64 zmm1, zmmword ptr [rip + .LCPI0_1] # zmm1 = [0,0,0,0,0,0,0,0,128,64,32,16,8,4,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,64,32,16,8,4,2,1,0,0,0,0,0,0,0,0]
vmovdqa xmm2, xmmword ptr [rip + .LCPI0_2] # xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
.p2align 4, 0x90
.LBB0_5: # %for.body
# =>This Inner Loop Header: Depth=1
# Step A: build zmm3 = { low 32 bytes: low byte of (data[i] ^ crc),
#                        high 32 bytes: bits 8..15 of crc } and transform it
#         with the matrices in zmm0.
movzx edx, byte ptr [rdi + rcx] # edx = data[i], zero-extended
xor edx, eax # edx = data[i] ^ crc (only the low byte is used next)
vmovd xmm3, edx
vpbroadcastb ymm3, xmm3 # replicate the low byte across 32 bytes
mov edx, eax
shr edx, 8 # low byte of edx = bits 8..15 of crc
vmovd xmm4, edx
vpbroadcastb ymm4, xmm4
vinserti64x4 zmm3, zmm3, ymm4, 1 # ymm4 becomes the upper 256 bits of zmm3
vgf2p8affineqb zmm3, zmm3, zmm0, 0 # per-lane matrix transform, constant term 0
# Step B: same construction for the two upper crc bytes, using zmm1:
#         zmm4 = { low 32 bytes: bits 16..23, high 32 bytes: bits 24..31 }.
mov edx, eax
shr edx, 16 # low byte of edx = bits 16..23 of crc
vmovd xmm4, edx
vpbroadcastb ymm4, xmm4
shr eax, 24 # eax = bits 24..31 of crc; old crc is dead from here on
vmovd xmm5, eax
vpbroadcastb ymm5, xmm5
vinserti64x4 zmm4, zmm4, ymm5, 1
vgf2p8affineqb zmm4, zmm4, zmm1, 0
# Step C: XOR all contributions together and fold 512 bits down to 256.
vpxorq zmm3, zmm3, zmm4
vextracti64x4 ymm4, zmm3, 1 # ymm4 = upper 256 bits of zmm3
vpxor ymm3, ymm3, ymm4
# Step D: gather bytes 0, 8, 16 and 24 of ymm3 (the first byte of each of the
#         four 64-bit lanes) into one 32-bit value.
vextracti128 xmm4, ymm3, 1 # xmm4 = bytes 16..31 of ymm3
vpshufb xmm4, xmm4, xmm2 # low word of xmm4 = its bytes 0 and 8 (ymm3 bytes 16, 24)
vpshufb xmm3, xmm3, xmm2 # low word of xmm3 = its bytes 0 and 8
vpunpcklwd xmm3, xmm3, xmm4 # xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
vmovd eax, xmm3 # eax = updated crc (low dword of the interleave)
add rcx, 1 # ++i
cmp rsi, rcx
jne .LBB0_5 # loop until i == len
# %bb.2: # %for.cond.cleanup.loopexit
not eax # final complement of the crc
vzeroupper # clear upper vector state before returning to the caller
ret
.LBB0_1:
xor eax, eax # empty input: result is 0
ret
.Lfunc_end0:
.size crc32b, .Lfunc_end0-crc32b
.cfi_endproc
# -- End function
.ident "clang version 11.0.0 (https://github.com/llvm/llvm-project e24ed205634c4e567c62b8113c32a5ef33d8dc03)"
.section ".note.GNU-stack","",@progbits
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment