Last active
July 22, 2019 16:49
-
-
Save nmoinvaz/a4c5104c0185f35b622b410e76d659c2 to your computer and use it in GitHub Desktop.
CRC-32C with Intel SSE 4.2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* crc32c_sse.c -- compute the CRC-32C of a data stream using the SSE 4.2 CRC32 instruction
* Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler | |
* Copyright (C) 2016 Yang Zhang | |
* Copyright (C) 2019 Nathan Moinvaziri | |
* For conditions of distribution and use, see copyright notice in zlib.h | |
* | |
*/ | |
#ifdef X86_SSE4_2_CRC_HASH | |
#include <stdint.h> | |
#include <immintrin.h> | |
# ifdef ZLIB_COMPAT | |
# include <zconf.h> | |
# else | |
# include <zconf-ng.h> | |
# endif | |
/*
 * crc32c_sse_u8/u16/u32/u64 name the CRC32 step primitives used by
 * crc32c_sse(): each takes the running CRC and one 8/16/32/64-bit chunk.
 *
 * When the build advertises the GCC-style builtins (X86_SSE4_2_CRC_INTRIN)
 * and the compiler is not MSVC, the builtins are used directly. In every
 * other case fall back to the _mm_crc32_* intrinsics so that the names are
 * always defined; previously a build with neither _MSC_VER nor
 * X86_SSE4_2_CRC_INTRIN left all four macros undefined.
 *
 * NOTE(review): the _mm_crc32_* intrinsics are expected to come from the
 * <immintrin.h> include above and, on GCC/Clang, need SSE 4.2 enabled for
 * the translation unit -- confirm against the build flags.
 */
#if !defined(_MSC_VER) && defined(X86_SSE4_2_CRC_INTRIN)
#  define crc32c_sse_u8  __builtin_ia32_crc32qi
#  define crc32c_sse_u16 __builtin_ia32_crc32hi
#  define crc32c_sse_u32 __builtin_ia32_crc32si
#  define crc32c_sse_u64 __builtin_ia32_crc32di
#else
#  define crc32c_sse_u8  _mm_crc32_u8
#  define crc32c_sse_u16 _mm_crc32_u16
#  define crc32c_sse_u32 _mm_crc32_u32
#  define crc32c_sse_u64 _mm_crc32_u64
#endif
/*
 * Fold `len` bytes starting at `buf` into the running CRC `crc` using the
 * crc32c_sse_u8/u16/u32/u64 step primitives, and return the updated CRC.
 *
 * crc  running CRC value; it is bit-inverted on entry and the result is
 *      bit-inverted again before being returned.
 * buf  input bytes; only dereferenced while `len` is non-zero.
 * len  number of bytes to process.
 *
 * The input is consumed in three phases: a prelude of at most one 1-, 2-
 * (and, on x86-64, 4-) byte step that brings `buf` up to the alignment of
 * the widest load, a main loop of widest loads, and a tail that mops up
 * the remaining 4/2/1 bytes. Every wide load is issued only after the
 * preceding narrower steps have aligned `buf` for it.
 *
 * NOTE(review): the wide loads read the byte buffer through uint16_t/
 * uint32_t/uint64_t lvalues, as the original code did; if the project is
 * built with strict-aliasing optimisations this may warrant memcpy loads.
 */
uint32_t crc32c_sse(uint32_t crc, const unsigned char *buf, uint64_t len) {
    uint32_t c = ~crc;

    /* Prelude: one byte to reach 2-byte alignment. */
    if (len >= 1 && ((uintptr_t)buf & 1)) {
        c = crc32c_sse_u8(c, *buf);
        buf += 1;
        len -= 1;
    }

    /* Prelude: one 16-bit word to reach 4-byte alignment. */
    if (len >= sizeof(uint16_t) && ((uintptr_t)buf & sizeof(uint16_t))) {
        c = crc32c_sse_u16(c, *(const uint16_t *)buf);
        buf += sizeof(uint16_t);
        len -= sizeof(uint16_t);
    }

#  if defined(__x86_64__) || defined(_M_X64)
    /*
     * Prelude: one 32-bit word to reach 8-byte alignment. The test uses the
     * current position; testing the pointer from before the 16-bit step
     * (as was done previously) left the 64-bit loop on a 4-mod-8 address.
     */
    if (len >= sizeof(uint32_t) && ((uintptr_t)buf & sizeof(uint32_t))) {
        c = crc32c_sse_u32(c, *(const uint32_t *)buf);
        buf += sizeof(uint32_t);
        len -= sizeof(uint32_t);
    }

#    ifdef UNROLL_MORE
    while (len >= 4 * sizeof(uint64_t)) {
        const uint64_t *w = (const uint64_t *)buf;
        c = (uint32_t)crc32c_sse_u64(c, w[0]);
        c = (uint32_t)crc32c_sse_u64(c, w[1]);
        c = (uint32_t)crc32c_sse_u64(c, w[2]);
        c = (uint32_t)crc32c_sse_u64(c, w[3]);
        buf += 4 * sizeof(uint64_t);
        len -= 4 * sizeof(uint64_t);
    }
#    endif

    while (len >= sizeof(uint64_t)) {
        /* The 64-bit step returns a 64-bit value; only the low 32 bits are kept in c. */
        c = (uint32_t)crc32c_sse_u64(c, *(const uint64_t *)buf);
        buf += sizeof(uint64_t);
        len -= sizeof(uint64_t);
    }

    /* Tail: at most one 32-bit word is left after the 64-bit loop. */
    if (len >= sizeof(uint32_t)) {
        c = crc32c_sse_u32(c, *(const uint32_t *)buf);
        buf += sizeof(uint32_t);
        len -= sizeof(uint32_t);
    }
#  else /* 32-bit x86 */
#    ifdef UNROLL_MORE
    while (len >= 8 * sizeof(uint32_t)) {
        const uint32_t *w = (const uint32_t *)buf;
        c = crc32c_sse_u32(c, w[0]);
        c = crc32c_sse_u32(c, w[1]);
        c = crc32c_sse_u32(c, w[2]);
        c = crc32c_sse_u32(c, w[3]);
        c = crc32c_sse_u32(c, w[4]);
        c = crc32c_sse_u32(c, w[5]);
        c = crc32c_sse_u32(c, w[6]);
        c = crc32c_sse_u32(c, w[7]);
        buf += 8 * sizeof(uint32_t);
        len -= 8 * sizeof(uint32_t);
    }
#    endif

    while (len >= sizeof(uint32_t)) {
        c = crc32c_sse_u32(c, *(const uint32_t *)buf);
        buf += sizeof(uint32_t);
        len -= sizeof(uint32_t);
    }
#  endif

    /*
     * Tail: one 16-bit word. This must be the 16-bit step: the x86-64 path
     * previously used the 8-bit step here while still consuming two bytes,
     * which dropped a byte from the checksum.
     */
    if (len >= sizeof(uint16_t)) {
        c = crc32c_sse_u16(c, *(const uint16_t *)buf);
        buf += sizeof(uint16_t);
        len -= sizeof(uint16_t);
    }

    /* Tail: final odd byte. */
    if (len) {
        c = crc32c_sse_u8(c, *buf);
    }

    return ~c;
}
#endif /* X86_SSE4_2_CRC_HASH */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment