/*
 * Pair of 64-bit constants consumed by the "Barrett reduce to 32-bits" step
 * further down. The whole array is fetched as a single 128-bit value with
 * _mm_load_si128(); the first carry-less multiply (selector 0x10) uses the
 * high 64 bits of that value, i.e. poly[1], and the second one (selector
 * 0x00) uses the low 64 bits, i.e. poly[0].
 *
 * NOTE(review): zalign(16) is presumably a 16-byte alignment attribute macro
 * defined elsewhere; _mm_load_si128() requires a 16-byte-aligned address, so
 * confirm it expands accordingly.
 * NOTE(review): the values look like the bit-reflected CRC-32 polynomial
 * (poly[0]) and its Barrett constant (poly[1]) -- verify against the
 * reference the code was derived from.
 */
static const uint64_t zalign(16) poly[] = { 0x01db710641, 0x01f7011641 }; | |
// ^^^^^^^^^^^^ | |
// No use of poly | |
// ... | |
// x0, x1, x2, x3 are 128 bit registers | |
x3 = _mm_setr_epi32(~0, 0, ~0, 0); // Set reverse, i.e. bits from 0 to 31 are 1s, from 32 to 63 are zeros, etc | |
// No reassignment of x3 | |
// ... | |
/* | |
* Barrett reduce to 32-bits. | |
*/ | |
x0 = _mm_load_si128((__m128i*)poly); // Load 16 bytes | |
x2 = _mm_and_si128(x1, x3); // Do logical AND with x3 | |
x2 = _mm_clmulepi64_si128(x2, x0, 0x10); // Do CLMUL with the high 8 bytes of poly | |
x2 = _mm_and_si128(x2, x3); // Do logical AND with x3 | |
x2 = _mm_clmulepi64_si128(x2, x0, 0x00); // Do CLMUL with the low 8 bytes of poly | |
x1 = _mm_xor_si128(x1, x2); // XOR x1 and x2 | |
/* | |
* Return the crc32. | |
*/ | |
return _mm_extract_epi32(x1, 1); // Return bytes 4 to 7 of x1 (bits 32..63), i.e. extract the second 32-bit integer |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment