| static const uint64_t zalign(16) poly[] = { 0x01db710641, 0x01f7011641 }; | |
| // ^^^^^^^^^^^^ | |
| // No use of poly | |
| // ... | |
| // x0, x1, x2, x3 are 128 bit registers | |
| x3 = _mm_setr_epi32(~0, 0, ~0, 0); // Set in reverse (memory) order: bits 0-31 are ones, bits 32-63 are zeros, bits 64-95 are ones, bits 96-127 are zeros | |
| // No reassignment of x3 | |
| // ... | |
| /* | |
| * Barrett reduce to 32 bits. | |
| */ | |
| x0 = _mm_load_si128((__m128i*)poly); // Load 16 bytes | |
| x2 = _mm_and_si128(x1, x3); // Do logical AND with x3 | |
| x2 = _mm_clmulepi64_si128(x2, x0, 0x10); // Do CLMUL with the high 8 bytes of poly | |
| x2 = _mm_and_si128(x2, x3); // Do logical AND with x3 | |
| x2 = _mm_clmulepi64_si128(x2, x0, 0x00); // Do CLMUL with the low 8 bytes of poly | |
| x1 = _mm_xor_si128(x1, x2); // XOR x1 and x2 | |
| /* | |
| * Return the crc32. | |
| */ | |
| return _mm_extract_epi32(x1, 1); // Return bytes 4 through 7 of x1, i.e. extract the second 32-bit integer |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment