-
-
Save mmozeiko/68c2d1ce466422b506b2c86e4f603f53 to your computer and use it in GitHub Desktop.
convert 32-bit or 64-bit integer to hex string
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once

#include <stdint.h>

// Public API. Both functions write raw ASCII hex digits to `hex`; the
// trailing comments give the exact number of bytes stored.
static inline void hex_from_u32(void* hex, uint32_t x, int uppercase); // writes exactly 8 bytes
static inline void hex_from_u64(void* hex, uint64_t x, int uppercase); // writes exactly 16 bytes

// implementation

// Backend selection. Exactly one HEX_<backend> macro is defined, or none at
// all, in which case the functions use their scalar SWAR fallback.
// Define HEX_NO_SIMD before including this header to force the fallback.
#if !defined(HEX_NO_SIMD)
# if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512VBMI__)
#  include <immintrin.h>
#  define HEX_AVX512
// The pshufb path needs only SSSE3. MSVC has no __SSSE3__ macro, so __AVX2__
// is accepted as well. It is limited to x86-64 because hex_from_u64 uses
// _mm_cvtsi64_si128, which 32-bit x86 targets do not provide.
# elif (defined(__SSSE3__) || defined(__AVX2__)) && (defined(_M_AMD64) || defined(__x86_64__))
#  include <tmmintrin.h>
#  define HEX_SSSE3
# elif defined(_M_AMD64) || defined(__x86_64__)
#  include <emmintrin.h>
#  define HEX_SSE2
# elif defined(_M_ARM64) || defined(__aarch64__)
#  include <arm_neon.h>
#  define HEX_NEON
# elif defined(__wasm_simd128__)
#  include <wasm_simd128.h>
#  define HEX_WASM
# elif defined(__riscv) && (__riscv_v >= 1000000)
#  include <riscv_vector.h>
#  define HEX_RVV // assumes -march=rva23u64 or -march=rv64gbv_zvbb or -march=rv64gv_zba_zbb_zbs_zvbb
# endif
#endif

// Compiler-specific helpers:
//   HEX_BSWAP32 / HEX_BSWAP64 - reverse the byte order of a 32/64-bit value.
//   HexUnaligned64            - byte-packed wrapper that lets a uint64_t be
//                               stored through a pointer with no alignment
//                               guarantee.
#if defined(_MSC_VER)
# include <intrin.h>
# define HEX_BSWAP32(x) _byteswap_ulong(x)
# define HEX_BSWAP64(x) _byteswap_uint64(x)
# pragma pack(push, 1)
typedef struct { uint64_t value; } HexUnaligned64;
# pragma pack(pop)
#else
# define HEX_BSWAP32(x) __builtin_bswap32(x)
# define HEX_BSWAP64(x) __builtin_bswap64(x)
typedef struct __attribute__((packed)) { uint64_t value; } HexUnaligned64;
#endif
// Converts `x` to exactly 8 hexadecimal ASCII digits, most significant digit
// first, and stores them at `hex`.
//
//   hex       - destination with room for at least 8 bytes; no NUL terminator
//               is written and no particular alignment is required.
//   x         - value to convert.
//   uppercase - non-zero selects 'A'..'F', zero selects 'a'..'f'.
//
// The implementation is picked at compile time by the HEX_* backend macros;
// when none of them is defined the scalar SWAR fallback at the end is used.
void hex_from_u32(void* hex, uint32_t x, int uppercase)
{
#if defined(HEX_AVX512)
    __m128i bytes = _mm_cvtsi32_si128(x);
    // bit offsets of the eight nibbles, most significant nibble first
    const __m128i bits = _mm_setr_epi8(28,24,20,16,12,8,4,0, 0,0,0,0,0,0,0,0);
    __m128i nibbles = _mm_multishift_epi64_epi8(bits, bytes);
    const char* table = uppercase ? "0123456789ABCDEF" : "0123456789abcdef";
    // the byte permute is the digit-table lookup
    __m128i result = _mm_permutexvar_epi8(nibbles, _mm_loadu_si128((const __m128i*)table));
    _mm_storel_epi64((__m128i*)hex, result);
#elif defined(HEX_SSSE3)
    // byte-swap so the most significant byte comes first in memory order
    __m128i bytes = _mm_cvtsi32_si128(HEX_BSWAP32(x));
    // interleave high and low nibble of every byte
    __m128i nibbles = _mm_unpacklo_epi8(_mm_srli_epi64(bytes, 4), bytes);
    nibbles = _mm_and_si128(nibbles, _mm_set1_epi8(0xf));
    const char* table = uppercase ? "0123456789ABCDEF" : "0123456789abcdef";
    // pshufb acts as a 16-entry table lookup
    __m128i result = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)table), nibbles);
    _mm_storel_epi64((__m128i*)hex, result);
#elif defined(HEX_SSE2)
    __m128i bytes = _mm_cvtsi32_si128(HEX_BSWAP32(x));
    // 4-bit nibbles
    __m128i nibbles = _mm_unpacklo_epi8(_mm_srli_epi64(bytes, 4), bytes);
    nibbles = _mm_and_si128(nibbles, _mm_set1_epi8(0xf));
    // if nibble > 9
    __m128i mask = _mm_cmpgt_epi8(nibbles, _mm_set1_epi8(9));
    // result for 0..9
    __m128i result = _mm_add_epi8(nibbles, _mm_set1_epi8('0'));
    // update for a..f
    const char offset = (uppercase ? 'A' : 'a') - '0' - 10;
    result = _mm_add_epi8(result, _mm_and_si128(mask, _mm_set1_epi8(offset)));
    _mm_storel_epi64((__m128i*)hex, result);
#elif defined(HEX_NEON)
    uint8x8_t bytes = vreinterpret_u8_u32(vcreate_u32(HEX_BSWAP32(x)));
    // interleave high and low nibble of every byte
    uint8x8_t nibbles = vzip1_u8(vshr_n_u8(bytes, 4), bytes);
    nibbles = vand_u8(nibbles, vdup_n_u8(0xf));
    const char* table = uppercase ? "0123456789ABCDEF" : "0123456789abcdef";
    uint8x8_t result = vqtbl1_u8(vld1q_u8((const uint8_t*)table), nibbles);
    vst1_u8((uint8_t*)hex, result);
#elif defined(HEX_WASM)
    v128_t bytes = wasm_u32x4_make(x, 0, 0, 0);
    // the shuffle both reverses the byte order and interleaves hi/lo nibbles
    v128_t nibbles = wasm_i8x16_shuffle(wasm_u64x2_shr(bytes, 4), bytes,
        3,19, 2,18, 1,17, 0,16, 0,0,0,0,0,0,0,0
    );
    nibbles = wasm_v128_and(nibbles, wasm_i8x16_splat(0xf));
    const char* table = uppercase ? "0123456789ABCDEF" : "0123456789abcdef";
    v128_t result = wasm_i8x16_swizzle(wasm_v128_load(table), nibbles);
    wasm_v128_store64_lane(hex, result, 0);
#elif defined(HEX_RVV)
    vuint32m1_t x32 = __riscv_vmv_s_x_u32m1(HEX_BSWAP32(x), 1);
    vuint8m1_t xlo = __riscv_vreinterpret_v_u32m1_u8m1(x32);
    vuint8m1_t xhi = __riscv_vsrl_vx_u8m1(xlo, 4, 8);
    // interleave bytes from xhi and xlo: each u16 lane becomes xhi + 256 * xlo
    vuint16m2_t n8 = __riscv_vwmaccu_vx_u16m2(__riscv_vwaddu_vv_u16m2(xhi, xlo, 8), 0xff, xlo, 8);
    vuint16m1_t n16 = __riscv_vget_v_u16m2_u16m1(n8, 0);
    // 4-bit nibbles
    vuint8m1_t nibbles = __riscv_vand_vx_u8m1(__riscv_vreinterpret_v_u16m1_u8m1(n16), 0xf, 8);
    // if nibble > 9
    vbool8_t mask = __riscv_vmsgtu_vx_u8m1_b8(nibbles, 9, 8);
    // result for 0..9
    vuint8m1_t result = __riscv_vadd_vx_u8m1(nibbles, '0', 8);
    // update for a..f
    const char offset = (uppercase ? 'A' : 'a') - '0' - 10;
    result = __riscv_vadd_vx_u8m1_mu(mask, result, result, offset, 8);
    __riscv_vse8_v_u8m1((uint8_t*)hex, result, 8);
#else // SWAR
    uint64_t n = x;

    // spread the nibbles into one byte each, in reversed order     // 0x0000'0000'8765'4321
    n = ((n & 0x000000000000ffff) << 32) | ((n >> 16) & 0x000000000000ffff); // 0x0000'4321'0000'8765
    n = ((n & 0x000000ff000000ff) << 16) | ((n >>  8) & 0x000000ff000000ff); // 0x0021'0043'0065'0087
    n = ((n & 0x000f000f000f000f) <<  8) | ((n >>  4) & 0x000f000f000f000f); // 0x0102'0304'0506'0708

    const uint64_t splat = 0x0101010101010101;

    // if nibble >= 10 then addition will overflow in top nibble, shift it down to 0 or 1
    uint64_t mask = ((n + (16 - 10) * splat) >> 4) & splat;

    // result for 0..9
    uint64_t result = n + ('0' * splat);

    // update for a..f
    const char offset = (uppercase ? 'A' : 'a') - '0' - 10;
    result += offset * mask;

    // The lowest byte of `result` holds the most significant digit. Emit the
    // bytes one at a time, lowest first, so the output does not depend on
    // host endianness or alignment and the caller's buffer is never accessed
    // through a uint64_t lvalue.
    // NOTE: optimizing compilers are expected to fold this loop into a single
    // 8-byte store on little-endian targets; verify if this path is hot.
    unsigned char* out = (unsigned char*)hex;
    for (int i = 0; i < 8; i++)
    {
        out[i] = (unsigned char)(result >> (8 * i));
    }
#endif
}
// Converts `x` to exactly 16 hexadecimal ASCII digits, most significant digit
// first, and stores them at `hex`.
//
//   hex       - destination with room for at least 16 bytes; no NUL terminator
//               is written and no particular alignment is required.
//   x         - value to convert.
//   uppercase - non-zero selects 'A'..'F', zero selects 'a'..'f'.
//
// The implementation is picked at compile time by the HEX_* backend macros;
// when none of them is defined the scalar SWAR fallback at the end is used.
void hex_from_u64(void* hex, uint64_t x, int uppercase)
{
#if defined(HEX_AVX512)
    // x is broadcast to both 64-bit lanes: lane 0 yields the upper eight
    // digits, lane 1 the lower eight
    __m128i bytes = _mm_set1_epi64x(x);
    const __m128i bits = _mm_setr_epi8(60,56,52,48,44,40,36,32, 28,24,20,16,12,8,4,0);
    __m128i nibbles = _mm_multishift_epi64_epi8(bits, bytes);
    const char* table = uppercase ? "0123456789ABCDEF" : "0123456789abcdef";
    // the byte permute is the digit-table lookup
    __m128i result = _mm_permutexvar_epi8(nibbles, _mm_loadu_si128((const __m128i*)table));
    _mm_storeu_si128((__m128i*)hex, result);
#elif defined(HEX_SSSE3)
    // byte-swap so the most significant byte comes first in memory order
    __m128i bytes = _mm_cvtsi64_si128(HEX_BSWAP64(x));
    // interleave high and low nibble of every byte
    __m128i nibbles = _mm_unpacklo_epi8(_mm_srli_epi64(bytes, 4), bytes);
    nibbles = _mm_and_si128(nibbles, _mm_set1_epi8(0xf));
    const char* table = uppercase ? "0123456789ABCDEF" : "0123456789abcdef";
    // pshufb acts as a 16-entry table lookup
    __m128i result = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)table), nibbles);
    _mm_storeu_si128((__m128i*)hex, result);
#elif defined(HEX_SSE2)
    __m128i bytes = _mm_cvtsi64_si128(HEX_BSWAP64(x));
    // 4-bit nibbles
    __m128i nibbles = _mm_unpacklo_epi8(_mm_srli_epi64(bytes, 4), bytes);
    nibbles = _mm_and_si128(nibbles, _mm_set1_epi8(0xf));
    // if nibble > 9
    __m128i mask = _mm_cmpgt_epi8(nibbles, _mm_set1_epi8(9));
    // result for 0..9
    __m128i result = _mm_add_epi8(nibbles, _mm_set1_epi8('0'));
    // update for a..f
    const char offset = (uppercase ? 'A' : 'a') - '0' - 10;
    result = _mm_add_epi8(result, _mm_and_si128(mask, _mm_set1_epi8(offset)));
    _mm_storeu_si128((__m128i*)hex, result);
#elif defined(HEX_NEON)
    uint8x8_t bytes = vreinterpret_u8_u64(vcreate_u64(HEX_BSWAP64(x)));
    // interleave high and low nibble of every byte, then join both halves
    uint8x8x2_t nibbles2 = vzip_u8(vshr_n_u8(bytes, 4), bytes);
    uint8x16_t nibbles = vcombine_u8(nibbles2.val[0], nibbles2.val[1]);
    nibbles = vandq_u8(nibbles, vdupq_n_u8(0xf));
    const char* table = uppercase ? "0123456789ABCDEF" : "0123456789abcdef";
    uint8x16_t result = vqtbl1q_u8(vld1q_u8((const uint8_t*)table), nibbles);
    vst1q_u8((uint8_t*)hex, result);
#elif defined(HEX_WASM)
    v128_t bytes = wasm_u64x2_make(x, 0);
    // the shuffle both reverses the byte order and interleaves hi/lo nibbles
    v128_t nibbles = wasm_i8x16_shuffle(wasm_u64x2_shr(bytes, 4), bytes,
        7,23, 6,22, 5,21, 4,20, 3,19, 2,18, 1,17, 0,16
    );
    nibbles = wasm_v128_and(nibbles, wasm_i8x16_splat(0xf));
    const char* table = uppercase ? "0123456789ABCDEF" : "0123456789abcdef";
    v128_t result = wasm_i8x16_swizzle(wasm_v128_load(table), nibbles);
    wasm_v128_store(hex, result);
#elif defined(HEX_RVV)
    vuint64m1_t x64 = __riscv_vmv_s_x_u64m1(HEX_BSWAP64(x), 1);
    vuint8m1_t xlo = __riscv_vreinterpret_v_u64m1_u8m1(x64);
    vuint8m1_t xhi = __riscv_vsrl_vx_u8m1(xlo, 4, 16);
    // interleave bytes from xhi and xlo: each u16 lane becomes xhi + 256 * xlo
    vuint16m2_t n8 = __riscv_vwmaccu_vx_u16m2(__riscv_vwaddu_vv_u16m2(xhi, xlo, 16), 0xff, xlo, 16);
    vuint16m1_t n16 = __riscv_vget_v_u16m2_u16m1(n8, 0);
    // 4-bit nibbles
    vuint8m1_t nibbles = __riscv_vand_vx_u8m1(__riscv_vreinterpret_v_u16m1_u8m1(n16), 0xf, 16);
    // if nibble > 9
    vbool8_t mask = __riscv_vmsgtu_vx_u8m1_b8(nibbles, 9, 16);
    // result for 0..9
    vuint8m1_t result = __riscv_vadd_vx_u8m1(nibbles, '0', 16);
    // update for a..f
    const char offset = (uppercase ? 'A' : 'a') - '0' - 10;
    result = __riscv_vadd_vx_u8m1_mu(mask, result, result, offset, 16);
    __riscv_vse8_v_u8m1((uint8_t*)hex, result, 16);
#else // SWAR
    // each 32-bit half is expanded to eight digit bytes independently;
    // the first masking step below discards the upper half still held in `lo`
    uint64_t hi = x >> 32;
    uint64_t lo = x;

    // spread the nibbles into one byte each, in reversed order
    hi = ((hi & 0x000000000000ffff) << 32) | ((hi >> 16) & 0x000000000000ffff);
    hi = ((hi & 0x000000ff000000ff) << 16) | ((hi >>  8) & 0x000000ff000000ff);
    hi = ((hi & 0x000f000f000f000f) <<  8) | ((hi >>  4) & 0x000f000f000f000f);

    lo = ((lo & 0x000000000000ffff) << 32) | ((lo >> 16) & 0x000000000000ffff);
    lo = ((lo & 0x000000ff000000ff) << 16) | ((lo >>  8) & 0x000000ff000000ff);
    lo = ((lo & 0x000f000f000f000f) <<  8) | ((lo >>  4) & 0x000f000f000f000f);

    const uint64_t splat = 0x0101010101010101;

    // if nibble >= 10 then addition will overflow in top nibble, shift it down to 0 or 1
    uint64_t hmask = ((hi + (16 - 10) * splat) >> 4) & splat;
    uint64_t lmask = ((lo + (16 - 10) * splat) >> 4) & splat;

    // result for 0..9
    hi += '0' * splat;
    lo += '0' * splat;

    // update for a..f
    const char offset = (uppercase ? 'A' : 'a') - '0' - 10;
    hi += offset * hmask;
    lo += offset * lmask;

    // The lowest byte of each word holds its most significant digit. Emit the
    // bytes one at a time, lowest first (upper half at offset 0, lower half at
    // offset 8), so the output does not depend on host endianness or
    // alignment and the caller's buffer is never accessed through a uint64_t
    // lvalue.
    // NOTE: optimizing compilers are expected to fold this loop into two
    // 8-byte stores on little-endian targets; verify if this path is hot.
    unsigned char* out = (unsigned char*)hex;
    for (int i = 0; i < 8; i++)
    {
        out[i]     = (unsigned char)(hi >> (8 * i));
        out[i + 8] = (unsigned char)(lo >> (8 * i));
    }
#endif
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment