Created
May 8, 2018 23:35
-
-
Save glampert/a9bf5ce34f35ff212e43b20919f255b9 to your computer and use it in GitHub Desktop.
Fast integer to ASCII string conversion functions.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cassert> | |
#include <cstdint> | |
#include <cstdarg> | |
#include <cstdio> | |
#include <cmath> | |
#define WIN32_LEAN_AND_MEAN | |
#include <Windows.h> | |
#include <intrin.h> | |
/////////////////////////////////////////////////////////////////////////////// | |
// Fast integer to string conversion inspired by Andrei Alexandrescu's | |
// "Writing Fast Code" code::dive 2015 presentation about a revised | |
// table-based int<=>string converter - only handles decimal conversion. | |
// | |
// The following implementation is actually based on similar | |
// code present in Facebook's Folly Open Source library. | |
/////////////////////////////////////////////////////////////////////////////// | |
namespace ItoA | |
{ | |
// | |
// Table gen helper | |
// | |
template<typename T, std::uint32_t N> | |
constexpr std::uint32_t array_len(const T (&)[N]) | |
{ | |
return N; | |
} | |
void print(const char* format, ...) | |
{ | |
va_list args; | |
char buffer[256] = {'\0'}; | |
va_start(args, format); | |
std::vsnprintf(buffer, sizeof(buffer), format, args); | |
va_end(args); | |
OutputDebugStringA(buffer); | |
} | |
void print_pow10_table(const std::uint32_t upper_bound, const std::uint32_t bit_width) | |
{ | |
assert(bit_width == 32 || bit_width == 64); | |
const std::uint32_t idx = (bit_width / 32) - 1; | |
const char* const int_types[] = { "std::uint32_t", "std::uint64_t" }; | |
const char* const tbl_suffixes[] = { "32", "64" }; | |
const char* const val_suffixes[] = { "", "ULL" }; | |
print("static const %s pow10_table_%s[%u] = {\n", | |
int_types[idx], tbl_suffixes[idx], upper_bound); | |
for (std::uint64_t i = 0; i < upper_bound; ++i) | |
{ | |
const auto p = static_cast<std::uint64_t>(std::pow(10ULL, i)); | |
print(" %llu%s,\n", p, val_suffixes[idx]); | |
} | |
print("};\n"); | |
} | |
// | |
// int64 -> ASCII string converter | |
// | |
static const std::uint64_t pow10_table_64[20] = { | |
1ULL, | |
10ULL, | |
100ULL, | |
1000ULL, | |
10000ULL, | |
100000ULL, | |
1000000ULL, | |
10000000ULL, | |
100000000ULL, | |
1000000000ULL, | |
10000000000ULL, | |
100000000000ULL, | |
1000000000000ULL, | |
10000000000000ULL, | |
100000000000000ULL, | |
1000000000000000ULL, | |
10000000000000000ULL, | |
100000000000000000ULL, | |
1000000000000000000ULL, | |
10000000000000000000ULL, | |
}; | |
// 20+1 is enough but might as well go for a power-of-two... | |
constexpr int kGoodBufferSize = 32; | |
// 'value' assumed not zero. | |
inline int clz_64(const std::uint64_t value) | |
{ | |
DWORD first_set_bit = 0; | |
const BOOLEAN found_bit = _BitScanReverse64(&first_set_bit, value); | |
assert(found_bit); (void)found_bit; // assert-only | |
return 63 - first_set_bit; | |
} | |
// Returns the number of digits in the base 10 representation of an uint64. | |
inline std::uint32_t digits10_64(const std::uint64_t value) | |
{ | |
// Count Leading Zeros operation not valid for 0 - handle that here. | |
if (value == 0) | |
{ | |
return 1; | |
} | |
// Bits is in the ballpark of log_2(value). | |
const int leading_zeroes = clz_64(value); | |
const int bits = 63 - leading_zeroes; | |
// Approximate log_10(value) == log_10(2) * bits. | |
// Integer magic below: 77/256 is appx. 0.3010 (log_10(2)). | |
// The +1 is to make this the ceiling of the log_10 estimate. | |
const std::uint32_t min_length = 1 + ((bits * 77) >> 8); | |
// Return that log_10 lower bound, plus adjust if input >= 10^(that bound) | |
// in case there's a small error and we misjudged length. | |
assert(min_length < array_len(pow10_table_64)); | |
return min_length + static_cast<std::uint32_t>(value >= pow10_table_64[min_length]); | |
} | |
// Copies the ASCII base 10 representation of 'value' into 'buffer' and | |
// null terminates the buffer. Assumes the buffer points to digits10_64(value) | |
// bytes of valid memory or more. Note that uint64 needs at most 20 bytes, | |
// uint32 needs at most 10 bytes, plus the null terminator. kGoodBufferSize covers all cases. | |
inline void convert_64_impl(char* const buffer, std::uint64_t value) | |
{ | |
const std::uint32_t digits = digits10_64(value); | |
std::uint32_t pos = digits - 1; | |
while (value >= 10) | |
{ | |
const auto q = value / 10; | |
const auto r = static_cast<char>(value % 10); | |
buffer[pos--] = '0' + r; | |
value = q; | |
} | |
// Handle last digit and terminate the string. | |
buffer[pos] = static_cast<char>(value) + '0'; | |
buffer[digits] = '\0'; | |
} | |
char* convert_uint64(char buffer[kGoodBufferSize], const std::uint64_t value) | |
{ | |
convert_64_impl(buffer, value); | |
return buffer; | |
} | |
char* convert_int64(char buffer[kGoodBufferSize], const std::int64_t value) | |
{ | |
char* start = buffer; | |
std::uint64_t uval; | |
if (value < 0) | |
{ | |
*start++ = '-'; | |
uval = ~static_cast<std::uint64_t>(value) + 1; | |
} | |
else | |
{ | |
uval = value; | |
} | |
convert_64_impl(start, uval); | |
return buffer; | |
} | |
} // ItoA | |
/////////////////////////////////////////////////////////////////////////////// | |
int main() | |
{ | |
using namespace ItoA; | |
print("---- TABLES ----\n"); | |
print_pow10_table(20, 64); | |
print_pow10_table(10, 32); | |
print("\n"); | |
char buffer[kGoodBufferSize]; | |
print("---- U64 ----\n"); | |
print("%s\n", convert_uint64(buffer, 0)); | |
print("%s\n", convert_uint64(buffer, 1)); | |
print("%s\n", convert_uint64(buffer, 2)); | |
print("%s\n", convert_uint64(buffer, 3)); | |
print("%s\n", convert_uint64(buffer, 4)); | |
print("%s\n", convert_uint64(buffer, 42)); | |
print("%s\n", convert_uint64(buffer, std::uint64_t(-1))); | |
print("---- I64 ----\n"); | |
print("%s\n", convert_int64(buffer, 0)); | |
print("%s\n", convert_int64(buffer, 1)); | |
print("%s\n", convert_int64(buffer, 2)); | |
print("%s\n", convert_int64(buffer, 3)); | |
print("%s\n", convert_int64(buffer, 4)); | |
print("%s\n", convert_int64(buffer, 42)); | |
print("%s\n", convert_int64(buffer, -0)); | |
print("%s\n", convert_int64(buffer, -1)); | |
print("%s\n", convert_int64(buffer, -2)); | |
print("%s\n", convert_int64(buffer, -3)); | |
print("%s\n", convert_int64(buffer, -4)); | |
print("%s\n", convert_int64(buffer, -42)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment