Skip to content

Instantly share code, notes, and snippets.

@glampert
Created May 8, 2018 23:35
Show Gist options
  • Save glampert/a9bf5ce34f35ff212e43b20919f255b9 to your computer and use it in GitHub Desktop.
Save glampert/a9bf5ce34f35ff212e43b20919f255b9 to your computer and use it in GitHub Desktop.
Fast integer to ASCII string conversion functions.
#include <cassert>
#include <cstdint>
#include <cstdarg>
#include <cstdio>
#include <cmath>
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#include <intrin.h>
///////////////////////////////////////////////////////////////////////////////
// Fast integer to string conversion inspired by Andrei Alexandrescu's
// "Writing Fast Code" code::dive 2015 presentation about a revised
// table-based int<=>string converter - only handles decimal conversion.
//
// The following implementation is actually based on similar
// code present in Facebook's Folly Open Source library.
///////////////////////////////////////////////////////////////////////////////
namespace ItoA
{
//
// Table gen helper
//
template<typename T, std::uint32_t N>
constexpr std::uint32_t array_len(const T (&)[N])
{
return N;
}
void print(const char* format, ...)
{
va_list args;
char buffer[256] = {'\0'};
va_start(args, format);
std::vsnprintf(buffer, sizeof(buffer), format, args);
va_end(args);
OutputDebugStringA(buffer);
}
void print_pow10_table(const std::uint32_t upper_bound, const std::uint32_t bit_width)
{
assert(bit_width == 32 || bit_width == 64);
const std::uint32_t idx = (bit_width / 32) - 1;
const char* const int_types[] = { "std::uint32_t", "std::uint64_t" };
const char* const tbl_suffixes[] = { "32", "64" };
const char* const val_suffixes[] = { "", "ULL" };
print("static const %s pow10_table_%s[%u] = {\n",
int_types[idx], tbl_suffixes[idx], upper_bound);
for (std::uint64_t i = 0; i < upper_bound; ++i)
{
const auto p = static_cast<std::uint64_t>(std::pow(10ULL, i));
print(" %llu%s,\n", p, val_suffixes[idx]);
}
print("};\n");
}
//
// int64 -> ASCII string converter
//
static const std::uint64_t pow10_table_64[20] = {
1ULL,
10ULL,
100ULL,
1000ULL,
10000ULL,
100000ULL,
1000000ULL,
10000000ULL,
100000000ULL,
1000000000ULL,
10000000000ULL,
100000000000ULL,
1000000000000ULL,
10000000000000ULL,
100000000000000ULL,
1000000000000000ULL,
10000000000000000ULL,
100000000000000000ULL,
1000000000000000000ULL,
10000000000000000000ULL,
};
// 20+1 is enough but might as well go for a power-of-two...
constexpr int kGoodBufferSize = 32;
// 'value' assumed not zero.
inline int clz_64(const std::uint64_t value)
{
DWORD first_set_bit = 0;
const BOOLEAN found_bit = _BitScanReverse64(&first_set_bit, value);
assert(found_bit); (void)found_bit; // assert-only
return 63 - first_set_bit;
}
// Returns the number of digits in the base 10 representation of an uint64.
inline std::uint32_t digits10_64(const std::uint64_t value)
{
// Count Leading Zeros operation not valid for 0 - handle that here.
if (value == 0)
{
return 1;
}
// Bits is in the ballpark of log_2(value).
const int leading_zeroes = clz_64(value);
const int bits = 63 - leading_zeroes;
// Approximate log_10(value) == log_10(2) * bits.
// Integer magic below: 77/256 is appx. 0.3010 (log_10(2)).
// The +1 is to make this the ceiling of the log_10 estimate.
const std::uint32_t min_length = 1 + ((bits * 77) >> 8);
// Return that log_10 lower bound, plus adjust if input >= 10^(that bound)
// in case there's a small error and we misjudged length.
assert(min_length < array_len(pow10_table_64));
return min_length + static_cast<std::uint32_t>(value >= pow10_table_64[min_length]);
}
// Copies the ASCII base 10 representation of 'value' into 'buffer' and
// null terminates the buffer. Assumes the buffer points to digits10_64(value)
// bytes of valid memory or more. Note that uint64 needs at most 20 bytes,
// uint32 needs at most 10 bytes, plus the null terminator. kGoodBufferSize covers all cases.
inline void convert_64_impl(char* const buffer, std::uint64_t value)
{
const std::uint32_t digits = digits10_64(value);
std::uint32_t pos = digits - 1;
while (value >= 10)
{
const auto q = value / 10;
const auto r = static_cast<char>(value % 10);
buffer[pos--] = '0' + r;
value = q;
}
// Handle last digit and terminate the string.
buffer[pos] = static_cast<char>(value) + '0';
buffer[digits] = '\0';
}
char* convert_uint64(char buffer[kGoodBufferSize], const std::uint64_t value)
{
convert_64_impl(buffer, value);
return buffer;
}
char* convert_int64(char buffer[kGoodBufferSize], const std::int64_t value)
{
char* start = buffer;
std::uint64_t uval;
if (value < 0)
{
*start++ = '-';
uval = ~static_cast<std::uint64_t>(value) + 1;
}
else
{
uval = value;
}
convert_64_impl(start, uval);
return buffer;
}
} // ItoA
///////////////////////////////////////////////////////////////////////////////
int main()
{
using namespace ItoA;
print("---- TABLES ----\n");
print_pow10_table(20, 64);
print_pow10_table(10, 32);
print("\n");
char buffer[kGoodBufferSize];
print("---- U64 ----\n");
print("%s\n", convert_uint64(buffer, 0));
print("%s\n", convert_uint64(buffer, 1));
print("%s\n", convert_uint64(buffer, 2));
print("%s\n", convert_uint64(buffer, 3));
print("%s\n", convert_uint64(buffer, 4));
print("%s\n", convert_uint64(buffer, 42));
print("%s\n", convert_uint64(buffer, std::uint64_t(-1)));
print("---- I64 ----\n");
print("%s\n", convert_int64(buffer, 0));
print("%s\n", convert_int64(buffer, 1));
print("%s\n", convert_int64(buffer, 2));
print("%s\n", convert_int64(buffer, 3));
print("%s\n", convert_int64(buffer, 4));
print("%s\n", convert_int64(buffer, 42));
print("%s\n", convert_int64(buffer, -0));
print("%s\n", convert_int64(buffer, -1));
print("%s\n", convert_int64(buffer, -2));
print("%s\n", convert_int64(buffer, -3));
print("%s\n", convert_int64(buffer, -4));
print("%s\n", convert_int64(buffer, -42));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment