glampert/fast_itoa.cpp

## fast_itoa.cpp
#include <cassert>
#include <cstdint>
#include <cstdarg>
#include <cstdio>
#include <cmath>

#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#include <intrin.h>

///////////////////////////////////////////////////////////////////////////////
// Fast integer to string conversion inspired by Andrei Alexandrescu's
// "Writing Fast Code" code::dive 2015 presentation about a revised
// table-based int<=>string converter - only handles decimal conversion.
//
// The following implementation is actually based on similar
// code present in Facebook's Folly Open Source library.
///////////////////////////////////////////////////////////////////////////////

namespace ItoA
{

//
// Table gen helper
//

template<typename T, std::uint32_t N>
constexpr std::uint32_t array_len(const T (&)[N])
{
    return N;
}

void print(const char* format, ...)
{
    va_list args;
    char buffer[256] = {'\0'};

    va_start(args, format);
    std::vsnprintf(buffer, sizeof(buffer), format, args);
    va_end(args);

    OutputDebugStringA(buffer);
}

void print_pow10_table(const std::uint32_t upper_bound, const std::uint32_t bit_width)
{
    assert(bit_width == 32 || bit_width == 64);
    const std::uint32_t idx = (bit_width / 32) - 1;

    const char* const int_types[]    = { "std::uint32_t", "std::uint64_t" };
    const char* const tbl_suffixes[] = { "32", "64" };
    const char* const val_suffixes[] = { "", "ULL" };

    print("static const %s pow10_table_%s[%u] = {\n",
          int_types[idx], tbl_suffixes[idx], upper_bound);

    for (std::uint64_t i = 0; i < upper_bound; ++i)
    {
        const auto p = static_cast<std::uint64_t>(std::pow(10ULL, i));
        print("    %llu%s,\n", p, val_suffixes[idx]);
    }

    print("};\n");
}

//
// int64 -> ASCII string converter
//

static const std::uint64_t pow10_table_64[20] = {
    1ULL,
    10ULL,
    100ULL,
    1000ULL,
    10000ULL,
    100000ULL,
    1000000ULL,
    10000000ULL,
    100000000ULL,
    1000000000ULL,
    10000000000ULL,
    100000000000ULL,
    1000000000000ULL,
    10000000000000ULL,
    100000000000000ULL,
    1000000000000000ULL,
    10000000000000000ULL,
    100000000000000000ULL,
    1000000000000000000ULL,
    10000000000000000000ULL,
};

// 20+1 is enough but might as well go for a power-of-two...
constexpr int kGoodBufferSize = 32;

// 'value' assumed not zero.
inline int clz_64(const std::uint64_t value)
{
    DWORD first_set_bit = 0;
    const BOOLEAN found_bit = _BitScanReverse64(&first_set_bit, value);
    assert(found_bit); (void)found_bit; // assert-only
    return 63 - first_set_bit;
}

// Returns the number of digits in the base 10 representation of an uint64.
inline std::uint32_t digits10_64(const std::uint64_t value)
{
    // Count Leading Zeros operation not valid for 0 - handle that here.
    if (value == 0)
    {
        return 1;
    }

    // Bits is in the ballpark of log_2(value).
    const int leading_zeroes = clz_64(value);
    const int bits = 63 - leading_zeroes;

    // Approximate log_10(value) == log_10(2) * bits.
    // Integer magic below: 77/256 is appx. 0.3010 (log_10(2)).
    // The +1 is to make this the ceiling of the log_10 estimate.
    const std::uint32_t min_length = 1 + ((bits * 77) >> 8);

    // Return that log_10 lower bound, plus adjust if input >= 10^(that bound)
    // in case there's a small error and we misjudged length.
    assert(min_length < array_len(pow10_table_64));
    return min_length + static_cast<std::uint32_t>(value >= pow10_table_64[min_length]);
}

// Copies the ASCII base 10 representation of 'value' into 'buffer' and
// null terminates the buffer. Assumes the buffer points to digits10_64(value)
// bytes of valid memory or more. Note that uint64 needs at most 20 bytes,
// uint32 needs at most 10 bytes, plus the null terminator. kGoodBufferSize covers all cases.
inline void convert_64_impl(char* const buffer, std::uint64_t value)
{
    const std::uint32_t digits = digits10_64(value);
    std::uint32_t pos = digits - 1;

    while (value >= 10)
    {
        const auto q = value / 10;
        const auto r = static_cast<char>(value % 10);
        buffer[pos--] = '0' + r;
        value = q;
    }

    // Handle last digit and terminate the string.
    buffer[pos] = static_cast<char>(value) + '0';
    buffer[digits] = '\0';
}

char* convert_uint64(char buffer[kGoodBufferSize], const std::uint64_t value)
{
    convert_64_impl(buffer, value);
    return buffer;
}

char* convert_int64(char buffer[kGoodBufferSize], const std::int64_t value)
{
    char* start = buffer;
    std::uint64_t uval;

    if (value < 0)
    {
        *start++ = '-';
        uval = ~static_cast<std::uint64_t>(value) + 1;
    }
    else
    {
        uval = value;
    }

    convert_64_impl(start, uval);
    return buffer;
}

} // ItoA

///////////////////////////////////////////////////////////////////////////////

int main()
{
    using namespace ItoA;

    print("---- TABLES ----\n");
    print_pow10_table(20, 64);
    print_pow10_table(10, 32);
    print("\n");

    char buffer[kGoodBufferSize];

    print("---- U64 ----\n");
    print("%s\n", convert_uint64(buffer, 0));
    print("%s\n", convert_uint64(buffer, 1));
    print("%s\n", convert_uint64(buffer, 2));
    print("%s\n", convert_uint64(buffer, 3));
    print("%s\n", convert_uint64(buffer, 4));
    print("%s\n", convert_uint64(buffer, 42));
    print("%s\n", convert_uint64(buffer, std::uint64_t(-1)));

    print("---- I64 ----\n");
    print("%s\n", convert_int64(buffer,  0));
    print("%s\n", convert_int64(buffer,  1));
    print("%s\n", convert_int64(buffer,  2));
    print("%s\n", convert_int64(buffer,  3));
    print("%s\n", convert_int64(buffer,  4));
    print("%s\n", convert_int64(buffer,  42));
    print("%s\n", convert_int64(buffer, -0));
    print("%s\n", convert_int64(buffer, -1));
    print("%s\n", convert_int64(buffer, -2));
    print("%s\n", convert_int64(buffer, -3));
    print("%s\n", convert_int64(buffer, -4));
    print("%s\n", convert_int64(buffer, -42));
}
	#include <cassert>
	#include <cstdint>
	#include <cstdarg>
	#include <cstdio>
	#include <cmath>

	#define WIN32_LEAN_AND_MEAN
	#include <Windows.h>
	#include <intrin.h>

	///////////////////////////////////////////////////////////////////////////////
	// Fast integer to string conversion inspired by Andrei Alexandrescu's
	// "Writing Fast Code" code::dive 2015 presentation about a revised
	// table-based int<=>string converter - only handles decimal conversion.
	//
	// The following implementation is actually based on similar
	// code present in Facebook's Folly Open Source library.
	///////////////////////////////////////////////////////////////////////////////

	namespace ItoA
	{

	//
	// Table gen helper
	//

	template<typename T, std::uint32_t N>
	constexpr std::uint32_t array_len(const T (&)[N])
	{
	return N;
	}

	void print(const char* format, ...)
	{
	va_list args;
	char buffer[256] = {'\0'};

	va_start(args, format);
	std::vsnprintf(buffer, sizeof(buffer), format, args);
	va_end(args);

	OutputDebugStringA(buffer);
	}

	void print_pow10_table(const std::uint32_t upper_bound, const std::uint32_t bit_width)
	{
	assert(bit_width == 32 \|\| bit_width == 64);
	const std::uint32_t idx = (bit_width / 32) - 1;

	const char* const int_types[] = { "std::uint32_t", "std::uint64_t" };
	const char* const tbl_suffixes[] = { "32", "64" };
	const char* const val_suffixes[] = { "", "ULL" };

	print("static const %s pow10_table_%s[%u] = {\n",
	int_types[idx], tbl_suffixes[idx], upper_bound);

	for (std::uint64_t i = 0; i < upper_bound; ++i)
	{
	const auto p = static_cast<std::uint64_t>(std::pow(10ULL, i));
	print(" %llu%s,\n", p, val_suffixes[idx]);
	}

	print("};\n");
	}

	//
	// int64 -> ASCII string converter
	//

	static const std::uint64_t pow10_table_64[20] = {
	1ULL,
	10ULL,
	100ULL,
	1000ULL,
	10000ULL,
	100000ULL,
	1000000ULL,
	10000000ULL,
	100000000ULL,
	1000000000ULL,
	10000000000ULL,
	100000000000ULL,
	1000000000000ULL,
	10000000000000ULL,
	100000000000000ULL,
	1000000000000000ULL,
	10000000000000000ULL,
	100000000000000000ULL,
	1000000000000000000ULL,
	10000000000000000000ULL,
	};

	// 20+1 is enough but might as well go for a power-of-two...
	constexpr int kGoodBufferSize = 32;

	// 'value' assumed not zero.
	inline int clz_64(const std::uint64_t value)
	{
	DWORD first_set_bit = 0;
	const BOOLEAN found_bit = _BitScanReverse64(&first_set_bit, value);
	assert(found_bit); (void)found_bit; // assert-only
	return 63 - first_set_bit;
	}

	// Returns the number of digits in the base 10 representation of an uint64.
	inline std::uint32_t digits10_64(const std::uint64_t value)
	{
	// Count Leading Zeros operation not valid for 0 - handle that here.
	if (value == 0)
	{
	return 1;
	}

	// Bits is in the ballpark of log_2(value).
	const int leading_zeroes = clz_64(value);
	const int bits = 63 - leading_zeroes;

	// Approximate log_10(value) == log_10(2) * bits.
	// Integer magic below: 77/256 is appx. 0.3010 (log_10(2)).
	// The +1 is to make this the ceiling of the log_10 estimate.
	const std::uint32_t min_length = 1 + ((bits * 77) >> 8);

	// Return that log_10 lower bound, plus adjust if input >= 10^(that bound)
	// in case there's a small error and we misjudged length.
	assert(min_length < array_len(pow10_table_64));
	return min_length + static_cast<std::uint32_t>(value >= pow10_table_64[min_length]);
	}

	// Copies the ASCII base 10 representation of 'value' into 'buffer' and
	// null terminates the buffer. Assumes the buffer points to digits10_64(value)
	// bytes of valid memory or more. Note that uint64 needs at most 20 bytes,
	// uint32 needs at most 10 bytes, plus the null terminator. kGoodBufferSize covers all cases.
	inline void convert_64_impl(char* const buffer, std::uint64_t value)
	{
	const std::uint32_t digits = digits10_64(value);
	std::uint32_t pos = digits - 1;

	while (value >= 10)
	{
	const auto q = value / 10;
	const auto r = static_cast<char>(value % 10);
	buffer[pos--] = '0' + r;
	value = q;
	}

	// Handle last digit and terminate the string.
	buffer[pos] = static_cast<char>(value) + '0';
	buffer[digits] = '\0';
	}

	char* convert_uint64(char buffer[kGoodBufferSize], const std::uint64_t value)
	{
	convert_64_impl(buffer, value);
	return buffer;
	}

	char* convert_int64(char buffer[kGoodBufferSize], const std::int64_t value)
	{
	char* start = buffer;
	std::uint64_t uval;

	if (value < 0)
	{
	*start++ = '-';
	uval = ~static_cast<std::uint64_t>(value) + 1;
	}
	else
	{
	uval = value;
	}

	convert_64_impl(start, uval);
	return buffer;
	}

	} // ItoA

	///////////////////////////////////////////////////////////////////////////////

	int main()
	{
	using namespace ItoA;

	print("---- TABLES ----\n");
	print_pow10_table(20, 64);
	print_pow10_table(10, 32);
	print("\n");

	char buffer[kGoodBufferSize];

	print("---- U64 ----\n");
	print("%s\n", convert_uint64(buffer, 0));
	print("%s\n", convert_uint64(buffer, 1));
	print("%s\n", convert_uint64(buffer, 2));
	print("%s\n", convert_uint64(buffer, 3));
	print("%s\n", convert_uint64(buffer, 4));
	print("%s\n", convert_uint64(buffer, 42));
	print("%s\n", convert_uint64(buffer, std::uint64_t(-1)));

	print("---- I64 ----\n");
	print("%s\n", convert_int64(buffer, 0));
	print("%s\n", convert_int64(buffer, 1));
	print("%s\n", convert_int64(buffer, 2));
	print("%s\n", convert_int64(buffer, 3));
	print("%s\n", convert_int64(buffer, 4));
	print("%s\n", convert_int64(buffer, 42));
	print("%s\n", convert_int64(buffer, -0));
	print("%s\n", convert_int64(buffer, -1));
	print("%s\n", convert_int64(buffer, -2));
	print("%s\n", convert_int64(buffer, -3));
	print("%s\n", convert_int64(buffer, -4));
	print("%s\n", convert_int64(buffer, -42));
	}