Skip to content

Instantly share code, notes, and snippets.

@niXman
Last active February 17, 2024 13:16
Show Gist options
  • Save niXman/5c0e53ad0dc98e66399658915747828e to your computer and use it in GitHub Desktop.
Save niXman/5c0e53ad0dc98e66399658915747828e to your computer and use it in GitHub Desktop.
fastest string to unsigned naive implementation
// based on https://stackoverflow.com/a/73078442/2878380
#include <map>
#include <string>
#include <vector>
#include <algorithm>
#include <chrono>
#include <charconv>
#include <cassert>
#include <cstdint>
#include <cinttypes>
#if !defined(__fallthrough) && defined(__has_cpp_attribute)
#if __has_cpp_attribute(fallthrough)
#define __FJ__FALLTHROUGH [[fallthrough]]
#elif __has_cpp_attribute(gnu::fallthrough)
#define __FJ__FALLTHROUGH [[gnu::fallthrough]]
#endif
#endif
#if !defined(__FJ__FALLTHROUGH) && defined(__has_attribute)
#if __has_attribute(__fallthrough__)
#define __FJ__FALLTHROUGH __attribute__((__fallthrough__))
#endif
#endif
#if !defined(__FJ__FALLTHROUGH) /* well, we tried */
#define __FJ__FALLTHROUGH ((void)0)
#endif
auto NowNs() {
using namespace std::chrono;
return duration_cast<nanoseconds>(system_clock::now().time_since_epoch()).count();
}
int64_t fast_null(const char* str, std::size_t) { return (str[0] - '0') + (str[1] - '0'); }
int64_t fast_atou(const char* str, std::size_t)
{
int64_t val = 0;
while (*str) val = (val << 1) + (val << 3) + *(str++) - 48;
return val;
}
std::uint64_t nixmans_atou64(const char *ptr, std::size_t len) {
# define __FJ__PER_CHAR_EXPR(n, mul) ((str[len - n] - '0') * mul)
constexpr std::uint64_t
_1 = 1ull, _10 = _1 *10, _100 = _10 *10
,_1k = _100 *10, _10k = _1k *10, _100k = _10k *10
,_1m = _100k *10, _10m = _1m *10, _100m = _10m *10
,_1b = _100m *10, _10b = _1b *10, _100b = _10b *10
,_1t = _100b *10, _10t = _1t *10, _100t = _10t *10
,_1kd= _100t *10, _10kd= _1kd*10, _100kd= _10kd*10
,_1kn= _100kd*10, _10kn= _1kn*10
;
const auto *str = reinterpret_cast<const std::uint8_t *>(ptr);
std::uint64_t res = 0;
switch ( len ) {
case 20: res += __FJ__PER_CHAR_EXPR(20, _10kn ); __FJ__FALLTHROUGH;
case 19: res += __FJ__PER_CHAR_EXPR(19, _1kn ); __FJ__FALLTHROUGH;
case 18: res += __FJ__PER_CHAR_EXPR(18, _100kd); __FJ__FALLTHROUGH;
case 17: res += __FJ__PER_CHAR_EXPR(17, _10kd ); __FJ__FALLTHROUGH;
case 16: res += __FJ__PER_CHAR_EXPR(16, _1kd ); __FJ__FALLTHROUGH;
case 15: res += __FJ__PER_CHAR_EXPR(15, _100t ); __FJ__FALLTHROUGH;
case 14: res += __FJ__PER_CHAR_EXPR(14, _10t ); __FJ__FALLTHROUGH;
case 13: res += __FJ__PER_CHAR_EXPR(13, _1t ); __FJ__FALLTHROUGH;
case 12: res += __FJ__PER_CHAR_EXPR(12, _100b ); __FJ__FALLTHROUGH;
case 11: res += __FJ__PER_CHAR_EXPR(11, _10b ); __FJ__FALLTHROUGH;
case 10: res += __FJ__PER_CHAR_EXPR(10, _1b ); __FJ__FALLTHROUGH;
case 9 : res += __FJ__PER_CHAR_EXPR( 9, _100m ); __FJ__FALLTHROUGH;
case 8 : res += __FJ__PER_CHAR_EXPR( 8, _10m ); __FJ__FALLTHROUGH;
case 7 : res += __FJ__PER_CHAR_EXPR( 7, _1m ); __FJ__FALLTHROUGH;
case 6 : res += __FJ__PER_CHAR_EXPR( 6, _100k ); __FJ__FALLTHROUGH;
case 5 : res += __FJ__PER_CHAR_EXPR( 5, _10k ); __FJ__FALLTHROUGH;
case 4 : res += __FJ__PER_CHAR_EXPR( 4, _1k ); __FJ__FALLTHROUGH;
case 3 : res += __FJ__PER_CHAR_EXPR( 3, _100 ); __FJ__FALLTHROUGH;
case 2 : res += __FJ__PER_CHAR_EXPR( 2, _10 ); __FJ__FALLTHROUGH;
case 1 : res += __FJ__PER_CHAR_EXPR( 1, _1 );
}
# undef __FJ__PER_CHAR_EXPR
return res;
}
std::uint64_t nixmans_atou64_shift(const char *ptr, std::size_t len) {
# define __FJ__PER_CHAR_EXPR(n) ((res << 1) + (res << 3) + (str[len - n] - '0'))
const auto *str = reinterpret_cast<const std::uint8_t *>(ptr);
std::uint64_t res = 0;
switch ( len ) {
case 20: res = __FJ__PER_CHAR_EXPR(20); __FJ__FALLTHROUGH;
case 19: res = __FJ__PER_CHAR_EXPR(19); __FJ__FALLTHROUGH;
case 18: res = __FJ__PER_CHAR_EXPR(18); __FJ__FALLTHROUGH;
case 17: res = __FJ__PER_CHAR_EXPR(17); __FJ__FALLTHROUGH;
case 16: res = __FJ__PER_CHAR_EXPR(16); __FJ__FALLTHROUGH;
case 15: res = __FJ__PER_CHAR_EXPR(15); __FJ__FALLTHROUGH;
case 14: res = __FJ__PER_CHAR_EXPR(14); __FJ__FALLTHROUGH;
case 13: res = __FJ__PER_CHAR_EXPR(13); __FJ__FALLTHROUGH;
case 12: res = __FJ__PER_CHAR_EXPR(12); __FJ__FALLTHROUGH;
case 11: res = __FJ__PER_CHAR_EXPR(11); __FJ__FALLTHROUGH;
case 10: res = __FJ__PER_CHAR_EXPR(10); __FJ__FALLTHROUGH;
case 9 : res = __FJ__PER_CHAR_EXPR( 9); __FJ__FALLTHROUGH;
case 8 : res = __FJ__PER_CHAR_EXPR( 8); __FJ__FALLTHROUGH;
case 7 : res = __FJ__PER_CHAR_EXPR( 7); __FJ__FALLTHROUGH;
case 6 : res = __FJ__PER_CHAR_EXPR( 6); __FJ__FALLTHROUGH;
case 5 : res = __FJ__PER_CHAR_EXPR( 5); __FJ__FALLTHROUGH;
case 4 : res = __FJ__PER_CHAR_EXPR( 4); __FJ__FALLTHROUGH;
case 3 : res = __FJ__PER_CHAR_EXPR( 3); __FJ__FALLTHROUGH;
case 2 : res = __FJ__PER_CHAR_EXPR( 2); __FJ__FALLTHROUGH;
case 1 : res = __FJ__PER_CHAR_EXPR( 1);
}
# undef __FJ__PER_CHAR_EXPR
return res;
}
__attribute__((flatten)) int64_t from_chars(const char *str, std::size_t len) {
int64_t res;
std::from_chars(str, str+len, res);
return res;
}
int64_t FastAtoiBitShift(const char* str, std::size_t)
{
int64_t val = 0;
while (*str) val = (val << 3) + (val << 1) + (*str++ - '0');
return val;
}
int64_t FastAtoiMul10(const char* str, std::size_t)
{
int64_t val = 0;
while (*str) val = val * 10 + (*str++ - '0');
return val;
}
int64_t FastAtoiCompare(const char* str, std::size_t)
{
int64_t val = 0;
uint8_t x;
while ((x = uint8_t(*str++ - '0')) <= 9) val = val * 10 + x;
return val;
}
uint64_t FastAtoiCompareu(const char* str, std::size_t)
{
uint64_t val = 0;
uint8_t x;
while ((x = uint8_t(*str++ - '0')) <= 9) val = val * 10 + x;
return val;
}
int32_t FastAtoi32(const char* str, std::size_t)
{
int32_t val = 0;
int sign = 0;
if (*str == '-')
{
sign = 1;
++str;
}
uint8_t digit;
while ((digit = uint8_t(*str++ - '0')) <= 9) val = val * 10 + digit;
return sign ? -val : val;
}
int32_t FastAtoi32u(const char* str, std::size_t)
{
int32_t val = 0;
uint8_t digit;
while ((digit = uint8_t(*str++ - '0')) <= 9) val = val * 10 + digit;
return val;
}
uint32_t FastAtoi32uu(const char* str, std::size_t)
{
uint32_t val = 0;
uint8_t digit;
while ((digit = uint8_t(*str++ - '0')) <= 9) val = val * 10u + digit;
return val;
}
int64_t FastAtoi64(const char* str, std::size_t)
{
int64_t val = 0;
int sign = 0;
if (*str == '-')
{
sign = 1;
++str;
}
uint8_t digit;
while ((digit = uint8_t(*str++ - '0')) <= 9) val = val * 10 + digit;
return sign ? -val : val;
}
int64_t FastAtoi64u(const char* str, std::size_t)
{
int64_t val = 0;
uint8_t digit;
while ((digit = uint8_t(*str++ - '0')) <= 9) val = val * 10 + digit;
return val;
}
uint64_t FastAtoi64uu(const char* str, std::size_t)
{
uint64_t val = 0;
uint8_t digit;
while ((digit = uint8_t(*str++ - '0')) <= 9) val = val * 10u + digit;
return val;
}
__attribute__((flatten)) int64_t wrapped_atoi(const char *s, std::size_t) { return atoi(s); }
__attribute__((flatten)) int64_t wrapped_atoll(const char *s, std::size_t) { return atoll(s); }
__attribute__((flatten)) int64_t wrapped_stoll(const char *s, std::size_t) { return std::stoll(s); }
__attribute__((flatten)) int64_t wrapped_stoull(const char *s, std::size_t) { return std::stoull(s); }
/*************************************************************************************************/
int main() {
std::map<std::string, std::vector<int64_t>> funcTimes;
std::map<std::string, std::vector<int64_t>> funcTotals;
std::map<std::string, int64_t> funcFinals;
#define BENCH_ATOI(func) \
do { \
auto start = NowNs(); \
int64_t z = 0; \
char string[] = "000001987"; \
for (int i = 1e7; i >= 0; --i) { \
string[0] = '0' + (i + 0) % 10; \
string[1] = '0' + (i + 1) % 10; \
string[2] = '0' + (i + 3) % 10; \
string[3] = '0' + (i + 5) % 10; \
string[4] = '0' + (i + 9) % 10; \
z += func(string, sizeof(string)-1); \
} \
auto elapsed = NowNs() - start; \
funcTimes[#func].push_back(elapsed); \
funcTotals[#func].push_back(z); \
} \
while (0)
constexpr auto repeats = 16u;
for ( auto i = 0u; i < repeats; ++i ) {
BENCH_ATOI(wrapped_atoi);
BENCH_ATOI(wrapped_atoll);
BENCH_ATOI(wrapped_stoll);
BENCH_ATOI(wrapped_stoull);
//
BENCH_ATOI(fast_null);
BENCH_ATOI(fast_atou);
BENCH_ATOI(nixmans_atou64);
BENCH_ATOI(nixmans_atou64_shift);
BENCH_ATOI(from_chars);
BENCH_ATOI(FastAtoiBitShift);
BENCH_ATOI(FastAtoiMul10);
BENCH_ATOI(FastAtoiCompare);
BENCH_ATOI(FastAtoiCompareu);
//
BENCH_ATOI(FastAtoi32);
BENCH_ATOI(FastAtoi32u);
BENCH_ATOI(FastAtoi32uu);
BENCH_ATOI(FastAtoi64);
BENCH_ATOI(FastAtoi64u);
BENCH_ATOI(FastAtoi64uu);
}
for ( auto& [func, times] : funcTimes ) {
std::sort(times.begin(), times.end(), [](const auto& a, const auto& b) { return a < b; });
std::printf("%-20s: %" PRId64 "\n", func.c_str(), funcTotals[func][0]);
int64_t total = 0;
for ( auto i = 4u; i <= 11u; ++i )
total += times[i];
total /= 8;
funcFinals[func] = total;
}
const auto base = funcFinals["wrapped_atoi"];
const auto overhead = funcFinals["fast_null"];
for (const auto& [func, final] : funcFinals) {
const char *funcname = func.c_str();
const auto funcfinal = final;
const auto funcavg = base * 1.0 / funcfinal;
const auto without_overhead = funcfinal - overhead;
const auto final_result = (base - overhead) * 1.0 / (final - overhead);
std::printf("%-20s: %9" PRId64 " ns => %5.3gx : %9" PRId64 " ns => %5.3gx\n", funcname, funcfinal, funcavg, without_overhead, final_result);
}
}
// g++ -std=c++17 -O3 strtoint.cpp -o strtoint
// FastAtoi32 : 86412175 ns => 1.84x : 74167056 ns => 1.98x
// FastAtoi32u : 86440254 ns => 1.84x : 74195135 ns => 1.98x
// FastAtoi32uu : 83578425 ns => 1.9x : 71333306 ns => 2.06x
// FastAtoi64 : 81707263 ns => 1.94x : 69462144 ns => 2.11x
// FastAtoi64u : 85603720 ns => 1.86x : 73358601 ns => 2x
// FastAtoi64uu : 87482618 ns => 1.82x : 75237499 ns => 1.95x
// FastAtoiBitShift : 72057189 ns => 2.2x : 59812070 ns => 2.45x
// FastAtoiCompare : 85592740 ns => 1.86x : 73347621 ns => 2x
// FastAtoiCompareu : 87515399 ns => 1.82x : 75270280 ns => 1.95x
// FastAtoiMul10 : 72196698 ns => 2.2x : 59951579 ns => 2.45x
// fast_atou : 67010071 ns => 2.37x : 54764952 ns => 2.68x
// fast_null : 12245119 ns => 13x : 0 ns => infx
// from_chars : 37875506 ns => 4.19x : 25630387 ns => 5.72x
// nixmans_atou64 : 29531256 ns => 5.38x : 17286137 ns => 8.48x
// nixmans_atou64_shift: 26193012 ns => 6.07x : 13947893 ns => 10.5x
// wrapped_atoi : 158876770 ns => 1x : 146631651 ns => 1x
// wrapped_atoll : 160072215 ns => 0.993x : 147827096 ns => 0.992x
// wrapped_stoll : 214041391 ns => 0.742x : 201796272 ns => 0.727x
// wrapped_stoull : 214207486 ns => 0.742x : 201962367 ns => 0.726x
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment