Last active
February 17, 2024 13:16
-
-
Save niXman/5c0e53ad0dc98e66399658915747828e to your computer and use it in GitHub Desktop.
fastest string to unsigned naive implementation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// based on https://stackoverflow.com/a/73078442/2878380 | |
#include <map> | |
#include <string> | |
#include <vector> | |
#include <algorithm> | |
#include <chrono> | |
#include <charconv> | |
#include <cassert> | |
#include <cstdint> | |
#include <cinttypes> | |
#if !defined(__fallthrough) && defined(__has_cpp_attribute) | |
#if __has_cpp_attribute(fallthrough) | |
#define __FJ__FALLTHROUGH [[fallthrough]] | |
#elif __has_cpp_attribute(gnu::fallthrough) | |
#define __FJ__FALLTHROUGH [[gnu::fallthrough]] | |
#endif | |
#endif | |
#if !defined(__FJ__FALLTHROUGH) && defined(__has_attribute) | |
#if __has_attribute(__fallthrough__) | |
#define __FJ__FALLTHROUGH __attribute__((__fallthrough__)) | |
#endif | |
#endif | |
#if !defined(__FJ__FALLTHROUGH) /* well, we tried */ | |
#define __FJ__FALLTHROUGH ((void)0) | |
#endif | |
auto NowNs() { | |
using namespace std::chrono; | |
return duration_cast<nanoseconds>(system_clock::now().time_since_epoch()).count(); | |
} | |
int64_t fast_null(const char* str, std::size_t) { return (str[0] - '0') + (str[1] - '0'); } | |
int64_t fast_atou(const char* str, std::size_t) | |
{ | |
int64_t val = 0; | |
while (*str) val = (val << 1) + (val << 3) + *(str++) - 48; | |
return val; | |
} | |
std::uint64_t nixmans_atou64(const char *ptr, std::size_t len) { | |
# define __FJ__PER_CHAR_EXPR(n, mul) ((str[len - n] - '0') * mul) | |
constexpr std::uint64_t | |
_1 = 1ull, _10 = _1 *10, _100 = _10 *10 | |
,_1k = _100 *10, _10k = _1k *10, _100k = _10k *10 | |
,_1m = _100k *10, _10m = _1m *10, _100m = _10m *10 | |
,_1b = _100m *10, _10b = _1b *10, _100b = _10b *10 | |
,_1t = _100b *10, _10t = _1t *10, _100t = _10t *10 | |
,_1kd= _100t *10, _10kd= _1kd*10, _100kd= _10kd*10 | |
,_1kn= _100kd*10, _10kn= _1kn*10 | |
; | |
const auto *str = reinterpret_cast<const std::uint8_t *>(ptr); | |
std::uint64_t res = 0; | |
switch ( len ) { | |
case 20: res += __FJ__PER_CHAR_EXPR(20, _10kn ); __FJ__FALLTHROUGH; | |
case 19: res += __FJ__PER_CHAR_EXPR(19, _1kn ); __FJ__FALLTHROUGH; | |
case 18: res += __FJ__PER_CHAR_EXPR(18, _100kd); __FJ__FALLTHROUGH; | |
case 17: res += __FJ__PER_CHAR_EXPR(17, _10kd ); __FJ__FALLTHROUGH; | |
case 16: res += __FJ__PER_CHAR_EXPR(16, _1kd ); __FJ__FALLTHROUGH; | |
case 15: res += __FJ__PER_CHAR_EXPR(15, _100t ); __FJ__FALLTHROUGH; | |
case 14: res += __FJ__PER_CHAR_EXPR(14, _10t ); __FJ__FALLTHROUGH; | |
case 13: res += __FJ__PER_CHAR_EXPR(13, _1t ); __FJ__FALLTHROUGH; | |
case 12: res += __FJ__PER_CHAR_EXPR(12, _100b ); __FJ__FALLTHROUGH; | |
case 11: res += __FJ__PER_CHAR_EXPR(11, _10b ); __FJ__FALLTHROUGH; | |
case 10: res += __FJ__PER_CHAR_EXPR(10, _1b ); __FJ__FALLTHROUGH; | |
case 9 : res += __FJ__PER_CHAR_EXPR( 9, _100m ); __FJ__FALLTHROUGH; | |
case 8 : res += __FJ__PER_CHAR_EXPR( 8, _10m ); __FJ__FALLTHROUGH; | |
case 7 : res += __FJ__PER_CHAR_EXPR( 7, _1m ); __FJ__FALLTHROUGH; | |
case 6 : res += __FJ__PER_CHAR_EXPR( 6, _100k ); __FJ__FALLTHROUGH; | |
case 5 : res += __FJ__PER_CHAR_EXPR( 5, _10k ); __FJ__FALLTHROUGH; | |
case 4 : res += __FJ__PER_CHAR_EXPR( 4, _1k ); __FJ__FALLTHROUGH; | |
case 3 : res += __FJ__PER_CHAR_EXPR( 3, _100 ); __FJ__FALLTHROUGH; | |
case 2 : res += __FJ__PER_CHAR_EXPR( 2, _10 ); __FJ__FALLTHROUGH; | |
case 1 : res += __FJ__PER_CHAR_EXPR( 1, _1 ); | |
} | |
# undef __FJ__PER_CHAR_EXPR | |
return res; | |
} | |
std::uint64_t nixmans_atou64_shift(const char *ptr, std::size_t len) { | |
# define __FJ__PER_CHAR_EXPR(n) ((res << 1) + (res << 3) + (str[len - n] - '0')) | |
const auto *str = reinterpret_cast<const std::uint8_t *>(ptr); | |
std::uint64_t res = 0; | |
switch ( len ) { | |
case 20: res = __FJ__PER_CHAR_EXPR(20); __FJ__FALLTHROUGH; | |
case 19: res = __FJ__PER_CHAR_EXPR(19); __FJ__FALLTHROUGH; | |
case 18: res = __FJ__PER_CHAR_EXPR(18); __FJ__FALLTHROUGH; | |
case 17: res = __FJ__PER_CHAR_EXPR(17); __FJ__FALLTHROUGH; | |
case 16: res = __FJ__PER_CHAR_EXPR(16); __FJ__FALLTHROUGH; | |
case 15: res = __FJ__PER_CHAR_EXPR(15); __FJ__FALLTHROUGH; | |
case 14: res = __FJ__PER_CHAR_EXPR(14); __FJ__FALLTHROUGH; | |
case 13: res = __FJ__PER_CHAR_EXPR(13); __FJ__FALLTHROUGH; | |
case 12: res = __FJ__PER_CHAR_EXPR(12); __FJ__FALLTHROUGH; | |
case 11: res = __FJ__PER_CHAR_EXPR(11); __FJ__FALLTHROUGH; | |
case 10: res = __FJ__PER_CHAR_EXPR(10); __FJ__FALLTHROUGH; | |
case 9 : res = __FJ__PER_CHAR_EXPR( 9); __FJ__FALLTHROUGH; | |
case 8 : res = __FJ__PER_CHAR_EXPR( 8); __FJ__FALLTHROUGH; | |
case 7 : res = __FJ__PER_CHAR_EXPR( 7); __FJ__FALLTHROUGH; | |
case 6 : res = __FJ__PER_CHAR_EXPR( 6); __FJ__FALLTHROUGH; | |
case 5 : res = __FJ__PER_CHAR_EXPR( 5); __FJ__FALLTHROUGH; | |
case 4 : res = __FJ__PER_CHAR_EXPR( 4); __FJ__FALLTHROUGH; | |
case 3 : res = __FJ__PER_CHAR_EXPR( 3); __FJ__FALLTHROUGH; | |
case 2 : res = __FJ__PER_CHAR_EXPR( 2); __FJ__FALLTHROUGH; | |
case 1 : res = __FJ__PER_CHAR_EXPR( 1); | |
} | |
# undef __FJ__PER_CHAR_EXPR | |
return res; | |
} | |
__attribute__((flatten)) int64_t from_chars(const char *str, std::size_t len) { | |
int64_t res; | |
std::from_chars(str, str+len, res); | |
return res; | |
} | |
int64_t FastAtoiBitShift(const char* str, std::size_t) | |
{ | |
int64_t val = 0; | |
while (*str) val = (val << 3) + (val << 1) + (*str++ - '0'); | |
return val; | |
} | |
int64_t FastAtoiMul10(const char* str, std::size_t) | |
{ | |
int64_t val = 0; | |
while (*str) val = val * 10 + (*str++ - '0'); | |
return val; | |
} | |
int64_t FastAtoiCompare(const char* str, std::size_t) | |
{ | |
int64_t val = 0; | |
uint8_t x; | |
while ((x = uint8_t(*str++ - '0')) <= 9) val = val * 10 + x; | |
return val; | |
} | |
uint64_t FastAtoiCompareu(const char* str, std::size_t) | |
{ | |
uint64_t val = 0; | |
uint8_t x; | |
while ((x = uint8_t(*str++ - '0')) <= 9) val = val * 10 + x; | |
return val; | |
} | |
int32_t FastAtoi32(const char* str, std::size_t) | |
{ | |
int32_t val = 0; | |
int sign = 0; | |
if (*str == '-') | |
{ | |
sign = 1; | |
++str; | |
} | |
uint8_t digit; | |
while ((digit = uint8_t(*str++ - '0')) <= 9) val = val * 10 + digit; | |
return sign ? -val : val; | |
} | |
int32_t FastAtoi32u(const char* str, std::size_t) | |
{ | |
int32_t val = 0; | |
uint8_t digit; | |
while ((digit = uint8_t(*str++ - '0')) <= 9) val = val * 10 + digit; | |
return val; | |
} | |
uint32_t FastAtoi32uu(const char* str, std::size_t) | |
{ | |
uint32_t val = 0; | |
uint8_t digit; | |
while ((digit = uint8_t(*str++ - '0')) <= 9) val = val * 10u + digit; | |
return val; | |
} | |
int64_t FastAtoi64(const char* str, std::size_t) | |
{ | |
int64_t val = 0; | |
int sign = 0; | |
if (*str == '-') | |
{ | |
sign = 1; | |
++str; | |
} | |
uint8_t digit; | |
while ((digit = uint8_t(*str++ - '0')) <= 9) val = val * 10 + digit; | |
return sign ? -val : val; | |
} | |
int64_t FastAtoi64u(const char* str, std::size_t) | |
{ | |
int64_t val = 0; | |
uint8_t digit; | |
while ((digit = uint8_t(*str++ - '0')) <= 9) val = val * 10 + digit; | |
return val; | |
} | |
uint64_t FastAtoi64uu(const char* str, std::size_t) | |
{ | |
uint64_t val = 0; | |
uint8_t digit; | |
while ((digit = uint8_t(*str++ - '0')) <= 9) val = val * 10u + digit; | |
return val; | |
} | |
__attribute__((flatten)) int64_t wrapped_atoi(const char *s, std::size_t) { return atoi(s); } | |
__attribute__((flatten)) int64_t wrapped_atoll(const char *s, std::size_t) { return atoll(s); } | |
__attribute__((flatten)) int64_t wrapped_stoll(const char *s, std::size_t) { return std::stoll(s); } | |
__attribute__((flatten)) int64_t wrapped_stoull(const char *s, std::size_t) { return std::stoull(s); } | |
/*************************************************************************************************/ | |
int main() { | |
std::map<std::string, std::vector<int64_t>> funcTimes; | |
std::map<std::string, std::vector<int64_t>> funcTotals; | |
std::map<std::string, int64_t> funcFinals; | |
#define BENCH_ATOI(func) \ | |
do { \ | |
auto start = NowNs(); \ | |
int64_t z = 0; \ | |
char string[] = "000001987"; \ | |
for (int i = 1e7; i >= 0; --i) { \ | |
string[0] = '0' + (i + 0) % 10; \ | |
string[1] = '0' + (i + 1) % 10; \ | |
string[2] = '0' + (i + 3) % 10; \ | |
string[3] = '0' + (i + 5) % 10; \ | |
string[4] = '0' + (i + 9) % 10; \ | |
z += func(string, sizeof(string)-1); \ | |
} \ | |
auto elapsed = NowNs() - start; \ | |
funcTimes[#func].push_back(elapsed); \ | |
funcTotals[#func].push_back(z); \ | |
} \ | |
while (0) | |
constexpr auto repeats = 16u; | |
for ( auto i = 0u; i < repeats; ++i ) { | |
BENCH_ATOI(wrapped_atoi); | |
BENCH_ATOI(wrapped_atoll); | |
BENCH_ATOI(wrapped_stoll); | |
BENCH_ATOI(wrapped_stoull); | |
// | |
BENCH_ATOI(fast_null); | |
BENCH_ATOI(fast_atou); | |
BENCH_ATOI(nixmans_atou64); | |
BENCH_ATOI(nixmans_atou64_shift); | |
BENCH_ATOI(from_chars); | |
BENCH_ATOI(FastAtoiBitShift); | |
BENCH_ATOI(FastAtoiMul10); | |
BENCH_ATOI(FastAtoiCompare); | |
BENCH_ATOI(FastAtoiCompareu); | |
// | |
BENCH_ATOI(FastAtoi32); | |
BENCH_ATOI(FastAtoi32u); | |
BENCH_ATOI(FastAtoi32uu); | |
BENCH_ATOI(FastAtoi64); | |
BENCH_ATOI(FastAtoi64u); | |
BENCH_ATOI(FastAtoi64uu); | |
} | |
for ( auto& [func, times] : funcTimes ) { | |
std::sort(times.begin(), times.end(), [](const auto& a, const auto& b) { return a < b; }); | |
std::printf("%-20s: %" PRId64 "\n", func.c_str(), funcTotals[func][0]); | |
int64_t total = 0; | |
for ( auto i = 4u; i <= 11u; ++i ) | |
total += times[i]; | |
total /= 8; | |
funcFinals[func] = total; | |
} | |
const auto base = funcFinals["wrapped_atoi"]; | |
const auto overhead = funcFinals["fast_null"]; | |
for (const auto& [func, final] : funcFinals) { | |
const char *funcname = func.c_str(); | |
const auto funcfinal = final; | |
const auto funcavg = base * 1.0 / funcfinal; | |
const auto without_overhead = funcfinal - overhead; | |
const auto final_result = (base - overhead) * 1.0 / (final - overhead); | |
std::printf("%-20s: %9" PRId64 " ns => %5.3gx : %9" PRId64 " ns => %5.3gx\n", funcname, funcfinal, funcavg, without_overhead, final_result); | |
} | |
} | |
// g++ -std=c++17 -O3 strtoint.cpp -o strtoint | |
// FastAtoi32 : 86412175 ns => 1.84x : 74167056 ns => 1.98x | |
// FastAtoi32u : 86440254 ns => 1.84x : 74195135 ns => 1.98x | |
// FastAtoi32uu : 83578425 ns => 1.9x : 71333306 ns => 2.06x | |
// FastAtoi64 : 81707263 ns => 1.94x : 69462144 ns => 2.11x | |
// FastAtoi64u : 85603720 ns => 1.86x : 73358601 ns => 2x | |
// FastAtoi64uu : 87482618 ns => 1.82x : 75237499 ns => 1.95x | |
// FastAtoiBitShift : 72057189 ns => 2.2x : 59812070 ns => 2.45x | |
// FastAtoiCompare : 85592740 ns => 1.86x : 73347621 ns => 2x | |
// FastAtoiCompareu : 87515399 ns => 1.82x : 75270280 ns => 1.95x | |
// FastAtoiMul10 : 72196698 ns => 2.2x : 59951579 ns => 2.45x | |
// fast_atou : 67010071 ns => 2.37x : 54764952 ns => 2.68x | |
// fast_null : 12245119 ns => 13x : 0 ns => infx | |
// from_chars : 37875506 ns => 4.19x : 25630387 ns => 5.72x | |
// nixmans_atou64 : 29531256 ns => 5.38x : 17286137 ns => 8.48x | |
// nixmans_atou64_shift: 26193012 ns => 6.07x : 13947893 ns => 10.5x | |
// wrapped_atoi : 158876770 ns => 1x : 146631651 ns => 1x | |
// wrapped_atoll : 160072215 ns => 0.993x : 147827096 ns => 0.992x | |
// wrapped_stoll : 214041391 ns => 0.742x : 201796272 ns => 0.727x | |
// wrapped_stoull : 214207486 ns => 0.742x : 201962367 ns => 0.726x |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment