|
#include <iostream> |
|
#include <sstream> |
|
#include <iomanip> |
|
#include <string_view> |
|
#include <charconv> |
|
#include <ctime> |
|
#include <cstring> |
|
|
|
#include <benchmark/benchmark.h> |
|
|
|
#include <fmt/ostream.h> |
|
#include <fmt/compile.h> |
|
|
|
#include <immintrin.h> |
|
|
|
namespace fast { |
|
// write in the format 'aa bb cc' |
|
uint64_t writeTwoDecimalsWithSeparatorImpl(uint32_t a, uint32_t b, uint32_t c) |
|
{ |
|
// send values to destination bytes |
|
uint64_t digits = a | (b << 24) | (uint64_t(c) << 48); |
|
// convert each value to BCD |
|
/* we have x = a*10 + b and we want to convert it to BCD: |
|
y = a*16 + b |
|
the difference is |
|
y - x = a * 6 |
|
a can be found from x: |
|
a = floor(x / 10) |
|
then |
|
y = x + a * 6 = x + floor(x / 10) * 6 |
|
floor(x / 10) is (x * 205) >> 11 (needs 16 bits) */ |
|
digits += (((digits * 205) >> 11) & 0x000f0000'0f00000f) * 6; |
|
// put low hex digit to high byte and high hex digit to low byte |
|
return ((digits & 0x00f00000'f00000f0) >> 4) | ((digits & 0x000f0000'0f00000f) << 8); |
|
} |
|
|
|
template<char separator> |
|
void writeTwoDecimalsWithSeparator(char(&s)[8], uint32_t a, uint32_t b, uint32_t c) |
|
{ |
|
uint64_t digits = writeTwoDecimalsWithSeparatorImpl(a, b, c); |
|
// add value of '0' ASCII char to each byte |
|
digits |= 0x30300030'30003030 | (uint64_t(separator) << 16) | (uint64_t(separator) << 40); |
|
memcpy(s, &digits, 8); |
|
} |
|
|
|
void formatISO8601(char *s, uint32_t year, uint32_t month, uint32_t day, uint32_t hours, uint32_t minutes, uint32_t seconds) { |
|
const auto yearHundreds = year / 100; |
|
const auto yearTensAndUnits = year % 100; |
|
|
|
s[0] = static_cast<char>('0' + yearHundreds / 10); |
|
s[1] = static_cast<char>('0' + yearHundreds % 10); |
|
|
|
char bufD[8] = {}; |
|
writeTwoDecimalsWithSeparator<'-'>(bufD, yearTensAndUnits, month, day); |
|
memcpy(s + 2, bufD, 8); |
|
s[10] = 'T'; |
|
char bufT[8] = {}; |
|
writeTwoDecimalsWithSeparator<':'>(bufT, hours, minutes, seconds); |
|
memcpy(s + 11, bufT, 8); |
|
s[19] = 'Z'; |
|
} |
|
}//namespace fast |
|
|
|
namespace simd { |
|
// write in the format 'aa bb cc' |
|
auto writeTwoDecimalsWithSeparatorImpl(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e, uint32_t f) |
|
{ |
|
// send values to destination bytes |
|
auto digits = _mm_set_epi64x(d | (e << 24) | (uint64_t(f) << 48), a | (b << 24) | (uint64_t(c) << 48)); |
|
// convert each value to BCD |
|
/* we have x = a*10 + b and we want to convert it to BCD: |
|
y = a*16 + b |
|
the difference is |
|
y - x = a * 6 |
|
a can be found from x: |
|
a = floor(x / 10) |
|
then |
|
y = x + a * 6 = x + floor(x / 10) * 6 |
|
floor(x / 10) is (x * 205) >> 11 (needs 16 bits) */ |
|
const auto diff = _mm_srli_epi64(_mm_mullo_epi64(digits, _mm_set1_epi64x(205)), 11); |
|
const auto loDigitMask = _mm_set1_epi64x(0x000f0000'0f00000f); |
|
digits = _mm_add_epi64(digits, _mm_mullo_epi64(_mm_and_si128(diff, loDigitMask), _mm_set1_epi64x(6))); |
|
const auto loBytes = _mm_srli_epi64(_mm_and_si128(digits, _mm_set1_epi64x(0x00f00000'f00000f0)), 4); |
|
const auto hiBytes = _mm_slli_epi64(_mm_and_si128(digits, loDigitMask), 8); |
|
const auto asciiMaskAndSeparators = _mm_set_epi64x( |
|
0x30300030'30003030 | (uint64_t(':') << 16) | (uint64_t(':') << 40), |
|
0x30300030'30003030 | (uint64_t('-') << 16) | (uint64_t('-') << 40)); |
|
return _mm_or_si128(_mm_or_si128(loBytes, hiBytes), asciiMaskAndSeparators); |
|
} |
|
|
|
void formatISO8601(char *s, uint32_t year, uint32_t month, uint32_t day, uint32_t hours, uint32_t minutes, uint32_t seconds) { |
|
const auto yearHundreds = year / 100; |
|
const auto yearTensAndUnits = year % 100; |
|
|
|
s[0] = static_cast<char>('0' + yearHundreds / 10); |
|
s[1] = static_cast<char>('0' + yearHundreds % 10); |
|
|
|
const auto result = writeTwoDecimalsWithSeparatorImpl(yearTensAndUnits, month, day, hours, minutes, seconds); |
|
_mm_storeu_si128(reinterpret_cast<__m128i *>(s + 2), result); |
|
std::memmove(s + 11, s + 10, 8); |
|
s[10] = 'T'; |
|
s[19] = 'Z'; |
|
} |
|
}//namespace simd |
|
|
|
namespace manual { |
|
void formatISO8601(char *s, uint32_t year, uint32_t month, uint32_t day, uint32_t hours, uint32_t minutes, uint32_t seconds) { |
|
auto write2Decimals = [](char *s, uint32_t n) { |
|
s[0] = static_cast<char>('0' + n / 10); |
|
s[1] = static_cast<char>('0' + n % 10); |
|
}; |
|
|
|
//0123-56-89T12:45:78Z |
|
// 0 0 1111111111 |
|
//01234567890123456789 |
|
const size_t yearLength = uint32_t(year != 0) + uint32_t(year >= 10) + uint32_t(year >= 100) + uint32_t(year >= 1000); |
|
memset(s, '0', 4 - yearLength); |
|
std::to_chars(s + 4 - yearLength, s + 4, year); |
|
s[4] = '-'; |
|
write2Decimals(s + 5, month); |
|
s[7] = '-'; |
|
write2Decimals(s + 8, day); |
|
s[10] = 'T'; |
|
write2Decimals(s + 11, hours); |
|
s[13] = ':'; |
|
write2Decimals(s + 14, minutes); |
|
s[16] = ':'; |
|
write2Decimals(s + 17, seconds); |
|
s[19] = 'Z'; |
|
} |
|
}//namespace manual |
|
|
|
static void iso8601_stream(benchmark::State &state) { |
|
srand(0); |
|
const uint32_t |
|
a = rand() % 10000, |
|
b = rand() % 100, |
|
c = rand() % 100, |
|
d = rand() % 100, |
|
e = rand() % 100, |
|
f = rand() % 100; |
|
for (auto _ : state) { |
|
std::ostringstream s; |
|
using std::setw; |
|
|
|
const auto fillChar = s.fill('0'); |
|
s << a << '-' << setw(2) << b << '-' << setw(2) << c << 'T' |
|
<< setw(2) << d << ':' << setw(2) << e << ':' << setw(2) << f << 'Z'; |
|
s.fill(fillChar); |
|
benchmark::DoNotOptimize(s); |
|
} |
|
} |
|
BENCHMARK(iso8601_stream); |
|
|
|
static void iso8601_stream_fmt(benchmark::State &state) { |
|
srand(0); |
|
const uint32_t |
|
a = rand() % 10000, |
|
b = rand() % 100, |
|
c = rand() % 100, |
|
d = rand() % 100, |
|
e = rand() % 100, |
|
f = rand() % 100; |
|
for (auto _ : state) { |
|
std::ostringstream s; |
|
fmt::print(s, "{}-{:0>2}-{:0>2}T{:0>2}:{:0>2}:{:0>2}Z", a, b, c, d, e, f); |
|
benchmark::DoNotOptimize(s); |
|
} |
|
} |
|
BENCHMARK(iso8601_stream_fmt); |
|
|
|
|
|
static void iso8601_fmt(benchmark::State &state) { |
|
srand(0); |
|
const uint32_t |
|
a = rand() % 10000, |
|
b = rand() % 100, |
|
c = rand() % 100, |
|
d = rand() % 100, |
|
e = rand() % 100, |
|
f = rand() % 100; |
|
for (auto _ : state) { |
|
char buf[20] = {}; |
|
fmt::format_to(buf, "{}-{:0>2}-{:0>2}T{:0>2}:{:0>2}:{:0>2}Z", a, b, c, d, e, f); |
|
benchmark::DoNotOptimize(buf); |
|
} |
|
} |
|
BENCHMARK(iso8601_fmt); |
|
|
|
static void iso8601_strftime(benchmark::State &state) { |
|
srand(0); |
|
tm t; |
|
t.tm_year = rand() % 10000 - 1900; |
|
t.tm_mon = rand() % 12 + 1; |
|
t.tm_mday = rand() % 31; |
|
t.tm_hour = rand() % 24; |
|
t.tm_min = rand() % 60; |
|
t.tm_sec = rand() % 60; |
|
for (auto _ : state) { |
|
char buf[21] = {}; |
|
std::strftime(buf, 21, "%FT%TZ", &t); |
|
benchmark::DoNotOptimize(buf); |
|
} |
|
} |
|
BENCHMARK(iso8601_strftime); |
|
|
|
static void iso8601_fmt2(benchmark::State &state) { |
|
srand(0); |
|
const uint32_t |
|
a = rand() % 10000, |
|
b = rand() % 100, |
|
c = rand() % 100, |
|
d = rand() % 100, |
|
e = rand() % 100, |
|
f = rand() % 100; |
|
for (auto _ : state) { |
|
char buf[20] = {}; |
|
fmt::format_to(buf, FMT_COMPILE("{}-{:0>2}-{:0>2}T{:0>2}:{:0>2}:{:0>2}Z"), a, b, c, d, e, f); |
|
benchmark::DoNotOptimize(buf); |
|
} |
|
} |
|
BENCHMARK(iso8601_fmt2); |
|
|
|
static void iso8601_manual(benchmark::State &state) { |
|
srand(0); |
|
const uint32_t |
|
a = rand() % 10000, |
|
b = rand() % 100, |
|
c = rand() % 100, |
|
d = rand() % 100, |
|
e = rand() % 100, |
|
f = rand() % 100; |
|
|
|
auto write2Decimals = [](char *s, uint32_t n) { |
|
s[0] = static_cast<char>('0' + n / 10); |
|
s[1] = static_cast<char>('0' + n % 10); |
|
}; |
|
|
|
for (auto _ : state) { |
|
char res[20] = {}; |
|
manual::formatISO8601(res, a, b, c, d, e, f); |
|
benchmark::DoNotOptimize(res); |
|
} |
|
} |
|
BENCHMARK(iso8601_manual); |
|
|
|
static void iso8601_fast(benchmark::State &state) { |
|
srand(0); |
|
const uint32_t |
|
a = rand() % 10000, |
|
b = rand() % 100, |
|
c = rand() % 100, |
|
d = rand() % 100, |
|
e = rand() % 100, |
|
f = rand() % 100; |
|
for (auto _ : state) { |
|
char res[20] = {}; |
|
fast::formatISO8601(res, a, b, c, d, e, f); |
|
benchmark::DoNotOptimize(res); |
|
} |
|
} |
|
BENCHMARK(iso8601_fast); |
|
|
|
|
|
//int main(int argc, char **argv) { |
|
// tm t; |
|
// t.tm_year = 2020 - 1900; |
|
// t.tm_mon = 11; |
|
// t.tm_mday = 22; |
|
// t.tm_hour = 0; |
|
// t.tm_min = 33; |
|
// t.tm_sec = 55; |
|
// |
|
// char buf[21] = {}; |
|
// std::strftime(buf, 21, "%FT%TZ", &t); |
|
// std::cout << std::string_view{ buf, 20 } << '\n'; |
|
// |
|
// manual::formatISO8601(buf, t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec); |
|
// std::cout << std::string_view{ buf, 20 } << '\n'; |
|
// |
|
// fast::formatISO8601(buf, t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec); |
|
// std::cout << std::string_view{ buf, 20 } << '\n'; |
|
// |
|
// //simd::formatISO8601(buf, t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec); |
|
// //std::cout << std::string_view{ buf, 20 } << '\n'; |
|
//} |