Skip to content

Instantly share code, notes, and snippets.

@qis
Last active April 5, 2022 05:50
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save qis/58fd0e0326909a89a8622a338420c3c2 to your computer and use it in GitHub Desktop.
Save qis/58fd0e0326909a89a8622a338420c3c2 to your computer and use it in GitHub Desktop.
Parse ISO 8601 date time strings in C++ and AVX2.
// if(MSVC)
// set(AVX2_FLAGS "/arch:AVX2")
// else()
// set(AVX2_FLAGS "-march=native -mavx2")
// endif()
//
// option(ENABLE_AVX2 "Enable AVX2 support" OFF)
// if(ENABLE_AVX2)
// if(CMAKE_CROSSCOMPILING)
// set(AVX2 TRUE)
// else()
// include(CheckCXXSourceRuns)
// set(CMAKE_REQUIRED_FLAGS "${AVX2_FLAGS}")
// check_cxx_source_runs("
// #include <immintrin.h>
// int main() {
// const auto src = _mm256_set_epi64x(
// 0x0002000200020002ULL, 0x0002000200020002ULL,
// 0x0002000200020002ULL, 0x0002000200020002ULL);
// const auto sub = _mm256_set_epi64x(
// 0x0001000100010001ULL, 0x0001000100010001ULL,
// 0x0001000100010001ULL, 0x0001000100010001ULL);
// const auto dst = _mm256_sub_epi16(src, sub);
// alignas(32) short str[16];
// _mm256_store_si256(reinterpret_cast<__m256i*>(str), dst);
// for (auto c : str) {
// if (c != 1) {
// return -1;
// }
// }
// }" AVX2)
// endif()
// if(AVX2)
// set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX2_FLAGS}")
// add_definitions(-DICE_AVX2=1)
// endif()
// endif()
#pragma once
#include <date/date.h>
#include <array>
#include <string_view>
#include <system_error>
#include <cstddef>
#include <cstdint>
#if defined(ICE_AVX2) || defined(__INTELLISENSE__)
#include <immintrin.h>
#include <emmintrin.h>
#endif
namespace ice {
// clang-format off
// Reports whether `c` is one of the ASCII decimal digits '0' through '9'.
constexpr bool is_digit(char c) noexcept {
  // Anything below '0' wraps to a large unsigned value, so a single
  // comparison covers both ends of the range.
  return static_cast<unsigned>(c - '0') <= 9u;
}
// Converts the first N characters at `s` to a value of type T, treating each
// one as an ASCII decimal digit. No validation is performed: the caller must
// guarantee that `s` points to at least N readable characters. Non-digit
// characters do not raise an error; they simply produce a meaningless value.
template <typename T, std::size_t N>
constexpr T parse_unsigned_unchecked(const char* s) noexcept {
  T result = 0;
  for (std::size_t i = 0; i < N; i++) {
    // Multiply instead of shifting: a stray character below '0' makes a signed
    // accumulator negative, and left-shifting a negative value is undefined
    // behaviour before C++20 (and is not allowed in a constant expression).
    result = static_cast<T>(result * 10 + static_cast<T>(s[i] - '0'));
  }
  return result;
}
// Converts the first N characters at `s` to a value of type T, verifying that
// every character is a decimal digit. The caller must guarantee that `s`
// points to at least N readable characters.
// Throws std::system_error (std::errc::invalid_argument) on the first
// character that is not a digit.
template <typename T, std::size_t N>
inline T parse_unsigned(const char* s) {
  T value = 0;
  const char* const end = s + N;
  for (const char* cursor = s; cursor != end; ++cursor) {
    if (is_digit(*cursor)) {
      // Shift the accumulated value one decimal place and append the new digit.
      value = static_cast<T>(value * 10 + static_cast<T>(*cursor - '0'));
      continue;
    }
    throw std::system_error(std::make_error_code(std::errc::invalid_argument));
  }
  return value;
}
// 0 ns
// Parses a character array laid out as "YYYY-MM-DD hh:mm:ss.fff" into a
// millisecond-resolution system time point. Only the array length is checked
// (at compile time); separators and digits are not validated.
template <std::size_t N>
constexpr date::sys_time<std::chrono::milliseconds> parse_date_unchecked(const char (&s)[N]) noexcept {
  // N > 23 admits a 23-character string literal together with its terminator.
  static_assert(N > 23u, "date string must be at least 23 characters");

  // Calendar fields: year at offset 0, month at 5, day at 8.
  const date::year_month_day ymd{
    date::year{ parse_unsigned_unchecked<int, 4>(s) },
    date::month{ parse_unsigned_unchecked<unsigned, 2>(s + 5) },
    date::day{ parse_unsigned_unchecked<unsigned, 2>(s + 8) }
  };

  // Time-of-day fields: hours at 11, minutes at 14, seconds at 17, milliseconds at 20.
  const std::chrono::hours hh{ parse_unsigned_unchecked<int, 2>(s + 11) };
  const std::chrono::minutes mm{ parse_unsigned_unchecked<int, 2>(s + 14) };
  const std::chrono::seconds ss{ parse_unsigned_unchecked<long long, 2>(s + 17) };
  const std::chrono::milliseconds fff{ parse_unsigned_unchecked<long long, 3>(s + 20) };

  return date::sys_days{ ymd } + hh + mm + ss + fff;
}
// 12 ns
// Parses text laid out as "YYYY-MM-DD hh:mm:ss.fff" into a millisecond-resolution
// system time point. Nothing is validated: the caller must guarantee that `sv`
// refers to at least 23 readable characters in that layout. Only sv.data() is
// consulted; sv.size() is ignored.
constexpr date::sys_time<std::chrono::milliseconds> parse_date_unchecked(std::string_view sv) noexcept {
  const char* const text = sv.data();

  // Calendar fields: year at offset 0, month at 5, day at 8.
  const date::year_month_day ymd{
    date::year{ parse_unsigned_unchecked<int, 4>(text) },
    date::month{ parse_unsigned_unchecked<unsigned, 2>(text + 5) },
    date::day{ parse_unsigned_unchecked<unsigned, 2>(text + 8) }
  };

  // Time-of-day fields: hours at 11, minutes at 14, seconds at 17, milliseconds at 20.
  const std::chrono::hours hh{ parse_unsigned_unchecked<int, 2>(text + 11) };
  const std::chrono::minutes mm{ parse_unsigned_unchecked<int, 2>(text + 14) };
  const std::chrono::seconds ss{ parse_unsigned_unchecked<long long, 2>(text + 17) };
  const std::chrono::milliseconds fff{ parse_unsigned_unchecked<long long, 3>(text + 20) };

  return date::sys_days{ ymd } + hh + mm + ss + fff;
}
// 22 ns
// Parses text laid out as "YYYY-MM-DD hh:mm:ss.fff" into a millisecond-resolution
// system time point, reporting failures through `ec` instead of throwing.
//
// `ec` is cleared on entry and set to:
//   - std::errc::message_size      when `sv` holds fewer than 23 characters;
//   - std::errc::invalid_argument  when a separator or digit position does not
//                                  hold the expected character.
// On failure a default-constructed time point is returned. Characters beyond
// the first 23 are ignored.
inline date::sys_time<std::chrono::milliseconds> parse_date(std::string_view sv, std::error_code& ec) noexcept {
  ec.clear();
  if (sv.size() < 23u) {
    ec = std::make_error_code(std::errc::message_size);
    return {};
  }
  // One row per field; every digit position (including sv[9], the units digit
  // of the day) must be checked before the unchecked parser is used.
  if (!is_digit(sv[0]) || !is_digit(sv[1]) || !is_digit(sv[2]) || !is_digit(sv[3]) || sv[4] != '-' ||
      !is_digit(sv[5]) || !is_digit(sv[6]) || sv[7] != '-' ||
      !is_digit(sv[8]) || !is_digit(sv[9]) || sv[10] != ' ' ||
      !is_digit(sv[11]) || !is_digit(sv[12]) || sv[13] != ':' ||
      !is_digit(sv[14]) || !is_digit(sv[15]) || sv[16] != ':' ||
      !is_digit(sv[17]) || !is_digit(sv[18]) || sv[19] != '.' ||
      !is_digit(sv[20]) || !is_digit(sv[21]) || !is_digit(sv[22])) {
    ec = std::make_error_code(std::errc::invalid_argument);
    return {};
  }
  // Pass the view itself: handing over sv.data() would build a new string_view
  // from a bare pointer, which scans for a terminating NUL and may read past
  // the end of a view that is not NUL-terminated.
  return parse_date_unchecked(sv);
}
// 20 ns
// Parses text laid out as "YYYY-MM-DD hh:mm:ss.fff" into a millisecond-resolution
// system time point.
// Throws std::system_error with:
//   - std::errc::message_size      when `sv` holds fewer than 23 characters;
//   - std::errc::invalid_argument  when a separator is wrong or a digit
//                                  position holds a non-digit (the latter is
//                                  raised by parse_unsigned).
// Characters beyond the first 23 are ignored.
inline date::sys_time<std::chrono::milliseconds> parse_date(std::string_view sv) {
  if (sv.size() < 23u) {
    throw std::system_error(std::make_error_code(std::errc::message_size));
  }

  const bool separators_ok =
    sv[4] == '-' && sv[7] == '-' && sv[10] == ' ' &&
    sv[13] == ':' && sv[16] == ':' && sv[19] == '.';
  if (!separators_ok) {
    throw std::system_error(std::make_error_code(std::errc::invalid_argument));
  }

  // Fields are parsed left to right; each call validates its own digits.
  const char* const text = sv.data();
  const date::year yy{ parse_unsigned<int, 4>(text) };
  const date::month mo{ parse_unsigned<unsigned, 2>(text + 5) };
  const date::day dd{ parse_unsigned<unsigned, 2>(text + 8) };
  const std::chrono::hours hh{ parse_unsigned<int, 2>(text + 11) };
  const std::chrono::minutes mm{ parse_unsigned<int, 2>(text + 14) };
  const std::chrono::seconds ss{ parse_unsigned<long long, 2>(text + 17) };
  const std::chrono::milliseconds fff{ parse_unsigned<long long, 3>(text + 20) };

  return date::sys_days{ yy / mo / dd } + hh + mm + ss + fff;
}
#if defined(ICE_AVX2) || defined(__INTELLISENSE__)
// 29 ns
// AVX2 variant of the unchecked parser for text laid out as
// "YYYY-MM-DD hh:mm:ss.fff". Nothing is validated: the caller must guarantee
// that `sv` holds at least 23 characters in that layout.
inline date::sys_time<std::chrono::milliseconds> parse_date_avx2_unchecked(std::string_view sv) noexcept {
  // Load 16 of the 17 digit characters into 16-bit lanes (arguments run from
  // lane 15 down to lane 0). Separators are skipped; the last millisecond
  // digit, sv[22], does not fit and is added in scalar code below.
  const __m256i chars = _mm256_set_epi16(
    sv[21], sv[20], sv[18], sv[17], sv[15], sv[14], sv[12], sv[11],
    sv[9], sv[8], sv[6], sv[5], sv[3], sv[2], sv[1], sv[0]);

  // 16 x '0'
  const __m256i ascii_zero = _mm256_set_epi64x(
    0x0030003000300030ULL, 0x0030003000300030ULL,
    0x0030003000300030ULL, 0x0030003000300030ULL);
  const __m256i digits = _mm256_subs_epi16(chars, ascii_zero);

  // Decimal weights, lane 15 down to lane 0:
  // 10, 100, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 100, 1000
  const __m256i weights = _mm256_set_epi64x(
    0x000A00640001000AULL, 0x0001000A0001000AULL,
    0x0001000A0001000AULL, 0x0001000A006403E8ULL);
  const __m256i weighted = _mm256_mullo_epi16(digits, weights);

  // Add neighbouring lanes so that each two-digit field collapses into one
  // value; the fields read below end up in out[0..3] and out[8..11].
  const __m256i sums = _mm256_hadd_epi16(weighted, weighted);

  alignas(32) std::uint16_t out[16];
  _mm256_store_si256(reinterpret_cast<__m256i*>(out), sums);

  // The year spans four digits, so it is split across two partial sums.
  const date::year yy{ static_cast<int>(out[0] + out[1]) };
  const date::month mo{ static_cast<unsigned>(out[2]) };
  const date::day dd{ static_cast<unsigned>(out[3]) };
  const std::chrono::hours hh{ static_cast<int>(out[8]) };
  const std::chrono::minutes mm{ static_cast<int>(out[9]) };
  const std::chrono::seconds ss{ static_cast<long long>(out[10]) };
  // out[11] holds the hundreds and tens of the milliseconds; add the units.
  const std::chrono::milliseconds fff{
    static_cast<long long>(out[11] + static_cast<std::uint16_t>(sv[22] - '0'))
  };

  return date::sys_days{ yy / mo / dd } + hh + mm + ss + fff;
}
// 27 ns
// AVX2 variant of the validating parser for text laid out as
// "YYYY-MM-DD hh:mm:ss.fff", reporting failures through `ec` instead of
// throwing.
//
// `ec` is cleared on entry and set to:
//   - std::errc::message_size      when `sv` holds fewer than 23 characters;
//   - std::errc::invalid_argument  when a separator is wrong or a digit
//                                  position holds a character outside '0'..'9'.
// On failure a default-constructed time point is returned.
inline date::sys_time<std::chrono::milliseconds> parse_date_avx2(std::string_view sv, std::error_code& ec) noexcept {
  ec.clear();
  if (sv.size() < 23u) {
    ec = std::make_error_code(std::errc::message_size);
    return {};
  }

  // Separators and the trailing millisecond digit are checked in scalar code;
  // sv[22] is the one digit that does not fit into the 16 vector lanes.
  const bool scalar_ok =
    sv[4] == '-' && sv[7] == '-' && sv[10] == ' ' &&
    sv[13] == ':' && sv[16] == ':' && sv[19] == '.' && is_digit(sv[22]);
  if (!scalar_ok) {
    ec = std::make_error_code(std::errc::invalid_argument);
    return {};
  }

  // Load the remaining 16 digit characters into 16-bit lanes (arguments run
  // from lane 15 down to lane 0).
  const __m256i chars = _mm256_set_epi16(
    sv[21], sv[20], sv[18], sv[17], sv[15], sv[14], sv[12], sv[11],
    sv[9], sv[8], sv[6], sv[5], sv[3], sv[2], sv[1], sv[0]);

  // 16 x '9': reject any lane above the digit range.
  const __m256i ascii_nine = _mm256_set_epi64x(
    0x0039003900390039ULL, 0x0039003900390039ULL,
    0x0039003900390039ULL, 0x0039003900390039ULL);
  if (_mm256_movemask_epi8(_mm256_cmpgt_epi16(chars, ascii_nine)) != 0) {
    ec = std::make_error_code(std::errc::invalid_argument);
    return {};
  }

  // 16 x '0': reject any lane below the digit range.
  const __m256i ascii_zero = _mm256_set_epi64x(
    0x0030003000300030ULL, 0x0030003000300030ULL,
    0x0030003000300030ULL, 0x0030003000300030ULL);
  if (_mm256_movemask_epi8(_mm256_cmpgt_epi16(ascii_zero, chars)) != 0) {
    ec = std::make_error_code(std::errc::invalid_argument);
    return {};
  }

  const __m256i digits = _mm256_subs_epi16(chars, ascii_zero);

  // Decimal weights, lane 15 down to lane 0:
  // 10, 100, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 100, 1000
  const __m256i weights = _mm256_set_epi64x(
    0x000A00640001000AULL, 0x0001000A0001000AULL,
    0x0001000A0001000AULL, 0x0001000A006403E8ULL);
  const __m256i weighted = _mm256_mullo_epi16(digits, weights);

  // Add neighbouring lanes so that each two-digit field collapses into one
  // value; the fields read below end up in out[0..3] and out[8..11].
  const __m256i sums = _mm256_hadd_epi16(weighted, weighted);

  alignas(32) std::uint16_t out[16];
  _mm256_store_si256(reinterpret_cast<__m256i*>(out), sums);

  // The year spans four digits, so it is split across two partial sums.
  const date::year yy{ static_cast<int>(out[0] + out[1]) };
  const date::month mo{ static_cast<unsigned>(out[2]) };
  const date::day dd{ static_cast<unsigned>(out[3]) };
  const std::chrono::hours hh{ static_cast<int>(out[8]) };
  const std::chrono::minutes mm{ static_cast<int>(out[9]) };
  const std::chrono::seconds ss{ static_cast<long long>(out[10]) };
  // out[11] holds the hundreds and tens of the milliseconds; add the units.
  const std::chrono::milliseconds fff{
    static_cast<long long>(out[11] + static_cast<std::uint16_t>(sv[22] - '0'))
  };

  return date::sys_days{ yy / mo / dd } + hh + mm + ss + fff;
}
// 32 ns
// AVX2 variant of the validating parser for text laid out as
// "YYYY-MM-DD hh:mm:ss.fff".
// Throws std::system_error with:
//   - std::errc::message_size      when `sv` holds fewer than 23 characters;
//   - std::errc::invalid_argument  when a separator is wrong or a digit
//                                  position holds a character outside '0'..'9'.
inline date::sys_time<std::chrono::milliseconds> parse_date_avx2(std::string_view sv) {
  if (sv.size() < 23u) {
    throw std::system_error(std::make_error_code(std::errc::message_size));
  }

  // Separators and the trailing millisecond digit are checked in scalar code;
  // sv[22] is the one digit that does not fit into the 16 vector lanes.
  const bool scalar_ok =
    sv[4] == '-' && sv[7] == '-' && sv[10] == ' ' &&
    sv[13] == ':' && sv[16] == ':' && sv[19] == '.' && is_digit(sv[22]);
  if (!scalar_ok) {
    throw std::system_error(std::make_error_code(std::errc::invalid_argument));
  }

  // Load the remaining 16 digit characters into 16-bit lanes (arguments run
  // from lane 15 down to lane 0).
  const __m256i chars = _mm256_set_epi16(
    sv[21], sv[20], sv[18], sv[17], sv[15], sv[14], sv[12], sv[11],
    sv[9], sv[8], sv[6], sv[5], sv[3], sv[2], sv[1], sv[0]);

  // 16 x '9': reject any lane above the digit range.
  const __m256i ascii_nine = _mm256_set_epi64x(
    0x0039003900390039ULL, 0x0039003900390039ULL,
    0x0039003900390039ULL, 0x0039003900390039ULL);
  if (_mm256_movemask_epi8(_mm256_cmpgt_epi16(chars, ascii_nine)) != 0) {
    throw std::system_error(std::make_error_code(std::errc::invalid_argument));
  }

  // 16 x '0': reject any lane below the digit range.
  const __m256i ascii_zero = _mm256_set_epi64x(
    0x0030003000300030ULL, 0x0030003000300030ULL,
    0x0030003000300030ULL, 0x0030003000300030ULL);
  if (_mm256_movemask_epi8(_mm256_cmpgt_epi16(ascii_zero, chars)) != 0) {
    throw std::system_error(std::make_error_code(std::errc::invalid_argument));
  }

  const __m256i digits = _mm256_subs_epi16(chars, ascii_zero);

  // Decimal weights, lane 15 down to lane 0:
  // 10, 100, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 100, 1000
  const __m256i weights = _mm256_set_epi64x(
    0x000A00640001000AULL, 0x0001000A0001000AULL,
    0x0001000A0001000AULL, 0x0001000A006403E8ULL);
  const __m256i weighted = _mm256_mullo_epi16(digits, weights);

  // Add neighbouring lanes so that each two-digit field collapses into one
  // value; the fields read below end up in out[0..3] and out[8..11].
  const __m256i sums = _mm256_hadd_epi16(weighted, weighted);

  alignas(32) std::uint16_t out[16];
  _mm256_store_si256(reinterpret_cast<__m256i*>(out), sums);

  // The year spans four digits, so it is split across two partial sums.
  const date::year yy{ static_cast<int>(out[0] + out[1]) };
  const date::month mo{ static_cast<unsigned>(out[2]) };
  const date::day dd{ static_cast<unsigned>(out[3]) };
  const std::chrono::hours hh{ static_cast<int>(out[8]) };
  const std::chrono::minutes mm{ static_cast<int>(out[9]) };
  const std::chrono::seconds ss{ static_cast<long long>(out[10]) };
  // out[11] holds the hundreds and tens of the milliseconds; add the units.
  const std::chrono::milliseconds fff{
    static_cast<long long>(out[11] + static_cast<std::uint16_t>(sv[22] - '0'))
  };

  return date::sys_days{ yy / mo / dd } + hh + mm + ss + fff;
}
#endif
// clang-format on
} // namespace ice
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment