Skip to content

Instantly share code, notes, and snippets.

@ttsugriy
Created September 3, 2023 04:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ttsugriy/be4597c93f0c3b89cc91708cae2e7111 to your computer and use it in GitHub Desktop.
Save ttsugriy/be4597c93f0c3b89cc91708cae2e7111 to your computer and use it in GitHub Desktop.
utf8 length from utf32 benchmark
#include <cstddef>
#include <cstdint>
#include <cuchar>
// Returns the number of UTF-8 bytes needed to encode `len` UTF-32 code units
// starting at `buf` (branching implementation).
//
// Each code unit is sized purely by magnitude:
//   <= 0x7F   -> 1 byte
//   <= 0x7FF  -> 2 bytes
//   <= 0xFFFF -> 3 bytes
//   otherwise -> 4 bytes
// No validation is performed, so surrogates and values above 0x10FFFF are
// counted by the same rule. `buf` is not dereferenced when `len` is 0.
size_t utf8_length_from_utf32(const char32_t* buf, size_t len) {
  // We are not BOM aware.
  size_t counter{0};
  for (size_t i = 0; i < len; i++) {
    // Read through the original char32_t pointer: char32_t is already an
    // unsigned integer type, and viewing it through a uint32_t* would break
    // the type-aliasing rules.
    const char32_t code_point = buf[i];
    /** ASCII **/
    if (code_point <= 0x7F) {
      counter++;
    }
    /** two-byte **/
    else if (code_point <= 0x7FF) {
      counter += 2;
    }
    /** three-byte **/
    else if (code_point <= 0xFFFF) {
      counter += 3;
    }
    /** four-bytes **/
    else {
      counter += 4;
    }
  }
  return counter;
}
// Returns the number of UTF-8 bytes needed to encode `len` UTF-32 code units
// starting at `buf` (branch-free implementation).
//
// Every code unit costs one byte, plus one more byte for each threshold it
// exceeds (0x7F, 0x7FF, 0xFFFF), giving 1 to 4 bytes per code unit. No
// validation is performed, so surrogates and values above 0x10FFFF are counted
// by the same rule. `buf` is not dereferenced when `len` is 0.
size_t utf8_length_from_utf32v(const char32_t* buf, size_t len) {
  // We are not BOM aware.
  size_t counter{0};
  for (size_t i = 0; i < len; i++) {
    // Read through the original char32_t pointer: char32_t is already an
    // unsigned integer type, and viewing it through a uint32_t* would break
    // the type-aliasing rules.
    const char32_t code_point = buf[i];
    ++counter;                                            /** ASCII **/
    counter += static_cast<size_t>(code_point > 0x7F);    /** two-byte **/
    counter += static_cast<size_t>(code_point > 0x7FF);   /** three-byte **/
    counter += static_cast<size_t>(code_point > 0xFFFF);  /** four-bytes **/
  }
  return counter;
}
// Benchmark input: a pure-ASCII UTF-32 sample (a 16-character pattern repeated
// 8 times), i.e. 128 code points plus the terminating NUL = 129 array elements.
const char32_t text[] = U"eckwd4c7cu47r2wfeckwd4c7cu47r2wfeckwd4c7cu47r2wfeckwd4c7cu47r2wfeckwd4c7cu47r2wfeckwd4c7cu47r2wfeckwd4c7cu47r2wfeckwd4c7cu47r2wf";
// Alternative input mixing ASCII with Japanese characters that need three
// UTF-8 bytes each. Its element count differs from the ASCII sample above.
// const char32_t text[] = U"MajiでKoiする5秒前MajiでKoiする5秒前MajiでKoiする5秒前MajiでKoiする5秒前MajiでKoiする5秒前MajiでKoiする5秒前MajiでKoiする5秒前MajiでKoiする5秒前MajiでKoiする5秒前";
static void BH_length(benchmark::State& state) {
const auto text = (const char32_t *)state.range(0);
for (auto _ : state) {
benchmark::DoNotOptimize(utf8_length_from_utf32(text, 129));
}
}
BENCHMARK(BH_length)->Arg((int64_t)text);
static void BH_lengthVec(benchmark::State& state) {
const auto text = (const char32_t *)state.range(0);
for (auto _ : state) {
benchmark::DoNotOptimize(utf8_length_from_utf32v(text, 129));
}
}
BENCHMARK(BH_lengthVec)->Arg((int64_t)text);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment