// Source: GitHub gist anhnguyen1618/82bed12b8599f2f1c44e738982a6b412
// by @anhnguyen1618 (last active July 2, 2020 19:08).
#include <chrono>
#include <codecvt>
#include <iostream>
#include <locale>
#include <memory>
#include <stdexcept>
#include <string>
// File-local scratch string. No live code visible in this file reads or
// writes it. NOTE(review): possibly a leftover sink for benchmark results —
// confirm before removing.
static std::string dummy;
// Baseline: UTF-8 <-> UTF-16 conversion via the standard library's std::wstring_convert.
// Converts a UTF-8 encoded byte string into UTF-16 code units.
//
// A fresh std::wstring_convert is built on every call; any failure it reports
// (it is constructed without an error string) propagates to the caller as an
// exception.
inline std::u16string convertUTF8ToUTF16(const std::string &string) {
  using Utf8Utf16Converter =
      std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>;
  Utf8Utf16Converter converter;
  return converter.from_bytes(string);
}
// Converts a string of UTF-16 code units into a UTF-8 encoded byte string.
//
// A fresh std::wstring_convert is built on every call; any failure it reports
// (it is constructed without an error string) propagates to the caller as an
// exception.
inline std::string convertUTF16ToUTF8(const std::u16string &string) {
  using Utf8Utf16Converter =
      std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>;
  Utf8Utf16Converter converter;
  return converter.to_bytes(string);
}
// Djinni: https://github.com/dropbox/djinni/blob/master/support-lib/jni/djinni_support.cpp#L534
// Returns true when `c` is a UTF-16 high (leading) surrogate, i.e. lies in
// 0xD800..0xDBFF. Every value in that range shares the same top six bits of
// its 16-bit pattern, so one mask-and-compare covers the whole range.
static inline bool is_high_surrogate(char16_t c) {
  return (c & 0xFC00) == 0xD800;
}
// Returns true when `c` is a UTF-16 low (trailing) surrogate, i.e. lies in
// 0xDC00..0xDFFF. Every value in that range shares the same top six bits of
// its 16-bit pattern, so one mask-and-compare covers the whole range.
static inline bool is_low_surrogate(char16_t c) {
  return (c & 0xFC00) == 0xDC00;
}
// Outcome of decoding one code point from a UTF-16 buffer.
struct offset_pt {
  // Number of UTF-16 code units consumed (1 or 2), or -1 when the input at
  // that position is not a valid encoding.
  int offset;
  // The decoded code point; 0 in the invalid sentinel below.
  char32_t pt;
};

// Sentinel returned by the decoder for an unpaired surrogate.
static constexpr offset_pt invalid_pt{-1, 0};
// Decodes the code point that starts at str[i] without advancing `i`.
//
// Returns {1, unit} for a non-surrogate code unit, {2, code point} for a high
// surrogate immediately followed by a low surrogate, and invalid_pt for an
// unpaired surrogate of either kind.
//
// str[i + 1] is read only when str[i] is a high surrogate, so the caller must
// guarantee that element is readable (e.g. the terminator of a
// std::u16string's buffer).
static offset_pt utf16_decode_check(const char16_t *str,
                                    std::u16string::size_type i) {
  const char16_t lead = str[i];
  const bool lead_is_high = is_high_surrogate(lead);

  // Plain BMP code unit: it is its own code point.
  if (!lead_is_high && !is_low_surrogate(lead)) {
    return {1, lead};
  }

  if (lead_is_high) {
    const char16_t trail = str[i + 1];
    if (is_low_surrogate(trail)) {
      // Combine the two 10-bit payloads and add the supplementary-plane base.
      const char32_t pt =
          (((lead - 0xD800) << 10) | (trail - 0xDC00)) + 0x10000;
      return {2, pt};
    }
  }

  // High surrogate without a following low surrogate, or a lone low surrogate.
  return invalid_pt;
}
static char32_t utf16_decode(const char16_t *str,
std::u16string::size_type &i) {
offset_pt res = utf16_decode_check(str, i);
if (res.offset < 0) {
i += 1;
return 0xFFFD;
} else {
i += res.offset;
return res.pt;
}
}
// Appends the UTF-8 encoding of code point `pt` to `out`.
//
// Values that are not Unicode scalar values — the surrogate range
// U+D800..U+DFFF and anything above U+10FFFF — are replaced with U+FFFD
// (EF BF BD). Encoding a surrogate directly would produce a three-byte
// sequence that is ill-formed UTF-8.
//
// `out` is only appended to; existing content is preserved.
static void utf8_encode(char32_t pt, std::string &out) {
  // One- and two-byte forms cannot contain surrogates, so handle them before
  // the validity check to keep the common ASCII path short.
  if (pt < 0x80) {
    out += static_cast<char>(pt);
    return;
  }
  if (pt < 0x800) {
    out += {static_cast<char>((pt >> 6) | 0xC0),
            static_cast<char>((pt & 0x3F) | 0x80)};
    return;
  }

  const bool is_surrogate = (pt >= 0xD800) && (pt < 0xE000);
  if (is_surrogate || pt >= 0x110000) {
    pt = 0xFFFD;  // falls through to the three-byte branch below
  }

  if (pt < 0x10000) {
    out += {static_cast<char>((pt >> 12) | 0xE0),
            static_cast<char>(((pt >> 6) & 0x3F) | 0x80),
            static_cast<char>((pt & 0x3F) | 0x80)};
  } else {
    out += {static_cast<char>((pt >> 18) | 0xF0),
            static_cast<char>(((pt >> 12) & 0x3F) | 0x80),
            static_cast<char>(((pt >> 6) & 0x3F) | 0x80),
            static_cast<char>((pt & 0x3F) | 0x80)};
  }
}
// Converts a UTF-16 string to UTF-8 with the hand-written decoder/encoder
// defined in this file: each code point is decoded with utf16_decode and
// appended with utf8_encode until every code unit has been consumed.
std::string convert_dropbox(const std::u16string &str) {
  const std::u16string::size_type unit_count = str.length();
  const char16_t *const units = str.data();

  std::string utf8;
  // Capacity guess of 1.5 output bytes per input unit; the string still grows
  // on demand if the guess is too low.
  utf8.reserve(unit_count * 3 / 2);

  std::u16string::size_type pos = 0;
  while (pos < unit_count) {
    // utf16_decode advances `pos` past whatever it consumed.
    const char32_t code_point = utf16_decode(units, pos);
    utf8_encode(code_point, utf8);
  }
  return utf8;
}
// Benchmark input reported as "small string" by main(): a few lines of Lorem
// Ipsum filler, joined into one literal by adjacent-literal concatenation.
// The text appears to be plain ASCII, so converting it should only exercise
// the one-byte UTF-8 path.
std::u16string small_str =
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"industry. Lorem Ipsum has been the industry's standard dummy text "
u"industry. Lorem Ipsum has been the industry's standard dummy text "
u"industry. Lorem Ipsum has been the industry's standard dummy text "
u"ever ";
// Benchmark input reported as "medium string" by main(): the same Lorem Ipsum
// filler as small_str with the "industry. ..." line repeated more times.
// The text appears to be plain ASCII, so converting it should only exercise
// the one-byte UTF-8 path.
std::u16string medium_str =
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"industry. Lorem Ipsum has been the industry's standard dummy text "
u"industry. Lorem Ipsum has been the industry's standard dummy text "
u"industry. Lorem Ipsum has been the industry's standard dummy text "
u"industry. Lorem Ipsum has been the industry's standard dummy text "
u"industry. Lorem Ipsum has been the industry's standard dummy text "
u"industry. Lorem Ipsum has been the industry's standard dummy text "
u"industry. Lorem Ipsum has been the industry's standard dummy text "
u"industry. Lorem Ipsum has been the industry's standard dummy text "
u"industry. Lorem Ipsum has been the industry's standard dummy text "
u"industry. Lorem Ipsum has been the industry's standard dummy text "
u"industry. Lorem Ipsum has been the industry's standard dummy text "
u"industry. Lorem Ipsum has been the industry's standard dummy text "
u"industry. Lorem Ipsum has been the industry's standard dummy text "
u"ever ";
// Benchmark input reported as "large string" by main(): several repetitions
// of the full Lorem Ipsum paragraph plus a run of one repeated line as
// padding, joined into one literal by adjacent-literal concatenation.
// The text appears to be plain ASCII, so converting it should only exercise
// the one-byte UTF-8 path.
std::u16string large_str =
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"industry. Lorem Ipsum has been the industry's standard dummy text ever "
u"since the 1500s, when an unknown printer took a galley of type and "
u"scrambled it to make a type specimen book. It has survived not only "
u"five centuries, but also the leap into electronic typesetting, "
u"remaining essentially unchanged. It was popularised in the 1960s with "
u"the release of Letraset sheets containing Lorem Ipsum passages, and "
u"more recently with desktop publishing software like Aldus PageMaker "
u"including versions of Lorem Ipsum.Lorem Ipsum is simply dummy text of "
u"the printing and typesetting industry. Lorem Ipsum has been the "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"printer took a galley of type and scrambled it to make a type specimen "
u"book. It has survived not only five centuries, but also the leap into "
u"electronic typesetting, remaining essentially unchanged. It was "
u"popularised in the 1960s with the release of Letraset sheets "
u"containing Lorem Ipsum passages, and more recently with desktop "
u"publishing software like Aldus PageMaker including versions of Lorem "
u"Ipsum.Lorem Ipsum is simply dummy text of the printing and typesetting "
u"industry. Lorem Ipsum has been the industry's standard dummy text ever "
u"since the 1500s, when an unknown printer took a galley of type and "
u"scrambled it to make a type specimen book. It has survived not only "
u"five centuries, but also the leap into electronic typesetting, "
u"remaining essentially unchanged. It was popularised in the 1960s with "
u"the release of Letraset sheets containing Lorem Ipsum passages, and "
u"more recently with desktop publishing software like Aldus PageMaker "
u"including versions of Lorem Ipsum.Lorem Ipsum is simply dummy text of "
u"the printing and typesetting industry. Lorem Ipsum has been the "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"printer took a galley of type and scrambled it to make a type specimen "
u"book. It has survived not only five centuries, but also the leap into "
u"electronic typesetting, remaining essentially unchanged. It was "
u"popularised in the 1960s with the release of Letraset sheets "
u"containing Lorem Ipsum passages, and more recently with desktop "
u"publishing software like Aldus PageMaker including versions of Lorem "
u"Ipsum.Lorem Ipsum is simply dummy text of the printing and typesetting "
u"industry. Lorem Ipsum has been the industry's standard dummy text ever "
u"since the 1500s, when an unknown printer took a galley of type and "
u"scrambled it to make a type specimen book. It has survived not only "
u"five centuries, but also the leap into electronic typesetting, "
u"remaining essentially unchanged. It was popularised in the 1960s with "
u"the release of Letraset sheets containing Lorem Ipsum passages, and "
u"more recently with desktop publishing software like Aldus PageMaker "
u"including versions of Lorem Ipsum.Lorem Ipsum is simply dummy text of "
u"the printing and typesetting industry. Lorem Ipsum has been the "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"printer took a galley of type and scrambled it to make a type specimen "
u"book. It has survived not only five centuries, but also the leap into "
u"electronic typesetting, remaining essentially unchanged. It was "
u"popularised in the 1960s with the release of Letraset sheets "
u"containing Lorem Ipsum passages, and more recently with desktop "
u"publishing software like Aldus PageMaker including versions of Lorem "
u"Ipsum.Lorem Ipsum is simply dummy text of the printing and typesetting "
u"industry. Lorem Ipsum has been the industry's standard dummy text ever "
u"since the 1500s, when an unknown printer took a galley of type and "
u"scrambled it to make a type specimen book. It has survived not only "
u"five centuries, but also the leap into electronic typesetting, "
u"remaining essentially unchanged. It was popularised in the 1960s with "
u"the release of Letraset sheets containing Lorem Ipsum passages, and "
u"more recently with desktop publishing software like Aldus PageMaker "
u"including versions of Lorem Ipsum.Lorem Ipsum is simply dummy text of "
u"the printing and typesetting industry. Lorem Ipsum has been the "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"printer took a galley of type and scrambled it to make a type specimen "
u"book. It has survived not only five centuries, but also the leap into "
u"electronic typesetting, remaining essentially unchanged. It was "
u"popularised in the 1960s with the release of Letraset sheets "
u"containing Lorem Ipsum passages, and more recently with desktop "
u"publishing software like Aldus PageMaker including versions of Lorem "
u"Ipsum.";
// Benchmark input reported as "super large string" by main(): a run of one
// repeated opening line followed by the same paragraph/padding text used for
// large_str, joined into one literal by adjacent-literal concatenation.
// The text appears to be plain ASCII, so converting it should only exercise
// the one-byte UTF-8 path.
std::u16string super_large_str =
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"Lorem Ipsum is simply dummy text of the printing and typesetting "
u"industry. Lorem Ipsum has been the industry's standard dummy text ever "
u"since the 1500s, when an unknown printer took a galley of type and "
u"scrambled it to make a type specimen book. It has survived not only "
u"five centuries, but also the leap into electronic typesetting, "
u"remaining essentially unchanged. It was popularised in the 1960s with "
u"the release of Letraset sheets containing Lorem Ipsum passages, and "
u"more recently with desktop publishing software like Aldus PageMaker "
u"including versions of Lorem Ipsum.Lorem Ipsum is simply dummy text of "
u"the printing and typesetting industry. Lorem Ipsum has been the "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"printer took a galley of type and scrambled it to make a type specimen "
u"book. It has survived not only five centuries, but also the leap into "
u"electronic typesetting, remaining essentially unchanged. It was "
u"popularised in the 1960s with the release of Letraset sheets "
u"containing Lorem Ipsum passages, and more recently with desktop "
u"publishing software like Aldus PageMaker including versions of Lorem "
u"Ipsum.Lorem Ipsum is simply dummy text of the printing and typesetting "
u"industry. Lorem Ipsum has been the industry's standard dummy text ever "
u"since the 1500s, when an unknown printer took a galley of type and "
u"scrambled it to make a type specimen book. It has survived not only "
u"five centuries, but also the leap into electronic typesetting, "
u"remaining essentially unchanged. It was popularised in the 1960s with "
u"the release of Letraset sheets containing Lorem Ipsum passages, and "
u"more recently with desktop publishing software like Aldus PageMaker "
u"including versions of Lorem Ipsum.Lorem Ipsum is simply dummy text of "
u"the printing and typesetting industry. Lorem Ipsum has been the "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"printer took a galley of type and scrambled it to make a type specimen "
u"book. It has survived not only five centuries, but also the leap into "
u"electronic typesetting, remaining essentially unchanged. It was "
u"popularised in the 1960s with the release of Letraset sheets "
u"containing Lorem Ipsum passages, and more recently with desktop "
u"publishing software like Aldus PageMaker including versions of Lorem "
u"Ipsum.Lorem Ipsum is simply dummy text of the printing and typesetting "
u"industry. Lorem Ipsum has been the industry's standard dummy text ever "
u"since the 1500s, when an unknown printer took a galley of type and "
u"scrambled it to make a type specimen book. It has survived not only "
u"five centuries, but also the leap into electronic typesetting, "
u"remaining essentially unchanged. It was popularised in the 1960s with "
u"the release of Letraset sheets containing Lorem Ipsum passages, and "
u"more recently with desktop publishing software like Aldus PageMaker "
u"including versions of Lorem Ipsum.Lorem Ipsum is simply dummy text of "
u"the printing and typesetting industry. Lorem Ipsum has been the "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"printer took a galley of type and scrambled it to make a type specimen "
u"book. It has survived not only five centuries, but also the leap into "
u"electronic typesetting, remaining essentially unchanged. It was "
u"popularised in the 1960s with the release of Letraset sheets "
u"containing Lorem Ipsum passages, and more recently with desktop "
u"publishing software like Aldus PageMaker including versions of Lorem "
u"Ipsum.Lorem Ipsum is simply dummy text of the printing and typesetting "
u"industry. Lorem Ipsum has been the industry's standard dummy text ever "
u"since the 1500s, when an unknown printer took a galley of type and "
u"scrambled it to make a type specimen book. It has survived not only "
u"five centuries, but also the leap into electronic typesetting, "
u"remaining essentially unchanged. It was popularised in the 1960s with "
u"the release of Letraset sheets containing Lorem Ipsum passages, and "
u"more recently with desktop publishing software like Aldus PageMaker "
u"including versions of Lorem Ipsum.Lorem Ipsum is simply dummy text of "
u"the printing and typesetting industry. Lorem Ipsum has been the "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"industry's standard dummy text ever since the 1500s, when an unknown "
u"printer took a galley of type and scrambled it to make a type specimen "
u"book. It has survived not only five centuries, but also the leap into "
u"electronic typesetting, remaining essentially unchanged. It was "
u"popularised in the 1960s with the release of Letraset sheets "
u"containing Lorem Ipsum passages, and more recently with desktop "
u"publishing software like Aldus PageMaker including versions of Lorem "
u"Ipsum.";
// Times `iterations` calls of `convert(input)` and prints the mean wall-clock
// time per call as "[Benchmark <label>] time elapsed: <N> ns".
//
// - `label`      text placed between "[Benchmark " and "]".
// - `input`      UTF-16 string handed to every call.
// - `iterations` number of timed calls; also the divisor for the mean.
// - `convert`    callable taking `const std::u16string &` and returning a
//                string-like object with `.size()`.
//
// The mean is an integer division, so it is truncated to whole nanoseconds.
template <typename Convert>
static void run_benchmark(const char *label, const std::u16string &input,
                          int iterations, Convert convert) {
  // Storing each result's size to a volatile gives every call an observable
  // effect, so the conversion is not a discarded expression.
  volatile std::string::size_type sink = 0;

  // steady_clock is monotonic; high_resolution_clock is allowed to be an alias
  // for the adjustable system clock.
  const auto started = std::chrono::steady_clock::now();
  for (int i = 0; i < iterations; ++i) {
    sink = convert(input).size();
  }
  const auto done = std::chrono::steady_clock::now();

  const auto mean_ns =
      std::chrono::duration_cast<std::chrono::nanoseconds>(done - started)
          .count() /
      iterations;
  // std::endl is kept on purpose: each result is flushed as soon as it is
  // known, which matters because every benchmark runs for a while.
  std::cout << "[Benchmark " << label << "] time elapsed: " << mean_ns << " ns"
            << std::endl;
}

// Benchmarks the hand-written converter (convert_dropbox, labelled "djini")
// against std::wstring_convert (labelled "bindgen") on the four sample
// strings, printing one line per measurement in the order
// small, medium, large, super large — djini first, then bindgen, for each.
int main() {
  constexpr int num_loop = 1000000;

  const auto djini = [](const std::u16string &s) {
    return convert_dropbox(s);
  };
  // A new converter object is constructed on every call, so its setup cost is
  // part of what is being measured.
  const auto bindgen = [](const std::u16string &s) {
    return std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>()
        .to_bytes(s);
  };

  run_benchmark("djini small string", small_str, num_loop, djini);
  run_benchmark("bindgen small string", small_str, num_loop, bindgen);

  run_benchmark("djini medium string", medium_str, num_loop, djini);
  run_benchmark("bindgen medium string", medium_str, num_loop, bindgen);

  run_benchmark("djini large string", large_str, num_loop, djini);
  run_benchmark("bindgen large string", large_str, num_loop, bindgen);

  run_benchmark("djini super large string", super_large_str, num_loop, djini);
  run_benchmark("bindgen super large string", super_large_str, num_loop,
                bindgen);
}
// (GitHub page footer, not part of the program) Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment