Last active
April 16, 2024 15:00
-
-
Save texus/8d867996e7a073e1498e8c18d920086c to your computer and use it in GitHub Desktop.
String to lowercase at compile time with C++14
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cctype>
#include <cstddef>
#include <string>
#include <utility>
// Compile-time helper that lowercases a single character.
// Only the letters 'A'..'Z' are changed (shifted to 'a'..'z');
// every other value is handed back untouched.
constexpr char charToLower(const char c) {
    // Anything outside the uppercase range passes through as-is.
    if (c < 'A' || c > 'Z') {
        return c;
    }
    // Move the letter by the distance between the two alphabets.
    return static_cast<char>(c + ('a' - 'A'));
}
// Fixed-size character buffer holding the lowercased copy of a char array.
// The buffer is filled by a constexpr constructor, so a constexpr const_str
// carries an already-converted string.
template <std::size_t N>
class const_str {
public:
    // Builds the buffer from `str`. The integer sequence is only a carrier for
    // the index pack: expanding it produces one charToLower(str[i]) initializer
    // per index, followed by a terminating 0.
    template <typename IndexT, IndexT... Indices>
    constexpr const_str(const char (&str)[N], std::integer_sequence<IndexT, Indices...>)
        : chars{charToLower(str[Indices])..., '\0'} {
    }

    // Unchecked read of the character at position i; usable in constant expressions.
    constexpr char operator[] (std::size_t i) const {
        return chars[i];
    }

    // Implicit conversion to a C string. The pointer refers to the buffer stored
    // inside this object, so it is only valid while the object is alive.
    // This operator is not constexpr: obtaining the pointer is a runtime step.
    operator const char*() const {
        return chars;
    }

private:
    // Room for N characters plus one extra byte that holds a 0 value.
    const char chars[N+1];
};
// The code that we are actually going to call | |
template <std::size_t N> | |
constexpr const_str<N> toLower(const char(&str)[N]) { | |
return {str, std::make_integer_sequence<unsigned, N>()}; | |
} | |
// Naive runtime implementation of the toLower function, for comparison.
// Takes the string by value, lowercases that copy in place with std::tolower
// and returns it.
std::string toLowerSlow(std::string str) {
    for (char& c : str) {
        // std::tolower requires an argument representable as unsigned char (or EOF).
        // Passing a negative plain char is undefined behaviour, so convert first.
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    }
    return str;
}
// The test code | |
int main() { | |
// Check that the code works | |
constexpr auto str = toLower("HELLO"); | |
static_assert(str[0] == 'h'); | |
static_assert(str[1] == 'e'); | |
static_assert(str[2] == 'l'); | |
static_assert(str[3] == 'l'); | |
static_assert(str[4] == 'o'); | |
// Checking performance of using this toLower function in a more practical way (timings are for non-optimized build) | |
// 0.925s (empty loop that is not optimized away also needs time to execute, loops below can't be faster than this one) | |
for (unsigned int i = 0; i < 500000000; ++i) { | |
} | |
// 0.925s (same time as empty loop because the conversion happens at compile time) | |
for (unsigned int i = 0; i < 500000000; ++i) { | |
constexpr auto str = toLower("HELLO"); | |
} | |
// 2s (getting the pointer has to happen at runtime, but conversion was done at compile time) | |
for (unsigned int i = 0; i < 500000000; ++i) { | |
constexpr auto str = toLower("HELLO"); | |
const char* s = str; | |
} | |
// 13.5s (everything happens at runtime) | |
for (unsigned int i = 0; i < 500000000; ++i) { | |
const char* s = toLower("HELLO"); | |
} | |
// When using std::string | |
// 9.3s (cost of allocating an std::string, convertion happens at compile time) | |
for (unsigned int i = 0; i < 500000000; ++i) { | |
constexpr auto str = toLower("HELLO"); | |
std::string s = (const char*)str; | |
} | |
// 22s (both allocating std::string and converting to lowercase happen at runtime) | |
for (unsigned int i = 0; i < 500000000; ++i) { | |
std::string s = (const char*)toLower("HELLO"); | |
} | |
// Using a naive toLower implementation | |
// 44.5s | |
for (unsigned int i = 0; i < 500000000; ++i) { | |
std::string s = toLowerSlow("HELLO"); | |
} | |
} | |
// Conclusions:
// Avoid std::string unless it is actually needed
// Two statements are still needed for the conversion to happen at compile time: the result must first be stored in a constexpr variable
// The one-liner that uses the result of toLower directly in an expression has everything executed at runtime
I don't seem to get notifications from comments here. For those who wonder the same in the future: for each unique string a different type is instantiated, and each type indeed has its own static member.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Excellent, that works! The one thing I don't quite understand, though, is the static in the struct string. How come I can create any number of strings with one single such static? Is it because a new template is instantiated each time, so that a new static is created each time?