Created
September 6, 2016 01:31
-
-
Save mooware/f85b9064f7598d061e117ad7082c7099 to your computer and use it in GitHub Desktop.
An experimental "very short string optimization"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// A fun little experiment I came up with:
// similar to the classic "short string optimization", we could actually use
// the bytes of the pointer itself as the short string buffer.
// A "tagged pointer" is used to distinguish between the internal buffer
// and a heap-allocated string.
#include <cassert> | |
#include <cstdint> | |
#include <cstring> | |
// A null-terminated string that stores very short values inside the bytes of
// its own pointer member instead of on the heap.
//
// Layout: the lowest bit of `ptr` (viewed as an integer) is a tag.
//   - tag set:   the string lives inline in bytes 1..sizeof(char*)-1 of `ptr`.
//   - tag clear: `ptr` is a heap buffer obtained from new[] and owned by us.
//
// Byte 0 of `ptr` must therefore be the least significant byte, i.e. this
// layout only works on little-endian targets.
class tiny_string
{
public:
    // default ctor uses the internal buffer, so it's empty and not null
    tiny_string() noexcept { set_internal_tag(); }

    // Copies `str`; a null or empty `str` yields an empty string.
    tiny_string(const char *str) { set_internal_tag(); set(str); }

    tiny_string(const tiny_string &rhs) { set_internal_tag(); set(rhs.get()); }

    // Steals rhs' representation (inline bytes or heap pointer) and leaves
    // rhs empty.
    tiny_string(tiny_string &&rhs) noexcept : ptr(rhs.ptr) { rhs.set_internal_tag(); }

    ~tiny_string() { clear(); }

    tiny_string &operator=(const tiny_string &rhs)
    {
        if (this != &rhs)
            set(rhs.get());
        return *this;
    }

    tiny_string &operator=(tiny_string &&rhs) noexcept
    {
        if (this != &rhs)
        {
            clear();
            ptr = rhs.ptr;
            rhs.set_internal_tag();
        }
        return *this;
    }

    // Returns the current contents as a null-terminated string; never null.
    // The pointer is invalidated by any modification of this object and, for
    // inline strings, by moving the object itself.
    const char *get() const
    {
        // if the tag is set, use the internal buffer, i.e. the pointer
        return is_internal() ? get_internal() : ptr;
    }

    // Replaces the contents with a copy of `str` (null or "" clears).
    // `str` may point into this object's current contents: the new value is
    // always copied before the old storage is released. If the allocation
    // throws, the object is left unchanged.
    void set(const char *str)
    {
        if (!str || !*str)
        {
            clear();
            return;
        }

        const std::size_t len = std::strlen(str);
        if (len < INTERNAL_SIZE)
        {
            // INTERNAL_SIZE counts the terminating null, so only strings
            // strictly shorter than it fit inline. Stage through a local
            // buffer because clear() zeroes the inline bytes (and frees the
            // heap buffer) that `str` might be pointing into.
            char staged[INTERNAL_SIZE];
            std::memcpy(staged, str, len + 1);
            clear(); // also sets the tag
            std::memcpy(get_internal(), staged, len + 1);
        }
        else
        {
            char *fresh = new char[len + 1];
            std::memcpy(fresh, str, len + 1);
            // operator new has to "return a pointer suitably aligned to hold an object
            // of any fundamental alignment", i.e. probably > 1, so the tag bit is clear.
            assert((reinterpret_cast<std::uintptr_t>(fresh) & 1) == 0);
            clear();
            // overwriting the member clears the tag
            ptr = fresh;
        }
    }

    // reset the string to empty
    void clear() noexcept
    {
        if (!is_internal())
            delete[] ptr;
        set_internal_tag();
    }

private:
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
    static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__,
                  "tiny_string keeps its tag in byte 0 of the pointer and "
                  "requires a little-endian target");
#endif

    bool is_internal() const noexcept { return reinterpret_cast<std::uintptr_t>(ptr) & 1; }

    // Inline buffer: every byte of `ptr` except the first (tag) byte.
    char *get_internal() noexcept { return reinterpret_cast<char*>(&ptr) + 1; }
    const char *get_internal() const noexcept { return reinterpret_cast<const char*>(&ptr) + 1; }

    // Sets the tag and, as a side effect, zeroes the inline buffer, which
    // makes the inline string empty.
    void set_internal_tag() noexcept { ptr = reinterpret_cast<char*>(1); }

    // first byte of the pointer is used for the tag, but the rest is buffer
    // (including terminating null)
    static constexpr std::size_t INTERNAL_SIZE = sizeof(char*) - 1;

    char *ptr;
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment