Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
An experimental "very short string optimization"
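// A fun little experiment I came up with:
// similar to the classic "short string optimization", we can use the bytes of
// the pointer member itself as the short-string buffer.
// A "tagged pointer" distinguishes the two representations: the lowest bit of
// the pointer value is set when the pointer's own bytes hold the string, and
// clear when it points to a heap-allocated string.
// NOTE: the layout assumes a little-endian target, where the lowest bit of the
// pointer value lives in the first byte of its storage.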
#include <cassert>
#include <cstdint>
#include <cstring>
/// An owning, null-terminated string that is exactly one pointer in size.
///
/// Very short strings are stored inside the bytes of the pointer member itself;
/// longer strings live in a heap buffer allocated with `new[]`. The lowest bit
/// of the pointer value is the tag that tells the two representations apart:
///   - tag set   -> "internal": byte 0 of the pointer holds the tag, the
///                  remaining sizeof(char*) - 1 bytes hold the characters
///                  *and* the terminating null;
///   - tag clear -> the member is an ordinary pointer to a heap buffer.
///
/// The layout assumes a little-endian target (the low bit of the pointer value
/// must live in the first byte of its storage) and that `new char[]` never
/// returns an odd address.
class tiny_string
{
public:
    /// Creates an empty string. Uses the internal buffer, so get() is never null.
    tiny_string() noexcept { set_internal_tag(); }

    /// Creates a copy of the C string `str`; null or "" gives an empty string.
    tiny_string(const char *str) { set_internal_tag(); set(str); }

    /// Deep-copies `rhs`.
    tiny_string(const tiny_string &rhs) { set_internal_tag(); set(rhs.get()); }

    /// Takes over the representation of `rhs` and leaves `rhs` empty.
    tiny_string(tiny_string &&rhs) noexcept : ptr(rhs.ptr) { rhs.set_internal_tag(); }

    /// Frees the heap buffer, if there is one.
    ~tiny_string() { clear(); }

    /// Deep-copies `rhs`; self-assignment is a no-op.
    tiny_string &operator=(const tiny_string &rhs)
    {
        if (this != &rhs)
            set(rhs.get());
        return *this;
    }

    /// Releases the current contents, takes over the representation of `rhs`
    /// and leaves `rhs` empty. Cannot throw, so it is marked noexcept.
    tiny_string &operator=(tiny_string &&rhs) noexcept
    {
        if (this != &rhs)
        {
            clear();
            ptr = rhs.ptr;
            rhs.set_internal_tag();
        }
        return *this;
    }

    /// Returns the null-terminated contents (never null).
    /// The pointer is invalidated by any modification of this object and, for
    /// internally stored strings, also by moving the object.
    const char *get() const noexcept
    {
        // if the tag is set, the string lives in the bytes of the pointer itself
        return is_internal() ? get_internal() : ptr;
    }

    /// Replaces the contents with a copy of `str`; null or "" gives an empty string.
    ///
    /// `str` may point into this object's current contents (e.g. `s.set(s.get() + 1)`):
    /// the new value is fully staged before the old storage is released.
    /// If allocation throws, the string is left unchanged.
    void set(const char *str)
    {
        const std::size_t len = str ? std::strlen(str) : 0;

        // The internal buffer has INTERNAL_SIZE bytes *including* the terminating
        // null, so only strings strictly shorter than INTERNAL_SIZE fit.
        if (len < INTERNAL_SIZE)
        {
            // Stage through a zero-filled temporary: clear() rewrites the pointer
            // bytes, which may be the very bytes `str` points at.
            char staged[INTERNAL_SIZE] = {};
            if (len != 0)
                std::memcpy(staged, str, len);
            clear();
            // clear() set the tag in byte 0; the copy below leaves that byte alone.
            std::memcpy(get_internal(), staged, INTERNAL_SIZE);
        }
        else
        {
            // Allocate and copy before releasing the old buffer, so `str` may alias it.
            char *fresh = new char[len + 1];
            std::memcpy(fresh, str, len + 1);
            clear();
            // overwriting the member clears the tag
            ptr = fresh;
            // operator new has to "return a pointer suitably aligned to hold an object
            // of any fundamental alignment", i.e. probably > 1.
            assert(!is_internal());
        }
    }

    /// Resets the string to empty, freeing the heap buffer if there is one.
    void clear() noexcept
    {
        if (!is_internal())
            delete[] ptr;
        set_internal_tag();
    }

private:
    /// True when the string is stored in the pointer's own bytes.
    bool is_internal() const noexcept { return (reinterpret_cast<uintptr_t>(ptr) & 1) != 0; }

    /// Start of the internal buffer: the bytes of `ptr` after the tag byte.
    char *get_internal() noexcept { return reinterpret_cast<char*>(&ptr) + 1; }
    const char *get_internal() const noexcept { return reinterpret_cast<const char*>(&ptr) + 1; }

    /// Switches to the internal representation holding an empty string:
    /// tag bit set, every other byte zero.
    void set_internal_tag() noexcept
    {
        ptr = reinterpret_cast<char*>(1);
        // Debug-only check of the little-endian layout assumption: the tag must
        // have landed in byte 0, in front of the internal buffer.
        assert(*reinterpret_cast<const unsigned char*>(&ptr) == 1);
    }

    // first byte of the pointer is used for the tag, but the rest is buffer
    // (including terminating null), so at most INTERNAL_SIZE - 1 characters fit
    static constexpr std::size_t INTERNAL_SIZE = sizeof(char*) - 1;

    char *ptr;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.