Last active
March 24, 2019 08:49
-
-
Save jimhansson/9378727 to your computer and use it in GitHub Desktop.
FatValue is a container for a value that can be one of several different types. It tries to conserve as much memory as possible: it is 8 bytes big.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * This class replaces a structure that had a member for an int, a member for a double, a member for a string, ...
 * and a member telling which of the other members contains the right value. That structure was 86 bytes on a 32-bit platform;
 * this is always 8 bytes, also on 64-bit platforms.
 */
/** | |
* PEMString is a lightweight wrapper for a ref-counted shared string that can be handled like a pointer | |
* think boost::flyweight<std::string> | |
* utctime that we are using below is a typedef for a int64 in some cases, a small class in other cases | |
*/ | |
/** | |
* Some compiletime checks to make sure we are compiling in an environment where FatValue | |
* will work and not create subtle bugs. | |
*/ | |
namespace {
/**
 * Compile-time sanity checks for the NaN-boxing layout FatValue relies on.
 *
 * FatValue stores either a double or a 48-bit payload plus a 16-bit tag in one
 * 64-bit word, so what matters is that a double is exactly 64 bits wide and that
 * a pointer fits in 64 bits. The width of `long` is irrelevant (the class only
 * stores int32_t), so it is no longer checked; that check rejected every LP64
 * platform for no benefit.
 */
struct COMPILER_CHECK_FATVALUE {
    static int _init;
    static int static_init() {
#ifdef PEMSTRING_SUPPORT
        //static_assert(sizeof(PEMString) == 4, "PEMString is too big");
#endif
        static_assert(sizeof(double) == sizeof(uint64_t),
                      "FatValue needs a 64-bit double");
        static_assert(sizeof(void*) <= sizeof(uint64_t),
                      "pointers are too big for FatValue");
        // static_assert is evaluated at compile time whether or not this function
        // is ever called; the return value only exists so that _init (kept for
        // backward compatibility) has something to be initialised with.
        return 42;
    }
};
int COMPILER_CHECK_FATVALUE::_init = COMPILER_CHECK_FATVALUE::static_init();
}
/** | |
* somewhat smart container for strings, utctime, doubles and int | |
* | |
 * Stores one value of one of the types above. Tries to be smart and conserve space by
 * reusing the same space. Tags the value with its type.
* | |
* because we want this to be as lightweight container as possible for the value we cant | |
* have a allocator member like you would normally do in containers, instead we use | |
* function-pointers as template arguments(thats compile-time) that don't add any "weight" | |
* to the class. | |
* | |
* NOTE: strings | |
* In the string case we are not really storing a std::string at pointed to location | |
* because MS strings are somewhat big(16 bytes) and if that would not be enough it would | |
* still take those 16 bytes and one more allocation that is big enough, so to get to the | |
* string we would in that case need to do not only one pointer dereference but | |
* two. Instead we use a char buffer. avoids the wasting of 16 bytes and the extra | |
* allocation and double pointer problem. | |
* | |
* NOTE: test-cases. | |
* This class has test-cases make sure to update them also when doing changes to this | |
* class, or else you will be eaten by grue.... | |
* | |
* NOTE: Missing test-cases | |
* - memory checks, do we have matching free:s for every malloc. | |
* | |
 * IDEA: more compression
 * We could reserve one bit for doing the compressed cons trick, placing the value directly
 * in cdr. That would in some cases avoid extra allocs, save 4 bytes and save cache lines, but
 * would lead to more unaligned operations. Not really sure if that is something we want.
* | |
* IDEA: binary data wrapper | |
* instead of using void* to represent the binary case we should have a binary data | |
* wrapper that stores the length at the start and in that way be able to handle binary | |
* data with null:s in the middle. Today those will be sliced, when copy-constructed for | |
* example. | |
* | |
* TODO: implement equality operator | |
* Not as easy as you first might think. we need to check for types and nulls before | |
* comparing values, and then depending on the type we need to do different thing, if it | |
* is a class we need to call that class equality operator, if it is pointers, then it | |
* depends on what we are pointing to. | |
* | |
* BUG: binary data with embedded nulls | |
* Today it can't handle binary data with nulls, in the middle. it will slice the data in | |
* the constructor, and only copy up to the first null character. | |
* | |
 * Q: Why is there no templated operator= (assignment)?
 * A: It is hard to implement it in an exception-safe way, and people do not expect exceptions
 * when doing assignment.
 *
 * Q: No operator <type>() conversions?
 * A: No problem for simple types, but hard for binary data and strings, and the binary data
 * would in that case return a pointer that lets you change the internals of a FatValue
 * that might have been deleted. See also the previous question.
 *
 * Q: No arithmetic operators, + / - * ...?
 * A: They would make sense when the FatValue is a double or an int, but what would <string> *
 * <double> mean?
*/ | |
template<void* (*MALLOC)(size_t) = &malloc, | |
void (*FREE)(void*) = &free> | |
class FatValue { | |
private: | |
union { | |
double asDouble; | |
uint64_t asBits; | |
struct { uint32_t asLow32; uint32_t asHigh32; }; | |
// DO NOT Try to add a struct to this union with a PEMString as debugging aid, MS | |
// compiler will fuck it up, and try to run the destructor of PEMString even when | |
// there is no PEMString, don't know where it gets the idea that it should run a | |
// destructor on members in a union. does it run them all or one at random? WTF!! | |
}; | |
// The Addresses are in a range that normally is used by the operating-system, that | |
// mean normal userspace allocations should not be able to have these addresses. | |
// so if one pointer escapes without being converted to the right range it should give | |
// us a fine access violation. | |
static const uint64_t MaxDouble = 0xfff8000000000000; | |
static const uint64_t Int32Tag = 0xfff9000000000000; | |
static const uint64_t Int32NullTag = 0x100000000; // bit 33 | |
static const uint64_t UtcTimePtrTag = 0xfffa000000000000; | |
static const uint64_t StringPtrTag = 0xfffb000000000000; | |
static const uint64_t BinaryPtrTag = 0xfffc000000000000; | |
#ifdef PEMSTRING_SUPPORT | |
static const uint64_t PEMStringTag = 0xfffd000000000000; | |
#endif | |
// still space for 1 more | |
static const uint64_t TagMask = 0xffff000000000000; | |
// we need todo cleanup in few places, we call this method so we don't have to update | |
// every place. it also reset it to a standard state of NULL of type Int32 | |
void free_resources() { | |
if(is<utctime>()) | |
FREE((void*)(asBits & ~TagMask)); | |
else if(is<std::string>()) | |
FREE((void*)(asBits & ~TagMask)); | |
else if(is<void*>()) | |
FREE((void*)(asBits & ~TagMask)); | |
#ifdef PEMSTRING_SUPPORT | |
// we require an l-value to do this, so no one-liner | |
else if(is<PEMString>() && !isNull()) { | |
uint64_t bits = asBits & ~TagMask; | |
((PEMString*)&bits)->~PEMString(); | |
} | |
#endif | |
asBits = 0 | Int32Tag | Int32NullTag; | |
} | |
inline bool isNegativeZero(double number) { | |
return number == 0 && *reinterpret_cast<int64_t *>(&number) != 0; | |
} | |
public: | |
/** | |
* default constructor sets it to Null Int32. | |
*/ | |
FatValue() { asBits = Int32Tag | Int32NullTag; } | |
/** | |
* copy-constructor | |
* | |
* Depending on type we need to do deep-copy. | |
* | |
* TODO: Use C++ things instead of old C things, todo the copy | |
*/ | |
inline FatValue(const FatValue& rhs) { | |
if(rhs.isNull()) { | |
asBits = rhs.asBits; | |
} else if(rhs.is<void*>() || rhs.is<std::string>()) { | |
char* adr = (char*) (rhs.asBits & ~TagMask); | |
size_t len = strlen(adr); | |
char* buf = new char[len+1]; | |
memcpy(buf, adr, len); | |
buf[len] = '\0'; | |
FATVALUE_ASSERT((reinterpret_cast<uint64_t>(buf) & TagMask) == 0); | |
asBits = (rhs.asBits & TagMask) | (reinterpret_cast<uint64_t>(buf) & ~TagMask); | |
} else if(rhs.is<utctime>()) { | |
utctime* timeptr = new utctime; | |
FATVALUE_ASSERT((reinterpret_cast<uint64_t>(timeptr) & TagMask) == 0); | |
*timeptr = *reinterpret_cast<utctime*>(rhs.asBits & ~TagMask); | |
asBits = UtcTimePtrTag | reinterpret_cast<uint64_t>(timeptr); | |
#ifdef PEMSTRING_SUPPORT | |
} else if(rhs.is<PEMString>()) { | |
asBits = 0; | |
PEMString* place = (PEMString*)&asBits; | |
new (place) PEMString(rhs.as<PEMString>()); | |
asBits |= PEMStringTag; | |
#endif | |
} else { | |
asBits = rhs.asBits; | |
} | |
} | |
template<class X> | |
static inline FatValue null() { return null<int32_t>(); }; | |
template<> static FatValue null<std::string>() { | |
FatValue v; v.asBits = 0 | StringPtrTag; return v; } | |
template<> static FatValue null<utctime>() { | |
FatValue v; v.asBits = 0 | UtcTimePtrTag; return v; } | |
template<> static FatValue null<void*>() { | |
FatValue v; v.asBits = 0 | BinaryPtrTag; return v; } | |
template<> static FatValue null<int32_t>() { | |
FatValue v; v.asBits = 0 | Int32Tag | Int32NullTag; return v; } | |
template<> static FatValue null<double>() { | |
FatValue v; v.asBits = MaxDouble; return v; } | |
#ifdef PEMSTRING_SUPPORT | |
template<> static FatValue null<PEMString>() { | |
FatValue v; v.asBits = 0 | PEMStringTag; return v; } | |
#endif | |
/** | |
* Non-throning swap so we can implement other operators in a exception-safe way. | |
* | |
* the standard implementation of swap cant handle structs with unions in a safe way, | |
* therefor we need to implement the swap function. this is also called by the external | |
* version of swap that has been specialized for this class, so we don't need to copy | |
* the implementation around. | |
* | |
* do not implement this in form of the XOR swap trick. It would mean self-assignment | |
* whould cancel out the values. | |
*/ | |
inline FatValue& swap(FatValue& second) { | |
uint64_t bits = this->asBits; | |
this->asBits = second.asBits; | |
second.asBits = bits; | |
return *this; | |
} | |
/** | |
* Assignment operator, | |
* | |
* does not take const ref on purpose, can you figure out why? | |
* | |
* in C++11 when you have implemented a swap the safest way to implement a assignement | |
* operator is to let the function create a copy as done here and swap this with the copy | |
* like it is done here. if you are implementing a pre-C++11 class us then you should | |
* probebly use the old way of implementing the operator= in the form av a | |
* copy-constructor call with checks for self-assignment, that is not needed in this | |
* version because my swap does not use the XOR swap trick, if it did it would cancel | |
* out the value!! | |
* | |
* A check for self-assignment would cost more in branching than it would save. | |
*/ | |
inline FatValue& operator=(FatValue rhs) { | |
return this->swap(rhs); | |
} | |
/** | |
* move-constructor | |
* | |
* Allows the compiler to do some optimizations | |
*/ | |
inline FatValue(FatValue&& rhs) { this->swap(rhs); } | |
/** | |
* Double constructor. | |
*/ | |
inline FatValue(const double& number) { | |
// for some reason VC cant handle this in release mode and think it is doing something | |
// bad with the heap, but this does not really do anything with the heap. | |
//int32_t asInt32 = static_cast<int32_t>(number); | |
// if the double can be losslessly stored as an int32 do so (int32 doesn't have -0, | |
// so check for that too) | |
//if (number == asInt32 && !isNegativeZero(number)) { | |
// its not really right to do this to poor not so old this (pun intended) | |
// *this = FatValue(asInt32); | |
// assignment to a object that is not really constructed yet. | |
//} else { | |
asDouble = number; | |
//} | |
} | |
/** | |
* Int32 constructor | |
*/ | |
inline FatValue(const int32_t number) { asBits = number | Int32Tag; } | |
/** | |
* utctime constructor | |
* | |
* I wish we would tag utctime as a real type and not only use a typedef. | |
*/ | |
inline FatValue(const utctime time) { | |
utctime* timeptr = (utctime*)MALLOC(sizeof(utctime)); | |
*timeptr = time; | |
FATVALUE_ASSERT((reinterpret_cast<uint64_t>(timeptr) & TagMask) == 0); | |
asBits = reinterpret_cast<uint64_t>(timeptr) | UtcTimePtrTag; | |
} | |
#ifdef PEMSTRING_SUPPORT | |
/** | |
* PEMString constructor | |
*/ | |
inline FatValue(const PEMString str) { | |
asBits = 0; | |
PEMString* place = (PEMString*)&asBits; | |
new (place) PEMString(str); | |
asBits |= PEMStringTag; | |
} | |
#endif | |
/** | |
* string constructor. | |
* | |
* WARNING: Does allocation and memcpy | |
*/ | |
inline FatValue(const std::string str) { | |
// is this unicode safe? | |
char* strptr =(char*)MALLOC(str.size() + 1); | |
memcpy(strptr, str.c_str(), str.size()); | |
strptr[str.size()] = '\0'; | |
FATVALUE_ASSERT((reinterpret_cast<uint64_t>(strptr) & TagMask) == 0); | |
asBits = reinterpret_cast<uint64_t>(strptr) | StringPtrTag; | |
} | |
inline FatValue(const char* str) { | |
#ifdef PEMSTRING_SUPPORT | |
*this = FatValue(PEMString(str)); | |
#else | |
*this = FatValue(std::string(str)); | |
#endif | |
} | |
/** | |
* binary constructor. | |
* | |
* WARNING: Does allocation and memcpy | |
*/ | |
inline FatValue(const void* bin) { | |
// unsafe strlen of a binary block | |
size_t length = 0; | |
char* binptr = 0; | |
if(bin != nullptr) { | |
length = strlen((char*)bin); | |
binptr = (char*)MALLOC(length + 1); | |
memcpy(binptr, (char*)bin, length); | |
binptr[length] = '\0'; | |
} | |
FATVALUE_ASSERT((reinterpret_cast<uint64_t>(binptr) & TagMask) == 0); | |
asBits = reinterpret_cast<uint64_t>(binptr) | BinaryPtrTag; | |
} | |
~FatValue() { | |
free_resources(); | |
} | |
template<typename T> | |
inline bool is() const; | |
template<> inline bool is<double>() const { return isDouble(); } | |
template<> inline bool is<int32_t>() const { return isInt32(); } | |
template<> inline bool is<utctime>() const { return isUtcTime(); } | |
template<> inline bool is<std::string>() const { return isString(); } | |
template<> inline bool is<void*>() const { return isBinaryPtr(); } | |
#ifdef PEMSTRING_SUPPORT | |
template<> inline bool is<PEMString>() const { return isPEMString(); } | |
#endif | |
inline bool isDouble() const { return (asBits < MaxDouble); } | |
inline bool isInt32() const { return (asBits & TagMask) == Int32Tag; } | |
inline bool isUtcTime() const { return (asBits & TagMask) == UtcTimePtrTag; } | |
inline bool isString() const { return (asBits & TagMask) == StringPtrTag; } | |
inline bool isBinaryPtr() const { return (asBits & TagMask) == BinaryPtrTag; } | |
#ifdef PEMSTRING_SUPPORT | |
inline bool isPEMString() const { return (asBits & TagMask) == PEMStringTag; } | |
#endif | |
/** | |
* tells you if it's null but to do this we first need to find out the type, because | |
* null is represented in different ways depending on type. If we had a general way of | |
* representing nulls, this would have been more straight-forward to implement. | |
* | |
* it could be done by reserving a bit for null as a type. | |
*/ | |
inline bool isNull() const { | |
if(isInt32()) | |
return (asBits & Int32NullTag) > 0; | |
else if(isString()) | |
return reinterpret_cast<char*>(asBits & ~StringPtrTag) == NULL; | |
else if(isUtcTime()) | |
return reinterpret_cast<utctime*>(asBits & ~UtcTimePtrTag) == NULL; | |
else if(isBinaryPtr()) | |
return reinterpret_cast<void*>(asBits & ~BinaryPtrTag) == NULL; | |
#ifdef PEMSTRING_SUPPORT | |
else if(isPEMString()) | |
return (asBits & ~PEMStringTag) == NULL; | |
#endif | |
// We are reusing nan as representation of null, for doubles. | |
// this will break under some compilers, with some options. but MS could not get | |
// their shit together and provide us with a isnan function. | |
return asDouble != asDouble; | |
} | |
/** | |
* get value as a type, if it's not of that type raise an error by using | |
* FATVALUE_ASSERT, as standard this means halting the program. | |
* | |
* SO CHECK THE TYPE BEFORE! | |
* | |
* the only exception is that you can retrieve a string from a PEMString, but not the | |
* other way around! | |
* | |
* Q: why not the other way? | |
* A: because that would create a PEMString used in maybe only one place(and might be | |
* discarded pretty fast) but it would take up a place forever in the PEMString | |
* registry. | |
*/ | |
template<typename T> | |
inline T as() const; | |
template<> inline double as<double>() const { return getDouble(); } | |
template<> inline int32_t as<int32_t>() const { return getInt32(); } | |
template<> inline utctime as<utctime>() const { return getUtcTime(); } | |
template<> inline void* as<void*>() const { return getBinaryPtr(); } | |
#ifdef PEMSTRING_SUPPORT | |
template<> inline std::string as<std::string>() const { return std::string(is<PEMString>() ? getPEMString() : getString()); } | |
template<> inline PEMString as<PEMString>() const { return getPEMString(); } | |
#else | |
template<> inline std::string as<std::string>() const { return getString(); } | |
#endif | |
inline double getDouble() const { | |
FATVALUE_ASSERT(isDouble()); | |
return asDouble; | |
} | |
inline int32_t getInt32() const { | |
FATVALUE_ASSERT(isInt32()); | |
return static_cast<int32_t>(asBits & ~Int32Tag); | |
} | |
inline utctime getUtcTime() const { | |
FATVALUE_ASSERT(isUtcTime()); | |
return *reinterpret_cast<utctime*>(asBits & ~UtcTimePtrTag); | |
} | |
/** | |
* return value as string if it is a string, this is not as forgiving as the "as" | |
* operator that would convert a PEMString to a string automatically | |
*/ | |
inline std::string getString() const { | |
FATVALUE_ASSERT(isString()); | |
return std::string(reinterpret_cast<char*>(asBits & ~StringPtrTag)); | |
} | |
inline void* getBinaryPtr() const { | |
FATVALUE_ASSERT(isBinaryPtr()); | |
return reinterpret_cast<void*>(asBits & ~BinaryPtrTag); | |
} | |
#ifdef PEMSTRING_SUPPORT | |
inline PEMString getPEMString() const { | |
FATVALUE_ASSERT(isPEMString()); | |
uint64_t bits = asBits & ~PEMStringTag; | |
PEMString* asPEMString = (PEMString*)&bits; | |
return PEMString(*asPEMString); | |
} | |
#endif | |
}; | |
/**
 * ADL swap for FatValue
 *
 * We do not provide a version where M and F differ between the values, because that
 * cannot be implemented in a safe way without doing some form of copying of values; it
 * gets too complex. Besides, you would probably not need it.
 */
template<void* (*M)(size_t),void (*F)(void*)> | |
void swap(FatValue<M,F>& first, FatValue<M,F>& second) { | |
first.swap(second); | |
} | |
/** | |
* we want it to be easy to print, as primitive type. | |
*/ | |
template<void* (*M)(size_t),void (*F)(void*)> | |
std::ostream& operator<<(std::ostream& os, const FatValue<M,F>& v) { | |
if(v.isNull()) | |
return os << "<NULL>"; | |
else if(v.is<int>()) | |
return os << v.as<int>(); | |
else if(v.is<std::string>()) | |
return os << v.as<std::string>(); | |
else if(v.is<utctime>()) | |
return os << v.as<utctime>(); | |
else if(v.is<void*>()) | |
return os << std::hex << *v.as<void*>(); | |
else if(v.is<double>()) | |
return os << v.as<double>(); | |
#ifdef PEMSTRING_SUPPORT | |
else if(v.is<PEMString>()) | |
return os << v.as<PEMString(); | |
#endif | |
return os // make the compiler shut up. | |
} | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment