-
-
Save LabunskyA/4ac8bcf10c70e7223fe4a8c0b201f897 to your computer and use it in GitHub Desktop.
/* | |
* Use this macro to define variables to save space in your structures | |
* Imagine you have the following structure: | |
* struct A { | |
* void* my_ptr; // 8 bytes | |
* | |
* uint8_t my_flag :1; // merged with the next | |
* uint16_t my_uint :15; // 2 bytes total | |
* } | |
* | |
* The size of that structure is (16) because of structure padding.
* Compilers pad a structure so that its total size is a multiple of its
* strictest member alignment (usually the size of the biggest primitive
* member), and since (8 + 2 = 10) is not a multiple of (8), the structure
* is padded to (16)
* | |
* You can use different compiler flags and stuff (like #pragma pack) to | |
* disable such padding, but that way the compiler can create less efficient | |
* code for pretty much every operation with every not aligned structure member | |
* | |
* Another way is to use fixed width byte array as a replacement for the | |
* structure member: | |
* | |
* struct B { | |
* char my_ptr[sizeof(void*)]; | |
* | |
* uint8_t my_flag :1; | |
* uint16_t my_uint :15; | |
* } | |
* | |
* Now the size of your structure is actually (10) bytes since it can be | |
* divided by the new biggest member size of (2). Memory representation | |
* of the structure will be (probably) the same as for the packed with pragma | |
* | |
* The only difference - you will need to use some hacks to access, write
* and copy the value of such a member. Since my_ptr is now a char array, it
* decays to a char pointer that can be cast for free to a void* pointer array
* in which you can easily access the first and only element like this:
*
* ((void**) b.my_ptr)[0]
* | |
* The only problem - this access will require an additional (lea or some other)
* instruction (without compiler optimizations ofc) to find the effective address
* of the given variable and copy it temporarily to a CPU register. So, doing:
*
* void* some_ptr = ...;
* ((void**) b.my_ptr)[0] = some_ptr;
*
* will take 30% more instructions on x86_64 with gcc -O0 than
* a.my_ptr = some_ptr;
* | |
* On the other hand, a memcpy call:
* memcpy(b.my_ptr, &some_ptr, sizeof(void*));
*
* will compile to the exact same assembly code. Unless you need to assign
* a constant value to the member or pass its value to a function, you will get
* the exact same assembly code as for the usual padded structure
* | |
* | |
* This small macro library allows simple usage of this technique.
* You can transform only some members manually to reduce the performance penalty
* and structure size at the same time
*/
#ifndef RAW_CTYPE | |
#define RAW_CTYPE | |
/*
 * raw_ctype_member(type, var_name)
 *
 * Declares a structure member named var_name as a plain char array that is
 * exactly sizeof(type) bytes long, instead of declaring it as `type` itself.
 * A char array only needs byte alignment, so the member does not raise the
 * alignment (and with it the padding) of the enclosing structure.
 *
 * struct my_struct {
 *     ...
 *     type_a member_a;
 *     raw_ctype_member(type_b, member_b);
 *     ...
 * };
 *
 * struct my_struct my_var;
 * my_var.member_a; // type_a
 * my_var.member_b; // char[sizeof(type_b)], decays to char* in expressions
 *
 * var_name is wrapped in parentheses in the expansion, so a function-like
 * macro that happens to share the member's name is not expanded here.
 */
#define raw_ctype_member(type, var_name) char (var_name)[sizeof(type)]
/*
 * raw_ctype(type, var)
 *
 * Gives typed access to a raw member: the bytes starting at var are
 * reinterpreted as one object of `type`. The result is an lvalue, so it can
 * be read, assigned to, or have its address taken:
 *
 *     raw_ctype(type_b, my_var.member_b) = value; // store
 *     value = raw_ctype(type_b, my_var.member_b); // load
 *
 * NOTE: the access goes through a `type *` produced by a cast, not through
 * memcpy. A raw member only guarantees byte alignment, so this relies on the
 * platform tolerating unaligned loads/stores and on the compiler not
 * exploiting strict-aliasing rules. Where that is a concern, prefer
 * raw_ctype_copyfrom / raw_ctype_copyto.
 */
#define raw_ctype(type, var) (*(type *)(var))
/*
 * raw_ctype_copyfrom(var, src) / raw_ctype_copyto(dest, var)
 *
 * Copy a value between a raw member and an ordinary ("normal") variable of
 * the same type, byte by byte via memcpy. Because memcpy makes no alignment
 * assumptions about either pointer, these are the safe way to move a value in
 * or out of a raw member; compilers commonly turn such a fixed-size memcpy
 * into the same code as a plain assignment, so it is expected to be no slower
 * than assigning through raw_ctype with "=".
 *
 *   raw_ctype_copyfrom(var, src)  - store: copies sizeof(src) bytes of src
 *                                   into the raw member var
 *   raw_ctype_copyto(dest, var)   - load: copies sizeof(dest) bytes of the
 *                                   raw member var into dest
 *
 * The byte count is taken from the typed variable (src / dest), not from the
 * raw member, so that variable must have exactly the type the member was
 * declared with; a wider or narrower type copies too many or too few bytes.
 * src and dest must be addressable objects (the macros apply & to them).
 * Both macros expand to the memcpy call itself and yield its return value.
 *
 * Use like following:
 *
 * type_b some_other_b = ...;
 * // some_other_b != raw_ctype(type_b, my_var.member_b)
 *
 * raw_ctype_copyfrom(my_var.member_b, some_other_b);
 * // some_other_b == raw_ctype(type_b, my_var.member_b)
 *
 * type_b another_b;
 * raw_ctype_copyto(another_b, my_var.member_b);
 * // another_b == raw_ctype(type_b, my_var.member_b)
 */
#include <string.h> /* memcpy: keeps this header usable on its own */

#define raw_ctype_copyfrom(var, src) memcpy((var), &(src), sizeof(src))
#define raw_ctype_copyto(dest, var) memcpy(&(dest), (var), sizeof(dest))
/* | |
* You can always change the macro names to be prettier
* than they are now. IDK.
*/ | |
#endif | |
/* | |
* Copyright (c) 2018 Labunsky Artem | |
* All rights reserved. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions are met: | |
* | |
* 1. Redistributions of source code must retain the above copyright notice, this | |
* list of conditions and the following disclaimer. | |
* 2. Redistributions in binary form must reproduce the above copyright notice, | |
* this list of conditions and the following disclaimer in the documentation | |
* and/or other materials provided with the distribution. | |
* | |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | |
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
*/
Well, most people who will read this gist - coming from HN - won’t know, because you didn’t document the limitations of this approach.
I assumed it will be obvious to them, since the whole point is to break alignment in the first place, but still briefly mentioned possible consequences in the comments:
* You can use different compiler flags and stuff (like #pragma pack) to
* disable such padding, but that way the compiler can create less efficient
* code for pretty much every operation with every not aligned structure member
...
* You can transform only some members manually to reduce the performance penalty
* and structure size at the same time
Anyway, thank you for providing detailed problems descriptions :)
/*
 * Commenter's alternative: the same members as struct A from the header
 * comment, but packed through the compiler-specific __attribute__ ((packed))
 * extension instead of a raw char-array member. The commenter reports
 * sizeof (struct A) == 10 for this layout.
 */
struct __attribute__ ((packed)) A {
void* my_ptr; // 8 bytes
uint8_t my_flag :1; // merged with the next
uint16_t my_uint :15; // 2 bytes total
};
sizeof (struct A) == 10
struct __attribute__ ((packed)) A { void* my_ptr; // 8 bytes uint8_t my_flag :1; // merged with the next uint16_t my_uint :15; // 2 bytes total }; sizeof (struct A) == 10
Идея в том, чтобы не использовать расширения компиляторов. С их помощью они (из тех, что вообще поддерживают такой атрибут) будут разворачивать код в другие, менее оптимальные наборы инструкций при работе со структурой (доступ, включая поля, присваивание и пр.), обещая нам лишь небольшой ее размер. Более того, это лишает гибкости в случаях, когда нас интересует свертка лишь определенных полей, и обеспечит головной болью при портировании кода между не то, что архитектурами, но даже между разными ОС :)
Who knew, right?
I mean, yeah, it's a tradeoff. This code helped me to save about 40% (~4 GB) of RAM while processing big trees with nearly no performance penalty (5% or so), so at least sometimes it's a fine one
The good thing is - you can control which members will get such access type, not relying on the compiler's decision (#pragma and stuff)