This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// arm-linux-gnueabihf-gcc 6.3.0 at -O2 ..versus.. bitfields
// some struct definition for pin config registers:
// Bit-field description of one pin ("pad") configuration register.
//
// Seven bits are declared, in this order: mode (3), pull (2), rx (1),
// slew (1).  How those bits are placed inside the storage unit is up to the
// compiler/ABI; the static_assert after the struct only pins the total size.
struct Pad {
    // Enumerators for the 2-bit `pull` field.
    enum pull_t { pull_down, no_pull, pull_up };
    // Enumerators for the 1-bit `slew` field.
    enum slew_t { fast, slow };
    // Enumerators for the 1-bit `rx` field.
    enum rx_t { rx_dis, rx_en };

    // Plain `unsigned` instead of the non-standard `uint` alias: same type,
    // but no dependency on a platform header providing the typedef.
    unsigned mode : 3;
    pull_t pull : 2;
    rx_t rx : 1;
    slew_t slew : 1;

    // one of the wrappers
    //
    // Returns a Pad with the given mode, pull and slew, and with rx always
    // set to rx_en.  slew defaults to fast.
    static Pad io( unsigned mode, pull_t pull, slew_t slew = fast ) {
        return { mode, pull, rx_en, slew };
    }
    // (note: declaring it constexpr makes some cases below even worse)
};

// The whole register description must occupy exactly one 32-bit word.
static_assert( sizeof(Pad) == 4, "Pad must be exactly 4 bytes" );
// Test case 1: build one Pad through the Pad::io() wrapper (mode 7,
// pull_down, default slew) and store that same value into x[0] and x[1].
//
// x must point to at least two Pad objects; both are overwritten.
void foo( Pad *x ) {
    Pad tmp = Pad::io( 7, Pad::pull_down );
    x[0] = tmp;
    x[1] = tmp;
}
// disassembly:
//      movs    r3, #7              // tmp = 7
//      bfc     r3, #3, #2          // tmp &= ~0x18
//      orr     r3, r3, #32         // tmp |= 0x20
//      bfc     r3, #6, #1          // tmp &= ~0x40
//      str     r3, [r0]            // x[0] = tmp
//      str     r3, [r0, #4]        // x[1] = tmp
//
// Reminder: this is -O2, not -O0 ... go home gcc, you're drunk
// Okay, maybe that (completely trivial) wrapper function is hard for
// gcc to optimize? (when it's drunk anyway...)
//
// Let's try directly using aggregate initialization instead!
//
// Test case 2: same stores as the wrapper-based foo(), but the Pad value is
// built directly with aggregate initialization (mode 7, pull_down, rx_en,
// fast) instead of going through Pad::io().
//
// NOTE: this shares its name and signature with the Pad::io()-based variant
// earlier in this file, so only one of the two can be compiled at a time.
//
// x must point to at least two Pad objects; both are overwritten.
void foo( Pad *x ) {
    Pad tmp { 7, Pad::pull_down, Pad::rx_en, Pad::fast };
    x[0] = tmp;
    x[1] = tmp;
}
//      ldrb    r3, [r0]            // tmp = x[0].byte[0]
//      and     r3, r3, #192        // tmp &= 0xc0
//      orr     r3, r3, #39         // tmp |= 0x27
//      bfc     r3, #6, #1          // tmp &= ~0x40
//      strb    r3, [r0]            // x[0].byte[0] = tmp
//      ldrb    r3, [r0, #4]        // tmp = x[1].byte[0]
//      and     r3, r3, #192        // tmp &= 0xc0
//      orr     r3, r3, #39         // tmp |= 0x27
//      bfc     r3, #6, #1          // tmp &= ~0x40
//      strb    r3, [r0, #4]        // x[1].byte[0] = tmp
//
// ... I don't even...
//
// Oddly, it now suddenly seems to understand that 7 | 32 == 39,
// although combining the 'and' and 'bfc' was still too hard for it...
// but why the HELL is it performing a read-modify-write on the
// lowest byte when I in fact assigned an entire 4-byte value (as
// confirmed by the static_assert)?
//
// In case you're wondering: no, adding an explicit padding field to
// ensure all 32 bits are covered does not help, it makes it worse:
//
//      ldrb    r3, [r0]            // tmp = x[0].byte[0]
//      and     r3, r3, #192        // tmp &= 0xc0
//      orr     r3, r3, #39         // tmp |= 0x27
//      bfc     r3, #6, #1          // tmp &= ~0x40
//      strb    r3, [r0]            // x[0].byte[0] = tmp
//      ldr     r3, [r0]            // tmp = x[0]
//      bfc     r3, #7, #25         // tmp &= 0x7f
//      str     r3, [r0]            // x[0] = tmp
//      (...repeat for x[1]...)
//
//
// My intuition that it just generates awful code for copying this
// struct in general also turned out not to be true, e.g.:
//
// Control case: stores an already-built Pad, received by value, into x[0]
// and x[1] without constructing anything inside the function.
//
// x must point to at least two Pad objects; both are overwritten.
void copy( Pad *x, Pad tmp ) {
    x[0] = tmp;
    x[1] = tmp;
}
// disassembly:
//      str     r1, [r0]
//      str     r1, [r0, #4]
//
// which is not perfect (see below) but at least sane.
// Finally, for comparison, this is what clang produces for foo:
//      movs    r1, #39
//      strd    r1, r1, [r0]
//
// and for copy:
//      strd    r1, r1, [r0]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment