// arm-linux-gnueabihf-gcc 6.3.0 at -O2 ..versus.. bitfields
// some struct definition for pin config registers:
// Pin-configuration value packed into bitfields. Per the disassembly
// below, gcc allocates mode in bits 0-2, pull in bits 3-4, rx in bit 5
// and slew in bit 6; the remaining bits are unnamed padding up to the
// full 4-byte word.
// NOTE(review): `uint` is non-standard C++ -- presumably the glibc
// <sys/types.h> typedef for unsigned int; confirm it is in scope.
struct Pad {
// Pull resistor selection for the pad.
enum pull_t { pull_down, no_pull, pull_up };
// Output slew-rate selection.
enum slew_t { fast, slow };
// Receiver (input path) enable.
enum rx_t { rx_dis, rx_en };
uint mode : 3; // pad mux/mode selector, 0-7
pull_t pull : 2;
rx_t rx : 1;
slew_t slew : 1;
// one of the wrappers
// Convenience factory: builds a Pad with the receiver enabled and a
// defaulted fast slew rate.
static Pad io( uint mode, pull_t pull, slew_t slew = fast ) {
return { mode, pull, rx_en, slew };
}
// (note: declaring it constexpr makes some cases below even worse)
};
// The struct must occupy exactly one 32-bit word -- the point being that
// an assignment of a whole Pad could legally be a single word store.
static_assert( sizeof(Pad) == 4 );
// Test case 1: write the same configuration (mode 7, pull-down, rx
// enabled, default fast slew) to two consecutive Pad slots, building
// the value through the Pad::io() wrapper.
void foo( Pad *x ) {
Pad tmp = Pad::io( 7, Pad::pull_down );
x[0] = tmp;
x[1] = tmp;
}
// disassembly:
// movs r3, #7 // tmp = 7
// bfc r3, #3, #2 // tmp &= ~0x18
// orr r3, r3, #32 // tmp |= 0x20
// bfc r3, #6, #1 // tmp &= ~0x40
// str r3, [r0] // x[0] = tmp
// str r3, [r0, #4] // x[1] = tmp
//
// Reminder: this is -O2, not -O0 ... go home gcc, you're drunk
// okay, maybe that (completely trivial) wrapper function is hard for
// gcc to optimize? (when it's drunk anyway...)
//
// Let's try directly using aggregate initialization instead!
//
// Test case 2: identical stores, but tmp is built with direct aggregate
// initialization instead of the Pad::io() wrapper (all four fields
// spelled out explicitly).
void foo( Pad *x ) {
Pad tmp { 7, Pad::pull_down, Pad::rx_en, Pad::fast };
x[0] = tmp;
x[1] = tmp;
}
// ldrb r3, [r0] // tmp = x[0].byte[0]
// and r3, r3, #192 // tmp &= 0xc0
// orr r3, r3, #39 // tmp |= 0x27
// bfc r3, #6, #1 // tmp &= ~0x40
// strb r3, [r0] // x[0].byte[0] = tmp
// ldrb r3, [r0, #4] // tmp = x[1].byte[0]
// and r3, r3, #192 // tmp &= 0xc0
// orr r3, r3, #39 // tmp |= 0x27
// bfc r3, #6, #1 // tmp &= ~0x40
// strb r3, [r0, #4] // x[1].byte[0] = tmp
//
// ... I don't even...
//
// oddly it now suddenly seems to understand that 7 | 32 == 39,
// although combining the 'and' and 'bfc' was still too hard for it...
// but why the HELL is it performing read-modify-update on the
// lowest byte when I in fact assigned an entire 4-byte value (as
// confirmed by the static_assert).
//
// in case you're wondering: no, adding an explicit padding field to
// ensure all 32 bits are covered does not help, it makes it worse:
//
// ldrb r3, [r0] // tmp = x[0].byte[0]
// and r3, r3, #192 // tmp &= 0xc0
// orr r3, r3, #39 // tmp |= 0x27
// bfc r3, #6, #1 // tmp &= ~0x40
// strb r3, [r0] // x[0].byte[0] = tmp
// ldr r3, [r0] // tmp = x[0]
// bfc r3, #7, #25 // tmp &= 0x7f
// str r3, [r0] // x[0] = tmp
// (...repeat for x[1]...)
//
//
// My intuition that it just generates awful code for copying this
// struct in general was also not true, e.g.:
//
// Control case: copy a caller-supplied Pad into two consecutive slots.
// Here gcc does emit two plain word stores (see disassembly below),
// showing the struct copy itself is not the problem.
void copy( Pad *x, Pad tmp ) {
x[0] = tmp;
x[1] = tmp;
}
// disassembly:
// str r1, [r0]
// str r1, [r0, #4]
//
// which is not perfect (see below) but at least sane.
// Finally, for comparison, this is what clang produces for foo:
// movs r1, #39
// strd r1, r1, [r0]
//
// and for copy:
// strd r1, r1, [r0]