Skip to content

Instantly share code, notes, and snippets.

Last active January 23, 2024 04:24
Show Gist options
  • Save fay59/5ccbe684e6e56a7df8815c3486568f01 to your computer and use it in GitHub Desktop.
Save fay59/5ccbe684e6e56a7df8815c3486568f01 to your computer and use it in GitHub Desktop.
Quirks of C

Here's a list of mildly interesting things about the C language that I learned mostly by consuming Clang's ASTs. Although surprises are getting sparser, I might continue to update this document over time.

There are many more mildly interesting features of C++, but the language is literally known for being weird, whereas C is usually considered smaller and simpler, so this is (almost) only about C.

1. Combined type and variable/field declaration, inside a struct scope []

// `struct bar` is declared while declaring the field `baz`, inside the
// body of `struct foo`.
struct foo {
   struct bar {
       int x;
   } baz;
};

// Uses `struct bar` outside of `struct foo`.
void frob() {
   struct bar b; // <-- defined in body of `struct foo`
}

2. Compound literals are lvalues []

struct foo {
    int bar;
};

// Shows that a compound literal can be assigned through and have its
// address taken.
void baz() {
    // compound literal:
    (struct foo){};

    // these are actually lvalues
    ((struct foo){}).bar = 4;
    &(struct foo){};
}

3. Switch cases anywhere []

// Writes 'c' to complicated[2] when p == 1, or when p == 0 and
// complicated starts with "ab". The `case 1:` label sits inside the
// nested ifs of `case 0:`, so p == 1 jumps straight to the assignment.
void foo(int p, char* complicated) {
    switch (p) {
    case 0:
        if (complicated[0] == 'a') {
            if (complicated[1] == 'b') {
    case 1:
                complicated[2] = 'c';
            }
        }
    }
}

(also see: Duff's Device)

4. Flexible array members []

// A struct whose last member is an array with no declared size.
struct flex {
    int count;
    int elems[]; // <-- flexible array member
};

// this lays out the object exactly as expected
struct flex f = {
    .count = 3,
    .elems = {32, 31, 30}
};

_Static_assert(sizeof(struct flex) == sizeof(int), "");
// sizeof(f) does not include the size of statically-declared elements
_Static_assert(sizeof(f) == sizeof(struct flex), "");

// this only builds because .elems is not initialized:
struct flex g[2];

5. {0} as a universal initializer []

// Types exercised below: a zero-length array, an empty struct, a regular
// array, a regular struct, and a 4-element float vector declared through a
// compiler attribute.
// NOTE(review): zero-length arrays, empty structs and `ext_vector_type` look
// like compiler extensions rather than ISO C11 - confirm against the target
// compiler.
typedef int empty_array_t[0];
typedef struct {} empty_struct_t;
typedef int array_t[10];
typedef struct { int f; } struct_t;
typedef float vector_t __attribute__((ext_vector_type(4)));

// {} can initialize structs and arrays and vectors, but not scalars:
empty_array_t ea = {};
empty_struct_t es = {};
array_t a = {};
struct_t s = {};
vector_t v = {};
// The next two lines are intentionally invalid and are expected to be
// rejected, as marked.
void* p = {}; // <-- error
int i = {}; // <-- error

// {0} can initialize any data type, including empty arrays/structs.
empty_array_t eaa = {0};
empty_struct_t ess = {0};
array_t aa = {0};
struct_t bb = {0};
vector_t cc = {0};
void* dd = {0}; // <-- happy!
int ee = {0}; // <-- happy!

6. Function typedefs []

typedef void (*function_pointer_t)(int); // <-- this creates a function pointer type
typedef void function_t(int); // <-- this creates a function type
// function_pointer_t == function_t*

function_t my_func; // <-- this declares "void my_func(int)"

// Calls the function that was declared through the function typedef.
void bar() {
    my_func(42);
}

7. Array pointers []

typedef int array_t[10]; // array typedef
typedef array_t* array_ptr_t; // array pointer typedef
// same as:
// typedef int (*array_ptr_t)[10];

// Takes a pointer to a whole array of 10 ints and reads its second element.
void foo(array_ptr_t array_ptr) {
    int x = (*array_ptr)[1];
}

void bar() {
    int arr_10[10];
    foo(&arr_10); // <-- yep
    int arr_11[11];
    // `&arr_11` has type `int (*)[11]`, which is not compatible with
    // `int (*)[10]`, so this call is left commented out:
    // foo(&arr_11); // <-- nope
}

8. Modifiers to array sizes in parameter definitions []

// An array parameter may carry `static` and type qualifiers between its
// brackets.
void foo(int arr[static const restrict volatile 10]) {
    // static: the array contains at least 10 elements
    // const, volatile and restrict all apply to the pointer that the array
    // parameter is adjusted to, i.e. `int *const restrict volatile arr`.
}

(corrected by Reddit user /u/romv1)

9. Flat initializer lists []

struct foo {
    int x, y;
};

struct lots_of_inits {
    struct foo z[2];
    int w[3];
};

// this is probably more typical
struct lots_of_inits init = {
    {{1, 2}, {3, 4}}, {5, 6, 7}
};

// but braces for inner elements are optional
struct lots_of_inits flat_init = {
    1, 2, 3, 4, 5, 6, 7
};

10. What’s an lvalue, anyway []

struct bitfield {
    unsigned x: 3;
};

// Walks through which expressions are lvalues, which of them can be
// assigned to, and which can have their address taken. The trailing
// comments are the corresponding Clang AST nodes.
void foo() {
    int a[2];
    int i;
    const int j;
    struct bitfield bf;

    // these are all lvalues
    a; // DeclRefExpr <col:5> 'int [2]' lvalue Var 0x556800650150 'a' 'int [2]'
    i; // DeclRefExpr <col:5> 'int' lvalue Var 0x56289851bf20 'i' 'int'
    j; // DeclRefExpr <col:5> 'const int' lvalue Var 0x555fc6694ff0 'j' 'const int'
    bf.x; // MemberExpr <col:5, col:8> 'unsigned int' lvalue bitfield .x 0x55dab002de28

    // this is not an lvalue
    foo; // DeclRefExpr <col:6> 'void ()' Function 0x563cb79da098 'foo' 'void ()'

    // ... but you can't assign to all of them
    // a = (int [2]){1, 2};
    i = 4;
    // j = 4;
    bf.x = 4;

    // ... and you can't take all of their addresses
    // &bf.x;
    &foo; // but you can take the address of a function, which is not an lvalue

    // so, an lvalue is a value that:
    // - can have its address taken...
    //  - unless it is a bitfield (still an lvalue)
    //  - unless it is a function (not an lvalue)
    // - can be assigned to...
    //  - unless it is an array (still an lvalue)
    //  - unless it is a constant (still an lvalue)
}

11. Void globals []

// You can declare extern globals to incomplete types,
// including `void`.
// `extern` makes this a declaration only: no storage for `foo` is defined
// here.
extern void foo;

12. Alignment implications of bitfields []

// A 16-bit bitfield declared with type `long`, placed between two chars.
struct foo {
    char a;
    long b: 16;
    char c;
};

// `struct foo` has the alignment of its most-aligned member:
// `long b` has an alignment of 8...
int alignof_foo = _Alignof(struct foo);

// ...but `long b: 16` is a bitfield, and is aligned on a char
// boundary.
int offsetof_c = __builtin_offsetof(struct foo, c);

13. static variables are scope-local []

// Takes the address of two same-named static variables declared in sibling
// block scopes and reports whether the addresses are equal.
int foo() {
    int* a;
    int* b;
    {
        static int foo;
        a = &foo;
    }
    {
        static int foo;
        b = &foo;
    }
    // this always returns false: two static variables with the same name
    // but declared in different scope refer to different storage.
    return a == b;
}

14. Typedef goes anywhere []

// `typedef` is just another declaration specifier, so it may appear between
// or after the type specifiers and qualifiers.
short typedef signed s16;
unsigned int typedef u32;
struct foo { int bar; } const typedef baz;

s16 a;
u32 b;
baz c;

15. Indexing into an integer []

// Returns the element at `index` added to itself, reading it once as
// ptr[index] and once as index[ptr].
int foo(int* ptr, int index) {
    // When indexing, the pointer and integer parts
    // of the subscript expression are interchangeable.
    return ptr[index] + index[ptr];
    // It works this way, according to the standard (§6.5.2.1),
    // because A[B] is the same as *(A + B), and addition
    // is commutative.
}

16. The type of enums vs. the type of enumerators []

In C, enumerators (values declared in enums) have integer type rather than the type of their enclosing enum. For instance:

enum foo { bar, baz, frob };

enum foo is a valid type to use that can store the value of bar, baz and frob. However, bar, baz and frob do not have type enum foo: C11 (§6.7.2.2) gives enumerators the type int, while the integer type underlying enum foo is implementation-defined! On many implementations, bar has type int and enum foo has the underlying type unsigned. This means that a check as simple as this one:

enum foo f = bar;
f < baz;

involves a comparison of integers with different signedness.

Further, the type of each enumerator is not guaranteed to be the same. In this example:

enum foo { bar, baz = 0x80000000 };

The type of bar can be int and the type of baz can be unsigned.

Special mentions

1. The power of UB []

extern void this_is_not_directly_called_by_main();

// Only ever holds a null pointer (its initial value) or the function
// assigned in bar().
static void (*side_effects)() = 0;

void bar() {
    side_effects = this_is_not_directly_called_by_main;
}

// Calls through `side_effects` without ever calling bar() first.
int main() {
    side_effects();
}

compiles to:

bar:                                    # @bar
        ret
main:                                   # @main
        push    rax
        xor     eax, eax
        call    this_is_not_directly_called_by_main
        xor     eax, eax
        pop     rcx
        ret

Main directly calls this_is_not_directly_called_by_main in this implementation. This happens because:

  1. LLVM sees that side_effects has only two possible values: NULL (the initial value) or this_is_not_directly_called_by_main (if bar is called)
  2. LLVM sees that side_effects is called, and it is UB to call a null pointer
  3. UB is impossible, so LLVM assumes that bar will have executed by the time main runs rather than face the consequences
  4. Under this assumption, side_effects is always this_is_not_directly_called_by_main.

2. A constant-expression macro that tells you if an expression is an integer constant []

// ICE_P(x) is 1 when `x` is an integer constant expression and 0 otherwise.
// The macro is only a comparison of two sizeof results, so it can be used in
// the file-scope initializers below.
// How it works: when `x` is an integer constant expression, `(void*)((x) * 0l)`
// is a null pointer constant, so the conditional expression has type `int*`
// and the inner sizeof is sizeof(int). Otherwise it is an ordinary `void*`
// value, the conditional has type `void*`, and the inner sizeof is
// sizeof(void).
// NOTE(review): relies on the compiler accepting sizeof(void) with a value
// different from sizeof(int) - confirm for the target compiler.
#define ICE_P(x) (sizeof(int) == sizeof(*(1 ? ((void*)((x) * 0l)) : (int*)1)))

int is_a_constant = ICE_P(4);
int is_not_a_constant = ICE_P(is_a_constant);

From Martin Uecker, on the Linux kernel ML. __builtin_constant_p does the same thing on Clang and GCC.

3. Labels inside expression statements in really weird places []

You can make some pretty weird stuff in C, but for a real disaster, you need C++.

class foo {
    int x;

public:
    foo();
};

// The label `a` lives inside the statement expression used in the member
// initializer, and the constructor body jumps to it.
foo::foo() : x(({ a: 4; })) {
    goto a;
}

Needless to say, statement expressions are not standard C++ (or standard C), but if your compiler has them, chances are that you can use them in really interesting ways.

Copy link

casual-engineer commented Jul 24, 2023

We can also create a main function of type void and forsake the ugly looking return 0 at the end of the code :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment