Skip to content

Instantly share code, notes, and snippets.

@Oderjunkie
Last active April 25, 2023 13:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Oderjunkie/3a60145808263496eacaf55428c1c4a8 to your computer and use it in GitHub Desktop.
Save Oderjunkie/3a60145808263496eacaf55428c1c4a8 to your computer and use it in GitHub Desktop.
C Preprocessor Abuse

C Preprocessor Abuse

https://img.shields.io/discord/1096149563871613099.png?style=for-the-badge&label=exploiting%20c%20discord https://img.shields.io/badge/min%20C%20version-78-blueviolet.png?style=for-the-badge

https://licensebuttons.net/p/zero/1.0/80x15.png To the extent possible under law, Otesunki has waived all copyright and related or neighboring rights to C Preprocessor Abuse. This work is published from: Saudi Arabia.

Common Misconceptions

“The C Preprocessor Is Text-Based.”

However, CPP operates at a purely lexical [!!] level, transforming input character sequences [?!] directly to output character sequences when preprocessing C/C++ source code (a.k.a. macro expansion).

- Static Validation of C Preprocessor Macros

Preprocessing means a set of low-level textual [?!] conversions on the source; the C and C++ language specification […] it has no connection with the language syntax. These text-based, unstructured transformations are hard to follow.

- Opening Up The C/C++ Preprocessor Black Box

CPP is, in fact, not text-based: it does not act upon characters, it acts upon tokens. While this distinction may seem pedantic, it is one of the biggest sources of confusion when trying to understand complicated macros. For example, one may think that:

/* FOO is the single token `+`. */
#define FOO +
/* BAR is the token `+` followed by FOO; the two are never glued into `++`. */
#define BAR +FOO

/* Expands to the two separate tokens `+` and `+`. */
BAR

…expands to the singular token ++, when in reality, it expands to the two tokens + and +. While standard C uses a token-based preprocessor, Pre-standard C did use a text-based preprocessor, hence why occasionally you may see macros like:

/* Token pasting: standard C uses ##; the #else branch is the pre-standard */
/* idiom of separating x and y with an empty comment.                      */
#ifdef __STDC__
#  define PASTE2(x, y) x##y
#else
#  define PASTE2(x, y) x/**/y
#endif

/* Stringification: standard C uses #; the #else branch is the pre-standard */
/* idiom of naming the parameter inside a string literal.                   */
#ifdef __STDC__
#  define STRINGIZE(x) #x
#else
#  define STRINGIZE(x) "x"
#endif

…because in pre-standard C, x/**/y did actually expand to xy, and, in some implementations, "x" expanded to the argument x surrounded by quotes.

“The typeof / _Generic Macro…”

typeof (and, similarly, _Generic) are not macros, nor are they part of the preprocessor at all. This is why code such as:

#define AUTO_TYPE(expr) CAT2(foo_, typeof(expr))
#define CAT2(x, y) PASTE2(x, y)
#define PASTE2(x, y) x##y

enum { foo_int = 9; }

  int x;
printf("%d\n", MACRO(x));

…doesn’t print 9, but causes a linker error claiming that foo_typeof is undefined– the code was expanded to foo_typeof(x).

Similarly,

   /* Maps the type of expr to a printf conversion specifier string. */
   #define PRINTF_SPECIFIER(expr)                  \
     _Generic((expr),                              \
              int: "%d",                           \
              float: "%g",                         \
              default: ""                          \
              )

   int x;
   /* Still a syntax error: the _Generic expression is not a string literal */
   /* at the point where adjacent string literals are concatenated.         */
   printf("x: " PRINTF_SPECIFIER(x) "\n", x);

…does not expand to printf("x: %d\n", x); as one might hope, but instead causes a syntax error. Adjacent string literals are concatenated right after preprocessing, but _Generic is only evaluated later, during compilation, after adjacent string concatenation was already done.

For reference, the compiler processes code in this order:

  1. Character set normalization. – Preprocessor, ex. converting CRLF to just LF
  2. Trigraph substitution. – Preprocessor, ex. ??/ to \
  3. Logical line conversion. – Preprocessor, what \ at the end of a line does
  4. Preprocessor tokenization. – Preprocessor, Specifically, the file is tokenized into comments, whitespace, and misc tokens.
  5. Comment Elimination. – Preprocessor, converts each comment into a single space character (since C89), not into an empty string.
  6. Preprocessor directive parsing. – Preprocessor, All #include s are substituted for the file’s contents, and critically: all #define s are parsed and removed at this step.
  7. Macro expansion. – Preprocessor, macros can’t emit preprocessor directives because they were already parsed in the last step, and won’t be parsed ever again.
  8. Backslash escaping.
  9. _Pragma
  10. Adjacent string literal concatenation.
  11. Parsing.
  12. Compiling. – This is where typeof and _Generic are substituted.
  13. Linking.

Compile-Time Checks

Compile-Time Assertions

Assert that cond is true.

/* C78|C++98> UB if cond is false and not a compile-time constant (the */
/* array is then a VLA with a negative size)                           */
char error_message[cond ? 1 : -1];

/* C78|C++98> eliminates the UB above */
static char error_message[cond ? 1 : -1];

/* C89|C++98> expression version of the above */
(void) sizeof(struct { char error_message[cond ? 1 : -1]; });

/* C++11> */
static_assert(cond, "error message");

/* C11> */
_Static_assert(cond, "error message");

/* C11> */
#include <assert.h>
static_assert(cond, "error message");

/* C23> */
static_assert(cond, "error message");

/* CL/MSVC C78|C++98> forces CL.EXE to produce an error including the */
/* error message [https://stackoverflow.com/a/4815532]                */
typedef struct { int error_message : !!(cond); } error_message;

/* C89> from glibc's hack for _Static_assert pre-C11: a false cond gives */
/* the bit-field a negative width                                        */
extern int (*__Static_assert_function (void))
[!!sizeof (struct { int error_message: (cond) ? 2 : -1; })];

/* C78> altered version of the above (no prototype) */
extern int (*__Static_assert_function ())
  [!!sizeof (struct { int error_message: (cond) ? 2 : -1; })];

Compile-Time Type Checks

(Check if | Assert that) expr is of type T.

/* ASSERT C78|C++98> requires expr to be an lvalue           */
/* may not result in a compiler error but a compiler warning */
(*(0 ? (T *) NULL : &(expr)))

/* ASSERT C99> breaks if expr and T are identically sized */
/* integers that only differ in signedness                */
(0 ? (T) {0} : (expr))

/* CHECK GNU89> */
__builtin_types_compatible_p(typeof(expr), T)

/* CHECK C11> */
_Generic((expr), T: 1, default: 0)

Compile-Time Constant Checks

(Check if | Assert that) expr is a compile-time constant.

/* ASSERT C99> only for nonnegative integers; relies on a compound literal */
sizeof(sizeof((struct { char c[expr]; }) {0}.c))

/* CHECK GNU89> */
__builtin_constant_p(expr)

Literal Checks

(Check if | Assert that) expr is a literal.

Common Macros

Zero Argument Check

Check if the call to IS_EMPTY contains 0 arguments. The idea of passing 0 arguments to a macro that takes a variable number of arguments being UB is a misconception.

(including those arguments consisting of no preprocessing tokens) […] There shall be more arguments in the invocation than there are parameters in the macro definition (excluding the ...). There shall exist a ) preprocessing token that terminates the invocation.

- ISO 9899:1999, section 6.10.3, paragraph 4.

In other words, given #define MACRO(...), MACRO() is a perfectly valid invocation. But given #define MACRO2(foo, ...), MACRO2(foo) is not valid, and should instead be MACRO2(foo, ).

   /* C99|C++11> */
   /* [https://gustedt.wordpress.com/2010/06/08/detect-empty-macro-arguments] */
   /* IS_EMPTY(...) expands to 1 when invoked with no arguments at all and   */
   /* to 0 otherwise.                                                        */

   /* PP_NARG_1 appends the descending counters; PP_ARG_N then selects the   */
   /* 64th element of the combined list.                                     */
   #define PP_NARG_1(...)                                          \
     PP_NARG_2(__VA_ARGS__,PP_RSEQ_N())
   #define PP_NARG_2(...)                                          \
     PP_ARG_N(__VA_ARGS__)
   #define PP_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9,_10,        \
                    _11,_12,_13,_14,_15,_16,_17,_18,_19,_20,       \
                    _21,_22,_23,_24,_25,_26,_27,_28,_29,_30,       \
                    _31,_32,_33,_34,_35,_36,_37,_38,_39,_40,       \
                    _41,_42,_43,_44,_45,_46,_47,_48,_49,_50,       \
                    _51,_52,_53,_54,_55,_56,_57,_58,_59,_60,       \
                    _61,_62,_63,N,...) N
   #define PP_RSEQ_N()                                             \
     63,62,61,60,                                                  \
     59,58,57,56,55,54,53,52,51,50,                                \
     49,48,47,46,45,44,43,42,41,40,                                \
     39,38,37,36,35,34,33,32,31,30,                                \
     29,28,27,26,25,24,23,22,21,20,                                \
     19,18,17,16,15,14,13,12,11,10,                                \
     9,8,7,6,5,4,3,2,1,0

   /* 62 ones followed by a single 0: with this list appended, the 64th      */
   /* element is 0 for exactly one argument and 1 for two or more.           */
   #define HAS_COMMA_RSEQ_N()                                      \
     1,1,1,1,                                                      \
     1,1,1,1,1,1,1,1,1,1,                                          \
     1,1,1,1,1,1,1,1,1,1,                                          \
     1,1,1,1,1,1,1,1,1,1,                                          \
     1,1,1,1,1,1,1,1,1,1,                                          \
     1,1,1,1,1,1,1,1,1,1,                                          \
     1,1,1,1,1,1,1,1,0
   /* 1 if the argument list contains a top-level comma, else 0. */
   #define HAS_COMMA(...) PP_NARG_1(__VA_ARGS__, HAS_COMMA_RSEQ_N())
   /* Becomes a comma only when it ends up directly in front of (...). */
   #define TRIGGER_PARENTHESIS(...) ,

   /* Four probes; only the pattern 0001 (a comma appears solely when the    */
   /* arguments sit between TRIGGER_PARENTHESIS and "()") means "empty".     */
   /* The first two arguments are the results for empty / not empty.         */
   #define IS_EMPTY(...)                                           \
     ISEMPTY_SWITCH(                                               \
                    1,                                             \
                    0,                                             \
                    HAS_COMMA(__VA_ARGS__),                        \
                    HAS_COMMA(TRIGGER_PARENTHESIS __VA_ARGS__),    \
                    HAS_COMMA(__VA_ARGS__ (/**/)),                 \
                    HAS_COMMA(TRIGGER_PARENTHESIS __VA_ARGS__ (/**/)))
   #define IS_EMPTY_CASE_0000(YES, NO) NO
   #define IS_EMPTY_CASE_0001(YES, NO) YES
   #define IS_EMPTY_CASE_0010(YES, NO) NO
   #define IS_EMPTY_CASE_0011(YES, NO) NO
   #define IS_EMPTY_CASE_0100(YES, NO) NO
   #define IS_EMPTY_CASE_0101(YES, NO) NO
   #define IS_EMPTY_CASE_0110(YES, NO) NO
   #define IS_EMPTY_CASE_0111(YES, NO) NO
   #define IS_EMPTY_CASE_1000(YES, NO) NO
   #define IS_EMPTY_CASE_1001(YES, NO) NO
   #define IS_EMPTY_CASE_1010(YES, NO) NO
   #define IS_EMPTY_CASE_1011(YES, NO) NO
   #define IS_EMPTY_CASE_1100(YES, NO) NO
   #define IS_EMPTY_CASE_1101(YES, NO) NO
   #define IS_EMPTY_CASE_1110(YES, NO) NO
   #define IS_EMPTY_CASE_1111(YES, NO) NO
   #define PASTE5(_0, _1, _2, _3, _4) _0 ## _1 ## _2 ## _3 ## _4
   /* The probe results are fully expanded before PASTE5 glues them onto the */
   /* IS_EMPTY_CASE_ prefix, selecting one of the macros defined above.      */
   #define ISEMPTY_SWITCH(YES, NO, _0, _1, _2, _3)                 \
     PASTE5(IS_EMPTY_CASE_, _0, _1, _2, _3)(YES, NO)

Counting Arguments

Count the number of arguments passed to PP_NARG.

   /* C++11> compile time constant, not preprocessor time constant */
   /* Packs the arguments into a tuple and reads the tuple's element count. */
   std::tuple_size<decltype(std::make_tuple(__VA_ARGS__))>::value

   /* C99|C++11> breaks when passed no arguments */
   /* [https://groups.google.com/g/comp.std.c/c/d-6Mj5Lko_s/m/5R6bMWTEbzQJ] */

   /* Descending counters 63..0 that are appended after the caller's arguments. */
   #define PP_RSEQ_N() \
     63, 62, 61, 60, 59, 58, 57, 56, \
     55, 54, 53, 52, 51, 50, 49, 48, \
     47, 46, 45, 44, 43, 42, 41, 40, \
     39, 38, 37, 36, 35, 34, 33, 32, \
     31, 30, 29, 28, 27, 26, 25, 24, \
     23, 22, 21, 20, 19, 18, 17, 16, \
     15, 14, 13, 12, 11, 10,  9,  8, \
      7,  6,  5,  4,  3,  2,  1,  0

   /* Drops the first 63 arguments and yields the 64th one. */
   #define PP_ARG_N( \
       a01, a02, a03, a04, a05, a06, a07, a08, \
       a09, a10, a11, a12, a13, a14, a15, a16, \
       a17, a18, a19, a20, a21, a22, a23, a24, \
       a25, a26, a27, a28, a29, a30, a31, a32, \
       a33, a34, a35, a36, a37, a38, a39, a40, \
       a41, a42, a43, a44, a45, a46, a47, a48, \
       a49, a50, a51, a52, a53, a54, a55, a56, \
       a57, a58, a59, a60, a61, a62, a63, \
       PICKED, ...) PICKED

   /* Second expansion step, so that PP_RSEQ_N() is already a plain list. */
   #define PP_NARG_1(...) PP_ARG_N(__VA_ARGS__)

   /* Entry point: the counters are shifted right by the number of arguments. */
   #define PP_NARG(...) PP_NARG_1(__VA_ARGS__, PP_RSEQ_N())

   /* C99|C++11> fix for the above, breaks when passed a last argument that */
   /* is a function-like macro accepting at least 2 arguments */
   /* [https://gustedt.wordpress.com/2010/06/08/detect-empty-macro-arguments] */

   /* Descending counters 63..0 that are appended after the caller's arguments. */
   #define PP_RSEQ_N() \
     63, 62, 61, 60, 59, 58, 57, 56, \
     55, 54, 53, 52, 51, 50, 49, 48, \
     47, 46, 45, 44, 43, 42, 41, 40, \
     39, 38, 37, 36, 35, 34, 33, 32, \
     31, 30, 29, 28, 27, 26, 25, 24, \
     23, 22, 21, 20, 19, 18, 17, 16, \
     15, 14, 13, 12, 11, 10,  9,  8, \
      7,  6,  5,  4,  3,  2,  1,  0

   /* Comma-detection list: 62 ones followed by a single 0. */
   #define HAS_COMMA_RSEQ_N() \
     1, 1, 1, 1, 1, 1, 1, 1, \
     1, 1, 1, 1, 1, 1, 1, 1, \
     1, 1, 1, 1, 1, 1, 1, 1, \
     1, 1, 1, 1, 1, 1, 1, 1, \
     1, 1, 1, 1, 1, 1, 1, 1, \
     1, 1, 1, 1, 1, 1, 1, 1, \
     1, 1, 1, 1, 1, 1, 1, 1, \
     1, 1, 1, 1, 1, 1, 0

   /* Drops the first 63 arguments and yields the 64th one. */
   #define PP_ARG_N( \
       a01, a02, a03, a04, a05, a06, a07, a08, \
       a09, a10, a11, a12, a13, a14, a15, a16, \
       a17, a18, a19, a20, a21, a22, a23, a24, \
       a25, a26, a27, a28, a29, a30, a31, a32, \
       a33, a34, a35, a36, a37, a38, a39, a40, \
       a41, a42, a43, a44, a45, a46, a47, a48, \
       a49, a50, a51, a52, a53, a54, a55, a56, \
       a57, a58, a59, a60, a61, a62, a63, \
       PICKED, ...) PICKED
   #define PP_NARG_2(...) PP_ARG_N(__VA_ARGS__)
   /* Raw count; yields 1 for an empty argument list. */
   #define PP_NARG_1(...) PP_NARG_2(__VA_ARGS__, PP_RSEQ_N())

   /* 1 if the argument list contains a top-level comma, else 0. */
   #define HAS_COMMA(...) PP_NARG_1(__VA_ARGS__, HAS_COMMA_RSEQ_N())
   /* Becomes a comma only when it ends up directly in front of (...). */
   #define TRIGGER_PARENTHESIS(...) ,

   /* Dispatch table: only the probe pattern 0001 selects the first operand. */
   #define IS_EMPTY_CASE_0001(ON_EMPTY, OTHERWISE) ON_EMPTY
   #define IS_EMPTY_CASE_0000(ON_EMPTY, OTHERWISE) OTHERWISE
   #define IS_EMPTY_CASE_0010(ON_EMPTY, OTHERWISE) OTHERWISE
   #define IS_EMPTY_CASE_0011(ON_EMPTY, OTHERWISE) OTHERWISE
   #define IS_EMPTY_CASE_0100(ON_EMPTY, OTHERWISE) OTHERWISE
   #define IS_EMPTY_CASE_0101(ON_EMPTY, OTHERWISE) OTHERWISE
   #define IS_EMPTY_CASE_0110(ON_EMPTY, OTHERWISE) OTHERWISE
   #define IS_EMPTY_CASE_0111(ON_EMPTY, OTHERWISE) OTHERWISE
   #define IS_EMPTY_CASE_1001(ON_EMPTY, OTHERWISE) OTHERWISE
   #define IS_EMPTY_CASE_1010(ON_EMPTY, OTHERWISE) OTHERWISE
   #define IS_EMPTY_CASE_1011(ON_EMPTY, OTHERWISE) OTHERWISE
   #define IS_EMPTY_CASE_1100(ON_EMPTY, OTHERWISE) OTHERWISE
   #define IS_EMPTY_CASE_1101(ON_EMPTY, OTHERWISE) OTHERWISE
   #define IS_EMPTY_CASE_1110(ON_EMPTY, OTHERWISE) OTHERWISE
   #define IS_EMPTY_CASE_1111(ON_EMPTY, OTHERWISE) OTHERWISE

   #define PASTE5(a, b, c, d, e) a ## b ## c ## d ## e
   /* The four probe results are expanded first, then glued onto the prefix. */
   #define ISEMPTY_SWITCH(ON_EMPTY, OTHERWISE, p0, p1, p2, p3) \
     PASTE5(IS_EMPTY_CASE_, p0, p1, p2, p3)(ON_EMPTY, OTHERWISE)

   /* Entry point: 0 for an empty argument list, the raw count otherwise. */
   #define PP_NARG(...) \
     ISEMPTY_SWITCH( \
       0, \
       PP_NARG_1(__VA_ARGS__), \
       HAS_COMMA(__VA_ARGS__), \
       HAS_COMMA(TRIGGER_PARENTHESIS __VA_ARGS__), \
       HAS_COMMA(__VA_ARGS__ (/**/)), \
       HAS_COMMA(TRIGGER_PARENTHESIS __VA_ARGS__ (/**/)))

PROBE & CHECK

CHECK(PROBE(~)) should expand to 1, and every other CHECK(...) should expand to 0. On the reason why ~ in particular is used:

The choice of ~ as a first argument is essentially arbitrary (since SECOND will always cause it to disappear). However this particular character is a popular convention since if a bug in your macros results in one sneaking out into the final expansion it frequently results in a syntax error in the compiler alerting you to the problem.

- C Pre-Processor Magic

/* C99|C++11> [https://github.com/pfultz2/Cloak/wiki/C-Preprocessor-tricks,-tips,-and-idioms#detection] */
/* CHECK_N yields its second argument and discards everything else. */
#define CHECK_N(x, n, ...) n
/* Appends a default 0; a PROBE in the argument list shifts a 1 into the */
/* second position instead.                                              */
#define CHECK(...) CHECK_N(__VA_ARGS__, 0,)
/* Expands to "x, 1," so that the 1 lands in CHECK_N's second slot. */
#define PROBE(x) x, 1,

Macros Consisting Of Multiple Operations

Perform multiple operations in one macro.

   /* C99|C++98> breaks when used twice [multiple declarations] */
   /* Swaps x and y bytewise through a scratch buffer declared in the   */
   /* caller's scope; sizeof(0 ? (x) : (y)) is the size of their common */
   /* type and is the byte count passed to every memcpy.                */
   #define swap(x, y)                              \
     char buffer[sizeof(0 ? (x) : (y))];           \
     void *x_ = &(x), *y_ = &(y);                  \
     memcpy(buffer, x_, sizeof(buffer));           \
     memcpy(x_, y_, sizeof(buffer));               \
     memcpy(y_, buffer, sizeof(buffer));

   /* C78|C++98> breaks in `if (cond) swap(x, y); else { ... }` */
   /* Same bytewise swap, with the temporaries confined to their own block. */
   #define swap(x, y) {                            \
        char buffer[sizeof(0 ? (x) : (y))];        \
        void *x_ = &(x), *y_ = &(y);               \
        memcpy(buffer, x_, sizeof(buffer));        \
        memcpy(x_, y_, sizeof(buffer));            \
        memcpy(y_, buffer, sizeof(buffer));        \
     }

   /* C78|C++98> breaks in `return cond ? swap(x, y) : y;` */
   /* do { ... } while (0) makes the expansion a single statement that */
   /* still expects the caller's trailing semicolon.                   */
   #define swap(x, y) do {                         \
        char buffer[sizeof(0 ? (x) : (y))];        \
        void *x_ = &(x), *y_ = &(y);               \
        memcpy(buffer, x_, sizeof(buffer));        \
        memcpy(x_, y_, sizeof(buffer));            \
        memcpy(y_, buffer, sizeof(buffer));        \
     } while (0)

   /* C99> more efficient, as it allocates on the stack */
   /* The macro hands _swapf the addresses of both operands, a compound- */
   /* literal char array used as scratch space, and the byte count.      */
   #define swap(x, y)						\
     _swapf(&(x), &(y), (char [sizeof(0 ? (x) : (y))]) { 0 },	\
	     sizeof(0 ? (x) : (y)))

   /* Exchanges len bytes between left and right, using tmp as scratch. */
   static inline void _swapf(void *left, void *right, void *tmp, size_t len) {
     memcpy(tmp, left, len);    /* save left */
     memcpy(left, right, len);  /* left <- right */
     memcpy(right, tmp, len);   /* right <- saved left */
   }

   /* C99> requires VLAs (a C99 feature, optional since C11). */
   /* The macro passes the addresses of both operands and their byte size. */
   #define swap(x, y)				\
     _swapf(&(x), &(y), sizeof(0 ? (x) : (y)))

   /* Exchanges len bytes between left and right through a VLA scratch buffer. */
   static void _swapf(void *left, void *right, size_t len) {
     char tmp[len];             /* variable-length array of len bytes */
     memcpy(tmp, left, len);
     memcpy(left, right, len);
     memcpy(right, tmp, len);
   }

   /* C78> allocates on the heap. */
   /* The macro passes the addresses of both operands and their byte size; */
   /* its value is _swapf's return code.                                   */
   #define swap(x, y)                              \
     _swapf(&(x), &(y), sizeof(0 ? (x) : (y)))

   /* Exchanges len bytes between left and right through a heap buffer.    */
   /* Returns 0 on success, or -1 (leaving both operands untouched) when   */
   /* the scratch buffer cannot be allocated.                              */
   static int _swapf(void *left, void *right, size_t len) {
     void *tmp = calloc(1, len);
     if (tmp == NULL)
       return(-1);
     memcpy(tmp, left, len);
     memcpy(left, right, len);
     memcpy(right, tmp, len);
     free(tmp);
     return(0);
   }

   /* GNU89> statement expression; evaluates to the previous value of x.  */
   /* __typeof__ is spelled with underscores so the macro also builds in  */
   /* strict ISO modes (-std=c89/c99/c11), where plain typeof is disabled. */
   /* The pointers are declared with each other's type, so mismatched     */
   /* operand types are diagnosed at the two initializations.             */
   #define swap(x, y) ({                           \
          __typeof__(y) *x_ = &(x);                \
          __typeof__(x) *y_ = &(y);                \
          __typeof__(x) tmp = *x_;                 \
          *x_ = *y_; *y_ = tmp;                    \
          tmp;                                     \
        })

   /* C78> does not function as expected: `break` only leaves the macro's */
   /* own do/while, so it is effectively a no-op                          */
   #define macro(foo) do {				\
	if (condition)				\
	  break;					\
     } while (0)

   /* C78> breaks in `return cond ? macro(foo) : foo;`, may cause warnings */
   /* No loop of its own, so `break` applies to the caller's enclosing     */
   /* loop or switch; the trailing else swallows the caller's semicolon.   */
   #define macro(foo) if (1) {			\
	if (condition)				\
	  break;					\
     } else do ; while (0)

   /* C89> breaks in `return cond ? macro(foo) : foo;` */
   /* Same shape, with a (void) 0 expression as the never-taken else branch. */
   #define macro(foo) if (1) {			\
	if (condition)				\
	  break;					\
     } else (void) 0

   /* C78> breaks in =return cond > debug(("foo: %d\n", foo)) : foo;= */
   #define debug(args) do { printf("debug: "); printf args; } while (0)

   /* C78> */
   #define debug(args) (printf("debug: "), printf args)

See Also

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment