// Source: GitHub Gist by @rui314 (last active Aug 14, 2019).
// Copyright (C) 2019 Rui Ueyama
// Licensed under the MIT license
//
// This command parses a C declaration. Here are a few examples:
//
// $ ./cdecl 'int x'
// x: int
//
// $ ./cdecl 'int **const *x'
// x: pointer to const pointer to pointer to int
//
// $ ./cdecl 'int (*x)()'
// x: pointer to function returning int
//
// $ ./cdecl 'int long long signed signed typedef const x'
// x: typedef const longlong
//
// $ ./cdecl 'int ((*const x)[])()'
// x: const pointer to array of function returning int
//
//
// To build this file, just run "gcc -o cdecl cdecl.c".
#define _GNU_SOURCE 1
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdnoreturn.h>
#include <string.h>
// Print a printf-style message to stderr, followed by a newline, then
// terminate the process with exit status 1. This function never returns.
//
// fmt: printf-style format string; the variadic arguments must match it.
//      The message should not end with its own newline because one is
//      appended here.
noreturn void error(char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  // Every va_start must be paired with va_end in the same function.
  va_end(ap);
  fprintf(stderr, "\n");
  exit(1);
}
// Token storage. main() fills this array with heap-allocated strings, one
// per token of the input declaration. Unused slots stay NULL (static storage
// is zero-initialized) and the parser treats a NULL entry as "no more
// tokens". Capacity is fixed at 100 entries.
static char *tokens[100];
// Index into `tokens` of the token the parser is currently looking at.
static int pos = 0;
// Advance past the current token when it is spelled exactly like `tok`.
//
// Returns true if the token matched and was skipped. Returns false, leaving
// `pos` untouched, when there is no current token or it differs from `tok`.
static bool consume(char *tok) {
  char *current = tokens[pos];
  bool matched = current != NULL && strcmp(current, tok) == 0;
  if (matched)
    pos++;
  return matched;
}
// Ensure that the current token is a given one.
static void expect(char *tok) {
if (!tokens[pos] || strcmp(tokens[pos], tok) != 0)
error("%s expected\n", tok);
pos++;
}
// The kind of a Type node. Values start at 1, so a zero-initialized Type
// does not carry a valid kind.
enum BaseType {
// Basic types produced by read_type_specifier().
TY_VOID = 1,
TY_BOOL,
TY_CHAR,
TY_SHORT,
TY_INT,
TY_LONG,
TY_LLONG,
TY_FLOAT,
TY_DOUBLE,
TY_LDOUBLE,
// Derived types built by the declarator parser; each one refers to another
// Type through array_of, ptr_of or return_type respectively.
TY_ARRAY,
TY_PTR,
TY_FUNCTION,
};
// Represents a C type as a chain of nodes: a derived type (pointer, array,
// function) refers to the type it is built from, ending at a basic type.
typedef struct Type {
// Which kind of type this node is.
enum BaseType ty;
// For integer kinds: true when the type is signed. print_type() prints the
// "u"-prefixed name (e.g. "uint") when this is false.
bool is_signed;
// Pointee type; set by the parser when ty == TY_PTR.
struct Type *ptr_of;
// Element type; set by the parser when ty == TY_ARRAY.
struct Type *array_of;
// Return type; set by the parser when ty == TY_FUNCTION.
struct Type *return_type;
// Storage-class specifiers seen by read_type_specifier(); they are recorded
// on the base type it returns.
bool is_typedef;
bool is_extern;
bool is_static;
// `const` qualifier: set on the base type by read_type_specifier(), or on a
// pointer node by read_declarator() for `* const`.
bool is_const;
} Type;
// Placeholder for parsing a `struct` specifier; not implemented. It consumes
// no tokens and always returns NULL, so the caller sees no user-defined type.
static Type *read_struct(void) { return NULL; }
// Placeholder for parsing a `union` specifier; not implemented. It consumes
// no tokens and always returns NULL, so the caller sees no user-defined type.
static Type *read_union(void) { return NULL; }
// Placeholder for parsing an `enum` specifier; not implemented. It consumes
// no tokens and always returns NULL, so the caller sees no user-defined type.
static Type *read_enum(void) { return NULL; }
// Read a valid type specifier, e.g.
//
// int
// typedef const int
// long const int long typedef
static Type *read_type_specifier() {
Type *ty = calloc(1, sizeof(Type));
enum {
SIGNED = 1,
UNSIGNED = 1 << 2,
VOID = 1 << 4,
BOOL = 1 << 6,
CHAR = 1 << 8,
SHORT = 1 << 10,
INT = 1 << 12,
LONG = 1 << 14,
FLOAT = 1 << 16,
DOUBLE = 1 << 18,
};
int base_type = 0;
Type *user_type = NULL;
bool is_typedef = false;
bool is_extern = false;
bool is_static = false;
bool is_const = false;
for (;;) {
// Read one token at a time.
if (consume("typedef"))
is_typedef = true;
else if (consume("extern"))
is_extern = true;
else if (consume("static"))
is_static = true;
else if (consume("const"))
is_const = true;
else if (consume("signed"))
base_type |= SIGNED;
else if (consume("unsigned"))
base_type |= UNSIGNED;
else if (consume("void"))
base_type += VOID;
else if (consume("bool"))
base_type += BOOL;
else if (consume("char"))
base_type += CHAR;
else if (consume("short"))
base_type += SHORT;
else if (consume("int"))
base_type += INT;
else if (consume("long"))
base_type += LONG;
else if (consume("float"))
base_type += FLOAT;
else if (consume("double"))
base_type += DOUBLE;
else if (consume("struct"))
user_type = read_struct();
else if (consume("union"))
user_type = read_union();
else if (consume("enum"))
user_type = read_enum();
else
break;
if (is_extern && is_static)
error("extern and static may not be used together");
// Something like `int struct foo x` is an error.
if (base_type && user_type)
error("invalid type");
// The valid combinations of type specifiers are irregular. For
// example, `long int` and `short int` are allowed and interpreted
// as `long` and `short`, but `char int` is not `char` but just
// invalid. We simply enumerate all valid combinations in this
// switch.
switch (base_type) {
case VOID:
*ty = (Type){TY_VOID};
break;
case BOOL:
*ty = (Type){TY_BOOL};
break;
case SIGNED + CHAR:
*ty = (Type){TY_CHAR, true};
break;
case CHAR:
case UNSIGNED + CHAR:
*ty = (Type){TY_CHAR};
break;
case SHORT:
case SHORT + INT:
case SIGNED + SHORT:
case SIGNED + SHORT + INT:
*ty = (Type){TY_SHORT, true};
break;
case UNSIGNED + SHORT:
case UNSIGNED + SHORT + INT:
*ty = (Type){TY_SHORT};
break;
case INT:
case SIGNED:
case SIGNED + INT:
*ty = (Type){TY_INT, true};
break;
case UNSIGNED:
case UNSIGNED + INT:
*ty = (Type){TY_INT};
break;
case LONG:
case LONG + INT:
case SIGNED + LONG:
case SIGNED + LONG + INT:
*ty = (Type){TY_LONG, true};
break;
case UNSIGNED + LONG:
case UNSIGNED + LONG + INT:
*ty = (Type){TY_LONG};
break;
case LONG + LONG:
case LONG + LONG + INT:
case SIGNED + LONG + LONG:
case SIGNED + LONG + LONG + INT:
*ty = (Type){TY_LLONG, true};
break;
case UNSIGNED + LONG + LONG:
case UNSIGNED + LONG + LONG + INT:
*ty = (Type){TY_LLONG};
break;
case FLOAT:
*ty = (Type){TY_FLOAT};
break;
case DOUBLE:
*ty = (Type){TY_DOUBLE};
break;
case LONG + DOUBLE:
*ty = (Type){TY_LDOUBLE};
break;
case 0:
// If there's no type specifier, it becomes `int`.
// For example, x in `const x` is `int`.
*ty = user_type ? *user_type : (Type){TY_INT};
break;
default:
error("invalid type");
}
}
ty->is_typedef = is_typedef;
ty->is_extern = is_extern;
ty->is_static = is_static;
ty->is_const = is_const;
return ty;
}
// Forward declaration: read_direct_declarator() and read_declarator() call
// each other, so one of them has to be declared before its definition.
static Type *read_declarator(Type *ty, char **name);
// Read zero or more trailing `[]` (array) or `()` (function) suffixes and
// wrap `ty` in the corresponding derived types.
//
// Suffixes apply left to right starting from the declared name, so the
// leftmost suffix must become the outermost type: in `x[]()`, x is an array
// of functions, not a function returning an array. The remaining suffixes
// are therefore parsed recursively first and the current suffix is wrapped
// around that result.
//
// Array sizes and function parameters are not supported: the brackets must
// be empty, otherwise expect() reports an error and terminates.
//
// Returns `ty` itself when no suffix follows, else the new outermost type.
static Type *read_direct_declarator_tail(Type *ty) {
  enum BaseType kind;

  if (consume("(")) {
    expect(")");
    kind = TY_FUNCTION;
  } else if (consume("[")) {
    expect("]");
    kind = TY_ARRAY;
  } else {
    return ty;
  }

  // Everything to the right of this suffix describes what the function
  // returns or what the array holds.
  Type *inner = read_direct_declarator_tail(ty);

  Type *outer = calloc(1, sizeof(Type));
  if (!outer)
    error("out of memory");
  outer->ty = kind;
  if (kind == TY_FUNCTION)
    outer->return_type = inner;
  else
    outer->array_of = inner;
  return outer;
}
// Reports whether `s` is one of the keywords that can appear in a type
// name, and therefore must not be taken for a declared identifier.
// The comparison is exact and case-sensitive.
static bool is_reserved(char *s) {
  static const char *const keywords[] = {
      "typedef", "extern", "static",   "thread_local", "const",
      "noreturn", "signed", "unsigned", "void",         "bool",
      "char",     "short",  "int",      "long",         "float",
      "double",   "struct", "union",    "enum",
  };

  for (size_t i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) {
    if (strcmp(keywords[i], s) == 0)
      return true;
  }
  return false;
}
// Returns true if `c` can start an identifier (a letter or an underscore).
static bool is_ident_start(char c) {
  return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_';
}

// Read an identifier (if exists) and following [] or (), e.g.
//
//   x[]     (x is an array of ...)
//   (*x)()  (x is a pointer to a function returning ...)
//
// ty:   the type built so far, which the declarator applies to.
// name: receives the declared identifier. It is left unchanged when the
//       declarator has no identifier (end of input, punctuation, or a
//       reserved word comes next), so callers must tolerate that.
//
// Returns the complete type of the declared entity.
static Type *read_direct_declarator(Type *ty, char **name) {
  if (consume("(")) {
    // A parenthesized declarator is parsed before the suffixes that follow
    // the closing parenthesis, yet it applies to the type those suffixes
    // produce. Parse it against an empty placeholder first, then overwrite
    // the placeholder once the real type is known.
    Type *placeholder = calloc(1, sizeof(Type));
    if (!placeholder)
      error("out of memory");
    Type *new_ty = read_declarator(placeholder, name);
    expect(")");
    *placeholder = *read_direct_declarator_tail(ty);
    return new_ty;
  }

  // Take the current token as the name only if there is a token, it is
  // spelled like an identifier, and it is not a type keyword. The NULL
  // check must come first because is_reserved() passes its argument
  // straight to strcmp().
  char *tok = tokens[pos];
  if (tok && is_ident_start(tok[0]) && !is_reserved(tok)) {
    *name = tok;
    pos++;
  }

  // Read following optional [] or ().
  return read_direct_declarator_tail(ty);
}
// Parse a declarator: any number of leading `*`, each optionally followed
// by `const`, and then the direct declarator. Examples of the prefix:
//
//   *
//   * const *
//   ***
//
// Every `*` wraps the type built so far in a new pointer node; a `const`
// right after a `*` marks that pointer itself as const. The identifier, if
// any, is stored through `name` by read_direct_declarator().
static Type *read_declarator(Type *ty, char **name) {
  Type *current = ty;

  for (;;) {
    if (!consume("*"))
      break;

    Type *pointer = calloc(1, sizeof(Type));
    pointer->ptr_of = current;
    pointer->ty = TY_PTR;
    for (; consume("const");)
      pointer->is_const = true;

    current = pointer;
  }

  return read_direct_declarator(current, name);
}
// Print out a given type to stdout as an English-like phrase, e.g.
// " const pointer to function returning int". Every word is preceded by a
// single space and no trailing newline is written.
//
// Storage-class and const flags of the node come first, then the node's own
// kind, then (recursively) the type it points to, holds, or returns.
// A node whose kind is not a known BaseType value prints no kind word.
void print_type(Type *ty) {
  if (ty->is_typedef)
    fputs(" typedef", stdout);
  if (ty->is_extern)
    fputs(" extern", stdout);
  if (ty->is_static)
    fputs(" static", stdout);
  if (ty->is_const)
    fputs(" const", stdout);

  const char *label = NULL;
  switch (ty->ty) {
  case TY_VOID:
    label = " void";
    break;
  case TY_BOOL:
    label = " bool";
    break;
  case TY_CHAR:
    label = ty->is_signed ? " char" : " uchar";
    break;
  case TY_SHORT:
    label = ty->is_signed ? " short" : " ushort";
    break;
  case TY_INT:
    label = ty->is_signed ? " int" : " uint";
    break;
  case TY_LONG:
    label = ty->is_signed ? " long" : " ulong";
    break;
  case TY_LLONG:
    label = ty->is_signed ? " longlong" : " ulonglong";
    break;
  case TY_FLOAT:
    label = " float";
    break;
  case TY_DOUBLE:
    label = " double";
    break;
  case TY_LDOUBLE:
    // Spelled as one word, like "longlong".
    label = " longdouble";
    break;
  case TY_ARRAY:
    label = " array of";
    break;
  case TY_PTR:
    label = " pointer to";
    break;
  case TY_FUNCTION:
    label = " function returning";
    break;
  default:
    break;
  }
  if (label)
    fputs(label, stdout);

  // At most one of these links is set by the parser for a given node.
  if (ty->ptr_of)
    print_type(ty->ptr_of);
  if (ty->array_of)
    print_type(ty->array_of);
  if (ty->return_type)
    print_type(ty->return_type);
}
// Reports whether `c` may appear inside a word token: an ASCII letter, an
// ASCII digit, or an underscore.
static bool is_alnum(char c) {
  if (c == '_')
    return true;
  if (c >= '0' && c <= '9')
    return true;
  if (c >= 'a' && c <= 'z')
    return true;
  return c >= 'A' && c <= 'Z';
}
int main(int argc, char **argv) {
if (argc != 2)
error("Usage: %s declaration\nExample: %s 'int const (*x)[][]", argv[0]);
// Tokenize argv[1].
int i = 0;
for (char *p = argv[1]; *p;) {
if (*p == ' ') {
p++;
continue;
}
if (is_alnum(*p)) {
char *q = p + 1;
while (is_alnum(*q))
q++;
tokens[i++] = strndup(p, q - p);
p = q;
continue;
}
tokens[i++] = strndup(p, 1);
p++;
}
// Parse the input.
Type *ty = read_type_specifier();
char *name = NULL;
ty = read_declarator(ty, &name);
// Print it out.
printf("%s:", name);
print_type(ty);
printf("\n");
}
// (GitHub Gist page footer: sign in on GitHub to comment on this gist.)