Skip to content

Instantly share code, notes, and snippets.

@joelburton
Created July 16, 2015 08:53
Show Gist options
  • Save joelburton/5367bc753bbbe6638f6e to your computer and use it in GitHub Desktop.
Save joelburton/5367bc753bbbe6638f6e to your computer and use it in GitHub Desktop.
C Declaration Explainer
/* C Declaration explainer.
*
* Reads a C declaration from standard input and explains what it means.
*
* For example:
*
* $ echo "char *stuff[]" | this_program
* stuff is an array of pointer to char
*
* Adapted from Peter van der Linden's "Expert C Programming".
*
* by Joel Burton (thatboy@husc.harvard.edu), 11/1994
*/
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
// Capacity of the token stack: the number of elements in stack[].
#define MAX_TOKENS 100
// Size in bytes of a token's text buffer (struct token's string member),
// including the terminating NUL.
#define MAX_TOKEN_LEN 64
/* Classification assigned to a word token by classify_string(). */
enum type_tag {
    IDENTIFIER = 0, /* variable names, and any word that is not recognised */
    QUALIFIER = 1,  /* const, volatile */
    TYPE = 2        /* char, int, struct, etc. */
};
// One lexical token read from the input.
struct token {
char type; // a type_tag value for words; for punctuation, the character itself ('*' marks a pointer)
char string[MAX_TOKEN_LEN]; // NUL-terminated token text (rewritten for "const" and "*")
};
// The token most recently read by get_token(); every parsing routine
// inspects and advances this single shared "current token".
struct token this;
// Stack of the tokens read before the first identifier; they are popped
// again, innermost first, while the declaration is being explained.
int top = -1; // index of the top element; -1 means the stack is empty
struct token stack[MAX_TOKENS];

// pop(): yield the top token and shrink the stack. The caller must make
// sure the stack is not empty (top >= 0) before popping.
#define pop() (stack[top--])

// push(S): copy token S onto the stack. Rather than writing past the end
// of stack[], a full stack is reported on stderr and the program exits.
#define push(S) \
    ((top + 1 < MAX_TOKENS) \
        ? (void) (stack[++top] = (S)) \
        : (fputs("error: too many tokens in declaration\n", stderr), \
           exit(EXIT_FAILURE)))
/* Classify the word held in the current token.
 *
 * Compares this.string against the keywords this program knows and
 * returns its classification:
 *
 * - QUALIFIER for "const" and "volatile"; "const" is also rewritten in
 *   place to "read-only" so the explanation reads naturally
 * - TYPE for the built-in type keywords and struct/union/enum
 * - IDENTIFIER for everything else
 */
enum type_tag classify_string(void)
{
    static const char *const type_words[] = {
        "void", "char", "signed", "unsigned", "short", "int", "long",
        "float", "double", "struct", "union", "enum",
    };
    char *word = this.string;

    if (strcmp(word, "const") == 0) {
        strcpy(word, "read-only");
        return QUALIFIER;
    }
    if (strcmp(word, "volatile") == 0) {
        return QUALIFIER;
    }

    for (size_t i = 0; i < sizeof type_words / sizeof type_words[0]; i++) {
        if (strcmp(word, type_words[i]) == 0) {
            return TYPE;
        }
    }

    // Anything else is taken to be an identifier. A typedef'd type name
    // would land here too; there is no way to tell the two apart.
    return IDENTIFIER;
}
/* Read next token into “this”
*
* Get from STDIN: skip initial whitespace, reading in the first token we
* find. This might be:
*
* - "*", in which case we've found a pointer (classify it as such)
* - a word ("abc123") (pass to classifier to classify the word)
*
*/
void get_token(void)
{
char *p = this.string;
// read past any spaces
while ((*p = (unsigned char) getchar()) == ' ') /* empty */ ;
if (isalnum(*p)) {
// it starts with [A-Za-z0-9] read in word
while (isalnum(*++p = (unsigned char) getchar()));
ungetc(*p, stdin);
*p = '\0';
this.type = classify_string();
return;
}
if (*p == '*') {
strcpy(this.string, "pointer to");
this.type = '*';
return;
}
// If we get here, we found some other punctuation --
// this is likely to be '(', ')', '[', ']'.
//
// Treat it as a token of it's own.
//
// If it's not ()[], that suggests a syntax error in our input, but
// we won't worry about that.
this.string[1] = '\0';
this.type = *p;
return;
}
/* Tokenize until we find the first identifier.
*
* The first step in explaining a C declaration is to find the first
* identifier; this reads tokens, pushing them onto our stack, until we
* find the first identifier.
*/
void read_to_first_identifier()
{
get_token();
while (this.type != IDENTIFIER) {
push(this);
get_token();
}
// We found it, so start our explanation with the identifier name
printf("%s is ", this.string);
// Get the next token so deal_with_declarators has a new "this"
get_token();
}
/* Deal with arrays.
 *
 * Called with "this" on a '['. For "int my_num[10][20]" it prints
 * "array 0..9 of array 0..19 of ". A dimension without a numeric size
 * ("[]" or "[N]") is printed as just "array of ".
 *
 * On return "this" holds the first token after the last ']'.
 */
void deal_with_arrays(void)
{
    while (this.type == '[') {
        printf("array ");

        // Get the next piece: a number for "[10]", a ']' for "[]".
        get_token();
        // The cast keeps isdigit() defined for bytes with the high bit set.
        if (isdigit((unsigned char) this.string[0])) {
            // strtol, unlike atoi, is well defined for out-of-range input.
            printf("0..%ld ", strtol(this.string, NULL, 10) - 1);
            get_token(); // read the ']'
        }

        // A symbolic size leaves us short of the ']'; skip ahead to it,
        // giving up at end of input so a missing ']' cannot hang us.
        while (this.type != ']' && !feof(stdin) && !ferror(stdin)) {
            get_token();
        }
        printf("of ");

        // Done with this dimension, now get the next token.
        get_token();
    }
}
/* Deal with function parameter lists.
 *
 * Nothing is done with the parameters themselves: tokens are read and
 * discarded up to the ')' that closes the list. Parentheses nested inside
 * the list (for example a function-pointer parameter) are counted, so the
 * scan stops at the matching ')' and not at the first one. If the input
 * ends before that ')', the scan stops there instead of looping forever.
 *
 * Prints "function returning " and leaves "this" on the token after the
 * parameter list.
 */
void deal_with_function_args(void)
{
    int depth = 0; // number of '(' seen that are still open

    for (;;) {
        if (this.type == '(') {
            depth++;
        } else if (this.type == ')' && --depth <= 0) {
            break; // reached the ')' that closes the parameter list
        }
        if (feof(stdin) || ferror(stdin)) {
            break; // unterminated list: nothing more to read
        }
        get_token();
    }
    printf("function returning ");

    // Done, now get the next token.
    get_token();
}
/* Pop every pointer token sitting on top of the stack, printing
 * "pointer to " for each one.
 *
 * Stops at the first non-pointer token or when the stack runs empty; the
 * emptiness check comes first so stack[-1] is never read.
 */
void deal_with_pointers(void)
{
    while (top >= 0 && stack[top].type == '*') {
        printf("%s ", pop().string);
    }
}
/* Deal with declarators.
*
* This is called after we find the first identifier; "this" will be the
* first token afterwards, and our stack will be filled with all the stuff
* that came before us. It then handles our rules.
*
* - first deal with []s and ()s
* - deal with any pointers on top of the stack
* - if we're inside (), get outside (& recurse; we're in a new context)
* - print out qualifiers/types
*/
void deal_with_declarator()
{
// If our current token is an array or function param list, deal with
// these -- these functions will get a new token afterwards, so "this"
// will change.
switch (this.type) {
case '[' :
deal_with_arrays();
break;
case '(' :
deal_with_function_args();
break;
default:
break;
}
deal_with_pointers();
/* Process our stack: pull each item off of stack, and handle:
*
* - if it's an '(', our next unread token will be the matching ')' --
* read that and discard it, then recurse into dealing with the next
* outer layer of declarators
*
* - otherwise, it's a qualifier or a type ("const", "int", etc) --
* print it
*/
while (top >= 0) {
struct token next = pop();
if (next.type == '(') {
get_token(); // read ')' and re-process
deal_with_declarator();
} else {
printf("%s ", next.string);
}
}
}
/* Program entry point.
 *
 * Reads the declaration from standard input: stacks every token up to
 * the first identifier, explains the rest recursively, then ends the
 * output line. Command-line arguments are not used.
 */
int main(int argc, char *argv[])
{
    (void) argc;
    (void) argv;

    // Put tokens on the stack until we reach the identifier.
    read_to_first_identifier();

    // Apply the rules to what is on the stack (before the identifier)
    // and what is still unread (after it).
    deal_with_declarator();

    putchar('\n');
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment