joelburton/cdecl.c

## cdecl.c
/* C Declaration explainer.
 *
 * Reads a C declaration from standard input and explains what it means.
 *
 * For example:
 *
 *   $ echo "char *stuff[]" | this_program
 *   stuff is an array of pointer to char
 *
 * Adapted from Peter van der Linden's "Expert C Programming".
 *
 * by Joel Burton (thatboy@husc.harvard.edu), 11/1994
 */


#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>


/* Number of slots in the token stack (stack[]). */
#define MAX_TOKENS 100
/* Size in bytes of the text buffer inside each struct token. */
#define MAX_TOKEN_LEN 64


/* Classification given to word tokens.  Punctuation tokens do not use these
 * values: their type is the punctuation character itself. */
enum type_tag {
    IDENTIFIER = 0,   /* a word that is not a recognized keyword */
    QUALIFIER  = 1,   /* const, volatile */
    TYPE       = 2    /* void, char, int, struct, ... */
};


/* One lexical token: its classification plus its text. */
struct token {
    /* Either an enum type_tag value (for words) or the punctuation
     * character itself, e.g. '*', '(' or '['. */
    char type;
    /* Token text.  Some tokens hold replacement text ("pointer to",
     * "read-only") rather than the spelling found in the input. */
    char string[MAX_TOKEN_LEN];
};

/* The token most recently read by get_token(). */
struct token this;

/* Tokens read before the identifier are saved on this stack and taken off
 * again, most recent first, while the declaration is explained. */

int top = -1;    /* index of the top element; -1 means the stack is empty */
struct token stack[MAX_TOKENS];

/* pop() yields the top token and removes it; push(S) stores a copy of S
 * above the current top.  Neither macro checks bounds, so callers must not
 * pop an empty stack or push onto a full one.  The expansions and the
 * argument are parenthesized so each use behaves as a single expression. */
#define pop() (stack[top--])
#define push(S) (stack[++top] = (S))


/* Classify the word held in "this".
 *
 * Returns QUALIFIER for "const" and "volatile", TYPE for the built-in type
 * keywords and for struct/union/enum, and IDENTIFIER for anything else.
 * As a side effect, the text "const" is rewritten in place to "read-only".
 */

enum type_tag classify_string(void)
{
    static const char *const type_words[] = {
        "void", "char", "signed", "unsigned", "short", "int",
        "long", "float", "double", "struct", "union", "enum",
    };
    char *word = this.string;
    size_t i;

    if (strcmp(word, "const") == 0) {
        strcpy(word, "read-only");
        return QUALIFIER;
    }
    if (strcmp(word, "volatile") == 0)
        return QUALIFIER;

    for (i = 0; i < sizeof type_words / sizeof type_words[0]; i++) {
        if (strcmp(word, type_words[i]) == 0)
            return TYPE;
    }

    /* Anything unrecognized is treated as an identifier.  A typedef'd type
     * name cannot be told apart from a variable name here. */
    return IDENTIFIER;
}


/* Read next token into “this”
 *
 * Get from STDIN: skip initial whitespace, reading in the first token we
 * find. This might be:
 *
 * - "*", in which case we've found a pointer (classify it as such)
 * - a word ("abc123") (pass to classifier to classify the word)
 *
 */

void get_token(void)
{
    char *p = this.string;

    // read past any spaces
    while ((*p = (unsigned char) getchar()) == ' ') /* empty */ ;

    if (isalnum(*p)) {
        //  it starts with [A-Za-z0-9] read in word
        while (isalnum(*++p = (unsigned char) getchar()));
        ungetc(*p, stdin);
        *p = '\0';
        this.type = classify_string();
        return;
    }

    if (*p == '*') {
        strcpy(this.string, "pointer to");
        this.type = '*';
        return;
    }

    // If we get here, we found some other punctuation --
    // this is likely to be '(', ')', '[', ']'.
    //
    // Treat it as a token of it's own.
    //
    // If it's not ()[], that suggests a syntax error in our input, but
    // we won't worry about that.

    this.string[1] = '\0';
    this.type = *p;
    return;
}


/* Tokenize until we find the first identifier.
 *
 * The first step in explaining a C declaration is to find the first
 * identifier; this reads tokens, pushing them onto our stack, until we
 * find the first identifier.
 */
void read_to_first_identifier()
{
    get_token();
    while (this.type != IDENTIFIER) {
        push(this);
        get_token();
    }

    // We found it, so start our explanation with the identifier name
    printf("%s is ", this.string);

    // Get the next token so deal_with_declarators has a new "this"
    get_token();
}


/* Deal with arrays.
 *
 * Called with "this" at a '['.  For "int my_num[10][20]" it prints
 * "array 0..9 of array 0..19 of ".  An empty "[]" prints "array of ".
 * A size that does not start with a digit (e.g. "[N]") is skipped and also
 * prints "array of ".  On return, "this" is the first token after the last
 * ']'.
 */

void deal_with_arrays(void)
{
    while (this.type == '[') {
        printf("array ");

        /* Next piece: a number for "[10]", or ']' for "[]" */
        get_token();

        /* cast: <ctype.h> functions require an unsigned char value */
        if (isdigit((unsigned char) this.string[0])) {
            printf("0..%d ", atoi(this.string) - 1);
            get_token();   /* normally the ']' */
        }

        /* Skip anything else up to the closing ']' so that a non-numeric
         * size does not leave the ']' unread; stop if the input ends. */
        while (this.type != ']' && !feof(stdin) && !ferror(stdin))
            get_token();

        printf("of ");

        /* Move past the ']' */
        get_token();
    }
}


/* Deal with function parameter lists.
 *
 * The parameters themselves are not explained; tokens are read and
 * discarded up to the ')' that closes the list.  Parentheses nested inside
 * the list are counted so that an inner ')' does not end the scan.  Then
 * "function returning " is printed and the token after the ')' is read
 * into "this".  If the input ends first, scanning stops there.
 */

void deal_with_function_args(void)
{
    int depth = 0;   /* number of '(' currently open */

    for (;;) {
        if (this.type == '(') {
            depth++;
        } else if (this.type == ')') {
            if (depth <= 1)
                break;           /* this ')' closes the parameter list */
            depth--;
        } else if (feof(stdin) || ferror(stdin)) {
            break;               /* no ')' will ever arrive */
        }
        get_token();
    }

    printf("function returning ");

    /* Done, now get next token */
    get_token();
}


/* Print every pointer token sitting on top of the stack.
 *
 * Pops consecutive '*' tokens and prints each one's text ("pointer to")
 * followed by a space.  Stops at the first non-pointer token or when the
 * stack is empty.
 */

void deal_with_pointers(void)
{
    /* Test top first: with an empty stack (top == -1), stack[top] would
     * read before the start of the array. */
    while (top >= 0 && stack[top].type == '*')
        printf("%s ", pop().string);
}


/* Deal with declarators.
 *
 * This is called after we find the first identifier; "this" will be the
 * first token afterwards, and our stack will be filled with all the stuff
 * that came before us. It then handles our rules.
 *
 * - first deal with []s and ()s
 * - deal with any pointers on top of the stack
 * - if we're inside (), get outside (& recurse; we're in a new context)
 * - print out qualifiers/types
 */

void deal_with_declarator()
{

    // If our current token is an array or function param list, deal with
    // these -- these functions will get a new token afterwards, so "this"
    // will change.
    switch (this.type) {
        case '[' :
            deal_with_arrays();
            break;
        case '(' :
            deal_with_function_args();
            break;
        default:
            break;
    }

    deal_with_pointers();

    /* Process our stack: pull each item off of stack, and handle:
     *
     * - if it's an '(', our next unread token will be the matching ')' --
     *   read that and discard it, then recurse into dealing with the next
     *   outer layer of declarators
     *
     * - otherwise, it's a qualifier or a type ("const", "int", etc) --
     *   print it
     */

    while (top >= 0) {
        struct token next = pop();

        if (next.type == '(') {
            get_token();     // read ')' and re-process
            deal_with_declarator();
        } else {
            printf("%s ", next.string);
        }
    }
}


/* Program entry point.
 *
 * Reads one declaration from standard input, prints its explanation
 * followed by a newline, and returns 0.  The command-line arguments are
 * not used.
 */

int main(int argc, char *argv[])
{
    (void) argc;
    (void) argv;

    /* Phase 1: stack everything that precedes the identifier and print
     * the "<name> is " prefix. */
    read_to_first_identifier();

    /* Phase 2: explain what follows the identifier and unwind the stack. */
    deal_with_declarator();

    printf("\n");
    return 0;
}
	/* C Declaration explainer.
	*
	* Reads a C declaration from standard input and explains what it means.
	*
	* For example:
	*
	* $ echo "char *stuff[]" | this_program
	* stuff is an array of pointer to char
	*
	* Adapted from Peter van der Linden's "Expert C Programming".
	*
	* by Joel Burton (thatboy@husc.harvard.edu), 11/1994
	*/


	#include <stdio.h>
	#include <string.h>
	#include <ctype.h>
	#include <stdlib.h>


	/* Number of slots in the token stack (stack[]). */
	#define MAX_TOKENS 100
	/* Size in bytes of the text buffer inside each struct token. */
	#define MAX_TOKEN_LEN 64


	/* Classification given to word tokens.  Punctuation tokens do not use
	 * these values: their type is the punctuation character itself. */
	enum type_tag {
	    IDENTIFIER = 0,   /* a word that is not a recognized keyword */
	    QUALIFIER  = 1,   /* const, volatile */
	    TYPE       = 2    /* void, char, int, struct, ... */
	};


	/* One lexical token: its classification plus its text. */
	struct token {
	    /* Either an enum type_tag value (for words) or the punctuation
	     * character itself, e.g. '*', '(' or '['. */
	    char type;
	    /* Token text.  Some tokens hold replacement text ("pointer to",
	     * "read-only") rather than the spelling found in the input. */
	    char string[MAX_TOKEN_LEN];
	};

	/* The token most recently read by get_token(). */
	struct token this;

	/* Tokens read before the identifier are saved on this stack and taken
	 * off again, most recent first, while the declaration is explained. */

	int top = -1;    /* index of the top element; -1 means the stack is empty */
	struct token stack[MAX_TOKENS];

	/* pop() yields the top token and removes it; push(S) stores a copy of S
	 * above the current top.  Neither macro checks bounds, so callers must
	 * not pop an empty stack or push onto a full one.  The expansions and
	 * the argument are parenthesized so each use behaves as a single
	 * expression. */
	#define pop() (stack[top--])
	#define push(S) (stack[++top] = (S))


	/* Classify the word held in "this".
	 *
	 * Returns QUALIFIER for "const" and "volatile", TYPE for the built-in
	 * type keywords and for struct/union/enum, and IDENTIFIER for anything
	 * else.  As a side effect, the text "const" is rewritten in place to
	 * "read-only".
	 */

	enum type_tag classify_string(void)
	{
	    static const char *const type_words[] = {
	        "void", "char", "signed", "unsigned", "short", "int",
	        "long", "float", "double", "struct", "union", "enum",
	    };
	    char *word = this.string;
	    size_t i;

	    if (strcmp(word, "const") == 0) {
	        strcpy(word, "read-only");
	        return QUALIFIER;
	    }
	    if (strcmp(word, "volatile") == 0)
	        return QUALIFIER;

	    for (i = 0; i < sizeof type_words / sizeof type_words[0]; i++) {
	        if (strcmp(word, type_words[i]) == 0)
	            return TYPE;
	    }

	    /* Anything unrecognized is treated as an identifier.  A typedef'd
	     * type name cannot be told apart from a variable name here. */
	    return IDENTIFIER;
	}


	/* Read next token into “this”
	*
	* Get from STDIN: skip initial whitespace, reading in the first token we
	* find. This might be:
	*
	* - "*", in which case we've found a pointer (classify it as such)
	* - a word ("abc123") (pass to classifier to classify the word)
	*
	*/

	void get_token(void)
	{
	char *p = this.string;

	// read past any spaces
	while ((p = (unsigned char) getchar()) == ' ') / empty */ ;

	if (isalnum(*p)) {
	// it starts with [A-Za-z0-9] read in word
	while (isalnum(*++p = (unsigned char) getchar()));
	ungetc(*p, stdin);
	*p = '\0';
	this.type = classify_string();
	return;
	}

	if (p == '') {
	strcpy(this.string, "pointer to");
	this.type = '*';
	return;
	}

	// If we get here, we found some other punctuation --
	// this is likely to be '(', ')', '[', ']'.
	//
	// Treat it as a token of it's own.
	//
	// If it's not ()[], that suggests a syntax error in our input, but
	// we won't worry about that.

	this.string[1] = '\0';
	this.type = *p;
	return;
	}


	/* Tokenize until we find the first identifier.
	*
	* The first step in explaining a C declaration is to find the first
	* identifier; this reads tokens, pushing them onto our stack, until we
	* find the first identifier.
	*/
	void read_to_first_identifier()
	{
	get_token();
	while (this.type != IDENTIFIER) {
	push(this);
	get_token();
	}

	// We found it, so start our explanation with the identifier name
	printf("%s is ", this.string);

	// Get the next token so deal_with_declarators has a new "this"
	get_token();
	}


	/* Deal with arrays.
	 *
	 * Called with "this" at a '['.  For "int my_num[10][20]" it prints
	 * "array 0..9 of array 0..19 of ".  An empty "[]" prints "array of ".
	 * A size that does not start with a digit (e.g. "[N]") is skipped and
	 * also prints "array of ".  On return, "this" is the first token after
	 * the last ']'.
	 */

	void deal_with_arrays(void)
	{
	    while (this.type == '[') {
	        printf("array ");

	        /* Next piece: a number for "[10]", or ']' for "[]" */
	        get_token();

	        /* cast: <ctype.h> functions require an unsigned char value */
	        if (isdigit((unsigned char) this.string[0])) {
	            printf("0..%d ", atoi(this.string) - 1);
	            get_token();   /* normally the ']' */
	        }

	        /* Skip anything else up to the closing ']' so that a non-numeric
	         * size does not leave the ']' unread; stop if the input ends. */
	        while (this.type != ']' && !feof(stdin) && !ferror(stdin))
	            get_token();

	        printf("of ");

	        /* Move past the ']' */
	        get_token();
	    }
	}


	/* Deal with function parameter lists.
	 *
	 * The parameters themselves are not explained; tokens are read and
	 * discarded up to the ')' that closes the list.  Parentheses nested
	 * inside the list are counted so that an inner ')' does not end the
	 * scan.  Then "function returning " is printed and the token after the
	 * ')' is read into "this".  If the input ends first, scanning stops
	 * there.
	 */

	void deal_with_function_args(void)
	{
	    int depth = 0;   /* number of '(' currently open */

	    for (;;) {
	        if (this.type == '(') {
	            depth++;
	        } else if (this.type == ')') {
	            if (depth <= 1)
	                break;           /* this ')' closes the parameter list */
	            depth--;
	        } else if (feof(stdin) || ferror(stdin)) {
	            break;               /* no ')' will ever arrive */
	        }
	        get_token();
	    }

	    printf("function returning ");

	    /* Done, now get next token */
	    get_token();
	}


	/* Print every pointer token sitting on top of the stack.
	 *
	 * Pops consecutive '*' tokens and prints each one's text ("pointer to")
	 * followed by a space.  Stops at the first non-pointer token or when
	 * the stack is empty.
	 */

	void deal_with_pointers(void)
	{
	    /* Test top first: with an empty stack (top == -1), stack[top] would
	     * read before the start of the array. */
	    while (top >= 0 && stack[top].type == '*')
	        printf("%s ", pop().string);
	}


	/* Deal with declarators.
	*
	* This is called after we find the first identifier; "this" will be the
	* first token afterwards, and our stack will be filled with all the stuff
	* that came before us. It then handles our rules.
	*
	* - first deal with []s and ()s
	* - deal with any pointers on top of the stack
	* - if we're inside (), get outside (& recurse; we're in a new context)
	* - print out qualifiers/types
	*/

	void deal_with_declarator()
	{

	// If our current token is an array or function param list, deal with
	// these -- these functions will get a new token afterwards, so "this"
	// will change.
	switch (this.type) {
	case '[' :
	deal_with_arrays();
	break;
	case '(' :
	deal_with_function_args();
	break;
	default:
	break;
	}

	deal_with_pointers();

	/* Process our stack: pull each item off of stack, and handle:
	*
	* - if it's an '(', our next unread token will be the matching ')' --
	* read that and discard it, then recurse into dealing with the next
	* outer layer of declarators
	*
	* - otherwise, it's a qualifier or a type ("const", "int", etc) --
	* print it
	*/

	while (top >= 0) {
	struct token next = pop();

	if (next.type == '(') {
	get_token(); // read ')' and re-process
	deal_with_declarator();
	} else {
	printf("%s ", next.string);
	}
	}
	}


	/* Program entry point.
	 *
	 * Reads one declaration from standard input, prints its explanation
	 * followed by a newline, and returns 0.  The command-line arguments are
	 * not used.
	 */

	int main(int argc, char *argv[])
	{
	    (void) argc;
	    (void) argv;

	    /* Phase 1: stack everything that precedes the identifier and print
	     * the "<name> is " prefix. */
	    read_to_first_identifier();

	    /* Phase 2: explain what follows the identifier and unwind the stack. */
	    deal_with_declarator();

	    printf("\n");
	    return 0;
	}