Created
July 16, 2015 08:53
-
-
Save joelburton/5367bc753bbbe6638f6e to your computer and use it in GitHub Desktop.
C Declaration Explainer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* C Declaration explainer.
 *
 * Reads a C declaration from standard input and explains what it means.
 *
 * For example:
 *
 *     $ echo "char *stuff[]" | this_program
 *     stuff is array of pointer to char
 *
 * Adapted from Peter van der Linden's "Expert C Programming".
 *
 * by Joel Burton (thatboy@husc.harvard.edu), 11/1994
 */
#include <stdio.h> | |
#include <string.h> | |
#include <ctype.h> | |
#include <stdlib.h> | |
/* Capacity limits: how many tokens the stack can hold, and how many bytes
 * (terminator included) a single token's text may occupy. */
#define MAX_TOKENS 100
#define MAX_TOKEN_LEN 64

/* Classification assigned to a word token. */
enum type_tag {
    IDENTIFIER = 0, /* a name -- anything that is not a known keyword */
    QUALIFIER = 1,  /* const, volatile */
    TYPE = 2        /* char, int, struct, etc. */
};

/* One lexical token. */
struct token {
    /* For words: a type_tag value.  For punctuation: the character itself
     * ('*' for a pointer, or '(', ')', '[', ']', ...). */
    char type;
    /* NUL-terminated text of the token. */
    char string[MAX_TOKEN_LEN];
};
/* The token most recently read from standard input. */
struct token this;

/* Stack of the tokens that were read before the identifier and still have
 * to be explained.
 *
 * "top" is the index of the most recently pushed element; -1 means the
 * stack is empty.
 *
 * push() and pop() perform NO bounds checking: the caller must make sure
 * that top < MAX_TOKENS - 1 before pushing and top >= 0 before popping.
 * Both expansions are fully parenthesised so they behave as a single
 * expression wherever they are used (e.g. pop().string, push(a ? b : c)).
 */
int top = -1; // stack is empty at first
struct token stack[MAX_TOKENS];
#define pop() (stack[top--])
#define push(S) (stack[++top] = (S))
/* Figure out the identifier type | |
* | |
* Looks at the current token ("this") and returns it's classification. | |
* */ | |
enum type_tag classify_string(void) | |
{ | |
char *s = this.string; | |
if (!strcmp(s, "const")) { | |
// for const, we replace with "read only" | |
strcpy(s, "read-only"); | |
return QUALIFIER; | |
} | |
if (!strcmp(s, "volatile")) return QUALIFIER; | |
if (!strcmp(s, "void")) return TYPE; | |
if (!strcmp(s, "char")) return TYPE; | |
if (!strcmp(s, "signed")) return TYPE; | |
if (!strcmp(s, "unsigned")) return TYPE; | |
if (!strcmp(s, "short")) return TYPE; | |
if (!strcmp(s, "int")) return TYPE; | |
if (!strcmp(s, "long")) return TYPE; | |
if (!strcmp(s, "float")) return TYPE; | |
if (!strcmp(s, "double")) return TYPE; | |
if (!strcmp(s, "struct")) return TYPE; | |
if (!strcmp(s, "union")) return TYPE; | |
if (!strcmp(s, "enum")) return TYPE; | |
// We can only assume it's an identifier name -- but, of course, | |
// it's also possible that it's a typedef'd type, but we've no way | |
// of knowing. Darn. | |
return IDENTIFIER; | |
} | |
/* Read next token into “this” | |
* | |
* Get from STDIN: skip initial whitespace, reading in the first token we | |
* find. This might be: | |
* | |
* - "*", in which case we've found a pointer (classify it as such) | |
* - a word ("abc123") (pass to classifier to classify the word) | |
* | |
*/ | |
void get_token(void) | |
{ | |
char *p = this.string; | |
// read past any spaces | |
while ((*p = (unsigned char) getchar()) == ' ') /* empty */ ; | |
if (isalnum(*p)) { | |
// it starts with [A-Za-z0-9] read in word | |
while (isalnum(*++p = (unsigned char) getchar())); | |
ungetc(*p, stdin); | |
*p = '\0'; | |
this.type = classify_string(); | |
return; | |
} | |
if (*p == '*') { | |
strcpy(this.string, "pointer to"); | |
this.type = '*'; | |
return; | |
} | |
// If we get here, we found some other punctuation -- | |
// this is likely to be '(', ')', '[', ']'. | |
// | |
// Treat it as a token of it's own. | |
// | |
// If it's not ()[], that suggests a syntax error in our input, but | |
// we won't worry about that. | |
this.string[1] = '\0'; | |
this.type = *p; | |
return; | |
} | |
/* Tokenize until we find the first identifier. | |
* | |
* The first step in explaining a C declaration is to find the first | |
* identifier; this reads tokens, pushing them onto our stack, until we | |
* find the first identifier. | |
*/ | |
void read_to_first_identifier() | |
{ | |
get_token(); | |
while (this.type != IDENTIFIER) { | |
push(this); | |
get_token(); | |
} | |
// We found it, so start our explanation with the identifier name | |
printf("%s is ", this.string); | |
// Get the next token so deal_with_declarators has a new "this" | |
get_token(); | |
} | |
/* Deal with arrays. | |
* | |
* If we set "int my_num[10][20]", this gets called at the first "[". | |
* It should say "array 0..10 of array 0..20". | |
*/ | |
void deal_with_arrays() | |
{ | |
while (this.type == '[') { | |
printf("array "); | |
// Get the next piece (an int for "[10]", a "]" for "[]" | |
get_token(); | |
if (isdigit(this.string[0])) { | |
printf("0..%d ", atoi(this.string) - 1); | |
get_token(); // read the ']' | |
} | |
printf("of "); | |
// Done, now get the next token | |
get_token(); | |
} | |
} | |
/* Deal with functions parameter lists. | |
* | |
* For functions, we don't do anything with the function parameters--so | |
* we just need to read tokens until we get to the closing parenthesis. | |
*/ | |
void deal_with_function_args() | |
{ | |
while (this.type != ')') | |
get_token(); | |
printf("function returning "); | |
// Done, now get next token | |
get_token(); | |
} | |
/* When the next thing on our stack is a pointer, pull that pointer off, | |
* and say "pointer to " | |
*/ | |
void deal_with_pointers() | |
{ | |
while (stack[top].type == '*') | |
printf("%s ", pop().string); | |
} | |
/* Deal with declarators. | |
* | |
* This is called after we find the first identifier; "this" will be the | |
* first token afterwards, and our stack will be filled with all the stuff | |
* that came before us. It then handles our rules. | |
* | |
* - first deal with []s and ()s | |
* - deal with any pointers on top of the stack | |
* - if we're inside (), get outside (& recurse; we're in a new context) | |
* - print out qualifiers/types | |
*/ | |
void deal_with_declarator() | |
{ | |
// If our current token is an array or function param list, deal with | |
// these -- these functions will get a new token afterwards, so "this" | |
// will change. | |
switch (this.type) { | |
case '[' : | |
deal_with_arrays(); | |
break; | |
case '(' : | |
deal_with_function_args(); | |
break; | |
default: | |
break; | |
} | |
deal_with_pointers(); | |
/* Process our stack: pull each item off of stack, and handle: | |
* | |
* - if it's an '(', our next unread token will be the matching ')' -- | |
* read that and discard it, then recurse into dealing with the next | |
* outer layer of declarators | |
* | |
* - otherwise, it's a qualifier or a type ("const", "int", etc) -- | |
* print it | |
*/ | |
while (top >= 0) { | |
struct token next = pop(); | |
if (next.type == '(') { | |
get_token(); // read ')' and re-process | |
deal_with_declarator(); | |
} else { | |
printf("%s ", next.string); | |
} | |
} | |
} | |
/* Program entry point.
 *
 * Reads the declaration from standard input: first everything up to the
 * identifier (stacking it), then the rest, explained recursively.  The
 * explanation is finished with a newline.  Command-line arguments are
 * not used.  Always returns 0.
 */
int main(int argc, char *argv[])
{
    (void) argc;
    (void) argv;

    // stack tokens until the identifier shows up, and announce it
    read_to_first_identifier();

    // explain what follows the identifier and what was stacked before it
    deal_with_declarator();

    putchar('\n');
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment