Last active
August 28, 2019 09:50
-
-
Save phorward/259cf585035ef3a885931f1356340695 to your computer and use it in GitHub Desktop.
Demonstration of a recursive descent parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <string.h> | |
/* ~~~~~~~~~~~~~~~~~~AST~~~~~~~~~~~~~~~~~~ */ | |
typedef enum { | |
NODETYPE_OPERATOR, | |
NODETYPE_NUMBER | |
} NODETYPE; | |
typedef enum { | |
OPERATOR_ADD, | |
OPERATOR_MUL, | |
OPERATOR_EQUAL, | |
OPERATOR_UNEQUAL | |
} OPERATORTYPE; | |
typedef struct _node NODE; | |
struct _node | |
{ | |
NODETYPE type; | |
int value; | |
NODE* next; | |
NODE* child; | |
}; | |
NODE* make_node( int type, int value ) | |
{ | |
NODE* n; | |
n = (NODE*)malloc( sizeof( NODE ) ); | |
memset( n, 0, sizeof( NODE ) ); | |
n->type = type; | |
n->value = value; | |
return n; | |
} | |
/* ~~~~~~~~~~~~~~~~~~SCANNER~~~~~~~~~~~~~~~~~~ */ | |
typedef enum { | |
TOKEN_UNKNOWN, | |
TOKEN_EOF, | |
TOKEN_NUMBER, | |
TOKEN_ADD, | |
TOKEN_MUL, | |
TOKEN_EQUAL, | |
TOKEN_UNEQUAL, | |
TOKEN_BR_OPEN, | |
TOKEN_BR_CLOSE | |
} TOKENTYPE; | |
char* source; | |
int lookahead = TOKEN_UNKNOWN; | |
int att; | |
TOKENTYPE get_token( int* val ) | |
{ | |
char buf [ 10 + 1 ]; | |
char* start = source; | |
/* printf( "source = >%s<\n", source ); */ | |
if( *start == '+' ) | |
{ | |
source++; | |
return TOKEN_ADD; | |
} | |
else if( *start == '*' ) | |
{ | |
source++; | |
return TOKEN_MUL; | |
} | |
else if( *start == '(' ) | |
{ | |
source++; | |
return TOKEN_BR_OPEN; | |
} | |
else if( *start == ')' ) | |
{ | |
source++; | |
return TOKEN_BR_CLOSE; | |
} | |
else if( !strncmp( start, "==", 2 ) ) | |
{ | |
source += 2; | |
return TOKEN_EQUAL; | |
} | |
else if( !strncmp( start, "!=", 2 ) ) | |
{ | |
source += 2; | |
return TOKEN_UNEQUAL; | |
} | |
else if( *start == '\0' || *start == '\n' ) | |
{ | |
return TOKEN_EOF; | |
} | |
else if( *start >= '0' && *start <= '9' ) | |
{ | |
while( *source >= '0' && *source <= '9' ) | |
{ | |
source++; | |
} | |
sprintf( buf, "%.*s", source - start, start ); | |
*val = atoi( buf ); | |
return TOKEN_NUMBER; | |
} | |
return TOKEN_UNKNOWN; | |
} | |
/* ~~~~~~~~~~~~~~~~~~PARSER~~~~~~~~~~~~~~~~~~ */ | |
void condition( NODE** start ); | |
void factor( NODE** start ) | |
{ | |
if( lookahead == TOKEN_NUMBER ) | |
{ | |
*start = make_node( NODETYPE_NUMBER, att ); | |
lookahead = get_token( &att ); | |
} | |
else if( lookahead == TOKEN_BR_OPEN ) | |
{ | |
lookahead = get_token( &att ); | |
condition( start ); | |
if( lookahead != TOKEN_BR_CLOSE ) | |
{ | |
printf( "PARSE ERROR at >%s<\n", source ); | |
exit( 0 ); | |
} | |
lookahead = get_token( &att ); | |
} | |
else | |
{ | |
printf( "PARSE ERROR at >%s<\n", source ); | |
exit( 0 ); | |
} | |
} | |
void mul( NODE** start ) | |
{ | |
NODE* op; | |
factor( start ); | |
if( lookahead == TOKEN_MUL ) | |
{ | |
op = make_node( NODETYPE_OPERATOR, OPERATOR_MUL ); | |
op->child = *start; | |
lookahead = get_token( &att ); | |
mul( &( op->child->next ) ); | |
*start = op; | |
} | |
} | |
void add( NODE** start ) | |
{ | |
NODE* op; | |
mul( start ); | |
if( lookahead == TOKEN_ADD ) | |
{ | |
op = make_node( NODETYPE_OPERATOR, OPERATOR_ADD ); | |
op->child = *start; | |
lookahead = get_token( &att ); | |
add( &( op->child->next ) ); | |
*start = op; | |
} | |
} | |
void condition( NODE** start ) | |
{ | |
NODE* op; | |
add( start ); | |
if( lookahead == TOKEN_EQUAL ) | |
{ | |
op = make_node( NODETYPE_OPERATOR, OPERATOR_EQUAL ); | |
op->child = *start; | |
lookahead = get_token( &att ); | |
condition( &( op->child->next ) ); | |
*start = op; | |
} | |
else if( lookahead == TOKEN_UNEQUAL ) | |
{ | |
op = make_node( NODETYPE_OPERATOR, OPERATOR_UNEQUAL ); | |
op->child = *start; | |
lookahead = get_token( &att ); | |
condition( &( op->child->next ) ); | |
*start = op; | |
} | |
} | |
void expression( NODE** start ) | |
{ | |
lookahead = get_token( &att ); | |
condition( start ); | |
if( lookahead != TOKEN_EOF ) | |
{ | |
printf( "PARSE ERROR at >%s<\n", source ); | |
exit( 0 ); | |
} | |
} | |
/* ~~~~~~~~~~~~~~~~~~TREEVIEW~~~~~~~~~~~~~~~~~~ */ | |
int draw( NODE* s ) | |
{ | |
static int i = 0; | |
int y; | |
for( ; s; s = s->next ) | |
{ | |
for( y = 0; y < i; y++ ) | |
printf( " " ); | |
if( s->type == NODETYPE_NUMBER ) | |
{ | |
printf( "int %d\n", s->value ); | |
} | |
else if( s->type == NODETYPE_OPERATOR ) | |
{ | |
switch( s->value ) | |
{ | |
case OPERATOR_ADD: | |
printf( "add" ); | |
break; | |
case OPERATOR_MUL: | |
printf( "mul" ); | |
break; | |
case OPERATOR_EQUAL: | |
printf( "equal" ); | |
break; | |
case OPERATOR_UNEQUAL: | |
printf( "not-euqal" ); | |
break; | |
} | |
printf( "\n" ); | |
i++; | |
draw( s->child ); | |
i--; | |
} | |
} | |
} | |
/* ~~~~~~~~~~~~~~~~~~MAIN~~~~~~~~~~~~~~~~~~ */ | |
int main( int argc, char** argv ) | |
{ | |
char s [ 80 + 1 ]; | |
NODE* root; | |
while( 1 ) | |
{ | |
printf( "What should I parse? "); | |
fgets( s, 80, stdin ); | |
if( *s && *s != '\n' ) | |
{ | |
source = s; | |
expression( &root ); | |
printf( "ok\n" ); | |
draw( root ); | |
} | |
else | |
{ | |
printf( "bye!\n" ); | |
return 1; | |
} | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment