Last active
December 31, 2015 11:59
-
-
Save aniruddha-a/7983060 to your computer and use it in GitHub Desktop.
Lisp parse and indent
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h>
#include <malloc.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Kind tag stored in every tree node.  For data nodes the tag is the token
 * code handed back by the tokenizer, so these values must stay in sync with
 * the tokenizer's own token numbering.
 */
typedef enum {
    TOK_ID_VAR         = 257,
    TOK_SQSTR          = 258,  /* string token (see ISSTR) */
    TOK_LPAREN         = 259,  /* opens a list */
    TOK_RPAREN         = 260,  /* closes a list */
    TOK_COMM_DIRECTIVE = 261,  /* comment token (see ISCOMMENT) */
    TOK_NUMBER         = 262,  /* number token (see ISNUMBER) */
    TOK_PAIR           = 263,  /* internal pair node, never a token text */
} nodetype;
typedef struct node_ { | |
nodetype type; | |
int line; | |
char *file; | |
char *data; | |
struct node_ *l, *r; /* l is the left (car, or 'first') | |
r is the right(cdr, or 'rest') link */ | |
} node_t; | |
/* Field accessors; X is a pointer to a node. */
#define TYPE(X)      ((X)->type)
#define DATA(X)      ((X)->data)
#define FNAME(X)     ((X)->file)
#define FLINE(X)     ((X)->line)

/* Node-kind predicates built on TYPE(). */
#define PAIRNODE(X)  (TYPE(X) == TOK_PAIR)
#define ISNUMBER(X)  (TYPE(X) == TOK_NUMBER)
#define ISSTR(X)     (TYPE(X) == TOK_SQSTR)
#define ISCOMMENT(X) (TYPE(X) == TOK_COMM_DIRECTIVE)
/*
 * Scanner entry point, defined outside this file (the code is written
 * against a flex-generated scanner).  Declared here so the call below is
 * not an implicit function declaration, which is invalid since C99.
 */
extern int yylex(void);

/*
 * Fetch the next token code from the tokenizer.
 *
 * Returns whatever yylex() returns; build_list() treats 0 as end of input.
 * Replace the call with a custom tokenizer if flex is not used.
 */
static
int get_next_token (void)
{
    return yylex(); // or use custom tokenizer
}
static | |
node_t* get_list_node () | |
{ | |
node_t *t = NULL; | |
t = calloc(1, sizeof(node_t)); | |
t->data = NULL; | |
t->type = TOK_PAIR; | |
t->l = t->r = NULL; | |
return t; | |
} | |
static | |
node_t* get_data_node (char *symbol, int type, char *file, int line) | |
{ | |
node_t *t = NULL; | |
t = calloc(1, sizeof(node_t)); | |
t->data = strdup(symbol); | |
assert(t->data); | |
t->type = type; | |
t->line = line; | |
t->file = file; | |
t->l = t->r = NULL; | |
return t; | |
} | |
// Main tree builder function | |
static | |
node_t* build_list () | |
{ | |
node_t *t, *tmp; | |
int tok = get_next_token(); | |
t = get_list_node(); | |
tmp = t; | |
while ( tok && (tok != TOK_RPAREN)) { /* relaxed (c != '\0') */ | |
switch (tok) { | |
case TOK_LPAREN: | |
tmp->l = build_list(); /* new list returned must go to CAR */ | |
tok = get_next_token(); /* now, set things to proceed in | |
the loop */ | |
tmp->r = get_list_node(); /* the rest that follows must | |
go to CDR */ | |
tmp = tmp->r; | |
break; | |
default: | |
tmp->l = get_data_node(yytext, tok, g_filename, yylineno);/* start building the list | |
at CAR(tmp) */ | |
tmp->r = get_list_node(); /* and next we start from | |
CDR(tmp) */ | |
tmp = tmp->r; | |
tok = get_next_token(); | |
} | |
} | |
return t; | |
} | |
//---- All the above is to parse and get handle to the tree node------------- | |
//---------------- Now the indent part -------------- | |
// Options that control how the tree is printed
typedef struct ind_opt_ {
    int lisp_indent;        /* non-zero: Lisp style (closing parens stay on
                               the same line); zero: C style (each closing
                               paren on its own line) */
    int nl_b4_list_begin;   /* non-zero: extra newline before a list begins */
    int tab_size;           /* number of spaces per indent level */
    int space_after_fn;     /* non-zero: put a space after each '(' */
    /* Add new options here */
} indent_opts_t;
indent_opts_t g_indent_opts; /* Global options */ | |
static int g_indent_level = 0; /* Depth/nesting count */ | |
static bool g_comment_written = false; /* Last token written - comment? */ | |
static void dump_lisp_in (node_t *,int); | |
/* Handle the CAR sub-tree at any pair node */ | |
static | |
void dump_minilist (node_t *t, int level) | |
{ | |
if (!t) return; | |
if (PAIRNODE(t)) { | |
if (level == 1 && g_indent_opts.nl_b4_list_begin) { | |
printf("\n"); | |
} | |
if (!level) | |
printf("\n("); | |
else | |
printf("\n%*c(", level, ' '); | |
if (g_indent_opts.space_after_fn) putchar(' '); | |
g_indent_level ++; | |
dump_lisp_in(t, level + g_indent_opts.tab_size); | |
} else { | |
if (ISCOMMENT(t)) { | |
if (!level) | |
printf("\n%s ", DATA(t)); | |
else | |
printf("\n%*c%s ", level, ' ', DATA(t)); | |
g_comment_written = true; | |
} else { | |
printf("%s ", DATA(t)); | |
g_comment_written = false; | |
} | |
} | |
} | |
/* Dump internal */ | |
static | |
void dump_lisp_in (node_t *t, int level) | |
{ | |
if (!t) { | |
int lstep = level - g_indent_opts.tab_size; | |
if (!g_indent_level) return; /* hack! unput last ')' */ | |
if (g_indent_opts.lisp_indent) { | |
/* Lisp style - all one one line */ | |
if (g_comment_written) { | |
/* if prev token were a comment - we SHOULD put a newline! */ | |
printf("\n%*c) ", lstep, ' '); | |
} else { | |
printf(")"); | |
} | |
} else { | |
/* C style */ | |
if (lstep <= 0) { | |
printf("\n) "); | |
} else { | |
printf("\n%*c) ", lstep, ' '); | |
} | |
} | |
g_indent_level--; | |
return; | |
} | |
if (PAIRNODE(t)) { | |
dump_minilist(t->l, level); | |
} | |
dump_lisp_in(t->r, level); | |
} | |
// -- call this with the node head returned from build_list() | |
static | |
void dump_lisp (node_t *t) | |
{ | |
/* printf ("\n("); Top root node - we dont need */ | |
dump_lisp_in(t, 0); | |
} | |
I didn't add the `nodetype` enum at first because it needs to stay in sync with the tokenizer. I have now edited the code to add it, but the code still relies on flex: it uses `yytext` and `yylineno`, and `g_filename` is the name of the file being parsed, which can be taken from the command-line arguments.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Are you sure you didn't leave out any important parts? I'm getting a lot of declaration errors and a screenful of warnings when compiling with these options:
-g -pedantic -Wall -Wextra