Skip to content

Instantly share code, notes, and snippets.

@Chubek
Created January 27, 2024 06:12
Show Gist options
  • Save Chubek/d2f0ac9067521716d2ab31c93948e885 to your computer and use it in GitHub Desktop.
Save Chubek/d2f0ac9067521716d2ab31c93948e885 to your computer and use it in GitHub Desktop.
An S-Expression Parser in C

The file sexp-parse.c is a minimal S-expression parser written in C. It also has a tree walker which walks the parsed constructs and prints out the values.

What are S-Expressions?

If you have worked with languages such as CommonLisp or Scheme, you know S-Expressions. These are syntactic consructs which are either 'atoms' or 'lists'. A program is a giant list, and each list is comprised of other lists, and atoms.

This is an atom, for example:

an-atom

This is a list of atoms:

(atom-1 atom-2 atom-3)

This is a list that has a list itself:

(atom-1 atom-2 (atom-3 atom-4 (atom-5 atom-6)))

Overal, if you are interested in learning about S-Expressions, Wikipedia is your friend.

#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#define BUFF_MAX 256
typedef enum SExpressionType SExpressionType;
typedef struct SExpression SExpression;
typedef struct SExpressionList SExpressionList;
enum SExpressionType { ATOM, LIST };
struct SExpression {
SExpressionType type;
union {
char *atom;
char *string;
SExpressionList *list;
};
};
struct SExpressionList {
SExpression **nodes;
size_t num_nodes;
};
const bool const valid_punct_map[SCHAR_MAX] = {
['!'] = true, ['"'] = true, ['#'] = true, ['%'] = true, ['\''] = true,
['&'] = true, ['*'] = true, ['+'] = true, ['-'] = true, ['>'] = true,
['<'] = true, ['/'] = true, ['~'] = true, ['|'] = true, ['='] = true,
['^'] = true, ['.'] = true, ['?'] = true, ['`'] = true, ['@'] = true,
['$'] = true,
};
bool is_valid_atom_punct(char c) { return valid_punct_map[c]; }
SExpression *create_sexp(SExpressionType type) {
SExpression *sexp = (SExpression *)calloc(1, sizeof(SExpression));
sexp->type = type;
sexp->atom = NULL;
sexp->list = NULL;
return sexp;
}
SExpressionList *create_sexp_list(void) {
SExpressionList *sexpls =
(SExpressionList *)calloc(1, sizeof(SExpressionList));
sexpls->nodes = NULL;
return sexpls;
}
SExpressionList *add_sexpls_node(SExpressionList *sexpls, SExpression *sexp) {
sexpls->nodes = (SExpression **)realloc(
sexpls->nodes, (sexpls->num_nodes + 1) * sizeof(SExpression));
sexpls->nodes[sexpls->num_nodes++] = sexp;
return sexpls;
}
SExpression *parse_sexp_atom(FILE *input_file) {
char buffer[BUFF_MAX] = {0};
int c;
size_t index = 0;
while ((c = fgetc(input_file)) != EOF &&
(is_valid_atom_punct(c) || isalnum(c))) {
buffer[index++] = (char)c;
}
if (index == 0) {
ungetc(c, input_file);
return NULL;
}
SExpression *atom = create_sexp(ATOM);
atom->atom = strndup(buffer, index);
return atom;
}
SExpressionList *parse_sexp_list(FILE *input_file) {
int c;
SExpressionList *sexpls = create_sexp_list();
SExpression *sexp;
while ((c = fgetc(input_file)) != EOF) {
if (c == '(') {
sexp = create_sexp(LIST);
sexp->list = parse_sexp_list(input_file);
} else if (c == ')') {
return sexpls;
} else if (c == ' ' || c == '\n' || c == '\t' || c == '\r') {
continue;
} else {
ungetc(c, input_file);
sexp = parse_sexp_atom(input_file);
}
if (sexp) {
sexpls = add_sexpls_node(sexpls, sexp);
}
}
return sexpls;
}
void walk_sexp_list(SExpressionList *sexpls);
void print_sexp(SExpression *sexp) {
switch (sexp->type) {
case ATOM:
printf("Atom: %s\n", sexp->atom);
break;
case LIST:
printf("List:\n");
walk_sexp_list(sexp->list);
break;
default:
break;
}
}
void walk_sexp_list(SExpressionList *sexpls) {
for (size_t i = 0; i < sexpls->num_nodes; i++)
print_sexp(sexpls->nodes[i]);
}
int main(int argc, char *argv[]) {
FILE *input_file = fopen(argv[1], "r");
SExpressionList *sexpls = parse_sexp_list(input_file);
walk_sexp_list(sexpls);
fclose(input_file);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment