Skip to content

Instantly share code, notes, and snippets.

@Moishe
Created April 17, 2020 17:09
Show Gist options
  • Save Moishe/1fcf4692abba4786387e274366ddf3f8 to your computer and use it in GitHub Desktop.
Save Moishe/1fcf4692abba4786387e274366ddf3f8 to your computer and use it in GitHub Desktop.
/*
This is a very simple and not very robust parser for nested lists.
It doesn't copy the source input, but builds a structured representation of the
input with pointers to values. This could be useful in memory-constrained environments.
The syntax is:
(value1 (value2 value3) value4)
which translates to:
An ordered list of 3 values containing:
a value of "value1"
an ordered list of 2 values containing:
a value of "value2"
a value of "value3"
a value of "value4"
Lists can contain values or other lists.
Parentheses specify lists
Anything that isn't a list is a value.
Behavior is undefined for malformed input (unmatched parentheses, etc)
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
const char *tests[4] = {
"(first (list 1 (+ 2 3) 9))",
"()",
"(((first) second) third)",
"(first (second (third)))"
};
/* No checking is done for types other than these. */
const int LIST_TYPE = 1;
const int VALUE_TYPE = 2;
/* A value specifies a start and end pointer into a character array. We assume no NULL terminator. */
struct val {
const char *start;
const char *end;
};
/* An element is a value or another node */
union el {
struct node *n;
struct val v;
};
/* A node is part of a doubly-linked list, and contains a child node or a value. */
struct node {
struct node *prev;
struct node *next;
int type;
union el content;
};
/* When we see a close-paren or a space, we complete the node we're on. */
struct node *complete_node(const char *c, struct node *n) {
struct node *new_n;
new_n = (struct node*)malloc(sizeof(struct node));
new_n->prev = n;
n->next = new_n;
if (n->type == VALUE_TYPE) {
n->content.v.end = c - 1;
}
return new_n;
}
/* Recursive parsing function to populate our list/tree (surely there's a word for this) of nodes. */
const char *parse(const char *c, struct node *parent) {
struct node *n;
n = (struct node*)malloc(sizeof(struct node));
memset(n, 0, sizeof(struct node));
parent->type = LIST_TYPE;
parent->content.n = n;
while (*c) {
if (*c == '(') {
c = parse(c + 1, n);
} else if (*c == ')') {
complete_node(c, n);
return c;
} else if (*c == ' ') {
n = complete_node(c, n);
} else {
n->type = VALUE_TYPE;
if (!n->content.v.start) {
n->content.v.start = c;
}
}
c++;
}
return 0;
}
/* Mostly for debugging, print a formatted representation of a node list/tree. */
void print_tree(struct node *n, int depth) {
while (n) {
if (n->type == VALUE_TYPE) {
for (int i = 0; i < depth; i++) {
printf(" ");
}
printf("%.*s\n", (int)(n->content.v.end - n->content.v.start + 1), n->content.v.start);
} else {
print_tree(n->content.n, depth + 1);
}
n = n->next;
}
}
/* Parse our array of test data. */
int main() {
struct node root;
for (int i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
printf("%s\n", tests[i]);
parse(tests[i], &root);
print_tree(&root, 0);
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment