Created
April 17, 2020 17:09
-
-
Save Moishe/1fcf4692abba4786387e274366ddf3f8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
This is a very simple and not very robust parser for nested lists. | |
It doesn't copy the source input, but builds a structured representation of the | |
input with pointers to values. This could be useful in memory-constrained environments. | |
The syntax is: | |
(value1 (value2 value3) value4) | |
which translates to: | |
An ordered list of 3 values containing: | |
a value of "value1" | |
an ordered list of 2 values containing: | |
a value of "value2" | |
a value of "value3" | |
a value of "value4" | |
Lists can contain values or other lists. | |
Parentheses specify lists | |
Anything that isn't a list is a value. | |
Behavior is undefined for malformed input (unmatched parentheses, etc) | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
const char *tests[4] = { | |
"(first (list 1 (+ 2 3) 9))", | |
"()", | |
"(((first) second) third)", | |
"(first (second (third)))" | |
}; | |
/* No checking is done for types other than these. */ | |
const int LIST_TYPE = 1; | |
const int VALUE_TYPE = 2; | |
/* A value specifies a start and end pointer into a character array. We assume no NULL terminator. */ | |
struct val { | |
const char *start; | |
const char *end; | |
}; | |
/* An element is a value or another node */ | |
union el { | |
struct node *n; | |
struct val v; | |
}; | |
/* A node is part of a doubly-linked list, and contains a child node or a value. */ | |
struct node { | |
struct node *prev; | |
struct node *next; | |
int type; | |
union el content; | |
}; | |
/* When we see a close-paren or a space, we complete the node we're on. */ | |
struct node *complete_node(const char *c, struct node *n) { | |
struct node *new_n; | |
new_n = (struct node*)malloc(sizeof(struct node)); | |
new_n->prev = n; | |
n->next = new_n; | |
if (n->type == VALUE_TYPE) { | |
n->content.v.end = c - 1; | |
} | |
return new_n; | |
} | |
/* Recursive parsing function to populate our list/tree (surely there's a word for this) of nodes. */ | |
const char *parse(const char *c, struct node *parent) { | |
struct node *n; | |
n = (struct node*)malloc(sizeof(struct node)); | |
memset(n, 0, sizeof(struct node)); | |
parent->type = LIST_TYPE; | |
parent->content.n = n; | |
while (*c) { | |
if (*c == '(') { | |
c = parse(c + 1, n); | |
} else if (*c == ')') { | |
complete_node(c, n); | |
return c; | |
} else if (*c == ' ') { | |
n = complete_node(c, n); | |
} else { | |
n->type = VALUE_TYPE; | |
if (!n->content.v.start) { | |
n->content.v.start = c; | |
} | |
} | |
c++; | |
} | |
return 0; | |
} | |
/* Mostly for debugging, print a formatted representation of a node list/tree. */ | |
void print_tree(struct node *n, int depth) { | |
while (n) { | |
if (n->type == VALUE_TYPE) { | |
for (int i = 0; i < depth; i++) { | |
printf(" "); | |
} | |
printf("%.*s\n", (int)(n->content.v.end - n->content.v.start + 1), n->content.v.start); | |
} else { | |
print_tree(n->content.n, depth + 1); | |
} | |
n = n->next; | |
} | |
} | |
/* Parse our array of test data. */ | |
int main() { | |
struct node root; | |
for (int i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) { | |
printf("%s\n", tests[i]); | |
parse(tests[i], &root); | |
print_tree(&root, 0); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment