Skip to content

Instantly share code, notes, and snippets.

@apparentlymart
Created September 15, 2014 02:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save apparentlymart/f38b2c93369b0c99dfdc to your computer and use it in GitHub Desktop.
Save apparentlymart/f38b2c93369b0c99dfdc to your computer and use it in GitHub Desktop.
Streaming JSON into a struct
// This is incomplete. Only supports strings and ints, and doesn't correctly handle the end of an object.
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
// Destination record that the parser fills in. The lookup tree stores the
// byte offset and size of each of these fields so that parsed values can be
// written straight into place.
struct Person {
// Text field, registered in the tree as TYPE_CHAR under "displayName".
char name[50];
// Registered in the tree as TYPE_INT under "age".
int age;
// Registered in the tree as TYPE_BOOL under "hasAccount".
bool has_account;
// Registered in the tree as TYPE_BOOL under "hasVipAccount".
bool has_vip_account;
};
struct Node;
// A labelled transition in the property-name lookup tree.
struct Edge {
// NUL-terminated text that has to be matched, one character at a time,
// in order to follow this edge.
const char *label;
// Node reached once the whole label has been matched.
Node *node;
};
// A node in the property-name lookup tree.
struct Node {
// Number of entries available starting at `edges`.
int num_edges;
// First outgoing edge; the matchers scan `num_edges` entries from here.
Edge *edges;
// 0 for nodes that do not name a field, otherwise one of the TYPE_* values.
int field_type;
// Byte offset of the target field within Person (set with offsetof).
size_t offset;
// Size in bytes of the target field (set with sizeof).
size_t length;
};
// Values stored in Node::field_type; parse_person switches on them to pick
// the value parser. A field_type of 0 marks a node that names no field.
#define TYPE_INT 1
#define TYPE_CHAR 2
#define TYPE_BOOL 3
#include "tree.h"
// Returns the next byte from stdin, reading it in chunks of up to 32 bytes.
// Returns '\0' when a read yields no data.
char get_next_char() {
    static char chunk[32] = {0};
    static int filled = 0;
    static int pos = 0;

    // Fast path: there is still unread data in the current chunk.
    if (pos != filled) {
        return chunk[pos++];
    }

    // The chunk is exhausted, so fetch the next one.
    filled = fread(chunk, 1, sizeof chunk, stdin);
    pos = 0;
    if (filled == 0) {
        return '\0';
    }
    return chunk[pos++];
}
// Returns the next input character that is not a space, newline, carriage
// return or tab. The character is also echoed to stdout as a debug trace.
// The '\0' that get_next_char returns when a read yields no data is passed
// through like any other character.
char get_next_nonwhite_char() {
    for (;;) {
        const char ch = get_next_char();
        const bool is_blank =
            (ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t');
        if (!is_blank) {
            printf("Next char is %c\n", ch);
            return ch;
        }
    }
}
// Reads a JSON property name from the input and matches it against the
// lookup tree as the characters arrive.
//
// The caller must already have consumed the opening quote. On entry
// *node_pp is the tree node to start matching from. On return *node_pp is
// the node that was reached if the name matched a path exactly and that
// node has a non-zero field_type; otherwise it is NULL.
//
// A backslash escapes the character after it: the escaped character is
// matched literally and never terminates the name.
//
// Returns true once the closing quote has been consumed, or false if the
// input ended first.
bool parse_property_name(Node **node_pp) {
    Node *node = *node_pp;
    Edge *edge = NULL;
    const char *wp = NULL;
    *node_pp = NULL;

    char c;
    // Must start out false: the very first character may be the closing
    // quote of an empty name.
    bool escape = false;
    while ((c = get_next_char()) != '\0') {
        // Only an unescaped backslash starts an escape; the second
        // backslash of "\\" is ordinary data.
        if (c == '\\' && !escape) {
            escape = true;
            continue;
        }
        if (c == '"' && !escape) {
            // If we landed exactly on a field node (not part-way along an
            // edge), pass it back to the caller.
            if (node && node->field_type != 0 && edge == NULL) {
                *node_pp = node;
            }
            return true;
        }
        escape = false;
        printf("String char %c\n", c);

        // Once we have fallen out of the tree we just keep reading until
        // the closing quote, then report "no match" via a NULL node.
        if (node == NULL) {
            continue;
        }

        if (edge == NULL) {
            // Look for an outgoing edge whose label starts with this
            // character.
            Edge *stop = node->edges + node->num_edges;
            for (edge = node->edges; edge < stop; edge++) {
                if (edge->label[0] == c) {
                    wp = edge->label;
                    break;
                }
            }
            if (wp == NULL) {
                node = NULL;
                continue;
            }
        }

        if (*wp != c) {
            node = NULL;
            continue;
        }

        wp++;
        if (*wp == 0) {
            // End of the current edge label: step to the node it leads to
            // and look for a fresh edge on the next character.
            node = edge->node;
            edge = NULL;
            wp = NULL;
        }
    }

    // Input ended before the closing quote.
    return false;
}
// Skips the remainder of the current property (its ':' and value) so that
// the next thing in the input is the following property.
//
// Consumes input up to and including the first comma that is outside any
// string literal and outside any nested object or array, then returns
// true. Returns false if the input ends first, or if the bracket that
// closes the enclosing object or array is reached first (in which case
// there is no following property). Note that the closing bracket has been
// consumed by then.
//
// Input is read through get_next_nonwhite_char, so whitespace (including
// whitespace inside skipped strings) is discarded.
bool find_next_property_name() {
    int brackets = 0;
    bool string = false;
    bool escape = false;
    char c;
    while ((c = get_next_nonwhite_char()) != '\0') {
        if (string) {
            // Inside a string nothing is structural. Track escapes so that
            // \" does not end the string and \\ does not escape what
            // follows it.
            if (escape) {
                escape = false;
            }
            else if (c == '\\') {
                escape = true;
            }
            else if (c == '"') {
                string = false;
            }
            continue;
        }

        if (c == ',' && brackets == 0) {
            return true;
        }
        if (c == '"') {
            string = true;
        }
        else if (c == '{' || c == '[') {
            brackets++;
        }
        else if (c == '}' || c == ']') {
            brackets--;
            if (brackets < 0) {
                // The enclosing container just closed; no comma at our
                // level can follow, so stop instead of reading on.
                return false;
            }
        }
    }
    // Hit end of input before we found the next property name.
    return false;
}
// Parses a JSON string value into the character field described by `node`.
//
// node: tree node giving the field's byte offset and size within *out.
// out:  record to write into.
//
// If the value does not start with a quote it is skipped via
// find_next_property_name and that function's result is returned.
// Otherwise characters are copied into the field and NUL-terminated. A
// backslash is dropped and the character after it is stored literally (no
// escape sequences are decoded). A value that needs more than the field's
// size minus the terminator leaves the field as an empty string.
//
// Returns true only if the closing quote is followed by a comma; returns
// false on any other following character or if the input ends early.
bool parse_string(Node *node, Person *out) {
    char *target = (char*)out + node->offset;
    int length = node->length;
    printf("It's a char field from %p for %i\n", (void*)target, length);

    if (get_next_nonwhite_char() != '"') {
        return find_next_property_name();
    }

    char c;
    bool too_long = false;
    bool escape = false;
    int ofs = 0;
    while ((c = get_next_char()) != '\0') {
        // Only an unescaped backslash starts an escape; otherwise "\\"
        // would leave the escape flag set and hide the closing quote.
        if (c == '\\' && !escape) {
            escape = true;
            continue;
        }
        if (c == '"' && !escape) {
            target[ofs] = '\0';
            return get_next_nonwhite_char() == ',';
        }
        escape = false;

        // Once the field has overflowed, just consume the rest of the
        // string without storing anything.
        if (too_long) {
            continue;
        }

        printf("String char %c\n", c);
        target[ofs++] = c;
        if (ofs == length) {
            // No room left for the terminator: rewind so the field ends
            // up empty, and stop storing.
            ofs = 0;
            too_long = true;
        }
    }

    // Ran out of input while waiting for the end of the string.
    return false;
}
// Parses a decimal integer value into the int field described by `node`.
//
// node: tree node giving the field's byte offset within *out.
// out:  record to write into.
//
// Accepts an optional leading '-', one or more digits, optional trailing
// whitespace, and then a ',' or '}' delimiter, which is consumed. The
// field is written only when the whole value parsed successfully, so a
// failed parse leaves it untouched and a repeated key overwrites rather
// than appends digits.
//
// Returns true on success. Returns false if there are no digits, the
// value does not fit in an int, an unexpected character follows, or the
// input ends early.
bool parse_int(Node *node, Person *out) {
    int *target = (int*)((char*)out + node->offset);
    printf("It's an int field at %p\n", (void*)target);

    char c = get_next_nonwhite_char();

    bool negative = false;
    if (c == '-') {
        negative = true;
        c = get_next_char();
    }

    // Accumulate in a wider type and bail out as soon as the magnitude
    // exceeds what even INT_MIN needs, so nothing can overflow.
    long long magnitude = 0;
    bool seen_digit = false;
    while (c >= '0' && c <= '9') {
        magnitude = magnitude * 10 + (c - '0');
        if (magnitude > (long long)INT_MAX + 1) {
            return false;
        }
        seen_digit = true;
        c = get_next_char();
    }
    if (!seen_digit) {
        return false;
    }
    if (!negative && magnitude > INT_MAX) {
        return false;
    }

    // Allow whitespace between the number and its delimiter.
    if (c == ' ' || c == '\n' || c == '\r' || c == '\t') {
        c = get_next_nonwhite_char();
    }
    if (c != ',' && c != '}') {
        return false;
    }

    *target = negative ? (int)(-magnitude) : (int)magnitude;
    return true;
}
// Parses one JSON object from the input into *out.
//
// Each property name is matched against the lookup tree rooted at
// nodes[0]. Known string and int properties are written into *out;
// unknown properties are skipped. Bool properties are not supported yet:
// on reaching one, parsing stops and true is returned so the partial
// result can be inspected.
//
// Returns true when a '}' is found where a property name could start, or
// when a bool property is reached. Returns false on malformed input or if
// the input ends first.
//
// Known limitation: parse_string requires a ',' after the value and
// parse_int consumes a closing '}' itself, so an object whose last
// string/int property is followed directly by '}' is not reported as a
// success.
bool parse_person(Person *out) {
    if (get_next_nonwhite_char() != '{') {
        return false;
    }

    char startc;
    while ((startc = get_next_nonwhite_char()) != '\0') {
        if (startc == '}') {
            return true;
        }
        if (startc != '"') {
            return false;
        }

        Node *node = &nodes[0];
        if (!parse_property_name(&node)) {
            return false;
        }

        if (!node) {
            // Property name doesn't match any we're expecting,
            // so just skip ahead.
            printf("Unknown property. Skipping.\n");
            if (!find_next_property_name()) {
                return false;
            }
            continue;
        }

        if (get_next_nonwhite_char() != ':') {
            return false;
        }

        switch (node->field_type) {
        case TYPE_CHAR:
            if (!parse_string(node, out)) {
                return false;
            }
            break;
        case TYPE_INT:
            // parse_int consumes the ',' or '}' that ends the number, so
            // the input is already positioned at the next property; do
            // not skip ahead again or that property would be lost.
            if (!parse_int(node, out)) {
                return false;
            }
            break;
        case TYPE_BOOL:
            printf("It's a bool field\n");
            // Not supported yet, but fail successfully so
            // we can see the partial result.
            return true;
        default:
            // A field type with no parser: skip its value the same way an
            // unknown property is skipped.
            if (!find_next_property_name()) {
                return false;
            }
            break;
        }
    }

    // Early EOF.
    return false;
}
bool in_tree(const char *test) {
Node * node = &nodes[0];
Edge * edge = NULL;
const char * wp = NULL;
for (; *test != 0; test++) {
// If we don't yet have an edge then we need to go hunting
// for one that starts with our current letter.
if (edge == NULL) {
Edge * stop = node->edges + node->num_edges;
for (edge = node->edges; edge < stop; edge++) {
if (edge->label[0] == *test) {
wp = edge->label;
break;
}
}
if (wp == NULL) {
return false;
}
}
if (*wp != *test) {
return false;
}
wp++;
if (*wp == 0) {
// We've reached the end of our current edge,
// so we'll traverse it now.
node = edge->node;
// Go hunting for another edge on the next iteration.
edge = NULL;
wp = NULL;
}
}
// If we fall out here exactly when we've just traversed to
// a leaf node then the item is in the tree.
return (node->field_type != 0 && edge == NULL);
}
int main(int argc, char **argv) {
const int size = sizeof(words) + sizeof(nodes) + sizeof(edges);
printf("// Data Structure is %i bytes\n\n", size);
populate();
Person p = {0};
bool success = parse_person(&p);
if (success) {
puts("Parsed successfully");
printf("Name %s\n", p.name);
printf("Age %i\n", p.age);
printf("Has Account %i\n", p.has_account);
printf("Has VIP Account %i\n", p.has_vip_account);
}
else {
puts("Failed to parse");
}
}
// Backing storage for the lookup tree.
//
// `words` packs every edge label into one buffer, separated by NULs;
// "Account" is not stored separately but reuses the tail of "VipAccount".
// It is an array rather than a pointer so that sizeof(words) yields the
// size of the label data itself instead of the size of a pointer.
const char words[] = "displayName\0VipAccount\0age\0has";
// Tree nodes; nodes[0] is the root. Filled in by populate().
Node nodes[6];
// Tree edges, referenced from the nodes. Filled in by populate().
Edge edges[5];
void populate() {
nodes[0].num_edges = 3;
nodes[0].field_type = 0;
nodes[0].edges = &edges[0];
edges[0].label = &words[23]; // 'age'
edges[0].node = &nodes[1];
edges[1].label = &words[27]; // 'has'
edges[1].node = &nodes[2];
edges[2].label = &words[0]; // 'displayName'
edges[2].node = &nodes[3];
nodes[1].num_edges = 0;
nodes[1].field_type = TYPE_INT;
nodes[1].offset = offsetof(Person, age);
nodes[1].length = sizeof(((Person*)0)->age);
nodes[2].num_edges = 2;
nodes[2].field_type = 0;
nodes[2].edges = &edges[3];
edges[3].label = &words[12]; // 'VipAccount'
edges[3].node = &nodes[4];
edges[4].label = &words[15]; // 'Account'
edges[4].node = &nodes[5];
nodes[4].num_edges = 0;
nodes[4].field_type = TYPE_BOOL;
nodes[4].offset = offsetof(Person, has_vip_account);
nodes[4].length = sizeof(((Person*)0)->has_vip_account);
nodes[5].num_edges = 0;
nodes[5].field_type = TYPE_BOOL;
nodes[5].offset = offsetof(Person, has_account);
nodes[5].length = sizeof(((Person*)0)->has_account);
nodes[3].num_edges = 0;
nodes[3].field_type = TYPE_CHAR;
nodes[3].offset = offsetof(Person, name);
nodes[3].length = sizeof(((Person*)0)->name);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment