Created
September 15, 2014 02:05
-
-
Save apparentlymart/f38b2c93369b0c99dfdc to your computer and use it in GitHub Desktop.
Streaming JSON into a struct
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This is incomplete. Only supports strings and ints, and doesn't correctly handle the end of an object. | |
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
// Destination record that the parser fills in from a JSON object.
// Fields are written through byte offsets stored in the lookup tree, so the
// member order and types here must stay in step with the tree's
// offsetof()/sizeof() entries.
struct Person {
    char name[50];                      // filled from the "displayName" key
    int age;                            // filled from the "age" key
    bool has_account, has_vip_account;  // "hasAccount" / "hasVipAccount" keys
};
// Lookup tree used to match JSON property names against known fields.
// The member types are spelled with elaborated `struct` specifiers so these
// definitions are valid in both C and C++; the layout is unchanged.
struct Node;

// A labelled transition: following `label` from the owning node leads to `node`.
struct Edge {
    const char *label;   // NUL-terminated run of characters this edge consumes
    struct Node *node;   // node reached once the whole label has been matched
};

// A tree node. Interior nodes have field_type == 0; a non-zero field_type
// marks a node that corresponds to a struct field.
struct Node {
    int num_edges;       // number of entries in `edges`
    struct Edge *edges;  // outgoing edges (unused when num_edges == 0)
    int field_type;      // 0, or one of the TYPE_* constants
    size_t offset;       // byte offset of the field inside the target struct
    size_t length;       // size of the field in bytes
};
// Values stored in Node::field_type to say how a matched property's value is
// parsed. 0 is reserved for nodes that do not correspond to a field.
// An enum (rather than #define) keeps the constants visible to the debugger
// and immune to macro-expansion surprises; the numeric values are unchanged.
enum {
    TYPE_INT = 1,   // value is parsed as a decimal integer
    TYPE_CHAR = 2,  // value is parsed as a string into a char buffer
    TYPE_BOOL = 3   // boolean field (parsing not implemented yet)
};
#include "tree.h" | |
// Returns the next byte from stdin, reading it in 32-byte chunks.
// Returns '\0' when a read yields no data (end of input or read error), so a
// literal NUL byte in the input cannot be told apart from end of input.
char get_next_char() {
    static char chunk[32] = {0};
    static int filled = 0;  // number of valid bytes currently in chunk
    static int pos = 0;     // index of the next unread byte in chunk

    // Fast path: there is still buffered data to hand out.
    if (pos != filled) {
        return chunk[pos++];
    }

    // Buffer exhausted: try to refill it.
    filled = fread(&chunk, 1, sizeof(chunk), stdin);
    pos = 0;
    if (filled == 0) {
        return '\0';
    }
    return chunk[pos++];
}
char get_next_nonwhite_char() { | |
while (1) { | |
char c = get_next_char(); | |
switch (c) { | |
case ' ': | |
case '\n': | |
case '\r': | |
case '\t': | |
continue; | |
default: | |
printf("Next char is %c\n", c); | |
return c; | |
} | |
} | |
} | |
// Reads a property name from the input and walks the lookup tree with it.
//
// The caller must already have consumed the opening quote. Characters are
// read up to and including the closing (unescaped) quote.
//
// node_pp: in  - the tree node to start matching from
//          out - the field node the name resolved to, or NULL if the name
//                does not match a known field
// Returns true once the closing quote has been read, false if the input
// ended first.
//
// Escape sequences are not decoded: a backslash only makes the following
// character literal, so "\"" contributes a '"' and "\n" contributes an 'n'.
bool parse_property_name(Node **node_pp) {
    Node *node = *node_pp;
    Edge *edge = NULL;       // edge currently being matched, if any
    const char *wp = NULL;   // next expected character within edge->label
    *node_pp = NULL;

    char c;
    bool escape = false;     // true when the previous character was an unescaped backslash
    while ((c = get_next_char()) != '\0') {
        // Only an unescaped backslash starts an escape; an escaped one ("\\")
        // is an ordinary character and must not escape what follows it.
        if (c == '\\' && !escape) {
            escape = true;
            continue;
        }
        if (c == '"' && !escape) {
            // Only report a match if the name ended exactly on a field node,
            // i.e. not in the middle of an edge label.
            if (node && node->field_type != 0 && edge == NULL) {
                *node_pp = node;
            }
            return true;
        }
        escape = false;
        printf("String char %c\n", c);

        // Once we've fallen out of the tree, just keep consuming until the
        // closing quote; the NULL result tells the caller the key is unknown.
        if (node == NULL) {
            continue;
        }

        if (edge == NULL) {
            // Find the outgoing edge whose label starts with this character.
            // Indexing (rather than pointer arithmetic) keeps this safe for
            // leaf nodes whose edges pointer was never set.
            for (int i = 0; i < node->num_edges; i++) {
                if (node->edges[i].label[0] == c) {
                    edge = &node->edges[i];
                    break;
                }
            }
            if (edge == NULL) {
                node = NULL;
                continue;
            }
            wp = edge->label;
        }

        if (*wp != c) {
            node = NULL;
            continue;
        }
        wp++;
        if (*wp == 0) {
            // End of this edge's label: move to the node it leads to and
            // look for a fresh edge on the next character.
            node = edge->node;
            edge = NULL;
            wp = NULL;
        }
    }

    // Input ended before the closing quote.
    return false;
}
bool find_next_property_name() { | |
int brackets = 0; | |
bool string = false; | |
bool escape = false; | |
char c; | |
while (c = get_next_nonwhite_char()) { | |
if (c == ',' && brackets == 0 && ! string) { | |
return true; | |
} | |
if (c == '{' || c == '[') { | |
brackets++; | |
} | |
if (c == '}' || c == ']') { | |
brackets--; | |
} | |
if (c == '"') { | |
if (string) { | |
if (! escape) { | |
string = false; | |
} | |
} | |
else { | |
string = true; | |
} | |
} | |
if (c == '\\') { | |
if (string) { | |
escape = true; | |
} | |
} | |
else { | |
escape = false; | |
} | |
} | |
// Hit end of string before we found the next property name. | |
return false; | |
} | |
// Parses a JSON string value into the char buffer described by `node`.
//
// node: field node; node->offset is the buffer's byte offset inside *out and
//       node->length its size in bytes.
// out:  struct being populated.
//
// If the value is not a string it is skipped via find_next_property_name().
// Otherwise characters are copied until the closing quote and the buffer is
// NUL-terminated. A value that does not fit (length - 1 characters at most)
// is consumed but stored as an empty string.
//
// Returns true only if the string is followed by a ',' (which is consumed).
// Any other following character, including '}', or end of input, yields false.
//
// Escape sequences are not decoded: a backslash only makes the following
// character literal.
bool parse_string(Node *node, Person *out) {
    char *target = (char*)out + node->offset;
    int length = node->length;
    printf("It's a char field from %p for %i\n", (void*)target, length);

    if (get_next_nonwhite_char() != '"') {
        return find_next_property_name();
    }

    char c;
    // A zero-sized buffer can hold nothing, not even the terminator.
    bool too_long = (length <= 0);
    bool escape = false;
    int ofs = 0;
    while ((c = get_next_char()) != '\0') {
        // Only an unescaped backslash starts an escape, so that "\\" is a
        // literal backslash and does not escape the quote that follows it.
        if (c == '\\' && !escape) {
            escape = true;
            continue;
        }
        if (c == '"' && !escape) {
            if (length > 0) {
                target[ofs] = '\0';
            }
            if (get_next_nonwhite_char() != ',') {
                return false;
            }
            return true;
        }
        escape = false;

        // If we ran out of space in our buffer then
        // just keep reading until we hit the end of the string.
        if (too_long) {
            continue;
        }

        printf("String char %c\n", c);
        target[ofs++] = c;
        if (ofs == length) {
            // No room left for the terminator: discard what was stored so
            // the field ends up as an empty string.
            ofs = 0;
            too_long = true;
        }
    }

    // Ran out of input while waiting for end of string
    return false;
}
// Parses a non-negative decimal integer value into the int field described
// by `node` (node->offset is the field's byte offset inside *out).
//
// Leading whitespace is skipped, then one or more digits are read, then
// optional whitespace, then a ',' or '}' delimiter, which is consumed.
// Returns true and stores the value only if all of that succeeds. Returns
// false (leaving the field untouched) if there are no digits, the value does
// not fit in an int, an unexpected character appears, or the input ends.
//
// Negative numbers, fractions and exponents are not supported.
bool parse_int(Node *node, Person *out) {
    int *target = (int*)((char*)out + node->offset);
    printf("It's an int field at %p\n", (void*)target);

    int value = 0;            // accumulate locally so the result never depends
                              // on what the field held before the call
    bool have_digit = false;
    char c = get_next_nonwhite_char();

    while (c >= '0' && c <= '9') {
        int digit = c - '0';
        // Reject values that would overflow int; signed overflow is
        // undefined behaviour.
        if (value > (INT_MAX - digit) / 10) {
            return false;
        }
        value = (value * 10) + digit;
        have_digit = true;
        c = get_next_char();
    }

    // Allow whitespace between the last digit and the delimiter.
    while (c == ' ' || c == '\n' || c == '\r' || c == '\t') {
        c = get_next_char();
    }

    if (have_digit && (c == ',' || c == '}')) {
        *target = value;
        return true;
    }
    return false;
}
// Parses one JSON object from the input into *out.
//
// Property names are matched against the lookup tree rooted at nodes[0];
// unknown properties are skipped. Returns true when the object's closing
// '}' is read at the start of a property, or as soon as a boolean property
// is encountered (booleans are not parsed yet, so parsing stops there with
// the partial result). Returns false on malformed input or early end of
// input.
//
// Known limitation: the value parsers consume the delimiter after a value,
// so when the last property is followed directly by '}' the end of the
// object is not recognised here and the function reports failure at end of
// input.
bool parse_person(Person *out) {
    if (get_next_nonwhite_char() != '{') {
        return false;
    }

    char startc;
    while ((startc = get_next_nonwhite_char()) != '\0') {
        if (startc == '}') {
            return true;
        }
        if (startc != '"') {
            return false;
        }

        Node *node = &nodes[0];
        if (!parse_property_name(&node)) {
            return false;
        }

        if (node == NULL) {
            // Property name doesn't match any we're expecting,
            // so just skip ahead.
            printf("Unknown property. Skipping.\n");
            if (!find_next_property_name()) {
                return false;
            }
            continue;
        }

        if (get_next_nonwhite_char() != ':') {
            return false;
        }

        switch (node->field_type) {
            case TYPE_CHAR:
                if (!parse_string(node, out)) {
                    return false;
                }
                break;
            case TYPE_INT:
                // parse_int already consumes the delimiter that follows the
                // number, so the next thing in the input is the next
                // property. Skipping ahead again here would silently drop
                // that property.
                if (!parse_int(node, out)) {
                    return false;
                }
                break;
            case TYPE_BOOL:
                printf("It's a bool field\n");
                // Not supported yet, but fail successfully so
                // we can see the partial result.
                return true;
            default:
                // Field type we don't know how to parse: skip its value as
                // we would for an unknown property.
                if (!find_next_property_name()) {
                    return false;
                }
                break;
        }
    }

    // Early EOF.
    return false;
}
bool in_tree(const char *test) { | |
Node * node = &nodes[0]; | |
Edge * edge = NULL; | |
const char * wp = NULL; | |
for (; *test != 0; test++) { | |
// If we don't yet have an edge then we need to go hunting | |
// for one that starts with our current letter. | |
if (edge == NULL) { | |
Edge * stop = node->edges + node->num_edges; | |
for (edge = node->edges; edge < stop; edge++) { | |
if (edge->label[0] == *test) { | |
wp = edge->label; | |
break; | |
} | |
} | |
if (wp == NULL) { | |
return false; | |
} | |
} | |
if (*wp != *test) { | |
return false; | |
} | |
wp++; | |
if (*wp == 0) { | |
// We've reached the end of our current edge, | |
// so we'll traverse it now. | |
node = edge->node; | |
// Go hunting for another edge on the next iteration. | |
edge = NULL; | |
wp = NULL; | |
} | |
} | |
// If we fall out here exactly when we've just traversed to | |
// a leaf node then the item is in the tree. | |
return (node->field_type != 0 && edge == NULL); | |
} | |
int main(int argc, char **argv) { | |
const int size = sizeof(words) + sizeof(nodes) + sizeof(edges); | |
printf("// Data Structure is %i bytes\n\n", size); | |
populate(); | |
Person p = {0}; | |
bool success = parse_person(&p); | |
if (success) { | |
puts("Parsed successfully"); | |
printf("Name %s\n", p.name); | |
printf("Age %i\n", p.age); | |
printf("Has Account %i\n", p.has_account); | |
printf("Has VIP Account %i\n", p.has_vip_account); | |
} | |
else { | |
puts("Failed to parse"); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const char * words = "displayName\0VipAccount\0age\0has"; | |
Node nodes[6]; | |
Edge edges[5]; | |
void populate() { | |
nodes[0].num_edges = 3; | |
nodes[0].field_type = 0; | |
nodes[0].edges = &edges[0]; | |
edges[0].label = &words[23]; // 'age' | |
edges[0].node = &nodes[1]; | |
edges[1].label = &words[27]; // 'has' | |
edges[1].node = &nodes[2]; | |
edges[2].label = &words[0]; // 'displayName' | |
edges[2].node = &nodes[3]; | |
nodes[1].num_edges = 0; | |
nodes[1].field_type = TYPE_INT; | |
nodes[1].offset = offsetof(Person, age); | |
nodes[1].length = sizeof(((Person*)0)->age); | |
nodes[2].num_edges = 2; | |
nodes[2].field_type = 0; | |
nodes[2].edges = &edges[3]; | |
edges[3].label = &words[12]; // 'VipAccount' | |
edges[3].node = &nodes[4]; | |
edges[4].label = &words[15]; // 'Account' | |
edges[4].node = &nodes[5]; | |
nodes[4].num_edges = 0; | |
nodes[4].field_type = TYPE_BOOL; | |
nodes[4].offset = offsetof(Person, has_vip_account); | |
nodes[4].length = sizeof(((Person*)0)->has_vip_account); | |
nodes[5].num_edges = 0; | |
nodes[5].field_type = TYPE_BOOL; | |
nodes[5].offset = offsetof(Person, has_account); | |
nodes[5].length = sizeof(((Person*)0)->has_account); | |
nodes[3].num_edges = 0; | |
nodes[3].field_type = TYPE_CHAR; | |
nodes[3].offset = offsetof(Person, name); | |
nodes[3].length = sizeof(((Person*)0)->name); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment