Skip to content

Instantly share code, notes, and snippets.

@vurtun
Last active July 10, 2021 09:15
Show Gist options
  • Save vurtun/5bbf587095371abf4021bf811ce28933 to your computer and use it in GitHub Desktop.
Save vurtun/5bbf587095371abf4021bf811ce28933 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <string.h>
/* Actions stored in the 256-entry byte tables. jparse() looks up every input
 * byte in the currently active table and switches on the resulting action. */
enum jstates {
    JFAILED,    /* 0: value of every table slot that has no explicit initializer */
    JNXT,       /* no dedicated case in jparse(): byte is consumed, scan continues */
    JSEP,       /* separator byte, reported as its own token */
    JUP,        /* '{' or '[' */
    JDWN,       /* '}' or ']' */
    JQUP,       /* opening quote: switch to the string table */
    JQDWN,      /* closing quote: string token is complete */
    JESC,       /* escape introducer inside a string */
    JUNESC,     /* byte following the escape introducer: back to the string table */
    JBARE,      /* first byte of an unquoted token */
    JUNBARE,    /* byte that terminates an unquoted token */
    JUTF8_2,    /* UTF-8 lead byte, 1 continuation byte expected */
    JUTF8_3,    /* UTF-8 lead byte, 2 continuation bytes expected */
    JUTF8_4,    /* UTF-8 lead byte, 3 continuation bytes expected */
    JUTF8_N,    /* UTF-8 continuation byte */
    JSTATE_CNT  /* number of actions above */
};
/* Token kinds reported by jparse() in jtok.type. JEOS is 0, so a caller can
 * loop while the returned type is non-zero. */
enum jtype {
    JEOS,       /* end of input reached, no token */
    JERR,       /* a byte was not allowed in the current state */
    JTSEP,      /* separator */
    JOBJB,      /* '{' */
    JOBJE,      /* '}' */
    JARRB,      /* '[' */
    JARRE,      /* ']' */
    JNUM,       /* unquoted token not starting with 't', 'f' or 'n' */
    JSTR,       /* quoted string, quotes excluded */
    JTRUE,      /* unquoted token starting with 't' */
    JFALSE,     /* unquoted token starting with 'f' */
    JNULL       /* unquoted token starting with 'n' */
};
/* One token as returned by jparse(). */
struct jtok {
    int type;           /* an enum jtype value */
    int len;            /* number of bytes at str; 0 for JEOS/JERR */
    const char *str;    /* points into the caller's input, not NUL-terminated;
                         * null for JEOS/JERR */
};
/* Tokenizer state carried between jparse() calls. */
struct jctx {
    const unsigned char *tbl;   /* active byte table; jparse() installs the
                                 * top-level table when this is null */
    const char *cur;            /* next byte to read */
    const char *end;            /* one past the last byte to read */
};
/* Top-level table, active between tokens. Bytes without an entry stay at
 * 0 (JFAILED). Whitespace and ',' are skipped; ':' and '=' both produce a
 * separator token. Unquoted tokens may start with a digit, '-', 't', 'f' or
 * 'n'.
 * The digit range is spelled with character constants so it covers '0'
 * through '9'; the previous numeric range 48..56 stopped at '8' and rejected
 * any number starting with '9'.
 * Note: the `[a ... b]` range designator is a GNU C extension. */
static const unsigned char jtbl[256] = {
    ['\t'] = JNXT, ['\n'] = JNXT, ['\r'] = JNXT, [' '] = JNXT, [','] = JNXT,
    ['"'] = JQUP,
    [':'] = JSEP, ['='] = JSEP,
    ['['] = JUP, ['{'] = JUP,
    [']'] = JDWN, ['}'] = JDWN,
    ['0' ... '9'] = JBARE, ['-'] = JBARE,
    ['t'] = JBARE, ['f'] = JBARE, ['n'] = JBARE
};
/* Table active while inside an unquoted token (number, true, false, null).
 * Printable ASCII continues the token; whitespace, ',', ']' and '}' end it.
 * A space now terminates the token like tab/CR/LF do; previously it was
 * absorbed, so `42 }` produced the token "42 " with a trailing blank.
 * Bytes without an entry (controls, DEL, anything >= 128) stay at
 * 0 (JFAILED). */
static const unsigned char jbare[256] = {
    ['\t'] = JUNBARE, ['\n'] = JUNBARE, ['\r'] = JUNBARE, [' '] = JUNBARE,
    [33 ... 43] = JNXT,
    [','] = JUNBARE,
    [45 ... 92] = JNXT,
    [']'] = JUNBARE,
    [94 ... 124] = JNXT,
    ['}'] = JUNBARE,
    [126] = JNXT
};
/* Table active inside a quoted string. Printable ASCII is consumed, '"' closes
 * the string, '\\' (92) introduces an escape, and UTF-8 lead bytes announce how
 * many continuation bytes must follow. Control bytes, DEL and stray
 * continuation bytes stay at 0 (JFAILED).
 * The escape entry used to sit at 93 (']'), which made a backslash an ordinary
 * byte (so `\"` ended the string early) and turned every ']' inside a string
 * into a bogus escape introducer. */
static const unsigned char jstr[256] = {
    [32 ... 33] = JNXT,
    ['"'] = JQDWN,
    [35 ... 91] = JNXT,
    ['\\'] = JESC,
    [93 ... 126] = JNXT,
    [192 ... 223] = JUTF8_2,
    [224 ... 239] = JUTF8_3,
    [240 ... 247] = JUTF8_4
};
/* Table active after a UTF-8 lead byte: only continuation bytes (0x80..0xBF)
 * are accepted, every other byte stays at 0 (JFAILED). */
static const unsigned char jutf8[256] = {
    [0x80 ... 0xBF] = JUTF8_N
};
/* Table active for the single byte that follows an escape introducer inside a
 * string. The listed bytes end the escape; anything else stays at
 * 0 (JFAILED). The hex digits after 'u' are not checked here. */
static const unsigned char jesc[256] = {
    ['"']  = JUNESC,
    ['/']  = JUNESC,
    ['\\'] = JUNESC,
    ['b']  = JUNESC,
    ['f']  = JUNESC,
    ['n']  = JUNESC,
    ['r']  = JUNESC,
    ['t']  = JUNESC,
    ['u']  = JUNESC
};
static struct jtok
jparse(struct jctx *ctx)
{
const char *str = 0;
int remain = 0, c = 0;
ctx->tbl = (ctx->tbl) ? ctx->tbl: jtbl;
#define t(...)(struct jtok){__VA_ARGS__}
while (ctx->cur < ctx->end) {
switch (ctx->tbl[c = (unsigned char)(*ctx->cur++)]) {
case JFAILED: return t(.type = JERR);
case JESC: ctx->tbl = jesc; break;
case JUNESC: ctx->tbl = jstr; break;
case JQUP: str = ctx->cur, ctx->tbl = jstr; break;
case JUTF8_2: ctx->tbl = jutf8, remain = 1; break;
case JUTF8_3: ctx->tbl = jutf8, remain = 2; break;
case JUTF8_4: ctx->tbl = jutf8, remain = 3; break;
case JUTF8_N: if (!--remain) ctx->tbl = jstr; break;
case JBARE: ctx->tbl=jbare, str = ctx->cur-1; break;
case JSEP: return t(.type=JTSEP, .str=ctx->cur-1, .len=1);
case JQDWN: ctx->tbl=jtbl; return t(.type = JSTR, .str=str, .len=(int)((ctx->cur-1)-str));
case JUP: return t(.type=(c=='{')?JOBJB:JARRB, .str=ctx->cur-1, .len=1);
case JDWN: return t(.type=(c=='}')?JOBJE:JARRE, .str=ctx->cur-1, .len=1);
case JUNBARE: ctx->tbl=jtbl; return t(.str=str, .len=(int)(--ctx->cur - str),
.type=(str[0] == 't') ? JTRUE:(str[0] == 'f') ? JFALSE:(str[0] == 'n') ? JNULL:JNUM);}
} return t(.type=JEOS);
#undef t
}
int main(void)
{
struct jtok t;
const char json[] = "{\"name\":\"test\", \"age\":42, \"utf8\":\"äöü\", \"alive\":true}";
struct jctx ctx = {.cur = json, .end = json + sizeof(json)};
while ((t = jparse(&ctx)).type) {
switch (t.type) {case JERR: goto err;
case JOBJB: printf("Token(Object_Begin)\n"); break;
case JOBJE: printf("Token(Object_End)\n"); break;
case JARRB: printf("Token(Array_Begin)\n"); break;
case JARRE: printf("Token(Array_End)\n"); break;
case JNUM: printf("Token(Number): %.*s\n", t.len, t.str); break;
case JSTR: printf("Token(String): %.*s\n", t.len, t.str); break;
case JTRUE: printf("Token(True): %.*s\n", t.len, t.str); break;
case JFALSE: printf("Token(False): %.*s\n", t.len, t.str); break;
case JNULL: printf("Token(NULL): %.*s\n", t.len, t.str); break;
case JTSEP: printf("Token(SEP): '%.*s'\n", t.len, t.str); break;}
} err: return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment