Created
June 21, 2020 20:16
-
-
Save Zemnmez/03de7eda5415a59c886775a699b075de to your computer and use it in GitHub Desktop.
Work-in-progress parser for Cultist Simulator's "JSON" format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * A sequence of text units, normally one Unicode code point per element
 * (the result of spreading a string into an array).
 */
type runes = string[];

/**
 * Builds (but does not throw) the error reported when the parser meets a
 * character it cannot accept.
 *
 * @param char the offending character, or `undefined` when the input ended
 *   early. The parsers read past the end of their input by destructuring, so
 *   `undefined` is a normal argument here and is reported as "end of input"
 *   rather than as the literal text "undefined".
 * @param expected zero or more descriptions of what would have been accepted.
 * @returns an `Error` whose message names the character and the alternatives.
 */
const unexpected = (char: string | undefined, ...expected: string[]): Error => {
  const found = char === undefined ? "end of input" : `"${char}"`;
  const wanted = expected.length
    ? `; expected ${expected.map(v => `"${v}"`).join(" or ")}`
    : "";
  return new Error(`unexpected ${found}${wanted}`);
};
/**
 * Adapts one Cultist Simulator "JSON" document to the text produced by the
 * parsers in this file.
 *
 * Surrounding whitespace is trimmed first: input read from a file or stdin
 * almost always ends with a newline, which previously made every such
 * document fail with a trailing-input error. `String.prototype.trim` also
 * strips a leading U+FEFF byte-order mark.
 *
 * @param i the raw document text.
 * @returns the adapted text of the single top-level value.
 * @throws Error if the value is malformed or is followed by anything other
 *   than whitespace.
 */
export const AdaptCultistJson: (i: string) => string = (i) => {
  // Spreading a string iterates by code point, so astral characters stay whole.
  const { value, remainder } = parseValue([...i.trim()]);

  // Sub-parsers may hand back a lone `undefined` at end of input; join()
  // renders that as the empty string, so this is a safe "anything left?" test.
  const trailing = remainder.join("");
  if (trailing !== "") throw unexpected(trailing, "end of input");

  return value.join("");
};
/**
 * Parses a single value by looking at its first character and delegating to
 * the matching sub-parser: object, array, string, `true`, `false` or number.
 *
 * Fix: the `true` lookahead used to compare against the text "test", so a
 * literal `true` could never be recognised and always raised an error.
 *
 * @param chars the input, starting at the first character of the value.
 * @returns the adapted text of the value and the unconsumed input.
 * @throws Error if the first character cannot start any supported value.
 */
const parseValue: (chars: runes) => { value: runes, remainder: runes } = (chars) => {
  const [char] = chars;

  switch (char) {
    case "{":
      return parseObject(chars);
    case "[":
      return parseArray(chars);
    case '"':
      return parseString(chars);
    case "t":
      // Only commit to a keyword when the whole word is present; otherwise
      // fall through to the generic error below.
      if (chars.slice(0, 4).join("") === "true") return parseTrue(chars);
      break;
    case "f":
      if (chars.slice(0, 5).join("") === "false") return parseFalse(chars);
      break;
  }

  // `char` is undefined at end of input; guard so it is never coerced to text.
  if (char !== undefined && /\p{Nd}/u.test(char)) return parseNumber(chars);

  throw unexpected(char);
};
/**
 * Parses a boolean literal, choosing between `true` and `false` from the
 * first character alone.
 *
 * @param chars the input, starting at the literal.
 * @returns whatever the chosen literal parser returns.
 * @throws Error if the first character is neither "t" nor "f".
 */
const parseBool: (chars: runes) => { value: runes, remainder: runes } = (chars) => {
  const [first] = chars;
  if (first === "t") return parseTrue(chars);
  if (first === "f") return parseFalse(chars);
  throw unexpected(first, "t", "f");
};
/**
 * Consumes the exact literal `true` from the front of the input.
 *
 * @param chars the input, which must begin with the four characters "true".
 * @returns the matched characters and everything after them.
 * @throws Error naming the first character that differs from the literal.
 */
const parseTrue: (chars: runes) => { value: runes, remainder: runes} = (chars) => {
  const keyword = [..."true"];
  const matched: runes = [];

  keyword.forEach((wanted, offset) => {
    const actual = chars[offset];
    if (actual !== wanted) throw unexpected(actual, wanted);
    matched.push(actual);
  });

  // Rebuilt by destructuring so that an exhausted input is returned as a
  // one-element array holding `undefined`, which is the shape callers get today.
  const [next, ...rest] = chars.slice(keyword.length);
  return { value: matched, remainder: [next, ...rest] };
};
/**
 * Consumes the exact literal `false` from the front of the input.
 *
 * @param chars the input, which must begin with the five characters "false".
 * @returns the matched characters and everything after them.
 * @throws Error naming the first character that differs from the literal.
 */
const parseFalse: (chars: runes) => { value: runes, remainder: runes} = (chars) => {
  const keyword = [..."false"];
  const matched: runes = [];

  keyword.forEach((wanted, offset) => {
    const actual = chars[offset];
    if (actual !== wanted) throw unexpected(actual, wanted);
    matched.push(actual);
  });

  // Rebuilt by destructuring so that an exhausted input is returned as a
  // one-element array holding `undefined`, which is the shape callers get today.
  const [next, ...rest] = chars.slice(keyword.length);
  return { value: matched, remainder: [next, ...rest] };
};
/**
 * Consumes a (possibly empty) run of leading whitespace.
 *
 * Fix: only Unicode space separators (`\p{Zs}`) used to count as whitespace,
 * so tabs, carriage returns and newlines stopped the run. `\s` is a superset
 * of `\p{Zs}` that also covers those characters, so anything accepted before
 * is still accepted.
 *
 * @param chars the input.
 * @returns the whitespace consumed and the unconsumed input. Never throws.
 */
const parseWS: (chars: runes) => { value: runes, remainder: runes } = (chars) => {
  // Anchored so that an `undefined` element (how some callers signal end of
  // input) can never match after being coerced to text.
  const whitespace = /^\s$/u;

  let end = 0;
  while (end < chars.length && chars[end] !== undefined && whitespace.test(chars[end])) {
    end++;
  }

  return { value: chars.slice(0, end), remainder: chars.slice(end) };
};
/**
 * Parses a bracketed, comma-separated list of values, preserving the
 * whitespace found between elements.
 *
 * Fix: an empty array (`[]` or `[ ]`) used to fail because an element was
 * always required after the opening bracket. The final whitespace scan, whose
 * result was discarded, is gone.
 *
 * @param chars the input, starting at "[".
 * @returns the adapted array text and the unconsumed input.
 * @throws Error if the brackets are missing or an element is malformed.
 */
const parseArray: (chars: runes) => { value: runes, remainder: runes } = (chars) => {
  const out: runes = [];
  let rest = chars;

  // Appends a sub-parser's output and advances past what it consumed.
  // A plain loop is used rather than push(...spread): nested values can be
  // large enough to exceed the engine's argument-count limit.
  const consume = (parsed: { value: runes, remainder: runes }): void => {
    for (const rune of parsed.value) out.push(rune);
    rest = parsed.remainder;
  };

  // Requires `want` as the next character and copies it to the output.
  const expect = (want: string): void => {
    if (rest[0] !== want) throw unexpected(rest[0], want);
    out.push(want);
    rest = rest.slice(1);
  };

  expect("[");
  consume(parseWS(rest));

  if (rest[0] !== "]") {
    for (;;) {
      consume(parseWS(rest));
      consume(parseValue(rest));
      consume(parseWS(rest));
      if (rest[0] !== ",") break;
      expect(",");
    }
  }

  expect("]");
  return { value: out, remainder: rest };
};
/**
 * Parses a braced list of `key: value` pairs.
 *
 * Fixes:
 *  - whitespace was never skipped, so any object containing a space (let
 *    alone one spread over several lines) was rejected; whitespace is now
 *    accepted, and preserved, around keys, colons, values and commas;
 *  - an empty object (`{}`) was rejected because a key was always required.
 *
 * @param chars the input, starting at "{".
 * @returns the adapted object text and the unconsumed input.
 * @throws Error if a brace or colon is missing, or a key/value is malformed.
 */
const parseObject: (chars: runes) => {value: runes, remainder: runes} = (chars) => {
  const out: runes = [];
  let rest = chars;

  // Appends a sub-parser's output and advances past what it consumed.
  // A plain loop is used rather than push(...spread): nested values can be
  // large enough to exceed the engine's argument-count limit.
  const consume = (parsed: { value: runes, remainder: runes }): void => {
    for (const rune of parsed.value) out.push(rune);
    rest = parsed.remainder;
  };

  // Requires `want` as the next character and copies it to the output.
  const expect = (want: string): void => {
    if (rest[0] !== want) throw unexpected(rest[0], want);
    out.push(want);
    rest = rest.slice(1);
  };

  expect("{");
  consume(parseWS(rest));

  if (rest[0] !== "}") {
    for (;;) {
      consume(parseWS(rest));

      const key = parseKey(rest);
      // An unquoted key containing "." is wrapped in square brackets.
      // NOTE(review): the result is `[a.b]`, which reads as a computed key
      // over the expression `a.b`, not the string "a.b" -- confirm that is
      // what the consumer of this output expects.
      const boxed = key.value[0] !== '"' && key.value.includes(".");
      consume({
        value: boxed ? ["[", ...key.value, "]"] : key.value,
        remainder: key.remainder,
      });

      consume(parseWS(rest));
      expect(":");
      consume(parseWS(rest));
      consume(parseValue(rest));
      consume(parseWS(rest));

      // Another pair follows only after a comma.
      if (rest[0] !== ",") break;
      expect(",");
    }
  }

  expect("}");
  return { value: out, remainder: rest };
};
/**
 * Parses an object key, which may be a double-quoted string, a number or a
 * bare identifier. Quoted keys keep their double quotes.
 *
 * Fix: at end of input the first character is `undefined`; `RegExp.test`
 * coerced it to the text "undefined", which contains letters, so the missing
 * key was handed to the identifier parser instead of being reported.
 *
 * @param chars the input, starting at the key.
 * @returns the key text and the unconsumed input.
 * @throws Error if the input has ended or the first character cannot start a key.
 */
const parseKey: (chars: runes) => { value: runes, remainder: runes} = (chars) => {
  const [char] = chars;

  if (char === undefined) throw unexpected(char, "letter", "number", 'quote');
  if (char === '"') return parseString(chars, '"');
  if (/\p{Nd}/u.test(char)) return parseNumber(chars);
  if (/\p{Letter}/u.test(char)) return parseIdent(chars);

  throw unexpected(char, "letter", "number", 'quote');
};
/**
 * Consumes a bare identifier: a run of Unicode letters and "." characters.
 *
 * Fix: when the input ran out mid-identifier the loop never ended. The
 * exhausted character is `undefined`, which `RegExp.test` coerced to the text
 * "undefined"; that contains letters, so it kept "matching" forever. The scan
 * is now bounded by the input length and the pattern is anchored.
 *
 * @param chars the input.
 * @returns the identifier characters (possibly none) and the unconsumed
 *   input. Never throws; the caller decides what may follow.
 */
const parseIdent: (chars: runes) => { value: runes, remainder: runes } = (chars) => {
  const identRune = /^(?:\p{Letter}|\.)$/u;

  let end = 0;
  while (end < chars.length && chars[end] !== undefined && identRune.test(chars[end])) {
    end++;
  }

  return { value: chars.slice(0, end), remainder: chars.slice(end) };
};
/**
 * Consumes a numeric literal and returns it verbatim.
 *
 * A literal beginning "0x" is delegated to the hexadecimal parser. Anything
 * else is read as a run of decimal digits and "." characters; octal is not
 * distinguished because the digits are passed through unchanged anyway.
 *
 * @param chars the input.
 * @returns the literal's characters (possibly none) and the unconsumed input.
 *   Stopping is never an error here: this function does not know which
 *   characters may legally follow a number.
 */
const parseNumber: (chars: runes) => { value: runes, remainder: runes } = (chars) => {
  if (chars[0] == "0" && chars[1] == "x") return parseHexNumber(chars);

  const digitOrDot = /\p{Nd}|\./u;
  const digits: runes = [];
  let cursor = 0;
  while (digitOrDot.test(chars[cursor])) {
    digits.push(chars[cursor]);
    cursor += 1;
  }

  // Rebuilt by destructuring so that an exhausted input is returned as a
  // one-element array holding `undefined`, which is the shape callers get today.
  const [head, ...tail] = chars.slice(cursor);
  return { value: digits, remainder: [head, ...tail] };
};
/**
 * Consumes a hexadecimal literal ("0x" followed by one or more hex digits)
 * and returns it verbatim.
 *
 * Fixes:
 *  - the cursor was never advanced past the "x", so the digit loop stopped
 *    immediately: the result was always just "0x", with the "x" and every
 *    digit left in the remainder;
 *  - upper-case digits (A-F) are now accepted alongside lower-case ones;
 *  - "0x" with no digits is rejected instead of being emitted as an
 *    incomplete literal;
 *  - the scan is bounded by the input length, so reaching the end of input
 *    can no longer be mistaken for a digit.
 *
 * @param chars the input, starting at "0x".
 * @returns the literal's characters and the unconsumed input.
 * @throws Error if the "0x" prefix or the first hex digit is missing.
 */
const parseHexNumber: (chars: runes) => { value: runes, remainder: runes } = (chars) => {
  if (chars[0] !== "0") throw unexpected(chars[0], "0");
  if (chars[1] !== "x") throw unexpected(chars[1], "x");

  const hexDigit = /^[0-9a-fA-F]$/;
  const prefixLength = 2;

  let end = prefixLength;
  while (end < chars.length && chars[end] !== undefined && hexDigit.test(chars[end])) {
    end++;
  }
  if (end === prefixLength) throw unexpected(chars[end], "hex digit");

  return { value: chars.slice(0, end), remainder: chars.slice(end) };
};
/**
 * Parses a double-quoted string and re-quotes it with `quoteWith`.
 *
 * Cultist Simulator strings, unlike proper JSON, can run over several lines,
 * which is why the default replacement quote is a backtick. The body is not
 * validated; it is copied through, with only the characters that matter for
 * finding the end of the string and for re-quoting treated specially.
 *
 * Fixes:
 *  - a `quoteWith` character inside the string was escaped but then ended
 *    the scan, cutting the string short with no closing quote and leaving
 *    the rest of it in the remainder;
 *  - an unterminated string looped forever; it now raises an error;
 *  - a backslash escape is copied as a pair, so `\"` no longer ends the string.
 *
 * NOTE(review): with the default backtick quote, a literal "${" in the input
 * would presumably be read as an interpolation by whatever consumes the
 * output; it is passed through unchanged here -- confirm whether it needs
 * escaping.
 *
 * @param chars the input, starting at the opening double quote.
 * @param quoteWith the quote character to emit in place of each double quote.
 * @returns the re-quoted string and the unconsumed input.
 * @throws Error if the opening quote is missing or the string never closes.
 */
const parseString: (chars: runes, quoteWith?: string) => { value: runes, remainder: runes } =
  (chars, quoteWith = "`") => {
    if (chars[0] !== '"') throw unexpected(chars[0], 'quote');

    const out: runes = [quoteWith];
    let pos = 1;

    for (;;) {
      const rune = chars[pos];
      if (rune === undefined) throw unexpected(rune, '"');
      pos++;

      if (rune === '"') {
        out.push(quoteWith);
        break;
      }

      if (rune === "\\") {
        // Keep the escape intact so the escaped character (a quote, say) is
        // never mistaken for a delimiter.
        const escaped = chars[pos];
        if (escaped === undefined) throw unexpected(escaped, "escaped character");
        out.push(rune, escaped);
        pos++;
        continue;
      }

      // The replacement quote must be escaped when it appears in the body.
      out.push(rune === quoteWith ? "\\" + quoteWith : rune);
    }

    return { value: out, remainder: chars.slice(pos) };
  };
// Command-line entry point: when this file is run directly (rather than
// imported), read the whole of standard input (file descriptor 0) as UTF-8,
// adapt it, and print the result to standard output.
if (require.main === module) {
  const stdinText = require('fs').readFileSync(0, 'utf-8');
  console.log(AdaptCultistJson(stdinText));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment