Skip to content

Instantly share code, notes, and snippets.

@zoren
Created November 13, 2023 18:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zoren/54562f1a3903d19ac29c04a4865d99b7 to your computer and use it in GitHub Desktop.
Save zoren/54562f1a3903d19ac29c04a4865d99b7 to your computer and use it in GitHub Desktop.
a parser for a language that has only parentheses, whitespace, and words
// this is a parser for a language described in this tweet: https://twitter.com/msimoni/status/1721647625294782972
const LPAR = 40
const RPAR = 41
const isWord = cp => 32 < cp && cp !== LPAR && cp !== RPAR
export const parse = inputString => {
const warnings = []
const topLevelArray = []
let currentArray = topLevelArray
const stack = [currentArray]
const wordBuffer = []
let i = 0
for (const character of inputString) {
const codePoint = character.codePointAt(0)
if (isWord(codePoint)) wordBuffer.push(codePoint)
// if the state was a word, but no more, emit the word
if (wordBuffer.length && !isWord(codePoint)) {
currentArray.push(String.fromCodePoint(...wordBuffer))
wordBuffer.length = 0
}
switch (codePoint) {
case LPAR: {
const newArray = []
currentArray.push(newArray)
stack.push(newArray)
currentArray = newArray
break
}
case RPAR: {
// only pop if we have something to pop, this allows for extra closing parens
if (stack.length === 1)
warnings.push({ type: 'extraClosingParen', index: i })
else {
stack.pop()
currentArray = stack.at(-1)
}
break
}
}
i++
}
if (wordBuffer.length) currentArray.push(String.fromCodePoint(...wordBuffer))
if (stack.length !== 1)
warnings.push({ type: 'unclosedParens', number: stack.length - 1 })
return { forms: topLevelArray, warnings }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment