Created
November 13, 2022 21:17
-
-
Save Bravotic/63e275bcf53050189df33329bb0422f4 to your computer and use it in GitHub Desktop.
Parser generator created in https://bravotic.com/dev/stdscr/index.php/2022/11/13/making-a-parser-generator-doesnt-have-to-be-hard/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// The following is public domain, go wild. | |
// If you could however, please at least keep the blog URL in it if you use it verbaitum | |
// https://bravotic.com/dev/stdscr/index.php/2022/11/13/making-a-parser-generator-doesnt-have-to-be-hard/ | |
// Step 1: Lets get a list of tokens | |
// Our list of tokens | |
var tokens = ["Hello", "Hi", "Goodbye", "Good", "World", "Wordlist"]; | |
// Step 2: Create the trees | |
// We need to create a 'state' which holds the possible next letters, and a state for those as well | |
var initialState = { | |
id: 0, | |
next: [] | |
}; | |
// Keep track of how many states we have made for code generation later | |
var stateCounter = 0; | |
// Loop through our tokens | |
for(let i = 0; i < tokens.length; i++){ | |
// Our current token we are checking | |
let currentToken = tokens[i]; | |
console.log("Current token: " + currentToken); | |
// Reset to the initial state since we are on a new token | |
let currentState = initialState; | |
// Now loop through every letter in our token | |
letterLoop: for(let j = 0; j < currentToken.length; j++){ | |
// Our current token letter | |
let currentLetter = currentToken.at(j); | |
// Go through every possible next letter | |
for(let k = 0; k < currentState.next.length; k++){ | |
let possibleNextLetter = currentState.next[k]; | |
// If the possible next letter is our current letter, we follow it | |
if(possibleNextLetter.letter == currentLetter){ | |
console.log("Found branch for: " + currentLetter); | |
// Set the new current state to the branch we are following | |
currentState = possibleNextLetter.state; | |
// Go back to the letter loop | |
continue letterLoop; | |
} | |
} | |
console.log("Adding " + currentLetter); | |
// If we get down here, its because we couldn't find a branch, so we need to add one | |
currentState.next.push({ | |
letter: currentLetter, | |
state: { | |
id: ++stateCounter, | |
next: [] | |
} | |
}); | |
// Now set our state to the new state we made | |
currentState = currentState.next.at(-1).state; | |
console.log("ID: " + currentState.id); | |
} | |
console.log("Adding end of word marker"); | |
// Add our end of word marker, this contains the token number we return | |
currentState.next.push({ | |
letter: 0, | |
state: { | |
id: ++stateCounter, | |
next: [] | |
}, | |
token: i + 1 | |
}); | |
} | |
// Step 3: Code generation | |
console.log("BELOW IS THE GENERATED PARSER CODE"); | |
// Print the head to our function | |
// function parse(token){ | |
// let i = 0; | |
// let state = 0; | |
// switch(state){ | |
console.log("function parse(token){"); | |
console.log("\tlet i = 0;"); | |
console.log("\tlet state = 0;"); | |
console.log("\twhile(true){"); | |
console.log("\t\tswitch(state){"); | |
// Now go though each state and print out the imporant bits | |
// To do this I'll simply create a recursive function. | |
function printState(state){ | |
// Print our case | |
console.log("\t\t\tcase " + state.id + ":"); | |
// Make our case for the next letter | |
console.log("\t\t\t\tswitch(token.at(i)){"); | |
// And our conditions | |
for(let i = 0; i < state.next.length; i++){ | |
let condition = state.next[i]; | |
// If our letter is 0, we are at the end of the string, in Javascript, the string.at should return undefined at the end, so we check for that | |
if(condition.letter == 0){ | |
// If we are actually at the end of the string, return our token number | |
console.log("\t\t\t\t\tcase undefined: return " + condition.token + ";"); | |
} | |
else{ | |
// Otherwise make it so we go to the next state | |
// case 'nextLetter': | |
// i++; | |
// state = nextStateId; | |
// break; | |
console.log("\t\t\t\t\tcase '" + condition.letter + "':"); | |
console.log("\t\t\t\t\t\ti++;"); | |
console.log("\t\t\t\t\t\tstate = " + condition.state.id + ";"); | |
console.log("\t\t\t\t\t\tbreak;"); | |
} | |
} | |
// Make sure if we can't branch anymore, we return 0 so we don't hit an infinite loop | |
// defualt: reutrn 0; | |
console.log("\t\t\t\t\tdefault: return 0;"); | |
// And finally just close up the switch/case and this current case for the state machine. | |
console.log("\t\t\t\t}"); | |
console.log("\t\t\tbreak;"); | |
// Now go through every next state and print it out again | |
for(let i = 0; i < state.next.length; i++){ | |
// recursion is fun | |
// recursion is fun | |
// recursion is fun | |
// recursion is fun | |
printState(state.next[i].state); | |
} | |
} | |
// Call the function on the initial state and it should go through everything | |
printState(initialState); | |
// Now finish up the function | |
console.log("\t\t}"); | |
console.log("\t}"); | |
console.log("}"); | |
// Now everything is done |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment