Skip to content

Instantly share code, notes, and snippets.

@Bravotic
Created November 13, 2022 21:17
Show Gist options
  • Save Bravotic/63e275bcf53050189df33329bb0422f4 to your computer and use it in GitHub Desktop.
Save Bravotic/63e275bcf53050189df33329bb0422f4 to your computer and use it in GitHub Desktop.
// The following is public domain, go wild.
// If you could however, please at least keep the blog URL in it if you use it verbaitum
// https://bravotic.com/dev/stdscr/index.php/2022/11/13/making-a-parser-generator-doesnt-have-to-be-hard/
// Step 1: Lets get a list of tokens
// Our list of tokens
var tokens = ["Hello", "Hi", "Goodbye", "Good", "World", "Wordlist"];
// Step 2: Create the trees
// We need to create a 'state' which holds the possible next letters, and a state for those as well
var initialState = {
id: 0,
next: []
};
// Keep track of how many states we have made for code generation later
var stateCounter = 0;
// Loop through our tokens
for(let i = 0; i < tokens.length; i++){
// Our current token we are checking
let currentToken = tokens[i];
console.log("Current token: " + currentToken);
// Reset to the initial state since we are on a new token
let currentState = initialState;
// Now loop through every letter in our token
letterLoop: for(let j = 0; j < currentToken.length; j++){
// Our current token letter
let currentLetter = currentToken.at(j);
// Go through every possible next letter
for(let k = 0; k < currentState.next.length; k++){
let possibleNextLetter = currentState.next[k];
// If the possible next letter is our current letter, we follow it
if(possibleNextLetter.letter == currentLetter){
console.log("Found branch for: " + currentLetter);
// Set the new current state to the branch we are following
currentState = possibleNextLetter.state;
// Go back to the letter loop
continue letterLoop;
}
}
console.log("Adding " + currentLetter);
// If we get down here, its because we couldn't find a branch, so we need to add one
currentState.next.push({
letter: currentLetter,
state: {
id: ++stateCounter,
next: []
}
});
// Now set our state to the new state we made
currentState = currentState.next.at(-1).state;
console.log("ID: " + currentState.id);
}
console.log("Adding end of word marker");
// Add our end of word marker, this contains the token number we return
currentState.next.push({
letter: 0,
state: {
id: ++stateCounter,
next: []
},
token: i + 1
});
}
// Step 3: Code generation
console.log("BELOW IS THE GENERATED PARSER CODE");
// Print the head to our function
// function parse(token){
// let i = 0;
// let state = 0;
// switch(state){
console.log("function parse(token){");
console.log("\tlet i = 0;");
console.log("\tlet state = 0;");
console.log("\twhile(true){");
console.log("\t\tswitch(state){");
// Now go though each state and print out the imporant bits
// To do this I'll simply create a recursive function.
function printState(state){
// Print our case
console.log("\t\t\tcase " + state.id + ":");
// Make our case for the next letter
console.log("\t\t\t\tswitch(token.at(i)){");
// And our conditions
for(let i = 0; i < state.next.length; i++){
let condition = state.next[i];
// If our letter is 0, we are at the end of the string, in Javascript, the string.at should return undefined at the end, so we check for that
if(condition.letter == 0){
// If we are actually at the end of the string, return our token number
console.log("\t\t\t\t\tcase undefined: return " + condition.token + ";");
}
else{
// Otherwise make it so we go to the next state
// case 'nextLetter':
// i++;
// state = nextStateId;
// break;
console.log("\t\t\t\t\tcase '" + condition.letter + "':");
console.log("\t\t\t\t\t\ti++;");
console.log("\t\t\t\t\t\tstate = " + condition.state.id + ";");
console.log("\t\t\t\t\t\tbreak;");
}
}
// Make sure if we can't branch anymore, we return 0 so we don't hit an infinite loop
// defualt: reutrn 0;
console.log("\t\t\t\t\tdefault: return 0;");
// And finally just close up the switch/case and this current case for the state machine.
console.log("\t\t\t\t}");
console.log("\t\t\tbreak;");
// Now go through every next state and print it out again
for(let i = 0; i < state.next.length; i++){
// recursion is fun
// recursion is fun
// recursion is fun
// recursion is fun
printState(state.next[i].state);
}
}
// Call the function on the initial state and it should go through everything
printState(initialState);
// Now finish up the function
console.log("\t\t}");
console.log("\t}");
console.log("}");
// Now everything is done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment