Created
February 22, 2019 12:40
-
-
Save Roboneet/18443acdf3383e2970171255d670a7c2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Find first and follow sets in a grammar | |
======================================== | |
( why do programmers ever need a reason? :P ) | |
*/ | |
function split_lr(rule){ | |
return rule.split("===>") | |
} | |
function addRule(dict, lhs, rhs){ | |
lhs = lhs.trim() | |
rhs = rhs.split("|").map(r => r.trim().split(" ").map(e => e.trim())); | |
if(dict[lhs] == undefined){ | |
dict[lhs] = {rules:[], visited:false, backedges:[], follow:[]}; | |
} | |
var l = rhs.length; | |
dict[lhs].rules.push(...rhs); | |
} | |
function create_backedges(dict){ | |
Object.keys(dict).forEach(d => { | |
var rules = dict[d].rules; | |
rules.forEach((rule, i) => { | |
rule.forEach((word, j) =>{ | |
if(dict[word] == undefined)return; | |
var obj = dict[word]; | |
obj.backedges.push([d, i, j]); | |
}) | |
}) | |
}) | |
} | |
// find first | |
function first(dict, lhs){ | |
var obj = dict[lhs] | |
if(obj == undefined)return [lhs]; | |
if(obj.first != undefined) return obj.first; | |
if(obj.visited) return []; | |
var f = []; | |
obj.visited = true; | |
obj.rules.forEach(rule =>{ | |
f.push(...first_(dict, rule)); | |
}) | |
obj.visited = false; | |
obj.first = unique(f); | |
return obj.first.slice(); | |
} | |
function first_(dict, rule){ | |
var l = rule.length; | |
var f = []; | |
var i = 0; | |
for(; i< l; i++){ | |
var w = rule[i]; | |
if(dict[w] == undefined){ // handles "eps" and TOKENS | |
f.push(w) | |
break; | |
} | |
var wf = first(dict, w); | |
var o = wf.indexOf("eps"); | |
if(o == -1){ | |
f.push(...wf); | |
break; | |
}else{ | |
var nwf = wf.slice(); | |
nwf.splice(o, 1); | |
f.push(...nwf); | |
} | |
} | |
if(i == l){ | |
f.push("eps"); | |
} | |
return unique(f); | |
} | |
function unique(arr){ | |
var obj = {}; | |
arr.forEach(e => { | |
obj[e] = true; | |
}) | |
return Object.keys(obj); | |
} | |
// find follow | |
function follow(dict, lhs){ | |
var obj = dict[lhs]; | |
if(obj.visited) return; | |
obj.visited = true; | |
var rules = obj.rules; | |
var next = []; | |
rules.forEach((rule, i) => { | |
var j = rule.length - 1; | |
var w = rule[j]; | |
if(dict[w]){ | |
dict[w].follow.push(...obj.follow); | |
next.push(w); | |
dict[w].follow = unique(dict[w].follow); | |
} | |
j--; | |
for(; j>=0; j--){ | |
var w = rule[j]; | |
var wo = dict[w]; | |
if(wo){ | |
if(dict[rule[j + 1]]){ | |
var prev = dict[rule[j + 1]]; | |
var f = prev.first; | |
var o = f.indexOf("eps"); | |
if(o != -1){ | |
var nf = f.slice(); | |
nf.splice(o, 1); | |
wo.follow.push(...nf); | |
wo.follow.push(...prev.follow); | |
}else{ | |
wo.follow.push(...f); | |
} | |
}else{ | |
wo.follow.push(rule[j + 1]); | |
} | |
next.push(w); | |
dict[w].follow = unique(dict[w].follow); | |
} | |
} | |
}) | |
next.forEach(ele => { | |
dict[ele].follow = unique(dict[ele].follow); | |
follow(dict, ele); | |
}); | |
obj.visited = false; | |
} | |
function findTop(dict){ | |
return Object.keys(dict).filter(key => dict[key].backedges == 0); | |
} | |
// generate first and follow sets from grammar | |
function generate(script){ | |
var rules = script.split("\n"); | |
var dict = {}; | |
rules.map((r) => split_lr(r)).forEach((rule) => {addRule(dict, ...rule)}) | |
create_backedges(dict); | |
Object.keys(dict).forEach(ele => first(dict, ele)) | |
var top = findTop(dict); | |
top.forEach(key => { | |
dict[key].follow = ["$"] | |
follow(dict, key); | |
}) | |
return dict; | |
} | |
var rule_scprit = `<program> ===> <otherFunctions> <mainFunction> | |
<mainFunction>===> TK_MAIN <stmts> TK_END | |
<otherFunctions>===> <function> <otherFunctions> | eps | |
<function>===>TK_FUNID <input_par> <output_par> TK_SEM <stmts> TK_END | |
<input_par>===>TK_INPUT TK_PARAMETER TK_LIST TK_SQL <parameter_list> TK_SQR | |
<output_par>===>TK_OUTPUT TK_PARAMETER TK_LIST TK_SQL <parameter_list> TK_SQR | eps | |
<parameter_list>===><dataType> TK_ID <remaining_list> | |
<dataType>===> <primitiveDatatype> |<constructedDatatype> | |
<primitiveDatatype>===> TK_INT | TK_REAL | |
<constructedDatatype>===>TK_RECORD TK_RECORDID | |
<remaining_list>===>TK_COMMA <parameter_list> | eps | |
<stmts>===><typeDefinitions> <declarations> <otherStmts> <returnStmt> | |
<typeDefinitions>===><typeDefinition> <typeDefinitions> |eps | |
<typeDefinition>===>TK_RECORD TK_RECORDID <fieldDefinitions> TK_ENDRECORD TK_SEM | |
<fieldDefinitions>===> <fieldDefinition> <fieldDefinition> <moreFields> | |
<fieldDefinition>===> TK_TYPE <primitiveDatatype> TK_COLON TK_FIELDID TK_SEM | |
<moreFields>===><fieldDefinition> <moreFields> | eps | |
<declarations> ===> <declaration> <declarations>|eps | |
<declaration>===> TK_TYPE <dataType> TK_COLON TK_ID <global_or_not> TK_SEM | |
<global_or_not>===>TK_COLON TK_GLOBAL| eps | |
<otherStmts>===> <stmt> <otherStmts> | eps | |
<stmt>===> <assignmentStmt> | <iterativeStmt> |<conditionalStmt>|<ioStmt>| <funCallStmt> | |
<assignmentStmt>===><singleOrRecId> TK_ASSIGNOP <arithmeticExpression> TK_SEM | |
<singleOrRecId>===>TK_ID | TK_RECORDID TK_DOT TK_FIELDID | |
<funCallStmt>===><outputParameters> TK_CALL TK_FUNID TK_WITH TK_PARAMETERS <inputParameters> | |
<outputParameters> ===> TK_SQL <idList> TK_SQR TK_ASSIGNOP | eps | |
<inputParameters>===> TK_SQL <idList> TK_SQR | |
<iterativeStmt>===> TK_WHILE TK_OP <booleanExpression> TK_CL <stmt> <otherStmts> TK_ENDWHILE | |
<conditionalStmt>===> TK_IF <booleanExpression> TK_THEN <stmt> <otherStmts> <condStmt> | |
<condStmt> ===> TK_ELSE <otherStmts> TK_ENDIF | TK_ENDIF | |
<ioStmt>===>TK_READ TK_OP <allVar> TK_CL TK_SEM | TK_WRITE TK_OP <allVar> TK_CL TK_SEM | |
<allVar>===><var>| TK_RECORDID | |
<arithmeticExpression>===><mulExp> <addExp> | |
<addExp>===>TK_PLUS <mulExp> <addExp> | TK_MINUS <mulExp> <addExp> | eps | |
<mulExp>===> <exp> <mulExp_> | |
<mulExp_>===>TK_MUL <exp> <mulExp_> | TK_DIV <exp> <mulExp_> | eps | |
<exp>===><var>| TK_OP <arithmeticExpression> TK_CL | |
<booleanExpression>===>TK_OP <booleanExpression> TK_CL <logicalOp> TK_OP <booleanExpression> TK_CL | |
<booleanExpression>===> <var> <relationalOp> <var> | |
<booleanExpression>===> TK_NOT <booleanExpression> | |
<var>===> TK_ID | TK_NUM | TK_RNUM | |
<logicalOp>===>TK_AND | TK_OR | |
<relationalOp>===> TK_LT | TK_LE | TK_EQ |TK_GT | TK_GE | TK_NE | |
<returnStmt>===>TK_RETURN <optionalReturn> TK_SEM | |
<optionalReturn>===>TK_SQL <idList> TK_SQR | eps | |
<idList>===> TK_ID <more_ids> | |
<more_ids>===> TK_COMMA <idList> | eps ` | |
var ans = generate(rule_scprit); | |
Object.keys(ans).forEach(ele => console.log(ele, ":", "\nfirst:[",...ans[ele].first,"]\nfollow:[",...ans[ele].follow,"]")); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment