Skip to content

Instantly share code, notes, and snippets.

@Islidius
Created August 21, 2019 11:40
Show Gist options
  • Save Islidius/e036a9f261b18f9132cf26ef5a9a06c9 to your computer and use it in GitHub Desktop.
Save Islidius/e036a9f261b18f9132cf26ef5a9a06c9 to your computer and use it in GitHub Desktop.
Small piece of code to generate FIRST and FOLLOW sets
// 'ε' symbol for empty symbol
const EPSILON = 'ε';
// this is a set that tracks the fact it contains 'ε' seperatly
class CustomSet {
constructor() {
this.set = new Set();
this.eps = false;
}
addTerminal(terminal) {
this.set.add(terminal);
}
union(other) {
for(let k of other.set) {
this.set.add(k);
}
}
setEpsilon() {
this.eps = true;
}
hasEpsilon() {
return this.eps;
}
asArray() {
if(this.hasEpsilon()) return [...this.set, EPSILON];
return [...this.set];
}
}
// simple graph wrapper
class Graph {
constructor() {
this.graph = {};
}
addLink(from, to) {
if(this.graph[from] === undefined) {
this.graph[from] = [];
}
this.graph[from].push(to);
}
getLink(from) {
return this.graph[from] ? this.graph[from] : [];
}
}
const isTerminal = (symbol) => !/[A-Z]/.test(symbol);
const isNonTerminal = (symbol) => /[A-Z]/.test(symbol);
const isEpsilon = (symbol) => symbol === EPSILON;
const getLHS = (production) =>
production.split('->')[0].replace(/\s+/g, '');
const getRHS = (production) =>
production.split('->')[1].replace(/\s+/g, '');
const getProductionsForSymbol = (symbol, grammar) =>
grammar.filter((p) => getLHS(p) === symbol);
const getProductionsWithSymbol = (symbol, grammar) =>
grammar.filter((p) => getRHS(p).indexOf(symbol) !== -1);
const generateFirst = (symbol, firstSets, grammar) => {
if(firstSets[symbol]) return;
let first = firstSets[symbol] = new CustomSet;
// the first set of a terminal symbol is just the terminal
if(isTerminal(symbol)) {
first.addTerminal(symbol);
return;
}
for(let production of getProductionsForSymbol(symbol, grammar)) {
let rhs = getRHS(production);
let hasAtLeastOneTerminal = false;
for(let prodSymbol of rhs) {
// if production is 'ε' then the first also contains it
if(prodSymbol === EPSILON) {
first.setEpsilon();
break;
}
// get the first of the current symbol
generateFirst(prodSymbol, firstSets, grammar);
let firstOfNonTerminal = firstSets[prodSymbol];
first.union(firstOfNonTerminal);
// if the first of the current symbol does not contain 'ε' break
if(!firstOfNonTerminal.hasEpsilon()) {
hasAtLeastOneTerminal = true;
break;
}
}
// remember that if all non-terminals contain 'ε' first also contains 'ε'
if(!hasAtLeastOneTerminal) {
first.setEpsilon();
}
}
};
const buildFirst = (grammar) => {
let firstSets = {};
for(let prod of grammar) {
generateFirst(getLHS(prod), firstSets, grammar);
}
return firstSets;
};
// this follow pass does only copy in the first sets and builds the follow graph
const generateFollow = (symbol, followSets, firstSets, graph, grammar) => {
if(followSets[symbol]) return;
let follow = followSets[symbol] = new CustomSet;
for(let production of getProductionsWithSymbol(symbol, grammar)) {
let rhs = getRHS(production);
let symbolIndex = 0;
while(true) {
// find the location of the symbol in question
symbolIndex = rhs.indexOf(symbol, symbolIndex);
// was not found
if(symbolIndex === -1) break;
// symbol after the symbol
symbolIndex += 1;
while(true) {
// symbol is the last one. This means FOLLOW(symbol) = FOLLOW(getLHS(production))
// if we copy here we run in caching problems, so just add it
// to the graph and later propagate it
if(symbolIndex === rhs.length) {
graph.addLink(symbol, getLHS(production));
break;
}
let followSymbol = rhs[symbolIndex];
generateFirst(followSymbol, firstSets, grammar);
let firstOfFollow = firstSets[followSymbol];
follow.union(firstOfFollow);
// break if the FIRST does not contain 'ε'
if(!firstOfFollow.hasEpsilon()) break;
symbolIndex += 1;
}
}
}
};
// propagate the FOLLOW sets using the graph previously build
// this employs a simple breath first search of the graph and
// unions the sets.
const propagateFollow = (symbol, followSets, graph) => {
let visited = [symbol];
let current = [symbol];
while(current.length !== 0) {
let next = [];
for(let c of current) {
for(let n of graph.getLink(c)) {
if(!visited.includes(n)) {
visited.push(n);
next.push(n);
followSets[symbol].union(followSets[n]);
}
}
}
current = next;
}
};
// first build the follow sets and graph, the propagate the follow
// set using the graph.
const buildFollow = (grammar, firstSets, startingSymbol) => {
let followSets = {};
let graph = new Graph;
for(let prod of grammar) {
generateFollow(getLHS(prod), followSets, firstSets, graph, grammar);
}
// add the end of input to the starting symbol
followSets[startingSymbol].addTerminal('$');
for(let prod of grammar) {
propagateFollow(getLHS(prod), followSets, graph);
}
return followSets;
};
const printGrammar = (grammar) => {
for(let k of grammar) {
console.log(' ', k);
}
};
const printSet = (set) => {
for(let k in set) {
console.log(' ', k, ':', set[k].asArray());
}
};
const buildAndPrint = (grammar, startingSymbol) => {
let firstSets = buildFirst(grammar);
let followSets = buildFollow(grammar, firstSets, startingSymbol);
console.log('Grammar:\n');
printGrammar(grammar);
console.log('');
console.log('First sets:\n');
printSet(firstSets);
console.log('');
console.log('Follow sets:\n');
printSet(followSets);
console.log('');
}
let grammar = [
'S -> F',
'S -> (S + F)',
'F -> a'
];
buildAndPrint(grammar, 'S');
grammar = [
'E -> TX',
'X -> +TX',
'X -> ε',
'T -> FY',
'Y -> *FY',
'Y -> ε',
'F -> a',
'F -> (E)',
];
buildAndPrint(grammar, 'E');
grammar = [
'S -> AB',
'A -> ε',
'B -> ε'
];
buildAndPrint(grammar, 'S');
grammar = [
'E -> aT',
'E -> ε',
'T -> bE',
'T -> ε',
'S -> Ec'
];
buildAndPrint(grammar, 'S');
grammar = [
'E -> EE',
'E -> a'
];
buildAndPrint(grammar, 'E');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment