Skip to content

Instantly share code, notes, and snippets.

@aatishnn
Created February 8, 2016 15:21
Show Gist options
  • Save aatishnn/5d613d48aec77a3fdcf1 to your computer and use it in GitHub Desktop.
Save aatishnn/5d613d48aec77a3fdcf1 to your computer and use it in GitHub Desktop.
Top Down Parser
from prettytable import PrettyTable
epsila = '@'
'''
S -> ABC
A -> abA | ab
B -> bD
D -> CD | @
C -> c | cC
E -> TA
A -> +TA|@
T -> FB
B -> *FB|@
F -> (E)|i
'''
#G = {'S': ['ABC',], 'A': ['abD',], 'D': ['A', '@'], 'B': ['bE',], 'E': ['CE', '@',], 'C':['cF', ], 'F': ['C', '@',]}
G = {'S': ['AC',], 'A': ['abD', 'bD',], 'D': ['A', '@'], 'C':['cF', ], 'F': ['C', '@',]}
TERMINALS = 'abc'
#NON_TERMINALS = 'ABCDEFS'
NON_TERMINALS = 'ACDFS'
START_SYM = 'S'
RH_MARKER = '$'
TEST_INPUT = 'abbcc'
# G = {'E':['TA',], 'A':['+TA','@'],'T':['FB',],'B':['*FB','@'],'F':['(E)','i'],}
# TERMINALS = '()i+*'
# NON_TERMINALS = 'ETABF'
# START_SYM = 'E'
# RH_MARKER = '$'
# TEST_INPUT = 'i+i*i'
def has_epsila_derivation(G, alpha):
return epsila in G.get(alpha, [])
def FIRST(G, alpha):
''' alpha may be terminal or non-terminal '''
if alpha in TERMINALS or alpha == epsila:
return [alpha,]
first_set = set()
# In case of non-terminal, see productions to find first
productions = G[alpha]
for production in productions:
for alpha in production:
first_value = alpha
if first_value in TERMINALS:
first_set.add(first_value)
break
elif first_value == epsila:
first_set.add(epsila)
break
elif first_value in NON_TERMINALS:
first_set.update(FIRST(G, first_value))
# But check whether it gives epsila derivation. In that case,
# see the first of next literal.
if not has_epsila_derivation(G, first_value):
break
else:
first_set.remove(epsila)
else:
raise ValueError('Invalid literal: no terminal or non-terminal or epsila')
return list(first_set)
def test_has_epsila_derivation():
assert has_epsila_derivation(G, 'D') == True
assert has_epsila_derivation(G, 'A') == False
def test_FIRST():
assert FIRST(G, 'a') == ['a']
assert FIRST(G, 'A') == ['a']
assert FIRST(G, 'B') == ['b']
assert FIRST(G, 'D') == ['@', 'c']
assert FIRST(G, 'C') == ['c']
assert FIRST(G, 'S') == ['a']
def FOLLOW(G, alpha, called_by=[]):
''' alpha may be terminal or non-terminal '''
follow_set = set()
if alpha == START_SYM:
follow_set.add(RH_MARKER)
for A, productions in G.iteritems():
for production in productions:
if alpha in production:
#print productions
#print "Using production ", production, "for alpha:", alpha
#print follow_set
alpha_index = production.index(alpha)
next_index = alpha_index + 1
while(True):
#print "0"
if next_index == len(production):
#print "1"
if not A in called_by and A != alpha:
#print "Follow"
follow_set.update(FOLLOW(G, A, called_by + [alpha,]))
break
elif production[next_index] in TERMINALS:
#print "2"
follow_set.add(production[next_index])
break
elif production[next_index] in NON_TERMINALS:
#print "3"
#print "First"
follow_set.update(FIRST(G, production[next_index]))
if not has_epsila_derivation(G, production[next_index]):
break
#print "Epsila derivation", next_index
next_index = next_index + 1
try:
follow_set.remove(epsila)
except KeyError:
pass
return list(follow_set)
def test_FOLLOW():
assert FOLLOW(G, 'S') == ['$']
assert FOLLOW(G, 'A') == ['b']
assert FOLLOW(G, 'B') == ['c']
assert FOLLOW(G, 'C') == ['c', '$']
assert FOLLOW(G, 'D') == ['c']
def print_FIRST_FOLLOW(G):
x = PrettyTable(['Production', 'First(alpha)', 'Follow(A)',])
for A, productions in G.iteritems():
for production in productions:
prod_str = "%s -> %-3s" %(A, production)
x.add_row([prod_str, FIRST(G, production[0]), FOLLOW(G, A)])
print x.get_string()
def generate_table(G):
M = {}
for n in NON_TERMINALS:
M[n] = {}
for m in TERMINALS + '$':
M[n][m] = []
for A, productions in G.iteritems():
for production in productions:
first = FIRST(G, production[0])
follow = FOLLOW(G, A)
for f in first:
if f != epsila:
M[A][f].append(production)
else:
for h in follow:
M[A][h].append(production)
return M
def print_TABLE(G):
M = generate_table(G)
x = PrettyTable()
x.add_column(' ', NON_TERMINALS)
for t in TERMINALS + '$':
column = []
for n in NON_TERMINALS:
production = M[n][t]
if len(production) == 0:
production = "Error"
if len(production) == 1:
production = production[0]
column.append(production)
x.add_column(t, column)
print x.get_string()
print_FIRST_FOLLOW(G)
print_TABLE(G)
def test_parse(G, inp):
M = generate_table(G)
inp = list(inp + RH_MARKER)
stack = [RH_MARKER, START_SYM]
action = '-'
while(True):
print "%-30s %-40s %-30s" %(stack, inp, action)
X = stack[-1]
e = inp[0]
if X in TERMINALS or X in '$':
if X == e:
stack.pop()
inp.pop(0)
action = "Terminal Match"
else:
print "Error due to terminal non matching"
return
else:
if len(M[X][e]) != 0:
production_to_append = list(M[X][e][0])
production_to_append.reverse()
stack.pop()
stack.extend(list(production_to_append))
action = "Replace with", M[X][e]
else:
print "Error"
return
# Remove epsila from stack
try:
stack.remove(epsila)
except ValueError:
pass
if X == '$':
print "Parsing success"
break
test_parse(G, TEST_INPUT)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment