Skip to content

Instantly share code, notes, and snippets.

@vrthra
Last active July 2, 2022 05:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vrthra/0c52cb5ae6afdd6f97bc106061d3dfdc to your computer and use it in GitHub Desktop.
Save vrthra/0c52cb5ae6afdd6f97bc106061d3dfdc to your computer and use it in GitHub Desktop.
import sys
def add(L, u, j, R):
R.append((L, u, j))
def pop(s, i, R):
s, (L, i_) = s # pop(s, i, R)
add(L, s, i, R) # check and add
return s, R
def push(s, t):
return (tuple(s), t)
def gll(I):
i = 0
R = []
s = push([], ('L0', 0))
L = 'LS'
while True:
if L == 'L0':
if R:
(L, s1, j), *R = R # remove(L, si, j) from R
if L == 'L0' and s1 == () and j == len(I)-1: # discount $
return "success"
else:
s = s1
i = j
#L = 'L' # goto L
continue
else:
return "error"
elif L == 'LS':
#if I[i] in {'a', 'c'}:
add('LS1', s, i, R)
#if I[i] in {'a', 'b'}:
add('LS2', s, i, R)
#if I[i] in {'d', '$'}:
add('LS3', s, i, R)
add('LS4', s, i, R)
L = 'L0' # fall through
continue
elif L == 'LS1':
s = push(s, ('L1', i))
L = 'LA'
continue
elif L == 'L1':
s = push(s, ('L2', i))
L = 'LS'
continue
elif L == 'L2':
if I[i] == 'd':
i = i+1
s, R = pop(s, i, R)
L = 'L0'
continue
elif L == 'LS2':
s = push(s, ('L3', i))
L = 'LB'
continue
elif L == 'L3':
s = push(s, ('L4', i))
L = 'LS'
continue
elif L == 'L4':
s, R = pop(s, i, R)
L = 'L0'
continue
elif L == 'LS3':
s, R = pop(s, i, R)
L = 'L0'
continue
elif L == 'LS4':
if I[i] == 'd':
i = i+1
#s = push(s, ('L5', i))
L = 'LS4.1'
continue
elif L == 'LS4.1':
if I[i] == 'g':
i = i+1
s = push(s, ('L5', i))
L = 'LC'
continue
elif L == 'L5':
s, R = pop(s, i, R)
L = 'L0'
continue
elif L == 'LC':
add('LC1', s, i, R)
L = 'L0'
continue
elif L == 'LA':
add('LA1', s, i, R)
add('LA2', s, i, R)
L = 'L0'
continue
elif L == 'LA1':
if I[i] == 'a':
i = i+1
s, R = pop(s, i, R)
L = 'L0'
continue
elif L == 'LA2':
if I[i] == 'c':
i = i+1
s, R = pop(s, i, R)
L = 'L0'
continue
elif L == 'LB':
add('LB1', s, i, R)
add('LB2', s, i, R)
L = 'L0'
continue
elif L == 'LB1':
if I[i] == 'a':
i = i+1
s, R = pop(s, i, R)
L = 'L0'
continue
elif L == 'LB2':
if I[i] == 'b':
i = i+1
s, R = pop(s, i, R)
L = 'L0'
continue
elif L == 'LC1':
if I[i] == 'c':
i = i+1
s, R = pop(s, i, R)
L = 'L0'
continue
else:
assert False
I = 'dgc$'
assert gll(I) == 'success'
#I = 'aad$'
#assert gll(I) == 'success'
#I = 'aaadd$'
#assert gll(I) == 'success'
#I = 'bd$'
#assert gll(I) == 'error'
#print('.')
import simplefuzzer
G = {
'<S>': [
['<A>', '<S>', 'd'],
['<B>', '<S>'],
['d', 'g', '<C>'],
[]],
'<A>': [['a'], ['c']],
'<B>': [['a'], ['b']],
'<C>': [['c']]
}
gf = simplefuzzer.LimitFuzzer(G)
for i in range(1000):
s = gf.iter_fuzz(key='<S>', max_depth=100)
print(s)
assert gll(s+'$') == 'success'
print(s)
# The final version.
class Node:
def __init__(self, L, j, children):
self.L, self.i, self.children = L, j, children
def __repr__(self): return str((self.L, self.i, self.children))
class GSS:
def __init__(self): self.gss, self.P = {}, {}
def get(self, L, i, children):
my_label = (L, i)
if my_label not in self.gss:
self.gss[my_label] = Node(L, i, children)
assert my_label not in self.P
self.P[my_label] = []
return self.gss[my_label]
def add_to_P(self, u, j):
label = (u.L, u.i)
self.P[label].append(j)
def __repr__(self): return str(self.gss)
class GLLParser:
def create(self, L, u, j):
v = self.gss.get(L, j, [u])
if u not in v.children:
v.children.append(u)
label = (L, j)
for k in self.gss.P[label]:
self.add(L, u, k)
return v
def add(self, L, u, j):
if (L, u) not in self.U[j]:
self.U[j].append((L, u))
assert (L,u,j) not in self.R
self.R.append((L, u, j))
def pop(self, u, j):
if u != self.u0:
self.gss.add_to_P(u, j)
for v in u.children:
self.add(u.L, v, j)
return u
def __init__(self, input_str):
self.R = []
self.gss = GSS()
self.I = input_str
self.m = len(self.I) # |I| + 1
self.u1 = self.gss.get('L0', 0, [])
self.u0 = self.gss.get('$', self.m, [])
self.u1.children.append(self.u0)
self.U = []
for j in range(self.m): # 0<=j<=m
self.U.append([]) # U_j = empty
def compile_terminal(key, n_alt, r_pos, r_len, token):
if r_len == r_pos:
Lnxt = '_'
else:
Lnxt = '%s[%d]_%d' % (key, n_alt, r_pos+1)
return '''
elif L == '%s[%d]_%d':
if parser.I[i] == '%s':
i = i+1
L = '%s'
else:
L = 'L0'
continue
''' % (key, n_alt, r_pos, token, Lnxt)
def compile_nonterminal(key, n_alt, r_pos, r_len, token):
if r_len == r_pos:
Lnxt = '_'
else:
Lnxt = '%s[%d]_%d' % (key, n_alt, r_pos+1)
return '''
elif L == '%s[%d]_%d':
c_u = parser.create('%s', c_u, i)
L = '%s'
continue
''' % (key, n_alt, r_pos, Lnxt, token)
def compile_rule(key, n_alt, rule):
res = []
for i, t in enumerate(rule):
if (t[0],t[-1]) == ('<', '>'):
r = compile_nonterminal(key, n_alt, i, len(rule), t)
else:
r = compile_terminal(key, n_alt, i, len(rule), t)
res.append(r)
res.append('''
elif L == '%s[%d]_%d':
L = 'L_'
continue
''' % (key, n_alt, len(rule)))
return '\n'.join(res)
def compile_def(key, definition):
res = []
res.append('''
elif L == '%s':
''' % key)
for n_alt,rule in enumerate(definition):
res.append('''
parser.add( '%s[%d]_0', c_u, i)''' % (key, n_alt))
res.append('''
# def
L = 'L0'
continue''')
for n_alt,rule in enumerate(definition):
r = compile_rule(key, n_alt, rule)
res.append(r)
return '\n'.join(res)
def compile_grammar(g, start):
res = ['''
def parse_string(parser):
c_u = parser.u1
i = 0
L = '%s' # starting state
while True:
if L == 'L0':
if parser.R:
(L, c_u, i), *parser.R = parser.R
continue
else:
if ('L0', parser.u0) in parser.U[parser.m-1]: return 'success'
else: return 'failed'
elif L == 'L_':
c_u = parser.pop(c_u, i)
L = 'L0'
continue
''' % start]
for k in g:
r = compile_def(k, g[k])
res.append(r)
res.append('''
else:
assert False
if __name__ == '__main__':
import simplefuzzer
G = {
'<S>': [
['<A>', '<S>', 'd'],
['<B>', '<S>'],
['g', 'p', '<C>'],
[]],
'<A>': [['a'], ['c']],
'<B>': [['a'], ['b']],
'<C>': ['c']
}
import sys
import gll
mystr = sys.argv[1]
g = gll.GLLParser(mystr)
assert parse_string(g) == 'success'
gf = simplefuzzer.LimitFuzzer(G)
for i in range(1000):
s = gf.iter_fuzz(key='<S>', max_depth=100)
print(s)
g = gll.GLLParser(s+'$')
assert parse_string(g) == 'success'
print(g.gss)
''')
return '\n'.join(res)
if __name__ == '__main__':
import simplefuzzer
G = {
'<S>': [
['<A>', '<S>', 'd'],
['<B>', '<S>'],
['g', 'p', '<C>'],
[]],
'<A>': [['a'], ['c']],
'<B>': [['a'], ['b']],
'<C>': ['c']
}
res = compile_grammar(G, '<S>')
print(res)
{
'<S>': [
['<A>', '<S>', 'd'],
['<B>', '<S>'],
[]],
'<A>': [
['a'],
['c']],
'<B>': [
['a'],
['b']]
}
class Node:
def __init__(self, L, j, children):
self.L, self.i, self.children = L, j, children
def __repr__(self): return str((self.L, self.i, self.children))
class GSS:
def __init__(self): self.gss, self.P = {}, {}
def get(self, L, i, children):
my_label = (L, i)
if my_label not in self.gss:
self.gss[my_label] = Node(L, i, children)
assert my_label not in self.P
self.P[my_label] = []
return self.gss[my_label]
def add_to_P(self, u, j):
label = (u.L, u.i)
self.P[label].append(j)
def __repr__(self): return str(self.gss)
class GLLParser:
def create(self, L, u, j):
v = self.gss.get(L, j, [u])
if u not in v.children:
v.children.append(u)
label = (L, j)
for k in self.gss.P[label]:
self.add(L, u, k)
return v
def add(self, L, u, j):
if (L, u) not in self.U[j]:
self.U[j].append((L, u))
assert (L,u,j) not in self.R
self.R.append((L, u, j))
def pop(self, u, j):
if u != self.u0:
self.gss.add_to_P(u, j)
for v in u.children:
self.add(u.L, v, j)
return u
def __init__(self, input_str):
self.R = []
self.gss = GSS()
self.I = input_str
self.m = len(self.I) # |I| + 1
self.u1 = self.gss.get('L0', 0, [])
self.u0 = self.gss.get('$', self.m, [])
self.u1.children.append(self.u0)
self.U = []
for j in range(self.m): # 0<=j<=m
self.U.append([]) # U_j = empty
def compile_terminal(key, n_alt, r_pos, r_len, token):
if r_len == r_pos:
Lnxt = '_'
else:
Lnxt = '%s[%d]_%d' % (key, n_alt, r_pos+1)
return '''\
elif L == '%s[%d]_%d':
if parser.I[i] == '%s':
i = i+1
L = '%s'
else:
L = 'L0'
continue
''' % (key, n_alt, r_pos, token, Lnxt)
def compile_nonterminal(key, n_alt, r_pos, r_len, token):
if r_len == r_pos:
Lnxt = '_'
else:
Lnxt = '%s[%d]_%d' % (key, n_alt, r_pos+1)
return '''\
elif L == '%s[%d]_%d':
c_u = parser.create('%s', c_u, i)
L = '%s'
continue
''' % (key, n_alt, r_pos, Lnxt, token)
def compile_rule(key, n_alt, rule):
res = []
for i, t in enumerate(rule):
if (t[0],t[-1]) == ('<', '>'):
r = compile_nonterminal(key, n_alt, i, len(rule), t)
else:
r = compile_terminal(key, n_alt, i, len(rule), t)
res.append(r)
res.append('''\
elif L == '%s[%d]_%d':
L = 'L_'
continue
''' % (key, n_alt, len(rule)))
return '\n'.join(res)
def compile_def(key, definition):
res = []
res.append('''\
elif L == '%s':
''' % key)
for n_alt,rule in enumerate(definition):
res.append('''\
parser.add( '%s[%d]_0', c_u, i)''' % (key, n_alt))
res.append('''
# def
L = 'L0'
continue''')
for n_alt,rule in enumerate(definition):
r = compile_rule(key, n_alt, rule)
res.append(r)
return '\n'.join(res)
def compile_grammar(g, start):
res = ['''\
def parse_string(parser):
c_u = parser.u1
i = 0
L = '%s' # starting state
while True:
if L == 'L0':
if parser.R:
(L, c_u, i), *parser.R = parser.R
continue
else:
if ('L0', parser.u0) in parser.U[parser.m-1]: return 'success'
else: return 'failed'
elif L == 'L_':
c_u = parser.pop(c_u, i)
L = 'L0'
continue
''' % start]
for k in g:
r = compile_def(k, g[k])
res.append(r)
res.append('''
else:
assert False
if __name__ == '__main__':
import simplefuzzer
G = {
'<S>': [
['<A>', '<S>', 'd'],
['<B>', '<S>'],
['g', 'p', '<C>'],
[]],
'<A>': [['a'], ['c']],
'<B>': [['a'], ['b']],
'<C>': ['c']
}
import sys
import gss_gll as gll
mystr = sys.argv[1]
g = gll.GLLParser(mystr)
assert parse_string(g) == 'success'
gf = simplefuzzer.LimitFuzzer(G)
for i in range(1000):
s = gf.iter_fuzz(key='<S>', max_depth=100)
print(s)
g = gll.GLLParser(s+'$')
assert parse_string(g) == 'success'
print(g.gss)
''')
return '\n'.join(res)
if __name__ == '__main__':
import simplefuzzer
G = {
'<S>': [
['<A>', '<S>', 'd'],
['<B>', '<S>'],
['g', 'p', '<C>'],
[]],
'<A>': [['a'], ['c']],
'<B>': [['a'], ['b']],
'<C>': ['c']
}
res = compile_grammar(G, '<S>')
print(res)
# g = GLLParser('gpc$')
# assert gll(g) == 'success' # Success
# f = g.gss.gss[(g.u1.L, g.u1.i)]
# print(f)
# g = GLLParser('ad$')
# assert gll(g) == 'success' # Success
# f = g.gss.gss[(g.u1.L, g.u1.i)]
# print(f)
# g = GLLParser('aad$')
# assert gll(g) == 'success' # Success
# g = GLLParser('aaadd$')
# assert gll(g) == 'success' # Success
# g = GLLParser('bd$')
# assert gll(g) == 'failed' # Error
import simplefuzzer
G = {
'<S>': [
['<A>', '<S>', 'd'],
['<B>', '<S>'],
['g', 'p', '<C>'],
[]],
'<A>': [['a'], ['c']],
'<B>': [['a'], ['b']],
'<C>': ['c']
}
class Node:
def __init__(self, L, j, children):
self.L, self.i, self.children = L, j, children
def __repr__(self):
return str((self.L, self.i, self.children))
class GSS:
def __init__(self):
self.gss, self.P = {}, {}
def get(self, L, i, children):
my_label = (L, i)
if my_label not in self.gss:
self.gss[my_label] = Node(L, i, children)
assert my_label not in self.P
self.P[my_label] = []
return self.gss[my_label]
def add_to_P(self, u, j):
label = (u.L, u.i)
self.P[label].append(j)
def __repr__(self):
return str(self.gss)
class GLLParser:
def create(self, L, u, j):
v = self.gss.get(L, j, [u])
if u not in v.children:
v.children.append(u)
label = (L, j)
for k in self.gss.P[label]:
self.add(L, u, k)
return v
def add(self, L, u, j):
if (L, u) not in self.U[j]:
self.U[j].append((L, u))
assert (L,u,j) not in self.R
self.R.append((L, u, j))
def pop(self, u, j):
if u != self.u0:
self.gss.add_to_P(u, j)
for v in u.children:
self.add(u.L, v, j)
return u
def __init__(self, input_str):
self.R = []
self.gss = GSS()
self.I = input_str
self.m = len(self.I) # |I| + 1
self.u1 = self.gss.get('L0', 0, [])
self.u0 = self.gss.get('$', self.m, [])
self.u1.children.append(self.u0)
self.U = []
for j in range(self.m): # 0<=j<=m
self.U.append([]) # U_j = empty
def gll(parser):
c_u = parser.u1
i = 0
L = 'LS' # starting state
while True:
if L == 'L0':
if parser.R:
(L, c_u, i), *parser.R = parser.R
continue
else:
if ('L0', parser.u0) in parser.U[parser.m-1]: return 'success'
else: return 'failed'
elif L == 'L_':
c_u = parser.pop(c_u, i)
L = 'L0'
continue
elif L == 'LS':
#if I[i] in {'a', 'c'}:
parser.add('LS1_0', c_u, i)
#if I[i] in {'a', 'b'}:
parser.add('LS2_0', c_u, i)
#if I[i] in {'d', '$'}:
parser.add('LS3_0', c_u, i)
parser.add('LS4_0', c_u, i)
L = 'L0'
continue
elif L == 'LS1_0':
c_u = parser.create('LS1_1', c_u, i)
L = 'LA'
continue
elif L == 'LS1_1':
c_u = parser.create('LS1_2', c_u, i)
L = 'LS'
continue
elif L == 'LS1_2':
if parser.I[i] == 'd':
i = i+1
L = 'L_'
else:
L = 'L0'
continue
elif L == 'LS2_0':
c_u = parser.create('LS2_1', c_u, i)
L = 'LB'
continue
elif L == 'LS2_1':
c_u = parser.create('LS2_2', c_u, i)
L = 'LS'
continue
elif L == 'LS2_2':
L = 'L_'
continue
elif L == 'LS3_0':
if parser.I[i] == 'g':
i = i+1
L = 'LS3_1'
else:
L = 'L0'
continue
elif L == 'LS3_1':
if parser.I[i] == 'p':
i = i+1
L = 'LS3_2'
else:
L = 'L0'
continue
elif L == 'LS3_2':
c_u = parser.create('LS3_3', c_u, i)
L = 'LC'
continue
elif L == 'LS3_3':
L = 'L_'
continue
elif L == 'LS4_0':
L = 'L_'
continue
elif L == 'LC':
parser.add('LC1_0', c_u, i)
L = 'L0'
continue
elif L == 'LC1_0':
if parser.I[i] == 'c':
i = i+1
L = 'L_'
else:
L = 'L0'
continue
elif L == 'LA':
#if I[i] == 'a':
parser.add('LA1_0', c_u, i)
#if I[i] == 'c':
parser.add('LA2_0', c_u, i)
L = 'L0'
continue
elif L == 'LA1_0':
if parser.I[i] == 'a':
i = i+1
L = 'L_'
else:
L = 'L0'
continue
elif L == 'LA2_0':
if parser.I[i] == 'c':
i = i+1
L = 'L_'
else:
L = 'L0'
continue
elif L == 'LB':
#if I[i] == 'a':
parser.add('LB1_0', c_u, i)
#if I[i] == 'b':
parser.add('LB2_0', c_u, i)
L = 'L0'
continue
elif L == 'LB1_0':
if parser.I[i] == 'a':
i = i+1
L = 'L_'
else:
L = 'L0'
continue
elif L == 'LB2_0':
if parser.I[i] == 'b':
i = i+1
L = 'L_'
else:
L = 'L0'
continue
else:
assert False
def compile_def(g, definition):
for rule in definition:
compile_rule(rule)
def compile_rule(g, rule):
res = []
for i, t in enumerate(rule):
if (t[0],t[-1]) == ('<', '>'):
r = compile_nonterminal(t, i)
res.append(r)
else:
r = compile_terminal(t, i)
res.append(r)
def compile_terminal(t, i):
return '''
if parser.I[i] == '%s':
i = i+1
c_u = parser.pop(c_u, i)
L = 'L0'
continue
''' % t
g = GLLParser('gpc$')
assert gll(g) == 'success' # Success
f = g.gss.gss[(g.u1.L, g.u1.i)]
print(f)
g = GLLParser('ad$')
assert gll(g) == 'success' # Success
f = g.gss.gss[(g.u1.L, g.u1.i)]
print(f)
g = GLLParser('aad$')
assert gll(g) == 'success' # Success
g = GLLParser('aaadd$')
assert gll(g) == 'success' # Success
g = GLLParser('bd$')
assert gll(g) == 'failed' # Error
gf = simplefuzzer.LimitFuzzer(G)
for i in range(1000):
s = gf.iter_fuzz(key='<S>', max_depth=100)
print(s)
g = GLLParser(s+'$')
assert gll(g) == 'success'
print(g.gss)
class GLLParser:
def __init__(self, s):
self.R = []
self.I = s
def add(self, L, u, j):
self.R.append((L, u, j))
def pop(self, s, i):
s, (L, i_) = s
self.add(L, s, i)
return s
def push(self, L, s, i):
return (tuple(s), (L, i))
def compile_terminal(key, n_alt, r_pos, r_len, token):
if r_len == r_pos:
Lnxt = '_'
else:
Lnxt = '%s[%d]_%d' % (key, n_alt, r_pos+1)
return '''\
elif L == '%s[%d]_%d':
if parser.I[i] == '%s':
i = i+1
L = '%s'
else:
L = 'L0'
continue
''' % (key, n_alt, r_pos, token, Lnxt)
def compile_nonterminal(key, n_alt, r_pos, r_len, token):
if r_len == r_pos:
Lnxt = '_'
else:
Lnxt = '%s[%d]_%d' % (key, n_alt, r_pos+1)
return '''\
elif L == '%s[%d]_%d':
s = parser.push('%s', s, i)
L = '%s'
continue
''' % (key, n_alt, r_pos, Lnxt, token)
def compile_rule(key, n_alt, rule):
res = []
for i, t in enumerate(rule):
if (t[0],t[-1]) == ('<', '>'):
r = compile_nonterminal(key, n_alt, i, len(rule), t)
else:
r = compile_terminal(key, n_alt, i, len(rule), t)
res.append(r)
res.append('''\
elif L == '%s[%d]_%d':
L = 'L_'
continue
''' % (key, n_alt, len(rule)))
return '\n'.join(res)
def compile_def(key, definition):
res = []
res.append('''\
elif L == '%s':
''' % key)
for n_alt,rule in enumerate(definition):
res.append('''\
parser.add( '%s[%d]_0', s, i)''' % (key, n_alt))
res.append('''
L = 'L0'
continue''')
for n_alt,rule in enumerate(definition):
r = compile_rule(key, n_alt, rule)
res.append(r)
return '\n'.join(res)
def compile_grammar(g, start):
res = ['''\
def parse_string(parser):
L, s, i = '%s', parser.push('L0', [], 0), 0
while True:
if L == 'L0':
if parser.R:
(L, s, i), *parser.R = parser.R
if (L, s, i) == ('L0', (), len(parser.I)-1): return 'success'
else: continue
else: return 'error'
elif L == 'L_':
s = parser.pop(s, i)
L = 'L0'
continue
''' % start]
for k in g:
r = compile_def(k, g[k])
res.append(r)
res.append('''\
else:
assert False
if __name__ == '__main__':
import simplefuzzer
G = {
'<S>': [
['<A>', '<S>', 'd'],
['<B>', '<S>'],
['g', 'p', '<C>'],
[]],
'<A>': [['a'], ['c']],
'<B>': [['a'], ['b']],
'<C>': ['c']
}
import sys
import stack_gll as gll
mystr = sys.argv[1]
g = gll.GLLParser(mystr)
assert parse_string(g) == 'success'
gf = simplefuzzer.LimitFuzzer(G)
for i in range(1000):
s = gf.iter_fuzz(key='<S>', max_depth=100)
print(s)
g = gll.GLLParser(s+'$')
assert parse_string(g) == 'success'
''')
return '\n'.join(res)
if __name__ == '__main__':
import simplefuzzer
G = {
'<S>': [
['<A>', '<S>', 'd'],
['<B>', '<S>'],
['g', 'p', '<C>'],
[]],
'<A>': [['a'], ['c']],
'<B>': [['a'], ['b']],
'<C>': ['c']
}
res = compile_grammar(G, '<S>')
print(res)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment