Skip to content

Instantly share code, notes, and snippets.

@calroc
Created June 26, 2018 01:05
Show Gist options
  • Save calroc/ad4321dc4c43d5f3e09d39283461b8fe to your computer and use it in GitHub Desktop.
Save calroc/ad4321dc4c43d5f3e09d39283461b8fe to your computer and use it in GitHub Desktop.
Dirty but functional implementation of Brzozowski’s derivatives of regular expressions.
from itertools import product
phi = frozenset()
y = frozenset({''})
syms = O, l = frozenset({'0'}), frozenset({'1'})
AND, CONS, KSTAR, NOT, OR = 'and cons * not or'.split()
I = (KSTAR, (OR, O, l)) # Match anything. I.e. [01]* Spelled I or .
# The expression from Brzozowski: (.111.) & (.01 + 11*)'
# ( a ) & ( b + c )'
a = (CONS, I, (CONS, l, (CONS, l, (CONS, l, I))))
b = (CONS, I, (CONS, O, l))
c = (CONS, l, (KSTAR, l))
it = (AND, a, (NOT, (OR, b, c)))
def nully(R):
'''
Return y if y is in R otherwise phi.
'''
if R in syms or R == phi:
return phi
if R == y:
return y
tag = R[0]
if tag == KSTAR:
return y
if tag == NOT:
return y if nully(R[1]) == phi else phi
left, right = nully(R[1]), nully(R[2])
return left & right if tag in {AND, CONS} else left | right
# This is the straightforward version with no "compaction".
# It works fine, but does waaaay too much work because the
# expressions grow each derivation.
##def D(symbol):
##
## def derv(R):
##
## if R == {symbol}:
## return y
##
## if R == y or R == phi or isinstance(R, set) and len(R) == 1:
## return phi
##
## assert isinstance(R, tuple), repr(R)
## tag = R[0]
## if tag == KSTAR:
## return (CONS, derv(R[1]), R)
##
## if tag == NOT:
## return (NOT, derv(R[1]))
##
## left, right = R[1:]
## if tag == CONS:
## A = (CONS, derv(left), right)
## if nully(left) == phi:
## return A
## return (OR, A, derv(right))
##
## left, right = derv(left), derv(right)
## if isinstance(left, set) and isinstance(right, set):
## if tag == AND:
## return left & right
## assert tag == OR
## return left | right
## return (tag, left, right)
##
## return derv
def D_compaction(symbol):
def derv(R):
if R == {symbol}:
return y
if R == y or R == phi or R in syms:
return phi
tag = R[0]
if tag == KSTAR:
return _make_cons(derv(R[1]), R)
if tag == NOT:
return (NOT, derv(R[1]))
left, right = R[1:]
if tag == CONS:
k = derv(left)
if k == phi or right == phi:
A = phi
elif k == y:
A = right
else:
A = _make_cons(k, right)
return A if nully(left) == phi else _make_or(A, derv(right))
left, right = derv(left), derv(right)
return _make_and(left, right) if tag == AND else _make_or(left, right)
return derv
def _make_and(a, b):
if a == I: return b
if b == I: return a
return (AND, a, b)
def _make_or(a, b):
if a == phi: return b
if b == phi: return a
if a == I or b == I: return I
return (OR, a, b)
def _make_cons(a, b):
if a == y: return b
if b == y: return a
if a == phi or b == phi: return phi
return (CONS, a, b)
class Memo(object):
def __init__(self, f):
self.f = f
self.calls = self.hits = 0
self.mem = {}
def __call__(self, key):
self.calls += 1
try:
result = self.mem[key]
self.hits += 1
except KeyError:
result = self.mem[key] = self.f(key)
return result
o, z = Memo(D_compaction('0')), Memo(D_compaction('1'))
def S(re):
return o(re), z(re)
def stringy(re):
if re == I:
return '.'
if re in syms:
return next(iter(re))
if re == y:
return '^'
if re == phi:
return 'X'
assert isinstance(re, tuple), repr(re)
tag = re[0]
if tag == KSTAR:
body = stringy(re[1])
if not body:
return ''
return '(' + body + ')*'
if tag == NOT:
body = stringy(re[1])
if not body:
return ''
if len(body) > 1:
return '(' + body + ")'"
return body + "'"
left, right = stringy(re[1]), stringy(re[2])
if tag == CONS:
return left + right
if tag == OR:
return left + ' | ' + right
return '(' + left + ') | (' + right + ')'
if tag == AND:
return '(' + left + ') & (' + right + ')'
# print it
## ('and',
## ('cons',
## ('*', ('or', set(['1']), set(['0']))),
## set(['1']),
## set(['1']),
## set(['1']),
## ('*', ('or', set(['1']), set(['0'])))),
## ('not', ('or',
## ('cons',
## ('*', ('or', set(['1']), set(['0']))),
## set(['0']),
## set(['1'])),
## ('cons', set(['1']), ('*', set(['1']))))))
##print stringy(it)
##R = it
###ds = [o, o, o,]
###ds = [z, o, z, o, z, o, z, o, ]
###ds = [z] * 7
##ds = [o] * 7
##while ds:
## R = ds.pop()(R)
## print stringy(R)
h = (CONS, (CONS, O, l), I)
j = (CONS, (CONS, O, l), (KSTAR, l))
j = (CONS, (CONS, l, O), (CONS, O, l))
k = (CONS, (CONS, l, l), (CONS, O, l))
d = (CONS, I, (CONS, O, (CONS, O, l)))
##nully(z(o(o(d))))
if __name__ == '__main__':
REs = set()
N = 5
names = list(product(*(N * [(0, 1)])))
dervs = list(product(*(N * [(o, z)])))
for name, ds in zip(names, dervs):
R = it # (OR, j, k) # (CONS, O, (CONS, l, (CONS, f, l)))
print
print ''.join(str(n) for n in reversed(name))
print stringy(R)
ds = list(ds)
while ds:
R = ds.pop()(R)
if R == phi:
print ' X'
break
if R == I:
break
REs.add(R)
print (' ', '+')[nully(R) == y], stringy(R)
print nully(R) == y
print
print '=' * 80
print o.hits, '/', o.calls
print z.hits, '/', z.calls
for s in sorted(
(stringy(r) for r in REs),
key=lambda n: (len(n), n)):
print s
'''
...
(.111.) & ((.01 | 11*)')
...
(.01 )'
(.01 | 1 )'
(.01 | ^ )'
(.01 | 1*)'
(.111.) & ((.01 | 1 )')
(.111. | 11.) & ((.01 | ^ )')
(.111. | 11.) & ((.01 | 1*)')
(.111. | 11. | 1.) & ((.01 )')
(.111. | 11. | 1.) & ((.01 | 1*)')
'''
##for name, (d0, d1, d2, d3) in zip(names, dervs):
## print name, nully(d3(d2(d1(d0(it)))))
##def check(s, re):
## print
## for ch in s:
## der = D_compaction(ch)
## re = der(re)
## print '...', len(str(re)), nully(re) == y
## return nully(re) == y
##for s in (
## '11011100',
## '110111',
## '11011',
## '11101',
## '11111',
## '110110110110110110110110111'
## ):
## print s, check(s, it)
##der = D('1')
##for t in (phi, y, l, O, g, I, f, e, d, c, b, a, it):
### der(t)
## print
## print t
## print
### print nully(t)
## print
## print der(t)
## print
## print '=' * 40
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment