linusyang/dfa.py

## dfa.py
#!/usr/bin/env python
#
# By Linus Yang <laokongzi@gmail.com>
# Written on iPad using Textastic Code Editor
# Licensed under GPL v3
#

'''
Function:
    Transform an NFA to DFA then simplify it

Limitation:
    - input file should be named as 'nfa_*.txt'
    - only one start node named x and end node named y in NFA
    - only one literal on the arrow is allowed in NFA
    - use 'e' to represent epsilon

Input Sample: (nfa_0.txt)
    x 5 e
    5 5 a
    5 5 b
    5 1 e
    1 3 a
    1 4 b
    3 2 a
    4 2 b
    2 6 e
    6 6 a
    6 6 b
    6 y e

Output Sample: (dfa_0.txt)
    {X, 1, 5} {1, 3, 5} {1, 4, 5}
    {1, 3, 5} {1, 2, 3, 5, 6, Y} {1, 4, 5}
    {1, 4, 5} {1, 3, 5} {1, 2, 4, 5, 6, Y}
    {1, 2, 3, 5, 6, Y} {1, 2, 3, 5, 6, Y} {1, 4, 5, 6, Y}
    {1, 2, 4, 5, 6, Y} {1, 3, 5, 6, Y} {1, 2, 4, 5, 6, Y}
    {1, 4, 5, 6, Y} {1, 3, 5, 6, Y} {1, 2, 4, 5, 6, Y}
    {1, 3, 5, 6, Y} {1, 2, 3, 5, 6, Y} {1, 4, 5, 6, Y}

    s a b
    0 1 2
    1 3 2
    2 1 4
    3* 3 5
    4* 6 4
    5* 6 4
    6* 3 5

    {{0}, {1}, {2}, {3, 4, 5, 6}}

    s a b
    0 1 2
    1 3 2
    2 1 3
    3* 3 3
'''

EPS = 'e'
START = 'x'
END = 'y'
fin = None
fout = None

def write(s):
    if fout:
        fout.write(s)
    else:
        print(s),

def str_set(s):
    x = list(s)
    if x:
        y = []
        res = '{'
        c = []
        for i in x:
            if i.isdigit():
                y.append(int(i))
            else:
                c.append(i)
        y.sort()
        if START in c:
            res += START.upper() + ', '
        for i in y:
            res += '%d' % i + ', '
        if END in c:
            res += END.upper() + '}'
        else:
            res = res[:-2] + '}'
        return res
    return '{}'

def eps_closure(nfa, node_set):
    if node_set == set([]):
        return node_set
    res = node_set.copy()
    for node in node_set:
        next_list = nfa.get(node)
        if next_list:
            for next in next_list:
                if next[1] == EPS:
                    res.add(next[0])
                    if next[0] != node:
                        res |= eps_closure(nfa, set([next[0]]))
    return res

def next_set(nfa, now_set, c):
    res = set([])
    for node in now_set:
        next_list = nfa.get(node)
        if next_list:
            for next in next_list:
                if next[1] == c:
                    res.add(next[0])
    return res

def main():
    nfa = {}
    lit = set([])
    for s in fin:
        e = s.lower().split()
        if nfa.get(e[0]):
            nfa[e[0]].append((e[1], e[2]))
        else:
            nfa[e[0]] = [(e[1], e[2])]
        lit.add(e[2])
    lit.remove(EPS)
    liter = list(lit)
    liter.sort()
    q = [eps_closure(nfa, set([START]))]
    status = [q[0]]
    dfa_str = ''
    dfa = {}
    end_node = []
    mid_node = []
    while q:
        now = q.pop(0)
        i = status.index(now)
        now_index = '%d' % i
        end_str = ''
        if END in now:
            end_str = '*'
            end_node.append(i)
        else:
            mid_node.append(i)
        write(str_set(now) + ' ')
        dfa_str += now_index + end_str + ' '
        next_dict = {}
        for c in liter:
            next = eps_closure(nfa, next_set(nfa, now, c))
            if not next in status and next:
                q.append(next)
                status.append(next)
            j = status.index(next) if next else -1
            next_index = '%d' % j
            write(str_set(next) + ' ')
            dfa_str += next_index + ' '
            next_dict[c] = j
        write('\n')
        dfa_str += '\n'
        dfa[i] = next_dict
    write('\ns %s\n%s\n' % (' '.join(liter), dfa_str))
    q = [[end_node, True], [mid_node, True]]
    fresh = True
    while fresh:
        now = q[0]
        for c in liter:
            next = {}
            for i in now[0]:
                if dfa[i][c] == -1:
                    if next.get(-1):
                        next[-1].append(i)
                    else:
                        next[-1] = [i]
                else:
                    j = 0
                    for x in q:
                        if dfa[i][c] in x[0]:
                            if next.get(j):
                                next[j].append(i)
                            else:
                                next[j] = [i]
                        j += 1
            splited = True
            now_split = next.values()
            if now[0] in now_split:
                splited = False
            else:
                for x in now_split:
                    q.append([x, True])
                break
        q.pop(0)
        if not splited:
            q.append([now[0], False])
        fresh = False
        for x in q:
            if x[1] == True:
                fresh = True
                break
    split = [x for x, y in q]
    split.sort()
    write(str(split).replace('[', '{').replace(']', '}') + '\n')
    for x in split:
        if len(x) > 1:
            rep = x[0]
            for i in xrange(1, len(x)):
                for j in dfa:
                    for c in liter:
                        if dfa[j][c] == x[i]:
                            dfa[j][c] = rep
                del dfa[x[i]]
    write('\ns %s\n' % (' '.join(liter)))
    for i in dfa:
        write('%d%s ' % (i, '*' if i in end_node else ''))
        for c in liter:
            write('%d ' % dfa[i][c])
        write('\n')
    fin.close()
    fout.close()

if __name__ == '__main__':
    import os
    now_dir = os.path.dirname(os.path.realpath(__file__))
    files = [x for x in os.listdir(now_dir) if os.path.isfile(x) and x.endswith('txt') and x.startswith('nfa_')]
    for x in files:
        fin = open(x, 'r')
        fout = open(x.replace('nfa_', 'dfa_'), 'w')
        main()
	#!/usr/bin/env python
	#
	# By Linus Yang <laokongzi@gmail.com>
	# Written on iPad using Textastic Code Editor
	# Licensed under GPL v3
	#

	'''
	Function:
	Transform an NFA to DFA then simplify it

	Limitation:
	- input file should be named as 'nfa_*.txt'
	- only one start node named x and end node named y in NFA
	- only one literal on the arrow is allowed in NFA
	- use 'e' to represent epsilon

	Input Sample: (nfa_0.txt)
	x 5 e
	5 5 a
	5 5 b
	5 1 e
	1 3 a
	1 4 b
	3 2 a
	4 2 b
	2 6 e
	6 6 a
	6 6 b
	6 y e

	Output Sample: (dfa_0.txt)
	{X, 1, 5} {1, 3, 5} {1, 4, 5}
	{1, 3, 5} {1, 2, 3, 5, 6, Y} {1, 4, 5}
	{1, 4, 5} {1, 3, 5} {1, 2, 4, 5, 6, Y}
	{1, 2, 3, 5, 6, Y} {1, 2, 3, 5, 6, Y} {1, 4, 5, 6, Y}
	{1, 2, 4, 5, 6, Y} {1, 3, 5, 6, Y} {1, 2, 4, 5, 6, Y}
	{1, 4, 5, 6, Y} {1, 3, 5, 6, Y} {1, 2, 4, 5, 6, Y}
	{1, 3, 5, 6, Y} {1, 2, 3, 5, 6, Y} {1, 4, 5, 6, Y}

	s a b
	0 1 2
	1 3 2
	2 1 4
	3* 3 5
	4* 6 4
	5* 6 4
	6* 3 5

	{{0}, {1}, {2}, {3, 4, 5, 6}}

	s a b
	0 1 2
	1 3 2
	2 1 3
	3* 3 3
	'''

	EPS = 'e'
	START = 'x'
	END = 'y'
	fin = None
	fout = None

	def write(s):
	if fout:
	fout.write(s)
	else:
	print(s),

	def str_set(s):
	x = list(s)
	if x:
	y = []
	res = '{'
	c = []
	for i in x:
	if i.isdigit():
	y.append(int(i))
	else:
	c.append(i)
	y.sort()
	if START in c:
	res += START.upper() + ', '
	for i in y:
	res += '%d' % i + ', '
	if END in c:
	res += END.upper() + '}'
	else:
	res = res[:-2] + '}'
	return res
	return '{}'

	def eps_closure(nfa, node_set):
	if node_set == set([]):
	return node_set
	res = node_set.copy()
	for node in node_set:
	next_list = nfa.get(node)
	if next_list:
	for next in next_list:
	if next[1] == EPS:
	res.add(next[0])
	if next[0] != node:
	res \|= eps_closure(nfa, set([next[0]]))
	return res

	def next_set(nfa, now_set, c):
	res = set([])
	for node in now_set:
	next_list = nfa.get(node)
	if next_list:
	for next in next_list:
	if next[1] == c:
	res.add(next[0])
	return res

	def main():
	nfa = {}
	lit = set([])
	for s in fin:
	e = s.lower().split()
	if nfa.get(e[0]):
	nfa[e[0]].append((e[1], e[2]))
	else:
	nfa[e[0]] = [(e[1], e[2])]
	lit.add(e[2])
	lit.remove(EPS)
	liter = list(lit)
	liter.sort()
	q = [eps_closure(nfa, set([START]))]
	status = [q[0]]
	dfa_str = ''
	dfa = {}
	end_node = []
	mid_node = []
	while q:
	now = q.pop(0)
	i = status.index(now)
	now_index = '%d' % i
	end_str = ''
	if END in now:
	end_str = '*'
	end_node.append(i)
	else:
	mid_node.append(i)
	write(str_set(now) + ' ')
	dfa_str += now_index + end_str + ' '
	next_dict = {}
	for c in liter:
	next = eps_closure(nfa, next_set(nfa, now, c))
	if not next in status and next:
	q.append(next)
	status.append(next)
	j = status.index(next) if next else -1
	next_index = '%d' % j
	write(str_set(next) + ' ')
	dfa_str += next_index + ' '
	next_dict[c] = j
	write('\n')
	dfa_str += '\n'
	dfa[i] = next_dict
	write('\ns %s\n%s\n' % (' '.join(liter), dfa_str))
	q = [[end_node, True], [mid_node, True]]
	fresh = True
	while fresh:
	now = q[0]
	for c in liter:
	next = {}
	for i in now[0]:
	if dfa[i][c] == -1:
	if next.get(-1):
	next[-1].append(i)
	else:
	next[-1] = [i]
	else:
	j = 0
	for x in q:
	if dfa[i][c] in x[0]:
	if next.get(j):
	next[j].append(i)
	else:
	next[j] = [i]
	j += 1
	splited = True
	now_split = next.values()
	if now[0] in now_split:
	splited = False
	else:
	for x in now_split:
	q.append([x, True])
	break
	q.pop(0)
	if not splited:
	q.append([now[0], False])
	fresh = False
	for x in q:
	if x[1] == True:
	fresh = True
	break
	split = [x for x, y in q]
	split.sort()
	write(str(split).replace('[', '{').replace(']', '}') + '\n')
	for x in split:
	if len(x) > 1:
	rep = x[0]
	for i in xrange(1, len(x)):
	for j in dfa:
	for c in liter:
	if dfa[j][c] == x[i]:
	dfa[j][c] = rep
	del dfa[x[i]]
	write('\ns %s\n' % (' '.join(liter)))
	for i in dfa:
	write('%d%s ' % (i, '*' if i in end_node else ''))
	for c in liter:
	write('%d ' % dfa[i][c])
	write('\n')
	fin.close()
	fout.close()

	if __name__ == '__main__':
	import os
	now_dir = os.path.dirname(os.path.realpath(__file__))
	files = [x for x in os.listdir(now_dir) if os.path.isfile(x) and x.endswith('txt') and x.startswith('nfa_')]
	for x in files:
	fin = open(x, 'r')
	fout = open(x.replace('nfa_', 'dfa_'), 'w')
	main()