Skip to content

Instantly share code, notes, and snippets.

@sunghwan2789
Last active May 21, 2022 05:35
Show Gist options
  • Save sunghwan2789/f4f8d339a02c9d710bbd3781e4ded194 to your computer and use it in GitHub Desktop.
Save sunghwan2789/f4f8d339a02c9d710bbd3781e4ded194 to your computer and use it in GitHub Desktop.
Converts NFA to DFA using Epsilon Closure
#include <vector>
#include <set>
#include <string>
#include <iostream>
#include <map>
#include <tuple>
#include <utility>
#include <cctype>
#include <queue>
#include <algorithm>
#include <iterator>
const std::string Epsilon = "varepsilon";
int main()
{
std::map<std::string, std::map<std::string, std::vector<std::string>>> nfa;
std::string startState;
std::vector<std::string> finishStates;
std::set<std::string> terminals;
std::string terminal;
std::string input, u, v;
//
// Read start state and finish state
//
std::cin >> startState;
std::cin >> input;
while (!input.empty()) {
// split finishStates with ','
int tokenPosition = input.find(',');
// tokenization finished
if (tokenPosition == -1) {
finishStates.push_back(input);
break;
}
// push it
finishStates.emplace_back(input.substr(0, tokenPosition + 1));
// and find next token
input = input.substr(tokenPosition + 1);
}
//
// Read lines
//
while (std::cin >> input) {
// type check
if (input[0] == '\\') {
terminal = input.substr(1);
if (terminal != Epsilon) {
terminals.emplace(terminal);
}
continue;
}
u = input;
std::cin >> v;
if (!nfa.count(u)) {
nfa[u] = std::map<std::string, std::vector<std::string>>();
}
if (!nfa[u].count(terminal)) {
nfa[u][terminal] = std::vector<std::string>();
}
nfa[u][terminal].emplace_back(v);
}
// for (auto &i : nfa) {
// std::cout << i.first << " : ";
// for (auto &j : i.second) {
// std::cout << "{" << j.first << "-> " << j.second << "} ";
// }
// std::cout << "\n";
// }
//
// Epsilon closure
//
auto epsilonClosure = [&](const std::set<std::string> & states) {
std::set<std::string> ret = states;
// find equivalent states by bfs
std::queue<std::string> q;
for (auto& i : states) {
q.emplace(i);
}
while (!q.empty()) {
auto u = q.front(); q.pop();
if (nfa.count(u) && nfa[u].count(Epsilon)) {
for (auto& v : nfa[u][Epsilon]) {
if (!ret.count(v)) {
ret.emplace(v);
q.emplace(v);
}
}
}
}
return ret;
};
auto uniqueState = [&]() {
static int sequenceNumber = 0;
std::string name = "q_";
char buffer[30];
sprintf(buffer, "%d", sequenceNumber++);
return name + buffer;
};
auto joinStates = [&](const std::set<std::string> & states) {
std::string ret = " left{";
std::vector<std::string> numericOrder(states.begin(), states.end());
std::sort(numericOrder.begin(), numericOrder.end(), [](auto & a, auto & b) {
if (a.size() != b.size()) {
return a.size() < b.size();
}
return a < b;
});
if (!numericOrder.empty()) {
ret += numericOrder[0];
}
for (int i = 1; i < numericOrder.size(); i++) {
ret += ",` " + numericOrder[i];
}
return ret + "right}";
};
std::queue<std::set<std::string>> Ds;
std::map<std::set<std::string>, std::string> alias;
auto joinDs = [&]() {
std::string ret = " left{";
std::vector<std::string> numericOrder;
for (auto copy = Ds; !copy.empty(); copy.pop()) {
numericOrder.emplace_back(alias[copy.front()]);
}
std::sort(numericOrder.begin(), numericOrder.end(), [](auto & a, auto & b) {
if (a.size() != b.size()) {
return a.size() < b.size();
}
return a < b;
});
if (!numericOrder.empty()) {
ret += " " + numericOrder[0];
}
for (int i = 1; i < numericOrder.size(); i++) {
ret += " ,`" + numericOrder[i];
}
return ret + " right}";
};
std::set<std::string> current = epsilonClosure({ startState });
alias[current] = alias[{ startState }] = uniqueState();
Ds.emplace(current);
std::cout << "& " << Epsilon << "-closure(" << startState << ")"
<< " = " << joinStates(current)
<< " = " << alias[current]
<< " & D_s = " << joinDs() << " #\n";
while (!Ds.empty()) {
auto parent = Ds.front(); Ds.pop();
std::string buffer = "";
for (auto& terminal : terminals) {
buffer += alias[parent] + " -> " + terminal + " `:`& ";
current = std::set<std::string>();
for (auto& u : parent) {
if (nfa.count(u) && nfa[u].count(terminal)) {
for (auto& v : nfa[u][terminal]) {
current.emplace(v);
}
}
}
if (current.empty()) {
buffer += "emptyset &#\n";
continue;
}
buffer += Epsilon + "-closure" + joinStates(current);
if (!alias.count(current)) {
auto result = epsilonClosure(current);
if (!alias.count(result)) {
alias[result] = uniqueState();
Ds.emplace(result);
}
alias[current] = alias[result];
current = result;
buffer += " = " + joinStates(current);
}
buffer += " = " + alias[current] + " & #\n";
}
buffer += " D_s = " + joinDs() + " #\n";
std::cout << buffer;
}
}

Concept

Regular Expression

$$b^\ast(a+\varepsilon)b^\ast(a+\varepsilon)b^\ast$$

NFA

NFA of b*(a+𝜀)b*(a+𝜀)b*

DFA

DFA of b*(a+𝜀)b*(a+𝜀)b*

Input Example Representing NFA

start_state finish_state_1,finish_state_2,...,finish_state_n
\terminal_1
parent_state_1 child_state_1
parent_state_2 child_state_2
parent_state_3 child_state_3
\terminal_2
parent_state_4 child_state_4

How To Use

Compile

g++ epsilon-closure.cc -o epsilon-closure

Run

You input NFA using STDIN, epsilon-closure outputs DFA using STDOUT.

./epsilon-closure <input.nfa
0 25
\varepsilon
0 1 2 1
0 3 2 3
3 4
4 5
4 7 7 8 8 9
6 9 8 9
9 10 10 11
11 12 13 12
11 14 13 14
14 15
15 16
15 18 18 19
17 20 19 20
20 21 21 22
22 23 24 23
24 25 22 25
\b
1 2 12 13 23 24
\a
5 6 16 17
& varepsilon-closure(0) = left{0,` 1,` 3,` 4,` 5,` 7,` 8,` 9,` 10,` 11,` 12,` 14,` 15,` 16,` 18,` 19,` 20,` 21,` 22,` 23,` 25right} = q_0 & D_s = left{ q_0 right} #
q_0 -> a `:`& varepsilon-closure left{6,` 17right} = left{6,` 9,` 10,` 11,` 12,` 14,` 15,` 16,` 17,` 18,` 19,` 20,` 21,` 22,` 23,` 25right} = q_1 & #
q_0 -> b `:`& varepsilon-closure left{2,` 13,` 24right} = left{1,` 2,` 3,` 4,` 5,` 7,` 8,` 9,` 10,` 11,` 12,` 13,` 14,` 15,` 16,` 18,` 19,` 20,` 21,` 22,` 23,` 24,` 25right} = q_2 & #
D_s = left{ q_1 ,`q_2 right} #
q_1 -> a `:`& varepsilon-closure left{17right} = left{17,` 20,` 21,` 22,` 23,` 25right} = q_3 & #
q_1 -> b `:`& varepsilon-closure left{13,` 24right} = left{12,` 13,` 14,` 15,` 16,` 18,` 19,` 20,` 21,` 22,` 23,` 24,` 25right} = q_4 & #
D_s = left{ q_2 ,`q_3 ,`q_4 right} #
q_2 -> a `:`& varepsilon-closure left{6,` 17right} = q_1 & #
q_2 -> b `:`& varepsilon-closure left{2,` 13,` 24right} = q_2 & #
D_s = left{ q_3 ,`q_4 right} #
q_3 -> a `:`& emptyset &#
q_3 -> b `:`& varepsilon-closure left{24right} = left{23,` 24,` 25right} = q_5 & #
D_s = left{ q_4 ,`q_5 right} #
q_4 -> a `:`& varepsilon-closure left{17right} = q_3 & #
q_4 -> b `:`& varepsilon-closure left{13,` 24right} = q_4 & #
D_s = left{ q_5 right} #
q_5 -> a `:`& emptyset &#
q_5 -> b `:`& varepsilon-closure left{24right} = q_5 & #
D_s = left{ right} #
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment