Last active
September 20, 2021 06:07
-
-
Save aaronkampmeier/e3791f79ba2436e41fb1047043bf954b to your computer and use it in GitHub Desktop.
Non-deterministic Finite Automaton for Automatic Lexical Analysis of a DSL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// Created by Aaron Kampmeier on 3/2/21. | |
// Email: aaron.kampmeier@gmail.com | |
// Copyright © 2021 Aaron Kampmeier. All rights reserved. | |
// | |
// | |
#include "NFA.h" | |
#include <memory> | |
/// State | |
const std::set<std::weak_ptr<NFA::State>, NFA::WeakPtrCompare<NFA::State>> &NFA::State::statesFromTransitionOn(char inputChar) { | |
// Follow this transition to a set of states that it can go to | |
return this->transitionMap[inputChar]; | |
} | |
/// NFA | |
NFA::NFA(char match) { | |
// Verify that the character to match isn't epsilon | |
if (match == EPSILON) { | |
throw AttemptToMatchEpsilon(); | |
} | |
// To create an NFA that just accepts one character, only two states will be made q0 and q1 with F=q1 and a transition | |
// on the match character from q0 to q1 | |
std::shared_ptr<State> q0 = std::make_shared<State>(); | |
std::shared_ptr<State> q1 = std::make_shared<State>(); | |
// Save these into Q | |
this->states.insert(q0); | |
this->states.insert(q1); | |
// Create a transition between them | |
q0->transitionMap[match].emplace(q1); | |
// Set the initial state | |
this->initialState = q0; | |
// Set final state | |
this->finalState = q1; | |
computeEpsilonClosures(); | |
} | |
NFA::NFA(bool rejectEverything) { | |
// Both situations only require one state | |
std::shared_ptr<State> q0 = std::make_shared<State>(); | |
this->states.insert(q0); | |
this->initialState = q0; | |
if (rejectEverything) { | |
// There is no final state | |
} else { | |
// Just accept the initial state which means no transitions can be followed | |
this->finalState = q0; | |
} | |
computeEpsilonClosures(); | |
} | |
NFA::NFA(const NFA &nfa) { | |
overwriteWith(nfa); | |
} | |
NFA::NFA(NFA &&other) noexcept : states(std::move(other.states)), initialState(std::move(other.initialState)), | |
finalState(std::move(other.finalState)), epsilonClosures(std::move(other.epsilonClosures)) { | |
other.states.clear(); | |
} | |
NFA &NFA::operator=(const NFA &other) { | |
if (this == &other) { | |
return *this; | |
} | |
overwriteWith(other); | |
return *this; | |
} | |
NFA &NFA::operator=(NFA &&other) noexcept { | |
this->states = std::move(other.states); | |
other.states.clear(); | |
this->initialState = std::move(other.initialState); | |
finalState = std::move(other.finalState); | |
epsilonClosures = std::move(other.epsilonClosures); | |
return *this; | |
} | |
void NFA::overwriteWith(const NFA &other) { | |
// To copy an NFA the most important thing is to make a mapping from all of the old state pointers to the new ones | |
std::map<std::shared_ptr<State>, std::shared_ptr<State>> mapFromOriginal; | |
for (const auto& oldQ: other.states) { | |
// For each old state, create a new state and store a mapping from the old state's pointer to this one | |
std::shared_ptr<State> newQ = std::make_shared<State>(*oldQ); | |
this->states.insert(newQ); | |
mapFromOriginal[oldQ] = newQ; | |
} | |
// Now we have all new states and a mapping built up | |
// But all of the state's transition maps are pointing to the old ones | |
for (auto &newQ : this->states) { | |
// Go through transition map and move to new pointers | |
for (auto &oldMappingPair : newQ->transitionMap) { | |
std::set<std::weak_ptr<State>, WeakPtrCompare<State>> stateSet = oldMappingPair.second; | |
oldMappingPair.second.clear(); | |
// Transform all of the old | |
std::transform(stateSet.begin(), stateSet.end(), std::inserter(oldMappingPair.second, oldMappingPair.second.begin()), | |
[mapFromOriginal](const std::weak_ptr<State> &oldStatePtr) { | |
try { | |
return mapFromOriginal.at(oldStatePtr.lock()); | |
} catch(std::out_of_range &oor) { | |
throw NFACopyMisalignment(); | |
} | |
}); | |
} | |
} | |
// Now move initial and final state pointers over | |
this->initialState = mapFromOriginal.at(other.initialState.lock()); | |
this->finalState = mapFromOriginal.at(other.finalState.lock()); | |
computeEpsilonClosures(); | |
} | |
NFA NFA::nfaUnion(const NFA &first, const NFA &second) { | |
NFA firstCopy = first; | |
NFA secondCopy = second; | |
// Now just invoke the other union operation | |
return NFA::nfaUnion(std::move(firstCopy), std::move(secondCopy)); | |
} | |
NFA NFA::nfaUnion(NFA &&first, NFA &&second) { | |
// To union two NFAs we simply add a new start and final state and make epsilon transitions from the start state to | |
// the start states of the component NFAs and from the end states of the components to the final end state. | |
NFA newNfa; | |
// Move resources over | |
newNfa.states = std::move(first.states); | |
newNfa.states.insert(second.states.begin(), second.states.end()); | |
first.states.clear(); | |
second.states.clear(); | |
// New start and end states | |
const std::shared_ptr<State> &qi = *(newNfa.states.insert(std::make_shared<State>()).first); | |
const std::shared_ptr<State> &qf = *(newNfa.states.insert(std::make_shared<State>()).first); | |
// Set initial and final states | |
newNfa.initialState = qi; | |
newNfa.finalState = qf; | |
// Make epsilon transitions from qi to first and second start states | |
qi->transitionMap[EPSILON].insert(first.initialState); | |
qi->transitionMap[EPSILON].insert(second.initialState); | |
// Make epsilon transitions to qf from first and second final states | |
first.finalState.lock()->transitionMap[EPSILON].insert(qf); | |
second.finalState.lock()->transitionMap[EPSILON].insert(qf); | |
newNfa.computeEpsilonClosures(); | |
return newNfa; | |
} | |
NFA NFA::nfaConcatenation(const NFA &first, const NFA &second) { | |
NFA firstCopy = first, secondCopy = second; | |
return nfaConcatenation(std::move(firstCopy), std::move(secondCopy)); | |
} | |
NFA NFA::nfaConcatenation(NFA &&first, NFA &&second) { | |
// To concatenate, we make an epsilon transition from the first final state to the second start state | |
NFA newNfa; | |
// Move resources over | |
newNfa.states = std::move(first.states); | |
newNfa.states.insert(second.states.begin(), second.states.end()); | |
first.states.clear(); | |
second.states.clear(); | |
// Set the true initial state | |
newNfa.initialState = first.initialState; | |
newNfa.finalState = second.finalState; | |
// Now make the epsilon transition | |
first.finalState.lock()->transitionMap[EPSILON].insert(second.initialState); | |
newNfa.computeEpsilonClosures(); | |
return newNfa; | |
} | |
NFA NFA::nfaStar(const NFA &nfa) { | |
NFA nfaCopy = nfa; | |
return nfaStar(std::move(nfaCopy)); | |
} | |
NFA NFA::nfaStar(NFA &&nfa) { | |
// To take the star of an nfa there just needs to be a new epsilon transition from the start state to the final state | |
// and back | |
NFA newNfa(std::move(nfa)); | |
newNfa.initialState.lock()->transitionMap[EPSILON].insert(newNfa.finalState); | |
newNfa.finalState.lock()->transitionMap[EPSILON].insert(newNfa.initialState); | |
newNfa.computeEpsilonClosures(); | |
return newNfa; | |
} | |
bool NFA::compute(const std::string &input) const { | |
// Perform a computation by consuming each char of the input string | |
Computation computation(*this); | |
for (char nextChar : input) { | |
computation.consume(nextChar); | |
// If it is trapped then just exit now | |
if (computation.isTrapped()) { | |
return computation.isAccepting(); | |
} | |
} | |
return computation.isAccepting(); | |
} | |
void NFA::computeEpsilonClosures() { | |
epsilonClosures.clear(); | |
// To compute these we need to go through every state and follow the epsilon transitions to see where we get | |
for (const auto& q : this->states) { | |
std::set<std::shared_ptr<State>> touchedStates; | |
epsilonClosures[q].insert(q); // Following 0 epsilon transitions | |
auto reachables = probeEpsilonNetwork(q, touchedStates); | |
epsilonClosures[q].insert(reachables.begin(), reachables.end()); | |
} | |
#if DEBUG | |
this->printDebugInfo(); | |
// DEBUG: Verify that this does not change anything | |
auto savedEpsilonClosures = this->epsilonClosures; | |
for (const auto& q : this->states) { | |
std::set<std::shared_ptr<State>> touchedStates; | |
epsilonClosures[q].insert(q); // Following 0 epsilon transitions | |
auto reachables = probeEpsilonNetwork(q, touchedStates); | |
epsilonClosures[q].insert(reachables.begin(), reachables.end()); | |
} | |
if (savedEpsilonClosures != this->epsilonClosures) { | |
throw 5; | |
} | |
#endif | |
} | |
std::set<std::weak_ptr<NFA::State>, NFA::WeakPtrCompare<NFA::State>> | |
NFA::probeEpsilonNetwork(const std::shared_ptr<NFA::State> &state, std::set<std::shared_ptr<State>> &touchedStates) { | |
touchedStates.insert(state); // Track this state being reached | |
// Probe the epsilon network from this node | |
std::set<std::weak_ptr<NFA::State>, NFA::WeakPtrCompare<NFA::State>> reachableStates; | |
// Add in the EPSILON children | |
reachableStates.insert(state->statesFromTransitionOn(EPSILON).begin(), state->statesFromTransitionOn(EPSILON).end()); | |
for (const auto &childStatePtr: state->statesFromTransitionOn(EPSILON)) { | |
// If it hasn't been touched, then go follow its epsilon transitions | |
if (touchedStates.find(childStatePtr.lock()) == std::end(touchedStates)) { | |
// Hasn't been probed yet | |
auto childReachable = probeEpsilonNetwork(childStatePtr.lock(), touchedStates); | |
reachableStates.insert(childReachable.begin(),childReachable.end()); | |
} | |
} | |
return reachableStates; | |
} | |
const std::set<std::weak_ptr<NFA::State>, NFA::WeakPtrCompare<NFA::State>> &NFA::getEpsilonClosure( | |
const std::weak_ptr<State> &state) const { | |
auto itr = this->epsilonClosures.find(state); | |
if (itr != std::end(epsilonClosures)) { | |
return itr->second; | |
} else { | |
// Error | |
throw EpsilonClosureNotComputed(); | |
} | |
} | |
#if DEBUG | |
void NFA::printDebugInfo() { | |
// Print out the NFA | |
// Print out all states | |
std::cout << "NFA Debug Output: \n"; | |
std::cout << "All States (" << this->states.size() << "): " << std::endl; | |
for (const auto &q: this->states) { | |
std::cout << q.get() << ", Use Count: " << q.use_count() << "\n"; | |
} | |
std::cout << std::endl; | |
// Print out all the states transition functions | |
std::cout << "Transition Functions: " << std::endl; | |
for (const auto &q: this->states) { | |
std::cout << q.get() << ": \n"; | |
for (const std::pair<char, std::set<std::weak_ptr<State>, WeakPtrCompare<State>>> &transition: q->transitionMap) { | |
std::cout << "\t" << transition.first << "-> \n"; | |
for (const auto &stateSet: transition.second) { | |
std::cout << "\t\t" << stateSet.lock().get() << "\n"; | |
} | |
std::cout << std::endl; | |
} | |
std::cout << "\n"; | |
} | |
std::cout << std::endl; | |
// Start and final state | |
std::cout << "\nStart State: " << this->initialState.lock().get(); | |
std::cout << "\nFinal State: " << this->finalState.lock().get() << "\n\n" << std::endl; | |
// if (this->finalState.lock().get() == nullptr) { | |
// std::cout << "hmmm" << std::endl; | |
// } | |
} | |
#endif | |
/// COMPUTATION | |
void NFA::Computation::consume(char nextChar) { | |
if (nextChar == EPSILON) { | |
throw AttemptToMatchEpsilon(); | |
} | |
// For every current possible state we look at where they can go with this transition | |
std::set<std::weak_ptr<State>, WeakPtrCompare<State>> nextStates; | |
for (const auto& weakQ : this->possibleStates) { | |
std::shared_ptr<State> q = weakQ.lock(); | |
// Take all of the states that can be reached following this transition and add it to nextStates | |
nextStates.insert(q->statesFromTransitionOn(nextChar).begin(), q->statesFromTransitionOn(nextChar).end()); | |
} | |
// Clear out the possible states now | |
this->possibleStates.clear(); | |
// Now add in the epsilon closures of each state and set them all into possibleStates | |
for (const auto& weakQ : nextStates) { | |
const auto closure = nfa.getEpsilonClosure(weakQ); | |
possibleStates.insert(closure.begin(), closure.end()); | |
} | |
} | |
bool NFA::Computation::isAccepting() const { | |
// A computation should be accepting if the final state is a member of the current possibleStates | |
return possibleStates.find(nfa.finalState) != possibleStates.end(); | |
} | |
bool NFA::Computation::isTrapped() const { | |
return possibleStates.empty(); | |
} | |
const std::string &NFA::Computation::getConsumedString() { | |
return inputString; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// Created by Aaron Kampmeier on 3/2/21. | |
// Email: aaron.kampmeier@gmail.com | |
// Copyright © 2021 Aaron Kampmeier. All rights reserved. | |
// | |
// | |
#ifndef PROJECT_2_NFA_H | |
#define PROJECT_2_NFA_H | |
#include <string> | |
#include <set> | |
#include <map> | |
#include <unordered_map> | |
#include <memory> | |
#include <algorithm> | |
#include <stdexcept> | |
#if DEBUG | |
#include <iostream> | |
#endif | |
#define EPSILON '_' | |
/** | |
* Represents a non-deterministic finite state automaton (NFA) using a simple graph. There are also the regular operations | |
* defined on this class which are Union, Concatenation, and Kleene Star. These operations are all closed under the | |
* regular languages. | |
* The formal definition of an NFA is a 5-tuple: (Q, Σ, δ, q₀, F) where Q is a set of states, Σ is the alphabet to use (a set of chars), | |
* δ: Q×(Σ∪{ε}) -> P(Q), q₀ ∈ Q is the start state, and F ⊆ Q is a set of final (accept) states. | |
* We apply some constraints to this implementation. The char "_" (underscore) can not be a member of Σ. The empty string (ε) | |
* will be represented by the underscore character ("_"). The set F must also have a cardinality of 1 (meaning only one final | |
* state is allowed). This does not actually limit this implementation or the NFAs it can represent at all. | |
*/ | |
class NFA { | |
friend class Computation; | |
private: | |
/** | |
* Performs a meaningless but consistent comparison of weak_ptrs to allow them to be stored in sets and maps. | |
* This was inspired by the discussion here: https://stackoverflow.com/questions/32668742/a-set-of-weak-ptr | |
* @tparam T | |
*/ | |
template<class T> | |
struct WeakPtrCompare { | |
bool operator() (const std::weak_ptr<T> &lhs, const std::weak_ptr<T> &rhs) const { | |
auto lptr = lhs.lock(), rptr = rhs.lock(); | |
if (!rptr) return false; | |
if (!lptr) return true; | |
return lptr < rptr; | |
} | |
// bool operator() (const std::weak_ptr<T> *lhs, const std::weak_ptr<T> *rhs) { | |
// auto lptr = lhs->lock(), rptr = rhs->lock(); | |
// if (!rptr) return false; | |
// if (!lptr) return true; | |
// return lptr < rptr; | |
// } | |
}; | |
/** | |
* Represents a single state in the NFA. Each state, by the formal definition, can have a label/id and transitions based on | |
* reading 0 or 1 input characters in Σ to 1 or more states. | |
*/ | |
class State { | |
friend class NFA; | |
private: | |
/** | |
* An optional label for this state. Usually just an id or num. | |
*/ | |
std::string label; | |
/** | |
* A property that maps an input character to a set of other states. Recall that the empty string epsilon is represented | |
* by an underscore. Valid values for the keys in this map are then elements in the set Σ∪{_}. | |
*/ | |
std::unordered_map<char, std::set<std::weak_ptr<State>, WeakPtrCompare<State>>> transitionMap; | |
public: | |
/** | |
* Constructor for a new State. | |
* @param label | |
*/ | |
explicit State(std::string label) : label(std::move(label)) {}; | |
State() = default; | |
/** | |
* Copy constructor | |
* @param state | |
*/ | |
State(const State &state) : label(state.label), transitionMap(state.transitionMap) {}; | |
/** | |
* Move constructor | |
* @param other | |
*/ | |
State(State &&other) noexcept : label(std::move(other.label)), transitionMap(std::move(other.transitionMap)) { | |
other.label.clear(); | |
other.transitionMap.clear(); | |
}; | |
/** | |
* Returns all the states from following one transition on the inputChar from this state. Does not follow epsilon | |
* transitions. | |
* @param inputChar | |
* @return | |
*/ | |
const std::set<std::weak_ptr<State>, WeakPtrCompare<State>> &statesFromTransitionOn(char inputChar); | |
}; | |
public: | |
/** | |
* Represents a computation performed on an NFA using some input string. It has some state such as the current read | |
* in input string and the set of possible states the NFA could be in after reading in that input string. | |
* It has a single operation defined on it which is to consume another input character. | |
*/ | |
class Computation { | |
private: | |
/** | |
* The NFA to perform the computation on. | |
*/ | |
const NFA &nfa; | |
/** | |
* An ordered list of the previously consumed input characters. | |
*/ | |
std::string inputString; | |
/** | |
* The possible states the NFA could be in after consuming the inputString. | |
*/ | |
std::set<std::weak_ptr<NFA::State>, WeakPtrCompare<State>> possibleStates; | |
public: | |
/** | |
* Starts a new computation on an NFA. possibleStates will start as the epsilon-closure of the start state (E(q_0)). | |
* @param nfa | |
*/ | |
explicit Computation(const NFA &nfa) : nfa(nfa), inputString(), possibleStates(nfa.epsilonClosures.at(nfa.initialState)) {}; | |
/** | |
* Consumes the nextChar in the input string and updates the possible states that can be reached. | |
* @param nextChar The next char to consume. nextChar should be a member of Σ for the NFA. | |
*/ | |
void consume(char nextChar); | |
/** | |
* Returns whether the computation is currently accepting or not. This property may change if more input chars | |
* are consumed. | |
* @return | |
*/ | |
bool isAccepting() const; | |
/** | |
* Checks if the computation is currently "trapped". This refers to the possibility of a DFA being in a trap state | |
* where there is no way to leave. This computation is "trapped" if the set possibleStates is currently empty. | |
* @return | |
*/ | |
bool isTrapped() const; | |
/** | |
* Returns the currently consumed string for this computation. | |
* @return | |
*/ | |
const std::string &getConsumedString(); | |
}; | |
private: | |
/** | |
* Q. Holds pointers to all states in this NFA to keep them from going out of scope. | |
*/ | |
std::set<std::shared_ptr<State>> states; | |
/** | |
* q₀. The start state of this NFA. | |
*/ | |
std::weak_ptr<State> initialState; | |
/** | |
* F. The final state for this NFA. | |
*/ | |
std::weak_ptr<State> finalState; | |
/** | |
* The epsilon closure is a function E: Q -> P(Q) where E(q) for some state q returns the set of states that can be | |
* reached from this state by following 0 or more epsilon transitions. | |
* | |
* This is computed when the NFA is created so that it doesn't have to be computed every time a computation is performed | |
* on the NFA. | |
*/ | |
std::map<std::weak_ptr<State>, std::set<std::weak_ptr<State>, WeakPtrCompare<State>>, WeakPtrCompare<State>> epsilonClosures; | |
/** | |
* Computes the epsilon closures for every state in this NFA and stores it in epsilonClosures for future use by computations. | |
* This should be done any time the NFA changes. | |
*/ | |
void computeEpsilonClosures(); | |
/** | |
* Probes a state's epsilon transitions recursively to return a set of all the states reachable from that state via | |
* 1 or more epsilon transitions. | |
* @param state | |
* @param touchedStates | |
* @return | |
*/ | |
std::set<std::weak_ptr<NFA::State>, NFA::WeakPtrCompare<NFA::State>> | |
probeEpsilonNetwork(const std::shared_ptr<NFA::State> &state, std::set<std::shared_ptr<State>> &touchedStates); | |
/** | |
* Overwrites all of this NFA with a different nfa. Used by the copy constructor and copy assignment operator. | |
* @param other | |
*/ | |
void overwriteWith(const NFA &other); | |
public: | |
/** | |
* Empty constructor | |
*/ | |
NFA() : NFA(true) {}; | |
/** | |
* Crates a very simple NFA that accepts a single char string. | |
* @param match The character that this NFA will match. | |
*/ | |
explicit NFA(char match); | |
/** | |
* Creates an NFA that either accepts just the empty string or accepts nothing. | |
* @param rejectEverything If this is true, then the NFA created does not accept any string. If it is false, then the NFA | |
* only accepts the empty string epsilon. | |
*/ | |
explicit NFA(bool rejectEverything); | |
/** | |
* Move and Copy constructors | |
* @param nfa | |
*/ | |
NFA(const NFA &nfa); | |
NFA(NFA &&other) noexcept; | |
/** | |
* Assignment operators | |
* @param other | |
* @return | |
*/ | |
NFA &operator=(const NFA &other); | |
NFA &operator=(NFA &&other) noexcept; | |
/** | |
* Unions two NFAs to create a new one whose language is the union of the two languages of the component NFAs. | |
* @param first | |
* @param second | |
* @return | |
*/ | |
static NFA nfaUnion(const NFA &first, const NFA &second); | |
static NFA nfaUnion(NFA &&first, NFA &&second); | |
/** | |
* Concatenates two NFAs to create a new NFA whose language is the concatenation of the two languages of the component NFAs. | |
* @param first | |
* @param second | |
* @return | |
*/ | |
static NFA nfaConcatenation(const NFA &first, const NFA &second); | |
static NFA nfaConcatenation(NFA &&first, NFA &&second); | |
/** | |
* Creates a new NFA whose language is the kleene star of the original language. | |
* @param nfa | |
* @return | |
*/ | |
static NFA nfaStar(const NFA &nfa); | |
static NFA nfaStar(NFA &&nfa); | |
/** | |
* Returns the precomputed epsilon closure for a given state | |
* @param state | |
* @return | |
*/ | |
const std::set<std::weak_ptr<State>, WeakPtrCompare<State>> &getEpsilonClosure(const std::weak_ptr<State> &state) const; | |
/** | |
* Performs a computation on this NFA using the input string provided. A computation will be accepting if the NFA ends | |
* up in a final state and not accepting if it does not. | |
* This is just a simpler version than having to manage the computation yourself using the NFA::Computation class. | |
* @param input The input string. | |
* @return If the computation was accepting. | |
*/ | |
bool compute(const std::string &input) const; | |
#if DEBUG | |
/** | |
* Prints out the NFA with debug info. Not necessarily in a nice format. | |
*/ | |
void printDebugInfo(); | |
#endif | |
/// Exceptions | |
class AttemptToMatchEpsilon: public std::exception { | |
const char *what() const noexcept override { | |
return "Attempting to make an NFA match the empty string epsilon"; | |
} | |
}; | |
class EpsilonClosureNotComputed: public std::exception { | |
const char *what() const noexcept override { | |
return "There is no epsilon closure precomputed for a state."; | |
} | |
}; | |
class NFACopyMisalignment: public std::exception { | |
const char *what() const noexcept override { | |
return "There was an issue copying the NFA."; | |
} | |
}; | |
}; | |
template<class T> | |
bool operator==(const std::weak_ptr<T> &lhs, const std::weak_ptr<T> &rhs) { | |
return lhs.lock() == rhs.lock(); | |
} | |
#endif //PROJECT_2_NFA_H |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is a custom NFA implementation I built for an automated lexer. The lexer took in a description of a language, formed an NFA with it, and then analyzed various input strings to see if they were in the language or not.