Skip to content

Instantly share code, notes, and snippets.

@aaronkampmeier
Last active September 20, 2021 06:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aaronkampmeier/e3791f79ba2436e41fb1047043bf954b to your computer and use it in GitHub Desktop.
Save aaronkampmeier/e3791f79ba2436e41fb1047043bf954b to your computer and use it in GitHub Desktop.
Non-deterministic Finite Automaton for Automatic Lexical Analysis of a DSL
//
// Created by Aaron Kampmeier on 3/2/21.
// Email: aaron.kampmeier@gmail.com
// Copyright © 2021 Aaron Kampmeier. All rights reserved.
//
//
#include "NFA.h"
#include <memory>
/// State
const std::set<std::weak_ptr<NFA::State>, NFA::WeakPtrCompare<NFA::State>> &NFA::State::statesFromTransitionOn(char inputChar) {
// Follow this transition to a set of states that it can go to
return this->transitionMap[inputChar];
}
/// NFA
NFA::NFA(char match) {
// Verify that the character to match isn't epsilon
if (match == EPSILON) {
throw AttemptToMatchEpsilon();
}
// To create an NFA that just accepts one character, only two states will be made q0 and q1 with F=q1 and a transition
// on the match character from q0 to q1
std::shared_ptr<State> q0 = std::make_shared<State>();
std::shared_ptr<State> q1 = std::make_shared<State>();
// Save these into Q
this->states.insert(q0);
this->states.insert(q1);
// Create a transition between them
q0->transitionMap[match].emplace(q1);
// Set the initial state
this->initialState = q0;
// Set final state
this->finalState = q1;
computeEpsilonClosures();
}
NFA::NFA(bool rejectEverything) {
// Both situations only require one state
std::shared_ptr<State> q0 = std::make_shared<State>();
this->states.insert(q0);
this->initialState = q0;
if (rejectEverything) {
// There is no final state
} else {
// Just accept the initial state which means no transitions can be followed
this->finalState = q0;
}
computeEpsilonClosures();
}
NFA::NFA(const NFA &nfa) {
overwriteWith(nfa);
}
NFA::NFA(NFA &&other) noexcept : states(std::move(other.states)), initialState(std::move(other.initialState)),
finalState(std::move(other.finalState)), epsilonClosures(std::move(other.epsilonClosures)) {
other.states.clear();
}
NFA &NFA::operator=(const NFA &other) {
if (this == &other) {
return *this;
}
overwriteWith(other);
return *this;
}
NFA &NFA::operator=(NFA &&other) noexcept {
this->states = std::move(other.states);
other.states.clear();
this->initialState = std::move(other.initialState);
finalState = std::move(other.finalState);
epsilonClosures = std::move(other.epsilonClosures);
return *this;
}
void NFA::overwriteWith(const NFA &other) {
// To copy an NFA the most important thing is to make a mapping from all of the old state pointers to the new ones
std::map<std::shared_ptr<State>, std::shared_ptr<State>> mapFromOriginal;
for (const auto& oldQ: other.states) {
// For each old state, create a new state and store a mapping from the old state's pointer to this one
std::shared_ptr<State> newQ = std::make_shared<State>(*oldQ);
this->states.insert(newQ);
mapFromOriginal[oldQ] = newQ;
}
// Now we have all new states and a mapping built up
// But all of the state's transition maps are pointing to the old ones
for (auto &newQ : this->states) {
// Go through transition map and move to new pointers
for (auto &oldMappingPair : newQ->transitionMap) {
std::set<std::weak_ptr<State>, WeakPtrCompare<State>> stateSet = oldMappingPair.second;
oldMappingPair.second.clear();
// Transform all of the old
std::transform(stateSet.begin(), stateSet.end(), std::inserter(oldMappingPair.second, oldMappingPair.second.begin()),
[mapFromOriginal](const std::weak_ptr<State> &oldStatePtr) {
try {
return mapFromOriginal.at(oldStatePtr.lock());
} catch(std::out_of_range &oor) {
throw NFACopyMisalignment();
}
});
}
}
// Now move initial and final state pointers over
this->initialState = mapFromOriginal.at(other.initialState.lock());
this->finalState = mapFromOriginal.at(other.finalState.lock());
computeEpsilonClosures();
}
NFA NFA::nfaUnion(const NFA &first, const NFA &second) {
NFA firstCopy = first;
NFA secondCopy = second;
// Now just invoke the other union operation
return NFA::nfaUnion(std::move(firstCopy), std::move(secondCopy));
}
NFA NFA::nfaUnion(NFA &&first, NFA &&second) {
// To union two NFAs we simply add a new start and final state and make epsilon transitions from the start state to
// the start states of the component NFAs and from the end states of the components to the final end state.
NFA newNfa;
// Move resources over
newNfa.states = std::move(first.states);
newNfa.states.insert(second.states.begin(), second.states.end());
first.states.clear();
second.states.clear();
// New start and end states
const std::shared_ptr<State> &qi = *(newNfa.states.insert(std::make_shared<State>()).first);
const std::shared_ptr<State> &qf = *(newNfa.states.insert(std::make_shared<State>()).first);
// Set initial and final states
newNfa.initialState = qi;
newNfa.finalState = qf;
// Make epsilon transitions from qi to first and second start states
qi->transitionMap[EPSILON].insert(first.initialState);
qi->transitionMap[EPSILON].insert(second.initialState);
// Make epsilon transitions to qf from first and second final states
first.finalState.lock()->transitionMap[EPSILON].insert(qf);
second.finalState.lock()->transitionMap[EPSILON].insert(qf);
newNfa.computeEpsilonClosures();
return newNfa;
}
NFA NFA::nfaConcatenation(const NFA &first, const NFA &second) {
NFA firstCopy = first, secondCopy = second;
return nfaConcatenation(std::move(firstCopy), std::move(secondCopy));
}
NFA NFA::nfaConcatenation(NFA &&first, NFA &&second) {
// To concatenate, we make an epsilon transition from the first final state to the second start state
NFA newNfa;
// Move resources over
newNfa.states = std::move(first.states);
newNfa.states.insert(second.states.begin(), second.states.end());
first.states.clear();
second.states.clear();
// Set the true initial state
newNfa.initialState = first.initialState;
newNfa.finalState = second.finalState;
// Now make the epsilon transition
first.finalState.lock()->transitionMap[EPSILON].insert(second.initialState);
newNfa.computeEpsilonClosures();
return newNfa;
}
NFA NFA::nfaStar(const NFA &nfa) {
NFA nfaCopy = nfa;
return nfaStar(std::move(nfaCopy));
}
NFA NFA::nfaStar(NFA &&nfa) {
// To take the star of an nfa there just needs to be a new epsilon transition from the start state to the final state
// and back
NFA newNfa(std::move(nfa));
newNfa.initialState.lock()->transitionMap[EPSILON].insert(newNfa.finalState);
newNfa.finalState.lock()->transitionMap[EPSILON].insert(newNfa.initialState);
newNfa.computeEpsilonClosures();
return newNfa;
}
bool NFA::compute(const std::string &input) const {
// Perform a computation by consuming each char of the input string
Computation computation(*this);
for (char nextChar : input) {
computation.consume(nextChar);
// If it is trapped then just exit now
if (computation.isTrapped()) {
return computation.isAccepting();
}
}
return computation.isAccepting();
}
void NFA::computeEpsilonClosures() {
epsilonClosures.clear();
// To compute these we need to go through every state and follow the epsilon transitions to see where we get
for (const auto& q : this->states) {
std::set<std::shared_ptr<State>> touchedStates;
epsilonClosures[q].insert(q); // Following 0 epsilon transitions
auto reachables = probeEpsilonNetwork(q, touchedStates);
epsilonClosures[q].insert(reachables.begin(), reachables.end());
}
#if DEBUG
this->printDebugInfo();
// DEBUG: Verify that this does not change anything
auto savedEpsilonClosures = this->epsilonClosures;
for (const auto& q : this->states) {
std::set<std::shared_ptr<State>> touchedStates;
epsilonClosures[q].insert(q); // Following 0 epsilon transitions
auto reachables = probeEpsilonNetwork(q, touchedStates);
epsilonClosures[q].insert(reachables.begin(), reachables.end());
}
if (savedEpsilonClosures != this->epsilonClosures) {
throw 5;
}
#endif
}
std::set<std::weak_ptr<NFA::State>, NFA::WeakPtrCompare<NFA::State>>
NFA::probeEpsilonNetwork(const std::shared_ptr<NFA::State> &state, std::set<std::shared_ptr<State>> &touchedStates) {
touchedStates.insert(state); // Track this state being reached
// Probe the epsilon network from this node
std::set<std::weak_ptr<NFA::State>, NFA::WeakPtrCompare<NFA::State>> reachableStates;
// Add in the EPSILON children
reachableStates.insert(state->statesFromTransitionOn(EPSILON).begin(), state->statesFromTransitionOn(EPSILON).end());
for (const auto &childStatePtr: state->statesFromTransitionOn(EPSILON)) {
// If it hasn't been touched, then go follow its epsilon transitions
if (touchedStates.find(childStatePtr.lock()) == std::end(touchedStates)) {
// Hasn't been probed yet
auto childReachable = probeEpsilonNetwork(childStatePtr.lock(), touchedStates);
reachableStates.insert(childReachable.begin(),childReachable.end());
}
}
return reachableStates;
}
const std::set<std::weak_ptr<NFA::State>, NFA::WeakPtrCompare<NFA::State>> &NFA::getEpsilonClosure(
const std::weak_ptr<State> &state) const {
auto itr = this->epsilonClosures.find(state);
if (itr != std::end(epsilonClosures)) {
return itr->second;
} else {
// Error
throw EpsilonClosureNotComputed();
}
}
#if DEBUG
void NFA::printDebugInfo() {
// Print out the NFA
// Print out all states
std::cout << "NFA Debug Output: \n";
std::cout << "All States (" << this->states.size() << "): " << std::endl;
for (const auto &q: this->states) {
std::cout << q.get() << ", Use Count: " << q.use_count() << "\n";
}
std::cout << std::endl;
// Print out all the states transition functions
std::cout << "Transition Functions: " << std::endl;
for (const auto &q: this->states) {
std::cout << q.get() << ": \n";
for (const std::pair<char, std::set<std::weak_ptr<State>, WeakPtrCompare<State>>> &transition: q->transitionMap) {
std::cout << "\t" << transition.first << "-> \n";
for (const auto &stateSet: transition.second) {
std::cout << "\t\t" << stateSet.lock().get() << "\n";
}
std::cout << std::endl;
}
std::cout << "\n";
}
std::cout << std::endl;
// Start and final state
std::cout << "\nStart State: " << this->initialState.lock().get();
std::cout << "\nFinal State: " << this->finalState.lock().get() << "\n\n" << std::endl;
// if (this->finalState.lock().get() == nullptr) {
// std::cout << "hmmm" << std::endl;
// }
}
#endif
/// COMPUTATION
void NFA::Computation::consume(char nextChar) {
if (nextChar == EPSILON) {
throw AttemptToMatchEpsilon();
}
// For every current possible state we look at where they can go with this transition
std::set<std::weak_ptr<State>, WeakPtrCompare<State>> nextStates;
for (const auto& weakQ : this->possibleStates) {
std::shared_ptr<State> q = weakQ.lock();
// Take all of the states that can be reached following this transition and add it to nextStates
nextStates.insert(q->statesFromTransitionOn(nextChar).begin(), q->statesFromTransitionOn(nextChar).end());
}
// Clear out the possible states now
this->possibleStates.clear();
// Now add in the epsilon closures of each state and set them all into possibleStates
for (const auto& weakQ : nextStates) {
const auto closure = nfa.getEpsilonClosure(weakQ);
possibleStates.insert(closure.begin(), closure.end());
}
}
bool NFA::Computation::isAccepting() const {
// A computation should be accepting if the final state is a member of the current possibleStates
return possibleStates.find(nfa.finalState) != possibleStates.end();
}
bool NFA::Computation::isTrapped() const {
return possibleStates.empty();
}
const std::string &NFA::Computation::getConsumedString() {
return inputString;
}
//
// Created by Aaron Kampmeier on 3/2/21.
// Email: aaron.kampmeier@gmail.com
// Copyright © 2021 Aaron Kampmeier. All rights reserved.
//
//
#ifndef PROJECT_2_NFA_H
#define PROJECT_2_NFA_H
#include <string>
#include <set>
#include <map>
#include <unordered_map>
#include <memory>
#include <algorithm>
#include <stdexcept>
#if DEBUG
#include <iostream>
#endif
#define EPSILON '_'
/**
* Represents a non-deterministic finite state automaton (NFA) using a simple graph. There are also the regular operations
* defined on this class which are Union, Concatenation, and Kleene Star. These operations are all closed under the
* regular languages.
* The formal definition of an NFA is a 5-tuple: (Q, Σ, δ, q₀, F) where Q is a set of states, Σ is the alphabet to use (a set of chars),
* δ: Q×(Σ∪{ε}) -> P(Q), q₀ ∈ Q is the start state, and F ⊆ Q is a set of final (accept) states.
* We apply some constraints to this implementation. The char "_" (underscore) can not be a member of Σ. The empty string (ε)
* will be represented by the underscore character ("_"). The set F must also have a cardinality of 1 (meaning only one final
* state is allowed). This does not actually limit this implementation or the NFAs it can represent at all.
*/
class NFA {
friend class Computation;
private:
/**
* Performs a meaningless but consistent comparison of weak_ptrs to allow them to be stored in sets and maps.
* This was inspired by the discussion here: https://stackoverflow.com/questions/32668742/a-set-of-weak-ptr
* @tparam T
*/
template<class T>
struct WeakPtrCompare {
bool operator() (const std::weak_ptr<T> &lhs, const std::weak_ptr<T> &rhs) const {
auto lptr = lhs.lock(), rptr = rhs.lock();
if (!rptr) return false;
if (!lptr) return true;
return lptr < rptr;
}
// bool operator() (const std::weak_ptr<T> *lhs, const std::weak_ptr<T> *rhs) {
// auto lptr = lhs->lock(), rptr = rhs->lock();
// if (!rptr) return false;
// if (!lptr) return true;
// return lptr < rptr;
// }
};
/**
* Represents a single state in the NFA. Each state, by the formal definition, can have a label/id and transitions based on
* reading 0 or 1 input characters in Σ to 1 or more states.
*/
class State {
friend class NFA;
private:
/**
* An optional label for this state. Usually just an id or num.
*/
std::string label;
/**
* A property that maps an input character to a set of other states. Recall that the empty string epsilon is represented
* by an underscore. Valid values for the keys in this map are then elements in the set Σ∪{_}.
*/
std::unordered_map<char, std::set<std::weak_ptr<State>, WeakPtrCompare<State>>> transitionMap;
public:
/**
* Constructor for a new State.
* @param label
*/
explicit State(std::string label) : label(std::move(label)) {};
State() = default;
/**
* Copy constructor
* @param state
*/
State(const State &state) : label(state.label), transitionMap(state.transitionMap) {};
/**
* Move constructor
* @param other
*/
State(State &&other) noexcept : label(std::move(other.label)), transitionMap(std::move(other.transitionMap)) {
other.label.clear();
other.transitionMap.clear();
};
/**
* Returns all the states from following one transition on the inputChar from this state. Does not follow epsilon
* transitions.
* @param inputChar
* @return
*/
const std::set<std::weak_ptr<State>, WeakPtrCompare<State>> &statesFromTransitionOn(char inputChar);
};
public:
/**
* Represents a computation performed on an NFA using some input string. It has some state such as the current read
* in input string and the set of possible states the NFA could be in after reading in that input string.
* It has a single operation defined on it which is to consume another input character.
*/
class Computation {
private:
/**
* The NFA to perform the computation on.
*/
const NFA &nfa;
/**
* An ordered list of the previously consumed input characters.
*/
std::string inputString;
/**
* The possible states the NFA could be in after consuming the inputString.
*/
std::set<std::weak_ptr<NFA::State>, WeakPtrCompare<State>> possibleStates;
public:
/**
* Starts a new computation on an NFA. possibleStates will start as the epsilon-closure of the start state (E(q_0)).
* @param nfa
*/
explicit Computation(const NFA &nfa) : nfa(nfa), inputString(), possibleStates(nfa.epsilonClosures.at(nfa.initialState)) {};
/**
* Consumes the nextChar in the input string and updates the possible states that can be reached.
* @param nextChar The next char to consume. nextChar should be a member of Σ for the NFA.
*/
void consume(char nextChar);
/**
* Returns whether the computation is currently accepting or not. This property may change if more input chars
* are consumed.
* @return
*/
bool isAccepting() const;
/**
* Checks if the computation is currently "trapped". This refers to the possibility of a DFA being in a trap state
* where there is no way to leave. This computation is "trapped" if the set possibleStates is currently empty.
* @return
*/
bool isTrapped() const;
/**
* Returns the currently consumed string for this computation.
* @return
*/
const std::string &getConsumedString();
};
private:
/**
* Q. Holds pointers to all states in this NFA to keep them from going out of scope.
*/
std::set<std::shared_ptr<State>> states;
/**
* q₀. The start state of this NFA.
*/
std::weak_ptr<State> initialState;
/**
* F. The final state for this NFA.
*/
std::weak_ptr<State> finalState;
/**
* The epsilon closure is a function E: Q -> P(Q) where E(q) for some state q returns the set of states that can be
* reached from this state by following 0 or more epsilon transitions.
*
* This is computed when the NFA is created so that it doesn't have to be computed every time a computation is performed
* on the NFA.
*/
std::map<std::weak_ptr<State>, std::set<std::weak_ptr<State>, WeakPtrCompare<State>>, WeakPtrCompare<State>> epsilonClosures;
/**
* Computes the epsilon closures for every state in this NFA and stores it in epsilonClosures for future use by computations.
* This should be done any time the NFA changes.
*/
void computeEpsilonClosures();
/**
* Probes a state's epsilon transitions recursively to return a set of all the states reachable from that state via
* 1 or more epsilon transitions.
* @param state
* @param touchedStates
* @return
*/
std::set<std::weak_ptr<NFA::State>, NFA::WeakPtrCompare<NFA::State>>
probeEpsilonNetwork(const std::shared_ptr<NFA::State> &state, std::set<std::shared_ptr<State>> &touchedStates);
/**
* Overwrites all of this NFA with a different nfa. Used by the copy constructor and copy assignment operator.
* @param other
*/
void overwriteWith(const NFA &other);
public:
/**
* Empty constructor
*/
NFA() : NFA(true) {};
/**
* Crates a very simple NFA that accepts a single char string.
* @param match The character that this NFA will match.
*/
explicit NFA(char match);
/**
* Creates an NFA that either accepts just the empty string or accepts nothing.
* @param rejectEverything If this is true, then the NFA created does not accept any string. If it is false, then the NFA
* only accepts the empty string epsilon.
*/
explicit NFA(bool rejectEverything);
/**
* Move and Copy constructors
* @param nfa
*/
NFA(const NFA &nfa);
NFA(NFA &&other) noexcept;
/**
* Assignment operators
* @param other
* @return
*/
NFA &operator=(const NFA &other);
NFA &operator=(NFA &&other) noexcept;
/**
* Unions two NFAs to create a new one whose language is the union of the two languages of the component NFAs.
* @param first
* @param second
* @return
*/
static NFA nfaUnion(const NFA &first, const NFA &second);
static NFA nfaUnion(NFA &&first, NFA &&second);
/**
* Concatenates two NFAs to create a new NFA whose language is the concatenation of the two languages of the component NFAs.
* @param first
* @param second
* @return
*/
static NFA nfaConcatenation(const NFA &first, const NFA &second);
static NFA nfaConcatenation(NFA &&first, NFA &&second);
/**
* Creates a new NFA whose language is the kleene star of the original language.
* @param nfa
* @return
*/
static NFA nfaStar(const NFA &nfa);
static NFA nfaStar(NFA &&nfa);
/**
* Returns the precomputed epsilon closure for a given state
* @param state
* @return
*/
const std::set<std::weak_ptr<State>, WeakPtrCompare<State>> &getEpsilonClosure(const std::weak_ptr<State> &state) const;
/**
* Performs a computation on this NFA using the input string provided. A computation will be accepting if the NFA ends
* up in a final state and not accepting if it does not.
* This is just a simpler version than having to manage the computation yourself using the NFA::Computation class.
* @param input The input string.
* @return If the computation was accepting.
*/
bool compute(const std::string &input) const;
#if DEBUG
/**
* Prints out the NFA with debug info. Not necessarily in a nice format.
*/
void printDebugInfo();
#endif
/// Exceptions
class AttemptToMatchEpsilon: public std::exception {
const char *what() const noexcept override {
return "Attempting to make an NFA match the empty string epsilon";
}
};
class EpsilonClosureNotComputed: public std::exception {
const char *what() const noexcept override {
return "There is no epsilon closure precomputed for a state.";
}
};
class NFACopyMisalignment: public std::exception {
const char *what() const noexcept override {
return "There was an issue copying the NFA.";
}
};
};
template<class T>
bool operator==(const std::weak_ptr<T> &lhs, const std::weak_ptr<T> &rhs) {
return lhs.lock() == rhs.lock();
}
#endif //PROJECT_2_NFA_H
@aaronkampmeier
Copy link
Author

This is a custom NFA implementation I built for an automated lexer. The lexer took in a description of a language, formed an NFA with it, and then analyzed various input strings to see if they were in the language or not.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment