dylech30th/Collections.cs

## Collections.cs
using System.Collections;
using System.Collections.Generic;

namespace CompilerExercise
{
    public static class Collections
    {
        public static int IntArrayHash(this int[] array)
        {
            return StructuralComparisons.StructuralEqualityComparer.GetHashCode(array);
        }

        public static V GetOrDefault<K, V>(this IDictionary<K, V> dictionary, K key)
        {
            return dictionary.TryGetValue(key, out var value) ? value : default;
        }
    }
}

## DeterministicFiniteAutomaton.cs
using System;
using System.Collections.Generic;
using System.Linq;

namespace CompilerExercise
{
    public class DeterministicFiniteAutomaton
    {
        public static readonly int Epsilon = -1;

        public int[] States { get; set; }

        public int[] AcceptStates { get; set; }

        public int StartState { get; set; }

        public IDictionary<(int current, int ch), int> TransitionTable;

        private int currentState;

        public DeterministicFiniteAutomaton(int[] states, int startState, int[] acceptStates)
        {
            States = states;
            StartState = startState;
            currentState = startState;
            AcceptStates = acceptStates;
            TransitionTable = new Dictionary<(int current, int ch), int>();
        }

        public void AddTransition(int ch, int current, int dest)
        {
            TransitionTable[(current, ch)] = dest;
        }

        public void Advance(int ch)
        {
            currentState = TransitionTable[(currentState, ch)];
        }

        public bool Run(string str)
        {
            foreach (var c in str)
            {
                Advance(c);
            }
            return Success();
        }

        public bool Success()
        {
            return AcceptStates.Contains(currentState);
        }
    }
}

## Functions.cs
using System;

namespace CompilerExercise
{
    public class Functions
    {
        public static Func<T, T> Identity<T>() => i => i;
    }
}

## NonDeterministicFiniteAutomaton.cs
using System;
using System.Collections.Generic;
using System.Linq;

namespace CompilerExercise
{
    public class NonDeterministicFiniteAutomaton
    {
        public static readonly int Epsilon = -1;

        public int[] States { get; set; }

        public int[] AcceptStates { get; set; }

        public int StartState { get; set; }

        public IDictionary<(int current, int ch), int[]> TransitionTable;

        public static NonDeterministicFiniteAutomaton EpsilonNfa()
        {
            var nfa = new NonDeterministicFiniteAutomaton(new[] { 0, 1 }, 0, new[] { 1 });
            nfa.AddTransition(0, Epsilon, new[] { 1 });
            return nfa;
        }

        private static void IncreaseStates(NonDeterministicFiniteAutomaton nfa, int incremental)
        {
            nfa.States = nfa.States.Select(i => i + incremental).ToArray();
            var oldTable = nfa.TransitionTable;
            var newTable = new Dictionary<(int current, int ch), int[]>();
            foreach (var ((current, ch), to) in oldTable)
            {
                newTable[(current + incremental, ch)] = to.Select(i => i + incremental).ToArray();
            }
            nfa.TransitionTable = newTable;
            nfa.AcceptStates = nfa.AcceptStates.Select(i => i + incremental).ToArray();
            nfa.StartState += incremental;
        }

        private static int GenerateUniqueMaxState(IEnumerable<NonDeterministicFiniteAutomaton> nfaEnumerable)
        {
            return nfaEnumerable.Select(i => i.States).SelectMany(Functions.Identity<int[]>()).Max() + 1;
        }

        private static int GenerateUniqueMinState(IEnumerable<NonDeterministicFiniteAutomaton> nfaEnumerable)
        {
            return nfaEnumerable.Select(i => i.States).SelectMany(Functions.Identity<int[]>()).Min() - 1;
        }

        private static void DistinctStates(NonDeterministicFiniteAutomaton nfa1, NonDeterministicFiniteAutomaton nfa2)
        {
            if (nfa2.States.Intersect(nfa1.States).Any())
            {
                IncreaseStates(nfa2, nfa1.States.Max() + 1);
            }
        }

        public NonDeterministicFiniteAutomaton Union(NonDeterministicFiniteAutomaton another)
        {
            DistinctStates(this, another);
            var start = GenerateUniqueMaxState(new[] { this, another });
            var accept = start + 1;
            var stateList = new List<int>(States);
            stateList.AddRange(another.States);
            stateList.Add(start);
            stateList.Add(accept);
            var nfa = new NonDeterministicFiniteAutomaton(stateList.ToArray(), start, new[] { accept });
            // add ε-transition to the original start transitions
            nfa.AddTransition(start, Epsilon, new[] { StartState, another.StartState });
            // preserve all other transitions
            foreach (var ((current, ch), v) in TransitionTable)
            {
                nfa.AddTransition(current, ch, v);
            }
            foreach (var ((current, ch), v) in another.TransitionTable)
            {
                nfa.AddTransition(current, ch, v);
            }
            // add ε-transitions from the original accepts to new accept
            foreach (var acceptState in AcceptStates)
            {
                // preserve all the other transitions coming out of acceptState
                var trans = TransitionTable.TryGetValue((acceptState, Epsilon), out var ts) ? ts : Array.Empty<int>();
                nfa.AddTransition(acceptState, Epsilon, new HashSet<int>(trans) { accept }.ToArray());
            }
            foreach (var anotherAcceptState in another.AcceptStates)
            {
                // preserve all the other transitions coming out of acceptState
                var trans = another.TransitionTable.TryGetValue((anotherAcceptState, Epsilon), out var ts) ? ts : Array.Empty<int>();
                nfa.AddTransition(anotherAcceptState, Epsilon, new HashSet<int>(trans) { accept }.ToArray());
            }
            return nfa;
        }

        public NonDeterministicFiniteAutomaton Concat(NonDeterministicFiniteAutomaton another)
        {
            DistinctStates(this, another);

            var stateList = new List<int>(States);
            stateList.AddRange(another.States);
            var nfa = new NonDeterministicFiniteAutomaton(stateList.ToArray(), StartState, another.AcceptStates);

            // Copy transitions
            foreach (var ((current, ch), to) in TransitionTable)
            {
                nfa.AddTransition(current, ch, to);
            }

            foreach (var ((current, ch), to) in another.TransitionTable)
            {
                nfa.AddTransition(current, ch, to);
            }

            // Replace the accept states transitions
            foreach (var acceptState in AcceptStates)
            {
                // preserve all the other transitions coming out of acceptState
                // the using of ε-transition is tricky, we don't need to consider the loop on the start state
                var trans = TransitionTable.TryGetValue((acceptState, Epsilon), out var ts) ? ts : Array.Empty<int>();
                nfa.AddTransition(acceptState, Epsilon, new HashSet<int>(trans) { another.StartState }.ToArray());
            }
            return nfa;
        }

        public NonDeterministicFiniteAutomaton KleeneClosure()
        {
            var start = GenerateUniqueMaxState(new[] { this });
            var accept = start + 1;
            var states = new List<int>
            {
                start,
                accept
            };
            states.AddRange(States);
            var nfa = new NonDeterministicFiniteAutomaton(states.ToArray(), StartState, new[] { accept });
            nfa.AddTransition(start, Epsilon, new[] { StartState, accept });

            foreach (var ((current, ch), to) in TransitionTable)
            {
                nfa.AddTransition(current, ch, to);
            }

            foreach (var acceptState in AcceptStates)
            {
                var trans = TransitionTable.TryGetValue((acceptState, Epsilon), out var ts) ? ts : Array.Empty<int>();
                nfa.AddTransition(acceptState, Epsilon, new HashSet<int>(trans) { StartState, accept }.ToArray());
            }
            return nfa;
        }

        public NonDeterministicFiniteAutomaton(int[] states, int startState, int[] acceptStates)
        {
            States = states;
            StartState = startState;
            AcceptStates = acceptStates;
            TransitionTable = new Dictionary<(int current, int ch), int[]>();
        }

        public void AddTransition(int current, int ch, int[] dest)
        {
            TransitionTable[(current, ch)] = dest;
        }

        public bool RemoveTransition(int current, int ch)
        {
            return TransitionTable.Remove((current, ch));
        }

        public int[] Alphabet()
        {
            return TransitionTable.Keys.Select(i => i.ch).Where(i => i != Epsilon).Distinct().ToArray();
        }

        public int[] EpsilonClosure(int[] s)
        {
            return s.Select(EpsilonClosure).SelectMany(Functions.Identity<int[]>()).Distinct().ToArray();
        }

        public int[] EpsilonClosure(int s)
        {
            // Get all the ε-transition-states of state s
            if (TransitionTable.TryGetValue((s, Epsilon), out var entry))
            {
                // Use stack to check each of the entries recursively, which is all the ε-transition-states of s
                var stack = new Stack<int>(entry);
                // All of the elements in the entry must be reached from s by ε-transitions, so we can add them
                // into the result without hesitation
                // s itself is included, since s can reach itself by using an ε-edge, although it may not be pointed out
                var result = new List<int>(entry) { s };
                // Find every ε-transition-state of every element presented in the stack
                while (stack.Any())
                {
                    var state = stack.Pop();
                    if (TransitionTable.TryGetValue((state, Epsilon), out var nextEntry))
                    {
                        foreach (var i in nextEntry)
                        {
                            // If you don't do this, It is possible that two nodes have ε-transitions
                            // point to each other, in such cases the stack will never be empty
                            // so make sure that the state i hasn't been visited before
                            if (!result.Contains(i))
                            {
                                // A ε-transition-state i of a particular state "state" in the stack is
                                // found, and it hasn't been visited before, so we add it to the result
                                result.Add(i);
                                // and push it onto the stack, because we also need to find all the
                                // ε-transition-states of state i, too
                                stack.Push(i);
                            }
                        }
                    }
                }
                return result.ToArray();
            }
            // the returned value is meaningful if and only if there do exist some ε-transition-states
            // for state s, other wise return an empty array denoting "nothing found"
            return new[] { s };
        }

        public int[] Move(int[] states, int c)
        {
            return states.Select(s => TransitionTable.TryGetValue((s, c), out var sTrans) ? sTrans : Array.Empty<int>())
                .SelectMany(Functions.Identity<int[]>()).Distinct().ToArray();
        }

        // Represents an intermediate state during the translation
        // This class will be a state of a DFA, which semantically
        // equals to a set of states of the NFA
        private class MarkedState
        {
            // Is this state being marked?
            public bool Marked;

            // The corresponding NFA states
            public readonly int[] NfaStates;

            // Performance consideration
            public readonly int HashCode;

            public bool IsAccept;

            public int Id;

            public MarkedState(int[] nfaStates)
            {
                Marked = false;
                NfaStates = nfaStates;
                HashCode = NfaStates.IntArrayHash();
            }

            public void Mark() => Marked = true;
        }

        // Every state in the DFA corresponds to a set of states in the NFA
        // see https://sora.ink/archives/906
        // This is a slightly optimized C# implementation of the subset construction
        // algorithm from dragon book 3-32
        // It uses an array hashcode to reduces the state-comparison time, instead
        // of performs an array search each time
        public DeterministicFiniteAutomaton Dfa()
        {
            // The intermediate state dictionary
            var transitions = new Dictionary<(MarkedState current, int ch), MarkedState>();

            // Get the state representing ε-closure(s0)
            var startStateClosure = EpsilonClosure(StartState);
            // Add it to the state list
            var states = new List<MarkedState>
            {
                new(startStateClosure)
                {
                    Id = 0,
                    IsAccept = startStateClosure.Any(AcceptStates.Contains)
                }
            };
            var alphabet = Alphabet();
            MarkedState markedState;
            var id = 1;
            // Iteratively process all the states that haven't been marked yet
            // There is an alternate approach is to use two data holder, one for
            // the "unmarked" and one for the "marked", but the overall space and
            // time complexities are almost the same
            // If a state is "unmarked", it means that we haven't search its ε-closure
            // yet, since for every state we must find its ε-closure according to the
            // algorithm, so this step is necessary, but like I said, two data holders
            // can avoid the using of "marker" flag, it is only useful when you are mixing
            // the searched and un-searched states
            while ((markedState = states.FirstOrDefault(x => !x.Marked)) != null)
            {
                // into the body of while loop means that we're about to search the ε-closure
                // of markedState, so we mark it
                markedState.Mark();
                // for every letter in the alphabet(we need to calculate all the possibilities
                // of transitions)
                foreach (var i in alphabet)
                {
                    // search the ε-closure of current state, which is markedState
                    var closureState = new MarkedState(EpsilonClosure(Move(markedState.NfaStates, i)));
                    // see if the state that represents its ε-closure is already presented in
                    // the dictionary by checking the array, the HashCode above comes in handy,
                    // we don't need to use a exponential time algorithm to compare if two array's
                    // content is equal, just compare the HashCode property in O(1) time
                    var presence = states.FirstOrDefault(x => x.HashCode == closureState.HashCode);
                    if (presence == null)
                    {
                        // if the state is not already exist, we create a new one
                        closureState.Id = id++;
                        if (closureState.NfaStates.Any(AcceptStates.Contains))
                        {
                            closureState.IsAccept = true;
                        }
                        // Add it to the states, and stay unmarked because we haven't
                        // start searching its ε-closure yet
                        states.Add(closureState);
                        // let the presence(which must be null now) be closureState(which is our initialized state)
                        presence = closureState;
                    }
                    // Set the transition rule, we set presence to closureState if it's null
                    // so it must be non-null here
                    transitions[(markedState, i)] = presence;
                }
            }
            // Turn the MarkedState-represented states and transitions into integer-represented states and transitions
            // Like I said, the former one is an intermediate approach
            var dfaStates = states.Select(i => i.Id).ToArray();
            var dfaAcceptStates = states.Where(i => i.IsAccept).Select(i => i.Id).ToArray();
            return new DeterministicFiniteAutomaton(dfaStates, 0, dfaAcceptStates)
            {
                TransitionTable = transitions.ToDictionary(k =>
                {
                    var ((current, ch), _) = k;
                    return (current.Id, ch);
                }, k => k.Value.Id)
            };
        }

        // This is the C# implementation of the NFA simulation algorithm
        // introduced by dragon book 3-37
        // The essential idea is to acquire all the possibilities of state after one
        // move, and check if the last move's state set is joint with the accept set
        public bool Run(string str)
        {
            var result = str.Aggregate(EpsilonClosure(StartState), (current, c) => EpsilonClosure(Move(current, c)));
            return result.Intersect(AcceptStates).Any();
        }

        // This is the C# implementation of the efficient NFA simulation algorithm
        // using a dynamic programming approach from dragon book 3-37, it uses an
        // array [alreadyOn] to store those states who have been visited along with
        // their closure. More precisely, it is an optimized version of the algorithm
        // above
        //
        // During the traversal of the closure of a particular state, there may be
        // one or more states that are visited more than once, as we know, for every
        // state visited we must visit its ε-closure, too,  and for a NFA with n
        // states and m transitions, the ε-closure algorithm's time complexity can up
        // to O(n), so if we search every state at each step, it may costs O(mn) time
        // in total, which is inefficient, so we use a [alreadyOn] to hold the states
        // that have been visited, note: if a state has been visited, then all of its
        // ε-closure must also have been visited, we jump over all the states that have
        // been marked in [alreadyOn].
        // We use [currentStates] to represent the current state set of the NFA, at
        // first, the current state set will be the ε-closure of the [StartState], so
        // we add them into the [currentStates]
        // At each iteration, we visit and mark every state s in the [currentStates]
        // and the ε-closure(move(s, char)) of s, we add them to [newStates] because
        // their may be other states in [currentStates], and we need to handle them
        // separately
        // At the end of the iteration, [newStates] contains all the states that the
        // NFA possibly at after reading a letter, so we add all the elements in the
        // [newStates] back to [currentStates], and clear the [alreadyOn]
        //
        // By using this approach, instead of build a full transition table like what
        // we did when converting the NFA to DFA, we simulate the execution WHILE we
        // building the DFA, this is more efficient that the two algorithms above
        //
        // Since every state can be visited at most once, the complexity of AddState
        // has been reduced from O(mn) to O(n), that's a huge improvement considering
        // arbitrarily many transitions
        public bool RunDp(string str)
        {
            // the current state set after reading a particular letter
            // before each iteration, this stack hold the possible states set
            // of the last iteration
            var currentStates = new Stack<int>();
            // this stack hold those states during an iteration
            // at the end of iteration, its content will be copy into the
            // currentStates
            var newStates = new Stack<int>();
            // dynamic programming table, jump over those states that have been
            // visited
            var alreadyOn = new bool[States.Max() + 1]; // this is not a good idea, dictionary may be better
            // default to false
            Array.Fill(alreadyOn, false);

            // adds a state and its ε-closure to the [newStates]
            void AddState(int s)
            {
                newStates.Push(s);
                alreadyOn[s] = true;
                if (TransitionTable.TryGetValue((s, Epsilon), out var epsilonStates))
                {
                    foreach (var epsilonState in epsilonStates)
                    {
                        if (!alreadyOn[epsilonState])
                        {
                            AddState(epsilonState);
                        }
                    }
                }
            }

            // find the ε-closure(move(currentStates, c)), and add all of them to the [newStates]
            void AlterStack(int c)
            {
                while (currentStates.Any())
                {
                    var oldState = currentStates.Pop();
                    if (TransitionTable.TryGetValue((oldState, c), out var oldStatesClosure))
                    {
                        foreach (var state in oldStatesClosure)
                        {
                            if (!alreadyOn[state])
                            {
                                AddState(state);
                            }
                        }
                    }
                }

                if (!newStates.Any())
                {
                    throw SimulationFailedException.NoTransitionFound("", c);
                }

                // add [newStates] into [currentStates] and clear the [alreadyOn]
                while (newStates.Any())
                {
                    var newState = newStates.Pop();
                    currentStates.Push(newState);
                    alreadyOn[newState] = false;
                }
            }
            // initialize the simulation by adding the start state
            AddState(StartState);
            foreach (var c in str)
            {
                AlterStack(c);
            }
            return currentStates.Intersect(AcceptStates).Any(); // check if the result and the AcceptStates are joint
        }
    }
}

## Program.cs
 using System;
 using System.Collections;
 using System.Collections.Generic;

 namespace CompilerExercise
{
    public class Program
    {
        public static readonly int Epsilon = -1;

        public static void Main(string[] args)
        {
            var nfa = new NonDeterministicFiniteAutomaton(new[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, 0, new[] { 10 });
            nfa.AddTransition(0, Epsilon, new[] { 1, 7 });
            nfa.AddTransition(1, Epsilon, new[] { 2, 4 });
            nfa.AddTransition(2, 'a', new[] { 3 });
            nfa.AddTransition(4, 'b', new[] { 5 });
            nfa.AddTransition(3, Epsilon, new[] { 6 });
            nfa.AddTransition(5, Epsilon, new[] { 6 });
            nfa.AddTransition(6, Epsilon, new[] { 1, 7 });
            nfa.AddTransition(7, 'a', new[] { 8 });
            nfa.AddTransition(8, 'b', new[] { 9 });
            nfa.AddTransition(9, 'c', new[] { 10 });
            nfa.AddTransition(10, 'c', new[] { 10 });
            var singleB = new NonDeterministicFiniteAutomaton(new[] { 0, 1 }, 0, new[] { 1 });
            singleB.AddTransition(0, 'b', new[] { 0, 1 });
            // var dfa = nfa.Dfa();
            // Console.WriteLine(dfa.Run("abbababababababababbbaaaaaab"));
            // Console.WriteLine(nfa.Run("abbababababababababbbaaaaaab"));
            // Console.WriteLine(nfa.RunDp("abbababababababababbbaaaaaab"));
            Console.WriteLine(nfa.Concat(singleB).RunDp("aaabcccccccccccccccabbbbbba"));
        }
    }
}

## SimulationFailedException.cs
using System;
using System.Runtime.Serialization;

namespace CompilerExercise
{
    public class SimulationFailedException : Exception
    {
        public SimulationFailedException()
        {
        }

        protected SimulationFailedException(SerializationInfo info, StreamingContext context) : base(info, context)
        {
        }

        public SimulationFailedException(string message) : base(message)
        {
        }

        public SimulationFailedException(string message, Exception innerException) : base(message, innerException)
        {
        }

        public static Exception NoTransitionFound(string state, int c)
        {
            return new SimulationFailedException($"No transition found for {(char) c} at state {state}");
        }
    }
}
	using System.Collections;
	using System.Collections.Generic;

	namespace CompilerExercise
	{
	public static class Collections
	{
	public static int IntArrayHash(this int[] array)
	{
	return StructuralComparisons.StructuralEqualityComparer.GetHashCode(array);
	}

	public static V GetOrDefault<K, V>(this IDictionary<K, V> dictionary, K key)
	{
	return dictionary.TryGetValue(key, out var value) ? value : default;
	}
	}
	}
	using System;
	using System.Collections.Generic;
	using System.Linq;

	namespace CompilerExercise
	{
	public class DeterministicFiniteAutomaton
	{
	public static readonly int Epsilon = -1;

	public int[] States { get; set; }

	public int[] AcceptStates { get; set; }

	public int StartState { get; set; }

	public IDictionary<(int current, int ch), int> TransitionTable;

	private int currentState;

	public DeterministicFiniteAutomaton(int[] states, int startState, int[] acceptStates)
	{
	States = states;
	StartState = startState;
	currentState = startState;
	AcceptStates = acceptStates;
	TransitionTable = new Dictionary<(int current, int ch), int>();
	}

	public void AddTransition(int ch, int current, int dest)
	{
	TransitionTable[(current, ch)] = dest;
	}

	public void Advance(int ch)
	{
	currentState = TransitionTable[(currentState, ch)];
	}

	public bool Run(string str)
	{
	foreach (var c in str)
	{
	Advance(c);
	}
	return Success();
	}

	public bool Success()
	{
	return AcceptStates.Contains(currentState);
	}
	}
	}
	using System;

	namespace CompilerExercise
	{
	public class Functions
	{
	public static Func<T, T> Identity<T>() => i => i;
	}
	}
	using System;
	using System.Runtime.Serialization;

	namespace CompilerExercise
	{
	public class SimulationFailedException : Exception
	{
	public SimulationFailedException()
	{
	}

	protected SimulationFailedException(SerializationInfo info, StreamingContext context) : base(info, context)
	{
	}

	public SimulationFailedException(string message) : base(message)
	{
	}

	public SimulationFailedException(string message, Exception innerException) : base(message, innerException)
	{
	}

	public static Exception NoTransitionFound(string state, int c)
	{
	return new SimulationFailedException($"No transition found for {(char) c} at state {state}");
	}
	}
	}