Skip to content

Instantly share code, notes, and snippets.

@dylech30th
Last active May 29, 2021 14:21
Show Gist options
  • Save dylech30th/be51c8b33711d1a8c5daeb3858dc416a to your computer and use it in GitHub Desktop.
Save dylech30th/be51c8b33711d1a8c5daeb3858dc416a to your computer and use it in GitHub Desktop.
Dragon book chapter 3 code
using System.Collections;
using System.Collections.Generic;
namespace CompilerExercise
{
public static class Collections
{
public static int IntArrayHash(this int[] array)
{
return StructuralComparisons.StructuralEqualityComparer.GetHashCode(array);
}
public static V GetOrDefault<K, V>(this IDictionary<K, V> dictionary, K key)
{
return dictionary.TryGetValue(key, out var value) ? value : default;
}
}
}
using System;
using System.Collections.Generic;
using System.Linq;
namespace CompilerExercise
{
public class DeterministicFiniteAutomaton
{
public static readonly int Epsilon = -1;
public int[] States { get; set; }
public int[] AcceptStates { get; set; }
public int StartState { get; set; }
public IDictionary<(int current, int ch), int> TransitionTable;
private int currentState;
public DeterministicFiniteAutomaton(int[] states, int startState, int[] acceptStates)
{
States = states;
StartState = startState;
currentState = startState;
AcceptStates = acceptStates;
TransitionTable = new Dictionary<(int current, int ch), int>();
}
public void AddTransition(int ch, int current, int dest)
{
TransitionTable[(current, ch)] = dest;
}
public void Advance(int ch)
{
currentState = TransitionTable[(currentState, ch)];
}
public bool Run(string str)
{
foreach (var c in str)
{
Advance(c);
}
return Success();
}
public bool Success()
{
return AcceptStates.Contains(currentState);
}
}
}
using System;
namespace CompilerExercise
{
public class Functions
{
public static Func<T, T> Identity<T>() => i => i;
}
}
using System;
using System.Collections.Generic;
using System.Linq;
namespace CompilerExercise
{
public class NonDeterministicFiniteAutomaton
{
public static readonly int Epsilon = -1;
public int[] States { get; set; }
public int[] AcceptStates { get; set; }
public int StartState { get; set; }
public IDictionary<(int current, int ch), int[]> TransitionTable;
public static NonDeterministicFiniteAutomaton EpsilonNfa()
{
var nfa = new NonDeterministicFiniteAutomaton(new[] { 0, 1 }, 0, new[] { 1 });
nfa.AddTransition(0, Epsilon, new[] { 1 });
return nfa;
}
private static void IncreaseStates(NonDeterministicFiniteAutomaton nfa, int incremental)
{
nfa.States = nfa.States.Select(i => i + incremental).ToArray();
var oldTable = nfa.TransitionTable;
var newTable = new Dictionary<(int current, int ch), int[]>();
foreach (var ((current, ch), to) in oldTable)
{
newTable[(current + incremental, ch)] = to.Select(i => i + incremental).ToArray();
}
nfa.TransitionTable = newTable;
nfa.AcceptStates = nfa.AcceptStates.Select(i => i + incremental).ToArray();
nfa.StartState += incremental;
}
private static int GenerateUniqueMaxState(IEnumerable<NonDeterministicFiniteAutomaton> nfaEnumerable)
{
return nfaEnumerable.Select(i => i.States).SelectMany(Functions.Identity<int[]>()).Max() + 1;
}
private static int GenerateUniqueMinState(IEnumerable<NonDeterministicFiniteAutomaton> nfaEnumerable)
{
return nfaEnumerable.Select(i => i.States).SelectMany(Functions.Identity<int[]>()).Min() - 1;
}
private static void DistinctStates(NonDeterministicFiniteAutomaton nfa1, NonDeterministicFiniteAutomaton nfa2)
{
if (nfa2.States.Intersect(nfa1.States).Any())
{
IncreaseStates(nfa2, nfa1.States.Max() + 1);
}
}
public NonDeterministicFiniteAutomaton Union(NonDeterministicFiniteAutomaton another)
{
DistinctStates(this, another);
var start = GenerateUniqueMaxState(new[] { this, another });
var accept = start + 1;
var stateList = new List<int>(States);
stateList.AddRange(another.States);
stateList.Add(start);
stateList.Add(accept);
var nfa = new NonDeterministicFiniteAutomaton(stateList.ToArray(), start, new[] { accept });
// add ε-transition to the original start transitions
nfa.AddTransition(start, Epsilon, new[] { StartState, another.StartState });
// preserve all other transitions
foreach (var ((current, ch), v) in TransitionTable)
{
nfa.AddTransition(current, ch, v);
}
foreach (var ((current, ch), v) in another.TransitionTable)
{
nfa.AddTransition(current, ch, v);
}
// add ε-transitions from the original accepts to new accept
foreach (var acceptState in AcceptStates)
{
// preserve all the other transitions coming out of acceptState
var trans = TransitionTable.TryGetValue((acceptState, Epsilon), out var ts) ? ts : Array.Empty<int>();
nfa.AddTransition(acceptState, Epsilon, new HashSet<int>(trans) { accept }.ToArray());
}
foreach (var anotherAcceptState in another.AcceptStates)
{
// preserve all the other transitions coming out of acceptState
var trans = another.TransitionTable.TryGetValue((anotherAcceptState, Epsilon), out var ts) ? ts : Array.Empty<int>();
nfa.AddTransition(anotherAcceptState, Epsilon, new HashSet<int>(trans) { accept }.ToArray());
}
return nfa;
}
public NonDeterministicFiniteAutomaton Concat(NonDeterministicFiniteAutomaton another)
{
DistinctStates(this, another);
var stateList = new List<int>(States);
stateList.AddRange(another.States);
var nfa = new NonDeterministicFiniteAutomaton(stateList.ToArray(), StartState, another.AcceptStates);
// Copy transitions
foreach (var ((current, ch), to) in TransitionTable)
{
nfa.AddTransition(current, ch, to);
}
foreach (var ((current, ch), to) in another.TransitionTable)
{
nfa.AddTransition(current, ch, to);
}
// Replace the accept states transitions
foreach (var acceptState in AcceptStates)
{
// preserve all the other transitions coming out of acceptState
// the using of ε-transition is tricky, we don't need to consider the loop on the start state
var trans = TransitionTable.TryGetValue((acceptState, Epsilon), out var ts) ? ts : Array.Empty<int>();
nfa.AddTransition(acceptState, Epsilon, new HashSet<int>(trans) { another.StartState }.ToArray());
}
return nfa;
}
public NonDeterministicFiniteAutomaton KleeneClosure()
{
var start = GenerateUniqueMaxState(new[] { this });
var accept = start + 1;
var states = new List<int>
{
start,
accept
};
states.AddRange(States);
var nfa = new NonDeterministicFiniteAutomaton(states.ToArray(), StartState, new[] { accept });
nfa.AddTransition(start, Epsilon, new[] { StartState, accept });
foreach (var ((current, ch), to) in TransitionTable)
{
nfa.AddTransition(current, ch, to);
}
foreach (var acceptState in AcceptStates)
{
var trans = TransitionTable.TryGetValue((acceptState, Epsilon), out var ts) ? ts : Array.Empty<int>();
nfa.AddTransition(acceptState, Epsilon, new HashSet<int>(trans) { StartState, accept }.ToArray());
}
return nfa;
}
public NonDeterministicFiniteAutomaton(int[] states, int startState, int[] acceptStates)
{
States = states;
StartState = startState;
AcceptStates = acceptStates;
TransitionTable = new Dictionary<(int current, int ch), int[]>();
}
public void AddTransition(int current, int ch, int[] dest)
{
TransitionTable[(current, ch)] = dest;
}
public bool RemoveTransition(int current, int ch)
{
return TransitionTable.Remove((current, ch));
}
public int[] Alphabet()
{
return TransitionTable.Keys.Select(i => i.ch).Where(i => i != Epsilon).Distinct().ToArray();
}
public int[] EpsilonClosure(int[] s)
{
return s.Select(EpsilonClosure).SelectMany(Functions.Identity<int[]>()).Distinct().ToArray();
}
public int[] EpsilonClosure(int s)
{
// Get all the ε-transition-states of state s
if (TransitionTable.TryGetValue((s, Epsilon), out var entry))
{
// Use stack to check each of the entries recursively, which is all the ε-transition-states of s
var stack = new Stack<int>(entry);
// All of the elements in the entry must be reached from s by ε-transitions, so we can add them
// into the result without hesitation
// s itself is included, since s can reach itself by using an ε-edge, although it may not be pointed out
var result = new List<int>(entry) { s };
// Find every ε-transition-state of every element presented in the stack
while (stack.Any())
{
var state = stack.Pop();
if (TransitionTable.TryGetValue((state, Epsilon), out var nextEntry))
{
foreach (var i in nextEntry)
{
// If you don't do this, It is possible that two nodes have ε-transitions
// point to each other, in such cases the stack will never be empty
// so make sure that the state i hasn't been visited before
if (!result.Contains(i))
{
// A ε-transition-state i of a particular state "state" in the stack is
// found, and it hasn't been visited before, so we add it to the result
result.Add(i);
// and push it onto the stack, because we also need to find all the
// ε-transition-states of state i, too
stack.Push(i);
}
}
}
}
return result.ToArray();
}
// the returned value is meaningful if and only if there do exist some ε-transition-states
// for state s, other wise return an empty array denoting "nothing found"
return new[] { s };
}
public int[] Move(int[] states, int c)
{
return states.Select(s => TransitionTable.TryGetValue((s, c), out var sTrans) ? sTrans : Array.Empty<int>())
.SelectMany(Functions.Identity<int[]>()).Distinct().ToArray();
}
// Represents an intermediate state during the translation
// This class will be a state of a DFA, which semantically
// equals to a set of states of the NFA
private class MarkedState
{
// Is this state being marked?
public bool Marked;
// The corresponding NFA states
public readonly int[] NfaStates;
// Performance consideration
public readonly int HashCode;
public bool IsAccept;
public int Id;
public MarkedState(int[] nfaStates)
{
Marked = false;
NfaStates = nfaStates;
HashCode = NfaStates.IntArrayHash();
}
public void Mark() => Marked = true;
}
// Every state in the DFA corresponds to a set of states in the NFA
// see https://sora.ink/archives/906
// This is a slightly optimized C# implementation of the subset construction
// algorithm from dragon book 3-32
// It uses an array hashcode to reduces the state-comparison time, instead
// of performs an array search each time
public DeterministicFiniteAutomaton Dfa()
{
// The intermediate state dictionary
var transitions = new Dictionary<(MarkedState current, int ch), MarkedState>();
// Get the state representing ε-closure(s0)
var startStateClosure = EpsilonClosure(StartState);
// Add it to the state list
var states = new List<MarkedState>
{
new(startStateClosure)
{
Id = 0,
IsAccept = startStateClosure.Any(AcceptStates.Contains)
}
};
var alphabet = Alphabet();
MarkedState markedState;
var id = 1;
// Iteratively process all the states that haven't been marked yet
// There is an alternate approach is to use two data holder, one for
// the "unmarked" and one for the "marked", but the overall space and
// time complexities are almost the same
// If a state is "unmarked", it means that we haven't search its ε-closure
// yet, since for every state we must find its ε-closure according to the
// algorithm, so this step is necessary, but like I said, two data holders
// can avoid the using of "marker" flag, it is only useful when you are mixing
// the searched and un-searched states
while ((markedState = states.FirstOrDefault(x => !x.Marked)) != null)
{
// into the body of while loop means that we're about to search the ε-closure
// of markedState, so we mark it
markedState.Mark();
// for every letter in the alphabet(we need to calculate all the possibilities
// of transitions)
foreach (var i in alphabet)
{
// search the ε-closure of current state, which is markedState
var closureState = new MarkedState(EpsilonClosure(Move(markedState.NfaStates, i)));
// see if the state that represents its ε-closure is already presented in
// the dictionary by checking the array, the HashCode above comes in handy,
// we don't need to use a exponential time algorithm to compare if two array's
// content is equal, just compare the HashCode property in O(1) time
var presence = states.FirstOrDefault(x => x.HashCode == closureState.HashCode);
if (presence == null)
{
// if the state is not already exist, we create a new one
closureState.Id = id++;
if (closureState.NfaStates.Any(AcceptStates.Contains))
{
closureState.IsAccept = true;
}
// Add it to the states, and stay unmarked because we haven't
// start searching its ε-closure yet
states.Add(closureState);
// let the presence(which must be null now) be closureState(which is our initialized state)
presence = closureState;
}
// Set the transition rule, we set presence to closureState if it's null
// so it must be non-null here
transitions[(markedState, i)] = presence;
}
}
// Turn the MarkedState-represented states and transitions into integer-represented states and transitions
// Like I said, the former one is an intermediate approach
var dfaStates = states.Select(i => i.Id).ToArray();
var dfaAcceptStates = states.Where(i => i.IsAccept).Select(i => i.Id).ToArray();
return new DeterministicFiniteAutomaton(dfaStates, 0, dfaAcceptStates)
{
TransitionTable = transitions.ToDictionary(k =>
{
var ((current, ch), _) = k;
return (current.Id, ch);
}, k => k.Value.Id)
};
}
// This is the C# implementation of the NFA simulation algorithm
// introduced by dragon book 3-37
// The essential idea is to acquire all the possibilities of state after one
// move, and check if the last move's state set is joint with the accept set
public bool Run(string str)
{
var result = str.Aggregate(EpsilonClosure(StartState), (current, c) => EpsilonClosure(Move(current, c)));
return result.Intersect(AcceptStates).Any();
}
// This is the C# implementation of the efficient NFA simulation algorithm
// using a dynamic programming approach from dragon book 3-37, it uses an
// array [alreadyOn] to store those states who have been visited along with
// their closure. More precisely, it is an optimized version of the algorithm
// above
//
// During the traversal of the closure of a particular state, there may be
// one or more states that are visited more than once, as we know, for every
// state visited we must visit its ε-closure, too, and for a NFA with n
// states and m transitions, the ε-closure algorithm's time complexity can up
// to O(n), so if we search every state at each step, it may costs O(mn) time
// in total, which is inefficient, so we use a [alreadyOn] to hold the states
// that have been visited, note: if a state has been visited, then all of its
// ε-closure must also have been visited, we jump over all the states that have
// been marked in [alreadyOn].
// We use [currentStates] to represent the current state set of the NFA, at
// first, the current state set will be the ε-closure of the [StartState], so
// we add them into the [currentStates]
// At each iteration, we visit and mark every state s in the [currentStates]
// and the ε-closure(move(s, char)) of s, we add them to [newStates] because
// their may be other states in [currentStates], and we need to handle them
// separately
// At the end of the iteration, [newStates] contains all the states that the
// NFA possibly at after reading a letter, so we add all the elements in the
// [newStates] back to [currentStates], and clear the [alreadyOn]
//
// By using this approach, instead of build a full transition table like what
// we did when converting the NFA to DFA, we simulate the execution WHILE we
// building the DFA, this is more efficient that the two algorithms above
//
// Since every state can be visited at most once, the complexity of AddState
// has been reduced from O(mn) to O(n), that's a huge improvement considering
// arbitrarily many transitions
public bool RunDp(string str)
{
// the current state set after reading a particular letter
// before each iteration, this stack hold the possible states set
// of the last iteration
var currentStates = new Stack<int>();
// this stack hold those states during an iteration
// at the end of iteration, its content will be copy into the
// currentStates
var newStates = new Stack<int>();
// dynamic programming table, jump over those states that have been
// visited
var alreadyOn = new bool[States.Max() + 1]; // this is not a good idea, dictionary may be better
// default to false
Array.Fill(alreadyOn, false);
// adds a state and its ε-closure to the [newStates]
void AddState(int s)
{
newStates.Push(s);
alreadyOn[s] = true;
if (TransitionTable.TryGetValue((s, Epsilon), out var epsilonStates))
{
foreach (var epsilonState in epsilonStates)
{
if (!alreadyOn[epsilonState])
{
AddState(epsilonState);
}
}
}
}
// find the ε-closure(move(currentStates, c)), and add all of them to the [newStates]
void AlterStack(int c)
{
while (currentStates.Any())
{
var oldState = currentStates.Pop();
if (TransitionTable.TryGetValue((oldState, c), out var oldStatesClosure))
{
foreach (var state in oldStatesClosure)
{
if (!alreadyOn[state])
{
AddState(state);
}
}
}
}
if (!newStates.Any())
{
throw SimulationFailedException.NoTransitionFound("", c);
}
// add [newStates] into [currentStates] and clear the [alreadyOn]
while (newStates.Any())
{
var newState = newStates.Pop();
currentStates.Push(newState);
alreadyOn[newState] = false;
}
}
// initialize the simulation by adding the start state
AddState(StartState);
foreach (var c in str)
{
AlterStack(c);
}
return currentStates.Intersect(AcceptStates).Any(); // check if the result and the AcceptStates are joint
}
}
}
using System;
using System.Collections;
using System.Collections.Generic;
namespace CompilerExercise
{
public class Program
{
public static readonly int Epsilon = -1;
public static void Main(string[] args)
{
var nfa = new NonDeterministicFiniteAutomaton(new[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, 0, new[] { 10 });
nfa.AddTransition(0, Epsilon, new[] { 1, 7 });
nfa.AddTransition(1, Epsilon, new[] { 2, 4 });
nfa.AddTransition(2, 'a', new[] { 3 });
nfa.AddTransition(4, 'b', new[] { 5 });
nfa.AddTransition(3, Epsilon, new[] { 6 });
nfa.AddTransition(5, Epsilon, new[] { 6 });
nfa.AddTransition(6, Epsilon, new[] { 1, 7 });
nfa.AddTransition(7, 'a', new[] { 8 });
nfa.AddTransition(8, 'b', new[] { 9 });
nfa.AddTransition(9, 'c', new[] { 10 });
nfa.AddTransition(10, 'c', new[] { 10 });
var singleB = new NonDeterministicFiniteAutomaton(new[] { 0, 1 }, 0, new[] { 1 });
singleB.AddTransition(0, 'b', new[] { 0, 1 });
// var dfa = nfa.Dfa();
// Console.WriteLine(dfa.Run("abbababababababababbbaaaaaab"));
// Console.WriteLine(nfa.Run("abbababababababababbbaaaaaab"));
// Console.WriteLine(nfa.RunDp("abbababababababababbbaaaaaab"));
Console.WriteLine(nfa.Concat(singleB).RunDp("aaabcccccccccccccccabbbbbba"));
}
}
}
using System;
using System.Runtime.Serialization;
namespace CompilerExercise
{
public class SimulationFailedException : Exception
{
public SimulationFailedException()
{
}
protected SimulationFailedException(SerializationInfo info, StreamingContext context) : base(info, context)
{
}
public SimulationFailedException(string message) : base(message)
{
}
public SimulationFailedException(string message, Exception innerException) : base(message, innerException)
{
}
public static Exception NoTransitionFound(string state, int c)
{
return new SimulationFailedException($"No transition found for {(char) c} at state {state}");
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment