Last active
May 29, 2021 14:21
-
-
Save dylech30th/be51c8b33711d1a8c5daeb3858dc416a to your computer and use it in GitHub Desktop.
Dragon book chapter 3 code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Collections; | |
using System.Collections.Generic; | |
namespace CompilerExercise | |
{ | |
public static class Collections | |
{ | |
public static int IntArrayHash(this int[] array) | |
{ | |
return StructuralComparisons.StructuralEqualityComparer.GetHashCode(array); | |
} | |
public static V GetOrDefault<K, V>(this IDictionary<K, V> dictionary, K key) | |
{ | |
return dictionary.TryGetValue(key, out var value) ? value : default; | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
namespace CompilerExercise | |
{ | |
public class DeterministicFiniteAutomaton | |
{ | |
public static readonly int Epsilon = -1; | |
public int[] States { get; set; } | |
public int[] AcceptStates { get; set; } | |
public int StartState { get; set; } | |
public IDictionary<(int current, int ch), int> TransitionTable; | |
private int currentState; | |
public DeterministicFiniteAutomaton(int[] states, int startState, int[] acceptStates) | |
{ | |
States = states; | |
StartState = startState; | |
currentState = startState; | |
AcceptStates = acceptStates; | |
TransitionTable = new Dictionary<(int current, int ch), int>(); | |
} | |
public void AddTransition(int ch, int current, int dest) | |
{ | |
TransitionTable[(current, ch)] = dest; | |
} | |
public void Advance(int ch) | |
{ | |
currentState = TransitionTable[(currentState, ch)]; | |
} | |
public bool Run(string str) | |
{ | |
foreach (var c in str) | |
{ | |
Advance(c); | |
} | |
return Success(); | |
} | |
public bool Success() | |
{ | |
return AcceptStates.Contains(currentState); | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
namespace CompilerExercise | |
{ | |
public class Functions | |
{ | |
public static Func<T, T> Identity<T>() => i => i; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
namespace CompilerExercise | |
{ | |
public class NonDeterministicFiniteAutomaton | |
{ | |
public static readonly int Epsilon = -1; | |
public int[] States { get; set; } | |
public int[] AcceptStates { get; set; } | |
public int StartState { get; set; } | |
public IDictionary<(int current, int ch), int[]> TransitionTable; | |
public static NonDeterministicFiniteAutomaton EpsilonNfa() | |
{ | |
var nfa = new NonDeterministicFiniteAutomaton(new[] { 0, 1 }, 0, new[] { 1 }); | |
nfa.AddTransition(0, Epsilon, new[] { 1 }); | |
return nfa; | |
} | |
private static void IncreaseStates(NonDeterministicFiniteAutomaton nfa, int incremental) | |
{ | |
nfa.States = nfa.States.Select(i => i + incremental).ToArray(); | |
var oldTable = nfa.TransitionTable; | |
var newTable = new Dictionary<(int current, int ch), int[]>(); | |
foreach (var ((current, ch), to) in oldTable) | |
{ | |
newTable[(current + incremental, ch)] = to.Select(i => i + incremental).ToArray(); | |
} | |
nfa.TransitionTable = newTable; | |
nfa.AcceptStates = nfa.AcceptStates.Select(i => i + incremental).ToArray(); | |
nfa.StartState += incremental; | |
} | |
private static int GenerateUniqueMaxState(IEnumerable<NonDeterministicFiniteAutomaton> nfaEnumerable) | |
{ | |
return nfaEnumerable.Select(i => i.States).SelectMany(Functions.Identity<int[]>()).Max() + 1; | |
} | |
private static int GenerateUniqueMinState(IEnumerable<NonDeterministicFiniteAutomaton> nfaEnumerable) | |
{ | |
return nfaEnumerable.Select(i => i.States).SelectMany(Functions.Identity<int[]>()).Min() - 1; | |
} | |
private static void DistinctStates(NonDeterministicFiniteAutomaton nfa1, NonDeterministicFiniteAutomaton nfa2) | |
{ | |
if (nfa2.States.Intersect(nfa1.States).Any()) | |
{ | |
IncreaseStates(nfa2, nfa1.States.Max() + 1); | |
} | |
} | |
public NonDeterministicFiniteAutomaton Union(NonDeterministicFiniteAutomaton another) | |
{ | |
DistinctStates(this, another); | |
var start = GenerateUniqueMaxState(new[] { this, another }); | |
var accept = start + 1; | |
var stateList = new List<int>(States); | |
stateList.AddRange(another.States); | |
stateList.Add(start); | |
stateList.Add(accept); | |
var nfa = new NonDeterministicFiniteAutomaton(stateList.ToArray(), start, new[] { accept }); | |
// add ε-transition to the original start transitions | |
nfa.AddTransition(start, Epsilon, new[] { StartState, another.StartState }); | |
// preserve all other transitions | |
foreach (var ((current, ch), v) in TransitionTable) | |
{ | |
nfa.AddTransition(current, ch, v); | |
} | |
foreach (var ((current, ch), v) in another.TransitionTable) | |
{ | |
nfa.AddTransition(current, ch, v); | |
} | |
// add ε-transitions from the original accepts to new accept | |
foreach (var acceptState in AcceptStates) | |
{ | |
// preserve all the other transitions coming out of acceptState | |
var trans = TransitionTable.TryGetValue((acceptState, Epsilon), out var ts) ? ts : Array.Empty<int>(); | |
nfa.AddTransition(acceptState, Epsilon, new HashSet<int>(trans) { accept }.ToArray()); | |
} | |
foreach (var anotherAcceptState in another.AcceptStates) | |
{ | |
// preserve all the other transitions coming out of acceptState | |
var trans = another.TransitionTable.TryGetValue((anotherAcceptState, Epsilon), out var ts) ? ts : Array.Empty<int>(); | |
nfa.AddTransition(anotherAcceptState, Epsilon, new HashSet<int>(trans) { accept }.ToArray()); | |
} | |
return nfa; | |
} | |
public NonDeterministicFiniteAutomaton Concat(NonDeterministicFiniteAutomaton another) | |
{ | |
DistinctStates(this, another); | |
var stateList = new List<int>(States); | |
stateList.AddRange(another.States); | |
var nfa = new NonDeterministicFiniteAutomaton(stateList.ToArray(), StartState, another.AcceptStates); | |
// Copy transitions | |
foreach (var ((current, ch), to) in TransitionTable) | |
{ | |
nfa.AddTransition(current, ch, to); | |
} | |
foreach (var ((current, ch), to) in another.TransitionTable) | |
{ | |
nfa.AddTransition(current, ch, to); | |
} | |
// Replace the accept states transitions | |
foreach (var acceptState in AcceptStates) | |
{ | |
// preserve all the other transitions coming out of acceptState | |
// the using of ε-transition is tricky, we don't need to consider the loop on the start state | |
var trans = TransitionTable.TryGetValue((acceptState, Epsilon), out var ts) ? ts : Array.Empty<int>(); | |
nfa.AddTransition(acceptState, Epsilon, new HashSet<int>(trans) { another.StartState }.ToArray()); | |
} | |
return nfa; | |
} | |
public NonDeterministicFiniteAutomaton KleeneClosure() | |
{ | |
var start = GenerateUniqueMaxState(new[] { this }); | |
var accept = start + 1; | |
var states = new List<int> | |
{ | |
start, | |
accept | |
}; | |
states.AddRange(States); | |
var nfa = new NonDeterministicFiniteAutomaton(states.ToArray(), StartState, new[] { accept }); | |
nfa.AddTransition(start, Epsilon, new[] { StartState, accept }); | |
foreach (var ((current, ch), to) in TransitionTable) | |
{ | |
nfa.AddTransition(current, ch, to); | |
} | |
foreach (var acceptState in AcceptStates) | |
{ | |
var trans = TransitionTable.TryGetValue((acceptState, Epsilon), out var ts) ? ts : Array.Empty<int>(); | |
nfa.AddTransition(acceptState, Epsilon, new HashSet<int>(trans) { StartState, accept }.ToArray()); | |
} | |
return nfa; | |
} | |
public NonDeterministicFiniteAutomaton(int[] states, int startState, int[] acceptStates) | |
{ | |
States = states; | |
StartState = startState; | |
AcceptStates = acceptStates; | |
TransitionTable = new Dictionary<(int current, int ch), int[]>(); | |
} | |
public void AddTransition(int current, int ch, int[] dest) | |
{ | |
TransitionTable[(current, ch)] = dest; | |
} | |
public bool RemoveTransition(int current, int ch) | |
{ | |
return TransitionTable.Remove((current, ch)); | |
} | |
public int[] Alphabet() | |
{ | |
return TransitionTable.Keys.Select(i => i.ch).Where(i => i != Epsilon).Distinct().ToArray(); | |
} | |
public int[] EpsilonClosure(int[] s) | |
{ | |
return s.Select(EpsilonClosure).SelectMany(Functions.Identity<int[]>()).Distinct().ToArray(); | |
} | |
public int[] EpsilonClosure(int s) | |
{ | |
// Get all the ε-transition-states of state s | |
if (TransitionTable.TryGetValue((s, Epsilon), out var entry)) | |
{ | |
// Use stack to check each of the entries recursively, which is all the ε-transition-states of s | |
var stack = new Stack<int>(entry); | |
// All of the elements in the entry must be reached from s by ε-transitions, so we can add them | |
// into the result without hesitation | |
// s itself is included, since s can reach itself by using an ε-edge, although it may not be pointed out | |
var result = new List<int>(entry) { s }; | |
// Find every ε-transition-state of every element presented in the stack | |
while (stack.Any()) | |
{ | |
var state = stack.Pop(); | |
if (TransitionTable.TryGetValue((state, Epsilon), out var nextEntry)) | |
{ | |
foreach (var i in nextEntry) | |
{ | |
// If you don't do this, It is possible that two nodes have ε-transitions | |
// point to each other, in such cases the stack will never be empty | |
// so make sure that the state i hasn't been visited before | |
if (!result.Contains(i)) | |
{ | |
// A ε-transition-state i of a particular state "state" in the stack is | |
// found, and it hasn't been visited before, so we add it to the result | |
result.Add(i); | |
// and push it onto the stack, because we also need to find all the | |
// ε-transition-states of state i, too | |
stack.Push(i); | |
} | |
} | |
} | |
} | |
return result.ToArray(); | |
} | |
// the returned value is meaningful if and only if there do exist some ε-transition-states | |
// for state s, other wise return an empty array denoting "nothing found" | |
return new[] { s }; | |
} | |
public int[] Move(int[] states, int c) | |
{ | |
return states.Select(s => TransitionTable.TryGetValue((s, c), out var sTrans) ? sTrans : Array.Empty<int>()) | |
.SelectMany(Functions.Identity<int[]>()).Distinct().ToArray(); | |
} | |
// Represents an intermediate state during the translation | |
// This class will be a state of a DFA, which semantically | |
// equals to a set of states of the NFA | |
private class MarkedState | |
{ | |
// Is this state being marked? | |
public bool Marked; | |
// The corresponding NFA states | |
public readonly int[] NfaStates; | |
// Performance consideration | |
public readonly int HashCode; | |
public bool IsAccept; | |
public int Id; | |
public MarkedState(int[] nfaStates) | |
{ | |
Marked = false; | |
NfaStates = nfaStates; | |
HashCode = NfaStates.IntArrayHash(); | |
} | |
public void Mark() => Marked = true; | |
} | |
// Every state in the DFA corresponds to a set of states in the NFA | |
// see https://sora.ink/archives/906 | |
// This is a slightly optimized C# implementation of the subset construction | |
// algorithm from dragon book 3-32 | |
// It uses an array hashcode to reduces the state-comparison time, instead | |
// of performs an array search each time | |
public DeterministicFiniteAutomaton Dfa() | |
{ | |
// The intermediate state dictionary | |
var transitions = new Dictionary<(MarkedState current, int ch), MarkedState>(); | |
// Get the state representing ε-closure(s0) | |
var startStateClosure = EpsilonClosure(StartState); | |
// Add it to the state list | |
var states = new List<MarkedState> | |
{ | |
new(startStateClosure) | |
{ | |
Id = 0, | |
IsAccept = startStateClosure.Any(AcceptStates.Contains) | |
} | |
}; | |
var alphabet = Alphabet(); | |
MarkedState markedState; | |
var id = 1; | |
// Iteratively process all the states that haven't been marked yet | |
// There is an alternate approach is to use two data holder, one for | |
// the "unmarked" and one for the "marked", but the overall space and | |
// time complexities are almost the same | |
// If a state is "unmarked", it means that we haven't search its ε-closure | |
// yet, since for every state we must find its ε-closure according to the | |
// algorithm, so this step is necessary, but like I said, two data holders | |
// can avoid the using of "marker" flag, it is only useful when you are mixing | |
// the searched and un-searched states | |
while ((markedState = states.FirstOrDefault(x => !x.Marked)) != null) | |
{ | |
// into the body of while loop means that we're about to search the ε-closure | |
// of markedState, so we mark it | |
markedState.Mark(); | |
// for every letter in the alphabet(we need to calculate all the possibilities | |
// of transitions) | |
foreach (var i in alphabet) | |
{ | |
// search the ε-closure of current state, which is markedState | |
var closureState = new MarkedState(EpsilonClosure(Move(markedState.NfaStates, i))); | |
// see if the state that represents its ε-closure is already presented in | |
// the dictionary by checking the array, the HashCode above comes in handy, | |
// we don't need to use a exponential time algorithm to compare if two array's | |
// content is equal, just compare the HashCode property in O(1) time | |
var presence = states.FirstOrDefault(x => x.HashCode == closureState.HashCode); | |
if (presence == null) | |
{ | |
// if the state is not already exist, we create a new one | |
closureState.Id = id++; | |
if (closureState.NfaStates.Any(AcceptStates.Contains)) | |
{ | |
closureState.IsAccept = true; | |
} | |
// Add it to the states, and stay unmarked because we haven't | |
// start searching its ε-closure yet | |
states.Add(closureState); | |
// let the presence(which must be null now) be closureState(which is our initialized state) | |
presence = closureState; | |
} | |
// Set the transition rule, we set presence to closureState if it's null | |
// so it must be non-null here | |
transitions[(markedState, i)] = presence; | |
} | |
} | |
// Turn the MarkedState-represented states and transitions into integer-represented states and transitions | |
// Like I said, the former one is an intermediate approach | |
var dfaStates = states.Select(i => i.Id).ToArray(); | |
var dfaAcceptStates = states.Where(i => i.IsAccept).Select(i => i.Id).ToArray(); | |
return new DeterministicFiniteAutomaton(dfaStates, 0, dfaAcceptStates) | |
{ | |
TransitionTable = transitions.ToDictionary(k => | |
{ | |
var ((current, ch), _) = k; | |
return (current.Id, ch); | |
}, k => k.Value.Id) | |
}; | |
} | |
// This is the C# implementation of the NFA simulation algorithm | |
// introduced by dragon book 3-37 | |
// The essential idea is to acquire all the possibilities of state after one | |
// move, and check if the last move's state set is joint with the accept set | |
public bool Run(string str) | |
{ | |
var result = str.Aggregate(EpsilonClosure(StartState), (current, c) => EpsilonClosure(Move(current, c))); | |
return result.Intersect(AcceptStates).Any(); | |
} | |
// This is the C# implementation of the efficient NFA simulation algorithm | |
// using a dynamic programming approach from dragon book 3-37, it uses an | |
// array [alreadyOn] to store those states who have been visited along with | |
// their closure. More precisely, it is an optimized version of the algorithm | |
// above | |
// | |
// During the traversal of the closure of a particular state, there may be | |
// one or more states that are visited more than once, as we know, for every | |
// state visited we must visit its ε-closure, too, and for a NFA with n | |
// states and m transitions, the ε-closure algorithm's time complexity can up | |
// to O(n), so if we search every state at each step, it may costs O(mn) time | |
// in total, which is inefficient, so we use a [alreadyOn] to hold the states | |
// that have been visited, note: if a state has been visited, then all of its | |
// ε-closure must also have been visited, we jump over all the states that have | |
// been marked in [alreadyOn]. | |
// We use [currentStates] to represent the current state set of the NFA, at | |
// first, the current state set will be the ε-closure of the [StartState], so | |
// we add them into the [currentStates] | |
// At each iteration, we visit and mark every state s in the [currentStates] | |
// and the ε-closure(move(s, char)) of s, we add them to [newStates] because | |
// their may be other states in [currentStates], and we need to handle them | |
// separately | |
// At the end of the iteration, [newStates] contains all the states that the | |
// NFA possibly at after reading a letter, so we add all the elements in the | |
// [newStates] back to [currentStates], and clear the [alreadyOn] | |
// | |
// By using this approach, instead of build a full transition table like what | |
// we did when converting the NFA to DFA, we simulate the execution WHILE we | |
// building the DFA, this is more efficient that the two algorithms above | |
// | |
// Since every state can be visited at most once, the complexity of AddState | |
// has been reduced from O(mn) to O(n), that's a huge improvement considering | |
// arbitrarily many transitions | |
public bool RunDp(string str) | |
{ | |
// the current state set after reading a particular letter | |
// before each iteration, this stack hold the possible states set | |
// of the last iteration | |
var currentStates = new Stack<int>(); | |
// this stack hold those states during an iteration | |
// at the end of iteration, its content will be copy into the | |
// currentStates | |
var newStates = new Stack<int>(); | |
// dynamic programming table, jump over those states that have been | |
// visited | |
var alreadyOn = new bool[States.Max() + 1]; // this is not a good idea, dictionary may be better | |
// default to false | |
Array.Fill(alreadyOn, false); | |
// adds a state and its ε-closure to the [newStates] | |
void AddState(int s) | |
{ | |
newStates.Push(s); | |
alreadyOn[s] = true; | |
if (TransitionTable.TryGetValue((s, Epsilon), out var epsilonStates)) | |
{ | |
foreach (var epsilonState in epsilonStates) | |
{ | |
if (!alreadyOn[epsilonState]) | |
{ | |
AddState(epsilonState); | |
} | |
} | |
} | |
} | |
// find the ε-closure(move(currentStates, c)), and add all of them to the [newStates] | |
void AlterStack(int c) | |
{ | |
while (currentStates.Any()) | |
{ | |
var oldState = currentStates.Pop(); | |
if (TransitionTable.TryGetValue((oldState, c), out var oldStatesClosure)) | |
{ | |
foreach (var state in oldStatesClosure) | |
{ | |
if (!alreadyOn[state]) | |
{ | |
AddState(state); | |
} | |
} | |
} | |
} | |
if (!newStates.Any()) | |
{ | |
throw SimulationFailedException.NoTransitionFound("", c); | |
} | |
// add [newStates] into [currentStates] and clear the [alreadyOn] | |
while (newStates.Any()) | |
{ | |
var newState = newStates.Pop(); | |
currentStates.Push(newState); | |
alreadyOn[newState] = false; | |
} | |
} | |
// initialize the simulation by adding the start state | |
AddState(StartState); | |
foreach (var c in str) | |
{ | |
AlterStack(c); | |
} | |
return currentStates.Intersect(AcceptStates).Any(); // check if the result and the AcceptStates are joint | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections; | |
using System.Collections.Generic; | |
namespace CompilerExercise | |
{ | |
public class Program | |
{ | |
public static readonly int Epsilon = -1; | |
public static void Main(string[] args) | |
{ | |
var nfa = new NonDeterministicFiniteAutomaton(new[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, 0, new[] { 10 }); | |
nfa.AddTransition(0, Epsilon, new[] { 1, 7 }); | |
nfa.AddTransition(1, Epsilon, new[] { 2, 4 }); | |
nfa.AddTransition(2, 'a', new[] { 3 }); | |
nfa.AddTransition(4, 'b', new[] { 5 }); | |
nfa.AddTransition(3, Epsilon, new[] { 6 }); | |
nfa.AddTransition(5, Epsilon, new[] { 6 }); | |
nfa.AddTransition(6, Epsilon, new[] { 1, 7 }); | |
nfa.AddTransition(7, 'a', new[] { 8 }); | |
nfa.AddTransition(8, 'b', new[] { 9 }); | |
nfa.AddTransition(9, 'c', new[] { 10 }); | |
nfa.AddTransition(10, 'c', new[] { 10 }); | |
var singleB = new NonDeterministicFiniteAutomaton(new[] { 0, 1 }, 0, new[] { 1 }); | |
singleB.AddTransition(0, 'b', new[] { 0, 1 }); | |
// var dfa = nfa.Dfa(); | |
// Console.WriteLine(dfa.Run("abbababababababababbbaaaaaab")); | |
// Console.WriteLine(nfa.Run("abbababababababababbbaaaaaab")); | |
// Console.WriteLine(nfa.RunDp("abbababababababababbbaaaaaab")); | |
Console.WriteLine(nfa.Concat(singleB).RunDp("aaabcccccccccccccccabbbbbba")); | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Runtime.Serialization; | |
namespace CompilerExercise | |
{ | |
public class SimulationFailedException : Exception | |
{ | |
public SimulationFailedException() | |
{ | |
} | |
protected SimulationFailedException(SerializationInfo info, StreamingContext context) : base(info, context) | |
{ | |
} | |
public SimulationFailedException(string message) : base(message) | |
{ | |
} | |
public SimulationFailedException(string message, Exception innerException) : base(message, innerException) | |
{ | |
} | |
public static Exception NoTransitionFound(string state, int c) | |
{ | |
return new SimulationFailedException($"No transition found for {(char) c} at state {state}"); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment