Created
December 16, 2022 23:12
-
-
Save avonwyss/9bce82723de856ad4d0263caf1116826 to your computer and use it in GitHub Desktop.
Knuth-Morris-Pratt Pattern Matching
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace bsn.Har.Multipart { | |
public interface IKmpMatcher<in T> { | |
void Reset(int patternPosition = 0); | |
bool Next(T value); | |
int Index { | |
get; | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
namespace bsn.Har.Multipart { | |
// Knuth-Morris-Pratt algorithm | |
public class KmpPattern<T> { | |
private readonly T[] pattern; | |
private readonly int[] table; | |
private readonly IEqualityComparer<T> comparer; | |
public KmpPattern(IReadOnlyList<T> pattern, IEqualityComparer<T> comparer = null) { | |
this.comparer = comparer ?? EqualityComparer<T>.Default; | |
this.pattern = pattern.ToArray(); | |
if (this.pattern.Length > 1) { | |
table = new int[this.pattern.Length]; | |
table[0] = -1; // not used | |
for (int i=0, j=-1, end=this.pattern.Length-1; i<end;) { | |
if (j < 0 || this.comparer.Equals(this.pattern[i], this.pattern[j])) { | |
table[++i] = ++j; | |
} else { | |
j = table[j]; | |
} | |
} | |
} | |
} | |
public IReadOnlyList<T> Pattern => pattern; | |
public IReadOnlyList<int> Table => table; | |
public IEqualityComparer<T> Comparer => comparer; | |
public int Length => pattern.Length; | |
public IKmpMatcher<T> CreateMatcher() { | |
return pattern.Length == 1 | |
? (IKmpMatcher<T>)new KmpSingleMatcher<T>(pattern[0], comparer) | |
: new KmpTableMatcher<T>(pattern, table, comparer); | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using Xunit; | |
using Xunit.Abstractions; | |
namespace bsn.Har.Multipart { | |
public class KmpPatternTest { | |
private readonly ITestOutputHelper output; | |
public KmpPatternTest(ITestOutputHelper output) { | |
this.output = output; | |
} | |
[Theory] | |
[InlineData("aa", new[] { -1, 0 })] | |
[InlineData("ab", new[] { -1, 0 })] | |
[InlineData("aaa", new[] { -1, 0, 1 })] | |
[InlineData("aba", new[] { -1, 0, 0 })] | |
[InlineData("abcabx", new[] { -1, 0, 0, 0, 1, 2 })] | |
[InlineData("abcdex", new[] { -1, 0, 0, 0, 0, 0 })] | |
[InlineData("ababaaaba", new[] { -1, 0, 0, 1, 2, 3, 1, 1, 2 })] | |
[InlineData("aaaaaaaab", new[] { -1, 0, 1, 2, 3, 4, 5, 6, 7 })] | |
public void TableCreation(string pattern, int[] expected) { | |
var actual = new KmpPattern<char>(pattern.ToCharArray()).Table; | |
output.WriteLine("Expected:\t{0}", string.Join(", ", expected)); | |
output.WriteLine("Actual:\t{0}", string.Join(", ", actual)); | |
Assert.Equal(expected, actual); | |
} | |
[Theory] | |
[InlineData("a", "xyz", new int[] { })] | |
[InlineData("a", "axyz", new[] { 0 })] | |
[InlineData("a", "xyza", new[] { 3 })] | |
[InlineData("a", "axyza", new[] { 0, 4 })] | |
[InlineData("a", "xayaz", new[] { 1, 3 })] | |
[InlineData("ab", "xyz", new int[] { })] | |
[InlineData("ab", "abxyz", new[] { 0 })] | |
[InlineData("ab", "xyzab", new[] { 3 })] | |
[InlineData("ab", "abxayazab", new[] { 0, 7 })] | |
[InlineData("ab", "axabyabzb", new[] { 2, 5 })] | |
[InlineData("aa", "xyz", new int[] { })] | |
[InlineData("aa", "aaxyz", new[] { 0 })] | |
[InlineData("aa", "xayzaa", new[] { 4 })] | |
[InlineData("aa", "aaaxayazaaa", new[] { 0, 8 })] | |
[InlineData("aa", "axaaayaaza", new[] { 2, 6 })] | |
[InlineData("aa", "aaaa", new[] { 0, 2 })] | |
[InlineData("abc", "ababc", new[] { 2 })] | |
public void Matching(string pattern, string input, int[] expected) { | |
var matcher = new KmpPattern<char>(pattern.ToCharArray()).CreateMatcher(); | |
var actual = new List<int>(); | |
foreach (var ch in input) { | |
if (matcher.Next(ch)) { | |
actual.Add(matcher.Index); | |
} | |
} | |
Assert.Equal(expected, actual); | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Runtime.CompilerServices; | |
namespace bsn.Har.Multipart { | |
internal class KmpSingleMatcher<T>: IKmpMatcher<T> { | |
private int i; | |
private readonly T pattern; | |
private readonly IEqualityComparer<T> comparer; | |
public KmpSingleMatcher(T pattern, IEqualityComparer<T> comparer) { | |
this.pattern = pattern; | |
this.comparer = comparer; | |
Reset(); | |
} | |
public void Reset(int patternPosition = 0) { | |
if (patternPosition != 0) { | |
throw new ArgumentOutOfRangeException(nameof(patternPosition)); | |
} | |
i = 0; | |
Index = -1; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
public bool Next(T value) { | |
if (comparer.Equals(value, pattern)) { | |
Index = i++; | |
return true; | |
} | |
i++; | |
return false; | |
} | |
public int Index { | |
get; | |
private set; | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
namespace bsn.Har.Multipart { | |
public class KmpTableMatcher<T>: IKmpMatcher<T> { | |
private int j; | |
private int startIndex; | |
private readonly T[] pattern; | |
private readonly int[] table; | |
private readonly IEqualityComparer<T> comparer; | |
public KmpTableMatcher(T[] pattern, int[] table, IEqualityComparer<T> comparer) { | |
this.pattern = pattern; | |
this.table = table; | |
this.comparer = comparer; | |
Reset(); | |
} | |
public void Reset(int patternPosition = 0) { | |
if (patternPosition < 0 || patternPosition >= pattern.Length) { | |
throw new ArgumentOutOfRangeException(nameof(patternPosition)); | |
} | |
j = patternPosition; | |
startIndex = 0; | |
Index = -1; | |
} | |
public bool Next(T value) { | |
if (comparer.Equals(pattern[j], value)) { | |
if (++j == pattern.Length) { | |
Index = startIndex; | |
startIndex += j; | |
j = 0; | |
return true; | |
} | |
return false; | |
} | |
if (j == 0) { | |
startIndex++; | |
return false; | |
} | |
startIndex += j; | |
j = table[j]; | |
return Next(value); // tail-call | |
} | |
public int Index { | |
get; | |
private set; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment