Skip to content

Instantly share code, notes, and snippets.

@avonwyss
Created December 16, 2022 23:12
Show Gist options
  • Save avonwyss/9bce82723de856ad4d0263caf1116826 to your computer and use it in GitHub Desktop.
Save avonwyss/9bce82723de856ad4d0263caf1116826 to your computer and use it in GitHub Desktop.
Knuth-Morris-Pratt Pattern Matching
namespace bsn.Har.Multipart {
public interface IKmpMatcher<in T> {
void Reset(int patternPosition = 0);
bool Next(T value);
int Index {
get;
}
}
}
using System;
using System.Collections.Generic;
using System.Linq;
namespace bsn.Har.Multipart {
// Knuth-Morris-Pratt algorithm
public class KmpPattern<T> {
private readonly T[] pattern;
private readonly int[] table;
private readonly IEqualityComparer<T> comparer;
public KmpPattern(IReadOnlyList<T> pattern, IEqualityComparer<T> comparer = null) {
this.comparer = comparer ?? EqualityComparer<T>.Default;
this.pattern = pattern.ToArray();
if (this.pattern.Length > 1) {
table = new int[this.pattern.Length];
table[0] = -1; // not used
for (int i=0, j=-1, end=this.pattern.Length-1; i<end;) {
if (j < 0 || this.comparer.Equals(this.pattern[i], this.pattern[j])) {
table[++i] = ++j;
} else {
j = table[j];
}
}
}
}
public IReadOnlyList<T> Pattern => pattern;
public IReadOnlyList<int> Table => table;
public IEqualityComparer<T> Comparer => comparer;
public int Length => pattern.Length;
public IKmpMatcher<T> CreateMatcher() {
return pattern.Length == 1
? (IKmpMatcher<T>)new KmpSingleMatcher<T>(pattern[0], comparer)
: new KmpTableMatcher<T>(pattern, table, comparer);
}
}
}
using System;
using System.Collections.Generic;
using Xunit;
using Xunit.Abstractions;
namespace bsn.Har.Multipart {
public class KmpPatternTest {
private readonly ITestOutputHelper output;
public KmpPatternTest(ITestOutputHelper output) {
this.output = output;
}
[Theory]
[InlineData("aa", new[] { -1, 0 })]
[InlineData("ab", new[] { -1, 0 })]
[InlineData("aaa", new[] { -1, 0, 1 })]
[InlineData("aba", new[] { -1, 0, 0 })]
[InlineData("abcabx", new[] { -1, 0, 0, 0, 1, 2 })]
[InlineData("abcdex", new[] { -1, 0, 0, 0, 0, 0 })]
[InlineData("ababaaaba", new[] { -1, 0, 0, 1, 2, 3, 1, 1, 2 })]
[InlineData("aaaaaaaab", new[] { -1, 0, 1, 2, 3, 4, 5, 6, 7 })]
public void TableCreation(string pattern, int[] expected) {
var actual = new KmpPattern<char>(pattern.ToCharArray()).Table;
output.WriteLine("Expected:\t{0}", string.Join(", ", expected));
output.WriteLine("Actual:\t{0}", string.Join(", ", actual));
Assert.Equal(expected, actual);
}
[Theory]
[InlineData("a", "xyz", new int[] { })]
[InlineData("a", "axyz", new[] { 0 })]
[InlineData("a", "xyza", new[] { 3 })]
[InlineData("a", "axyza", new[] { 0, 4 })]
[InlineData("a", "xayaz", new[] { 1, 3 })]
[InlineData("ab", "xyz", new int[] { })]
[InlineData("ab", "abxyz", new[] { 0 })]
[InlineData("ab", "xyzab", new[] { 3 })]
[InlineData("ab", "abxayazab", new[] { 0, 7 })]
[InlineData("ab", "axabyabzb", new[] { 2, 5 })]
[InlineData("aa", "xyz", new int[] { })]
[InlineData("aa", "aaxyz", new[] { 0 })]
[InlineData("aa", "xayzaa", new[] { 4 })]
[InlineData("aa", "aaaxayazaaa", new[] { 0, 8 })]
[InlineData("aa", "axaaayaaza", new[] { 2, 6 })]
[InlineData("aa", "aaaa", new[] { 0, 2 })]
[InlineData("abc", "ababc", new[] { 2 })]
public void Matching(string pattern, string input, int[] expected) {
var matcher = new KmpPattern<char>(pattern.ToCharArray()).CreateMatcher();
var actual = new List<int>();
foreach (var ch in input) {
if (matcher.Next(ch)) {
actual.Add(matcher.Index);
}
}
Assert.Equal(expected, actual);
}
}
}
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
namespace bsn.Har.Multipart {
internal class KmpSingleMatcher<T>: IKmpMatcher<T> {
private int i;
private readonly T pattern;
private readonly IEqualityComparer<T> comparer;
public KmpSingleMatcher(T pattern, IEqualityComparer<T> comparer) {
this.pattern = pattern;
this.comparer = comparer;
Reset();
}
public void Reset(int patternPosition = 0) {
if (patternPosition != 0) {
throw new ArgumentOutOfRangeException(nameof(patternPosition));
}
i = 0;
Index = -1;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool Next(T value) {
if (comparer.Equals(value, pattern)) {
Index = i++;
return true;
}
i++;
return false;
}
public int Index {
get;
private set;
}
}
}
using System;
using System.Collections.Generic;
namespace bsn.Har.Multipart {
public class KmpTableMatcher<T>: IKmpMatcher<T> {
private int j;
private int startIndex;
private readonly T[] pattern;
private readonly int[] table;
private readonly IEqualityComparer<T> comparer;
public KmpTableMatcher(T[] pattern, int[] table, IEqualityComparer<T> comparer) {
this.pattern = pattern;
this.table = table;
this.comparer = comparer;
Reset();
}
public void Reset(int patternPosition = 0) {
if (patternPosition < 0 || patternPosition >= pattern.Length) {
throw new ArgumentOutOfRangeException(nameof(patternPosition));
}
j = patternPosition;
startIndex = 0;
Index = -1;
}
public bool Next(T value) {
if (comparer.Equals(pattern[j], value)) {
if (++j == pattern.Length) {
Index = startIndex;
startIndex += j;
j = 0;
return true;
}
return false;
}
if (j == 0) {
startIndex++;
return false;
}
startIndex += j;
j = table[j];
return Next(value); // tail-call
}
public int Index {
get;
private set;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment