Skip to content

Instantly share code, notes, and snippets.

@mythagel
Created January 17, 2020 07:41
Show Gist options
  • Save mythagel/f87ea955f9ae891ac7225a8cf37b4e34 to your computer and use it in GitHub Desktop.
Save mythagel/f87ea955f9ae891ac7225a8cf37b4e34 to your computer and use it in GitHub Desktop.
// (c) 2020 Nicholas Gill GPLv3
#include <cstdio>
#include <cstdint>
#include <cstring>
template <typename CharT, unsigned N>
class KMPStream
{
public:
bool begin(const CharT* search, size_t size)
{
reset();
return buildTable(search, size);
}
// Returns current partial match count, matches at ABSOLUTE stream index from last reset()
int push(const CharT* buf, size_t size)
{
size_t off = absPos;
auto pos = [off, this] { return absPos - off; };
while (pos() != size)
{
while (matchIndex > -1 && search[matchIndex] != buf[pos()])
matchIndex = pmTable[matchIndex];
++matchIndex;
++absPos;
if (matchIndex >= pmTableSize)
{
printf("MATCH@%ld\n", absPos - matchIndex);
matchIndex = pmTable[matchIndex];
}
}
return matchIndex;
}
// Reset position - safe when no current partial match
void reset()
{
absPos = 0;
matchIndex = 0;
}
// classic KMP (debugging)
/* void match(const CharT* buf, size_t size) const
{
size_t pos = 0;
int matchIndex = 0;
while (pos != size)
{
while (matchIndex > -1 && search[matchIndex] != buf[pos])
matchIndex = pmTable[matchIndex];
++matchIndex;
++pos;
if (matchIndex >= pmTableSize)
{
printf("MATCH@%lu\n", pos - matchIndex);
matchIndex = pmTable[matchIndex];
}
}
}*/
private:
bool buildTable(const CharT* buf, size_t size)
{
if (size > N) return false;
pmTable[0] = -1;
pmTableSize = size;
size_t pos = 0;
int i = -1;
while (pos != pmTableSize)
{
search[pos] = buf[pos];
while (i > -1 && buf[pos] != buf[i])
i = pmTable[i];
++pos;
++i;
pmTable[pos] = (buf[pos] == buf[i]) ? pmTable[i] : i;
}
return true;
}
private:
int pmTable[N];
size_t pmTableSize;
CharT search[N];
// State
size_t absPos;
int matchIndex;
};
int main()
{
const char* needle = "ABCDABD";
KMPStream<char, 70> kmp;
kmp.begin(needle, strlen(needle));
const char* haystack[] = {"ABC ABCDAB ABCD", "AB", "DABDE"};
for (auto & c : haystack)
{
int i = kmp.push(c, strlen(c));
// if (i == 0) kmp.reset(); // Can reset mid stream if no match in progress
printf("i: %d\n", i);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment