Skip to content

Instantly share code, notes, and snippets.

@codescv
Created December 16, 2013 10:07
Show Gist options
  • Save codescv/7984760 to your computer and use it in GitHub Desktop.
Save codescv/7984760 to your computer and use it in GitHub Desktop.
// GistID: 7984760
// KMP substring search && FSM substring search
#include <string>
#include <iostream>
#include <vector>
using namespace std;
typedef vector<vector<int> > FSMType;
const int R = 256;
template<class T>
ostream & operator<<(ostream & os, vector<T>vec) {
os << "[";
for (int i = 0; i < vec.size(); i++) {
os << vec[i] << ",";
}
os << "]";
}
FSMType build_fsm(const string &pattern)
{
FSMType fsm(pattern.size(), vector<int>(R, 0));
int X = 0;
for (int i = 0; i < pattern.size(); i++) {
for (int j = 0; j < R; j++) {
if (j == pattern[i]) {
fsm[i][j] = i+1;
} else {
fsm[i][j] = fsm[X][j];
}
}
if (i >= 1) {
X = fsm[X][pattern[i]];
}
}
return fsm;
}
int fsm_search(const string &text, const string &pattern)
{
FSMType fsm = build_fsm(pattern);
int state = 0, fin = fsm.size();
int i = 0;
while (i < text.length() && state != fin) {
state = fsm[state][text[i]];
i++;
}
if (state == fin) {
return i - pattern.size();
} else {
return -1;
}
}
vector<int> build_next(const string &pattern)
{
vector<int> next(pattern.size(), 0);
next[0] = 0;
for (int i = 1; i < pattern.size(); i++) {
int p = next[i-1];
while (p > 0 && pattern[p] != pattern[i]) {
p = next[p-1];
}
if (pattern[p] == pattern[i]) {
next[i] = p+1;
}
}
return next;
}
int kmp_search(const string &text, const string &pattern)
{
int p = 0, q = 0;
vector<int> next = build_next(pattern);
while (q != text.length() && p != pattern.length()) {
if (pattern[p] == text[q]) {
p++;
q++;
} else {
if (p == 0) {
q++;
} else {
p = max(next[p-1]+1, 0);
}
}
}
if (p == pattern.length()) {
return q-p;
} else {
return -1;
}
}
int search(const string &text, const string &pattern)
{
//return fsm_search(text, pattern);
return kmp_search(text, pattern);
}
int main(int argc, const char *argv[])
{
string text = "ababaabababcaaaab";
string pattern = "babc";
int pos = search(text, pattern);
cout << "pos: " << pos << endl;
if (pos > 0)
cout << "found: " << text.substr(pos, text.size() - pos) << endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment