Created
December 16, 2013 10:07
-
-
Save codescv/7984760 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// GistID: 7984760 | |
// KMP substring search && FSM substring search | |
#include <string> | |
#include <iostream> | |
#include <vector> | |
using namespace std; | |
typedef vector<vector<int> > FSMType; | |
const int R = 256; | |
template<class T> | |
ostream & operator<<(ostream & os, vector<T>vec) { | |
os << "["; | |
for (int i = 0; i < vec.size(); i++) { | |
os << vec[i] << ","; | |
} | |
os << "]"; | |
} | |
FSMType build_fsm(const string &pattern) | |
{ | |
FSMType fsm(pattern.size(), vector<int>(R, 0)); | |
int X = 0; | |
for (int i = 0; i < pattern.size(); i++) { | |
for (int j = 0; j < R; j++) { | |
if (j == pattern[i]) { | |
fsm[i][j] = i+1; | |
} else { | |
fsm[i][j] = fsm[X][j]; | |
} | |
} | |
if (i >= 1) { | |
X = fsm[X][pattern[i]]; | |
} | |
} | |
return fsm; | |
} | |
int fsm_search(const string &text, const string &pattern) | |
{ | |
FSMType fsm = build_fsm(pattern); | |
int state = 0, fin = fsm.size(); | |
int i = 0; | |
while (i < text.length() && state != fin) { | |
state = fsm[state][text[i]]; | |
i++; | |
} | |
if (state == fin) { | |
return i - pattern.size(); | |
} else { | |
return -1; | |
} | |
} | |
vector<int> build_next(const string &pattern) | |
{ | |
vector<int> next(pattern.size(), 0); | |
next[0] = 0; | |
for (int i = 1; i < pattern.size(); i++) { | |
int p = next[i-1]; | |
while (p > 0 && pattern[p] != pattern[i]) { | |
p = next[p-1]; | |
} | |
if (pattern[p] == pattern[i]) { | |
next[i] = p+1; | |
} | |
} | |
return next; | |
} | |
int kmp_search(const string &text, const string &pattern) | |
{ | |
int p = 0, q = 0; | |
vector<int> next = build_next(pattern); | |
while (q != text.length() && p != pattern.length()) { | |
if (pattern[p] == text[q]) { | |
p++; | |
q++; | |
} else { | |
if (p == 0) { | |
q++; | |
} else { | |
p = max(next[p-1]+1, 0); | |
} | |
} | |
} | |
if (p == pattern.length()) { | |
return q-p; | |
} else { | |
return -1; | |
} | |
} | |
int search(const string &text, const string &pattern) | |
{ | |
//return fsm_search(text, pattern); | |
return kmp_search(text, pattern); | |
} | |
int main(int argc, const char *argv[]) | |
{ | |
string text = "ababaabababcaaaab"; | |
string pattern = "babc"; | |
int pos = search(text, pattern); | |
cout << "pos: " << pos << endl; | |
if (pos > 0) | |
cout << "found: " << text.substr(pos, text.size() - pos) << endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment