Skip to content

Instantly share code, notes, and snippets.

@phraniiac
Last active October 11, 2016 02:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save phraniiac/72c3f5642a8343f0a672a493db5a18fe to your computer and use it in GitHub Desktop.
Save phraniiac/72c3f5642a8343f0a672a493db5a18fe to your computer and use it in GitHub Desktop.
String Algorithms
vector<int> Knuth_Morris_Pratt(string text,string pattern)
{
vector<int> F = build_failure_function(pattern);
vector<int> positions;
int i = 0; // state of automaton
int j = 0; // position of counter in string.
for( ; ; ) {
if(j == n) break; // we reached the end of the text
// if the current character of the text "expands" the
// current match
if(text[j] == pattern[i]) {
i++; // change the state of the automaton
j++; // get the next character from the text
if(i == m) { // match found
positions.push_back(j-1);
}
}
// if the current state is not zero (we have not
// reached the empty string yet) we try to
// "expand" the next best (largest) match
else if(i > 0) i = F[i];
// if we reached the empty string and failed to
// "expand" even it; we go to the next
// character from the text, the state of the
// automaton remains zero
else j++;
}
return positions;
}
vector<int> build_failure_function(string pattern)
{
vector <int> F(m);
// length of pattern = m.
F[0] = F[1] = 0; // always true
for(int i = 2; i <= m; i++) {
int j = F[i - 1];
for( ; ; ) {
if(pattern[j] == pattern[i - 1]) {
F[i] = j + 1; break;
}
if(j == 0) { F[i] = 0; break; }
j = F[j];
}
}
}
vector<int> z_function(string s) {
// string has 0 based indexing.
int n = (int) s.length();
// initialize z to vector of zeroes.
vector<int> z(n);
for (int i = 1, l = 0, r = 0; i < n; ++i) {
if (i <= r)
z[i] = min (r - i + 1, z[i - l]);
while (i + z[i] < n && s[z[i]] == s[i + z[i]])
++z[i];
if (i + z[i] - 1 > r)
l = i, r = i + z[i] - 1;
}
return z;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment