Last active
October 11, 2016 02:55
-
-
Save phraniiac/72c3f5642a8343f0a672a493db5a18fe to your computer and use it in GitHub Desktop.
String Algorithms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
vector<int> Knuth_Morris_Pratt(string text,string pattern) | |
{ | |
vector<int> F = build_failure_function(pattern); | |
vector<int> positions; | |
int i = 0; // state of automaton | |
int j = 0; // position of counter in string. | |
for( ; ; ) { | |
if(j == n) break; // we reached the end of the text | |
// if the current character of the text "expands" the | |
// current match | |
if(text[j] == pattern[i]) { | |
i++; // change the state of the automaton | |
j++; // get the next character from the text | |
if(i == m) { // match found | |
positions.push_back(j-1); | |
} | |
} | |
// if the current state is not zero (we have not | |
// reached the empty string yet) we try to | |
// "expand" the next best (largest) match | |
else if(i > 0) i = F[i]; | |
// if we reached the empty string and failed to | |
// "expand" even it; we go to the next | |
// character from the text, the state of the | |
// automaton remains zero | |
else j++; | |
} | |
return positions; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
vector<int> build_failure_function(string pattern) | |
{ | |
vector <int> F(m); | |
// length of pattern = m. | |
F[0] = F[1] = 0; // always true | |
for(int i = 2; i <= m; i++) { | |
int j = F[i - 1]; | |
for( ; ; ) { | |
if(pattern[j] == pattern[i - 1]) { | |
F[i] = j + 1; break; | |
} | |
if(j == 0) { F[i] = 0; break; } | |
j = F[j]; | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
vector<int> z_function(string s) { | |
// string has 0 based indexing. | |
int n = (int) s.length(); | |
// initialize z to vector of zeroes. | |
vector<int> z(n); | |
for (int i = 1, l = 0, r = 0; i < n; ++i) { | |
if (i <= r) | |
z[i] = min (r - i + 1, z[i - l]); | |
while (i + z[i] < n && s[z[i]] == s[i + z[i]]) | |
++z[i]; | |
if (i + z[i] - 1 > r) | |
l = i, r = i + z[i] - 1; | |
} | |
return z; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment