Last active
January 26, 2022 08:11
-
-
Save EvanMu96/fe838403dcbe0b9a17f4691c7f535e91 to your computer and use it in GitHub Desktop.
Aho-Corasick String match algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <algorithm> | |
#include <string> | |
#include <vector> | |
using namespace std; | |
const int K = 26; | |
struct Vertex { | |
// where to link | |
int next[K]; | |
// is it the last character of a word | |
bool leaf = false; | |
int p = -1; | |
char pch; | |
int link = -1; | |
// where to move buffer | |
int go[K]; | |
// default parameters lead to a root | |
Vertex(int p=-1, char ch='$') : p(p), pch(ch) { | |
// initialized all direction into -1 | |
fill(begin(next), end(next), -1); | |
fill(begin(go), end(go), -1); | |
} | |
}; | |
vector<Vertex> t(1); | |
void add_string(string const& s) { | |
int v = 0; | |
for (char ch : s) { | |
int c = ch - 'a'; | |
if (t[v].next[c] == -1) { | |
// append a new Vertex | |
t[v].next[c] = t.size(); | |
t.emplace_back(v, ch); | |
} | |
v = t[v].next[c]; | |
} | |
t[v].leaf = true; | |
} | |
int go(int v, char ch); | |
int get_link(int v) { | |
if (t[v].link == -1) { | |
if (v == 0 || t[v].p == 0) | |
t[v].link = 0; | |
else | |
// recursively find the parent and try again | |
// until it reachs the root(v = 0) | |
t[v].link = go(get_link(t[v].p), t[v].pch); | |
} | |
return t[v].link; | |
} | |
int go(int v, char ch) { | |
int c = ch - 'a'; | |
if (t[v].go[c] == -1) { | |
if (t[v].next[c] != -1) | |
t[v].go[c] = t[v].next[c]; | |
else | |
// recursively find the parent and try again | |
// until it reachs the root(v = 0) | |
t[v].go[c] = v == 0 ? 0 : go(get_link(v), ch); | |
} | |
return t[v].go[c]; | |
} | |
void traverse(string const & obj) | |
{ | |
int v = 0; | |
for(int i = 0; i < obj.size(); i++) | |
{ | |
v = go(v, obj[i]); | |
cout << t[v].pch << " "; | |
if(t[v].leaf) | |
{ | |
cout << "OK"; | |
} | |
} | |
} | |
int main() | |
{ | |
add_string("tas"); | |
add_string("app"); | |
add_string("ray"); | |
add_string("mac"); | |
traverse("taspratray"); /* t a s OK$ r a t r a y OK */ | |
cout << endl; | |
traverse("fhduisufdasfdsaseh"); /* $ $ $ $ $ $ $ $ $ a $ $ $ $ a $ $ $ */ | |
cout << endl; | |
traverse("fesafeasmacapp"); /* $ $ $ a $ $ a $ m a c OKa p p OK */ | |
cout << endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment