Skip to content

Instantly share code, notes, and snippets.

@zahlenteufel
Last active August 29, 2015 13:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zahlenteufel/9701121 to your computer and use it in GitHub Desktop.
Save zahlenteufel/9701121 to your computer and use it in GitHub Desktop.
KMP Automaton Drawer with Graphviz
#include <iostream>
#include <string>
#include <vector>
#include <set>
#include <map>
using namespace std;
class automatonDrawer {
map<pair<int, int>, set<char> > edges;
public:
void draw(int final) {
cout << "digraph finite_state_machine {" << endl;
cout << "size=\"20,20\"; resolution=72;" << endl;
cout << "rankdir = LR;" << endl;
cout << "rank=\"same\";" << endl;
cout << "node [shape = doublecircle]; Q" << final << ";" << endl;
cout << "node [shape = circle];" << endl;
drawEdges();
cout << "}" << endl;
}
void connect(int i, int j, char c) {
edges[make_pair(i, j)].insert(c);
}
private:
static string join(const set<char>& cs) {
string s = "";
for (auto it = cs.begin(); it != cs.end(); ++it) {
if (it != cs.begin())
s += ",";
s += *it;
}
return s;
}
void drawEdges() {
for (auto edge = edges.begin(); edge != edges.end(); ++edge) {
int i = edge->first.first, j = edge->first.second;
cout << "Q" << i << " -> " << "Q" << j << " ";
cout << "[ label = \"" << join(edge->second) << "\"";
if (j < i)
cout << ", constraint = false";
cout << " ]; " << endl;
}
}
};

#KMP Drawer with Graphviz

Given a pattern string, it generates the Graphviz dot script to draw the implicit automaton that uses the KMP string matching algorithm.

To compile:

g++ -std=c++0x -Wall kmpdraw.cpp -o kmpdraw

Example:

./kmpdraw ababac | dot -Tpng -o automaton.png

Automaton for "ababac"

#include <iostream>
#include <string>
#include <vector>
using namespace std;
#include "automatonDrawer.cpp"
// code taken from http://community.topcoder.com/tc?module=Static&d1=tutorials&d2=stringSearching
// Pay attention!
// the prefix under index i in the table above is
// is the string from pattern[0] to pattern[i - 1]
// inclusive, so the last character of the string under
// index i is pattern[i - 1]
vector<int> build_failure_function(const string& pattern)
{
int m = pattern.size();
// let m be the length of the pattern
vector<int> F(m);
F[0] = F[1] = 0; // always true
for(int i = 2; i <= m; i++) {
// j is the index of the largest next partial match
// (the largest suffix/prefix) of the string under
// index i - 1
int j = F[i - 1];
for( ; ; ) {
// check to see if the last character of string i -
// - pattern[i - 1] "expands" the current "candidate"
// best partial match - the prefix under index j
if(pattern[j] == pattern[i - 1]) {
F[i] = j + 1; break;
}
// if we cannot "expand" even the empty string
if(j == 0) { F[i] = 0; break; }
// else go to the next best "candidate" partial match
j = F[j];
}
}
return F;
}
#define forn(i, n) for (int i = 0; i < int(n); ++i)
vector<char> extract_used_letters(const string& s) {
vector<bool> present(255, false);
forn (i,s.size())
present[s[i]] = true;
vector<char> res;
forn (i, 255)
if (present[i])
res.push_back(i);
return res;
}
void drawKMPAutomaton(const string& pattern) {
const vector<int> v = build_failure_function(pattern);
int n = v.size();
cerr << "failure function: ";
forn (i,n)
cerr << v[i] << " ";
cerr << endl;
automatonDrawer A;
const vector<char> letters = extract_used_letters(pattern);
forn (i, n+1) {
for (char letter : letters) {
int j = i;
for (;;) {
if (pattern[j] == letter) {
A.connect(i, j+1, letter);
break;
}
if (j > 0) { // didn't match, pick another shorter proper prefix-suffix to expand
j = v[j];
} else { // can't expand at all, go to the beginning.
A.connect(i, 0, letter);
break;
}
}
}
}
A.draw(n);
}
int main(int argc, char* argv[]) {
if (argc != 2) {
cerr << endl << "Usage: " << argv[0] << " pattern" << endl << endl;
cerr << "Generates a Graphviz dot file for plotting the implicit automaton used " << endl;
cerr << "to match the pattern during the KMP string matching algorithm." << endl << endl;
cerr << "example:" << endl << endl;
cerr << " " << argv[0] << " ababac | dot -Tpng -o automaton.png" << endl << endl;
return 1;
}
string pattern = argv[1];
drawKMPAutomaton(pattern);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment