Skip to content

Instantly share code, notes, and snippets.

@TheRayTracer
Created April 21, 2012 12:58
Show Gist options
  • Save TheRayTracer/2436989 to your computer and use it in GitHub Desktop.
Save TheRayTracer/2436989 to your computer and use it in GitHub Desktop.
The following is a naive implementation of the Rabin-Karp algorithm to search for sub strings within larger strings using a rolling hash. The standard strstr() function will outperform this proof-of-concept implementation, but this implementation is in th
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#endif
#include <iostream>
#include <cstring>
#include <cassert>
#include <sys/stat.h>
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
namespace std { };
using namespace std;
size_t hash(char* str, size_t i)
{
size_t h = 0;
// if (str != NULL)
{
while (i-- != 0)
{
h += *str;
str++;
}
}
return h;
}
char* rabin_karp(char* s, char* find)
{
char* p = NULL;
if (s != NULL && find != NULL)
{
size_t n = strlen(s);
size_t m = strlen(find);
if (n > m)
{
size_t hfind = hash(find, m);
size_t hs = hash(s, m);
char* end = s + (n - m + 1);
for (char* i = s; i < end; ++i)
{
if (hs == hfind)
{
if (strncmp(i, find, m) == 0)
{
p = i;
break;
}
}
hs -= *i; // Rolling Hash.
hs += *(i + m);
}
}
}
return p;
}
int main(int argc, char* argv[])
{
if (argc > 1)
{
struct stat file_stat;
stat(argv[1], &file_stat);
FILE* fp = fopen(argv[1], "r");
if (fp != NULL)
{
char* s = new char[file_stat.st_size];
assert(s != NULL);
fread(s, 1, file_stat.st_size, fp);
fclose(fp);
for (int i = 2; i < argc; ++i)
{
{
DWORD start = GetTickCount();
bool found = rabin_karp(s, argv[i]) != NULL;
DWORD finish = GetTickCount();
DWORD diff = finish - start;
cout << "Algorithm: Rabin-Karp, search substring: \"" << argv[i] << "\", found: " << boolalpha << found << ", time: " << diff / 1000.0 << endl;
}
{
DWORD start = GetTickCount();
bool found = strstr(s, argv[i]) != NULL;
DWORD finish = GetTickCount();
DWORD diff = finish - start;
cout << "Algorithm: strstr(), search substring: \"" << argv[i] << "\", found: " << boolalpha << found << ", time: " << diff / 1000.0 << endl;
}
}
}
}
cin.get();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment