Created
April 21, 2012 12:58
-
-
Save TheRayTracer/2436989 to your computer and use it in GitHub Desktop.
The following is a naive implementation of the Rabin-Karp algorithm to search for sub strings within larger strings using a rolling hash. The standard strstr() function will outperform this proof-of-concept implementation, but this implementation is in th
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifdef _MSC_VER | |
#define _CRT_SECURE_NO_WARNINGS | |
#endif | |
#include <iostream> | |
#include <cstring> | |
#include <cassert> | |
#include <sys/stat.h> | |
#define WIN32_LEAN_AND_MEAN | |
#include <windows.h> | |
namespace std { }; | |
using namespace std; | |
size_t hash(char* str, size_t i) | |
{ | |
size_t h = 0; | |
// if (str != NULL) | |
{ | |
while (i-- != 0) | |
{ | |
h += *str; | |
str++; | |
} | |
} | |
return h; | |
} | |
char* rabin_karp(char* s, char* find) | |
{ | |
char* p = NULL; | |
if (s != NULL && find != NULL) | |
{ | |
size_t n = strlen(s); | |
size_t m = strlen(find); | |
if (n > m) | |
{ | |
size_t hfind = hash(find, m); | |
size_t hs = hash(s, m); | |
char* end = s + (n - m + 1); | |
for (char* i = s; i < end; ++i) | |
{ | |
if (hs == hfind) | |
{ | |
if (strncmp(i, find, m) == 0) | |
{ | |
p = i; | |
break; | |
} | |
} | |
hs -= *i; // Rolling Hash. | |
hs += *(i + m); | |
} | |
} | |
} | |
return p; | |
} | |
int main(int argc, char* argv[]) | |
{ | |
if (argc > 1) | |
{ | |
struct stat file_stat; | |
stat(argv[1], &file_stat); | |
FILE* fp = fopen(argv[1], "r"); | |
if (fp != NULL) | |
{ | |
char* s = new char[file_stat.st_size]; | |
assert(s != NULL); | |
fread(s, 1, file_stat.st_size, fp); | |
fclose(fp); | |
for (int i = 2; i < argc; ++i) | |
{ | |
{ | |
DWORD start = GetTickCount(); | |
bool found = rabin_karp(s, argv[i]) != NULL; | |
DWORD finish = GetTickCount(); | |
DWORD diff = finish - start; | |
cout << "Algorithm: Rabin-Karp, search substring: \"" << argv[i] << "\", found: " << boolalpha << found << ", time: " << diff / 1000.0 << endl; | |
} | |
{ | |
DWORD start = GetTickCount(); | |
bool found = strstr(s, argv[i]) != NULL; | |
DWORD finish = GetTickCount(); | |
DWORD diff = finish - start; | |
cout << "Algorithm: strstr(), search substring: \"" << argv[i] << "\", found: " << boolalpha << found << ", time: " << diff / 1000.0 << endl; | |
} | |
} | |
} | |
} | |
cin.get(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment