Created
August 25, 2016 00:16
-
-
Save Wunkolo/0b93ff719d384ef4ea5cb681fbcf8458 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include <cstdint> | |
#include <string> | |
#include <array> | |
#include <vector> | |
class BoyerMoore | |
{ | |
public: | |
BoyerMoore(const std::string &Pattern) | |
: | |
Pattern(Pattern) | |
{ | |
BadCharacterPreProcess(); | |
GoodPrefixPreProcess(); | |
} | |
intmax_t Match(const std::string &Text) const | |
{ | |
intmax_t Shift = 0; | |
while( Shift <= Text.length() - Pattern.length() ) | |
{ | |
// Test all characters starting from the last | |
intmax_t i = Pattern.length() - 1; | |
for( | |
; | |
i >= 0 && Pattern[i] == Text[Shift + i]; | |
i-- ); | |
if( i < 0 ) // Match | |
{ | |
return Shift; | |
} | |
else // Otherwise, Bad character rule. | |
{ | |
Shift += std::max<intmax_t>(GoodPrefixLUT[1][i + 1], BadCharacterLUT[Text[Shift + i]] - Pattern.length() + 1 + i); | |
} | |
} | |
return -1; | |
} | |
std::vector<intmax_t> MatchAll(const std::string &Text) const | |
{ | |
intmax_t Shift = 0; | |
std::vector<intmax_t> Results; | |
while( Shift <= Text.length() - Pattern.length() ) | |
{ | |
// Test all characters starting from the last | |
intmax_t i = Pattern.length() - 1; | |
for( | |
; | |
i >= 0 && Pattern[i] == Text[Shift + i]; | |
i-- | |
); | |
if( i < 0 ) // Match | |
{ | |
Results.push_back(Shift); | |
Shift += GoodPrefixLUT[1][0]; | |
} | |
else // Otherwise, Bad character rule. | |
{ | |
Shift += std::max<intmax_t>(GoodPrefixLUT[1][i + 1], BadCharacterLUT[Text[Shift + i]] - Pattern.length() + 1 + i); | |
} | |
} | |
return Results; | |
} | |
private: | |
std::string Pattern; | |
// Bad character heuristic | |
std::array<uintmax_t, 256> BadCharacterLUT; | |
void BadCharacterPreProcess() | |
{ | |
BadCharacterLUT.fill(Pattern.length()); | |
for( size_t i = 0; i < Pattern.length(); i++ ) | |
{ | |
BadCharacterLUT[Pattern[i]] = Pattern.length() - i - 1; | |
} | |
} | |
//Good prefix hueristic | |
std::array< std::array<uintmax_t, 256>, 2> GoodPrefixLUT; | |
void GoodPrefixPreProcess() | |
{ | |
GoodPrefixLUT[0].fill(0); | |
GoodPrefixLUT[1].fill(0); | |
// Case 1 | |
size_t i = Pattern.length(); | |
size_t j = i + 1; | |
GoodPrefixLUT[0][i] = j; | |
while( i > 0 ) | |
{ | |
while( j <= Pattern.length() && Pattern[i - 1] != Pattern[j - 1] ) | |
{ | |
if( GoodPrefixLUT[1][j] == 0 ) | |
{ | |
GoodPrefixLUT[1][j] = j - i; | |
} | |
j = GoodPrefixLUT[0][j]; | |
} | |
i--; j--; | |
GoodPrefixLUT[0][i] = j; | |
} | |
// Case 2 | |
for( | |
i = 0, j = GoodPrefixLUT[0][0]; | |
i <= Pattern.length(); | |
i++ | |
) | |
{ | |
if( GoodPrefixLUT[1][i] == 0 ) | |
{ | |
GoodPrefixLUT[1][i] = j; | |
} | |
if( i == j ) | |
{ | |
j = GoodPrefixLUT[0][j]; | |
} | |
} | |
} | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment