Skip to content

Instantly share code, notes, and snippets.

@Wunkolo
Created August 25, 2016 00:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Wunkolo/0b93ff719d384ef4ea5cb681fbcf8458 to your computer and use it in GitHub Desktop.
Save Wunkolo/0b93ff719d384ef4ea5cb681fbcf8458 to your computer and use it in GitHub Desktop.
#pragma once
#include <cstdint>
#include <string>
#include <array>
#include <vector>
class BoyerMoore
{
public:
BoyerMoore(const std::string &Pattern)
:
Pattern(Pattern)
{
BadCharacterPreProcess();
GoodPrefixPreProcess();
}
intmax_t Match(const std::string &Text) const
{
intmax_t Shift = 0;
while( Shift <= Text.length() - Pattern.length() )
{
// Test all characters starting from the last
intmax_t i = Pattern.length() - 1;
for(
;
i >= 0 && Pattern[i] == Text[Shift + i];
i-- );
if( i < 0 ) // Match
{
return Shift;
}
else // Otherwise, Bad character rule.
{
Shift += std::max<intmax_t>(GoodPrefixLUT[1][i + 1], BadCharacterLUT[Text[Shift + i]] - Pattern.length() + 1 + i);
}
}
return -1;
}
std::vector<intmax_t> MatchAll(const std::string &Text) const
{
intmax_t Shift = 0;
std::vector<intmax_t> Results;
while( Shift <= Text.length() - Pattern.length() )
{
// Test all characters starting from the last
intmax_t i = Pattern.length() - 1;
for(
;
i >= 0 && Pattern[i] == Text[Shift + i];
i--
);
if( i < 0 ) // Match
{
Results.push_back(Shift);
Shift += GoodPrefixLUT[1][0];
}
else // Otherwise, Bad character rule.
{
Shift += std::max<intmax_t>(GoodPrefixLUT[1][i + 1], BadCharacterLUT[Text[Shift + i]] - Pattern.length() + 1 + i);
}
}
return Results;
}
private:
std::string Pattern;
// Bad character heuristic
std::array<uintmax_t, 256> BadCharacterLUT;
void BadCharacterPreProcess()
{
BadCharacterLUT.fill(Pattern.length());
for( size_t i = 0; i < Pattern.length(); i++ )
{
BadCharacterLUT[Pattern[i]] = Pattern.length() - i - 1;
}
}
//Good prefix hueristic
std::array< std::array<uintmax_t, 256>, 2> GoodPrefixLUT;
void GoodPrefixPreProcess()
{
GoodPrefixLUT[0].fill(0);
GoodPrefixLUT[1].fill(0);
// Case 1
size_t i = Pattern.length();
size_t j = i + 1;
GoodPrefixLUT[0][i] = j;
while( i > 0 )
{
while( j <= Pattern.length() && Pattern[i - 1] != Pattern[j - 1] )
{
if( GoodPrefixLUT[1][j] == 0 )
{
GoodPrefixLUT[1][j] = j - i;
}
j = GoodPrefixLUT[0][j];
}
i--; j--;
GoodPrefixLUT[0][i] = j;
}
// Case 2
for(
i = 0, j = GoodPrefixLUT[0][0];
i <= Pattern.length();
i++
)
{
if( GoodPrefixLUT[1][i] == 0 )
{
GoodPrefixLUT[1][i] = j;
}
if( i == j )
{
j = GoodPrefixLUT[0][j];
}
}
}
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment