Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Boyer Moore string search implementation in Python
# Boyer Moore String Search implementation in Python
# Ameer Ayoub <>
# Generate the Bad Character Skip List
def generateBadCharShift(term):
skipList = {}
for i in range(0, len(term)-1):
skipList[term[i]] = len(term)-i-1
return skipList
# Generate the Good Suffix Skip List
def findSuffixPosition(badchar, suffix, full_term):
for offset in range(1, len(full_term)+1)[::-1]:
flag = True
for suffix_index in range(0, len(suffix)):
term_index = offset-len(suffix)-1+suffix_index
if term_index < 0 or suffix[suffix_index] == full_term[term_index]:
flag = False
term_index = offset-len(suffix)-1
if flag and (term_index <= 0 or full_term[term_index-1] != badchar):
return len(full_term)-offset+1
def generateSuffixShift(key):
skipList = {}
buffer = ""
for i in range(0, len(key)):
skipList[len(buffer)] = findSuffixPosition(key[len(key)-1-i], buffer, key)
buffer = key[len(key)-1-i] + buffer
return skipList
# Actual Search Algorithm
def BMSearch(haystack, needle):
goodSuffix = generateSuffixShift(needle)
badChar = generateBadCharShift(needle)
i = 0
while i < len(haystack)-len(needle)+1:
j = len(needle)
while j > 0 and needle[j-1] == haystack[i+j-1]:
j -= 1
if j > 0:
badCharShift = badChar.get(haystack[i+j-1], len(needle))
goodSuffixShift = goodSuffix[len(needle)-j]
if badCharShift > goodSuffixShift:
i += badCharShift
i += goodSuffixShift
return i
return -1
if __name__ == "__main__":
block = "This is a simple example"
print "This is an example search on the string \"", block, "\"."
print "ple :", BMSearch(block, "ple ")
print "example :", BMSearch(block, "example")
print "simple :", BMSearch(block, "simple")
print " imple :", BMSearch(block, " imple")

This comment has been minimized.

Copy link

@joruzani joruzani commented Dec 10, 2015

find a problem with you implementation... and I don't know what is wrong.

trying these as the haystack:

and these as the needle:


and it works... but remove the last 2 numbers from that needle, and it doesn’t ( as in 111109111)

I haven’t found another case in which something similar happens.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.