Instantly share code, notes, and snippets.

Embed
What would you like to do?
Boyer Moore string search implementation in Python
# Boyer Moore String Search implementation in Python
# Ameer Ayoub <ameer.ayoub@gmail.com>
# Generate the Bad Character Skip List
def generateBadCharShift(term):
skipList = {}
for i in range(0, len(term)-1):
skipList[term[i]] = len(term)-i-1
return skipList
# Generate the Good Suffix Skip List
def findSuffixPosition(badchar, suffix, full_term):
for offset in range(1, len(full_term)+1)[::-1]:
flag = True
for suffix_index in range(0, len(suffix)):
term_index = offset-len(suffix)-1+suffix_index
if term_index < 0 or suffix[suffix_index] == full_term[term_index]:
pass
else:
flag = False
term_index = offset-len(suffix)-1
if flag and (term_index <= 0 or full_term[term_index-1] != badchar):
return len(full_term)-offset+1
def generateSuffixShift(key):
skipList = {}
buffer = ""
for i in range(0, len(key)):
skipList[len(buffer)] = findSuffixPosition(key[len(key)-1-i], buffer, key)
buffer = key[len(key)-1-i] + buffer
return skipList
# Actual Search Algorithm
def BMSearch(haystack, needle):
goodSuffix = generateSuffixShift(needle)
badChar = generateBadCharShift(needle)
i = 0
while i < len(haystack)-len(needle)+1:
j = len(needle)
while j > 0 and needle[j-1] == haystack[i+j-1]:
j -= 1
if j > 0:
badCharShift = badChar.get(haystack[i+j-1], len(needle))
goodSuffixShift = goodSuffix[len(needle)-j]
if badCharShift > goodSuffixShift:
i += badCharShift
else:
i += goodSuffixShift
else:
return i
return -1
if __name__ == "__main__":
block = "This is a simple example"
print "This is an example search on the string \"", block, "\"."
print "ple :", BMSearch(block, "ple ")
print "example :", BMSearch(block, "example")
print "simple :", BMSearch(block, "simple")
print " imple :", BMSearch(block, " imple")
@joruzani

This comment has been minimized.

joruzani commented Dec 10, 2015

find a problem with you implementation... and I don't know what is wrong.

trying these as the haystack:
671111091113298117115999711432117110973297103117106973210111032117110321129710697114

and these as the needle:

11110911132

and it works... but remove the last 2 numbers from that needle, and it doesn’t ( as in 111109111)

I haven’t found another case in which something similar happens.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment