Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Boyer Moore string search implementation in Python
# Boyer Moore String Search implementation in Python
# Ameer Ayoub <ameer.ayoub@gmail.com>
# Generate the Bad Character Skip List
def generateBadCharShift(term):
skipList = {}
for i in range(0, len(term)-1):
skipList[term[i]] = len(term)-i-1
return skipList
# Generate the Good Suffix Skip List
def findSuffixPosition(badchar, suffix, full_term):
for offset in range(1, len(full_term)+1)[::-1]:
flag = True
for suffix_index in range(0, len(suffix)):
term_index = offset-len(suffix)-1+suffix_index
if term_index < 0 or suffix[suffix_index] == full_term[term_index]:
pass
else:
flag = False
term_index = offset-len(suffix)-1
if flag and (term_index <= 0 or full_term[term_index-1] != badchar):
return len(full_term)-offset+1
def generateSuffixShift(key):
skipList = {}
buffer = ""
for i in range(0, len(key)):
skipList[len(buffer)] = findSuffixPosition(key[len(key)-1-i], buffer, key)
buffer = key[len(key)-1-i] + buffer
return skipList
# Actual Search Algorithm
def BMSearch(haystack, needle):
goodSuffix = generateSuffixShift(needle)
badChar = generateBadCharShift(needle)
i = 0
while i < len(haystack)-len(needle)+1:
j = len(needle)
while j > 0 and needle[j-1] == haystack[i+j-1]:
j -= 1
if j > 0:
badCharShift = badChar.get(haystack[i+j-1], len(needle))
goodSuffixShift = goodSuffix[len(needle)-j]
if badCharShift > goodSuffixShift:
i += badCharShift
else:
i += goodSuffixShift
else:
return i
return -1
if __name__ == "__main__":
block = "This is a simple example"
print "This is an example search on the string \"", block, "\"."
print "ple :", BMSearch(block, "ple ")
print "example :", BMSearch(block, "example")
print "simple :", BMSearch(block, "simple")
print " imple :", BMSearch(block, " imple")
@joruzani

This comment has been minimized.

Copy link

commented Dec 10, 2015

find a problem with you implementation... and I don't know what is wrong.

trying these as the haystack:
671111091113298117115999711432117110973297103117106973210111032117110321129710697114

and these as the needle:

11110911132

and it works... but remove the last 2 numbers from that needle, and it doesn’t ( as in 111109111)

I haven’t found another case in which something similar happens.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.