Skip to content

Instantly share code, notes, and snippets.

@TACIXAT
Forked from ameerkat/BoyerMooreStringSearch.py
Last active August 29, 2015 14:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TACIXAT/729df49c49a02e302194 to your computer and use it in GitHub Desktop.
Save TACIXAT/729df49c49a02e302194 to your computer and use it in GitHub Desktop.
BoyerMoore Python - Multiple Matches
# Boyer Moore String Search implementation in Python
# Ameer Ayoub <ameer.ayoub@gmail.com>
# Modified to return all indices - GG
# Generate the Bad Character Skip List
def generateBadCharShift(term):
skipList = {}
for i in range(0, len(term)-1):
skipList[term[i]] = len(term)-i-1
return skipList
# Generate the Good Suffix Skip List
def findSuffixPosition(badchar, suffix, full_term):
for offset in range(1, len(full_term)+1)[::-1]:
flag = True
for suffix_index in range(0, len(suffix)):
term_index = offset-len(suffix)-1+suffix_index
if term_index < 0 or suffix[suffix_index] == full_term[term_index]:
pass
else:
flag = False
term_index = offset-len(suffix)-1
if flag and (term_index <= 0 or full_term[term_index-1] != badchar):
return len(full_term)-offset+1
def generateSuffixShift(key):
skipList = {}
buffer = ""
for i in range(0, len(key)):
skipList[len(buffer)] = findSuffixPosition(key[len(key)-1-i], buffer, key)
buffer = key[len(key)-1-i] + buffer
return skipList
# Actual Search Algorithm
def BMSearch(haystack, needle):
goodSuffix = generateSuffixShift(needle)
badChar = generateBadCharShift(needle)
matches = []
i = 0
while i < len(haystack)-len(needle)+1:
j = len(needle)
while j > 0 and needle[j-1] == haystack[i+j-1]:
j -= 1
if j > 0:
badCharShift = badChar.get(haystack[i+j-1], len(needle))
goodSuffixShift = goodSuffix[len(needle)-j]
if badCharShift > goodSuffixShift:
i += badCharShift
else:
i += goodSuffixShift
else:
matches.append(i)
i += 1
return matches
if __name__ == "__main__":
block = "zzzz This is a simple example"
print "This is an example search on the string \"", block, "\"."
print "ple: ", BMSearch(block, "ple")
print "example:", BMSearch(block, "example")
print "is:", BMSearch(block, "is")
print "mple:", BMSearch(block, "mple")
print "zz:", BMSearch(block, "zz")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment