Skip to content

Instantly share code, notes, and snippets.

@liushuaikobe
Last active December 18, 2015 11:09
Show Gist options
  • Save liushuaikobe/5773812 to your computer and use it in GitHub Desktop.
Save liushuaikobe/5773812 to your computer and use it in GitHub Desktop.
Boyer-Moore algorithm Implementation via Python.
'''
Created on 2013-6-13
@author: liushuai
'''
import time
badCharShift = {}
goodSuffixShift = {}
buildShiftTime = 0
count = 0
def BoyerMoore(text, pattern, debug = False):
global badCharShift, goodSuffixShift, count, buildShiftTime
start = time.time()
badCharShift = getOffsetBybadChar(pattern, debug)
goodSuffixShift = getOffsetBygoodSuffix(pattern, debug)
end = time.time()
buildShiftTime = end - start
pLen = len(pattern)
tLen = len(text)
w = pLen - 1
while True:
if w > tLen:
return False
flag = False
for i in range(pLen)[::-1]:
tmp = w - ( pLen - i ) + 1
if text[tmp] != pattern[i]:
w += max(lookupBadChar(i, text[tmp]), lookupGoodSuffx(i + 1))
count += 1
# w += lookupBadChar(i, text[tmp])
if debug:
print text
print ' ' * (w - pLen + 1) + pattern
flag = True
break
if not flag:
return w - pLen + 1
def getOffsetBybadChar(pattern, debug = False):
shift = {}
charTable = {}.fromkeys(pattern).keys()
for i in range(len(pattern)):
for badChar in charTable:
if pattern[i] == badChar:
continue
for j in range(i)[::-1]:
if pattern[j] == badChar:
shift[(i, badChar)] = i - j
break
if debug:
print shift
return shift
def getOffsetBygoodSuffix(pattern, debug = False):
shift = {}
for indexOfPattern in range(1 ,len(pattern)):
flag = False
for i in range(indexOfPattern, len(pattern)):
if i == indexOfPattern:
tmp = pattern[0:len(pattern) - 1].rfind(pattern[i:])
if tmp != -1 and tmp != indexOfPattern:
shift[indexOfPattern] = i - tmp
flag = True
break
else:
if pattern.startswith(pattern[i:]):
shift[indexOfPattern] = i
flag = True
break
if not flag:
shift[indexOfPattern] = -1
if debug:
print shift
return shift
def lookupBadChar(indexOfPattern, badChar):
global badCharShift
return badCharShift[(indexOfPattern, badChar)] if (indexOfPattern, badChar) in badCharShift else indexOfPattern + 1
def lookupGoodSuffx(indexOfPattern):
global goodSuffixShift
return goodSuffixShift[indexOfPattern] if indexOfPattern in goodSuffixShift else 0
def main():
text = 'THCS CS A SCBPLE EXABPLE'
pattern = 'EXABPLE'
offset = BoyerMoore(text, pattern, True)
if not offset:
print 'lost'
else :
print 'hit at : ', offset
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment