{{ message }}

Instantly share code, notes, and snippets.

# ameerkat/BoyerMooreStringSearch.py

Created Oct 14, 2010
Boyer Moore string search implementation in Python
 # Boyer Moore String Search implementation in Python # Ameer Ayoub # Generate the Bad Character Skip List def generateBadCharShift(term): skipList = {} for i in range(0, len(term)-1): skipList[term[i]] = len(term)-i-1 return skipList # Generate the Good Suffix Skip List def findSuffixPosition(badchar, suffix, full_term): for offset in range(1, len(full_term)+1)[::-1]: flag = True for suffix_index in range(0, len(suffix)): term_index = offset-len(suffix)-1+suffix_index if term_index < 0 or suffix[suffix_index] == full_term[term_index]: pass else: flag = False term_index = offset-len(suffix)-1 if flag and (term_index <= 0 or full_term[term_index-1] != badchar): return len(full_term)-offset+1 def generateSuffixShift(key): skipList = {} buffer = "" for i in range(0, len(key)): skipList[len(buffer)] = findSuffixPosition(key[len(key)-1-i], buffer, key) buffer = key[len(key)-1-i] + buffer return skipList # Actual Search Algorithm def BMSearch(haystack, needle): goodSuffix = generateSuffixShift(needle) badChar = generateBadCharShift(needle) i = 0 while i < len(haystack)-len(needle)+1: j = len(needle) while j > 0 and needle[j-1] == haystack[i+j-1]: j -= 1 if j > 0: badCharShift = badChar.get(haystack[i+j-1], len(needle)) goodSuffixShift = goodSuffix[len(needle)-j] if badCharShift > goodSuffixShift: i += badCharShift else: i += goodSuffixShift else: return i return -1 if __name__ == "__main__": block = "This is a simple example" print "This is an example search on the string \"", block, "\"." print "ple :", BMSearch(block, "ple ") print "example :", BMSearch(block, "example") print "simple :", BMSearch(block, "simple") print " imple :", BMSearch(block, " imple")

### joruzani commented Dec 10, 2015

 find a problem with you implementation... and I don't know what is wrong. trying these as the haystack: 671111091113298117115999711432117110973297103117106973210111032117110321129710697114 and these as the needle: 11110911132 and it works... but remove the last 2 numbers from that needle, and it doesn’t ( as in 111109111) I haven’t found another case in which something similar happens.