Skip to content

Instantly share code, notes, and snippets.

@okurka12
Created January 23, 2024 00:16
Show Gist options
  • Save okurka12/230e57df5e5a53bfa20ed51a0b102eae to your computer and use it in GitHub Desktop.
Save okurka12/230e57df5e5a53bfa20ed51a0b102eae to your computer and use it in GitHub Desktop.
Generation of the prefix table for the Knuth-Morris-Pratt algorithm
#
# part of the knuth-morris-pratt algorithm:
# generation of the prefix table/fail vector
#
# víteček 🔥💯 (okurka12), January 2024
#
def ppre(s: str) -> set[str]:
"""returns a set of all proper prefixes of `s`"""
output = set()
for i in range(1, len(s)):
output.add(s[:i])
return output
def psuf(s: str) -> set[str]:
"""returns a set of all proper suffixes of `s`"""
output = set()
for i in range(1, len(s)):
output.add(s[i:])
return output
def vec_fail(s: str) -> list[int]:
"""returns a fail vector of `s`"""
# every fail vector has this
output = [-1, 0]
# iterate through increasingly large prefixes of `s`
# example: s="ABABC..." -> first "AB", then "ABA", "ABAB", "ABABC", ...
for i in range(2, len(s)):
# current prefix of length i
current = s[:i]
# intersection of proper suffixes and prefixes
inters = ppre(current).intersection(psuf(current))
# length of the greatest common suffix and prefix
val = max([len(s) for s in inters]) if len(inters) > 0 else 0
output.append(val)
return output
def printpresuf(s: str) -> None:
"""prints prefix table/fail vector of `s`"""
print(s)
print(f" - fail vector: {vec_fail(s)}")
def main() -> None:
printpresuf("ABABCBABA")
printpresuf("BARBAROSSA")
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment