Skip to content

Instantly share code, notes, and snippets.

@amanuel2
Last active September 24, 2023 18:36
Show Gist options
  • Save amanuel2/d20e8c9e7000c5ce5a9ccb4cb6c4a949 to your computer and use it in GitHub Desktop.
Save amanuel2/d20e8c9e7000c5ce5a9ccb4cb6c4a949 to your computer and use it in GitHub Desktop.
################################ Robin Karp Rolling Hash (string searching)
class Solution:
# Rabin-Karp algorithm
# In the hashing algo, we can have hash collision and when we have hash collision, 2
# different words can have the same hash. So we need to check that 2 words with same hash
# are really the same to avoid false positive.
# This check frequency can be reduced by taking multiple hashes. In case we take 2 hashes
# the probability of false negatives can go down to 10^-9. So we need to check only once.
# Still the time complexity is O(N + M)
# Can be made more optimum for worst case by using KMP.
def strStr(self, haystack: str, needle: str) -> int:
def charToIndex(char): return ord(char) - ord('a')
def hashString(string):
base = 26
modulo = 10**9 + 7
hash = 0
# In this case the first char gets the highest weight of base^(n-1)
for char in string:
hash = (hash * base + charToIndex(char)) % modulo
return hash
def updateHash(hash, removeLetter, addLetter, windowLen):
base = 26
modulo = 10**9 + 7
maxWeight = pow(base, windowLen-1, modulo) # base ** (windowLen - 1)
return (
# Letter getting removed was first and had base ^ n - 1 weight
base * (hash - charToIndex(removeLetter) * maxWeight)
+ charToIndex(addLetter) # New letter add will have 1 weight
) % modulo
if len(needle) > len(haystack): return -1
if needle == haystack: return 0
# Find hash for the first m characters for the initial comparision when i = 0
hashNeedle = hashString(needle)
hash = hashString(haystack[:len(needle)])
# O(n+m)
for i in range(len(haystack) - len(needle) + 1):
# If the hash is same as needle then confrim if the string in current window matches
# needles to avoid false positives (diff strings having same hash due to collision).
if hash == hashNeedle:
if needle == haystack[i: i + len(needle)]: return i
# Update hash:
# When we reach the last window we can't update hash anymore
if i != len(haystack) - len(needle):
# Remove first letter from hash and add letter next to current window in hash
hash = updateHash(hash, haystack[i], haystack[i + len(needle)], len(needle))
return -1
class Solution:
def longestDupSubstring(self, S: str) -> str:
# https://www.youtube.com/watch?v=BfUejqd07yo&t=357s
mod = 1_000_000_007
def fn(k):
"""Return duplicated substring of length k."""
p = pow(26, k, mod)
hs = 0
seen = defaultdict(set)
for i, ch in enumerate(s):
hs = (26*hs + ord(ch) - 97) % mod
if i >= k: hs = (hs - (ord(s[i-k])-97)*p) % mod # rolling hash
if i+1 >= k:
if hs in seen and s[i+1-k:i+1] in seen[hs]: return s[i+1-k:i+1] # resolve hash collision
seen[hs].add(s[i+1-k:i+1])
return ""
lo, hi = 0, len(s)-1
while lo < hi:
mid = lo + hi + 1 >> 1
if fn(mid): lo = mid
else: hi = mid - 1
return fn(lo)
################################ KMP Algorithm (string searching)
class Solution:
def strStr(self, haystack: str, needle: str) -> int:
# KMP Algorithm: build a table to keep track of patterns so we don't have to restart
def build_pattern(substring):
pattern = [0 for _ in substring]
# two pointers one at the first, next position
j = 0
for i in range(1, len(substring)):
while j > 0 and substring[i] != substring[j]:
j = pattern[j - 1]
if substring[i] == substring[j]:
j += 1
pattern[i] = j
return pattern
def does_match(haystack, needle, pattern):
i = j = 0
while i < len(haystack):
if haystack[i] == needle[j]:
i += 1
j += 1
if j == len(needle):
return i - j # Found a match
else:
if j > 0:
j = pattern[j - 1]
else:
i += 1
return -1
pattern = build_pattern(needle)
# print(pattern)
return does_match(haystack, needle, pattern)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment