Created
January 30, 2023 13:41
-
-
Save ronzhin-dmitry/8750bceea13fa12543a2b7320d543beb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def string_hash(input_string, p): | |
#here we implement a simple hash function. It is made in such a way that | |
#this hash function is easy to recalc when moving right on the string (see search function) | |
#p is some modulo, usually large prime for good hashing | |
result = ord(input_string[0]) | |
for i in range(1,len(input_string)): | |
result *= p | |
result += ord(input_string[i]) | |
return result % (2**64) | |
def search_Rabin_Karp (needle, haystack): | |
result = [] | |
p = 524287 #this is just a large prime number | |
n = len(needle) | |
m = len(haystack) | |
hash_needle = string_hash(needle[:n], p) | |
hash_haystack = string_hash(haystack[:n], p) | |
two_pow = (2 ** 64) | |
p_pow = (p ** (n-1)) % two_pow | |
for i in range(m - n + 1): | |
if hash_needle == hash_haystack: #we check only hashes of needle and some part of haystack | |
equal = True | |
for j in range(len(needle)): | |
if needle[j] != haystack[i+j]: #if hashes are equal - we recheck symbols exactly | |
equal = False | |
break | |
if equal: | |
result.append(i) | |
if i + n < m: #and here we recalc hash. due to the hashing function type it is easy to compute - we just 'roll' it | |
hash_haystack = (p * (hash_haystack - p_pow * ord(haystack[i])) + ord(haystack[i + n])) % two_pow | |
return result |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment