Skip to content

Instantly share code, notes, and snippets.

@m00nlight
Last active March 7, 2024 09:23
Show Gist options
  • Star 41 You must be signed in to star a gist
  • Fork 11 You must be signed in to fork a gist
  • Save m00nlight/daa6786cc503fde12a77 to your computer and use it in GitHub Desktop.
Save m00nlight/daa6786cc503fde12a77 to your computer and use it in GitHub Desktop.
Python KMP algorithm
class KMP:
def partial(self, pattern):
""" Calculate partial match table: String -> [Int]"""
ret = [0]
for i in range(1, len(pattern)):
j = ret[i - 1]
while j > 0 and pattern[j] != pattern[i]:
j = ret[j - 1]
ret.append(j + 1 if pattern[j] == pattern[i] else j)
return ret
def search(self, T, P):
"""
KMP search main algorithm: String -> String -> [Int]
Return all the matching position of pattern string P in T
"""
partial, ret, j = self.partial(P), [], 0
for i in range(len(T)):
while j > 0 and T[i] != P[j]:
j = partial[j - 1]
if T[i] == P[j]: j += 1
if j == len(P):
ret.append(i - (j - 1))
j = partial[j - 1]
return ret
def test():
p1 = "aa"
t1 = "aaaaaaaa"
kmp = KMP()
assert(kmp.search(t1, p1) == [0, 1, 2, 3, 4, 5, 6])
p2 = "abc"
t2 = "abdabeabfabc"
assert(kmp.search(t2, p2) == [9])
p3 = "aab"
t3 = "aaabaacbaab"
assert(kmp.search(t3, p3) == [1, 8])
p4 = "11"
t4 = "111"
assert(kmp.search(t4, p4) == [0, 1])
print("all test pass")
@jianhui-ben
Copy link

Thank you @m00nlight, super helpful! I wrote up my own version based on what I learn, if anyone sees anything wrong or anything that could be improved, please let me know!

class KMP:
    
    def partial(self, s):
        """ 
        Calculate partial match table: String -> [Int]
        
        # arrarra -> [0, 0, 0, 1, 2, 3, 4]
        # amar ->    [0, 0, 1, 0]
        # aaoiaa ->  [0, 1, 0, 0, 1, 2]
        """
        res = [0]
        for x in s[1:]:
            check_indx = res[-1]
            if s[check_indx] == x:
                res += [check_indx + 1]
            else:
                res += [0]
        
        return res
        
        
    def search(self, T, P):
        """ 
        KMP search main algorithm: String, String -> [Int] 
        Return all the matching position of pattern string P in S
        
        sample run :

        abcxabxdabxabcdabcdabcyabcdabcy     <~~ text T
        abcdabcy                            <~~ pattern P
        00001230                            <~~ mapping array
        ^^^^                                <~~ current comparison
        abcxabxdabxabcdabcdabcyabcdabcy
           abcdabcy
           00001230
           ^
        abcxabcdabxabcdabcdabcyabcdabcy
            abcdabcy
            00001230
            ^^^^^^^
        abcxabcdabxabcdabcdabcyabcdabcy
                abcdabcy
                00001230
                  ^

        abcxabcdabxabcdabcdabcyabcdabcy
                   abcdabcy
                   00001230
                   ^^^^^^^^

        abcxabcdabxabcdabcdabcyabcdabcy
                       abcdabcy
                       00001230
                          ^^^^^
                          
        abcxabcdabxabcdabcdabcyabcdabcy
                       abcdabcy
                       00001230
                              ^

        """
        mapping = self.partial(P)
        result = []
        
        p_pointer = 0
        t_pointer = 0
        
        while t_pointer < len(T):
            
            if P[p_pointer] == T[t_pointer]:
                p_pointer += 1
                t_pointer += 1 
                
                if p_pointer >= len(P):
                    result += [t_pointer-len(P)]
                    p_pointer = 0 if p_pointer == 0 else mapping[p_pointer-1]
                
            else:
                t_pointer += 1 if p_pointer == 0 else 0
                p_pointer = 0 if p_pointer == 0 else mapping[p_pointer-1]
             
        return result

I think the for loop in the partial() function is incorrect. There should be a while loop inside the for loop. A counter example for your partial function would be input pattern 'a a b a a a c'

@PMLP-novo
Copy link

Fails on
p5 = "ABC ABCDAB ABCDABCDABDE"
t5 = "ABCDABD"
assert (kmp.search(t5, p5) == [15])

@honglu2875
Copy link

Fails on p5 = "ABC ABCDAB ABCDABCDABDE" t5 = "ABCDABD" assert (kmp.search(t5, p5) == [15])

you need to switch t5 and p5 (t is the target string)...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment