Skip to content

Instantly share code, notes, and snippets.

@sysopfb
Created August 24, 2021 15:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sysopfb/80ba7accb7d26d9a38aee8c4df785984 to your computer and use it in GitHub Desktop.
Save sysopfb/80ba7accb7d26d9a38aee8c4df785984 to your computer and use it in GitHub Desktop.
Sift4 in python
"""
Based on: https://gist.github.com/lbenedix/8275d01c2289a7a20d2c6c27ee8ae68e
Moved an if block and added a double empty string check for input validation
"""
def sift4_simple(s1, s2, max_offset=5):
"""
:param s1: string to compare
:param s2: string to compare
:param max_offset: number of characters to search for matching letters
:return: edit distance
"""
if len(s1) == 0:
if len(s2) == 0:
return 0
return len(s2)
elif len(s2) == 0:
return len(s1)
l1 = len(s1)
l2 = len(s2)
c1 = 0 # cursor for s1
c2 = 0 # cursor for s1
lcss = 0 # longest common subsequence
local_cs = 0 # local common subsequence
while c1 < l1 and c2 < l2:
if s1[c1] == s2[c2]:
local_cs += 1
else:
lcss += local_cs
local_cs = 0
if c1 != c2:
c1 = c2 = max(c1, c2)
for i in range(max_offset):
if c1 + i >= l1 or c2 + i >= l2:
break
if len(s1) > c1 + i and s1[c1 + i] == s2[c2]:
c1 += i
local_cs += 1
break
if len(s2) > c1 + i and s1[c1] == s2[c2 + i]:
c2 += i
local_cs += 1
break
c1 += 1
c2 += 1
lcss += local_cs
return round(max(l1, l2) - lcss)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment