Skip to content

Instantly share code, notes, and snippets.

@zurk
Created November 15, 2018 20:09
Show Gist options
  • Save zurk/b37f630fa8232732f1e9fe08ef27b767 to your computer and use it in GitHub Desktop.
Save zurk/b37f630fa8232732f1e9fe08ef27b767 to your computer and use it in GitHub Desktop.
Liechtenstein distance calculation with edit sequence. Align 3 sequences.
class Action:
equal = 0
replace = 1
insert = 2
delete = 3
Action.names = {i: name for name, i in Action.__dict__.items() if name[0] != "_"}
def align3(seq1, seq2, seq3):
aseq1, aseq2 = align(seq1, seq2)
bseq2, bseq3 = align(seq2, seq3)
cseq1, cseq3 = align(aseq1, bseq3)
dseq2, dseq3 = align(bseq2, cseq3)
return cseq1, dseq2, dseq3
def align(seq1, seq2):
for start, s in enumerate(zip(seq1, seq2)):
if s[0] != s[1]:
break
for end, s in enumerate(zip(seq1[::-1], seq2[::-1])):
if s[0] != s[1]:
break
seq1_mid = seq1[start:end]
seq2_mid = seq2[start:end]
op_codes = levenshtein(seq1_mid, seq2_mid, with_equal=True)
res1 = []
res2 = []
for action, i, j in op_codes:
if action == "equal" or action == "replace":
res1.append(seq1_mid[i])
res2.append(seq2_mid[j])
if action == "insert":
res1.append(EMPTY)
res2.append(seq2_mid[j])
if action == "delete":
res1.append(seq1_mid[i])
res2.append(EMPTY)
return seq1[:start] + "".join(res1) + seq1[end:], seq2[:start] + "".join(res2) + seq2[end:]
def get_opcodes(actions: numpy.ndarray, with_equal):
"""
Build the best path opcodes for given a actions matrix.
"""
i = actions.shape[0] - 1
j = actions.shape[1] - 1
opcodes = []
# from matplotlib import pyplot as plt
# actions2 = actions / 255.0
# actions2[actions2 == 1] = float("nan")
# plt.matshow(actions2)
# plt.show()
while i != 0 or j != 0:
action = actions[i][j]
if with_equal or action != Action.equal:
opcodes.append((Action.names[action], i-1, j-1))
if action == Action.equal or action == Action.replace:
i -= 1
j -= 1
elif action == Action.insert:
j -= 1
else:
i -= 1
return opcodes[::-1]
def levenshtein(seq1, seq2, with_equal=False):
"""
How to transform seq1 to seq2
:param seq1:
:param seq2:
:return:
"""
max_distance = 500
if seq1 == seq2:
return [(Action.names[Action.equal], i, i) for i in range(len(seq1))] if with_equal else []
v0 = numpy.arange(start=0, stop=len(seq2) + 1, step=1, dtype=numpy.uint32)
v1 = numpy.empty(shape=(len(seq2) + 1,), dtype=numpy.uint32)
actions = numpy.zeros(shape=(len(seq1) + 1, len(seq2) + 1), dtype=numpy.uint8)
# default action is equal
actions[0, :] = Action.insert
actions[:, 0] = Action.delete
for i, seq1_i in enumerate(seq1):
v1[0] = i
for j, seq2_j in enumerate(seq2):
if abs(i - j) > max_distance:
# Optimization
v1[j + 1] = max_distance + 1
continue
cost = 0 if seq1_i == seq2_j else 1
replace_cost = v0[j] + cost
insert_cost = v1[j] + 1
delete_cost = v0[j+1] + 1
if replace_cost <= insert_cost and replace_cost <= delete_cost:
v1[j+1] = replace_cost
if cost:
actions[i+1, j+1] = Action.replace
elif insert_cost <= delete_cost:
v1[j+1] = insert_cost
actions[i+1, j+1] = Action.insert
else:
v1[j+1] = delete_cost
actions[i+1, j+1] = Action.delete
v0, v1 = v1, v0
return get_opcodes(actions, with_equal=with_equal)
def metrics(init, correct, model_out):
init, correct, model_out = align3(init, correct, model_out)
detected_bad_change = 0
detected_good_change = 0
misdetection = 0
undetected = 0
for init_c, correct_c, model_out_c in zip(init, correct, model_out):
if init_c == correct_c == model_out_c:
continue
elif init_c == correct_c and init_c != model_out_c:
misdetection += 1
elif init_c == model_out_c and init_c != correct_c:
undetected += 1
elif correct_c == model_out_c and init_c != correct_c:
detected_good_change += 1
else:
detected_bad_change += 1
return misdetection, undetected, detected_bad_change, detected_good_change
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment