Skip to content

Instantly share code, notes, and snippets.

@xylcbd
Last active January 6, 2021 05:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xylcbd/effb388b185321a313fb72442c8912ee to your computer and use it in GitHub Desktop.
Save xylcbd/effb388b185321a313fb72442c8912ee to your computer and use it in GitHub Desktop.
Calculate CR (correct rate) and AR (accurate rate) for OCR benchmarks.
def custom_min(data):
    """Return the smallest item of *data* together with its index.

    When several items compare equal to the minimum, the index of the
    first one is returned.

    Args:
        data: A non-empty indexable sequence of mutually comparable items.

    Returns:
        A tuple ``(min_item, min_index)``.

    Raises:
        ValueError: If *data* is empty.
    """
    # Explicit check instead of ``assert``: asserts are stripped under
    # ``python -O``, which would let an empty input slip through.
    if len(data) == 0:
        raise ValueError("custom_min() requires a non-empty sequence")
    # ``min`` returns the first minimal element, matching a strict ``<`` scan.
    min_idx = min(range(len(data)), key=data.__getitem__)
    return data[min_idx], min_idx
def wer(r, h):
    """Compute the edit distance between a reference and a hypothesis.

    The distance is the Levenshtein distance over the items of the two
    sequences (characters for strings, words for lists of words, ...).
    One optimal alignment is then traced back to split the distance into
    substitutions, insertions and deletions.

    Args:
        r: Reference (golden) sequence; any indexable sequence with a length.
        h: Hypothesis (predicted) sequence of the same kind.

    Returns:
        A tuple ``(dist, substitution, insertion, deletion)``.
        ``substitution + insertion + deletion == dist`` always holds.
        An insertion is an item of *h* with no counterpart in *r*; a
        deletion is an item of *r* that is missing from *h*.
    """
    n_ref = len(r)
    n_hyp = len(h)

    # d[i][j] is the distance between r[:i] and h[:j]. Plain Python ints are
    # used so that distances above 255 cannot overflow.
    d = [[0] * (n_hyp + 1) for _ in range(n_ref + 1)]
    d[0] = list(range(n_hyp + 1))
    for i in range(1, n_ref + 1):
        d[i][0] = i

    for i in range(1, n_ref + 1):
        prev_row = d[i - 1]
        cur_row = d[i]
        ref_item = r[i - 1]
        for j in range(1, n_hyp + 1):
            if ref_item == h[j - 1]:
                cur_row[j] = prev_row[j - 1]
            else:
                cur_row[j] = 1 + min(
                    prev_row[j - 1],  # substitution
                    cur_row[j - 1],   # insertion
                    prev_row[j],      # deletion
                )
    dist = d[n_ref][n_hyp]

    # Trace one optimal path back from the bottom-right corner to (0, 0).
    # Every step checks its bounds, so the table is never indexed with -1.
    substitution = 0
    insertion = 0
    deletion = 0
    i, j = n_ref, n_hyp
    while i > 0 or j > 0:
        if i > 0 and j > 0 and r[i - 1] == h[j - 1]:
            # Matching items cost nothing; follow the diagonal.
            i -= 1
            j -= 1
        elif i > 0 and j > 0 and d[i][j] == d[i - 1][j - 1] + 1:
            substitution += 1
            i -= 1
            j -= 1
        elif j > 0 and d[i][j] == d[i][j - 1] + 1:
            insertion += 1
            j -= 1
        else:
            # Only a deletion can explain this cell; i > 0 is guaranteed
            # here because the first row is always reachable by insertions.
            deletion += 1
            i -= 1
    return dist, substitution, insertion, deletion
def cr_ar(subsitution, insertion, deletion, golden_len):
    """Compute the correct rate (CR) and accurate rate (AR).

    ``CR = (N - D - S) / N`` and ``AR = (N - D - S - I) / N`` where ``N`` is
    *golden_len*, ``S``/``I``/``D`` are the substitution, insertion and
    deletion counts.

    Args:
        subsitution: Number of substitution errors.
        insertion: Number of insertion errors.
        deletion: Number of deletion errors.
        golden_len: Length of the golden (reference) sequence.

    Returns:
        A tuple ``(cr, ar)`` of floats. ``ar`` can be negative when there
        are many insertions.

    Raises:
        ZeroDivisionError: If *golden_len* is 0, since both rates are
            undefined for an empty reference.
    """
    if golden_len == 0:
        raise ZeroDivisionError(
            "golden_len is 0: CR/AR are undefined for an empty reference"
        )
    total = float(golden_len)
    correct = golden_len - deletion - subsitution
    cr = float(correct) / total
    # Insertions are only penalised in AR, which is why it can go below zero.
    ar = float(correct - insertion) / total
    return cr, ar
def score(golden, predicts):
    """Return the ``(cr, ar)`` pair for *predicts* measured against *golden*.

    Runs ``wer`` on the two sequences, discards the overall distance, and
    passes the remaining three counts (taken, in order, as substitution,
    insertion and deletion) together with ``len(golden)`` to ``cr_ar``.
    """
    reference_size = len(golden)
    edit_stats = wer(golden, predicts)
    _unused_dist, n_sub, n_ins, n_del = edit_stats
    return cr_ar(n_sub, n_ins, n_del, reference_size)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment