Skip to content

Instantly share code, notes, and snippets.

@codekitchen
Created October 17, 2016 15:48
Show Gist options
  • Save codekitchen/9028cd26b2e73615f5f295507f46994c to your computer and use it in GitHub Desktop.
Save codekitchen/9028cd26b2e73615f5f295507f46994c to your computer and use it in GitHub Desktop.
> ./wer.py "stats - transcript.txt" "stats - google.txt"
wer distance: 442
accuracy: 70.75%
#!/usr/bin/env python
import sys, getopt
def wer(r, h):
    """
    Calculation of WER with Levenshtein distance.

    Counts the minimum number of element substitutions, insertions and
    deletions needed to turn the reference ``r`` into the hypothesis ``h``.
    O(nm) time and O(m) space complexity, where n = len(r) and m = len(h).

    Parameters
    ----------
    r : list
        Reference sequence (e.g. the words of the correct transcript).
    h : list
        Hypothesis sequence that is compared against the reference.

    Returns
    -------
    int
        Edit distance between ``r`` and ``h``.

    Examples
    --------
    >>> wer("who is there".split(), "is there".split())
    1
    >>> wer("who is there".split(), "".split())
    3
    >>> wer("".split(), "who is there".split())
    3
    """
    # Only two rows of the dynamic-programming table are ever needed at once.
    # Row 0: an empty reference prefix becomes the first j hypothesis
    # elements with j insertions.
    previous = list(range(len(h) + 1))
    for i, ref_item in enumerate(r, 1):
        # Column 0: the first i reference elements become an empty
        # hypothesis with i deletions.
        current = [i]
        for j, hyp_item in enumerate(h, 1):
            if ref_item == hyp_item:
                # Matching elements cost nothing extra.
                current.append(previous[j - 1])
            else:
                substitution = previous[j - 1] + 1
                insertion = current[j - 1] + 1
                deletion = previous[j] + 1
                current.append(min(substitution, insertion, deletion))
        previous = current
    # The last cell of the final row is the distance between the full inputs.
    return previous[len(h)]
if __name__ == "__main__":
    # Usage: wer.py REFERENCE_FILE HYPOTHESIS_FILE
    # Prints the word-level edit distance between the two files and the
    # resulting accuracy relative to the reference word count.
    opts, args = getopt.getopt(sys.argv[1:], "")
    if len(args) < 2:
        sys.exit("usage: %s REFERENCE_FILE HYPOTHESIS_FILE" % sys.argv[0])
    origfname = args[0]
    testfname = args[1]
    # Context managers make sure both files are closed after reading.
    with open(origfname) as orig_file:
        orig = orig_file.read().split()
    with open(testfname) as test_file:
        test = test_file.read().split()
    if not orig:
        # Accuracy divides by the reference word count, so an empty
        # reference has no meaningful score.
        sys.exit("reference file %r contains no words" % origfname)
    distance = wer(orig, test)
    accuracy = (float(len(orig)) - float(distance)) / float(len(orig)) * 100
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("wer distance: %d" % distance)
    print("accuracy: %.2f%%" % accuracy)
    # import doctest
    # doctest.testmod()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment