codekitchen/usage

## usage
> ./wer.py "stats - transcript.txt" "stats - google.txt"
wer distance: 442
accuracy: 70.75%

## wer.py
#!/usr/bin/env python

import sys, getopt

def wer(r, h):
    """
    Compute the word error rate (WER) distance between two sequences.

    The distance is the Levenshtein edit distance counted over whole
    elements: the minimum number of substitutions, insertions and
    deletions needed to turn the reference ``r`` into the hypothesis ``h``.

    O(nm) time and O(m) extra space, where n = len(r) and m = len(h):
    only two rows of the edit-distance table are kept at any time.

    Parameters
    ----------
    r : list
        Reference elements (e.g. the words of the correct transcript).
    h : list
        Hypothesis elements (e.g. the words of the transcript under test).

    Returns
    -------
    int
        Edit distance between ``r`` and ``h``.

    Examples
    --------
    >>> wer("who is there".split(), "is there".split())
    1
    >>> wer("who is there".split(), "".split())
    3
    >>> wer("".split(), "who is there".split())
    3
    """
    # Row 0 of the table: turning an empty reference into h[:j] takes
    # j insertions.
    previous = list(range(len(h) + 1))

    for i, ref_item in enumerate(r, 1):
        # Column 0: turning r[:i] into an empty hypothesis takes i deletions.
        current = [i]
        for j, hyp_item in enumerate(h, 1):
            if ref_item == hyp_item:
                # Matching elements cost nothing; carry the diagonal over.
                current.append(previous[j - 1])
            else:
                substitution = previous[j - 1] + 1
                insertion = current[j - 1] + 1
                deletion = previous[j] + 1
                current.append(min(substitution, insertion, deletion))
        previous = current

    # After the last row, the final cell holds the full distance (for an
    # empty reference this is still row 0, i.e. len(h)).
    return previous[len(h)]

if __name__ == "__main__":
    # Command-line entry point: compare a reference transcript with a
    # hypothesis transcript and print the WER distance and the accuracy.
    #
    # No options are defined; getopt is only used to separate the
    # positional arguments from the command line.
    opts, args = getopt.getopt(sys.argv[1:], "")
    if len(args) < 2:
        sys.exit("usage: %s REFERENCE_FILE HYPOTHESIS_FILE" % sys.argv[0])
    origfname, testfname = args[0], args[1]

    # Read both files as whitespace-separated words, closing each file
    # as soon as it has been read.
    with open(origfname) as origfile:
        orig = origfile.read().split()
    with open(testfname) as testfile:
        test = testfile.read().split()

    # Accuracy is relative to the reference length, so it is undefined
    # for an empty reference; report that instead of dividing by zero.
    if not orig:
        sys.exit("error: reference file %r contains no words" % origfname)

    distance = wer(orig, test)
    # float(distance) keeps the subtraction in floating point, so the
    # result can go negative when distance exceeds len(orig).
    accuracy = (len(orig) - float(distance)) / len(orig) * 100
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("wer distance: %d" % distance)
    print("accuracy: %.2f%%" % accuracy)
    # import doctest
    # doctest.testmod()
	> ./wer.py "stats - transcript.txt" "stats - google.txt"
	wer distance: 442
	accuracy: 70.75%
	#!/usr/bin/env python

	import sys, getopt

	def wer(r, h):
	    """
	    Compute the word error rate (WER) distance between two sequences.

	    The distance is the Levenshtein edit distance counted over whole
	    elements: the minimum number of substitutions, insertions and
	    deletions needed to turn the reference ``r`` into the hypothesis ``h``.

	    O(nm) time and O(m) extra space, where n = len(r) and m = len(h):
	    only two rows of the edit-distance table are kept at any time.

	    Parameters
	    ----------
	    r : list
	        Reference elements (e.g. the words of the correct transcript).
	    h : list
	        Hypothesis elements (e.g. the words of the transcript under test).

	    Returns
	    -------
	    int
	        Edit distance between ``r`` and ``h``.

	    Examples
	    --------
	    >>> wer("who is there".split(), "is there".split())
	    1
	    >>> wer("who is there".split(), "".split())
	    3
	    >>> wer("".split(), "who is there".split())
	    3
	    """
	    # Row 0 of the table: turning an empty reference into h[:j] takes
	    # j insertions.
	    previous = list(range(len(h) + 1))

	    for i, ref_item in enumerate(r, 1):
	        # Column 0: turning r[:i] into an empty hypothesis takes i deletions.
	        current = [i]
	        for j, hyp_item in enumerate(h, 1):
	            if ref_item == hyp_item:
	                # Matching elements cost nothing; carry the diagonal over.
	                current.append(previous[j - 1])
	            else:
	                substitution = previous[j - 1] + 1
	                insertion = current[j - 1] + 1
	                deletion = previous[j] + 1
	                current.append(min(substitution, insertion, deletion))
	        previous = current

	    # After the last row, the final cell holds the full distance (for an
	    # empty reference this is still row 0, i.e. len(h)).
	    return previous[len(h)]

	if __name__ == "__main__":
	    # Command-line entry point: compare a reference transcript with a
	    # hypothesis transcript and print the WER distance and the accuracy.
	    #
	    # No options are defined; getopt is only used to separate the
	    # positional arguments from the command line.
	    opts, args = getopt.getopt(sys.argv[1:], "")
	    if len(args) < 2:
	        sys.exit("usage: %s REFERENCE_FILE HYPOTHESIS_FILE" % sys.argv[0])
	    origfname, testfname = args[0], args[1]

	    # Read both files as whitespace-separated words, closing each file
	    # as soon as it has been read.
	    with open(origfname) as origfile:
	        orig = origfile.read().split()
	    with open(testfname) as testfile:
	        test = testfile.read().split()

	    # Accuracy is relative to the reference length, so it is undefined
	    # for an empty reference; report that instead of dividing by zero.
	    if not orig:
	        sys.exit("error: reference file %r contains no words" % origfname)

	    distance = wer(orig, test)
	    # float(distance) keeps the subtraction in floating point, so the
	    # result can go negative when distance exceeds len(orig).
	    accuracy = (len(orig) - float(distance)) / len(orig) * 100
	    # Single-argument print(...) emits the same text on Python 2 and 3.
	    print("wer distance: %d" % distance)
	    print("accuracy: %.2f%%" % accuracy)
	    # import doctest
	    # doctest.testmod()