Skip to content

Instantly share code, notes, and snippets.

@odashi
Last active September 20, 2019 06:46
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save odashi/fb4ffa936817551a7209 to your computer and use it in GitHub Desktop.
Save odashi/fb4ffa936817551a7209 to your computer and use it in GitHub Desktop.
BLEU calculator
# usage (single sentence):
# ref = ['This', 'is', 'a', 'pen', '.']
# hyp = ['There', 'is', 'a', 'pen', '.']
# stats = get_bleu_stats(ref, hyp)
# bleu = calculate_bleu(stats) # => 0.668740
#
# usage (multiple sentences):
# stats = defaultdict(int)
# for ref, hyp in zip(refs, hyps):
#     for k, v in get_bleu_stats(ref, hyp).items():
#         stats[k] += v
# bleu = calculate_bleu(stats)
import math
from collections import defaultdict
def get_bleu_stats(ref, hyp, N=4):
    """Collect the counts needed to score one hypothesis against one reference.

    Both ``ref`` and ``hyp`` are sliceable sequences of tokens.  For every
    n-gram order from 1 up to ``N`` (capped at the hypothesis length) the
    hypothesis n-grams are matched against the reference n-grams, each
    reference occurrence being usable at most once.

    Returns a ``defaultdict(int)`` holding:
        'rl'     -- reference length
        'hl'     -- hypothesis length
        'd<k>'   -- number of k-grams in the hypothesis
        'n<k>'   -- number of hypothesis k-grams matched in the reference
    Orders longer than the hypothesis get no entry at all.
    """
    ref_len, hyp_len = len(ref), len(hyp)
    stats = defaultdict(int, {'rl': ref_len, 'hl': hyp_len})

    # A hypothesis of length L contains no n-grams of order greater than L.
    max_order = min(N, hyp_len)

    for order in range(1, max_order + 1):
        # How many times each n-gram of this order is still available
        # in the reference.
        budget = defaultdict(int)
        for start in range(ref_len - order + 1):
            budget[tuple(ref[start:start + order])] += 1

        total = hyp_len - order + 1
        hits = 0
        for start in range(total):
            gram = tuple(hyp[start:start + order])
            if budget[gram] > 0:
                # Consume one reference occurrence so repeats are clipped.
                budget[gram] -= 1
                hits += 1

        stats['d' + str(order)] = total
        stats['n' + str(order)] = hits

    return stats
def calculate_bleu(stats, N=4):
    """Compute BLEU from counts shaped like the output of get_bleu_stats.

    The score is the geometric mean of the n-gram precisions
    ``n<k> / d<k>`` for k = 1..N, multiplied by the brevity penalty
    ``exp(min(0, 1 - rl / hl))``.

    Args:
        stats: Mapping with 'rl' (reference length), 'hl' (hypothesis
            length) and, for each order k, 'n<k>' (matched k-grams) and
            'd<k>' (hypothesis k-grams).  Counts may be summed over many
            sentence pairs.  A missing 'n<k>' entry is treated as zero
            matches, so plain dicts work as well as defaultdicts.
        N: Highest n-gram order included in the score.

    Returns:
        The BLEU score as a float; 0.0 as soon as any order has no
        matches.
    """
    log_precision_sum = 0.0
    for order in range(1, N + 1):
        # .get() rather than indexing: orders longer than the hypothesis have
        # no entry, and indexing would raise KeyError on a plain dict or
        # insert a phantom key into a defaultdict.
        matched = stats.get('n' + str(order), 0)
        if matched == 0:
            return 0.0
        total = stats['d' + str(order)]
        log_precision_sum += math.log(matched) - math.log(total)

    # float() keeps this a true division even where `/` on two ints truncates
    # (Python 2), which would otherwise distort the brevity penalty.
    length_ratio = float(stats['rl']) / stats['hl']
    # Log of the brevity penalty: only hypotheses shorter than the reference
    # are penalised, so it is capped at 0.
    log_brevity_penalty = min(0.0, 1.0 - length_ratio)

    return math.exp(log_precision_sum / N + log_brevity_penalty)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment