Skip to content

Instantly share code, notes, and snippets.

@thyeem
Last active October 7, 2023 13:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thyeem/84bb24612202bfe0583966c468cf2cdd to your computer and use it in GitHub Desktop.
Save thyeem/84bb24612202bfe0583966c468cf2cdd to your computer and use it in GitHub Desktop.
A simple BLEU implementation
# A BLEU implementation doesn't have to be verbose.
# Simple, yet robust and not error-prone (tested).
# Zero n-gram precisions are raised to 'epsilon' (when at least one n-gram matches)
# so a single missing n-gram order does not produce extreme values. Adjust it as needed.
#
# >>> references = ["BLEU implementation don't have to be verbose".split()]
# >>> candidate = "Robust for all almost edge cases and not error-prone".split()
# >>> bleu(references, candidate)
from collections import Counter

import numpy as np
def bleu(refs, x, weights=(0.25, 0.25, 0.25, 0.25), epsilon=1e-12):
    """Calculate the BLEU score of candidate ``x`` against references ``refs``.

        BLEU = brevity-penalty * exp(sum_i(weights[i] * log(precisions[i])))

    Args:
        refs: Iterable of reference token sequences. Each reference must
            support ``len()`` and slicing (e.g. a list of tokens).
        x: Candidate token sequence (must support ``len()`` and slicing).
        weights: Sliceable sequence of weights; ``weights[i]`` applies to the
            modified (i+1)-gram precision. Its length sets the highest
            n-gram order evaluated.
        epsilon: Floor applied to every precision when at least one n-gram
            order has a match, so one empty order does not zero the score.

    Returns:
        The score as a ``numpy.float64``. It is 0 when the candidate is empty
        or shares no n-gram of any evaluated order with the references.

    Raises:
        ValueError: If ``refs`` contains no reference.
    """
    # Materialise once: refs is traversed for every n-gram order and again
    # for the brevity penalty, which would exhaust a one-shot iterable.
    refs = list(refs)
    if not refs:
        raise ValueError("bleu() requires at least one reference")

    def count_ng(tokens, n):
        """Count every contiguous n-gram (as a tuple) in ``tokens``."""
        return Counter(
            tuple(tokens[i : i + n]) for i in range(len(tokens) - n + 1)
        )

    def precision(n):
        """Modified n-gram precision: candidate counts clipped per n-gram
        to the largest count seen in any single reference."""
        ref_counts = [count_ng(ref, n) for ref in refs]
        cand_counts = count_ng(x, n)
        clipped = sum(
            min(count, max(rc[ng] for rc in ref_counts))
            for ng, count in cand_counts.items()
        )
        # `or 1` keeps the division defined when the candidate has no n-grams.
        return clipped / (sum(cand_counts.values()) or 1)

    def penalty():
        """Brevity penalty measured against the shortest reference."""
        cand_len = len(x)
        if cand_len == 0:
            return 0.0
        ref_len = min(len(ref) for ref in refs)
        if cand_len > ref_len:
            return 1.0
        return np.exp(1 - ref_len / cand_len)

    ps = [precision(n) for n in range(1, len(weights) + 1)]
    if ps and not any(ps):
        # Nothing matched at any order. Return 0 directly instead of taking
        # log(0): that emits a RuntimeWarning and, combined with a zero
        # weight, evaluates 0 * -inf = nan.
        return np.float64(0.0)

    # At least one order matched: floor the zero precisions at epsilon.
    ps = [max(p, epsilon) for p in ps]
    # Only the first len(x) weights are used, so orders longer than the
    # candidate itself do not contribute to the weighted log sum.
    sigma = np.sum([w * np.log(p) for w, p in zip(weights[: len(x)], ps)])
    return np.float64(penalty() * np.exp(sigma))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment