Skip to content

Instantly share code, notes, and snippets.

@shhshn
Created July 31, 2022 04:49
Show Gist options
  • Save shhshn/344034fee97416788bd6f911add2018d to your computer and use it in GitHub Desktop.
Save shhshn/344034fee97416788bd6f911add2018d to your computer and use it in GitHub Desktop.
An implementation of BLEU [Papineni et al. 2002]: reproduces the single-reference mode of multi-bleu.perl
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# A BLEU calculator by Sho Hoshino (hoshino@nii.ac.jp)
# This script outputs BLEU-4 scores that should be identical to those of multi-bleu.perl,
# at either the sentence level or the document level
#
# Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu
# BLEU: a method for automatic evaluation of machine translation, ACL 2002
#
# 2013/11/18 Added citation
# 2013/10/03 Initial Release
import collections
import math
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
def main():
    """Score hypotheses read from stdin against a reference file.

    Command line: ``<script> ref [1] < hyp``.  ``sys.argv[1]`` is the path of
    the reference file; when ``sys.argv[2]`` is the string ``"1"`` one score
    is printed per sentence pair, otherwise a single document-level summary
    line is printed after all pairs have been consumed.

    Reference and hypothesis lines are paired with ``zip``, so surplus lines
    on either side are ignored.  Any exception raised while reading or
    scoring is written to stderr and ends the run without a summary.
    """
    if len(sys.argv) < 2:
        print("Usage: %s ref [1:sentence-level BLEU] <hyp" % sys.argv[0])
        return
    sentencelv = len(sys.argv) >= 3 and sys.argv[2] == "1"
    m = 4  # default: BLEU-4
    # One independent [matched, total] pair per n-gram order.
    tb = [[0, 0] for _ in range(m)]
    scores = [0] * m
    ref_len = 0
    hyp_len = 0
    try:
        # ``with`` closes the file on every path and, unlike a ``finally``
        # that inspects the handle, is safe when open() itself fails.
        with open(sys.argv[1]) as ref_file:
            for ref, hyp in zip(ref_file, sys.stdin):
                if sentencelv:
                    # Start from fresh accumulators so every sentence is
                    # scored on its own statistics only.
                    _, sent_scores, sent_ref_len, sent_hyp_len = iteration(
                        [[0, 0] for _ in range(m)], [0] * m, 0, 0, ref, hyp)
                    result(False, sent_scores, sent_ref_len, sent_hyp_len)
                    continue
                tb, scores, ref_len, hyp_len = iteration(
                    tb, scores, ref_len, hyp_len, ref, hyp)
    except Exception as e:
        # Top-level boundary of the script: report the problem and stop.
        sys.stderr.write("%s\n" % e)
        return
    if not sentencelv:
        result(True, scores, ref_len, hyp_len)
def iteration(tb, scores, ref_len, hyp_len, ref, hyp):
    """Fold one reference/hypothesis sentence pair into the running statistics.

    Parameters:
        tb: list with one ``[matched, total]`` pair per n-gram order.
        scores: list of per-order precisions; only its length (the maximum
            n-gram order) is read here.
        ref_len, hyp_len: token counts accumulated so far.
        ref, hyp: the reference and hypothesis sentences, as text or as
            UTF-8 encoded byte strings.

    Returns:
        A tuple ``(tb, scores, ref_len, hyp_len)`` holding the updated
        statistics.  The lists are newly built; the ``tb`` and ``scores``
        arguments are left untouched so callers can reuse them.
    """
    # Decode byte strings explicitly instead of depending on the
    # interpreter-wide default encoding.
    if isinstance(ref, bytes):
        ref = ref.decode("utf-8")
    if isinstance(hyp, bytes):
        hyp = hyp.decode("utf-8")
    ref_tokens = ref.split()
    hyp_tokens = hyp.split()

    new_tb = []
    new_scores = []
    for n in range(len(scores)):
        hyp_counts = collections.Counter(ngrams(hyp_tokens, n + 1))
        ref_counts = collections.Counter(ngrams(ref_tokens, n + 1))
        # Clipped matches: each hypothesis n-gram counts at most as often as
        # it occurs in the reference (multiset intersection).
        top = sum((hyp_counts & ref_counts).values())
        bottom = sum(hyp_counts.values())
        matched = tb[n][0] + top
        total = tb[n][1] + bottom
        new_tb.append([matched, total])
        new_scores.append(100.0 * matched / total if total > 0 else 0)
    return (new_tb, new_scores,
            ref_len + len(ref_tokens), hyp_len + len(hyp_tokens))
def result(documentlv, scores, ref_len, hyp_len):
    """Print the BLEU score computed from per-order precisions and lengths.

    Parameters:
        documentlv: the summary line is printed only when this is exactly
            ``True``; any other value (including other truthy objects)
            selects the bare sentence-level score.
        scores: n-gram precisions in percent, one per order.
        ref_len, hyp_len: total reference and hypothesis token counts.

    The score is the geometric mean of ``scores`` (0 if any precision is not
    positive, or if ``scores`` is empty) multiplied by the brevity penalty.
    Each call writes one complete line to stdout.
    """
    if scores and all(score > 0 for score in scores):
        log_sum = sum(math.log(score) for score in scores)
        bleu = math.exp(log_sum / len(scores))
    else:
        bleu = 0
    # The penalty applies only to a non-empty hypothesis that is shorter
    # than the reference.
    if hyp_len != 0 and hyp_len < ref_len:
        bp = math.exp(1 - (float(ref_len) / hyp_len))
    else:
        bp = 1
    if documentlv is not True:
        print("%.2f" % (bleu * bp))
        return
    # An empty reference would otherwise divide by zero; report ratio 0.
    ratio = float(hyp_len) / ref_len if ref_len else 0.0
    precisions = "/".join(["%.1f" % x for x in scores])
    print("BLEU = %.2f, %s (BP=%.3f, ratio=%.3f, hyp_len=%d, ref_len=%d)"
          % (bleu * bp, precisions, bp, ratio, hyp_len, ref_len))
def ngrams(input_list, n):
    """Return every run of n consecutive items of input_list as a tuple."""
    # Zipping the sequence against copies of itself shifted by 0..n-1 stops
    # at the shortest copy, which yields exactly the complete n-grams.
    shifted = [input_list[offset:] for offset in range(n)]
    return list(zip(*shifted))
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment