Created
July 31, 2022 04:49
-
-
Save shhshn/344034fee97416788bd6f911add2018d to your computer and use it in GitHub Desktop.
An implementation of BLEU [Papineni et al. 2002]: reproduces the single-reference mode of multi-bleu.perl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# A BLEU calculator by Sho Hoshino (hoshino@nii.ac.jp)
# This script outputs BLEU-4 scores, at either sentence level or document
# level, that should be identical to those of multi-bleu.perl.
#
# Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu
# BLEU: a method for automatic evaluation of machine translation, ACL 2002
#
# 2013/11/18 Added citation
# 2013/10/03 Initial Release
import sys | |
reload(sys) | |
sys.setdefaultencoding('utf-8') | |
import math | |
def main():
    """Score hypotheses read from stdin against a reference file.

    Command line: ``<script> ref [1] < hyp``. ``ref`` is the path of the
    reference file (one sentence per line); hypotheses are read line by line
    from standard input. When the second argument is the string ``"1"`` a
    score is printed for every sentence pair; otherwise a single
    document-level summary line is printed after all pairs are consumed.

    Any exception raised while reading or scoring is reported on stderr and
    the function returns without printing the document-level summary.
    """
    if len(sys.argv) < 2:
        print("Usage: %s ref [1:sentence-level BLEU] <hyp" % sys.argv[0])
        return
    sentencelv = len(sys.argv) >= 3 and sys.argv[2] == "1"
    m = 4  # default: BLEU-4
    # One independent [matched, total] pair per n-gram order.
    tb = [[0, 0] for _ in range(m)]
    scores = [0] * m
    ref_len = 0
    hyp_len = 0
    try:
        # `with` guarantees the file is closed and, unlike a hand-written
        # `finally`, never touches an unbound name when open() itself fails.
        with open(sys.argv[1]) as ref_file:
            # zip stops at the shorter of the two inputs.
            for ref, hyp in zip(ref_file, sys.stdin):
                if sentencelv:
                    # Fresh accumulators for every sentence: reusing the
                    # document-level lists would let n-gram counts from
                    # earlier sentences leak into later sentence scores.
                    _, sent_scores, sent_ref_len, sent_hyp_len = iteration(
                        [[0, 0] for _ in range(m)], [0] * m, 0, 0, ref, hyp)
                    result(False, sent_scores, sent_ref_len, sent_hyp_len)
                    continue
                tb, scores, ref_len, hyp_len = iteration(
                    tb, scores, ref_len, hyp_len, ref, hyp)
    except Exception as e:
        # Top-level boundary: report the problem and stop.
        sys.stderr.write("%s\n" % e)
        return
    if not sentencelv:
        result(True, scores, ref_len, hyp_len)
def _tokenize(line):
    """Split one line into whitespace-separated tokens, decoding UTF-8 bytes first."""
    if isinstance(line, bytes):
        line = line.decode("utf-8")
    return line.split()


def _ngram_counts(tokens, n):
    """Return a Counter mapping each n-gram (as a tuple) in *tokens* to its frequency."""
    from collections import Counter
    return Counter(zip(*[tokens[i:] for i in range(n)]))


def iteration(tb, scores, ref_len, hyp_len, ref, hyp):
    """Fold one reference/hypothesis sentence pair into the running statistics.

    Parameters:
        tb: list with one ``[matched, total]`` pair per n-gram order
            (index 0 is unigrams).
        scores: list of the same length as ``tb``; only its length is read,
            it fixes the maximum n-gram order.
        ref_len, hyp_len: token counts accumulated so far.
        ref, hyp: the reference and hypothesis sentences as single lines of
            whitespace-separated tokens.

    Returns:
        ``(tb, scores, ref_len, hyp_len)`` with this pair added. ``tb`` and
        ``scores`` are new lists; the arguments are left unmodified so a
        caller can reuse its initial accumulators. Each score is the
        precision in percent for that order, or 0 when no hypothesis n-gram
        of that order has been seen.
    """
    m = len(scores)
    ref_tokens = _tokenize(ref)
    hyp_tokens = _tokenize(hyp)
    ref_len += len(ref_tokens)
    hyp_len += len(hyp_tokens)
    new_tb = []
    new_scores = []
    for n in range(m):
        hyp_counts = _ngram_counts(hyp_tokens, n + 1)
        ref_counts = _ngram_counts(ref_tokens, n + 1)
        # Clipped matches: each hypothesis n-gram is credited at most as many
        # times as it occurs in the reference (Counter & takes the minimum).
        top = sum((hyp_counts & ref_counts).values())
        bottom = sum(hyp_counts.values())
        matched = tb[n][0] + top
        total = tb[n][1] + bottom
        new_tb.append([matched, total])
        new_scores.append(100.0 * matched / total if total > 0 else 0)
    return new_tb, new_scores, ref_len, hyp_len
def result(documentlv, scores, ref_len, hyp_len):
    """Print the BLEU score computed from per-order precisions and lengths.

    Parameters:
        documentlv: when exactly ``True`` a full summary line is printed
            (score, per-order precisions, brevity penalty, length ratio and
            lengths); any other value prints only the score.
        scores: n-gram precisions in percent, one per order.
        ref_len, hyp_len: total reference and hypothesis token counts.

    The score is the geometric mean of ``scores`` multiplied by the brevity
    penalty, and is 0 as soon as any precision is 0.
    """
    m = len(scores)
    if m > 0 and all(score > 0 for score in scores):
        bleu = math.exp(sum(math.log(score) for score in scores) / m)
    else:
        # A single zero precision zeroes the whole geometric mean.
        bleu = 0
    # Brevity penalty applies only when the hypothesis is shorter than the
    # reference; an empty hypothesis keeps a penalty of 1.
    if hyp_len != 0 and hyp_len < ref_len:
        bp = math.exp(1 - (float(ref_len) / hyp_len))
    else:
        bp = 1
    if documentlv is not True:
        print("%.2f" % (bleu * bp))
        return
    # Guard against an empty reference so the summary can still be printed.
    ratio = float(hyp_len) / ref_len if ref_len != 0 else 0.0
    print("BLEU = %.2f, %s (BP=%.3f, ratio=%.3f, hyp_len=%d, ref_len=%d)" % (
        bleu * bp,
        "/".join(["%.1f" % x for x in scores]),
        bp,
        ratio,
        hyp_len,
        ref_len,
    ))
def ngrams(input_list, n):
    """Return all contiguous n-grams of *input_list* as a list of tuples.

    The result is empty when *input_list* has fewer than *n* items or when
    *n* is not positive. A concrete list is always returned (never a lazy
    iterator) so callers can take its length and remove items from it.
    """
    # Zipping the sequence with itself shifted by 0..n-1 positions lines up
    # the members of each n-gram; zip stops at the shortest shifted copy.
    shifted = [input_list[offset:] for offset in range(n)]
    return list(zip(*shifted))
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment