# Compute BLEU Score for Machine Translation
import sacrebleu
from sacremoses import MosesDetokenizer
md = MosesDetokenizer(lang='en')


def _read_detokenized(path, detokenizer):
    """Return every line of *path* as a detokenized sentence.

    Each line is stripped, split on whitespace into tokens, and passed to
    ``detokenizer.detokenize``. Blank lines are kept (they yield whatever
    the detokenizer returns for an empty token list) so that line *i* of the
    reference file still lines up with line *i* of the prediction file.

    Args:
        path: Path of a text file with one tokenized sentence per line.
        detokenizer: Object exposing ``detokenize(tokens)``.

    Returns:
        A list with one detokenized string per input line, in file order.
    """
    # Read as UTF-8 explicitly instead of relying on the platform's locale
    # default encoding.
    with open(path, encoding="utf-8") as handle:
        return [detokenizer.detokenize(line.strip().split()) for line in handle]


# Open the test dataset human translation file and detokenize the references
refs = _read_detokenized("target.test", md)
print("Reference 1st sentence:", refs[0])
refs = [refs]  # Yes, it is a list of list(s) as required by sacreBLEU

# Open the translation file by the NMT model and detokenize the predictions
preds = _read_detokenized("target.pred", md)
print("MTed 1st sentence:", preds[0])

# Calculate and print the BLEU score
bleu = sacrebleu.corpus_bleu(preds, refs)
print("BLEU:", bleu.score)
