Skip to content

Instantly share code, notes, and snippets.

@ymoslem
Last active February 9, 2020 09:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ymoslem/70c83345efb9c3aba193aad7102b3016 to your computer and use it in GitHub Desktop.
Save ymoslem/70c83345efb9c3aba193aad7102b3016 to your computer and use it in GitHub Desktop.
Compute BLEU with arguments
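# Compute the corpus-level BLEU score of a machine-translation output file
# against a human reference file; both file paths are passed as arguments.
# Run this file from CMD/Terminal. The first argument is the reference (test)
# file and the second is the machine-translated file, one sentence per line.
# Example Command: python3 compute-bleu-args.py test_file_name.txt mt_file_name.txt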
import sys
import sacrebleu
from sacremoses import MosesDetokenizer
def _read_detokenized(path, detokenizer):
    """Return every line of *path* as a detokenized sentence.

    Each line is stripped, split on whitespace, and passed to
    ``detokenizer.detokenize``. Blank lines are kept (as whatever the
    detokenizer returns for an empty token list) so that line numbers in the
    reference and prediction files stay aligned.
    """
    # Explicit UTF-8: without it open() uses the platform locale encoding,
    # which can fail on or garble non-ASCII translation data.
    with open(path, encoding="utf-8") as handle:
        return [detokenizer.detokenize(line.strip().split()) for line in handle]


def main(argv=None):
    """Compute and print the corpus BLEU score for two files.

    Args:
        argv: Command-line arguments without the program name. The first item
            is the reference (human translation) file and the second is the
            machine-translated file. Defaults to ``sys.argv[1:]``. Any extra
            arguments are ignored.

    Exits with an error message when arguments are missing, when either file
    is empty, or when the two files have a different number of lines.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit(
            "Usage: python3 compute-bleu-args.py "
            "test_file_name.txt mt_file_name.txt"
        )
    target_test, target_pred = args[0], args[1]

    md = MosesDetokenizer(lang='en')

    # Read the human translation file and detokenize the references.
    refs = _read_detokenized(target_test, md)
    if not refs:
        sys.exit(f"Reference file is empty: {target_test}")
    print("Reference 1st sentence:", refs[0])

    # Read the NMT model's translation file and detokenize the predictions.
    preds = _read_detokenized(target_pred, md)
    if not preds:
        sys.exit(f"Translation file is empty: {target_pred}")
    print("MTed 1st sentence:", preds[0])

    # BLEU is computed over aligned sentence pairs, so the files must match
    # line for line.
    if len(refs) != len(preds):
        sys.exit(
            f"Line count mismatch: {len(refs)} reference lines vs "
            f"{len(preds)} translated lines"
        )

    # The references are wrapped in an outer list, one inner list per
    # reference set, as in the original call; there is one set here.
    bleu = sacrebleu.corpus_bleu(preds, [refs])
    print("BLEU: ", bleu.score)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment