This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# OpenNMT-py GUI, alpha version, by Yasmin Moslem
# Contact: yasmin {aatt} machinetranslation.io
# Built on "translate.py" from OpenNMT-py v0.9.1
from __future__ import unicode_literals | |
from itertools import repeat |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sacrebleu | |
from sacremoses import MosesDetokenizer | |
md = MosesDetokenizer(lang='en') | |
# Open the human-translation (reference) file of the test dataset and detokenize each reference
refs = [] | |
with open("target.test") as test: | |
for line in test: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Segment-by-segment (sentence-level) BLEU
import sacrebleu | |
from sacremoses import MosesDetokenizer | |
md = MosesDetokenizer(lang='en') | |
# Open the human-translation (reference) file of the test dataset and detokenize each reference
refs = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Segment-by-segment (sentence-level) BLEU, with the file names passed as command-line arguments
# Run this file from the command line (CMD/Terminal)
# Example command: python3 compute-bleu-sentence-args.py test_file_name.txt mt_file_name.txt
import sys | |
import sacrebleu | |
from sacremoses import MosesDetokenizer | |
md = MosesDetokenizer(lang='en') | |
target_test = sys.argv[1] # Test file argument |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Corpus-level BLEU, with the file names passed as command-line arguments
# Run this file from the command line (CMD/Terminal)
# Example command: python3 compute-bleu-args.py test_file_name.txt mt_file_name.txt
import sys | |
import sacrebleu | |
from sacremoses import MosesDetokenizer | |
md = MosesDetokenizer(lang='en') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sentence WER
# Segment-by-segment WER, with the file names passed as command-line arguments
# Run this file from the command line (CMD/Terminal)
# Example command: python3 sentence-wer.py test_file_name.txt mt_file_name.txt
import sys | |
from jiwer import wer | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Corpus WER
# A single WER score for the whole corpus
# Run this file from the command line (CMD/Terminal)
# Example command: python3 corpus-wer.py test_file_name.txt mt_file_name.txt
import sys | |
from jiwer import wer | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sentence METEOR
# METEOR is mainly used for sentence-level evaluation rather than corpus-level evaluation
# Run this file from the command line (CMD/Terminal)
# Example command: python3 sentence-meteor.py test_file_name.txt mt_file_name.txt
import sys | |
from nltk.translate.meteor_score import meteor_score | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
# Subword the source file only
# Command: python3 subword.py <source_model_file> <source_pred_file>
# Note: If you did not train the model with start and end tokens, remove ['<s>'] and ['</s>'] from line #30
import sys | |
import sentencepiece as spm |
OlderNewer