Skip to content

Instantly share code, notes, and snippets.

View ymoslem's full-sized avatar
👩‍🎓

Yasmin Moslem ymoslem

👩‍🎓
View GitHub Profile
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sentencepiece as spm
import ctranslate2
def tokenize(text, sp_source_model):
sp = spm.SentencePieceProcessor(sp_source_model)
tokens =sp.encode(text, out_type=str)
@ymoslem
ymoslem / CTranslate2-mwe.py
Created April 20, 2021 12:47
CTranslate2 MWE
import ctranslate2
def detokenize(result) -> str:
    """Join translated tokens back into a single string.

    Args:
        result: An iterable of string tokens.

    Returns:
        The tokens joined with a single space between each pair; an
        empty string when ``result`` is empty.
    """
    # str.join accepts any iterable of strings, so no intermediate
    # list copy is needed.
    return " ".join(result)
def tokenize(input_sentence: str) -> list:
    """Split a sentence into tokens on single space characters.

    Args:
        input_sentence: The text to split.

    Returns:
        The list of substrings separated by a single ``" "``. Because
        the separator is given explicitly, consecutive spaces produce
        empty-string tokens and an empty input yields ``[""]``.
    """
    return input_sentence.split(" ")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Subwording the source file only
# Command: python3 subword.py <source_model_file> <source_pred_file>
# Note: If you did not train the model with start and end tokens, remove ['<s>'] and ['</s>'] from line #30
import sys
import sentencepiece as spm
@ymoslem
ymoslem / sentence-meteor.py
Created April 16, 2020 15:30
Compute METEOR score
# Sentence METEOR
# METEOR mainly works on sentence evaluation rather than corpus evaluation
# Run this file from CMD/Terminal
# Example Command: python3 sentence-meteor.py test_file_name.txt mt_file_name.txt
import sys
from nltk.translate.meteor_score import meteor_score
@ymoslem
ymoslem / corpus-wer.py
Last active March 4, 2020 16:36
Compute WER score for the whole dataset
# Corpus WER
# WER score for the whole corpus
# Run this file from CMD/Terminal
# Example Command: python3 corpus-wer.py test_file_name.txt mt_file_name.txt
import sys
from jiwer import wer
@ymoslem
ymoslem / sentence-wer.py
Last active March 4, 2020 16:35
Compute WER score for each sentence
# Sentence WER
# WER computed segment by segment, with file names passed as arguments
# Run this file from CMD/Terminal
# Example Command: python3 sentence-wer.py test_file_name.txt mt_file_name.txt
import sys
from jiwer import wer
@ymoslem
ymoslem / compute-bleu-args.py
Last active February 9, 2020 09:45
Compute BLEU with arguments
# Corpus BLEU with arguments
# Run this file from CMD/Terminal
# Example Command: python3 compute-bleu-args.py test_file_name.txt mt_file_name.txt
import sys
import sacrebleu
from sacremoses import MosesDetokenizer
md = MosesDetokenizer(lang='en')
@ymoslem
ymoslem / compute-bleu-sentence-args.py
Last active July 18, 2020 21:23
Calculate the BLEU score sentence by sentence and save the results to a file, using Python arguments for file names
# BLEU computed segment by segment, with file names passed as arguments
# Run this file from CMD/Terminal
# Example Command: python3 compute-bleu-sentence-args.py test_file_name.txt mt_file_name.txt
import sys
import sacrebleu
from sacremoses import MosesDetokenizer
md = MosesDetokenizer(lang='en')
target_test = sys.argv[1] # Test file argument
@ymoslem
ymoslem / compute-bleu-sentence.py
Last active July 18, 2020 21:23
Calculate the BLEU score sentence by sentence and save the results to a file
# BLEU computed segment by segment
import sacrebleu
from sacremoses import MosesDetokenizer
md = MosesDetokenizer(lang='en')
# Open the test dataset human translation file and detokenize the references
refs = []
@ymoslem
ymoslem / compute-bleu.py
Last active February 4, 2021 01:48
Compute BLEU Score for Machine Translation
import sacrebleu
from sacremoses import MosesDetokenizer
md = MosesDetokenizer(lang='en')
# Open the test dataset human translation file and detokenize the references
refs = []
with open("target.test") as test:
for line in test: