Yasmin Moslem ymoslem

## CTranslate2-example-adv.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import sentencepiece as spm
import ctranslate2


def tokenize(text, sp_source_model):
    """Encode ``text`` into SentencePiece subword tokens.

    Args:
        text: Input forwarded to ``SentencePieceProcessor.encode``.
        sp_source_model: Argument forwarded to the
            ``spm.SentencePieceProcessor`` constructor (presumably the path
            of the source SentencePiece model -- confirm against callers).

    Returns:
        The value of ``sp.encode(text, out_type=str)``, i.e. the subword
        pieces as strings rather than ids.
    """
    sp = spm.SentencePieceProcessor(sp_source_model)
    tokens = sp.encode(text, out_type=str)
    # Hand the pieces back to the caller; without this the function would
    # compute them and implicitly return None.
    return tokens

## CTranslate2-mwe.py
import ctranslate2

def detokenize(result):
    """Join the tokens in ``result`` into one space-separated string.

    Args:
        result: Iterable of string tokens.

    Returns:
        The tokens concatenated with a single space between consecutive
        items; an empty iterable yields an empty string.
    """
    # str.join accepts any iterable, so no intermediate list copy is needed.
    return " ".join(result)


def tokenize(input_sentence):
    """Split ``input_sentence`` into tokens on single space characters.

    The separator is the explicit string " ", so consecutive spaces produce
    empty-string tokens and an empty input yields ``[""]``.
    """
    return input_sentence.split(" ")

## subword_source_only.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Subwording the source file only
# Command: python3 subword_source_only.py <source_model_file> <source_pred_file>
# Note: If you did not train the model with start and end tokens, remove ['<s>'] and ['</s>'] from line #30


import sys
import sentencepiece as spm

## sentence-meteor.py
# Sentence METEOR

# METEOR mainly works on sentence evaluation rather than corpus evaluation
# Run this file from CMD/Terminal
# Example Command: python3 sentence-meteor.py test_file_name.txt mt_file_name.txt

import sys
from nltk.translate.meteor_score import meteor_score


## corpus-wer.py
# Corpus WER

# WER score for the whole corpus
# Run this file from CMD/Terminal
# Example Command: python3 corpus-wer.py test_file_name.txt mt_file_name.txt

import sys
from jiwer import wer


## sentence-wer.py
# Sentence WER

# Segment-by-segment WER, with the file names passed as command-line arguments
# Run this file from CMD/Terminal
# Example Command: python3 sentence-wer.py test_file_name.txt mt_file_name.txt

import sys
from jiwer import wer


## compute-bleu-args.py
# Corpus BLEU with arguments
# Run this file from CMD/Terminal
# Example Command: python3 compute-bleu-args.py test_file_name.txt mt_file_name.txt


import sys
import sacrebleu
from sacremoses import MosesDetokenizer
# Module-level Moses detokenizer, constructed with lang='en'.
md = MosesDetokenizer(lang='en')

## compute-bleu-sentence-args.py
# Segment-by-segment BLEU, with the file names passed as command-line arguments
# Run this file from CMD/Terminal
# Example Command: python3 compute-bleu-sentence-args.py test_file_name.txt mt_file_name.txt

import sys
import sacrebleu
from sacremoses import MosesDetokenizer
# Module-level Moses detokenizer, constructed with lang='en'.
md = MosesDetokenizer(lang='en')

# First command-line argument (test_file_name.txt in the example command above).
target_test = sys.argv[1]  # Test file argument

## compute-bleu-sentence.py
# Segment-by-segment BLEU

import sacrebleu
from sacremoses import MosesDetokenizer
# Module-level Moses detokenizer, constructed with lang='en'.
md = MosesDetokenizer(lang='en')


# Open the test dataset human translation file and detokenize the references
# The list starts empty; the code that fills it is not visible in this
# excerpt -- presumably one detokenized reference per line (TODO confirm).
refs = []

## compute-bleu.py
import sacrebleu
from sacremoses import MosesDetokenizer
# Module-level Moses detokenizer, constructed with lang='en'.
md = MosesDetokenizer(lang='en')


# Open the test dataset human translation file and detokenize the references
# The list starts empty; the loop that fills it is cut off in this excerpt --
# presumably one detokenized reference per line of "target.test" (TODO confirm).
refs = []

with open("target.test") as test:
    for line in test:
	#!/usr/bin/env python3
	# -- coding: utf-8 --

	import sentencepiece as spm
	import ctranslate2


	def tokenize(text, sp_source_model):
	sp = spm.SentencePieceProcessor(sp_source_model)
	tokens =sp.encode(text, out_type=str)
	import ctranslate2

	def detokenize(result):
	translation = " ".join([t for t in result])
	return translation


	def tokenize(input_sentence):
	tokens = input_sentence.split(" ")
	return tokens
	#!/usr/bin/env python3
	# -- coding: utf-8 --

	# Subwording the source file only
	# Command: python3 subword.py <source_model_file> <source_pred_file>
	# Note: If you did not train the model with start and end tokens remove ['<s>'] and ['</s>'] from line #30


	import sys
	import sentencepiece as spm
	# Sentence METEOR

	# METEOR mainly works on sentence evaluation rather than corpus evaluation
	# Run this file from CMD/Terminal
	# Example Command: python3 sentence-meteor.py test_file_name.txt mt_file_name.txt

	import sys
	from nltk.translate.meteor_score import meteor_score
	# Corpus WER

	# WER score for the whole corpus
	# Run this file from CMD/Terminal
	# Example Command: python3 corpus-wer.py test_file_name.txt mt_file_name.txt

	import sys
	from jiwer import wer
	# Sentence WER

	# WER for segment by segment with arguments
	# Run this file from CMD/Terminal
	# Example Command: python3 sentence-wer.py test_file_name.txt mt_file_name.txt

	import sys
	from jiwer import wer
	# Corpus BLEU with arguments
	# Run this file from CMD/Terminal
	# Example Command: python3 compute-bleu-args.py test_file_name.txt mt_file_name.txt


	import sys
	import sacrebleu
	from sacremoses import MosesDetokenizer
	md = MosesDetokenizer(lang='en')
	# BLEU for segment by segment with arguments
	# Run this file from CMD/Terminal
	# Example Command: python3 compute-bleu-sentence-args.py test_file_name.txt mt_file_name.txt

	import sys
	import sacrebleu
	from sacremoses import MosesDetokenizer
	md = MosesDetokenizer(lang='en')

	target_test = sys.argv[1] # Test file argument
	# BLEU for segment by segment

	import sacrebleu
	from sacremoses import MosesDetokenizer
	md = MosesDetokenizer(lang='en')


	# Open the test dataset human translation file and detokenize the references
	refs = []