Yasmin Moslem ymoslem

## compute-bleu-args.py
# Corpus BLEU with arguments
# Run this file from CMD/Terminal
# Example Command: python3 compute-bleu-args.py test_file_name.txt mt_file_name.txt


import sys
import sacrebleu
from sacremoses import MosesDetokenizer
md = MosesDetokenizer(lang='en')

## sentence-wer.py
# Sentence WER

# Segment-by-segment WER, with the file names passed as command-line arguments
# Run this file from CMD/Terminal
# Example Command: python3 sentence-wer.py test_file_name.txt mt_file_name.txt

import sys
from jiwer import wer


## corpus-wer.py
# Corpus WER

# WER score for the whole corpus
# Run this file from CMD/Terminal
# Example Command: python3 corpus-wer.py test_file_name.txt mt_file_name.txt

import sys
from jiwer import wer


## compute-bleu-sentence.py
# BLEU for segment by segment

import sacrebleu
from sacremoses import MosesDetokenizer
md = MosesDetokenizer(lang='en')


# Open the test dataset human translation file and detokenize the references
refs = []

## compute-bleu-sentence-args.py
# Segment-by-segment BLEU, with the file names passed as command-line arguments
# Run this file from CMD/Terminal
# Example Command: python3 compute-bleu-sentence-args.py test_file_name.txt mt_file_name.txt

import sys
import sacrebleu
from sacremoses import MosesDetokenizer
md = MosesDetokenizer(lang='en')

target_test = sys.argv[1]  # Test file argument

## compute-bleu.py
import sacrebleu
from sacremoses import MosesDetokenizer
md = MosesDetokenizer(lang='en')


# Open the test dataset human translation file and detokenize the references
refs = []

with open("target.test") as test:
    for line in test:

## CTranslate2-mwe.py
import ctranslate2

def detokenize(result):
    """Join tokens back into a single space-separated string.

    Args:
        result: Iterable of token strings.

    Returns:
        The tokens joined with single spaces; an empty iterable gives "".
    """
    # str.join accepts any iterable of strings directly, so there is no
    # need to copy the tokens into a list by hand first.
    return " ".join(result)


def tokenize(input_sentence):
    """Split a sentence into tokens on single space characters.

    The separator is the literal " ", so consecutive spaces produce
    empty-string tokens and other whitespace (tabs, newlines) is kept
    inside the tokens.
    """
    return input_sentence.split(" ")

## translate-gui.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

#OpenNMT-py GUI Alpha version by Yasmin Moslem
#Contact: yasmin {aatt} machinetranslation.io
#Built on OpenNMT-py v. 0.9.1 "translate.py"

from __future__ import unicode_literals
from itertools import repeat

## webz-dataset-text-extraction.py
# https://webz.io/free-datasets/
# Spanish: https://s3.amazonaws.com/webhose-archive/datasets/645_20170904091816.zip
# Extract text from the JSON files


import os
import json
from sentence_splitter import split_text_into_sentences
from tqdm import tqdm

## mBART-example.py
#!pip install transformers sentencepiece torch -U -q
# Replace "test_source.txt" with your source file.
# Change src_lang, tgt_lang, and lang_code_to_id to the source and target languages you need.

from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
import torch
from tqdm import tqdm


# Function to split source lines into chunks to avoid out-of-memory errors
	# Corpus BLEU with arguments
	# Run this file from CMD/Terminal
	# Example Command: python3 compute-bleu-args.py test_file_name.txt mt_file_name.txt


	import sys
	import sacrebleu
	from sacremoses import MosesDetokenizer
	md = MosesDetokenizer(lang='en')
	# Sentence WER

	# WER for segment by segment with arguments
	# Run this file from CMD/Terminal
	# Example Command: python3 sentence-wer.py test_file_name.txt mt_file_name.txt

	import sys
	from jiwer import wer
	# Corpus WER

	# WER score for the whole corpus
	# Run this file from CMD/Terminal
	# Example Command: python3 corpus-wer.py test_file_name.txt mt_file_name.txt

	import sys
	from jiwer import wer
	# BLEU for segment by segment

	import sacrebleu
	from sacremoses import MosesDetokenizer
	md = MosesDetokenizer(lang='en')


	# Open the test dataset human translation file and detokenize the references
	refs = []
	# BLEU for segment by segment with arguments
	# Run this file from CMD/Terminal
	# Example Command: python3 compute-bleu-sentence-args.py test_file_name.txt mt_file_name.txt

	import sys
	import sacrebleu
	from sacremoses import MosesDetokenizer
	md = MosesDetokenizer(lang='en')

	target_test = sys.argv[1] # Test file argument
	import ctranslate2

	def detokenize(result):
		"""Join tokens back into a single space-separated string.

		Args:
			result: Iterable of token strings.

		Returns:
			The tokens joined with single spaces; an empty iterable gives "".
		"""
		# The body must be indented under the def; str.join takes the
		# iterable directly, so no hand-built list copy is needed.
		return " ".join(result)


	def tokenize(input_sentence):
		"""Split a sentence into tokens on single space characters.

		The separator is the literal " ", so consecutive spaces produce
		empty-string tokens and other whitespace is kept inside the tokens.
		"""
		# The body must be indented under the def to be valid Python.
		return input_sentence.split(" ")
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	#OpenNMT-py GUI Alpha version by Yasmin Moslem
	#Contact: yasmin {aatt} machinetranslation.io
	#Built on OpenNMT-py v. 0.9.1 "translate.py"

	from __future__ import unicode_literals
	from itertools import repeat
	# https://webz.io/free-datasets/
	# Spanish: https://s3.amazonaws.com/webhose-archive/datasets/645_20170904091816.zip
	# Extract text from the JSON files


	import os
	import json
	from sentence_splitter import split_text_into_sentences
	from tqdm import tqdm
	#!pip install transformers sentencepiece torch -U -q
	# Replace "test_source.txt" with your source file.
	# Change src_lang, tgt_lang, and lang_code_to_id to the source and target languages you need.

	from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
	import torch
	from tqdm import tqdm


	# Function to split source lines into chunks to avoid out-of-memory errors