This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip3 install openai
import os
import time

import openai

# Read the API key from the environment rather than hardcoding it —
# hardcoded keys leak via version control. The placeholder fallback keeps
# the original behavior when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "your_api_key_here")
openai.api_key = OPENAI_API_KEY
prompt = """French: La semaine dernière, quelqu’un m’a fait part de sa gratitude envers notre travail. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This example uses M2M-100 models converted to the CTranslate2 format. | |
# Download CTranslate2 models: | |
# • M2M-100 418M-parameter model: https://bit.ly/33fM1AO | |
# • M2M-100 1.2B-parameter model: https://bit.ly/3GYiaed | |
import ctranslate2 | |
import sentencepiece as spm | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

import streamlit as st
import openai

# Read the OpenAI API key from the environment rather than hardcoding it;
# the placeholder fallback preserves the original behavior when unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "your_OpenAI_API_key_here")
openai.api_key = OPENAI_API_KEY

# Set the page layout to wide
st.set_page_config(page_title="Extract Terms", page_icon=None, layout="wide")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sentence METEOR | |
# METEOR mainly works on sentence evaluation rather than corpus evaluation | |
# Run this file from CMD/Terminal | |
# Example Command: python3 sentence-meteor.py test_file_name.txt mt_file_name.txt | |
import sys | |
from nltk.translate.meteor_score import meteor_score | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
# Subwording the source file only | |
# Command: python3 subword.py <source_model_file> <source_pred_file> | |
# Note: If you did not train the model with start and end tokens remove ['<s>'] and ['</s>'] from line #30 | |
import sys | |
import sentencepiece as spm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# First convert your OpenNMT-py or OpenNMT-tf model to a CTranslate2 model. | |
# pip3 install ctranslate2 | |
# • OpenNMT-py: | |
# ct2-opennmt-py-converter --model_path model.pt --output_dir enja_ctranslate2 --quantization int8 | |
# • OpenNMT-tf: | |
# ct2-opennmt-tf-converter --model_path model --output_dir enja_ctranslate2 --src_vocab source.vocab --tgt_vocab target.vocab --model_type TransformerBase --quantization int8 | |
import ctranslate2 | |
import sentencepiece as spm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ctranslate2 | |
# Replace with your tokenize function and source tokenization model
def tokenize(input_sentences):
    """Split each input sentence on single spaces into a list of tokens.

    Placeholder tokenizer — swap in a real one (e.g. SentencePiece) for
    production use.

    Args:
        input_sentences: iterable of sentence strings.

    Returns:
        A list of token lists, one per input sentence.
    """
    return [sentence.split(" ") for sentence in input_sentences]
# Replace with your detokenize function and target tokenization model | |
def detokenize(outputs): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Remove duplicate, lines with bad characters, and shuffle | |
# Find the number of CPUs/cores to add to parallel: nproc --all | |
# sort -S 50% --parallel=4 dataset.es | uniq -u > dataset.unique.es | |
# shuf dataset.unique.es > dataset.unique.shuf.es | |
# !perl -ne '/�/ or print' dataset.unique.shuf.es > dataset.unique.shuf.cleaner.es | |
import re | |
import fasttext |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
# pip3 install gdown langdetect fasttext pycld2 py3langid
import gdown
from datetime import datetime

# Download the pre-trained fastText language-identification model
# (lid.176.ftz, compressed 176-language classifier) into the working dir.
url = "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.ftz"
output = "lid.176.ftz"
gdown.download(url, output, quiet=False)
NewerOlder