Skip to content

Instantly share code, notes, and snippets.

@ymoslem
Last active January 18, 2023 00:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ymoslem/30a56b6c0e9ada2313c223038c8b2b14 to your computer and use it in GitHub Desktop.
Save ymoslem/30a56b6c0e9ada2313c223038c8b2b14 to your computer and use it in GitHub Desktop.
import ctranslate2
# Replace with your tokenize function and source tokenization model
def tokenize(input_sentences):
    """Split each sentence into a list of whitespace-delimited tokens.

    This is a placeholder tokenizer; swap in the tokenization that matches
    the source side of your translation model.

    Args:
        input_sentences: Iterable of sentence strings.

    Returns:
        A list containing one list of token strings per input sentence.
        An empty or whitespace-only sentence yields an empty token list.
    """
    # split() with no argument collapses runs of whitespace and ignores
    # leading/trailing whitespace, so no empty-string tokens are produced
    # (split(" ") would emit "" for every extra space).
    return [input_sentence.split() for input_sentence in input_sentences]
# Replace with your detokenize function and target tokenization model
def detokenize(outputs):
    """Join each token sequence back into a single space-separated string.

    This is a placeholder detokenizer; swap in the detokenization that
    matches the target side of your translation model.

    Args:
        outputs: Iterable of token sequences (each an iterable of strings).

    Returns:
        A list containing one joined string per token sequence.
    """
    # str.join accepts any iterable of strings, so no intermediate copy of
    # the token list is needed.
    return [" ".join(output) for output in outputs]
# Directory of the converted CTranslate2 model; point this at your own model.
model_path = "ctranslate2_model"

# Example batch of source sentences to translate.
source_sentences = [
    "how are you?",
    "fine, thanks!",
    "everything is great.",
    "I am happy to know that.",
]

# The second argument selects the device: "cpu" or "cuda".
translator = ctranslate2.Translator(model_path, "cpu")

# Tokenize the whole batch, then translate it in one call.
source_tokens = tokenize(source_sentences)
outputs = translator.translate_batch(source_tokens, beam_size=5)

# Keep only the first hypothesis of each result before detokenizing.
first_hypotheses = [output.hypotheses[0] for output in outputs]
translations = detokenize(first_hypotheses)

# One translation per line.
print("\n".join(translations))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment