Skip to content

Instantly share code, notes, and snippets.

@keitazoumana
Last active September 15, 2022 23:43
Show Gist options
  • Save keitazoumana/9cec62b176fe9aae8e2a448fa6de141c to your computer and use it in GitHub Desktop.
Save keitazoumana/9cec62b176fe9aae8e2a448fa6de141c to your computer and use it in GitHub Desktop.
from langdetect import detect, DetectorFactory
# Pin langdetect's seed: the langdetect documentation describes its detection
# as non-deterministic unless a seed is set before the first detect() call.
DetectorFactory.seed = 0
def translate_text(text, text_lang, target_lang='en'):
    """Translate ``text`` from ``text_lang`` into ``target_lang`` with MarianMT.

    The checkpoint ``Helsinki-NLP/opus-mt-{text_lang}-{target_lang}`` is
    loaded on every call through ``MarianTokenizer`` and ``MarianMTModel``.
    Both names must already be in scope (presumably imported from
    ``transformers`` elsewhere -- confirm against the rest of the project).

    Args:
        text: The text to translate.
        text_lang: Language code of ``text``; forms the source part of the
            checkpoint name.
        target_lang: Language code to translate into; forms the target part
            of the checkpoint name. Defaults to ``'en'``.

    Returns:
        The translated text as a single string.
    """
    # Build the checkpoint name for this language pair.
    model_name = f"Helsinki-NLP/opus-mt-{text_lang}-{target_lang}"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)

    # A ">>xx<<" prefix selects the OUTPUT language and only exists in the
    # vocabulary of checkpoints that can emit several target languages.
    # Prefixing the source language (as the previous code did) injects an
    # unknown token into the input of ordinary single-pair checkpoints, so
    # only prepend the target token when the tokenizer actually declares it.
    lang_token = f">>{target_lang}<<"
    if lang_token in getattr(tokenizer, "supported_language_codes", ()):
        source_text = f"{lang_token} {text}"
    else:
        source_text = text

    # Encode as a batch of one, generate, and decode the single result.
    batch = tokenizer([source_text], return_tensors="pt", padding=True)
    generated = model.generate(**batch)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment