Last active
April 14, 2021 08:32
-
-
Save kristjan-eljand/76a1f032dab3f4920dab2261453e28d6 to your computer and use it in GitHub Desktop.
Text generation with Hugging Face pre-trained models in a non-English language
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pprint import pprint #for user-friendly output printing | |
# Round-trip text generation for Estonian prompts: translate each sentence
# beginning to English, let the model continue it, then translate the
# continuation back to Estonian.
# NOTE: `pipeline`, `translate`, EST_TO_ENG and ENG_TO_EST are not defined in
# this snippet; they must already be in scope before it runs.

# 1. Build the text-generation pipeline
generator = pipeline('text-generation', model='distilgpt2')

# 2. Translate the Estonian sentence beginnings to English, one at a time
beginnings_origin = [
    "Eesti toodab elektrienergiat peamiselt",
    "Taastuvenergia on oluline, sest"
]
translated_beginnings = []
for beginning in beginnings_origin:
    translation = translate(beginning, EST_TO_ENG)
    # translate() output is indexed as a list of dicts; keep only the text
    translated_beginnings.append(translation[0]['translation_text'])
print("Sentence beginnings that are translated to English:\n")
pprint(translated_beginnings)

# 3. Continue every English beginning (one sequence each, max_length=50)
generated_english = [
    generator(prompt, max_length=50, num_return_sequences=1)[0]['generated_text']
    for prompt in translated_beginnings
]
print("Generated text in English:\n")
pprint(generated_english)

# 4. Translate the generated English texts back to Estonian
generated_estonian = []
for english_text in generated_english:
    back_translation = translate(english_text, ENG_TO_EST)
    generated_estonian.append(back_translation[0]['translation_text'])
print("Generated text that is translated back to Estonian:\n")
pprint(generated_estonian)
# Output:
# Sentence beginnings that are translated to English:
# 'Estonia produces electricity mainly'
# 'Renewable energy is important because:'
#
# Generated text in English:
# 'Estonia produces electricity mainly from natural gas-fired hydroelectric
# power plants, according to a report last summer by The World
# Bank.'
#
# 'Renewable energy is important because: The more we know about fossil fuels,
# the more we realize that we need to reduce greenhouse gas emissions (about 10%).'
#
# Generated text that is translated back to Estonian:
# 'Eesti toodab elektrit peamiselt maagaasil töötavatest hüdroelektrijaamadest,
# vastavalt Maailmapanga möödunud suve aruandele.'
#
# 'Taastuvenergia on oluline, sest mida rohkem me teame fossiilkütustest, seda
# enam mõistame, et peame vähendama kasvuhoonegaaside heitkoguseid (umbes 10%).'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment