Skip to content

Instantly share code, notes, and snippets.

@andreluiz1987
Created January 7, 2024 18:05
Show Gist options
  • Save andreluiz1987/0a846517b4f46042c9617a8aba6aa17a to your computer and use it in GitHub Desktop.
Save andreluiz1987/0a846517b4f46042c9617a8aba6aa17a to your computer and use it in GitHub Desktop.
import json
import os

import openai
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
# Elasticsearch client pointed at http://localhost:9200.
es = Elasticsearch(
    [{'host': 'localhost', 'port': 9200, 'scheme': 'http'}])
# Read the OpenAI key from the environment so a real secret never has to be
# written into this file; the original 'key' placeholder remains the fallback
# when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get('OPENAI_API_KEY', 'key')
# Seed data: one dict per movie with an English "title" and "synopsis".
# Written as (title, synopsis) pairs and expanded into dicts below.
movie_list = [
    {"title": title, "synopsis": synopsis}
    for title, synopsis in (
        (
            "The Shawshank Redemption",
            "Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency.",
        ),
        (
            "The Dark Knight",
            "When the Joker wreaks havoc on Gotham, Batman must confront one of the greatest psychological and physical tests of his ability to fight injustice.",
        ),
        (
            "Inception",
            "A thief who enters the dreams of others to steal their secrets faces a new challenge when tasked with planting an idea into someone's mind.",
        ),
        (
            "The Lord of the Rings: The Return of the King",
            "Frodo and Sam continue their journey to Mount Doom to destroy the One Ring, while the rest of the fellowship prepares for a final battle.",
        ),
        (
            "City of God",
            "In the poverty-stricken favelas of Rio de Janeiro, two boys choose different paths: one becomes a photographer, the other a drug dealer.",
        ),
        (
            "The Social Network",
            "The founding and rise of Facebook, highlighting the relationships and betrayals that marked the creation of the social media giant.",
        ),
        (
            "Parasite",
            "A poor family scams their way into working for a wealthy family, but their deception leads to unexpected consequences.",
        ),
        (
            "Eternal Sunshine of the Spotless Mind",
            "After a painful breakup, a man undergoes a medical procedure to erase memories of his former girlfriend.",
        ),
        (
            "No Country for Old Men",
            "A hunter stumbles upon a drug deal gone wrong and takes a suitcase full of money, setting off a violent and suspenseful chase.",
        ),
        (
            "The Grand Budapest Hotel",
            "The adventures of a hotel concierge and his protégé as they become involved in the theft and recovery of a priceless painting.",
        ),
    )
]
def translate_text(text, language):
    """Translate ``text`` into ``language`` with the OpenAI chat completions API.

    Args:
        text: Source text to translate.
        language: Target language name exactly as it should appear in the
            prompt (for example ``"Spanish"``).

    Returns:
        The model's reply with surrounding whitespace stripped, or ``""``
        when ``text`` is ``None``, empty or whitespace-only (no request is
        sent in that case).

    Raises:
        ValueError: If the API reply contains no text content.
    """
    # Nothing to translate: skip the API call rather than sending an empty
    # prompt and storing whatever the model answers as a "translation".
    if text is None or not str(text).strip():
        return ""

    prompt = f"Translate the following text to {language}: \n\n {text}"
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        # NOTE: replies are capped at 100 tokens, so a long input may come
        # back truncated.
        max_tokens=100,
    )

    content = response.choices[0].message.content
    if content is None:
        # Fail with a clear message instead of an AttributeError on .strip().
        raise ValueError(
            f"OpenAI returned no text content when translating to {language}"
        )
    return content.strip()
def format_action(movie):
    """Build the bulk action that indexes one movie into ``idx_movies``.

    Args:
        movie: Mapping whose ``"title"`` and ``"synopsis"`` entries are
            themselves mappings keyed by ``"en"``, ``"es"``, ``"fr"`` and
            ``"pt-br"``.

    Returns:
        A dict with ``"_index"`` set to ``"idx_movies"`` and a ``"_source"``
        holding fresh copies of the four language entries for both fields;
        any other keys on ``movie`` are left out.

    Raises:
        KeyError: If a field or one of the four language keys is missing.
    """
    language_keys = ("en", "es", "fr", "pt-br")
    document = {
        field: {key: movie[field][key] for key in language_keys}
        for field in ("title", "synopsis")
    }
    return {"_index": "idx_movies", "_source": document}
if __name__ == '__main__':
    # Language name used in the translation prompt -> key the translation is
    # stored under in each document.
    language_mapping = {
        "Portuguese": "pt-br",
        "Spanish": "es",
        "French": "fr",
    }

    for movie in movie_list:
        # Replace the plain English strings with per-language mappings,
        # seeded with the English original under "en".
        movie["title"] = {"en": movie["title"]}
        movie["synopsis"] = {"en": movie["synopsis"]}
        for language, code in language_mapping.items():
            movie["title"][code] = translate_text(movie["title"]["en"], language)
            movie["synopsis"][code] = translate_text(movie["synopsis"]["en"], language)

    actions = [format_action(movie) for movie in movie_list]

    # bulk() returns (number_of_successes, list_of_error_items) unless
    # stats_only=True, so the second value is a list and must be counted
    # before printing. With raise_on_error=True a failing item raises instead
    # of being returned, so a run that reaches the prints reports 0 failures.
    success, errors = bulk(es, actions, index="idx_movies", raise_on_error=True)
    print(f"Successfully indexed {success} documents")
    print(f"Failed to index {len(errors)} documents")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment