Skip to content

Instantly share code, notes, and snippets.

@amirhmoradi
Created April 18, 2023 12:23
Show Gist options
  • Save amirhmoradi/eee652fa65eacb2536a9d0f711940842 to your computer and use it in GitHub Desktop.
Save amirhmoradi/eee652fa65eacb2536a9d0f711940842 to your computer and use it in GitHub Desktop.
Use OpenAI API to translate json dictionaries
# Description: This script uses OpenAI's API to translate the English text values of a JSON file into Persian.
# You can change the context and languages by changing the task description and the model engine in the variables:
# - task_description
# - model_engine
#
# Make sure to set file paths for the source and destination files in the variables:
# - source_file
# - destination_file
#
# The script will generate temporary files for each batch of translated text, and then combine them into a single file.
# Once the script is done, it will remove the temporary files.
#
# The script processes the entries in batches of 20 key/value pairs; each value is sent to the API as its own request.
# You can change the batch size by changing the value of the variable:
# - batch_size
#
# The script will create a new JSON file with the translated text, keeping the same keys.
# The source JSON file must be a flat object that maps keys to string values:
# Source file example:
# {
# "key1": "value1",
# "key2": "value2",
# "key3": "value3",
# ...
# }
# Translated file example:
# {
# "key1": "translated value1",
# "key2": "translated value2",
# "key3": "translated value3",
# ...
# }
# Note: The OpenAI API is not free. You need to create an account and get an API key to use it.
#
# Requirements:
# - Python 3.6+
# - OpenAI Python library, pre-1.0 release (the script calls openai.Completion.create): pip install "openai<1.0"
# - OpenAI API key
#
# Usage:
# 1. Install the OpenAI Python library (pre-1.0 release): pip install "openai<1.0"
# 2. Set the OPENAI_API_KEY environment variable to your OpenAI API key
# 3. Run the script: python openai_translate.py
import json
import math
import time
import os
import openai
def translate_and_save(
    source_file="./locales/en/common.json",
    destination_file="./locales/fa/common.json",
    task_description="For a CRM software like Hubspot, I need translations. Use a friendly tone and translate from English to Persian language the following:",
    model_engine="text-davinci-003",
    batch_size=20,
    max_attempts=3,
):
    """Translate every string value of a flat JSON dictionary with the OpenAI API.

    The source file is read as a JSON object. Each string value is sent to
    the completion endpoint (prefixed with ``task_description``) and the
    stripped reply is stored under the same key. Results are written to one
    temporary ``translated_batch_<n>.json`` file per batch in the current
    working directory, merged into ``destination_file``, and the temporary
    files are then removed.

    Args:
        source_file: Path of the JSON file to translate.
        destination_file: Path of the JSON file to write. Its parent
            directory is created when missing.
        task_description: Natural-language instruction placed before each
            text in the prompt.
        model_engine: Engine name passed to ``openai.Completion.create``.
        batch_size: Number of entries stored per temporary batch file.
        max_attempts: How many times a single API request is tried before
            the run is aborted.

    Raises:
        ValueError: If ``batch_size`` or ``max_attempts`` is smaller than 1.
        RuntimeError: If a request still fails after ``max_attempts`` tries.
            Batch files that were already written are left on disk.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    if max_attempts < 1:
        raise ValueError("max_attempts must be a positive integer")

    # Load OpenAI API credentials from environment variables.
    openai.api_key = os.environ.get("OPENAI_API_KEY")

    # Explicit UTF-8 everywhere: the output is written with
    # ensure_ascii=False, so relying on the platform's default encoding can
    # fail on non-ASCII (e.g. Persian) text.
    with open(source_file, "r", encoding="utf-8") as f:
        json_data = json.load(f)

    # Materialise the items once instead of rebuilding the list per batch.
    entries = list(json_data.items())

    # Determine the number of batches needed.
    total_lines = len(entries)
    total_batches = math.ceil(total_lines / batch_size)
    print(f"Total lines: {total_lines}")
    print(f"Batch size: {batch_size}")
    print(f"Total batches: {total_batches}")

    # Process the batches, saving each one to its own temporary file.
    batch_files = []
    for batch_id in range(1, total_batches + 1):
        start = (batch_id - 1) * batch_size
        translated_lines = {}
        for key, value in entries[start:start + batch_size]:
            if not isinstance(value, str):
                # Only strings can be appended to the prompt; numbers,
                # nested objects, etc. are carried over untouched.
                print(f"Skipping non-string value for key {key!r}; copied unchanged.")
                translated_lines[key] = value
                continue
            translated_lines[key] = _translate_text(
                value, task_description, model_engine, max_attempts
            )
            time.sleep(2)  # To avoid hitting the OpenAI API rate limit

        batch_file = f"translated_batch_{batch_id}.json"
        with open(batch_file, "w", encoding="utf-8") as f:
            json.dump(translated_lines, f, ensure_ascii=False, indent=4)
        batch_files.append(batch_file)

    # Combine all batch files into a single dictionary.
    translated_data = {}
    for batch_file in batch_files:
        with open(batch_file, "r", encoding="utf-8") as f:
            translated_data.update(json.load(f))

    # Save the translated data, creating the target directory if needed.
    destination_dir = os.path.dirname(destination_file)
    if destination_dir:
        os.makedirs(destination_dir, exist_ok=True)
    with open(destination_file, "w", encoding="utf-8") as f:
        json.dump(translated_data, f, ensure_ascii=False, indent=4)

    # Remove the batch files once the destination file has been written and
    # holds at least one entry.
    if len(translated_data) > 0:
        for batch_file in batch_files:
            os.remove(batch_file)


def _translate_text(text, task_description, model_engine, max_attempts):
    """Return the stripped completion for ``text``, retrying failed requests.

    Waits 3 seconds between attempts. Raises RuntimeError (chained to the
    last error) when every attempt fails, so that a failed request can never
    be recorded with a missing or stale response.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            # NOTE(review): openai.Completion is the interface of pre-1.0
            # releases of the openai package, and the default engine name may
            # no longer be served - confirm against the installed version.
            response = openai.Completion.create(
                engine=model_engine,
                # The prompt is the task description followed by the text.
                prompt=task_description + "\n" + text,
                temperature=0.7,
                max_tokens=2048,
                n=1,
                stop=None,
            )
        except Exception as e:
            # Broad on purpose: any request failure is retried.
            print(f"Error: {e}")
            if attempt == max_attempts:
                raise RuntimeError(
                    f"Translation failed after {max_attempts} attempts"
                ) from e
            print("Retrying in 3 seconds...")
            time.sleep(3)
        else:
            return response.choices[0].text.strip()
    raise ValueError("max_attempts must be a positive integer")
# Run the translation only when the file is executed as a script
# (python openai_translate.py), not when it is imported as a module.
if __name__ == "__main__":
    translate_and_save()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment