Skip to content

Instantly share code, notes, and snippets.

@nirajgiriXD
Last active April 8, 2024 06:52
Show Gist options
  • Save nirajgiriXD/14e620a1ee258f07799e64434f1f6469 to your computer and use it in GitHub Desktop.
Save nirajgiriXD/14e620a1ee258f07799e64434f1f6469 to your computer and use it in GitHub Desktop.
import json
import re
from googletrans import Translator
from bs4 import BeautifulSoup
# Matches $PLACEHOLDER$ tokens. The capture group makes re.split() keep the
# placeholder names, so the split result alternates text, name, text, name, ...
# NOTE(review): any two '$' with no '$' between them are treated as one
# placeholder, so literal dollar signs in a message (e.g. prices) will be
# swallowed into a "placeholder" -- confirm inputs never contain those.
_PLACEHOLDER_PATTERN = re.compile(r'\$([^\$]+)\$')


# Function to translate text while preserving placeholders
def translate_text_with_placeholders(text, target_language):
    """Translate *text*, leaving ``$PLACEHOLDER$`` tokens untouched.

    The text is split around the placeholders. Only the stripped core of each
    text segment is sent to the translator; the whitespace that surrounded it
    in the source is re-attached afterwards, so the spacing between text and
    placeholders in the result is exactly the spacing of the input (no
    doubled, leading or trailing spaces are introduced).

    Args:
        text: Source string, possibly containing ``$NAME$`` placeholders.
        target_language: Language code passed as ``dest`` to the translator.

    Returns:
        The translated string with every placeholder preserved verbatim.
        Segments whose translation fails are kept in their original form.
    """
    pieces = _PLACEHOLDER_PATTERN.split(text)
    translator = None  # created lazily, then reused for every segment
    rebuilt = []

    for index, piece in enumerate(pieces):
        # Odd indices are the captured placeholder names: restore the
        # delimiters and pass them through unchanged.
        if index % 2:
            rebuilt.append('$' + piece + '$')
            continue

        # Separate the translatable core from its surrounding whitespace.
        without_lead = piece.lstrip()
        leading = piece[:len(piece) - len(without_lead)]
        core = without_lead.rstrip()
        trailing = without_lead[len(core):]

        # Nothing to translate (empty or whitespace-only): keep as is and
        # avoid a pointless translator call.
        if not core:
            rebuilt.append(piece)
            continue

        try:
            if translator is None:
                translator = Translator()
            translated = translator.translate(core, dest=target_language).text.strip()
        except Exception:
            # Deliberate best-effort: whatever goes wrong while translating,
            # keep the source text for this segment instead of aborting.
            translated = core

        rebuilt.append(leading + translated + trailing)

    return ''.join(rebuilt)
# Function to translate HTML content while preserving placeholders
def translate_html_with_placeholders(key, html_content, target_language):
    """Translate the visible text of an HTML fragment, keeping its markup.

    Every ordinary text node is passed through
    ``translate_text_with_placeholders`` and replaced by the result. Special
    string nodes (comments, doctypes, CDATA, ...) and the contents of
    ``<script>`` / ``<style>`` elements are left untouched, since replacing
    them with a plain string would turn them into visible text or break code.

    Args:
        key: Identifier of the message being translated. Currently unused;
            kept so existing callers keep working.
        html_content: HTML (or plain text) to translate.
        target_language: Language code forwarded to the text translator.

    Returns:
        The serialized HTML with its text nodes translated.
    """
    # Local import: the file-level import only brings in BeautifulSoup.
    from bs4 import NavigableString

    soup = BeautifulSoup(html_content, 'html.parser')

    # find_all() returns a list, so replacing nodes while looping is safe.
    for node in soup.find_all(string=True):
        # Exact type check on purpose: Comment, CData, Doctype and friends
        # are subclasses of NavigableString and must not be translated.
        if type(node) is not NavigableString:
            continue
        # Guard for bs4 versions that report script/style bodies as plain
        # NavigableString objects.
        if node.parent is not None and node.parent.name in ('script', 'style'):
            continue
        if not node.strip():  # skip whitespace-only nodes
            continue
        node.replace_with(translate_text_with_placeholders(str(node), target_language))

    return str(soup)
# Read the JSON file. The encoding is given explicitly so non-ASCII messages
# are decoded the same way on every platform (the output below is UTF-8 too).
with open('messages.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Translate messages and update JSON: each entry's "message" is replaced
# in place by its translation (target language code "es").
for key, value in data.items():
    translated_message = translate_html_with_placeholders(key, value["message"], "es")
    value["message"] = translated_message

# Write the updated JSON to a file; ensure_ascii=False keeps translated
# characters readable instead of emitting \uXXXX escapes.
output_file = 'translated_file.json'
with open(output_file, 'w', encoding='utf-8') as file:
    json.dump(data, file, ensure_ascii=False, indent=2)

# Print that the file has been saved
print(f'The translated JSON file has been saved as "{output_file}"')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment