Skip to content

Instantly share code, notes, and snippets.

@DollarAkshay
Created March 16, 2021 06:04
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save DollarAkshay/ba269dfd435d65d301ffa89910cfc933 to your computer and use it in GitHub Desktop.
Save DollarAkshay/ba269dfd435d65d301ffa89910cfc933 to your computer and use it in GitHub Desktop.
Script to translate JSON string values with the googletrans library in Python. Useful for localizing web applications.
import json
import googletrans
import os
from googletrans import Translator
def translateString(data, destLangCode):
    """Recursively translate every string inside a JSON-like structure.

    Dicts and lists are walked recursively (dict keys are left untouched).
    Non-blank strings are translated from English to ``destLangCode`` with the
    module-level ``translator``. Every other leaf value (numbers, booleans,
    ``None``, empty or whitespace-only strings) is returned unchanged, since
    there is nothing to translate.

    Args:
        data: A value as produced by ``json.load``: dict, list, str, number,
            bool or ``None``.
        destLangCode: Target language code, passed as ``dest`` to
            ``translator.translate`` (e.g. ``'hi'``).

    Returns:
        A new structure of the same shape as ``data`` with its strings
        translated.
    """
    if isinstance(data, dict):
        return {k: translateString(v, destLangCode) for k, v in data.items()}
    if isinstance(data, list):
        # JSON arrays must be walked element by element; passing the list
        # straight to the translator does not yield an object with ``.text``.
        return [translateString(item, destLangCode) for item in data]
    if not isinstance(data, str) or not data.strip():
        # Non-text leaves and blank strings pass through untouched.
        return data
    return translator.translate(data, src='en', dest=destLangCode).text
# Main Code

# Maps a language code to its lowercase English language name. The name is
# used for the progress message and as the base name of each output file.
all_languages = {
    'af': 'afrikaans',
    'sq': 'albanian',
    'am': 'amharic',
    'ar': 'arabic',
    'hy': 'armenian',
    'az': 'azerbaijani',
    'eu': 'basque',
    'be': 'belarusian',
    'bn': 'bengali',
    'bs': 'bosnian',
    'bg': 'bulgarian',
    'ca': 'catalan',
    'ceb': 'cebuano',
    'ny': 'chichewa',
    'zh-cn': 'chinese (simplified)',
    'zh-tw': 'chinese (traditional)',
    'co': 'corsican',
    'hr': 'croatian',
    'cs': 'czech',
    'da': 'danish',
    'nl': 'dutch',
    'en': 'english',
    'eo': 'esperanto',
    'et': 'estonian',
    'tl': 'filipino',
    'fi': 'finnish',
    'fr': 'french',
    'fy': 'frisian',
    'gl': 'galician',
    'ka': 'georgian',
    'de': 'german',
    'el': 'greek',
    'gu': 'gujarati',
    'ht': 'haitian creole',
    'ha': 'hausa',
    'haw': 'hawaiian',
    'iw': 'hebrew',
    'he': 'hebrew',
    'hi': 'hindi',
    'hmn': 'hmong',
    'hu': 'hungarian',
    'is': 'icelandic',
    'ig': 'igbo',
    'id': 'indonesian',
    'ga': 'irish',
    'it': 'italian',
    'ja': 'japanese',
    'jw': 'javanese',
    'kn': 'kannada',
    'kk': 'kazakh',
    'km': 'khmer',
    'ko': 'korean',
    'ku': 'kurdish (kurmanji)',
    'ky': 'kyrgyz',
    'lo': 'lao',
    'la': 'latin',
    'lv': 'latvian',
    'lt': 'lithuanian',
    'lb': 'luxembourgish',
    'mk': 'macedonian',
    'mg': 'malagasy',
    'ms': 'malay',
    'ml': 'malayalam',
    'mt': 'maltese',
    'mi': 'maori',
    'mr': 'marathi',
    'mn': 'mongolian',
    'my': 'myanmar (burmese)',
    'ne': 'nepali',
    'no': 'norwegian',
    'or': 'odia',
    'ps': 'pashto',
    'fa': 'persian',
    'pl': 'polish',
    'pt': 'portuguese',
    'pa': 'punjabi',
    'ro': 'romanian',
    'ru': 'russian',
    'sm': 'samoan',
    'gd': 'scots gaelic',
    'sr': 'serbian',
    'st': 'sesotho',
    'sn': 'shona',
    'sd': 'sindhi',
    'si': 'sinhala',
    'sk': 'slovak',
    'sl': 'slovenian',
    'so': 'somali',
    'es': 'spanish',
    'su': 'sundanese',
    'sw': 'swahili',
    'sv': 'swedish',
    'tg': 'tajik',
    'ta': 'tamil',
    'te': 'telugu',
    'th': 'thai',
    'tr': 'turkish',
    'uk': 'ukrainian',
    'ur': 'urdu',
    'ug': 'uyghur',
    'uz': 'uzbek',
    'vi': 'vietnamese',
    'cy': 'welsh',
    'xh': 'xhosa',
    'yi': 'yiddish',
    'yo': 'yoruba',
    'zu': 'zulu',
}
# Resolve paths relative to this script so it can be run from any directory.
dirname = os.path.dirname(__file__)
src_filename = os.path.join(dirname, 'english.json')

# Target language codes; each one must be a key of all_languages.
destLangCodeList = [
    'hi',
    'kn',
    'or',
    'bn',
    'gu',
    'pa',
    'ml',
    'ta',
    'te',
]

# Kept at module level on purpose: translateString() looks this name up as a
# global.
translator = Translator()

# The English source is the same for every target language, so parse it once
# instead of re-reading the file on each loop iteration.
with open(src_filename, 'r', encoding="utf-8") as fin:
    data = json.load(fin)

for destLangCode in destLangCodeList:
    # flush=True makes the progress text appear before the (slow) translation
    # starts; without it the unterminated line can sit in the output buffer
    # until 'done' is printed.
    print('Starting translation for {:} ... '.format(all_languages[destLangCode]), end="", flush=True)

    translated_json = translateString(data, destLangCode)

    # One output file per language, named after the language (e.g. hindi.json).
    dest_filename = os.path.join(dirname, all_languages[destLangCode]+'.json')
    with open(dest_filename, 'w', encoding="utf-8") as fout:
        # ensure_ascii=False writes the translated text as real UTF-8
        # characters rather than \uXXXX escapes.
        json_dumps_str = json.dumps(translated_json, indent=4, ensure_ascii=False)
        fout.write(json_dumps_str)
    print('done')
@SNEHAASHISH
Copy link

How can I modify this code to update the values of a dict in my FastAPI app?

For example, I want to take the value of "desc_en", translate it using your code's logic, and update the value of "desc_fr" with the translated string.

@SNEHAASHISH
Copy link

My code in scrapper.py:

import requests
import time
from bs4 import BeautifulSoup
from googletrans import Translator

class FastWebScrapingAPI:
    """Scrape article summaries from thehackernews.com label pages."""

    def scrape_data(self, categories, timeout=10):
        """Fetch one label page and return its articles as a list of dicts.

        Args:
            categories: Label name inserted into the
                ``https://thehackernews.com/search/label/<label>`` URL.
            timeout: Seconds to wait for the HTTP response, so a stalled
                connection cannot block the caller indefinitely.

        Returns:
            A list with one dict per article, with the keys ``title_en``,
            ``desc_en``, ``title_fr``, ``desc_fr``, ``timestamp`` and
            ``image_url``. Articles missing a title or description element
            are skipped; ``image_url`` is ``None`` when no image is found.
        """
        url_news = f"https://thehackernews.com/search/label/{categories}"
        res = requests.get(url_news, timeout=timeout)
        soup = BeautifulSoup(res.content, "html.parser")

        scrappedNews = []
        for article in soup.find_all("a", {"class": "story-link"}):
            title_tag = article.find("h2", {"class": "home-title"})
            desc_tag = article.find("div", {"class": "home-desc"})
            if title_tag is None or desc_tag is None:
                # find() returns None for a missing element; skip the card
                # rather than crash the whole scrape on one malformed entry.
                continue

            image_box = article.find("div", {"class": "img-ratio"})
            image_tag = image_box.img if image_box is not None else None

            scrappedNews.append({
                "title_en": title_tag.text.strip(),
                "desc_en": desc_tag.text.strip(),
                # TODO: placeholder values. To fill these in for real, create
                # a googletrans Translator once before the loop and use
                # translator.translate(text, src='en', dest='fr').text on
                # title_en / desc_en.
                "title_fr": "French title",
                "desc_fr": "French description",
                "timestamp": time.time(),
                "image_url": image_tag.get("data-src") if image_tag is not None else None,
            })
        return scrappedNews

@SNEHAASHISH
Copy link

I am getting this error => AttributeError: 'NoneType' object has no attribute 'group'

@myselfhimself
Copy link

I am getting this error => AttributeError: 'NoneType' object has no attribute 'group'

same here

@sakisdog
Copy link

@SNEHAASHISH, @myselfhimself: in case you haven't solved this yet, uninstall googletrans:
pip uninstall googletrans
And then install the newer version with:
pip install googletrans==4.0.0rc1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment