Skip to content

Instantly share code, notes, and snippets.

@sebastiandres
Created April 14, 2018 02:06
Show Gist options
  • Save sebastiandres/30aa190eefa427f85954a05b390bf280 to your computer and use it in GitHub Desktop.
Save sebastiandres/30aa190eefa427f85954a05b390bf280 to your computer and use it in GitHub Desktop.
Archivo para traducir notebooks [Translation file for notebook]
from googletrans import Translator
import nbformat
import glob
import sys
import os
# References:
# http://nbformat.readthedocs.io/en/latest/api.html
# https://pypi.python.org/pypi/googletrans
def translate_string(source_text, lang_dest, lang_source):
    """
    Translate a markdown string with Google Translate (via googletrans)
    and repair the markup damage the translation introduces.

    The translator tends to insert spaces inside markdown and LaTeX
    tokens (e.g. "] (" in links, "\\ frac" in formulas). After the
    translation a fixed list of literal replacements is applied to undo
    the known cases.

    Args:
        source_text: Markdown text to translate.
        lang_dest: Destination language code, passed to googletrans as ``dest``.
        lang_source: Source language code, passed to googletrans as ``src``.

    Returns:
        The translated text with the known markup errors fixed. Blank
        (empty or whitespace-only) strings are returned unchanged without
        contacting the translation service.
    """
    # Blank cells carry nothing to translate; skip the network round trip
    # instead of sending an empty request to the remote service.
    if isinstance(source_text, str) and not source_text.strip():
        return source_text

    # General translation
    translator = Translator()
    t = translator.translate(source_text, src=lang_source, dest=lang_dest)
    fixed_markdown = t.text

    # Fix the easy markdown errors. Order matters: the pairs are applied
    # sequentially, exactly in the order listed here.
    easy_fixes = (
        (r"] (", r"]("),          # markdown links
        (r"</ ", r"</"),          # closing html tags
        (r"\ frac", r"\frac"),    # LaTeX commands split from their backslash
        (r"\ pi", r"\pi"),
        (r"\ begin", r"\begin"),
        (r"\ end", r"\end"),
        (r"\ partial", r"\partial"),
        (r"\ sum", r"\sum"),
        (r"\ quad", r"\quad"),
        (r"help ()", r"help()"),  # function calls mentioned in prose
        (r"print ()", r"print()"),
        (r"\ $", r"\$"),          # escaped dollar sign
        (r"`` `", r"```"),        # code fences
        (r" / ", r"/"),           # spaced slashes (also affects plain prose)
        (r"—", r"-"),             # em dash to hyphen
    )
    for broken, repaired in easy_fixes:
        fixed_markdown = fixed_markdown.replace(broken, repaired)

    # Fix opening italics: remove the space between "_" and the mark
    for mark in (r'"', r'¿', r'¡', r'`'):
        fixed_markdown = fixed_markdown.replace("_ " + mark, "_" + mark)

    # Fix closing italics: remove the space between the mark and "_"
    for mark in (r'"', r'?', r'!', r'`'):
        fixed_markdown = fixed_markdown.replace(mark + " _", mark + "_")

    # Echo the result so the user can follow the translation progress
    print(fixed_markdown)
    return fixed_markdown
def translate_notebook(ipynb_folder, format_version, lang_dest, lang_source="eng"):
    """
    Translate the markdown cells of a notebook and save the result next
    to the original as ``<name>_<lang_dest><extension>``.

    Only cells whose ``cell_type`` is "markdown" are translated; every
    other cell (code or something else) is written back untouched.

    Args:
        ipynb_folder: Path of the notebook file to translate (despite the
            name, this is the notebook path that gets read).
        format_version: Notebook format version, passed to ``nbformat.read``
            as ``as_version``.
        lang_dest: Destination language code; also used as the suffix of
            the output file name.
        lang_source: Source language code handed to ``translate_string``.
            NOTE(review): the default "eng" does not look like a
            googletrans language code ("en" is) - confirm before relying
            on the default.
    """
    # Use the argument rather than a module-level variable, so the
    # function also works when called from other code.
    notebook_path = ipynb_folder

    # Insert the language suffix before the extension only; a plain
    # str.replace would also rewrite ".ipynb" inside directory names
    # such as ".ipynb_checkpoints".
    root, extension = os.path.splitext(notebook_path)
    translated_notebook_path = root + "_" + lang_dest + extension

    print("Original:\t"+notebook_path)
    print("Translated:\t"+translated_notebook_path)

    # Translate all the markdown cells
    nb = nbformat.read(notebook_path, as_version=format_version)
    cells = nb["cells"]
    total_cells = len(cells)
    for i, cell in enumerate(cells):
        print("\tTranslating cell {} from {}".format(i, total_cells))
        if cell["cell_type"] == "markdown":
            cell["source"] = translate_string(cell["source"], lang_dest, lang_source)

    print("Saving the translated notebook")
    nbformat.write(nb, fp=translated_notebook_path)
if __name__ == "__main__":
    # Command line entry point: translate every notebook matching the
    # given path(s) or wildcard pattern(s) from Spanish ("es") to lang_dest.

    # Parameters
    lang_dest = "en"
    format_version = 4

    # Accept one or more patterns: an unquoted wildcard is expanded by the
    # shell into several arguments, which should still be translated.
    patterns = sys.argv[1:]
    if patterns:
        all_files = sorted({path for pattern in patterns for path in glob.glob(pattern)})
        if not all_files:
            print("No notebooks found for: " + " ".join(patterns))

        # Output notebooks end with this suffix; they are skipped so a
        # translation is never translated again.
        translated_suffix = "_" + lang_dest + ".ipynb"

        # Keep the module-level name my_notebook_path stable: code
        # elsewhere in this file may reference it.
        for my_notebook_path in all_files:
            if my_notebook_path.endswith(translated_suffix):
                continue
            translate_notebook(my_notebook_path, format_version, lang_dest, "es")
    else:
        print("Run as:")
        print("\t$ python3 translate.py EngComp/modules/1_offtheground/1_Interacting_with_Python.ipynb")
        print("or more than one with wildmarks")
        print('\t$ python3 translate.py "EngComp/modules/1_offtheground/*.ipynb"')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment