Created
April 14, 2018 02:06
-
-
Save sebastiandres/30aa190eefa427f85954a05b390bf280 to your computer and use it in GitHub Desktop.
Archivo para traducir notebooks [Translation file for notebook]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from googletrans import Translator | |
import nbformat | |
import glob | |
import sys | |
import os | |
# References:
# http://nbformat.readthedocs.io/en/latest/api.html
# https://pypi.python.org/pypi/googletrans
def translate_string(source_text, lang_dest, lang_source): | |
""" | |
Translation function, powered by google translate and fixed for | |
custom markdown errors. | |
""" | |
# General translation | |
translator = Translator() | |
t = translator.translate(source_text, src=lang_source, dest=lang_dest) | |
translated_text = t.text | |
# Fix the easy markdown errors | |
fixed_markdown = translated_text.replace(r"] (", r"](") | |
fixed_markdown = fixed_markdown.replace(r"</ ", r"</") | |
fixed_markdown = fixed_markdown.replace(r"\ frac", r"\frac") | |
fixed_markdown = fixed_markdown.replace(r"\ pi", r"\pi") | |
fixed_markdown = fixed_markdown.replace(r"\ begin", r"\begin") | |
fixed_markdown = fixed_markdown.replace(r"\ end", r"\end") | |
fixed_markdown = fixed_markdown.replace(r"\ partial", r"\partial") | |
fixed_markdown = fixed_markdown.replace(r"\ sum", r"\sum") | |
fixed_markdown = fixed_markdown.replace(r"\ quad", r"\quad") | |
fixed_markdown = fixed_markdown.replace(r"help ()", r"help()") | |
fixed_markdown = fixed_markdown.replace(r"print ()", r"print()") | |
fixed_markdown = fixed_markdown.replace(r"\ $", r"\$") | |
fixed_markdown = fixed_markdown.replace(r"`` `", r"```") | |
fixed_markdown = fixed_markdown.replace(r" / ", r"/") | |
fixed_markdown = fixed_markdown.replace(r"—", r"-") | |
# Fix opening italics | |
for mark in [r'"', r'¿', r'¡', r'`']: | |
s_old = '_ {}'.format(mark) | |
s_new = '_{}'.format(mark) | |
fixed_markdown = fixed_markdown.replace(s_old, s_new) | |
# Fix closing italics | |
for mark in [r'"', r'?', r'!', r'`']: | |
s_old = '{} _'.format(mark) | |
s_new = '{}_'.format(mark) | |
fixed_markdown = fixed_markdown.replace(s_old, s_new) | |
print(fixed_markdown) | |
return fixed_markdown | |
def translate_notebook(ipynb_folder, format_version, lang_dest, lang_source="eng"): | |
""" | |
Translate the notebook cells, but only for those cells that are | |
markdown, the others might be code or something else. | |
""" | |
# Translate all the markdown cells | |
my_translated_notebook_path = my_notebook_path.replace(".ipynb", "_"+lang_dest+".ipynb") | |
print("Original:\t"+my_notebook_path) | |
print("Translated:\t"+my_translated_notebook_path) | |
nb = nbformat.read(my_notebook_path, as_version=format_version) | |
for i, cell in enumerate(nb["cells"]): | |
print("\tTranslating cell {} from {}".format(i, len(nb["cells"]))) | |
if cell["cell_type"]=="markdown": | |
cell["source"] = translate_string(cell["source"], lang_dest, lang_source) | |
else: | |
pass | |
print("Saving the translated notebook") | |
nbformat.write(nb, fp=my_translated_notebook_path) | |
if __name__=="__main__": | |
# Parameters | |
lang_dest = "en" | |
format_version = 4 | |
if len(sys.argv)==2: | |
ipynb_path = sys.argv[1] | |
all_files = sorted(glob.glob(ipynb_path)) | |
# Translate all the notebooks not translated already | |
for my_notebook_path in all_files: | |
if "_"+lang_dest+".ipynb" in my_notebook_path: | |
pass # Skip if there's no need to translate (already translated, so will be overwritten) | |
else: | |
translate_notebook(my_notebook_path, format_version, lang_dest, "es") | |
else: | |
print("Run as:") | |
print("\t$ python3 translate.py EngComp/modules/1_offtheground/1_Interacting_with_Python.ipynb") | |
print("or more than one with wildmarks") | |
print('\t$ python3 translate.py "EngComp/modules/1_offtheground/*.ipynb"') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment