-
-
Save davidraleigh/a24f637ccb018610a87aaacb12281452 to your computer and use it in GitHub Desktop.
ipynb2md.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import csv
import difflib
import os
import re
import subprocess
import tempfile
from pathlib import Path
from typing import List
# Matches a blank line followed by a run of lines indented by exactly-or-more
# than four spaces; group(1) is the indented text. markdowner() treats such a
# run as a printed-output block. Interactive version of the pattern:
# https://regex101.com/r/ezekzs/1
regex = r"\n\n( {4}[\w\S]+[\w\S ]*((\n {4}([\w \S]*))|(\n {4})){0,})\n"
# Matches a whole ```python fenced block. Must be used with re.MULTILINE so the
# ^ inside the pattern anchors at the start of every line.
# TODO this regex will fail if the ``` is not preceded by a newline
py_regex = r"```python\n((^(?!```).+\n)|([\n ]+)){1,}```"
class FileExists(argparse.Action):
    """argparse action that stores the given path only if it exists on disk.

    The check runs only when the option is actually supplied on the command
    line; an option's ``default`` value is never passed through this action.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate ``values`` as an existing path and store it on ``namespace``.

        Raises:
            argparse.ArgumentError: if the path does not exist. argparse turns
                this into its normal usage error (message on stderr, exit
                status 2) instead of an uncaught traceback.
        """
        file_path = values
        if not os.path.exists(file_path):
            # ArgumentTypeError is only handled by argparse for ``type=``
            # callables; from inside an Action the supported way to report a
            # bad value is ArgumentError bound to this action.
            raise argparse.ArgumentError(self, "metadata:{0} is not a file".format(file_path))
        setattr(namespace, self.dest, file_path)
def prepare_parser():
    """Build the command-line parser for the notebook-to-Markdown converter.

    Options:
        --all / -a      directory; every ``*.ipynb`` directly inside it is processed.
        --ipynb / -i    single notebook to convert (validated by ``FileExists``
                        when supplied; defaults to ``README.ipynb``).
        --test / -t     flag; compare the regenerated Markdown with the previous one.
        --replace / -r  CSV file of text replacement pairs (defaults to
                        ``.md_replace.csv``).

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    arg_parser = argparse.ArgumentParser(
        description="Convert Jupyter Notebooks into Markdown with Collapseable code "
                    "blocks")
    arg_parser.add_argument("--all", "-a", type=str,
                            help="directory in which all ipynb files are processed")
    arg_parser.add_argument("--ipynb", "-i", type=str, default="README.ipynb", action=FileExists,
                            help="name of the input jupyter notebook")
    arg_parser.add_argument("--test", "-t", action="store_true",
                            help="test markdown against previous results")
    arg_parser.add_argument("--replace", "-r", type=str, default=".md_replace.csv",
                            help="csv file of 'old,new' text pairs applied to the previous "
                                 "markdown before it is compared")
    return arg_parser
def prep_lines(lines: List[str], replacements) -> List[str]:
    """Apply each replacement pair to every line, modifying ``lines`` in place.

    Args:
        lines: the strings to rewrite; the list object itself is updated.
        replacements: iterable of pairs where ``pair[0]`` is the text to find
            and ``pair[1]`` the text to substitute. Pairs are applied in order,
            so a later pair sees the result of the earlier ones.

    Returns:
        The same ``lines`` list that was passed in.
    """
    for pair in replacements:
        old, new = pair[0], pair[1]
        # Slice assignment keeps the caller's list object, matching the
        # in-place contract.
        lines[:] = [line.replace(old, new) for line in lines]
    return lines
def compare_markdown(file_1, file_2, replacements):
    """Print a context diff of two text files after normalising their lines.

    Each line of both files is stripped of leading and trailing whitespace and
    then passed through ``prep_lines`` with ``replacements`` before the two
    line lists are compared.

    Args:
        file_1: path of the "from" file.
        file_2: path of the "to" file.
        replacements: iterable of (old, new) pairs forwarded to ``prep_lines``.
    """
    with open(file_1) as file_obj_1, open(file_2) as file_obj_2:
        lines1 = [line.strip() for line in file_obj_1]
        lines2 = [line.strip() for line in file_obj_2]
    lines1 = prep_lines(lines1, replacements)
    lines2 = prep_lines(lines2, replacements)
    # The inputs carry no trailing newlines (they were stripped) and print()
    # adds one per line, so the diff's control lines must not add their own.
    for line in difflib.context_diff(lines1, lines2, fromfile=file_1, tofile=file_2, n=3, lineterm=''):
        print(line)
def _wrap_print_out(match):
    """Return the collapsible section that replaces one indented print-out match."""
    return "\n\n\n<details><summary>Expand Python Print-out</summary>\n\n\n```text\n{}\n```\n\n\n</details>\n\n".format(
        match.group(1))


def _wrap_code_sample(match):
    """Return the collapsible section that replaces one ```python fenced block."""
    return "\n\n\n<details><summary>Expand Python Code Sample</summary>\n\n\n{}\n\n\n</details>\n\n".format(
        match.group(0))


def markdowner(input_filename, markdown_filename, test=False, replacements=()):
    """Convert a notebook to Markdown and wrap its code and output in <details>.

    Steps:
      1. When ``test`` is true, remember the current contents of
         ``markdown_filename`` for the later comparison.
      2. Run ``jupyter nbconvert --to MARKDOWN --execute`` on the notebook.
      3. Rewrite the generated Markdown so that every match of ``regex``
         (indented print-out) and of ``py_regex`` (```python block) becomes a
         collapsible ``<details>`` section.
      4. When ``test`` is true, apply ``replacements`` to the remembered text
         and ``diff`` it against the new file (``-B`` ignores blank-line
         changes, ``-Z`` ignores trailing whitespace).

    Args:
        input_filename: path of the ``.ipynb`` notebook.
        markdown_filename: path handed to nbconvert's ``--output`` and then
            rewritten in place.
        test: compare against the previous Markdown; the file must already exist.
        replacements: iterable of pairs (``pair[0]`` -> ``pair[1]``) applied to
            the previous Markdown text only.

    Raises:
        FileNotFoundError: if the ``jupyter`` or ``diff`` executable cannot be
            found, or if the Markdown file is missing. A non-zero exit status
            of either tool is not treated as an error.
    """
    # read original markdown file
    compare_all_text = ""
    if test:
        with open(markdown_filename, 'rt') as file_obj:
            compare_all_text = file_obj.read()

    # requires nbconvert
    # An argument list (no shell) keeps file names containing spaces or shell
    # metacharacters intact.
    subprocess.run(['jupyter', 'nbconvert', '--to', 'MARKDOWN', '--execute', input_filename,
                    '--output', markdown_filename,
                    '--ExecutePreprocessor.kernel_name=python3',
                    '--ExecutePreprocessor.timeout=600'])

    # read updated markdown file
    with open(markdown_filename, 'r+') as f:
        print("code collapse section re-write for file {}".format(markdown_filename))
        all_text = f.read()
        # re.sub replaces each match exactly once, by position. Replacing by
        # matched text (str.replace per match) wrapped a code block twice when
        # the same block occurred more than once, because the replacement
        # itself contains the matched text.
        all_text = re.sub(regex, _wrap_print_out, all_text, flags=re.MULTILINE)
        all_text = re.sub(py_regex, _wrap_code_sample, all_text, flags=re.MULTILINE)
        f.seek(0)
        f.write(all_text)
        f.truncate()

    if test:
        with tempfile.NamedTemporaryFile(mode="w+", suffix=".md") as temp:
            for pair in replacements:
                compare_all_text = compare_all_text.replace(pair[0], pair[1])
            temp.write(compare_all_text)
            # diff reads the file by name, so the text must be on disk first.
            temp.flush()
            # First run prints the differences, second (-q) only reports
            # whether the files differ.
            subprocess.run(['diff', '-B', '-Z', temp.name, markdown_filename])
            subprocess.run(['diff', '-B', '-Z', '-q', temp.name, markdown_filename])
# Script entry point: parse the command line, load the optional replacement
# pairs, collect the notebooks to process and convert each one.
if __name__ == "__main__":
    g_arg_parser = prepare_parser()
    g_args = g_arg_parser.parse_args()

    # Optional CSV of "old,new" pairs; a missing file simply means no replacements.
    g_replacements = []
    if os.path.exists(g_args.replace):
        # newline='' lets the csv module handle line endings itself.
        with open(g_args.replace, newline='') as csvfile:
            g_csvreader = csv.reader(csvfile, delimiter=',')
            for g_row in g_csvreader:
                # Blank lines come back as [] and malformed rows may have a
                # single field; neither describes a replacement pair.
                if len(g_row) < 2:
                    continue
                g_replacements.append((g_row[0].strip(), g_row[1].strip()))

    g_input_filenames = []
    if g_args.all:
        # NOTE(review): names are made relative to the --all directory, so the
        # later steps resolve them against the current working directory --
        # this looks like it only works when --all points at the cwd; confirm.
        for g_path_obj in Path(g_args.all).glob("./*.ipynb"):
            g_input_filenames.append(str(g_path_obj.relative_to(g_args.all)))
    else:
        g_input_filenames.append(g_args.ipynb)

    for g_input_filename in g_input_filenames:
        if g_args.test:
            # requires treon
            os.system('treon {0}'.format(g_input_filename))
        g_markdown_filename = '{}.md'.format(os.path.splitext(g_input_filename)[0])
        # Only compare when there is a previous markdown file to compare with.
        g_test = bool(g_args.test and os.path.exists(g_markdown_filename))
        markdowner(input_filename=g_input_filename,
                   markdown_filename=g_markdown_filename,
                   test=g_test,
                   replacements=g_replacements)
https://stackoverflow.com/a/16423101
-E, --ignore-tab-expansion
ignore changes due to tab expansion
-Z, --ignore-trailing-space
ignore white space at line end
-b, --ignore-space-change
ignore changes in the amount of white space
-w, --ignore-all-space
ignore all white space
-B, --ignore-blank-lines
ignore changes whose lines are all blank
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
requires treon