Skip to content

Instantly share code, notes, and snippets.

@davidraleigh
Last active November 14, 2020 03:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save davidraleigh/a24f637ccb018610a87aaacb12281452 to your computer and use it in GitHub Desktop.
Save davidraleigh/a24f637ccb018610a87aaacb12281452 to your computer and use it in GitHub Desktop.
ipynb2md.py
import csv
import re
import os
import tempfile
import difflib
import argparse
from pathlib import Path
from typing import List
# Pattern for "print-out" blocks in the converted Markdown: a blank line
# followed by one or more lines that start with four spaces, terminated by a
# newline. Group 1 is the indented text itself, without the leading "\n\n"
# and the trailing "\n". markdowner() wraps each match in a collapsible
# <details> section.
# Interactive playground for this pattern:
# https://regex101.com/r/ezekzs/1
regex = r"\n\n( {4}[\w\S]+[\w\S ]*((\n {4}([\w \S]*))|(\n {4})){0,})\n"
# Pattern for fenced ```python code blocks: the opening fence line, then one or
# more lines that do not themselves start with ``` (or runs of newlines and
# spaces), up to the closing ```. It uses "^", so it must be applied with
# re.MULTILINE, which is how markdowner() uses it.
# TODO this regex will fail if the ``` is not preceded by a newline
py_regex = r"```python\n((^(?!```).+\n)|([\n ]+)){1,}```"
class FileExists(argparse.Action):
    """Argparse action that stores a path only when it exists on disk.

    The action runs only for options that are actually present on the command
    line, so a ``default`` value given to ``add_argument`` is not checked here.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate ``values`` as an existing path and store it on ``namespace``.

        Args:
            parser: the ArgumentParser that is processing the command line.
            namespace: the namespace object receiving parsed values.
            values: the path string supplied for this option.
            option_string: the flag that triggered the action (unused).

        Raises:
            argparse.ArgumentError: if the path does not exist. Unlike
                ArgumentTypeError (which argparse only handles for ``type=``
                callables), this is caught by ``parse_args`` and reported as a
                regular usage error with exit status 2 instead of a traceback.
        """
        file_path = values
        if not os.path.exists(file_path):
            raise argparse.ArgumentError(self, "metadata:{0} is not a file".format(file_path))
        setattr(namespace, self.dest, file_path)
def prepare_parser():
    """Build the command-line parser for the notebook-to-Markdown converter.

    Returns:
        An argparse.ArgumentParser exposing the --all/-a, --ipynb/-i,
        --test/-t and --replace/-r options.
    """
    parser = argparse.ArgumentParser(
        description="Convert Jupyter Notebooks into Markdown with Collapseable code blocks")

    # (flags, keyword settings) for each option, registered in declaration order.
    option_table = (
        (("--all", "-a"), {"type": str, "help": "process all ipynb"}),
        (("--ipynb", "-i"), {"type": str, "default": "README.ipynb", "action": FileExists,
                             "help": "name of the input jupyter notebook"}),
        (("--test", "-t"), {"action": "store_true", "help": "test markdown against previous results"}),
        (("--replace", "-r"), {"type": str, "default": ".md_replace.csv"}),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
    return parser
def prep_lines(lines: List[str], replacements):
    """Apply every (old, new) text replacement to each line, in place.

    Args:
        lines: list of strings; it is modified in place.
        replacements: iterable of pairs where item 0 is the text to find and
            item 1 is the text to substitute. Pairs are applied in order, so a
            later pair sees the result of the earlier ones.

    Returns:
        The same ``lines`` list object that was passed in.
    """
    for pair in replacements:
        # Slice assignment keeps the caller's list object while swapping its contents.
        lines[:] = [text.replace(pair[0], pair[1]) for text in lines]
    return lines
def compare_markdown(file_1, file_2, replacements):
    """Print a context diff between two text files.

    Each line of both files is stripped of surrounding whitespace and run
    through prep_lines() with ``replacements`` before the comparison.

    Args:
        file_1: path of the "from" file.
        file_2: path of the "to" file.
        replacements: (old, new) pairs forwarded to prep_lines().
    """
    with open(file_1) as left_handle, open(file_2) as right_handle:
        left_lines = [text.strip() for text in left_handle]
        right_lines = [text.strip() for text in right_handle]
        left_lines = prep_lines(left_lines, replacements)
        right_lines = prep_lines(right_lines, replacements)
        diff_entries = difflib.context_diff(
            left_lines,
            right_lines,
            fromfile=file_1,
            tofile=file_2,
            n=3,
            lineterm='\n',
        )
        for entry in diff_entries:
            print(entry)
def _wrap_print_out(match):
    """Return the collapsible <details> section for one print-out match of ``regex``."""
    return "\n\n\n<details><summary>Expand Python Print-out</summary>\n\n\n```text\n{}\n```\n\n\n</details>\n\n".format(
        match.group(1))


def _wrap_code_sample(match):
    """Return the collapsible <details> section for one code-fence match of ``py_regex``."""
    return "\n\n\n<details><summary>Expand Python Code Sample</summary>\n\n\n{}\n\n\n</details>\n\n".format(
        match.group(0))


def markdowner(input_filename, markdown_filename, test=False, replacements=()):
    """Convert a notebook to Markdown and collapse its code and print-out blocks.

    Runs ``jupyter nbconvert`` (executing the notebook) to produce
    ``markdown_filename``, then rewrites that file so every block matched by
    ``regex`` and ``py_regex`` is wrapped in a ``<details>`` section.

    Args:
        input_filename: path of the notebook to convert.
        markdown_filename: path of the Markdown file to write and rewrite.
        test: when true, the Markdown file's previous content is read before
            the conversion and diffed (``diff -B -Z``) against the new content.
        replacements: (old, new) text pairs applied to the previous content
            before it is diffed.

    NOTE(review): the file names are interpolated into shell command strings
    without quoting, so names containing spaces or shell metacharacters will
    not work as intended.
    """
    # read original markdown file
    compare_all_text = ""
    if test:
        with open(markdown_filename, 'rt') as file_obj:
            compare_all_text = file_obj.read()

    # requires nbconvert
    os.system('jupyter nbconvert --to MARKDOWN --execute {0} --output {1} --ExecutePreprocessor.kernel_name=python3 '
              '--ExecutePreprocessor.timeout=600'
              .format(input_filename, markdown_filename))

    # read updated markdown file
    with open(markdown_filename, 'r+') as f:
        print("code collapse section re-write for file {}".format(markdown_filename))
        all_text = f.read()
        # re.sub rewrites each match exactly once, at the position where it was
        # found. Replacing by matched text (str.replace per match) would touch
        # every occurrence on every match, so a code block that appears twice
        # in the document ended up wrapped in nested <details> sections.
        # Callable replacements also keep backslashes in the text literal.
        all_text = re.sub(regex, _wrap_print_out, all_text, flags=re.MULTILINE)
        all_text = re.sub(py_regex, _wrap_code_sample, all_text, flags=re.MULTILINE)
        f.seek(0)
        f.write(all_text)
        # the rewritten text may be shorter than what was read
        f.truncate()

    if test:
        for pair in replacements:
            compare_all_text = compare_all_text.replace(pair[0], pair[1])
        with tempfile.NamedTemporaryFile(mode="w+", suffix=".md") as previous:
            previous.write(compare_all_text)
            # make sure the content is on disk before diff reads the file by name
            previous.flush()
            os.system('diff -B -Z {0} {1}'.format(previous.name, markdown_filename))
            os.system('diff -B -Z -q {0} {1}'.format(previous.name, markdown_filename))
            # compare_markdown(previous.name, markdown_filename, replacements)
if __name__ == "__main__":
    g_arg_parser = prepare_parser()
    g_args = g_arg_parser.parse_args()

    # Optional CSV of "old,new" text pairs that are applied before diffing.
    g_replacements = []
    if os.path.exists(g_args.replace):
        # newline="" is how the csv module expects files to be opened.
        with open(g_args.replace, newline="") as csvfile:
            g_csvreader = csv.reader(csvfile, delimiter=',')
            for g_row in g_csvreader:
                # Blank lines come back as [] and single-column rows have no
                # replacement text; skip them instead of failing with IndexError.
                if len(g_row) < 2:
                    continue
                g_replacements.append((g_row[0].strip(), g_row[1].strip()))

    # Either every notebook directly inside the --all directory, or the single --ipynb file.
    g_input_filenames = []
    if g_args.all:
        # NOTE(review): names are made relative to the --all directory, so the
        # later commands look for them in the current working directory.
        for g_path_obj in Path(g_args.all).glob("./*.ipynb"):
            g_input_filenames.append(str(g_path_obj.relative_to(g_args.all)))
    else:
        g_input_filenames.append(g_args.ipynb)

    for g_input_filename in g_input_filenames:
        if g_args.test:
            # requires treon
            os.system('treon {0}'.format(g_input_filename))

        g_markdown_filename = '{}.md'.format(os.path.splitext(g_input_filename)[0])
        # Only compare against previous results when a previous Markdown file exists.
        g_test = bool(g_args.test and os.path.exists(g_markdown_filename))
        markdowner(input_filename=g_input_filename,
                   markdown_filename=g_markdown_filename,
                   test=g_test,
                   replacements=g_replacements)
@davidraleigh
Copy link
Author

requires treon

@davidraleigh
Copy link
Author

https://stackoverflow.com/a/16423101

   -E, --ignore-tab-expansion
          ignore changes due to tab expansion

   -Z, --ignore-trailing-space
          ignore white space at line end

   -b, --ignore-space-change
          ignore changes in the amount of white space

   -w, --ignore-all-space
          ignore all white space

   -B, --ignore-blank-lines
          ignore changes whose lines are all blank

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment