davidraleigh/ipynb2md.py Secret

## ipynb2md.py
import argparse
import csv
import difflib
import os
import re
import subprocess
import tempfile
from pathlib import Path
from typing import List


# https://regex101.com/r/ezekzs/1
# Pattern for a 4-space-indented block: a blank line ("\n\n"), then one or more
# consecutive lines that each start with four spaces, then a terminating
# newline. The first line must have a non-whitespace character directly after
# the indent. Group 1 captures the indented lines (indent included, without the
# surrounding newlines); markdowner() moves that capture into a collapsible
# ```text fence.
regex = r"\n\n( {4}[\w\S]+[\w\S ]*((\n {4}([\w \S]*))|(\n {4})){0,})\n"

# Pattern for a whole ```python fenced block, from the opening fence through the
# closing ```. The body is any mix of non-empty lines that do not start with ```
# and runs of newlines/spaces. The ^ anchor only works per line because
# markdowner() passes re.MULTILINE.
# TODO this regex will fail if the ``` is not preceded by a newline
py_regex = r"```python\n((^(?!```).+\n)|([\n ]+)){1,}```"


class FileExists(argparse.Action):
    """argparse action that stores an option value only when the path exists.

    The action is only invoked for values given on the command line; an
    option's ``default`` is stored by argparse without going through it.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate *values* as an existing path and store it on *namespace*.

        Args:
            parser: The ArgumentParser that is running this action.
            namespace: Namespace object receiving the parsed value.
            values: The path string supplied for the option.
            option_string: The flag that triggered the action, if any.

        Raises:
            argparse.ArgumentError: If the path does not exist. argparse
                catches this while parsing and turns it into the usual
                "usage: ... error: ..." message with exit status 2.
        """
        file_path = values
        if not os.path.exists(file_path):
            # ArgumentTypeError is only translated by argparse when raised from
            # a ``type=`` callable; raised from an action it escapes as a raw
            # traceback. ArgumentError is the exception argparse handles here.
            raise argparse.ArgumentError(self, "metadata:{0} is not a file".format(file_path))
        setattr(namespace, self.dest, file_path)


def prepare_parser():
    """Build the command-line parser for the notebook-to-markdown converter.

    Options:
        --all / -a      string value; the script's main block globs it for
                        ``*.ipynb`` files.
        --ipynb / -i    input notebook (default ``README.ipynb``), checked by
                        the FileExists action when given explicitly.
        --test / -t     flag enabling comparison against previous results.
        --replace / -r  replacements CSV path (default ``.md_replace.csv``).

    Returns:
        The configured argparse.ArgumentParser.
    """
    cli = argparse.ArgumentParser(
        description="Convert Jupyter Notebooks into Markdown with Collapseable code blocks")

    # (flags, keyword options) for each supported command-line option.
    option_specs = (
        (("--all", "-a"),
         {"type": str, "help": "process all ipynb"}),
        (("--ipynb", "-i"),
         {"type": str, "default": "README.ipynb", "action": FileExists,
          "help": "name of the input jupyter notebook"}),
        (("--test", "-t"),
         {"action": "store_true", "help": "test markdown against previous results"}),
        (("--replace", "-r"),
         {"type": str, "default": ".md_replace.csv"}),
    )
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)

    return cli


def prep_lines(lines: List[str], replacements):
    """Apply every replacement pair to every line, updating *lines* in place.

    Args:
        lines: Strings to rewrite; the list object itself is modified.
        replacements: Iterable of indexable pairs where item 0 is the text to
            find and item 1 is its substitute. Pairs are applied in order, so
            a later pair sees the result of earlier ones.

    Returns:
        The same list object that was passed in.
    """
    for pair in replacements:
        # Slice assignment keeps the caller's list identity intact.
        lines[:] = [text.replace(pair[0], pair[1]) for text in lines]
    return lines


def compare_markdown(file_1, file_2, replacements):
    """Print a context diff of two text files after normalising their lines.

    Each line of both files is stripped of surrounding whitespace and passed
    through prep_lines() with *replacements* before the comparison.

    Args:
        file_1: Path of the "from" file.
        file_2: Path of the "to" file.
        replacements: Replacement pairs forwarded to prep_lines().
    """
    with open(file_1) as left, open(file_2) as right:
        left_lines = [raw.strip() for raw in left]
        right_lines = [raw.strip() for raw in right]

    left_lines = prep_lines(left_lines, replacements)
    right_lines = prep_lines(right_lines, replacements)

    diff = difflib.context_diff(left_lines, right_lines,
                                fromfile=file_1, tofile=file_2,
                                n=3, lineterm='\n')
    for entry in diff:
        print(entry)


def _collapse_sections(all_text):
    """Return *all_text* with output and python code blocks wrapped in <details>."""

    def wrap_printout(match):
        # Group 1 is the indented block without its surrounding newlines.
        return ("\n\n\n<details><summary>Expand Python Print-out</summary>\n\n\n"
                "```text\n{}\n```\n\n\n</details>\n\n").format(match.group(1))

    def wrap_code(match):
        # The whole fenced block is kept verbatim inside the <details> element.
        return ("\n\n\n<details><summary>Expand Python Code Sample</summary>\n\n\n"
                "{}\n\n\n</details>\n\n").format(match.group(0))

    # re.sub rewrites each match exactly once at its own position. Replacing by
    # matched text instead would wrap a fenced block twice whenever the same
    # block text occurs more than once in the document. Function replacements
    # are also inserted literally, so backslashes in the notebook are safe.
    all_text = re.sub(regex, wrap_printout, all_text, flags=re.MULTILINE)
    return re.sub(py_regex, wrap_code, all_text, flags=re.MULTILINE)


def markdowner(input_filename, markdown_filename, test=False, replacements=()):
    """Execute a notebook, convert it to markdown and collapse its code blocks.

    Runs ``jupyter nbconvert --execute`` on *input_filename*, then rewrites
    *markdown_filename* in place so that 4-space-indented blocks and
    ```python fences sit inside collapsible ``<details>`` elements. With
    *test* set, the previous content of *markdown_filename* (after applying
    *replacements* to it) is diffed against the regenerated file using the
    external ``diff`` tool.

    Args:
        input_filename: Path of the notebook to convert.
        markdown_filename: Path of the markdown file to (re)generate.
        test: When true, *markdown_filename* must already exist; its old
            content is compared with the new output.
        replacements: Iterable of (old, new) pairs applied to the old content
            before the comparison.

    Raises:
        subprocess.CalledProcessError: If nbconvert exits with a non-zero
            status. Continuing would re-process a stale markdown file.
        FileNotFoundError: If ``jupyter`` or ``diff`` is not installed, or if
            *markdown_filename* is missing when it has to be read.
    """
    # read original markdown file
    compare_all_text = ""
    if test:
        with open(markdown_filename, 'rt', encoding="utf-8") as file_obj:
            compare_all_text = file_obj.read()

    # requires nbconvert; an argument list (no shell) keeps filenames with
    # spaces or shell metacharacters intact.
    subprocess.run(
        ['jupyter', 'nbconvert', '--to', 'MARKDOWN', '--execute', input_filename,
         '--output', markdown_filename,
         '--ExecutePreprocessor.kernel_name=python3',
         '--ExecutePreprocessor.timeout=600'],
        check=True)

    # read updated markdown file and rewrite it in place
    with open(markdown_filename, 'r+', encoding="utf-8") as f:
        print("code collapse section re-write for file {}".format(markdown_filename))

        all_text = _collapse_sections(f.read())

        f.seek(0)
        f.write(all_text)
        # The rewritten text may be shorter than what was on disk.
        f.truncate()

    if test:
        for pair in replacements:
            compare_all_text = compare_all_text.replace(pair[0], pair[1])

        with tempfile.NamedTemporaryFile(mode="w+", suffix=".md", encoding="utf-8") as temp:
            temp.write(compare_all_text)
            # diff reads the file by name, so the data must be on disk first.
            temp.flush()

            # diff exits with 1 when the files differ, which is an expected
            # outcome here, so the exit status is deliberately not checked.
            subprocess.run(['diff', '-B', '-Z', temp.name, markdown_filename], check=False)
            subprocess.run(['diff', '-B', '-Z', '-q', temp.name, markdown_filename], check=False)


if __name__ == "__main__":
    # Script entry point: parse options, load the optional replacements CSV,
    # collect the notebooks to process and convert each one to markdown.
    g_arg_parser = prepare_parser()
    g_args = g_arg_parser.parse_args()

    # Each CSV row supplies an (old, new) text pair; only the first two
    # columns are used and both are stripped of surrounding whitespace.
    g_replacements = []
    if os.path.exists(g_args.replace):
        # newline='' is what the csv module expects of files it reads.
        with open(g_args.replace, newline='') as csvfile:
            g_csvreader = csv.reader(csvfile, delimiter=',')
            for g_row in g_csvreader:
                if len(g_row) < 2:
                    # Blank lines are yielded as [] and one-column rows carry
                    # no substitute; indexing them would raise IndexError.
                    continue
                g_replacements.append((g_row[0].strip(), g_row[1].strip()))

    g_input_filenames = []
    if g_args.all:
        # NOTE(review): relative_to() drops the directory given to --all, so
        # the names below are resolved against the current working directory.
        # Confirm that --all is only meant to be used with the cwd.
        for g_path_obj in Path(g_args.all).glob("./*.ipynb"):
            g_input_filenames.append(str(g_path_obj.relative_to(g_args.all)))
    else:
        g_input_filenames.append(g_args.ipynb)

    for g_input_filename in g_input_filenames:
        if g_args.test:
            # Argument list instead of a shell string so notebook names with
            # spaces work. The exit status is ignored, as before.
            try:
                subprocess.run(['treon', g_input_filename], check=False)
            except FileNotFoundError:
                # A missing treon was non-fatal under the shell; keep it so.
                print("treon is not installed; skipping notebook tests for {}".format(g_input_filename))

        g_markdown_filename = '{}.md'.format(os.path.splitext(g_input_filename)[0])
        # Only compare against previous output when there is a previous file.
        g_test = bool(g_args.test and os.path.exists(g_markdown_filename))
        markdowner(input_filename=g_input_filename,
                   markdown_filename=g_markdown_filename,
                   test=g_test,
                   replacements=g_replacements)
	import csv
	import re
	import os
	import tempfile
	import difflib
	import argparse
	from pathlib import Path
	from typing import List


	# https://regex101.com/r/ezekzs/1
	regex = r"\n\n( {4}[\w\S]+[\w\S ]((\n {4}([\w \S]))\|(\n {4})){0,})\n"

	# TODO this regex will fail if the ``` is not preceded by a newline
	py_regex = r"```python\n((^(?!```).+\n)\|([\n ]+)){1,}```"


	class FileExists(argparse.Action):
	def __call__(self, parser, namespace, values, option_string=None):
	file_path = values
	if os.path.exists(file_path):
	setattr(namespace, self.dest, file_path)
	else:
	raise argparse.ArgumentTypeError("metadata:{0} is not a file".format(file_path))


	def prepare_parser():
	arg_parser = argparse.ArgumentParser(description="Convert Jupyter Notebooks into Markdown with Collapseable code "
	"blocks")
	arg_parser.add_argument("--all", "-a", type=str, help="process all ipynb")
	arg_parser.add_argument("--ipynb", "-i", type=str, default="README.ipynb", action=FileExists,
	help="name of the input jupyter notebook")
	arg_parser.add_argument("--test", "-t", action="store_true", help="test markdown against previous results")
	arg_parser.add_argument("--replace", "-r", type=str, default=".md_replace.csv")

	return arg_parser


	def prep_lines(lines: List[str], replacements):
	for pair in replacements:
	for idx, line in enumerate(lines):
	lines[idx] = line.replace(pair[0], pair[1])
	return lines


	def compare_markdown(file_1, file_2, replacements):
	with open(file_1) as file_obj_1:
	with open(file_2) as file_obj_2:
	lines1 = [line.strip() for line in file_obj_1.readlines()]
	lines2 = [line.strip() for line in file_obj_2.readlines()]
	lines1 = prep_lines(lines1, replacements)
	lines2 = prep_lines(lines2, replacements)

	for line in difflib.context_diff(lines1, lines2, fromfile=file_1, tofile=file_2, n=3, lineterm='\n'):
	print(line)


	def markdowner(input_filename, markdown_filename, test=False, replacements=()):
	# read original markdown file
	compare_all_text = ""
	if test:
	with open(markdown_filename, 'rt') as file_obj:
	compare_all_text = file_obj.read()

	# requires nbconvert
	os.system('jupyter nbconvert --to MARKDOWN --execute {0} --output {1} --ExecutePreprocessor.kernel_name=python3 '
	'--ExecutePreprocessor.timeout=600'
	.format(input_filename, markdown_filename))

	# read updated markdown file
	with open(markdown_filename, 'r+') as f:
	print("code collapse section re-write for file {}".format(markdown_filename))

	all_text = f.read()

	matches = re.finditer(regex, all_text, re.MULTILINE)
	for matchNum, match in enumerate(matches, start=1):
	temp = "\n\n\n<details><summary>Expand Python Print-out</summary>\n\n\n```text\n{}\n```\n\n\n</details>\n\n".format(
	match.group(1))
	all_text = all_text.replace(match.group(0), temp)

	matches = re.finditer(py_regex, all_text, re.MULTILINE)
	for matchNum, match in enumerate(matches, start=1):
	temp = "\n\n\n<details><summary>Expand Python Code Sample</summary>\n\n\n{}\n\n\n</details>\n\n".format(
	match.group(0))
	all_text = all_text.replace(match.group(0), temp)

	f.seek(0)
	f.write(all_text)
	f.truncate()

	if test:
	with tempfile.NamedTemporaryFile(mode="w+", suffix=".md") as temp:
	temp.seek(0)
	for pair in replacements:
	compare_all_text = compare_all_text.replace(pair[0], pair[1])
	temp.write(compare_all_text)
	temp.truncate()
	temp.flush()

	os.system('diff -B -Z {0} {1}'.format(temp.name, markdown_filename))
	os.system('diff -B -Z -q {0} {1}'.format(temp.name, markdown_filename))
	# compare_markdown(temp.name, markdown_filename, replacements)


	if __name__ == "__main__":
	g_arg_parser = prepare_parser()
	g_args = g_arg_parser.parse_args()

	g_replacements = []
	if os.path.exists(g_args.replace):
	with open(g_args.replace) as csvfile:
	g_csvreader = csv.reader(csvfile, delimiter=',')
	for g_row in g_csvreader:
	g_replacements.append((g_row[0].strip(), g_row[1].strip()))

	g_input_filenames = []
	if g_args.all:
	for g_path_obj in Path(g_args.all).glob("./*.ipynb"):
	g_input_filenames.append(str(g_path_obj.relative_to(g_args.all)))
	else:
	g_input_filenames.append(g_args.ipynb)

	for g_input_filename in g_input_filenames:
	if g_args.test:
	os.system('treon {0}'.format(g_input_filename))

	g_markdown_filename = '{}.md'.format(os.path.splitext(g_input_filename)[0])
	g_test = False
	if g_args.test and os.path.exists(g_markdown_filename):
	g_test = True
	markdowner(input_filename=g_input_filename,
	markdown_filename=g_markdown_filename,
	test=g_test,
	replacements=g_replacements)