# atsukoba/nb_table_generator.py

## nb_table_generator.py
import os
import json
from glob import glob
from itertools import chain
from datetime import datetime

# Path separator of the running platform, looked up once at import time.
sep = os.path.sep


def extract_md(file: str) -> list:
    """Return the markdown source lines of a Jupyter notebook.

    The notebook is parsed as JSON, every cell whose ``cell_type`` is
    ``"markdown"`` is selected, and the lines of those cells are flattened
    into a single list in notebook order. A one-line summary is printed.

    Args:
        file: Path to the ``.ipynb`` file.

    Returns:
        A flat list of strings, one per markdown source line.

    Raises:
        KeyError: If the JSON has no top-level ``"cells"`` entry, or a cell
            lacks ``"cell_type"`` / ``"source"``.
    """
    # Be explicit about the encoding: the platform default is not UTF-8
    # everywhere, and notebooks with non-ASCII text would fail to load.
    with open(file, encoding="utf-8") as f:
        json_dict = json.load(f)

    src_list = []
    for cell in json_dict["cells"]:
        if cell["cell_type"] != "markdown":
            continue
        source = cell["source"]
        # "source" may be one multi-line string instead of a list of lines.
        # Split it here, otherwise flattening would yield single characters.
        if isinstance(source, str):
            source = source.splitlines(keepends=True)
        src_list.append(source)

    print("Loading {}, found {} MD cells."
          .format(os.path.basename(file), len(src_list)))

    return list(chain.from_iterable(src_list))


def get_headers(s) -> list:
    """Pick the header lines out of a sequence of markdown lines.

    A line counts as a header when it starts with ``#`` and is not inside a
    fenced code block. Fences are detected by a line starting with three
    backticks; each such line toggles the "inside code" state. The number
    of headers found is printed.

    Args:
        s: Iterable of markdown source lines (strings).

    Returns:
        The header lines, unmodified and in their original order.
    """
    output = []
    is_code = False
    for line in s:
        # ignore code block in MarkDown
        if line.startswith("```"):
            is_code = not is_code
        # startswith() is safe on an empty string, unlike indexing line[0].
        if not is_code and line.startswith("#"):
            output.append(line)

    print("--found {} md headers".format(len(output)))

    return output


def make_md_text(l: list) -> str:
    """Turn markdown header lines into a nested numbered-list text.

    Each line is right-stripped and has its tab characters removed. A line
    made of one to six leading ``#`` characters followed by a space is then
    rewritten as a list item: a level-1 header becomes a blank line followed
    by ``1. title``; a level-n header becomes n-1 tabs followed by
    ``1. title``. Lines that do not have that shape are emitted unchanged,
    and lines that end up empty are dropped.

    Only the leading marker is rewritten, so a ``#`` inside the title
    (for example "C# notes") is left untouched.

    Args:
        l: Header lines, e.g. as returned by ``get_headers``.

    Returns:
        The list text, one entry per line, each terminated by a newline.
    """
    entries = []

    for line in l:
        # Tabs encode the nesting depth in the output, so none may survive
        # inside a title.
        text = line.rstrip().replace("\t", "")
        if not text:
            continue

        level = len(text) - len(text.lstrip("#"))
        rest = text[level:]
        if 1 <= level <= 6 and rest.startswith(" "):
            title = rest[1:]
            if level == 1:
                # Top-level entries are preceded by a blank line.
                text = "\n1. " + title
            else:
                text = "\t" * (level - 1) + "1. " + title

        entries.append(text + "\n")

    return "".join(entries)


def check_md(md: str) -> str:
    """Limit how fast the tab nesting of a list text may increase.

    The text is split on newlines and walked top to bottom. The depth of a
    line is the number of tab characters it contains. When a line is more
    than one level deeper than the line before it, all its tabs are removed
    and it is re-indented to exactly one level deeper than that line.

    Args:
        md: Tab-indented list text.

    Returns:
        The adjusted text, with a newline appended after every piece
        produced by the split (including the final, possibly empty, one).
    """
    fixed = []
    previous_depth = 0
    for row in md.split("\n"):
        depth = row.count("\t")
        deepest_allowed = previous_depth + 1
        if depth > deepest_allowed:
            # Skipped a level: pull the entry back to one step deeper.
            row = "\t" * deepest_allowed + row.replace("\t", "")
            depth = deepest_allowed
        previous_depth = depth
        fixed.append(row + "\n")
    return "".join(fixed)


def main(md=False, save_file=None):
    """Build a table of contents from the notebooks in the working directory.

    Every ``*.ipynb`` file in the current directory is read, its markdown
    headers are converted to a nested numbered list, and the lists of all
    notebooks are concatenated.

    Args:
        md: Not used by this function; kept so existing callers keep working.
        save_file: Optional path. When truthy, the result is also written
            to this file as UTF-8.

    Returns:
        The table of contents as a single string.
    """
    # glob() gives no ordering guarantee; sort so the table of contents is
    # the same on every run and every platform.
    file_pathes = sorted(glob("*.ipynb"))
    print("Find {} notebook files"
          .format(len(file_pathes)))

    sections = []
    for file in file_pathes:
        md_list = extract_md(file)
        headers = get_headers(md_list)
        # Normalise each notebook's section on its own. check_md ends every
        # result with an empty line, which resets its depth tracking, so
        # this yields the same text as re-checking the whole accumulated
        # output each time, without the repeated work.
        sections.append(check_md(make_md_text(headers)))
    output = "".join(sections)

    if save_file:
        # Headers may contain non-ASCII text; do not depend on the locale.
        with open(save_file, "w", encoding="utf-8") as f:
            f.write(output)

    return output


if __name__ == "__main__":
    # Put a timestamp in the file name so repeated runs do not overwrite
    # each other's output.
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    target = "table_of_contents_{}.md".format(stamp)
    main(save_file=target)
	import os
	import json
	from glob import glob
	from itertools import chain
	from datetime import datetime

	# Path separator of the running platform, looked up once at import time.
	sep = os.path.sep


	def extract_md(file: str) -> list:
		"""Return the markdown source lines of a Jupyter notebook.

		The notebook is parsed as JSON, every cell whose ``cell_type`` is
		``"markdown"`` is selected, and the lines of those cells are
		flattened into a single list in notebook order. A one-line summary
		is printed.

		Args:
			file: Path to the ``.ipynb`` file.

		Returns:
			A flat list of strings, one per markdown source line.

		Raises:
			KeyError: If the JSON has no top-level ``"cells"`` entry, or a
				cell lacks ``"cell_type"`` / ``"source"``.
		"""
		# Be explicit about the encoding: the platform default is not UTF-8
		# everywhere, and notebooks with non-ASCII text would fail to load.
		with open(file, encoding="utf-8") as f:
			json_dict = json.load(f)

		src_list = []
		for cell in json_dict["cells"]:
			if cell["cell_type"] != "markdown":
				continue
			source = cell["source"]
			# "source" may be one multi-line string instead of a list of
			# lines. Split it here, otherwise flattening would yield single
			# characters.
			if isinstance(source, str):
				source = source.splitlines(keepends=True)
			src_list.append(source)

		print("Loading {}, found {} MD cells."
			.format(os.path.basename(file), len(src_list)))

		return list(chain.from_iterable(src_list))


	def get_headers(s) -> list:
		"""Pick the header lines out of a sequence of markdown lines.

		A line counts as a header when it starts with ``#`` and is not
		inside a fenced code block. Fences are detected by a line starting
		with three backticks; each such line toggles the "inside code"
		state. The number of headers found is printed.

		Args:
			s: Iterable of markdown source lines (strings).

		Returns:
			The header lines, unmodified and in their original order.
		"""
		output = []
		is_code = False
		for line in s:
			# ignore code block in MarkDown
			if line.startswith("```"):
				is_code = not is_code
			# startswith() is safe on an empty string, unlike line[0].
			if not is_code and line.startswith("#"):
				output.append(line)

		print("--found {} md headers".format(len(output)))

		return output


	def make_md_text(l: list) -> str:
		"""Turn markdown header lines into a nested numbered-list text.

		Each line is right-stripped and has its tab characters removed. A
		line made of one to six leading ``#`` characters followed by a space
		is then rewritten as a list item: a level-1 header becomes a blank
		line followed by ``1. title``; a level-n header becomes n-1 tabs
		followed by ``1. title``. Lines that do not have that shape are
		emitted unchanged, and lines that end up empty are dropped.

		Only the leading marker is rewritten, so a ``#`` inside the title
		(for example "C# notes") is left untouched.

		Args:
			l: Header lines, e.g. as returned by ``get_headers``.

		Returns:
			The list text, one entry per line, each terminated by a newline.
		"""
		entries = []

		for line in l:
			# Tabs encode the nesting depth in the output, so none may
			# survive inside a title.
			text = line.rstrip().replace("\t", "")
			if not text:
				continue

			level = len(text) - len(text.lstrip("#"))
			rest = text[level:]
			if 1 <= level <= 6 and rest.startswith(" "):
				title = rest[1:]
				if level == 1:
					# Top-level entries are preceded by a blank line.
					text = "\n1. " + title
				else:
					text = "\t" * (level - 1) + "1. " + title

			entries.append(text + "\n")

		return "".join(entries)


	def check_md(md: str) -> str:
		"""Limit how fast the tab nesting of a list text may increase.

		The text is split on newlines and walked top to bottom. The depth of
		a line is the number of tab characters it contains. When a line is
		more than one level deeper than the line before it, all its tabs are
		removed and it is re-indented to exactly one level deeper than that
		line.

		Args:
			md: Tab-indented list text.

		Returns:
			The adjusted text, with a newline appended after every piece
			produced by the split (including the final, possibly empty, one).
		"""
		output = ""
		indent_count = 0
		for l in md.split("\n"):
			tabs = l.count("\t")
			deff = tabs - indent_count
			if deff > 1:
				# Skipped a level: pull the entry back to one step deeper.
				l = l.replace("\t", "")
				l = "\t" * (indent_count + 1) + l
				tabs = indent_count + 1
			indent_count = tabs
			output += l + "\n"
		return output


	def main(md=False, save_file=None):
		"""Build a table of contents from the notebooks in the working directory.

		Every ``*.ipynb`` file in the current directory is read, its
		markdown headers are converted to a nested numbered list, and the
		lists of all notebooks are concatenated.

		Args:
			md: Not used by this function; kept so existing callers keep
				working.
			save_file: Optional path. When truthy, the result is also
				written to this file as UTF-8.

		Returns:
			The table of contents as a single string.
		"""
		# glob() gives no ordering guarantee; sort so the table of contents
		# is the same on every run and every platform.
		file_pathes = sorted(glob("*.ipynb"))
		print("Find {} notebook files"
			.format(len(file_pathes)))

		sections = []
		for file in file_pathes:
			md_list = extract_md(file)
			headers = get_headers(md_list)
			# Normalise each notebook's section on its own. check_md ends
			# every result with an empty line, which resets its depth
			# tracking, so this yields the same text as re-checking the
			# whole accumulated output each time, without the repeated work.
			sections.append(check_md(make_md_text(headers)))
		output = "".join(sections)

		if save_file:
			# Headers may contain non-ASCII text; do not depend on the locale.
			with open(save_file, "w", encoding="utf-8") as f:
				f.write(output)

		return output


	if __name__ == "__main__":
		# Put a timestamp in the file name so repeated runs do not overwrite
		# each other's output.
		time = datetime.now().strftime("%Y%m%d_%H%M%S")
		main(save_file="table_of_contents_{}.md"
			.format(time))