Skip to content

Instantly share code, notes, and snippets.

@atsukoba
Last active March 3, 2024 20:58
Show Gist options
  • Save atsukoba/b284847b58c5d4598580dfe9539669e2 to your computer and use it in GitHub Desktop.
Save atsukoba/b284847b58c5d4598580dfe9539669e2 to your computer and use it in GitHub Desktop.
Extract Table of contents from Jupyter Notebook files
import os
import json
from glob import glob
from itertools import chain
from datetime import datetime
# Platform-specific path separator ("/" on POSIX, "\\" on Windows).
sep = os.path.sep
def extract_md(file: str) -> list:
    """Collect the source lines of every Markdown cell in a notebook.

    Args:
        file: Path to a ``.ipynb`` file, i.e. a JSON document with a
            top-level ``"cells"`` list.

    Returns:
        A flat list with the source lines of all Markdown cells, in
        notebook order. Lines keep whatever trailing newline they have in
        the notebook.

    Raises:
        KeyError: If the document has no top-level ``"cells"`` entry, or a
            cell lacks ``"cell_type"`` / ``"source"``.
    """
    # Notebook files are UTF-8 JSON; do not depend on the locale default
    # encoding (which is not UTF-8 on every platform).
    with open(file, encoding="utf-8") as f:
        json_dict = json.load(f)

    src_list = []
    for cell in json_dict["cells"]:
        if cell["cell_type"] != "markdown":
            continue
        source = cell["source"]
        # "source" may be a single multi-line string instead of a list of
        # lines. Split it here, otherwise flattening below would yield
        # individual characters.
        if isinstance(source, str):
            source = source.splitlines(keepends=True)
        src_list.append(source)

    print("Loading {}, found {} MD cells."
          .format(os.path.basename(file), len(src_list)))
    return list(chain.from_iterable(src_list))
def get_headers(s) -> list:
    """Pick the Markdown header lines out of a sequence of source lines.

    A line counts as a header when it starts with ``#`` and is not inside a
    fenced code block. Fences are lines starting with three backticks; each
    such line toggles the "inside code" state.

    Args:
        s: Iterable of Markdown source lines (strings). Empty strings are
            allowed and ignored.

    Returns:
        The header lines, unmodified and in input order.
    """
    output = []
    is_code = False
    for line in s:
        # ignore code block in MarkDown
        if line.startswith("```"):
            is_code = not is_code
        # startswith() is safe on empty strings, unlike indexing line[0].
        elif not is_code and line.startswith("#"):
            output.append(line)
    print("--found {} md headers".format(len(output)))
    return output
def make_md_text(l: list) -> str:
    """Turn Markdown header lines into a nested numbered-list outline.

    Each ``#``-style header of level 1-6 (hashes followed by a space)
    becomes a ``1. `` list item. Level 1 items are preceded by a blank
    line; level N > 1 items are indented with N - 1 tabs. Lines that are
    not in that form are passed through after trimming, and lines that end
    up empty are dropped.

    Args:
        l: Header lines, e.g. ``["# Title\\n", "## Section\\n"]``.

    Returns:
        The outline text, one item per line, each terminated by a newline.
    """
    items = []
    for line in l:
        line = line.rstrip().replace("\t", "")
        # Only the leading run of "#" marks the header level; a "# " that
        # appears later in the title (e.g. "C# notes") must stay untouched.
        title = line.lstrip("#")
        level = len(line) - len(title)
        if 1 <= level <= 6 and title.startswith(" "):
            indent = "\n" if level == 1 else "\t" * (level - 1)
            line = indent + "1. " + title[1:]
        if line != "":
            items.append(line + "\n")
    return "".join(items)
def check_md(md: str) -> str:
    """Limit how far the indentation may grow from one line to the next.

    The text is split on newlines and each line's depth is its number of
    tab characters. When a line is more than one level deeper than the
    line before it, its tabs are removed and it is re-indented to exactly
    one level deeper than that previous line.

    Args:
        md: Outline text using tabs for nesting.

    Returns:
        The adjusted text, with a newline appended after every split line.
    """
    fixed_rows = []
    previous_depth = 0
    for row in md.split("\n"):
        depth = row.count("\t")
        if depth - previous_depth > 1:
            # Skipped a level: clamp to one step below the previous line.
            depth = previous_depth + 1
            row = "\t" * depth + row.replace("\t", "")
        previous_depth = depth
        fixed_rows.append(row + "\n")
    return "".join(fixed_rows)
def main(md=False, save_file=None):
    """Build a table of contents from the notebooks in the working directory.

    Every ``*.ipynb`` file matched in the current working directory is
    read, its Markdown headers are converted to outline items, and the
    combined outline is passed through ``check_md``.

    Args:
        md: Unused; accepted so existing callers keep working.
        save_file: Optional path. When truthy, the outline is also written
            to this file as UTF-8 text.

    Returns:
        The outline as a string.
    """
    # glob() returns names in arbitrary order; sort so the table of
    # contents is the same on every run and platform.
    file_pathes = sorted(glob("*.ipynb"))
    print("Find {} notebook files"
          .format(len(file_pathes)))

    parts = []
    for file in file_pathes:
        md_list = extract_md(file)
        headers = get_headers(md_list)
        parts.append(make_md_text(headers))
    output = check_md("".join(parts))

    if save_file:
        # Header text may contain non-ASCII characters; write UTF-8
        # explicitly instead of the locale default encoding.
        with open(save_file, "w", encoding="utf-8") as f:
            f.write(output)
    return output
if __name__ == "__main__":
    # Name the output file after the current local time (second resolution).
    time = datetime.now().strftime("%Y%m%d_%H%M%S")
    target = "table_of_contents_{}.md".format(time)
    main(save_file=target)
@atsukoba
Copy link
Author

Jupyter Notebook Table of Contents Markdown Generator

Outputs a Markdown table of contents for all the .ipynb files in the current working directory.

Usage

% python nb_table_generator.py

or

import nb_table_generator

nb_table_generator.main(save_file="output_file_name")

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment