Last active
March 3, 2024 20:58
-
-
Save atsukoba/b284847b58c5d4598580dfe9539669e2 to your computer and use it in GitHub Desktop.
Extract Table of contents from Jupyter Notebook files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import json | |
from glob import glob | |
from itertools import chain | |
from datetime import datetime | |
# Platform path separator, used to pull the file name out of a path.
sep = os.sep
def extract_md(file: str) -> list:
    """Collect the source lines of every Markdown cell in a notebook.

    Args:
        file: Path to a ``.ipynb`` file, i.e. a JSON document with a
            top-level ``"cells"`` list.

    Returns:
        A flat list with the source lines of all cells whose
        ``"cell_type"`` is ``"markdown"``, in notebook order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an expected key (``cells``, ``cell_type``, ``source``)
            is missing.
    """
    # Read explicitly as UTF-8 so non-ASCII headings do not depend on the
    # platform's locale encoding.
    with open(file, encoding="utf-8") as f:
        json_dict = json.load(f)

    src_list = []
    for cell in json_dict["cells"]:
        if cell["cell_type"] != "markdown":
            continue
        source = cell["source"]
        # A cell's source may be stored as one multi-line string instead of
        # a list of lines; split it so chaining below yields lines rather
        # than single characters.
        if isinstance(source, str):
            source = source.splitlines(keepends=True)
        src_list.append(source)

    print("Loading {}, found {} MD cells."
          .format(os.path.basename(file), len(src_list)))
    return list(chain.from_iterable(src_list))
def get_headers(s) -> list:
    """Pick the Markdown header lines out of a sequence of source lines.

    Args:
        s: Iterable of Markdown source lines (strings).

    Returns:
        The lines that start with ``#`` and are not inside a fenced code
        block, in their original order and unmodified.
    """
    output = []
    is_code = False
    for line in s:
        # A line opening with ``` toggles the fenced-code state; '#' lines
        # inside a fence are code comments, not headers.
        if line.startswith("```"):
            is_code = not is_code
        # startswith() is safe on an empty string, unlike indexing line[0].
        if not is_code and line.startswith("#"):
            output.append(line)
    print("--found {} md headers".format(len(output)))
    return output
def make_md_text(l: list) -> str:
    """Turn Markdown header lines into a nested numbered-list outline.

    A level-1 header (``# Title``) becomes ``1. Title`` preceded by a blank
    line; a level-N header (N from 2 to 6) becomes ``1. Title`` indented
    with N-1 tabs. Lines that are not of the form "1-6 hashes, one space,
    text" are passed through with trailing whitespace and tabs removed.

    Args:
        l: Header lines, e.g. as returned by ``get_headers``.

    Returns:
        The outline as a single string, one entry per line, each terminated
        by a newline. Empty lines are dropped.
    """
    parts = []
    for line in l:
        line = line.rstrip().replace("\t", "")
        # Only the leading run of '#' sets the level, so a '# ' inside the
        # title text (e.g. "C# basics") is left untouched.
        level = len(line) - len(line.lstrip("#"))
        if 1 <= level <= 6 and line[level:level + 1] == " ":
            title = line[level + 1:]
            indent = "\n" if level == 1 else "\t" * (level - 1)
            line = indent + "1. " + title
        if line != "":
            parts.append(line + "\n")
    return "".join(parts)
def check_md(md: str) -> str:
    """Clamp list indentation so no entry is more than one level deeper
    than the line before it.

    The depth of a line is the number of tab characters it contains. When a
    line is two or more levels deeper than the previous line, its tabs are
    removed and it is re-indented to exactly one level deeper.

    Args:
        md: Outline text, one entry per line, indented with tabs.

    Returns:
        The normalised text, every line terminated by a newline. Calling the
        function again on its own output returns the same text.
    """
    lines = md.split("\n")
    # A trailing newline yields an empty last piece; drop it so the output
    # does not gain an extra blank line on every call.
    if lines and lines[-1] == "":
        lines.pop()

    fixed = []
    indent_count = 0
    for line in lines:
        tabs = line.count("\t")
        if tabs - indent_count > 1:
            tabs = indent_count + 1
            line = "\t" * tabs + line.replace("\t", "")
        indent_count = tabs
        fixed.append(line + "\n")
    return "".join(fixed)
def main(md=False, save_file=None):
    """Build a table of contents from every notebook in the working directory.

    Args:
        md: Unused; kept so existing callers that pass it keep working.
        save_file: Optional path. When truthy, the table of contents is
            also written to this file as UTF-8 text.

    Returns:
        The table-of-contents text for all ``*.ipynb`` files found.
    """
    # glob() gives no ordering guarantee; sort so the outline is reproducible.
    file_pathes = sorted(glob("*.ipynb"))
    print("Find {} notebook files"
          .format(len(file_pathes)))

    output = ""
    for file in file_pathes:
        md_list = extract_md(file)
        headers = get_headers(md_list)
        output += make_md_text(headers)
    # NOTE(review): indentation normalisation is applied once to the combined
    # text of all notebooks - confirm this matches the intended placement.
    output = check_md(output)

    if save_file:
        # Write as UTF-8 so non-ASCII headings do not depend on the locale.
        with open(save_file, "w", encoding="utf-8") as f:
            f.write(output)
    return output
if __name__ == "__main__":
    # Timestamp the output name so repeated runs produce separate files.
    time = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_name = "table_of_contents_{}.md".format(time)
    main(save_file=output_name)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
JupyterNoteBook Table of Contents Markdown Generator
Outputs the table of contents (as Markdown text) for all the
.ipynb
files in the same directory.
Usage
or