Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@Miking98
Created September 15, 2022 08:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Miking98/10244cf5e9d210966e363674b5bd4340 to your computer and use it in GitHub Desktop.
Save Miking98/10244cf5e9d210966e363674b5bd4340 to your computer and use it in GitHub Desktop.
Files to convert .ipynb -> .md for Jekyll
import os
import re
import subprocess
import argparse
import pathlib
# Patterns used to scrub nbconvert's Markdown output before Jekyll reads it.
# The <style scoped> pattern is non-greedy (with DOTALL so it spans lines): each
# style block is removed on its own, and whatever sits between two style blocks
# (e.g. a rendered table) is kept.
STYLE_BLOCK_RE = re.compile(r'<style scoped>.*?</style>', flags=re.IGNORECASE | re.DOTALL)
# Lines such as "<AxesSubplot:...>" that show up as text output of plotting cells.
AXES_REPR_RE = re.compile(r'</?axessubplot:.*\n', flags=re.IGNORECASE)
# Liquid tag delimiters. `{{` / `}}` are intentionally NOT matched, mirroring
# the original script where those two cases were commented out.
LIQUID_DELIMITER_RE = re.compile(r'\{%|%\}')


def escape_liquid_tags(md: str) -> str:
    """Wrap every `{%` and `%}` in `{% raw %}...{% endraw %}`.

    Avoids Jekyll's "Tag '{%' was not properly terminated with regexp" errors
    when the notebook output happens to contain Liquid tag delimiters.

    The substitution is done in a single left-to-right pass. Collecting all
    `{%` positions followed by all `%}` positions and then patching the string
    with a running offset only works when the positions are visited in
    ascending order, which is not the case as soon as the text holds more
    than one tag.

    Args:
        md: Markdown text to escape.

    Returns:
        The text with each delimiter individually wrapped.
    """
    return LIQUID_DELIMITER_RE.sub(
        lambda match: "{% raw %}" + match.group(0) + "{% endraw %}",
        md,
    )


def clean_markdown(md: str) -> str:
    """Return `md` with notebook HTML residue removed and Liquid delimiters escaped.

    Steps, in order: drop `<style scoped>` blocks, drop `<AxesSubplot:...>`
    lines, then escape `{%` / `%}`.
    """
    md_clean = STYLE_BLOCK_RE.sub('', md)
    md_clean = AXES_REPR_RE.sub('', md_clean)
    return escape_liquid_tags(md_clean)


def main() -> None:
    """Convert the notebook given on the command line into a Jekyll post.

    Runs `jupyter nbconvert` with the `nbconvert_config.py` that lives next to
    this script, cleans the generated Markdown in place, and finally renames
    it so that it carries the 'YYYY-MM-DD-' prefix taken from the notebook's
    file name.

    Raises:
        subprocess.CalledProcessError: if `jupyter nbconvert` exits non-zero.
        OSError: if the generated Markdown file cannot be read, written or renamed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('ipynb_path', type=str)
    args = parser.parse_args()

    # Input paths
    this_script_dir: str = os.path.abspath(pathlib.Path(__file__).parent.resolve())
    ipynb_file_name: str = os.path.basename(args.ipynb_path)
    config_script_path: str = os.path.join(this_script_dir, 'nbconvert_config.py')

    # Output paths
    output_abs_dir: str = os.path.abspath(pathlib.Path(args.ipynb_path).parent.resolve())  # '/Users/Desktop/_posts/YYYY-MM-DD-post-name/'
    base_file_name_with_date_prefix: str = ipynb_file_name.lower().replace(' ', '-').replace('.ipynb', '')  # 'YYYY-MM-DD-post-name'
    base_file_name: str = re.sub(r'^\d{4}\-\d{2}\-\d{2}\-', '', base_file_name_with_date_prefix)  # 'YYYY-MM-DD-post-name' => 'post-name'
    output_markdown_abs_path: str = os.path.join(output_abs_dir, base_file_name + '.md')  # '/Users/Desktop/_posts/YYYY-MM-DD-post-name/post-name.md'
    jekyll_markdown_abs_path: str = os.path.join(output_abs_dir, base_file_name_with_date_prefix + '.md')  # '/Users/Desktop/_posts/YYYY-MM-DD-post-name/YYYY-MM-DD-post-name.md'

    print(f"Converting {ipynb_file_name} => {os.path.basename(jekyll_markdown_abs_path)}")
    # check=True: stop here if nbconvert fails instead of tripping over a
    # missing (or stale) .md file a few lines further down.
    subprocess.run(
        ["jupyter", "nbconvert", args.ipynb_path, "--to", "markdown", "--config", config_script_path],
        check=True,
    )

    # Clean up markdown. Explicit UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(output_markdown_abs_path, 'r', encoding='utf-8') as fd:
        md = fd.read()
    md_clean = clean_markdown(md)
    with open(output_markdown_abs_path, 'w', encoding='utf-8') as fd:
        fd.write(md_clean)

    # Rename .md file to have 'YYYY-MM-DD' prefix that Jekyll expects for all posts.
    # os.replace overwrites a post left by a previous run on every platform
    # (os.rename raises on Windows when the destination exists).
    os.replace(output_markdown_abs_path, jekyll_markdown_abs_path)


if __name__ == "__main__":
    main()
# modification of config created here: https://gist.github.com/cscorley/9144544
# Useful nbconvert documentation:
# https://nbconvert.readthedocs.io/en/latest/config_options.html#cli-flags-and-aliases
import os
import re
import sys
from jupyter_core.paths import jupyter_path
import pathlib
import argparse
# Expects to be called as: `jupyter nbconvert PATH_TO_NOTEBOOK.ipynb --flag --flag ...`
#
# NOTE(review): this file is meant to be passed to nbconvert via `--config`.
# `get_config` is neither defined nor imported here; it is presumably injected
# by the config loader that executes this file -- confirm against the
# nbconvert/traitlets documentation.

# Parse CLI arguments.
# parse_known_args() is used because the command line also carries nbconvert's
# own flags, which this parser does not declare.
parser = argparse.ArgumentParser()
parser.add_argument('ipynb_path', type=str)
args, __ = parser.parse_known_args()

# Input paths
this_script_dir: str = os.path.abspath(pathlib.Path(__file__).parent.resolve())
template_path: str = os.path.abspath(os.path.join(this_script_dir, 'nbconvert_jekyll.tpl'))
ipynb_file_name: str = os.path.basename(args.ipynb_path)

# Output paths
output_abs_dir: str = os.path.abspath(pathlib.Path(args.ipynb_path).parent.resolve()) # '/Users/Desktop/_posts/YYYY-MM-DD-post-name/'
output_relative_dir: str = '/'.join(args.ipynb_path.split('/')[:-1]) # '_posts/YYYY-MM-DD-post-name/'
output_image_abs_dir: str = os.path.abspath(os.path.join(output_abs_dir, 'markdown_images/')) # '/Users/Desktop/_posts/YYYY-MM-DD-post-name/markdown_images/'
output_image_relative_dir: str = os.path.join(output_relative_dir, 'markdown_images/') # '_posts/YYYY-MM-DD-post-name/markdown_images/'
# Make sure our `base_file_name` doesn't start with "YYYY-MM-DD", otherwise Jekyll will be tripped up when creating image/pdf/support files b/c
# it only expects the 'YYYY-MM-DD' prefix for .md posts
# NOTE: This means that we need to add the 'YYYY-MM-DD' prefix to our .md file after creating it
base_file_name_with_date_prefix: str = ipynb_file_name.lower().replace(' ', '-').replace('.ipynb', '') # 'YYYY-MM-DD-post-name'
base_file_name: str = re.sub(r'^\d{4}\-\d{2}\-\d{2}\-', '', base_file_name_with_date_prefix) # 'YYYY-MM-DD-post-name' => 'post-name'

# Sanity checks.
# Explicit exceptions instead of `assert`: asserts are stripped when Python
# runs with -O, which would silently disable these checks.
# assert os.path.exists(template_path), f"[ ] Couldn't find .tpl template @ {template_path}"
# print(f"[X] Successfully found .tpl template @ {template_path}")
if not os.path.exists(output_abs_dir):
    raise FileNotFoundError(f"[ ] Couldn't find output directory to store .md file @ {output_abs_dir}/")
print(f"[X] Successfully found directory to save .md file @ {output_abs_dir}/")
# makedirs() raises on failure, so once it returns the image directory exists
# and no separate existence check is needed.
os.makedirs(output_image_abs_dir, exist_ok=True)
print(f"[X] Successfully found directory to store images @ {output_image_abs_dir}/")

# Setup configs
c = get_config()
c.NbConvertApp.export_format = 'markdown'
# See: https://github.com/mpacer/nbconvert/blob/bd20c4f6959d277a9e84cf8f48456e57268aeac4/nbconvert/preprocessors/extractoutput.py
c.NbConvertApp.output_base = base_file_name # Base name for images/files/pdfs/markdown output by `nbconvert`, i.e. NAME of "NAME.md" or "NAME.png"
c.NbConvertApp.output_files_dir = output_image_abs_dir # Directory to write image outputs to
c.FilesWriter.build_directory = output_abs_dir # Directory to write .md outputs to
c.MarkdownExporter.template_path = jupyter_path('nbconvert','templates') # List of paths to user's Jupyter templates directories
c.MarkdownExporter.template_file = template_path # Path to our custom .tpl file
def path2support(path: str, images_dir: str = "markdown_images") -> str:
    """Map an image path to the relative URL used inside the Markdown post.

    Only the file name of `path` is kept; it is re-rooted under `images_dir`,
    e.g. "/Users/Desktop/_posts/YYYY-MM-DD-post-name/markdown_images/image.png"
    => "markdown_images/image.png".

    Args:
        path: Path of an image file (absolute or relative).
        images_dir: Directory prefix for the returned URL. Defaults to
            "markdown_images", the value that used to be hard-coded. A
            trailing "/" is accepted; an empty string yields the bare file name.

    Returns:
        "<images_dir>/<file name>", always joined with a forward slash
        because the result is a URL, not a filesystem path.
    """
    image_file_name: str = os.path.basename(path)
    prefix: str = images_dir.rstrip("/")
    if not prefix:
        return image_file_name
    return f"{prefix}/{image_file_name}"
# Register `path2support` as an exporter filter under the same name, so a
# template can apply it as `... | path2support`.
jinja_filters = {'path2support': path2support}
c.MarkdownExporter.filters = jinja_filters
{% extends 'markdown/index.md.j2' %}
{# Overrides the image blocks of the parent template 'markdown/index.md.j2'.
   Each block emits a Markdown image link whose target is the file name stored
   in `output.metadata.filenames` for the matching MIME type, passed through
   the `path2support` filter.
   NOTE(review): `path2support` must be registered on the exporter (e.g. via
   `c.MarkdownExporter.filters`) or rendering will fail on an unknown filter.
   NOTE(review): these blocks assume `output.metadata.filenames` is present,
   i.e. that image outputs were extracted to files -- confirm. #}
{# SVG outputs #}
{% block data_svg %}
![svg]({{ output.metadata.filenames['image/svg+xml'] | path2support }})
{% endblock data_svg %}
{# PNG outputs #}
{% block data_png %}
![png]({{ output.metadata.filenames['image/png'] | path2support }})
{% endblock data_png %}
{# JPEG outputs #}
{% block data_jpg %}
![jpeg]({{ output.metadata.filenames['image/jpeg'] | path2support }})
{% endblock data_jpg %}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment