Skip to content

Instantly share code, notes, and snippets.

Forked from aolle/
Last active November 6, 2022 18:39
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
What would you like to do?
Convert Nimbus Notes HTML to Markdown for UpNote
#!/usr/bin/env python3
# NOTE: This script has been extended into a full repository (the link was
# lost when this page was captured). Any future work will be posted there.
import atexit
import logging
import pandas as pd
import subprocess
import time
from functools import wraps
# from icecream import ic
from pathlib import Path
from pathvalidate import replace_symbol
from zipfile import ZipFile
# TODO: colorama
# Define a logger instance
logger = logging.getLogger(__name__)

# Define a stream handler for the console output
handler = logging.StreamHandler()

# Customize the formatter on the console:
# "<timestamp> - <logger name>: <LEVEL> - <message>"
formatter = logging.Formatter(
    "%(asctime)s - %(name)s: %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

# Add the formatter to the handler
handler.setFormatter(formatter)

# Add the stream handler to the logger that we will use
logger.addHandler(handler)

# Set the level of logging to be INFO instead of the default WARNING
logger.setLevel(logging.INFO)
def timeit(func):
    """Decorator that prints how long each call to *func* takes.

    The wrapper forwards all positional and keyword arguments to *func*,
    measures the elapsed time with ``time.perf_counter`` and prints the
    function name, its arguments and the duration in seconds.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same call signature that returns whatever
        *func* returns.
    """

    # Preserve func's __name__/__doc__ on the wrapper so decorated functions
    # remain identifiable in logs and tracebacks.
    @wraps(func)
    def timeit_wrapper(*args, **kwargs):
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        end_time = time.perf_counter()
        total_time = end_time - start_time
        print(f"Function {func.__name__}{args} {kwargs} Took {total_time:.4f} seconds")
        return result

    return timeit_wrapper
# Running tallies of conversion outcomes; write_note() increments them.
notes_written = notes_failed = 0
# Processed HTML files per outcome, keyed by the tally value at insertion time.
# Two distinct dict objects are created here (no aliasing).
win_dict, fail_dict = {}, {}
def unzip_files():
    """Unzip files in the current directory.

    Every ``*.zip`` found recursively below the current working directory is
    extracted into a directory named after the archive's stem, created in the
    current working directory. An archive whose stem already names an entry
    there is treated as already extracted and skipped. Files that are not
    valid zip archives are logged and skipped.

    NOTE(review): the extraction statement was missing from the damaged
    source. The target directory is inferred from the "already extracted"
    check -- confirm against the upstream script.
    """
    # The ZipFile class has no ``BadZipFile`` attribute; the exception is
    # defined at module level in ``zipfile``. Imported locally so the file's
    # top-level import block does not need to change.
    from zipfile import BadZipFile

    cwd = Path(".")
    # List the directory once instead of once per archive; kept in sync below.
    extracted = {entry.name for entry in cwd.iterdir()}
    # Snapshot the matches so freshly extracted content does not feed back
    # into the scan that is still running.
    for archive in sorted(cwd.rglob("*.zip")):
        if archive.stem in extracted:
            logger.info(f"Skipping {archive}, already extracted")
            continue
        logger.info(f"Unzipping {archive}")
        try:
            with ZipFile(archive) as zip_file:
                zip_file.extractall(cwd / archive.stem)
        except BadZipFile:
            logger.error(f"Skipping {archive}, not a zip file")
        else:
            extracted.add(archive.stem)
def cleanup_zip_files():
    """Remove zip files in the current directory.

    Every regular file matching ``*.zip`` below the current working directory
    (searched recursively) is logged and deleted.

    NOTE(review): the delete statement was missing from the damaged source;
    it is restored from the docstring and the "Removing ..." log message --
    confirm against the upstream script.
    """
    # Snapshot the matches first so deleting entries cannot disturb the
    # directory scan; directories that merely end in ".zip" are left alone.
    archives = [found for found in Path(".").rglob("*.zip") if found.is_file()]
    for archive in archives:
        logger.info(f"Removing {archive}")
        archive.unlink()
def write_note(html_file, markdown_destination):
    """Convert html file to markdown and write to original directory.

    Runs ``pandoc`` on *html_file* (HTML in, ``markdown_strict-raw_html``
    out) and writes the result to *markdown_destination* as UTF-8.

    Args:
        html_file: Path of the HTML note to convert.
        markdown_destination: Path (``str`` or ``Path``) of the Markdown file
            to create. An existing file is never overwritten.

    Side effects:
        * Success: increments ``notes_written`` and records *html_file* in
          ``win_dict`` under the new count.
        * Failure (pandoc missing, pandoc exits non-zero, or the destination
          cannot be opened): logs an error, increments ``notes_failed`` and
          records *html_file* in ``fail_dict``.
        * Destination already exists: logs and returns without touching the
          counters.
    """
    global notes_written, notes_failed

    markdown_destination = Path(markdown_destination)
    dest = markdown_destination.resolve()

    # Check before converting so an existing note costs no pandoc run.
    if markdown_destination.exists():
        logger.info(f"Markdown file {dest} already exists, skipping")
        return

    logger.info(f"Writing markdown to {dest}")
    # Argument list instead of a shell string: note titles frequently contain
    # quotes or other shell metacharacters that would break or inject into a
    # quoted command line.
    cmd = [
        "pandoc",
        str(html_file),
        "--from",
        "html",
        "--to",
        "markdown_strict-raw_html",
    ]
    try:
        # Keep stderr separate so pandoc warnings never end up inside the
        # generated Markdown.
        pandoc_run = subprocess.check_output(cmd, stderr=subprocess.PIPE)
        md_content = pandoc_run.decode("utf-8")
        with open(markdown_destination, "w", encoding="utf-8") as md_fp:
            md_fp.write(md_content)
    except (FileNotFoundError, subprocess.CalledProcessError):
        logger.error(f"Failed to convert {html_file}")
        notes_failed += 1
        fail_dict[notes_failed] = html_file
    else:
        notes_written += 1
        win_dict[notes_written] = html_file
# TODO: error handling for tld retaining nested dirs (e.g., 'All Notes' vs. 'Books')
def _sanitize_directory(directory, root):
    """Rename *directory* in place to its sanitized name; return where it now lives."""
    # Never rename the working directory itself.
    if directory == root:
        return directory
    # sanitize filepath
    sanitized = replace_symbol(directory.name, exclude_symbols=[" ", "_", "-"])
    # fix double whitespace
    sanitized = " ".join(sanitized.split())
    if not sanitized or sanitized == directory.name:
        return directory
    target = directory.with_name(sanitized)
    if target.exists():
        # Renaming onto an existing entry would fail or merge notes; keep the
        # original name and convert the files where they are.
        logger.error(f"Cannot rename {directory} to {target}, target already exists")
        return directory
    directory.rename(target)
    return target


def main():
    """Unzip the exported archives, convert every HTML note, and write reports.

    Steps:
        1. Extract the zip archives via ``unzip_files()``.
        2. For every ``*.html`` file below the current directory, rename its
           parent directory to a sanitized name and convert the file to
           ``<stem>.md`` next to it via ``write_note()``. Notes whose
           Markdown file already exists are skipped.
        3. Write the converted and failed HTML paths to ``win_list.csv`` and
           ``fail_list.csv`` in the current directory.

    NOTE(review): directories are renamed in place. The damaged source passed
    a bare name to ``Path.rename``, which is resolved against the working
    directory and would move nested directories up into it -- confirm that
    in-place renaming is the intended behavior.
    """
    # unzip files
    unzip_files()

    # TODO: add joblib parallelization after getting results of html files
    # get html files -- deepest paths first, so renaming a directory never
    # invalidates the recorded path of a file that is still waiting its turn.
    html_files = sorted(
        (found.resolve() for found in Path(".").rglob("*.html")),
        key=lambda found: len(found.parts),
        reverse=True,
    )

    root = Path(".").resolve()
    # original parent directory -> sanitized location; lets several HTML files
    # share one directory without renaming it twice.
    renamed = {}

    # convert html files to markdown
    for html_file in html_files:
        parent = html_file.parent
        if parent not in renamed:
            renamed[parent] = _sanitize_directory(parent, root)
        filepath = renamed[parent]

        # markdown file name sits next to the (possibly moved) html file
        md_destination = filepath / f"{html_file.stem}.md"
        # resolve new html file path
        html_file = filepath / html_file.name

        if md_destination.exists():
            dest = md_destination.resolve()
            logger.info(f"Markdown file {dest} already exists, skipping")
            continue
        write_note(html_file, md_destination)

    logger.info(f"Converted {notes_written} notes, failed to convert {notes_failed} notes")

    # create dataframes (one row per note, single "note" column)
    wins = pd.DataFrame.from_dict(win_dict, orient="index", columns=["note"])
    fails = pd.DataFrame.from_dict(fail_dict, orient="index", columns=["note"])

    # write the dataframes to csv files
    wins.to_csv("win_list.csv", index=False)
    fails.to_csv("fail_list.csv", index=False)
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment