vszakats/apple-notes-export-from-json.py

## apple-notes-export-from-json.py
#!/usr/bin/env python3

# Copyright 2022-present Viktor Szakats. The MIT License.
# SPDX-License-Identifier: MIT

# Convert 'apple_cloud_notes_parser'-made JSON into Markdown or HTML.
# The goal is to export the notes losslessly, so that they can be
# edited further as-is after importing or opening them in Markdown
# editors.

# Requires:
#   - Ruby
#   - https://github.com/threeplanetssoftware/apple_cloud_notes_parser/releases/tag/v0.11
#     or later
#   - $ bundle install
#   - Compatible Notes.app backup
# Optional:
#   - macOS for 'created' output file timestamps

# Joplin import requirements:
# - --md-linebreaks
# - --frontmatter (optional)
# - as of Joplin v2.8.8, its import functionality does not support
#   importing linked/embedded images as proper attachments (both with
#   or without --embed)

# TODO:
# - Copy linked objects under the output directory and reference them
#   via relative links.
# - Escape double-quotes in values ending up in HTML attributes.
# - Solve continued-lines differently:
#   Iterate through the full line in a sub-loop.
#   This allows changing text attributes only when they actually change,
#   so 4 bold fragments result in a single **/** pair instead of one
#   for each fragment.
# - Replace '-notes-font-hints' CSS style once we discover its purpose.
# - Possibly fold long itemlist lines into shorter ones.
# - A newline would be nice after the last continuous monospace fragment.

import argparse
import base64
import copy
import datetime
import json
import mimetypes
import os
import re
import subprocess
import time
import urllib.parse

# Item styles. note_export() compares these against
# paragraphStyle["styleType"]; STYLE_TYPE_DEFAULT is the fallback it uses
# when a fragment carries no "styleType" at all.
STYLE_TYPE_DEFAULT = -1
STYLE_TYPE_TITLE = 0
STYLE_TYPE_HEADING = 1
STYLE_TYPE_SUBHEADING = 2
STYLE_TYPE_MONOSPACED = 4
STYLE_TYPE_LIST_DOT = 100
STYLE_TYPE_LIST_DASH = 101
STYLE_TYPE_LIST_NUM = 102
STYLE_TYPE_CHECKBOX = 103

# Font weights. note_export() compares these against the "fontWeight"
# field of a fragment.
FONT_TYPE_DEFAULT = 0
FONT_TYPE_BOLD = 1
FONT_TYPE_ITALIC = 2
FONT_TYPE_BOLD_ITALIC = 3

# Text alignments. note_export() compares these against
# paragraphStyle["alignment"] (0 is assumed when the field is missing).
TEXT_ALIGNMENT_LEFT = 0
TEXT_ALIGNMENT_CENTER = 1
TEXT_ALIGNMENT_RIGHT = 2
TEXT_ALIGNMENT_JUSTIFY = 3


# Selects the syntax used for colors with a non-1 alpha value:
# True emits "rgb(R G B / A)", False emits "rgba(R, G, B, A)".
css4 = True


def line_is_codefence(line):
    """Return True if *line*, ignoring leading whitespace, starts with ```."""
    stripped = line.lstrip()
    return stripped.startswith("```")


def fn_to_url(fn, embed=False):
    """Return a URL referencing the local file *fn*.

    Args:
        fn: Path of the file on disk.
        embed: When true and the MIME type can be guessed from the file
            name, read the file and return its content as a base64
            "data:" URL instead of a link.

    Returns:
        A "data:<mime>;base64,..." URL when embedding, otherwise a
        "file://" URL with the path percent-encoded.

    Raises:
        OSError: If embedding is requested and the file cannot be read.
    """
    if embed:
        (mime, _) = mimetypes.guess_type(fn)
        if mime is not None:
            with open(fn, "rb") as f:
                payload = base64.b64encode(f.read()).decode("ascii")
            return "data:" + mime + ";base64," + payload
    # Percent-encode everything except "/" and unreserved characters.
    # Escaping only spaces left "#", "?", "%", parentheses and non-ASCII
    # characters in place, which breaks the URL (or the Markdown link
    # wrapping it).
    return "file://" + urllib.parse.quote(fn)


def htmlattr(value):
    """Return *value* with every double-quote replaced by "%22"."""
    pieces = value.split('"')
    return "%22".join(pieces)


def note_export(
    note,
    out_prefix="",
    overwrite=True,
    format="md",
    embedimages=True,
    frontmatter=True,
    md_linebreaks=False,
    fn_template="{account}-{folder}-{note_id:06d}-{title}",
    debug=0,
    autotidy_cr=True,
    autotidy_hr=True,
):
    """Export a single note to a Markdown or HTML file.

    Args:
        note: One note entry of the parser's JSON output. Entries without
            a "note_proto" key are skipped. The keys read here are
            "account", "folder", "note_id", "title", "note_proto",
            "embedded_objects", "creation_time" and "modify_time".
        out_prefix: String prepended to the generated output file name.
        overwrite: When false, return without doing anything if the
            output file already exists.
        format: Output format, either "md" or "html".
        embedimages: Passed to fn_to_url() for images and thumbnails, to
            embed them as "data:" URLs instead of linking to them.
        frontmatter: Emit a leading "---" block holding the title and the
            updated/created timestamps (in UTC).
        md_linebreaks: For Markdown output, use two trailing spaces as
            forced line break instead of "<br>".
        fn_template: str.format() template of the output file name. It
            receives the fields account, folder, note_id and title.
        debug: 1 or above prints progress information, 2 or above also
            dumps intermediate text/JSON files next to the output, 3 or
            above inserts fragment counters into the output itself.
        autotidy_cr: Strip carriage returns (CR) from the text.
        autotidy_hr: Convert lines made up only of "*", "-" or "="
            characters into a horizontal rule.

    Returns:
        None. The result is written to disk and the file timestamps are
        set from the note's creation/modification times.

    Raises:
        ValueError: If *format* is neither "md" nor "html".
    """

    # Skip folder index
    if "note_proto" not in note:
        return

    outfn = fn_template.format(
        account=note["account"],
        folder=note["folder"],
        note_id=note["note_id"],
        title=note["title"],
    )

    # Replace characters that are unsafe or awkward in file names
    outfn = out_prefix + re.sub(r"[\/\\\?\*|:•]", "_", outfn)

    if format == "md":
        FMT_EXT = ".md"
        FMT_H1_O = "# "
        FMT_H1_C = ""
        FMT_H2_O = "## "
        FMT_H2_C = ""
        FMT_H3_O = "### "
        FMT_H3_C = ""
        FMT_CODE_O = "`"
        FMT_CODE_C = FMT_CODE_O
        FMT_CODEBLOCK_O = "```"
        FMT_CODEBLOCK_C = FMT_CODEBLOCK_O
        FMT_LINE = "---"
        FMT_LINK_PURE = "<{0}>"
        FMT_LINK_NAMED = "[{1}]({0})"
        FMT_LINK_IMAGE = "[![{2}]({0})]({1})"
        FMT_IMAGE = "![{1}]({0})"
        FMT_BOLD_O = "**"
        FMT_BOLD_C = FMT_BOLD_O
        FMT_ITALIC_O = "_"
        FMT_ITALIC_C = FMT_ITALIC_O
        FMT_TABLE_O = ""
        FMT_TABLE_C = ""
        FMT_TABLE_ROW_O = "| "
        FMT_TABLE_ROW_C = ""
        FMT_TABLE_CELL_O = ""
        FMT_TABLE_CELL_C = " | "
        FMT_TABLE_HEADER_O = "---"
        FMT_TABLE_HEADER_C = " | "
        FMT_ITEM_DOT = "* "
        FMT_ITEM_DASH = "- "
        FMT_ITEM_LIST = "1. "
        FMT_ITEM_CHKE = "* [x] "
        FMT_ITEM_CHKD = "* [ ] "
        if md_linebreaks:
            # Some Markdown imports will need this to avoid
            # (mis-)interpreting content as HTML:
            FMT_LINEBREAK = "  "
        else:
            FMT_LINEBREAK = "<br>"
    elif format == "html":
        FMT_EXT = ".html"
        FMT_H1_O = "<h1>"
        FMT_H1_C = "</h1>"
        FMT_H2_O = "<h2>"
        FMT_H2_C = "</h2>"
        FMT_H3_O = "<h3>"
        FMT_H3_C = "</h3>"
        FMT_CODE_O = "<code>"
        FMT_CODE_C = "</code>"
        FMT_CODEBLOCK_O = FMT_CODE_O
        FMT_CODEBLOCK_C = FMT_CODE_C
        FMT_LINE = "<hr>"
        FMT_LINK_PURE = '<a href="{0}">{0}</a>'
        FMT_LINK_NAMED = '<a href="{0}">{1}</a>'
        FMT_LINK_IMAGE = '<a href="{1}"><img style="max-width: 100%; max-height: 100%;" src="{0}" alt="{2}"></a>'
        FMT_IMAGE = (
            '<img style="max-width: 100%; max-height: 100%;" src="{0}" alt="{1}">'
        )
        FMT_BOLD_O = "<b>"
        FMT_BOLD_C = "</b>"
        FMT_ITALIC_O = "<i>"
        FMT_ITALIC_C = "</i>"
        FMT_TABLE_O = "<table>"
        FMT_TABLE_C = "</table>"
        FMT_TABLE_ROW_O = "<tr>"
        FMT_TABLE_ROW_C = "</tr>"
        FMT_TABLE_CELL_O = "<td>"
        FMT_TABLE_CELL_C = "</td>"
        FMT_TABLE_HEADER_O = "<th>"
        FMT_TABLE_HEADER_C = "</th>"
        FMT_ITEM_DOT = "<li>"
        FMT_ITEM_DASH = "<li>"
        FMT_ITEM_LIST = "<li>"
        FMT_ITEM_CHKE = '<li><input type="checkbox" checked>'
        FMT_ITEM_CHKD = '<li><input type="checkbox">'
        FMT_LINEBREAK = "<br>"
        FMT_LIST_DASH_O = '<ul style="list-style-type: ' + "'&ndash;  '" + ';">'
        FMT_LIST_DASH_C = "</ul>"
        FMT_LIST_DOT_O = "<ul>"
        FMT_LIST_DOT_C = "</ul>"
        FMT_LIST_NUM_O = "<ol>"
        FMT_LIST_NUM_C = "</ol>"
    else:
        # Fail early with a clear message instead of hitting an unbound
        # FMT_* name further down.
        raise ValueError("unsupported output format: " + repr(format))

    outfnext = outfn + FMT_EXT

    if not overwrite and os.path.isfile(outfnext):
        return

    if debug >= 1:
        print(note["note_id"])

    proto = copy.deepcopy(note["note_proto"])  # Avoid modifying the original object

    eos = note["embedded_objects"]

    proto["embedded_objects"] = eos

    stripcolor = False

    # Raw/unformatted text. UTF-8.
    #
    # \u2028: manual line breaks
    # \n: entry separators
    text = proto["document"]["note"]["noteText"]

    if debug >= 2:
        with open(outfn + "-1-ori.txt", "w", encoding="utf-8") as f:
            f.write(text)
        with open(outfn + "-1-ori.json", "w", encoding="utf-8") as f:
            json.dump(proto, f, indent=2, ensure_ascii=False, sort_keys=True)

    if debug >= 1:
        print("! text length chars:", len(text))
        print("! text length bytes:", len(bytes(text, "utf-8")))
        print("! length ori:", len(proto["document"]["note"]["attributeRun"]))

    # Merge consecutive fragments having the same attributes
    # Can result in a 10x reduction of fragment numbers, while
    # also giving searchable/meaningful text fields.
    attrs = []
    c = 0
    while c < len(proto["document"]["note"]["attributeRun"]):
        i = proto["document"]["note"]["attributeRun"][c]
        # Compare fragments with their lengths zeroed out, so only the
        # attributes take part in the comparison.
        this = i.copy()
        this["length"] = 0
        while c < len(proto["document"]["note"]["attributeRun"]) - 1:
            nxt = proto["document"]["note"]["attributeRun"][c + 1].copy()
            nxt["length"] = 0
            if json.dumps(this, sort_keys=True) == json.dumps(nxt, sort_keys=True):
                i["length"] += proto["document"]["note"]["attributeRun"][c + 1][
                    "length"
                ]
                c += 1
            else:
                break

        if stripcolor and "color" in i:
            del i["color"]

        attrs.append(i)
        c += 1

    if debug >= 1:
        print("! length merged:", len(attrs))

    proto["document"]["note"]["attributeRun"] = attrs

    # Fill text for each fragment
    c = 0
    pos = 0
    for i in proto["document"]["note"]["attributeRun"]:
        plen = i["length"]
        tmp = text[pos : pos + plen]
        proto["document"]["note"]["attributeRun"][c]["text"] = tmp
        # Bizarre trick to make positions match after encountering
        # high-Unicode codepoints:
        for cc in tmp:
            if ord(cc) > 65535:
                pos -= 1
        c += 1
        pos += plen

    if debug >= 1:
        print("! slice length total:", pos)

    # Delete lengths
    for i in proto["document"]["note"]["attributeRun"]:
        if "length" in i:
            del i["length"]

    if debug >= 2:
        with open(outfn + "-2-merged.json", "w", encoding="utf-8") as f:
            json.dump(proto, f, indent=2, ensure_ascii=False, sort_keys=True)

    # Split texts at newlines into separate sections.
    # This helps processing it accurately, because Apple uses newlines
    # as item terminators.
    attrs = []
    for i in proto["document"]["note"]["attributeRun"]:
        if (
            "\n" in i["text"] and "link" not in i
        ):  # Do not tear up links where their text have spilled-in newline. Deal with this later.
            ispl = i["text"].split("\n")
            ispllen = len(ispl)
            c = 0
            for t in ispl:
                c += 1
                i2 = i.copy()
                i2["text"] = t
                if c < ispllen:
                    i2["text"] += "\n"
                if len(i2["text"]) > 0:
                    attrs.append(i2)
        else:
            attrs.append(i)

    if debug >= 1:
        print("! length resplit:", len(attrs))

    proto["document"]["note"]["attributeRun"] = attrs

    if debug >= 2:
        with open(
            outfn + "-3-merged-split-at-eol.json", "w", encoding="utf-8"
        ) as f:
            json.dump(proto, f, indent=2, ensure_ascii=False, sort_keys=True)

    INDENT_SPACES = "    "

    tsfmt = "%Y-%m-%d %H:%M:%S %z"  # YYYY-MM-DD hh:mm:ss +1030"
    create = datetime.datetime.strptime(note["creation_time"], tsfmt)
    update = datetime.datetime.strptime(note["modify_time"], tsfmt)
    createutc = create.astimezone(datetime.timezone(datetime.timedelta(0)))
    updateutc = update.astimezone(datetime.timezone(datetime.timedelta(0)))
    if debug >= 1:
        print("!", create, update)
        print("!", createutc, updateutc)

    out = ""

    if frontmatter:
        out += "---\n"
        out += "title: " + note["title"] + "\n"
        tsfmt = "%Y-%m-%d %H:%M:%SZ"  # YYYY-MM-DD hh:mm:ssZ
        out += "updated: " + updateutc.strftime(tsfmt) + "\n"
        out += "created: " + createutc.strftime(tsfmt) + "\n"
        out += "---\n"

    continuing = False  # True while inside a line spanning several fragments
    cont_f1 = ""  # prefix markup of the line being continued
    cont_i1 = ""  # indentation of the line being continued
    in_list = 0  # style type of the list we are in, 0 if none
    in_codefence = False
    prev_i1 = ""
    in_list_close = []  # stack of pending HTML list closing tags
    c = 0
    while c < len(proto["document"]["note"]["attributeRun"]):
        i = proto["document"]["note"]["attributeRun"][c]
        c += 1

        if debug >= 3:
            out += ">" + str(c) + "<"

        mystr = i["text"]

        # Text ending with a newline is significant. It closes list items.
        # If there is none, it means the entry continues in the next
        # "attributeRun" item. Notes.app uses separate elements when the
        # font style is different or if there is a special element (e.g.
        # a link) is present in the line.

        if mystr.endswith("\n"):
            eol = True
            mystr = mystr[:-1]
        else:
            eol = False

        # layout

        n1 = ""  # leading newline, if any
        i1 = ""  # indentation
        f1 = ""  # layout prefix markup (bullet, dash, header)
        c1 = ""  # line-ending markup (e.g. FMT_LINEBREAK or FMT_H1_C), if any
        new_in_list = 0

        ao = ""  # alignment wrapper, opening
        ac = ""  # alignment wrapper, closing

        codefence_start = False
        if line_is_codefence(mystr) and not in_codefence:
            if format == "html":
                mystr = FMT_CODE_O
            n1 = "\n"
            in_codefence = True
            codefence_start = True

        # Strip CR

        mystr_no_cr = mystr.replace("\r", "")

        if autotidy_cr:
            # Strip CR, remains of CRLF EOLs
            mystr = mystr_no_cr

        if not in_codefence:

            if "paragraphStyle" in i:
                paragraphStyle = i["paragraphStyle"]
            else:
                paragraphStyle = {}
            if "styleType" in paragraphStyle:
                styleType = paragraphStyle["styleType"]
            else:
                styleType = STYLE_TYPE_DEFAULT
            if "indentAmount" in paragraphStyle:
                indentAmount = paragraphStyle["indentAmount"]
            else:
                indentAmount = 0
            if "alignment" in paragraphStyle:
                alignment = paragraphStyle["alignment"]
            else:
                alignment = 0

            if alignment == TEXT_ALIGNMENT_CENTER:
                ao = '<p style="text-align: center">'
                ac = "</p>"
            elif alignment == TEXT_ALIGNMENT_JUSTIFY:
                ao = '<p style="text-align: justify">'
                ac = "</p>"
            elif alignment == TEXT_ALIGNMENT_RIGHT:
                ao = '<p style="text-align: right">'
                ac = "</p>"

            if (
                styleType == STYLE_TYPE_LIST_DOT
                or styleType == STYLE_TYPE_LIST_DASH
                or styleType == STYLE_TYPE_LIST_NUM
                or styleType == STYLE_TYPE_CHECKBOX
            ):

                if styleType == STYLE_TYPE_CHECKBOX:
                    checkboxDone = paragraphStyle["checklist"]["done"] == 1
                else:
                    checkboxDone = None

                if not continuing:
                    # Make sure to have an empty line before starting a list
                    # (otherwise it may not render as a list)
                    if in_list == 0 and not out.endswith("\n\n") and len(out) > 0:
                        n1 = "\n"

                    if styleType == STYLE_TYPE_LIST_DOT:
                        f1 = FMT_ITEM_DOT
                    elif styleType == STYLE_TYPE_LIST_DASH:
                        f1 = FMT_ITEM_DASH
                    elif styleType == STYLE_TYPE_LIST_NUM:
                        f1 = FMT_ITEM_LIST
                    elif styleType == STYLE_TYPE_CHECKBOX:
                        if checkboxDone:
                            f1 = FMT_ITEM_CHKE
                        else:
                            f1 = FMT_ITEM_CHKD

                    i1 = indentAmount * INDENT_SPACES
                    new_in_list = styleType

            elif styleType == STYLE_TYPE_TITLE:
                f1 = FMT_H1_O
                c1 = FMT_H1_C
            elif styleType == STYLE_TYPE_HEADING:
                f1 = FMT_H2_O
                c1 = FMT_H2_C
            elif styleType == STYLE_TYPE_SUBHEADING:
                f1 = FMT_H3_O
                c1 = FMT_H3_C
            elif styleType == STYLE_TYPE_MONOSPACED:
                n1 = "\n"
                f1 = FMT_CODE_O
                c1 = FMT_CODE_C
            else:
                # Forced newlines for lines not part of lists.
                # Needed for Markdown to avoid joining these lines together.
                if in_list == 0:
                    html_with_div = True
                    if eol:
                        if html_with_div:
                            if mystr_no_cr == "":
                                c1 = FMT_LINEBREAK
                        else:
                            c1 = FMT_LINEBREAK
                    # Not strictly necessary, but makes output more alike the
                    # Notes app built-in HTML export
                    if format == "html" and html_with_div:
                        if eol:
                            c1 += "</div>"
                        if not continuing:
                            f1 = "<div>"

            # Indentation to use for any extra output lines belonging to
            # the current entry (aligned with the text after the prefix).
            if continuing:
                indent = cont_i1 + len(cont_f1) * " "
            else:
                indent = i1 + len(f1) * " "

            # Process payload

            # Escape user content that may interfere with HTML
            if format == "html":
                mystr = (
                    mystr.replace("&", "&amp;")
                    .replace("<", "&lt;")
                    .replace(">", "&gt;")
                    .replace("  ", "&nbsp;&nbsp;")
                )

            # Upconvert common "manual separators" (vertical line ASCII art)
            # to markup. Not strictly necessary and we recommend to disable
            # this if it interferes with content.
            if (
                autotidy_hr
                and not continuing
                and in_list == 0
                and mystr_no_cr != ""
                and (
                    mystr_no_cr.replace("*", "") == ""
                    or mystr_no_cr.replace("-", "") == ""
                    or mystr_no_cr.replace("=", "") == ""
                )
            ):
                mystr = FMT_LINE
                if c1 == FMT_LINEBREAK:
                    c1 = ""
                # Strip forced linefeed before it.
                if out.endswith("\n" + FMT_LINEBREAK + "\n"):
                    out = out[: -len("\n" + FMT_LINEBREAK + "\n")]
                    n1 = "\n\n"
                # Make sure to include one linefeed before it.
                # Otherwise Markdown renderers interpret it as
                # a section header marker.
                elif not out.endswith("\n\n") and len(out) > 0:
                    n1 = "\n"

            # Escape user content that may interfere with Markdown markup
            if format == "md":
                # Also escape ']'?
                mystr = (
                    mystr.replace("[", "\\[")
                    .replace("__", "\\_\\_")
                    .replace("**", "\\*\\*")
                )

            if "link" in i:
                prep = ""
                if mystr != "":
                    if mystr[0] == "\u2028" or mystr[0] == "\n":
                        # Cleanup newlines sometimes sneaking into the beginning
                        # of the link text
                        prep = FMT_LINEBREAK + "\n" + indent
                        mystr = mystr[1:]
                if i["link"] == mystr:
                    mystr = FMT_LINK_PURE.format(mystr)
                elif i["link"] == "http://" + mystr:  # Bump auto-links to HTTPS
                    mystr = FMT_LINK_NAMED.format("https://" + mystr, mystr)
                else:
                    mystr = FMT_LINK_NAMED.format(i["link"], mystr)
                mystr = prep + mystr
            elif "attachmentInfo" in i:
                # Starts out as the attachment identifier and is replaced
                # by the file name where one is available.
                label = i["attachmentInfo"]["attachmentIdentifier"]
                myeo = None
                for oi in eos:
                    if oi["uuid"] == label:
                        myeo = oi
                        break
                mystr = ""
                if myeo is not None:
                    eo_type = myeo["type"]
                    if debug >= 1:
                        print("! attachment:", myeo["primary_key"], eo_type)
                    if eo_type == "public.url":
                        if myeo["url"] is not None:
                            mystr += FMT_LINK_PURE.format(myeo["url"])
                        else:
                            mystr += FMT_LINK_PURE.format("url://lost")
                    elif eo_type == "com.apple.notes.inlinetextattachment.hashtag":
                        if myeo["alt_text"] is not None:
                            mystr += myeo["alt_text"]
                        else:
                            # Seen in "Recently Deleted" in iCloud, which was also
                            # not appearing anymore on the UI (aka "fake deleted"),
                            # but still there and also occupying storage.
                            mystr += "#{lost_tag}"
                    elif eo_type == "com.apple.notes.table":

                        # | Name | Size | Color |
                        # | --- | --- | --- |
                        # | lime | small | green |
                        # | orange | medium | orange |
                        # | grapefruit | large | yellow or pink |

                        mystr = ""

                        tbl = FMT_TABLE_O
                        headerdone = False
                        for row in myeo["table"]:
                            rr = ""
                            sep = ""
                            for col in row:
                                if format == "html":
                                    if not headerdone:
                                        rr += (
                                            FMT_TABLE_HEADER_O
                                            + col
                                            + FMT_TABLE_HEADER_C
                                        )
                                    else:
                                        rr += FMT_TABLE_CELL_O + col + FMT_TABLE_CELL_C
                                elif format == "md":
                                    rr += FMT_TABLE_CELL_O + col + FMT_TABLE_CELL_C
                                    sep += FMT_TABLE_HEADER_O + FMT_TABLE_HEADER_C
                            tbl += (
                                "\n"
                                + indent
                                + FMT_TABLE_ROW_O
                                + rr.rstrip()
                                + FMT_TABLE_ROW_C
                            )
                            if not headerdone:
                                if format == "md":
                                    # Markdown needs a separator row after
                                    # the header row.
                                    tbl += (
                                        "\n"
                                        + indent
                                        + FMT_TABLE_ROW_O
                                        + sep.rstrip()
                                        + FMT_TABLE_ROW_C
                                    )
                                headerdone = True
                        mystr += tbl + "\n" + FMT_TABLE_C + "\n"
                    elif eo_type == "com.apple.notes.gallery":
                        mystr += "(gallery of {})".format(len(myeo["child_objects"]))
                        for mycho in myeo["child_objects"]:
                            if "filename" in mycho:
                                label = mycho["filename"]
                                if label is None:
                                    label = "unnamed"
                            if "backup_location" in mycho:
                                filepath = mycho["backup_location"]
                            else:
                                filepath = mycho["filepath"]
                            mystr += (
                                "\n"
                                + indent
                                + FMT_IMAGE.format(
                                    fn_to_url(filepath, embedimages), label
                                )
                            )
                    else:
                        if "filename" in myeo:
                            label = myeo["filename"]
                            if label is None:
                                label = "unnamed"
                        if "backup_location" in myeo:
                            filepath = myeo["backup_location"]
                        else:
                            filepath = myeo["filepath"]
                        if (
                            myeo["conforms_to"] == "image"
                            or i["attachmentInfo"]["typeUti"] == "com.apple.drawing"
                        ):
                            mystr += FMT_IMAGE.format(
                                fn_to_url(filepath, embedimages), label
                            )
                        elif (
                            "thumbnails" in myeo
                            and myeo["thumbnails"]
                            and len(myeo["thumbnails"]) > 0
                        ):
                            th = myeo["thumbnails"][-1]  # pick the largest
                            if "backup_location" in th:
                                thumbnail = th["backup_location"]
                            else:
                                thumbnail = th["filepath"]
                            mystr += FMT_LINK_IMAGE.format(
                                fn_to_url(thumbnail, embedimages),
                                fn_to_url(filepath),
                                label,
                            )
                        else:
                            mystr += FMT_LINK_NAMED.format(fn_to_url(filepath), label)
                            if (
                                eo_type == "public.vcard"
                                or eo_type == "public.comma-separated-values-text"
                            ):
                                # Include the content of these text-based
                                # attachments inline, as a code block.
                                mystr += "\n\n" + FMT_CODEBLOCK_O + "\n"
                                with open(filepath, "r") as f:
                                    mystr += f.read()
                                mystr += "\n" + FMT_CODEBLOCK_C + "\n"
                else:
                    mystr += "{" + label + " attachment not found" + "}"
            else:
                # normal text

                # This avoids leaving a indent-spaces-only line below the
                # item, while also preserving the extra closing newline
                # as it appears in Notes.
                if mystr.endswith("\u2028"):
                    mystr = mystr[:-1] + FMT_LINEBREAK
                    if eol:
                        mystr += FMT_LINEBREAK
                    else:
                        mystr += "\n" + indent

                mystr = mystr.replace(
                    "\u2028", FMT_LINEBREAK + "\n" + indent
                )  # \u2028 = LINE SEPARATOR

        # font style

        wo = ""  # basic styles
        wc = wo

        uo = ""  # underline
        uc = uo

        so = ""  # strikethrough
        sc = so

        yo = ""  # superscript/subscript
        yc = yo

        xo = ""  # span style
        xc = xo

        if mystr != "" and not in_codefence:

            st = []  # span styles

            if "fontWeight" in i:
                fontWeight = i["fontWeight"]
                if fontWeight == FONT_TYPE_BOLD:
                    wo = FMT_BOLD_O
                    wc = FMT_BOLD_C
                elif fontWeight == FONT_TYPE_ITALIC:
                    wo = FMT_ITALIC_O
                    wc = FMT_ITALIC_C
                elif fontWeight == FONT_TYPE_BOLD_ITALIC:
                    wo = FMT_BOLD_O + FMT_ITALIC_O
                    wc = FMT_ITALIC_C + FMT_BOLD_C

            if "underlined" in i and i["underlined"] == 1:
                uo = "<u>"
                uc = "</u>"

            if "strikethrough" in i and i["strikethrough"] == 1:
                so = "<s>"
                sc = "</s>"

            if "superscript" in i:
                if i["superscript"] < 0:  # subscript
                    yo = "<sub>"
                    yc = "</sub>"
                elif i["superscript"] > 0:  # superscript
                    yo = "<sup>"
                    yc = "</sup>"

            if "font" in i:
                font = i["font"]
                if "pointSize" in font:
                    # Notes.app built-in export uses "px" (with the pointSize value)
                    st.append(
                        "font-size: " + str(font["pointSize"]) + "px"
                    )  # not "pt"!
                if "fontName" in font:
                    st.append("font-family: " + font["fontName"])
                if "fontHints" in font:
                    # Purpose undiscovered. Values seen: 1
                    st.append("-notes-font-hints: " + str(font["fontHints"]))

            # Omit color for links to avoid noise. Most Markdown renderers
            # color links by default, overriding custom colors we would set
            # here.
            if "color" in i and "link" not in i:
                color = i["color"]
                if color["alpha"] != 1:
                    if css4:
                        rgba = "rgb({} {} {} / {})"
                    else:
                        rgba = "rgba({}, {}, {}, {})"
                    colorhtml = rgba.format(
                        int(color["red"] * 255),
                        int(color["green"] * 255),
                        int(color["blue"] * 255),
                        color["alpha"],
                    )
                else:
                    colorhtml = "#{:02x}{:02x}{:02x}".format(
                        int(color["red"] * 255),
                        int(color["green"] * 255),
                        int(color["blue"] * 255),
                    )
                st.append("color: " + colorhtml)

            if len(st) > 0:
                xo = '<span style="{}">'.format("; ".join(st) + ";")
                xc = "</span>"

        # Omit forced-newline-only lines after a section separator
        if (
            c1 == FMT_LINEBREAK
            and mystr_no_cr == ""
            and out.endswith("\n" + FMT_LINE + "\n")
            and not in_codefence
        ):
            c1 = ""

        # Construct output line

        # Ensure that we move any space prefixes or suffixes outside the
        # markup. Markdown renderers ignore whitespace-separated markups.
        o1 = ""
        if mystr != "" and xc == "" and uc == "" and sc == "" and wc != "":
            olen = len(mystr)
            slen = len(mystr.rstrip())
            if olen != slen:
                c1 = mystr[slen:] + c1
                mystr = mystr.rstrip()
                olen = len(mystr)
            slen = len(mystr.lstrip())
            if olen != slen:
                o1 = mystr[: olen - slen]
                mystr = mystr.lstrip()

        # Quick hack, might not fit/cover all situations
        if ao != "":
            c1 = c1.replace(FMT_LINEBREAK, "")

        if not codefence_start and line_is_codefence(mystr) and in_codefence:
            if format == "html":
                # Closing fence: emit the closing tag. Emitting the
                # opening tag here would leave <code> unclosed.
                mystr = FMT_CODE_C
            in_codefence = False

        if not continuing:
            if format == "html" and (in_list != new_in_list or prev_i1 != i1):

                # HTML needs explicit list containers; open/close them
                # when the list type or the indentation level changes.
                list_markup = ""

                i1n = len(i1) / len(INDENT_SPACES)
                prev_i1n = len(prev_i1) / len(INDENT_SPACES)

                if i1n > prev_i1n:
                    while i1n > prev_i1n:
                        if new_in_list == STYLE_TYPE_LIST_NUM:
                            lo = FMT_LIST_NUM_O
                            lc = FMT_LIST_NUM_C
                        elif new_in_list == STYLE_TYPE_LIST_DASH:
                            lo = FMT_LIST_DASH_O
                            lc = FMT_LIST_DASH_C
                        else:
                            lo = FMT_LIST_DOT_O
                            lc = FMT_LIST_DOT_C
                        list_markup += i1 + lo + "\n"
                        in_list_close.append({"c": lc, "i1": i1})
                        i1n -= 1
                elif i1n < prev_i1n:
                    while i1n < prev_i1n:
                        tmp = in_list_close.pop()
                        list_markup += tmp["i1"] + tmp["c"] + "\n"
                        i1n += 1
                else:
                    if len(in_list_close) > 0:
                        tmp = in_list_close.pop()
                        list_markup += tmp["i1"] + tmp["c"] + "\n"
                    if new_in_list == STYLE_TYPE_LIST_NUM:
                        lo = FMT_LIST_NUM_O
                        lc = FMT_LIST_NUM_C
                    elif new_in_list == STYLE_TYPE_LIST_DASH:
                        lo = FMT_LIST_DASH_O
                        lc = FMT_LIST_DASH_C
                    else:
                        lo = FMT_LIST_DOT_O
                        lc = FMT_LIST_DOT_C
                    list_markup += i1 + lo + "\n"
                    in_list_close.append({"c": lc, "i1": i1})

                if list_markup != "":
                    if not out.endswith("\n"):
                        out += "\n"
                    out += list_markup

            in_list = new_in_list
            prev_i1 = i1

        out += (
            n1
            + i1
            + f1
            + o1
            + wo
            + so
            + uo
            + yo
            + ao
            + xo
            + mystr
            + xc
            + ac
            + yc
            + uc
            + sc
            + wc
            + c1
        )

        if eol:
            out += "\n"
            continuing = False
        else:
            # Remember the layout of the line's first fragment, to align
            # any follow-up fragments with it.
            if not continuing:
                cont_f1 = f1
                cont_i1 = i1
            continuing = True

    if not out.endswith("\n"):
        out += "\n"

    print("Writing '" + outfnext + "'")

    # Notes may contain any Unicode text; do not depend on the locale's
    # default encoding.
    with open(outfnext, "w", encoding="utf-8") as f:
        f.write(out)

    # Requires macOS + Apple Developer Tools
    if os.path.isfile("/usr/bin/SetFile"):
        tsfmt = "%m/%d/%Y %H:%M:%S"  # "MM/DD/YYYY [hh:mm:[:ss] [AM | PM]]"
        # Pass the arguments as a list (no shell involved), so the file
        # name needs no quoting. An empty TZ makes the tool interpret the
        # UTC timestamps as such.
        subprocess.run(
            [
                "/usr/bin/SetFile",
                "-d",
                createutc.strftime(tsfmt),
                "-m",
                updateutc.strftime(tsfmt),
                outfnext,
            ],
            env=dict(os.environ, TZ=""),
            check=False,
        )
    else:
        # timestamp() honors the UTC offset of the parsed time.
        # mktime(timetuple()) would misread the note's wall-clock time as
        # being in the local timezone of this machine.
        mtime = update.timestamp()
        os.utime(outfnext, times=(mtime, mtime))

    return


# ---------------------------------------------------------------------------
# Command-line entry point: parse the options, load the JSON produced by
# apple_cloud_notes_parser and export every requested note in every
# requested format via note_export().
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="""Convert JSON exports to Markdown or HTML.
First create an export with:
https://github.com/threeplanetssoftware/apple_cloud_notes_parser
e869efe6fac0927eb9a7c5327c67415765b3a6ec
(2022-12-09 20:08:50 -0500) or later.""",
)
parser.add_argument(
    "--overwrite",
    dest="overwrite",
    action="store_true",
    help="overwrite existing output",
)
# NOTE(review): with action="store_true" and default=True, 'to_md' is always
# True, so Markdown is exported even when only --html is given. Left as-is to
# keep the existing command-line behavior.
parser.add_argument(
    "--md", dest="to_md", action="store_true", default=True, help="export in Markdown"
)
parser.add_argument(
    "--html", dest="to_html", action="store_true", help="export in HTML"
)
parser.add_argument(
    "--json",
    dest="json_input",
    action="store",
    default="",
    help="input JSON (or export directory root)",
)
parser.add_argument(
    "--output-prefix",
    dest="out_prefix",
    action="store",
    default="exported-Note-",
    help="output prefix",
)
parser.add_argument(
    "--embed", dest="embedimages", action="store_true", help="embed images"
)
parser.add_argument(
    "--frontmatter",
    dest="frontmatter",
    action="store_true",
    help="add front matter (title and dates)",
)
parser.add_argument(
    "--md-linebreaks",
    dest="md_linebreaks",
    action="store_true",
    help="use native Markdown linebreaks (double-space) instead of <br>",
)
parser.add_argument(
    "--no-tidy-hr",
    dest="notidy_hr",
    action="store_true",
    help="do not convert common manual line separators (full line of '*', '-' or '=' characters) to markup",
)
parser.add_argument(
    "--no-tidy-cr",
    dest="notidy_cr",
    action="store_true",
    help="do not delete <CR> characters",
)
# type=int lets argparse reject a non-numeric debug level with a usage error
# instead of failing later with a ValueError traceback.
parser.add_argument(
    "--debug",
    dest="debug",
    action="store",
    type=int,
    default=0,
    help="debug level 0-3",
)
parser.add_argument(
    dest="id_list", metavar="note IDs, export all if none specified", nargs="*"
)
args = parser.parse_args()

# Point to an apple_cloud_notes_parser output JSON:
if not args.json_input:
    print(
        "! Error: You must set the input with --json (e.g. 'output/json/all_notes_1.json')"
    )
    # Exit with a failure status; SystemExit is used because quit() is only
    # available when the 'site' module has been loaded.
    raise SystemExit(1)
if os.path.isdir(args.json_input):
    # A directory was given: use the JSON file at its 'json/all_notes_1.json'.
    args.json_input = os.path.join(args.json_input, "json/all_notes_1.json")

formats = []
if args.to_md:
    formats.append("md")
if args.to_html:
    formats.append("html")

# Safety net; unreachable for as long as --md defaults to True (see above).
if not formats:
    print("! Error: You must set at least one export format via --md and/or --html")
    raise SystemExit(1)

try:
    # JSON is read as UTF-8 rather than in the locale's default encoding.
    with open(args.json_input, "r", encoding="utf-8") as f:
        notes = json.load(f)
except OSError:
    print("! Error: Could not open input JSON:", args.json_input)
    raise SystemExit(1)
except ValueError:
    # Covers json.JSONDecodeError and UnicodeDecodeError.
    print("! Error: Could not parse input JSON:", args.json_input)
    raise SystemExit(1)

# The export loop below looks notes up by ID in the top-level "notes" object.
if not isinstance(notes, dict) or not isinstance(notes.get("notes"), dict):
    print("! Error: Input JSON has no 'notes' object:", args.json_input)
    raise SystemExit(1)

# No IDs on the command line: export every note (iterating yields the IDs).
if not args.id_list:
    args.id_list = notes["notes"]

for note_id in args.id_list:
    if note_id not in notes["notes"]:
        print("! Warning: Note not found:", note_id)
        continue
    note = notes["notes"][note_id]
    for format in formats:
        note_export(
            note,
            out_prefix=args.out_prefix,
            overwrite=args.overwrite,
            format=format,
            embedimages=args.embedimages,
            frontmatter=args.frontmatter,
            md_linebreaks=args.md_linebreaks,
            debug=args.debug,
            autotidy_cr=not args.notidy_cr,
            autotidy_hr=not args.notidy_hr,
        )