Skip to content

Instantly share code, notes, and snippets.

@vszakats
Last active May 10, 2023 12:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vszakats/5a3bd939721d1dde6142d9ea3b2d1b5f to your computer and use it in GitHub Desktop.
Save vszakats/5a3bd939721d1dde6142d9ea3b2d1b5f to your computer and use it in GitHub Desktop.
Apple Notes.app JSON to Markdown/HTML converter
#!/usr/bin/env python3
# Copyright 2022-present Viktor Szakats. The MIT License.
# SPDX-License-Identifier: MIT
# Convert 'apple_cloud_notes_parser'-made JSON into Markdown or HTML.
# The goal is to export the content losslessly and with the ability to
# continue editing them after importing or opening them in Markdown
# editors as-is.
# Requires:
# - Ruby
# - https://github.com/threeplanetssoftware/apple_cloud_notes_parser/releases/tag/v0.11
# or later
# - $ bundle install
# - Compatible Notes.app backup
# Optional:
# - macOS for 'created' output file timestamps
# Joplin import requirements:
# - --md-linebreaks
# - --frontmatter (optional)
# - as of Joplin v2.8.8, its import functionality does not support
# importing linked/embedded images as proper attachments (both with
# or without --embed)
# TODO:
# - Copy linked objects under the output directory and reference them
# via relative links.
# - Escape double-quotes in values ending up in HTML attributes.
# - Solve continued lines differently:
# Iterate through the full line in a sub-loop.
# This allows changing text attributes only when they actually change,
# so 4 bold fragments result in a single **/** pair instead of one
# pair for each fragment.
# - Replace '-notes-font-hints' CSS style once we discover its purpose.
# - Possibly fold long itemlist lines into shorter ones.
# - A newline would be nice after the last continuous monospace fragment.
import argparse
import base64
import copy
import datetime
import json
import mimetypes
import os
import re
import time
# Paragraph style identifiers, compared against paragraphStyle["styleType"].
# STYLE_TYPE_DEFAULT is the fallback used when a fragment carries no styleType.
STYLE_TYPE_DEFAULT = -1
(STYLE_TYPE_TITLE, STYLE_TYPE_HEADING, STYLE_TYPE_SUBHEADING) = (0, 1, 2)
STYLE_TYPE_MONOSPACED = 4
(
    STYLE_TYPE_LIST_DOT,
    STYLE_TYPE_LIST_DASH,
    STYLE_TYPE_LIST_NUM,
    STYLE_TYPE_CHECKBOX,
) = (100, 101, 102, 103)

# Font weight identifiers, compared against a fragment's "fontWeight".
(
    FONT_TYPE_DEFAULT,
    FONT_TYPE_BOLD,
    FONT_TYPE_ITALIC,
    FONT_TYPE_BOLD_ITALIC,
) = (0, 1, 2, 3)

# Text alignment identifiers, compared against paragraphStyle["alignment"].
(
    TEXT_ALIGNMENT_LEFT,
    TEXT_ALIGNMENT_CENTER,
    TEXT_ALIGNMENT_RIGHT,
    TEXT_ALIGNMENT_JUSTIFY,
) = (0, 1, 2, 3)

# When true, translucent colors are written as "rgb(r g b / a)" instead of
# the comma-separated "rgba(r, g, b, a)" form.
css4 = True
def line_is_codefence(line):
    """Tell whether *line* opens or closes a fenced code block.

    Leading whitespace is ignored; the line qualifies when the first three
    remaining characters are backticks.
    """
    stripped = line.lstrip()
    return stripped.startswith("```")
def fn_to_url(fn, embed=False):
    """Return a URL referencing the local file *fn*.

    Args:
        fn: Path of the file on disk.
        embed: When true and a MIME type can be guessed from the file name,
            the file is read and returned inline as a base64 ``data:`` URL.

    Returns:
        A ``data:`` URL when embedding succeeded, otherwise a ``file://`` URL
        with the path percent-encoded.

    Raises:
        OSError: If *embed* is true, a MIME type was guessed, and the file
            cannot be read.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    if embed:
        mime, _ = mimetypes.guess_type(fn)
        if mime is not None:
            with open(fn, "rb") as f:
                payload = base64.b64encode(f.read()).decode("ascii")
            return "data:" + mime + ";base64," + payload
    # Encode every reserved character, not just spaces: '#', '?', '%' and
    # parentheses would otherwise truncate the URL or break the Markdown
    # "(target)" syntax. '/' stays literal (quote()'s default safe set).
    return "file://" + quote(fn)
def htmlattr(value):
    """Return *value* with each double quote replaced by ``%22``."""
    pieces = value.split('"')
    return "%22".join(pieces)
def note_export(
    note,
    out_prefix="",
    overwrite=True,
    format="md",
    embedimages=True,
    frontmatter=True,
    md_linebreaks=False,
    fn_template="{account}-{folder}-{note_id:06d}-{title}",
    debug=0,
    autotidy_cr=True,
    autotidy_hr=True,
):
    """Export one parsed note to a Markdown or HTML file.

    Args:
        note: One entry of the parser's ``notes`` mapping. Reads the keys
            ``note_proto``, ``embedded_objects``, ``account``, ``folder``,
            ``note_id``, ``title``, ``creation_time`` and ``modify_time``.
            Entries without ``note_proto`` (folder indexes) are skipped.
        out_prefix: Prefix prepended to the generated output file name.
        overwrite: When false, an existing output file is left alone.
        format: ``"md"`` or ``"html"``.
        embedimages: Inline images as ``data:`` URLs instead of linking them.
        frontmatter: Prepend a ``---`` block with title and UTC timestamps.
        md_linebreaks: For Markdown, use the native two-space hard line break
            instead of ``<br>``.
        fn_template: ``str.format`` template for the output file name.
        debug: 0 = quiet, 1 = progress prints, 2 = also dump intermediate
            files, 3 = also put fragment counters into the output.
        autotidy_cr: Strip carriage returns from text.
        autotidy_hr: Turn lines made only of ``*``, ``-`` or ``=`` into a
            horizontal rule.

    Returns:
        None. The result is written to ``<out_prefix><name>.<ext>`` and the
        file timestamps are set from the note's timestamps.

    Raises:
        ValueError: If *format* is neither ``"md"`` nor ``"html"``.
    """
    # Function-scope import: only needed for the optional SetFile call below.
    import subprocess

    # Skip folder index
    if "note_proto" not in note:
        return

    if format not in ("md", "html"):
        raise ValueError("unsupported export format: {!r}".format(format))

    outfn = fn_template.format(
        account=note["account"],
        folder=note["folder"],
        note_id=note["note_id"],
        title=note["title"],
    )
    # Replace characters that are awkward or invalid in file names.
    outfn = out_prefix + re.sub(r"[\/\\\?\*|:•]", "_", outfn)

    if format == "md":
        FMT_EXT = ".md"
        FMT_H1_O = "# "
        FMT_H1_C = ""
        FMT_H2_O = "## "
        FMT_H2_C = ""
        FMT_H3_O = "### "
        FMT_H3_C = ""
        FMT_CODE_O = "`"
        FMT_CODE_C = FMT_CODE_O
        FMT_CODEBLOCK_O = "```"
        FMT_CODEBLOCK_C = FMT_CODEBLOCK_O
        FMT_LINE = "---"
        FMT_LINK_PURE = "<{0}>"
        FMT_LINK_NAMED = "[{1}]({0})"
        FMT_LINK_IMAGE = "[![{2}]({0})]({1})"
        FMT_IMAGE = "![{1}]({0})"
        FMT_BOLD_O = "**"
        FMT_BOLD_C = FMT_BOLD_O
        FMT_ITALIC_O = "_"
        FMT_ITALIC_C = FMT_ITALIC_O
        FMT_TABLE_O = ""
        FMT_TABLE_C = ""
        FMT_TABLE_ROW_O = "| "
        FMT_TABLE_ROW_C = ""
        FMT_TABLE_CELL_O = ""
        FMT_TABLE_CELL_C = " | "
        FMT_TABLE_HEADER_O = "---"
        FMT_TABLE_HEADER_C = " | "
        FMT_ITEM_DOT = "* "
        FMT_ITEM_DASH = "- "
        FMT_ITEM_LIST = "1. "
        FMT_ITEM_CHKE = "* [x] "
        FMT_ITEM_CHKD = "* [ ] "
        if md_linebreaks:
            # Some Markdown imports will need this to avoid
            # (mis-)interpreting content as HTML.
            # A Markdown hard line break is TWO trailing spaces; a single
            # trailing space is not a line break.
            FMT_LINEBREAK = "  "
        else:
            FMT_LINEBREAK = "<br>"
    else:  # format == "html"
        FMT_EXT = ".html"
        FMT_H1_O = "<h1>"
        FMT_H1_C = "</h1>"
        FMT_H2_O = "<h2>"
        FMT_H2_C = "</h2>"
        FMT_H3_O = "<h3>"
        FMT_H3_C = "</h3>"
        FMT_CODE_O = "<code>"
        FMT_CODE_C = "</code>"
        FMT_CODEBLOCK_O = FMT_CODE_O
        FMT_CODEBLOCK_C = FMT_CODE_C
        FMT_LINE = "<hr>"
        FMT_LINK_PURE = '<a href="{0}">{0}</a>'
        FMT_LINK_NAMED = '<a href="{0}">{1}</a>'
        FMT_LINK_IMAGE = '<a href="{1}"><img style="max-width: 100%; max-height: 100%;" src="{0}" alt="{2}"></a>'
        FMT_IMAGE = (
            '<img style="max-width: 100%; max-height: 100%;" src="{0}" alt="{1}">'
        )
        FMT_BOLD_O = "<b>"
        FMT_BOLD_C = "</b>"
        FMT_ITALIC_O = "<i>"
        FMT_ITALIC_C = "</i>"
        FMT_TABLE_O = "<table>"
        FMT_TABLE_C = "</table>"
        FMT_TABLE_ROW_O = "<tr>"
        FMT_TABLE_ROW_C = "</tr>"
        FMT_TABLE_CELL_O = "<td>"
        FMT_TABLE_CELL_C = "</td>"
        FMT_TABLE_HEADER_O = "<th>"
        FMT_TABLE_HEADER_C = "</th>"
        FMT_ITEM_DOT = "<li>"
        FMT_ITEM_DASH = "<li>"
        FMT_ITEM_LIST = "<li>"
        FMT_ITEM_CHKE = '<li><input type="checkbox" checked>'
        FMT_ITEM_CHKD = '<li><input type="checkbox">'
        FMT_LINEBREAK = "<br>"
        FMT_LIST_DASH_O = '<ul style="list-style-type: ' + "'&ndash; '" + ';">'
        FMT_LIST_DASH_C = "</ul>"
        FMT_LIST_DOT_O = "<ul>"
        FMT_LIST_DOT_C = "</ul>"
        FMT_LIST_NUM_O = "<ol>"
        FMT_LIST_NUM_C = "</ol>"

    outfnext = outfn + FMT_EXT

    if not overwrite and os.path.isfile(outfnext):
        return

    if debug >= 1:
        print(note["note_id"])

    proto = copy.deepcopy(note["note_proto"])  # Avoid modifying the original object
    eos = note["embedded_objects"]
    proto["embedded_objects"] = eos
    note_doc = proto["document"]["note"]

    stripcolor = False

    # Raw/unformatted text. UTF-8.
    #
    # \u2028: manual line breaks
    # \n: entry separators
    text = note_doc["noteText"]

    if debug >= 2:
        with open(outfn + "-1-ori.txt", "w", encoding="utf-8") as f:
            f.write(text)
        with open(outfn + "-1-ori.json", "w", encoding="utf-8") as f:
            json.dump(proto, f, indent=2, ensure_ascii=False, sort_keys=True)

    if debug >= 1:
        print("! text length chars:", len(text))
        print("! text length bytes:", len(bytes(text, "utf-8")))
        print("! length ori:", len(note_doc["attributeRun"]))

    # Merge consecutive fragments having the same attributes
    # Can result in a 10x reduction of fragment numbers, while
    # also giving searchable/meaningful text fields.
    runs = note_doc["attributeRun"]
    attrs = []
    c = 0
    while c < len(runs):
        i = runs[c]
        # Compare fragments with their length zeroed, so only the attributes
        # decide whether two neighbours are mergeable.
        this = i.copy()
        this["length"] = 0
        this_key = json.dumps(this, sort_keys=True)
        while c < len(runs) - 1:
            following = runs[c + 1].copy()
            following["length"] = 0
            if this_key == json.dumps(following, sort_keys=True):
                i["length"] += runs[c + 1]["length"]
                c += 1
            else:
                break
        if stripcolor and "color" in i:
            del i["color"]
        attrs.append(i)
        c += 1

    if debug >= 1:
        print("! length merged:", len(attrs))

    note_doc["attributeRun"] = attrs

    # Fill text for each fragment
    pos = 0
    for i in note_doc["attributeRun"]:
        plen = i["length"]
        tmp = text[pos : pos + plen]
        i["text"] = tmp
        # Bizarre trick to make positions match after encountering
        # high-Unicode codepoints.
        # NOTE(review): presumably "length" counts UTF-16 code units, so each
        # code point above U+FFFF shifts later offsets by one - confirm.
        for cc in tmp:
            if ord(cc) > 65535:
                pos -= 1
        pos += plen

    if debug >= 1:
        print("! slice length total:", pos)

    # Delete lengths
    for i in note_doc["attributeRun"]:
        if "length" in i:
            del i["length"]

    if debug >= 2:
        with open(outfn + "-2-merged.json", "w", encoding="utf-8") as f:
            json.dump(proto, f, indent=2, ensure_ascii=False, sort_keys=True)

    # Split texts at newlines into separate sections.
    # This helps processing it accurately, because Apple uses newlines
    # as item terminators.
    attrs = []
    for i in note_doc["attributeRun"]:
        # Do not tear up links where their text have spilled-in newline.
        # Deal with this later.
        if "\n" in i["text"] and "link" not in i:
            ispl = i["text"].split("\n")
            ispllen = len(ispl)
            for n, t in enumerate(ispl, start=1):
                i2 = i.copy()
                i2["text"] = t
                if n < ispllen:
                    i2["text"] += "\n"
                if len(i2["text"]) > 0:
                    attrs.append(i2)
        else:
            attrs.append(i)

    if debug >= 1:
        print("! length resplit:", len(attrs))

    note_doc["attributeRun"] = attrs

    if debug >= 2:
        with open(outfn + "-3-merged-split-at-eol.json", "w", encoding="utf-8") as f:
            json.dump(proto, f, indent=2, ensure_ascii=False, sort_keys=True)

    # One nesting level of a list. A single space per level does not nest
    # list items in Markdown.
    # NOTE(review): the source showed one space here (runs of spaces were
    # collapsed); 4 is the conventional width - confirm the intended value.
    INDENT_SPACES = "    "

    tsfmt = "%Y-%m-%d %H:%M:%S %z"  # YYYY-MM-DD hh:mm:ss +1030
    create = datetime.datetime.strptime(note["creation_time"], tsfmt)
    update = datetime.datetime.strptime(note["modify_time"], tsfmt)
    createutc = create.astimezone(datetime.timezone.utc)
    updateutc = update.astimezone(datetime.timezone.utc)

    if debug >= 1:
        print("!", create, update)
        print("!", createutc, updateutc)

    out = ""

    if frontmatter:
        out += "---\n"
        out += "title: " + note["title"] + "\n"
        tsfmt = "%Y-%m-%d %H:%M:%SZ"  # YYYY-MM-DD hh:mm:ssZ
        out += "updated: " + updateutc.strftime(tsfmt) + "\n"
        out += "created: " + createutc.strftime(tsfmt) + "\n"
        out += "---\n"

    continuing = False  # True while inside a line spread over several fragments
    cont_f1 = ""
    cont_i1 = ""
    in_list = 0
    in_codefence = False
    prev_i1 = ""
    in_list_close = []  # stack of closing tags for the HTML lists currently open
    indent = ""  # defined up-front so code-fence fragments never read it unset

    c = 0
    while c < len(note_doc["attributeRun"]):
        i = note_doc["attributeRun"][c]
        c += 1

        if debug >= 3:
            out += ">" + str(c) + "<"

        mystr = i["text"]

        # Text ending with a newline is significant. It closes list items.
        # If there is none, it means the entry continues in the next
        # "attributeRun" item. Notes.app uses separate elements when the
        # font style is different or if a special element (e.g. a link)
        # is present in the line.
        if mystr.endswith("\n"):
            eol = True
            mystr = mystr[:-1]
        else:
            eol = False

        # layout
        n1 = ""  # leading newline, if any
        i1 = ""  # indentation
        f1 = ""  # layout prefix markup (bullet, dash, header)
        c1 = ""  # line-ending markup (e.g. FMT_LINEBREAK or FMT_H1_C), if any
        new_in_list = 0
        ao = ""  # alignment wrapper, open
        ac = ""  # alignment wrapper, close

        codefence_start = False
        if line_is_codefence(mystr) and not in_codefence:
            if format == "html":
                mystr = FMT_CODE_O
            n1 = "\n"
            in_codefence = True
            codefence_start = True

        # Strip CR
        mystr_no_cr = mystr.replace("\r", "")
        if autotidy_cr:
            # Strip CR, remains of CRLF EOLs
            mystr = mystr_no_cr

        # NOTE(review): everything down to the Markdown escaping is treated
        # as belonging to this "not in a code fence" block, so fenced content
        # is passed through raw; the flattened source does not show where the
        # block ends - confirm.
        if not in_codefence:
            paragraphStyle = i["paragraphStyle"] if "paragraphStyle" in i else {}
            styleType = paragraphStyle.get("styleType", STYLE_TYPE_DEFAULT)
            indentAmount = paragraphStyle.get("indentAmount", 0)
            alignment = paragraphStyle.get("alignment", 0)

            if alignment == TEXT_ALIGNMENT_CENTER:
                ao = '<p style="text-align: center">'
                ac = "</p>"
            elif alignment == TEXT_ALIGNMENT_JUSTIFY:
                ao = '<p style="text-align: justify">'
                ac = "</p>"
            elif alignment == TEXT_ALIGNMENT_RIGHT:
                ao = '<p style="text-align: right">'
                ac = "</p>"

            if styleType in (
                STYLE_TYPE_LIST_DOT,
                STYLE_TYPE_LIST_DASH,
                STYLE_TYPE_LIST_NUM,
                STYLE_TYPE_CHECKBOX,
            ):
                if styleType == STYLE_TYPE_CHECKBOX:
                    checkboxDone = paragraphStyle["checklist"]["done"] == 1
                else:
                    checkboxDone = None

                if not continuing:
                    # Make sure to have an empty line before starting a list
                    # (otherwise it may not render as a list)
                    if in_list == 0 and not out.endswith("\n\n") and len(out) > 0:
                        n1 = "\n"

                    if styleType == STYLE_TYPE_LIST_DOT:
                        f1 = FMT_ITEM_DOT
                    elif styleType == STYLE_TYPE_LIST_DASH:
                        f1 = FMT_ITEM_DASH
                    elif styleType == STYLE_TYPE_LIST_NUM:
                        f1 = FMT_ITEM_LIST
                    elif styleType == STYLE_TYPE_CHECKBOX:
                        if checkboxDone:
                            f1 = FMT_ITEM_CHKE
                        else:
                            f1 = FMT_ITEM_CHKD

                    i1 = indentAmount * INDENT_SPACES

                new_in_list = styleType
            elif styleType == STYLE_TYPE_TITLE:
                f1 = FMT_H1_O
                c1 = FMT_H1_C
            elif styleType == STYLE_TYPE_HEADING:
                f1 = FMT_H2_O
                c1 = FMT_H2_C
            elif styleType == STYLE_TYPE_SUBHEADING:
                f1 = FMT_H3_O
                c1 = FMT_H3_C
            elif styleType == STYLE_TYPE_MONOSPACED:
                n1 = "\n"
                f1 = FMT_CODE_O
                c1 = FMT_CODE_C
            else:
                # Forced newlines for lines not part of lists.
                # Needed for Markdown to avoid joining these lines together.
                if in_list == 0:
                    html_with_div = True

                    if eol:
                        # NOTE(review): the flattened source had an
                        # "if html_with_div: / if mystr_no_cr == '': / else:"
                        # ladder here whose nesting is ambiguous. This keeps
                        # the reading in which every terminated line gets a
                        # forced break - confirm.
                        c1 = FMT_LINEBREAK

                    # Not strictly necessary, but makes output more alike the
                    # Notes app built-in HTML export
                    if format == "html" and html_with_div:
                        if eol:
                            c1 += "</div>"
                        if not continuing:
                            f1 = "<div>"

            # Column at which continuation lines of this entry are aligned.
            if continuing:
                indent = cont_i1 + len(cont_f1) * " "
            else:
                indent = i1 + len(f1) * " "

            # Process payload

            # Escape user content that may interfere with HTML
            if format == "html":
                mystr = (
                    mystr.replace("&", "&amp;")
                    .replace("<", "&lt;")
                    .replace(">", "&gt;")
                    # Keep runs of spaces visible. Only PAIRS are replaced;
                    # replacing every single space would double all spacing.
                    .replace("  ", "&nbsp;&nbsp;")
                )

            # Upconvert common "manual separators" (vertical line ASCII art)
            # to markup. Not strictly necessary and we recommend to disable
            # this if it interferes with content.
            if (
                autotidy_hr
                and not continuing
                and in_list == 0
                and mystr_no_cr != ""
                and (
                    mystr_no_cr.replace("*", "") == ""
                    or mystr_no_cr.replace("-", "") == ""
                    or mystr_no_cr.replace("=", "") == ""
                )
            ):
                mystr = FMT_LINE
                if c1 == FMT_LINEBREAK:
                    c1 = ""
                # Strip forced linefeed before it.
                if out.endswith("\n" + FMT_LINEBREAK + "\n"):
                    out = out[: -len("\n" + FMT_LINEBREAK + "\n")]
                    n1 = "\n\n"
                # Make sure to include one linefeed before it.
                # Otherwise Markdown renderers interpret it as
                # a section header marker.
                elif not out.endswith("\n\n") and len(out) > 0:
                    n1 = "\n"

            # Escape user content that may interfere with Markdown markup
            if format == "md":
                # Also escape ']'?
                mystr = (
                    mystr.replace("[", "\\[")
                    .replace("__", "\\_\\_")
                    .replace("**", "\\*\\*")
                )

        if "link" in i:
            prep = ""
            if mystr != "":
                if mystr[0] == "\u2028" or mystr[0] == "\n":
                    # Cleanup newlines sometimes sneaking into the beginning
                    # of the link text
                    prep = FMT_LINEBREAK + "\n" + indent
                    mystr = mystr[1:]
                if i["link"] == mystr:
                    mystr = FMT_LINK_PURE.format(mystr)
                elif i["link"] == "http://" + mystr:  # Bump auto-links to HTTPS
                    mystr = FMT_LINK_NAMED.format("https://" + mystr, mystr)
                else:
                    mystr = FMT_LINK_NAMED.format(i["link"], mystr)
                mystr = prep + mystr
        elif "attachmentInfo" in i:
            # Label of the attachment; starts out as its identifier and is
            # replaced by the file name when one is known.
            att_name = i["attachmentInfo"]["attachmentIdentifier"]
            myeo = None
            for oi in eos:
                if oi["uuid"] == att_name:
                    myeo = oi
                    break
            mystr = ""
            if myeo is not None:
                att_type = myeo["type"]
                if debug >= 1:
                    print("! attachment:", myeo["primary_key"], att_type)
                if att_type == "public.url":
                    if myeo["url"] is not None:
                        mystr += FMT_LINK_PURE.format(myeo["url"])
                    else:
                        mystr += FMT_LINK_PURE.format("url://lost")
                elif att_type == "com.apple.notes.inlinetextattachment.hashtag":
                    if myeo["alt_text"] is not None:
                        mystr += myeo["alt_text"]
                    else:
                        # Seen in "Recently Deleted" in iCloud, which was also
                        # not appearing anymore on the UI (aka "fake deleted"),
                        # but still there and also occupying storage.
                        mystr += "#{lost_tag}"
                elif att_type == "com.apple.notes.table":
                    # | Name       | Size   | Color          |
                    # | ---        | ---    | ---            |
                    # | lime       | small  | green          |
                    # | orange     | medium | orange         |
                    # | grapefruit | large  | yellow or pink |
                    mystr = ""
                    tbl = FMT_TABLE_O
                    headerdone = False
                    for row in myeo["table"]:
                        rr = ""
                        sep = ""
                        for col in row:
                            if format == "html":
                                if not headerdone:
                                    rr += (
                                        FMT_TABLE_HEADER_O
                                        + col
                                        + FMT_TABLE_HEADER_C
                                    )
                                else:
                                    rr += FMT_TABLE_CELL_O + col + FMT_TABLE_CELL_C
                            elif format == "md":
                                rr += FMT_TABLE_CELL_O + col + FMT_TABLE_CELL_C
                                sep += FMT_TABLE_HEADER_O + FMT_TABLE_HEADER_C
                        tbl += (
                            "\n"
                            + indent
                            + FMT_TABLE_ROW_O
                            + rr.rstrip()
                            + FMT_TABLE_ROW_C
                        )
                        if not headerdone:
                            # Markdown needs a separator row after the header.
                            if format == "md":
                                tbl += (
                                    "\n"
                                    + indent
                                    + FMT_TABLE_ROW_O
                                    + sep.rstrip()
                                    + FMT_TABLE_ROW_C
                                )
                            headerdone = True
                    mystr += tbl + "\n" + FMT_TABLE_C + "\n"
                elif att_type == "com.apple.notes.gallery":
                    mystr += "(gallery of {})".format(len(myeo["child_objects"]))
                    for mycho in myeo["child_objects"]:
                        if "filename" in mycho:
                            att_name = mycho["filename"]
                            if att_name is None:
                                att_name = "unnamed"
                        if "backup_location" in mycho:
                            filepath = mycho["backup_location"]
                        else:
                            filepath = mycho["filepath"]
                        mystr += (
                            "\n"
                            + indent
                            + FMT_IMAGE.format(
                                fn_to_url(filepath, embedimages), att_name
                            )
                        )
                else:
                    if "filename" in myeo:
                        att_name = myeo["filename"]
                        if att_name is None:
                            att_name = "unnamed"
                    if "backup_location" in myeo:
                        filepath = myeo["backup_location"]
                    else:
                        filepath = myeo["filepath"]
                    if (
                        myeo["conforms_to"] == "image"
                        or i["attachmentInfo"]["typeUti"] == "com.apple.drawing"
                    ):
                        mystr += FMT_IMAGE.format(
                            fn_to_url(filepath, embedimages), att_name
                        )
                    elif "thumbnails" in myeo and myeo["thumbnails"]:
                        th = myeo["thumbnails"][-1]  # pick the largest
                        if "backup_location" in th:
                            thumbnail = th["backup_location"]
                        else:
                            thumbnail = th["filepath"]
                        mystr += FMT_LINK_IMAGE.format(
                            fn_to_url(thumbnail, embedimages),
                            fn_to_url(filepath),
                            att_name,
                        )
                    else:
                        mystr += FMT_LINK_NAMED.format(fn_to_url(filepath), att_name)
                    # Text-like attachments are additionally shown inline.
                    if att_type in (
                        "public.vcard",
                        "public.comma-separated-values-text",
                    ):
                        mystr += "\n\n" + FMT_CODEBLOCK_O + "\n"
                        with open(
                            filepath, "r", encoding="utf-8", errors="replace"
                        ) as f:
                            mystr += f.read()
                        mystr += "\n" + FMT_CODEBLOCK_C + "\n"
            else:
                mystr += "{" + att_name + " attachment not found" + "}"
        else:
            # normal text

            # This avoids leaving a indent-spaces-only line below the
            # item, while also preserving the extra closing newline
            # as it appears in Notes.
            if mystr.endswith("\u2028"):
                mystr = mystr[:-1] + FMT_LINEBREAK
                if eol:
                    mystr += FMT_LINEBREAK
                else:
                    mystr += "\n" + indent

            mystr = mystr.replace(
                "\u2028", FMT_LINEBREAK + "\n" + indent
            )  # \u2028 = LINE SEPARATOR

        # font style
        wo = ""  # basic styles
        wc = wo
        uo = ""  # underline
        uc = uo
        so = ""  # strikethrough
        sc = so
        yo = ""  # superscript/subscript
        yc = yo
        xo = ""  # span style
        xc = xo

        if mystr != "" and not in_codefence:
            st = []  # span styles

            if "fontWeight" in i:
                fontWeight = i["fontWeight"]
                if fontWeight == FONT_TYPE_BOLD:
                    wo = FMT_BOLD_O
                    wc = FMT_BOLD_C
                elif fontWeight == FONT_TYPE_ITALIC:
                    wo = FMT_ITALIC_O
                    wc = FMT_ITALIC_C
                elif fontWeight == FONT_TYPE_BOLD_ITALIC:
                    wo = FMT_BOLD_O + FMT_ITALIC_O
                    wc = FMT_ITALIC_C + FMT_BOLD_C

            if "underlined" in i and i["underlined"] == 1:
                uo = "<u>"
                uc = "</u>"

            if "strikethrough" in i and i["strikethrough"] == 1:
                so = "<s>"
                sc = "</s>"

            if "superscript" in i:
                if i["superscript"] < 0:  # subscript
                    yo = "<sub>"
                    yc = "</sub>"
                elif i["superscript"] > 0:  # superscript
                    yo = "<sup>"
                    yc = "</sup>"

            if "font" in i:
                font = i["font"]
                if "pointSize" in font:
                    # Notes.app built-in export uses "px" (with the pointSize value)
                    st.append(
                        "font-size: " + str(font["pointSize"]) + "px"
                    )  # not "pt"!
                if "fontName" in font:
                    st.append("font-family: " + font["fontName"])
                if "fontHints" in font:
                    # Purpose undiscovered. Values seen: 1
                    st.append("-notes-font-hints: " + str(font["fontHints"]))

            # Omit color for links to avoid noise. Most Markdown renderers
            # color links by default, overriding custom colors we would set
            # here.
            if "color" in i and "link" not in i:
                color = i["color"]
                if color["alpha"] != 1:
                    if css4:
                        rgba = "rgb({} {} {} / {})"
                    else:
                        rgba = "rgba({}, {}, {}, {})"
                    colorhtml = rgba.format(
                        int(color["red"] * 255),
                        int(color["green"] * 255),
                        int(color["blue"] * 255),
                        color["alpha"],
                    )
                else:
                    colorhtml = "#{:02x}{:02x}{:02x}".format(
                        int(color["red"] * 255),
                        int(color["green"] * 255),
                        int(color["blue"] * 255),
                    )
                st.append("color: " + colorhtml)

            if len(st) > 0:
                xo = '<span style="{}">'.format("; ".join(st) + ";")
                xc = "</span>"

        # Omit forced-newline-only lines after a section separator
        if (
            c1 == FMT_LINEBREAK
            and mystr_no_cr == ""
            and out.endswith("\n" + FMT_LINE + "\n")
            and not in_codefence
        ):
            c1 = ""

        # Construct output line

        # Ensure that we move any space prefixes or suffixes outside the
        # markup. Markdown renderers ignore whitespace-separated markups.
        o1 = ""
        if mystr != "" and xc == "" and uc == "" and sc == "" and wc != "":
            olen = len(mystr)
            slen = len(mystr.rstrip())
            if olen != slen:
                c1 = mystr[slen:] + c1
                mystr = mystr.rstrip()
            olen = len(mystr)
            slen = len(mystr.lstrip())
            if olen != slen:
                o1 = mystr[: olen - slen]
                mystr = mystr.lstrip()

        # Quick hack, might not fit/cover all situations
        if ao != "":
            c1 = c1.replace(FMT_LINEBREAK, "")

        if not codefence_start and line_is_codefence(mystr) and in_codefence:
            if format == "html":
                # The closing fence must emit the CLOSING tag; emitting the
                # opening tag again left the <code> element unterminated.
                mystr = FMT_CODE_C
            in_codefence = False

        if not continuing:
            if format == "html" and (in_list != new_in_list or prev_i1 != i1):
                list_markup = ""

                i1n = len(i1) / len(INDENT_SPACES)
                prev_i1n = len(prev_i1) / len(INDENT_SPACES)

                if new_in_list == STYLE_TYPE_LIST_NUM:
                    lo = FMT_LIST_NUM_O
                    lc = FMT_LIST_NUM_C
                elif new_in_list == STYLE_TYPE_LIST_DASH:
                    lo = FMT_LIST_DASH_O
                    lc = FMT_LIST_DASH_C
                else:
                    lo = FMT_LIST_DOT_O
                    lc = FMT_LIST_DOT_C

                if new_in_list == 0:
                    # Left list context entirely: close every list that is
                    # still open instead of opening a spurious <ul> around
                    # the following non-list text.
                    while in_list_close:
                        closer = in_list_close.pop()
                        list_markup += closer["i1"] + closer["c"] + "\n"
                elif i1n > prev_i1n:
                    # Deeper nesting: open one list per additional level.
                    while i1n > prev_i1n:
                        list_markup += i1 + lo + "\n"
                        in_list_close.append({"c": lc, "i1": i1})
                        i1n -= 1
                elif i1n < prev_i1n:
                    # Shallower nesting: close one list per level left.
                    while i1n < prev_i1n:
                        if in_list_close:
                            closer = in_list_close.pop()
                            list_markup += closer["i1"] + closer["c"] + "\n"
                        i1n += 1
                else:
                    # Same level, different list kind (or first list):
                    # close the current one, if any, and open the new one.
                    if in_list_close:
                        closer = in_list_close.pop()
                        list_markup += closer["i1"] + closer["c"] + "\n"
                    list_markup += i1 + lo + "\n"
                    in_list_close.append({"c": lc, "i1": i1})

                if list_markup != "":
                    if not out.endswith("\n"):
                        out += "\n"
                    out += list_markup

            in_list = new_in_list
            prev_i1 = i1

        out += (
            n1
            + i1
            + f1
            + o1
            + wo
            + so
            + uo
            + yo
            + ao
            + xo
            + mystr
            + xc
            + ac
            + yc
            + uc
            + sc
            + wc
            + c1
        )

        if eol:
            out += "\n"
            continuing = False
        else:
            if not continuing:
                # Remember the layout of the line's first fragment so later
                # fragments can align continuation lines with it.
                cont_f1 = f1
                cont_i1 = i1
            continuing = True

    # Close HTML lists that are still open at the end of the note.
    if format == "html":
        while in_list_close:
            closer = in_list_close.pop()
            if not out.endswith("\n"):
                out += "\n"
            out += closer["i1"] + closer["c"] + "\n"

    if not out.endswith("\n"):
        out += "\n"

    print("Writing '" + outfnext + "'")

    with open(outfnext, "w", encoding="utf-8") as f:
        f.write(out)

    # Requires macOS + Apple Developer Tools
    if os.path.isfile("/usr/bin/SetFile"):
        tsfmt = "%m/%d/%Y %H:%M:%S"  # "MM/DD/YYYY [hh:mm:[:ss] [AM | PM]]"
        # Argument list instead of a shell string: no quoting of the file
        # name is needed. TZ is set to empty, as the original command did,
        # because the timestamps passed are in UTC.
        subprocess.run(
            [
                "/usr/bin/SetFile",
                "-d",
                createutc.strftime(tsfmt),
                "-m",
                updateutc.strftime(tsfmt),
                outfnext,
            ],
            env=dict(os.environ, TZ=""),
            check=False,
        )
    else:
        # timestamp() honours the datetime's own UTC offset, whereas
        # mktime(timetuple()) would reinterpret its wall-clock fields in the
        # local time zone.
        mtime = update.timestamp()
        os.utime(outfnext, times=(mtime, mtime))

    return
# Command-line entry point: parse the options, load the parser-made JSON and
# export every requested note in every selected format.
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="""Convert JSON exports to Markdown or HTML.
First create an export with:
https://github.com/threeplanetssoftware/apple_cloud_notes_parser
e869efe6fac0927eb9a7c5327c67415765b3a6ec
(2022-12-09 20:08:50 -0500) or later.""",
)
parser.add_argument(
    "--overwrite",
    dest="overwrite",
    action="store_true",
    help="overwrite existing output",
)
# NOTE: with default=True Markdown is always exported; --html adds HTML
# output on top of it. Kept as-is for backward compatibility.
parser.add_argument(
    "--md", dest="to_md", action="store_true", default=True, help="export in Markdown"
)
parser.add_argument(
    "--html", dest="to_html", action="store_true", help="export in HTML"
)
parser.add_argument(
    "--json",
    dest="json_input",
    action="store",
    default="",
    help="input JSON (or export directory root)",
)
parser.add_argument(
    "--output-prefix",
    dest="out_prefix",
    action="store",
    default="exported-Note-",
    help="output prefix",
)
parser.add_argument(
    "--embed", dest="embedimages", action="store_true", help="embed images"
)
parser.add_argument(
    "--frontmatter",
    dest="frontmatter",
    action="store_true",
    help="add front matter (title and dates)",
)
parser.add_argument(
    "--md-linebreaks",
    dest="md_linebreaks",
    action="store_true",
    help="use native Markdown linebreaks (double-space) instead of <br>",
)
parser.add_argument(
    "--no-tidy-hr",
    dest="notidy_hr",
    action="store_true",
    help="do not convert common manual line separators (full line of '*', '-' or '=' characters) to markup",
)
parser.add_argument(
    "--no-tidy-cr",
    dest="notidy_cr",
    action="store_true",
    help="do not delete <CR> characters",
)
# type=int lets argparse reject a non-numeric level with a usage error
# instead of a ValueError traceback later on.
parser.add_argument(
    "--debug",
    dest="debug",
    action="store",
    type=int,
    default=0,
    help="debug level 0-3",
)
parser.add_argument(
    dest="id_list", metavar="note IDs, export all if none specified", nargs="*"
)

args = parser.parse_args()

# Point to an apple_cloud_notes_parser output JSON:
if not args.json_input:
    print(
        "! Error: You must set the input with --json (e.g. 'output/json/all_notes_1.json')"
    )
    # Errors exit with a non-zero status so callers can detect the failure.
    raise SystemExit(1)

# A directory is taken as the export root holding the default JSON file.
if os.path.isdir(args.json_input):
    args.json_input = os.path.join(args.json_input, "json/all_notes_1.json")

formats = []
if args.to_md:
    formats.append("md")
if args.to_html:
    formats.append("html")

if not formats:
    print("! Error: You must set at least one export format via --md and/or --html")
    raise SystemExit(1)

try:
    with open(args.json_input, "r", encoding="utf-8") as f:
        notes = json.load(f)
except OSError:
    print("! Error: Could not open input JSON:", args.json_input)
    raise SystemExit(1)
except ValueError:
    # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
    print("! Error: Could not parse input JSON:", args.json_input)
    raise SystemExit(1)

# Without explicit IDs, export every note found in the input.
if not args.id_list:
    args.id_list = list(notes["notes"])

for note_id in args.id_list:
    if note_id in notes["notes"]:
        note = notes["notes"][note_id]
        for fmt in formats:
            note_export(
                note,
                out_prefix=args.out_prefix,
                overwrite=args.overwrite,
                format=fmt,
                embedimages=args.embedimages,
                frontmatter=args.frontmatter,
                md_linebreaks=args.md_linebreaks,
                debug=args.debug,
                autotidy_cr=not args.notidy_cr,
                autotidy_hr=not args.notidy_hr,
            )
    else:
        print("! Warning: Note not found:", note_id)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment