Skip to content

Instantly share code, notes, and snippets.

@jheddings
Last active May 17, 2024 14:20
Show Gist options
  • Save jheddings/80df4f3acaa0f52ea9523be093341f46 to your computer and use it in GitHub Desktop.
Save jheddings/80df4f3acaa0f52ea9523be093341f46 to your computer and use it in GitHub Desktop.
Import Apple Notes into Notion.
#!/usr/bin/env python3
# !! NOTE - this script is no longer maintained... please see the repo for further
# updates: https://github.com/jheddings/notes2notion
# this script attempts to migrate from Apple Notes to Notion while retaining as
# much information and formatting as possible. there are limitations to the
# export data from Notes, so we try to preserve the intent of the original note.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import os
import sys
import re
import yaml
from notion.client import NotionClient
from notion.block import PageBlock, TextBlock, CodeBlock, ImageBlock
from notion.block import HeaderBlock, SubheaderBlock, SubsubheaderBlock
from notion.block import BulletedListBlock, NumberedListBlock
from notion.block import CollectionViewBlock, DividerBlock, QuoteBlock
try:
from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
from yaml import Loader, Dumper
################################################################################
## CONFIGURATION
# set this to your current token_v2 cookie (use an inspector in your browser to obtain)
# NOTE(review): this value is a login credential - avoid committing a real token
notion_token_v2 = 'PUT_YOUR_TOKEN_V2_HERE'
# set this to the top-level page for the import - all notes will be added as sub pages
import_page_url = 'ARCHIVE_PAGE_URL'
# if this is set, the script will log progress to this database. additionally,
# the script will consider status in the log before uploading a document again
# (notes that already have a 'Finished' entry are skipped by the main loop)
#
# the database uses the following schema:
# Name (title) -> name of the note
# Note ID (text) -> the original note ID
# Status [Pending, Failed, Finished] -> current status of the migration
# Page (URL) -> the link to the imported note
# Timestamp (Creation date) [optional] -> date/time of the log entry
#
# set to None to disable this feature
import_log_url = None
# by default, this script will skip the first Title line in the note
# (read by append_block: an <h1> is dropped while the target page is still empty)
skip_title = True
# include raw note metadata in the Notion import (added as a YAML code block)
include_meta = True
# include the raw note HTML in the Notion import - note that this can cause problems
# if the notes include pictures, since they are encoded directly in the HTML
include_html = False
# this maps the HTML element from Notes to a Notion block type
# elements that are not listed here fall back to TextBlock in append_block
block_map = {
    'h1' : HeaderBlock,
    'h2' : SubheaderBlock,
    'h3' : SubsubheaderBlock,
    'tt' : CodeBlock,
    'pre' : CodeBlock,
    'ul' : BulletedListBlock,
    'ol' : NumberedListBlock
}
################################################################################
def notes_to_notion(html, page):
    """Parse a note's exported HTML and append its content to a Notion page.

    Only the top-level nodes of the parsed fragment are inspected: the Notes
    export has no html, head or body wrapper. Each node is handed to the
    matching ``append_*`` helper; unrecognized tags are reported on stdout.
    """
    from bs4 import BeautifulSoup

    list_tags = ('ul', 'ol')
    document = BeautifulSoup(html, 'html.parser')

    for node in document.children:
        tag = node.name

        # bare strings between elements carry no tag name - nothing to import
        if tag is None:
            continue

        if tag == 'div':
            # the append_* helpers do the heavy lifting
            append_block(page, node)
        elif tag in list_tags:
            # lists are expanded item by item
            append_list(page, node)
        else:
            print(f'-- UNKNOWN BLOCK: {node.name}')
################################################################################
# Notion supports inline markdown for common formatting...
def markup_text(tag, text):
    """Wrap text in the inline markdown that corresponds to an HTML tag.

    Args:
        tag: name of the HTML element the text came from (e.g. ``'b'``).
        text: the already-extracted text of that element.

    Returns:
        The text wrapped in markdown markers for bold, italic, strike-through
        and link tags. Any other tag, and any empty text, is returned unchanged.
    """
    # an empty run (e.g. <b><br></b>) would otherwise become a literal
    # '****' / '**' / '~~~~' / '<>' in the imported page
    if not text:
        return text

    # bold text
    if tag in ('b', 'strong'):
        return '**' + text + '**'

    # italics
    if tag in ('i', 'em'):
        return '*' + text + '*'

    # strike-through text
    if tag == 'strike':
        return '~~' + text + '~~'

    # standard links
    if tag == 'a':
        return '<' + text + '>'

    # underline ('u') has no markdown equivalent, so it falls through with
    # every other unhandled tag
    return text
################################################################################
def get_block_text(block):
    """Return the text of a parsed HTML node with inline markdown applied.

    Nodes without a tag name are plain strings and are returned as-is. For
    tags, the text of every child is collected recursively, stripped, joined
    with single spaces and finally passed through ``markup_text`` using the
    tag's own name.
    """
    # no-name blocks are just strings...
    if block.name is None:
        return str(block)

    # otherwise, iterate over the text in the child elements
    # we could use this method to do additional processing on the text
    # e.g. we could look for things that look like URL's and make links
    # e.g. we could look for lines that start with '-' and make lists
    pieces = []
    for child in block.children:
        piece = get_block_text(child)
        if piece is None:
            continue

        # strip *before* testing for emptiness: whitespace-only children
        # (such as the newlines between tags) must not contribute empty
        # entries, which would show up as stray spaces after the join
        piece = piece.strip()
        if piece:
            pieces.append(piece)

    return markup_text(block.name, ' '.join(pieces))
################################################################################
def build_schema(thead):
    """Build a collection schema dict from a sequence of column names.

    The first column becomes the ``'title'`` property (type ``'title'``);
    every later column is keyed ``'c<index>'`` with type ``'text'``.
    """
    def column(position, label):
        # the first column is treated differently from the rest
        if position == 0:
            return 'title', {'name': label, 'type': 'title'}
        return f'c{position}', {'name': label, 'type': 'text'}

    return dict(column(position, label) for position, label in enumerate(thead))
################################################################################
def append_block(page, elem):
    """Append the content of one top-level element to the page.

    Direct ``<img>`` children are delegated to ``append_imgs`` and direct
    ``<object>`` children to ``append_objects``. Otherwise the Notion block
    type is picked from the element's first child via ``block_map`` (falling
    back to ``TextBlock``) and a single block holding the element's text is
    added.

    Returns:
        The result of the delegated helper, the newly added block, or None
        when there is nothing to add (no element, no children, empty text,
        or a skipped title line).
    """
    if elem is None:
        return None

    # there may be more than one image in a block
    imgs = elem('img', recursive=False)
    if imgs:
        return append_imgs(page, imgs)

    # handle objects (like tables)
    objs = elem('object', recursive=False)
    if objs:
        return append_objects(page, objs)

    # most of our decisions will be based on the first child of the block...
    # an element without children has no first child; a bare next() would
    # raise StopIteration and abort the whole import
    first_child = next(iter(elem.children), None)
    if first_child is None:
        return None

    if first_child.name == 'h1':
        # if this is the first child on the page, assume it is the title
        if skip_title and len(page.children) == 0:
            return None

    # handle remaining elements as direct mapping to Notion blocks
    # assume any unknown block types are text and "do our best"
    block_type = block_map.get(first_child.name, TextBlock)

    text = get_block_text(elem)
    if not text:
        return None

    return page.children.add_new(block_type, title=text)
################################################################################
def append_text(page, elem, markup=None):
    """Add the element's text to the page as a TextBlock.

    When ``markup`` is given it is placed on both sides of the text.
    Returns the new block, or None if no text could be extracted.
    """
    content = get_block_text(elem)
    if content is None:
        return None

    title = content if markup is None else markup + content + markup
    return page.children.add_new(TextBlock, title=title)
################################################################################
def append_list(page, list_elem):
    """Add one Notion list block per direct ``<li>`` child of the list element.

    The block type is looked up in ``block_map`` by the list's tag name.
    Returns True once the items were added, or None for an unmapped tag.
    """
    item_block = block_map.get(list_elem.name, None)

    if item_block is None:
        print(f'-- Unknown list type - {list_elem.name}')
        return None

    entries = list_elem.find_all('li', recursive=False)
    for entry in entries:
        page.children.add_new(item_block, title=get_block_text(entry))

    # TODO return list of blocks
    return True
################################################################################
def append_objects(page, objs):
    """Append every object element in ``objs`` to the page; always returns True."""
    for current in objs:
        append_object(page, current)

    return True
################################################################################
def append_object(page, elem):
    """Append the content wrapped by an ``<object>`` element to the page.

    Only tables are supported: when the first child of the element is a
    ``<table>`` it is passed to ``append_table`` and that result is returned.
    Any other content is reported on stdout and None is returned.
    """
    # an empty <object> has no first child; a bare next() would raise
    # StopIteration here, so use a default and treat it as nothing to import
    block = next(iter(elem.children), None)
    if block is None:
        return None

    if block.name == 'table':
        return append_table(page, block)

    print(f'-- Unsupported object: {block.name}')
    return None
################################################################################
# FIXME this is my least favorite part of the script...
def append_table(page, table):
    """Import an HTML table as a Notion collection view.

    The first row is used as the column header. The collection and its table
    view are created when the second row is reached; that row and all later
    ones are added as collection rows. Uses the module-level ``client``.

    Returns:
        The CollectionViewBlock that was added to the page.
    """
    # XXX it would make more sense if Notion supported basic markdown tables
    # instead, we have to build a collection view to capture the table data
    block = page.children.add_new(CollectionViewBlock)

    # does Apple ever set a header? I don't think so...
    # XXX maybe we want a flag to use the first table row as a header or not?
    thead = None

    # a table without an explicit <tbody> would make find() return None and
    # crash on the next line; fall back to the rows directly under <table>
    tbody = table.find('tbody')
    if tbody is None:
        tbody = table

    # NOTE(review): a table with a single row never reaches the branch that
    # creates the collection, so the block is left without one - confirm
    # whether that case needs handling
    for tr in tbody.find_all('tr', recursive=False):
        # if no header was provided, we will build it from this row...
        if thead is None:
            thead = list()

        # if we have a header, but no Collection (yet)
        elif block.collection is None:
            schema = build_schema(thead)
            block.collection = client.get_collection(
                client.create_record("collection", parent=block, schema=schema)
            )

            # we need a new view to see our lovely table...
            block.views.add_new(view_type='table')

        # if we have a valid collection, add data directly to rows
        row = None if block.collection is None else block.collection.add_row()

        # start processing the column data...
        tds = tr.find_all('td', recursive=False)
        for idx, td in enumerate(tds):
            text = get_block_text(td)
            # must match the column ids produced by build_schema
            col_id = 'title' if idx == 0 else f'c{idx}'

            # no collection yet means this is the header row
            if block.collection is None:
                thead.append(text)

            if row is not None and text is not None:
                row.set_property(col_id, text)

    return block
################################################################################
def append_imgs(page, imgs):
    """Append every image element in ``imgs`` to the page; always returns True."""
    for picture in imgs:
        append_img(page, picture)

    return True
################################################################################
img_data_re = re.compile('^data:image/([^;]+);([^,]+),(.+)$')
img_http_re = re.compile('^https?://(.+)$')


def append_img(page, img_elem):
    """Upload an embedded (data URI) image and add it to the page.

    The ``src`` attribute must be a ``data:image/<type>;base64,<payload>`` URI.
    The payload is decoded into a temporary file, which is then uploaded to a
    new ImageBlock.

    Returns:
        The ImageBlock that was added (even if the upload itself failed), or
        None when the source is missing, is not a data URI, or uses an
        encoding other than base64.
    """
    import base64
    import tempfile

    # Notes uses embedded images... we need to extract the image, upload it
    # and reference it in the block

    # TODO this probably needs more error handling and better flow

    # .get() avoids a KeyError on an <img> without a src attribute
    img_src = img_elem.get('src')

    m = None if img_src is None else img_data_re.match(img_src)
    if m is None:
        # show only the start of the source: it may be very long
        print(f'-- Unsupported img type: {str(img_src)[:40]}')
        return None

    img_type, img_data_enc, img_data_str = m.groups()

    if img_data_enc != 'base64':
        print(f'-- Unsupported img encoding: {img_data_enc}')
        return None

    img_data = base64.b64decode(img_data_str.encode('ascii'))

    with tempfile.NamedTemporaryFile(suffix=f'.{img_type}') as fp:
        fp.write(img_data)
        # the upload is given only the file name, so the buffered bytes
        # must be flushed out first or a truncated/empty file may be sent
        fp.flush()

        # upload the image to Notion
        block = page.children.add_new(ImageBlock)

        try:
            block.upload_file(fp.name)
        except Exception:
            # best effort: keep the (empty) image block and carry on
            print('!! UPLOAD FAILED')

    return block
################################################################################
def tell_notes(*args):
    """Run AppleScript lines against the Notes app and return the output.

    The given lines are joined with newlines into one script. A non-zero
    result code is reported on stdout and yields None. The outputs ``'null'``,
    ``'true'`` and ``'false'`` are mapped to None, True and False; empty output
    yields None; anything else is returned unchanged.
    """
    import applescript

    result = applescript.tell.app('Notes', "\n".join(args))

    if result.code != 0:
        print(f'!! ERROR - {result.err}')
        return None

    output = result.out

    # do some basic string to type mapping...
    for literal, value in (('null', None), ('false', False), ('true', True)):
        if output == literal:
            return value

    if len(output) == 0:
        return None

    return output
################################################################################
def get_note(note_id):
    """Fetch one note from the Notes app by its short ID.

    AppleScript walks every note of the default account and, for the note
    whose ID ends with ``/<note_id>``, emits a YAML metadata section, a
    ``---`` separator line and the note body.

    Returns:
        The parsed YAML mapping (``meta`` and ``attachments`` keys) with the
        note body added under ``'body'``, or None if AppleScript produced no
        output.
    """
    # to get the data from Notes, we will get a dump from AppleScript
    # as YAML that we can turn back into a Python object

    text = tell_notes(
        # there is no direct way to get a note from AppleScript using the ID...
        # so we have to loop over all notes and look for the right one.
        'repeat with theNote in notes of default account',
        'set noteID to id of theNote as string',

        # the note ID is a full CoreData URL... we only want the pXXXX part
        f'if noteID ends with "/{note_id}" then',

        # determine the Notes folder
        # TODO get the full folder path
        'set folderName to ""',
        'set theContainer to container of theNote',
        'if theContainer is not missing value',
        'set folderName to (name of theContainer) & "/" & folderName',
        'end if',

        # "export" the note data when we find it...
        'set noteMeta to "meta:" ¬',
        ' & "\n id: " & quoted form of (id of theNote as string) ¬',
        ' & "\n name: " & quoted form of (name of theNote as string) ¬',
        ' & "\n folder: " & quoted form of folderName ¬',
        ' & "\n creation_date: \\"" & (creation date of theNote as date) & "\\"" ¬',
        ' & "\n modification_date: \\"" & (modification date of theNote as date) & "\\"" ¬',
        ' & "\n locked: " & (password protected of theNote as boolean) ¬',
        ' & "\n shared: " & (shared of theNote as boolean) ¬',
        ' & "\nattachments:"',

        # FIXME some attachments (like embedded documents) are causing problems...
        #'repeat with theAttachment in attachments of theNote',
        # 'set noteMeta to noteMeta & "\n - id: " & (id of theAttachment) ¬',
        # ' & "\n name: " & (name of theAttachment) ¬',
        # ' & "\n ref: " & (content identifier of theAttachment) ¬',
        # ' & "\n creation_date: " & (creation date of theAttachment as date) ¬',
        # ' & "\n modification_date: " & (modification date of theAttachment as date) ¬',
        # ' & "\n url: " & (url of theAttachment)',
        #'end repeat',

        'return noteMeta & "\n---\n" & (body of theNote as string)',
        'end if',
        'end repeat'
    )

    #print(text)
    if text is None:
        return None

    # parse the output from AppleScript into a Python object...
    # cut at the separator *line* only: a bare '---' may also occur inside the
    # metadata (e.g. in a note or folder name) and must not end that section
    text_meta, _, text_body = text.partition('\n---')

    # NOTE(review): the full Loader can construct arbitrary Python objects and
    # the metadata embeds note/folder names; yaml.safe_load may be the safer
    # choice here - confirm nothing relies on the full loader
    note = yaml.load(text_meta, Loader=Loader)
    note['body'] = text_body.strip()

    #print(yaml.dump(note))
    return note
################################################################################
def get_log_filter(note_id, status):
    """Build the import-log query filter for a note ID and a status.

    Returns a dict with two exact-match clauses (``note_id`` as a string
    match, ``status`` as an enum match) combined with the ``'and'`` operator.
    """
    def exact_match(prop, operator, value):
        # one filter clause requiring an exact value for the given property
        return {
            'property': prop,
            'filter': {
                'operator': operator,
                'value': {
                    'type': 'exact',
                    'value': value,
                },
            },
        }

    clauses = [
        exact_match('note_id', 'string_is', note_id),
        exact_match('status', 'enum_is', status),
    ]

    return {'filters': clauses, 'operator': 'and'}
################################################################################
def get_log_entry(note_id, status):
    """Look up the import-log entry for a note with the given status.

    Queries the module-level ``import_log`` collection.

    Returns:
        The first matching log row, or None when logging is disabled
        (``import_log`` is None) or no row matches.
    """
    if import_log is None:
        return None

    # honor the requested status; it used to be ignored in favor of a
    # hard-coded 'Finished'
    filter_params = get_log_filter(note_id, status)
    result = import_log.query(filter=filter_params)

    return None if len(result) == 0 else result[0]
################################################################################
## MAIN ENTRY
# since note data can get very large, we will extract one note at a time
# the 'notes' object serializes as a list of Core Data URL's...
notes_raw = tell_notes('return notes of default account')

# tell_notes() yields None on an AppleScript error or on empty output; treat
# that as "no notes" instead of letting re.split() fail on a non-string
note_links = re.split(r', *', notes_raw) if isinstance(notes_raw, str) else []

# each link is a full Core Data URL... we only want the trailing pXXXX part
all_notes = [re.sub(r'^.*/(p[0-9]+)', r'\1', link) for link in note_links]

client = NotionClient(token_v2=notion_token_v2)
archive = client.get_block(import_page_url)

# the import log is optional (see import_log_url in the configuration)
import_log = None
if import_log_url is not None:
    import_log_view = client.get_collection_view(import_log_url)
    import_log = import_log_view.collection

# load each note and upload to Notion
for note_id in all_notes:

    # look for an existing 'Finished' entry
    log = get_log_entry(note_id, 'Finished')
    if log is not None:
        print(f'{log.name} (Finished)')
        continue

    # prepare to import...
    note = get_note(note_id)
    if note is None:
        continue

    note_meta = note['meta']
    note_name = note_meta['name']

    # skip locked notes
    if note_meta['locked']:
        continue

    print(f'{note_name} [{note_id}]')

    # set up the log entry if needed
    log = None if import_log is None else import_log.add_row(
        name=note_name, status='Pending', note_id=note_id
    )

    # create a stubbed page for the import
    # TODO support the folder hierarchy from the note
    page = archive.children.add_new(PageBlock, title=note_name)
    if log is not None:
        log.page = page.get_browseable_url()

    html = note['body']
    notes_to_notion(html, page)

    # TODO upload attachments

    # optional raw data goes below a divider at the end of the page
    if include_meta or include_html:
        page.children.add_new(DividerBlock)

    if include_meta:
        meta_text = yaml.dump(note_meta)
        page.children.add_new(CodeBlock, title=meta_text, language='yaml')

    if include_html:
        page.children.add_new(CodeBlock, title=html, language='html')

    # finally, mark the page as uploaded...
    if log is not None:
        log.status = 'Finished'
@moderndayNeo
Copy link

thanks for this script. btw the linked page is private

@jheddings
Copy link
Author

Thanks for the catch... I removed the link since the script is now hosted at the mentioned GitHub repo.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment