Skip to content

Instantly share code, notes, and snippets.

@jswetzen
Created February 9, 2022 13:54
Show Gist options
  • Save jswetzen/edd7d553079082df1d440ab07b522187 to your computer and use it in GitHub Desktop.
Save jswetzen/edd7d553079082df1d440ab07b522187 to your computer and use it in GitHub Desktop.
Python script for importing iCloud notes into Joplin via Gmail
# Import Data from iCloud Notes to Joplin
# Developed and tested on Ubuntu only
# 1. Add a gmail connection to Notes
# 2. Copy all your note folders into that gmail space in Notes
# 3. Wait for all the notes to sync to your mailbox
# 4. Download all emails tagged with Notes with takeout.google.com
# 5. For each of the resulting mbox files, run this script
# 6. Import the markdown folder (with front matter) into Joplin
# Requires
# - beautifulsoup4 (imported as bs4)
# - markdownify
# - python-dateutil (imported as dateutil)
# - python-magic (with required system packages)
# - Wand (with required system packages)
# Mailbox file name should be <prefix><name>.mbox
# Resulting directory will be <name>/
# Folder to convert: the input mailbox is read from <prefix><name>.mbox and
# the generated Markdown notes are written to the directory <name>/.
name = 'Subfolder'
# Leading part of the mbox file name that precedes the folder name.
prefix = 'Notes-'
import os
import mailbox
import email
import os
import mailbox
import email
from email import policy
from email.parser import BytesParser
from dateutil import parser
from bs4 import BeautifulSoup
from markdownify import markdownify as md
import magic
from wand.image import Image
# Convert every message in <prefix><name>.mbox into a Markdown note with YAML
# front matter (<name>/<n>.md). Images embedded in a note are written to
# _resources/ and referenced from the note as ../_resources/<file>.

# Directory (relative to the working directory) that receives the images.
_RESOURCE_DIR = '_resources'

# Both output directories must exist before anything is written to them.
os.makedirs(name, exist_ok=True)
os.makedirs(_RESOURCE_DIR, exist_ok=True)

saved_files = []
num = 0
mbox_path = f'{prefix}{name}.mbox'
for message in mailbox.mbox(mbox_path, factory=BytesParser(policy=policy.default).parse):
    num += 1

    # policy.default already yields RFC 2047-decoded header values, so the
    # subject can be used directly; a missing header gets a placeholder.
    subject = message['Subject']
    title = str(subject) if subject is not None else 'Untitled'
    # Escape backslashes before quotes so the title stays a valid YAML
    # double-quoted scalar.
    yaml_title = title.replace('\\', '\\\\').replace('"', '\\"')

    raw_date = message['Date']
    date = parser.parse(str(raw_date)).isoformat() if raw_date is not None else None

    # Maps Content-Id -> (original file name, name of the saved file).
    attachments = {}
    if message.is_multipart():
        html = None
        for part in message.get_payload():
            content_type = part.get_content_type()
            if content_type == 'text/html':
                if html is None:
                    html = BeautifulSoup(part.get_content(), 'html.parser')
                else:
                    print(f'Warning: Multiple html parts for "{title}"')
            elif content_type in ('image/jpeg', 'image/png', 'application/octet-stream'):
                # Drop any path components and tolerate a missing file name.
                filename = os.path.basename(part.get_filename() or '') or 'attachment'
                # len(saved_files) grows with every saved image, so the
                # separated prefix is unique across the whole run.
                savename = f'{num}_{len(saved_files)}_{filename}'
                payload = part.get_payload(decode=True)
                if content_type == 'application/octet-stream':
                    # Generic binary part: sniff the real type from the bytes.
                    mime_type = magic.from_buffer(payload, mime=True)
                    if mime_type not in ('image/png', 'image/jpeg', 'image/tiff'):
                        print("Unsupported attachment type: ", mime_type)
                        continue
                    savename += '.png'
                saved_files.append(savename)
                with Image(blob=payload) as img:
                    with img.convert('png') as output_img:
                        # Save the converted image, not the source one.
                        output_img.save(filename=f'{_RESOURCE_DIR}/{savename}')
                content_id = part.get('Content-Id')
                if content_id is None:
                    # Without a Content-Id the note body cannot reference it.
                    print(f'Warning: Attachment "{filename}" in "{title}" has no Content-Id')
                    continue
                attachments[content_id.strip().strip('<>')] = (filename, savename)

        if html is None:
            # No top-level text/html part (e.g. the body is nested inside a
            # multipart/alternative): fall back to the best available body.
            print(f'Warning: No top-level html part for "{title}"')
            body = message.get_body(preferencelist=('html', 'plain'))
            html = BeautifulSoup(body.get_content() if body is not None else '', 'html.parser')

        # Notes embeds images as <object data="cid:...">; swap each one for an
        # <img> that points at the saved file.
        for obj in html.find_all('object'):
            cid = str(obj.get('data', ''))
            if cid.startswith('cid:'):
                cid = cid[4:]
            alt_text, saved_name = attachments.get(cid, ('', ''))
            img_tag = html.new_tag('img')
            img_tag['alt'] = alt_text
            img_tag['src'] = f'../{_RESOURCE_DIR}/' + saved_name
            obj.replace_with(img_tag)
        text = md(str(html), header_style='ATX')
    else:
        text = md(message.get_content(), header_style='ATX')

    front_matter = [
        '---',
        f'title: "{yaml_title}"',
        'author: Johan Swetzén',
        'source_application: iCloud Notes',
    ]
    if date is not None:
        front_matter += [f'created: {date}', f'updated: {date}']
    front_matter.append('---')

    # Explicit UTF-8 so non-ASCII notes do not depend on the locale encoding.
    with open(f'{name}/{num}.md', 'w', encoding='utf-8') as f:
        f.write('\n'.join(front_matter) + '\n\n')
        f.write(text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment