Skip to content

Instantly share code, notes, and snippets.

@RhetTbull
Last active March 15, 2023 23:52
Show Gist options
  • Save RhetTbull/025615064d2b8d57cbd3fff5e211137b to your computer and use it in GitHub Desktop.
Save RhetTbull/025615064d2b8d57cbd3fff5e211137b to your computer and use it in GitHub Desktop.
Extract embedded media in Apple Notes notes exported to a bplist file
"""Rescue lost Apple Notes notes, reference: https://fosstodon.org/@Cyberneticist@hachyderm.io/109992039449199371
To run this:
1. Install python (I recommend the latest version of python 3 from python.org)
2. Install the dependencies: python3 -m pip install bpylist2
3. Run the script: python3 noterescue.py <path to note file(s)>
The extracted files will be saved in the parent directory of the note file.
The files will be named like:
<note name> - <UUID>_media.<extension>
<note name> - <UUID>_previewImage_0.<extension>
The file ending in _media is the original photo taken by the camera app and the
_previewImage_0 is the cropped image that is shown in the notes app.
Recombining these into PDFs is left as an exercise for the reader.
"""
from __future__ import annotations
import argparse
import os
import pathlib
import re
import sys
from dataclasses import dataclass, field
from bpylist2 import archiver
from bpylist2.archive_types import DataclassArchiver
# Leading bytes used to recognise the type of an extracted attachment
JPEG_START = bytes((0xFF, 0xD8))
PDF_START = b"%PDF"
# The files are binary plist (bplist) files containing an NSKeyedArchiver archive,
# the format used to serialize object data on iOS and macOS.
# I've reverse engineered the serialized classes; they are deserialized into
# the dataclasses defined below.
@dataclass
class ICDataPersister(DataclassArchiver):
    """Python counterpart of the archived ICDataPersister class.

    The attribute names must stay exactly as written because they are the
    names used when the class is registered with the archiver.

    All fields are required, in this order. The original annotations used
    ``field(default_factory=...)`` in the *type* position, which never
    supplied a default; real types are used here instead.
    """

    # Maps attachment keys (e.g. "<id>_media", "<id>_previewImage_0") to the
    # raw attachment bytes; this is what save_attachments() reads.
    identifierToDataDictionary: dict[str, bytes]
    objectIdentifier: str
    cacheDirectoryURL: str
    accumulatedDataSize: int
    # Element type is not established by this script -- TODO confirm
    allURLs: list
@dataclass
class ICNotePasteboardData(DataclassArchiver):
    """Python counterpart of the archived ICNotePasteboardData class.

    This is the type load_note_data_from_plist() is declared to return.
    """

    attributedStringData: bytes
    # Holds the attachment data that save_attachments() extracts
    dataPersister: ICDataPersister
@dataclass
class NSURL(DataclassArchiver):
    """Python counterpart of archived NSURL objects.

    Registered with the archiver so NSURL values inside a note archive
    can be deserialized.
    """

    NSrelative: str
    NSbase: str
# Tell the archiver which Python class stands in for each archived class name
# so it can deserialize the data
archiver.update_class_map(
    {
        "ICDataPersister": ICDataPersister,
        "ICNotePasteboardData": ICNotePasteboardData,
        "NSURL": NSURL,
    }
)
def main():
    """Parse the command line and extract the attachments of every note file.

    With no file arguments the usage text is printed and the process exits
    with status 0.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("files", nargs="*")
    note_files = arg_parser.parse_args().files

    # Nothing to do: show help instead of silently succeeding
    if not note_files:
        usage()
        sys.exit(0)

    print(f"Processing {len(note_files)} files")
    for note_path in map(pathlib.Path, note_files):
        print(f"Processing {note_path}")
        note_data = load_note_data_from_plist(note_path)
        # Attachments are written next to the note file, named after its stem
        saved = save_attachments(note_path.stem, note_path.parent, note_data)
        print(f"Extracted ({len(saved)}) attachments")
def usage():
    """Print usage information.

    The output location named here matches what main() does: each note's
    attachments are written to the directory containing that note file,
    not to the current working directory.
    """
    print("Usage: python3 noterescue.py <path to note file(s)>")
    print("Extracted attachments will be saved in the same directory as each note file")
def save_attachments(
    name: str, path: str | pathlib.Path, data: ICNotePasteboardData
) -> list[str]:
    """Save the attachments from the note data

    For every "<id>_media" entry the media itself is saved, plus the largest
    "<id>_previewImage_*" entry belonging to the same id (if any).

    Args:
        name: The name of the note; used as the prefix of each output file name
        path: The directory to save the attachments in
        data: The unarchived note data

    Returns:
        A list of the saved files (existing files are never overwritten; a
        numbered name is used instead)
    """
    media_suffix = "_media"
    data_dict = data.dataPersister.identifierToDataDictionary

    # Select on the full "_media" suffix and strip only that suffix, so the
    # key rebuilt below is always one that exists in data_dict.
    media_keys = [
        key[: -len(media_suffix)] for key in data_dict if key.endswith(media_suffix)
    ]
    print(f"Found {len(media_keys)} media attachments: {media_keys=}")

    # which attachments do we want to save?
    keys_to_save = [f"{key}{media_suffix}" for key in media_keys]

    # find largest preview
    # we want the full size image, not the thumbnail but the order
    # (previewImage_0, previewImage_1, ...) is not deterministic
    known_ids = set(media_keys)
    preview_keys = {}  # id -> (key of largest preview seen so far, its size)
    for key, value in data_dict.items():
        if "previewImage" not in key:
            continue
        # the id is everything before the first underscore
        key_base = re.sub(r"_.*$", "", key)
        if key_base not in known_ids:
            continue
        best = preview_keys.get(key_base)
        if best is None or len(value) > best[1]:
            preview_keys[key_base] = (key, len(value))
    keys_to_save.extend(key for key, _ in preview_keys.values())

    # save the attachments
    saved_files = []
    for key in keys_to_save:
        value = data_dict[key]
        if value.startswith(JPEG_START):
            # all the samples I've seen are JPEGs but also check for PDF
            ext = "jpeg"
        elif value.startswith(PDF_START):
            ext = "pdf"
        else:
            print(f"Unknown file type for {key} ({len(value)} bytes)", file=sys.stderr)
            ext = "bin"
        output_file = increment_filename(pathlib.Path(path) / f"{name} - {key}.{ext}")
        with open(output_file, "wb") as f:
            print(f"Writing {output_file} ({len(value)} bytes)")
            f.write(value)
        saved_files.append(output_file)
    return saved_files
def increment_filename(path: str | pathlib.Path) -> str:
    """If filename exists, increment until it doesn't

    A counter of the form " (N)" is inserted before the extension. If the
    name already ends in such a counter, counting continues from it.

    Args:
        path: The desired file path

    Returns:
        path as a string if nothing exists there, otherwise the first
        "<base> (N)<ext>" variant that does not exist
    """
    path = str(path)
    if not os.path.exists(path):
        return path

    # Split off the extension. Slice by length rather than with a negative
    # index: when there is no extension, path[:-0] would be the empty string.
    ext = pathlib.Path(path).suffix
    base = path[: len(path) - len(ext)]

    # Continue from an existing " (N)" counter instead of stacking a second one
    count = 0
    if match := re.search(r"\s\((\d+)\)$", base):
        count = int(match[1])
        base = base[: match.start()]

    while True:
        count += 1
        candidate = f"{base} ({count}){ext}"
        if not os.path.exists(candidate):
            return candidate
def load_note_data_from_plist(path: str) -> ICNotePasteboardData:
    """Read the file at path and return the object the archiver unarchives from it"""
    with open(path, "rb") as plist_file:
        raw_bytes = plist_file.read()
    return archiver.unarchive(raw_bytes)
if __name__ == "__main__":
    # Run the command line interface only when executed as a script
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment