Skip to content

Instantly share code, notes, and snippets.

@ThomasHineXYZ
Last active July 20, 2024 17:45
Show Gist options
  • Save ThomasHineXYZ/36b61133a2c86ffa1422ee6c3063f786 to your computer and use it in GitHub Desktop.
Save ThomasHineXYZ/36b61133a2c86ffa1422ee6c3063f786 to your computer and use it in GitHub Desktop.
Google Keep "Takeout" to Markdown Converter
#!/usr/bin/env python
# Google Keep "Takeout" to Markdown Converter
# This allows you to convert your Google Keep notes that are downloaded from
# Google's "Takeout" system. This works with Nextcloud's Notes system.
from datetime import datetime
import json
import os
import shutil
# Location of the extracted `Keep` folder from your Google Takeout archive.
input_path = "/tmp/Takeout/Keep/"

# Extension given to every converted note.
output_extention = ".md"

# Folder that receives the converted notes.
output_path = "/tmp/keep_converted/"

# Google Keep colour name -> hex code, copied from the Takeout export's CSS.
note_colours = {
    "blue": "#3FC3FF",
    "brown": "#D7CCC8",
    "cerulean": "#82B1FF",
    "gray": "#B8C4C9",
    "green": "#95D641",
    "orange": "#FF9B00",
    "pink": "#F8BBD0",
    "purple": "#B388FF",
    "red": "#FF6D3F",
    "teal": "#1CE8B5",
    "yellow": "#FFDA00",
}
def _clean_annotation_value(value):
    """Return an annotation field flattened for use inside a Markdown link.

    Newlines are removed, double quotes become single quotes (the value may
    end up inside a double-quoted link title) and surrounding whitespace is
    stripped. A missing or empty value yields an empty string.
    """
    return (value or "").replace("\n", "").replace('"', "'").strip()


# Convert every `.txt` and `.json` entry found directly inside `input_path`
# into a Markdown document under `output_path`. Notes that carry labels are
# filed in a sub-folder named after their first label.
files = os.listdir(input_path)

for file in files:
    title = "No Title"
    colour = ""
    content = ""
    main_label = ""

    # Split the file name up in to the name and the extension
    file_name, file_extension = os.path.splitext(file)
    file_extension = file_extension.lower()

    # If it's any other file type, just skip it
    if file_extension not in (".txt", ".json"):
        continue

    source_file = os.path.join(input_path, file)

    # Default to the source file's own modification time. Plain text notes
    # carry no timestamp of their own, and without this default the
    # `os.utime` call below would hit an undefined (or stale) value.
    timestamp = int(os.path.getmtime(source_file))

    if file_extension == ".txt":
        # Read the contents of the text file
        with open(source_file, "r", encoding="utf-8") as text_file:
            content += text_file.read()
        title = file_name
    else:
        with open(source_file, "r", encoding="utf-8") as json_file:
            json_data = json.load(json_file)

        # Set the title to what it had before; a whitespace-only title
        # falls back to the default instead of producing an empty heading
        if json_data.get("title"):
            title = json_data["title"].strip() or title

        # Set the colour, if it isn't default (unknown names map to "")
        colour = note_colours.get((json_data.get("color") or "").lower(), "")

        # Grab the content if there's some there
        if json_data.get("textContent"):
            content += json_data["textContent"].strip() + "\n"

        # List items
        for list_item in json_data.get("listContent") or []:
            item_text = list_item.get("text", "")
            if list_item.get("isChecked"):
                content += f"🗹 ~~{item_text}~~\n"
            else:
                content += f"☐ {item_text}\n"

        # Attachments
        attachments = json_data.get("attachments")
        if attachments:
            content = content.strip()  # Just to clear out any unwanted ending whitespace
            content += "\n\n## Attachments:\n"

            # Create the attachment folder once, not once per attachment
            attachments_path = os.path.join(output_path, "ATTACHMENTS")
            os.makedirs(attachments_path, exist_ok=True)

            for attachment in attachments:
                # Only JPEG images are carried over
                if attachment.get("mimetype") != "image/jpeg":
                    continue

                # The extension is always rewritten to `.jpg`, since Google
                # mixes up the JPG and JPEG extensions
                image_file = os.path.splitext(attachment["filePath"])[0]

                # Copy the attachment over; a missing file raises the
                # original OSError rather than a re-wrapped Exception
                shutil.copy2(
                    os.path.join(input_path, f"{image_file}.jpg"),
                    os.path.join(attachments_path, f"{image_file}.jpg"),
                )
                content += f"* [{image_file}.jpg](ATTACHMENTS/{image_file}.jpg)\n"

        # Annotations
        annotations = json_data.get("annotations")
        if annotations:
            content = content.strip()  # Just to clear out any unwanted ending whitespace
            content += "\n\n## Embeds:\n"

            for annotation in annotations:
                # Clean them up a little bit
                annotation_description = _clean_annotation_value(annotation.get("description"))
                annotation_source = _clean_annotation_value(annotation.get("source"))
                annotation_title = _clean_annotation_value(annotation.get("title"))
                annotation_url = _clean_annotation_value(annotation.get("url"))

                # Then add them in to the content area
                content += f"* {annotation_source.title()}: "
                content += f'[{annotation_title}]({annotation_url} "{annotation_description}")\n'

        # Labels
        labels = json_data.get("labels")
        if labels:
            content = content.strip()  # Just to clear out any unwanted ending whitespace
            content += "\n\n## Labels:\n"

            for label in labels:
                # Sets the main / first label if one isn't set yet
                if not main_label:
                    main_label = label["name"]
                content += f"* {label['name']}\n"

        # Round and convert the value to an int, since we don't care about
        # anything smaller than seconds. Notes without the field keep the
        # source file's modification time set above.
        edited_usec = json_data.get("userEditedTimestampUsec")
        if edited_usec is not None:
            timestamp = int(round(edited_usec / 1000000))
        converted_timestamp = datetime.fromtimestamp(timestamp).isoformat()

        # Extra Values
        content = content.strip()  # Just to clear out any unwanted ending whitespace
        content += "\n\n## Values:\n"
        content += f"* Colour: {json_data.get('color', '')}\n"
        content += f"* isArchived: {json_data.get('isArchived', False)}\n"
        content += f"* isPinned: {json_data.get('isPinned', False)}\n"
        content += f"* isTrashed: {json_data.get('isTrashed', False)}\n"
        content += f"* Last Modified: {converted_timestamp}\n"

    # Do some final clean up of the title and content, just in case
    title = title.strip()
    content = content.strip()

    # Now put together the new markdown file
    document = ""
    document += title + "\n"
    document += "-" * len(title) + "\n"
    document += f"Colour: {colour}\n\n" if colour else "\n"
    document += content + "\n"

    # Notes with a label go in a folder named after it; "/" would otherwise
    # be read as a path separator
    destination_path = output_path
    if main_label:
        main_label = main_label.replace("/", "-")
        destination_path = os.path.join(output_path, main_label)

    # Create the output folder if it doesn't exist
    os.makedirs(destination_path, exist_ok=True)

    new_file = os.path.join(destination_path, f"{file_name}{output_extention}")

    # Write as UTF-8 explicitly so notes with non-ASCII characters do not
    # fail on platforms whose default encoding cannot represent them
    with open(new_file, "w", encoding="utf-8") as output_file:
        output_file.write(document)

    # Set the modified time on them to their old date
    os.utime(new_file, (timestamp, timestamp))

    print(f"Converted {source_file} to {new_file}")
@john-hix
Copy link

This is great! Thank you for sharing.

Made a quick change, adding line 191 to include os.utime(new_file,(timestamp, timestamp)), to make NextCloud display the notes by year in left sidebar in the Notes app.

@ThomasHineXYZ
Copy link
Author

This is great! Thank you for sharing.

Made a quick change, adding line 191 to include os.utime(new_file,(timestamp, timestamp)), to make NextCloud display the notes by year in left sidebar in the Notes app.

Done.

I didn't see your comment until today. Sorry about that.

@dornyika
Copy link

dornyika commented Sep 1, 2022

Hello, is there a way to support Unicode? I got UnicodeEncodeError.
Thanks

@masiv1001
Copy link

Hello, is there a way to support Unicode? I got UnicodeEncodeError. Thanks

I've just got it working; you have to modify line 191
from: f = open(new_file, "w")
to: f = open(new_file, 'w', encoding="utf-8")
(be careful with the quote marks; you have to replace them with single ones)

@dornyika
Copy link

dornyika commented Jan 7, 2023

Hello, is there a way to support Unicode? I got UnicodeEncodeError. Thanks

I've just got it working, you've to modify line 191 from:: f = open(new_file, "w") to: f = open(new_file, 'w',encoding="utf-8") (be careful with the quote marks, you have to replace with the single ones)

Thanks for your effort, I'll try it out!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment