# ThomasHineXYZ/KeepToMarkdown.py

## KeepToMarkdown.py
#!/usr/bin/env python

# Google Keep "Takeout" to Markdown Converter
# This allows you to convert your Google Keep notes that are downloaded from
# Google's "Takeout" system. This works with Nextcloud's Notes system.

from datetime import datetime
import json
import os
import shutil

# Folder holding the extracted `Keep` directory of the Google Takeout archive.
input_path = "/tmp/Takeout/Keep/"

# Extension given to every converted note.
output_extention = ".md"

# Folder that receives the converted notes.
output_path = "/tmp/keep_converted/"

# Keep colour name -> hex code, as found in the Takeout's CSS.
note_colours = dict(
    blue="#3FC3FF",
    brown="#D7CCC8",
    cerulean="#82B1FF",
    gray="#B8C4C9",
    green="#95D641",
    orange="#FF9B00",
    pink="#F8BBD0",
    purple="#B388FF",
    red="#FF6D3F",
    teal="#1CE8B5",
    yellow="#FFDA00",
)

# Grab a list of the files and convert every note. The work is split into
# small helpers so each part of the export is handled in exactly one place.


def _start_section(content, heading):
    """Trim surrounding whitespace from content and open a '## heading:' section."""
    return content.strip() + f"\n\n## {heading}:\n"


def _clean_annotation_field(annotation, key):
    """Return annotation[key] flattened for use inside a one-line Markdown link.

    Newlines are removed and double quotes become single quotes, because the
    value ends up inside a link of the form [title](url "description").
    A missing or empty value yields an empty string.
    """
    value = annotation.get(key) or ""
    return value.replace("\n", "").replace('"', "'").strip()


def _render_list_items(list_items):
    """Return one line per checklist item; checked items are struck through."""
    lines = []
    for list_item in list_items:
        if list_item['isChecked']:
            lines.append(f"🗹 ~~{list_item['text']}~~\n")
        else:
            lines.append(f"☐ {list_item['text']}\n")
    return "".join(lines)


def _copy_attachments(attachments, source_dir, dest_dir):
    """Copy JPEG attachments to dest_dir/ATTACHMENTS and return their link lines.

    Only attachments whose mimetype is "image/jpeg" are copied; every other
    type is left out of the output. Errors raised by the copy (for example a
    missing image) propagate unchanged so the real cause stays visible.
    """
    attachment_dir = os.path.join(dest_dir, "ATTACHMENTS")
    os.makedirs(attachment_dir, exist_ok=True)

    lines = []
    for attachment in attachments:
        if attachment.get('mimetype') != "image/jpeg":
            continue

        # The export mixes up the JPG and JPEG extensions, so the extension
        # recorded in filePath is dropped and ".jpg" is always used.
        image_file = os.path.splitext(attachment['filePath'])[0]
        shutil.copy2(
            os.path.join(source_dir, f"{image_file}.jpg"),
            os.path.join(attachment_dir, f"{image_file}.jpg"),
        )
        # NOTE(review): the link is relative to the output root, while a
        # labelled note is saved one folder deeper - confirm the viewer
        # resolves it as intended.
        lines.append(f"* [{image_file}.jpg](ATTACHMENTS/{image_file}.jpg)\n")
    return "".join(lines)


def _build_json_note(json_data, source_dir, dest_dir, colours):
    """Build the Markdown pieces for one parsed Keep JSON note.

    Returns a (title, colour, content, main_label, timestamp) tuple:
    colour is the hex code looked up in colours, or "" when the note's
    colour is not listed there; main_label is the first label name or "";
    timestamp is the edit time in whole seconds, or None when the note has
    no userEditedTimestampUsec.
    """
    # Set the title to what it had before
    title = "No Title"
    if json_data.get('title'):
        title = json_data['title'].strip()

    # Set the colour, if it isn't default
    colour = colours.get((json_data.get('color') or "").lower(), "")

    # Grab the content if there's some there
    content = ""
    if json_data.get('textContent'):
        content += json_data['textContent'].strip() + "\n"

    if json_data.get('listContent'):
        content += _render_list_items(json_data['listContent'])

    if json_data.get('attachments'):
        content = _start_section(content, "Attachments")
        content += _copy_attachments(json_data['attachments'], source_dir, dest_dir)

    if json_data.get('annotations'):
        content = _start_section(content, "Embeds")
        for annotation in json_data['annotations']:
            description = _clean_annotation_field(annotation, 'description')
            source = _clean_annotation_field(annotation, 'source')
            link_title = _clean_annotation_field(annotation, 'title')
            url = _clean_annotation_field(annotation, 'url')
            content += f"* {source.title()}: "
            content += f"[{link_title}]({url} \"{description}\")\n"

    main_label = ""
    if json_data.get('labels'):
        content = _start_section(content, "Labels")
        for label in json_data['labels']:
            # The main / first label decides the output folder
            if not main_label:
                main_label = label['name']
            content += f"* {label['name']}\n"

    # Round and convert the value to an int, since we don't care about
    # anything smaller than seconds
    timestamp = None
    edited_usec = json_data.get('userEditedTimestampUsec')
    if edited_usec is not None:
        timestamp = int(round(edited_usec / 1000000))

    # Extra values; a key that is absent from the export is simply left out
    content = _start_section(content, "Values")
    for caption, key in (
        ("Colour", 'color'),
        ("isArchived", 'isArchived'),
        ("isPinned", 'isPinned'),
        ("isTrashed", 'isTrashed'),
    ):
        if key in json_data:
            content += f"* {caption}: {json_data[key]}\n"
    if timestamp is not None:
        converted_timestamp = datetime.fromtimestamp(timestamp).isoformat()
        content += f"* Last Modified: {converted_timestamp}\n"

    return title, colour, content, main_label, timestamp


def convert_keep_notes(source_dir, dest_dir, extension, colours):
    """Convert every .txt and .json note in source_dir into dest_dir.

    source_dir is the extracted `Keep` folder, dest_dir the folder that
    receives the converted notes, extension the suffix of the written files
    and colours a mapping of lower-case colour name to hex code. Files with
    any other extension are skipped. A note that has labels is written to a
    sub-folder named after its first label ("/" replaced by "-").

    Returns the list of paths that were written. Errors from reading,
    parsing, copying or writing propagate to the caller.
    """
    written = []
    for file in sorted(os.listdir(source_dir)):
        # Split the file name up into the name and the extension
        file_name, file_extension = os.path.splitext(file)
        source_file = os.path.join(source_dir, file)

        colour = ""
        main_label = ""
        timestamp = None

        if file_extension.lower() == ".txt":
            with open(source_file, 'r', encoding='utf-8') as text_file:
                content = text_file.read()
            title = file_name

        elif file_extension.lower() == ".json":
            with open(source_file, 'r', encoding='utf-8') as json_file:
                json_data = json.load(json_file)
            title, colour, content, main_label, timestamp = _build_json_note(
                json_data, source_dir, dest_dir, colours)

        else:  # If it's any other file type, just skip it
            continue

        # Text notes (and JSON notes without an edit time) carry no timestamp
        # of their own, so the source file's modification time is kept
        # instead of reusing whatever an earlier note left behind.
        if timestamp is None:
            timestamp = int(os.path.getmtime(source_file))

        # Do some final clean up of the title and content, just in case
        title = title.strip()
        content = content.strip()

        # Now put together the new markdown file
        document = f"{title}\n{'-' * len(title)}\n"
        document += f"Colour: {colour}\n\n" if colour else "\n"
        document += content + "\n"

        target_dir = dest_dir
        if main_label:
            target_dir = os.path.join(dest_dir, main_label.replace("/", "-"))
        os.makedirs(target_dir, exist_ok=True)

        # UTF-8 is requested explicitly: the checkbox symbols cannot be
        # encoded by every platform's default encoding.
        new_file = os.path.join(target_dir, f"{file_name}{extension}")
        with open(new_file, "w", encoding="utf-8") as output_file:
            output_file.write(document)

        # Set the modified time on them to their old date
        os.utime(new_file, (timestamp, timestamp))

        print(f"Converted {source_file} to {new_file}")
        written.append(new_file)

    return written


def main():
    """Convert the notes in input_path using the settings defined in this file."""
    convert_keep_notes(input_path, output_path, output_extention, note_colours)


if __name__ == "__main__":
    main()
	#!/usr/bin/env python

# Google Keep "Takeout" to Markdown Converter
# This allows you to convert your Google Keep notes that are downloaded from
# Google's "Takeout" system. This works with Nextcloud's Notes system.
#
# This part of the file repeats the script found earlier in the file; its
# indentation had been lost, so the block structure is restored here.

from datetime import datetime
import json
import os
import shutil

# Set the input_path to the `Keep` folder within your Google Takeout archive (extracted).
input_path = "/tmp/Takeout/Keep/"

# The file type that you'd like the export to be saved as.
output_extention = ".md"

# Where you'd like the converted files to be stored.
output_path = "/tmp/keep_converted/"

# These are the note colours, lifted directly from the Takeout's CSS
note_colours = {
    "blue": "#3FC3FF",
    "brown": "#D7CCC8",
    "cerulean": "#82B1FF",
    "gray": "#B8C4C9",
    "green": "#95D641",
    "orange": "#FF9B00",
    "pink": "#F8BBD0",
    "purple": "#B388FF",
    "red": "#FF6D3F",
    "teal": "#1CE8B5",
    "yellow": "#FFDA00",
}


def _start_section(content, heading):
    """Trim surrounding whitespace from content and open a '## heading:' section."""
    return content.strip() + f"\n\n## {heading}:\n"


def _clean_annotation_field(annotation, key):
    """Return annotation[key] flattened for use inside a one-line Markdown link.

    Newlines are removed and double quotes become single quotes, because the
    value ends up inside a link of the form [title](url "description").
    A missing or empty value yields an empty string.
    """
    value = annotation.get(key) or ""
    return value.replace("\n", "").replace('"', "'").strip()


def _render_list_items(list_items):
    """Return one line per checklist item; checked items are struck through."""
    lines = []
    for list_item in list_items:
        if list_item['isChecked']:
            lines.append(f"🗹 ~~{list_item['text']}~~\n")
        else:
            lines.append(f"☐ {list_item['text']}\n")
    return "".join(lines)


def _copy_attachments(attachments, source_dir, dest_dir):
    """Copy JPEG attachments to dest_dir/ATTACHMENTS and return their link lines.

    Only attachments whose mimetype is "image/jpeg" are copied; every other
    type is left out of the output. Errors raised by the copy (for example a
    missing image) propagate unchanged so the real cause stays visible.
    """
    attachment_dir = os.path.join(dest_dir, "ATTACHMENTS")
    os.makedirs(attachment_dir, exist_ok=True)

    lines = []
    for attachment in attachments:
        if attachment.get('mimetype') != "image/jpeg":
            continue

        # The export mixes up the JPG and JPEG extensions, so the extension
        # recorded in filePath is dropped and ".jpg" is always used.
        image_file = os.path.splitext(attachment['filePath'])[0]
        shutil.copy2(
            os.path.join(source_dir, f"{image_file}.jpg"),
            os.path.join(attachment_dir, f"{image_file}.jpg"),
        )
        # NOTE(review): the link is relative to the output root, while a
        # labelled note is saved one folder deeper - confirm the viewer
        # resolves it as intended.
        lines.append(f"* [{image_file}.jpg](ATTACHMENTS/{image_file}.jpg)\n")
    return "".join(lines)


def _build_json_note(json_data, source_dir, dest_dir, colours):
    """Build the Markdown pieces for one parsed Keep JSON note.

    Returns a (title, colour, content, main_label, timestamp) tuple:
    colour is the hex code looked up in colours, or "" when the note's
    colour is not listed there; main_label is the first label name or "";
    timestamp is the edit time in whole seconds, or None when the note has
    no userEditedTimestampUsec.
    """
    # Set the title to what it had before
    title = "No Title"
    if json_data.get('title'):
        title = json_data['title'].strip()

    # Set the colour, if it isn't default
    colour = colours.get((json_data.get('color') or "").lower(), "")

    # Grab the content if there's some there
    content = ""
    if json_data.get('textContent'):
        content += json_data['textContent'].strip() + "\n"

    if json_data.get('listContent'):
        content += _render_list_items(json_data['listContent'])

    if json_data.get('attachments'):
        content = _start_section(content, "Attachments")
        content += _copy_attachments(json_data['attachments'], source_dir, dest_dir)

    if json_data.get('annotations'):
        content = _start_section(content, "Embeds")
        for annotation in json_data['annotations']:
            description = _clean_annotation_field(annotation, 'description')
            source = _clean_annotation_field(annotation, 'source')
            link_title = _clean_annotation_field(annotation, 'title')
            url = _clean_annotation_field(annotation, 'url')
            content += f"* {source.title()}: "
            content += f"[{link_title}]({url} \"{description}\")\n"

    main_label = ""
    if json_data.get('labels'):
        content = _start_section(content, "Labels")
        for label in json_data['labels']:
            # The main / first label decides the output folder
            if not main_label:
                main_label = label['name']
            content += f"* {label['name']}\n"

    # Round and convert the value to an int, since we don't care about
    # anything smaller than seconds
    timestamp = None
    edited_usec = json_data.get('userEditedTimestampUsec')
    if edited_usec is not None:
        timestamp = int(round(edited_usec / 1000000))

    # Extra values; a key that is absent from the export is simply left out
    content = _start_section(content, "Values")
    for caption, key in (
        ("Colour", 'color'),
        ("isArchived", 'isArchived'),
        ("isPinned", 'isPinned'),
        ("isTrashed", 'isTrashed'),
    ):
        if key in json_data:
            content += f"* {caption}: {json_data[key]}\n"
    if timestamp is not None:
        converted_timestamp = datetime.fromtimestamp(timestamp).isoformat()
        content += f"* Last Modified: {converted_timestamp}\n"

    return title, colour, content, main_label, timestamp


def convert_keep_notes(source_dir, dest_dir, extension, colours):
    """Convert every .txt and .json note in source_dir into dest_dir.

    source_dir is the extracted `Keep` folder, dest_dir the folder that
    receives the converted notes, extension the suffix of the written files
    and colours a mapping of lower-case colour name to hex code. Files with
    any other extension are skipped. A note that has labels is written to a
    sub-folder named after its first label ("/" replaced by "-").

    Returns the list of paths that were written. Errors from reading,
    parsing, copying or writing propagate to the caller.
    """
    written = []
    for file in sorted(os.listdir(source_dir)):
        # Split the file name up into the name and the extension
        file_name, file_extension = os.path.splitext(file)
        source_file = os.path.join(source_dir, file)

        colour = ""
        main_label = ""
        timestamp = None

        if file_extension.lower() == ".txt":
            with open(source_file, 'r', encoding='utf-8') as text_file:
                content = text_file.read()
            title = file_name

        elif file_extension.lower() == ".json":
            with open(source_file, 'r', encoding='utf-8') as json_file:
                json_data = json.load(json_file)
            title, colour, content, main_label, timestamp = _build_json_note(
                json_data, source_dir, dest_dir, colours)

        else:  # If it's any other file type, just skip it
            continue

        # Text notes (and JSON notes without an edit time) carry no timestamp
        # of their own, so the source file's modification time is kept
        # instead of reusing whatever an earlier note left behind.
        if timestamp is None:
            timestamp = int(os.path.getmtime(source_file))

        # Do some final clean up of the title and content, just in case
        title = title.strip()
        content = content.strip()

        # Now put together the new markdown file
        document = f"{title}\n{'-' * len(title)}\n"
        document += f"Colour: {colour}\n\n" if colour else "\n"
        document += content + "\n"

        target_dir = dest_dir
        if main_label:
            target_dir = os.path.join(dest_dir, main_label.replace("/", "-"))
        os.makedirs(target_dir, exist_ok=True)

        # UTF-8 is requested explicitly: the checkbox symbols cannot be
        # encoded by every platform's default encoding.
        new_file = os.path.join(target_dir, f"{file_name}{extension}")
        with open(new_file, "w", encoding="utf-8") as output_file:
            output_file.write(document)

        # Set the modified time on them to their old date
        os.utime(new_file, (timestamp, timestamp))

        print(f"Converted {source_file} to {new_file}")
        written.append(new_file)

    return written


def main():
    """Convert the notes in input_path using the settings defined in this file."""
    convert_keep_notes(input_path, output_path, output_extention, note_colours)


if __name__ == "__main__":
    main()