First, download your Google Keep notes as a Google Takeout package and unzip it.
Save the following code as convert.py in the directory that contains the extracted Keep folder, then:
- pip install beautifulsoup4 markdownify
- python3 convert.py
- After it finishes, move the images folder into the markdown folder.
- Open the markdown folder in Obsidian.
import os
from bs4 import BeautifulSoup
from markdownify import markdownify as md
from shutil import copyfile
# Directory layout, relative to the current working directory.
takeout_dir = 'Keep'        # input: extracted Google Keep export that is scanned for .html notes
# Images are collected in a top-level folder; converted notes link to them as
# '../images/<name>'. An alternative location would be 'markdown/images'.
images_dir = 'images'
markdown_dir = 'markdown'   # output: one .md file per converted note

# Create the output directories. exist_ok=True makes this idempotent and avoids
# the check-then-create race of testing os.path.exists() first.
os.makedirs(images_dir, exist_ok=True)
os.makedirs(markdown_dir, exist_ok=True)
def process_html_file(html_file_path: str) -> str:
    """Convert one exported HTML note to Markdown and collect its images.

    The first ``<div>`` or ``<body>`` element found in the file is treated as
    the note content. Each locally referenced image is copied into
    ``images_dir`` and the tag's ``src`` is pointed at
    ``../<images_dir>/<file name>`` before the HTML is converted, so the
    generated Markdown links to the copied file.

    Args:
        html_file_path: Path of the HTML file to convert.

    Returns:
        The Markdown text, or an empty string when the file contains no
        ``<div>`` or ``<body>`` element.
    """
    # Read as UTF-8 so non-ASCII text (e.g. Chinese) is preserved.
    with open(html_file_path, 'r', encoding='utf-8') as file:
        soup = BeautifulSoup(file.read(), 'html.parser')

    # Focus on the main content area; adjust the selector as needed.
    main_content = soup.find(lambda tag: tag.name in ('div', 'body'))
    if main_content is None:
        return ""

    source_dir = os.path.dirname(html_file_path)
    for img_tag in main_content.find_all('img'):
        img_src = img_tag.get('src') or img_tag.get('data-src')
        if not img_src:
            continue
        # Embedded (data:) and remote images have no local file to copy;
        # rewriting them would replace a working link with a broken one.
        if img_src.lower().startswith(('data:', 'http://', 'https://')):
            continue

        img_name = os.path.basename(img_src)
        img_path = os.path.join(images_dir, img_name)
        source_img_path = os.path.join(source_dir, img_src)
        if os.path.exists(source_img_path):
            copyfile(source_img_path, img_path)
        else:
            print(f"Image not found: {source_img_path}")

        # Rewrite the attribute on the tag instead of string-replacing in the
        # finished Markdown: a textual replace corrupts the output when the
        # same src occurs twice or is a substring of other text, and it never
        # matches for images that only carry data-src. Forward slashes keep
        # the link valid Markdown on every platform.
        img_tag['src'] = os.path.join('..', img_path).replace(os.sep, '/')

    return md(str(main_content), bullets='-')
# Walk the Keep export and write one Markdown file per HTML note that
# produced non-blank output. File and folder names are handled as Unicode,
# so Chinese names work as-is.
for folder, _subdirs, names in os.walk(takeout_dir):
    for name in names:
        # Only HTML notes are converted; everything else is ignored.
        if not name.endswith('.html'):
            continue

        print(f'Processing {name}...')
        converted = process_html_file(os.path.join(folder, name))

        # Notes that convert to nothing but whitespace are reported and skipped.
        if not converted.strip():
            print(f"No visible content found in {name}, skipping.")
            continue

        stem, _extension = os.path.splitext(name)
        target_path = os.path.join(markdown_dir, stem + '.md')
        with open(target_path, 'w', encoding='utf-8') as out:
            out.write(converted)