Skip to content

Instantly share code, notes, and snippets.

@bram-dingelstad
Created November 5, 2022 16:28
Show Gist options
  • Save bram-dingelstad/e9594edcf7ce72213db211aa6a7f9d65 to your computer and use it in GitHub Desktop.
Save bram-dingelstad/e9594edcf7ce72213db211aa6a7f9d65 to your computer and use it in GitHub Desktop.
Import Markdown pages (e.g. from Hugo) into Notion
import os
import re
import json
from pprint import pprint
from subprocess import Popen, PIPE, STDOUT
from notion_client import Client
# NOTE: ID of the Notion database you want to import into, taken from the environment
DATABASE_ID = os.environ.get('DATABASE_ID')
# NOTE: API key handed to the Notion client as `auth`, taken from the environment
NOTION_API_KEY = os.environ.get('NOTION_API_KEY')
# Shared client used for every request made by this script
notion = Client(auth=NOTION_API_KEY)
def get_meta(name, content, default=None):
    """Return the value of a front-matter key found in a Markdown document.

    The first line of *content* that starts with ``name:`` is used. The text
    after the colon is returned with surrounding whitespace and surrounding
    double quotes removed, and with every backslash dropped.

    Args:
        name: Key to look up. It is matched literally, so regex
            metacharacters in the key have no special meaning.
        content: Full text of the Markdown file, front matter included.
        default: Value to return when the key is absent. When left as
            ``None`` a missing key raises ``KeyError`` instead.

    Returns:
        The cleaned value as a string, or *default* when the key is absent.

    Raises:
        KeyError: The key is absent and no *default* was supplied.
    """
    # re.escape keeps keys such as "a.b" from matching unrelated lines ("axb:")
    match = re.search(r'^' + re.escape(name) + r':(.*)$', content, re.MULTILINE)
    if match is None:
        if default is None:
            raise KeyError(f'front matter key {name!r} not found')
        return default
    return match.group(1).strip().strip('"').replace('\\', '')
# NOTE: You might want to extend this to more types (e.g `select` or `number`) in case you need to
class Notion:
    """Builders for Notion property values, keyed by property type.

    Every helper is a static method: it can be called on the class
    (``Notion.title("x")``) or on an instance, and returns a plain dict
    ready to be used as a value in a page's ``properties`` mapping.
    """

    @staticmethod
    def title(content):
        """Return a ``title`` property value containing *content* as text."""
        return {"title": [{"text": {"content": content}}]}

    @staticmethod
    def rich_text(content):
        """Return a ``rich_text`` property value containing *content* as text."""
        return {"rich_text": [{"text": {"content": content}}]}

    @staticmethod
    def date(content):
        """Return a ``date`` property value whose ``start`` is *content*."""
        return {"date": {"start": content}}

    @staticmethod
    def multi_select(array):
        """Return a ``multi_select`` property value with one option per item of *array*."""
        return {"multi_select": [{"name": item} for item in array]}
# Notion's request limits cap any array of blocks at 100 elements, so longer
# articles are sent as one page creation followed by append calls.
NOTION_MAX_CHILDREN = 100

# NOTE: Make sure that your current working directory is close to your markdown articles
for article in os.listdir():
    # NOTE: My setup has article-name/index.md, so edit this to whatever your setup is
    article_path = f'./{article}/index.md'
    # Skip plain files and directories that hold no article (e.g. `node_modules`)
    if not os.path.isfile(article_path):
        continue

    with open(article_path, 'r', encoding='utf-8') as file:
        content = file.read()

    title = get_meta('title', content)
    subtitle = get_meta('subtitle', content)
    # Keep only the calendar date: drop everything from the `T` separator on
    date = re.sub(r'T.*$', '', get_meta('date', content))

    # Tags are written as an inline list, e.g. `tags: [a, "b"]`; drop the
    # brackets, then trim whitespace and quotes and discard empty entries.
    raw_tags = get_meta('tags', content).replace('[', '').replace(']', '')
    tags = [
        tag
        for tag in (raw_tag.strip().strip('"\'').strip() for raw_tag in raw_tags.split(','))
        if tag
    ]

    # NOTE: I had a custom property for cover image, you might not need this
    cover_match = re.search(r'^.*image:(.*)$', content, re.MULTILINE)
    cover = ''
    if cover_match is not None:
        cover = cover_match.group(1).strip().strip('"').replace('\\', '')
    # Only absolute URLs can be used as an external cover
    if 'http' not in cover:
        cover = ''

    # Remove the leading `---` ... `---` front matter and nothing else, even
    # when it contains blank lines or the body starts right after it.
    content = re.sub(
        r'\A---[ \t]*\n.*?^---[ \t]*$',
        '',
        content,
        count=1,
        flags=re.DOTALL | re.MULTILINE
    ).strip()

    # NOTE: We use an npm/node library to convert our markdown into the JSON format we need. I could've ported it, but didn't feel like it.
    # NOTE: Update this to wherever your `node` binary lives and where you put the markdown-to-notion.js
    p = Popen(['/usr/local/bin/node', './markdown-to-notion.js'], stdout=PIPE, stdin=PIPE, stderr=PIPE)
    data, stderr_data = p.communicate(input=content.encode())
    if p.returncode != 0:
        # Surface the converter's own error instead of a JSON decode failure
        details = stderr_data.decode(errors='replace').strip()
        raise RuntimeError(f'markdown-to-notion.js failed for {article!r}: {details}')

    blocks = json.loads(data)

    page = {
        "parent": {
            "type": "database_id",
            "database_id": DATABASE_ID
        },
        # NOTE: Make sure that these properties correspond in name and type with your database, edit otherwise
        "properties": {
            "Title": Notion.title(title),
            "Subtitle": Notion.rich_text(subtitle),
            "Slug": Notion.rich_text(article),
            "Date": Notion.date(date),
            "Tags": Notion.multi_select(tags)
        },
        "children": blocks[:NOTION_MAX_CHILDREN]
    }
    # Leave `cover` out entirely when there is none rather than sending null
    if cover:
        page["cover"] = {
            "type": "external",
            "external": {
                "url": cover
            }
        }

    created_page = notion.pages.create(**page)

    # Append whatever did not fit into the creation request, in order
    for start in range(NOTION_MAX_CHILDREN, len(blocks), NOTION_MAX_CHILDREN):
        notion.blocks.children.append(
            block_id=created_page["id"],
            children=blocks[start:start + NOTION_MAX_CHILDREN]
        )
// markdown-to-notion.js: reads Markdown from stdin and prints the converted
// blocks to stdout as JSON indented with four spaces.
const { markdownToBlocks } = require('@tryfabric/martian');
const fs = require('fs');

// File descriptor 0 is stdin; the whole input is read before converting.
const markdown = fs.readFileSync(0, 'utf-8');
const blocks = markdownToBlocks(markdown);

console.log(JSON.stringify(blocks, null, 4));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment