Skip to content

Instantly share code, notes, and snippets.

@cs-qyzhang
Last active January 13, 2023 13:02
Show Gist options
  • Save cs-qyzhang/9ae9f68f91e6c853ce6911f07eddf168 to your computer and use it in GitHub Desktop.
Save cs-qyzhang/9ae9f68f91e6c853ce6911f07eddf168 to your computer and use it in GitHub Desktop.
Transform Obsidian notes to Jekyll posts, see https://jianyue.tech/posts/obsidian-to-jekyll/
# -*- coding: UTF-8 -*-
# obsidian notes -> jekyll posts
#
# python obsidian_to_jekyll.py --help
# python obsidian_to_jekyll.py -w -c -p
# python obsidian_to_jekyll.py --print
import argparse
import os
import pathlib
import sys
import time
import re
import frontmatter # pip install python-frontmatter
from git import Repo # pip install GitPython
from markdown_it import MarkdownIt # pip install markdown-it-py[linkify,plugins]
from markdown_it.tree import SyntaxTreeNode
import mdit_py_plugins.front_matter as md_frontmatter
import yaml # pip install PyYAML
post_subdir = "_posts" # sub-directory of the Jekyll repository holding post files; Post joins it with blog_path
def eprint(*args, **kwargs):
    """Print an error message to stderr wrapped in ANSI bright-yellow codes.

    :param args positional values, forwarded to ``print``
    :param kwargs extra ``print`` keywords (e.g. ``sep``). ``end`` is applied
                  after the colour-reset sequence (default: newline);
                  ``file`` is ignored because output always goes to stderr.
    """
    # `end` and `file` are supplied explicitly below; forwarding them through
    # **kwargs as well would raise "got multiple values for keyword argument".
    kwargs.pop('file', None)
    end = kwargs.pop('end', '\n')
    print('\033[93m', file=sys.stderr, end='')
    print(*args, file=sys.stderr, end='', **kwargs)
    # reset the colour before terminating the line
    print('\033[0m', file=sys.stderr, end=end)
class Note:
    """Obsidian note described by one section of Posts.md.

    A section is a heading whose text is a wikilink (``[[Note name]]``),
    optionally followed by a ``yaml`` fence (frontmatter for the post) and
    ``python`` fences (content hooks, see :meth:`parse`).
    """

    def __init__(self, nodes, vault_path):
        """
        :param nodes are corresponding markdown-it-py nodes in Posts.md
        :param vault_path is Obsidian vault path
        """
        self.name = ''
        self.file = ''
        self.content = ''
        self.code = None
        self.post_code = None
        self.frontmatter = None
        self.vault_path = vault_path
        self.parse(nodes)
        if self.file == '':
            # no heading node was seen, so no note file was resolved
            eprint("CANNOT GET FILE NAME!!!")
            sys.exit(-1)
        self.read_content()
        self.set_frontmatter()
        self.exec_code()

    def __str__(self):
        return f"Note(name={self.name},file={self.file},frontmatter={self.frontmatter},code={self.code})"

    def find_file(self):
        """find md file of wikilink

        A name containing ``/`` is treated as a path relative to the vault;
        otherwise the whole vault is searched recursively. When several files
        match, the one with the shortest path wins. Exits the program when
        nothing matches.
        """
        if '/' in self.name:
            glob_path = self.name + ".md"
        else:
            glob_path = "**/" + self.name + ".md"
        paths = sorted(pathlib.Path(self.vault_path).glob(glob_path),
                       key=lambda p: len(str(p)))
        if not paths:
            eprint(f"POST {self.name} NOT FOUND!!!")
            sys.exit(-1)
        return paths[0].absolute()

    def parse(self, nodes):
        """parse markdown-it-py nodes

        Only heading, ``yaml`` fence and ``python`` fence nodes are accepted;
        any other node type aborts the program.
        """
        for node in nodes:
            if node.type == "heading":
                # heading text is a wikilink: strip the surrounding [[ and ]]
                self.name = node.children[0].content[2:-2]
                self.file = self.find_file()
            elif node.type == "fence" and node.info.lower() == "yaml":
                # NOTE(review): yaml.Loader can construct arbitrary Python
                # objects. Posts.md is the user's own file (its python fences
                # are exec'd anyway), so it is treated as trusted input.
                self.frontmatter = yaml.load(node.content, yaml.Loader)
            elif node.type == "fence" and node.info.lower() == "python":
                # the python code normally is executed before content
                # process (in Post class). if the first line of code
                # is `# post`, then the code will be executed after
                # content process.
                if not self.code and not node.content.startswith("# post"):
                    self.code = node.content
                else:
                    self.post_code = node.content
            else:
                eprint(f"UNKNOWN NODE TYPE {node.type} in {self.name}!!!")
                sys.exit(-1)

    def read_content(self):
        """read frontmatter and content of note

        Frontmatter given in Posts.md takes precedence; the note's own
        frontmatter is only used when Posts.md supplied none, otherwise a
        warning is printed and it is ignored.
        """
        with open(self.file, 'r', encoding='utf-8') as f:
            metadata, content = frontmatter.parse(f.read())
        self.content = content
        if not self.frontmatter:
            self.frontmatter = metadata
        elif metadata:
            eprint(f"FILE '{self.name}' HAVE FRONTMATTER!!! "
                   "USE FRONTMATTER IN Post.md INSTEAD")

    def set_frontmatter(self):
        """set note's create date and modified date in frontmatter

        Values already present in the frontmatter are never overwritten.
        """
        def format_time(t):
            # NOTE(review): the UTC offset is hard-coded to +0800 although
            # `t` is local time - confirm this matches the machine's zone.
            return time.strftime("%Y-%m-%d %H:%M:%S +0800", t)

        def same_day(a, b):
            return a.tm_year == b.tm_year and a.tm_yday == b.tm_yday

        # NOTE(review): os.path.getctime is the creation time on Windows but
        # the last metadata change on Unix - verify on the target platform.
        ctime = time.localtime(os.path.getctime(self.file))
        mtime = time.localtime(os.path.getmtime(self.file))
        if 'date' not in self.frontmatter:
            self.frontmatter['date'] = format_time(ctime)
        # Chirpy theme key
        if not same_day(ctime, mtime) and 'last_modified_at' not in self.frontmatter:
            self.frontmatter['last_modified_at'] = format_time(mtime)

    def exec_code(self):
        """execute python code in Posts.md

        The snippet sees the note text as the variable ``content`` and may
        rebind it; the final value of ``content`` becomes the note content.
        If the snippet never rebinds it, the content is left unchanged.
        """
        if not self.code:
            return
        # A single dict serves as both globals and locals. With two separate
        # dicts the snippet is executed like a class body, so functions it
        # defines cannot see its own imports or other top-level names.
        namespace = {'content': self.content}
        exec(self.code, namespace)
        self.content = namespace['content']

    def render(self) -> str:
        """return the note as markdown text with a YAML frontmatter header"""
        metadata = yaml.dump(self.frontmatter, encoding='utf-8',
                             allow_unicode=True).decode()
        return f"---\n{metadata}---\n\n{self.content}"
class Post:
    """Jekyll Post Class

    Built either from an Obsidian ``Note`` or from an existing post file.
    On construction the content is converted in this order: image
    size/caption syntax, callouts, link titles, then the optional
    ``# post`` python hook.
    """

    # ![alt](url): alt and url are matched lazily so that several images on
    # one line are found separately; the url may contain one level of
    # balanced parentheses, e.g. "image_(1).png".
    _IMAGE_RE = re.compile(r"!\[(.*?)\]\(((?:[^()]|\([^()]*\))+?)\)")
    # same, optionally followed by a kramdown attribute list "{: ... }"
    _IMAGE_ATTR_RE = re.compile(
        r"!\[(.*?)\]\(((?:[^()]|\([^()]*\))+?)\)(?:{[^}]*})?")
    # [text](url), this also matches the "[alt](url)" part of images
    _LINK_RE = re.compile(r"\[(.*?)\]\((.*?)\)")

    def __init__(self, blog_path: str, /, note=None, file=None):
        """
        :param blog_path path of local jekyll repository
        :param note obsidian note object, used to construct post
                    from obsidian
        :param file existing jekyll post file path, used to construct
                    post from old jekyll post
        :raises ValueError when neither note nor file is given
        """
        if note and file:
            # reported, but construction continues and `note` wins
            eprint("Post.__init__ CAN ONLY HAVE NOTE OR FILE!!!")
        if not note and not file:
            eprint("Post.__init__ DON'T HAVE NOTE OR FILE!!!")
            raise ValueError("Post requires either note or file")
        self.blog_path = blog_path
        if note:
            self.frontmatter = note.frontmatter
            self.content = note.content
            self.code = note.post_code
            self.file = self._post_filename(note.frontmatter['date'],
                                            note.name)
        else:
            self.code = None
            self.file = file
            path = f"{blog_path}/{post_subdir}/{file}"
            with open(path, 'r', encoding='utf-8') as f:
                self.frontmatter, self.content = frontmatter.parse(f.read())
        self.full_path = f"{self.blog_path}/{post_subdir}/{self.file}"
        self.process_image()
        self.process_callouts()
        self.process_urls()
        self.exec_code()

    def __str__(self):
        return f"Post(file={self.file},frontmatter={self.frontmatter})"

    @staticmethod
    def _post_filename(date, name):
        """build the ``YYYY-MM-DD-lower-case-name.md`` post file name"""
        # `date` is a string when generated from file times, but YAML turns a
        # hand-written date into a date/datetime object; str() of either one
        # starts with YYYY-MM-DD.
        date_part = str(date)[:10]
        name_part = '-'.join(name.lower().split(' '))
        return f"{date_part}-{name_part}.md"

    def set_image_size(self):
        """syntax: ![alt text|100](xxx.png) or ![alt text|100x100](xxx.png)

        The size suffix is moved into a kramdown attribute list after the
        image. Images that do not span the whole line also get ``.normal``.
        """
        def get_image_size(alt):
            idx = alt.rfind('|')
            if idx == -1:
                return alt, 0, 0
            m = re.fullmatch(r"(\d+)(?:x(\d+))?", alt[idx+1:])
            if not m:
                # is caption, leave it for set_image_caption()
                return alt, 0, 0
            width = int(m.group(1))
            height = int(m.group(2)) if m.group(2) else 0
            return alt[:idx], width, height

        lines = self.content.splitlines()
        for i, line in enumerate(lines):
            pieces = []
            pos = 0
            for img in self._IMAGE_RE.finditer(line):
                img_alt, img_width, img_height = get_image_size(img.group(1))
                markups = []
                if img_width:
                    markups.append(f'width="{img_width}"')
                if img_height:
                    markups.append(f'height="{img_height}"')
                if img.start() != 0 or img.end() != len(line):
                    # inline image
                    markups.append(".normal")
                img_markup = f'![{img_alt}]({img.group(2)})'
                if markups:
                    img_markup += "{: " + ' '.join(markups) + " }"
                pieces.append(line[pos:img.start()])
                pieces.append(img_markup)
                pos = img.end()
            pieces.append(line[pos:])
            lines[i] = ''.join(pieces)
        self.content = '\n'.join(lines)

    def set_image_caption(self):
        """syntax: ![alt text|caption](xxx.png)
        called after set_image_size()

        The caption is removed from the alt text. When the image is the only
        thing on its line the caption is emitted as an italic line below it.
        """
        def get_caption(alt):
            idx = alt.rfind('|')
            if idx == -1:
                return alt, ''
            return alt[:idx], alt[idx+1:]

        newlines = []
        for line in self.content.splitlines():
            pieces = []
            pos = 0
            caption = ""
            for img in self._IMAGE_ATTR_RE.finditer(line):
                img_alt, caption = get_caption(img.group(1))
                if caption and (img.start() != 0 or img.end() != len(line)):
                    # inline image cannot have caption
                    caption = ''
                # remove caption from alt text
                pieces.append(line[pos:img.start(1)])
                pieces.append(img_alt)
                pos = img.end(1)
            pieces.append(line[pos:])
            newlines.append(''.join(pieces))
            if caption:
                newlines.append(f"_{caption}_")
        self.content = '\n'.join(newlines)

    def process_image(self):
        """convert obsidian image size and caption syntax"""
        # set size first: a numeric suffix must be consumed before the
        # remaining "|text" suffix is interpreted as a caption
        self.set_image_size()
        self.set_image_caption()

    def process_callouts(self):
        """obsidian callouts to chirpy prompts

        The ``> [!type]`` header line is dropped and ``{: .prompt-type }`` is
        inserted after the last line of the quote block.
        """
        cur_type = ''
        newlines = []
        for line in self.content.splitlines():
            if cur_type and not line.strip().startswith('>'):
                # first line after the quote block: close the callout
                newlines.append(f"{{: .prompt-{cur_type} }}")
                cur_type = ''
                newlines.append(line)
                continue
            m = re.fullmatch(r"> \[!(warning|tip|danger|info)\]",
                             line.strip().lower())
            if m:
                cur_type = m.group(1)
            else:
                newlines.append(line)
        if cur_type:
            # callout runs until the end of the content
            newlines.append(f"{{: .prompt-{cur_type} }}")
        self.content = '\n'.join(newlines)

    def process_urls(self):
        """replace | in url text to html code &#124; because jekyll's bug

        Also prints a warning for every link whose target is a zotero:// url.
        """
        def process_title(title):
            return title.replace('|', '&#124;')

        def process_zotero_url(url):
            if url.startswith('zotero://'):
                eprint("ZOTERO LINK IN ", self.file, url, "!!!")

        lines = self.content.splitlines()
        for i, line in enumerate(lines):
            pieces = []
            pos = 0
            # include image alt
            for url in self._LINK_RE.finditer(line):
                pieces.append(line[pos:url.start(1)])
                pieces.append(process_title(url.group(1)))
                process_zotero_url(url.group(2))
                pos = url.end(1)
            pieces.append(line[pos:])
            lines[i] = ''.join(pieces)
        self.content = '\n'.join(lines)

    def exec_code(self):
        """execute python code in Posts.md

        The snippet sees the post text as the variable ``content`` and may
        rebind it; the final value of ``content`` becomes the post content.
        If the snippet never rebinds it, the content is left unchanged.
        """
        if not self.code:
            return
        # A single dict serves as both globals and locals. With two separate
        # dicts the snippet is executed like a class body, so functions it
        # defines cannot see its own imports or other top-level names.
        namespace = {'content': self.content}
        exec(self.code, namespace)
        self.content = namespace['content']

    def render(self) -> str:
        """return the post as markdown text with a YAML frontmatter header"""
        metadata = yaml.dump(self.frontmatter, encoding='utf-8',
                             allow_unicode=True).decode()
        return f"---\n{metadata}---\n\n{self.content}"

    def dump(self):
        """write the rendered post to its file in the jekyll repository"""
        with open(self.full_path, 'w', encoding='utf-8') as f:
            f.write(self.render())
#############################################
# Paths to configure before running the script.
post_file = r"<obsidian Posts.md path>"
vault_path = r"<obsidian vault path>"
blog_path = r"<jekyll blog path>"

parser = argparse.ArgumentParser(description='Transform obsidian notes to jekyll posts')
parser.add_argument('-w', '--write',
                    help='Write posts file',
                    action='store_true')
parser.add_argument('-c', '--commit',
                    help='Git commit',
                    action='store_true')
parser.add_argument('-p', '--push',
                    help='Git push',
                    action='store_true')
parser.add_argument('--print',
                    help='Print rendered posts',
                    action='store_true')
parser.add_argument('-f', '--force',
                    help='Force write post files',
                    action='store_true')
parser.add_argument('--commit_msg',
                    help='Extra text appended to the git commit message',
                    action='store')
args = parser.parse_args()

# read Posts.md; the context manager closes the file handle again
with open(post_file, encoding='utf-8') as f:
    text = f.read()

md = (
    MarkdownIt("commonmark")
    .use(md_frontmatter.front_matter_plugin)
    .enable(["table", "list"])
)
tokens = md.parse(text)
root = SyntaxTreeNode(tokens)

# parse posts.md: every heading starts a new note section, the nodes up to
# the next heading belong to that note
nodes = []
notes = []
for node in root.children:
    if node.type == 'front_matter':
        continue
    if node.type == "heading":
        if len(nodes) > 0:
            notes.append(Note(nodes, vault_path))
        nodes.clear()
    nodes.append(node)
if len(nodes) > 0:
    notes.append(Note(nodes, vault_path))

# check post update/add and write post file
modified_posts = []
newly_added_posts = []
for note in notes:
    new_post = Post(blog_path, note=note)
    if args.print:
        print(f"---------- {new_post.file} BEGIN ----------")
        print(new_post.render())
        print(f"---------- {new_post.file} END ----------")
    if pathlib.Path(new_post.full_path).is_file():
        old_post = Post(blog_path, file=new_post.file)
        if old_post.frontmatter == new_post.frontmatter:
            if not args.force:
                # content assumes the same since last_modified_at is equal
                continue
            # forced: the post is rewritten below but not reported as modified
        else:
            modified_posts.append(new_post)
    else:
        newly_added_posts.append(new_post)
    if args.write:
        print(f"writing {new_post.file}...")
        new_post.dump()

# commit git repository
if args.commit:
    changed_posts = newly_added_posts + modified_posts
    if len(changed_posts) > 0 or args.commit_msg:
        repo = Repo(blog_path)
        repo.git.add(all=True)
        # post names without the ".md" suffix
        modified = ','.join([p.file[:-3] for p in modified_posts])
        added = ','.join([p.file[:-3] for p in newly_added_posts])
        msg_parts = []
        if len(modified_posts) > 0:
            msg_parts.append(f"Modified posts: {modified}.")
        if len(newly_added_posts) > 0:
            msg_parts.append(f"Newly added posts: {added}.")
        if args.commit_msg:
            msg_parts.append(f"{args.commit_msg}")
        commit_msg = " ".join(msg_parts)
        print(commit_msg)
        repo.index.commit(commit_msg)
        # pushing only makes sense after a commit; `repo` exists only here
        if args.push:
            for remote in repo.remotes:
                remote.push()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment