Last active
January 13, 2023 13:02
-
-
Save cs-qyzhang/9ae9f68f91e6c853ce6911f07eddf168 to your computer and use it in GitHub Desktop.
Transform Obsidian notes to Jekyll posts, see https://jianyue.tech/posts/obsidian-to-jekyll/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: UTF-8 -*-
# obsidian notes -> jekyll posts
#
# python obsidian_to_jekyll.py --help
# python obsidian_to_jekyll.py -w -c -p
# python obsidian_to_jekyll.py --print
import argparse
import os
import pathlib
import re
import sys
import time

import frontmatter  # pip install python-frontmatter
import yaml  # pip install PyYAML
from git import Repo  # pip install GitPython
from markdown_it import MarkdownIt  # pip install markdown-it-py[linkify,plugins]
from markdown_it.tree import SyntaxTreeNode
import mdit_py_plugins.front_matter as md_frontmatter
post_subdir = "_posts"  # Jekyll posts directory, relative to the blog repository root
def eprint(*args, **kwargs):
    """Print a message to stderr wrapped in ANSI bright-yellow colour codes.

    :param args: values to print, forwarded to ``print``
    :param kwargs: extra ``print`` keyword arguments (``sep``, ``flush``,
        ``end``). ``file`` is ignored because the output always goes to
        stderr; ``end`` is applied after the colour reset sequence and
        defaults to a newline.
    """
    # These two keywords are set explicitly below; forwarding them as well
    # would raise "got multiple values for keyword argument".
    kwargs.pop('file', None)
    end = kwargs.pop('end', '\n')
    print('\033[93m', file=sys.stderr, end='')
    print(*args, file=sys.stderr, end='', **kwargs)
    print('\033[0m', file=sys.stderr, end=end)
class Note:
    """Obsidian note selected for publishing by one section of Posts.md.

    A section consists of a heading holding a wikilink (``[[Note name]]``)
    optionally followed by a ``yaml`` fence (frontmatter override) and
    ``python`` fences (content transformation snippets).
    """

    def __init__(self, nodes, vault_path):
        """Parse the section, load the note file and apply the pre-process code.

        :param nodes: markdown-it-py nodes of the section in Posts.md
        :param vault_path: Obsidian vault path
        """
        self.name = ''
        self.file = ''
        self.code = None
        self.post_code = None
        self.frontmatter = None
        self.content = ''
        self.vault_path = vault_path
        self.parse(nodes)
        if self.file == '':
            eprint("CANNOT GET FILE NAME!!!")
            sys.exit(-1)
        self.read_content()
        self.set_frontmatter()
        self.exec_code()

    def __str__(self):
        return (f"Note(name={self.name},file={self.file},"
                f"frontmatter={self.frontmatter},code={self.code})")

    def find_file(self):
        """Find the markdown file the wikilink in ``self.name`` points to.

        A name containing ``/`` is treated as a path relative to the vault;
        otherwise the whole vault is searched recursively. When several files
        match, the one with the shortest path wins.

        :return: absolute path of the note file
        :raises SystemExit: when no file matches
        """
        if '/' in self.name:
            glob_path = self.name + ".md"
        else:
            glob_path = "**/" + self.name + ".md"
        paths = sorted(pathlib.Path(self.vault_path).glob(glob_path),
                       key=lambda p: len(str(p)))
        if not paths:
            eprint(f"POST {self.name} NOT FOUND!!!")
            sys.exit(-1)
        return paths[0].absolute()

    def parse(self, nodes):
        """Extract name, frontmatter and code snippets from markdown-it-py nodes.

        :raises SystemExit: when a node is neither a heading nor a
            ``yaml`` / ``python`` fence
        """
        for node in nodes:
            if node.type == "heading":
                # heading text is a wikilink: strip the surrounding [[ ]]
                self.name = node.children[0].content[2:-2]
                self.file = self.find_file()
            elif node.type == "fence" and node.info.lower() == "yaml":
                # NOTE(review): yaml.Loader can construct arbitrary Python
                # objects; acceptable only because Posts.md is the author's
                # own file. Consider yaml.safe_load if that ever changes.
                self.frontmatter = yaml.load(node.content, yaml.Loader)
            elif node.type == "fence" and node.info.lower() == "python":
                # the python code normally is executed before content
                # process (in Post class). if the first line of code
                # is `# post`, then the code will be executed after
                # content process.
                if not self.code and not node.content.startswith("# post"):
                    self.code = node.content
                else:
                    self.post_code = node.content
            else:
                eprint(f"UNKNOWN NODE TYPE {node.type} in {self.name}!!!")
                sys.exit(-1)

    def read_content(self):
        """Read the note file; frontmatter given in Posts.md takes precedence."""
        with open(self.file, 'r', encoding='utf-8') as f:
            metadata, content = frontmatter.parse(f.read())
        self.content = content
        if not self.frontmatter:
            self.frontmatter = metadata
        elif metadata:
            eprint(f"FILE '{self.name}' HAVE FRONTMATTER!!! "
                   "USE FRONTMATTER IN Post.md INSTEAD")

    def set_frontmatter(self):
        """Fill in ``date`` / ``last_modified_at`` from the file timestamps.

        Existing keys are never overwritten. ``last_modified_at`` (Chirpy
        theme key) is only added when the file was modified on a different
        day than it was created.
        """
        def format_time(t):
            # NOTE(review): the UTC offset is hard-coded to +0800 although
            # the timestamp is converted with time.localtime().
            return time.strftime("%Y-%m-%d %H:%M:%S +0800", t)

        def same_day(a, b):
            return a.tm_year == b.tm_year and a.tm_yday == b.tm_yday

        ctime = time.localtime(os.path.getctime(self.file))
        mtime = time.localtime(os.path.getmtime(self.file))
        if 'date' not in self.frontmatter:
            self.frontmatter['date'] = format_time(ctime)
        if not same_day(ctime, mtime) and 'last_modified_at' not in self.frontmatter:
            self.frontmatter['last_modified_at'] = format_time(mtime)

    def exec_code(self):
        """Run the pre-process python snippet from Posts.md, if any.

        The snippet sees the note text as the variable ``content`` and is
        expected to rebind it; when it does not, the content is left as is.
        """
        if not self.code:
            return
        ldict = {}
        content = self.content
        exec(self.code, {'content': content}, ldict)
        # fall back to the old text instead of raising KeyError when the
        # snippet never assigns `content`
        self.content = ldict.get('content', content)

    def render(self) -> str:
        """Return the note as markdown text with a YAML frontmatter header."""
        metadata = yaml.dump(self.frontmatter, encoding='utf-8',
                             allow_unicode=True).decode()
        return f"---\n{metadata}---\n\n{self.content}"
class Post:
    """Jekyll post, built from an Obsidian note or read from an existing post file.

    On construction the markdown content is converted from Obsidian flavour
    to what Jekyll expects: image size/caption syntax, callouts and ``|``
    characters inside link texts.
    """

    # One image per match. The alt text is matched lazily and the URL may
    # contain one level of balanced parentheses, so several images on the
    # same line are found individually.
    _IMAGE = r"!\[(.*?)\]\(((?:[^()]|\([^()]*\))+)\)"
    _IMAGE_RE = re.compile(_IMAGE)
    # Same, optionally followed by an attribute list such as {: width="100" }
    _IMAGE_ATTR_RE = re.compile(_IMAGE + r"(?:\{[^}]*\})?")
    _LINK_RE = re.compile(r"\[(.*?)\]\((.*?)\)")
    _CALLOUT_RE = re.compile(r"> \[!(warning|tip|danger|info)\]")

    def __init__(self, blog_path: str, /, note=None, file=None):
        """
        :param blog_path: path of local jekyll repository
        :param note: obsidian note object, used to construct post
            from obsidian
        :param file: existing jekyll post file name, used to construct
            post from old jekyll post
        :raises ValueError: when neither ``note`` nor ``file`` is given
        """
        if note and file:
            # the note takes precedence, the file is ignored
            eprint("Post.__init__ CAN ONLY HAVE NOTE OR FILE!!!")
        if not note and not file:
            raise ValueError("Post.__init__ DON'T HAVE NOTE OR FILE!!!")
        self.blog_path = blog_path
        if note:
            self.frontmatter = note.frontmatter
            self.content = note.content
            self.code = note.post_code
            # YAML may have parsed the date into a date/datetime object
            date_part = str(note.frontmatter['date'])[:10]
            # a note name may be a vault-relative path; only its last
            # component belongs in the post file name
            base_name = note.name.rsplit('/', 1)[-1]
            name_part = '-'.join(base_name.lower().split(' '))
            self.file = f"{date_part}-{name_part}.md"
        else:
            self.code = None
            self.file = file
            path = f"{blog_path}/{post_subdir}/{file}"
            with open(path, 'r', encoding='utf-8') as f:
                self.frontmatter, self.content = frontmatter.parse(f.read())
        self.full_path = f"{self.blog_path}/{post_subdir}/{self.file}"
        self.process_image()
        self.process_callouts()
        self.process_urls()
        self.exec_code()

    def __str__(self):
        return f"Post(file={self.file},frontmatter={self.frontmatter})"

    def set_image_size(self):
        """Convert Obsidian image sizes to attribute lists.

        syntax: ![alt text|100](xxx.png) or ![alt text|100x100](xxx.png)

        An image that does not occupy the whole line additionally gets the
        ``.normal`` class (inline image).
        """
        def get_image_size(alt):
            idx = alt.rfind('|')
            if idx == -1:
                return alt, 0, 0
            m = re.fullmatch(r"(\d+)(?:x(\d+))?", alt[idx+1:])
            if not m:
                # the part after | is a caption, not a size
                return alt, 0, 0
            width = int(m.group(1))
            height = int(m.group(2)) if m.group(2) else 0
            return alt[:idx], width, height

        lines = self.content.splitlines()
        for i, line in enumerate(lines):
            pos = 0
            pieces = []
            for img in self._IMAGE_RE.finditer(line):
                img_alt, img_width, img_height = get_image_size(img.group(1))
                markups = []
                if img_width:
                    markups.append(f'width="{img_width}"')
                if img_height:
                    markups.append(f'height="{img_height}"')
                if img.start() != 0 or img.end() != len(line):
                    # inline image
                    markups.append(".normal")
                img_markup = f'![{img_alt}]({img.group(2)})'
                if markups:
                    img_markup += "{: " + ' '.join(markups) + " }"
                pieces.append(line[pos:img.start()])
                pieces.append(img_markup)
                pos = img.end()
            pieces.append(line[pos:])
            lines[i] = ''.join(pieces)
        self.content = '\n'.join(lines)

    def set_image_caption(self):
        """Move image captions from the alt text to an emphasised line below.

        syntax: ![alt text|caption](xxx.png)
        called after set_image_size()

        The caption is always removed from the alt text, but only an image
        standing alone on its line gets the caption line emitted.
        """
        def get_caption(alt):
            idx = alt.rfind('|')
            if idx == -1:
                return alt, ''
            return alt[:idx], alt[idx+1:]

        newlines = []
        for line in self.content.splitlines():
            pieces = []
            pos = 0
            caption = ""
            for img in self._IMAGE_ATTR_RE.finditer(line):
                img_alt, caption = get_caption(img.group(1))
                if caption and (img.start() != 0 or img.end() != len(line)):
                    # inline image cannot have caption
                    caption = ''
                # remove caption from alt text
                pieces.append(line[pos:img.start(1)] + img_alt)
                pos = img.end(1)
            newlines.append(''.join(pieces) + line[pos:])
            if caption:
                newlines.append(f"_{caption}_")
        self.content = '\n'.join(newlines)

    def process_image(self):
        """Apply the image conversions; sizes must be handled before captions."""
        self.set_image_size()
        self.set_image_caption()

    def process_callouts(self):
        """obsidian callouts to chirpy prompts

        The ``> [!type]`` header line is dropped and a ``{: .prompt-type }``
        attribute line is appended after the last line of the blockquote.
        """
        cur_type = ''
        newlines = []
        for line in self.content.splitlines():
            if cur_type and not line.strip().startswith('>'):
                # first line after the blockquote: close the callout
                newlines.append(f"{{: .prompt-{cur_type} }}")
                cur_type = ''
                newlines.append(line)
                continue
            m = self._CALLOUT_RE.fullmatch(line.strip().lower())
            if m:
                cur_type = m.group(1)
            else:
                newlines.append(line)
        if cur_type:
            # callout ran until the end of the document
            newlines.append(f"{{: .prompt-{cur_type} }}")
        self.content = '\n'.join(newlines)

    def process_urls(self):
        """Replace | in link texts (image alt texts included) with &#124;.

        A literal pipe inside a link text trips up Jekyll's markdown
        rendering, so it is written as an HTML entity instead. Zotero links
        are only reported, not changed.
        """
        def process_title(title):
            return title.replace('|', '&#124;')

        def process_zotero_url(url):
            if url.startswith('zotero://'):
                eprint("ZOTERO LINK IN ", self.file, url, "!!!")

        lines = self.content.splitlines()
        for i, line in enumerate(lines):
            pieces = []
            pos = 0
            for url in self._LINK_RE.finditer(line):
                pieces.append(line[pos:url.start(1)] + process_title(url.group(1)))
                process_zotero_url(url.group(2))
                pos = url.end(1)
            lines[i] = ''.join(pieces) + line[pos:]
        self.content = '\n'.join(lines)

    def exec_code(self):
        """Run the post-process python snippet from Posts.md, if any.

        The snippet sees the post text as the variable ``content`` and is
        expected to rebind it; when it does not, the content is left as is.
        """
        if not self.code:
            return
        ldict = {}
        content = self.content
        exec(self.code, {'content': content}, ldict)
        # fall back to the old text instead of raising KeyError when the
        # snippet never assigns `content`
        self.content = ldict.get('content', content)

    def render(self) -> str:
        """Return the post as markdown text with a YAML frontmatter header."""
        metadata = yaml.dump(self.frontmatter, encoding='utf-8',
                             allow_unicode=True).decode()
        return f"---\n{metadata}---\n\n{self.content}"

    def dump(self):
        """Write the rendered post to its file in the blog repository."""
        with open(self.full_path, 'w', encoding='utf-8') as f:
            f.write(self.render())
#############################################
# User configuration: fill in the three paths below before running.
post_file = r"<obsidian Posts.md path>"
vault_path = r"<obsidian vault path>"
blog_path = r"<jekyll blog path>"

parser = argparse.ArgumentParser(description='Transform obsidian notes to jekyll posts')
parser.add_argument('-w', '--write',
                    help='Write posts file',
                    action='store_true')
parser.add_argument('-c', '--commit',
                    help='Git commit',
                    action='store_true')
parser.add_argument('-p', '--push',
                    help='Git push',
                    action='store_true')
parser.add_argument('--print',
                    help='Print rendered posts',
                    action='store_true')
parser.add_argument('-f', '--force',
                    help='Force write post files',
                    action='store_true')
parser.add_argument('--commit_msg',
                    action='store')
args = parser.parse_args()

# read Posts.md; the context manager closes the file handle again
with open(post_file, encoding='utf-8') as f:
    text = f.read()

md = (
    MarkdownIt("commonmark")
    .use(md_frontmatter.front_matter_plugin)
    .enable(["table", "list"])
)
tokens = md.parse(text)
root = SyntaxTreeNode(tokens)

# parse posts.md: every heading starts a new section, one Note per section
nodes = []
notes = []
for node in root.children:
    if node.type == 'front_matter':
        continue
    if node.type == "heading":
        if nodes:
            notes.append(Note(nodes, vault_path))
        nodes.clear()
    nodes.append(node)
if nodes:
    # last section has no following heading to flush it
    notes.append(Note(nodes, vault_path))

# check post update/add and write post file
modified_posts = []
newly_added_posts = []
for note in notes:
    new_post = Post(blog_path, note=note)
    if args.print:
        print(f"---------- {new_post.file} BEGIN ----------")
        print(new_post.render())
        print(f"---------- {new_post.file} END ----------")
    if pathlib.Path(new_post.full_path).is_file():
        old_post = Post(blog_path, file=new_post.file)
        if old_post.frontmatter == new_post.frontmatter:
            if not args.force:
                # content assumes the same since last_modified_at is equal
                continue
        else:
            modified_posts.append(new_post)
    else:
        newly_added_posts.append(new_post)
    if args.write:
        print(f"writing {new_post.file}...")
        new_post.dump()

# commit git repository
if args.commit:
    changed_posts = newly_added_posts + modified_posts
    if changed_posts or args.commit_msg:
        repo = Repo(blog_path)
        repo.git.add(all=True)
        # file names without the ".md" suffix
        modified = ','.join(p.file[:-3] for p in modified_posts)
        added = ','.join(p.file[:-3] for p in newly_added_posts)
        msg_parts = []
        if modified_posts:
            msg_parts.append(f"Modified posts: {modified}.")
        if newly_added_posts:
            msg_parts.append(f"Newly added posts: {added}.")
        if args.commit_msg:
            msg_parts.append(f"{args.commit_msg}")
        commit_msg = ' '.join(msg_parts)
        print(commit_msg)
        repo.index.commit(commit_msg)
        # NOTE(review): pushing is kept inside the commit branch because
        # `repo` only exists here; --push without a commit does nothing.
        if args.push:
            for remote in repo.remotes:
                remote.push()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment