Skip to content

Instantly share code, notes, and snippets.

@almet
Created July 16, 2023 15:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save almet/1999c83414d753c281d0a89f536c8f1d to your computer and use it in GitHub Desktop.
Save almet/1999c83414d753c281d0a89f536c8f1d to your computer and use it in GitHub Desktop.
Obsidian Pelican
"""A pelican plugin to read Obsidian files and import them as pelican Articles.
This reads the tags made with hashtags and renders them as Pelican tags instead
(they won't be present in the output).
It also adds the title to the output and specifies a default dummy date.
"""
import os
import re
from datetime import datetime
from itertools import chain
from pathlib import Path

from markdown import Markdown
from pelican import signals
from pelican.contents import Tag
from pelican.readers import MarkdownReader
from pelican.utils import get_date, slugify
from pelican.utils import pelican_open
# Lookup tables filled by populate_files_and_articles():
#   ARTICLE_PATHS maps a note name (file name without ".md") to its directory,
#   FILE_PATHS maps an attachment file name (with extension) to its directory.
ARTICLE_PATHS = {}
FILE_PATHS = {}

# Obsidian wiki link: [[target]] or [[target | label]].
# The label is matched lazily (".+?") so that several links on the same line
# are recognised separately instead of being swallowed into one long label.
link = r'\[\[\s*(?P<filename>[^|\]]+)(\|\s*(?P<linkname>.+?))?\]\]'
# Embedded file: same syntax as a link, prefixed with "!".
file_re = re.compile(r'!' + link)
link_re = re.compile(link)
# Hashtag: "#" immediately followed by word characters.
tag_re = re.compile(r'#([\w]+)')
"""
# Test cases
[[my link]]
[[ my work ]]
[[ my work | is finished ]]
[[ first | one ]] and [[ second | two ]]
![[ a file.jpg ]]
![[file.jpg]]
"""
def get_file_and_linkname(match):
    """Extract the target file name and display label from a link match.

    ``match`` must come from a pattern that defines the named groups
    ``filename`` and ``linkname``. Both returned values are stripped of
    surrounding whitespace.

    Returns a ``(filename, linkname)`` tuple. When the link has no label, or
    only a blank one (``[[note| ]]``), the file name is used as the label so
    the rendered link never ends up with an empty text.
    """
    group = match.groupdict()
    filename = group['filename'].strip()
    # The label group is None when the "|label" part is absent.
    linkname = (group['linkname'] or '').strip() or filename
    return filename, linkname
class ObsidianMarkdownReader(MarkdownReader):
    """Markdown reader that adapts Obsidian notes to what Pelican expects.

    Obsidian wiki links (``[[note]]``, ``[[note|label]]``) and embeds
    (``![[image.png]]``) are rewritten to Pelican's ``{filename}`` and
    ``{static}`` link syntax, ``#hashtags`` are collected as Pelican tags and
    removed from the text, and title, date, slug and category metadata are
    derived from the note.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the reader and enable the Markdown extensions it needs."""
        super().__init__(*args, **kwargs)
        extensions = self.settings["MARKDOWN"]["extensions"]
        # The settings object is shared, so only add what is missing;
        # otherwise every new reader would append the extensions again.
        # "toc" provides the ``toc`` / ``toc_tokens`` attributes used in read().
        for extension in ("markdown.extensions.toc", "sane_lists"):
            if extension not in extensions:
                extensions.append(extension)

    def replace_obsidian_links(self, text):
        """Return ``text`` with Obsidian links and embeds rewritten.

        Links to notes listed in ``ARTICLE_PATHS`` become Markdown links using
        Pelican's ``{filename}`` syntax; links to unknown notes are reduced to
        their label. Embeds of files listed in ``FILE_PATHS`` become Markdown
        images using ``{static}``; embeds of unknown files are dropped.
        """

        def link_replacement(match):
            filename, linkname = get_file_and_linkname(match)
            path = ARTICLE_PATHS.get(filename)
            if not path:
                # Unknown target: keep the label as plain text.
                return linkname
            # "{{filename}}" renders as the literal "{filename}" marker.
            return '[{linkname}]({{filename}}{path}{filename}.md)'.format(
                linkname=linkname, path=path, filename=filename
            )

        def file_replacement(match):
            filename, linkname = get_file_and_linkname(match)
            path = FILE_PATHS.get(filename)
            if not path:
                # Don't show it at all since it would be broken.
                return ''
            # "{{static}}" renders as the literal "{static}" marker.
            return '![{linkname}]({{static}}{path}{filename})'.format(
                linkname=linkname, path=path, filename=filename
            )

        # Embeds must be handled first: the plain link pattern would also
        # match the "[[...]]" part of "![[...]]".
        text = file_re.sub(file_replacement, text)
        text = link_re.sub(link_replacement, text)
        return text

    def replace_tags(self, text):
        """Return ``text`` with every ``#hashtag`` removed."""
        return tag_re.sub('', text)

    def get_tags(self, text):
        """Return the hashtag names (without ``#``) found in ``text``, in order."""
        return tag_re.findall(text)

    def read(self, source_path):
        """Parse the content and metadata of a Markdown file.

        Links are converted to the format Pelican accepts and hashtags are
        turned into tags before the text is rendered.

        Returns a ``(content, metadata)`` tuple where ``content`` is the
        rendered HTML and ``metadata`` is a dict.
        """
        self._source_path = source_path
        self._md = Markdown(**self.settings['MARKDOWN'])

        with pelican_open(source_path) as text:
            text = self.replace_obsidian_links(text)
            tags = self.get_tags(text)
            text = self.replace_tags(text)
            content = self._md.convert(text)

        if hasattr(self._md, "Meta"):
            metadata = self._parse_metadata(self._md.Meta)
        else:
            metadata = {}

        if tags:
            # dict.fromkeys drops repeated hashtags while keeping their order.
            metadata['tags'] = [
                Tag(tag, self.settings) for tag in dict.fromkeys(tags)
            ]

        # Add the TOC to the metadata, but only when its rendered HTML is
        # longer than 300 characters.
        # NOTE(review): presumably meant to skip trivial TOCs - confirm.
        if len(self._md.toc) > 300:
            metadata["table_of_contents"] = self._md.toc

        # Take the title from the first heading and remove that heading from
        # the body (the removal only matches a plain <h1>).
        if "title" not in metadata and self._md.toc_tokens:
            first_title = self._md.toc_tokens[0]
            metadata["title"] = first_title["name"]
            content = content.replace(
                '<h1 id="{id}">{name}</h1>'.format(**first_title), ""
            )

        # Date precedence: "read_on" metadata, then a date-like file name
        # prefix, then a date already present in the metadata, and finally a
        # dummy default.
        metadata.setdefault("date", get_date("2023-01-30"))
        if "read_on" in metadata:
            metadata["date"] = datetime.strptime(metadata["read_on"], "%B %Y")
        else:
            parts = os.path.splitext(os.path.basename(source_path))[0].split("-")
            if len(parts) >= 3:
                try:
                    metadata["date"] = get_date("-".join(parts[:3]))
                except ValueError:
                    # A hyphenated name that is not a date, such as
                    # "my-great-article": keep the date chosen above.
                    pass

        # Without a title there is nothing to slugify here; the slug is then
        # left unset instead of raising KeyError.
        if "slug" not in metadata and "title" in metadata:
            metadata["slug"] = slugify(
                metadata["title"], self.settings.get("SLUG_REGEX_SUBSTITUTIONS", [])
            )

        # The category is the name of the directory containing the file.
        category = os.path.basename(
            os.path.abspath(os.path.join(source_path, os.pardir))
        )
        metadata["category"] = self.process_metadata("category", category)
        return content, metadata
def populate_files_and_articles(article_generator):
    """Index the notes and attachments found under the generator's path.

    Fills the module-level ``ARTICLE_PATHS`` (note name without ``.md`` ->
    directory) and ``FILE_PATHS`` (attachment file name -> directory) tables.
    Directories are stored relative to ``article_generator.path``, always with
    forward slashes and with a leading and trailing ``/`` (``"/"`` for files
    directly in the root, ``"/notes/sub/"`` for nested ones).
    """
    base_path = Path(article_generator.path)

    def relative_dir(entry):
        # relative_to() strips only the leading base path, unlike a
        # str.replace() which would remove every occurrence of that text;
        # as_posix() keeps "/" separators on every platform.
        relative = entry.parent.relative_to(base_path).as_posix()
        return '/' if relative == '.' else '/{}/'.format(relative)

    for article in base_path.glob('**/*.md'):
        ARTICLE_PATHS[article.stem] = relative_dir(article)

    for extension in ('png', 'jpg', 'svg', 'apkg', 'gif'):
        for attachment in base_path.glob('**/*.{}'.format(extension)):
            FILE_PATHS[attachment.name] = relative_dir(attachment)
def modify_reader(article_generator):
    """Index the content tree, then install the Obsidian reader for ``md`` files."""
    populate_files_and_articles(article_generator)
    obsidian_reader = ObsidianMarkdownReader(article_generator.settings)
    article_generator.readers.readers['md'] = obsidian_reader
def modify_metadata(article_generator, metadata):
    """
    Strip any ``#`` from the tag names so tags can be written the Obsidian way.

    Tags whose name contains no ``#`` are left untouched.
    """
    hashed_tags = (tag for tag in metadata.get('tags', []) if '#' in tag.name)
    for hashed in hashed_tags:
        hashed.name = hashed.name.replace('#', '')
def register():
    """Connect this module's handlers to the article generator signals."""
    hooks = (
        (signals.article_generator_context, modify_metadata),
        (signals.article_generator_init, modify_reader),
    )
    for signal, handler in hooks:
        signal.connect(handler)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment