Skip to content

Instantly share code, notes, and snippets.

@BjornFJohansson
Last active December 19, 2023 06:44
Show Gist options
  • Save BjornFJohansson/c340f1040c3f722513901a52a0fc1d5c to your computer and use it in GitHub Desktop.
Save BjornFJohansson/c340f1040c3f722513901a52a0fc1d5c to your computer and use it in GitHub Desktop.
from pathlib import Path
import urllib
import re, os
from urllib.parse import urlparse
from string import punctuation
# Collect every Markdown page in the current working directory.
pages = sorted(Path('.').glob('*.md'))

# Sometimes wp creates pages with ~ (tilde). Rename these to the part of the
# file name that precedes the first tilde, unless that name is already taken.
tildepages = [p for p in pages if "~" in str(p)]
for tildepage in tildepages:
    name_before_tilde = str(tildepage).split("~", maxsplit=1)[0]
    newpath = Path(f"{name_before_tilde}.md")
    if not newpath.exists():
        tildepage.rename(newpath)

# Renamed files no longer exist under their old names; refresh the listing so
# that the following passes never try to open a stale path.
pages = sorted(Path('.').glob('*.md'))
# Some wp files have unicode and some punctuation quoted (percent-encoded,
# cp1252). Rename them to the decoded name.
# subpages are renamed with a pipe | character (a "/" cannot be part of a
# file name).
for page in pages:
    uq = urllib.parse.unquote(str(page), encoding='cp1252')
    if str(page) == uq:
        continue  # nothing was quoted in this name
    nm = Path(uq.replace("/", "|"))
    # Do not clobber an existing page: on POSIX, rename() silently replaces
    # the target.
    if not nm.exists():
        page.rename(nm)

# The old paths of renamed pages are gone; refresh the listing so later passes
# operate on files that actually exist.
pages = sorted(Path('.').glob('*.md'))
# Strip page name from first line, remove if only punctuation remains
# NOTE(review): the counter is assumed to count rewritten pages; the original
# indentation of the increment was lost.
i = 0
for page in pages:
    # Remove a leading byte-order mark only if one is present; slicing off the
    # first character unconditionally would eat real text on pages without it.
    lines = page.read_text(encoding='utf-8').lstrip("\ufeff").splitlines()
    if not lines:
        continue  # empty page: there is no first line to clean up
    firstline, *rest = lines
    newfirstline = firstline.strip("# ").replace(page.stem, "")
    # Blank the line when nothing but punctuation (and the whitespace between
    # it) is left after removing the page name.
    if not set(newfirstline) - set(punctuation) - set(" \t"):
        newfirstline = ""
    if firstline != newfirstline:
        newpagetext = newfirstline + "\n" + "\n".join(rest)
        # Write with the same encoding the page was read with instead of the
        # locale default.
        page.write_text(newpagetext, encoding='utf-8')
        i += 1
# 7175 pages
# remove [alias:...] and replace with obsidian alias:
# https://help.obsidian.md/Linking+notes+and+files/Aliases
# NOTE(review): the counter is assumed to count rewritten pages; the original
# indentation of the increment was lost.
i = 0
# The alias list ends at the first closing bracket on the same line; a greedy
# ".+" would run on to the last "]" of the line and swallow unrelated text.
regxal = re.compile(r"\[alias:([^\]\n]+)\]")
# NOTE(review): the first page is skipped here, as in the original script; the
# reason is not visible in this file - confirm it is intentional.
for page in pages[1:]:
    # Strip a byte-order mark only when present (do not drop a real character).
    pagetext = page.read_text(encoding='utf-8').lstrip("\ufeff")
    matchobj = regxal.search(pagetext)
    if matchobj:
        # Aliases are separated by ";" with or without surrounding spaces.
        aliases = [a.strip() for a in matchobj.group(1).split(";") if a.strip()]
        nb = f"---\naliases: {', '.join(aliases)}\n---\n\n"
        # Only the first [alias:...] tag of a page is converted and removed.
        newpagetext = nb + pagetext[:matchobj.start()] + pagetext[matchobj.end():]
        page.write_text(newpagetext, encoding='utf-8')
        i += 1
# Replace absolute file links with obsidian md style links:
#   file:/home/bjorn/Desktop/mecwiki/yeast-colony-pcr.md
#   [file:/home/bjorn/Desktop/mecwiki/yeast-colony-pcr.md]
# >>>====>
#   [yeast-colony-pcr.md](<file:/home/bjorn/Desktop/mecwiki/yeast-colony-pcr.md>)
# This could be improved to handle images by prepending a ! for some links
#
# Group 1 is the URL itself. An opening and/or closing square bracket around
# it is optional and is consumed by the match. Whitespace after the URL is
# deliberately NOT part of the match, so a substitution keeps the space or
# line break that follows a bare link, and a link at the very end of the text
# is matched as well.
regxfl = re.compile(r"\[?(file:/[^\s\]]+)\]?")
i = 0
def repl_file_links(matchobj):
    """Return a Markdown link for a matched ``file:`` URL.

    Intended as the replacement callback of ``re.sub``/``re.subn``.

    Args:
        matchobj: A match object whose group 1 holds the URL.

    Returns:
        ``[label](<url>)`` where the label is the last path component of the
        URL. The URL is wrapped in angle brackets inside the link target.
    """
    url = matchobj.group(1)
    # A URL ending in "/" has an empty basename, which would produce a link
    # with no visible text; use the last real component, or the URL itself.
    fn = os.path.basename(url.rstrip("/")) or url
    return f"[{fn}](<{url}>)"
# Rewrite every page that contains at least one file: link.
# NOTE(review): the counter is assumed to count rewritten pages; the original
# indentation of the increment was lost.
for page in pages:
    # Drop a leading byte-order mark, if any, before processing the text.
    pagetext = page.read_text(encoding='utf-8').lstrip("\ufeff")
    # subn() also reports how many links were replaced on this page.
    newpagetext, r = regxfl.subn(repl_file_links, pagetext)
    if r:
        # Write with the encoding the page was read with; the locale default
        # is not guaranteed to be UTF-8.
        page.write_text(newpagetext, encoding='utf-8')
        i += 1
# Search and replace all *defined* [wikiwords]: a bracketed word becomes an
# Obsidian [[link]] when a page with exactly that name exists.
# NOTE(review): bare CamelCase words (without brackets) are not handled here.
from tqdm import tqdm

# Names of all existing pages, for O(1) membership tests. Comparing plain
# strings also avoids interpreting characters such as "|", "+" or "." in a
# page name as regex syntax.
stems = {p.stem for p in pages}

# One bracketed word that is not already part of a [[...]] link; the
# lookarounds make a second run of the script a no-op.
# NOTE(review): a Markdown link label such as [Name](url) is still converted
# when Name is a page, as before - confirm this is wanted.
regxww = re.compile(r"(?<!\[)\[([^\[\]\n]+)\](?!\])")


def repl_wikiword(matchobj):
    """Return ``[[word]]`` if *word* names an existing page, else the match unchanged."""
    word = matchobj.group(1)
    return f"[[{word}]]" if word in stems else matchobj.group(0)


for page in tqdm(pages):
    pagetext = page.read_text(encoding='utf-8')
    newpagetext = regxww.sub(repl_wikiword, pagetext)
    # Only touch files that actually changed, and keep them UTF-8.
    if newpagetext != pagetext:
        page.write_text(newpagetext, encoding='utf-8')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment