Skip to content

Instantly share code, notes, and snippets.

@AyeGill
Created April 11, 2020 07:35
Show Gist options
  • Save AyeGill/72744b0e54ebf898268beb8c992cabd5 to your computer and use it in GitHub Desktop.
Save AyeGill/72744b0e54ebf898268beb8c992cabd5 to your computer and use it in GitHub Desktop.
Fix roam files exported to org
#!/usr/bin/env python
import urllib as ul
import re
import sys
import os.path
#Pandoc turns this: [foo]([[My Foo Page]])
#Into this: [[file:%5B%5BMy%20Foo%20Page%5D%5D][foo]]
#We want this: [[file:My Foo Page.org][foo]]
#So we process and "un-escape the escaped characters".
#Then we add .org to links and remove the superfluous brackets
#This may break some links with %s in them - sorry.
#Also breaks file links to non-org files.
# Matches an unaliased link "[[target]]" whose target contains no "[";
# group 1 is the target text.
slink_re = r"\[\[([^\[]*?)\]\]" #unaliased link
# Matches an aliased file link "[[file:target][label]]";
# group 1 is the (lazily matched) target, group 2 is the label.
alink_re = r"\[\[file\:(.*?)\]\[(.*?)\]\]"
def slinkfixr(matchobj):
    """Turn an unaliased link match into an aliased file link.

    ``matchobj`` is a regex match whose group 1 is the link target.
    Web links (targets starting with ``http://`` or ``https://``) are
    returned unchanged. Any other target ``T`` becomes ``[[file:T][T]]``.

    The target is not percent-decoded here (these links are not escaped),
    and the ".org" suffix is deliberately not added: the aliased-link pass
    that runs afterwards appends it.
    """
    target = matchobj.group(1)
    # Require the full scheme separator so that page titles which merely
    # begin with "http" (e.g. "http status codes") are still converted.
    if target.startswith(("http://", "https://")):
        return matchobj.group(0)  # the entire thing
    return "[[file:" + target + "][" + target + "]]"
def alinkfixr(matchobj):
    """Clean up an aliased file link match.

    ``matchobj`` is a regex match whose group 1 is the (possibly
    percent-encoded) link target and whose group 2 is the link label.
    The target is percent-decoded, leading/trailing square brackets are
    stripped, and ".org" is appended; the label is kept as is.

    Returns the rewritten link ``[[file:<target>.org][<label>]]``.
    """
    # unquote lives in urllib.parse on Python 3 and directly in urllib on
    # Python 2; import locally so the function works on either.
    try:
        from urllib.parse import unquote
    except ImportError:
        from urllib import unquote
    target = unquote(matchobj.group(1)).strip("[]")
    return "[[file:" + target + ".org][" + matchobj.group(2) + "]]"
def fix_links(text):
    """Return ``text`` with both link-fixing passes applied.

    The unaliased-link pass runs first, then the aliased-link pass runs
    on its output.
    """
    passes = (
        (slink_re, slinkfixr),
        (alink_re, alinkfixr),
    )
    for pattern, fixer in passes:
        text = re.sub(pattern, fixer, text)
    return text
def run(filename):
    """Fix the links in ``filename`` in place and print the new contents.

    The file is read in full, passed through ``fix_links``, echoed to
    stdout, and then overwritten with the result.
    """
    with open(filename, "r+") as f:
        old = f.read()
        new = fix_links(old)
        print(new)
        f.seek(0)
        f.write(new)
        # The rewritten text can be shorter than the original (decoding
        # percent-escapes shrinks links), so drop any stale trailing bytes.
        f.truncate()
def dryrun(filename):
    """Print what ``run`` would write for ``filename`` without modifying it."""
    # Use a context manager so the handle is closed even if fix_links raises.
    with open(filename, "r") as f:
        new = fix_links(f.read())
    print(new)
# Rewrite every command-line argument that names an existing file;
# anything else is skipped silently.
for filename in sys.argv[1:]:
    if not os.path.isfile(filename):
        continue
    run(filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment