Skip to content

Instantly share code, notes, and snippets.

@james2doyle
Created April 17, 2023 19:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save james2doyle/95657149dfd386db2cfd4abb7e0e69f4 to your computer and use it in GitHub Desktop.
Save james2doyle/95657149dfd386db2cfd4abb7e0e69f4 to your computer and use it in GitHub Desktop.
Linkoln parses wikilinks out of a markdown document, and searches the world wide web to find a hyperlink for each one. https://animaomnium.github.io/keep-stuff-linkable/
#!/usr/bin/python3
# Linkoln by Anima Omnium
# Dedicated to the Public Domain
# https://animaomnium.github.io/keep-stuff-linkable/
# Example input:
#   [[programming language:Rust]] is a [[systems programming language]] bootstrapped from [[rust prehistory|OCaml]].
# Example output:
#   [Rust][1] is a [systems programming language][2] bootstrapped from [OCaml][3].
#   [1]: https://www.rust-lang.org
#   [2]: https://en.wikipedia.org/wiki/System_programming_language
#   [3]: https://github.com/graydon/rust-prehistory
# Just standard library for portability
import sys
import time
import urllib.parse
import urllib.request
# Input from file, output to stdout
# Suggested usage:
# python linkoln.py INPUT.md > OUTPUT.md
# Read input file name
if len(sys.argv) != 2:
    # stdout is normally redirected into the output document, so the usage
    # message goes to stderr where the user will actually see it.
    print("Usage: linkoln FILE", file=sys.stderr)
    # sys.exit is always available; the bare exit() helper is only injected
    # by the site module and is not meant for use in programs.
    sys.exit(1)
# Read the whole document into memory; the parser consumes it as one string
FILE = sys.argv[1]
with open(FILE, "r") as fin:
    INPUT = fin.read()
# Number given to the first link reference
OFFSET = 1
# Regions in which wikilinks are left alone (code, headings, frontmatter),
# written as (opening marker, closing marker) pairs. The pairs are tried in
# order and the first match wins, so "```" has to come before "`".
IGNORE = [
    ("```", "```"),
    ("#", "\n"),
    ("`", "`"),
    ("+++", "+++"),
]
# Wikilink delimiters
LINK_OPEN, LINK_CLOSE = "[[", "]]"
# Separators inside a wikilink: "query|text" and "context:text"
LINK_QUERY, LINK_CONTEXT = "|", ":"
# Parser states: inside an ignored region, scanning plain text, collecting
# the body of a wikilink
S_IGNORE, S_SCANIN, S_EATING = range(3)
# Parser starts with the whole document still to consume, in scanning mode
rem = INPUT
state = S_SCANIN
# Closing marker of the ignored region in progress, and the text collected
# so far for the wikilink in progress
closing = inside = ""
# (number, wikilink body) pairs in order of appearance
colophon = list()
def skip(r, amt):
    """Return ``r`` with its first ``amt`` characters dropped."""
    remainder = r[amt:]
    return remainder
def eat(r, amt):
    """Echo the first ``amt`` characters of ``r`` to stdout and return the rest.

    Nothing is appended to the echoed text (no trailing newline).
    """
    consumed = r[:amt]
    print(consumed, end="")
    return skip(r, amt)
def check(r, against):
    """Return True when the string ``r`` begins with the string ``against``."""
    return r.startswith(against)
def extract(inside):
    """Split the body of a wikilink into a search query and a display text.

    Three forms are recognised, checked in this order:

    * ``query|text``   -- search for ``query``, display ``text``.
    * ``context:text`` -- search for ``context text``, display ``text``.
    * ``text``         -- search for and display ``text``.

    Only the first separator splits the body, so any further ``|`` or ``:``
    characters stay in the display text instead of raising ``ValueError``.

    Args:
        inside: The text between the wikilink delimiters.

    Returns:
        A ``(link, text)`` tuple: the search query and the display text.
    """
    if LINK_QUERY in inside:
        link, _, text = inside.partition(LINK_QUERY)
    elif LINK_CONTEXT in inside:
        context, _, text = inside.partition(LINK_CONTEXT)
        # The context only disambiguates the search; it is never displayed.
        link = f"{context} {text}"
    else:
        link = text = inside
    return (link, text)
def emit_link(entry):
    """Print the inline reference-style link for ``entry``.

    ``entry`` is a ``(number, wikilink body)`` pair. The output is
    ``[text][number]`` with no trailing newline, where ``text`` is the
    display half of the wikilink body.
    """
    num, body = entry
    text = extract(body)[1]
    print(f"[{text}][{num}]", end="")
def emit_entry(entry):
    """Print the link reference definition for ``entry``.

    ``entry`` is a ``(number, wikilink body)`` pair. The query half of the
    body is passed to ``google_it`` and the result is printed as
    ``[number]: result`` on its own line.
    """
    num, body = entry
    query = extract(body)[0]
    target = google_it(query)
    print(f"[{num}]: {target}")
def google_it(query):
    """Search DuckDuckGo Lite for ``query`` and return the top hit as a URL.

    The results page is scraped with plain byte splitting: after the first
    occurrence of ``link-text``, the text between the next ``>`` and the
    following ``<`` is taken and prefixed with ``https://``.

    Args:
        query: Free-text search terms.

    Returns:
        ``"https://"`` plus the scraped text on success, or
        ``f"ERROR: {query}"`` when the request, the scraping or the decoding
        fails, so that a human can fix the reference by hand.
    """
    # Dumbest most fragile hack ever
    quoted = urllib.parse.quote(query, safe='')
    # Don't hammer friends at DuckDuckGo
    time.sleep(0.5)
    # NOTE(review): the query is appended to the path as "search&q=..." rather
    # than as a "?q=..." query string -- confirm this is the intended endpoint.
    url = f"https://lite.duckduckgo.com/lite/search&q={quoted}"
    try:
        # The context manager closes the HTTP response even if read() fails;
        # the timeout keeps one stalled request from hanging the whole run.
        with urllib.request.urlopen(url, timeout=30) as response:
            contents = response.read()
        # Parsing html is easy
        top_result = contents.split(b"link-text")[1]
        top_link = top_result.split(b">")[1].split(b"<")[0]
        # Decoded inside the try so undecodable bytes also become an ERROR
        # entry instead of aborting the run.
        link = top_link.decode("utf-8")
    except Exception:
        # Leave for human to fix. Exception (not a bare except) so that
        # Ctrl-C and SystemExit still stop the program.
        return f"ERROR: {query}"
    return "https://" + link
# State machine driving loop: consume `rem` from the front until it is empty.
# eat() echoes what it consumes to stdout, skip() drops it silently.
while rem != "":
    # Scanning for next link or comment
    if state == S_SCANIN:
        # Named `opener`/`closer` so the builtin open() is not shadowed.
        for (opener, closer) in IGNORE:
            if check(rem, opener):
                rem = eat(rem, len(opener))
                closing = closer
                state = S_IGNORE
                break
        if state == S_IGNORE:
            continue
        if check(rem, LINK_OPEN):
            # The delimiter itself is dropped; the link is re-emitted in
            # reference style once its closing delimiter is found.
            rem = skip(rem, len(LINK_OPEN))
            inside = ""
            state = S_EATING
            continue
        rem = eat(rem, 1)
    # Eating contents of wikilink
    elif state == S_EATING:
        if check(rem, LINK_CLOSE):
            rem = skip(rem, len(LINK_CLOSE))
            entry = (len(colophon) + OFFSET, inside)
            emit_link(entry)
            colophon.append(entry)
            state = S_SCANIN
        else:
            inside = inside + rem[:1]
            rem = skip(rem, 1)
    # Ignoring contents of comments
    elif state == S_IGNORE:
        if check(rem, closing):
            rem = eat(rem, len(closing))
            state = S_SCANIN
        else:
            rem = eat(rem, 1)
    # Frick your computer is on fire
    else:
        # Raised explicitly: `assert` is stripped under -O.
        raise AssertionError("Invalid state")
# A wikilink that was opened but never closed has been collected in `inside`
# without being echoed; write it back verbatim so no input text is lost.
if state == S_EATING:
    print(LINK_OPEN + inside, end="")
# Google all the queries
print()
for entry in colophon:
    emit_entry(entry)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment