Skip to content

Instantly share code, notes, and snippets.

@j2kun
Created May 13, 2024 03:48
Show Gist options
  • Save j2kun/00804c6e893ee1fdbaee196e100d650d to your computer and use it in GitHub Desktop.
Save j2kun/00804c6e893ee1fdbaee196e100d650d to your computer and use it in GitHub Desktop.
import os
import pathlib
import pprint
import re
import subprocess
import fire
import marko
from mastodon import Mastodon
# Files in the shortform directory that are never published as toots.
FILES_TO_IGNORE = {"_index.md"}

# Prefix prepended to site-relative paths to form absolute blog URLs.
BLOG_URL_BASE = "https://www.jeremykun.com"

# A simple text file with two urls per line
DATABASE_FILE = "scripts/published_toots.txt"

# Matches inline math written between single dollar signs; the lazy group
# captures the text between one pair of delimiters.
INLINE_MATH_DOLLAR_REGEX = re.compile(r"\$(.*?)\$")
def getGitRoot():
    """Return the output of ``git rev-parse --show-toplevel`` as a string.

    The command's standard output is stripped of trailing whitespace and
    decoded as UTF-8. The exit status of git is not inspected.
    """
    completed = subprocess.run(
        ["git", "rev-parse", "--show-toplevel"],
        stdout=subprocess.PIPE,
    )
    raw_output = completed.stdout
    return raw_output.rstrip().decode("utf-8")
def canonical_url(filename: str) -> str:
    """Return the blog permalink for the shortform post stored in ``filename``.

    Args:
        filename: Name of the post's markdown file, e.g. ``"my-post.md"``.

    Returns:
        ``BLOG_URL_BASE`` followed by ``/shortform/``, the filename without
        its ``.md`` suffix, and a trailing slash.
    """
    # Note: using the url metadata inside the markdown file itself is not
    # supported, we just assume no special url is set.
    #
    # removesuffix() drops only a literal trailing ".md". rstrip(".md") would
    # instead strip every trailing ".", "m" and "d" character, turning
    # "random.md" into "rando".
    stripped_filename = filename.removesuffix(".md")
    return f"{BLOG_URL_BASE}/shortform/{stripped_filename}/"
def convert_paragraph(node):
if node.get_type() != "Paragraph":
raise ValueError(f"Invalid input node of type {node.get_type()}")
toot_str = ""
for child in node.children:
match child.get_type():
case "LineBreak":
toot_str += " "
case "RawText":
# in this case, child.children is a single string, despite the
# name "children".
text = child.children
# Convert to mathstodon-compatible inline mathmode
text = INLINE_MATH_DOLLAR_REGEX.sub(r"\\(\1\\)", text)
toot_str += text
case "Link":
if child.dest.startswith("http"):
url = child.dest
elif child.dest.startswith("/"):
url = f"{BLOG_URL_BASE}{child.dest}"
else:
raise ValueError(f"Unsupported link destination f{child.dest}")
assert len(child.children) == 1
link_text = child.children[0].children
toot_str += f"{link_text} ({url})"
case "CodeSpan":
toot_str += f"`{child.children}`"
case _:
raise ValueError(f"Unsupported paragraph node type: {child.get_type()}")
return toot_str
def convert_code_block(node, post_permalink):
    """Return a placeholder toot for a ``FencedCode`` node.

    Code blocks make for bad toots, so the code itself is left out and the
    reader is pointed at ``post_permalink`` instead.

    Raises:
        ValueError: If ``node`` is not of type ``FencedCode``.
    """
    if node.get_type() == "FencedCode":
        return f"(Code omitted for brevity. See: {post_permalink})"
    raise ValueError(f"Invalid input node of type {node.get_type()}")
def convert_post_to_thread(content, post_permalink):
    """Split the markdown text of a post into a list of toot strings.

    Each top-level paragraph or fenced code block of ``content`` becomes one
    toot; code blocks are replaced by a pointer to ``post_permalink``.

    Raises:
        ValueError: If the document contains a top-level node type other
            than the ones handled here.
    """
    document = marko.Markdown().parse(content)
    thread = []
    # Hugo frontmatter is demarcated by ------ lines, which marko parses as
    # ThematicBreak nodes; everything between a pair of them is skipped.
    inside_frontmatter = False
    for block in document.children:
        if block.get_type() == "ThematicBreak":
            inside_frontmatter = not inside_frontmatter
            continue
        if inside_frontmatter:
            continue

        block_type = block.get_type()
        if block_type in ("LineBreak", "BlankLine"):
            continue
        if block_type == "Paragraph":
            thread.append(convert_paragraph(block))
        elif block_type == "FencedCode":
            thread.append(convert_code_block(block, post_permalink))
        else:
            raise ValueError(
                f"Unsupported doc node type {block.get_type()}: {block}"
            )
    return thread
def load_database(path):
    """Load the blog-URL to Mastodon-URL mapping stored at ``path``.

    Each non-blank line of the file holds two whitespace-separated URLs: a
    blog URL followed by a Mastodon URL.

    Args:
        path: Location of the database file.

    Returns:
        A dict mapping each blog URL to its Mastodon URL. The dict is empty
        when the file does not exist.

    Raises:
        ValueError: If a non-blank line does not hold exactly two fields.
    """
    try:
        infile = open(path, "r")
    except FileNotFoundError:
        return {}

    mapping = {}
    with infile:
        for line_number, line in enumerate(infile, start=1):
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a stray trailing newline left by
                # a hand edit) instead of failing on tuple unpacking.
                continue
            if len(fields) != 2:
                raise ValueError(
                    f"{path}:{line_number}: expected two URLs per line, "
                    f"found {len(fields)} fields"
                )
            blog_url, mastodon_url = fields
            mapping[blog_url] = mastodon_url
    return mapping
def dump_database(mapping, path):
    """Write ``mapping`` to ``path`` as one ``key value`` pair per line.

    Any existing file at ``path`` is overwritten.
    """
    rows = (f"{post_url} {toot_url}\n" for post_url, toot_url in mapping.items())
    with open(path, "w") as outfile:
        outfile.writelines(rows)
def _post_thread(mastodon_client, toots):
    """Post ``toots`` as one reply-chained thread and return the status dicts.

    If posting any toot fails, the toots of this thread that were already
    posted are deleted (best effort) before the error is re-raised, so that
    a later run does not publish a second copy of the thread's beginning.
    """
    posted = []
    try:
        for i, toot in enumerate(toots):
            # Every toot after the first replies to the one before it.
            reply_id = posted[-1]["id"] if posted else None
            status_dict = mastodon_client.status_post(toot, in_reply_to_id=reply_id)
            print(
                f"Successfully posted toot {i} of the thread: "
                f"{status_dict['id']} -> {status_dict['url']}"
            )
            posted.append(status_dict)
    except Exception:
        # Remove the newest toots first so replies go before their parents.
        for status_dict in reversed(posted):
            try:
                mastodon_client.status_delete(status_dict["id"])
                print(f"Deleted partially posted toot {status_dict['id']}")
            except Exception as cleanup_error:
                print(
                    f"Could not delete partially posted toot "
                    f"{status_dict['id']}: {cleanup_error}"
                )
        raise
    return posted


def publish_to_mastodon(mastodon_client=None):
    """Idempotently publish all shortform posts to mastodon.

    Every file in ``content/shortform`` under the git root (except those in
    ``FILES_TO_IGNORE``) whose permalink is not yet recorded in the database
    file is converted to a thread and posted. The database is rewritten on
    exit, even when publishing fails part-way, so that threads which were
    fully posted are not posted again.

    Args:
        mastodon_client: Client used for posting. When ``None``, a client for
            https://mathstodon.xyz is created from the ``MASTODON_TOKEN``
            environment variable or, if unset, the credential file path.

    Raises:
        RuntimeError: If the git root has no ``.git`` directory.
        ValueError: If the shortform directory is missing, or a post
            contains unsupported markdown.
    """
    if mastodon_client is None:
        # File generated by scripts/login_with_mastodon.py or else set in
        # environment for headless usage in GH actions.
        mastodon_client = Mastodon(
            api_base_url="https://mathstodon.xyz",
            access_token=os.getenv(
                "MASTODON_TOKEN", "scripts/jeremykun_tootbot_usercred.secret"
            ),
        )

    git_root = pathlib.Path(getGitRoot())
    if not os.path.isdir(git_root / ".git"):
        raise RuntimeError(f"Could not find git root, looked at {git_root}")
    print(f"Found {git_root=}")

    shortform_path = git_root / "content" / "shortform"
    if not os.path.isdir(shortform_path):
        raise ValueError(f"Could not find shortform_path at {shortform_path}")

    # Sorted so that posts are published in the same order on every run.
    posts_to_try = sorted(
        x for x in os.listdir(shortform_path) if x not in FILES_TO_IGNORE
    )
    print(f"{posts_to_try=}")

    # dict mapping Blog URL to first post url in published Mastodon thread.
    database_path = git_root / DATABASE_FILE
    published_toots = load_database(database_path)
    print("Existing toots:")
    pprint.pp(published_toots)

    try:
        for filename in posts_to_try:
            print(f"Processing {filename}")
            blog_post_permalink = canonical_url(filename)
            if blog_post_permalink in published_toots:
                print(
                    f"{filename} has existing toot thread at "
                    f"{published_toots[blog_post_permalink]}, skipping."
                )
                continue

            with open(shortform_path / filename, "r", encoding="utf-8") as infile:
                toots = convert_post_to_thread(infile.read(), blog_post_permalink)

            if not toots:
                # Nothing to post (e.g. a post with frontmatter only); without
                # this guard toots[0] below would abort the whole run.
                print(f"{filename} has no publishable content, skipping.")
                continue

            # Add a backlink to the end of the first toot in the thread
            toots[0] += f"\n\nArchived at: {blog_post_permalink}"

            # a debug print of the toots about to be posted
            print(f"Printing toot thread for {filename}:\n----------------------")
            for i, toot in enumerate(toots):
                print(f"\n{i}.\t{toot}")
            print("\n----------------------\n")

            print(f"Publishing toot thread for {filename}")
            toots_for_post = _post_thread(mastodon_client, toots)

            # All toots posted successfully
            published_toots[blog_post_permalink] = toots_for_post[0]["url"]
    finally:
        print("Writing successful toot URLs to disk")
        dump_database(published_toots, database_path)


if __name__ == "__main__":
    fire.Fire(publish_to_mastodon)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment