Skip to content

Instantly share code, notes, and snippets.

@zackmdavis
Last active June 30, 2020 03:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zackmdavis/a0eb3826456cf9d59d4e82b2629b8fd6 to your computer and use it in GitHub Desktop.
Save zackmdavis/a0eb3826456cf9d59d4e82b2629b8fd6 to your computer and use it in GitHub Desktop.
replace links to Slate Star Codex posts with the last Internet Archive Wayback Machine version
import os
import re
import requests
import sys
# Matches the canonical permalink of a Slate Star Codex post:
# scheme, host, /YYYY/MM/DD/, a lowercase slug, and a trailing slash.
# The dot in the hostname is escaped so that it only matches a literal "."
# (an unescaped "." would also accept look-alike hosts such as
# "slatestarcodexxcom").
slate_sturl_regex = re.compile(
    r"https?://slatestarcodex\.com/\d{4}/\d{2}/\d{2}/[-a-z0-9]+/"
)
def slate_starchive_post_content(content):
    """Return `content` with Slate Star Codex post links swapped for archives.

    Every substring matched by the module-level `slate_sturl_regex` is looked
    up through the Internet Archive "availability" endpoint and, when a
    snapshot is reported, replaced by the snapshot URL.  Links for which no
    snapshot could be obtained are left untouched and a diagnostic is printed.

    Args:
        content: The text (e.g. a Markdown document) to scan.

    Returns:
        The revised text; equal to `content` when nothing was replaced.
    """
    # A link that already sits inside a Wayback URL
    # ("...web.archive.org/web/<timestamp>/<original url>") must not be
    # rewritten again, otherwise re-running the tool nests archive URLs.
    already_archived = re.compile(r"web\.archive\.org/web/[^/\s]+/\Z")
    # link URL -> archive URL, or None when the lookup failed.  Each distinct
    # link is queried (and reported) only once, however often it occurs.
    lookups = {}

    def find_archive_url(link_url):
        """Ask the Wayback Machine for a snapshot of `link_url`, or None."""
        try:
            archive_response = requests.get(
                "http://archive.org/wayback/available",
                # Let requests URL-encode the target instead of splicing it
                # into the query string verbatim.
                params={"url": link_url},
                # Without a timeout a stalled connection hangs the whole run.
                timeout=30,
            )
        except requests.RequestException as error:
            print(
                "didn't successfully get an archive link for {}: {}".format(
                    link_url, error
                )
            )
            return None
        try:
            archive_url = archive_response.json()['archived_snapshots']['closest']['url']
        except (KeyError, TypeError, ValueError):
            # KeyError: no snapshot listed; TypeError: unexpected JSON shape;
            # ValueError: the body was not JSON at all (e.g. an error page).
            print(
                "didn't successfully get an archive link for {}: {}".format(
                    link_url, archive_response.text
                )
            )
            return None
        print("replacing \033[93m{}\033[0m with \033[92m{}\033[0m".format(link_url, archive_url))
        return archive_url

    def substitute(match):
        """Return the replacement text for one matched link."""
        link_url = match.group()
        # Only a short window before the match is needed to spot the
        # "web.archive.org/web/<timestamp>/" prefix.
        preceding = match.string[max(0, match.start() - 64):match.start()]
        if already_archived.search(preceding):
            return link_url
        if link_url not in lookups:
            lookups[link_url] = find_archive_url(link_url)
        return lookups[link_url] or link_url

    # Substituting match-by-match in a single pass (rather than calling
    # str.replace on the progressively revised text) guarantees that an
    # inserted archive URL, which itself contains the original link, is never
    # scanned and replaced a second time.
    return slate_sturl_regex.sub(substitute, content)
def tree(root):
    """Rewrite Slate Star Codex links in every ``.md`` file below `root`.

    Walks `root` recursively with `os.walk`; each file whose name ends in
    ``.md`` is read, passed through `slate_starchive_post_content`, and
    written back in place only if the returned text differs from what was
    read.  Progress is reported on standard output.

    Args:
        root: Directory to walk.

    Raises:
        UnicodeDecodeError: If a ``.md`` file is not valid UTF-8.
        OSError: If a file cannot be read or written.
    """
    for path, _dirs, filenames in os.walk(root):
        for filename in filenames:
            if not filename.endswith(".md"):
                continue
            filepath = os.path.join(path, filename)
            # Explicit UTF-8 keeps the result independent of the platform's
            # locale encoding; newline="" turns off newline translation so
            # the file's existing line endings survive the round trip.
            with open(filepath, encoding="utf-8", newline="") as f:
                print("examining {}".format(filepath))
                content = f.read()
            # The read handle is closed before the file is reopened for
            # writing below.
            revised = slate_starchive_post_content(content)
            if revised == content:
                continue
            with open(filepath, 'w', encoding="utf-8", newline="") as g:
                print("revising {}".format(filepath))
                g.write(revised)
if __name__ == "__main__":
    # The directory to process is the first command-line argument; show a
    # usage message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {} ROOT_DIRECTORY".format(sys.argv[0]))
    tree(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment