Skip to content

Instantly share code, notes, and snippets.

@raivivek
Forked from wcaleb/wayback.py
Created June 22, 2020 06:01
Show Gist options
  • Save raivivek/0eb22573d6ba4b0b6873889511118621 to your computer and use it in GitHub Desktop.
Save raivivek/0eb22573d6ba4b0b6873889511118621 to your computer and use it in GitHub Desktop.
Pandoc filter to replace Link URLs with Wayback Machine URLs
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
# Usage: pandoc --filter=wayback.py input
# Install pandocfilters and requests with pip before using
# Warning: may take a while to process input with lots of links
# Note: Links that can't be saved to WBM or already point to WBM are left as is
from pandocfilters import toJSONFilter, Link
import requests
# Root URL of the Wayback Machine. The filter appends '/save/<url>' to it to
# request a snapshot, and uses it to spot links that already point there.
base_url = 'http://web.archive.org'
def wayback(k, v, f, m):
    '''Replace a Link's URL with a freshly saved Wayback Machine snapshot URL.

    Written as a pandocfilters action: returns a new Link element when the
    URL was archived, and None (element left untouched) otherwise.

    k -- element type name; only 'Link' is handled
    v -- element contents; the last item is the [url, title] target.
         Older pandoc emits [inlines, target], pandoc 1.16+ emits
         [attr, inlines, target]
    f -- output format (unused)
    m -- document metadata (unused)
    '''
    if k != 'Link':
        return None

    # Index from the end so both Link layouts are handled.
    url, title = v[-1]

    # Compare without the scheme so https:// snapshot links are recognised
    # as already archived instead of being submitted again.
    if base_url.split('://', 1)[-1] in url:
        return None

    try:
        # Saving a page can be slow, but must never hang the conversion.
        r = requests.get(base_url + '/save/' + url, timeout=60)
    except requests.RequestException:
        # Links that cannot be saved are left as is.
        return None

    # The snapshot path is only usable when the save succeeded and the
    # server actually reported where the snapshot lives.
    location = r.headers.get('content-location')
    if r.status_code != requests.codes.ok or not location:
        return None

    # Keep every leading item (attr and/or inlines) and swap only the target.
    return Link(*(v[:-1] + [[base_url + location, title]]))
if __name__ == "__main__":
    # Hand the wayback action to pandocfilters so this script can be used
    # with `pandoc --filter`.
    toJSONFilter(wayback)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment