Skip to content

Instantly share code, notes, and snippets.

@tmcw
Created November 5, 2017 22:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tmcw/0063bacb47ab04b93fe903dd5f58424c to your computer and use it in GitHub Desktop.
Save tmcw/0063bacb47ab04b93fe903dd5f58424c to your computer and use it in GitHub Desktop.
import re
import codecs
import requests
import glob
# Patterns are raw strings so that ``\d`` and ``\.`` reach the regex engine
# untouched (non-raw ``"\d"`` / ``"\-"`` are invalid string escapes), and the
# dots in host names are escaped so they only match a literal ".".

# An amzn.to short link; group(1) is the short code.
AMZN_RE = re.compile(r"https?://amzn\.to/([0-9A-Za-z]+)")

# Amazon product URL of the form /<title-slug>/dp/<ISBN-10>/; group(1) is the
# ISBN. The last ISBN-10 character is a check digit that may be "X".
ISBN1 = re.compile(r"https://www\.amazon\.com/(?:[A-Za-z-]+)/dp/(\d{9}[\dX])/")

# Amazon product URL of the form /gp/product/<ISBN-10>/; group(1) is the ISBN.
ISBN2 = re.compile(r"https://www\.amazon\.com/gp/product/(\d{9}[\dX])/")
def remove_amazon(filename):
    """Replace amzn.to short links in *filename* with WorldCat permalinks.

    Each short link is followed to its final URL; when that URL matches
    ``ISBN1`` or ``ISBN2`` the captured ISBN is looked up on worldcat.org and
    the short link is replaced by the URL WorldCat redirects to. Links whose
    destination yields no ISBN are reported and left unchanged.

    Args:
        filename: Path of a UTF-8 encoded text file, rewritten in place.
    """
    print("Translating %s" % filename)
    with codecs.open(filename, encoding='utf-8') as handle:
        text = handle.read()

    # Short URL -> replacement text, so a link that appears several times is
    # only resolved over the network once.
    resolved = {}

    def swap(match):
        url = match.group(0)
        if url in resolved:
            return resolved[url]
        replacement = url  # fall back to leaving the link as it is
        redirected_to = requests.head(url, allow_redirects=True).url
        capture = ISBN1.match(redirected_to) or ISBN2.match(redirected_to)
        if capture is None:
            print("Could not capture ISBN from %s" % redirected_to)
        else:
            isbn = capture.group(1)
            replacement = requests.head(
                "http://www.worldcat.org/isbn/%s" % isbn,
                allow_redirects=True,
            ).url
            print(url, replacement)
        resolved[url] = replacement
        return replacement

    # Substituting per match (instead of str.replace on the whole text) keeps
    # a short link from clobbering a longer link that shares its prefix.
    updated = AMZN_RE.sub(swap, text)

    # Only touch the file when at least one link was actually rewritten.
    if updated != text:
        with codecs.open(filename, 'w', encoding='utf-8') as handle:
            handle.write(updated)
# Rewrite the Amazon links in every Markdown post matched by the pattern.
POSTS_PATTERN = '../tmcw.github.com/_posts/*.md'
for post_path in glob.glob(POSTS_PATTERN):
    remove_amazon(post_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment