Skip to content

Instantly share code, notes, and snippets.

@yalla
Created March 17, 2017 18:55
Show Gist options
  • Save yalla/0c20c6af97c9de25603519a8f6fc0750 to your computer and use it in GitHub Desktop.
Unshorten URLs, no matter how deep the rabbit hole goes.
import sys
import httplib
import urlparse
# Maximum number of redirects to follow before giving up.  A simple
# depth cap to avoid infinite redirect loops; no redirect graph is
# built and no real cycle detection is attempted.
maxredir = 10
# The `orig` argument of longurl() holds the previously requested URL.
# It is compared against the current one because some sites answer with
# a 30x redirect pointing back at the URL that was just requested,
# instead of returning a 200 for the final URL.
def longurl(url, orig, count):
    """Follow HTTP redirects until a final URL is reached.

    url   -- the URL to resolve on this hop
    orig  -- the URL of the previous hop ("nil" on the first call); used
             to stop when a site redirects back to the URL just requested
    count -- current redirect depth, capped by the module-level maxredir

    Returns the last URL in the redirect chain (the first one that does
    not answer with a followable redirect).
    """
    p = urlparse.urlparse(url)
    # Use the matching connection class: the original always used
    # HTTPConnection, which contacted https:// targets over plain HTTP
    # on port 80.
    if p.scheme == 'https':
        h = httplib.HTTPSConnection(p.netloc)
    else:
        h = httplib.HTTPConnection(p.netloc)
    # Never send an empty request path, and keep the query string --
    # many shorteners encode the token in the query.
    path = p.path or '/'
    if p.query:
        path = path + '?' + p.query
    try:
        h.request('HEAD', path)
        r = h.getresponse()
        # Follow every common redirect status, not only 301/302.
        if (r.status in (301, 302, 303, 307, 308)
                and r.getheader('Location')
                and url != orig and count < maxredir):
            # Location may be relative (allowed by RFC 7231); resolve it
            # against the current URL before recursing.
            nexturl = urlparse.urljoin(url, r.getheader('Location'))
            return longurl(nexturl, url, count + 1)
        return url
    finally:
        # Always release the socket, even on errors.
        h.close()
print longurl(sys.argv[1], "nil", 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment