Skip to content

Instantly share code, notes, and snippets.

@zhasm
Created May 23, 2011 07:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save zhasm/986361 to your computer and use it in GitHub Desktop.
Save zhasm/986361 to your computer and use it in GitHub Desktop.
Unshorten shortened URLs
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#author: rex
#blog: http://iregex.org
#filename unshortenurl.py
#created: 2011-05-23 15:35
import pycurl
import StringIO
import re
class UnShortenUrl(object):
    """Look up the redirect target of a shortened URL.

    Nothing is requested when the object is created.  Each ``str()``
    conversion performs one request for ``self.url`` and returns the value
    of the first ``Location`` header found in the response, or ``""`` when
    the response carries none.  Redirect following is not enabled on the
    curl handle, so a chain of redirects is resolved by one hop only.
    """

    # Captures the value of a ``Location`` header line.  The whitespace
    # after the colon is optional so ``Location:http://...`` is accepted.
    _LOCATION_RE = re.compile(r"(?mi)^Location:[ \t]*(.+)$")

    def __init__(self, url):
        """Prepare a reusable curl handle and header buffer for ``url``.

        Args:
            url: The (shortened) URL whose redirect target is wanted.
        """
        # Imported locally so the class does not depend on the
        # Python-2-only ``StringIO`` module; pycurl hands byte strings to
        # the write callback, which is what BytesIO stores.
        from io import BytesIO

        self.url = url
        c = pycurl.Curl()
        # NOBODY: do not download a body.  HEADER: pass the response
        # headers to the write callback so they land in the buffer.
        c.setopt(c.NOBODY, True)
        c.setopt(c.HEADER, True)
        f = BytesIO()
        c.setopt(c.WRITEFUNCTION, f.write)
        self.curl = c
        self.f = f

    @staticmethod
    def _parse_location(headers):
        """Return the first ``Location`` header value in ``headers``.

        Args:
            headers: Raw response headers, as text or as a byte string.

        Returns:
            The header value with surrounding whitespace removed, or ``""``
            when no ``Location`` header is present.
        """
        # On Python 3 the buffer yields bytes; decode so the text pattern
        # applies.  (On Python 2 ``bytes`` is ``str`` and this is skipped.)
        if isinstance(headers, bytes) and not isinstance(headers, str):
            headers = headers.decode("iso-8859-1")
        match = UnShortenUrl._LOCATION_RE.search(headers)
        if match is None:
            return ""
        # strip() drops the "\r" left over from CRLF line endings.
        return match.group(1).strip()

    def __str__(self):
        """Request ``self.url`` and return its redirect target.

        Returns:
            The first ``Location`` header value of the response, or ``""``
            if there is none.

        Raises:
            pycurl.error: Propagated unchanged from ``perform()`` when the
                transfer itself fails.
        """
        c = self.curl
        f = self.f
        c.setopt(c.URL, self.url)
        # Empty the buffer left over from a previous conversion.  Rewind
        # first: truncating alone does not move the write position.
        f.seek(0)
        f.truncate()
        c.perform()
        return self._parse_location(f.getvalue())
# Links on the supported shortening services: scheme, one of the known
# hosts, then a path.  Requiring "/" right after the host keeps look-alike
# hosts (e.g. "is.gdansk.example") from matching.
_SHORT_URL_RE = re.compile(r"(?i)(https?://(?:j\.mp|is\.gd|goo\.gl)/\S*)")


def UnShortenAll(text, resolve=None):
    """Replace every recognised short link in ``text`` with its target.

    Args:
        text: Text that may contain j.mp, is.gd or goo.gl links.
        resolve: Optional callable taking a short URL and returning the
            expanded URL (or ``""`` if it cannot be expanded).  Defaults to
            a lookup through ``UnShortenUrl``.

    Returns:
        ``text`` with each short link replaced by its expansion.  A link
        whose expansion comes back empty is left as it was.
    """
    if resolve is None:
        def resolve(url):
            return str(UnShortenUrl(url))

    def _unshorten(match):
        short_url = match.group(1)
        # An empty result means no redirect target was found; keep the
        # original link instead of deleting it from the text.
        return resolve(short_url) or short_url

    return _SHORT_URL_RE.sub(_unshorten, text)
def main():
    """Expand the short links in the first command-line argument.

    Prints the expanded text to stdout.  Diagnostics go to stderr so they
    do not mix with the result when the output is piped.

    Returns:
        0 on success, 1 if the expansion raised an error, 2 if no
        argument was given.
    """
    import sys

    # Check for the argument up front so a missing one is reported as a
    # usage problem rather than as an IndexError message.
    if len(sys.argv) < 2:
        sys.stderr.write('''Usage: python %s <URL>''' % sys.argv[0] + "\n")
        return 2

    try:
        expanded = UnShortenAll(sys.argv[1])
    except Exception as e:
        # Top-level boundary: report the failure (e.g. a transfer error)
        # instead of showing the usage text for something unrelated to it.
        sys.stderr.write("Error: %s\n" % e)
        return 1

    print(expanded)
    return 0


if __name__ == '__main__':
    import sys

    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment